Sorting is a great triumph of functional programming

Garden variety sorting

  • Insertion Sort
  • Merge Sort
  • Bubble Sort
  • Quicksort

All rely on comparison, cmp :: a -> a -> bool



Herman Hollerith

Exotic varietals

  • Counting Sort
  • Radix Sort
  • Bucket Sort

Rely on exploiting the structure of the data



Fritz Henglein



  • Generic Top-down Discrimination for

    Sorting and Partitioning in Linear Time
  • compose
  • compose
  • compose

sort :: Order v -> [v] -> [v]
      
  •  

sort :: forall v. Order k -> [(k,v)] -> [v]
      
  • where k & v are related

sort :: forall v. Order k -> [(k,v)] -> [[v]]
      
  •  

sort _ [] = []
sort _ [(_, v)] = [[v]]
      
  •  
data Order t where
   TrivO :: Order t
   NatO  :: Int -> Order Int
   SumL  :: Order t1 -> Order t2 -> Order (Either t1 t2)
   ProdL :: Order t1 -> Order t2 -> Order (t1, t2)
   MapO  :: (t1 -> t2) -> Order t2 -> Order t1
   ListL :: Order t -> Order [t]
      

sort TrivO xs = [[ v | (_, v) <- xs ]]
      

type Disc k = forall v. [(k,v)] -> [[v]]

sort :: Order k -> Disc k
sort (NatO n) xs = discNat n xs

discNat :: Int -> Disc Int
discNat n xs = filter (not . null) (bdiscNat n update xs)

update :: [v] -> v -> [v]
update vs v = v : vs

bdiscNat :: Int -> ([v] -> v -> [v]) -> [(Int, v)] -> [[v]]
bdiscNat (n :: Int) update xs =
            map reverse (elems (accumArray update [] (0, n-1) xs))
      
SumL  :: Order t1 -> Order t2 -> Order (Either t1 t2)

sort (SumL r1 r2) xs = sort r1 [ (k, v) | (Left k, v) <- xs ]
                        ++ sort r2 [ (k, v) | (Right k, v) <- xs ]
      

ProdL :: Order t1 -> Order t2 -> Order (t1, t2)

sort (ProdL r1 r2) xs =
   [ vs | ys <- sort r1 [ (k1,(k2,v)) | ((k1,k2), v) <-xs],
          vs <- sort r2 ys ]
      

MapO  :: (t1 -> t2) -> Order t2 -> Order t1

sort (MapO f r) xs = sort r [ (f k, v) | (k, v) <- xs ]
      

ListL :: Order t -> Order [t]

sort (ListL r) xs = sort (listL r) xs

listL :: Order t -> Order [t]
listL r = MapO fromList (SumL ordUnit (ProdL r (listL r)))

fromList :: [t] -> Either () (t, [t])
fromList [] = Left ()
fromList (x : xs) = Right (x, xs)
      

ordUnit :: Order ()
ordUnit = TrivO

ordNat8 :: Order Int
ordNat8 = NatO 255
      

ordInt32 :: Order Int
ordInt32 = MapO (splitW . (+ (-2147483648))) (ProdL ordNat16 ordNat16)

splitW :: Int -> (Int, Int)
splitW x = (shiftR x 16 .&. 65535, x .&. 65535)
      

ordChar8 :: Order Char
ordChar8 = MapO ord ordNat8

ordChar16 :: Order Char
ordChar16 = MapO ord ordNat16
      

ordString8 :: Order String
ordString8 = listL ordChar8

ordString16 :: Order String
ordString16 = listL ordChar16

listL r = MapO fromList (SumL ordUnit (ProdL r (listL r)))

fromList :: [t] -> Either () (t, [t])
fromList [] = Left ()
fromList (x : xs) = Right (x, xs)
      

let ws = ["AA","BCA","BA","BB","AAC","AB"]
sort $ zip ws ws
[["AA"],["AAC"],["AB"],["BA"],["BB"],["BCA"]]

-- If we'd input ["AA","BCA","BA","BB","AAC","AB"] ++ ["BCA"]
[["AA"],["AAC"],["AB"],["BA"],["BB"],["BCA","BCA"]]
      

...and breathe


abstract class Order[A]
case class TrivO[A]() extends Order[A]
case class NatO(i: Int) extends Order[Int]
case class SumL[A,B](t1: Order[A], t2: Order[B]) extends Order[Either[A,B]]
case class ProdL[A,B](t1: Order[A], t2: Order[B]) extends Order[(A,B)]
case class MapO[A,B](f: A => B, t2: Order[B]) extends Order[A]
case class ListL[A](t: Order[A]) extends Order[Stream[A]]
      
data Order t where
   TrivO :: Order t
   NatO  :: Int -> Order Int
   SumL  :: Order t1 -> Order t2 -> Order (Either t1 t2)
   ProdL :: Order t1 -> Order t2 -> Order (t1, t2)
   MapO  :: (t1 -> t2) -> Order t2 -> Order t1
   ListL :: Order t -> Order [t]
      

val ordUnit = TrivO[Unit]()
val ordNat8 = NatO(255)
val ordInt32 = MapO(splitW º {x:Int => x+(-2147483648)}, ProdL(ordNat16, ordNat16))
def splitW: Int => (Int,Int) = x => ((x>>16) & 65535,x & 65535)
val ordChar8 = MapO({x: Char => x.toInt},ordNat8)
val ordString8 = ListL(ordChar8)
     

ordUnit = TrivO
ordNat8 = NatO 255
ordInt32 = MapO (splitW . (+ (-2147483648))) (ProdL ordNat16 ordNat16)
splitW x = (shiftR x 16 .&. 65535, x .&. 65535)
ordChar8 = MapO ord ordNat8
ordString8 = listL ordChar8
     
def sort[A,B](ord: Order[A], xs: Stream[(A,B)]): Stream[Stream[B]] =
   xs match {
      case Stream() => Stream()
      case Stream((_,v)) => Stream(Stream(v))
      case _ => ord match {
         case TrivO() => Stream(xs map { _._2 })
         case n: NatO => sortNat(n,xs)
         case SumL(orda,ordb) => sortSum(orda,ordb,xs)
         case ProdL(orda,ordb) => sortProd(orda,ordb,xs)
         case MapO(f,ordb) => sortMap(f,ordb,xs)
         case ListL(orda) => sortList(orda,xs)
      }
   }
def stripPartition[A](l: Stream[A]): (Either[Unit,(A,Stream[A])])  =
   l.headOption.cata( { x: A => Right((x,l.tail)) }, { Left(()) } )
def sortList[A,B](orda: Order[A], xs: Stream[(Stream[A],B)]): Stream[Stream[B]] =
   sort(MapO(stripPartition[A], SumL(ordUnit,ProdL(orda,ListL(orda)))), xs)
def sortMap[A,B,C](f: A => B, ordb: Order[B], xs: Stream[(A,C)]): Stream[Stream[C]] =
   sort(ordb,xs map { case (a,c) => (f(a),c) })
def sortProd[A,B,C](orda: Order[A], ordb: Order[B], xs: Stream[((A,B),C)]): Stream[Stream[C]] =
   sort(orda,xs map { case ((a,b),c) => (a,(b,c)) }) flatMap { sort(ordb,_) }
def sortSum[A,B,C](orda: Order[A], ordb: Order[B], xs: Stream[(Either[A,B],C)]): Stream[Stream[C]] = {
   val (lefts,rights) = xs.foldRight((Stream[(A,C)](),Stream[(B,C)]())) {
      case ((Left(x),v),(ls,rs)) => (ls :+ (x,v),rs)
      case ((Right(x),v),(ls,rs)) => (ls,rs :+ (x,v))
   }
   sort(orda,lefts) ++ sort(ordb,rights)
}
     

def sortNat[A](z: NatO, xs: Stream[(Int,A)]): Stream[Stream[A]] = {
  val arr = new Array[MutableList[A]](z.i+1)
  xs.foreach { arg: (Int,A) =>
     val idx = arg._1
     if(arr(idx) == null) arr(idx) = MutableList[A](arg._2)
     else arr(idx) += arg._2
  }
  arr.toStream.filter(_ != null).map(_.toStream)
}
     
  • mutable at the core...

Semantics!

  • Lazy vs Strict matters!
  • Garbage collection
  • Recursion
  • Stability

Scala vs Haskell - FIGHT!

  • Not much to fight about
  • Both more or less can encode the algorithm
  • Scala
    • more verbose
    • typesystem needs more hand holding
    • lacks 'proper' tail-calls, polyfill with tramoplining & Free
    • you have to explicitly manage lazyness
    • garbage creation/collection is a cost
    • friends don't let friends use Stream
  • Haskell
    • more natural encoding
    • still has garbage creation/coll cost
    • 'probably' harder to encode dependent types as optimizations

Advice

  • Read the paper
  • Don't worry about the proofs
    • (iff it has been peer reviewed)
  • Read the paper again; you will miss vital things the first time

References

...phew!