-- (c) CP (2012/3) -- NB: this is not a proper library, it is just an "ad hoc" illustration of -- the map-reduce programming strategy. A more accurate account -- is given in: R. Lammel's "Google's MapReduce programming model --- Revisited", -- http://dx.doi.org/10.1016/j.scico.2007.07.001 import Data.List import Cp import Exp import BTree import LTree doc1 = "This is an example of document and a good example" doc2 = "This is another document" doc3 = "A piece of text is given here as example" a |-> b = (a,b) db = map (id> doc1, 2 |-> doc2, 3 |-> doc3 ] -- prepare data prepare = anaLTree lsplit -- map mapstep :: (Functor f, Eq a, Eq t) => f (t, [a]) -> f [(a, [t])] mapstep = fmap (freq.(map swap.lstr)) -- reduce reducestep :: Eq b => LTree [(b, [a])] -> [(b, [a])] reducestep = cataLTree (either id (uncurry col)) -- build searchable db db1 = (anaBTree qsep . reducestep . mapstep . prepare) db -- demos of intermediate steps d1= (pict . prepare) db d2= (pict . mapstep . prepare) db d3= (pict . reducestep . mapstep . prepare) db d4= pict db1 -- aux functions freq l = nub [ (a, collect a l) | (a,b) <- l ] where collect a l = [ b | (x,b) <- l, x == a] col :: Eq b => [(b, [a])] -> [(b, [a])] -> [(b, [a])] col m n = m .\ n ++ n .\ m ++ [(a,x++concat[l' | (a',l') <- n, a'==a]) | (a,x) <- m .= n] m .\ n = [(a,b) | (a,b) <- m, not(a `elem` (map fst n))] m .= n = [(a,b) | (a,b) <- m, a `elem` (map fst n)]