-- (c) CP (2012/3)

-- NB: this is not a proper library, it is just an "ad hoc" illustration of
-- the map-reduce programming strategy. A more accurate account
-- is given in: R. Lammel's "Google's MapReduce programming model --- Revisited",
-- http://dx.doi.org/10.1016/j.scico.2007.07.001

import Data.List
import Cp
import Exp
import BTree
import LTree

doc1 = "This is an example of document and a good example"
doc2 = "This is another document"
doc3 = "A piece of text is given here as example"

a |-> b = (a,b)

db = map (id><words) [ 1 |-> doc1, 2 |-> doc2, 3 |-> doc3 ]  

-- prepare data
prepare = anaLTree lsplit

-- map
mapstep :: (Functor f, Eq a, Eq t) => f (t, [a]) -> f [(a, [t])]
mapstep = fmap (freq.(map swap.lstr)) 

-- reduce
reducestep :: Eq b => LTree [(b, [a])] -> [(b, [a])]
reducestep = cataLTree (either id (uncurry col))

-- build searchable db

db1 = (anaBTree qsep . reducestep . mapstep . prepare) db

-- demos of intermediate steps

d1= (pict . prepare) db
d2= (pict . mapstep . prepare) db
d3= (pict . reducestep . mapstep . prepare) db
d4= pict db1

-- aux functions

freq l = nub [ (a, collect a l) | (a,b) <- l ]
         where collect a l = [ b | (x,b) <- l, x == a]

col :: Eq b => [(b, [a])] -> [(b, [a])] -> [(b, [a])]
col m n =
    m .\ n ++ n .\ m ++ [(a,x++concat[l' | (a',l') <- n, a'==a]) | (a,x) <- m .= n]

m .\ n = [(a,b) | (a,b) <- m, not(a `elem` (map fst n))]

m .= n = [(a,b) | (a,b) <- m, a `elem` (map fst n)]