module Statistics where
import List(sortBy)
import OpTypes(cmpBy)
import Char(toUpper)
import PrettyPrint
import Array
-- | Pretty-print summary statistics for a list of (label, value) observations.
--
-- descv names the measured quantity and descu names the unit being counted;
-- both are spliced into the report headings.  An empty observation list
-- produces an empty document.  Otherwise the report shows the number of
-- observations, the total, the integer average, the median, the extreme
-- observations (or all of them when there are few) and a histogram.
ppStatistics descv descu [] = empty
ppStatistics descv descu xs =
    cap descu<+>"count:"<+> count
    $$ "Total"<+>descv<>":" <+> total
    $$ "Average"<+>descv<+>"per"<+>descu<>":"<+> (total `div` count)
    $$ "Median "<+>descv<+>"per"<+>descu<>":"<+> median count (map snd ranked)
    $$ extremes
    $$ "Histogram:"<+> ppHistogram values
  where
    -- The measured values, in their original order.
    values = map snd xs
    count = length xs
    total = sum values
    -- Observations ordered by value (presumably ascending; depends on cmpBy).
    ranked = sortBy (cmpBy snd) xs
    -- How many observations to list at each end of the ranking.
    top = 10
    -- One observation rendered as "label: value".
    ppObs (label, value) = label<>":"<+>value
    -- With only a few observations list them all; otherwise show just the
    -- last `top` (reversed) and the first `top` entries of the ranking.
    extremes
      | count > 2*top+5 =
          "Top 10:"<+> vcat (map ppObs . take top . reverse $ ranked)
          $$ "Bottom 10:"<+> vcat (map ppObs . take top $ ranked)
      | otherwise = "All :"<+> vcat (map ppObs ranked)
-- | Capitalise the first character of a string; the empty string is
-- returned unchanged.
cap str =
  case str of
    [] -> []
    initial : rest -> toUpper initial : rest
-- | Median of a list whose length n is supplied by the caller.
-- pre: n == length xs
-- The list is indexed positionally, so the caller is expected to pass it
-- already sorted.  For an even n the result is the integer mean of the two
-- middle elements.
median n xs
  | odd n     = xs !! mid
  | otherwise = (lower + upper) `div` 2
  where
    mid = n `div` 2
    -- The two elements straddling the midpoint (only forced when n is even).
    lower : upper : _ = drop (mid - 1) xs
-- | Render a textual histogram of a list of values, one line per bin.
--
-- Each line is a bar of '*' characters followed by the percentage of values
-- that fall into that bin.  Bars are scaled so the fullest bin is at most 50
-- characters wide.  An empty input yields an empty document.  Values are
-- binned by 'histogram', which expects them to be non-negative.
ppHistogram [] = empty
ppHistogram xs = vcat (map bar h)
  where
    -- One bar: its length is relative to the fullest bin, the percentage is
    -- relative to the total number of values.
    bar x = "|"<>replicate (x*scale `div` m) '*'<+> (100*x `div` s)<>"%"
    h = histogram n xs
    m = maximum h
    scale = min m 50
    s = sum h
    -- Number of bars: at most 10, but never fewer than 1.  Without the lower
    -- bound an input whose maximum is 0 would request zero bins, which makes
    -- 'histogram' index an empty array and leaves `maximum h` undefined.
    n = max 1 (min 10 (maximum xs))
-- | Count how many of the values fall into each of n equal-width bins
-- covering the range from 0 up to the largest value in the list.  The
-- result has one count per bin, lowest bin first.
histogram n xs = elems counts
  where
    counts = accumArray (+) 0 (0, n - 1) (map tally xs)
    -- Each value contributes one to the bin it belongs to.
    tally v = (bucket v, 1)
    -- Dividing by (largest + 1) keeps the largest value inside bin n-1.
    bucket v = v * n `div` (largest + 1)
    largest = maximum xs