PrettyLib

{- A simple pretty printer combinator library. A pretty printer converts an inductively defined data structure (i.e. a tree) into a text (i.e. a list of lines). Indentation of lines reflects the tree form of the data structure. However, to produce only a minimal number of lines substructures are put on a single line as far as possible. The combinators provided here enable simple definition of a pretty printer for any tree structure. The pretty printer requires time linear in the size of the data structure and space linear in the desired maximal width of the output. The interface of the library is based on: Philip Wadler: A prettier printer, http://cm.bell-labs.com/cm/cs/who/wadler/topics/recent.html The implementation uses some ideas from Derek C Oppen: Prettyprinting, TOPLAS volume 2 number 4, ACM, 1980, 465-483. (Wadler's implementation performs some backtracking and hence may require time linear in both the size of the data structure and the maximal width.) -}

{- For debugging with Hood: import Observe instance Observable Doc where observer NIL = send "NIL" (return NIL) observer (doc1 :<> doc2) = send "(:<>)" (return (:<>) << doc1 << doc2) observer (TEXT s) = send "TEXT" (return TEXT << s) observer (LINE s) = send "LINE" (return LINE << s) observer (GROUP s) = send "GROUP" (return GROUP << s) observer (NEST i doc) = send "NEST" (return NEST << i << doc) instance Observable Token where observer (Text s l) = send "Text" (return Text << s << l) observer (Line s l i) = send "Line" (return Line << s << l << i) observer Open = send "Open" (return Open) observer Close = send "Close" (return Close) instance Observable a => Observable (Q1 a) where observer (Q1 lenf f lenr r) = send "Q1" (return Q1 << lenf << f << lenr << r) instance Observable a => Observable (Q2 a) where observer (Q2 f r) = send "Q2" (return Q2 << f << r) -} {- Exported definitions ================================================== -} -- Core pretty printer combinators ------------------------------------------

-- Derived combinators. Each one is defined purely in terms of the core
-- combinators group, nest, text and <>.

-- Nest the document by the given indentation and make the result a group.
groupNest :: Int -> Doc -> Doc
groupNest indentation doc = group (nest indentation doc)

-- The string enclosed in double quote characters.
-- The content of the string is not escaped.
doubleQuotes :: String -> Doc
doubleQuotes s = text ('"' : s ++ "\"")

-- The document enclosed in round parentheses.
parens :: Doc -> Doc
parens doc = text "(" <> doc <> text ")"

-- The document enclosed in square brackets.
brackets :: Doc -> Doc
brackets doc = text "[" <> doc <> text "]"

-- The document enclosed in curly braces.
braces :: Doc -> Doc
braces doc = text "{" <> doc <> text "}"

-- The string enclosed in double quote characters, without escaping
-- (same definition as doubleQuotes).
string :: String -> Doc
string s = text ('\"' : s ++ "\"")

-- Render a token stream without any layout decisions: every token
-- contributes its own text, a Line contributes a newline followed by its
-- indentation, and the bracket tokens contribute nothing.
token2String :: [Token] -> String
token2String = concatMap render
  where
  -- The piece of output that a single token stands for.
  render :: Token -> String
  render (Text s _) = s
  render (Delimiter s _ _) = s
  render (FDelimiter s _ _) = s
  render (Line i) = '\n' : replicate i ' '
  render Open = ""
  render Close = ""

-- Turn a document tree into a flat stream of tokens.
-- GROUP becomes a pair of Open/Close brackets around its content and NEST
-- only increases the indentation recorded in the delimiter and line tokens
-- below it.
flatten :: Doc -> [Token]
flatten document = emit 0 document []
  where
  -- emit indent d following: the tokens of d, under the current indentation,
  -- put in front of the already flattened tokens `following'. Passing the
  -- remainder along avoids repeated list concatenation.
  emit :: Int -> Doc -> [Token] -> [Token]
  emit indent d following =
    case d of
      NIL -> following
      left :<> right -> emit indent left (emit indent right following)
      TEXT s -> Text s (length s) : following
      DELIMITER s -> Delimiter s (length s) indent : following
      FDELIMITER s -> FDelimiter s (length s) indent : following
      LINE -> Line indent : following
      GROUP inner -> Open : emit indent inner (Close : following)
      NEST extra inner -> emit (indent + extra) inner following

-- Eliminate all FDelimiter tokens from a token stream.
-- Every `FDelimiter s l i' is replaced by `Open, Delimiter s l i', i.e. it
-- starts a new group. The matching Close for that group is inserted later:
-- directly in front of the next Close, FDelimiter, Delimiter or Line token
-- that is met at the same nesting depth, or at the end of the stream.
-- All other tokens are passed through.
removeFDelimiter :: [Token] -> [Token]

removeFDelimiter = go 0 [] 
  where
  {-
  Invariants: elements of stack are strictly sorted, largest on top
              nesting depth is greater or equal top element of stack (if ex.)
  -}
  -- The stack holds the nesting depths at which a group opened for an
  -- FDelimiter is still waiting for its closing bracket.
  go :: Int {- nesting depth of brackets; without new ones -}
     -> [Int] {- stack for closing brackets that still have to be inserted -}
     -> [Token] {- stream of tokens with FDelimiter -}
     -> [Token] {- stream of tokens without FDelimiter -}
  go _ [] [] = [] -- first argument should be 0
  go _ _  [] = [Close] -- the stack can at most contain one entry
  -- An original opening bracket only increases the nesting depth.
  go d toClose (Open : tokens) = Open : go (d+1) toClose tokens
  -- An original closing bracket: a group pending at this depth is closed
  -- first, then the bracket itself is emitted and the depth decreases.
  go d toClose (Close : tokens) = 
    possiblyClose (Close :) (subtract 1) id d toClose tokens
  -- An FDelimiter closes a group pending at this depth, then opens a new
  -- group starting with an ordinary Delimiter and records the current depth
  -- on the stack so that the new group gets closed later.
  go d toClose (FDelimiter s l i : tokens) =
    possiblyClose (\x -> Open : Delimiter s l i : x) id (d:) d toClose tokens
  -- Delimiter and Line end a group pending at this depth and are otherwise
  -- emitted unchanged.
  go d toClose (Delimiter s l i : tokens) =
    possiblyClose (Delimiter s l i :) id id d toClose tokens
  go d toClose (Line i : tokens) =
    possiblyClose (Line i :) id id d toClose tokens
  -- Any remaining token (e.g. Text) is copied.
  go d toClose (text : tokens) = text : go d toClose tokens

  -- If the top of the stack equals the current nesting depth, emit a Close
  -- and pop that entry. In either case then emit the output described by the
  -- first argument and continue with the depth and the (possibly popped)
  -- stack changed by the second and third argument.
  -- When the guard of the first equation fails, or the stack is empty, the
  -- second equation applies and the stack is passed on unpopped.
  possiblyClose :: ([Token] -> [Token])  -- what to put into output stream
                -> (Int -> Int)          -- how to change nesting depth
                -> ([Int] -> [Int])      -- how to change stack
                -> Int                   -- nesting depth
                -> [Int]                 -- stack
                -> [Token]
                -> [Token]
  possiblyClose modOut modDepth modStack d (close : toClose) tokens
    | close == d = Close : modOut (go (modDepth d) (modStack toClose) tokens) 
  possiblyClose modOut modDepth modStack d toClose tokens =
    modOut (go (modDepth d) (modStack toClose) tokens)

{-
Normalise a stream of tokens with respect to the following rewriting rules:

  Open,  Text s l   ==>   Text s l, Open
  Close, Text s l   ==>   Text s l, Close

Afterwards any block of brackets is followed by a `Delimiter s l i', a
`Line i' or the end of the stream.
-}

-- Move brackets to the right past text tokens, so that brackets only occur
-- directly in front of a Delimiter, a Line or the end of the stream.
-- A closing bracket that meets a deferred opening bracket cancels it.
normalise :: [Token] -> [Token]
normalise tokens = defer 0 0 tokens
  where
  -- defer closes opens ts: `closes' closing and `opens' opening brackets
  -- have been read but not yet emitted; ts is the rest of the input.
  defer :: Int -> Int -> [Token] -> [Token]
  -- At the end only the closing brackets are emitted; no opening brackets
  -- should be left at this point.
  defer closes _ [] = replicate closes Close
  defer closes opens (tok : rest) =
    case tok of
      Open -> defer closes (opens + 1) rest
      Close
        | opens == 0 -> defer (closes + 1) opens rest
        | otherwise -> defer closes (opens - 1) rest
      Delimiter _ _ _ -> flushed
      Line _ -> flushed
      _ -> tok : defer closes opens rest
    where
    -- All deferred brackets (closing ones first), then the current token,
    -- then the normalised remainder starting without deferred brackets.
    flushed =
      replicate closes Close ++ replicate opens Open ++ tok : defer 0 0 rest

{-
The list of tokens is optimally pretty printed within the given width.
Precondition: between a `Close' and a `Text s l' there is always a
`Delimiter s l i' or a `Line i'.
For efficiency it is also better if no `Open' is directly followed by a
`Text s l'.
A normalised token list has this property.
-}

-- Lay out a token stream for a line width of `width' characters and return
-- the resulting text.
-- Text tokens are copied to the output. A Line token always yields a newline
-- followed by its indentation. A Delimiter yields its own text while inside a
-- fitting group and a newline followed by its indentation otherwise.
layout :: Int {- width of layout -}
       -> [Token] 
       -> String

layout width tokens = snd $ go width 1 empty1 0 tokens
  where
  -- The first queue (Q1 Int) is passed forward along the token stream; the
  -- second queue (Q2 Bool) is the result of the recursive call on the
  -- remaining tokens and is handed to the queue operation of the current
  -- token. The bindings in the where clauses below are therefore mutually
  -- dependent and rely on lazy evaluation.
  go :: Int -- space remaining in current line (in characters)
     -> Int -- current stream position (in characters)
     -> Q1 Int -- queue of maximal streamposition for end of group to fit
     -> Int -- how deep inside a fitting group (<= 0: not in fitting group)
     -> [Token]
     -> (Q2 Bool -- queue which states if groups fit
        ,String) -- optimally laid out output
  go _ _ groupsMaxEnd _ [] = (empty2, "")
  -- Start of a group: r+p is entered into the queue as the maximal end
  -- position of the group. fits' is the element that cons delivers from the
  -- second queue; it is used as the answer whether this group fits.
  go r p groupsMaxEnd fitDepth (Open : tokens) = (groupFits', string)
    where
    (groupsMaxEnd', fits', groupFits') = cons (r+p) groupsMaxEnd groupsFits
    (groupsFits, string) = go r p groupsMaxEnd' fitDepth' tokens
    -- Already inside a fitting group: one level deeper. Otherwise the depth
    -- becomes 1 if this group fits and 0 if it does not.
    fitDepth' = if fitDepth > 0 then succ fitDepth else if fits' then 1 else 0
  -- End of a group: with an empty queue only the depth is decreased.
  -- Otherwise an entry is removed from the queue with lview and True is
  -- passed for it to the second queue.
  -- NOTE(review): lview and cons are not defined in this part of the file;
  -- their exact behaviour should be confirmed there.
  go r p groupsMaxEnd fitDepth (Close : tokens) 
    | isEmpty1 groupsMaxEnd = go r p groupsMaxEnd (pred fitDepth) tokens
    | otherwise = (groupFits', string)
    where
    (_, groupsMaxEnd', groupFits') = lview groupsMaxEnd True groupFits
    (groupFits, string) = go r p groupsMaxEnd' (pred fitDepth) tokens
  -- Inside a fitting group the delimiter text is printed and uses up l
  -- characters of the line; otherwise a new line with indentation i is
  -- started, leaving width-i characters. The stream position advances by l
  -- in both cases.
  go r p groupsMaxEnd fitDepth (Delimiter s l i : tokens)  = 
    (groupFits', output ++ string)
    where
    (output, newRemainingSpace) 
      | fitDepth > 0 = (s, r-l)
      | otherwise    = ('\n' : replicate i ' ', width-i)
    (groupFits', string) = 
      checkGroupsMaxEnd newRemainingSpace (p+l) groupsMaxEnd fitDepth tokens 
  go r p groupsMaxEnd fitDepth (Line i : tokens) =
    -- a compulsory newline makes all surrounding blocks not to fit
    -- (every entry of the queue is answered with False and the remaining
    -- tokens are processed with an empty queue)
    (map1To2 (\_ -> False) groupsMaxEnd, '\n' : replicate i ' ' ++ string)
    where
    (_, string) = go (width-i) p empty1 fitDepth tokens
  -- Text is always printed; it uses up l characters of the line.
  go r p groupsMaxEnd fitDepth (Text s l : tokens) = (groupFits', s ++ string)
    where
    (groupFits', string) = 
      checkGroupsMaxEnd (r-l) (p+l) groupsMaxEnd fitDepth tokens 

  -- As long as the queue is not empty and the stream position p lies beyond
  -- the maximal end position obtained with rview, that entry is removed and
  -- answered with False. Afterwards processing continues with go.
  -- Because || is evaluated left to right, maxEnd (and hence rview) is not
  -- demanded for an empty queue.
  checkGroupsMaxEnd :: Int -> Int -> Q1 Int -> Int -> [Token] 
                    -> (Q2 Bool, String)
  checkGroupsMaxEnd r p groupsMaxEnd fitDepth tokens 
    | isEmpty1 groupsMaxEnd || p <= maxEnd = 
      go r p groupsMaxEnd fitDepth tokens
    | otherwise = (groupsFits', string)
    where
    (groupsMaxEnd', maxEnd, groupsFits') = rview groupsMaxEnd groupsFits False
    (groupsFits, string) = checkGroupsMaxEnd r p groupsMaxEnd' fitDepth tokens

{-
Special double ended queues
--------------------------------------------
Two related kinds of double ended queue types are defined.
A queue operation always operates on two queues, one of each kind.
Each operation performs an operation on one queue and exactly the inverse
operation on the other queue. For example, an operation that splits one queue
into its first element and the remaining queue, also puts an element in front
of the other queue to obtain a longer queue.

The inverse operations are very lazy.
Let there be a sequence of queue operations such that the first queue
resulting from an operation is always the input first queue to the *next*
operation and the second queue resulting from an operation is always the
input second queue to the *preceding* operation.
Let the first operation `cons' an element to the front of the first queue and
the last operation remove exactly this element from the first queue (either
front or rear). Then the last operation adds another element to the second
queue and the first operation removes exactly this element.
The operations on the second queue are so lazy that for obtaining this element
the second queue which is input to the last operation is not demanded, that
is, does not need to be evaluated.

Note that the inverse operations only work correctly if the two queues are
passed in such a sequence. The implementation does not only assume that the
two queues handled by an operation have the same number of elements, but also
that they have the same internal structure.
Using a monad to ascertain correct passing of queues is possible but seems
rather restrictive and hides the idea. However, maybe another kind of
sequencing interface would be good to ensure safe usage.

            element                               element
               v                                     ^
queue1 -> +----+ -> +---------+ ->    -> +---------+ ->
          |cons|    |operation|   ...    |operation|
       <- +----+ <- +---------+ <-    <- +---------+ <- queue2
               v                                     ^
            element'                              element'

Except for the additional lazy inverse operations the implementation agrees
with the Banker's queue given in:
Chris Okasaki: Purely Functional Data Structures, CUP, 1998, Section 8.4.2
-}

{-
q12List (Q1 _ f _ r) = f ++ reverse r
Furthermore, the structure contains the lengths of the two lists
Q2 does not contain lengths
-}

-- First kind of queue. The fields are, in this order: length of the front
-- list, front list, length of the rear list, rear list. The rear list is
-- kept in reverse order, so the elements of the queue are
-- front ++ reverse rear. Both lengths are strict fields.
data Q1 a = Q1 !Int [a] !Int [a] deriving Show
-- Second kind of queue: a front list and a rear list only, no lengths.
data Q2 a = Q2 [a] [a] deriving Show


-- Exchange front and rear (lists and lengths) of a queue of the first kind.
reverse1 :: Q1 a -> Q1 a
reverse1 (Q1 frontLen front rearLen rear) = Q1 rearLen rear frontLen front

-- Exchange the front and rear list of a queue of the second kind.
reverse2 :: Q2 a -> Q2 a
reverse2 (Q2 front rear) = Q2 rear front


-- The queue of the first kind without elements: both lists are empty and
-- both lengths are 0.
empty1 :: Q1 a
empty1 = Q1 0 [] 0 []
-- The queue of the second kind without elements: both lists are empty.
empty2 :: Q2 a
empty2 = Q2 [] []


-- True if the queue holds no element. Only the two stored lengths are
-- consulted; the lists themselves are not inspected.
isEmpty1 :: Q1 a -> Bool
isEmpty1 (Q1 lenf _ lenr _) = lenf + lenr == 0


-- Apply a function to every element of a queue of the first kind. The result
-- is a queue of the second kind with the same front/rear structure; the
-- stored lengths are dropped.
map1To2 :: (a -> b) -> Q1 a -> Q2 b
map1To2 g queue =
  case queue of
    Q1 _ front _ rear -> Q2 (map g front) (map g rear)

-- Build a queue of the first kind from a front list f of length lenf and a
-- rear list r of length lenr, rebalancing it if the front is too long, and
-- compute the matching lists of the second queue.
--
-- If lenf > balanceConstant * lenr + 1, the front list is cut in half and its
-- second half is moved, reversed, behind the rear list. Otherwise the lists
-- are used as they are.
--
-- The argument q2 is the second queue that belongs to the *resulting* first
-- queue. The second and third component of the result are the front and rear
-- list of the second queue that belongs to the first queue *before*
-- rebalancing, i.e. the inverse restructuring is applied to q2. This is done
-- with lsplitAt, lappend and lreverse, which are given the lengths
-- explicitly. NOTE(review): these three helpers are defined outside this
-- part of the file; they are presumably the lazy list functions described in
-- the comment that follows rview.
check :: Int -> [a] -> Int -> [a] -> Q2 b -> (Q1 a, [b], [b])
check lenf f lenr r q2 =
  if lenf > balanceConstant * lenr + 1
    then
      let
        len = lenf + lenr
        lenf' = len `div` 2
        lenr' = len - lenf'
        (f', rf') = splitAt lenf' f
        (r2, rf2) = lsplitAt lenr r2'
      in (Q1 lenf' f' lenr' (r ++ reverse rf')
         ,lappend lenf' f2' (lreverse (lenr'-lenr) rf2)
         ,r2)
    else (Q1 lenf f lenr r, f2', r2')
  where
  -- Pattern bindings are lazy: q2 is only evaluated when f2' or r2' is
  -- demanded.
  Q2 f2' r2' = q2

-- The front list may be at most balanceConstant times as long as the rear
-- list, plus one, before check rebalances the queue.
balanceConstant :: Int
balanceConstant = 3

-- Remove the rear element from a queue of the first kind and, as the inverse
-- operation, add the element y at the rear of the second queue.
--
-- The arguments are the first queue, the second queue that belongs to the
-- *resulting* first queue, and the element to add. The result consists of
-- the remaining first queue, the removed element and the second queue that
-- belongs to the *argument* first queue.
--
-- Calls error with "empty queue" if the first queue has no elements.
rview :: Q1 a -> Q2 b -> b -> (Q1 a, a, Q2 b)
-- Empty rear list: the first element of the front list is returned and the
-- remaining first queue is empty, so the second queue argument is not needed.
rview (Q1 _ (x:_) _ []) _ y = (empty1, x, Q2 [y] [])
rview (Q1 _ [] _ []) _ _ = error "empty queue"
-- The rear list is stored in reverse, so its head is the last element.
-- check rebalances the shortened queue and yields the lists of the second
-- queue, to whose rear list y is added.
rview (Q1 lenf f lenr (x:r)) q2' y = (q', x, Q2 f2 (y:r2))
  where
  (q', f2, r2) = check lenf f (lenr-1) r q2'

{- Some very lazy variants of standard list functions. They get the length of the result or a list argument as first argument and thus are able to construct the list structure of the result without demanding evaluation of any of its other arguments. Demanding some list element of the result will naturally lead to more demand of the arguments. -} {- The first argument gives the length of the argument/result list. -}