module LexerGen2(lexerGen,OutputFun(..)) where import RegExp(Trans(..),Transducer) import DFA(DFA(..),renumberEdges,tokenClasses,showDFA) import Minimalize import CompileRegExp(compile) import DetMachineToHaskell2(dfaToHaskell,OutputFun(..)) import PPrint(pprint) import qualified OrdMap as OM(fromList) import List(sort) import HaskellChars(HaskellChar)
The lexer generator takes the name of the module to generate, the name of the lexer function to export from that module and the regular expression that defines the lexical syntax. It returns the generated Haskell module as a string.
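lexerGen
also consults the command line arguments. If
the word nocc is present, it does not use character classes to reduce
the size of the code. If the word dfa is present, it outputs the
minimized DFA itself instead of generating Haskell code.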
-- | Build the text of a lexer module from a regular-expression program.
--
-- The program is compiled, converted with 'dfa2old', optionally relabelled
-- with character classes, minimalized, and finally either shown as a DFA
-- or pretty-printed as Haskell code.
--
-- Arguments:
--
--   * @moduleName@   - passed on to 'dfaToHaskell' as the module name.
--   * @functionName@ - passed on to 'dfaToHaskell' as the function name.
--   * @program@      - the transducer describing the lexical syntax.
--   * @args@         - option words; only @"dfa"@ and @"nocc"@ are examined.
lexerGen :: (Ord o,Show o,OutputFun o) =>
            String -> String -> Transducer HaskellChar o -> [String] -> String
lexerGen moduleName functionName program args =
    emit (dfa2old (compile program))
  where
    -- "dfa" present: show the DFA instead of generating Haskell.
    wantsDfaDump = "dfa" `elem` args
    -- Character classes are used unless "nocc" is present.
    wantsCharClasses = "nocc" `notElem` args

    emit
      | wantsCharClasses = emitWithCharClasses
      | otherwise        = emitMachine Nothing

    -- Number the sorted token classes from 1, map every character to the
    -- number of its class, and renumber the DFA edges with that mapping.
    emitWithCharClasses (header, machine) =
        emitMachine (Just classOf) (header, renumberEdges classOf machine)
      where
        classes = sort (tokenClasses machine)
        classOf = [ (ch, k)
                  | (k, (chars, _)) <- zip [(1 :: Int) ..] classes
                  , ch <- chars
                  ]

    -- Minimalize the machine, then either show it or emit Haskell source
    -- preceded by a short generated-code header.
    emitMachine optccs dfa0
      | wantsDfaDump = showDFA minimal
      | otherwise =
          concat
            [ "\n-- Automatically generated code for a DFA follows:\n"
            , "--Equal states: ", show equalStates, "\n"
            , "{-# OPTIONS_GHC -O #-}\n"
            , pprint generated
            ]
      where
        (equalStates, minimal) = minimalize dfa0
        -- NOTE(review): ["Char","HsLexUtils"] is presumably the import list
        -- of the generated module -- confirm against dfaToHaskell.
        generated =
          dfaToHaskell optccs moduleName ["Char","HsLexUtils"] functionName minimal
A function to convert from the new to the old DFA representation...
-- | Convert a DFA given as a list of @(state, (isFinal, edges))@ entries
-- into the older form: a pair of @(1, final states)@ and a 'DFA' built
-- from an ordered map of states.
--
-- Each state's edges are split into two association lists: those labelled
-- with 'I' and those labelled with 'O'.
-- NOTE(review): the constant 1 is presumably the start state -- confirm.
dfa2old newDfa =
    ((1 :: Int, accepting), DFA (OM.fromList (map convert newDfa)))
  where
    -- States whose flag is set.
    accepting = [ st | (st, (isFinal, _)) <- newDfa, isFinal ]

    -- Keep the state number; partition its edges by label constructor.
    convert (st, (_, edges)) = (st, (inEdges, outEdges))
      where
        inEdges  = [ (i, target) | (I i, target) <- edges ]
        outEdges = [ (o, target) | (O o, target) <- edges ]