LexerGen2.hs

module LexerGen2(lexerGen,OutputFun(..)) where

import RegExp(Trans(..),Transducer)
import DFA(DFA(..),renumberEdges,tokenClasses,showDFA)
import Minimalize
import CompileRegExp(compile)
import DetMachineToHaskell2(dfaToHaskell,OutputFun(..))
import PPrint(pprint)
import qualified OrdMap as OM(fromList)
import List(sort)
import HaskellChars(HaskellChar)

The lexer generator takes the name of the module to generate, the name of the lexer function to export from that module and the regular expression that defines the lexical syntax. It returns the generated Haskell module as a string.

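lexerGen also consults the command-line arguments, which the caller passes in as its last argument. If the word nocc is present, it does not use character classes to reduce the size of the code. If the word dfa is present, it outputs the DFA instead of generating Haskell code.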

-- Generate the text of a lexer module.
--
-- Arguments, in order:
--   * the name of the module to generate,
--   * the name of the lexer function that module exports,
--   * the transducer describing the lexical syntax,
--   * a list of option words.
--
-- Recognised option words:
--   "dfa"  - return the rendering of the minimalized DFA (via showDFA)
--            instead of Haskell code;
--   "nocc" - do not use character classes to shrink the generated code.
lexerGen :: (Ord o,Show o,OutputFun o) =>
            String -> String -> Transducer HaskellChar o -> [String] -> String
lexerGen moduleName functionName program args =
    emit (dfa2old (compile program))
  where
    -- Option flags looked up in the argument list.
    wantsDFA    = "dfa"  `elem`    args
    withClasses = "nocc" `notElem` args

    -- Pick the back end according to the character-class option.
    emit
      | withClasses = emitWithCharClasses
      | otherwise   = emitMachine Nothing

    -- Number the sorted token classes from 1, relabel the DFA edges with
    -- those numbers, and hand the class table on to the code generator.
    emitWithCharClasses (startAndFinal, rawDFA) =
        emitMachine (Just classTable)
                    (startAndFinal, renumberEdges classTable rawDFA)
      where
        sortedClasses = sort (tokenClasses rawDFA)
        classTable =
          [ (ch, classNo)
          | (classNo, (chars, _)) <- zip [(1::Int)..] sortedClasses
          , ch <- chars
          ]

    -- Minimalize the machine, then either show it or render it as Haskell.
    emitMachine optccs dfa0
      | wantsDFA  = showDFA minDFA
      | otherwise = concat
          [ "\n-- Automatically generated code for a DFA follows:\n"
          , "--Equal states: ", show eqs, "\n"
          , "{-# OPTIONS_GHC -O #-}\n"
          , pprint haskellCode
          ]
      where
        (eqs, minDFA) = minimalize dfa0
        haskellCode =
          dfaToHaskell optccs moduleName ["Char","HsLexUtils"] functionName
                       minDFA


A function to convert from the new to the old DFA representation...

-- Convert a machine given as a list of (state,(flag,edges)) entries into
-- the old representation: a pair of header information and a DFA built
-- from an OrdMap of per-state transition lists.
--
-- The header pairs the constant 1 (presumably the start state; confirm
-- against the producer of the list) with every state whose flag is True.
-- For each state the edges are split into those labelled with an I
-- constructor and those labelled with an O constructor; edges with any
-- other label are dropped.
dfa2old newDFA =
    ((1::Int, finalStates), DFA (OM.fromList (map convert newDFA)))
  where
    finalStates = [ s | (s, (isFinal, _)) <- newDFA, isFinal ]

    convert (s, (_, edges)) = (s, (onInput, onOutput))
      where
        onInput  = [ (i, target) | (I i, target) <- edges ]
        onOutput = [ (o, target) | (O o, target) <- edges ]

Plain-text version of LexerGen2.hs | Valid HTML?