module HaskellLexicalSyntax where
{-+
This module is a transcription of the Lexical Syntax given in appendix B.2
of the Haskell 98 Report, except for character set definitions, which are
given in module HaskellChars.
The grammar below contains extra annotations (not found in the report) to
allow the recognized strings to be paired with token classifications, which
are used in the token specifications in the Happy parser.
-}
import List((\\))
import HaskellChars
import HsTokens
import RegExp
--
-- Top-level rule of the lexical grammar: a sequence of lexemes and
-- whitespace items.
-- NOTE(review): `many` and `!` are imported (presumably from RegExp) and
-- are assumed to mean "zero or more" and "alternative" -- confirm there.
program = many (lexeme ! whitespace)
-- Every kind of lexeme, one alternative per line.  Each alternative is
-- paired, via `o`, with the token class it should be reported as.
-- `literal` has no tag of its own here: its four alternatives are tagged
-- individually in the definition of `literal`.
-- The q* alternatives cover qualified names only; unqualified names are
-- matched by the first four alternatives.
lexeme =
    varid      & o Varid
  ! conid      & o Conid
  ! varsym     & o Varsym
  ! consym     & o Consym
  ! literal    -- & o Literal
  ! a special  & o Special
  ! reservedop & o Reservedop
  ! reservedid & o Reservedid
--  ! specialid  & o Specialid -- recognized by the parser
  ! qvarid     & o Qvarid
  ! qconid     & o Qconid
  ! qvarsym    & o Qvarsym
  ! qconsym    & o Qconsym
-- Literal tokens: integer, floating point, character and string, each
-- tagged with its own token class.
literal =
    integer & o IntLit
  ! float   & o FloatLit
  ! char    & o CharLit
  ! string  & o StringLit
-- A single unit of white space: a newline sequence, or one character from
-- the `vertab`, `space`, `tab` or `uniWhite` classes.
-- NOTE(review): those character classes are not defined in this module;
-- presumably they come from HaskellChars.
whitechar = newline ! a vertab ! a space ! a tab ! a uniWhite
-- Line terminators: the two-character sequence return + linefeed, or a
-- lone return, linefeed or formfeed.
newline =
    a creturn & a linefeed
  ! a creturn
  ! a linefeed
  ! a formfeed
-- White space between lexemes: a non-empty run of white characters, a
-- one-line comment, or a nested comment.  Each form gets its own token
-- class so later stages can tell them apart.
whitespace =
    some whitechar & o Whitespace
  ! comment        & o Comment
  ! ncomment       & o NestedComment
-- One-line comment: `dashes`, then any number of `cany` characters, then
-- the terminating newline.  The `Commentstart` tag is emitted right after
-- the dashes.
-- The binding name was missing (the line began with a bare `=`), leaving
-- `comment`, which `whitespace` refers to, undefined; it is restored here.
-- NOTE(review): `cany` is not defined in this module; presumably it is the
-- "any character" class from HaskellChars -- confirm.
comment = dashes & o Commentstart & many (a cany) & newline
-- Two or more consecutive dashes: the literal string "--" followed by any
-- number of further "-" characters.
dashes = as "--" & many (aa "-")
-- The two-character opener of a nested comment.
opencom = as "{-"
--closecom = as "-}"
-- Nested comment.  Only the opening "{-" is recognised by the regular
-- expression; the `NestedCommentStart` tag marks the point where the rest
-- of the comment has to be consumed by other code.
-- The binding name was missing (the line began with a bare `=`), leaving
-- `ncomment`, which `whitespace` refers to, undefined; it is restored here.
ncomment = opencom & o NestedCommentStart -- handled by calling an external function
-- This doesn't work, since regular expressions can't be recursive...
--ncomment = opencom & lANYseq & many (ncomment & lANYseq) & closecom
--ncomment = fix (opencom & lANYseq & many (Self & lANYseq) & closecom)
--ncomment = opencom & lANYseq & closecom -- unnested comments!
--lANYseq = many cANY -! (many cANY & ( opencom ! closecom ) & many cANY)
--cANY = a graphic ! whitechar
-- Variable identifiers: a `small` character followed by `idtail`, with the
-- reserved identifiers taken out.
-- NOTE(review): this relies on the fixities that RegExp gives `&` and `-!`
-- making the subtraction apply to the whole `a small & idtail` -- confirm.
varid = a small & idtail -! reservedid
-- Constructor identifiers: a `large` character followed by `idtail`.
conid = a large & idtail
-- The rest of an identifier: any number of small or large characters,
-- digits and apostrophes.
idtail = many (a small ! a large ! a digit ! aa "'")
-- The reserved identifiers (keywords, plus the wildcard "_"), expressed as
-- an alternation over the listed strings.
reservedid =
  ass
    [ "case", "class", "data", "default", "deriving", "do", "else"
    , "if", "import", "in", "infix", "infixl", "infixr", "instance"
    , "let", "module", "newtype", "of", "then", "type", "where", "_"
    ]
{-
specialid = as"as" ! as"qualified" ! as"hiding"
! as"forall" -- rank 2 polymorphism extension
! as"primitive" -- Hugs foreign function interface
-}
-- Variable operators: a `symbol` character followed by any number of
-- symbol characters or colons, excluding reserved operators and anything
-- that matches `dashes` (which would start a comment).
varsym = (a symbol & many (a symbol ! aa ":")) -! (reservedop ! dashes)
-- Constructor operators: like `varsym` but starting with a colon;
-- reserved operators are excluded.
consym = (aa ":" & many (a symbol ! aa ":")) -! reservedop
-- The reserved operators, as an alternation over the listed strings.
reservedop = ass ["..", ":","::", "=","\\","|","<-","->","@","~","=>"]
--specialop = aa "-" ! aa "!" -- recognized by the parser instead
-- A module name component has the same shape as a constructor identifier.
modid = conid
--optq = opt qual
-- qual = modid & aa "." -- For Haskell 98
-- A qualifier: one or more module name components, each followed by a dot.
qual = some (modid & aa ".") -- For "hierarchical module names"
{-+
In the report, qvarid etc include both qualified and unqualified names, but
here they denote qualified names only. This allows qualified and unqualified
names to be distinguished in a more natural way in the parser.
-}
-- Qualified names: a mandatory qualifier (`qual`) followed by the
-- corresponding unqualified form.
qvarid = qual & varid
qconid = qual & conid
qvarsym = qual & varsym
qconsym = qual & consym
-- Non-empty digit sequences for the three supported bases.
-- NOTE(review): `digit`, `octit` and `hexit` are not defined in this
-- module; presumably they are character classes from HaskellChars.
decimal = some (a digit)
octal = some (a octit)
hexadecimal = some (a hexit)
-- Integer literals: plain decimal, or "0" followed by an o/O prefix and
-- octal digits, or "0" followed by an x/X prefix and hexadecimal digits.
integer =
    decimal
  ! aa "0" & aa "Oo" & octal
  ! aa "0" & aa "Xx" & hexadecimal
-- Floating point literals: digits, a dot, digits, and an optional exponent
-- (e or E, an optional sign, digits).  A decimal point is required.
float = decimal & aa "." & decimal & opt (aa "eE" & opt (aa "-+") & decimal)
-- Character literals: between apostrophes, one graphic character other
-- than ' and \, or a space, or an escape.
-- NOTE(review): the commented-out marker after `escape` suggests the "\&"
-- escape was meant to be excluded here; as written it is accepted, because
-- `charesc` includes '&'.
char = aa "'" & (a (graphic \\ acs "'\\") ! a space ! escape{-<\&>-}) & aa "'"
-- String literals: between double quotes, any number of graphic characters
-- other than " and \, spaces, escapes and gaps.
string = aa "\"" & many (a (graphic \\ acs "\"\\") ! a space ! escape ! gap) & aa "\""
-- Escape sequences: a backslash followed by a single-character escape, an
-- ASCII control-code name, a decimal code, "o" with an octal code, or "x"
-- with a hexadecimal code.
escape =
  aa "\\" &
    ( charesc
    ! ascii
    ! decimal
    ! aa "o" & octal
    ! aa "x" & hexadecimal
    )
-- Characters that may directly follow the backslash in an escape:
-- a b f n r t v, backslash, double quote, apostrophe and ampersand.
charesc = aa "abfnrtv\\\"'&"
-- ASCII control characters inside an escape: either the caret notation
-- ("^" followed by a `cntrl` character) or one of the listed mnemonic
-- names.
ascii =
    aa "^" & cntrl
  ! ass
      [ "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK"
      , "BEL", "BS", "HT", "LF", "VT", "FF", "CR", "SO", "SI", "DLE"
      , "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB", "CAN"
      , "EM", "SUB", "ESC", "FS", "GS", "RS", "US", "SP", "DEL"
      ]
-- The character that may follow "^" in a control escape: an `ascLarge`
-- character or one of @ [ \ ] ^ _.
cntrl = a ascLarge ! aa "@[\\]^_"
-- A string gap: a backslash, one or more white characters, and a closing
-- backslash.
gap = aa "\\" & some whitechar & aa "\\"
--
-- Shorthands for building expressions from ASCII text.
-- NOTE(review): `a`, `ts` and `acs` are not defined in this module;
-- presumably `a` and `ts` come from RegExp and `acs` from HaskellChars.
-- The definitions are deliberately left point-free: rewriting them with
-- explicit arguments could change how their types are inferred.
aa = a . acs -- any of the ASCII chars
as = ts . acs -- ASCII string
-- Alternation over a list of ASCII strings.  `foldr1` is partial, so the
-- list must be non-empty (every use in this module passes a literal,
-- non-empty list).
ass = foldr1 (!) . map as
--
--tyvar = varid
--tycon = conid
--tycls = conid
--qtycon = qual & tycon
--qtycls = qual & tycls