HaskellChars

Plain source file: LexerSpec/HaskellChars.hs (2009-01-04)

HaskellChars is imported by: DetMachineToHaskell2, LexerGen2, HaskellLexicalSyntax.

{-# OPTIONS_GHC -cpp #-}
module HaskellChars where
import Char

This module collects the definitions from the Lexical Syntax in appendix B.3 of the (revised) Haskell 98 report that define sets of characters. These sets are referred to in the rest of the lexical syntax, which is given in module HaskellLexicalSyntax.

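ASCII characters are represented by themselves, while non-ASCII characters are represented by the class they belong to. (As a partial workaround, the Latin-1 characters '\160' through '\255' are also represented by themselves, wrapped in the ASCII constructor; see latin1Print below.)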

-- A character as seen by the lexical syntax: either one concrete character,
-- or a stand-in for a whole class of Unicode characters.
data HaskellChar
  = ASCII Char -- a concrete character, represented by itself (this module
               -- also wraps the Latin-1 characters '\160'..'\255' this way)
  | UniWhite   -- any Unicode character defined as whitespace
  | UniSymbol  -- any Unicode symbol or punctuation
  | UniDigit   -- any Unicode numeric
  | UniLarge   -- any uppercase or titlecase Unicode letter
  | UniSmall   -- any Unicode lowercase letter
  deriving (Eq,Ord{-,Show-}) -- Show is not derived; it is written by hand in this module

-- Wrap every character of a string in the ASCII constructor, keeping order.
acs :: String -> [HaskellChar]
acs cs = [ASCII c | c <- cs]

-- Character classifications.
-- Every name below is the list of characters (or Unicode classes) in that set.
special, creturn, linefeed, vertab, formfeed, space, tab, uniWhite, cany
  :: [HaskellChar]

-- The punctuation characters ( ) , ; [ ] ` { }
special = acs "(),;[]`{}"

-- The individual layout characters, one set per character.
creturn = acs "\r"
linefeed = acs "\LF"
vertab = acs "\VT"
formfeed = acs "\FF"

-- A blank, plus '\xa0' (the Latin-1 no-break space) wrapped in ASCII.
space = acs " \xa0"
tab = acs "\t"

-- Stand-in for all Unicode whitespace.
uniWhite = [UniWhite]

-- Everything in graphic, followed by space and tab.
cany = concat [graphic, space, tab]
graphic, small, ascSmall, uniSmall, large, ascLarge, uniLarge :: [HaskellChar]

-- Letters, symbols, digits and special characters, the three characters
-- : " ' and finally the whole latin1Print range.
graphic = concat [small, large, symbol, digit, special, acs ":\"'", latin1Print]

-- Lowercase letters; the underscore is counted as lowercase.
small = concat [ascSmall, uniSmall, acs "_"]
ascSmall = acs ['a' .. 'z']
-- The Latin-1 lowercase letters are listed explicitly
-- because of the isSymbol workaround.
uniSmall = UniSmall : acs latin1Lower

-- Uppercase letters.
large = ascLarge ++ uniLarge
ascLarge = acs ['A' .. 'Z']
-- The Latin-1 uppercase letters are listed explicitly
-- because of the isSymbol workaround.
uniLarge = UniLarge : acs latin1Upper
symbol, ascSymbol, uniSymbol :: [HaskellChar]

-- Symbol characters: the ASCII ones followed by the Unicode ones.
symbol = ascSymbol ++ uniSymbol
ascSymbol = acs "!#$%&*+./<=>?@\\^|-~"
-- !! GHC missing isSymbol workaround: the Latin-1 symbols are listed explicitly.
uniSymbol = UniSymbol : acs latin1Symbol
digit, ascDigit, uniDigit, octit, hexit :: [HaskellChar]

-- Decimal digits: '0'..'9' plus the Unicode digit class.
digit = ascDigit ++ uniDigit
ascDigit = acs ['0' .. '9']
uniDigit = [UniDigit]

-- Octal digits, ASCII only.
octit = acs ['0' .. '7']

-- Hexadecimal digits. Built on digit, so UniDigit is a member as well.
hexit = concat [digit, acs ['A' .. 'F'], acs ['a' .. 'f']]

-- The characters '\160' through '\255', each wrapped in ASCII.
-- Some of these are actually UniSmall, UniLarge or UniSymbol, but since GHC's
-- isUpper etc. do not recognize them, they are included here as a partial
-- workaround.
latin1Print :: [HaskellChar]
latin1Print = acs (enumFromTo '\160' '\255')





-- Latin-1 characters listed by hand, grouped by the class they are added to
-- elsewhere in this module (uniLarge, uniSmall and uniSymbol respectively).
latin1Upper, latin1Lower, latin1Symbol :: String

-- Uppercase letters.
latin1Upper = "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ"

-- Lowercase letters.
latin1Lower = "ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ"

-- Symbols and punctuation.
latin1Symbol = "¡¢£¤¥¦§¨©¬­®¯°±´¶·¸¿×÷"


----

-- Hand-written Show instance for HaskellChar.
instance Show HaskellChar where
  -- A single value: a concrete character is shown exactly like the Char it
  -- wraps; a Unicode class is shown as a bare two-letter code.
  showsPrec prec (ASCII c) = showsPrec prec c
  showsPrec _ UniWhite = showString "UW"
  showsPrec _ UniSymbol = showString "US"
  showsPrec _ UniDigit = showString "UD"
  showsPrec _ UniLarge = showString "UU"
  showsPrec _ UniSmall = showString "UL"

  -- A list: rendered as one double-quoted string. Concrete characters appear
  -- in their escaped form; a Unicode class closes the string, emits its
  -- two-letter code and reopens the string, so [ASCII 'a', UniSmall] is
  -- rendered as "a"UL"".
  showList hs =
    showChar '"' . foldr (\h rest -> piece h . rest) id hs . showChar '"'
    where
      -- show c yields the character literal including its surrounding single
      -- quotes; dropping the first and last character leaves the bare
      -- (possibly escaped) character text.
      piece (ASCII c) = showString (init (tail (show c)))
      piece UniWhite = quoted "UW"
      piece UniSymbol = quoted "US"
      piece UniDigit = quoted "UD"
      piece UniLarge = quoted "UU"
      piece UniSmall = quoted "UL"

      -- A class code surrounded by double quotes.
      quoted code = showChar '"' . showString code . showChar '"'

Index

(HTML for this module was generated on 2009-01-04. About the conversion tool.)