RegExp.hs

module RegExp where
--import List
import MUtils(usort)

infixl 2 !,-!,:!,:-!
infixr 3 &,:&

Transitions for transducers:

data Trans i o = I i | O o deriving (Show,Eq,Ord)

type Transducer i o = RegExp (Trans i o)

Regular expressions:

data RegExp t
  = One -- the language containing the empty string, unit of :&
  | Zero -- the empty language, unit of :!
  | S t -- a single symbol
  | RegExp t :& RegExp t -- sequence (associative)
  | RegExp t :! RegExp t -- alternative (cummutative and associative)
  | RegExp t :-! RegExp t -- difference
  | Many (RegExp t) -- sequence of zero or more
  | Some (RegExp t) -- sequence of one or more
-- An experimental extension with a fix-point operator:
  | Fix (RegExp t)
  | Self
  deriving (Eq,Ord,Show)

Some convenient constructor functions:

e     = One
z     = Zero
t     = S . I
o     = S . O
ts rs = seqs $ map t rs
a  rs = alts $ map t rs

opt r = r ! e

alts rs = foldr (!) z rs
seqs rs = foldr (&) e rs

Optimizing regular expression constructors:

many One = One
many Zero = One
many (Many r) = Many r
many (Some r) = Many r
many r = Many r

some One = One
some Zero = Zero
some (Many r) = Many r
some (Some r) = Some r
some r = Some r

r1 ! r2   = alts' (usort ts)
  where
    ts = terms r1 . terms r2 $ []

    alts' [] = z
    alts' ts = foldr1 (:!) ts

    terms Zero = id
    terms (r1 :! r2) = terms r1 . terms r2
    terms r = (r:)

r1 & Zero = Zero
r1 & One  = r1
r1 & r2   = consFactors r1 r2
  where
    consFactors One = id
    consFactors Zero = const Zero
    consFactors (r1:&r2) = consFactors r1 . consFactors r2
    consFactors r = (r:&)

Zero -! r = Zero
r -! Zero = r
r1 -! r2 = if r1==r2 then Zero else r1 :-! r2

fix Zero = Zero
fix One = One
fix Self = Zero
fix ((One:!Self):&r) | not (selfIn r) = some r
fix r = if selfIn r then Fix r else r

selfIn r =
     case r of
       Zero -> False
       One -> False
       S _ -> False
       r1 :& r2  -> selfIn r1 || selfIn r2
       r1 :! r2  -> selfIn r1 || selfIn r2
       r1 :-! r2 -> selfIn r1 || selfIn r2
       Many r -> selfIn r
       Some r -> selfIn r
       Fix r -> False
       Self -> True

Plain-text version of RegExp.hs | Valid HTML?