Created
November 30, 2017 12:57
-
-
Save anonymous/ecdfa265e369711e15d53e7fa28c1de9 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| {-# LANGUAGE PatternSynonyms #-} | |
| {-# LANGUAGE FlexibleContexts #-} | |
| {-# LANGUAGE OverloadedStrings #-} | |
| module DFA where | |
| import qualified Data.Map.Strict as Map | |
| import Data.Map (Map) | |
| import qualified Data.Set as Set | |
| import Data.Set (Set) | |
| import Data.Maybe (fromMaybe) | |
| import Data.Foldable (foldl') | |
| import qualified Data.Text as Text | |
| import Control.Monad.State.Lazy (State, evalState, get, modify') | |
| import qualified Text.Dot as Dot | |
| -- | A transition between state | |
| data Transition symbol | |
| = Epsilon -- ^ Free transition (does not consume any symbol) | |
| | Singleton symbol -- ^ Consume a symbol | |
| deriving (Show, Ord, Eq) | |
| -- | A Non Finite Automata | |
| data NFA symbol | |
| = NFA (Map (Int, Transition symbol) (Set.Set Int)) -- ^ Transition Map (from, transition) -> [to] | |
| deriving (Show) | |
| -- | Current Status of the NFA | |
| data NFAStatus = NFAStatus (Set Int) deriving (Show) | |
| -- | Check if the NFA can terminate | |
| isBroken :: NFAStatus -> Bool | |
| isBroken (NFAStatus s) = Set.null s | |
| -- | Check if the NFA is in a terminal state | |
| isTerminal :: NFAStatus -> Bool | |
| isTerminal (NFAStatus s) = internalEnd `Set.member` s | |
| internalStart :: Int | |
| internalStart = 0 | |
| internalEnd :: Int | |
| internalEnd = 1 | |
| {- | |
| Knowing a transition, move the status to the node reachable through this transition | |
| -} | |
| internalStepNFA :: Ord symbol => NFA symbol -> NFAStatus -> Transition symbol -> NFAStatus | |
| internalStepNFA (NFA m) (NFAStatus states) trans = NFAStatus (Set.unions (map stepOneState (Set.toList states))) | |
| where | |
| stepOneState state = fromMaybe (Set.empty) (Map.lookup (state, trans) m) | |
| {- | |
| Recursivly move all epsilon (i.e. "Free") transition | |
| -} | |
| internalEpsilonWalk :: Ord symbol => NFA symbol -> NFAStatus -> NFAStatus | |
| internalEpsilonWalk nfa states@(NFAStatus s) = | |
| -- This moves until a fixpoint is reached | |
| let NFAStatus s' = internalStepNFA nfa states Epsilon | |
| newStates = Set.union s s' | |
| in if newStates == s then states | |
| else internalEpsilonWalk nfa (NFAStatus newStates) | |
| -- | Move the NFA status with one symbol. | |
| {- It takes into account the Epsilon transitions -} | |
| stepNFA :: Ord symbol => NFA symbol -> NFAStatus -> symbol -> NFAStatus | |
| stepNFA nfa states c = internalEpsilonWalk nfa (internalStepNFA nfa states (Singleton c)) | |
| -- | Run the NFA on a list of symbols | |
| walkNFA :: (Ord symbol, Foldable t) => NFA symbol -> t symbol -> NFAStatus | |
| walkNFA nfa symbols = foldl' (stepNFA nfa) (internalEpsilonWalk nfa (NFAStatus (Set.singleton 0))) symbols | |
| -- 0 is the begininng by construction | |
| -- TODO: shortcut, this will consume all the symbols even if no transition are available | |
| -- | Returns True if the given NFA Match the sequence of symbols | |
| matchNFA :: (Ord symbol, Foldable t) => NFA symbol -> t symbol -> Bool | |
| matchNFA nfa symbols = isTerminal (walkNFA nfa symbols) | |
| -- REGEX | |
| data Regex symbol | |
| = Symbol symbol -- ^ a | |
| | Many (Regex symbol) -- ^ a* | |
| | (Regex symbol) :|: (Regex symbol) -- ^ a | b | |
| | (Regex symbol) :+: (Regex symbol) -- ^ ab | |
| | Option (Regex symbol) -- ^ a? | |
| | Some (Regex symbol) -- ^ a+ | |
| deriving (Show) | |
| -- a*(b|cd?)+ | |
| regex0 :: Regex Char | |
| regex0 = Many (Symbol 'a') :+: Some (Symbol 'b' :|: (Symbol 'c' :+: (Option (Symbol 'd')))) | |
| -- | Convert a regex to an NFA | |
| regexToNFA :: Ord symbol => Regex symbol -> NFA symbol | |
| regexToNFA reg = NFA (evalState (go reg internalStart internalEnd) 2) | |
| where | |
| nextLabel = do | |
| l <- get | |
| modify' (+1) | |
| pure l | |
| -- union of two NFA | |
| unionNFA nfaA nfaB = Map.unionWith Set.union nfaA nfaB | |
| -- free transition from a to b | |
| a ---> b = Map.singleton (a, Epsilon) (Set.singleton b) | |
| go :: Ord c => Regex c -> Int -> Int -> State Int (Map (Int, Transition c) (Set Int)) | |
| go (Symbol c) start end = do | |
| -- start -c-> end | |
| pure (Map.singleton (start, Singleton c) (Set.singleton end)) | |
| go (a :+: b) start end = do | |
| middle <- nextLabel | |
| unionNFA <$> go a start middle | |
| <*> go b middle end | |
| go (a :|: b) start end = | |
| unionNFA <$> go a start end | |
| <*> go b start end | |
| go (Some sub) start end = do | |
| stop <- nextLabel | |
| unionNFA ((stop ---> end) `unionNFA` | |
| (stop ---> start)) | |
| <$> go sub start stop | |
| go (Option sub) start end = do | |
| unionNFA (start ---> end) | |
| <$> go sub start end | |
| go (Many sub) start end = do | |
| stop <- nextLabel | |
| unionNFA ((start ---> end) `unionNFA` | |
| (stop ---> start)) | |
| <$> go sub start stop | |
| -- | Match a regex ? | |
| -- >>> matchRegex (Many (Symbol 'a')) "aaaa" | |
| -- True | |
| -- >>> matchRegex (Symbol 'a' :+: Symbol 'b' :+: Optional (Symbol 'c')) "abc" | |
| -- True | |
| -- >>> matchRegex (Symbol 'a' :+: Symbol 'b' :+: Symbol 'c') "ab" | |
| -- True | |
| -- >>> matchRegex (Symbol 'a' :+: Symbol 'b' :+: Optional (Symbol 'c')) "abd" | |
| -- False | |
| matchRegex :: (Foldable f, Ord symbol) => Regex symbol -> f symbol -> Bool | |
| matchRegex r l = matchNFA (regexToNFA r) l | |
| -- Display | |
| -- | NFA to Dot | |
| {- I don't know how to write this kind of function correctly ;( -} | |
| nfaToDot :: NFA Char -> Dot.DotGraph | |
| nfaToDot (NFA nfa) = Dot.graph Dot.directed "g" $ do | |
| let nodesLabel = Set.toList (Set.map fst (Map.keysSet nfa) `Set.union` Set.unions (Map.elems nfa)) | |
| let edgesLabel = concatMap (\((from, transition), tos) -> map (\to -> (from, to, transition)) (Set.toList tos)) (Map.toList nfa) | |
| nodes <- mapM (Dot.node . (Text.pack) . show) nodesLabel | |
| let nameToNode = Map.fromList (zip nodesLabel nodes) | |
| mapM_ (\(f,t, l) -> Dot.genEdge (nameToNode Map.! f) (nameToNode Map.! t) ["label" Dot.=: showTransition l]) edgesLabel | |
| showTransition :: Transition Char -> Text.Text | |
| showTransition (Singleton c) = Text.singleton c | |
| showTransition Epsilon = "Eps" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment