From d3f6fdc3ea71386c2b9a9cd5a686455dbee3e60e Mon Sep 17 00:00:00 2001 From: Gregor Kleen Date: Mon, 21 May 2018 16:14:26 +0200 Subject: Introduce FSTs & Generalize input/output `toFST` is currently invalid --- edit-lens/src/Control/DFST.lhs | 78 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 edit-lens/src/Control/DFST.lhs (limited to 'edit-lens/src/Control/DFST.lhs') diff --git a/edit-lens/src/Control/DFST.lhs b/edit-lens/src/Control/DFST.lhs new file mode 100644 index 0000000..aec7bbb --- /dev/null +++ b/edit-lens/src/Control/DFST.lhs @@ -0,0 +1,78 @@ +\begin{code} +{-# LANGUAGE ScopedTypeVariables +#-} + +{-| +Description: Deterministic finite state transducers +-} +module Control.DFST + ( DFST(..) + , runDFST, runDFST' + , toFST + ) where + +import Data.Map.Strict (Map, (!?)) +import qualified Data.Map.Strict as Map + +import Data.Set (Set) +import qualified Data.Set as Set + +import Data.Sequence (Seq(..)) +import qualified Data.Sequence as Seq + +import Data.Monoid + +import Numeric.Natural + +import Control.Monad +import Control.Monad.State + +import Control.FST (FST(FST)) +import qualified Control.FST as FST + + +data DFST state input output = DFST + { stInitial :: state + , stTransition :: Map (state, input) (state, Seq output) + -- ^ All @(s, c)@-combinations not mapped are assumed to map to @(s, Nothing)@ + , stAccept :: Set state + } + + +toFST :: forall state input output. (Ord state, Ord input, Ord output) => DFST state input output -> FST (state, Natural) input output +-- ^ Split apart non-singleton outputs into a series of epsilon-transitions +-- +-- This function is currently invalid since multiple out-edges in the `DFST` visit the same intermediate states (we should label intermediate states not only with an ordinal but also with the input Symbol from the `DFST`) +toFST DFST{..} = flip execState initialFST $ forM_ (Map.toList stTransition) handleTransition + where + initialFST = FST + { stInitial = (stInitial, 0) + , stTransition = Map.empty + , stAccept = Set.map (,0) stAccept + } + addTransition :: forall state input output. (Ord state, Ord input, Ord output) => (state, Maybe input) -> (state, Maybe output) -> State (FST state input output) () + addTransition k v = modify $ \f@FST{ stTransition } -> f { FST.stTransition = Map.insertWith Set.union k (Set.singleton v) stTransition } + handleTransition :: ((state, input), (state, Seq output)) -> State (FST (state, Natural) input output) () + handleTransition ((st, inS), (st', outs)) = handleTransition' (st, 0) (Just inS) outs (st', 0) + handleTransition' :: (state, Natural) -> Maybe input -> Seq output -> (state, Natural) -> State (FST (state, Natural) input output) () + handleTransition' from inS Empty to = addTransition (from, inS) (to, Nothing) + handleTransition' from inS (outS :<| Empty) to = addTransition (from, inS) (to, Just outS) + handleTransition' from@(st, i) inS (outS :<| oo) to = addTransition (from, inS) ((st, succ i), Just outS) >> handleTransition' (st, succ i) Nothing oo to + +runDFST :: forall state input output. (Ord state, Ord input) => DFST state input output -> Seq input -> Maybe (Seq output) +runDFST dfst@DFST{..} str = let (finalState, str') = runDFST' dfst stInitial str Seq.empty + in str' <$ guard (finalState `Set.member` stAccept) + +runDFST' :: forall state input output. (Ord state, Ord input) + => DFST state input output + -> state -- ^ Current state + -> Seq input -- ^ Remaining input + -> Seq output -- ^ Accumulator containing previous output + -> (state, Seq output) -- ^ Next state, altered output +runDFST' _ st Empty acc = (st, acc) +runDFST' dfst@DFST{..} st (c :<| cs) acc + | Just (st', mc') <- stTransition !? (st, c) + = runDFST' dfst st' cs $ acc <> mc' + | otherwise + = runDFST' dfst st cs acc +\end{code} -- cgit v1.2.3