\begin{code} {-# LANGUAGE ScopedTypeVariables #-} {-| Description: Finite state transducers with epsilon-transitions -} module Control.FST ( FST(..) -- * Constructing FSTs , wordFST -- * Operations on FSTs , productFST, restrictFST -- * Debugging Utilities , liveFST ) where import Data.Map.Strict (Map, (!?)) import qualified Data.Map.Strict as Map import Data.Set (Set) import qualified Data.Set as Set import Data.Sequence (Seq) import qualified Data.Sequence as Seq import Data.Maybe (mapMaybe, fromMaybe, isJust, fromJust) import Numeric.Natural import Control.Lens import Control.Monad.State.Strict import Text.PrettyPrint.Leijen (Pretty(..)) import qualified Text.PrettyPrint.Leijen as PP data FST state input output = FST { stInitial :: Set state , stTransition :: Map (state, Maybe input) (Set (state, Maybe output)) , stAccept :: Set state } deriving (Show, Read) instance (Show state, Show input, Show output) => Pretty (FST state input output) where pretty FST{..} = PP.vsep [ PP.text "Initial states:" PP. PP.hang 2 (list . map (PP.text . show) $ Set.toAscList stInitial) , PP.text "State transitions:" PP.<$> PP.indent 2 (PP.vsep [ PP.text (show st) PP.<+> (PP.text "-" PP.<> PP.tupled [label inS, label outS] PP.<> PP.text "→") PP.<+> PP.text (show st') | ((st, inS), to) <- Map.toList stTransition , (st', outS) <- Set.toAscList to ]) , PP.text "Accepting states:" PP. PP.hang 2 (list . map (PP.text . show) $ Set.toAscList stAccept) ] where label :: Show a => Maybe a -> PP.Doc label = maybe (PP.text "ɛ") (PP.text . show) list :: [PP.Doc] -> PP.Doc list = PP.encloseSep (PP.lbracket PP.<> PP.space) (PP.space PP.<> PP.rbracket) (PP.comma PP.<> PP.space) runFST :: forall input output state. (Ord input, Ord output, Ord state) => FST state input output -> Seq input -> [Seq output] runFST = fmap (map $ catMaybes . fmap (view _2) . view _2) . runFST' where catMaybes = fmap fromJust . Seq.filter isJust runFST' :: forall input output state. (Ord input, Ord output, Ord state) => FST state input output -> Seq input -> [(state, Seq (state, Maybe output))] -- ^ Tuples of initial state and chosen transitions; not neccessarily finite -- ^ Compute all possible runs on the given input runFST' fst Seq.Empty = guardAccept $ (\(_, st, _) -> (st, Seq.Empty)) <$> step fst Nothing Nothing runFST' fst cs = guardAccept $ do initial <- view _2 <$> step fst Nothing Nothing go (initial, Seq.Empty) cs where guardAccept res = do (initial, path) <- res let finalState | (_ :> (st, _)) <- path = st | otherwise = initial guard $ finalState `Set.member` stAccept return res go :: (state, Seq (state, Maybe output)) -> Seq input-> [(state, Seq (state, Maybe output))] go (initial, path) cs = do let current | (_ :> (st, _)) <- path = st | otherwise = initial (head, next, out) <- step fst (Just current) (Seq.lookup 0 cs) let nPath = path :> (next, out) ncs = maybe id (:<) head cs go (initial, nPath) ncs step :: forall input output state. (Ord input, Ord output, Ord state) => FST state input output -> Maybe state -- ^ Current state -> Maybe input -- ^ Head of remaining input -> [(Maybe input, state, Maybe output)] -- ^ Tuples of unconsumed input, next state, and produced output step FST{..} Nothing inS = (\s -> (inS, s, Nothing)) <$> Set.toList stInitial step FST{..} (Just c) inS = let consuming = fromMaybe Set.empty $ Map.lookup (c, inS) stTransition unconsuming = fromMaybe Set.empty $ Map.lookup (c, Nothing) stTransition in Set.toList $ Set.map (\(n, mOut) -> (Nothing, n, mOut)) consuming `Set.union` Set.map (\(n, mOut) -> (inS, n, mOut)) unconsuming wordFST :: forall input output. Seq output -> FST Natural input output -- ^ @wordFST str@ is the linear FST generating @str@ as output when given no input wordFST outs = FST { stInitial = Set.singleton 0 , stAccept = Set.singleton l , stTransition = Map.fromSet next states } where l :: Natural l = fromIntegral $ Seq.length outs states :: Set (Natural, Maybe input) states = Set.fromDistinctAscList [ (n, Nothing) | n <- [0..pred l] ] next :: (Natural, Maybe input) -> Set (Natural, Maybe output) next (i, _) = Set.singleton (succ i, Just . Seq.index outs $ fromIntegral i) productFST :: forall state1 state2 input output. (Ord state1, Ord state2, Ord input, Ord output) => FST state1 input output -> FST state2 input output -> FST (state1, state2) input output -- ^ Cartesian product on states, logical conjunction on transitions and state-properties (initial and accept) -- -- This is the "natural" (that is component-wise) product when considering FSTs to be weighted in the boolean semiring. -- -- Intuitively this corresponds to running both FSTs at the same time requiring them to produce the same output and "agree" (epsilon agreeing with every character) on their input. productFST fst1 fst2 = FST { stInitial = stInitial fst1 `setProduct` stInitial fst2 , stAccept = stAccept fst1 `setProduct` stAccept fst2 , stTransition = Map.fromSet transitions . Set.fromList . mapMaybe filterTransition . Set.toAscList $ Map.keysSet (stTransition fst1) `setProduct` Map.keysSet (stTransition fst2) } where setProduct :: forall a b. Set a -> Set b -> Set (a, b) setProduct as bs = Set.fromDistinctAscList $ (,) <$> Set.toAscList as <*> Set.toAscList bs filterTransition :: forall label. Eq label => ((state1, Maybe label), (state2, Maybe label)) -> Maybe ((state1, state2), Maybe label) filterTransition ((st1, Nothing ), (st2, in2 )) = Just ((st1, st2), in2) filterTransition ((st1, in1 ), (st2, Nothing )) = Just ((st1, st2), in1) filterTransition ((st1, Just in1), (st2, Just in2)) | in1 == in2 = Just ((st1, st2), Just in1) | otherwise = Nothing transitions :: ((state1, state2), Maybe input) -> Set ((state1, state2), Maybe output) transitions ((st1, st2), inS) = Set.fromList . mapMaybe filterTransition . Set.toAscList $ out1 `setProduct` out2 where out1 = (fromMaybe Set.empty $ stTransition fst1 !? (st1, inS)) `Set.union` (fromMaybe Set.empty $ stTransition fst1 !? (st1, Nothing)) out2 = (fromMaybe Set.empty $ stTransition fst2 !? (st2, inS)) `Set.union` (fromMaybe Set.empty $ stTransition fst2 !? (st2, Nothing)) restrictFST :: forall state input output. (Ord state, Ord input, Ord output) => Set state -> FST state input output -> FST state input output -- ^ @restrictFST states fst@ removes from @fst@ all states not in @states@ including transitions leading to or originating from them restrictFST sts FST{..} = FST { stInitial = stInitial `Set.intersection` sts , stAccept = stAccept `Set.intersection` sts , stTransition = Map.mapMaybeWithKey restrictTransition stTransition } where restrictTransition :: (state, Maybe input) -> Set (state, Maybe output) -> Maybe (Set (state, Maybe output)) restrictTransition (st, _) tos = tos' <$ guard (st `Set.member` sts) where tos' = Set.filter (\(st', _) -> st' `Set.member` sts) tos liveFST :: forall state input output. (Ord state, Ord input, Ord output, Show state) => FST state input output -> Set state -- ^ Compute the set of "live" states (with no particular complexity) -- -- A state is "live" iff there is a path from it to an accepting state and a path from an initial state to it liveFST fst@FST{..} = flip execState Set.empty $ mapM_ (depthSearch Set.empty) stInitial where stTransition' :: Map state (Set state) stTransition' = Map.map (Set.map $ \(st, _) -> st) $ Map.mapKeysWith Set.union (\(st, _) -> st) stTransition depthSearch :: Set state -> state -> State (Set state) () depthSearch acc curr = do let acc' = Set.insert curr acc next = fromMaybe Set.empty $ stTransition' !? curr alreadyLive <- get when (curr `Set.member` Set.union stAccept alreadyLive) $ modify $ Set.union acc' alreadyLive <- get mapM_ (depthSearch acc') $ next `Set.difference` alreadyLive \end{code}