summaryrefslogtreecommitdiff
path: root/edit-lens/src
diff options
context:
space:
mode:
authorGregor Kleen <gkleen@yggdrasil.li>2018-05-21 16:14:26 +0200
committerGregor Kleen <gkleen@yggdrasil.li>2018-05-21 16:14:26 +0200
commitd3f6fdc3ea71386c2b9a9cd5a686455dbee3e60e (patch)
treeb62e4748b8e058a5ab4122accf6b33e15bdd9b49 /edit-lens/src
parenteb599b2394e62842423cc0bbee2432a9cae95f4b (diff)
downloadincremental-dfsts-d3f6fdc3ea71386c2b9a9cd5a686455dbee3e60e.tar
incremental-dfsts-d3f6fdc3ea71386c2b9a9cd5a686455dbee3e60e.tar.gz
incremental-dfsts-d3f6fdc3ea71386c2b9a9cd5a686455dbee3e60e.tar.bz2
incremental-dfsts-d3f6fdc3ea71386c2b9a9cd5a686455dbee3e60e.tar.xz
incremental-dfsts-d3f6fdc3ea71386c2b9a9cd5a686455dbee3e60e.zip
Introduce FSTs & Generalize input/output
`toFST` is currently invalid
Diffstat (limited to 'edit-lens/src')
-rw-r--r--edit-lens/src/Control/DFST.lhs78
-rw-r--r--edit-lens/src/Control/DFST/Lens.lhs (renamed from edit-lens/src/Data/String/DFST/Lens.lhs)83
-rw-r--r--edit-lens/src/Control/FST.lhs24
-rw-r--r--edit-lens/src/Data/String/DFST.hs42
4 files changed, 146 insertions, 81 deletions
diff --git a/edit-lens/src/Control/DFST.lhs b/edit-lens/src/Control/DFST.lhs
new file mode 100644
index 0000000..aec7bbb
--- /dev/null
+++ b/edit-lens/src/Control/DFST.lhs
@@ -0,0 +1,78 @@
1\begin{code}
2{-# LANGUAGE ScopedTypeVariables
3#-}
4
5{-|
6Description: Deterministic finite state transducers
7-}
8module Control.DFST
9 ( DFST(..)
10 , runDFST, runDFST'
11 , toFST
12 ) where
13
14import Data.Map.Strict (Map, (!?))
15import qualified Data.Map.Strict as Map
16
17import Data.Set (Set)
18import qualified Data.Set as Set
19
20import Data.Sequence (Seq(..))
21import qualified Data.Sequence as Seq
22
23import Data.Monoid
24
25import Numeric.Natural
26
27import Control.Monad
28import Control.Monad.State
29
30import Control.FST (FST(FST))
31import qualified Control.FST as FST
32
33
34data DFST state input output = DFST
35 { stInitial :: state
36 , stTransition :: Map (state, input) (state, Seq output)
37 -- ^ All @(s, c)@-combinations not mapped are assumed to map to @(s, Nothing)@
38 , stAccept :: Set state
39 }
40
41
42toFST :: forall state input output. (Ord state, Ord input, Ord output) => DFST state input output -> FST (state, Natural) input output
43-- ^ Split apart non-singleton outputs into a series of epsilon-transitions
44--
45-- This function is currently invalid since multiple out-edges in the `DFST` visit the same intermediate states (we should label intermediate states not only with an ordinal but also with the input Symbol from the `DFST`)
46toFST DFST{..} = flip execState initialFST $ forM_ (Map.toList stTransition) handleTransition
47 where
48 initialFST = FST
49 { stInitial = (stInitial, 0)
50 , stTransition = Map.empty
51 , stAccept = Set.map (,0) stAccept
52 }
53 addTransition :: forall state input output. (Ord state, Ord input, Ord output) => (state, Maybe input) -> (state, Maybe output) -> State (FST state input output) ()
54 addTransition k v = modify $ \f@FST{ stTransition } -> f { FST.stTransition = Map.insertWith Set.union k (Set.singleton v) stTransition }
55 handleTransition :: ((state, input), (state, Seq output)) -> State (FST (state, Natural) input output) ()
56 handleTransition ((st, inS), (st', outs)) = handleTransition' (st, 0) (Just inS) outs (st', 0)
57 handleTransition' :: (state, Natural) -> Maybe input -> Seq output -> (state, Natural) -> State (FST (state, Natural) input output) ()
58 handleTransition' from inS Empty to = addTransition (from, inS) (to, Nothing)
59 handleTransition' from inS (outS :<| Empty) to = addTransition (from, inS) (to, Just outS)
60 handleTransition' from@(st, i) inS (outS :<| oo) to = addTransition (from, inS) ((st, succ i), Just outS) >> handleTransition' (st, succ i) Nothing oo to
61
62runDFST :: forall state input output. (Ord state, Ord input) => DFST state input output -> Seq input -> Maybe (Seq output)
63runDFST dfst@DFST{..} str = let (finalState, str') = runDFST' dfst stInitial str Seq.empty
64 in str' <$ guard (finalState `Set.member` stAccept)
65
66runDFST' :: forall state input output. (Ord state, Ord input)
67 => DFST state input output
68 -> state -- ^ Current state
69 -> Seq input -- ^ Remaining input
70 -> Seq output -- ^ Accumulator containing previous output
71 -> (state, Seq output) -- ^ Next state, altered output
72runDFST' _ st Empty acc = (st, acc)
73runDFST' dfst@DFST{..} st (c :<| cs) acc
74 | Just (st', mc') <- stTransition !? (st, c)
75 = runDFST' dfst st' cs $ acc <> mc'
76 | otherwise
77 = runDFST' dfst st cs acc
78\end{code}
diff --git a/edit-lens/src/Data/String/DFST/Lens.lhs b/edit-lens/src/Control/DFST/Lens.lhs
index bf06f53..0976314 100644
--- a/edit-lens/src/Data/String/DFST/Lens.lhs
+++ b/edit-lens/src/Control/DFST/Lens.lhs
@@ -3,17 +3,17 @@
3 , TemplateHaskell 3 , TemplateHaskell
4#-} 4#-}
5 5
6module Data.String.DFST.Lens 6module Control.DFST.Lens
7 ( StringEdit(..) 7 ( StringEdit(..)
8 , StringEdits(..) 8 , StringEdits(..)
9 , insert, delete 9 , insert, delete
10 , DFSTAction(..), DFSTComplement 10 , DFSTAction(..), DFSTComplement
11 , dfstLens 11 , dfstLens
12 , module Data.String.DFST 12 , module Control.DFST
13 , module Control.Lens.Edit 13 , module Control.Lens.Edit
14 ) where 14 ) where
15 15
16import Data.String.DFST 16import Control.DFST
17import Control.Lens.Edit 17import Control.Lens.Edit
18import Control.Lens 18import Control.Lens
19import Control.Lens.TH 19import Control.Lens.TH
@@ -31,30 +31,31 @@ import qualified Data.Algorithm.Diff as Diff
31 31
32import Data.Monoid 32import Data.Monoid
33import Data.Function (on) 33import Data.Function (on)
34import Data.Foldable (toList)
34 35
35 36
36data StringEdit = Insert { _sePos :: Natural, _seInsertion :: Char } 37data StringEdit char = Insert { _sePos :: Natural, _seInsertion :: char }
37 | Delete { _sePos :: Natural } 38 | Delete { _sePos :: Natural }
38 deriving (Eq, Ord, Show, Read) 39 deriving (Eq, Ord, Show, Read)
39 40
40makeLenses ''StringEdit 41makeLenses ''StringEdit
41 42
42data StringEdits = StringEdits (Seq StringEdit) 43data StringEdits char = StringEdits (Seq (StringEdit char))
43 | SEFail 44 | SEFail
44 deriving (Eq, Ord, Show, Read) 45 deriving (Eq, Ord, Show, Read)
45 46
46makePrisms ''StringEdits 47makePrisms ''StringEdits
47 48
48stringEdits :: Traversal' StringEdits StringEdit 49stringEdits :: Traversal' (StringEdits char) (StringEdit char)
49stringEdits = _StringEdits . traverse 50stringEdits = _StringEdits . traverse
50 51
51insert :: Natural -> Char -> StringEdits 52insert :: Natural -> char -> StringEdits char
52insert n c = StringEdits . Seq.singleton $ Insert n c 53insert n c = StringEdits . Seq.singleton $ Insert n c
53 54
54delete :: Natural -> StringEdits 55delete :: Natural -> StringEdits char
55delete n = StringEdits . Seq.singleton $ Delete n 56delete n = StringEdits . Seq.singleton $ Delete n
56 57
57instance Monoid StringEdits where 58instance Monoid (StringEdits char) where
58 mempty = StringEdits Seq.empty 59 mempty = StringEdits Seq.empty
59 SEFail `mappend` _ = SEFail 60 SEFail `mappend` _ = SEFail
60 _ `mappend` SEFail = SEFail 61 _ `mappend` SEFail = SEFail
@@ -67,30 +68,30 @@ instance Monoid StringEdits where
67 = StringEdits bs `mappend` StringEdits as 68 = StringEdits bs `mappend` StringEdits as
68 | otherwise = StringEdits $ x `mappend` y 69 | otherwise = StringEdits $ x `mappend` y
69 70
70instance Module StringEdits where 71instance Module (StringEdits char) where
71 type Domain StringEdits = String 72 type Domain (StringEdits char) = Seq char
72 apply str SEFail = Nothing 73 apply str SEFail = Nothing
73 apply str (StringEdits Seq.Empty) = Just str 74 apply str (StringEdits Seq.Empty) = Just str
74 apply str (StringEdits (es :|> Insert n c)) = (flip apply) (StringEdits es) =<< go str n c 75 apply str (StringEdits (es :|> Insert n c)) = (flip apply) (StringEdits es) =<< go str n c
75 where 76 where
76 go [] n c 77 go Seq.Empty n c
77 | n == 0 = Just [c] 78 | n == 0 = Just $ Seq.singleton c
78 | otherwise = Nothing 79 | otherwise = Nothing
79 go str@(x:xs) n c 80 go str@(x :<| xs) n c
80 | n == 0 = Just $ c : str 81 | n == 0 = Just $ c <| str
81 | otherwise = (x:) <$> go xs (pred n) c 82 | otherwise = (x <|) <$> go xs (pred n) c
82 apply str (StringEdits (es :|> Delete n)) = (flip apply) (StringEdits es) =<< go str n 83 apply str (StringEdits (es :|> Delete n)) = (flip apply) (StringEdits es) =<< go str n
83 where 84 where
84 go [] _ = Nothing 85 go Seq.Empty _ = Nothing
85 go (x:xs) n 86 go (x :<| xs) n
86 | n == 0 = Just xs 87 | n == 0 = Just xs
87 | otherwise = (x:) <$> go xs (pred n) 88 | otherwise = (x <|) <$> go xs (pred n)
88 89
89 init = "" 90 init = Seq.empty
90 divInit = StringEdits . Seq.unfoldl go . (0,) 91 divInit = StringEdits . Seq.unfoldl go . (0,)
91 where 92 where
92 go (_, []) = Nothing 93 go (_, Seq.Empty) = Nothing
93 go (n, (c:cs)) = Just ((succ n, cs), Insert n c) 94 go (n, (c :<| cs)) = Just ((succ n, cs), Insert n c)
94 95
95\end{code} 96\end{code}
96 97
@@ -119,46 +120,50 @@ Nun gilt es nur noch die Differenz (als `StringEdits`) des vorherigen Suffixes i
119 120
120% Kann nicht funktionieren, denn $A^{-1}$ ist notwendigerweise nondeterministisch. Wird $A^{-1}$ dann zu einem DFST forciert (durch arbiträre Wahl einer Transition pro Zustand) gehen Informationen verloren—$A^{-1}$ produziert nicht den minimale edit auf dem input string (in der Tat beliebig schlecht) für einen gegeben edit auf dem output string. 121% Kann nicht funktionieren, denn $A^{-1}$ ist notwendigerweise nondeterministisch. Wird $A^{-1}$ dann zu einem DFST forciert (durch arbiträre Wahl einer Transition pro Zustand) gehen Informationen verloren—$A^{-1}$ produziert nicht den minimale edit auf dem input string (in der Tat beliebig schlecht) für einen gegeben edit auf dem output string.
121 122
123% Stelle im bisherigen Lauf isolieren, an der edit im output-string passieren soll, breitensuche auf pfaden, die sich von dieser stelle aus unterscheiden?
124% Gegeben einen Pfad und eine markierte Transition, finde Liste aller Pfade aufsteigend sortiert nach Unterschied zu gegebenem Pfad, mit Unterschieden "nahe" der markierten Transition zuerst — zudem jeweils edit auf dem Eingabestring
125% Einfacher ist Breitensuche ab `stInitial` und zunächst diff auf eingabe-strings.
126
122\begin{code} 127\begin{code}
123 128
124data DFSTAction state = DFSTAction { runDFSTAction :: state -> (state, String -> String) } 129data DFSTAction state input output = DFSTAction { runDFSTAction :: state -> (state, Seq output) }
125 130
126instance Monoid (DFSTAction state) where 131instance Monoid (DFSTAction state input output) where
127 mempty = DFSTAction $ \x -> (x, id) 132 mempty = DFSTAction $ \x -> (x, Seq.empty)
128 (DFSTAction f) `mappend` (DFSTAction g) = DFSTAction $ \s -> let ((f -> (s', out')), out) = g s in (s', out' . out) 133 (DFSTAction f) `mappend` (DFSTAction g) = DFSTAction $ \s -> let ((f -> (s', out')), out) = g s in (s', out <> out')
129 134
130type DFSTComplement state = Compositions (DFSTAction state) 135type DFSTComplement state input output = Compositions (DFSTAction state input output)
131 136
132dfstLens :: forall state. Ord state => DFST state -> EditLens (DFSTComplement state) StringEdits StringEdits 137dfstLens :: forall state input output. (Ord state, Ord input, Ord output) => DFST state input output -> EditLens (DFSTComplement state input output) (StringEdits input) (StringEdits output)
133dfstLens dfst@DFST{..} = EditLens ground propR propL 138dfstLens dfst@DFST{..} = EditLens ground propR propL
134 where 139 where
135 ground :: DFSTComplement state 140 ground :: DFSTComplement state input output
136 ground = Comp.fromList [] 141 ground = Comp.fromList []
137 142
138 propR :: (DFSTComplement state, StringEdits) -> (DFSTComplement state, StringEdits) 143 propR :: (DFSTComplement state input output, StringEdits input) -> (DFSTComplement state input output, StringEdits output)
139 propR (c, SEFail) = (c, SEFail) 144 propR (c, SEFail) = (c, SEFail)
140 propR (c, StringEdits (es :|> e)) = (c', es' <> es'') 145 propR (c, StringEdits (es :|> e)) = (c', es' <> es'')
141 where 146 where
142 (cSuffix, cPrefix) = Comp.splitAt (Comp.length c - (e ^. sePos . from enum)) c 147 (cSuffix, cPrefix) = Comp.splitAt (Comp.length c - (e ^. sePos . from enum)) c
143 cSuffix' 148 cSuffix'
144 | Delete _ <- e = Comp.take (pred $ Comp.length cSuffix) cSuffix -- TODO unsafe 149 | Delete _ <- e = Comp.take (pred $ Comp.length cSuffix) cSuffix -- TODO unsafe
145 | Insert _ nChar <- e = cSuffix <> Comp.singleton (DFSTAction $ \x -> runDFST' dfst x (pure nChar) id) 150 | Insert _ nChar <- e = cSuffix <> Comp.singleton (DFSTAction $ \x -> runDFST' dfst x (pure nChar) Seq.empty)
146 (pState, pOutput) = runDFSTAction (Comp.composed cPrefix) stInitial 151 (pState, pOutput) = runDFSTAction (Comp.composed cPrefix) stInitial
147 (_, sOutput ) = runDFSTAction (Comp.composed cSuffix ) pState 152 (_, sOutput ) = runDFSTAction (Comp.composed cSuffix ) pState
148 (_, sOutput') = runDFSTAction (Comp.composed cSuffix') pState 153 (_, sOutput') = runDFSTAction (Comp.composed cSuffix') pState
149 (c', es') = propR (cSuffix' <> cPrefix, StringEdits es) 154 (c', es') = propR (cSuffix' <> cPrefix, StringEdits es)
150 es'' = (strDiff `on` ($ "")) sOutput sOutput' & stringEdits . sePos . from enum +~ (length $ pOutput []) 155 es'' = strDiff sOutput sOutput' & stringEdits . sePos . from enum +~ Seq.length pOutput
151 propR (c, mempty) = (c, mempty) 156 propR (c, StringEdits Seq.Empty) = (c, mempty)
152 157
153 158
154 propL :: (DFSTComplement state, StringEdits) -> (DFSTComplement state, StringEdits) 159 propL :: (DFSTComplement state input output, StringEdits output) -> (DFSTComplement state input output, StringEdits input)
155 propL = undefined 160 propL = undefined
156 161
157strDiff :: String -> String -> StringEdits 162strDiff :: forall sym. Eq sym => Seq sym -> Seq sym -> StringEdits sym
158-- ^ @strDiff a b@ calculates a set of edits, which, when applied to @a@, produce @b@ 163-- ^ @strDiff a b@ calculates a set of edits, which, when applied to @a@, produce @b@
159strDiff a b = snd . foldr toEdit (0, mempty) $ getDiff a b 164strDiff a b = snd . foldr toEdit (0, mempty) $ (getDiff `on` toList) a b
160 where 165 where
161 toEdit :: Diff Char -> (Natural, StringEdits) -> (Natural, StringEdits) 166 toEdit :: Diff sym -> (Natural, StringEdits sym) -> (Natural, StringEdits sym)
162 toEdit (Diff.Both _ _) (n, es) = (succ n, es) 167 toEdit (Diff.Both _ _) (n, es) = (succ n, es)
163 toEdit (Diff.First _ ) (n, es) = (succ n, delete n <> es) 168 toEdit (Diff.First _ ) (n, es) = (succ n, delete n <> es)
164 toEdit (Diff.Second c) (n, es) = (succ (succ n), insert n c <> es) 169 toEdit (Diff.Second c) (n, es) = (succ (succ n), insert n c <> es)
diff --git a/edit-lens/src/Control/FST.lhs b/edit-lens/src/Control/FST.lhs
new file mode 100644
index 0000000..d3c8ca9
--- /dev/null
+++ b/edit-lens/src/Control/FST.lhs
@@ -0,0 +1,24 @@
1\begin{code}
2
3{-|
4Description: Finite state transducers with epsilon-transitions
5-}
6module Control.FST
7 ( FST(..)
8 ) where
9
10import Data.Map.Strict (Map)
11import qualified Data.Map.Strict as Map
12
13import Data.Set (Set)
14
15import Data.Sequence (Seq)
16
17import Control.Lens.TH
18
19data FST state input output = FST
20 { stInitial :: state
21 , stTransition :: Map (state, Maybe input) (Set (state, Maybe output))
22 , stAccept :: Set state
23 }
24\end{code}
diff --git a/edit-lens/src/Data/String/DFST.hs b/edit-lens/src/Data/String/DFST.hs
deleted file mode 100644
index 54a1336..0000000
--- a/edit-lens/src/Data/String/DFST.hs
+++ /dev/null
@@ -1,42 +0,0 @@
1{-# LANGUAGE ScopedTypeVariables
2#-}
3
4{-|
5Description: Deterministic finite state transducers
6-}
7module Data.String.DFST
8 ( DFST(..)
9 , runDFST, runDFST'
10 ) where
11
12import Data.Map.Strict (Map, (!?))
13import qualified Data.Map.Strict as Map
14
15import Data.Set (Set)
16import qualified Data.Set as Set
17
18import Control.Monad
19
20data DFST state = DFST
21 { stInitial :: state
22 , stTransition :: Map (state, Char) (state, String)
23 -- ^ All @(s, c)@-combinations not mapped are assumed to map to @(s, Nothing)@
24 , stAccept :: Set state
25 }
26
27runDFST :: forall state. Ord state => DFST state -> String -> Maybe String
28runDFST dfst@DFST{..} str = let (finalState, str') = runDFST' dfst stInitial str id
29 in str' "" <$ guard (finalState `Set.member` stAccept)
30
31runDFST' :: forall state. Ord state
32 => DFST state
33 -> state -- ^ Current state
34 -> String -- ^ Remaining input
35 -> (String -> String) -- ^ Output as difference list
36 -> (state, (String -> String)) -- ^ Next state, altered output
37runDFST' _ st [] acc = (st, acc)
38runDFST' dfst@DFST{..} st (c:cs) acc
39 | Just (st', mc') <- stTransition !? (st, c)
40 = runDFST' dfst st' cs $ acc . (mc' ++)
41 | otherwise
42 = runDFST' dfst st cs acc