diff options
Diffstat (limited to 'edit-lens/src/Control/DFST/Lens.lhs')
-rw-r--r-- | edit-lens/src/Control/DFST/Lens.lhs | 412 |
1 files changed, 265 insertions, 147 deletions
diff --git a/edit-lens/src/Control/DFST/Lens.lhs b/edit-lens/src/Control/DFST/Lens.lhs index 95be34e..fe33bd6 100644 --- a/edit-lens/src/Control/DFST/Lens.lhs +++ b/edit-lens/src/Control/DFST/Lens.lhs | |||
@@ -1,12 +1,14 @@ | |||
1 | \begin{comment} | ||
1 | \begin{code} | 2 | \begin{code} |
2 | {-# LANGUAGE ScopedTypeVariables | 3 | {-# LANGUAGE ScopedTypeVariables |
3 | , TemplateHaskell | 4 | , TemplateHaskell |
4 | , ConstraintKinds | 5 | , ConstraintKinds |
6 | , GeneralizedNewtypeDeriving | ||
5 | #-} | 7 | #-} |
6 | 8 | ||
7 | module Control.DFST.Lens | 9 | module Control.DFST.Lens |
8 | ( StringEdit(..) | 10 | ( StringEdit(..), sePos, seInsertion |
9 | , StringEdits(..) | 11 | , StringEdits(..), _StringEdits, _SEFail, stringEdits |
10 | , insert, delete, replace | 12 | , insert, delete, replace |
11 | , DFSTAction(..), DFSTComplement | 13 | , DFSTAction(..), DFSTComplement |
12 | , dfstLens | 14 | , dfstLens |
@@ -16,7 +18,7 @@ module Control.DFST.Lens | |||
16 | 18 | ||
17 | import Control.DFST | 19 | import Control.DFST |
18 | import Control.FST hiding (stInitial, stTransition, stAccept) | 20 | import Control.FST hiding (stInitial, stTransition, stAccept) |
19 | import qualified Control.FST as FST (stInitial, stTransition, stAccept) | 21 | import qualified Control.FST as FST (stInitial, stTransition, stAccept, step) |
20 | import Control.Lens.Edit | 22 | import Control.Lens.Edit |
21 | import Control.Lens | 23 | import Control.Lens |
22 | import Control.Lens.TH | 24 | import Control.Lens.TH |
@@ -32,11 +34,11 @@ import Data.Sequence (Seq((:<|), (:|>))) | |||
32 | import qualified Data.Sequence as Seq | 34 | import qualified Data.Sequence as Seq |
33 | import Data.Set (Set) | 35 | import Data.Set (Set) |
34 | import qualified Data.Set as Set | 36 | import qualified Data.Set as Set |
35 | import Data.Map.Strict (Map) | 37 | import Data.Map.Lazy (Map) |
36 | import qualified Data.Map.Strict as Map | 38 | import qualified Data.Map.Lazy as Map |
37 | 39 | ||
38 | import Data.Compositions.Snoc (Compositions) | 40 | import Data.Compositions (Compositions) |
39 | import qualified Data.Compositions.Snoc as Comp | 41 | import qualified Data.Compositions as Comp |
40 | 42 | ||
41 | import Data.Algorithm.Diff (Diff, getDiff) | 43 | import Data.Algorithm.Diff (Diff, getDiff) |
42 | import qualified Data.Algorithm.Diff as Diff | 44 | import qualified Data.Algorithm.Diff as Diff |
@@ -48,69 +50,72 @@ import Data.Function (on) | |||
48 | import Data.Foldable (toList) | 50 | import Data.Foldable (toList) |
49 | import Data.List (partition) | 51 | import Data.List (partition) |
50 | 52 | ||
51 | import Debug.Trace | 53 | import Control.Exception (assert) |
52 | 54 | ||
55 | import System.IO.Unsafe | ||
56 | import Text.PrettyPrint.Leijen (Pretty(..)) | ||
53 | 57 | ||
54 | data StringEdit char = Insert { _sePos :: Natural, _seInsertion :: char } | 58 | \end{code} |
55 | | Delete { _sePos :: Natural } | 59 | \end{comment} |
60 | |||
61 | |||
62 | Wir betrachten, zur Einfachheit, ein minimiales Set von Edits auf Strings\footnote{Wie in der Konstruktion zum Longest Common Subsequence Problem}: | ||
63 | |||
64 | \begin{defn}[Atomare edits of strings] | ||
65 | \begin{code} | ||
66 | data StringEdit pos char = Insert { _sePos :: pos, _seInsertion :: char } | ||
67 | | Delete { _sePos :: pos } | ||
56 | deriving (Eq, Ord, Show, Read) | 68 | deriving (Eq, Ord, Show, Read) |
57 | 69 | ||
70 | -- Automatically derive van-leerhoven-lenses: | ||
71 | -- | ||
72 | -- @sePos :: Lens' (StringEdits pos char) pos@ | ||
73 | -- @seInsertion :: Traversal' (StringEdits pos char) char@ | ||
58 | makeLenses ''StringEdit | 74 | makeLenses ''StringEdit |
75 | \end{code} | ||
76 | \end{defn} | ||
59 | 77 | ||
60 | data StringEdits char = StringEdits (Seq (StringEdit char)) | 78 | Atomare edits werden, als Liste, zu edits komponiert. |
61 | | SEFail | 79 | Wir führen einen speziellen edit ein, der nicht-Anwendbarkeit der edits repräsentiert: |
80 | \begin{code} | ||
81 | data StringEdits pos char = StringEdits (Seq (StringEdit pos char)) | ||
82 | | SEFail | ||
62 | deriving (Eq, Ord, Show, Read) | 83 | deriving (Eq, Ord, Show, Read) |
63 | 84 | ||
64 | makePrisms ''StringEdits | 85 | makePrisms ''StringEdits |
65 | 86 | ||
66 | stringEdits :: Traversal' (StringEdits char) (StringEdit char) | 87 | stringEdits :: Traversal (StringEdits pos char) (StringEdits pos' char') (StringEdit pos char) (StringEdit pos' char') |
88 | \end{code} | ||
89 | \begin{comment} | ||
90 | \begin{code} | ||
67 | stringEdits = _StringEdits . traverse | 91 | stringEdits = _StringEdits . traverse |
68 | 92 | \end{code} | |
69 | affected :: forall char. StringEdits char -> Maybe (Interval Natural) | 93 | \end{comment} |
70 | -- ^ For a given set of edits @es@ return the interval @i = a ... b@ such that for any given string @str@ of sufficient length the following holds: | 94 | \begin{code} |
71 | -- | 95 | insert :: pos -> char -> StringEdits pos char |
72 | -- - For all @n :: Natural@: @n < a ==> str ! n == (str `apply` es) ! n@ | 96 | \end{code} |
73 | -- - There exists a @k :: Integer@ such that for all @n :: Integer@: @n > b ==> str ! (n + k) == (str `apply` es) ! n@ | 97 | \begin{comment} |
74 | -- | 98 | \begin{code} |
75 | -- Intuitively: for any character @c@ of the new string @str `apply` es@ there exists a corresponding character in @str@ (offset by either 0 or a constant shift @k@) if the index of @c@ is /not/ contained in @affected es@. | ||
76 | affected SEFail = Nothing | ||
77 | affected (StringEdits es) = Just . toInterval $ go es Map.empty | ||
78 | where | ||
79 | toInterval :: Map Natural Integer -> Interval Natural | ||
80 | toInterval map | ||
81 | | Just (((minK, _), _), ((maxK, _), _)) <- (,) <$> Map.minViewWithKey map <*> Map.maxViewWithKey map | ||
82 | = let | ||
83 | maxV' = maximum . (0 :) $ do | ||
84 | offset <- [0..maxK] | ||
85 | v <- maybeToList $ Map.lookup (maxK - offset) map | ||
86 | v' <- maybeToList . fmap fromInteger $ negate v <$ guard (v <= 0) | ||
87 | guard $ v' >= succ offset | ||
88 | return $ v' - offset | ||
89 | in (minK Int.... maxK + maxV') | ||
90 | | otherwise | ||
91 | = Int.empty | ||
92 | go :: Seq (StringEdit char) -> Map Natural Integer -> Map Natural Integer | ||
93 | go Seq.Empty offsets = offsets | ||
94 | go (es :> e) offsets = go es offsets' | ||
95 | where | ||
96 | p = e ^. sePos | ||
97 | p' = fromIntegral $ Map.foldrWithKey (\k o p -> bool (fromIntegral p) (o + p) $ k < fromIntegral p) (fromIntegral p) offsets | ||
98 | offsets' = Map.alter (Just . myOffset . fromMaybe 0) p offsets | ||
99 | myOffset :: Integer -> Integer | ||
100 | myOffset | ||
101 | | Insert _ _ <- e = pred | ||
102 | | Delete _ <- e = succ | ||
103 | |||
104 | insert :: Natural -> char -> StringEdits char | ||
105 | insert n c = StringEdits . Seq.singleton $ Insert n c | 99 | insert n c = StringEdits . Seq.singleton $ Insert n c |
106 | 100 | \end{code} | |
107 | delete :: Natural -> StringEdits char | 101 | \end{comment} |
102 | \begin{code} | ||
103 | delete :: pos -> StringEdits pos char | ||
104 | \end{code} | ||
105 | \begin{comment} | ||
106 | \begin{code} | ||
108 | delete n = StringEdits . Seq.singleton $ Delete n | 107 | delete n = StringEdits . Seq.singleton $ Delete n |
109 | 108 | \end{code} | |
110 | replace :: Natural -> char -> StringEdits char | 109 | \end{comment} |
110 | \begin{code} | ||
111 | replace :: Eq pos => pos -> char -> StringEdits pos char | ||
112 | \end{code} | ||
113 | \begin{comment} | ||
114 | \begin{code} | ||
111 | replace n c = insert n c <> delete n | 115 | replace n c = insert n c <> delete n |
112 | 116 | ||
113 | instance Monoid (StringEdits char) where | 117 | -- | Rudimentarily optimize edit composition |
118 | instance Eq pos => Monoid (StringEdits pos char) where | ||
114 | mempty = StringEdits Seq.empty | 119 | mempty = StringEdits Seq.empty |
115 | SEFail `mappend` _ = SEFail | 120 | SEFail `mappend` _ = SEFail |
116 | _ `mappend` SEFail = SEFail | 121 | _ `mappend` SEFail = SEFail |
@@ -122,12 +127,16 @@ instance Monoid (StringEdits char) where | |||
122 | , n == n' | 127 | , n == n' |
123 | = StringEdits bs `mappend` StringEdits as | 128 | = StringEdits bs `mappend` StringEdits as |
124 | | otherwise = StringEdits $ x `mappend` y | 129 | | otherwise = StringEdits $ x `mappend` y |
130 | \end{code} | ||
131 | \end{comment} | ||
125 | 132 | ||
126 | instance Module (StringEdits char) where | 133 | Da wir ein minimales set an atomaren edits gewählt haben, ist die Definiton der Modulnstruktur über Strings des passenden Alphabets recht einfach: |
127 | type Domain (StringEdits char) = Seq char | 134 | \begin{code} |
135 | instance Module (StringEdits Natural char) where | ||
136 | type Domain (StringEdits Natural char) = Seq char | ||
128 | apply str SEFail = Nothing | 137 | apply str SEFail = Nothing |
129 | apply str (StringEdits Seq.Empty) = Just str | 138 | apply str (StringEdits Seq.Empty) = Just str |
130 | apply str (StringEdits (es :|> Insert n c)) = (flip apply) (StringEdits es) =<< go str n c | 139 | apply str (StringEdits (es :|> Insert n c)) = flip apply (StringEdits es) =<< go str n c |
131 | where | 140 | where |
132 | go Seq.Empty n c | 141 | go Seq.Empty n c |
133 | | n == 0 = Just $ Seq.singleton c | 142 | | n == 0 = Just $ Seq.singleton c |
@@ -135,7 +144,7 @@ instance Module (StringEdits char) where | |||
135 | go str@(x :<| xs) n c | 144 | go str@(x :<| xs) n c |
136 | | n == 0 = Just $ c <| str | 145 | | n == 0 = Just $ c <| str |
137 | | otherwise = (x <|) <$> go xs (pred n) c | 146 | | otherwise = (x <|) <$> go xs (pred n) c |
138 | apply str (StringEdits (es :|> Delete n)) = (flip apply) (StringEdits es) =<< go str n | 147 | apply str (StringEdits (es :|> Delete n)) = flip apply (StringEdits es) =<< go str n |
139 | where | 148 | where |
140 | go Seq.Empty _ = Nothing | 149 | go Seq.Empty _ = Nothing |
141 | go (x :<| xs) n | 150 | go (x :<| xs) n |
@@ -146,99 +155,128 @@ instance Module (StringEdits char) where | |||
146 | divInit = StringEdits . Seq.unfoldl go . (0,) | 155 | divInit = StringEdits . Seq.unfoldl go . (0,) |
147 | where | 156 | where |
148 | go (_, Seq.Empty) = Nothing | 157 | go (_, Seq.Empty) = Nothing |
149 | go (n, (c :<| cs)) = Just ((succ n, cs), Insert n c) | 158 | go (n, c :<| cs ) = Just ((succ n, cs), Insert n c) |
150 | 159 | ||
151 | \end{code} | 160 | \end{code} |
152 | 161 | ||
153 | % TODO Make notation mathy | 162 | % TODO Make notation mathy |
154 | 163 | ||
155 | Um zunächst eine asymmetrische edit-lens `StringEdits -> StringEdits` mit akzeptabler Komplexität für einen bestimmten `DFST s` (entlang der \emph{Richtung} des DFSTs) zu konstruieren möchten wir folgendes Verfahren anwenden: | 164 | Um zunächst eine asymmetrische edit-lens \texttt{StringEdits -> StringEdits} mit akzeptabler Komplexität für einen bestimmten DFST (entlang der \emph{Richtung} des DFSTs) zu konstruieren möchten wir folgendes Verfahren anwenden: |
156 | 165 | ||
157 | Gegeben eine Sequenz (`StringEdits`) von zu übersetzenden Änderungen genügt es die Übersetzung eines einzelnen `StringEdit`s in eine womöglich längere Sequenz von `StringEdits` anzugeben, alle `StringEdits` aus der Sequenz zu übersetzen (hierbei muss auf die korrekte Handhabung des Komplements geachtet werden) und jene Übersetzungen dann zu concatenieren. | 166 | Gegeben eine Sequenz von zu übersetzenden Änderungen genügt es die Übersetzung eines einzelnen \texttt{StringEdit}s in eine womöglich längere Sequenz von \texttt{StringEdits} anzugeben, alle \texttt{StringEdits} aus der Sequenz derart zu übersetzen (hierbei muss auf die korrekte Handhabung des Komplements geachtet werden) und jene Übersetzungen dann zu concatenieren. |
158 | 167 | ||
159 | Wir definieren zunächst die \emph{Wirkung} eines DFST auf einen festen String als eine Abbildung `state -> (state, String)`, die den aktuellen Zustand vorm Parsen des Strings auf den Zustand danach und die (womöglich leere) Ausgabe schickt. | 168 | Wir definieren zunächst die \emph{Wirkung} eines DFST auf einen festen String als eine Abbildung \texttt{state -> (Seq output, Maybe state)}, die den aktuellen Zustand vor dem Parsen des Strings auf den Zustand danach und die (womöglich leere) Ausgabe schickt. |
169 | Wir annotieren Wirkungen zudem mit dem konsumierten String. | ||
160 | Diese Wirkungen bilden einen Monoiden analog zu Endomorphismen, wobei die Resultat-Strings concateniert werden. | 170 | Diese Wirkungen bilden einen Monoiden analog zu Endomorphismen, wobei die Resultat-Strings concateniert werden. |
161 | 171 | ||
162 | Die Unterliegende Idee ist nun im Komplement der edit-lens eine Liste von Wirkungen (eine für jedes Zeichen der Eingabe des DFSTs) und einen Cache der monoidalen Summen aller kontinuirlichen Teillisten zu halten. | ||
163 | Da wir wissen welche Stelle im input-String von einem gegebenen edit betroffen ist können wir, anhand der Wirkung des Teilstücks bis zu jener Stelle, den output-String in einen durch den edit unveränderten Prefix und einen womöglich betroffenen Suffix unterteilen. | ||
164 | Die Wirkung ab der betroffenen Stelle im input-String können wir also Komposition der Wirkung der durch den edit betroffenen Stelle und derer aller Zeichen danach bestimmen. | ||
165 | Nun gilt es nur noch die Differenz (als `StringEdits`) des vorherigen Suffixes im output-String und des aus der gerade berechneten Wirkung Bestimmten zu bestimmen. | ||
166 | |||
167 | |||
168 | % Für die Rückrichtung bietet es sich an eine Art primitive Invertierung des DFSTs zu berechnen. | ||
169 | % Gegeben den aktuellen DFST $A$ möchten wir einen anderen $A^{-1}$ finden, sodass gilt: | ||
170 | |||
171 | % \begin{itemize} | ||
172 | % \item $A^{-1}$ akzeptiert einen String $s^{-1}$ (endet seinen Lauf in einem finalen Zustand) gdw. es einen String $s$ gibt, der unter $A$ die Ausgabe $s^{-1}$ produziert. | ||
173 | % \item Wenn $A^{-1}$ einen String $s^{-1}$ akzeptiert so produziert die resultierende Ausgabe $s$ unter $A$ die Ausgabe $s^{-1}$. | ||
174 | % \end{itemize} | ||
175 | |||
176 | % Kann nicht funktionieren, denn $A^{-1}$ ist notwendigerweise nondeterministisch. Wird $A^{-1}$ dann zu einem DFST forciert (durch arbiträre Wahl einer Transition pro Zustand) gehen Informationen verloren—$A^{-1}$ produziert nicht den minimale edit auf dem input string (in der Tat beliebig schlecht) für einen gegeben edit auf dem output string. | ||
177 | |||
178 | % Stelle im bisherigen Lauf isolieren, an der edit im output-string passieren soll, breitensuche auf pfaden, die sich von dieser stelle aus unterscheiden? | ||
179 | % Gegeben einen Pfad und eine markierte Transition, finde Liste aller Pfade aufsteigend sortiert nach Unterschied zu gegebenem Pfad, mit Unterschieden "nahe" der markierten Transition zuerst — zudem jeweils edit auf dem Eingabestring | ||
180 | % Einfacher ist Breitensuche ab `stInitial` und zunächst diff auf eingabe-strings. | ||
181 | |||
182 | \begin{code} | 172 | \begin{code} |
183 | |||
184 | data DFSTAction state input output = DFSTAction | 173 | data DFSTAction state input output = DFSTAction |
185 | { runDFSTAction :: state -> (state, Seq output) | 174 | { runDFSTAction :: state -> (Seq output, Maybe state) |
186 | , dfstaConsumes :: Seq input | 175 | , dfstaConsumes :: Seq input |
187 | } | 176 | } |
188 | 177 | ||
189 | instance Monoid (DFSTAction state input output) where | 178 | instance Monoid (DFSTAction state input output) where |
190 | mempty = DFSTAction (\x -> (x, Seq.empty)) Seq.empty | 179 | \end{code} |
180 | \begin{comment} | ||
181 | \begin{code} | ||
182 | mempty = DFSTAction (\x -> (Seq.empty, Just x)) Seq.empty | ||
191 | DFSTAction f cf `mappend` DFSTAction g cg = DFSTAction | 183 | DFSTAction f cf `mappend` DFSTAction g cg = DFSTAction |
192 | { runDFSTAction = \s -> let ((f -> (s', out')), out) = g s in (s', out <> out') | 184 | { runDFSTAction = \x -> |
185 | let (outG, x') = g x | ||
186 | (outF, x'') = maybe (mempty, Nothing) f x' | ||
187 | in (outG <> outF, x'') | ||
193 | , dfstaConsumes = cg <> cf | 188 | , dfstaConsumes = cg <> cf |
194 | } | 189 | } |
190 | \end{code} | ||
191 | \end{comment} | ||
192 | \begin{code} | ||
195 | 193 | ||
196 | type DFSTComplement state input output = Compositions (DFSTAction state input output) | 194 | type DFSTComplement state input output = Compositions (DFSTAction state input output) |
197 | 195 | ||
198 | runDFSTAction' :: DFSTComplement state input output -> state -> (state, Seq output) | 196 | runDFSTAction' :: DFSTComplement state input output -> state -> (Seq output, Maybe state) |
199 | runDFSTAction' = runDFSTAction . Comp.composed | 197 | runDFSTAction' = runDFSTAction . Comp.composed |
200 | 198 | ||
201 | dfstaConsumes' :: DFSTComplement state input output -> Seq input | 199 | dfstaConsumes' :: DFSTComplement state input output -> Seq input |
202 | dfstaConsumes' = dfstaConsumes . Comp.composed | 200 | dfstaConsumes' = dfstaConsumes . Comp.composed |
203 | 201 | ||
204 | dfstaProduces :: DFST state input output -> DFSTComplement state input output -> Seq output | 202 | dfstaProduces :: DFSTComplement state input output -> state -> Seq output |
205 | dfstaProduces DFST{..} = snd . flip runDFSTAction' stInitial | 203 | dfstaProduces = fmap fst . runDFSTAction' |
204 | \end{code} | ||
206 | 205 | ||
207 | type Debug state input output = (Show state, Show input, Show output) | 206 | Die Unterliegende Idee von $\Rrightarrow$ ist nun im Komplement der edit-lens eine Liste von Wirkungen (eine für jedes Zeichen der Eingabe des DFSTs) und einen Cache der monoidalen Summen aller kontinuirlichen Teillisten zu halten. |
208 | 207 | ||
209 | type LState state input output = (Natural, (state, Maybe (input, Natural))) | 208 | Wir können die alte DFST-Wirkung zunächst anhand des Intervalls indem der input-String von allen gegebenen edits betroffen ist in einen unveränderten Prefix und einen womöglich betroffenen Suffix unterteilen. |
209 | |||
210 | Da wir wissen welche Stelle im input-String vom ersten gegebenen edit betroffen ist können wir, anhand der Wirkung des Teilstücks bis zu jener Stelle, den betroffenen Suffix wiederum teilen. | ||
211 | Die Wirkung ab der betroffenen Stelle im input-String können wir als Komposition der Wirkung der durch den edit betroffenen Stelle und derer aller Zeichen danach bestimmen. | ||
212 | Nun gilt es nur noch die Differenz (als `StringEdits`) des vorherigen Suffixes im output-String und des aus der gerade berechneten Wirkung zu bestimmen, wir bedienen uns hierzu dem Unix Standard-Diff-Algorithmus zwischen der ursprünglichen Ausgabe und dem Ergebnis der Iteration des Verfahrens auf alle gegebenen edits. | ||
213 | |||
214 | Für die asymmetrische edit-lens entgegen der DFST-Richtung $\Lleftarrow$ verwenden wir Breitensuche über die Zustände des DFST innerhalb eines iterative vergrößerten Intervalls: | ||
215 | |||
216 | Wir bestimmen zunächst (`affected`) eine obere Schranke an das Intervall in dem der Ausgabe-String vom edit betroffen ist und generieren eine von dort quadratisch wachsende Serie von Intervallen. | ||
217 | |||
218 | Für jedes Intervall ("lokalere" Änderungen werden präferiert) schränken wir zunächst den DFST (zur einfachereren Implementierung in seiner Darstellung als FST) vermöge \texttt{restrictOutput} derart ein, dass nur die gewünschte Ausgabe produziert werden kann. | ||
210 | 219 | ||
211 | dfstLens :: forall state input output. (Ord state, Ord input, Ord output, Debug state input output) => DFST state input output -> EditLens (DFSTComplement state input output) (StringEdits input) (StringEdits output) | 220 | Wir betrachten dann in jedem Schritt (beginnend mit dem initialen Zustand des DFST) alle ausgehenden Transitionen und ziehen hierbei jene vor, die im vorherigen Lauf (gespeichert im Komplement der edit-lens), ebenfalls genommen wurden. |
221 | Abweichungen vom im Komplement gespeicherten Lauf lassen wir nur innerhalb des betrachteten Intervalls zu und wählen in diesem Fall einen Edit auf der Eingabe, der die gewählte Abweichung produziert. | ||
222 | Es wird zudem, wie für Breitensuche üblich, jeder besuchte Zustand markiert und ausgehende Transitionen nicht ein zweites mal betrachtet. | ||
223 | |||
224 | Erreichen wir einen finalen Zustand (wegen der Einschränkung des DFSTs wurde dann auch genau die gewünschte Ausgabe produziert), so fügen wir an die gesammelten Eingabe-edits eine Serie von deletions an, die den noch nicht konsumierten suffix der Eingabe verwerfen und brechen die Suche unter Rückgabe der Eingabe-edits und des neuen Laufs ab. | ||
225 | |||
226 | In Haskell formulieren wir das vorzeitige Abbrechen der Suche indem wir eine vollständige Liste von Rückgabe-Kandidaten konstruieren und dann immer ihr erstes Element zurück geben. | ||
227 | Wegen der verzögerten Auswertungsstrategie von Haskell wird auch tatsächlich nur der erste Rückgabe-Kandidat konstruiert. | ||
228 | |||
229 | \begin{comment} | ||
230 | \begin{code} | ||
231 | type LState state input output = (Natural, (state, Maybe (input, Natural))) | ||
232 | \end{code} | ||
233 | \end{comment} | ||
234 | \begin{code} | ||
235 | dfstLens :: forall state input output. (Ord state, Ord input, Ord output, Show state, Show input, Show output) => DFST state input output -> EditLens (DFSTComplement state input output) (StringEdits Natural input) (StringEdits Natural output) | ||
236 | \end{code} | ||
237 | \begin{comment} | ||
238 | \begin{code} | ||
212 | dfstLens dfst@DFST{..} = EditLens ground propR propL | 239 | dfstLens dfst@DFST{..} = EditLens ground propR propL |
213 | where | 240 | where |
214 | ground :: DFSTComplement state input output | 241 | ground :: DFSTComplement state input output |
215 | ground = Comp.fromList [] | 242 | ground = mempty |
216 | 243 | ||
217 | propR :: (DFSTComplement state input output, StringEdits input) -> (DFSTComplement state input output, StringEdits output) | 244 | propR :: (DFSTComplement state input output, StringEdits Natural input) -> (DFSTComplement state input output, StringEdits Natural output) |
218 | propR (c, SEFail) = (c, SEFail) | 245 | propR (c, SEFail) = (c, SEFail) |
219 | propR (c, StringEdits Seq.Empty) = (c, mempty) | 246 | propR (c, StringEdits Seq.Empty) = (c, mempty) |
220 | propR (c, StringEdits (es :> e)) | 247 | propR (c, es'@(StringEdits (es :> e))) |
221 | | fst (runDFSTAction' c' stInitial) `Set.member` stAccept = (c', es' <> es'') | 248 | | (_, Just final) <- runDFSTAction' c' stInitial |
222 | | otherwise = (c', SEFail) | 249 | , final `Set.member` stAccept |
250 | = (c', rEs) | ||
251 | | otherwise | ||
252 | = (c, SEFail) | ||
223 | where | 253 | where |
254 | Just int = affected es' | ||
255 | (cAffSuffix, cAffPrefix) = Comp.splitAt (Comp.length c - fromIntegral (Int.inf int)) c | ||
224 | (cSuffix, cPrefix) = Comp.splitAt (Comp.length c - (e ^. sePos . from enum)) c | 256 | (cSuffix, cPrefix) = Comp.splitAt (Comp.length c - (e ^. sePos . from enum)) c |
225 | cSuffix' | 257 | cSuffix' |
226 | | Delete _ <- e = Comp.take (pred $ Comp.length cSuffix) cSuffix -- TODO unsafe | 258 | | Delete _ <- e |
259 | , Comp.length cSuffix > 0 = Comp.take (pred $ Comp.length cSuffix) cSuffix | ||
227 | | Insert _ nChar <- e = cSuffix <> Comp.singleton (DFSTAction (\x -> runDFST' dfst x (pure nChar) Seq.empty) (Seq.singleton nChar)) | 260 | | Insert _ nChar <- e = cSuffix <> Comp.singleton (DFSTAction (\x -> runDFST' dfst x (pure nChar) Seq.empty) (Seq.singleton nChar)) |
228 | (pState, pOutput) = runDFSTAction' cPrefix stInitial | 261 | | otherwise = Comp.singleton $ DFSTAction (\_ -> (Seq.empty, Nothing)) Seq.empty |
229 | (_, sOutput ) = runDFSTAction' cSuffix pState | 262 | (c', _) = propR (cSuffix' <> cPrefix, StringEdits es) |
230 | (_, sOutput') = runDFSTAction' cSuffix' pState | 263 | (cAffSuffix', _) = Comp.splitAt (Comp.length c' - Comp.length cAffPrefix) c' |
231 | (c', es') = propR (cSuffix' <> cPrefix, StringEdits es) | 264 | (_, Just pFinal) = runDFSTAction' cPrefix stInitial |
232 | es'' = strDiff sOutput sOutput' & stringEdits . sePos . from enum +~ Seq.length pOutput | 265 | rEs = strDiff (fst $ runDFSTAction' cAffSuffix pFinal) (fst $ runDFSTAction' cAffSuffix' pFinal) & stringEdits . sePos . from enum +~ length (dfstaProduces cAffPrefix stInitial) |
233 | 266 | ||
234 | 267 | ||
235 | propL :: (DFSTComplement state input output, StringEdits output) -> (DFSTComplement state input output, StringEdits input) | 268 | propL :: (DFSTComplement state input output, StringEdits Natural output) -> (DFSTComplement state input output, StringEdits Natural input) |
236 | propL (c, StringEdits Seq.Empty) = (c, mempty) | 269 | propL (c, StringEdits Seq.Empty) = (c, mempty) |
237 | propL (c, es) = fromMaybe (c, SEFail) $ do | 270 | propL (c, es) = fromMaybe (c, SEFail) $ do |
271 | let prevOut = dfstaProduces c stInitial | ||
238 | newOut <- prevOut `apply` es | 272 | newOut <- prevOut `apply` es |
239 | affected' <- affected es | 273 | affected' <- affected es |
240 | let outFST :: FST (LState state input output) input output | 274 | let outFST :: FST (LState state input output) input output |
241 | outFST = wordFST newOut `productFST` toFST dfst | 275 | -- outFST = wordFST newOut `productFST` toFST dfst |
276 | outFST = restrictOutput newOut $ toFST dfst | ||
277 | |||
278 | trace x y = flip seq y . unsafePerformIO $ appendFile "lens.log" (x <> "\n\n") | ||
279 | |||
242 | inflate by int | 280 | inflate by int |
243 | | Int.null int = Int.empty | 281 | | Int.null int = Int.empty |
244 | | inf >= by = inf - by Int.... sup + by | 282 | | inf >= by = inf - by Int.... sup + by |
@@ -251,53 +289,90 @@ dfstLens dfst@DFST{..} = EditLens ground propR propL | |||
251 | max = fromIntegral $ Seq.length newOut | 289 | max = fromIntegral $ Seq.length newOut |
252 | all = 0 Int.... max | 290 | all = 0 Int.... max |
253 | runCandidates :: Interval Natural -- ^ Departure from complement-run only permitted within interval (to guarantee locality) | 291 | runCandidates :: Interval Natural -- ^ Departure from complement-run only permitted within interval (to guarantee locality) |
254 | -> [ ( Seq (LState state input output, Maybe output) -- ^ Computed run | 292 | -> [ ( Seq (LState state input output, Maybe output) -- ^ Computed run |
255 | , StringEdits input | 293 | , StringEdits Natural input |
256 | , DFSTComplement state input output | 294 | , DFSTComplement state input output |
257 | ) | 295 | ) |
258 | ] | 296 | ] |
259 | runCandidates focus = continueRun (Seq.empty, mempty) (c, mempty) 0 | 297 | runCandidates focus = map ((,,) <$> view _1 <*> view _2 <*> view (_3 . _2)) $ go Set.empty [(Seq.empty, mempty, (c, mempty), 0)] |
260 | where | 298 | where |
261 | continueRun :: (Seq (LState state input output, Maybe output), StringEdits input) | 299 | go _ [] = [] |
300 | go visited (args@(run, edits, compZipper, inP) : alts) = | ||
301 | [ (run', finalizeEdits remC inP' edits', compZipper', inP') | (run', edits', compZipper'@(remC, _), inP') <- args : conts, isFinal run' ] | ||
302 | ++ go visited' (alts ++ conts) | ||
303 | where | ||
304 | conts | ||
305 | | lastSt <- view _1 <$> Seq.lookup (pred $ Seq.length run) run | ||
306 | , lastSt `Set.member` visited = [] | ||
307 | | otherwise = continueRun edits compZipper inP run | ||
308 | visited' = Set.insert (view _1 <$> Seq.lookup (pred $ Seq.length run) run) visited | ||
309 | |||
310 | isFinal :: Seq (LState state input output, Maybe output) -> Bool | ||
311 | -- ^ Is the final state of the run a final state of the DFST? | ||
312 | isFinal Seq.Empty = (0, (stInitial, Nothing)) `Set.member` FST.stAccept outFST | ||
313 | && (0 Int.... fromIntegral (Seq.length newOut)) `Int.isSubsetOf` focus | ||
314 | isFinal (_ :> (lastSt, _)) = lastSt `Set.member` FST.stAccept outFST | ||
315 | |||
316 | finalizeEdits :: DFSTComplement state input output -- ^ Remaining complement | ||
317 | -> Natural -- ^ Input position | ||
318 | -> StringEdits Natural input -> StringEdits Natural input | ||
319 | finalizeEdits remC inP = mappend . mconcat . replicate (Seq.length $ dfstaConsumes' remC) $ delete inP | ||
320 | |||
321 | continueRun :: StringEdits Natural input | ||
262 | -> (DFSTComplement state input output, DFSTComplement state input output) -- ^ Zipper into complement | 322 | -> (DFSTComplement state input output, DFSTComplement state input output) -- ^ Zipper into complement |
263 | -> Natural -- ^ Input position | 323 | -> Natural -- ^ Input position |
264 | -> [(Seq (LState state input output, Maybe output), StringEdits input, DFSTComplement state input output)] | 324 | -> Seq (LState state input output, Maybe output) |
265 | continueRun (run, inEdits) (c', remC) inP = do | 325 | -> [ ( Seq (LState state input output, Maybe output) |
326 | , StringEdits Natural input | ||
327 | , (DFSTComplement state input output, DFSTComplement state input output) | ||
328 | , Natural | ||
329 | ) | ||
330 | ] | ||
331 | -- ^ Nondeterministically make a single further step, continueing a given run | ||
332 | continueRun inEdits (c', remC) inP run = do | ||
266 | let | 333 | let |
267 | pos :: Natural | 334 | pos :: Natural |
268 | pos = fromIntegral $ Comp.length c - Comp.length c' | 335 | -- pos = fromIntegral $ Comp.length c - Comp.length c' -- FIXME: should use length of dfstaProduces |
336 | pos = fromIntegral . Seq.length $ dfstaProduces remC stInitial | ||
269 | (c'', step) = Comp.splitAt (pred $ Comp.length c') c' -- TODO: unsafe? | 337 | (c'', step) = Comp.splitAt (pred $ Comp.length c') c' -- TODO: unsafe? |
270 | current :: LState state input output | 338 | current :: LState state input output |
271 | current | 339 | current |
272 | | Seq.Empty <- run = (0, (stInitial, Nothing)) | 340 | | Seq.Empty <- run = (0, (stInitial, Nothing)) |
273 | | (_ :> (st, _)) <- run = st | 341 | | (_ :> (st, _)) <- run = st |
274 | current' :: state | 342 | current' :: state |
275 | current' = let (_, (st, _)) = current | ||
276 | in st | ||
277 | next' :: state | ||
278 | next' = fst . runDFSTAction' step $ current' | ||
279 | oldIn :: Maybe input | 343 | oldIn :: Maybe input |
280 | oldIn = Seq.lookup 0 $ dfstaConsumes' step | 344 | (current', oldIn) |
345 | | (_ :> ((_, (st, _)), _)) <- rest | ||
346 | , (_ :> ((_, (_, Just (partialIn, _))), _)) <- partial = (st, Just partialIn) | ||
347 | | (_ :> ((_, (_, Just (partialIn, _))), _)) <- partial = (stInitial, Just partialIn) | ||
348 | | Seq.Empty <- rest = (stInitial, Seq.lookup 0 $ dfstaConsumes' step) | ||
349 | | (_ :> ((_, (st, _)), _)) <- rest = (st, Seq.lookup 0 $ dfstaConsumes' step) | ||
350 | where | ||
351 | (partial, rest) = Seq.spanr (\((_, (_, inp)), _) -> isJust inp) run | ||
352 | next' <- trace (show ("next'", pos, focus, run, (current', oldIn), current, dfstaConsumes' step, runDFST' dfst current' (maybe Seq.empty Seq.singleton oldIn) Seq.empty)) . maybeToList . snd $ runDFST' dfst current' (maybe Seq.empty Seq.singleton oldIn) Seq.empty | ||
353 | let | ||
281 | outgoing :: LState state input output -> [(LState state input output, Maybe input, Maybe output)] | 354 | outgoing :: LState state input output -> [(LState state input output, Maybe input, Maybe output)] |
282 | outgoing current = let go (st, minS) os acc | 355 | outgoing current = let go (st, minS) outs acc |
283 | | st == current = ($ acc) $ Set.fold (\(st', moutS) -> (. ((st', minS, moutS) :))) id os | 356 | | st == current = Set.foldr (\(st', moutS) -> ((st', minS, moutS) :)) acc outs |
284 | | otherwise = acc | 357 | | otherwise = acc |
285 | in Map.foldrWithKey go [] $ FST.stTransition outFST | 358 | in Map.foldrWithKey go [] $ FST.stTransition outFST |
286 | isPreferred :: (LState state input output, Maybe input, Maybe output) -> Bool | 359 | isPreferred :: (LState state input output, Maybe input, Maybe output) -> Bool |
287 | isPreferred ((_, (st, Nothing)), inS, _) = st == next' && (fromMaybe True $ (==) <$> oldIn <*> inS) | 360 | isPreferred ((_, (st, Nothing)), _, _) = st == next' |
288 | isPreferred (st, _, _) = any isPreferred $ outgoing st -- By construction of `outFST`, `outgoing st` is a singleton | 361 | isPreferred (st@(_, (_, Just (inS , _))), _, _) = maybe True (== inS) oldIn && any isPreferred (outgoing st) -- By construction of `outFST`, `outgoing st` is a singleton in this case |
289 | (preferred, alternate) = partition isPreferred $ outgoing current | 362 | (preferred, alternate) = partition isPreferred $ outgoing current |
290 | assocEdit :: (LState state input output, Maybe input, Maybe output) -- ^ Transition | 363 | assocEdit :: (LState state input output, Maybe input, Maybe output) -- ^ Transition |
291 | -> [ ( (DFSTComplement state input output, DFSTComplement state input output) -- ^ new `(c', remC)`, i.e. complement-zipper `(c', remC)` but with edit applied | 364 | -> [ ( (DFSTComplement state input output, DFSTComplement state input output) -- ^ new `(c', remC)`, i.e. complement-zipper `(c', remC)` but with edit applied |
292 | , StringEdits input | 365 | , StringEdits Natural input |
293 | , Natural | 366 | , Natural |
294 | ) | 367 | ) |
295 | ] | 368 | ] |
296 | assocEdit (_, Just inS, _) | 369 | assocEdit (_, Just inS, _) |
297 | | oldIn == Just inS = [((c'', step <> remC), mempty, succ inP)] | 370 | | oldIn == Just inS = [ ((c'', step <> remC), mempty, succ inP) ] |
298 | | isJust oldIn = [((c'', altStep inS <> remC), replace inP inS, succ inP), ((c', altStep inS <> remC), insert inP inS, succ inP)] | 371 | | isJust oldIn = [ ((c', altStep inS <> remC), insert inP inS, succ inP) |
299 | | otherwise = [((c', altStep inS <> remC), insert inP inS, succ inP)] | 372 | , ((c'', altStep inS <> remC), replace inP inS, succ inP) |
300 | assocEdit (_, Nothing, _) = [((c', remC), mempty, inP)] -- TODO: is this correct? | 373 | ] |
374 | | otherwise = [ ((c', altStep inS <> remC), insert inP inS, succ inP) ] | ||
375 | assocEdit (_, Nothing, _) = [((c', remC), mempty, inP)] | ||
301 | altStep :: input -> DFSTComplement state input output | 376 | altStep :: input -> DFSTComplement state input output |
302 | altStep inS = Comp.singleton DFSTAction{..} | 377 | altStep inS = Comp.singleton DFSTAction{..} |
303 | where | 378 | where |
@@ -306,7 +381,7 @@ dfstLens dfst@DFST{..} = EditLens ground propR propL | |||
306 | options | 381 | options |
307 | | pos `Int.member` focus = preferred ++ alternate | 382 | | pos `Int.member` focus = preferred ++ alternate |
308 | | otherwise = preferred | 383 | | otherwise = preferred |
309 | choice@(next, inS, outS) <- options | 384 | choice@(next, inS, outS) <- trace (unlines $ show (pretty outFST) : map show options) options |
310 | ((c3, remC'), inEdits', inP') <- assocEdit choice | 385 | ((c3, remC'), inEdits', inP') <- assocEdit choice |
311 | -- let | 386 | -- let |
312 | -- -- | Replace prefix of old complement to reflect current candidate | 387 | -- -- | Replace prefix of old complement to reflect current candidate |
@@ -317,27 +392,70 @@ dfstLens dfst@DFST{..} = EditLens ground propR propL | |||
317 | -- fin | 392 | -- fin |
318 | -- | (trans, inEs, newComplement) <- acc = (trans, dropSuffix <> inEs, newComplement) | 393 | -- | (trans, inEs, newComplement) <- acc = (trans, dropSuffix <> inEs, newComplement) |
319 | let | 394 | let |
320 | acc = (run :> (next, outS), inEdits' <> inEdits) | 395 | trans = run :> (next, outS) |
321 | dropSuffix = mconcat (replicate (Seq.length $ dfstaConsumes' c3) $ delete inP') | 396 | inEs = inEdits' <> inEdits |
322 | fin | 397 | -- dropSuffix = mconcat (replicate (Seq.length $ dfstaConsumes' c3) $ delete inP') |
323 | | (trans, inEs) <- acc = (trans, dropSuffix <> inEs, remC') | 398 | -- fin |
324 | bool id (fin :) (next `Set.member` FST.stAccept outFST) $ continueRun acc (c3, remC') inP' | 399 | -- | (trans, inEs) <- acc = (trans, dropSuffix <> inEs, remC') |
400 | -- bool id (over _BFS $ cons fin) (next `Set.member` FST.stAccept outFST) $ continueRun acc (c3, remC') inP' | ||
401 | return (trans, inEs, (c3, remC'), inP') | ||
325 | 402 | ||
326 | -- Properties of the edits computed are determined mostly by the order candidates are generated below | 403 | -- Properties of the edits computed are determined mostly by the order candidates are generated below |
327 | -- (_, inEs, c') <- (\xs -> foldr (\x f -> x `seq` f) listToMaybe xs $ xs) $ traceShowId fragmentIntervals >>= (\x -> (\y@(y1, y2, _) -> traceShow (y1, y2) y) <$> runCandidates x) | 404 | -- (_, inEs, c') <- (\xs -> foldr (\x f -> x `seq` f) listToMaybe xs $ xs) $ traceShowId fragmentIntervals >>= (\x -> (\y@(y1, y2, _) -> traceShow (y1, y2) y) <$> runCandidates x) |
328 | 405 | ||
329 | (_, inEs, c') <- listToMaybe $ runCandidates =<< fragmentIntervals | 406 | fmap ((,) <$> view _3 <*> view _2) . listToMaybe $ runCandidates =<< fragmentIntervals |
330 | |||
331 | return (c', inEs) | ||
332 | where | ||
333 | (_, prevOut) = runDFSTAction' c stInitial | ||
334 | 407 | ||
335 | strDiff :: forall sym. Eq sym => Seq sym -> Seq sym -> StringEdits sym | 408 | strDiff :: forall sym pos. (Eq sym, Integral pos) => Seq sym -> Seq sym -> StringEdits pos sym |
336 | -- ^ @strDiff a b@ calculates a set of edits, which, when applied to @a@, produce @b@ | 409 | -- ^ @strDiff a b@ calculates a set of edits, which, when applied to @a@, produce @b@ |
337 | strDiff a b = snd . foldr toEdit (0, mempty) $ (getDiff `on` toList) a b | 410 | strDiff a b = snd . foldl toEdit (0, mempty) $ (getDiff `on` toList) a b |
411 | where | ||
412 | toEdit :: (pos, StringEdits pos sym) -> Diff sym -> (pos, StringEdits pos sym) | ||
413 | toEdit (n, es) (Diff.Both _ _) = (succ n, es) | ||
414 | toEdit (n, es) (Diff.First _ ) = (n, delete n <> es) | ||
415 | toEdit (n, es) (Diff.Second c) = (succ n, insert n c <> es) | ||
416 | \end{code} | ||
417 | \end{comment} | ||
418 | |||
419 | Um eine obere Schranke an das von einer Serie von edits betroffene Intervall zu bestimmen ordnen wir zunächst jeder von mindestens einem atomaren edit betroffenen Position $n$ im Eingabe-Wort einen $\text{offset}_n = \text{\# deletions} - \text{\# inserts}$ zu. | ||
420 | Das gesuchte Intervall ist nun $(\text{minK}, \text{maxK})$, mit $\text{minK}$ der Position im Eingabe-Wort mit niedrigstem $\text{offset}$ und $\text{maxK}$ die Position im Eingabe-Wort mit höchstem $\text{offset}$, $\text{maxK}^\prime$, modifiziert um das Maximum aus $\{ 0 \} \cup \{ \text{maxK}_n \colon n \in \{ 0 \ldots \text{maxK}^\prime \} \}$ wobei $\text{maxK}_n = -1 \cdot (n + \text{offset}_n)$ an Position $n$ ist. | ||
421 | |||
422 | \begin{code} | ||
423 | affected :: forall char. StringEdits Natural char -> Maybe (Interval Natural) | ||
424 | -- ^ For a given set of edits @es@ return the interval @i = a ... b@ such that for any given string @str@ of sufficient length the following holds: | ||
425 | -- | ||
426 | -- - For all @n :: Natural@: @n < a ==> str ! n == (str `apply` es) ! n@ | ||
427 | -- - There exists a @k :: Integer@ such that for all @n :: Integer@: @n > b ==> str ! (n + k) == (str `apply` es) ! n@ | ||
428 | -- | ||
429 | -- Intuitively: for any character @c@ of the new string @str `apply` es@ there exists a corresponding character in @str@ (offset by either 0 or a constant shift @k@) if the index of @c@ is /not/ contained in @affected es@. | ||
430 | \end{code} | ||
431 | \begin{comment} | ||
432 | \begin{code} | ||
433 | affected SEFail = Nothing | ||
434 | affected (StringEdits es) = Just . toInterval $ go es Map.empty | ||
338 | where | 435 | where |
339 | toEdit :: Diff sym -> (Natural, StringEdits sym) -> (Natural, StringEdits sym) | 436 | toInterval :: Map Natural Integer -> Interval Natural |
340 | toEdit (Diff.Both _ _) (n, es) = (succ n, es) | 437 | toInterval map |
341 | toEdit (Diff.First _ ) (n, es) = (n, delete n <> es) | 438 | | Just (((minK, _), _), ((maxK, _), _)) <- (,) <$> Map.minViewWithKey map <*> Map.maxViewWithKey map |
342 | toEdit (Diff.Second c) (n, es) = (succ n, insert n c <> es) | 439 | = let |
440 | maxV' = maximum . (0 :) $ do | ||
441 | offset <- [0..maxK] | ||
442 | v <- maybeToList $ Map.lookup (maxK - offset) map | ||
443 | v' <- maybeToList . fmap fromInteger $ negate v <$ guard (v <= 0) | ||
444 | guard $ v' >= succ offset | ||
445 | return $ v' - offset | ||
446 | in (minK Int.... maxK + maxV') | ||
447 | | otherwise | ||
448 | = Int.empty | ||
449 | go :: Seq (StringEdit Natural char) -> Map Natural Integer -> Map Natural Integer | ||
450 | go Seq.Empty offsets = offsets | ||
451 | go (es :> e) offsets = go es offsets' | ||
452 | where | ||
453 | p = e ^. sePos | ||
454 | -- p' = fromIntegral $ Map.foldrWithKey (\k o p -> bool (fromIntegral p) (o + p) $ k < fromIntegral p) (fromIntegral p) offsets | ||
455 | offsets' = Map.alter (Just . myOffset . fromMaybe 0) p offsets | ||
456 | myOffset :: Integer -> Integer | ||
457 | myOffset | ||
458 | | Insert _ _ <- e = pred | ||
459 | | Delete _ <- e = succ | ||
343 | \end{code} | 460 | \end{code} |
461 | \end{comment} | ||