Much ado about nothing

author: Gregor Kleen <gkleen@yggdrasil.li> 2018-12-18 13:51:16 +0100
committer: Gregor Kleen <gkleen@yggdrasil.li> 2018-12-18 13:51:16 +0100
commit: 46ae60eaca841b554ba20c6a2b7a15b43c12b4df (patch)
tree: 0bb06127a0e08e75f8be755f5a5dfb1702b627b6 /edit-lens/src/Control/DFST/Lens.lhs
parent: b0b18979d5ccd109d5a56937396acdeb85c857aa (diff)
download: incremental-dfsts-46ae60eaca841b554ba20c6a2b7a15b43c12b4df.tar
incremental-dfsts-46ae60eaca841b554ba20c6a2b7a15b43c12b4df.tar.gz
incremental-dfsts-46ae60eaca841b554ba20c6a2b7a15b43c12b4df.tar.bz2
incremental-dfsts-46ae60eaca841b554ba20c6a2b7a15b43c12b4df.tar.xz
incremental-dfsts-46ae60eaca841b554ba20c6a2b7a15b43c12b4df.zip
1 files changed, 265 insertions, 147 deletions
diff --git a/edit-lens/src/Control/DFST/Lens.lhs b/edit-lens/src/Control/DFST/Lens.lhs
index 95be34e..fe33bd6 100644
--- a/edit-lens/src/Control/DFST/Lens.lhs
+++ b/edit-lens/src/Control/DFST/Lens.lhs
@@ -1,12 +1,14 @@
+\begin{comment}
 \begin{code}
 {-# LANGUAGE ScopedTypeVariables
           , TemplateHaskell
           , ConstraintKinds
+           , GeneralizedNewtypeDeriving
 #-}
 module Control.DFST.Lens
-  ( StringEdit(..)
+  ( StringEdit(..), sePos, seInsertion
-  , StringEdits(..)
+  , StringEdits(..), _StringEdits, _SEFail, stringEdits
  , insert, delete, replace
  , DFSTAction(..), DFSTComplement
  , dfstLens
@@ -16,7 +18,7 @@ module Control.DFST.Lens
 import Control.DFST
 import Control.FST hiding (stInitial, stTransition, stAccept)
-import qualified Control.FST as FST (stInitial, stTransition, stAccept)
+import qualified Control.FST as FST (stInitial, stTransition, stAccept, step)
 import Control.Lens.Edit
 import Control.Lens
 import Control.Lens.TH
@@ -32,11 +34,11 @@ import Data.Sequence (Seq((:<|), (:|>)))
 import qualified Data.Sequence as Seq
 import Data.Set (Set)
 import qualified Data.Set as Set
-import Data.Map.Strict (Map)
+import Data.Map.Lazy (Map)
-import qualified Data.Map.Strict as Map
+import qualified Data.Map.Lazy as Map
-import Data.Compositions.Snoc (Compositions)
+import Data.Compositions (Compositions)
-import qualified Data.Compositions.Snoc as Comp
+import qualified Data.Compositions as Comp
 import Data.Algorithm.Diff (Diff, getDiff)
 import qualified Data.Algorithm.Diff as Diff
@@ -48,69 +50,72 @@ import Data.Function (on)
 import Data.Foldable (toList)
 import Data.List (partition)
-import Debug.Trace
+import Control.Exception (assert)
+import System.IO.Unsafe
+import Text.PrettyPrint.Leijen (Pretty(..))
-data StringEdit char = Insert { _sePos :: Natural, _seInsertion :: char }
+\end{code}
-                     | Delete { _sePos :: Natural }
+\end{comment}
+Wir betrachten, zur Einfachheit, ein minimales Set von Edits auf Strings\footnote{Wie in der Konstruktion zum Longest Common Subsequence Problem}:
+\begin{defn}[Atomare Edits auf Strings]
+\begin{code}
+data StringEdit pos char = Insert { _sePos :: pos, _seInsertion :: char }
+                         | Delete { _sePos :: pos }
  deriving (Eq, Ord, Show, Read)
+-- Automatically derive van Laarhoven lenses:
+--
+-- @sePos :: Lens' (StringEdit pos char) pos@
+-- @seInsertion :: Traversal' (StringEdit pos char) char@
 makeLenses ''StringEdit
+\end{code}
+\end{defn}
-data StringEdits char = StringEdits (Seq (StringEdit char))
+Atomare edits werden, als Liste, zu edits komponiert.
-                      | SEFail
+Wir führen einen speziellen edit ein, der die Nicht-Anwendbarkeit der edits repräsentiert:
+\begin{code}
+data StringEdits pos char = StringEdits (Seq (StringEdit pos char))
+                          | SEFail
  deriving (Eq, Ord, Show, Read)
 makePrisms ''StringEdits
-stringEdits :: Traversal' (StringEdits char) (StringEdit char)
+stringEdits :: Traversal (StringEdits pos char) (StringEdits pos' char') (StringEdit pos char) (StringEdit pos' char')
+\end{code}
+\begin{comment}
+\begin{code}
 stringEdits = _StringEdits . traverse
+\end{code}
-affected :: forall char. StringEdits char -> Maybe (Interval Natural)
+\end{comment}
-- ^ For a given set of edits @es@ return the interval @i = a ... b@ such that for any given string @str@ of sufficient length the following holds:
+\begin{code}
--
+insert :: pos -> char -> StringEdits pos char
--   - For all @n :: Natural@: @n < a ==> str ! n == (str `apply` es) ! n@
+\end{code}
--   - There exists a @k :: Integer@ such that for all @n :: Integer@: @n > b ==> str ! (n + k) == (str `apply` es) ! n@
+\begin{comment}
--
+\begin{code}
-- Intuitively: for any character @c@ of the new string @str `apply` es@ there exists a corresponding character in @str@ (offset by either 0 or a constant shift @k@) if the index of @c@ is /not/ contained in @affected es@.
-affected SEFail = Nothing
-affected (StringEdits es) = Just . toInterval $ go es Map.empty
-  where
-    toInterval :: Map Natural Integer -> Interval Natural
-    toInterval map
-      | Just (((minK, _), _), ((maxK, _), _)) <- (,) <$> Map.minViewWithKey map <*> Map.maxViewWithKey map
-      = let
-          maxV' = maximum . (0 :) $ do
-            offset <- [0..maxK]
-            v <- maybeToList $ Map.lookup (maxK - offset) map
-            v' <- maybeToList . fmap fromInteger $ negate v <$ guard (v <= 0)
-            guard $ v' >= succ offset
-            return $ v' - offset
-        in (minK Int.... maxK + maxV')
-      | otherwise
-      = Int.empty
-    go :: Seq (StringEdit char) -> Map Natural Integer -> Map Natural Integer
-    go Seq.Empty offsets = offsets
-    go (es :> e) offsets = go es offsets'
-      where
-        p = e ^. sePos
-        p' = fromIntegral $ Map.foldrWithKey (\k o p -> bool (fromIntegral p) (o + p) $ k < fromIntegral p) (fromIntegral p) offsets
-        offsets' = Map.alter (Just . myOffset . fromMaybe 0) p offsets
-        myOffset :: Integer -> Integer
-        myOffset
-          | Insert _ _ <- e = pred
-          | Delete _   <- e = succ
-insert :: Natural -> char -> StringEdits char
 insert n c = StringEdits .  Seq.singleton $ Insert n c
-  
+\end{code}
-delete :: Natural -> StringEdits char
+\end{comment}
+\begin{code}
+delete :: pos -> StringEdits pos char
+\end{code}
+\begin{comment}
+\begin{code}
 delete n = StringEdits .  Seq.singleton $ Delete n
+\end{code}
-replace :: Natural -> char -> StringEdits char
+\end{comment}
+\begin{code}
+replace :: Eq pos => pos -> char -> StringEdits pos char
+\end{code}
+\begin{comment}
+\begin{code}
 replace n c = insert n c <> delete n
-instance Monoid (StringEdits char) where
+-- | Rudimentarily optimize edit composition
+instance Eq pos => Monoid (StringEdits pos char) where
  mempty = StringEdits Seq.empty
  SEFail `mappend` _ = SEFail
  _ `mappend` SEFail = SEFail
@@ -122,12 +127,16 @@ instance Monoid (StringEdits char) where
    , n == n'
    = StringEdits bs `mappend` StringEdits as
    | otherwise = StringEdits $ x `mappend` y
+\end{code}
+\end{comment}
-instance Module (StringEdits char) where
+Da wir ein minimales Set an atomaren edits gewählt haben, ist die Definition der Modulstruktur über Strings des passenden Alphabets recht einfach:
-  type Domain (StringEdits char) = Seq char
+\begin{code}
+instance Module (StringEdits Natural char) where
+  type Domain (StringEdits Natural char) = Seq char
  apply str SEFail = Nothing
  apply str (StringEdits Seq.Empty) = Just str
-  apply str (StringEdits (es :|> Insert n c)) = (flip apply) (StringEdits es) =<< go str n c
+  apply str (StringEdits (es :|> Insert n c)) = flip apply (StringEdits es) =<< go str n c
    where
      go Seq.Empty n c
        | n == 0 = Just $ Seq.singleton c
@@ -135,7 +144,7 @@ instance Module (StringEdits char) where
      go str@(x :<| xs) n c
        | n == 0 = Just $ c <| str
        | otherwise = (x <|) <$> go xs (pred n) c
-  apply str (StringEdits (es :|> Delete n)) = (flip apply) (StringEdits es) =<< go str n
+  apply str (StringEdits (es :|> Delete n)) = flip apply (StringEdits es) =<< go str n
    where
      go Seq.Empty _ = Nothing
      go (x :<| xs) n
@@ -146,99 +155,128 @@ instance Module (StringEdits char) where
  divInit = StringEdits . Seq.unfoldl go . (0,)
    where
      go (_, Seq.Empty) = Nothing
-      go (n, (c :<| cs)) = Just ((succ n, cs), Insert n c)
+      go (n, c :<| cs ) = Just ((succ n, cs), Insert n c)
 \end{code}
 % TODO Make notation mathy
-Um zunächst eine asymmetrische edit-lens `StringEdits -> StringEdits` mit akzeptabler Komplexität für einen bestimmten `DFST s` (entlang der \emph{Richtung} des DFSTs) zu konstruieren möchten wir folgendes Verfahren anwenden:
+Um zunächst eine asymmetrische edit-lens \texttt{StringEdits -> StringEdits} mit akzeptabler Komplexität für einen bestimmten DFST (entlang der \emph{Richtung} des DFSTs) zu konstruieren, möchten wir folgendes Verfahren anwenden:
-Gegeben eine Sequenz (`StringEdits`) von zu übersetzenden Änderungen genügt es die Übersetzung eines einzelnen `StringEdit`s in eine womöglich längere Sequenz von `StringEdits` anzugeben, alle `StringEdits` aus der Sequenz zu übersetzen (hierbei muss auf die korrekte Handhabung des Komplements geachtet werden) und jene Übersetzungen dann zu concatenieren.
+Gegeben eine Sequenz von zu übersetzenden Änderungen genügt es die Übersetzung eines einzelnen \texttt{StringEdit}s in eine womöglich längere Sequenz von \texttt{StringEdits} anzugeben, alle \texttt{StringEdits} aus der Sequenz derart zu übersetzen (hierbei muss auf die korrekte Handhabung des Komplements geachtet werden) und jene Übersetzungen dann zu concatenieren.
-Wir definieren zunächst die \emph{Wirkung} eines DFST auf einen festen String als eine Abbildung `state -> (state, String)`, die den aktuellen Zustand vorm Parsen des Strings auf den Zustand danach und die (womöglich leere) Ausgabe schickt.
+Wir definieren zunächst die \emph{Wirkung} eines DFST auf einen festen String als eine Abbildung \texttt{state -> (Seq output, Maybe state)}, die den aktuellen Zustand vor dem Parsen des Strings auf den Zustand danach und die (womöglich leere) Ausgabe schickt.
+Wir annotieren Wirkungen zudem mit dem konsumierten String.
 Diese Wirkungen bilden einen Monoiden analog zu Endomorphismen, wobei die Resultat-Strings concateniert werden.
-Die Unterliegende Idee ist nun im Komplement der edit-lens eine Liste von Wirkungen (eine für jedes Zeichen der Eingabe des DFSTs) und einen Cache der monoidalen Summen aller kontinuirlichen Teillisten zu halten.
-Da wir wissen welche Stelle im input-String von einem gegebenen edit betroffen ist können wir, anhand der Wirkung des Teilstücks bis zu jener Stelle, den output-String in einen durch den edit unveränderten Prefix und einen womöglich betroffenen Suffix unterteilen.
-Die Wirkung ab der betroffenen Stelle im input-String können wir also Komposition der Wirkung der durch den edit betroffenen Stelle und derer aller Zeichen danach bestimmen.
-Nun gilt es nur noch die Differenz (als `StringEdits`) des vorherigen Suffixes im output-String und des aus der gerade berechneten Wirkung Bestimmten zu bestimmen.
-% Für die Rückrichtung bietet es sich an eine Art primitive Invertierung des DFSTs zu berechnen.
-% Gegeben den aktuellen DFST $A$ möchten wir einen anderen $A^{-1}$ finden, sodass gilt:
-% \begin{itemize}
-%   \item $A^{-1}$ akzeptiert einen String $s^{-1}$ (endet seinen Lauf in einem finalen Zustand) gdw. es einen String $s$ gibt, der unter $A$ die Ausgabe $s^{-1}$ produziert.
-%   \item Wenn $A^{-1}$ einen String $s^{-1}$ akzeptiert so produziert die resultierende Ausgabe $s$ unter $A$ die Ausgabe $s^{-1}$.
-% \end{itemize}
-% Kann nicht funktionieren, denn $A^{-1}$ ist notwendigerweise nondeterministisch. Wird $A^{-1}$ dann zu einem DFST forciert (durch arbiträre Wahl einer Transition pro Zustand) gehen Informationen verloren—$A^{-1}$ produziert nicht den minimale edit auf dem input string (in der Tat beliebig schlecht) für einen gegeben edit auf dem output string.
-  
-% Stelle im bisherigen Lauf isolieren, an der edit im output-string passieren soll, breitensuche auf pfaden, die sich von dieser stelle aus unterscheiden?
-% Gegeben einen Pfad und eine markierte Transition, finde Liste aller Pfade aufsteigend sortiert nach Unterschied zu gegebenem Pfad, mit Unterschieden "nahe" der markierten Transition zuerst — zudem jeweils edit auf dem Eingabestring
-% Einfacher ist Breitensuche ab `stInitial` und zunächst diff auf eingabe-strings.
-  
 \begin{code}
 data DFSTAction state input output = DFSTAction
-  { runDFSTAction :: state -> (state, Seq output)
+  { runDFSTAction :: state -> (Seq output, Maybe state)
  , dfstaConsumes :: Seq input
  }
 instance Monoid (DFSTAction state input output) where
-  mempty = DFSTAction (\x -> (x, Seq.empty)) Seq.empty
+\end{code}
+\begin{comment}
+\begin{code}
+  mempty = DFSTAction (\x -> (Seq.empty, Just x)) Seq.empty
  DFSTAction f cf `mappend` DFSTAction g cg = DFSTAction
-    { runDFSTAction = \s -> let ((f -> (s', out')), out) = g s in (s', out <> out')
+    { runDFSTAction = \x ->
+        let (outG, x') = g x
+            (outF, x'') = maybe (mempty, Nothing) f x'
+         in (outG <> outF, x'')
    , dfstaConsumes = cg <> cf
    }
+\end{code}
+\end{comment}
+\begin{code}
 type DFSTComplement state input output = Compositions (DFSTAction state input output)
-runDFSTAction' :: DFSTComplement state input output -> state -> (state, Seq output)
+runDFSTAction' :: DFSTComplement state input output -> state -> (Seq output, Maybe state)
 runDFSTAction' = runDFSTAction . Comp.composed
 dfstaConsumes' :: DFSTComplement state input output -> Seq input
 dfstaConsumes' = dfstaConsumes . Comp.composed
-dfstaProduces :: DFST state input output -> DFSTComplement state input output -> Seq output
+dfstaProduces :: DFSTComplement state input output -> state -> Seq output
-dfstaProduces DFST{..} = snd . flip runDFSTAction' stInitial
+dfstaProduces = fmap fst . runDFSTAction'
+\end{code}
-type Debug state input output = (Show state, Show input, Show output)
+Die unterliegende Idee von $\Rrightarrow$ ist nun, im Komplement der edit-lens eine Liste von Wirkungen (eine für jedes Zeichen der Eingabe des DFSTs) und einen Cache der monoidalen Summen aller kontinuierlichen Teillisten zu halten.
-type LState state input output = (Natural, (state, Maybe (input, Natural)))
+Wir können die alte DFST-Wirkung zunächst anhand des Intervalls, in dem der input-String von allen gegebenen edits betroffen ist, in einen unveränderten Prefix und einen womöglich betroffenen Suffix unterteilen.
+Da wir wissen, welche Stelle im input-String vom ersten gegebenen edit betroffen ist, können wir, anhand der Wirkung des Teilstücks bis zu jener Stelle, den betroffenen Suffix wiederum teilen.
+Die Wirkung ab der betroffenen Stelle im input-String können wir als Komposition der Wirkung der durch den edit betroffenen Stelle und derer aller Zeichen danach bestimmen.
+Nun gilt es nur noch die Differenz (als \texttt{StringEdits}) des vorherigen Suffixes im output-String und des aus der gerade berechneten Wirkung bestimmten Suffixes zu bestimmen; wir bedienen uns hierzu des Unix Standard-Diff-Algorithmus zwischen der ursprünglichen Ausgabe und dem Ergebnis der Iteration des Verfahrens auf alle gegebenen edits.
+Für die asymmetrische edit-lens entgegen der DFST-Richtung $\Lleftarrow$ verwenden wir Breitensuche über die Zustände des DFST innerhalb eines iterativ vergrößerten Intervalls:
+Wir bestimmen zunächst (\texttt{affected}) eine obere Schranke an das Intervall, in dem der Ausgabe-String vom edit betroffen ist, und generieren eine von dort quadratisch wachsende Serie von Intervallen.
+Für jedes Intervall (``lokalere'' Änderungen werden präferiert) schränken wir zunächst den DFST (zur einfacheren Implementierung in seiner Darstellung als FST) vermöge \texttt{restrictOutput} derart ein, dass nur die gewünschte Ausgabe produziert werden kann.
-dfstLens :: forall state input output. (Ord state, Ord input, Ord output, Debug state input output) => DFST state input output -> EditLens (DFSTComplement state input output) (StringEdits input) (StringEdits output)
+Wir betrachten dann in jedem Schritt (beginnend mit dem initialen Zustand des DFST) alle ausgehenden Transitionen und ziehen hierbei jene vor, die im vorherigen Lauf (gespeichert im Komplement der edit-lens), ebenfalls genommen wurden.
+Abweichungen vom im Komplement gespeicherten Lauf lassen wir nur innerhalb des betrachteten Intervalls zu und wählen in diesem Fall einen Edit auf der Eingabe, der die gewählte Abweichung produziert.
+Es wird zudem, wie für Breitensuche üblich, jeder besuchte Zustand markiert und ausgehende Transitionen nicht ein zweites Mal betrachtet.
+Erreichen wir einen finalen Zustand (wegen der Einschränkung des DFSTs wurde dann auch genau die gewünschte Ausgabe produziert), so fügen wir an die gesammelten Eingabe-edits eine Serie von deletions an, die den noch nicht konsumierten suffix der Eingabe verwerfen und brechen die Suche unter Rückgabe der Eingabe-edits und des neuen Laufs ab.
+In Haskell formulieren wir das vorzeitige Abbrechen der Suche, indem wir eine vollständige Liste von Rückgabe-Kandidaten konstruieren und dann immer ihr erstes Element zurückgeben.
+Wegen der verzögerten Auswertungsstrategie von Haskell wird auch tatsächlich nur der erste Rückgabe-Kandidat konstruiert.
+\begin{comment}
+\begin{code}
+type LState state input output = (Natural, (state, Maybe (input, Natural)))
+\end{code}
+\end{comment}
+\begin{code}
+dfstLens :: forall state input output. (Ord state, Ord input, Ord output, Show state, Show input, Show output) => DFST state input output -> EditLens (DFSTComplement state input output) (StringEdits Natural input) (StringEdits Natural output)
+\end{code}
+\begin{comment}
+\begin{code}
 dfstLens dfst@DFST{..} = EditLens ground propR propL
  where
    ground :: DFSTComplement state input output
-    ground = Comp.fromList []
+    ground = mempty
-    propR :: (DFSTComplement state input output, StringEdits input) -> (DFSTComplement state input output, StringEdits output)
+    propR :: (DFSTComplement state input output, StringEdits Natural input) -> (DFSTComplement state input output, StringEdits Natural output)
    propR (c, SEFail) = (c, SEFail)
    propR (c, StringEdits Seq.Empty) = (c, mempty)
-    propR (c, StringEdits (es :> e))
+    propR (c, es'@(StringEdits (es :> e)))
-      | fst (runDFSTAction' c' stInitial) `Set.member` stAccept = (c', es' <> es'')
+      | (_, Just final) <- runDFSTAction' c' stInitial
-      | otherwise                                               = (c', SEFail)
+      , final `Set.member` stAccept
+      = (c', rEs)
+      | otherwise
+      = (c, SEFail)
      where
+        Just int = affected es'
+        (cAffSuffix, cAffPrefix) = Comp.splitAt (Comp.length c - fromIntegral (Int.inf int)) c
        (cSuffix, cPrefix) = Comp.splitAt (Comp.length c - (e ^. sePos . from enum)) c
        cSuffix'
-          | Delete _       <- e = Comp.take (pred $ Comp.length cSuffix) cSuffix -- TODO unsafe
+          | Delete _       <- e
+          , Comp.length cSuffix > 0 = Comp.take (pred $ Comp.length cSuffix) cSuffix
          | Insert _ nChar <- e = cSuffix <> Comp.singleton (DFSTAction (\x -> runDFST' dfst x (pure nChar) Seq.empty) (Seq.singleton nChar))
-        (pState, pOutput)  = runDFSTAction' cPrefix stInitial
+          | otherwise = Comp.singleton $ DFSTAction (\_ -> (Seq.empty, Nothing)) Seq.empty
-        (_, sOutput ) = runDFSTAction' cSuffix  pState
+        (c', _) = propR (cSuffix' <> cPrefix, StringEdits es)
-        (_, sOutput') = runDFSTAction' cSuffix' pState
+        (cAffSuffix', _) = Comp.splitAt (Comp.length c' - Comp.length cAffPrefix) c'
-        (c', es') = propR (cSuffix' <> cPrefix, StringEdits es)
+        (_, Just pFinal) = runDFSTAction' cPrefix stInitial
-        es'' = strDiff sOutput sOutput' & stringEdits . sePos . from enum +~ Seq.length pOutput
+        rEs = strDiff (fst $ runDFSTAction' cAffSuffix pFinal) (fst $ runDFSTAction' cAffSuffix' pFinal) & stringEdits . sePos . from enum +~ length (dfstaProduces cAffPrefix stInitial)
        
-    propL :: (DFSTComplement state input output, StringEdits output) -> (DFSTComplement state input output, StringEdits input)
+    propL :: (DFSTComplement state input output, StringEdits Natural output) -> (DFSTComplement state input output, StringEdits Natural input)
    propL (c, StringEdits Seq.Empty) = (c, mempty)
    propL (c, es) = fromMaybe (c, SEFail) $ do
+      let prevOut = dfstaProduces c stInitial
      newOut <- prevOut `apply` es
      affected' <- affected es
      let outFST :: FST (LState state input output) input output
-          outFST = wordFST newOut `productFST` toFST dfst
+          -- outFST = wordFST newOut `productFST` toFST dfst
+          outFST = restrictOutput newOut $ toFST dfst
+      
+          trace x y = flip seq y . unsafePerformIO $ appendFile "lens.log" (x <> "\n\n")
          inflate by int
            | Int.null int = Int.empty
            | inf >= by = inf - by Int.... sup + by
@@ -251,53 +289,90 @@ dfstLens dfst@DFST{..} = EditLens ground propR propL
              max = fromIntegral $ Seq.length newOut
              all = 0 Int.... max
          runCandidates :: Interval Natural -- ^ Departure from complement-run only permitted within interval (to guarantee locality)
-                       -> [ ( Seq (LState state input output, Maybe output) -- ^ Computed run
+                        -> [ ( Seq (LState state input output, Maybe output) -- ^ Computed run
-                            , StringEdits input
+                             , StringEdits Natural input
-                            , DFSTComplement state input output
+                             , DFSTComplement state input output
-                            )
+                             )
-                          ]
+                           ]
-          runCandidates focus = continueRun (Seq.empty, mempty) (c, mempty) 0
+          runCandidates focus = map ((,,) <$> view _1 <*> view _2 <*> view (_3 . _2)) $ go Set.empty [(Seq.empty, mempty, (c, mempty), 0)]
            where
-              continueRun :: (Seq (LState state input output, Maybe output), StringEdits input)
+              go _ [] = []
+              go visited (args@(run, edits, compZipper, inP) : alts) = 
+                  [ (run', finalizeEdits remC inP' edits', compZipper', inP') | (run', edits', compZipper'@(remC, _), inP') <- args : conts, isFinal run' ]
+                  ++ go visited' (alts ++ conts)
+                where
+                  conts
+                    | lastSt <- view _1 <$> Seq.lookup (pred $ Seq.length run) run
+                    , lastSt `Set.member` visited = []
+                    | otherwise = continueRun edits compZipper inP run
+                  visited' = Set.insert (view _1 <$> Seq.lookup (pred $ Seq.length run) run) visited
+              isFinal :: Seq (LState state input output, Maybe output) -> Bool
+              -- ^ Is the final state of the run a final state of the DFST?
+              isFinal Seq.Empty = (0, (stInitial, Nothing)) `Set.member` FST.stAccept outFST
+                               && (0 Int.... fromIntegral (Seq.length newOut)) `Int.isSubsetOf` focus
+              isFinal (_ :> (lastSt, _)) = lastSt `Set.member` FST.stAccept outFST
+              finalizeEdits :: DFSTComplement state input output -- ^ Remaining complement
+                            -> Natural -- ^ Input position
+                            -> StringEdits Natural input -> StringEdits Natural input
+              finalizeEdits remC inP = mappend . mconcat . replicate (Seq.length $ dfstaConsumes' remC) $ delete inP
+              
+              continueRun :: StringEdits Natural input
                          -> (DFSTComplement state input output, DFSTComplement state input output) -- ^ Zipper into complement
                          -> Natural -- ^ Input position
-                          -> [(Seq (LState state input output, Maybe output), StringEdits input, DFSTComplement state input output)]
+                          -> Seq (LState state input output, Maybe output)
-              continueRun (run, inEdits) (c', remC) inP = do
+                          -> [ ( Seq (LState state input output, Maybe output)
+                               , StringEdits Natural input
+                               , (DFSTComplement state input output, DFSTComplement state input output)
+                               , Natural
+                               )
+                             ]
+              -- ^ Nondeterministically make a single further step, continuing a given run
+              continueRun inEdits (c', remC) inP run = do
                let
                  pos :: Natural
-                  pos = fromIntegral $ Comp.length c - Comp.length c'
+                  -- pos = fromIntegral $ Comp.length c - Comp.length c' -- FIXME: should use length of dfstaProduces
+                  pos = fromIntegral . Seq.length $ dfstaProduces remC stInitial
                  (c'', step) = Comp.splitAt (pred $ Comp.length c') c' -- TODO: unsafe?
                  current :: LState state input output
                  current
                    | Seq.Empty <- run      = (0, (stInitial, Nothing))
                    | (_ :> (st, _)) <- run = st
                  current' :: state
-                  current' = let (_, (st, _)) = current
-                              in st
-                  next' :: state
-                  next' = fst . runDFSTAction' step $ current'
                  oldIn :: Maybe input
-                  oldIn = Seq.lookup 0 $ dfstaConsumes' step
+                  (current', oldIn)
+                    | (_ :> ((_, (st, _)), _)) <- rest
+                    , (_ :> ((_, (_, Just (partialIn, _))), _)) <- partial = (st, Just partialIn)
+                    | (_ :> ((_, (_, Just (partialIn, _))), _)) <- partial = (stInitial, Just partialIn)
+                    | Seq.Empty <- rest = (stInitial, Seq.lookup 0 $ dfstaConsumes' step)
+                    | (_ :> ((_, (st, _)), _)) <- rest = (st, Seq.lookup 0 $ dfstaConsumes' step)
+                    where
+                      (partial, rest) = Seq.spanr (\((_, (_, inp)), _) -> isJust inp) run
+                next' <- trace (show ("next'", pos, focus, run, (current', oldIn), current, dfstaConsumes' step, runDFST' dfst current' (maybe Seq.empty Seq.singleton oldIn) Seq.empty)) . maybeToList . snd $ runDFST' dfst current' (maybe Seq.empty Seq.singleton oldIn) Seq.empty
+                let
                  outgoing :: LState state input output -> [(LState state input output, Maybe input, Maybe output)]
-                  outgoing current = let go (st, minS) os acc
+                  outgoing current = let go (st, minS) outs acc
-                                           | st == current = ($ acc) $ Set.fold (\(st', moutS) -> (. ((st', minS, moutS) :))) id os
+                                           | st == current = Set.foldr (\(st', moutS) -> ((st', minS, moutS) :)) acc outs
                                           | otherwise     = acc
                                      in Map.foldrWithKey go [] $ FST.stTransition outFST 
                  isPreferred :: (LState state input output, Maybe input, Maybe output) -> Bool
-                  isPreferred ((_, (st, Nothing)), inS, _) = st == next' && (fromMaybe True $ (==) <$> oldIn <*> inS)
+                  isPreferred ((_, (st, Nothing)), _, _) = st == next'
-                  isPreferred (st, _, _) = any isPreferred $ outgoing st -- By construction of `outFST`, `outgoing st` is a singleton
+                  isPreferred (st@(_, (_, Just (inS , _))), _, _) = maybe True (== inS) oldIn && any isPreferred (outgoing st) -- By construction of `outFST`, `outgoing st` is a singleton in this case
                  (preferred, alternate) = partition isPreferred $ outgoing current
                  assocEdit :: (LState state input output, Maybe input, Maybe output) -- ^ Transition
                            -> [ ( (DFSTComplement state input output, DFSTComplement state input output) -- ^ new `(c', remC)`, i.e. complement-zipper `(c', remC)` but with edit applied
-                                 , StringEdits input
+                                 , StringEdits Natural input
                                 , Natural
                                 )
                               ]
                  assocEdit (_, Just inS, _)
-                    | oldIn == Just inS = [((c'', step <> remC), mempty, succ inP)]
+                    | oldIn == Just inS = [ ((c'', step <> remC), mempty, succ inP) ]
-                    | isJust oldIn      = [((c'', altStep inS <> remC), replace inP inS, succ inP), ((c', altStep inS <> remC), insert inP inS, succ inP)]
+                    | isJust oldIn      = [ ((c', altStep inS <> remC), insert inP inS, succ inP)
-                    | otherwise         = [((c', altStep inS <> remC), insert inP inS, succ inP)]
+                                          , ((c'', altStep inS <> remC), replace inP inS, succ inP)
-                  assocEdit (_, Nothing, _) = [((c', remC), mempty, inP)] -- TODO: is this correct?
+                                          ]
+                    | otherwise         = [ ((c', altStep inS <> remC), insert inP inS, succ inP) ]
+                  assocEdit (_, Nothing, _) = [((c', remC), mempty, inP)]
                  altStep :: input -> DFSTComplement state input output
                  altStep inS = Comp.singleton DFSTAction{..}
                    where
@@ -306,7 +381,7 @@ dfstLens dfst@DFST{..} = EditLens ground propR propL
                  options
                    | pos `Int.member` focus = preferred ++ alternate
                    | otherwise              = preferred
-                choice@(next, inS, outS) <- options
+                choice@(next, inS, outS) <- trace (unlines $ show (pretty outFST) : map show options) options
                ((c3, remC'), inEdits', inP') <- assocEdit choice
                -- let
                --   -- | Replace prefix of old complement to reflect current candidate
@@ -317,27 +392,70 @@ dfstLens dfst@DFST{..} = EditLens ground propR propL
                --   fin
                --     | (trans, inEs, newComplement) <- acc = (trans, dropSuffix <> inEs, newComplement)
                let
-                  acc = (run :> (next, outS), inEdits' <> inEdits)
+                  trans = run :> (next, outS)
-                  dropSuffix = mconcat (replicate (Seq.length $ dfstaConsumes' c3) $ delete inP')
+                  inEs = inEdits' <> inEdits
-                  fin
+                --   dropSuffix = mconcat (replicate (Seq.length $ dfstaConsumes' c3) $ delete inP')
-                    | (trans, inEs) <- acc = (trans, dropSuffix <> inEs, remC')
+                --   fin
-                bool id (fin :) (next `Set.member` FST.stAccept outFST) $ continueRun acc (c3, remC') inP'
+                --     | (trans, inEs) <- acc = (trans, dropSuffix <> inEs, remC')
+                -- bool id (over _BFS $ cons fin) (next `Set.member` FST.stAccept outFST) $ continueRun acc (c3, remC') inP'
+                return (trans, inEs, (c3, remC'), inP')
              
      -- Properties of the edits computed are determined mostly by the order candidates are generated below
      -- (_, inEs, c') <- (\xs -> foldr (\x f -> x `seq` f) listToMaybe xs $ xs) $ traceShowId fragmentIntervals >>= (\x -> (\y@(y1, y2, _) -> traceShow (y1, y2) y) <$> runCandidates x)
-      (_, inEs, c') <- listToMaybe $ runCandidates =<< fragmentIntervals
+      fmap ((,) <$> view _3 <*> view _2) .  listToMaybe $ runCandidates =<< fragmentIntervals
-      
-      return (c', inEs)
-      where
-        (_, prevOut) = runDFSTAction' c stInitial
-strDiff :: forall sym. Eq sym => Seq sym -> Seq sym -> StringEdits sym
+strDiff :: forall sym pos. (Eq sym, Integral pos) => Seq sym -> Seq sym -> StringEdits pos sym
 -- ^ @strDiff a b@ calculates a set of edits, which, when applied to @a@, produce @b@
-strDiff a b = snd . foldr toEdit (0, mempty) $ (getDiff `on` toList) a b
+strDiff a b = snd . foldl toEdit (0, mempty) $ (getDiff `on` toList) a b
+  where
+    toEdit :: (pos, StringEdits pos sym) -> Diff sym -> (pos, StringEdits pos sym)
+    toEdit (n, es) (Diff.Both _ _) = (succ n, es)
+    toEdit (n, es) (Diff.First _ ) = (n, delete n <> es)
+    toEdit (n, es) (Diff.Second c) = (succ n, insert n c <> es)
+\end{code}
+\end{comment}
+Um eine obere Schranke an das von einer Serie von edits betroffene Intervall zu bestimmen, ordnen wir zunächst jeder von mindestens einem atomaren edit betroffenen Position $n$ im Eingabe-Wort einen $\text{offset}_n = \text{\# deletions} - \text{\# inserts}$ zu.
+Das gesuchte Intervall ist nun $(\text{minK}, \text{maxK})$, mit $\text{minK}$ der niedrigsten Position im Eingabe-Wort, der ein $\text{offset}$ zugeordnet ist, und $\text{maxK}$ der höchsten solchen Position, $\text{maxK}^\prime$, erhöht um das Maximum aus $\{ 0 \} \cup \{ \text{maxK}_n \colon n \in \{ 0 \ldots \text{maxK}^\prime \} \}$, wobei $\text{maxK}_n = -1 \cdot (n + \text{offset}_n)$ an Position $n$ ist.
+\begin{code}
+affected :: forall char. StringEdits Natural char -> Maybe (Interval Natural)
+-- ^ For a given set of edits @es@ return the interval @i = a ... b@ such that for any given string @str@ of sufficient length the following holds:
+--
+--   - For all @n :: Natural@: @n < a ==> str ! n == (str `apply` es) ! n@
+--   - There exists a @k :: Integer@ such that for all @n :: Integer@: @n > b ==> str ! (n + k) == (str `apply` es) ! n@
+--
+-- Intuitively: for any character @c@ of the new string @str `apply` es@ there exists a corresponding character in @str@ (offset by either 0 or a constant shift @k@) if the index of @c@ is /not/ contained in @affected es@.
+\end{code}
+\begin{comment}
+\begin{code}
+affected SEFail = Nothing
+affected (StringEdits es) = Just . toInterval $ go es Map.empty
  where
-    toEdit :: Diff sym -> (Natural, StringEdits sym) -> (Natural, StringEdits sym)
+    toInterval :: Map Natural Integer -> Interval Natural
-    toEdit (Diff.Both _ _) (n, es) = (succ n, es)
+    toInterval map
-    toEdit (Diff.First _ ) (n, es) = (n, delete n <> es)
+      | Just (((minK, _), _), ((maxK, _), _)) <- (,) <$> Map.minViewWithKey map <*> Map.maxViewWithKey map
-    toEdit (Diff.Second c) (n, es) = (succ n, insert n c <> es)
+      = let
+          maxV' = maximum . (0 :) $ do
+            offset <- [0..maxK]
+            v <- maybeToList $ Map.lookup (maxK - offset) map
+            v' <- maybeToList . fmap fromInteger $ negate v <$ guard (v <= 0)
+            guard $ v' >= succ offset
+            return $ v' - offset
+        in (minK Int.... maxK + maxV')
+      | otherwise
+      = Int.empty
+    go :: Seq (StringEdit Natural char) -> Map Natural Integer -> Map Natural Integer
+    go Seq.Empty offsets = offsets
+    go (es :> e) offsets = go es offsets'
+      where
+        p = e ^. sePos
+        -- p' = fromIntegral $ Map.foldrWithKey (\k o p -> bool (fromIntegral p) (o + p) $ k < fromIntegral p) (fromIntegral p) offsets
+        offsets' = Map.alter (Just . myOffset . fromMaybe 0) p offsets
+        myOffset :: Integer -> Integer
+        myOffset
+          | Insert _ _ <- e = pred
+          | Delete _   <- e = succ
 \end{code}
+\end{comment}
author	Gregor Kleen <gkleen@yggdrasil.li>	2018-12-18 13:51:16 +0100
committer	Gregor Kleen <gkleen@yggdrasil.li>	2018-12-18 13:51:16 +0100
commit	46ae60eaca841b554ba20c6a2b7a15b43c12b4df (patch)
tree	0bb06127a0e08e75f8be755f5a5dfb1702b627b6 /edit-lens/src/Control/DFST/Lens.lhs
parent	b0b18979d5ccd109d5a56937396acdeb85c857aa (diff)
download	incremental-dfsts-46ae60eaca841b554ba20c6a2b7a15b43c12b4df.tar incremental-dfsts-46ae60eaca841b554ba20c6a2b7a15b43c12b4df.tar.gz incremental-dfsts-46ae60eaca841b554ba20c6a2b7a15b43c12b4df.tar.bz2 incremental-dfsts-46ae60eaca841b554ba20c6a2b7a15b43c12b4df.tar.xz incremental-dfsts-46ae60eaca841b554ba20c6a2b7a15b43c12b4df.zip