\begin{code} {-# LANGUAGE ScopedTypeVariables , TemplateHaskell , ConstraintKinds #-} module Control.DFST.Lens ( StringEdit(..) , StringEdits(..) , insert, delete , DFSTAction(..), DFSTComplement , dfstLens , module Control.DFST , module Control.Lens.Edit ) where import Control.DFST import Control.FST hiding (stInitial, stTransition, stAccept) import qualified Control.FST as FST (stInitial, stTransition, stAccept) import Control.Lens.Edit import Control.Lens import Control.Lens.TH import Control.Edit import Control.Monad import Numeric.Natural import Numeric.Interval (Interval, (...)) import qualified Numeric.Interval as Int import Data.Sequence (Seq((:<|), (:|>))) import qualified Data.Sequence as Seq import Data.Set (Set) import qualified Data.Set as Set import Data.Map (Map) import qualified Data.Map as Map import Data.Compositions.Snoc (Compositions) import qualified Data.Compositions.Snoc as Comp import Data.Algorithm.Diff (Diff, getDiff) import qualified Data.Algorithm.Diff as Diff import Data.Monoid import Data.Bool (bool) import Data.Maybe (fromMaybe, maybeToList, listToMaybe, catMaybes) import Data.Function (on) import Data.Foldable (toList) import Data.List (partition) import Debug.Trace data StringEdit char = Insert { _sePos :: Natural, _seInsertion :: char } | Delete { _sePos :: Natural } deriving (Eq, Ord, Show, Read) makeLenses ''StringEdit data StringEdits char = StringEdits (Seq (StringEdit char)) | SEFail deriving (Eq, Ord, Show, Read) makePrisms ''StringEdits stringEdits :: Traversal' (StringEdits char) (StringEdit char) stringEdits = _StringEdits . traverse affected :: forall char. StringEdits char -> Maybe (Interval Natural) -- ^ For a given set of edits @es@ return the interval @i = a ... b@ such that for any given string @str@ of sufficient length the following holds: -- -- - For all @n :: Natural@: @n < a ==> str ! n == (str `apply` es) ! n@ -- - There exists a @k :: Integer@ such that for all @n :: Integer@: @n > b ==> str ! (n + k) == (str `apply` es) ! n@ -- -- Intuitively: for any character @c@ of the new string @str `apply` es@ there exists a corresponding character in @str@ (offset by either 0 or a constant shift @k@) if the index of @c@ is /not/ contained in @affected es@. affected SEFail = Nothing affected (StringEdits es) = Just . toInterval $ go es Map.empty where toInterval :: Map Natural Integer -> Interval Natural toInterval map | Just (((minK, _), _), ((maxK, _), _)) <- (,) <$> Map.minViewWithKey map <*> Map.maxViewWithKey map = let maxV' = maximum . (0 :) $ do offset <- [0..maxK] v <- maybeToList $ Map.lookup (maxK - offset) map v' <- maybeToList . fmap fromInteger $ negate v <$ guard (v <= 0) guard $ v' >= succ offset return $ v' - offset in (minK Int.... maxK + maxV') | otherwise = Int.empty go :: Seq (StringEdit char) -> Map Natural Integer -> Map Natural Integer go Seq.Empty offsets = offsets go (es :> e) offsets = go es offsets' where p = e ^. sePos p' = fromIntegral $ Map.foldrWithKey (\k o p -> bool (fromIntegral p) (o + p) $ k < fromIntegral p) (fromIntegral p) offsets offsets' = Map.alter (Just . myOffset . fromMaybe 0) p offsets myOffset :: Integer -> Integer myOffset | Insert _ _ <- e = pred | Delete _ <- e = succ insert :: Natural -> char -> StringEdits char insert n c = StringEdits . Seq.singleton $ Insert n c delete :: Natural -> StringEdits char delete n = StringEdits . Seq.singleton $ Delete n instance Monoid (StringEdits char) where mempty = StringEdits Seq.empty SEFail `mappend` _ = SEFail _ `mappend` SEFail = SEFail (StringEdits Seq.Empty) `mappend` x = x x `mappend` (StringEdits Seq.Empty) = x (StringEdits x@(bs :|> b)) `mappend` (StringEdits y@(a :<| as)) | (Insert n _) <- a , (Delete n') <- b , n == n' = StringEdits bs `mappend` StringEdits as | otherwise = StringEdits $ x `mappend` y instance Module (StringEdits char) where type Domain (StringEdits char) = Seq char apply str SEFail = Nothing apply str (StringEdits Seq.Empty) = Just str apply str (StringEdits (es :|> Insert n c)) = (flip apply) (StringEdits es) =<< go str n c where go Seq.Empty n c | n == 0 = Just $ Seq.singleton c | otherwise = Nothing go str@(x :<| xs) n c | n == 0 = Just $ c <| str | otherwise = (x <|) <$> go xs (pred n) c apply str (StringEdits (es :|> Delete n)) = (flip apply) (StringEdits es) =<< go str n where go Seq.Empty _ = Nothing go (x :<| xs) n | n == 0 = Just xs | otherwise = (x <|) <$> go xs (pred n) init = Seq.empty divInit = StringEdits . Seq.unfoldl go . (0,) where go (_, Seq.Empty) = Nothing go (n, (c :<| cs)) = Just ((succ n, cs), Insert n c) \end{code} % TODO Make notation mathy Um zunächst eine asymmetrische edit-lens `StringEdits -> StringEdits` mit akzeptabler Komplexität für einen bestimmten `DFST s` (entlang der \emph{Richtung} des DFSTs) zu konstruieren möchten wir folgendes Verfahren anwenden: Gegeben eine Sequenz (`StringEdits`) von zu übersetzenden Änderungen genügt es die Übersetzung eines einzelnen `StringEdit`s in eine womöglich längere Sequenz von `StringEdits` anzugeben, alle `StringEdits` aus der Sequenz zu übersetzen (hierbei muss auf die korrekte Handhabung des Komplements geachtet werden) und jene Übersetzungen dann zu concatenieren. Wir definieren zunächst die \emph{Wirkung} eines DFST auf einen festen String als eine Abbildung `state -> (state, String)`, die den aktuellen Zustand vorm Parsen des Strings auf den Zustand danach und die (womöglich leere) Ausgabe schickt. Diese Wirkungen bilden einen Monoiden analog zu Endomorphismen, wobei die Resultat-Strings concateniert werden. Die Unterliegende Idee ist nun im Komplement der edit-lens eine Liste von Wirkungen (eine für jedes Zeichen der Eingabe des DFSTs) und einen Cache der monoidalen Summen aller kontinuirlichen Teillisten zu halten. Da wir wissen welche Stelle im input-String von einem gegebenen edit betroffen ist können wir, anhand der Wirkung des Teilstücks bis zu jener Stelle, den output-String in einen durch den edit unveränderten Prefix und einen womöglich betroffenen Suffix unterteilen. Die Wirkung ab der betroffenen Stelle im input-String können wir also Komposition der Wirkung der durch den edit betroffenen Stelle und derer aller Zeichen danach bestimmen. Nun gilt es nur noch die Differenz (als `StringEdits`) des vorherigen Suffixes im output-String und des aus der gerade berechneten Wirkung Bestimmten zu bestimmen. % Für die Rückrichtung bietet es sich an eine Art primitive Invertierung des DFSTs zu berechnen. % Gegeben den aktuellen DFST $A$ möchten wir einen anderen $A^{-1}$ finden, sodass gilt: % \begin{itemize} % \item $A^{-1}$ akzeptiert einen String $s^{-1}$ (endet seinen Lauf in einem finalen Zustand) gdw. es einen String $s$ gibt, der unter $A$ die Ausgabe $s^{-1}$ produziert. % \item Wenn $A^{-1}$ einen String $s^{-1}$ akzeptiert so produziert die resultierende Ausgabe $s$ unter $A$ die Ausgabe $s^{-1}$. % \end{itemize} % Kann nicht funktionieren, denn $A^{-1}$ ist notwendigerweise nondeterministisch. Wird $A^{-1}$ dann zu einem DFST forciert (durch arbiträre Wahl einer Transition pro Zustand) gehen Informationen verloren—$A^{-1}$ produziert nicht den minimale edit auf dem input string (in der Tat beliebig schlecht) für einen gegeben edit auf dem output string. % Stelle im bisherigen Lauf isolieren, an der edit im output-string passieren soll, breitensuche auf pfaden, die sich von dieser stelle aus unterscheiden? % Gegeben einen Pfad und eine markierte Transition, finde Liste aller Pfade aufsteigend sortiert nach Unterschied zu gegebenem Pfad, mit Unterschieden "nahe" der markierten Transition zuerst — zudem jeweils edit auf dem Eingabestring % Einfacher ist Breitensuche ab `stInitial` und zunächst diff auf eingabe-strings. \begin{code} data DFSTAction state input output = DFSTAction { runDFSTAction :: state -> (state, Seq output) , dfstaConsumes :: Seq input } instance Monoid (DFSTAction state input output) where mempty = DFSTAction (\x -> (x, Seq.empty)) Seq.empty DFSTAction f cf `mappend` DFSTAction g cg = DFSTAction { runDFSTAction = \s -> let ((f -> (s', out')), out) = g s in (s', out <> out') , dfstaConsumes = cg <> cf } type DFSTComplement state input output = Compositions (DFSTAction state input output) runDFSTAction' :: DFSTComplement state input output -> state -> (state, Seq output) runDFSTAction' = runDFSTAction . Comp.composed dfstaConsumes' :: DFSTComplement state input output -> Seq input dfstaConsumes' = dfstaConsumes . Comp.composed type Debug state input output = (Show state, Show input, Show output) type LState state input output = (Natural, (state, Maybe (input, Natural))) dfstLens :: forall state input output. (Ord state, Ord input, Ord output, Debug state input output) => DFST state input output -> EditLens (DFSTComplement state input output) (StringEdits input) (StringEdits output) dfstLens dfst@DFST{..} = EditLens ground propR propL where ground :: DFSTComplement state input output ground = Comp.fromList [] propR :: (DFSTComplement state input output, StringEdits input) -> (DFSTComplement state input output, StringEdits output) propR (c, SEFail) = (c, SEFail) propR (c, StringEdits Seq.Empty) = (c, mempty) propR (c, StringEdits (es :> e)) | fst (runDFSTAction' c' stInitial) `Set.member` stAccept = (c', es' <> es'') | otherwise = (c', SEFail) where (cSuffix, cPrefix) = Comp.splitAt (Comp.length c - (e ^. sePos . from enum)) c cSuffix' | Delete _ <- e = Comp.take (pred $ Comp.length cSuffix) cSuffix -- TODO unsafe | Insert _ nChar <- e = cSuffix <> Comp.singleton (DFSTAction (\x -> runDFST' dfst x (pure nChar) Seq.empty) (Seq.singleton nChar)) (pState, pOutput) = runDFSTAction' cPrefix stInitial (_, sOutput ) = runDFSTAction' cSuffix pState (_, sOutput') = runDFSTAction' cSuffix' pState (c', es') = propR (cSuffix' <> cPrefix, StringEdits es) es'' = strDiff sOutput sOutput' & stringEdits . sePos . from enum +~ Seq.length pOutput propL :: (DFSTComplement state input output, StringEdits output) -> (DFSTComplement state input output, StringEdits input) propL (c, StringEdits Seq.Empty) = (c, mempty) propL (c, es) = fromMaybe (c, SEFail) $ do newOut <- prevOut `apply` es affected' <- affected es let outFST :: FST (LState state input output) input output outFST = wordFST newOut `productFST` toFST dfst inflate by int | Int.null int = Int.empty | inf >= by = inf - by Int.... sup + by | otherwise = 0 Int.... sup + by where (inf, sup) = (,) <$> Int.inf <*> Int.sup $ int fragmentIntervals = (++ [all]) . takeWhile (not . Int.isSubsetOf (0 Int.... max)) $ inflate <$> 0 : [ 2^n | n <- [0..ceiling (logBase 2.0 max)] ] <*> pure affected' where max :: Num a => a max = fromIntegral $ Seq.length newOut all = 0 Int.... max runCandidates :: Interval Natural -- ^ Departure from complement-run only permitted within interval (to guarantee locality) -> [(Seq ((Natural, (state, Maybe (input, Natural))), Maybe output), StringEdits input)] runCandidates focus = continueRun (Seq.empty, mempty) c 0 where -- TODO: generate new complement continueRun :: (Seq (LState state input output, Maybe output), StringEdits input) -> DFSTComplement state input output -> Natural -- ^ Input position -> [(Seq (LState state input output, Maybe output), StringEdits input)] continueRun (run, inEdits) c' inP = do let pos :: Natural pos = fromIntegral $ Comp.length c - Comp.length c' (c'', step) = Comp.splitAt (pred $ Comp.length c') c' -- TODO: unsafe current :: LState state input output current | Seq.Empty <- run = (0, (stInitial, Nothing)) | (_ :> (st, _)) <- run = st current' :: state current' = let (_, (st, _)) = current in st next' :: state next' = fst . runDFSTAction' step $ current' oldIn :: Maybe input oldIn = Seq.lookup 0 $ dfstaConsumes' step outgoing :: LState state input output -> [(LState state input output, Maybe input, Maybe output)] outgoing current = let go (st, minS) os acc | st == current = ($ acc) $ Set.fold (\(st', moutS) -> (. ((st', minS, moutS) :))) id os | otherwise = acc in Map.foldrWithKey go [] $ FST.stTransition outFST isPreferred :: (LState state input output, Maybe input, Maybe output) -> Bool isPreferred ((_, (st, Nothing)), inS, _) = st == next' && (fromMaybe True $ (==) <$> oldIn <*> inS) isPreferred (st, _, _) = any isPreferred $ outgoing st (preferred, alternate) = partition isPreferred $ outgoing current assocEdit :: (LState state input output, Maybe input, Maybe output) -> [(DFSTComplement state input output, StringEdits input, Natural)] assocEdit (_, Just inS, _) | oldIn == Just inS = [(c'', mempty, succ inP)] | otherwise = [(c', insert inP inS, succ inP), (c'', insert inP inS <> delete inP, succ inP)] assocEdit (_, Nothing, _) = [(c', mempty, inP)] options | pos `Int.member` focus = preferred ++ alternate | otherwise = preferred choice@(next, inS, outS) <- options (c', inEdits', inP') <- assocEdit choice let acc = (run :> (next, outS), inEdits' <> inEdits) bool id (acc :) (next `Set.member` FST.stAccept outFST) $ continueRun acc c' inP' -- Properties of the edits computed are determined mostly by the order candidates are generated below chosenRun <- (\xs -> foldr (\x f -> x `seq` f) listToMaybe xs $ xs) $ traceShowId fragmentIntervals >>= (\x -> traceShowId <$> runCandidates x) return $ traceShow chosenRun undefined where (_, prevOut) = runDFSTAction' c stInitial strDiff :: forall sym. Eq sym => Seq sym -> Seq sym -> StringEdits sym -- ^ @strDiff a b@ calculates a set of edits, which, when applied to @a@, produce @b@ strDiff a b = snd . foldr toEdit (0, mempty) $ (getDiff `on` toList) a b where toEdit :: Diff sym -> (Natural, StringEdits sym) -> (Natural, StringEdits sym) toEdit (Diff.Both _ _) (n, es) = (succ n, es) toEdit (Diff.First _ ) (n, es) = (n, delete n <> es) toEdit (Diff.Second c) (n, es) = (succ n, insert n c <> es) \end{code}