From f4c419b9ddec15bad267a4463f0720d6e28042d2 Mon Sep 17 00:00:00 2001 From: Gregor Kleen Date: Thu, 30 May 2019 12:18:08 +0200 Subject: Further work --- edit-lens/package.yaml | 3 + edit-lens/src/Control/DFST.lhs.tex | 1 + edit-lens/src/Control/DFST/Lens.lhs | 405 +++++++-------------- edit-lens/src/Control/DFST/Lens.lhs.tex | 1 + edit-lens/src/Control/Edit.lhs | 2 + edit-lens/src/Control/Edit.lhs.tex | 1 + edit-lens/src/Control/Edit/String.lhs | 120 ++++++ edit-lens/src/Control/Edit/String.lhs.tex | 1 + edit-lens/src/Control/Edit/String/Affected.lhs | 73 ++++ edit-lens/src/Control/Edit/String/Affected.lhs.tex | 1 + edit-lens/src/Control/FST.lhs | 22 +- edit-lens/src/Control/FST.lhs.tex | 1 + edit-lens/src/Control/Lens/Edit.lhs | 6 +- edit-lens/src/Control/Lens/Edit.lhs.tex | 1 + edit-lens/src/Control/Lens/Edit/ActionTree.lhs | 204 +++++++++++ edit-lens/src/Control/Lens/Edit/ActionTree.lhs.tex | 1 + 16 files changed, 564 insertions(+), 279 deletions(-) create mode 120000 edit-lens/src/Control/DFST.lhs.tex create mode 120000 edit-lens/src/Control/DFST/Lens.lhs.tex create mode 120000 edit-lens/src/Control/Edit.lhs.tex create mode 100644 edit-lens/src/Control/Edit/String.lhs create mode 120000 edit-lens/src/Control/Edit/String.lhs.tex create mode 100644 edit-lens/src/Control/Edit/String/Affected.lhs create mode 120000 edit-lens/src/Control/Edit/String/Affected.lhs.tex create mode 120000 edit-lens/src/Control/FST.lhs.tex create mode 120000 edit-lens/src/Control/Lens/Edit.lhs.tex create mode 100644 edit-lens/src/Control/Lens/Edit/ActionTree.lhs create mode 120000 edit-lens/src/Control/Lens/Edit/ActionTree.lhs.tex (limited to 'edit-lens') diff --git a/edit-lens/package.yaml b/edit-lens/package.yaml index 7455dcc..631e7ed 100644 --- a/edit-lens/package.yaml +++ b/edit-lens/package.yaml @@ -45,7 +45,10 @@ library: source-dirs: src exposed-modules: - Control.Edit + - Control.Edit.String + - Control.Edit.String.Affected - Control.Lens.Edit + - Control.Lens.Edit.ActionTree - Control.DFST - Control.FST - Control.DFST.Lens diff --git a/edit-lens/src/Control/DFST.lhs.tex b/edit-lens/src/Control/DFST.lhs.tex new file mode 120000 index 0000000..eb8c83e --- /dev/null +++ b/edit-lens/src/Control/DFST.lhs.tex @@ -0,0 +1 @@ +DFST.lhs \ No newline at end of file diff --git a/edit-lens/src/Control/DFST/Lens.lhs b/edit-lens/src/Control/DFST/Lens.lhs index 1e5bbb1..8222db2 100644 --- a/edit-lens/src/Control/DFST/Lens.lhs +++ b/edit-lens/src/Control/DFST/Lens.lhs @@ -7,11 +7,9 @@ #-} module Control.DFST.Lens - ( StringEdit(..), sePos, seInsertion - , StringEdits(..), _StringEdits, _SEFail, stringEdits - , insert, delete, replace - , DFSTAction(..), DFSTComplement + ( DFSTAction(..), DFSTComplement , dfstLens + , module Control.Edit.String , module Control.DFST , module Control.Lens.Edit ) where @@ -20,9 +18,12 @@ import Control.DFST import Control.FST hiding (stInitial, stTransition, stAccept) import qualified Control.FST as FST (stInitial, stTransition, stAccept, step) import Control.Lens.Edit +import Control.Lens.Edit.ActionTree import Control.Lens import Control.Lens.TH import Control.Edit +import Control.Edit.String +import Control.Edit.String.Affected import Control.Monad @@ -64,94 +65,8 @@ import Data.Universe (Finite(..)) \end{code} \end{comment} - -\begin{defn}[Atomare edits of strings] -Wir betrachten, zur Einfachheit, ein minimiales Set von Edits auf Strings\footnote{Wie in der Konstruktion zum Longest Common Subsequence Problem}, bestehend nur aus Einfügung eines einzelnen Zeichens und löschen des Zeichens an einer einzelnen Position: - -\begin{code} -data StringEdit pos char = Insert { _sePos :: pos, _seInsertion :: char } - | Delete { _sePos :: pos } - deriving (Eq, Ord, Show, Read) - --- Automatically derive van-leerhoven-lenses: --- --- @sePos :: Lens' (StringEdits pos char) pos@ --- @seInsertion :: Traversal' (StringEdits pos char) char@ -makeLenses ''StringEdit -\end{code} - -Atomare edits werden, als Liste, zu edits komponiert. -Wir führen einen speziellen edit ein, der nicht-Anwendbarkeit der edits repräsentiert: -\begin{code} -data StringEdits pos char = StringEdits (Seq (StringEdit pos char)) - | SEFail - deriving (Eq, Ord, Show, Read) - -makePrisms ''StringEdits - -stringEdits :: Traversal (StringEdits pos char) (StringEdits pos' char') (StringEdit pos char) (StringEdit pos' char') -\end{code} -\end{defn} - -\begin{comment} -\begin{code} -stringEdits = _StringEdits . traverse - -insert :: pos -> char -> StringEdits pos char -insert n c = StringEdits . Seq.singleton $ Insert n c - -delete :: pos -> StringEdits pos char -delete n = StringEdits . Seq.singleton $ Delete n - -replace :: Eq pos => pos -> char -> StringEdits pos char -replace n c = insert n c <> delete n - --- | Rudimentarily optimize edit composition -instance Eq pos => Monoid (StringEdits pos char) where - mempty = StringEdits Seq.empty - SEFail `mappend` _ = SEFail - _ `mappend` SEFail = SEFail - (StringEdits Seq.Empty) `mappend` x = x - x `mappend` (StringEdits Seq.Empty) = x - (StringEdits x@(bs :|> b)) `mappend` (StringEdits y@(a :<| as)) - | (Insert n _) <- a - , (Delete n') <- b - , n == n' - = StringEdits bs `mappend` StringEdits as - | otherwise = StringEdits $ x `mappend` y -\end{code} -\end{comment} - -Da wir ein minimales Set an atomaren edits gewählt haben, ist die Definiton der Modulnstruktur über Strings des passenden Alphabets recht einfach: -\begin{code} -instance Module (StringEdits Natural char) where - type Domain (StringEdits Natural char) = Seq char - apply str SEFail = Nothing - apply str (StringEdits Seq.Empty) = Just str - apply str (StringEdits (es :|> Insert n c)) = flip apply (StringEdits es) =<< go str n c - where - go Seq.Empty n c - | n == 0 = Just $ Seq.singleton c - | otherwise = Nothing - go str@(x :<| xs) n c - | n == 0 = Just $ c <| str - | otherwise = (x <|) <$> go xs (pred n) c - apply str (StringEdits (es :|> Delete n)) = flip apply (StringEdits es) =<< go str n - where - go Seq.Empty _ = Nothing - go (x :<| xs) n - | n == 0 = Just xs - | otherwise = (x <|) <$> go xs (pred n) - - init = Seq.empty - divInit = StringEdits . Seq.unfoldl go . (0,) - where - go (_, Seq.Empty) = Nothing - go (n, c :<| cs ) = Just ((succ n, cs), Insert n c) - -\end{code} - -Wir definieren zunächst die \emph{Wirkung} eines DFST auf einen festen String als eine Abbildung \texttt{state -> (Seq output, Maybe state)}, die den aktuellen Zustand vor dem Parsen des Strings auf den Zustand danach und die (womöglich leere) Ausgabe schickt. +\begin{defn}[Ausgabe-Wirkung von DFSTs] +Wir definieren zunächst die \emph{Ausgabe-Wirkung}\footnote{Wir schreiben im Folgenden auch nur \emph{Wirkung}} eines DFST auf einen festen String als eine Abbildung \texttt{state -> (Seq output, Maybe state)}, die den aktuellen Zustand vor dem Parsen des Strings auf den Zustand danach und die (womöglich leere) Ausgabe schickt. Wir annotieren Wirkungen zudem mit dem konsumierten String. Diese Wirkungen bilden einen Monoiden analog zu Endomorphismen, wobei die Resultat-Strings concateniert werden. @@ -175,13 +90,24 @@ instance Monoid (DFSTAction state input output) where } \end{code} \end{comment} +\end{defn} +\begin{eg} \label{eg:linebreakonw100} + Die Wirkung des DFST aus Beispiel~\ref{eg:linebreak} auf das Wort $w$ aus Beispiel~\ref{eg:w100} ist: + + \begin{align*} + \text{act} & \colon Q \to \Delta^\star \times \{ \bot \} \cup Q \\ + 0 & \mapsto (0\, 1\, \ldots\, 80\, \text{\tt \textbackslash n}\, 81\, \ldots\, 98, 19) \\ + 1 & \mapsto (0\, 1\, \ldots\, 79\, \text{\tt \textbackslash n}\, 80\, \ldots\, 98, 18) \\ + & \vdots + \end{align*} +\end{eg} + + +\begin{comment} \begin{code} dfstAction :: forall state input output. (Finite state, Ord state, Ord input) => DFST state input output -> input -> DFSTAction state input output -- | Smart constructor of `DFSTAction` ensuring that `Seq.length . dfstaConsumes == const 1` and that `runDFSTAction` has constant complexity -\end{code} -\begin{comment} -\begin{code} dfstAction dfst (Seq.singleton -> dfstaConsumes) = DFSTAction{..} where runDFSTAction :: state -> (Seq output, Maybe state) @@ -193,11 +119,35 @@ dfstAction dfst (Seq.singleton -> dfstaConsumes) = DFSTAction{..} \end{comment} Wir halten im Komplement der edit-lens einen Cache der monoidalen Summen aller kontinuirlichen Teillisten. -\texttt{Compositions} ist ein balancierter Binärbaum, dessen innere Knoten mit der monoidalen Summe der Annotationen aller Blätter annotiert sind. +\texttt{Compositions} ist hierbei ein balancierter Binärbaum, dessen innere Knoten mit der monoidalen Summe der Annotationen aller Blätter des jeweiligen Teilbaums annotiert sind. \begin{code} type DFSTComplement state input output = Compositions (DFSTAction state input output) \end{code} +Wir bedienen uns hierbei einer bestehenden Programmbibliothek \cite{composition-tree} um das Balancieren bei Modifikation des Baums nicht implementieren zu müssen. + +\begin{eg} + Wir definieren zunächst einen weiteren, sehr einfachen, DFST: + + \begin{figure}[H] + \centering + \pinclude{presentation/switchdfst.tex} + \caption{\label{fig:switchdfst} Ein einfacher DFST, der zwischen zwei Zustanden wechselt und Ausgabe abhängig vom aktuellen Zustand erzeugt} + \end{figure} + + Auf $s$ wechselt der DFST seinen Zustand, auf $p$ produziert er, abhängig vom aktuellen Zustand, genau ein Zeichen. + + Wir stellen die Wirkung des DFST auf den Eingabe-String $spp$ grafisch analog zur Baumstruktur von \texttt{Compositions} dar. + Wir bedienen uns hier der Darstellung von Automaten-Wirkungen als \emph{Schaltboxen} aus \cite{hofmann2011automatentheorie}, angepasst für DFSTs indem wir die Ausgabe des Automaten an den Pfaden innerhalb der Schaltbox annotieren. + + \begin{figure}[H] + \centering + \pinclude{presentation/comptree.tex} + \caption{Die Wirkung der Eingabe $spp$ auf den Automaten aus Abbildung \ref{fig:switchdfst}} + \end{figure} +\end{eg} + +\begin{comment} \begin{code} runDFSTAction' :: DFSTComplement state input output -> state -> (Seq output, Maybe state) runDFSTAction' = runDFSTAction . Comp.composed @@ -208,6 +158,7 @@ dfstaConsumes' = dfstaConsumes . Comp.composed dfstaProduces :: DFSTComplement state input output -> state -> Seq output dfstaProduces = fmap fst . runDFSTAction' \end{code} +\end{comment} Für $\Rrightarrow$ können wir die alte DFST-Wirkung zunächst anhand des Intervalls indem der input-String von allen gegebenen edits betroffen ist (\texttt{affected}) in einen unveränderten Prefix und einen womöglich betroffenen Suffix unterteilen. @@ -229,6 +180,55 @@ Die input-edits können nun wiederum, unter Beachtung der Verschiebung der indic \begin{comment} \begin{code} type LState state input output = (Natural, (state, Maybe (input, Natural))) + +instance (Ord state, Ord input, Ord output, Show state, Show input, Show output, Finite state) => Action (DFSTAction state input output) input output where + type ActionParam (DFSTAction state input output) = DFST state input output + type ActionState (DFSTAction state input output) = state + + actionFail = DFSTAction (\_ -> (Seq.empty, Nothing)) Seq.empty + + actionSingleInput = dfstAction + + actionGroundState = stInitial + actionStateFinal DFST{..} final = final `Set.member` stAccept + actionState _ act st = view _2 $ runDFSTAction' act st + actionProduces _ act st = dfstaProduces act st + + actionConsumes _ = dfstaConsumes' + + actionFindPath DFST{..} iState affNewOut fState suffix = do + let + outFST :: FST (LState state input output) input output + outFST = restrictOutput affNewOut $ toFST DFST + { stInitial = iState + , stTransition + , stAccept = Set.fromList $ do + fin <- Set.toList $ stAccept `Set.union` Set.map fst (Map.keysSet stTransition) + (suffOut, Just fin') <- return $ runDFSTAction' suffix fin + guard $ Set.member fin' stAccept + guard $ suffOut == dfstaProduces suffix fState + return fin + } + -- trace (show $ pretty outFST) $ Just () + + newPath <- + let + FST{..} = outFST + detOutgoing :: Maybe (LState state input output) -> [(LState state input output, (Maybe input, Maybe output))] + detOutgoing Nothing = concatMap detOutgoing . map Just $ toList stInitial + detOutgoing (Just st) = concatMap (\((_, inS), outs) -> map (\(st', outS) -> (st', (inS, outS))) $ toList outs) . Map.toList $ Map.filterWithKey (\(st', _) _ -> st == st') stTransition + predicate :: [(LState state input output, (Maybe input, Maybe output))] -> Maybe Bool + predicate [] + | not . Set.null $ Set.intersection stInitial stAccept = Just True + | otherwise = Nothing + predicate ((lastSt, _) : _) + | Set.member lastSt stAccept = Just True + | otherwise = Nothing + in bfs detOutgoing predicate + + -- trace (show newPath) $ Just () + + return . Seq.fromList . mapMaybe (\(_st, (inS, _outS)) -> inS) $ reverse newPath \end{code} \end{comment} \begin{code} @@ -236,130 +236,12 @@ dfstLens :: forall state input output. (Ord state, Ord input, Ord output, Show s \end{code} \begin{comment} \begin{code} -dfstLens dfst@DFST{..} = EditLens ground propR propL - where - ground :: DFSTComplement state input output - ground = mempty - - propR :: (DFSTComplement state input output, StringEdits Natural input) -> (DFSTComplement state input output, StringEdits Natural output) - propR (c, SEFail) = (c, SEFail) - propR (c, StringEdits Seq.Empty) = (c, mempty) - propR (c, lEs@(StringEdits (es :> e))) - | (_, Just final) <- runDFSTAction' c' stInitial - , final `Set.member` stAccept - = (c', rEs) - | otherwise - = (c, SEFail) - where - Just int = affected lEs - (cAffSuffix, cAffPrefix) = Comp.splitAt (Comp.length c - fromIntegral (Int.inf int)) c - (cSuffix, cPrefix) = Comp.splitAt (Comp.length c - (e ^. sePos . from enum)) c - cSuffix' - | Delete _ <- e - , Comp.length cSuffix > 0 = Comp.take (pred $ Comp.length cSuffix) cSuffix - | Insert _ nChar <- e = cSuffix <> Comp.singleton (dfstAction dfst nChar) - | otherwise = Comp.singleton $ DFSTAction (\_ -> (Seq.empty, Nothing)) Seq.empty - (c', _) = propR (cSuffix' <> cPrefix, StringEdits es) - (cAffSuffix', _) = Comp.splitAt (Comp.length c' - Comp.length cAffPrefix) c' - (_, Just pFinal) = runDFSTAction' cAffPrefix stInitial - rEs = strDiff (dfstaProduces cAffSuffix pFinal) (dfstaProduces cAffSuffix' pFinal) & stringEdits . sePos . from enum +~ length (dfstaProduces cAffPrefix stInitial) - - - propL :: (DFSTComplement state input output, StringEdits Natural output) -> (DFSTComplement state input output, StringEdits Natural input) - propL (c, StringEdits Seq.Empty) = (c, mempty) - propL (c, es) = fromMaybe (c, SEFail) $ do - -- Determine states @(iState, fState)@ at the boundary of the region affected by @es@ - ((,) <$> Int.inf <*> Int.sup -> (minAff, maxAff)) <- affected es - trace (show (minAff, maxAff)) $ Just () - let - prevTrans :: Natural -> Maybe ( DFSTComplement state input output -- ^ Run after chosen transition to accepting state - , (state, input, Seq output, state) - , DFSTComplement state input output -- ^ Run from `stInitial` up to chosen transition - ) - -- ^ Given an index in the output, find the associated transition in @c@ - prevTrans needle = do - let (after, before) = prevTrans' (c, mempty) - transSt <- view _2 $ runDFSTAction (Comp.composed before) stInitial - trace ("transSt = " ++ show transSt) $ Just () - let (after', trans) = Comp.splitAt (pred $ Comp.length after) after - DFSTAction{..} = Comp.composed trans - [inS] <- return $ toList dfstaConsumes - (outSs , Just postTransSt) <- return $ runDFSTAction transSt - return (after', (transSt, inS, outSs, postTransSt), before) - where - -- | Move monoid summands from @after@ to @before@ until first transition of @after@ produces @needle@ or @after@ is a singleton - prevTrans' (after, before) - | producedNext > needle = (after, before) - | Comp.length after == 1 = (after, before) - | otherwise = prevTrans' (after', before') - where - producedNext = fromIntegral . Seq.length . traceShowId $ dfstaProduces before' stInitial - (after', nextTrans) = Comp.splitAt (pred $ Comp.length after) after - before' = nextTrans `mappend` before - (_, (iState, _, _, _), prefix) <- prevTrans minAff - trace (show (iState, Comp.length prefix)) $ Just () - (suffix, (pfState, _, _, fState), _) <- prevTrans maxAff - trace (show (pfState, fState, Comp.length suffix)) $ Just () - - newOut <- dfstaProduces c stInitial `apply` es - let affNewOut = (\s -> Seq.take (Seq.length s - Seq.length (dfstaProduces suffix fState)) s) $ Seq.drop (Seq.length $ dfstaProduces prefix stInitial) newOut - trace (show (iState, fState, affNewOut)) $ Just () - - let outFST :: FST (LState state input output) input output - outFST = restrictOutput affNewOut $ toFST DFST - { stInitial = iState - , stTransition - , stAccept = Set.fromList $ do - fin <- Set.toList $ stAccept `Set.union` Set.map fst (Map.keysSet stTransition) - (suffOut, Just fin') <- return $ runDFSTAction' suffix fin - guard $ Set.member fin' stAccept - guard $ suffOut == dfstaProduces suffix fState - return fin - } - trace (show $ pretty outFST) $ Just () - - newPath <- - let - FST{ .. } = outFST - detOutgoing :: Maybe (LState state input output) -> [(LState state input output, (Maybe input, Maybe output))] - detOutgoing Nothing = concatMap detOutgoing . map Just $ toList stInitial - detOutgoing (Just st) = concatMap (\((_, inS), outs) -> map (\(st', outS) -> (st', (inS, outS))) $ toList outs) . Map.toList $ Map.filterWithKey (\(st', _) _ -> st == st') stTransition - predicate :: [(LState state input output, (Maybe input, Maybe output))] -> Maybe Bool - predicate [] - | not . Set.null $ Set.intersection stInitial stAccept = Just True - | otherwise = Nothing - predicate ((lastSt, _) : _) - | Set.member lastSt stAccept = Just True - | otherwise = Nothing - in bfs detOutgoing predicate - - trace (show newPath) $ Just () - - let oldIn = dfstaConsumes' . Comp.drop (Comp.length suffix) $ Comp.take (Comp.length c - Comp.length prefix) c - newIn = Seq.fromList . mapMaybe (\(_st, (inS, _outS)) -> inS) $ reverse newPath - inDiff = oldIn `strDiff` newIn - diffOffset = fromIntegral . Seq.length $ dfstaConsumes' prefix - inDiff' = inDiff & stringEdits . sePos +~ diffOffset - - trace (show (oldIn, newIn, inDiff')) $ Just () - - let affComp = Comp.fromList [ dfstAction dfst inS | (_st, (Just inS, _outS)) <- newPath ] - - return (suffix <> affComp <> prefix, inDiff') - -strDiff :: forall sym pos. (Eq sym, Integral pos) => Seq sym -> Seq sym -> StringEdits pos sym --- ^ @strDiff a b@ calculates a set of edits, which, when applied to @a@, produce @b@ -strDiff a b = snd . foldl toEdit (0, mempty) $ (getDiff `on` toList) a b - where - toEdit :: (pos, StringEdits pos sym) -> Diff sym -> (pos, StringEdits pos sym) - toEdit (n, es) (Diff.Both _ _) = (succ n, es) - toEdit (n, es) (Diff.First _ ) = (n, delete n <> es) - toEdit (n, es) (Diff.Second c) = (succ n, insert n c <> es) +dfstLens = treeLens -- | Generic breadth-first search bfs :: forall state transition. Ord state => (Maybe state -> [(state, transition)]) -- ^ Find outgoing edges - -> ([(state, transition)] {- ^ Reverse path -} -> Maybe Bool {- ^ Continue search, finish successfully, or abort search on this branch -}) -- ^ Search predicate + -> ([(state, transition)] {- Reverse path -} -> Maybe Bool {- Continue search, finish successfully, or abort search on this branch -}) -- ^ Search predicate -> Maybe [(state, transition)] -- ^ Reverse path bfs outgoing predicate | Just True <- emptyRes = Just [] @@ -378,59 +260,46 @@ bfs outgoing predicate Nothing -> bfs' visited' $ cs <> Seq.fromList (map (: c) . filter (\(st, _) -> not $ Set.member st visited) . outgoing $ Just lastSt) where visited' = Set.insert lastSt visited -\end{code} -\end{comment} -Um eine obere Schranke an das von einer Serie von edits betroffene Intervall zu bestimmen ordnen wir zunächst jeder von mindestens einem atomaren edit betroffenen Position $n$ im Eingabe-Wort einen $\text{offset}_n = \text{\# deletions} - \text{\# inserts}$ zu. -Das gesuchte Intervall ist nun $(\text{minK}, \text{maxK})$, mit $\text{minK}$ der Position im Eingabe-Wort mit niedrigstem $\text{offset}$ und $\text{maxK}$ die Position im Eingabe-Wort mit höchstem $\text{offset}$, $\text{maxK}^\prime$, modifiziert um das Maximum aus $\{ 0 \} \cup \{ \text{maxK}_n \colon n \in \{ 0 \ldots \text{maxK}^\prime \} \}$ wobei $\text{maxK}_n = -1 \cdot (n + \text{offset}_n)$ an Position $n$ ist. +-- trace :: String -> a -> a +-- {-# NOINLINE trace #-} +-- trace str y = flip seq y . unsafePerformIO . withFile "lens.log" AppendMode $ \h -> +-- hPutStrLn h str -\begin{code} -affected :: forall char. StringEdits Natural char -> Maybe (Interval Natural) --- ^ For a given set of edits @es@ return the interval @i = a ... b@ such that for any given string @str@ of sufficient length the following holds: --- --- - For all @n :: Natural@: @n < a ==> str ! n == (str `apply` es) ! n@ --- - There exists a @k :: Integer@ such that for all @n :: Integer@: @n > b ==> str ! (n + k) == (str `apply` es) ! n@ --- --- Intuitively: for any character @c@ of the new string @str `apply` es@ there exists a corresponding character in @str@ (offset by either 0 or a constant shift @k@) if the index of @c@ is /not/ contained in @affected es@. -\end{code} -\begin{comment} -\begin{code} -affected SEFail = Nothing -affected (StringEdits es) = Just . toInterval $ go es Map.empty - where - toInterval :: Map Natural Integer -> Interval Natural - toInterval map - | Just (((minK, _), _), ((maxK, _), _)) <- (,) <$> Map.minViewWithKey map <*> Map.maxViewWithKey map - = let - maxV' = maximum . (0 :) $ do - offset <- [0..maxK] - v <- maybeToList $ Map.lookup (maxK - offset) map - v' <- maybeToList . fmap fromInteger $ negate v <$ guard (v <= 0) - guard $ v' >= succ offset - return $ v' - offset - in (minK Int.... maxK + maxV') - | otherwise - = Int.empty - go :: Seq (StringEdit Natural char) -> Map Natural Integer -> Map Natural Integer - go Seq.Empty offsets = offsets - go (es :> e) offsets = go es offsets' - where - p = e ^. sePos - -- p' = fromIntegral $ Map.foldrWithKey (\k o p -> bool (fromIntegral p) (o + p) $ k < fromIntegral p) (fromIntegral p) offsets - offsets' = Map.alter (Just . myOffset . fromMaybe 0) p offsets - myOffset :: Integer -> Integer - myOffset - | Insert _ _ <- e = pred - | Delete _ <- e = succ - -trace :: String -> a -> a -{-# NOINLINE trace #-} -trace str y = flip seq y . unsafePerformIO . withFile "lens.log" AppendMode $ \h -> - hPutStrLn h str - -traceShowId :: Show a => a -> a -traceShowId x = trace (show x) x - - +-- traceShowId :: Show a => a -> a +-- traceShowId x = trace (show x) x \end{code} \end{comment} + +\begin{eg} + Wir wollen einen edit $e^\prime = \rho_{80}$ anwenden auf das Ergebnis $w^\prime$ der Ausführung des Zeilenumbruch-DFST aus Beispiel \ref{eg:linebreak} auf das Wort $w = 0\, 1\, \ldots\, 80\, \text{\tt \textbackslash n}\, 81\, \ldots\, 98$ aus Beispiel \ref{eg:w100} und dabei $e^\prime$ mittels $\Lleftarrow$ nach links propagieren und den assoziierten edit $e$ auf $w$ konstruieren. + + \begin{equation*} + w^\prime = w = 0\, 1\, \ldots\, 80\, \text{\textbackslash n}\, 81\, \ldots\, 98 + \end{equation*} + + Das von $e$ betroffene Intervall von $w^\prime$ ist $(80, 80)$. + + Das Komplement nach Anwendung des DFST auf $w$ ist, wie in Beispiel \ref{eg:linebreakonw100} dargelegt: + + \begin{align*} + \text{act} & \colon Q \to \Delta^\star \times \{ \bot \} \cup Q \\ + 0 & \mapsto (0\, 1\, \ldots\, 80\, \text{\tt \textbackslash n}\, 81\, \ldots\, 98, 19) \\ + 1 & \mapsto (0\, 1\, \ldots\, 79\, \text{\tt \textbackslash n}\, 80\, \ldots\, 98, 18) \\ + & \vdots + \end{align*} + + Wir unterteilen $\text{act}$ an $(80, 80)$ in $\text{act}_\text{R} \circ \text{act}_e \circ \text{act}_\text{L}$: + + \begin{align*} + \text{act}_\text{R} & \colon Q \to \Delta^\star \times \{ \bot \} \cup Q \\ + n & \mapsto (81\, \ldots\, 98, \min ( 80, n + 19 ) ) \\ + & \\ + \text{act}_e & \colon Q \to \Delta^\star \times \{ \bot \} \cup Q \\ + n & \mapsto ( \text{\tt \textbackslash n}, 0 ) \\ + & \\ + \text{act}_\text{L} & \colon Q \to \Delta^\star \times \{ \bot \} \cup Q \\ + n & \mapsto (0\, 1\, \ldots\, 80, 80) \\ + \text{other} & \mapsto (\epsilon, \bot) + \end{align*} +\end{eg} diff --git a/edit-lens/src/Control/DFST/Lens.lhs.tex b/edit-lens/src/Control/DFST/Lens.lhs.tex new file mode 120000 index 0000000..09aa15c --- /dev/null +++ b/edit-lens/src/Control/DFST/Lens.lhs.tex @@ -0,0 +1 @@ +Lens.lhs \ No newline at end of file diff --git a/edit-lens/src/Control/Edit.lhs b/edit-lens/src/Control/Edit.lhs index 8c4f045..ba4b8e6 100644 --- a/edit-lens/src/Control/Edit.lhs +++ b/edit-lens/src/Control/Edit.lhs @@ -6,6 +6,8 @@ module Control.Edit \end{code} \end{comment} +Um das Intuitive Verhalten von Änderungen auf Texten\footnote{Im folgenden \emph{edits}} und ihre interne algebraische Struktur zu fassen formalisieren wir sie als \emph{Moduln}: + \begin{defn}[Moduln] Ein Modul $M$ ist eine \emph{partielle Monoidwirkung} zusammen mit einem schwach-initialen Element\footnote{Gemeint ist hier die übliche Definition von \emph{schwach-initial} aus der Kategorientheorie—ein Modul $M$ bildet eine Kategorie mit Objekten aus $\Dom M$ und Morphismen von $x$ nach $y$ den Monoidelementen $\partial x \in \partial M$ sodass $x \cdot \partial x = y$} (bzgl. der Monoidwirkung) auf dem Träger, d.h. $M = (\Dom M, \partial M, \init_M)$ ist ein Tupel aus einer Trägermenge $\Dom M$, einem Monoid $\partial M$ zusammen mit mit einer partiellen Funktion $\cdot \colon \Dom M \times \partial M \to \Dom$, die \emph{kompatibel} ist mit der Monoid-Struktur: diff --git a/edit-lens/src/Control/Edit.lhs.tex b/edit-lens/src/Control/Edit.lhs.tex new file mode 120000 index 0000000..33d3341 --- /dev/null +++ b/edit-lens/src/Control/Edit.lhs.tex @@ -0,0 +1 @@ +Edit.lhs \ No newline at end of file diff --git a/edit-lens/src/Control/Edit/String.lhs b/edit-lens/src/Control/Edit/String.lhs new file mode 100644 index 0000000..c1411cf --- /dev/null +++ b/edit-lens/src/Control/Edit/String.lhs @@ -0,0 +1,120 @@ +\begin{comment} +\begin{code} +{-# LANGUAGE TemplateHaskell +#-} + +module Control.Edit.String + ( StringEdit(..), sePos, seInsertion + , StringEdits(..), _StringEdits, _SEFail, stringEdits + , insert, delete, replace + ) where + +import Control.Lens +import Control.Lens.TH + +import Control.Edit + +import Numeric.Natural + +import Data.Sequence (Seq((:<|), (:|>))) +import qualified Data.Sequence as Seq + +import Data.Monoid +\end{code} +\end{comment} + +\begin{defn}[Atomare edits of strings] +Wir betrachten, zur Einfachheit, ein minimiales Set von Edits auf Strings\footnote{Wie in der Konstruktion zum Longest Common Subsequence Problem}, bestehend nur aus Einfügung eines einzelnen Zeichens $\sigma$ an einer bestimmten Position $\iota_n(\sigma)$ und löschen des Zeichens an einer einzelnen Position $\rho_n$: + +\begin{code} +data StringEdit pos char = Insert { _sePos :: pos, _seInsertion :: char } + | Delete { _sePos :: pos } + deriving (Eq, Ord, Show, Read) + +-- Automatically derive van-leerhoven-lenses: +-- +-- @sePos :: Lens' (StringEdits pos char) pos@ +-- @seInsertion :: Traversal' (StringEdits pos char) char@ +makeLenses ''StringEdit +\end{code} + +Atomare edits werden, als Liste, zu edits komponiert. +Wir führen einen speziellen edit ein, der nicht-Anwendbarkeit der edits repräsentiert: +\begin{code} +data StringEdits pos char = StringEdits (Seq (StringEdit pos char)) + | SEFail + deriving (Eq, Ord, Show, Read) + +makePrisms ''StringEdits + +stringEdits :: Traversal (StringEdits pos char) (StringEdits pos' char') (StringEdit pos char) (StringEdit pos' char') +\end{code} +\end{defn} + +\begin{comment} +\begin{code} +stringEdits = _StringEdits . traverse + +insert :: pos -> char -> StringEdits pos char +insert n c = StringEdits . Seq.singleton $ Insert n c + +delete :: pos -> StringEdits pos char +delete n = StringEdits . Seq.singleton $ Delete n + +replace :: Eq pos => pos -> char -> StringEdits pos char +replace n c = insert n c <> delete n + +-- | Rudimentarily optimize edit composition +instance Eq pos => Monoid (StringEdits pos char) where + mempty = StringEdits Seq.empty + SEFail `mappend` _ = SEFail + _ `mappend` SEFail = SEFail + (StringEdits Seq.Empty) `mappend` x = x + x `mappend` (StringEdits Seq.Empty) = x + (StringEdits x@(bs :|> b)) `mappend` (StringEdits y@(a :<| as)) + | (Insert n _) <- a + , (Delete n') <- b + , n == n' + = StringEdits bs `mappend` StringEdits as + | otherwise = StringEdits $ x `mappend` y +\end{code} +\end{comment} + +Da wir ein minimales Set an atomaren edits gewählt haben, ist die Definiton der Modulnstruktur über Strings des passenden Alphabets recht einfach: +\begin{code} +instance Module (StringEdits Natural char) where + type Domain (StringEdits Natural char) = Seq char + apply str SEFail = Nothing + apply str (StringEdits Seq.Empty) = Just str + apply str (StringEdits (es :|> Insert n c)) = flip apply (StringEdits es) =<< go str n c + where + go str (fromIntegral -> n) c + | Seq.length str >= n + = Just $ Seq.insertAt n c str + | otherwise + = Nothing + apply str (StringEdits (es :|> Delete n)) = flip apply (StringEdits es) =<< go str n + where + go str (fromIntegral -> n) + | Seq.length str > n + = Just $ Seq.deleteAt n str + | otherwise + = Nothing + + init = Seq.empty + divInit = StringEdits . Seq.unfoldl go . (0,) + where + go (_, Seq.Empty) = Nothing + go (n, c :<| cs ) = Just ((succ n, cs), Insert n c) + +\end{code} + +\begin{eg} + Wir wenden $\iota_{80}(100) \rho_{80}$ auf das Wort $w$ aus Beispiel~\ref{eg:w100} an: + + \begin{align*} + w^\prime & = w \cdot \iota_{80}(100) \rho_{80} \\ + & = 0\, 1\, \ldots\, 80\, 100\, 81\, 82\, \ldots\, 98 + \end{align*} +\end{eg} + diff --git a/edit-lens/src/Control/Edit/String.lhs.tex b/edit-lens/src/Control/Edit/String.lhs.tex new file mode 120000 index 0000000..6a78642 --- /dev/null +++ b/edit-lens/src/Control/Edit/String.lhs.tex @@ -0,0 +1 @@ +String.lhs \ No newline at end of file diff --git a/edit-lens/src/Control/Edit/String/Affected.lhs b/edit-lens/src/Control/Edit/String/Affected.lhs new file mode 100644 index 0000000..851267b --- /dev/null +++ b/edit-lens/src/Control/Edit/String/Affected.lhs @@ -0,0 +1,73 @@ +\begin{comment} +\begin{code} +module Control.Edit.String.Affected + ( affected + ) where + +import Control.Lens +import Control.Lens.TH + +import Control.Edit +import Control.Edit.String + +import Numeric.Natural +import Numeric.Interval (Interval, (...)) +import qualified Numeric.Interval as Int + +import Data.Sequence (Seq((:<|), (:|>))) +import qualified Data.Sequence as Seq + +import Data.Map.Lazy (Map) +import qualified Data.Map.Lazy as Map + +import Data.Monoid + +import Control.Monad + +import Data.Maybe (fromMaybe, maybeToList, listToMaybe, catMaybes, isNothing, isJust, mapMaybe) +\end{code} +\end{comment} + +Um eine obere Schranke an das von einer Serie von edits betroffene Intervall zu bestimmen ordnen wir zunächst jeder von mindestens einem atomaren edit betroffenen Position $n$ im Eingabe-Wort einen $\text{offset}_n = \text{\# deletions} - \text{\# inserts}$ zu. +Das gesuchte Intervall ist nun $(\text{minK}, \text{maxK})$, mit $\text{minK}$ der Position im Eingabe-Wort mit niedrigstem $\text{offset}$ und $\text{maxK}$ die Position im Eingabe-Wort mit höchstem $\text{offset}$, $\text{maxK}^\prime$, modifiziert um das Maximum aus $\{ 0 \} \cup \{ \text{maxK}_n \colon n \in \{ 0 \ldots \text{maxK}^\prime \} \}$ wobei $\text{maxK}_n = -1 \cdot (n + \text{offset}_n)$ an Position $n$ ist. + +\begin{code} +affected :: forall char. StringEdits Natural char -> Maybe (Interval Natural) +-- ^ For a given set of edits @es@ return the interval @i = a ... b@ such that for any given string @str@ of sufficient length the following holds: +-- +-- - For all @n :: Natural@: @n < a ==> str ! n == (str `apply` es) ! n@ +-- - There exists a @k :: Integer@ such that for all @n :: Integer@: @n > b ==> str ! (n + k) == (str `apply` es) ! n@ +-- +-- Intuitively: for any character @c@ of the new string @str `apply` es@ there exists a corresponding character in @str@ (offset by either 0 or a constant shift @k@) if the index of @c@ is /not/ contained in @affected es@. +\end{code} +\begin{comment} +\begin{code} +affected SEFail = Nothing +affected (StringEdits es) = Just . toInterval $ go es Map.empty + where + toInterval :: Map Natural Integer -> Interval Natural + toInterval map + | Just (((minK, _), _), ((maxK, _), _)) <- (,) <$> Map.minViewWithKey map <*> Map.maxViewWithKey map + = let + maxV' = maximum . (0 :) $ do + offset <- [0..maxK] + v <- maybeToList $ Map.lookup (maxK - offset) map + v' <- maybeToList . fmap fromInteger $ negate v <$ guard (v <= 0) + guard $ v' >= succ offset + return $ v' - offset + in (minK Int.... maxK + maxV') + | otherwise + = Int.empty + go :: Seq (StringEdit Natural char) -> Map Natural Integer -> Map Natural Integer + go Seq.Empty offsets = offsets + go (es :> e) offsets = go es offsets' + where + p = e ^. sePos + -- p' = fromIntegral $ Map.foldrWithKey (\k o p -> bool (fromIntegral p) (o + p) $ k < fromIntegral p) (fromIntegral p) offsets + offsets' = Map.alter (Just . myOffset . fromMaybe 0) p offsets + myOffset :: Integer -> Integer + myOffset + | Insert _ _ <- e = pred + | Delete _ <- e = succ +\end{code} +\end{comment} diff --git a/edit-lens/src/Control/Edit/String/Affected.lhs.tex b/edit-lens/src/Control/Edit/String/Affected.lhs.tex new file mode 120000 index 0000000..b85cc7a --- /dev/null +++ b/edit-lens/src/Control/Edit/String/Affected.lhs.tex @@ -0,0 +1 @@ +Affected.lhs \ No newline at end of file diff --git a/edit-lens/src/Control/FST.lhs b/edit-lens/src/Control/FST.lhs index 9357da7..9739adc 100644 --- a/edit-lens/src/Control/FST.lhs +++ b/edit-lens/src/Control/FST.lhs @@ -77,7 +77,9 @@ data FST state input output = FST & \cup \{ (80, \sigma, \sigma, 80) \mid \sigma \in \Sigma \mysetminus \{ \text{\tt ' '}, \text{\tt \textbackslash n} \} \} \end{align*} - \begin{figure}[p] + Siehe auch Abbildung~\ref{fig:linebreak} + + \begin{figure}[h] \centering \begin{tikzpicture}[->,auto,node distance=5cm] \node[initial,state,accepting] (0) {$0$}; @@ -101,7 +103,7 @@ data FST state input output = FST \draw[-] (rest)--(i.north); \draw[-] (rest)--(si.west); \end{tikzpicture} - \caption{Beispiel \ref{eg:linebreak} dargestellt als Graph} + \caption{\label{fig:linebreak} Ein Transducer der, durch Übersetzung zwischen Leerzeichen und Zeilenumbrüchen, sicher stellt, dass jede Zeile eines Texts mindestens 80 Zeichen hat} \end{figure} \end{eg} @@ -314,7 +316,9 @@ wordFST inps outs = FST & \cup \{ (i, \sigma, \epsilon, i) \mid \sigma \in \Sigma \cup \{ \epsilon \}, i \in Q \mysetminus \{ 99 \} \} \end{align*} - \begin{figure}[p] + Siehe auch Abbildung~\ref{fig:w100}. + + \begin{figure}[h] \centering \begin{tikzpicture}[->,auto,node distance=5cm] \node[initial,state] (0) {$0$}; @@ -323,11 +327,11 @@ wordFST inps outs = FST \path (0) edge [loop above] node {$\{(\sigma, \epsilon) \mid \sigma \in \Sigma \cup \{ \epsilon \} \}$} (0) edge node {$\{ (\sigma, 1) \mid \sigma \in \Sigma \cup \{ \epsilon \} \}$} (rest) - (rest) edge node {$\{ \sigma, 100 \} \mid \sigma \in \Sigma \cup \{ \epsilon \}$} (99) + (rest) edge node {$\{ (\sigma, 98) \mid \sigma \in \Sigma \cup \{ \epsilon \} \}$} (99) (99) edge [loop above] node {$\{(\sigma, \epsilon) \mid \sigma \in \Sigma \}$} (99); \end{tikzpicture} - \caption{Beispiel \ref{eg:w100} dargestellt als Graph} + \caption{\label{fig:w100} Der Wort-FST eines längeren Wortes} \end{figure} \end{eg} @@ -351,7 +355,9 @@ restrictOutput :: forall state input output. (Ord state, Ord input, Ord output) & \cup \{ ((80, 80), \text{\tt \textbackslash n}, \text{\tt \textbackslash n}, (81, 0)) \} \end{align*} - \begin{figure}[p] + Siehe auch Abbildung~\ref{fig:l80timesw100}. + + \begin{figure}[h] \centering \begin{tikzpicture}[->,auto,node distance=5cm] \node[initial,state] (0) {$0_{W_w}\, 0_{L_{80}}$}; @@ -368,12 +374,12 @@ restrictOutput :: forall state input output. (Ord state, Ord input, Ord output) (rest2) edge node {$(98, 98)$} (99); \end{tikzpicture} - \caption{Beispiel \ref{eg:l80timesw100} dargestellt als Graph} + \caption{\label{fig:l80timesw100} Die Einschränkung des Automaten aus Abbildung \ref{fig:linebreak} auf das Wort aus Abbildung \ref{fig:w100}} \end{figure} \end{eg} \begin{rem} - Es ist bemerkenswert, dass in Beispiel \ref{eg:l80timesw100} die Zirkuläre Struktur von $L_80$ durch Produkt mit einem Wort verloren geht. + Es ist bemerkenswert, dass in Beispiel \ref{eg:l80timesw100} die Zirkuläre Struktur von $L_{80}$ durch Produkt mit einem Wort verloren geht. I.\@A.\@ ist das Produkt eines beliebigen FST mit einem Wort-FST zwar nicht azyklisch, erbt jedoch die lineare Struktur des Wort-FST in dem Sinne, dass Fortschritt in Richtung der akzeptierenden Zustände nur möglich ist indem der $(i, \sigma, w_i, i + 1)$-Klasse von Transitionen des Wort-FSTs gefolgt wird. \end{rem} diff --git a/edit-lens/src/Control/FST.lhs.tex b/edit-lens/src/Control/FST.lhs.tex new file mode 120000 index 0000000..b6f3a8f --- /dev/null +++ b/edit-lens/src/Control/FST.lhs.tex @@ -0,0 +1 @@ +FST.lhs \ No newline at end of file diff --git a/edit-lens/src/Control/Lens/Edit.lhs b/edit-lens/src/Control/Lens/Edit.lhs index 96b2114..6561528 100644 --- a/edit-lens/src/Control/Lens/Edit.lhs +++ b/edit-lens/src/Control/Lens/Edit.lhs @@ -12,7 +12,7 @@ import Control.Edit \end{comment} \begin{defn}[Zustandsbehaftete Monoidhomomorphismen] -Gegeben eine Menge von Komplementen $C$ und Monoiden $M$ und $N$ nennen wir eine partielle Funktion $\psi \colon C \times M \to C \times N$ einen zustandsbehafteten Monoidhomomorphismus wenn sie den folgenden Ansprüchen genügt: +Gegeben eine Menge $C$ von \emph{Komplementen} und zwei Monoiden $M$ und $N$ nennen wir eine partielle Funktion $\psi \colon C \times M \to C \times N$ einen zustandsbehafteten Monoidhomomorphismus wenn sie den folgenden Ansprüchen genügt: \begin{itemize} \item $\forall c \in C \colon \psi(1_M, c) = (1_N, c)$ @@ -28,7 +28,7 @@ type StateMonoidHom s m n = (s, m) -> (s, n) \end{defn} \begin{defn}[edit-lenses] -Für Moduln $M$ und $N$ besteht eine symmetrische edit-lens zwischen $M$ und $N$ aus zwei zustandsbehafteten Monoidhomomorphismen $\Rrightarrow \colon C \times \partial M \to C \times \partial N$ und $\Lleftarrow \colon C \times \partial N \to C \times \partial M$, mit kompatiblem Komplement $C$, einem ausgezeichneten Element $\ground_C$ und einer \emph{Konsistenzrelation} $K \subset \Dom M \times C \times \Dom N$ sodass gilt: +Für Moduln $M$ und $N$ besteht eine symmetrische edit-lens zwischen $M$ und $N$ aus zwei zustandsbehafteten Monoidhomomorphismen $\Rrightarrow \colon C \times \partial M \to C \times \partial N$ und $\Lleftarrow \colon C \times \partial N \to C \times \partial M$, mit kompatiblem Komplement $C$, einem ausgezeichneten Element $\ground_C \in C$ und einer \emph{Konsistenzrelation} $K \subset \Dom M \times C \times \Dom N$ sodass gilt: \begin{itemize} \item $(\init_M, \ground_C, \init_N) \in K$ @@ -67,7 +67,7 @@ instance (Module m, Module n) => HasEditLens (EditLens c m n) m n where \end{code} \end{defn} -\paragraph{Kompatibilität mit bestehenden lens frameworks} +\subsection{Kompatibilität mit bestehenden lens frameworks} Das einschlägige bestehende lens framework \cite{lens} konstruiert seine Linsen alá \citeauthor{laarhoven} wie folgt: diff --git a/edit-lens/src/Control/Lens/Edit.lhs.tex b/edit-lens/src/Control/Lens/Edit.lhs.tex new file mode 120000 index 0000000..33d3341 --- /dev/null +++ b/edit-lens/src/Control/Lens/Edit.lhs.tex @@ -0,0 +1 @@ +Edit.lhs \ No newline at end of file diff --git a/edit-lens/src/Control/Lens/Edit/ActionTree.lhs b/edit-lens/src/Control/Lens/Edit/ActionTree.lhs new file mode 100644 index 0000000..6632dce --- /dev/null +++ b/edit-lens/src/Control/Lens/Edit/ActionTree.lhs @@ -0,0 +1,204 @@ +\begin{comment} +\begin{code} +{-# LANGUAGE ScopedTypeVariables + , TypeApplications + , TypeFamilyDependencies + #-} + +module Control.Lens.Edit.ActionTree + ( Action(..) + , treeLens + ) where + +import Control.Edit +import Control.Edit.String +import Control.Edit.String.Affected +import Control.Lens.Edit + +import Control.Lens + +import Numeric.Natural +import Numeric.Interval (Interval, (...)) +import qualified Numeric.Interval as Int + +import Data.Compositions (Compositions) +import qualified Data.Compositions as Comp + +import Data.Algorithm.Diff (Diff, getDiff) +import qualified Data.Algorithm.Diff as Diff + +import Data.Sequence (Seq((:<|), (:|>))) +import qualified Data.Sequence as Seq +import Data.Set (Set) +import qualified Data.Set as Set + +import Data.Monoid +import Data.Function (on) +import Data.Foldable (toList) +import Data.Maybe (fromMaybe) + +import System.IO (Handle, hPutStrLn, IOMode(AppendMode), withFile) +import System.IO.Unsafe +\end{code} +\end{comment} + +Das beschrieben Verfahren wurde prinzipiell agnostisch in Bezug auf die konkret gewählte Parser-Konstruktion gewählt. + +Hierfür wurden die benötigten Operationen auf der DFST-Wirkung und das in $\Lleftarrow$ verwendete Suchschema abstrakt als Typklasse angegeben: + +\begin{code} +class Monoid action => Action action input output | action -> input, action -> output where +\end{code} +\begin{comment} +\begin{code} + -- | Most operations of `Action` permit access to some underlying description of the parser (i.e. an automaton) + type ActionParam action = param | param -> action + + -- | A full capture of the Parser-State (i.e. a state for a given automaton) + type ActionState action :: * + + -- | `mempty` should be neutral under `(<>)`, `actionFail` should be absorptive + actionFail :: action + + -- | Construct an @action@ from a single character of input + actionSingleInput :: ActionParam action -> input -> action + -- | Initial state of the parser + actionGroundState :: ActionParam action -> ActionState action + -- | Is a certain state acceptable as final? + actionStateFinal :: ActionParam action -> ActionState action -> Bool + -- | Run an @action@ (actually a binary tree thereof, use `Comp.composed` to extract the root) on a given state + actionState :: ActionParam action -> Compositions action -> ActionState action -> Maybe (ActionState action) + -- | What @output@ does running an @action@ on a given state produce? + actionProduces :: ActionParam action -> Compositions action -> ActionState action -> Seq output + -- | What @input@ does running an @action@ on a given state consume? + actionConsumes :: ActionParam action -> Compositions action -> Seq input + + -- | Find a new string of @input@-symbols to travel between the given states while producing exactly the given @output@ + -- + -- @actionFindPath@ also has access to the remaining action to be run after it's new @input@ has been consumed. + -- This is necessary to further restrict the considered paths in such a way that the resulting run as a whole is acceptable in the sense of `actionStateFinal`. + actionFindPath :: ActionParam action + -> ActionState action -- ^ From + -> Seq output -- ^ New output to be produced + -> ActionState action -- ^ To + -> Compositions action -- ^ Suffix + -> Maybe (Seq input) +\end{code} +\end{comment} + +Das Verfahren kann nun auf andere Sorten von Parser angewendet werden, indem nur die oben aufgeführte \texttt{Action}-Typklasse implementiert wird: + +\begin{code} +treeLens :: forall action input output. + ( Ord input, Ord output + , Show input, Show output + , Action action input output + , Show (ActionState action) + ) => ActionParam action -> EditLens (Compositions action) (StringEdits Natural input) (StringEdits Natural output) +\end{code} +\begin{comment} +\begin{code} +treeLens param = EditLens ground propR propL + where + ground :: Compositions action + ground = mempty + + propR :: (Compositions action, StringEdits Natural input) + -> (Compositions action, StringEdits Natural output) + propR (c, SEFail) = (c, SEFail) + propR (c, StringEdits Seq.Empty) = (c, mempty) + propR (c, lEs@(StringEdits (es :> e))) + | Just final <- actionState param c' $ actionGroundState @action param + , actionStateFinal param final + = (c', rEs) + | otherwise + = (c, SEFail) + where + Just int = affected lEs + (cAffSuffix, cAffPrefix) = Comp.splitAt (Comp.length c - fromIntegral (Int.inf int)) c + (cSuffix, cPrefix) = Comp.splitAt (Comp.length c - (e ^. sePos . from enum)) c + cSuffix' + | Delete _ <- e + , Comp.length cSuffix > 0 = Comp.take (pred $ Comp.length cSuffix) cSuffix + | Insert _ nChar <- e = cSuffix <> Comp.singleton (actionSingleInput param nChar) + | otherwise = Comp.singleton actionFail + (c', _) = propR (cSuffix' <> cPrefix, StringEdits es) + (cAffSuffix', _) = Comp.splitAt (Comp.length c' - Comp.length cAffPrefix) c' + Just pFinal = actionState param cAffPrefix $ actionGroundState @action param + rEs = strDiff (actionProduces param cAffSuffix pFinal) (actionProduces param cAffSuffix' pFinal) & stringEdits . sePos . from enum +~ length (actionProduces param cAffPrefix $ actionGroundState @action param) + + propL :: (Compositions action, StringEdits Natural output) + -> (Compositions action, StringEdits Natural input) + propL (c, StringEdits Seq.Empty) = (c, mempty) + propL (c, es) = fromMaybe (c, SEFail) $ do + -- Determine states @(iState, fState)@ at the boundary of the region affected by @es@ + ((,) <$> Int.inf <*> Int.sup -> (minAff, maxAff)) <- affected es + trace (show (minAff, maxAff)) $ Just () + let + prevTrans :: Natural -> Maybe ( Compositions action {- Run after chosen transition to accepting state -} + , (ActionState action, input, Seq output, ActionState action) + , Compositions action {- Run from `stInitial` up to chosen transition -} + ) + -- ^ Given an index in the output, find the associated transition in @c@ + prevTrans needle = do + let (after, before) = prevTrans' (c, mempty) + transSt <- actionState param before $ actionGroundState @action param + trace ("transSt = " ++ show transSt) $ Just () + let (after', trans) = Comp.splitAt (pred $ Comp.length after) after + [inS] <- return . toList $ actionConsumes param trans + Just postTransSt <- return $ actionState param trans transSt + outSs <- return $ actionProduces param trans transSt + return (after', (transSt, inS, outSs, postTransSt), before) + where + -- | Move monoid summands from @after@ to @before@ until first transition of @after@ produces @needle@ or @after@ is a singleton + prevTrans' :: (Compositions action, Compositions action) + -> (Compositions action, Compositions action) + prevTrans' (after, before) + | producedNext > needle = (after, before) + | Comp.length after == 1 = (after, before) + | otherwise = prevTrans' (after', before') + where + producedNext = fromIntegral . Seq.length . traceShowId . actionProduces param before' $ actionGroundState @action param + (after', nextTrans) = Comp.splitAt (pred $ Comp.length after) after + before' = nextTrans `mappend` before + (_, (iState, _, _, _), prefix) <- prevTrans minAff + trace (show (iState, Comp.length prefix)) $ Just () + (suffix, (pfState, _, _, fState), _) <- prevTrans maxAff + trace (show (pfState, fState, Comp.length suffix)) $ Just () + + newOut <- actionProduces param c (actionGroundState @action param) `apply` es + let affNewOut = (\s -> Seq.take (Seq.length s - Seq.length (actionProduces param suffix fState)) s) $ Seq.drop (Seq.length . actionProduces param prefix $ actionGroundState @action param) newOut + trace (show (iState, fState, affNewOut)) $ Just () + + newIn <- actionFindPath param iState affNewOut fState suffix + + let oldIn = actionConsumes param . Comp.drop (Comp.length suffix) $ Comp.take (Comp.length c - Comp.length prefix) c + inDiff = oldIn `strDiff` newIn + diffOffset = fromIntegral . Seq.length $ actionConsumes param prefix + inDiff' = inDiff & stringEdits . sePos +~ diffOffset + + trace (show (oldIn, newIn, inDiff')) $ Just () + + let affComp = Comp.fromList $ actionSingleInput param <$> toList newIn + + return (suffix <> affComp <> prefix, inDiff') + + +strDiff :: forall sym pos. (Eq sym, Integral pos) => Seq sym -> Seq sym -> StringEdits pos sym +-- ^ @strDiff a b@ calculates a set of edits, which, when applied to @a@, produce @b@ +strDiff a b = snd . foldl toEdit (0, mempty) $ (getDiff `on` toList) a b + where + toEdit :: (pos, StringEdits pos sym) -> Diff sym -> (pos, StringEdits pos sym) + toEdit (n, es) (Diff.Both _ _) = (succ n, es) + toEdit (n, es) (Diff.First _ ) = (n, delete n <> es) + toEdit (n, es) (Diff.Second c) = (succ n, insert n c <> es) + +trace :: String -> a -> a +{-# NOINLINE trace #-} +trace str y = flip seq y . unsafePerformIO . withFile "lens.log" AppendMode $ \h -> + hPutStrLn h str + +traceShowId :: Show a => a -> a +traceShowId x = trace (show x) x +\end{code} +\end{comment} diff --git a/edit-lens/src/Control/Lens/Edit/ActionTree.lhs.tex b/edit-lens/src/Control/Lens/Edit/ActionTree.lhs.tex new file mode 120000 index 0000000..6e3c68c --- /dev/null +++ b/edit-lens/src/Control/Lens/Edit/ActionTree.lhs.tex @@ -0,0 +1 @@ +ActionTree.lhs \ No newline at end of file -- cgit v1.2.3