From f4c419b9ddec15bad267a4463f0720d6e28042d2 Mon Sep 17 00:00:00 2001 From: Gregor Kleen Date: Thu, 30 May 2019 12:18:08 +0200 Subject: Further work --- edit-lens/src/Control/Edit/String.lhs | 120 +++++++++++++++++++++ edit-lens/src/Control/Edit/String.lhs.tex | 1 + edit-lens/src/Control/Edit/String/Affected.lhs | 73 +++++++++++++ edit-lens/src/Control/Edit/String/Affected.lhs.tex | 1 + 4 files changed, 195 insertions(+) create mode 100644 edit-lens/src/Control/Edit/String.lhs create mode 120000 edit-lens/src/Control/Edit/String.lhs.tex create mode 100644 edit-lens/src/Control/Edit/String/Affected.lhs create mode 120000 edit-lens/src/Control/Edit/String/Affected.lhs.tex (limited to 'edit-lens/src/Control/Edit') diff --git a/edit-lens/src/Control/Edit/String.lhs b/edit-lens/src/Control/Edit/String.lhs new file mode 100644 index 0000000..c1411cf --- /dev/null +++ b/edit-lens/src/Control/Edit/String.lhs @@ -0,0 +1,120 @@ +\begin{comment} +\begin{code} +{-# LANGUAGE TemplateHaskell +#-} + +module Control.Edit.String + ( StringEdit(..), sePos, seInsertion + , StringEdits(..), _StringEdits, _SEFail, stringEdits + , insert, delete, replace + ) where + +import Control.Lens +import Control.Lens.TH + +import Control.Edit + +import Numeric.Natural + +import Data.Sequence (Seq((:<|), (:|>))) +import qualified Data.Sequence as Seq + +import Data.Monoid +\end{code} +\end{comment} + +\begin{defn}[Atomare edits of strings] +Wir betrachten, zur Einfachheit, ein minimiales Set von Edits auf Strings\footnote{Wie in der Konstruktion zum Longest Common Subsequence Problem}, bestehend nur aus Einfügung eines einzelnen Zeichens $\sigma$ an einer bestimmten Position $\iota_n(\sigma)$ und löschen des Zeichens an einer einzelnen Position $\rho_n$: + +\begin{code} +data StringEdit pos char = Insert { _sePos :: pos, _seInsertion :: char } + | Delete { _sePos :: pos } + deriving (Eq, Ord, Show, Read) + +-- Automatically derive van-leerhoven-lenses: +-- +-- @sePos :: Lens' (StringEdits pos char) pos@ +-- @seInsertion :: Traversal' (StringEdits pos char) char@ +makeLenses ''StringEdit +\end{code} + +Atomare edits werden, als Liste, zu edits komponiert. +Wir führen einen speziellen edit ein, der nicht-Anwendbarkeit der edits repräsentiert: +\begin{code} +data StringEdits pos char = StringEdits (Seq (StringEdit pos char)) + | SEFail + deriving (Eq, Ord, Show, Read) + +makePrisms ''StringEdits + +stringEdits :: Traversal (StringEdits pos char) (StringEdits pos' char') (StringEdit pos char) (StringEdit pos' char') +\end{code} +\end{defn} + +\begin{comment} +\begin{code} +stringEdits = _StringEdits . traverse + +insert :: pos -> char -> StringEdits pos char +insert n c = StringEdits . Seq.singleton $ Insert n c + +delete :: pos -> StringEdits pos char +delete n = StringEdits . Seq.singleton $ Delete n + +replace :: Eq pos => pos -> char -> StringEdits pos char +replace n c = insert n c <> delete n + +-- | Rudimentarily optimize edit composition +instance Eq pos => Monoid (StringEdits pos char) where + mempty = StringEdits Seq.empty + SEFail `mappend` _ = SEFail + _ `mappend` SEFail = SEFail + (StringEdits Seq.Empty) `mappend` x = x + x `mappend` (StringEdits Seq.Empty) = x + (StringEdits x@(bs :|> b)) `mappend` (StringEdits y@(a :<| as)) + | (Insert n _) <- a + , (Delete n') <- b + , n == n' + = StringEdits bs `mappend` StringEdits as + | otherwise = StringEdits $ x `mappend` y +\end{code} +\end{comment} + +Da wir ein minimales Set an atomaren edits gewählt haben, ist die Definiton der Modulnstruktur über Strings des passenden Alphabets recht einfach: +\begin{code} +instance Module (StringEdits Natural char) where + type Domain (StringEdits Natural char) = Seq char + apply str SEFail = Nothing + apply str (StringEdits Seq.Empty) = Just str + apply str (StringEdits (es :|> Insert n c)) = flip apply (StringEdits es) =<< go str n c + where + go str (fromIntegral -> n) c + | Seq.length str >= n + = Just $ Seq.insertAt n c str + | otherwise + = Nothing + apply str (StringEdits (es :|> Delete n)) = flip apply (StringEdits es) =<< go str n + where + go str (fromIntegral -> n) + | Seq.length str > n + = Just $ Seq.deleteAt n str + | otherwise + = Nothing + + init = Seq.empty + divInit = StringEdits . Seq.unfoldl go . (0,) + where + go (_, Seq.Empty) = Nothing + go (n, c :<| cs ) = Just ((succ n, cs), Insert n c) + +\end{code} + +\begin{eg} + Wir wenden $\iota_{80}(100) \rho_{80}$ auf das Wort $w$ aus Beispiel~\ref{eg:w100} an: + + \begin{align*} + w^\prime & = w \cdot \iota_{80}(100) \rho_{80} \\ + & = 0\, 1\, \ldots\, 80\, 100\, 81\, 82\, \ldots\, 98 + \end{align*} +\end{eg} + diff --git a/edit-lens/src/Control/Edit/String.lhs.tex b/edit-lens/src/Control/Edit/String.lhs.tex new file mode 120000 index 0000000..6a78642 --- /dev/null +++ b/edit-lens/src/Control/Edit/String.lhs.tex @@ -0,0 +1 @@ +String.lhs \ No newline at end of file diff --git a/edit-lens/src/Control/Edit/String/Affected.lhs b/edit-lens/src/Control/Edit/String/Affected.lhs new file mode 100644 index 0000000..851267b --- /dev/null +++ b/edit-lens/src/Control/Edit/String/Affected.lhs @@ -0,0 +1,73 @@ +\begin{comment} +\begin{code} +module Control.Edit.String.Affected + ( affected + ) where + +import Control.Lens +import Control.Lens.TH + +import Control.Edit +import Control.Edit.String + +import Numeric.Natural +import Numeric.Interval (Interval, (...)) +import qualified Numeric.Interval as Int + +import Data.Sequence (Seq((:<|), (:|>))) +import qualified Data.Sequence as Seq + +import Data.Map.Lazy (Map) +import qualified Data.Map.Lazy as Map + +import Data.Monoid + +import Control.Monad + +import Data.Maybe (fromMaybe, maybeToList, listToMaybe, catMaybes, isNothing, isJust, mapMaybe) +\end{code} +\end{comment} + +Um eine obere Schranke an das von einer Serie von edits betroffene Intervall zu bestimmen ordnen wir zunächst jeder von mindestens einem atomaren edit betroffenen Position $n$ im Eingabe-Wort einen $\text{offset}_n = \text{\# deletions} - \text{\# inserts}$ zu. +Das gesuchte Intervall ist nun $(\text{minK}, \text{maxK})$, mit $\text{minK}$ der Position im Eingabe-Wort mit niedrigstem $\text{offset}$ und $\text{maxK}$ die Position im Eingabe-Wort mit höchstem $\text{offset}$, $\text{maxK}^\prime$, modifiziert um das Maximum aus $\{ 0 \} \cup \{ \text{maxK}_n \colon n \in \{ 0 \ldots \text{maxK}^\prime \} \}$ wobei $\text{maxK}_n = -1 \cdot (n + \text{offset}_n)$ an Position $n$ ist. + +\begin{code} +affected :: forall char. StringEdits Natural char -> Maybe (Interval Natural) +-- ^ For a given set of edits @es@ return the interval @i = a ... b@ such that for any given string @str@ of sufficient length the following holds: +-- +-- - For all @n :: Natural@: @n < a ==> str ! n == (str `apply` es) ! n@ +-- - There exists a @k :: Integer@ such that for all @n :: Integer@: @n > b ==> str ! (n + k) == (str `apply` es) ! n@ +-- +-- Intuitively: for any character @c@ of the new string @str `apply` es@ there exists a corresponding character in @str@ (offset by either 0 or a constant shift @k@) if the index of @c@ is /not/ contained in @affected es@. +\end{code} +\begin{comment} +\begin{code} +affected SEFail = Nothing +affected (StringEdits es) = Just . toInterval $ go es Map.empty + where + toInterval :: Map Natural Integer -> Interval Natural + toInterval map + | Just (((minK, _), _), ((maxK, _), _)) <- (,) <$> Map.minViewWithKey map <*> Map.maxViewWithKey map + = let + maxV' = maximum . (0 :) $ do + offset <- [0..maxK] + v <- maybeToList $ Map.lookup (maxK - offset) map + v' <- maybeToList . fmap fromInteger $ negate v <$ guard (v <= 0) + guard $ v' >= succ offset + return $ v' - offset + in (minK Int.... maxK + maxV') + | otherwise + = Int.empty + go :: Seq (StringEdit Natural char) -> Map Natural Integer -> Map Natural Integer + go Seq.Empty offsets = offsets + go (es :> e) offsets = go es offsets' + where + p = e ^. sePos + -- p' = fromIntegral $ Map.foldrWithKey (\k o p -> bool (fromIntegral p) (o + p) $ k < fromIntegral p) (fromIntegral p) offsets + offsets' = Map.alter (Just . myOffset . fromMaybe 0) p offsets + myOffset :: Integer -> Integer + myOffset + | Insert _ _ <- e = pred + | Delete _ <- e = succ +\end{code} +\end{comment} diff --git a/edit-lens/src/Control/Edit/String/Affected.lhs.tex b/edit-lens/src/Control/Edit/String/Affected.lhs.tex new file mode 120000 index 0000000..b85cc7a --- /dev/null +++ b/edit-lens/src/Control/Edit/String/Affected.lhs.tex @@ -0,0 +1 @@ +Affected.lhs \ No newline at end of file -- cgit v1.2.3