From f4c419b9ddec15bad267a4463f0720d6e28042d2 Mon Sep 17 00:00:00 2001
From: Gregor Kleen <gkleen@yggdrasil.li>
Date: Thu, 30 May 2019 12:18:08 +0200
Subject: Further work

---
 edit-lens/src/Control/Edit/String.lhs              | 120 +++++++++++++++++++++
 edit-lens/src/Control/Edit/String.lhs.tex          |   1 +
 edit-lens/src/Control/Edit/String/Affected.lhs     |  73 +++++++++++++
 edit-lens/src/Control/Edit/String/Affected.lhs.tex |   1 +
 4 files changed, 195 insertions(+)
 create mode 100644 edit-lens/src/Control/Edit/String.lhs
 create mode 120000 edit-lens/src/Control/Edit/String.lhs.tex
 create mode 100644 edit-lens/src/Control/Edit/String/Affected.lhs
 create mode 120000 edit-lens/src/Control/Edit/String/Affected.lhs.tex

(limited to 'edit-lens/src/Control/Edit')

diff --git a/edit-lens/src/Control/Edit/String.lhs b/edit-lens/src/Control/Edit/String.lhs
new file mode 100644
index 0000000..c1411cf
--- /dev/null
+++ b/edit-lens/src/Control/Edit/String.lhs
@@ -0,0 +1,120 @@
+\begin{comment}
+\begin{code}
+{-# LANGUAGE TemplateHaskell
+#-}
+
+module Control.Edit.String
+  ( StringEdit(..), sePos, seInsertion
+  , StringEdits(..), _StringEdits, _SEFail, stringEdits
+  , insert, delete, replace
+  ) where
+
+import Control.Lens
+import Control.Lens.TH
+
+import Control.Edit
+
+import Numeric.Natural
+
+import Data.Sequence (Seq((:<|), (:|>)))
+import qualified Data.Sequence as Seq
+
+import Data.Monoid
+\end{code}
+\end{comment}
+
+\begin{defn}[Atomare edits of strings]
+Wir betrachten, zur Einfachheit, ein minimiales Set von Edits auf Strings\footnote{Wie in der Konstruktion zum Longest Common Subsequence Problem}, bestehend nur aus Einfügung eines einzelnen Zeichens $\sigma$ an einer bestimmten Position $\iota_n(\sigma)$ und löschen des Zeichens an einer einzelnen Position $\rho_n$:
+
+\begin{code}
+data StringEdit pos char = Insert { _sePos :: pos, _seInsertion :: char }
+                         | Delete { _sePos :: pos }
+  deriving (Eq, Ord, Show, Read)
+
+-- Automatically derive van-leerhoven-lenses:
+--
+-- @sePos :: Lens' (StringEdits pos char) pos@
+-- @seInsertion :: Traversal' (StringEdits pos char) char@
+makeLenses ''StringEdit
+\end{code}
+
+Atomare edits werden, als Liste, zu edits komponiert.
+Wir führen einen speziellen edit ein, der nicht-Anwendbarkeit der edits repräsentiert:
+\begin{code}
+data StringEdits pos char = StringEdits (Seq (StringEdit pos char))
+                          | SEFail
+  deriving (Eq, Ord, Show, Read)
+
+makePrisms ''StringEdits
+
+stringEdits :: Traversal (StringEdits pos char) (StringEdits pos' char') (StringEdit pos char) (StringEdit pos' char')
+\end{code}
+\end{defn}
+
+\begin{comment}
+\begin{code}
+stringEdits = _StringEdits . traverse
+
+insert :: pos -> char -> StringEdits pos char
+insert n c = StringEdits .  Seq.singleton $ Insert n c
+
+delete :: pos -> StringEdits pos char
+delete n = StringEdits .  Seq.singleton $ Delete n
+
+replace :: Eq pos => pos -> char -> StringEdits pos char
+replace n c = insert n c <> delete n
+
+-- | Rudimentarily optimize edit composition
+instance Eq pos => Monoid (StringEdits pos char) where
+  mempty = StringEdits Seq.empty
+  SEFail `mappend` _ = SEFail
+  _ `mappend` SEFail = SEFail
+  (StringEdits Seq.Empty) `mappend` x = x
+  x `mappend` (StringEdits Seq.Empty) = x
+  (StringEdits x@(bs :|> b)) `mappend` (StringEdits y@(a :<| as))
+    | (Insert n _) <- a
+    , (Delete n') <- b
+    , n == n'
+    = StringEdits bs `mappend` StringEdits as
+    | otherwise = StringEdits $ x `mappend` y
+\end{code}
+\end{comment}
+
+Da wir ein minimales Set an atomaren edits gewählt haben, ist die Definiton der Modulnstruktur über Strings des passenden Alphabets recht einfach:
+\begin{code}
+instance Module (StringEdits Natural char) where
+  type Domain (StringEdits Natural char) = Seq char
+  apply str SEFail = Nothing
+  apply str (StringEdits Seq.Empty) = Just str
+  apply str (StringEdits (es :|> Insert n c)) = flip apply (StringEdits es) =<< go str n c
+    where
+      go str (fromIntegral -> n) c
+        | Seq.length str >= n
+        = Just $ Seq.insertAt n c str
+        | otherwise
+        = Nothing
+  apply str (StringEdits (es :|> Delete n)) = flip apply (StringEdits es) =<< go str n
+    where
+      go str (fromIntegral -> n)
+        | Seq.length str  > n
+        = Just $ Seq.deleteAt n str
+        | otherwise
+        = Nothing
+
+  init = Seq.empty
+  divInit = StringEdits . Seq.unfoldl go . (0,)
+    where
+      go (_, Seq.Empty) = Nothing
+      go (n, c :<| cs ) = Just ((succ n, cs), Insert n c)
+
+\end{code}
+
+\begin{eg}
+  Wir wenden $\iota_{80}(100) \rho_{80}$ auf das Wort $w$ aus Beispiel~\ref{eg:w100} an:
+
+  \begin{align*}
+    w^\prime & = w \cdot \iota_{80}(100) \rho_{80} \\
+             & = 0\, 1\, \ldots\, 80\, 100\, 81\, 82\, \ldots\, 98
+  \end{align*}
+\end{eg}
+
diff --git a/edit-lens/src/Control/Edit/String.lhs.tex b/edit-lens/src/Control/Edit/String.lhs.tex
new file mode 120000
index 0000000..6a78642
--- /dev/null
+++ b/edit-lens/src/Control/Edit/String.lhs.tex
@@ -0,0 +1 @@
+String.lhs
\ No newline at end of file
diff --git a/edit-lens/src/Control/Edit/String/Affected.lhs b/edit-lens/src/Control/Edit/String/Affected.lhs
new file mode 100644
index 0000000..851267b
--- /dev/null
+++ b/edit-lens/src/Control/Edit/String/Affected.lhs
@@ -0,0 +1,73 @@
+\begin{comment}
+\begin{code}
+module Control.Edit.String.Affected
+  ( affected
+  ) where
+
+import Control.Lens
+import Control.Lens.TH
+
+import Control.Edit
+import Control.Edit.String
+
+import Numeric.Natural
+import Numeric.Interval (Interval, (...))
+import qualified Numeric.Interval as Int
+
+import Data.Sequence (Seq((:<|), (:|>)))
+import qualified Data.Sequence as Seq
+
+import Data.Map.Lazy (Map)
+import qualified Data.Map.Lazy as Map
+
+import Data.Monoid
+
+import Control.Monad
+
+import Data.Maybe (fromMaybe, maybeToList, listToMaybe, catMaybes, isNothing, isJust, mapMaybe)
+\end{code}
+\end{comment}
+
+Um eine obere Schranke an das von einer Serie von edits betroffene Intervall zu bestimmen ordnen wir zunächst jeder von mindestens einem atomaren edit betroffenen Position $n$ im Eingabe-Wort einen $\text{offset}_n = \text{\# deletions} - \text{\# inserts}$ zu.
+Das gesuchte Intervall ist nun $(\text{minK}, \text{maxK})$, mit $\text{minK}$ der Position im Eingabe-Wort mit niedrigstem $\text{offset}$ und $\text{maxK}$ die Position im Eingabe-Wort mit höchstem $\text{offset}$, $\text{maxK}^\prime$, modifiziert um das Maximum aus $\{ 0 \} \cup \{ \text{maxK}_n \colon n \in \{ 0 \ldots \text{maxK}^\prime \} \}$ wobei $\text{maxK}_n = -1 \cdot (n + \text{offset}_n)$ an Position $n$ ist.
+
+\begin{code}
+affected :: forall char. StringEdits Natural char -> Maybe (Interval Natural)
+-- ^ For a given set of edits @es@ return the interval @i = a ... b@ such that for any given string @str@ of sufficient length the following holds:
+--
+--   - For all @n :: Natural@: @n < a ==> str ! n == (str `apply` es) ! n@
+--   - There exists a @k :: Integer@ such that for all @n :: Integer@: @n > b ==> str ! (n + k) == (str `apply` es) ! n@
+--
+-- Intuitively: for any character @c@ of the new string @str `apply` es@ there exists a corresponding character in @str@ (offset by either 0 or a constant shift @k@) if the index of @c@ is /not/ contained in @affected es@.
+\end{code}
+\begin{comment}
+\begin{code}
+affected SEFail = Nothing
+affected (StringEdits es) = Just . toInterval $ go es Map.empty
+  where
+    toInterval :: Map Natural Integer -> Interval Natural
+    toInterval map
+      | Just (((minK, _), _), ((maxK, _), _)) <- (,) <$> Map.minViewWithKey map <*> Map.maxViewWithKey map
+      = let
+          maxV' = maximum . (0 :) $ do
+            offset <- [0..maxK]
+            v <- maybeToList $ Map.lookup (maxK - offset) map
+            v' <- maybeToList . fmap fromInteger $ negate v <$ guard (v <= 0)
+            guard $ v' >= succ offset
+            return $ v' - offset
+        in (minK Int.... maxK + maxV')
+      | otherwise
+      = Int.empty
+    go :: Seq (StringEdit Natural char) -> Map Natural Integer -> Map Natural Integer
+    go Seq.Empty offsets = offsets
+    go (es :> e) offsets = go es offsets'
+      where
+        p = e ^. sePos
+        -- p' = fromIntegral $ Map.foldrWithKey (\k o p -> bool (fromIntegral p) (o + p) $ k < fromIntegral p) (fromIntegral p) offsets
+        offsets' = Map.alter (Just . myOffset . fromMaybe 0) p offsets
+        myOffset :: Integer -> Integer
+        myOffset
+          | Insert _ _ <- e = pred
+          | Delete _   <- e = succ
+\end{code}
+\end{comment}
diff --git a/edit-lens/src/Control/Edit/String/Affected.lhs.tex b/edit-lens/src/Control/Edit/String/Affected.lhs.tex
new file mode 120000
index 0000000..b85cc7a
--- /dev/null
+++ b/edit-lens/src/Control/Edit/String/Affected.lhs.tex
@@ -0,0 +1 @@
+Affected.lhs
\ No newline at end of file
-- 
cgit v1.2.3