summaryrefslogtreecommitdiff
path: root/edit-lens/src/Control/Edit
diff options
context:
space:
mode:
Diffstat (limited to 'edit-lens/src/Control/Edit')
-rw-r--r--edit-lens/src/Control/Edit/String.lhs120
l---------edit-lens/src/Control/Edit/String.lhs.tex1
-rw-r--r--edit-lens/src/Control/Edit/String/Affected.lhs73
l---------edit-lens/src/Control/Edit/String/Affected.lhs.tex1
4 files changed, 195 insertions, 0 deletions
diff --git a/edit-lens/src/Control/Edit/String.lhs b/edit-lens/src/Control/Edit/String.lhs
new file mode 100644
index 0000000..c1411cf
--- /dev/null
+++ b/edit-lens/src/Control/Edit/String.lhs
@@ -0,0 +1,120 @@
1\begin{comment}
2\begin{code}
3{-# LANGUAGE TemplateHaskell
4#-}
5
6module Control.Edit.String
7 ( StringEdit(..), sePos, seInsertion
8 , StringEdits(..), _StringEdits, _SEFail, stringEdits
9 , insert, delete, replace
10 ) where
11
12import Control.Lens
13import Control.Lens.TH
14
15import Control.Edit
16
17import Numeric.Natural
18
19import Data.Sequence (Seq((:<|), (:|>)))
20import qualified Data.Sequence as Seq
21
22import Data.Monoid
23\end{code}
24\end{comment}
25
26\begin{defn}[Atomare edits of strings]
27Wir betrachten, zur Einfachheit, ein minimiales Set von Edits auf Strings\footnote{Wie in der Konstruktion zum Longest Common Subsequence Problem}, bestehend nur aus Einfügung eines einzelnen Zeichens $\sigma$ an einer bestimmten Position $\iota_n(\sigma)$ und löschen des Zeichens an einer einzelnen Position $\rho_n$:
28
29\begin{code}
30data StringEdit pos char = Insert { _sePos :: pos, _seInsertion :: char }
31 | Delete { _sePos :: pos }
32 deriving (Eq, Ord, Show, Read)
33
34-- Automatically derive van-leerhoven-lenses:
35--
36-- @sePos :: Lens' (StringEdits pos char) pos@
37-- @seInsertion :: Traversal' (StringEdits pos char) char@
38makeLenses ''StringEdit
39\end{code}
40
41Atomare edits werden, als Liste, zu edits komponiert.
42Wir führen einen speziellen edit ein, der nicht-Anwendbarkeit der edits repräsentiert:
43\begin{code}
44data StringEdits pos char = StringEdits (Seq (StringEdit pos char))
45 | SEFail
46 deriving (Eq, Ord, Show, Read)
47
48makePrisms ''StringEdits
49
50stringEdits :: Traversal (StringEdits pos char) (StringEdits pos' char') (StringEdit pos char) (StringEdit pos' char')
51\end{code}
52\end{defn}
53
54\begin{comment}
55\begin{code}
56stringEdits = _StringEdits . traverse
57
58insert :: pos -> char -> StringEdits pos char
59insert n c = StringEdits . Seq.singleton $ Insert n c
60
61delete :: pos -> StringEdits pos char
62delete n = StringEdits . Seq.singleton $ Delete n
63
64replace :: Eq pos => pos -> char -> StringEdits pos char
65replace n c = insert n c <> delete n
66
67-- | Rudimentarily optimize edit composition
68instance Eq pos => Monoid (StringEdits pos char) where
69 mempty = StringEdits Seq.empty
70 SEFail `mappend` _ = SEFail
71 _ `mappend` SEFail = SEFail
72 (StringEdits Seq.Empty) `mappend` x = x
73 x `mappend` (StringEdits Seq.Empty) = x
74 (StringEdits x@(bs :|> b)) `mappend` (StringEdits y@(a :<| as))
75 | (Insert n _) <- a
76 , (Delete n') <- b
77 , n == n'
78 = StringEdits bs `mappend` StringEdits as
79 | otherwise = StringEdits $ x `mappend` y
80\end{code}
81\end{comment}
82
83Da wir ein minimales Set an atomaren edits gewählt haben, ist die Definiton der Modulnstruktur über Strings des passenden Alphabets recht einfach:
84\begin{code}
85instance Module (StringEdits Natural char) where
86 type Domain (StringEdits Natural char) = Seq char
87 apply str SEFail = Nothing
88 apply str (StringEdits Seq.Empty) = Just str
89 apply str (StringEdits (es :|> Insert n c)) = flip apply (StringEdits es) =<< go str n c
90 where
91 go str (fromIntegral -> n) c
92 | Seq.length str >= n
93 = Just $ Seq.insertAt n c str
94 | otherwise
95 = Nothing
96 apply str (StringEdits (es :|> Delete n)) = flip apply (StringEdits es) =<< go str n
97 where
98 go str (fromIntegral -> n)
99 | Seq.length str > n
100 = Just $ Seq.deleteAt n str
101 | otherwise
102 = Nothing
103
104 init = Seq.empty
105 divInit = StringEdits . Seq.unfoldl go . (0,)
106 where
107 go (_, Seq.Empty) = Nothing
108 go (n, c :<| cs ) = Just ((succ n, cs), Insert n c)
109
110\end{code}
111
112\begin{eg}
113 Wir wenden $\iota_{80}(100) \rho_{80}$ auf das Wort $w$ aus Beispiel~\ref{eg:w100} an:
114
115 \begin{align*}
116 w^\prime & = w \cdot \iota_{80}(100) \rho_{80} \\
117 & = 0\, 1\, \ldots\, 80\, 100\, 81\, 82\, \ldots\, 98
118 \end{align*}
119\end{eg}
120
diff --git a/edit-lens/src/Control/Edit/String.lhs.tex b/edit-lens/src/Control/Edit/String.lhs.tex
new file mode 120000
index 0000000..6a78642
--- /dev/null
+++ b/edit-lens/src/Control/Edit/String.lhs.tex
@@ -0,0 +1 @@
String.lhs \ No newline at end of file
diff --git a/edit-lens/src/Control/Edit/String/Affected.lhs b/edit-lens/src/Control/Edit/String/Affected.lhs
new file mode 100644
index 0000000..851267b
--- /dev/null
+++ b/edit-lens/src/Control/Edit/String/Affected.lhs
@@ -0,0 +1,73 @@
1\begin{comment}
2\begin{code}
3module Control.Edit.String.Affected
4 ( affected
5 ) where
6
7import Control.Lens
8import Control.Lens.TH
9
10import Control.Edit
11import Control.Edit.String
12
13import Numeric.Natural
14import Numeric.Interval (Interval, (...))
15import qualified Numeric.Interval as Int
16
17import Data.Sequence (Seq((:<|), (:|>)))
18import qualified Data.Sequence as Seq
19
20import Data.Map.Lazy (Map)
21import qualified Data.Map.Lazy as Map
22
23import Data.Monoid
24
25import Control.Monad
26
27import Data.Maybe (fromMaybe, maybeToList, listToMaybe, catMaybes, isNothing, isJust, mapMaybe)
28\end{code}
29\end{comment}
30
31Um eine obere Schranke an das von einer Serie von edits betroffene Intervall zu bestimmen ordnen wir zunächst jeder von mindestens einem atomaren edit betroffenen Position $n$ im Eingabe-Wort einen $\text{offset}_n = \text{\# deletions} - \text{\# inserts}$ zu.
32Das gesuchte Intervall ist nun $(\text{minK}, \text{maxK})$, mit $\text{minK}$ der Position im Eingabe-Wort mit niedrigstem $\text{offset}$ und $\text{maxK}$ die Position im Eingabe-Wort mit höchstem $\text{offset}$, $\text{maxK}^\prime$, modifiziert um das Maximum aus $\{ 0 \} \cup \{ \text{maxK}_n \colon n \in \{ 0 \ldots \text{maxK}^\prime \} \}$ wobei $\text{maxK}_n = -1 \cdot (n + \text{offset}_n)$ an Position $n$ ist.
33
34\begin{code}
35affected :: forall char. StringEdits Natural char -> Maybe (Interval Natural)
36-- ^ For a given set of edits @es@ return the interval @i = a ... b@ such that for any given string @str@ of sufficient length the following holds:
37--
38-- - For all @n :: Natural@: @n < a ==> str ! n == (str `apply` es) ! n@
39-- - There exists a @k :: Integer@ such that for all @n :: Integer@: @n > b ==> str ! (n + k) == (str `apply` es) ! n@
40--
41-- Intuitively: for any character @c@ of the new string @str `apply` es@ there exists a corresponding character in @str@ (offset by either 0 or a constant shift @k@) if the index of @c@ is /not/ contained in @affected es@.
42\end{code}
43\begin{comment}
44\begin{code}
45affected SEFail = Nothing
46affected (StringEdits es) = Just . toInterval $ go es Map.empty
47 where
48 toInterval :: Map Natural Integer -> Interval Natural
49 toInterval map
50 | Just (((minK, _), _), ((maxK, _), _)) <- (,) <$> Map.minViewWithKey map <*> Map.maxViewWithKey map
51 = let
52 maxV' = maximum . (0 :) $ do
53 offset <- [0..maxK]
54 v <- maybeToList $ Map.lookup (maxK - offset) map
55 v' <- maybeToList . fmap fromInteger $ negate v <$ guard (v <= 0)
56 guard $ v' >= succ offset
57 return $ v' - offset
58 in (minK Int.... maxK + maxV')
59 | otherwise
60 = Int.empty
61 go :: Seq (StringEdit Natural char) -> Map Natural Integer -> Map Natural Integer
62 go Seq.Empty offsets = offsets
63 go (es :> e) offsets = go es offsets'
64 where
65 p = e ^. sePos
66 -- p' = fromIntegral $ Map.foldrWithKey (\k o p -> bool (fromIntegral p) (o + p) $ k < fromIntegral p) (fromIntegral p) offsets
67 offsets' = Map.alter (Just . myOffset . fromMaybe 0) p offsets
68 myOffset :: Integer -> Integer
69 myOffset
70 | Insert _ _ <- e = pred
71 | Delete _ <- e = succ
72\end{code}
73\end{comment}
diff --git a/edit-lens/src/Control/Edit/String/Affected.lhs.tex b/edit-lens/src/Control/Edit/String/Affected.lhs.tex
new file mode 120000
index 0000000..b85cc7a
--- /dev/null
+++ b/edit-lens/src/Control/Edit/String/Affected.lhs.tex
@@ -0,0 +1 @@
Affected.lhs \ No newline at end of file