diff options
author | Gregor Kleen <gkleen@yggdrasil.li> | 2019-05-30 12:18:08 +0200 |
---|---|---|
committer | Gregor Kleen <gkleen@yggdrasil.li> | 2019-05-30 12:18:08 +0200 |
commit | f4c419b9ddec15bad267a4463f0720d6e28042d2 (patch) | |
tree | 54a0259116476150247619c4410eae33f8669314 /edit-lens/src/Control/Edit/String.lhs | |
parent | 8afbe1f7df24034dd16fdf2e89b0665b2318ae2a (diff) | |
download | incremental-dfsts-f4c419b9ddec15bad267a4463f0720d6e28042d2.tar incremental-dfsts-f4c419b9ddec15bad267a4463f0720d6e28042d2.tar.gz incremental-dfsts-f4c419b9ddec15bad267a4463f0720d6e28042d2.tar.bz2 incremental-dfsts-f4c419b9ddec15bad267a4463f0720d6e28042d2.tar.xz incremental-dfsts-f4c419b9ddec15bad267a4463f0720d6e28042d2.zip |
Further work
Diffstat (limited to 'edit-lens/src/Control/Edit/String.lhs')
-rw-r--r-- | edit-lens/src/Control/Edit/String.lhs | 120 |
1 files changed, 120 insertions, 0 deletions
diff --git a/edit-lens/src/Control/Edit/String.lhs b/edit-lens/src/Control/Edit/String.lhs new file mode 100644 index 0000000..c1411cf --- /dev/null +++ b/edit-lens/src/Control/Edit/String.lhs | |||
@@ -0,0 +1,120 @@ | |||
1 | \begin{comment} | ||
2 | \begin{code} | ||
3 | {-# LANGUAGE TemplateHaskell | ||
4 | #-} | ||
5 | |||
6 | module Control.Edit.String | ||
7 | ( StringEdit(..), sePos, seInsertion | ||
8 | , StringEdits(..), _StringEdits, _SEFail, stringEdits | ||
9 | , insert, delete, replace | ||
10 | ) where | ||
11 | |||
12 | import Control.Lens | ||
13 | import Control.Lens.TH | ||
14 | |||
15 | import Control.Edit | ||
16 | |||
17 | import Numeric.Natural | ||
18 | |||
19 | import Data.Sequence (Seq((:<|), (:|>))) | ||
20 | import qualified Data.Sequence as Seq | ||
21 | |||
22 | import Data.Monoid | ||
23 | \end{code} | ||
24 | \end{comment} | ||
25 | |||
26 | \begin{defn}[Atomare edits of strings] | ||
27 | Wir betrachten, zur Einfachheit, ein minimiales Set von Edits auf Strings\footnote{Wie in der Konstruktion zum Longest Common Subsequence Problem}, bestehend nur aus Einfügung eines einzelnen Zeichens $\sigma$ an einer bestimmten Position $\iota_n(\sigma)$ und löschen des Zeichens an einer einzelnen Position $\rho_n$: | ||
28 | |||
29 | \begin{code} | ||
30 | data StringEdit pos char = Insert { _sePos :: pos, _seInsertion :: char } | ||
31 | | Delete { _sePos :: pos } | ||
32 | deriving (Eq, Ord, Show, Read) | ||
33 | |||
34 | -- Automatically derive van-leerhoven-lenses: | ||
35 | -- | ||
36 | -- @sePos :: Lens' (StringEdits pos char) pos@ | ||
37 | -- @seInsertion :: Traversal' (StringEdits pos char) char@ | ||
38 | makeLenses ''StringEdit | ||
39 | \end{code} | ||
40 | |||
41 | Atomare edits werden, als Liste, zu edits komponiert. | ||
42 | Wir führen einen speziellen edit ein, der nicht-Anwendbarkeit der edits repräsentiert: | ||
43 | \begin{code} | ||
44 | data StringEdits pos char = StringEdits (Seq (StringEdit pos char)) | ||
45 | | SEFail | ||
46 | deriving (Eq, Ord, Show, Read) | ||
47 | |||
48 | makePrisms ''StringEdits | ||
49 | |||
50 | stringEdits :: Traversal (StringEdits pos char) (StringEdits pos' char') (StringEdit pos char) (StringEdit pos' char') | ||
51 | \end{code} | ||
52 | \end{defn} | ||
53 | |||
54 | \begin{comment} | ||
55 | \begin{code} | ||
56 | stringEdits = _StringEdits . traverse | ||
57 | |||
58 | insert :: pos -> char -> StringEdits pos char | ||
59 | insert n c = StringEdits . Seq.singleton $ Insert n c | ||
60 | |||
61 | delete :: pos -> StringEdits pos char | ||
62 | delete n = StringEdits . Seq.singleton $ Delete n | ||
63 | |||
64 | replace :: Eq pos => pos -> char -> StringEdits pos char | ||
65 | replace n c = insert n c <> delete n | ||
66 | |||
67 | -- | Rudimentarily optimize edit composition | ||
68 | instance Eq pos => Monoid (StringEdits pos char) where | ||
69 | mempty = StringEdits Seq.empty | ||
70 | SEFail `mappend` _ = SEFail | ||
71 | _ `mappend` SEFail = SEFail | ||
72 | (StringEdits Seq.Empty) `mappend` x = x | ||
73 | x `mappend` (StringEdits Seq.Empty) = x | ||
74 | (StringEdits x@(bs :|> b)) `mappend` (StringEdits y@(a :<| as)) | ||
75 | | (Insert n _) <- a | ||
76 | , (Delete n') <- b | ||
77 | , n == n' | ||
78 | = StringEdits bs `mappend` StringEdits as | ||
79 | | otherwise = StringEdits $ x `mappend` y | ||
80 | \end{code} | ||
81 | \end{comment} | ||
82 | |||
83 | Da wir ein minimales Set an atomaren edits gewählt haben, ist die Definiton der Modulnstruktur über Strings des passenden Alphabets recht einfach: | ||
84 | \begin{code} | ||
85 | instance Module (StringEdits Natural char) where | ||
86 | type Domain (StringEdits Natural char) = Seq char | ||
87 | apply str SEFail = Nothing | ||
88 | apply str (StringEdits Seq.Empty) = Just str | ||
89 | apply str (StringEdits (es :|> Insert n c)) = flip apply (StringEdits es) =<< go str n c | ||
90 | where | ||
91 | go str (fromIntegral -> n) c | ||
92 | | Seq.length str >= n | ||
93 | = Just $ Seq.insertAt n c str | ||
94 | | otherwise | ||
95 | = Nothing | ||
96 | apply str (StringEdits (es :|> Delete n)) = flip apply (StringEdits es) =<< go str n | ||
97 | where | ||
98 | go str (fromIntegral -> n) | ||
99 | | Seq.length str > n | ||
100 | = Just $ Seq.deleteAt n str | ||
101 | | otherwise | ||
102 | = Nothing | ||
103 | |||
104 | init = Seq.empty | ||
105 | divInit = StringEdits . Seq.unfoldl go . (0,) | ||
106 | where | ||
107 | go (_, Seq.Empty) = Nothing | ||
108 | go (n, c :<| cs ) = Just ((succ n, cs), Insert n c) | ||
109 | |||
110 | \end{code} | ||
111 | |||
112 | \begin{eg} | ||
113 | Wir wenden $\iota_{80}(100) \rho_{80}$ auf das Wort $w$ aus Beispiel~\ref{eg:w100} an: | ||
114 | |||
115 | \begin{align*} | ||
116 | w^\prime & = w \cdot \iota_{80}(100) \rho_{80} \\ | ||
117 | & = 0\, 1\, \ldots\, 80\, 100\, 81\, 82\, \ldots\, 98 | ||
118 | \end{align*} | ||
119 | \end{eg} | ||
120 | |||