edit-lens/src/Control/Edit/String.lhs


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120

\begin{comment}
\begin{code}
{-# LANGUAGE TemplateHaskell
#-}

module Control.Edit.String
  ( StringEdit(..), sePos, seInsertion
  , StringEdits(..), _StringEdits, _SEFail, stringEdits
  , insert, delete, replace
  ) where

import Control.Lens
import Control.Lens.TH

import Control.Edit

import Numeric.Natural

import Data.Sequence (Seq((:<|), (:|>)))
import qualified Data.Sequence as Seq

import Data.Monoid
\end{code}
\end{comment}

\begin{defn}[Atomare edits of strings]
Wir betrachten zur Einfachheit ein minimiales Set von Edits auf Strings\footnote{Wie in der Konstruktion zum Longest Common Subsequence Problem} bestehend nur aus Einfügung eines einzelnen Zeichens $\sigma$ an einer bestimmten Position $\iota_n(\sigma)$ und löschen des Zeichens an einer einzelnen Position $\rho_n$:

\begin{code}
data StringEdit pos char = Insert { _sePos :: pos, _seInsertion :: char }
                         | Delete { _sePos :: pos }
  deriving (Eq, Ord, Show, Read)

-- Automatically derive van Laarhoven-lenses:
--
-- @sePos :: Lens' (StringEdits pos char) pos@
-- @seInsertion :: Traversal' (StringEdits pos char) char@
makeLenses ''StringEdit
\end{code}

Atomare edits werden als Liste zu edits komponiert.
Wir führen einen speziellen edit ein, der nicht-Anwendbarkeit der edits repräsentiert:
\begin{code}
data StringEdits pos char = StringEdits (Seq (StringEdit pos char))
                          | SEFail
  deriving (Eq, Ord, Show, Read)

makePrisms ''StringEdits

stringEdits :: Traversal (StringEdits pos char) (StringEdits pos' char') (StringEdit pos char) (StringEdit pos' char')
\end{code}
\end{defn}

\begin{comment}
\begin{code}
stringEdits = _StringEdits . traverse

insert :: pos -> char -> StringEdits pos char
insert n c = StringEdits .  Seq.singleton $ Insert n c

delete :: pos -> StringEdits pos char
delete n = StringEdits .  Seq.singleton $ Delete n

replace :: Eq pos => pos -> char -> StringEdits pos char
replace n c = insert n c <> delete n

-- | Rudimentarily optimize edit composition
instance Eq pos => Monoid (StringEdits pos char) where
  mempty = StringEdits Seq.empty
  SEFail `mappend` _ = SEFail
  _ `mappend` SEFail = SEFail
  (StringEdits Seq.Empty) `mappend` x = x
  x `mappend` (StringEdits Seq.Empty) = x
  (StringEdits x@(bs :|> b)) `mappend` (StringEdits y@(a :<| as))
    | (Insert n _) <- a
    , (Delete n') <- b
    , n == n'
    = StringEdits bs `mappend` StringEdits as
    | otherwise = StringEdits $ x `mappend` y
\end{code}
\end{comment}

Da wir ein minimales Set an atomaren edits gewählt haben, ist die Definiton der Modulnstruktur über Strings des passenden Alphabets recht einfach:
\begin{code}
instance Module (StringEdits Natural char) where
  type Domain (StringEdits Natural char) = Seq char
  apply str SEFail = Nothing
  apply str (StringEdits Seq.Empty) = Just str
  apply str (StringEdits (es :|> Insert n c)) = flip apply (StringEdits es) =<< go str n c
    where
      go str (fromIntegral -> n) c
        | Seq.length str >= n
        = Just $ Seq.insertAt n c str
        | otherwise
        = Nothing
  apply str (StringEdits (es :|> Delete n)) = flip apply (StringEdits es) =<< go str n
    where
      go str (fromIntegral -> n)
        | Seq.length str  > n
        = Just $ Seq.deleteAt n str
        | otherwise
        = Nothing

  init = Seq.empty
  divInit = StringEdits . Seq.unfoldl go . (0,)
    where
      go (_, Seq.Empty) = Nothing
      go (n, c :<| cs ) = Just ((succ n, cs), Insert n c)

\end{code}

\begin{eg}
  Wir wenden $\iota_{80}(100) \rho_{80}$ auf das Wort $w$ aus Beispiel~\ref{eg:w100} an:

  \begin{align*}
    w^\prime & = w \cdot \iota_{80}(100) \rho_{80} \\
             & = 0\, 1\, \ldots\, 80\, 100\, 81\, 82\, \ldots\, 98
  \end{align*}
\end{eg}