diff options
| author | Gregor Kleen <gkleen@yggdrasil.li> | 2019-05-30 12:18:08 +0200 |
|---|---|---|
| committer | Gregor Kleen <gkleen@yggdrasil.li> | 2019-05-30 12:18:08 +0200 |
| commit | f4c419b9ddec15bad267a4463f0720d6e28042d2 (patch) | |
| tree | 54a0259116476150247619c4410eae33f8669314 /edit-lens/src/Control/Edit | |
| parent | 8afbe1f7df24034dd16fdf2e89b0665b2318ae2a (diff) | |
| download | incremental-dfsts-f4c419b9ddec15bad267a4463f0720d6e28042d2.tar incremental-dfsts-f4c419b9ddec15bad267a4463f0720d6e28042d2.tar.gz incremental-dfsts-f4c419b9ddec15bad267a4463f0720d6e28042d2.tar.bz2 incremental-dfsts-f4c419b9ddec15bad267a4463f0720d6e28042d2.tar.xz incremental-dfsts-f4c419b9ddec15bad267a4463f0720d6e28042d2.zip | |
Further work
Diffstat (limited to 'edit-lens/src/Control/Edit')
| -rw-r--r-- | edit-lens/src/Control/Edit/String.lhs | 120 | ||||
| l--------- | edit-lens/src/Control/Edit/String.lhs.tex | 1 | ||||
| -rw-r--r-- | edit-lens/src/Control/Edit/String/Affected.lhs | 73 | ||||
| l--------- | edit-lens/src/Control/Edit/String/Affected.lhs.tex | 1 |
4 files changed, 195 insertions, 0 deletions
diff --git a/edit-lens/src/Control/Edit/String.lhs b/edit-lens/src/Control/Edit/String.lhs new file mode 100644 index 0000000..c1411cf --- /dev/null +++ b/edit-lens/src/Control/Edit/String.lhs | |||
| @@ -0,0 +1,120 @@ | |||
| 1 | \begin{comment} | ||
| 2 | \begin{code} | ||
| 3 | {-# LANGUAGE TemplateHaskell | ||
| 4 | #-} | ||
| 5 | |||
| 6 | module Control.Edit.String | ||
| 7 | ( StringEdit(..), sePos, seInsertion | ||
| 8 | , StringEdits(..), _StringEdits, _SEFail, stringEdits | ||
| 9 | , insert, delete, replace | ||
| 10 | ) where | ||
| 11 | |||
| 12 | import Control.Lens | ||
| 13 | import Control.Lens.TH | ||
| 14 | |||
| 15 | import Control.Edit | ||
| 16 | |||
| 17 | import Numeric.Natural | ||
| 18 | |||
| 19 | import Data.Sequence (Seq((:<|), (:|>))) | ||
| 20 | import qualified Data.Sequence as Seq | ||
| 21 | |||
| 22 | import Data.Monoid | ||
| 23 | \end{code} | ||
| 24 | \end{comment} | ||
| 25 | |||
| 26 | \begin{defn}[Atomare edits of strings] | ||
| 27 | Wir betrachten, zur Einfachheit, ein minimiales Set von Edits auf Strings\footnote{Wie in der Konstruktion zum Longest Common Subsequence Problem}, bestehend nur aus Einfügung eines einzelnen Zeichens $\sigma$ an einer bestimmten Position $\iota_n(\sigma)$ und löschen des Zeichens an einer einzelnen Position $\rho_n$: | ||
| 28 | |||
| 29 | \begin{code} | ||
| 30 | data StringEdit pos char = Insert { _sePos :: pos, _seInsertion :: char } | ||
| 31 | | Delete { _sePos :: pos } | ||
| 32 | deriving (Eq, Ord, Show, Read) | ||
| 33 | |||
| 34 | -- Automatically derive van-leerhoven-lenses: | ||
| 35 | -- | ||
| 36 | -- @sePos :: Lens' (StringEdits pos char) pos@ | ||
| 37 | -- @seInsertion :: Traversal' (StringEdits pos char) char@ | ||
| 38 | makeLenses ''StringEdit | ||
| 39 | \end{code} | ||
| 40 | |||
| 41 | Atomare edits werden, als Liste, zu edits komponiert. | ||
| 42 | Wir führen einen speziellen edit ein, der nicht-Anwendbarkeit der edits repräsentiert: | ||
| 43 | \begin{code} | ||
| 44 | data StringEdits pos char = StringEdits (Seq (StringEdit pos char)) | ||
| 45 | | SEFail | ||
| 46 | deriving (Eq, Ord, Show, Read) | ||
| 47 | |||
| 48 | makePrisms ''StringEdits | ||
| 49 | |||
| 50 | stringEdits :: Traversal (StringEdits pos char) (StringEdits pos' char') (StringEdit pos char) (StringEdit pos' char') | ||
| 51 | \end{code} | ||
| 52 | \end{defn} | ||
| 53 | |||
| 54 | \begin{comment} | ||
| 55 | \begin{code} | ||
| 56 | stringEdits = _StringEdits . traverse | ||
| 57 | |||
| 58 | insert :: pos -> char -> StringEdits pos char | ||
| 59 | insert n c = StringEdits . Seq.singleton $ Insert n c | ||
| 60 | |||
| 61 | delete :: pos -> StringEdits pos char | ||
| 62 | delete n = StringEdits . Seq.singleton $ Delete n | ||
| 63 | |||
| 64 | replace :: Eq pos => pos -> char -> StringEdits pos char | ||
| 65 | replace n c = insert n c <> delete n | ||
| 66 | |||
| 67 | -- | Rudimentarily optimize edit composition | ||
| 68 | instance Eq pos => Monoid (StringEdits pos char) where | ||
| 69 | mempty = StringEdits Seq.empty | ||
| 70 | SEFail `mappend` _ = SEFail | ||
| 71 | _ `mappend` SEFail = SEFail | ||
| 72 | (StringEdits Seq.Empty) `mappend` x = x | ||
| 73 | x `mappend` (StringEdits Seq.Empty) = x | ||
| 74 | (StringEdits x@(bs :|> b)) `mappend` (StringEdits y@(a :<| as)) | ||
| 75 | | (Insert n _) <- a | ||
| 76 | , (Delete n') <- b | ||
| 77 | , n == n' | ||
| 78 | = StringEdits bs `mappend` StringEdits as | ||
| 79 | | otherwise = StringEdits $ x `mappend` y | ||
| 80 | \end{code} | ||
| 81 | \end{comment} | ||
| 82 | |||
| 83 | Da wir ein minimales Set an atomaren edits gewählt haben, ist die Definiton der Modulnstruktur über Strings des passenden Alphabets recht einfach: | ||
| 84 | \begin{code} | ||
| 85 | instance Module (StringEdits Natural char) where | ||
| 86 | type Domain (StringEdits Natural char) = Seq char | ||
| 87 | apply str SEFail = Nothing | ||
| 88 | apply str (StringEdits Seq.Empty) = Just str | ||
| 89 | apply str (StringEdits (es :|> Insert n c)) = flip apply (StringEdits es) =<< go str n c | ||
| 90 | where | ||
| 91 | go str (fromIntegral -> n) c | ||
| 92 | | Seq.length str >= n | ||
| 93 | = Just $ Seq.insertAt n c str | ||
| 94 | | otherwise | ||
| 95 | = Nothing | ||
| 96 | apply str (StringEdits (es :|> Delete n)) = flip apply (StringEdits es) =<< go str n | ||
| 97 | where | ||
| 98 | go str (fromIntegral -> n) | ||
| 99 | | Seq.length str > n | ||
| 100 | = Just $ Seq.deleteAt n str | ||
| 101 | | otherwise | ||
| 102 | = Nothing | ||
| 103 | |||
| 104 | init = Seq.empty | ||
| 105 | divInit = StringEdits . Seq.unfoldl go . (0,) | ||
| 106 | where | ||
| 107 | go (_, Seq.Empty) = Nothing | ||
| 108 | go (n, c :<| cs ) = Just ((succ n, cs), Insert n c) | ||
| 109 | |||
| 110 | \end{code} | ||
| 111 | |||
| 112 | \begin{eg} | ||
| 113 | Wir wenden $\iota_{80}(100) \rho_{80}$ auf das Wort $w$ aus Beispiel~\ref{eg:w100} an: | ||
| 114 | |||
| 115 | \begin{align*} | ||
| 116 | w^\prime & = w \cdot \iota_{80}(100) \rho_{80} \\ | ||
| 117 | & = 0\, 1\, \ldots\, 80\, 100\, 81\, 82\, \ldots\, 98 | ||
| 118 | \end{align*} | ||
| 119 | \end{eg} | ||
| 120 | |||
diff --git a/edit-lens/src/Control/Edit/String.lhs.tex b/edit-lens/src/Control/Edit/String.lhs.tex new file mode 120000 index 0000000..6a78642 --- /dev/null +++ b/edit-lens/src/Control/Edit/String.lhs.tex | |||
| @@ -0,0 +1 @@ | |||
| String.lhs \ No newline at end of file | |||
diff --git a/edit-lens/src/Control/Edit/String/Affected.lhs b/edit-lens/src/Control/Edit/String/Affected.lhs new file mode 100644 index 0000000..851267b --- /dev/null +++ b/edit-lens/src/Control/Edit/String/Affected.lhs | |||
| @@ -0,0 +1,73 @@ | |||
| 1 | \begin{comment} | ||
| 2 | \begin{code} | ||
| 3 | module Control.Edit.String.Affected | ||
| 4 | ( affected | ||
| 5 | ) where | ||
| 6 | |||
| 7 | import Control.Lens | ||
| 8 | import Control.Lens.TH | ||
| 9 | |||
| 10 | import Control.Edit | ||
| 11 | import Control.Edit.String | ||
| 12 | |||
| 13 | import Numeric.Natural | ||
| 14 | import Numeric.Interval (Interval, (...)) | ||
| 15 | import qualified Numeric.Interval as Int | ||
| 16 | |||
| 17 | import Data.Sequence (Seq((:<|), (:|>))) | ||
| 18 | import qualified Data.Sequence as Seq | ||
| 19 | |||
| 20 | import Data.Map.Lazy (Map) | ||
| 21 | import qualified Data.Map.Lazy as Map | ||
| 22 | |||
| 23 | import Data.Monoid | ||
| 24 | |||
| 25 | import Control.Monad | ||
| 26 | |||
| 27 | import Data.Maybe (fromMaybe, maybeToList, listToMaybe, catMaybes, isNothing, isJust, mapMaybe) | ||
| 28 | \end{code} | ||
| 29 | \end{comment} | ||
| 30 | |||
| 31 | Um eine obere Schranke an das von einer Serie von edits betroffene Intervall zu bestimmen ordnen wir zunächst jeder von mindestens einem atomaren edit betroffenen Position $n$ im Eingabe-Wort einen $\text{offset}_n = \text{\# deletions} - \text{\# inserts}$ zu. | ||
| 32 | Das gesuchte Intervall ist nun $(\text{minK}, \text{maxK})$, mit $\text{minK}$ der Position im Eingabe-Wort mit niedrigstem $\text{offset}$ und $\text{maxK}$ die Position im Eingabe-Wort mit höchstem $\text{offset}$, $\text{maxK}^\prime$, modifiziert um das Maximum aus $\{ 0 \} \cup \{ \text{maxK}_n \colon n \in \{ 0 \ldots \text{maxK}^\prime \} \}$ wobei $\text{maxK}_n = -1 \cdot (n + \text{offset}_n)$ an Position $n$ ist. | ||
| 33 | |||
| 34 | \begin{code} | ||
| 35 | affected :: forall char. StringEdits Natural char -> Maybe (Interval Natural) | ||
| 36 | -- ^ For a given set of edits @es@ return the interval @i = a ... b@ such that for any given string @str@ of sufficient length the following holds: | ||
| 37 | -- | ||
| 38 | -- - For all @n :: Natural@: @n < a ==> str ! n == (str `apply` es) ! n@ | ||
| 39 | -- - There exists a @k :: Integer@ such that for all @n :: Integer@: @n > b ==> str ! (n + k) == (str `apply` es) ! n@ | ||
| 40 | -- | ||
| 41 | -- Intuitively: for any character @c@ of the new string @str `apply` es@ there exists a corresponding character in @str@ (offset by either 0 or a constant shift @k@) if the index of @c@ is /not/ contained in @affected es@. | ||
| 42 | \end{code} | ||
| 43 | \begin{comment} | ||
| 44 | \begin{code} | ||
| 45 | affected SEFail = Nothing | ||
| 46 | affected (StringEdits es) = Just . toInterval $ go es Map.empty | ||
| 47 | where | ||
| 48 | toInterval :: Map Natural Integer -> Interval Natural | ||
| 49 | toInterval map | ||
| 50 | | Just (((minK, _), _), ((maxK, _), _)) <- (,) <$> Map.minViewWithKey map <*> Map.maxViewWithKey map | ||
| 51 | = let | ||
| 52 | maxV' = maximum . (0 :) $ do | ||
| 53 | offset <- [0..maxK] | ||
| 54 | v <- maybeToList $ Map.lookup (maxK - offset) map | ||
| 55 | v' <- maybeToList . fmap fromInteger $ negate v <$ guard (v <= 0) | ||
| 56 | guard $ v' >= succ offset | ||
| 57 | return $ v' - offset | ||
| 58 | in (minK Int.... maxK + maxV') | ||
| 59 | | otherwise | ||
| 60 | = Int.empty | ||
| 61 | go :: Seq (StringEdit Natural char) -> Map Natural Integer -> Map Natural Integer | ||
| 62 | go Seq.Empty offsets = offsets | ||
| 63 | go (es :> e) offsets = go es offsets' | ||
| 64 | where | ||
| 65 | p = e ^. sePos | ||
| 66 | -- p' = fromIntegral $ Map.foldrWithKey (\k o p -> bool (fromIntegral p) (o + p) $ k < fromIntegral p) (fromIntegral p) offsets | ||
| 67 | offsets' = Map.alter (Just . myOffset . fromMaybe 0) p offsets | ||
| 68 | myOffset :: Integer -> Integer | ||
| 69 | myOffset | ||
| 70 | | Insert _ _ <- e = pred | ||
| 71 | | Delete _ <- e = succ | ||
| 72 | \end{code} | ||
| 73 | \end{comment} | ||
diff --git a/edit-lens/src/Control/Edit/String/Affected.lhs.tex b/edit-lens/src/Control/Edit/String/Affected.lhs.tex new file mode 120000 index 0000000..b85cc7a --- /dev/null +++ b/edit-lens/src/Control/Edit/String/Affected.lhs.tex | |||
| @@ -0,0 +1 @@ | |||
| Affected.lhs \ No newline at end of file | |||
