edit-lens/src/Control/Edit/String/Affected.lhs


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73

\begin{comment}
\begin{code}
module Control.Edit.String.Affected
  ( affected
  ) where

import Control.Lens
import Control.Lens.TH

import Control.Edit
import Control.Edit.String

import Numeric.Natural
import Numeric.Interval (Interval, (...))
import qualified Numeric.Interval as Int

import Data.Sequence (Seq((:<|), (:|>)))
import qualified Data.Sequence as Seq

import Data.Map.Lazy (Map)
import qualified Data.Map.Lazy as Map

import Data.Monoid

import Control.Monad

import Data.Maybe (fromMaybe, maybeToList, listToMaybe, catMaybes, isNothing, isJust, mapMaybe)
\end{code}
\end{comment}

Um eine obere Schranke an das von einer Serie von edits betroffene Intervall zu bestimmen ordnen wir zunächst jeder von mindestens einem atomaren edit betroffenen Position $n$ im Eingabe-Wort einen $\text{offset}_n = \text{\# deletions} - \text{\# inserts}$ zu.
Das gesuchte Intervall ist nun $(\text{minK}, \text{maxK})$, mit $\text{minK}$ der Position im Eingabe-Wort mit niedrigstem $\text{offset}$ und $\text{maxK}$ die Position im Eingabe-Wort mit höchstem $\text{offset}$, $\text{maxK}^\prime$, modifiziert um das Maximum aus $\{ 0 \} \cup \{ \text{maxK}_n \colon n \in \{ 0 \ldots \text{maxK}^\prime \} \}$ wobei $\text{maxK}_n = -1 \cdot (n + \text{offset}_n)$ an Position $n$ ist.

\begin{code}
affected :: forall char. StringEdits Natural char -> Maybe (Interval Natural)
-- ^ For a given set of edits @es@ return the interval @i = a ... b@ such that for any given string @str@ of sufficient length the following holds:
--
--   - For all @n :: Natural@: @n < a ==> str ! n == (str `apply` es) ! n@
--   - There exists a @k :: Integer@ such that for all @n :: Integer@: @n > b ==> str ! (n + k) == (str `apply` es) ! n@
--
-- Intuitively: for any character @c@ of the new string @str `apply` es@ there exists a corresponding character in @str@ (offset by either 0 or a constant shift @k@) if the index of @c@ is /not/ contained in @affected es@.
\end{code}
\begin{comment}
\begin{code}
affected SEFail = Nothing
affected (StringEdits es) = Just . toInterval $ go es Map.empty
  where
    toInterval :: Map Natural Integer -> Interval Natural
    toInterval map
      | Just (((minK, _), _), ((maxK, _), _)) <- (,) <$> Map.minViewWithKey map <*> Map.maxViewWithKey map
      = let
          maxV' = maximum . (0 :) $ do
            offset <- [0..maxK]
            v <- maybeToList $ Map.lookup (maxK - offset) map
            v' <- maybeToList . fmap fromInteger $ negate v <$ guard (v <= 0)
            guard $ v' >= succ offset
            return $ v' - offset
        in (minK Int.... maxK + maxV')
      | otherwise
      = Int.empty
    go :: Seq (StringEdit Natural char) -> Map Natural Integer -> Map Natural Integer
    go Seq.Empty offsets = offsets
    go (es :> e) offsets = go es offsets'
      where
        p = e ^. sePos
        -- p' = fromIntegral $ Map.foldrWithKey (\k o p -> bool (fromIntegral p) (o + p) $ k < fromIntegral p) (fromIntegral p) offsets
        offsets' = Map.alter (Just . myOffset . fromMaybe 0) p offsets
        myOffset :: Integer -> Integer
        myOffset
          | Insert _ _ <- e = pred
          | Delete _   <- e = succ
\end{code}
\end{comment}