summaryrefslogtreecommitdiff
path: root/edit-lens/src/Control/DFST/Lens.lhs
blob: 95be34e35f69797abccf08b41dda90f6836ae1a7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
\begin{code}
{-# LANGUAGE ScopedTypeVariables
           , TemplateHaskell
           , ConstraintKinds
#-}

module Control.DFST.Lens
  ( StringEdit(..)
  , StringEdits(..)
  , insert, delete, replace
  , DFSTAction(..), DFSTComplement
  , dfstLens
  , module Control.DFST
  , module Control.Lens.Edit
  ) where

import Control.DFST
import Control.FST hiding (stInitial, stTransition, stAccept)
import qualified Control.FST as FST (stInitial, stTransition, stAccept)
import Control.Lens.Edit
import Control.Lens
import Control.Lens.TH
import Control.Edit

import Control.Monad

import Numeric.Natural
import Numeric.Interval (Interval, (...))
import qualified Numeric.Interval as Int

import Data.Sequence (Seq((:<|), (:|>)))
import qualified Data.Sequence as Seq
import Data.Set (Set)
import qualified Data.Set as Set
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as Map

import Data.Compositions.Snoc (Compositions)
import qualified Data.Compositions.Snoc as Comp

import Data.Algorithm.Diff (Diff, getDiff)
import qualified Data.Algorithm.Diff as Diff

import Data.Monoid
import Data.Bool (bool)
import Data.Maybe (fromMaybe, maybeToList, listToMaybe, catMaybes, isNothing, isJust)
import Data.Function (on)
import Data.Foldable (toList)
import Data.List (partition)

import Debug.Trace


data StringEdit char = Insert { _sePos :: Natural, _seInsertion :: char }
                     | Delete { _sePos :: Natural }
  deriving (Eq, Ord, Show, Read)

makeLenses ''StringEdit

data StringEdits char = StringEdits (Seq (StringEdit char))
                      | SEFail
  deriving (Eq, Ord, Show, Read)

makePrisms ''StringEdits

stringEdits :: Traversal' (StringEdits char) (StringEdit char)
stringEdits = _StringEdits . traverse

affected :: forall char. StringEdits char -> Maybe (Interval Natural)
-- ^ For a given set of edits @es@ return the interval @i = a ... b@ such that for any given string @str@ of sufficient length the following holds:
--
--   - For all @n :: Natural@: @n < a ==> str ! n == (str `apply` es) ! n@
--   - There exists a @k :: Integer@ such that for all @n :: Integer@: @n > b ==> str ! (n + k) == (str `apply` es) ! n@
--
-- Intuitively: for any character @c@ of the new string @str `apply` es@ there exists a corresponding character in @str@ (offset by either 0 or a constant shift @k@) if the index of @c@ is /not/ contained in @affected es@.
affected SEFail = Nothing
affected (StringEdits es) = Just . toInterval $ go es Map.empty
  where
    toInterval :: Map Natural Integer -> Interval Natural
    toInterval map
      | Just (((minK, _), _), ((maxK, _), _)) <- (,) <$> Map.minViewWithKey map <*> Map.maxViewWithKey map
      = let
          maxV' = maximum . (0 :) $ do
            offset <- [0..maxK]
            v <- maybeToList $ Map.lookup (maxK - offset) map
            v' <- maybeToList . fmap fromInteger $ negate v <$ guard (v <= 0)
            guard $ v' >= succ offset
            return $ v' - offset
        in (minK Int.... maxK + maxV')
      | otherwise
      = Int.empty
    go :: Seq (StringEdit char) -> Map Natural Integer -> Map Natural Integer
    go Seq.Empty offsets = offsets
    go (es :> e) offsets = go es offsets'
      where
        p = e ^. sePos
        p' = fromIntegral $ Map.foldrWithKey (\k o p -> bool (fromIntegral p) (o + p) $ k < fromIntegral p) (fromIntegral p) offsets
        offsets' = Map.alter (Just . myOffset . fromMaybe 0) p offsets
        myOffset :: Integer -> Integer
        myOffset
          | Insert _ _ <- e = pred
          | Delete _   <- e = succ

insert :: Natural -> char -> StringEdits char
insert n c = StringEdits .  Seq.singleton $ Insert n c
  
delete :: Natural -> StringEdits char
delete n = StringEdits .  Seq.singleton $ Delete n

replace :: Natural -> char -> StringEdits char
replace n c = insert n c <> delete n

instance Monoid (StringEdits char) where
  mempty = StringEdits Seq.empty
  SEFail `mappend` _ = SEFail
  _ `mappend` SEFail = SEFail
  (StringEdits Seq.Empty) `mappend` x = x
  x `mappend` (StringEdits Seq.Empty) = x
  (StringEdits x@(bs :|> b)) `mappend` (StringEdits y@(a :<| as))
    | (Insert n _) <- a
    , (Delete n') <- b
    , n == n'
    = StringEdits bs `mappend` StringEdits as
    | otherwise = StringEdits $ x `mappend` y

instance Module (StringEdits char) where
  type Domain (StringEdits char) = Seq char
  apply str SEFail = Nothing
  apply str (StringEdits Seq.Empty) = Just str
  apply str (StringEdits (es :|> Insert n c)) = (flip apply) (StringEdits es) =<< go str n c
    where
      go Seq.Empty n c
        | n == 0 = Just $ Seq.singleton c
        | otherwise = Nothing
      go str@(x :<| xs) n c
        | n == 0 = Just $ c <| str
        | otherwise = (x <|) <$> go xs (pred n) c
  apply str (StringEdits (es :|> Delete n)) = (flip apply) (StringEdits es) =<< go str n
    where
      go Seq.Empty _ = Nothing
      go (x :<| xs) n
        | n == 0 = Just xs
        | otherwise = (x <|) <$> go xs (pred n)

  init = Seq.empty
  divInit = StringEdits . Seq.unfoldl go . (0,)
    where
      go (_, Seq.Empty) = Nothing
      go (n, (c :<| cs)) = Just ((succ n, cs), Insert n c)

\end{code}

% TODO Make notation mathy

Um zunächst eine asymmetrische edit-lens `StringEdits -> StringEdits` mit akzeptabler Komplexität für einen bestimmten `DFST s` (entlang der \emph{Richtung} des DFSTs) zu konstruieren möchten wir folgendes Verfahren anwenden:

Gegeben eine Sequenz (`StringEdits`) von zu übersetzenden Änderungen genügt es die Übersetzung eines einzelnen `StringEdit`s in eine womöglich längere Sequenz von `StringEdits` anzugeben, alle `StringEdits` aus der Sequenz zu übersetzen (hierbei muss auf die korrekte Handhabung des Komplements geachtet werden) und jene Übersetzungen dann zu concatenieren.

Wir definieren zunächst die \emph{Wirkung} eines DFST auf einen festen String als eine Abbildung `state -> (state, String)`, die den aktuellen Zustand vorm Parsen des Strings auf den Zustand danach und die (womöglich leere) Ausgabe schickt.
Diese Wirkungen bilden einen Monoiden analog zu Endomorphismen, wobei die Resultat-Strings concateniert werden.

Die Unterliegende Idee ist nun im Komplement der edit-lens eine Liste von Wirkungen (eine für jedes Zeichen der Eingabe des DFSTs) und einen Cache der monoidalen Summen aller kontinuirlichen Teillisten zu halten.
Da wir wissen welche Stelle im input-String von einem gegebenen edit betroffen ist können wir, anhand der Wirkung des Teilstücks bis zu jener Stelle, den output-String in einen durch den edit unveränderten Prefix und einen womöglich betroffenen Suffix unterteilen.
Die Wirkung ab der betroffenen Stelle im input-String können wir also Komposition der Wirkung der durch den edit betroffenen Stelle und derer aller Zeichen danach bestimmen.
Nun gilt es nur noch die Differenz (als `StringEdits`) des vorherigen Suffixes im output-String und des aus der gerade berechneten Wirkung Bestimmten zu bestimmen.


% Für die Rückrichtung bietet es sich an eine Art primitive Invertierung des DFSTs zu berechnen.
% Gegeben den aktuellen DFST $A$ möchten wir einen anderen $A^{-1}$ finden, sodass gilt:

% \begin{itemize}
%   \item $A^{-1}$ akzeptiert einen String $s^{-1}$ (endet seinen Lauf in einem finalen Zustand) gdw. es einen String $s$ gibt, der unter $A$ die Ausgabe $s^{-1}$ produziert.
%   \item Wenn $A^{-1}$ einen String $s^{-1}$ akzeptiert so produziert die resultierende Ausgabe $s$ unter $A$ die Ausgabe $s^{-1}$.
% \end{itemize}

% Kann nicht funktionieren, denn $A^{-1}$ ist notwendigerweise nondeterministisch. Wird $A^{-1}$ dann zu einem DFST forciert (durch arbiträre Wahl einer Transition pro Zustand) gehen Informationen verloren—$A^{-1}$ produziert nicht den minimale edit auf dem input string (in der Tat beliebig schlecht) für einen gegeben edit auf dem output string.
  
% Stelle im bisherigen Lauf isolieren, an der edit im output-string passieren soll, breitensuche auf pfaden, die sich von dieser stelle aus unterscheiden?
% Gegeben einen Pfad und eine markierte Transition, finde Liste aller Pfade aufsteigend sortiert nach Unterschied zu gegebenem Pfad, mit Unterschieden "nahe" der markierten Transition zuerst — zudem jeweils edit auf dem Eingabestring
% Einfacher ist Breitensuche ab `stInitial` und zunächst diff auf eingabe-strings.
  
\begin{code}

data DFSTAction state input output = DFSTAction
  { runDFSTAction :: state -> (state, Seq output)
  , dfstaConsumes :: Seq input
  }

instance Monoid (DFSTAction state input output) where
  mempty = DFSTAction (\x -> (x, Seq.empty)) Seq.empty
  DFSTAction f cf `mappend` DFSTAction g cg = DFSTAction
    { runDFSTAction = \s -> let ((f -> (s', out')), out) = g s in (s', out <> out')
    , dfstaConsumes = cg <> cf
    }

type DFSTComplement state input output = Compositions (DFSTAction state input output)

runDFSTAction' :: DFSTComplement state input output -> state -> (state, Seq output)
runDFSTAction' = runDFSTAction . Comp.composed

dfstaConsumes' :: DFSTComplement state input output -> Seq input
dfstaConsumes' = dfstaConsumes . Comp.composed

dfstaProduces :: DFST state input output -> DFSTComplement state input output -> Seq output
dfstaProduces DFST{..} = snd . flip runDFSTAction' stInitial

type Debug state input output = (Show state, Show input, Show output)

type LState state input output = (Natural, (state, Maybe (input, Natural)))

dfstLens :: forall state input output. (Ord state, Ord input, Ord output, Debug state input output) => DFST state input output -> EditLens (DFSTComplement state input output) (StringEdits input) (StringEdits output)
dfstLens dfst@DFST{..} = EditLens ground propR propL
  where
    ground :: DFSTComplement state input output
    ground = Comp.fromList []

    propR :: (DFSTComplement state input output, StringEdits input) -> (DFSTComplement state input output, StringEdits output)
    propR (c, SEFail) = (c, SEFail)
    propR (c, StringEdits Seq.Empty) = (c, mempty)
    propR (c, StringEdits (es :> e))
      | fst (runDFSTAction' c' stInitial) `Set.member` stAccept = (c', es' <> es'')
      | otherwise                                               = (c', SEFail)
      where
        (cSuffix, cPrefix) = Comp.splitAt (Comp.length c - (e ^. sePos . from enum)) c
        cSuffix'
          | Delete _       <- e = Comp.take (pred $ Comp.length cSuffix) cSuffix -- TODO unsafe
          | Insert _ nChar <- e = cSuffix <> Comp.singleton (DFSTAction (\x -> runDFST' dfst x (pure nChar) Seq.empty) (Seq.singleton nChar))
        (pState, pOutput)  = runDFSTAction' cPrefix stInitial
        (_, sOutput ) = runDFSTAction' cSuffix  pState
        (_, sOutput') = runDFSTAction' cSuffix' pState
        (c', es') = propR (cSuffix' <> cPrefix, StringEdits es)
        es'' = strDiff sOutput sOutput' & stringEdits . sePos . from enum +~ Seq.length pOutput
        

    propL :: (DFSTComplement state input output, StringEdits output) -> (DFSTComplement state input output, StringEdits input)
    propL (c, StringEdits Seq.Empty) = (c, mempty)
    propL (c, es) = fromMaybe (c, SEFail) $ do
      newOut <- prevOut `apply` es
      affected' <- affected es
      let outFST :: FST (LState state input output) input output
          outFST = wordFST newOut `productFST` toFST dfst
          inflate by int
            | Int.null int = Int.empty
            | inf >= by = inf - by Int.... sup + by
            | otherwise = 0 Int.... sup + by
            where
              (inf, sup) = (,) <$> Int.inf <*> Int.sup $ int
          fragmentIntervals = (++ [all]) . takeWhile (not . Int.isSubsetOf (0 Int.... max)) $ inflate <$> 0 : [ 2^n | n <- [0..ceiling (logBase 2.0 max)] ] <*> pure affected'
            where
              max :: Num a => a
              max = fromIntegral $ Seq.length newOut
              all = 0 Int.... max
          runCandidates :: Interval Natural -- ^ Departure from complement-run only permitted within interval (to guarantee locality)
                       -> [ ( Seq (LState state input output, Maybe output) -- ^ Computed run
                            , StringEdits input
                            , DFSTComplement state input output
                            )
                          ]
          runCandidates focus = continueRun (Seq.empty, mempty) (c, mempty) 0
            where
              continueRun :: (Seq (LState state input output, Maybe output), StringEdits input)
                          -> (DFSTComplement state input output, DFSTComplement state input output) -- ^ Zipper into complement
                          -> Natural -- ^ Input position
                          -> [(Seq (LState state input output, Maybe output), StringEdits input, DFSTComplement state input output)]
              continueRun (run, inEdits) (c', remC) inP = do
                let
                  pos :: Natural
                  pos = fromIntegral $ Comp.length c - Comp.length c'
                  (c'', step) = Comp.splitAt (pred $ Comp.length c') c' -- TODO: unsafe?
                  current :: LState state input output
                  current
                    | Seq.Empty <- run      = (0, (stInitial, Nothing))
                    | (_ :> (st, _)) <- run = st
                  current' :: state
                  current' = let (_, (st, _)) = current
                              in st
                  next' :: state
                  next' = fst . runDFSTAction' step $ current'
                  oldIn :: Maybe input
                  oldIn = Seq.lookup 0 $ dfstaConsumes' step
                  outgoing :: LState state input output -> [(LState state input output, Maybe input, Maybe output)]
                  outgoing current = let go (st, minS) os acc
                                           | st == current = ($ acc) $ Set.fold (\(st', moutS) -> (. ((st', minS, moutS) :))) id os
                                           | otherwise     = acc
                                      in Map.foldrWithKey go [] $ FST.stTransition outFST 
                  isPreferred :: (LState state input output, Maybe input, Maybe output) -> Bool
                  isPreferred ((_, (st, Nothing)), inS, _) = st == next' && (fromMaybe True $ (==) <$> oldIn <*> inS)
                  isPreferred (st, _, _) = any isPreferred $ outgoing st -- By construction of `outFST`, `outgoing st` is a singleton
                  (preferred, alternate) = partition isPreferred $ outgoing current
                  assocEdit :: (LState state input output, Maybe input, Maybe output) -- ^ Transition
                            -> [ ( (DFSTComplement state input output, DFSTComplement state input output) -- ^ new `(c', remC)`, i.e. complement-zipper `(c', remC)` but with edit applied
                                 , StringEdits input
                                 , Natural
                                 )
                               ]
                  assocEdit (_, Just inS, _)
                    | oldIn == Just inS = [((c'', step <> remC), mempty, succ inP)]
                    | isJust oldIn      = [((c'', altStep inS <> remC), replace inP inS, succ inP), ((c', altStep inS <> remC), insert inP inS, succ inP)]
                    | otherwise         = [((c', altStep inS <> remC), insert inP inS, succ inP)]
                  assocEdit (_, Nothing, _) = [((c', remC), mempty, inP)] -- TODO: is this correct?
                  altStep :: input -> DFSTComplement state input output
                  altStep inS = Comp.singleton DFSTAction{..}
                    where
                      dfstaConsumes = Seq.singleton inS
                      runDFSTAction x = runDFST' dfst x (pure inS) Seq.empty
                  options
                    | pos `Int.member` focus = preferred ++ alternate
                    | otherwise              = preferred
                choice@(next, inS, outS) <- options
                ((c3, remC'), inEdits', inP') <- assocEdit choice
                -- let
                --   -- | Replace prefix of old complement to reflect current candidate
                --   -- TODO: smarter?
                --   (_, ((c3 <>) -> newComplement')) = Comp.splitAt (Comp.length c') c -- TODO: unsafe?
                --   acc = (run :> (next, outS), inEdits' <> inEdits, newComplement')
                --   dropSuffix = mconcat (replicate (Seq.length $ dfstaConsumes' c3) $ delete inP')
                --   fin
                --     | (trans, inEs, newComplement) <- acc = (trans, dropSuffix <> inEs, newComplement)
                let
                  acc = (run :> (next, outS), inEdits' <> inEdits)
                  dropSuffix = mconcat (replicate (Seq.length $ dfstaConsumes' c3) $ delete inP')
                  fin
                    | (trans, inEs) <- acc = (trans, dropSuffix <> inEs, remC')
                bool id (fin :) (next `Set.member` FST.stAccept outFST) $ continueRun acc (c3, remC') inP'
              
      -- Properties of the edits computed are determined mostly by the order candidates are generated below
      -- (_, inEs, c') <- (\xs -> foldr (\x f -> x `seq` f) listToMaybe xs $ xs) $ traceShowId fragmentIntervals >>= (\x -> (\y@(y1, y2, _) -> traceShow (y1, y2) y) <$> runCandidates x)

      (_, inEs, c') <- listToMaybe $ runCandidates =<< fragmentIntervals
      
      return (c', inEs)
      where
        (_, prevOut) = runDFSTAction' c stInitial

strDiff :: forall sym. Eq sym => Seq sym -> Seq sym -> StringEdits sym
-- ^ @strDiff a b@ calculates a set of edits, which, when applied to @a@, produce @b@
strDiff a b = snd . foldr toEdit (0, mempty) $ (getDiff `on` toList) a b
  where
    toEdit :: Diff sym -> (Natural, StringEdits sym) -> (Natural, StringEdits sym)
    toEdit (Diff.Both _ _) (n, es) = (succ n, es)
    toEdit (Diff.First _ ) (n, es) = (n, delete n <> es)
    toEdit (Diff.Second c) (n, es) = (succ n, insert n c <> es)
\end{code}