1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
|
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE GADTs #-}
-- | Use 'Text.BBCode' to parse BBCode
module Thermoprint.Printout.BBCode
( bbcode
, BBCodeError(..)
, TreeError(..)
, SemanticError(..)
, module Thermoprint.Printout.BBCode.Inverse
) where
import Data.Text (Text)
import Data.Map (Map)
import qualified Data.Text.Lazy as Lazy (Text)
import qualified Data.Text.Lazy as TL (fromStrict)
import Data.Sequence (Seq)
import qualified Data.Sequence as Seq (fromList, singleton)
import Data.CaseInsensitive (CI)
import qualified Data.CaseInsensitive as CI
import GHC.Generics (Generic)
import Control.Exception (Exception)
import Data.Typeable (Typeable)
import Data.Bifunctor (bimap, first)
import Control.Monad (join)
import Data.List (groupBy)
import Text.BBCode (DomForest, DomTree(..), TreeError(..))
import qualified Text.BBCode as Raw (bbcode, BBCodeError(..))
import Thermoprint.Printout.BBCode.Inverse
import Thermoprint.Printout
import Thermoprint.Printout.BBCode.Attribute
-- ^ We replicate 'Raw.BBCodeError' but add a new failure mode documenting incompatibly of the parsed syntax tree with our document format
data BBCodeError = LexerError String -- ^ Error while parsing input to stream of tokens
| TreeError TreeError -- ^ Error while parsing stream of tokens to syntax tree
| SemanticError SemanticError -- ^ Error while mapping syntax tree to document format
deriving (Show, Eq, Generic, Typeable)
instance Exception BBCodeError
morph' :: Raw.BBCodeError -> BBCodeError
-- ^ Transform 'Raw.BBCodeError' to 'BBCodeError'
morph' (Raw.LexerError x) = LexerError x
morph' (Raw.TreeError x) = TreeError x
-- | An error ocurred while parsing the DOM-Forest (`['DomTree']`)
data SemanticError = BlockInLineContext -- ^ A 'Block' structure was encountered when a 'Line' was expected
| LineInBlockContext -- ^ A 'Line' structure was encountered when a 'Block' was expected
| UnmappedBlockElement Text -- ^ We encountered an 'Element' that, in a 'Block' context, does not map to any structure
| UnmappedLineElement Text -- ^ We encountered an 'Element' that, in a 'Line' context, does not map to any structure
deriving (Show, Eq, Generic, Typeable)
instance Exception SemanticError
-- | Result of parsing a single 'DomTree'
data ParseResult = RBlock Block -- ^ Parses only as 'Block'
| RLine Line -- ^ Parses only as 'Line'
| RAmbiguous Block Line -- ^ Parses as either 'Block' or 'Line' depending on context
| RNoParse SemanticError SemanticError -- ^ Does not parse as either 'Block' or 'Line'
deriving (Show)
-- | Current parser context
data Context a where
BlockCtx :: Context Block
LineCtx :: Context Line
extract :: Context a -> ParseResult -> Either SemanticError a
-- ^ Extract information from a 'ParseResult' given 'Context'
extract BlockCtx (RBlock b) = Right b
extract LineCtx (RLine l) = Right l
extract BlockCtx (RAmbiguous b _) = Right b
extract LineCtx (RAmbiguous _ l) = Right l
extract BlockCtx (RNoParse bErr _) = Left bErr
extract LineCtx (RNoParse _ lErr) = Left lErr
extract BlockCtx _ = Left LineInBlockContext
extract LineCtx _ = Left BlockInLineContext
hasBlockCtx :: ParseResult -> Bool
-- ^ Result can be 'extract'ed in a 'Block' 'Context'
hasBlockCtx (RLine _) = False
hasBlockCtx _ = True
hasLineCtx :: ParseResult -> Bool
-- ^ Result can be 'extract'ed in a 'Line' 'Context'
hasLineCtx (RBlock _) = False
hasLineCtx _ = True
bbcode :: Text -> Either BBCodeError Printout
-- ^ Parse BBCode
bbcode = join . fmap (first SemanticError) . bimap morph' morph . Raw.bbcode
morph :: DomForest -> Either SemanticError Printout
-- ^ Parse a list of paragraphs
--
-- Since we permit only cooked input via 'Raw' 'Paragraph' is identical to 'Block'
morph = fmap Seq.fromList . mapM (\t -> Seq.singleton . Cooked <$> parse BlockCtx t)
parseDom :: DomTree -> ParseResult
-- ^ Invoke 'asLine' and 'asBlock' to parse a single 'DomTree'
parseDom (Content t) = either RBlock (\l -> RAmbiguous (Line l) l) . text . TL.fromStrict $ t
parseDom (Element t attrs cs)
| Right blockParse' <- blockParse
, Right lineParse' <- lineParse = RAmbiguous blockParse' lineParse'
| Right blockParse' <- blockParse = RBlock blockParse'
| Right lineParse' <- lineParse = RLine lineParse'
| Left bErr <- blockParse
, Left lErr <- lineParse = RNoParse bErr lErr
where
blockParse = asBlock t cs attrs
lineParse = asLine t cs attrs
mergeResult :: Monoid a => Context a -> [ParseResult] -> Either SemanticError a
-- ^ Merge a list of 'ParseResults' in a certain 'Context'
mergeResult _ [] = Right mempty
mergeResult ctx (amb@(RAmbiguous _ _):xs) = mappend <$> extract ctx amb <*> mergeResult ctx xs
mergeResult ctx (err@(RNoParse _ _):_) = extract ctx err
mergeResult ctx (x:xs) = mappend <$> extract ctx x <*> mergeResult ctx xs
parse :: Monoid a => Context a -> [DomTree] -> Either SemanticError a
-- ^ Parse a list of 'DomTree's in a certain 'Context'
--
-- @parse ctx = 'mergeResult' ctx . map 'parseDom'@
parse BlockCtx = fmap mconcat . mapM mergeResult' . groupBy sameCtx . map parseDom
where
sameCtx a b = (hasLineCtx a && hasLineCtx b) || (hasBlockCtx a && hasBlockCtx b)
mergeResult' xs
| hasLineCtx `all` xs = Line <$> mergeResult LineCtx xs
| otherwise = mergeResult BlockCtx xs
parse ctx = mergeResult ctx . map parseDom
asBlock :: CI Text -> [DomTree] -> Map (CI Text) Text -> Either SemanticError Block
asBlock "VSpace" _ = Right . VSpace . lookupAttr "height" True 1
asBlock t _ = const $ Left . UnmappedBlockElement . CI.original $ t
asLine :: CI Text -> [DomTree] -> Map (CI Text) Text -> Either SemanticError Line
asLine "HSpace" _ = Right . HSpace . lookupAttr "width" True 1
asLine t _ = const $ Left . UnmappedLineElement . CI.original $ t
|