aboutsummaryrefslogtreecommitdiff
path: root/tp-bbcode/src/Thermoprint/Printout/BBCode.hs
blob: 1770b6d6b8278e1259e9decc021181d2f2b98c9e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE OverloadedLists #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE GADTs #-}

-- | Use 'Text.BBCode' to parse BBCode
module Thermoprint.Printout.BBCode
       ( bbcode
       , BBCodeError(..)
       , TreeError(..)
       , SemanticError(..)
       , module Thermoprint.Printout.BBCode.Inverse
       ) where

import Data.Text (Text)
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Set (Set)
import qualified Data.Set as Set

import qualified Data.Text.Lazy as Lazy (Text)
import qualified Data.Text.Lazy as TL (fromStrict)

import Data.Sequence (Seq)
import qualified Data.Sequence as Seq (fromList, singleton)

import Data.CaseInsensitive (CI)
import qualified Data.CaseInsensitive as CI

import GHC.Generics (Generic)
import Control.Exception (Exception)
import Data.Typeable (Typeable)

import Data.Bifunctor (bimap, first)
import Control.Monad (join)

import Data.List (groupBy)
import Data.Maybe (mapMaybe)

import Text.BBCode (DomForest, DomTree(..), TreeError(..))
import qualified Text.BBCode as Raw (bbcode, BBCodeError(..))

import Thermoprint.Printout.BBCode.Inverse

import Thermoprint.Printout

import Thermoprint.Printout.BBCode.Attribute

-- ^ We replicate 'Raw.BBCodeError' but add a new failure mode documenting incompatibly of the parsed syntax tree with our document format
data BBCodeError = LexerError String -- ^ Error while parsing input to stream of tokens
                 | TreeError TreeError -- ^ Error while parsing stream of tokens to syntax tree
                 | SemanticError SemanticError -- ^ Error while mapping syntax tree to document format
                 deriving (Show, Eq, Generic, Typeable)

instance Exception BBCodeError

morph' :: Raw.BBCodeError -> BBCodeError
-- ^ Transform 'Raw.BBCodeError' to 'BBCodeError'
morph' (Raw.LexerError x) = LexerError x
morph' (Raw.TreeError x)  = TreeError x

-- | An error ocurred while parsing the DOM-Forest (`['DomTree']`)
data SemanticError = BlockInLineContext -- ^ A 'Block' structure was encountered when a 'Line' was expected
                   | LineInBlockContext -- ^ A 'Line' structure was encountered when a 'Block' was expected
                   | UnmappedBlockElement Text -- ^ We encountered an 'Element' that, in a 'Block' context, does not map to any structure
                   | UnmappedLineElement Text -- ^ We encountered an 'Element' that, in a 'Line' context, does not map to any structure
  deriving (Show, Eq, Generic, Typeable)

instance Exception SemanticError

-- | Result of parsing a single 'DomTree'
data ParseResult = RBlock Block -- ^ Parses only as 'Block'
                 | RLine Line -- ^ Parses only as 'Line'
                 | RAmbiguous Block Line -- ^ Parses as either 'Block' or 'Line' depending on context
                 | RNoParse SemanticError SemanticError -- ^ Does not parse as either 'Block' or 'Line'
                 deriving (Show)

-- | Current parser context
data Context a where
  BlockCtx :: Context Block
  LineCtx  :: Context Line

extract :: Context a -> ParseResult -> Either SemanticError a
-- ^ Extract information from a 'ParseResult' given 'Context'
extract BlockCtx (RBlock b)        = Right b
extract LineCtx  (RLine l)         = Right l
extract BlockCtx (RAmbiguous b _)  = Right b
extract LineCtx  (RAmbiguous _ l)  = Right l
extract BlockCtx (RNoParse bErr _) = Left bErr
extract LineCtx  (RNoParse _ lErr) = Left lErr
extract BlockCtx _                 = Left LineInBlockContext
extract LineCtx  _                 = Left BlockInLineContext

hasBlockCtx :: ParseResult -> Bool
-- ^ Result can be 'extract'ed in a 'Block' 'Context'
hasBlockCtx (RLine _) = False
hasBlockCtx _         = True

hasLineCtx :: ParseResult -> Bool
-- ^ Result can be 'extract'ed in a 'Line' 'Context'
hasLineCtx (RBlock _) = False
hasLineCtx _          = True

bbcode :: Text -> Either BBCodeError Printout
-- ^ Parse BBCode
bbcode = join . fmap (first SemanticError) . bimap morph' morph . Raw.bbcode

morph :: DomForest -> Either SemanticError Printout
-- ^ Parse a list of paragraphs
--
-- Since we permit only cooked input via 'Raw' 'Paragraph' is identical to 'Block'
morph = fmap (Printout . Seq.fromList) . mapM (\t -> Paragraph . Seq.singleton . Cooked <$> parse BlockCtx t)

parseDom :: DomTree -> ParseResult
-- ^ Invoke 'asLine' and 'asBlock' to parse a single 'DomTree'
parseDom (Content t) = either RBlock (\l -> RAmbiguous (Line l) l) . text . TL.fromStrict $ t
parseDom (Element t attrs cs)
  | Right blockParse' <- blockParse
  , Right lineParse' <- lineParse   = RAmbiguous blockParse' lineParse'
  | Right blockParse' <- blockParse = RBlock blockParse'
  | Right lineParse' <- lineParse   = RLine lineParse'
  | Left bErr <- blockParse
  , Left lErr <- lineParse          = RNoParse bErr lErr
  where
    blockParse = asBlock t cs attrs
    lineParse  = asLine t cs attrs

mergeResult :: Monoid a => Context a -> [ParseResult] -> Either SemanticError a
-- ^ Merge a list of 'ParseResults' in a certain 'Context'
mergeResult _        []                        = Right mempty
mergeResult ctx      (amb@(RAmbiguous _ _):xs) = mappend <$> extract ctx amb <*> mergeResult ctx xs 
mergeResult ctx      (err@(RNoParse _ _):_)    = extract ctx err
mergeResult ctx      (x:xs)                    = mappend <$> extract ctx x <*> mergeResult ctx xs

parse :: Monoid a => Context a -> [DomTree] -> Either SemanticError a
-- ^ Parse a list of 'DomTree's in a certain 'Context'
--
-- @parse ctx = 'mergeResult' ctx . map 'parseDom'@
parse BlockCtx = fmap mconcat . mapM mergeResult' . groupBy sameCtx . map parseDom
  where
    sameCtx a b = (hasLineCtx a && hasLineCtx b) || (hasBlockCtx a && hasBlockCtx b)
    mergeResult' xs
      | hasLineCtx `all` xs = Line <$> mergeResult LineCtx xs
      | otherwise           = mergeResult BlockCtx xs
parse ctx      = mergeResult ctx . map parseDom

asBlock :: CI Text -> [DomTree] -> Map (CI Text) Text -> Either SemanticError Block
asBlock "VSpace" _  attrs = Right . VSpace $ lookupAttr "height" True 1 attrs
asBlock t        _  _     = Left . UnmappedBlockElement . CI.original $ t

asLine :: CI Text -> [DomTree] -> Map (CI Text) Text -> Either SemanticError Line
asLine "HSpace"  _  attrs = Right . HSpace $ lookupAttr "width" True 1 attrs
asLine c         t  attrs
  | (Just m) <- lookup c $ map (\(c, _, m) -> (CI.mk [c], m)) mTable
  = Markup [m] <$> parse LineCtx t
  | (Just m) <- lookup c $ map (\(_, a, m) -> (a, m)) mTable
  = Markup [m] <$> parse LineCtx t
  | "markup" <- c
  , ms <- Set.fromList $ mapMaybe (\(_, a, m) -> m <$ Map.lookup a attrs) mTable
  = Markup ms  <$> parse LineCtx t
  where
    mTable = [ ('b', "bold",         Bold)
             , ('u', "underline",    Underline)
             , ('h', "doubleHeight", DoubleHeight)
             , ('w', "doubleWidth",  DoubleWidth)
             ]
asLine t         _  _     = Left . UnmappedLineElement . CI.original $ t