diff options
Diffstat (limited to 'tp-bbcode/src')
-rw-r--r-- | tp-bbcode/src/Thermoprint/Printout/BBCode.hs | 145 | ||||
-rw-r--r-- | tp-bbcode/src/Thermoprint/Printout/BBCode/Attribute.hs | 39 |
2 files changed, 184 insertions, 0 deletions
diff --git a/tp-bbcode/src/Thermoprint/Printout/BBCode.hs b/tp-bbcode/src/Thermoprint/Printout/BBCode.hs new file mode 100644 index 0000000..ce2aa43 --- /dev/null +++ b/tp-bbcode/src/Thermoprint/Printout/BBCode.hs | |||
@@ -0,0 +1,145 @@ | |||
1 | {-# LANGUAGE OverloadedStrings #-} | ||
2 | {-# LANGUAGE DeriveGeneric #-} | ||
3 | {-# LANGUAGE GADTs #-} | ||
4 | |||
5 | -- | Use 'Text.BBCode' to parse BBCode | ||
6 | module Thermoprint.Printout.BBCode | ||
7 | ( bbcode | ||
8 | , BBCodeError(..) | ||
9 | , TreeError(..) | ||
10 | , SemanticError(..) | ||
11 | ) where | ||
12 | |||
13 | import Data.Text (Text) | ||
14 | import Data.Map (Map) | ||
15 | |||
16 | import qualified Data.Text.Lazy as Lazy (Text) | ||
17 | import qualified Data.Text.Lazy as TL (fromStrict) | ||
18 | |||
19 | import Data.Sequence (Seq) | ||
20 | import qualified Data.Sequence as Seq (fromList, singleton) | ||
21 | |||
22 | import Data.CaseInsensitive (CI) | ||
23 | import qualified Data.CaseInsensitive as CI | ||
24 | |||
25 | import GHC.Generics (Generic) | ||
26 | import Control.Exception (Exception) | ||
27 | import Data.Typeable (Typeable) | ||
28 | |||
29 | import Data.Bifunctor (bimap, first) | ||
30 | import Control.Monad (join) | ||
31 | |||
32 | import Data.List (groupBy) | ||
33 | |||
34 | import Text.BBCode (DomForest, DomTree(..), TreeError(..)) | ||
35 | import qualified Text.BBCode as Raw (bbcode, BBCodeError(..)) | ||
36 | |||
37 | import Thermoprint.Printout | ||
38 | |||
39 | import Thermoprint.Printout.BBCode.Attribute | ||
40 | |||
-- | Everything that can go wrong while turning BBCode source into a 'Printout'.
--
-- We replicate 'Raw.BBCodeError' but add a new failure mode documenting
-- incompatibility of the parsed syntax tree with our document format.
data BBCodeError
  = LexerError String           -- ^ Error while parsing input to stream of tokens
  | TreeError TreeError         -- ^ Error while parsing stream of tokens to syntax tree
  | SemanticError SemanticError -- ^ Error while mapping syntax tree to document format
  deriving (Show, Eq, Generic, Typeable)

-- | Uses the default 'Exception' methods so a 'BBCodeError' can be thrown.
instance Exception BBCodeError
48 | |||
-- | Transform 'Raw.BBCodeError' to 'BBCodeError'
--
-- Each constructor is mapped to the constructor of the same name, carrying
-- its payload over unchanged.
morph' :: Raw.BBCodeError -> BBCodeError
morph' rawErr = case rawErr of
  Raw.LexerError msg  -> LexerError msg
  Raw.TreeError tErr  -> TreeError tErr
53 | |||
-- | An error occurred while parsing the DOM-Forest (`['DomTree']`)
data SemanticError
  = BlockInLineContext
    -- ^ A 'Block' structure was encountered when a 'Line' was expected
  | LineInBlockContext
    -- ^ A 'Line' structure was encountered when a 'Block' was expected
  | UnmappedBlockElement Text
    -- ^ We encountered an 'Element' that, in a 'Block' context, does not map to any structure
  | UnmappedLineElement Text
    -- ^ We encountered an 'Element' that, in a 'Line' context, does not map to any structure
  deriving (Show, Eq, Generic, Typeable)

-- | Uses the default 'Exception' methods so a 'SemanticError' can be thrown.
instance Exception SemanticError
62 | |||
-- | Result of parsing a single 'DomTree'
--
-- Both interpretations (as 'Block' and as 'Line') are attempted; this type
-- records which of them succeeded.
data ParseResult
  = RBlock Block
    -- ^ Parses only as 'Block'
  | RLine Line
    -- ^ Parses only as 'Line'
  | RAmbiguous Block Line
    -- ^ Parses as either 'Block' or 'Line' depending on context
  | RNoParse SemanticError SemanticError
    -- ^ Does not parse as either 'Block' or 'Line'; carries the 'Block' error
    -- first and the 'Line' error second
  deriving (Show)
69 | |||
-- | Current parser context
--
-- The type index records which structure a parse in this context yields.
data Context a where
  -- | A 'Block' is expected
  BlockCtx :: Context Block
  -- | A 'Line' is expected
  LineCtx  :: Context Line
74 | |||
-- | Extract information from a 'ParseResult' given 'Context'
--
-- Succeeds when the result has an interpretation matching the context.
-- Otherwise it fails with the stored error for that context ('RNoParse'),
-- or with a context-mismatch error when only the other interpretation exists.
extract :: Context a -> ParseResult -> Either SemanticError a
extract BlockCtx result = case result of
  RBlock blk        -> Right blk
  RAmbiguous blk _  -> Right blk
  RNoParse blkErr _ -> Left blkErr
  RLine _           -> Left LineInBlockContext
extract LineCtx result = case result of
  RLine ln          -> Right ln
  RAmbiguous _ ln   -> Right ln
  RNoParse _ lnErr  -> Left lnErr
  RBlock _          -> Left BlockInLineContext
85 | |||
-- | Result is not restricted to a 'Line' 'Context'
--
-- Only 'RLine' yields 'False'; note that 'RNoParse' also yields 'True'.
hasBlockCtx :: ParseResult -> Bool
hasBlockCtx result = case result of
  RLine _ -> False
  _       -> True
90 | |||
-- | Result is not restricted to a 'Block' 'Context'
--
-- Only 'RBlock' yields 'False'; note that 'RNoParse' also yields 'True'.
hasLineCtx :: ParseResult -> Bool
hasLineCtx result = case result of
  RBlock _ -> False
  _        -> True
95 | |||
-- | Parse BBCode
--
-- Runs the raw parser, translating its errors with 'morph'', then maps the
-- resulting forest to a 'Printout' with 'morph', wrapping any failure of
-- that step in 'SemanticError'.
bbcode :: Text -> Either BBCodeError Printout
bbcode source = do
  forest <- first morph' (Raw.bbcode source)
  first SemanticError (morph forest)
99 | |||
-- | Parse a list of paragraphs
--
-- Every element of the forest is parsed in a 'Block' context; the resulting
-- 'Block' is wrapped in 'Cooked' and becomes the sole chunk of its paragraph.
-- The first element that fails to parse aborts the whole conversion.
morph :: DomForest -> Either SemanticError Printout
morph forest = Seq.fromList <$> mapM toParagraph forest
  where
    toParagraph trees = Seq.singleton . Cooked <$> parse BlockCtx trees
105 | |||
-- | Invoke 'asLine' and 'asBlock' to parse a single 'DomTree'
--
-- Plain content goes through 'text': a 'Left' result is only usable as a
-- 'Block', while a 'Right' result is usable both as a 'Line' and, wrapped in
-- the 'Line' constructor, as a 'Block'.
--
-- For elements both interpretations are attempted and the combination of
-- outcomes decides the 'ParseResult'.
parseDom :: DomTree -> ParseResult
parseDom (Content t) = case text (TL.fromStrict t) of
  Left blk -> RBlock blk
  Right ln -> RAmbiguous (Line ln) ln
parseDom (Element t attrs cs) =
  -- Matching on the pair makes the four outcomes visibly exhaustive.
  case (asBlock t cs attrs, asLine t cs attrs) of
    (Right blk, Right ln)   -> RAmbiguous blk ln
    (Right blk, Left _)     -> RBlock blk
    (Left _, Right ln)      -> RLine ln
    (Left blkErr, Left lnErr) -> RNoParse blkErr lnErr
119 | |||
-- | Merge a list of 'ParseResult's in a certain 'Context'
--
-- Results are 'extract'ed left to right and combined with 'mappend'; an empty
-- list yields 'mempty'. The first result that cannot be extracted in the
-- given context determines the error, and nothing after it is inspected
-- (for 'Either', @Left e \<*\> _ = Left e@).
mergeResult :: Monoid a => Context a -> [ParseResult] -> Either SemanticError a
mergeResult ctx = foldr combine (Right mempty)
  where
    combine result rest = mappend <$> extract ctx result <*> rest
126 | |||
-- | Parse a list of 'DomTree's in a certain 'Context'
--
-- In any context other than 'BlockCtx' this is
-- @'mergeResult' ctx . map 'parseDom'@.
--
-- In 'BlockCtx' the individual results are first grouped into runs; a run
-- whose members can all be read as 'Line's is merged in 'LineCtx' and wrapped
-- in the 'Line' constructor, any other run is merged in 'BlockCtx'. The
-- per-run results are then concatenated.
--
-- NOTE(review): 'groupBy' compares each candidate against the first element
-- of the current run, so a run headed by an ambiguous result absorbs both
-- block-only and line-only successors -- confirm this is intended.
parse :: Monoid a => Context a -> [DomTree] -> Either SemanticError a
parse BlockCtx trees = mconcat <$> mapM mergeRun runs
  where
    runs = groupBy compatible (map parseDom trees)
    compatible a b = (hasLineCtx a && hasLineCtx b) || (hasBlockCtx a && hasBlockCtx b)
    mergeRun run
      | all hasLineCtx run = Line <$> mergeResult LineCtx run
      | otherwise          = mergeResult BlockCtx run
parse ctx trees = mergeResult ctx (map parseDom trees)
138 | |||
-- | Interpret an element (tag name, children, attributes) as a 'Block'
--
-- Only @VSpace@ is recognised; its size is read from the @height@ attribute
-- via 'lookupAttr' (also accepting the empty key, defaulting to 1). The
-- children are ignored. Any other tag is reported as 'UnmappedBlockElement'
-- with its original spelling.
asBlock :: CI Text -> [DomTree] -> Map (CI Text) Text -> Either SemanticError Block
asBlock tag _children attrs
  | tag == "VSpace" = Right (VSpace (lookupAttr "height" True 1 attrs))
  | otherwise       = Left (UnmappedBlockElement (CI.original tag))
142 | |||
-- | Interpret an element (tag name, children, attributes) as a 'Line'
--
-- Only @HSpace@ is recognised; its size is read from the @width@ attribute
-- via 'lookupAttr' (also accepting the empty key, defaulting to 1). The
-- children are ignored. Any other tag is reported as 'UnmappedLineElement'
-- with its original spelling.
asLine :: CI Text -> [DomTree] -> Map (CI Text) Text -> Either SemanticError Line
asLine tag _children attrs
  | tag == "HSpace" = Right (HSpace (lookupAttr "width" True 1 attrs))
  | otherwise       = Left (UnmappedLineElement (CI.original tag))
diff --git a/tp-bbcode/src/Thermoprint/Printout/BBCode/Attribute.hs b/tp-bbcode/src/Thermoprint/Printout/BBCode/Attribute.hs new file mode 100644 index 0000000..538cca2 --- /dev/null +++ b/tp-bbcode/src/Thermoprint/Printout/BBCode/Attribute.hs | |||
@@ -0,0 +1,39 @@ | |||
1 | {-# LANGUAGE DefaultSignatures #-} | ||
2 | |||
3 | -- | Parsing attributes | ||
4 | module Thermoprint.Printout.BBCode.Attribute | ||
5 | ( Attribute(..) | ||
6 | , lookupAttr | ||
7 | ) where | ||
8 | |||
9 | import Data.Text (Text) | ||
10 | import qualified Data.Text as T (unpack, empty) | ||
11 | |||
12 | import Data.Map (Map) | ||
13 | import qualified Data.Map as Map (lookup) | ||
14 | |||
15 | import Data.CaseInsensitive (CI) | ||
16 | import qualified Data.CaseInsensitive as CI | ||
17 | |||
18 | import Text.Read (readMaybe) | ||
19 | import Data.Maybe (fromMaybe) | ||
20 | |||
21 | import Control.Applicative (Alternative(..)) | ||
22 | |||
-- | We build our own version of 'Read' so we can override the presentation used
--
-- Any type with a 'Read' instance gets an implementation for free: the
-- default unpacks the 'Text' and hands it to 'readMaybe'.
class Attribute a where
  -- | Try to interpret an attribute value; 'Nothing' signals failure
  attrRead :: Text -> Maybe a
  default attrRead :: Read a => Text -> Maybe a
  attrRead raw = readMaybe (T.unpack raw)

-- | Relies on the 'Read'-based default
instance Attribute Integer
32 | |||
-- | Extract an attribute by name
--
-- The 'Bool' argument specifies whether we additionally accept the empty
-- string as key; the named key takes precedence over the empty one. The
-- supplied default is returned when no value is found or when the value
-- found is rejected by 'attrRead'.
lookupAttr :: Attribute a => CI Text -> Bool -> a -> Map (CI Text) Text -> a
lookupAttr t emptyOk def attrs = fromMaybe def (rawValue >>= attrRead)
  where
    named = Map.lookup t attrs
    rawValue
      | emptyOk   = named <|> Map.lookup (CI.mk T.empty) attrs
      | otherwise = named