diff options
Diffstat (limited to 'tp-bbcode/src')
| -rw-r--r-- | tp-bbcode/src/Thermoprint/Printout/BBCode.hs | 145 | ||||
| -rw-r--r-- | tp-bbcode/src/Thermoprint/Printout/BBCode/Attribute.hs | 39 |
2 files changed, 184 insertions, 0 deletions
diff --git a/tp-bbcode/src/Thermoprint/Printout/BBCode.hs b/tp-bbcode/src/Thermoprint/Printout/BBCode.hs new file mode 100644 index 0000000..ce2aa43 --- /dev/null +++ b/tp-bbcode/src/Thermoprint/Printout/BBCode.hs | |||
| @@ -0,0 +1,145 @@ | |||
| 1 | {-# LANGUAGE OverloadedStrings #-} | ||
| 2 | {-# LANGUAGE DeriveGeneric #-} | ||
| 3 | {-# LANGUAGE GADTs #-} | ||
| 4 | |||
| 5 | -- | Use 'Text.BBCode' to parse BBCode | ||
| 6 | module Thermoprint.Printout.BBCode | ||
| 7 | ( bbcode | ||
| 8 | , BBCodeError(..) | ||
| 9 | , TreeError(..) | ||
| 10 | , SemanticError(..) | ||
| 11 | ) where | ||
| 12 | |||
| 13 | import Data.Text (Text) | ||
| 14 | import Data.Map (Map) | ||
| 15 | |||
| 16 | import qualified Data.Text.Lazy as Lazy (Text) | ||
| 17 | import qualified Data.Text.Lazy as TL (fromStrict) | ||
| 18 | |||
| 19 | import Data.Sequence (Seq) | ||
| 20 | import qualified Data.Sequence as Seq (fromList, singleton) | ||
| 21 | |||
| 22 | import Data.CaseInsensitive (CI) | ||
| 23 | import qualified Data.CaseInsensitive as CI | ||
| 24 | |||
| 25 | import GHC.Generics (Generic) | ||
| 26 | import Control.Exception (Exception) | ||
| 27 | import Data.Typeable (Typeable) | ||
| 28 | |||
| 29 | import Data.Bifunctor (bimap, first) | ||
| 30 | import Control.Monad (join) | ||
| 31 | |||
| 32 | import Data.List (groupBy) | ||
| 33 | |||
| 34 | import Text.BBCode (DomForest, DomTree(..), TreeError(..)) | ||
| 35 | import qualified Text.BBCode as Raw (bbcode, BBCodeError(..)) | ||
| 36 | |||
| 37 | import Thermoprint.Printout | ||
| 38 | |||
| 39 | import Thermoprint.Printout.BBCode.Attribute | ||
| 40 | |||
-- | Errors that can occur while turning BBCode input into a 'Printout'
--
-- We replicate 'Raw.BBCodeError' but add a new failure mode documenting
-- incompatibility of the parsed syntax tree with our document format
data BBCodeError = LexerError String -- ^ Error while parsing input to stream of tokens
                 | TreeError TreeError -- ^ Error while parsing stream of tokens to syntax tree
                 | SemanticError SemanticError -- ^ Error while mapping syntax tree to document format
                 deriving (Show, Eq, Generic, Typeable)

instance Exception BBCodeError
| 48 | |||
morph' :: Raw.BBCodeError -> BBCodeError
-- ^ Lift a 'Raw.BBCodeError' into our own 'BBCodeError', constructor by constructor
morph' rawErr = case rawErr of
  Raw.LexerError msg -> LexerError msg
  Raw.TreeError tErr -> TreeError tErr
| 53 | |||
-- | An error occurred while parsing the DOM-Forest (@['DomTree']@)
data SemanticError
  = BlockInLineContext
    -- ^ A 'Block' structure was encountered when a 'Line' was expected
  | LineInBlockContext
    -- ^ A 'Line' structure was encountered when a 'Block' was expected
  | UnmappedBlockElement Text
    -- ^ We encountered an 'Element' that, in a 'Block' context, does not map to any structure
  | UnmappedLineElement Text
    -- ^ We encountered an 'Element' that, in a 'Line' context, does not map to any structure
  deriving (Show, Eq, Generic, Typeable)

instance Exception SemanticError
| 62 | |||
-- | Outcome of parsing one 'DomTree', recording in which contexts it is usable
data ParseResult
  = RBlock Block
    -- ^ Parses only as 'Block'
  | RLine Line
    -- ^ Parses only as 'Line'
  | RAmbiguous Block Line
    -- ^ Parses as either 'Block' or 'Line' depending on context
  | RNoParse SemanticError SemanticError
    -- ^ Does not parse as either 'Block' or 'Line'; carries the 'Block' error first and the 'Line' error second
  deriving (Show)
| 69 | |||
-- | Current parser context; the type index names the structure that is
-- produced when parsing in that context
data Context a where
  -- | We are expecting a 'Block'
  BlockCtx :: Context Block
  -- | We are expecting a 'Line'
  LineCtx :: Context Line
| 74 | |||
extract :: Context a -> ParseResult -> Either SemanticError a
-- ^ Pull the structure matching the given 'Context' out of a 'ParseResult'
--
-- An 'RNoParse' yields the error recorded for that context; a result that only
-- exists for the other context yields 'LineInBlockContext' or
-- 'BlockInLineContext' respectively
extract ctx result = case ctx of
  BlockCtx -> case result of
    RBlock block       -> Right block
    RAmbiguous block _ -> Right block
    RNoParse bErr _    -> Left bErr
    RLine _            -> Left LineInBlockContext
  LineCtx -> case result of
    RLine line         -> Right line
    RAmbiguous _ line  -> Right line
    RNoParse _ lErr    -> Left lErr
    RBlock _           -> Left BlockInLineContext
| 85 | |||
hasBlockCtx :: ParseResult -> Bool
-- ^ Whether the result may take part in a 'Block' 'Context'
--
-- 'False' exactly for 'RLine'; note that 'RNoParse' also counts as 'True'
hasBlockCtx result = case result of
  RLine _ -> False
  _       -> True
| 90 | |||
hasLineCtx :: ParseResult -> Bool
-- ^ Whether the result may take part in a 'Line' 'Context'
--
-- 'False' exactly for 'RBlock'; note that 'RNoParse' also counts as 'True'
hasLineCtx result = case result of
  RBlock _ -> False
  _        -> True
| 95 | |||
bbcode :: Text -> Either BBCodeError Printout
-- ^ Parse BBCode
--
-- Runs 'Raw.bbcode' and then 'morph', reporting failures of either stage as 'BBCodeError'
bbcode input = join (bimap morph' toPrintout (Raw.bbcode input))
  where
    -- Failures of the semantic stage are wrapped so both stages share one error type
    toPrintout = first SemanticError . morph
| 99 | |||
morph :: DomForest -> Either SemanticError Printout
-- ^ Parse a list of paragraphs
--
-- Every element of the forest is parsed in 'BlockCtx' and becomes a paragraph
-- consisting of a single 'Cooked' chunk; the first failure aborts the whole parse
morph forest = Seq.fromList <$> mapM paragraph forest
  where
    paragraph trees = Seq.singleton . Cooked <$> parse BlockCtx trees
| 105 | |||
parseDom :: DomTree -> ParseResult
-- ^ Parse a single 'DomTree', trying both 'asBlock' and 'asLine' for elements
--
-- Plain content goes through 'text': a 'Left' result is a 'Block' only, a
-- 'Right' result is usable as 'Line' or, wrapped in the 'Line' constructor, as 'Block'
parseDom (Content t) = case text (TL.fromStrict t) of
  Left block -> RBlock block
  Right line -> RAmbiguous (Line line) line
parseDom (Element t attrs cs) = case (asBlock t cs attrs, asLine t cs attrs) of
  (Right block, Right line) -> RAmbiguous block line
  (Right block, Left _)     -> RBlock block
  (Left _,      Right line) -> RLine line
  (Left bErr,   Left lErr)  -> RNoParse bErr lErr
| 119 | |||
mergeResult :: Monoid a => Context a -> [ParseResult] -> Either SemanticError a
-- ^ Merge a list of 'ParseResult's in a certain 'Context'
--
-- Each result is 'extract'ed and the pieces are combined left to right with
-- 'mappend'; the first result that fails to 'extract' determines the error.
-- An empty list yields 'mempty'
mergeResult ctx = foldr combine (Right mempty)
  where
    combine result merged = mappend <$> extract ctx result <*> merged
| 126 | |||
parse :: Monoid a => Context a -> [DomTree] -> Either SemanticError a
-- ^ Parse a list of 'DomTree's in a certain 'Context'
--
-- Outside of 'BlockCtx' this is @'mergeResult' ctx . map 'parseDom'@.
--
-- In 'BlockCtx' the results are split into maximal runs whose members all
-- share at least one 'Context'. A run usable entirely as 'Line' is merged in
-- 'LineCtx' and wrapped in the 'Line' constructor; any other run is merged in
-- 'BlockCtx'. The merged runs are then concatenated.
parse BlockCtx = fmap mconcat . mapM mergeGroup . groupCompatible . map parseDom
  where
    -- We track the contexts still shared by the whole run instead of comparing
    -- against the first element only: a run starting with a result usable in
    -- both contexts (e.g. plain text) would otherwise absorb 'RLine' as well
    -- as 'RBlock' successors and then fail to merge in either context.
    groupCompatible :: [ParseResult] -> [[ParseResult]]
    groupCompatible [] = []
    groupCompatible (r : rs) = (r : run) : groupCompatible rest
      where
        (run, rest) = takeCompatible (hasLineCtx r) (hasBlockCtx r) rs

    -- Longest prefix that keeps at least one context in common with the run
    -- so far, together with the remaining results
    takeCompatible :: Bool -> Bool -> [ParseResult] -> ([ParseResult], [ParseResult])
    takeCompatible _ _ [] = ([], [])
    takeCompatible lineOk blockOk (x : xs)
      | lineOk' || blockOk' = first (x :) (takeCompatible lineOk' blockOk' xs)
      | otherwise = ([], x : xs)
      where
        lineOk' = lineOk && hasLineCtx x
        blockOk' = blockOk && hasBlockCtx x

    mergeGroup :: [ParseResult] -> Either SemanticError Block
    mergeGroup xs
      | all hasLineCtx xs = Line <$> mergeResult LineCtx xs
      | otherwise = mergeResult BlockCtx xs
parse ctx = mergeResult ctx . map parseDom
| 138 | |||
asBlock :: CI Text -> [DomTree] -> Map (CI Text) Text -> Either SemanticError Block
-- ^ Interpret an element as a 'Block', given its tag, children and attributes
--
-- Only @VSpace@ is known; its height is read from the @height@ attribute via
-- 'lookupAttr' with a default of 1. Any other tag is reported as 'UnmappedBlockElement'
asBlock tag _ attrs
  | tag == "VSpace" = Right (VSpace (lookupAttr "height" True 1 attrs))
  | otherwise = Left (UnmappedBlockElement (CI.original tag))
| 142 | |||
asLine :: CI Text -> [DomTree] -> Map (CI Text) Text -> Either SemanticError Line
-- ^ Interpret an element as a 'Line', given its tag, children and attributes
--
-- Only @HSpace@ is known; its width is read from the @width@ attribute via
-- 'lookupAttr' with a default of 1. Any other tag is reported as 'UnmappedLineElement'
asLine tag _ attrs
  | tag == "HSpace" = Right (HSpace (lookupAttr "width" True 1 attrs))
  | otherwise = Left (UnmappedLineElement (CI.original tag))
diff --git a/tp-bbcode/src/Thermoprint/Printout/BBCode/Attribute.hs b/tp-bbcode/src/Thermoprint/Printout/BBCode/Attribute.hs new file mode 100644 index 0000000..538cca2 --- /dev/null +++ b/tp-bbcode/src/Thermoprint/Printout/BBCode/Attribute.hs | |||
| @@ -0,0 +1,39 @@ | |||
| 1 | {-# LANGUAGE DefaultSignatures #-} | ||
| 2 | |||
| 3 | -- | Parsing attributes | ||
| 4 | module Thermoprint.Printout.BBCode.Attribute | ||
| 5 | ( Attribute(..) | ||
| 6 | , lookupAttr | ||
| 7 | ) where | ||
| 8 | |||
| 9 | import Data.Text (Text) | ||
| 10 | import qualified Data.Text as T (unpack, empty) | ||
| 11 | |||
| 12 | import Data.Map (Map) | ||
| 13 | import qualified Data.Map as Map (lookup) | ||
| 14 | |||
| 15 | import Data.CaseInsensitive (CI) | ||
| 16 | import qualified Data.CaseInsensitive as CI | ||
| 17 | |||
| 18 | import Text.Read (readMaybe) | ||
| 19 | import Data.Maybe (fromMaybe) | ||
| 20 | |||
| 21 | import Control.Applicative (Alternative(..)) | ||
| 22 | |||
-- | Our own counterpart to 'Read' for attribute values, kept separate so that
-- the textual representation accepted for a type can be overridden
--
-- Types with a 'Read' instance get 'attrRead' for free through the default implementation
class Attribute a where
  -- | Try to parse the text of an attribute; 'Nothing' signals failure
  attrRead :: Text -> Maybe a
  default attrRead :: Read a => Text -> Maybe a
  attrRead raw = readMaybe (T.unpack raw)

-- | Relies on the 'Read'-based default
instance Attribute Integer
| 32 | |||
lookupAttr :: Attribute a => CI Text -> Bool -> a -> Map (CI Text) Text -> a
-- ^ Extract an attribute by name, falling back to the given default
--
-- The 'Bool' argument specifies whether we additionally accept the empty
-- string as key when the named key is absent. The default is returned when no
-- value is found or when the value found does not parse with 'attrRead'
lookupAttr t emptyOk def attrs = fromMaybe def (rawValue >>= attrRead)
  where
    named = Map.lookup t attrs
    rawValue
      | emptyOk = named <|> Map.lookup (CI.mk T.empty) attrs
      | otherwise = named
