From 57c56564d15cd5c83a4f1d1bab5490e6b75e8656 Mon Sep 17 00:00:00 2001 From: Gregor Kleen Date: Mon, 18 Jan 2016 12:09:36 +0000 Subject: Moved Printout.BBCode to own module --- tp-bbcode/src/Thermoprint/Printout/BBCode.hs | 145 +++++++++++++++++++++ .../src/Thermoprint/Printout/BBCode/Attribute.hs | 39 ++++++ 2 files changed, 184 insertions(+) create mode 100644 tp-bbcode/src/Thermoprint/Printout/BBCode.hs create mode 100644 tp-bbcode/src/Thermoprint/Printout/BBCode/Attribute.hs (limited to 'tp-bbcode/src/Thermoprint/Printout') diff --git a/tp-bbcode/src/Thermoprint/Printout/BBCode.hs b/tp-bbcode/src/Thermoprint/Printout/BBCode.hs new file mode 100644 index 0000000..ce2aa43 --- /dev/null +++ b/tp-bbcode/src/Thermoprint/Printout/BBCode.hs @@ -0,0 +1,145 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE GADTs #-} + +-- | Use 'Text.BBCode' to parse BBCode +module Thermoprint.Printout.BBCode + ( bbcode + , BBCodeError(..) + , TreeError(..) + , SemanticError(..) + ) where + +import Data.Text (Text) +import Data.Map (Map) + +import qualified Data.Text.Lazy as Lazy (Text) +import qualified Data.Text.Lazy as TL (fromStrict) + +import Data.Sequence (Seq) +import qualified Data.Sequence as Seq (fromList, singleton) + +import Data.CaseInsensitive (CI) +import qualified Data.CaseInsensitive as CI + +import GHC.Generics (Generic) +import Control.Exception (Exception) +import Data.Typeable (Typeable) + +import Data.Bifunctor (bimap, first) +import Control.Monad (join) + +import Data.List (groupBy) + +import Text.BBCode (DomForest, DomTree(..), TreeError(..)) +import qualified Text.BBCode as Raw (bbcode, BBCodeError(..)) + +import Thermoprint.Printout + +import Thermoprint.Printout.BBCode.Attribute + +-- ^ We replicate 'Raw.BBCodeError' but add a new failure mode documenting incompatibly of the parsed syntax tree with our document format +data BBCodeError = LexerError String -- ^ Error while parsing input to stream of tokens + | TreeError TreeError -- ^ Error while parsing stream of tokens to syntax tree + | SemanticError SemanticError -- ^ Error while mapping syntax tree to document format + deriving (Show, Eq, Generic, Typeable) + +instance Exception BBCodeError + +morph' :: Raw.BBCodeError -> BBCodeError +-- ^ Transform 'Raw.BBCodeError' to 'BBCodeError' +morph' (Raw.LexerError x) = LexerError x +morph' (Raw.TreeError x) = TreeError x + +-- | An error ocurred while parsing the DOM-Forest (`['DomTree']`) +data SemanticError = BlockInLineContext -- ^ A 'Block' structure was encountered when a 'Line' was expected + | LineInBlockContext -- ^ A 'Line' structure was encountered when a 'Block' was expected + | UnmappedBlockElement Text -- ^ We encountered an 'Element' that, in a 'Block' context, does not map to any structure + | UnmappedLineElement Text -- ^ We encountered an 'Element' that, in a 'Line' context, does not map to any structure + deriving (Show, Eq, Generic, Typeable) + +instance Exception SemanticError + +-- | Result of parsing a single 'DomTree' +data ParseResult = RBlock Block -- ^ Parses only as 'Block' + | RLine Line -- ^ Parses only as 'Line' + | RAmbiguous Block Line -- ^ Parses as either 'Block' or 'Line' depending on context + | RNoParse SemanticError SemanticError -- ^ Does not parse as either 'Block' or 'Line' + deriving (Show) + +-- | Current parser context +data Context a where + BlockCtx :: Context Block + LineCtx :: Context Line + +extract :: Context a -> ParseResult -> Either SemanticError a +-- ^ Extract information from a 'ParseResult' given 'Context' +extract BlockCtx (RBlock b) = Right b +extract LineCtx (RLine l) = Right l +extract BlockCtx (RAmbiguous b _) = Right b +extract LineCtx (RAmbiguous _ l) = Right l +extract BlockCtx (RNoParse bErr _) = Left bErr +extract LineCtx (RNoParse _ lErr) = Left lErr +extract BlockCtx _ = Left LineInBlockContext +extract LineCtx _ = Left BlockInLineContext + +hasBlockCtx :: ParseResult -> Bool +-- ^ Result can be 'extract'ed in a 'Block' 'Context' +hasBlockCtx (RLine _) = False +hasBlockCtx _ = True + +hasLineCtx :: ParseResult -> Bool +-- ^ Result can be 'extract'ed in a 'Line' 'Context' +hasLineCtx (RBlock _) = False +hasLineCtx _ = True + +bbcode :: Text -> Either BBCodeError Printout +-- ^ Parse BBCode +bbcode = join . fmap (first SemanticError) . bimap morph' morph . Raw.bbcode + +morph :: DomForest -> Either SemanticError Printout +-- ^ Parse a list of paragraphs +-- +-- Since we permit only cooked input via 'Raw' 'Paragraph' is identical to 'Block' +morph = fmap Seq.fromList . mapM (\t -> Seq.singleton . Cooked <$> parse BlockCtx t) + +parseDom :: DomTree -> ParseResult +-- ^ Invoke 'asLine' and 'asBlock' to parse a single 'DomTree' +parseDom (Content t) = either RBlock (\l -> RAmbiguous (Line l) l) . text . TL.fromStrict $ t +parseDom (Element t attrs cs) + | Right blockParse' <- blockParse + , Right lineParse' <- lineParse = RAmbiguous blockParse' lineParse' + | Right blockParse' <- blockParse = RBlock blockParse' + | Right lineParse' <- lineParse = RLine lineParse' + | Left bErr <- blockParse + , Left lErr <- lineParse = RNoParse bErr lErr + where + blockParse = asBlock t cs attrs + lineParse = asLine t cs attrs + +mergeResult :: Monoid a => Context a -> [ParseResult] -> Either SemanticError a +-- ^ Merge a list of 'ParseResults' in a certain 'Context' +mergeResult _ [] = Right mempty +mergeResult ctx (amb@(RAmbiguous _ _):xs) = mappend <$> extract ctx amb <*> mergeResult ctx xs +mergeResult ctx (err@(RNoParse _ _):_) = extract ctx err +mergeResult ctx (x:xs) = mappend <$> extract ctx x <*> mergeResult ctx xs + +parse :: Monoid a => Context a -> [DomTree] -> Either SemanticError a +-- ^ Parse a list of 'DomTree's in a certain 'Context' +-- +-- @parse ctx = 'mergeResult' ctx . map 'parseDom'@ +parse BlockCtx = fmap mconcat . mapM mergeResult' . groupBy sameCtx . map parseDom + where + sameCtx a b = (hasLineCtx a && hasLineCtx b) || (hasBlockCtx a && hasBlockCtx b) + mergeResult' xs + | hasLineCtx `all` xs = Line <$> mergeResult LineCtx xs + | otherwise = mergeResult BlockCtx xs +parse ctx = mergeResult ctx . map parseDom + +asBlock :: CI Text -> [DomTree] -> Map (CI Text) Text -> Either SemanticError Block +asBlock "VSpace" _ = Right . VSpace . lookupAttr "height" True 1 +asBlock t _ = const $ Left . UnmappedBlockElement . CI.original $ t + +asLine :: CI Text -> [DomTree] -> Map (CI Text) Text -> Either SemanticError Line +asLine "HSpace" _ = Right . HSpace . lookupAttr "width" True 1 +asLine t _ = const $ Left . UnmappedLineElement . CI.original $ t diff --git a/tp-bbcode/src/Thermoprint/Printout/BBCode/Attribute.hs b/tp-bbcode/src/Thermoprint/Printout/BBCode/Attribute.hs new file mode 100644 index 0000000..538cca2 --- /dev/null +++ b/tp-bbcode/src/Thermoprint/Printout/BBCode/Attribute.hs @@ -0,0 +1,39 @@ +{-# LANGUAGE DefaultSignatures #-} + +-- | Parsing attributes +module Thermoprint.Printout.BBCode.Attribute + ( Attribute(..) + , lookupAttr + ) where + +import Data.Text (Text) +import qualified Data.Text as T (unpack, empty) + +import Data.Map (Map) +import qualified Data.Map as Map (lookup) + +import Data.CaseInsensitive (CI) +import qualified Data.CaseInsensitive as CI + +import Text.Read (readMaybe) +import Data.Maybe (fromMaybe) + +import Control.Applicative (Alternative(..)) + +-- | We build our own version of 'Read' so we can override the presentation used +-- +-- We provide a default implementation for 'Read a => Attribute a' +class Attribute a where + attrRead :: Text -> Maybe a + default attrRead :: Read a => Text -> Maybe a + attrRead = readMaybe . T.unpack + +instance Attribute Integer + +lookupAttr :: Attribute a => CI Text -> Bool -> a -> Map (CI Text) Text -> a +-- ^ Extract an attribute by name -- the 'Bool' attribute specifies whether we additionally accept the empty string as key +lookupAttr t emptyOk def attrs = fromMaybe def $ (emptyOk' $ Map.lookup t attrs) >>= attrRead + where + emptyOk' + | emptyOk = (<|> Map.lookup (CI.mk T.empty) attrs) + | otherwise = id -- cgit v1.2.3