{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE DeriveGeneric #-} -- | An implementation of BBcode parsing 'Text' to a syntax tree module Text.BBCode ( bbcode , BBCodeError(..) , TreeError(..) , DomForest , DomTree(..) -- , dom -- , BBLabel -- , rose -- , matches ) where import Data.Text (Text) import GHC.Generics (Generic) import Control.Exception (Exception) import Data.Typeable (Typeable) import Control.Monad (unless, join, foldM, (<=<)) import Data.Function (on) import Control.Applicative import Text.BBCode.Lexer (BBToken(..), token) import Data.Attoparsec.Text (parseOnly, endOfInput, endOfLine) import Data.Tree import Data.Tree.Zipper (TreePos, Empty, Full) import qualified Data.Tree.Zipper as Z import Data.Map (Map) import qualified Data.Map as Map import Data.CaseInsensitive (CI) import qualified Data.CaseInsensitive as CI import Data.Bifunctor (Bifunctor(first)) import Data.Maybe (catMaybes) -- | Our target structure -- a rose tree with an explicit terminal constructor data DomTree = Element (CI Text) (Map (CI Text) Text) [DomTree] | Content Text deriving (Show, Eq) -- | List of paragraphs, which are comprised of lists of 'DomTree's type DomForest = [[DomTree]] dom :: Forest BBLabel -> DomForest -- ^ Parse semantically constrained rose tree to syntactically constrained version -- -- Silently drops children of semantically terminal nodes ('BBPlain') -- -- We already ensured that paragraphs occur nowhere but at toplevel dom = map (\(Node BBPar ts) -> map dom' ts) . ensureTopLevelPar where ensureTopLevelPar xs@((Node BBPar _):_) = xs ensureTopLevelPar xs = pure $ Node BBPar xs dom' (Node (BBPlain t) _) = Content t dom' (Node (BBTag t attrs) ts) = Element (CI.mk t) (Map.mapKeys CI.mk attrs) $ map dom' ts -- | Errors encountered during parsing data BBCodeError = LexerError String -- ^ Error while parsing input to stream of tokens | TreeError TreeError -- ^ Error while parsing stream of tokens to syntax tree deriving (Show, Eq, Generic, Typeable) instance Exception BBCodeError bbcode :: Text -> Either BBCodeError DomForest -- ^ Parse BBCode bbcode t = fmap dom $ first LexerError (parseOnly (many token <* many endOfLine <* endOfInput) t) >>= first TreeError . rose -- | Errors in input encountered during parsing of lexed token-stream data TreeError = MismatchedTags Text Text -- ^ Closing tags label does not match opening tags | ImbalancedTags Text -- ^ We found an extraneous closing tag | UnclosedTags [Text] -- ^ We found opened tags that were not closed | ParagraphWithinTag -- ^ We found a paragraph-break within a tag deriving (Show, Eq, Generic, Typeable) instance Exception TreeError -- | The label of our rose-tree nodes carries the tag name and a map of attributes data BBLabel = BBTag Text (Map Text Text) | BBPar | BBPlain Text deriving (Show, Eq) matches :: Text -> Text -> Bool -- ^ @`matches` "open" "close"@ should be 'True' iff @[/close]@ is a valid closing tag for @[open]@ -- -- @ (==) `on` 'CI.mk' @ matches = (==) `on` CI.mk rose :: [BBToken] -> Either TreeError (Forest BBLabel) -- ^ Assuming that both tags and content have the same type (we use 'BBLabel') bbcode is a flat representation of a rose tree -- -- We use @'Tree' 'BBLabel'@ only as another intermediate structure because it carries no guarantee that the data is semantically valid -- a 'BBPlain'-value semantically has no children. -- -- The use of 'Tree' was still deemed desirable because the morphism to a more sensible structure is straightforward and 'Data.Tree.Zipper' provides all the tools needed to implement 'rose' in a sensible fashion rose = fmap Z.toForest . checkClosure <=< foldM (flip rose') (Z.fromForest []) where rose' (BBStr t) = return . Z.nextSpace . Z.insert (Node (BBPlain t) []) rose' BBNewPar = parBreak -- for more pointless rose' (BBOpen t attrs) = return . Z.children . Z.insert (Node (BBTag t $ Map.fromList attrs) []) rose' (BBContained t attrs) = return . Z.nextSpace . Z.insert (Node (BBTag t $ Map.fromList attrs) []) rose' (BBClose t) = close t -- for more pointless close :: Text -> TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel) close tag pos = do pos' <- maybe (Left $ ImbalancedTags tag) Right $ Z.parent pos >>= traversePars let pTag = (\(BBTag t _) -> t) $ Z.label pos' unless (pTag `matches` tag) . Left $ MismatchedTags pTag tag -- The structure shows that this mode of failure is not logically required -- it's just nice to have return $ Z.nextSpace pos' where traversePars pos | isPar . Z.tree $ pos = Z.parent pos >>= traversePars | otherwise = return pos parBreak :: TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel) parBreak z = let siblings = reverse $ Z.before z siblingsAsPars | all isPar siblings = Right z | Z.isRoot z = Right . Z.fromForest $ [Node BBPar siblings] | (Just p) <- Z.parent z , BBPar <- Z.label p = Right . Z.nextSpace $ p | otherwise = Left ParagraphWithinTag in Z.children . Z.insert (Node BBPar []) . Z.last <$> siblingsAsPars isPar (Node BBPar _) = True isPar _ = False checkClosure :: TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel) -- This failure mode isn't required either checkClosure z | null parentTags = Right z | otherwise = Left . UnclosedTags $ parentTags where parentTags = catMaybes . map (getTag . (\(_, p, _) -> p)) $ Z.parents z getTag (BBTag t _) = Just t getTag _ = Nothing