{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE DeriveGeneric #-} -- | An implementation of BBcode parsing 'Text' to a syntax tree module Text.BBCode ( bbcode , BBCodeError(..) , TreeError(..) , DomTree(..) -- , dom -- , BBLabel -- , rose -- , matches ) where import Data.Text (Text) import GHC.Generics (Generic) import Control.Exception (Exception) import Data.Typeable (Typeable) import Control.Monad (unless, join, foldM) import Data.Function (on) import Control.Applicative import Text.BBCode.Lexer (BBToken(..), token) import Data.Attoparsec.Text (parseOnly, endOfInput) import Data.Tree import Data.Tree.Zipper (TreePos, Empty, Full) import qualified Data.Tree.Zipper as Z import Data.Map (Map) import qualified Data.Map as Map import Data.CaseInsensitive (CI) import qualified Data.CaseInsensitive as CI import Data.Bifunctor (Bifunctor(first)) -- | Our target structure -- a rose tree with an explicit terminal constructor data DomTree = Element Text (Map Text Text) [DomTree] | Paragraph [DomTree] | Content Text deriving (Show, Eq) dom :: Forest BBLabel -> [DomTree] -- ^ Parse semantically constrained rose tree to syntactically constrained version -- -- Silently drops children of semantically terminal nodes ('BBPlain') dom = map dom' where dom' (Node (BBPlain t) _) = Content t dom' (Node (BBTag t attrs) ts) = Element t attrs $ map dom' ts dom' (Node BBPar ts) = Paragraph $ map dom' ts -- | Errors encountered during parsing data BBCodeError = LexerError String -- ^ Error while parsing input to stream of tokens | TreeError TreeError -- ^ Error while parsing stream of tokens to syntax tree deriving (Show, Eq, Generic, Typeable) instance Exception BBCodeError bbcode :: Text -> Either BBCodeError [DomTree] -- ^ Parse BBCode bbcode t = fmap dom $ first LexerError (parseOnly (many token <* endOfInput) t) >>= first TreeError . rose -- | Errors in input encountered during parsing of lexed token-stream data TreeError = MismatchedTags Text Text -- ^ Closing tags label does not match opening tags | ImbalancedTags Text -- ^ We found an extraneous closing tag deriving (Show, Eq, Generic, Typeable) instance Exception TreeError -- | The label of our rose-tree nodes carries the tag name and a map of attributes data BBLabel = BBTag Text (Map Text Text) | BBPar | BBPlain Text deriving (Show, Eq) matches :: Text -> Text -> Bool -- ^ @`matches` "open" "close"@ should be 'True' iff @[/close]@ is a valid closing tag for @[open]@ -- -- @ (==) `on` 'CI.mk' @ matches = (==) `on` CI.mk rose :: [BBToken] -> Either TreeError (Forest BBLabel) -- ^ Assuming that both tags and content have the same type (we use 'BBLabel') bbcode is a flat representation of a rose tree -- -- We use @'Tree' 'BBLabel'@ only as another intermediate structure because it carries no guarantee that the data is semantically valid -- a 'BBPlain'-value semantically has no children. -- -- The use of 'Tree' was still deemed desirable because the morphism to a more sensible structure is straightforward and 'Data.Tree.Zipper' provides all the tools needed to implement 'rose' in a sensible fashion rose = fmap Z.toForest . foldM (flip rose') (Z.fromForest []) where rose' (BBStr t) = return . Z.nextSpace . Z.insert (Node (BBPlain t) []) rose' BBNewPar = return . parBreak -- for more pointless rose' (BBOpen t attrs) = return . Z.children . Z.insert (Node (BBTag t $ Map.fromList attrs) []) rose' (BBContained t attrs) = return . Z.nextSpace . Z.insert (Node (BBTag t $ Map.fromList attrs) []) rose' (BBClose t) = close t -- for more pointless close :: Text -> TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel) close tag pos = do pos' <- maybe (Left $ ImbalancedTags tag) Right $ Z.parent pos >>= traversePars let pTag = (\(BBTag t _) -> t) $ Z.label pos' unless (pTag `matches` tag) . Left $ MismatchedTags pTag tag -- The structure shows that this mode of failure is not logically required -- it's just nice to have return $ Z.nextSpace pos' where traversePars pos | isPar . Z.tree $ pos = Z.parent pos >>= traversePars | otherwise = return pos parBreak :: TreePos Empty BBLabel -> TreePos Empty BBLabel parBreak z = let siblings = reverse $ Z.before z -- We only move ever move right so Z.after will always be empty siblingsAsPars | all isPar siblings = z | otherwise = case Z.parent z of Nothing -> Z.fromForest $ [Node BBPar siblings] Just p -> Z.children . Z.modifyTree (\(Node l _) -> Node l [Node BBPar siblings]) $ p in Z.children . Z.insert (Node BBPar []) . Z.last $ siblingsAsPars isPar (Node BBPar _) = True isPar _ = False