From 478bc6572d3ba508bddf1fdcf697e5a9e56e4055 Mon Sep 17 00:00:00 2001 From: Gregor Kleen Date: Tue, 12 Jan 2016 09:39:42 +0100 Subject: DomTree --- bbcode/src/Text/BBCode.hs | 48 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 8 deletions(-) (limited to 'bbcode/src/Text/BBCode.hs') diff --git a/bbcode/src/Text/BBCode.hs b/bbcode/src/Text/BBCode.hs index 1e9960a..30b1da8 100644 --- a/bbcode/src/Text/BBCode.hs +++ b/bbcode/src/Text/BBCode.hs @@ -1,9 +1,11 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE DeriveGeneric #-} --- | An implementation of BBcode parsing 'Text' to a syntax tree (@'Forest' 'BBLabel'@) +-- | An implementation of BBcode parsing 'Text' to a syntax tree module Text.BBCode ( TreeError(..) + , DomTree(..) + , dom , BBLabel , rose , matches @@ -15,11 +17,13 @@ import GHC.Generics (Generic) import Control.Exception (Exception) import Data.Typeable (Typeable) -import Control.Monad (unless) +import Control.Monad (unless, join) import Data.Function (on) +import Control.Applicative import Text.BBCode.Lexer (BBToken(..), token) - +import Data.Attoparsec.Text (parseOnly, endOfInput) + import Data.Tree import Data.Tree.Zipper (TreePos, Empty, Full) import qualified Data.Tree.Zipper as Z @@ -30,6 +34,28 @@ import qualified Data.Map as Map import Data.CaseInsensitive (CI) import qualified Data.CaseInsensitive as CI +import Data.Bifunctor (Bifunctor(first)) + +data DomTree = Element Text (Map Text Text) [DomTree] + | Content Text + deriving (Show, Eq) + +dom :: Forest BBLabel -> [DomTree] +dom = map dom' + where + dom' (Node (BBPlain t) _) = Content t + dom' (Node (BBTag t attrs) ts) = Element t attrs $ map dom' ts + +-- | Errors encountered during parsing +data BBCodeError = LexerError String -- ^ Error while parsing input to stream of tokens + | TreeError TreeError -- ^ Error while parsing stream of tokens to syntax tree + deriving (Show, Eq, Generic, Typeable) + +instance Exception BBCodeError + +bbcode :: Text -> Either BBCodeError [DomTree] +bbcode t = fmap dom $ first LexerError (parseOnly (many token <* endOfInput) t) >>= first TreeError . rose + -- | Errors in input encountered during parsing of lexed token-stream data TreeError = MismatchedTags Text Text -- ^ Closing tags does not match opening tags | ImbalancedTags Text -- ^ We found an extraneous closing tag @@ -38,7 +64,9 @@ data TreeError = MismatchedTags Text Text -- ^ Closing tags does not match openi instance Exception TreeError -- | The label of our rose-tree nodes carries the tag name and a map of attributes -type BBLabel = (Text, Maybe (Map Text (Maybe Text))) +data BBLabel = BBTag Text (Map Text Text) + | BBPlain Text + deriving (Show, Eq) matches :: Text -> Text -> Bool -- ^ @`matches` "open" "close"@ should be 'True' iff @[/close]@ is a valid closing tag for @[open]@ @@ -47,21 +75,25 @@ matches :: Text -> Text -> Bool matches = (==) `on` CI.mk rose :: [BBToken] -> Either TreeError (Forest BBLabel) --- ^ Assuming that both tags and content have the same type (we use 'Text') bbcode is a flat representation of a rose tree +-- ^ Assuming that both tags and content have the same type (we use 'BBLabel') bbcode is a flat representation of a rose tree +-- +-- We use @'Tree' 'BBLabel'@ only as another intermediate structure because it carries no guarantee that the data is semantically valid -- a 'BBPlain'-value semantically has no children. +-- +-- The use of 'Tree' was still deemed desirable because the morphism to a more sensible structure is straightforward and 'Data.Tree.Zipper' provides all the tools needed to implement 'rose' in a sensible fashion rose = fmap Z.toForest . flip rose' (Z.fromForest []) where rose' :: [BBToken] -> TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel) rose' [] = return rose' (x:xs) = (>>= rose' xs) . rose'' x - rose'' (BBStr t) = return . Z.nextSpace . Z.insert (Node (t, Nothing) []) - rose'' (BBOpen t attrs) = return . Z.children . Z.insert (Node (t, Just $ Map.fromList attrs) []) + rose'' (BBStr t) = return . Z.nextSpace . Z.insert (Node (BBPlain t) []) + rose'' (BBOpen t attrs) = return . Z.children . Z.insert (Node (BBTag t $ Map.fromList attrs) []) rose'' (BBClose t) = close t -- for more pointless close :: Text -> TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel) close tag pos = do pos' <- maybe (Left $ ImbalancedTags tag) Right $ Z.parent pos let - pTag = fst $ Z.label pos' + pTag = (\(BBTag t _) -> t) $ Z.label pos' unless (pTag `matches` tag) . Left $ MismatchedTags pTag tag -- The structure shows that this mode of failure is not logically required -- it's just nice to have return $ Z.nextSpace pos' -- cgit v1.2.3