From 478bc6572d3ba508bddf1fdcf697e5a9e56e4055 Mon Sep 17 00:00:00 2001 From: Gregor Kleen Date: Tue, 12 Jan 2016 09:39:42 +0100 Subject: DomTree --- bbcode/src/Text/BBCode.hs | 48 ++++++++++++++++++++++++++++++------ bbcode/src/Text/BBCode/Lexer.hs | 12 ++++----- bbcode/test/Text/BBCode/LexerSpec.hs | 18 +++++++------- 3 files changed, 55 insertions(+), 23 deletions(-) diff --git a/bbcode/src/Text/BBCode.hs b/bbcode/src/Text/BBCode.hs index 1e9960a..30b1da8 100644 --- a/bbcode/src/Text/BBCode.hs +++ b/bbcode/src/Text/BBCode.hs @@ -1,9 +1,11 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE DeriveGeneric #-} --- | An implementation of BBcode parsing 'Text' to a syntax tree (@'Forest' 'BBLabel'@) +-- | An implementation of BBcode parsing 'Text' to a syntax tree module Text.BBCode ( TreeError(..) + , DomTree(..) + , dom , BBLabel , rose , matches @@ -15,11 +17,13 @@ import GHC.Generics (Generic) import Control.Exception (Exception) import Data.Typeable (Typeable) -import Control.Monad (unless) +import Control.Monad (unless, join) import Data.Function (on) +import Control.Applicative import Text.BBCode.Lexer (BBToken(..), token) - +import Data.Attoparsec.Text (parseOnly, endOfInput) + import Data.Tree import Data.Tree.Zipper (TreePos, Empty, Full) import qualified Data.Tree.Zipper as Z @@ -30,6 +34,28 @@ import qualified Data.Map as Map import Data.CaseInsensitive (CI) import qualified Data.CaseInsensitive as CI +import Data.Bifunctor (Bifunctor(first)) + +data DomTree = Element Text (Map Text Text) [DomTree] + | Content Text + deriving (Show, Eq) + +dom :: Forest BBLabel -> [DomTree] +dom = map dom' + where + dom' (Node (BBPlain t) _) = Content t + dom' (Node (BBTag t attrs) ts) = Element t attrs $ map dom' ts + +-- | Errors encountered during parsing +data BBCodeError = LexerError String -- ^ Error while parsing input to stream of tokens + | TreeError TreeError -- ^ Error while parsing stream of tokens to syntax tree + deriving (Show, Eq, Generic, Typeable) + +instance Exception BBCodeError + +bbcode :: Text -> Either BBCodeError [DomTree] +bbcode t = fmap dom $ first LexerError (parseOnly (many token <* endOfInput) t) >>= first TreeError . rose + -- | Errors in input encountered during parsing of lexed token-stream data TreeError = MismatchedTags Text Text -- ^ Closing tags does not match opening tags | ImbalancedTags Text -- ^ We found an extraneous closing tag @@ -38,7 +64,9 @@ data TreeError = MismatchedTags Text Text -- ^ Closing tags does not match openi instance Exception TreeError -- | The label of our rose-tree nodes carries the tag name and a map of attributes -type BBLabel = (Text, Maybe (Map Text (Maybe Text))) +data BBLabel = BBTag Text (Map Text Text) + | BBPlain Text + deriving (Show, Eq) matches :: Text -> Text -> Bool -- ^ @`matches` "open" "close"@ should be 'True' iff @[/close]@ is a valid closing tag for @[open]@ @@ -47,21 +75,25 @@ matches :: Text -> Text -> Bool matches = (==) `on` CI.mk rose :: [BBToken] -> Either TreeError (Forest BBLabel) --- ^ Assuming that both tags and content have the same type (we use 'Text') bbcode is a flat representation of a rose tree +-- ^ Assuming that both tags and content have the same type (we use 'BBLabel') bbcode is a flat representation of a rose tree +-- +-- We use @'Tree' 'BBLabel'@ only as another intermediate structure because it carries no guarantee that the data is semantically valid -- a 'BBPlain'-value semantically has no children. +-- +-- The use of 'Tree' was still deemed desirable because the morphism to a more sensible structure is straightforward and 'Data.Tree.Zipper' provides all the tools needed to implement 'rose' in a sensible fashion rose = fmap Z.toForest . flip rose' (Z.fromForest []) where rose' :: [BBToken] -> TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel) rose' [] = return rose' (x:xs) = (>>= rose' xs) . rose'' x - rose'' (BBStr t) = return . Z.nextSpace . Z.insert (Node (t, Nothing) []) - rose'' (BBOpen t attrs) = return . Z.children . Z.insert (Node (t, Just $ Map.fromList attrs) []) + rose'' (BBStr t) = return . Z.nextSpace . Z.insert (Node (BBPlain t) []) + rose'' (BBOpen t attrs) = return . Z.children . Z.insert (Node (BBTag t $ Map.fromList attrs) []) rose'' (BBClose t) = close t -- for more pointless close :: Text -> TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel) close tag pos = do pos' <- maybe (Left $ ImbalancedTags tag) Right $ Z.parent pos let - pTag = fst $ Z.label pos' + pTag = (\(BBTag t _) -> t) $ Z.label pos' unless (pTag `matches` tag) . Left $ MismatchedTags pTag tag -- The structure shows that this mode of failure is not logically required -- it's just nice to have return $ Z.nextSpace pos' diff --git a/bbcode/src/Text/BBCode/Lexer.hs b/bbcode/src/Text/BBCode/Lexer.hs index 03f57d2..560324b 100644 --- a/bbcode/src/Text/BBCode/Lexer.hs +++ b/bbcode/src/Text/BBCode/Lexer.hs @@ -21,7 +21,7 @@ import Control.Applicative import Prelude hiding (takeWhile) -- | Our lexicographical unit -data BBToken = BBOpen Text [(Text, Maybe Text)] -- ^ Tag open with attributes +data BBToken = BBOpen Text [(Text, Text)] -- ^ Tag open with attributes | BBClose Text -- ^ Tag close | BBStr Text -- ^ Content of a tag deriving (Eq, Show) @@ -32,18 +32,18 @@ token = BBClose <$> ("[/" *> escapedText' [']'] <* "]") <|> uncurry BBOpen <$ "[" <*> openTag <* "]" <|> BBStr <$> escapedText ['['] -openTag :: Parser (Text, [(Text, Maybe Text)]) +openTag :: Parser (Text, [(Text, Text)]) openTag = (,) <$> escapedText' [']', ' ', '='] <*> attrs' -attrs :: Parser [(Text, Maybe Text)] +attrs :: Parser [(Text, Text)] attrs = (:) <$> (namedAttr <|> plainValue) <* takeWhile isSpace <*> attrs' where - namedAttr = (,) <$ takeWhile isSpace <*> escapedText ['=', ']', ' '] <*> optional ("=" *> attrArg) - plainValue = (,) <$> pure "" <* "=" <*> (Just <$> attrArg) + namedAttr = (,) <$ takeWhile isSpace <*> escapedText ['=', ']', ' '] <*> option "" ("=" *> attrArg) + plainValue = (,) <$> pure "" <* "=" <*> attrArg attrArg = "\"" *> escapedText ['"'] <* "\"" <|> escapedText [']', ' '] -attrs' :: Parser [(Text, Maybe Text)] +attrs' :: Parser [(Text, Text)] attrs' = option [] attrs escapedText :: [Char] -> Parser Text diff --git a/bbcode/test/Text/BBCode/LexerSpec.hs b/bbcode/test/Text/BBCode/LexerSpec.hs index fcc1c4f..5c4b89e 100644 --- a/bbcode/test/Text/BBCode/LexerSpec.hs +++ b/bbcode/test/Text/BBCode/LexerSpec.hs @@ -27,8 +27,8 @@ coToken (BBOpen t []) = "[" <> escape [']'] t <> "]" coToken (BBOpen t xs) = "[" <> escape [']'] t <> " " <> attrs <> " ]" where attrs = mconcat . intersperse " " $ map attr xs - attr (key, Nothing) = escape ['=', ']', ' '] key - attr (key, Just val) = escape ['=', ']', ' '] key <> "=\"" <> escape ['\"'] val <> "\"" + attr (key, "") = escape ['=', ']', ' '] key + attr (key, val) = escape ['=', ']', ' '] key <> "=\"" <> escape ['\"'] val <> "\"" coToken (BBClose t) = "[/" <> escape [']'] t <> "]" coToken (BBStr t) = escape ['['] t @@ -82,17 +82,17 @@ examples = [ ("[t]test[/t]" , ("[t]test[/t\\]]" , [BBOpen "t" [], BBStr "test", BBClose "t]"]) , ("[t attr]test[/t]" - , [BBOpen "t" [("attr", Nothing)], BBStr "test", BBClose "t"]) + , [BBOpen "t" [("attr", "")], BBStr "test", BBClose "t"]) , ("[t=attr]test[/t]" - , [BBOpen "t" [("", Just "attr")], BBStr "test", BBClose "t"]) + , [BBOpen "t" [("", "attr")], BBStr "test", BBClose "t"]) , ("[t attr=val]test[/t]" - , [BBOpen "t" [("attr", Just "val")], BBStr "test", BBClose "t"]) + , [BBOpen "t" [("attr", "val")], BBStr "test", BBClose "t"]) , ("[t attr=\"val\"]test[/t]" - , [BBOpen "t" [("attr", Just "val")], BBStr "test", BBClose "t"]) + , [BBOpen "t" [("attr", "val")], BBStr "test", BBClose "t"]) , ("[t attr=\"va]l\"]test[/t]" - , [BBOpen "t" [("attr", Just "va]l")], BBStr "test", BBClose "t"]) + , [BBOpen "t" [("attr", "va]l")], BBStr "test", BBClose "t"]) , ("[t attr=\"va\\\"l\"]test[/t]" - , [BBOpen "t" [("attr", Just "va\"l")], BBStr "test", BBClose "t"]) + , [BBOpen "t" [("attr", "va\"l")], BBStr "test", BBClose "t"]) , ("[t attr=\"val\" attr2=\"val2\" ]test[/t]" - , [BBOpen "t" [("attr", Just "val"), ("attr2", Just "val2")], BBStr "test", BBClose "t"]) + , [BBOpen "t" [("attr", "val"), ("attr2", "val2")], BBStr "test", BBClose "t"]) ] -- cgit v1.2.3