From ca9d7f5b760cb3d56d7785b357590f54ede5b469 Mon Sep 17 00:00:00 2001 From: Gregor Kleen Date: Fri, 15 Jan 2016 01:22:56 +0000 Subject: BBCode now understands paragraphs --- bbcode/src/Text/BBCode.hs | 31 ++++++++++++++++++++++++++----- bbcode/src/Text/BBCode/Lexer.hs | 14 ++++++++++++-- bbcode/test/Text/BBCode/LexerSpec.hs | 7 +++++++ bbcode/test/Text/BBCodeSpec.hs | 4 ++++ 4 files changed, 49 insertions(+), 7 deletions(-) diff --git a/bbcode/src/Text/BBCode.hs b/bbcode/src/Text/BBCode.hs index 6fef446..0773124 100644 --- a/bbcode/src/Text/BBCode.hs +++ b/bbcode/src/Text/BBCode.hs @@ -40,6 +40,7 @@ import Data.Bifunctor (Bifunctor(first)) -- | Our target structure -- a rose tree with an explicit terminal constructor data DomTree = Element Text (Map Text Text) [DomTree] + | Paragraph [DomTree] | Content Text deriving (Show, Eq) @@ -51,6 +52,7 @@ dom = map dom' where dom' (Node (BBPlain t) _) = Content t dom' (Node (BBTag t attrs) ts) = Element t attrs $ map dom' ts + dom' (Node BBPar ts) = Paragraph $ map dom' ts -- | Errors encountered during parsing data BBCodeError = LexerError String -- ^ Error while parsing input to stream of tokens @@ -72,6 +74,7 @@ instance Exception TreeError -- | The label of our rose-tree nodes carries the tag name and a map of attributes data BBLabel = BBTag Text (Map Text Text) + | BBPar | BBPlain Text deriving (Show, Eq) @@ -89,15 +92,33 @@ rose :: [BBToken] -> Either TreeError (Forest BBLabel) -- The use of 'Tree' was still deemed desirable because the morphism to a more sensible structure is straightforward and 'Data.Tree.Zipper' provides all the tools needed to implement 'rose' in a sensible fashion rose = fmap Z.toForest . foldM (flip rose') (Z.fromForest []) where - rose' (BBStr t) = return . Z.nextSpace . Z.insert (Node (BBPlain t) []) - rose' (BBOpen t attrs) = return . Z.children . Z.insert (Node (BBTag t $ Map.fromList attrs) []) + rose' (BBStr t) = return . Z.nextSpace . Z.insert (Node (BBPlain t) []) + rose' BBNewPar = return . parBreak -- for more pointless + rose' (BBOpen t attrs) = return . Z.children . Z.insert (Node (BBTag t $ Map.fromList attrs) []) rose' (BBContained t attrs) = return . Z.nextSpace . Z.insert (Node (BBTag t $ Map.fromList attrs) []) - rose' (BBClose t) = close t -- for more pointless + rose' (BBClose t) = close t -- for more pointless close :: Text -> TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel) close tag pos = do - pos' <- maybe (Left $ ImbalancedTags tag) Right $ Z.parent pos + pos' <- maybe (Left $ ImbalancedTags tag) Right $ Z.parent pos >>= traversePars let - pTag = (\(BBTag t _) -> t) $ Z.label pos' + pTag = (\(BBTag t _) -> t) . Z.label $ pos' unless (pTag `matches` tag) . Left $ MismatchedTags pTag tag -- The structure shows that this mode of failure is not logically required -- it's just nice to have return $ Z.nextSpace pos' + where + traversePars pos + | isPar . Z.tree $ pos = Z.parent pos >>= traversePars + | otherwise = return pos + + parBreak :: TreePos Empty BBLabel -> TreePos Empty BBLabel + parBreak z = let siblings = reverse $ Z.before z -- We only move ever move right so Z.after will always be empty + + siblingsAsPars + | all isPar siblings = z + | otherwise = case Z.parent z of + Nothing -> Z.fromForest $ [Node BBPar siblings] + Just p -> Z.children . Z.modifyTree (\(Node l _) -> Node l [Node BBPar siblings]) $ p + in Z.children . Z.insert (Node BBPar []) . Z.last $ siblingsAsPars + + isPar (Node BBPar _) = True + isPar _ = False diff --git a/bbcode/src/Text/BBCode/Lexer.hs b/bbcode/src/Text/BBCode/Lexer.hs index 218427d..8bd1446 100644 --- a/bbcode/src/Text/BBCode/Lexer.hs +++ b/bbcode/src/Text/BBCode/Lexer.hs @@ -17,6 +17,9 @@ import qualified Data.Text as T (singleton) import Data.Char (isSpace) import Control.Applicative +import Control.Monad (liftM2) + +import Data.Monoid import Prelude hiding (takeWhile) @@ -25,6 +28,7 @@ data BBToken = BBOpen Text [(Text, Text)] -- ^ Tag open with attributes | BBContained Text [(Text, Text)] -- ^ Tag open & immediate close with attributes | BBClose Text -- ^ Tag close | BBStr Text -- ^ Content of a tag + | BBNewPar deriving (Eq, Show) token :: Parser BBToken @@ -32,7 +36,11 @@ token :: Parser BBToken token = BBClose <$> ("[/" *> escapedText' [']'] <* "]") <|> uncurry BBContained <$ "[" <*> openTag <* "/]" <|> uncurry BBOpen <$ "[" <*> openTag <* "]" - <|> BBStr <$> escapedText ['['] + <|> BBNewPar <$ endOfLine <* many1 endOfLine + <|> BBStr <$> paragraph + where + paragraph = (\a b c -> a <> b <> c) <$> option "" endOfLine' <*> escapedText ['['] <*> option "" paragraph + endOfLine' = "\n" <$ endOfLine openTag :: Parser (Text, [(Text, Text)]) openTag = (,) <$> escapedText' [']', ' ', '=', '/'] <*> attrs' @@ -51,11 +59,13 @@ attrs' = option [] attrs escapedText :: [Char] -> Parser Text -- ^ @escapedText cs@ consumes 'Text' up to (not including) the first occurence of a character from @cs@ that is not escaped using @\\@ -- +-- Does not consume characters used to indicate linebreaks as determined by `isEndOfLine` +-- -- Always consumes at least one character -- -- @\\@ needs to be escaped (prefixed with @\\@) iff it precedes a character from @cs@ escapedText [] = takeText -escapedText cs = recurse $ choice [ takeWhile1 (not . special) +escapedText cs = recurse $ choice [ takeWhile1 $ not . liftM2 (||) special isEndOfLine , escapeSeq , escapeChar' ] diff --git a/bbcode/test/Text/BBCode/LexerSpec.hs b/bbcode/test/Text/BBCode/LexerSpec.hs index e225fab..117006c 100644 --- a/bbcode/test/Text/BBCode/LexerSpec.hs +++ b/bbcode/test/Text/BBCode/LexerSpec.hs @@ -32,6 +32,7 @@ coToken (BBOpen t xs) = "[" <> escape [']'] t <> " " <> attrs <> " ]" coToken (BBContained t xs) = (<> "/]") . T.init . coToken $ BBOpen t xs coToken (BBClose t) = "[/" <> escape [']'] t <> "]" coToken (BBStr t) = escape ['['] t +coToken BBNewPar = "\n\n" escape :: [Char] -> Text -> Text -- ^ Inverse of `escapedText` @@ -108,4 +109,10 @@ examples = [ ("[t]test[/t]" , [BBContained "br" [("attr", "val"), ("val2", "")]]) , ("[foo\\/bar]" , [BBOpen "foo/bar" []]) + , ("foo\nbar" + , [BBStr "foo\nbar"]) + , ("foo\n\nbar" + , [BBStr "foo", BBNewPar, BBStr "bar"]) + , ("foo\n\n\r\n\r\nbar" + , [BBStr "foo", BBNewPar, BBStr "bar"]) ] diff --git a/bbcode/test/Text/BBCodeSpec.hs b/bbcode/test/Text/BBCodeSpec.hs index 0d8949a..e24c661 100644 --- a/bbcode/test/Text/BBCodeSpec.hs +++ b/bbcode/test/Text/BBCodeSpec.hs @@ -55,5 +55,9 @@ examples = [ ("[t]test[/t]" , [Element "br" [("attr", "val")] []]) , ("[br attr=val val2/]" , [Element "br" [("attr", "val"), ("val2", "")] []]) + , ("foo\n\nbar" + , [Paragraph [Content "foo"], Paragraph [Content "bar"]]) + , ("[b]foo\n\nbar[/b]" + , [Element "b" [] [Paragraph [Content "foo"], Paragraph [Content "bar"]]]) ] -- cgit v1.2.3