From a5d285a8b74d2278e8549909d29c01b62dc84424 Mon Sep 17 00:00:00 2001 From: Gregor Kleen Date: Tue, 12 Jan 2016 05:07:27 +0000 Subject: Lexing tag attributes --- bbcode/src/Text/BBCode.hs | 2 +- bbcode/src/Text/BBCode/Lexer.hs | 17 ++++++++++++++--- bbcode/test/Text/BBCode/LexerSpec.hs | 30 ++++++++++++++++++++---------- 3 files changed, 35 insertions(+), 14 deletions(-) diff --git a/bbcode/src/Text/BBCode.hs b/bbcode/src/Text/BBCode.hs index dfa1db7..a6de7b4 100644 --- a/bbcode/src/Text/BBCode.hs +++ b/bbcode/src/Text/BBCode.hs @@ -42,7 +42,7 @@ rose = fmap Z.toForest . flip rose' (Z.fromForest []) rose' (x:xs) = (>>= rose' xs) . rose'' x rose'' (BBStr t) = return . Z.nextSpace . Z.insert (Node t []) - rose'' (BBOpen t) = return . Z.children . Z.insert (Node t []) + rose'' (BBOpen t _) = return . Z.children . Z.insert (Node t []) rose'' (BBClose t) = close t -- for more pointless close :: Text -> TreePos Empty Text -> Either TreeError (TreePos Empty Text) diff --git a/bbcode/src/Text/BBCode/Lexer.hs b/bbcode/src/Text/BBCode/Lexer.hs index a7294fe..ad26113 100644 --- a/bbcode/src/Text/BBCode/Lexer.hs +++ b/bbcode/src/Text/BBCode/Lexer.hs @@ -14,20 +14,31 @@ import Data.Attoparsec.Text import Data.Text (Text) import qualified Data.Text as T (singleton) +import Data.Char (isSpace) + import Control.Applicative +import Prelude hiding (takeWhile) + -- | Our lexicographical unit -data BBToken = BBOpen Text -- ^ Tag open +data BBToken = BBOpen Text [(Text, Maybe Text)] -- ^ Tag open with attributes | BBClose Text -- ^ Tag close | BBStr Text -- ^ Content of a tag deriving (Eq, Show) token :: Parser BBToken -- ^ Tokenizer -token = BBClose <$> (string "[/" *> escapedText' [']'] <* string "]") - <|> BBOpen <$> (string "[" *> escapedText' [']'] <* string "]") +token = BBClose <$> ("[/" *> escapedText' [']'] <* "]") + <|> BBOpen <$> ("[" *> escapedText' [']', ' ']) <*> (option [] attrs <* "]") <|> BBStr <$> escapedText ['['] +attrs :: Parser [(Text, Maybe Text)] +attrs = (:) <$> (attrs' <* takeWhile isSpace) <*> (option [] $ attrs) + where + attrs' = (,) <$> escapedText ['=', ']', ' '] <*> optional ("=" *> attrArg) + attrArg = "\"" *> escapedText ['"'] <* "\"" + <|> escapedText [']', ' '] + escapedText :: [Char] -> Parser Text -- ^ @escapedText cs@ consumes 'Text' up to (not including) the first occurence of a character from @cs@ that is not escaped using @\\@ -- diff --git a/bbcode/test/Text/BBCode/LexerSpec.hs b/bbcode/test/Text/BBCode/LexerSpec.hs index d6e1427..bd2ab7f 100644 --- a/bbcode/test/Text/BBCode/LexerSpec.hs +++ b/bbcode/test/Text/BBCode/LexerSpec.hs @@ -14,6 +14,7 @@ import Data.Text (Text) import qualified Data.Text as T (singleton, replace, last, null) import Data.Monoid ((<>), mconcat, Endo(..)) +import Data.List (intersperse) import Data.Attoparsec.Text (parseOnly, endOfInput) @@ -22,7 +23,12 @@ import Control.Monad (zipWithM_) coToken :: BBToken -> Text -- ^ Inverse of `token` -coToken (BBOpen t) = "[" <> escape [']'] t <> "]" +coToken (BBOpen t []) = "[" <> escape [']'] t <> "]" +coToken (BBOpen t xs) = "[" <> escape [']'] t <> " " <> attrs <> " ]" + where + attrs = mconcat . intersperse " " $ map attr xs + attr (key, Nothing) = escape ['=', ']', ' '] key + attr (key, Just val) = escape ['=', ']', ' '] key <> "=\"" <> escape ['\"'] val <> "\"" coToken (BBClose t) = "[/" <> escape [']'] t <> "]" coToken (BBStr t) = escape ['['] t @@ -49,26 +55,30 @@ spec = do in specify str (tokenize s == Right ts) prop_token :: Text -> Property --- ^ prop> (mconcat . map coToken) <$> tokenize x == Right x +-- ^ prop> (==) <$> (((mconcat . map coToken) <$> tokenize x) >>= tokenize) <*> tokenize x -- -- Where 'x' is restricted such that `tokenize` succeeds -prop_token x = discardLeft (((== x) . mconcat . map coToken) <$> tokenize x) +-- +-- Without accounting for failure this is: +-- +-- > (tokenize . mconcat . map coToken . tokenize x) == tokenize x +prop_token x = discardLeft $ (==) <$> (((mconcat . map coToken) <$> tokenize x) >>= tokenize) <*> tokenize x where discardLeft = either (const $ property Discard) property examples :: [(Text, [BBToken])] examples = [ ("[t]test[/t]" - , [BBOpen "t", BBStr "test", BBClose "t"]) + , [BBOpen "t" [], BBStr "test", BBClose "t"]) , ("[t]te\\st[/t]" - , [BBOpen "t", BBStr "te\\st", BBClose "t"]) + , [BBOpen "t" [], BBStr "te\\st", BBClose "t"]) , ("[t]te\\[st[/t]" - , [BBOpen "t", BBStr "te[st", BBClose "t"]) + , [BBOpen "t" [], BBStr "te[st", BBClose "t"]) , ("[t]test\\\\[/t]" - , [BBOpen "t", BBStr "test\\", BBClose "t"]) + , [BBOpen "t" [], BBStr "test\\", BBClose "t"]) , ("[t]test\\[/t]" - , [BBOpen "t", BBStr "test[/t]"]) + , [BBOpen "t" [], BBStr "test[/t]"]) , ("[\\t]test[/t]" - , [BBOpen "\\t", BBStr "test", BBClose "t"]) + , [BBOpen "\\t" [], BBStr "test", BBClose "t"]) , ("[t]test[/t\\]]" - , [BBOpen "t", BBStr "test", BBClose "t]"]) + , [BBOpen "t" [], BBStr "test", BBClose "t]"]) ] -- cgit v1.2.3