diff options
| author | Gregor Kleen <gkleen@yggdrasil.li> | 2016-01-12 05:07:27 +0000 |
|---|---|---|
| committer | Gregor Kleen <gkleen@yggdrasil.li> | 2016-01-12 05:07:27 +0000 |
| commit | a5d285a8b74d2278e8549909d29c01b62dc84424 (patch) | |
| tree | 4a6e016dc930ff5fa4b5d8ca965a3162518a1541 | |
| parent | ed629414fbc3af1700f7ed6829744f0fb30417c9 (diff) | |
| download | thermoprint-a5d285a8b74d2278e8549909d29c01b62dc84424.tar thermoprint-a5d285a8b74d2278e8549909d29c01b62dc84424.tar.gz thermoprint-a5d285a8b74d2278e8549909d29c01b62dc84424.tar.bz2 thermoprint-a5d285a8b74d2278e8549909d29c01b62dc84424.tar.xz thermoprint-a5d285a8b74d2278e8549909d29c01b62dc84424.zip | |
Lexing tag attributes
| -rw-r--r-- | bbcode/src/Text/BBCode.hs | 2 | ||||
| -rw-r--r-- | bbcode/src/Text/BBCode/Lexer.hs | 17 | ||||
| -rw-r--r-- | bbcode/test/Text/BBCode/LexerSpec.hs | 30 |
3 files changed, 35 insertions, 14 deletions
diff --git a/bbcode/src/Text/BBCode.hs b/bbcode/src/Text/BBCode.hs index dfa1db7..a6de7b4 100644 --- a/bbcode/src/Text/BBCode.hs +++ b/bbcode/src/Text/BBCode.hs | |||
| @@ -42,7 +42,7 @@ rose = fmap Z.toForest . flip rose' (Z.fromForest []) | |||
| 42 | rose' (x:xs) = (>>= rose' xs) . rose'' x | 42 | rose' (x:xs) = (>>= rose' xs) . rose'' x |
| 43 | 43 | ||
| 44 | rose'' (BBStr t) = return . Z.nextSpace . Z.insert (Node t []) | 44 | rose'' (BBStr t) = return . Z.nextSpace . Z.insert (Node t []) |
| 45 | rose'' (BBOpen t) = return . Z.children . Z.insert (Node t []) | 45 | rose'' (BBOpen t _) = return . Z.children . Z.insert (Node t []) |
| 46 | rose'' (BBClose t) = close t -- for more pointless | 46 | rose'' (BBClose t) = close t -- for more pointless |
| 47 | 47 | ||
| 48 | close :: Text -> TreePos Empty Text -> Either TreeError (TreePos Empty Text) | 48 | close :: Text -> TreePos Empty Text -> Either TreeError (TreePos Empty Text) |
diff --git a/bbcode/src/Text/BBCode/Lexer.hs b/bbcode/src/Text/BBCode/Lexer.hs index a7294fe..ad26113 100644 --- a/bbcode/src/Text/BBCode/Lexer.hs +++ b/bbcode/src/Text/BBCode/Lexer.hs | |||
| @@ -14,20 +14,31 @@ import Data.Attoparsec.Text | |||
| 14 | import Data.Text (Text) | 14 | import Data.Text (Text) |
| 15 | import qualified Data.Text as T (singleton) | 15 | import qualified Data.Text as T (singleton) |
| 16 | 16 | ||
| 17 | import Data.Char (isSpace) | ||
| 18 | |||
| 17 | import Control.Applicative | 19 | import Control.Applicative |
| 18 | 20 | ||
| 21 | import Prelude hiding (takeWhile) | ||
| 22 | |||
| 19 | -- | Our lexicographical unit | 23 | -- | Our lexicographical unit |
| 20 | data BBToken = BBOpen Text -- ^ Tag open | 24 | data BBToken = BBOpen Text [(Text, Maybe Text)] -- ^ Tag open with attributes |
| 21 | | BBClose Text -- ^ Tag close | 25 | | BBClose Text -- ^ Tag close |
| 22 | | BBStr Text -- ^ Content of a tag | 26 | | BBStr Text -- ^ Content of a tag |
| 23 | deriving (Eq, Show) | 27 | deriving (Eq, Show) |
| 24 | 28 | ||
| 25 | token :: Parser BBToken | 29 | token :: Parser BBToken |
| 26 | -- ^ Tokenizer | 30 | -- ^ Tokenizer |
| 27 | token = BBClose <$> (string "[/" *> escapedText' [']'] <* string "]") | 31 | token = BBClose <$> ("[/" *> escapedText' [']'] <* "]") |
| 28 | <|> BBOpen <$> (string "[" *> escapedText' [']'] <* string "]") | 32 | <|> BBOpen <$> ("[" *> escapedText' [']', ' ']) <*> (option [] attrs <* "]") |
| 29 | <|> BBStr <$> escapedText ['['] | 33 | <|> BBStr <$> escapedText ['['] |
| 30 | 34 | ||
| 35 | attrs :: Parser [(Text, Maybe Text)] | ||
| 36 | attrs = (:) <$> (attrs' <* takeWhile isSpace) <*> (option [] $ attrs) | ||
| 37 | where | ||
| 38 | attrs' = (,) <$> escapedText ['=', ']', ' '] <*> optional ("=" *> attrArg) | ||
| 39 | attrArg = "\"" *> escapedText ['"'] <* "\"" | ||
| 40 | <|> escapedText [']', ' '] | ||
| 41 | |||
| 31 | escapedText :: [Char] -> Parser Text | 42 | escapedText :: [Char] -> Parser Text |
| 32 | -- ^ @escapedText cs@ consumes 'Text' up to (not including) the first occurence of a character from @cs@ that is not escaped using @\\@ | 43 | -- ^ @escapedText cs@ consumes 'Text' up to (not including) the first occurence of a character from @cs@ that is not escaped using @\\@ |
| 33 | -- | 44 | -- |
diff --git a/bbcode/test/Text/BBCode/LexerSpec.hs b/bbcode/test/Text/BBCode/LexerSpec.hs index d6e1427..bd2ab7f 100644 --- a/bbcode/test/Text/BBCode/LexerSpec.hs +++ b/bbcode/test/Text/BBCode/LexerSpec.hs | |||
| @@ -14,6 +14,7 @@ import Data.Text (Text) | |||
| 14 | import qualified Data.Text as T (singleton, replace, last, null) | 14 | import qualified Data.Text as T (singleton, replace, last, null) |
| 15 | 15 | ||
| 16 | import Data.Monoid ((<>), mconcat, Endo(..)) | 16 | import Data.Monoid ((<>), mconcat, Endo(..)) |
| 17 | import Data.List (intersperse) | ||
| 17 | 18 | ||
| 18 | import Data.Attoparsec.Text (parseOnly, endOfInput) | 19 | import Data.Attoparsec.Text (parseOnly, endOfInput) |
| 19 | 20 | ||
| @@ -22,7 +23,12 @@ import Control.Monad (zipWithM_) | |||
| 22 | 23 | ||
| 23 | coToken :: BBToken -> Text | 24 | coToken :: BBToken -> Text |
| 24 | -- ^ Inverse of `token` | 25 | -- ^ Inverse of `token` |
| 25 | coToken (BBOpen t) = "[" <> escape [']'] t <> "]" | 26 | coToken (BBOpen t []) = "[" <> escape [']'] t <> "]" |
| 27 | coToken (BBOpen t xs) = "[" <> escape [']'] t <> " " <> attrs <> " ]" | ||
| 28 | where | ||
| 29 | attrs = mconcat . intersperse " " $ map attr xs | ||
| 30 | attr (key, Nothing) = escape ['=', ']', ' '] key | ||
| 31 | attr (key, Just val) = escape ['=', ']', ' '] key <> "=\"" <> escape ['\"'] val <> "\"" | ||
| 26 | coToken (BBClose t) = "[/" <> escape [']'] t <> "]" | 32 | coToken (BBClose t) = "[/" <> escape [']'] t <> "]" |
| 27 | coToken (BBStr t) = escape ['['] t | 33 | coToken (BBStr t) = escape ['['] t |
| 28 | 34 | ||
| @@ -49,26 +55,30 @@ spec = do | |||
| 49 | in specify str (tokenize s == Right ts) | 55 | in specify str (tokenize s == Right ts) |
| 50 | 56 | ||
| 51 | prop_token :: Text -> Property | 57 | prop_token :: Text -> Property |
| 52 | -- ^ prop> (mconcat . map coToken) <$> tokenize x == Right x | 58 | -- ^ prop> (==) <$> (((mconcat . map coToken) <$> tokenize x) >>= tokenize) <*> tokenize x |
| 53 | -- | 59 | -- |
| 54 | -- Where 'x' is restricted such that `tokenize` succeeds | 60 | -- Where 'x' is restricted such that `tokenize` succeeds |
| 55 | prop_token x = discardLeft (((== x) . mconcat . map coToken) <$> tokenize x) | 61 | -- |
| 62 | -- Without accounting for failure this is: | ||
| 63 | -- | ||
| 64 | -- > (tokenize . mconcat . map coToken . tokenize x) == tokenize x | ||
| 65 | prop_token x = discardLeft $ (==) <$> (((mconcat . map coToken) <$> tokenize x) >>= tokenize) <*> tokenize x | ||
| 56 | where | 66 | where |
| 57 | discardLeft = either (const $ property Discard) property | 67 | discardLeft = either (const $ property Discard) property |
| 58 | 68 | ||
| 59 | examples :: [(Text, [BBToken])] | 69 | examples :: [(Text, [BBToken])] |
| 60 | examples = [ ("[t]test[/t]" | 70 | examples = [ ("[t]test[/t]" |
| 61 | , [BBOpen "t", BBStr "test", BBClose "t"]) | 71 | , [BBOpen "t" [], BBStr "test", BBClose "t"]) |
| 62 | , ("[t]te\\st[/t]" | 72 | , ("[t]te\\st[/t]" |
| 63 | , [BBOpen "t", BBStr "te\\st", BBClose "t"]) | 73 | , [BBOpen "t" [], BBStr "te\\st", BBClose "t"]) |
| 64 | , ("[t]te\\[st[/t]" | 74 | , ("[t]te\\[st[/t]" |
| 65 | , [BBOpen "t", BBStr "te[st", BBClose "t"]) | 75 | , [BBOpen "t" [], BBStr "te[st", BBClose "t"]) |
| 66 | , ("[t]test\\\\[/t]" | 76 | , ("[t]test\\\\[/t]" |
| 67 | , [BBOpen "t", BBStr "test\\", BBClose "t"]) | 77 | , [BBOpen "t" [], BBStr "test\\", BBClose "t"]) |
| 68 | , ("[t]test\\[/t]" | 78 | , ("[t]test\\[/t]" |
| 69 | , [BBOpen "t", BBStr "test[/t]"]) | 79 | , [BBOpen "t" [], BBStr "test[/t]"]) |
| 70 | , ("[\\t]test[/t]" | 80 | , ("[\\t]test[/t]" |
| 71 | , [BBOpen "\\t", BBStr "test", BBClose "t"]) | 81 | , [BBOpen "\\t" [], BBStr "test", BBClose "t"]) |
| 72 | , ("[t]test[/t\\]]" | 82 | , ("[t]test[/t\\]]" |
| 73 | , [BBOpen "t", BBStr "test", BBClose "t]"]) | 83 | , [BBOpen "t" [], BBStr "test", BBClose "t]"]) |
| 74 | ] | 84 | ] |
