diff options
| -rw-r--r-- | bbcode/src/Text/BBCode.hs | 48 | ||||
| -rw-r--r-- | bbcode/src/Text/BBCode/Lexer.hs | 12 | ||||
| -rw-r--r-- | bbcode/test/Text/BBCode/LexerSpec.hs | 18 |
3 files changed, 55 insertions, 23 deletions
diff --git a/bbcode/src/Text/BBCode.hs b/bbcode/src/Text/BBCode.hs index 1e9960a..30b1da8 100644 --- a/bbcode/src/Text/BBCode.hs +++ b/bbcode/src/Text/BBCode.hs | |||
| @@ -1,9 +1,11 @@ | |||
| 1 | {-# LANGUAGE OverloadedStrings #-} | 1 | {-# LANGUAGE OverloadedStrings #-} |
| 2 | {-# LANGUAGE DeriveGeneric #-} | 2 | {-# LANGUAGE DeriveGeneric #-} |
| 3 | 3 | ||
| 4 | -- | An implementation of BBcode parsing 'Text' to a syntax tree (@'Forest' 'BBLabel'@) | 4 | -- | An implementation of BBcode parsing 'Text' to a syntax tree |
| 5 | module Text.BBCode | 5 | module Text.BBCode |
| 6 | ( TreeError(..) | 6 | ( TreeError(..) |
| 7 | , DomTree(..) | ||
| 8 | , dom | ||
| 7 | , BBLabel | 9 | , BBLabel |
| 8 | , rose | 10 | , rose |
| 9 | , matches | 11 | , matches |
| @@ -15,11 +17,13 @@ import GHC.Generics (Generic) | |||
| 15 | import Control.Exception (Exception) | 17 | import Control.Exception (Exception) |
| 16 | import Data.Typeable (Typeable) | 18 | import Data.Typeable (Typeable) |
| 17 | 19 | ||
| 18 | import Control.Monad (unless) | 20 | import Control.Monad (unless, join) |
| 19 | import Data.Function (on) | 21 | import Data.Function (on) |
| 22 | import Control.Applicative | ||
| 20 | 23 | ||
| 21 | import Text.BBCode.Lexer (BBToken(..), token) | 24 | import Text.BBCode.Lexer (BBToken(..), token) |
| 22 | 25 | import Data.Attoparsec.Text (parseOnly, endOfInput) | |
| 26 | |||
| 23 | import Data.Tree | 27 | import Data.Tree |
| 24 | import Data.Tree.Zipper (TreePos, Empty, Full) | 28 | import Data.Tree.Zipper (TreePos, Empty, Full) |
| 25 | import qualified Data.Tree.Zipper as Z | 29 | import qualified Data.Tree.Zipper as Z |
| @@ -30,6 +34,28 @@ import qualified Data.Map as Map | |||
| 30 | import Data.CaseInsensitive (CI) | 34 | import Data.CaseInsensitive (CI) |
| 31 | import qualified Data.CaseInsensitive as CI | 35 | import qualified Data.CaseInsensitive as CI |
| 32 | 36 | ||
| 37 | import Data.Bifunctor (Bifunctor(first)) | ||
| 38 | |||
| 39 | data DomTree = Element Text (Map Text Text) [DomTree] | ||
| 40 | | Content Text | ||
| 41 | deriving (Show, Eq) | ||
| 42 | |||
| 43 | dom :: Forest BBLabel -> [DomTree] | ||
| 44 | dom = map dom' | ||
| 45 | where | ||
| 46 | dom' (Node (BBPlain t) _) = Content t | ||
| 47 | dom' (Node (BBTag t attrs) ts) = Element t attrs $ map dom' ts | ||
| 48 | |||
| 49 | -- | Errors encountered during parsing | ||
| 50 | data BBCodeError = LexerError String -- ^ Error while parsing input to stream of tokens | ||
| 51 | | TreeError TreeError -- ^ Error while parsing stream of tokens to syntax tree | ||
| 52 | deriving (Show, Eq, Generic, Typeable) | ||
| 53 | |||
| 54 | instance Exception BBCodeError | ||
| 55 | |||
| 56 | bbcode :: Text -> Either BBCodeError [DomTree] | ||
| 57 | bbcode t = fmap dom $ first LexerError (parseOnly (many token <* endOfInput) t) >>= first TreeError . rose | ||
| 58 | |||
| 33 | -- | Errors in input encountered during parsing of lexed token-stream | 59 | -- | Errors in input encountered during parsing of lexed token-stream |
| 34 | data TreeError = MismatchedTags Text Text -- ^ Closing tags does not match opening tags | 60 | data TreeError = MismatchedTags Text Text -- ^ Closing tags does not match opening tags |
| 35 | | ImbalancedTags Text -- ^ We found an extraneous closing tag | 61 | | ImbalancedTags Text -- ^ We found an extraneous closing tag |
| @@ -38,7 +64,9 @@ data TreeError = MismatchedTags Text Text -- ^ Closing tags does not match openi | |||
| 38 | instance Exception TreeError | 64 | instance Exception TreeError |
| 39 | 65 | ||
| 40 | -- | The label of our rose-tree nodes carries the tag name and a map of attributes | 66 | -- | The label of our rose-tree nodes carries the tag name and a map of attributes |
| 41 | type BBLabel = (Text, Maybe (Map Text (Maybe Text))) | 67 | data BBLabel = BBTag Text (Map Text Text) |
| 68 | | BBPlain Text | ||
| 69 | deriving (Show, Eq) | ||
| 42 | 70 | ||
| 43 | matches :: Text -> Text -> Bool | 71 | matches :: Text -> Text -> Bool |
| 44 | -- ^ @`matches` "open" "close"@ should be 'True' iff @[/close]@ is a valid closing tag for @[open]@ | 72 | -- ^ @`matches` "open" "close"@ should be 'True' iff @[/close]@ is a valid closing tag for @[open]@ |
| @@ -47,21 +75,25 @@ matches :: Text -> Text -> Bool | |||
| 47 | matches = (==) `on` CI.mk | 75 | matches = (==) `on` CI.mk |
| 48 | 76 | ||
| 49 | rose :: [BBToken] -> Either TreeError (Forest BBLabel) | 77 | rose :: [BBToken] -> Either TreeError (Forest BBLabel) |
| 50 | -- ^ Assuming that both tags and content have the same type (we use 'Text') bbcode is a flat representation of a rose tree | 78 | -- ^ Assuming that both tags and content have the same type (we use 'BBLabel') bbcode is a flat representation of a rose tree |
| 79 | -- | ||
| 80 | -- We use @'Tree' 'BBLabel'@ only as another intermediate structure because it carries no guarantee that the data is semantically valid -- a 'BBPlain'-value semantically has no children. | ||
| 81 | -- | ||
| 82 | -- The use of 'Tree' was still deemed desirable because the morphism to a more sensible structure is straightforward and 'Data.Tree.Zipper' provides all the tools needed to implement 'rose' in a sensible fashion | ||
| 51 | rose = fmap Z.toForest . flip rose' (Z.fromForest []) | 83 | rose = fmap Z.toForest . flip rose' (Z.fromForest []) |
| 52 | where | 84 | where |
| 53 | rose' :: [BBToken] -> TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel) | 85 | rose' :: [BBToken] -> TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel) |
| 54 | rose' [] = return | 86 | rose' [] = return |
| 55 | rose' (x:xs) = (>>= rose' xs) . rose'' x | 87 | rose' (x:xs) = (>>= rose' xs) . rose'' x |
| 56 | 88 | ||
| 57 | rose'' (BBStr t) = return . Z.nextSpace . Z.insert (Node (t, Nothing) []) | 89 | rose'' (BBStr t) = return . Z.nextSpace . Z.insert (Node (BBPlain t) []) |
| 58 | rose'' (BBOpen t attrs) = return . Z.children . Z.insert (Node (t, Just $ Map.fromList attrs) []) | 90 | rose'' (BBOpen t attrs) = return . Z.children . Z.insert (Node (BBTag t $ Map.fromList attrs) []) |
| 59 | rose'' (BBClose t) = close t -- for more pointless | 91 | rose'' (BBClose t) = close t -- for more pointless |
| 60 | 92 | ||
| 61 | close :: Text -> TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel) | 93 | close :: Text -> TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel) |
| 62 | close tag pos = do | 94 | close tag pos = do |
| 63 | pos' <- maybe (Left $ ImbalancedTags tag) Right $ Z.parent pos | 95 | pos' <- maybe (Left $ ImbalancedTags tag) Right $ Z.parent pos |
| 64 | let | 96 | let |
| 65 | pTag = fst $ Z.label pos' | 97 | pTag = (\(BBTag t _) -> t) $ Z.label pos' |
| 66 | unless (pTag `matches` tag) . Left $ MismatchedTags pTag tag -- The structure shows that this mode of failure is not logically required -- it's just nice to have | 98 | unless (pTag `matches` tag) . Left $ MismatchedTags pTag tag -- The structure shows that this mode of failure is not logically required -- it's just nice to have |
| 67 | return $ Z.nextSpace pos' | 99 | return $ Z.nextSpace pos' |
diff --git a/bbcode/src/Text/BBCode/Lexer.hs b/bbcode/src/Text/BBCode/Lexer.hs index 03f57d2..560324b 100644 --- a/bbcode/src/Text/BBCode/Lexer.hs +++ b/bbcode/src/Text/BBCode/Lexer.hs | |||
| @@ -21,7 +21,7 @@ import Control.Applicative | |||
| 21 | import Prelude hiding (takeWhile) | 21 | import Prelude hiding (takeWhile) |
| 22 | 22 | ||
| 23 | -- | Our lexicographical unit | 23 | -- | Our lexicographical unit |
| 24 | data BBToken = BBOpen Text [(Text, Maybe Text)] -- ^ Tag open with attributes | 24 | data BBToken = BBOpen Text [(Text, Text)] -- ^ Tag open with attributes |
| 25 | | BBClose Text -- ^ Tag close | 25 | | BBClose Text -- ^ Tag close |
| 26 | | BBStr Text -- ^ Content of a tag | 26 | | BBStr Text -- ^ Content of a tag |
| 27 | deriving (Eq, Show) | 27 | deriving (Eq, Show) |
| @@ -32,18 +32,18 @@ token = BBClose <$> ("[/" *> escapedText' [']'] <* "]") | |||
| 32 | <|> uncurry BBOpen <$ "[" <*> openTag <* "]" | 32 | <|> uncurry BBOpen <$ "[" <*> openTag <* "]" |
| 33 | <|> BBStr <$> escapedText ['['] | 33 | <|> BBStr <$> escapedText ['['] |
| 34 | 34 | ||
| 35 | openTag :: Parser (Text, [(Text, Maybe Text)]) | 35 | openTag :: Parser (Text, [(Text, Text)]) |
| 36 | openTag = (,) <$> escapedText' [']', ' ', '='] <*> attrs' | 36 | openTag = (,) <$> escapedText' [']', ' ', '='] <*> attrs' |
| 37 | 37 | ||
| 38 | attrs :: Parser [(Text, Maybe Text)] | 38 | attrs :: Parser [(Text, Text)] |
| 39 | attrs = (:) <$> (namedAttr <|> plainValue) <* takeWhile isSpace <*> attrs' | 39 | attrs = (:) <$> (namedAttr <|> plainValue) <* takeWhile isSpace <*> attrs' |
| 40 | where | 40 | where |
| 41 | namedAttr = (,) <$ takeWhile isSpace <*> escapedText ['=', ']', ' '] <*> optional ("=" *> attrArg) | 41 | namedAttr = (,) <$ takeWhile isSpace <*> escapedText ['=', ']', ' '] <*> option "" ("=" *> attrArg) |
| 42 | plainValue = (,) <$> pure "" <* "=" <*> (Just <$> attrArg) | 42 | plainValue = (,) <$> pure "" <* "=" <*> attrArg |
| 43 | attrArg = "\"" *> escapedText ['"'] <* "\"" | 43 | attrArg = "\"" *> escapedText ['"'] <* "\"" |
| 44 | <|> escapedText [']', ' '] | 44 | <|> escapedText [']', ' '] |
| 45 | 45 | ||
| 46 | attrs' :: Parser [(Text, Maybe Text)] | 46 | attrs' :: Parser [(Text, Text)] |
| 47 | attrs' = option [] attrs | 47 | attrs' = option [] attrs |
| 48 | 48 | ||
| 49 | escapedText :: [Char] -> Parser Text | 49 | escapedText :: [Char] -> Parser Text |
diff --git a/bbcode/test/Text/BBCode/LexerSpec.hs b/bbcode/test/Text/BBCode/LexerSpec.hs index fcc1c4f..5c4b89e 100644 --- a/bbcode/test/Text/BBCode/LexerSpec.hs +++ b/bbcode/test/Text/BBCode/LexerSpec.hs | |||
| @@ -27,8 +27,8 @@ coToken (BBOpen t []) = "[" <> escape [']'] t <> "]" | |||
| 27 | coToken (BBOpen t xs) = "[" <> escape [']'] t <> " " <> attrs <> " ]" | 27 | coToken (BBOpen t xs) = "[" <> escape [']'] t <> " " <> attrs <> " ]" |
| 28 | where | 28 | where |
| 29 | attrs = mconcat . intersperse " " $ map attr xs | 29 | attrs = mconcat . intersperse " " $ map attr xs |
| 30 | attr (key, Nothing) = escape ['=', ']', ' '] key | 30 | attr (key, "") = escape ['=', ']', ' '] key |
| 31 | attr (key, Just val) = escape ['=', ']', ' '] key <> "=\"" <> escape ['\"'] val <> "\"" | 31 | attr (key, val) = escape ['=', ']', ' '] key <> "=\"" <> escape ['\"'] val <> "\"" |
| 32 | coToken (BBClose t) = "[/" <> escape [']'] t <> "]" | 32 | coToken (BBClose t) = "[/" <> escape [']'] t <> "]" |
| 33 | coToken (BBStr t) = escape ['['] t | 33 | coToken (BBStr t) = escape ['['] t |
| 34 | 34 | ||
| @@ -82,17 +82,17 @@ examples = [ ("[t]test[/t]" | |||
| 82 | , ("[t]test[/t\\]]" | 82 | , ("[t]test[/t\\]]" |
| 83 | , [BBOpen "t" [], BBStr "test", BBClose "t]"]) | 83 | , [BBOpen "t" [], BBStr "test", BBClose "t]"]) |
| 84 | , ("[t attr]test[/t]" | 84 | , ("[t attr]test[/t]" |
| 85 | , [BBOpen "t" [("attr", Nothing)], BBStr "test", BBClose "t"]) | 85 | , [BBOpen "t" [("attr", "")], BBStr "test", BBClose "t"]) |
| 86 | , ("[t=attr]test[/t]" | 86 | , ("[t=attr]test[/t]" |
| 87 | , [BBOpen "t" [("", Just "attr")], BBStr "test", BBClose "t"]) | 87 | , [BBOpen "t" [("", "attr")], BBStr "test", BBClose "t"]) |
| 88 | , ("[t attr=val]test[/t]" | 88 | , ("[t attr=val]test[/t]" |
| 89 | , [BBOpen "t" [("attr", Just "val")], BBStr "test", BBClose "t"]) | 89 | , [BBOpen "t" [("attr", "val")], BBStr "test", BBClose "t"]) |
| 90 | , ("[t attr=\"val\"]test[/t]" | 90 | , ("[t attr=\"val\"]test[/t]" |
| 91 | , [BBOpen "t" [("attr", Just "val")], BBStr "test", BBClose "t"]) | 91 | , [BBOpen "t" [("attr", "val")], BBStr "test", BBClose "t"]) |
| 92 | , ("[t attr=\"va]l\"]test[/t]" | 92 | , ("[t attr=\"va]l\"]test[/t]" |
| 93 | , [BBOpen "t" [("attr", Just "va]l")], BBStr "test", BBClose "t"]) | 93 | , [BBOpen "t" [("attr", "va]l")], BBStr "test", BBClose "t"]) |
| 94 | , ("[t attr=\"va\\\"l\"]test[/t]" | 94 | , ("[t attr=\"va\\\"l\"]test[/t]" |
| 95 | , [BBOpen "t" [("attr", Just "va\"l")], BBStr "test", BBClose "t"]) | 95 | , [BBOpen "t" [("attr", "va\"l")], BBStr "test", BBClose "t"]) |
| 96 | , ("[t attr=\"val\" attr2=\"val2\" ]test[/t]" | 96 | , ("[t attr=\"val\" attr2=\"val2\" ]test[/t]" |
| 97 | , [BBOpen "t" [("attr", Just "val"), ("attr2", Just "val2")], BBStr "test", BBClose "t"]) | 97 | , [BBOpen "t" [("attr", "val"), ("attr2", "val2")], BBStr "test", BBClose "t"]) |
| 98 | ] | 98 | ] |
