diff options
author | Gregor Kleen <gkleen@yggdrasil.li> | 2016-01-12 09:39:42 +0100 |
---|---|---|
committer | Gregor Kleen <gkleen@yggdrasil.li> | 2016-01-12 09:39:42 +0100 |
commit | 478bc6572d3ba508bddf1fdcf697e5a9e56e4055 (patch) | |
tree | 8470747547cdc304c7573b4c5545119ea3e3374b /bbcode | |
parent | 07759433a7e075e99267e2ea04f232c99118c9fd (diff) | |
download | thermoprint-478bc6572d3ba508bddf1fdcf697e5a9e56e4055.tar thermoprint-478bc6572d3ba508bddf1fdcf697e5a9e56e4055.tar.gz thermoprint-478bc6572d3ba508bddf1fdcf697e5a9e56e4055.tar.bz2 thermoprint-478bc6572d3ba508bddf1fdcf697e5a9e56e4055.tar.xz thermoprint-478bc6572d3ba508bddf1fdcf697e5a9e56e4055.zip |
DomTree
Diffstat (limited to 'bbcode')
-rw-r--r-- | bbcode/src/Text/BBCode.hs | 48 | ||||
-rw-r--r-- | bbcode/src/Text/BBCode/Lexer.hs | 12 | ||||
-rw-r--r-- | bbcode/test/Text/BBCode/LexerSpec.hs | 18 |
3 files changed, 55 insertions, 23 deletions
diff --git a/bbcode/src/Text/BBCode.hs b/bbcode/src/Text/BBCode.hs index 1e9960a..30b1da8 100644 --- a/bbcode/src/Text/BBCode.hs +++ b/bbcode/src/Text/BBCode.hs | |||
@@ -1,9 +1,11 @@ | |||
1 | {-# LANGUAGE OverloadedStrings #-} | 1 | {-# LANGUAGE OverloadedStrings #-} |
2 | {-# LANGUAGE DeriveGeneric #-} | 2 | {-# LANGUAGE DeriveGeneric #-} |
3 | 3 | ||
4 | -- | An implementation of BBcode parsing 'Text' to a syntax tree (@'Forest' 'BBLabel'@) | 4 | -- | An implementation of BBcode parsing 'Text' to a syntax tree |
5 | module Text.BBCode | 5 | module Text.BBCode |
6 | ( TreeError(..) | 6 | ( TreeError(..) |
7 | , DomTree(..) | ||
8 | , dom | ||
7 | , BBLabel | 9 | , BBLabel |
8 | , rose | 10 | , rose |
9 | , matches | 11 | , matches |
@@ -15,11 +17,13 @@ import GHC.Generics (Generic) | |||
15 | import Control.Exception (Exception) | 17 | import Control.Exception (Exception) |
16 | import Data.Typeable (Typeable) | 18 | import Data.Typeable (Typeable) |
17 | 19 | ||
18 | import Control.Monad (unless) | 20 | import Control.Monad (unless, join) |
19 | import Data.Function (on) | 21 | import Data.Function (on) |
22 | import Control.Applicative | ||
20 | 23 | ||
21 | import Text.BBCode.Lexer (BBToken(..), token) | 24 | import Text.BBCode.Lexer (BBToken(..), token) |
22 | 25 | import Data.Attoparsec.Text (parseOnly, endOfInput) | |
26 | |||
23 | import Data.Tree | 27 | import Data.Tree |
24 | import Data.Tree.Zipper (TreePos, Empty, Full) | 28 | import Data.Tree.Zipper (TreePos, Empty, Full) |
25 | import qualified Data.Tree.Zipper as Z | 29 | import qualified Data.Tree.Zipper as Z |
@@ -30,6 +34,28 @@ import qualified Data.Map as Map | |||
30 | import Data.CaseInsensitive (CI) | 34 | import Data.CaseInsensitive (CI) |
31 | import qualified Data.CaseInsensitive as CI | 35 | import qualified Data.CaseInsensitive as CI |
32 | 36 | ||
37 | import Data.Bifunctor (Bifunctor(first)) | ||
38 | |||
39 | data DomTree = Element Text (Map Text Text) [DomTree] | ||
40 | | Content Text | ||
41 | deriving (Show, Eq) | ||
42 | |||
43 | dom :: Forest BBLabel -> [DomTree] | ||
44 | dom = map dom' | ||
45 | where | ||
46 | dom' (Node (BBPlain t) _) = Content t | ||
47 | dom' (Node (BBTag t attrs) ts) = Element t attrs $ map dom' ts | ||
48 | |||
49 | -- | Errors encountered during parsing | ||
50 | data BBCodeError = LexerError String -- ^ Error while parsing input to stream of tokens | ||
51 | | TreeError TreeError -- ^ Error while parsing stream of tokens to syntax tree | ||
52 | deriving (Show, Eq, Generic, Typeable) | ||
53 | |||
54 | instance Exception BBCodeError | ||
55 | |||
56 | bbcode :: Text -> Either BBCodeError [DomTree] | ||
57 | bbcode t = fmap dom $ first LexerError (parseOnly (many token <* endOfInput) t) >>= first TreeError . rose | ||
58 | |||
33 | -- | Errors in input encountered during parsing of lexed token-stream | 59 | -- | Errors in input encountered during parsing of lexed token-stream |
34 | data TreeError = MismatchedTags Text Text -- ^ Closing tags does not match opening tags | 60 | data TreeError = MismatchedTags Text Text -- ^ Closing tags does not match opening tags |
35 | | ImbalancedTags Text -- ^ We found an extraneous closing tag | 61 | | ImbalancedTags Text -- ^ We found an extraneous closing tag |
@@ -38,7 +64,9 @@ data TreeError = MismatchedTags Text Text -- ^ Closing tags does not match openi | |||
38 | instance Exception TreeError | 64 | instance Exception TreeError |
39 | 65 | ||
40 | -- | The label of our rose-tree nodes carries the tag name and a map of attributes | 66 | -- | The label of our rose-tree nodes carries the tag name and a map of attributes |
41 | type BBLabel = (Text, Maybe (Map Text (Maybe Text))) | 67 | data BBLabel = BBTag Text (Map Text Text) |
68 | | BBPlain Text | ||
69 | deriving (Show, Eq) | ||
42 | 70 | ||
43 | matches :: Text -> Text -> Bool | 71 | matches :: Text -> Text -> Bool |
44 | -- ^ @`matches` "open" "close"@ should be 'True' iff @[/close]@ is a valid closing tag for @[open]@ | 72 | -- ^ @`matches` "open" "close"@ should be 'True' iff @[/close]@ is a valid closing tag for @[open]@ |
@@ -47,21 +75,25 @@ matches :: Text -> Text -> Bool | |||
47 | matches = (==) `on` CI.mk | 75 | matches = (==) `on` CI.mk |
48 | 76 | ||
49 | rose :: [BBToken] -> Either TreeError (Forest BBLabel) | 77 | rose :: [BBToken] -> Either TreeError (Forest BBLabel) |
50 | -- ^ Assuming that both tags and content have the same type (we use 'Text') bbcode is a flat representation of a rose tree | 78 | -- ^ Assuming that both tags and content have the same type (we use 'BBLabel') bbcode is a flat representation of a rose tree |
79 | -- | ||
80 | -- We use @'Tree' 'BBLabel'@ only as another intermediate structure because it carries no guarantee that the data is semantically valid -- a 'BBPlain'-value semantically has no children. | ||
81 | -- | ||
82 | -- The use of 'Tree' was still deemed desirable because the morphism to a more sensible structure is straightforward and 'Data.Tree.Zipper' provides all the tools needed to implement 'rose' in a sensible fashion | ||
51 | rose = fmap Z.toForest . flip rose' (Z.fromForest []) | 83 | rose = fmap Z.toForest . flip rose' (Z.fromForest []) |
52 | where | 84 | where |
53 | rose' :: [BBToken] -> TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel) | 85 | rose' :: [BBToken] -> TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel) |
54 | rose' [] = return | 86 | rose' [] = return |
55 | rose' (x:xs) = (>>= rose' xs) . rose'' x | 87 | rose' (x:xs) = (>>= rose' xs) . rose'' x |
56 | 88 | ||
57 | rose'' (BBStr t) = return . Z.nextSpace . Z.insert (Node (t, Nothing) []) | 89 | rose'' (BBStr t) = return . Z.nextSpace . Z.insert (Node (BBPlain t) []) |
58 | rose'' (BBOpen t attrs) = return . Z.children . Z.insert (Node (t, Just $ Map.fromList attrs) []) | 90 | rose'' (BBOpen t attrs) = return . Z.children . Z.insert (Node (BBTag t $ Map.fromList attrs) []) |
59 | rose'' (BBClose t) = close t -- for more pointless | 91 | rose'' (BBClose t) = close t -- for more pointless |
60 | 92 | ||
61 | close :: Text -> TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel) | 93 | close :: Text -> TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel) |
62 | close tag pos = do | 94 | close tag pos = do |
63 | pos' <- maybe (Left $ ImbalancedTags tag) Right $ Z.parent pos | 95 | pos' <- maybe (Left $ ImbalancedTags tag) Right $ Z.parent pos |
64 | let | 96 | let |
65 | pTag = fst $ Z.label pos' | 97 | pTag = (\(BBTag t _) -> t) $ Z.label pos' |
66 | unless (pTag `matches` tag) . Left $ MismatchedTags pTag tag -- The structure shows that this mode of failure is not logically required -- it's just nice to have | 98 | unless (pTag `matches` tag) . Left $ MismatchedTags pTag tag -- The structure shows that this mode of failure is not logically required -- it's just nice to have |
67 | return $ Z.nextSpace pos' | 99 | return $ Z.nextSpace pos' |
diff --git a/bbcode/src/Text/BBCode/Lexer.hs b/bbcode/src/Text/BBCode/Lexer.hs index 03f57d2..560324b 100644 --- a/bbcode/src/Text/BBCode/Lexer.hs +++ b/bbcode/src/Text/BBCode/Lexer.hs | |||
@@ -21,7 +21,7 @@ import Control.Applicative | |||
21 | import Prelude hiding (takeWhile) | 21 | import Prelude hiding (takeWhile) |
22 | 22 | ||
23 | -- | Our lexicographical unit | 23 | -- | Our lexicographical unit |
24 | data BBToken = BBOpen Text [(Text, Maybe Text)] -- ^ Tag open with attributes | 24 | data BBToken = BBOpen Text [(Text, Text)] -- ^ Tag open with attributes |
25 | | BBClose Text -- ^ Tag close | 25 | | BBClose Text -- ^ Tag close |
26 | | BBStr Text -- ^ Content of a tag | 26 | | BBStr Text -- ^ Content of a tag |
27 | deriving (Eq, Show) | 27 | deriving (Eq, Show) |
@@ -32,18 +32,18 @@ token = BBClose <$> ("[/" *> escapedText' [']'] <* "]") | |||
32 | <|> uncurry BBOpen <$ "[" <*> openTag <* "]" | 32 | <|> uncurry BBOpen <$ "[" <*> openTag <* "]" |
33 | <|> BBStr <$> escapedText ['['] | 33 | <|> BBStr <$> escapedText ['['] |
34 | 34 | ||
35 | openTag :: Parser (Text, [(Text, Maybe Text)]) | 35 | openTag :: Parser (Text, [(Text, Text)]) |
36 | openTag = (,) <$> escapedText' [']', ' ', '='] <*> attrs' | 36 | openTag = (,) <$> escapedText' [']', ' ', '='] <*> attrs' |
37 | 37 | ||
38 | attrs :: Parser [(Text, Maybe Text)] | 38 | attrs :: Parser [(Text, Text)] |
39 | attrs = (:) <$> (namedAttr <|> plainValue) <* takeWhile isSpace <*> attrs' | 39 | attrs = (:) <$> (namedAttr <|> plainValue) <* takeWhile isSpace <*> attrs' |
40 | where | 40 | where |
41 | namedAttr = (,) <$ takeWhile isSpace <*> escapedText ['=', ']', ' '] <*> optional ("=" *> attrArg) | 41 | namedAttr = (,) <$ takeWhile isSpace <*> escapedText ['=', ']', ' '] <*> option "" ("=" *> attrArg) |
42 | plainValue = (,) <$> pure "" <* "=" <*> (Just <$> attrArg) | 42 | plainValue = (,) <$> pure "" <* "=" <*> attrArg |
43 | attrArg = "\"" *> escapedText ['"'] <* "\"" | 43 | attrArg = "\"" *> escapedText ['"'] <* "\"" |
44 | <|> escapedText [']', ' '] | 44 | <|> escapedText [']', ' '] |
45 | 45 | ||
46 | attrs' :: Parser [(Text, Maybe Text)] | 46 | attrs' :: Parser [(Text, Text)] |
47 | attrs' = option [] attrs | 47 | attrs' = option [] attrs |
48 | 48 | ||
49 | escapedText :: [Char] -> Parser Text | 49 | escapedText :: [Char] -> Parser Text |
diff --git a/bbcode/test/Text/BBCode/LexerSpec.hs b/bbcode/test/Text/BBCode/LexerSpec.hs index fcc1c4f..5c4b89e 100644 --- a/bbcode/test/Text/BBCode/LexerSpec.hs +++ b/bbcode/test/Text/BBCode/LexerSpec.hs | |||
@@ -27,8 +27,8 @@ coToken (BBOpen t []) = "[" <> escape [']'] t <> "]" | |||
27 | coToken (BBOpen t xs) = "[" <> escape [']'] t <> " " <> attrs <> " ]" | 27 | coToken (BBOpen t xs) = "[" <> escape [']'] t <> " " <> attrs <> " ]" |
28 | where | 28 | where |
29 | attrs = mconcat . intersperse " " $ map attr xs | 29 | attrs = mconcat . intersperse " " $ map attr xs |
30 | attr (key, Nothing) = escape ['=', ']', ' '] key | 30 | attr (key, "") = escape ['=', ']', ' '] key |
31 | attr (key, Just val) = escape ['=', ']', ' '] key <> "=\"" <> escape ['\"'] val <> "\"" | 31 | attr (key, val) = escape ['=', ']', ' '] key <> "=\"" <> escape ['\"'] val <> "\"" |
32 | coToken (BBClose t) = "[/" <> escape [']'] t <> "]" | 32 | coToken (BBClose t) = "[/" <> escape [']'] t <> "]" |
33 | coToken (BBStr t) = escape ['['] t | 33 | coToken (BBStr t) = escape ['['] t |
34 | 34 | ||
@@ -82,17 +82,17 @@ examples = [ ("[t]test[/t]" | |||
82 | , ("[t]test[/t\\]]" | 82 | , ("[t]test[/t\\]]" |
83 | , [BBOpen "t" [], BBStr "test", BBClose "t]"]) | 83 | , [BBOpen "t" [], BBStr "test", BBClose "t]"]) |
84 | , ("[t attr]test[/t]" | 84 | , ("[t attr]test[/t]" |
85 | , [BBOpen "t" [("attr", Nothing)], BBStr "test", BBClose "t"]) | 85 | , [BBOpen "t" [("attr", "")], BBStr "test", BBClose "t"]) |
86 | , ("[t=attr]test[/t]" | 86 | , ("[t=attr]test[/t]" |
87 | , [BBOpen "t" [("", Just "attr")], BBStr "test", BBClose "t"]) | 87 | , [BBOpen "t" [("", "attr")], BBStr "test", BBClose "t"]) |
88 | , ("[t attr=val]test[/t]" | 88 | , ("[t attr=val]test[/t]" |
89 | , [BBOpen "t" [("attr", Just "val")], BBStr "test", BBClose "t"]) | 89 | , [BBOpen "t" [("attr", "val")], BBStr "test", BBClose "t"]) |
90 | , ("[t attr=\"val\"]test[/t]" | 90 | , ("[t attr=\"val\"]test[/t]" |
91 | , [BBOpen "t" [("attr", Just "val")], BBStr "test", BBClose "t"]) | 91 | , [BBOpen "t" [("attr", "val")], BBStr "test", BBClose "t"]) |
92 | , ("[t attr=\"va]l\"]test[/t]" | 92 | , ("[t attr=\"va]l\"]test[/t]" |
93 | , [BBOpen "t" [("attr", Just "va]l")], BBStr "test", BBClose "t"]) | 93 | , [BBOpen "t" [("attr", "va]l")], BBStr "test", BBClose "t"]) |
94 | , ("[t attr=\"va\\\"l\"]test[/t]" | 94 | , ("[t attr=\"va\\\"l\"]test[/t]" |
95 | , [BBOpen "t" [("attr", Just "va\"l")], BBStr "test", BBClose "t"]) | 95 | , [BBOpen "t" [("attr", "va\"l")], BBStr "test", BBClose "t"]) |
96 | , ("[t attr=\"val\" attr2=\"val2\" ]test[/t]" | 96 | , ("[t attr=\"val\" attr2=\"val2\" ]test[/t]" |
97 | , [BBOpen "t" [("attr", Just "val"), ("attr2", Just "val2")], BBStr "test", BBClose "t"]) | 97 | , [BBOpen "t" [("attr", "val"), ("attr2", "val2")], BBStr "test", BBClose "t"]) |
98 | ] | 98 | ] |