diff options
Diffstat (limited to 'bbcode/src/Text')
-rw-r--r-- | bbcode/src/Text/BBCode.hs | 48 | ||||
-rw-r--r-- | bbcode/src/Text/BBCode/Lexer.hs | 12 |
2 files changed, 46 insertions, 14 deletions
diff --git a/bbcode/src/Text/BBCode.hs b/bbcode/src/Text/BBCode.hs index 1e9960a..30b1da8 100644 --- a/bbcode/src/Text/BBCode.hs +++ b/bbcode/src/Text/BBCode.hs | |||
@@ -1,9 +1,11 @@ | |||
1 | {-# LANGUAGE OverloadedStrings #-} | 1 | {-# LANGUAGE OverloadedStrings #-} |
2 | {-# LANGUAGE DeriveGeneric #-} | 2 | {-# LANGUAGE DeriveGeneric #-} |
3 | 3 | ||
4 | -- | An implementation of BBcode parsing 'Text' to a syntax tree (@'Forest' 'BBLabel'@) | 4 | -- | An implementation of BBcode parsing 'Text' to a syntax tree |
5 | module Text.BBCode | 5 | module Text.BBCode |
6 | ( TreeError(..) | 6 | ( TreeError(..) |
7 | , DomTree(..) | ||
8 | , dom | ||
7 | , BBLabel | 9 | , BBLabel |
8 | , rose | 10 | , rose |
9 | , matches | 11 | , matches |
@@ -15,11 +17,13 @@ import GHC.Generics (Generic) | |||
15 | import Control.Exception (Exception) | 17 | import Control.Exception (Exception) |
16 | import Data.Typeable (Typeable) | 18 | import Data.Typeable (Typeable) |
17 | 19 | ||
18 | import Control.Monad (unless) | 20 | import Control.Monad (unless, join) |
19 | import Data.Function (on) | 21 | import Data.Function (on) |
22 | import Control.Applicative | ||
20 | 23 | ||
21 | import Text.BBCode.Lexer (BBToken(..), token) | 24 | import Text.BBCode.Lexer (BBToken(..), token) |
22 | 25 | import Data.Attoparsec.Text (parseOnly, endOfInput) | |
26 | |||
23 | import Data.Tree | 27 | import Data.Tree |
24 | import Data.Tree.Zipper (TreePos, Empty, Full) | 28 | import Data.Tree.Zipper (TreePos, Empty, Full) |
25 | import qualified Data.Tree.Zipper as Z | 29 | import qualified Data.Tree.Zipper as Z |
@@ -30,6 +34,28 @@ import qualified Data.Map as Map | |||
30 | import Data.CaseInsensitive (CI) | 34 | import Data.CaseInsensitive (CI) |
31 | import qualified Data.CaseInsensitive as CI | 35 | import qualified Data.CaseInsensitive as CI |
32 | 36 | ||
37 | import Data.Bifunctor (Bifunctor(first)) | ||
38 | |||
39 | data DomTree = Element Text (Map Text Text) [DomTree] | ||
40 | | Content Text | ||
41 | deriving (Show, Eq) | ||
42 | |||
43 | dom :: Forest BBLabel -> [DomTree] | ||
44 | dom = map dom' | ||
45 | where | ||
46 | dom' (Node (BBPlain t) _) = Content t | ||
47 | dom' (Node (BBTag t attrs) ts) = Element t attrs $ map dom' ts | ||
48 | |||
49 | -- | Errors encountered during parsing | ||
50 | data BBCodeError = LexerError String -- ^ Error while parsing input to stream of tokens | ||
51 | | TreeError TreeError -- ^ Error while parsing stream of tokens to syntax tree | ||
52 | deriving (Show, Eq, Generic, Typeable) | ||
53 | |||
54 | instance Exception BBCodeError | ||
55 | |||
56 | bbcode :: Text -> Either BBCodeError [DomTree] | ||
57 | bbcode t = fmap dom $ first LexerError (parseOnly (many token <* endOfInput) t) >>= first TreeError . rose | ||
58 | |||
33 | -- | Errors in input encountered during parsing of lexed token-stream | 59 | -- | Errors in input encountered during parsing of lexed token-stream |
34 | data TreeError = MismatchedTags Text Text -- ^ Closing tags does not match opening tags | 60 | data TreeError = MismatchedTags Text Text -- ^ Closing tags does not match opening tags |
35 | | ImbalancedTags Text -- ^ We found an extraneous closing tag | 61 | | ImbalancedTags Text -- ^ We found an extraneous closing tag |
@@ -38,7 +64,9 @@ data TreeError = MismatchedTags Text Text -- ^ Closing tags does not match openi | |||
38 | instance Exception TreeError | 64 | instance Exception TreeError |
39 | 65 | ||
40 | -- | The label of our rose-tree nodes carries the tag name and a map of attributes | 66 | -- | The label of our rose-tree nodes carries the tag name and a map of attributes |
41 | type BBLabel = (Text, Maybe (Map Text (Maybe Text))) | 67 | data BBLabel = BBTag Text (Map Text Text) |
68 | | BBPlain Text | ||
69 | deriving (Show, Eq) | ||
42 | 70 | ||
43 | matches :: Text -> Text -> Bool | 71 | matches :: Text -> Text -> Bool |
44 | -- ^ @`matches` "open" "close"@ should be 'True' iff @[/close]@ is a valid closing tag for @[open]@ | 72 | -- ^ @`matches` "open" "close"@ should be 'True' iff @[/close]@ is a valid closing tag for @[open]@ |
@@ -47,21 +75,25 @@ matches :: Text -> Text -> Bool | |||
47 | matches = (==) `on` CI.mk | 75 | matches = (==) `on` CI.mk |
48 | 76 | ||
49 | rose :: [BBToken] -> Either TreeError (Forest BBLabel) | 77 | rose :: [BBToken] -> Either TreeError (Forest BBLabel) |
50 | -- ^ Assuming that both tags and content have the same type (we use 'Text') bbcode is a flat representation of a rose tree | 78 | -- ^ Assuming that both tags and content have the same type (we use 'BBLabel') bbcode is a flat representation of a rose tree |
79 | -- | ||
80 | -- We use @'Tree' 'BBLabel'@ only as another intermediate structure because it carries no guarantee that the data is semantically valid -- a 'BBPlain'-value semantically has no children. | ||
81 | -- | ||
82 | -- The use of 'Tree' was still deemed desirable because the morphism to a more sensible structure is straightforward and 'Data.Tree.Zipper' provides all the tools needed to implement 'rose' in a sensible fashion | ||
51 | rose = fmap Z.toForest . flip rose' (Z.fromForest []) | 83 | rose = fmap Z.toForest . flip rose' (Z.fromForest []) |
52 | where | 84 | where |
53 | rose' :: [BBToken] -> TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel) | 85 | rose' :: [BBToken] -> TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel) |
54 | rose' [] = return | 86 | rose' [] = return |
55 | rose' (x:xs) = (>>= rose' xs) . rose'' x | 87 | rose' (x:xs) = (>>= rose' xs) . rose'' x |
56 | 88 | ||
57 | rose'' (BBStr t) = return . Z.nextSpace . Z.insert (Node (t, Nothing) []) | 89 | rose'' (BBStr t) = return . Z.nextSpace . Z.insert (Node (BBPlain t) []) |
58 | rose'' (BBOpen t attrs) = return . Z.children . Z.insert (Node (t, Just $ Map.fromList attrs) []) | 90 | rose'' (BBOpen t attrs) = return . Z.children . Z.insert (Node (BBTag t $ Map.fromList attrs) []) |
59 | rose'' (BBClose t) = close t -- for more pointless | 91 | rose'' (BBClose t) = close t -- for more pointless |
60 | 92 | ||
61 | close :: Text -> TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel) | 93 | close :: Text -> TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel) |
62 | close tag pos = do | 94 | close tag pos = do |
63 | pos' <- maybe (Left $ ImbalancedTags tag) Right $ Z.parent pos | 95 | pos' <- maybe (Left $ ImbalancedTags tag) Right $ Z.parent pos |
64 | let | 96 | let |
65 | pTag = fst $ Z.label pos' | 97 | pTag = (\(BBTag t _) -> t) $ Z.label pos' |
66 | unless (pTag `matches` tag) . Left $ MismatchedTags pTag tag -- The structure shows that this mode of failure is not logically required -- it's just nice to have | 98 | unless (pTag `matches` tag) . Left $ MismatchedTags pTag tag -- The structure shows that this mode of failure is not logically required -- it's just nice to have |
67 | return $ Z.nextSpace pos' | 99 | return $ Z.nextSpace pos' |
diff --git a/bbcode/src/Text/BBCode/Lexer.hs b/bbcode/src/Text/BBCode/Lexer.hs index 03f57d2..560324b 100644 --- a/bbcode/src/Text/BBCode/Lexer.hs +++ b/bbcode/src/Text/BBCode/Lexer.hs | |||
@@ -21,7 +21,7 @@ import Control.Applicative | |||
21 | import Prelude hiding (takeWhile) | 21 | import Prelude hiding (takeWhile) |
22 | 22 | ||
23 | -- | Our lexicographical unit | 23 | -- | Our lexicographical unit |
24 | data BBToken = BBOpen Text [(Text, Maybe Text)] -- ^ Tag open with attributes | 24 | data BBToken = BBOpen Text [(Text, Text)] -- ^ Tag open with attributes |
25 | | BBClose Text -- ^ Tag close | 25 | | BBClose Text -- ^ Tag close |
26 | | BBStr Text -- ^ Content of a tag | 26 | | BBStr Text -- ^ Content of a tag |
27 | deriving (Eq, Show) | 27 | deriving (Eq, Show) |
@@ -32,18 +32,18 @@ token = BBClose <$> ("[/" *> escapedText' [']'] <* "]") | |||
32 | <|> uncurry BBOpen <$ "[" <*> openTag <* "]" | 32 | <|> uncurry BBOpen <$ "[" <*> openTag <* "]" |
33 | <|> BBStr <$> escapedText ['['] | 33 | <|> BBStr <$> escapedText ['['] |
34 | 34 | ||
35 | openTag :: Parser (Text, [(Text, Maybe Text)]) | 35 | openTag :: Parser (Text, [(Text, Text)]) |
36 | openTag = (,) <$> escapedText' [']', ' ', '='] <*> attrs' | 36 | openTag = (,) <$> escapedText' [']', ' ', '='] <*> attrs' |
37 | 37 | ||
38 | attrs :: Parser [(Text, Maybe Text)] | 38 | attrs :: Parser [(Text, Text)] |
39 | attrs = (:) <$> (namedAttr <|> plainValue) <* takeWhile isSpace <*> attrs' | 39 | attrs = (:) <$> (namedAttr <|> plainValue) <* takeWhile isSpace <*> attrs' |
40 | where | 40 | where |
41 | namedAttr = (,) <$ takeWhile isSpace <*> escapedText ['=', ']', ' '] <*> optional ("=" *> attrArg) | 41 | namedAttr = (,) <$ takeWhile isSpace <*> escapedText ['=', ']', ' '] <*> option "" ("=" *> attrArg) |
42 | plainValue = (,) <$> pure "" <* "=" <*> (Just <$> attrArg) | 42 | plainValue = (,) <$> pure "" <* "=" <*> attrArg |
43 | attrArg = "\"" *> escapedText ['"'] <* "\"" | 43 | attrArg = "\"" *> escapedText ['"'] <* "\"" |
44 | <|> escapedText [']', ' '] | 44 | <|> escapedText [']', ' '] |
45 | 45 | ||
46 | attrs' :: Parser [(Text, Maybe Text)] | 46 | attrs' :: Parser [(Text, Text)] |
47 | attrs' = option [] attrs | 47 | attrs' = option [] attrs |
48 | 48 | ||
49 | escapedText :: [Char] -> Parser Text | 49 | escapedText :: [Char] -> Parser Text |