aboutsummaryrefslogtreecommitdiff
path: root/bbcode/src/Text
diff options
context:
space:
mode:
Diffstat (limited to 'bbcode/src/Text')
-rw-r--r--bbcode/src/Text/BBCode.hs48
-rw-r--r--bbcode/src/Text/BBCode/Lexer.hs12
2 files changed, 46 insertions, 14 deletions
diff --git a/bbcode/src/Text/BBCode.hs b/bbcode/src/Text/BBCode.hs
index 1e9960a..30b1da8 100644
--- a/bbcode/src/Text/BBCode.hs
+++ b/bbcode/src/Text/BBCode.hs
@@ -1,9 +1,11 @@
1{-# LANGUAGE OverloadedStrings #-} 1{-# LANGUAGE OverloadedStrings #-}
2{-# LANGUAGE DeriveGeneric #-} 2{-# LANGUAGE DeriveGeneric #-}
3 3
4-- | An implementation of BBcode parsing 'Text' to a syntax tree (@'Forest' 'BBLabel'@) 4-- | An implementation of BBcode parsing 'Text' to a syntax tree
5module Text.BBCode 5module Text.BBCode
6 ( TreeError(..) 6 ( TreeError(..)
7 , DomTree(..)
8 , dom
7 , BBLabel 9 , BBLabel
8 , rose 10 , rose
9 , matches 11 , matches
@@ -15,11 +17,13 @@ import GHC.Generics (Generic)
15import Control.Exception (Exception) 17import Control.Exception (Exception)
16import Data.Typeable (Typeable) 18import Data.Typeable (Typeable)
17 19
18import Control.Monad (unless) 20import Control.Monad (unless, join)
19import Data.Function (on) 21import Data.Function (on)
22import Control.Applicative
20 23
21import Text.BBCode.Lexer (BBToken(..), token) 24import Text.BBCode.Lexer (BBToken(..), token)
22 25import Data.Attoparsec.Text (parseOnly, endOfInput)
26
23import Data.Tree 27import Data.Tree
24import Data.Tree.Zipper (TreePos, Empty, Full) 28import Data.Tree.Zipper (TreePos, Empty, Full)
25import qualified Data.Tree.Zipper as Z 29import qualified Data.Tree.Zipper as Z
@@ -30,6 +34,28 @@ import qualified Data.Map as Map
30import Data.CaseInsensitive (CI) 34import Data.CaseInsensitive (CI)
31import qualified Data.CaseInsensitive as CI 35import qualified Data.CaseInsensitive as CI
32 36
37import Data.Bifunctor (Bifunctor(first))
38
39data DomTree = Element Text (Map Text Text) [DomTree]
40 | Content Text
41 deriving (Show, Eq)
42
43dom :: Forest BBLabel -> [DomTree]
44dom = map dom'
45 where
46 dom' (Node (BBPlain t) _) = Content t
47 dom' (Node (BBTag t attrs) ts) = Element t attrs $ map dom' ts
48
49-- | Errors encountered during parsing
50data BBCodeError = LexerError String -- ^ Error while parsing input to stream of tokens
51 | TreeError TreeError -- ^ Error while parsing stream of tokens to syntax tree
52 deriving (Show, Eq, Generic, Typeable)
53
54instance Exception BBCodeError
55
56bbcode :: Text -> Either BBCodeError [DomTree]
57bbcode t = fmap dom $ first LexerError (parseOnly (many token <* endOfInput) t) >>= first TreeError . rose
58
33-- | Errors in input encountered during parsing of lexed token-stream 59-- | Errors in input encountered during parsing of lexed token-stream
34data TreeError = MismatchedTags Text Text -- ^ Closing tags does not match opening tags 60data TreeError = MismatchedTags Text Text -- ^ Closing tags does not match opening tags
35 | ImbalancedTags Text -- ^ We found an extraneous closing tag 61 | ImbalancedTags Text -- ^ We found an extraneous closing tag
@@ -38,7 +64,9 @@ data TreeError = MismatchedTags Text Text -- ^ Closing tags does not match openi
38instance Exception TreeError 64instance Exception TreeError
39 65
40-- | The label of our rose-tree nodes carries the tag name and a map of attributes 66-- | The label of our rose-tree nodes carries the tag name and a map of attributes
41type BBLabel = (Text, Maybe (Map Text (Maybe Text))) 67data BBLabel = BBTag Text (Map Text Text)
68 | BBPlain Text
69 deriving (Show, Eq)
42 70
43matches :: Text -> Text -> Bool 71matches :: Text -> Text -> Bool
44-- ^ @`matches` "open" "close"@ should be 'True' iff @[/close]@ is a valid closing tag for @[open]@ 72-- ^ @`matches` "open" "close"@ should be 'True' iff @[/close]@ is a valid closing tag for @[open]@
@@ -47,21 +75,25 @@ matches :: Text -> Text -> Bool
47matches = (==) `on` CI.mk 75matches = (==) `on` CI.mk
48 76
49rose :: [BBToken] -> Either TreeError (Forest BBLabel) 77rose :: [BBToken] -> Either TreeError (Forest BBLabel)
50-- ^ Assuming that both tags and content have the same type (we use 'Text') bbcode is a flat representation of a rose tree 78-- ^ Assuming that both tags and content have the same type (we use 'BBLabel') bbcode is a flat representation of a rose tree
79--
80-- We use @'Tree' 'BBLabel'@ only as another intermediate structure because it carries no guarantee that the data is semantically valid -- a 'BBPlain'-value semantically has no children.
81--
82-- The use of 'Tree' was still deemed desirable because the morphism to a more sensible structure is straightforward and 'Data.Tree.Zipper' provides all the tools needed to implement 'rose' in a sensible fashion
51rose = fmap Z.toForest . flip rose' (Z.fromForest []) 83rose = fmap Z.toForest . flip rose' (Z.fromForest [])
52 where 84 where
53 rose' :: [BBToken] -> TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel) 85 rose' :: [BBToken] -> TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel)
54 rose' [] = return 86 rose' [] = return
55 rose' (x:xs) = (>>= rose' xs) . rose'' x 87 rose' (x:xs) = (>>= rose' xs) . rose'' x
56 88
57 rose'' (BBStr t) = return . Z.nextSpace . Z.insert (Node (t, Nothing) []) 89 rose'' (BBStr t) = return . Z.nextSpace . Z.insert (Node (BBPlain t) [])
58 rose'' (BBOpen t attrs) = return . Z.children . Z.insert (Node (t, Just $ Map.fromList attrs) []) 90 rose'' (BBOpen t attrs) = return . Z.children . Z.insert (Node (BBTag t $ Map.fromList attrs) [])
59 rose'' (BBClose t) = close t -- for more pointless 91 rose'' (BBClose t) = close t -- for more pointless
60 92
61 close :: Text -> TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel) 93 close :: Text -> TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel)
62 close tag pos = do 94 close tag pos = do
63 pos' <- maybe (Left $ ImbalancedTags tag) Right $ Z.parent pos 95 pos' <- maybe (Left $ ImbalancedTags tag) Right $ Z.parent pos
64 let 96 let
65 pTag = fst $ Z.label pos' 97 pTag = (\(BBTag t _) -> t) $ Z.label pos'
66 unless (pTag `matches` tag) . Left $ MismatchedTags pTag tag -- The structure shows that this mode of failure is not logically required -- it's just nice to have 98 unless (pTag `matches` tag) . Left $ MismatchedTags pTag tag -- The structure shows that this mode of failure is not logically required -- it's just nice to have
67 return $ Z.nextSpace pos' 99 return $ Z.nextSpace pos'
diff --git a/bbcode/src/Text/BBCode/Lexer.hs b/bbcode/src/Text/BBCode/Lexer.hs
index 03f57d2..560324b 100644
--- a/bbcode/src/Text/BBCode/Lexer.hs
+++ b/bbcode/src/Text/BBCode/Lexer.hs
@@ -21,7 +21,7 @@ import Control.Applicative
21import Prelude hiding (takeWhile) 21import Prelude hiding (takeWhile)
22 22
23-- | Our lexicographical unit 23-- | Our lexicographical unit
24data BBToken = BBOpen Text [(Text, Maybe Text)] -- ^ Tag open with attributes 24data BBToken = BBOpen Text [(Text, Text)] -- ^ Tag open with attributes
25 | BBClose Text -- ^ Tag close 25 | BBClose Text -- ^ Tag close
26 | BBStr Text -- ^ Content of a tag 26 | BBStr Text -- ^ Content of a tag
27 deriving (Eq, Show) 27 deriving (Eq, Show)
@@ -32,18 +32,18 @@ token = BBClose <$> ("[/" *> escapedText' [']'] <* "]")
32 <|> uncurry BBOpen <$ "[" <*> openTag <* "]" 32 <|> uncurry BBOpen <$ "[" <*> openTag <* "]"
33 <|> BBStr <$> escapedText ['['] 33 <|> BBStr <$> escapedText ['[']
34 34
35openTag :: Parser (Text, [(Text, Maybe Text)]) 35openTag :: Parser (Text, [(Text, Text)])
36openTag = (,) <$> escapedText' [']', ' ', '='] <*> attrs' 36openTag = (,) <$> escapedText' [']', ' ', '='] <*> attrs'
37 37
38attrs :: Parser [(Text, Maybe Text)] 38attrs :: Parser [(Text, Text)]
39attrs = (:) <$> (namedAttr <|> plainValue) <* takeWhile isSpace <*> attrs' 39attrs = (:) <$> (namedAttr <|> plainValue) <* takeWhile isSpace <*> attrs'
40 where 40 where
41 namedAttr = (,) <$ takeWhile isSpace <*> escapedText ['=', ']', ' '] <*> optional ("=" *> attrArg) 41 namedAttr = (,) <$ takeWhile isSpace <*> escapedText ['=', ']', ' '] <*> option "" ("=" *> attrArg)
42 plainValue = (,) <$> pure "" <* "=" <*> (Just <$> attrArg) 42 plainValue = (,) <$> pure "" <* "=" <*> attrArg
43 attrArg = "\"" *> escapedText ['"'] <* "\"" 43 attrArg = "\"" *> escapedText ['"'] <* "\""
44 <|> escapedText [']', ' '] 44 <|> escapedText [']', ' ']
45 45
46attrs' :: Parser [(Text, Maybe Text)] 46attrs' :: Parser [(Text, Text)]
47attrs' = option [] attrs 47attrs' = option [] attrs
48 48
49escapedText :: [Char] -> Parser Text 49escapedText :: [Char] -> Parser Text