aboutsummaryrefslogtreecommitdiff
path: root/bbcode/src/Text
diff options
context:
space:
mode:
Diffstat (limited to 'bbcode/src/Text')
-rw-r--r--bbcode/src/Text/BBCode.hs31
-rw-r--r--bbcode/src/Text/BBCode/Lexer.hs14
2 files changed, 38 insertions, 7 deletions
diff --git a/bbcode/src/Text/BBCode.hs b/bbcode/src/Text/BBCode.hs
index 6fef446..0773124 100644
--- a/bbcode/src/Text/BBCode.hs
+++ b/bbcode/src/Text/BBCode.hs
@@ -40,6 +40,7 @@ import Data.Bifunctor (Bifunctor(first))
40 40
41-- | Our target structure -- a rose tree with an explicit terminal constructor 41-- | Our target structure -- a rose tree with an explicit terminal constructor
42data DomTree = Element Text (Map Text Text) [DomTree] 42data DomTree = Element Text (Map Text Text) [DomTree]
43 | Paragraph [DomTree]
43 | Content Text 44 | Content Text
44 deriving (Show, Eq) 45 deriving (Show, Eq)
45 46
@@ -51,6 +52,7 @@ dom = map dom'
51 where 52 where
52 dom' (Node (BBPlain t) _) = Content t 53 dom' (Node (BBPlain t) _) = Content t
53 dom' (Node (BBTag t attrs) ts) = Element t attrs $ map dom' ts 54 dom' (Node (BBTag t attrs) ts) = Element t attrs $ map dom' ts
55 dom' (Node BBPar ts) = Paragraph $ map dom' ts
54 56
55-- | Errors encountered during parsing 57-- | Errors encountered during parsing
56data BBCodeError = LexerError String -- ^ Error while parsing input to stream of tokens 58data BBCodeError = LexerError String -- ^ Error while parsing input to stream of tokens
@@ -72,6 +74,7 @@ instance Exception TreeError
72 74
73-- | The label of our rose-tree nodes carries the tag name and a map of attributes 75-- | The label of our rose-tree nodes carries the tag name and a map of attributes
74data BBLabel = BBTag Text (Map Text Text) 76data BBLabel = BBTag Text (Map Text Text)
77 | BBPar
75 | BBPlain Text 78 | BBPlain Text
76 deriving (Show, Eq) 79 deriving (Show, Eq)
77 80
@@ -89,15 +92,33 @@ rose :: [BBToken] -> Either TreeError (Forest BBLabel)
89-- The use of 'Tree' was still deemed desirable because the morphism to a more sensible structure is straightforward and 'Data.Tree.Zipper' provides all the tools needed to implement 'rose' in a sensible fashion 92-- The use of 'Tree' was still deemed desirable because the morphism to a more sensible structure is straightforward and 'Data.Tree.Zipper' provides all the tools needed to implement 'rose' in a sensible fashion
90rose = fmap Z.toForest . foldM (flip rose') (Z.fromForest []) 93rose = fmap Z.toForest . foldM (flip rose') (Z.fromForest [])
91 where 94 where
92 rose' (BBStr t) = return . Z.nextSpace . Z.insert (Node (BBPlain t) []) 95 rose' (BBStr t) = return . Z.nextSpace . Z.insert (Node (BBPlain t) [])
93 rose' (BBOpen t attrs) = return . Z.children . Z.insert (Node (BBTag t $ Map.fromList attrs) []) 96 rose' BBNewPar = return . parBreak -- for more pointless
97 rose' (BBOpen t attrs) = return . Z.children . Z.insert (Node (BBTag t $ Map.fromList attrs) [])
94 rose' (BBContained t attrs) = return . Z.nextSpace . Z.insert (Node (BBTag t $ Map.fromList attrs) []) 98 rose' (BBContained t attrs) = return . Z.nextSpace . Z.insert (Node (BBTag t $ Map.fromList attrs) [])
95 rose' (BBClose t) = close t -- for more pointless 99 rose' (BBClose t) = close t -- for more pointless
96 100
97 close :: Text -> TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel) 101 close :: Text -> TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel)
98 close tag pos = do 102 close tag pos = do
99 pos' <- maybe (Left $ ImbalancedTags tag) Right $ Z.parent pos 103 pos' <- maybe (Left $ ImbalancedTags tag) Right $ Z.parent pos >>= traversePars
100 let 104 let
101 pTag = (\(BBTag t _) -> t) $ Z.label pos' 105 pTag = (\(BBTag t _) -> t) . Z.label $ pos'
102 unless (pTag `matches` tag) . Left $ MismatchedTags pTag tag -- The structure shows that this mode of failure is not logically required -- it's just nice to have 106 unless (pTag `matches` tag) . Left $ MismatchedTags pTag tag -- The structure shows that this mode of failure is not logically required -- it's just nice to have
103 return $ Z.nextSpace pos' 107 return $ Z.nextSpace pos'
108 where
109 traversePars pos
110 | isPar . Z.tree $ pos = Z.parent pos >>= traversePars
111 | otherwise = return pos
112
113 parBreak :: TreePos Empty BBLabel -> TreePos Empty BBLabel
114 parBreak z = let siblings = reverse $ Z.before z -- We only move ever move right so Z.after will always be empty
115
116 siblingsAsPars
117 | all isPar siblings = z
118 | otherwise = case Z.parent z of
119 Nothing -> Z.fromForest $ [Node BBPar siblings]
120 Just p -> Z.children . Z.modifyTree (\(Node l _) -> Node l [Node BBPar siblings]) $ p
121 in Z.children . Z.insert (Node BBPar []) . Z.last $ siblingsAsPars
122
123 isPar (Node BBPar _) = True
124 isPar _ = False
diff --git a/bbcode/src/Text/BBCode/Lexer.hs b/bbcode/src/Text/BBCode/Lexer.hs
index 218427d..8bd1446 100644
--- a/bbcode/src/Text/BBCode/Lexer.hs
+++ b/bbcode/src/Text/BBCode/Lexer.hs
@@ -17,6 +17,9 @@ import qualified Data.Text as T (singleton)
17import Data.Char (isSpace) 17import Data.Char (isSpace)
18 18
19import Control.Applicative 19import Control.Applicative
20import Control.Monad (liftM2)
21
22import Data.Monoid
20 23
21import Prelude hiding (takeWhile) 24import Prelude hiding (takeWhile)
22 25
@@ -25,6 +28,7 @@ data BBToken = BBOpen Text [(Text, Text)] -- ^ Tag open with attributes
25 | BBContained Text [(Text, Text)] -- ^ Tag open & immediate close with attributes 28 | BBContained Text [(Text, Text)] -- ^ Tag open & immediate close with attributes
26 | BBClose Text -- ^ Tag close 29 | BBClose Text -- ^ Tag close
27 | BBStr Text -- ^ Content of a tag 30 | BBStr Text -- ^ Content of a tag
31 | BBNewPar
28 deriving (Eq, Show) 32 deriving (Eq, Show)
29 33
30token :: Parser BBToken 34token :: Parser BBToken
@@ -32,7 +36,11 @@ token :: Parser BBToken
32token = BBClose <$> ("[/" *> escapedText' [']'] <* "]") 36token = BBClose <$> ("[/" *> escapedText' [']'] <* "]")
33 <|> uncurry BBContained <$ "[" <*> openTag <* "/]" 37 <|> uncurry BBContained <$ "[" <*> openTag <* "/]"
34 <|> uncurry BBOpen <$ "[" <*> openTag <* "]" 38 <|> uncurry BBOpen <$ "[" <*> openTag <* "]"
35 <|> BBStr <$> escapedText ['['] 39 <|> BBNewPar <$ endOfLine <* many1 endOfLine
40 <|> BBStr <$> paragraph
41 where
42 paragraph = (\a b c -> a <> b <> c) <$> option "" endOfLine' <*> escapedText ['['] <*> option "" paragraph
43 endOfLine' = "\n" <$ endOfLine
36 44
37openTag :: Parser (Text, [(Text, Text)]) 45openTag :: Parser (Text, [(Text, Text)])
38openTag = (,) <$> escapedText' [']', ' ', '=', '/'] <*> attrs' 46openTag = (,) <$> escapedText' [']', ' ', '=', '/'] <*> attrs'
@@ -51,11 +59,13 @@ attrs' = option [] attrs
51escapedText :: [Char] -> Parser Text 59escapedText :: [Char] -> Parser Text
52-- ^ @escapedText cs@ consumes 'Text' up to (not including) the first occurence of a character from @cs@ that is not escaped using @\\@ 60-- ^ @escapedText cs@ consumes 'Text' up to (not including) the first occurence of a character from @cs@ that is not escaped using @\\@
53-- 61--
62-- Does not consume characters used to indicate linebreaks as determined by `isEndOfLine`
63--
54-- Always consumes at least one character 64-- Always consumes at least one character
55-- 65--
56-- @\\@ needs to be escaped (prefixed with @\\@) iff it precedes a character from @cs@ 66-- @\\@ needs to be escaped (prefixed with @\\@) iff it precedes a character from @cs@
57escapedText [] = takeText 67escapedText [] = takeText
58escapedText cs = recurse $ choice [ takeWhile1 (not . special) 68escapedText cs = recurse $ choice [ takeWhile1 $ not . liftM2 (||) special isEndOfLine
59 , escapeSeq 69 , escapeSeq
60 , escapeChar' 70 , escapeChar'
61 ] 71 ]