diff options
Diffstat (limited to 'bbcode/src/Text')
-rw-r--r-- | bbcode/src/Text/BBCode.hs | 31 | ||||
-rw-r--r-- | bbcode/src/Text/BBCode/Lexer.hs | 14 |
2 files changed, 38 insertions, 7 deletions
diff --git a/bbcode/src/Text/BBCode.hs b/bbcode/src/Text/BBCode.hs index 6fef446..0773124 100644 --- a/bbcode/src/Text/BBCode.hs +++ b/bbcode/src/Text/BBCode.hs | |||
@@ -40,6 +40,7 @@ import Data.Bifunctor (Bifunctor(first)) | |||
40 | 40 | ||
41 | -- | Our target structure -- a rose tree with an explicit terminal constructor | 41 | -- | Our target structure -- a rose tree with an explicit terminal constructor |
42 | data DomTree = Element Text (Map Text Text) [DomTree] | 42 | data DomTree = Element Text (Map Text Text) [DomTree] |
43 | | Paragraph [DomTree] | ||
43 | | Content Text | 44 | | Content Text |
44 | deriving (Show, Eq) | 45 | deriving (Show, Eq) |
45 | 46 | ||
@@ -51,6 +52,7 @@ dom = map dom' | |||
51 | where | 52 | where |
52 | dom' (Node (BBPlain t) _) = Content t | 53 | dom' (Node (BBPlain t) _) = Content t |
53 | dom' (Node (BBTag t attrs) ts) = Element t attrs $ map dom' ts | 54 | dom' (Node (BBTag t attrs) ts) = Element t attrs $ map dom' ts |
55 | dom' (Node BBPar ts) = Paragraph $ map dom' ts | ||
54 | 56 | ||
55 | -- | Errors encountered during parsing | 57 | -- | Errors encountered during parsing |
56 | data BBCodeError = LexerError String -- ^ Error while parsing input to stream of tokens | 58 | data BBCodeError = LexerError String -- ^ Error while parsing input to stream of tokens |
@@ -72,6 +74,7 @@ instance Exception TreeError | |||
72 | 74 | ||
73 | -- | The label of our rose-tree nodes carries the tag name and a map of attributes | 75 | -- | The label of our rose-tree nodes carries the tag name and a map of attributes |
74 | data BBLabel = BBTag Text (Map Text Text) | 76 | data BBLabel = BBTag Text (Map Text Text) |
77 | | BBPar | ||
75 | | BBPlain Text | 78 | | BBPlain Text |
76 | deriving (Show, Eq) | 79 | deriving (Show, Eq) |
77 | 80 | ||
@@ -89,15 +92,33 @@ rose :: [BBToken] -> Either TreeError (Forest BBLabel) | |||
89 | -- The use of 'Tree' was still deemed desirable because the morphism to a more sensible structure is straightforward and 'Data.Tree.Zipper' provides all the tools needed to implement 'rose' in a sensible fashion | 92 | -- The use of 'Tree' was still deemed desirable because the morphism to a more sensible structure is straightforward and 'Data.Tree.Zipper' provides all the tools needed to implement 'rose' in a sensible fashion |
90 | rose = fmap Z.toForest . foldM (flip rose') (Z.fromForest []) | 93 | rose = fmap Z.toForest . foldM (flip rose') (Z.fromForest []) |
91 | where | 94 | where |
92 | rose' (BBStr t) = return . Z.nextSpace . Z.insert (Node (BBPlain t) []) | 95 | rose' (BBStr t) = return . Z.nextSpace . Z.insert (Node (BBPlain t) []) |
93 | rose' (BBOpen t attrs) = return . Z.children . Z.insert (Node (BBTag t $ Map.fromList attrs) []) | 96 | rose' BBNewPar = return . parBreak -- for more pointless |
97 | rose' (BBOpen t attrs) = return . Z.children . Z.insert (Node (BBTag t $ Map.fromList attrs) []) | ||
94 | rose' (BBContained t attrs) = return . Z.nextSpace . Z.insert (Node (BBTag t $ Map.fromList attrs) []) | 98 | rose' (BBContained t attrs) = return . Z.nextSpace . Z.insert (Node (BBTag t $ Map.fromList attrs) []) |
95 | rose' (BBClose t) = close t -- for more pointless | 99 | rose' (BBClose t) = close t -- for more pointless |
96 | 100 | ||
97 | close :: Text -> TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel) | 101 | close :: Text -> TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel) |
98 | close tag pos = do | 102 | close tag pos = do |
99 | pos' <- maybe (Left $ ImbalancedTags tag) Right $ Z.parent pos | 103 | pos' <- maybe (Left $ ImbalancedTags tag) Right $ Z.parent pos >>= traversePars |
100 | let | 104 | let |
101 | pTag = (\(BBTag t _) -> t) $ Z.label pos' | 105 | pTag = (\(BBTag t _) -> t) . Z.label $ pos' |
102 | unless (pTag `matches` tag) . Left $ MismatchedTags pTag tag -- The structure shows that this mode of failure is not logically required -- it's just nice to have | 106 | unless (pTag `matches` tag) . Left $ MismatchedTags pTag tag -- The structure shows that this mode of failure is not logically required -- it's just nice to have |
103 | return $ Z.nextSpace pos' | 107 | return $ Z.nextSpace pos' |
108 | where | ||
109 | traversePars pos | ||
110 | | isPar . Z.tree $ pos = Z.parent pos >>= traversePars | ||
111 | | otherwise = return pos | ||
112 | |||
113 | parBreak :: TreePos Empty BBLabel -> TreePos Empty BBLabel | ||
114 | parBreak z = let siblings = reverse $ Z.before z -- We only move ever move right so Z.after will always be empty | ||
115 | |||
116 | siblingsAsPars | ||
117 | | all isPar siblings = z | ||
118 | | otherwise = case Z.parent z of | ||
119 | Nothing -> Z.fromForest $ [Node BBPar siblings] | ||
120 | Just p -> Z.children . Z.modifyTree (\(Node l _) -> Node l [Node BBPar siblings]) $ p | ||
121 | in Z.children . Z.insert (Node BBPar []) . Z.last $ siblingsAsPars | ||
122 | |||
123 | isPar (Node BBPar _) = True | ||
124 | isPar _ = False | ||
diff --git a/bbcode/src/Text/BBCode/Lexer.hs b/bbcode/src/Text/BBCode/Lexer.hs index 218427d..8bd1446 100644 --- a/bbcode/src/Text/BBCode/Lexer.hs +++ b/bbcode/src/Text/BBCode/Lexer.hs | |||
@@ -17,6 +17,9 @@ import qualified Data.Text as T (singleton) | |||
17 | import Data.Char (isSpace) | 17 | import Data.Char (isSpace) |
18 | 18 | ||
19 | import Control.Applicative | 19 | import Control.Applicative |
20 | import Control.Monad (liftM2) | ||
21 | |||
22 | import Data.Monoid | ||
20 | 23 | ||
21 | import Prelude hiding (takeWhile) | 24 | import Prelude hiding (takeWhile) |
22 | 25 | ||
@@ -25,6 +28,7 @@ data BBToken = BBOpen Text [(Text, Text)] -- ^ Tag open with attributes | |||
25 | | BBContained Text [(Text, Text)] -- ^ Tag open & immediate close with attributes | 28 | | BBContained Text [(Text, Text)] -- ^ Tag open & immediate close with attributes |
26 | | BBClose Text -- ^ Tag close | 29 | | BBClose Text -- ^ Tag close |
27 | | BBStr Text -- ^ Content of a tag | 30 | | BBStr Text -- ^ Content of a tag |
31 | | BBNewPar | ||
28 | deriving (Eq, Show) | 32 | deriving (Eq, Show) |
29 | 33 | ||
30 | token :: Parser BBToken | 34 | token :: Parser BBToken |
@@ -32,7 +36,11 @@ token :: Parser BBToken | |||
32 | token = BBClose <$> ("[/" *> escapedText' [']'] <* "]") | 36 | token = BBClose <$> ("[/" *> escapedText' [']'] <* "]") |
33 | <|> uncurry BBContained <$ "[" <*> openTag <* "/]" | 37 | <|> uncurry BBContained <$ "[" <*> openTag <* "/]" |
34 | <|> uncurry BBOpen <$ "[" <*> openTag <* "]" | 38 | <|> uncurry BBOpen <$ "[" <*> openTag <* "]" |
35 | <|> BBStr <$> escapedText ['['] | 39 | <|> BBNewPar <$ endOfLine <* many1 endOfLine |
40 | <|> BBStr <$> paragraph | ||
41 | where | ||
42 | paragraph = (\a b c -> a <> b <> c) <$> option "" endOfLine' <*> escapedText ['['] <*> option "" paragraph | ||
43 | endOfLine' = "\n" <$ endOfLine | ||
36 | 44 | ||
37 | openTag :: Parser (Text, [(Text, Text)]) | 45 | openTag :: Parser (Text, [(Text, Text)]) |
38 | openTag = (,) <$> escapedText' [']', ' ', '=', '/'] <*> attrs' | 46 | openTag = (,) <$> escapedText' [']', ' ', '=', '/'] <*> attrs' |
@@ -51,11 +59,13 @@ attrs' = option [] attrs | |||
51 | escapedText :: [Char] -> Parser Text | 59 | escapedText :: [Char] -> Parser Text |
52 | -- ^ @escapedText cs@ consumes 'Text' up to (not including) the first occurence of a character from @cs@ that is not escaped using @\\@ | 60 | -- ^ @escapedText cs@ consumes 'Text' up to (not including) the first occurence of a character from @cs@ that is not escaped using @\\@ |
53 | -- | 61 | -- |
62 | -- Does not consume characters used to indicate linebreaks as determined by `isEndOfLine` | ||
63 | -- | ||
54 | -- Always consumes at least one character | 64 | -- Always consumes at least one character |
55 | -- | 65 | -- |
56 | -- @\\@ needs to be escaped (prefixed with @\\@) iff it precedes a character from @cs@ | 66 | -- @\\@ needs to be escaped (prefixed with @\\@) iff it precedes a character from @cs@ |
57 | escapedText [] = takeText | 67 | escapedText [] = takeText |
58 | escapedText cs = recurse $ choice [ takeWhile1 (not . special) | 68 | escapedText cs = recurse $ choice [ takeWhile1 $ not . liftM2 (||) special isEndOfLine |
59 | , escapeSeq | 69 | , escapeSeq |
60 | , escapeChar' | 70 | , escapeChar' |
61 | ] | 71 | ] |