1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
|
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveGeneric #-}
-- | A BBCode parser: turns 'Text' into a syntax tree
module Text.BBCode
( bbcode
, BBCodeError(..)
, TreeError(..)
, DomTree(..)
-- , dom
-- , BBLabel
-- , rose
-- , matches
) where
import Data.Text (Text)
import GHC.Generics (Generic)
import Control.Exception (Exception)
import Data.Typeable (Typeable)
import Control.Monad (unless, join, foldM)
import Data.Function (on)
import Control.Applicative
import Text.BBCode.Lexer (BBToken(..), token)
import Data.Attoparsec.Text (parseOnly, endOfInput)
import Data.Tree
import Data.Tree.Zipper (TreePos, Empty, Full)
import qualified Data.Tree.Zipper as Z
import Data.Map (Map)
import qualified Data.Map as Map
import Data.CaseInsensitive (CI)
import qualified Data.CaseInsensitive as CI
import Data.Bifunctor (Bifunctor(first))
-- | The structure produced by parsing: a rose tree in which text leaves have
-- their own constructor and therefore cannot carry children
data DomTree
  = Element Text (Map Text Text) [DomTree]
    -- ^ A tag: its name, its attributes and its child nodes
  | Content Text
    -- ^ A piece of plain text
  deriving (Eq, Show)
-- | Convert the intermediate forest of 'BBLabel's into the exported 'DomTree'
-- representation.
--
-- Children attached to a 'BBPlain' node are silently discarded, since plain
-- text has no children in 'DomTree'.
dom :: Forest BBLabel -> [DomTree]
dom forest = [convert tree | tree <- forest]
  where
    -- Translate a single tree, recursing into the children of tags only.
    convert (Node label kids) = case label of
      BBPlain txt -> Content txt
      BBTag name attrs -> Element name attrs (dom kids)
-- | Errors that can occur during parsing, tagged by the stage that failed
data BBCodeError
  = LexerError String
    -- ^ The input could not be turned into a stream of tokens
  | TreeError TreeError
    -- ^ The stream of tokens could not be assembled into a syntax tree
  deriving (Eq, Show, Generic, Typeable)

instance Exception BBCodeError
-- | Parse BBCode.
--
-- The input is first lexed into tokens (the lexer must consume the whole
-- input), the tokens are then assembled into a tree by 'rose', and the result
-- is converted with 'dom'. A lexing failure is reported as 'LexerError', a
-- failure while building the tree as 'TreeError'.
bbcode :: Text -> Either BBCodeError [DomTree]
bbcode input = do
  tokens <- first LexerError $ parseOnly (many token <* endOfInput) input
  forest <- first TreeError $ rose tokens
  pure $ dom forest
-- | Errors in the input found while assembling the lexed token stream into a tree
data TreeError
  = MismatchedTags Text Text
    -- ^ A closing tag does not match the tag that is currently open; carries
    -- the open tag's name followed by the closing tag's name
  | ImbalancedTags Text
    -- ^ An extraneous closing tag (whose name is carried) was found while no
    -- tag was open
  deriving (Eq, Show, Generic, Typeable)

instance Exception TreeError
-- | Label of the nodes in the intermediate rose tree built by 'rose'
data BBLabel
  = BBTag Text (Map Text Text)
    -- ^ A tag: its name together with a map of its attributes
  | BBPlain Text
    -- ^ Plain text
  deriving (Eq, Show)
-- | @'matches' open close@ is 'True' iff @[/close]@ is a valid closing tag
-- for @[open]@.
--
-- Both names are wrapped with 'CI.mk' and then compared for equality, so the
-- comparison is case-insensitive.
matches :: Text -> Text -> Bool
matches open close = CI.mk open == CI.mk close
-- | Assemble a flat stream of tokens into a forest of 'BBLabel's.
--
-- Assuming that both tags and content have the same type (we use 'BBLabel'),
-- BBCode is a flat representation of a rose tree.
--
-- @'Tree' 'BBLabel'@ is only an intermediate structure: it carries no
-- guarantee that the data is semantically valid, because a 'BBPlain' value
-- semantically has no children. It is used anyway because the conversion to a
-- stricter structure (see 'dom') is straightforward and "Data.Tree.Zipper"
-- provides the tools needed to build the tree one token at a time.
--
-- Fails with 'ImbalancedTags' when a closing tag appears while no tag is open
-- and with 'MismatchedTags' when a closing tag does not match the innermost
-- open tag. No check is made at the end of the input for tags that were never
-- closed.
rose :: [BBToken] -> Either TreeError (Forest BBLabel)
rose = fmap Z.toForest . foldM (flip step) (Z.fromForest [])
  where
    -- Consume one token at the current insertion point and yield the next
    -- insertion point.
    step :: BBToken -> TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel)
    -- Text becomes a leaf; continue after it on the same level.
    step (BBStr t) = Right . Z.nextSpace . Z.insert (Node (BBPlain t) [])
    -- An opening tag becomes a node; continue inside its (still empty) children.
    step (BBOpen t attrs) = Right . Z.children . Z.insert (tagNode t attrs)
    -- A self-contained tag becomes a childless node; continue after it.
    step (BBContained t attrs) = Right . Z.nextSpace . Z.insert (tagNode t attrs)
    -- A closing tag moves the insertion point back up, see 'close'.
    step (BBClose t) = close t

    -- A childless tag node carrying the given name and attributes.
    tagNode t attrs = Node (BBTag t $ Map.fromList attrs) []

    -- Leave the innermost open tag and continue after it on the parent's level.
    close :: Text -> TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel)
    close tag pos = do
      -- Without a parent we are at the top level, so there is nothing to close.
      opened <- maybe (Left $ ImbalancedTags tag) Right $ Z.parent pos
      name <- case Z.label opened of
        BBTag t _ -> Right t
        -- 'step' never descends into a 'BBPlain' node, so this branch is not
        -- expected to be reached; it is handled to keep the match total
        -- instead of crashing on a failed pattern match.
        BBPlain _ -> Left $ ImbalancedTags tag
      -- The tree structure alone would suffice to close the tag; verifying
      -- the name is an extra sanity check on the input.
      unless (name `matches` tag) . Left $ MismatchedTags name tag
      pure $ Z.nextSpace opened
|