-- path: root/bbcode/src/Text/BBCode.hs (blob: ac5697465b161f4f5f7a22c64cfebadb200ea01e)
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveGeneric #-}

-- | An implementation of BBcode parsing 'Text' to a syntax tree
module Text.BBCode
       ( bbcode
       , BBCodeError(..)
       , TreeError(..)
       , DomForest
       , DomTree(..)
       -- , dom
       -- , BBLabel
       -- , rose
       -- , matches
       ) where

import Data.Text (Text)

import GHC.Generics (Generic)
import Control.Exception (Exception)
import Data.Typeable (Typeable)

import Control.Monad (unless, join, foldM)
import Data.Function (on)
import Control.Applicative

import Text.BBCode.Lexer (BBToken(..), token)
import Data.Attoparsec.Text (parseOnly, endOfInput)
  
import Data.Tree
import Data.Tree.Zipper (TreePos, Empty, Full)
import qualified Data.Tree.Zipper as Z

import Data.Map (Map)
import qualified Data.Map as Map

import Data.CaseInsensitive (CI)
import qualified Data.CaseInsensitive as CI

import Data.Bifunctor (Bifunctor(first))

-- | The syntax tree handed to users of this module: a rose tree in which plain
-- text is a constructor of its own and therefore can never carry children
data DomTree
  = Element (CI Text) (Map (CI Text) Text) [DomTree]
    -- ^ A tag: its case-insensitive name, its attributes (with case-insensitive keys) and its children
  | Content Text
    -- ^ A piece of plain text
  deriving (Show, Eq)

-- | A parsed document: a list of paragraphs, each of which is a list of 'DomTree's
type DomForest = [[DomTree]]

dom :: Forest BBLabel -> DomForest
-- ^ Convert the semantically constrained rose tree to the syntactically constrained 'DomForest'
--
-- A forest that does not start with a 'BBPar' node is treated as one single paragraph; in particular the empty forest yields exactly one empty paragraph.
--
-- Silently drops children of semantically terminal nodes ('BBPlain')
--
-- 'DomTree' has no way to represent a paragraph, so this function is kept total as follows:
--
--   * a 'BBPar' node found anywhere below the top level is replaced by its (converted) children
--
--   * a non-paragraph tree found next to top-level paragraphs becomes a paragraph of its own
dom = map paragraph . ensureTopLevelPar
  where
    -- Only the first tree is inspected; anything irregular further on is handled by 'paragraph'
    ensureTopLevelPar xs@(Node BBPar _ : _) = xs
    ensureTopLevelPar xs                    = [Node BBPar xs]

    -- One top-level tree becomes one paragraph
    paragraph (Node BBPar ts) = concatMap inline ts
    paragraph t               = inline t

    -- Returns a list so that a nested paragraph can be spliced into its parent
    inline (Node (BBPlain t) _)      = [Content t]
    inline (Node (BBTag t attrs) ts) = [Element (CI.mk t) (Map.mapKeys CI.mk attrs) $ concatMap inline ts]
    inline (Node BBPar ts)           = concatMap inline ts

-- | Everything that can go wrong while running 'bbcode'
data BBCodeError
  = LexerError String
    -- ^ The input could not be lexed to a stream of tokens
  | TreeError TreeError
    -- ^ The stream of tokens could not be assembled to a syntax tree
  deriving (Show, Eq, Generic, Typeable)

instance Exception BBCodeError

bbcode :: Text -> Either BBCodeError DomForest
-- ^ Parse BBCode: lex the whole input to tokens, assemble those to a tree and convert that tree to a 'DomForest'
--
-- A failure of the lexer is reported as 'LexerError', a failure while building the tree as 'TreeError'
bbcode input = do
  tokens <- first LexerError $ parseOnly (many token <* endOfInput) input
  forest <- first TreeError $ rose tokens
  return $ dom forest

-- | Errors in the input that are detected while turning the lexed token-stream into a tree
data TreeError
  = MismatchedTags Text Text
    -- ^ The label of a closing tag does not match that of the tag it would close (label of the opening tag first, label of the closing tag second)
  | ImbalancedTags Text
    -- ^ A closing tag for which there is no open tag left to close
  | ParagraphWithinTag
    -- ^ A paragraph-break within a tag
  deriving (Show, Eq, Generic, Typeable)

instance Exception TreeError

-- | Labels of the nodes of the intermediate rose tree built by 'rose'
data BBLabel
  = BBTag Text (Map Text Text)
    -- ^ A tag: its name and a map of its attributes
  | BBPar
    -- ^ A paragraph
  | BBPlain Text
    -- ^ Plain text
  deriving (Show, Eq)

matches :: Text -> Text -> Bool
-- ^ @`matches` "open" "close"@ is 'True' iff @[/close]@ is a valid closing tag for @[open]@
--
-- The two labels are compared for equality after wrapping both with 'CI.mk', i.e. ignoring case
matches open close = CI.mk open == CI.mk close

rose :: [BBToken] -> Either TreeError (Forest BBLabel)
-- ^ Assuming that both tags and content have the same type (we use 'BBLabel') bbcode is a flat representation of a rose tree
--
-- We use @'Tree' 'BBLabel'@ only as another intermediate structure because it carries no guarantee that the data is semantically valid -- a 'BBPlain'-value semantically has no children.
--
-- The use of 'Tree' was still deemed desirable because the morphism to a more sensible structure is straightforward and 'Data.Tree.Zipper' provides all the tools needed to implement 'rose' in a sensible fashion
--
-- The tokens are folded into a zipper whose cursor always is the empty position at which the next node gets inserted.
--
-- Paragraphs ('BBPar') are only ever created directly below the root: a paragraph-break at the top level (or directly inside a top-level paragraph) starts a new top-level paragraph, a paragraph-break anywhere inside a tag fails with 'ParagraphWithinTag'.
--
-- Tags that are still open when the tokens run out are not reported as an error.
rose = fmap Z.toForest . foldM (flip rose') (Z.fromForest [])
  where
    rose' :: BBToken -> TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel)
    rose' (BBStr t)             = return . Z.nextSpace . Z.insert (Node (BBPlain t) [])
    rose'  BBNewPar             = parBreak
    rose' (BBOpen t attrs)      = return . Z.children . Z.insert (Node (BBTag t $ Map.fromList attrs) []) -- descend: subsequent tokens are children of this tag
    rose' (BBContained t attrs) = return . Z.nextSpace . Z.insert (Node (BBTag t $ Map.fromList attrs) []) -- no children: continue behind this tag
    rose' (BBClose t)           = close t

    -- Leave the innermost open tag and continue behind it
    close :: Text -> TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel)
    close tag pos = do
      pos' <- maybe (Left $ ImbalancedTags tag) Right $ Z.parent pos >>= traversePars
      case Z.label pos' of
        BBTag pTag _
          | pTag `matches` tag -> return $ Z.nextSpace pos'
          | otherwise          -> Left $ MismatchedTags pTag tag -- The structure shows that this mode of failure is not logically required -- it's just nice to have
        _ -> Left $ ImbalancedTags tag -- the enclosing node is not a tag, so there is nothing this could close

    -- Paragraphs are not tags; skip them when looking for the tag to close
    traversePars :: TreePos Full BBLabel -> Maybe (TreePos Full BBLabel)
    traversePars p
      | isPar $ Z.tree p = Z.parent p >>= traversePars
      | otherwise        = return p

    -- Start a new, empty top-level paragraph and move the cursor into it
    parBreak :: TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel)
    parBreak z = Z.children . Z.insert (Node BBPar []) . Z.last <$> newParSlot
      where
        siblings = reverse $ Z.before z -- 'Z.before' lists the closest sibling first

        -- A position in the top-level forest, which by then consists of paragraphs only
        newParSlot = case Z.parent z of
          Nothing -- cursor is at the top level
            | all isPar siblings -> Right z
            | otherwise          -> Right . Z.fromForest $ [Node BBPar siblings] -- first break: everything so far becomes the first paragraph
          Just p
            | isPar (Z.tree p) && Z.isRoot p -> Right $ Z.nextSpace p -- inside a top-level paragraph: the new one becomes its sibling
            | otherwise                      -> Left ParagraphWithinTag

    isPar :: Tree BBLabel -> Bool
    isPar (Node BBPar _) = True
    isPar _              = False