aboutsummaryrefslogtreecommitdiff
path: root/bbcode/src/Text/BBCode.hs
blob: 75fa219b402f6939a0719ad5032290ae9cbbdd52 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveGeneric #-}

-- | An implementation of BBcode parsing 'Text' to a syntax tree
module Text.BBCode
       ( bbcode
       , BBCodeError(..)
       , TreeError(..)
       , DomTree(..)
       -- , dom
       -- , BBLabel
       -- , rose
       -- , matches
       ) where

import Data.Text (Text)

import GHC.Generics (Generic)
import Control.Exception (Exception)
import Data.Typeable (Typeable)

import Control.Monad (unless, join)
import Data.Function (on)
import Control.Applicative

import Text.BBCode.Lexer (BBToken(..), token)
import Data.Attoparsec.Text (parseOnly, endOfInput)
  
import Data.Tree
import Data.Tree.Zipper (TreePos, Empty, Full)
import qualified Data.Tree.Zipper as Z

import Data.Map (Map)
import qualified Data.Map as Map

import Data.CaseInsensitive (CI)
import qualified Data.CaseInsensitive as CI

import Data.Bifunctor (Bifunctor(first))

-- | The syntax tree produced by this module -- a rose tree in which plain text is a dedicated constructor that cannot carry children
data DomTree
  = Element Text (Map Text Text) [DomTree]
    -- ^ A tag: its name, its attributes and its child nodes
  | Content Text
    -- ^ A run of plain text
  deriving (Show, Eq)

dom :: Forest BBLabel -> [DomTree]
-- ^ Convert the loosely typed intermediate forest into the exported 'DomTree' representation
--
-- A 'BBPlain' node becomes 'Content'; any children it might carry are discarded without an error, because plain text has no children semantically. A 'BBTag' node becomes an 'Element' whose children are converted recursively.
dom = fmap convert
  where
    convert :: Tree BBLabel -> DomTree
    convert (Node label kids) = case label of
      BBPlain txt      -> Content txt
      BBTag name attrs -> Element name attrs (dom kids)

-- | Everything that can go wrong while parsing BBCode with 'bbcode'
data BBCodeError
  = LexerError String
    -- ^ The input could not be turned into a stream of tokens; carries the message reported by the lexer
  | TreeError TreeError
    -- ^ The stream of tokens could not be assembled into a syntax tree
  deriving (Show, Eq, Generic, Typeable)

-- Uses the default 'Exception' methods so the error can also be thrown
instance Exception BBCodeError

bbcode :: Text -> Either BBCodeError [DomTree]
-- ^ Parse BBCode into its top-level 'DomTree' nodes
--
-- Runs in three stages: the whole input is lexed into tokens (a failure is reported as 'LexerError'), the tokens are assembled into a rose forest (a failure is reported as 'TreeError'), and the forest is converted to 'DomTree's.
bbcode input = do
  tokens <- first LexerError $ parseOnly (many token <* endOfInput) input
  forest <- first TreeError $ rose tokens
  return $ dom forest

-- | Errors in the input that are detected while assembling the lexed token stream into a tree
data TreeError
  = MismatchedTags Text Text
    -- ^ A closing tag does not match the tag it would close; the first field is the name of the opening tag, the second the name of the closing tag
  | ImbalancedTags Text
    -- ^ A closing tag (whose name is carried here) was found without any tag left open for it to close
  deriving (Show, Eq, Generic, Typeable)

-- Uses the default 'Exception' methods so the error can also be thrown
instance Exception TreeError

-- | Node label of the intermediate rose tree built from the token stream
data BBLabel
  = BBTag Text (Map Text Text)
    -- ^ A tag, carrying its name and a map of its attributes
  | BBPlain Text
    -- ^ Plain text content
  deriving (Show, Eq)

matches :: Text -> Text -> Bool
-- ^ Decide whether a closing tag may close a given opening tag
--
-- @opening `matches` closing@ should be 'True' iff @[/closing]@ is a valid closing tag for @[opening]@.
--
-- Both names are wrapped with 'CI.mk' before being compared, so the comparison ignores case.
matches opening closing = CI.mk opening == CI.mk closing

rose :: [BBToken] -> Either TreeError (Forest BBLabel)
-- ^ Rebuild the rose forest that a flat stream of BBCode tokens describes
--
-- Tags and content share a single label type ('BBLabel'), which makes the token stream a flat representation of a rose tree.
--
-- @'Tree' 'BBLabel'@ is only an intermediate structure: it carries no guarantee that the data is semantically valid -- a 'BBPlain' value semantically has no children. It is used anyway because the conversion to a stricter structure is straightforward and "Data.Tree.Zipper" provides the tools needed to build the tree.
--
-- Fails with 'ImbalancedTags' when a closing tag is met while no tag is open, and with 'MismatchedTags' when a closing tag names a different tag than the innermost open one. Running out of tokens while tags are still open is not checked for and is not reported as an error.
rose = fmap Z.toForest . flip rose' (Z.fromForest [])
  where
    -- Thread the insertion point through the tokens from left to right; the 'Either' monad stops at the first error
    rose' :: [BBToken] -> TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel)
    rose' [] = return
    rose' (x:xs) = (>>= rose' xs) . rose'' x

    -- Apply a single token at the current insertion point and yield the next insertion point
    rose'' :: BBToken -> TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel)
    -- Plain text: insert a leaf and continue after it
    rose'' (BBStr t) = return . Z.nextSpace . Z.insert (Node (BBPlain t) [])
    -- Opening tag: insert the node and continue inside it
    rose'' (BBOpen t attrs) = return . Z.children . Z.insert (Node (BBTag t $ Map.fromList attrs) [])
    -- Self-contained tag: insert a childless node and continue after it
    rose'' (BBContained t attrs) = return . Z.nextSpace . Z.insert (Node (BBTag t $ Map.fromList attrs) [])
    -- Closing tag: the only token that can fail, handled by 'close'
    rose'' (BBClose t) = close t

    -- Leave the innermost open tag, provided the closing tag belongs to it
    close :: Text -> TreePos Empty BBLabel -> Either TreeError (TreePos Empty BBLabel)
    close tag pos = do
      -- Without a parent we are at the top level, so there is nothing this tag could close
      pos' <- maybe (Left $ ImbalancedTags tag) Right $ Z.parent pos
      case Z.label pos' of
        BBTag pTag _ -> do
          -- The structure shows that this mode of failure is not logically required -- it's just nice to have
          unless (pTag `matches` tag) . Left $ MismatchedTags pTag tag
          return $ Z.nextSpace pos'
        -- 'rose''' never descends into a 'BBPlain' node, so this is not expected to occur;
        -- it is handled explicitly to keep the match total instead of crashing at runtime
        BBPlain _ -> Left $ ImbalancedTags tag