aboutsummaryrefslogtreecommitdiff
path: root/bbcode/src
diff options
context:
space:
mode:
Diffstat (limited to 'bbcode/src')
-rw-r--r--bbcode/src/BBCode.hs96
-rw-r--r--bbcode/src/BBCode/Syntax.hs108
-rw-r--r--bbcode/src/BBCode/Tokenizer.hs44
3 files changed, 248 insertions, 0 deletions
diff --git a/bbcode/src/BBCode.hs b/bbcode/src/BBCode.hs
new file mode 100644
index 0000000..750fb0f
--- /dev/null
+++ b/bbcode/src/BBCode.hs
@@ -0,0 +1,96 @@
1{-# LANGUAGE OverloadedStrings, OverloadedLists, RankNTypes, ImpredicativeTypes #-}
2
3module BBCode
4 ( parse
5 ) where
6
7import Thermoprint
8
9import BBCode.Tokenizer
10import BBCode.Syntax
11
12import Data.Maybe
13import Data.Monoid
14import Data.Either
15import Data.Bifunctor
16import Data.List
17import Data.Ord
18import Data.Foldable
19
20import Data.Function (on)
21
22import Data.CaseInsensitive ( CI )
23import qualified Data.CaseInsensitive as CI
24
25import Data.Map ( Map )
26import qualified Data.Map as Map
27
28import Prelude hiding (takeWhile)
29
30import Debug.Trace
31
32knownTags :: Map (CI String) (Either (Block String -> Block String) (Inline String -> Inline String))
33knownTags = [ ("center", Left Center)
34 , ("u", Right Underline)
35 ]
36
37isBlock, isInline :: String -> Bool
38isBlock = testTag isLeft
39isInline = testTag isRight
40testTag f k = fromMaybe False (f <$> Map.lookup (CI.mk k) knownTags)
41
42data Decorated c = Decorated c [String]
43 deriving (Show, Eq)
44
45parse :: String -> Either String (Block String)
46parse input = (remerge . blockify <$> (tokenize input >>= treeify)) >>= semantics
47
48blockify :: ContentForest -> [Decorated String]
49blockify = map sortDeco . concat . map (blockify' [])
50 where
51 blockify' _ Empty = []
52 blockify' initial (Content str) = [str `Decorated` initial]
53 blockify' initial (Tagged cs tag) = concat $ map (blockify' $ tag : initial) cs
54 sortDeco :: Decorated c -> Decorated c
55 sortDeco (Decorated c tags) = Decorated c $ (nubBy ((==) `on` CI.mk) . sortBy blockiness) tags
56 blockiness :: String -> String -> Ordering
57 blockiness a b = comparing isBlock a b <> comparing CI.mk a b -- sort also alphabetically for the benefit of remerge
58
59remerge :: [Decorated String] -> ContentForest
60remerge [] = []
61remerge [(Decorated c deco)] = [appEndo applyDeco $ Content c]
62 where
63 applyDeco = foldMap (\t -> Endo (\c -> Tagged [c] t)) $ reverse deco
64remerge xs = concat $ map toTree $ groupLasts xs
65 where
66 groupLasts = groupBy ((==) `on` (listToMaybe . reverse . (\(Decorated _ ds) -> ds)))
67 -- toTree :: [Decorated String] -> [ContentTree]
68 toTree [] = []
69 toTree (x@(Decorated _ []):xs) = map (Content . (\(Decorated c []) -> c)) (x:xs)
70 toTree (x@(Decorated _ ds):xs) = [Tagged content tag]
71 where
72 tag = last ds
73 content = toTree $ map stripLast (x:xs)
74 stripLast (Decorated c ds) = Decorated c (init ds)
75
76unknownTag :: forall a. String -> Either String a
77unknownTag tag = Left $ "Unknown tag: " ++ tag
78
79semantics :: ContentForest -> Either String (Block String)
80semantics forest = Over <$> mapM semantics' forest
81
82semantics' :: ContentTree -> Either String (Block String)
83semantics' Empty = Right $ Over []
84semantics' (Content str) = Right $ Paragraph (Cooked str)
85semantics' (Tagged cs tag) = case Map.lookup (CI.mk tag) knownTags of
86 Nothing -> unknownTag tag
87 Just (Left f) -> Over . map f <$> mapM semantics' cs
88 Just (Right f) -> Paragraph . Beside . map f<$> mapM iSemantics cs
89
90iSemantics :: ContentTree -> Either String (Inline String)
91iSemantics Empty = Right $ Beside []
92iSemantics (Content str) = Right $ Cooked str
93iSemantics (Tagged cs tag) = case Map.lookup (CI.mk tag) knownTags of
94 Nothing -> unknownTag tag
95 Just (Left f) -> error "Known inline tag sorted within block"
96 Just (Right f) -> Beside . map f <$> mapM iSemantics cs
diff --git a/bbcode/src/BBCode/Syntax.hs b/bbcode/src/BBCode/Syntax.hs
new file mode 100644
index 0000000..a196e05
--- /dev/null
+++ b/bbcode/src/BBCode/Syntax.hs
@@ -0,0 +1,108 @@
1{-# LANGUAGE RecordWildCards #-}
2
3module BBCode.Syntax
4 ( treeify
5 , ContentForest
6 , ContentTree(..)
7 ) where
8
9import BBCode.Tokenizer (Token(..))
10
11import Data.Foldable
12import Data.Bifunctor
13import Data.Monoid
14
15import Control.Monad.State
16import Control.Monad.Trans
17import Control.Monad
18
19import Data.CaseInsensitive ( CI )
20import qualified Data.CaseInsensitive as CI
21
22import Data.Function (on)
23
24type ContentForest = [ContentTree]
25data ContentTree = Content String
26 | Tagged [ContentTree] String
27 | Empty
28 deriving (Show, Eq)
29
30data Step = Down [ContentTree] [ContentTree] String
31 deriving (Show)
32
33data Zipper = Zipper
34 { hole :: String
35 , prevs :: [ContentTree]
36 , steps :: [Step]
37 }
38 deriving (Show)
39
40type Parser = StateT Zipper (Either String)
41abort :: String -> Parser a
42abort = lift . Left
43
44
45treeify :: [Token] -> Either String ContentForest
46treeify tokenStream = second (postProcess . unZip) $ execStateT (mapM_ incorporate tokenStream) (Zipper "" [] [])
47
48postProcess :: ContentForest -> ContentForest
49postProcess forest = do
50 tree <- forest
51 let
52 tree' = postProcess' tree
53 guard $ tree' /= Empty
54 return tree'
55 where
56 postProcess' :: ContentTree -> ContentTree
57 postProcess' (Content "") = Empty
58 postProcess' (Tagged [] _) = Empty
59 postProcess' (Tagged cs t) = Tagged [c' | c <- cs, let c' = postProcess' c, c' /= Empty ] t
60 postProcess' x = x
61
62unZip :: Zipper -> ContentForest
63unZip Zipper{..} = reverse (apply hole steps : prevs)
64
65apply :: String -> [Step] -> ContentTree
66apply hole steps = hole `apply'` (reverse steps)
67apply' "" [] = Empty
68apply' hole [] = Content hole
69apply' hole (Down pres posts tag:steps) = Tagged (reverse pres ++ [hole `apply` steps] ++ posts) tag
70
71incorporate :: Token -> Parser ()
72incorporate (Text str) = append str
73incorporate (Whitespace str)
74 | delimitsPar str = do
75 currSteps <- gets steps
76 if null currSteps then
77 modify (\Zipper{..} -> Zipper { hole = "", steps = [], prevs = (hole `apply` steps : prevs) })
78 else
79 append str
80 | otherwise = append str
81 where
82 delimitsPar str = (sum . map (const (1 :: Integer)) . filter (== '\n') $ str) >= 1
83incorporate (TagOpen tagName) = modify $ (\z@(Zipper{..}) -> z { steps = (Down [] [] tagName : steps) })
84incorporate (TagClose tagName) = do
85 currSteps <- gets steps
86 case currSteps of
87 [] -> abort $ "Closing unopenend tag: " ++ tagName
88 (Down pres posts tagName':s) -> if ((==) `on` CI.mk) tagName tagName' then
89 goUp
90 else
91 abort $ "Mismatched tags: [" ++ tagName' ++ "]...[/" ++ tagName ++ "]"
92
93append :: String -> Parser ()
94append str = modify (\z@(Zipper{..}) -> z { hole = hole ++ str })
95
96goUp :: Parser ()
97goUp = do
98 (Down pres posts tagName:s) <- gets steps
99 hole <- gets hole
100 let
101 steppedHole = Tagged (reverse pres ++ [Content hole] ++ posts) tagName
102 case s of
103 [] -> modify $ (\z@(Zipper{..}) -> Zipper { hole = "", prevs = steppedHole : prevs, steps = [] })
104 (t:ts) -> do
105 let
106 (Down pres posts tagName) = t
107 t' = Down (steppedHole : pres) posts tagName
108 modify $ (\z -> z { hole = "", steps = (t':ts) })
diff --git a/bbcode/src/BBCode/Tokenizer.hs b/bbcode/src/BBCode/Tokenizer.hs
new file mode 100644
index 0000000..c860c7c
--- /dev/null
+++ b/bbcode/src/BBCode/Tokenizer.hs
@@ -0,0 +1,44 @@
1{-# LANGUAGE OverloadedStrings #-}
2
3module BBCode.Tokenizer
4 ( Token(..)
5 , tokenize
6 ) where
7
8import qualified Data.Text.Lazy as TL
9import qualified Data.Text as T
10
11import Control.Applicative
12import Data.Attoparsec.Text.Lazy
13
14import Data.Char (isSpace)
15import Data.Monoid (mconcat)
16
17data Token = Text String
18 | Whitespace String
19 | TagOpen String
20 | TagClose String
21 deriving (Show, Read, Eq)
22
23tokenize :: String -> Either String [Token]
24tokenize = eitherResult . parse (tokenize' <* endOfInput) . TL.pack
25
26tokenize' :: Parser [Token]
27tokenize' = many $ choice [ whitespace
28 , Text . T.unpack <$> ("\\" *> "[")
29 , tagClose
30 , tagOpen
31 , text
32 ]
33
34whitespace :: Parser Token
35whitespace = Whitespace <$> many1 space
36
37tagOpen :: Parser Token
38tagOpen = TagOpen . T.unpack <$> ("[" *> takeWhile1 (/= ']') <* "]")
39
40tagClose :: Parser Token
41tagClose = TagClose . T.unpack <$> ("[/" *> takeWhile1 (/= ']') <* "]")
42
43text :: Parser Token
44text = Text . T.unpack <$> takeWhile1 (\c -> not (isSpace c) && notInClass "[" c)