diff options
author | Gregor Kleen <gkleen@yggdrasil.li> | 2015-10-17 02:26:25 +0200 |
---|---|---|
committer | Gregor Kleen <gkleen@yggdrasil.li> | 2015-10-17 02:26:25 +0200 |
commit | 005dc408dc09c3b479398ebe3e92efa2cd54846e (patch) | |
tree | 23dcfe7a545885c9aa145f1ccae6d33206a87820 /bbcode/src | |
parent | 2dcbb4482de2c352b76372b389fda20c63075295 (diff) | |
download | thermoprint-005dc408dc09c3b479398ebe3e92efa2cd54846e.tar thermoprint-005dc408dc09c3b479398ebe3e92efa2cd54846e.tar.gz thermoprint-005dc408dc09c3b479398ebe3e92efa2cd54846e.tar.bz2 thermoprint-005dc408dc09c3b479398ebe3e92efa2cd54846e.tar.xz thermoprint-005dc408dc09c3b479398ebe3e92efa2cd54846e.zip |
Working prototype
Diffstat (limited to 'bbcode/src')
-rw-r--r-- | bbcode/src/BBCode.hs | 96 | ||||
-rw-r--r-- | bbcode/src/BBCode/Syntax.hs | 108 | ||||
-rw-r--r-- | bbcode/src/BBCode/Tokenizer.hs | 44 |
3 files changed, 248 insertions, 0 deletions
diff --git a/bbcode/src/BBCode.hs b/bbcode/src/BBCode.hs new file mode 100644 index 0000000..750fb0f --- /dev/null +++ b/bbcode/src/BBCode.hs | |||
@@ -0,0 +1,96 @@ | |||
1 | {-# LANGUAGE OverloadedStrings, OverloadedLists, RankNTypes, ImpredicativeTypes #-} | ||
2 | |||
3 | module BBCode | ||
4 | ( parse | ||
5 | ) where | ||
6 | |||
7 | import Thermoprint | ||
8 | |||
9 | import BBCode.Tokenizer | ||
10 | import BBCode.Syntax | ||
11 | |||
12 | import Data.Maybe | ||
13 | import Data.Monoid | ||
14 | import Data.Either | ||
15 | import Data.Bifunctor | ||
16 | import Data.List | ||
17 | import Data.Ord | ||
18 | import Data.Foldable | ||
19 | |||
20 | import Data.Function (on) | ||
21 | |||
22 | import Data.CaseInsensitive ( CI ) | ||
23 | import qualified Data.CaseInsensitive as CI | ||
24 | |||
25 | import Data.Map ( Map ) | ||
26 | import qualified Data.Map as Map | ||
27 | |||
28 | import Prelude hiding (takeWhile) | ||
29 | |||
30 | import Debug.Trace | ||
31 | |||
32 | knownTags :: Map (CI String) (Either (Block String -> Block String) (Inline String -> Inline String)) | ||
33 | knownTags = [ ("center", Left Center) | ||
34 | , ("u", Right Underline) | ||
35 | ] | ||
36 | |||
37 | isBlock, isInline :: String -> Bool | ||
38 | isBlock = testTag isLeft | ||
39 | isInline = testTag isRight | ||
40 | testTag f k = fromMaybe False (f <$> Map.lookup (CI.mk k) knownTags) | ||
41 | |||
42 | data Decorated c = Decorated c [String] | ||
43 | deriving (Show, Eq) | ||
44 | |||
45 | parse :: String -> Either String (Block String) | ||
46 | parse input = (remerge . blockify <$> (tokenize input >>= treeify)) >>= semantics | ||
47 | |||
48 | blockify :: ContentForest -> [Decorated String] | ||
49 | blockify = map sortDeco . concat . map (blockify' []) | ||
50 | where | ||
51 | blockify' _ Empty = [] | ||
52 | blockify' initial (Content str) = [str `Decorated` initial] | ||
53 | blockify' initial (Tagged cs tag) = concat $ map (blockify' $ tag : initial) cs | ||
54 | sortDeco :: Decorated c -> Decorated c | ||
55 | sortDeco (Decorated c tags) = Decorated c $ (nubBy ((==) `on` CI.mk) . sortBy blockiness) tags | ||
56 | blockiness :: String -> String -> Ordering | ||
57 | blockiness a b = comparing isBlock a b <> comparing CI.mk a b -- sort also alphabetically for the benefit of remerge | ||
58 | |||
59 | remerge :: [Decorated String] -> ContentForest | ||
60 | remerge [] = [] | ||
61 | remerge [(Decorated c deco)] = [appEndo applyDeco $ Content c] | ||
62 | where | ||
63 | applyDeco = foldMap (\t -> Endo (\c -> Tagged [c] t)) $ reverse deco | ||
64 | remerge xs = concat $ map toTree $ groupLasts xs | ||
65 | where | ||
66 | groupLasts = groupBy ((==) `on` (listToMaybe . reverse . (\(Decorated _ ds) -> ds))) | ||
67 | -- toTree :: [Decorated String] -> [ContentTree] | ||
68 | toTree [] = [] | ||
69 | toTree (x@(Decorated _ []):xs) = map (Content . (\(Decorated c []) -> c)) (x:xs) | ||
70 | toTree (x@(Decorated _ ds):xs) = [Tagged content tag] | ||
71 | where | ||
72 | tag = last ds | ||
73 | content = toTree $ map stripLast (x:xs) | ||
74 | stripLast (Decorated c ds) = Decorated c (init ds) | ||
75 | |||
76 | unknownTag :: forall a. String -> Either String a | ||
77 | unknownTag tag = Left $ "Unknown tag: " ++ tag | ||
78 | |||
79 | semantics :: ContentForest -> Either String (Block String) | ||
80 | semantics forest = Over <$> mapM semantics' forest | ||
81 | |||
82 | semantics' :: ContentTree -> Either String (Block String) | ||
83 | semantics' Empty = Right $ Over [] | ||
84 | semantics' (Content str) = Right $ Paragraph (Cooked str) | ||
85 | semantics' (Tagged cs tag) = case Map.lookup (CI.mk tag) knownTags of | ||
86 | Nothing -> unknownTag tag | ||
87 | Just (Left f) -> Over . map f <$> mapM semantics' cs | ||
88 | Just (Right f) -> Paragraph . Beside . map f<$> mapM iSemantics cs | ||
89 | |||
90 | iSemantics :: ContentTree -> Either String (Inline String) | ||
91 | iSemantics Empty = Right $ Beside [] | ||
92 | iSemantics (Content str) = Right $ Cooked str | ||
93 | iSemantics (Tagged cs tag) = case Map.lookup (CI.mk tag) knownTags of | ||
94 | Nothing -> unknownTag tag | ||
95 | Just (Left f) -> error "Known inline tag sorted within block" | ||
96 | Just (Right f) -> Beside . map f <$> mapM iSemantics cs | ||
diff --git a/bbcode/src/BBCode/Syntax.hs b/bbcode/src/BBCode/Syntax.hs new file mode 100644 index 0000000..a196e05 --- /dev/null +++ b/bbcode/src/BBCode/Syntax.hs | |||
@@ -0,0 +1,108 @@ | |||
1 | {-# LANGUAGE RecordWildCards #-} | ||
2 | |||
3 | module BBCode.Syntax | ||
4 | ( treeify | ||
5 | , ContentForest | ||
6 | , ContentTree(..) | ||
7 | ) where | ||
8 | |||
9 | import BBCode.Tokenizer (Token(..)) | ||
10 | |||
11 | import Data.Foldable | ||
12 | import Data.Bifunctor | ||
13 | import Data.Monoid | ||
14 | |||
15 | import Control.Monad.State | ||
16 | import Control.Monad.Trans | ||
17 | import Control.Monad | ||
18 | |||
19 | import Data.CaseInsensitive ( CI ) | ||
20 | import qualified Data.CaseInsensitive as CI | ||
21 | |||
22 | import Data.Function (on) | ||
23 | |||
24 | type ContentForest = [ContentTree] | ||
25 | data ContentTree = Content String | ||
26 | | Tagged [ContentTree] String | ||
27 | | Empty | ||
28 | deriving (Show, Eq) | ||
29 | |||
30 | data Step = Down [ContentTree] [ContentTree] String | ||
31 | deriving (Show) | ||
32 | |||
33 | data Zipper = Zipper | ||
34 | { hole :: String | ||
35 | , prevs :: [ContentTree] | ||
36 | , steps :: [Step] | ||
37 | } | ||
38 | deriving (Show) | ||
39 | |||
40 | type Parser = StateT Zipper (Either String) | ||
41 | abort :: String -> Parser a | ||
42 | abort = lift . Left | ||
43 | |||
44 | |||
45 | treeify :: [Token] -> Either String ContentForest | ||
46 | treeify tokenStream = second (postProcess . unZip) $ execStateT (mapM_ incorporate tokenStream) (Zipper "" [] []) | ||
47 | |||
48 | postProcess :: ContentForest -> ContentForest | ||
49 | postProcess forest = do | ||
50 | tree <- forest | ||
51 | let | ||
52 | tree' = postProcess' tree | ||
53 | guard $ tree' /= Empty | ||
54 | return tree' | ||
55 | where | ||
56 | postProcess' :: ContentTree -> ContentTree | ||
57 | postProcess' (Content "") = Empty | ||
58 | postProcess' (Tagged [] _) = Empty | ||
59 | postProcess' (Tagged cs t) = Tagged [c' | c <- cs, let c' = postProcess' c, c' /= Empty ] t | ||
60 | postProcess' x = x | ||
61 | |||
62 | unZip :: Zipper -> ContentForest | ||
63 | unZip Zipper{..} = reverse (apply hole steps : prevs) | ||
64 | |||
65 | apply :: String -> [Step] -> ContentTree | ||
66 | apply hole steps = hole `apply'` (reverse steps) | ||
67 | apply' "" [] = Empty | ||
68 | apply' hole [] = Content hole | ||
69 | apply' hole (Down pres posts tag:steps) = Tagged (reverse pres ++ [hole `apply` steps] ++ posts) tag | ||
70 | |||
71 | incorporate :: Token -> Parser () | ||
72 | incorporate (Text str) = append str | ||
73 | incorporate (Whitespace str) | ||
74 | | delimitsPar str = do | ||
75 | currSteps <- gets steps | ||
76 | if null currSteps then | ||
77 | modify (\Zipper{..} -> Zipper { hole = "", steps = [], prevs = (hole `apply` steps : prevs) }) | ||
78 | else | ||
79 | append str | ||
80 | | otherwise = append str | ||
81 | where | ||
82 | delimitsPar str = (sum . map (const (1 :: Integer)) . filter (== '\n') $ str) >= 1 | ||
83 | incorporate (TagOpen tagName) = modify $ (\z@(Zipper{..}) -> z { steps = (Down [] [] tagName : steps) }) | ||
84 | incorporate (TagClose tagName) = do | ||
85 | currSteps <- gets steps | ||
86 | case currSteps of | ||
87 | [] -> abort $ "Closing unopenend tag: " ++ tagName | ||
88 | (Down pres posts tagName':s) -> if ((==) `on` CI.mk) tagName tagName' then | ||
89 | goUp | ||
90 | else | ||
91 | abort $ "Mismatched tags: [" ++ tagName' ++ "]...[/" ++ tagName ++ "]" | ||
92 | |||
93 | append :: String -> Parser () | ||
94 | append str = modify (\z@(Zipper{..}) -> z { hole = hole ++ str }) | ||
95 | |||
96 | goUp :: Parser () | ||
97 | goUp = do | ||
98 | (Down pres posts tagName:s) <- gets steps | ||
99 | hole <- gets hole | ||
100 | let | ||
101 | steppedHole = Tagged (reverse pres ++ [Content hole] ++ posts) tagName | ||
102 | case s of | ||
103 | [] -> modify $ (\z@(Zipper{..}) -> Zipper { hole = "", prevs = steppedHole : prevs, steps = [] }) | ||
104 | (t:ts) -> do | ||
105 | let | ||
106 | (Down pres posts tagName) = t | ||
107 | t' = Down (steppedHole : pres) posts tagName | ||
108 | modify $ (\z -> z { hole = "", steps = (t':ts) }) | ||
diff --git a/bbcode/src/BBCode/Tokenizer.hs b/bbcode/src/BBCode/Tokenizer.hs new file mode 100644 index 0000000..c860c7c --- /dev/null +++ b/bbcode/src/BBCode/Tokenizer.hs | |||
@@ -0,0 +1,44 @@ | |||
1 | {-# LANGUAGE OverloadedStrings #-} | ||
2 | |||
3 | module BBCode.Tokenizer | ||
4 | ( Token(..) | ||
5 | , tokenize | ||
6 | ) where | ||
7 | |||
8 | import qualified Data.Text.Lazy as TL | ||
9 | import qualified Data.Text as T | ||
10 | |||
11 | import Control.Applicative | ||
12 | import Data.Attoparsec.Text.Lazy | ||
13 | |||
14 | import Data.Char (isSpace) | ||
15 | import Data.Monoid (mconcat) | ||
16 | |||
17 | data Token = Text String | ||
18 | | Whitespace String | ||
19 | | TagOpen String | ||
20 | | TagClose String | ||
21 | deriving (Show, Read, Eq) | ||
22 | |||
23 | tokenize :: String -> Either String [Token] | ||
24 | tokenize = eitherResult . parse (tokenize' <* endOfInput) . TL.pack | ||
25 | |||
26 | tokenize' :: Parser [Token] | ||
27 | tokenize' = many $ choice [ whitespace | ||
28 | , Text . T.unpack <$> ("\\" *> "[") | ||
29 | , tagClose | ||
30 | , tagOpen | ||
31 | , text | ||
32 | ] | ||
33 | |||
34 | whitespace :: Parser Token | ||
35 | whitespace = Whitespace <$> many1 space | ||
36 | |||
37 | tagOpen :: Parser Token | ||
38 | tagOpen = TagOpen . T.unpack <$> ("[" *> takeWhile1 (/= ']') <* "]") | ||
39 | |||
40 | tagClose :: Parser Token | ||
41 | tagClose = TagClose . T.unpack <$> ("[/" *> takeWhile1 (/= ']') <* "]") | ||
42 | |||
43 | text :: Parser Token | ||
44 | text = Text . T.unpack <$> takeWhile1 (\c -> not (isSpace c) && notInClass "[" c) | ||