From 005dc408dc09c3b479398ebe3e92efa2cd54846e Mon Sep 17 00:00:00 2001 From: Gregor Kleen Date: Sat, 17 Oct 2015 02:26:25 +0200 Subject: Working prototype --- bbcode/src/BBCode/Syntax.hs | 108 +++++++++++++++++++++++++++++++++++++++++ bbcode/src/BBCode/Tokenizer.hs | 44 +++++++++++++++++ 2 files changed, 152 insertions(+) create mode 100644 bbcode/src/BBCode/Syntax.hs create mode 100644 bbcode/src/BBCode/Tokenizer.hs (limited to 'bbcode/src/BBCode') diff --git a/bbcode/src/BBCode/Syntax.hs b/bbcode/src/BBCode/Syntax.hs new file mode 100644 index 0000000..a196e05 --- /dev/null +++ b/bbcode/src/BBCode/Syntax.hs @@ -0,0 +1,108 @@ +{-# LANGUAGE RecordWildCards #-} + +module BBCode.Syntax + ( treeify + , ContentForest + , ContentTree(..) + ) where + +import BBCode.Tokenizer (Token(..)) + +import Data.Foldable +import Data.Bifunctor +import Data.Monoid + +import Control.Monad.State +import Control.Monad.Trans +import Control.Monad + +import Data.CaseInsensitive ( CI ) +import qualified Data.CaseInsensitive as CI + +import Data.Function (on) + +type ContentForest = [ContentTree] +data ContentTree = Content String + | Tagged [ContentTree] String + | Empty + deriving (Show, Eq) + +data Step = Down [ContentTree] [ContentTree] String + deriving (Show) + +data Zipper = Zipper + { hole :: String + , prevs :: [ContentTree] + , steps :: [Step] + } + deriving (Show) + +type Parser = StateT Zipper (Either String) +abort :: String -> Parser a +abort = lift . Left + + +treeify :: [Token] -> Either String ContentForest +treeify tokenStream = second (postProcess . unZip) $ execStateT (mapM_ incorporate tokenStream) (Zipper "" [] []) + +postProcess :: ContentForest -> ContentForest +postProcess forest = do + tree <- forest + let + tree' = postProcess' tree + guard $ tree' /= Empty + return tree' + where + postProcess' :: ContentTree -> ContentTree + postProcess' (Content "") = Empty + postProcess' (Tagged [] _) = Empty + postProcess' (Tagged cs t) = Tagged [c' | c <- cs, let c' = postProcess' c, c' /= Empty ] t + postProcess' x = x + +unZip :: Zipper -> ContentForest +unZip Zipper{..} = reverse (apply hole steps : prevs) + +apply :: String -> [Step] -> ContentTree +apply hole steps = hole `apply'` (reverse steps) +apply' "" [] = Empty +apply' hole [] = Content hole +apply' hole (Down pres posts tag:steps) = Tagged (reverse pres ++ [hole `apply` steps] ++ posts) tag + +incorporate :: Token -> Parser () +incorporate (Text str) = append str +incorporate (Whitespace str) + | delimitsPar str = do + currSteps <- gets steps + if null currSteps then + modify (\Zipper{..} -> Zipper { hole = "", steps = [], prevs = (hole `apply` steps : prevs) }) + else + append str + | otherwise = append str + where + delimitsPar str = (sum . map (const (1 :: Integer)) . filter (== '\n') $ str) >= 1 +incorporate (TagOpen tagName) = modify $ (\z@(Zipper{..}) -> z { steps = (Down [] [] tagName : steps) }) +incorporate (TagClose tagName) = do + currSteps <- gets steps + case currSteps of + [] -> abort $ "Closing unopenend tag: " ++ tagName + (Down pres posts tagName':s) -> if ((==) `on` CI.mk) tagName tagName' then + goUp + else + abort $ "Mismatched tags: [" ++ tagName' ++ "]...[/" ++ tagName ++ "]" + +append :: String -> Parser () +append str = modify (\z@(Zipper{..}) -> z { hole = hole ++ str }) + +goUp :: Parser () +goUp = do + (Down pres posts tagName:s) <- gets steps + hole <- gets hole + let + steppedHole = Tagged (reverse pres ++ [Content hole] ++ posts) tagName + case s of + [] -> modify $ (\z@(Zipper{..}) -> Zipper { hole = "", prevs = steppedHole : prevs, steps = [] }) + (t:ts) -> do + let + (Down pres posts tagName) = t + t' = Down (steppedHole : pres) posts tagName + modify $ (\z -> z { hole = "", steps = (t':ts) }) diff --git a/bbcode/src/BBCode/Tokenizer.hs b/bbcode/src/BBCode/Tokenizer.hs new file mode 100644 index 0000000..c860c7c --- /dev/null +++ b/bbcode/src/BBCode/Tokenizer.hs @@ -0,0 +1,44 @@ +{-# LANGUAGE OverloadedStrings #-} + +module BBCode.Tokenizer + ( Token(..) + , tokenize + ) where + +import qualified Data.Text.Lazy as TL +import qualified Data.Text as T + +import Control.Applicative +import Data.Attoparsec.Text.Lazy + +import Data.Char (isSpace) +import Data.Monoid (mconcat) + +data Token = Text String + | Whitespace String + | TagOpen String + | TagClose String + deriving (Show, Read, Eq) + +tokenize :: String -> Either String [Token] +tokenize = eitherResult . parse (tokenize' <* endOfInput) . TL.pack + +tokenize' :: Parser [Token] +tokenize' = many $ choice [ whitespace + , Text . T.unpack <$> ("\\" *> "[") + , tagClose + , tagOpen + , text + ] + +whitespace :: Parser Token +whitespace = Whitespace <$> many1 space + +tagOpen :: Parser Token +tagOpen = TagOpen . T.unpack <$> ("[" *> takeWhile1 (/= ']') <* "]") + +tagClose :: Parser Token +tagClose = TagClose . T.unpack <$> ("[/" *> takeWhile1 (/= ']') <* "]") + +text :: Parser Token +text = Text . T.unpack <$> takeWhile1 (\c -> not (isSpace c) && notInClass "[" c) -- cgit v1.2.3