From 005dc408dc09c3b479398ebe3e92efa2cd54846e Mon Sep 17 00:00:00 2001 From: Gregor Kleen Date: Sat, 17 Oct 2015 02:26:25 +0200 Subject: Working prototype --- bbcode/src/BBCode.hs | 96 ++++++++++++++++++++++++++++++++++++ bbcode/src/BBCode/Syntax.hs | 108 +++++++++++++++++++++++++++++++++++++++++ bbcode/src/BBCode/Tokenizer.hs | 44 +++++++++++++++++ 3 files changed, 248 insertions(+) create mode 100644 bbcode/src/BBCode.hs create mode 100644 bbcode/src/BBCode/Syntax.hs create mode 100644 bbcode/src/BBCode/Tokenizer.hs (limited to 'bbcode/src') diff --git a/bbcode/src/BBCode.hs b/bbcode/src/BBCode.hs new file mode 100644 index 0000000..750fb0f --- /dev/null +++ b/bbcode/src/BBCode.hs @@ -0,0 +1,96 @@ +{-# LANGUAGE OverloadedStrings, OverloadedLists, RankNTypes, ImpredicativeTypes #-} + +module BBCode + ( parse + ) where + +import Thermoprint + +import BBCode.Tokenizer +import BBCode.Syntax + +import Data.Maybe +import Data.Monoid +import Data.Either +import Data.Bifunctor +import Data.List +import Data.Ord +import Data.Foldable + +import Data.Function (on) + +import Data.CaseInsensitive ( CI ) +import qualified Data.CaseInsensitive as CI + +import Data.Map ( Map ) +import qualified Data.Map as Map + +import Prelude hiding (takeWhile) + +import Debug.Trace + +knownTags :: Map (CI String) (Either (Block String -> Block String) (Inline String -> Inline String)) +knownTags = [ ("center", Left Center) + , ("u", Right Underline) + ] + +isBlock, isInline :: String -> Bool +isBlock = testTag isLeft +isInline = testTag isRight +testTag f k = fromMaybe False (f <$> Map.lookup (CI.mk k) knownTags) + +data Decorated c = Decorated c [String] + deriving (Show, Eq) + +parse :: String -> Either String (Block String) +parse input = (remerge . blockify <$> (tokenize input >>= treeify)) >>= semantics + +blockify :: ContentForest -> [Decorated String] +blockify = map sortDeco . concat . map (blockify' []) + where + blockify' _ Empty = [] + blockify' initial (Content str) = [str `Decorated` initial] + blockify' initial (Tagged cs tag) = concat $ map (blockify' $ tag : initial) cs + sortDeco :: Decorated c -> Decorated c + sortDeco (Decorated c tags) = Decorated c $ (nubBy ((==) `on` CI.mk) . sortBy blockiness) tags + blockiness :: String -> String -> Ordering + blockiness a b = comparing isBlock a b <> comparing CI.mk a b -- sort also alphabetically for the benefit of remerge + +remerge :: [Decorated String] -> ContentForest +remerge [] = [] +remerge [(Decorated c deco)] = [appEndo applyDeco $ Content c] + where + applyDeco = foldMap (\t -> Endo (\c -> Tagged [c] t)) $ reverse deco +remerge xs = concat $ map toTree $ groupLasts xs + where + groupLasts = groupBy ((==) `on` (listToMaybe . reverse . (\(Decorated _ ds) -> ds))) + -- toTree :: [Decorated String] -> [ContentTree] + toTree [] = [] + toTree (x@(Decorated _ []):xs) = map (Content . (\(Decorated c []) -> c)) (x:xs) + toTree (x@(Decorated _ ds):xs) = [Tagged content tag] + where + tag = last ds + content = toTree $ map stripLast (x:xs) + stripLast (Decorated c ds) = Decorated c (init ds) + +unknownTag :: forall a. String -> Either String a +unknownTag tag = Left $ "Unknown tag: " ++ tag + +semantics :: ContentForest -> Either String (Block String) +semantics forest = Over <$> mapM semantics' forest + +semantics' :: ContentTree -> Either String (Block String) +semantics' Empty = Right $ Over [] +semantics' (Content str) = Right $ Paragraph (Cooked str) +semantics' (Tagged cs tag) = case Map.lookup (CI.mk tag) knownTags of + Nothing -> unknownTag tag + Just (Left f) -> Over . map f <$> mapM semantics' cs + Just (Right f) -> Paragraph . Beside . map f<$> mapM iSemantics cs + +iSemantics :: ContentTree -> Either String (Inline String) +iSemantics Empty = Right $ Beside [] +iSemantics (Content str) = Right $ Cooked str +iSemantics (Tagged cs tag) = case Map.lookup (CI.mk tag) knownTags of + Nothing -> unknownTag tag + Just (Left f) -> error "Known inline tag sorted within block" + Just (Right f) -> Beside . map f <$> mapM iSemantics cs diff --git a/bbcode/src/BBCode/Syntax.hs b/bbcode/src/BBCode/Syntax.hs new file mode 100644 index 0000000..a196e05 --- /dev/null +++ b/bbcode/src/BBCode/Syntax.hs @@ -0,0 +1,108 @@ +{-# LANGUAGE RecordWildCards #-} + +module BBCode.Syntax + ( treeify + , ContentForest + , ContentTree(..) + ) where + +import BBCode.Tokenizer (Token(..)) + +import Data.Foldable +import Data.Bifunctor +import Data.Monoid + +import Control.Monad.State +import Control.Monad.Trans +import Control.Monad + +import Data.CaseInsensitive ( CI ) +import qualified Data.CaseInsensitive as CI + +import Data.Function (on) + +type ContentForest = [ContentTree] +data ContentTree = Content String + | Tagged [ContentTree] String + | Empty + deriving (Show, Eq) + +data Step = Down [ContentTree] [ContentTree] String + deriving (Show) + +data Zipper = Zipper + { hole :: String + , prevs :: [ContentTree] + , steps :: [Step] + } + deriving (Show) + +type Parser = StateT Zipper (Either String) +abort :: String -> Parser a +abort = lift . Left + + +treeify :: [Token] -> Either String ContentForest +treeify tokenStream = second (postProcess . unZip) $ execStateT (mapM_ incorporate tokenStream) (Zipper "" [] []) + +postProcess :: ContentForest -> ContentForest +postProcess forest = do + tree <- forest + let + tree' = postProcess' tree + guard $ tree' /= Empty + return tree' + where + postProcess' :: ContentTree -> ContentTree + postProcess' (Content "") = Empty + postProcess' (Tagged [] _) = Empty + postProcess' (Tagged cs t) = Tagged [c' | c <- cs, let c' = postProcess' c, c' /= Empty ] t + postProcess' x = x + +unZip :: Zipper -> ContentForest +unZip Zipper{..} = reverse (apply hole steps : prevs) + +apply :: String -> [Step] -> ContentTree +apply hole steps = hole `apply'` (reverse steps) +apply' "" [] = Empty +apply' hole [] = Content hole +apply' hole (Down pres posts tag:steps) = Tagged (reverse pres ++ [hole `apply` steps] ++ posts) tag + +incorporate :: Token -> Parser () +incorporate (Text str) = append str +incorporate (Whitespace str) + | delimitsPar str = do + currSteps <- gets steps + if null currSteps then + modify (\Zipper{..} -> Zipper { hole = "", steps = [], prevs = (hole `apply` steps : prevs) }) + else + append str + | otherwise = append str + where + delimitsPar str = (sum . map (const (1 :: Integer)) . filter (== '\n') $ str) >= 1 +incorporate (TagOpen tagName) = modify $ (\z@(Zipper{..}) -> z { steps = (Down [] [] tagName : steps) }) +incorporate (TagClose tagName) = do + currSteps <- gets steps + case currSteps of + [] -> abort $ "Closing unopenend tag: " ++ tagName + (Down pres posts tagName':s) -> if ((==) `on` CI.mk) tagName tagName' then + goUp + else + abort $ "Mismatched tags: [" ++ tagName' ++ "]...[/" ++ tagName ++ "]" + +append :: String -> Parser () +append str = modify (\z@(Zipper{..}) -> z { hole = hole ++ str }) + +goUp :: Parser () +goUp = do + (Down pres posts tagName:s) <- gets steps + hole <- gets hole + let + steppedHole = Tagged (reverse pres ++ [Content hole] ++ posts) tagName + case s of + [] -> modify $ (\z@(Zipper{..}) -> Zipper { hole = "", prevs = steppedHole : prevs, steps = [] }) + (t:ts) -> do + let + (Down pres posts tagName) = t + t' = Down (steppedHole : pres) posts tagName + modify $ (\z -> z { hole = "", steps = (t':ts) }) diff --git a/bbcode/src/BBCode/Tokenizer.hs b/bbcode/src/BBCode/Tokenizer.hs new file mode 100644 index 0000000..c860c7c --- /dev/null +++ b/bbcode/src/BBCode/Tokenizer.hs @@ -0,0 +1,44 @@ +{-# LANGUAGE OverloadedStrings #-} + +module BBCode.Tokenizer + ( Token(..) + , tokenize + ) where + +import qualified Data.Text.Lazy as TL +import qualified Data.Text as T + +import Control.Applicative +import Data.Attoparsec.Text.Lazy + +import Data.Char (isSpace) +import Data.Monoid (mconcat) + +data Token = Text String + | Whitespace String + | TagOpen String + | TagClose String + deriving (Show, Read, Eq) + +tokenize :: String -> Either String [Token] +tokenize = eitherResult . parse (tokenize' <* endOfInput) . TL.pack + +tokenize' :: Parser [Token] +tokenize' = many $ choice [ whitespace + , Text . T.unpack <$> ("\\" *> "[") + , tagClose + , tagOpen + , text + ] + +whitespace :: Parser Token +whitespace = Whitespace <$> many1 space + +tagOpen :: Parser Token +tagOpen = TagOpen . T.unpack <$> ("[" *> takeWhile1 (/= ']') <* "]") + +tagClose :: Parser Token +tagClose = TagClose . T.unpack <$> ("[/" *> takeWhile1 (/= ']') <* "]") + +text :: Parser Token +text = Text . T.unpack <$> takeWhile1 (\c -> not (isSpace c) && notInClass "[" c) -- cgit v1.2.3