From 005dc408dc09c3b479398ebe3e92efa2cd54846e Mon Sep 17 00:00:00 2001 From: Gregor Kleen Date: Sat, 17 Oct 2015 02:26:25 +0200 Subject: Working prototype --- bbcode/src/BBCode/Tokenizer.hs | 44 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 bbcode/src/BBCode/Tokenizer.hs (limited to 'bbcode/src/BBCode/Tokenizer.hs') diff --git a/bbcode/src/BBCode/Tokenizer.hs b/bbcode/src/BBCode/Tokenizer.hs new file mode 100644 index 0000000..c860c7c --- /dev/null +++ b/bbcode/src/BBCode/Tokenizer.hs @@ -0,0 +1,44 @@ +{-# LANGUAGE OverloadedStrings #-} + +module BBCode.Tokenizer + ( Token(..) + , tokenize + ) where + +import qualified Data.Text.Lazy as TL +import qualified Data.Text as T + +import Control.Applicative +import Data.Attoparsec.Text.Lazy + +import Data.Char (isSpace) +import Data.Monoid (mconcat) + +data Token = Text String + | Whitespace String + | TagOpen String + | TagClose String + deriving (Show, Read, Eq) + +tokenize :: String -> Either String [Token] +tokenize = eitherResult . parse (tokenize' <* endOfInput) . TL.pack + +tokenize' :: Parser [Token] +tokenize' = many $ choice [ whitespace + , Text . T.unpack <$> ("\\" *> "[") + , tagClose + , tagOpen + , text + ] + +whitespace :: Parser Token +whitespace = Whitespace <$> many1 space + +tagOpen :: Parser Token +tagOpen = TagOpen . T.unpack <$> ("[" *> takeWhile1 (/= ']') <* "]") + +tagClose :: Parser Token +tagClose = TagClose . T.unpack <$> ("[/" *> takeWhile1 (/= ']') <* "]") + +text :: Parser Token +text = Text . T.unpack <$> takeWhile1 (\c -> not (isSpace c) && notInClass "[" c) -- cgit v1.2.3