{-# LANGUAGE OverloadedStrings #-}

-- | Tokenizer for BBCode markup.
--
-- The input is split into a flat list of 'Token's: runs of whitespace,
-- runs of plain text, opening tags (@[name]@) and closing tags (@[/name]@).
-- A literal opening bracket can be written as @\\[@.
module BBCode.Tokenizer
  ( Token(..)
  , tokenize
  ) where

import qualified Data.Text.Lazy as TL
import qualified Data.Text as T

import Control.Applicative
import Data.Attoparsec.Text.Lazy
import Data.Char (isSpace)
import Data.Monoid (mconcat)

-- | A lexical unit of a BBCode document.
data Token
  = Text String        -- ^ A run of non-whitespace characters outside of any tag.
  | Whitespace String  -- ^ A run of one or more whitespace characters.
  | TagOpen String     -- ^ The contents of @[...]@, without the brackets.
  | TagClose String    -- ^ The contents of @[/...]@, without the brackets and slash.
  deriving (Show, Read, Eq)

-- | Split a BBCode string into tokens.
--
-- Returns 'Left' with the parser's error message when the input cannot be
-- tokenized completely, e.g. when a @[@ is never closed by a @]@.
tokenize :: String -> Either String [Token]
tokenize = eitherResult . parse (tokenize' <* endOfInput) . TL.pack

-- | Zero or more tokens. The order of alternatives matters: the @\\[@ escape
-- has to be tried before the tag parsers, and closing tags before opening
-- tags (otherwise @[/b]@ would be read as an opening tag named @/b@).
tokenize' :: Parser [Token]
tokenize' = many $ choice
  [ whitespace
  , escapedBracket
  , tagClose
  , tagOpen
  , text
  ]
  where
    -- The escape sequence @\\[@ yields a literal @[@ as its own text token.
    escapedBracket :: Parser Token
    escapedBracket = Text . T.unpack <$> ("\\" *> "[")

-- | One or more consecutive whitespace characters.
whitespace :: Parser Token
whitespace = Whitespace . T.unpack <$> takeWhile1 isSpace

-- | An opening tag: @[@, at least one character other than @]@, then @]@.
tagOpen :: Parser Token
tagOpen = TagOpen . T.unpack <$> ("[" *> takeWhile1 (/= ']') <* "]")

-- | A closing tag: @[/@, at least one character other than @]@, then @]@.
tagClose :: Parser Token
tagClose = TagClose . T.unpack <$> ("[/" *> takeWhile1 (/= ']') <* "]")

-- | A run of plain text: characters that are neither whitespace nor @[@.
--
-- The run ends just before a backslash that is immediately followed by @[@,
-- so that the escape sequence is handled by 'tokenize'' even when it occurs
-- in the middle of a word (@foo\\[b]@). Any other backslash is ordinary text.
text :: Parser Token
text = Text . T.unpack . T.concat <$> many1 piece
  where
    piece :: Parser T.Text
    piece = takeWhile1 isPlain <|> literalBackslash

    isPlain :: Char -> Bool
    isPlain c = not (isSpace c) && c /= '[' && c /= '\\'

    -- A backslash that does not start the @\\[@ escape. On failure attoparsec
    -- backtracks, leaving the backslash unconsumed for the escape parser.
    literalBackslash :: Parser T.Text
    literalBackslash = do
      _ <- char '\\'
      next <- peekChar
      if next == Just '['
        then empty
        else pure (T.singleton '\\')