{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE DeriveGeneric, DeriveAnyClass #-} -- | A parser to transform 'Text' into a stream of 'BBToken's module Text.BBCode.Lexer ( BBToken(..) , token -- , escapedText -- , escapedText' ) where import Data.Attoparsec.Text import Data.Text (Text) import qualified Data.Text as T (singleton) import Data.Char (isSpace) import Control.Applicative import Prelude hiding (takeWhile) -- | Our lexicographical unit data BBToken = BBOpen Text [(Text, Text)] -- ^ Tag open with attributes | BBContained Text [(Text, Text)] -- ^ Tag open & immediate close with attributes | BBClose Text -- ^ Tag close | BBStr Text -- ^ Content of a tag deriving (Eq, Show) token :: Parser BBToken -- ^ Tokenizer token = BBClose <$> ("[/" *> escapedText' [']'] <* "]") <|> uncurry BBContained <$ "[" <*> openTag <* "/]" <|> uncurry BBOpen <$ "[" <*> openTag <* "]" <|> BBStr <$> escapedText ['['] openTag :: Parser (Text, [(Text, Text)]) openTag = (,) <$> escapedText' [']', ' ', '=', '/'] <*> attrs' attrs :: Parser [(Text, Text)] attrs = (:) <$> (namedAttr <|> plainValue) <* takeWhile isSpace <*> attrs' where namedAttr = (,) <$ takeWhile isSpace <*> escapedText ['=', ']', ' ', '/'] <*> option "" ("=" *> attrArg) plainValue = (,) <$> pure "" <* "=" <*> attrArg attrArg = "\"" *> escapedText ['"'] <* "\"" <|> escapedText [']', ' ', '/'] attrs' :: Parser [(Text, Text)] attrs' = option [] attrs escapedText :: [Char] -> Parser Text -- ^ @escapedText cs@ consumes 'Text' up to (not including) the first occurence of a character from @cs@ that is not escaped using @\\@ -- -- Always consumes at least one character -- -- @\\@ needs to be escaped (prefixed with @\\@) iff it precedes a character from @cs@ escapedText [] = takeText escapedText cs = recurse $ choice [ takeWhile1 (not . special) , escapeSeq , escapeChar' ] where escapeChar = '\\' special = inClass $ escapeChar : cs escapeChar' = string $ T.singleton escapeChar escapeSeq = escapeChar' >> (T.singleton <$> satisfy special) -- s/\\([:cs])/\1/ recurse p = mappend <$> p <*> escapedText' cs escapedText' :: [Char] -> Parser Text -- ^ @'option' "" $ 'escapedText' cs@ escapedText' cs = option "" $ escapedText cs