{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE DeriveGeneric, DeriveAnyClass #-} -- | A parser to transform 'Text' into a stream of 'BBToken's module Text.BBCode.Lexer ( BBToken(..) , token , escapedText , escapedText' ) where import Data.Attoparsec.Text import Data.Text (Text) import qualified Data.Text as T (singleton, head, last, tail, null) import Control.Applicative import Test.QuickCheck (Arbitrary(..), CoArbitrary, genericShrink) import Test.QuickCheck.Gen (oneof, suchThat) import Test.QuickCheck.Instances import GHC.Generics (Generic) -- | Our lexicographical unit data BBToken = BBOpen Text -- ^ Tag open | BBClose Text -- ^ Tag close | BBStr Text -- ^ Content of a tag deriving (Generic, Eq, Show, CoArbitrary) -- | This instance does not produce: -- -- * opening and closing tags whose 'Text' ends in @\\@ -- * empty 'BBStr's instance Arbitrary BBToken where shrink = genericShrink arbitrary = oneof [ BBOpen <$> tagText , BBClose <$> tagText , BBStr <$> nonEmpty ] where tagText = arbitrary `suchThat` (not . lastIsEscape) lastIsEscape t | T.null t = False | T.last t == '\\' = True | otherwise = False nonEmpty = (arbitrary `suchThat` (not . T.null)) token :: Parser BBToken -- ^ Tokenizer token = BBClose <$> (string "[/" *> escapedText' [']'] <* string "]") <|> BBOpen <$> (string "[" *> escapedText' [']'] <* string "]") <|> BBStr <$> escapedText ['['] escapedText :: [Char] -> Parser Text -- ^ @escapedText cs@ consumes 'Text' up to (not including) the first occurence of a character from @cs@ that is not escaped using @\\@ -- -- Always consumes at least one character -- -- @\\@ needs to be escaped (prefixed with @\\@) iff it precedes a character from @cs@ escapedText [] = takeText escapedText cs = recurse $ choice [ takeWhile1 (not . special) , escapeSeq , escapeChar' ] where escapeChar = '\\' special = inClass $ escapeChar : cs escapeChar' = string $ T.singleton escapeChar escapeSeq = escapeChar' >> (T.singleton <$> satisfy special) -- s/\\[:cs]/\1/ recurse p = mappend <$> p <*> escapedText' cs escapedText' :: [Char] -> Parser Text -- ^ @'option' "" $ 'escapedText' cs@ escapedText' cs = option "" $ escapedText cs