diff options
| author | Gregor Kleen <gkleen@yggdrasil.li> | 2016-01-12 00:02:10 +0000 |
|---|---|---|
| committer | Gregor Kleen <gkleen@yggdrasil.li> | 2016-01-12 00:02:10 +0000 |
| commit | c9554b25d4ad99ceec1bef7bd60b1df82ef5ce8a (patch) | |
| tree | 2701c86c9d75b2cbfae81b92c07949fc1c86fc07 /bbcode | |
| parent | 650feae1e8c267981f224e1de31ff4729a526afd (diff) | |
| download | thermoprint-c9554b25d4ad99ceec1bef7bd60b1df82ef5ce8a.tar thermoprint-c9554b25d4ad99ceec1bef7bd60b1df82ef5ce8a.tar.gz thermoprint-c9554b25d4ad99ceec1bef7bd60b1df82ef5ce8a.tar.bz2 thermoprint-c9554b25d4ad99ceec1bef7bd60b1df82ef5ce8a.tar.xz thermoprint-c9554b25d4ad99ceec1bef7bd60b1df82ef5ce8a.zip | |
BBCode lexer
Diffstat (limited to 'bbcode')
| -rw-r--r-- | bbcode/src/Text/BBCode.hs | 10 | ||||
| -rw-r--r-- | bbcode/src/Text/BBCode/Lexer.hs | 74 | ||||
| -rw-r--r-- | bbcode/test/Spec.hs | 1 | ||||
| -rw-r--r-- | bbcode/test/Text/BBCode/LexerSpec.hs | 68 | ||||
| -rw-r--r-- | bbcode/thermoprint-bbcode.cabal | 21 | ||||
| -rw-r--r-- | bbcode/thermoprint-bbcode.nix | 15 |
6 files changed, 187 insertions, 2 deletions
diff --git a/bbcode/src/Text/BBCode.hs b/bbcode/src/Text/BBCode.hs index 455decb..7a328a8 100644 --- a/bbcode/src/Text/BBCode.hs +++ b/bbcode/src/Text/BBCode.hs | |||
| @@ -1,4 +1,14 @@ | |||
| 1 | {-# LANGUAGE OverloadedStrings #-} | ||
| 2 | |||
| 1 | module Text.BBCode | 3 | module Text.BBCode |
| 2 | ( | 4 | ( |
| 3 | ) where | 5 | ) where |
| 4 | 6 | ||
| 7 | import Data.Attoparsec.Text | ||
| 8 | |||
| 9 | import Data.Text (Text) | ||
| 10 | import qualified Data.Text as T (singleton, head, tail) | ||
| 11 | |||
| 12 | import Control.Applicative | ||
| 13 | |||
| 14 | import Text.BBCode.Lexer (BBToken(..), token) | ||
diff --git a/bbcode/src/Text/BBCode/Lexer.hs b/bbcode/src/Text/BBCode/Lexer.hs new file mode 100644 index 0000000..d2aa2bc --- /dev/null +++ b/bbcode/src/Text/BBCode/Lexer.hs | |||
| @@ -0,0 +1,74 @@ | |||
| 1 | {-# LANGUAGE OverloadedStrings #-} | ||
| 2 | {-# LANGUAGE DeriveGeneric, DeriveAnyClass #-} | ||
| 3 | |||
| 4 | -- | A parser to transform 'Text' into a stream of 'BBToken's | ||
| 5 | module Text.BBCode.Lexer | ||
| 6 | ( BBToken(..) | ||
| 7 | , token | ||
| 8 | , escapedText | ||
| 9 | , escapedText' | ||
| 10 | ) where | ||
| 11 | |||
| 12 | import Data.Attoparsec.Text | ||
| 13 | |||
| 14 | import Data.Text (Text) | ||
| 15 | import qualified Data.Text as T (singleton, head, last, tail, null) | ||
| 16 | |||
| 17 | import Control.Applicative | ||
| 18 | |||
| 19 | import Test.QuickCheck (Arbitrary(..), CoArbitrary, genericShrink) | ||
| 20 | import Test.QuickCheck.Gen (oneof, suchThat) | ||
| 21 | import Test.QuickCheck.Instances | ||
| 22 | import GHC.Generics (Generic) | ||
| 23 | |||
| 24 | -- | Our lexicographical unit | ||
| 25 | data BBToken = BBOpen Text -- ^ Tag open | ||
| 26 | | BBClose Text -- ^ Tag close | ||
| 27 | | BBStr Text -- ^ Content of a tag | ||
| 28 | deriving (Generic, Eq, Show, CoArbitrary) | ||
| 29 | |||
| 30 | -- | This instance does not produce: | ||
| 31 | -- | ||
| 32 | -- * opening and closing tags whose 'Text' ends in @\\@ | ||
| 33 | -- * empty 'BBStr's | ||
| 34 | instance Arbitrary BBToken where | ||
| 35 | shrink = genericShrink | ||
| 36 | arbitrary = oneof [ BBOpen <$> tagText | ||
| 37 | , BBClose <$> tagText | ||
| 38 | , BBStr <$> nonEmpty | ||
| 39 | ] | ||
| 40 | where | ||
| 41 | tagText = arbitrary `suchThat` (not . lastIsEscape) | ||
| 42 | lastIsEscape t | ||
| 43 | | T.null t = False | ||
| 44 | | T.last t == '\\' = True | ||
| 45 | | otherwise = False | ||
| 46 | nonEmpty = (arbitrary `suchThat` (not . T.null)) | ||
| 47 | |||
| 48 | token :: Parser BBToken | ||
| 49 | -- ^ Tokenizer | ||
| 50 | token = BBClose <$> (string "[/" *> escapedText' [']'] <* string "]") | ||
| 51 | <|> BBOpen <$> (string "[" *> escapedText' [']'] <* string "]") | ||
| 52 | <|> BBStr <$> escapedText ['['] | ||
| 53 | |||
| 54 | escapedText :: [Char] -> Parser Text | ||
| 55 | -- ^ @escapedText cs@ consumes 'Text' up to (not including) the first occurence of a character from @cs@ that is not escaped using @\\@ | ||
| 56 | -- | ||
| 57 | -- Always consumes at least one character | ||
| 58 | -- | ||
| 59 | -- @\\@ needs to be escaped (prefixed with @\\@) iff it precedes a character from @cs@ | ||
| 60 | escapedText [] = takeText | ||
| 61 | escapedText cs = recurse $ choice [ takeWhile1 (not . special) | ||
| 62 | , escapeSeq | ||
| 63 | , escapeChar' | ||
| 64 | ] | ||
| 65 | where | ||
| 66 | escapeChar = '\\' | ||
| 67 | special = inClass $ escapeChar : cs | ||
| 68 | escapeChar' = string $ T.singleton escapeChar | ||
| 69 | escapeSeq = escapeChar' >> (T.singleton <$> satisfy special) -- s/\\[:cs]/\1/ | ||
| 70 | recurse p = mappend <$> p <*> escapedText' cs | ||
| 71 | |||
| 72 | escapedText' :: [Char] -> Parser Text | ||
| 73 | -- ^ @'option' "" $ 'escapedText' cs@ | ||
| 74 | escapedText' cs = option "" $ escapedText cs | ||
diff --git a/bbcode/test/Spec.hs b/bbcode/test/Spec.hs new file mode 100644 index 0000000..a824f8c --- /dev/null +++ b/bbcode/test/Spec.hs | |||
| @@ -0,0 +1 @@ | |||
| {-# OPTIONS_GHC -F -pgmF hspec-discover #-} | |||
diff --git a/bbcode/test/Text/BBCode/LexerSpec.hs b/bbcode/test/Text/BBCode/LexerSpec.hs new file mode 100644 index 0000000..8b95874 --- /dev/null +++ b/bbcode/test/Text/BBCode/LexerSpec.hs | |||
| @@ -0,0 +1,68 @@ | |||
| 1 | {-# LANGUAGE OverloadedStrings #-} | ||
| 2 | {-# LANGUAGE FlexibleInstances #-} | ||
| 3 | |||
| 4 | module Text.BBCode.LexerSpec (spec) where | ||
| 5 | |||
| 6 | import Test.Hspec | ||
| 7 | import Test.Hspec.QuickCheck (prop) | ||
| 8 | import Test.QuickCheck (Property, Discard(..), property) | ||
| 9 | import Text.BBCode.Lexer | ||
| 10 | |||
| 11 | import Data.Text (Text) | ||
| 12 | import qualified Data.Text as T (singleton, replace, last, null) | ||
| 13 | |||
| 14 | import Data.Monoid ((<>), mconcat, Endo(..)) | ||
| 15 | |||
| 16 | import Data.Attoparsec.Text (parseOnly, endOfInput) | ||
| 17 | |||
| 18 | import Control.Applicative | ||
| 19 | import Control.Monad (zipWithM_) | ||
| 20 | |||
| 21 | coToken :: BBToken -> Text | ||
| 22 | -- ^ Inverse of `token` | ||
| 23 | coToken (BBOpen t) = "[" <> escape [']'] t <> "]" | ||
| 24 | coToken (BBClose t) = "[/" <> escape [']'] t <> "]" | ||
| 25 | coToken (BBStr t) = escape ['['] t | ||
| 26 | |||
| 27 | escape :: [Char] -> Text -> Text | ||
| 28 | -- ^ Inverse of `escapedText` | ||
| 29 | escape xs = endos [ T.replace c' ("\\" <> c') . T.replace ("\\" <> c') ("\\\\" <> c') | c <- xs, let c' = T.singleton c ] . escapeLast | ||
| 30 | where | ||
| 31 | endos = appEndo . mconcat . map Endo | ||
| 32 | escapeLast t | ||
| 33 | | T.null t = "" | ||
| 34 | | T.last t == '\\' = t <> "\\" | ||
| 35 | | otherwise = t | ||
| 36 | |||
| 37 | tokenize :: Text -> Either String [BBToken] | ||
| 38 | -- ^ Run 'many token' against input | ||
| 39 | tokenize = parseOnly (many token <* endOfInput) | ||
| 40 | |||
| 41 | spec :: Spec | ||
| 42 | spec = do | ||
| 43 | prop "prop_token" prop_token | ||
| 44 | zipWithM_ example [1..] examples | ||
| 45 | where | ||
| 46 | example n (s, ts) = let str = "Example " <> show n | ||
| 47 | in specify str $ (tokenize s == Right ts) | ||
| 48 | |||
| 49 | prop_token :: Text -> Property | ||
| 50 | -- ^ prop> (mconcat . map coToken) <$> tokenize x == Right x | ||
| 51 | -- | ||
| 52 | -- Where 'x' is restricted such that `tokenize` succeeds | ||
| 53 | prop_token x = discardLeft (((== x) . mconcat . map coToken) <$> tokenize x) | ||
| 54 | where | ||
| 55 | discardLeft = either (const $ property Discard) property | ||
| 56 | |||
| 57 | examples :: [(Text, [BBToken])] | ||
| 58 | examples = [ ("[t]test[/t]" | ||
| 59 | , [BBOpen "t", BBStr "test", BBClose "t"]) | ||
| 60 | , ("[t]te\\st[/t]" | ||
| 61 | , [BBOpen "t", BBStr "te\\st", BBClose "t"]) | ||
| 62 | , ("[t]te\\[st[/t]" | ||
| 63 | , [BBOpen "t", BBStr "te[st", BBClose "t"]) | ||
| 64 | , ("[\\t]test[/t]" | ||
| 65 | , [BBOpen "\\t", BBStr "test", BBClose "t"]) | ||
| 66 | , ("[t]test[/t\\]]" | ||
| 67 | , [BBOpen "t", BBStr "test", BBClose "t]"]) | ||
| 68 | ] | ||
diff --git a/bbcode/thermoprint-bbcode.cabal b/bbcode/thermoprint-bbcode.cabal index 537cfe7..ea00969 100644 --- a/bbcode/thermoprint-bbcode.cabal +++ b/bbcode/thermoprint-bbcode.cabal | |||
| @@ -17,9 +17,26 @@ build-type: Simple | |||
| 17 | cabal-version: >=1.10 | 17 | cabal-version: >=1.10 |
| 18 | 18 | ||
| 19 | library | 19 | library |
| 20 | -- exposed-modules: | 20 | exposed-modules: Text.BBCode |
| 21 | , Text.BBCode.Lexer | ||
| 21 | -- other-modules: | 22 | -- other-modules: |
| 22 | -- other-extensions: | 23 | -- other-extensions: |
| 23 | build-depends: base >=4.8 && <4.9 | 24 | build-depends: base >=4.8 && <4.9 |
| 25 | , attoparsec >=0.13.0 && <1 | ||
| 26 | , text >=1.2.1 && <2 | ||
| 27 | , QuickCheck >=2.8.1 && <3 | ||
| 28 | , quickcheck-instances >=0.3.11 && <1 | ||
| 24 | hs-source-dirs: src | 29 | hs-source-dirs: src |
| 25 | default-language: Haskell2010 \ No newline at end of file | 30 | default-language: Haskell2010 |
| 31 | |||
| 32 | Test-Suite tests | ||
| 33 | type: exitcode-stdio-1.0 | ||
| 34 | hs-source-dirs: test | ||
| 35 | main-is: Spec.hs | ||
| 36 | ghc-options: -threaded -with-rtsopts=-N | ||
| 37 | build-depends: base >=4.8.1 && <5 | ||
| 38 | , thermoprint-bbcode -any | ||
| 39 | , hspec >=2.2.1 && <3 | ||
| 40 | , QuickCheck >=2.8.1 && <3 | ||
| 41 | , attoparsec >=0.13.0 && <1 | ||
| 42 | , text >=1.2.1 && <2 \ No newline at end of file | ||
diff --git a/bbcode/thermoprint-bbcode.nix b/bbcode/thermoprint-bbcode.nix new file mode 100644 index 0000000..5521b10 --- /dev/null +++ b/bbcode/thermoprint-bbcode.nix | |||
| @@ -0,0 +1,15 @@ | |||
| 1 | { mkDerivation, attoparsec, base, hspec, QuickCheck | ||
| 2 | , quickcheck-instances, stdenv, text | ||
| 3 | }: | ||
| 4 | mkDerivation { | ||
| 5 | pname = "thermoprint-bbcode"; | ||
| 6 | version = "0.0.0"; | ||
| 7 | src = ./.; | ||
| 8 | libraryHaskellDepends = [ | ||
| 9 | attoparsec base QuickCheck quickcheck-instances text | ||
| 10 | ]; | ||
| 11 | testHaskellDepends = [ attoparsec base hspec QuickCheck text ]; | ||
| 12 | homepage = "http://dirty-haskell.org/tags/thermoprint.html"; | ||
| 13 | description = "A parser for a subset of bbcode compatible with thermoprint-spec"; | ||
| 14 | license = stdenv.lib.licenses.publicDomain; | ||
| 15 | } | ||
