diff options
author | Gregor Kleen <gkleen@yggdrasil.li> | 2016-01-12 00:02:10 +0000 |
---|---|---|
committer | Gregor Kleen <gkleen@yggdrasil.li> | 2016-01-12 00:02:10 +0000 |
commit | c9554b25d4ad99ceec1bef7bd60b1df82ef5ce8a (patch) | |
tree | 2701c86c9d75b2cbfae81b92c07949fc1c86fc07 /bbcode | |
parent | 650feae1e8c267981f224e1de31ff4729a526afd (diff) | |
download | thermoprint-c9554b25d4ad99ceec1bef7bd60b1df82ef5ce8a.tar thermoprint-c9554b25d4ad99ceec1bef7bd60b1df82ef5ce8a.tar.gz thermoprint-c9554b25d4ad99ceec1bef7bd60b1df82ef5ce8a.tar.bz2 thermoprint-c9554b25d4ad99ceec1bef7bd60b1df82ef5ce8a.tar.xz thermoprint-c9554b25d4ad99ceec1bef7bd60b1df82ef5ce8a.zip |
BBCode lexer
Diffstat (limited to 'bbcode')
-rw-r--r-- | bbcode/src/Text/BBCode.hs | 10 | ||||
-rw-r--r-- | bbcode/src/Text/BBCode/Lexer.hs | 74 | ||||
-rw-r--r-- | bbcode/test/Spec.hs | 1 | ||||
-rw-r--r-- | bbcode/test/Text/BBCode/LexerSpec.hs | 68 | ||||
-rw-r--r-- | bbcode/thermoprint-bbcode.cabal | 21 | ||||
-rw-r--r-- | bbcode/thermoprint-bbcode.nix | 15 |
6 files changed, 187 insertions, 2 deletions
diff --git a/bbcode/src/Text/BBCode.hs b/bbcode/src/Text/BBCode.hs index 455decb..7a328a8 100644 --- a/bbcode/src/Text/BBCode.hs +++ b/bbcode/src/Text/BBCode.hs | |||
@@ -1,4 +1,14 @@ | |||
1 | {-# LANGUAGE OverloadedStrings #-} | ||
2 | |||
1 | module Text.BBCode | 3 | module Text.BBCode |
2 | ( | 4 | ( |
3 | ) where | 5 | ) where |
4 | 6 | ||
7 | import Data.Attoparsec.Text | ||
8 | |||
9 | import Data.Text (Text) | ||
10 | import qualified Data.Text as T (singleton, head, tail) | ||
11 | |||
12 | import Control.Applicative | ||
13 | |||
14 | import Text.BBCode.Lexer (BBToken(..), token) | ||
diff --git a/bbcode/src/Text/BBCode/Lexer.hs b/bbcode/src/Text/BBCode/Lexer.hs new file mode 100644 index 0000000..d2aa2bc --- /dev/null +++ b/bbcode/src/Text/BBCode/Lexer.hs | |||
@@ -0,0 +1,74 @@ | |||
1 | {-# LANGUAGE OverloadedStrings #-} | ||
2 | {-# LANGUAGE DeriveGeneric, DeriveAnyClass #-} | ||
3 | |||
4 | -- | A parser to transform 'Text' into a stream of 'BBToken's | ||
5 | module Text.BBCode.Lexer | ||
6 | ( BBToken(..) | ||
7 | , token | ||
8 | , escapedText | ||
9 | , escapedText' | ||
10 | ) where | ||
11 | |||
12 | import Data.Attoparsec.Text | ||
13 | |||
14 | import Data.Text (Text) | ||
15 | import qualified Data.Text as T (singleton, head, last, tail, null) | ||
16 | |||
17 | import Control.Applicative | ||
18 | |||
19 | import Test.QuickCheck (Arbitrary(..), CoArbitrary, genericShrink) | ||
20 | import Test.QuickCheck.Gen (oneof, suchThat) | ||
21 | import Test.QuickCheck.Instances | ||
22 | import GHC.Generics (Generic) | ||
23 | |||
24 | -- | Our lexicographical unit | ||
25 | data BBToken = BBOpen Text -- ^ Tag open | ||
26 | | BBClose Text -- ^ Tag close | ||
27 | | BBStr Text -- ^ Content of a tag | ||
28 | deriving (Generic, Eq, Show, CoArbitrary) | ||
29 | |||
30 | -- | This instance does not produce: | ||
31 | -- | ||
32 | -- * opening and closing tags whose 'Text' ends in @\\@ | ||
33 | -- * empty 'BBStr's | ||
34 | instance Arbitrary BBToken where | ||
35 | shrink = genericShrink | ||
36 | arbitrary = oneof [ BBOpen <$> tagText | ||
37 | , BBClose <$> tagText | ||
38 | , BBStr <$> nonEmpty | ||
39 | ] | ||
40 | where | ||
41 | tagText = arbitrary `suchThat` (not . lastIsEscape) | ||
42 | lastIsEscape t | ||
43 | | T.null t = False | ||
44 | | T.last t == '\\' = True | ||
45 | | otherwise = False | ||
46 | nonEmpty = (arbitrary `suchThat` (not . T.null)) | ||
47 | |||
48 | token :: Parser BBToken | ||
49 | -- ^ Tokenizer | ||
50 | token = BBClose <$> (string "[/" *> escapedText' [']'] <* string "]") | ||
51 | <|> BBOpen <$> (string "[" *> escapedText' [']'] <* string "]") | ||
52 | <|> BBStr <$> escapedText ['['] | ||
53 | |||
54 | escapedText :: [Char] -> Parser Text | ||
55 | -- ^ @escapedText cs@ consumes 'Text' up to (not including) the first occurence of a character from @cs@ that is not escaped using @\\@ | ||
56 | -- | ||
57 | -- Always consumes at least one character | ||
58 | -- | ||
59 | -- @\\@ needs to be escaped (prefixed with @\\@) iff it precedes a character from @cs@ | ||
60 | escapedText [] = takeText | ||
61 | escapedText cs = recurse $ choice [ takeWhile1 (not . special) | ||
62 | , escapeSeq | ||
63 | , escapeChar' | ||
64 | ] | ||
65 | where | ||
66 | escapeChar = '\\' | ||
67 | special = inClass $ escapeChar : cs | ||
68 | escapeChar' = string $ T.singleton escapeChar | ||
69 | escapeSeq = escapeChar' >> (T.singleton <$> satisfy special) -- s/\\[:cs]/\1/ | ||
70 | recurse p = mappend <$> p <*> escapedText' cs | ||
71 | |||
72 | escapedText' :: [Char] -> Parser Text | ||
73 | -- ^ @'option' "" $ 'escapedText' cs@ | ||
74 | escapedText' cs = option "" $ escapedText cs | ||
diff --git a/bbcode/test/Spec.hs b/bbcode/test/Spec.hs new file mode 100644 index 0000000..a824f8c --- /dev/null +++ b/bbcode/test/Spec.hs | |||
@@ -0,0 +1 @@ | |||
{-# OPTIONS_GHC -F -pgmF hspec-discover #-} | |||
diff --git a/bbcode/test/Text/BBCode/LexerSpec.hs b/bbcode/test/Text/BBCode/LexerSpec.hs new file mode 100644 index 0000000..8b95874 --- /dev/null +++ b/bbcode/test/Text/BBCode/LexerSpec.hs | |||
@@ -0,0 +1,68 @@ | |||
1 | {-# LANGUAGE OverloadedStrings #-} | ||
2 | {-# LANGUAGE FlexibleInstances #-} | ||
3 | |||
4 | module Text.BBCode.LexerSpec (spec) where | ||
5 | |||
6 | import Test.Hspec | ||
7 | import Test.Hspec.QuickCheck (prop) | ||
8 | import Test.QuickCheck (Property, Discard(..), property) | ||
9 | import Text.BBCode.Lexer | ||
10 | |||
11 | import Data.Text (Text) | ||
12 | import qualified Data.Text as T (singleton, replace, last, null) | ||
13 | |||
14 | import Data.Monoid ((<>), mconcat, Endo(..)) | ||
15 | |||
16 | import Data.Attoparsec.Text (parseOnly, endOfInput) | ||
17 | |||
18 | import Control.Applicative | ||
19 | import Control.Monad (zipWithM_) | ||
20 | |||
21 | coToken :: BBToken -> Text | ||
22 | -- ^ Inverse of `token` | ||
23 | coToken (BBOpen t) = "[" <> escape [']'] t <> "]" | ||
24 | coToken (BBClose t) = "[/" <> escape [']'] t <> "]" | ||
25 | coToken (BBStr t) = escape ['['] t | ||
26 | |||
27 | escape :: [Char] -> Text -> Text | ||
28 | -- ^ Inverse of `escapedText` | ||
29 | escape xs = endos [ T.replace c' ("\\" <> c') . T.replace ("\\" <> c') ("\\\\" <> c') | c <- xs, let c' = T.singleton c ] . escapeLast | ||
30 | where | ||
31 | endos = appEndo . mconcat . map Endo | ||
32 | escapeLast t | ||
33 | | T.null t = "" | ||
34 | | T.last t == '\\' = t <> "\\" | ||
35 | | otherwise = t | ||
36 | |||
37 | tokenize :: Text -> Either String [BBToken] | ||
38 | -- ^ Run 'many token' against input | ||
39 | tokenize = parseOnly (many token <* endOfInput) | ||
40 | |||
41 | spec :: Spec | ||
42 | spec = do | ||
43 | prop "prop_token" prop_token | ||
44 | zipWithM_ example [1..] examples | ||
45 | where | ||
46 | example n (s, ts) = let str = "Example " <> show n | ||
47 | in specify str $ (tokenize s == Right ts) | ||
48 | |||
49 | prop_token :: Text -> Property | ||
50 | -- ^ prop> (mconcat . map coToken) <$> tokenize x == Right x | ||
51 | -- | ||
52 | -- Where 'x' is restricted such that `tokenize` succeeds | ||
53 | prop_token x = discardLeft (((== x) . mconcat . map coToken) <$> tokenize x) | ||
54 | where | ||
55 | discardLeft = either (const $ property Discard) property | ||
56 | |||
57 | examples :: [(Text, [BBToken])] | ||
58 | examples = [ ("[t]test[/t]" | ||
59 | , [BBOpen "t", BBStr "test", BBClose "t"]) | ||
60 | , ("[t]te\\st[/t]" | ||
61 | , [BBOpen "t", BBStr "te\\st", BBClose "t"]) | ||
62 | , ("[t]te\\[st[/t]" | ||
63 | , [BBOpen "t", BBStr "te[st", BBClose "t"]) | ||
64 | , ("[\\t]test[/t]" | ||
65 | , [BBOpen "\\t", BBStr "test", BBClose "t"]) | ||
66 | , ("[t]test[/t\\]]" | ||
67 | , [BBOpen "t", BBStr "test", BBClose "t]"]) | ||
68 | ] | ||
diff --git a/bbcode/thermoprint-bbcode.cabal b/bbcode/thermoprint-bbcode.cabal index 537cfe7..ea00969 100644 --- a/bbcode/thermoprint-bbcode.cabal +++ b/bbcode/thermoprint-bbcode.cabal | |||
@@ -17,9 +17,26 @@ build-type: Simple | |||
17 | cabal-version: >=1.10 | 17 | cabal-version: >=1.10 |
18 | 18 | ||
19 | library | 19 | library |
20 | -- exposed-modules: | 20 | exposed-modules: Text.BBCode |
21 | , Text.BBCode.Lexer | ||
21 | -- other-modules: | 22 | -- other-modules: |
22 | -- other-extensions: | 23 | -- other-extensions: |
23 | build-depends: base >=4.8 && <4.9 | 24 | build-depends: base >=4.8 && <4.9 |
25 | , attoparsec >=0.13.0 && <1 | ||
26 | , text >=1.2.1 && <2 | ||
27 | , QuickCheck >=2.8.1 && <3 | ||
28 | , quickcheck-instances >=0.3.11 && <1 | ||
24 | hs-source-dirs: src | 29 | hs-source-dirs: src |
25 | default-language: Haskell2010 \ No newline at end of file | 30 | default-language: Haskell2010 |
31 | |||
32 | Test-Suite tests | ||
33 | type: exitcode-stdio-1.0 | ||
34 | hs-source-dirs: test | ||
35 | main-is: Spec.hs | ||
36 | ghc-options: -threaded -with-rtsopts=-N | ||
37 | build-depends: base >=4.8.1 && <5 | ||
38 | , thermoprint-bbcode -any | ||
39 | , hspec >=2.2.1 && <3 | ||
40 | , QuickCheck >=2.8.1 && <3 | ||
41 | , attoparsec >=0.13.0 && <1 | ||
42 | , text >=1.2.1 && <2 \ No newline at end of file | ||
diff --git a/bbcode/thermoprint-bbcode.nix b/bbcode/thermoprint-bbcode.nix new file mode 100644 index 0000000..5521b10 --- /dev/null +++ b/bbcode/thermoprint-bbcode.nix | |||
@@ -0,0 +1,15 @@ | |||
1 | { mkDerivation, attoparsec, base, hspec, QuickCheck | ||
2 | , quickcheck-instances, stdenv, text | ||
3 | }: | ||
4 | mkDerivation { | ||
5 | pname = "thermoprint-bbcode"; | ||
6 | version = "0.0.0"; | ||
7 | src = ./.; | ||
8 | libraryHaskellDepends = [ | ||
9 | attoparsec base QuickCheck quickcheck-instances text | ||
10 | ]; | ||
11 | testHaskellDepends = [ attoparsec base hspec QuickCheck text ]; | ||
12 | homepage = "http://dirty-haskell.org/tags/thermoprint.html"; | ||
13 | description = "A parser for a subset of bbcode compatible with thermoprint-spec"; | ||
14 | license = stdenv.lib.licenses.publicDomain; | ||
15 | } | ||