aboutsummaryrefslogtreecommitdiff
path: root/bbcode
diff options
context:
space:
mode:
authorGregor Kleen <gkleen@yggdrasil.li>2016-01-12 00:02:10 +0000
committerGregor Kleen <gkleen@yggdrasil.li>2016-01-12 00:02:10 +0000
commitc9554b25d4ad99ceec1bef7bd60b1df82ef5ce8a (patch)
tree2701c86c9d75b2cbfae81b92c07949fc1c86fc07 /bbcode
parent650feae1e8c267981f224e1de31ff4729a526afd (diff)
downloadthermoprint-c9554b25d4ad99ceec1bef7bd60b1df82ef5ce8a.tar
thermoprint-c9554b25d4ad99ceec1bef7bd60b1df82ef5ce8a.tar.gz
thermoprint-c9554b25d4ad99ceec1bef7bd60b1df82ef5ce8a.tar.bz2
thermoprint-c9554b25d4ad99ceec1bef7bd60b1df82ef5ce8a.tar.xz
thermoprint-c9554b25d4ad99ceec1bef7bd60b1df82ef5ce8a.zip
BBCode lexer
Diffstat (limited to 'bbcode')
-rw-r--r--bbcode/src/Text/BBCode.hs10
-rw-r--r--bbcode/src/Text/BBCode/Lexer.hs74
-rw-r--r--bbcode/test/Spec.hs1
-rw-r--r--bbcode/test/Text/BBCode/LexerSpec.hs68
-rw-r--r--bbcode/thermoprint-bbcode.cabal21
-rw-r--r--bbcode/thermoprint-bbcode.nix15
6 files changed, 187 insertions, 2 deletions
diff --git a/bbcode/src/Text/BBCode.hs b/bbcode/src/Text/BBCode.hs
index 455decb..7a328a8 100644
--- a/bbcode/src/Text/BBCode.hs
+++ b/bbcode/src/Text/BBCode.hs
@@ -1,4 +1,14 @@
1{-# LANGUAGE OverloadedStrings #-}
2
1module Text.BBCode 3module Text.BBCode
2 ( 4 (
3 ) where 5 ) where
4 6
7import Data.Attoparsec.Text
8
9import Data.Text (Text)
10import qualified Data.Text as T (singleton, head, tail)
11
12import Control.Applicative
13
14import Text.BBCode.Lexer (BBToken(..), token)
diff --git a/bbcode/src/Text/BBCode/Lexer.hs b/bbcode/src/Text/BBCode/Lexer.hs
new file mode 100644
index 0000000..d2aa2bc
--- /dev/null
+++ b/bbcode/src/Text/BBCode/Lexer.hs
@@ -0,0 +1,74 @@
1{-# LANGUAGE OverloadedStrings #-}
2{-# LANGUAGE DeriveGeneric, DeriveAnyClass #-}
3
4-- | A parser to transform 'Text' into a stream of 'BBToken's
5module Text.BBCode.Lexer
6 ( BBToken(..)
7 , token
8 , escapedText
9 , escapedText'
10 ) where
11
12import Data.Attoparsec.Text
13
14import Data.Text (Text)
15import qualified Data.Text as T (singleton, head, last, tail, null)
16
17import Control.Applicative
18
19import Test.QuickCheck (Arbitrary(..), CoArbitrary, genericShrink)
20import Test.QuickCheck.Gen (oneof, suchThat)
21import Test.QuickCheck.Instances
22import GHC.Generics (Generic)
23
24-- | Our lexicographical unit
25data BBToken = BBOpen Text -- ^ Tag open
26 | BBClose Text -- ^ Tag close
27 | BBStr Text -- ^ Content of a tag
28 deriving (Generic, Eq, Show, CoArbitrary)
29
30-- | This instance does not produce:
31--
32-- * opening and closing tags whose 'Text' ends in @\\@
33-- * empty 'BBStr's
34instance Arbitrary BBToken where
35 shrink = genericShrink
36 arbitrary = oneof [ BBOpen <$> tagText
37 , BBClose <$> tagText
38 , BBStr <$> nonEmpty
39 ]
40 where
41 tagText = arbitrary `suchThat` (not . lastIsEscape)
42 lastIsEscape t
43 | T.null t = False
44 | T.last t == '\\' = True
45 | otherwise = False
46 nonEmpty = (arbitrary `suchThat` (not . T.null))
47
48token :: Parser BBToken
49-- ^ Tokenizer
50token = BBClose <$> (string "[/" *> escapedText' [']'] <* string "]")
51 <|> BBOpen <$> (string "[" *> escapedText' [']'] <* string "]")
52 <|> BBStr <$> escapedText ['[']
53
54escapedText :: [Char] -> Parser Text
55-- ^ @escapedText cs@ consumes 'Text' up to (not including) the first occurence of a character from @cs@ that is not escaped using @\\@
56--
57-- Always consumes at least one character
58--
59-- @\\@ needs to be escaped (prefixed with @\\@) iff it precedes a character from @cs@
60escapedText [] = takeText
61escapedText cs = recurse $ choice [ takeWhile1 (not . special)
62 , escapeSeq
63 , escapeChar'
64 ]
65 where
66 escapeChar = '\\'
67 special = inClass $ escapeChar : cs
68 escapeChar' = string $ T.singleton escapeChar
69 escapeSeq = escapeChar' >> (T.singleton <$> satisfy special) -- s/\\[:cs]/\1/
70 recurse p = mappend <$> p <*> escapedText' cs
71
72escapedText' :: [Char] -> Parser Text
73-- ^ @'option' "" $ 'escapedText' cs@
74escapedText' cs = option "" $ escapedText cs
diff --git a/bbcode/test/Spec.hs b/bbcode/test/Spec.hs
new file mode 100644
index 0000000..a824f8c
--- /dev/null
+++ b/bbcode/test/Spec.hs
@@ -0,0 +1 @@
{-# OPTIONS_GHC -F -pgmF hspec-discover #-}
diff --git a/bbcode/test/Text/BBCode/LexerSpec.hs b/bbcode/test/Text/BBCode/LexerSpec.hs
new file mode 100644
index 0000000..8b95874
--- /dev/null
+++ b/bbcode/test/Text/BBCode/LexerSpec.hs
@@ -0,0 +1,68 @@
1{-# LANGUAGE OverloadedStrings #-}
2{-# LANGUAGE FlexibleInstances #-}
3
4module Text.BBCode.LexerSpec (spec) where
5
6import Test.Hspec
7import Test.Hspec.QuickCheck (prop)
8import Test.QuickCheck (Property, Discard(..), property)
9import Text.BBCode.Lexer
10
11import Data.Text (Text)
12import qualified Data.Text as T (singleton, replace, last, null)
13
14import Data.Monoid ((<>), mconcat, Endo(..))
15
16import Data.Attoparsec.Text (parseOnly, endOfInput)
17
18import Control.Applicative
19import Control.Monad (zipWithM_)
20
21coToken :: BBToken -> Text
22-- ^ Inverse of `token`
23coToken (BBOpen t) = "[" <> escape [']'] t <> "]"
24coToken (BBClose t) = "[/" <> escape [']'] t <> "]"
25coToken (BBStr t) = escape ['['] t
26
27escape :: [Char] -> Text -> Text
28-- ^ Inverse of `escapedText`
29escape xs = endos [ T.replace c' ("\\" <> c') . T.replace ("\\" <> c') ("\\\\" <> c') | c <- xs, let c' = T.singleton c ] . escapeLast
30 where
31 endos = appEndo . mconcat . map Endo
32 escapeLast t
33 | T.null t = ""
34 | T.last t == '\\' = t <> "\\"
35 | otherwise = t
36
37tokenize :: Text -> Either String [BBToken]
38-- ^ Run 'many token' against input
39tokenize = parseOnly (many token <* endOfInput)
40
41spec :: Spec
42spec = do
43 prop "prop_token" prop_token
44 zipWithM_ example [1..] examples
45 where
46 example n (s, ts) = let str = "Example " <> show n
47 in specify str $ (tokenize s == Right ts)
48
49prop_token :: Text -> Property
50-- ^ prop> (mconcat . map coToken) <$> tokenize x == Right x
51--
52-- Where 'x' is restricted such that `tokenize` succeeds
53prop_token x = discardLeft (((== x) . mconcat . map coToken) <$> tokenize x)
54 where
55 discardLeft = either (const $ property Discard) property
56
57examples :: [(Text, [BBToken])]
58examples = [ ("[t]test[/t]"
59 , [BBOpen "t", BBStr "test", BBClose "t"])
60 , ("[t]te\\st[/t]"
61 , [BBOpen "t", BBStr "te\\st", BBClose "t"])
62 , ("[t]te\\[st[/t]"
63 , [BBOpen "t", BBStr "te[st", BBClose "t"])
64 , ("[\\t]test[/t]"
65 , [BBOpen "\\t", BBStr "test", BBClose "t"])
66 , ("[t]test[/t\\]]"
67 , [BBOpen "t", BBStr "test", BBClose "t]"])
68 ]
diff --git a/bbcode/thermoprint-bbcode.cabal b/bbcode/thermoprint-bbcode.cabal
index 537cfe7..ea00969 100644
--- a/bbcode/thermoprint-bbcode.cabal
+++ b/bbcode/thermoprint-bbcode.cabal
@@ -17,9 +17,26 @@ build-type: Simple
17cabal-version: >=1.10 17cabal-version: >=1.10
18 18
19library 19library
20 -- exposed-modules: 20 exposed-modules: Text.BBCode
21 , Text.BBCode.Lexer
21 -- other-modules: 22 -- other-modules:
22 -- other-extensions: 23 -- other-extensions:
23 build-depends: base >=4.8 && <4.9 24 build-depends: base >=4.8 && <4.9
25 , attoparsec >=0.13.0 && <1
26 , text >=1.2.1 && <2
27 , QuickCheck >=2.8.1 && <3
28 , quickcheck-instances >=0.3.11 && <1
24 hs-source-dirs: src 29 hs-source-dirs: src
25 default-language: Haskell2010 \ No newline at end of file 30 default-language: Haskell2010
31
32Test-Suite tests
33 type: exitcode-stdio-1.0
34 hs-source-dirs: test
35 main-is: Spec.hs
36 ghc-options: -threaded -with-rtsopts=-N
37 build-depends: base >=4.8.1 && <5
38 , thermoprint-bbcode -any
39 , hspec >=2.2.1 && <3
40 , QuickCheck >=2.8.1 && <3
41 , attoparsec >=0.13.0 && <1
42 , text >=1.2.1 && <2 \ No newline at end of file
diff --git a/bbcode/thermoprint-bbcode.nix b/bbcode/thermoprint-bbcode.nix
new file mode 100644
index 0000000..5521b10
--- /dev/null
+++ b/bbcode/thermoprint-bbcode.nix
@@ -0,0 +1,15 @@
1{ mkDerivation, attoparsec, base, hspec, QuickCheck
2, quickcheck-instances, stdenv, text
3}:
4mkDerivation {
5 pname = "thermoprint-bbcode";
6 version = "0.0.0";
7 src = ./.;
8 libraryHaskellDepends = [
9 attoparsec base QuickCheck quickcheck-instances text
10 ];
11 testHaskellDepends = [ attoparsec base hspec QuickCheck text ];
12 homepage = "http://dirty-haskell.org/tags/thermoprint.html";
13 description = "A parser for a subset of bbcode compatible with thermoprint-spec";
14 license = stdenv.lib.licenses.publicDomain;
15}