aboutsummaryrefslogtreecommitdiff
path: root/bbcode/src/Text
diff options
context:
space:
mode:
authorGregor Kleen <gkleen@yggdrasil.li>2016-01-12 00:02:10 +0000
committerGregor Kleen <gkleen@yggdrasil.li>2016-01-12 00:02:10 +0000
commitc9554b25d4ad99ceec1bef7bd60b1df82ef5ce8a (patch)
tree2701c86c9d75b2cbfae81b92c07949fc1c86fc07 /bbcode/src/Text
parent650feae1e8c267981f224e1de31ff4729a526afd (diff)
downloadthermoprint-c9554b25d4ad99ceec1bef7bd60b1df82ef5ce8a.tar
thermoprint-c9554b25d4ad99ceec1bef7bd60b1df82ef5ce8a.tar.gz
thermoprint-c9554b25d4ad99ceec1bef7bd60b1df82ef5ce8a.tar.bz2
thermoprint-c9554b25d4ad99ceec1bef7bd60b1df82ef5ce8a.tar.xz
thermoprint-c9554b25d4ad99ceec1bef7bd60b1df82ef5ce8a.zip
BBCode lexer
Diffstat (limited to 'bbcode/src/Text')
-rw-r--r--bbcode/src/Text/BBCode.hs10
-rw-r--r--bbcode/src/Text/BBCode/Lexer.hs74
2 files changed, 84 insertions, 0 deletions
diff --git a/bbcode/src/Text/BBCode.hs b/bbcode/src/Text/BBCode.hs
index 455decb..7a328a8 100644
--- a/bbcode/src/Text/BBCode.hs
+++ b/bbcode/src/Text/BBCode.hs
@@ -1,4 +1,14 @@
1{-# LANGUAGE OverloadedStrings #-}
2
1module Text.BBCode 3module Text.BBCode
2 ( 4 (
3 ) where 5 ) where
4 6
7import Data.Attoparsec.Text
8
9import Data.Text (Text)
10import qualified Data.Text as T (singleton, head, tail)
11
12import Control.Applicative
13
14import Text.BBCode.Lexer (BBToken(..), token)
diff --git a/bbcode/src/Text/BBCode/Lexer.hs b/bbcode/src/Text/BBCode/Lexer.hs
new file mode 100644
index 0000000..d2aa2bc
--- /dev/null
+++ b/bbcode/src/Text/BBCode/Lexer.hs
@@ -0,0 +1,74 @@
1{-# LANGUAGE OverloadedStrings #-}
2{-# LANGUAGE DeriveGeneric, DeriveAnyClass #-}
3
4-- | A parser to transform 'Text' into a stream of 'BBToken's
5module Text.BBCode.Lexer
6 ( BBToken(..)
7 , token
8 , escapedText
9 , escapedText'
10 ) where
11
12import Data.Attoparsec.Text
13
14import Data.Text (Text)
15import qualified Data.Text as T (singleton, head, last, tail, null)
16
17import Control.Applicative
18
19import Test.QuickCheck (Arbitrary(..), CoArbitrary, genericShrink)
20import Test.QuickCheck.Gen (oneof, suchThat)
21import Test.QuickCheck.Instances
22import GHC.Generics (Generic)
23
24-- | Our lexicographical unit
25data BBToken = BBOpen Text -- ^ Tag open
26 | BBClose Text -- ^ Tag close
27 | BBStr Text -- ^ Content of a tag
28 deriving (Generic, Eq, Show, CoArbitrary)
29
30-- | This instance does not produce:
31--
32-- * opening and closing tags whose 'Text' ends in @\\@
33-- * empty 'BBStr's
34instance Arbitrary BBToken where
35 shrink = genericShrink
36 arbitrary = oneof [ BBOpen <$> tagText
37 , BBClose <$> tagText
38 , BBStr <$> nonEmpty
39 ]
40 where
41 tagText = arbitrary `suchThat` (not . lastIsEscape)
42 lastIsEscape t
43 | T.null t = False
44 | T.last t == '\\' = True
45 | otherwise = False
46 nonEmpty = (arbitrary `suchThat` (not . T.null))
47
48token :: Parser BBToken
49-- ^ Tokenizer
50token = BBClose <$> (string "[/" *> escapedText' [']'] <* string "]")
51 <|> BBOpen <$> (string "[" *> escapedText' [']'] <* string "]")
52 <|> BBStr <$> escapedText ['[']
53
54escapedText :: [Char] -> Parser Text
55-- ^ @escapedText cs@ consumes 'Text' up to (not including) the first occurence of a character from @cs@ that is not escaped using @\\@
56--
57-- Always consumes at least one character
58--
59-- @\\@ needs to be escaped (prefixed with @\\@) iff it precedes a character from @cs@
60escapedText [] = takeText
61escapedText cs = recurse $ choice [ takeWhile1 (not . special)
62 , escapeSeq
63 , escapeChar'
64 ]
65 where
66 escapeChar = '\\'
67 special = inClass $ escapeChar : cs
68 escapeChar' = string $ T.singleton escapeChar
69 escapeSeq = escapeChar' >> (T.singleton <$> satisfy special) -- s/\\[:cs]/\1/
70 recurse p = mappend <$> p <*> escapedText' cs
71
72escapedText' :: [Char] -> Parser Text
73-- ^ @'option' "" $ 'escapedText' cs@
74escapedText' cs = option "" $ escapedText cs