aboutsummaryrefslogtreecommitdiff
path: root/bbcode/src/BBCode/Tokenizer.hs
diff options
context:
space:
mode:
author Gregor Kleen <gkleen@yggdrasil.li> 2015-10-17 02:26:25 +0200
committer Gregor Kleen <gkleen@yggdrasil.li> 2015-10-17 02:26:25 +0200
commit 005dc408dc09c3b479398ebe3e92efa2cd54846e (patch)
tree 23dcfe7a545885c9aa145f1ccae6d33206a87820 /bbcode/src/BBCode/Tokenizer.hs
parent 2dcbb4482de2c352b76372b389fda20c63075295 (diff)
download thermoprint-005dc408dc09c3b479398ebe3e92efa2cd54846e.tar
thermoprint-005dc408dc09c3b479398ebe3e92efa2cd54846e.tar.gz
thermoprint-005dc408dc09c3b479398ebe3e92efa2cd54846e.tar.bz2
thermoprint-005dc408dc09c3b479398ebe3e92efa2cd54846e.tar.xz
thermoprint-005dc408dc09c3b479398ebe3e92efa2cd54846e.zip
Working prototype
Diffstat (limited to 'bbcode/src/BBCode/Tokenizer.hs')
-rw-r--r-- bbcode/src/BBCode/Tokenizer.hs 44
1 files changed, 44 insertions, 0 deletions
diff --git a/bbcode/src/BBCode/Tokenizer.hs b/bbcode/src/BBCode/Tokenizer.hs
new file mode 100644
index 0000000..c860c7c
--- /dev/null
+++ b/bbcode/src/BBCode/Tokenizer.hs
@@ -0,0 +1,44 @@
1{-# LANGUAGE OverloadedStrings #-}
2
3module BBCode.Tokenizer
4 ( Token(..)
5 , tokenize
6 ) where
7
8import qualified Data.Text.Lazy as TL
9import qualified Data.Text as T
10
11import Control.Applicative
12import Data.Attoparsec.Text.Lazy
13
14import Data.Char (isSpace)
15import Data.Monoid (mconcat)
16
-- | A single lexical unit produced by the BBCode tokenizer.
data Token
  = Text String        -- ^ A run of plain characters, or an escaped @[@.
  | Whitespace String  -- ^ One or more consecutive whitespace characters.
  | TagOpen String     -- ^ Contents of an opening tag, brackets removed.
  | TagClose String    -- ^ Contents of a closing tag, without @[/@ and @]@.
  deriving (Eq, Read, Show)
22
-- | Split a BBCode string into its tokens.
--
-- The whole input has to be consumed; otherwise the parser's error
-- message is returned in 'Left'.
tokenize :: String -> Either String [Token]
tokenize input = eitherResult (parse wholeInput (TL.pack input))
  where
    -- All tokens, followed by nothing else.
    wholeInput = tokenize' <* endOfInput
25
-- | Parse zero or more tokens.
--
-- Alternatives are tried in order: whitespace, an escaped bracket, a
-- closing tag, an opening tag and finally plain text. The closing tag
-- is tried before the opening tag because the opening-tag parser would
-- otherwise accept the leading @/@ as part of the tag contents.
tokenize' :: Parser [Token]
tokenize' =
  many (whitespace <|> escapedBracket <|> tagClose <|> tagOpen <|> text)
  where
    -- A backslash directly followed by '[' stands for a literal '['.
    escapedBracket = Text "[" <$ string "\\["
33
-- | Parse a non-empty run of whitespace characters into one
-- 'Whitespace' token, keeping the characters as they appeared.
whitespace :: Parser Token
whitespace = Whitespace . T.unpack <$> takeWhile1 isSpace
36
-- | Parse an opening tag: @[@, at least one character other than @]@,
-- then @]@. The token carries the characters between the brackets.
tagOpen :: Parser Token
tagOpen = do
  _ <- char '['
  contents <- takeWhile1 (/= ']')
  _ <- char ']'
  return (TagOpen (T.unpack contents))
39
-- | Parse a closing tag: @[/@, at least one character other than @]@,
-- then @]@. The token carries the characters after the slash.
tagClose :: Parser Token
tagClose = fmap (TagClose . T.unpack) contents
  where
    contents = string "[/" *> takeWhile1 (/= ']') <* char ']'
42
-- | Parse a non-empty run of plain text into a 'Text' token.
--
-- The run ends before whitespace, before a @[@, and before a backslash
-- that is immediately followed by @[@. Stopping at that backslash lets
-- the escaped-bracket alternative of the tokenizer apply in the middle
-- of a word as well; previously the backslash was swallowed as text and
-- the bracket after it was then read as the start of a tag. A backslash
-- followed by anything else (or by end of input) remains ordinary text.
text :: Parser Token
text = Text . concat <$> many1 (plainRun <|> loneBackslash)
  where
    -- Longest run containing no whitespace, no '[' and no backslash.
    plainRun :: Parser String
    plainRun = T.unpack <$> takeWhile1 isPlain

    isPlain :: Char -> Bool
    isPlain c = not (isSpace c) && c /= '[' && c /= '\\'

    -- Accept a backslash only when it does not start the bracket escape.
    -- On failure attoparsec backtracks, so the backslash is left in the
    -- input for the escape alternative to consume.
    loneBackslash :: Parser String
    loneBackslash = do
      _ <- char '\\'
      next <- peekChar
      if next == Just '[' then empty else return "\\"