diff options
Diffstat (limited to 'bbcode/src/BBCode/Tokenizer.hs')
-rw-r--r-- | bbcode/src/BBCode/Tokenizer.hs | 44 |
1 files changed, 44 insertions, 0 deletions
diff --git a/bbcode/src/BBCode/Tokenizer.hs b/bbcode/src/BBCode/Tokenizer.hs new file mode 100644 index 0000000..c860c7c --- /dev/null +++ b/bbcode/src/BBCode/Tokenizer.hs | |||
@@ -0,0 +1,44 @@ | |||
1 | {-# LANGUAGE OverloadedStrings #-} | ||
2 | |||
3 | module BBCode.Tokenizer | ||
4 | ( Token(..) | ||
5 | , tokenize | ||
6 | ) where | ||
7 | |||
8 | import qualified Data.Text.Lazy as TL | ||
9 | import qualified Data.Text as T | ||
10 | |||
11 | import Control.Applicative | ||
12 | import Data.Attoparsec.Text.Lazy | ||
13 | |||
14 | import Data.Char (isSpace) | ||
15 | import Data.Monoid (mconcat) | ||
16 | |||
17 | data Token = Text String | ||
18 | | Whitespace String | ||
19 | | TagOpen String | ||
20 | | TagClose String | ||
21 | deriving (Show, Read, Eq) | ||
22 | |||
23 | tokenize :: String -> Either String [Token] | ||
24 | tokenize = eitherResult . parse (tokenize' <* endOfInput) . TL.pack | ||
25 | |||
26 | tokenize' :: Parser [Token] | ||
27 | tokenize' = many $ choice [ whitespace | ||
28 | , Text . T.unpack <$> ("\\" *> "[") | ||
29 | , tagClose | ||
30 | , tagOpen | ||
31 | , text | ||
32 | ] | ||
33 | |||
34 | whitespace :: Parser Token | ||
35 | whitespace = Whitespace <$> many1 space | ||
36 | |||
37 | tagOpen :: Parser Token | ||
38 | tagOpen = TagOpen . T.unpack <$> ("[" *> takeWhile1 (/= ']') <* "]") | ||
39 | |||
40 | tagClose :: Parser Token | ||
41 | tagClose = TagClose . T.unpack <$> ("[/" *> takeWhile1 (/= ']') <* "]") | ||
42 | |||
43 | text :: Parser Token | ||
44 | text = Text . T.unpack <$> takeWhile1 (\c -> not (isSpace c) && notInClass "[" c) | ||