aboutsummaryrefslogtreecommitdiff
path: root/bbcode/src/BBCode/Tokenizer.hs
blob: c860c7c4f5e559ee7282aa288e71f3af83755f50 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
{-# LANGUAGE OverloadedStrings #-}

module BBCode.Tokenizer
       ( Token(..)
       , tokenize
       ) where

import qualified Data.Text.Lazy as TL
import qualified Data.Text as T

import Control.Applicative
import Data.Attoparsec.Text.Lazy

import Data.Char (isSpace)
import Data.Monoid (mconcat)

data Token = Text String
           | Whitespace String
           | TagOpen String
           | TagClose String
           deriving (Show, Read, Eq)

tokenize :: String -> Either String [Token]
tokenize = eitherResult . parse (tokenize' <* endOfInput) . TL.pack

tokenize' :: Parser [Token]
tokenize' = many $ choice [ whitespace
                          , Text . T.unpack <$> ("\\" *> "[")
                          , tagClose
                          , tagOpen
                          , text
                          ]

whitespace :: Parser Token
whitespace = Whitespace <$> many1 space

tagOpen :: Parser Token
tagOpen = TagOpen . T.unpack <$> ("[" *> takeWhile1 (/= ']') <* "]")

tagClose :: Parser Token
tagClose = TagClose . T.unpack <$> ("[/" *> takeWhile1 (/= ']') <* "]")

text :: Parser Token
text = Text . T.unpack <$> takeWhile1 (\c -> not (isSpace c) && notInClass "[" c)