diff options
| author | Gregor Kleen <gkleen@yggdrasil.li> | 2015-10-17 02:26:25 +0200 | 
|---|---|---|
| committer | Gregor Kleen <gkleen@yggdrasil.li> | 2015-10-17 02:26:25 +0200 | 
| commit | 005dc408dc09c3b479398ebe3e92efa2cd54846e (patch) | |
| tree | 23dcfe7a545885c9aa145f1ccae6d33206a87820 /bbcode | |
| parent | 2dcbb4482de2c352b76372b389fda20c63075295 (diff) | |
| download | thermoprint-005dc408dc09c3b479398ebe3e92efa2cd54846e.tar thermoprint-005dc408dc09c3b479398ebe3e92efa2cd54846e.tar.gz thermoprint-005dc408dc09c3b479398ebe3e92efa2cd54846e.tar.bz2 thermoprint-005dc408dc09c3b479398ebe3e92efa2cd54846e.tar.xz thermoprint-005dc408dc09c3b479398ebe3e92efa2cd54846e.zip | |
Working prototype
Diffstat (limited to 'bbcode')
| -rw-r--r-- | bbcode/LICENSE | 27 | ||||
| -rw-r--r-- | bbcode/Setup.hs | 2 | ||||
| -rw-r--r-- | bbcode/bbcode.cabal | 33 | ||||
| -rw-r--r-- | bbcode/bbcode.nix | 18 | ||||
| -rw-r--r-- | bbcode/src/BBCode.hs | 96 | ||||
| -rw-r--r-- | bbcode/src/BBCode/Syntax.hs | 108 | ||||
| -rw-r--r-- | bbcode/src/BBCode/Tokenizer.hs | 44 | 
7 files changed, 328 insertions, 0 deletions
| diff --git a/bbcode/LICENSE b/bbcode/LICENSE new file mode 100644 index 0000000..4ad71e2 --- /dev/null +++ b/bbcode/LICENSE | |||
| @@ -0,0 +1,27 @@ | |||
| 1 | Statement of Purpose | ||
| 2 | |||
| 3 | The laws of most jurisdictions throughout the world automatically confer exclusive Copyright and Related Rights (defined below) upon the creator and subsequent owner(s) (each and all, an "owner") of an original work of authorship and/or a database (each, a "Work"). | ||
| 4 | |||
| 5 | Certain owners wish to permanently relinquish those rights to a Work for the purpose of contributing to a commons of creative, cultural and scientific works ("Commons") that the public can reliably and without fear of later claims of infringement build upon, modify, incorporate in other works, reuse and redistribute as freely as possible in any form whatsoever and for any purposes, including without limitation commercial purposes. These owners may contribute to the Commons to promote the ideal of a free culture and the further production of creative, cultural and scientific works, or to gain reputation or greater distribution for their Work in part through the use and efforts of others. | ||
| 6 | |||
| 7 | For these and/or other purposes and motivations, and without any expectation of additional consideration or compensation, the person associating CC0 with a Work (the "Affirmer"), to the extent that he or she is an owner of Copyright and Related Rights in the Work, voluntarily elects to apply CC0 to the Work and publicly distribute the Work under its terms, with knowledge of his or her Copyright and Related Rights in the Work and the meaning and intended legal effect of CC0 on those rights. | ||
| 8 | |||
| 9 | 1. Copyright and Related Rights. A Work made available under CC0 may be protected by copyright and related or neighboring rights ("Copyright and Related Rights"). Copyright and Related Rights include, but are not limited to, the following: | ||
| 10 | |||
| 11 | the right to reproduce, adapt, distribute, perform, display, communicate, and translate a Work; | ||
| 12 | moral rights retained by the original author(s) and/or performer(s); | ||
| 13 | publicity and privacy rights pertaining to a person's image or likeness depicted in a Work; | ||
| 14 | rights protecting against unfair competition in regards to a Work, subject to the limitations in paragraph 4(a), below; | ||
| 15 | rights protecting the extraction, dissemination, use and reuse of data in a Work; | ||
| 16 | database rights (such as those arising under Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, and under any national implementation thereof, including any amended or successor version of such directive); and | ||
| 17 | other similar, equivalent or corresponding rights throughout the world based on applicable law or treaty, and any national implementations thereof. | ||
| 18 | 2. Waiver. To the greatest extent permitted by, but not in contravention of, applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and unconditionally waives, abandons, and surrenders all of Affirmer's Copyright and Related Rights and associated claims and causes of action, whether now known or unknown (including existing as well as future claims and causes of action), in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each member of the public at large and to the detriment of Affirmer's heirs and successors, fully intending that such Waiver shall not be subject to revocation, rescission, cancellation, termination, or any other legal or equitable action to disrupt the quiet enjoyment of the Work by the public as contemplated by Affirmer's express Statement of Purpose. | ||
| 19 | |||
| 20 | 3. Public License Fallback. Should any part of the Waiver for any reason be judged legally invalid or ineffective under applicable law, then the Waiver shall be preserved to the maximum extent permitted taking into account Affirmer's express Statement of Purpose. In addition, to the extent the Waiver is so judged Affirmer hereby grants to each affected person a royalty-free, non transferable, non sublicensable, non exclusive, irrevocable and unconditional license to exercise Affirmer's Copyright and Related Rights in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "License"). The License shall be deemed effective as of the date CC0 was applied by Affirmer to the Work. Should any part of the License for any reason be judged legally invalid or ineffective under applicable law, such partial invalidity or ineffectiveness shall not invalidate the remainder of the License, and in such case Affirmer hereby affirms that he or she will not (i) exercise any of his or her remaining Copyright and Related Rights in the Work or (ii) assert any associated claims and causes of action with respect to the Work, in either case contrary to Affirmer's express Statement of Purpose. | ||
| 21 | |||
| 22 | 4. Limitations and Disclaimers. | ||
| 23 | |||
| 24 | No trademark or patent rights held by Affirmer are waived, abandoned, surrendered, licensed or otherwise affected by this document. | ||
| 25 | Affirmer offers the Work as-is and makes no representations or warranties of any kind concerning the Work, express, implied, statutory or otherwise, including without limitation warranties of title, merchantability, fitness for a particular purpose, non infringement, or the absence of latent or other defects, accuracy, or the present or absence of errors, whether or not discoverable, all to the greatest extent permissible under applicable law. | ||
| 26 | Affirmer disclaims responsibility for clearing rights of other persons that may apply to the Work or any use thereof, including without limitation any person's Copyright and Related Rights in the Work. Further, Affirmer disclaims responsibility for obtaining any necessary consents, permissions or other rights required for any use of the Work. | ||
| 27 | Affirmer understands and acknowledges that Creative Commons is not a party to this document and has no duty or obligation with respect to this CC0 or use of the Work. \ No newline at end of file | ||
| diff --git a/bbcode/Setup.hs b/bbcode/Setup.hs new file mode 100644 index 0000000..9a994af --- /dev/null +++ b/bbcode/Setup.hs | |||
| @@ -0,0 +1,2 @@ | |||
| 1 | import Distribution.Simple | ||
| 2 | main = defaultMain | ||
| diff --git a/bbcode/bbcode.cabal b/bbcode/bbcode.cabal new file mode 100644 index 0000000..b17ff34 --- /dev/null +++ b/bbcode/bbcode.cabal | |||
| @@ -0,0 +1,33 @@ | |||
| 1 | -- Initial bbcode.cabal generated by cabal init. For further | ||
| 2 | -- documentation, see http://haskell.org/cabal/users-guide/ | ||
| 3 | |||
| 4 | name: bbcode | ||
| 5 | version: 0.0.0 | ||
| 6 | synopsis: A tiny parser for bbcode->thermoprint-syntax-tree conversion | ||
| 7 | -- description: | ||
| 8 | homepage: git://git.yggdrasil.li/thermoprint | ||
| 9 | license: PublicDomain | ||
| 10 | license-file: LICENSE | ||
| 11 | author: Gregor Kleen | ||
| 12 | maintainer: aethoago@141.li | ||
| 13 | -- copyright: | ||
| 14 | -- category: | ||
| 15 | build-type: Simple | ||
| 16 | -- extra-source-files: | ||
| 17 | cabal-version: >=1.10 | ||
| 18 | |||
| 19 | library | ||
| 20 | exposed-modules: BBCode | ||
| 21 | other-modules: BBCode.Tokenizer | ||
| 22 | , BBCode.Syntax | ||
| 23 | -- other-extensions: | ||
| 24 | hs-source-dirs: src | ||
| 25 | default-language: Haskell2010 | ||
| 26 | build-depends: base >=4.8 && <4.9 | ||
| 27 | , thermoprint | ||
| 28 | , attoparsec >=0.13.0 && <1 | ||
| 29 | , text >=1.2.1 && <2 | ||
| 30 | , parsec >=3.1.9 && <4 | ||
| 31 | , mtl >=2.2.1 && <3 | ||
| 32 | , case-insensitive >=1.2.0 && <2 | ||
| 33 | , containers >=0.5.6 && <1 \ No newline at end of file | ||
| diff --git a/bbcode/bbcode.nix b/bbcode/bbcode.nix new file mode 100644 index 0000000..d363a17 --- /dev/null +++ b/bbcode/bbcode.nix | |||
| @@ -0,0 +1,18 @@ | |||
| 1 | { mkDerivation | ||
| 2 | , stdenv | ||
| 3 | , base | ||
| 4 | , thermoprint | ||
| 5 | , attoparsec, parsec, mtl, case-insensitive, containers | ||
| 6 | }: | ||
| 7 | mkDerivation { | ||
| 8 | pname = "bbcode"; | ||
| 9 | version = "0.0.0"; | ||
| 10 | src = ./.; | ||
| 11 | libraryHaskellDepends = [ base | ||
| 12 | thermoprint | ||
| 13 | attoparsec parsec mtl case-insensitive containers | ||
| 14 | ]; | ||
| 15 | homepage = "git://git.yggdrasil.li/thermoprint"; | ||
| 16 | description = "A tiny parser for bbcode->thermoprint-syntax-tree conversion"; | ||
| 17 | license = stdenv.lib.licenses.publicDomain; | ||
| 18 | } | ||
| diff --git a/bbcode/src/BBCode.hs b/bbcode/src/BBCode.hs new file mode 100644 index 0000000..750fb0f --- /dev/null +++ b/bbcode/src/BBCode.hs | |||
| @@ -0,0 +1,96 @@ | |||
| 1 | {-# LANGUAGE OverloadedStrings, OverloadedLists, RankNTypes, ImpredicativeTypes #-} | ||
| 2 | |||
| 3 | module BBCode | ||
| 4 | ( parse | ||
| 5 | ) where | ||
| 6 | |||
| 7 | import Thermoprint | ||
| 8 | |||
| 9 | import BBCode.Tokenizer | ||
| 10 | import BBCode.Syntax | ||
| 11 | |||
| 12 | import Data.Maybe | ||
| 13 | import Data.Monoid | ||
| 14 | import Data.Either | ||
| 15 | import Data.Bifunctor | ||
| 16 | import Data.List | ||
| 17 | import Data.Ord | ||
| 18 | import Data.Foldable | ||
| 19 | |||
| 20 | import Data.Function (on) | ||
| 21 | |||
| 22 | import Data.CaseInsensitive ( CI ) | ||
| 23 | import qualified Data.CaseInsensitive as CI | ||
| 24 | |||
| 25 | import Data.Map ( Map ) | ||
| 26 | import qualified Data.Map as Map | ||
| 27 | |||
| 28 | import Prelude hiding (takeWhile) | ||
| 29 | |||
| 30 | import Debug.Trace | ||
| 31 | |||
| 32 | knownTags :: Map (CI String) (Either (Block String -> Block String) (Inline String -> Inline String)) | ||
| 33 | knownTags = [ ("center", Left Center) | ||
| 34 | , ("u", Right Underline) | ||
| 35 | ] | ||
| 36 | |||
| 37 | isBlock, isInline :: String -> Bool | ||
| 38 | isBlock = testTag isLeft | ||
| 39 | isInline = testTag isRight | ||
| 40 | testTag f k = fromMaybe False (f <$> Map.lookup (CI.mk k) knownTags) | ||
| 41 | |||
| 42 | data Decorated c = Decorated c [String] | ||
| 43 | deriving (Show, Eq) | ||
| 44 | |||
| 45 | parse :: String -> Either String (Block String) | ||
| 46 | parse input = (remerge . blockify <$> (tokenize input >>= treeify)) >>= semantics | ||
| 47 | |||
| 48 | blockify :: ContentForest -> [Decorated String] | ||
| 49 | blockify = map sortDeco . concat . map (blockify' []) | ||
| 50 | where | ||
| 51 | blockify' _ Empty = [] | ||
| 52 | blockify' initial (Content str) = [str `Decorated` initial] | ||
| 53 | blockify' initial (Tagged cs tag) = concat $ map (blockify' $ tag : initial) cs | ||
| 54 | sortDeco :: Decorated c -> Decorated c | ||
| 55 | sortDeco (Decorated c tags) = Decorated c $ (nubBy ((==) `on` CI.mk) . sortBy blockiness) tags | ||
| 56 | blockiness :: String -> String -> Ordering | ||
| 57 | blockiness a b = comparing isBlock a b <> comparing CI.mk a b -- sort also alphabetically for the benefit of remerge | ||
| 58 | |||
| 59 | remerge :: [Decorated String] -> ContentForest | ||
| 60 | remerge [] = [] | ||
| 61 | remerge [(Decorated c deco)] = [appEndo applyDeco $ Content c] | ||
| 62 | where | ||
| 63 | applyDeco = foldMap (\t -> Endo (\c -> Tagged [c] t)) $ reverse deco | ||
| 64 | remerge xs = concat $ map toTree $ groupLasts xs | ||
| 65 | where | ||
| 66 | groupLasts = groupBy ((==) `on` (listToMaybe . reverse . (\(Decorated _ ds) -> ds))) | ||
| 67 | -- toTree :: [Decorated String] -> [ContentTree] | ||
| 68 | toTree [] = [] | ||
| 69 | toTree (x@(Decorated _ []):xs) = map (Content . (\(Decorated c []) -> c)) (x:xs) | ||
| 70 | toTree (x@(Decorated _ ds):xs) = [Tagged content tag] | ||
| 71 | where | ||
| 72 | tag = last ds | ||
| 73 | content = toTree $ map stripLast (x:xs) | ||
| 74 | stripLast (Decorated c ds) = Decorated c (init ds) | ||
| 75 | |||
| 76 | unknownTag :: forall a. String -> Either String a | ||
| 77 | unknownTag tag = Left $ "Unknown tag: " ++ tag | ||
| 78 | |||
| 79 | semantics :: ContentForest -> Either String (Block String) | ||
| 80 | semantics forest = Over <$> mapM semantics' forest | ||
| 81 | |||
| 82 | semantics' :: ContentTree -> Either String (Block String) | ||
| 83 | semantics' Empty = Right $ Over [] | ||
| 84 | semantics' (Content str) = Right $ Paragraph (Cooked str) | ||
| 85 | semantics' (Tagged cs tag) = case Map.lookup (CI.mk tag) knownTags of | ||
| 86 | Nothing -> unknownTag tag | ||
| 87 | Just (Left f) -> Over . map f <$> mapM semantics' cs | ||
| 88 | Just (Right f) -> Paragraph . Beside . map f<$> mapM iSemantics cs | ||
| 89 | |||
| 90 | iSemantics :: ContentTree -> Either String (Inline String) | ||
| 91 | iSemantics Empty = Right $ Beside [] | ||
| 92 | iSemantics (Content str) = Right $ Cooked str | ||
| 93 | iSemantics (Tagged cs tag) = case Map.lookup (CI.mk tag) knownTags of | ||
| 94 | Nothing -> unknownTag tag | ||
| 95 | Just (Left f) -> error "Known inline tag sorted within block" | ||
| 96 | Just (Right f) -> Beside . map f <$> mapM iSemantics cs | ||
| diff --git a/bbcode/src/BBCode/Syntax.hs b/bbcode/src/BBCode/Syntax.hs new file mode 100644 index 0000000..a196e05 --- /dev/null +++ b/bbcode/src/BBCode/Syntax.hs | |||
| @@ -0,0 +1,108 @@ | |||
| 1 | {-# LANGUAGE RecordWildCards #-} | ||
| 2 | |||
| 3 | module BBCode.Syntax | ||
| 4 | ( treeify | ||
| 5 | , ContentForest | ||
| 6 | , ContentTree(..) | ||
| 7 | ) where | ||
| 8 | |||
| 9 | import BBCode.Tokenizer (Token(..)) | ||
| 10 | |||
| 11 | import Data.Foldable | ||
| 12 | import Data.Bifunctor | ||
| 13 | import Data.Monoid | ||
| 14 | |||
| 15 | import Control.Monad.State | ||
| 16 | import Control.Monad.Trans | ||
| 17 | import Control.Monad | ||
| 18 | |||
| 19 | import Data.CaseInsensitive ( CI ) | ||
| 20 | import qualified Data.CaseInsensitive as CI | ||
| 21 | |||
| 22 | import Data.Function (on) | ||
| 23 | |||
| 24 | type ContentForest = [ContentTree] | ||
| 25 | data ContentTree = Content String | ||
| 26 | | Tagged [ContentTree] String | ||
| 27 | | Empty | ||
| 28 | deriving (Show, Eq) | ||
| 29 | |||
| 30 | data Step = Down [ContentTree] [ContentTree] String | ||
| 31 | deriving (Show) | ||
| 32 | |||
| 33 | data Zipper = Zipper | ||
| 34 | { hole :: String | ||
| 35 | , prevs :: [ContentTree] | ||
| 36 | , steps :: [Step] | ||
| 37 | } | ||
| 38 | deriving (Show) | ||
| 39 | |||
| 40 | type Parser = StateT Zipper (Either String) | ||
| 41 | abort :: String -> Parser a | ||
| 42 | abort = lift . Left | ||
| 43 | |||
| 44 | |||
| 45 | treeify :: [Token] -> Either String ContentForest | ||
| 46 | treeify tokenStream = second (postProcess . unZip) $ execStateT (mapM_ incorporate tokenStream) (Zipper "" [] []) | ||
| 47 | |||
| 48 | postProcess :: ContentForest -> ContentForest | ||
| 49 | postProcess forest = do | ||
| 50 | tree <- forest | ||
| 51 | let | ||
| 52 | tree' = postProcess' tree | ||
| 53 | guard $ tree' /= Empty | ||
| 54 | return tree' | ||
| 55 | where | ||
| 56 | postProcess' :: ContentTree -> ContentTree | ||
| 57 | postProcess' (Content "") = Empty | ||
| 58 | postProcess' (Tagged [] _) = Empty | ||
| 59 | postProcess' (Tagged cs t) = Tagged [c' | c <- cs, let c' = postProcess' c, c' /= Empty ] t | ||
| 60 | postProcess' x = x | ||
| 61 | |||
| 62 | unZip :: Zipper -> ContentForest | ||
| 63 | unZip Zipper{..} = reverse (apply hole steps : prevs) | ||
| 64 | |||
| 65 | apply :: String -> [Step] -> ContentTree | ||
| 66 | apply hole steps = hole `apply'` (reverse steps) | ||
| 67 | apply' "" [] = Empty | ||
| 68 | apply' hole [] = Content hole | ||
| 69 | apply' hole (Down pres posts tag:steps) = Tagged (reverse pres ++ [hole `apply` steps] ++ posts) tag | ||
| 70 | |||
| 71 | incorporate :: Token -> Parser () | ||
| 72 | incorporate (Text str) = append str | ||
| 73 | incorporate (Whitespace str) | ||
| 74 | | delimitsPar str = do | ||
| 75 | currSteps <- gets steps | ||
| 76 | if null currSteps then | ||
| 77 | modify (\Zipper{..} -> Zipper { hole = "", steps = [], prevs = (hole `apply` steps : prevs) }) | ||
| 78 | else | ||
| 79 | append str | ||
| 80 | | otherwise = append str | ||
| 81 | where | ||
| 82 | delimitsPar str = (sum . map (const (1 :: Integer)) . filter (== '\n') $ str) >= 1 | ||
| 83 | incorporate (TagOpen tagName) = modify $ (\z@(Zipper{..}) -> z { steps = (Down [] [] tagName : steps) }) | ||
| 84 | incorporate (TagClose tagName) = do | ||
| 85 | currSteps <- gets steps | ||
| 86 | case currSteps of | ||
| 87 | [] -> abort $ "Closing unopenend tag: " ++ tagName | ||
| 88 | (Down pres posts tagName':s) -> if ((==) `on` CI.mk) tagName tagName' then | ||
| 89 | goUp | ||
| 90 | else | ||
| 91 | abort $ "Mismatched tags: [" ++ tagName' ++ "]...[/" ++ tagName ++ "]" | ||
| 92 | |||
| 93 | append :: String -> Parser () | ||
| 94 | append str = modify (\z@(Zipper{..}) -> z { hole = hole ++ str }) | ||
| 95 | |||
| 96 | goUp :: Parser () | ||
| 97 | goUp = do | ||
| 98 | (Down pres posts tagName:s) <- gets steps | ||
| 99 | hole <- gets hole | ||
| 100 | let | ||
| 101 | steppedHole = Tagged (reverse pres ++ [Content hole] ++ posts) tagName | ||
| 102 | case s of | ||
| 103 | [] -> modify $ (\z@(Zipper{..}) -> Zipper { hole = "", prevs = steppedHole : prevs, steps = [] }) | ||
| 104 | (t:ts) -> do | ||
| 105 | let | ||
| 106 | (Down pres posts tagName) = t | ||
| 107 | t' = Down (steppedHole : pres) posts tagName | ||
| 108 | modify $ (\z -> z { hole = "", steps = (t':ts) }) | ||
| diff --git a/bbcode/src/BBCode/Tokenizer.hs b/bbcode/src/BBCode/Tokenizer.hs new file mode 100644 index 0000000..c860c7c --- /dev/null +++ b/bbcode/src/BBCode/Tokenizer.hs | |||
| @@ -0,0 +1,44 @@ | |||
| 1 | {-# LANGUAGE OverloadedStrings #-} | ||
| 2 | |||
| 3 | module BBCode.Tokenizer | ||
| 4 | ( Token(..) | ||
| 5 | , tokenize | ||
| 6 | ) where | ||
| 7 | |||
| 8 | import qualified Data.Text.Lazy as TL | ||
| 9 | import qualified Data.Text as T | ||
| 10 | |||
| 11 | import Control.Applicative | ||
| 12 | import Data.Attoparsec.Text.Lazy | ||
| 13 | |||
| 14 | import Data.Char (isSpace) | ||
| 15 | import Data.Monoid (mconcat) | ||
| 16 | |||
| 17 | data Token = Text String | ||
| 18 | | Whitespace String | ||
| 19 | | TagOpen String | ||
| 20 | | TagClose String | ||
| 21 | deriving (Show, Read, Eq) | ||
| 22 | |||
| 23 | tokenize :: String -> Either String [Token] | ||
| 24 | tokenize = eitherResult . parse (tokenize' <* endOfInput) . TL.pack | ||
| 25 | |||
| 26 | tokenize' :: Parser [Token] | ||
| 27 | tokenize' = many $ choice [ whitespace | ||
| 28 | , Text . T.unpack <$> ("\\" *> "[") | ||
| 29 | , tagClose | ||
| 30 | , tagOpen | ||
| 31 | , text | ||
| 32 | ] | ||
| 33 | |||
| 34 | whitespace :: Parser Token | ||
| 35 | whitespace = Whitespace <$> many1 space | ||
| 36 | |||
| 37 | tagOpen :: Parser Token | ||
| 38 | tagOpen = TagOpen . T.unpack <$> ("[" *> takeWhile1 (/= ']') <* "]") | ||
| 39 | |||
| 40 | tagClose :: Parser Token | ||
| 41 | tagClose = TagClose . T.unpack <$> ("[/" *> takeWhile1 (/= ']') <* "]") | ||
| 42 | |||
| 43 | text :: Parser Token | ||
| 44 | text = Text . T.unpack <$> takeWhile1 (\c -> not (isSpace c) && notInClass "[" c) | ||
