summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGregor Kleen <gkleen@yggdrasil.li>2016-01-11 04:58:05 +0000
committerGregor Kleen <gkleen@yggdrasil.li>2016-01-11 04:58:05 +0000
commita9b3b2b2902b0a79437f87cda8cd18f426359621 (patch)
tree92998f933b3c7a66113857175375c443d7260a28
parentc0e8fb8417cd190af926e771e692ea896b1c987b (diff)
downloaddirty-haskell.org-a9b3b2b2902b0a79437f87cda8cd18f426359621.tar
dirty-haskell.org-a9b3b2b2902b0a79437f87cda8cd18f426359621.tar.gz
dirty-haskell.org-a9b3b2b2902b0a79437f87cda8cd18f426359621.tar.bz2
dirty-haskell.org-a9b3b2b2902b0a79437f87cda8cd18f426359621.tar.xz
dirty-haskell.org-a9b3b2b2902b0a79437f87cda8cd18f426359621.zip
thermoprint-2
-rw-r--r--provider/posts/thermoprint-2.lhs262
1 files changed, 262 insertions, 0 deletions
diff --git a/provider/posts/thermoprint-2.lhs b/provider/posts/thermoprint-2.lhs
new file mode 100644
index 0000000..056f1ef
--- /dev/null
+++ b/provider/posts/thermoprint-2.lhs
@@ -0,0 +1,262 @@
1---
2title: On the design of a structured document format compatible with character oriented printers
3published: 2016-01-11
4tags: Thermoprint
5---
6
7This post is an annotated version of the file `spec/src/Thermoprint/Printout.hs` as of commit [f6dc3d1](git://git.yggdrasil.li/thermoprint#f6dc3d1).
8
9> {-# LANGUAGE DeriveGeneric, DeriveAnyClass #-}
10> {-# LANGUAGE OverloadedStrings #-}
11> {-# OPTIONS_HADDOCK show-extensions #-}
12
13Motivation
14----------
15
16We want our codebase to be compatible with as many different models of printers as we are willing to implement.
17It is therefore desirable to maintain a structured document format which we can transform into a printer-specific representation of the payload to be printed with minimal effort.
18
19In this post we present one such format.
20
21Contents
22--------
23
24> -- | This module contains the definition of the structure -- 'Printout' -- used to represent the content of a job
25> module Thermoprint.Printout
26> ( Printout(..)
27> , Paragraph(..)
28> , Chunk(..)
29> , Block(..)
30> , Line( HSpace
31> , SpaceSep
32> )
33> , text, cotext
34> , prop_text
35> ) where
36
37Preliminaries
38-------------
39
40> import Data.Sequence (Seq, (|>), (<|))
41
42A Sequence represents the same structure as the linked lists common in haskell but supports $O(1)$ `snoc`, which is desirable since we intend to iteratively build up the structure when parsing input formats.
43
44> import Data.Text.Lazy (Text)
45>
46> import Data.ByteString.Lazy (ByteString)
47
48The entire structure will be lazy by default but an instance of `NFData`, thus the lazy variants of `Text` and `ByteString`.
49
50> import GHC.Generics (Generic)
51
52We will use derived instances of `Generic` to get handed suitable instances of rather complicated classes such as `Arbitrary` and `FromJSON`
53
54> import Control.DeepSeq (NFData)
55
56Instances of `NFData` allow us to strictly evaluate our document structure when needed
57
58> import Data.Aeson (FromJSON(..), ToJSON(..), Value(..))
59> import qualified Data.Aeson as JSON (encode, decode)
60> import Data.Aeson.Types (typeMismatch)
61
62We will encode the document as a [json](https://en.wikipedia.org/wiki/JSON) object during transport
63
64> import Test.QuickCheck.Arbitrary (Arbitrary(..), CoArbitrary, genericShrink)
65> import Test.QuickCheck.Modifiers (NonNegative(..))
66> import Test.QuickCheck.Gen (oneof, suchThat, scale)
67> import Test.QuickCheck.Instances
68> import Test.QuickCheck (forAll, Property)
69
70We will use [QuickCheck](https://hackage.haskell.org/package/QuickCheck) for automatic test generation.
71
72> import qualified Data.Text.Lazy as TL (split, null, pack, filter, intercalate, map)
73> import qualified Data.Text as T (pack)
74> import Data.Char (isSpace)
75>
76> import Data.Monoid (Monoid(..), (<>))
77>
78> import Data.List (dropWhile, dropWhileEnd, groupBy, genericLength, genericReplicate)
79>
80> import Data.Sequence as Seq (fromList, null, singleton)
81>
82> import Data.Function (on)
83>
84> import Data.Foldable (toList, fold)
85
86We will need to do some parsing and pretty-printing to implement `text` and `cotext`, respectively.
87
88> import Data.Encoding (encodeLazyByteStringExplicit, decodeLazyByteString)
89> import Data.Encoding.UTF8
90> import qualified Data.ByteString.Base64.Lazy as Base64 (encode, decode)
91
92Since we want end users to be able to include direct instructions the printer in the form of a lazy [`ByteString`](https://hackage.haskell.org/package/bytestring) we need some way to encode `ByteString`s in JSON.
93We chose [base64](https://hackage.haskell.org/package/base64-bytestring).
94
95> import Prelude hiding (fold)
96>
97>
98> -- | A 'Printout' is a sequence of visually seperated 'Paragraph's
99> type Printout = Seq Paragraph
100
101"visually seperated" will most likely end up meaning "seperated by a single blank line"
102
103> -- | A 'Paragraph' is a non-seperated sequence of 'Chunk's
104> type Paragraph = Seq Chunk
105>
106> -- | We introduce both 'Chunk' and 'Paragraph' mainly to allow 'Raw'.
107> --
108> -- Were we to disallow 'Raw', 'Block' would be identical to 'Paragraph'
109> data Chunk = Cooked Block -- ^ text semantically structured to be rendered in accordance with the display format of printer
110> | Raw ByteString -- ^ direct instructions to the printer
111> deriving (Generic, NFData, Show, CoArbitrary)
112>
113> instance FromJSON Chunk where
114> parseJSON s@(String _) = Raw <$> ((either fail return . decodeBase64) =<< parseJSON s)
115> where
116> decodeBase64 :: String -> Either String ByteString
117> decodeBase64 s = (either (Left . show) Right . encodeLazyByteStringExplicit UTF8Strict $ s) >>= Base64.decode
118> parseJSON o@(Object _) = Cooked <$> parseJSON o
119> parseJSON v = typeMismatch "Chunk" v
120>
121> instance ToJSON Chunk where
122> toJSON (Raw bs) = String . T.pack . decodeLazyByteString UTF8Strict . Base64.encode $ bs
123> toJSON (Cooked block) = toJSON block
124
125We provide custom instances of `FromJSON Chunk` and `ToJSON Chunk` so that we might reduce the sice of the resulting JSON somewhat (this is an opportune target since disambiguaty is simple)
126
127> -- | 'Block' is the entry point for our structured document format
128> data Block = Line Line -- ^ a single 'Line' of text
129> | VSpace Integer -- ^ vertical space of height equivalent to 'Integer' lines
130> | NewlSep (Seq Block) -- ^ A sequence of 'Block's seperated by newlines
131> deriving (Generic, NFData, Show, CoArbitrary, FromJSON, ToJSON)
132>
133> {- | A 'Line' is one of:
134>
135> * a single word
136> * horizontal space equivalent to the width of 'Integer' `em`.
137> * a sequence of words seperated by spaces
138>
139> We don't export all constructors and instead encourage the use of 'text'.
140> -}
141> data Line = Word Text
142> | HSpace Integer
143> | SpaceSep (Seq Line)
144> deriving (Generic, NFData, Show, CoArbitrary, FromJSON, ToJSON)
145>
146> instance Monoid Block where
147> mempty = NewlSep mempty
148> x@(NewlSep xs) `mappend` y@(NewlSep ys)
149> | Seq.null xs = y
150> | Seq.null ys = x
151> | otherwise = NewlSep (xs <> ys)
152> (NewlSep xs) `mappend` y
153> | Seq.null xs = y
154> | otherwise = NewlSep (xs |> y)
155> x `mappend` (NewlSep ys)
156> | Seq.null ys = x
157> | otherwise = NewlSep (x <| ys)
158> x `mappend` y = NewlSep $ Seq.fromList [x, y]
159>
160> instance Monoid Line where
161> mempty = SpaceSep mempty
162> x@(SpaceSep xs) `mappend` y@(SpaceSep ys)
163> | Seq.null xs = y
164> | Seq.null ys = x
165> | otherwise = SpaceSep (xs <> ys)
166> (SpaceSep xs) `mappend` y
167> | Seq.null xs = y
168> | otherwise = SpaceSep (xs |> y)
169> x `mappend` (SpaceSep ys)
170> | Seq.null ys = x
171> | otherwise = SpaceSep (x <| ys)
172> x `mappend` y = SpaceSep $ Seq.fromList [x, y]
173
174The Monoid instances for `Block` and `Line` are somewhat unwieldy since we want to guarantee minimum overhead by reducing expressions such as `SpaceSep (fromList [x])` to `x` whenever possible.
175
176The same effect would have been possible by introducing the monoid structure *one level higher* -- we could have introduced constructors such as `Line :: Seq Word -> Block`.
177This was deemed undesirable since we would not have been able to implement instances such as `Monoid Line` which allow the use of more generic functions during parsing.
178
179> text :: Text -> Either Block Line
180> -- ^ Smart constructor for 'Line'/'Block' which maps word and line boundaries (as determined by 'isSpace' and '(== '\n')' respectively) to the structure of 'Block' and 'Line'.
181> --
182> -- Since we are unwilling to duplicate the list of chars from 'isSpace' we cannot reasonably determine a width for the various whitespace 'Char's.
183> -- Thus they are all weighted equally as having width 1 `em`.
184> text t = case splitLines t of
185> [] -> Right mempty
186> [Line x] -> Right x
187> xs -> Left $ mconcat xs
188> where
189> splitLines :: Text -> [Block]
190> splitLines t = map toBlock
191> . groupBy ((==) `on` TL.null)
192> $ TL.split (== '\n') t
193> splitWords :: Text -> [Line]
194> splitWords t = map toLine
195> . groupBy ((==) `on` TL.null)
196> $ TL.split isSpace t
197> toBlock [] = mempty
198> toBlock xs@(x:_)
199> | TL.null x = VSpace $ genericLength xs - 1
200> | otherwise = mconcat . map (Line . mconcat . splitWords) $ xs
201> toLine [] = mempty
202> toLine xs@(x:_)
203> | TL.null x = HSpace $ genericLength xs - 1
204> | otherwise = mconcat . map Word $ xs
205> list :: b -> (a -> [a] -> b) -> [a] -> b
206> list c _ [] = c
207> list _ f (x:xs) = f x xs
208
209Implementations using `TL.lines` and `TL.words` were tested.
210We chose to use `TL.split`-based solutions instead because the more specific splitting functions provided by [text](https://hackage.haskell.org/package/text) drop information concerning the exact amount of whitespace.
211
212> cotext :: Block -> Text
213> -- ^ inverse of
214> -- @
215> -- either id Line . `text`
216> -- @
217> cotext (VSpace n) = TL.pack . genericReplicate n $ '\n'
218> cotext (NewlSep xs) = TL.intercalate "\n" . map cotext . toList $ xs
219> cotext (Line x) = cotext' x
220> where
221> cotext' (Word x) = x
222> cotext' (HSpace n) = TL.pack . genericReplicate n $ ' '
223> cotext' (SpaceSep xs) = TL.intercalate " " . map cotext' . toList $ xs
224
225We provide cotext for testing `text` and to enable determining semantic equality of `Printout`s at a later date
226
227> prop_text :: Text -> Bool
228> -- ^ prop> (`cotext` . either id Line . `text` $ x) == x
229> --
230> -- Where 'x' is restricted to those `TL.Text` which do not contain whitespace besides ' ' and '\n'.
231> prop_text x = (cotext . either id Line . text $ x') == x'
232> where
233> x' = TL.map normSpace x
234> normSpace c
235> | isSpace c
236> , c `elem` keep = c
237> | isSpace c = ' ' -- We have to do this because all whitespace gets interpreted as width 1
238> | otherwise = c
239> keep = [' ', '\n']
240>
241> -- | We don't test 'Raw' 'Chunk's
242> instance Arbitrary Chunk where
243> shrink = genericShrink
244> arbitrary = Cooked <$> arbitrary
245>
246> instance Arbitrary Block where
247> shrink = genericShrink
248> arbitrary = oneof [ Line <$> arbitrary
249> , VSpace . getNonNegative <$> arbitrary
250> , NewlSep <$> scale' arbitrary
251> ]
252>
253> instance Arbitrary Line where
254> shrink = genericShrink
255> arbitrary = oneof [ Word . TL.filter (not . isSpace) <$> arbitrary -- 'isSpace '\n' == True'
256> , HSpace . getNonNegative <$> arbitrary
257> , SpaceSep <$> scale' arbitrary
258> ]
259>
260> scale' = scale (round . sqrt . fromInteger . toInteger)
261
262Failing to properly scale the tested structures was shown to use more than 8GiB of RAM during testing