From 5b063193f389ef472366e4355a683f1843f29733 Mon Sep 17 00:00:00 2001 From: Gregor Kleen Date: Fri, 27 May 2016 19:40:18 +0200 Subject: structure --- provider/posts/beuteltier/1.lhs | 327 ++++++++++++++++++++++++++++++++++++++++ provider/posts/beuteltier/2.lhs | 179 ++++++++++++++++++++++ provider/posts/beuteltier/3.md | 19 +++ provider/posts/beuteltier/4.lhs | 220 +++++++++++++++++++++++++++ 4 files changed, 745 insertions(+) create mode 100644 provider/posts/beuteltier/1.lhs create mode 100644 provider/posts/beuteltier/2.lhs create mode 100644 provider/posts/beuteltier/3.md create mode 100644 provider/posts/beuteltier/4.lhs (limited to 'provider/posts/beuteltier') diff --git a/provider/posts/beuteltier/1.lhs b/provider/posts/beuteltier/1.lhs new file mode 100644 index 0000000..7789f40 --- /dev/null +++ b/provider/posts/beuteltier/1.lhs @@ -0,0 +1,327 @@ +--- +title: On the Design of Overly Complicated Feedreaders +published: 2015-08-04 +tags: Beuteltier +--- + +I like feedreaders. +Thus, of course, I had to implement my own, because, as always, all existing software does +not fullfill my exceedingly unrealistic expectations with respect to customizability and +extendability. + +This post marks the start of a series describing and documenting the design of the current +iteration of `Beuteltier` (`Beutel` kind of sounds like [Beuter](https://newsbeuter.org) +and is german for bag, which is what we call our backstore since it is held at such an +universal and unstructured level that the analogy is fitting. `Tier` is german for animal +and taken to mean "Thing that does stuff". In conjunction `Beuteltier` means +[Marsupial](https://en.wikipedia.org/wiki/Marsupial)). + +It should be noted that the library described here is not finished or ready for use in any +sense of the word (at the time of writing a "trivial" implementation of a `Beutel` shipped +with the library supports only `run`, `search`, and `delete`). Searching a way to +procrastinate implementing the more arduous `insert` (it requires nubbing—deduplication in +the backstore) I decided to, instead, start this series of posts and put the thought that +went into the library so far in a form that I can read again for later reference. + +We begin, as is to be expected for a haskell project, with type definitions and, thus, +design philosophy. + +This post in particular reproduces the file `beuteltier/Beuteltier/Types.hs` from the +git repo with annotiations to provide some motivation. + +The `Beuteltier` library itself only provides primitives for (and a default implementation +of) access to what we call a backstore. A backstore is, to us, an instance of the +typeclass `Beutel` which contains the most primitive of primitives for storing, searching +for and deleting representations of the objects we care about from the store. + +It is recommended that the reader not try to follow the rest of this post linearly but start +at the end with the definition of the `Beutel` class and work their way backwards. + +> {-# LANGUAGE FlexibleInstances, StandaloneDeriving, KindSignatures, MultiParamTypeClasses, TypeFamilies #-} +> +> module Beuteltier.Types +> ( -- * Types +> Object +> , ObjectGen(..) +> , SubObject(..) +> , MetaData(..) +> , Thunk(..) +> , ThunkState(..) +> , ThunkResult(..) +> , Tag +> , Flag(..) +> , SubObjectName +> , ThunkName +> , SearchQuery +> , Predicate +> , Beutel(..) +> ) where + +`Flag` ends up being a [sum type](https://en.wikipedia.org/wiki/Sum_type) holding values +such as `Seen`, `Old`, or `Hidden`. +We define it externally. + +> import Beuteltier.Types.Flags + +The `Identity` functor serves as basis for many a Monadtransformer-stack. + +> import Data.Functor.Identity +> import Data.Functor.Classes () + +Binary contents are encoded as `ByteStrings` + +> import qualified Data.ByteString.Lazy as Lazy (ByteString) +> import qualified Data.ByteString.Lazy as LBS + +Unicode text as `Text` + +> import Data.Text (Text) + +Long unicode text as lazy `Text` + +> import qualified Data.Text.Lazy as Lazy (Text) +> import qualified Data.Text.Lazy as LT +> +> import Data.Set (Set) +> +> import Data.Map (Map) +> +> import Data.Time (UTCTime) +> +> import Data.Function (on) +> import Data.Ord (comparing) +> import Control.Applicative + +`Data.Default` provides some convenience when constructing extensive record structures. + +> import Data.Default + +The `boolexpr` package provides us with a structure for representing boolean expressions +supporting functor operations and evaluation. + +> import Data.BoolExpr + +Previous iterations of Beuteltier acted on Objects that were kept completely in RAM during +all operations. +This proved to be unsustainable, not only because nubbing (deduplication in the store of +all objects) tended to exceed all RAM constraints (>4GiB for a few hundred objects), but +also because cheaper operations on objects, like presentation to the user, got painfully +slow once large `SubObject`s (like videos) were introduced into the store. + +The straight forward solution was to enrich the `Object` structure with provisions for +explicit lazyness and partial construction. + +> -- | We deal in, at runtime, partially retrieved Objects +> data ObjectGen (f :: * -> *) = ObjectGen +> { _oMeta :: f MetaData +> -- ^ An undetermined set of Metainformation +> , _oContent :: f (Map SubObjectName (f SubObject)) +> -- ^ A list of undetermined length of undetermined +> -- 'SubObject's with guaranteed unique 'SubObjectName's +> , _oThunks :: f [f Thunk] +> -- ^ A list of undetermined length of undetermined Thunks. +> -- There is such a thing as thunk colissions (i.e.: two +> -- thunks promise or even create 'SubObject's with the +> -- same name). +> -- Precedence in such a case is to be as suggested by +> -- the list structure (later thunks override earlier ones). +> } +> +> instance Monad f => Default (ObjectGen f) where +> def = ObjectGen { _oContent = return def +> , _oThunks = return def +> , _oMeta = return def +> } + +It is straight forward to collapse the more advanced representation of `Object`s back to +the old behaviour by parametrising over the Identity functor, which is simply a newtype +wrapper over the contained structure. + +> -- | An entirely retrieved Object +> type Object = ObjectGen Identity +> +> -- -- | The default 'Object' is empty except for metadata +> -- instance Default Object where +> -- def = ObjectGen { _oContent = return def +> -- , _oThunks = return def +> -- , _oMeta = return def +> -- } +> +> -- | Equality simply gets deferred to all subcomponents +> deriving instance Eq Object +> +> -- | 'Object's compare as their 'MetaData' +> instance Ord Object where +> compare = comparing _oMeta + +We would like to associate some set of meta information with all objects. +Therefore, we do. + +> -- | Metadata associated with an Object +> data MetaData = MetaData +> { _mRetrieved :: UTCTime -- ^ Time of creation +> , _mTags :: Set Tag -- ^ Tags such as the name of the author, +> -- the title of the work represented in +> -- the 'Object', …. +> -- We use something like @show . _mTags@ +> -- to identify an 'Object' to the user +> , _mFlags :: Set Flag -- ^ Flags such as \"Read\" or \"Spam\" +> } deriving (Show, Ord) +> -- | Tags are unicode text +> type Tag = Text +> +> -- | 'MetaData' equates as the contained tags +> instance Eq MetaData where +> (==) = (==) `on` _mTags +> +> -- | The default MetaData has no tags, no flags, and an undefined timestamp +> instance Default MetaData where +> def = MetaData { _mFlags = def +> , _mTags = def +> , _mRetrieved = undefined -- There really is no such thing as a default time +> } + +Objects are no fun if they don´t contain anything of interest in the end. + +Below we see a remnant of an older model of associating names to `SubObject`s. We switched +to using a `Map` for reasons of deduplication. Inserting into a `Map` carries some +guarantees that keys end up being unique. + +Note below: creation of a `SubObject` is an update. It is thus expected, that `SubObject`s +created at the same time as the `Object` they are associated to encode an update +time that matches the `Object`s creation time. + +> -- | Contents of an object +> data SubObject = SubObject +> -- { _sId :: SubObjectName +> -- ^ We associate a name to every chunk of content to determine +> -- how to present an object to the user +> { _sContent :: Lazy.ByteString +> , _sUpdates :: [UTCTime] +> -- ^ Times of witnessed updates to this 'SubObject' +> } deriving (Show) +> +> -- | No content, no witnessed updates +> instance Default SubObject where +> def = SubObject { _sContent = def +> , _sUpdates = def +> } +> +> -- | Extensionality for 'SubObject's: +> -- +> -- > (==) = (==) `on` _sContent +> instance Eq SubObject where +> (==) = (==) `on` _sContent + +The distinguishing feature of Beuteltier is it´s support for `Thunk`s. They are, as the +name suggests, loosly based on the concept of lazy evaluation. They are, however, less +transparent and thus more explicit than implementations as they are used in, for example +haskell. + +As far as Beuteltier is concerned `Thunk`s are executables that are expected to produce +files in the directory they are executed in in a pure manner. That is to say they do not +access external resources, where possible. A `Thunk` that downloads a video from the +internet will, of course, access the internet and can thus fail. We expect it, however, to +not to try and access the users home directory to look for e.g. credentials for +authentication it intends to use to its primary job. + +When a `Thunk`s executable gets executed the files it creates (excluding itself) get +translated to `SubObject`s with the filenames (directories stripped of course) as their +`SubObjectName`s and the file contents as their… well, their contents. It is understood, +that not all possible `SubObjectName`s can be created thus (we restrict ourselves to valid +filenames on whatever system we happen to be on). We do not consider this to be a great +loss. + +The advanced equality checks mentioned below are, in fact, implemented and will be explained +in more detail in a later post concerned with the file `beuteltier/Beuteltier/Types/Util.hs`. + +> -- | Thunks are at runtime not yet known parts of an object +> data Thunk = Thunk +> { _tId :: ThunkName -- ^ For debugging +> , _tScript :: Lazy.ByteString +> -- ^ A Thunk is, in the end, a shell script that is expected to generate +> -- 'SubObject's +> , _tPromises :: Maybe [SubObjectName] +> -- ^ Maybe we already know what our script is going to generate? +> -- This would enable us to do some more advanced equality checks under +> -- the assumption that scripts are pure +> , _tState :: ThunkState +> } +> deriving (Show) +> +> -- | Empty id, empty script, promises nothing, and with default state +> instance Default Thunk where +> def = Thunk { _tId = def +> , _tScript = def +> , _tPromises = def +> , _tState = def +> } +> +> -- | Equality on 'Thunk's ignores '_tState' and '_tId' +> instance Eq Thunk where +> a == b = and $ [ (==) `on` _tScript +> , (==) `on` _tPromises +> ] <*> pure a <*> pure b +> +> -- | The states in which a 'Thunk' can be encountered. +> data ThunkState = NotExecuted +> | Executed [SubObjectName] ThunkResult +> deriving (Show) +> +> -- | Return the default 'ThunkResult' upon forcing +> instance Default ThunkState where +> def = NotExecuted +> +> -- | Thunks generate some data during execution +> data ThunkResult = ThunkResult +> { _rOutErr, _rOutStd :: Lazy.Text +> , _rExit :: Integer -- ^ Numerical exit code (0 usually means success) +> } +> deriving (Show) +> +> -- | Empty output, and with undefined exit code (no execution took place and we can´t +> -- encode this in a numerical exit code) +> instance Default ThunkResult where +> def = ThunkResult { _rOutErr = LT.empty, _rOutStd = LT.empty +> , _rExit = undefined +> } +> +> -- | We expect identifiers for 'SubObject's to be short, thus 'String' +> type SubObjectName = String +> -- | We expect identifiers for 'Thunk's to be short, thus 'String' +> type ThunkName = String +> +> -- | @LBS.empty@ +> instance Default (Lazy.ByteString) where +> def = LBS.empty + +What good is a library for managing a backstore if it does not support search operations? +We consider the answer to be "very little" and, thus, support searches. + +> type SearchQuery f = BoolExpr (Predicate f) +> -- data Predicate f = Prim (ObjectGen f -> f Bool) +> -- | Meta (MetaData -> Bool) +> type Predicate f = ObjectGen f -> f Bool + +The heart of the `Beuteltier` library is the typeclass reproduced below. We expect +implementations of backstores to be `Monad`s so that we may be able to construct +complicated actions that act on the backstore in question. +Once we have constructed such an action using the three primitives `search`, `insert`, and +`delete` we additionally require a way to execute that action from within the `IO` +`Monad`. + +Additional primitives, such as those for "forcing" and resetting thunks, are provided in +additional libraries and, thus, later posts. + +> -- | We have the user supply the functions we use to interact with whatever backstore +> -- she uses +> class Monad functor => Beutel (functor :: * -> *) where +> data Config :: * +> run :: Config -> functor a -> IO a +> -- ^ Actually run whatever action we constructed against the backstore +> search :: SearchQuery functor -> functor [ObjectGen functor] +> -- ^ Perform a search +> insert :: Object -> functor () +> -- ^ Insert an object +> delete :: SearchQuery functor -> functor () +> -- ^ Delete the results of a search diff --git a/provider/posts/beuteltier/2.lhs b/provider/posts/beuteltier/2.lhs new file mode 100644 index 0000000..8da4711 --- /dev/null +++ b/provider/posts/beuteltier/2.lhs @@ -0,0 +1,179 @@ +--- +title: "\"Type level\" utilities for an overly complicated feedreader" +published: 2015-08-05 +tags: Beuteltier +--- + +By popular (n=1) demand we will, in this post, be taking a look at +`beuteltier/Beuteltier/Types/Util.hs` the, creatively named, module providing some "type +level" utilities. + +What I mean when I say "type level" is: additional instances (placed here when they +contain major design decisions and are not "Ord" or "Eq"), utilities not connected to +beuteltier itself (like the different flavours of `alter` below) + +In contrast to the first, this post is straightforward enough to be read linearly. + +> {-# LANGUAGE TypeSynonymInstances, FlexibleInstances #-} +> +> module Beuteltier.Types.Util +> ( -- * Constructing structures +> construct +> , construct' +> , alter +> , alter' +> -- * Dealing with 'ObjectGen's (here be dragons) +> , generateObject +> , liftGen +> -- * Equivalence on 'Object's (for nubbing) +> , Equivalent(..) +> -- * Operations on 'SearchQuery's +> , runQuery +> -- , runExpr +> ) where +> +> import Beuteltier.Types +> import Beuteltier.Types.Lenses + +We make use of lenses (as provided by [lens](http://hackage.haskell.org/package/lens)) +extensively. +We won´t dedicate a post to `beuteltier/Beuteltier/Types/Lenses.hs` because it consists +mostly of the canonical invocations of +[makeLenses](http://hackage.haskell.org/package/lens-4.12.3/docs/Control-Lens-TH.html#v:makeLenses). + +> import Data.Default +> +> import Prelude hiding (sequence) +> import Data.Traversable (sequence) +> +> import Control.Lens +> +> import Control.Monad.State.Lazy hiding (sequence) -- Why is this exported? +> +> import Data.Map (Map) +> import qualified Data.Map as Map +> +> import Data.Set (Set) +> import qualified Data.Set as Set +> +> import Data.Hashable (Hashable(..), hashUsing) +> +> import Data.Monoid ((<>)) +> +> import Data.Function (on) +> import Data.Maybe (mapMaybe) +> +> import Data.BoolExpr + +Quite often we find ourselves in the position that we want to alter some small parts of a +complicated structure. We would therefore like to write the following: + +~~~ {.haskell} +updateFoo :: Foo -> Monad Foo +updateFoo x = alter x $ do + bar <~ (constructNewBar :: Monad Bar) + buz .= (makeConstantBuz :: Buz) +~~~ + +The definitions below allow us not only to do so, but also provide some convenience +functions for constructing entirely new values and performing both operations in a pure +context. + +> alter :: Monad m => s -> StateT s m a -> m s +> -- ^ Alter a complex structure monodically +> alter = flip execStateT +> +> alter' :: s -> State s a -> s +> -- ^ Specialization of 'alter' to 'Identity' +> alter' s = runIdentity . alter s +> +> construct :: (Monad m, Default s) => StateT s m a -> m s +> -- ^ Compute a complex structure monadically +> construct = alter def +> +> construct' :: Default s => State s a -> s +> -- ^ Specialization of 'construct' to 'Identity' +> construct' = runIdentity . construct + +Sometimes we just really want to translate an `ObjectGen` to an `Object`. + +> generateObject :: Monad f => ObjectGen f -> f Object +> -- ^ Run an object generator. +> -- Use iff /all/ components of an object are needed /in RAM now/. +> generateObject gen = construct $ do +> content <- lift $ gen ^. oContent >>= sequence +> thunks <- lift $ gen ^. oThunks >>= sequence +> meta <- lift $ gen ^. oMeta +> oContent .= return (fmap return content) +> oThunks .= return (fmap return thunks) +> oMeta .= return meta +> +> liftGen :: Monad f => Object -> ObjectGen f +> -- ^ Lift an 'Object' to be an 'ObjectGen' in any 'Monad' by the power of 'return' +> liftGen obj = construct' $ do +> oContent .= return (Map.map return $ obj ^. oContent') +> oThunks .= return (map return $ obj ^. oThunks') +> oMeta .= return (obj ^. oMeta') + +We expect implementations of `insert` to perform what we call nubbing. That is removal of +`Object`s that are, in some sense, `Equivalent` to the new one we´re currently +inserting. Thus we provide a definition of what we mean, when we say `Equivalent`. + +> class Equivalent a where +> (~~) :: a -> a -> Bool +> +> -- | Two 'Object's are equivalent iff their content is identical as follows: +> -- the set of 'SubObjectName's both promised and actually occurring is identical +> -- and all 'SubObject's that actually occurr and share a 'SubObjectName' are +> -- identical (as per '(==)') +> -- +> -- Additionally we expect their 'Metadata' to be identical (as per '(==)') +> instance Equivalent Object where +> a ~~ b = (contentCompare `on` content) a b && ((==) `on` (^. oMeta')) a b +> where +> contentCompare :: (Ord k, Eq v) => Map k (Maybe v) -> Map k (Maybe v) -> Bool +> contentCompare a b = Map.foldl (&&) True $ Map.mergeWithKey combine setFalse setFalse a b +> combine _ a b = Just $ cmpMaybes a b +> setFalse = Map.map $ const False +> +> cmpMaybes Nothing _ = True +> cmpMaybes _ Nothing = True +> cmpMaybes (Just a) (Just b) = a == b + +To speed up nubbing we also provide a quick way to "cache results". To make caching +meaningful we of course expect the following to hold: + +~~~ +a ~~ b ⇒ (hash a) == (hash b) +~~~ + +Note that we do not expect the converse to hold. We will thus require a second pass over +all objects sharing a hash to determine true equivalency. + +> -- | Two 'Object's´ hashes are a first indication of whether they are 'Equivalent' +> instance Hashable Object where +> hashWithSalt = hashUsing $ \a -> (a ^. oMeta', Map.keys $ content a) +> +> instance Hashable MetaData where +> hashWithSalt = hashUsing $ Set.toList . (^. mTags) +> +> content :: Object -> Map SubObjectName (Maybe SubObject) +> content obj = promised obj <> actual obj +> actual :: Object -> Map SubObjectName (Maybe SubObject) +> actual = fmap Just . (^. oContent') +> promised :: Object -> Map SubObjectName (Maybe SubObject) +> promised = Map.fromList . map (\n -> (n, Nothing)) . concat . promises +> promises :: Object -> [[SubObjectName]] +> promises = mapMaybe (^. tPromises) . (^. oThunks') + +Evaluating a `SearchQuery` against an `ObjectGen` is, due to the structure of elementary +`SearchQuery`s quite straightforward. + +> runQuery :: Monad f => SearchQuery f -> ObjectGen f -> f Bool +> -- ^ Run a 'SearchQuery' against an 'ObjectGen' +> runQuery query obj = liftM reduceBoolExpr $ sequence $ fmap ($ obj) query +> +> -- runExpr :: Monad f => ObjectGen f -> Predicate f -> f Bool +> -- -- ^ Run a 'Predicate' (»an atomic 'SearchQuery'«) against an 'ObjectGen' +> -- runExpr obj (Prim f) = f obj +> -- runExpr obj (Meta f) = liftM f (obj ^. oMeta) diff --git a/provider/posts/beuteltier/3.md b/provider/posts/beuteltier/3.md new file mode 100644 index 0000000..9b699c1 --- /dev/null +++ b/provider/posts/beuteltier/3.md @@ -0,0 +1,19 @@ +--- +title: An Update to the Type Level Utilities of an Overly Complicated Feedreader +published: 2015-08-12 +tags: Beuteltier +--- + +I commited a change to `beuteltier/Beuteltier/Types/Util.hs` ("[…] Hashable update"). I +replaced the `Hashable` instances for `Object` and `MetaData` with a single, better +optimized function: + +~~~ {.haskell} +objHash :: Applicative f => ObjectGen f -> f Int +-- ^ Two 'ObjectGen's hashes are a first indication of whether they are 'Equivalent' +objHash o = fmap hash $ (,) <$> (Set.toList . (^. mTags) <$> o ^. oMeta) <*> (Map.keys <$> o ^. oContent) +~~~ + +The new implementation allows computation of hashes without calling `generateObject` (that +function is evil — it makes sure the entire `Object` is "in RAM" (it isn´t actually, of +course (because haskell is lazy)—but I have no guarantee of that)). diff --git a/provider/posts/beuteltier/4.lhs b/provider/posts/beuteltier/4.lhs new file mode 100644 index 0000000..478cbac --- /dev/null +++ b/provider/posts/beuteltier/4.lhs @@ -0,0 +1,220 @@ +--- +title: Business Logic for an Overly Complicated Feedreader +published: 2015-08-12 +tags: Beuteltier +--- + +It turns out I don´t have to write much in the way of comments—the source file is already +quite well documented. + +> {-# LANGUAGE KindSignatures #-} +> +> module Beuteltier +> ( -- * Forcing (Executing) 'Thunk's +> forceAllThunks +> , WithObjects +> , forceThunk +> , resetAllThunks +> , resetThunk +> -- * Higher level interactions with a 'Beutel' +> , replace +> , eqTo +> , update +> , forceAllThunks' +> , resetAllThunks' +> , module Beuteltier.Util +> , module Beuteltier.Types.Common +> ) where +> +> import Beuteltier.Types.Common +> import Beuteltier.Util +> +> import Data.Map (Map) +> import qualified Data.Map as Map +> +> import Control.Lens +> +> import qualified Data.ByteString.Lazy as LBS +> import qualified Data.ByteString as BS +> +> import qualified Data.Text as T +> import qualified Data.Text.IO as T +> import qualified Data.Text.Lazy as TL +> +> import System.FilePath +> import System.Directory +> import System.Posix.Files +> import System.Posix.Temp +> +> import System.Environment +> import System.IO +> import GHC.IO.Handle +> import System.Process as P +> import System.Exit +> +> import Control.Concurrent +> import Control.Applicative +> import Control.Monad.Morph +> import Control.Monad.Trans.State +> import Control.Monad.Writer +> import Control.Monad.Trans.Resource +> import Control.Monad (liftM) +> +> import Data.Time.Clock +> +> import Data.BoolExpr + +The distinguishing feature of our Overly Complicated Feedreader™ is it´s support for +`Thunk`s. It is thus reasonable to expect, that we have some functions to actually +interact with them. Most striking in that hypothetical set of functions would be one that +executes all thunks associated with a single `ObjectGen` and return a new one with the +content generated by `Thunk`s filled in. + +Enter `forceAllThunks`. + +> forceAllThunks :: (MonadIO f, MonadResource f) +> => (Thunk -> Bool) -- ^ Select which 'Thunk's to force +> -> ObjectGen f -> f (ObjectGen f) +> -- ^ Force all thunks in place and update '_oContent' +> -- +> -- The reason we require a 'MonadResource' instance is that we would like to store our expensive to hold in RAM 'SubObject' contents in temporary files. +> forceAllThunks pred = flip alter $ do +> pureThunks <- use oThunks >>= lift >>= mapM lift +> (newThunks, objectResults) <- mapAndUnzipM forceThunk' pureThunks +> assign oThunks $ return $ map return newThunks +> oContent %= liftM (<> mconcat objectResults) +> where +> forceThunk' thunk +> | pred thunk = forceThunk thunk +> | otherwise = return (thunk, Map.empty) + +`forceAllThunks'` (`resetAllThunks'` below, too) is tainted by the evil of +`generateObject` but included for convenience. + +> forceAllThunks' :: (MonadIO f, MonadResource f) => (Thunk -> Bool) -> StateT Object f () +> -- ^ Version of 'forceAllThunks' suitable for use with 'update' +> forceAllThunks' pred = get >>= lift . forceAllThunks pred . liftGen >>= lift . generateObject >>= put +> +> -- | Internal helper to track computations creating 'SubObject's +> type WithObjects (n :: * -> *) (m :: * -> *) = WriterT (Map SubObjectName (n SubObject)) m +> +> forceThunk_ :: (MonadIO m, MonadResource m) => Thunk -> WithObjects (ResourceT IO) m Thunk +> -- ^ Make sure the 'ThunkState' contained within a 'Thunk' is 'Executed' +> forceThunk_ = flip alter $ do +> -- (result, subObjects) <- liftIO $ runWriterT $ parseThunk thunk +> -- writer ((), subObjects) +> (result, subObjects) <- listen . hoist (hoist liftResourceT) . lift . parseThunk =<< get +> tState .= Executed (Map.keys subObjects) result +> +> forceThunk :: (MonadResource m, MonadResource n) => Thunk -> m (Thunk, Map SubObjectName (n SubObject)) +> -- ^ Force a 'Thunk' and return it in 'Executed' state together with the 'SubObject's it created during execution +> forceThunk thunk = liftM (_2 %~ fmap liftResourceT) $ (runWriterT . forceThunk_) thunk + +Quite often we want to undue the harm done by `forceAllThunks` (to save space, usually). + +> resetAllThunks :: Monad f +> => (Thunk -> Bool) -- ^ Select which 'Thunk's to reset +> -> ObjectGen f -> f (ObjectGen f) +> -- ^ Undoes 'forceAllThunks': +> -- +> -- prop> forceAllThunks (const True) obj >>= resetAllThunks (const True) >>= forceAllThunks (const True) = forceAllThunks (const True) obj +> -- +> -- This inevitably drops information ('ThunkResult's for one). +> -- +> -- In the case where 'forceAllThunks' does not drop information (i.e.: no 'SubObjectName' collisions ocurr) the following, stronger property holds: +> -- +> -- prop> forceAllThunks (const True) obj >>= resetAllThunks (const True) = return obj +> resetAllThunks pred = flip alter $ do +> thunks <- liftM (map lift) (use oThunks >>= lift) >>= sequence +> let +> (subObjectNames, newThunks) = over _1 concat $ unzip $ map resetThunk' thunks +> oThunks .= return (map return newThunks) +> oContent %= (>>= return . Map.filterWithKey (\k _ -> k `notElem` subObjectNames)) +> where +> resetThunk' thunk +> | pred thunk = resetThunk thunk +> | otherwise = ([], thunk) +> +> resetAllThunks' :: (MonadIO f, MonadResource f) => (Thunk -> Bool) -> StateT Object f () +> -- ^ Version of 'resetAllThunks' suitable for use with 'update' +> resetAllThunks' pred = get >>= lift . resetAllThunks pred . liftGen >>= lift . generateObject >>= put +> +> resetThunk :: Thunk -> ([SubObjectName], Thunk) +> -- ^ Reset a thunk and return the 'SubObjectName's of the 'SubObject's it once created. +> -- This forgets information. +> resetThunk thunk = case thunk ^. tState of +> NotExecuted -> ([], thunk) +> Executed created _ -> (created, set tState NotExecuted thunk) +> +> parseThunk :: Thunk -> WithObjects (ResourceT IO) (ResourceT IO) ThunkResult +> -- ^ Generate a runnable action from a 'Thunk' +> -- +> -- Regarding the "inner" and "outer" 'Monad' here being 'IO': We have not, at time of forcing, a neccessary connection to our backstore and thus cannot expect the monads to be anything else. +> parseThunk thunk = do +> tmpDirName <- liftIO getTemporaryDirectory +> progName <- liftIO getProgName +> let +> tmpDirName' = tmpDirName progName +> (_, tmpDir) <- allocate (mkdtemp tmpDirName') removeDirectoryRecursive +> let exec = tmpDir "exec" +> out = tmpDir "out" +> result <- liftIO $ do +> createDirectory out +> LBS.writeFile exec script +> setFileMode exec $ foldl unionFileModes nullFileMode [ownerReadMode, ownerExecuteMode] +> (Just std_in, Just std_out, Just std_err, ph) <- createProcess $ (P.proc exec []) { cwd = Just out, std_in = CreatePipe, std_out = CreatePipe, std_err = CreatePipe } +> hClose std_in +> hSetBinaryMode std_out True +> hSetBinaryMode std_err True +> std_out `sendTo` stdout +> std_err `sendTo` stderr +> construct $ do +> rOutStd <~ TL.fromStrict <$> liftIO (T.hGetContents std_out) -- Yes, sadly we have to be strict here +> rOutErr <~ TL.fromStrict <$> liftIO (T.hGetContents std_err) +> rExit <~ toNum <$> liftIO (waitForProcess ph) +> outputFiles <- liftIO $ getDirectoryContents out +> let +> outputFiles' = filter fileFilter outputFiles +> fileFilter = and . (<*>) [(/=) ".", (/=) ".."] . pure . takeFileName +> mapM_ tell =<< mapM (liftResourceT . toSubObject) outputFiles' +> return result +> where +> script = thunk ^. tScript +> toSubObject :: FilePath -> ResIO (Map SubObjectName (ResIO SubObject)) +> -- ^ Using 'ResourceT' provides us with the guarantee, that the 'FilePath' we´re referring to should still exist when we actually try to get the 'SubObject'´s contents +> toSubObject name = fmap (Map.singleton name' . return) $ construct $ do +> sContent <~ liftIO (LBS.readFile name) +> sUpdates <~ pure <$> liftIO getCurrentTime +> where +> name' = takeFileName name +> sendTo input output = do +> input' <- hDuplicate input +> forkIO $ do +> hSetBuffering input' NoBuffering +> LBS.hGetContents input' >>= LBS.hPutStr output +> return () +> toNum :: Num a => ExitCode -> a +> toNum ExitSuccess = 0 +> toNum (ExitFailure i) = fromInteger $ toInteger i + +We provide `update`, a convenience function for high-level interactions (though costly on +large sets of equivalent objects (which should not exist due to nubbing)). + +> eqTo :: Monad f => Object -> SearchQuery f +> -- ^ @eqTo o@ constructs a 'SearchQuery' that matches all 'Object's 'Equivalent' to @o@ +> -- +> -- This is costly because it calls 'generateObject' on the contents of the entire 'Beutel'. +> eqTo o = BConst ((>>= return . (~~) o) . generateObject) +> +> update :: Beutel f => SearchQuery f -> StateT Object f a -> f () +> -- ^ @update search action@ replaces /all/ 'Object's matching @search@ within the 'Beutel' by versions of themselves modified by applying @action@. +> -- +> -- Does not handle '_sUpdates'. +> -- +> -- This is costly because it calls 'generateObject' on the contents of the entire 'Beutel' /and/ all results of @search@ (in order to use 'Eq' on 'Object's to delete the results of the initial 'search'). +> update query alteration = do +> matches <- search query +> matches' <- mapM generateObject matches +> delete $ BConst ((>>= return . (`elem` matches')) . generateObject) +> mapM_ (\o -> alter o alteration >>= insert) matches' +> return () -- cgit v1.2.3