gelisam · October 25, 2022 13:52 · Oct 25, 2022 · Oct 25, 2022 · Oct 25, 2022
diff --git a/PolyConduit.hs b/PolyConduit.hs
@@ -179,7 +179,8 @@ myPolyCapL liftM src doubleConduit
 --
 -- In fact, machines are represented in pretty much the same way as conduits, as a sequence of instructions, they aren't stored as a tree or a graph of instructions as one might expect. Pretty much the only difference is that a machine's `awaits` instruction takes an extra argument specifying which inputs it is awaiting from.
 
--- Since I suspect that most conduit stacks will either have IO or Identity as
+
+-- Anyway, since I suspect that most conduit stacks will either have IO or Identity as
 -- a base monad, here are two specializations which fill-in the 'liftM'
 -- parameter.
 

diff --git a/PolyConduit.hs b/PolyConduit.hs
@@ -173,6 +173,11 @@ myPolyCapL liftM src doubleConduit
         Nothing -> do
           pure ()
 
+-- The types line up, but how does this work? We're compiling down to a single layer, so how did we persuade conduit to magically create a ConduitT with more than one input?
+--
+-- The secret is, we didn't! Conduits are combined in the same way machines are: by lining up and then eliminating matching `yield` and `await` instructions. Thus, after we have attached an input and eliminated a ConduitT layer, all the `await` calls which were reading from that input have been replaced by a fragment of the code from that input, namely the code between two consecutive `yield`s. That's the magic of representing computations as a sequence of instructions, we can splice and rearrange those instructions!
+--
+-- In fact, machines are represented in pretty much the same way as conduits, as a sequence of instructions, they aren't stored as a tree or a graph of instructions as one might expect. Pretty much the only difference is that a machine's `awaits` instruction takes an extra argument specifying which inputs it is awaiting from.
 
 -- Since I suspect that most conduit stacks will either have IO or Identity as
 -- a base monad, here are two specializations which fill-in the 'liftM'

diff --git a/PolyConduit.hs b/PolyConduit.hs
@@ -0,0 +1,287 @@
+-- Follow up to [1], praising @viercc's better solution [2].
+--
+-- [1] https://gist.github.com/gelisam/a8bee217410b74f030c21f782de23d11
+-- [2] https://www.reddit.com/r/haskell/comments/yb9bi4/comment/itfh07z
+--
+-- The challenge is still to implement a function which takes in three
+-- Conduits, and uses the values from the first Conduit in order to decide
+-- which of the other two Conduits to sample from next. Something like this:
+--
+--     example bools ints strings = do
+--       maybeBool <- awaitMaybe bools
+--       case maybeBool of
+--         Nothing -> do
+--           liftIO $ putStrLn "it's over"
+--         Just True -> do
+--           int <- await ints
+--           yield (int + 4)
+--           example
+--         Just False -> do
+--           str <- await strings
+--           liftIO $ putStrLn str
+--           yield $ length str
+--           example
+--
+-- And I still want a solution which works for n Conduits, not just three. But
+-- this time, instead of switching to the notoriously-complex "machines"
+-- package, I'll stick to conduit, thanks to @viercc's great tip of using
+-- Conduit _transformers_.
+{-# LANGUAGE LambdaCase, RankNTypes, ScopedTypeVariables #-}
+module Main where
+
+import Test.DocTest
+
+import Control.Monad.IO.Class (MonadIO(liftIO))
+import Control.Monad.Trans.Class (lift)
+import Data.Conduit hiding (Source)
+import Data.Functor.Identity (Identity(runIdentity))
+import Data.Void (absurd)
+
+-- $setup
+-- >>> import qualified Data.Conduit.Combinators as Conduit
+-- >>> import qualified Data.Conduit.List as Conduit
+-- >>> :{
+-- let testConduit
+--       :: Show o
+--       => ConduitT () o IO ()
+--       -> IO ()
+--     testConduit source = do
+--       os <- connect source Conduit.consume
+--       print os
+-- :}
+
+
+-- In the previous post, I used a type-level list to keep track of the
+-- multiple inputs. This time, we'll represent a conduit with n inputs as a
+-- stack of n 'ConduitT' transformers.
+
+type Source  m o       = ConduitT () o m ()
+type Process m a o     = ConduitT a o m ()
+type Tee     m a b o   = ConduitT a o
+                           (ConduitT b Void m) ()
+type Tee3    m a b c o = ConduitT a o
+                           (ConduitT b Void
+                             (ConduitT c Void m)) ()
+
+-- Each 'ConduitT' layer has two type arguments; one for the elements you
+-- await from the single input you get when you use conduits the normal way,
+-- and one for the elements you send downstream. With n layers, we can thus
+-- specify n inputs, which is what we want, but we must also specify n
+-- outputs, which is n-1 too many. Using 'Void' for all but one of the outputs
+-- clarifies that only one of them is actually used.
+
+-- Now that we have unexpectedly managed to represent conduits which take more
+-- than one input, how can we attach those inputs? In the previous post, I
+-- defined a versatile 'polyCapL' function which could attach a 'Source' to a
+-- number of different machines:
+--
+--     -- polyCapL :: Source m a -> Process m a o     -> Source  m o
+--     -- polyCapL :: Source m a -> Tee     m a b o   -> Process m b o
+--     -- polyCapL :: Source m a -> Tee3    m a b c o -> Tee     m b c o
+--     polyCapL
+--       :: Source m a1
+--       -> PolyTee m (a1 ': as) o
+--       -> PolyTee m as o
+--
+-- I would like to construct a similar function here. As before, I want to
+-- attach a single source as the first input 'a1', while leaving the remaining
+-- inputs 'as' untouched. In this conduit stack representation, the first
+-- input is the input of the outermost 'ConduitT' layer, while the remaining
+-- inputs are specified by the rest of the layers, 'mm':
+--
+--     myPolyCapL
+--       :: Source m a1 ()
+--       -> ConduitT a1 o mm r
+--       -> mm r
+--
+-- In order for this type to specialize to these,
+--
+--     myPolyCapL :: Source m a -> Process m a o     -> Source  m o
+--     myPolyCapL :: Source m a -> Tee     m a b o   -> Process m b o
+--     myPolyCapL :: Source m a -> Tee3    m a b c o -> Tee     m b c o
+--
+-- I need to somehow specify that the monad at the base of the 'mm' stack must
+-- be 'm'. When 'm' is IO, this is represented using a 'MonadIO' constraint:
+--
+--     liftIO :: forall x. IO x -> mm x
+--
+-- There exists a @MonadBase m@ constraint which generalizes 'MonadIO':
+--
+--     liftBase :: forall x. m x -> mm x
+--
+-- But instead of adding an orphan @MonadBase m (ConduitT i o mm)@ instance,
+-- I'll just ask for an extra @forall x. m x -> mm x@ parameter:
+--
+--     myPolyCapL
+--       :: (forall x. m x -> mm x)
+--       => Source m a1 ()
+--       -> ConduitT a1 o mm r
+--       -> mm r
+
+-- Another way in which the type above isn't quite right is that the output
+-- type 'o' disappears. The fix is quite simple: instead of only concretizing
+-- the very outermost ConduitT layer and leaving the rest abstract, I
+-- concretize the _two_ outermost ConduitT layers:
+myPolyCapL
+  :: forall a b o m mm r. (Monad m, Monad mm)
+  => (forall x. m x -> mm x)
+  -> ConduitT () a m ()
+  -> ConduitT a o (ConduitT b Void mm) r
+  -> ConduitT b o mm r
+myPolyCapL liftM src doubleConduit
+  = connect src' doubleConduit''
+  where
+    -- The implementation looks very different from polyCapL's, but it's the
+    -- same idea. It just so happens that the conduit API is expressive enough
+    -- that we can achieve our goal via several small transformations, without
+    -- having to unravel the conduits into sequences of instructions.
+
+    src' :: ConduitT () a (ConduitT b o mm) ()
+    src'
+      = transPipe (lift . liftM) src
+
+    doubleConduit' :: ConduitT a o (ConduitT b o mm) r
+    doubleConduit'
+      = transPipe (mapOutput absurd) doubleConduit
+
+    -- The 'doubleConduit' transformations swap the 'Void' and 'o' output
+    -- types.
+    -- At this point one might wonder why I chose the convention of
+    -- using 'Void' for all but the _outermost_ layer. If I had chosen the
+    -- innermost layer instead, the 'o' would already be in the right
+    -- position, and I wouldn't need to perform any transformations on
+    -- 'doubleConduit'!
+    -- The reason is simply to provide a more ergonomic experience to the
+    -- user: by choosing the outermost layer, the user can emit by writing
+    --
+    --     emit o
+    --
+    -- Whereas if I had chosen the innermost layer, the user would have to
+    -- write this instead.
+    --
+    --     lift $ lift $ emit o
+    doubleConduit'' :: ConduitT a Void (ConduitT b o mm) r
+    doubleConduit''
+      = fuseUpstream doubleConduit' outputToInner
+
+    outputToInner :: ConduitT o Void (ConduitT b o mm) ()
+    outputToInner = do
+      await >>= \case
+        Just o -> do
+          lift $ yield o
+          outputToInner
+        Nothing -> do
+          pure ()
+
+
+-- Since I suspect that most conduit stacks will either have IO or Identity as
+-- a base monad, here are two specializations which fill-in the 'liftM'
+-- parameter.
+
+-- polyCapIO :: Source IO a -> Process IO a o     -> Source  IO o
+-- polyCapIO :: Source IO a -> Tee     IO a b o   -> Process IO b o
+-- polyCapIO :: Source IO a -> Tee3    IO a b c o -> Tee     IO b c o
+polyCapIO
+  :: forall a b o mm r. MonadIO mm
+  => Source IO a
+  -> ConduitT a o (ConduitT b Void mm) r
+  -> ConduitT b o mm r
+polyCapIO
+  = myPolyCapL liftIO
+
+polyCap
+  :: forall a b o mm r. Monad mm
+  => ConduitT () a Identity ()
+  -> ConduitT a o (ConduitT b Void mm) r
+  -> ConduitT b o mm r
+polyCap
+  = myPolyCapL (pure . runIdentity)
+
+
+-- In the previous post, I also implemented a 'polyCapR' function for
+-- converting a fully-saturated 'PolyTee' into a 'Source', so it can be used
+-- with existing machine combinators.
+--
+--     polyCapR
+--       :: PolyTee m '[] b
+--       -> Source m b
+--
+-- With the conduit layers representation, such a function is not needed.
+-- After attaching all but one of the inputs, the result is a single ConduitT
+-- layer, that is, a normal conduit which can already be used with existing
+-- conduit combinators. In particular, 'fuse' can be used to attach the last
+-- input, thus converting the conduit to a source.
+
+
+-- We can finally implement the challenge; twice, in order to exercise both
+-- the IO and Identity specializations.
+
+-- |
+-- >>> :{
+-- testConduit
+--   $ fuse (Conduit.yieldMany ["foo", "bar", "quux"])
+--   $ polyCapIO (Conduit.yieldMany [1..])
+--   $ polyCapIO (Conduit.yieldMany [True, False, False, True, False])
+--   $ exampleIO
+-- :}
+-- foo
+-- bar
+-- quux
+-- it's over
+-- [5,3,3,6,4]
+exampleIO
+  :: ConduitT Bool Int
+       (ConduitT Int Void
+         (ConduitT String Void IO))
+       ()
+exampleIO = do
+  maybeBool <- await
+  case maybeBool of
+    Nothing -> do
+      liftIO $ putStrLn "it's over"
+    Just True -> do
+      Just int <- lift await
+      yield (int + 4)
+      exampleIO
+    Just False -> do
+      Just str <- lift $ lift await
+      liftIO $ putStrLn str
+      yield $ length str
+      exampleIO
+
+-- |
+-- >>> :{
+-- testConduit
+--   $ fuse (Conduit.yieldMany ["foo", "bar", "quux"])
+--   $ polyCap (Conduit.yieldMany [1..])
+--   $ polyCap (Conduit.yieldMany [True, False, False, True, False])
+--   $ example
+-- :}
+-- [5,3,3,6,4]
+example
+  :: forall m. MonadFail m
+  => ConduitT Bool Int
+       (ConduitT Int Void
+         (ConduitT String Void m))
+       ()
+example = do
+  maybeBool <- await
+  case maybeBool of
+    Nothing -> do
+      pure ()
+    Just True -> do
+      Just int <- lift await
+      yield (int + 4)
+      example
+    Just False -> do
+      Just str <- lift $ lift await
+      yield $ length str
+      example
+
+main :: IO ()
+main = do
+  putStrLn "typechecks."
+
+test :: IO ()
+test = do
+  doctest ["src/Main.hs"]