Diffstat (limited to 'users/Profpatsch/netencode')
 -rw-r--r--  users/Profpatsch/netencode/Netencode.hs       | 433
 -rw-r--r--  users/Profpatsch/netencode/Netencode/Parse.hs | 102
 -rw-r--r--  users/Profpatsch/netencode/README.md          |   2
 -rw-r--r--  users/Profpatsch/netencode/default.nix        |  46
 -rw-r--r--  users/Profpatsch/netencode/netencode.cabal    |  74
 -rw-r--r--  users/Profpatsch/netencode/netencode.rs       | 112
 6 files changed, 740 insertions(+), 29 deletions(-)
diff --git a/users/Profpatsch/netencode/Netencode.hs b/users/Profpatsch/netencode/Netencode.hs
new file mode 100644
index 0000000000..ca93ab2fef
--- /dev/null
+++ b/users/Profpatsch/netencode/Netencode.hs
@@ -0,0 +1,433 @@
+{-# LANGUAGE AllowAmbiguousTypes #-}
+{-# LANGUAGE QuasiQuotes #-}
+{-# LANGUAGE TemplateHaskell #-}
+
+module Netencode where
+
+import Control.Applicative (many)
+import Data.Attoparsec.ByteString qualified as Atto
+import Data.Attoparsec.ByteString.Char8 qualified as Atto.Char
+import Data.ByteString qualified as ByteString
+import Data.ByteString.Builder (Builder)
+import Data.ByteString.Builder qualified as Builder
+import Data.ByteString.Lazy qualified as ByteString.Lazy
+import Data.Fix (Fix (Fix))
+import Data.Fix qualified as Fix
+import Data.Functor.Classes (Eq1 (liftEq))
+import Data.Int (Int16, Int32, Int64, Int8)
+import Data.Map.NonEmpty (NEMap)
+import Data.Map.NonEmpty qualified as NEMap
+import Data.Semigroup qualified as Semi
+import Data.String (IsString)
+import Data.Word (Word16, Word32, Word64)
+import GHC.Exts (fromString)
+import Hedgehog qualified as Hedge
+import Hedgehog.Gen qualified as Gen
+import Hedgehog.Range qualified as Range
+import PossehlAnalyticsPrelude
+import Text.Show.Deriving
+import Prelude hiding (sum)
+
+-- | Netencode type base functor.
+--
+-- Recursive elements have a @rec@.
+data TF rec
+  = -- | Unit value
+    Unit
+  | -- | Boolean (2^1)
+    N1 Bool
+  | -- | Byte (2^3)
+    N3 Word8
+  | -- | 64-bit Natural (2^6)
+    N6 Word64
+  | -- | 64-bit Integer (2^6)
+    I6 Int64
+  | -- | Unicode Text
+    Text Text
+  | -- | Arbitrary Bytestring
+    Bytes ByteString
+  | -- | A constructor of a(n open) Sum
+    Sum (Tag Text rec)
+  | -- | Record
+    Record (NEMap Text rec)
+  | -- | List
+    List [rec]
+  deriving stock (Show, Eq, Functor)
+
+instance Eq1 TF where
+  liftEq _ Unit Unit = True
+  liftEq _ (N1 b) (N1 b') = b == b'
+  liftEq _ (N3 w8) (N3 w8') = w8 == w8'
+  liftEq _ (N6 w64) (N6 w64') = w64 == w64'
+  liftEq _ (I6 i64) (I6 i64') = i64 == i64'
+  liftEq _ (Text t) (Text t') = t == t'
+  liftEq _ (Bytes b) (Bytes b') = b == b'
+  liftEq eq (Sum t) (Sum t') = t.tagTag == t'.tagTag && eq (t.tagVal) (t'.tagVal)
+  liftEq eq (Record m) (Record m') = liftEq eq m m'
+  liftEq eq (List xs) (List xs') = liftEq eq xs xs'
+  liftEq _ _ _ = False
+
+-- | A tagged value
+data Tag tag val = Tag
+  { tagTag :: tag,
+    tagVal :: val
+  }
+  deriving stock (Show, Eq, Functor)
+
+$(Text.Show.Deriving.deriveShow1 ''Tag)
+$(Text.Show.Deriving.deriveShow1 ''TF)
+
+-- | The Netencode type
+newtype T = T {unT :: Fix TF}
+  deriving stock (Eq, Show)
+
+-- | Create a unit
+unit :: T
+unit = T $ Fix Unit
+
+-- | Create a boolean
+n1 :: Bool -> T
+n1 = T . Fix . N1
+
+-- | Create a byte
+n3 :: Word8 -> T
+n3 = T . Fix . N3
+
+-- | Create a 64-bit natural
+n6 :: Word64 -> T
+n6 = T . Fix . N6
+
+-- | Create a 64-bit integer
+i6 :: Int64 -> T
+i6 = T . Fix . I6
+
+-- | Create a UTF-8 unicode text
+text :: Text -> T
+text = T . Fix . Text
+
+-- | Create an arbitrary bytestring
+bytes :: ByteString -> T
+bytes = T . Fix . Bytes
+
+-- | Create a tagged value from a tag name and a value
+tag :: Text -> T -> T
+tag key val = T $ Fix $ Sum $ coerce @(Tag Text T) @(Tag Text (Fix TF)) $ Tag key val
+
+-- | Create a record from a non-empty map
+record :: NEMap Text T -> T
+record = T . Fix . Record . coerce @(NEMap Text T) @(NEMap Text (Fix TF))
+
+-- | Create a list
+list :: [T] -> T
+list = T . Fix . List . coerce @[T] @([Fix TF])
+
+-- | Stable encoding of a netencode value. Record keys will be sorted lexicographically ascending.
+netencodeEncodeStable :: T -> Builder
+netencodeEncodeStable (T fix) = Fix.foldFix (netencodeEncodeStableF id) fix
+
+-- | Stable encoding of a netencode functor value. Record keys will be sorted lexicographically ascending.
+--
+-- The given function is used for encoding the recursive values.
+netencodeEncodeStableF :: (rec -> Builder) -> TF rec -> Builder
+netencodeEncodeStableF inner tf = builder go
+  where
+    -- TODO: directly pass in BL?
+    innerBL = fromBuilder . inner
+    go = case tf of
+      Unit -> "u,"
+      N1 False -> "n1:0,"
+      N1 True -> "n1:1,"
+      N3 w8 -> "n3:" <> fromBuilder (Builder.word8Dec w8) <> ","
+      N6 w64 -> "n6:" <> fromBuilder (Builder.word64Dec w64) <> ","
+      I6 i64 -> "i6:" <> fromBuilder (Builder.int64Dec i64) <> ","
+      Text t ->
+        let b = fromText t
+         in "t" <> builderLenDec b <> ":" <> b <> ","
+      Bytes b -> "b" <> builderLenDec (fromByteString b) <> ":" <> fromByteString b <> ","
+      Sum (Tag key val) -> encTag key val
+      Record m ->
+        -- NEMap uses Map internally, which folds over the keys in ascending lexicographic order.
+        -- Since these are `Text` in our case, this is stable.
+        let mBuilder = m & NEMap.foldMapWithKey encTag
+         in "{" <> builderLenDec mBuilder <> ":" <> mBuilder <> "}"
+      List xs ->
+        let xsBuilder = xs <&> innerBL & mconcat
+         in "[" <> builderLenDec xsBuilder <> ":" <> xsBuilder <> "]"
+      where
+        encTag key val =
+          let bKey = fromText key
+           in "<" <> builderLenDec bKey <> ":" <> bKey <> "|" <> innerBL val
+
+-- | A builder that knows its own size in bytes
+newtype BL = BL (Builder, Semi.Sum Natural)
+  deriving newtype (Monoid, Semigroup)
+
+instance IsString BL where
+  fromString s =
+    BL
+      ( fromString @Builder s,
+        fromString @ByteString s
+          & ByteString.length
+          & intToNatural
+          & fromMaybe 0
+          & Semi.Sum
+      )
+
+-- | Retrieve the builder
+builder :: BL -> Builder
+builder (BL (b, _)) = b
+
+-- | Retrieve the bytestring length
+builderLen :: BL -> Natural
+builderLen (BL (_, len)) = Semi.getSum $ len
+
+-- | Take a 'BL' and create a new 'BL' that represents the length as a decimal integer
+builderLenDec :: BL -> BL
+builderLenDec (BL (_, len)) =
+  let b = Builder.intDec $ (len & Semi.getSum & fromIntegral @Natural @Int)
+   in b & fromBuilder
+
+-- | Create a 'BL' from a 'Builder'.
+--
+-- Not efficient, goes back to a lazy bytestring to get the length
+fromBuilder :: Builder -> BL
+fromBuilder b =
+  BL
+    ( b,
+      b
+        & Builder.toLazyByteString
+        & ByteString.Lazy.length
+        & fromIntegral @Int64 @Natural
+        & Semi.Sum
+    )
+
+-- | Create a 'BL' from a 'ByteString'.
+fromByteString :: ByteString -> BL
+fromByteString b =
+  BL
+    ( Builder.byteString b,
+      b
+        & ByteString.length
+        & fromIntegral @Int @Natural
+        & Semi.Sum
+    )
+
+-- | Create a 'BL' from a 'Text'.
+fromText :: Text -> BL
+fromText t = t & textToBytesUtf8 & fromByteString
+
+-- | Parser for a netencode value.
+netencodeParser :: Atto.Parser T
+netencodeParser = T <$> go
+  where
+    go = Fix <$> netencodeParserF go
+
+-- | Parser for one level of a netencode value. Requires a parser for the recursion.
+netencodeParserF :: Atto.Parser rec -> Atto.Parser (TF rec)
+netencodeParserF inner = do
+  typeTag <- Atto.Char.anyChar
+  case typeTag of
+    't' -> Text <$> textParser
+    'b' -> Bytes <$> bytesParser
+    'u' -> unitParser
+    '<' -> Sum <$> tagParser
+    '{' -> Record <$> recordParser
+    '[' -> List <$> listParser
+    'n' -> naturalParser
+    'i' -> I6 <$> intParser
+    c -> fail ([c] <> " is not a valid netencode tag")
+  where
+    bytesParser = do
+      len <- boundedDecimalFail Atto.<?> "bytes is missing a digit specifying the length"
+      _ <- Atto.Char.char ':' Atto.<?> "bytes did not have : after length"
+      bytes' <- Atto.take len
+      _ <- Atto.Char.char ',' Atto.<?> "bytes did not end with ,"
+      pure bytes'
+
+    textParser = do
+      len <- boundedDecimalFail Atto.<?> "text is missing a digit specifying the length"
+      _ <- Atto.Char.char ':' Atto.<?> "text did not have : after length"
+      text' <-
+        Atto.take len <&> bytesToTextUtf8 >>= \case
+          Left err -> fail [fmt|cannot decode text as utf8: {err & prettyError}|]
+          Right t -> pure t
+      _ <- Atto.Char.char ',' Atto.<?> "text did not end with ,"
+      pure text'
+
+    unitParser = do
+      _ <- Atto.Char.char ',' Atto.<?> "unit did not end with ,"
+      pure $ Unit
+
+    tagParser = do
+      len <- boundedDecimalFail Atto.<?> "tag is missing a digit specifying the length"
+      _ <- Atto.Char.char ':' Atto.<?> "tag did not have : after length"
+      tagTag <-
+        Atto.take len <&> bytesToTextUtf8 >>= \case
+          Left err -> fail [fmt|cannot decode tag key as utf8: {err & prettyError}|]
+          Right t -> pure t
+      _ <- Atto.Char.char '|' Atto.<?> "tag was missing the key/value separator (|)"
+      tagVal <- inner
+      pure $ Tag {..}
+
+    recordParser = do
+      -- TODO: the record does not use its inner length because we are descending into the inner parsers.
+      -- This is a smell! In theory it can be used to skip parsing the whole inner keys.
+      _len <- boundedDecimalFail Atto.<?> "record is missing a digit specifying the length"
+      _ <- Atto.Char.char ':' Atto.<?> "record did not have : after length"
+      record' <-
+        many (Atto.Char.char '<' >> tagParser) <&> nonEmpty >>= \case
+          Nothing -> fail "record is not allowed to have 0 elements"
+          Just tags ->
+            pure $
+              tags
+                <&> (\t -> (t.tagTag, t.tagVal))
+                -- later keys are preferred if they are duplicates, according to the standard
+                & NEMap.fromList
+      _ <- Atto.Char.char '}' Atto.<?> "record did not end with }"
+      pure record'
+
+    listParser = do
+      -- TODO: the list does not use its inner length because we are descending into the inner parsers.
+      -- This is a smell! In theory it can be used to skip parsing the inner elements.
+      _len <- boundedDecimalFail Atto.<?> "list is missing a digit specifying the length"
+      _ <- Atto.Char.char ':' Atto.<?> "list did not have : after length"
+      -- TODO: allow empty lists?
+      list' <- many inner
+      _ <- Atto.Char.char ']' Atto.<?> "list did not end with ]"
+      pure list'
+
+    intParser = do
+      let p :: forall parseSize. (Bounded parseSize, Integral parseSize) => (Integer -> Atto.Parser Int64)
+          p n = do
+            _ <- Atto.Char.char ':' Atto.<?> [fmt|i{n & show} did not have : after length|]
+            isNegative <- Atto.option False (Atto.Char.char '-' <&> \_c -> True)
+            int <-
+              boundedDecimal @parseSize >>= \case
+                Nothing -> fail [fmt|cannot parse into i{n & show}, the number is too big (would overflow)|]
+                Just i ->
+                  pure $
+                    if isNegative
+                      then -- TODO: this should already be done in the decimal parser; @minBound@ cannot be parsed because its magnitude is one more than @maxBound@!
+                        (-i)
+                      else i
+            _ <- Atto.Char.char ',' Atto.<?> [fmt|i{n & show} did not end with ,|]
+            pure $ fromIntegral @parseSize @Int64 int
+      digit <- Atto.Char.digit
+      case digit of
+        -- TODO: separate parser for i1 and i2 that makes sure the boundaries are right!
+        '1' -> p @Int8 1
+        '2' -> p @Int8 2
+        '3' -> p @Int8 3
+        '4' -> p @Int16 4
+        '5' -> p @Int32 5
+        '6' -> p @Int64 6
+        '7' -> fail [fmt|i parser only supports numbers up to size 6, was 7|]
+        '8' -> fail [fmt|i parser only supports numbers up to size 6, was 8|]
+        '9' -> fail [fmt|i parser only supports numbers up to size 6, was 9|]
+        o -> fail [fmt|i number with length {o & show} not possible|]
+
+    naturalParser = do
+      let p :: forall parseSize finalSize. (Bounded parseSize, Integral parseSize, Num finalSize) => (Integer -> Atto.Parser finalSize)
+          p n = do
+            _ <- Atto.Char.char ':' Atto.<?> [fmt|n{n & show} did not have : after length|]
+            int <-
+              boundedDecimal @parseSize >>= \case
+                Nothing -> fail [fmt|cannot parse into n{n & show}, the number is too big (would overflow)|]
+                Just i -> pure i
+
+            _ <- Atto.Char.char ',' Atto.<?> [fmt|n{n & show} did not end with ,|]
+            pure $ fromIntegral @parseSize @finalSize int
+      let b n = do
+            _ <- Atto.Char.char ':' Atto.<?> [fmt|n{n & show} did not have : after length|]
+            bool <-
+              (Atto.Char.char '0' >> pure False)
+                <|> (Atto.Char.char '1' >> pure True)
+            _ <- Atto.Char.char ',' Atto.<?> [fmt|n{n & show} did not end with ,|]
+            pure bool
+
+      digit <- Atto.Char.digit
+      case digit of
+        -- TODO: separate parser for n1 and n2 that makes sure the boundaries are right!
+        '1' -> N1 <$> b 1
+        '2' -> N3 <$> p @Word8 @Word8 2
+        '3' -> N3 <$> p @Word8 @Word8 3
+        '4' -> N6 <$> p @Word16 @Word64 4
+        '5' -> N6 <$> p @Word32 @Word64 5
+        '6' -> N6 <$> p @Word64 @Word64 6
+        '7' -> fail [fmt|n parser only supports numbers up to size 6, was 7|]
+        '8' -> fail [fmt|n parser only supports numbers up to size 6, was 8|]
+        '9' -> fail [fmt|n parser only supports numbers up to size 6, was 9|]
+        o -> fail [fmt|n number with length {o & show} not possible|]
+
+-- | Parser for a bounded decimal that does not overflow the decimal.
+--
+--  via https://www.extrema.is/blog/2021/10/20/parsing-bounded-integers
+boundedDecimal :: forall a. (Bounded a, Integral a) => Atto.Parser (Maybe a)
+boundedDecimal = do
+  i :: Integer <- decimal
+  pure $
+    if (i :: Integer) > fromIntegral (maxBound :: a)
+      then Nothing
+      else Just $ fromIntegral i
+  where
+    -- Copied from @Attoparsec.Text@ and adjusted to bytestring
+    decimal :: (Integral a2) => Atto.Parser a2
+    decimal = ByteString.foldl' step 0 <$> Atto.Char.takeWhile1 Atto.Char.isDigit
+      where
+        step a c = a * 10 + fromIntegral (c - 48)
+{-# SPECIALIZE boundedDecimal :: Atto.Parser (Maybe Int) #-}
+{-# SPECIALIZE boundedDecimal :: Atto.Parser (Maybe Int64) #-}
+{-# SPECIALIZE boundedDecimal :: Atto.Parser (Maybe Word8) #-}
+{-# SPECIALIZE boundedDecimal :: Atto.Parser (Maybe Word64) #-}
+
+-- | 'boundedDecimal', but fail the parser if the decimal overflows.
+boundedDecimalFail :: Atto.Parser Int
+boundedDecimalFail =
+  boundedDecimal >>= \case
+    Nothing -> fail "decimal out of range"
+    Just a -> pure a
+
+-- | Hedgehog generator for a netencode value.
+genNetencode :: Hedge.MonadGen m => m T
+genNetencode =
+  Gen.recursive
+    Gen.choice
+    [ -- these are bundled into one Gen, so that scalar elements get chosen less frequently, and the generator produces nicely nested examples
+      Gen.frequency
+        [ (1, pure unit),
+          (1, n1 <$> Gen.bool),
+          (1, n3 <$> Gen.element [0, 1, 5]),
+          (1, n6 <$> Gen.element [0, 1, 5]),
+          (1, i6 <$> Gen.element [-1, 1, 5]),
+          (2, text <$> Gen.text (Range.linear 1 10) Gen.lower),
+          (2, bytes <$> Gen.bytes (Range.linear 1 10))
+        ]
+    ]
+    [ do
+        key <- Gen.text (Range.linear 3 10) Gen.lower
+        val <- genNetencode
+        pure $ tag key val,
+      record
+        <$> ( let k = Gen.text (Range.linear 3 10) Gen.lower
+                  v = genNetencode
+               in NEMap.insertMap
+                    <$> k
+                    <*> v
+                    <*> ( (Gen.map (Range.linear 0 3)) $
+                            (,) <$> k <*> v
+                        )
+            )
+    ]
+
+-- | Hedgehog property: encoding a netencode value and parsing it again returns the same result.
+prop_netencodeRoundtrip :: Hedge.Property
+prop_netencodeRoundtrip = Hedge.property $ do
+  enc <- Hedge.forAll genNetencode
+  ( Atto.parseOnly
+      netencodeParser
+      ( netencodeEncodeStable enc
+          & Builder.toLazyByteString
+          & toStrictBytes
+      )
+    )
+    Hedge.=== (Right enc)
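A minimal usage sketch for the constructors and stable encoder above (not part of the commit): it assumes the module builds as shown and reproduces the two-field record example from the README further down, so the expected output is grounded in that file.

```haskell
{-# LANGUAGE OverloadedStrings #-}

import qualified Data.ByteString.Builder as Builder
import qualified Data.List.NonEmpty as NonEmpty
import qualified Data.Map.NonEmpty as NEMap
import Netencode
import System.IO (stdout)

-- Build the record {foo = unit, x = text "baz"} and print its stable encoding.
main :: IO ()
main = do
  let val =
        record $
          NEMap.fromList $
            ("foo", unit) NonEmpty.:| [("x", text "baz")]
  -- Keys are emitted in ascending order, so this should print:
  -- {21:<3:foo|u,<1:x|t3:baz,}
  Builder.hPutBuilder stdout (netencodeEncodeStable val)
```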
diff --git a/users/Profpatsch/netencode/Netencode/Parse.hs b/users/Profpatsch/netencode/Netencode/Parse.hs
new file mode 100644
index 0000000000..184fb5f912
--- /dev/null
+++ b/users/Profpatsch/netencode/Netencode/Parse.hs
@@ -0,0 +1,102 @@
+{-# LANGUAGE QuasiQuotes #-}
+
+module Netencode.Parse where
+
+import Control.Category qualified
+import Control.Selective (Selective)
+import Data.Error.Tree
+import Data.Fix (Fix (..))
+import Data.Functor.Compose
+import Data.List qualified as List
+import Data.Map.NonEmpty (NEMap)
+import Data.Map.NonEmpty qualified as NEMap
+import Data.Semigroupoid qualified as Semigroupiod
+import Data.Semigroupoid qualified as Semigroupoid
+import Data.Text qualified as Text
+import Netencode qualified
+import PossehlAnalyticsPrelude
+import Prelude hiding (log)
+
+newtype Parse from to
+  = -- TODO: the way @Context = [Text]@ has to be forwarded to everything is kinda shitty.
+    -- This is essentially just a difference list, and can probably be treated as a function in the output?
+    Parse (([Text], from) -> Validation (NonEmpty ErrorTree) ([Text], to))
+  deriving
+    (Functor, Applicative, Selective)
+    via ( Compose
+            ( Compose
+                ((->) ([Text], from))
+                (Validation (NonEmpty ErrorTree))
+            )
+            ((,) [Text])
+        )
+
+instance Semigroupoid Parse where
+  o p2 p1 = Parse $ \from -> case runParse' p1 from of
+    Failure err -> Failure err
+    Success to1 -> runParse' p2 to1
+
+instance Category Parse where
+  (.) = Semigroupoid.o
+  id = Parse $ \t -> Success t
+
+runParse :: Error -> Parse from to -> from -> Either ErrorTree to
+runParse errMsg parser t =
+  (["$"], t)
+    & runParse' parser
+    <&> snd
+    & first (nestedMultiError errMsg)
+    & validationToEither
+
+runParse' :: Parse from to -> ([Text], from) -> Validation (NonEmpty ErrorTree) ([Text], to)
+runParse' (Parse f) from = f from
+
+parseEither :: (([Text], from) -> Either ErrorTree ([Text], to)) -> Parse from to
+parseEither f = Parse $ \from -> f from & eitherToListValidation
+
+tAs :: (Netencode.TF (Fix Netencode.TF) -> Either ([Text] -> ErrorTree) to) -> Parse Netencode.T to
+tAs f = parseEither ((\(context, Netencode.T (Fix tf)) -> f tf & bimap ($ context) (context,)))
+
+key :: Text -> Parse (NEMap Text to) to
+key name = parseEither $ \(context, rec) ->
+  rec
+    & NEMap.lookup name
+    & annotate (errorTreeContext (showContext context) [fmt|Key "{name}" does not exist|])
+    <&> (addContext name context,)
+
+showContext :: [Text] -> Text
+showContext context = context & List.reverse & Text.intercalate "."
+
+addContext :: a -> [a] -> [a]
+addContext = (:)
+
+asText :: Parse Netencode.T Text
+asText = tAs $ \case
+  Netencode.Text t -> pure t
+  other -> typeError "of text" other
+
+asBytes :: Parse Netencode.T ByteString
+asBytes = tAs $ \case
+  Netencode.Bytes b -> pure b
+  other -> typeError "of bytes" other
+
+asRecord :: Parse Netencode.T (NEMap Text (Netencode.T))
+asRecord = tAs $ \case
+  Netencode.Record rec -> pure (rec <&> Netencode.T)
+  other -> typeError "a record" other
+
+typeError :: Text -> Netencode.TF ignored -> (Either ([Text] -> ErrorTree) b)
+typeError should is = do
+  let otherS = is <&> (\_ -> ("…" :: String)) & show
+  Left $ \context -> errorTreeContext (showContext context) [fmt|Value is not {should}, but a {otherS}|]
+
+orThrowParseError ::
+  Parse (Either Error to) to
+orThrowParseError = Parse $ \case
+  (context, Left err) ->
+    err
+      & singleError
+      & errorTreeContext (showContext context)
+      & singleton
+      & Failure
+  (context, Right to) -> Success (context, to)
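A short usage sketch for the combinators above (not part of the commit): it extracts the `x` field of a record as text by composing the parsers through the `Category` instance. `newError` is assumed to be the `Error` constructor exported by `PossehlAnalyticsPrelude`; the error message is illustrative.

```haskell
{-# LANGUAGE OverloadedStrings #-}

import Control.Category ((>>>))
import Data.Error.Tree (ErrorTree)
import Data.Text (Text)
import qualified Netencode
import qualified Netencode.Parse as Parse
import PossehlAnalyticsPrelude (newError)

-- Descend into a record and require its "x" field to be a text value.
-- On failure the ErrorTree carries the context path, e.g. "$.x".
getX :: Netencode.T -> Either ErrorTree Text
getX =
  Parse.runParse
    (newError "could not parse value")
    (Parse.asRecord >>> Parse.key "x" >>> Parse.asText)
```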
diff --git a/users/Profpatsch/netencode/README.md b/users/Profpatsch/netencode/README.md
index 840ffaedd0..3538a110a6 100644
--- a/users/Profpatsch/netencode/README.md
+++ b/users/Profpatsch/netencode/README.md
@@ -85,7 +85,7 @@ Similar to text, records start with the length of their *whole encoded content*,
 * A record with one empty field, `foo`: `{9:<3:foo|u,}`
 * A record with two fields, `foo` and `x`: `{21:<3:foo|u,<1:x|t3:baz,}`
 * The same record: `{21:<1:x|t3:baz,<3:foo|u,}`
-* The same record (later occurences of fields are ignored): `{28:<1:x|t3:baz,<3:foo|u,<1:x|u,}`
+* The same record (earlier occurrences of fields are ignored): `{28:<1:x|u,<1:x|t3:baz,<3:foo|u,}`
 
 ### sums (tagged unions)
 
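To tie the README rule above to the Haskell parser in this commit, a hedged sketch (not part of the commit): the duplicated-key record should decode to the same value as the plain two-field record, because the record parser builds its `NEMap` with `fromList`, which keeps the later entry for a duplicate key.

```haskell
{-# LANGUAGE OverloadedStrings #-}

import qualified Data.Attoparsec.ByteString as Atto
import Netencode (T, netencodeParser)

-- Both inputs should decode to the record {foo = unit, x = text "baz"}:
-- the later <1:x|t3:baz, entry overrides the earlier <1:x|u, one.
withDuplicate, withoutDuplicate :: Either String T
withDuplicate = Atto.parseOnly netencodeParser "{28:<1:x|u,<1:x|t3:baz,<3:foo|u,}"
withoutDuplicate = Atto.parseOnly netencodeParser "{21:<3:foo|u,<1:x|t3:baz,}"
```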
diff --git a/users/Profpatsch/netencode/default.nix b/users/Profpatsch/netencode/default.nix
index d389258148..6e7dce489a 100644
--- a/users/Profpatsch/netencode/default.nix
+++ b/users/Profpatsch/netencode/default.nix
@@ -11,6 +11,34 @@ let
     }
     (builtins.readFile ./netencode.rs);
 
+  netencode-hs = pkgs.haskellPackages.mkDerivation {
+    pname = "netencode";
+    version = "0.1.0";
+
+    src = depot.users.Profpatsch.exactSource ./. [
+      ./netencode.cabal
+      ./Netencode.hs
+      ./Netencode/Parse.hs
+    ];
+
+    libraryHaskellDepends = [
+      pkgs.haskellPackages.hedgehog
+      pkgs.haskellPackages.nonempty-containers
+      pkgs.haskellPackages.deriving-compat
+      pkgs.haskellPackages.data-fix
+      pkgs.haskellPackages.bytestring
+      pkgs.haskellPackages.attoparsec
+      pkgs.haskellPackages.pa-prelude
+      pkgs.haskellPackages.pa-label
+      pkgs.haskellPackages.pa-error-tree
+    ];
+
+    isLibrary = true;
+    license = lib.licenses.mit;
+
+
+  };
+
   gen = import ./gen.nix { inherit lib; };
 
   pretty-rs = depot.nix.writers.rustSimpleLib
@@ -37,9 +65,8 @@ let
 
     fn main() {
       let (_, prog) = exec_helpers::args_for_exec("netencode-pretty", 0);
-      let mut buf = vec![];
-      let u = netencode::u_from_stdin_or_die_user_error("netencode-pretty", &mut buf);
-      match netencode_pretty::Pretty::from_u(u).print_multiline(&mut std::io::stdout()) {
+      let t = netencode::t_from_stdin_or_die_user_error("netencode-pretty");
+      match netencode_pretty::Pretty::from_u(t.to_u()).print_multiline(&mut std::io::stdout()) {
         Ok(()) => {},
         Err(err) => exec_helpers::die_temporary("netencode-pretty", format!("could not write to stdout: {}", err))
       }
@@ -64,24 +91,21 @@ let
       dependencies = [
         netencode-rs
         depot.users.Profpatsch.execline.exec-helpers
-        depot.users.Profpatsch.arglib.netencode.rust
       ];
     } ''
     extern crate netencode;
-    extern crate arglib_netencode;
     extern crate exec_helpers;
     use netencode::{encode, dec};
     use netencode::dec::{Decoder, DecodeError};
 
     fn main() {
-        let mut buf = vec![];
         let args = exec_helpers::args("record-get", 1);
         let field = match std::str::from_utf8(&args[0]) {
             Ok(f) => f,
             Err(_e) => exec_helpers::die_user_error("record-get", format!("The field name needs to be valid unicode"))
         };
-        let u = netencode::u_from_stdin_or_die_user_error("record-get", &mut buf);
-        match (dec::RecordDot {field, inner: dec::AnyU }).dec(u) {
+        let t = netencode::t_from_stdin_or_die_user_error("record-get");
+        match (dec::RecordDot {field, inner: dec::AnyU }).dec(t.to_u()) {
             Ok(u) => encode(&mut std::io::stdout(), &u).expect("encoding to stdout failed"),
             Err(DecodeError(err)) => exec_helpers::die_user_error("record-get", err)
         }
@@ -101,10 +125,9 @@ let
     use netencode::dec::{Record, Try, ScalarAsBytes, Decoder, DecodeError};
 
     fn main() {
-        let mut buf = vec![];
-        let u = netencode::u_from_stdin_or_die_user_error("record-splice-env", &mut buf);
+        let t = netencode::t_from_stdin_or_die_user_error("record-splice-env");
         let (_, prog) = exec_helpers::args_for_exec("record-splice-env", 0);
-        match Record(Try(ScalarAsBytes)).dec(u) {
+        match Record(Try(ScalarAsBytes)).dec(t.to_u()) {
             Ok(map) => {
                 exec_helpers::exec_into_args(
                     "record-splice-env",
@@ -149,6 +172,7 @@ in
 depot.nix.readTree.drvTargets {
   inherit
     netencode-rs
+    netencode-hs
     pretty-rs
     pretty
     netencode-mustache
diff --git a/users/Profpatsch/netencode/netencode.cabal b/users/Profpatsch/netencode/netencode.cabal
new file mode 100644
index 0000000000..7bff4487bb
--- /dev/null
+++ b/users/Profpatsch/netencode/netencode.cabal
@@ -0,0 +1,74 @@
+cabal-version:      3.0
+name:               netencode
+version:            0.1.0.0
+author:             Profpatsch
+maintainer:         mail@profpatsch.de
+
+
+common common-options
+  ghc-options:
+      -Wall
+      -Wno-type-defaults
+      -Wunused-packages
+      -Wredundant-constraints
+      -fwarn-missing-deriving-strategies
+
+  -- See https://downloads.haskell.org/ghc/latest/docs/users_guide/exts.html
+  -- for a description of all these extensions
+  default-extensions:
+      -- Infer Applicative instead of Monad where possible
+    ApplicativeDo
+
+    -- Allow literal strings to be Text
+    OverloadedStrings
+
+    -- Syntactic sugar improvements
+    LambdaCase
+    MultiWayIf
+
+    -- Makes the (deprecated) usage of * instead of Data.Kind.Type an error
+    NoStarIsType
+
+    -- Convenient and crucial to deal with ambiguous field names, commonly
+    -- known as RecordDotSyntax
+    OverloadedRecordDot
+
+    -- does not export record fields as functions, use OverloadedRecordDot to access instead
+    NoFieldSelectors
+
+    -- Record punning
+    RecordWildCards
+
+    -- Improved Deriving
+    DerivingStrategies
+    DerivingVia
+
+    -- Type-level strings
+    DataKinds
+
+    -- to enable the `type` keyword in import lists (ormolu uses this automatically)
+    ExplicitNamespaces
+
+  default-language: GHC2021
+
+
+library
+    import: common-options
+    exposed-modules:
+        Netencode,
+        Netencode.Parse
+
+    build-depends:
+        base >=4.15 && <5,
+        pa-prelude,
+        pa-label,
+        pa-error-tree,
+        hedgehog,
+        nonempty-containers,
+        deriving-compat,
+        data-fix,
+        bytestring,
+        attoparsec,
+        text,
+        semigroupoids,
+        selective
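The `hedgehog` dependency above backs the generator and roundtrip property defined in `Netencode.hs`. A minimal sketch of checking that property from GHCi or a tiny driver (not part of the commit):

```haskell
import qualified Hedgehog as Hedge
import Netencode (prop_netencodeRoundtrip)

main :: IO ()
main = do
  passed <- Hedge.check prop_netencodeRoundtrip
  putStrLn (if passed then "roundtrip holds" else "roundtrip failed")
```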
diff --git a/users/Profpatsch/netencode/netencode.rs b/users/Profpatsch/netencode/netencode.rs
index bb08dca4aa..34a8fcef09 100644
--- a/users/Profpatsch/netencode/netencode.rs
+++ b/users/Profpatsch/netencode/netencode.rs
@@ -198,25 +198,103 @@ pub fn text(s: String) -> T {
     T::Text(s)
 }
 
-pub fn u_from_stdin_or_die_user_error<'a>(prog_name: &'_ str, stdin_buf: &'a mut Vec<u8>) -> U<'a> {
-    std::io::stdin().lock().read_to_end(stdin_buf);
-    let u = match parse::u_u(stdin_buf) {
-        Ok((rest, u)) => match rest {
-            b"" => u,
-            _ => exec_helpers::die_user_error(
+pub fn t_from_stdin_or_die_user_error<'a>(prog_name: &'_ str) -> T {
+    match t_from_stdin_or_die_user_error_with_rest(prog_name, &vec![]) {
+        None => exec_helpers::die_user_error(prog_name, "stdin was empty"),
+        Some((rest, t)) => {
+            if rest.is_empty() {
+                t
+            } else {
+                exec_helpers::die_user_error(
+                    prog_name,
+                    format!(
+                        "stdin contained some soup after netencode value: {:?}",
+                        String::from_utf8_lossy(&rest)
+                    ),
+                )
+            }
+        }
+    }
+}
+
+/// Read a netencode value from stdin incrementally; return any trailing bytes that were read but not consumed.
+/// Returns `None` if there was nothing to read from stdin & no `initial_bytes` were provided.
+/// The returned rest bytes can be passed back as `initial_bytes` if more values should be read.
+pub fn t_from_stdin_or_die_user_error_with_rest<'a>(
+    prog_name: &'_ str,
+    initial_bytes: &[u8],
+) -> Option<(Vec<u8>, T)> {
+    let mut chonker = Chunkyboi::new(std::io::stdin().lock(), 4096);
+    // The vec to pass to the parser on each step
+    let mut parser_vec: Vec<u8> = initial_bytes.to_vec();
+    // whether stdin has already been fully read (EOF reached)
+    let mut was_empty: bool = false;
+    loop {
+        match chonker.next() {
+            None => {
+                if parser_vec.is_empty() {
+                    return None;
+                } else {
+                    was_empty = true
+                }
+            }
+            Some(Err(err)) => exec_helpers::die_temporary(
                 prog_name,
-                format!(
-                    "stdin contained some soup after netencode value: {:?}",
-                    String::from_utf8_lossy(rest)
-                ),
+                &format!("could not read from stdin: {:?}", err),
+            ),
+            Some(Ok(mut new_bytes)) => parser_vec.append(&mut new_bytes),
+        }
+
+        match parse::t_t(&parser_vec) {
+            Ok((rest, t)) => return Some((rest.to_owned(), t)),
+            Err(nom::Err::Incomplete(_)) => {
+                if was_empty {
+                    exec_helpers::die_user_error(
+                        prog_name,
+                        &format!(
+                            "unable to parse netencode from stdin, input incomplete: {:?}",
+                            parser_vec
+                        ),
+                    );
+                }
+                // read more from stdin and try parsing again
+                continue;
+            }
+            Err(err) => exec_helpers::die_user_error(
+                prog_name,
+                &format!("unable to parse netencode from stdin: {:?}", err),
             ),
-        },
-        Err(err) => exec_helpers::die_user_error(
-            prog_name,
-            format!("unable to parse netencode from stdin: {:?}", err),
-        ),
-    };
-    u
+        }
+    }
+}
+
+// iter helper
+// TODO: put into its own module
+struct Chunkyboi<T> {
+    inner: T,
+    buf: Vec<u8>,
+}
+
+impl<R: Read> Chunkyboi<R> {
+    fn new(inner: R, chunksize: usize) -> Self {
+        let buf = vec![0; chunksize];
+        Chunkyboi { inner, buf }
+    }
+}
+
+impl<R: Read> Iterator for Chunkyboi<R> {
+    type Item = std::io::Result<Vec<u8>>;
+
+    fn next(&mut self) -> Option<std::io::Result<Vec<u8>>> {
+        match self.inner.read(&mut self.buf) {
+            Ok(0) => None,
+            Ok(read) => {
+                // clone a new buffer so we can reuse the internal one
+                Some(Ok(self.buf[..read].to_owned()))
+            }
+            Err(err) => Some(Err(err)),
+        }
+    }
 }
 
 pub mod parse {
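For comparison with the incremental Rust reader above, a hedged Haskell sketch (not part of the commit) of the same read/parse/retry loop, using attoparsec's continuation interface with the parser from `Netencode.hs`; the chunk size and error strings are illustrative.

```haskell
import qualified Data.Attoparsec.ByteString as Atto
import qualified Data.ByteString as BS
import Netencode (T, netencodeParser)
import System.IO (stdin)

-- Feed stdin to the parser in chunks; like the Rust version, return the
-- parsed value together with any bytes read past the end of it.
tFromStdinWithRest :: IO (Either String (BS.ByteString, T))
tFromStdinWithRest = go (Atto.parse netencodeParser)
  where
    go cont = do
      chunk <- BS.hGetSome stdin 4096
      case cont chunk of
        Atto.Done rest t -> pure (Right (rest, t))
        Atto.Fail _ _ err -> pure (Left err)
        Atto.Partial k
          -- an empty chunk means stdin is exhausted while the parser still needs input
          | BS.null chunk -> pure (Left "stdin ended before a full netencode value")
          | otherwise -> go k
```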