diff options
Diffstat (limited to 'users/Profpatsch/netencode')
-rw-r--r-- | users/Profpatsch/netencode/Netencode.hs | 433 | ||||
-rw-r--r-- | users/Profpatsch/netencode/Netencode/Parse.hs | 102 | ||||
-rw-r--r-- | users/Profpatsch/netencode/README.md | 2 | ||||
-rw-r--r-- | users/Profpatsch/netencode/default.nix | 46 | ||||
-rw-r--r-- | users/Profpatsch/netencode/netencode.cabal | 74 | ||||
-rw-r--r-- | users/Profpatsch/netencode/netencode.rs | 112 |
6 files changed, 740 insertions, 29 deletions
diff --git a/users/Profpatsch/netencode/Netencode.hs b/users/Profpatsch/netencode/Netencode.hs new file mode 100644 index 0000000000..ca93ab2fef --- /dev/null +++ b/users/Profpatsch/netencode/Netencode.hs @@ -0,0 +1,433 @@ +{-# LANGUAGE AllowAmbiguousTypes #-} +{-# LANGUAGE QuasiQuotes #-} +{-# LANGUAGE TemplateHaskell #-} + +module Netencode where + +import Control.Applicative (many) +import Data.Attoparsec.ByteString qualified as Atto +import Data.Attoparsec.ByteString.Char8 qualified as Atto.Char +import Data.ByteString qualified as ByteString +import Data.ByteString.Builder (Builder) +import Data.ByteString.Builder qualified as Builder +import Data.ByteString.Lazy qualified as ByteString.Lazy +import Data.Fix (Fix (Fix)) +import Data.Fix qualified as Fix +import Data.Functor.Classes (Eq1 (liftEq)) +import Data.Int (Int16, Int32, Int64, Int8) +import Data.Map.NonEmpty (NEMap) +import Data.Map.NonEmpty qualified as NEMap +import Data.Semigroup qualified as Semi +import Data.String (IsString) +import Data.Word (Word16, Word32, Word64) +import GHC.Exts (fromString) +import Hedgehog qualified as Hedge +import Hedgehog.Gen qualified as Gen +import Hedgehog.Range qualified as Range +import PossehlAnalyticsPrelude +import Text.Show.Deriving +import Prelude hiding (sum) + +-- | Netencode type base functor. +-- +-- Recursive elements have a @rec@. +data TF rec + = -- | Unit value + Unit + | -- | Boolean (2^1) + N1 Bool + | -- | Byte (2^3) + N3 Word8 + | -- | 64-bit Natural (2^6) + N6 Word64 + | -- | 64-bit Integer (2^6) + I6 Int64 + | -- | Unicode Text + Text Text + | -- | Arbitrary Bytestring + Bytes ByteString + | -- | A constructor of a(n open) Sum + Sum (Tag Text rec) + | -- | Record + Record (NEMap Text rec) + | -- | List + List [rec] + deriving stock (Show, Eq, Functor) + +instance Eq1 TF where + liftEq _ Unit Unit = True + liftEq _ (N1 b) (N1 b') = b == b' + liftEq _ (N3 w8) (N3 w8') = w8 == w8' + liftEq _ (N6 w64) (N6 w64') = w64 == w64' + liftEq _ (I6 i64) (I6 i64') = i64 == i64' + liftEq _ (Text t) (Text t') = t == t' + liftEq _ (Bytes b) (Bytes b') = b == b' + liftEq eq (Sum t) (Sum t') = eq (t.tagVal) (t'.tagVal) + liftEq eq (Record m) (Record m') = liftEq eq m m' + liftEq eq (List xs) (List xs') = liftEq eq xs xs' + liftEq _ _ _ = False + +-- | A tagged value +data Tag tag val = Tag + { tagTag :: tag, + tagVal :: val + } + deriving stock (Show, Eq, Functor) + +$(Text.Show.Deriving.deriveShow1 ''Tag) +$(Text.Show.Deriving.deriveShow1 ''TF) + +-- | The Netencode type +newtype T = T {unT :: Fix TF} + deriving stock (Eq, Show) + +-- | Create a unit +unit :: T +unit = T $ Fix Unit + +-- | Create a boolean +n1 :: Bool -> T +n1 = T . Fix . N1 + +-- | Create a byte +n3 :: Word8 -> T +n3 = T . Fix . N3 + +-- | Create a 64-bit natural +n6 :: Word64 -> T +n6 = T . Fix . N6 + +-- | Create a 64-bit integer +i6 :: Int64 -> T +i6 = T . Fix . I6 + +-- | Create a UTF-8 unicode text +text :: Text -> T +text = T . Fix . Text + +-- | Create an arbitrary bytestring +bytes :: ByteString -> T +bytes = T . Fix . Bytes + +-- | Create a tagged value from a tag name and a value +tag :: Text -> T -> T +tag key val = T $ Fix $ Sum $ coerce @(Tag Text T) @(Tag Text (Fix TF)) $ Tag key val + +-- | Create a record from a non-empty map +record :: NEMap Text T -> T +record = T . Fix . Record . coerce @(NEMap Text T) @(NEMap Text (Fix TF)) + +-- | Create a list +list :: [T] -> T +list = T . Fix . List . coerce @[T] @([Fix TF]) + +-- | Stable encoding of a netencode value. Record keys will be sorted lexicographically ascending. +netencodeEncodeStable :: T -> Builder +netencodeEncodeStable (T fix) = Fix.foldFix (netencodeEncodeStableF id) fix + +-- | Stable encoding of a netencode functor value. Record keys will be sorted lexicographically ascending. +-- +-- The given function is used for encoding the recursive values. +netencodeEncodeStableF :: (rec -> Builder) -> TF rec -> Builder +netencodeEncodeStableF inner tf = builder go + where + -- TODO: directly pass in BL? + innerBL = fromBuilder . inner + go = case tf of + Unit -> "u," + N1 False -> "n1:0," + N1 True -> "n1:1," + N3 w8 -> "n3:" <> fromBuilder (Builder.word8Dec w8) <> "," + N6 w64 -> "n6:" <> fromBuilder (Builder.word64Dec w64) <> "," + I6 i64 -> "i6:" <> fromBuilder (Builder.int64Dec i64) <> "," + Text t -> + let b = fromText t + in "t" <> builderLenDec b <> ":" <> b <> "," + Bytes b -> "b" <> builderLenDec (fromByteString b) <> ":" <> fromByteString b <> "," + Sum (Tag key val) -> encTag key val + Record m -> + -- NEMap uses Map internally, and that folds in lexicographic ascending order over the key. + -- Since these are `Text` in our case, this is stable. + let mBuilder = m & NEMap.foldMapWithKey encTag + in "{" <> builderLenDec mBuilder <> ":" <> mBuilder <> "}" + List xs -> + let xsBuilder = xs <&> innerBL & mconcat + in "[" <> builderLenDec xsBuilder <> ":" <> xsBuilder <> "]" + where + encTag key val = + let bKey = fromText key + in "<" <> builderLenDec bKey <> ":" <> bKey <> "|" <> innerBL val + +-- | A builder that knows its own size in bytes +newtype BL = BL (Builder, Semi.Sum Natural) + deriving newtype (Monoid, Semigroup) + +instance IsString BL where + fromString s = + BL + ( fromString @Builder s, + fromString @ByteString s + & ByteString.length + & intToNatural + & fromMaybe 0 + & Semi.Sum + ) + +-- | Retrieve the builder +builder :: BL -> Builder +builder (BL (b, _)) = b + +-- | Retrieve the bytestring length +builderLen :: BL -> Natural +builderLen (BL (_, len)) = Semi.getSum $ len + +-- | Take a 'BL' and create a new 'BL' that represents the length as a decimal integer +builderLenDec :: BL -> BL +builderLenDec (BL (_, len)) = + let b = Builder.intDec $ (len & Semi.getSum & fromIntegral @Natural @Int) + in b & fromBuilder + +-- | Create a 'BL' from a 'Builder'. +-- +-- Not efficient, goes back to a lazy bytestring to get the length +fromBuilder :: Builder -> BL +fromBuilder b = + BL + ( b, + b + & Builder.toLazyByteString + & ByteString.Lazy.length + & fromIntegral @Int64 @Natural + & Semi.Sum + ) + +-- | Create a 'BL' from a 'ByteString'. +fromByteString :: ByteString -> BL +fromByteString b = + BL + ( Builder.byteString b, + b + & ByteString.length + & fromIntegral @Int @Natural + & Semi.Sum + ) + +-- | Create a 'BL' from a 'Text'. +fromText :: Text -> BL +fromText t = t & textToBytesUtf8 & fromByteString + +-- | Parser for a netencode value. +netencodeParser :: Atto.Parser T +netencodeParser = T <$> go + where + go = Fix <$> netencodeParserF go + +-- | Parser for one level of a netencode value. Requires a parser for the recursion. +netencodeParserF :: Atto.Parser rec -> Atto.Parser (TF rec) +netencodeParserF inner = do + typeTag <- Atto.Char.anyChar + case typeTag of + 't' -> Text <$> textParser + 'b' -> Bytes <$> bytesParser + 'u' -> unitParser + '<' -> Sum <$> tagParser + '{' -> Record <$> recordParser + '[' -> List <$> listParser + 'n' -> naturalParser + 'i' -> I6 <$> intParser + c -> fail ([c] <> " is not a valid netencode tag") + where + bytesParser = do + len <- boundedDecimalFail Atto.<?> "bytes is missing a digit specifying the length" + _ <- Atto.Char.char ':' Atto.<?> "bytes did not have : after length" + bytes' <- Atto.take len + _ <- Atto.Char.char ',' Atto.<?> "bytes did not end with ," + pure bytes' + + textParser = do + len <- boundedDecimalFail Atto.<?> "text is missing a digit specifying the length" + _ <- Atto.Char.char ':' Atto.<?> "text did not have : after length" + text' <- + Atto.take len <&> bytesToTextUtf8 >>= \case + Left err -> fail [fmt|cannot decode text as utf8: {err & prettyError}|] + Right t -> pure t + _ <- Atto.Char.char ',' Atto.<?> "text did not end with ," + pure text' + + unitParser = do + _ <- Atto.Char.char ',' Atto.<?> "unit did not end with ," + pure $ Unit + + tagParser = do + len <- boundedDecimalFail Atto.<?> "tag is missing a digit specifying the length" + _ <- Atto.Char.char ':' Atto.<?> "tag did not have : after length" + tagTag <- + Atto.take len <&> bytesToTextUtf8 >>= \case + Left err -> fail [fmt|cannot decode tag key as utf8: {err & prettyError}|] + Right t -> pure t + _ <- Atto.Char.char '|' Atto.<?> "tag was missing the key/value separator (|)" + tagVal <- inner + pure $ Tag {..} + + recordParser = do + -- TODO: the record does not use its inner length because we are descending into the inner parsers. + -- This is a smell! In theory it can be used to skip parsing the whole inner keys. + _len <- boundedDecimalFail Atto.<?> "record is missing a digit specifying the length" + _ <- Atto.Char.char ':' Atto.<?> "record did not have : after length" + record' <- + many (Atto.Char.char '<' >> tagParser) <&> nonEmpty >>= \case + Nothing -> fail "record is not allowed to have 0 elements" + Just tags -> + pure $ + tags + <&> (\t -> (t.tagTag, t.tagVal)) + -- later keys are preferred if they are duplicates, according to the standard + & NEMap.fromList + _ <- Atto.Char.char '}' Atto.<?> "record did not end with }" + pure record' + + listParser = do + -- TODO: the list does not use its inner length because we are descending into the inner parsers. + -- This is a smell! In theory it can be used to skip parsing the whole inner keys. + _len <- boundedDecimalFail Atto.<?> "list is missing a digit specifying the length" + _ <- Atto.Char.char ':' Atto.<?> "list did not have : after length" + -- TODO: allow empty lists? + list' <- many inner + _ <- Atto.Char.char ']' Atto.<?> "list did not end with ]" + pure list' + + intParser = do + let p :: forall parseSize. (Bounded parseSize, Integral parseSize) => (Integer -> Atto.Parser Int64) + p n = do + _ <- Atto.Char.char ':' Atto.<?> [fmt|i{n & show} did not have : after length|] + isNegative <- Atto.option False (Atto.Char.char '-' <&> \_c -> True) + int <- + boundedDecimal @parseSize >>= \case + Nothing -> fail [fmt|cannot parse into i{n & show}, the number is too big (would overflow)|] + Just i -> + pure $ + if isNegative + then -- TODO: this should alread be done in the decimal parser, @minBound@ cannot be parsed cause it’s one more than @(-maxBound)@! + (-i) + else i + _ <- Atto.Char.char ',' Atto.<?> [fmt|i{n & show} did not end with ,|] + pure $ fromIntegral @parseSize @Int64 int + digit <- Atto.Char.digit + case digit of + -- TODO: separate parser for i1 and i2 that makes sure the boundaries are right! + '1' -> p @Int8 1 + '2' -> p @Int8 2 + '3' -> p @Int8 3 + '4' -> p @Int16 4 + '5' -> p @Int32 5 + '6' -> p @Int64 6 + '7' -> fail [fmt|i parser only supports numbers up to size 6, was 7|] + '8' -> fail [fmt|i parser only supports numbers up to size 6, was 8|] + '9' -> fail [fmt|i parser only supports numbers up to size 6, was 9|] + o -> fail [fmt|i number with length {o & show} not possible|] + + naturalParser = do + let p :: forall parseSize finalSize. (Bounded parseSize, Integral parseSize, Num finalSize) => (Integer -> Atto.Parser finalSize) + p n = do + _ <- Atto.Char.char ':' Atto.<?> [fmt|n{n & show} did not have : after length|] + int <- + boundedDecimal @parseSize >>= \case + Nothing -> fail [fmt|cannot parse into n{n & show}, the number is too big (would overflow)|] + Just i -> pure i + + _ <- Atto.Char.char ',' Atto.<?> [fmt|n{n & show} did not end with ,|] + pure $ fromIntegral @parseSize @finalSize int + let b n = do + _ <- Atto.Char.char ':' Atto.<?> [fmt|n{n & show} did not have : after length|] + bool <- + (Atto.Char.char '0' >> pure False) + <|> (Atto.Char.char '1' >> pure True) + _ <- Atto.Char.char ',' Atto.<?> [fmt|n{n & show} did not end with ,|] + pure bool + + digit <- Atto.Char.digit + case digit of + -- TODO: separate parser for n1 and n2 that makes sure the boundaries are right! + '1' -> N1 <$> b 1 + '2' -> N3 <$> p @Word8 @Word8 2 + '3' -> N3 <$> p @Word8 @Word8 3 + '4' -> N6 <$> p @Word16 @Word64 4 + '5' -> N6 <$> p @Word32 @Word64 5 + '6' -> N6 <$> p @Word64 @Word64 6 + '7' -> fail [fmt|n parser only supports numbers up to size 6, was 7|] + '8' -> fail [fmt|n parser only supports numbers up to size 6, was 8|] + '9' -> fail [fmt|n parser only supports numbers up to size 6, was 9|] + o -> fail [fmt|n number with length {o & show} not possible|] + +-- | Parser for a bounded decimal that does not overflow the decimal. +-- +-- via https://www.extrema.is/blog/2021/10/20/parsing-bounded-integers +boundedDecimal :: forall a. (Bounded a, Integral a) => Atto.Parser (Maybe a) +boundedDecimal = do + i :: Integer <- decimal + pure $ + if (i :: Integer) > fromIntegral (maxBound :: a) + then Nothing + else Just $ fromIntegral i + where + -- Copied from @Attoparsec.Text@ and adjusted to bytestring + decimal :: (Integral a2) => Atto.Parser a2 + decimal = ByteString.foldl' step 0 <$> Atto.Char.takeWhile1 Atto.Char.isDigit + where + step a c = a * 10 + fromIntegral (c - 48) +{-# SPECIALIZE boundedDecimal :: Atto.Parser (Maybe Int) #-} +{-# SPECIALIZE boundedDecimal :: Atto.Parser (Maybe Int64) #-} +{-# SPECIALIZE boundedDecimal :: Atto.Parser (Maybe Word8) #-} +{-# SPECIALIZE boundedDecimal :: Atto.Parser (Maybe Word64) #-} + +-- | 'boundedDecimal', but fail the parser if the decimal overflows. +boundedDecimalFail :: Atto.Parser Int +boundedDecimalFail = + boundedDecimal >>= \case + Nothing -> fail "decimal out of range" + Just a -> pure a + +-- | Hedgehog generator for a netencode value. +genNetencode :: Hedge.MonadGen m => m T +genNetencode = + Gen.recursive + Gen.choice + [ -- these are bundled into one Gen, so that scalar elements get chosen less frequently, and the generator produces nicely nested examples + Gen.frequency + [ (1, pure unit), + (1, n1 <$> Gen.bool), + (1, n3 <$> Gen.element [0, 1, 5]), + (1, n6 <$> Gen.element [0, 1, 5]), + (1, i6 <$> Gen.element [-1, 1, 5]), + (2, text <$> Gen.text (Range.linear 1 10) Gen.lower), + (2, bytes <$> Gen.bytes (Range.linear 1 10)) + ] + ] + [ do + key <- Gen.text (Range.linear 3 10) Gen.lower + val <- genNetencode + pure $ tag key val, + record + <$> ( let k = Gen.text (Range.linear 3 10) Gen.lower + v = genNetencode + in NEMap.insertMap + <$> k + <*> v + <*> ( (Gen.map (Range.linear 0 3)) $ + (,) <$> k <*> v + ) + ) + ] + +-- | Hedgehog property: encoding a netencode value and parsing it again returns the same result. +prop_netencodeRoundtrip :: Hedge.Property +prop_netencodeRoundtrip = Hedge.property $ do + enc <- Hedge.forAll genNetencode + ( Atto.parseOnly + netencodeParser + ( netencodeEncodeStable enc + & Builder.toLazyByteString + & toStrictBytes + ) + ) + Hedge.=== (Right enc) diff --git a/users/Profpatsch/netencode/Netencode/Parse.hs b/users/Profpatsch/netencode/Netencode/Parse.hs new file mode 100644 index 0000000000..184fb5f912 --- /dev/null +++ b/users/Profpatsch/netencode/Netencode/Parse.hs @@ -0,0 +1,102 @@ +{-# LANGUAGE QuasiQuotes #-} + +module Netencode.Parse where + +import Control.Category qualified +import Control.Selective (Selective) +import Data.Error.Tree +import Data.Fix (Fix (..)) +import Data.Functor.Compose +import Data.List qualified as List +import Data.Map.NonEmpty (NEMap) +import Data.Map.NonEmpty qualified as NEMap +import Data.Semigroupoid qualified as Semigroupiod +import Data.Semigroupoid qualified as Semigroupoid +import Data.Text qualified as Text +import Netencode qualified +import PossehlAnalyticsPrelude +import Prelude hiding (log) + +newtype Parse from to + = -- TODO: the way @Context = [Text]@ has to be forwarded to everything is kinda shitty. + -- This is essentially just a difference list, and can probably be treated as a function in the output? + Parse (([Text], from) -> Validation (NonEmpty ErrorTree) ([Text], to)) + deriving + (Functor, Applicative, Selective) + via ( Compose + ( Compose + ((->) ([Text], from)) + (Validation (NonEmpty ErrorTree)) + ) + ((,) [Text]) + ) + +instance Semigroupoid Parse where + o p2 p1 = Parse $ \from -> case runParse' p1 from of + Failure err -> Failure err + Success to1 -> runParse' p2 to1 + +instance Category Parse where + (.) = Semigroupoid.o + id = Parse $ \t -> Success t + +runParse :: Error -> Parse from to -> from -> Either ErrorTree to +runParse errMsg parser t = + (["$"], t) + & runParse' parser + <&> snd + & first (nestedMultiError errMsg) + & validationToEither + +runParse' :: Parse from to -> ([Text], from) -> Validation (NonEmpty ErrorTree) ([Text], to) +runParse' (Parse f) from = f from + +parseEither :: (([Text], from) -> Either ErrorTree ([Text], to)) -> Parse from to +parseEither f = Parse $ \from -> f from & eitherToListValidation + +tAs :: (Netencode.TF (Fix Netencode.TF) -> Either ([Text] -> ErrorTree) to) -> Parse Netencode.T to +tAs f = parseEither ((\(context, Netencode.T (Fix tf)) -> f tf & bimap ($ context) (context,))) + +key :: Text -> Parse (NEMap Text to) to +key name = parseEither $ \(context, rec) -> + rec + & NEMap.lookup name + & annotate (errorTreeContext (showContext context) [fmt|Key "{name}" does not exist|]) + <&> (addContext name context,) + +showContext :: [Text] -> Text +showContext context = context & List.reverse & Text.intercalate "." + +addContext :: a -> [a] -> [a] +addContext = (:) + +asText :: Parse Netencode.T Text +asText = tAs $ \case + Netencode.Text t -> pure t + other -> typeError "of text" other + +asBytes :: Parse Netencode.T ByteString +asBytes = tAs $ \case + Netencode.Bytes b -> pure b + other -> typeError "of bytes" other + +asRecord :: Parse Netencode.T (NEMap Text (Netencode.T)) +asRecord = tAs $ \case + Netencode.Record rec -> pure (rec <&> Netencode.T) + other -> typeError "a record" other + +typeError :: Text -> Netencode.TF ignored -> (Either ([Text] -> ErrorTree) b) +typeError should is = do + let otherS = is <&> (\_ -> ("…" :: String)) & show + Left $ \context -> errorTreeContext (showContext context) [fmt|Value is not {should}, but a {otherS}|] + +orThrowParseError :: + Parse (Either Error to) to +orThrowParseError = Parse $ \case + (context, Left err) -> + err + & singleError + & errorTreeContext (showContext context) + & singleton + & Failure + (context, Right to) -> Success (context, to) diff --git a/users/Profpatsch/netencode/README.md b/users/Profpatsch/netencode/README.md index 840ffaedd0..3538a110a6 100644 --- a/users/Profpatsch/netencode/README.md +++ b/users/Profpatsch/netencode/README.md @@ -85,7 +85,7 @@ Similar to text, records start with the length of their *whole encoded content*, * A record with one empty field, `foo`: `{9:<3:foo|u,}` * A record with two fields, `foo` and `x`: `{21:<3:foo|u,<1:x|t3:baz,}` * The same record: `{21:<1:x|t3:baz,<3:foo|u,}` -* The same record (later occurences of fields are ignored): `{28:<1:x|t3:baz,<3:foo|u,<1:x|u,}` +* The same record (earlier occurences of fields are ignored): `{<1:x|u,28:<1:x|t3:baz,<3:foo|u,}` ### sums (tagged unions) diff --git a/users/Profpatsch/netencode/default.nix b/users/Profpatsch/netencode/default.nix index d389258148..6e7dce489a 100644 --- a/users/Profpatsch/netencode/default.nix +++ b/users/Profpatsch/netencode/default.nix @@ -11,6 +11,34 @@ let } (builtins.readFile ./netencode.rs); + netencode-hs = pkgs.haskellPackages.mkDerivation { + pname = "netencode"; + version = "0.1.0"; + + src = depot.users.Profpatsch.exactSource ./. [ + ./netencode.cabal + ./Netencode.hs + ./Netencode/Parse.hs + ]; + + libraryHaskellDepends = [ + pkgs.haskellPackages.hedgehog + pkgs.haskellPackages.nonempty-containers + pkgs.haskellPackages.deriving-compat + pkgs.haskellPackages.data-fix + pkgs.haskellPackages.bytestring + pkgs.haskellPackages.attoparsec + pkgs.haskellPackages.pa-prelude + pkgs.haskellPackages.pa-label + pkgs.haskellPackages.pa-error-tree + ]; + + isLibrary = true; + license = lib.licenses.mit; + + + }; + gen = import ./gen.nix { inherit lib; }; pretty-rs = depot.nix.writers.rustSimpleLib @@ -37,9 +65,8 @@ let fn main() { let (_, prog) = exec_helpers::args_for_exec("netencode-pretty", 0); - let mut buf = vec![]; - let u = netencode::u_from_stdin_or_die_user_error("netencode-pretty", &mut buf); - match netencode_pretty::Pretty::from_u(u).print_multiline(&mut std::io::stdout()) { + let t = netencode::t_from_stdin_or_die_user_error("netencode-pretty"); + match netencode_pretty::Pretty::from_u(t.to_u()).print_multiline(&mut std::io::stdout()) { Ok(()) => {}, Err(err) => exec_helpers::die_temporary("netencode-pretty", format!("could not write to stdout: {}", err)) } @@ -64,24 +91,21 @@ let dependencies = [ netencode-rs depot.users.Profpatsch.execline.exec-helpers - depot.users.Profpatsch.arglib.netencode.rust ]; } '' extern crate netencode; - extern crate arglib_netencode; extern crate exec_helpers; use netencode::{encode, dec}; use netencode::dec::{Decoder, DecodeError}; fn main() { - let mut buf = vec![]; let args = exec_helpers::args("record-get", 1); let field = match std::str::from_utf8(&args[0]) { Ok(f) => f, Err(_e) => exec_helpers::die_user_error("record-get", format!("The field name needs to be valid unicode")) }; - let u = netencode::u_from_stdin_or_die_user_error("record-get", &mut buf); - match (dec::RecordDot {field, inner: dec::AnyU }).dec(u) { + let t = netencode::t_from_stdin_or_die_user_error("record-get"); + match (dec::RecordDot {field, inner: dec::AnyU }).dec(t.to_u()) { Ok(u) => encode(&mut std::io::stdout(), &u).expect("encoding to stdout failed"), Err(DecodeError(err)) => exec_helpers::die_user_error("record-get", err) } @@ -101,10 +125,9 @@ let use netencode::dec::{Record, Try, ScalarAsBytes, Decoder, DecodeError}; fn main() { - let mut buf = vec![]; - let u = netencode::u_from_stdin_or_die_user_error("record-splice-env", &mut buf); + let t = netencode::t_from_stdin_or_die_user_error("record-splice-env"); let (_, prog) = exec_helpers::args_for_exec("record-splice-env", 0); - match Record(Try(ScalarAsBytes)).dec(u) { + match Record(Try(ScalarAsBytes)).dec(t.to_u()) { Ok(map) => { exec_helpers::exec_into_args( "record-splice-env", @@ -149,6 +172,7 @@ in depot.nix.readTree.drvTargets { inherit netencode-rs + netencode-hs pretty-rs pretty netencode-mustache diff --git a/users/Profpatsch/netencode/netencode.cabal b/users/Profpatsch/netencode/netencode.cabal new file mode 100644 index 0000000000..7bff4487bb --- /dev/null +++ b/users/Profpatsch/netencode/netencode.cabal @@ -0,0 +1,74 @@ +cabal-version: 3.0 +name: netencode +version: 0.1.0.0 +author: Profpatsch +maintainer: mail@profpatsch.de + + +common common-options + ghc-options: + -Wall + -Wno-type-defaults + -Wunused-packages + -Wredundant-constraints + -fwarn-missing-deriving-strategies + + -- See https://downloads.haskell.org/ghc/latest/docs/users_guide/exts.html + -- for a description of all these extensions + default-extensions: + -- Infer Applicative instead of Monad where possible + ApplicativeDo + + -- Allow literal strings to be Text + OverloadedStrings + + -- Syntactic sugar improvements + LambdaCase + MultiWayIf + + -- Makes the (deprecated) usage of * instead of Data.Kind.Type an error + NoStarIsType + + -- Convenient and crucial to deal with ambiguous field names, commonly + -- known as RecordDotSyntax + OverloadedRecordDot + + -- does not export record fields as functions, use OverloadedRecordDot to access instead + NoFieldSelectors + + -- Record punning + RecordWildCards + + -- Improved Deriving + DerivingStrategies + DerivingVia + + -- Type-level strings + DataKinds + + -- to enable the `type` keyword in import lists (ormolu uses this automatically) + ExplicitNamespaces + + default-language: GHC2021 + + +library + import: common-options + exposed-modules: + Netencode, + Netencode.Parse + + build-depends: + base >=4.15 && <5, + pa-prelude, + pa-label, + pa-error-tree, + hedgehog, + nonempty-containers, + deriving-compat, + data-fix, + bytestring, + attoparsec, + text, + semigroupoids, + selective diff --git a/users/Profpatsch/netencode/netencode.rs b/users/Profpatsch/netencode/netencode.rs index bb08dca4aa..34a8fcef09 100644 --- a/users/Profpatsch/netencode/netencode.rs +++ b/users/Profpatsch/netencode/netencode.rs @@ -198,25 +198,103 @@ pub fn text(s: String) -> T { T::Text(s) } -pub fn u_from_stdin_or_die_user_error<'a>(prog_name: &'_ str, stdin_buf: &'a mut Vec<u8>) -> U<'a> { - std::io::stdin().lock().read_to_end(stdin_buf); - let u = match parse::u_u(stdin_buf) { - Ok((rest, u)) => match rest { - b"" => u, - _ => exec_helpers::die_user_error( +pub fn t_from_stdin_or_die_user_error<'a>(prog_name: &'_ str) -> T { + match t_from_stdin_or_die_user_error_with_rest(prog_name, &vec![]) { + None => exec_helpers::die_user_error(prog_name, "stdin was empty"), + Some((rest, t)) => { + if rest.is_empty() { + t + } else { + exec_helpers::die_user_error( + prog_name, + format!( + "stdin contained some soup after netencode value: {:?}", + String::from_utf8_lossy(&rest) + ), + ) + } + } + } +} + +/// Read a netencode value from stdin incrementally, return bytes that could not be read. +/// Nothing if there was nothing to read from stdin & no initial_bytes were provided. +/// These can be passed back as `initial_bytes` if more values should be read. +pub fn t_from_stdin_or_die_user_error_with_rest<'a>( + prog_name: &'_ str, + initial_bytes: &[u8], +) -> Option<(Vec<u8>, T)> { + let mut chonker = Chunkyboi::new(std::io::stdin().lock(), 4096); + // The vec to pass to the parser on each step + let mut parser_vec: Vec<u8> = initial_bytes.to_vec(); + // whether stdin was already empty + let mut was_empty: bool = false; + loop { + match chonker.next() { + None => { + if parser_vec.is_empty() { + return None; + } else { + was_empty = true + } + } + Some(Err(err)) => exec_helpers::die_temporary( prog_name, - format!( - "stdin contained some soup after netencode value: {:?}", - String::from_utf8_lossy(rest) - ), + &format!("could not read from stdin: {:?}", err), + ), + Some(Ok(mut new_bytes)) => parser_vec.append(&mut new_bytes), + } + + match parse::t_t(&parser_vec) { + Ok((rest, t)) => return Some((rest.to_owned(), t)), + Err(nom::Err::Incomplete(Needed)) => { + if was_empty { + exec_helpers::die_user_error( + prog_name, + &format!( + "unable to parse netencode from stdin, input incomplete: {:?}", + parser_vec + ), + ); + } + // read more from stdin and try parsing again + continue; + } + Err(err) => exec_helpers::die_user_error( + prog_name, + &format!("unable to parse netencode from stdin: {:?}", err), ), - }, - Err(err) => exec_helpers::die_user_error( - prog_name, - format!("unable to parse netencode from stdin: {:?}", err), - ), - }; - u + } + } +} + +// iter helper +// TODO: put into its own module +struct Chunkyboi<T> { + inner: T, + buf: Vec<u8>, +} + +impl<R: Read> Chunkyboi<R> { + fn new(inner: R, chunksize: usize) -> Self { + let buf = vec![0; chunksize]; + Chunkyboi { inner, buf } + } +} + +impl<R: Read> Iterator for Chunkyboi<R> { + type Item = std::io::Result<Vec<u8>>; + + fn next(&mut self) -> Option<std::io::Result<Vec<u8>>> { + match self.inner.read(&mut self.buf) { + Ok(0) => None, + Ok(read) => { + // clone a new buffer so we can reuse the internal one + Some(Ok(self.buf[..read].to_owned())) + } + Err(err) => Some(Err(err)), + } + } } pub mod parse { |