diff options
Diffstat (limited to 'users/Profpatsch/netencode')
-rw-r--r-- | users/Profpatsch/netencode/Netencode.hs | 433 | ||||
-rw-r--r-- | users/Profpatsch/netencode/Netencode/Parse.hs | 102 | ||||
-rw-r--r-- | users/Profpatsch/netencode/README.md | 28 | ||||
-rw-r--r-- | users/Profpatsch/netencode/default.nix | 149 | ||||
-rw-r--r-- | users/Profpatsch/netencode/gen.nix | 40 | ||||
-rw-r--r-- | users/Profpatsch/netencode/netencode-mustache.rs | 29 | ||||
-rw-r--r-- | users/Profpatsch/netencode/netencode.cabal | 74 | ||||
-rw-r--r-- | users/Profpatsch/netencode/netencode.rs | 627 | ||||
-rw-r--r-- | users/Profpatsch/netencode/pretty.rs | 73 |
9 files changed, 1191 insertions, 364 deletions
diff --git a/users/Profpatsch/netencode/Netencode.hs b/users/Profpatsch/netencode/Netencode.hs new file mode 100644 index 0000000000..ca93ab2fef --- /dev/null +++ b/users/Profpatsch/netencode/Netencode.hs @@ -0,0 +1,433 @@ +{-# LANGUAGE AllowAmbiguousTypes #-} +{-# LANGUAGE QuasiQuotes #-} +{-# LANGUAGE TemplateHaskell #-} + +module Netencode where + +import Control.Applicative (many) +import Data.Attoparsec.ByteString qualified as Atto +import Data.Attoparsec.ByteString.Char8 qualified as Atto.Char +import Data.ByteString qualified as ByteString +import Data.ByteString.Builder (Builder) +import Data.ByteString.Builder qualified as Builder +import Data.ByteString.Lazy qualified as ByteString.Lazy +import Data.Fix (Fix (Fix)) +import Data.Fix qualified as Fix +import Data.Functor.Classes (Eq1 (liftEq)) +import Data.Int (Int16, Int32, Int64, Int8) +import Data.Map.NonEmpty (NEMap) +import Data.Map.NonEmpty qualified as NEMap +import Data.Semigroup qualified as Semi +import Data.String (IsString) +import Data.Word (Word16, Word32, Word64) +import GHC.Exts (fromString) +import Hedgehog qualified as Hedge +import Hedgehog.Gen qualified as Gen +import Hedgehog.Range qualified as Range +import PossehlAnalyticsPrelude +import Text.Show.Deriving +import Prelude hiding (sum) + +-- | Netencode type base functor. +-- +-- Recursive elements have a @rec@. 
+data TF rec + = -- | Unit value + Unit + | -- | Boolean (2^1) + N1 Bool + | -- | Byte (2^3) + N3 Word8 + | -- | 64-bit Natural (2^6) + N6 Word64 + | -- | 64-bit Integer (2^6) + I6 Int64 + | -- | Unicode Text + Text Text + | -- | Arbitrary Bytestring + Bytes ByteString + | -- | A constructor of a(n open) Sum + Sum (Tag Text rec) + | -- | Record + Record (NEMap Text rec) + | -- | List + List [rec] + deriving stock (Show, Eq, Functor) + +instance Eq1 TF where + liftEq _ Unit Unit = True + liftEq _ (N1 b) (N1 b') = b == b' + liftEq _ (N3 w8) (N3 w8') = w8 == w8' + liftEq _ (N6 w64) (N6 w64') = w64 == w64' + liftEq _ (I6 i64) (I6 i64') = i64 == i64' + liftEq _ (Text t) (Text t') = t == t' + liftEq _ (Bytes b) (Bytes b') = b == b' + liftEq eq (Sum t) (Sum t') = t.tagTag == t'.tagTag && eq (t.tagVal) (t'.tagVal) -- the tag name is part of the value: sums with different tags must not compare equal + liftEq eq (Record m) (Record m') = liftEq eq m m' + liftEq eq (List xs) (List xs') = liftEq eq xs xs' + liftEq _ _ _ = False + +-- | A tagged value +data Tag tag val = Tag + { tagTag :: tag, + tagVal :: val + } + deriving stock (Show, Eq, Functor) + +$(Text.Show.Deriving.deriveShow1 ''Tag) +$(Text.Show.Deriving.deriveShow1 ''TF) + +-- | The Netencode type +newtype T = T {unT :: Fix TF} + deriving stock (Eq, Show) + +-- | Create a unit +unit :: T +unit = T $ Fix Unit + +-- | Create a boolean +n1 :: Bool -> T +n1 = T . Fix . N1 + +-- | Create a byte +n3 :: Word8 -> T +n3 = T . Fix . N3 + +-- | Create a 64-bit natural +n6 :: Word64 -> T +n6 = T . Fix . N6 + +-- | Create a 64-bit integer +i6 :: Int64 -> T +i6 = T . Fix . I6 + +-- | Create a UTF-8 unicode text +text :: Text -> T +text = T . Fix . Text + +-- | Create an arbitrary bytestring +bytes :: ByteString -> T +bytes = T . Fix . Bytes + +-- | Create a tagged value from a tag name and a value +tag :: Text -> T -> T +tag key val = T $ Fix $ Sum $ coerce @(Tag Text T) @(Tag Text (Fix TF)) $ Tag key val + +-- | Create a record from a non-empty map +record :: NEMap Text T -> T +record = T . Fix . Record . 
coerce @(NEMap Text T) @(NEMap Text (Fix TF)) + +-- | Create a list +list :: [T] -> T +list = T . Fix . List . coerce @[T] @([Fix TF]) + +-- | Stable encoding of a netencode value. Record keys will be sorted lexicographically ascending. +netencodeEncodeStable :: T -> Builder +netencodeEncodeStable (T fix) = Fix.foldFix (netencodeEncodeStableF id) fix + +-- | Stable encoding of a netencode functor value. Record keys will be sorted lexicographically ascending. +-- +-- The given function is used for encoding the recursive values. +netencodeEncodeStableF :: (rec -> Builder) -> TF rec -> Builder +netencodeEncodeStableF inner tf = builder go + where + -- TODO: directly pass in BL? + innerBL = fromBuilder . inner + go = case tf of + Unit -> "u," + N1 False -> "n1:0," + N1 True -> "n1:1," + N3 w8 -> "n3:" <> fromBuilder (Builder.word8Dec w8) <> "," + N6 w64 -> "n6:" <> fromBuilder (Builder.word64Dec w64) <> "," + I6 i64 -> "i6:" <> fromBuilder (Builder.int64Dec i64) <> "," + Text t -> + let b = fromText t + in "t" <> builderLenDec b <> ":" <> b <> "," + Bytes b -> "b" <> builderLenDec (fromByteString b) <> ":" <> fromByteString b <> "," + Sum (Tag key val) -> encTag key val + Record m -> + -- NEMap uses Map internally, and that folds in lexicographic ascending order over the key. + -- Since these are `Text` in our case, this is stable. 
+ let mBuilder = m & NEMap.foldMapWithKey encTag + in "{" <> builderLenDec mBuilder <> ":" <> mBuilder <> "}" + List xs -> + let xsBuilder = xs <&> innerBL & mconcat + in "[" <> builderLenDec xsBuilder <> ":" <> xsBuilder <> "]" + where + encTag key val = + let bKey = fromText key + in "<" <> builderLenDec bKey <> ":" <> bKey <> "|" <> innerBL val + +-- | A builder that knows its own size in bytes +newtype BL = BL (Builder, Semi.Sum Natural) + deriving newtype (Monoid, Semigroup) + +instance IsString BL where + fromString s = + BL + ( fromString @Builder s, + fromString @ByteString s + & ByteString.length + & intToNatural + & fromMaybe 0 + & Semi.Sum + ) + +-- | Retrieve the builder +builder :: BL -> Builder +builder (BL (b, _)) = b + +-- | Retrieve the bytestring length +builderLen :: BL -> Natural +builderLen (BL (_, len)) = Semi.getSum $ len + +-- | Take a 'BL' and create a new 'BL' that represents the length as a decimal integer +builderLenDec :: BL -> BL +builderLenDec (BL (_, len)) = + let b = Builder.intDec $ (len & Semi.getSum & fromIntegral @Natural @Int) + in b & fromBuilder + +-- | Create a 'BL' from a 'Builder'. +-- +-- Not efficient, goes back to a lazy bytestring to get the length +fromBuilder :: Builder -> BL +fromBuilder b = + BL + ( b, + b + & Builder.toLazyByteString + & ByteString.Lazy.length + & fromIntegral @Int64 @Natural + & Semi.Sum + ) + +-- | Create a 'BL' from a 'ByteString'. +fromByteString :: ByteString -> BL +fromByteString b = + BL + ( Builder.byteString b, + b + & ByteString.length + & fromIntegral @Int @Natural + & Semi.Sum + ) + +-- | Create a 'BL' from a 'Text'. +fromText :: Text -> BL +fromText t = t & textToBytesUtf8 & fromByteString + +-- | Parser for a netencode value. +netencodeParser :: Atto.Parser T +netencodeParser = T <$> go + where + go = Fix <$> netencodeParserF go + +-- | Parser for one level of a netencode value. Requires a parser for the recursion. 
+netencodeParserF :: Atto.Parser rec -> Atto.Parser (TF rec) +netencodeParserF inner = do + typeTag <- Atto.Char.anyChar + case typeTag of + 't' -> Text <$> textParser + 'b' -> Bytes <$> bytesParser + 'u' -> unitParser + '<' -> Sum <$> tagParser + '{' -> Record <$> recordParser + '[' -> List <$> listParser + 'n' -> naturalParser + 'i' -> I6 <$> intParser + c -> fail ([c] <> " is not a valid netencode tag") + where + bytesParser = do + len <- boundedDecimalFail Atto.<?> "bytes is missing a digit specifying the length" + _ <- Atto.Char.char ':' Atto.<?> "bytes did not have : after length" + bytes' <- Atto.take len + _ <- Atto.Char.char ',' Atto.<?> "bytes did not end with ," + pure bytes' + + textParser = do + len <- boundedDecimalFail Atto.<?> "text is missing a digit specifying the length" + _ <- Atto.Char.char ':' Atto.<?> "text did not have : after length" + text' <- + Atto.take len <&> bytesToTextUtf8 >>= \case + Left err -> fail [fmt|cannot decode text as utf8: {err & prettyError}|] + Right t -> pure t + _ <- Atto.Char.char ',' Atto.<?> "text did not end with ," + pure text' + + unitParser = do + _ <- Atto.Char.char ',' Atto.<?> "unit did not end with ," + pure $ Unit + + tagParser = do + len <- boundedDecimalFail Atto.<?> "tag is missing a digit specifying the length" + _ <- Atto.Char.char ':' Atto.<?> "tag did not have : after length" + tagTag <- + Atto.take len <&> bytesToTextUtf8 >>= \case + Left err -> fail [fmt|cannot decode tag key as utf8: {err & prettyError}|] + Right t -> pure t + _ <- Atto.Char.char '|' Atto.<?> "tag was missing the key/value separator (|)" + tagVal <- inner + pure $ Tag {..} + + recordParser = do + -- TODO: the record does not use its inner length because we are descending into the inner parsers. + -- This is a smell! In theory it can be used to skip parsing the whole inner keys. 
+ _len <- boundedDecimalFail Atto.<?> "record is missing a digit specifying the length" + _ <- Atto.Char.char ':' Atto.<?> "record did not have : after length" + record' <- + many (Atto.Char.char '<' >> tagParser) <&> nonEmpty >>= \case + Nothing -> fail "record is not allowed to have 0 elements" + Just tags -> + pure $ + tags + <&> (\t -> (t.tagTag, t.tagVal)) + -- later keys are preferred if they are duplicates, according to the standard + & NEMap.fromList + _ <- Atto.Char.char '}' Atto.<?> "record did not end with }" + pure record' + + listParser = do + -- TODO: the list does not use its inner length because we are descending into the inner parsers. + -- This is a smell! In theory it can be used to skip parsing the whole inner keys. + _len <- boundedDecimalFail Atto.<?> "list is missing a digit specifying the length" + _ <- Atto.Char.char ':' Atto.<?> "list did not have : after length" + -- TODO: allow empty lists? + list' <- many inner + _ <- Atto.Char.char ']' Atto.<?> "list did not end with ]" + pure list' + + intParser = do + let p :: forall parseSize. (Bounded parseSize, Integral parseSize) => (Integer -> Atto.Parser Int64) + p n = do + _ <- Atto.Char.char ':' Atto.<?> [fmt|i{n & show} did not have : after length|] + isNegative <- Atto.option False (Atto.Char.char '-' <&> \_c -> True) + int <- + boundedDecimal @parseSize >>= \case + Nothing -> fail [fmt|cannot parse into i{n & show}, the number is too big (would overflow)|] + Just i -> + pure $ + if isNegative + then -- TODO: this should alread be done in the decimal parser, @minBound@ cannot be parsed cause it’s one more than @(-maxBound)@! + (-i) + else i + _ <- Atto.Char.char ',' Atto.<?> [fmt|i{n & show} did not end with ,|] + pure $ fromIntegral @parseSize @Int64 int + digit <- Atto.Char.digit + case digit of + -- TODO: separate parser for i1 and i2 that makes sure the boundaries are right! 
+ '1' -> p @Int8 1 + '2' -> p @Int8 2 + '3' -> p @Int8 3 + '4' -> p @Int16 4 + '5' -> p @Int32 5 + '6' -> p @Int64 6 + '7' -> fail [fmt|i parser only supports numbers up to size 6, was 7|] + '8' -> fail [fmt|i parser only supports numbers up to size 6, was 8|] + '9' -> fail [fmt|i parser only supports numbers up to size 6, was 9|] + o -> fail [fmt|i number with length {o & show} not possible|] + + naturalParser = do + let p :: forall parseSize finalSize. (Bounded parseSize, Integral parseSize, Num finalSize) => (Integer -> Atto.Parser finalSize) + p n = do + _ <- Atto.Char.char ':' Atto.<?> [fmt|n{n & show} did not have : after length|] + int <- + boundedDecimal @parseSize >>= \case + Nothing -> fail [fmt|cannot parse into n{n & show}, the number is too big (would overflow)|] + Just i -> pure i + + _ <- Atto.Char.char ',' Atto.<?> [fmt|n{n & show} did not end with ,|] + pure $ fromIntegral @parseSize @finalSize int + let b n = do + _ <- Atto.Char.char ':' Atto.<?> [fmt|n{n & show} did not have : after length|] + bool <- + (Atto.Char.char '0' >> pure False) + <|> (Atto.Char.char '1' >> pure True) + _ <- Atto.Char.char ',' Atto.<?> [fmt|n{n & show} did not end with ,|] + pure bool + + digit <- Atto.Char.digit + case digit of + -- TODO: separate parser for n1 and n2 that makes sure the boundaries are right! + '1' -> N1 <$> b 1 + '2' -> N3 <$> p @Word8 @Word8 2 + '3' -> N3 <$> p @Word8 @Word8 3 + '4' -> N6 <$> p @Word16 @Word64 4 + '5' -> N6 <$> p @Word32 @Word64 5 + '6' -> N6 <$> p @Word64 @Word64 6 + '7' -> fail [fmt|n parser only supports numbers up to size 6, was 7|] + '8' -> fail [fmt|n parser only supports numbers up to size 6, was 8|] + '9' -> fail [fmt|n parser only supports numbers up to size 6, was 9|] + o -> fail [fmt|n number with length {o & show} not possible|] + +-- | Parser for a bounded decimal that does not overflow the decimal. +-- +-- via https://www.extrema.is/blog/2021/10/20/parsing-bounded-integers +boundedDecimal :: forall a. 
(Bounded a, Integral a) => Atto.Parser (Maybe a) +boundedDecimal = do + i :: Integer <- decimal + pure $ + if (i :: Integer) > fromIntegral (maxBound :: a) + then Nothing + else Just $ fromIntegral i + where + -- Copied from @Attoparsec.Text@ and adjusted to bytestring + decimal :: (Integral a2) => Atto.Parser a2 + decimal = ByteString.foldl' step 0 <$> Atto.Char.takeWhile1 Atto.Char.isDigit + where + step a c = a * 10 + fromIntegral (c - 48) +{-# SPECIALIZE boundedDecimal :: Atto.Parser (Maybe Int) #-} +{-# SPECIALIZE boundedDecimal :: Atto.Parser (Maybe Int64) #-} +{-# SPECIALIZE boundedDecimal :: Atto.Parser (Maybe Word8) #-} +{-# SPECIALIZE boundedDecimal :: Atto.Parser (Maybe Word64) #-} + +-- | 'boundedDecimal', but fail the parser if the decimal overflows. +boundedDecimalFail :: Atto.Parser Int +boundedDecimalFail = + boundedDecimal >>= \case + Nothing -> fail "decimal out of range" + Just a -> pure a + +-- | Hedgehog generator for a netencode value. +genNetencode :: Hedge.MonadGen m => m T +genNetencode = + Gen.recursive + Gen.choice + [ -- these are bundled into one Gen, so that scalar elements get chosen less frequently, and the generator produces nicely nested examples + Gen.frequency + [ (1, pure unit), + (1, n1 <$> Gen.bool), + (1, n3 <$> Gen.element [0, 1, 5]), + (1, n6 <$> Gen.element [0, 1, 5]), + (1, i6 <$> Gen.element [-1, 1, 5]), + (2, text <$> Gen.text (Range.linear 1 10) Gen.lower), + (2, bytes <$> Gen.bytes (Range.linear 1 10)) + ] + ] + [ do + key <- Gen.text (Range.linear 3 10) Gen.lower + val <- genNetencode + pure $ tag key val, + record + <$> ( let k = Gen.text (Range.linear 3 10) Gen.lower + v = genNetencode + in NEMap.insertMap + <$> k + <*> v + <*> ( (Gen.map (Range.linear 0 3)) $ + (,) <$> k <*> v + ) + ) + ] + +-- | Hedgehog property: encoding a netencode value and parsing it again returns the same result. 
+prop_netencodeRoundtrip :: Hedge.Property +prop_netencodeRoundtrip = Hedge.property $ do + enc <- Hedge.forAll genNetencode + ( Atto.parseOnly + netencodeParser + ( netencodeEncodeStable enc + & Builder.toLazyByteString + & toStrictBytes + ) + ) + Hedge.=== (Right enc) diff --git a/users/Profpatsch/netencode/Netencode/Parse.hs b/users/Profpatsch/netencode/Netencode/Parse.hs new file mode 100644 index 0000000000..184fb5f912 --- /dev/null +++ b/users/Profpatsch/netencode/Netencode/Parse.hs @@ -0,0 +1,102 @@ +{-# LANGUAGE QuasiQuotes #-} + +module Netencode.Parse where + +import Control.Category qualified +import Control.Selective (Selective) +import Data.Error.Tree +import Data.Fix (Fix (..)) +import Data.Functor.Compose +import Data.List qualified as List +import Data.Map.NonEmpty (NEMap) +import Data.Map.NonEmpty qualified as NEMap +import Data.Semigroupoid qualified as Semigroupiod +import Data.Semigroupoid qualified as Semigroupoid +import Data.Text qualified as Text +import Netencode qualified +import PossehlAnalyticsPrelude +import Prelude hiding (log) + +newtype Parse from to + = -- TODO: the way @Context = [Text]@ has to be forwarded to everything is kinda shitty. + -- This is essentially just a difference list, and can probably be treated as a function in the output? + Parse (([Text], from) -> Validation (NonEmpty ErrorTree) ([Text], to)) + deriving + (Functor, Applicative, Selective) + via ( Compose + ( Compose + ((->) ([Text], from)) + (Validation (NonEmpty ErrorTree)) + ) + ((,) [Text]) + ) + +instance Semigroupoid Parse where + o p2 p1 = Parse $ \from -> case runParse' p1 from of + Failure err -> Failure err + Success to1 -> runParse' p2 to1 + +instance Category Parse where + (.) 
= Semigroupoid.o + id = Parse $ \t -> Success t + +runParse :: Error -> Parse from to -> from -> Either ErrorTree to +runParse errMsg parser t = + (["$"], t) + & runParse' parser + <&> snd + & first (nestedMultiError errMsg) + & validationToEither + +runParse' :: Parse from to -> ([Text], from) -> Validation (NonEmpty ErrorTree) ([Text], to) +runParse' (Parse f) from = f from + +parseEither :: (([Text], from) -> Either ErrorTree ([Text], to)) -> Parse from to +parseEither f = Parse $ \from -> f from & eitherToListValidation + +tAs :: (Netencode.TF (Fix Netencode.TF) -> Either ([Text] -> ErrorTree) to) -> Parse Netencode.T to +tAs f = parseEither ((\(context, Netencode.T (Fix tf)) -> f tf & bimap ($ context) (context,))) + +key :: Text -> Parse (NEMap Text to) to +key name = parseEither $ \(context, rec) -> + rec + & NEMap.lookup name + & annotate (errorTreeContext (showContext context) [fmt|Key "{name}" does not exist|]) + <&> (addContext name context,) + +showContext :: [Text] -> Text +showContext context = context & List.reverse & Text.intercalate "." 
+ +addContext :: a -> [a] -> [a] +addContext = (:) + +asText :: Parse Netencode.T Text +asText = tAs $ \case + Netencode.Text t -> pure t + other -> typeError "of text" other + +asBytes :: Parse Netencode.T ByteString +asBytes = tAs $ \case + Netencode.Bytes b -> pure b + other -> typeError "of bytes" other + +asRecord :: Parse Netencode.T (NEMap Text (Netencode.T)) +asRecord = tAs $ \case + Netencode.Record rec -> pure (rec <&> Netencode.T) + other -> typeError "a record" other + +typeError :: Text -> Netencode.TF ignored -> (Either ([Text] -> ErrorTree) b) +typeError should is = do + let otherS = is <&> (\_ -> ("…" :: String)) & show + Left $ \context -> errorTreeContext (showContext context) [fmt|Value is not {should}, but a {otherS}|] + +orThrowParseError :: + Parse (Either Error to) to +orThrowParseError = Parse $ \case + (context, Left err) -> + err + & singleError + & errorTreeContext (showContext context) + & singleton + & Failure + (context, Right to) -> Success (context, to) diff --git a/users/Profpatsch/netencode/README.md b/users/Profpatsch/netencode/README.md index 3058e36eaf..3538a110a6 100644 --- a/users/Profpatsch/netencode/README.md +++ b/users/Profpatsch/netencode/README.md @@ -1,6 +1,6 @@ # netencode 0.1-unreleased -[bencode][] and [netstring][]-inspired pipe format that should be trivial go generate correctly in every context (only requires a `byte_length()` and a `printf()`), easy to parse (100 lines of code or less), mostly human-decipherable for easy debugging, and support nested record and sum types. +[bencode][] and [netstring][]-inspired pipe format that should be trivial to generate correctly in every context (only requires a `byte_length()` and a `printf()`), easy to parse (100 lines of code or less), mostly human-decipherable for easy debugging, and support nested record and sum types. ## scalars @@ -73,7 +73,11 @@ A tag (`<`) gives a value a name. 
The tag is UTF-8 encoded, starting with its le ### records (products/records), also maps A record (`{`) is a concatenation of tags (`<`). It needs to be closed with `}`. -If tag names repeat the later ones should be ignored. Ordering does not matter. + +If tag names repeat, the *earlier* ones should be ignored. +Using the last tag corresponds with the way most languages handle converting a list of tuples to Maps, by using a for-loop and Map.insert without checking the contents first. Otherwise you’d have to reverse the list first or remember which keys you already inserted. + +Ordering of tags in a record does not matter. Similar to text, records start with the length of their *whole encoded content*, in bytes. This makes it possible to treat their contents as opaque bytestrings. @@ -81,7 +85,7 @@ Similar to text, records start with the length of their *whole encoded content*, * A record with one empty field, `foo`: `{9:<3:foo|u,}` * A record with two fields, `foo` and `x`: `{21:<3:foo|u,<1:x|t3:baz,}` * The same record: `{21:<1:x|t3:baz,<3:foo|u,}` -* The same record (later occurences of fields are ignored): `{28:<1:x|t3:baz,<3:foo|u,<1:x|u,}` +* The same record (earlier occurrences of fields are ignored): `{28:<1:x|u,<1:x|t3:baz,<3:foo|u,}` ### sums (tagged unions) @@ -98,6 +102,24 @@ Similar to records, lists start with the length of their whole encoded content. * The list with text `foo` followed by i3 `-42`: `[14:t3:foo,i3:-42,]` * The list with `Some` and `None` tags: `[35:<4:Some|t3:foo,<4:None|u,<4:None|u,]` +## parser security considerations + +The length field is a decimal number that is not length-restricted, +meaning an attacker could give an infinitely long length (or extremely long) +thus overflowing your parser if you are not careful. + +You should thus put a practical length limit to the length of length fields, +which implicitly enforces a length limit on how long the value itself can be. + +Start by defining a max value length in bytes. 
+Then count the number of decimal digits in that number. + +So if your max length is 1024 bytes, your length field can be a maximum `count_digits(1024) == 4` bytes long. + +Thus, if you restrict your parser to a length field of 4 bytes, +it will never read a value longer than 9999 bytes, and an explicit check of the parsed length caps that at 1024 bytes +(plus 1 byte for the type tag, 4 bytes for the length, and 2 bytes for the separator & ending character). + ## motivation TODO diff --git a/users/Profpatsch/netencode/default.nix b/users/Profpatsch/netencode/default.nix index db892cc9de..6e7dce489a 100644 --- a/users/Profpatsch/netencode/default.nix +++ b/users/Profpatsch/netencode/default.nix @@ -1,101 +1,133 @@ { depot, pkgs, lib, ... }: let - netencode-rs = depot.nix.writers.rustSimpleLib { + netencode-rs = depot.nix.writers.rustSimpleLib + { name = "netencode"; dependencies = [ depot.third_party.rust-crates.nom depot.users.Profpatsch.execline.exec-helpers ]; - } (builtins.readFile ./netencode.rs); + } + (builtins.readFile ./netencode.rs); - gen = import ./gen.nix { inherit lib; }; + netencode-hs = pkgs.haskellPackages.mkDerivation { + pname = "netencode"; + version = "0.1.0"; - pretty-rs = depot.nix.writers.rustSimpleLib { - name = "netencode-pretty"; - dependencies = [ - netencode-rs + src = depot.users.Profpatsch.exactSource ./. 
[ + ./netencode.cabal + ./Netencode.hs + ./Netencode/Parse.hs ]; - } (builtins.readFile ./pretty.rs); - - pretty = depot.nix.writers.rustSimple { - name = "netencode-pretty"; - dependencies = [ - netencode-rs - pretty-rs - depot.users.Profpatsch.execline.exec-helpers + + libraryHaskellDepends = [ + pkgs.haskellPackages.hedgehog + pkgs.haskellPackages.nonempty-containers + pkgs.haskellPackages.deriving-compat + pkgs.haskellPackages.data-fix + pkgs.haskellPackages.bytestring + pkgs.haskellPackages.attoparsec + pkgs.haskellPackages.pa-prelude + pkgs.haskellPackages.pa-label + pkgs.haskellPackages.pa-error-tree ]; - } '' + + isLibrary = true; + license = lib.licenses.mit; + + + }; + + gen = import ./gen.nix { inherit lib; }; + + pretty-rs = depot.nix.writers.rustSimpleLib + { + name = "netencode-pretty"; + dependencies = [ + netencode-rs + ]; + } + (builtins.readFile ./pretty.rs); + + pretty = depot.nix.writers.rustSimple + { + name = "netencode-pretty"; + dependencies = [ + netencode-rs + pretty-rs + depot.users.Profpatsch.execline.exec-helpers + ]; + } '' extern crate netencode; extern crate netencode_pretty; extern crate exec_helpers; fn main() { let (_, prog) = exec_helpers::args_for_exec("netencode-pretty", 0); - let mut buf = vec![]; - let u = netencode::u_from_stdin_or_die_user_error("netencode-pretty", &mut buf); - match netencode_pretty::Pretty::from_u(u).print_multiline(&mut std::io::stdout()) { + let t = netencode::t_from_stdin_or_die_user_error("netencode-pretty"); + match netencode_pretty::Pretty::from_u(t.to_u()).print_multiline(&mut std::io::stdout()) { Ok(()) => {}, Err(err) => exec_helpers::die_temporary("netencode-pretty", format!("could not write to stdout: {}", err)) } } ''; - netencode-mustache = depot.nix.writers.rustSimple { - name = "netencode_mustache"; - dependencies = [ - depot.users.Profpatsch.arglib.netencode.rust - netencode-rs - depot.third_party.rust-crates.mustache - ]; - } (builtins.readFile ./netencode-mustache.rs); + 
netencode-mustache = depot.nix.writers.rustSimple + { + name = "netencode_mustache"; + dependencies = [ + depot.users.Profpatsch.arglib.netencode.rust + netencode-rs + depot.third_party.rust-crates.mustache + ]; + } + (builtins.readFile ./netencode-mustache.rs); - record-get = depot.nix.writers.rustSimple { - name = "record-get"; - dependencies = [ - netencode-rs - depot.users.Profpatsch.execline.exec-helpers - depot.users.Profpatsch.arglib.netencode.rust - ]; - } '' + record-get = depot.nix.writers.rustSimple + { + name = "record-get"; + dependencies = [ + netencode-rs + depot.users.Profpatsch.execline.exec-helpers + ]; + } '' extern crate netencode; - extern crate arglib_netencode; extern crate exec_helpers; use netencode::{encode, dec}; use netencode::dec::{Decoder, DecodeError}; fn main() { - let mut buf = vec![]; let args = exec_helpers::args("record-get", 1); let field = match std::str::from_utf8(&args[0]) { Ok(f) => f, Err(_e) => exec_helpers::die_user_error("record-get", format!("The field name needs to be valid unicode")) }; - let u = netencode::u_from_stdin_or_die_user_error("record-get", &mut buf); - match (dec::RecordDot {field, inner: dec::AnyU }).dec(u) { + let t = netencode::t_from_stdin_or_die_user_error("record-get"); + match (dec::RecordDot {field, inner: dec::AnyU }).dec(t.to_u()) { Ok(u) => encode(&mut std::io::stdout(), &u).expect("encoding to stdout failed"), Err(DecodeError(err)) => exec_helpers::die_user_error("record-get", err) } } ''; - record-splice-env = depot.nix.writers.rustSimple { - name = "record-splice-env"; - dependencies = [ - netencode-rs - depot.users.Profpatsch.execline.exec-helpers - ]; - } '' + record-splice-env = depot.nix.writers.rustSimple + { + name = "record-splice-env"; + dependencies = [ + netencode-rs + depot.users.Profpatsch.execline.exec-helpers + ]; + } '' extern crate netencode; extern crate exec_helpers; use netencode::dec::{Record, Try, ScalarAsBytes, Decoder, DecodeError}; fn main() { - let mut buf = vec![]; - 
let u = netencode::u_from_stdin_or_die_user_error("record-splice-env", &mut buf); + let t = netencode::t_from_stdin_or_die_user_error("record-splice-env"); let (_, prog) = exec_helpers::args_for_exec("record-splice-env", 0); - match Record(Try(ScalarAsBytes)).dec(u) { + match Record(Try(ScalarAsBytes)).dec(t.to_u()) { Ok(map) => { exec_helpers::exec_into_args( "record-splice-env", @@ -109,13 +141,14 @@ let } ''; - env-splice-record = depot.nix.writers.rustSimple { - name = "env-splice-record"; - dependencies = [ - netencode-rs - depot.users.Profpatsch.execline.exec-helpers - ]; - } '' + env-splice-record = depot.nix.writers.rustSimple + { + name = "env-splice-record"; + dependencies = [ + netencode-rs + depot.users.Profpatsch.execline.exec-helpers + ]; + } '' extern crate netencode; extern crate exec_helpers; use netencode::{T}; @@ -135,9 +168,11 @@ let } ''; -in depot.nix.utils.drvTargets { +in +depot.nix.readTree.drvTargets { inherit netencode-rs + netencode-hs pretty-rs pretty netencode-mustache diff --git a/users/Profpatsch/netencode/gen.nix b/users/Profpatsch/netencode/gen.nix index 305ff7b08d..efc9629ca0 100644 --- a/users/Profpatsch/netencode/gen.nix +++ b/users/Profpatsch/netencode/gen.nix @@ -27,29 +27,33 @@ let concatStrings = builtins.concatStringsSep ""; record = lokv: netstring "{" "}" - (concatStrings (map ({key, val}: tag key val) lokv)); + (concatStrings (map ({ key, val }: tag key val) lokv)); list = l: netstring "[" "]" (concatStrings l); dwim = val: - let match = { - "bool" = n1; - "int" = i6; - "string" = text; - "set" = attrs: - # it could be a derivation, then just return the path - if attrs.type or "" == "derivation" then text "${attrs}" - else - record (lib.mapAttrsToList - (k: v: { - key = k; - val = dwim v; - }) attrs); - "list" = l: list (map dwim l); - }; - in match.${builtins.typeOf val} val; + let + match = { + "bool" = n1; + "int" = i6; + "string" = text; + "set" = attrs: + # it could be a derivation, then just return the path + if 
attrs.type or "" == "derivation" then text "${attrs}" + else + record (lib.mapAttrsToList + (k: v: { + key = k; + val = dwim v; + }) + attrs); + "list" = l: list (map dwim l); + }; + in + match.${builtins.typeOf val} val; -in { +in +{ inherit unit n1 diff --git a/users/Profpatsch/netencode/netencode-mustache.rs b/users/Profpatsch/netencode/netencode-mustache.rs index ee7bafed22..73ed5be1de 100644 --- a/users/Profpatsch/netencode/netencode-mustache.rs +++ b/users/Profpatsch/netencode/netencode-mustache.rs @@ -1,12 +1,12 @@ -extern crate netencode; -extern crate mustache; extern crate arglib_netencode; +extern crate mustache; +extern crate netencode; -use mustache::{Data}; -use netencode::{T}; +use mustache::Data; +use netencode::T; use std::collections::HashMap; -use std::os::unix::ffi::{OsStrExt}; -use std::io::{Read}; +use std::io::Read; +use std::os::unix::ffi::OsStrExt; fn netencode_to_mustache_data_dwim(t: T) -> Data { match t { @@ -25,27 +25,26 @@ fn netencode_to_mustache_data_dwim(t: T) -> Data { T::Record(xs) => Data::Map( xs.into_iter() .map(|(key, val)| (key, netencode_to_mustache_data_dwim(val))) - .collect::<HashMap<_,_>>() + .collect::<HashMap<_, _>>(), ), T::List(xs) => Data::Vec( xs.into_iter() .map(|x| netencode_to_mustache_data_dwim(x)) - .collect::<Vec<_>>() + .collect::<Vec<_>>(), ), } } pub fn from_stdin() -> () { - let data = netencode_to_mustache_data_dwim( - arglib_netencode::arglib_netencode("netencode-mustache", Some(std::ffi::OsStr::new("TEMPLATE_DATA"))) - ); + let data = netencode_to_mustache_data_dwim(arglib_netencode::arglib_netencode( + "netencode-mustache", + Some(std::ffi::OsStr::new("TEMPLATE_DATA")), + )); let mut stdin = String::new(); std::io::stdin().read_to_string(&mut stdin).unwrap(); mustache::compile_str(&stdin) - .and_then(|templ| templ.render_data( - &mut std::io::stdout(), - &data - )).unwrap() + .and_then(|templ| templ.render_data(&mut std::io::stdout(), &data)) + .unwrap() } pub fn main() { diff --git 
a/users/Profpatsch/netencode/netencode.cabal b/users/Profpatsch/netencode/netencode.cabal new file mode 100644 index 0000000000..7bff4487bb --- /dev/null +++ b/users/Profpatsch/netencode/netencode.cabal @@ -0,0 +1,74 @@ +cabal-version: 3.0 +name: netencode +version: 0.1.0.0 +author: Profpatsch +maintainer: mail@profpatsch.de + + +common common-options + ghc-options: + -Wall + -Wno-type-defaults + -Wunused-packages + -Wredundant-constraints + -fwarn-missing-deriving-strategies + + -- See https://downloads.haskell.org/ghc/latest/docs/users_guide/exts.html + -- for a description of all these extensions + default-extensions: + -- Infer Applicative instead of Monad where possible + ApplicativeDo + + -- Allow literal strings to be Text + OverloadedStrings + + -- Syntactic sugar improvements + LambdaCase + MultiWayIf + + -- Makes the (deprecated) usage of * instead of Data.Kind.Type an error + NoStarIsType + + -- Convenient and crucial to deal with ambiguous field names, commonly + -- known as RecordDotSyntax + OverloadedRecordDot + + -- does not export record fields as functions, use OverloadedRecordDot to access instead + NoFieldSelectors + + -- Record punning + RecordWildCards + + -- Improved Deriving + DerivingStrategies + DerivingVia + + -- Type-level strings + DataKinds + + -- to enable the `type` keyword in import lists (ormolu uses this automatically) + ExplicitNamespaces + + default-language: GHC2021 + + +library + import: common-options + exposed-modules: + Netencode, + Netencode.Parse + + build-depends: + base >=4.15 && <5, + pa-prelude, + pa-label, + pa-error-tree, + hedgehog, + nonempty-containers, + deriving-compat, + data-fix, + bytestring, + attoparsec, + text, + semigroupoids, + selective diff --git a/users/Profpatsch/netencode/netencode.rs b/users/Profpatsch/netencode/netencode.rs index fcf642ca02..34a8fcef09 100644 --- a/users/Profpatsch/netencode/netencode.rs +++ b/users/Profpatsch/netencode/netencode.rs @@ -1,9 +1,9 @@ -extern crate nom; extern crate 
exec_helpers; +extern crate nom; use std::collections::HashMap; -use std::io::{Write, Read}; -use std::fmt::{Display, Debug}; +use std::fmt::{Debug, Display}; +use std::io::{Read, Write}; #[derive(Debug, PartialEq, Eq, Clone)] pub enum T { @@ -46,22 +46,19 @@ impl T { T::I7(i) => U::I7(*i), T::Text(t) => U::Text(t.as_str()), T::Binary(v) => U::Binary(v), - T::Sum(Tag { tag, val }) => U::Sum( - Tag { tag: tag.as_str(), val: Box::new(val.to_u()) } - ), - T::Record(map) => U::Record( - map.iter().map(|(k, v)| (k.as_str(), v.to_u())).collect() - ), - T::List(l) => U::List( - l.iter().map(|v| v.to_u()).collect::<Vec<U<'a>>>() - ), + T::Sum(Tag { tag, val }) => U::Sum(Tag { + tag: tag.as_str(), + val: Box::new(val.to_u()), + }), + T::Record(map) => U::Record(map.iter().map(|(k, v)| (k.as_str(), v.to_u())).collect()), + T::List(l) => U::List(l.iter().map(|v| v.to_u()).collect::<Vec<U<'a>>>()), } } pub fn encode<'a>(&'a self) -> Vec<u8> { match self { // TODO: don’t go via U, inefficient - o => o.to_u().encode() + o => o.to_u().encode(), } } } @@ -110,15 +107,16 @@ impl<'a> U<'a> { U::I7(i) => T::I7(*i), U::Text(t) => T::Text((*t).to_owned()), U::Binary(v) => T::Binary((*v).to_owned()), - U::Sum(Tag { tag, val }) => T::Sum( - Tag { tag: (*tag).to_owned(), val: Box::new(val.to_t()) } - ), + U::Sum(Tag { tag, val }) => T::Sum(Tag { + tag: (*tag).to_owned(), + val: Box::new(val.to_t()), + }), U::Record(map) => T::Record( - map.iter().map(|(k, v)| ((*k).to_owned(), v.to_t())).collect::<HashMap<String, T>>() - ), - U::List(l) => T::List( - l.iter().map(|v| v.to_t()).collect::<Vec<T>>() + map.iter() + .map(|(k, v)| ((*k).to_owned(), v.to_t())) + .collect::<HashMap<String, T>>(), ), + U::List(l) => T::List(l.iter().map(|v| v.to_t()).collect::<Vec<T>>()), } } } @@ -127,16 +125,18 @@ impl<'a> U<'a> { pub struct Tag<S, A> { // TODO: make into &str pub tag: S, - pub val: Box<A> + pub val: Box<A>, } impl<S, A> Tag<S, A> { fn map<F, B>(self, f: F) -> Tag<S, B> - where F: Fn(A) -> B { - 
Tag { - tag: self.tag, - val: Box::new(f(*self.val)) - } + where + F: Fn(A) -> B, + { + Tag { + tag: self.tag, + val: Box::new(f(*self.val)), + } } } @@ -147,77 +147,170 @@ fn encode_tag<W: Write>(w: &mut W, tag: &str, val: &U) -> std::io::Result<()> { } pub fn encode<W: Write>(w: &mut W, u: &U) -> std::io::Result<()> { - match u { - U::Unit => write!(w, "u,"), - U::N1(b) => if *b { write!(w, "n1:1,") } else { write!(w, "n1:0,") }, - U::N3(n) => write!(w, "n3:{},", n), - U::N6(n) => write!(w, "n6:{},", n), - U::N7(n) => write!(w, "n7:{},", n), - U::I3(i) => write!(w, "i3:{},", i), - U::I6(i) => write!(w, "i6:{},", i), - U::I7(i) => write!(w, "i7:{},", i), - U::Text(s) => { - write!(w, "t{}:", s.len()); - w.write_all(s.as_bytes()); - write!(w, ",") - } - U::Binary(s) => { - write!(w, "b{}:", s.len()); - w.write_all(&s); - write!(w, ",") - }, - U::Sum(Tag{tag, val}) => encode_tag(w, tag, val), - U::Record(m) => { - let mut c = std::io::Cursor::new(vec![]); - for (k, v) in m { - encode_tag(&mut c, k, v)?; - } - write!(w, "{{{}:", c.get_ref().len())?; - w.write_all(c.get_ref())?; - write!(w, "}}") - }, - U::List(l) => { - let mut c = std::io::Cursor::new(vec![]); - for u in l { - encode(&mut c, u)?; - } - write!(w, "[{}:", c.get_ref().len())?; - w.write_all(c.get_ref())?; - write!(w, "]") - } - } + match u { + U::Unit => write!(w, "u,"), + U::N1(b) => { + if *b { + write!(w, "n1:1,") + } else { + write!(w, "n1:0,") + } + } + U::N3(n) => write!(w, "n3:{},", n), + U::N6(n) => write!(w, "n6:{},", n), + U::N7(n) => write!(w, "n7:{},", n), + U::I3(i) => write!(w, "i3:{},", i), + U::I6(i) => write!(w, "i6:{},", i), + U::I7(i) => write!(w, "i7:{},", i), + U::Text(s) => { + write!(w, "t{}:", s.len()); + w.write_all(s.as_bytes()); + write!(w, ",") + } + U::Binary(s) => { + write!(w, "b{}:", s.len()); + w.write_all(&s); + write!(w, ",") + } + U::Sum(Tag { tag, val }) => encode_tag(w, tag, val), + U::Record(m) => { + let mut c = std::io::Cursor::new(vec![]); + for (k, v) in m { + 
encode_tag(&mut c, k, v)?; + } + write!(w, "{{{}:", c.get_ref().len())?; + w.write_all(c.get_ref())?; + write!(w, "}}") + } + U::List(l) => { + let mut c = std::io::Cursor::new(vec![]); + for u in l { + encode(&mut c, u)?; + } + write!(w, "[{}:", c.get_ref().len())?; + w.write_all(c.get_ref())?; + write!(w, "]") + } + } } pub fn text(s: String) -> T { T::Text(s) } -pub fn u_from_stdin_or_die_user_error<'a>(prog_name: &'_ str, stdin_buf: &'a mut Vec<u8>) -> U<'a> { - std::io::stdin().lock().read_to_end(stdin_buf); - let u = match parse::u_u(stdin_buf) { - Ok((rest, u)) => match rest { - b"" => u, - _ => exec_helpers::die_user_error(prog_name, format!("stdin contained some soup after netencode value: {:?}", String::from_utf8_lossy(rest))) - }, - Err(err) => exec_helpers::die_user_error(prog_name, format!("unable to parse netencode from stdin: {:?}", err)) - }; - u +pub fn t_from_stdin_or_die_user_error<'a>(prog_name: &'_ str) -> T { + match t_from_stdin_or_die_user_error_with_rest(prog_name, &vec![]) { + None => exec_helpers::die_user_error(prog_name, "stdin was empty"), + Some((rest, t)) => { + if rest.is_empty() { + t + } else { + exec_helpers::die_user_error( + prog_name, + format!( + "stdin contained some soup after netencode value: {:?}", + String::from_utf8_lossy(&rest) + ), + ) + } + } + } +} + +/// Read a netencode value from stdin incrementally, return bytes that could not be read. +/// Nothing if there was nothing to read from stdin & no initial_bytes were provided. +/// These can be passed back as `initial_bytes` if more values should be read. 
+pub fn t_from_stdin_or_die_user_error_with_rest<'a>( + prog_name: &'_ str, + initial_bytes: &[u8], +) -> Option<(Vec<u8>, T)> { + let mut chonker = Chunkyboi::new(std::io::stdin().lock(), 4096); + // The vec to pass to the parser on each step + let mut parser_vec: Vec<u8> = initial_bytes.to_vec(); + // whether stdin was already empty + let mut was_empty: bool = false; + loop { + match chonker.next() { + None => { + if parser_vec.is_empty() { + return None; + } else { + was_empty = true + } + } + Some(Err(err)) => exec_helpers::die_temporary( + prog_name, + &format!("could not read from stdin: {:?}", err), + ), + Some(Ok(mut new_bytes)) => parser_vec.append(&mut new_bytes), + } + + match parse::t_t(&parser_vec) { + Ok((rest, t)) => return Some((rest.to_owned(), t)), + Err(nom::Err::Incomplete(Needed)) => { + if was_empty { + exec_helpers::die_user_error( + prog_name, + &format!( + "unable to parse netencode from stdin, input incomplete: {:?}", + parser_vec + ), + ); + } + // read more from stdin and try parsing again + continue; + } + Err(err) => exec_helpers::die_user_error( + prog_name, + &format!("unable to parse netencode from stdin: {:?}", err), + ), + } + } +} + +// iter helper +// TODO: put into its own module +struct Chunkyboi<T> { + inner: T, + buf: Vec<u8>, +} + +impl<R: Read> Chunkyboi<R> { + fn new(inner: R, chunksize: usize) -> Self { + let buf = vec![0; chunksize]; + Chunkyboi { inner, buf } + } +} + +impl<R: Read> Iterator for Chunkyboi<R> { + type Item = std::io::Result<Vec<u8>>; + + fn next(&mut self) -> Option<std::io::Result<Vec<u8>>> { + match self.inner.read(&mut self.buf) { + Ok(0) => None, + Ok(read) => { + // clone a new buffer so we can reuse the internal one + Some(Ok(self.buf[..read].to_owned())) + } + Err(err) => Some(Err(err)), + } + } } pub mod parse { - use super::{T, Tag, U}; + use super::{Tag, T, U}; - use std::str::FromStr; - use std::ops::Neg; use std::collections::HashMap; + use std::ops::Neg; + use std::str::FromStr; - use 
nom::{IResult}; - use nom::branch::{alt}; + use nom::branch::alt; use nom::bytes::streaming::{tag, take}; - use nom::character::streaming::{digit1, char}; - use nom::sequence::{tuple}; - use nom::combinator::{map, map_res, flat_map, map_parser, opt}; + use nom::character::streaming::{char, digit1}; + use nom::combinator::{flat_map, map, map_parser, map_res, opt}; use nom::error::{context, ErrorKind, ParseError}; + use nom::sequence::tuple; + use nom::IResult; fn unit_t(s: &[u8]) -> IResult<&[u8], ()> { let (s, _) = context("unit", tag("u,"))(s)?; @@ -227,9 +320,9 @@ pub mod parse { fn usize_t(s: &[u8]) -> IResult<&[u8], usize> { context( "usize", - map_res( - map_res(digit1, |n| std::str::from_utf8(n)), - |s| s.parse::<usize>()) + map_res(map_res(digit1, |n| std::str::from_utf8(n)), |s| { + s.parse::<usize>() + }), )(s) } @@ -238,87 +331,77 @@ pub mod parse { // This is the point where we check the descriminator; // if the beginning char does not match, we can immediately return. let (s, _) = char(begin)(s)?; - let (s, (len, _)) = tuple(( - usize_t, - char(':') - ))(s)?; - let (s, (res, _)) = tuple(( - take(len), - char(end) - ))(s)?; + let (s, (len, _)) = tuple((usize_t, char(':')))(s)?; + let (s, (res, _)) = tuple((take(len), char(end)))(s)?; Ok((s, res)) } } - fn uint_t<'a, I: FromStr + 'a>(t: &'static str) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], I> { move |s: &'a [u8]| { let (s, (_, _, int, _)) = tuple(( tag(t.as_bytes()), char(':'), - map_res( - map_res(digit1, |n: &[u8]| std::str::from_utf8(n)), - |s| s.parse::<I>() - ), - char(',') + map_res(map_res(digit1, |n: &[u8]| std::str::from_utf8(n)), |s| { + s.parse::<I>() + }), + char(','), ))(s)?; Ok((s, int)) } } fn bool_t<'a>() -> impl Fn(&'a [u8]) -> IResult<&'a [u8], bool> { - context("bool", alt(( - map(tag("n1:0,"), |_| false), - map(tag("n1:1,"), |_| true), - ))) - } - - fn int_t<'a, I: FromStr + Neg<Output=I>>(t: &'static str) -> impl Fn(&'a [u8]) -> IResult<&[u8], I> { context( - t, - move |s: &'a [u8]| 
{ - let (s, (_, _, neg, int, _)) = tuple(( - tag(t.as_bytes()), - char(':'), - opt(char('-')), - map_res( - map_res(digit1, |n: &[u8]| std::str::from_utf8(n)), - |s| s.parse::<I>() - ), - char(',') - ))(s)?; - let res = match neg { - Some(_) => -int, - None => int, - }; - Ok((s, res)) - } + "bool", + alt((map(tag("n1:0,"), |_| false), map(tag("n1:1,"), |_| true))), ) } + fn int_t<'a, I: FromStr + Neg<Output = I>>( + t: &'static str, + ) -> impl Fn(&'a [u8]) -> IResult<&[u8], I> { + context(t, move |s: &'a [u8]| { + let (s, (_, _, neg, int, _)) = tuple(( + tag(t.as_bytes()), + char(':'), + opt(char('-')), + map_res(map_res(digit1, |n: &[u8]| std::str::from_utf8(n)), |s| { + s.parse::<I>() + }), + char(','), + ))(s)?; + let res = match neg { + Some(_) => -int, + None => int, + }; + Ok((s, res)) + }) + } + fn tag_t(s: &[u8]) -> IResult<&[u8], Tag<String, T>> { // recurses into the main parser - map(tag_g(t_t), - |Tag {tag, val}| - Tag { - tag: tag.to_string(), - val - })(s) + map(tag_g(t_t), |Tag { tag, val }| Tag { + tag: tag.to_string(), + val, + })(s) } fn tag_g<'a, P, O>(inner: P) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Tag<&'a str, O>> where - P: Fn(&'a [u8]) -> IResult<&'a [u8], O> + P: Fn(&'a [u8]) -> IResult<&'a [u8], O>, { move |s: &[u8]| { let (s, tag) = sized('<', '|')(s)?; let (s, val) = inner(s)?; - Ok((s, Tag { - tag: std::str::from_utf8(tag) - .map_err(|_| nom::Err::Failure((s, ErrorKind::Char)))?, - val: Box::new(val) - })) - + Ok(( + s, + Tag { + tag: std::str::from_utf8(tag) + .map_err(|_| nom::Err::Failure((s, ErrorKind::Char)))?, + val: Box::new(val), + }, + )) } } @@ -330,9 +413,9 @@ pub mod parse { fn text_g(s: &[u8]) -> IResult<&[u8], &str> { let (s, res) = sized('t', ',')(s)?; - Ok((s, - std::str::from_utf8(res) - .map_err(|_| nom::Err::Failure((s, ErrorKind::Char)))?, + Ok(( + s, + std::str::from_utf8(res).map_err(|_| nom::Err::Failure((s, ErrorKind::Char)))?, )) } @@ -374,22 +457,24 @@ pub mod parse { { map_parser( sized('[', ']'), - 
nom::multi::many0(inner_no_empty_string(inner)) + nom::multi::many0(inner_no_empty_string(inner)), ) } fn record_t<'a>(s: &'a [u8]) -> IResult<&'a [u8], HashMap<String, T>> { let (s, r) = record_g(t_t)(s)?; - Ok((s, + Ok(( + s, r.into_iter() - .map(|(k, v)| (k.to_string(), v)) - .collect::<HashMap<_,_>>())) + .map(|(k, v)| (k.to_string(), v)) + .collect::<HashMap<_, _>>(), + )) } fn record_g<'a, P, O>(inner: P) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], HashMap<&'a str, O>> where O: Clone, - P: Fn(&'a [u8]) -> IResult<&'a [u8], O> + P: Fn(&'a [u8]) -> IResult<&'a [u8], O>, { move |s: &'a [u8]| { let (s, map) = map_parser( @@ -397,19 +482,17 @@ pub mod parse { nom::multi::fold_many0( inner_no_empty_string(tag_g(&inner)), HashMap::new(), - |mut acc: HashMap<_,_>, Tag { tag, mut val }| { - // ignore duplicated tag names that appear later + |mut acc: HashMap<_, _>, Tag { tag, mut val }| { + // ignore earlier tags with the same name // according to netencode spec - if ! acc.contains_key(tag) { - acc.insert(tag, *val); - } + let _ = acc.insert(tag, *val); acc - } - ) + }, + ), )(s)?; if map.is_empty() { // records must not be empty, according to the spec - Err(nom::Err::Failure((s,nom::error::ErrorKind::Many1))) + Err(nom::Err::Failure((s, nom::error::ErrorKind::Many1))) } else { Ok((s, map)) } @@ -424,7 +507,6 @@ pub mod parse { map(tag_g(u_u), |t| U::Sum(t)), map(list_g(u_u), U::List), map(record_g(u_u), U::Record), - map(bool_t(), |u| U::N1(u)), map(uint_t("n3"), |u| U::N3(u)), map(uint_t("n6"), |u| U::N6(u)), @@ -432,7 +514,6 @@ pub mod parse { map(int_t("i3"), |u| U::I3(u)), map(int_t("i6"), |u| U::I6(u)), map(int_t("i7"), |u| U::I7(u)), - // less common map(uint_t("n2"), |u| U::N3(u)), map(uint_t("n4"), |u| U::N6(u)), @@ -445,7 +526,7 @@ pub mod parse { ))(s) } - pub fn t_t(s: &[u8]) -> IResult<&[u8], T> { + pub fn t_t(s: &[u8]) -> IResult<&[u8], T> { alt(( text, binary(), @@ -453,7 +534,6 @@ pub mod parse { map(tag_t, |t| T::Sum(t)), map(list_t, |l| T::List(l)), 
map(record_t, |p| T::Record(p)), - map(bool_t(), |u| T::N1(u)), // 8, 64 and 128 bit map(uint_t("n3"), |u| T::N3(u)), @@ -462,7 +542,6 @@ pub mod parse { map(int_t("i3"), |u| T::I3(u)), map(int_t("i6"), |u| T::I6(u)), map(int_t("i7"), |u| T::I7(u)), - // less common map(uint_t("n2"), |u| T::N3(u)), map(uint_t("n4"), |u| T::N6(u)), @@ -481,30 +560,18 @@ pub mod parse { #[test] fn test_parse_unit_t() { - assert_eq!( - unit_t("u,".as_bytes()), - Ok(("".as_bytes(), ())) - ); + assert_eq!(unit_t("u,".as_bytes()), Ok(("".as_bytes(), ()))); } #[test] fn test_parse_bool_t() { - assert_eq!( - bool_t()("n1:0,".as_bytes()), - Ok(("".as_bytes(), false)) - ); - assert_eq!( - bool_t()("n1:1,".as_bytes()), - Ok(("".as_bytes(), true)) - ); + assert_eq!(bool_t()("n1:0,".as_bytes()), Ok(("".as_bytes(), false))); + assert_eq!(bool_t()("n1:1,".as_bytes()), Ok(("".as_bytes(), true))); } #[test] fn test_parse_usize_t() { - assert_eq!( - usize_t("32foo".as_bytes()), - Ok(("foo".as_bytes(), 32)) - ); + assert_eq!(usize_t("32foo".as_bytes()), Ok(("foo".as_bytes(), 32))); } #[test] @@ -515,7 +582,10 @@ pub mod parse { ); assert_eq!( uint_t::<u8>("n3")("n3:1024,abc".as_bytes()), - Err(nom::Err::Error(("1024,abc".as_bytes(), nom::error::ErrorKind::MapRes))) + Err(nom::Err::Error(( + "1024,abc".as_bytes(), + nom::error::ErrorKind::MapRes + ))) ); assert_eq!( int_t::<i64>("i6")("i6:-23,abc".as_bytes()), @@ -544,18 +614,21 @@ pub mod parse { assert_eq!( text("t5:hello,".as_bytes()), Ok(("".as_bytes(), T::Text("hello".to_owned()))), - "{}", r"t5:hello," + "{}", + r"t5:hello," ); assert_eq!( text("t4:fo".as_bytes()), // The content of the text should be 4 long Err(nom::Err::Incomplete(nom::Needed::Size(4))), - "{}", r"t4:fo," + "{}", + r"t4:fo," ); assert_eq!( text("t9:今日は,".as_bytes()), Ok(("".as_bytes(), T::Text("今日は".to_owned()))), - "{}", r"t9:今日は," + "{}", + r"t9:今日は," ); } @@ -564,24 +637,28 @@ pub mod parse { assert_eq!( binary()("b5:hello,".as_bytes()), Ok(("".as_bytes(), 
T::Binary(Vec::from("hello".to_owned())))), - "{}", r"b5:hello," + "{}", + r"b5:hello," ); assert_eq!( binary()("b4:fo".as_bytes()), // The content of the byte should be 4 long Err(nom::Err::Incomplete(nom::Needed::Size(4))), - "{}", r"b4:fo," + "{}", + r"b4:fo," ); assert_eq!( binary()("b4:foob".as_bytes()), // The content is 4 bytes now, but the finishing , is missing Err(nom::Err::Incomplete(nom::Needed::Size(1))), - "{}", r"b4:fo," - ); + "{}", + r"b4:fo," + ); assert_eq!( binary()("b9:今日は,".as_bytes()), Ok(("".as_bytes(), T::Binary(Vec::from("今日は".as_bytes())))), - "{}", r"b9:今日は," + "{}", + r"b9:今日は," ); } @@ -590,25 +667,23 @@ pub mod parse { assert_eq!( list_t("[0:]".as_bytes()), Ok(("".as_bytes(), vec![])), - "{}", r"[0:]" + "{}", + r"[0:]" ); assert_eq!( list_t("[6:u,u,u,]".as_bytes()), - Ok(("".as_bytes(), vec![ - T::Unit, - T::Unit, - T::Unit, - ])), - "{}", r"[6:u,u,u,]" + Ok(("".as_bytes(), vec![T::Unit, T::Unit, T::Unit,])), + "{}", + r"[6:u,u,u,]" ); assert_eq!( list_t("[15:u,[7:t3:foo,]u,]".as_bytes()), - Ok(("".as_bytes(), vec![ - T::Unit, - T::List(vec![T::Text("foo".to_owned())]), - T::Unit, - ])), - "{}", r"[15:u,[7:t3:foo,]u,]" + Ok(( + "".as_bytes(), + vec![T::Unit, T::List(vec![T::Text("foo".to_owned())]), T::Unit,] + )), + "{}", + r"[15:u,[7:t3:foo,]u,]" ); } @@ -616,27 +691,40 @@ pub mod parse { fn test_record() { assert_eq!( record_t("{21:<1:a|u,<1:b|u,<1:c|u,}".as_bytes()), - Ok(("".as_bytes(), vec![ - ("a".to_owned(), T::Unit), - ("b".to_owned(), T::Unit), - ("c".to_owned(), T::Unit), - ].into_iter().collect::<HashMap<String, T>>())), - "{}", r"{21:<1:a|u,<1:b|u,<1:c|u,}" + Ok(( + "".as_bytes(), + vec![ + ("a".to_owned(), T::Unit), + ("b".to_owned(), T::Unit), + ("c".to_owned(), T::Unit), + ] + .into_iter() + .collect::<HashMap<String, T>>() + )), + "{}", + r"{21:<1:a|u,<1:b|u,<1:c|u,}" ); // duplicated keys are ignored (first is taken) assert_eq!( record_t("{25:<1:a|u,<1:b|u,<1:a|i1:-1,}".as_bytes()), - Ok(("".as_bytes(), vec![ - 
("a".to_owned(), T::Unit), - ("b".to_owned(), T::Unit), - ].into_iter().collect::<HashMap<_,_>>())), - "{}", r"{25:<1:a|u,<1:b|u,<1:a|i1:-1,}" + Ok(( + "".as_bytes(), + vec![("a".to_owned(), T::I3(-1)), ("b".to_owned(), T::Unit),] + .into_iter() + .collect::<HashMap<_, _>>() + )), + "{}", + r"{25:<1:a|u,<1:b|u,<1:a|i1:-1,}" ); // empty records are not allowed assert_eq!( record_t("{0:}".as_bytes()), - Err(nom::Err::Failure(("".as_bytes(), nom::error::ErrorKind::Many1))), - "{}", r"{0:}" + Err(nom::Err::Failure(( + "".as_bytes(), + nom::error::ErrorKind::Many1 + ))), + "{}", + r"{0:}" ); } @@ -645,37 +733,62 @@ pub mod parse { assert_eq!( t_t("n3:255,".as_bytes()), Ok(("".as_bytes(), T::N3(255))), - "{}", r"n3:255," + "{}", + r"n3:255," ); assert_eq!( t_t("t6:halloo,".as_bytes()), Ok(("".as_bytes(), T::Text("halloo".to_owned()))), - "{}", r"t6:halloo," + "{}", + r"t6:halloo," ); assert_eq!( t_t("<3:foo|t6:halloo,".as_bytes()), - Ok(("".as_bytes(), T::Sum (Tag { - tag: "foo".to_owned(), - val: Box::new(T::Text("halloo".to_owned())) - }))), - "{}", r"<3:foo|t6:halloo," + Ok(( + "".as_bytes(), + T::Sum(Tag { + tag: "foo".to_owned(), + val: Box::new(T::Text("halloo".to_owned())) + }) + )), + "{}", + r"<3:foo|t6:halloo," ); // { a: Unit // , foo: List <A: Unit | B: List i3> } assert_eq!( t_t("{52:<1:a|u,<3:foo|[33:<1:A|u,<1:A|n1:1,<1:B|[7:i3:127,]]}".as_bytes()), - Ok(("".as_bytes(), T::Record(vec![ - ("a".to_owned(), T::Unit), - ("foo".to_owned(), T::List(vec![ - T::Sum(Tag { tag: "A".to_owned(), val: Box::new(T::Unit) }), - T::Sum(Tag { tag: "A".to_owned(), val: Box::new(T::N1(true)) }), - T::Sum(Tag { tag: "B".to_owned(), val: Box::new(T::List(vec![T::I3(127)])) }), - ])) - ].into_iter().collect::<HashMap<String, T>>()))), - "{}", r"{52:<1:a|u,<3:foo|[33:<1:A|u,<1:A|n1:1,<1:B|[7:i3:127,]]}" + Ok(( + "".as_bytes(), + T::Record( + vec![ + ("a".to_owned(), T::Unit), + ( + "foo".to_owned(), + T::List(vec![ + T::Sum(Tag { + tag: "A".to_owned(), + val: Box::new(T::Unit) + 
}), + T::Sum(Tag { + tag: "A".to_owned(), + val: Box::new(T::N1(true)) + }), + T::Sum(Tag { + tag: "B".to_owned(), + val: Box::new(T::List(vec![T::I3(127)])) + }), + ]) + ) + ] + .into_iter() + .collect::<HashMap<String, T>>() + ) + )), + "{}", + r"{52:<1:a|u,<3:foo|[33:<1:A|u,<1:A|n1:1,<1:B|[7:i3:127,]]}" ); } - } } @@ -690,8 +803,10 @@ pub mod dec { fn dec(&self, u: U<'a>) -> Result<Self::A, DecodeError>; } + /// Any netencode, as `T`. #[derive(Clone, Copy)] pub struct AnyT; + /// Any netencode, as `U`. #[derive(Clone, Copy)] pub struct AnyU; @@ -709,8 +824,11 @@ pub mod dec { } } + /// A text #[derive(Clone, Copy)] pub struct Text; + + /// A bytestring // TODO: rename to Bytes #[derive(Clone, Copy)] pub struct Binary; @@ -730,11 +848,15 @@ pub mod dec { fn dec(&self, u: U<'a>) -> Result<Self::A, DecodeError> { match u { U::Binary(b) => Ok(b), - other => Err(DecodeError(format!("Cannot decode {:?} into Binary", other))), + other => Err(DecodeError(format!( + "Cannot decode {:?} into Binary", + other + ))), } } } + /// Any scalar, converted to bytes. #[derive(Clone, Copy)] pub struct ScalarAsBytes; @@ -755,80 +877,93 @@ pub mod dec { } } + /// A map of Ts (TODO: rename to map) #[derive(Clone, Copy)] pub struct Record<T>(pub T); impl<'a, Inner> Decoder<'a> for Record<Inner> - where Inner: Decoder<'a> + where + Inner: Decoder<'a>, { type A = HashMap<&'a str, Inner::A>; fn dec(&self, u: U<'a>) -> Result<Self::A, DecodeError> { match u { - U::Record(map) => - map.into_iter() + U::Record(map) => map + .into_iter() .map(|(k, v)| self.0.dec(v).map(|v2| (k, v2))) .collect::<Result<Self::A, _>>(), - o => Err(DecodeError(format!("Cannot decode {:?} into record", o))) + o => Err(DecodeError(format!("Cannot decode {:?} into record", o))), } } } + /// Assume a record and project out the field with the given name and type. 
#[derive(Clone, Copy)] pub struct RecordDot<'a, T> { pub field: &'a str, - pub inner: T + pub inner: T, } - impl <'a, Inner> Decoder<'a> for RecordDot<'_, Inner> - where Inner: Decoder<'a> + Clone + impl<'a, Inner> Decoder<'a> for RecordDot<'_, Inner> + where + Inner: Decoder<'a> + Clone, { type A = Inner::A; fn dec(&self, u: U<'a>) -> Result<Self::A, DecodeError> { match Record(self.inner.clone()).dec(u) { Ok(mut map) => match map.remove(self.field) { Some(inner) => Ok(inner), - None => Err(DecodeError(format!("Cannot find `{}` in record map", self.field))), + None => Err(DecodeError(format!( + "Cannot find `{}` in record map", + self.field + ))), }, Err(err) => Err(err), } } } + /// Equals one of the listed `A`s exactly, after decoding. #[derive(Clone)] - pub struct OneOf<T, A>{ + pub struct OneOf<T, A> { pub inner: T, pub list: Vec<A>, } - impl <'a, Inner> Decoder<'a> for OneOf<Inner, Inner::A> - where Inner: Decoder<'a>, - Inner::A: Display + Debug + PartialEq + impl<'a, Inner> Decoder<'a> for OneOf<Inner, Inner::A> + where + Inner: Decoder<'a>, + Inner::A: Display + Debug + PartialEq, { type A = Inner::A; fn dec(&self, u: U<'a>) -> Result<Self::A, DecodeError> { match self.inner.dec(u) { Ok(inner) => match self.list.iter().any(|x| x.eq(&inner)) { true => Ok(inner), - false => Err(DecodeError(format!("{} is not one of {:?}", inner, self.list))) + false => Err(DecodeError(format!( + "{} is not one of {:?}", + inner, self.list + ))), }, - Err(err) => Err(err) + Err(err) => Err(err), } } } + /// Try decoding as `T`. 
#[derive(Clone)] pub struct Try<T>(pub T); - impl <'a, Inner> Decoder<'a> for Try<Inner> - where Inner: Decoder<'a> + impl<'a, Inner> Decoder<'a> for Try<Inner> + where + Inner: Decoder<'a>, { type A = Option<Inner::A>; fn dec(&self, u: U<'a>) -> Result<Self::A, DecodeError> { match self.0.dec(u) { Ok(inner) => Ok(Some(inner)), - Err(err) => Ok(None) + Err(err) => Ok(None), } } } - } diff --git a/users/Profpatsch/netencode/pretty.rs b/users/Profpatsch/netencode/pretty.rs index 8fec24a60e..935c3d4a8a 100644 --- a/users/Profpatsch/netencode/pretty.rs +++ b/users/Profpatsch/netencode/pretty.rs @@ -1,6 +1,6 @@ extern crate netencode; -use netencode::{U, T, Tag}; +use netencode::{Tag, T, U}; pub enum Pretty { Single { @@ -20,7 +20,7 @@ pub enum Pretty { r#type: char, length: String, vals: Vec<Pretty>, - trailer: char + trailer: char, }, } @@ -39,7 +39,7 @@ impl Pretty { r#type: 't', length: format!("{}:", s.len()), val: s.to_string(), - trailer: ',' + trailer: ',', }, U::Binary(s) => Pretty::Single { r#type: 'b', @@ -47,15 +47,18 @@ impl Pretty { // For pretty printing we want the string to be visible obviously. // Instead of not supporting binary, let’s use lossy conversion. val: String::from_utf8_lossy(s).into_owned(), - trailer: ',' + trailer: ',', }, - U::Sum(Tag{tag, val}) => Self::pretty_tag(tag, Self::from_u(*val)), + U::Sum(Tag { tag, val }) => Self::pretty_tag(tag, Self::from_u(*val)), U::Record(m) => Pretty::Multi { r#type: '{', // TODO: we are losing the size here, should we recompute it? Keep it? 
length: String::from(""), - vals: m.into_iter().map(|(k, v)| Self::pretty_tag(k, Self::from_u(v))).collect(), - trailer: '}' + vals: m + .into_iter() + .map(|(k, v)| Self::pretty_tag(k, Self::from_u(v))) + .collect(), + trailer: '}', }, U::List(l) => Pretty::Multi { r#type: '[', @@ -68,13 +71,14 @@ impl Pretty { } fn scalar<D>(r#type: char, length: &str, d: D) -> Pretty - where D: std::fmt::Display + where + D: std::fmt::Display, { Pretty::Single { r#type, length: length.to_string(), val: format!("{}", d), - trailer: ',' + trailer: ',', } } @@ -89,43 +93,62 @@ impl Pretty { } pub fn print_multiline<W>(&self, mut w: &mut W) -> std::io::Result<()> - where W: std::io::Write + where + W: std::io::Write, { Self::go(&mut w, self, 0, true); write!(w, "\n") } fn go<W>(mut w: &mut W, p: &Pretty, depth: usize, is_newline: bool) -> std::io::Result<()> - where W: std::io::Write + where + W: std::io::Write, { - const full : usize = 4; - const half : usize = 2; - let i = &vec![b' '; depth*full]; - let iandhalf = &vec![b' '; depth*full + half]; - let (i, iandhalf) = unsafe {( - std::str::from_utf8_unchecked(i), - std::str::from_utf8_unchecked(iandhalf), - )}; + const full: usize = 4; + const half: usize = 2; + let i = &vec![b' '; depth * full]; + let iandhalf = &vec![b' '; depth * full + half]; + let (i, iandhalf) = unsafe { + ( + std::str::from_utf8_unchecked(i), + std::str::from_utf8_unchecked(iandhalf), + ) + }; if is_newline { write!(&mut w, "{}", i); } match p { - Pretty::Single {r#type, length, val, trailer} => - write!(&mut w, "{} {}{}", r#type, val, trailer), - Pretty::Tag { r#type, length, key, inner, val } => { + Pretty::Single { + r#type, + length, + val, + trailer, + } => write!(&mut w, "{} {}{}", r#type, val, trailer), + Pretty::Tag { + r#type, + length, + key, + inner, + val, + } => { write!(&mut w, "{} {} {}", r#type, key, inner)?; Self::go::<W>(&mut w, val, depth, false) - }, + } // if the length is 0 or 1, we print on one line, // only if there’s more than one 
element we split the resulting value. // we never break lines on arbitrary column sizes, since that is just silly. - Pretty::Multi {r#type, length, vals, trailer} => match vals.len() { + Pretty::Multi { + r#type, + length, + vals, + trailer, + } => match vals.len() { 0 => write!(&mut w, "{} {}", r#type, trailer), 1 => { write!(&mut w, "{} ", r#type); Self::go::<W>(&mut w, &vals[0], depth, false)?; write!(&mut w, "{}", trailer) - }, + } more => { write!(&mut w, "\n{}{} \n", iandhalf, r#type)?; for v in vals { |