about summary refs log tree commit diff
path: root/users/Profpatsch/netencode
diff options
context:
space:
mode:
Diffstat (limited to 'users/Profpatsch/netencode')
-rw-r--r--users/Profpatsch/netencode/README.md111
-rw-r--r--users/Profpatsch/netencode/default.nix136
-rw-r--r--users/Profpatsch/netencode/gen.nix69
-rw-r--r--users/Profpatsch/netencode/netencode-mustache.rs53
-rw-r--r--users/Profpatsch/netencode/netencode.rs776
5 files changed, 1145 insertions, 0 deletions
diff --git a/users/Profpatsch/netencode/README.md b/users/Profpatsch/netencode/README.md
new file mode 100644
index 000000000000..3058e36eaf5c
--- /dev/null
+++ b/users/Profpatsch/netencode/README.md
@@ -0,0 +1,111 @@
+# netencode 0.1-unreleased
+
+[bencode][] and [netstring][]-inspired pipe format that should be trivial go generate correctly in every context (only requires a `byte_length()` and a `printf()`), easy to parse (100 lines of code or less), mostly human-decipherable for easy debugging, and support nested record and sum types.
+
+
+## scalars
+
+Scalars have the format `[type prefix][size]:[value],`.
+
+where size is a natural number without leading zeroes.
+
+### unit
+
+The unit (`u`) has only one value.
+
+* The unit is: `u,`
+
+### numbers
+
+Naturals (`n`) and Integers (`i`), with a maximum size in bits.
+
+Bit sizes are specified in 2^n increments, 1 to 9 (`n1`..`n9`, `i1`..`n9`).
+
+* Natural `1234` that fits in 32 bits (2^5): `n5:1234,`
+* Integer `-42` that fits in 8 bits (2^3): `i3:-42,`
+* Integer `23` that fits in 64 bits (2^6): `i6:23,`
+* Integer `-1` that fits in 512 bits (2^9): `i9:-1,`
+* Natural `0` that fits in 1 bit (2^1): `n1:0,`
+
+An implementation can define the biggest numbers it supports, and has to throw an error for anything bigger. It has to support everything smaller, so for example if you support up to i6/n6, you have to support 1–6 as well. An implementation could support up to the current architecture’s wordsize for example.
+
+Floats are not supported, you can implement fixed-size decimals or ratios using integers.
+
+### booleans
+
+A boolean is represented as `n1`.
+
+* `n1:0,`: false
+* `n1:1,`: true
+
+TODO: should we add `f,` and `t,`?
+
+### text
+
+Text (`t`) that *must* be encoded as UTF-8, starting with its length in bytes:
+
+* The string `hello world` (11 bytes): `t11:hello world,`
+* The string `今日は` (9 bytes): `t9:今日は,`
+* The string `:,` (2 bytes): `t2::,,`
+* The empty sting `` (0 bytes): `t0:,`
+
+### binary
+
+Arbitrary binary strings (`b`) that can contain any data, starting with its length in bytes.
+
+* The ASCII string `hello world` as binary data (11 bytes): `b11:hello world,`
+* The empty binary string (0 bytes): `b0:,`
+* The bytestring with `^D` (1 byte): `b1:,`
+
+Since the binary strings are length-prefixd, they can contain `\0` and no escaping is required. Care has to be taken in languages with `\0`-terminated bytestrings.
+
+Use text (`t`) if you have utf-8 encoded data.
+
+## tagged values
+
+### tags
+
+A tag (`<`) gives a value a name. The tag is UTF-8 encoded, starting with its length in bytes and proceeding with the value.
+
+* The tag `foo` (3 bytes) tagging the text `hello` (5 bytes): `<3:foo|t5:hello,`
+* The tag `` (0 bytes) tagging the 8-bit integer 0: `<0:|i3:0,`
+
+### records (products/records), also maps
+
+A record (`{`) is a concatenation of tags (`<`). It needs to be closed with `}`.
+If tag names repeat the later ones should be ignored. Ordering does not matter.
+
+Similar to text, records start with the length of their *whole encoded content*, in bytes. This makes it possible to treat their contents as opaque bytestrings.
+
+* There is no empty record. (TODO: make the empty record the unit type, remove `u,`?)
+* A record with one empty field, `foo`: `{9:<3:foo|u,}`
+* A record with two fields, `foo` and `x`: `{21:<3:foo|u,<1:x|t3:baz,}`
+* The same record: `{21:<1:x|t3:baz,<3:foo|u,}`
+* The same record (later occurences of fields are ignored): `{28:<1:x|t3:baz,<3:foo|u,<1:x|u,}`
+
+### sums (tagged unions)
+
+Simply a tagged value. The tag marker `<` indicates it is a sum if it appears outside of a record.
+
+## lists
+
+A list (`[`) imposes an ordering on a sequence of values. It needs to be closed with `]`. Values in it are simply concatenated.
+
+Similar to records, lists start with the length of their whole encoded content.
+
+* The empty list: `[0:]`
+* The list with one element, the string `foo`: `[7:t3:foo,]`
+* The list with text `foo` followed by i3 `-42`: `[14:t3:foo,i3:-42,]`
+* The list with `Some` and `None` tags: `[33:<4:Some|t3:foo,<4None|u,<4None|u,]`
+
+## motivation
+
+TODO
+
+## guarantees
+
+TODO: do I want unique representation (bijection like bencode?) This would put more restrictions on the generator, like sorting records in lexicographic order, but would make it possible to compare without decoding
+
+
+[bencode]: https://en.wikipedia.org/wiki/Bencode
+[netstring]: https://en.wikipedia.org/wiki/Netstring
diff --git a/users/Profpatsch/netencode/default.nix b/users/Profpatsch/netencode/default.nix
new file mode 100644
index 000000000000..294e3b4395da
--- /dev/null
+++ b/users/Profpatsch/netencode/default.nix
@@ -0,0 +1,136 @@
+{ depot, pkgs, lib, ... }:
+
+let
+  imports = {
+    inherit (depot.users.Profpatsch)
+      writers;
+  };
+
+  netencode-rs = imports.writers.testRustSimple
+    (imports.writers.rustSimpleLib {
+      name = "netencode";
+      dependencies = [
+        depot.users.Profpatsch.rust-crates.nom
+        depot.users.Profpatsch.execline.exec-helpers
+      ];
+      release = false;
+      verbose = true;
+    } (builtins.readFile ./netencode.rs));
+
+  gen = import ./gen.nix { inherit lib; };
+
+  cfg-if = pkgs.buildRustCrate {
+    pname = "cfg-if";
+    version = "1.0.0";
+    crateName = "cfg-if";
+    sha256 = "1fzidq152hnxhg4lj6r2gv4jpnn8yivp27z6q6xy7w6v0dp6bai9";
+  };
+
+  log = pkgs.buildRustCrate {
+    pname = "log";
+    version = "0.4.11";
+    crateName = "log";
+    sha256 = "0m6xhqxsps5mgd7r91g5mqkndbh8zbjd58p7w75r330zl4n40l07";
+    dependencies = [ cfg-if ];
+  };
+
+  serde_derive = pkgs.buildRustCrate {
+    pname = "serde";
+    crateName = "serde";
+    version = "1.0.123";
+    sha256 = "05xl2s1vpf3p7fi2yc9qlzw88d5ap0z3qmhmd7axa6pp9pn1s5xc";
+  };
+
+  serde = pkgs.buildRustCrate {
+    pname = "serde";
+    crateName = "serde";
+    version = "1.0.123";
+    sha256 = "05xl2s1vpf3p7fi2yc9qlzw88d5ap0z3qmhmd7axa6pp9pn1s5xc";
+    features = [ "std" ];
+  };
+
+  mustache = pkgs.buildRustCrate {
+    pname = "mustache";
+    version = "0.9.0";
+    crateName = "mustache";
+    sha256 = "1zgl8l15i19lzp90icgwyi6zqdd31b9vm8w129f41d1zd0hs7ayq";
+    dependencies = [ log serde ];
+  };
+
+  netencode-mustache = imports.writers.rustSimple {
+    name = "netencode_mustache";
+    dependencies = [
+      depot.users.Profpatsch.arglib.netencode.rust
+      netencode-rs
+      mustache
+    ];
+  } (builtins.readFile ./netencode-mustache.rs);
+
+
+  record-get = imports.writers.rustSimple {
+    name = "record-get";
+    dependencies = [
+      netencode-rs
+      depot.users.Profpatsch.execline.exec-helpers
+      depot.users.Profpatsch.arglib.netencode.rust
+    ];
+  } ''
+    extern crate netencode;
+    extern crate arglib_netencode;
+    extern crate exec_helpers;
+    use netencode::{encode, dec};
+    use netencode::dec::{Decoder, DecodeError};
+
+    fn main() {
+        let mut buf = vec![];
+        let args = exec_helpers::args("record-get", 1);
+        let field = match std::str::from_utf8(&args[0]) {
+            Ok(f) => f,
+            Err(_e) => exec_helpers::die_user_error("record-get", format!("The field name needs to be valid unicode"))
+        };
+        let u = netencode::u_from_stdin_or_die_user_error("record-get", &mut buf);
+        match (dec::RecordDot {field, inner: dec::AnyU }).dec(u) {
+            Ok(u) => encode(&mut std::io::stdout(), &u).expect("encoding to stdout failed"),
+            Err(DecodeError(err)) => exec_helpers::die_user_error("record-get", err)
+        }
+    }
+  '';
+
+  record-splice-env = imports.writers.rustSimple {
+    name = "record-splice-env";
+    dependencies = [
+      netencode-rs
+      depot.users.Profpatsch.execline.exec-helpers
+    ];
+  } ''
+    extern crate netencode;
+    extern crate exec_helpers;
+    use netencode::dec::{Record, Try, ScalarAsBytes, Decoder, DecodeError};
+
+    fn main() {
+        let mut buf = vec![];
+        let u = netencode::u_from_stdin_or_die_user_error("record-splice-env", &mut buf);
+        let (_, prog) = exec_helpers::args_for_exec("record-splice-env", 0);
+        match Record(Try(ScalarAsBytes)).dec(u) {
+            Ok(map) => {
+                exec_helpers::exec_into_args(
+                    "record-splice-env",
+                    prog,
+                    // some elements can’t be decoded as scalars, so just ignore them
+                    map.into_iter().filter_map(|(k, v)| v.map(|v2| (k, v2)))
+                );
+            },
+            Err(DecodeError(err)) => exec_helpers::die_user_error("record-splice-env", err),
+        }
+    }
+  '';
+
+in {
+  inherit
+   netencode-rs
+   netencode-mustache
+   record-get
+   record-splice-env
+   gen
+   ;
+}
diff --git a/users/Profpatsch/netencode/gen.nix b/users/Profpatsch/netencode/gen.nix
new file mode 100644
index 000000000000..305ff7b08dd6
--- /dev/null
+++ b/users/Profpatsch/netencode/gen.nix
@@ -0,0 +1,69 @@
+{ lib }:
+let
+
+  netstring = tag: suffix: s:
+    "${tag}${toString (builtins.stringLength s)}:${s}${suffix}";
+
+  unit = "u,";
+
+  n1 = b: if b then "n1:1," else "n1:0,";
+
+  n = i: n: "n${toString i}:${toString n},";
+  i = i: n: "i${toString i}:${toString n},";
+
+  n3 = n 3;
+  n6 = n 6;
+  n7 = n 7;
+
+  i3 = i 3;
+  i6 = i 6;
+  i7 = i 7;
+
+  text = netstring "t" ",";
+  binary = netstring "b" ",";
+
+  tag = key: val: netstring "<" "|" key + val;
+
+  concatStrings = builtins.concatStringsSep "";
+
+  record = lokv: netstring "{" "}"
+    (concatStrings (map ({key, val}: tag key val) lokv));
+
+  list = l: netstring "[" "]" (concatStrings l);
+
+  dwim = val:
+    let match = {
+      "bool" = n1;
+      "int" = i6;
+      "string" = text;
+      "set" = attrs:
+        # it could be a derivation, then just return the path
+        if attrs.type or "" == "derivation" then text "${attrs}"
+        else
+          record (lib.mapAttrsToList
+          (k: v: {
+            key = k;
+            val = dwim v;
+          }) attrs);
+      "list" = l: list (map dwim l);
+    };
+    in match.${builtins.typeOf val} val;
+
+in {
+  inherit
+    unit
+    n1
+    n3
+    n6
+    n7
+    i3
+    i6
+    i7
+    text
+    binary
+    tag
+    record
+    list
+    dwim
+    ;
+}
diff --git a/users/Profpatsch/netencode/netencode-mustache.rs b/users/Profpatsch/netencode/netencode-mustache.rs
new file mode 100644
index 000000000000..ee7bafed2250
--- /dev/null
+++ b/users/Profpatsch/netencode/netencode-mustache.rs
@@ -0,0 +1,53 @@
+extern crate netencode;
+extern crate mustache;
+extern crate arglib_netencode;
+
+use mustache::{Data};
+use netencode::{T};
+use std::collections::HashMap;
+use std::os::unix::ffi::{OsStrExt};
+use std::io::{Read};
+
+fn netencode_to_mustache_data_dwim(t: T) -> Data {
+    match t {
+        // TODO: good idea?
+        T::Unit => Data::Null,
+        T::N1(b) => Data::Bool(b),
+        T::N3(u) => Data::String(u.to_string()),
+        T::N6(u) => Data::String(u.to_string()),
+        T::N7(u) => Data::String(u.to_string()),
+        T::I3(i) => Data::String(i.to_string()),
+        T::I6(i) => Data::String(i.to_string()),
+        T::I7(i) => Data::String(i.to_string()),
+        T::Text(s) => Data::String(s),
+        T::Binary(b) => unimplemented!(),
+        T::Sum(tag) => unimplemented!(),
+        T::Record(xs) => Data::Map(
+            xs.into_iter()
+                .map(|(key, val)| (key, netencode_to_mustache_data_dwim(val)))
+                .collect::<HashMap<_,_>>()
+        ),
+        T::List(xs) => Data::Vec(
+            xs.into_iter()
+                .map(|x| netencode_to_mustache_data_dwim(x))
+                .collect::<Vec<_>>()
+        ),
+    }
+}
+
+pub fn from_stdin() -> () {
+    let data = netencode_to_mustache_data_dwim(
+        arglib_netencode::arglib_netencode("netencode-mustache", Some(std::ffi::OsStr::new("TEMPLATE_DATA")))
+    );
+    let mut stdin = String::new();
+    std::io::stdin().read_to_string(&mut stdin).unwrap();
+    mustache::compile_str(&stdin)
+        .and_then(|templ| templ.render_data(
+            &mut std::io::stdout(),
+            &data
+        )).unwrap()
+}
+
+pub fn main() {
+    from_stdin()
+}
diff --git a/users/Profpatsch/netencode/netencode.rs b/users/Profpatsch/netencode/netencode.rs
new file mode 100644
index 000000000000..28003260925c
--- /dev/null
+++ b/users/Profpatsch/netencode/netencode.rs
@@ -0,0 +1,776 @@
+extern crate nom;
+extern crate exec_helpers;
+
+use std::collections::HashMap;
+use std::io::{Write, Read};
+use std::fmt::{Display, Debug};
+
+#[derive(Debug, PartialEq, Eq, Clone)]
+pub enum T {
+    // Unit
+    Unit,
+    // Boolean
+    N1(bool),
+    // Naturals
+    N3(u8),
+    N6(u64),
+    N7(u128),
+    // Integers
+    I3(i8),
+    I6(i64),
+    I7(i128),
+    // Text
+    // TODO: make into &str
+    Text(String),
+    // TODO: rename to Bytes
+    Binary(Vec<u8>),
+    // Tags
+    // TODO: make into &str
+    Sum(Tag<String, T>),
+    // TODO: make into &str
+    Record(HashMap<String, T>),
+    List(Vec<T>),
+}
+
+impl T {
+    pub fn to_u<'a>(&'a self) -> U<'a> {
+        match self {
+            T::Unit => U::Unit,
+            T::N1(b) => U::N1(*b),
+            T::N3(u) => U::N3(*u),
+            T::N6(u) => U::N6(*u),
+            T::N7(u) => U::N7(*u),
+            T::I3(i) => U::I3(*i),
+            T::I6(i) => U::I6(*i),
+            T::I7(i) => U::I7(*i),
+            T::Text(t) => U::Text(t.as_str()),
+            T::Binary(v) => U::Binary(v),
+            T::Sum(Tag { tag, val }) => U::Sum(
+                Tag { tag: tag.as_str(), val: Box::new(val.to_u()) }
+            ),
+            T::Record(map) => U::Record(
+                map.iter().map(|(k, v)| (k.as_str(), v.to_u())).collect()
+            ),
+            T::List(l) => U::List(
+                l.iter().map(|v| v.to_u()).collect::<Vec<U<'a>>>()
+            ),
+        }
+    }
+
+    pub fn encode<'a>(&'a self) -> Vec<u8> {
+        match self {
+            // TODO: don’t go via U, inefficient
+            o => o.to_u().encode()
+        }
+    }
+}
+
+#[derive(Debug, PartialEq, Eq, Clone)]
+pub enum U<'a> {
+    Unit,
+    // Boolean
+    N1(bool),
+    // Naturals
+    N3(u8),
+    N6(u64),
+    N7(u128),
+    // Integers
+    I3(i8),
+    I6(i64),
+    I7(i128),
+    // Text
+    Text(&'a str),
+    Binary(&'a [u8]),
+    // Tags
+    // TODO: the U-recursion we do here means we can’t be breadth-lazy anymore
+    // like we originally planned; maybe we want to go `U<'a>` → `&'a [u8]` again?
+    Sum(Tag<&'a str, U<'a>>),
+    Record(HashMap<&'a str, U<'a>>),
+    List(Vec<U<'a>>),
+}
+
+impl<'a> U<'a> {
+    pub fn encode(&self) -> Vec<u8> {
+        let mut c = std::io::Cursor::new(vec![]);
+        encode(&mut c, self);
+        c.into_inner()
+    }
+
+    pub fn to_t(&self) -> T {
+        match self {
+            U::Unit => T::Unit,
+            U::N1(b) => T::N1(*b),
+            U::N3(u) => T::N3(*u),
+            U::N6(u) => T::N6(*u),
+            U::N7(u) => T::N7(*u),
+            U::I3(i) => T::I3(*i),
+            U::I6(i) => T::I6(*i),
+            U::I7(i) => T::I7(*i),
+            U::Text(t) => T::Text((*t).to_owned()),
+            U::Binary(v) => T::Binary((*v).to_owned()),
+            U::Sum(Tag { tag, val }) => T::Sum(
+                Tag { tag: (*tag).to_owned(), val: Box::new(val.to_t()) }
+            ),
+            U::Record(map) => T::Record(
+                map.iter().map(|(k, v)| ((*k).to_owned(), v.to_t())).collect::<HashMap<String, T>>()
+            ),
+            U::List(l) => T::List(
+                l.iter().map(|v| v.to_t()).collect::<Vec<T>>()
+            ),
+        }
+    }
+}
+
+#[derive(Debug, PartialEq, Eq, Clone)]
+pub struct Tag<S, A> {
+    // TODO: make into &str
+    pub tag: S,
+    pub val: Box<A>
+}
+
+impl<S, A> Tag<S, A> {
+    fn map<F, B>(self, f: F) -> Tag<S, B>
+        where F: Fn(A) -> B {
+          Tag {
+              tag: self.tag,
+              val: Box::new(f(*self.val))
+          }
+    }
+}
+
+fn encode_tag<W: Write>(w: &mut W, tag: &str, val: &U) -> std::io::Result<()> {
+    write!(w, "<{}:{}|", tag.len(), tag)?;
+    encode(w, val)?;
+    Ok(())
+}
+
+pub fn encode<W: Write>(w: &mut W, u: &U) -> std::io::Result<()> {
+  match u {
+      U::Unit => write!(w, "u,"),
+      U::N1(b) => if *b { write!(w, "n1:1,") } else { write!(w, "n1:0,") },
+      U::N3(n) => write!(w, "n3:{},", n),
+      U::N6(n) => write!(w, "n6:{},", n),
+      U::N7(n) => write!(w, "n7:{},", n),
+      U::I3(i) => write!(w, "i3:{},", i),
+      U::I6(i) => write!(w, "i6:{},", i),
+      U::I7(i) => write!(w, "i7:{},", i),
+      U::Text(s) => {
+          write!(w, "t{}:", s.len());
+          w.write(s.as_bytes());
+          write!(w, ",")
+      }
+      U::Binary(s) => {
+          write!(w, "b{}:", s.len());
+          w.write(&s);
+          write!(w, ",")
+      },
+      U::Sum(Tag{tag, val}) => encode_tag(w, tag, val),
+      U::Record(m) => {
+          let mut c = std::io::Cursor::new(vec![]);
+          for (k, v) in m {
+              encode_tag(&mut c, k, v)?;
+          }
+          write!(w, "{{{}:", c.get_ref().len())?;
+          w.write(c.get_ref())?;
+          write!(w, "}}")
+      },
+      U::List(l) => {
+          let mut c = std::io::Cursor::new(vec![]);
+          for u in l {
+              encode(&mut c, u)?;
+          }
+          write!(w, "[{}:", c.get_ref().len())?;
+          w.write(c.get_ref())?;
+          write!(w, "]")
+      }
+  }
+}
+
+pub fn text(s: String) -> T {
+    T::Text(s)
+}
+
+pub fn u_from_stdin_or_die_user_error<'a>(prog_name: &'_ str, stdin_buf: &'a mut Vec<u8>) -> U<'a> {
+    std::io::stdin().lock().read_to_end(stdin_buf);
+    let u = match parse::u_u(stdin_buf) {
+        Ok((rest, u)) => match rest {
+            b"" => u,
+            _ => exec_helpers::die_user_error(prog_name, format!("stdin contained some soup after netencode value: {:?}", rest))
+        },
+        Err(err) => exec_helpers::die_user_error(prog_name, format!("unable to parse netencode from stdin: {:?}", err))
+    };
+    u
+}
+
+pub mod parse {
+    use super::{T, Tag, U};
+
+    use std::str::FromStr;
+    use std::ops::Neg;
+    use std::collections::HashMap;
+
+    use nom::{IResult};
+    use nom::bytes::complete::{tag, take};
+    use nom::branch::{alt};
+    use nom::character::complete::{digit1, char};
+    use nom::sequence::{tuple};
+    use nom::combinator::{map, map_res, flat_map, map_parser, opt};
+    use nom::error::{context, ErrorKind, ParseError};
+
+    fn unit_t(s: &[u8]) -> IResult<&[u8], ()> {
+        let (s, _) = context("unit", tag("u,"))(s)?;
+        Ok((s, ()))
+    }
+
+    fn usize_t(s: &[u8]) -> IResult<&[u8], usize> {
+        context(
+            "usize",
+            map_res(
+                map_res(digit1, |n| std::str::from_utf8(n)),
+                |s| s.parse::<usize>())
+        )(s)
+    }
+
+    fn sized(begin: char, end: char) -> impl Fn(&[u8]) -> IResult<&[u8], &[u8]> {
+        move |s: &[u8]| {
+            let (s, (_, len, _)) = tuple((
+                char(begin),
+                usize_t,
+                char(':')
+            ))(s)?;
+            let (s, (res, _)) = tuple((
+                take(len),
+                char(end)
+            ))(s)?;
+            Ok((s, res))
+        }
+    }
+
+
+    fn uint_t<'a, I: FromStr + 'a>(t: &'static str) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], I> {
+        move |s: &'a [u8]| {
+            let (s, (_, _, int, _)) = tuple((
+                tag(t.as_bytes()),
+                char(':'),
+                map_res(
+                    map_res(digit1, |n: &[u8]| std::str::from_utf8(n)),
+                    |s| s.parse::<I>()
+                ),
+                char(',')
+            ))(s)?;
+            Ok((s, int))
+        }
+    }
+
+    fn bool_t<'a>() -> impl Fn(&'a [u8]) -> IResult<&'a [u8], bool> {
+        context("bool", alt((
+            map(tag("n1:0,"), |_| false),
+            map(tag("n1:1,"), |_| true),
+        )))
+    }
+
+    fn int_t<'a, I: FromStr + Neg<Output=I>>(t: &'static str) -> impl Fn(&'a [u8]) -> IResult<&[u8], I> {
+        context(
+            t,
+            move |s: &'a [u8]| {
+                let (s, (_, _, neg, int, _)) = tuple((
+                    tag(t.as_bytes()),
+                    char(':'),
+                    opt(char('-')),
+                    map_res(
+                        map_res(digit1, |n: &[u8]| std::str::from_utf8(n)),
+                        |s| s.parse::<I>()
+                    ),
+                    char(',')
+                ))(s)?;
+                let res = match neg {
+                    Some(_) => -int,
+                    None => int,
+                };
+                Ok((s, res))
+            }
+        )
+    }
+
+    fn tag_t(s: &[u8]) -> IResult<&[u8], Tag<String, T>> {
+        // recurses into the main parser
+        map(tag_g(t_t),
+            |Tag {tag, val}|
+            Tag {
+                tag: tag.to_string(),
+                val
+            })(s)
+    }
+
+    fn tag_g<'a, P, O>(inner: P) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Tag<&'a str, O>>
+    where
+        P: Fn(&'a [u8]) -> IResult<&'a [u8], O>
+    {
+        move |s: &[u8]| {
+            let (s, tag) = sized('<', '|')(s)?;
+            let (s, val) = inner(s)?;
+            Ok((s, Tag {
+                tag: std::str::from_utf8(tag)
+                    .map_err(|_| nom::Err::Failure((s, ErrorKind::Char)))?,
+                val: Box::new(val)
+            }))
+
+        }
+    }
+
+    /// parse text scalar (`t5:hello,`)
+    fn text(s: &[u8]) -> IResult<&[u8], T> {
+        let (s, res) = text_g(s)?;
+        Ok((s, T::Text(res.to_string())))
+    }
+
+    fn text_g(s: &[u8]) -> IResult<&[u8], &str> {
+        let (s, res) = sized('t', ',')(s)?;
+        Ok((s,
+            std::str::from_utf8(res)
+                .map_err(|_| nom::Err::Failure((s, ErrorKind::Char)))?,
+        ))
+    }
+
+    fn binary<'a>() -> impl Fn(&'a [u8]) -> IResult<&'a [u8], T> {
+        map(binary_g(), |b| T::Binary(b.to_owned()))
+    }
+
+    fn binary_g() -> impl Fn(&[u8]) -> IResult<&[u8], &[u8]> {
+        sized('b', ',')
+    }
+
+    fn list_t(s: &[u8]) -> IResult<&[u8], Vec<T>> {
+        list_g(t_t)(s)
+    }
+
+    fn list_g<'a, P, O>(inner: P) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Vec<O>>
+    where
+        O: Clone,
+        P: Fn(&'a [u8]) -> IResult<&'a [u8], O>
+    {
+        map_parser(
+            sized('[', ']'),
+            nom::multi::many0(inner)
+        )
+    }
+
+    fn record_t<'a>(s: &'a [u8]) -> IResult<&'a [u8], HashMap<String, T>> {
+        let (s, r) = record_g(t_t)(s)?;
+        Ok((s,
+            r.into_iter()
+            .map(|(k, v)| (k.to_string(), v))
+            .collect::<HashMap<_,_>>()))
+    }
+
+    fn record_g<'a, P, O>(inner: P) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], HashMap<&'a str, O>>
+    where
+        O: Clone,
+        P: Fn(&'a [u8]) -> IResult<&'a [u8], O>
+    {
+        map_parser(
+            sized('{', '}'),
+            nom::multi::fold_many1(
+                tag_g(inner),
+                HashMap::new(),
+                |mut acc: HashMap<_,_>, Tag { tag, mut val }| {
+                    // ignore duplicated tag names that appear later
+                    // according to netencode spec
+                    if ! acc.contains_key(tag) {
+                        acc.insert(tag, *val);
+                    }
+                    acc
+                }
+            )
+        )
+    }
+
+    pub fn u_u(s: &[u8]) -> IResult<&[u8], U> {
+        alt((
+            map(text_g, U::Text),
+            map(binary_g(), U::Binary),
+            map(unit_t, |()| U::Unit),
+            map(tag_g(u_u), |t| U::Sum(t)),
+            map(list_g(u_u), U::List),
+            map(record_g(u_u), U::Record),
+
+            map(bool_t(), |u| U::N1(u)),
+            map(uint_t("n3"), |u| U::N3(u)),
+            map(uint_t("n6"), |u| U::N6(u)),
+            map(uint_t("n7"), |u| U::N7(u)),
+            map(int_t("i3"), |u| U::I3(u)),
+            map(int_t("i6"), |u| U::I6(u)),
+            map(int_t("i7"), |u| U::I7(u)),
+
+            // less common
+            map(uint_t("n2"), |u| U::N3(u)),
+            map(uint_t("n4"), |u| U::N6(u)),
+            map(uint_t("n5"), |u| U::N6(u)),
+            map(int_t("i1"), |u| U::I3(u)),
+            map(int_t("i2"), |u| U::I3(u)),
+            map(int_t("i4"), |u| U::I6(u)),
+            map(int_t("i5"), |u| U::I6(u)),
+            // TODO: 8, 9 not supported
+        ))(s)
+    }
+
+    pub fn t_t(s: &[u8]) -> IResult<&[u8], T>  {
+        alt((
+            text,
+            binary(),
+            map(unit_t, |_| T::Unit),
+            map(tag_t, |t| T::Sum(t)),
+            map(list_t, |l| T::List(l)),
+            map(record_t, |p| T::Record(p)),
+
+            map(bool_t(), |u| T::N1(u)),
+            // 8, 64 and 128 bit
+            map(uint_t("n3"), |u| T::N3(u)),
+            map(uint_t("n6"), |u| T::N6(u)),
+            map(uint_t("n7"), |u| T::N7(u)),
+            map(int_t("i3"), |u| T::I3(u)),
+            map(int_t("i6"), |u| T::I6(u)),
+            map(int_t("i7"), |u| T::I7(u)),
+
+            // less common
+            map(uint_t("n2"), |u| T::N3(u)),
+            map(uint_t("n4"), |u| T::N6(u)),
+            map(uint_t("n5"), |u| T::N6(u)),
+            map(int_t("i1"), |u| T::I3(u)),
+            map(int_t("i2"), |u| T::I3(u)),
+            map(int_t("i4"), |u| T::I6(u)),
+            map(int_t("i5"), |u| T::I6(u)),
+            // TODO: 8, 9 not supported
+        ))(s)
+    }
+
+    #[cfg(test)]
+    mod tests {
+        use super::*;
+
+        #[test]
+        fn test_parse_unit_t() {
+            assert_eq!(
+                unit_t("u,".as_bytes()),
+                Ok(("".as_bytes(), ()))
+            );
+        }
+
+        #[test]
+        fn test_parse_bool_t() {
+            assert_eq!(
+                bool_t()("n1:0,".as_bytes()),
+                Ok(("".as_bytes(), false))
+            );
+            assert_eq!(
+                bool_t()("n1:1,".as_bytes()),
+                Ok(("".as_bytes(), true))
+            );
+        }
+
+        #[test]
+        fn test_parse_usize_t() {
+            assert_eq!(
+                usize_t("32foo".as_bytes()),
+                Ok(("foo".as_bytes(), 32))
+            );
+        }
+
+        #[test]
+        fn test_parse_int_t() {
+            assert_eq!(
+                uint_t::<u8>("n3")("n3:42,abc".as_bytes()),
+                Ok(("abc".as_bytes(), 42))
+            );
+            assert_eq!(
+                uint_t::<u8>("n3")("n3:1024,abc".as_bytes()),
+                Err(nom::Err::Error(("1024,abc".as_bytes(), nom::error::ErrorKind::MapRes)))
+            );
+            assert_eq!(
+                int_t::<i64>("i6")("i6:-23,abc".as_bytes()),
+                Ok(("abc".as_bytes(), -23))
+            );
+            assert_eq!(
+                int_t::<i128>("i3")("i3:0,:abc".as_bytes()),
+                Ok((":abc".as_bytes(), 0))
+            );
+            assert_eq!(
+                uint_t::<u8>("n7")("n7:09,".as_bytes()),
+                Ok(("".as_bytes(), 9))
+            );
+            // assert_eq!(
+            //     length("c"),
+            //     Err(nom::Err::Error(("c", nom::error::ErrorKind::Digit)))
+            // );
+            // assert_eq!(
+            //     length(":"),
+            //     Err(nom::Err::Error((":", nom::error::ErrorKind::Digit)))
+            // );
+        }
+
+        #[test]
+        fn test_parse_text() {
+            assert_eq!(
+                text("t5:hello,".as_bytes()),
+                Ok(("".as_bytes(), T::Text("hello".to_owned())))
+            );
+            assert_eq!(
+                text("t4:fo,".as_bytes()),
+                // TODO: way better parse error messages
+                Err(nom::Err::Error(("fo,".as_bytes(), nom::error::ErrorKind::Eof)))
+            );
+            assert_eq!(
+                text("t9:今日は,".as_bytes()),
+                Ok(("".as_bytes(), T::Text("今日は".to_owned())))
+            );
+        }
+
+        #[test]
+        fn test_parse_binary() {
+            assert_eq!(
+                binary()("b5:hello,".as_bytes()),
+                Ok(("".as_bytes(), T::Binary(Vec::from("hello".to_owned()))))
+            );
+            assert_eq!(
+                binary()("b4:fo,".as_bytes()),
+                // TODO: way better parse error messages
+                Err(nom::Err::Error(("fo,".as_bytes(), nom::error::ErrorKind::Eof)))
+            );
+            assert_eq!(
+                binary()("b9:今日は,".as_bytes()),
+                Ok(("".as_bytes(), T::Binary(Vec::from("今日は".as_bytes()))))
+            );
+        }
+
+        #[test]
+        fn test_list() {
+            assert_eq!(
+                list_t("[0:]".as_bytes()),
+                Ok(("".as_bytes(), vec![]))
+            );
+            assert_eq!(
+                list_t("[6:u,u,u,]".as_bytes()),
+                Ok(("".as_bytes(), vec![
+                    T::Unit,
+                    T::Unit,
+                    T::Unit,
+                ]))
+            );
+            assert_eq!(
+                list_t("[15:u,[7:t3:foo,]u,]".as_bytes()),
+                Ok(("".as_bytes(), vec![
+                    T::Unit,
+                    T::List(vec![T::Text("foo".to_owned())]),
+                    T::Unit,
+                ]))
+            );
+        }
+
+        #[test]
+        fn test_record() {
+            assert_eq!(
+                record_t("{21:<1:a|u,<1:b|u,<1:c|u,}".as_bytes()),
+                Ok(("".as_bytes(), vec![
+                    ("a".to_owned(), T::Unit),
+                    ("b".to_owned(), T::Unit),
+                    ("c".to_owned(), T::Unit),
+                ].into_iter().collect::<HashMap<String, T>>()))
+            );
+            // duplicated keys are ignored (first is taken)
+            assert_eq!(
+                record_t("{25:<1:a|u,<1:b|u,<1:a|i1:-1,}".as_bytes()),
+                Ok(("".as_bytes(), vec![
+                    ("a".to_owned(), T::Unit),
+                    ("b".to_owned(), T::Unit),
+                ].into_iter().collect::<HashMap<_,_>>()))
+            );
+        }
+
+        #[test]
+        fn test_parse() {
+            assert_eq!(
+                t_t("n3:255,".as_bytes()),
+                Ok(("".as_bytes(), T::N3(255)))
+            );
+            assert_eq!(
+                t_t("t6:halloo,".as_bytes()),
+                Ok(("".as_bytes(), T::Text("halloo".to_owned())))
+            );
+            assert_eq!(
+                t_t("<3:foo|t6:halloo,".as_bytes()),
+                Ok(("".as_bytes(), T::Sum (Tag {
+                    tag: "foo".to_owned(),
+                    val: Box::new(T::Text("halloo".to_owned()))
+                })))
+            );
+            // { a: Unit
+            // , foo: List <A: Unit | B: List i3> }
+            assert_eq!(
+                t_t("{52:<1:a|u,<3:foo|[33:<1:A|u,<1:A|n1:1,<1:B|[7:i3:127,]]}".as_bytes()),
+                Ok(("".as_bytes(), T::Record(vec![
+                    ("a".to_owned(), T::Unit),
+                    ("foo".to_owned(), T::List(vec![
+                        T::Sum(Tag { tag: "A".to_owned(), val: Box::new(T::Unit) }),
+                        T::Sum(Tag { tag: "A".to_owned(), val: Box::new(T::N1(true)) }),
+                        T::Sum(Tag { tag: "B".to_owned(), val: Box::new(T::List(vec![T::I3(127)])) }),
+                    ]))
+                ].into_iter().collect::<HashMap<String, T>>())))
+            );
+        }
+
+    }
+}
+
+pub mod dec {
+    use super::*;
+    use std::collections::HashMap;
+
+    pub struct DecodeError(pub String);
+
+    pub trait Decoder<'a> {
+        type A;
+        fn dec(&self, u: U<'a>) -> Result<Self::A, DecodeError>;
+    }
+
+    #[derive(Clone, Copy)]
+    pub struct AnyT;
+    #[derive(Clone, Copy)]
+    pub struct AnyU;
+
+    impl<'a> Decoder<'a> for AnyT {
+        type A = T;
+        fn dec(&self, u: U<'a>) -> Result<Self::A, DecodeError> {
+            Ok(u.to_t())
+        }
+    }
+
+    impl<'a> Decoder<'a> for AnyU {
+        type A = U<'a>;
+        fn dec(&self, u: U<'a>) -> Result<Self::A, DecodeError> {
+            Ok(u)
+        }
+    }
+
+    #[derive(Clone, Copy)]
+    pub struct Text;
+    // TODO: rename to Bytes
+    #[derive(Clone, Copy)]
+    pub struct Binary;
+
+    impl<'a> Decoder<'a> for Text {
+        type A = &'a str;
+        fn dec(&self, u: U<'a>) -> Result<Self::A, DecodeError> {
+            match u {
+                U::Text(t) => Ok(t),
+                other => Err(DecodeError(format!("Cannot decode {:?} into Text", other))),
+            }
+        }
+    }
+
+    impl<'a> Decoder<'a> for Binary {
+        type A = &'a [u8];
+        fn dec(&self, u: U<'a>) -> Result<Self::A, DecodeError> {
+            match u {
+                U::Binary(b) => Ok(b),
+                other => Err(DecodeError(format!("Cannot decode {:?} into Binary", other))),
+            }
+        }
+    }
+
+    #[derive(Clone, Copy)]
+    pub struct ScalarAsBytes;
+
+    impl<'a> Decoder<'a> for ScalarAsBytes {
+        type A = Vec<u8>;
+        fn dec(&self, u: U<'a>) -> Result<Self::A, DecodeError> {
+            match u {
+                U::N3(u) => Ok(format!("{}", u).into_bytes()),
+                U::N6(u) => Ok(format!("{}", u).into_bytes()),
+                U::N7(u) => Ok(format!("{}", u).into_bytes()),
+                U::I3(i) => Ok(format!("{}", i).into_bytes()),
+                U::I6(i) => Ok(format!("{}", i).into_bytes()),
+                U::I7(i) => Ok(format!("{}", i).into_bytes()),
+                U::Text(t) => Ok(t.as_bytes().to_owned()),
+                U::Binary(b) => Ok(b.to_owned()),
+                o => Err(DecodeError(format!("Cannot decode {:?} into scalar", o))),
+            }
+        }
+    }
+
+    #[derive(Clone, Copy)]
+    pub struct Record<T>(pub T);
+
+    impl<'a, Inner> Decoder<'a> for Record<Inner>
+        where Inner: Decoder<'a>
+    {
+        type A = HashMap<&'a str, Inner::A>;
+        fn dec(&self, u: U<'a>) -> Result<Self::A, DecodeError> {
+            match u {
+                U::Record(map) =>
+                    map.into_iter()
+                    .map(|(k, v)| self.0.dec(v).map(|v2| (k, v2)))
+                    .collect::<Result<Self::A, _>>(),
+                o => Err(DecodeError(format!("Cannot decode {:?} into record", o)))
+            }
+        }
+    }
+
+    #[derive(Clone, Copy)]
+    pub struct RecordDot<'a, T> {
+        pub field: &'a str,
+        pub inner: T
+    }
+
+    impl <'a, Inner> Decoder<'a> for RecordDot<'_, Inner>
+        where Inner: Decoder<'a> + Clone
+    {
+        type A = Inner::A;
+        fn dec(&self, u: U<'a>) -> Result<Self::A, DecodeError> {
+            match Record(self.inner.clone()).dec(u) {
+                Ok(mut map) => match map.remove(self.field) {
+                    Some(inner) => Ok(inner),
+                    None => Err(DecodeError(format!("Cannot find `{}` in record map", self.field))),
+                },
+                Err(err) => Err(err),
+            }
+        }
+    }
+
+    #[derive(Clone)]
+    pub struct OneOf<T, A>{
+        pub inner: T,
+        pub list: Vec<A>,
+    }
+
+    impl <'a, Inner> Decoder<'a> for OneOf<Inner, Inner::A>
+        where Inner: Decoder<'a>,
+              Inner::A: Display + Debug + PartialEq
+    {
+        type A = Inner::A;
+        fn dec(&self, u: U<'a>) -> Result<Self::A, DecodeError> {
+            match self.inner.dec(u) {
+                Ok(inner) => match self.list.iter().any(|x| x.eq(&inner)) {
+                    true => Ok(inner),
+                    false => Err(DecodeError(format!("{} is not one of {:?}", inner, self.list)))
+                },
+                Err(err) => Err(err)
+            }
+        }
+    }
+
+    #[derive(Clone)]
+    pub struct Try<T>(pub T);
+
+    impl <'a, Inner> Decoder<'a> for Try<Inner>
+        where Inner: Decoder<'a>
+    {
+        type A = Option<Inner::A>;
+        fn dec(&self, u: U<'a>) -> Result<Self::A, DecodeError> {
+            match self.0.dec(u) {
+                Ok(inner) => Ok(Some(inner)),
+                Err(err) => Ok(None)
+            }
+        }
+    }
+
+}