From e207785e1fc78d06381b369c3b71b7e6becdbf80 Mon Sep 17 00:00:00 2001 From: Profpatsch Date: Sat, 6 Feb 2021 22:33:39 +0100 Subject: feat(users/Profpatsch/{netencode,http-parse}): use HashMap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `U::Record` is required to be a hash map (later keys should be ignored), so why not do the hash map immediately. This surfaced a problem with read-http, because duplicate headers in http are possible, but before they’d be silently ignored. Now we merge them into a `U::List` in case, to be handled by consumers of read-http. Change-Id: Ifd594916f76e5acf9d08e705e0dec2c10a0081c9 Reviewed-on: https://cl.tvl.fyi/c/depot/+/2490 Tested-by: BuildkiteCI Reviewed-by: Profpatsch --- users/Profpatsch/netencode/netencode.rs | 17 +++++----- users/Profpatsch/read-http.nix | 1 - users/Profpatsch/read-http.rs | 60 +++++++++++++++++++++------------ 3 files changed, 48 insertions(+), 30 deletions(-) (limited to 'users') diff --git a/users/Profpatsch/netencode/netencode.rs b/users/Profpatsch/netencode/netencode.rs index 7f8506708d..09f347b786 100644 --- a/users/Profpatsch/netencode/netencode.rs +++ b/users/Profpatsch/netencode/netencode.rs @@ -48,7 +48,7 @@ pub enum U<'a> { Binary(&'a [u8]), // Tags Sum(Tag<&'a str, U<'a>>), - Record(Vec<(&'a str, U<'a>)>), + Record(HashMap<&'a str, U<'a>>), List(Vec>), } @@ -290,14 +290,11 @@ pub mod parse { let (s, r) = record_g(t_t)(s)?; Ok((s, r.into_iter() - // ignore duplicated tag names that appear later - // by reverting the vector now - .rev() .map(|(k, v)| (k.to_string(), v)) .collect::>())) } - fn record_g<'a, P, O>(inner: P) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Vec<(&'a str, O)>> + fn record_g<'a, P, O>(inner: P) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], HashMap<&'a str, O>> where O: Clone, P: Fn(&'a [u8]) -> IResult<&'a [u8], O> @@ -306,9 +303,13 @@ pub mod parse { sized('{', '}'), nom::multi::fold_many1( tag_g(inner), - Vec::new(), - |mut acc: Vec<_>, Tag { tag, mut val }| { - acc.push((tag, *val)); + HashMap::new(), + |mut acc: HashMap<_,_>, Tag { tag, mut val }| { + // ignore duplicated tag names that appear later + // according to netencode spec + if ! acc.contains_key(tag) { + acc.insert(tag, *val); + } acc } ) diff --git a/users/Profpatsch/read-http.nix b/users/Profpatsch/read-http.nix index aff1fa8662..614993c457 100644 --- a/users/Profpatsch/read-http.nix +++ b/users/Profpatsch/read-http.nix @@ -2,7 +2,6 @@ let - # reads a http request (stdin), and writes all headers to stdout, as netencoded dict read-http = depot.users.Profpatsch.writers.rustSimple { name = "read-http"; dependencies = [ diff --git a/users/Profpatsch/read-http.rs b/users/Profpatsch/read-http.rs index 42d4f37ff6..60afa96af9 100644 --- a/users/Profpatsch/read-http.rs +++ b/users/Profpatsch/read-http.rs @@ -7,6 +7,7 @@ extern crate exec_helpers; use std::os::unix::io::FromRawFd; use std::io::Read; use std::io::Write; +use std::collections::HashMap; use exec_helpers::{die_user_error, die_expected_error, die_temporary}; use netencode::{U, T}; @@ -16,6 +17,8 @@ enum What { Response } +// reads a http request (stdin), and writes all headers to stdout, as netencoded record. +// The keys are text, but can be lists of text iff headers appear multiple times, so beware. fn main() -> std::io::Result<()> { let what : What = match arglib_netencode::arglib_netencode(None).unwrap() { @@ -58,24 +61,40 @@ fn main() -> std::io::Result<()> { } } - - fn normalize_headers<'a>(headers: &'a [httparse::Header]) -> Vec<(String, &'a str)> { - let mut res = vec![]; + fn normalize_headers<'a>(headers: &'a [httparse::Header]) -> HashMap> { + let mut res = HashMap::new(); for httparse::Header { name, value } in headers { let val = ascii::AsciiStr::from_ascii(*value) - .expect(&format!("read-http: we require header values to be ASCII, but the header {} was {:?}", name, value)); - // lowercase the headers, since the standard doesn’t care - // and we want unique strings to match agains - res.push((name.to_lowercase(), val.as_str())) + .expect(&format!("read-http: we require header values to be ASCII, but the header {} was {:?}", name, value)) + .as_str(); + // lowercase the header names, since the standard doesn’t care + // and we want unique strings to match against + let name_lower = name.to_lowercase(); + match res.insert(name_lower, U::Text(val)) { + None => (), + Some(U::Text(t)) => { + let name_lower = name.to_lowercase(); + let _ = res.insert(name_lower, U::List(vec![U::Text(t), U::Text(val)])); + () + }, + Some(U::List(mut l)) => { + let name_lower = name.to_lowercase(); + l.push(U::Text(val)); + let _ = res.insert(name_lower, U::List(l)); + () + }, + Some(o) => panic!("read-http: header not text nor list: {:?}", o), + } } res } // tries to read until the end of the http header (deliniated by two newlines "\r\n\r\n") fn read_till_end_of_header(buf: &mut Vec, reader: R) -> Option<()> { - let mut chunker = Chunkyboi::new(reader, 4096); + let mut chonker = Chunkyboi::new(reader, 4096); loop { - match chunker.next() { + // TODO: attacker can send looooong input, set upper maximum + match chonker.next() { Some(Ok(chunk)) => { buf.extend_from_slice(&chunk); if chunk.windows(4).any(|c| c == b"\r\n\r\n" ) { @@ -126,31 +145,30 @@ fn main() -> std::io::Result<()> { } } -fn write_dict_req<'buf>(method: &'buf str, path: &'buf str, headers: &[(String, &str)]) -> std::io::Result<()> { +fn write_dict_req<'a, 'buf>(method: &'buf str, path: &'buf str, headers: &'a HashMap>) -> std::io::Result<()> { let mut http = vec![ ("method", U::Text(method)), ("path", U::Text(path)), - ]; + ].into_iter().collect(); write_dict(http, headers) } -fn write_dict_resp<'buf>(code: u16, reason: &'buf str, headers: &[(String, &str)]) -> std::io::Result<()> { +fn write_dict_resp<'a, 'buf>(code: u16, reason: &'buf str, headers: &'a HashMap>) -> std::io::Result<()> { let mut http = vec![ ("status", U::N6(code as u64)), ("status-text", U::Text(reason)), - ]; + ].into_iter().collect(); write_dict(http, headers) } -fn write_dict<'buf, 'a>(mut http: Vec<(&str, U<'a>)>, headers: &'a[(String, &str)]) -> std::io::Result<()> { - http.push(("headers", U::Record( - headers.iter().map( - |(name, value)| - (name.as_str(), U::Text(value)) - ).collect::>() - ))); - +fn write_dict<'buf, 'a>(mut http: HashMap<&str, U<'a>>, headers: &'a HashMap>) -> std::io::Result<()> { + match http.insert("headers", U::Record( + headers.iter().map(|(k,v)| (k.as_str(), v.clone())).collect() + )) { + None => (), + Some(_) => panic!("read-http: headers already in dict"), + }; netencode::encode( &mut std::io::stdout(), U::Record(http) -- cgit 1.4.1