From 1facd889bba724cf20ea14422ee1e57440b3e761 Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Tue, 24 Jan 2023 19:27:20 +0100 Subject: feat(tvix/eval): use lexical-core to format float Apparently our naive implementation of float formatting, which simply used {:.5}, and trimmed trailing "0" strings not sufficient. It wrongly trimmed numbers with zeroes but no decimal point, like `10000` got trimmed to `1`. Nix uses `std::to_string` on the double, which according to https://en.cppreference.com/w/cpp/string/basic_string/to_string is equivalent to `std::sprintf(buf, "%f", value)`. https://en.cppreference.com/w/cpp/io/c/fprintf mentions this is treated like this: > Precision specifies the exact number of digits to appear after > the decimal point character. The default precision is 6. In the > alternative implementation decimal point character is written even if > no digits follow it. For infinity and not-a-number conversion style > see notes. This doesn't seem to be the case though, and Nix uses scientific notation in some cases. There's a whole bunch of strategies to determine which is a more compact notation, and which notation should be used for a given number. https://github.com/rust-lang/rust/issues/24556 provides some pointers into various rabbit holes for those interested. This gist seems to be that currently a different formatting is not exposed in rust directly, at least not for public consumption. There is the [lexical-core](https://github.com/Alexhuszagh/rust-lexical) crate though, which provides a way to format floats with various strategies and formats. Change our implementation of `TotalDisplay` for the `Value::Float` case to use that. We still need to do some post-processing, because Nix always adds the sign in scientific notation (and there's no way to configure lexical-core to do that), and lexical-core in some cases keeps the trailing zeros. Even with all that in place, there as a difference in `eval-okay- fromjson.nix` (from tvix-tests), which I couldn't get to work. I updated the fixture to a less problematic number. With this, the testsuite passes again, and does for the upcoming CL introducing builtins.fromTOML, and enabling the nix testsuite bits for it, too. Change-Id: Ie6fba5619e1d9fd7ce669a51594658b029057acc Reviewed-on: https://cl.tvl.fyi/c/depot/+/7922 Tested-by: BuildkiteCI Autosubmit: flokli Reviewed-by: tazjin --- .../src/tests/tvix_tests/eval-okay-fromjson.exp | 2 +- .../src/tests/tvix_tests/eval-okay-fromjson.nix | 2 +- tvix/eval/src/value/mod.rs | 88 +++++++++++++++++++++- 3 files changed, 88 insertions(+), 4 deletions(-) (limited to 'tvix/eval/src') diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-fromjson.exp b/tvix/eval/src/tests/tvix_tests/eval-okay-fromjson.exp index c855950a30d4..24aa21d78ff7 100644 --- a/tvix/eval/src/tests/tvix_tests/eval-okay-fromjson.exp +++ b/tvix/eval/src/tests/tvix_tests/eval-okay-fromjson.exp @@ -1 +1 @@ -[ { Image = { Animated = false; Height = 600; IDs = [ 116 943 234 38793 true false null -100 ]; Latitude = 37.7668; Longitude = -122.3959; Thumbnail = { Height = 125; Url = "http://www.example.com/image/481989943"; Width = 100; }; Title = "View from 15th Floor"; Width = 800; }; } { name = "a"; value = "b"; } [ 1 2 3 4 ] ] +[ { Image = { Animated = false; Height = 600; IDs = [ 116 943 234 38793 true false null -100 ]; Latitude = 37.7668; Longitude = -122.396; Thumbnail = { Height = 125; Url = "http://www.example.com/image/481989943"; Width = 100; }; Title = "View from 15th Floor"; Width = 800; }; } { name = "a"; value = "b"; } [ 1 2 3 4 ] ] diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-fromjson.nix b/tvix/eval/src/tests/tvix_tests/eval-okay-fromjson.nix index e4f62131250e..e530789446ef 100644 --- a/tvix/eval/src/tests/tvix_tests/eval-okay-fromjson.nix +++ b/tvix/eval/src/tests/tvix_tests/eval-okay-fromjson.nix @@ -15,7 +15,7 @@ "Animated" : false, "IDs": [116, 943, 234, 38793, true ,false,null, -100], "Latitude": 37.7668, - "Longitude": -122.3959 + "Longitude": -122.396 } } '') diff --git a/tvix/eval/src/value/mod.rs b/tvix/eval/src/value/mod.rs index 7a6bbcafb30f..f9ada2b627b5 100644 --- a/tvix/eval/src/value/mod.rs +++ b/tvix/eval/src/value/mod.rs @@ -1,11 +1,13 @@ //! This module implements the backing representation of runtime //! values in the Nix language. use std::cmp::Ordering; +use std::num::{NonZeroI32, NonZeroUsize}; use std::ops::Deref; use std::path::PathBuf; use std::rc::Rc; use std::{cell::Ref, fmt::Display}; +use lexical_core::format::CXX_LITERAL; use serde::{Deserialize, Serialize}; #[cfg(feature = "arbitrary")] @@ -32,6 +34,8 @@ pub use thunk::Thunk; use self::thunk::ThunkSet; +use lazy_static::lazy_static; + #[warn(variant_size_differences)] #[derive(Clone, Debug, Serialize, Deserialize)] #[serde(untagged)] @@ -72,6 +76,17 @@ pub enum Value { UnresolvedPath(PathBuf), } +lazy_static! { + static ref WRITE_FLOAT_OPTIONS: lexical_core::WriteFloatOptions = + lexical_core::WriteFloatOptionsBuilder::new() + .trim_floats(true) + .round_mode(lexical_core::write_float_options::RoundMode::Round) + .positive_exponent_break(Some(NonZeroI32::new(5).unwrap())) + .max_significant_digits(Some(NonZeroUsize::new(6).unwrap())) + .build() + .unwrap(); +} + // Helper macros to generate the to_*/as_* macros while accounting for // thunks. @@ -514,9 +529,78 @@ impl TotalDisplay for Value { Value::Builtin(builtin) => builtin.fmt(f), // Nix prints floats with a maximum precision of 5 digits - // only. + // only. Except when it decides to use scientific notation + // (with a + after the `e`, and zero-padded to 0 digits) Value::Float(num) => { - write!(f, "{}", format!("{:.5}", num).trim_end_matches(['.', '0'])) + let mut buf = [b'0'; lexical_core::BUFFER_SIZE]; + let mut s = lexical_core::write_with_options::( + num.clone(), + &mut buf, + &WRITE_FLOAT_OPTIONS, + ); + + // apply some postprocessing on the buffer. If scientific + // notation is used (we see an `e`), and the next character is + // a digit, add the missing `+` sign.) + let mut new_s = Vec::with_capacity(s.len()); + + if s.contains(&b'e') { + for (i, c) in s.iter().enumerate() { + // encountered `e` + if c == &b'e' { + // next character is a digit (so no negative exponent) + if s.len() > i && s[i + 1].is_ascii_digit() { + // copy everything from the start up to (including) the e + new_s.extend_from_slice(&s[0..=i]); + // add the missing '+' + new_s.push(b'+'); + // check for the remaining characters. + // If it's only one, we need to prepend a trailing zero + if s.len() == i + 2 { + new_s.push(b'0'); + } + new_s.extend_from_slice(&s[i + 1..]); + break; + } + } + } + + // if we modified the scientific notation, flip the reference + if new_s.len() != 0 { + s = &mut new_s + } + } + // else, if this is not scientific notation, and there's a + // decimal point, make sure we really drop trailing zeroes. + // In some cases, lexical_core doesn't. + else if s.contains(&b'.') { + for (i, c) in s.iter().enumerate() { + // at `.`` + if c == &b'.' { + // trim zeroes from the right side. + let frac = String::from_utf8_lossy(&s[i + 1..]); + let frac_no_trailing_zeroes = frac.trim_end_matches("0"); + + if frac.len() != frac_no_trailing_zeroes.len() { + // we managed to strip something, construct new_s + if frac_no_trailing_zeroes.is_empty() { + // if frac_no_trailing_zeroes is empty, the fractional part was all zeroes, so we can drop the decimal point as well + new_s.extend_from_slice(&s[0..=i - 1]); + } else { + // else, assemble the rest of the string + new_s.extend_from_slice(&s[0..=i]); + new_s.extend_from_slice(frac_no_trailing_zeroes.as_bytes()); + } + + // flip the reference + s = &mut new_s; + break; + } + } + } + } + + write!(f, "{}", format!("{}", String::from_utf8_lossy(&s))) } // internal types -- cgit 1.4.1