From 1facd889bba724cf20ea14422ee1e57440b3e761 Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Tue, 24 Jan 2023 19:27:20 +0100 Subject: feat(tvix/eval): use lexical-core to format float Apparently our naive implementation of float formatting, which simply used {:.5}, and trimmed trailing "0" strings not sufficient. It wrongly trimmed numbers with zeroes but no decimal point, like `10000` got trimmed to `1`. Nix uses `std::to_string` on the double, which according to https://en.cppreference.com/w/cpp/string/basic_string/to_string is equivalent to `std::sprintf(buf, "%f", value)`. https://en.cppreference.com/w/cpp/io/c/fprintf mentions this is treated like this: > Precision specifies the exact number of digits to appear after > the decimal point character. The default precision is 6. In the > alternative implementation decimal point character is written even if > no digits follow it. For infinity and not-a-number conversion style > see notes. This doesn't seem to be the case though, and Nix uses scientific notation in some cases. There's a whole bunch of strategies to determine which is a more compact notation, and which notation should be used for a given number. https://github.com/rust-lang/rust/issues/24556 provides some pointers into various rabbit holes for those interested. This gist seems to be that currently a different formatting is not exposed in rust directly, at least not for public consumption. There is the [lexical-core](https://github.com/Alexhuszagh/rust-lexical) crate though, which provides a way to format floats with various strategies and formats. Change our implementation of `TotalDisplay` for the `Value::Float` case to use that. We still need to do some post-processing, because Nix always adds the sign in scientific notation (and there's no way to configure lexical-core to do that), and lexical-core in some cases keeps the trailing zeros. Even with all that in place, there as a difference in `eval-okay- fromjson.nix` (from tvix-tests), which I couldn't get to work. I updated the fixture to a less problematic number. With this, the testsuite passes again, and does for the upcoming CL introducing builtins.fromTOML, and enabling the nix testsuite bits for it, too. Change-Id: Ie6fba5619e1d9fd7ce669a51594658b029057acc Reviewed-on: https://cl.tvl.fyi/c/depot/+/7922 Tested-by: BuildkiteCI Autosubmit: flokli Reviewed-by: tazjin --- tvix/Cargo.lock | 72 ++++++ tvix/Cargo.nix | 249 +++++++++++++++++++++ tvix/eval/Cargo.toml | 2 + .../src/tests/tvix_tests/eval-okay-fromjson.exp | 2 +- .../src/tests/tvix_tests/eval-okay-fromjson.nix | 2 +- tvix/eval/src/value/mod.rs | 88 +++++++- 6 files changed, 411 insertions(+), 4 deletions(-) (limited to 'tvix') diff --git a/tvix/Cargo.lock b/tvix/Cargo.lock index 6d52ffb9c1..4f4ebac0d3 100644 --- a/tvix/Cargo.lock +++ b/tvix/Cargo.lock @@ -1036,6 +1036,70 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +[[package]] +name = "lexical-core" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" +dependencies = [ + "lexical-parse-integer", + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-parse-integer" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-util" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc" +dependencies = [ + "static_assertions", +] + +[[package]] +name = "lexical-write-float" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862" +dependencies = [ + "lexical-util", + "lexical-write-integer", + "static_assertions", +] + +[[package]] +name = "lexical-write-integer" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446" +dependencies = [ + "lexical-util", + "static_assertions", +] + [[package]] name = "libc" version = "0.2.137" @@ -1900,6 +1964,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "str-buf" version = "1.0.6" @@ -2410,6 +2480,8 @@ dependencies = [ "dirs", "imbl", "itertools", + "lazy_static", + "lexical-core", "path-clean", "pretty_assertions", "proptest", diff --git a/tvix/Cargo.nix b/tvix/Cargo.nix index c360f7293d..d435d5870f 100644 --- a/tvix/Cargo.nix +++ b/tvix/Cargo.nix @@ -2997,6 +2997,236 @@ rec { "spin_no_std" = [ "spin" ]; }; }; + "lexical-core" = rec { + crateName = "lexical-core"; + version = "0.8.5"; + edition = "2018"; + sha256 = "0ihf0x3vrk25fq3bv9q35m0xax0wmvwkh0j0pjm2yk4ddvh5vpic"; + authors = [ + "Alex Huszagh " + ]; + dependencies = [ + { + name = "lexical-parse-float"; + packageId = "lexical-parse-float"; + optional = true; + usesDefaultFeatures = false; + } + { + name = "lexical-parse-integer"; + packageId = "lexical-parse-integer"; + optional = true; + usesDefaultFeatures = false; + } + { + name = "lexical-util"; + packageId = "lexical-util"; + usesDefaultFeatures = false; + } + { + name = "lexical-write-float"; + packageId = "lexical-write-float"; + optional = true; + usesDefaultFeatures = false; + } + { + name = "lexical-write-integer"; + packageId = "lexical-write-integer"; + optional = true; + usesDefaultFeatures = false; + } + ]; + features = { + "compact" = [ "lexical-write-integer/compact" "lexical-write-float/compact" "lexical-parse-integer/compact" "lexical-parse-float/compact" ]; + "default" = [ "std" "write-integers" "write-floats" "parse-integers" "parse-floats" ]; + "f128" = [ "lexical-util/f128" "lexical-parse-float/f128" "lexical-write-float/f128" ]; + "f16" = [ "lexical-util/f16" "lexical-parse-float/f16" "lexical-write-float/f16" ]; + "format" = [ "lexical-util/format" "lexical-parse-integer/format" "lexical-parse-float/format" "lexical-write-integer/format" "lexical-write-float/format" ]; + "lexical-parse-float" = [ "dep:lexical-parse-float" ]; + "lexical-parse-integer" = [ "dep:lexical-parse-integer" ]; + "lexical-write-float" = [ "dep:lexical-write-float" ]; + "lexical-write-integer" = [ "dep:lexical-write-integer" ]; + "lint" = [ "lexical-util/lint" "lexical-write-integer/lint" "lexical-write-float/lint" "lexical-parse-integer/lint" "lexical-parse-float/lint" ]; + "nightly" = [ "lexical-write-integer/nightly" "lexical-write-float/nightly" "lexical-parse-integer/nightly" "lexical-parse-float/nightly" ]; + "parse-floats" = [ "lexical-parse-float" "parse" "floats" ]; + "parse-integers" = [ "lexical-parse-integer" "parse" "integers" ]; + "power-of-two" = [ "lexical-util/power-of-two" "lexical-write-integer/power-of-two" "lexical-write-float/power-of-two" "lexical-parse-integer/power-of-two" "lexical-parse-float/power-of-two" ]; + "radix" = [ "lexical-util/radix" "lexical-write-integer/radix" "lexical-write-float/radix" "lexical-parse-integer/radix" "lexical-parse-float/radix" ]; + "safe" = [ "lexical-write-integer/safe" "lexical-write-float/safe" "lexical-parse-integer/safe" "lexical-parse-float/safe" ]; + "std" = [ "lexical-util/std" "lexical-write-integer/std" "lexical-write-float/std" "lexical-parse-integer/std" "lexical-parse-float/std" ]; + "write-floats" = [ "lexical-write-float" "write" "floats" ]; + "write-integers" = [ "lexical-write-integer" "write" "integers" ]; + }; + resolvedDefaultFeatures = [ "default" "floats" "format" "integers" "lexical-parse-float" "lexical-parse-integer" "lexical-write-float" "lexical-write-integer" "parse" "parse-floats" "parse-integers" "std" "write" "write-floats" "write-integers" ]; + }; + "lexical-parse-float" = rec { + crateName = "lexical-parse-float"; + version = "0.8.5"; + edition = "2018"; + sha256 = "0py0gp8hlzcrlvjqmqlpl2v1as65iiqxq2xsabxvhc01pmg3lfv8"; + authors = [ + "Alex Huszagh " + ]; + dependencies = [ + { + name = "lexical-parse-integer"; + packageId = "lexical-parse-integer"; + usesDefaultFeatures = false; + } + { + name = "lexical-util"; + packageId = "lexical-util"; + usesDefaultFeatures = false; + features = [ "parse-floats" ]; + } + { + name = "static_assertions"; + packageId = "static_assertions"; + } + ]; + features = { + "compact" = [ "lexical-util/compact" "lexical-parse-integer/compact" ]; + "default" = [ "std" ]; + "f128" = [ "lexical-util/f128" ]; + "f16" = [ "lexical-util/f16" ]; + "format" = [ "lexical-util/format" "lexical-parse-integer/format" ]; + "lint" = [ "lexical-util/lint" "lexical-parse-integer/lint" ]; + "nightly" = [ "lexical-parse-integer/nightly" ]; + "power-of-two" = [ "lexical-util/power-of-two" "lexical-parse-integer/power-of-two" ]; + "radix" = [ "lexical-util/radix" "lexical-parse-integer/radix" "power-of-two" ]; + "safe" = [ "lexical-parse-integer/safe" ]; + "std" = [ "lexical-util/std" "lexical-parse-integer/std" ]; + }; + resolvedDefaultFeatures = [ "format" "std" ]; + }; + "lexical-parse-integer" = rec { + crateName = "lexical-parse-integer"; + version = "0.8.6"; + edition = "2018"; + sha256 = "1sayji3mpvb2xsjq56qcq3whfz8px9a6fxk5v7v15hyhbr4982bd"; + authors = [ + "Alex Huszagh " + ]; + dependencies = [ + { + name = "lexical-util"; + packageId = "lexical-util"; + usesDefaultFeatures = false; + features = [ "parse-integers" ]; + } + { + name = "static_assertions"; + packageId = "static_assertions"; + } + ]; + features = { + "compact" = [ "lexical-util/compact" ]; + "default" = [ "std" ]; + "format" = [ "lexical-util/format" ]; + "lint" = [ "lexical-util/lint" ]; + "power-of-two" = [ "lexical-util/power-of-two" ]; + "radix" = [ "lexical-util/radix" "power-of-two" ]; + "std" = [ "lexical-util/std" ]; + }; + resolvedDefaultFeatures = [ "format" "std" ]; + }; + "lexical-util" = rec { + crateName = "lexical-util"; + version = "0.8.5"; + edition = "2018"; + sha256 = "1z73qkv7yxhsbc4aiginn1dqmsj8jarkrdlyxc88g2gz2vzvjmaj"; + authors = [ + "Alex Huszagh " + ]; + dependencies = [ + { + name = "static_assertions"; + packageId = "static_assertions"; + } + ]; + features = { + "default" = [ "std" ]; + "f128" = [ "floats" ]; + "f16" = [ "floats" ]; + "parse-floats" = [ "parse" "floats" ]; + "parse-integers" = [ "parse" "integers" ]; + "radix" = [ "power-of-two" ]; + "write-floats" = [ "write" "floats" ]; + "write-integers" = [ "write" "integers" ]; + }; + resolvedDefaultFeatures = [ "floats" "format" "integers" "parse" "parse-floats" "parse-integers" "std" "write" "write-floats" "write-integers" ]; + }; + "lexical-write-float" = rec { + crateName = "lexical-write-float"; + version = "0.8.5"; + edition = "2018"; + sha256 = "0qk825l0csvnksh9sywb51996cjc2bylq6rxjaiha7sqqjhvmjmc"; + authors = [ + "Alex Huszagh " + ]; + dependencies = [ + { + name = "lexical-util"; + packageId = "lexical-util"; + usesDefaultFeatures = false; + features = [ "write-floats" ]; + } + { + name = "lexical-write-integer"; + packageId = "lexical-write-integer"; + usesDefaultFeatures = false; + } + { + name = "static_assertions"; + packageId = "static_assertions"; + } + ]; + features = { + "compact" = [ "lexical-util/compact" "lexical-write-integer/compact" ]; + "default" = [ "std" ]; + "f128" = [ "lexical-util/f128" ]; + "f16" = [ "lexical-util/f16" ]; + "format" = [ "lexical-util/format" ]; + "lint" = [ "lexical-util/lint" "lexical-write-integer/lint" ]; + "nightly" = [ "lexical-write-integer/nightly" ]; + "power-of-two" = [ "lexical-util/power-of-two" "lexical-write-integer/power-of-two" ]; + "radix" = [ "lexical-util/radix" "lexical-write-integer/radix" "power-of-two" ]; + "safe" = [ "lexical-write-integer/safe" ]; + "std" = [ "lexical-util/std" "lexical-write-integer/std" ]; + }; + resolvedDefaultFeatures = [ "format" "std" ]; + }; + "lexical-write-integer" = rec { + crateName = "lexical-write-integer"; + version = "0.8.5"; + edition = "2018"; + sha256 = "0ii4hmvqrg6pd4j9y1pkhkp0nw2wpivjzmljh6v6ca22yk8z7dp1"; + authors = [ + "Alex Huszagh " + ]; + dependencies = [ + { + name = "lexical-util"; + packageId = "lexical-util"; + usesDefaultFeatures = false; + features = [ "write-integers" ]; + } + { + name = "static_assertions"; + packageId = "static_assertions"; + } + ]; + features = { + "compact" = [ "lexical-util/compact" ]; + "default" = [ "std" ]; + "format" = [ "lexical-util/format" ]; + "lint" = [ "lexical-util/lint" ]; + "power-of-two" = [ "lexical-util/power-of-two" ]; + "radix" = [ "lexical-util/radix" "power-of-two" ]; + "std" = [ "lexical-util/std" ]; + }; + resolvedDefaultFeatures = [ "format" "std" ]; + }; "libc" = rec { crateName = "libc"; version = "0.2.137"; @@ -5464,6 +5694,16 @@ rec { features = { }; resolvedDefaultFeatures = [ "all" ]; }; + "static_assertions" = rec { + crateName = "static_assertions"; + version = "1.1.0"; + edition = "2015"; + sha256 = "0gsl6xmw10gvn3zs1rv99laj5ig7ylffnh71f9l34js4nr4r7sx2"; + authors = [ + "Nikolai Vazquez" + ]; + features = { }; + }; "str-buf" = rec { crateName = "str-buf"; version = "1.0.6"; @@ -7176,6 +7416,15 @@ rec { packageId = "imbl"; features = [ "serde" ]; } + { + name = "lazy_static"; + packageId = "lazy_static"; + } + { + name = "lexical-core"; + packageId = "lexical-core"; + features = [ "format" "parse-floats" ]; + } { name = "path-clean"; packageId = "path-clean"; diff --git a/tvix/eval/Cargo.toml b/tvix/eval/Cargo.toml index 24e6d33d0d..d47ad8c397 100644 --- a/tvix/eval/Cargo.toml +++ b/tvix/eval/Cargo.toml @@ -15,6 +15,8 @@ codemap = "0.1.3" codemap-diagnostic = "0.1.1" dirs = "4.0.0" imbl = { version = "2.0", features = [ "serde" ] } +lazy_static = "1.4.0" +lexical-core = { version = "0.8.5", features = ["format", "parse-floats"] } path-clean = "0.1" proptest = { version = "1.0.0", default_features = false, features = ["std", "alloc", "break-dead-code", "tempfile"], optional = true } regex = "1.6.0" diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-fromjson.exp b/tvix/eval/src/tests/tvix_tests/eval-okay-fromjson.exp index c855950a30..24aa21d78f 100644 --- a/tvix/eval/src/tests/tvix_tests/eval-okay-fromjson.exp +++ b/tvix/eval/src/tests/tvix_tests/eval-okay-fromjson.exp @@ -1 +1 @@ -[ { Image = { Animated = false; Height = 600; IDs = [ 116 943 234 38793 true false null -100 ]; Latitude = 37.7668; Longitude = -122.3959; Thumbnail = { Height = 125; Url = "http://www.example.com/image/481989943"; Width = 100; }; Title = "View from 15th Floor"; Width = 800; }; } { name = "a"; value = "b"; } [ 1 2 3 4 ] ] +[ { Image = { Animated = false; Height = 600; IDs = [ 116 943 234 38793 true false null -100 ]; Latitude = 37.7668; Longitude = -122.396; Thumbnail = { Height = 125; Url = "http://www.example.com/image/481989943"; Width = 100; }; Title = "View from 15th Floor"; Width = 800; }; } { name = "a"; value = "b"; } [ 1 2 3 4 ] ] diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-fromjson.nix b/tvix/eval/src/tests/tvix_tests/eval-okay-fromjson.nix index e4f6213125..e530789446 100644 --- a/tvix/eval/src/tests/tvix_tests/eval-okay-fromjson.nix +++ b/tvix/eval/src/tests/tvix_tests/eval-okay-fromjson.nix @@ -15,7 +15,7 @@ "Animated" : false, "IDs": [116, 943, 234, 38793, true ,false,null, -100], "Latitude": 37.7668, - "Longitude": -122.3959 + "Longitude": -122.396 } } '') diff --git a/tvix/eval/src/value/mod.rs b/tvix/eval/src/value/mod.rs index 7a6bbcafb3..f9ada2b627 100644 --- a/tvix/eval/src/value/mod.rs +++ b/tvix/eval/src/value/mod.rs @@ -1,11 +1,13 @@ //! This module implements the backing representation of runtime //! values in the Nix language. use std::cmp::Ordering; +use std::num::{NonZeroI32, NonZeroUsize}; use std::ops::Deref; use std::path::PathBuf; use std::rc::Rc; use std::{cell::Ref, fmt::Display}; +use lexical_core::format::CXX_LITERAL; use serde::{Deserialize, Serialize}; #[cfg(feature = "arbitrary")] @@ -32,6 +34,8 @@ pub use thunk::Thunk; use self::thunk::ThunkSet; +use lazy_static::lazy_static; + #[warn(variant_size_differences)] #[derive(Clone, Debug, Serialize, Deserialize)] #[serde(untagged)] @@ -72,6 +76,17 @@ pub enum Value { UnresolvedPath(PathBuf), } +lazy_static! { + static ref WRITE_FLOAT_OPTIONS: lexical_core::WriteFloatOptions = + lexical_core::WriteFloatOptionsBuilder::new() + .trim_floats(true) + .round_mode(lexical_core::write_float_options::RoundMode::Round) + .positive_exponent_break(Some(NonZeroI32::new(5).unwrap())) + .max_significant_digits(Some(NonZeroUsize::new(6).unwrap())) + .build() + .unwrap(); +} + // Helper macros to generate the to_*/as_* macros while accounting for // thunks. @@ -514,9 +529,78 @@ impl TotalDisplay for Value { Value::Builtin(builtin) => builtin.fmt(f), // Nix prints floats with a maximum precision of 5 digits - // only. + // only. Except when it decides to use scientific notation + // (with a + after the `e`, and zero-padded to 0 digits) Value::Float(num) => { - write!(f, "{}", format!("{:.5}", num).trim_end_matches(['.', '0'])) + let mut buf = [b'0'; lexical_core::BUFFER_SIZE]; + let mut s = lexical_core::write_with_options::( + num.clone(), + &mut buf, + &WRITE_FLOAT_OPTIONS, + ); + + // apply some postprocessing on the buffer. If scientific + // notation is used (we see an `e`), and the next character is + // a digit, add the missing `+` sign.) + let mut new_s = Vec::with_capacity(s.len()); + + if s.contains(&b'e') { + for (i, c) in s.iter().enumerate() { + // encountered `e` + if c == &b'e' { + // next character is a digit (so no negative exponent) + if s.len() > i && s[i + 1].is_ascii_digit() { + // copy everything from the start up to (including) the e + new_s.extend_from_slice(&s[0..=i]); + // add the missing '+' + new_s.push(b'+'); + // check for the remaining characters. + // If it's only one, we need to prepend a trailing zero + if s.len() == i + 2 { + new_s.push(b'0'); + } + new_s.extend_from_slice(&s[i + 1..]); + break; + } + } + } + + // if we modified the scientific notation, flip the reference + if new_s.len() != 0 { + s = &mut new_s + } + } + // else, if this is not scientific notation, and there's a + // decimal point, make sure we really drop trailing zeroes. + // In some cases, lexical_core doesn't. + else if s.contains(&b'.') { + for (i, c) in s.iter().enumerate() { + // at `.`` + if c == &b'.' { + // trim zeroes from the right side. + let frac = String::from_utf8_lossy(&s[i + 1..]); + let frac_no_trailing_zeroes = frac.trim_end_matches("0"); + + if frac.len() != frac_no_trailing_zeroes.len() { + // we managed to strip something, construct new_s + if frac_no_trailing_zeroes.is_empty() { + // if frac_no_trailing_zeroes is empty, the fractional part was all zeroes, so we can drop the decimal point as well + new_s.extend_from_slice(&s[0..=i - 1]); + } else { + // else, assemble the rest of the string + new_s.extend_from_slice(&s[0..=i]); + new_s.extend_from_slice(frac_no_trailing_zeroes.as_bytes()); + } + + // flip the reference + s = &mut new_s; + break; + } + } + } + } + + write!(f, "{}", format!("{}", String::from_utf8_lossy(&s))) } // internal types -- cgit 1.4.1