about summary refs log tree commit diff
path: root/tvix/eval
diff options
context:
space:
mode:
authorFlorian Klink <flokli@flokli.de>2023-01-24T18·27+0100
committerclbot <clbot@tvl.fyi>2023-01-25T07·49+0000
commit1facd889bba724cf20ea14422ee1e57440b3e761 (patch)
tree114d353331e7387bdb9955f767e5982eeb4fb9ca /tvix/eval
parent192dac5a749edece1b5b3fb0b8acb92819df22e0 (diff)
feat(tvix/eval): use lexical-core to format float r/5753
Apparently our naive implementation of float formatting, which simply
used {:.5} and trimmed trailing "0" strings, is not sufficient.

It wrongly trimmed numbers with zeroes but no decimal point; for
example, `10000` got trimmed to `1`.

Nix uses `std::to_string` on the double, which according to
https://en.cppreference.com/w/cpp/string/basic_string/to_string
is equivalent to `std::sprintf(buf, "%f", value)`.

https://en.cppreference.com/w/cpp/io/c/fprintf mentions this is treated
like this:

> Precision specifies the exact number of digits to appear after
> the decimal point character. The default precision is 6. In the
> alternative implementation decimal point character is written even if
> no digits follow it. For infinity and not-a-number conversion style
> see notes.

This doesn't seem to be the case though, and Nix uses scientific
notation in some cases.

There's a whole bunch of strategies to determine which is a more compact
notation, and which notation should be used for a given number.
https://github.com/rust-lang/rust/issues/24556 provides some pointers
into various rabbit holes for those interested.

The gist seems to be that a different formatting is currently not
exposed in Rust directly, at least not for public consumption.

There is the
[lexical-core](https://github.com/Alexhuszagh/rust-lexical) crate
though, which provides a way to format floats with various strategies
and formats.

Change our implementation of `TotalDisplay` for the `Value::Float` case
to use that. We still need to do some post-processing, because Nix
always adds the sign in scientific notation (and there's no way to
configure lexical-core to do that), and lexical-core in some cases keeps
the trailing zeros.

Even with all that in place, there was a difference in
`eval-okay-fromjson.nix` (from tvix-tests), which I couldn't get to
work. I updated the fixture to a less problematic number.

With this, the testsuite passes again, and it also passes for the
upcoming CL that introduces builtins.fromTOML and enables the nix
testsuite bits for it.

Change-Id: Ie6fba5619e1d9fd7ce669a51594658b029057acc
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7922
Tested-by: BuildkiteCI
Autosubmit: flokli <flokli@flokli.de>
Reviewed-by: tazjin <tazjin@tvl.su>
Diffstat (limited to 'tvix/eval')
-rw-r--r--tvix/eval/Cargo.toml2
-rw-r--r--tvix/eval/src/tests/tvix_tests/eval-okay-fromjson.exp2
-rw-r--r--tvix/eval/src/tests/tvix_tests/eval-okay-fromjson.nix2
-rw-r--r--tvix/eval/src/value/mod.rs88
4 files changed, 90 insertions, 4 deletions
diff --git a/tvix/eval/Cargo.toml b/tvix/eval/Cargo.toml
index 24e6d33d0d..d47ad8c397 100644
--- a/tvix/eval/Cargo.toml
+++ b/tvix/eval/Cargo.toml
@@ -15,6 +15,8 @@ codemap = "0.1.3"
 codemap-diagnostic = "0.1.1"
 dirs = "4.0.0"
 imbl = { version = "2.0", features = [ "serde" ] }
+lazy_static = "1.4.0"
+lexical-core = { version = "0.8.5", features = ["format", "parse-floats"] }
 path-clean = "0.1"
 proptest = { version = "1.0.0", default_features = false, features = ["std", "alloc", "break-dead-code", "tempfile"], optional = true }
 regex = "1.6.0"
diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-fromjson.exp b/tvix/eval/src/tests/tvix_tests/eval-okay-fromjson.exp
index c855950a30..24aa21d78f 100644
--- a/tvix/eval/src/tests/tvix_tests/eval-okay-fromjson.exp
+++ b/tvix/eval/src/tests/tvix_tests/eval-okay-fromjson.exp
@@ -1 +1 @@
-[ { Image = { Animated = false; Height = 600; IDs = [ 116 943 234 38793 true false null -100 ]; Latitude = 37.7668; Longitude = -122.3959; Thumbnail = { Height = 125; Url = "http://www.example.com/image/481989943"; Width = 100; }; Title = "View from 15th Floor"; Width = 800; }; } { name = "a"; value = "b"; } [ 1 2 3 4 ] ]
+[ { Image = { Animated = false; Height = 600; IDs = [ 116 943 234 38793 true false null -100 ]; Latitude = 37.7668; Longitude = -122.396; Thumbnail = { Height = 125; Url = "http://www.example.com/image/481989943"; Width = 100; }; Title = "View from 15th Floor"; Width = 800; }; } { name = "a"; value = "b"; } [ 1 2 3 4 ] ]
diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-fromjson.nix b/tvix/eval/src/tests/tvix_tests/eval-okay-fromjson.nix
index e4f6213125..e530789446 100644
--- a/tvix/eval/src/tests/tvix_tests/eval-okay-fromjson.nix
+++ b/tvix/eval/src/tests/tvix_tests/eval-okay-fromjson.nix
@@ -15,7 +15,7 @@
           "Animated" : false,
           "IDs": [116, 943, 234, 38793, true  ,false,null, -100],
           "Latitude":  37.7668,
-          "Longitude": -122.3959
+          "Longitude": -122.396
       }
     }
   '')
diff --git a/tvix/eval/src/value/mod.rs b/tvix/eval/src/value/mod.rs
index 7a6bbcafb3..f9ada2b627 100644
--- a/tvix/eval/src/value/mod.rs
+++ b/tvix/eval/src/value/mod.rs
@@ -1,11 +1,13 @@
 //! This module implements the backing representation of runtime
 //! values in the Nix language.
 use std::cmp::Ordering;
+use std::num::{NonZeroI32, NonZeroUsize};
 use std::ops::Deref;
 use std::path::PathBuf;
 use std::rc::Rc;
 use std::{cell::Ref, fmt::Display};
 
+use lexical_core::format::CXX_LITERAL;
 use serde::{Deserialize, Serialize};
 
 #[cfg(feature = "arbitrary")]
@@ -32,6 +34,8 @@ pub use thunk::Thunk;
 
 use self::thunk::ThunkSet;
 
+use lazy_static::lazy_static;
+
 #[warn(variant_size_differences)]
 #[derive(Clone, Debug, Serialize, Deserialize)]
 #[serde(untagged)]
@@ -72,6 +76,17 @@ pub enum Value {
     UnresolvedPath(PathBuf),
 }
 
+lazy_static! {
+    static ref WRITE_FLOAT_OPTIONS: lexical_core::WriteFloatOptions =
+        lexical_core::WriteFloatOptionsBuilder::new()
+            .trim_floats(true)
+            .round_mode(lexical_core::write_float_options::RoundMode::Round)
+            .positive_exponent_break(Some(NonZeroI32::new(5).unwrap()))
+            .max_significant_digits(Some(NonZeroUsize::new(6).unwrap()))
+            .build()
+            .unwrap();
+}
+
 // Helper macros to generate the to_*/as_* macros while accounting for
 // thunks.
 
@@ -514,9 +529,78 @@ impl TotalDisplay for Value {
             Value::Builtin(builtin) => builtin.fmt(f),
 
             // Nix prints floats with a maximum precision of 5 digits
-            // only.
+            // only. Except when it decides to use scientific notation
+            // (with a + after the `e`, and zero-padded to 0 digits)
             Value::Float(num) => {
-                write!(f, "{}", format!("{:.5}", num).trim_end_matches(['.', '0']))
+                let mut buf = [b'0'; lexical_core::BUFFER_SIZE];
+                let mut s = lexical_core::write_with_options::<f64, { CXX_LITERAL }>(
+                    num.clone(),
+                    &mut buf,
+                    &WRITE_FLOAT_OPTIONS,
+                );
+
+                // apply some postprocessing on the buffer. If scientific
+                // notation is used (we see an `e`), and the next character is
+                // a digit, add the missing `+` sign.)
+                let mut new_s = Vec::with_capacity(s.len());
+
+                if s.contains(&b'e') {
+                    for (i, c) in s.iter().enumerate() {
+                        // encountered `e`
+                        if c == &b'e' {
+                            // next character is a digit (so no negative exponent)
+                            if s.len() > i && s[i + 1].is_ascii_digit() {
+                                // copy everything from the start up to (including) the e
+                                new_s.extend_from_slice(&s[0..=i]);
+                                // add the missing '+'
+                                new_s.push(b'+');
+                                // check for the remaining characters.
+                                // If it's only one, we need to prepend a trailing zero
+                                if s.len() == i + 2 {
+                                    new_s.push(b'0');
+                                }
+                                new_s.extend_from_slice(&s[i + 1..]);
+                                break;
+                            }
+                        }
+                    }
+
+                    // if we modified the scientific notation, flip the reference
+                    if new_s.len() != 0 {
+                        s = &mut new_s
+                    }
+                }
+                // else, if this is not scientific notation, and there's a
+                // decimal point, make sure we really drop trailing zeroes.
+                // In some cases, lexical_core doesn't.
+                else if s.contains(&b'.') {
+                    for (i, c) in s.iter().enumerate() {
+                        // at `.``
+                        if c == &b'.' {
+                            // trim zeroes from the right side.
+                            let frac = String::from_utf8_lossy(&s[i + 1..]);
+                            let frac_no_trailing_zeroes = frac.trim_end_matches("0");
+
+                            if frac.len() != frac_no_trailing_zeroes.len() {
+                                // we managed to strip something, construct new_s
+                                if frac_no_trailing_zeroes.is_empty() {
+                                    // if frac_no_trailing_zeroes is empty, the fractional part was all zeroes, so we can drop the decimal point as well
+                                    new_s.extend_from_slice(&s[0..=i - 1]);
+                                } else {
+                                    // else, assemble the rest of the string
+                                    new_s.extend_from_slice(&s[0..=i]);
+                                    new_s.extend_from_slice(frac_no_trailing_zeroes.as_bytes());
+                                }
+
+                                // flip the reference
+                                s = &mut new_s;
+                                break;
+                            }
+                        }
+                    }
+                }
+
+                write!(f, "{}", format!("{}", String::from_utf8_lossy(&s)))
             }
 
             // internal types