about summary refs log tree commit diff
diff options
context:
space:
mode:
author Padraic-O-Mhuiris <patrick.morris.310@gmail.com> 2024-02-22T15·33+0000
committer Pádraic Ó Mhuiris <patrick.morris.310@gmail.com> 2024-02-29T10·58+0000
commit e0ffb4d47a9fb52c8ea3b0d8b1d002606b0bc4ee (patch)
tree 3b739852313336e8c77f7fca7491ac9fc996ee3f
parent 5acaa672acbfdf0d9355435a2acce07f3f07d37a (diff)
feat(tvix/eval): implement `builtins.hashFile` r/7625
The primary addition is the `builtin_hashFile` function, which attempts
value-path coercion and, on success, reads the file to a nix_string.
Similar to the `builtin_hashString` implementation, the hash is then
derived using the `hash_nix_string` function in the newly introduced
hash module, which extracts the hashing functionality initially
specified in cl/11005.

Change-Id: I5e3ef0317223af99488ebb79efc2fb49b4cbc1b6
Reviewed-on: https://cl.tvl.fyi/c/depot/+/11007
Tested-by: BuildkiteCI
Reviewed-by: flokli <flokli@flokli.de>
-rw-r--r-- tvix/eval/docs/builtins.md 2
-rw-r--r-- tvix/eval/src/builtins/hash.rs 25
-rw-r--r-- tvix/eval/src/builtins/impure.rs 13
-rw-r--r-- tvix/eval/src/builtins/mod.rs 25
-rw-r--r-- tvix/eval/src/tests/nix_tests/eval-okay-hashfile.exp (renamed from tvix/eval/src/tests/nix_tests/notyetpassing/eval-okay-hashfile.exp) 0
-rw-r--r-- tvix/eval/src/tests/nix_tests/eval-okay-hashfile.nix (renamed from tvix/eval/src/tests/nix_tests/notyetpassing/eval-okay-hashfile.nix) 2
6 files changed, 42 insertions, 25 deletions
diff --git a/tvix/eval/docs/builtins.md b/tvix/eval/docs/builtins.md
index eff761c7057d..26bbd1b2d344 100644
--- a/tvix/eval/docs/builtins.md
+++ b/tvix/eval/docs/builtins.md
@@ -65,7 +65,7 @@ The `impl` column indicates implementation status in tvix:
 | getEnv                        | false  |       | false |         |
 | hasAttr                       | false  |       |       |         |
 | hasContext                    | false  |       |       |         |
-| hashFile                      | false  |       | false | todo    |
+| hashFile                      | false  |       | false |         |
 | hashString                    | false  |       |       |         |
 | head                          | false  |       |       |         |
 | import                        | true   |       |       |         |
diff --git a/tvix/eval/src/builtins/hash.rs b/tvix/eval/src/builtins/hash.rs
new file mode 100644
index 000000000000..6d07fc9b2dc8
--- /dev/null
+++ b/tvix/eval/src/builtins/hash.rs
@@ -0,0 +1,25 @@
+use bstr::ByteSlice;
+use data_encoding::HEXLOWER;
+use md5::Md5;
+use sha1::Sha1;
+use sha2::{digest::Output, Digest, Sha256, Sha512};
+
+use crate::ErrorKind;
+
+fn hash<D: Digest>(b: &[u8]) -> Output<D> {
+    let mut hasher = D::new();
+    hasher.update(b);
+    hasher.finalize()
+}
+
+pub fn hash_nix_string(algo: impl AsRef<[u8]>, s: impl AsRef<[u8]>) -> Result<String, ErrorKind> {
+    match algo.as_ref() {
+        b"md5" => Ok(HEXLOWER.encode(hash::<Md5>(s.as_ref()).as_bstr())),
+        b"sha1" => Ok(HEXLOWER.encode(hash::<Sha1>(s.as_ref()).as_bstr())),
+        b"sha256" => Ok(HEXLOWER.encode(hash::<Sha256>(s.as_ref()).as_bstr())),
+        b"sha512" => Ok(HEXLOWER.encode(hash::<Sha512>(s.as_ref()).as_bstr())),
+        _ => Err(ErrorKind::UnknownHashType(
+            algo.as_ref().as_bstr().to_string(),
+        )),
+    }
+}
diff --git a/tvix/eval/src/builtins/impure.rs b/tvix/eval/src/builtins/impure.rs
index def6ce29094c..aad55c7331e8 100644
--- a/tvix/eval/src/builtins/impure.rs
+++ b/tvix/eval/src/builtins/impure.rs
@@ -21,7 +21,7 @@ mod impure_builtins {
     use std::os::unix::ffi::OsStrExt;
 
     use super::*;
-    use crate::builtins::coerce_value_to_path;
+    use crate::builtins::{coerce_value_to_path, hash::hash_nix_string};
 
     #[builtin("getEnv")]
     async fn builtin_get_env(co: GenCo, var: Value) -> Result<Value, ErrorKind> {
@@ -30,6 +30,17 @@ mod impure_builtins {
             .into())
     }
 
+    #[builtin("hashFile")]
+    #[allow(non_snake_case)]
+    async fn builtin_hashFile(co: GenCo, algo: Value, path: Value) -> Result<Value, ErrorKind> {
+        let path = match coerce_value_to_path(&co, path).await? {
+            Err(cek) => return Ok(Value::from(cek)),
+            Ok(p) => p,
+        };
+        let s = generators::request_read_to_string(&co, path).await;
+        hash_nix_string(algo.to_str()?, s.to_str()?).map(Value::from)
+    }
+
     #[builtin("pathExists")]
     async fn builtin_path_exists(co: GenCo, path: Value) -> Result<Value, ErrorKind> {
         match coerce_value_to_path(&co, path).await? {
diff --git a/tvix/eval/src/builtins/mod.rs b/tvix/eval/src/builtins/mod.rs
index 119c0bda2dc3..0e0890f7cd28 100644
--- a/tvix/eval/src/builtins/mod.rs
+++ b/tvix/eval/src/builtins/mod.rs
@@ -5,14 +5,9 @@
 
 use bstr::{ByteSlice, ByteVec};
 use builtin_macros::builtins;
-use data_encoding::HEXLOWER;
 use genawaiter::rc::Gen;
 use imbl::OrdMap;
-use md5::Md5;
 use regex::Regex;
-use sha1::Sha1;
-use sha2::digest::Output;
-use sha2::{Digest, Sha256, Sha512};
 use std::cmp::{self, Ordering};
 use std::collections::VecDeque;
 use std::collections::{BTreeMap, HashSet};
@@ -24,12 +19,14 @@ use crate::vm::generators::{self, GenCo};
 use crate::warnings::WarningKind;
 use crate::{
     self as tvix_eval,
+    builtins::hash::hash_nix_string,
     errors::{CatchableErrorKind, ErrorKind},
     value::{CoercionKind, NixAttrs, NixList, NixString, Thunk, Value},
 };
 
 use self::versions::{VersionPart, VersionPartsIter};
 
+mod hash;
 mod to_xml;
 mod versions;
 
@@ -692,23 +689,7 @@ mod pure_builtins {
     #[builtin("hashString")]
     #[allow(non_snake_case)]
     async fn builtin_hashString(co: GenCo, algo: Value, s: Value) -> Result<Value, ErrorKind> {
-        fn hash<D: Digest>(b: &[u8]) -> Output<D> {
-            let mut hasher = D::new();
-            hasher.update(b);
-            hasher.finalize()
-        }
-
-        let s = s.to_str()?;
-
-        let encoded_hash = match algo.to_str()?.as_bytes() {
-            b"md5" => HEXLOWER.encode(hash::<Md5>(&s).as_bstr()),
-            b"sha1" => HEXLOWER.encode(hash::<Sha1>(&s).as_bstr()),
-            b"sha256" => HEXLOWER.encode(hash::<Sha256>(&s).as_bstr()),
-            b"sha512" => HEXLOWER.encode(hash::<Sha512>(&s).as_bstr()),
-            _ => return Err(ErrorKind::UnknownHashType(s.into())),
-        };
-
-        Ok(Value::from(encoded_hash))
+        hash_nix_string(algo.to_str()?, s.to_str()?).map(Value::from)
     }
 
     #[builtin("head")]
diff --git a/tvix/eval/src/tests/nix_tests/notyetpassing/eval-okay-hashfile.exp b/tvix/eval/src/tests/nix_tests/eval-okay-hashfile.exp
index ff1e8293ef22..ff1e8293ef22 100644
--- a/tvix/eval/src/tests/nix_tests/notyetpassing/eval-okay-hashfile.exp
+++ b/tvix/eval/src/tests/nix_tests/eval-okay-hashfile.exp
diff --git a/tvix/eval/src/tests/nix_tests/notyetpassing/eval-okay-hashfile.nix b/tvix/eval/src/tests/nix_tests/eval-okay-hashfile.nix
index 8c9de66b7ecf..aff5a1856814 100644
--- a/tvix/eval/src/tests/nix_tests/notyetpassing/eval-okay-hashfile.nix
+++ b/tvix/eval/src/tests/nix_tests/eval-okay-hashfile.nix
@@ -1,4 +1,4 @@
 let
-  paths = [ ./../data ./../binary-data ];
+  paths = [ ./data ./binary-data ];
 in
   builtins.concatLists (map (hash: map (builtins.hashFile hash) paths) ["md5" "sha1" "sha256" "sha512"])