From 8018313b6880d9fae71ba189a476502b68a26d25 Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Sat, 9 Dec 2023 18:45:39 +0200 Subject: feat(tvix/glue): add nixpkgs eval benchmark This adds a criterion.rs-based testbench into tvix-glue. It can be invoked by running `cargo bench` from inside the `tvix-glue` crate. `target/criterion/report/index.html` contains nice graphs. It's able to diff against the previous run, so you can invoke `cargo bench` before and after a certain change to reason about the impact in evaluation performance. Currently, we need to create a bunch of Evaluator resources inside the benchmark loop itself, which is a bit annoying, as it leaks into the things we benchmark. This should become better with b/262. Fixes b/322. Change-Id: I91656a308887baa1d459ed54d58baae919a4aaf2 Reviewed-on: https://cl.tvl.fyi/c/depot/+/10245 Autosubmit: flokli Tested-by: BuildkiteCI Reviewed-by: tazjin --- tvix/Cargo.lock | 1 + tvix/Cargo.nix | 5 ++++ tvix/glue/Cargo.toml | 5 ++++ tvix/glue/benches/eval.rs | 68 +++++++++++++++++++++++++++++++++++++++++++++++ tvix/shell.nix | 9 +++++++ 5 files changed, 88 insertions(+) create mode 100644 tvix/glue/benches/eval.rs (limited to 'tvix') diff --git a/tvix/Cargo.lock b/tvix/Cargo.lock index ce6f10bb7695..1df6d199a541 100644 --- a/tvix/Cargo.lock +++ b/tvix/Cargo.lock @@ -3161,6 +3161,7 @@ name = "tvix-glue" version = "0.1.0" dependencies = [ "bytes", + "criterion", "lazy_static", "nix-compat", "test-case", diff --git a/tvix/Cargo.nix b/tvix/Cargo.nix index 5e4b54d9d1f1..91709258ef55 100644 --- a/tvix/Cargo.nix +++ b/tvix/Cargo.nix @@ -9840,6 +9840,11 @@ rec { } ]; devDependencies = [ + { + name = "criterion"; + packageId = "criterion"; + features = [ "html_reports" ]; + } { name = "lazy_static"; packageId = "lazy_static"; diff --git a/tvix/glue/Cargo.toml b/tvix/glue/Cargo.toml index 4469c3bab3a0..90ad2752691d 100644 --- a/tvix/glue/Cargo.toml +++ b/tvix/glue/Cargo.toml @@ -18,5 +18,10 @@ thiserror = "1.0.38" git = 
"https://github.com/tvlfyi/wu-manber.git" [dev-dependencies] +criterion = { version = "0.5", features = ["html_reports"] } lazy_static = "1.4.0" test-case = "2.2.2" + +[[bench]] +name = "eval" +harness = false diff --git a/tvix/glue/benches/eval.rs b/tvix/glue/benches/eval.rs new file mode 100644 index 000000000000..4aa9b3e5c298 --- /dev/null +++ b/tvix/glue/benches/eval.rs @@ -0,0 +1,68 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use lazy_static::lazy_static; +use std::{cell::RefCell, env, rc::Rc, sync::Arc, time::Duration}; +use tvix_castore::{ + blobservice::{BlobService, MemoryBlobService}, + directoryservice::{DirectoryService, MemoryDirectoryService}, +}; +use tvix_glue::{ + builtins::add_derivation_builtins, configure_nix_path, known_paths::KnownPaths, + tvix_store_io::TvixStoreIO, +}; +use tvix_store::pathinfoservice::{MemoryPathInfoService, PathInfoService}; + +lazy_static! { + static ref BLOB_SERVICE: Arc<dyn BlobService> = Arc::new(MemoryBlobService::default()); + static ref DIRECTORY_SERVICE: Arc<dyn DirectoryService> = + Arc::new(MemoryDirectoryService::default()); + static ref PATH_INFO_SERVICE: Arc<dyn PathInfoService> = Arc::new(MemoryPathInfoService::new( + BLOB_SERVICE.clone(), + DIRECTORY_SERVICE.clone(), + )); + static ref TOKIO_RUNTIME: tokio::runtime::Runtime = tokio::runtime::Runtime::new().unwrap(); +} + +fn interpret(code: &str) { + // TODO: this is a bit annoying. + // It'd be nice if we could set this up once and then run evaluate() with a + // piece of code. b/262 + let mut eval = tvix_eval::Evaluation::new_impure(code, None); + + let known_paths: Rc<RefCell<KnownPaths>> = Default::default(); + add_derivation_builtins(&mut eval, known_paths.clone()); + configure_nix_path( + &mut eval, + // The benchmark requires TVIX_BENCH_NIX_PATH to be set, so barf out + // early, rather than benchmarking tvix returning an error.
+ &Some(env::var("TVIX_BENCH_NIX_PATH").expect("TVIX_BENCH_NIX_PATH must be set")), + ); + + eval.io_handle = Box::new(tvix_glue::tvix_io::TvixIO::new( + known_paths.clone(), + TvixStoreIO::new( + BLOB_SERVICE.clone(), + DIRECTORY_SERVICE.clone(), + PATH_INFO_SERVICE.clone(), + TOKIO_RUNTIME.handle().clone(), + ), + )); + + let result = eval.evaluate(); + + assert!(result.errors.is_empty()); +} + +fn eval_nixpkgs(c: &mut Criterion) { + c.bench_function("hello outpath", |b| { + b.iter(|| { + interpret(black_box("(import <nixpkgs> {}).hello.outPath")); + }) + }); +} + +criterion_group!( + name = benches; + config = Criterion::default().measurement_time(Duration::from_secs(30)).sample_size(10); + targets = eval_nixpkgs +); +criterion_main!(benches); diff --git a/tvix/shell.nix b/tvix/shell.nix index 4859b2b1aa2a..9a14554ac6dc 100644 --- a/tvix/shell.nix +++ b/tvix/shell.nix @@ -35,4 +35,13 @@ pkgs.mkShell { pkgs.libiconv pkgs.buildPackages.darwin.apple_sdk.frameworks.Security ]; + + # Set TVIX_BENCH_NIX_PATH to a somewhat pinned nixpkgs path. + # This is for invoking `cargo bench` imperatively on the developer machine. + # For tvix benchmarking across longer periods of time (by CI), we probably + # should also benchmark with a more static nixpkgs checkout, so nixpkgs + # refactorings are not observed as eval perf changes. + shellHook = '' + export TVIX_BENCH_NIX_PATH=nixpkgs=${pkgs.path} + ''; } -- cgit 1.4.1