From 2414c872821ab7ad8c9ff8dca5a91433ffb307f3 Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Fri, 27 Sep 2024 14:13:21 +0200 Subject: feat(tvix/build), add OciBuildService, the old way This is just patchset 10 of CL10855, before the color_eyre changes, rebased to the tvix_castore api. Change-Id: If4b42412ff8568058908cda971ad7d6f2d9f9b7b --- This provides a build service invoking runc. It can be used by using the `oci://$path_to_some_tempdir` builder URL for now. For now, it can be tested as such: ``` BUILD_SERVICE_ADDR=oci://$PWD/bundles target/debug/tvix let pkgs = (import <nixpkgs> {}); in builtins.readDir pkgs.perl ``` readDir is to actually trigger IO into the store path (which triggers the builds). For now it fails due to missing reference scanning (see followup CLs). Change-Id: I09b40e410114ce69966a41a0e3c33281b859e443 Reviewed-on: https://cl.tvl.fyi/c/depot/+/12526 Autosubmit: yuka Tested-by: BuildkiteCI Reviewed-by: flokli --- tvix/build/src/oci/bundle.rs | 133 ++++++++++++++++++ tvix/build/src/oci/mod.rs | 13 ++ tvix/build/src/oci/spec.rs | 319 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 465 insertions(+) create mode 100644 tvix/build/src/oci/bundle.rs create mode 100644 tvix/build/src/oci/mod.rs create mode 100644 tvix/build/src/oci/spec.rs (limited to 'tvix/build/src/oci') diff --git a/tvix/build/src/oci/bundle.rs b/tvix/build/src/oci/bundle.rs new file mode 100644 index 000000000000..c3c2e83e89e5 --- /dev/null +++ b/tvix/build/src/oci/bundle.rs @@ -0,0 +1,133 @@ +//! Module to create an OCI runtime bundle for a given [BuildRequest]. +use std::{ + fs, + path::{Path, PathBuf}, +}; + +use super::scratch_name; +use crate::proto::BuildRequest; +use anyhow::{bail, Context}; +use tracing::{debug, instrument}; + +/// Produce an OCI bundle in a given path. +/// Check [make_spec] for a description about the paths produced. 
+#[instrument(err)] +pub(crate) fn make_bundle<'a>( + request: &BuildRequest, + runtime_spec: &oci_spec::runtime::Spec, + path: &Path, +) -> anyhow::Result<()> { + fs::create_dir_all(path).context("failed to create bundle path")?; + + let spec_json = serde_json::to_string(runtime_spec).context("failed to render spec to json")?; + fs::write(path.join("config.json"), spec_json).context("failed to write config.json")?; + + fs::create_dir_all(path.join("inputs")).context("failed to create inputs dir")?; + + let root_path = path.join("root"); + + fs::create_dir_all(&root_path).context("failed to create root path dir")?; + fs::create_dir_all(root_path.join("etc")).context("failed to create root/etc dir")?; + + // TODO: populate /etc/{group,passwd}. It's a mess? + + let scratch_root = path.join("scratch"); + fs::create_dir_all(&scratch_root).context("failed to create scratch/ dir")?; + + // for each scratch path, calculate its name inside scratch, and ensure the + // directory exists. + for p in request.scratch_paths.iter() { + let scratch_path = scratch_root.join(scratch_name(p)); + debug!(scratch_path=?scratch_path, path=?p, "about to create scratch dir"); + fs::create_dir_all(scratch_path).context("Unable to create scratch dir")?; + } + + Ok(()) +} + +/// Determine the path of all outputs specified in a [BuildRequest] +/// as seen from the host, for post-build ingestion. +/// This lookup needs to take scratch paths into consideration, as the build +/// root is not writable on its own. +/// If a path can't be determined, an error is returned. +pub(crate) fn get_host_output_paths( + request: &BuildRequest, + bundle_path: &Path, +) -> anyhow::Result> { + let scratch_root = bundle_path.join("scratch"); + + let mut host_output_paths: Vec = Vec::with_capacity(request.outputs.len()); + + for output_path in request.outputs.iter() { + // calculate the location of the path. 
+ if let Some((mp, relpath)) = + find_path_in_scratchs(output_path, request.scratch_paths.as_slice()) + { + host_output_paths.push(scratch_root.join(scratch_name(mp)).join(relpath)); + } else { + bail!("unable to find path {}", output_path); + } + } + + Ok(host_output_paths) +} + +/// For a given list of mountpoints (sorted) and a search_path, find the +/// specific mountpoint parenting that search_path and return it, as well as the +/// relative path from there to the search_path. +/// mountpoints must be sorted, so we can iterate over the list from the back +/// and match on the prefix. +fn find_path_in_scratchs<'a, 'b>( + search_path: &'a str, + mountpoints: &'b [String], +) -> Option<(&'b str, &'a str)> { + mountpoints.iter().rev().find_map(|mp| { + Some(( + mp.as_str(), + search_path.strip_prefix(mp)?.strip_prefix('/')?, + )) + }) +} + +#[cfg(test)] +mod tests { + use std::path::{Path, PathBuf}; + + use rstest::rstest; + + use crate::{oci::scratch_name, proto::BuildRequest}; + + use super::{find_path_in_scratchs, get_host_output_paths}; + + #[rstest] + #[case::simple("nix/store/aaaa", &["nix/store".into()], Some(("nix/store", "aaaa")))] + #[case::prefix_no_sep("nix/store/aaaa", &["nix/sto".into()], None)] + #[case::not_found("nix/store/aaaa", &["build".into()], None)] + fn test_test_find_path_in_scratchs( + #[case] search_path: &str, + #[case] mountpoints: &[String], + #[case] expected: Option<(&str, &str)>, + ) { + assert_eq!(find_path_in_scratchs(search_path, mountpoints), expected); + } + + #[test] + fn test_get_host_output_paths_simple() { + let request = BuildRequest { + outputs: vec!["nix/store/fhaj6gmwns62s6ypkcldbaj2ybvkhx3p-foo".into()], + scratch_paths: vec!["build".into(), "nix/store".into()], + ..Default::default() + }; + + let paths = + get_host_output_paths(&request, Path::new("bundle-root")).expect("must succeed"); + + let mut expected_path = PathBuf::new(); + expected_path.push("bundle-root"); + expected_path.push("scratch"); + 
+ expected_path.push(scratch_name("nix/store")); + expected_path.push("fhaj6gmwns62s6ypkcldbaj2ybvkhx3p-foo"); + + assert_eq!(vec![expected_path], paths) + } +} diff --git a/tvix/build/src/oci/mod.rs b/tvix/build/src/oci/mod.rs new file mode 100644 index 000000000000..26dab3059a58 --- /dev/null +++ b/tvix/build/src/oci/mod.rs @@ -0,0 +1,13 @@ +mod bundle; +mod spec; + +pub(crate) use bundle::get_host_output_paths; +pub(crate) use bundle::make_bundle; +pub(crate) use spec::make_spec; + +/// For a given scratch path, return the scratch_name that's allocated. +// We currently use the BASE32 encoding of the b3 digest of the scratch +// path, so we don't need to globally allocate and pass down some uuids. +pub(crate) fn scratch_name(scratch_path: &str) -> String { + data_encoding::BASE32.encode(blake3::hash(scratch_path.as_bytes()).as_bytes()) +} diff --git a/tvix/build/src/oci/spec.rs b/tvix/build/src/oci/spec.rs new file mode 100644 index 000000000000..d804aa1171c1 --- /dev/null +++ b/tvix/build/src/oci/spec.rs @@ -0,0 +1,319 @@ +//! Module to create an OCI runtime spec for a given [BuildRequest]. +use crate::proto::BuildRequest; +use oci_spec::{ + runtime::{Capability, LinuxNamespace, LinuxNamespaceBuilder, LinuxNamespaceType}, + OciSpecError, +}; +use std::{collections::HashSet, path::Path}; +use tvix_castore::proto as castorepb; + +use super::scratch_name; + +/// For a given [BuildRequest], return an OCI runtime spec. +/// +/// While there's no IO occurring in this function, the generated spec contains +/// path references relative to the "bundle location". +/// Due to overlayfs requiring its layers to be absolute paths, we also need a +/// [bundle_dir] parameter, pointing to the location of the bundle dir itself. +/// +/// The paths used in the spec are the following (relative to a "bundle root"): +/// +/// - `inputs`, a directory where the castore nodes specified in the build request +/// inputs are supposed to be populated. 
+/// - `outputs`, a directory where all writes to the store_dir during the build +/// are directed to. +/// - `root`, a minimal skeleton of files that'll be present at /. +/// - `scratch`, a directory containing other directories which will be +/// bind-mounted read-write into the container and used as scratch space +/// during the build. +/// No assumptions should be made about what's inside this directory. +/// +/// Generating these paths, and populating contents, like a skeleton root +/// is up to another function, this function doesn't do filesystem IO. +pub(crate) fn make_spec( + request: &BuildRequest, + rootless: bool, + sandbox_shell: &str, +) -> Result { + // TODO: add BuildRequest validations. BuildRequest must contain strings as inputs + + let allow_network = request + .constraints + .as_ref() + .is_some_and(|c| c.network_access); + + // Assemble ro_host_mounts. Start with constraints.available_ro_paths. + let mut ro_host_mounts = request + .constraints + .as_ref() + .map(|constraints| { + constraints + .available_ro_paths + .iter() + .map(|e| (e.as_str(), e.as_str())) + .collect::>() + }) + .unwrap_or_default(); + + // If provide_bin_sh is set, mount sandbox_shell to /bin/sh + if request + .constraints + .as_ref() + .is_some_and(|c| c.provide_bin_sh) + { + ro_host_mounts.push((sandbox_shell, "/bin/sh")) + } + + oci_spec::runtime::SpecBuilder::default() + .process(configure_process( + &request.command_args, + &request.working_dir, + request + .environment_vars + .iter() + .map(|e| { + ( + e.key.as_str(), + // TODO: decide what to do with non-bytes env values + String::from_utf8(e.value.to_vec()).expect("invalid string in env"), + ) + }) + .collect::>(), + rootless, + )?) + .linux(configure_linux(allow_network, rootless)?) 
+ .root( + oci_spec::runtime::RootBuilder::default() + .path("root") + .readonly(true) + .build()?, + ) + .hostname("localhost") + .mounts(configure_mounts( + rootless, + allow_network, + request.scratch_paths.iter().map(|e| e.as_str()), + request.inputs.iter(), + &request.inputs_dir, // TODO: validate + ro_host_mounts, + )?) + .build() +} + +/// Return the Process part of the OCI Runtime spec. +/// This configures the command, its working dir, env and terminal setup. +/// It also takes care of setting rlimits and capabilities. +/// Capabilities are a bit more complicated in case rootless building is requested. +fn configure_process<'a>( + command_args: &[String], + cwd: &String, + env: impl IntoIterator, + rootless: bool, +) -> Result { + let spec_builder = oci_spec::runtime::ProcessBuilder::default() + .args(command_args) + .env( + env.into_iter() + .map(|(k, v)| format!("{}={}", k, v)) + .collect::>(), + ) + .terminal(true) + .user( + oci_spec::runtime::UserBuilder::default() + .uid(1000u32) + .gid(100u32) + .build()?, + ) + .cwd(Path::new("/").join(cwd)) // relative to the bundle root, but at least runc wants it to also be absolute. + .capabilities({ + let caps: HashSet = if !rootless { + HashSet::from([Capability::AuditWrite, Capability::Kill]) + } else { + HashSet::from([ + Capability::AuditWrite, + Capability::Chown, + Capability::DacOverride, + Capability::Fowner, + Capability::Fsetid, + Capability::Kill, + Capability::Mknod, + Capability::NetBindService, + Capability::NetRaw, + Capability::Setfcap, + Capability::Setgid, + Capability::Setpcap, + Capability::Setuid, + Capability::SysChroot, + ]) + }; + + oci_spec::runtime::LinuxCapabilitiesBuilder::default() + .bounding(caps.clone()) + .effective(caps.clone()) + .inheritable(caps.clone()) + .permitted(caps.clone()) + .ambient(caps) + .build()? 
+ }) + .rlimits([oci_spec::runtime::LinuxRlimitBuilder::default() + .typ(oci_spec::runtime::LinuxRlimitType::RlimitNofile) + .hard(1024_u64) + .soft(1024_u64) + .build()?]) + .no_new_privileges(true); + + spec_builder.build() +} + +/// Return the Linux part of the OCI Runtime spec. +/// This configures various namespaces, masked and read-only paths. +fn configure_linux( + allow_network: bool, + rootless: bool, +) -> Result { + let mut linux = oci_spec::runtime::Linux::default(); + + // explicitly set namespaces, depending on allow_network. + linux.set_namespaces(Some({ + let mut namespace_types = vec![ + LinuxNamespaceType::Pid, + LinuxNamespaceType::Ipc, + LinuxNamespaceType::Uts, + LinuxNamespaceType::Mount, + LinuxNamespaceType::Cgroup, + ]; + if !allow_network { + namespace_types.push(LinuxNamespaceType::Network) + } + if rootless { + namespace_types.push(LinuxNamespaceType::User) + } + + namespace_types + .into_iter() + .map(|e| LinuxNamespaceBuilder::default().typ(e).build()) + .collect::, _>>()? + })); + + linux.set_masked_paths(Some( + [ + "/proc/kcore", + "/proc/latency_stats", + "/proc/timer_list", + "/proc/timer_stats", + "/proc/sched_debug", + "/sys/firmware", + ] + .into_iter() + .map(|e| e.to_string()) + .collect::>(), + )); + + linux.set_readonly_paths(Some( + [ + "/proc/asound", + "/proc/bus", + "/proc/fs", + "/proc/irq", + "/proc/sys", + "/proc/sysrq-trigger", + ] + .into_iter() + .map(|e| e.to_string()) + .collect::>(), + )); + + Ok(linux) +} + +/// Return the Mounts part of the OCI Runtime spec. +/// It first sets up the standard mounts, then scratch paths, bind mounts for +/// all inputs, and finally read-only paths from the hosts. 
+fn configure_mounts<'a>( + rootless: bool, + allow_network: bool, + scratch_paths: impl IntoIterator, + inputs: impl Iterator, + inputs_dir: &str, + ro_host_mounts: impl IntoIterator, +) -> Result, oci_spec::OciSpecError> { + let mut mounts: Vec<_> = if rootless { + oci_spec::runtime::get_rootless_mounts() + } else { + oci_spec::runtime::get_default_mounts() + }; + + mounts.push(configure_mount( + "tmpfs", + "/tmp", + "tmpfs", + &["nosuid", "noatime", "mode=700"], + )?); + + // For each scratch path, create a bind mount entry. + let scratch_root = Path::new("scratch"); // relative path + for scratch_path in scratch_paths.into_iter() { + let src = scratch_root.join(scratch_name(scratch_path)); + mounts.push(configure_mount( + src.to_str().unwrap(), + Path::new("/").join(scratch_path).to_str().unwrap(), + "none", + &["rbind", "rw"], + )?); + } + + // For each input, create a bind mount from inputs/$name into $inputs_dir/$name. + for input in inputs { + let (input_name, _input) = input + .clone() + .into_name_and_node() + .expect("invalid input name"); + + let input_name = std::str::from_utf8(input_name.as_ref()).expect("invalid input name"); + mounts.push(configure_mount( + Path::new("inputs").join(input_name).to_str().unwrap(), + Path::new("/") + .join(inputs_dir) + .join(input_name) + .to_str() + .unwrap(), + "none", + &[ + "rbind", "ro", + // "nosuid" is required, otherwise mounting will just fail with + // a generic permission error. 
+ // See https://github.com/wllenyj/containerd/commit/42a386c8164bef16d59590c61ab00806f854d8fd + "nosuid", "nodev", + ], + )?); + } + + // Process ro_host_mounts + for (src, dst) in ro_host_mounts.into_iter() { + mounts.push(configure_mount(src, dst, "none", &["rbind", "ro"])?); + } + + // In case network is enabled, also mount in /etc/{resolv.conf,services,hosts} + if allow_network { + for p in ["/etc/resolv.conf", "/etc/services", "/etc/hosts"] { + mounts.push(configure_mount(p, p, "none", &["rbind", "ro"])?); + } + } + + Ok(mounts) +} + +/// Helper function to produce a mount. +fn configure_mount( + source: &str, + destination: &str, + typ: &str, + options: &[&str], +) -> Result { + oci_spec::runtime::MountBuilder::default() + .destination(destination.to_string()) + .typ(typ.to_string()) + .source(source.to_string()) + .options(options.iter().map(|e| e.to_string()).collect::>()) + .build() +} -- cgit 1.4.1