From 2414c872821ab7ad8c9ff8dca5a91433ffb307f3 Mon Sep 17 00:00:00 2001
From: Florian Klink
Date: Fri, 27 Sep 2024 14:13:21 +0200
Subject: feat(tvix/build), add OciBuildService, the old way

This is just patchset 10 of CL10855, before the color_eyre changes,
rebased to the tvix_castore api.

Change-Id: If4b42412ff8568058908cda971ad7d6f2d9f9b7b
---
This provides a build service invoking runc. It can be used by using the
`oci://$path_to_some_tempdir` builder URL for now.

For now, it can be tested as such:

```
BUILD_SERVICE_ADDR=oci://$PWD/bundles target/debug/tvix
let pkgs = (import <nixpkgs> {}); in builtins.readDir pkgs.perl
```

readDir is to actually trigger IO into the store path (which triggers
the builds).

For now it fails due to missing reference scanning (see followup CLs).

Change-Id: I09b40e410114ce69966a41a0e3c33281b859e443
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12526
Autosubmit: yuka
Tested-by: BuildkiteCI
Reviewed-by: flokli
---
 tvix/build/src/oci/spec.rs | 319 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 319 insertions(+)
 create mode 100644 tvix/build/src/oci/spec.rs

(limited to 'tvix/build/src/oci/spec.rs')

diff --git a/tvix/build/src/oci/spec.rs b/tvix/build/src/oci/spec.rs
new file mode 100644
index 000000000000..d804aa1171c1
--- /dev/null
+++ b/tvix/build/src/oci/spec.rs
@@ -0,0 +1,319 @@
+//! Module to create a OCI runtime spec for a given [BuildRequest].
+use crate::proto::BuildRequest;
+use oci_spec::{
+    runtime::{Capability, LinuxNamespace, LinuxNamespaceBuilder, LinuxNamespaceType},
+    OciSpecError,
+};
+use std::{collections::HashSet, path::Path};
+use tvix_castore::proto as castorepb;
+
+use super::scratch_name;
+
+/// For a given [BuildRequest], return an OCI runtime spec.
+///
+/// While there's no IO occurring in this function, the generated spec contains
+/// path references relative to the "bundle location".
+/// Due to overlayfs requiring its layers to be absolute paths, we also need a
+/// [bundle_dir] parameter, pointing to the location of the bundle dir itself.
+///
+/// The paths used in the spec are the following (relative to a "bundle root"):
+///
+/// - `inputs`, a directory where the castore nodes specified in the build request
+///   inputs are supposed to be populated.
+/// - `outputs`, a directory where all writes to the store_dir during the build
+///   are directed to.
+/// - `root`, a minimal skeleton of files that'll be present at /.
+/// - `scratch`, a directory containing other directories which will be
+///   bind-mounted read-write into the container and used as scratch space
+///   during the build.
+///   No assumptions should be made about what's inside this directory.
+///
+/// Generating these paths, and populating contents, like a skeleton root
+/// is up to another function, this function doesn't do filesystem IO.
+pub(crate) fn make_spec(
+    request: &BuildRequest,
+    rootless: bool,
+    sandbox_shell: &str,
+) -> Result<oci_spec::runtime::Spec, oci_spec::OciSpecError> {
+    // TODO: add BuildRequest validations. BuildRequest must contain strings as inputs
+
+    let allow_network = request
+        .constraints
+        .as_ref()
+        .is_some_and(|c| c.network_access);
+
+    // Assemble ro_host_mounts. Start with constraints.available_ro_paths.
+    let mut ro_host_mounts = request
+        .constraints
+        .as_ref()
+        .map(|constraints| {
+            constraints
+                .available_ro_paths
+                .iter()
+                .map(|e| (e.as_str(), e.as_str()))
+                .collect::<Vec<_>>()
+        })
+        .unwrap_or_default();
+
+    // If provide_bin_sh is set, mount sandbox_shell to /bin/sh
+    if request
+        .constraints
+        .as_ref()
+        .is_some_and(|c| c.provide_bin_sh)
+    {
+        ro_host_mounts.push((sandbox_shell, "/bin/sh"))
+    }
+
+    oci_spec::runtime::SpecBuilder::default()
+        .process(configure_process(
+            &request.command_args,
+            &request.working_dir,
+            request
+                .environment_vars
+                .iter()
+                .map(|e| {
+                    (
+                        e.key.as_str(),
+                        // TODO: decide what to do with non-bytes env values
+                        String::from_utf8(e.value.to_vec()).expect("invalid string in env"),
+                    )
+                })
+                .collect::<Vec<_>>(),
+            rootless,
+        )?)
+        .linux(configure_linux(allow_network, rootless)?)
+        .root(
+            oci_spec::runtime::RootBuilder::default()
+                .path("root")
+                .readonly(true)
+                .build()?,
+        )
+        .hostname("localhost")
+        .mounts(configure_mounts(
+            rootless,
+            allow_network,
+            request.scratch_paths.iter().map(|e| e.as_str()),
+            request.inputs.iter(),
+            &request.inputs_dir, // TODO: validate
+            ro_host_mounts,
+        )?)
+        .build()
+}
+
+/// Return the Process part of the OCI Runtime spec.
+/// This configures the command, its working dir, env and terminal setup.
+/// It also takes care of setting rlimits and capabilities.
+/// Capabilities are a bit more complicated in case rootless building is requested.
+fn configure_process<'a>(
+    command_args: &[String],
+    cwd: &String,
+    env: impl IntoIterator<Item = (&'a str, String)>,
+    rootless: bool,
+) -> Result<oci_spec::runtime::Process, oci_spec::OciSpecError> {
+    let spec_builder = oci_spec::runtime::ProcessBuilder::default()
+        .args(command_args)
+        .env(
+            env.into_iter()
+                .map(|(k, v)| format!("{}={}", k, v))
+                .collect::<Vec<_>>(),
+        )
+        .terminal(true)
+        .user(
+            oci_spec::runtime::UserBuilder::default()
+                .uid(1000u32)
+                .gid(100u32)
+                .build()?,
+        )
+        .cwd(Path::new("/").join(cwd)) // relative to the bundle root, but at least runc wants it to also be absolute.
+        .capabilities({
+            let caps: HashSet<Capability> = if !rootless {
+                HashSet::from([Capability::AuditWrite, Capability::Kill])
+            } else {
+                HashSet::from([
+                    Capability::AuditWrite,
+                    Capability::Chown,
+                    Capability::DacOverride,
+                    Capability::Fowner,
+                    Capability::Fsetid,
+                    Capability::Kill,
+                    Capability::Mknod,
+                    Capability::NetBindService,
+                    Capability::NetRaw,
+                    Capability::Setfcap,
+                    Capability::Setgid,
+                    Capability::Setpcap,
+                    Capability::Setuid,
+                    Capability::SysChroot,
+                ])
+            };
+
+            oci_spec::runtime::LinuxCapabilitiesBuilder::default()
+                .bounding(caps.clone())
+                .effective(caps.clone())
+                .inheritable(caps.clone())
+                .permitted(caps.clone())
+                .ambient(caps)
+                .build()?
+        })
+        .rlimits([oci_spec::runtime::LinuxRlimitBuilder::default()
+            .typ(oci_spec::runtime::LinuxRlimitType::RlimitNofile)
+            .hard(1024_u64)
+            .soft(1024_u64)
+            .build()?])
+        .no_new_privileges(true);
+
+    spec_builder.build()
+}
+
+/// Return the Linux part of the OCI Runtime spec.
+/// This configures various namespaces, masked and read-only paths.
+fn configure_linux(
+    allow_network: bool,
+    rootless: bool,
+) -> Result<oci_spec::runtime::Linux, OciSpecError> {
+    let mut linux = oci_spec::runtime::Linux::default();
+
+    // explicitly set namespaces, depending on allow_network.
+    linux.set_namespaces(Some({
+        let mut namespace_types = vec![
+            LinuxNamespaceType::Pid,
+            LinuxNamespaceType::Ipc,
+            LinuxNamespaceType::Uts,
+            LinuxNamespaceType::Mount,
+            LinuxNamespaceType::Cgroup,
+        ];
+        if !allow_network {
+            namespace_types.push(LinuxNamespaceType::Network)
+        }
+        if rootless {
+            namespace_types.push(LinuxNamespaceType::User)
+        }
+
+        namespace_types
+            .into_iter()
+            .map(|e| LinuxNamespaceBuilder::default().typ(e).build())
+            .collect::<Result<Vec<LinuxNamespace>, _>>()?
+    }));
+
+    linux.set_masked_paths(Some(
+        [
+            "/proc/kcore",
+            "/proc/latency_stats",
+            "/proc/timer_list",
+            "/proc/timer_stats",
+            "/proc/sched_debug",
+            "/sys/firmware",
+        ]
+        .into_iter()
+        .map(|e| e.to_string())
+        .collect::<Vec<_>>(),
+    ));
+
+    linux.set_readonly_paths(Some(
+        [
+            "/proc/asound",
+            "/proc/bus",
+            "/proc/fs",
+            "/proc/irq",
+            "/proc/sys",
+            "/proc/sysrq-trigger",
+        ]
+        .into_iter()
+        .map(|e| e.to_string())
+        .collect::<Vec<_>>(),
+    ));
+
+    Ok(linux)
+}
+
+/// Return the Mounts part of the OCI Runtime spec.
+/// It first sets up the standard mounts, then scratch paths, bind mounts for
+/// all inputs, and finally read-only paths from the hosts.
+fn configure_mounts<'a>(
+    rootless: bool,
+    allow_network: bool,
+    scratch_paths: impl IntoIterator<Item = &'a str>,
+    inputs: impl Iterator<Item = &'a castorepb::Node>,
+    inputs_dir: &str,
+    ro_host_mounts: impl IntoIterator<Item = (&'a str, &'a str)>,
+) -> Result<Vec<oci_spec::runtime::Mount>, oci_spec::OciSpecError> {
+    let mut mounts: Vec<_> = if rootless {
+        oci_spec::runtime::get_rootless_mounts()
+    } else {
+        oci_spec::runtime::get_default_mounts()
+    };
+
+    mounts.push(configure_mount(
+        "tmpfs",
+        "/tmp",
+        "tmpfs",
+        &["nosuid", "noatime", "mode=700"],
+    )?);
+
+    // For each scratch path, create a bind mount entry.
+    let scratch_root = Path::new("scratch"); // relative path
+    for scratch_path in scratch_paths.into_iter() {
+        let src = scratch_root.join(scratch_name(scratch_path));
+        mounts.push(configure_mount(
+            src.to_str().unwrap(),
+            Path::new("/").join(scratch_path).to_str().unwrap(),
+            "none",
+            &["rbind", "rw"],
+        )?);
+    }
+
+    // For each input, create a bind mount from inputs/$name into $inputs_dir/$name.
+    for input in inputs {
+        let (input_name, _input) = input
+            .clone()
+            .into_name_and_node()
+            .expect("invalid input name");
+
+        let input_name = std::str::from_utf8(input_name.as_ref()).expect("invalid input name");
+        mounts.push(configure_mount(
+            Path::new("inputs").join(input_name).to_str().unwrap(),
+            Path::new("/")
+                .join(inputs_dir)
+                .join(input_name)
+                .to_str()
+                .unwrap(),
+            "none",
+            &[
+                "rbind", "ro",
+                // "nosuid" is required, otherwise mounting will just fail with
+                // a generic permission error.
+                // See https://github.com/wllenyj/containerd/commit/42a386c8164bef16d59590c61ab00806f854d8fd
+                "nosuid", "nodev",
+            ],
+        )?);
+    }
+
+    // Process ro_host_mounts
+    for (src, dst) in ro_host_mounts.into_iter() {
+        mounts.push(configure_mount(src, dst, "none", &["rbind", "ro"])?);
+    }
+
+    // In case network is enabled, also mount in /etc/{resolv.conf,services,hosts}
+    if allow_network {
+        for p in ["/etc/resolv.conf", "/etc/services", "/etc/hosts"] {
+            mounts.push(configure_mount(p, p, "none", &["rbind", "ro"])?);
+        }
+    }
+
+    Ok(mounts)
+}
+
+/// Helper function to produce a mount.
+fn configure_mount(
+    source: &str,
+    destination: &str,
+    typ: &str,
+    options: &[&str],
+) -> Result<oci_spec::runtime::Mount, oci_spec::OciSpecError> {
+    oci_spec::runtime::MountBuilder::default()
+        .destination(destination.to_string())
+        .typ(typ.to_string())
+        .source(source.to_string())
+        .options(options.iter().map(|e| e.to_string()).collect::<Vec<_>>())
+        .build()
+}
-- 
cgit 1.4.1