about summary refs log tree commit diff
path: root/tvix/build/src/oci
diff options
context:
space:
mode:
authorFlorian Klink <flokli@flokli.de>2024-09-27T12·13+0200
committerclbot <clbot@tvl.fyi>2024-10-01T13·41+0000
commit2414c872821ab7ad8c9ff8dca5a91433ffb307f3 (patch)
tree22479c65cd145aea584853156bafce9ddd72e825 /tvix/build/src/oci
parentcf91917a9d8a4ca34a8451a0e0f31f0c07f755dc (diff)
feat(tvix/build), add OciBuildService, the old way r/8744
This is just patchset 10 of CL10855, before the color_eyre changes,
rebased to the tvix_castore api.

Change-Id: If4b42412ff8568058908cda971ad7d6f2d9f9b7b

---
This provides a build service that invokes runc. For now, it can be selected
by passing the `oci://$path_to_some_tempdir` builder URL.

For now, it can be tested as such:

```
BUILD_SERVICE_ADDR=oci://$PWD/bundles target/debug/tvix
let pkgs = (import <nixpkgs> {}); in builtins.readDir pkgs.perl
```

The readDir call is there to actually trigger IO into the store path (which
in turn triggers the builds).

For now it fails due to missing reference scanning (see followup CLs).

Change-Id: I09b40e410114ce69966a41a0e3c33281b859e443
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12526
Autosubmit: yuka <yuka@yuka.dev>
Tested-by: BuildkiteCI
Reviewed-by: flokli <flokli@flokli.de>
Diffstat (limited to 'tvix/build/src/oci')
-rw-r--r--tvix/build/src/oci/bundle.rs133
-rw-r--r--tvix/build/src/oci/mod.rs13
-rw-r--r--tvix/build/src/oci/spec.rs319
3 files changed, 465 insertions, 0 deletions
diff --git a/tvix/build/src/oci/bundle.rs b/tvix/build/src/oci/bundle.rs
new file mode 100644
index 000000000000..c3c2e83e89e5
--- /dev/null
+++ b/tvix/build/src/oci/bundle.rs
@@ -0,0 +1,133 @@
+//! Module to create an OCI runtime bundle for a given [BuildRequest].
+use std::{
+    fs,
+    path::{Path, PathBuf},
+};
+
+use super::scratch_name;
+use crate::proto::BuildRequest;
+use anyhow::{bail, Context};
+use tracing::{debug, instrument};
+
+/// Produce an OCI runtime bundle at the given `path`.
+///
+/// The following entries are created below `path` (missing parent directories
+/// are created as needed):
+///
+/// - `config.json`, the JSON rendering of `runtime_spec`.
+/// - `inputs/`, left empty by this function.
+/// - `root/` and `root/etc/`.
+/// - `scratch/`, with one directory per entry in `request.scratch_paths`,
+///   named by [scratch_name].
+///
+/// Check [super::make_spec] for a description about the paths produced.
+///
+/// # Errors
+///
+/// Fails if the spec cannot be rendered to JSON, or if any of the directories
+/// or `config.json` cannot be created.
+#[instrument(err)]
+pub(crate) fn make_bundle(
+    request: &BuildRequest,
+    runtime_spec: &oci_spec::runtime::Spec,
+    path: &Path,
+) -> anyhow::Result<()> {
+    fs::create_dir_all(path).context("failed to create bundle path")?;
+
+    let spec_json = serde_json::to_string(runtime_spec).context("failed to render spec to json")?;
+    fs::write(path.join("config.json"), spec_json).context("failed to write config.json")?;
+
+    fs::create_dir_all(path.join("inputs")).context("failed to create inputs dir")?;
+
+    // Creating root/etc would also create root, but both are created
+    // explicitly so a failure is reported against the right directory.
+    let root_path = path.join("root");
+
+    fs::create_dir_all(&root_path).context("failed to create root path dir")?;
+    fs::create_dir_all(root_path.join("etc")).context("failed to create root/etc dir")?;
+
+    // TODO: populate /etc/{group,passwd}. It's a mess?
+
+    let scratch_root = path.join("scratch");
+    fs::create_dir_all(&scratch_root).context("failed to create scratch/ dir")?;
+
+    // for each scratch path, calculate its name inside scratch, and ensure the
+    // directory exists.
+    for p in request.scratch_paths.iter() {
+        let scratch_path = scratch_root.join(scratch_name(p));
+        debug!(scratch_path=?scratch_path, path=?p, "about to create scratch dir");
+        fs::create_dir_all(scratch_path).context("Unable to create scratch dir")?;
+    }
+
+    Ok(())
+}
+
+/// Map every output of a [BuildRequest] to its location on the host, so the
+/// outputs can be ingested after the build.
+///
+/// The build root is not writable on its own, so each output is looked up in
+/// the request's scratch paths and resolved to
+/// `$bundle_path/scratch/$scratch_name/$relative_path`.
+///
+/// An error is returned as soon as one output cannot be located below any of
+/// the scratch paths.
+pub(crate) fn get_host_output_paths(
+    request: &BuildRequest,
+    bundle_path: &Path,
+) -> anyhow::Result<Vec<PathBuf>> {
+    let scratch_root = bundle_path.join("scratch");
+
+    request
+        .outputs
+        .iter()
+        .map(|output_path| {
+            // Find the scratch path containing this output, and the output's
+            // path relative to it.
+            let Some((mountpoint, relpath)) =
+                find_path_in_scratchs(output_path, &request.scratch_paths)
+            else {
+                bail!("unable to find path {}", output_path);
+            };
+
+            Ok(scratch_root.join(scratch_name(mountpoint)).join(relpath))
+        })
+        .collect()
+}
+
+/// Locate the mountpoint that contains `search_path`.
+///
+/// Returns the matching mountpoint together with the remainder of
+/// `search_path` below it (without the separating `/`), or `None` if no
+/// mountpoint is a parent of `search_path`. A `search_path` equal to a
+/// mountpoint is not considered to be inside of it.
+///
+/// `mountpoints` must be sorted: the list is walked from the back and matched
+/// on the prefix, so that the most specific mountpoint wins.
+fn find_path_in_scratchs<'a, 'b>(
+    search_path: &'a str,
+    mountpoints: &'b [String],
+) -> Option<(&'b str, &'a str)> {
+    for mountpoint in mountpoints.iter().rev() {
+        // The prefix only counts if it ends on a path separator, so
+        // "nix/sto" is not treated as a parent of "nix/store/…".
+        let relpath = search_path
+            .strip_prefix(mountpoint.as_str())
+            .and_then(|rest| rest.strip_prefix('/'));
+
+        if let Some(relpath) = relpath {
+            return Some((mountpoint.as_str(), relpath));
+        }
+    }
+
+    None
+}
+
+#[cfg(test)]
+mod tests {
+    use std::path::{Path, PathBuf};
+
+    use rstest::rstest;
+
+    use crate::{oci::scratch_name, proto::BuildRequest};
+
+    use super::{find_path_in_scratchs, get_host_output_paths};
+
+    /// Table-driven checks for the mountpoint lookup.
+    #[rstest]
+    #[case::simple("nix/store/aaaa", &["nix/store".into()], Some(("nix/store", "aaaa")))]
+    #[case::prefix_no_sep("nix/store/aaaa", &["nix/sto".into()], None)]
+    #[case::not_found("nix/store/aaaa", &["build".into()], None)]
+    // The list is walked from the back, so the deeper mountpoint wins.
+    #[case::nested("nix/store/aaaa", &["nix".into(), "nix/store".into()], Some(("nix/store", "aaaa")))]
+    // A path equal to the mountpoint has no separator left to strip.
+    #[case::exact_match("nix/store", &["nix/store".into()], None)]
+    fn test_find_path_in_scratchs(
+        #[case] search_path: &str,
+        #[case] mountpoints: &[String],
+        #[case] expected: Option<(&str, &str)>,
+    ) {
+        assert_eq!(find_path_in_scratchs(search_path, mountpoints), expected);
+    }
+
+    /// An output below a scratch path resolves into that scratch path's
+    /// directory inside the bundle.
+    #[test]
+    fn test_get_host_output_paths_simple() {
+        let request = BuildRequest {
+            outputs: vec!["nix/store/fhaj6gmwns62s6ypkcldbaj2ybvkhx3p-foo".into()],
+            scratch_paths: vec!["build".into(), "nix/store".into()],
+            ..Default::default()
+        };
+
+        let paths =
+            get_host_output_paths(&request, Path::new("bundle-root")).expect("must succeed");
+
+        let mut expected_path = PathBuf::new();
+        expected_path.push("bundle-root");
+        expected_path.push("scratch");
+        expected_path.push(scratch_name("nix/store"));
+        expected_path.push("fhaj6gmwns62s6ypkcldbaj2ybvkhx3p-foo");
+
+        assert_eq!(vec![expected_path], paths)
+    }
+}
diff --git a/tvix/build/src/oci/mod.rs b/tvix/build/src/oci/mod.rs
new file mode 100644
index 000000000000..26dab3059a58
--- /dev/null
+++ b/tvix/build/src/oci/mod.rs
@@ -0,0 +1,13 @@
+mod bundle;
+mod spec;
+
+pub(crate) use bundle::get_host_output_paths;
+pub(crate) use bundle::make_bundle;
+pub(crate) use spec::make_spec;
+
+/// For a given scratch path, return the scratch_name that's allocated.
+// We currently use the base32 encoding of the blake3 digest of the scratch
+// path, so we don't need to globally allocate and pass down some uuids.
+pub(crate) fn scratch_name(scratch_path: &str) -> String {
+    data_encoding::BASE32.encode(blake3::hash(scratch_path.as_bytes()).as_bytes())
+}
diff --git a/tvix/build/src/oci/spec.rs b/tvix/build/src/oci/spec.rs
new file mode 100644
index 000000000000..d804aa1171c1
--- /dev/null
+++ b/tvix/build/src/oci/spec.rs
@@ -0,0 +1,319 @@
+//! Module to create a OCI runtime spec for a given [BuildRequest].
+use crate::proto::BuildRequest;
+use oci_spec::{
+    runtime::{Capability, LinuxNamespace, LinuxNamespaceBuilder, LinuxNamespaceType},
+    OciSpecError,
+};
+use std::{collections::HashSet, path::Path};
+use tvix_castore::proto as castorepb;
+
+use super::scratch_name;
+
+/// For a given [BuildRequest], return an OCI runtime spec.
+///
+/// While there's no IO occurring in this function, the generated spec contains
+/// path references relative to the "bundle location".
+///
+/// The paths used in the spec are the following (relative to a "bundle root"):
+///
+/// - `inputs`, a directory where the castore nodes specified in the build
+///   request inputs are supposed to be populated.
+/// - `root`, a minimal skeleton of files that'll be present at /.
+/// - `scratch`, a directory containing other directories which will be
+///   bind-mounted read-write into the container and used as scratch space
+///   during the build.
+///   No assumptions should be made about what's inside this directory.
+///
+/// Generating these paths, and populating contents, like a skeleton root
+/// is up to another function, this function doesn't do filesystem IO.
+///
+/// `rootless` selects the rootless variants of mounts, capabilities and
+/// namespaces. `sandbox_shell` is the host path that gets bind-mounted
+/// read-only to `/bin/sh` if the request's constraints set `provide_bin_sh`.
+///
+/// # Errors
+///
+/// Returns an error if an environment variable value is not valid UTF-8, or
+/// if one of the spec builders fails.
+pub(crate) fn make_spec(
+    request: &BuildRequest,
+    rootless: bool,
+    sandbox_shell: &str,
+) -> Result<oci_spec::runtime::Spec, oci_spec::OciSpecError> {
+    // TODO: add BuildRequest validations. BuildRequest must contain strings as inputs
+
+    let constraints = request.constraints.as_ref();
+
+    let allow_network = constraints.is_some_and(|c| c.network_access);
+
+    // Assemble ro_host_mounts. Start with constraints.available_ro_paths,
+    // which are mounted to the same location inside the container.
+    let mut ro_host_mounts = constraints
+        .map(|c| {
+            c.available_ro_paths
+                .iter()
+                .map(|e| (e.as_str(), e.as_str()))
+                .collect::<Vec<_>>()
+        })
+        .unwrap_or_default();
+
+    // If provide_bin_sh is set, mount sandbox_shell to /bin/sh
+    if constraints.is_some_and(|c| c.provide_bin_sh) {
+        ro_host_mounts.push((sandbox_shell, "/bin/sh"))
+    }
+
+    // The environment is passed to the runtime as `KEY=value` strings, so the
+    // values need to be valid UTF-8. The values come from the request, so
+    // report a bad one as an error instead of panicking.
+    // TODO: decide what to do with non-bytes env values
+    let environment_vars = request
+        .environment_vars
+        .iter()
+        .map(|e| {
+            String::from_utf8(e.value.to_vec())
+                .map(|value| (e.key.as_str(), value))
+                .map_err(|err| {
+                    oci_spec::OciSpecError::Other(format!(
+                        "invalid string in env {:?}: {}",
+                        e.key, err
+                    ))
+                })
+        })
+        .collect::<Result<Vec<_>, _>>()?;
+
+    oci_spec::runtime::SpecBuilder::default()
+        .process(configure_process(
+            &request.command_args,
+            &request.working_dir,
+            environment_vars,
+            rootless,
+        )?)
+        .linux(configure_linux(allow_network, rootless)?)
+        .root(
+            oci_spec::runtime::RootBuilder::default()
+                .path("root")
+                .readonly(true)
+                .build()?,
+        )
+        .hostname("localhost")
+        .mounts(configure_mounts(
+            rootless,
+            allow_network,
+            request.scratch_paths.iter().map(|e| e.as_str()),
+            request.inputs.iter(),
+            &request.inputs_dir, // TODO: validate
+            ro_host_mounts,
+        )?)
+        .build()
+}
+
+/// Return the Process part of the OCI Runtime spec.
+/// This configures the command, its working dir, env and terminal setup.
+/// It also takes care of setting rlimits and capabilities.
+/// Capabilities are a bit more complicated in case rootless building is requested.
+///
+/// - `command_args` is the command line to run.
+/// - `cwd` is joined onto `/` to form the working directory.
+/// - `env` yields `(key, value)` pairs, rendered as `key=value`.
+/// - `rootless` selects the larger capability set.
+///
+/// The process runs as uid 1000 / gid 100, with a terminal, an open file
+/// limit of 1024 and `no_new_privileges` set.
+fn configure_process<'a>(
+    command_args: &[String],
+    cwd: &str,
+    env: impl IntoIterator<Item = (&'a str, String)>,
+    rootless: bool,
+) -> Result<oci_spec::runtime::Process, oci_spec::OciSpecError> {
+    let user = oci_spec::runtime::UserBuilder::default()
+        .uid(1000u32)
+        .gid(100u32)
+        .build()?;
+
+    let caps: HashSet<Capability> = if !rootless {
+        HashSet::from([Capability::AuditWrite, Capability::Kill])
+    } else {
+        HashSet::from([
+            Capability::AuditWrite,
+            Capability::Chown,
+            Capability::DacOverride,
+            Capability::Fowner,
+            Capability::Fsetid,
+            Capability::Kill,
+            Capability::Mknod,
+            Capability::NetBindService,
+            Capability::NetRaw,
+            Capability::Setfcap,
+            Capability::Setgid,
+            Capability::Setpcap,
+            Capability::Setuid,
+            Capability::SysChroot,
+        ])
+    };
+
+    // The same set is used for all five capability sets.
+    let capabilities = oci_spec::runtime::LinuxCapabilitiesBuilder::default()
+        .bounding(caps.clone())
+        .effective(caps.clone())
+        .inheritable(caps.clone())
+        .permitted(caps.clone())
+        .ambient(caps)
+        .build()?;
+
+    let nofile_limit = oci_spec::runtime::LinuxRlimitBuilder::default()
+        .typ(oci_spec::runtime::LinuxRlimitType::RlimitNofile)
+        .hard(1024_u64)
+        .soft(1024_u64)
+        .build()?;
+
+    let env = env
+        .into_iter()
+        .map(|(k, v)| format!("{}={}", k, v))
+        .collect::<Vec<_>>();
+
+    oci_spec::runtime::ProcessBuilder::default()
+        .args(command_args)
+        .env(env)
+        .terminal(true)
+        .user(user)
+        .cwd(Path::new("/").join(cwd)) // relative to the bundle root, but at least runc wants it to also be absolute.
+        .capabilities(capabilities)
+        .rlimits([nofile_limit])
+        .no_new_privileges(true)
+        .build()
+}
+
+/// Build the Linux part of the OCI Runtime spec: namespaces, masked paths and
+/// read-only paths.
+///
+/// The pid, ipc, uts, mount and cgroup namespaces are always requested. A
+/// network namespace is added unless `allow_network` is set, and a user
+/// namespace is added if `rootless` is set.
+fn configure_linux(
+    allow_network: bool,
+    rootless: bool,
+) -> Result<oci_spec::runtime::Linux, OciSpecError> {
+    // explicitly set namespaces, depending on allow_network and rootless.
+    let namespaces = [
+        LinuxNamespaceType::Pid,
+        LinuxNamespaceType::Ipc,
+        LinuxNamespaceType::Uts,
+        LinuxNamespaceType::Mount,
+        LinuxNamespaceType::Cgroup,
+    ]
+    .into_iter()
+    .chain((!allow_network).then_some(LinuxNamespaceType::Network))
+    .chain(rootless.then_some(LinuxNamespaceType::User))
+    .map(|typ| LinuxNamespaceBuilder::default().typ(typ).build())
+    .collect::<Result<Vec<LinuxNamespace>, _>>()?;
+
+    let masked_paths = Vec::from(
+        [
+            "/proc/kcore",
+            "/proc/latency_stats",
+            "/proc/timer_list",
+            "/proc/timer_stats",
+            "/proc/sched_debug",
+            "/sys/firmware",
+        ]
+        .map(String::from),
+    );
+
+    let readonly_paths = Vec::from(
+        [
+            "/proc/asound",
+            "/proc/bus",
+            "/proc/fs",
+            "/proc/irq",
+            "/proc/sys",
+            "/proc/sysrq-trigger",
+        ]
+        .map(String::from),
+    );
+
+    let mut linux = oci_spec::runtime::Linux::default();
+    linux.set_namespaces(Some(namespaces));
+    linux.set_masked_paths(Some(masked_paths));
+    linux.set_readonly_paths(Some(readonly_paths));
+
+    Ok(linux)
+}
+
+/// Return the Mounts part of the OCI Runtime spec.
+/// It first sets up the standard mounts, then scratch paths, bind mounts for
+/// all inputs, and finally read-only paths from the hosts.
+///
+/// In order, the returned list contains:
+///
+/// 1. the default (or, if `rootless`, the rootless) mounts of `oci_spec`,
+/// 2. a tmpfs on `/tmp`,
+/// 3. a read-write bind mount of `scratch/$scratch_name` to `/$scratch_path`
+///    for every scratch path,
+/// 4. a read-only bind mount of `inputs/$name` to `/$inputs_dir/$name` for
+///    every input,
+/// 5. a read-only bind mount for every `(source, destination)` pair in
+///    `ro_host_mounts`,
+/// 6. if `allow_network` is set, read-only bind mounts of the host's
+///    `/etc/resolv.conf`, `/etc/services` and `/etc/hosts`.
+///
+/// # Errors
+///
+/// Returns an error if an input has no valid name, if the name is not valid
+/// UTF-8, or if building a mount fails.
+fn configure_mounts<'a>(
+    rootless: bool,
+    allow_network: bool,
+    scratch_paths: impl IntoIterator<Item = &'a str>,
+    inputs: impl Iterator<Item = &'a castorepb::Node>,
+    inputs_dir: &str,
+    ro_host_mounts: impl IntoIterator<Item = (&'a str, &'a str)>,
+) -> Result<Vec<oci_spec::runtime::Mount>, oci_spec::OciSpecError> {
+    let mut mounts: Vec<_> = if rootless {
+        oci_spec::runtime::get_rootless_mounts()
+    } else {
+        oci_spec::runtime::get_default_mounts()
+    };
+
+    mounts.push(configure_mount(
+        "tmpfs",
+        "/tmp",
+        "tmpfs",
+        &["nosuid", "noatime", "mode=700"],
+    )?);
+
+    // NOTE: all paths below are joined from `&str` components only, so
+    // converting them back with `to_str()` cannot fail.
+
+    // For each scratch path, create a bind mount entry.
+    let scratch_root = Path::new("scratch"); // relative path
+    for scratch_path in scratch_paths.into_iter() {
+        let src = scratch_root.join(scratch_name(scratch_path));
+        let dst = Path::new("/").join(scratch_path);
+        mounts.push(configure_mount(
+            src.to_str().unwrap(),
+            dst.to_str().unwrap(),
+            "none",
+            &["rbind", "rw"],
+        )?);
+    }
+
+    // For each input, create a bind mount from inputs/$name into $inputs_dir/$name.
+    for input in inputs {
+        // Input nodes come from the request, so report invalid names as an
+        // error instead of panicking.
+        let (input_name, _input) = input
+            .clone()
+            .into_name_and_node()
+            .map_err(|_| oci_spec::OciSpecError::Other("invalid input name".to_string()))?;
+
+        let input_name = std::str::from_utf8(input_name.as_ref())
+            .map_err(|_| oci_spec::OciSpecError::Other("invalid input name".to_string()))?;
+
+        let src = Path::new("inputs").join(input_name);
+        let dst = Path::new("/").join(inputs_dir).join(input_name);
+        mounts.push(configure_mount(
+            src.to_str().unwrap(),
+            dst.to_str().unwrap(),
+            "none",
+            &[
+                "rbind", "ro",
+                // "nosuid" is required, otherwise mounting will just fail with
+                // a generic permission error.
+                // See https://github.com/wllenyj/containerd/commit/42a386c8164bef16d59590c61ab00806f854d8fd
+                "nosuid", "nodev",
+            ],
+        )?);
+    }
+
+    // Process ro_host_mounts
+    for (src, dst) in ro_host_mounts.into_iter() {
+        mounts.push(configure_mount(src, dst, "none", &["rbind", "ro"])?);
+    }
+
+    // In case network is enabled, also mount in /etc/{resolv.conf,services,hosts}
+    if allow_network {
+        for p in ["/etc/resolv.conf", "/etc/services", "/etc/hosts"] {
+            mounts.push(configure_mount(p, p, "none", &["rbind", "ro"])?);
+        }
+    }
+
+    Ok(mounts)
+}
+
+/// Build a single mount entry of type `typ`, mounting `source` at
+/// `destination` with the given mount `options`.
+fn configure_mount(
+    source: &str,
+    destination: &str,
+    typ: &str,
+    options: &[&str],
+) -> Result<oci_spec::runtime::Mount, oci_spec::OciSpecError> {
+    // The builder wants owned values, so copy every borrowed string.
+    let owned_options: Vec<String> = options.iter().copied().map(String::from).collect();
+
+    let builder = oci_spec::runtime::MountBuilder::default()
+        .destination(destination.to_owned())
+        .typ(typ.to_owned())
+        .source(source.to_owned())
+        .options(owned_options);
+
+    builder.build()
+}