about summary refs log tree commit diff
path: root/tvix/build/src
diff options
context:
space:
mode:
authorMarijan Petričević <marijan.petricevic94@gmail.com>2024-10-14T20·04-0500
committerMarijan Petričević <marijan.petricevic94@gmail.com>2024-10-18T15·45+0000
commita247b2509771b03f50568307263bb05b3af825ed (patch)
treeda2ec6ca87b61cbc2bd1ca333278400aa69ab0ca /tvix/build/src
parent1c1eb686784cf26667ef5e09e5339710c642f23e (diff)
refactor(tvix/build): add stricter BuildRequest type r/8830
Change-Id: I2950c76bbc2227952e583426bfb3ed34e8da6d2d
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12625
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
Diffstat (limited to 'tvix/build/src')
-rw-r--r--tvix/build/src/buildservice/build_request.rs131
-rw-r--r--tvix/build/src/buildservice/mod.rs6
-rw-r--r--tvix/build/src/proto/mod.rs181
3 files changed, 315 insertions, 3 deletions
diff --git a/tvix/build/src/buildservice/build_request.rs b/tvix/build/src/buildservice/build_request.rs
new file mode 100644
index 000000000000..a93fef5b393c
--- /dev/null
+++ b/tvix/build/src/buildservice/build_request.rs
@@ -0,0 +1,131 @@
+use std::collections::{BTreeMap, HashSet};
+use std::path::PathBuf;
+
+use bytes::Bytes;
+use tvix_castore::{Node, PathComponent};
+/// A BuildRequest describes the request of something to be run on the builder.
+/// It is distinct from an actual \[Build\] that has already happened, or might be
+/// currently ongoing.
+///
+/// A BuildRequest can be seen as a more normalized version of a Derivation
+/// (parsed from A-Term), "writing out" some of the Nix-internal details about
+/// how e.g. environment variables in the build are set.
+///
+/// Nix has some impurities when building a Derivation, for example the --cores option
+/// ends up as an environment variable in the build, that's not part of the ATerm.
+///
+/// As of now, we serialize this into the BuildRequest, so builders can stay dumb.
+/// This might change in the future.
+///
+/// There's also a big difference when it comes to how inputs are modelled:
+///
+/// * Nix only uses store path (strings) to describe the inputs.
+///   As store paths can be input-addressed, a certain store path can contain
+///   different contents (as not all store paths are binary reproducible).
+///   This requires that for every input-addressed input, the builder has access
+///   to either the input's deriver (and needs to build it) or else a trusted
+///   source for the built input.
+///   to upload input-addressed paths, requiring the trusted users concept.
+/// * tvix-build records a list of tvix.castore.v1.Node as inputs.
+///   These map from the store path base name to their contents, relieving the
+///   builder from having to "trust" any input-addressed paths, contrary to Nix.
+///
+/// While this approach gives a better hermeticity, it has one downside:
+/// A BuildRequest can only be sent once the contents of all its inputs are known.
+///
+/// As of now, we're okay to accept this, but it prevents uploading an
+/// entirely-non-IFD subgraph of BuildRequests eagerly.
+#[derive(Clone, PartialEq)]
+pub struct BuildRequest {
+    /// The list of all root nodes that should be visible in `inputs_dir` at the
+    /// time of the build.
+    /// As all references are content-addressed, no additional signatures are
+    /// needed to substitute / make these available in the build environment.
+    pub inputs: BTreeMap<PathComponent, Node>,
+    /// The command (and its args) executed as the build script.
+    /// In the case of a Nix derivation, this is usually
+    /// \["/path/to/some-bash/bin/bash", "-e", "/path/to/some/builder.sh"\].
+    pub command_args: Vec<String>,
+    /// The working dir of the command, relative to the build root.
+    /// "build", in the case of Nix.
+    /// This MUST be a clean relative path, without any ".", "..", or superfluous
+    /// slashes.
+    pub working_dir: PathBuf,
+    /// A list of "scratch" paths, relative to the build root.
+    /// These will be write-able during the build.
+    /// \[build, nix/store\] in the case of Nix.
+    /// These MUST be clean relative paths, without any ".", "..", or superfluous
+    /// slashes, and sorted.
+    pub scratch_paths: Vec<PathBuf>,
+    /// The path where the castore input nodes will be located at,
+    /// "nix/store" in case of Nix.
+    /// Builds might also write into here (Nix builds do that).
+    /// This MUST be a clean relative path, without any ".", "..", or superfluous
+    /// slashes.
+    pub inputs_dir: PathBuf,
+    /// The list of output paths the build is expected to produce,
+    /// relative to the root.
+    /// If the path is not produced, the build is considered to have failed.
+    /// These MUST be clean relative paths, without any ".", "..", or superfluous
+    /// slashes, and sorted.
+    pub outputs: Vec<PathBuf>,
+    /// The list of environment variables and their values that should be set
+    /// inside the build environment.
+    /// This includes both environment vars set inside the derivation, as well as
+    /// more "ephemeral" ones like NIX_BUILD_CORES, controlled by the `--cores`
+    /// CLI option of `nix-build`.
+    /// For now, we consume this as an option when turning a Derivation into a BuildRequest,
+    /// similar to how Nix has a `--cores` option.
+    /// We don't want to bleed these very nix-specific sandbox impl details into
+    /// (dumber) builders if we don't have to.
+    /// Environment variables are sorted by their keys.
+    pub environment_vars: Vec<EnvVar>,
+    /// A set of constraints that need to be satisfied on a build host before a
+    /// Build can be started.
+    pub constraints: HashSet<BuildConstraints>,
+    /// Additional (small) files and their contents that should be placed into the
+    /// build environment, but outside inputs_dir.
+    /// Used for passAsFile and structuredAttrs in Nix.
+    pub additional_files: Vec<AdditionalFile>,
+    /// If this is an non-empty list, all paths in `outputs` are scanned for these.
+    /// For Nix, `refscan_needles` would be populated with the nixbase32 hash parts of
+    /// every input store path and output store path. The latter is necessary to scan
+    /// for references between multi-output derivations.
+    pub refscan_needles: Vec<String>,
+}
+
+#[derive(Clone, PartialEq)]
+pub struct EnvVar {
+    /// name of the environment variable. Must not contain =.
+    pub key: String,
+    pub value: Bytes,
+}
+/// BuildConstraints represents certain conditions that must be fulfilled
+/// inside the build environment to be able to build this.
+/// Constraints can be things like required architecture and minimum amount of memory.
+/// The required input paths are *not* represented in here, because it
+/// wouldn't be hermetic enough - see the comment around inputs too.
+#[derive(Clone, PartialEq, Eq, Hash)]
+pub enum BuildConstraints {
+    /// The system that's needed to execute the build.
+    /// Must not be empty.
+    System(String),
+    /// The amount of memory required to be available for the build, in bytes.
+    MinMemory(u64),
+    /// An absolute path that need to be available in the build
+    /// environment, like `/dev/kvm`.
+    /// This is distinct from the castore nodes in inputs.
+    /// These MUST be clean absolute paths, without any ".", "..", or superfluous
+    /// slashes, and sorted.
+    AvailableReadOnlyPath(PathBuf),
+    /// Whether the build should be able to access the network.
+    NetworkAccess,
+    /// Whether to provide a /bin/sh inside the build environment, usually a static bash.
+    ProvideBinSh,
+}
+
+#[derive(Clone, PartialEq)]
+pub struct AdditionalFile {
+    pub path: PathBuf,
+    pub contents: Bytes,
+}
diff --git a/tvix/build/src/buildservice/mod.rs b/tvix/build/src/buildservice/mod.rs
index cdc3cb2afcc3..c3c2fd6c5715 100644
--- a/tvix/build/src/buildservice/mod.rs
+++ b/tvix/build/src/buildservice/mod.rs
@@ -1,7 +1,9 @@
 use tonic::async_trait;
 
-use crate::proto::{Build, BuildRequest};
+use crate::proto::{self, Build};
 
+pub mod build_request;
+pub use crate::buildservice::build_request::*;
 mod dummy;
 mod from_addr;
 mod grpc;
@@ -15,5 +17,5 @@ pub use from_addr::from_addr;
 #[async_trait]
 pub trait BuildService: Send + Sync {
     /// TODO: document
-    async fn do_build(&self, request: BuildRequest) -> std::io::Result<Build>;
+    async fn do_build(&self, request: proto::BuildRequest) -> std::io::Result<Build>;
 }
diff --git a/tvix/build/src/proto/mod.rs b/tvix/build/src/proto/mod.rs
index b36049d05b9d..97d56c6e625d 100644
--- a/tvix/build/src/proto/mod.rs
+++ b/tvix/build/src/proto/mod.rs
@@ -1,7 +1,8 @@
+use std::collections::{BTreeMap, HashSet};
 use std::path::{Path, PathBuf};
 
 use itertools::Itertools;
-use tvix_castore::DirectoryError;
+use tvix_castore::{DirectoryError, Node, PathComponent};
 
 mod grpc_buildservice_wrapper;
 
@@ -201,6 +202,101 @@ impl BuildRequest {
     }
 }
 
+impl TryFrom<BuildRequest> for crate::buildservice::BuildRequest {
+    type Error = ValidateBuildRequestError;
+    fn try_from(value: BuildRequest) -> Result<Self, Self::Error> {
+        // validate input names. Make sure they're sorted
+
+        let mut last_name: bytes::Bytes = "".into();
+        let mut inputs: BTreeMap<PathComponent, Node> = BTreeMap::new();
+        for (i, node) in value.inputs.iter().enumerate() {
+            let (name, node) = node
+                .clone()
+                .into_name_and_node()
+                .map_err(|e| ValidateBuildRequestError::InvalidInputNode(i, e))?;
+
+            if name.as_ref() <= last_name.as_ref() {
+                return Err(ValidateBuildRequestError::InputNodesNotSorted);
+            } else {
+                inputs.insert(name.clone(), node);
+                last_name = name.into();
+            }
+        }
+
+        // validate working_dir
+        if !is_clean_relative_path(&value.working_dir) {
+            Err(ValidateBuildRequestError::InvalidWorkingDir)?;
+        }
+
+        // validate scratch paths
+        for (i, p) in value.scratch_paths.iter().enumerate() {
+            if !is_clean_relative_path(p) {
+                Err(ValidateBuildRequestError::InvalidScratchPath(i))?
+            }
+        }
+        if !is_sorted(value.scratch_paths.iter().map(|e| e.as_bytes())) {
+            Err(ValidateBuildRequestError::ScratchPathsNotSorted)?;
+        }
+
+        // validate inputs_dir
+        if !is_clean_relative_path(&value.inputs_dir) {
+            Err(ValidateBuildRequestError::InvalidInputsDir)?;
+        }
+
+        // validate outputs
+        for (i, p) in value.outputs.iter().enumerate() {
+            if !is_clean_relative_path(p) {
+                Err(ValidateBuildRequestError::InvalidOutputPath(i))?
+            }
+        }
+        if !is_sorted(value.outputs.iter().map(|e| e.as_bytes())) {
+            Err(ValidateBuildRequestError::OutputsNotSorted)?;
+        }
+
+        // validate environment_vars.
+        for (i, e) in value.environment_vars.iter().enumerate() {
+            if e.key.is_empty() || e.key.contains('=') {
+                Err(ValidateBuildRequestError::InvalidEnvVar(i))?
+            }
+        }
+        if !is_sorted(value.environment_vars.iter().map(|e| e.key.as_bytes())) {
+            Err(ValidateBuildRequestError::EnvVarNotSorted)?;
+        }
+
+        // validate build constraints
+        let constraints = value
+            .constraints
+            .map_or(Ok(HashSet::new()), |constraints| {
+                constraints
+                    .try_into()
+                    .map_err(ValidateBuildRequestError::InvalidBuildConstraints)
+            })?;
+
+        // validate additional_files
+        for (i, additional_file) in value.additional_files.iter().enumerate() {
+            if !is_clean_relative_path(&additional_file.path) {
+                Err(ValidateBuildRequestError::InvalidAdditionalFilePath(i))?
+            }
+        }
+        if !is_sorted(value.additional_files.iter().map(|e| e.path.as_bytes())) {
+            Err(ValidateBuildRequestError::AdditionalFilesNotSorted)?;
+        }
+
+        Ok(Self {
+            inputs,
+            command_args: value.command_args,
+            working_dir: PathBuf::from(value.working_dir),
+            scratch_paths: value.scratch_paths.iter().map(PathBuf::from).collect(),
+            inputs_dir: PathBuf::from(value.inputs_dir),
+            outputs: value.outputs.iter().map(PathBuf::from).collect(),
+            environment_vars: value.environment_vars.into_iter().map(Into::into).collect(),
+            constraints,
+            additional_files: value.additional_files.into_iter().map(Into::into).collect(),
+            refscan_needles: value.refscan_needles,
+        })
+    }
+}
+
 /// Errors that occur during the validation of
 /// [build_request::BuildConstraints] messages.
 #[derive(Debug, thiserror::Error)]
@@ -235,7 +331,90 @@ impl build_request::BuildConstraints {
     }
 }
 
+impl From<build_request::EnvVar> for crate::buildservice::EnvVar {
+    fn from(value: build_request::EnvVar) -> Self {
+        Self {
+            key: value.key,
+            value: value.value,
+        }
+    }
+}
+
+impl From<crate::buildservice::EnvVar> for build_request::EnvVar {
+    fn from(value: crate::buildservice::EnvVar) -> Self {
+        Self {
+            key: value.key,
+            value: value.value,
+        }
+    }
+}
+
+impl From<build_request::AdditionalFile> for crate::buildservice::AdditionalFile {
+    fn from(value: build_request::AdditionalFile) -> Self {
+        Self {
+            path: PathBuf::from(value.path),
+            contents: value.contents,
+        }
+    }
+}
+
+impl From<crate::buildservice::AdditionalFile> for build_request::AdditionalFile {
+    fn from(value: crate::buildservice::AdditionalFile) -> Self {
+        Self {
+            path: value
+                .path
+                .to_str()
+                .expect("Tvix bug: expected a valid path")
+                .to_string(),
+            contents: value.contents,
+        }
+    }
+}
+
+impl TryFrom<build_request::BuildConstraints> for HashSet<crate::buildservice::BuildConstraints> {
+    type Error = ValidateBuildConstraintsError;
+    fn try_from(value: build_request::BuildConstraints) -> Result<Self, Self::Error> {
+        use crate::buildservice::BuildConstraints;
+
+        // validate system
+        if value.system.is_empty() {
+            Err(ValidateBuildConstraintsError::InvalidSystem)?;
+        }
+
+        let mut build_constraints = HashSet::from([
+            BuildConstraints::System(value.system),
+            BuildConstraints::MinMemory(value.min_memory),
+        ]);
+
+        // validate available_ro_paths
+        for (i, p) in value.available_ro_paths.iter().enumerate() {
+            if !is_clean_absolute_path(p) {
+                Err(ValidateBuildConstraintsError::InvalidAvailableRoPaths(i))?
+            } else {
+                build_constraints.insert(BuildConstraints::AvailableReadOnlyPath(PathBuf::from(p)));
+            }
+        }
+        if !is_sorted(value.available_ro_paths.iter().map(|e| e.as_bytes())) {
+            Err(ValidateBuildConstraintsError::AvailableRoPathsNotSorted)?;
+        }
+
+        if value.network_access {
+            build_constraints.insert(BuildConstraints::NetworkAccess);
+        }
+        if value.provide_bin_sh {
+            build_constraints.insert(BuildConstraints::ProvideBinSh);
+        }
+
+        Ok(build_constraints)
+    }
+}
+
 #[cfg(test)]
+// TODO: add testcases for constraints special cases. The default cases in the protos
+// should result in the constraints not being added. For example min_memory 0 can be omitted.
+// Also interesting testcases are "merging semantics". MimMemory(1) and MinMemory(100) will
+// result in mim_memory 100, multiple AvailableReadOnlyPaths need to be merged. Contradicting
+// system constraints need to fail somewhere (maybe an assertion, as only buggy code can construct it)
 mod tests {
     use super::{is_clean_path, is_clean_relative_path};
     use rstest::rstest;