diff options
author | Marijan Petričević <marijan.petricevic94@gmail.com> | 2024-10-14T20·04-0500 |
---|---|---|
committer | Marijan Petričević <marijan.petricevic94@gmail.com> | 2024-10-18T15·45+0000 |
commit | a247b2509771b03f50568307263bb05b3af825ed (patch) | |
tree | da2ec6ca87b61cbc2bd1ca333278400aa69ab0ca | |
parent | 1c1eb686784cf26667ef5e09e5339710c642f23e (diff) |
refactor(tvix/build): add stricter BuildRequest type r/8830
Change-Id: I2950c76bbc2227952e583426bfb3ed34e8da6d2d Reviewed-on: https://cl.tvl.fyi/c/depot/+/12625 Reviewed-by: flokli <flokli@flokli.de> Tested-by: BuildkiteCI
-rw-r--r-- | tvix/build/src/buildservice/build_request.rs | 131 | ||||
-rw-r--r-- | tvix/build/src/buildservice/mod.rs | 6 | ||||
-rw-r--r-- | tvix/build/src/proto/mod.rs | 181 |
3 files changed, 315 insertions, 3 deletions
diff --git a/tvix/build/src/buildservice/build_request.rs b/tvix/build/src/buildservice/build_request.rs new file mode 100644 index 000000000000..a93fef5b393c --- /dev/null +++ b/tvix/build/src/buildservice/build_request.rs @@ -0,0 +1,131 @@ +use std::collections::{BTreeMap, HashSet}; +use std::path::PathBuf; + +use bytes::Bytes; +use tvix_castore::{Node, PathComponent}; +/// A BuildRequest describes the request of something to be run on the builder. +/// It is distinct from an actual \[Build\] that has already happened, or might be +/// currently ongoing. +/// +/// A BuildRequest can be seen as a more normalized version of a Derivation +/// (parsed from A-Term), "writing out" some of the Nix-internal details about +/// how e.g. environment variables in the build are set. +/// +/// Nix has some impurities when building a Derivation, for example the --cores option +/// ends up as an environment variable in the build, that's not part of the ATerm. +/// +/// As of now, we serialize this into the BuildRequest, so builders can stay dumb. +/// This might change in the future. +/// +/// There's also a big difference when it comes to how inputs are modelled: +/// +/// * Nix only uses store path (strings) to describe the inputs. +/// As store paths can be input-addressed, a certain store path can contain +/// different contents (as not all store paths are binary reproducible). +/// This requires that for every input-addressed input, the builder has access +/// to either the input's deriver (and needs to build it) or else a trusted +/// source for the built input. +/// to upload input-addressed paths, requiring the trusted users concept. +/// * tvix-build records a list of tvix.castore.v1.Node as inputs. +/// These map from the store path base name to their contents, relieving the +/// builder from having to "trust" any input-addressed paths, contrary to Nix. +/// +/// While this approach gives a better hermeticity, it has one downside: +/// A BuildRequest can only be sent once the contents of all its inputs are known. +/// +/// As of now, we're okay to accept this, but it prevents uploading an +/// entirely-non-IFD subgraph of BuildRequests eagerly. +#[derive(Clone, PartialEq)] +pub struct BuildRequest { + /// The list of all root nodes that should be visible in `inputs_dir` at the + /// time of the build. + /// As all references are content-addressed, no additional signatures are + /// needed to substitute / make these available in the build environment. + pub inputs: BTreeMap<PathComponent, Node>, + /// The command (and its args) executed as the build script. + /// In the case of a Nix derivation, this is usually + /// \["/path/to/some-bash/bin/bash", "-e", "/path/to/some/builder.sh"\]. + pub command_args: Vec<String>, + /// The working dir of the command, relative to the build root. + /// "build", in the case of Nix. + /// This MUST be a clean relative path, without any ".", "..", or superfluous + /// slashes. + pub working_dir: PathBuf, + /// A list of "scratch" paths, relative to the build root. + /// These will be write-able during the build. + /// \[build, nix/store\] in the case of Nix. + /// These MUST be clean relative paths, without any ".", "..", or superfluous + /// slashes, and sorted. + pub scratch_paths: Vec<PathBuf>, + /// The path where the castore input nodes will be located at, + /// "nix/store" in case of Nix. + /// Builds might also write into here (Nix builds do that). + /// This MUST be a clean relative path, without any ".", "..", or superfluous + /// slashes. + pub inputs_dir: PathBuf, + /// The list of output paths the build is expected to produce, + /// relative to the root. + /// If the path is not produced, the build is considered to have failed. + /// These MUST be clean relative paths, without any ".", "..", or superfluous + /// slashes, and sorted. + pub outputs: Vec<PathBuf>, + /// The list of environment variables and their values that should be set + /// inside the build environment. + /// This includes both environment vars set inside the derivation, as well as + /// more "ephemeral" ones like NIX_BUILD_CORES, controlled by the `--cores` + /// CLI option of `nix-build`. + /// For now, we consume this as an option when turning a Derivation into a BuildRequest, + /// similar to how Nix has a `--cores` option. + /// We don't want to bleed these very nix-specific sandbox impl details into + /// (dumber) builders if we don't have to. + /// Environment variables are sorted by their keys. + pub environment_vars: Vec<EnvVar>, + /// A set of constraints that need to be satisfied on a build host before a + /// Build can be started. + pub constraints: HashSet<BuildConstraints>, + /// Additional (small) files and their contents that should be placed into the + /// build environment, but outside inputs_dir. + /// Used for passAsFile and structuredAttrs in Nix. + pub additional_files: Vec<AdditionalFile>, + /// If this is an non-empty list, all paths in `outputs` are scanned for these. + /// For Nix, `refscan_needles` would be populated with the nixbase32 hash parts of + /// every input store path and output store path. The latter is necessary to scan + /// for references between multi-output derivations. + pub refscan_needles: Vec<String>, +} + +#[derive(Clone, PartialEq)] +pub struct EnvVar { + /// name of the environment variable. Must not contain =. + pub key: String, + pub value: Bytes, +} +/// BuildConstraints represents certain conditions that must be fulfilled +/// inside the build environment to be able to build this. +/// Constraints can be things like required architecture and minimum amount of memory. +/// The required input paths are *not* represented in here, because it +/// wouldn't be hermetic enough - see the comment around inputs too. +#[derive(Clone, PartialEq, Eq, Hash)] +pub enum BuildConstraints { + /// The system that's needed to execute the build. + /// Must not be empty. + System(String), + /// The amount of memory required to be available for the build, in bytes. + MinMemory(u64), + /// An absolute path that need to be available in the build + /// environment, like `/dev/kvm`. + /// This is distinct from the castore nodes in inputs. + /// These MUST be clean absolute paths, without any ".", "..", or superfluous + /// slashes, and sorted. + AvailableReadOnlyPath(PathBuf), + /// Whether the build should be able to access the network. + NetworkAccess, + /// Whether to provide a /bin/sh inside the build environment, usually a static bash. + ProvideBinSh, +} + +#[derive(Clone, PartialEq)] +pub struct AdditionalFile { + pub path: PathBuf, + pub contents: Bytes, +} diff --git a/tvix/build/src/buildservice/mod.rs b/tvix/build/src/buildservice/mod.rs index cdc3cb2afcc3..c3c2fd6c5715 100644 --- a/tvix/build/src/buildservice/mod.rs +++ b/tvix/build/src/buildservice/mod.rs @@ -1,7 +1,9 @@ use tonic::async_trait; -use crate::proto::{Build, BuildRequest}; +use crate::proto::{self, Build}; +pub mod build_request; +pub use crate::buildservice::build_request::*; mod dummy; mod from_addr; mod grpc; @@ -15,5 +17,5 @@ pub use from_addr::from_addr; #[async_trait] pub trait BuildService: Send + Sync { /// TODO: document - async fn do_build(&self, request: BuildRequest) -> std::io::Result<Build>; + async fn do_build(&self, request: proto::BuildRequest) -> std::io::Result<Build>; } diff --git a/tvix/build/src/proto/mod.rs b/tvix/build/src/proto/mod.rs index b36049d05b9d..97d56c6e625d 100644 --- a/tvix/build/src/proto/mod.rs +++ b/tvix/build/src/proto/mod.rs @@ -1,7 +1,8 @@ +use std::collections::{BTreeMap, HashSet}; use std::path::{Path, PathBuf}; use itertools::Itertools; -use tvix_castore::DirectoryError; +use tvix_castore::{DirectoryError, Node, PathComponent}; mod grpc_buildservice_wrapper; @@ -201,6 +202,101 @@ impl BuildRequest { } } +impl TryFrom<BuildRequest> for crate::buildservice::BuildRequest { + type Error = ValidateBuildRequestError; + fn try_from(value: BuildRequest) -> Result<Self, Self::Error> { + // validate input names. Make sure they're sorted + + let mut last_name: bytes::Bytes = "".into(); + let mut inputs: BTreeMap<PathComponent, Node> = BTreeMap::new(); + for (i, node) in value.inputs.iter().enumerate() { + let (name, node) = node + .clone() + .into_name_and_node() + .map_err(|e| ValidateBuildRequestError::InvalidInputNode(i, e))?; + + if name.as_ref() <= last_name.as_ref() { + return Err(ValidateBuildRequestError::InputNodesNotSorted); + } else { + inputs.insert(name.clone(), node); + last_name = name.into(); + } + } + + // validate working_dir + if !is_clean_relative_path(&value.working_dir) { + Err(ValidateBuildRequestError::InvalidWorkingDir)?; + } + + // validate scratch paths + for (i, p) in value.scratch_paths.iter().enumerate() { + if !is_clean_relative_path(p) { + Err(ValidateBuildRequestError::InvalidScratchPath(i))? + } + } + if !is_sorted(value.scratch_paths.iter().map(|e| e.as_bytes())) { + Err(ValidateBuildRequestError::ScratchPathsNotSorted)?; + } + + // validate inputs_dir + if !is_clean_relative_path(&value.inputs_dir) { + Err(ValidateBuildRequestError::InvalidInputsDir)?; + } + + // validate outputs + for (i, p) in value.outputs.iter().enumerate() { + if !is_clean_relative_path(p) { + Err(ValidateBuildRequestError::InvalidOutputPath(i))? + } + } + if !is_sorted(value.outputs.iter().map(|e| e.as_bytes())) { + Err(ValidateBuildRequestError::OutputsNotSorted)?; + } + + // validate environment_vars. + for (i, e) in value.environment_vars.iter().enumerate() { + if e.key.is_empty() || e.key.contains('=') { + Err(ValidateBuildRequestError::InvalidEnvVar(i))? + } + } + if !is_sorted(value.environment_vars.iter().map(|e| e.key.as_bytes())) { + Err(ValidateBuildRequestError::EnvVarNotSorted)?; + } + + // validate build constraints + let constraints = value + .constraints + .map_or(Ok(HashSet::new()), |constraints| { + constraints + .try_into() + .map_err(ValidateBuildRequestError::InvalidBuildConstraints) + })?; + + // validate additional_files + for (i, additional_file) in value.additional_files.iter().enumerate() { + if !is_clean_relative_path(&additional_file.path) { + Err(ValidateBuildRequestError::InvalidAdditionalFilePath(i))? + } + } + if !is_sorted(value.additional_files.iter().map(|e| e.path.as_bytes())) { + Err(ValidateBuildRequestError::AdditionalFilesNotSorted)?; + } + + Ok(Self { + inputs, + command_args: value.command_args, + working_dir: PathBuf::from(value.working_dir), + scratch_paths: value.scratch_paths.iter().map(PathBuf::from).collect(), + inputs_dir: PathBuf::from(value.inputs_dir), + outputs: value.outputs.iter().map(PathBuf::from).collect(), + environment_vars: value.environment_vars.into_iter().map(Into::into).collect(), + constraints, + additional_files: value.additional_files.into_iter().map(Into::into).collect(), + refscan_needles: value.refscan_needles, + }) + } +} + /// Errors that occur during the validation of /// [build_request::BuildConstraints] messages. #[derive(Debug, thiserror::Error)] @@ -235,7 +331,90 @@ impl build_request::BuildConstraints { } } +impl From<build_request::EnvVar> for crate::buildservice::EnvVar { + fn from(value: build_request::EnvVar) -> Self { + Self { + key: value.key, + value: value.value, + } + } +} + +impl From<crate::buildservice::EnvVar> for build_request::EnvVar { + fn from(value: crate::buildservice::EnvVar) -> Self { + Self { + key: value.key, + value: value.value, + } + } +} + +impl From<build_request::AdditionalFile> for crate::buildservice::AdditionalFile { + fn from(value: build_request::AdditionalFile) -> Self { + Self { + path: PathBuf::from(value.path), + contents: value.contents, + } + } +} + +impl From<crate::buildservice::AdditionalFile> for build_request::AdditionalFile { + fn from(value: crate::buildservice::AdditionalFile) -> Self { + Self { + path: value + .path + .to_str() + .expect("Tvix bug: expected a valid path") + .to_string(), + contents: value.contents, + } + } +} + +impl TryFrom<build_request::BuildConstraints> for HashSet<crate::buildservice::BuildConstraints> { + type Error = ValidateBuildConstraintsError; + fn try_from(value: build_request::BuildConstraints) -> Result<Self, Self::Error> { + use crate::buildservice::BuildConstraints; + + // validate system + if value.system.is_empty() { + Err(ValidateBuildConstraintsError::InvalidSystem)?; + } + + let mut build_constraints = HashSet::from([ + BuildConstraints::System(value.system), + BuildConstraints::MinMemory(value.min_memory), + ]); + + // validate available_ro_paths + for (i, p) in value.available_ro_paths.iter().enumerate() { + if !is_clean_absolute_path(p) { + Err(ValidateBuildConstraintsError::InvalidAvailableRoPaths(i))? + } else { + build_constraints.insert(BuildConstraints::AvailableReadOnlyPath(PathBuf::from(p))); + } + } + if !is_sorted(value.available_ro_paths.iter().map(|e| e.as_bytes())) { + Err(ValidateBuildConstraintsError::AvailableRoPathsNotSorted)?; + } + + if value.network_access { + build_constraints.insert(BuildConstraints::NetworkAccess); + } + if value.provide_bin_sh { + build_constraints.insert(BuildConstraints::ProvideBinSh); + } + + Ok(build_constraints) + } +} + #[cfg(test)] +// TODO: add testcases for constraints special cases. The default cases in the protos +// should result in the constraints not being added. For example min_memory 0 can be omitted. +// Also interesting testcases are "merging semantics". MimMemory(1) and MinMemory(100) will +// result in mim_memory 100, multiple AvailableReadOnlyPaths need to be merged. Contradicting +// system constraints need to fail somewhere (maybe an assertion, as only buggy code can construct it) mod tests { use super::{is_clean_path, is_clean_relative_path}; use rstest::rstest; |