about summary refs log tree commit diff
path: root/tvix/nix-compat/src/derivation/mod.rs
blob: 6baeaba38299fc421a0dd7f61b0e8086073a3b58 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
use crate::store_path::{
    self, build_ca_path, build_output_path, build_text_path, StorePath, StorePathRef,
};
use bstr::BString;
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use std::collections::{BTreeMap, BTreeSet};
use std::io;

mod errors;
mod output;
mod parse_error;
mod parser;
mod validate;
mod write;

#[cfg(test)]
mod tests;

// Public API of the crate.
pub use crate::nixhash::{CAHash, NixHash};
pub use errors::{DerivationError, OutputError};
pub use output::Output;

use self::write::AtermWriteable;

/// In-memory representation of a derivation.
///
/// It can be written in ATerm format via [Derivation::serialize] /
/// [Derivation::to_aterm_bytes], and parsed via [Derivation::from_aterm_bytes].
/// The serde field names are `args`, `builder`, `env`, `inputDrvs`,
/// `inputSrcs`, `outputs` and `system`.
#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)]
pub struct Derivation {
    /// Arguments of the derivation (serde name: `args`).
    // NOTE(review): presumably these are the arguments passed to `builder` — confirm.
    #[serde(rename = "args")]
    pub arguments: Vec<String>,

    /// The builder of the derivation, as a plain string.
    pub builder: String,

    /// Environment map from variable name to (byte string) value
    /// (serde name: `env`).
    /// [Derivation::calculate_output_paths] inserts one entry per output
    /// name into this map.
    #[serde(rename = "env")]
    pub environment: BTreeMap<String, BString>,

    /// Map from drv path to output names used from this derivation.
    #[serde(rename = "inputDrvs")]
    pub input_derivations: BTreeMap<StorePath<String>, BTreeSet<String>>,

    /// Plain store paths of additional inputs.
    #[serde(rename = "inputSrcs")]
    pub input_sources: BTreeSet<StorePath<String>>,

    /// Maps output names to Output.
    /// The `path` of each [Output] is populated by
    /// [Derivation::calculate_output_paths].
    pub outputs: BTreeMap<String, Output>,

    /// The system string of the derivation.
    pub system: String,
}

impl Derivation {
    /// Write the Derivation to the given [std::io::Write], in ATerm format.
    ///
    /// The derivation's own `input_derivations` are written as-is.
    ///
    /// The only errors returned are those that occur when writing to the
    /// passed writer.
    pub fn serialize(&self, writer: &mut impl std::io::Write) -> Result<(), io::Error> {
        self.serialize_with_replacements(writer, &self.input_derivations)
    }

    /// Like `serialize`, but writes the passed `input_derivations` map in place
    /// of `self.input_derivations`. This is what allows substituting the input
    /// derivation keys when the ATerm is rendered for hash calculations.
    ///
    /// Fields are emitted in a fixed order: outputs, input derivations, input
    /// sources, system, builder, arguments, environment.
    ///
    /// Errors are only those returned while writing to `writer`.
    fn serialize_with_replacements(
        &self,
        writer: &mut impl std::io::Write,
        input_derivations: &BTreeMap<impl AtermWriteable, BTreeSet<String>>,
    ) -> Result<(), io::Error> {
        use write::*;

        // Header: the derivation prefix, directly followed by the opening paren.
        writer.write_all(DERIVATION_PREFIX.as_bytes())?;
        write_char(writer, PAREN_OPEN)?;

        // Outputs come first; every field but the last is followed by a comma.
        write_outputs(writer, &self.outputs)?;
        write_char(writer, COMMA)?;

        // The (possibly replaced) input derivations, not necessarily our own.
        write_input_derivations(writer, input_derivations)?;
        write_char(writer, COMMA)?;

        // Plain input store paths.
        write_input_sources(writer, &self.input_sources)?;
        write_char(writer, COMMA)?;

        // System string.
        write_system(writer, &self.system)?;
        write_char(writer, COMMA)?;

        // Builder.
        write_builder(writer, &self.builder)?;
        write_char(writer, COMMA)?;

        // Arguments.
        write_arguments(writer, &self.arguments)?;
        write_char(writer, COMMA)?;

        // The environment is the last field, so no trailing comma here.
        write_environment(writer, &self.environment)?;

        // Footer: the closing paren terminates the term.
        write_char(writer, PAREN_CLOSE)?;

        Ok(())
    }

    /// Return the ATerm serialization of this Derivation as bytes, rendered
    /// with the derivation's own `input_derivations`.
    pub fn to_aterm_bytes(&self) -> Vec<u8> {
        self.to_aterm_bytes_with_replacements(&self.input_derivations)
    }

    /// Like `to_aterm_bytes`, but renders the given `input_derivations` map
    /// instead of `self.input_derivations`.
    /// This is used to produce the ATerm representation of a Derivation "modulo
    /// fixed-output derivations".
    fn to_aterm_bytes_with_replacements(
        &self,
        input_derivations: &BTreeMap<impl AtermWriteable, BTreeSet<String>>,
    ) -> Vec<u8> {
        let mut aterm = Vec::<u8>::new();

        // The serializer only reports errors coming from the writer it is
        // handed. Here that writer is an in-memory Vec, so the unwrap below is
        // not expected to ever fire.
        let written = self.serialize_with_replacements(&mut aterm, input_derivations);
        written.unwrap();

        aterm
    }

    /// Parse a Derivation from its ATerm serialization, and validate it passes
    /// our set of validations.
    ///
    /// This delegates to `parser::parse`; on failure, its error (which borrows
    /// from the input `b`) is returned.
    pub fn from_aterm_bytes(b: &[u8]) -> Result<Derivation, parser::Error<&[u8]>> {
        parser::parse(b)
    }

    /// Computes the drv path of this [Derivation].
    ///
    /// The path is obtained from [build_text_path], which is given:
    ///  - `name` with a `.drv` suffix appended as the path name,
    ///  - the ATerm serialization of the [Derivation] as content,
    ///  - the absolute paths of all [Derivation::input_sources] and of all
    ///    keys of [Derivation::input_derivations] as references.
    ///
    /// Any failure of [build_text_path] is reported as
    /// [DerivationError::InvalidOutputName], carrying the suffixed name.
    pub fn calculate_derivation_path(
        &self,
        name: &str,
    ) -> Result<StorePath<String>, DerivationError> {
        // The store path name is the derivation name plus a `.drv` suffix.
        let drv_name = format!("{}.drv", name);

        // Gather the absolute paths of all inputs (sources and derivations)
        // into one set; the BTreeSet keeps them sorted and deduplicated.
        let mut references: BTreeSet<String> = BTreeSet::new();
        for input_path in self
            .input_sources
            .iter()
            .chain(self.input_derivations.keys())
        {
            references.insert(input_path.to_absolute_path());
        }

        build_text_path(&drv_name, self.to_aterm_bytes(), references)
            .map_err(|_e| DerivationError::InvalidOutputName(drv_name.clone()))
    }

    /// Returns the FOD digest if the derivation is fixed-output, or None
    /// otherwise.
    ///
    /// A derivation is treated as fixed-output here when it has exactly one
    /// output, that output is named `out`, and it carries a `ca_hash`.
    ///
    /// The digest is the sha256 of the string
    /// `fixed:out:{prefix}{hash}:{path}`, where `prefix` comes from
    /// `ca_kind_prefix`, `hash` is the nix hex string of the CA hash, and
    /// `path` is the absolute output path (or empty if the path is not set).
    ///
    /// TODO: this is kinda the string from [build_ca_path] with a
    /// [CAHash::Flat], what's fed to `build_store_path_from_fingerprint_parts`
    /// (except the out_output.path being an empty string)
    pub fn fod_digest(&self) -> Option<[u8; 32]> {
        // More (or less) than one output can never be fixed-output.
        if self.outputs.len() != 1 {
            return None;
        }

        // The single output must be called "out" and have a CA hash.
        let out_output = self.outputs.get("out")?;
        let ca_hash = out_output.ca_hash.as_ref()?;

        // An output path that is not set yet contributes an empty string.
        let out_path = out_output
            .path
            .as_ref()
            .map(StorePath::to_absolute_path)
            .unwrap_or_default();

        let fingerprint = format!(
            "fixed:out:{}{}:{}",
            ca_kind_prefix(ca_hash),
            ca_hash.hash().to_nix_hex_string(),
            out_path,
        );

        Some(Sha256::new_with_prefix(fingerprint).finalize().into())
    }

    /// Calculates the hash of a derivation modulo fixed-output subderivations.
    ///
    /// This is called `hashDerivationModulo` in nixcpp.
    ///
    /// It returns the sha256 digest of the derivation ATerm representation,
    /// except that:
    ///  -  any input derivation paths have been replaced "by the result of a
    ///     recursive call to this function" and that
    ///  - for fixed-output derivations the special
    ///    `fixed:out:${algo}:${digest}:${fodPath}` string is hashed instead of
    ///    the A-Term.
    ///
    /// It's up to the caller of this function to provide an (infallible) lookup
    /// function to query [Derivation::hash_derivation_modulo] of direct input
    /// derivations, by their [StorePathRef].
    /// It will only be called in case the derivation is not a fixed-output
    /// derivation; for fixed-output derivations neither the lookup nor the
    /// ATerm rendering is performed.
    pub fn hash_derivation_modulo<F>(&self, fn_lookup_hash_derivation_modulo: F) -> [u8; 32]
    where
        F: Fn(&StorePathRef) -> [u8; 32],
    {
        // Fixed-output derivations return a fixed hash.
        // Return early, so the (potentially expensive) lookups below are not
        // performed for them at all.
        if let Some(fod_digest) = self.fod_digest() {
            return fod_digest;
        }

        // Non-fixed-output derivations return the sha256 digest of the ATerm
        // notation, but with all input_derivation paths replaced by a recursive
        // call to this function.
        // We call [fn_lookup_hash_derivation_modulo] rather than recursing
        // ourselves, so callers can precompute this.
        //
        // For each input_derivation, look up the hash derivation modulo,
        // and replace the derivation path in the ATerm with its digest.
        let aterm_bytes = self.to_aterm_bytes_with_replacements(&BTreeMap::from_iter(
            self.input_derivations
                .iter()
                .map(|(drv_path, output_names)| {
                    let hash = fn_lookup_hash_derivation_modulo(&drv_path.as_ref());

                    (hash, output_names.to_owned())
                }),
        ));

        // write the ATerm of that to the hash function and return its digest.
        Sha256::new_with_prefix(aterm_bytes).finalize().into()
    }

    /// Calculates the output paths of all outputs of this Derivation and
    /// stores them in the struct.
    ///
    /// Every output must not have its path populated yet, i.e.
    /// `self.outputs[$outputName].path` needs to be `None`. The environment is
    /// not checked; an existing `self.environment[$outputName]` entry is
    /// overwritten.
    ///
    /// Output path calculation requires knowledge of the
    /// [Derivation::hash_derivation_modulo], which (in case of
    /// non-fixed-output derivations) also requires knowledge of the
    /// [Derivation::hash_derivation_modulo] of input derivations (recursively).
    ///
    /// To avoid recursing and doing unnecessary calculation, the caller is
    /// asked to pass in the result of the [Derivation::hash_derivation_modulo]
    /// call for the current [Derivation], and it's left to them to calculate
    /// it when needed.
    ///
    /// On completion, `self.environment[$outputName]` and
    /// `self.outputs[$outputName].path` are set to the calculated output path
    /// for all outputs.
    ///
    /// # Errors
    ///
    /// Returns [DerivationError::InvalidOutputDerivationPath] (with the name of
    /// the offending output) if a store path could not be built. Outputs
    /// processed before the failing one have already been updated at that
    /// point.
    ///
    /// # Panics
    ///
    /// Panics if any output already has a path set.
    pub fn calculate_output_paths(
        &mut self,
        name: &str,
        hash_derivation_modulo: &[u8; 32],
    ) -> Result<(), DerivationError> {
        // Fingerprint and hash are different for every output.
        for (output_name, output) in self.outputs.iter_mut() {
            // Guard against misuse: outputs must not be populated yet.
            // self.environment is deliberately not inspected as well, this
            // check alone is a sufficient footgun prevention mechanism.
            assert!(output.path.is_none());

            let path_name = output_path_name(name, output_name);

            // Outputs with a CA hash (fixed-output) go through [build_ca_path],
            // all others through [build_output_path] with the passed
            // hash_derivation_modulo.
            let abs_store_path = match output.ca_hash {
                Some(ref hwm) => build_ca_path(&path_name, hwm, Vec::<String>::new(), false)
                    .map_err(|e| {
                        DerivationError::InvalidOutputDerivationPath(output_name.to_string(), e)
                    })?,
                None => build_output_path(hash_derivation_modulo, output_name, &path_name)
                    .map_err(|e| {
                        DerivationError::InvalidOutputDerivationPath(
                            output_name.to_string(),
                            store_path::BuildStorePathError::InvalidStorePath(e),
                        )
                    })?,
            };

            // Record the path on the output itself ...
            output.path = Some(abs_store_path.to_owned());
            // ... and expose it in the environment under the output's name.
            self.environment.insert(
                output_name.to_string(),
                abs_store_path.to_absolute_path().into(),
            );
        }

        Ok(())
    }
}

/// Builds the name part of the store path of a derivation [Output].
///
/// For the `out` output this is just the derivation name; for every other
/// output, the output name is appended after a `-`.
fn output_path_name(derivation_name: &str, output_name: &str) -> String {
    match output_name {
        // The default output does not get a suffix.
        "out" => derivation_name.to_owned(),
        other => format!("{}-{}", derivation_name, other),
    }
}

/// Returns the "prefix" for the kind of the given [CAHash], as used in the
/// fixed-output fingerprint string.
///
/// This is `"r:"` for [CAHash::Nar] and an empty string for [CAHash::Flat].
///
/// # Panics
///
/// Panics for all other [CAHash] kinds, as they're not valid in a derivation
/// context.
fn ca_kind_prefix(ca_hash: &CAHash) -> &'static str {
    match ca_hash {
        CAHash::Nar(_) => "r:",
        CAHash::Flat(_) => "",
        unsupported => panic!("invalid ca hash in derivation context: {:?}", unsupported),
    }
}