use super::PathInfoService; use crate::{nar::ingest_nar_and_hash, proto::PathInfo}; use futures::{stream::BoxStream, TryStreamExt}; use nix_compat::{ narinfo::{self, NarInfo}, nixbase32, nixhash::NixHash, }; use reqwest::StatusCode; use std::sync::Arc; use tokio::io::{self, AsyncRead}; use tonic::async_trait; use tracing::{debug, instrument, warn}; use tvix_castore::composition::{CompositionContext, ServiceBuilder}; use tvix_castore::{ blobservice::BlobService, directoryservice::DirectoryService, proto as castorepb, Error, }; use url::Url; /// NixHTTPPathInfoService acts as a bridge in between the Nix HTTP Binary cache /// protocol provided by Nix binary caches such as cache.nixos.org, and the Tvix /// Store Model. /// It implements the [PathInfoService] trait in an interesting way: /// Every [PathInfoService::get] fetches the .narinfo and referred NAR file, /// inserting components into a [BlobService] and [DirectoryService], then /// returning a [PathInfo] struct with the root. /// /// Due to this being quite a costly operation, clients are expected to layer /// this service with store composition, so they're only ingested once. /// /// The client is expected to be (indirectly) using the same [BlobService] and /// [DirectoryService], so able to fetch referred Directories and Blobs. /// [PathInfoService::put] is not implemented and returns an error if called. /// TODO: what about reading from nix-cache-info? pub struct NixHTTPPathInfoService<BS, DS> { base_url: url::Url, http_client: reqwest_middleware::ClientWithMiddleware, blob_service: BS, directory_service: DS, /// An optional list of [narinfo::PubKey]. /// If set, the .narinfo files received need to have correct signature by at least one of these. public_keys: Option<Vec<narinfo::VerifyingKey>>, } impl<BS, DS> NixHTTPPathInfoService<BS, DS> { pub fn new(base_url: url::Url, blob_service: BS, directory_service: DS) -> Self { Self { base_url, http_client: reqwest_middleware::ClientBuilder::new(reqwest::Client::new()) .with(tvix_tracing::propagate::reqwest::tracing_middleware()) .build(), blob_service, directory_service, public_keys: None, } } /// Configures [Self] to validate NARInfo fingerprints with the public keys passed. pub fn set_public_keys(&mut self, public_keys: Vec<narinfo::VerifyingKey>) { self.public_keys = Some(public_keys); } } #[async_trait] impl<BS, DS> PathInfoService for NixHTTPPathInfoService<BS, DS> where BS: AsRef<dyn BlobService> + Send + Sync + Clone + 'static, DS: AsRef<dyn DirectoryService> + Send + Sync + Clone + 'static, { #[instrument(skip_all, err, fields(path.digest=nixbase32::encode(&digest)))] async fn get(&self, digest: [u8; 20]) -> Result<Option<PathInfo>, Error> { let narinfo_url = self .base_url .join(&format!("{}.narinfo", nixbase32::encode(&digest))) .map_err(|e| { warn!(e = %e, "unable to join URL"); io::Error::new(io::ErrorKind::InvalidInput, "unable to join url") })?; debug!(narinfo_url= %narinfo_url, "constructed NARInfo url"); let resp = self .http_client .get(narinfo_url) .send() .await .map_err(|e| { warn!(e=%e,"unable to send NARInfo request"); io::Error::new( io::ErrorKind::InvalidInput, "unable to send NARInfo request", ) })?; // In the case of a 404, return a NotFound. // We also return a NotFound in case of a 403 - this is to match the behaviour as Nix, // when querying nix-cache.s3.amazonaws.com directly, rather than cache.nixos.org. if resp.status() == StatusCode::NOT_FOUND || resp.status() == StatusCode::FORBIDDEN { return Ok(None); } let narinfo_str = resp.text().await.map_err(|e| { warn!(e=%e,"unable to decode response as string"); io::Error::new( io::ErrorKind::InvalidData, "unable to decode response as string", ) })?; // parse the received narinfo let narinfo = NarInfo::parse(&narinfo_str).map_err(|e| { warn!(e=%e,"unable to parse response as NarInfo"); io::Error::new( io::ErrorKind::InvalidData, "unable to parse response as NarInfo", ) })?; // if [self.public_keys] is set, ensure there's at least one valid signature. if let Some(public_keys) = &self.public_keys { let fingerprint = narinfo.fingerprint(); if !public_keys.iter().any(|pubkey| { narinfo .signatures .iter() .any(|sig| pubkey.verify(&fingerprint, sig)) }) { warn!("no valid signature found"); Err(io::Error::new( io::ErrorKind::InvalidData, "no valid signature found", ))?; } } // Convert to a (sparse) PathInfo. We still need to populate the node field, // and for this we need to download the NAR file. // FUTUREWORK: Keep some database around mapping from narsha256 to // (unnamed) rootnode, so we can use that (and the name from the // StorePath) and avoid downloading the same NAR a second time. let pathinfo: PathInfo = (&narinfo).into(); // create a request for the NAR file itself. let nar_url = self.base_url.join(narinfo.url).map_err(|e| { warn!(e = %e, "unable to join URL"); io::Error::new(io::ErrorKind::InvalidInput, "unable to join url") })?; debug!(nar_url= %nar_url, "constructed NAR url"); let resp = self .http_client .get(nar_url.clone()) .send() .await .map_err(|e| { warn!(e=%e,"unable to send NAR request"); io::Error::new(io::ErrorKind::InvalidInput, "unable to send NAR request") })?; // if the request is not successful, return an error. if !resp.status().is_success() { return Err(Error::StorageError(format!( "unable to retrieve NAR at {}, status {}", nar_url, resp.status() ))); } // get a reader of the response body. let r = tokio_util::io::StreamReader::new(resp.bytes_stream().map_err(|e| { let e = e.without_url(); warn!(e=%e, "failed to get response body"); io::Error::new(io::ErrorKind::BrokenPipe, e.to_string()) })); // handle decompression, depending on the compression field. let mut r: Box<dyn AsyncRead + Send + Unpin> = match narinfo.compression { None => Box::new(r) as Box<dyn AsyncRead + Send + Unpin>, Some("bzip2") => Box::new(async_compression::tokio::bufread::BzDecoder::new(r)) as Box<dyn AsyncRead + Send + Unpin>, Some("gzip") => Box::new(async_compression::tokio::bufread::GzipDecoder::new(r)) as Box<dyn AsyncRead + Send + Unpin>, Some("xz") => Box::new(async_compression::tokio::bufread::XzDecoder::new(r)) as Box<dyn AsyncRead + Send + Unpin>, Some("zstd") => Box::new(async_compression::tokio::bufread::ZstdDecoder::new(r)) as Box<dyn AsyncRead + Send + Unpin>, Some(comp_str) => { return Err(Error::StorageError(format!( "unsupported compression: {comp_str}" ))); } }; let (root_node, nar_hash, nar_size) = ingest_nar_and_hash( self.blob_service.clone(), self.directory_service.clone(), &mut r, ) .await .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; // ensure the ingested narhash and narsize do actually match. if narinfo.nar_size != nar_size { warn!( narinfo.nar_size = narinfo.nar_size, http.nar_size = nar_size, "NarSize mismatch" ); Err(io::Error::new( io::ErrorKind::InvalidData, "NarSize mismatch".to_string(), ))?; } if narinfo.nar_hash != nar_hash { warn!( narinfo.nar_hash = %NixHash::Sha256(narinfo.nar_hash), http.nar_hash = %NixHash::Sha256(nar_hash), "NarHash mismatch" ); Err(io::Error::new( io::ErrorKind::InvalidData, "NarHash mismatch".to_string(), ))?; } Ok(Some(PathInfo { node: Some(castorepb::Node { // set the name of the root node to the digest-name of the store path. node: Some(root_node.rename(narinfo.store_path.to_string().to_owned().into())), }), references: pathinfo.references, narinfo: pathinfo.narinfo, })) } #[instrument(skip_all, fields(path_info=?_path_info))] async fn put(&self, _path_info: PathInfo) -> Result<PathInfo, Error> { Err(Error::InvalidRequest( "put not supported for this backend".to_string(), )) } fn list(&self) -> BoxStream<'static, Result<PathInfo, Error>> { Box::pin(futures::stream::once(async { Err(Error::InvalidRequest( "list not supported for this backend".to_string(), )) })) } } #[derive(serde::Deserialize)] pub struct NixHTTPPathInfoServiceConfig { base_url: String, blob_service: String, directory_service: String, #[serde(default)] /// An optional list of [narinfo::PubKey]. /// If set, the .narinfo files received need to have correct signature by at least one of these. public_keys: Option<Vec<String>>, } impl TryFrom<Url> for NixHTTPPathInfoServiceConfig { type Error = Box<dyn std::error::Error + Send + Sync>; fn try_from(url: Url) -> Result<Self, Self::Error> { let mut public_keys: Option<Vec<String>> = None; for (_, v) in url .query_pairs() .into_iter() .filter(|(k, _)| k == "trusted-public-keys") { public_keys .get_or_insert(Default::default()) .extend(v.split_ascii_whitespace().map(ToString::to_string)); } Ok(NixHTTPPathInfoServiceConfig { // Stringify the URL and remove the nix+ prefix. // We can't use `url.set_scheme(rest)`, as it disallows // setting something http(s) that previously wasn't. base_url: url.to_string().strip_prefix("nix+").unwrap().to_string(), blob_service: "default".to_string(), directory_service: "default".to_string(), public_keys, }) } } #[async_trait] impl ServiceBuilder for NixHTTPPathInfoServiceConfig { type Output = dyn PathInfoService; async fn build<'a>( &'a self, _instance_name: &str, context: &CompositionContext, ) -> Result<Arc<dyn PathInfoService>, Box<dyn std::error::Error + Send + Sync + 'static>> { let (blob_service, directory_service) = futures::join!( context.resolve(self.blob_service.clone()), context.resolve(self.directory_service.clone()) ); let mut svc = NixHTTPPathInfoService::new( Url::parse(&self.base_url)?, blob_service?, directory_service?, ); if let Some(public_keys) = &self.public_keys { svc.set_public_keys( public_keys .iter() .map(|pubkey_str| { narinfo::VerifyingKey::parse(pubkey_str) .map_err(|e| Error::StorageError(format!("invalid public key: {e}"))) }) .collect::<Result<Vec<_>, Error>>()?, ); } Ok(Arc::new(svc)) } }