diff options
author | Connor Brewster <cbrewster@hey.com> | 2023-09-16T18·58-0500 |
---|---|---|
committer | Connor Brewster <cbrewster@hey.com> | 2023-09-24T17·05+0000 |
commit | 993c505cdbef35a7e5ae2fc896af6e6ad79317ea (patch) | |
tree | 80be604ef2ea27ea3b63092cc209233111c4ed55 | |
parent | e5f22818566b4cc49ce9b089c59921f5a054c48c (diff) |
feat(tvix/store/fs): Add support for virtiofs backend r/6642
This adds a virtiofs daemon implementation which hooks into the existing tvix-store filesystem implementation that is used for FUSE. This allows adding the filesystem to a microvm without having to set up FUSE inside the guest.

Change-Id: If80c36c9657f2289853e8d9a364bf4f1f7b7559c
Reviewed-on: https://cl.tvl.fyi/c/depot/+/9344
Autosubmit: Connor Brewster <cbrewster@hey.com>
Tested-by: BuildkiteCI
Reviewed-by: flokli <flokli@flokli.de>
-rw-r--r-- | tvix/Cargo.lock | 60 | ||||
-rw-r--r-- | tvix/Cargo.nix | 206 | ||||
-rw-r--r-- | tvix/store/Cargo.toml | 32 | ||||
-rw-r--r-- | tvix/store/src/bin/tvix-store.rs | 54 | ||||
-rw-r--r-- | tvix/store/src/fs/mod.rs | 3 | ||||
-rw-r--r-- | tvix/store/src/fs/virtiofs.rs | 237 |
6 files changed, 587 insertions, 5 deletions
diff --git a/tvix/Cargo.lock b/tvix/Cargo.lock index a241307a7d26..6b5f12f949bc 100644 --- a/tvix/Cargo.lock +++ b/tvix/Cargo.lock @@ -775,6 +775,8 @@ dependencies = [ "log", "mio", "nix 0.24.3", + "vhost", + "virtio-queue", "vm-memory", "vmm-sys-util", ] @@ -2865,6 +2867,12 @@ dependencies = [ "tracing-subscriber", "tvix-castore", "url", + "vhost", + "vhost-user-backend", + "virtio-bindings 0.2.1", + "virtio-queue", + "vm-memory", + "vmm-sys-util", "walkdir", ] @@ -2949,11 +2957,63 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] +name = "vhost" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6769e8dbf5276b4376439fbf36bb880d203bf614bf7ef444198edc24b5a9f35" +dependencies = [ + "bitflags", + "libc", + "vm-memory", + "vmm-sys-util", +] + +[[package]] +name = "vhost-user-backend" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f237b91db4ac339d639fb43398b52d785fa51e3c7760ac9425148863c1f4303" +dependencies = [ + "libc", + "log", + "vhost", + "virtio-bindings 0.1.0", + "virtio-queue", + "vm-memory", + "vmm-sys-util", +] + +[[package]] +name = "virtio-bindings" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ff512178285488516ed85f15b5d0113a7cdb89e9e8a760b269ae4f02b84bd6b" + +[[package]] +name = "virtio-bindings" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c18d7b74098a946470ea265b5bacbbf877abc3373021388454de0d47735a5b98" + +[[package]] +name = "virtio-queue" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ba81e2bcc21c0d2fc5e6683e79367e26ad219197423a498df801d79d5ba77bd" +dependencies = [ + "log", + "virtio-bindings 0.1.0", + "vm-memory", + "vmm-sys-util", +] + +[[package]] name = "vm-memory" version = "0.10.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "688a70366615b45575a424d9c665561c1b5ab2224d494f706b6a6812911a827c" dependencies = [ + "arc-swap", "libc", "winapi", ] diff --git a/tvix/Cargo.nix b/tvix/Cargo.nix index ac0053c6bff6..43cd31c91fb8 100644 --- a/tvix/Cargo.nix +++ b/tvix/Cargo.nix @@ -2222,6 +2222,17 @@ rec { packageId = "nix 0.24.3"; } { + name = "vhost"; + packageId = "vhost"; + optional = true; + features = [ "vhost-user-slave" ]; + } + { + name = "virtio-queue"; + packageId = "virtio-queue"; + optional = true; + } + { name = "vm-memory"; packageId = "vm-memory"; features = [ "backend-mmap" ]; @@ -2259,7 +2270,7 @@ rec { "virtiofs" = [ "virtio-queue" "caps" "vmm-sys-util" ]; "vmm-sys-util" = [ "dep:vmm-sys-util" ]; }; - resolvedDefaultFeatures = [ "caps" "core-foundation-sys" "default" "fusedev" "vmm-sys-util" ]; + resolvedDefaultFeatures = [ "caps" "core-foundation-sys" "default" "fusedev" "vhost" "vhost-user-fs" "virtio-queue" "virtiofs" "vmm-sys-util" ]; }; "futures" = rec { crateName = "futures"; @@ -8568,6 +8579,7 @@ rec { name = "fuse-backend-rs"; packageId = "fuse-backend-rs"; optional = true; + features = [ "vhost-user-fs" ]; } { name = "futures"; @@ -8657,6 +8669,36 @@ rec { packageId = "url"; } { + name = "vhost"; + packageId = "vhost"; + optional = true; + } + { + name = "vhost-user-backend"; + packageId = "vhost-user-backend"; + optional = true; + } + { + name = "virtio-bindings"; + packageId = "virtio-bindings 0.2.1"; + optional = true; + } + { + name = "virtio-queue"; + packageId = "virtio-queue"; + optional = true; + } + { + name = "vm-memory"; + packageId = "vm-memory"; + optional = true; + } + { + name = "vmm-sys-util"; + packageId = "vmm-sys-util"; + optional = true; + } + { name = "walkdir"; packageId = "walkdir"; } @@ -8686,13 +8728,14 @@ rec { } ]; features = { - "default" = [ "fuse" "reflection" ]; + "default" = [ "fuse" "virtiofs" "reflection" ]; "fs" = [ "dep:libc" "dep:fuse-backend-rs" ]; "fuse" = [ 
"fs" ]; "reflection" = [ "tonic-reflection" ]; "tonic-reflection" = [ "dep:tonic-reflection" ]; + "virtiofs" = [ "fs" "dep:vhost" "dep:vhost-user-backend" "dep:virtio-queue" "dep:vm-memory" "dep:vmm-sys-util" "dep:virtio-bindings" ]; }; - resolvedDefaultFeatures = [ "default" "fs" "fuse" "reflection" "tonic-reflection" ]; + resolvedDefaultFeatures = [ "default" "fs" "fuse" "reflection" "tonic-reflection" "virtiofs" ]; }; "typenum" = rec { crateName = "typenum"; @@ -8868,6 +8911,156 @@ rec { ]; }; + "vhost" = rec { + crateName = "vhost"; + version = "0.6.1"; + edition = "2018"; + sha256 = "0dczb95w5vcq852fzxsbc6zh7ll0p1mz7yrrchvv8xjjpy6rwxm6"; + authors = [ + "Liu Jiang <gerry@linux.alibaba.com>" + ]; + dependencies = [ + { + name = "bitflags"; + packageId = "bitflags"; + } + { + name = "libc"; + packageId = "libc"; + } + { + name = "vm-memory"; + packageId = "vm-memory"; + } + { + name = "vmm-sys-util"; + packageId = "vmm-sys-util"; + } + ]; + devDependencies = [ + { + name = "vm-memory"; + packageId = "vm-memory"; + features = [ "backend-mmap" ]; + } + ]; + features = { + "vhost-net" = [ "vhost-kern" ]; + "vhost-user-master" = [ "vhost-user" ]; + "vhost-user-slave" = [ "vhost-user" ]; + "vhost-vdpa" = [ "vhost-kern" ]; + }; + resolvedDefaultFeatures = [ "default" "vhost-user" "vhost-user-slave" ]; + }; + "vhost-user-backend" = rec { + crateName = "vhost-user-backend"; + version = "0.8.0"; + edition = "2018"; + sha256 = "00s33wy8cj2i8b4hlxn7wd8zm1fpaa5kjhzv77b3khsavf8pn8wz"; + authors = [ + "The Cloud Hypervisor Authors" + ]; + dependencies = [ + { + name = "libc"; + packageId = "libc"; + } + { + name = "log"; + packageId = "log"; + } + { + name = "vhost"; + packageId = "vhost"; + features = [ "vhost-user-slave" ]; + } + { + name = "virtio-bindings"; + packageId = "virtio-bindings 0.1.0"; + } + { + name = "virtio-queue"; + packageId = "virtio-queue"; + } + { + name = "vm-memory"; + packageId = "vm-memory"; + features = [ "backend-mmap" "backend-atomic" ]; + } + { + 
name = "vmm-sys-util"; + packageId = "vmm-sys-util"; + } + ]; + devDependencies = [ + { + name = "vhost"; + packageId = "vhost"; + features = [ "vhost-user-master" "vhost-user-slave" ]; + } + { + name = "vm-memory"; + packageId = "vm-memory"; + features = [ "backend-mmap" "backend-atomic" "backend-bitmap" ]; + } + ]; + + }; + "virtio-bindings 0.1.0" = rec { + crateName = "virtio-bindings"; + version = "0.1.0"; + edition = "2018"; + sha256 = "0sxxhhmz1r4s4q5pd2lykswcv9qk05fmpwc5xlb8aj45h8bi5x9z"; + authors = [ + "Sergio Lopez <slp@redhat.com>" + ]; + features = { }; + }; + "virtio-bindings 0.2.1" = rec { + crateName = "virtio-bindings"; + version = "0.2.1"; + edition = "2021"; + sha256 = "162vb9rlf3fyaj23h89h6z1snxzqpfn5nnr6x9q6954a15s7p3f1"; + authors = [ + "Sergio Lopez <slp@redhat.com>" + ]; + features = { }; + }; + "virtio-queue" = rec { + crateName = "virtio-queue"; + version = "0.7.1"; + edition = "2021"; + sha256 = "1gbppbapj7c0vyca88vl34cx4sp2cy9yg0v6bvyd5h11rhmixa1v"; + authors = [ + "The Chromium OS Authors" + ]; + dependencies = [ + { + name = "log"; + packageId = "log"; + } + { + name = "virtio-bindings"; + packageId = "virtio-bindings 0.1.0"; + } + { + name = "vm-memory"; + packageId = "vm-memory"; + } + { + name = "vmm-sys-util"; + packageId = "vmm-sys-util"; + } + ]; + devDependencies = [ + { + name = "vm-memory"; + packageId = "vm-memory"; + features = [ "backend-mmap" "backend-atomic" ]; + } + ]; + features = { }; + }; "vm-memory" = rec { crateName = "vm-memory"; version = "0.10.0"; @@ -8878,6 +9071,11 @@ rec { ]; dependencies = [ { + name = "arc-swap"; + packageId = "arc-swap"; + optional = true; + } + { name = "libc"; packageId = "libc"; } @@ -8892,7 +9090,7 @@ rec { "arc-swap" = [ "dep:arc-swap" ]; "backend-atomic" = [ "arc-swap" ]; }; - resolvedDefaultFeatures = [ "backend-mmap" "default" ]; + resolvedDefaultFeatures = [ "arc-swap" "backend-atomic" "backend-mmap" "default" ]; }; "vmm-sys-util" = rec { crateName = "vmm-sys-util"; diff --git 
a/tvix/store/Cargo.toml b/tvix/store/Cargo.toml index a2e143de7014..20909221c524 100644 --- a/tvix/store/Cargo.toml +++ b/tvix/store/Cargo.toml @@ -36,6 +36,35 @@ optional = true # TODO: Switch back to upstream version once https://github.com/cloud-hypervisor/fuse-backend-rs/pull/153 lands. git = "https://github.com/cbrewster/fuse-backend-rs.git" branch = "optional-allow_other" +# Ideally this would only be enabled if virtiofs is enabled +# Ex: virtiofs = [..., "fuse-backend-rs/?vhost-user-fs", ...] +# However, crate2nix doesn't properly understand this syntax and doesn't +# enable this feature properly. +features = ["vhost-user-fs"] + +[dependencies.vhost] +optional = true +version = "0.6" + +[dependencies.vhost-user-backend] +optional = true +version = "0.8" + +[dependencies.virtio-queue] +optional = true +version = "0.7" + +[dependencies.vm-memory] +optional = true +version = "0.10" + +[dependencies.vmm-sys-util] +optional = true +version = "0.11" + +[dependencies.virtio-bindings] +optional = true +version = "0.2.1" [dependencies.tonic-reflection] optional = true @@ -55,7 +84,8 @@ tempfile = "3.3.0" tonic-mock = { git = "https://github.com/brainrake/tonic-mock", branch = "bump-dependencies" } [features] -default = ["fuse", "reflection"] +default = ["fuse", "virtiofs", "reflection"] fs = ["dep:libc", "dep:fuse-backend-rs"] +virtiofs = ["fs", "dep:vhost", "dep:vhost-user-backend", "dep:virtio-queue", "dep:vm-memory", "dep:vmm-sys-util", "dep:virtio-bindings"] fuse = ["fs"] reflection = ["tonic-reflection"] diff --git a/tvix/store/src/bin/tvix-store.rs b/tvix/store/src/bin/tvix-store.rs index 474a48c9fd1a..813d62cb129a 100644 --- a/tvix/store/src/bin/tvix-store.rs +++ b/tvix/store/src/bin/tvix-store.rs @@ -29,6 +29,9 @@ use tvix_store::fs::TvixStoreFs; #[cfg(feature = "fuse")] use tvix_store::fs::fuse::FuseDaemon; +#[cfg(feature = "virtiofs")] +use tvix_store::fs::virtiofs::start_virtiofs_daemon; + #[cfg(feature = "reflection")] use 
tvix_castore::proto::FILE_DESCRIPTOR_SET as CASTORE_FILE_DESCRIPTOR_SET; #[cfg(feature = "reflection")] @@ -111,6 +114,28 @@ enum Commands { #[clap(long, short, action)] list_root: bool, }, + /// Starts a tvix-store virtiofs daemon at the given socket path. + #[cfg(feature = "virtiofs")] + #[command(name = "virtiofs")] + VirtioFs { + #[clap(value_name = "PATH")] + socket: PathBuf, + + #[arg(long, env, default_value = "grpc+http://[::1]:8000")] + blob_service_addr: String, + + #[arg(long, env, default_value = "grpc+http://[::1]:8000")] + directory_service_addr: String, + + #[arg(long, env, default_value = "grpc+http://[::1]:8000")] + path_info_service_addr: String, + + /// Whether to list elements at the root of the mount point. + /// This is useful if your PathInfoService doesn't provide an + /// (exhaustive) listing. + #[clap(long, short, action)] + list_root: bool, + }, } #[cfg(feature = "fuse")] @@ -328,6 +353,35 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> { }) .await??; } + #[cfg(feature = "virtiofs")] + Commands::VirtioFs { + socket, + blob_service_addr, + directory_service_addr, + path_info_service_addr, + list_root, + } => { + let blob_service = blobservice::from_addr(&blob_service_addr)?; + let directory_service = directoryservice::from_addr(&directory_service_addr)?; + let path_info_service = pathinfoservice::from_addr( + &path_info_service_addr, + blob_service.clone(), + directory_service.clone(), + )?; + + tokio::task::spawn_blocking(move || { + let fs = TvixStoreFs::new( + blob_service, + directory_service, + path_info_service, + list_root, + ); + info!("starting tvix-store virtiofs daemon on {:?}", &socket); + + start_virtiofs_daemon(fs, socket) + }) + .await??; + } }; Ok(()) } diff --git a/tvix/store/src/fs/mod.rs b/tvix/store/src/fs/mod.rs index 59b8f0d0854f..91adfa35f0e0 100644 --- a/tvix/store/src/fs/mod.rs +++ b/tvix/store/src/fs/mod.rs @@ -5,6 +5,9 @@ mod inodes; #[cfg(feature = "fuse")] pub mod fuse; +#[cfg(feature = 
"virtiofs")] +pub mod virtiofs; + #[cfg(test)] mod tests; diff --git a/tvix/store/src/fs/virtiofs.rs b/tvix/store/src/fs/virtiofs.rs new file mode 100644 index 000000000000..3786a84285cd --- /dev/null +++ b/tvix/store/src/fs/virtiofs.rs @@ -0,0 +1,237 @@ +use std::{ + convert, error, fmt, io, + ops::Deref, + path::Path, + sync::{Arc, MutexGuard, RwLock}, +}; + +use fuse_backend_rs::{ + api::{filesystem::FileSystem, server::Server}, + transport::{FsCacheReqHandler, Reader, VirtioFsWriter}, +}; +use tracing::error; +use vhost::vhost_user::{ + Listener, SlaveFsCacheReq, VhostUserProtocolFeatures, VhostUserVirtioFeatures, +}; +use vhost_user_backend::{VhostUserBackendMut, VhostUserDaemon, VringMutex, VringState, VringT}; +use virtio_bindings::bindings::virtio_ring::{ + VIRTIO_RING_F_EVENT_IDX, VIRTIO_RING_F_INDIRECT_DESC, +}; +use virtio_queue::QueueT; +use vm_memory::{GuestAddressSpace, GuestMemoryAtomic, GuestMemoryMmap}; +use vmm_sys_util::epoll::EventSet; + +const VIRTIO_F_VERSION_1: u32 = 32; +const NUM_QUEUES: usize = 2; +const QUEUE_SIZE: usize = 1024; + +#[derive(Debug)] +enum Error { + /// Failed to handle non-input event. + HandleEventNotEpollIn, + /// Failed to handle unknown event. + HandleEventUnknownEvent, + /// Invalid descriptor chain. + InvlaidDescriptorChain, + /// Failed to handle filesystem requests. + HandleRequests(fuse_backend_rs::Error), + /// Failed to construct new vhost user daemon. + NewDaemon, + /// Failed to start the vhost user daemon. + StartDaemon, + /// Failed to wait for the vhost user daemon. 
+ WaitDaemon, +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "vhost_user_fs_error: {self:?}") + } +} + +impl error::Error for Error {} + +impl convert::From<Error> for io::Error { + fn from(e: Error) -> Self { + io::Error::new(io::ErrorKind::Other, e) + } +} + +struct VhostUserFsBackend<FS> +where + FS: FileSystem + Send + Sync, +{ + server: Arc<Server<Arc<FS>>>, + event_idx: bool, + guest_mem: GuestMemoryAtomic<GuestMemoryMmap>, + cache_req: Option<SlaveFsCacheReq>, +} + +impl<FS> VhostUserFsBackend<FS> +where + FS: FileSystem + Send + Sync, +{ + fn process_queue(&mut self, vring: &mut MutexGuard<VringState>) -> std::io::Result<bool> { + let mut used_descs = false; + + while let Some(desc_chain) = vring + .get_queue_mut() + .pop_descriptor_chain(self.guest_mem.memory()) + { + let memory = desc_chain.memory(); + let reader = Reader::from_descriptor_chain(memory, desc_chain.clone()) + .map_err(|_| Error::InvlaidDescriptorChain)?; + let writer = VirtioFsWriter::new(memory, desc_chain.clone()) + .map_err(|_| Error::InvlaidDescriptorChain)?; + + self.server + .handle_message( + reader, + writer.into(), + self.cache_req + .as_mut() + .map(|req| req as &mut dyn FsCacheReqHandler), + None, + ) + .map_err(Error::HandleRequests)?; + + // TODO: Is len 0 correct? + if let Err(error) = vring + .get_queue_mut() + .add_used(memory, desc_chain.head_index(), 0) + { + error!(?error, "failed to add desc back to ring"); + } + + // TODO: What happens if we error out before here? 
+ used_descs = true; + } + + let needs_notification = if self.event_idx { + match vring + .get_queue_mut() + .needs_notification(self.guest_mem.memory().deref()) + { + Ok(needs_notification) => needs_notification, + Err(error) => { + error!(?error, "failed to check if queue needs notification"); + true + } + } + } else { + true + }; + + if needs_notification { + if let Err(error) = vring.signal_used_queue() { + error!(?error, "failed to signal used queue"); + } + } + + Ok(used_descs) + } +} + +impl<FS> VhostUserBackendMut<VringMutex> for VhostUserFsBackend<FS> +where + FS: FileSystem + Send + Sync, +{ + fn num_queues(&self) -> usize { + NUM_QUEUES + } + + fn max_queue_size(&self) -> usize { + QUEUE_SIZE + } + + fn features(&self) -> u64 { + 1 << VIRTIO_F_VERSION_1 + | 1 << VIRTIO_RING_F_INDIRECT_DESC + | 1 << VIRTIO_RING_F_EVENT_IDX + | VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits() + } + + fn protocol_features(&self) -> VhostUserProtocolFeatures { + VhostUserProtocolFeatures::MQ | VhostUserProtocolFeatures::SLAVE_REQ + } + + fn set_event_idx(&mut self, enabled: bool) { + self.event_idx = enabled; + } + + fn update_memory(&mut self, _mem: GuestMemoryAtomic<GuestMemoryMmap>) -> std::io::Result<()> { + // This is what most the vhost user implementations do... 
+ Ok(()) + } + + fn set_slave_req_fd(&mut self, cache_req: SlaveFsCacheReq) { + self.cache_req = Some(cache_req); + } + + fn handle_event( + &mut self, + device_event: u16, + evset: vmm_sys_util::epoll::EventSet, + vrings: &[VringMutex], + _thread_id: usize, + ) -> std::io::Result<bool> { + if evset != EventSet::IN { + return Err(Error::HandleEventNotEpollIn.into()); + } + + let mut queue = match device_event { + // High priority queue + 0 => vrings[0].get_mut(), + // Regurlar priority queue + 1 => vrings[1].get_mut(), + _ => { + return Err(Error::HandleEventUnknownEvent.into()); + } + }; + + if self.event_idx { + loop { + queue + .get_queue_mut() + .enable_notification(self.guest_mem.memory().deref()) + .map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?; + if !self.process_queue(&mut queue)? { + break; + } + } + } else { + self.process_queue(&mut queue)?; + } + + Ok(false) + } +} + +pub fn start_virtiofs_daemon<FS, P>(fs: FS, socket: P) -> io::Result<()> +where + FS: FileSystem + Send + Sync + 'static, + P: AsRef<Path>, +{ + let guest_mem = GuestMemoryAtomic::new(GuestMemoryMmap::new()); + + let server = Arc::new(fuse_backend_rs::api::server::Server::new(Arc::new(fs))); + + let backend = Arc::new(RwLock::new(VhostUserFsBackend { + server, + guest_mem: guest_mem.clone(), + event_idx: false, + cache_req: None, + })); + + let listener = Listener::new(socket, true).unwrap(); + + let mut fs_daemon = + VhostUserDaemon::new(String::from("vhost-user-fs-tvix-store"), backend, guest_mem) + .map_err(|_| Error::NewDaemon)?; + + fs_daemon.start(listener).map_err(|_| Error::StartDaemon)?; + + fs_daemon.wait().map_err(|_| Error::WaitDaemon)?; + + Ok(()) +} |