about summary refs log tree commit diff
path: root/tvix/boot
diff options
context:
space:
mode:
Diffstat (limited to 'tvix/boot')
-rw-r--r--tvix/boot/README.md136
-rw-r--r--tvix/boot/default.nix113
-rw-r--r--tvix/boot/tests/default.nix133
-rw-r--r--tvix/boot/tvix-init.go138
4 files changed, 520 insertions, 0 deletions
diff --git a/tvix/boot/README.md b/tvix/boot/README.md
new file mode 100644
index 0000000000..9c7b722a7a
--- /dev/null
+++ b/tvix/boot/README.md
@@ -0,0 +1,136 @@
+# tvix/boot
+
+This directory provides tooling to boot VMs with /nix/store provided by
+virtiofs.
+
+In the `tests/` subdirectory, there's some integration tests.
+
+## //tvix/boot:runVM
+A script spinning up a `tvix-store virtiofs` daemon, then starting a cloud-
+hypervisor VM.
+
+The cloud-hypervisor VM is using a (semi-)minimal kernel image with virtiofs
+support, and a custom initrd (using u-root). It supports various command line
+options, to be able to do VM tests, act as an interactive shell or exec a binary
+from a closure.
+
+It supports the following env vars:
+ - `CH_NUM_CPUS=1` controls the number of CPUs available to the VM
+ - `CH_MEM_SIZE=512M` controls the memory availabe to the VM
+ - `CH_CMDLINE=` controls the kernel cmdline (which can be used to control the
+   boot)
+
+### Usage
+First, ensure you have `tvix-store` in `$PATH`, as that's what `run-tvix-vm`
+expects:
+
+Assuming you ran `cargo build --profile=release-with-debug` before, and are in
+the `tvix` directory:
+
+```
+export PATH=$PATH:$PWD/target/release-with-debug
+```
+
+Secondly, configure tvix to use the local backend:
+
+```
+export BLOB_SERVICE_ADDR=sled://$PWD/blobs.sled
+export DIRECTORY_SERVICE_ADDR=sled://$PWD/directories.sled
+export PATH_INFO_SERVICE_ADDR=sled://$PWD/pathinfo.sled
+```
+
+Potentially copy some data into tvix-store (via nar-bridge):
+
+```
+mg run //tvix:store -- daemon &
+$(mg build //tvix:nar-bridge-go)/bin/nar-bridge-http &
+rm -Rf ~/.cache/nix; nix copy --to http://localhost:9000\?compression\=none $(mg build //third_party/nixpkgs:hello)
+pkill nar-bridge-http; pkill tvix-store
+```
+
+#### Interactive shell
+Run the VM like this:
+
+```
+CH_CMDLINE=tvix.shell mg run //tvix/boot:runVM --
+```
+
+You'll get dropped into an interactive shell, from which you can do things with
+the store:
+
+```
+  ______      _         ____      _ __
+ /_  __/   __(_)  __   /  _/___  (_) /_
+  / / | | / / / |/_/   / // __ \/ / __/
+ / /  | |/ / />  <   _/ // / / / / /_
+/_/   |___/_/_/|_|  /___/_/ /_/_/\__/
+
+/# ls -la /nix/store/
+dr-xr-xr-x root 0 0   Jan  1 00:00 .
+dr-xr-xr-x root 0 989 Jan  1 00:00 aw2fw9ag10wr9pf0qk4nk5sxi0q0bn56-glibc-2.37-8
+dr-xr-xr-x root 0 3   Jan  1 00:00 jbwb8d8l28lg9z0xzl784wyb9vlbwss6-xgcc-12.3.0-libgcc
+dr-xr-xr-x root 0 82  Jan  1 00:00 k8ivghpggjrq1n49xp8sj116i4sh8lia-libidn2-2.3.4
+dr-xr-xr-x root 0 141 Jan  1 00:00 mdi7lvrn2mx7rfzv3fdq3v5yw8swiks6-hello-2.12.1
+dr-xr-xr-x root 0 5   Jan  1 00:00 s2gi8pfjszy6rq3ydx0z1vwbbskw994i-libunistring-1.1
+```
+
+Once you exit the shell, the VM will power off itself.
+
+#### Execute a specific binary
+Run the VM like this:
+
+```
+hello_cmd=$(mg build //third_party/nixpkgs:hello)/bin/hello
+CH_CMDLINE=tvix.run=$hello_cmd mg run //tvix/boot:runVM --
+```
+
+Observe it executing the file (and closure) from the tvix-store:
+
+```
+[    0.277486] Run /init as init process
+  ______      _         ____      _ __
+ /_  __/   __(_)  __   /  _/___  (_) /_
+  / / | | / / / |/_/   / // __ \/ / __/
+ / /  | |/ / />  <   _/ // / / / / /_
+/_/   |___/_/_/|_|  /___/_/ /_/_/\__/
+
+Hello, world!
+2023/09/24 21:10:19 Nothing left to be done, powering off.
+[    0.299122] ACPI: PM: Preparing to enter system sleep state S5
+[    0.299422] reboot: Power down
+```
+
+#### Execute a NixOS system closure
+It's also possible to invoke a system closure. To do this, tvix-init honors the
+init= cmdline option, and will switch_root to it.
+
+
+```
+CH_CMDLINE=init=/nix/store/…-nixos-system-…/init mg run //tvix/boot:runVM --
+```
+
+```
+  ______      _         ____      _ __
+ /_  __/   __(_)  __   /  _/___  (_) /_
+  / / | | / / / |/_/   / // __ \/ / __/
+ / /  | |/ / />  <   _/ // / / / / /_
+/_/   |___/_/_/|_|  /___/_/ /_/_/\__/
+
+2023/09/24 21:16:43 switch_root: moving mounts
+2023/09/24 21:16:43 switch_root: Skipping "/run" as the dir does not exist
+2023/09/24 21:16:43 switch_root: Changing directory
+2023/09/24 21:16:43 switch_root: Moving /
+2023/09/24 21:16:43 switch_root: Changing root!
+2023/09/24 21:16:43 switch_root: Deleting old /
+2023/09/24 21:16:43 switch_root: executing init
+
+<<< NixOS Stage 2 >>>
+
+[    0.322096] booting system configuration /nix/store/g657sdxinpqfcdv0162zmb8vv9b5c4c5-nixos-system-client-23.11.git.82102fc37da
+running activation script...
+setting up /etc...
+starting systemd...
+[    0.980740] systemd[1]: systemd 253.6 running in system mode (+PAM +AUDIT -SELINUX +APPARMOR +IMA +SMACK +SECCOMP +GCRYPT -GNUTLS +OPENSSL +ACL +BLKID +CURL +ELFUTILS +FIDO2 +IDN2 -IDN +IPTC +KMOD +LIBCRYPTSETUP +LIBFDISK +PCRE2 -PWQUALITY +P11KIT -QRENCODE +TPM2 +BZIP2 +LZ4 +XZ +ZLIB +ZSTD +BPF_FRAMEWORK -XKBCOMMON +UTMP -SYSVINIT default-hierarchy=unified)
+```
+
+This effectively replaces the NixOS Stage 1 entirely.
diff --git a/tvix/boot/default.nix b/tvix/boot/default.nix
new file mode 100644
index 0000000000..85995ffbf2
--- /dev/null
+++ b/tvix/boot/default.nix
@@ -0,0 +1,113 @@
+{ depot, pkgs, ... }:
+
+rec {
+  # A binary that sets up /nix/store from virtiofs, lists all store paths, and
+  # powers off the machine.
+  tvix-init = depot.nix.buildGo.program {
+    name = "tvix-init";
+    srcs = [
+      ./tvix-init.go
+    ];
+  };
+
+  # A kernel with virtiofs support baked in
+  # TODO: make a smaller kernel, we don't need a gazillion filesystems and
+  # device drivers in it.
+  kernel = pkgs.buildLinux ({ } // {
+    inherit (pkgs.linuxPackages_latest.kernel) src version modDirVersion;
+    autoModules = false;
+    kernelPreferBuiltin = true;
+    ignoreConfigErrors = true;
+    kernelPatches = [ ];
+    structuredExtraConfig = with pkgs.lib.kernel; {
+      FUSE_FS = option yes;
+      DAX_DRIVER = option yes;
+      DAX = option yes;
+      FS_DAX = option yes;
+      VIRTIO_FS = option yes;
+      VIRTIO = option yes;
+      ZONE_DEVICE = option yes;
+    };
+  });
+
+  # A build framework for minimal initrds
+  uroot = pkgs.buildGoModule rec {
+    pname = "u-root";
+    version = "0.14.0";
+    src = pkgs.fetchFromGitHub {
+      owner = "u-root";
+      repo = "u-root";
+      rev = "v${version}";
+      hash = "sha256-8zA3pHf45MdUcq/MA/mf0KCTxB1viHieU/oigYwIPgo=";
+    };
+    vendorHash = null;
+
+    doCheck = false; # Some tests invoke /bin/bash
+  };
+
+  # Use u-root to build a initrd with our tvix-init inside.
+  initrd = pkgs.stdenv.mkDerivation {
+    name = "initrd.cpio";
+    nativeBuildInputs = [ pkgs.go ];
+    # https://github.com/u-root/u-root/issues/2466
+    buildCommand = ''
+      mkdir -p /tmp/go/src/github.com/u-root/
+      cp -R ${uroot.src} /tmp/go/src/github.com/u-root/u-root
+      cd /tmp/go/src/github.com/u-root/u-root
+      chmod +w .
+      cp ${tvix-init}/bin/tvix-init tvix-init
+
+      export HOME=$(mktemp -d)
+      export GOROOT="$(go env GOROOT)"
+
+      GO111MODULE=off GOPATH=/tmp/go GOPROXY=off ${uroot}/bin/u-root -files ./tvix-init -initcmd "/tvix-init" -o $out
+    '';
+  };
+
+  # Start a `tvix-store` virtiofs daemon from $PATH, then a cloud-hypervisor
+  # pointed to it.
+  # Supports the following env vars (and defaults)
+  # CH_NUM_CPUS=2
+  # CH_MEM_SIZE=512M
+  # CH_CMDLINE=""
+  runVM = pkgs.writers.writeBashBin "run-tvix-vm" ''
+    tempdir=$(mktemp -d)
+
+    cleanup() {
+      kill $virtiofsd_pid
+      if [[ -n ''${work_dir-} ]]; then
+        chmod -R u+rw "$tempdir"
+        rm -rf "$tempdir"
+      fi
+    }
+    trap cleanup EXIT
+
+    # Spin up the virtiofs daemon
+    tvix-store --otlp=false virtiofs -l $tempdir/tvix.sock &
+    virtiofsd_pid=$!
+
+    # Wait for the socket to exist.
+    until [ -e $tempdir/tvix.sock ]; do sleep 0.1; done
+
+    CH_NUM_CPUS="''${CH_NUM_CPUS:-2}"
+    CH_MEM_SIZE="''${CH_MEM_SIZE:-512M}"
+    CH_CMDLINE="''${CH_CMDLINE:-}"
+
+    # spin up cloud_hypervisor
+    ${pkgs.cloud-hypervisor}/bin/cloud-hypervisor \
+     --cpus boot=$CH_NUM_CPU \
+     --memory mergeable=on,shared=on,size=$CH_MEM_SIZE \
+     --console null \
+     --serial tty \
+     --kernel ${kernel.dev}/vmlinux \
+     --initramfs ${initrd} \
+     --cmdline "console=ttyS0 $CH_CMDLINE" \
+     --fs tag=tvix,socket=$tempdir/tvix.sock,num_queues=''${CH_NUM_CPU},queue_size=512
+  '';
+
+  meta.ci.targets = [
+    "initrd"
+    "kernel"
+    "runVM"
+  ];
+}
diff --git a/tvix/boot/tests/default.nix b/tvix/boot/tests/default.nix
new file mode 100644
index 0000000000..5c7f97a1ce
--- /dev/null
+++ b/tvix/boot/tests/default.nix
@@ -0,0 +1,133 @@
+{ depot, pkgs, lib, ... }:
+
+let
+  # Seed a tvix-store with the tvix docs, then start a VM, ask it to list all
+  # files in /nix/store, and ensure the store path is present, which acts as a
+  # nice smoketest.
+  mkBootTest =
+    { blobServiceAddr ? "memory://"
+    , directoryServiceAddr ? "memory://"
+    , pathInfoServiceAddr ? "memory://"
+
+
+      # The path to import.
+    , path
+
+      # Whether the path should be imported as a closure.
+      # If false, importPathName must be specified.
+    , isClosure ? false
+    , importPathName ? null
+
+      # The cmdline to pass to the VM.
+      # Defaults to tvix.find, which lists all files in the store.
+    , vmCmdline ? "tvix.find"
+      # The string we expect to find in the VM output.
+      # Defaults the value of `path` (the store path we upload).
+    , assertVMOutput ? path
+    }:
+
+      assert isClosure -> importPathName == null;
+      assert (!isClosure) -> importPathName != null;
+
+      pkgs.stdenv.mkDerivation {
+        name = "run-vm";
+
+        __structuredAttrs = true;
+        exportReferencesGraph.closure = [ path ];
+
+        nativeBuildInputs = [
+          depot.tvix.store
+          depot.tvix.boot.runVM
+        ];
+        buildCommand = ''
+          touch $out
+
+          # Start the tvix daemon, listening on a unix socket.
+          BLOB_SERVICE_ADDR=${blobServiceAddr} \
+            DIRECTORY_SERVICE_ADDR=${directoryServiceAddr} \
+            PATH_INFO_SERVICE_ADDR=${pathInfoServiceAddr} \
+            tvix-store \
+              --otlp=false \
+              daemon -l $PWD/tvix-store.sock &
+
+          # Wait for the socket to be created.
+          while [ ! -e $PWD/tvix-store.sock ]; do sleep 1; done
+
+          # Export env vars so that subsequent tvix-store commands will talk to
+          # our tvix-store daemon over the unix socket.
+          export BLOB_SERVICE_ADDR=grpc+unix://$PWD/tvix-store.sock
+          export DIRECTORY_SERVICE_ADDR=grpc+unix://$PWD/tvix-store.sock
+          export PATH_INFO_SERVICE_ADDR=grpc+unix://$PWD/tvix-store.sock
+        '' + lib.optionalString (!isClosure) ''
+          echo "Importing ${path} into tvix-store with name ${importPathName}…"
+          cp -R ${path} ${importPathName}
+          outpath=$(tvix-store import ${importPathName})
+
+          echo "imported to $outpath"
+        '' + lib.optionalString (isClosure) ''
+          echo "Copying closure ${path}…"
+          # This picks up the `closure` key in `$NIX_ATTRS_JSON_FILE` automatically.
+          tvix-store --otlp=false copy
+        '' + ''
+          # Invoke a VM using tvix as the backing store, ensure the outpath appears in its listing.
+          echo "Starting VM…"
+
+          CH_CMDLINE="${vmCmdline}" run-tvix-vm 2>&1 | tee output.txt
+          grep "${assertVMOutput}" output.txt
+        '';
+        requiredSystemFeatures = [ "kvm" ];
+      };
+
+  systemFor = sys: (depot.ops.nixos.nixosFor sys).system;
+
+  testSystem = systemFor ({ modulesPath, pkgs, ... }: {
+    # Set some options necessary to evaluate.
+    boot.loader.systemd-boot.enable = true;
+    # TODO: figure out how to disable this without causing eval to fail
+    fileSystems."/" = {
+      device = "/dev/root";
+      fsType = "tmpfs";
+    };
+
+    services.getty.helpLine = "Onwards and upwards.";
+    systemd.services.do-shutdown = {
+      after = [ "getty.target" ];
+      description = "Shut down again";
+      wantedBy = [ "multi-user.target" ];
+      serviceConfig.Type = "oneshot";
+      script = "/run/current-system/sw/bin/systemctl poweroff --when=+10s";
+    };
+
+    # Don't warn about stateVersion.
+    system.stateVersion = "24.05";
+  });
+
+in
+depot.nix.readTree.drvTargets
+{
+  docs-memory = (mkBootTest {
+    path = ../../docs;
+    importPathName = "docs";
+  });
+  docs-persistent = (mkBootTest {
+    blobServiceAddr = "objectstore+file://$PWD/blobs";
+    directoryServiceAddr = "sled://$PWD/directories.sled";
+    pathInfoServiceAddr = "sled://$PWD/pathinfo.sled";
+    path = ../../docs;
+    importPathName = "docs";
+  });
+
+  closure-tvix = (mkBootTest {
+    blobServiceAddr = "objectstore+file://$PWD/blobs";
+    path = depot.tvix.store;
+    isClosure = true;
+  });
+
+  closure-nixos = (mkBootTest {
+    blobServiceAddr = "objectstore+file://$PWD/blobs";
+    path = testSystem;
+    isClosure = true;
+    vmCmdline = "init=${testSystem}/init panic=-1"; # reboot immediately on panic
+    assertVMOutput = "Onwards and upwards.";
+  });
+}
diff --git a/tvix/boot/tvix-init.go b/tvix/boot/tvix-init.go
new file mode 100644
index 0000000000..97a24bab35
--- /dev/null
+++ b/tvix/boot/tvix-init.go
@@ -0,0 +1,138 @@
+package main
+
+import (
+	"fmt"
+	"log"
+	"os"
+	"os/exec"
+	"strings"
+	"syscall"
+)
+
+// run the given command, connecting std{in,err,out} with the OS one.
+func run(args ...string) error {
+	cmd := exec.Command(args[0], args[1:]...)
+	cmd.Stdin = os.Stdin
+	cmd.Stderr = os.Stderr
+	cmd.Stdout = os.Stdout
+
+	return cmd.Run()
+}
+
+// parse the cmdline, return a map[string]string.
+func parseCmdline(cmdline string) map[string]string {
+	line := strings.TrimSuffix(cmdline, "\n")
+	fields := strings.Fields(line)
+	out := make(map[string]string, 0)
+
+	for _, arg := range fields {
+		kv := strings.SplitN(arg, "=", 2)
+		switch len(kv) {
+		case 1:
+			out[kv[0]] = ""
+		case 2:
+			out[kv[0]] = kv[1]
+		}
+	}
+
+	return out
+}
+
+// mounts the nix store from the virtiofs tag to the given destination,
+// creating the destination if it doesn't exist already.
+func mountTvixStore(dest string) error {
+	if err := os.MkdirAll(dest, os.ModePerm); err != nil {
+		return fmt.Errorf("unable to mkdir dest: %w", err)
+	}
+	if err := run("mount", "-t", "virtiofs", "tvix", dest, "-o", "ro"); err != nil {
+		return fmt.Errorf("unable to run mount: %w", err)
+	}
+
+	return nil
+}
+
+func main() {
+	fmt.Print(`
+  ______      _         ____      _ __
+ /_  __/   __(_)  __   /  _/___  (_) /_
+  / / | | / / / |/_/   / // __ \/ / __/
+ / /  | |/ / />  <   _/ // / / / / /_
+/_/   |___/_/_/|_|  /___/_/ /_/_/\__/
+
+`)
+
+	// Set PATH to "/bbin", so we can find the u-root tools
+	os.Setenv("PATH", "/bbin")
+
+	if err := run("mount", "-t", "proc", "none", "/proc"); err != nil {
+		log.Printf("Failed to mount /proc: %v\n", err)
+	}
+	if err := run("mount", "-t", "sysfs", "none", "/sys"); err != nil {
+		log.Printf("Failed to mount /sys: %v\n", err)
+	}
+	if err := run("mount", "-t", "devtmpfs", "devtmpfs", "/dev"); err != nil {
+		log.Printf("Failed to mount /dev: %v\n", err)
+	}
+
+	cmdline, err := os.ReadFile("/proc/cmdline")
+	if err != nil {
+		log.Printf("Failed to read cmdline: %s\n", err)
+	}
+	cmdlineFields := parseCmdline(string(cmdline))
+
+	if _, ok := cmdlineFields["tvix.find"]; ok {
+		// If tvix.find is set, invoke find /nix/store
+		if err := mountTvixStore("/nix/store"); err != nil {
+			log.Printf("Failed to mount tvix store: %v\n", err)
+		}
+
+		if err := run("find", "/nix/store"); err != nil {
+			log.Printf("Failed to run find command: %s\n", err)
+		}
+	} else if _, ok := cmdlineFields["tvix.shell"]; ok {
+		// If tvix.shell is set, mount the nix store to /nix/store directly,
+		// then invoke the elvish shell
+		if err := mountTvixStore("/nix/store"); err != nil {
+			log.Printf("Failed to mount tvix store: %v\n", err)
+		}
+
+		if err := run("elvish"); err != nil {
+			log.Printf("Failed to run shell: %s\n", err)
+		}
+	} else if v, ok := cmdlineFields["tvix.run"]; ok {
+		// If tvix.run is set, mount the nix store to /nix/store directly,
+		// then invoke the command.
+		if err := mountTvixStore("/nix/store"); err != nil {
+			log.Printf("Failed to mount tvix store: %v\n", err)
+		}
+
+		if err := run(v); err != nil {
+			log.Printf("Failed to run command: %s\n", err)
+		}
+	} else if v, ok := cmdlineFields["init"]; ok {
+		// If init is set, invoke the binary specified (with switch_root),
+		// and prepare /fs beforehand as well.
+		os.Mkdir("/fs", os.ModePerm)
+		if err := run("mount", "-t", "tmpfs", "none", "/fs"); err != nil {
+			log.Fatalf("Failed to mount /fs tmpfs: %s\n", err)
+		}
+
+		// Mount /fs/nix/store
+		if err := mountTvixStore("/fs/nix/store"); err != nil {
+			log.Fatalf("Failed to mount tvix store: %v\n", err)
+		}
+
+		// Invoke switch_root, which will take care of moving /proc, /sys and /dev.
+		if err := syscall.Exec("/bbin/switch_root", []string{"switch_root", "/fs", v}, []string{}); err != nil {
+			log.Printf("Failed to switch root: %s\n", err)
+		}
+	} else {
+		log.Printf("No command detected, not knowing what to do!")
+	}
+
+	// This is only reached in the non switch_root case.
+	log.Printf("Nothing left to be done, powering off.")
+	if err := run("poweroff"); err != nil {
+		log.Printf("Failed to run poweroff command: %v\n", err)
+	}
+}