about summary refs log tree commit diff
path: root/users/flokli/nixos/nixos-tvix-cache
diff options
context:
space:
mode:
Diffstat (limited to 'users/flokli/nixos/nixos-tvix-cache')
-rw-r--r--users/flokli/nixos/nixos-tvix-cache/OWNERS1
-rw-r--r--users/flokli/nixos/nixos-tvix-cache/README.md40
-rw-r--r--users/flokli/nixos/nixos-tvix-cache/configuration.nix81
-rw-r--r--users/flokli/nixos/nixos-tvix-cache/disko.nix62
-rw-r--r--users/flokli/nixos/nixos-tvix-cache/monitoring.nix147
-rw-r--r--users/flokli/nixos/nixos-tvix-cache/nar-bridge-module.nix75
-rw-r--r--users/flokli/nixos/nixos-tvix-cache/nar-bridge.nix141
7 files changed, 547 insertions, 0 deletions
diff --git a/users/flokli/nixos/nixos-tvix-cache/OWNERS b/users/flokli/nixos/nixos-tvix-cache/OWNERS
new file mode 100644
index 000000000000..d96354420040
--- /dev/null
+++ b/users/flokli/nixos/nixos-tvix-cache/OWNERS
@@ -0,0 +1 @@
+zimbatm
diff --git a/users/flokli/nixos/nixos-tvix-cache/README.md b/users/flokli/nixos/nixos-tvix-cache/README.md
new file mode 100644
index 000000000000..6a0e2f8ddbd8
--- /dev/null
+++ b/users/flokli/nixos/nixos-tvix-cache/README.md
@@ -0,0 +1,40 @@
+# nixos-tvix-cache
+
+This is a fetch-through mirror of cache.nixos.org, hosted by NumTide.
+
+The current machine is a SX65 Hetzner dedicated server with 4x22TB SATA disks,
+and 2x1TB NVMe disks.
+
+The goals of this machine:
+
+ - Exercise tvix-store and nar-bridge code
+ - Collect usage metrics (see [Grafana](https://nixos.tvix.store/grafana))
+ - Identify bottlenecks in the current implementations and fix them
+ - Replace cache.nixos.org?
+
+You can configure this as a Nix substituter on your systems like this:
+
+```nix
+  nix.settings.substituters = lib.mkForce [
+    "https://nixos.tvix.store"
+  ];
+```
+
+For store paths it hasn't seen yet, it'll internally ingest their contents
+into tvix-castore (deduplicating in doing so).
+
+Requests for NARs will dynamically reassemble the NAR representation on demand.
+
+Metadata and signatures are preserved (which is why you don't need to add
+additional trusted keys).
+We need to produce the same data bit by bit, else the signature check in your
+Nix/Lix client would fail.
+
+Be aware, however, that there are zero availability guarantees.
+We will frequently redeploy this box, and it might become unavailable without
+prior notice.
+
+Tvix currently doesn't have garbage collection. If we run out of disk space, we
+might either move things to a bigger box or delete everything on it so far.
+
+As it's only a cache, it should simply re-ingest things when they are requested again.
diff --git a/users/flokli/nixos/nixos-tvix-cache/configuration.nix b/users/flokli/nixos/nixos-tvix-cache/configuration.nix
new file mode 100644
index 000000000000..221ea7f8779f
--- /dev/null
+++ b/users/flokli/nixos/nixos-tvix-cache/configuration.nix
@@ -0,0 +1,81 @@
+{ pkgs
+, lib
+, config
+, ...
+}:
+
+let
+  srvos =
+    import (builtins.fetchTarball {
+      url = "https://github.com/nix-community/srvos/archive/15b152766b329dd2957549a49f0fd96a7a861db1.tar.gz";
+      sha256 = "sha256-11TCdlxJEf84Lm2KIJGL8J2nJ2G9CNTW8PrCebJLg/M=";
+    });
+  disko =
+    (builtins.fetchTarball {
+      url = "https://github.com/nix-community/disko/archive/84dd8eea9a06006d42b8af7cfd4fda4cf334db81.tar.gz";
+      sha256 = "13mfnjnjp21wms4mw35ar019775qgy3fnjc59zrpnqbkfmzyvv02";
+    });
+
+
+in
+{
+  imports = [
+    "${disko}/module.nix"
+    ./disko.nix
+    ./monitoring.nix
+    ./nar-bridge.nix
+    srvos.nixosModules.hardware-hetzner-online-amd
+    srvos.nixosModules.mixins-nginx
+  ];
+
+  options = {
+    machine.domain = lib.mkOption {
+      type = lib.types.str;
+      default = "nixos.tvix.store";
+    };
+  };
+
+  config = {
+    services.nginx.virtualHosts."${config.machine.domain}" = {
+      enableACME = true;
+      forceSSL = true;
+    };
+
+
+    security.acme.acceptTerms = true;
+    security.acme.defaults.email = "admin+acme@numtide.com";
+
+    nixpkgs.hostPlatform = "x86_64-linux";
+
+    networking.hostName = "tvix-cache";
+
+    systemd.network.networks."10-uplink".networkConfig.Address = "2a01:4f9:3071:1091::2/64";
+
+
+    # Enable SSH and add some keys
+    services.openssh.enable = true;
+    users.users.root.openssh.authorizedKeys.keys = [
+      # edef
+      "cert-authority ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQCvb/7ojfcbKvHIyjnrNUOOgzy44tCkgXY9HLuyFta1jQOE9pFIK19B4dR9bOglPKf145CCL0mSFJNNqmNwwavU2uRn+TQrW+U1dQAk8Gt+gh3O49YE854hwwyMU+xD6bIuUdfxPr+r5al/Ov5Km28ZMlHOs3FoAP0hInK+eAibioxL5rVJOtgicrOVCkGoXEgnuG+LRbOYTwzdClhRUxiPjK8alCbcJQ53AeZHO4G6w9wTr+W5ILCfvW4OmUXCX01sKzaBiQuuFCF6M/H4LlnsPWLMra2twXxkOIhZblwC+lncps9lQaUgiD4koZeOCORvHW00G0L39ilFbbnVcL6Itp/m8RRWm/xRxS4RMnsdV/AhvpRLrhL3lfQ7E2oCeSM36v1S9rdg6a47zcnpL+ahG76Gz39Y7KmVRQciNx7ezbwxj3Q5lZtFykgdfGIAN+bT8ijXMO6m68g60i9Bz4IoMZGkiJGqMYLTxMQ+oRgR3Ro5lbj7E11YBHyeimoBYXYGHMkiuxopQZ7lIj3plxIzhmUlXJBA4jMw9KGHdYaLhaicIYhvQmCTAjrkt2HvxEe6lU8iws2Qv+pB6tAGundN36RVVWAckeQPZ4ZsgDP8V2FfibZ1nsrQ+zBKqaslYMAHs01Cf0Hm0PnCqagf230xaobu0iooNuXx44QKoDnB+w== edef"
+      # flokli
+      "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPTVTXOutUZZjXLB0lUSgeKcSY/8mxKkC0ingGK1whD2 flokli"
+      # mic92
+      "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKbBp2dH2X3dcU1zh+xW3ZsdYROKpJd3n13ssOP092qE"
+      "ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBCsjXKHCkpQT4LhWIdT0vDM/E/3tw/4KHTQcdJhyqPSH0FnwC8mfP2N9oHYFa2isw538kArd5ZMo5DD1ujL5dLk= ssh@secretive.Joerg’s-Laptop.local"
+      # padraic
+      "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIEFlro/QUDlDpaA1AQxdWIqBg9HSFJf9Cb7CPdsh0JN7"
+      # zimbatm
+      "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOuiDoBOxgyer8vGcfAIbE6TC4n4jo8lhG9l01iJ0bZz zimbatm@no1"
+      "sk-ssh-ed25519@openssh.com AAAAGnNrLXNzaC1lZDI1NTE5QG9wZW5zc2guY29tAAAAINwWC6CJ/E6o3WGeZxbZMajC4roXnzVi8fOo1JYJSE6YAAAABHNzaDo= zimbatm@nixos"
+    ];
+
+    environment.systemPackages = [
+      pkgs.helix
+      pkgs.htop
+      pkgs.kitty.terminfo
+      pkgs.tmux
+    ];
+
+    system.stateVersion = "24.11";
+  };
+}
diff --git a/users/flokli/nixos/nixos-tvix-cache/disko.nix b/users/flokli/nixos/nixos-tvix-cache/disko.nix
new file mode 100644
index 000000000000..ef7c8def3daa
--- /dev/null
+++ b/users/flokli/nixos/nixos-tvix-cache/disko.nix
@@ -0,0 +1,62 @@
+# Disk /dev/nvme0n1: 1024 GB (=> 953 GiB)
+# Disk /dev/nvme1n1: 1024 GB (=> 953 GiB)
+# Disk /dev/sda: 22 TB (=> 20 TiB)
+# Disk /dev/sdb: 22 TB (=> 20 TiB)
+# Disk /dev/sdc: 22 TB (=> 20 TiB)
+# Disk /dev/sdd: 22 TB (=> 20 TiB)
+#
+# # Installation
+# 1. Comment out the fileSystems
+# 2. Bootstrap the machine with `clan machines tvix-cache-install`
+# 3. Do the btrfs partitioning by hand (because it's not supported by Disko)
+#   a. `mkfs.btrfs -m raid1 -d single /dev/sd{a,b,c,d} --label tank -f`
+#   b. `mkdir /tank && mount /dev/disk/by-label/tank /tank`
+# 4. Uncomment the fileSystems section below
+# 5. Re-deploy
+#
+# TODO: make use of /dev/nvme1n1
+{
+  boot.loader.efi.canTouchEfiVariables = true;
+  boot.loader.systemd-boot.configurationLimit = 10;
+  boot.loader.systemd-boot.enable = true;
+  boot.loader.timeout = 3;
+  boot.supportedFilesystems = [ "btrfs" ];
+
+  # TODO: comment me during install
+  fileSystems."/tank" = {
+    fsType = "btrfs";
+    device = "/dev/disk/by-label/tank";
+  };
+
+  disko.devices = {
+    disk = {
+      main = {
+        type = "disk";
+        device = "/dev/nvme0n1";
+        content = {
+          type = "gpt";
+          partitions = {
+            ESP = {
+              size = "1G";
+              type = "EF00";
+              content = {
+                type = "filesystem";
+                format = "vfat";
+                mountpoint = "/boot";
+                mountOptions = [ "umask=0077" ];
+              };
+            };
+            root = {
+              size = "100%";
+              content = {
+                type = "filesystem";
+                format = "btrfs";
+                mountpoint = "/";
+              };
+            };
+          };
+        };
+      };
+    };
+  };
+}
diff --git a/users/flokli/nixos/nixos-tvix-cache/monitoring.nix b/users/flokli/nixos/nixos-tvix-cache/monitoring.nix
new file mode 100644
index 000000000000..699d74cefc06
--- /dev/null
+++ b/users/flokli/nixos/nixos-tvix-cache/monitoring.nix
@@ -0,0 +1,147 @@
+{ config, ... }:
+let
+  domain = config.machine.domain;
+in
+{
+  # Configure the NixOS machine with Grafana and Tempo to collect and browse traces from nar-bridge.
+
+  services.tempo = {
+    enable = true;
+    settings = {
+      auth_enabled = false;
+      server = {
+        http_listen_address = "127.0.0.1";
+        http_listen_port = 9080;
+        grpc_listen_address = "127.0.0.1";
+        grpc_listen_port = 9095;
+        grpc_server_max_recv_msg_size = 67108864;
+        grpc_server_max_send_msg_size = 67108864;
+        log_level = "warn";
+      };
+      distributor.receivers = {
+        otlp.protocols = {
+          grpc = { }; # *:4317
+          http = { }; # *:4318
+        };
+      };
+      storage.trace = {
+        backend = "local";
+        wal.path = "/var/lib/tempo/wal";
+        local.path = "/var/lib/tempo/blocks";
+      };
+      usage_report.reporting_enabled = false;
+    };
+  };
+
+  # No need, tempo collects the traces directly.
+  #
+  # services.opentelemetry-collector = {
+  #   enable = true;
+
+  #   settings = {
+  #     receivers = {
+  #       otlp.protocols.grpc.endpoint = "127.0.0.1:4317";
+  #       otlp.protocols.http.endpoint = "127.0.0.1:4318";
+  #     };
+
+  #     processors = {
+  #       batch = { };
+  #     };
+
+  #     exporters = {
+  #       otlp = {
+  #         endpoint = "127.0.0.1:9080"; # Tempo
+  #       };
+  #     };
+
+  #     extensions = {
+  #       zpages = { };
+  #     };
+
+  #     service = {
+  #       extensions = [
+  #         "zpages"
+  #       ];
+  #       pipelines = {
+  #         traces = {
+  #           receivers = [ "otlp" ];
+  #           processors = [ "batch" ];
+  #           exporters = [ "otlp" ];
+  #         };
+  #         metrics = {
+  #           receivers = [ "otlp" ];
+  #           processors = [ "batch" ];
+  #           exporters = [ "otlp" ];
+  #         };
+  #         logs = {
+  #           receivers = [ "otlp" ];
+  #           processors = [ "batch" ];
+  #           exporters = [ "otlp" ];
+  #         };
+  #       };
+  #     };
+  #   };
+  # };
+
+  services.grafana = {
+    enable = true;
+
+    settings = {
+      server = {
+        domain = domain;
+        http_addr = "127.0.0.1";
+        http_port = 3000;
+        root_url = "https://%(domain)s/grafana";
+        serve_from_sub_path = true;
+      };
+      analytics.reporting_enabled = false;
+      "auth.anonymous" = {
+        enabled = true;
+      };
+      auth.disable_login_form = true;
+      "auth.basic".enabled = false;
+      "auth.github" = {
+        enabled = true;
+        client_id = "Ov23liAnuBwzWtJJ7gv4";
+        client_secret = "$__file{/run/credentials/grafana.service/github_auth_client_secret}";
+        scopes = "user:email,read:org";
+        auth_url = "https://github.com/login/oauth/authorize";
+        token_url = "https://github.com/login/oauth/access_token";
+        api_url = "https://api.github.com/user";
+        allow_sign_up = true;
+        auto_login = false;
+        allowed_organizations = [ "numtide" ];
+        role_attribute_path = "contains(groups[*], '@numtide/network') && 'GrafanaAdmin' || 'Viewer'";
+      };
+    };
+
+    provision = {
+      enable = true;
+      datasources.settings.datasources = [
+        {
+          name = "Tempo";
+          type = "tempo";
+          uid = "traces";
+          # Must match Tempo's HTTP listener (services.tempo.settings.server).
+          url = "http://127.0.0.1:${toString config.services.tempo.settings.server.http_listen_port}";
+          access = "proxy";
+          timeout = "300";
+
+          jsonData = {
+            nodeGraph.enabled = true;
+            # tracesToLogs.datasourceUid = "logs";
+            # tracesToMetrics.datasourceUid = "metrics";
+            # serviceMap.datasourceUid = "metrics";
+            # lokiSearch.datasourceUid = "logs";
+          };
+        }
+      ];
+    };
+  };
+
+  systemd.services.grafana.serviceConfig.LoadCredential = "github_auth_client_secret:/etc/secrets/grafana_github_auth_client_secret";
+
+  services.nginx.virtualHosts."${domain}".locations."/grafana" = {
+    proxyPass = "http://127.0.0.1:3000"; # Grafana binds 127.0.0.1 only; "localhost" may also resolve to ::1
+  };
+}
diff --git a/users/flokli/nixos/nixos-tvix-cache/nar-bridge-module.nix b/users/flokli/nixos/nixos-tvix-cache/nar-bridge-module.nix
new file mode 100644
index 000000000000..3370f1aaa3cb
--- /dev/null
+++ b/users/flokli/nixos/nixos-tvix-cache/nar-bridge-module.nix
@@ -0,0 +1,75 @@
+{ config
+, lib
+, pkgs
+, depot
+, ...
+}:
+let
+  cfg = config.services.nar-bridge;
+
+  package = depot.tvix.nar-bridge.override (prev: { # nar-bridge built with the store-composition CLI feature
+    features = (prev.features or [ "default" ]) ++ [ "xp-store-composition-cli" ];
+    runTests = true;
+  });
+
+  storeCompositionFormat = pkgs.formats.toml { };
+
+  storeCompositionFile = storeCompositionFormat.generate "store-composition.toml" cfg.settings;
+
+  args = [
+    "--listen-address"
+    "sd-listen"
+    "--experimental-store-composition"
+    storeCompositionFile
+  ];
+in
+{
+  options = {
+    services.nar-bridge = {
+      enable = lib.mkEnableOption "nar-bridge service";
+
+      settings = lib.mkOption {
+        type = storeCompositionFormat.type;
+        default = { };
+      };
+    };
+  };
+
+  config = lib.mkIf cfg.enable {
+    users.users.nar-bridge = {
+      isSystemUser = true;
+      group = "nar-bridge";
+    };
+
+    users.groups.nar-bridge = { };
+
+    systemd.sockets.nar-bridge = {
+      description = "nar-bridge socket";
+      wantedBy = [ "sockets.target" ];
+
+      socketConfig = {
+        LimitNOFILE = 65535; # NOTE(review): presumably does not raise the limit of nar-bridge.service itself — confirm
+        ListenStream = "/run/nar-bridge.sock";
+        SocketMode = "0666"; # socket is world-accessible; nginx proxies to this path in nar-bridge.nix
+        SocketUser = "root";
+      };
+    };
+
+    systemd.services.nar-bridge = {
+      description = "NAR Bridge";
+      requires = [ "nar-bridge.socket" ];
+      after = [ "nar-bridge.socket" ];
+      wantedBy = [ "multi-user.target" ];
+      serviceConfig = {
+        ExecStart = "${package}/bin/nar-bridge ${lib.escapeShellArgs args}";
+
+        Restart = "always";
+        RestartSec = "10";
+
+        User = "nar-bridge";
+        Group = "nar-bridge";
+        StateDirectory = "nar-bridge";
+      };
+    };
+  };
+}
diff --git a/users/flokli/nixos/nixos-tvix-cache/nar-bridge.nix b/users/flokli/nixos/nixos-tvix-cache/nar-bridge.nix
new file mode 100644
index 000000000000..e87189da2a89
--- /dev/null
+++ b/users/flokli/nixos/nixos-tvix-cache/nar-bridge.nix
@@ -0,0 +1,141 @@
+{ config, depot, pkgs, ... }:
+{
+  imports = [ ./nar-bridge-module.nix ];
+
+  # Microbenchmark
+  # hyperfine --warmup 1 'rm -rf /tmp/cache; nix copy --from https://nixos.tvix.store/ --to "file:///tmp/cache?compression=none" /nix/store/jlkypcf54nrh4n6r0l62ryx93z752hb2-firefox-132.0'
+  # From a different hetzner machine with 1Gbps uplink:
+  # - with zstd: 13.384s
+  # - with gzip: 11.130s
+  # - with brotli: ~18s
+  # - without compression: 15.6s
+
+  # From a 1Gbit link in TUM:
+  # - with zstd: 32.292s
+  # - with gzip: 51s
+  # - cache.nixos.org from the same connection: 36.559s
+  services.nginx = {
+    package = pkgs.nginxStable.override {
+      modules = [ pkgs.nginxModules.zstd ];
+    };
+    virtualHosts.${config.machine.domain} = {
+      # when using http2 we actually see worse throughput,
+      # because it only uses a single tcp connection,
+      # which pins nginx to a single core.
+      http2 = false;
+      locations."=/" = {
+        tryFiles = "$uri $uri/index.html =404";
+        root = pkgs.runCommand "index"
+          {
+            nativeBuildInputs = [ depot.tools.cheddar ];
+          } ''
+          mkdir -p $out
+          cheddar README.md < ${./README.md} > $out/index.html
+          find $out
+        '';
+      };
+      locations."/" = {
+        proxyPass = "http://unix:/run/nar-bridge.sock:/";
+        extraConfig = ''
+          # Restrict allowed HTTP methods
+          limit_except GET HEAD {
+            # nar bridge allows to upload nars via PUT
+            deny all;
+          }
+          # Enable proxy cache
+          proxy_cache nar-bridge;
+          proxy_cache_key "$scheme$proxy_host$request_uri";
+          proxy_cache_valid 200 301 302 10m;  # Cache responses for 10 minutes
+          proxy_cache_valid 404 1m;  # Cache 404 responses for 1 minute
+          proxy_cache_min_uses 2;  # Cache only if the object is requested at least twice
+          proxy_cache_use_stale error timeout updating;
+
+          zstd on;
+          zstd_types application/x-nix-nar;
+        '';
+      };
+    };
+
+    # use more cores for compression
+    appendConfig = ''
+      worker_processes auto;
+    '';
+
+    proxyCachePath."nar-bridge" = {
+      enable = true;
+      levels = "1:2";
+      keysZoneName = "nar-bridge";
+      # Put our 1TB NVME to good use
+      maxSize = "200G";
+      inactive = "10d";
+      useTempPath = false;
+    };
+  };
+
+  services.nar-bridge = {
+    enable = true;
+
+    settings = {
+      blobservices = {
+        root = {
+          type = "objectstore";
+          object_store_url = "file:///var/lib/nar-bridge/blobs.object_store";
+          object_store_options = { };
+        };
+      };
+
+      directoryservices = {
+        root = {
+          type = "redb";
+          is_temporary = false;
+          path = "/var/lib/nar-bridge/directories.redb";
+        };
+      };
+
+      pathinfoservices = {
+        root = {
+          type = "cache";
+          near = "redb";
+          far = "cache-nixos-org";
+        };
+
+        redb = {
+          type = "redb";
+          is_temporary = false;
+          path = "/var/lib/nar-bridge/pathinfo.redb";
+        };
+
+        "cache-nixos-org" = {
+          type = "nix";
+          base_url = "https://cache.nixos.org";
+          blob_service = "root";
+          directory_service = "root";
+          public_keys = [
+            "cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY="
+          ];
+        };
+      };
+    };
+  };
+
+  systemd.tmpfiles.rules = [
+    # Put the data in the big disk
+    "d /tank/nar-bridge 0755 nar-bridge nar-bridge -"
+    # Cache responses on NVME
+    "d /var/cache/nginx 0755 ${config.services.nginx.user} ${config.services.nginx.group} -"
+  ];
+
+  fileSystems."/var/lib/nar-bridge" = {
+    device = "/tank/nar-bridge";
+    options = [
+      "bind"
+      "nofail"
+    ];
+  };
+
+  systemd.services.nar-bridge = {
+    unitConfig.RequiresMountsFor = "/var/lib/nar-bridge";
+    # twice the normal allowed limit, same as nix-daemon
+    serviceConfig.LimitNOFILE = "1048576";
+  };
+}