about summary refs log tree commit diff
path: root/users/flokli/nixos
diff options
context:
space:
mode:
Diffstat (limited to 'users/flokli/nixos')
-rw-r--r-- users/flokli/nixos/.envrc 1
-rw-r--r-- users/flokli/nixos/.skip-subtree 0
-rw-r--r-- users/flokli/nixos/archeology-ec2/OWNERS 1
-rw-r--r-- users/flokli/nixos/archeology-ec2/configuration.nix 35
-rw-r--r-- users/flokli/nixos/archeology-ec2/hardware-configuration.nix 36
-rw-r--r-- users/flokli/nixos/archeology-ec2/parse-bucket-logs-continuously.py 62
-rw-r--r-- users/flokli/nixos/default.nix 32
-rw-r--r-- users/flokli/nixos/profiles/archeology.nix 37
8 files changed, 204 insertions, 0 deletions
diff --git a/users/flokli/nixos/.envrc b/users/flokli/nixos/.envrc
new file mode 100644
index 000000000000..ccf3cb847ac5
--- /dev/null
+++ b/users/flokli/nixos/.envrc
@@ -0,0 +1 @@
+PATH_add $(nix-build ../../.. -A users.flokli.nixos.deps --no-out-link)/bin  # direnv: build the users.flokli.nixos.deps attribute from three directories up and put its bin/ on PATH
diff --git a/users/flokli/nixos/.skip-subtree b/users/flokli/nixos/.skip-subtree
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/users/flokli/nixos/.skip-subtree
diff --git a/users/flokli/nixos/archeology-ec2/OWNERS b/users/flokli/nixos/archeology-ec2/OWNERS
new file mode 100644
index 000000000000..b9bc074a8020
--- /dev/null
+++ b/users/flokli/nixos/archeology-ec2/OWNERS
@@ -0,0 +1 @@
+edef
diff --git a/users/flokli/nixos/archeology-ec2/configuration.nix b/users/flokli/nixos/archeology-ec2/configuration.nix
new file mode 100644
index 000000000000..f0fc0c5d095c
--- /dev/null
+++ b/users/flokli/nixos/archeology-ec2/configuration.nix
@@ -0,0 +1,35 @@
+{ depot, pkgs, modulesPath, ... }:
+# NixOS configuration for the archeology-ec2 host (see networking.hostName below).
+{
+  imports = [
+    "${modulesPath}/virtualisation/amazon-image.nix"  # Amazon image module from the NixOS module tree
+    ../profiles/archeology.nix
+  ];
+
+  systemd.timers.parse-bucket-logs = {  # no Unit= given: a timer activates the same-named service by default
+    wantedBy = [ "multi-user.target" ];
+    timerConfig.OnCalendar = "*-*-* 03:00:00 UTC";  # every day at 03:00 UTC
+  };
+
+  systemd.services.parse-bucket-logs = {
+    path = [ depot.users.flokli.archeology.parse-bucket-logs ];  # on the service's PATH; presumably provides the archeology-parse-bucket-logs command the script runs - confirm
+    serviceConfig = {
+      Type = "oneshot";
+      ExecStart = (pkgs.writers.writePython3 "parse-bucket-logs-continuously"
+        {
+          libraries = [ pkgs.python3Packages.boto3 ];  # boto3 is the script's only non-stdlib import
+        } ./parse-bucket-logs-continuously.py);
+      DynamicUser = "yes";  # run as a transient user allocated by systemd
+      StateDirectory = "parse-bucket-logs";
+    };
+  };
+
+  environment.systemPackages = [  # same package as the service's path, installed system-wide as well
+    depot.users.flokli.archeology.parse-bucket-logs
+  ];
+
+  networking.hostName = "archeology-ec2";
+
+  system.stateVersion = "23.05"; # Did you read the comment?
+}
+
diff --git a/users/flokli/nixos/archeology-ec2/hardware-configuration.nix b/users/flokli/nixos/archeology-ec2/hardware-configuration.nix
new file mode 100644
index 000000000000..7b3d79d70a5d
--- /dev/null
+++ b/users/flokli/nixos/archeology-ec2/hardware-configuration.nix
@@ -0,0 +1,36 @@
+{ lib, modulesPath, ... }:
+# Hardware-level settings kept next to the archeology-ec2 configuration. NOTE(review): configuration.nix in this directory does not list this file in its imports - confirm it is used.
+{
+  imports =
+    [
+      (modulesPath + "/profiles/qemu-guest.nix")
+    ];
+
+  boot.initrd.availableKernelModules = [ "ahci" "xhci_pci" "virtio_pci" "sr_mod" "virtio_blk" ];
+  boot.initrd.kernelModules = [ ];
+  boot.kernelModules = [ "kvm-amd" ];
+  boot.extraModulePackages = [ ];
+
+  fileSystems."/" =  # root filesystem: XFS, found by partition label
+    {
+      device = "/dev/disk/by-partlabel/root";
+      fsType = "xfs";
+    };
+
+  fileSystems."/boot" =  # /boot: FAT (vfat), found by partition label
+    {
+      device = "/dev/disk/by-partlabel/boot";
+      fsType = "vfat";
+    };
+
+  swapDevices = [ ];  # no swap devices
+
+  # Enables DHCP on each ethernet and wireless interface. In case of scripted networking
+  # (the default) this is the recommended approach. When using systemd-networkd it's
+  # still possible to use this option, but it's recommended to use it in conjunction
+  # with explicit per-interface declarations with `networking.interfaces.<interface>.useDHCP`.
+  networking.useDHCP = lib.mkDefault true;
+  # networking.interfaces.enp1s0.useDHCP = lib.mkDefault true;
+
+  nixpkgs.hostPlatform = lib.mkDefault "x86_64-linux";
+}
diff --git a/users/flokli/nixos/archeology-ec2/parse-bucket-logs-continuously.py b/users/flokli/nixos/archeology-ec2/parse-bucket-logs-continuously.py
new file mode 100644
index 000000000000..f6ec8fb77cef
--- /dev/null
+++ b/users/flokli/nixos/archeology-ec2/parse-bucket-logs-continuously.py
@@ -0,0 +1,62 @@
+import boto3
+import datetime
+import os
+import re
+import subprocess
+import tempfile
+
+s3 = boto3.resource('s3')
+bucket_name = "nix-archeologist"
+prefix = "nix-cache-bucket-logs/"
+
+bucket = s3.Bucket(bucket_name)
+
+key_pattern = re.compile(r'.*\/(?P<y>\d{4})-(?P<m>\d{2})-(?P<d>\d{2})\.parquet$')  # noqa: E501
+
+# get a listing (which is sorted), grab the most recent key
+last_elem = list(
+    o for o in bucket.objects.filter(Prefix=prefix)
+    if key_pattern.match(o.key)
+).pop()
+
+# extract the date of that key.
+m = key_pattern.search(last_elem.key)
+last_elem_date = datetime.date(int(m.group("y")), int(m.group("m")), int(m.group("d")))  # noqa: E501
+
+# get the current date (UTC)
+now = datetime.datetime.now(tz=datetime.UTC)
+now_date = datetime.date(now.year, now.month, now.day)
+
+while True:
+    # Calculate what date would be processed next.
+    next_elem_date = last_elem_date + datetime.timedelta(days=1)
+
+    # If that's today, we don't want to process it.
+    if next_elem_date == now_date:
+        print("Caught up, would process data from today.")
+        break
+
+    # If we'd be processing data from yesterday, but it's right after midnight,
+    # also don't process - data might still be flushed.
+    if (next_elem_date + datetime.timedelta(days=1) == now_date) and now.hour == 0:  # noqa: E501
+        print("Not processing data from previous day right after midnight")
+        break
+
+    src = f"http://nix-cache-log.s3.amazonaws.com/log/{next_elem_date.isoformat()}-*"  # noqa: E501
+
+    # Invoke parse-bucket-logs script inside a tempdir and upload on success.
+    with tempfile.TemporaryDirectory() as td:
+        work_file_name = os.path.join(td, "output.parquet")
+        args = ["archeology-parse-bucket-logs", src, work_file_name]
+        subprocess.run(
+            args,
+            check=True  # throw exception if nonzero exit code
+        )
+
+        dest_key = f"{prefix}{next_elem_date.isoformat()}.parquet"
+
+        # Upload the file
+        print(f"uploading to s3://{bucket_name}{dest_key}")
+        bucket.upload_file(work_file_name, dest_key)
+
+    last_elem_date = next_elem_date
diff --git a/users/flokli/nixos/default.nix b/users/flokli/nixos/default.nix
new file mode 100644
index 000000000000..9ed223a90896
--- /dev/null
+++ b/users/flokli/nixos/default.nix
@@ -0,0 +1,32 @@
+{ depot, pkgs, lib, ... }:
+# Build and deploy targets for the NixOS system configured in this directory.
+let
+  systemFor = sys: (depot.ops.nixos.nixosFor sys).system;  # NOTE(review): not referenced anywhere in this file
+  # Returns a `deploy-<name>` script that copies `sys` to host `name`, sets it as the system profile and switches to it.
+  # assumes `name` is configured appropriately in your .ssh/config
+  deployScript = name: sys: pkgs.writeShellScriptBin "deploy-${name}" ''
+    set -eo pipefail
+    nix-copy-closure --to ${name} --gzip --use-substitutes ${sys}
+    ssh ${name} nix-env --profile /nix/var/nix/profiles/system --set ${sys}
+    ssh ${name} ${sys}/bin/switch-to-configuration switch
+  '';
+
+in
+depot.nix.readTree.drvTargets rec {
+  archeologyEc2System = (depot.ops.nixos.nixosFor ({ ... }: {  # toplevel system derivation built from ./archeology-ec2/configuration.nix
+    imports = [
+      ./archeology-ec2/configuration.nix
+    ];
+  })).config.system.build.toplevel;
+
+  deploy-archeology-ec2 = (deployScript "archeology-ec2" archeologyEc2System);
+
+  deps = (depot.nix.lazy-deps {  # the attribute that .envrc builds and adds to PATH
+    deploy-archeology-ec2.attr = "users.flokli.nixos.deploy-archeology-ec2";
+  });
+
+  shell = pkgs.mkShell {
+    name = "flokli-nixos-shell";
+    packages = [ deps ];
+  };
+}
diff --git a/users/flokli/nixos/profiles/archeology.nix b/users/flokli/nixos/profiles/archeology.nix
new file mode 100644
index 000000000000..c87d6bcf30fa
--- /dev/null
+++ b/users/flokli/nixos/profiles/archeology.nix
@@ -0,0 +1,37 @@
+# Set of unconditional config options applicable to all archeology machines.
+
+{ depot, pkgs, ... }:
+
+{
+  # Use the TVL binary cache
+  tvl.cache.enable = true;
+
+  # Start ClickHouse as a system service.
+  services.clickhouse.enable = true;
+
+  # for ClickHouse
+  # We're keeping this here rather than in the NixOS module, because I suspect
+  # this opens up timing side channels. This is a single-user, single-purpose
+  # machine, so that isn't a concern here.
+  boot.kernel.sysctl."kernel.task_delayacct" = 1;
+
+  # Enable SSH and let edef and flokli in
+  services.openssh.enable = true;
+
+  users.users.root.openssh.authorizedKeys.keys = [  # both entries grant root login; the first is a cert-authority entry, the second a plain public key
+    "cert-authority ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQCvb/7ojfcbKvHIyjnrNUOOgzy44tCkgXY9HLuyFta1jQOE9pFIK19B4dR9bOglPKf145CCL0mSFJNNqmNwwavU2uRn+TQrW+U1dQAk8Gt+gh3O49YE854hwwyMU+xD6bIuUdfxPr+r5al/Ov5Km28ZMlHOs3FoAP0hInK+eAibioxL5rVJOtgicrOVCkGoXEgnuG+LRbOYTwzdClhRUxiPjK8alCbcJQ53AeZHO4G6w9wTr+W5ILCfvW4OmUXCX01sKzaBiQuuFCF6M/H4LlnsPWLMra2twXxkOIhZblwC+lncps9lQaUgiD4koZeOCORvHW00G0L39ilFbbnVcL6Itp/m8RRWm/xRxS4RMnsdV/AhvpRLrhL3lfQ7E2oCeSM36v1S9rdg6a47zcnpL+ahG76Gz39Y7KmVRQciNx7ezbwxj3Q5lZtFykgdfGIAN+bT8ijXMO6m68g60i9Bz4IoMZGkiJGqMYLTxMQ+oRgR3Ro5lbj7E11YBHyeimoBYXYGHMkiuxopQZ7lIj3plxIzhmUlXJBA4jMw9KGHdYaLhaicIYhvQmCTAjrkt2HvxEe6lU8iws2Qv+pB6tAGundN36RVVWAckeQPZ4ZsgDP8V2FfibZ1nsrQ+zBKqaslYMAHs01Cf0Hm0PnCqagf230xaobu0iooNuXx44QKoDnB+w== edef"
+    "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPTVTXOutUZZjXLB0lUSgeKcSY/8mxKkC0ingGK1whD2 flokli"
+  ];
+
+  # Get a bunch of text editors and CLI tools.
+  environment.systemPackages = [
+    pkgs.awscli
+    pkgs.duckdb
+    pkgs.parquet-tools
+    pkgs.helix
+    pkgs.htop
+    pkgs.kakoune
+    pkgs.kitty.terminfo
+    pkgs.tmux
+  ];
+}