From 4f22203a3aecd070881ae9b4eabc47532d948f01 Mon Sep 17 00:00:00 2001
From: edef
Date: Wed, 17 Jan 2024 16:04:03 +0000
Subject: feat(tvix/tools/crunch-v2): init

This is a tool for ingesting subsets of cache.nixos.org into its own
flattened castore format.

Currently, produced chunks are not preserved, and this purely serves as
a way of measuring compression/deduplication ratios for various chunking
and compression parameters.

Change-Id: I3983af02a66f7837d76874ee0fc8b2fab62ac17e
Reviewed-on: https://cl.tvl.fyi/c/depot/+/10486
Tested-by: BuildkiteCI
Reviewed-by: flokli
---
Reviewer notes (free-form area after the three-dash line; ignored by
`git am` / `git apply`):

* This copy of the patch was reconstructed from a rendering in which all
  line breaks were collapsed. Line structure and the 2-space indentation
  inside the hunk are restored by convention; NOTE(review): whitespace may
  not match the original blob byte-for-byte, so confirm against the blob
  id on the `index` line before relying on it.
* Schema overview, as far as the hunk itself shows:
  - `Path` carries a `nar_hash` plus exactly one root node (directory,
    file or symlink) via the `node` oneof.
  - `DirectoryNode` embeds its child directories, files and symlinks
    directly (it is self-recursive through `directories`), rather than
    referring to them by digest.
  - `FileNode` lists its `chunks`; each `Chunk` records a `hash`, a
    `size` and a `size_compressed`. NOTE(review): units are presumably
    bytes, and the hash algorithms are not stated anywhere in the
    schema -- confirm against the producer.
  - All names and the symlink target are `bytes`, not `string`, so they
    are not required to be valid UTF-8.
* Because `DirectoryNode` nests recursively, readers of untrusted data
  should keep the protobuf parser's recursion limit in mind.

 tvix/tools/crunch-v2/protos/flatstore.proto | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)
 create mode 100644 tvix/tools/crunch-v2/protos/flatstore.proto

(limited to 'tvix/tools/crunch-v2/protos/flatstore.proto')

diff --git a/tvix/tools/crunch-v2/protos/flatstore.proto b/tvix/tools/crunch-v2/protos/flatstore.proto
new file mode 100644
index 000000000000..2f2838fc75c2
--- /dev/null
+++ b/tvix/tools/crunch-v2/protos/flatstore.proto
@@ -0,0 +1,38 @@
+syntax = "proto3";
+
+package tvix.flatstore.v1;
+
+message Path {
+  bytes nar_hash = 1;
+
+  oneof node {
+    DirectoryNode directory = 2;
+    FileNode file = 3;
+    SymlinkNode symlink = 4;
+  }
+}
+
+message DirectoryNode {
+  bytes name = 1;
+  repeated DirectoryNode directories = 2;
+  repeated FileNode files = 3;
+  repeated SymlinkNode symlinks = 4;
+}
+
+message FileNode {
+  bytes name = 1;
+  bytes hash = 2;
+  repeated Chunk chunks = 3;
+  bool executable = 4;
+}
+
+message Chunk {
+  bytes hash = 1;
+  uint32 size = 2;
+  uint32 size_compressed = 3;
+}
+
+message SymlinkNode {
+  bytes name = 1;
+  bytes target = 2;
+}
-- 
cgit 1.4.1