about summary refs log tree commit diff
diff options
context:
space:
mode:
author Eelco Dolstra <e.dolstra@tudelft.nl> 2005-01-05T09·58+0000
committer Eelco Dolstra <e.dolstra@tudelft.nl> 2005-01-05T09·58+0000
commit 6af4a5a71ffee704c4760be4259d69bb7e21b870 (patch)
tree ce5963bc38b56b8972fe2a2c80acd57f069c480e
parent a03397be4c176c142871e7337dfef7734cd508ca (diff)
* Prototype store optimiser. It searches the Nix store for identical
  files and hard-links them to each other to save disk space.

  Currently it doesn't actually do the hard-linking, it just reports
  the amount of space saved if it did.

-rwxr-xr-x scripts/optimise-store.pl 69
1 files changed, 69 insertions, 0 deletions
diff --git a/scripts/optimise-store.pl b/scripts/optimise-store.pl
new file mode 100755
index 000000000000..359140073b5b
--- /dev/null
+++ b/scripts/optimise-store.pl
@@ -0,0 +1,69 @@
+#! /usr/bin/perl -w
+
+# Prototype store optimiser: hashes every regular file under @paths and
+# reports how much disk space hard-linking identical files would save.
+# It only reads the store and prints statistics; no links are created.
+
+use strict;
+
+# Unbuffer STDOUT so the progress dots show up while the scan is running.
+{ my $ofh = select STDOUT;
+  $| = 1;
+  select $ofh;
+}
+
+my @paths = ("/nix/store");
+
+# NOTE(review): fixed, predictable name in /tmp -- consider File::Temp.
+my $tmpfile = "/tmp/nix-optimise-hash-list";
+
+# Hash every regular file; -print0 / -0 pass the names NUL-separated.
+system("find @paths -type f -print0 | xargs -0 md5sum -- > $tmpfile") == 0
+    or die "cannot hash store files";
+
+# Sorting by line puts files with the same hash on adjacent lines.
+system("sort $tmpfile > $tmpfile.sorted") == 0
+    or die "cannot sort list";
+
+open my $list, '<', "$tmpfile.sorted"
+    or die "cannot open $tmpfile.sorted: $!";
+
+my $prevHash;         # hash of the group of identical files being scanned
+my $totalSpace = 0;   # bytes in all files that could be stat'ed
+my $savedSpace = 0;   # bytes in files whose hash repeats the previous one
+my $files = 0;        # files counted so far, drives the progress dots
+
+while (my $line = <$list>) {
+    chomp $line;
+
+    # md5sum writes "<hash> <mode char><name>"; matching the separator
+    # exactly keeps leading whitespace that belongs to the file name.
+    $line =~ /^([0-9a-f]{32}) [ *](.*)$/
+        or die "cannot parse hash list line: $line";
+    my ($curHash, $curFile) = ($1, $2);
+
+    # stat fails e.g. if the file disappeared after hashing; skip it
+    # rather than adding an undefined size to the totals.
+    my $fileSize = (stat $curFile)[7];
+    if (!defined $fileSize) {
+        warn "cannot stat $curFile: $!\n";
+        next;
+    }
+    $totalSpace += $fileSize;
+
+    if (defined $prevHash && $curHash eq $prevHash) {
+        $savedSpace += $fileSize;
+    } else {
+        $prevHash = $curHash;
+    }
+
+    print "." if ($files++ % 100 == 0);
+}
+
+close $list;
+
+print "\n";
+print "total space = $totalSpace\n";
+print "saved space = $savedSpace\n";
+# An empty store gives a zero total; report 0 instead of dividing by it.
+my $savings = $totalSpace ? ($savedSpace / $totalSpace) * 100.0 : 0;
+print "savings = $savings %\n";