about summary refs log tree commit diff
diff options
context:
space:
mode:
authorEelco Dolstra <e.dolstra@tudelft.nl>2006-09-19T14·58+0000
committerEelco Dolstra <e.dolstra@tudelft.nl>2006-09-19T14·58+0000
commit6dbed1bf38cd881ebf13aae39a4d988e4bd57f79 (patch)
tree1ae9878e8156c9702238c3b7153e14d0fa9521e2
parent07cec27848014244c8f62985c5c0eb045bc86634 (diff)
* `optimise-store.pl' reduces disk space consumption by hard-linking
  all identical files in the Nix store to each other.  (Previously
  it only computed the size that would be saved by doing so.)

-rwxr-xr-xscripts/optimise-store.pl56
1 files changed, 40 insertions, 16 deletions
diff --git a/scripts/optimise-store.pl b/scripts/optimise-store.pl
index 7bad1d5e53ba..1ed89272d971 100755
--- a/scripts/optimise-store.pl
+++ b/scripts/optimise-store.pl
@@ -1,61 +1,85 @@
 #! /usr/bin/perl -w
 
 use strict;
+use File::Basename;
 
-#{ my $ofh = select STDOUT;
-#  $| = 1;
-#  select $ofh;
-#}
 
-#my @paths = ("/nix/store/d49mc94xwwd7wf1xzfh4ch4cypn0ajjr-glibc-2.3.6", "/nix/store/1mgfgy3ga4m9z60747s0yzxl0g6w5kxz-glibc-2.3.6");
 my @paths = ("/nix/store");
 
+
+print "hashing...\n";
+
 my $hashList = "/tmp/nix-optimise-hash-list";
 
 system("find @paths -type f -print0 | xargs -0 md5sum -- > $hashList") == 0
     or die "cannot hash store files";
 
+
+print "sorting by hash...\n";
+
 system("sort $hashList > $hashList.sorted") == 0
     or die "cannot sort list";
 
+
+sub atomicLink {
+    my $target = shift;
+    my $new = shift;
+    my $tmpNew = "${new}_optimise.$$";
+
+    # Make the directory writable temporarily.
+    my $dir = dirname $new;
+    my @st = stat $dir or die;
+
+    chmod ($st[2] | 0200, $dir) or die "cannot make `$dir' writable: $!";
+    
+    link $target, $tmpNew or die "cannot create hard link `$tmpNew': $!";
+
+    rename $tmpNew, $new or die "cannot rename `$tmpNew' to `$new': $!";
+
+    chmod ($st[2], $dir) or die "cannot restore permission on `$dir': $!";
+}
+
+
+print "hard-linking...\n";
+
 open LIST, "<$hashList.sorted" or die;
 
 my $prevFile;
 my $prevHash;
+my $prevInode;
 
 my $totalSpace = 0;
 my $savedSpace = 0;
 
-my $files = 0;
-
 while (<LIST>) {
     /^([0-9a-f]*)\s+(.*)$/ or die;
     my $curFile = $2;
     my $curHash = $1;
 
-    my $fileSize = (stat $curFile)[7];
+    my @st = stat $curFile or die;
+    next if ($st[2] & 0222) != 0; # skip writable files
+
+    my $fileSize = $st[7];
     $totalSpace += $fileSize;
 
     if (defined $prevHash && $curHash eq $prevHash) {
         
-        print "$curFile = $prevFile\n";
-
-        $savedSpace += $fileSize;
+        if ($st[1] != $prevInode) {
+            print "$curFile = $prevFile\n";
+            atomicLink $prevFile, $curFile;
+            $savedSpace += $fileSize;
+        }
         
     } else {
         $prevFile = $curFile;
         $prevHash = $curHash;
+        $prevInode = $st[1];
     }
-
-#    print "." if ($files++ % 100 == 0);
 }
 
-#print "\n";
-
 print "total space = $totalSpace\n";
 print "saved space = $savedSpace\n";
 my $savings = ($savedSpace / $totalSpace) * 100.0;
 print "savings = $savings %\n";
 
-
 close LIST;