about summary refs log tree commit diff
path: root/scripts/generate-patches.pl.in
diff options
context:
space:
mode:
authorEelco Dolstra <e.dolstra@tudelft.nl>2005-03-01T10·33+0000
committerEelco Dolstra <e.dolstra@tudelft.nl>2005-03-01T10·33+0000
commitdb322a47ffb17be3f75ea864e0732d8f948aaf19 (patch)
tree7c3a52cbabaa1c8939811dd47d47f59ea8ce84a2 /scripts/generate-patches.pl.in
parent2c4302dd7a437c3153d6de704fb5ddec004ea308 (diff)
* Use a weighted use heuristic to disambiguate between multiple
  occurances of a component.  If the shortest path distance between a
  component P and Q in the referers graph is D, then the contribution
  of Q to the use of P is 1 / R^D, where R >= 1, typically 2.  This
  expresses that distant indirect uses are less important than nearby
  uses.

  For instance, this can disambiguate between the bootstrap GCC in
  Nixpkgs and the GCC of the final stdenv (the former has more uses,
  but they are further away),  and between the GCC of the final stdenv
  and the GCC+G77 build (the latter has very few uses).

Diffstat (limited to 'scripts/generate-patches.pl.in')
-rwxr-xr-xscripts/generate-patches.pl.in85
1 files changed, 77 insertions, 8 deletions
diff --git a/scripts/generate-patches.pl.in b/scripts/generate-patches.pl.in
index 87334b5fcb8e..f146f49bb652 100755
--- a/scripts/generate-patches.pl.in
+++ b/scripts/generate-patches.pl.in
@@ -126,6 +126,64 @@ sub containsPatch {
 }
 
 
+# Compute the "weighted" number of uses of a path in the build graph.
+sub computeUses {
+    my $narFiles = shift;
+    my $path = shift;
+
+    # Find the deriver of $path.
+    return 1 unless defined $$narFiles{$path};
+    my $deriver = @{$$narFiles{$path}}[0]->{deriver};
+    return 1 unless defined $deriver && $deriver ne "";
+
+#    print "  DERIVER $deriver\n";
+
+    # Optimisation: build the referers graph from the references
+    # graph.
+    my %referers;
+    foreach my $q (keys %{$narFiles}) {
+        my @refs = split " ", @{$$narFiles{$q}}[0]->{references};
+        foreach my $r (@refs) {
+            $referers{$r} = [] unless defined $referers{$r};
+            push @{$referers{$r}}, $q;
+        }
+    }
+
+    # Determine the shortest path from $deriver to all other reachable
+    # paths in the `referers' graph.
+
+    my %dist;
+    $dist{$deriver} = 0;
+
+    my @queue = ($deriver);
+    my $pos = 0;
+    
+    while ($pos < scalar @queue) {
+        my $p = $queue[$pos];
+        $pos++;
+
+        foreach my $q (@{$referers{$p}}) {
+            if (!defined $dist{$q}) {
+                $dist{$q} = $dist{$p} + 1;
+#                print "    $q $dist{$q}\n";
+                push @queue, $q;
+            }
+        }
+    }
+
+    my $wuse = 1.0;
+    foreach my $user (keys %dist) {
+        next if $user eq $deriver;
+#        print "    $user $dist{$user}\n";
+        $wuse += 1.0 / 2.0**$dist{$user};
+    }
+
+#    print "    XXX $path $wuse\n";
+    
+    return $wuse;
+}
+
+
 # For each output path in the destination, see if we need to / can
 # create a patch.
 
@@ -134,7 +192,7 @@ print "creating patches...\n";
 foreach my $p (keys %dstOutPaths) {
 
     # If exactly the same path already exists in the source, skip it.
-# !!!    next if defined $srcOutPaths{$p};
+    next if defined $srcOutPaths{$p};
     
     print "  $p\n";
 
@@ -154,17 +212,32 @@ foreach my $p (keys %dstOutPaths) {
         (my $name2, my $version2) = getNameVersion $q;
         if ($name eq $name2) {
 
-            # If the sizes differ to much, then skip.  This
+            # If the sizes differ too much, then skip.  This
             # disambiguates between, e.g., a real component and a
             # wrapper component (cf. Firefox in Nixpkgs).
             my $srcSize = @{$srcNarFiles{$q}}[0]->{size};
             my $dstSize = @{$dstNarFiles{$p}}[0]->{size};
             my $ratio = $srcSize / $dstSize;
             $ratio = 1 / $ratio if $ratio < 1;
-            print "  $srcSize $dstSize $ratio $q\n";
+#            print "  SIZE $srcSize $dstSize $ratio $q\n";
 
             if ($ratio >= 3) {
-                print "  SKIPPING $q due to size ratio $ratio\n";
+                print "  SKIPPING $q due to size ratio $ratio ($srcSize $dstSize)\n";
+                next;
+            }
+
+            # If the numbers of weighted uses differ too much, then
+            # skip.  This disambiguates between, e.g., the bootstrap
+            # GCC and the final GCC in Nixpkgs.
+            my $srcUses = computeUses \%srcNarFiles, $q;
+            my $dstUses = computeUses \%dstNarFiles, $p;
+            $ratio = $srcUses / $dstUses;
+            $ratio = 1 / $ratio if $ratio < 1;
+            print "  USE $srcUses $dstUses $ratio $q\n";
+            
+            if ($ratio >= 2) {
+                print "  SKIPPING $q due to use ratio $ratio ($srcUses $dstUses)\n";
+                next;
             }
 
             # If there are multiple matching names, include the ones
@@ -190,8 +263,6 @@ foreach my $p (keys %dstOutPaths) {
         # Generate a patch between $closest and $p.
         print "  $p <- $closest\n";
 
-        next;
-
         # If the patch already exists, skip it.
         if (containsPatch(\%srcPatches, $p, $closest) ||
             containsPatch(\%dstPatches, $p, $closest))
@@ -260,8 +331,6 @@ foreach my $p (keys %dstOutPaths) {
     }
 }
 
-exit 0; # !!!
-
 
 # Add in any potentially useful patches in the source (namely, those
 # patches that produce either paths in the destination or paths that