about summary refs log tree commit diff
diff options
context:
space:
mode:
authorEelco Dolstra <e.dolstra@tudelft.nl>2005-11-16T08·27+0000
committerEelco Dolstra <e.dolstra@tudelft.nl>2005-11-16T08·27+0000
commitb7f008fc353ea05c423fb571047144052ce691c3 (patch)
treed2d62977981a0ba8ab7efecab481d80e617f488b
parent9311ab76a523de516b6bc98afda9e4b790225514 (diff)
* Did something useful while waiting at IAD: reference scanning is now
  much faster.

-rw-r--r--src/libstore/build.cc5
-rw-r--r--src/libstore/references.cc68
-rw-r--r--src/libstore/references.hh2
-rw-r--r--src/libutil/hash.cc6
-rw-r--r--src/libutil/hash.hh2
5 files changed, 52 insertions, 31 deletions
diff --git a/src/libstore/build.cc b/src/libstore/build.cc
index e4ff1efd3e15..193974bf83bd 100644
--- a/src/libstore/build.cc
+++ b/src/libstore/build.cc
@@ -1390,10 +1390,7 @@ void DerivationGoal::computeClosure()
 	   in it. */
         PathSet references;
         if (!pathExists(path + "/nix-support/no-scan")) {
-            Paths references2;
-            references2 = filterReferences(path, 
-                Paths(allPaths.begin(), allPaths.end()));
-            references = PathSet(references2.begin(), references2.end());
+            references = scanForReferences(path, allPaths);
 
             /* For debugging, print out the referenced and
                unreferenced paths. */
diff --git a/src/libstore/references.cc b/src/libstore/references.cc
index 843aed97fc3b..75a6efead0b2 100644
--- a/src/libstore/references.cc
+++ b/src/libstore/references.cc
@@ -11,26 +11,45 @@
 #include "hash.hh"
 
 
+static unsigned int refLength = 32; /* characters */
+
+
 static void search(const string & s,
-    Strings & ids, Strings & seen)
+    StringSet & ids, StringSet & seen)
 {
-    for (Strings::iterator i = ids.begin();
-         i != ids.end(); )
-    {
-        checkInterrupt();
-        if (s.find(*i) == string::npos)
-            i++;
-        else {
-            debug(format("found reference to `%1%'") % *i);
-            seen.push_back(*i);
-            i = ids.erase(i);
+    static bool initialised = false;
+    static bool isBase32[256];
+    if (!initialised) {
+        for (unsigned int i = 0; i < 256; ++i) isBase32[i] = false;
+        for (unsigned int i = 0; i < base32Chars.size(); ++i)
+            isBase32[(unsigned char) base32Chars[i]] = true;
+        initialised = true;
+    }
+    
+    for (unsigned int i = 0; i + refLength <= s.size(); ) {
+        int j;
+        bool match = true;
+        for (j = refLength - 1; j >= 0; --j)
+            if (!isBase32[(unsigned char) s[i + j]]) {
+                i += j + 1;
+                match = false;
+                break;
+            }
+        if (!match) continue;
+        string ref(s, i, refLength);
+        if (ids.find(ref) != ids.end()) {
+            debug(format("found reference to `%1%' at offset `%2%'")
+                  % ref % i);
+            seen.insert(ref);
+            ids.erase(ref);
         }
+        ++i;
     }
 }
 
 
 void checkPath(const string & path,
-    Strings & ids, Strings & seen)
+    StringSet & ids, StringSet & seen)
 {
     checkInterrupt();
     
@@ -69,36 +88,35 @@ void checkPath(const string & path,
 }
 
 
-Strings filterReferences(const string & path, const Strings & paths)
+PathSet scanForReferences(const string & path, const PathSet & paths)
 {
-    map<string, string> backMap;
-    Strings ids;
-    Strings seen;
+    map<string, Path> backMap;
+    StringSet ids;
+    StringSet seen;
 
     /* For efficiency (and a higher hit rate), just search for the
        hash part of the file name.  (This assumes that all references
        have the form `HASH-bla'). */
-    for (Strings::const_iterator i = paths.begin();
-         i != paths.end(); i++)
-    {
+    for (PathSet::const_iterator i = paths.begin(); i != paths.end(); i++) {
         string baseName = baseNameOf(*i);
         unsigned int pos = baseName.find('-');
         if (pos == string::npos)
             throw Error(format("bad reference `%1%'") % *i);
         string s = string(baseName, 0, pos);
+        assert(s.size() == refLength);
+        assert(backMap.find(s) == backMap.end());
         // parseHash(htSHA256, s);
-        ids.push_back(s);
+        ids.insert(s);
         backMap[s] = *i;
     }
 
     checkPath(path, ids, seen);
 
-    Strings found;
-    for (Strings::iterator i = seen.begin(); i != seen.end(); i++)
-    {
-        map<string, string>::iterator j;
+    PathSet found;
+    for (StringSet::iterator i = seen.begin(); i != seen.end(); i++) {
+        map<string, Path>::iterator j;
         if ((j = backMap.find(*i)) == backMap.end()) abort();
-        found.push_back(j->second);
+        found.insert(j->second);
     }
 
     return found;
diff --git a/src/libstore/references.hh b/src/libstore/references.hh
index ada23a883302..4b1299e7a352 100644
--- a/src/libstore/references.hh
+++ b/src/libstore/references.hh
@@ -4,7 +4,7 @@
 #include "util.hh"
 
 
-Strings filterReferences(const Path & path, const Strings & refs);
+PathSet scanForReferences(const Path & path, const PathSet & refs);
 
 
 #endif /* !__REFERENCES_H */
diff --git a/src/libutil/hash.cc b/src/libutil/hash.cc
index 271e63665b8f..71d4ba67e2c0 100644
--- a/src/libutil/hash.cc
+++ b/src/libutil/hash.cc
@@ -109,13 +109,15 @@ static unsigned char divMod(unsigned char * bytes, unsigned char y)
 
 
 // omitted: E O U T
-char chars[] = "0123456789abcdfghijklmnpqrsvwxyz";
+const string base32Chars = "0123456789abcdfghijklmnpqrsvwxyz";
 
 
 string printHash32(const Hash & hash)
 {
     Hash hash2(hash);
     unsigned int len = (hash.hashSize * 8 - 1) / 5 + 1;
+
+    const char * chars = base32Chars.c_str();
     
     string s(len, '0');
 
@@ -165,6 +167,8 @@ Hash parseHash32(HashType ht, const string & s)
 {
     Hash hash(ht);
 
+    const char * chars = base32Chars.c_str();
+
     for (unsigned int i = 0; i < s.length(); ++i) {
         char c = s[i];
         unsigned char digit;
diff --git a/src/libutil/hash.hh b/src/libutil/hash.hh
index 398b17421644..0a0dfed7eefe 100644
--- a/src/libutil/hash.hh
+++ b/src/libutil/hash.hh
@@ -15,6 +15,8 @@ const int md5HashSize = 16;
 const int sha1HashSize = 20;
 const int sha256HashSize = 32;
 
+extern const string base32Chars;
+
 
 struct Hash
 {