about summary refs log tree commit diff
path: root/third_party/nix/src/libstore/references.cc
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/nix/src/libstore/references.cc')
-rw-r--r--third_party/nix/src/libstore/references.cc124
1 files changed, 124 insertions, 0 deletions
diff --git a/third_party/nix/src/libstore/references.cc b/third_party/nix/src/libstore/references.cc
new file mode 100644
index 000000000000..c8cebaec08f3
--- /dev/null
+++ b/third_party/nix/src/libstore/references.cc
@@ -0,0 +1,124 @@
+#include "references.hh"
+
+#include <cstdlib>
+#include <map>
+
+#include <glog/logging.h>
+
+#include "archive.hh"
+#include "hash.hh"
+#include "util.hh"
+
+namespace nix {
+
+static unsigned int refLength = 32; /* characters */
+
+static void search(const unsigned char* s, size_t len, StringSet& hashes,
+                   StringSet& seen) {
+  static bool initialised = false;
+  static bool isBase32[256];
+  if (!initialised) {
+    for (bool& i : isBase32) {
+      i = false;
+    }
+    for (char base32Char : base32Chars) {
+      isBase32[(unsigned char)base32Char] = true;
+    }
+    initialised = true;
+  }
+
+  for (size_t i = 0; i + refLength <= len;) {
+    int j;
+    bool match = true;
+    for (j = refLength - 1; j >= 0; --j) {
+      if (!isBase32[(unsigned char)s[i + j]]) {
+        i += j + 1;
+        match = false;
+        break;
+      }
+    }
+    if (!match) {
+      continue;
+    }
+    string ref((const char*)s + i, refLength);
+    if (hashes.find(ref) != hashes.end()) {
+      DLOG(INFO) << "found reference to '" << ref << "' at offset " << i;
+      seen.insert(ref);
+      hashes.erase(ref);
+    }
+    ++i;
+  }
+}
+
+struct RefScanSink : Sink {
+  HashSink hashSink;
+  StringSet hashes;
+  StringSet seen;
+
+  string tail;
+
+  RefScanSink() : hashSink(htSHA256) {}
+
+  void operator()(const unsigned char* data, size_t len) override;
+};
+
+void RefScanSink::operator()(const unsigned char* data, size_t len) {
+  hashSink(data, len);
+
+  /* It's possible that a reference spans the previous and current
+     fragment, so search in the concatenation of the tail of the
+     previous fragment and the start of the current fragment. */
+  string s =
+      tail + string((const char*)data, len > refLength ? refLength : len);
+  search((const unsigned char*)s.data(), s.size(), hashes, seen);
+
+  search(data, len, hashes, seen);
+
+  size_t tailLen = len <= refLength ? len : refLength;
+  tail = string(tail, tail.size() < refLength - tailLen
+                          ? 0
+                          : tail.size() - (refLength - tailLen)) +
+         string((const char*)data + len - tailLen, tailLen);
+}
+
+PathSet scanForReferences(const string& path, const PathSet& refs,
+                          HashResult& hash) {
+  RefScanSink sink;
+  std::map<string, Path> backMap;
+
+  /* For efficiency (and a higher hit rate), just search for the
+     hash part of the file name.  (This assumes that all references
+     have the form `HASH-bla'). */
+  for (auto& i : refs) {
+    string baseName = baseNameOf(i);
+    string::size_type pos = baseName.find('-');
+    if (pos == string::npos) {
+      throw Error(format("bad reference '%1%'") % i);
+    }
+    string s = string(baseName, 0, pos);
+    assert(s.size() == refLength);
+    assert(backMap.find(s) == backMap.end());
+    // parseHash(htSHA256, s);
+    sink.hashes.insert(s);
+    backMap[s] = i;
+  }
+
+  /* Look for the hashes in the NAR dump of the path. */
+  dumpPath(path, sink);
+
+  /* Map the hashes found back to their store paths. */
+  PathSet found;
+  for (auto& i : sink.seen) {
+    std::map<string, Path>::iterator j;
+    if ((j = backMap.find(i)) == backMap.end()) {
+      abort();
+    }
+    found.insert(j->second);
+  }
+
+  hash = sink.hashSink.finish();
+
+  return found;
+}
+
+}  // namespace nix