about summary refs log tree commit diff
path: root/third_party/nix/src/libstore/references.cc
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/nix/src/libstore/references.cc')
-rw-r--r--third_party/nix/src/libstore/references.cc126
1 files changed, 126 insertions, 0 deletions
diff --git a/third_party/nix/src/libstore/references.cc b/third_party/nix/src/libstore/references.cc
new file mode 100644
index 000000000000..f120439c1060
--- /dev/null
+++ b/third_party/nix/src/libstore/references.cc
@@ -0,0 +1,126 @@
+#include "libstore/references.hh"
+
+#include <cstdlib>
+#include <map>
+
+#include <glog/logging.h>
+
+#include "libutil/archive.hh"
+#include "libutil/hash.hh"
+#include "libutil/util.hh"
+
+namespace nix {
+
+constexpr unsigned int kRefLength = 32; /* characters */
+
+static void search(const unsigned char* s, size_t len, StringSet& hashes,
+                   StringSet& seen) {
+  static bool initialised = false;
+  static bool isBase32[256];
+  if (!initialised) {
+    for (bool& i : isBase32) {
+      i = false;
+    }
+    for (char base32Char : base32Chars) {
+      isBase32[static_cast<unsigned char>(base32Char)] = true;
+    }
+    initialised = true;
+  }
+
+  for (size_t i = 0; i + kRefLength <= len;) {
+    int j = 0;
+    bool match = true;
+    for (j = kRefLength - 1; j >= 0; --j) {
+      if (!isBase32[s[i + j]]) {
+        i += j + 1;
+        match = false;
+        break;
+      }
+    }
+    if (!match) {
+      continue;
+    }
+    std::string ref(reinterpret_cast<const char*>(s) + i, kRefLength);
+    if (hashes.find(ref) != hashes.end()) {
+      DLOG(INFO) << "found reference to '" << ref << "' at offset " << i;
+      seen.insert(ref);
+      hashes.erase(ref);
+    }
+    ++i;
+  }
+}
+
+struct RefScanSink : Sink {
+  HashSink hashSink;
+  StringSet hashes;
+  StringSet seen;
+
+  std::string tail;
+
+  RefScanSink() : hashSink(htSHA256) {}
+
+  void operator()(const unsigned char* data, size_t len) override;
+};
+
+void RefScanSink::operator()(const unsigned char* data, size_t len) {
+  hashSink(data, len);
+
+  /* It's possible that a reference spans the previous and current
+     fragment, so search in the concatenation of the tail of the
+     previous fragment and the start of the current fragment. */
+  std::string s = tail + std::string(reinterpret_cast<const char*>(data),
+                                     len > kRefLength ? kRefLength : len);
+  search(reinterpret_cast<const unsigned char*>(s.data()), s.size(), hashes,
+         seen);
+
+  search(data, len, hashes, seen);
+
+  size_t tailLen = len <= kRefLength ? len : kRefLength;
+  tail =
+      std::string(tail, tail.size() < kRefLength - tailLen
+                            ? 0
+                            : tail.size() - (kRefLength - tailLen)) +
+      std::string(reinterpret_cast<const char*>(data) + len - tailLen, tailLen);
+}
+
+PathSet scanForReferences(const std::string& path, const PathSet& refs,
+                          HashResult& hash) {
+  RefScanSink sink;
+  std::map<std::string, Path> backMap;
+
+  /* For efficiency (and a higher hit rate), just search for the
+     hash part of the file name.  (This assumes that all references
+     have the form `HASH-bla'). */
+  for (auto& i : refs) {
+    std::string baseName = baseNameOf(i);
+    std::string::size_type pos = baseName.find('-');
+    if (pos == std::string::npos) {
+      throw Error(format("bad reference '%1%'") % i);
+    }
+    std::string s = std::string(baseName, 0, pos);
+    assert(s.size() == kRefLength);
+    assert(backMap.find(s) == backMap.end());
+    // parseHash(htSHA256, s);
+    sink.hashes.insert(s);
+    backMap[s] = i;
+  }
+
+  /* Look for the hashes in the NAR dump of the path. */
+  dumpPath(path, sink);
+
+  /* Map the hashes found back to their store paths. */
+  PathSet found;
+  for (auto& i : sink.seen) {
+    std::map<std::string, Path>::iterator j;
+    if ((j = backMap.find(i)) == backMap.end()) {
+      abort();
+    }
+    found.insert(j->second);
+  }
+
+  hash = sink.hashSink.finish();
+
+  return found;
+}
+
+}  // namespace nix