about summary refs log tree commit diff
path: root/src/libutil
diff options
context:
space:
mode:
authorEelco Dolstra <e.dolstra@tudelft.nl>2005-01-14T16·04+0000
committerEelco Dolstra <e.dolstra@tudelft.nl>2005-01-14T16·04+0000
commitd58a11e019813902b6c4547ca61a127938b2cc20 (patch)
treeddaff27d1a3c0604ffac989867cf63fbf8ce94ff /src/libutil
parent9530cc31700f68fd229eee69eabd2baa099f404a (diff)
* Shorten SHA-256 hashes used in store path name generation to 160
  bits, then encode them in a radix-32 representation (using digits
  and letters except e, o, u, and t).  This produces store paths like
  /nix/store/4i0zb0z7f88mwghjirkz702a71dcfivn-aterm-2.3.1.  The nice
  thing about this is that the hash part of the file name is still 32
  characters, as before with MD5.

  (Of course, shortening SHA-256 to 160 bits makes it no better than
  SHA-160 in theory, but hopefully it's a bit more resistant to
  attacks; it's certainly a lot slower.)

Diffstat (limited to 'src/libutil')
-rw-r--r--src/libutil/hash.cc72
-rw-r--r--src/libutil/hash.hh24
2 files changed, 85 insertions, 11 deletions
diff --git a/src/libutil/hash.cc b/src/libutil/hash.cc
index cd7043090ec7..324e2bf7f4c2 100644
--- a/src/libutil/hash.cc
+++ b/src/libutil/hash.cc
@@ -15,6 +15,14 @@ extern "C" {
 
 
 
+Hash::Hash()
+{
+    type = htUnknown;
+    hashSize = 0;
+    memset(hash, 0, maxHashSize);
+}
+
+
 Hash::Hash(HashType type)
 {
     this->type = type;
@@ -23,7 +31,7 @@ Hash::Hash(HashType type)
     else if (type == htSHA256) hashSize = sha256HashSize;
     else throw Error("unknown hash type");
     assert(hashSize <= maxHashSize);
-    memset(hash, 0, hashSize);
+    memset(hash, 0, maxHashSize);
 }
 
 
@@ -52,21 +60,21 @@ bool Hash::operator < (const Hash & h) const
 }
 
 
-Hash::operator string() const
+string printHash(const Hash & hash)
 {
     ostringstream str;
-    for (unsigned int i = 0; i < hashSize; i++) {
+    for (unsigned int i = 0; i < hash.hashSize; i++) {
         str.fill('0');
         str.width(2);
-        str << hex << (int) hash[i];
+        str << hex << (int) hash.hash[i];
     }
     return str.str();
 }
 
     
-Hash parseHash(const string & s)
+Hash parseHash(HashType ht, const string & s)
 {
-    Hash hash(htMD5);
+    Hash hash(ht);
     if (s.length() != hash.hashSize * 2)
         throw Error(format("invalid hash `%1%'") % s);
     for (unsigned int i = 0; i < hash.hashSize; i++) {
@@ -82,6 +90,48 @@ Hash parseHash(const string & s)
 }
 
 
+static unsigned short divMod(uint16_t * words, unsigned short y)
+{
+    unsigned int borrow = 0;
+
+    int pos = (Hash::maxHashSize / 2) - 1;
+    while (pos >= 0 && !words[pos]) --pos;
+
+    for ( ; pos >= 0; --pos) {
+        unsigned int s = words[pos] + (borrow << 16);
+        unsigned int d = s / y;
+        borrow = s % y;
+        words[pos] = d;
+    }
+
+    return borrow;
+}
+
+
+// omitted: E O U T
+char chars[] = "0123456789abcdfghijklmnpqrsvwxyz";
+
+
+string printHash32(const Hash & hash)
+{
+    Hash hash2(hash);
+    unsigned int len = (hash.hashSize * 8 - 1) / 5 + 1;
+    
+    string s(len, '0');
+
+    int pos = len - 1;
+    while (pos >= 0) {
+        unsigned short digit = divMod((uint16_t *) hash2.hash, 32);
+        s[pos--] = chars[digit];
+    }
+
+    for (unsigned int i = 0; i < hash2.maxHashSize; ++i)
+        assert(hash2.hash[i] == 0);
+
+    return s;
+}
+
+
 bool isHash(const string & s)
 {
     if (s.length() != 32) return false;
@@ -186,3 +236,13 @@ Hash hashPath(const Path & path, HashType ht)
     finish(ht, sink.ctx, hash.hash);
     return hash;
 }
+
+
+Hash compressHash(const Hash & hash, unsigned int newSize)
+{
+    Hash h;
+    h.hashSize = newSize;
+    for (unsigned int i = 0; i < hash.hashSize; ++i)
+        h.hash[i % newSize] ^= hash.hash[i];
+    return h;
+}
diff --git a/src/libutil/hash.hh b/src/libutil/hash.hh
index 4490d2ff7ace..0c9d7b9cb936 100644
--- a/src/libutil/hash.hh
+++ b/src/libutil/hash.hh
@@ -8,7 +8,7 @@
 using namespace std;
 
 
-typedef enum { htMD5, htSHA1, htSHA256 } HashType;
+typedef enum { htUnknown, htMD5, htSHA1, htSHA256 } HashType;
 
 
 const int md5HashSize = 16;
@@ -24,7 +24,10 @@ struct Hash
 
     HashType type;
 
-    /* Create a zeroed hash object. */
+    /* Create an unusable hash object. */
+    Hash();
+
+    /* Create a zero-filled hash object. */
     Hash(HashType type);
 
     /* Check whether two hash are equal. */
@@ -36,13 +39,20 @@ struct Hash
     /* For sorting. */
     bool operator < (const Hash & h) const;
 
-    /* Convert a hash code into a hexadecimal representation. */
-    operator string() const;
 };
 
 
+/* Convert a hash to a hexadecimal representation. */
+string printHash(const Hash & hash);
+
 /* Parse a hexadecimal representation of a hash code. */
-Hash parseHash(const string & s);
+Hash parseHash(HashType ht, const string & s);
+
+/* Convert a hash to a base-32 representation. */
+string printHash32(const Hash & hash);
+
+/* Parse a base-32 representation of a hash code. */
+Hash parseHash32(HashType ht, const string & s);
 
 /* Verify that the given string is a valid hash code. */
 bool isHash(const string & s);
@@ -57,5 +67,9 @@ Hash hashFile(const Path & path, HashType ht);
    md5(dump(path)). */
 Hash hashPath(const Path & path, HashType ht);
 
+/* Compress a hash to the specified number of bytes by cyclically
+   XORing bytes together. */
+Hash compressHash(const Hash & hash, unsigned int newSize);
+
 
 #endif /* !__HASH_H */