about summary refs log tree commit diff
diff options
context:
space:
mode:
author Eelco Dolstra <e.dolstra@tudelft.nl> 2006-05-03T23·07+0000
committer Eelco Dolstra <e.dolstra@tudelft.nl> 2006-05-03T23·07+0000
commit db0d865ec4fb646856731000d540d968011f4daf (patch)
tree e90e63ad20cc264a41d94f814e9cf56408b805b6
parent 052cefe1bdb6be0257ef2f1c1f128b7e635814ed (diff)
* New ATermMap, seems more-or-less finished.
-rw-r--r-- table/aterm-map.cc 193
-rw-r--r-- table/timings.txt 11
2 files changed, 154 insertions, 50 deletions
diff --git a/table/aterm-map.cc b/table/aterm-map.cc
index 53623d0880fa..465cabf34eda 100644
--- a/table/aterm-map.cc
+++ b/table/aterm-map.cc
@@ -48,32 +48,53 @@ public:
     ATerm get(ATerm key) const;
 
     void remove(ATerm key);
-    void remove(const string & key);
 
 private:
     void init(unsigned int expectedCount);
 
+    void free();
+
     void resizeTable(unsigned int expectedCount);
 
-    unsigned int hash1(ATerm key) const;
-    unsigned int hash2(ATerm key) const;
+    void copy(KeyValue * elements, unsigned int size);
+    
+    inline unsigned int hash1(ATerm key) const;
+    inline unsigned int hash2(ATerm key) const;
 };
 
 
 ATermMap::ATermMap(unsigned int expectedCount)
 {
-    init(expectedCount);
+    init(expectedCount * 10 / 9); /* slight adjustment */
+}
+
+
+ATermMap::ATermMap(const ATermMap & map)
+{
+    init(map.maxCount);
+    copy(map.hashTable, map.size);
+}
+
+
+ATermMap & ATermMap::operator = (const ATermMap & map)
+{
+    if (this == &map) return *this;
+    free();
+    init(map.maxCount);
+    copy(map.hashTable, map.size);
+    return *this;
 }
 
 
 ATermMap::~ATermMap()
 {
-    if (hashTable) free(hashTable); 
+    free();
 }
 
 
 void ATermMap::init(unsigned int expectedCount)
 {
+    assert(sizeof(ATerm) * 2 == sizeof(KeyValue));
     size = 0;
     count = 0;
     maxCount = 0;
@@ -82,88 +103,127 @@ void ATermMap::init(unsigned int expectedCount)
 }
 
 
-void ATermMap::resizeTable(unsigned int expectedCount)
+void ATermMap::free()
+{
+    if (hashTable) {
+        ATunprotectArray((ATerm *) hashTable);
+        ::free(hashTable);
+        hashTable = 0;
+    }
+}
+
+
+static unsigned int roundToPowerOf2(unsigned int x)
 {
-    assert(size == 0);
+    x--;
+    x |= x >> 1; x |= x >> 2; x |= x >> 4; x |= x >> 8; x |= x >> 16;
+    x++;
+    return x;
+}
+
 
-    this->maxCount = expectedCount;
+static const unsigned int maxLoadFactor = /* 1 / */ 3;
+static unsigned int nrResizes = 0;
 
-    unsigned int newSize = 128;
 
-    hashTable = (KeyValue *) calloc(sizeof(KeyValue), newSize);
+void ATermMap::resizeTable(unsigned int expectedCount)
+{
+    if (expectedCount == 0) expectedCount = 1;
+//     cout << maxCount << " -> " << expectedCount << endl;
+//     cout << maxCount << " " << size << endl;
+//     cout << (double) size / maxCount << endl;
+
+    unsigned int oldSize = size;
+    KeyValue * oldHashTable = hashTable;
+
+    maxCount = expectedCount;
+    size = roundToPowerOf2(maxCount * maxLoadFactor);
+    hashTable = (KeyValue *) calloc(sizeof(KeyValue), size);
+    ATprotectArray((ATerm *) hashTable, size * 2);
+    
+//     cout << size << endl;
+
+    /* Re-hash the elements in the old table. */
+    if (oldSize != 0) {
+        count = 0;
+        copy(oldHashTable, oldSize);
+        ATunprotectArray((ATerm *) oldHashTable);
+        ::free(oldHashTable);
+        nrResizes++;
+    }
+}
+
 
-    size = newSize;
+void ATermMap::copy(KeyValue * elements, unsigned int size)
+{
+    for (unsigned int i = 0; i < size; ++i)
+        if (elements[i].value) /* i.e., non-empty, non-deleted element */
+            set(elements[i].key, elements[i].value);
 }
 
 
+static const unsigned int shift = 16;
+static const unsigned int knuth = (unsigned int) (0.6180339887 * (1 << shift));
+
+
 unsigned int ATermMap::hash1(ATerm key) const
 {
     /* Don't care about the least significant bits of the ATerm
        pointer since they're always 0. */
     unsigned int key2 = ((unsigned int) key) >> 2;
 
-#if 0
-    double d1 = key2 * 0.6180339887;
-    unsigned int h1 = (int) (size * (d1 - floor(d1)));
-#endif
-
-#if 0
-    unsigned int h1 = size * (key2 * 61803 % 100000);
-#endif
-
-    unsigned int h1 = (size * ((key2 * 40503) & 0xffff)) >> 16;
-
-//     cout << key2 << " " << h1 << endl;
-    
-//     unsigned int h1 = (key2 * 134217689) & (size - 1);
+    /* Approximately equal to:
+    double d = key2 * 0.6180339887;
+    unsigned int h = (int) (size * (d - floor(d)));
+    */
+ 
+    unsigned int h = (size * ((key2 * knuth) & ((1 << shift) - 1))) >> shift;
 
-    return h1 % size;
+    return h;
 }
 
 
 unsigned int ATermMap::hash2(ATerm key) const
 {
     unsigned int key2 = ((unsigned int) key) >> 2;
-
-#if 0    
-    double d2 = key2 * 0.6180339887;
-    unsigned int h2 = 1 | (int) (size * (d2 - floor(d2)));
-#endif
-
-    unsigned int h3 = ((key2 * 134217689) & (size - 1)) | 1;
-    return h3;
+    /* Note: the result must be relatively prime to `size' (which is a
+       power of 2), so we make sure that the result is always odd. */
+    unsigned int h = ((key2 * 134217689) & (size - 1)) | 1;
+    return h;
 }
 
 
-unsigned int nrItemsSet = 0;
-unsigned int nrSetProbes = 0;
-unsigned int nrMaxProbes = 0;
+static unsigned int nrItemsSet = 0;
+static unsigned int nrSetProbes = 0;
 
 
 void ATermMap::set(ATerm key, ATerm value)
 {
-    unsigned int probes = 0;
+    if (count == maxCount) resizeTable(size * 2 / maxLoadFactor);
+    
     nrItemsSet++;
     for (unsigned int i = 0, h = hash1(key); i < size;
          ++i, h = (h + hash2(key)) & (size - 1))
     {
-        assert(h < size);
-        probes++;
+        // assert(h < size);
         nrSetProbes++;
-        if (hashTable[h].key == 0) {
-            if (probes > nrMaxProbes) nrMaxProbes = probes;
+        /* Note: to see whether a slot is free, we check
+           hashTable[h].value, not hashTable[h].key, since we use
+           value == 0 to mark deleted slots. */
+        if (hashTable[h].value == 0 || hashTable[h].key == key) {
             hashTable[h].key = key;
             hashTable[h].value = value;
             count++;
             return;
         }
     }
+        
     abort();
 }
 
 
-unsigned int nrItemsGet = 0;
-unsigned int nrGetProbes = 0;
+static unsigned int nrItemsGet = 0;
+static unsigned int nrGetProbes = 0;
 
 
 ATerm ATermMap::get(ATerm key) const
@@ -180,6 +240,20 @@ ATerm ATermMap::get(ATerm key) const
 }
 
 
+void ATermMap::remove(ATerm key)
+{
+    for (unsigned int i = 0, h = hash1(key); i < size;
+         ++i, h = (h + hash2(key)) & (size - 1))
+    {
+        if (hashTable[h].key == 0) return;
+        if (hashTable[h].key == key) {
+            hashTable[h].value = 0;
+            return;
+        }
+    }
+}
+
+
 int main(int argc, char * * argv)
 {
     ATerm bottomOfStack;
@@ -211,18 +285,37 @@ int main(int argc, char * * argv)
 
 
     for (int test = 0; test < 100000; ++test) {
-        ATermMap map(100);
-        for (int i = 0; i < 30; ++i) 
-            map.set(someTerm(), someTerm());
-        for (int i = 0; i < 100; ++i)
+        // cerr << test << endl;
+        unsigned int n = 300;
+        ATermMap map(300);
+        ATerm keys[n], values[n];
+        for (unsigned int i = 0; i < n; ++i) {
+            keys[i] = someTerm();
+            values[i] = someTerm();
+            map.set(keys[i], values[i]);
+            // cerr << "INSERT: " << keys[i] << " " << values[i] << endl;
+        }
+        values[n - 1] = 0;
+        map.remove(keys[n - 1]);
+        for (unsigned int i = 0; i < n; ++i) {
+            if (map.get(keys[i]) != values[i]) {
+                for (unsigned int j = i + 1; j < n; ++j)
+                    if (keys[i] == keys[j]) goto x;
+                cerr << "MISMATCH: " << keys[i] << " " << values[i] << " " << map.get(keys[i]) << " " << i << endl;
+                abort();
+            x: ;
+            }
+        }
+        for (unsigned int i = 0; i < 100; ++i)
             map.get(someTerm());
     }
 
+    cout << "RESIZES: " << nrResizes << endl;
+        
     cout << "SET: "
          << nrItemsSet << " "
          << nrSetProbes << " "
-         << (double) nrSetProbes / nrItemsSet << " "
-         << nrMaxProbes << endl;
+         << (double) nrSetProbes / nrItemsSet << endl;
 
     cout << "GET: "
          << nrItemsGet << " "
diff --git a/table/timings.txt b/table/timings.txt
index 26b428adec50..cb68b914332f 100644
--- a/table/timings.txt
+++ b/table/timings.txt
@@ -17,3 +17,14 @@ GET: 10000000 13032311 1.30323
 real    0m0.956s
 user    0m0.928s
 sys     0m0.024s
+
+
+[eelco@tyros:~/Dev/nix/table]$ time ./a.out
+testing...
+RESIZES: 0
+SET: 30000000 35420570 1.18069
+GET: 40000000 49507966 1.2377
+
+real    0m3.796s
+user    0m3.700s
+sys     0m0.016s