Bläddra i källkod

HashSet improvements

Billy Barrow 2 veckor sedan
förälder
incheckning
25362fa7f4
1 ändrade filer med 71 tillägg och 80 borttagningar
  1. 71 80
      src/lib/DataStructures/HashSet.vala

+ 71 - 80
src/lib/DataStructures/HashSet.vala

@@ -3,14 +3,17 @@ namespace Invercargill.DataStructures {
 
     public class HashSet<T> : Enumerable<T>, Lot<T>, ReadOnlyCollection<T>, ReadOnlySet<T>, Set<T> {
 
-        private const uint BUCKET_TOMBSTONE = uint.MAX;
-        private const uint BUCKET_EMPTY = 0;
-        private uint[] buckets;
-        private T[] items;
+        [Compact] // single-owner, non-refcounted node; instances are released explicitly with `delete`
+        private class HashSetItem<T> {
+            public uint hash; // result of hash_func for the item, stored at insertion and reused when rehashing
+            public T item; // the stored element
+        }
+
+        private HashSetItem<T>* tombstone; // sentinel node allocated in setup(); remove() stores it in a vacated slot and it is only compared by address
+        private HashSetItem<T>*[] buckets; // slot table of manually managed node pointers; null means the slot was never used
         private int n_items = 0;
         private int n_buckets = 16;
         private int n_collissions = 0;
-        private int next_item_index = 0;
         private SafeReadFunc<T>? safe_read;
         private SafeWriteFunc<T>? safe_write;
         private HashDelegate<T> hash_func;
@@ -25,36 +28,15 @@ namespace Invercargill.DataStructures {
         }
 
         private void setup(HashDelegate<T>? value_hash_func = null, EqualityDelegate<T>? value_equal_func = null) {
+            tombstone = new HashSetItem<T>(); // sentinel for removed slots; released by the destructor
             hash_func = value_hash_func ?? Operators.hash<T>();
             equal_func = value_equal_func ?? Operators.equality<T>();
             safe_read = get_safe_read_function_for<T>();
             safe_write = get_safe_write_function_for<T>();
-            buckets = new uint[n_buckets];
-            items = new T[n_buckets];
+            buckets = new HashSetItem<T>*[n_buckets]; // pointer array, matching the field's declared type and the allocation in clear()
 
         }
 
-        private T read_item(uint index) {
-            if(safe_read != null){
-                return safe_read(items, index);
-            }
-            return items[index];
-        }
-
-        private void write_item(uint index, T item) {
-            if(safe_write != null) {
-                safe_write(items, index, item);
-                return;
-            }
-            items[index] = item;
-        }
-
-        private void ensure_room_for_items(int count) {
-            if(items.length <= n_items + count) {
-                items.resize(items.length * 2);
-            }
-        }
-
         private void ensure_room_for_bucket(uint index) {
             var target = buckets.length;
             while(target <= index) {
@@ -67,35 +49,26 @@ namespace Invercargill.DataStructures {
 
         private void double_buckets() {
             n_buckets *= 2;
-            buckets = new uint[n_buckets];
+            var old_buckets = buckets; // keep the current slot pointers reachable while the field is replaced
+            buckets = new HashSetItem<T>*[n_buckets]; // assign the field (a `var` here would shadow it and discard the rehashed table)
             n_collissions = 0;
 
-            for(var i = 0; i < n_items; i++) {
-                var item = read_item(i);
-                var bucket_index = bucket_for(item);
-                while(buckets[bucket_index] != BUCKET_EMPTY) {
+            for(var i = 0; i < old_buckets.length; i++) { // re-insert every live node into the larger table
+                var bucket = old_buckets[i];
+                if(bucket == null || bucket == tombstone) { // never-used and removed slots are not carried over
+                    continue;
+                }
+
+                var bucket_index = bucket->hash % n_buckets; // reuse the stored hash instead of calling hash_func again
+                while(buckets[bucket_index] != null) { // linear probe; the new table contains no tombstones
                     bucket_index++;
                     n_collissions++;
                     ensure_room_for_bucket(bucket_index);
                 }
-                buckets[bucket_index] = i+1;
-            }
-            
-        }
 
-        private T get_item(uint index) {
-            if(index == BUCKET_EMPTY || index == BUCKET_TOMBSTONE) {
-                assert_not_reached();
+                buckets[bucket_index] = bucket; // the node itself is moved, not copied
             }
-            return read_item(index-1);
-        }
-
-        private uint add_item(T item) {
-            ensure_room_for_items(1);
-            write_item(next_item_index, item);
-            n_items++;
-            next_item_index++;
-            return n_items;
+            
         }
 
         private uint bucket_for(T item) {
@@ -112,9 +85,9 @@ namespace Invercargill.DataStructures {
         }
 
         public override Tracker<T> get_tracker () {
-            return range(0, buckets.length)
-                .where(i => buckets[i] != BUCKET_EMPTY && buckets[i] != BUCKET_TOMBSTONE)
-                .select<T>(i => get_item(buckets[i]))
+            return range(0, buckets.length) // scan the whole slot array, as try_find/remove/delete_items do; probing can place nodes past the n_buckets modulus
+                .where(i => buckets[i] != null && buckets[i] != tombstone) // keep only slots holding a live node
+                .select<T>(i => buckets[i]->item) // project each node to its stored element
                 .get_tracker();
         }
 
@@ -126,16 +99,17 @@ namespace Invercargill.DataStructures {
             add_internal(item, true);
         }
 
-        private bool add_internal (T item, bool overwrite) {
+        private bool add_internal (owned T item, bool overwrite) { // takes ownership of item so it can be moved into the table; returns overwrite when an equal item already exists, true when a new node is inserted
             if(n_collissions > n_buckets / 4) {
                 double_buckets();
             }
             
-            var bucket_index = bucket_for(item);
-            while(buckets[bucket_index] != BUCKET_EMPTY && buckets[bucket_index] != BUCKET_TOMBSTONE) {
-                if(equal_func(get_item(buckets[bucket_index]), item)) {
+            var bucket_hash = hash_func(item); // hashed once; used for the start slot and stored on the new node
+            var bucket_index = bucket_hash % n_buckets;
+            while(buckets[bucket_index] != null && buckets[bucket_index] != tombstone) { // NOTE(review): probing stops at the first tombstone, so an equal item stored further along the probe chain is never compared — confirm duplicates cannot result
+                if(equal_func(buckets[bucket_index]->item, item)) {
                     if(overwrite)
-                        write_item(buckets[bucket_index] - 1, item);
+                        buckets[bucket_index]->item = (owned)item; // move the new value into the existing node
                     return overwrite;
                 }
                 bucket_index++;
@@ -143,15 +117,19 @@ namespace Invercargill.DataStructures {
                 ensure_room_for_bucket(bucket_index);
             }
             
-            buckets[bucket_index] = add_item(item);
+            HashSetItem<T>* new_bucket = new HashSetItem<T>(); // manually managed node; released by remove() or delete_items()
+            new_bucket->hash = bucket_hash;
+            new_bucket->item = (owned)item; // transfer ownership of the element to the node
+            buckets[bucket_index] = new_bucket; // fills either a never-used slot or a tombstone slot
+            n_items++;
             return true;
         }
 
         public bool try_find(T search, out T item) {
             var bucket_index = bucket_for(search);
-            while(bucket_index < buckets.length && buckets[bucket_index] != BUCKET_EMPTY) {
-                if(buckets[bucket_index] != BUCKET_TOMBSTONE) {
-                    var ours = get_item(buckets[bucket_index]);
+            while(bucket_index < buckets.length && buckets[bucket_index] != null) {
+                if(buckets[bucket_index] != tombstone) {
+                    var ours = buckets[bucket_index]->item;
                     if(equal_func(ours, search)) {
                         item = ours;
                         return true;
@@ -176,42 +154,55 @@ namespace Invercargill.DataStructures {
             return try_find(item, out _);
         }
 
-        public void clear() {
-            n_items = 0;
-            n_buckets = 16;
-            n_collissions = 0;
-            buckets = new uint[n_buckets];
-            items = new T[n_buckets];
+        public bool equals(Enumerable<T> other) { // true when other is this same instance, or when non_common() of the two yields no elements
+            if(other == this) { // same instance: skip the element comparison
+                return true;
+            }
+            return other.non_common(this, hash_func, equal_func).count() == 0; // presumably non_common() yields elements present in only one side — verify against Enumerable
+        }
+
+        public bool is_subset_of(Enumerable<T> other) { // true when exclude() against other leaves no elements of this set
+            return this.exclude(other, hash_func, equal_func).count() == 0; // presumably exclude() yields items of this that are absent from other — verify against Enumerable
         }
 
         public T? remove(T item) {
             var bucket_index = bucket_for(item);
-            while(bucket_index < buckets.length && buckets[bucket_index] != BUCKET_EMPTY) {
-                if(buckets[bucket_index] != BUCKET_TOMBSTONE) {
-                    var ours = get_item(buckets[bucket_index]);
+            while(bucket_index < buckets.length && buckets[bucket_index] != null) { // probe forward until a never-used slot or the end of the array
+                if(buckets[bucket_index] != tombstone) { // removed slots are skipped but do not end the probe
+                    var ours = buckets[bucket_index]->item; // NOTE(review): assumes this local holds its own copy/reference, since the node is freed below — confirm for T without a dup function
                     if(equal_func(ours, item)) {
-                        write_item(buckets[bucket_index] - 1, null);
-                        buckets[bucket_index] = BUCKET_TOMBSTONE;
+                        delete buckets[bucket_index]; // release the node that held the item
+                        buckets[bucket_index] = tombstone; // mark the slot as removed so later probes continue past it
                         n_items--;
                         return ours;
                     }
-                    else {
-                    }
                 }
                 bucket_index++;
             }
             return null;
         }
 
-        public bool equals(Enumerable<T> other) {
-            if(other == this) {
-                return true;
+        public void clear() { // removes every item and resets the counters and table size to their initial values
+            n_items = 0;
+            n_buckets = 16; // same value as the field initializer
+            n_collissions = 0;
+            delete_items(); // release the nodes before the array that points to them is replaced
+            buckets = new HashSetItem<T>*[n_buckets]; // fresh table of n_buckets slots
+        }
+
+        private void delete_items() { // releases every live node; slots keep their old pointer values, and the visible callers (clear, destructor) replace or drop the array afterwards
+            for(int i = 0; i < buckets.length; i++) { // walks the whole array, not just the first n_buckets slots
+                if(buckets[i] != null && buckets[i] != tombstone) // the shared tombstone is skipped here; the destructor releases it once
+                delete buckets[i]; // body of the brace-less if above (the indentation is misleading)
             }
-            return other.non_common(this, hash_func, equal_func).count() == 0;
         }
-        public bool is_subset_of(Enumerable<T> other) {
-            return this.exclude(other, hash_func, equal_func).count() == 0;
+
+        ~HashSet() { // the table holds raw pointers, so nodes must be released by hand when the set is finalized
+            delete_items(); // release every live node
+            delete tombstone; // release the sentinel allocated in setup()
         }
 
+
+
     }
 }