Last active
September 29, 2025 10:55
-
-
Save SolidAlloy/f430d7b6b715aef4ca7394d1ba208db0 to your computer and use it in GitHub Desktop.
C# implementation of a Robin Hood hashmap with backshift deletions from https://github.com/goossaert/hashmap/blob/master/backshift_hashmap.h
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| [StructLayout(LayoutKind.Auto)] | |
| public struct HashMap<TKey, TValue> where TKey : IEquatable<TKey> | |
| { | |
| [StructLayout(LayoutKind.Auto)] | |
| public struct Entry | |
| { | |
| public int Hash; // 0 == empty | |
| public int Dib; // distance-to-initial-bucket for this slot (0 for home) | |
| public TKey Key; | |
| public TValue Value; | |
| } | |
| public Entry[] Buckets; | |
| private int _count; | |
| private int _mask; | |
| private const float MaxLoadFactor = 0.85f; | |
| public int Count => _count; | |
| public int Capacity => Buckets.Length; | |
| public HashMap(int initialCapacity = 8) | |
| { | |
| if (initialCapacity < 1) initialCapacity = 1; | |
| int cap = Mathf.NextPowerOfTwo(Mathf.CeilToInt(initialCapacity * MaxLoadFactor)); | |
| Buckets = new Entry[cap]; | |
| _mask = cap - 1; | |
| _count = 0; | |
| } | |
| #region Public API | |
| public bool TryGetValue(TKey key, out TValue value) | |
| { | |
| int hash = MixNonZero(key.GetHashCode()); | |
| int init = hash & _mask; | |
| for (int i = 0; i < Buckets.Length; ++i) | |
| { | |
| int idx = (init + i) & _mask; | |
| ref Entry e = ref Buckets[idx]; | |
| int eHash = e.Hash; | |
| if (eHash == 0) break; // empty bucket => not found | |
| if (i > e.Dib) break; // we passed where it could be | |
| if (eHash == hash && e.Key.Equals(key)) | |
| { | |
| value = e.Value; | |
| return true; | |
| } | |
| } | |
| value = default!; | |
| return false; | |
| } | |
| public ref TValue GetOrAddValue(TKey key, out bool exists) | |
| { | |
| if (NeedsResize(_count + 1)) Resize(Buckets.Length << 1); | |
| var buckets = Buckets; | |
| int mask = _mask; | |
| int hash = MixNonZero(key.GetHashCode()); | |
| int init = hash & mask; | |
| // We'll insert this if missing; DIB will be set when placed. | |
| var entryToInsert = new Entry { Hash = hash, Dib = 0, Key = key, Value = default }; | |
| int placedIndex = -1; | |
| for (int i = 0, currentDib = 0; ; ++i, ++currentDib) | |
| { | |
| int idx = (init + i) & mask; | |
| ref var e = ref buckets[idx]; | |
| if (e.Hash == 0) | |
| { | |
| // place new | |
| entryToInsert.Dib = currentDib; | |
| e = entryToInsert; | |
| ++_count; | |
| // we displaced the original, remembered its placed index, so returning placedIndex | |
| // if placedIndex == -1, we didn't move anything | |
| exists = false; | |
| if (placedIndex == -1) | |
| { | |
| return ref e.Value; | |
| } | |
| return ref Buckets[placedIndex].Value; | |
| } | |
| if (e.Hash == hash && e.Key.Equals(key)) | |
| { | |
| exists = true; | |
| return ref e.Value; | |
| } | |
| // Robin-Hood: newcomer steals if its DIB (i) > resident's DIB | |
| if (currentDib > e.Dib) | |
| { | |
| // swap: newcomer takes slot with its current DIB (i) | |
| var tmpEntry = e; | |
| e = entryToInsert; | |
| e.Dib = currentDib; | |
| if (placedIndex < 0) | |
| placedIndex = idx; | |
| // displaced continues probing with its own DIB (tmp.Dib) | |
| entryToInsert = tmpEntry; | |
| currentDib = entryToInsert.Dib; // continue from resident's DIB | |
| } | |
| } | |
| } | |
| public bool ContainsKey(TKey key) => TryGetValue(key, out _); | |
| public bool Add(TKey key, in TValue value) | |
| { | |
| if (NeedsResize(_count + 1)) | |
| Resize(Buckets.Length << 1); | |
| int hash = MixNonZero(key.GetHashCode()); | |
| int init = hash & _mask; | |
| Entry entryToInsert = new Entry { Hash = hash, Dib = 0, Key = key, Value = value }; | |
| for (int i = 0, currentDib = 0; ; ++i, ++currentDib) | |
| { | |
| int idx = (init + i) & _mask; | |
| ref Entry e = ref Buckets[idx]; | |
| if (e.Hash == 0) | |
| { | |
| entryToInsert.Dib = currentDib; | |
| e = entryToInsert; | |
| ++_count; | |
| return true; | |
| } | |
| if (e.Hash == hash && e.Key.Equals(key)) | |
| { | |
| e.Value = value; // overwrite existing | |
| return false; | |
| } | |
| if (currentDib > e.Dib) | |
| { | |
| // swap with DIB maintenance | |
| var tmpEntry = e; | |
| e = entryToInsert; | |
| e.Dib = currentDib; | |
| entryToInsert = tmpEntry; | |
| currentDib = entryToInsert.Dib; | |
| } | |
| } | |
| } | |
| public bool Remove(TKey key) | |
| { | |
| if (_count == 0) return false; | |
| int hash = MixNonZero(key.GetHashCode()); | |
| int init = hash & _mask; | |
| int idxFound = -1; | |
| for (int i = 0; i < Buckets.Length; ++i) | |
| { | |
| int idx = (init + i) & _mask; | |
| ref Entry e = ref Buckets[idx]; | |
| if (e.Hash == 0) break; | |
| if (i > e.Dib) break; | |
| if (e.Hash == hash && e.Key.Equals(key)) | |
| { | |
| idxFound = idx; | |
| break; | |
| } | |
| } | |
| if (idxFound < 0) return false; | |
| // Backshift compaction: shift entries left until a home or empty | |
| for (int hole = idxFound, next = (hole + 1) & _mask; ; hole = next, next = (next + 1) & _mask) | |
| { | |
| ref Entry e = ref Buckets[next]; | |
| if (e.Hash == 0 || e.Dib == 0) | |
| { | |
| Buckets[hole] = default; | |
| --_count; | |
| return true; | |
| } | |
| // Move e back into the hole; DIB decreases by 1 | |
| Entry moved = e; | |
| moved.Dib -= 1; | |
| Buckets[hole] = moved; | |
| Buckets[next] = default; | |
| } | |
| } | |
| public void Clear() | |
| { | |
| Array.Clear(Buckets, 0, Buckets.Length); | |
| _count = 0; | |
| } | |
| public void EnsureCapacity(int desired) | |
| { | |
| if (desired <= Capacity) return; | |
| Resize(Mathf.NextPowerOfTwo(desired)); | |
| } | |
| #endregion | |
| #region Internals | |
| private void Resize(int newCapacity) | |
| { | |
| newCapacity = Mathf.NextPowerOfTwo(Math.Max(newCapacity, 1)); | |
| Entry[] old = Buckets; | |
| Buckets = new Entry[newCapacity]; | |
| _mask = newCapacity - 1; | |
| _count = 0; | |
| for (int i = 0; i < old.Length; i++) | |
| { | |
| Entry e = old[i]; | |
| if (e.Hash == 0) continue; | |
| int hash = e.Hash; | |
| int init = hash & _mask; | |
| // reinsert with correct DIB tracking | |
| for (int dib = 0; ; ++dib) | |
| { | |
| int idx = (init + dib) & _mask; | |
| ref Entry target = ref Buckets[idx]; | |
| if (target.Hash == 0) | |
| { | |
| e.Dib = dib; | |
| target = e; | |
| ++_count; | |
| break; | |
| } | |
| if (dib > target.Dib) | |
| { | |
| // swap; set DIBs appropriately | |
| Entry tmp = target; | |
| target = e; | |
| target.Dib = dib; | |
| e = tmp; | |
| dib = e.Dib; // continue with displaced entry's DIB | |
| } | |
| } | |
| } | |
| } | |
| [MethodImpl(MethodImplOptions.AggressiveInlining)] | |
| private bool NeedsResize(int futureCount) => futureCount > (int)(MaxLoadFactor * Buckets.Length); | |
| // cheap mixer; maps 0 to non-zero for empty-sentinel | |
| [MethodImpl(MethodImplOptions.AggressiveInlining)] | |
| private static int MixNonZero(int h) | |
| { | |
| uint x = (uint)h * 0x9E3779B1u; // golden ratio multiply | |
| if (x == 0) x = 1; | |
| return (int)x; | |
| } | |
| #endregion | |
| } |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
It's a struct to avoid double dereferencing in my project. Feel free to make it a class, implement IDictionary, and all that stuff.
Comparison to Dictionary on 1000-2000 entries with an
intkey and a large struct value:Add: Same, sometimes 5% slower
Get an existing value (copy): 30% faster
Try get a missing value (copy): 30% faster
Get or add an existing value (TryGetValue + Add for Dictionary, GetOrAddValue for HashMap): 25% faster
Get or add a missing value: 35% faster
Resizing: basically random, depends on the current state of GC. Averages to 50% slower, but I'm not sure we can make a conclusion from that. I cannot separate allocations from the iteration and rehashing algorithm when measuring the resizing of the standard Dictionary, so we can't compare apples to apples.
Adding 2,110,000 entries with an
intkey and adoublevalue, starting with zero capacity and dynamically resizing: 22% fasterAnother C# implementation using the same C++ source: https://github.com/jzebedee/rhbackshiftdict
It's slower than this implementation, especially as the number of entries grows. I store DIB as I found it to improve the performance very slightly for my use case. And I use a different hash mixing algorithm, but that's about the only significant difference I can see at a glance.
The other things I tried, but they made the performance worse: