Skip to content

Instantly share code, notes, and snippets.

@SolidAlloy
Last active September 29, 2025 10:55
Show Gist options
  • Select an option

  • Save SolidAlloy/f430d7b6b715aef4ca7394d1ba208db0 to your computer and use it in GitHub Desktop.

Select an option

Save SolidAlloy/f430d7b6b715aef4ca7394d1ba208db0 to your computer and use it in GitHub Desktop.
C# implementation of a Robin Hood hashmap with backshift deletions from https://github.com/goossaert/hashmap/blob/master/backshift_hashmap.h
[StructLayout(LayoutKind.Auto)]
public struct HashMap<TKey, TValue> where TKey : IEquatable<TKey>
{
[StructLayout(LayoutKind.Auto)]
public struct Entry
{
public int Hash; // 0 == empty
public int Dib; // distance-to-initial-bucket for this slot (0 for home)
public TKey Key;
public TValue Value;
}
public Entry[] Buckets;
private int _count;
private int _mask;
private const float MaxLoadFactor = 0.85f;
public int Count => _count;
public int Capacity => Buckets.Length;
public HashMap(int initialCapacity = 8)
{
if (initialCapacity < 1) initialCapacity = 1;
int cap = Mathf.NextPowerOfTwo(Mathf.CeilToInt(initialCapacity * MaxLoadFactor));
Buckets = new Entry[cap];
_mask = cap - 1;
_count = 0;
}
#region Public API
public bool TryGetValue(TKey key, out TValue value)
{
int hash = MixNonZero(key.GetHashCode());
int init = hash & _mask;
for (int i = 0; i < Buckets.Length; ++i)
{
int idx = (init + i) & _mask;
ref Entry e = ref Buckets[idx];
int eHash = e.Hash;
if (eHash == 0) break; // empty bucket => not found
if (i > e.Dib) break; // we passed where it could be
if (eHash == hash && e.Key.Equals(key))
{
value = e.Value;
return true;
}
}
value = default!;
return false;
}
public ref TValue GetOrAddValue(TKey key, out bool exists)
{
if (NeedsResize(_count + 1)) Resize(Buckets.Length << 1);
var buckets = Buckets;
int mask = _mask;
int hash = MixNonZero(key.GetHashCode());
int init = hash & mask;
// We'll insert this if missing; DIB will be set when placed.
var entryToInsert = new Entry { Hash = hash, Dib = 0, Key = key, Value = default };
int placedIndex = -1;
for (int i = 0, currentDib = 0; ; ++i, ++currentDib)
{
int idx = (init + i) & mask;
ref var e = ref buckets[idx];
if (e.Hash == 0)
{
// place new
entryToInsert.Dib = currentDib;
e = entryToInsert;
++_count;
// we displaced the original, remembered its placed index, so returning placedIndex
// if placedIndex == -1, we didn't move anything
exists = false;
if (placedIndex == -1)
{
return ref e.Value;
}
return ref Buckets[placedIndex].Value;
}
if (e.Hash == hash && e.Key.Equals(key))
{
exists = true;
return ref e.Value;
}
// Robin-Hood: newcomer steals if its DIB (i) > resident's DIB
if (currentDib > e.Dib)
{
// swap: newcomer takes slot with its current DIB (i)
var tmpEntry = e;
e = entryToInsert;
e.Dib = currentDib;
if (placedIndex < 0)
placedIndex = idx;
// displaced continues probing with its own DIB (tmp.Dib)
entryToInsert = tmpEntry;
currentDib = entryToInsert.Dib; // continue from resident's DIB
}
}
}
public bool ContainsKey(TKey key) => TryGetValue(key, out _);
public bool Add(TKey key, in TValue value)
{
if (NeedsResize(_count + 1))
Resize(Buckets.Length << 1);
int hash = MixNonZero(key.GetHashCode());
int init = hash & _mask;
Entry entryToInsert = new Entry { Hash = hash, Dib = 0, Key = key, Value = value };
for (int i = 0, currentDib = 0; ; ++i, ++currentDib)
{
int idx = (init + i) & _mask;
ref Entry e = ref Buckets[idx];
if (e.Hash == 0)
{
entryToInsert.Dib = currentDib;
e = entryToInsert;
++_count;
return true;
}
if (e.Hash == hash && e.Key.Equals(key))
{
e.Value = value; // overwrite existing
return false;
}
if (currentDib > e.Dib)
{
// swap with DIB maintenance
var tmpEntry = e;
e = entryToInsert;
e.Dib = currentDib;
entryToInsert = tmpEntry;
currentDib = entryToInsert.Dib;
}
}
}
public bool Remove(TKey key)
{
if (_count == 0) return false;
int hash = MixNonZero(key.GetHashCode());
int init = hash & _mask;
int idxFound = -1;
for (int i = 0; i < Buckets.Length; ++i)
{
int idx = (init + i) & _mask;
ref Entry e = ref Buckets[idx];
if (e.Hash == 0) break;
if (i > e.Dib) break;
if (e.Hash == hash && e.Key.Equals(key))
{
idxFound = idx;
break;
}
}
if (idxFound < 0) return false;
// Backshift compaction: shift entries left until a home or empty
for (int hole = idxFound, next = (hole + 1) & _mask; ; hole = next, next = (next + 1) & _mask)
{
ref Entry e = ref Buckets[next];
if (e.Hash == 0 || e.Dib == 0)
{
Buckets[hole] = default;
--_count;
return true;
}
// Move e back into the hole; DIB decreases by 1
Entry moved = e;
moved.Dib -= 1;
Buckets[hole] = moved;
Buckets[next] = default;
}
}
public void Clear()
{
Array.Clear(Buckets, 0, Buckets.Length);
_count = 0;
}
public void EnsureCapacity(int desired)
{
if (desired <= Capacity) return;
Resize(Mathf.NextPowerOfTwo(desired));
}
#endregion
#region Internals
private void Resize(int newCapacity)
{
newCapacity = Mathf.NextPowerOfTwo(Math.Max(newCapacity, 1));
Entry[] old = Buckets;
Buckets = new Entry[newCapacity];
_mask = newCapacity - 1;
_count = 0;
for (int i = 0; i < old.Length; i++)
{
Entry e = old[i];
if (e.Hash == 0) continue;
int hash = e.Hash;
int init = hash & _mask;
// reinsert with correct DIB tracking
for (int dib = 0; ; ++dib)
{
int idx = (init + dib) & _mask;
ref Entry target = ref Buckets[idx];
if (target.Hash == 0)
{
e.Dib = dib;
target = e;
++_count;
break;
}
if (dib > target.Dib)
{
// swap; set DIBs appropriately
Entry tmp = target;
target = e;
target.Dib = dib;
e = tmp;
dib = e.Dib; // continue with displaced entry's DIB
}
}
}
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private bool NeedsResize(int futureCount) => futureCount > (int)(MaxLoadFactor * Buckets.Length);
// cheap mixer; maps 0 to non-zero for empty-sentinel
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int MixNonZero(int h)
{
uint x = (uint)h * 0x9E3779B1u; // golden ratio multiply
if (x == 0) x = 1;
return (int)x;
}
#endregion
}
@SolidAlloy
Copy link
Author

SolidAlloy commented Aug 12, 2025

It's a struct to avoid double dereferencing in my project. Feel free to make it a class, implement IDictionary, and all that stuff.

Comparison to Dictionary on 1000-2000 entries with an int key and a large struct value:
Add: Same, sometimes 5% slower
Get an existing value (copy): 30% faster
Try get a missing value (copy): 30% faster
Get or add an existing value (TryGetValue + Add for Dictionary, GetOrAddValue for HashMap): 25% faster
Get or add a missing value: 35% faster
Resizing: basically random, depends on the current state of GC. Averages to 50% slower, but I'm not sure we can make a conclusion from that. I cannot separate allocations from the iteration and rehashing algorithm when measuring the resizing of the standard Dictionary, so we can't compare apples to apples.

Adding 2,110,000 entries with an int key and a double value, starting with zero capacity and dynamically resizing: 22% faster

Another C# implementation using the same C++ source: https://github.com/jzebedee/rhbackshiftdict
It's slower than this implementation, especially as the number of entries grows. I store DIB as I found it to improve the performance very slightly for my use case. And I use a different hash mixing algorithm, but that's about the only significant difference I can see at a glance.

The other things I tried, but they made the performance worse:

  • a heavier mixing algorithm that makes keys spread out better in theory
  • storing values separately so that more entries fit a cache line

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment