xtao · June 4, 2016 05:39 · May 23, 2016 · May 18, 2016 · May 18, 2016 · May 18, 2016
diff --git a/distribution.py b/distribution.py
@@ -22,8 +22,6 @@
 Consistent Hashing implementation inspired by:
 http://techspot.zzzeek.org/2012/07/07/the-absolutely-simplest-consistent-hashing-example
 
-XXX Add credits for the murmur hash
-
 Also, good read on hashes:
 
 http://programmers.stackexchange.com/questions/49550/which-hashing-algorithm-is-best-for-uniqueness-and-speed/145633#145633
@@ -33,12 +31,29 @@
 from collections import defaultdict
 import binascii
 import time
+from functools import wraps
 
 
 class CollisionError(Exception):
     pass
 
 
+_collisions = {}
+
+
+def catch_collision(func):
+    @wraps(func)
+    def _catch(key):
+        res = func(key)
+        if res in _collisions and key != _collisions[res]:
+            raise CollisionError('%s and %s with %s' % (key, _collisions[res],
+                func))
+        _collisions[res] = key
+        return res
+    return _catch
+
+
+@catch_collision
 def fnv32a(key):
     hval = 0x811c9dc5
     fnv_32_prime = 0x01000193
@@ -49,73 +64,25 @@ def fnv32a(key):
     return hval
 
 
+@catch_collision
 def sha512(key):
     return long(hashlib.sha512(key).hexdigest(), 16)
 
 
+@catch_collision
 def sha256(key):
     return long(hashlib.sha256(key).hexdigest(), 16)
 
 
+@catch_collision
 def md5(key):
     return long(hashlib.md5(key).hexdigest(), 16)
 
 
 
-MASK = 2 ** 64 - 1
-_64 = 8 * 8
-
-def murmur64(data, seed=19820125):
-    m = 0xc6a4a7935bd1e995
-    r = 47
-    data = bytearray(data)
-    h = seed ^ ((m * len(data)) & MASK)
-    off = len(data) / _64
-
-    def b2l(bytes):
-        return sum((b << (k * 8) for k, b in enumerate(bytes)))
-
-    for ll in range(0, off, 8):
-        k = b2l(data[ll:ll + 8])
-        k = (k * m) & MASK
-        k = k ^ ((k >> r) & MASK)
-        k = (k * m) & MASK
-        h = (h ^ k)
-        h = (h * m) & MASK
-
-    l = len(data) & 7
-
-    if l >= 7:
-        h = (h ^ (data[off+6] << 48))
-
-    if l >= 6:
-        h = (h ^ (data[off+5] << 40))
-
-    if l >= 5:
-        h = (h ^ (data[off+4] << 32))
-
-    if l >= 4:
-        h = (h ^ (data[off+3] << 24))
-
-    if l >= 3:
-        h = (h ^ (data[off+2] << 16))
-
-    if l >= 2:
-        h = (h ^ (data[off+1] << 8))
-
-    if l >= 1:
-        h = (h ^ data[off])
-        h = (h * m) & MASK
-
-    h = h ^ ((h >> r) & MASK)
-    h = (h * m) & MASK
-    h = h ^ ((h >> r) & MASK)
-    return h
-
-
 class RendezVous(object):
 
-    def __init__(self, ips=None, hash=murmur64):
+    def __init__(self, ips=None, hash=md5):
         if ips is None:
             ips = []
         self.ips = ips
@@ -165,9 +132,6 @@ def add(self, ip):
         for i in range(self.replicas):
             sip = _repl(ip, i)
             hashed = self._hash(sip)
-            if hashed in self._ips:
-                raise CollisionError('Collision! %s vs %s with %s hash' % (sip,
-                    self._ips[hashed], self._hash))
             self._ips[hashed] = sip
             bisect.insort(self._hashed_ips, hashed)
 

diff --git a/distribution.py b/distribution.py
@@ -32,6 +32,11 @@
 import bisect
 from collections import defaultdict
 import binascii
+import time
+
+
+class CollisionError(Exception):
+    pass
 
 
 def fnv32a(key):
@@ -44,6 +49,10 @@ def fnv32a(key):
     return hval
 
 
+def sha512(key):
+    return long(hashlib.sha512(key).hexdigest(), 16)
+
+
 def sha256(key):
     return long(hashlib.sha256(key).hexdigest(), 16)
 
@@ -52,21 +61,22 @@ def md5(key):
     return long(hashlib.md5(key).hexdigest(), 16)
 
 
-def bytes_to_long(bytes):
-    assert len(bytes) == 8
-    return sum((b << (k * 8) for k, b in enumerate(bytes)))
 
+MASK = 2 ** 64 - 1
+_64 = 8 * 8
 
 def murmur64(data, seed=19820125):
     m = 0xc6a4a7935bd1e995
     r = 47
-    MASK = 2 ** 64 - 1
     data = bytearray(data)
     h = seed ^ ((m * len(data)) & MASK)
+    off = len(data) / _64
+
+    def b2l(bytes):
+        return sum((b << (k * 8) for k, b in enumerate(bytes)))
 
-    off = len(data) / 8 * 8
     for ll in range(0, off, 8):
-        k = bytes_to_long(data[ll:ll + 8])
+        k = b2l(data[ll:ll + 8])
         k = (k * m) & MASK
         k = k ^ ((k >> r) & MASK)
         k = (k * m) & MASK
@@ -100,7 +110,6 @@ def murmur64(data, seed=19820125):
     h = h ^ ((h >> r) & MASK)
     h = (h * m) & MASK
     h = h ^ ((h >> r) & MASK)
-
     return h
 
 
@@ -140,7 +149,7 @@ def _repl(name, index):
 
 class ConsistentHashing(object):
 
-    def __init__(self, ips=[], replicas=100, hash=md5):
+    def __init__(self, ips=[], replicas=200, hash=md5):
         self._ips = {}
         self._hashed_ips = []
         self.replicas = replicas
@@ -156,6 +165,9 @@ def add(self, ip):
         for i in range(self.replicas):
             sip = _repl(ip, i)
             hashed = self._hash(sip)
+            if hashed in self._ips:
+                raise CollisionError('Collision! %s vs %s with %s hash' % (sip,
+                    self._ips[hashed], self._hash))
             self._ips[hashed] = sip
             bisect.insort(self._hashed_ips, hashed)
 
@@ -227,9 +239,19 @@ def run_test(servers, users):
                'postgres1']
 
     for klass in (ConsistentHashing, RendezVous):
-        for hash in (md5, murmur64, sha256, binascii.crc32, fnv32a):
-            cluster = klass(list(servers), hash=hash)
+        for hash in (md5, sha256, binascii.crc32, fnv32a, sha512):
+            try:
+                cluster = klass(list(servers), hash=hash)
+            except CollisionError:
+                print('Collision error with hash %s' % hash)
+                continue
+
             print(cluster)
-            run_test(cluster, users)
+            start = time.time()
+            try:
+                run_test(cluster, users)
+            except CollisionError:
+                print('Collision error..')
+            print('Took %d seconds' % (time.time() - start))
             print
             print
diff --git a/distribution.py b/distribution.py
@@ -31,10 +31,21 @@
 import hashlib
 import bisect
 from collections import defaultdict
+import binascii
+
+
+def fnv32a(key):
+    hval = 0x811c9dc5
+    fnv_32_prime = 0x01000193
+    uint32_max = 2 ** 32
+    for s in key:
+        hval = hval ^ ord(s)
+        hval = (hval * fnv_32_prime) % uint32_max
+    return hval
 
 
 def sha256(key):
-    return long(hashlib.md5(key).hexdigest(), 16)
+    return long(hashlib.sha256(key).hexdigest(), 16)
 
 
 def md5(key):
@@ -216,7 +227,7 @@ def run_test(servers, users):
                'postgres1']
 
     for klass in (ConsistentHashing, RendezVous):
-        for hash in (md5, murmur64, sha256):
+        for hash in (md5, murmur64, sha256, binascii.crc32, fnv32a):
             cluster = klass(list(servers), hash=hash)
             print(cluster)
             run_test(cluster, users)

diff --git a/distribution.py b/distribution.py
@@ -2,16 +2,24 @@
 
 We have a few servers and we want a load-balancer to
 distribute incoming requests across them in a deterministic
-and consistent way.
+and consistent way - without keeping any counter to make the
+decision.
 
 Removing a backend server should not impact users on other
 servers.
 
 Adding a backend will generate the redistribution of
-users on the nearest 2 servers.
+users across other servers.
 
-Consistent Hashing inspired by:
+The goal is to come up with the best algorithm
+for 1M users across 5 servers. Speed is a bonus.
 
+Two known techniques:
+
+- RendezVous : https://en.wikipedia.org/wiki/Rendezvous_hashing#Comparison_With_Consistent_Hashing
+- Consistent Hashing: https://en.wikipedia.org/wiki/Consistent_hashing
+
+Consistent Hashing implementation inspired by:
 http://techspot.zzzeek.org/2012/07/07/the-absolutely-simplest-consistent-hashing-example
 
 XXX Add credits for the murmur hash
@@ -20,13 +28,11 @@
 
 http://programmers.stackexchange.com/questions/49550/which-hashing-algorithm-is-best-for-uniqueness-and-speed/145633#145633
 """
-import sys
 import hashlib
 import bisect
 from collections import defaultdict
 
 
-
 def sha256(key):
     return long(hashlib.md5(key).hexdigest(), 16)
 
@@ -117,7 +123,6 @@ def select(self, key):
         return winner
 
 
-
 def _repl(name, index):
     return '%s:%d' % (name, index)
 

diff --git a/distribution.py b/distribution.py
@@ -15,13 +15,22 @@
 http://techspot.zzzeek.org/2012/07/07/the-absolutely-simplest-consistent-hashing-example
 
 XXX Add credits for the murmur hash
+
+Also, good read on hashes:
+
+http://programmers.stackexchange.com/questions/49550/which-hashing-algorithm-is-best-for-uniqueness-and-speed/145633#145633
 """
 import sys
 import hashlib
 import bisect
 from collections import defaultdict
 
 
+
+def sha256(key):
+    return long(hashlib.md5(key).hexdigest(), 16)
+
+
 def md5(key):
     return long(hashlib.md5(key).hexdigest(), 16)
 
@@ -160,8 +169,18 @@ def run_test(servers, users):
 
     print '===='
     print('Distribution')
+    smallest = NUM_USERS + 1
+    biggest = 0
     for db in selection:
-        print('%d users in %s' % (len(selection[db]), db))
+        size = len(selection[db])
+        if size < smallest:
+            smallest = size
+        if size > biggest:
+            biggest = size
+
+        print('%d users in %s' % (size, db))
+
+    print('span: %d' % (biggest - smallest))
 
     # removing server 2 and 4
     servers.remove('postgres2')
@@ -172,8 +191,18 @@ def run_test(servers, users):
         user_db = servers.select(user)
         selection[user_db].append(user)
 
-    for db in selection:
-        print('%d users in %s' % (len(selection[db]), db))
+    smallest = NUM_USERS + 1
+    biggest = 0
+    for i, db in enumerate(selection):
+        size = len(selection[db])
+        if size < smallest:
+            smallest = size
+        if size > biggest:
+            biggest = size
+
+        print('%d users in %s' % (size, db))
+
+    print('span: %d' % (biggest - smallest))
 
 
 if __name__ == '__main__':
@@ -182,7 +211,7 @@ def run_test(servers, users):
                'postgres1']
 
     for klass in (ConsistentHashing, RendezVous):
-        for hash in (md5, murmur64):
+        for hash in (md5, murmur64, sha256):
             cluster = klass(list(servers), hash=hash)
             print(cluster)
             run_test(cluster, users)

diff --git a/distribution.py b/distribution.py
@@ -10,63 +10,150 @@
 Adding a backend will generate the redistribution of
 users on the nearest 2 servers.
 
-inspired by:
+Consistent Hashing inspired by:
 
 http://techspot.zzzeek.org/2012/07/07/the-absolutely-simplest-consistent-hashing-example
+
+XXX Add credits for the murmur hash
 """
+import sys
 import hashlib
 import bisect
 from collections import defaultdict
 
 
-def _hash(key):
+def md5(key):
     return long(hashlib.md5(key).hexdigest(), 16)
 
 
+def bytes_to_long(bytes):
+    assert len(bytes) == 8
+    return sum((b << (k * 8) for k, b in enumerate(bytes)))
+
+
+def murmur64(data, seed=19820125):
+    m = 0xc6a4a7935bd1e995
+    r = 47
+    MASK = 2 ** 64 - 1
+    data = bytearray(data)
+    h = seed ^ ((m * len(data)) & MASK)
+
+    off = len(data) / 8 * 8
+    for ll in range(0, off, 8):
+        k = bytes_to_long(data[ll:ll + 8])
+        k = (k * m) & MASK
+        k = k ^ ((k >> r) & MASK)
+        k = (k * m) & MASK
+        h = (h ^ k)
+        h = (h * m) & MASK
+
+    l = len(data) & 7
+
+    if l >= 7:
+        h = (h ^ (data[off+6] << 48))
+
+    if l >= 6:
+        h = (h ^ (data[off+5] << 40))
+
+    if l >= 5:
+        h = (h ^ (data[off+4] << 32))
+
+    if l >= 4:
+        h = (h ^ (data[off+3] << 24))
+
+    if l >= 3:
+        h = (h ^ (data[off+2] << 16))
+
+    if l >= 2:
+        h = (h ^ (data[off+1] << 8))
+
+    if l >= 1:
+        h = (h ^ data[off])
+        h = (h * m) & MASK
+
+    h = h ^ ((h >> r) & MASK)
+    h = (h * m) & MASK
+    h = h ^ ((h >> r) & MASK)
+
+    return h
+
+
+class RendezVous(object):
+
+    def __init__(self, ips=None, hash=murmur64):
+        if ips is None:
+            ips = []
+        self.ips = ips
+        self._hash = hash
+
+    def __str__(self):
+        return '<RendezVous with %s hash>' % self._hash
+
+    def add(self, ip):
+        self.ips.append(ip)
+
+    def remove(self, ip):
+        self.ips.remove(ip)
+
+    def select(self, key):
+        high_score = -1
+        winner = None
+        for ip in self.ips:
+            score = self._hash("%s-%s" % (str(ip), str(key)))
+            if score > high_score:
+                high_score, winner = score, ip
+
+            elif score == high_score:
+                high_score, winner = score, max(str(ip), str(winner))
+        return winner
+
+
+
 def _repl(name, index):
     return '%s:%d' % (name, index)
 
 
-class Servers(object):
+class ConsistentHashing(object):
 
-    def __init__(self, ips=[], replicas=100):
+    def __init__(self, ips=[], replicas=100, hash=md5):
         self._ips = {}
         self._hashed_ips = []
         self.replicas = replicas
+        self._hash = hash
+
         for ip in ips:
             self.add(ip)
 
+    def __str__(self):
+        return '<ConsistentHashing with %s hash>' % self._hash
+
     def add(self, ip):
         for i in range(self.replicas):
             sip = _repl(ip, i)
-            hashed = _hash(sip)
+            hashed = self._hash(sip)
             self._ips[hashed] = sip
             bisect.insort(self._hashed_ips, hashed)
 
     def remove(self, ip):
         for i in range(self.replicas):
             sip = _repl(ip, i)
-            hashed = _hash(sip)
+            hashed = self._hash(sip)
             del self._ips[hashed]
             index = bisect.bisect_left(self._hashed_ips, hashed)
             del self._hashed_ips[index]
 
     def select(self, username):
-        hashed = _hash(username)
+        hashed = self._hash(username)
         start = bisect.bisect(self._hashed_ips, hashed,
                               hi=len(self._hashed_ips)-1)
         return self._ips[self._hashed_ips[start]].split(':')[0]
 
 
 NUM_USERS = 1000000
 
-if __name__ == '__main__':
-    selection = defaultdict(list)
-    servers = Servers(['postgres5', 'postgres2', 'postgres3', 'postgres4',
-                       'postgres1'])
 
-    users = ['%06d' % i for i in range(NUM_USERS)]
-    users = users
+def run_test(servers, users):
+    selection = defaultdict(list)
     for user in users:
         user_db = servers.select(user)
         selection[user_db].append(user)
@@ -87,3 +174,17 @@ def select(self, username):
 
     for db in selection:
         print('%d users in %s' % (len(selection[db]), db))
+
+
+if __name__ == '__main__':
+    users = ['%06d' % i for i in range(NUM_USERS)]
+    servers = ['postgres5', 'postgres2', 'postgres3', 'postgres4',
+               'postgres1']
+
+    for klass in (ConsistentHashing, RendezVous):
+        for hash in (md5, murmur64):
+            cluster = klass(list(servers), hash=hash)
+            print(cluster)
+            run_test(cluster, users)
+            print
+            print
diff --git a/distribution.py b/distribution.py
@@ -11,18 +11,20 @@
 users on the nearest 2 servers.
 
 inspired by:
-http://techspot.zzzeek.org/2012/07/07/the-absolutely-simplest-consistent-hashing-example/
 
+http://techspot.zzzeek.org/2012/07/07/the-absolutely-simplest-consistent-hashing-example
 """
 import hashlib
 import bisect
-import random
-import pprint
-
 from collections import defaultdict
 
 
-_hash = hashlib.md5
+def _hash(key):
+    return long(hashlib.md5(key).hexdigest(), 16)
+
+
+def _repl(name, index):
+    return '%s:%d' % (name, index)
 
 
 class Servers(object):
@@ -36,28 +38,21 @@ def __init__(self, ips=[], replicas=100):
 
     def add(self, ip):
         for i in range(self.replicas):
-            sip = ip + ':' + str(i)
-            hashed = self._hash(sip)
+            sip = _repl(ip, i)
+            hashed = _hash(sip)
             self._ips[hashed] = sip
             bisect.insort(self._hashed_ips, hashed)
 
     def remove(self, ip):
         for i in range(self.replicas):
-            sip = ip + ':' + str(i)
-            hashed = self._hash(sip)
+            sip = _repl(ip, i)
+            hashed = _hash(sip)
             del self._ips[hashed]
             index = bisect.bisect_left(self._hashed_ips, hashed)
             del self._hashed_ips[index]
 
-    def _hash(self, key):
-        def _hexhash(key):
-            return _hash(key).hexdigest()
-
-        hash = _hexhash(key)
-        return long(hash, 16)
-
     def select(self, username):
-        hashed = self._hash(username)
+        hashed = _hash(username)
         start = bisect.bisect(self._hashed_ips, hashed,
                               hi=len(self._hashed_ips)-1)
         return self._ips[self._hashed_ips[start]].split(':')[0]
@@ -70,7 +65,6 @@ def select(self, username):
     servers = Servers(['postgres5', 'postgres2', 'postgres3', 'postgres4',
                        'postgres1'])
 
-
     users = ['%06d' % i for i in range(NUM_USERS)]
     users = users
     for user in users:
@@ -93,4 +87,3 @@ def select(self, username):
 
     for db in selection:
         print('%d users in %s' % (len(selection[db]), db))
-
diff --git a/distribution.py b/distribution.py
@@ -1,5 +1,19 @@
-# inspired by
-# http://techspot.zzzeek.org/2012/07/07/the-absolutely-simplest-consistent-hashing-example/
+""" Consistent load-balancing.
+
+We have a few servers and we want a load-balancer to
+distribute incoming requests across them in a deterministic
+and consistent way.
+
+Removing a backend server should not impact users on other
+servers.
+
+Adding a backend will generate the redistribution of
+users on the nearest 2 servers.
+
+inspired by:
+http://techspot.zzzeek.org/2012/07/07/the-absolutely-simplest-consistent-hashing-example/
+
+"""
 import hashlib
 import bisect
 import random

diff --git a/distribution.py b/distribution.py
@@ -49,15 +49,15 @@ def select(self, username):
         return self._ips[self._hashed_ips[start]].split(':')[0]
 
 
+NUM_USERS = 1000000
+
 if __name__ == '__main__':
     selection = defaultdict(list)
     servers = Servers(['postgres5', 'postgres2', 'postgres3', 'postgres4',
                        'postgres1'])
 
-    #print('Hash ring')
-    #pprint.pprint([(servers._ips[hash], hash) for hash in servers._hashed_ips])
 
-    users = ['%06d' % i for i in range(100000)]
+    users = ['%06d' % i for i in range(NUM_USERS)]
     users = users
     for user in users:
         user_db = servers.select(user)

diff --git a/distribution.py b/distribution.py
@@ -13,22 +13,27 @@
 
 class Servers(object):
 
-    def __init__(self, ips=[]):
+    def __init__(self, ips=[], replicas=100):
         self._ips = {}
         self._hashed_ips = []
+        self.replicas = replicas
         for ip in ips:
             self.add(ip)
 
     def add(self, ip):
-        hashed = self._hash(ip)
-        self._ips[hashed] = ip
-        bisect.insort(self._hashed_ips, hashed)
+        for i in range(self.replicas):
+            sip = ip + ':' + str(i)
+            hashed = self._hash(sip)
+            self._ips[hashed] = sip
+            bisect.insort(self._hashed_ips, hashed)
 
     def remove(self, ip):
-        hashed = self._hash(ip)
-        del self._ips[hashed]
-        index = bisect.bisect_left(self._hashed_ips, hashed)
-        del self._hashed_ips[index]
+        for i in range(self.replicas):
+            sip = ip + ':' + str(i)
+            hashed = self._hash(sip)
+            del self._ips[hashed]
+            index = bisect.bisect_left(self._hashed_ips, hashed)
+            del self._hashed_ips[index]
 
     def _hash(self, key):
         def _hexhash(key):
@@ -41,16 +46,16 @@ def select(self, username):
         hashed = self._hash(username)
         start = bisect.bisect(self._hashed_ips, hashed,
                               hi=len(self._hashed_ips)-1)
-        return self._ips[self._hashed_ips[start]]
+        return self._ips[self._hashed_ips[start]].split(':')[0]
 
 
 if __name__ == '__main__':
     selection = defaultdict(list)
     servers = Servers(['postgres5', 'postgres2', 'postgres3', 'postgres4',
                        'postgres1'])
 
-    print('Hash ring')
-    pprint.pprint([(servers._ips[hash], hash) for hash in servers._hashed_ips])
+    #print('Hash ring')
+    #pprint.pprint([(servers._ips[hash], hash) for hash in servers._hashed_ips])
 
     users = ['%06d' % i for i in range(100000)]
     users = users

diff --git a/distribution.py b/distribution.py
@@ -3,10 +3,14 @@
 import hashlib
 import bisect
 import random
+import pprint
 
 from collections import defaultdict
 
 
+_hash = hashlib.md5
+
+
 class Servers(object):
 
     def __init__(self, ips=[]):
@@ -28,34 +32,39 @@ def remove(self, ip):
 
     def _hash(self, key):
         def _hexhash(key):
-            return hashlib.md5(key).hexdigest()
+            return _hash(key).hexdigest()
+
         hash = _hexhash(key)
         return long(hash, 16)
 
     def select(self, username):
         hashed = self._hash(username)
-        start = bisect.bisect(self._hashed_ips, hashed)
-        if start == len(self._hashed_ips):
-            start = 0
+        start = bisect.bisect(self._hashed_ips, hashed,
+                              hi=len(self._hashed_ips)-1)
         return self._ips[self._hashed_ips[start]]
 
 
 if __name__ == '__main__':
     selection = defaultdict(list)
-    servers = Servers(['postgres1', 'postgres2', 'postgres3', 'postgres4',
-                       'postgres5'])
-    users = ['%06d' % i for i in range(100000)]
+    servers = Servers(['postgres5', 'postgres2', 'postgres3', 'postgres4',
+                       'postgres1'])
+
+    print('Hash ring')
+    pprint.pprint([(servers._ips[hash], hash) for hash in servers._hashed_ips])
 
+    users = ['%06d' % i for i in range(100000)]
+    users = users
     for user in users:
         user_db = servers.select(user)
         selection[user_db].append(user)
 
+    print '===='
+    print('Distribution')
     for db in selection:
         print('%d users in %s' % (len(selection[db]), db))
 
     # removing server 2 and 4
     servers.remove('postgres2')
-    servers.remove('postgres4')
     print '===='
 
     selection = defaultdict(list)
@@ -64,4 +73,5 @@ def select(self, username):
         selection[user_db].append(user)
 
     for db in selection:
-        print('%d users in %s' % (len(selection[db]), db))
+        print('%d users in %s' % (len(selection[db]), db))
+
diff --git a/distribution.py b/distribution.py
@@ -0,0 +1,67 @@
+# inspired by
+# http://techspot.zzzeek.org/2012/07/07/the-absolutely-simplest-consistent-hashing-example/
+import hashlib
+import bisect
+import random
+
+from collections import defaultdict
+
+
+class Servers(object):
+
+    def __init__(self, ips=[]):
+        self._ips = {}
+        self._hashed_ips = []
+        for ip in ips:
+            self.add(ip)
+
+    def add(self, ip):
+        hashed = self._hash(ip)
+        self._ips[hashed] = ip
+        bisect.insort(self._hashed_ips, hashed)
+
+    def remove(self, ip):
+        hashed = self._hash(ip)
+        del self._ips[hashed]
+        index = bisect.bisect_left(self._hashed_ips, hashed)
+        del self._hashed_ips[index]
+
+    def _hash(self, key):
+        def _hexhash(key):
+            return hashlib.md5(key).hexdigest()
+        hash = _hexhash(key)
+        return long(hash, 16)
+
+    def select(self, username):
+        hashed = self._hash(username)
+        start = bisect.bisect(self._hashed_ips, hashed)
+        if start == len(self._hashed_ips):
+            start = 0
+        return self._ips[self._hashed_ips[start]]
+
+
+if __name__ == '__main__':
+    selection = defaultdict(list)
+    servers = Servers(['postgres1', 'postgres2', 'postgres3', 'postgres4',
+                       'postgres5'])
+    users = ['%06d' % i for i in range(100000)]
+
+    for user in users:
+        user_db = servers.select(user)
+        selection[user_db].append(user)
+
+    for db in selection:
+        print('%d users in %s' % (len(selection[db]), db))
+
+    # removing server 2 and 4
+    servers.remove('postgres2')
+    servers.remove('postgres4')
+    print '===='
+
+    selection = defaultdict(list)
+    for user in users:
+        user_db = servers.select(user)
+        selection[user_db].append(user)
+
+    for db in selection:
+        print('%d users in %s' % (len(selection[db]), db))