Skip to content

Instantly share code, notes, and snippets.

@xtao
Forked from tarekziade/distribution.py
Created June 4, 2016 05:39
Show Gist options
  • Save xtao/b2e918c7d0dc092bbab24637ef7ae6bc to your computer and use it in GitHub Desktop.
Save xtao/b2e918c7d0dc092bbab24637ef7ae6bc to your computer and use it in GitHub Desktop.
Consistent Distribution of users across servers
# inspired by
# http://techspot.zzzeek.org/2012/07/07/the-absolutely-simplest-consistent-hashing-example/
import hashlib
import bisect
import random
import pprint
from collections import defaultdict
_hash = hashlib.md5


class Servers(object):
    """Consistent-hash ring that maps usernames onto a set of servers.

    Every server is placed on the ring ``replicas`` times (as
    ``"<ip>:<n>"`` virtual nodes) so that keys spread evenly and only the
    keys owned by a server move when that server is removed.

    ``_ips`` maps each ring point (an integer hash) to its virtual-node
    label; ``_hashed_ips`` is the sorted list of those ring points.
    """

    def __init__(self, ips=None, replicas=100):
        """Build a ring containing ``ips``.

        :param ips: iterable of server names to register (default: none).
        :param replicas: number of virtual nodes created per server.
        """
        self._ips = {}
        self._hashed_ips = []
        self.replicas = replicas
        # ``None`` instead of a shared mutable ``[]`` default.
        for ip in ips or ():
            self.add(ip)

    def _points(self, ip):
        """Yield ``(ring_point, label)`` for every virtual node of ``ip``."""
        for i in range(self.replicas):
            sip = ip + ':' + str(i)
            yield self._hash(sip), sip

    def add(self, ip):
        """Register ``ip`` on the ring; adding it again is a no-op."""
        for hashed, sip in self._points(ip):
            if hashed in self._ips:
                # Already on the ring: inserting the point twice would
                # leave a stale entry behind after a later remove().
                continue
            self._ips[hashed] = sip
            bisect.insort(self._hashed_ips, hashed)

    def remove(self, ip):
        """Drop every virtual node of ``ip`` from the ring.

        :raises KeyError: if ``ip`` is not currently registered.
        """
        for hashed, _ in self._points(ip):
            del self._ips[hashed]
            index = bisect.bisect_left(self._hashed_ips, hashed)
            del self._hashed_ips[index]

    def _hash(self, key):
        """Return the MD5 digest of ``key`` as an integer.

        Text keys are UTF-8 encoded first because hashlib only accepts
        bytes on Python 3.
        """
        if not isinstance(key, bytes):
            key = key.encode('utf-8')
        # int() handles arbitrarily large values on both Python 2 and 3,
        # unlike the Python-2-only long().
        return int(_hash(key).hexdigest(), 16)

    def select(self, username):
        """Return the server responsible for ``username``.

        The owner is the first ring point strictly greater than the
        username's hash; hashes beyond the last point are clamped to the
        last point rather than wrapping to the first.

        :raises IndexError: if no server is registered.
        """
        if not self._hashed_ips:
            raise IndexError('cannot select a server: the ring is empty')
        hashed = self._hash(username)
        start = bisect.bisect(self._hashed_ips, hashed,
                              hi=len(self._hashed_ips) - 1)
        # Strip only the trailing ":<replica>" so that server names which
        # themselves contain ':' (e.g. "host:port") come back intact.
        return self._ips[self._hashed_ips[start]].rsplit(':', 1)[0]
# Number of synthetic usernames generated by the demo below.
NUM_USERS = 1000000


def count_users_per_server(servers, users):
    """Return a mapping of server name -> number of ``users`` routed to it.

    :param servers: any object exposing ``select(username)``.
    :param users: iterable of usernames.
    """
    # Only the totals are reported, so count instead of storing every user.
    counts = defaultdict(int)
    for user in users:
        counts[servers.select(user)] += 1
    return counts


def print_distribution(counts):
    """Print one ``<n> users in <server>`` line per entry of ``counts``."""
    for db in counts:
        print('%d users in %s' % (counts[db], db))


if __name__ == '__main__':
    servers = Servers(['postgres5', 'postgres2', 'postgres3', 'postgres4',
                       'postgres1'])
    users = ['%06d' % i for i in range(NUM_USERS)]

    print('====')
    print('Distribution')
    print_distribution(count_users_per_server(servers, users))

    # Remove one server (postgres2) and show how its users are spread
    # across the remaining ones.
    servers.remove('postgres2')
    print('====')
    print_distribution(count_users_per_server(servers, users))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment