Last active
June 4, 2016 05:39
-
-
Save tarekziade/efa320ee463d9675db6f55f2ffaa7f86 to your computer and use it in GitHub Desktop.
Consistent Distribution of users across servers
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ Consistent load-balancing. | |
| We have a few servers and we want a load-balancer to | |
| distribute incoming requests across them in a deterministic | |
| and consistent way. | |
| Removing a backend server should not impact users on other | |
| servers. | |
| Adding a backend will generate the redistribution of | |
| users on the nearest 2 servers. | |
| inspired by: | |
| http://techspot.zzzeek.org/2012/07/07/the-absolutely-simplest-consistent-hashing-example | |
| """ | |
| import hashlib | |
| import bisect | |
| from collections import defaultdict | |
| def _hash(key): | |
| return long(hashlib.md5(key).hexdigest(), 16) | |
| def _repl(name, index): | |
| return '%s:%d' % (name, index) | |
| class Servers(object): | |
| def __init__(self, ips=[], replicas=100): | |
| self._ips = {} | |
| self._hashed_ips = [] | |
| self.replicas = replicas | |
| for ip in ips: | |
| self.add(ip) | |
| def add(self, ip): | |
| for i in range(self.replicas): | |
| sip = _repl(ip, i) | |
| hashed = _hash(sip) | |
| self._ips[hashed] = sip | |
| bisect.insort(self._hashed_ips, hashed) | |
| def remove(self, ip): | |
| for i in range(self.replicas): | |
| sip = _repl(ip, i) | |
| hashed = _hash(sip) | |
| del self._ips[hashed] | |
| index = bisect.bisect_left(self._hashed_ips, hashed) | |
| del self._hashed_ips[index] | |
| def select(self, username): | |
| hashed = _hash(username) | |
| start = bisect.bisect(self._hashed_ips, hashed, | |
| hi=len(self._hashed_ips)-1) | |
| return self._ips[self._hashed_ips[start]].split(':')[0] | |
| NUM_USERS = 1000000 | |
| if __name__ == '__main__': | |
| selection = defaultdict(list) | |
| servers = Servers(['postgres5', 'postgres2', 'postgres3', 'postgres4', | |
| 'postgres1']) | |
| users = ['%06d' % i for i in range(NUM_USERS)] | |
| users = users | |
| for user in users: | |
| user_db = servers.select(user) | |
| selection[user_db].append(user) | |
| print '====' | |
| print('Distribution') | |
| for db in selection: | |
| print('%d users in %s' % (len(selection[db]), db)) | |
| # removing server 2 and 4 | |
| servers.remove('postgres2') | |
| print '====' | |
| selection = defaultdict(list) | |
| for user in users: | |
| user_db = servers.select(user) | |
| selection[user_db].append(user) | |
| for db in selection: | |
| print('%d users in %s' % (len(selection[db]), db)) |
And the winner is... RendezVous + sha256
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Uh oh!
There was an error while loading. Please reload this page.