Last active
January 31, 2023 22:40
-
-
Save luminousmen/f7969aa196af41b9979087e2e738d0f2 to your computer and use it in GitHub Desktop.
Revisions
-
luminousmen revised this gist
Jan 31, 2023 . No changes.There are no files selected for viewing
-
luminousmen revised this gist
Jan 31, 2023 . 2 changed files with 47 additions and 0 deletions.There are no files selected for viewing
File renamed without changes.This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,47 @@ import hashlib class BloomFilter: def __init__(self, size, num_hashes, salt=None): self.size = size self.num_hashes = num_hashes self.salt = salt or '' self.bit_array = [0] * size def add(self, element): for i in range(self.num_hashes): digest = hashlib.sha1((self.salt + str(element) + str(i)).encode('utf-8')).hexdigest() index = int(digest, 16) % self.size self.bit_array[index] = 1 def __contains__(self, element): for i in range(self.num_hashes): digest = hashlib.sha1((self.salt + str(element) + str(i)).encode('utf-8')).hexdigest() index = int(digest, 16) % self.size if self.bit_array[index] == 0: return False return True for h in self.calculate_hash(value): if not self.bitmap[h]: return False return True if __name__ == "__main__": coffees = [ "Iced Coffee", "Iced Coffee with Milk", "Espresso", "Espresso Macchiato", "Flat White", "Latte Macchiato", "Cappuccino", "Mocha", ] bloom = BloomFilter(20, 2) for drink in coffees: bloom.add(drink) print(bloom.bit_array) print("Flat White" in bloom) print("Americano" in bloom) -
luminousmen created this gist
Jan 31, 2023 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,60 @@ # Hash implementations from # http://www.partow.net/programming/hashfunctions/index.html # Author: @luminousmen class BloomFilter: def __init__(self, size: int): self.size = size self.bitmap = size * [0] def FNV(self, key: str): fnv_prime = 0x811C9DC5 hash = 0 for i in range(len(key)): hash *= fnv_prime hash ^= ord(key[i]) return hash def DJB(self, key: str): hash = 5381 for i in range(len(key)): hash = ((hash << 5) + hash) + ord(key[i]) return hash def calculate_hash(self, value): # FNV hash1 = self.FNV(value) % self.size # MurMur hash2 = self.DJB(value) % self.size return hash1, hash2 def add(self, value: str): for h in self.calculate_hash(value): self.bitmap[h] = 1 def check(self, value): for h in self.calculate_hash(value): if not self.bitmap[h]: return False return True if __name__ == "__main__": coffees = [ "Iced Coffee", "Iced Coffee with Milk", "Espresso", "Espresso Macchiato", "Flat White", "Latte Macchiato", "Americano", "Mocha", ] bloom = BloomFilter(20) for drink in coffees: bloom.add(drink) print(bloom.bitmap) print(bloom.check("Flat White")) print(bloom.check("Cappuccino"))