Skip to content

Instantly share code, notes, and snippets.

@luminousmen
Last active January 31, 2023 22:40
Show Gist options
  • Save luminousmen/f7969aa196af41b9979087e2e738d0f2 to your computer and use it in GitHub Desktop.
Save luminousmen/f7969aa196af41b9979087e2e738d0f2 to your computer and use it in GitHub Desktop.

Revisions

  1. luminousmen revised this gist Jan 31, 2023. No changes.
  2. luminousmen revised this gist Jan 31, 2023. 2 changed files with 47 additions and 0 deletions.
    File renamed without changes.
    47 changes: 47 additions & 0 deletions bloom_2.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,47 @@
    import hashlib

    class BloomFilter:
    def __init__(self, size, num_hashes, salt=None):
    self.size = size
    self.num_hashes = num_hashes
    self.salt = salt or ''
    self.bit_array = [0] * size

    def add(self, element):
    for i in range(self.num_hashes):
    digest = hashlib.sha1((self.salt + str(element) + str(i)).encode('utf-8')).hexdigest()
    index = int(digest, 16) % self.size
    self.bit_array[index] = 1

    def __contains__(self, element):
    for i in range(self.num_hashes):
    digest = hashlib.sha1((self.salt + str(element) + str(i)).encode('utf-8')).hexdigest()
    index = int(digest, 16) % self.size
    if self.bit_array[index] == 0:
    return False
    return True
    for h in self.calculate_hash(value):
    if not self.bitmap[h]:
    return False
    return True


    if __name__ == "__main__":
    coffees = [
    "Iced Coffee",
    "Iced Coffee with Milk",
    "Espresso",
    "Espresso Macchiato",
    "Flat White",
    "Latte Macchiato",
    "Cappuccino",
    "Mocha",
    ]

    bloom = BloomFilter(20, 2)
    for drink in coffees:
    bloom.add(drink)
    print(bloom.bit_array)

    print("Flat White" in bloom)
    print("Americano" in bloom)
  3. luminousmen created this gist Jan 31, 2023.
    60 changes: 60 additions & 0 deletions bloom.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,60 @@
    # Hash implementations from
    # http://www.partow.net/programming/hashfunctions/index.html
    # Author: @luminousmen

    class BloomFilter:
    def __init__(self, size: int):
    self.size = size
    self.bitmap = size * [0]

    def FNV(self, key: str):
    fnv_prime = 0x811C9DC5
    hash = 0
    for i in range(len(key)):
    hash *= fnv_prime
    hash ^= ord(key[i])
    return hash

    def DJB(self, key: str):
    hash = 5381
    for i in range(len(key)):
    hash = ((hash << 5) + hash) + ord(key[i])
    return hash

    def calculate_hash(self, value):
    # FNV
    hash1 = self.FNV(value) % self.size
    # MurMur
    hash2 = self.DJB(value) % self.size
    return hash1, hash2

    def add(self, value: str):
    for h in self.calculate_hash(value):
    self.bitmap[h] = 1

    def check(self, value):
    for h in self.calculate_hash(value):
    if not self.bitmap[h]:
    return False
    return True


    if __name__ == "__main__":
    coffees = [
    "Iced Coffee",
    "Iced Coffee with Milk",
    "Espresso",
    "Espresso Macchiato",
    "Flat White",
    "Latte Macchiato",
    "Americano",
    "Mocha",
    ]

    bloom = BloomFilter(20)
    for drink in coffees:
    bloom.add(drink)
    print(bloom.bitmap)

    print(bloom.check("Flat White"))
    print(bloom.check("Cappuccino"))