
@dariodip
Last active January 18, 2022 11:02

Revisions

  1. dariodip revised this gist Jan 18, 2022. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion main.py
    @@ -8,7 +8,7 @@ def generate_random_list(n, k):
         return [random.randint(0, k) for _ in range(n)]


    -def generate_random_list_pidgeon(n):
    +def generate_random_list_pigeon(n):
         """
         Return an unsorted list of n random elements from 1 to n-1.
         Exactly one element is duplicated.
  2. dariodip revised this gist Jun 6, 2021. 5 changed files with 5 additions and 5 deletions.
    2 changes: 1 addition & 1 deletion bf_duplicate.py
    @@ -1,6 +1,6 @@
     from bloom_filter import BloomFilter

    -def find_duplicate_bf(a: list, bf_prob: float=0.5) -> int:
    +def find_duplicate_bf(a: list, bf_prob: float=0.5):
         bf = BloomFilter(len(a), bf_prob)

         for (i, el) in enumerate(a):
    2 changes: 1 addition & 1 deletion dummy.py
    @@ -1,5 +1,5 @@

    -def find_duplicate_dummy(a: list) -> int:
    +def find_duplicate_dummy(a: list):
         # for each index but the last
         for i in range(len(a) - 1):
             # for every other element
    2 changes: 1 addition & 1 deletion floyd.py
    @@ -1,5 +1,5 @@

    -def find_duplicates_floyd(a):
    +def find_duplicates_floyd(a: list):
         # both tortoise and hare start
         # from the beginning
         tortoise = a[0]
    2 changes: 1 addition & 1 deletion hashmap.py
    @@ -1,5 +1,5 @@

    -def find_duplicate_map(a: list) -> int:
    +def find_duplicate_map(a: list):
         seen = dict()
         for i in a:
             if i in seen:
    2 changes: 1 addition & 1 deletion sorted.py
    @@ -1,5 +1,5 @@

    -def find_duplicate_sorted(a: list) -> int:
    +def find_duplicate_sorted(a: list):
         a.sort()
         for i in range(1, len(a)):
             # duplicates are adjacent
  3. dariodip revised this gist Jun 6, 2021. 1 changed file with 8 additions and 0 deletions.
    8 changes: 8 additions & 0 deletions bysum.py
    @@ -0,0 +1,8 @@

    def find_duplicate_sum(a: list) -> int:
        sum_els = lambda x: int(x * (x + 1) / 2)
        sum_before = sum_els(min(a) - 1)
        sum_after = sum_els(max(a))

        list_sum = sum(a)
        return list_sum - (sum_after - sum_before)
  4. dariodip created this gist Jun 6, 2021.
    12 changes: 12 additions & 0 deletions bf_duplicate.py
    @@ -0,0 +1,12 @@
    from bloom_filter import BloomFilter

    def find_duplicate_bf(a: list, bf_prob: float=0.5) -> int:
        bf = BloomFilter(len(a), bf_prob)

        for (i, el) in enumerate(a):
            if bf.check(el):  # element is in bloom filter
                for j in range(i):  # check the previous elements
                    if a[j] == el:
                        return el
            else:
                bf.add(el)  # add element to bloom filter
    65 changes: 65 additions & 0 deletions bloom_filter.py
    @@ -0,0 +1,65 @@
    import math
    import mmh3
    from bitarray import bitarray

    class BloomFilter:
        '''
        Class for Bloom filter, using murmur3 hash function
        '''
        def __init__(self, count, fp_prob):
            '''
            count: int
                Number of items to be stored in Bloom Filter
            fp_prob: float
                False positive probability (in decimal)
            '''
            self.fp_prob = fp_prob
            self.size = BloomFilter.get_size(count, fp_prob)
            self.hash_count = BloomFilter.get_hash_count(self.size, count)
            self.bit_array = bitarray(self.size)
            self.bit_array.setall(0)

        def add(self, item):
            '''
            Add an item to the filter
            '''
            # create a generator having a digest for every hash function (items are stringified because mmh3.hash expects str or bytes)
            digests = (mmh3.hash(str(item), i) % self.size for i in range(self.hash_count))
            for i in digests:
                self.bit_array[i] = True

        def check(self, item) -> bool:
            '''
            Check for existence of an item in the filter
            '''
            # create a generator having a digest for every hash function (items are stringified because mmh3.hash expects str or bytes)
            digests = (mmh3.hash(str(item), i) % self.size for i in range(self.hash_count))
            for i in digests:
                # if at least one bit is unset, the item is definitely absent
                if self.bit_array[i] == False:
                    return False
            return True

        @staticmethod
        def get_size(n, p) -> int:
            '''
            Return the size of the bit array (m) to be used
            n : int
                number of items expected to be stored in the filter
            p : float
                false positive probability (in decimal)
            '''
            m = -(n * math.log(p)) / (math.log(2) ** 2)
            return int(m)

        @staticmethod
        def get_hash_count(m, n) -> int:
            '''
            Return the number of hash functions (k) to be used
            m : int
                size of the bit array
            n : int
                number of items expected to be stored in the filter
            '''
            k = (m / n) * math.log(2)
            return int(k)
    8 changes: 8 additions & 0 deletions dummy.py
    @@ -0,0 +1,8 @@

    def find_duplicate_dummy(a: list) -> int:
        # for each index but the last
        for i in range(len(a) - 1):
            # for every other element
            for j in range(i + 1, len(a)):
                if a[i] == a[j]:
                    return a[i]
    20 changes: 20 additions & 0 deletions floyd.py
    @@ -0,0 +1,20 @@

    def find_duplicates_floyd(a):
        # both tortoise and hare start
        # from the beginning
        tortoise = a[0]
        hare = a[0]
        while True:
            tortoise = a[tortoise]
            hare = a[a[hare]]
            if tortoise == hare:
                # cycle found
                break
        # go to the beginning of the cycle
        ptr1 = a[0]
        ptr2 = tortoise
        while ptr1 != ptr2:
            ptr1 = a[ptr1]
            ptr2 = a[ptr2]

        return ptr1
    8 changes: 8 additions & 0 deletions hashmap.py
    @@ -0,0 +1,8 @@

    def find_duplicate_map(a: list) -> int:
        seen = dict()
        for i in a:
            if i in seen:
                # here is a duplicate
                return i
            seen[i] = True
    19 changes: 19 additions & 0 deletions main.py
    @@ -0,0 +1,19 @@
    import random


    def generate_random_list(n, k):
        """
        Return an unsorted list of n random integers from 0 to k
        """
        return [random.randint(0, k) for _ in range(n)]


    def generate_random_list_pidgeon(n):
        """
        Return an unsorted list of n random elements from 1 to n-1.
        Exactly one element is duplicated.
        """
        a = [i for i in range(n)]
        a[0] = a[random.randint(0, n-1)]
        random.shuffle(a)  # shuffle the list
        return a
    7 changes: 7 additions & 0 deletions sorted.py
    @@ -0,0 +1,7 @@

    def find_duplicate_sorted(a: list) -> int:
        a.sort()
        for i in range(1, len(a)):
            # duplicates are adjacent
            if a[i-1] == a[i]:
                return a[i]
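
A quick sanity check of the sizing math in bloom_filter.py may be helpful. The two static methods implement the standard formulas m = -n * ln(p) / (ln 2)^2 and k = (m / n) * ln 2; the numbers below are illustrative and not taken from the gist:

    import math

    n, p = 1000, 0.05  # illustrative values, not from the gist
    m = int(-(n * math.log(p)) / (math.log(2) ** 2))
    k = int((m / n) * math.log(2))
    print(m, k)        # -> 6235 4

Note that with the default bf_prob of 0.5, the int() truncation pushes hash_count down to 0, so check() returns True for every item and find_duplicate_bf falls back to scanning the previous elements on each step: still correct, just quadratic, much like find_duplicate_dummy.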
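
The gist does not include a driver, so the following is a minimal sketch of how the pieces could be exercised together, assuming the files above sit next to it on the import path; the list size, the timing loop, and the choice of which finders to run are illustrative additions, not part of the gist:

    import time

    from bysum import find_duplicate_sum
    from dummy import find_duplicate_dummy
    from hashmap import find_duplicate_map
    from main import generate_random_list_pigeon
    from sorted import find_duplicate_sorted

    # find_duplicate_bf is left out to avoid the mmh3/bitarray dependencies, and
    # find_duplicates_floyd is left out because it assumes the value 0 never
    # occurs, so that index 0 stays outside the cycle.
    finders = [
        find_duplicate_dummy,
        find_duplicate_sorted,
        find_duplicate_map,
        find_duplicate_sum,
    ]

    if __name__ == "__main__":
        # note: with probability 1/n the overwrite in generate_random_list_pigeon
        # picks index 0, in which case the list contains no duplicate at all
        a = generate_random_list_pigeon(2_000)
        for find in finders:
            start = time.perf_counter()
            dup = find(list(a))  # pass a copy: find_duplicate_sorted sorts in place
            elapsed = time.perf_counter() - start
            print(f"{find.__name__}: {dup} ({elapsed:.4f} s)")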