
@dariodip
Last active January 18, 2022 11:02

Revisions

  1. dariodip revised this gist Jan 18, 2022. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion main.py
    @@ -8,7 +8,7 @@ def generate_random_list(n, k):
         return [random.randint(0, k) for _ in range(n)]


    -def generate_random_list_pidgeon(n):
    +def generate_random_list_pigeon(n):
         """
         Return an unsorted list of n random elements from 1 to n-1.
         Exactly one element is duplicated.
  2. dariodip revised this gist Jun 6, 2021. 5 changed files with 5 additions and 5 deletions.
    2 changes: 1 addition & 1 deletion bf_duplicate.py
    @@ -1,6 +1,6 @@
     from bloom_filter import BloomFilter

    -def find_duplicate_bf(a: list, bf_prob: float=0.5) -> int:
    +def find_duplicate_bf(a: list, bf_prob: float=0.5):
         bf = BloomFilter(len(a), bf_prob)

         for (i, el) in enumerate(a):
    2 changes: 1 addition & 1 deletion dummy.py
    @@ -1,5 +1,5 @@

    -def find_duplicate_dummy(a: list) -> int:
    +def find_duplicate_dummy(a: list):
         # for each index but the last
         for i in range(len(a) - 1):
             # for every other element
    2 changes: 1 addition & 1 deletion floyd.py
    @@ -1,5 +1,5 @@

    -def find_duplicates_floyd(a):
    +def find_duplicates_floyd(a: list):
         # both tortoise and hare start
         # from the beginning
         tortoise = a[0]
    2 changes: 1 addition & 1 deletion hashmap.py
    @@ -1,5 +1,5 @@

    -def find_duplicate_map(a: list) -> int:
    +def find_duplicate_map(a: list):
         seen = dict()
         for i in a:
             if i in seen:
    2 changes: 1 addition & 1 deletion sorted.py
    @@ -1,5 +1,5 @@

    -def find_duplicate_sorted(a: list) -> int:
    +def find_duplicate_sorted(a: list):
         a.sort()
         for i in range(1, len(a)):
             # duplicates are adjacent
  3. dariodip revised this gist Jun 6, 2021. 1 changed file with 8 additions and 0 deletions.
    8 changes: 8 additions & 0 deletions bysum.py
    @@ -0,0 +1,8 @@

    def find_duplicate_sum(a: list) -> int:
        sum_els = lambda x: int(x * (x + 1) / 2)
        sum_before = sum_els(min(a) - 1)
        sum_after = sum_els(max(a))

        list_sum = sum(a)
        return list_sum - (sum_after - sum_before)
  4. dariodip created this gist Jun 6, 2021.
    12 changes: 12 additions & 0 deletions bf_duplicate.py
    @@ -0,0 +1,12 @@
    from bloom_filter import BloomFilter

    def find_duplicate_bf(a: list, bf_prob: float=0.5) -> int:
        bf = BloomFilter(len(a), bf_prob)

        for (i, el) in enumerate(a):
            if bf.check(el):  # element is in bloom filter
                for j in range(i):  # check the previous elements
                    if a[j] == el:
                        return el
            else:
                bf.add(el)  # add element to bloom filter
    65 changes: 65 additions & 0 deletions bloom_filter.py
    @@ -0,0 +1,65 @@
    import math
    import mmh3
    from bitarray import bitarray

    class BloomFilter:
        '''
        Class for Bloom filter, using murmur3 hash function
        '''
        def __init__(self, count, fp_prob):
            '''
            count: int
                Number of items to be stored in Bloom Filter
            fp_prob: float
                False positive probability (in decimal)
            '''
            self.fp_prob = fp_prob
            self.size = BloomFilter.get_size(count, fp_prob)
            self.hash_count = BloomFilter.get_hash_count(self.size, count)
            self.bit_array = bitarray(self.size)
            self.bit_array.setall(0)

        def add(self, item):
            '''
            Add an item to the filter
            '''
            # create a generator having a digest for every hash function (items are stringified because mmh3.hash expects str or bytes)
            digests = (mmh3.hash(str(item), i) % self.size for i in range(self.hash_count))
            for i in digests:
                self.bit_array[i] = True

        def check(self, item) -> bool:
            '''
            Check for existence of an item in the filter
            '''
            # create a generator having a digest for every hash function (items are stringified because mmh3.hash expects str or bytes)
            digests = (mmh3.hash(str(item), i) % self.size for i in range(self.hash_count))
            for i in digests:
                # if at least one bit is unset, the item is definitely absent
                if self.bit_array[i] == False:
                    return False
            return True

        @staticmethod
        def get_size(n, p) -> int:
            '''
            Return the size of the bit array (m) to be used
            n : int
                number of items expected to be stored in the filter
            p : float
                false positive probability (in decimal)
            '''
            m = -(n * math.log(p)) / (math.log(2) ** 2)
            return int(m)

        @staticmethod
        def get_hash_count(m, n) -> int:
            '''
            Return the number of hash functions (k) to be used
            m : int
                size of the bit array
            n : int
                number of items expected to be stored in the filter
            '''
            k = (m / n) * math.log(2)
            return int(k)
    8 changes: 8 additions & 0 deletions dummy.py
    @@ -0,0 +1,8 @@

    def find_duplicate_dummy(a: list) -> int:
        # for each index but the last
        for i in range(len(a) - 1):
            # for every other element
            for j in range(i + 1, len(a)):
                if a[i] == a[j]:
                    return a[i]
    20 changes: 20 additions & 0 deletions floyd.py
    @@ -0,0 +1,20 @@

    def find_duplicates_floyd(a):
        # both tortoise and hare start
        # from the beginning
        tortoise = a[0]
        hare = a[0]
        while True:
            tortoise = a[tortoise]
            hare = a[a[hare]]
            if tortoise == hare:
                # cycle found
                break
        # go to the beginning of the cycle
        ptr1 = a[0]
        ptr2 = tortoise
        while ptr1 != ptr2:
            ptr1 = a[ptr1]
            ptr2 = a[ptr2]

        return ptr1
    8 changes: 8 additions & 0 deletions hashmap.py
    @@ -0,0 +1,8 @@

    def find_duplicate_map(a: list) -> int:
        seen = dict()
        for i in a:
            if i in seen:
                # here is a duplicate
                return i
            seen[i] = True
    19 changes: 19 additions & 0 deletions main.py
    @@ -0,0 +1,19 @@
    import random


    def generate_random_list(n, k):
        """
        Return an unsorted list of n random integers from 0 to k
        """
        return [random.randint(0, k) for _ in range(n)]


    def generate_random_list_pidgeon(n):
        """
        Return an unsorted list of n random elements from 1 to n-1.
        Exactly one element is duplicated.
        """
        a = [i for i in range(n)]
        a[0] = a[random.randint(0, n-1)]
        random.shuffle(a)  # shuffle the list
        return a
    7 changes: 7 additions & 0 deletions sorted.py
    @@ -0,0 +1,7 @@

    def find_duplicate_sorted(a: list) -> int:
        a.sort()
        for i in range(1, len(a)):
            # duplicates are adjacent
            if a[i-1] == a[i]:
                return a[i]
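
A quick sanity check of the sizing math in bloom_filter.py may be helpful. The two static methods implement the standard formulas m = -n * ln(p) / (ln 2)^2 and k = (m / n) * ln 2; the numbers below are illustrative and not taken from the gist:

    import math

    n, p = 1000, 0.05  # illustrative values, not from the gist
    m = int(-(n * math.log(p)) / (math.log(2) ** 2))
    k = int((m / n) * math.log(2))
    print(m, k)        # -> 6235 4

Note that with the default bf_prob of 0.5, the int() truncation pushes hash_count down to 0, so check() returns True for every item and find_duplicate_bf falls back to scanning the previous elements on each step: still correct, just quadratic, much like find_duplicate_dummy.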
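
The gist does not include a driver, so the following is a minimal sketch of how the pieces could be exercised together, assuming the files above sit next to it on the import path; the list size, the timing loop, and the choice of which finders to run are illustrative additions, not part of the gist:

    import time

    from bysum import find_duplicate_sum
    from dummy import find_duplicate_dummy
    from hashmap import find_duplicate_map
    from main import generate_random_list_pigeon
    from sorted import find_duplicate_sorted

    # find_duplicate_bf is left out to avoid the mmh3/bitarray dependencies, and
    # find_duplicates_floyd is left out because it assumes the value 0 never
    # occurs, so that index 0 stays outside the cycle.
    finders = [
        find_duplicate_dummy,
        find_duplicate_sorted,
        find_duplicate_map,
        find_duplicate_sum,
    ]

    if __name__ == "__main__":
        # note: with probability 1/n the overwrite in generate_random_list_pigeon
        # picks index 0, in which case the list contains no duplicate at all
        a = generate_random_list_pigeon(2_000)
        for find in finders:
            start = time.perf_counter()
            dup = find(list(a))  # pass a copy: find_duplicate_sorted sorts in place
            elapsed = time.perf_counter() - start
            print(f"{find.__name__}: {dup} ({elapsed:.4f} s)")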