Last active
January 18, 2022 11:02
-
-
Save dariodip/8ebcd96a8cca33f945c468f372f56f62 to your computer and use it in GitHub Desktop.
Revisions
-
dariodip revised this gist
Jan 18, 2022 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -8,7 +8,7 @@ def generate_random_list(n, k): return [random.randint(0, k) for _ in range(n)] def generate_random_list_pigeon(n): """ Return an unsorted list of n random elements from 1 to n-1. Exacly one element is duplicated. -
dariodip revised this gist
Jun 6, 2021 . 5 changed files with 5 additions and 5 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,6 +1,6 @@ from bloom_filter import BloomFilter def find_duplicate_bf(a: list, bf_prob: float=0.5): bf = BloomFilter(len(a), bf_prob) for (i, el) in enumerate(a): This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,5 +1,5 @@ def find_duplicate_dummy(a: list): # for each index but the last for i in range(len(a) - 1): # for every other element This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,5 +1,5 @@ def find_duplicates_floyd(a: list): # both tortoise and hare start # from the beginning tortoise = a[0] This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,5 +1,5 @@ def find_duplicate_map(a: list): seen = dict() for i in a: if i in seen: This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,5 +1,5 @@ def find_duplicate_sorted(a: list): a.sort() for i in range(1, len(a)): # duplicates are adjacent -
dariodip revised this gist
Jun 6, 2021 . 1 changed file with 8 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,8 @@ def find_duplicate_sum(a: list) -> int: sum_els = lambda x: int(x * (x + 1) / 2) sum_before = sum_els(min(a) - 1) sum_after = sum_els(max(a)) list_sum = sum(a) return list_sum - (sum_after - sum_before) -
dariodip created this gist
Jun 6, 2021 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,12 @@ from bloom_filter import BloomFilter def find_duplicate_bf(a: list, bf_prob: float=0.5) -> int: bf = BloomFilter(len(a), bf_prob) for (i, el) in enumerate(a): if bf.check(el): # element is in bloom filter for j in range(i): # check the previous elements if a[j] == el: return el else: bf.add(el) # add element to bloom filter This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,65 @@ import math import mmh3 from bitarray import bitarray class BloomFilter: ''' Class for Bloom filter, using murmur3 hash function ''' def __init__(self, count, fp_prob): ''' count: int Number of items to be stored in Bloom Filter fp_prob: float False positive probability (in decimal) ''' self.fp_prob = fp_prob self.size = BloomFilter.get_size(count, fp_prob) self.hash_count = BloomFilter.get_hash_count(self.size, count) self.bit_array = bitarray(self.size) self.bit_array.setall(0) def add(self, item): ''' Add an item to the filter ''' # create a generator having a digest for every hash function digests = ( mmh3.hash(item, i) % self.size for i in range(self.hash_count) ) for i in digests: self.bit_array[i] = True def check(self, item) -> bool: ''' Check for existence of an item in filter ''' # create a generator having a digest for every hash function digests = ( mmh3.hash(item, i) % self.size for i in range(self.hash_count) ) for i in digests: # if at least one is false, return false if self.bit_array[i] == False: return False return True @staticmethod def get_size(n, p) -> int: ''' Return the size of bit array(m) to be used n : int number of items expected to be stored in filter p : float False Positive probability in decimal ''' m = -(n * math.log(p))/(math.log(2)**2) return int(m) @staticmethod def get_hash_count(m, n) -> int: ''' Return the hash function(k) to be used m : int size of bit array n : int number of items expected to be stored in filter ''' k = (m/n) * math.log(2) return int(k) This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,8 @@ def find_duplicate_dummy(a: list) -> int: # for each index but the last for i in range(len(a) - 1): # for every other element for j in range(i + 1, len(a)): if a[i] == a[j]: return a[i] This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,20 @@ def find_duplicates_floyd(a): # both tortoise and hare start # from the beginning tortoise = a[0] hare = a[0] while True: tortoise = a[tortoise] hare = a[a[hare]] if tortoise == hare: # cycle found break # go to the beginning of the cycle ptr1 = a[0] ptr2 = tortoise while ptr1 != ptr2: ptr1 = a[ptr1] ptr2 = a[ptr2] return ptr1 This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,8 @@ def find_duplicate_map(a: list) -> int: seen = dict() for i in a: if i in seen: # here is a duplicate return i seen[i] = True This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,19 @@ import random def generate_random_list(n, k): """ Return an unsorted list of n random integers from 0 to k """ return [random.randint(0, k) for _ in range(n)] def generate_random_list_pidgeon(n): """ Return an unsorted list of n random elements from 1 to n-1. Exacly one element is duplicated. """ a = [i for i in range(n)] a[0] = a[random.randint(0, n-1)] random.shuffle(a) # shuffle the list return a This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,7 @@ def find_duplicate_sorted(a: list) -> int: a.sort() for i in range(1, len(a)): # duplicates are adjacent if a[i-1] == a[i]: return a[i]