Last active
July 24, 2020 16:05
-
-
Save sobernaut/cb6b87ac7d570d3e9ce01fbd51fb6a4f to your computer and use it in GitHub Desktop.
Revisions
-
sobernaut revised this gist
Jul 24, 2020 . 1 changed file with 46 additions and 20 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,24 +1,21 @@ import sys, csv, os import json original = './data' new = './data/new' old = './updated' def stripp(x): return x.replace(' ', '') def getYearFile(path, year): for file in os.listdir(path): if file.endswith(".csv") and file.startswith(year): return path + '/' + file def getProcessedArr(file, rowNo, cb=None): arr = [] with open(file, 'r') as f: @@ -46,23 +43,52 @@ def compare(x, y): def formatCb(val): return val.split('_')[1] def dump(data, filename): f = './data/rnd/' + filename + '.json' with open(f, 'w', encoding='utf-8') as file: json.dump(data, file, ensure_ascii=False, indent=2) print('Dumped data on file {}'.format(f)) def diff(a, b): return list(set(a) - set(b)) def analyze(year): print('\n------------------------Year {}------------------------'.format(year)) old_csv = getYearFile(old, year) old_str_no = getProcessedArr(old_csv, 3, formatCb) new_csv = getYearFile(new, year) new_str_no = getProcessedArr(new_csv, 1) print('Total ids in new', len(new_str_no)) print('Total ids in old', len(old_str_no)) print('Same or not?', compare(old_str_no, new_str_no)) newminusold = diff(new_str_no, old_str_no) oldminusnew = diff(old_str_no, new_str_no) print('Ids in new that are not in old', len(newminusold)) dump(newminusold, year + 'newminusold') dump(oldminusnew, year + 'oldminusnew') original_csv = getYearFile(original, year) original_str_no = getProcessedArr(original_csv, 1) print('original', len(original_str_no)) print('Same or not?', compare(old_str_no, original_str_no), compare(new_str_no, original_str_no)) print('Ids in new that are not in original', len(list(set(new_str_no) - set(original_str_no)))) print('Ids in old that are not in 
original', len(list(set(old_str_no) - set(original_str_no)))) print('/END/') user_input = input('Enter year') for splitted in user_input.split(','): analyze(splitted.replace(' ', ''))
-
sobernaut revised this gist
Jul 19, 2020 . 1 changed file with 9 additions and 19 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -2,8 +2,8 @@ original = './data' new = './data/new' old = './updated' year = input('Enter year') @@ -48,31 +48,21 @@ def formatCb(val): old_csv = getYearFile(old) old_str_no = getProcessedArr(old_csv, 3, formatCb) new_csv = getYearFile(new) new_str_no = getProcessedArr(new_csv, 1) print('Total ids in new', len(new_str_no)) print('Total ids in old', len(old_str_no)) print('Same or not?', compare(old_str_no, new_str_no)) print('Ids in new that are not in old', len(list(set(new_str_no) - set(old_str_no)))) original_csv = getYearFile(original) original_str_no = getProcessedArr(original_csv, 1) print('original', len(original_str_no)) print('Same or not?', compare(old_str_no, original_str_no), compare(new_str_no, original_str_no)) print('Ids in new that are not in original', len(list(set(new_str_no) - set(original_str_no)))) print('Ids in old that are not in original', len(list(set(old_str_no) - set(original_str_no)))) -
sobernaut revised this gist
Jul 19, 2020 . 1 changed file with 10 additions and 12 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -63,18 +63,16 @@ def formatCb(val): print('Change in U', len(u)) print('Change in P', len(p)) original_csv = getYearFile(original) original_str_no = getProcessedArr(original_csv, 1) print('original', len(original_str_no)) print('Same or not?', compare(old_str_no, original_str_no), compare(new_str_no, original_str_no)) u = list(set(old_str_no) - set(original_str_no)) p = list(set(new_str_no) - set(original_str_no)) print('Change in U', len(u)) print('Change in P', len(p)) -
sobernaut revised this gist
Jul 15, 2020 . 1 changed file with 16 additions and 3 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -19,7 +19,7 @@ def getYearFile(path): def getProcessedArr(file, rowNo, cb=None): arr = [] with open(file, 'r') as f: reader = csv.reader(f) @@ -29,7 +29,8 @@ def getProcessedArr(file, rowNo): if line_count == 0: line_count += 1 else: stripped = stripp(row[rowNo]) structure_no = cb(stripped) if cb else stripped arr.append(structure_no) line_count += 1 @@ -42,15 +43,25 @@ def compare(x, y): return [comp_set, comp_set_none] def formatCb(val): return val.split('_')[1] old_csv = getYearFile(old) new_csv = getYearFile(new) old_str_no = getProcessedArr(old_csv, 1) new_str_no = getProcessedArr(new_csv, 3, formatCb) print('new', len(new_str_no), 'old', len(old_str_no)) print('Same or not?', compare(old_str_no, new_str_no)) u = list(set(old_str_no) - set(new_str_no)) p = list(set(new_str_no) - set(old_str_no)) print('Change in U', len(u)) print('Change in P', len(p)) arguments = sys.argv if len(arguments) > 2: @@ -65,3 +76,5 @@ def compare(x, y): print('Change in U', len(u)) print('Change in P', len(p)) -
sobernaut created this gist
Jul 15, 2020 . There are no files selected for viewing
"""Compare one column of per-year CSV files across data directories.

Initial revision of the script (diff hunk @@ -0,0 +1,67 @@).

Run as a script: prompts for a filename prefix (the "year"), locates the
matching CSV in the `old` and `new` directories, and prints whether the values
extracted from them agree. When at least two command-line arguments are
supplied, both are also compared against the CSV in the `original` directory.
"""
import sys, csv, os

original = './data'
# NOTE(review): `old` points at './data/new' while `new` points at './updated'
# -- confirm the two labels are not swapped.
old = './data/new'
new = './updated'

# Filename prefix to look for; filled in from user input by main().
year = None


def stripp(x):
    """Return `x` with every space character removed."""
    return x.replace(' ', '')


def getYearFile(path, prefix=None):
    """Return the path of a ``.csv`` file in `path` whose name starts with a prefix.

    Args:
        path: Directory to search (not recursive).
        prefix: Filename prefix to match. Defaults to the module-level `year`.

    Returns:
        ``path + '/' + filename`` for the first match that `os.listdir` yields.

    Raises:
        FileNotFoundError: If no file in `path` matches. Previously the
            function fell through and returned ``None``, which only failed
            later inside ``open()`` with an unhelpful TypeError.
    """
    wanted = year if prefix is None else prefix
    for file in os.listdir(path):
        if file.endswith(".csv") and file.startswith(wanted):
            return path + '/' + file
    raise FileNotFoundError(
        "No .csv file starting with {!r} found in {}".format(wanted, path)
    )


def getProcessedArr(file, rowNo):
    """Read one column from a CSV file, skipping the first row.

    Args:
        file: Path of the CSV file to read.
        rowNo: Zero-based index of the column to extract from each row
            (despite the name, it indexes into a row, i.e. selects a column).

    Returns:
        A list with the space-stripped value of column `rowNo` for every row
        after the first.
    """
    # newline='' lets the csv module handle line endings itself, as its
    # documentation requires for files passed to csv.reader.
    with open(file, 'r', newline='') as f:
        reader = csv.reader(f)
        header = next(reader, None)  # the first row is treated as a header
        arr = [stripp(row[rowNo]) for row in reader]
        # Total rows seen, header included (0 for an empty file).
        line_count = len(arr) + (0 if header is None else 1)
        print("Processed file {} with {} lines".format(file, line_count))
    return arr


def compare(x, y):
    """Compare two lists both as sets and as ordered sequences.

    Returns:
        ``[sets_equal, lists_equal]``: the first flag ignores order and
        duplicates, the second requires identical order and contents.
    """
    comp_set = set(x) == set(y)
    comp_set_none = x == y
    return [comp_set, comp_set_none]


def main():
    """Prompt for the year prefix and print the comparison report."""
    global year
    year = input('Enter year')

    old_csv = getYearFile(old)
    new_csv = getYearFile(new)

    # NOTE(review): column 1 for `old` and column 11 for `new` are taken
    # as-is from the script; their meaning is not visible here.
    old_str_no = getProcessedArr(old_csv, 1)
    new_str_no = getProcessedArr(new_csv, 11)

    print('new', len(new_str_no), 'old', len(old_str_no))
    print('Same or not?', compare(old_str_no, new_str_no))

    arguments = sys.argv
    if len(arguments) > 2:
        # NOTE(review): the script was collapsed onto a single line; all of
        # the statements below are assumed to belong to this branch because
        # they depend on original_str_no -- confirm.
        original_csv = getYearFile(original)
        original_str_no = getProcessedArr(original_csv, 1)
        print('original', len(original_str_no))
        print('Same or not?', compare(old_str_no, original_str_no), compare(new_str_no, original_str_no))
        u = list(set(old_str_no) - set(original_str_no))
        p = list(set(new_str_no) - set(original_str_no))
        print('Change in U', len(u))
        print('Change in P', len(p))


if __name__ == '__main__':
    main()