Last active
May 2, 2025 04:16
-
-
Save serif/a1281c676cf5a1f77af6ff1a25255a85 to your computer and use it in GitHub Desktop.
Revisions
-
serif revised this gist
Nov 27, 2023 . 1 changed file with 5 additions and 0 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,4 +1,9 @@ #!/usr/bin/env python3 # updated 2023-11-27 # updated 2023-10-12 # updated 2021 # updated 2020 # created 2018 import sys import hashlib from urllib.parse import urlparse -
serif revised this gist
Nov 27, 2023 . 1 changed file with 70 additions and 64 deletions. There are no files selected for viewing
#!/usr/bin/env python3
"""Remove duplicate logins from a Bitwarden export .csv.

Usage: pass the path of the exported file as the only argument.  Two files
are written next to it: ``<name>_out.csv`` (header plus the first occurrence
of every login) and ``<name>_rem.csv`` (duplicates and incomplete rows).
"""
import csv
import hashlib
import os
import sys
from urllib.parse import urlparse


def _derive_path(in_path, suffix):
    """Return *in_path* with *suffix* inserted before its ``.csv`` extension."""
    stem, ext = os.path.splitext(in_path)
    if ext.lower() != '.csv':
        # Nothing to replace: append instead, so the derived path can never
        # be the input path itself (which would truncate the input).
        stem = in_path
    return stem + suffix + '.csv'


def _entry_digest(uri, username, password):
    """Return the MD5 hex digest that identifies a login for de-duplication."""
    try:
        host = urlparse(uri).netloc
    except ValueError:
        # urlparse rejects some malformed URIs; compare those verbatim.
        host = ''
    # Compare by host when the URI has one, so several pages of the same
    # site count as one login.  The parts are NUL-separated so that
    # ('ab', 'c') and ('a', 'bc') do not produce the same token.
    token = '\0'.join((host or uri, username, password))
    return hashlib.md5(token.encode('utf-8')).hexdigest()


def main(argv):
    """De-duplicate the Bitwarden CSV export named by ``argv[0]``.

    Two rows are duplicates when their login URI (reduced to its host when
    it has one), username and password are all equal.  The first occurrence
    is kept; later ones go to the ``_rem`` file together with rows that have
    fewer columns than the header.  Rows are parsed with the ``csv`` module,
    so quoted commas and multi-line notes stay inside their own field.

    Args:
        argv: Command-line arguments without the program name; ``argv[0]``
            is the input file path.

    Raises:
        SystemExit: If no path is given or the header row does not match
            the expected Bitwarden column list.
    """
    # Fields in Bitwarden CSV
    columns = ('folder,favorite,type,name,notes,fields,reprompt,'
               'login_uri,login_username,login_password,login_totp').split(',')
    if len(argv) < 1:
        sys.exit('Supply input file path as command argument')

    in_path = argv[0]
    out_path = _derive_path(in_path, '_out')
    rem_path = _derive_path(in_path, '_rem')

    uri_col = columns.index('login_uri')
    user_col = columns.index('login_username')
    pass_col = columns.index('login_password')

    seen = set()
    write_count = 0
    dup_count = 0
    short_count = 0

    # 'utf-8-sig' also accepts a leading BOM; newline='' lets the csv module
    # handle line endings inside quoted fields itself.
    with open(in_path, 'r', encoding='utf-8-sig', newline='') as in_file:
        reader = csv.reader(in_file)

        # Validate .csv format before any output file is created
        header = next(reader, None)
        if header != columns:
            print('\nBitwarden CSV format has changed.')
            print('Contact author for update.')
            sys.exit(1)

        with open(out_path, 'w', encoding='utf8', newline='') as out_file, \
                open(rem_path, 'w', encoding='utf8', newline='') as rem_file:
            kept = csv.writer(out_file, lineterminator='\n')
            removed = csv.writer(rem_file, lineterminator='\n')
            kept.writerow(header)

            for row in reader:
                # Skip empty lines
                if not row:
                    continue

                # Rows that lack columns cannot be compared; set them aside
                if len(row) < len(columns):
                    print(f'Missing fields in line {reader.line_num}:\n'
                          f'{",".join(row)}')
                    removed.writerow(row)
                    short_count += 1
                    continue

                digest = _entry_digest(
                    row[uri_col], row[user_col], row[pass_col])

                # Write entry
                if digest in seen:
                    removed.writerow(row)
                    dup_count += 1
                else:
                    seen.add(digest)
                    kept.writerow(row)
                    write_count += 1

    # Report
    print(f'\nOutput file: {out_path}\n{write_count} unique entries saved')
    print(f'\n{dup_count} duplicates saved to {rem_path}')
    if short_count:
        print(f'{short_count} incomplete rows saved to {rem_path}')


if __name__ == "__main__":
    main(sys.argv[1:])
serif created this gist
Oct 13, 2023 . There are no files selected for viewing
#!/usr/bin/env python3
# bwclean2.py
# Removes duplicates from Bitwarden export .csv
# 2019-02-09
# 2023-10-12
import csv
import hashlib
import os
import sys
from urllib.parse import urlparse

# Field ordinals in Bitwarden CSV
FOLDER = 0
FAVORITE = 1
TYPE = 2
NAME = 3
NOTES = 4
FIELDS = 5
REPROMPT = 6
URI = 7
USERNAME = 8
PASSWORD = 9
TOTP = 10


def _sibling_path(in_file_path, tag):
    """Return the path of the ``<name><tag>.csv`` file next to the input."""
    stem, ext = os.path.splitext(in_file_path)
    if ext.lower() != '.csv':
        # Keep the whole name when there is no .csv extension to drop.
        stem = in_file_path
    return stem + tag + '.csv'


def _login_hash(row):
    """Return the MD5 hex digest of a row's URI host, username and password."""
    try:
        domain = urlparse(row[URI]).netloc
    except ValueError:
        # urlparse rejects some malformed URIs; fall back to the raw text.
        domain = ''
    # Use the host when the URI has one, otherwise the URI text as-is.
    # NUL separators keep ('ab', 'c') and ('a', 'bc') from colliding.
    token = '\0'.join((domain or row[URI], row[USERNAME], row[PASSWORD]))
    return hashlib.md5(token.encode('utf-8')).hexdigest()


def main(argv):
    """Split the CSV named by ``argv[0]`` into unique and duplicate logins.

    The first non-empty row is treated as the header and copied to the
    ``_out`` file without being counted.  Every later row is kept the first
    time its (URI host, username, password) combination appears; repeats
    and rows too short to contain a password column go to the ``_rem`` file.
    Parsing uses the ``csv`` module, so quoted commas and line breaks inside
    a field do not split the record.

    Args:
        argv: Command-line arguments without the program name; ``argv[0]``
            is the input file path.

    Raises:
        SystemExit: If no input path is supplied.
    """
    if len(argv) < 1:
        print('Missing input file path')
        sys.exit(1)

    in_file_path = argv[0]
    out_file_path = _sibling_path(in_file_path, '_out')
    rem_file_path = _sibling_path(in_file_path, '_rem')

    completed_lines_hash = set()
    write_count = 0
    dup_count = 0
    header_written = False

    # Context managers close all three files even if a row raises.
    with open(in_file_path, 'r', encoding='utf8', newline='') as in_file, \
            open(out_file_path, 'w', encoding='utf8', newline='') as out_file, \
            open(rem_file_path, 'w', encoding='utf8', newline='') as rem_file:
        reader = csv.reader(in_file)
        keep = csv.writer(out_file, lineterminator='\n')
        drop = csv.writer(rem_file, lineterminator='\n')

        for row in reader:
            if not row:
                continue

            if not header_written:
                # The header is passed through so the output stays a
                # well-formed export; it is not an entry.
                keep.writerow(row)
                header_written = True
                continue

            if len(row) <= PASSWORD:
                # Not enough columns to read URI/USERNAME/PASSWORD.
                print(f'Missing fields in line {reader.line_num}:\n'
                      f'{",".join(row)}')
                drop.writerow(row)
                continue

            hash_value = _login_hash(row)
            if hash_value not in completed_lines_hash:
                keep.writerow(row)
                completed_lines_hash.add(hash_value)
                write_count += 1
            else:
                drop.writerow(row)
                dup_count += 1

    print(f'\nOutput file: {out_file_path}\n{write_count} unique entries saved')
    print(f'\n{dup_count} duplicates saved to {rem_file_path}')


if __name__ == "__main__":
    main(sys.argv[1:])