#!/usr/bin/env python3
# Build a SQLite3 DB for looking up SHA-1 hashes of leaked passwords.
#
# This can be fed the txt file from one of Have I Been Pwned's hash
# lists available from https://haveibeenpwned.com/Passwords -- but any
# text file with line format ``hash-hex:count`` will work.
#
# When run on the v5 hash-ordered SHA-1 file, expect the build to take
# about 35 minutes and produce a 15.7 GiB file (~30.5 bytes per record).
#
# This example shows querying the resulting database for the
# vulnerable password "qwerty123", and finding that it was present
# with a count of 621679:
#
#   >>> import hashlib
#   >>> import sqlite3
#   >>> conn = sqlite3.connect("pwned-passwords-sha1-with-counts-v5.sqlite")
#   >>> hash_bytes = hashlib.sha1('qwerty123'.encode()).digest()
#   >>> conn.execute("SELECT * FROM hashes WHERE hash = :sha1 LIMIT 1", {'sha1': hash_bytes}).fetchone()
#   (b'\\\xec\x17[\x16^=^b\xc9\xe1<\xe8H\xefo\xea\xc8\x1b\xff', 621679)

import contextlib
import os
import sqlite3
import sys


def record_generator(in_path):
    """Yield ``(sha1_bytes, count)`` tuples parsed from a hash list file.

    Each non-blank line of *in_path* must have the form ``hash-hex:count``.
    Blank lines are skipped so that a stray empty line (for example at the
    end of the file) does not abort a long-running build.

    Raises:
        ValueError: if a non-blank line has no ``:`` separator, the hash is
            not valid hex, or the count is not an integer.
    """
    with open(in_path) as hashes:
        for line in hashes:
            if not line.strip():
                continue
            # Split on the first colon only: exactly two fields are expected.
            sha1_hex, count_str = line.split(':', 1)
            # int() tolerates the trailing newline left on count_str.
            yield (bytes.fromhex(sha1_hex), int(count_str))


def build(in_path, out_path):
    """Create the SQLite database *out_path* from the hash list *in_path*.

    The database gets a single ``hashes`` table keyed by the raw hash bytes.
    All rows are inserted in one transaction, which is rolled back if any
    input line fails to parse.
    """
    # sqlite3's connection context manager only commits or rolls back; it
    # does not close the connection, so closing() is needed to release the
    # database file handle.
    with contextlib.closing(sqlite3.connect(out_path)) as conn:
        with conn:
            conn.execute('pragma journal_mode=memory')
            conn.execute('CREATE TABLE hashes("hash" BLOB PRIMARY KEY, "count" INT) WITHOUT ROWID')
            conn.executemany(
                'INSERT INTO hashes(hash, count) VALUES (?, ?)',
                record_generator(in_path))
            conn.commit()


def main(*args):
    """Command-line entry point: expects ``in_path`` and ``out_path``.

    Prints a usage message to stderr and exits with status 1 when the
    argument count is wrong.
    """
    if len(args) != 2:
        print("Usage: build.py <in_path> <out_path>", file=sys.stderr)
        sys.exit(1)
    build(*args)


if __name__ == '__main__':
    main(*sys.argv[1:])