"""Convert an RDS PostgreSQL csvlog into a pgreplay-go compatible log.

The conversion runs in two passes:

1. Physical lines of ``FILENAME`` are merged so that every log record
   occupies exactly one line of ``test-out.csv``.  A new record is
   recognised by the ``MULTILINE_DELIMITER`` prefix.
2. ``test-out.csv`` is parsed as CSV and the rows whose command tag is
   SELECT, PARSE or BIND are rewritten into ``test-go.csv`` as
   ``|``-separated ``LOG:`` lines, optionally followed by a ``DETAIL:``
   line carrying the bound parameters.
"""

import csv
import sys

# Statements in the log can be far longer than the csv module's default
# per-field limit, so lift the limit as far as the platform allows.
csv.field_size_limit(sys.maxsize)

# Prefix that marks the first physical line of a log record.
MULTILINE_DELIMITER = "2021-09-"
FILENAME = "test.csv"
PROGRESS_EVERY_LINES = 1000000

# 0-based positions of the CSV fields used by the conversion.  The names
# follow the column order documented for PostgreSQL's csvlog format; this
# assumes the RDS export uses that standard layout.
_LOG_TIME = 0
_USER_NAME = 1
_DATABASE_NAME = 2
_SESSION_ID = 5
_COMMAND_TAG = 7
_MESSAGE = 13
_DETAIL = 14

# Only rows carrying one of these command tags are converted.
_REPLAYED_COMMAND_TAGS = ("SELECT", "PARSE", "BIND")


def _with_progress(items, start):
    """Yield *items* unchanged, printing a counter every PROGRESS_EVERY_LINES.

    Args:
        items: Any iterable.
        start: Number assigned to the first item.
    """
    for count, item in enumerate(items, start):
        if count % PROGRESS_EVERY_LINES == 0:
            print(f"{count}..", end="", flush=True)
        yield item


def merge_multilines(lines, delimiter=MULTILINE_DELIMITER):
    """Merge physical lines into one string per log record.

    A line starting with *delimiter* opens a new record; every other line
    is a continuation of the record currently being collected.  Trailing
    whitespace (including the newline) is stripped from each line and the
    pieces are concatenated without a separator.

    NOTE(review): because no separator is inserted, a continuation line
    that does not begin with whitespace is fused directly onto the
    previous token -- confirm this is acceptable for the logged SQL.

    Args:
        lines: Iterable of text lines, e.g. an open file.
        delimiter: Prefix identifying the first line of a record.

    Yields:
        Each non-empty merged record, without a trailing newline.  The
        last record of the input is yielded as well.
    """
    pieces = []
    for raw in lines:
        if raw.startswith(delimiter):
            record = "".join(pieces)
            if record:
                yield record
            pieces = [raw.rstrip()]
        else:
            pieces.append(raw.rstrip())

    # Flush the record that was still being collected when the input ended;
    # without this the final log entry would be silently lost.
    record = "".join(pieces)
    if record:
        yield record


def convert_row(row):
    """Translate one parsed csvlog row into pgreplay-go log lines.

    Args:
        row: List of field strings as produced by ``csv.reader``.

    Returns:
        A list of newline-terminated output lines.  It is empty when the
        row has fewer than 14 fields or its command tag is not one of
        SELECT, PARSE or BIND.  Otherwise it holds the ``LOG:`` line and,
        when a 15th field exists and starts with ``parameters``, a
        ``DETAIL:`` line after it.
    """
    if len(row) <= _MESSAGE:
        return []
    if row[_COMMAND_TAG] not in _REPLAYED_COMMAND_TAGS:
        return []

    prefix = (
        f"{row[_LOG_TIME]}|{row[_USER_NAME]}|"
        f"{row[_DATABASE_NAME]}|{row[_SESSION_ID]}|"
    )
    converted = [f"{prefix}LOG: {row[_MESSAGE]}\n"]

    # Rows with exactly 14 fields have no detail column; check the length
    # before indexing so they do not raise IndexError.
    if len(row) > _DETAIL and row[_DETAIL].startswith("parameters"):
        converted.append(f"{prefix}DETAIL: {row[_DETAIL]}\n")
    return converted


def main():
    """Run both passes using the file names configured at module level."""
    print("Reading RDS csvlog, removing multilines")
    with open(FILENAME, "r") as src, open("test-out.csv", "w") as dst:
        # Line numbering starts at 1 for the progress counter.
        for record in merge_multilines(_with_progress(src, 1)):
            dst.write(record + "\n")

    print("\nConverting RDS csvlog to pgreplay-go compatible log")
    with open("test-out.csv", "r") as src, open("test-go.csv", "w") as dst:
        # Row numbering starts at 0, so a "0.." marker is printed first.
        for row in _with_progress(csv.reader(src), 0):
            dst.writelines(convert_row(row))


if __name__ == "__main__":
    main()