Created
August 31, 2024 13:17
-
-
Save adamdekan/0fe7ffdb582391fad09a96f17c489bcd to your computer and use it in GitHub Desktop.
Compare CSV files and add missing columns
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import csv | |
| from typing import Dict, List | |
| import click | |
@click.command()
@click.argument("file1", type=click.Path(exists=True))
@click.argument("file2", type=click.Path(exists=True))
@click.argument("output", type=click.Path())
def compare_csv_files(file1: str, file2: str, output: str) -> None:
    """
    Compare two CSV files, add missing columns from FILE2 to FILE1,
    and save the result to OUTPUT.

    OUTPUT must be a different file than FILE1.
    """
    import os  # local import: only needed for the same-file guard below

    # Opening OUTPUT with mode "w" truncates it immediately. If it is the
    # same file as FILE1, the input would be wiped before any row is read,
    # so refuse up front instead of destroying the data.
    if os.path.exists(output) and os.path.samefile(file1, output):
        raise click.UsageError("OUTPUT must be a different file than FILE1.")

    # Read headers from both files
    headers1 = get_csv_headers(file1)
    headers2 = get_csv_headers(file2)

    # Find missing columns. dict.fromkeys() drops repeated FILE2 headers
    # while keeping their first-seen order, so a column is added only once.
    known_columns = set(headers1)
    missing_columns = [
        col for col in dict.fromkeys(headers2) if col not in known_columns
    ]

    # Read data from file1 and add missing columns
    with open(file1, "r", newline="") as f1, open(output, "w", newline="") as out_file:
        reader = csv.DictReader(f1)
        # fieldnames is None when FILE1 has no header row at all.
        fieldnames = list(reader.fieldnames or []) + missing_columns
        # restval="" makes the writer emit an empty cell for every column a
        # row has no key for, which is exactly the set of added columns.
        writer = csv.DictWriter(out_file, fieldnames=fieldnames, restval="")
        writer.writeheader()
        writer.writerows(reader)

    click.echo(f"Output saved to {output}")
    click.echo(f"Added {len(missing_columns)} columns: {', '.join(missing_columns)}")
def get_csv_headers(file_path: str) -> List[str]:
    """Return the first record (the header row) of the CSV file at *file_path*.

    An empty file has no header row; in that case an empty list is returned
    instead of letting ``StopIteration`` escape from ``next()``.
    """
    with open(file_path, "r", newline="") as f:
        # The default value covers a file with no records at all.
        return next(csv.reader(f), [])
if __name__ == "__main__":
    # Run as a script: call the click command with no arguments so that
    # FILE1, FILE2 and OUTPUT are taken from the command line.
    compare_csv_files()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment