Skip to content

Instantly share code, notes, and snippets.

@adamdekan
Created August 31, 2024 13:17
Show Gist options
  • Select an option

  • Save adamdekan/0fe7ffdb582391fad09a96f17c489bcd to your computer and use it in GitHub Desktop.

Select an option

Save adamdekan/0fe7ffdb582391fad09a96f17c489bcd to your computer and use it in GitHub Desktop.
Compare CSV files and add missing columns
import csv
import os
from typing import Dict, List

import click
@click.command()
@click.argument("file1", type=click.Path(exists=True))
@click.argument("file2", type=click.Path(exists=True))
@click.argument("output", type=click.Path())
def compare_csv_files(file1: str, file2: str, output: str):
    """
    Compare two CSV files, add missing columns from FILE2 to FILE1,
    and save the result to OUTPUT.

    Columns found in FILE2's header but not in FILE1's header are appended
    after FILE1's columns (in FILE2 order) with an empty value in every row.
    OUTPUT must not be the same file as FILE1.
    """
    # Opening OUTPUT in "w" mode truncates it immediately; if it is FILE1, the
    # input would be wiped before any of its rows had been read.
    if os.path.exists(output) and os.path.samefile(file1, output):
        raise click.UsageError("OUTPUT must be a different file from FILE1.")

    # Read headers from both files
    headers1 = get_csv_headers(file1)
    headers2 = get_csv_headers(file2)

    # Find missing columns, keeping FILE2's column order
    known_columns = set(headers1)
    missing_columns = [col for col in headers2 if col not in known_columns]

    # Copy FILE1's rows to OUTPUT under the extended header
    with open(file1, "r", newline="") as f1, open(output, "w", newline="") as out_file:
        reader = csv.DictReader(f1)
        writer = csv.DictWriter(
            out_file,
            fieldnames=headers1 + missing_columns,
            # Rows never carry the added columns, so the writer fills them
            # with this value; no per-row mutation is needed.
            restval="",
        )
        writer.writeheader()
        writer.writerows(reader)

    click.echo(f"Output saved to {output}")
    click.echo(f"Added {len(missing_columns)} columns: {', '.join(missing_columns)}")
def get_csv_headers(file_path: str) -> List[str]:
    """Return the first record (the header row) of the CSV file at *file_path*.

    Returns an empty list when the file contains no records at all, instead
    of letting ``StopIteration`` escape from ``next()``.
    """
    with open(file_path, "r", newline="") as f:
        # The default value covers an empty file, where the reader yields nothing.
        return next(csv.reader(f), [])
if __name__ == "__main__":
    # Invoke the click command only when run as a script, not when imported.
    compare_csv_files()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment