Skip to content

Instantly share code, notes, and snippets.

@divinity76
Created March 2, 2024 08:08
Show Gist options
  • Save divinity76/3cab164c0c7a7be51fff4d77e1534bc0 to your computer and use it in GitHub Desktop.
Save divinity76/3cab164c0c7a7be51fff4d77e1534bc0 to your computer and use it in GitHub Desktop.

Revisions

  1. divinity76 created this gist Mar 2, 2024.
    113 changes: 113 additions & 0 deletions merge_csv.php
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,113 @@
    <?php

    declare(strict_types=1);

    function fix_encoding(string $str): string
    {
    //return $str;
    return mb_convert_encoding($str, 'UTF-8', 'ISO-8859-1');
    }

    function parse_csv(string $csv, string $separator = ","): array
    {
    $csv = strtr(
    $csv,
    [
    "\xEF\xBB\xBF" => "", // remove UTF-8 byte order masks, if present
    "\r\n" => "\n", // Windows CrLf=> Unix Lf
    "\r" => "\n" // old-MacOS Cr => Unix Lf
    // (both modern MacOS and Linux use Lf .. Windows is the only outlier)
    ]
    );
    $lines = explode("\n", $csv);
    $keys = str_getcsv(array_shift($lines), $separator);
    $ret = array();
    foreach ($lines as $lineno => $line) {
    if (strlen($line) < 1) {
    // ... probably malformed csv, but we'll allow it
    continue;
    }
    $parsed = str_getcsv($line, $separator);
    if (count($parsed) !== count($keys)) {
    throw new \RuntimeException("error on csv line #{$lineno}: count mismatch:" . count($parsed) . ' !== ' . count($keys) . ": " . var_export([
    'error' => 'count mismatch',
    'keys' => $keys,
    'parsed' => $parsed,
    'line' => $line
    ], true));
    }
    $ret[] = array_combine($keys, $parsed);
    }
    return $ret;
    }
    error_reporting(E_ALL);
    ini_set('display_errors', '1');
    set_error_handler(function ($errno, $errstr, $errfile, $errline) {
    if (error_reporting() & $errno) {
    throw new \ErrorException($errstr, 0, $errno, $errfile, $errline);
    }
    });
    if ($argc !== 3) {
    echo "Usage: php {$argv[0]} <file1.csv> <file2.csv>\n";
    exit(1);
    }
    $file1 = $argv[1];
    $file2 = $argv[2];
    $raw1 = file_get_contents($file1);
    $raw2 = file_get_contents($file2);
    $raw1 = fix_encoding($raw1);
    $raw2 = fix_encoding($raw2);
    $csv1 = parse_csv($raw1);
    $csv2 = parse_csv($raw2);
    $merged = [];


    // Assuming both CSVs have the same headers and the merge is based on these headers.
    $headers = array_keys($csv1[0] + $csv2[0]); // Combine and get unique headers from both CSVs
    foreach ($csv1 as $index => $row) {
    foreach ($headers as $header) {
    // If the current header exists in both csv1 and csv2 rows
    if (isset($csv2[$index][$header], $row[$header])) {
    // If csv1's value is empty and csv2's value is not, take csv2's value
    if (empty($row[$header]) && !empty($csv2[$index][$header])) {
    $merged[$index][$header] = $csv2[$index][$header];
    } else {
    // Otherwise, use csv1's value (even if it's empty, since both are equal priority in this case)
    $merged[$index][$header] = $row[$header];
    }
    } elseif (isset($row[$header])) {
    // If the header exists only in csv1, take csv1's value
    $merged[$index][$header] = $row[$header];
    } elseif (isset($csv2[$index][$header])) {
    // If the header exists only in csv2, take csv2's value
    $merged[$index][$header] = $csv2[$index][$header];
    }
    }
    }

    // Handle the case where csv2 is longer than csv1
    if (count($csv2) > count($csv1)) {
    for ($i = count($csv1); $i < count($csv2); $i++) {
    foreach ($headers as $header) {
    if (isset($csv2[$i][$header])) {
    $merged[$i][$header] = $csv2[$i][$header];
    }
    }
    }
    }

    function array_to_csv(array $data, string $delimiter = ",", string $enclosure = '"', string $escapeChar = "\\"): string {
    $fh = fopen('php://temp', 'rw'); // Write to a temporary file
    foreach ($data as $row) {
    fputcsv($fh, $row, $delimiter, $enclosure, $escapeChar);
    }
    rewind($fh);
    $csvString = stream_get_contents($fh);
    fclose($fh);
    return $csvString;
    }

    $mergedCsvString = array_to_csv($merged);
    // For demonstration, let's just print the merged CSV string
    echo mb_convert_encoding($mergedCsvString, 'ISO-8859-1', 'UTF-8');
    //echo $mergedCsvString;