Created
          March 2, 2024 08:08 
        
      - 
      
- 
        Save divinity76/3cab164c0c7a7be51fff4d77e1534bc0 to your computer and use it in GitHub Desktop. 
Revisions
- 
        divinity76 created this gist Mar 2, 2024 .There are no files selected for viewingThis file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,113 @@ <?php declare(strict_types=1); function fix_encoding(string $str): string { //return $str; return mb_convert_encoding($str, 'UTF-8', 'ISO-8859-1'); } function parse_csv(string $csv, string $separator = ","): array { $csv = strtr( $csv, [ "\xEF\xBB\xBF" => "", // remove UTF-8 byte order masks, if present "\r\n" => "\n", // Windows CrLf=> Unix Lf "\r" => "\n" // old-MacOS Cr => Unix Lf // (both modern MacOS and Linux use Lf .. Windows is the only outlier) ] ); $lines = explode("\n", $csv); $keys = str_getcsv(array_shift($lines), $separator); $ret = array(); foreach ($lines as $lineno => $line) { if (strlen($line) < 1) { // ... probably malformed csv, but we'll allow it continue; } $parsed = str_getcsv($line, $separator); if (count($parsed) !== count($keys)) { throw new \RuntimeException("error on csv line #{$lineno}: count mismatch:" . count($parsed) . ' !== ' . count($keys) . ": " . var_export([ 'error' => 'count mismatch', 'keys' => $keys, 'parsed' => $parsed, 'line' => $line ], true)); } $ret[] = array_combine($keys, $parsed); } return $ret; } error_reporting(E_ALL); ini_set('display_errors', '1'); set_error_handler(function ($errno, $errstr, $errfile, $errline) { if (error_reporting() & $errno) { throw new \ErrorException($errstr, 0, $errno, $errfile, $errline); } }); if ($argc !== 3) { echo "Usage: php {$argv[0]} <file1.csv> <file2.csv>\n"; exit(1); } $file1 = $argv[1]; $file2 = $argv[2]; $raw1 = file_get_contents($file1); $raw2 = file_get_contents($file2); $raw1 = fix_encoding($raw1); $raw2 = fix_encoding($raw2); $csv1 = parse_csv($raw1); $csv2 = parse_csv($raw2); $merged = []; // Assuming both CSVs have the same headers and the merge is based on these headers. $headers = array_keys($csv1[0] + $csv2[0]); // Combine and get unique headers from both CSVs foreach ($csv1 as $index => $row) { foreach ($headers as $header) { // If the current header exists in both csv1 and csv2 rows if (isset($csv2[$index][$header], $row[$header])) { // If csv1's value is empty and csv2's value is not, take csv2's value if (empty($row[$header]) && !empty($csv2[$index][$header])) { $merged[$index][$header] = $csv2[$index][$header]; } else { // Otherwise, use csv1's value (even if it's empty, since both are equal priority in this case) $merged[$index][$header] = $row[$header]; } } elseif (isset($row[$header])) { // If the header exists only in csv1, take csv1's value $merged[$index][$header] = $row[$header]; } elseif (isset($csv2[$index][$header])) { // If the header exists only in csv2, take csv2's value $merged[$index][$header] = $csv2[$index][$header]; } } } // Handle the case where csv2 is longer than csv1 if (count($csv2) > count($csv1)) { for ($i = count($csv1); $i < count($csv2); $i++) { foreach ($headers as $header) { if (isset($csv2[$i][$header])) { $merged[$i][$header] = $csv2[$i][$header]; } } } } function array_to_csv(array $data, string $delimiter = ",", string $enclosure = '"', string $escapeChar = "\\"): string { $fh = fopen('php://temp', 'rw'); // Write to a temporary file foreach ($data as $row) { fputcsv($fh, $row, $delimiter, $enclosure, $escapeChar); } rewind($fh); $csvString = stream_get_contents($fh); fclose($fh); return $csvString; } $mergedCsvString = array_to_csv($merged); // For demonstration, let's just print the merged CSV string echo mb_convert_encoding($mergedCsvString, 'ISO-8859-1', 'UTF-8'); //echo $mergedCsvString;