Last active
September 28, 2025 14:05
-
-
Save yting27/9ccdbed5e52f861b65ada40810acc0c9 to your computer and use it in GitHub Desktop.
Revisions
-
yting27 revised this gist
Sep 28, 2025 — 1 changed file with 4 additions and 0 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -5,6 +5,10 @@ set -euo pipefail FILE="${1:?Usage: $0 <conflicted_file.csv>}" # Get conflicted file relative path GIT_ROOT="$(git rev-parse --show-toplevel)" FILE="${FILE/$GIT_ROOT\//}" # Make sure file is actually conflicted if ! git diff --name-only --diff-filter=U | grep -qx -- "$FILE"; then echo "[INFO] Error: $FILE is not in conflict." >&2 -
yting27 renamed this gist
Sep 28, 2025 — 1 changed file with 1 addition and 1 deletion. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,5 +1,5 @@ #!/usr/bin/env bash # Usage: auto_resolve_conflict.sh <conflicted_file.csv> set -euo pipefail -
yting27 created this gist
Sep 28, 2025 — There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,136 @@ #!/usr/bin/env python3 """ convert_csv_to_json.py — Convert CSV to JSON (array or NDJSON) efficiently. Usage examples: # Standard JSON array with default 2-space indent python convert_csv_to_json.py input.csv -o output.json # Pretty print with 4-space indent python convert_csv_to_json.py input.csv -o output.json --indent 4 # Custom delimiter python convert_csv_to_json.py data.csv -o data.json --delimiter ';' # Gzip input or output (auto by extension) python convert_csv_to_json.py data.csv.gz -o data.json.gz """ from __future__ import annotations import argparse import csv import gzip import io import json import os import sys from typing import Iterable, Dict, TextIO def open_maybe_gzip(path: str, mode: str, encoding: str = "utf-8"): """ Open plain or .gz file based on filename extension. For text modes, wrap with TextIOWrapper to ensure encoding and newline handling. 
""" is_text = "b" not in mode if path == "-": # stdin/stdout handling if "r" in mode: return sys.stdin if is_text else sys.stdin.buffer else: return sys.stdout if is_text else sys.stdout.buffer if path.lower().endswith(".gz"): f = gzip.open(path, mode.replace("t", "")) if is_text: return io.TextIOWrapper(f, encoding=encoding, newline="") return f else: return open(path, mode, encoding=encoding) if is_text else open(path, mode) def to_json_array(rows: Iterable[Dict[str, str]], out_f: TextIO, indent: int | None) -> None: dump = json.dumps first = True if indent is not None: out_f.write("[\n") else: out_f.write("[") for row in rows: if not first: out_f.write(",\n" if indent is not None else ",") if indent is not None: out_f.write(dump(row, ensure_ascii=False, indent=indent)) else: out_f.write(dump(row, ensure_ascii=False)) first = False if indent is not None: out_f.write("\n]\n") else: out_f.write("]\n") def convert_csv_to_json( in_path: str, out_path: str, delimiter: str = ",", indent: int | None = None, limit: int | None = None, encoding_in: str = "utf-8-sig", # utf-8 BOM-safe encoding_out: str = "utf-8", ) -> None: # Read CSV with streaming DictReader with open_maybe_gzip(in_path, "rt", encoding=encoding_in) as f_in: reader = csv.DictReader(f_in, delimiter=delimiter, quotechar='"') # Optional limiting iterator (for testing) def limited_rows(): if limit is None: yield from reader else: for i, row in enumerate(reader): if i >= limit: break yield row # Write JSON # Use text mode for json so ensure_ascii=False works properly with UTF-8 with open_maybe_gzip(out_path, "wt", encoding=encoding_out) as f_out: to_json_array(limited_rows(), f_out, indent=indent) def parse_args() -> argparse.Namespace: p = argparse.ArgumentParser(description="Convert CSV to JSON efficiently (streaming).") p.add_argument("input", help='Input CSV path (use "-" for stdin). Supports .gz') p.add_argument("-o", "--output", default=None, help='Output JSON path (use "-" for stdout). Supports .gz. 
' 'Default: same as input with .json') p.add_argument("--indent", type=int, default=2, help="Pretty-print JSON with the given indent (spaces).") p.add_argument("--delimiter", default=",", help="CSV delimiter (default: ,)") p.add_argument("--encoding-in", default="utf-8-sig", help="Input encoding (default: utf-8-sig to strip BOM).") p.add_argument("--encoding-out", default="utf-8", help="Output encoding (default: utf-8).") args = p.parse_args() if args.output is None: base, _ = os.path.splitext(args.input if args.input != "-" else "stdout") args.output = base + ".json" return args def main(): args = parse_args() convert_csv_to_json( in_path=args.input, out_path=args.output, delimiter=args.delimiter, indent=args.indent, encoding_in=args.encoding_in, encoding_out=args.encoding_out, ) if __name__ == "__main__": main() This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,160 @@ #!/usr/bin/env python3 """ convert_json_to_csv.py — Convert JSON array back to CSV. Usage examples: # JSON array -> CSV python convert_json_to_csv.py input.json -o output.csv # Gzip in/out by extension python convert_json_to_csv.py data.json.gz -o data.csv.gz """ from __future__ import annotations import argparse import csv import gzip import io import json import os import sys from typing import Any, Dict, Iterable, Iterator, List, Optional, TextIO, Union # ---------- I/O helpers ---------- def open_maybe_gzip(path: str, mode: str, *, encoding: str = "utf-8", newline: Optional[str] = ""): """ Open plain or .gz file based on extension. Supports "-" for stdin/stdout. For text modes, enforce 'encoding' and 'newline' to make csv writer predictable. 
""" is_text = "b" not in mode if path == "-": if "r" in mode: return sys.stdin if is_text else sys.stdin.buffer return sys.stdout if is_text else sys.stdout.buffer if path.lower().endswith(".gz"): # For text mode: wrap gzip stream in TextIOWrapper with specified encoding/newline raw = gzip.open(path, mode.replace("t", "")) if is_text: return io.TextIOWrapper(raw, encoding=encoding, newline=newline) return raw else: if is_text: return open(path, mode, encoding=encoding, newline=newline) return open(path, mode) # ---------- JSON readers ---------- def read_json_array(f: TextIO) -> List[Dict[str, Any]]: """Load a JSON array fully (suitable for moderate files).""" data = json.load(f) if not isinstance(data, list): raise ValueError("Input is not a JSON array.") # Enforce dict rows out: List[Dict[str, Any]] = [] for i, item in enumerate(data): if not isinstance(item, dict): raise ValueError(f"Array item at index {i} is not an object.") out.append(item) return out # ---------- Core conversion ---------- def to_scalar_string(value: Any) -> str: """ Convert JSON value back to CSV cell text: - None -> empty field - str -> as-is - int/float/bool -> str(value) (NOTE: if original had leading zeros, ensure JSON had strings) - lists/objects -> compact JSON string """ if value is None: return "" if isinstance(value, str): return value if isinstance(value, (int, float, bool)): return str(value) # Nested structure: embed as JSON text return json.dumps(value, ensure_ascii=False, separators=(",", ":")) def write_csv( rows: Iterable[Dict[str, Any]], out_f: TextIO, columns: List[str], *, delimiter: str ) -> None: writer = csv.writer( out_f, delimiter=delimiter ) writer.writerow(columns) for row in rows: writer.writerow([to_scalar_string(row.get(col)) for col in columns]) def convert_json_to_csv( in_path: str, out_path: str, delimiter: str, encoding_in: str, encoding_out: str, ) -> None: # Prepare output stream with stable newline handling for csv with open_maybe_gzip(out_path, "wt", 
encoding=encoding_out, newline="") as f_out: with open_maybe_gzip(in_path, "rt", encoding=encoding_in, newline="") as f_in: data = read_json_array(f_in) if not data: if columns: write_csv([], f_out, columns, delimiter=delimiter) return columns = list(data[0].keys()) write_csv( data, f_out, columns, delimiter=delimiter ) # ---------- CLI ---------- def parse_args() -> argparse.Namespace: p = argparse.ArgumentParser(description="Convert JSON (array or NDJSON) to CSV.") p.add_argument("input", help='Input JSON path (use "-" for stdin). Supports .gz') p.add_argument("-o", "--output", default=None, help='Output CSV path (use "-" for stdout). Supports .gz. ' 'Default: same base name with .csv') p.add_argument("--delimiter", default=",", help="CSV delimiter (default: ,)") p.add_argument("--encoding-in", default="utf-8", help="Input encoding (default: utf-8).") p.add_argument("--encoding-out", default="utf-8", help="Output encoding (default: utf-8).") args = p.parse_args() if args.output is None: base, _ = os.path.splitext(args.input if args.input != "-" else "stdout") args.output = base + ".csv" return argparse.Namespace( input=args.input, output=args.output, delimiter=args.delimiter, encoding_in=args.encoding_in, encoding_out=args.encoding_out ) def main(): args = parse_args() convert_json_to_csv( in_path=args.input, out_path=args.output, delimiter=args.delimiter, encoding_in=args.encoding_in, encoding_out=args.encoding_out, ) if __name__ == "__main__": main() This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,48 @@ #!/usr/bin/env bash # Usage: resolve_conflict.sh <conflicted_file.csv> set -euo pipefail FILE="${1:?Usage: $0 <conflicted_file.csv>}" # Make sure file is actually conflicted if ! 
#!/usr/bin/env bash
# Usage: resolve_conflict.sh <conflicted_file.csv>
# Resolve a CSV merge conflict by converting each conflict stage to pretty
# JSON, merging with kdiff3, and converting the result back to CSV.
set -euo pipefail

FILE="${1:?Usage: $0 <conflicted_file.csv>}"

# Make sure file is actually conflicted
if ! git diff --name-only --diff-filter=U | grep -qx -- "$FILE"; then
  # FIX: original printed "[INFO] Error:" — mixed log levels.
  echo "[ERROR] $FILE is not in conflict." >&2
  exit 1
fi

tmpdir="$(mktemp -d)"
# FIX: original leaked the temp directory; clean it up on any exit.
trap 'rm -rf "$tmpdir"' EXIT
echo "[INFO] Using temporary directory: $tmpdir"

# 1) Extract conflict stages (:1 base may not exist, e.g. add/add conflicts)
git show :2:"$FILE" > "$tmpdir/others.csv"
git show :3:"$FILE" > "$tmpdir/mine.csv"
git show :1:"$FILE" > "$tmpdir/base.csv" 2>/dev/null || true

# 2) Convert CSV -> JSON
python3 scripts/convert_csv_to_json.py "$tmpdir/others.csv" -o "$tmpdir/others.json" --indent 2
python3 scripts/convert_csv_to_json.py "$tmpdir/mine.csv"   -o "$tmpdir/mine.json"   --indent 2
if [ -s "$tmpdir/base.csv" ]; then
  python3 scripts/convert_csv_to_json.py "$tmpdir/base.csv" -o "$tmpdir/base.json" --indent 2
fi

# 3) Auto-merge with kdiff3 (three-way when a base exists, two-way otherwise)
merged_json="$tmpdir/merged.json"
if [ -s "$tmpdir/base.json" ]; then
  kdiff3 "$tmpdir/base.json" "$tmpdir/mine.json" "$tmpdir/others.json" \
    -o "$merged_json" -m
else
  kdiff3 "$tmpdir/mine.json" "$tmpdir/others.json" \
    -o "$merged_json" -m
fi

# 4) Backup, convert back to CSV, and stage
backup="/tmp/$(basename "$FILE")_ori.$(date +%s).bak"
cp -- "$FILE" "$backup"
echo "[INFO] Backup saved at: $backup"

python3 scripts/convert_json_to_csv.py "$merged_json" -o "$FILE"
git add -- "$FILE"
echo "[INFO] Merged and staged: $FILE"
echo "[INFO] Continue rebase with: git rebase --continue"