Last active
September 28, 2025 14:05
-
-
Save yting27/9ccdbed5e52f861b65ada40810acc0c9 to your computer and use it in GitHub Desktop.
Revisions
-
yting27 revised this gist
Sep 28, 2025 — 1 changed file with 4 additions and 0 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -5,6 +5,10 @@ set -euo pipefail FILE="${1:?Usage: $0 <conflicted_file.csv>}" # Get conflicted file relative path GIT_ROOT="$(git rev-parse --show-toplevel)" FILE="${FILE/$GIT_ROOT\//}" # Make sure file is actually conflicted if ! git diff --name-only --diff-filter=U | grep -qx -- "$FILE"; then echo "[INFO] Error: $FILE is not in conflict." >&2 -
yting27 renamed this gist
Sep 28, 2025 — 1 changed file with 1 addition and 1 deletion. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,5 +1,5 @@ #!/usr/bin/env bash # Usage: auto_resolve_conflict.sh <conflicted_file.csv> set -euo pipefail -
yting27 created this gist
Sep 28, 2025 — There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,136 @@ #!/usr/bin/env python3 """ convert_csv_to_json.py — Convert CSV to JSON (array or NDJSON) efficiently. Usage examples: # Standard JSON array with default 2-space indent python convert_csv_to_json.py input.csv -o output.json # Pretty print with 4-space indent python convert_csv_to_json.py input.csv -o output.json --indent 4 # Custom delimiter python convert_csv_to_json.py data.csv -o data.json --delimiter ';' # Gzip input or output (auto by extension) python convert_csv_to_json.py data.csv.gz -o data.json.gz """ from __future__ import annotations import argparse import csv import gzip import io import json import os import sys from typing import Iterable, Dict, TextIO def open_maybe_gzip(path: str, mode: str, encoding: str = "utf-8"): """ Open plain or .gz file based on filename extension. For text modes, wrap with TextIOWrapper to ensure encoding and newline handling. 
""" is_text = "b" not in mode if path == "-": # stdin/stdout handling if "r" in mode: return sys.stdin if is_text else sys.stdin.buffer else: return sys.stdout if is_text else sys.stdout.buffer if path.lower().endswith(".gz"): f = gzip.open(path, mode.replace("t", "")) if is_text: return io.TextIOWrapper(f, encoding=encoding, newline="") return f else: return open(path, mode, encoding=encoding) if is_text else open(path, mode) def to_json_array(rows: Iterable[Dict[str, str]], out_f: TextIO, indent: int | None) -> None: dump = json.dumps first = True if indent is not None: out_f.write("[\n") else: out_f.write("[") for row in rows: if not first: out_f.write(",\n" if indent is not None else ",") if indent is not None: out_f.write(dump(row, ensure_ascii=False, indent=indent)) else: out_f.write(dump(row, ensure_ascii=False)) first = False if indent is not None: out_f.write("\n]\n") else: out_f.write("]\n") def convert_csv_to_json( in_path: str, out_path: str, delimiter: str = ",", indent: int | None = None, limit: int | None = None, encoding_in: str = "utf-8-sig", # utf-8 BOM-safe encoding_out: str = "utf-8", ) -> None: # Read CSV with streaming DictReader with open_maybe_gzip(in_path, "rt", encoding=encoding_in) as f_in: reader = csv.DictReader(f_in, delimiter=delimiter, quotechar='"') # Optional limiting iterator (for testing) def limited_rows(): if limit is None: yield from reader else: for i, row in enumerate(reader): if i >= limit: break yield row # Write JSON # Use text mode for json so ensure_ascii=False works properly with UTF-8 with open_maybe_gzip(out_path, "wt", encoding=encoding_out) as f_out: to_json_array(limited_rows(), f_out, indent=indent) def parse_args() -> argparse.Namespace: p = argparse.ArgumentParser(description="Convert CSV to JSON efficiently (streaming).") p.add_argument("input", help='Input CSV path (use "-" for stdin). Supports .gz') p.add_argument("-o", "--output", default=None, help='Output JSON path (use "-" for stdout). Supports .gz. 
' 'Default: same as input with .json') p.add_argument("--indent", type=int, default=2, help="Pretty-print JSON with the given indent (spaces).") p.add_argument("--delimiter", default=",", help="CSV delimiter (default: ,)") p.add_argument("--encoding-in", default="utf-8-sig", help="Input encoding (default: utf-8-sig to strip BOM).") p.add_argument("--encoding-out", default="utf-8", help="Output encoding (default: utf-8).") args = p.parse_args() if args.output is None: base, _ = os.path.splitext(args.input if args.input != "-" else "stdout") args.output = base + ".json" return args def main(): args = parse_args() convert_csv_to_json( in_path=args.input, out_path=args.output, delimiter=args.delimiter, indent=args.indent, encoding_in=args.encoding_in, encoding_out=args.encoding_out, ) if __name__ == "__main__": main() This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,160 @@ #!/usr/bin/env python3 """ convert_json_to_csv.py — Convert JSON array back to CSV. Usage examples: # JSON array -> CSV python convert_json_to_csv.py input.json -o output.csv # Gzip in/out by extension python convert_json_to_csv.py data.json.gz -o data.csv.gz """ from __future__ import annotations import argparse import csv import gzip import io import json import os import sys from typing import Any, Dict, Iterable, Iterator, List, Optional, TextIO, Union # ---------- I/O helpers ---------- def open_maybe_gzip(path: str, mode: str, *, encoding: str = "utf-8", newline: Optional[str] = ""): """ Open plain or .gz file based on extension. Supports "-" for stdin/stdout. For text modes, enforce 'encoding' and 'newline' to make csv writer predictable. 
""" is_text = "b" not in mode if path == "-": if "r" in mode: return sys.stdin if is_text else sys.stdin.buffer return sys.stdout if is_text else sys.stdout.buffer if path.lower().endswith(".gz"): # For text mode: wrap gzip stream in TextIOWrapper with specified encoding/newline raw = gzip.open(path, mode.replace("t", "")) if is_text: return io.TextIOWrapper(raw, encoding=encoding, newline=newline) return raw else: if is_text: return open(path, mode, encoding=encoding, newline=newline) return open(path, mode) # ---------- JSON readers ---------- def read_json_array(f: TextIO) -> List[Dict[str, Any]]: """Load a JSON array fully (suitable for moderate files).""" data = json.load(f) if not isinstance(data, list): raise ValueError("Input is not a JSON array.") # Enforce dict rows out: List[Dict[str, Any]] = [] for i, item in enumerate(data): if not isinstance(item, dict): raise ValueError(f"Array item at index {i} is not an object.") out.append(item) return out # ---------- Core conversion ---------- def to_scalar_string(value: Any) -> str: """ Convert JSON value back to CSV cell text: - None -> empty field - str -> as-is - int/float/bool -> str(value) (NOTE: if original had leading zeros, ensure JSON had strings) - lists/objects -> compact JSON string """ if value is None: return "" if isinstance(value, str): return value if isinstance(value, (int, float, bool)): return str(value) # Nested structure: embed as JSON text return json.dumps(value, ensure_ascii=False, separators=(",", ":")) def write_csv( rows: Iterable[Dict[str, Any]], out_f: TextIO, columns: List[str], *, delimiter: str ) -> None: writer = csv.writer( out_f, delimiter=delimiter ) writer.writerow(columns) for row in rows: writer.writerow([to_scalar_string(row.get(col)) for col in columns]) def convert_json_to_csv( in_path: str, out_path: str, delimiter: str, encoding_in: str, encoding_out: str, ) -> None: # Prepare output stream with stable newline handling for csv with open_maybe_gzip(out_path, "wt", 
encoding=encoding_out, newline="") as f_out: with open_maybe_gzip(in_path, "rt", encoding=encoding_in, newline="") as f_in: data = read_json_array(f_in) if not data: if columns: write_csv([], f_out, columns, delimiter=delimiter) return columns = list(data[0].keys()) write_csv( data, f_out, columns, delimiter=delimiter ) # ---------- CLI ---------- def parse_args() -> argparse.Namespace: p = argparse.ArgumentParser(description="Convert JSON (array or NDJSON) to CSV.") p.add_argument("input", help='Input JSON path (use "-" for stdin). Supports .gz') p.add_argument("-o", "--output", default=None, help='Output CSV path (use "-" for stdout). Supports .gz. ' 'Default: same base name with .csv') p.add_argument("--delimiter", default=",", help="CSV delimiter (default: ,)") p.add_argument("--encoding-in", default="utf-8", help="Input encoding (default: utf-8).") p.add_argument("--encoding-out", default="utf-8", help="Output encoding (default: utf-8).") args = p.parse_args() if args.output is None: base, _ = os.path.splitext(args.input if args.input != "-" else "stdout") args.output = base + ".csv" return argparse.Namespace( input=args.input, output=args.output, delimiter=args.delimiter, encoding_in=args.encoding_in, encoding_out=args.encoding_out ) def main(): args = parse_args() convert_json_to_csv( in_path=args.input, out_path=args.output, delimiter=args.delimiter, encoding_in=args.encoding_in, encoding_out=args.encoding_out, ) if __name__ == "__main__": main() This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,48 @@ #!/usr/bin/env bash # Usage: resolve_conflict.sh <conflicted_file.csv> set -euo pipefail FILE="${1:?Usage: $0 <conflicted_file.csv>}" # Make sure file is actually conflicted if ! 
#!/usr/bin/env bash
# Usage: resolve_conflict.sh <conflicted_file.csv>
# Resolve a CSV merge conflict by converting each conflict stage to pretty
# JSON, merging with kdiff3, and converting the result back to CSV.
set -euo pipefail

FILE="${1:?Usage: $0 <conflicted_file.csv>}"

# Make sure file is actually conflicted
if ! git diff --name-only --diff-filter=U | grep -qx -- "$FILE"; then
  # FIX: original printed "[INFO] Error:" — mixed log levels.
  echo "[ERROR] $FILE is not in conflict." >&2
  exit 1
fi

tmpdir="$(mktemp -d)"
# FIX: original leaked the temp directory; clean it up on any exit.
trap 'rm -rf "$tmpdir"' EXIT
echo "[INFO] Using temporary directory: $tmpdir"

# 1) Extract conflict stages (:1 base may not exist, e.g. add/add conflicts)
git show :2:"$FILE" > "$tmpdir/others.csv"
git show :3:"$FILE" > "$tmpdir/mine.csv"
git show :1:"$FILE" > "$tmpdir/base.csv" 2>/dev/null || true

# 2) Convert CSV -> JSON
python3 scripts/convert_csv_to_json.py "$tmpdir/others.csv" -o "$tmpdir/others.json" --indent 2
python3 scripts/convert_csv_to_json.py "$tmpdir/mine.csv"   -o "$tmpdir/mine.json"   --indent 2
if [ -s "$tmpdir/base.csv" ]; then
  python3 scripts/convert_csv_to_json.py "$tmpdir/base.csv" -o "$tmpdir/base.json" --indent 2
fi

# 3) Auto-merge with kdiff3 (three-way when a base exists, two-way otherwise)
merged_json="$tmpdir/merged.json"
if [ -s "$tmpdir/base.json" ]; then
  kdiff3 "$tmpdir/base.json" "$tmpdir/mine.json" "$tmpdir/others.json" \
    -o "$merged_json" -m
else
  kdiff3 "$tmpdir/mine.json" "$tmpdir/others.json" \
    -o "$merged_json" -m
fi

# 4) Backup, convert back to CSV, and stage
backup="/tmp/$(basename "$FILE")_ori.$(date +%s).bak"
cp -- "$FILE" "$backup"
echo "[INFO] Backup saved at: $backup"

python3 scripts/convert_json_to_csv.py "$merged_json" -o "$FILE"
git add -- "$FILE"
echo "[INFO] Merged and staged: $FILE"
echo "[INFO] Continue rebase with: git rebase --continue"