Skip to content

Instantly share code, notes, and snippets.

@yting27
Last active September 28, 2025 14:05
Show Gist options
  • Save yting27/9ccdbed5e52f861b65ada40810acc0c9 to your computer and use it in GitHub Desktop.

Revisions

  1. yting27 revised this gist Sep 28, 2025. 1 changed file with 4 additions and 0 deletions.
    4 changes: 4 additions & 0 deletions auto_resolve_conflict.sh
    Original file line number Diff line number Diff line change
    @@ -5,6 +5,10 @@ set -euo pipefail

    FILE="${1:?Usage: $0 <conflicted_file.csv>}"

    # Get conflicted file relative path
    GIT_ROOT="$(git rev-parse --show-toplevel)"
    FILE="${FILE/$GIT_ROOT\//}"

    # Make sure file is actually conflicted
    if ! git diff --name-only --diff-filter=U | grep -qx -- "$FILE"; then
    echo "[INFO] Error: $FILE is not in conflict." >&2
  2. yting27 renamed this gist Sep 28, 2025. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion resolve_conflict.sh → auto_resolve_conflict.sh
    Original file line number Diff line number Diff line change
    @@ -1,5 +1,5 @@
    #!/usr/bin/env bash
    # Usage: resolve_conflict.sh <conflicted_file.csv>
    # Usage: auto_resolve_conflict.sh <conflicted_file.csv>

    set -euo pipefail

  3. yting27 created this gist Sep 28, 2025.
    136 changes: 136 additions & 0 deletions convert_csv_to_json.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,136 @@
    #!/usr/bin/env python3

    """
    convert_csv_to_json.py — Convert CSV to JSON (array or NDJSON) efficiently.
    Usage examples:
    # Standard JSON array with default 2-space indent
    python convert_csv_to_json.py input.csv -o output.json
    # Pretty print with 4-space indent
    python convert_csv_to_json.py input.csv -o output.json --indent 4
    # Custom delimiter
    python convert_csv_to_json.py data.csv -o data.json --delimiter ';'
    # Gzip input or output (auto by extension)
    python convert_csv_to_json.py data.csv.gz -o data.json.gz
    """
    from __future__ import annotations

    import argparse
    import csv
    import gzip
    import io
    import json
    import os
    import sys
    from typing import Iterable, Dict, TextIO


    def open_maybe_gzip(path: str, mode: str, encoding: str = "utf-8"):
    """
    Open plain or .gz file based on filename extension.
    For text modes, wrap with TextIOWrapper to ensure encoding and newline handling.
    """
    is_text = "b" not in mode
    if path == "-":
    # stdin/stdout handling
    if "r" in mode:
    return sys.stdin if is_text else sys.stdin.buffer
    else:
    return sys.stdout if is_text else sys.stdout.buffer

    if path.lower().endswith(".gz"):
    f = gzip.open(path, mode.replace("t", ""))
    if is_text:
    return io.TextIOWrapper(f, encoding=encoding, newline="")
    return f
    else:
    return open(path, mode, encoding=encoding) if is_text else open(path, mode)

    def to_json_array(rows: Iterable[Dict[str, str]],
    out_f: TextIO,
    indent: int | None) -> None:
    dump = json.dumps
    first = True
    if indent is not None:
    out_f.write("[\n")
    else:
    out_f.write("[")

    for row in rows:
    if not first:
    out_f.write(",\n" if indent is not None else ",")
    if indent is not None:
    out_f.write(dump(row, ensure_ascii=False, indent=indent))
    else:
    out_f.write(dump(row, ensure_ascii=False))
    first = False

    if indent is not None:
    out_f.write("\n]\n")
    else:
    out_f.write("]\n")


    def convert_csv_to_json(
    in_path: str,
    out_path: str,
    delimiter: str = ",",
    indent: int | None = None,
    limit: int | None = None,
    encoding_in: str = "utf-8-sig", # utf-8 BOM-safe
    encoding_out: str = "utf-8",
    ) -> None:
    # Read CSV with streaming DictReader
    with open_maybe_gzip(in_path, "rt", encoding=encoding_in) as f_in:
    reader = csv.DictReader(f_in, delimiter=delimiter, quotechar='"')

    # Optional limiting iterator (for testing)
    def limited_rows():
    if limit is None:
    yield from reader
    else:
    for i, row in enumerate(reader):
    if i >= limit:
    break
    yield row

    # Write JSON
    # Use text mode for json so ensure_ascii=False works properly with UTF-8
    with open_maybe_gzip(out_path, "wt", encoding=encoding_out) as f_out:
    to_json_array(limited_rows(), f_out, indent=indent)


    def parse_args() -> argparse.Namespace:
    p = argparse.ArgumentParser(description="Convert CSV to JSON efficiently (streaming).")
    p.add_argument("input", help='Input CSV path (use "-" for stdin). Supports .gz')
    p.add_argument("-o", "--output", default=None,
    help='Output JSON path (use "-" for stdout). Supports .gz. '
    'Default: same as input with .json')
    p.add_argument("--indent", type=int, default=2,
    help="Pretty-print JSON with the given indent (spaces).")
    p.add_argument("--delimiter", default=",", help="CSV delimiter (default: ,)")
    p.add_argument("--encoding-in", default="utf-8-sig",
    help="Input encoding (default: utf-8-sig to strip BOM).")
    p.add_argument("--encoding-out", default="utf-8",
    help="Output encoding (default: utf-8).")
    args = p.parse_args()

    if args.output is None:
    base, _ = os.path.splitext(args.input if args.input != "-" else "stdout")
    args.output = base + ".json"
    return args


    def main():
    args = parse_args()
    convert_csv_to_json(
    in_path=args.input,
    out_path=args.output,
    delimiter=args.delimiter,
    indent=args.indent,
    encoding_in=args.encoding_in,
    encoding_out=args.encoding_out,
    )


    if __name__ == "__main__":
    main()
    160 changes: 160 additions & 0 deletions convert_json_to_csv.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,160 @@
    #!/usr/bin/env python3

    """
    convert_json_to_csv.py — Convert JSON array back to CSV.
    Usage examples:
    # JSON array -> CSV
    python convert_json_to_csv.py input.json -o output.csv
    # Gzip in/out by extension
    python convert_json_to_csv.py data.json.gz -o data.csv.gz
    """

    from __future__ import annotations

    import argparse
    import csv
    import gzip
    import io
    import json
    import os
    import sys
    from typing import Any, Dict, Iterable, Iterator, List, Optional, TextIO, Union

    # ---------- I/O helpers ----------

    def open_maybe_gzip(path: str, mode: str, *, encoding: str = "utf-8", newline: Optional[str] = ""):
    """
    Open plain or .gz file based on extension. Supports "-" for stdin/stdout.
    For text modes, enforce 'encoding' and 'newline' to make csv writer predictable.
    """
    is_text = "b" not in mode
    if path == "-":
    if "r" in mode:
    return sys.stdin if is_text else sys.stdin.buffer
    return sys.stdout if is_text else sys.stdout.buffer

    if path.lower().endswith(".gz"):
    # For text mode: wrap gzip stream in TextIOWrapper with specified encoding/newline
    raw = gzip.open(path, mode.replace("t", ""))
    if is_text:
    return io.TextIOWrapper(raw, encoding=encoding, newline=newline)
    return raw
    else:
    if is_text:
    return open(path, mode, encoding=encoding, newline=newline)
    return open(path, mode)

    # ---------- JSON readers ----------

    def read_json_array(f: TextIO) -> List[Dict[str, Any]]:
    """Load a JSON array fully (suitable for moderate files)."""
    data = json.load(f)
    if not isinstance(data, list):
    raise ValueError("Input is not a JSON array.")
    # Enforce dict rows
    out: List[Dict[str, Any]] = []
    for i, item in enumerate(data):
    if not isinstance(item, dict):
    raise ValueError(f"Array item at index {i} is not an object.")
    out.append(item)
    return out

    # ---------- Core conversion ----------

    def to_scalar_string(value: Any) -> str:
    """
    Convert JSON value back to CSV cell text:
    - None -> empty field
    - str -> as-is
    - int/float/bool -> str(value) (NOTE: if original had leading zeros, ensure JSON had strings)
    - lists/objects -> compact JSON string
    """
    if value is None:
    return ""
    if isinstance(value, str):
    return value
    if isinstance(value, (int, float, bool)):
    return str(value)
    # Nested structure: embed as JSON text
    return json.dumps(value, ensure_ascii=False, separators=(",", ":"))

    def write_csv(
    rows: Iterable[Dict[str, Any]],
    out_f: TextIO,
    columns: List[str],
    *,
    delimiter: str
    ) -> None:
    writer = csv.writer(
    out_f,
    delimiter=delimiter
    )
    writer.writerow(columns)
    for row in rows:
    writer.writerow([to_scalar_string(row.get(col)) for col in columns])

    def convert_json_to_csv(
    in_path: str,
    out_path: str,
    delimiter: str,
    encoding_in: str,
    encoding_out: str,
    ) -> None:

    # Prepare output stream with stable newline handling for csv
    with open_maybe_gzip(out_path, "wt", encoding=encoding_out, newline="") as f_out:
    with open_maybe_gzip(in_path, "rt", encoding=encoding_in, newline="") as f_in:
    data = read_json_array(f_in)

    if not data:
    if columns:
    write_csv([], f_out, columns, delimiter=delimiter)
    return

    columns = list(data[0].keys())
    write_csv(
    data,
    f_out,
    columns,
    delimiter=delimiter
    )

    # ---------- CLI ----------

    def parse_args() -> argparse.Namespace:
    p = argparse.ArgumentParser(description="Convert JSON (array or NDJSON) to CSV.")
    p.add_argument("input", help='Input JSON path (use "-" for stdin). Supports .gz')
    p.add_argument("-o", "--output", default=None,
    help='Output CSV path (use "-" for stdout). Supports .gz. '
    'Default: same base name with .csv')
    p.add_argument("--delimiter", default=",", help="CSV delimiter (default: ,)")
    p.add_argument("--encoding-in", default="utf-8",
    help="Input encoding (default: utf-8).")
    p.add_argument("--encoding-out", default="utf-8",
    help="Output encoding (default: utf-8).")
    args = p.parse_args()

    if args.output is None:
    base, _ = os.path.splitext(args.input if args.input != "-" else "stdout")
    args.output = base + ".csv"

    return argparse.Namespace(
    input=args.input,
    output=args.output,
    delimiter=args.delimiter,
    encoding_in=args.encoding_in,
    encoding_out=args.encoding_out
    )

    def main():
    args = parse_args()
    convert_json_to_csv(
    in_path=args.input,
    out_path=args.output,
    delimiter=args.delimiter,
    encoding_in=args.encoding_in,
    encoding_out=args.encoding_out,
    )

    if __name__ == "__main__":
    main()
    48 changes: 48 additions & 0 deletions resolve_conflict.sh
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,48 @@
    #!/usr/bin/env bash
    # Usage: resolve_conflict.sh <conflicted_file.csv>

    set -euo pipefail

    FILE="${1:?Usage: $0 <conflicted_file.csv>}"

    # Make sure file is actually conflicted
    if ! git diff --name-only --diff-filter=U | grep -qx -- "$FILE"; then
    echo "[INFO] Error: $FILE is not in conflict." >&2
    exit 1
    fi

    tmpdir="$(mktemp -d)"
    echo "[INFO] Using temporary directory: $tmpdir"

    # 1) Extract conflict stages
    git show :2:"$FILE" > "$tmpdir/others.csv"
    git show :3:"$FILE" > "$tmpdir/mine.csv"
    git show :1:"$FILE" > "$tmpdir/base.csv" 2>/dev/null || true

    # 2) Convert CSV -> JSON
    python3 scripts/convert_csv_to_json.py "$tmpdir/others.csv" -o "$tmpdir/others.json" --indent 2
    python3 scripts/convert_csv_to_json.py "$tmpdir/mine.csv" -o "$tmpdir/mine.json" --indent 2
    if [ -s "$tmpdir/base.csv" ]; then
    python3 scripts/convert_csv_to_json.py "$tmpdir/base.csv" -o "$tmpdir/base.json" --indent 2
    fi

    # 3) Auto-merge with kdiff3
    merged_json="$tmpdir/merged.json"
    if [ -s "$tmpdir/base.json" ]; then
    kdiff3 "$tmpdir/base.json" "$tmpdir/mine.json" "$tmpdir/others.json" \
    -o "$merged_json" -m
    else
    kdiff3 "$tmpdir/mine.json" "$tmpdir/others.json" \
    -o "$merged_json" -m
    fi

    # 4) Backup, convert back to CSV, and stage
    backup="/tmp/$(basename "$FILE")_ori.$(date +%s).bak"
    cp -- "$FILE" "$backup"
    echo "[INFO] Backup saved at: $backup"

    python3 scripts/convert_json_to_csv.py "$merged_json" -o "$FILE"
    git add -- "$FILE"

    echo "[INFO] Merged and staged: $FILE"
    echo "[INFO] Continue rebase with: git rebase --continue"