Skip to content

Instantly share code, notes, and snippets.

@methanoliver
Last active September 14, 2016 23:19
Show Gist options
  • Select an option

  • Save methanoliver/26d59419cd28a13d62ffaa2a4a429dd2 to your computer and use it in GitHub Desktop.

Select an option

Save methanoliver/26d59419cd28a13d62ffaa2a4a429dd2 to your computer and use it in GitHub Desktop.

Revisions

  1. methanoliver revised this gist Sep 14, 2016. No changes.
  2. methanoliver revised this gist Sep 14, 2016. No changes.
  3. methanoliver created this gist Sep 14, 2016.
    137 changes: 137 additions & 0 deletions diff-critic.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,137 @@
    #!/usr/bin/python

    from __future__ import print_function

    import subprocess
    import os
    import re


    def criticize(root, from_ref, to_ref, filename, html=False):
        """Render git's word-level diff of one file as CriticMarkup or HTML.

        Runs ``git diff --word-diff=porcelain`` with ``root`` as the working
        directory and rewrites the porcelain output into inline change markup.

        Args:
            root: Directory in which git is executed.
            from_ref: The "before" revision passed to ``git diff``.
            to_ref: The "after" revision passed to ``git diff``.
            filename: Path of the file to diff, as passed to ``git diff``.
            html: When true, emit ``<del>``/``<ins>`` tags instead of
                CriticMarkup ``{--…--}``, ``{++…++}`` and ``{~~…~>…~~}``.

        Returns:
            The marked-up text; an empty string when git printed nothing.

        Raises:
            IOError: If git exits with a non-zero status.
        """
        # Git uses "^+" for additions, "^-" for deletions, "^~$" for newlines.
        # It's always deletion first, addition second, so when a deletion is
        # followed by an addition it's a change {~~this~>that~~}, otherwise
        # {--deletion--} and {++addition++}.

        # There is probably a simpler way to do this, but this one works
        # passably well.

        git_args = [
            "git", "diff", "--unified=100000", "--word-diff=porcelain",
            "--no-color",
            from_ref, to_ref, filename
        ]

        p = subprocess.Popen(git_args, cwd=root, stdout=subprocess.PIPE,
                             universal_newlines=True)
        # communicate() drains stdout while waiting for git to exit. Calling
        # wait() before reading can deadlock once the output is larger than
        # the OS pipe buffer, which is likely with --unified=100000.
        raw_data, _ = p.communicate()

        # A negative return code means git was killed by a signal, which is
        # just as much a failure as a positive exit status.
        if p.returncode != 0:
            raise IOError("Git complained about something.")

        # Now we massage this into CriticMark format.

        # Cut off the unified diff header.
        raw_data = re.sub(
            r"^diff --git .*\nindex .*\n--- .*\n\+\+\+ .*\n@@ .* @@\n", "",
            raw_data, flags=re.MULTILINE)

        # Replace all cases of changed pairs, i.e. - followed by a +.
        # NOTE: in HTML mode the diffed text is inserted as-is, without any
        # HTML escaping.
        raw_data = re.sub(
            r"^-(?P<from>.*)\n\+(?P<to>.*)\n",
            r" <del>\g<from></del><ins>\g<to></ins>\n" if html
            else r" {~~\g<from>~>\g<to>~~}\n",
            raw_data,
            flags=re.MULTILINE
        )

        # Replace solo deletions and additions.
        raw_data = re.sub(
            r"^-(?P<del>.*)\n",
            r" <del>\g<del></del>\n" if html else r" {--\g<del>--}\n",
            raw_data,
            flags=re.MULTILINE
        )
        raw_data = re.sub(
            r"^\+(?P<add>.*)\n",
            r" <ins>\g<add></ins>\n" if html else r" {++\g<add>++}\n",
            raw_data,
            flags=re.MULTILINE
        )

        # The rest is easier to do manually:

        # If a line starts with a space, cut the space off and join it to the
        # previous one.

        # If a line starts with a tilde, convert it to a straight newline.

        # Collect the pieces and join once instead of growing a string with +=.
        pieces = []

        for line in raw_data.split('\n'):
            if line.startswith('~'):
                pieces.append("\n")
            elif line.startswith(" "):
                pieces.append(line[1:])
            else:
                pieces.append(line)

        return "".join(pieces)

    if __name__ == "__main__":
        # Command-line entry point: diff each named file between two git
        # revisions and write the marked-up result next to it (or to --output).

        import argparse
        import sys

        def output_pattern(filename):
            """Return the default output name for ``filename``.

            A trailing ``.md`` / ``.markdown`` extension is replaced by
            ``.critic.md``; any other name just gets ``.critic.md`` appended.
            """
            if filename.endswith(('.md', '.markdown')):
                return filename.rsplit('.', 1)[0] + ".critic.md"
            return filename + ".critic.md"

        parser = argparse.ArgumentParser(
            description="Using git's word-based diff facility, produce a "
            "diff between two versions of a given file expressed as CriticMark "
            "markup.\nAssumes it's running in the root of a git tree.\n"
            "Nothing will be written if the file did not change between the "
            "given revisions."
        )

        parser.add_argument('filename', metavar='FILE', type=str, nargs='+',
                            help='Filenames to produce a diff for.')

        parser.add_argument('--output', '-o', metavar='output_filename',
                            type=str,
                            help="Output filename. Can "
                            "only be used when processing files one by one, "
                            "otherwise default pattern *.critic.md is used.")

        parser.add_argument('--html', action="store_true",
                            help="Output HTML instead of CriticMark.")

        parser.add_argument('--from', '-f', dest='from_ref', metavar='from',
                            type=str, required=True,
                            help="The 'before' version of the file, required.")

        parser.add_argument('--to', '-t', dest='to_ref', metavar='to', type=str,
                            default='HEAD',
                            help="The 'after' version of the file, "
                            "defaults to HEAD.")

        args = parser.parse_args()

        # The script only checks for a .git directory in the current working
        # directory; it does not search parent directories.
        if not os.path.isdir('.git'):
            print("Not running in a git tree. Use -h for help.")
            sys.exit(1)

        # A single --output name cannot hold the results for several inputs.
        if len(args.filename) > 1 and args.output:
            print("Cannot write multiple output files into the same filename.")
            sys.exit(1)

        for filename in args.filename:
            data = criticize(os.getcwd(), args.from_ref, args.to_ref,
                             filename, html=args.html)
            # An empty result means git reported no differences: write nothing.
            if data:
                # criticize() returns text (git output is read with
                # universal_newlines=True), so the file must be opened in text
                # mode; writing str to a binary-mode file raises TypeError on
                # Python 3.
                with open(args.output or output_pattern(filename), 'w') as f:
                    f.write(data)