#!/usr/bin/python
"""Render git word diffs of files as CriticMark (or HTML) markup.

Run from the root of a git tree; see ``-h`` for the command-line options.
"""
from __future__ import print_function

import argparse
import os
import re
import subprocess
import sys

# Unified diff header emitted by git before the first (and, given the huge
# context size requested in criticize(), only) hunk.
_HEADER_RE = re.compile(
    r"^diff --git .*\nindex .*\n--- .*\n\+\+\+ .*\n@@ .* @@\n",
    re.MULTILINE)
# A deletion immediately followed by an addition, i.e. a change.
_CHANGE_RE = re.compile(r"^-(?P<old>.*)\n\+(?P<new>.*)\n", re.MULTILINE)
# Solo deletions and additions (whatever _CHANGE_RE did not consume).
_DELETION_RE = re.compile(r"^-(?P<old>.*)\n", re.MULTILINE)
_ADDITION_RE = re.compile(r"^\+(?P<new>.*)\n", re.MULTILINE)

# Replacement templates, keyed by the value of the ``html`` flag. Every
# template starts with a space so that the final pass in convert_word_diff()
# joins the result to the preceding text like any other context line.
# NOTE(review): the <del>/<ins> tag names are reconstructed -- confirm that
# they match the HTML output this tool is expected to produce.
_CHANGE_TEMPLATE = {
    False: r" {~~\g<old>~>\g<new>~~}\n",
    True: r" <del>\g<old></del><ins>\g<new></ins>\n",
}
_DELETION_TEMPLATE = {
    False: r" {--\g<old>--}\n",
    True: r" <del>\g<old></del>\n",
}
_ADDITION_TEMPLATE = {
    False: r" {++\g<new>++}\n",
    True: r" <ins>\g<new></ins>\n",
}


def convert_word_diff(raw_data, html=False):
    """Convert ``git diff --word-diff=porcelain`` output to CriticMark.

    Git uses "^+" for additions, "^-" for deletions and "^~$" for newlines.
    It is always deletion first, addition second, so a deletion followed by
    an addition is a change {~~this~>that~~}; otherwise the markers are
    {--deletion--} and {++addition++}.

    :param raw_data: text of the porcelain word diff, header included.
    :param html: emit <del>/<ins> tags instead of CriticMark markers. The
        diffed text is inserted as-is, without HTML escaping.
    :returns: the marked-up document as a single string; empty when
        ``raw_data`` is empty.
    """
    html = bool(html)

    # Cut off the unified diff header.
    text = _HEADER_RE.sub("", raw_data)
    # Changed pairs must be replaced before the solo patterns get a chance
    # to match their halves.
    text = _CHANGE_RE.sub(_CHANGE_TEMPLATE[html], text)
    text = _DELETION_RE.sub(_DELETION_TEMPLATE[html], text)
    text = _ADDITION_RE.sub(_ADDITION_TEMPLATE[html], text)

    # The rest is easier to do manually: a line holding a tilde becomes a
    # real newline, a line starting with a space loses that space and is
    # joined to the previous one, anything else is joined unchanged.
    pieces = []
    for line in text.split('\n'):
        if line.startswith('~'):
            pieces.append("\n")
        elif line.startswith(" "):
            pieces.append(line[1:])
        else:
            pieces.append(line)
    return "".join(pieces)


def criticize(root, from_ref, to_ref, filename, html=False):
    """Diff ``filename`` between two git revisions and mark up the changes.

    There is probably a simpler way to do this, but this one works passably
    well.

    :param root: directory in which to run git.
    :param from_ref: the 'before' revision.
    :param to_ref: the 'after' revision.
    :param filename: path of the file to diff, as git should see it.
    :param html: produce HTML instead of CriticMark.
    :returns: the marked-up text; empty if git printed no diff.
    :raises IOError: if git exits with a non-zero status.
    """
    git_args = [
        "git", "diff", "--unified=100000", "--word-diff=porcelain",
        "--no-color", from_ref, to_ref,
        # Keep a filename from ever being parsed as a revision or option.
        "--", filename,
    ]
    p = subprocess.Popen(git_args, cwd=root, stdout=subprocess.PIPE,
                         universal_newlines=True)
    # communicate() drains the pipe while waiting; calling wait() first
    # would deadlock as soon as git fills the pipe buffer.
    raw_data, _ = p.communicate()
    # A negative code means git was killed by a signal; treat it as failure.
    if p.returncode != 0:
        raise IOError("Git complained about something.")

    # Now we massage this into CriticMark format.
    return convert_word_diff(raw_data, html=html)


def output_pattern(filename):
    """Return the default output name for ``filename``.

    A trailing ``.md`` or ``.markdown`` extension is replaced with
    ``.critic.md``; for any other name ``.critic.md`` is appended.
    """
    if filename.endswith(('.md', '.markdown')):
        return filename.rsplit('.', 1)[0] + ".critic.md"
    return filename + ".critic.md"


def main():
    """Parse the command line and write one marked-up file per input file."""
    parser = argparse.ArgumentParser(
        description="Using git's word-based diff facility, produce a "
        "diff between two versions of a given file expressed as CriticMark "
        "markup.\nAssumes it's running in the root of a git tree.\n"
        "Nothing will be written if the file did not change between the "
        "given revisions."
    )
    parser.add_argument('filename', metavar='FILE', type=str, nargs='+',
                        help='Filenames to produce a diff for.')
    parser.add_argument('--output', '-o', metavar='output_filename',
                        type=str, help="Output filename. Can "
                        "only be used when processing files one by one, "
                        "otherwise default pattern *.critic.md is used.")
    parser.add_argument('--html', action="store_true",
                        help="Output HTML instead of CriticMark.")
    parser.add_argument('--from', '-f', dest='from_ref', metavar='from',
                        type=str, required=True,
                        help="The 'before' version of the file, required.")
    parser.add_argument('--to', '-t', dest='to_ref', metavar='to',
                        type=str, default='HEAD',
                        help="The 'after' version of the file, "
                        "defaults to HEAD.")
    args = parser.parse_args()

    # In linked worktrees and submodules '.git' is a file, not a directory,
    # so only its existence is checked.
    if not os.path.exists('.git'):
        print("Not running in a git tree. Use -h for help.")
        sys.exit(1)

    if len(args.filename) > 1 and args.output:
        print("Cannot write multiple output files into the same filename.")
        sys.exit(1)

    for filename in args.filename:
        data = criticize(os.getcwd(), args.from_ref, args.to_ref, filename,
                         html=args.html)
        # Nothing is written for files that did not change.
        if data:
            # Text mode: the diff is text on both Python 2 and 3, and a
            # binary-mode file rejects str on Python 3.
            with open(args.output or output_pattern(filename), 'w') as f:
                f.write(data)


if __name__ == "__main__":
    main()