Last active
October 21, 2015 21:45
-
-
Save haldean/a298dda15bff8021b228 to your computer and use it in GitHub Desktop.
Revisions
-
haldean revised this gist
Oct 21, 2015 . 1 changed file with 8 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -26,6 +26,7 @@ file_densities = dict() authors_found = set() total_freq = collections.defaultdict(lambda: 0) for f in sys.argv[1:]: if not match_files.search(f): @@ -46,6 +47,7 @@ author = synonyms[author] authors_found.add(author) authors_freq[author] += 1 total_freq[author] += 1 total_lines = len(author_lines) authors = {author: author_lines / total_lines for author, author_lines in authors_freq.iteritems()} @@ -60,3 +62,9 @@ print("\n".join("%3.0f%%\t%s" % (100. * s[0], s[1]) for s in author_densities[:count])) print() total_lines = sum(total_freq.values()) records = total_freq.items() records.sort(key=lambda p: (p[1], p[0]), reverse=True) for author, freq in records: print("%s\t%.3f%%\t\t%s" % (freq, 100. * freq / total_lines, author)) -
haldean created this gist
Oct 6, 2015 .There are no files selected for viewing
"""Report, per author, the files in which they wrote the largest share of lines.

Most useful as:

    git ls-tree --name-only -r HEAD | xargs python /path/to/authordensity.py

when run from the root of your git repository. If people show up under
multiple names, use the ``synonyms`` dict to map their aliases to a canonical
name.

Needs no external libs. Runs on both Python 2 and Python 3.
"""

from __future__ import division, print_function

import collections
import re
import subprocess
import sys

# Maps an author alias (as it appears in ``git blame``) to a canonical name.
synonyms = {
}

# Only files whose name ends in one of these extensions are analysed.
match_files = re.compile(r"\.(cc|h|cpp|hpp|c|py|pxi|pyx)$")
# Matches the porcelain header that carries the author name. The trailing
# space keeps "author-mail", "author-time" and "author-tz" from matching.
author_line_re = re.compile(r"^author ")
# Maximum number of files listed per author.
count = 30


def to_text(data):
    """Return *data* as native text, decoding bytes as UTF-8 when needed.

    On Python 2 ``bytes`` is ``str`` so the value is returned untouched; on
    Python 3 undecodable bytes are replaced rather than raising.
    """
    if isinstance(data, bytes) and not isinstance(data, str):
        return data.decode("utf-8", "replace")
    return data


def blame_text(path):
    """Return ``git blame --line-porcelain`` output for *path* as text.

    Returns None when git exits with status 128 (its fatal-error status, e.g.
    a path it cannot blame) so the caller can skip the file. Any other
    non-zero exit re-raises ``subprocess.CalledProcessError``.
    """
    try:
        raw = subprocess.check_output(
            ["git", "blame", "--line-porcelain", path],
            stderr=subprocess.PIPE)
    except subprocess.CalledProcessError as e:
        if e.returncode == 128:
            return None
        raise
    return to_text(raw)


def count_author_lines(blame, aliases=None):
    """Count blamed lines per author in porcelain *blame* output.

    *blame* may be text or bytes. *aliases* maps alias -> canonical name and
    defaults to the module-level ``synonyms``. Returns a ``Counter`` keyed by
    canonical author name.
    """
    if aliases is None:
        aliases = synonyms
    tally = collections.Counter()
    # Split on "\n" only: str.splitlines() also breaks on form feeds, NEL,
    # etc., which could turn part of a blamed source line into a fake
    # "author ..." header.
    for line in to_text(blame).split("\n"):
        if author_line_re.match(line):
            name = line.split(" ", 1)[1]
            tally[aliases.get(name, name)] += 1
    return tally


def author_shares(tally):
    """Convert per-author line counts into fractions of the file's lines.

    Returns an empty dict when *tally* holds no lines.
    """
    total = sum(tally.values())
    if not total:
        return {}
    return {name: lines / total for name, lines in tally.items()}


def top_files(author, file_densities, limit):
    """Return formatted report lines for *author*'s densest files.

    *file_densities* maps file name -> {author: share}. Files where the
    author has no share are omitted; the rest are ordered by descending
    (share, file name) and truncated to *limit* entries.
    """
    ranked = sorted(
        ((shares[author], fname)
         for fname, shares in file_densities.items()
         if shares.get(author)),
        reverse=True)
    return ["%3.0f%%\t%s" % (100. * share, fname)
            for share, fname in ranked[:limit]]


def main(argv=None):
    """Blame every matching path in *argv* and print a per-author report.

    *argv* defaults to ``sys.argv[1:]``. Paths not matching ``match_files``
    and paths git cannot blame are skipped.
    """
    paths = sys.argv[1:] if argv is None else argv
    file_densities = {}
    authors_found = set()

    for path in paths:
        if not match_files.search(path):
            continue
        blame = blame_text(path)
        if blame is None:
            continue
        shares = author_shares(count_author_lines(blame))
        authors_found.update(shares)
        file_densities[path] = shares

    # Sorted so the report order is reproducible from run to run.
    for author in sorted(authors_found):
        print("\n%s" % author)
        print("\n".join(top_files(author, file_densities, count)))
        print()


if __name__ == "__main__":
    main()