Skip to content

Instantly share code, notes, and snippets.

@haldean
Last active October 21, 2015 21:45
Show Gist options
  • Save haldean/a298dda15bff8021b228 to your computer and use it in GitHub Desktop.
Save haldean/a298dda15bff8021b228 to your computer and use it in GitHub Desktop.

Revisions

  1. haldean revised this gist Oct 21, 2015. 1 changed file with 8 additions and 0 deletions.
    8 changes: 8 additions & 0 deletions authordensity.py
    Original file line number Diff line number Diff line change
    @@ -26,6 +26,7 @@

    file_densities = dict()
    authors_found = set()
    total_freq = collections.defaultdict(lambda: 0)

    for f in sys.argv[1:]:
    if not match_files.search(f):
    @@ -46,6 +47,7 @@
    author = synonyms[author]
    authors_found.add(author)
    authors_freq[author] += 1
    total_freq[author] += 1
    total_lines = len(author_lines)
    authors = {author: author_lines / total_lines
    for author, author_lines in authors_freq.iteritems()}
    @@ -60,3 +62,9 @@
    print("\n".join("%3.0f%%\t%s" % (100. * s[0], s[1])
    for s in author_densities[:count]))
    print()

    # Repository-wide summary: one row per author giving the number of blamed
    # lines attributed to them and their percentage of all counted lines.
    total_lines = sum(total_freq.values())
    # dict.items() is a view without .sort() on Python 3; sorted() behaves the
    # same on Python 2 and 3. Order: most lines first, ties by author name
    # (descending, because the whole key is reversed).
    records = sorted(total_freq.items(),
                     key=lambda p: (p[1], p[0]), reverse=True)
    for author, freq in records:
        print("%s\t%.3f%%\t\t%s" % (freq, 100. * freq / total_lines, author))
  2. haldean created this gist Oct 6, 2015.
    62 changes: 62 additions & 0 deletions authordensity.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,62 @@
    """
    Most useful as:
    git ls-tree --name-only -r HEAD | xargs python /path/to/authordensity.py
    When run from the root of your git repository. If people show up under
    multiple names, use the synonyms dict to map their aliases to a canonical
    name.
    Needs no external libs.
    """

    from __future__ import division, print_function

    import collections
    import re
    import subprocess
    import sys

    # Maps an author alias to the canonical name it should be counted under.
    # Empty by default; add entries such as {"jdoe": "Jane Doe"} as needed.
    synonyms = {
    }

    # Only paths ending in one of these source-file extensions are blamed.
    match_files = re.compile(r"\.(cc|h|cpp|hpp|c|py|pxi|pyx)$")
    # Selects the "author <name>" lines of the blame output. The trailing space
    # in the pattern keeps keys that merely start with "author" (e.g.
    # "author-mail") from matching.
    author_line_re = re.compile("^author ")
    # Maximum number of files listed per author in the report.
    count = 30

    # file name -> {author: fraction of that file's blamed lines}
    file_densities = dict()
    # Every (canonical) author name seen in any blamed file.
    authors_found = set()


    def blame_file(path):
        """Return the raw ``git blame --line-porcelain`` output for *path*.

        Returns None when git exits with status 128 (presumably a path git
        cannot blame, e.g. an untracked file -- confirm against git docs); any
        other failure is re-raised as subprocess.CalledProcessError.
        """
        try:
            # stderr is captured so git's own error text does not reach the
            # terminal for files that end up being skipped.
            return subprocess.check_output(
                ["git", "blame", "--line-porcelain", path],
                stderr=subprocess.PIPE)
        except subprocess.CalledProcessError as e:
            if e.returncode == 128:
                return None
            raise


    def parse_authors(blame):
        """Return the author of every blamed line, in order of appearance.

        *blame* is blame output as returned by check_output: ``bytes`` on
        Python 3, ``str`` on Python 2 (a ``str`` is accepted on both). Names
        listed in ``synonyms`` are replaced by their canonical form.
        """
        names = []
        # Split first and decode per line: on Python 3 the bytes are split only
        # on real line endings, and undecodable file content cannot make the
        # whole blame fail ("replace" substitutes bad bytes).
        for raw in blame.splitlines():
            if isinstance(raw, str):
                line = raw
            else:
                line = raw.decode("utf-8", "replace")
            if not author_line_re.match(line):
                continue
            name = line.split(" ", 1)[1]
            names.append(synonyms.get(name, name))
        return names


    def author_shares(names):
        """Map each author in *names* to their fraction of ``len(names)``.

        An empty list yields an empty dict (no division takes place).
        """
        total = len(names)
        return {author: lines / total
                for author, lines in collections.Counter(names).items()}


    def report(densities, authors):
        """Print, per author, the files where their share of lines is highest.

        *densities* has the shape of ``file_densities``; at most ``count``
        files are printed per author, highest share first.
        """
        # Sorted so the report order is stable from run to run; iterating the
        # set directly gives an arbitrary order.
        for author in sorted(authors):
            ranked = sorted(
                ((shares[author], fname)
                 for fname, shares in densities.items()
                 if shares.get(author)),
                reverse=True)
            print("\n%s" % author)
            print("\n".join("%3.0f%%\t%s" % (100. * share, fname)
                            for share, fname in ranked[:count]))
            print()


    def main(paths):
        """Blame every matching file in *paths* and print the per-author report."""
        for path in paths:
            if not match_files.search(path):
                continue
            blame = blame_file(path)
            if blame is None:
                continue
            names = parse_authors(blame)
            authors_found.update(names)
            file_densities[path] = author_shares(names)
        report(file_densities, authors_found)


    if __name__ == "__main__":
        main(sys.argv[1:])