Skip to content

Instantly share code, notes, and snippets.

@mrjoshuak
Last active March 22, 2025 19:18
Show Gist options
  • Save mrjoshuak/227327f26815f6df557a5118519298a5 to your computer and use it in GitHub Desktop.
Save mrjoshuak/227327f26815f6df557a5118519298a5 to your computer and use it in GitHub Desktop.
A shell script to list contributors sorted by a naive assessment of their contributions. It also tries to merge results for users with inconsistent names or emails, and links to their GitHub profiles.
#!/bin/bash
# get_contributors.sh
# A script to collect all contributors to a Git repository, quantify their contributions,
# combine stats from the same person using different names/emails, and fetch GitHub profile information.
#
# Created by Joshua Kolden (https://github.com/mrjoshuak)
#
# Usage: ./get_contributors.sh [path_to_repo] [github_repo_owner/repo_name] [mapping_file]
#
# The optional mapping_file is a text file with lines in the format:
# name_or_email_pattern|normalized_key
#
# Example mapping file:
# # Lines starting with # are comments
# John|johndoe
# john@example.com|johndoe
# Jane Smith|janesmith
# Set the repository path and other parameters
REPO_PATH=${1:-.}
GITHUB_REPO=${2:-""}
MAPPING_FILE=${3:-""}

# A relative mapping file path is relative to the caller's directory. Pin it
# to an absolute path now: the script changes into the repository below and
# the mapping file is only read after that. A path that does not exist here
# is left untouched, so a repository-relative path keeps working.
if [[ -n "$MAPPING_FILE" && "$MAPPING_FILE" != /* && -f "$MAPPING_FILE" ]]; then
  MAPPING_FILE="$PWD/$MAPPING_FILE"
fi

# Check that the path is a git repository. Asking git itself (instead of
# testing for a ".git" directory) also accepts linked worktrees and
# submodules, where ".git" is a regular file, as well as bare repositories.
if ! git -C "$REPO_PATH" rev-parse --git-dir >/dev/null 2>&1; then
  echo "Error: $REPO_PATH is not a git repository" >&2
  exit 1
fi

# Change to the repository directory
cd "$REPO_PATH" || exit 1
echo "Analyzing contributors for repository: $(basename "$PWD")"
echo "------------------------------------------------------"
# Pick up GitHub API credentials from the environment.
# GITHUB_TOKEN wins over GITHUB_API_TOKEN; when neither is set GITHUB_AUTH
# stays empty and the later API lookups that require a token are skipped.
GITHUB_AUTH="${GITHUB_TOKEN:-$GITHUB_API_TOKEN}"
if [[ -z "$GITHUB_AUTH" ]]; then
  echo "No GitHub API token found in environment. Will use unauthenticated API calls (rate limited)."
  GITHUB_AUTH=""
else
  echo "GitHub API token found in environment"
  # Turn the bare token into the complete HTTP header handed to curl.
  GITHUB_AUTH="Authorization: token $GITHUB_AUTH"
fi
# Print the "owner/repo" slug contained in a GitHub remote URL.
# Arguments: $1 - remote URL (HTTPS or SSH form)
# Outputs:   the slug on stdout, without a trailing ".git"
# Returns:   0 when a slug was found, 1 otherwise (nothing is printed)
github_repo_from_url() {
  local url=$1
  local slug
  local slug_re='github\.com[:/]([^/]+/[^/]+)'

  [[ "$url" =~ $slug_re ]] || return 1
  slug=${BASH_REMATCH[1]}
  # "[^/]+" is greedy and swallows a ".git" suffix, so an optional group in
  # the pattern can never strip it; remove the suffix afterwards instead.
  slug=${slug%.git}
  printf '%s\n' "$slug"
}

# If no GitHub repo was provided, try to guess it from the remote URL
if [ -z "$GITHUB_REPO" ]; then
  REMOTE_URL=$(git config --get remote.origin.url)
  if GITHUB_REPO=$(github_repo_from_url "$REMOTE_URL"); then
    echo "Detected GitHub repository: $GITHUB_REPO"
  else
    # Keep the variable empty so later "-n $GITHUB_REPO" checks stay false.
    GITHUB_REPO=""
    echo "No GitHub repository specified and couldn't detect one from remotes."
    echo "GitHub profile information will not be available."
  fi
fi
# Get all contributors with their commit counts, lines added, and lines deleted
echo "Collecting contribution data..."

# Scratch files used by the rest of the script:
#   TEMP_FILE           - combined per-person statistics
#   NORM_FILE           - per-identity statistics tagged with a normalized key
#   MANUAL_MAP          - working copy of the name/email -> key mapping
#   GITHUB_INFO         - statistics enriched with GitHub profile data
#   GITHUB_USERNAME_MAP - name/email -> GitHub username mapping
TEMP_FILE=""
NORM_FILE=""
MANUAL_MAP=""
GITHUB_INFO=""
GITHUB_USERNAME_MAP=""

# Remove every scratch file that has been created so far. Safe to call more
# than once; registered on EXIT so an early exit or an interrupt does not
# leave files behind.
cleanup_temp_files() {
  local scratch
  for scratch in "$TEMP_FILE" "$NORM_FILE" "$MANUAL_MAP" "$GITHUB_INFO" "$GITHUB_USERNAME_MAP"; do
    if [[ -n "$scratch" ]]; then
      rm -f -- "$scratch"
    fi
  done
  return 0
}
trap cleanup_temp_files EXIT
# Route signals through a normal exit so the EXIT trap above always runs.
trap 'exit 130' INT
trap 'exit 143' TERM

# Create temporary files
if ! { TEMP_FILE=$(mktemp) &&
  NORM_FILE=$(mktemp) &&
  MANUAL_MAP=$(mktemp) &&
  GITHUB_INFO=$(mktemp) &&
  GITHUB_USERNAME_MAP=$(mktemp); }; then
  echo "Error: could not create temporary files" >&2
  exit 1
fi

# Use the caller's mapping file when it exists, otherwise an empty default.
if [[ -n "$MAPPING_FILE" && -f "$MAPPING_FILE" ]]; then
  echo "Using mapping file: $MAPPING_FILE"
  cp "$MAPPING_FILE" "$MANUAL_MAP"
else
  if [[ -n "$MAPPING_FILE" ]]; then
    # A mapping file was requested but cannot be found: say so instead of
    # silently producing unmerged results.
    echo "Warning: mapping file '$MAPPING_FILE' not found; continuing without it." >&2
  fi
  echo "# No custom mapping file provided. Using default empty mapping." >"$MANUAL_MAP"
fi

# Create a default GitHub username mapping file
cat >"$GITHUB_USERNAME_MAP" <<'EOF'
# Format: name_or_email_pattern|github_username
# This file maps contributors to their GitHub usernames
EOF
# Aggregate the output of `git log --format='@%an|%ae' --numstat`.
# Input (stdin):  "@name|email" header lines, each followed by that commit's
#                 "added<TAB>deleted<TAB>path" numstat lines.
# Output (stdout): one "commits|added|deleted|name|email" line per distinct
#                 name/email pair, ordered by name and email.
# Binary files report "-" for both counts; awk treats that as 0.
aggregate_author_stats() {
  awk -F'\t' '
    /^@/ {
      key = substr($0, 2)
      if (!(key in commits)) order[++n] = key
      commits[key]++
      next
    }
    NF >= 3 { added[key] += $1; deleted[key] += $2 }
    END {
      for (i = 1; i <= n; i++) {
        key = order[i]
        printf "%d|%d|%d|%s\n", commits[key], added[key], deleted[key], key
      }
    }
  ' | sort -t'|' -k4
}

# Print the key under which a contributor's statistics are merged.
# Arguments: $1 - author name, $2 - author email,
#            $3 - mapping file with "pattern|key" lines (may be empty/missing)
# Outputs:   the first mapped key whose pattern is contained in the name or
#            email; otherwise the name reduced to lowercase a-z0-9.
resolve_contributor_key() {
  local name=$1
  local email=$2
  local map_file=$3
  local pattern key

  if [[ -n "$map_file" && -r "$map_file" ]]; then
    # "|| [[ -n ... ]]" keeps a final line that lacks a trailing newline.
    while IFS='|' read -r pattern key || [[ -n "$pattern" ]]; do
      # Skip comments and malformed lines. An empty pattern would match
      # every contributor and end the search early.
      if [[ -z "$pattern" || -z "$key" || "$pattern" == \#* ]]; then
        continue
      fi
      if [[ "$name" == *"$pattern"* || "$email" == *"$pattern"* ]]; then
        printf '%s\n' "$key"
        return 0
      fi
    done <"$map_file"
  fi

  # Create a simple normalized name (lowercase, alphanumeric only)
  key=$(printf '%s' "$name" | tr '[:upper:]' '[:lower:]' | tr -cd 'a-z0-9')
  # Names without ASCII letters or digits would all collapse to the same
  # empty key; fall back to the email so those people stay distinct.
  if [[ -z "$key" ]]; then
    key=$(printf '%s' "$email" | tr '[:upper:]' '[:lower:]')
  fi
  printf '%s\n' "${key:-unknown}"
}

# Walk the history once and attribute every commit to its exact name/email
# pair. (Filtering with --author per name is a substring match, which counts
# the same commits again for every additional email or longer name.)
while IFS='|' read -r commits added deleted name email; do
  manual_key=$(resolve_contributor_key "$name" "$email" "$MANUAL_MAP")
  # Calculate a "contribution score" (commits + added + deleted)
  score=$((commits + added + deleted))
  # Output with the normalized key for later combining
  printf '%s|%s|%s|%s|%s|%s|%s\n' \
    "$manual_key" "$score" "$commits" "$added" "$deleted" "$name" "$email" >>"$NORM_FILE"
done < <(git log --format='@%an|%ae' --numstat | aggregate_author_stats)
# Combine entries with the same normalized key.
# Input (stdin):  key|score|commits|added|deleted|name|email
# Output (stdout): score|commits|added|deleted|name|email, one line per key,
#                 with the numeric columns summed and the longest name (and
#                 its email) kept as the display identity.
# Rows with an empty key cannot be attributed to a shared identity, so they
# are passed through one by one instead of being merged or dropped.
combine_contributors() {
  sort -t'|' -k1,1 | awk -F'|' '
    BEGIN { OFS = "|" }
    function emit() {
      if (pending) print score, commits, added, deleted, name, email
      pending = 0
    }
    NF == 0 { next }
    $1 == "" { emit(); print $2, $3, $4, $5, $6, $7; next }
    # Appending "" forces a string comparison, so keys such as 007 and 7
    # are not treated as the same number.
    pending && ($1 "") == key {
      score += $2; commits += $3; added += $4; deleted += $5
      if (length($6) > length(name)) { name = $6; email = $7 }
      next
    }
    {
      emit()
      key = $1 ""
      score = $2; commits = $3; added = $4; deleted = $5
      name = $6; email = $7
      pending = 1
    }
    END { emit() }
  '
}

combine_contributors <"$NORM_FILE" >"$TEMP_FILE"
# Derive a GitHub login from a commit email address.
# Arguments: $1 - author email
# Outputs:   for GitHub "noreply" addresses ("login@users.noreply.github.com"
#            or "12345+login@users.noreply.github.com") the login part; an
#            empty line for any other address.
extract_github_username() {
  local address="$1"
  local login=""
  local noreply_re='^([0-9]+\+)?([^@]+)@users\.noreply\.github\.com$'

  case "$address" in
    *"@users.noreply.github.com"*)
      if [[ "$address" =~ $noreply_re ]]; then
        # Group 2 is the login; group 1 is the optional numeric "id+" prefix.
        login="${BASH_REMATCH[2]}"
      else
        # Contains the noreply domain but is not a well-formed noreply
        # address: the address is passed through unchanged.
        login="$address"
      fi
      ;;
  esac

  echo "$login"
}
# Return the login of the first hit of a GitHub user search.
# Globals:   GITHUB_AUTH (read) - HTTP Authorization header
# Arguments: $1 - search query, e.g. "jane@example.com in:email"
# Outputs:   the login on stdout, or nothing when the search has no hits
_github_search_login() {
  local query="$1"
  local response

  # -G with --data-urlencode lets curl percent-encode the query. Author names
  # usually contain spaces, which are not valid inside a hand-built URL.
  response=$(curl -s -G -H "$GITHUB_AUTH" \
    --data-urlencode "q=$query" "https://api.github.com/search/users")
  if grep -Eq '"total_count": *[1-9]' <<<"$response"; then
    grep -o '"login": *"[^"]*"' <<<"$response" |
      head -n 1 |
      sed 's/"login": *"\([^"]*\)"/\1/'
  fi
}

# Function to get GitHub info for a user
# Globals:   GITHUB_USERNAME_MAP (read) - file with "pattern|username" lines
#            GITHUB_REPO (read)         - no lookups are made when empty
#            GITHUB_AUTH (read)         - API calls are made only when set
# Arguments: $1 - author email, $2 - author name
# Outputs:   "full_name|github_username|profile_url" on stdout; the last two
#            fields are empty when no GitHub account could be determined, and
#            full_name falls back to the git author name.
get_github_info() {
  local email="$1"
  local name="$2"
  local github_username=""
  local full_name=""
  local profile_url=""
  local pattern username user_info

  # Check if this user has a manual GitHub username mapping
  while IFS='|' read -r pattern username; do
    # Skip comments and blank lines (an empty pattern would match everyone)
    if [[ -z "$pattern" || "$pattern" == \#* ]]; then
      continue
    fi
    # Check if pattern matches name or email
    if [[ "$name" == *"$pattern"* || "$email" == *"$pattern"* ]]; then
      github_username="$username"
      break
    fi
  done <"$GITHUB_USERNAME_MAP"

  # If no manual mapping, try to extract from email
  if [ -z "$github_username" ]; then
    github_username=$(extract_github_username "$email")
  fi

  if [ -n "$GITHUB_REPO" ]; then
    # Still unknown: ask the search API, first by email and then by name.
    # Searching is only attempted with an API token.
    if [ -z "$github_username" ] && [ -n "$GITHUB_AUTH" ]; then
      if [ -n "$email" ]; then
        github_username=$(_github_search_login "$email in:email")
      fi
      if [ -z "$github_username" ] && [ -n "$name" ]; then
        github_username=$(_github_search_login "$name in:name")
      fi
    fi

    # If we found a GitHub username, get their profile info
    if [ -n "$github_username" ]; then
      profile_url="https://github.com/$github_username"
      # Get full name from GitHub profile if we have auth
      if [ -n "$GITHUB_AUTH" ]; then
        user_info=$(curl -s -H "$GITHUB_AUTH" "https://api.github.com/users/$github_username")
        full_name=$(grep -o '"name": *"[^"]*"' <<<"$user_info" |
          head -n 1 |
          sed 's/"name": *"\([^"]*\)"/\1/')
      fi
      # If no full name, use the username
      if [ -z "$full_name" ] || [ "$full_name" = "null" ]; then
        full_name="$github_username"
      fi
    fi
  fi

  # If we couldn't get GitHub info, use the name from git
  if [ -z "$full_name" ]; then
    full_name="$name"
  fi

  # Return the info
  printf '%s|%s|%s\n' "$full_name" "$github_username" "$profile_url"
}
# Print the report including GitHub columns.
# Input (stdin): score|commits|added|deleted|name|email|full_name|github_username|profile_url
# The header is produced with the same column widths as the rows so that the
# titles line up with the data.
print_github_table() {
  local score commits added deleted name email full_name github_username profile_url

  printf '%-30s %-30s %-30s %-10s %-10s %-10s %-10s\n' \
    "Name" "GitHub Username" "Profile" "Commits" "Added" "Deleted" "Score"
  printf '%136s\n' '' | tr ' ' '-'
  while IFS='|' read -r score commits added deleted name email full_name github_username profile_url; do
    # Use the GitHub full name if available, otherwise the git name; unknown
    # username / profile are shown as "-".
    printf '%-30s %-30s %-30s %-10s %-10s %-10s %-10s\n' \
      "${full_name:-$name}" "${github_username:--}" "${profile_url:--}" \
      "$commits" "$added" "$deleted" "$score"
  done
}

# Print the report without GitHub columns.
# Input (stdin): score|commits|added|deleted|name|email
# Entries with a zero score are skipped.
print_plain_table() {
  local score commits added deleted name email

  printf '%-30s %-30s %-10s %-10s %-10s %-10s\n' \
    "Name" "Email" "Commits" "Added" "Deleted" "Score"
  printf '%105s\n' '' | tr ' ' '-'
  while IFS='|' read -r score commits added deleted name email; do
    if [[ $score -eq 0 ]]; then
      continue
    fi
    printf '%-30s %-30s %-10s %-10s %-10s %-10s\n' \
      "$name" "$email" "$commits" "$added" "$deleted" "$score"
  done
}

# Get GitHub info for each contributor
echo "Fetching GitHub profile information..."
if [ -n "$GITHUB_REPO" ]; then
  # Highest score first; rows are enriched in that order and appended to
  # GITHUB_INFO, which therefore is already sorted for display.
  sort -t'|' -k1,1nr "$TEMP_FILE" | while IFS='|' read -r score commits added deleted name email; do
    # Skip entries with zero score
    if [[ $score -eq 0 ]]; then
      continue
    fi
    # stdin is detached so nothing in the lookup can eat the rows being read.
    github_info=$(get_github_info "$email" "$name" </dev/null)
    IFS='|' read -r full_name github_username profile_url <<<"$github_info"
    printf '%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
      "$score" "$commits" "$added" "$deleted" "$name" "$email" \
      "$full_name" "$github_username" "$profile_url" >>"$GITHUB_INFO"
  done
  # Display the results with GitHub info
  print_github_table <"$GITHUB_INFO"
else
  # Display the results without GitHub info
  sort -t'|' -k1,1nr "$TEMP_FILE" | print_plain_table
fi
# Remove the scratch files created during the run.
stale_files=(
  /tmp/contributors.txt
  "$TEMP_FILE"
  "$NORM_FILE"
  "$MANUAL_MAP"
  "$GITHUB_INFO"
  "$GITHUB_USERNAME_MAP"
)
rm -f "${stale_files[@]}"

# Closing notes explaining how to read the report.
printf '%s\n' \
  "------------------------------------------------------" \
  "Note: 'Score' is a simple metric calculated as (commits + lines added + lines deleted)" \
  "This is a basic quantification and doesn't account for the complexity or impact of contributions." \
  "Contributors with multiple email addresses have been combined where possible."
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment