Last active
March 22, 2025 19:18
-
-
Save mrjoshuak/227327f26815f6df557a5118519298a5 to your computer and use it in GitHub Desktop.
A shell script to list contributors sorted by a naive assessment of their contributions. It also tries to merge results for users with inconsistent names or emails, and to link to their GitHub profiles.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/bin/bash | |
| # get_contributors.sh | |
| # A script to collect all contributors to a Git repository, quantify their contributions, | |
| # combine stats from the same person using different names/emails, and fetch GitHub profile information. | |
| # | |
| # Created by Joshua Kolden (https://github.com/mrjoshuak) | |
| # | |
| # Usage: ./get_contributors.sh [path_to_repo] [github_repo_owner/repo_name] [mapping_file] | |
| # | |
| # The optional mapping_file is a text file with lines in the format: | |
| # name_or_email_pattern|normalized_key | |
| # | |
| # Example mapping file: | |
| # # Lines starting with # are comments | |
| # John|johndoe | |
| # john.doe@example.com|johndoe | |
| # Jane Smith|janesmith | |
# Set the repository path and other parameters.
#   $1 - path to the repository (default: current directory)
#   $2 - GitHub "owner/repo" slug (default: empty)
#   $3 - optional name/email mapping file (default: none)
REPO_PATH=${1:-.}
GITHUB_REPO=${2:-""}
MAPPING_FILE=${3:-""}

# Anchor a relative mapping-file path to the caller's directory now: the
# script changes into the repository below, after which a relative path would
# be resolved against the repository instead.
if [[ -n "$MAPPING_FILE" && "$MAPPING_FILE" != /* ]]; then
  MAPPING_FILE="$PWD/$MAPPING_FILE"
fi

# Ask git whether the path is inside a work tree instead of testing for a
# ".git" directory; in linked worktrees and submodules ".git" is a plain file.
if ! REPO_ROOT=$(git -C "$REPO_PATH" rev-parse --show-toplevel 2>/dev/null) \
  || [[ -z "$REPO_ROOT" ]]; then
  echo "Error: $REPO_PATH is not a git repository" >&2
  exit 1
fi

# Change to the repository's top-level directory.
cd "$REPO_ROOT" || exit 1

echo "Analyzing contributors for repository: $(basename "$PWD")"
echo "------------------------------------------------------"

# Check for GitHub API credentials. GITHUB_TOKEN takes precedence over
# GITHUB_API_TOKEN; GITHUB_AUTH stays empty when neither is set.
api_token=${GITHUB_TOKEN:-${GITHUB_API_TOKEN:-}}
if [[ -n "$api_token" ]]; then
  echo "GitHub API token found in environment"
  GITHUB_AUTH="Authorization: token $api_token"
else
  echo "No GitHub API token found in environment. Will use unauthenticated API calls (rate limited)."
  GITHUB_AUTH=""
fi
unset api_token
#######################################
# Extract the "owner/repo" slug from a GitHub remote URL.
# Handles both "https://github.com/owner/repo(.git)" and
# "git@github.com:owner/repo(.git)" forms, with or without a trailing slash.
# Arguments: $1 - remote URL
# Outputs:   the slug on stdout
# Returns:   0 when a slug was found, 1 otherwise (nothing is printed)
#######################################
github_repo_from_url() {
  local url=$1
  local slug
  local slug_re='^([^/]+/[^/]+)'

  [[ "$url" == *github.com[:/]* ]] || return 1

  slug=${url#*github.com[:/]}
  slug=${slug%/}
  # Strip the suffix explicitly: a greedy "[^/]+" would otherwise keep ".git".
  slug=${slug%.git}

  [[ "$slug" =~ $slug_re ]] || return 1
  printf '%s\n' "${BASH_REMATCH[1]}"
}

# If no GitHub repo was provided, try to guess it from the remote URL.
if [[ -z "${GITHUB_REPO:-}" ]]; then
  REMOTE_URL=$(git config --get remote.origin.url)
  if GITHUB_REPO=$(github_repo_from_url "$REMOTE_URL"); then
    echo "Detected GitHub repository: $GITHUB_REPO"
  else
    GITHUB_REPO=""
    echo "No GitHub repository specified and couldn't detect one from remotes."
    echo "GitHub profile information will not be available."
  fi
fi
# Get all contributors with their commit counts, lines added, and lines deleted
echo "Collecting contribution data..."

# Create temporary files:
#   TEMP_FILE           - combined per-person statistics
#   NORM_FILE           - per-identity statistics tagged with a normalized key
#   MANUAL_MAP          - working copy of the user's name/email mapping
#   GITHUB_INFO         - statistics enriched with GitHub profile fields
#   GITHUB_USERNAME_MAP - contributor -> GitHub username mapping
TEMP_FILE=$(mktemp) || exit 1
NORM_FILE=$(mktemp) || exit 1
MANUAL_MAP=$(mktemp) || exit 1
GITHUB_INFO=$(mktemp) || exit 1
GITHUB_USERNAME_MAP=$(mktemp) || exit 1

# Remove the scratch files whenever the script exits, so an early exit or a
# failure further down does not leave them behind.
cleanup_temp_files() {
  rm -f -- "$TEMP_FILE" "$NORM_FILE" "$MANUAL_MAP" "$GITHUB_INFO" "$GITHUB_USERNAME_MAP"
}
trap cleanup_temp_files EXIT

# Use the caller's mapping file when it exists; otherwise fall back to an
# empty (comment-only) mapping.
if [[ -n "${MAPPING_FILE:-}" && -f "$MAPPING_FILE" ]]; then
  echo "Using mapping file: $MAPPING_FILE"
  cp -- "$MAPPING_FILE" "$MANUAL_MAP"
else
  if [[ -n "${MAPPING_FILE:-}" ]]; then
    # A mapping file was requested but is missing: say so instead of
    # silently ignoring it.
    echo "Warning: mapping file not found: $MAPPING_FILE" >&2
  fi
  echo "# No custom mapping file provided. Using default empty mapping." >"$MANUAL_MAP"
fi

# Create a default GitHub username mapping file (comments only).
cat >"$GITHUB_USERNAME_MAP" <<'EOF'
# Format: name_or_email_pattern|github_username
# This file maps contributors to their GitHub usernames
EOF
# Get all authors and their emails, with statistics per exact identity.

#######################################
# Load "pattern|key" mapping lines into two parallel arrays.
# Blank lines and lines starting with '#' are skipped; a final line without a
# trailing newline is still read.
# Globals:   KEY_PATTERNS, KEY_VALUES (written)
# Arguments: $1 - mapping file (an unreadable path yields an empty mapping)
#######################################
load_key_mappings() {
  local file=$1
  local pattern key

  KEY_PATTERNS=()
  KEY_VALUES=()
  [[ -r "$file" ]] || return 0

  while IFS='|' read -r pattern key || [[ -n "$pattern" ]]; do
    # An empty pattern would match every contributor, so it is ignored.
    if [[ -z "$pattern" || "$pattern" == \#* ]]; then
      continue
    fi
    KEY_PATTERNS+=("$pattern")
    KEY_VALUES+=("$key")
  done <"$file"
}

#######################################
# Compute the key used to merge identities belonging to the same person.
# Order of preference: first mapping whose pattern is a substring of the name
# or email; the name reduced to lowercase a-z0-9; the lowercased email (for
# names with no ASCII alphanumerics); the literal "unknown".
# Globals:   KEY_PATTERNS, KEY_VALUES (read)
# Arguments: $1 - author name, $2 - author email
# Outputs:   the key on stdout
#######################################
contributor_key() {
  local name=$1
  local email=$2
  local key=""
  local i

  for i in "${!KEY_PATTERNS[@]}"; do
    if [[ "$name" == *"${KEY_PATTERNS[i]}"* || "$email" == *"${KEY_PATTERNS[i]}"* ]]; then
      key=${KEY_VALUES[i]}
      break
    fi
  done

  if [[ -z "$key" ]]; then
    key=$(printf '%s' "$name" | tr '[:upper:]' '[:lower:]' | tr -cd 'a-z0-9')
  fi
  if [[ -z "$key" ]]; then
    key=$(printf '%s' "$email" | tr '[:upper:]' '[:lower:]')
  fi
  printf '%s\n' "${key:-unknown}"
}

#######################################
# Aggregate `git log --numstat` output per author identity.
# Input (stdin): lines of the form "author<TAB>name|email" followed by that
#   commit's numstat lines ("added<TAB>deleted<TAB>path"; "-" for binary
#   files counts as 0).
# Outputs: "commits|added|deleted|name|email", one line per identity, sorted
#   by identity.
#######################################
aggregate_author_stats() {
  awk '
    index($0, "author\t") == 1 {
      who = substr($0, 8)
      commits[who]++
      next
    }
    who != "" && NF >= 3 {
      added[who] += $1
      deleted[who] += $2
    }
    END {
      for (w in commits) {
        printf "%d|%d|%d|%s\n", commits[w], added[w], deleted[w], w
      }
    }
  ' | LC_ALL=C sort -t'|' -k4
}

#######################################
# Emit one "key|score|commits|added|deleted|name|email" record per author
# identity. History is read in a single pass and attributed to the exact
# (name, email) pair of each commit, so identities whose names overlap, or
# that share a name across several emails, are not counted more than once.
# Globals:   MANUAL_MAP (read)
# Outputs:   records on stdout
#######################################
collect_contributor_stats() {
  local commits added deleted name email key score

  load_key_mappings "$MANUAL_MAP"

  git log --pretty='tformat:author%x09%an|%ae' --numstat \
    | aggregate_author_stats \
    | while IFS='|' read -r commits added deleted name email; do
      key=$(contributor_key "$name" "$email")
      # Calculate a "contribution score" (commits + added + deleted)
      score=$((commits + added + deleted))
      printf '%s|%s|%s|%s|%s|%s|%s\n' \
        "$key" "$score" "$commits" "$added" "$deleted" "$name" "$email"
    done
}

# Output to the temporary file with normalized keys for later combining.
collect_contributor_stats >>"$NORM_FILE"
#######################################
# Combine entries with the same normalized key.
# Input (stdin):  "key|score|commits|added|deleted|name|email" records.
# Output:         "score|commits|added|deleted|name|email", one line per key,
#                 with the numeric columns summed and the longest name (and
#                 its email) kept as the display identity.
# A "have" flag rather than a comparison with the empty string marks the
# first record, so records whose key is empty are still emitted.
#######################################
combine_contributors() {
  # Byte-wise collation keeps identical keys adjacent regardless of locale.
  LC_ALL=C sort -t'|' -k1,1 | awk -F'|' '
    BEGIN { OFS = "|" }

    function emit() {
      if (have) {
        print total_score, total_commits, total_added, total_deleted, best_name, best_email
      }
    }

    # First record, or first record of a new key: flush and start over.
    !have || $1 != current_key {
      emit()
      have = 1
      current_key = $1
      total_score = $2
      total_commits = $3
      total_added = $4
      total_deleted = $5
      best_name = $6
      best_email = $7
      next
    }

    # Another identity of the same person: add stats, keep the longer name.
    {
      total_score += $2
      total_commits += $3
      total_added += $4
      total_deleted += $5
      if (length($6) > length(best_name)) {
        best_name = $6
        best_email = $7
      }
    }

    END { emit() }
  '
}

combine_contributors <"$NORM_FILE" >"$TEMP_FILE"
#######################################
# Extract a GitHub username from a GitHub "noreply" commit email.
# Recognizes "ID+username@users.noreply.github.com" and the older
# "username@users.noreply.github.com" form. The whole address must match, so
# look-alike domains such as "...github.com.example.org" yield nothing.
# Arguments: $1 - email address
# Outputs:   the username on stdout, or an empty line when the address is not
#            a GitHub noreply address
#######################################
extract_github_username() {
  local email="$1"
  local username=""
  local noreply_re='^([0-9]+\+)?([^@]+)@users\.noreply\.github\.com$'

  if [[ "$email" =~ $noreply_re ]]; then
    username=${BASH_REMATCH[2]}
  fi

  printf '%s\n' "$username"
}
#######################################
# Run a GitHub user search and print the login of the first hit.
# The query is passed with --data-urlencode so names containing spaces or
# other reserved characters still form a valid request.
# Globals:   GITHUB_AUTH (read) - "Authorization: ..." header
# Arguments: $1 - search query, e.g. "jane@example.com in:email"
# Outputs:   the login on stdout; nothing when there is no hit
#######################################
_github_search_login() {
  local query=$1
  local response

  response=$(curl -s -G -H "$GITHUB_AUTH" \
    --data-urlencode "q=$query" \
    "https://api.github.com/search/users") || return 0

  if grep -Eq '"total_count": *[1-9]' <<<"$response"; then
    grep -o '"login": *"[^"]*"' <<<"$response" \
      | head -1 \
      | sed 's/"login": *"\([^"]*\)"/\1/'
  fi
}

#######################################
# Function to get GitHub info for a user.
# Resolution order for the username: manual mapping file, GitHub noreply
# email, then (only with credentials) a GitHub search by email and by name.
# Globals:   GITHUB_USERNAME_MAP, GITHUB_REPO, GITHUB_AUTH (read)
# Arguments: $1 - email, $2 - name as recorded in git
# Outputs:   "full_name|github_username|profile_url" on stdout; the last two
#            fields are empty when no username was found, and full_name falls
#            back to the git name
#######################################
get_github_info() {
  local email="$1"
  local name="$2"
  local github_username=""
  local full_name=""
  local profile_url=""
  local pattern username user_info

  # Check if this user has a manual GitHub username mapping.
  while IFS='|' read -r pattern username || [[ -n "$pattern" ]]; do
    # Skip comments and blank lines (an empty pattern would match everyone).
    if [[ -z "$pattern" || "$pattern" == \#* ]]; then
      continue
    fi
    if [[ "$name" == *"$pattern"* || "$email" == *"$pattern"* ]]; then
      github_username="$username"
      break
    fi
  done <"$GITHUB_USERNAME_MAP"

  # If no manual mapping, try to extract from email.
  if [[ -z "$github_username" ]]; then
    github_username=$(extract_github_username "$email")
  fi

  if [[ -n "$GITHUB_REPO" ]]; then
    # Searching requires credentials; try the email first, then the name.
    if [[ -z "$github_username" && -n "$GITHUB_AUTH" ]]; then
      github_username=$(_github_search_login "$email in:email")
      if [[ -z "$github_username" ]]; then
        github_username=$(_github_search_login "$name in:name")
      fi
    fi

    # If we found a GitHub username, get their profile info.
    if [[ -n "$github_username" ]]; then
      profile_url="https://github.com/$github_username"
      if [[ -n "$GITHUB_AUTH" ]]; then
        user_info=$(curl -s -H "$GITHUB_AUTH" "https://api.github.com/users/$github_username")
        # head -1 keeps the record on one line if several "name" keys match.
        full_name=$(grep -o '"name": *"[^"]*"' <<<"$user_info" \
          | head -1 \
          | sed 's/"name": *"\([^"]*\)"/\1/')
      fi
      # If no full name, use the username.
      if [[ -z "$full_name" || "$full_name" == "null" ]]; then
        full_name="$github_username"
      fi
    fi
  fi

  # If we couldn't get GitHub info, use the name from git.
  if [[ -z "$full_name" ]]; then
    full_name="$name"
  fi

  printf '%s|%s|%s\n' "$full_name" "$github_username" "$profile_url"
}
# Row formatters. Header and data rows go through the same format string so
# the column titles always line up with the values beneath them.
print_github_row() {
  printf '%-30s %-30s %-30s %-10s %-10s %-10s %-10s\n' "$@"
}

print_plain_row() {
  printf '%-30s %-30s %-10s %-10s %-10s %-10s\n' "$@"
}

#######################################
# Print the table with GitHub columns.
# Input (stdin): "score|commits|added|deleted|name|email|full_name|
#                 github_username|profile_url" records, already ordered.
# Missing usernames and profile URLs are shown as "-"; a missing full name
# falls back to the git name.
#######################################
print_github_table() {
  local score commits added deleted name email full_name github_username profile_url

  print_github_row "Name" "GitHub Username" "Profile" "Commits" "Added" "Deleted" "Score"
  echo "--------------------------------------------------------------------------------------------------------------"
  while IFS='|' read -r score commits added deleted name email full_name github_username profile_url; do
    print_github_row "${full_name:-$name}" "${github_username:--}" "${profile_url:--}" \
      "$commits" "$added" "$deleted" "$score"
  done
}

#######################################
# Print the table without GitHub columns.
# Input (stdin): "score|commits|added|deleted|name|email" records, already
#                ordered. Records with a zero score are skipped.
#######################################
print_plain_table() {
  local score commits added deleted name email

  print_plain_row "Name" "Email" "Commits" "Added" "Deleted" "Score"
  echo "--------------------------------------------------------------------------------------------"
  while IFS='|' read -r score commits added deleted name email; do
    if [[ "$score" -eq 0 ]]; then
      continue
    fi
    print_plain_row "$name" "$email" "$commits" "$added" "$deleted" "$score"
  done
}

if [[ -n "${GITHUB_REPO:-}" ]]; then
  # Get GitHub info for each contributor, highest score first.
  echo "Fetching GitHub profile information..."
  sort -t'|' -k1,1nr "$TEMP_FILE" \
    | while IFS='|' read -r score commits added deleted name email; do
      # Skip entries with zero score
      if [[ "$score" -eq 0 ]]; then
        continue
      fi
      # get_github_info prints "full_name|github_username|profile_url".
      github_info=$(get_github_info "$email" "$name")
      printf '%s|%s|%s|%s|%s|%s|%s\n' \
        "$score" "$commits" "$added" "$deleted" "$name" "$email" "$github_info"
    done >>"$GITHUB_INFO"

  # Display the results with GitHub info
  print_github_table <"$GITHUB_INFO"
else
  # Display the results without GitHub info
  sort -t'|' -k1,1nr "$TEMP_FILE" | print_plain_table
fi
# Remove every scratch file the script may have produced.
scratch_files=(
  /tmp/contributors.txt
  "$TEMP_FILE"
  "$NORM_FILE"
  "$MANUAL_MAP"
  "$GITHUB_INFO"
  "$GITHUB_USERNAME_MAP"
)
rm -f "${scratch_files[@]}"

# Closing notes explaining how to read the "Score" column.
cat <<'EOF'
------------------------------------------------------
Note: 'Score' is a simple metric calculated as (commits + lines added + lines deleted)
This is a basic quantification and doesn't account for the complexity or impact of contributions.
Contributors with multiple email addresses have been combined where possible.
EOF
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment