#!/bin/bash
# get_contributors.sh
# A script to collect all contributors to a Git repository, quantify their contributions,
# combine stats from the same person using different names/emails, and fetch GitHub profile information.
#
# Created by Joshua Kolden (https://github.com/mrjoshuak)
#
# Usage: ./get_contributors.sh [path_to_repo] [github_repo_owner/repo_name] [mapping_file]
#
# The optional mapping_file is a text file with lines in the format:
# name_or_email_pattern|normalized_key
#
# Example mapping file:
# # Lines starting with # are comments
# John|johndoe
# john.doe@example.com|johndoe
# Jane Smith|janesmith

# Set the repository path and other parameters
REPO_PATH=${1:-.}
GITHUB_REPO=${2:-""}
MAPPING_FILE=${3:-""}

# Check if the path exists and is a git repository
if [[ ! -d "$REPO_PATH/.git" ]]; then
  echo "Error: $REPO_PATH is not a git repository" >&2
  exit 1
fi

# A relative mapping file is most naturally given relative to where the script
# was started, so pin it to an absolute path before changing directory. A path
# that only exists relative to the repository keeps working unchanged.
if [[ -n "$MAPPING_FILE" && "$MAPPING_FILE" != /* && -f "$MAPPING_FILE" ]]; then
  MAPPING_FILE="$PWD/$MAPPING_FILE"
fi

# Change to the repository directory
cd "$REPO_PATH" || exit 1

echo "Analyzing contributors for repository: $(basename "$PWD")"
echo "------------------------------------------------------"

# Check for GitHub API credentials (GITHUB_TOKEN takes precedence).
# Note: every API call below is guarded by a non-empty GITHUB_AUTH, so without
# a token no request is actually sent.
if [[ -n "$GITHUB_TOKEN" ]]; then
  echo "GitHub API token found in environment"
  GITHUB_AUTH="Authorization: token $GITHUB_TOKEN"
elif [[ -n "$GITHUB_API_TOKEN" ]]; then
  echo "GitHub API token found in environment"
  GITHUB_AUTH="Authorization: token $GITHUB_API_TOKEN"
else
  echo "No GitHub API token found in environment. Will use unauthenticated API calls (rate limited)."
  GITHUB_AUTH=""
fi

# If no GitHub repo was provided, try to guess it from the remote URL
if [[ -z "$GITHUB_REPO" ]]; then
  REMOTE_URL=$(git config --get remote.origin.url)
  if [[ "$REMOTE_URL" == *"github.com"* ]]; then
    # Extract owner/repo from the URL. The greedy [^/]+ swallows a trailing
    # ".git" before the optional group can match it, so strip it afterwards.
    GITHUB_REPO=$(printf '%s\n' "$REMOTE_URL" |
      sed -E 's|.*github\.com[:/]([^/]+/[^/]+)(\.git)?|\1|')
    GITHUB_REPO=${GITHUB_REPO%/}
    GITHUB_REPO=${GITHUB_REPO%.git}
    echo "Detected GitHub repository: $GITHUB_REPO"
  else
    echo "No GitHub repository specified and couldn't detect one from remotes."
    echo "GitHub profile information will not be available."
  fi
fi

#######################################
# Look up a contributor in a "pattern|value" mapping file.
# The first line whose pattern is a substring of the name or of the email
# wins. Comment lines (leading #) and lines with an empty pattern are skipped;
# an empty pattern would otherwise match every contributor.
# Arguments: $1 - mapping file, $2 - contributor name, $3 - contributor email
# Outputs:   the mapped value on stdout (possibly empty)
# Returns:   0 if a pattern matched, 1 otherwise
#######################################
lookup_mapping() {
  local map_file="$1"
  local name="$2"
  local email="$3"
  local pattern value

  # "|| [[ -n ... ]]" keeps a final line that lacks a trailing newline.
  while IFS='|' read -r pattern value || [[ -n "$pattern" ]]; do
    if [[ -z "$pattern" || "$pattern" == \#* ]]; then
      continue
    fi
    if [[ "$name" == *"$pattern"* || "$email" == *"$pattern"* ]]; then
      printf '%s\n' "$value"
      return 0
    fi
  done <"$map_file"
  return 1
}

# Get all contributors with their commit counts, lines added, and lines deleted
echo "Collecting contribution data..."

# Create temporary files; the EXIT trap removes them on every exit path.
TEMP_FILE=""
NORM_FILE=""
MANUAL_MAP=""
GITHUB_INFO=""
GITHUB_USERNAME_MAP=""
CONTRIB_FILE=""
cleanup_temp_files() {
  rm -f -- "$TEMP_FILE" "$NORM_FILE" "$MANUAL_MAP" "$GITHUB_INFO" \
    "$GITHUB_USERNAME_MAP" "$CONTRIB_FILE"
}
trap cleanup_temp_files EXIT

TEMP_FILE=$(mktemp) || exit 1
NORM_FILE=$(mktemp) || exit 1
MANUAL_MAP=$(mktemp) || exit 1
GITHUB_INFO=$(mktemp) || exit 1
GITHUB_USERNAME_MAP=$(mktemp) || exit 1
# Replaces the fixed, predictable /tmp/contributors.txt.
CONTRIB_FILE=$(mktemp) || exit 1

# Create a default manual mapping file if no mapping file is provided
if [[ -z "$MAPPING_FILE" || ! -f "$MAPPING_FILE" ]]; then
  echo "# No custom mapping file provided. Using default empty mapping." >"$MANUAL_MAP"
else
  echo "Using mapping file: $MAPPING_FILE"
  cp "$MAPPING_FILE" "$MANUAL_MAP"
fi

# Create a default GitHub username mapping file
# NOTE(review): the original contents of this here-document were not legible
# in the reviewed copy; an empty (comment-only) mapping is used. Restore any
# project-specific "pattern|github_username" entries here.
cat >"$GITHUB_USERNAME_MAP" <<'EOF'
# Format: name_or_email_pattern|github_username
# Lines starting with # are comments
EOF

# Collect per-author statistics in a single pass over the history.
# NOTE(review): the original listing command was not legible in the reviewed
# copy. Statistics are keyed on the exact (name, email) pair rather than a
# name-only regex filter, so one person with two emails is no longer counted
# twice and "John" no longer also matches "John Doe".
# Each output line is: name|email|commits|added|deleted
git log --format='@%aN|%aE' --numstat |
  awk -F'\t' '
    # A line starting with @ opens a new commit and names its author.
    /^@/ { who = substr($0, 2); commits[who]++; next }
    # numstat lines are "added<TAB>deleted<TAB>path"; "-" (binary) counts as 0.
    NF >= 3 { added[who] += $1; deleted[who] += $2 }
    END {
      for (w in commits) {
        printf "%s|%d|%d|%d\n", w, commits[w], added[w], deleted[w]
      }
    }
  ' | LC_ALL=C sort >"$CONTRIB_FILE"

# Process each contributor and normalize names
while IFS='|' read -r name email commits added deleted; do
  # Check if this contributor matches any manual mapping
  manual_key=$(lookup_mapping "$MANUAL_MAP" "$name" "$email")

  # If no manual mapping, create a normalized key from the name
  if [[ -z "$manual_key" ]]; then
    # Create a simple normalized name (lowercase, alphanumeric only)
    manual_key=$(printf '%s' "$name" | tr '[:upper:]' '[:lower:]' | tr -cd 'a-z0-9')
  fi

  # Names without ASCII letters or digits normalize to nothing; fall back to
  # the email so unrelated contributors are not merged under one empty key.
  if [[ -z "$manual_key" ]]; then
    manual_key=$(printf '%s' "$email" | tr '[:upper:]' '[:lower:]')
  fi

  # Calculate a "contribution score" (commits + added + deleted)
  score=$((commits + added + deleted))

  # Output with the normalized key first for later combining
  printf '%s|%s|%s|%s|%s|%s|%s\n' \
    "$manual_key" "$score" "$commits" "$added" "$deleted" "$name" "$email"
done <"$CONTRIB_FILE" >"$NORM_FILE"

# Combine rows that share a normalized key.
# NOTE(review): the head of this pipeline was not legible in the reviewed
# copy; it is rebuilt from the surviving tail and from the reader of
# $TEMP_FILE, which expects score|commits|added|deleted|name|email.
LC_ALL=C sort -t'|' -k1,1 "$NORM_FILE" | awk -F'|' -v OFS='|' '
  {
    if (have_entry && $1 == prev_key) {
      # Same person: accumulate the totals
      total_score += $2
      total_commits += $3
      total_added += $4
      total_deleted += $5
      # Keep the longest name seen, together with its email
      if (length($6) > length(prev_name)) {
        prev_name = $6
        prev_email = $7
      }
    } else {
      # Output previous combined entry
      if (have_entry) {
        print total_score, total_commits, total_added, total_deleted, prev_name, prev_email
      }
      # Start new entry
      have_entry = 1
      prev_key = $1
      prev_name = $6
      prev_email = $7
      total_score = $2
      total_commits = $3
      total_added = $4
      total_deleted = $5
    }
  }
  END {
    # Output the last entry
    if (have_entry) {
      print total_score, total_commits, total_added, total_deleted, prev_name, prev_email
    }
  }' >"$TEMP_FILE"

#######################################
# Extract a GitHub username from a GitHub noreply email address.
# Handles both "user@users.noreply.github.com" and the
# "12345+user@users.noreply.github.com" form.
# Arguments: $1 - email address
# Outputs:   the username on stdout, or an empty line if the address is not
#            a well-formed noreply address
#######################################
extract_github_username() {
  local email="$1"
  local username=""
  local noreply_re='^([0-9]+\+)?([^@]+)@users\.noreply\.github\.com$'

  if [[ "$email" =~ $noreply_re ]]; then
    username=${BASH_REMATCH[2]}
  fi

  printf '%s\n' "$username"
}

#######################################
# Run a GitHub user search and print the login of the first hit.
# The response is scraped with grep/sed because the script has no JSON parser
# dependency.
# Globals:   GITHUB_AUTH (read)
# Arguments: $1 - search query, e.g. "jane@example.com in:email"
# Outputs:   the login on stdout; nothing when there is no hit
#######################################
search_github_login() {
  local query="$1"
  local response

  # -G with --data-urlencode keeps spaces and other special characters in
  # names and emails from producing a malformed URL.
  response=$(curl -s -G -H "$GITHUB_AUTH" \
    --data-urlencode "q=$query" \
    "https://api.github.com/search/users") || return 1

  if grep -Eq '"total_count": *[1-9]' <<<"$response"; then
    grep -o '"login": *"[^"]*"' <<<"$response" |
      head -1 |
      sed 's/"login": *"\([^"]*\)"/\1/'
  fi
}

#######################################
# Resolve GitHub profile details for one contributor.
# Resolution order: GITHUB_USERNAME_MAP entry, noreply email, then (only with
# GITHUB_REPO set and a token) a user search by email and then by name.
# Globals:   GITHUB_USERNAME_MAP, GITHUB_REPO, GITHUB_AUTH (all read)
# Arguments: $1 - contributor email, $2 - contributor name
# Outputs:   "full_name|github_username|profile_url" on stdout; username and
#            URL are empty when unknown, full_name falls back to the git name
#######################################
get_github_info() {
  local email="$1"
  local name="$2"
  local github_username=""
  local full_name=""
  local profile_url=""
  local user_info=""

  # Check if this user has a manual GitHub username mapping
  github_username=$(lookup_mapping "$GITHUB_USERNAME_MAP" "$name" "$email")

  # If no manual mapping, try to extract from email
  if [[ -z "$github_username" ]]; then
    github_username=$(extract_github_username "$email")
  fi

  if [[ -n "$GITHUB_REPO" ]]; then
    # Searching requires credentials: try the email first, then the name.
    if [[ -z "$github_username" && -n "$GITHUB_AUTH" ]]; then
      if [[ -n "$email" ]]; then
        github_username=$(search_github_login "$email in:email")
      fi
      if [[ -z "$github_username" && -n "$name" ]]; then
        github_username=$(search_github_login "$name in:name")
      fi
    fi

    # If we found a GitHub username, get their profile info
    if [[ -n "$github_username" ]]; then
      profile_url="https://github.com/$github_username"

      # Get full name from GitHub profile if we have auth
      if [[ -n "$GITHUB_AUTH" ]]; then
        user_info=$(curl -s -H "$GITHUB_AUTH" "https://api.github.com/users/$github_username")
        full_name=$(grep -o '"name": *"[^"]*"' <<<"$user_info" |
          head -1 |
          sed 's/"name": *"\([^"]*\)"/\1/')
      fi

      # If no full name, use the username
      if [[ -z "$full_name" || "$full_name" == "null" ]]; then
        full_name="$github_username"
      fi
    fi
  fi

  # If we couldn't get GitHub info, use the name from git
  if [[ -z "$full_name" ]]; then
    full_name="$name"
  fi

  # Return the info
  printf '%s|%s|%s\n' "$full_name" "$github_username" "$profile_url"
}

# Get GitHub info for each contributor
echo "Fetching GitHub profile information..."
# Final report: print one row per contributor (highest score first), then
# remove the temporary files and explain the score.
# Reads $TEMP_FILE rows of the form score|commits|added|deleted|name|email.

# Row printers. The headers go through the same format as the data rows so
# the column titles always line up with the values beneath them.
print_github_row() {
  printf '%-30s %-30s %-30s %-10s %-10s %-10s %-10s\n' "$@"
}

print_plain_row() {
  printf '%-30s %-30s %-10s %-10s %-10s %-10s\n' "$@"
}

if [[ -n "$GITHUB_REPO" ]]; then
  # Enrich every contributor with GitHub details and stage the rows in
  # $GITHUB_INFO so the table is printed only after all lookups are done.
  while IFS='|' read -r score commits added deleted name email; do
    # Skip entries with zero score
    if [[ "$score" -eq 0 ]]; then
      continue
    fi

    # get_github_info prints full_name|github_username|profile_url
    github_info=$(get_github_info "$email" "$name")
    IFS='|' read -r full_name github_username profile_url <<<"$github_info"

    printf '%s|%s|%s|%s|%s|%s|%s|%s|%s\n' \
      "$score" "$commits" "$added" "$deleted" "$name" "$email" \
      "$full_name" "$github_username" "$profile_url"
  done < <(sort -t'|' -k1,1nr "$TEMP_FILE") >>"$GITHUB_INFO"

  # Display the results with GitHub info
  print_github_row "Name" "GitHub Username" "Profile" "Commits" "Added" "Deleted" "Score"
  echo "--------------------------------------------------------------------------------------------------------------"
  while IFS='|' read -r score commits added deleted name email full_name github_username profile_url; do
    # Prefer the GitHub full name; show "-" for an unknown username or URL.
    print_github_row "${full_name:-$name}" "${github_username:--}" "${profile_url:--}" \
      "$commits" "$added" "$deleted" "$score"
  done <"$GITHUB_INFO"
else
  # Display the results without GitHub info
  print_plain_row "Name" "Email" "Commits" "Added" "Deleted" "Score"
  echo "--------------------------------------------------------------------------------------------"
  while IFS='|' read -r score commits added deleted name email; do
    # Skip entries with zero score
    if [[ "$score" -eq 0 ]]; then
      continue
    fi
    print_plain_row "$name" "$email" "$commits" "$added" "$deleted" "$score"
  done < <(sort -t'|' -k1,1nr "$TEMP_FILE")
fi

# Clean up temporary files
rm -f /tmp/contributors.txt "$TEMP_FILE" "$NORM_FILE" "$MANUAL_MAP" "$GITHUB_INFO" "$GITHUB_USERNAME_MAP"

echo "------------------------------------------------------"
echo "Note: 'Score' is a simple metric calculated as (commits + lines added + lines deleted)"
echo "This is a basic quantification and doesn't account for the complexity or impact of contributions."
echo "Contributors with multiple email addresses have been combined where possible."