Skip to content

Instantly share code, notes, and snippets.

@harshadsatra
Created March 5, 2025 08:00
Show Gist options
  • Select an option

  • Save harshadsatra/e5a717654fe1f3e19dfa5b5bf6c56b44 to your computer and use it in GitHub Desktop.

Select an option

Save harshadsatra/e5a717654fe1f3e19dfa5b5bf6c56b44 to your computer and use it in GitHub Desktop.

Revisions

  1. harshadsatra created this gist Mar 5, 2025.
    388 changes: 388 additions & 0 deletions git_history_to_csv.sh
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,388 @@
    #!/bin/bash

    #####################################################################
    # Git History to CSV - Enhanced Branch Detection
    #####################################################################
    #
    # DESCRIPTION:
    # This script generates a CSV file containing the history of git commits
    # with comprehensive branch information. It tracks which commits belong to
    # which branches by using multiple detection methods, ensuring accurate
    # branch attribution even for commits without direct references.
    #
    # FEATURES:
    # - Enhanced branch detection (uses both direct refs and branch ancestry)
    # - Supports all branches or specific branch filtering
    # - Date range filtering capability
    # - Multiple sorting options
    # - Detailed branch statistics
    # - Debug mode for troubleshooting
    # - Handles complex commit messages with commas and quotes
    #
    # USAGE:
    # ./git_history_to_csv.sh [options]
    #
    # OPTIONS:
    # -o FILE Specify output file (default: git_history.csv)
    # -a Include all branches (default)
    # -c Current branch only
    # -b BRANCHES Specific branches only (comma-separated list)
    # -s DATE Include commits since DATE (format: YYYY-MM-DD)
    # -u DATE Include commits until DATE (format: YYYY-MM-DD)
    # -d ORDER Sort order: 'default', 'author-date', or 'committer-date'
    # -v Enable debug output
    # -h Display help message
    #
    # EXAMPLES:
    # # Put this script in your project's root directory
    # # Generate CSV with all branches (default)
    # ./git_history_to_csv.sh
    #
    # # Specify output file
    # ./git_history_to_csv.sh -o project_history.csv
    #
    # # Only include commits from specific branches
    # ./git_history_to_csv.sh -b main,develop
    #
    # # Include commits from the last 30 days
    # ./git_history_to_csv.sh -s "$(date -d '30 days ago' +%Y-%m-%d)"
    #
    # # Debug mode with date range and specific output file
    # ./git_history_to_csv.sh -v -s 2023-01-01 -u 2023-12-31 -o year_2023.csv
    #
    #####################################################################

    # Preflight 1: the git executable must be resolvable through PATH.
    command -v git > /dev/null 2>&1 || {
      echo "Error: git is not installed or not in the PATH" >&2
      exit 1
    }

    # Preflight 2: `git rev-parse --is-inside-work-tree` must succeed from the
    # current directory; only its exit status is used, its output is discarded.
    git rev-parse --is-inside-work-tree > /dev/null 2>&1 || {
      echo "Error: Not a git repository" >&2
      exit 1
    }

    # Destination CSV used when -o is not given
    OUTPUT_FILE='git_history.csv'

    # Option defaults; each one can be overridden by the command-line flags
    ALL_BRANCHES=true      # -a (default) walks every ref; -c / -b switch it off
    SPECIFIC_BRANCHES=''   # comma-separated list supplied with -b
    SINCE_DATE=''          # lower date bound supplied with -s
    UNTIL_DATE=''          # upper date bound supplied with -u
    SORT_ORDER='default'   # one of: default, author-date, committer-date
    DEBUG=false            # -v turns on [DEBUG] tracing

    # Print the command-line help text to stdout and terminate the script.
    #
    # Arguments:
    #   $1 - optional exit status. Defaults to 1 (the historical behavior), so
    #        existing callers that pass nothing still exit with failure; a
    #        caller handling an explicit help request can pass 0.
    # Outputs:
    #   The usage summary, one option per line, on stdout.
    # Returns:
    #   Never returns; exits the current shell with the chosen status.
    show_usage() {
      local exit_code="${1:-1}"

      printf '%s\n' \
        "Usage: $0 [options]" \
        "Options:" \
        " -o FILE Specify output file (default: git_history.csv)" \
        " -a Include all branches (default)" \
        " -c Current branch only" \
        " -b BRANCHES Specific branches only (comma-separated list)" \
        " -s DATE Include commits since DATE (format: YYYY-MM-DD)" \
        " -u DATE Include commits until DATE (format: YYYY-MM-DD)" \
        " -d ORDER Sort order: 'default', 'author-date', or 'committer-date'" \
        " -v Enable debug output" \
        " -h Display this help message"

      exit "$exit_code"
    }

    # Parse command line arguments.
    #
    # Wrapped in a function (invoked immediately below with the script's own
    # arguments) so the option loop has private getopts state.
    #
    # Globals written: OUTPUT_FILE, ALL_BRANCHES, SPECIFIC_BRANCHES, SINCE_DATE,
    #                  UNTIL_DATE, SORT_ORDER, DEBUG
    # Arguments:       the command-line words to parse
    # Exits (through show_usage) on -h, on an unknown option, on a missing
    # option argument, and on an unsupported -d value.
    parse_args() {
      local opt OPTARG
      local OPTIND=1

      # Leading ':' selects silent error reporting so the '\?' and ':' arms
      # below can print their own messages.
      while getopts ":o:acb:s:u:d:vh" opt; do
        case "$opt" in
          o)
            OUTPUT_FILE="$OPTARG"
            ;;
          a)
            ALL_BRANCHES=true
            SPECIFIC_BRANCHES=""
            ;;
          c)
            # Neither "all" nor a list: git log then walks the current HEAD only
            ALL_BRANCHES=false
            SPECIFIC_BRANCHES=""
            ;;
          b)
            ALL_BRANCHES=false
            SPECIFIC_BRANCHES="$OPTARG"
            ;;
          s)
            SINCE_DATE="$OPTARG"
            ;;
          u)
            UNTIL_DATE="$OPTARG"
            ;;
          d)
            # Reject unknown orders instead of silently using the default
            case "$OPTARG" in
              default | author-date | committer-date)
                SORT_ORDER="$OPTARG"
                ;;
              *)
                echo "Invalid sort order: $OPTARG" >&2
                show_usage
                ;;
            esac
            ;;
          v)
            DEBUG=true
            ;;
          h)
            # Explicit help is not an error: request exit status 0 (honored
            # when show_usage accepts a status argument).
            show_usage 0
            ;;
          \?)
            echo "Invalid option: -$OPTARG" >&2
            show_usage
            ;;
          :)
            echo "Option -$OPTARG requires an argument." >&2
            show_usage
            ;;
        esac
      done
    }

    parse_args "$@"

    # Emit a "[DEBUG] ..." trace line on stderr.
    # All arguments are joined into a single message. Nothing is printed unless
    # the global DEBUG holds the literal string "true".
    debug() {
      [ "$DEBUG" = true ] || return 0
      printf '%s\n' "[DEBUG] $*" >&2
    }

    # List the branches whose history contains a given commit.
    #
    # Uses `git for-each-ref --contains` rather than parsing `git branch --all`,
    # so branches are classified by ref namespace instead of by guessing from
    # the name: a local branch such as "feature/x" is no longer mistaken for a
    # remote one, and the "(HEAD detached at ...)" and "origin/HEAD -> ..."
    # entries that `git branch` prints never show up as branch names.
    #
    # Arguments:
    #   $1 - commit hash to look up
    # Outputs:
    #   A comma-separated list on stdout: the local branches containing the
    #   commit if there are any, otherwise the remote-tracking branches
    #   (spelled "remotes/<remote>/<name>"), otherwise an empty line.
    get_branches_for_commit() {
      local commit_hash="$1"
      local branches=""
      local local_branches=""
      local remote_branches=""
      local refname=""

      debug "Getting branches for commit $commit_hash"

      # git errors (e.g. unknown commit) are deliberately silenced: the caller
      # treats an empty result as "no branch found".
      while IFS= read -r refname; do
        case "$refname" in
          refs/heads/*)
            debug " Found branch: ${refname#refs/heads/}"
            local_branches="$local_branches,${refname#refs/heads/}"
            ;;
          refs/remotes/*/HEAD)
            # Symbolic alias for a remote's default branch, not a branch itself
            ;;
          refs/remotes/*)
            debug " Found branch: ${refname#refs/}"
            remote_branches="$remote_branches,${refname#refs/}"
            ;;
        esac
      done < <(git for-each-ref --contains "$commit_hash" \
        --format='%(refname)' refs/heads refs/remotes 2> /dev/null)

      # Remove leading comma
      local_branches="${local_branches#,}"
      remote_branches="${remote_branches#,}"

      debug " Local branches: $local_branches"
      debug " Remote branches: $remote_branches"

      # Prefer local branches over remote branches
      if [[ -n "$local_branches" ]]; then
        branches="$local_branches"
      elif [[ -n "$remote_branches" ]]; then
        branches="$remote_branches"
      fi

      echo "$branches"
    }

    # Pick the most meaningful branch name out of a `git log` %D decoration
    # string (for example "HEAD -> main, tag: v1.0, origin/main").
    #
    # Priority, highest first:
    #   1. the branch HEAD points at ("HEAD -> name")
    #   2. the first name without a "/" (treated as a local branch)
    #   3. the first name with a "/" (treated as a remote-tracking branch)
    #   4. the first tag, reported as "tag: name"
    # A bare "HEAD" (detached) and "<remote>/HEAD" aliases are never returned.
    # A tag is used only when no branch decorates the commit, so a release tag
    # sitting on a branch tip does not hide the branch.
    #
    # Limitation: short decoration names cannot tell a local branch that
    # contains "/" apart from a remote-tracking branch, so such names fall
    # into rule 3.
    #
    # Arguments:
    #   $1 - decoration string; may be empty
    # Outputs:
    #   The chosen name on stdout, or "(none)" when nothing usable is present.
    extract_branch_from_refs() {
      local refs="$1"
      local branch="(none)"
      local ref=""
      local local_ref=""
      local remote_ref=""
      local tag_ref=""
      local -a ref_list=()

      debug "Extracting branch from refs: $refs"

      if [[ -z "$refs" ]]; then
        debug " No refs found in git log output"
        echo "$branch"
        return 0
      fi

      # IFS is overridden for this one read only
      IFS=',' read -r -a ref_list <<< "$refs"

      for ref in "${ref_list[@]}"; do
        # Trim surrounding spaces left over from the ", " separators
        ref="${ref#"${ref%%[! ]*}"}"
        ref="${ref%"${ref##*[! ]}"}"

        case "$ref" in
          "HEAD -> "*)
            branch="${ref#HEAD -> }"
            debug " Found HEAD -> branch: $branch"
            echo "$branch"
            return 0
            ;;
          "tag: "*)
            [[ -n "$tag_ref" ]] || tag_ref="$ref"
            ;;
          "" | HEAD | */HEAD)
            # Detached HEAD marker or remote default-branch alias: skip
            ;;
          */*)
            [[ -n "$remote_ref" ]] || remote_ref="$ref"
            ;;
          *)
            [[ -n "$local_ref" ]] || local_ref="$ref"
            ;;
        esac
      done

      if [[ -n "$local_ref" ]]; then
        branch="$local_ref"
        debug " Found local branch in list: $branch"
      elif [[ -n "$remote_ref" ]]; then
        branch="$remote_ref"
        debug " Using remote branch: $branch"
      elif [[ -n "$tag_ref" ]]; then
        branch="$tag_ref"
        debug " Found tag: $branch"
      else
        debug " No usable ref found in: $refs"
      fi

      echo "$branch"
    }

    # Construct the git command with the specified options.
    # An argument array (not a string) is used so option values that contain
    # spaces, e.g. -s "2 weeks ago", reach git as single words and nothing is
    # subject to glob expansion.
    git_log_cmd=(git log)

    # Handle branch selection
    if [[ "$ALL_BRANCHES" == true ]]; then
      git_log_cmd+=(--all)
    elif [[ -n "$SPECIFIC_BRANCHES" ]]; then
      # Split the list on commas and/or spaces; empty entries are dropped
      IFS=', ' read -r -a requested_branches <<< "$SPECIFIC_BRANCHES"
      for requested_branch in "${requested_branches[@]}"; do
        if [[ -n "$requested_branch" ]]; then
          git_log_cmd+=("$requested_branch")
        fi
      done
    fi

    # Add date range filters if specified
    if [[ -n "$SINCE_DATE" ]]; then
      git_log_cmd+=("--since=$SINCE_DATE")
    fi

    if [[ -n "$UNTIL_DATE" ]]; then
      git_log_cmd+=("--until=$UNTIL_DATE")
    fi

    # Add sorting options
    case "$SORT_ORDER" in
      "author-date")
        git_log_cmd+=(--author-date-order)
        ;;
      "committer-date")
        git_log_cmd+=(--date-order)
        ;;
      *)
        # Default sort order, no additional flag needed
        ;;
    esac

    # Add headers to the CSV file (this truncates any existing file)
    if ! echo "Date,Author,Branch,Message,CommitHash" > "$OUTPUT_FILE"; then
      echo "Error: Cannot write to $OUTPUT_FILE" >&2
      exit 1
    fi

    # Display the options being used
    echo "Generating CSV with the following options:"
    echo "Output file: $OUTPUT_FILE"
    if [[ "$ALL_BRANCHES" == true ]]; then
      echo "Branches: All branches"
    elif [[ -n "$SPECIFIC_BRANCHES" ]]; then
      echo "Branches: $SPECIFIC_BRANCHES"
    else
      echo "Branches: Current branch only"
    fi
    [[ -n "$SINCE_DATE" ]] && echo "Since date: $SINCE_DATE"
    [[ -n "$UNTIL_DATE" ]] && echo "Until date: $UNTIL_DATE"
    echo "Sort order: $SORT_ORDER"
    if [[ "$DEBUG" == true ]]; then
      echo "Debug mode: Enabled"
    fi
    echo "Processing..."

    # Store $2 in the variable named by $1 as one CSV field (RFC 4180 style):
    # a value containing a comma, a double quote or a line break is wrapped in
    # double quotes with embedded quotes doubled; anything else is unchanged.
    csv_quote() {
      local -r __dq='"'
      local __value="$2"

      case "$__value" in
        *"$__dq"* | *,* | *$'\n'* | *$'\r'*)
          __value="${__dq}${__value//$__dq/$__dq$__dq}${__dq}"
          ;;
      esac

      printf -v "$1" '%s' "$__value"
    }

    # Ask git for one record per line with fields separated by the ASCII unit
    # separator (0x1f) instead of commas: decoration lists (%D), author names
    # and subjects may all contain commas themselves. "tformat:" terminates
    # every record, including the last one, with a newline so the read loop
    # does not lose the final commit.
    field_sep=$'\x1f'
    "${git_log_cmd[@]}" \
      --pretty="tformat:%ad%x1f%an%x1f%D%x1f%s%x1f%H" --date=short |
      while IFS="$field_sep" read -r commit_date author refs message commit_hash; do
        debug "Processing commit $commit_hash from $commit_date by $author"

        # Extract branch from refs first
        branch=$(extract_branch_from_refs "$refs")

        # If the decorations yielded nothing, fall back to asking git which
        # branches contain the commit
        if [[ "$branch" == "(none)" || -z "$branch" ]]; then
          debug "No branch info from refs, trying git branch --contains fallback"
          branches_from_git=$(get_branches_for_commit "$commit_hash")

          if [[ -n "$branches_from_git" ]]; then
            branch="$branches_from_git"
            debug "Found branches using fallback: $branch"
          else
            debug "Fallback also returned no branches"
          fi
        fi

        debug "Final branch assignment: $branch"

        # The date and the hash never need quoting; the free-text fields might
        csv_quote author_csv "$author"
        csv_quote branch_csv "$branch"
        csv_quote message_csv "$message"

        printf '%s,%s,%s,%s,%s\n' \
          "$commit_date" "$author_csv" "$branch_csv" "$message_csv" "$commit_hash"
      done >> "$OUTPUT_FILE"

    # Capture both stages right away: plain $? would only reflect the loop,
    # hiding a failing `git log`.
    export_status=("${PIPESTATUS[@]}")

    # Check if the command was successful
    if ((export_status[0] == 0 && export_status[1] == 0)); then
      echo "Git history successfully exported to $OUTPUT_FILE"
    else
      echo "Error: Failed to export git history" >&2
      exit 1
    fi

    # Count the number of commits exported (one CSV line per commit)
    COMMIT_COUNT=$(wc -l < "$OUTPUT_FILE")
    COMMIT_COUNT=$((COMMIT_COUNT - 1)) # Subtract 1 for the header line
    echo "Total commits exported: $COMMIT_COUNT"

    # List branch statistics: tally the third CSV column (Branch) and print the
    # tallies, most frequent first. The column is read with a small
    # quote-aware parser because a branch field may be a double-quoted,
    # comma-separated list, which a plain comma split would cut apart.
    if [[ "$ALL_BRANCHES" == true || -n "$SPECIFIC_BRANCHES" ]]; then
      echo ""
      echo "Commit count by branch:"
      awk '
        # Return field number "want" of one CSV line, with surrounding quotes
        # removed and doubled quotes collapsed.
        function csv_field(line, want,    pos, ch, idx, quoted, out) {
          idx = 1
          quoted = 0
          out = ""
          for (pos = 1; pos <= length(line); pos++) {
            ch = substr(line, pos, 1)
            if (quoted) {
              if (ch == "\"") {
                if (substr(line, pos + 1, 1) == "\"") {
                  if (idx == want) out = out "\""
                  pos++
                } else {
                  quoted = 0
                }
              } else if (idx == want) {
                out = out ch
              }
            } else if (ch == "\"") {
              quoted = 1
            } else if (ch == ",") {
              if (idx == want) return out
              idx++
            } else if (idx == want) {
              out = out ch
            }
          }
          return out
        }
        NR > 1 { tally[csv_field($0, 3)]++ }
        END { for (name in tally) printf "%d\t%s\n", tally[name], name }
      ' "$OUTPUT_FILE" |
        sort -t $'\t' -k1,1nr -k2,2 |
        while IFS=$'\t' read -r count branch; do
          printf " %-20s %s\n" "$branch:" "$count commits"
        done
    fi

    exit 0