Created
March 5, 2025 08:00
-
-
Save harshadsatra/e5a717654fe1f3e19dfa5b5bf6c56b44 to your computer and use it in GitHub Desktop.
Revisions
-
harshadsatra created this gist
Mar 5, 2025. There are no files selected for viewing
#!/bin/bash
#####################################################################
# Git History to CSV - Enhanced Branch Detection
#####################################################################
#
# DESCRIPTION:
#   This script generates a CSV file containing the history of git commits
#   with comprehensive branch information. It tracks which commits belong to
#   which branches by using multiple detection methods, ensuring accurate
#   branch attribution even for commits without direct references.
#
# FEATURES:
#   - Enhanced branch detection (uses both direct refs and branch ancestry)
#   - Supports all branches or specific branch filtering
#   - Date range filtering capability
#   - Multiple sorting options
#   - Detailed branch statistics
#   - Debug mode for troubleshooting
#   - Handles complex commit messages with commas and quotes (RFC 4180 quoting)
#
# USAGE:
#   ./git_history_to_csv.sh [options]
#
# OPTIONS:
#   -o FILE      Specify output file (default: git_history.csv)
#   -a           Include all branches (default)
#   -c           Current branch only
#   -b BRANCHES  Specific branches only (comma-separated list)
#   -s DATE      Include commits since DATE (format: YYYY-MM-DD)
#   -u DATE      Include commits until DATE (format: YYYY-MM-DD)
#   -d ORDER     Sort order: 'default', 'author-date', or 'committer-date'
#   -v           Enable debug output
#   -h           Display help message
#
#   DATE is handed to git unchanged, so any date git understands
#   (for example "30 days ago") is accepted as well.
#
# EXAMPLES:
#   # Put this script in your root project directory
#
#   # Generate CSV with all branches (default)
#   ./git_history_to_csv.sh
#
#   # Specify output file
#   ./git_history_to_csv.sh -o project_history.csv
#
#   # Only include commits from specific branches
#   ./git_history_to_csv.sh -b main,develop
#
#   # Include commits from the last 30 days (GNU date)
#   ./git_history_to_csv.sh -s "$(date -d '30 days ago' +%Y-%m-%d)"
#   # ...or portably, letting git parse the date
#   ./git_history_to_csv.sh -s "30 days ago"
#
#   # Debug mode with date range and specific output file
#   ./git_history_to_csv.sh -v -s 2023-01-01 -u 2023-12-31 -o year_2023.csv
#
# OUTPUT:
#   CSV with the columns Date,Author,Branch,Message,CommitHash. Fields are
#   quoted per RFC 4180: embedded double quotes are doubled and any field
#   containing a comma, quote or line break is wrapped in double quotes.
#   The Message column is always quoted.
#
#####################################################################

set -u
set -o pipefail

# ASCII "unit separator": placed between git log placeholders so that commas
# in ref lists, author names or subjects can never shift the columns.
readonly FIELD_SEP=$'\x1f'

# Default output file name
OUTPUT_FILE="git_history.csv"

# Default values for options
ALL_BRANCHES=true
SPECIFIC_BRANCHES=""
SINCE_DATE=""
UNTIL_DATE=""
SORT_ORDER="default" # Can be 'default', 'author-date', 'committer-date'
DEBUG=false

# State shared between the functions below
LOG_ARGS=()    # argument vector for `git log`, filled by build_log_args
TMP_DIR=""     # scratch directory, removed on exit
BRANCH_LOG=""  # one line per exported commit holding its Branch value
COMMIT_COUNT=0 # number of data rows written to the CSV

#######################################
# Print the option summary.
# Outputs: usage text on stdout
#######################################
usage_text() {
  cat <<EOF
Usage: $0 [options]
Options:
  -o FILE      Specify output file (default: git_history.csv)
  -a           Include all branches (default)
  -c           Current branch only
  -b BRANCHES  Specific branches only (comma-separated list)
  -s DATE      Include commits since DATE (format: YYYY-MM-DD)
  -u DATE      Include commits until DATE (format: YYYY-MM-DD)
  -d ORDER     Sort order: 'default', 'author-date', or 'committer-date'
  -v           Enable debug output
  -h           Display this help message
EOF
}

#######################################
# Display usage information and terminate the script.
# Arguments: $1 - exit status (optional, default 1). With status 0 the text
#                 goes to stdout (explicit -h); otherwise to stderr.
#######################################
show_usage() {
  local status="${1:-1}"
  if [[ "$status" -eq 0 ]]; then
    usage_text
  else
    usage_text >&2
  fi
  exit "$status"
}

#######################################
# Print a diagnostic line to stderr when debug mode (-v) is enabled.
# Globals:   DEBUG (read)
# Arguments: message words
#######################################
debug() {
  if [[ "$DEBUG" == true ]]; then
    printf '[DEBUG] %s\n' "$*" >&2
  fi
}

#######################################
# Emit a value as a double-quoted CSV field (inner quotes are doubled).
# Arguments: $1 - raw value
# Outputs:   quoted field on stdout, without trailing newline
#######################################
csv_quote() {
  local value="$1"
  local dq='"'
  value="${value//$dq/$dq$dq}"
  printf '"%s"' "$value"
}

#######################################
# Emit a value as a CSV field, quoting it only when required.
# Arguments: $1 - raw value
# Outputs:   field on stdout, without trailing newline
#######################################
csv_field() {
  local value="$1"
  case "$value" in
    *,* | *\"* | *$'\n'* | *$'\r'*) csv_quote "$value" ;;
    *) printf '%s' "$value" ;;
  esac
}

#######################################
# Fallback branch detection: list the branches whose history contains a
# commit. Full ref names are used so that local branches containing a slash
# (e.g. feature/login) are not mistaken for remote-tracking branches.
# Requires a git that supports `for-each-ref --contains`; if the command
# fails, no branches are reported.
# Arguments: $1 - commit hash
# Outputs:   comma-separated local branch names if any exist, otherwise
#            comma-separated remote-tracking names (e.g. origin/main),
#            otherwise an empty line
#######################################
get_branches_for_commit() {
  local commit_hash="$1"
  local local_branches=""
  local remote_branches=""
  local ref name

  debug "Getting branches for commit $commit_hash"

  while IFS= read -r ref; do
    case "$ref" in
      "")
        continue
        ;;
      refs/remotes/*/HEAD)
        # Symbolic pointer to the remote's default branch, not a branch.
        continue
        ;;
      refs/heads/*)
        name="${ref#refs/heads/}"
        debug "  Found branch: $name"
        local_branches="${local_branches:+$local_branches,}$name"
        ;;
      refs/remotes/*)
        name="${ref#refs/remotes/}"
        debug "  Found branch: $name"
        remote_branches="${remote_branches:+$remote_branches,}$name"
        ;;
    esac
  done < <(git for-each-ref --format='%(refname)' --contains "$commit_hash" \
    refs/heads refs/remotes 2>/dev/null)

  debug "  Local branches: $local_branches"
  debug "  Remote branches: $remote_branches"

  # Prefer local branches over remote branches
  if [[ -n "$local_branches" ]]; then
    printf '%s\n' "$local_branches"
  else
    printf '%s\n' "$remote_branches"
  fi
}

#######################################
# Pick the most relevant branch from a `git log` %D decoration string.
# Accepts both short decorations ("HEAD -> main, origin/main, tag: v1") and
# full ones ("HEAD -> refs/heads/main, refs/remotes/origin/main, ...").
# Priority: branch HEAD points to, first local branch, first remote-tracking
# branch, first tag (reported as "tag: NAME").
# With short decorations a name containing "/" is assumed to be remote.
# Arguments: $1 - decoration string (may be empty)
# Outputs:   the chosen name, or "(none)" when nothing usable was found
#######################################
extract_branch_from_refs() {
  local refs="$1"
  local head_branch="" local_branch="" remote_branch="" tag_name=""
  local -a ref_list=()
  local ref

  debug "Extracting branch from refs: $refs"

  if [[ -z "$refs" ]]; then
    debug "  No refs found in git log output"
    printf '%s\n' "(none)"
    return 0
  fi

  IFS=',' read -ra ref_list <<< "$refs"
  for ref in "${ref_list[@]}"; do
    # Trim surrounding whitespace left over from the ", " separators.
    ref="${ref#"${ref%%[![:space:]]*}"}"
    ref="${ref%"${ref##*[![:space:]]}"}"

    case "$ref" in
      "")
        ;;
      "HEAD -> "*)
        head_branch="${ref#HEAD -> }"
        head_branch="${head_branch#refs/heads/}"
        ;;
      "tag: "*)
        if [[ -z "$tag_name" ]]; then
          tag_name="${ref#tag: }"
          tag_name="${tag_name#refs/tags/}"
        fi
        ;;
      HEAD | */HEAD)
        # Detached HEAD marker or a remote's HEAD pointer: not a branch.
        ;;
      refs/heads/*)
        [[ -n "$local_branch" ]] || local_branch="${ref#refs/heads/}"
        ;;
      refs/remotes/*)
        [[ -n "$remote_branch" ]] || remote_branch="${ref#refs/remotes/}"
        ;;
      refs/*)
        # Other namespaces (stash, notes, ...) carry no branch information.
        ;;
      */*)
        [[ -n "$remote_branch" ]] || remote_branch="$ref"
        ;;
      *)
        [[ -n "$local_branch" ]] || local_branch="$ref"
        ;;
    esac
  done

  if [[ -n "$head_branch" ]]; then
    debug "  Found HEAD -> branch: $head_branch"
    printf '%s\n' "$head_branch"
  elif [[ -n "$local_branch" ]]; then
    debug "  Found local branch in list: $local_branch"
    printf '%s\n' "$local_branch"
  elif [[ -n "$remote_branch" ]]; then
    debug "  Using remote branch: $remote_branch"
    printf '%s\n' "$remote_branch"
  elif [[ -n "$tag_name" ]]; then
    debug "  Found tag: tag: $tag_name"
    printf '%s\n' "tag: $tag_name"
  else
    printf '%s\n' "(none)"
  fi
}

#######################################
# Parse command line options into the option globals.
# Globals:   OUTPUT_FILE, ALL_BRANCHES, SPECIFIC_BRANCHES, SINCE_DATE,
#            UNTIL_DATE, SORT_ORDER, DEBUG (written)
# Arguments: the script's command line ("$@")
#######################################
parse_args() {
  local opt
  while getopts ":o:acb:s:u:d:vh" opt; do
    case "$opt" in
      o) OUTPUT_FILE="$OPTARG" ;;
      a)
        ALL_BRANCHES=true
        SPECIFIC_BRANCHES=""
        ;;
      c)
        ALL_BRANCHES=false
        SPECIFIC_BRANCHES=""
        ;;
      b)
        ALL_BRANCHES=false
        SPECIFIC_BRANCHES="$OPTARG"
        ;;
      s) SINCE_DATE="$OPTARG" ;;
      u) UNTIL_DATE="$OPTARG" ;;
      d) SORT_ORDER="$OPTARG" ;;
      v) DEBUG=true ;;
      h) show_usage 0 ;;
      \?)
        echo "Invalid option: -$OPTARG" >&2
        show_usage
        ;;
      :)
        echo "Option -$OPTARG requires an argument." >&2
        show_usage
        ;;
    esac
  done
  shift $((OPTIND - 1))
  if (($# > 0)); then
    echo "Warning: ignoring unexpected arguments: $*" >&2
  fi
}

#######################################
# Build the `git log` argument vector from the parsed options.
# An array (not a string) is used so values containing spaces, such as
# -s "2 weeks ago", reach git as single arguments.
# Globals:   LOG_ARGS (written); option globals (read)
#######################################
build_log_args() {
  local -a branch_list=()
  local branch_name

  # tformat terminates EVERY record with a newline; plain "format:" omits it
  # after the last commit, which makes `read` skip the oldest commit.
  # --decorate=full makes %D print full ref names so local and remote
  # branches can be told apart reliably.
  LOG_ARGS=(log --decorate=full --date=short
    "--pretty=tformat:%ad%x1f%an%x1f%D%x1f%H%x1f%s")

  # Add date range filters if specified
  if [[ -n "$SINCE_DATE" ]]; then
    LOG_ARGS+=("--since=$SINCE_DATE")
  fi
  if [[ -n "$UNTIL_DATE" ]]; then
    LOG_ARGS+=("--until=$UNTIL_DATE")
  fi

  # Add sorting options
  case "$SORT_ORDER" in
    author-date) LOG_ARGS+=(--author-date-order) ;;
    committer-date) LOG_ARGS+=(--date-order) ;;
    default) ;;
    *)
      echo "Warning: unknown sort order '$SORT_ORDER', using default" >&2
      ;;
  esac

  # Handle branch selection (neither case applies for "current branch only")
  if [[ "$ALL_BRANCHES" == true ]]; then
    LOG_ARGS+=(--all)
  elif [[ -n "$SPECIFIC_BRANCHES" ]]; then
    IFS=',' read -ra branch_list <<< "$SPECIFIC_BRANCHES"
    for branch_name in "${branch_list[@]}"; do
      branch_name="${branch_name#"${branch_name%%[![:space:]]*}"}"
      branch_name="${branch_name%"${branch_name##*[![:space:]]}"}"
      [[ -n "$branch_name" ]] || continue
      if [[ "$branch_name" == -* ]]; then
        # Would otherwise be interpreted by git as an option.
        echo "Error: Invalid branch name: $branch_name" >&2
        exit 1
      fi
      LOG_ARGS+=("$branch_name")
    done
  fi

  # Everything before "--" is a revision, never a path.
  LOG_ARGS+=(--)
}

#######################################
# Echo the effective options to stdout.
#######################################
print_options() {
  echo "Generating CSV with the following options:"
  echo "Output file: $OUTPUT_FILE"
  if [[ "$ALL_BRANCHES" == true ]]; then
    echo "Branches: All branches"
  elif [[ -n "$SPECIFIC_BRANCHES" ]]; then
    echo "Branches: $SPECIFIC_BRANCHES"
  else
    echo "Branches: Current branch only"
  fi
  if [[ -n "$SINCE_DATE" ]]; then
    echo "Since date: $SINCE_DATE"
  fi
  if [[ -n "$UNTIL_DATE" ]]; then
    echo "Until date: $UNTIL_DATE"
  fi
  echo "Sort order: $SORT_ORDER"
  if [[ "$DEBUG" == true ]]; then
    echo "Debug mode: Enabled"
  fi
  echo "Processing..."
}

#######################################
# Run git log and write the CSV file.
# The raw log is captured in a temp file first so that git's own exit status
# is checked (a pipeline into `while` would only report the loop's status)
# and the output file is left untouched if git fails.
# Globals:   LOG_ARGS, OUTPUT_FILE, TMP_DIR (read);
#            BRANCH_LOG, COMMIT_COUNT (written)
#######################################
export_history() {
  local raw_log="$TMP_DIR/raw_log"
  local commit_date author refs commit_hash message branch branches_from_git

  BRANCH_LOG="$TMP_DIR/branches"
  : > "$BRANCH_LOG"

  if ! git "${LOG_ARGS[@]}" > "$raw_log"; then
    echo "Error: Failed to export git history" >&2
    exit 1
  fi

  # Add headers to the CSV file
  if ! printf '%s\n' "Date,Author,Branch,Message,CommitHash" > "$OUTPUT_FILE"; then
    echo "Error: Failed to export git history" >&2
    exit 1
  fi

  COMMIT_COUNT=0
  # The subject is the LAST field so anything unexpected inside it stays in
  # the message column. fd 3 keeps stdin free for commands run in the loop.
  while IFS="$FIELD_SEP" read -r -u 3 commit_date author refs commit_hash message; do
    [[ -n "$commit_hash" ]] || continue

    debug "Processing commit $commit_hash from $commit_date by $author"

    # Extract branch from refs first
    branch=$(extract_branch_from_refs "$refs")

    # If no branch found from refs (%D), try the ancestry-based fallback
    if [[ "$branch" == "(none)" || -z "$branch" ]]; then
      debug "No branch info from refs, trying git branch --contains fallback"
      branches_from_git=$(get_branches_for_commit "$commit_hash")
      if [[ -n "$branches_from_git" ]]; then
        branch="$branches_from_git"
        debug "Found branches using fallback: $branch"
      else
        debug "Fallback also returned no branches"
      fi
    fi

    debug "Final branch assignment: $branch"

    # Date (--date=short) and hash never need quoting.
    printf '%s,%s,%s,%s,%s\n' \
      "$commit_date" \
      "$(csv_field "$author")" \
      "$(csv_field "$branch")" \
      "$(csv_quote "$message")" \
      "$commit_hash"

    printf '%s\n' "$branch" >> "$BRANCH_LOG"
    COMMIT_COUNT=$((COMMIT_COUNT + 1))
  done 3< "$raw_log" >> "$OUTPUT_FILE"
}

#######################################
# Print how many exported commits were attributed to each Branch value.
# Globals: BRANCH_LOG (read)
#######################################
print_branch_stats() {
  local count branch
  echo ""
  echo "Commit count by branch:"
  LC_ALL=C sort "$BRANCH_LOG" | uniq -c | LC_ALL=C sort -nr |
    while read -r count branch; do
      printf "  %-20s %s\n" "$branch:" "$count commits"
    done
}

#######################################
# Remove the scratch directory (installed as EXIT trap).
#######################################
cleanup() {
  if [[ -n "$TMP_DIR" && -d "$TMP_DIR" ]]; then
    rm -rf -- "$TMP_DIR"
  fi
}

main() {
  # Options are parsed first so that -h works outside a repository.
  parse_args "$@"

  # Check if git is installed
  if ! command -v git > /dev/null 2>&1; then
    echo "Error: git is not installed or not in the PATH" >&2
    exit 1
  fi

  # Check if we're in a git repository
  if ! git rev-parse --is-inside-work-tree > /dev/null 2>&1; then
    echo "Error: Not a git repository" >&2
    exit 1
  fi

  if ! TMP_DIR=$(mktemp -d); then
    echo "Error: Failed to create a temporary directory" >&2
    exit 1
  fi
  trap cleanup EXIT

  build_log_args
  print_options
  export_history

  echo "Git history successfully exported to $OUTPUT_FILE"
  echo "Total commits exported: $COMMIT_COUNT"

  # List branch statistics
  if [[ "$ALL_BRANCHES" == true || -n "$SPECIFIC_BRANCHES" ]]; then
    print_branch_stats
  fi

  exit 0
}

main "$@"