#!/usr/bin/env python3
"""
Git Time Analyzer - Enhanced Version
Based on original work by MiKatre (https://gist.github.com/MiKatre/0d6bdd4664cef1ed5d6d67ba531821b8)

Features:
- Commit size analysis
- Working hours consideration (including overnight shifts)
- Smart time allocation
- Data visualization
- Commit message analysis
- Automated commit detection
- Merge commit filtering
"""

import re
import shlex
import subprocess
from collections import defaultdict
from datetime import datetime, timedelta  # noqa: F401  (timedelta kept for importers)

import numpy as np  # noqa: F401  (unused here; kept so existing importers do not break)

try:
    import matplotlib.pyplot as plt
except ImportError:
    # Plotting is optional: the textual report does not need matplotlib.
    plt = None


class Config:
    """Configuration parameters for time estimation algorithm."""

    # Time thresholds (all in minutes)
    MAX_COMMIT_DIFF_MINUTES = 2 * 60   # larger gaps end a work session
    ISOLATED_COMMIT_MINUTES = 60       # floor for a commit outside a session
    MIN_COMMIT_MINUTES = 15            # floor for a small commit's size estimate
    MAX_COMMIT_MINUTES = 4 * 60        # cap for a very large commit's estimate

    # Working hours (can handle overnight shifts). The end hour is exclusive.
    WORK_START_HOUR = 13   # 1 PM
    WORK_END_HOUR = 5      # 5 AM next day
    OVERNIGHT_SHIFT = True  # Flag to indicate if work hours cross midnight

    # Commit size thresholds (inserted + deleted lines)
    SMALL_COMMIT_CHANGES = 50
    MEDIUM_COMMIT_CHANGES = 200
    LARGE_COMMIT_CHANGES = 500

    # Commit message patterns (searched case-insensitively anywhere in the message)
    AUTOMATED_PATTERNS = [
        r'automated',
        r'auto-generated',
        r'bot:',
        r'dependabot',
        r'^\[automated\]',
        r'ci:',
        r'chore\(deps\)',
        r'build\(deps\)',
        r'yarn upgrade',
        r'npm update',
        r'package-lock\.json',
        r'poetry\.lock',
        r'bump version',
    ]

    # Merge patterns
    MERGE_PATTERNS = [
        r'^Merge branch',
        r'^Merge pull request',
        r'^Merge remote-tracking',
        r'^\[maven-release-plugin\]',
        r'^Automatic merge',
        r'^Auto-merge',
    ]

    # Fallback keyword heuristics, checked in order when the message is not a
    # conventional commit. Word boundaries keep "prefix", "latest" or "docker"
    # from being mistaken for fix / test / docs commits.
    KEYWORD_PATTERNS = [
        (r'\b(?:hot|bug)?fix(?:es|ed|ing|up)?\b|\bbugs?\b', 'fix'),
        (r'\brefactor', 'refactor'),
        (r'\btest(?:s|ed|ing)?\b', 'test'),
        (r'\bdocs?\b|\bdocument(?:s|ed|ing|ation)?\b|\bdocstrings?\b', 'docs'),
    ]

    # Time multipliers for different commit types
    TIME_MULTIPLIERS = {
        'feature': 1.0,
        'fix': 0.8,
        'refactor': 1.2,
        'docs': 0.5,
        'style': 0.3,
        'test': 0.7,
        'chore': 0.4,
        'automated': 0.0,
        'merge': 0.1,
    }


class CommitAnalyzer:
    """Analyzes commit messages and metadata to determine commit type and importance."""

    # Conventional-commit header: type, optional "(scope)", optional "!" marker.
    _CONVENTIONAL_RE = re.compile(
        r'^(feat|fix|docs|style|refactor|test|chore)(\(.*\))?!?: '
    )

    def __init__(self):
        """Compile the regex patterns listed in Config once, up front."""
        self.automated_patterns = [
            re.compile(pattern, re.IGNORECASE) for pattern in Config.AUTOMATED_PATTERNS
        ]
        self.merge_patterns = [
            re.compile(pattern, re.IGNORECASE) for pattern in Config.MERGE_PATTERNS
        ]
        self.keyword_patterns = [
            (re.compile(pattern, re.IGNORECASE), commit_type)
            for pattern, commit_type in Config.KEYWORD_PATTERNS
        ]

    def get_commit_type(self, message, commit_hash):
        """Classify a commit from its message.

        Checks run in this order: automated patterns, merge patterns,
        conventional-commit prefix, keyword heuristics, then 'feature'.

        Args:
            message: Full commit message.
            commit_hash: Unused; kept for interface compatibility.

        Returns:
            Tuple ``(commit_type, multiplier)`` where the multiplier comes
            from ``Config.TIME_MULTIPLIERS``.
        """
        if any(pattern.search(message) for pattern in self.automated_patterns):
            return 'automated', Config.TIME_MULTIPLIERS['automated']

        if any(pattern.search(message) for pattern in self.merge_patterns):
            return 'merge', Config.TIME_MULTIPLIERS['merge']

        conventional_match = self._CONVENTIONAL_RE.match(message)
        if conventional_match:
            commit_type = conventional_match.group(1)
            if commit_type == 'feat':
                commit_type = 'feature'
            return commit_type, Config.TIME_MULTIPLIERS.get(commit_type, 1.0)

        for pattern, commit_type in self.keyword_patterns:
            if pattern.search(message):
                return commit_type, Config.TIME_MULTIPLIERS[commit_type]

        # Nothing matched: treat as regular feature work.
        return 'feature', Config.TIME_MULTIPLIERS['feature']

    def get_commit_message(self, commit_hash):
        """Return the full message of ``commit_hash``, or "" if Git fails."""
        try:
            output = execute_git_command(
                ["git", "log", "--format=%B", "-n", "1", commit_hash]
            )
        except (subprocess.CalledProcessError, OSError):
            # Best effort: an unreadable commit is classified by an empty message.
            return ""
        return output.strip()


def execute_git_command(command):
    """Execute a Git command and return its standard output as text.

    Args:
        command: Either an argument list (preferred) or a command string,
            which is tokenized with ``shlex.split``. No shell is involved,
            so shell features such as pipes or redirects are not supported.

    Returns:
        The command's stdout decoded as UTF-8 (undecodable bytes replaced).

    Raises:
        subprocess.CalledProcessError: The command exited with non-zero status.
        OSError: The executable could not be started.
    """
    args = shlex.split(command) if isinstance(command, str) else list(command)
    return subprocess.check_output(args, encoding="utf-8", errors="replace")


def parse_shortstat(diff_stats):
    """Return insertions + deletions found in ``git --shortstat`` output.

    Handles singular forms ("1 insertion(+)") and missing parts; returns 0
    when no counts are present.
    """
    total = 0
    for pattern in (r"(\d+) insertion", r"(\d+) deletion"):
        match = re.search(pattern, diff_stats)
        if match:
            total += int(match.group(1))
    return total


def get_commit_size(commit_hash):
    """Calculate the size of a commit by counting lines changed.

    Returns 0 when Git cannot be run or the commit cannot be read.
    """
    try:
        # "--format=" suppresses the header and message so that only the
        # shortstat line is parsed, not numbers quoted in the message.
        diff_stats = execute_git_command(
            ["git", "show", "--shortstat", "--format=", commit_hash]
        )
    except (subprocess.CalledProcessError, OSError):
        return 0
    return parse_shortstat(diff_stats)


def is_working_hours(dt):
    """Check if a given datetime falls within defined working hours.

    The window is ``[WORK_START_HOUR, WORK_END_HOUR)``. It wraps past
    midnight only when ``OVERNIGHT_SHIFT`` is set and the start hour is
    greater than the end hour.
    """
    hour = dt.hour
    start, end = Config.WORK_START_HOUR, Config.WORK_END_HOUR
    if Config.OVERNIGHT_SHIFT and start > end:
        # Overnight shift, e.g. 13:00 until 05:00 the next day.
        return hour >= start or hour < end
    return start <= hour < end


def estimate_commit_time(commit_size, commit_type_multiplier):
    """Estimate minutes spent on a commit based on its size and type.

    The per-line rate drops as the commit grows; small commits have a floor
    of ``MIN_COMMIT_MINUTES`` and very large ones a cap of
    ``MAX_COMMIT_MINUTES``. The result is scaled by the type multiplier.
    """
    if commit_size <= Config.SMALL_COMMIT_CHANGES:
        minutes = max(Config.MIN_COMMIT_MINUTES, commit_size * 0.5)
    elif commit_size <= Config.MEDIUM_COMMIT_CHANGES:
        minutes = commit_size * 0.4
    elif commit_size <= Config.LARGE_COMMIT_CHANGES:
        minutes = commit_size * 0.3
    else:
        minutes = min(Config.MAX_COMMIT_MINUTES, commit_size * 0.2)
    return minutes * commit_type_multiplier


def allocate_commit_minutes(gap_minutes, in_working_hours, commit_size, type_multiplier):
    """Return the minutes credited to one commit.

    Args:
        gap_minutes: Minutes until the author's next commit, or None for the
            most recent commit.
        in_working_hours: Whether the commit was made during working hours.
        commit_size: Lines inserted plus deleted.
        type_multiplier: Weight from ``Config.TIME_MULTIPLIERS``.

    Returns:
        Minutes as a float. The multiplier is applied exactly once, after
        choosing between the size-based estimate and the time gap.
    """
    size_minutes = estimate_commit_time(commit_size, 1.0)
    same_session = (
        gap_minutes is not None
        and 0 <= gap_minutes < Config.MAX_COMMIT_DIFF_MINUTES
        and in_working_hours
    )
    if same_session:
        minutes = max(size_minutes, min(gap_minutes, Config.MAX_COMMIT_MINUTES))
    else:
        minutes = max(size_minutes, Config.ISOLATED_COMMIT_MINUTES)
    return minutes * type_multiplier


def estimate_hours(dates, commit_hashes):
    """Estimate working hours based on commit dates, sizes, and types.

    Args:
        dates: Commit datetimes of one author, in any order.
        commit_hashes: Hashes matching ``dates`` position by position.

    Returns:
        Tuple ``(total_hours, commit_times, commit_stats)``: total hours
        rounded to one decimal, a chronological list of
        ``(date, hours, commit_type)`` and a ``{commit_type: count}`` dict.
        Empty input yields ``(0, [], {})``.
    """
    if not dates:
        return 0, [], {}

    # git log lists newest first, but the gap logic needs oldest first.
    commits = sorted(zip(dates, commit_hashes), key=lambda pair: pair[0])

    analyzer = CommitAnalyzer()
    total_minutes = 0.0
    commit_times = []
    commit_stats = defaultdict(int)

    for index, (commit_date, commit_hash) in enumerate(commits):
        if index + 1 < len(commits):
            # Plain datetime subtraction already spans midnight correctly.
            gap_minutes = (commits[index + 1][0] - commit_date).total_seconds() / 60
        else:
            gap_minutes = None  # most recent commit: always isolated

        message = analyzer.get_commit_message(commit_hash)
        commit_type, type_multiplier = analyzer.get_commit_type(message, commit_hash)
        commit_stats[commit_type] += 1

        minutes = allocate_commit_minutes(
            gap_minutes,
            is_working_hours(commit_date),
            get_commit_size(commit_hash),
            type_multiplier,
        )
        total_minutes += minutes
        commit_times.append((commit_date, minutes / 60, commit_type))

    return round(total_minutes / 60, 1), commit_times, dict(commit_stats)


def aggregate_commit_times(author_commit_times):
    """Sum estimated hours per ISO week, per hour of day and per commit type.

    Args:
        author_commit_times: ``{author: [(date, hours, commit_type), ...]}``.

    Returns:
        Three dicts: ``{(iso_year, iso_week): hours}``, ``{hour: hours}``
        and ``{commit_type: hours}``.
    """
    weekly = defaultdict(float)
    hourly = defaultdict(float)
    by_type = defaultdict(float)
    for commit_data in author_commit_times.values():
        for date, hours, commit_type in commit_data:
            iso = date.isocalendar()
            # Key by year as well, so week 5 of two years is not merged.
            weekly[(iso[0], iso[1])] += hours
            hourly[date.hour] += hours
            by_type[commit_type] += hours
    return dict(weekly), dict(hourly), dict(by_type)


def create_visualization(author_commit_times):
    """Create visualizations of work patterns and commit types.

    Writes three stacked bar charts to ``work_patterns.png`` in the current
    directory.

    Raises:
        RuntimeError: matplotlib is not installed.
    """
    if plt is None:
        raise RuntimeError("matplotlib is required to create the visualization")

    weekly, hourly, by_type = aggregate_commit_times(author_commit_times)
    figure = plt.figure(figsize=(15, 12))

    # Weekly pattern
    plt.subplot(3, 1, 1)
    week_keys = sorted(weekly)
    positions = list(range(len(week_keys)))
    plt.bar(positions, [weekly[key] for key in week_keys], alpha=0.7)
    step = max(1, len(week_keys) // 20)  # thin the labels on long histories
    plt.xticks(
        positions[::step],
        [f"{year}-W{week:02d}" for year, week in week_keys[::step]],
        rotation=45,
    )
    plt.title('Weekly Work Pattern')
    plt.xlabel('Week Number')
    plt.ylabel('Hours')

    # Daily distribution
    plt.subplot(3, 1, 2)
    if Config.OVERNIGHT_SHIFT:
        # Start the axis at the shift start so an overnight shift is contiguous.
        start_idx = Config.WORK_START_HOUR
        hour_order = list(range(start_idx, 24)) + list(range(0, start_idx))
    else:
        hour_order = list(range(24))
    plt.bar(range(24), [hourly.get(h, 0.0) for h in hour_order], alpha=0.7)
    plt.xticks(range(24), [f"{h:02d}:00" for h in hour_order])
    plt.title('Daily Work Distribution')
    plt.xlabel('Hour of Day')
    plt.ylabel('Total Hours')

    # Commit types distribution
    plt.subplot(3, 1, 3)
    types = list(by_type)
    plt.bar(types, [by_type[t] for t in types], alpha=0.7)
    plt.title('Time Distribution by Commit Type')
    plt.xlabel('Commit Type')
    plt.ylabel('Hours')
    plt.xticks(rotation=45)

    plt.tight_layout()
    plt.savefig('work_patterns.png')
    plt.close(figure)


def parse_commit_log(commit_logs):
    """Parse ``hash|author|strict-ISO-date`` lines into per-author lists.

    The author field may itself contain ``|``: the hash is split off from
    the left and the date from the right. Blank lines are skipped.

    Returns:
        Tuple ``(commit_dates, commit_hashes)`` of ``defaultdict(list)``
        keyed by author name, in input order.

    Raises:
        ValueError: A non-blank line lacks the three fields or its date is
            not accepted by ``datetime.fromisoformat``.
    """
    commit_dates = defaultdict(list)
    commit_hashes = defaultdict(list)
    for line in commit_logs.splitlines():
        line = line.strip()
        if not line:
            continue
        commit_hash, rest = line.split('|', 1)
        author, date_str = rest.rsplit('|', 1)
        commit_dates[author].append(datetime.fromisoformat(date_str.strip()))
        commit_hashes[author].append(commit_hash)
    return commit_dates, commit_hashes


def main():
    """Main function to run the Git time analysis."""
    print("Analyzing Git repository...")

    try:
        # %aI is the strict ISO 8601 author date, which fromisoformat accepts.
        commit_logs = execute_git_command(["git", "log", "--format=%H|%an|%aI"])
    except (subprocess.CalledProcessError, OSError) as err:
        raise SystemExit(f"Could not read Git history: {err}") from err

    commit_dates, commit_hashes = parse_commit_log(commit_logs)

    hours_worked = {}
    author_commit_times = {}
    author_commit_stats = {}

    for author in commit_dates:
        print(f"\nAnalyzing commits for {author}...")
        hours, commit_times, commit_stats = estimate_hours(
            commit_dates[author], commit_hashes[author]
        )
        hours_worked[author] = hours
        author_commit_times[author] = commit_times
        author_commit_stats[author] = commit_stats

    print("\nEstimated Hours Worked:")
    print("-" * 40)
    for author, hours in hours_worked.items():
        print(f"\n{author}: {hours} hours")
        print("Commit breakdown:")
        for commit_type, count in author_commit_stats[author].items():
            print(f"  {commit_type}: {count} commits")

    if plt is None:
        print("\nmatplotlib is not installed; skipping visualization")
        return

    print("\nGenerating visualization...")
    create_visualization(author_commit_times)
    print("Visualization saved as 'work_patterns.png' in the current directory")


if __name__ == "__main__":
    main()