Created
September 16, 2025 13:52
-
-
Save gionn/a670facb7beb60f09e630a02373d8c73 to your computer and use it in GitHub Desktop.
A Python script to scan GitHub organizations and list all public repositories of their members - useful for security auditing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| GitHub Organization Scanner | |
| A script to list all users in a specific GitHub organization | |
| and enumerate their public/open source repositories. | |
| Usage: | |
| python github_org_scanner.py <organization_name> | |
| Set the GH_TOKEN environment variable with your GitHub personal access token. | |
| Example: | |
| export GH_TOKEN=your_github_token_here | |
| python github_org_scanner.py microsoft | |
| """ | |
| import argparse | |
| import json | |
| import os | |
| import sys | |
| from typing import List, Dict, Optional | |
| import requests | |
| from datetime import datetime | |
class GitHubOrgScanner:
    """Scanner for GitHub organizations and user repositories.

    Wraps a ``requests.Session`` pointed at the GitHub REST API and offers
    helpers to list an organization's members, list a user's repositories,
    and combine both into a single JSON-serializable report.

    Attributes:
        base_url: Root URL of the GitHub REST API.
        timeout: Seconds to wait on each HTTP request before giving up.
        session: Session carrying the ``Accept`` (and, when a token was
            supplied, ``Authorization``) headers sent with every request.
    """

    # Page size sent as ``per_page`` on every list request.
    PER_PAGE = 100

    def __init__(self, token: Optional[str] = None, timeout: float = 30.0):
        """
        Initialize the GitHub scanner.

        Args:
            token: GitHub personal access token for authentication. Without
                a token, rate limiting will be more restrictive.
            timeout: Seconds to wait for each HTTP request. Without an
                explicit timeout a stalled connection would block the scan
                forever.
        """
        self.base_url = "https://api.github.com"
        self.timeout = timeout
        self.session = requests.Session()
        self.session.headers.update({
            "Accept": "application/vnd.github.v3+json"
        })
        if token:
            self.session.headers.update({
                "Authorization": f"token {token}"
            })

    def get_org_members(self, org_name: str) -> List[Dict]:
        """
        Get the members of a GitHub organization, following pagination.

        Args:
            org_name: Name of the GitHub organization

        Returns:
            List of member dictionaries as returned by the API. An empty
            list is returned when the API answers with any non-200 status
            (the reason is printed), even if earlier pages succeeded.

        Raises:
            requests.RequestException: On network failure or timeout.
        """
        members: List[Dict] = []
        page = 1
        url = f"{self.base_url}/orgs/{org_name}/members"

        print(f"Fetching members from organization: {org_name}")

        while True:
            response = self.session.get(
                url,
                params={"page": page, "per_page": self.PER_PAGE},
                timeout=self.timeout,
            )

            if response.status_code == 404:
                print(
                    f"Organization '{org_name}' not found or has no public members")
                return []
            if response.status_code != 200:
                print(
                    f"Error fetching org members: {response.status_code} - {response.text}")
                return []

            page_members = response.json()
            if not page_members:
                break

            members.extend(page_members)
            print(f" Fetched {len(page_members)} members (page {page})")

            # A short page is the last page; stopping here saves one
            # request, matching what get_user_repos does.
            if len(page_members) < self.PER_PAGE:
                break
            page += 1

        print(f"Total members found: {len(members)}")
        return members

    def get_user_repos(self, username: str) -> List[Dict]:
        """
        Get the repositories listed for a specific user, following pagination.

        Args:
            username: GitHub username

        Returns:
            List of repository dictionaries as returned by the API. An empty
            list is returned when the API answers with any non-200 status
            (the status is printed), even if earlier pages succeeded.

        Raises:
            requests.RequestException: On network failure or timeout.
        """
        repos: List[Dict] = []
        page = 1
        url = f"{self.base_url}/users/{username}/repos"

        while True:
            # NOTE(review): GitHub's documentation for this endpoint lists
            # `all`, `owner` and `member` as `type` values - confirm that
            # `public` is honored rather than ignored.
            params = {
                "page": page,
                "per_page": self.PER_PAGE,
                "type": "public",
                "sort": "updated",
                "direction": "desc"
            }
            response = self.session.get(
                url, params=params, timeout=self.timeout)

            if response.status_code != 200:
                print(
                    f" Error fetching repos for {username}: {response.status_code}")
                return []

            page_repos = response.json()
            if not page_repos:
                break

            repos.extend(page_repos)
            page += 1

            # Break if we have less than a full page of repos (last page)
            if len(page_repos) < self.PER_PAGE:
                break

        return repos

    @staticmethod
    def _summarize_repo(repo: Dict) -> Dict:
        """Reduce a raw API repository dictionary to the fields the report keeps."""
        return {
            "name": repo["name"],
            "full_name": repo["full_name"],
            "description": repo["description"],
            "url": repo["html_url"],
            "language": repo["language"],
            "stars": repo["stargazers_count"],
            "forks": repo["forks_count"],
            "is_fork": repo["fork"],
            "created_at": repo["created_at"],
            "updated_at": repo["updated_at"]
        }

    def scan_organization(self, org_name: str, user_limit: Optional[int] = None) -> Dict:
        """
        Scan an entire organization and collect member and repository data.

        Args:
            org_name: Name of the GitHub organization
            user_limit: Maximum number of users to process (None, zero or a
                negative value processes all users)

        Returns:
            Dictionary containing organization scan results: the
            organization name, a scan timestamp, one entry per processed
            member (with that member's repositories), and member and
            repository totals. When members were found it also carries a
            ``limited_scan`` flag.

        Raises:
            requests.RequestException: On network failure or timeout.
        """
        print(f"\nπ Starting scan of GitHub organization: {org_name}")
        print("=" * 60)

        # Get organization members
        members = self.get_org_members(org_name)
        if not members:
            return {
                "organization": org_name,
                "scan_timestamp": datetime.now().isoformat(),
                "members": [],
                "total_members": 0,
                "total_repositories": 0
            }

        # Apply user limit if specified
        limited = user_limit is not None and user_limit > 0
        if limited:
            original_count = len(members)
            members = members[:user_limit]
            print(
                f"π Limited to first {len(members)} users (out of {original_count} total)")

        # Collect repository data for each member
        results = {
            "organization": org_name,
            "scan_timestamp": datetime.now().isoformat(),
            "members": [],
            "total_members": len(members),
            "total_repositories": 0,
            "limited_scan": limited
        }

        print(f"\nπ Scanning repositories for {len(members)} members...")
        print("-" * 60)

        for i, member in enumerate(members, 1):
            username = member["login"]
            print(f"[{i}/{len(members)}] Scanning {username}...")

            repos = self.get_user_repos(username)
            results["members"].append({
                "username": username,
                "profile_url": member["html_url"],
                "avatar_url": member["avatar_url"],
                "repositories": [self._summarize_repo(repo) for repo in repos],
                "repository_count": len(repos)
            })
            results["total_repositories"] += len(repos)
            print(f" Found {len(repos)} public repositories")

        return results
def print_summary(results: Dict):
    """Print the headline figures of a scan to stdout.

    Shows the organization, the scan timestamp, a notice when the scan was
    limited, and the member and repository totals. When at least one member
    was processed it also prints the average repositories per member and
    the five members with the most repositories.

    Args:
        results: Scan result dictionary as produced by
            ``GitHubOrgScanner.scan_organization``.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("π SCAN SUMMARY")
    print(rule)

    print(f"Organization: {results['organization']}")
    print(f"Scan completed: {results['scan_timestamp']}")
    if results.get('limited_scan', False):
        print("β οΈ Limited scan: Only processed first subset of users")
    print(f"Total members processed: {results['total_members']}")
    print(f"Total repositories: {results['total_repositories']}")

    member_entries = results['members']
    if not member_entries:
        # Nothing to average or rank.
        return

    avg_repos = results['total_repositories'] / results['total_members']
    print(f"Average repos per member: {avg_repos:.1f}")

    # Rank a copy so the caller's list keeps its order; the sort is stable,
    # so ties stay in their original relative order.
    ranked = list(member_entries)
    ranked.sort(key=lambda entry: entry['repository_count'], reverse=True)

    print("\nπ Top 5 contributors by repository count:")
    for position, entry in enumerate(ranked[:5], 1):
        print(
            f" {position}. {entry['username']}: {entry['repository_count']} repos")
def print_detailed_results(results: Dict):
    """Print a per-member breakdown of the scan to stdout.

    For every member this shows the username, profile URL and repository
    count, followed by at most the first ten repositories in stored order
    (name, language, star and fork counts, fork marker, and a description
    cut to 100 characters). A trailing line reports how many repositories
    were left out.

    Args:
        results: Scan result dictionary as produced by
            ``GitHubOrgScanner.scan_organization``.
    """
    print("\n" + "=" * 80)
    print("π DETAILED RESULTS")
    print("=" * 80)

    for entry in results['members']:
        print(f"\nπ€ {entry['username']}")
        print(f" Profile: {entry['profile_url']}")
        print(f" Repositories: {entry['repository_count']}")

        repo_list = entry['repositories']
        if not repo_list:
            continue

        print(" π Public Repositories:")
        for repo in repo_list[:10]:  # Only the first ten are shown
            # Each optional field collapses to an empty string when absent.
            stars = ""
            if repo['stars'] > 0:
                stars = f"β{repo['stars']}"
            forks = ""
            if repo['forks'] > 0:
                forks = f"π΄{repo['forks']}"
            fork_indicator = ""
            if repo['is_fork']:
                fork_indicator = "π"
            language = ""
            if repo['language']:
                language = f"[{repo['language']}]"

            print(
                f" β’ {repo['name']} {language} {stars} {forks} {fork_indicator}")

            text = repo['description']
            if text:
                if len(text) > 100:
                    text = text[:100] + "..."
                print(f" {text}")

        if len(repo_list) > 10:
            print(
                f" ... and {len(repo_list) - 10} more repositories")
def main():
    """Parse the command line, run the scan, print it and save it as JSON.

    The GitHub token is read from the ``GH_TOKEN`` environment variable; a
    warning is printed when it is missing. Results are always written to a
    file: the ``--output`` path if given, otherwise a timestamped
    ``results-YYYYMMDD-HHMMSS.json``. The process exits with status 1 when
    the scan is interrupted, a network/API error occurs, or the file cannot
    be written.
    """
    # Assembled line by line so the help text stays readable in the source.
    usage_examples = (
        "\n"
        "Examples:\n"
        "export GH_TOKEN=your_github_token_here\n"
        "python github_org_scanner.py microsoft\n"
        "python github_org_scanner.py google --output my_results.json\n"
        "python github_org_scanner.py alfresco --limit 10\n"
    )
    parser = argparse.ArgumentParser(
        description="Scan GitHub organization members and their public repositories",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    parser.add_argument(
        "organization", help="GitHub organization name to scan")
    parser.add_argument(
        "--output",
        help="Output file to save results as JSON (default: results-TIMESTAMP.json)",
    )
    parser.add_argument(
        "--limit",
        type=int,
        help="Limit processing to the first X users (useful for testing or large orgs)",
    )
    args = parser.parse_args()

    # Fall back to a timestamped filename when no output path was given.
    if not args.output:
        args.output = "results-{}.json".format(
            datetime.now().strftime("%Y%m%d-%H%M%S"))

    # The token comes from the environment only.
    token = os.getenv('GH_TOKEN')
    if not token:
        for line in (
            "β οΈ Warning: No GitHub token found in GH_TOKEN environment variable.",
            " You'll be subject to lower rate limits (60 requests/hour).",
            " Set GH_TOKEN environment variable for higher limits (5000 requests/hour).",
        ):
            print(line)
        print()

    scanner = GitHubOrgScanner(token=token)

    try:
        results = scanner.scan_organization(args.organization, args.limit)

        # Report on screen first, then persist the same data.
        print_summary(results)
        print_detailed_results(results)

        with open(args.output, 'w', encoding='utf-8') as out_file:
            json.dump(results, out_file, indent=2)
        print(f"\nπΎ Results saved to: {args.output}")
    except KeyboardInterrupt:
        print("\n\nβ οΈ Scan interrupted by user")
        sys.exit(1)
    except requests.RequestException as exc:
        print(f"\nβ Network/API error during scan: {exc}")
        sys.exit(1)
    except OSError as exc:  # IOError is an alias of OSError in Python 3
        print(f"\nβ File I/O error: {exc}")
        sys.exit(1)


# Run the scanner only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| requests>=2.25.0 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment