Skip to content

Instantly share code, notes, and snippets.

@gionn
Created September 16, 2025 13:52
Show Gist options
  • Save gionn/a670facb7beb60f09e630a02373d8c73 to your computer and use it in GitHub Desktop.
Save gionn/a670facb7beb60f09e630a02373d8c73 to your computer and use it in GitHub Desktop.
A Python script to scan GitHub organizations and list all public repositories of their members - useful for security auditing.
#!/usr/bin/env python3
"""
GitHub Organization Scanner
A script to list all users in a specific GitHub organization
and enumerate their public/open source repositories.
Usage:
python github_org_scanner.py <organization_name>
Set the GH_TOKEN environment variable with your GitHub personal access token.
Example:
export GH_TOKEN=your_github_token_here
python github_org_scanner.py microsoft
"""
import argparse
import json
import os
import sys
from typing import List, Dict, Optional
import requests
from datetime import datetime
class GitHubOrgScanner:
    """Scanner for GitHub organizations and user repositories.

    All HTTP traffic goes through one ``requests.Session`` so the
    authentication and ``Accept`` headers are set once. Progress and error
    messages are printed to stdout.
    """

    # Number of items requested per page from the paginated list endpoints.
    PER_PAGE = 100

    # Default number of seconds to wait for each HTTP response. requests has
    # no timeout unless one is passed, so a stalled connection would otherwise
    # block the scan forever. Kept as a class attribute so it is available
    # even on instances that did not go through __init__.
    timeout = 30.0

    def __init__(self, token: Optional[str] = None,
                 timeout: Optional[float] = None):
        """
        Initialize the GitHub scanner.

        Args:
            token: GitHub personal access token for authentication. Without
                a token the API applies more restrictive rate limits.
            timeout: Seconds to wait for each HTTP response; None keeps the
                class default.
        """
        self.base_url = "https://api.github.com"
        self.session = requests.Session()
        self.session.headers.update({
            "Accept": "application/vnd.github.v3+json"
        })
        if token:
            self.session.headers["Authorization"] = f"token {token}"
        if timeout is not None:
            self.timeout = timeout

    def _get_page(self, url: str, page: int,
                  extra_params: Optional[Dict] = None):
        """Request one page of a paginated endpoint and return the response."""
        params = {"page": page, "per_page": self.PER_PAGE}
        if extra_params:
            params.update(extra_params)
        return self.session.get(url, params=params, timeout=self.timeout)

    @staticmethod
    def _repo_summary(repo: Dict) -> Dict:
        """Reduce a repository payload to the fields kept in scan results."""
        return {
            "name": repo["name"],
            "full_name": repo["full_name"],
            "description": repo["description"],
            "url": repo["html_url"],
            "language": repo["language"],
            "stars": repo["stargazers_count"],
            "forks": repo["forks_count"],
            "is_fork": repo["fork"],
            "created_at": repo["created_at"],
            "updated_at": repo["updated_at"]
        }

    def get_org_members(self, org_name: str) -> List[Dict]:
        """
        Get the members of a GitHub organization.

        NOTE(review): which members the API returns presumably depends on
        what the token is allowed to see -- confirm against the REST docs.

        Args:
            org_name: Name of the GitHub organization

        Returns:
            List of member dictionaries with user information. An empty list
            is returned when any page request fails (the error is printed).

        Raises:
            requests.RequestException: on network failure or timeout.
        """
        members = []
        url = f"{self.base_url}/orgs/{org_name}/members"
        print(f"Fetching members from organization: {org_name}")

        page = 1
        while True:
            response = self._get_page(url, page)
            if response.status_code == 404:
                print(
                    f"Organization '{org_name}' not found or has no public members")
                return []
            if response.status_code != 200:
                print(
                    f"Error fetching org members: {response.status_code} - {response.text}")
                return []

            page_members = response.json()
            if not page_members:
                break
            members.extend(page_members)
            print(f" Fetched {len(page_members)} members (page {page})")

            # A short page is the last one; stopping here saves a request
            # that would only return an empty list.
            if len(page_members) < self.PER_PAGE:
                break
            page += 1

        print(f"Total members found: {len(members)}")
        return members

    def get_user_repos(self, username: str) -> List[Dict]:
        """
        Get all public repositories for a specific user.

        Args:
            username: GitHub username

        Returns:
            List of repository dictionaries, most recently updated first.
            An empty list is returned when any page request fails.

        Raises:
            requests.RequestException: on network failure or timeout.
        """
        repos = []
        url = f"{self.base_url}/users/{username}/repos"
        # NOTE(review): the REST docs for this endpoint appear to list only
        # all/owner/member as `type` values -- confirm "public" is honoured
        # rather than silently ignored.
        query = {
            "type": "public",
            "sort": "updated",
            "direction": "desc"
        }

        page = 1
        while True:
            response = self._get_page(url, page, query)
            if response.status_code != 200:
                print(
                    f" Error fetching repos for {username}: {response.status_code}")
                return []

            page_repos = response.json()
            if not page_repos:
                break
            repos.extend(page_repos)

            # Break if we have less than a full page of repos (last page)
            if len(page_repos) < self.PER_PAGE:
                break
            page += 1

        return repos

    def scan_organization(self, org_name: str, user_limit: Optional[int] = None) -> Dict:
        """
        Scan an entire organization and collect member and repository data.

        Args:
            org_name: Name of the GitHub organization
            user_limit: Maximum number of users to process (None, zero or a
                negative value processes all users)

        Returns:
            Dictionary containing organization scan results: the organization
            name, a timestamp, one entry per member with that member's
            repositories, and the member/repository totals.

        Raises:
            requests.RequestException: on network failure or timeout.
        """
        print(f"\nπŸ” Starting scan of GitHub organization: {org_name}")
        print("=" * 60)

        # Get organization members
        members = self.get_org_members(org_name)
        if not members:
            return {
                "organization": org_name,
                "scan_timestamp": datetime.now().isoformat(),
                "members": [],
                "total_members": 0,
                "total_repositories": 0
            }

        # Apply user limit if specified
        limited = user_limit is not None and user_limit > 0
        if limited:
            original_count = len(members)
            members = members[:user_limit]
            print(
                f"πŸ“ Limited to first {len(members)} users (out of {original_count} total)")

        results = {
            "organization": org_name,
            "scan_timestamp": datetime.now().isoformat(),
            "members": [],
            "total_members": len(members),
            "total_repositories": 0,
            "limited_scan": limited
        }

        print(f"\nπŸ“Š Scanning repositories for {len(members)} members...")
        print("-" * 60)

        # Collect repository data for each member
        for i, member in enumerate(members, 1):
            username = member["login"]
            print(f"[{i}/{len(members)}] Scanning {username}...")
            repos = self.get_user_repos(username)

            member_data = {
                "username": username,
                "profile_url": member["html_url"],
                "avatar_url": member["avatar_url"],
                "repositories": [self._repo_summary(repo) for repo in repos]
            }
            member_data["repository_count"] = len(repos)

            results["members"].append(member_data)
            results["total_repositories"] += len(repos)
            print(f" Found {len(repos)} public repositories")

        return results
def print_summary(results: Dict):
    """Print the headline figures of a scan to stdout.

    Args:
        results: Scan results dictionary; reads the 'organization',
            'scan_timestamp', 'total_members', 'total_repositories' and
            'members' keys, plus the optional 'limited_scan' flag.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("πŸ“‹ SCAN SUMMARY")
    print(rule)

    print(f"Organization: {results['organization']}")
    print(f"Scan completed: {results['scan_timestamp']}")
    if results.get('limited_scan', False):
        print("⚠️ Limited scan: Only processed first subset of users")
    print(f"Total members processed: {results['total_members']}")
    print(f"Total repositories: {results['total_repositories']}")

    member_entries = results['members']
    if not member_entries:
        # Nothing to average or rank.
        return

    mean_repos = results['total_repositories'] / results['total_members']
    print(f"Average repos per member: {mean_repos:.1f}")

    # Rank by repository count, largest first; the sort is stable, so
    # members with equal counts keep their original relative order.
    ranked = sorted(member_entries, key=lambda entry: -entry['repository_count'])
    print("\nπŸ† Top 5 contributors by repository count:")
    for position, entry in enumerate(ranked[:5], start=1):
        print(
            f" {position}. {entry['username']}: {entry['repository_count']} repos")
def print_detailed_results(results: Dict):
    """Print a per-member breakdown of the scan to stdout.

    For every entry in results['members'] this prints the username, profile
    URL and repository count, followed by at most ten of the member's
    repositories and a note saying how many more were left out.

    Args:
        results: Scan results dictionary; only the 'members' key is read.
    """
    max_shown = 10
    max_desc = 100

    print("\n" + "=" * 80)
    print("πŸ“ DETAILED RESULTS")
    print("=" * 80)

    for entry in results['members']:
        print(f"\nπŸ‘€ {entry['username']}")
        print(f" Profile: {entry['profile_url']}")
        print(f" Repositories: {entry['repository_count']}")

        repo_list = entry['repositories']
        if not repo_list:
            continue

        print(" πŸ“ Public Repositories:")
        for item in repo_list[:max_shown]:  # Show top 10 repos
            # Each decoration collapses to an empty string when it has
            # nothing to show.
            stars = ""
            if item['stars'] > 0:
                stars = f"⭐{item['stars']}"
            forks = ""
            if item['forks'] > 0:
                forks = f"🍴{item['forks']}"
            fork_indicator = ""
            if item['is_fork']:
                fork_indicator = "πŸ”—"
            language = ""
            if item['language']:
                language = f"[{item['language']}]"
            print(
                f" β€’ {item['name']} {language} {stars} {forks} {fork_indicator}")

            text = item['description']
            if text:
                # Long descriptions are cut to 100 characters plus an ellipsis.
                if len(text) > max_desc:
                    text = text[:max_desc] + "..."
                print(f" {text}")

        hidden = len(repo_list) - max_shown
        if hidden > 0:
            print(
                f" ... and {hidden} more repositories")
def main():
    """Parse the command line, run the scan, print and save the results.

    The GitHub token is read from the GH_TOKEN environment variable. The
    process exits with status 1 if the scan is interrupted, a network/API
    error is raised, or the results file cannot be written.
    """
    cli = argparse.ArgumentParser(
        description="Scan GitHub organization members and their public repositories",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
export GH_TOKEN=your_github_token_here
python github_org_scanner.py microsoft
python github_org_scanner.py google --output my_results.json
python github_org_scanner.py alfresco --limit 10
"""
    )
    cli.add_argument(
        "organization",
        help="GitHub organization name to scan"
    )
    cli.add_argument(
        "--output",
        help="Output file to save results as JSON (default: results-TIMESTAMP.json)"
    )
    cli.add_argument(
        "--limit",
        type=int,
        help="Limit processing to the first X users (useful for testing or large orgs)"
    )
    args = cli.parse_args()

    # Fall back to a timestamped filename when --output was not given.
    if not args.output:
        stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
        args.output = f"results-{stamp}.json"

    # The token comes from the environment; scanning still works without it.
    token = os.getenv('GH_TOKEN')
    if not token:
        print("⚠️ Warning: No GitHub token found in GH_TOKEN environment variable.")
        print(" You'll be subject to lower rate limits (60 requests/hour).")
        print(
            " Set GH_TOKEN environment variable for higher limits (5000 requests/hour).")
        print()

    scanner = GitHubOrgScanner(token=token)

    try:
        results = scanner.scan_organization(args.organization, args.limit)

        print_summary(results)
        print_detailed_results(results)

        # Results are always written to a file.
        with open(args.output, 'w', encoding='utf-8') as out_file:
            json.dump(results, out_file, indent=2)
        print(f"\nπŸ’Ύ Results saved to: {args.output}")
    except KeyboardInterrupt:
        print("\n\n⚠️ Scan interrupted by user")
        sys.exit(1)
    # requests.RequestException derives from IOError, so it has to be handled
    # before the generic OS-error clause to get the network-specific message.
    except requests.RequestException as err:
        print(f"\n❌ Network/API error during scan: {err}")
        sys.exit(1)
    except OSError as err:
        print(f"\n❌ File I/O error: {err}")
        sys.exit(1)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
requests>=2.25.0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment