Skip to content

Instantly share code, notes, and snippets.

@mr-rizwan-syed
Last active April 20, 2025 18:44
Show Gist options
  • Save mr-rizwan-syed/8933e5183fd8313d24cf854f8a4e9fcd to your computer and use it in GitHub Desktop.
Save mr-rizwan-syed/8933e5183fd8313d24cf854f8a4e9fcd to your computer and use it in GitHub Desktop.

Revisions

  1. mr-rizwan-syed revised this gist Apr 16, 2024. 1 changed file with 41 additions and 21 deletions.
    62 changes: 41 additions & 21 deletions tldextractor.py
    Original file line number Diff line number Diff line change
    @@ -1,36 +1,56 @@
    import sys
    import tldextract

    def extract_apex_domains_from_file(file_path):
    try:
    with open(file_path, 'r') as file:
    subdomains = [line.strip() for line in file.readlines()]
    def is_valid_domain(domain):
    # Check if the domain contains only alphanumeric characters and hyphens
    if not domain.replace('-', '').replace('.', '').isalnum():
    return False

    apex_domains = set()
    # Check if the domain doesn't end with a period
    if domain.endswith('.'):
    return False

    for subdomain in subdomains:
    # Use tldextract to extract the domain information
    domain_info = tldextract.extract(subdomain)
    return True

    # Construct the apex domain
    apex_domain = f"{domain_info.domain}.{domain_info.suffix}"
    def extract_apex_domains_from_input(input_data):
    subdomains = input_data.splitlines()

    apex_domains = set()

    for subdomain in subdomains:
    # Use tldextract to extract the domain information
    domain_info = tldextract.extract(subdomain)

    # Construct the apex domain
    apex_domain = f"{domain_info.domain}.{domain_info.suffix}"

    # Check if the apex domain is valid
    if is_valid_domain(apex_domain):
    # Add to the set to ensure uniqueness
    apex_domains.add(apex_domain)

    return list(apex_domains)
    except FileNotFoundError:
    print(f"Error: File '{file_path}' not found.")
    return []
    return list(apex_domains)

    if __name__ == "__main__":
    if len(sys.argv) != 2:
    print("Usage: python script.py <subdomains_file>")
    if len(sys.argv) == 2:
    file_path = sys.argv[1]
    try:
    with open(file_path, 'r') as file:
    input_data = file.read()
    except FileNotFoundError:
    print(f"Error: File '{file_path}' not found.")
    sys.exit(1)
    elif len(sys.argv) == 1:
    # Read from stdin
    input_data = sys.stdin.read()
    else:
    print("Usage: python script.py [<subdomains_file>]")
    sys.exit(1)

    subdomains_file = sys.argv[1]
    apex_domains = extract_apex_domains_from_file(subdomains_file)
    apex_domains = extract_apex_domains_from_input(input_data)

    print("Apex Domains:")
    for apex_domain in apex_domains:
    print(apex_domain)
    if apex_domains:
    for apex_domain in apex_domains:
    print(apex_domain)
    else:
    print("No valid apex domains found.")
  2. mr-rizwan-syed renamed this gist Feb 1, 2024. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  3. mr-rizwan-syed created this gist Feb 1, 2024.
    36 changes: 36 additions & 0 deletions gistfile1.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,36 @@
    import sys
    import tldextract

    def extract_apex_domains_from_file(file_path):
        """Return the unique apex domains for the hostnames listed in a file.

        Every line of ``file_path`` is treated as one hostname. Each hostname
        is split with ``tldextract.extract`` and reduced to
        ``"<domain>.<suffix>"``. Lines that are blank, or for which the
        extraction yields an empty domain or an empty suffix, are skipped so
        that malformed entries such as ``"."`` or ``"localhost."`` never reach
        the result.

        Args:
            file_path: Path to a text file containing one hostname per line.

        Returns:
            A list of unique apex domains. The order is arbitrary because the
            values are collected in a set. If the file does not exist, an
            error message is printed and an empty list is returned.
        """
        # Keep the try body limited to the file access so that only a missing
        # input file is reported as "not found".
        try:
            with open(file_path, 'r') as file:
                hosts = [line.strip() for line in file]
        except FileNotFoundError:
            print(f"Error: File '{file_path}' not found.")
            return []

        apex_domains = set()

        for host in hosts:
            # Blank lines carry no hostname; skip them before extraction.
            if not host:
                continue

            # Use tldextract to split the hostname into its components.
            domain_info = tldextract.extract(host)

            # Without both parts the joined string would start or end with a
            # dot and would not be a usable apex domain.
            if not (domain_info.domain and domain_info.suffix):
                continue

            # The set guarantees uniqueness.
            apex_domains.add(f"{domain_info.domain}.{domain_info.suffix}")

        return list(apex_domains)

    if __name__ == "__main__":
        # The script expects exactly one argument: the path to the file that
        # lists the hostnames.
        cli_args = sys.argv[1:]
        if len(cli_args) != 1:
            print("Usage: python script.py <subdomains_file>")
            sys.exit(1)

        # Run the extraction before printing the heading, so anything the
        # extraction prints appears above it.
        found = extract_apex_domains_from_file(cli_args[0])

        print("Apex Domains:")
        for name in found:
            print(name)