Skip to content

Instantly share code, notes, and snippets.

@sshyran
Forked from google-code-export/export.py
Created March 12, 2021 10:23
Show Gist options
  • Save sshyran/2ae5d139cabfd6844813edb50b960e85 to your computer and use it in GitHub Desktop.
Save sshyran/2ae5d139cabfd6844813edb50b960e85 to your computer and use it in GitHub Desktop.

Revisions

  1. sshyran renamed this gist Mar 12, 2021. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  2. @google-code-export google-code-export created this gist Mar 13, 2015.
    49 changes: 49 additions & 0 deletions export.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,49 @@
    import requests
    import re
    import sys

    # Work queue of search labels. It starts with these seed labels (the empty
    # string queries the bare "label:" search) and grows while the script runs.
    tags = [
        '',
        'python',
        'javascript',
        'django',
        'web',
        'google',
        'java',
        'ajax',
        'rails',
        'plugin',
        'android',
        'cplusplus',
        'mysql',
        'dotnet',
        'game',
        'appengine',
        'php',
        'flash',
        'jquery',
        'database',
        'gwt',
    ]

    # Set copy of the queue, used for membership tests before queuing a label.
    seen_tags = set(tags)

    # Project names that have already been collected.
    projects = set()

    def get_tag():
        """Yield each entry of the module-level ``tags`` list, in order.

        The length of ``tags`` is re-read before every step, so labels that
        are appended to the list while this generator is being consumed are
        yielded too. The generator finishes once the index reaches the end of
        the list.
        """
        position = 0
        while True:
            if position >= len(tags):
                return
            yield tags[position]
            position += 1

    def add_tag(tag):
        """Queue *tag* in ``tags`` unless it has been queued before.

        ``seen_tags`` records every label ever queued; a label already in it
        is ignored.
        """
        if tag in seen_tags:
            return
        seen_tags.add(tag)
        tags.append(tag)

    # Search endpoint; a label (optionally followed by '&start=<offset>') is
    # appended to form the request URL.
    SEARCH_URL = 'https://code.google.com/hosting/search?q=label%3A'
    # Prefix of the export link printed for every newly found project.
    EXPORT_URL = 'https://code.google.com/export-to-github/export?project='

    # Patterns are compiled once, outside the crawl loop, and written as raw
    # strings so that ``\d`` and ``\?`` reach the regex engine verbatim instead
    # of being treated as (invalid) string escape sequences.
    RESULT_COUNT_RE = re.compile(r'Results \d+ - \d+ of (\d+)')
    TAG_LINK_RE = re.compile(r'<a href="/hosting/search\?q=label:([^"]+)">')
    PROJECT_LINK_RE = re.compile(r'<a href="/p/([^/"]+)/">')

    # Crawl loop: fetch the search page for every queued label, queue further
    # pages and labels, and print an export link for each new project.
    # NOTE(review): the indentation of this script was lost in the copy this
    # was reviewed from; the nesting below is reconstructed -- confirm it
    # against the original file.
    for tag in get_tag():
        response = requests.get(SEARCH_URL + tag)

        if '&' not in tag:
            # A label without '&start=' has not been paginated yet: read the
            # total result count and queue the paginated variants.
            try:
                num_result = int(RESULT_COUNT_RE.search(response.text).group(1))
            except (AttributeError, ValueError):
                # AttributeError: the result counter was not found, so
                # search() returned None. Report the URL and move on; other
                # exceptions (e.g. KeyboardInterrupt) are no longer swallowed.
                print(':( could not get {}'.format(SEARCH_URL + tag),
                      file=sys.stderr)
                continue
            # NOTE(review): offsets start at 50 in steps of 10, so offsets
            # below 50 are never queued -- confirm this is intended.
            for offset in range(50, num_result, 10):
                add_tag(tag + '&start=' + str(offset))
            # NOTE(review): this `continue` is assumed to close the `if` body,
            # i.e. an unpaginated page is only used for its result count.
            continue

        # Queue every label linked from this page (lower-cased).
        new_tags = set(map(str.lower, TAG_LINK_RE.findall(response.text)))
        for new_tag in new_tags:
            add_tag(new_tag)

        # Keep only the projects that have not been collected yet.
        new_projects = set(PROJECT_LINK_RE.findall(response.text)) - projects

        if new_projects:
            # One export link per line.
            print('\n'.join(EXPORT_URL + name for name in new_projects))

        projects |= new_projects