-
-
Save sshyran/2ae5d139cabfd6844813edb50b960e85 to your computer and use it in GitHub Desktop.
Revisions
-
sshyran renamed this gist
Mar 12, 2021 . 1 changed file with 0 additions and 0 deletions.There are no files selected for viewing
File renamed without changes. -
google-code-export created this gist
Mar 13, 2015 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,49 @@ import requests import re import sys tags = ['', 'python', 'javascript', 'django', 'web', 'google', 'java', 'ajax', 'rails', 'plugin', 'android', 'cplusplus', 'mysql', 'dotnet', 'game', 'appengine', 'php', 'flash', 'jquery', 'database', 'gwt'] seen_tags = set(tags) projects = set() def get_tag(): i = 0 while i < len(tags): yield tags[i] i += 1 def add_tag(tag): if tag not in seen_tags: tags.append(tag) seen_tags.add(tag) SEARCH_URL = 'https://code.google.com/hosting/search?q=label%3A' for tag in get_tag(): r = requests.get(SEARCH_URL+tag) if '&' not in tag: try: num_result = int(re.search('Results \d+ - \d+ of (\d+)', r.text).group(1)) except: print(':( could not get {}'.format(SEARCH_URL+tag), file=sys.stderr) continue for i in range(50, num_result, 10): add_tag(tag+'&start='+str(i)) continue new_tags = set(map(str.lower, re.findall('<a href="/hosting/search\?q=label:([^"]+)">', r.text))) for tag in new_tags: add_tag(tag) new_projects = set(re.findall('<a href="/p/([^/"]+)/">', r.text)) - projects if new_projects: print('https://code.google.com/export-to-github/export?project='+'\nhttps://code.google.com/export-to-github/export?project='.join(new_projects)) projects |= new_projects