Created
May 27, 2012 22:52
-
-
Save crizCraig/2816295 to your computer and use it in GitHub Desktop.
Revisions
-
crizCraig revised this gist
May 27, 2012. 1 changed file with 1 addition and 1 deletion. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -51,4 +51,4 @@ def go(query, path): time.sleep(1.5) # Example use go('landscape', 'myDirectory') -
crizCraig revised this gist
May 27, 2012. 1 changed file with 10 additions and 9 deletions. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -20,33 +20,34 @@ def go(query, path): if not os.path.exists(BASE_PATH): os.makedirs(BASE_PATH) start = 0 # Google's start query string parameter for pagination. while start < 60: # Google will only return a max of 56 results. r = requests.get(BASE_URL % start) for image_info in json.loads(r.text)['responseData']['results']: url = image_info['unescapedUrl'] try: image_r = requests.get(url) except ConnectionError, e: print 'could not download %s' % url continue # Remove file-system path characters from name. title = image_info['titleNoFormatting'].replace('/', '').replace('\\', '') file = open(os.path.join(BASE_PATH, '%s.jpg') % title, 'w') try: Image.open(StringIO(image_r.content)).save(file, 'JPEG') except IOError, e: # Throw away some gifs...blegh. print 'could not save %s' % url continue finally: file.close() print start start += 4 # 4 images per page. # Be nice to Google and they'll be nice back :) time.sleep(1.5) # Example use -
crizCraig revised this gist
May 27, 2012. 1 changed file with 22 additions and 11 deletions. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -7,7 +7,11 @@ from requests.exceptions import ConnectionError def go(query, path): """Download full size images from Google image search. Don't print or republish images without permission. I used this to train a learning algorithm. """ BASE_URL = 'https://ajax.googleapis.com/ajax/services/search/images?'\ 'v=1.0&q=' + query + '&start=%d' @@ -16,27 +20,34 @@ def go(query, path): if not os.path.exists(BASE_PATH): os.makedirs(BASE_PATH) start = 0 # Start query string parameter for pagination. while start < 60: # Google returns a max of 56 results. r = requests.get(BASE_URL % start) for image_info in json.loads(r.text)['responseData']['results']: try: image_r = requests.get(image_info['unescapedUrl']) except ConnectionError, e: print 'could not download %s' % image_info['url'] continue # Remove file system path characters from name. title = image_info['titleNoFormatting'].replace('/', '').replace('\\', '') file = open(os.path.join(BASE_PATH, '%s.jpg') % title, 'w') try: Image.open(StringIO(image_r.content)).save(file, 'JPEG') except IOError, e: # This usually throws away some gifs. But who cares about gifs. print 'could not save %s' % image_info['url'] continue finally: file.close() print start start += 4 # Four images are returned per page. # Be nice to Google and they'll be nice to you :) time.sleep(1.5) # Example use go('landscape', 'negative_examples') -
crizCraig renamed this gist
May 27, 2012. 1 changed file with 0 additions and 0 deletions. There are no files selected for viewing.
File renamed without changes. -
crizCraig created this gist
May 27, 2012. There are no files selected for viewing.
import json
import os
import time
from io import BytesIO

import requests
from PIL import Image
from requests.exceptions import ConnectionError

# NOTE(review): this looks like the legacy Google AJAX Search API, which may
# no longer be served -- confirm the endpoint still responds before relying
# on this script.
_SEARCH_URL = 'https://ajax.googleapis.com/ajax/services/search/images'
_MAX_START = 60       # Pagination stops once `start` reaches this value.
_PAGE_SIZE = 4        # `start` advances by this much per request.
_PAGE_DELAY = 1.5     # Seconds to sleep between search requests.


def go(query, path):
    """Download full size images from Google image search.

    Every search result is fetched, re-encoded as JPEG and written to
    ``<path>/<query>/<title>.jpg``, where ``<title>`` is the result's
    ``titleNoFormatting`` with ``/`` and ``\\`` removed. The directory is
    created when it does not exist yet.

    Results that cannot be downloaded, decoded, or written as JPEG are
    reported on stdout and skipped. Paging stops early when a response
    carries no results.

    Args:
        query: Search terms; also used as the name of the sub-directory.
        path: Directory under which the ``query`` sub-directory is created.
    """
    target_dir = os.path.join(path, query)
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    for start in range(0, _MAX_START, _PAGE_SIZE):
        # Let requests build the query string so that spaces, '&' and '%'
        # in `query` are encoded instead of corrupting the URL.
        r = requests.get(
            _SEARCH_URL,
            params={'v': '1.0', 'q': query, 'start': start},
        )
        # `responseData` can be null (e.g. on an API error); treat that the
        # same as an empty page rather than failing on None['results'].
        response_data = json.loads(r.text).get('responseData') or {}
        results = response_data.get('results') or []
        if not results:
            break

        for image_info in results:
            url = image_info['unescapedUrl']
            try:
                image_r = requests.get(url)
            except ConnectionError:
                print('could not download %s' % url)
                # Skip this result; without this the code below would run
                # against a missing or stale response.
                continue

            # Remove file-system path characters from the name.
            title = image_info['titleNoFormatting']
            title = title.replace('/', '').replace('\\', '')

            # Encode into memory first so that a failed conversion does not
            # leave an empty or truncated file on disk.
            encoded = BytesIO()
            try:
                Image.open(BytesIO(image_r.content)).save(encoded, 'JPEG')
            except IOError:
                print('could not save %s' % url)
                continue

            # Binary mode is required for image data; `with` closes the
            # handle even if the write fails.
            with open(os.path.join(target_dir, title + '.jpg'), 'wb') as out:
                out.write(encoded.getvalue())

        print(start)
        # Pause between pages to avoid hammering the search service.
        time.sleep(_PAGE_DELAY)


# Example use
go('landscapes', 'negative_examples')