Last active
February 14, 2019 16:25
-
-
Save michael-bey/fbf57ea372ec691f087c91330596990f to your computer and use it in GitHub Desktop.
Revisions
-
michael-bey revised this gist
Nov 14, 2016 . 1 changed file with 46 additions and 60 deletions.There are no files selected for viewing
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
# Original file line number Diff line number Diff line change
# @@ -1,4 +1,4 @@
# requirements: selenium wget python 2.7
import time
import sys
# @@ -9,69 +9,55 @@
# NOTE(review): the diff view does not show the file lines between the two
# hunks; presumably they hold the `wget` / `webdriver` imports used below.
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys

# Extensions searched when only a domain is given, paired with the label
# printed in the "Checking for ..." progress message.
FILETYPE_SWEEP = (("pdfs", "pdf"), ("docs", "doc"), ("xls", "xls"))


def search_url(domain, ftype):
    """Return the Google query URL for files of type *ftype* on *domain*."""
    # NOTE(review): "hl=em" is reproduced verbatim from the original; it
    # may be a typo for "hl=en" -- confirm before changing.
    return "".join([
        "https://www.google.com/search?num=100&start=0&hl=em&meta=&q=site:",
        domain,
        "+filetype:",
        ftype,
        "&filter=0",
    ])


def googlescrape(url):
    """Open *url* in Firefox and download the first link of each result.

    Every `div.g` element on the loaded page is treated as one search
    result; the href of its first anchor is printed and then fetched with
    `wget.download`. Prints "No results found" when the page has no such
    element.

    The original signature was `googlescrape(str)`, but the body ignored
    the argument and read the module-level `url` instead; the parameter is
    now actually used. Callers pass it positionally.
    """
    browser = webdriver.Firefox()
    try:
        browser.get(url)
        time.sleep(3)  # pause 3 seconds so the results page can finish loading
        results = browser.find_elements_by_css_selector('div.g')
        if not results:
            print("No results found")
            return
        for result in results:
            anchors = result.find_elements_by_tag_name("a")
            href = anchors[0].get_attribute("href") if anchors else None
            if not href:
                # A result block without a usable link should not abort
                # the remaining downloads.
                continue
            print(href)
            wget.download(href)
    finally:
        # Close the browser even if a lookup or download raises.
        browser.quit()


def main(argv):
    """Dispatch on the command line: `search.py domain [filetype]`."""
    if len(argv) == 3:
        googlescrape(search_url(argv[1], argv[2]))
    elif len(argv) == 2:
        for label, ftype in FILETYPE_SWEEP:
            print("Checking for %s..." % label)
            googlescrape(search_url(argv[1], ftype))
    else:
        print("Error: Improper number of arguments. Usage: python search.py domain.com pdf")
        # Exit non-zero so calling shells can detect the usage error.
        sys.exit(1)


if __name__ == "__main__":
    main(sys.argv)
# -
michael-bey created this gist
Aug 3, 2016 .There are no files selected for viewing
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
# Original file line number Diff line number Diff line change
# @@ -0,0 +1,77 @@
#requirements: selenium wget python 2.7
import time
import sys
import wget
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys

# Google query template; filled with (domain, filetype).
# NOTE(review): "hl=em" is kept exactly as the original wrote it; it may be
# a typo for "hl=en" -- confirm before changing.
SEARCH_URL_TEMPLATE = (
    "https://www.google.com/search?num=100&start=0&hl=em&meta=&q=site:"
    "%s+filetype:%s&filter=0"
)

# File types tried when the user supplies only a domain:
# (label used in the progress message, filetype extension).
DEFAULT_FILETYPES = (
    ("pdfs", "pdf"),
    ("docs", "doc"),
    ("xls", "xls"),
)


def build_search_url(domain, ftype):
    """Return the search URL restricted to *domain* and file type *ftype*."""
    return SEARCH_URL_TEMPLATE % (domain, ftype)


def googlescrape(url):
    """Load a Google results page in Firefox and download every hit.

    For each `div.g` element found on the page, the href of its first
    anchor is printed and passed to `wget.download`. If no such element
    exists, "No results found" is printed instead.

    Args:
        url: The search URL to open. The original declared this parameter
            as `str` (shadowing the builtin) and then ignored it, reading
            the global `url`; it is now the value actually used.
    """
    browser = webdriver.Firefox()
    try:
        browser.get(url)
        time.sleep(3)  # wait 3 seconds for the results to render
        results = browser.find_elements(By.CSS_SELECTOR, 'div.g')
        if not results:
            print("No results found")
            return
        for result in results:
            anchors = result.find_elements(By.TAG_NAME, "a")
            if not anchors:
                # No link inside this result block; keep going rather than
                # abort the whole run.
                continue
            href = anchors[0].get_attribute("href")
            if not href:
                continue
            print(href)
            wget.download(href)
    finally:
        # Always release the Firefox instance, including when a download
        # or element lookup raises.
        browser.quit()


def main(argv):
    """Run the scraper for `argv` = [script, domain] or [script, domain, filetype]."""
    if len(argv) == 3:
        domain, ftype = argv[1], argv[2]
        googlescrape(build_search_url(domain, ftype))
    elif len(argv) == 2:
        domain = argv[1]
        for label, ftype in DEFAULT_FILETYPES:
            print("Checking for %s..." % label)
            googlescrape(build_search_url(domain, ftype))
    else:
        print("Error: Improper number of arguments. Usage: python search.py domain.com pdf")
        # A usage error should be reported with a non-zero exit status.
        sys.exit(1)


if __name__ == "__main__":
    main(sys.argv)