
@michael-bey
Last active February 14, 2019 16:25

Revisions

  1. michael-bey revised this gist Nov 14, 2016. 1 changed file with 46 additions and 60 deletions.
    search.py: 106 changes (46 additions, 60 deletions)

    # requirements: selenium wget python 2.7

    import time
    import sys
    import wget
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.common.keys import Keys

    def googlescrape(url):
        # Open the Google results page for the given query URL in Firefox.
        browser = webdriver.Firefox()
        browser.get(url)
        time.sleep(3)  # brief pause so the results page can finish loading

        # Each organic result is wrapped in a div with class "g".
        results = browser.find_elements_by_css_selector('div.g')
        if len(results) == 0:
            print "No results found"
            browser.quit()
        else:
            # Print and download the first link of every result.
            for x in range(0, len(results)):
                link = results[x].find_element_by_tag_name("a")
                href = link.get_attribute("href")
                print href
                wget.download(href)
            browser.quit()
        return

    if len(sys.argv) == 3:
        # Explicit domain and filetype, e.g.: python search.py domain.com pdf
        domain = sys.argv[1]
        ftype = sys.argv[2]
        url = "https://www.google.com/search?num=100&start=0&hl=em&meta=&q=site:"
        url += domain
        url += "+filetype:"
        url += ftype
        url += "&filter=0"
        googlescrape(url)
    elif len(sys.argv) == 2:
        # Domain only: check for pdf, doc and xls files in turn.
        for i in range(0, 3):
            if i == 0:
                print "Checking for pdfs..."
                ftype = "pdf"
            elif i == 1:
                print "Checking for docs..."
                ftype = "doc"
            elif i == 2:
                print "Checking for xls..."
                ftype = "xls"

            domain = sys.argv[1]
            url = "https://www.google.com/search?num=100&start=0&hl=em&meta=&q=site:"
            url += domain
            url += "+filetype:"
            url += ftype
            url += "&filter=0"
            googlescrape(url)
    else:
        print "Error: Improper number of arguments. Usage: python search.py domain.com pdf"
        sys.exit()
  2. michael-bey created this gist Aug 3, 2016.
    search.py: 77 changes (77 additions, 0 deletions)

    #requirements: selenium wget python 2.7

    import time
    import sys
    import wget
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.common.keys import Keys

    def googlescrape(str):
        browser = webdriver.Firefox()
        browser.get(url)
        time.sleep(3) # sleep for 5 seconds so you can see the results

        results = browser.find_elements_by_css_selector('div.g')
        if len(results) == 0:
            print "No results found"
            browser.quit()
        else:
            for x in range(0,len(results)):
                link = results[x].find_element_by_tag_name("a")
                href = link.get_attribute("href")
                print href
                wget.download(href)
            browser.quit()

        return


    if len(sys.argv) == 3:
        domain = sys.argv[1]
        ftype = sys.argv[2]
        url = "https://www.google.com/search?num=100&start=0&hl=em&meta=&q=site:"
        url += domain
        url += "+filetype:"
        url += ftype
        url += "&filter=0"
        googlescrape(url)
    elif len(sys.argv) == 2:

        for i in range (0,3):
            if i==0:
                print "Checking for pdfs..."
                ftype = "pdf"
            elif i == 1:
                print "Checking for docs..."
                ftype = "doc"
            elif i == 2:
                print "Checking for xls..."
                ftype = "xls"

            domain = sys.argv[1]
            url = "https://www.google.com/search?num=100&start=0&hl=em&meta=&q=site:"
            url += domain
            url += "+filetype:"
            url += ftype
            url += "&filter=0"
            googlescrape(url)

    else:
        print "Error: Improper number of arguments. Usage: python search.py domain.com pdf"
        sys.exit()
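
For context, a minimal usage sketch, not part of the gist itself: invoking search.py from another Python 2 script in the way its own usage string describes. The domain value below is an illustrative placeholder, and Firefox plus the selenium and wget packages are assumed to be installed so that search.py can run.

    import subprocess

    # Explicit domain and filetype, equivalent to: python search.py example.com pdf
    # ("example.com" is a placeholder domain, not taken from the gist)
    subprocess.call(["python", "search.py", "example.com", "pdf"])

    # Domain only; search.py then loops over pdf, doc and xls itself
    subprocess.call(["python", "search.py", "example.com"])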