Skip to content

Instantly share code, notes, and snippets.

@zenosxx
Created July 30, 2019 03:21
Show Gist options
  • Select an option

  • Save zenosxx/0809d9f6a77cbda894c66e130358e05a to your computer and use it in GitHub Desktop.

Select an option

Save zenosxx/0809d9f6a77cbda894c66e130358e05a to your computer and use it in GitHub Desktop.

Revisions

  1. zenosxx created this gist Jul 30, 2019.
    126 changes: 126 additions & 0 deletions project.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,126 @@
    import requests,re,os,schedule,time,subprocess
    from bs4 import BeautifulSoup
    from urllib.parse import urlparse
    from sendgrid import SendGridAPIClient
    from sendgrid.helpers.mail import Mail


    # Page URL to monitor; it is handed unchanged to fetch() via check()/grab_js().
    url = input("Enter domain:\n")


    # Download a page with the requests module and hand back its body as text.
    def fetch(url):
        """Return the body of a GET request to *url* as text.

        The URL is echoed to stdout once the request has succeeded.  Any
        ``requests`` failure (including exceeding the ``timeout=10`` limit) is
        swallowed and signalled to the caller as the literal string ``"None"``.
        """
        try:
            response = requests.get(url, timeout=10)
            print(url)
            body = response.text
        except requests.exceptions.RequestException:
            # Callers write the result straight to disk, so a string sentinel
            # is returned rather than the None object.
            return "None"
        return body

    # Parse the page with BeautifulSoup's html.parser and collect <script> sources.
    def grab_js(url):
        """Return the non-empty ``src`` values of every ``<script>`` tag at *url*.

        The page is downloaded with ``fetch``; inline scripts (no ``src``) and
        empty ``src`` attributes are dropped.  The resulting list is printed
        before it is returned.
        """
        markup = BeautifulSoup(fetch(url), 'html.parser')
        candidates = [tag.get('src') for tag in markup.find_all('script')]
        sources = [src for src in candidates if src]
        print(sources)
        return sources



    # Resolve every script reference against the monitored page URL.
    def check():
        """Return the monitored page's script URLs as absolute URLs.

        Each ``src`` reported by ``grab_js`` for the module-level ``url`` is
        resolved the way a browser would resolve it:

        * absolute URLs are kept unchanged;
        * protocol-relative references (``//cdn.example/x.js``) inherit the
          page's scheme, so they can actually be fetched;
        * relative references are joined to the page URL with the correct
          slashes.

        Returns:
            A new list of absolute URL strings, in document order.
        """
        from urllib.parse import urljoin

        # urljoin decides "absolute vs. relative" from the reference's own
        # scheme/netloc, so a relative path that merely contains "http" or
        # "//" somewhere inside it is still resolved against the page.
        return [urljoin(url, src) for src in grab_js(url)]




    # Requires the diff2html CLI:  sudo npm install -g diff2html-cli
    # `diff -u` produces the comparison; diff2html renders it as an HTML report.
    def report(original,duplicate,reportname):
        """Diff two snapshots and send an HTML report when they differ.

        Args:
            original: path of the baseline file.
            duplicate: path of the freshly fetched file.
            reportname: file name of the HTML report, created in ``data/report/``.

        Returns:
            None.  Prints ``No New Changes Detected..`` when the files match,
            ``reporting done`` after a report has been handed to ``send_report``.

        Raises:
            subprocess.CalledProcessError: if diff2html exits with an error.
        """
        # Argument lists (no shell) are used because the file names are derived
        # from remote script URLs and may contain shell metacharacters.
        diff = subprocess.run(
            ["diff", "-u", "--", original, duplicate],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        # diff exits with 0 when the files match, 1 when they differ and a
        # larger value when it could not compare them at all.
        if diff.returncode == 0:
            print("No New Changes Detected..")
            return
        if diff.returncode != 1:
            print("diff failed: " + diff.stderr.decode("utf-8", "replace").strip())
            return

        os.makedirs("data/report", exist_ok=True)
        report_path = os.path.join("data/report", reportname)
        subprocess.run(
            ["diff2html", "-i", "stdin", "-F", report_path],
            input=diff.stdout,
            stdout=subprocess.PIPE,
            check=True,
        )
        with open(report_path, encoding="utf-8") as handle:
            html = handle.read()
        send_report(html)
        print("reporting done")





    # Script URLs returned by check(); computed once, when the module runs, and
    # shared by save_list(), original(), duplicate() and job().
    url_list=check()

    def save_list():
        """Write every monitored script URL to ``list.txt``, one per line.

        An existing ``list.txt`` in the working directory is overwritten.
        """
        # The context manager guarantees the file is flushed and closed, even
        # if a write fails part-way through.
        with open("list.txt", "w") as handle:
            handle.writelines(entry + "\n" for entry in url_list)


    def _snapshot(directory):
        """Fetch every URL in ``url_list`` and store its body under *directory*.

        Each file is named after the last path component of its URL, which is
        the name ``job`` uses when it looks the snapshots up again.
        """
        os.makedirs(directory, exist_ok=True)
        for script_url in url_list:
            body = fetch(script_url)
            target = os.path.join(directory, os.path.basename(script_url))
            # Explicit UTF-8 keeps both snapshot sets byte-comparable and
            # matches the encoding used when the HTML report is read back.
            with open(target, "w", encoding="utf-8") as handle:
                handle.write(body)


    def original():
        """Store the baseline copy of every monitored script in ``data/original/``."""
        _snapshot('data/original')


    def duplicate():
        """Store a fresh copy of every monitored script in ``data/duplicate/``."""
        _snapshot('data/duplicate')



    # Take the initial baseline snapshot before the scheduler starts.
    original()

    # Optional: also dump the monitored URLs to list.txt (currently disabled).
    #save_list()


    def job():
        """Run one monitoring pass.

        Downloads fresh copies of all scripts, compares each one with its
        baseline through ``report`` (the report file name is the current Unix
        timestamp followed by the script's file name and ``.html``), and then
        refreshes the baseline for the next pass.
        """
        print("Schedule Job Started running...")
        duplicate()
        for script_url in url_list:
            name = os.path.basename(script_url)
            stamp = str(int(time.time()))
            report('data/original/' + name,
                   'data/duplicate/' + name,
                   stamp + name + ".html")
        # Re-baseline only after every script has been compared.
        original()


    def send_report(data_report):
        """E-mail *data_report* (an HTML string) through SendGrid.

        Configuration is taken from the environment so that no credential is
        stored in the source:

        * ``SENDGRID_API_KEY`` - required; without it nothing is sent.
        * ``REPORT_FROM_EMAIL`` / ``REPORT_TO_EMAILS`` - sender and recipient,
          each defaulting to an empty string.

        Args:
            data_report: HTML used as the body of the message.

        Returns:
            None.  The HTTP status code of a successful send, or the text of
            any error raised while sending, is printed instead of raised.
        """
        api_key = os.environ.get('SENDGRID_API_KEY')
        if not api_key:
            print("SENDGRID_API_KEY is not set; report not sent")
            return

        message = Mail(from_email=os.environ.get('REPORT_FROM_EMAIL', ''),
                       to_emails=os.environ.get('REPORT_TO_EMAILS', ''),
                       subject='Sending with Twilio SendGrid is Fun',
                       html_content=data_report)
        try:
            sg = SendGridAPIClient(api_key)
            response = sg.send(message)
            print(response.status_code)
        except Exception as e:
            # Best effort: a failed e-mail must not stop the monitoring loop.
            print(str(e))


    # Register job() with the scheduler to run every 5 seconds.
    schedule.every(5).seconds.do(job)

    # Poll the scheduler once per second, forever; run_pending() executes any
    # job that has become due.
    while True:
        schedule.run_pending()
        time.sleep(1)