-
-
Save joshyu/4f1aaaf36eb27a998e8e030dec7aa1dc to your computer and use it in GitHub Desktop.
Revisions
-
hackerdem revised this gist
Jul 31, 2020. 1 changed file with 6 additions and 4 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -11,7 +11,8 @@ def check(self,address): try: req=urllib.request.Request(url=address) resp=urllib.request.urlopen(req) if resp.status in [400,404,403,408,409,501,502,503]: print (Fore.RED+resp.status+"-"+resp.reason+"-->"+address) else: print (Fore.GREEN+"no problem in-->"+address) except Exception as e: @@ -37,10 +38,11 @@ def extract_link(address): tags= {'a':'href', 'img':'src', 'script':'src', 'link':'href' } for key,value in iter(tags.items()): try: headers={"User-Agent": "Mozilla/5.0"} res=urllib.request.urlopen(urllib.request.Request(url=address, headers=headers)) response=res.read().decode('utf-8') #needs improvement for link in BeautifulSoup(response,"html.parser",parse_only=SoupStrainer(key)): if link.has_attr(value) and address in link[value]: # address in link[value] to keep testing the target site only p=pattern_adjust(link[value]) if p!=0 and str(p)!='None': newcheck=check_link(p) @@ -63,7 +65,7 @@ def threader(): q=queue.Queue() global hyperlinks,website hyperlinks=set() website= 'https://www.sozcu.com.tr/' #Target website for x in range(30): t=threading.Thread(target=threader) t.deamon=True -
hackerdem revised this gist
Jul 31, 2020. 1 changed file with 1 addition and 0 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1 @@ foo -
hackerdem created this gist
May 11, 2016. There are no files selected for viewing
"""Threaded broken-link checker.

Crawls a target website, follows every <a>/<img>/<script>/<link> URL it
finds, reports each link's HTTP status in color, and keeps crawling links
that stay on the target domain.
"""
from bs4 import BeautifulSoup, SoupStrainer
import urllib.request
import urllib.error
import colorama
import re
import queue
import threading
from colorama import Fore
from urllib.parse import urlsplit, urlunsplit, urljoin

# HTTP status codes treated as a broken link.
ERROR_CODES = {400, 403, 404, 408, 409, 501, 502, 503}

# Guards the shared `hyperlinks` set — 30 worker threads mutate it.
_seen_lock = threading.Lock()

# Asset extensions that are checked but not crawled further.
_SKIP_SUFFIXES = ('.png', '.jpeg', '.js', '.jpg')


class check_link():
    """Checks a single URL and prints a colored OK / broken report."""

    def __init__(self, address):
        self.address = address

    def check(self, address):
        """Fetch *address* and report its HTTP status.

        Note: urlopen raises HTTPError for 4xx/5xx responses, so broken
        links are normally reported from the HTTPError branch, not from
        the resp.status test (kept for servers/openers that don't raise).
        """
        try:
            req = urllib.request.Request(url=address)
            resp = urllib.request.urlopen(req)
            # BUG FIX: resp.status is an int; the original concatenated it
            # directly to a str and raised TypeError on every broken link.
            if resp.status in ERROR_CODES:
                print(Fore.RED + "{}-{}-->{}".format(resp.status, resp.reason, address))
            else:
                print(Fore.GREEN + "no problem in-->" + address)
        except urllib.error.HTTPError as e:
            # 4xx/5xx actually surface here rather than via resp.status.
            print(Fore.RED + "{}-{}-->{}".format(e.code, e.reason, address))
        except Exception as e:
            # Network errors, bad URLs, timeouts: report and keep crawling.
            print(Fore.YELLOW + "{}-{}".format(e, address))


def pattern_adjust(a, base=''):
    """Normalize a raw href/src value into an absolute URL.

    Returns 0 for in-page fragments ('#...'), None when the value cannot
    be normalized, otherwise an absolute URL string.

    *base* is the page the link was found on (new, defaulted parameter —
    the original referenced an undefined global `address` here, so every
    relative link raised NameError and was silently dropped).
    """
    try:
        if re.match(r'^#', a):
            return 0  # same-page anchor: nothing to fetch
        r = urlsplit(a)
        if r.scheme == '' and (r.netloc != '' or r.path != ''):
            d = urlunsplit(r)
            if re.match(r'^//', d):
                # Protocol-relative URL ("//host/path"): assume https.
                m = re.search(r'(?<=//)\S+', d)
                return "https://" + m.group(0)
            # Relative path: resolve against the page it came from.
            return urljoin(base, a) if base else base + a
        elif r.scheme == '' and r.netloc == '':
            # Bare query/empty value — resolve against the base page too.
            return base + a
        else:
            return a
    except Exception:
        return None


def extract_link(address):
    """Download *address*, check every outgoing link, queue unseen on-site ones.

    Relies on module globals set up in __main__: q, hyperlinks, website.
    """
    tags = {'a': 'href', 'img': 'src', 'script': 'src', 'link': 'href'}
    try:
        # Fetch the page ONCE (the original re-downloaded it for each of
        # the four tag types).
        res = urllib.request.urlopen(address)
        response = res.read().decode('utf-8')  # assumes UTF-8 pages — TODO confirm
    except Exception as e:
        print(e, address)
        return
    for tag, attr in tags.items():
        try:
            for link in BeautifulSoup(response, "html.parser", parse_only=SoupStrainer(tag)):
                if not link.has_attr(attr):
                    continue
                p = pattern_adjust(link[attr], address)
                if p == 0 or p is None:
                    continue
                check_link(p).check(p)
                with _seen_lock:
                    unseen = p not in hyperlinks
                    if unseen:
                        hyperlinks.add(p)
                # Crawl only new links that stay on the target domain.
                # (Domain test via split('.') kept from the original; it
                # is fragile — needs improvement.)
                if unseen and website.split('.')[1] in p:
                    # BUG FIX: the original tested website.endswith(...)
                    # (a constant, so the filter never did anything) and
                    # spelled 'jpg' without the dot.  Filter the link.
                    if not p.endswith(_SKIP_SUFFIXES):
                        q.put(p)
        except Exception as e:
            print(e, address)


def threader():
    """Worker loop: pull URLs off the queue until the process exits."""
    while True:
        value = q.get()
        extract_link(value)
        q.task_done()


if __name__ == "__main__":
    colorama.init()
    q = queue.Queue()
    hyperlinks = set()
    website = input("Please enter the website address: ").strip()
    for _ in range(30):
        t = threading.Thread(target=threader)
        # BUG FIX: the original wrote `t.deamon = True` (typo), which just
        # set a junk attribute — the threads were non-daemon and the
        # process hung forever after q.join() completed.
        t.daemon = True
        t.start()
    q.put(website)
    q.join()