Created
March 10, 2021 13:54
-
-
Save charanjit-singh/44b792fd92c4ab3f709422589a7f08a9 to your computer and use it in GitHub Desktop.
Revisions
-
charanjit-singh created this gist
Mar 10, 2021 .There are no files selected for viewing
# Scraper for the RERA Punjab agent register.
#
# Reads the saved listing page RERA_PUNJAB_LIST.html, writes every agent row
# to PUNJAB_RERA_LIST.csv, then fetches each agent's detail page from
# rera.punjab.gov.in on two threads (one for online-registered agents, one
# for offline-registered agents) and writes the enriched rows to
# ONLINE_PUNJAB_RERA_LIST.csv and OFFLINE_PUNJAB_RERA_LIST.csv.
import codecs
import csv
import re
import threading
import time

import requests
from bs4 import BeautifulSoup

# Column order of the listing CSV; these are also the leading columns of the
# per-thread detail CSVs. Kept as a constant so an empty listing still yields
# a valid header instead of an IndexError on BROKERS[0].
BASE_FIELDS = [
    "Sr. No.",
    "Name",
    "District",
    "RERA No.",
    "Registration Valid Upto",
    "Offline",
    "Agent ID",
]

# id of the "view details" anchor that marks an offline-registered agent.
OFFLINE_BUTTON_ID = "modalOpenerOfflineRegisteredButton"

# Seconds to wait for the server before giving up on one detail page; without
# a timeout a stalled connection would block its thread forever.
REQUEST_TIMEOUT = 30

_MULTIPLE_SPACES = re.compile(" +")


def _parse_listing(listing_soup):
    """Return one dict (keyed by BASE_FIELDS) per agent row in the listing.

    Rows that lack five data cells, a details anchor or an agent-id input
    (for example a header row built from <th> cells) are skipped rather than
    raising IndexError.
    """
    brokers = []
    for row in listing_soup.find_all("tr"):
        cells = row.find_all("td")
        anchors = row.find_all("a")
        inputs = row.find_all("input")
        if len(cells) < 5 or not anchors or not inputs:
            continue
        sr, name, district, rera, valid_upto = (
            cell.get_text().strip() for cell in cells[:5]
        )
        is_offline = anchors[0].get("id") == OFFLINE_BUTTON_ID
        brokers.append({
            "Sr. No.": sr,
            "Name": name,
            "District": district,
            "RERA No.": rera,
            "Registration Valid Upto": valid_upto,
            "Offline": 1 if is_offline else 0,
            "Agent ID": inputs[0].get("value"),
        })
    return brokers


def _write_csv(path, fieldnames, rows):
    """Write `rows` (dicts) to `path` with a header row; report I/O errors."""
    try:
        # newline="" is what the csv module expects; without it Windows gets
        # a blank line after every record. UTF-8 keeps non-ASCII names from
        # failing under a narrow locale encoding.
        with open(path, "w", newline="", encoding="utf-8") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(rows)
    except IOError:
        print("I/O error")


def _clean_value(text):
    """Strip `text`, turn CR/LF into spaces and collapse runs of spaces."""
    flattened = text.strip().replace("\r", " ").replace("\n", " ")
    return _MULTIPLE_SPACES.sub(" ", flattened)


def _extract_details(detail_soup):
    """Yield (label, value) for each "single-detail" cell and its successor."""
    cells = detail_soup.find_all("td")
    for label_cell, value_cell in zip(cells, cells[1:]):
        if "single-detail" in (label_cell.get("class") or []):
            yield label_cell.get_text().strip(), _clean_value(value_cell.get_text())


class _DetailScraperThread(threading.Thread):
    """Shared implementation of the online/offline detail scrapers.

    Subclasses only supply the class attributes below; `run` filters BROKERS
    by `offline_flag`, downloads each agent's detail page, merges the
    label/value pairs into the broker dict (in place) and writes the result
    to `output_csv`.
    """

    offline_flag = None      # value of broker["Offline"] this thread handles
    detail_url = None        # URL prefix; the agent id is appended to it
    output_csv = None        # destination CSV path
    progress_label = ""      # first token of the per-broker progress line
    failure_label = ""       # first token of the per-broker failure line
    summary_label = ""       # first token of the final failure summary
    show_url = False         # whether the progress line includes the URL
    retry_pause = 0          # seconds to sleep after a failed request

    def __init__(self, name,):
        super().__init__(name=name)

    def _fail(self, broker, reason, failed):
        """Log one failed broker, remember it and back off if configured."""
        print(self.failure_label, broker, "Reason:", reason)
        failed.append(broker)
        if self.retry_pause:
            time.sleep(self.retry_pause)

    def run(self):
        keys = list(BASE_FIELDS)
        pending = [b for b in BROKERS if b["Offline"] == self.offline_flag]
        total = len(pending)
        scraped = []
        failed = []

        for position, broker in enumerate(pending, start=1):
            agent_id = broker["Agent ID"]
            if not agent_id:
                # No id means no URL can be built; treat it as a failure
                # instead of crashing the thread on str + None.
                self._fail(broker, "missing Agent ID", failed)
                continue

            url = self.detail_url + agent_id
            progress = [self.progress_label, position, "/", total]
            if self.show_url:
                progress += ["URL:", url]
            print(*progress)

            try:
                response = requests.get(url, timeout=REQUEST_TIMEOUT)
                # An HTTP error page carries no agent details; count it as a
                # failure rather than emitting an un-enriched row.
                response.raise_for_status()
            except requests.RequestException as e:
                self._fail(broker, str(e), failed)
                continue

            detail_soup = BeautifulSoup(response.content, "html.parser")
            for key, value in _extract_details(detail_soup):
                if key not in keys:
                    keys.append(key)
                broker[key] = value
            scraped.append(broker)

        _write_csv(self.output_csv, keys, scraped)
        print(self.summary_label, failed)


class OnlineThread(_DetailScraperThread):
    """Scrape detail pages of online-registered agents (Offline == 0)."""

    offline_flag = 0
    detail_url = "https://rera.punjab.gov.in/reraindex/PublicView/AgentViewDetails?inAgent_ID="
    output_csv = "ONLINE_PUNJAB_RERA_LIST.csv"
    progress_label = "Processing Online Broker:"
    failure_label = "Failed Online Broker"
    summary_label = "FAILED ONLINE BROKERS"
    retry_pause = 5


class OfflineThread(_DetailScraperThread):
    """Scrape detail pages of offline-registered agents (Offline == 1)."""

    offline_flag = 1
    detail_url = "https://rera.punjab.gov.in/reraindex/PublicView/AgentViewOfflineRegisteredDetails?inAgent_ID="
    output_csv = "OFFLINE_PUNJAB_RERA_LIST.csv"
    progress_label = "Processing Offline Broker:"
    failure_label = "Failed offline Broker"
    summary_label = "FAILED OFFLINE BROKERS"
    show_url = True


# --- Script body: runs at module level, as the original script did. ---

# The context manager closes the listing file once it has been read.
with codecs.open("RERA_PUNJAB_LIST.html", "r") as f:
    page = f.read()

soup = BeautifulSoup(page, 'html.parser')
BROKERS = _parse_listing(soup)

csv_file = "PUNJAB_RERA_LIST.csv"
_write_csv(csv_file, BASE_FIELDS, BROKERS)

# The two threads work on disjoint subsets of BROKERS, so mutating the broker
# dicts in place does not overlap between them.
onlineThread = OnlineThread("Online")
offlineThread = OfflineThread("Offline")
onlineThread.start()
offlineThread.start()