
@mdobson
Last active June 22, 2018 18:34

Revisions

  1. mdobson revised this gist Jun 22, 2018. 1 changed file with 43 additions and 0 deletions.
    2.py
    import pandas as pd
    import requests as r
    import urllib.parse as u

    df = pd.read_csv('deq_asbestos_notifications.csv')

    request_url = 'https://gis.detroitmi.gov/arcgis/rest/services/DoIT/AddressPointGeocoder/GeocodeServer/findAddressCandidates?{}&ZIP=&Single+Line+Input=&category=&outFields=*&maxLocations=&outSR=4326&searchExtent=&location=&distance=&magicKey=&f=pjson'
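    # findAddressCandidates (hit via request_url above) returns ranked geocode
    # candidates as JSON; the {} placeholder takes the url-encoded Street parameter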

    def geocode_address(addr):
        qs = {'Street': addr}
        encoded_qs = u.urlencode(qs)
        res = r.get(request_url.format(encoded_qs))
        data_dict = res.json()
        # Take the top candidate for now, for the sake of greed
        data_dict_candidates = data_dict['candidates']
        if len(data_dict_candidates) > 0:
            first_candidate = data_dict_candidates[0]
            first_candidate_location = first_candidate['location']
            first_candidate_attributes = first_candidate['attributes']
            return pd.Series({'Parcel': first_candidate_attributes['User_fld'].strip(),
                              'Score': first_candidate_attributes['Score'],
                              'Long': first_candidate_location['x'],
                              'Lat': first_candidate_location['y']})
        else:
            return pd.Series({'Parcel': 'Unknown', 'Score': 0})

    # Get all unique addresses in the asbestos notification entries and geocode them
    uniq_addrs = df.Address.unique()
    addrs = {'Address': uniq_addrs}
    addr_df = pd.DataFrame(data=addrs)
    #geocode_slice = addr_df[:20]  # uncomment (and swap in below) to test on a small batch
    geocode_result = addr_df.apply(lambda data_row: geocode_address(data_row['Address']), axis=1)
    merged = addr_df.merge(geocode_result, left_index=True, right_index=True)


    # This will find imperfectly matched geocode results
    #unmatched_geocodes = merged.query('Score < 100.00')

    # Join geocoded addresses back to the original dataframe
    joined_df = df.set_index('Address').join(merged.set_index('Address'))
    queried_df = joined_df.query('Score > 0')
    bad_addresses = joined_df.query('Score == 0')

    queried_df.to_csv('deq_asbestos_notifications_with_geocodes.csv')
    bad_addresses.to_csv('deq_asbestos_notifications_anomalous_addresses.csv')
  2. mdobson created this gist Jun 22, 2018.
    1.py
    import requests
    import urllib
    from bs4 import BeautifulSoup
    from urllib.parse import urlparse


    # Initial request: fetch the search form page
    r = requests.get('http://www.deq.state.mi.us/asbestos_notifications/Pages/AbSearch.aspx')

    # Parse out the nasty ASP.NET form stuff
    soup = BeautifulSoup(r.text, 'html.parser')

    #print(soup)

    hidden_field_token = 'ctl00_BodyContent_smAjax_HiddenField'
    parsed_hidden_field_key = '_TSM_CombinedScripts_'
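    # The page's ASP.NET script manager embeds a _TSM_CombinedScripts_ token in a
    # script src query string; the loop below digs it out so it can be echoed back
    # in the hidden field on the postback (an assumption based on the field names above)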

    for script in soup.find_all('script'):
        src = script.get('src')
        if src is not None and hidden_field_token in src:
            o = urlparse(src)
            qs = urllib.parse.parse_qs(o.query)
            #print(qs)

    payload = {}

    wayne_county_code = '82'
    city_name = 'Detroit'

    county_key = 'ctl00$BodyContent$ddlCounty'
    city_key = 'ctl00$BodyContent$txtCity'
    clear_key = 'ctl00$BodyContent$_btnClear'

    # Copy every existing form input into the payload so the hidden ASP.NET state
    # fields (e.g. __VIEWSTATE) round-trip with the POST
    for inp in soup.find_all('input'):
        name = inp.get('name')
        value = inp.get('value') if inp.get('value') is not None else ''
        payload[name] = value

    # Set up the form payload to be sent back to the server, hard-coding the
    # county code for Wayne and the city of Detroit
    payload[county_key] = wayne_county_code
    payload[city_key] = city_name
    payload[hidden_field_token] = qs[parsed_hidden_field_key][0]
    # Drop the Clear button so the postback isn't read as a click on Clear
    payload.pop(clear_key)


    #print(payload)

    # Send a POST request with the assembled form payload
    req_url = 'http://www.deq.state.mi.us/asbestos_notifications/Pages/AbSearch.aspx'

    second_r = requests.post(req_url, data=payload)
    #print(second_r.status_code)

    parsed_data_page = BeautifulSoup(second_r.text, 'html.parser')
    print(parsed_data_page)

    #scraped_data = parsed_data_page.find_all('div', attrs={'id': 'divPrint'})
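    # A possible next step (hypothetical, not part of the original gist): pull the
    # results table out of the response with pandas and write the CSV that 2.py
    # reads. Assumes the results render as a plain HTML table.
    #import pandas as pd
    #pd.read_html(second_r.text)[0].to_csv('deq_asbestos_notifications.csv', index=False)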