## ## Mike Tafel and Justin Lewis ## 2010/07/26 : updated Dec. 2012 ## ## Script that geocodes address data stored in a PostgreSQL (PostGIS) database using Google. ### Preps the db table, reads address data, builds a request url, returns lat/long/precision valuse to the same table, ### transforms coordinates to 4326 then 2232. ## ## Dependancies: #### PostGIS database, Google Private Key and Client ID, all the libraries listed in the import below ## import urllib, urllib2, pg, simplejson as json, time, sys, hashlib, hmac, base64, urlparse class TablePrep: def check_columns(self): print "Checking if required fields are in the base table..." check_columns_q = "select column_name from information_schema.columns where table_name = '%(table)s'" % {'table' : table } self.columns = db.query(check_columns_q) self.fieldnames = self.columns.getresult() if ('lat',) in self.fieldnames: print "Table has lat" clear = "UPDATE %(table)s SET lat = null" % {'table' : table } db.query(clear) else: print "Adding lat column..." add_column = "ALTER TABLE %(table)s ADD COLUMN lat double precision;" % {'table' : table } db.query(add_column) if ('long',) in self.fieldnames: print "Table has long" clear = "UPDATE %(table)s SET long = null" % {'table' : table } db.query(clear) else: print "Adding long column..." add_column = "ALTER TABLE %(table)s ADD COLUMN long double precision;" % {'table' : table } db.query(add_column) if ('precision',) in self.fieldnames: print "Table has precision" clear = "UPDATE %(table)s SET precision = null" % {'table' : table } db.query(clear) else: print "Adding precision column..." add_column = "ALTER TABLE %(table)s ADD COLUMN precision character varying (50);" % {'table' : table } db.query(add_column) if ('geom_4326',) in self.fieldnames: print "Table has geom_4326" clear = "UPDATE %(table)s SET geom_2232 = null" % {'table' : table } db.query(clear) else: print "Adding geom_4326 column..." add_column = "SELECT AddGeometryColumn('public', '%(table)s', 'geom_4326', '4326', 'POINT', 2);" % {'table' : table } db.query(add_column) if ('geom_2232',) in self.fieldnames: print "Table has geom_2232" else: print "Adding geom_2232 column..." add_column = "SELECT AddGeometryColumn('public', '%(table)s', 'geom_2232', '2232', 'POINT', 2);" % {'table' : table } db.query(add_column) #### This works but need to determine what the row # is on creation ## if ('gid',) in self.fieldnames: ## print "Table has gid" ## else: ## print "Adding gid column..." ## add_column = "ALTER TABLE %(table)s ADD COLUMN gid SERIAL;" % {'table' : table } ## db.query(add_column) class Geocode_Address: def geocode_single_address_field(self, table, address_row, city_row, zip_row, ojectid_row, oid_name): #### Selecting data from the geocode table selectallSql = "SELECT * FROM %(table)s;" % {'table' : table} ## add where clause variable sel = db.query(selectallSql) result = sel.getresult() rows = len(result) #### Set url variables base_url = "http://maps.google.com/maps/api/geocode/json?" privateKey = "YOUR_PRIVATE_KEY=" client_id = "YOUR_CLIENT_ID" x = 0 while x < rows: print " " row = result[x] address_field = str(row[address_row]) city_field = str(row[city_row]) zip_field = str(row[zip_row]) objectid_field = int(row[ojectid_row]) values = { 'sensor' : 'false','address' :address_field+' '+city_field+', CO '+zip_field, 'client':client_id} data = urllib.urlencode(values) print values #### build request url request_url = base_url+data url = urlparse.urlparse(request_url) urlToSign = url.path + "?"+ url.query decodedKey = base64.urlsafe_b64decode(privateKey) signature = hmac.new(decodedKey, urlToSign, hashlib.sha1) encodedSignature = base64.urlsafe_b64encode(signature.digest()) originalUrl = url.scheme + "://" + url.netloc + url.path + "?" + url.query geocode_url = originalUrl + "&signature=" + encodedSignature print geocode_url #### Send request start = time.time() response = urllib2.urlopen(geocode_url) output = response.read() #### Read and parse json output. data = json.loads(output) status = data["status"] print status if status == "OVER_QUERY_LIMIT": x = rows print "over the query limit" elif status == "OK": long = data["results"][0]["geometry"]["location"]["lng"] lat = data["results"][0]["geometry"]["location"]["lat"] precision = data["results"][0]["geometry"]["location_type"] if long < -106 or long > -104 or lat > 41 or lat < 38: update_query = "UPDATE %(table)s SET lat = -2, precision = '%(prec)s' WHERE %(oid_name)s = '%(objectid)s'" % {'objectid' : objectid_field, 'prec': precision, 'table' : table, 'oid_name' : oid_name} db.query(update_query) print "out of region" else: update_query = "UPDATE %(table)s SET long = '%(long)s', lat = '%(lat)s', precision = '%(prec)s' WHERE %(oid_name)s = '%(objectid)s'" % {'long' : long, 'lat' : lat, 'prec': precision, 'objectid' : objectid_field, 'table' : table, 'oid_name' : oid_name} db.query(update_query) print precision elif status != "OK": #### Update the data update_query = "UPDATE %(table)s SET lat = -1 WHERE %(oid_name)s = '%(objectid)s'" % {'objectid' : objectid_field, 'table' : table, 'oid_name' : oid_name} db.query(update_query) else: pass x = x+1 class Geocode_Intersection: def intersection(self, table, address_row_1, address_row_2, city_row, ojectid_row, oid_name): #### Selecting data from the geocode table selectallSql = "SELECT * FROM %(table)s;" % {'table' : table} ## add where clause variable sel = db.query(selectallSql) result = sel.getresult() rows = len(result) #### Set url variables base_url = "http://maps.google.com/maps/api/geocode/json?" privateKey = "0bTz96TLfiOmPIwGLc0SgFl2ZBQ=" client_id = "gme-denverregionalcouncil" x = 0 while x < rows: print " " row = result[x] address_1 = str(row[address_row_1]) address_2 = str(row[address_row_2]) city_field = str(row[city_row]) ## zip_field = str(row[zip_row]) objectid_field = row[ojectid_row] values = { 'sensor' : 'false','address' :address_1+' & '+address_2+' '+city_field+', CO ', 'client':client_id} data = urllib.urlencode(values) print values #### build request url request_url = base_url+data url = urlparse.urlparse(request_url) urlToSign = url.path + "?"+ url.query decodedKey = base64.urlsafe_b64decode(privateKey) signature = hmac.new(decodedKey, urlToSign, hashlib.sha1) encodedSignature = base64.urlsafe_b64encode(signature.digest()) originalUrl = url.scheme + "://" + url.netloc + url.path + "?" + url.query geocode_url = originalUrl + "&signature=" + encodedSignature print geocode_url #### Send request start = time.time() response = urllib2.urlopen(geocode_url) output = response.read() #### Read and parse json output. data = json.loads(output) status = data["status"] print status if status == "OVER_QUERY_LIMIT": x = rows print "over the query limit" elif status == "OK": long = data["results"][0]["geometry"]["location"]["lng"] lat = data["results"][0]["geometry"]["location"]["lat"] precision = data["results"][0]["geometry"]["location_type"] if long < -106 or long > -104 or lat > 41 or lat < 38: update_query = "UPDATE %(table)s SET lat = -2, precision = '%(prec)s' WHERE %(oid_name)s = '%(objectid)s'" % {'objectid' : objectid_field, 'prec': precision, 'table' : table, 'oid_name' : oid_name} db.query(update_query) print "out of region" else: update_query = "UPDATE %(table)s SET long = '%(long)s', lat = '%(lat)s', precision = '%(prec)s' WHERE %(oid_name)s = '%(objectid)s'" % {'long' : long, 'lat' : lat, 'prec': precision, 'objectid' : objectid_field, 'table' : table, 'oid_name' : oid_name} db.query(update_query) print precision elif status != "OK": #### Update the data update_query = "UPDATE %(table)s SET lat = -1 WHERE %(oid_name)s = '%(objectid)s'" % {'objectid' : objectid_field, 'table' : table, 'oid_name' : oid_name} db.query(update_query) else: pass x = x+1 class Build_Geometry: def populate_geom(self, table): print "Populating the geom_4326 column..." popGeom = "UPDATE %(table)s SET geom_4326 = PointFromText('POINT(' || long|| ' ' || lat|| ')', 4326) WHERE lat > 0" % {'table' : table} db.query(popGeom) print "Populating the geom_2232 column..." transformGeom = "UPDATE %(table)s SET geom_2232 = (SELECT ST_Transform(geom_4326, 2232))" % {'table' : table} db.query(transformGeom) def drop_geom_4326(self, table): print "Dropping the geom_4326 column." popGeom = """ALTER TABLE %(table)s DROP COLUMN geom_4326 DELETE FROM geometry_columns WHERE f_table_name = '%(table)s' AND f_geometry_column = 'geom_4326';""" % {'table' : table} db.query(popGeom) def pop_geom_table(self, table): print "Populating the geometry_columns table..." popGeomTable = "SELECT Populate_Geometry_Columns('public.%(table)s'::regclass);" % {'table' : table} db.query(popGeomTable) if __name__ == '__main__': #DB connection properties db = pg.connect(dbname = 'geocode', host= 'postgresql', port= 5432, user = 'USER',passwd= 'PASS') #### Set this variable to the name of the table to be geocoded. table = 'geocode_test' tablePrep = TablePrep() tablePrep.check_columns() #### FOR GEOCODING STANDARD ADDRESS DATA #### REQUIRES: single address field, city, zip, objectid #### geocode_single_address_field(table name, address row #, city row #, zip row #, ojectid row #) geocode = Geocode_Address() geocode.geocode_single_address_field(table, 6, 2, 7, 0, 'gid') #### FOR GEOCODING INTERSECTION DATA #### REQUIRES: 1 field for each intersecting road, city, objectid #### intersection(table name, address row 1 #, address row 2 #, city row #, ojectid row #) ## geocode = Geocode_Intersection() ## geocode.intersection(table, 0, 1, 2, 22, 'gid') build_geom = Build_Geometry() build_geom.populate_geom(table) build_geom.drop_geom_4326(table) build_geom.pop_geom_table(table)