#!/usr/bin/python # -*- coding: utf-8 -*- import sqlite3 import re import sys, csv import datetime, time db_file = "ports.db" #vslRecID_n,vsl_m,vslTrgtPosnLat_q,vslTrgtPosnLong_q,vslCourse_q,vslTy_c,vslLen_q,vslGT_q,vslBre_q,vslSpeed_q, timeStamp_dt #52589,, 1.308571696, 103.7263107, 0, TU, 54, 1373, 0, 0, Mar 31 2014 12:01:46:270AM #68346,, 1.289833188, 104.0870514, 84.72840881,TU, 19, 89, 0, 2.988201141,Mar 31 2014 12:14:02:826AM structure_ais = dict( table="AIS", skip_first=True, ignore_zero_vid=True, fields=[ "vslRecID=vid INT", # - vessel ID. 0 means we're missing the vessel receiver ID "vsl_m=0 CHAR(1)", # - vessel manager but unused "vslTrgtPosnLat_q=lat REAL", # - vessel latitude "vslTrgtPosnLong_q=lon REAL", # - vessel longitude "vslCourse_q=course REAL", # - vessel direction "vslTy_c=country CHAR(2)", # - vessel country of registration "vslLen_q=l INT", # - vessel length "vslGT_q=gt INT", # - gross tonnage (e.g. 6000 means volume in cubic feet) "vslBre_q=beam INT", # - vessel beam "vslSpeed_q=speed REAL", # - vessel speed in knots at the time of measurement "timeStamp_dt=ts TS_AIS" # - timestamp when this record was made ], structure_csv=[], fields_sql=[], create_sql="", ) class CSVtoSQLite(): #db_create_real = re.sub(r"\#.*?\n", "\n", """ DB_HINTS_PARSE = r"^(.*?)\=(.*?)\s+(.*)" def __init__(self, db_structure): sql_create=[] sql_fields=[] sql_places=[] structure=[] for c in db_structure['fields']: m = re.match(self.DB_HINTS_PARSE, c) if m is not None: (csv_field, sql_field, typ) = (m.group(1), m.group(2), m.group(3) ) structure.append( dict(csv = csv_field, sql = sql_field, typ = typ) ) if sql_field == "0": continue if typ == "TS_AIS": typ = "INT" # Conversion will happen below print "%s - %s - %s" % (csv_field, sql_field, typ) sql_create.append("%s %s" % (sql_field, typ)) sql_fields.append(sql_field) sql_places.append('?') db_structure['structure_csv'] = structure db_structure['create_sql'] = ("CREATE TABLE %s (" % (db_structure['table'])) + ", \n".join(sql_create) + (");") db_structure['fields_sql'] = sql_fields db_structure['places_sql'] = sql_places self.db_structure = db_structure def create_if_not_there(self): r = connection.execute("SELECT * FROM sqlite_master WHERE type = 'table' AND name = ?", (self.db_structure['table'],)) if r.fetchone() is None: connection.execute(self.db_structure['create_sql']) def load_csv(self, filename): #reader = csv.DictReader(open(filename)) reader = csv.reader(open(filename)) if self.db_structure['skip_first']: throwaway = reader.next() def return_place(field): return '?' cols = ','.join(self.db_structure['fields_sql']) places = ','.join(self.db_structure['places_sql']) insert = "INSERT INTO %s (%s) VALUES (%s)" % (self.db_structure['table'], cols, places) #print insert for row in reader: entry = [] skip_row = False for (c,v) in zip(self.db_structure['structure_csv'], row): if c['sql'] == "0": continue if c['sql'] == "vid" and v=="0" and self.db_structure['ignore_zero_vid']: skip_row=True # https://docs.python.org/2/library/datetime.html#strftime-strptime-behavior if c['typ'] == "TS_AIS": #print "DATE:" + v ts = datetime.datetime.strptime(v, "%b %d %Y %I:%M:%S:%f%p") # 'Apr 30 2014 12:00:42:240AM' #v = "datetime(%s, 'unixepoch')" % (time.mktime(ts.timetuple()),) v = time.mktime(ts.timetuple()) entry.append(v) if not skip_row: #print insert #print entry connection.execute(insert, entry) if reader.line_num % 10000 == 0 : print "%30s : %6dk" % (filename, reader.line_num/1000) connection.commit() # http://www.tutorialspoint.com/sqlite/sqlite_python.htm connection = sqlite3.connect(db_file) dataset = '' if len(sys.argv)<2: print """Usage: python csv_to_sqlite.py {ais} Layout: ./db/csv_to_sqlite.py # This script ./db/ports.db # The sqlite database populated ./data/AIS/YYYY-MM/*.csv # AIS data """ else: dataset = sys.argv[1] if dataset.lower() == 'ais': ais=CSVtoSQLite(structure_ais) ais.create_if_not_there() #ais.load_csv("../data/AIS/2014-05/PTMS_curTrgtP3History_01-May-2014.csv") #ais.load_csv("../data/AIS/2014-05/PTMS_curTrgtP3History_02-May-2014.csv") ais.load_csv("../data/AIS/2014-05/PTMS_curTrgtP3History_03-May-2014.csv") connection.close() #system("""echo -e ".mode csv\n.import ../data/AIS/2014-05/PTMS_curTrgtP3History_01-May-2014.csv AIS_RAW" | sqlite3 ports.db""") #system("""echo -e ".import ../data/AIS/2014-05/PTMS_curTrgtP3History_01-May-2014.csv AIS_RAW" | sqlite3 -csv ports.db""") """ ## https://gist.github.com/rgrp/5199059 # ID INT PRIMARY KEY NOT NULL, # NAME TEXT NOT NULL, # AGE INT NOT NULL, # ADDRESS CHAR(50), # SALARY REAL ## http://www.tutorialspoint.com/sqlite/sqlite_indexes.htm #CREATE INDEX index_name #on table_name (column1, column2); """