Skip to content

Instantly share code, notes, and snippets.

@jsvcycling
Created December 13, 2018 15:33
Show Gist options
  • Save jsvcycling/ade874a02a848885f00a11d4bbf5272c to your computer and use it in GitHub Desktop.
Save jsvcycling/ade874a02a848885f00a11d4bbf5272c to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import pandas as pd
import sys
from datetime import date, datetime, timedelta
from sqlalchemy import create_engine
from sqlalchemy.sql import text
# Download URL template; the placeholder receives the date formatted as YYYYMMDD.
BASE_URL = "http://.../raw_data/dl.php?f={}"
# NOTE(review): SQL_INSERT is not referenced in the visible script; kept in
# case other code relies on it.
SQL_INSERT = text("INSERT INTO ...")

# Exactly two positional arguments are required: the start and end dates.
if len(sys.argv) != 3:
    print(
        "usage: {} START_YYYYMMDD END_YYYYMMDD".format(sys.argv[0]),
        file=sys.stderr,
    )
    # sys.exit instead of the bare ``exit`` helper, which only exists when
    # the ``site`` module has been loaded.
    sys.exit(1)

try:
    start_date = datetime.strptime(sys.argv[1], '%Y%m%d').date()
    end_date = datetime.strptime(sys.argv[2], '%Y%m%d').date()
except ValueError as err:
    # Report malformed dates with a short message rather than a traceback;
    # the exit status stays non-zero.
    print("invalid date argument: {}".format(err), file=sys.stderr)
    sys.exit(1)

curr_date = start_date

engine = create_engine('mysql+mysqlconnector://...')

# Walk the inclusive range [start_date, end_date] one day at a time. If the
# start date is after the end date the loop body never runs.
while curr_date <= end_date:
    url = BASE_URL.format(curr_date.strftime('%Y%m%d'))
    print(url)

    # Read the zip-compressed CSV straight from the URL; the first row is the
    # header and the 'date' column is parsed into datetimes.
    df = pd.read_csv(url, compression='zip', header=0, parse_dates=['date'])

    # Clean up the data...

    # Append the rows to the target table, writing 100 rows per batch.
    df.to_sql(name='...', con=engine, if_exists='append', index=False, chunksize=100)

    curr_date = curr_date + timedelta(days=1)

    # Drop the reference so the frame can be reclaimed before the next download.
    del df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment