"""Benchmark pandas datetime parsing with and without format inference.

Run as a script.  For each format in ``test_formats`` it times
``pd.to_datetime`` and ``pd.read_csv`` under several parsing options and
prints, per run, an MD5 digest of the parsed result's JSON form (so runs can
be checked for identical output) together with the elapsed time.
"""
import datetime
import hashlib
import os
import tempfile
import time

import pandas as pd

# Prefer the high-resolution performance counter where the interpreter has
# one; fall back to wall-clock time on interpreters that lack it.
_clock = getattr(time, 'perf_counter', time.time)


def _output_digest(output):
    """Return the hex MD5 digest of ``output.to_json()``."""
    payload = output.to_json()
    # hashlib only accepts bytes on Python 3; text must be encoded first.
    if not isinstance(payload, bytes):
        payload = payload.encode('utf-8')
    return hashlib.md5(payload).hexdigest()


def time_with_hash(desc, f, baseline=None):
    """Time one call of ``f`` and print a digest of its result.

    Parameters
    ----------
    desc : str
        Label printed at the start of the report line.
    f : callable
        Zero-argument callable; its return value must provide ``to_json()``.
    baseline : float, optional
        Elapsed seconds of a reference run.  When given, the report line
        also shows ``baseline / timing`` as a speed-up factor.

    Returns
    -------
    float
        Elapsed seconds spent inside ``f`` (hashing is not included).
    """
    started = _clock()
    output = f()
    timing = _clock() - started

    digest = _output_digest(output)
    if baseline is None:
        print('%30s: %s - %.3fs' % (desc, digest, timing))
    else:
        # A run faster than the clock resolution measures as 0 seconds;
        # report an infinite speed-up instead of raising ZeroDivisionError.
        speedup = baseline / timing if timing > 0 else float('inf')
        print('%30s: %s - %.3fs (%.2fx baseline)' % (
            desc, digest, timing, speedup
        ))
    return timing


test_formats = [
    '%m-%d-%Y',
    '%m/%d/%Y %H:%M:%S.%f',
    '%Y-%m-%dT%H:%M:%S.%f',
]


def _make_date_strings(test_format, periods=50000):
    """Return a Series of hourly timestamps rendered with ``test_format``."""
    stamps = pd.Series(pd.date_range('20000101', periods=periods, freq='H'))
    return stamps.apply(lambda stamp: stamp.strftime(test_format))


def _print_format_header(test_format):
    """Print the heading that introduces the results for one format."""
    print('Datetime format: %s' % test_format)
    print('---------------')


def _benchmark_to_datetime(test_format):
    """Time ``pd.to_datetime`` on strings rendered with ``test_format``."""
    s = _make_date_strings(test_format)

    _print_format_header(test_format)
    baseline = time_with_hash(
        'Without infer_format',
        lambda: pd.to_datetime(s)
    )
    # The keyword is ``infer_datetime_format`` (the same name read_csv
    # uses below); ``infer_format`` is not an accepted argument.
    time_with_hash(
        'With infer_format',
        lambda: pd.to_datetime(s, infer_datetime_format=True),
        baseline=baseline
    )
    time_with_hash(
        'Passing the format',
        lambda: pd.to_datetime(s, format=test_format),
        baseline=baseline
    )
    print('')


def _benchmark_read_csv(test_format):
    """Time ``pd.read_csv`` date parsing for strings in ``test_format``."""
    s = _make_date_strings(test_format)

    def date_parser(text):
        return datetime.datetime.strptime(text, test_format)

    # A private temporary file avoids clashing with concurrent runs and
    # works on platforms that have no /tmp directory.
    handle, csv_path = tempfile.mkstemp(suffix='.csv')
    os.close(handle)
    try:
        s.to_csv(csv_path)

        _print_format_header(test_format)
        baseline = time_with_hash(
            'Without infer_format',
            lambda: pd.read_csv(
                csv_path,
                parse_dates=[1, ],
            )
        )
        time_with_hash(
            'With infer_format',
            lambda: pd.read_csv(
                csv_path,
                parse_dates=[1, ],
                infer_datetime_format=True
            ),
            baseline=baseline
        )
        time_with_hash(
            'With strptime date_parser',
            lambda: pd.read_csv(
                csv_path,
                parse_dates=[1, ],
                date_parser=date_parser,
            ),
            baseline=baseline
        )
        print('')
    finally:
        os.remove(csv_path)


def main():
    """Run the to_datetime and read_csv benchmarks for every test format."""
    print('Timing to_datetime():')
    for test_format in test_formats:
        _benchmark_to_datetime(test_format)

    print('')
    print('Testing reading CSV:')
    for test_format in test_formats:
        _benchmark_read_csv(test_format)


if __name__ == '__main__':
    main()