# -*-coding:utf-8-*-
"""Local benchmark: count tokens per line of a large text/JSON dump.

The script targets Python 2 (it relies on ``str.decode`` and
``dict.iteritems``).  Each run iterates the input file through
``process_notifier`` and accumulates token frequencies; the throughput
figures recorded at the bottom of this file were copied from the output of
earlier runs with different variants of the loop body.
"""

from collections import Counter, defaultdict

import ujson
from etl_utils import process_notifier

# NOTE(review): presumably defined so that ``eval`` of raw JSON text can
# resolve the JSON literal ``null`` (see the commented-out variant in
# ``python_func``) -- confirm before removing.
null = None

DATA_ROOT = "/home/primary_user/tmp/"
DEFAULT_INPUT = DATA_ROOT + "redmine10050_final_merge_range.json"  # 139.5 MB
# Alternative inputs (pass via ``path``):
# DATA_ROOT + "en_exam_20150429.json"  # 1.1 GB
# DATA_ROOT + "redmine9523_final_report_four_weeks_before_range.json/16GB.txt"


def char_count(func, path=DEFAULT_INPUT):
    """Accumulate token frequencies over every line of ``path``.

    Args:
        func: Serialize/deserialize round-trip callable (``python_func`` or
            ``ujson_func``).  NOTE(review): it is currently unused because
            the line that applies it is commented out below, so every call
            performs identical work regardless of ``func``.
        path: File to read; defaults to the 139.5 MB sample file.

    Returns:
        A ``defaultdict(int)`` mapping each token to its total count.
    """
    result = defaultdict(int)

    # The context manager closes the file handle deterministically once the
    # iteration is finished (or fails).
    with open(path) as source:
        for line in process_notifier(source):
            # Python 2 byte strings are decoded so tokens are unicode.
            if isinstance(line, str):
                line = line.decode("UTF-8")

            # "word" variant: split on single spaces and count the pieces.
            d1 = Counter(line.split(" "))
            # "char" variant:
            # d1 = dict(Counter(list(line)))
            # d1 = func(d1)  # benchmark serialize and deserialize

            for k2, v2 in d1.iteritems():
                result[k2] += v2

    return result


def python_func(d1):
    """Round-trip ``d1`` through ``repr`` and ``eval``.

    WARNING: ``eval`` executes arbitrary code; this is acceptable only for
    benchmarking trusted local data and must not be used on untrusted input.
    """
    return eval(repr(d1))
    # return eval((d1))


def ujson_func(d1):
    """Round-trip ``d1`` through ``ujson.dumps`` and ``ujson.loads``."""
    return ujson.loads(ujson.dumps(d1))
    # return ujson.loads(d1)


# Parenthesised single-argument print emits the same text on Python 2 and
# also parses on Python 3.
print("** python_func")
char_count(python_func)

print("** ujson_func")
char_count(ujson_func)

# 139.5 MB
# Python 3.3 MB/s
# ujson 3.7 MB/s

# 1.2 GB
# Python 1.5 MB/s

"""
####### decode
(ENV)[primary_user@BJ-NAMENODE-145 benchmark_mr_internal_data_interchange]$ python local_benchmark.py
** python_func
{pid: 19344, file: "/home/primary_user/tmp/redmine10050_final_merge_range.json", size: 139.5 MB}100% |##################################################################| 7.5 MB/s
** ujson_func
{pid: 19344, file: "/home/primary_user/tmp/redmine10050_final_merge_range.json", size: 139.5 MB}100% |#################################################################| 70.1 MB/s

####### word + decode + encode
(ENV)[primary_user@BJ-NAMENODE-145 benchmark_mr_internal_data_interchange]$ python local_benchmark.py
** python_func
{pid: 23318, file: "/home/primary_user/tmp/redmine10050_final_merge_range.json", size: 139.5 MB}100% |##################################################################| 9.6 MB/s
**
{pid: 23318, file: "/home/primary_user/tmp/redmine10050_final_merge_range.json", size: 139.5 MB}100% |#################################################################| 19.1 MB/s

#######
char + decode + encode
(ENV)[primary_user@BJ-NAMENODE-145 benchmark_mr_internal_data_interchange]$ python local_benchmark.py
** python_func
{pid: 25309, file: "/home/primary_user/tmp/redmine10050_final_merge_range.json", size: 139.5 MB}100% |##################################################################| 3.5 MB/s
** ujson_func
{pid: 25309, file: "/home/primary_user/tmp/redmine10050_final_merge_range.json", size: 139.5 MB}100% |##################################################################| 3.7 MB/s

####### char
{pid:5204, file:/home/primary_user/tmp/redmine10050_final_merge_range.json, size:139.5 MB}100% |########################################################################| 3.8 MB/s

####### word
{pid:5846, file:/home/primary_user/tmp/redmine10050_final_merge_range.json, size:139.5 MB}100% |#######################################################################| 21.8 MB/s
"""