|
|
@@ -0,0 +1,114 @@ |
|
|
""" |
|
|
Script for automated analysis of profiling data in MongoDB, |
|
|
gathered by Mongo with db.setProfilingLevel(1). |
|
|
|
|
|
See <http://www.mongodb.org/display/DOCS/Database+Profiler> |
|
|
|
|
|
TODO: pass collection and database with profiling data in arguments |
|
|
TODO: make thread-safe |
|
|
""" |
|
|
from collections import defaultdict |
|
|
|
|
|
|
|
|
# Name of the database that is opened to read the profiling data.
MONGO_DB = 'test'
# Default name of the collection with profiling data.
PROFILE_COLLECTION = 'system.profile'


def _new_query_stats():
    """Return a zeroed statistics record for one (collection, query_fields) key."""
    return dict(
        count=0,
        millis_sum=0, millis_min=None, millis_max=None,
        nscanned_sum=0, nscanned_min=None, nscanned_max=None,
    )


# Global mapping of (collection, query_fields) to their statistics data.
# Missing keys are created on first access with all counters at zero and
# all min/max slots unset (None).
QSTATS = defaultdict(_new_query_stats)
|
|
|
|
|
|
|
|
def get_profile_collection():
    """Return the mongo collection containing profiling records.

    Opens a client with the driver's default connection settings and
    returns the ``PROFILE_COLLECTION`` collection of the ``MONGO_DB``
    database.

    ``pymongo.Connection`` was deprecated in PyMongo 2.4 and removed in
    PyMongo 3.0, so ``MongoClient`` is preferred; ``Connection`` is kept
    only as a fallback for old driver versions.

    Raises ImportError if pymongo provides neither class (or is missing).
    """
    try:
        from pymongo import MongoClient as client_cls
    except ImportError:
        # Drivers older than 2.4 only ship the legacy Connection class.
        from pymongo import Connection as client_cls

    client = client_cls()
    return client[MONGO_DB][PROFILE_COLLECTION]
|
|
|
|
|
|
|
|
def extract_collection_query(prof_rec):
    """Return a tuple of collection name and list of query field paths.

    ``prof_rec`` is one profiling record (a dict with at least an ``ns``
    key).  Two record shapes are handled:

    * command records (``ns`` ends with ``$cmd``): the query fields come
      from the optional ``query`` entry of ``prof_rec['command']`` and the
      collection is taken from one of the remaining command entries;
    * all other records: the collection is the part of ``ns`` after the
      database name, and the query fields come from ``prof_rec['query']``
      (or its ``$query`` sub-document), followed by the ``$orderby``
      fields, each suffixed with ``$orderby``.

    Every field path is returned as a dot-joined string.  The record
    passed in is never modified.

    Raises KeyError if ``ns`` is missing, or if a command record has no
    ``command`` entry.
    """
    ns = prof_rec[u'ns']
    # Database names cannot contain dots but collection names can, so split
    # only on the first dot to keep names such as "db.foo.bar" intact.
    ns_collection = ns.split(u'.', 1)[-1]

    if ns.endswith(u'$cmd'):
        # Shallow copy: the pops below must not alter the caller's record.
        cmd_info = dict(prof_rec[u'command'])
        qry_fields = extract_fields(cmd_info.pop(u'query', {}))
        # 'fields' is optional and is not part of the query; drop it if present.
        cmd_info.pop(u'fields', None)
        if cmd_info:
            # NOTE(review): assumes the remaining entry is
            # {<command name>: <collection name>}; when a command carries
            # other options the entry picked here is arbitrary.
            _command, collection = cmd_info.popitem()
        else:
            # Nothing left to name a collection; fall back to the namespace.
            collection = ns_collection
    else:
        collection = ns_collection
        # Some records carry no 'query' at all -- presumably inserts and
        # similar write operations (confirm against real profiler output);
        # treat them as having no query fields.
        query = prof_rec.get(u'query') or {}
        if u'$query' in query:
            qry_fields = extract_fields(query[u'$query'])
        else:
            qry_fields = extract_fields(query)
        if u'$orderby' in query:
            qry_fields.extend(
                path + [u'$orderby']
                for path in extract_fields(query[u'$orderby'])
            )

    return (collection, [u'.'.join(path) for path in qry_fields])
|
|
|
|
|
|
|
|
def extract_fields(query, parent_fields=None):
    """Recursively descend a query prototype and return its field paths.

    ``query`` is a dict; ``parent_fields`` is an optional sequence of keys
    that is prepended to every returned path (it is not modified).

    Returns a list of paths, each path being a list of keys leading from
    the top of ``query`` to a value that is not a dict.  Only dict values
    are descended into; a key whose value is an empty dict therefore
    contributes no path at all.
    """
    prefix = list(parent_fields) if parent_fields else []
    fields = []
    for key, value in query.items():
        path = prefix + [key]
        if isinstance(value, dict):
            fields.extend(extract_fields(value, path))
        else:
            fields.append(path)
    return fields
|
|
|
|
|
|
|
|
def _track_metric(stats, name, value):
    """Fold one sample of metric ``name`` into its sum/min/max slots in ``stats``."""
    stats[name + '_sum'] += value
    lowest = stats[name + '_min']
    highest = stats[name + '_max']
    if lowest is None or value < lowest:
        stats[name + '_min'] = value
    if highest is None or value > highest:
        stats[name + '_max'] = value


def _update_stats(col, qry_fields, prof_rec):
    """Add one profiling record to the statistics of its query shape.

    The statistics entry is looked up in the global ``QSTATS`` mapping
    under the key ``(col, tuple(qry_fields))`` and created on first use.
    Its ``count`` is always incremented; the ``millis`` and ``nscanned``
    sum/min/max slots are updated only when the record carries that value.
    """
    stats = QSTATS[(col, tuple(qry_fields))]
    stats['count'] += 1
    for metric in ('millis', 'nscanned'):
        value = prof_rec.get(metric)
        # Test against None rather than truthiness: 0 is a legitimate
        # sample and must be able to become the recorded minimum.
        if value is not None:
            _track_metric(stats, metric, value)
|
|
|
|
|
|
|
|
def show_stats():
    """Print one summary line for every entry of the global ``QSTATS``.

    Each line shows the collection, the tuple of query fields, the number
    of records and the average ``millis`` and ``nscanned`` per record.
    Averages are computed with true division, so they are floats; an
    entry whose count is zero reports both averages as ``None``.
    """
    for (col, fields), stats in QSTATS.items():
        count = stats['count']
        if count:
            # float() forces true division: under Python 2, int / int
            # would silently truncate the average.
            avg_millis = float(stats['millis_sum']) / count
            avg_nscanned = float(stats['nscanned_sum']) / count
        else:
            avg_millis = avg_nscanned = None
        # Single-argument print(...) is valid under both Python 2 and 3.
        print("%s %s count=%d avg_millis=%r avg_nscanned=%r"
              % (col, fields, count, avg_millis, avg_nscanned))
|
|
|
|
|
|
|
|
def analyze_profiling_data():
    """Walk every record of the profiling collection and gather statistics.

    Each record is reduced to its collection name and query fields, and
    the result is accumulated through ``_update_stats``.
    """
    records = get_profile_collection().find()
    for record in records:
        collection, query_fields = extract_collection_query(record)
        _update_stats(collection, query_fields, record)
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: gather the statistics first, then print the report.
    analyze_profiling_data()
    show_stats()