"""
Script for automated analysis of profiling data in MongoDB,
gathered by Mongo with db.setProfilingLevel(1).

See the MongoDB database profiler documentation for the record format.

TODO: pass collection and database with profiling data in arguments
TODO: make thread-safe
"""
from collections import defaultdict

MONGO_DB = 'test'
PROFILE_COLLECTION = 'system.profile'  # default name of collection with profiling data

# global mapping of (collection, query_fields) to their statistics data
QSTATS = defaultdict(lambda: {
    'count': 0,
    'millis_sum': 0,
    'millis_min': None,
    'millis_max': None,
    'nscanned_sum': 0,
    'nscanned_min': None,
    'nscanned_max': None,
})


def get_profile_collection():
    """Return mongo collection containing profiling records.

    Connects with default client settings and returns the collection
    ``PROFILE_COLLECTION`` of the database ``MONGO_DB``.
    """
    # ``Connection`` was removed in PyMongo 3; prefer ``MongoClient`` and only
    # fall back to the legacy class on installations that lack it.
    try:
        from pymongo import MongoClient as client_class
    except ImportError:
        from pymongo import Connection as client_class
    con = client_class()
    return con[MONGO_DB][PROFILE_COLLECTION]


def extract_collection_query(prof_rec):
    """Return tuple of collection name and list of query fields.

    ``prof_rec`` is a single profiling record (a mapping) and is not modified.

    For command records (namespace ending in ``$cmd``) the collection is the
    value of the first key of the command document other than ``query`` and
    ``fields``; ``u'$cmd'`` is used when there is no such key.  For all other
    records the collection is the part of the namespace after the database
    name, so dotted collection names such as ``fs.files`` are kept intact.

    Query fields are dotted paths.  Fields of a ``$orderby`` clause get a
    trailing ``.$orderby`` component.  Records without a query yield an
    empty field list.
    """
    ns = prof_rec[u'ns']
    if ns.endswith(u'$cmd'):
        cmd_info = prof_rec.get(u'command') or {}
        qry_fields = extract_fields(cmd_info.get(u'query') or {})
        collection = u'$cmd'
        # Read without popping so the caller's record is left untouched.
        for key, value in cmd_info.items():
            if key not in (u'query', u'fields'):
                collection = value
                break
    else:
        # Only the first dot separates the database from the collection.
        collection = ns.split(u'.', 1)[-1]
        query = prof_rec.get(u'query') or {}
        if u'$query' in query:
            qry_fields = extract_fields(query[u'$query'])
        else:
            qry_fields = extract_fields(query)
        if u'$orderby' in query:
            qry_fields.extend(
                path + [u'$orderby']
                for path in extract_fields(query[u'$orderby'])
            )
    return (collection, [u'.'.join(path) for path in qry_fields])


def extract_fields(query, parent_fields=None):
    """Recursively descend query prototype and return list of field names.

    Each returned item is the list of keys leading from the top of ``query``
    to a value that is not a dict, prefixed with ``parent_fields`` if given.
    A nested dict that is empty contributes no entries.
    """
    prefix = list(parent_fields) if parent_fields else []
    fields = []
    for key, value in query.items():
        path = prefix + [key]
        if isinstance(value, dict):
            fields.extend(extract_fields(value, path))
        else:
            fields.append(path)
    return fields


def _record_sample(stats, name, value):
    """Fold one sample of metric ``name`` into its sum/min/max slots."""
    # ``None`` means the record lacks the metric; 0 is a real measurement.
    if value is None:
        return
    sum_key, min_key, max_key = name + '_sum', name + '_min', name + '_max'
    stats[sum_key] += value
    if stats[min_key] is None or value < stats[min_key]:
        stats[min_key] = value
    if stats[max_key] is None or value > stats[max_key]:
        stats[max_key] = value


def _update_stats(col, qry_fields, prof_rec):
    """Account one profiling record in the global ``QSTATS`` mapping.

    The entry is keyed by ``(col, tuple(qry_fields))``.  Its count is always
    incremented; ``millis`` and ``nscanned`` are aggregated only when the
    record carries them.
    """
    stats = QSTATS[(col, tuple(qry_fields))]
    stats['count'] += 1
    _record_sample(stats, 'millis', prof_rec.get(u'millis'))
    _record_sample(stats, 'nscanned', prof_rec.get(u'nscanned'))


def show_stats():
    """Print one line per (collection, query fields) entry of ``QSTATS``.

    Averages are computed with true division; they are ``None`` for an
    entry whose count is zero.
    """
    for (col, fields), stats in QSTATS.items():
        info = stats.copy()
        count = info['count']
        for name in ('millis', 'nscanned'):
            total = info[name + '_sum']
            info['avg_' + name] = float(total) / count if count else None
        summary = ("count=%(count)d avg_millis=%(avg_millis)r "
                   "avg_nscanned=%(avg_nscanned)r" % info)
        # One parenthesised argument parses the same as a Python 2 print
        # statement and as a Python 3 print call.
        print("%s %s %s" % (col, fields, summary))


def analyze_profiling_data(records=None):
    """Process profiling records and gather statistics in ``QSTATS``.

    ``records`` is an optional iterable of profiling records; when omitted,
    all records of the profiling collection are read from MongoDB.
    """
    if records is None:
        records = get_profile_collection().find()
    for rec in records:
        col, qry_fields = extract_collection_query(rec)
        _update_stats(col, qry_fields, rec)


if __name__ == '__main__':
    analyze_profiling_data()
    show_stats()