Created
February 21, 2012 19:41
-
-
Save sergray/1878413 to your computer and use it in GitHub Desktop.
Revisions
-
sergray revised this gist
Feb 29, 2012. 1 changed file with 6 additions and 1 deletion. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -6,6 +6,7 @@ TODO: pass collection and database with profiling data in arguments TODO: make thread-safe TODO: handle map-reduce operations """ from collections import defaultdict @@ -105,7 +106,11 @@ def analyze_profiling_data(): """Process all records in profiling collection and gather statistics""" prof_col = get_profile_collection() for rec in prof_col.find(): try: col, qry_fields = extract_collection_query(rec) except: # quick workaround, needs better handling continue _update_stats(col, qry_fields, rec) -
sergray created this gist
Feb 21, 2012. There are no files selected for viewing.
"""
Script for automated analysis of profiling data in MongoDB, gathered by Mongo
with db.setProfilingLevel(1).

See <http://www.mongodb.org/display/DOCS/Database+Profiler>

TODO: pass collection and database with profiling data in arguments
TODO: make thread-safe
"""
from collections import defaultdict


MONGO_DB = 'test'
# default name of collection with profiling data
PROFILE_COLLECTION = 'system.profile'

# global mapping of (collection, query_fields) to their statistics data
QSTATS = defaultdict(lambda: {
    'count': 0,
    'millis_sum': 0,
    'millis_min': None,
    'millis_max': None,
    'nscanned_sum': 0,
    'nscanned_min': None,
    'nscanned_max': None,
})


def get_profile_collection():
    """Return mongo collection containing profiling records

    Connects with the driver's default connection settings and returns the
    PROFILE_COLLECTION collection of the MONGO_DB database.
    """
    # pymongo is imported lazily so the analysis helpers in this module can
    # be used (and tested) without the driver being installed.
    try:
        from pymongo import MongoClient as client_cls
    except ImportError:
        # pymongo releases older than 2.4 only provide the legacy Connection
        from pymongo import Connection as client_cls
    con = client_cls()
    return con[MONGO_DB][PROFILE_COLLECTION]


def extract_collection_query(prof_rec):
    """Returns tuple of collection name and list of query fields

    prof_rec is one profiling record (a mapping). For command records
    (namespace ending in ``$cmd``) the collection is taken from the command
    document, otherwise from the namespace itself. Each query field is a
    dotted path; fields used for sorting carry a trailing ``.$orderby``.

    Raises KeyError when the record lacks the ``ns``/``command``/``query``
    entry needed for its kind. The record itself is never modified.
    """
    ns = prof_rec[u'ns']
    if ns.endswith(u'$cmd'):
        # Work on a copy: popping from the original would alter the caller's
        # record.
        cmd_info = dict(prof_rec[u'command'])
        qry_fields = extract_fields(cmd_info.pop(u'query', None) or {})
        # 'fields' is optional in a command and irrelevant for the stats.
        cmd_info.pop(u'fields', None)
        if not cmd_info:
            raise KeyError(u'command')
        # The command name is expected to be the first key of the command
        # document and its value the target collection. This relies on the
        # mapping preserving key order; taking the first item avoids picking
        # up a later argument such as 'key' of a distinct command.
        _command, collection = next(iter(cmd_info.items()))
    else:
        # A namespace is "<database>.<collection>" and collection names may
        # contain dots themselves, so only the first dot separates the two.
        collection = ns.partition(u'.')[2] or ns
        query = prof_rec[u'query']
        if u'$query' in query:
            qry_fields = extract_fields(query[u'$query'])
        else:
            qry_fields = extract_fields(query)
        if u'$orderby' in query:
            qry_fields.extend(
                path + [u'$orderby']
                for path in extract_fields(query[u'$orderby'])
            )
    return (collection, [u'.'.join(path) for path in qry_fields])


def extract_fields(query, parent_fields=None):
    """Recursively descend query prototype and return list of field names

    Every returned item is the list of keys leading from the top of query to
    a non-dict value, prefixed with parent_fields. Empty nested dicts
    contribute nothing.
    """
    prefix = list(parent_fields) if parent_fields else []
    fields = []
    for key, value in query.items():
        path = prefix + [key]
        if isinstance(value, dict):
            fields.extend(extract_fields(value, path))
        else:
            fields.append(path)
    return fields


def _track_metric(stats, name, value):
    """Fold one sample into the <name>_sum/_min/_max entries of stats"""
    stats[name + '_sum'] += value
    min_key = name + '_min'
    max_key = name + '_max'
    if stats[min_key] is None or value < stats[min_key]:
        stats[min_key] = value
    if stats[max_key] is None or value > stats[max_key]:
        stats[max_key] = value


def _update_stats(col, qry_fields, prof_rec):
    """Account one profiling record in QSTATS under (col, qry_fields)"""
    stats = QSTATS[(col, tuple(qry_fields))]
    stats['count'] += 1
    for metric in ('millis', 'nscanned'):
        value = prof_rec.get(metric)
        # Compare against None rather than truthiness: 0 is a valid sample
        # and must be able to become the minimum.
        if value is not None:
            _track_metric(stats, metric, value)


def show_stats():
    """Print one line per (collection, query fields) entry of QSTATS

    Averages are the metric sums divided by the number of records of the
    entry, or None when the entry has no records.
    """
    for (col, fields), stats in QSTATS.items():
        count = stats['count']
        if count:
            # float() keeps the division exact on Python 2 as well
            avg_millis = float(stats['millis_sum']) / count
            avg_nscanned = float(stats['nscanned_sum']) / count
        else:
            avg_millis = avg_nscanned = None
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("%s %s count=%d avg_millis=%r avg_nscanned=%r" % (
            col, fields, count, avg_millis, avg_nscanned))


def analyze_profiling_data(records=None):
    """Process all records in profiling collection and gather statistics

    records is an optional iterable of profiling records; when omitted, all
    documents of the collection returned by get_profile_collection() are
    read. Results are accumulated in the global QSTATS mapping.

    Records from which no collection/query can be extracted (for example
    ones without a ``query`` entry) are skipped instead of aborting the run.
    """
    if records is None:
        records = get_profile_collection().find()
    for rec in records:
        try:
            col, qry_fields = extract_collection_query(rec)
        except (KeyError, AttributeError, TypeError):
            # record has no usable namespace/query information
            continue
        _update_stats(col, qry_fields, rec)


if __name__ == '__main__':
    analyze_profiling_data()
    show_stats()