"""Compare observed group counts against expected frequencies.

Each entry in ``data`` carries a four-letter type code, an observed
``count`` and an ``expected`` population proportion.  For every group, and
for every two-letter combination of code positions, the script prints the
observed frequency and whether it differs significantly from the expected
proportion under a two-sided binomial test.

The code is written to run unchanged on both Python 2 and Python 3: every
``print`` call takes a single, pre-formatted string.
"""
from scipy.stats.distributions import binom

try:
    from itertools import combinations
except ImportError:
    # Fallback for interpreters whose itertools lacks combinations().
    def combinations(iterable, r):
        """Yield every r-length tuple of elements from iterable, in order.

        combinations('ABCD', 2) --> AB AC AD BC BD CD
        combinations(range(4), 3) --> 012 013 023 123
        """
        pool = tuple(iterable)
        size = len(pool)
        if r > size:
            return
        # Must be a real list: the indices are advanced in place below.
        indices = list(range(r))
        yield tuple(pool[i] for i in indices)
        while True:
            # Find the right-most index that has not reached its maximum.
            for i in reversed(range(r)):
                if indices[i] != i + size - r:
                    break
            else:
                return
            indices[i] += 1
            for j in range(i + 1, r):
                indices[j] = indices[j - 1] + 1
            yield tuple(pool[i] for i in indices)


data = [
    {'name': 'ISTJ', 'count': 12, 'expected': 0.1160},
    {'name': 'ISFJ', 'count': 4, 'expected': 0.1380},
    {'name': 'INFJ', 'count': 14, 'expected': 0.0150},
    {'name': 'INTJ', 'count': 206, 'expected': 0.0210},
    {'name': 'ISTP', 'count': 16, 'expected': 0.0540},
    {'name': 'ISFP', 'count': 1, 'expected': 0.0880},
    {'name': 'INFP', 'count': 56, 'expected': 0.0430},
    {'name': 'INTP', 'count': 191, 'expected': 0.0430},
    {'name': 'ESTP', 'count': 3, 'expected': 0.0430},
    {'name': 'ESFP', 'count': 1, 'expected': 0.0850},
    {'name': 'ENFP', 'count': 22, 'expected': 0.0810},
    {'name': 'ENTP', 'count': 56, 'expected': 0.0330},
    {'name': 'ESTJ', 'count': 9, 'expected': 0.0870},
    {'name': 'ESFJ', 'count': 5, 'expected': 0.1230},
    {'name': 'ENFJ', 'count': 15, 'expected': 0.0240},
    {'name': 'ENTJ', 'count': 49, 'expected': 0.0180},
]

# Total number of observations across all groups (the binomial trial count).
n = sum(group['count'] for group in data)

# Confidence level of the test; the significance threshold is 1 - this value.
CONFIDENCE_INTERVAL = 0.99


def is_significant(prob):
    """Return True when p-value ``prob`` is below 1 - CONFIDENCE_INTERVAL."""
    return prob < (1.0 - CONFIDENCE_INTERVAL)


def binomial_p_value(count, trials, expected):
    """Return a two-sided binomial p-value for observing ``count`` successes.

    The value is twice the smaller of the two tail probabilities
    P(X <= count) and P(X >= count) for X ~ Binomial(trials, expected),
    capped at 1.0.

    The probability of the exact count (``binom.pmf``) is deliberately not
    used: it shrinks as ``trials`` grows even for counts close to the
    expected value, so comparing it with the significance threshold
    overstates significance.
    """
    lower_tail = binom.cdf(count, trials, expected)     # P(X <= count)
    upper_tail = binom.sf(count - 1, trials, expected)  # P(X >= count)
    return float(min(1.0, 2.0 * min(lower_tail, upper_tail)))


def report(name, count, trials, expected):
    """Print one result line for a group or pair.

    name     -- label printed at the start of the line
    count    -- observed number of members
    trials   -- total number of observations
    expected -- expected proportion, in the range 0..1
    """
    prob = binomial_p_value(count, trials, expected)
    freq = 100.0 * float(count) / float(trials)
    if is_significant(prob):
        sig = 'significant: '
    else:
        sig = 'NOT significant:'
    print(" %s is %s %5.2f%c (%3d/%d) vs. %5.2f%c expected."
          % (name, sig, freq, '%', count, trials, (100.0 * expected), '%'))


print(" Using a binomial test at a %.0f%c confidence level."
      % (100*CONFIDENCE_INTERVAL, '%'))
print('')
print('Single-group results:')
for group in data:
    report(group['name'], group['count'], n, group['expected'])

print('')
print('Pair-wise results:')

# Aggregate observed counts and expected proportions for every two-letter
# combination of code positions (e.g. positions 0 and 1 of 'INTJ' -> 'IN').
pairs = {}
pair_order = []  # first-seen order, so the output order is deterministic
for firstindex, secondindex in combinations(range(4), 2):
    for group in data:
        pairname = ''.join([group['name'][firstindex],
                            group['name'][secondindex]])
        if pairname not in pairs:
            pairs[pairname] = {'actual': 0, 'expected': 0}
            pair_order.append(pairname)
        pairs[pairname]['actual'] += group['count']
        pairs[pairname]['expected'] += group['expected']

for pairname in pair_order:
    pair = pairs[pairname]
    report(pairname, pair['actual'], n, pair['expected'])