"""Simulate significance tests where group membership never affects outcome.

Each person carries one random feature; a "success" is recorded whenever
that feature exceeds 0.9, regardless of the group the person landed in.
Any p-value below 0.05 is therefore a false positive. Two assignment
strategies are compared: one derived from the person id and one drawn at
random at observation time.
"""

import pprint  # not referenced in the visible code; kept as in the original
import random
import sys

import statsmodels
import pmap  # not referenced in the visible code; kept as in the original
from statsmodels.stats.proportion import proportions_ztest

groups = ["control", "treatment"]

POPULATION = 1e6
SAMPLE = 1e4
TRIES_PER_PERSON = 3


def population():
    """Return a dict mapping ids 0..POPULATION-1 to a uniform [0, 1) feature."""
    return {person_id: random.random() for person_id in range(int(POPULATION))}


def predetermined_group(id):
    """Pick the group from the id's parity: even -> control, odd -> treatment."""
    return groups[id % 2]


def justintime_group(id):
    """Pick the group with a fresh coin flip; the id itself is ignored."""
    # A draw below 0.5 selects index 1 ("treatment"), otherwise index 0.
    if random.random() < 0.5:
        return groups[1]
    return groups[0]


def trial(_pop, fn_group):
    """Run one experiment on a random sample of the population.

    Args:
        _pop: mapping of person id -> feature value, as built by population().
        fn_group: callable taking a person id and returning a group name.

    Returns:
        {"count": successes per group, "nobs": observations per group}.
    """
    drawn = random.sample(list(_pop.items()), int(SAMPLE))
    successes = {'control': 0, 'treatment': 0}
    observations = {'control': 0, 'treatment': 0}
    for person_id, feature in drawn:
        assigned = fn_group(person_id)
        # The outcome depends only on the person's feature, so all of a
        # person's tries succeed or fail together.
        is_hit = feature > 0.9
        for _ in range(TRIES_PER_PERSON):
            observations[assigned] += 1
            if is_hit:
                successes[assigned] += 1
    return {"count": successes, "nobs": observations}


def pval(x):
    """Return the proportions z-test p-value for control vs. treatment.

    Args:
        x: a result dict shaped like the return value of trial().
    """
    arms = ("control", "treatment")
    success_counts = [x["count"][arm] for arm in arms]
    totals = [x["nobs"][arm] for arm in arms]
    _, p_value = proportions_ztest(success_counts, totals)
    return p_value


def pval_trials():
    """Run 100 paired trials on one shared population.

    Returns:
        A list of (predetermined p-value, just-in-time p-value) tuples.
    """
    people = population()
    return [
        (
            pval(trial(people, predetermined_group)),
            pval(trial(people, justintime_group)),
        )
        for _ in range(100)
    ]


def main():
    """Print how many of the 100 trials were significant at 0.05 per strategy.

    The first number is for predetermined assignment, the second for
    just-in-time assignment.
    """
    results = pval_trials()
    flagged = [(int(fixed < 0.05), int(jit < 0.05)) for fixed, jit in results]
    predetermined_hits = sum(first for first, _ in flagged)
    justintime_hits = sum(second for _, second in flagged)
    print(predetermined_hits, justintime_hits)
    sys.stdout.flush()


# Repeat the whole experiment 100 times; each run builds a fresh population.
for _ in range(100):
    main()