@OlafHaag
Last active October 1, 2020 01:17
Revisions

  1. OlafHaag revised this gist Oct 1, 2020. 2 changed files with 45 additions and 128 deletions.
    86 changes: 22 additions & 64 deletions PyMC3_HLM.py
    @@ -30,86 +30,44 @@
    #################################
    # %%
    n_trials = 30 # by block.
    n_users = 30
    n_obs = 3 * n_trials * n_users

user0 = pd.DataFrame({'user': 0,
                      'condition': 'A',
                      'experience': 0.0,
                      'block': 1 + np.random.randint(3, size=3*n_trials),
                      'parallel': np.random.normal(0.0, 5.5, size=3*n_trials),
                      'orthogonal': np.random.normal(0.0, 5.5, size=3*n_trials)})
df = pd.DataFrame({'user': np.random.randint(n_users, size=n_obs),
                   'block': 1 + np.random.randint(3, size=n_obs),
                   'parallel': np.random.normal(0.0, 1, size=n_obs),
                   'orthogonal': np.random.normal(0.0, 1, size=n_obs)})

user1 = pd.DataFrame({'user': 1,
                      'condition': 'A',
                      'experience': np.nan,
                      'block': 1 + np.random.randint(3, size=3*n_trials),
                      'parallel': np.random.normal(0.0, 4.5, size=3*n_trials),
                      'orthogonal': np.random.normal(0.0, 5.5, size=3*n_trials)})

    # %%
    # Create some data conforming to main effect of projection model: variance in parallel > orthogonal.
user2 = pd.DataFrame({'user': 2,
                      'condition': 'B',
                      'experience': 4.0,
                      'block': 1 + np.random.randint(3, size=3*n_trials),
                      'parallel': np.random.normal(0.0, 7.0, size=3*n_trials),
                      'orthogonal': np.random.normal(0.0, 3.3, size=3*n_trials)})

user3 = pd.DataFrame({'user': 3,
                      'condition': 'B',
                      'experience': 3.0,
                      'block': 1 + np.random.randint(3, size=3*n_trials),
                      'parallel': np.random.normal(0.0, 8.0, size=3*n_trials),
                      'orthogonal': np.random.normal(0.0, 4.5, size=3*n_trials)})

user4 = pd.DataFrame({'user': 4,
                      'condition': 'B',
                      'experience': 3.0,
                      'block': 1 + np.random.randint(3, size=3*n_trials),
                      'parallel': np.random.normal(0.0, 5.0, size=3*n_trials),
                      'orthogonal': np.random.normal(0.0, 1.5, size=3*n_trials)})

    # %%
    # Create some data conforming to interaction effect version 1:
    # blocks 1&3 have smallest orthogonal deviations, but largest parallel deviations. Strong synergy.
user5 = pd.DataFrame({'user': 5,
                      'condition': 'C',
                      'experience': 0.0,
                      'block': 1 + np.random.randint(3, size=3*n_trials),
                      'parallel': np.random.normal(0.0, 7.0, size=3*n_trials),
                      'orthogonal': np.random.normal(0.0, 3.0, size=3*n_trials)})
# Block 2 has smaller parallel deviations than blocks 1&3, but higher orthogonal deviations than blocks 1&3.
# In this case sigma_block2_parallel - sigma_block2_orthogonal = 0
# No synergy.
user5.loc[user5['block'] == 2, ['parallel', 'orthogonal']] = np.random.normal(0.0, 5.0, size=((user5['block']==2).sum(), 2))

user6 = pd.DataFrame({'user': 6,
                      'experience': 4.0,
                      'condition': 'C',
                      'block': 1 + np.random.randint(3, size=3*n_trials),
                      'parallel': np.random.normal(0.0, 6.5, size=3*n_trials),
                      'orthogonal': np.random.normal(0.0, 2.5, size=3*n_trials)})
user6.loc[user6['block'] == 2, ['parallel', 'orthogonal']] = np.random.normal(0.0, 4.5, size=((user6['block']==2).sum(), 2))
    # %%
    df = pd.concat((user0, user1, user2, user3, user4, user5, user6), axis='index')
    df.sort_values(by=['user', 'block'], inplace=True)
    df.reset_index(drop=True, inplace=True)
    # Make sure mean of each block is zero as with real data.
    df[['parallel', 'orthogonal']] = df.groupby(['user', 'block'])[['parallel', 'orthogonal']].transform(lambda x: x - x.mean())

    # %%
    # Assign each user to a condition.
    df['condition'] = df['user'].map(pd.Series(index=df['user'].unique(), data=np.random.choice(['A', 'B', 'C'], size=df['user'].nunique())))
    # Experience for each user.
    df['experience'] = df['user'].map(pd.Series(index=df['user'].unique(), data=np.random.choice([np.nan, 0, 1, 2, 3, 4], size=df['user'].nunique())))
    # Give each block its own ID.
    df['block_id'] = pd.factorize(df['user'].astype(str) + df['block'].astype(str))[0]
    # Trials.
    df['trial'] = df.groupby(['block_id'])['block'].transform(lambda x: np.arange(len(x)))
    df = df[df['trial'] < n_trials]
    # Categorical columns.
    df[['user', 'condition', 'block_id', 'block']] = df[['user', 'condition', 'block_id', 'block']].astype('category')
    # Give each block_id a rating.
    df['rating'] = df['block_id'].map(pd.Series(index=df['block_id'].unique(), data=np.random.randint(5, size=df['block_id'].cat.categories.size)))
    df[['parallel', 'orthogonal']] = df[['parallel', 'orthogonal']].astype(theano.config.floatX)

    # %%
    # Thin out rows to simulate missing data.
    # Thin out rows to simulate more missing data.
    mask = np.random.choice([*[True]*4, False], size=df.shape[0])
    df = df[mask]
    n_obs = len(df)
    # Make sure mean of each block is zero as with real data.
    df[['parallel', 'orthogonal']] = df.groupby(['user', 'block'])[['parallel', 'orthogonal']].transform(lambda x: x - x.mean())
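# Sanity check (sketch, assuming df as built above): per-block means should now
# be ~0. With categorical keys, empty user/block combinations yield NaN, hence fillna.
assert np.allclose(df.groupby(['user', 'block'])[['parallel', 'orthogonal']].mean().fillna(0), 0)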

    # Scale block deviations.
    scale_para = (9-3) * np.random.random_sample(df['block_id'].nunique()) + 3
    scale_ortho = (7-1.5) * np.random.random_sample(df['block_id'].nunique()) + 1.5
    scale = np.stack((scale_para, scale_ortho)).T
df[['parallel', 'orthogonal']] = df.groupby(['block_id'])[['parallel', 'orthogonal']].apply(lambda x: x * scale[x.name])

    # %%
    # Reshape data into long format.
    87 changes: 23 additions & 64 deletions PyMC3_model_sandbox.py
    @@ -35,85 +35,44 @@
    ### Dummy Data ###
    ##################
    n_trials = 30 # by block.
    n_users = 30
    n_obs = 3 * n_trials * n_users

df = pd.DataFrame({'user': np.random.randint(n_users, size=n_obs),
                   'block': 1 + np.random.randint(3, size=n_obs),
                   'parallel': np.random.normal(0.0, 1, size=n_obs),
                   'orthogonal': np.random.normal(0.0, 1, size=n_obs)})

user0 = pd.DataFrame({'user': 0,
                      'condition': 'A',
                      'experience': 0.0,
                      'block': 1 + np.random.randint(3, size=3*n_trials),
                      'parallel': np.random.normal(0.0, 5.5, size=3*n_trials),
                      'orthogonal': np.random.normal(0.0, 5.5, size=3*n_trials)})

user1 = pd.DataFrame({'user': 1,
                      'condition': 'A',
                      'experience': np.nan,
                      'block': 1 + np.random.randint(3, size=3*n_trials),
                      'parallel': np.random.normal(0.0, 4.5, size=3*n_trials),
                      'orthogonal': np.random.normal(0.0, 5.5, size=3*n_trials)})

    # Create some data conforming to main effect of projection model: variance in parallel > orthogonal.
user2 = pd.DataFrame({'user': 2,
                      'condition': 'B',
                      'experience': 4.0,
                      'block': 1 + np.random.randint(3, size=3*n_trials),
                      'parallel': np.random.normal(0.0, 7.0, size=3*n_trials),
                      'orthogonal': np.random.normal(0.0, 3.3, size=3*n_trials)})

user3 = pd.DataFrame({'user': 3,
                      'condition': 'B',
                      'experience': 3.0,
                      'block': 1 + np.random.randint(3, size=3*n_trials),
                      'parallel': np.random.normal(0.0, 8.0, size=3*n_trials),
                      'orthogonal': np.random.normal(0.0, 4.5, size=3*n_trials)})

user4 = pd.DataFrame({'user': 4,
                      'condition': 'B',
                      'experience': 3.0,
                      'block': 1 + np.random.randint(3, size=3*n_trials),
                      'parallel': np.random.normal(0.0, 5.0, size=3*n_trials),
                      'orthogonal': np.random.normal(0.0, 1.5, size=3*n_trials)})

    # Create some data conforming to interaction effect version 1:
    # blocks 1&3 have smallest orthogonal deviations, but largest parallel deviations. Strong synergy.
user5 = pd.DataFrame({'user': 5,
                      'condition': 'C',
                      'experience': 0.0,
                      'block': 1 + np.random.randint(3, size=3*n_trials),
                      'parallel': np.random.normal(0.0, 7.0, size=3*n_trials),
                      'orthogonal': np.random.normal(0.0, 3.0, size=3*n_trials)})
# Block 2 has smaller parallel deviations than blocks 1&3, but higher orthogonal deviations than blocks 1&3.
# In this case sigma_block2_parallel - sigma_block2_orthogonal = 0
# No synergy.
user5.loc[user5['block'] == 2, ['parallel', 'orthogonal']] = np.random.normal(0.0, 5.0, size=((user5['block']==2).sum(), 2))

user6 = pd.DataFrame({'user': 6,
                      'experience': 4.0,
                      'condition': 'C',
                      'block': 1 + np.random.randint(3, size=3*n_trials),
                      'parallel': np.random.normal(0.0, 6.5, size=3*n_trials),
                      'orthogonal': np.random.normal(0.0, 2.5, size=3*n_trials)})
user6.loc[user6['block'] == 2, ['parallel', 'orthogonal']] = np.random.normal(0.0, 4.5, size=((user6['block']==2).sum(), 2))
    # %%
    # Merge data from fictional users.
    df = pd.concat((user0, user1, user2, user3, user4, user5, user6), axis='index')
    df.sort_values(by=['user', 'block'], inplace=True)
    df.reset_index(drop=True, inplace=True)
    # Make sure mean of each block is zero as with real data.
    df[['parallel', 'orthogonal']] = df.groupby(['user', 'block'])[['parallel', 'orthogonal']].transform(lambda x: x - x.mean())
    df[['parallel', 'orthogonal']] = df[['parallel', 'orthogonal']].astype(theano.config.floatX)

    # Assign each user to a condition.
    df['condition'] = df['user'].map(pd.Series(index=df['user'].unique(), data=np.random.choice(['A', 'B', 'C'], size=df['user'].nunique())))
    # Experience for each user.
    df['experience'] = df['user'].map(pd.Series(index=df['user'].unique(), data=np.random.choice([np.nan, 0, 1, 2, 3, 4], size=df['user'].nunique())))
    # Give each block its own ID.
    df['block_id'] = pd.factorize(df['user'].astype(str) + df['block'].astype(str))[0]
    # Trials.
    df['trial'] = df.groupby(['block_id'])['block'].transform(lambda x: np.arange(len(x)))
    df = df[df['trial'] < n_trials]
    # Categorical columns.
    df[['user', 'condition', 'block_id', 'block']] = df[['user', 'condition', 'block_id', 'block']].astype('category')
    # Give each block_id a rating.
    df['rating'] = df['block_id'].map(pd.Series(index=df['block_id'].unique(), data=np.random.randint(5, size=df['block_id'].cat.categories.size)))
    df[['parallel', 'orthogonal']] = df[['parallel', 'orthogonal']].astype(theano.config.floatX)

    # %%
    # Thin out rows to simulate missing data.
    # Thin out rows to simulate more missing data.
    mask = np.random.choice([*[True]*4, False], size=df.shape[0])
    df = df[mask]
    n_obs = len(df)
    # Make sure mean of each block is zero as with real data.
    df[['parallel', 'orthogonal']] = df.groupby(['user', 'block'])[['parallel', 'orthogonal']].transform(lambda x: x - x.mean())

    # Scale block deviations.
    scale_para = (9-3) * np.random.random_sample(df['block_id'].nunique()) + 3
    scale_ortho = (7-1.5) * np.random.random_sample(df['block_id'].nunique()) + 1.5
    scale = np.stack((scale_para, scale_ortho)).T
df[['parallel', 'orthogonal']] = df.groupby(['block_id'])[['parallel', 'orthogonal']].apply(lambda x: x * scale[x.name])

    # %%
    ### Coordinates ###
  2. OlafHaag revised this gist Sep 29, 2020. 1 changed file with 457 additions and 44 deletions.
    501 changes: 457 additions & 44 deletions PyMC3_model_sandbox.py
    @@ -15,20 +15,25 @@
    Users rate the difficulty of each block they performed.
    """
    # %%
    ### Imports ###
    import arviz as az
    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd
    from patsy import dmatrices
    import patsy
    import pymc3 as pm
    import seaborn as sns
    import theano
    import theano.tensor as tt

    RANDOM_SEED = 2048
    np.random.seed(386)

    az.style.use('arviz-darkgrid')

    #################################
    # Dummy Data #
    #################################
    # %%
    ### Dummy Data ###
    ##################
    n_trials = 30 # by block.

    user0 = pd.DataFrame({'user': 0,
    @@ -45,7 +50,6 @@
                      'parallel': np.random.normal(0.0, 4.5, size=3*n_trials),
                      'orthogonal': np.random.normal(0.0, 5.5, size=3*n_trials)})

    # %%
    # Create some data conforming to main effect of projection model: variance in parallel > orthogonal.
    user2 = pd.DataFrame({'user': 2,
                      'condition': 'B',
    @@ -68,7 +72,6 @@
                      'parallel': np.random.normal(0.0, 5.0, size=3*n_trials),
                      'orthogonal': np.random.normal(0.0, 1.5, size=3*n_trials)})

    # %%
    # Create some data conforming to interaction effect version 1:
    # blocks 1&3 have smallest orthogonal deviations, but largest parallel deviations. Strong synergy.
    user5 = pd.DataFrame({'user': 5,
    @@ -90,13 +93,14 @@
                      'orthogonal': np.random.normal(0.0, 2.5, size=3*n_trials)})
    user6.loc[user6['block'] == 2, ['parallel', 'orthogonal']] = np.random.normal(0.0, 4.5, size=((user6['block']==2).sum(), 2))
    # %%
    # Merge data from fictional users.
    df = pd.concat((user0, user1, user2, user3, user4, user5, user6), axis='index')
    df.sort_values(by=['user', 'block'], inplace=True)
    df.reset_index(drop=True, inplace=True)
    # Make sure mean of each block is zero as with real data.
    df[['parallel', 'orthogonal']] = df.groupby(['user', 'block'])[['parallel', 'orthogonal']].transform(lambda x: x - x.mean())
    df[['parallel', 'orthogonal']] = df[['parallel', 'orthogonal']].astype(theano.config.floatX)

    # %%
    # Give each block its own ID.
    df['block_id'] = pd.factorize(df['user'].astype(str) + df['block'].astype(str))[0]
    # Trials.
    @@ -112,9 +116,8 @@
    df = df[mask]

    # %%
    #################################
    # Coordinates #
    #################################
    ### Coordinates ###
    ###################
    conditions = df['condition'].cat.categories.tolist()
    n_conditions = df['condition'].nunique()
    condition_indices = df['condition'].cat.codes.values # Condition index for every single value.
    @@ -155,66 +158,165 @@
          'obs_id': np.arange(len(df))}

    # %%
    #################################
    # Null Model #
    #################################
    # Reshape data into long format.
null_data = df.melt(id_vars=['user', 'block'], value_vars=['parallel', 'orthogonal'],
                    var_name='direction', value_name='projection')
    ### Design Matrices ###
    #######################
    # For each block create an on/off [0, 1] switch for all trials.
    block_mx = patsy.dmatrix('0 + block', df, return_type='dataframe').astype(int)
    block_mx.columns = blocks
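
# Sanity check (sketch, not in the original gist; assumes block is categorical
# as above): the dmatrix is a plain one-hot encoding of 'block', so it should
# match pandas' dummies.
assert (block_mx.values == pd.get_dummies(df['block']).values).all()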

    # %%
    ### Helper functions ###
def plot_ppc_loopit(idata, title):
    fig = plt.figure(figsize=(12, 9))
    ax_ppc = fig.add_subplot(211)
    ax1 = fig.add_subplot(223); ax2 = fig.add_subplot(224)
    az.plot_ppc(idata, ax=ax_ppc)
    for ax, ecdf in zip([ax1, ax2], (False, True)):
        az.plot_loo_pit(idata, y="projection", ecdf=ecdf, ax=ax)
    ax_ppc.set_title(title)
    ax_ppc.set_xlabel("")
    return np.array([ax_ppc, ax1, ax2])


def print_divergencies(trace):
    # Display the total number and percentage of divergences.
    divergent = trace['diverging']
    print(f"Number of Divergences: {divergent.nonzero()[0].size}")
    divperc = divergent.nonzero()[0].size / len(trace) * 100
    print(f"Percentage of Divergences: {divperc:.1f}")



# %%
### Null Model ###
##################
with pm.Model(coords=coords) as null_model:
    # Wrapping the index in a Data object keeps it in the model
    # and stores the variables in the constant_data group of an InferenceData object.
    user_idx = pm.Data('user_idx', user_indices, dims='obs_id')

    # Hyperpriors:
    global_sigma = pm.Gamma('global_sigma', mu=5, sigma=5)
    global_sigma_sd = pm.HalfCauchy('global_sigma_sd', 4.0)
    # Varying user variability.
    # Varying user variability. How many priors of this kind we want is inferred from dims.
    user_sd = pm.Gamma('user_sd', mu=global_sigma, sigma=global_sigma_sd, dims='User')

    # Our coordinates aren't in long format, so we need to repeat each user to cover both directions.
    # Our coordinates aren't in long format, so we repeat the same prior to cover both directions.
    theta = tt.stack((user_sd, user_sd)).T
    projection = pm.Normal('projection', mu=0, sigma=theta[user_idx], observed=df[directions], dims='obs_id')
    # Indexing theta with user_idx maps each observation to its user's prior.
    projection = pm.Normal('projection', mu=0, sigma=theta[user_idx], observed=df[directions], dims=('obs_id', 'Direction'))
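    # Note (sketch of the shapes involved): theta has shape (n_users, 2), so
    # theta[user_idx] broadcasts to (n_obs, 2) and lines up with the two
    # observed columns in df[directions].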

    # %%
    # Show model graph.
    pm.model_to_graphviz(null_model)
    # %%
    # Prior predictive check.
with null_model:
    null_prior = pm.sample_prior_predictive(700)
    null_idata = az.from_pymc3(prior=null_prior)

    az.plot_ppc(null_idata, group="prior")

    # %%
    # Sample from posterior.
with null_model:
    null_model.name = "Null Model"
    trace0 = pm.sample(1000, tune=800)
    null_trace = pm.sample(1000, tune=800)
# The dimensions and coordinates are lost during sampling.
# pm.MultiTrace objects do not store the labeled coordinates of their variables.
# But we can infer the coordinates from the model.
idata_aux = az.from_pymc3(null_trace, model=null_model)
# Add the posterior to our prior information.
null_idata.extend(idata_aux)
    az.plot_posterior(null_idata, var_names=['global'], filter_vars='like')
    # %%
    az.plot_pair(null_idata, var_names=['global'], filter_vars='like')

    # %%
    az.summary(trace0)
    az.summary(null_idata)
    # %%
    az.plot_trace(trace0, compact=True, chain_prop={"ls": "-"})
    az.plot_trace(null_idata, compact=True, chain_prop={"ls": "-"})

    # %%
    az.loo(trace0)
    # Posterior predictive check.
with null_model:
    null_post_pred = pm.sample_posterior_predictive(null_idata)
    idata_aux = az.from_pymc3(posterior_predictive=null_post_pred)
null_idata.extend(idata_aux)

    # %%
    #########################################################
    # Main Effect Projection Direction Model (unrestricted) #
    #########################################################
    with pm.Model(coords=coords) as dir_model_u:
    az.loo(null_idata)
    # %%
    plot_ppc_loopit(null_idata, null_model.name)

    # %%
    # Main Effect Projection Direction Model (undirected) #
    #######################################################
with pm.Model(coords=coords) as dir_model:
    user_idx = pm.Data('user_idx', user_indices, dims='obs_id')

    # Hyperpriors:
    global_sigma = pm.Gamma('global_sigma', mu=5, sigma=5, dims='Direction')
    global_sigma_sd = pm.HalfCauchy('global_sigma_sd', 4.0, dims='Direction')
    # Varying user variability.
    # Centered parameterization.
    user_sd = pm.Gamma('user_sd', mu=global_sigma, sigma=global_sigma_sd, dims=('User', 'Direction'))

    # Non-centered reparameterization.
    #user_sd_offset = pm.Exponential('user_sd_offset', 1.0, dims=('User', 'Direction'))
    #user_sd = pm.Deterministic('user_sd', global_sigma + user_sd_offset * global_sigma_sd, dims=('User', 'Direction'))
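    # Note: the non-centered variant samples a standardized offset and rescales
    # it by the group-level sd, which can help NUTS when that sd is small.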

    projection = pm.Normal('projection', mu=0, sigma=user_sd[user_idx], observed=df[directions], dims=('obs_id', 'Direction'))

    # %%
    # Show model graph.
    pm.model_to_graphviz(dir_model)
    # %%
    pm.model_to_graphviz(dir_model_u)
    # Prior predictive check.
with dir_model:
    dir_prior = pm.sample_prior_predictive(700)
    dir_idata = az.from_pymc3(prior=dir_prior)

    #az.plot_ppc(dir_idata, group="prior")

    # %%
with dir_model_u:
    dir_model_u.name = "Direction Model"
    trace1_u = pm.sample(1000, tune=800)
# Sample from posterior.
with dir_model:
    dir_model.name = "Main Effect Direction Model"
    dir_trace = pm.sample(1000, tune=1200)  #, target_accept=0.95)
# The dimensions and coordinates are lost during sampling.
# pm.MultiTrace objects do not store the labeled coordinates of their variables.
# But we can infer the coordinates from the model.
idata_aux = az.from_pymc3(dir_trace, model=dir_model)
# Add the posterior to our prior information.
dir_idata.extend(idata_aux)
print_divergencies(dir_trace)

    # %%
    #######################################################
    # Main Effect Projection Direction Model (restricted) #
    #######################################################
    with pm.Model() as dir_model_r:
    az.plot_posterior(dir_idata, var_names=['global_sigma'])
    # %%
    az.plot_pair(dir_idata, var_names=['global'], filter_vars='like', divergences=True)

    # %%
    az.summary(dir_idata)
    # %%
    az.plot_trace(dir_idata, compact=True, chain_prop={"ls": "-"})

    # %%
    # Posterior predictive check.
    with dir_model:
    dir_post_pred = pm.sample_posterior_predictive(dir_idata)
    idata_aux = az.from_pymc3(posterior_predictive=dir_post_pred)
    dir_idata.extend(idata_aux)

    # %%
    az.loo(dir_idata)
    # %%
    plot_ppc_loopit(dir_idata, dir_model.name)

    # %%
    # Main Effect Projection Direction Model (directed) #
    #####################################################
with pm.Model(coords=coords) as dir_model_r:
    user_idx = pm.Data('user_idx', user_indices, dims='obs_id')
    # Hyperpriors.
    global_sigma_ortho = pm.Gamma("global_sigma_ortho", mu=3, sigma=5)
    @@ -240,16 +342,327 @@
    pm.model_to_graphviz(dir_model_r)

    # %%
    # Check priors.
with dir_model_r:
    prior_checks = pm.sample_prior_predictive()
    proj_idata_prior1_r = az.from_pymc3(prior=prior_checks)
# Prior predictive check.
with dir_model_r:
    dir_prior_r = pm.sample_prior_predictive(700)
    dir_idata_r = az.from_pymc3(prior=dir_prior_r)

    # %%
    #az.plot_ppc(dir_idata, group="prior")
    _, ax = plt.subplots()
    proj_idata_prior1_r.prior.plot.scatter(x='global_sigma_ortho', y='global_sigma_parallel', color='k', alpha=0.2, ax=ax)
    dir_idata_r.prior.plot.scatter(x='global_sigma_ortho', y='global_sigma_parallel', color='k', alpha=0.2, ax=ax)

    # %%
    # Sample from posterior.
with dir_model_r:
    dir_model_r.name = "Main Effect Direction Model"
    dir_trace_r = pm.sample(1000, tune=1200)  #, target_accept=0.95)
# The dimensions and coordinates are lost during sampling.
# pm.MultiTrace objects do not store the labeled coordinates of their variables.
# But we can infer the coordinates from the model.
idata_aux = az.from_pymc3(dir_trace_r, model=dir_model_r)
# Add the posterior to our prior information.
dir_idata_r.extend(idata_aux)
print_divergencies(dir_trace_r)

    # %%
    az.plot_posterior(dir_idata_r, var_names=['global_sigma'])
    # %%
    az.plot_pair(dir_idata_r, var_names=['global'], filter_vars='like', divergences=True)

    # %%
    az.summary(dir_idata_r)
    # %%
    az.plot_trace(dir_idata_r, compact=True, chain_prop={"ls": "-"})

    # %%
    # Posterior predictive check.
with dir_model_r:
    dir_model_r.name = "Main Effect Direction"
    trace1_r = pm.sample(1000, tune=1000)
    az.summary(trace1_r)
    dir_post_pred_r = pm.sample_posterior_predictive(dir_idata_r)
    idata_aux = az.from_pymc3(posterior_predictive=dir_post_pred_r)
dir_idata_r.extend(idata_aux)

    # %%
    az.loo(dir_idata_r)
    # %%
    plot_ppc_loopit(dir_idata_r, dir_model_r.name)

    # %%
    # Main Effect Block Model (undirected) #
    ########################################
with pm.Model(coords=coords) as block_model:
    user_idx = pm.Data('user_idx', user_indices, dims='obs_id')
    block_idx = pm.Data('block_idx', block_indices, dims='obs_id')
    # Hyperpriors.
    global_sigma = pm.Gamma("global_sigma", mu=5, sigma=5, dims='Block')
    global_sigma_sd = pm.HalfCauchy('global_sigma_sd', 4.0, dims='Block')

    # Priors.
    user_sd = pm.Gamma('user_sd', mu=global_sigma, sigma=global_sigma_sd, dims=('User', 'Block'))

    # Expected deviation per user and direction:
    theta = tt.stack((user_sd[user_idx, block_idx], user_sd[user_idx, block_idx])).T
    # Observed.
    projection = pm.Normal('projection', 0.0, sigma=theta, observed=df[directions], dims=('obs_id', 'Direction'))

    # %%
    pm.model_to_graphviz(block_model)

    # %%
    # Prior predictive check.
with block_model:
    block_prior = pm.sample_prior_predictive(700)
    block_idata = az.from_pymc3(prior=block_prior)

    #az.plot_ppc(block_idata, group="prior")

    # %%
    # Sample from posterior.
with block_model:
    block_model.name = "Main Effect Block Model"
    block_trace = pm.sample(1000, tune=1200, target_accept=0.95)
# The dimensions and coordinates are lost during sampling.
# pm.MultiTrace objects do not store the labeled coordinates of their variables.
# But we can infer the coordinates from the model.
idata_aux = az.from_pymc3(block_trace, model=block_model)
# Add the posterior to our prior information.
block_idata.extend(idata_aux)
print_divergencies(block_trace)

    # %%
    az.plot_posterior(block_idata, var_names=['global_sigma'])
    # %%
    az.plot_pair(block_idata, var_names=['global'], filter_vars='like', divergences=True)

    # %%
    az.summary(block_idata)
    # %%
    az.plot_trace(block_idata, compact=True, chain_prop={"ls": "-"})

    # %%
    # Posterior predictive check.
with block_model:
    block_post_pred = pm.sample_posterior_predictive(block_idata)
    idata_aux = az.from_pymc3(posterior_predictive=block_post_pred)
block_idata.extend(idata_aux)

    # %%
    az.loo(block_idata)
    # %%
    plot_ppc_loopit(block_idata, block_model.name)

    # %%
    # Main Effect Block Model (directed) #
    ######################################
with pm.Model(coords=coords) as block_model_r:
    user_idx = pm.Data('user_idx', user_indices, dims='obs_id')
    block2_idx = pm.Data('block2_idx', block_mx[1].values, dims='obs_id')

    # Hyperpriors.
    global_sigma_b13 = pm.Gamma("global_sigma_blocks13", mu=5, sigma=5)
    global_sigma_b13_sd = pm.HalfCauchy('global_sigma_blocks13_sd', 4.0)
    global_sigma_diff = pm.Gamma("global_sigma_diff", mu=5, sigma=5)
    global_sigma_diff_sd = pm.HalfCauchy('global_sigma_diff_sd', 4.0)
    # The sum of two independent gamma-distributed variables (with a common rate) is again gamma-distributed.
    global_sigma_b2 = pm.Deterministic("global_sigma_block2", global_sigma_b13 + global_sigma_diff)

    # Priors.
    user_sd_b13 = pm.Gamma('user_sd_blocks13', mu=global_sigma_b13, sigma=global_sigma_b13_sd, dims='User')
    user_sd_diff = pm.Gamma('user_sd_diff', mu=global_sigma_diff, sigma=global_sigma_diff_sd, dims='User')
    user_sd_b2 = pm.Deterministic("user_sd_block2", user_sd_b13 + user_sd_diff, dims='User')

    # Expected deviation per user and direction:
    theta_ = (1 - block2_idx) * user_sd_b13[user_idx] + block2_idx * user_sd_b2[user_idx]
    theta = tt.stack((theta_, theta_)).T
    # Observed.
    projection = pm.Normal('projection', 0.0, sigma=theta, observed=df[directions], dims=('obs_id', 'Direction'))

    # %%
    pm.model_to_graphviz(block_model_r)
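
# %%
# Quick numerical check of the gamma-sum note above (a sketch, not part of the
# original gist; needs scipy, which the gist itself does not import): with a
# common rate b, the shape parameters simply add.
from scipy import stats
a1, a2, b = 2.0, 3.0, 1.5
s = stats.gamma.rvs(a1, scale=1/b, size=50_000) + stats.gamma.rvs(a2, scale=1/b, size=50_000)
print(stats.kstest(s, 'gamma', args=(a1 + a2, 0, 1/b)))  # expect a large p-value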

    # %%
    # Prior predictive check.
with block_model_r:
    block_prior_r = pm.sample_prior_predictive(700)
    block_idata_r = az.from_pymc3(prior=block_prior_r)

    #az.plot_ppc(block_idata, group="prior")

    # %%
    # Sample from posterior.
with block_model_r:
    block_model_r.name = "Main Effect Block 2>[1&3] Model"
    block_trace_r = pm.sample(1000, tune=1200, target_accept=0.95)
# The dimensions and coordinates are lost during sampling.
# pm.MultiTrace objects do not store the labeled coordinates of their variables.
# But we can infer the coordinates from the model.
idata_aux = az.from_pymc3(block_trace_r, model=block_model_r)
# Add the posterior to our prior information.
block_idata_r.extend(idata_aux)
print_divergencies(block_trace_r)

    # %%
    az.plot_posterior(block_idata_r, var_names=['global_sigma'])
    # %%
    az.plot_pair(block_idata_r, var_names=['global'], filter_vars='like', divergences=True)

    # %%
    az.summary(block_idata_r)
    # %%
    az.plot_trace(block_idata_r, compact=True, chain_prop={"ls": "-"})

    # %%
    # Posterior predictive check.
with block_model_r:
    block_post_pred_r = pm.sample_posterior_predictive(block_idata_r)
    idata_aux = az.from_pymc3(posterior_predictive=block_post_pred_r)
block_idata_r.extend(idata_aux)

    # %%
    az.loo(block_idata_r)
    # %%
    plot_ppc_loopit(block_idata_r, block_model_r.name)

    # %%
    # Interaction Effect Projection Direction x Block Model (undirected) #
    ######################################################################
with pm.Model(coords=coords) as interaction_model:
    user_idx = pm.Data('user_idx', user_indices, dims='obs_id')
    block_idx = pm.Data('block_idx', block_indices, dims='obs_id')

    # Hyperpriors:
    global_sigma = pm.Gamma('global_sigma', mu=5, sigma=5, dims=('Direction', 'Block'))
    global_sigma_sd = pm.HalfCauchy('global_sigma_sd', 4.0, dims=('Direction', 'Block'))
    # Varying user variability.
    # Centered parameterization.
    user_sd = pm.Gamma('user_sd', mu=global_sigma, sigma=global_sigma_sd, dims=('User', 'Direction', 'Block'))
    # Non-centered reparameterization.
    #user_sd_offset = pm.Exponential('user_sd_offset', 1.0, dims=('User', 'Direction'))
    #user_sd = pm.Deterministic('user_sd', global_sigma + user_sd_offset * global_sigma_sd, dims=('User', 'Direction'))

    # Expected deviation per user and direction:
    theta = tt.stack((user_sd[user_idx, 0, block_idx], user_sd[user_idx, 1, block_idx])).T
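    # Note: this is NumPy-style fancy indexing; the index arrays broadcast to
    # length n_obs, so each row of theta is one observation's
    # (orthogonal, parallel) sd for its user and block.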

    projection = pm.Normal('projection', mu=0, sigma=theta, observed=df[directions], dims=('obs_id', 'Direction'))

    # %%
    # Show model graph.
    pm.model_to_graphviz(interaction_model)

    # %%
    # Prior predictive check.
with interaction_model:
    interaction_prior = pm.sample_prior_predictive(700)
    interaction_idata = az.from_pymc3(prior=interaction_prior)

    #az.plot_ppc(interaction_idata, group="prior")

    # %%
    # Sample from posterior.
with interaction_model:
    interaction_model.name = "Interaction Direction x Block Model"
    interaction_trace = pm.sample(1000, tune=1200, target_accept=0.99)
# The dimensions and coordinates are lost during sampling.
# pm.MultiTrace objects do not store the labeled coordinates of their variables.
# But we can infer the coordinates from the model.
idata_aux = az.from_pymc3(interaction_trace, model=interaction_model)
# Add the posterior to our prior information.
interaction_idata.extend(idata_aux)
print_divergencies(interaction_trace)

    # %%
    az.plot_posterior(interaction_idata, var_names=['global_sigma'])
    # %%
    az.plot_pair(interaction_idata, var_names=['global_sigma'], filter_vars=None, divergences=True)

    # %%
    az.summary(interaction_idata)
    # %%
    az.plot_trace(interaction_idata, compact=True, chain_prop={"ls": "-"})

    # %%
    # Posterior predictive check.
with interaction_model:
    interaction_post_pred = pm.sample_posterior_predictive(interaction_idata)
    idata_aux = az.from_pymc3(posterior_predictive=interaction_post_pred)
interaction_idata.extend(idata_aux)

    # %%
    az.loo(interaction_idata)
    # %%
    plot_ppc_loopit(interaction_idata, interaction_model.name)

    # %%
    # Hierarchical Model Blocks -> Direction -> User #
    ##################################################
with pm.Model(coords=coords) as hierarchical_model:
    user_idx = pm.Data('user_idx', user_indices, dims='obs_id')
    block_idx = pm.Data('block_idx', block_indices, dims='obs_id')

    # Hyperpriors:
    global_sigma_block = pm.Gamma('global_sigma_block', mu=5, sigma=5, dims='Block')
    global_sigma_block_sd = pm.HalfCauchy('global_sigma_block_sd', 4.0, dims='Block')

    global_sigma_direction = pm.Gamma('global_sigma_dir', mu=global_sigma_block, sigma=global_sigma_block_sd, dims=('Direction', 'Block'))
    global_sigma_direction_sd = pm.HalfCauchy('global_sigma_dir_sd', 4.0, dims=('Direction', 'Block'))
    # Varying user variability.
    # Centered parameterization.
    user_sd = pm.Gamma('user_sd', mu=global_sigma_direction, sigma=global_sigma_direction_sd, dims=('User', 'Direction', 'Block'))

    # Expected deviation per user and direction:
    theta = tt.stack((user_sd[user_idx, 0, block_idx], user_sd[user_idx, 1, block_idx])).T

    projection = pm.Normal('projection', mu=0, sigma=theta, observed=df[directions], dims=('obs_id', 'Direction'))

    # %%
    # Show model graph.
    pm.model_to_graphviz(hierarchical_model)

    # %%
    # Prior predictive check.
with hierarchical_model:
    hierarchical_prior = pm.sample_prior_predictive(700)
    hierarchical_idata = az.from_pymc3(prior=hierarchical_prior)

    # %%
    # Sample from posterior.
with hierarchical_model:
    hierarchical_model.name = "Hierarchical Block x Direction Model"
    hierarchical_trace = pm.sample(1000, tune=1200, target_accept=0.99)
# The dimensions and coordinates are lost during sampling.
# pm.MultiTrace objects do not store the labeled coordinates of their variables.
# But we can infer the coordinates from the model.
idata_aux = az.from_pymc3(hierarchical_trace, model=hierarchical_model)
# Add the posterior to our prior information.
hierarchical_idata.extend(idata_aux)
print_divergencies(hierarchical_trace)

    # %%
    az.plot_posterior(hierarchical_idata, var_names=['global_sigma_dir'], filter_vars=None)

    # %%
    # non-centered Hierarchical Model #
    # Blocks -> Direction -> User #
    ###################################
with pm.Model(coords=coords) as hierarchical_model_nc:
    user_idx = pm.Data('user_idx', user_indices, dims='obs_id')
    block_idx = pm.Data('block_idx', block_indices, dims='obs_id')

    # Hyperpriors:
    global_sigma_block = pm.Gamma('global_sigma_block', mu=5, sigma=5, dims='Block')
    global_sigma_block_sd = pm.HalfCauchy('global_sigma_block_sd', 4.0, dims='Block')

    # A potentially negative offset is problematic if it pushes beyond 0.
    # A strictly positive offset is problematic, because it biases the estimate.
    sigma_direction_offset = pm.Exponential('sigma_dir_offset', 1.0, dims=('Direction', 'Block'))
    sigma_direction = pm.Deterministic('sigma_dir', global_sigma_block + sigma_direction_offset * global_sigma_block_sd, dims=('Direction', 'Block'))
    sigma_direction_sd = pm.HalfCauchy('sigma_dir_sd', 4.0, dims=('Direction', 'Block'))
    # Varying user variability.
    # Non-centered reparameterization.
    user_sd_offset = pm.Exponential('user_sd_offset', 1.0, dims=('User', 'Direction', 'Block'))
    user_sd = pm.Deterministic('user_sd', sigma_direction + user_sd_offset * sigma_direction_sd, dims=('User', 'Direction', 'Block'))

    # Expected deviation per user and direction:
    theta = tt.stack((user_sd[user_idx, 0, block_idx], user_sd[user_idx, 1, block_idx])).T

    projection = pm.Normal('projection', mu=0, sigma=theta, observed=df[directions], dims=('obs_id', 'Direction'))
  3. OlafHaag revised this gist Sep 26, 2020. 1 changed file with 64 additions and 15 deletions.
    79 changes: 64 additions & 15 deletions PyMC3_HLM.py
    @@ -19,9 +19,10 @@
    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd
    from patsy import dmatrices
    import patsy
    import pymc3 as pm
    import seaborn as sns
    import theano.tensor as tt


    #################################
    @@ -117,14 +118,18 @@
                  var_name='direction', value_name='projection')
    samples['direction'] = samples['direction'].astype('category')

    # %%
    # I'm interested in the variance for each direction. The mean of the squared projection by block_id is that block's variance.
    samples['log_projection_sq'] = samples['projection'].transform('square').transform('log')
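# Note: taking log(projection^2) turns the multiplicative scale into an additive
# one, so a linear model on log_projection_sq estimates the log-variance per cell.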

    # %%
# Visualize the data.
    g = sns.FacetGrid(samples, col='block', row='user', hue="direction")
    g.despine(offset=10)
    g = (g.map(sns.distplot, "projection").add_legend(title="Projection", loc='upper right'))
    g = (g.map(sns.distplot, "log_projection_sq").add_legend(title="Projection", loc='upper right'))
for ax in g.axes.flat:
    ax.set_ylabel("Probability Density")
    ax.set_xlabel("Projection Length")
    ax.set_xlabel("log(Projection Length^2)")
    plt.tight_layout()
    plt.show()

    @@ -173,19 +178,62 @@
    # Multi-Level Model #
    #################################
    # %%
    # I'm interested in the variance for each direction. The mean of the squared projection by block_id is that block's variance.
    samples['projection_sq'] = samples['projection'].transform('square')
    # Not sure if this is right...
    outcome, predictors = dmatrices('projection_sq ~ condition * user * block * direction', samples)
    # Fixed effects. Commonly denoted as X.
    X = patsy.dmatrix('1 + block * direction', samples, return_type='dataframe')
    X = np.asarray(X)

    # %%
    # Design matrix for the random effects. Intercept and slope are modelled separately to have more control on the prior.
    Z_intercept = patsy.dmatrix('0 + user', data=samples, return_type='dataframe')
    Z_intercept = np.asarray(Z_intercept)

    Z_slope = patsy.dmatrix('0 + user:direction', data=samples, return_type='dataframe')
    Z_slope = np.asarray(Z_slope)

    Z = np.concatenate((Z_intercept, Z_slope), axis=1)

    print('Z_intercept has shape ({}, {})'.format(*Z_intercept.shape))
    print('Z_slope has shape ({}, {})'.format(*Z_slope.shape))
    print('Z has shape ({}, {})'.format(*Z.shape))

    # %%
    Y = np.asarray(samples['log_projection_sq'])
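
# This sets up the classic mixed model, y = X @ beta + Z @ gamma + eps:
# X holds the fixed-effects design, Z the stacked (per-user) random-effects designs.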

    # %%
with pm.Model(coords=coords) as hlm_model:
    sigma = pm.HalfCauchy('sigma', 4.0)
    #
    # No clue how to use predictors and what to do with it!
    # Don't know how to use indexing either. Index what where?!
    #
    projection = pm.Normal('projection', 0.0, sigma=sigma, observed=samples['projection'], dims='obs_id')
    ## Fixed effect
    beta_X_intercept = pm.Normal('beta_X_ortho', mu=0, sd=10)  # constrain it to positive values
    beta_X_slope_b2 = pm.Normal('beta_X_b2_ortho_offset', mu=0, sd=10)
    beta_X_slope_b3 = pm.Normal('beta_X_b3_ortho_offset', mu=0, sd=10)
    beta_X_slope_para = pm.Normal('beta_X_parallel_offset', mu=0, sd=10)
    beta_X_slope_b2_para = pm.Normal('beta_X_b2_parallel_offset', mu=0, sd=10)
    beta_X_slope_b3_para = pm.Normal('beta_X_b3_parallel_offset', mu=0, sd=10)
    beta_X = tt.stack([beta_X_intercept,
                       beta_X_slope_b2,
                       beta_X_slope_b3,
                       beta_X_slope_para,
                       beta_X_slope_b2_para,
                       beta_X_slope_b3_para
                       ])

    estimate_X = pm.math.dot(X, beta_X)

    ## Random effect
    # Non Centered version
    sigma_Z_intercept = pm.HalfNormal('sigma_Z_intercept', sd=10)
    gamma_Z_intercept_raw = pm.Normal('gamma_Z_intercept_raw', mu=0, sd=1, shape=Z_intercept.shape[1])
    gamma_Z_intercept = pm.Deterministic('gamma_Z_intercept', gamma_Z_intercept_raw * sigma_Z_intercept)

    sigma_Z_slope = pm.HalfNormal('sigma_Z_slope', sd=10)
    gamma_Z_slope_raw = pm.Normal('gamma_Z_slope_raw', mu=0, sd=1, shape=Z_slope.shape[1])
    gamma_Z_slope = pm.Deterministic('gamma_Z_slope', gamma_Z_slope_raw * sigma_Z_slope)
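    # Note: this is a non-centered parameterization; a standard-normal draw
    # scaled by the group sd is equivalent to Normal(0, sigma_Z) but easier to sample.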

    estimate_Z = pm.math.dot(Z_intercept, gamma_Z_intercept) + pm.math.dot(Z_slope, gamma_Z_slope)

    ## Likelihood
    mu_estimate = pm.Deterministic('mu_estimate', estimate_X + estimate_Z)
    sigma_unexplained = pm.HalfNormal('sigma_unexplained', sd=10)  # unexplained variability
    y_likelihood = pm.Normal('y_likelihood', mu=mu_estimate, sd=sigma_unexplained, observed=Y)

    # %%
    pm.model_to_graphviz(hlm_model)
    @@ -196,6 +244,7 @@

    # %%
    az.summary(idata)

    # %%
    az.plot_trace(idata, compact=True, chain_prop={"ls": "-"})
pm.traceplot(idata, var_names=['beta_X_ortho', 'beta_X_b2_ortho_offset', 'beta_X_b3_ortho_offset', 'beta_X_parallel_offset', 'beta_X_b2_parallel_offset', 'beta_X_b3_parallel_offset', 'gamma_Z_intercept', 'gamma_Z_slope', 'sigma_unexplained'])
    # %%
pm.plot_posterior(idata, var_names=['beta_X_ortho', 'beta_X_b2_ortho_offset', 'beta_X_b3_ortho_offset', 'beta_X_parallel_offset', 'beta_X_b2_parallel_offset', 'beta_X_b3_parallel_offset', 'gamma_Z_intercept', 'gamma_Z_slope', 'sigma_unexplained'])
  4. OlafHaag revised this gist Sep 23, 2020. 1 changed file with 9 additions and 0 deletions.
    9 changes: 9 additions & 0 deletions PyMC3_model_sandbox.py
    @@ -239,6 +239,15 @@
    # %%
    pm.model_to_graphviz(dir_model_r)

    # %%
    # Check priors.
with dir_model_r:
    prior_checks = pm.sample_prior_predictive()
    proj_idata_prior1_r = az.from_pymc3(prior=prior_checks)

    _, ax = plt.subplots()
    proj_idata_prior1_r.prior.plot.scatter(x='global_sigma_ortho', y='global_sigma_parallel', color='k', alpha=0.2, ax=ax)

    # %%
with dir_model_r:
    dir_model_r.name = "Main Effect Direction"
  5. OlafHaag revised this gist Sep 23, 2020. 1 changed file with 246 additions and 0 deletions.
    246 changes: 246 additions & 0 deletions PyMC3_model_sandbox.py
    @@ -0,0 +1,246 @@
    """
    Having a 3 x (3) Mixed Design: 3 conditions as BETWEEN factor, 3 blocks as WITHIN factor.
    There are multiple measurements in each block (trials).
    There are 2 dependent variables: parallel & orthogonal
    Since 'parallel' and 'orthogonal' measures have the same unit,
    it can be viewed as 1 outcome called 'projection' in 2 different directions.
    The interest here is in the VARIABILITY (e.g. standard deviation or variance) of the data
    for parallel & orthogonal outcomes for blocks 1, 2, and 3.
    The analysis should enable to make statements about whether conditions, blocks, direction
    have an effect on the outcome.
    Bonus:
    Each user has an individual level of experience in the task.
    Users rate the difficulty of each block they performed.
    """
    # %%
    import arviz as az
    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd
    from patsy import dmatrices
    import pymc3 as pm
    import seaborn as sns
    import theano.tensor as tt


    #################################
    # Dummy Data #
    #################################
    # %%
    n_trials = 30 # by block.

user0 = pd.DataFrame({'user': 0,
                      'condition': 'A',
                      'experience': 0.0,
                      'block': 1 + np.random.randint(3, size=3*n_trials),
                      'parallel': np.random.normal(0.0, 5.5, size=3*n_trials),
                      'orthogonal': np.random.normal(0.0, 5.5, size=3*n_trials)})

user1 = pd.DataFrame({'user': 1,
                      'condition': 'A',
                      'experience': np.nan,
                      'block': 1 + np.random.randint(3, size=3*n_trials),
                      'parallel': np.random.normal(0.0, 4.5, size=3*n_trials),
                      'orthogonal': np.random.normal(0.0, 5.5, size=3*n_trials)})

    # %%
    # Create some data conforming to main effect of projection model: variance in parallel > orthogonal.
user2 = pd.DataFrame({'user': 2,
                      'condition': 'B',
                      'experience': 4.0,
                      'block': 1 + np.random.randint(3, size=3*n_trials),
                      'parallel': np.random.normal(0.0, 7.0, size=3*n_trials),
                      'orthogonal': np.random.normal(0.0, 3.3, size=3*n_trials)})

user3 = pd.DataFrame({'user': 3,
                      'condition': 'B',
                      'experience': 3.0,
                      'block': 1 + np.random.randint(3, size=3*n_trials),
                      'parallel': np.random.normal(0.0, 8.0, size=3*n_trials),
                      'orthogonal': np.random.normal(0.0, 4.5, size=3*n_trials)})

user4 = pd.DataFrame({'user': 4,
                      'condition': 'B',
                      'experience': 3.0,
                      'block': 1 + np.random.randint(3, size=3*n_trials),
                      'parallel': np.random.normal(0.0, 5.0, size=3*n_trials),
                      'orthogonal': np.random.normal(0.0, 1.5, size=3*n_trials)})

    # %%
    # Create some data conforming to interaction effect version 1:
    # blocks 1&3 have smallest orthogonal deviations, but largest parallel deviations. Strong synergy.
user5 = pd.DataFrame({'user': 5,
                      'condition': 'C',
                      'experience': 0.0,
                      'block': 1 + np.random.randint(3, size=3*n_trials),
                      'parallel': np.random.normal(0.0, 7.0, size=3*n_trials),
                      'orthogonal': np.random.normal(0.0, 3.0, size=3*n_trials)})
# Block 2 has smaller parallel deviations than blocks 1&3, but higher orthogonal deviations than blocks 1&3.
# In this case sigma_block2_parallel - sigma_block2_orthogonal = 0
# No synergy.
user5.loc[user5['block'] == 2, ['parallel', 'orthogonal']] = np.random.normal(0.0, 5.0, size=((user5['block']==2).sum(), 2))

user6 = pd.DataFrame({'user': 6,
                      'experience': 4.0,
                      'condition': 'C',
                      'block': 1 + np.random.randint(3, size=3*n_trials),
                      'parallel': np.random.normal(0.0, 6.5, size=3*n_trials),
                      'orthogonal': np.random.normal(0.0, 2.5, size=3*n_trials)})
user6.loc[user6['block'] == 2, ['parallel', 'orthogonal']] = np.random.normal(0.0, 4.5, size=((user6['block']==2).sum(), 2))
    # %%
    df = pd.concat((user0, user1, user2, user3, user4, user5, user6), axis='index')
    df.sort_values(by=['user', 'block'], inplace=True)
    df.reset_index(drop=True, inplace=True)
    # Make sure mean of each block is zero as with real data.
    df[['parallel', 'orthogonal']] = df.groupby(['user', 'block'])[['parallel', 'orthogonal']].transform(lambda x: x - x.mean())

    # %%
    # Give each block its own ID.
    df['block_id'] = pd.factorize(df['user'].astype(str) + df['block'].astype(str))[0]
    # Trials.
    df['trial'] = df.groupby(['block_id'])['block'].transform(lambda x: np.arange(len(x)))
    # Categorical columns.
    df[['user', 'condition', 'block_id', 'block']] = df[['user', 'condition', 'block_id', 'block']].astype('category')
    # Give each block_id a rating.
    df['rating'] = df['block_id'].map(pd.Series(index=df['block_id'].unique(), data=np.random.randint(5, size=df['block_id'].cat.categories.size)))

    # %%
    # Thin out rows to simulate missing data.
    mask = np.random.choice([*[True]*4, False], size=df.shape[0])
    df = df[mask]

    # %%
    #################################
    # Coordinates #
    #################################
    conditions = df['condition'].cat.categories.tolist()
    n_conditions = df['condition'].nunique()
    condition_indices = df['condition'].cat.codes.values # Condition index for every single value.
    condition_lookup = pd.Series(*pd.factorize(df['condition'].cat.categories), name='condition_idx_lookup')

    # We need to be able to index by user. But the user IDs of the sample don't start at 0. We create an index and a mapping between them.
    users = df['user'].cat.categories.tolist()
    n_users = df['user'].nunique()
    user_indices = df['user'].cat.codes.values # User index for every single value.
    user_lookup = pd.Series(*pd.factorize(df['user'].cat.categories), name='user_idx_lookup')
    # User level predictors. More options: age group, gender.
    # gaming_exp is not on an interval scale (no equidistance between items), but ordinal.
    experience = df.drop_duplicates(subset="user")['experience'] # Shape = user_indices

    # Block indices for 1,2,3.
    blocks = df['block'].cat.categories.tolist()
    n_blocks = df['block'].nunique() # Same as using cat.categories.size
    block_indices = df['block'].cat.codes.values # Block index for every single value.
    block_lookup = pd.Series(*pd.factorize(df['block'].cat.categories), name='block_idx_lookup')
    # Block ID indices. Shape = n_users * n_blocks.
    block_ids = df['block_id'].cat.categories.tolist()
    n_block_ids = df['block_id'].nunique()
    block_id_indices = df['block_id'].cat.codes.values # Block index for every single value.
    block_id_lookup = pd.Series(*pd.factorize(df['block_id'].cat.categories), name='block_id_idx_lookup')
    # Block level predictors.
    # Note: Ratings are correlated with block index.
    ratings = df.drop_duplicates(subset=["user", 'block'])['rating'] # Shape = block_id_indices

    # Direction of projection.
    directions = ['orthogonal', 'parallel']
    n_directions = 2

coords = {'Direction': directions,
          'Condition': conditions,
          'User': users,
          'Block': blocks,
          'Block_ID': block_ids,
          'obs_id': np.arange(len(df))}
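
# Passing this dict via pm.Model(coords=coords) lets variables declare dims
# such as ('User', 'Direction'), and ArviZ then labels the posterior axes accordingly.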

    # %%
    #################################
    # Null Model #
    #################################
    # Reshape data into long format.
null_data = df.melt(id_vars=['user', 'block'], value_vars=['parallel', 'orthogonal'],
                    var_name='direction', value_name='projection')
    # %%
with pm.Model(coords=coords) as null_model:
    user_idx = pm.Data('user_idx', user_indices, dims='obs_id')

    # Hyperpriors:
    global_sigma = pm.Gamma('global_sigma', mu=5, sigma=5)
    global_sigma_sd = pm.HalfCauchy('global_sigma_sd', 4.0)
    # Varying user variability.
    user_sd = pm.Gamma('user_sd', mu=global_sigma, sigma=global_sigma_sd, dims='User')

    # Our coordinates aren't in long format, so we need to repeat each user to cover both directions.
    theta = tt.stack((user_sd, user_sd)).T
    projection = pm.Normal('projection', mu=0, sigma=theta[user_idx], observed=df[directions], dims='obs_id')

    # %%
    pm.model_to_graphviz(null_model)

    # %%
with null_model:
    null_model.name = "Null Model"
    trace0 = pm.sample(1000, tune=800)
    # %%
    az.summary(trace0)
    # %%
    az.plot_trace(trace0, compact=True, chain_prop={"ls": "-"})
    # %%
    az.loo(trace0)

    # %%
    #########################################################
    # Main Effect Projection Direction Model (unrestricted) #
    #########################################################
with pm.Model(coords=coords) as dir_model_u:
    user_idx = pm.Data('user_idx', user_indices, dims='obs_id')

    # Hyperpriors:
    global_sigma = pm.Gamma('global_sigma', mu=5, sigma=5, dims='Direction')
    global_sigma_sd = pm.HalfCauchy('global_sigma_sd', 4.0, dims='Direction')
    # Varying user variability.
    user_sd = pm.Gamma('user_sd', mu=global_sigma, sigma=global_sigma_sd, dims=('User', 'Direction'))

    projection = pm.Normal('projection', mu=0, sigma=user_sd[user_idx], observed=df[directions], dims=('obs_id', 'Direction'))
    # %%
    pm.model_to_graphviz(dir_model_u)
    # %%
with dir_model_u:
    dir_model_u.name = "Direction Model"
    trace1_u = pm.sample(1000, tune=800)

    # %%
    #######################################################
    # Main Effect Projection Direction Model (restricted) #
    #######################################################
with pm.Model() as dir_model_r:
    user_idx = pm.Data('user_idx', user_indices, dims='obs_id')
    # Hyperpriors.
    global_sigma_ortho = pm.Gamma("global_sigma_ortho", mu=3, sigma=5)
    global_sigma_ortho_sd = pm.Exponential('global_sigma_ortho_sd', 0.5)
    # Difference between orthogonal and parallel projections.
    global_sigma_diff = pm.Gamma('sigma_diff', mu=3, sigma=5)
    global_sigma_diff_sd = pm.Exponential('global_diff_sd', 1.0)
    # The sum of two independent gamma-distributed variables (with a common rate) is again gamma-distributed.
    global_sigma_parallel = pm.Deterministic("global_sigma_parallel", global_sigma_ortho + global_sigma_diff)

    # Priors.
    user_ortho = pm.Gamma('user_ortho', mu=global_sigma_ortho, sigma=global_sigma_ortho_sd, dims='User')
    # Varying slopes:
    user_diff = pm.Gamma('user_diff', mu=global_sigma_diff, sigma=global_sigma_diff_sd, dims='User')
    user_parallel = pm.Deterministic('user_parallel', user_ortho + user_diff, dims='User')

    # Expected deviation per user and direction:
    theta = tt.stack((user_ortho, user_parallel)).T
    # Observed.
    projection = pm.Normal('projection', 0.0, sigma=theta[user_idx], observed=df[directions], dims=('obs_id', 'Direction'))

    # %%
    pm.model_to_graphviz(dir_model_r)

    # %%
with dir_model_r:
    dir_model_r.name = "Main Effect Direction"
    trace1_r = pm.sample(1000, tune=1000)
    az.summary(trace1_r)
  6. OlafHaag revised this gist Sep 22, 2020. 1 changed file with 9 additions and 6 deletions.
    15 changes: 9 additions & 6 deletions PyMC3_HLM.py
    @@ -90,11 +90,6 @@
    user6.loc[user6['block'] == 2, ['parallel', 'orthogonal']] = np.random.normal(0.0, 4.5, size=((user6['block']==2).sum(), 2))
    # %%
    df = pd.concat((user0, user1, user2, user3, user4, user5, user6), axis='index')
    # Thin out rows to simulate missing data.
    mask = np.random.choice([*[True]*4, False], size=df.shape[0])
    df = df[mask]

    # %%
    df.sort_values(by=['user', 'block'], inplace=True)
    df.reset_index(drop=True, inplace=True)
    # Make sure mean of each block is zero as with real data.
    @@ -103,14 +98,22 @@
    # %%
    # Give each block its own ID.
    df['block_id'] = pd.factorize(df['user'].astype(str) + df['block'].astype(str))[0]
    # Trials.
    df['trial'] = df.groupby(['block_id'])['block'].transform(lambda x: np.arange(len(x)))
    # Categorical columns.
    df[['user', 'condition', 'block_id', 'block']] = df[['user', 'condition', 'block_id', 'block']].astype('category')
    # Give each block_id a rating.
    df['rating'] = df['block_id'].map(pd.Series(index=df['block_id'].unique(), data=np.random.randint(5, size=df['block_id'].cat.categories.size)))

    # %%
    # Thin out rows to simulate missing data.
    mask = np.random.choice([*[True]*4, False], size=df.shape[0])
    df = df[mask]

    # %%
    # Reshape data into long format.
samples = df.melt(id_vars=['user', 'experience', 'condition', 'block_id', 'block', 'rating'], value_vars=['parallel', 'orthogonal'],
samples = df.melt(id_vars=['user', 'experience', 'condition', 'block_id', 'block', 'rating', 'trial'],
                  value_vars=['parallel', 'orthogonal'],
                  var_name='direction', value_name='projection')
    samples['direction'] = samples['direction'].astype('category')

  7. OlafHaag created this gist Sep 18, 2020.
    198 changes: 198 additions & 0 deletions PyMC3_HLM.py
    @@ -0,0 +1,198 @@
    """
    Having a 3 x (3) Mixed Design: 3 conditions as BETWEEN factor, 3 blocks as WITHIN factor.
    There are multiple measurements in each block (trials).
    There are 2 dependent variables: parallel & orthogonal
    Since 'parallel' and 'orthogonal' measures have the same unit,
    it can be viewed as 1 outcome called 'projection' in 2 different directions.
    The interest here is in the VARIABILITY (e.g. standard deviation or variance) of the data
    for parallel & orthogonal outcomes for blocks 1, 2, and 3.
    The analysis should enable to make statements about whether conditions, blocks, direction
    have an effect on the outcome.
    Bonus:
    Each user has an individual level of experience in the task.
    Users rate the difficulty of each block they performed.
    """
    # %%
    import arviz as az
    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd
    from patsy import dmatrices
    import pymc3 as pm
    import seaborn as sns


    #################################
    # Dummy Data #
    #################################
    # %%
    n_trials = 30 # by block.

user0 = pd.DataFrame({'user': 0,
                      'condition': 'A',
                      'experience': 0.0,
                      'block': 1 + np.random.randint(3, size=3*n_trials),
                      'parallel': np.random.normal(0.0, 5.5, size=3*n_trials),
                      'orthogonal': np.random.normal(0.0, 5.5, size=3*n_trials)})

user1 = pd.DataFrame({'user': 1,
                      'condition': 'A',
                      'experience': np.nan,
                      'block': 1 + np.random.randint(3, size=3*n_trials),
                      'parallel': np.random.normal(0.0, 4.5, size=3*n_trials),
                      'orthogonal': np.random.normal(0.0, 5.5, size=3*n_trials)})

    # %%
    # Create some data conforming to main effect of projection model: variance in parallel > orthogonal.
user2 = pd.DataFrame({'user': 2,
                      'condition': 'B',
                      'experience': 4.0,
                      'block': 1 + np.random.randint(3, size=3*n_trials),
                      'parallel': np.random.normal(0.0, 7.0, size=3*n_trials),
                      'orthogonal': np.random.normal(0.0, 3.3, size=3*n_trials)})

user3 = pd.DataFrame({'user': 3,
                      'condition': 'B',
                      'experience': 3.0,
                      'block': 1 + np.random.randint(3, size=3*n_trials),
                      'parallel': np.random.normal(0.0, 8.0, size=3*n_trials),
                      'orthogonal': np.random.normal(0.0, 4.5, size=3*n_trials)})

user4 = pd.DataFrame({'user': 4,
                      'condition': 'B',
                      'experience': 3.0,
                      'block': 1 + np.random.randint(3, size=3*n_trials),
                      'parallel': np.random.normal(0.0, 5.0, size=3*n_trials),
                      'orthogonal': np.random.normal(0.0, 1.5, size=3*n_trials)})

    # %%
    # Create some data conforming to interaction effect version 1:
    # blocks 1&3 have smallest orthogonal deviations, but largest parallel deviations. Strong synergy.
user5 = pd.DataFrame({'user': 5,
                      'condition': 'C',
                      'experience': 0.0,
                      'block': 1 + np.random.randint(3, size=3*n_trials),
                      'parallel': np.random.normal(0.0, 7.0, size=3*n_trials),
                      'orthogonal': np.random.normal(0.0, 3.0, size=3*n_trials)})
# Block 2 has smaller parallel deviations than blocks 1&3, but higher orthogonal deviations than blocks 1&3.
# In this case sigma_block2_parallel - sigma_block2_orthogonal = 0
# No synergy.
user5.loc[user5['block'] == 2, ['parallel', 'orthogonal']] = np.random.normal(0.0, 5.0, size=((user5['block']==2).sum(), 2))

user6 = pd.DataFrame({'user': 6,
                      'experience': 4.0,
                      'condition': 'C',
                      'block': 1 + np.random.randint(3, size=3*n_trials),
                      'parallel': np.random.normal(0.0, 6.5, size=3*n_trials),
                      'orthogonal': np.random.normal(0.0, 2.5, size=3*n_trials)})
user6.loc[user6['block'] == 2, ['parallel', 'orthogonal']] = np.random.normal(0.0, 4.5, size=((user6['block']==2).sum(), 2))
    # %%
    df = pd.concat((user0, user1, user2, user3, user4, user5, user6), axis='index')
    # Thin out rows to simulate missing data.
    mask = np.random.choice([*[True]*4, False], size=df.shape[0])
    df = df[mask]

    # %%
    df.sort_values(by=['user', 'block'], inplace=True)
    df.reset_index(drop=True, inplace=True)
    # Make sure mean of each block is zero as with real data.
    df[['parallel', 'orthogonal']] = df.groupby(['user', 'block'])[['parallel', 'orthogonal']].transform(lambda x: x - x.mean())

    # %%
    # Give each block its own ID.
    df['block_id'] = pd.factorize(df['user'].astype(str) + df['block'].astype(str))[0]
    # Categorical columns.
    df[['user', 'condition', 'block_id', 'block']] = df[['user', 'condition', 'block_id', 'block']].astype('category')
    # Give each block_id a rating.
    df['rating'] = df['block_id'].map(pd.Series(index=df['block_id'].unique(), data=np.random.randint(5, size=df['block_id'].cat.categories.size)))

    # %%
    # Reshape data into long format.
samples = df.melt(id_vars=['user', 'experience', 'condition', 'block_id', 'block', 'rating'], value_vars=['parallel', 'orthogonal'],
                  var_name='direction', value_name='projection')
samples['direction'] = samples['direction'].astype('category')

    # %%
# Visualize the data.
    g = sns.FacetGrid(samples, col='block', row='user', hue="direction")
    g.despine(offset=10)
    g = (g.map(sns.distplot, "projection").add_legend(title="Projection", loc='upper right'))
for ax in g.axes.flat:
    ax.set_ylabel("Probability Density")
    ax.set_xlabel("Projection Length")
    plt.tight_layout()
    plt.show()

    # %%
    #################################
    # Coordinates #
    #################################
    conditions = samples['condition'].cat.categories.tolist()
    n_conditions = samples['condition'].nunique()
    condition_indices = df['condition'].cat.codes.values # Condition index for every single value.
    condition_lookup = pd.Series(*pd.factorize(samples['condition'].cat.categories), name='condition_idx_lookup')

    # We need to be able to index by user. But the user IDs of the sample don't start at 0. We create an index and a mapping between them.
    users = samples['user'].cat.categories.tolist()
    n_users = samples['user'].nunique()
    user_indices = samples['user'].cat.codes.values # User index for every single value.
    user_lookup = pd.Series(*pd.factorize(samples['user'].cat.categories), name='user_idx_lookup')
    # User level predictors. More options: age group, gender.
    # gaming_exp is not on an interval scale (no equidistance between items), but ordinal.
    experience = samples.drop_duplicates(subset="user")['experience'] # Shape = user_indices

    # Block indices for 1,2,3.
    blocks = samples['block'].cat.categories.tolist()
    n_blocks = samples['block'].nunique() # Same as using cat.categories.size
    block_indices = samples['block'].cat.codes.values # Block index for every single value.
    block_lookup = pd.Series(*pd.factorize(samples['block'].cat.categories), name='block_idx_lookup')
    # Block ID indices. Shape = n_users * n_blocks.
    block_ids = samples['block_id'].cat.categories.tolist()
    n_block_ids = samples['block_id'].nunique()
    block_id_indices = samples['block_id'].cat.codes.values # Block index for every single value.
    block_id_lookup = pd.Series(*pd.factorize(samples['block_id'].cat.categories), name='block_id_idx_lookup')
    # Block level predictors.
    # Note: Ratings are correlated with block index.
    ratings = samples.drop_duplicates(subset=["user", 'block'])['rating'] # Shape = block_id_indices

    # Coded direction of projection: 0 = orthogonal, 1 = parallel.
    directions = samples['direction'].cat.categories.tolist()
    n_directions = samples['direction'].nunique()
    direction_indices = samples['direction'].cat.codes.values
    direction_lookup = pd.Series(*pd.factorize(samples['direction'].cat.categories), name='direction_idx_lookup')

coords = {'Direction': directions, 'Condition': conditions, 'User': users, 'Block': blocks, 'Block_ID': block_ids, 'obs_id': np.arange(direction_indices.size)}

    #################################
    # Multi-Level Model #
    #################################
    # %%
    # I'm interested in the variance for each direction. The mean of the squared projection by block_id is that block's variance.
    samples['projection_sq'] = samples['projection'].transform('square')
    # Not sure if this is right...
    outcome, predictors = dmatrices('projection_sq ~ condition * user * block * direction', samples)
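# Note: dmatrices returns (outcome, design) matrices built from the formula;
# a full factorial of four categorical factors expands to one column per
# interaction level, which gets very wide here.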

    # %%
with pm.Model(coords=coords) as hlm_model:
    sigma = pm.HalfCauchy('sigma', 4.0)
    #
    # No clue how to use predictors and what to do with it!
    # Don't know how to use indexing either. Index what where?!
    #
    projection = pm.Normal('projection', 0.0, sigma=sigma, observed=samples['projection'], dims='obs_id')

    # %%
    pm.model_to_graphviz(hlm_model)

    # %%
with hlm_model:
    idata = pm.sample(2000, tune=2000, return_inferencedata=True)

    # %%
    az.summary(idata)

    # %%
    az.plot_trace(idata, compact=True, chain_prop={"ls": "-"})