# States that end a journey; no outgoing transitions are counted for them and
# they are modelled as absorbing (self-transition probability of 1.0).
_ABSORBING_STATES = ('Activation', 'Null')


def transition_states(paths):
    """Count how often each ``origin>destination`` transition occurs.

    Args:
        paths: Iterable of journeys, each a sequence of channel/state names.

    Returns:
        dict mapping ``'origin>destination'`` to its observed count. Every
        ordered pair of states seen anywhere in ``paths`` is present, with a
        count of 0 when the transition was never observed.

    Notes:
        Transitions whose origin is 'Activation' or 'Null' are not counted.
        States are compared by equality, so a channel whose name is a
        substring of another (e.g. 'Search' vs 'Paid Search') is no longer
        double counted. A journey that ends on a non-absorbing state simply
        contributes no transition for that final state instead of raising
        IndexError.
    """
    unique_channels = {state for path in paths for state in path}
    counts = {
        origin + '>' + destination: 0
        for origin in unique_channels
        for destination in unique_channels
    }

    for user_path in paths:
        # Walk consecutive (origin, destination) pairs of the journey.
        for origin, destination in zip(user_path, user_path[1:]):
            if origin in _ABSORBING_STATES:
                continue
            counts[origin + '>' + destination] += 1

    return counts


def transition_prob(trans_dict, paths):
    """Convert transition counts into per-origin transition probabilities.

    Args:
        trans_dict: Mapping of ``'origin>destination'`` to a count, as
            produced by ``transition_states``.
        paths: Iterable of journeys; only states appearing here (other than
            'Activation' and 'Null') are used as origins.

    Returns:
        dict mapping ``'origin>destination'`` to the share of the origin's
        observed transitions that went to that destination. Only transitions
        with a count greater than zero are included.
    """
    unique_channels = {state for path in paths for state in path}

    # Group the observed transitions by their exact origin in a single pass,
    # instead of substring-matching keys and re-listing the dict per lookup.
    outgoing = {}
    for key, count in trans_dict.items():
        if count > 0:
            origin = key.partition('>')[0]
            outgoing.setdefault(origin, []).append((key, count))

    trans_prob = {}
    for state in unique_channels:
        if state in _ABSORBING_STATES:
            continue
        transitions = outgoing.get(state, [])
        total = sum(count for _, count in transitions)
        for key, count in transitions:
            trans_prob[key] = float(count) / total

    return trans_prob


def transition_matrix(list_of_paths, transition_probabilities):
    """Build the square transition matrix as a pandas DataFrame.

    Args:
        list_of_paths: Iterable of journeys, each a sequence of state names.
        transition_probabilities: Mapping of ``'origin>destination'`` to a
            probability, as produced by ``transition_prob``.

    Returns:
        DataFrame indexed by origin (rows) and destination (columns). Cells
        default to 0.0; 'Activation' and 'Null' get 1.0 on the diagonal so
        they act as absorbing states; remaining cells are filled from
        ``transition_probabilities``.
    """
    channels = list({state for path in list_of_paths for state in path})

    # Allocate the whole frame at once rather than growing it row by row.
    trans_matrix = pd.DataFrame(0.0, index=channels, columns=channels)

    for channel in channels:
        if channel in _ABSORBING_STATES:
            # .at writes into the frame itself; chained .loc[row][col]
            # assignment may silently modify a temporary copy.
            trans_matrix.at[channel, channel] = 1.0

    for key, value in transition_probabilities.items():
        origin, destination = key.split('>')
        trans_matrix.at[origin, destination] = value

    return trans_matrix


trans_states = transition_states(journeys['path'])
trans_prob = transition_prob(trans_states, journeys['path'])
trans_matrix = transition_matrix(journeys['path'], trans_prob)