# ------------------- Pandas -------------------
# Quick-reference pandas recipes, run top to bottom against one dataframe.
# NOTE(review): `data` (a mapping with a "trainData" entry) and `index`
# (a row position) are not defined here; they are assumed to be supplied
# by the surrounding context -- confirm before running as a script.


def _to_numeric_or_keep(column):
    """Return *column* converted to a numeric dtype, or unchanged if it cannot be parsed."""
    # Replaces pd.to_numeric(..., errors='ignore'), which is deprecated
    # since pandas 2.2 in favour of catching the exception explicitly.
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column


# Create dataframe from dictionary.
df = pd.DataFrame.from_dict(data["trainData"])

# Convert entire dataframe values to numeric type; columns that cannot be
# parsed as numbers are left as they are.
df = df.apply(_to_numeric_or_keep)

# Remove rows that contain a missing value.
df = df.dropna()

# Create a new column named "new_col" (at position 0) with value 2 where the
# cluster_id column is 1, else 0.
df.insert(
    loc=0,
    column="new_col",
    value=np.where(df["cluster_id"] == 1, 2, 0),
)

# Set "new_col" to the value of df["log2_livable"] where cluster_id is 1,
# else 0. The column already exists from the step above, so it is assigned
# rather than inserted: DataFrame.insert raises ValueError on a duplicate
# column name. Assigning to an existing column keeps it at position 0.
df["new_col"] = np.where(df["cluster_id"] == 1, df["log2_livable"], 0)

# Change date format of entire column. The parsed values are written back to
# the dataframe; `effective_date` is kept as a standalone Series as well.
effective_date = pd.to_datetime(df["effective_date"], format="%Y-%m-%d")
df["effective_date"] = effective_date

# Re-order dataframe by columns (A-Z).
df = df[sorted(df.columns)]

# Clone dataframe. copy() returns a new object, so the result must be bound
# to a name to be of any use.
df_clone = df.copy(deep=True)

# Fetch the row at the given position. The double brackets make iloc return
# a one-row DataFrame instead of a Series.
row = df.iloc[[index]]

# Fetch multiple columns.
subset = df.loc[:, ["col_1", "col_2"]]