# Clone your fork over SSH (replace the upper-case placeholders with your own values).
git clone git@github.com:YOUR-USERNAME/YOUR-FORKED-REPO.git
# `git clone` creates a directory named after the repository.
cd YOUR-FORKED-REPO
# Register the original repository as "upstream" so the fork can be kept in sync.
# GitHub no longer serves the unauthenticated git:// protocol, so use HTTPS.
git remote add upstream https://github.com/ORIGINAL-DEV-USERNAME/REPO-YOU-FORKED-FROM.git
git fetch upstream
| #!/bin/bash | |
| sudo apt-get install -y \ | |
| apt-transport-https \ | |
| ca-certificates \ | |
| curl \ | |
| software-properties-common | |
| curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - | |
| sudo apt-key fingerprint 0EBFCD88 | |
| sudo add-apt-repository \ | |
| "deb [arch=amd64] https://download.docker.com/linux/ubuntu \ |
def seed_everything(seed: int) -> None:
    """Seed the ``random``, NumPy and PyTorch random number generators.

    The same value is also exported as the ``PYTHONHASHSEED`` environment
    variable.

    Args:
        seed: Integer seed applied to every generator.
    """
    import os
    import random

    import numpy as np
    import torch

    random.seed(seed)
    # Setting PYTHONHASHSEED at runtime does not re-seed string hashing in the
    # interpreter that is already running; it only affects child processes
    # that inherit this environment.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every CUDA device rather than only the current one. The call is
    # silently ignored when CUDA is not available.
    torch.cuda.manual_seed_all(seed)
# From https://stackoverflow.com/questions/23586510/return-multiple-columns-from-pandas-apply
def sizes(s):
    """Format a size given in bytes as KB, MB and GB strings.

    Each value is rendered with one decimal place and with the digit grouping
    of the currently active locale.

    Args:
        s: Size in bytes (any number that supports true division).

    Returns:
        A 3-tuple of strings: ``('<n> KB', '<n> MB', '<n> GB')``.
    """
    # locale.format() was deprecated in Python 3.7 and removed in 3.12;
    # locale.format_string() is the supported equivalent.
    return (
        locale.format_string("%.1f", s / 1024.0, grouping=True) + ' KB',
        locale.format_string("%.1f", s / 1024.0 ** 2, grouping=True) + ' MB',
        locale.format_string("%.1f", s / 1024.0 ** 3, grouping=True) + ' GB',
    )
# sizes() yields one (KB, MB, GB) tuple per row; zip(*...) transposes those
# tuples into three sequences, one per new column.
df_test['size_kb'], df_test['size_mb'], df_test['size_gb'] = zip(*df_test['size'].apply(sizes))
# Column labels before flattening (echoed output shown as comments).
pivot.columns
# MultiIndex([('mean', 'is_suitable'),
#             ('size', 'is_suitable')],
#            )

# Join each tuple of levels with '_' to obtain flat, single-level labels.
pivot.columns.map('_'.join)
# Index(['mean_is_suitable', 'size_is_suitable'], dtype='object')
| def group_others(serie: pd.Series, | |
| min_threshold: int) -> pd.Series: | |
| """ | |
| This function finds categorical values with little representation | |
| and group them under the category "OTHERS" to mitigate the curse | |
| of dimensionality, thus avoiding overfitting | |
| """ | |
| condition = (serie.value_counts() < min_threshold).values | |
| other_group = list(serie.value_counts()[condition].index) |
| import pylab as plt | |
| plt.plot([1,2,3,10], [1,2,3,4]) | |
| %matplot plt # Include this in the same cell as the plot |
| def diversity_percentage(df, columns): | |
| """ | |
| This function returns the number of different elements in each column as a percentage of the total elements in the group. | |
| A low value indicates there are many repeated elements. | |
| Example 1: a value of 0 indicates all values are the same. | |
| Example 2: a value of 100 indicates all values are different. | |
| """ | |
| diversity = dict() | |
| for col in columns: |
def plot_nulls(dataframe, figsize=(5, 20)):
    """Draw a horizontal bar chart of the percentage of nulls per column.

    Args:
        dataframe: Object exposing ``isnull().sum()`` and ``len()``
            (presumably a pandas DataFrame — confirm against callers).
        figsize: ``(width, height)`` passed to ``plt.figure``. Defaults to
            the previously hard-coded ``(5, 20)``.
    """
    def null_perc(dataframe):
        # Share of null entries per column, expressed as a percentage.
        return 100 * dataframe.isnull().sum() / len(dataframe)

    nulls = null_perc(dataframe)
    plt.figure(1, figsize=figsize)
    # Bars are placed at integer positions; the tick labels are swapped for
    # the column names right after.
    ax = sns.barplot(x=nulls, y=list(range(len(nulls))), orient='h', color="blue")
    _ = plt.yticks(plt.yticks()[0], nulls.index)
    # Put the percentage axis at the top of the figure.
    ax.xaxis.set_ticks_position('top')
# Clone your fork over SSH (replace the upper-case placeholders with your own values).
git clone git@github.com:YOUR-USERNAME/YOUR-FORKED-REPO.git
# `git clone` creates a directory named after the repository.
cd YOUR-FORKED-REPO
# Register the original repository as "upstream" so the fork can be kept in sync.
# GitHub no longer serves the unauthenticated git:// protocol, so use HTTPS.
git remote add upstream https://github.com/ORIGINAL-DEV-USERNAME/REPO-YOU-FORKED-FROM.git
git fetch upstream
# Tracer has been deprecated since IPython 5.1; set_trace() is the
# replacement entry point for dropping into the IPython debugger.
from IPython.core.debugger import set_trace

# Place this call wherever you want to start debugging
set_trace()
| """ | |
| Some PDB debugger commands: | |
| n(ext): execute the current line and stop at the next one | |
| c(ontinue): keep running until the next breakpoint |