Ricardo Guerrero Gómez-Olmedo ricgu8086

git clone git@github.com:YOUR-USERNAME/YOUR-FORKED-REPO.git

cd into/cloned/fork-repo
git remote add upstream https://github.com/ORIGINAL-DEV-USERNAME/REPO-YOU-FORKED-FROM.git
git fetch upstream

	#!/bin/bash
	# Install the packages apt needs to use a repository over HTTPS.
	sudo apt-get install -y \
	    apt-transport-https \
	    ca-certificates \
	    curl \
	    software-properties-common
	# Download Docker's GPG key and pipe it to apt-key (a real pipe, not an escaped "\|").
	curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
	# Show the key's fingerprint so it can be checked by eye.
	sudo apt-key fingerprint 0EBFCD88
	# Register Docker's stable repository for the running Ubuntu release.
	sudo add-apt-repository \
	    "deb [arch=amd64] https://download.docker.com/linux/ubuntu \
	    $(lsb_release -cs) \
	    stable"

	def seed_everything(seed: int):
	    """Seed every random number generator this snippet knows about.

	    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
	    (default generator and the current CUDA device), and exports
	    ``PYTHONHASHSEED`` to the process environment.

	    Args:
	        seed: Value handed to each generator.
	    """
	    # Imports are kept local so the snippet can be pasted anywhere as-is.
	    import os
	    import random

	    import numpy as np
	    import torch

	    random.seed(seed)
	    # Only affects interpreters started after this point (e.g. subprocesses).
	    os.environ['PYTHONHASHSEED'] = str(seed)
	    np.random.seed(seed)
	    torch.manual_seed(seed)
	    torch.cuda.manual_seed(seed)

	# From https://stackoverflow.com/questions/23586510/return-multiple-columns-from-pandas-apply

	def sizes(s):
	    """Format a byte count as kilobyte, megabyte and gigabyte strings.

	    Args:
	        s: Size in bytes.

	    Returns:
	        A 3-tuple of strings ``('<x> KB', '<y> MB', '<z> GB')``, each with one
	        decimal place and locale-aware digit grouping. Returning a tuple
	        lets ``zip(*column.apply(sizes))`` fan the result out into three
	        columns.
	    """
	    def _fmt(value, unit):
	        # locale.format() was removed in Python 3.12; format_string() is
	        # the supported equivalent and accepts the same arguments.
	        return locale.format_string("%.1f", value, grouping=True) + ' ' + unit

	    return (
	        _fmt(s / 1024.0, 'KB'),
	        _fmt(s / 1024.0 ** 2, 'MB'),
	        _fmt(s / 1024.0 ** 3, 'GB'),
	    )
	df_test['size_kb'], df_test['size_mb'], df_test['size_gb'] = zip(*df_test['size'].apply(sizes))

	# Inspect the column index of `pivot`; here it has two levels.
	pivot.columns

	MultiIndex([('mean', 'is_suitable'),
	('size', 'is_suitable')],
	)

	# Join the levels of each column tuple with '_' to get flat names.
	# The expression only returns the new Index; it is not assigned back here.
	pivot.columns.map('_'.join)

	Index(['mean_is_suitable', 'size_is_suitable'], dtype='object')

	def group_others(serie: pd.Series,
	                 min_threshold: int) -> pd.Series:
	    """Collapse rare values of a series into a single "OTHERS" category.

	    Values that occur fewer than ``min_threshold`` times are replaced by
	    the string "OTHERS". Grouping poorly represented categories mitigates
	    the curse of dimensionality and so helps to avoid overfitting.

	    Args:
	        serie: Series holding the categorical values.
	        min_threshold: Minimum number of occurrences a value needs in
	            order to be kept as it is.

	    Returns:
	        A new series with the rare values replaced; ``serie`` itself is
	        not modified. Missing values are left untouched.
	    """
	    # Count once and reuse; value_counts() ignores missing values.
	    counts = serie.value_counts()
	    other_group = counts[counts < min_threshold].index

	    return serie.mask(serie.isin(other_group), "OTHERS")

	import pylab as plt

	plt.plot([1,2,3,10], [1,2,3,4])
	%matplot plt # Include this in the same cell as the plot

	def diversity_percentage(df, columns):
	"""
	This function returns the number of different elements in each column as a
	percentage of the total number of elements in the group.
	A low value indicates there are many repeated elements.
	Example 1: a value of 0 indicates all values are the same.
	Example 2: a value of 100 indicates all values are different.
	"""
	# Presumably filled with one entry per column name - TODO confirm.
	diversity = dict()

	# NOTE(review): the loop body and the return statement are missing from
	# this snippet, so the exact formula cannot be confirmed from here.
	for col in columns:

	def plot_nulls(dataframe):
	    """Draw a horizontal bar chart with the percentage of nulls per column.

	    Expects ``plt`` and ``sns`` (presumably matplotlib's pyplot and
	    seaborn) to be available in the enclosing scope. Nothing is returned;
	    the chart is drawn on figure 1.

	    Args:
	        dataframe: DataFrame whose columns are inspected for nulls.
	    """

	    def null_perc(dataframe):
	        """Return, for each column, the percentage of null entries."""
	        return 100*dataframe.isnull().sum()/len(dataframe)

	    nulls = null_perc(dataframe)
	    plt.figure(1, figsize=(5,20))  # Customize this if needed
	    # Bars are drawn at integer positions, one per column...
	    ax = sns.barplot(x=nulls, y=list(range(len(nulls))), orient='h', color="blue")
	    # ...and the tick labels are then replaced with the column names.
	    _ = plt.yticks(plt.yticks()[0], nulls.index)
	    ax.xaxis.set_ticks_position('top')

	# Tracer has been deprecated since IPython 5.1; set_trace is its replacement.
	from IPython.core.debugger import set_trace


	# Place this call wherever you want to start debugging
	set_trace()

	"""
	Some PDB Debugger commands:
	n(ext) executes the current line and stops at the next one
	c(ontinue) running until next breakpoint