# Source: GitHub Gist by @data-goblin (gist c45728f6da75d3d7dea684dbf8696741), created April 2, 2025.
# Sample visual - requires input dataframe 'df'. Expected to run in a Fabric notebook.
import matplotlib.pyplot as plt
import seaborn as sns # Use sns instead of sb for consistency with common practice
import numpy as np
import pandas as pd
import warnings
# Silence only the FutureWarning whose text begins with the phrase below;
# every other warning, including other FutureWarnings, is still reported.
# `message` is a regular expression matched against the start of the
# warning text.
warnings.filterwarnings(
    action="ignore",
    message="use_inf_as_na option is deprecated",
    category=FutureWarning,
)
# Prepare the input dataframe `df` for plotting: coerce the three measures to
# float, turn infinities into NaN, add log10 columns, and build the y-axis
# labels together with their display order.
#
# Small epsilon added before the log to avoid log(0). Zeros therefore map to
# log10(epsilon) = -10, and negative values still come out as NaN.
epsilon = 1e-10
for measure in ('price', 'cost', 'profit'):
    # Assign the cleaned Series back to `df`. Calling
    # `df[col].replace(..., inplace=True)` mutates an intermediate object:
    # newer pandas versions warn about it, and with Copy-on-Write enabled it
    # leaves `df` unchanged, so infinities would reach the log transform.
    finite_values = df[measure].astype(float).replace([np.inf, -np.inf], np.nan)
    df[measure] = finite_values
    df['log_' + measure] = np.log10(finite_values + epsilon)
# Label every row as "<Type> (<count>)", where count is the number of rows of
# that Type with a non-null 'product'.
type_counts = df.groupby('Type')['product'].count()
df['count'] = df['Type'].map(type_counts)
df['YLabel'] = df['Type'].astype(str) + ' (' + df['count'].astype(str) + ')'
# Order the labels by descending cost. unique() keeps first occurrences, so
# each label is positioned by its highest-cost row.
Type = df.sort_values('cost', ascending=False).YLabel.unique().tolist()
# Draw a 1x3 figure with a shared y-axis: one panel each for log price,
# log cost and log profit per product Type. Every panel is a violin plot
# with a jittered strip plot of the individual rows on top.
fig, ax = plt.subplots(1, 3, sharey=True, figsize=(12, 5.5))
# Color
palette = "coolwarm"
# (column to plot on x, panel title), in left-to-right panel order.
panels = [
    ("log_price", 'Log Price'),
    ("log_cost", 'Log Cost'),
    ("log_profit", 'Log Profit'),
]
# NOTE(review): seaborn >= 0.13 deprecates `scale` in favour of
# `density_norm` and warns when `palette` is passed without `hue`. The
# arguments are kept as written because the seaborn version available in the
# notebook is not visible from this script - confirm before changing.
for panel_ax, (column, title) in zip(ax, panels):
    sns.violinplot(x=column, y="YLabel", data=df, palette=palette, scale="width", inner=None, order=Type, ax=panel_ax)
    sns.stripplot(x=column, y="YLabel", data=df, color="#333333", alpha=0.5, jitter=0.033, order=Type, ax=panel_ax)
    panel_ax.set_title(title)
    # The tick labels already carry the Type and count, so drop the axis label.
    panel_ax.set(ylabel="")
# Remove the left and bottom axis lines from every panel of the figure.
sns.despine(left=True, bottom=True)
plt.tight_layout()
plt.show()
# End of gist; comments on it are hosted on its GitHub page.