Created
April 2, 2025 09:39
-
-
Save data-goblin/c45728f6da75d3d7dea684dbf8696741 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sample visual - requires input dataframe 'df'. Expected to run in a Fabric notebook.
#
# Reads the columns 'price', 'cost', 'profit', 'Type' and 'product' from df and
# adds 'log_price', 'log_cost', 'log_profit', 'count' and 'YLabel' to it, then
# draws one violin + strip plot per metric on a shared y-axis.
import matplotlib.pyplot as plt
import seaborn as sns  # Use sns instead of sb for consistency with common practice
import numpy as np
import pandas as pd
import warnings

# Suppress the specific FutureWarning about use_inf_as_na
warnings.filterwarnings("ignore", category=FutureWarning,
                        message="use_inf_as_na option is deprecated")

# Metrics to plot, in left-to-right subplot order.
metrics = ("price", "cost", "profit")

# Small epsilon added before the log transform to avoid log(0) issues.
# NOTE(review): values <= -epsilon (e.g. a negative profit) still produce NaN
# from log10 - confirm that dropping those points from the plot is intended.
epsilon = 1e-10

for metric in metrics:
    # Ensure a numeric dtype and turn infinities into NaN. The result is
    # assigned back to the column instead of calling
    # df[metric].replace(..., inplace=True): that form is a chained assignment
    # which does not update df when pandas Copy-on-Write is enabled.
    df[metric] = df[metric].astype(float).replace([np.inf, -np.inf], np.nan)
    # Log-transform the data
    df[f"log_{metric}"] = np.log10(df[metric] + epsilon)

# Create a new column with the count by type appended to the 'Type' column
type_counts = df.groupby('Type')['product'].count()
df['count'] = df['Type'].map(type_counts)
df['YLabel'] = df['Type'].astype(str) + ' (' + df['count'].astype(str) + ')'

# Order the y-axis labels by first appearance when rows are sorted by
# descending cost.
type_order = df.sort_values('cost', ascending=False).YLabel.unique().tolist()

# Figure with one subplot per metric, all sharing the y-axis
fig, ax = plt.subplots(1, len(metrics), sharey=True, figsize=(12, 5.5))

# Color
palette = "coolwarm"

# Fig 1A/1B/1C - price, cost and profit in log GP per product Type, each drawn
# as a violin with a jittered stripplot on top.
for axis, metric in zip(ax, metrics):
    log_column = f"log_{metric}"
    sns.violinplot(x=log_column, y="YLabel", data=df, palette=palette,
                   scale="width", inner=None, order=type_order, ax=axis)
    sns.stripplot(x=log_column, y="YLabel", data=df, color="#333333",
                  alpha=0.5, jitter=0.033, order=type_order, ax=axis)
    # Titles read 'Log Price', 'Log Cost', 'Log Profit'.
    axis.set_title(f"Log {metric.capitalize()}")
    # The y-axis label is removed; the tick labels already carry the type.
    axis.set(ylabel="")

# Remove the axis lines
sns.despine(left=True, bottom=True)

plt.tight_layout()
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment