import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.patches import Ellipse
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Iris dataset hosted by the UCI Machine Learning Repository. The file has
# no header row, so the column names are supplied explicitly when reading.
IRIS_URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
IRIS_COLUMNS = ["sepal_length", "sepal_width", "petal_length", "petal_width", "class"]


def _fit_petal_regression(df):
    """Fit petal_width ~ petal_length and return ``(model, r2)``."""
    # Keep the predictor as a one-column DataFrame so the fitted model
    # remembers the feature name and later predictions can reuse it.
    features = df[["petal_length"]]
    target = df["petal_width"]
    model = LinearRegression()
    model.fit(features, target)
    r2 = r2_score(target, model.predict(features))
    return model, r2


def _covariance_ellipse(x_vals, y_vals, n_std=2.0):
    """Return ``(center, width, height, angle_deg)`` of a covariance ellipse.

    The ellipse is centred on the sample mean. Its axes follow the
    eigenvectors of the 2x2 sample covariance matrix and each full axis
    length is ``2 * n_std`` standard deviations along that eigenvector
    (with the default ``n_std=2.0`` that is ``4 * sqrt(eigenvalue)``).
    """
    cov = np.cov(x_vals, y_vals)
    # eigh returns eigenvalues in ascending order, so column 0 of eigvecs
    # is the direction of least variance. ``width`` below is derived from
    # eigenvalue 0 and the angle from eigenvector 0, which keeps the two
    # consistent: matplotlib draws ``width`` along the rotated x-axis.
    eigvals, eigvecs = np.linalg.eigh(cov)
    # Near-degenerate covariance can yield tiny negative eigenvalues from
    # round-off; clip them so sqrt does not produce NaN.
    eigvals = np.clip(eigvals, 0.0, None)
    first_axis = eigvecs[:, 0]
    angle = np.degrees(np.arctan2(first_axis[1], first_axis[0]))
    width, height = 2.0 * n_std * np.sqrt(eigvals)
    center = (x_vals.mean(), y_vals.mean())
    return center, width, height, angle


def _render_petal_plot(df):
    """Print R^2, then draw the scatter, regression line and class ellipses."""
    model, r2 = _fit_petal_regression(df)
    print(f"R-squared (R^2): {r2}")

    # Two end points are enough to draw a straight regression line.
    x_range = pd.DataFrame({
        "petal_length": [df["petal_length"].min(), df["petal_length"].max()],
    })
    y_range = model.predict(x_range)

    # One colour per class, shared by the scatter points and the ellipses
    # so that a class and its region are always drawn in the same colour.
    classes = df["class"].unique()
    palette = sns.color_palette("deep", n_colors=len(classes))
    color_map = dict(zip(classes, palette))

    plt.figure(figsize=(10, 6))
    sns.scatterplot(
        data=df,
        x="petal_length",
        y="petal_width",
        hue="class",
        palette=color_map,
        s=100,
    )
    plt.plot(
        x_range["petal_length"],
        y_range,
        color="red",
        linewidth=2,
        label="Regression Line",
    )

    # Overlay a half-transparent filled ellipse for each class.
    ax = plt.gca()
    for cls in classes:
        subset = df[df["class"] == cls]
        if len(subset) < 2:
            # A covariance matrix is undefined for fewer than two samples.
            continue
        center, width, height, angle = _covariance_ellipse(
            subset["petal_length"].to_numpy(),
            subset["petal_width"].to_numpy(),
        )
        ax.add_patch(
            Ellipse(
                center,
                width=width,
                height=height,
                angle=angle,
                facecolor=color_map[cls],
                alpha=0.3,
                edgecolor=None,
                label=f"{cls} region",
            )
        )

    plt.title("Iris Petal Scatter Plot by Class with Regression and Regions")
    plt.xlabel("Petal Length")
    plt.ylabel("Petal Width")
    plt.legend(title="Class")
    plt.grid(True)
    plt.show()


def plot_iris_petal_scatter_with_regression():
    """
    Download the Iris dataset and plot petal width against petal length.

    The plot contains a scatter of all samples coloured by class, the
    least-squares regression line of petal width on petal length, and one
    filled, half-transparent covariance ellipse per class. The R-squared
    value of the regression is printed to stdout.

    Returns None. Failures are reported on stdout instead of being raised:
    a failed download or parse prints the error followed by a connectivity
    hint, and any later failure prints only the error itself.
    """
    try:
        df = pd.read_csv(IRIS_URL, names=IRIS_COLUMNS)
    except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        # URLError/HTTPError are OSError subclasses; the pandas errors cover
        # an empty or malformed response body.
        print(f"An error occurred: {exc}")
        print("Please check your internet connection or the data download.")
        return

    try:
        _render_petal_plot(df)
    except Exception as exc:
        # Script boundary: keep the original "report, never raise" contract,
        # but do not blame the network for a fitting or plotting failure.
        print(f"An error occurred: {exc}")


# Run the function to create and show the plot.
plot_iris_petal_scatter_with_regression()