#!/usr/bin/env python
# coding: utf-8
"""Exploratory analysis of the recommendation-system survey workbook.

Run top to bottom, the script:

1. plots the mean score of each rating metric as a bar chart;
2. draws a heat map of the correlations between the metrics;
3. prints the Pearson p-value for every pair of metrics;
4. prints descriptive statistics, a Kruskal-Wallis H-test and Pearson
   correlations against the per-row mean score, after IQR outlier removal;
5. charts the ten most frequent answers of three free-text columns;
6. draws one box plot per rating metric.

Every figure is written to the current working directory as both PNG and
TIFF and then shown with ``plt.show()``.
"""

import itertools

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy.stats import kruskal
from scipy.stats import pearsonr

# Survey workbook, resolved relative to the current working directory.
file_path = '../检验推荐系统调查数据.xlsx'


def _save_and_show(file_stem, *, tight_layout=False):
    """Save the current figure as ``<file_stem>.png`` / ``.tiff`` and show it.

    Args:
        file_stem: Output path without extension.
        tight_layout: When true, call ``plt.tight_layout()`` after saving and
            before showing (the saved files are cropped by
            ``bbox_inches='tight'`` either way).
    """
    for ext in ('png', 'tiff'):
        plt.savefig(f'{file_stem}.{ext}', format=ext, bbox_inches='tight')
    if tight_layout:
        plt.tight_layout()
    plt.show()


def _count_normalized(frame, column):
    """Strip full stops from ``frame[column]`` in place and count the answers.

    The column is normalised *before* counting so that answers differing only
    by a Chinese or ASCII full stop fall into the same bucket.

    Returns:
        The ``value_counts()`` Series of the normalised column.
    """
    frame[column] = frame[column].str.replace(r'[。.]', '', regex=True)
    return frame[column].value_counts()


def _plot_top_counts(counts, *, color, ylabel, title, file_stem):
    """Draw ``counts`` as a horizontal bar chart (largest on top) and save it."""
    plt.figure(figsize=(6, 6))
    counts.plot(kind='barh', color=color)
    plt.xticks(fontsize=16)
    plt.yticks(fontsize=16)
    plt.xlabel('出现次数', fontsize=16)
    plt.ylabel(ylabel, fontsize=16)
    plt.title(title, fontsize=16)
    # value_counts() is sorted descending; invert so the top entry is drawn first.
    plt.gca().invert_yaxis()
    _save_and_show(file_stem)


def _plot_score_boxplot(frame, column, *, ylabel=None):
    """Draw and save a horizontal box plot of ``frame[column]``.

    A new figure is opened for every call so that successive plots never
    share axes, whatever ``plt.show()`` does on the active backend.

    Args:
        frame: DataFrame holding the score column.
        column: Column to plot; also used for the title and the file name.
        ylabel: Y-axis label; defaults to ``column``.
    """
    plt.figure(figsize=(6, 4))
    sns.boxplot(x=frame[column], width=0.2)
    plt.xticks(fontsize=16)
    plt.title(f'{column}的分布')
    plt.ylabel(column if ylabel is None else ylabel)
    _save_and_show(f'{column}的分布')


# In[1]:

# Load the data from the Excel file
data = pd.read_excel(file_path)

# Mean of every numeric column
average_values = data.mean(numeric_only=True)

# Rating metrics to plot
columns = ["推荐准确性", "推荐相关性", "操作便捷性", "生成报告时间", "数据隐私保护", "系统可接受性"]
values = average_values[columns]

# Use a font that contains CJK glyphs for every text element.
plt.rcParams['font.family'] = ['SimHei']
# Render minus signs as ASCII '-' rather than U+2212.
# NOTE(review): assumes SimHei has no U+2212 glyph - confirm on the target machine.
plt.rcParams['axes.unicode_minus'] = False

# Bar chart of the mean scores (bar width 0.3)
plt.figure(figsize=(3.5, 4))
plt.bar(values.index, values.values, color='skyblue', width=0.3)

# Tick label sizes: 10 on the x axis, 12 on the y axis
plt.xticks(rotation=45, fontsize=10)
plt.yticks(fontsize=12)

# Axis labels and title
plt.xlabel('指标', fontsize=10)
plt.ylabel('平均分', fontsize=10)
plt.title('推荐系统各项指标的平均分', fontsize=12)

# Fixed y-axis range
plt.ylim(0, 10)

# Print each mean (rounded to 2 decimals) just above its bar
for i, v in enumerate(values.values):
    plt.text(i, v + 0.1, round(v, 2), ha='center', fontsize=10)

_save_and_show('recommendation_system_average_scores', tight_layout=True)


# In[3]:

# Correlation matrix of the rating metrics
correlation_matrix = data[columns].corr()

plt.figure(figsize=(10, 8))

# Annotated heat map with a labelled colour bar
sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap='coolwarm',
    fmt=".2f",
    annot_kws={"size": 22},
    cbar=True,
    cbar_kws={'label': 'Correlation Coefficient'},
)

# Title and tick labels
plt.title('推荐系统各项指标的相关性分析', fontsize=20)
plt.xticks(rotation=45, ha="right", fontsize=16)
plt.yticks(rotation=0, ha="right", fontsize=16)

_save_and_show('Heat map correlation analysis', tight_layout=True)


# In[ ]:


# In[7]:

# 1. Load the data
data = pd.read_excel(file_path)

# 2. Columns used for the correlation analysis
columns = ['推荐准确性', '推荐相关性', '操作便捷性', '生成报告时间', '数据隐私保护', '系统可接受性']

# 3. Fill missing values with the column mean, if there are any
missing_values = data[columns].isnull().sum()
if missing_values.any():
    data[columns] = data[columns].fillna(data[columns].mean())

# 4. Pearson p-value for every unordered pair of columns
#    (combinations() yields each pair once, in the same order as an i < j loop)
p_values = {}
for col1, col2 in itertools.combinations(columns, 2):
    _, p_value = pearsonr(data[col1], data[col2])
    p_values[(col1, col2)] = p_value

# 5. Print the p-values
for (col1, col2), p_value in p_values.items():
    print(f"p值 between {col1} and {col2}: {p_value:.2f}")


# In[6]:

# Reload the workbook: this cell works on the raw data.
# NOTE(review): missing values are not filled here, unlike the cell above -
# confirm the tests below are meant to run on data that may contain NaN.
data = pd.read_excel(file_path)

# Rating metric columns
quantitative_columns = ['推荐准确性', '推荐相关性', '操作便捷性', '生成报告时间', '数据隐私保护', '系统可接受性']

# Descriptive statistics
descriptive_stats = data[quantitative_columns].describe()
print("描述性统计信息:\n", descriptive_stats)

# Correlation matrix
correlation_matrix = data[quantitative_columns].corr()
print("\n相关性矩阵:\n", correlation_matrix)

# Overall satisfaction = per-row mean of the rating metrics
data['overall_satisfaction'] = data[quantitative_columns].mean(axis=1)


def find_and_remove_outliers(data, columns):
    """Return ``data`` without the rows that are IQR outliers in any column.

    A value is an outlier when it lies more than 1.5 * IQR below the first
    quartile or above the third quartile of its column. Missing values never
    compare as outliers, so rows are not dropped because of them.

    Args:
        data: DataFrame to filter; it is not modified.
        columns: Names of the numeric columns to test.

    Returns:
        A new DataFrame holding only the rows with no outlier in ``columns``.
    """
    # Track outliers per row. De-duplicating the outlier *values* instead
    # would keep rows whose outlier value also occurs in another row.
    is_outlier = pd.Series(False, index=data.index)
    for col in columns:
        q25 = data[col].quantile(0.25)
        q75 = data[col].quantile(0.75)
        iqr = q75 - q25
        lower_bound = q25 - 1.5 * iqr
        upper_bound = q75 + 1.5 * iqr
        is_outlier |= (data[col] < lower_bound) | (data[col] > upper_bound)
    return data.loc[~is_outlier]


data_cleaned = find_and_remove_outliers(data, quantitative_columns)

# Kruskal-Wallis H-test for differences between the rating metrics
kruskal_results_cleaned = kruskal(
    *(data_cleaned[col] for col in quantitative_columns)
)
print("\nKruskal-Wallis H检验结果:\n", kruskal_results_cleaned)

# Pearson correlation coefficient of overall satisfaction with each metric
# (only the coefficient is kept; the p-value is discarded)
pearson_results = {}
for col in quantitative_columns:
    pearson_results[col], _ = pearsonr(data_cleaned['overall_satisfaction'], data_cleaned[col])
print("\nPearson相关系数检验结果:\n", pearson_results)


# In[3]:

# Most common answers in the "系统改进建议" column
suggestions = _count_normalized(data, "系统改进建议")
top_suggestions = suggestions.head(10)
_plot_top_counts(
    top_suggestions,
    color='skyblue',
    ylabel='系统改进建议',
    title='最常见的10条系统改进建议',
    file_stem='最常见的10条系统改进建议',
)


# In[10]:

# Most common answers in the "系统对工作流程的影响" column
suggestions = _count_normalized(data, "系统对工作流程的影响")
top_suggestions = suggestions.head(10)
_plot_top_counts(
    top_suggestions,
    color='lightcoral',
    ylabel='系统对工作流程的影响',
    title='最常见的10种系统对工作流程的影响',
    file_stem='系统对工作流程的影响',
)


# In[4]:

# Most common answers in the "系统对医疗质量的影响" column
suggestions = _count_normalized(data, "系统对医疗质量的影响")
top_suggestions = suggestions.head(10)
_plot_top_counts(
    top_suggestions,
    color='lightgreen',
    ylabel='系统对医疗质量的影响',
    title='最常见的10种系统对医疗质量的影响',
    # Own file name, so the workflow chart saved above is not overwritten.
    file_stem='系统对医疗质量的影响',
)


# In[10]:

# Font with CJK glyphs and a larger default text size for the box plots
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['font.size'] = 16

# One box plot per rating metric
_plot_score_boxplot(data, '推荐准确性')
_plot_score_boxplot(data, '推荐相关性')
_plot_score_boxplot(data, '操作便捷性')
_plot_score_boxplot(data, '生成报告时间', ylabel='生成报告时间的总时间')
_plot_score_boxplot(data, '数据隐私保护')


# In[11]:

_plot_score_boxplot(data, '系统可接受性')


# In[12]:

# Restyled version of the previous plot: thin frame plus vertical guide lines.
# NOTE(review): this writes the same file names as the cell above, so it
# replaces that cell's output - confirm that is intended.
plt.figure(figsize=(6, 4))

# Thin out the axes frame
ax = plt.gca()
ax.spines['top'].set_linewidth(0.1)
ax.spines['bottom'].set_linewidth(0.1)
ax.spines['left'].set_linewidth(0.2)
ax.spines['right'].set_linewidth(0.1)

sns.boxplot(x=data['系统可接受性'], width=0.2,)
plt.xticks(fontsize=16)
plt.title('系统可接受性的分布')
plt.ylabel('系统可接受性')

# Vertical guide lines; adjust the x positions as needed
for x in [0.5, 2, 3.5, 5.5, 8, 10]:
    plt.axvline(x=x, color='gray', linestyle='-', linewidth=0.3)

_save_and_show('系统可接受性的分布')


# In[ ]: