# Elbow plot for choosing the number of KMeans clusters.
# Reference:
# http://stackoverflow.com/questions/6645895/calculating-the-percentage-of-variance-measure-for-k-means

# One array of cluster centres per fitted model in `kms`.
cents = [km.kmeans.cluster_centers_ for km in kms]

# Euclidean distance from every row of `rid_brand_pca` to every centre,
# one distance matrix per model.
D_k = [cdist(rid_brand_pca, cent, 'euclidean') for cent in cents]

# Distance from each sample to its nearest centre.
dist = [np.min(D, axis=1) for D in D_k]

# Mean nearest-centre distance per model. Each `d` holds one entry per row
# of `rid_brand_pca`, so the mean equals sum(d) / rid_brand_pca.shape[0].
# NOTE(review): this averages plain (unsquared) distances, although the
# y-axis label below calls it a "sum of squares" -- confirm which is intended.
avgWithinSS = [np.mean(d) for d in dist]

# Elbow curve.
# Take the x-values from the models themselves (number of centres in each)
# rather than a hard-coded range, so x and y always have matching lengths.
K = [len(cent) for cent in cents]
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(K, avgWithinSS, 'b*-')
ax.grid(True)
ax.set_xlabel('Number of clusters')
# Fixed y-window, presumably tuned to this dataset; values outside it are
# not visible in the plot.
ax.set_ylim(1.6, 2.0)
ax.set_ylabel('Average within-cluster sum of squares')
ax.set_title('Elbow for KMeans clustering')