Skip to content

Instantly share code, notes, and snippets.

@adzkar
Created February 26, 2020 04:16
Show Gist options
  • Save adzkar/a454da5c4ca31c2ab9936791687de20c to your computer and use it in GitHub Desktop.
Save adzkar/a454da5c4ca31c2ab9936791687de20c to your computer and use it in GitHub Desktop.
from matplotlib import pyplot as plt
from copy import deepcopy
import numpy as np
plt.rcParams['figure.figsize'] = (10, 10)
plt.style.use('ggplot')
# Euclidian Distance
dist = lambda a, b, ax=1: np.linalg.norm(a - b, axis=ax)
# init
k = 3
datas = [ map(int, data.split(';')[1:]) for data in open('contohData.csv').read().splitlines()[1:] ]
f1 = [ x[0] for x in datas ]
f2 = [ x[1] for x in datas ]
#Plot read points
plt.scatter(f1,f2, c='#050505', s=50)
# Clustering
# Random Centroid X dan Y
centroids = np.array([ datas[i] for i in np.random.randint(0,len(datas), k) ], dtype=np.float32)
# Plot Random Centroid
plt.scatter(centroids[0], centroids[1], marker="*", s=200, c='g')
# Find a new Centroid
centroids_old = np.zeros(centroids.shape)
clusters = np.zeros(len(datas))
error = dist(centroids, centroids_old, None)
while error != 0:
for i in range(len(datas)) :
distance = dist(datas[i], centroids)
cluster = np.argmin(distance)
clusters[i] = cluster
centroids_old = deepcopy(centroids)
for i in range(k):
points = [datas[j] for j in range(len(datas)) if clusters[j] == i]
centroids[i] = np.mean(points, axis=0)
error = dist(centroids, centroids_old, None)
colors = ['r', 'g', 'b', 'y', 'c', 'm']
fig, ax = plt.subplots()
for i in range(k):
points = np.array([datas[j] for j in range(len(datas)) if clusters[j] == i])
ax.scatter(points[:, 0], points[:, 1], s=7, c=colors[i])
ax.scatter(centroids[:, 0], centroids[:, 1], marker='*', s=200, c='#050505')
print 'centroid optimal'
print centroids
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment