-
-
Save mldeveloper01/032a94c6dc59b7b2d0689a9cb7cbbfda to your computer and use it in GitHub Desktop.
Revisions
-
iandanforth revised this gist
Mar 30, 2018 . 1 changed file with 148 additions and 64 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,11 +1,12 @@ ############################################################################# # Full Imports from __future__ import division import math import random """ This is a pure Python implementation of the K-means Clustering algorithmn. The original can be found here: http://pandoricweb.tumblr.com/post/8646701677/python-implementation-of-the-k-means-clustering @@ -49,28 +50,134 @@ def main(): upper = 200 # The K in k-means. How many clusters do we assume exist? # - Must be less than num_points num_clusters = 3 # When do we say the process has 'converged' and stop updating clusters? cutoff = 0.2 # Generate some points to cluster # Note: If you want to use your own data, set points equal to it here. points = [ makeRandomPoint(dimensions, lower, upper) for i in xrange(num_points) ] # Cluster those data! iteration_count = 20 best_clusters = iterative_kmeans( points, num_clusters, cutoff, iteration_count ) # Print our best clusters for i, c in enumerate(best_clusters): for p in c.points: print " Cluster: ", i, "\t Point :", p # Display clusters using plotly for 2d data if dimensions in [2, 3] and plotly: print "Plotting points, launching browser ..." plotClusters(best_clusters, dimensions) ############################################################################# # K-means Methods def iterative_kmeans(points, num_clusters, cutoff, iteration_count): """ K-means isn't guaranteed to get the best answer the first time. It might get stuck in a "local minimum." Here we run kmeans() *iteration_count* times to increase the chance of getting a good answer. Returns the best set of clusters found. """ print "Running K-means %d times to find best clusters ..." % iteration_count candidate_clusters = [] errors = [] for _ in range(iteration_count): clusters = kmeans(points, num_clusters, cutoff) error = calculateError(clusters) candidate_clusters.append(clusters) errors.append(error) highest_error = max(errors) lowest_error = min(errors) print "Lowest error found: %.2f (Highest: %.2f)" % ( lowest_error, highest_error ) ind_of_lowest_error = errors.index(lowest_error) best_clusters = candidate_clusters[ind_of_lowest_error] return best_clusters def kmeans(points, k, cutoff): # Pick out k random points to use as our initial centroids initial_centroids = random.sample(points, k) # Create k clusters using those centroids # Note: Cluster takes lists, so we wrap each point in a list here. clusters = [Cluster([p]) for p in initial_centroids] # Loop through the dataset until the clusters stabilize loopCounter = 0 while True: # Create a list of lists to hold the points in each cluster lists = [[] for _ in clusters] clusterCount = len(clusters) # Start counting loops loopCounter += 1 # For every point in the dataset ... for p in points: # Get the distance between that point and the centroid of the first # cluster. smallest_distance = getDistance(p, clusters[0].centroid) # Set the cluster this point belongs to clusterIndex = 0 # For the remainder of the clusters ... for i in range(1, clusterCount): # calculate the distance of that point to each other cluster's # centroid. distance = getDistance(p, clusters[i].centroid) # If it's closer to that cluster's centroid update what we # think the smallest distance is if distance < smallest_distance: smallest_distance = distance clusterIndex = i # After finding the cluster the smallest distance away # set the point to belong to that cluster lists[clusterIndex].append(p) # Set our biggest_shift to zero for this iteration biggest_shift = 0.0 # For each cluster ... for i in range(clusterCount): # Calculate how far the centroid moved in this iteration shift = clusters[i].update(lists[i]) # Keep track of the largest move from all cluster centroid updates biggest_shift = max(biggest_shift, shift) # Remove empty clusters clusters = [c for c in clusters if len(c.points) != 0] # If the centroids have stopped moving much, say we're done! if biggest_shift < cutoff: print "Converged after %s iterations" % loopCounter break return clusters ############################################################################# # Classes class Point(object): ''' @@ -130,6 +237,11 @@ def update(self, points): ''' old_centroid = self.centroid self.points = points # Return early if we have no points, this cluster will get # cleaned up (removed) in the outer loop. if len(self.points) == 0: return 0 self.centroid = self.calculateCentroid() shift = getDistance(old_centroid, self.centroid) return shift @@ -148,66 +260,23 @@ def calculateCentroid(self): return Point(centroid_coords) def getTotalDistance(self): ''' Return the sum of all squared Euclidean distances between each point in the cluster and the cluster's centroid. ''' sumOfDistances = 0.0 for p in self.points: sumOfDistances += getDistance(p, self.centroid) return sumOfDistances ############################################################################# # Helper Methods def getDistance(a, b): ''' Squared Euclidean distance between two n-dimensional points. https://en.wikipedia.org/wiki/Euclidean_distance#n_dimensions Note: This can be very slow and does not scale well ''' @@ -218,9 +287,8 @@ def getDistance(a, b): for i in range(a.n): squareDifference = pow((a.coords[i]-b.coords[i]), 2) accumulatedDifference += squareDifference return accumulatedDifference def makeRandomPoint(n, lower, upper): ''' @@ -230,6 +298,22 @@ def makeRandomPoint(n, lower, upper): p = Point([random.uniform(lower, upper) for _ in range(n)]) return p def calculateError(clusters): ''' Return the average squared distance between each point and its cluster centroid. This is also known as the "distortion cost." ''' accumulatedDistances = 0 num_points = 0 for cluster in clusters: num_points += len(cluster.points) accumulatedDistances += cluster.getTotalDistance() error = accumulatedDistances / num_points return error def plotClusters(data, dimensions): ''' This uses the plotly offline mode to create a local HTML file. -
iandanforth revised this gist
Jan 29, 2017 . 1 changed file with 65 additions and 52 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,7 +1,6 @@ ############################################################################# # Full Imports import math import random @@ -27,7 +26,6 @@ This script uses an offline plotting mode and will store and open plots locally. To store and share plots online sign up for a plotly API key at https://plot.ly. """ plotly = False @@ -38,102 +36,104 @@ print "INFO: Plotly is not installed, plots will not be generated." def main(): # How many points are in our dataset? num_points = 20 # For each of those points how many dimensions do they have? # Note: Plotting will only work in two or three dimensions dimensions = 2 # Bounds for the values of those points in each dimension lower = 0 upper = 200 # The K in k-means. How many clusters do we assume exist? num_clusters = 3 # When do we say the optimization has 'converged' and stop updating clusters cutoff = 0.2 # Generate some points to cluster points = [ makeRandomPoint(dimensions, lower, upper) for i in xrange(num_points) ] # Cluster those data! clusters = kmeans(points, num_clusters, cutoff) # Print our clusters for i, c in enumerate(clusters): for p in c.points: print " Cluster: ", i, "\t Point :", p # Display clusters using plotly for 2d data if dimensions in [2, 3] and plotly: print "Plotting points, launching browser ..." plotClusters(clusters, dimensions) class Point(object): ''' A point in n dimensional space ''' def __init__(self, coords): ''' coords - A list of values, one per dimension ''' self.coords = coords self.n = len(coords) def __repr__(self): return str(self.coords) class Cluster(object): ''' A set of points and their centroid ''' def __init__(self, points): ''' points - A list of point objects ''' if len(points) == 0: raise Exception("ERROR: empty cluster") # The points that belong to this cluster self.points = points # The dimensionality of the points in this cluster self.n = points[0].n # Assert that all points are of the same dimensionality for p in points: if p.n != self.n: raise Exception("ERROR: inconsistent dimensions") # Set up the initial centroid (this is usually based off one point) self.centroid = self.calculateCentroid() def __repr__(self): ''' String representation of this object ''' return str(self.points) def update(self, points): ''' Returns the distance between the previous centroid and the new after recalculating and storing the new centroid. Note: Initially we expect centroids to shift around a lot and then gradually settle down. ''' old_centroid = self.centroid self.points = points self.centroid = self.calculateCentroid() shift = getDistance(old_centroid, self.centroid) return shift def calculateCentroid(self): ''' Finds a virtual center point for a group of n-dimensional points @@ -145,36 +145,36 @@ def calculateCentroid(self): unzipped = zip(*coords) # Calculate the mean for each dimension centroid_coords = [math.fsum(dList)/numPoints for dList in unzipped] return Point(centroid_coords) def kmeans(points, k, cutoff): # Pick out k random points to use as our initial centroids initial = random.sample(points, k) # Create k clusters using those centroids # Note: Cluster takes lists, so we wrap each point in a list here. clusters = [Cluster([p]) for p in initial] # Loop through the dataset until the clusters stabilize loopCounter = 0 while True: # Create a list of lists to hold the points in each cluster lists = [[] for _ in clusters] clusterCount = len(clusters) # Start counting loops loopCounter += 1 # For every point in the dataset ... for p in points: # Get the distance between that point and the centroid of the first # cluster. smallest_distance = getDistance(p, clusters[0].centroid) # Set the cluster this point belongs to clusterIndex = 0 # For the remainder of the clusters ... for i in range(clusterCount - 1): # calculate the distance of that point to each other cluster's @@ -188,17 +188,17 @@ def kmeans(points, k, cutoff): # After finding the cluster the smallest distance away # set the point to belong to that cluster lists[clusterIndex].append(p) # Set our biggest_shift to zero for this iteration biggest_shift = 0.0 # For each cluster ... for i in range(clusterCount): # Calculate how far the centroid moved in this iteration shift = clusters[i].update(lists[i]) # Keep track of the largest move from all cluster centroid updates biggest_shift = max(biggest_shift, shift) # If the centroids have stopped moving much, say we're done! if biggest_shift < cutoff: print "Converged after %s iterations" % loopCounter @@ -208,25 +208,31 @@ def kmeans(points, k, cutoff): def getDistance(a, b): ''' Euclidean distance between two n-dimensional points. https://en.wikipedia.org/wiki/Euclidean_distance#n_dimensions Note: This can be very slow and does not scale well ''' if a.n != b.n: raise Exception("ERROR: non comparable points") accumulatedDifference = 0.0 for i in range(a.n): squareDifference = pow((a.coords[i]-b.coords[i]), 2) accumulatedDifference += squareDifference distance = math.sqrt(accumulatedDifference) return distance def makeRandomPoint(n, lower, upper): ''' Returns a Point object with n dimensions and values between lower and upper in each of those dimensions ''' p = Point([random.uniform(lower, upper) for _ in range(n)]) return p def plotClusters(data, dimensions): ''' This uses the plotly offline mode to create a local HTML file. This should open your default web browser. ''' if dimensions not in [2, 3]: @@ -261,12 +267,19 @@ def plotClusters(data, dimensions): centroid['name'] = "Centroid " + str(i) traceList.append(Scatter(**centroid)) else: symbols = [ "circle", "square", "diamond", "circle-open", "square-open", "diamond-open", "cross", "x" ] symbol_count = len(symbols) if i > symbol_count: print "Warning: Not enough marker symbols to go around" # Convert our list of x,y,z's separate lists. trace['x'], trace['y'], trace['z'] = zip(*cluster_data) trace['mode'] = 'markers' trace['marker'] = {} @@ -291,5 +304,5 @@ def plotClusters(data, dimensions): "layout": Layout(title=title) }) if __name__ == "__main__": main() -
iandanforth revised this gist
Jan 23, 2017 . 1 changed file with 1 addition and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -4,7 +4,6 @@ import sys import math import random """ This is a pure Python implementation of the K-Means Clustering algorithmn. The @@ -44,7 +43,7 @@ def main(): num_points = 20 # For each of those points how many dimensions do they have? # Note: Plotting will only work in two or three dimensions dimensions = 2 # Bounds for the values of those points in each dimension -
iandanforth revised this gist
Jan 22, 2017 . 1 changed file with 96 additions and 70 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -19,29 +19,32 @@ This script specifically avoids using numpy or other more obscure libraries. It is meant to be *clear* not fast. I have also added integration with the plot.ly plotting library. So you can see the clusters found by this algorithm. To install run: ``` pip install plotly ``` This script uses an offline plotting mode and will store and open plots locally. To store and share plots online sign up for a plotly API key at https://plot.ly. """ plotly = False try: import plotly from plotly.graph_objs import Scatter, Scatter3d, Layout except ImportError: print "INFO: Plotly is not installed, plots will not be generated." def main(): # How many points are in our dataset? num_points = 20 # For each of those points how many dimensions do they have? # Note: Plotting will only work in two dimensions dimensions = 2 # Bounds for the values of those points in each dimension @@ -52,28 +55,27 @@ def main(): num_clusters = 3 # When do we say the optimization has 'converged' and stop updating clusters cutoff = 0.2 # Generate some points to cluster points = [makeRandomPoint(dimensions, lower, upper) for i in xrange(num_points)] # Cluster those data! clusters = kmeans(points, num_clusters, cutoff) # Print our clusters for i, c in enumerate(clusters): for p in c.points: print " Cluster: ", i, "\t Point :", p # Display clusters using plotly for 2d data if dimensions in [2, 3] and plotly: print "Plotting points, launching browser ..." plotClusters(clusters, dimensions) class Point: ''' A point in n dimensional space ''' def __init__(self, coords): ''' @@ -96,7 +98,9 @@ def __init__(self, points): points - A list of point objects ''' if len(points) == 0: raise Exception("ERROR: empty cluster") # The points that belong to this cluster self.points = points @@ -105,7 +109,8 @@ def __init__(self, points): # Assert that all points are of the same dimensionality for p in points: if p.n != self.n: raise Exception("ERROR: inconsistent dimensions") # Set up the initial centroid (this is usually based off one point) self.centroid = self.calculateCentroid() @@ -120,6 +125,9 @@ def update(self, points): ''' Returns the distance between the previous centroid and the new after recalculating and storing the new centroid. Note: Initially we expect centroids to shift around a lot and then gradually settle down. ''' old_centroid = self.centroid self.points = points @@ -147,13 +155,14 @@ def kmeans(points, k, cutoff): initial = random.sample(points, k) # Create k clusters using those centroids # Note: Cluster takes lists, so we wrap each point in a list here. clusters = [Cluster([p]) for p in initial] # Loop through the dataset until the clusters stabilize loopCounter = 0 while True: # Create a list of lists to hold the points in each cluster lists = [[] for c in clusters] clusterCount = len(clusters) # Start counting loops @@ -173,17 +182,18 @@ def kmeans(points, k, cutoff): # centroid. distance = getDistance(p, clusters[i+1].centroid) # If it's closer to that cluster's centroid update what we # think the smallest distance is if distance < smallest_distance: smallest_distance = distance clusterIndex = i+1 # After finding the cluster the smallest distance away # set the point to belong to that cluster lists[clusterIndex].append(p) # Set our biggest_shift to zero for this iteration biggest_shift = 0.0 # For each cluster ... for i in range(clusterCount): # Calculate how far the centroid moved in this iteration shift = clusters[i].update(lists[i]) @@ -202,9 +212,9 @@ def getDistance(a, b): Note: This can be very slow and does not scale well ''' if a.n != b.n: raise Exception("ERROR: non comparable points") ret = reduce(lambda x,y: x + pow((a.coords[y]-b.coords[y]), 2), range(a.n), 0.0) return math.sqrt(ret) def makeRandomPoint(n, lower, upper): @@ -215,56 +225,72 @@ def makeRandomPoint(n, lower, upper): p = Point([random.uniform(lower, upper) for i in range(n)]) return p def plotClusters(data, dimensions): ''' This uses the plotly offline mode to create a local HTML file. This should open your default web browser. ''' if dimensions not in [2, 3]: raise Exception("Plots are only available for 2 and 3 dimensional data") # Convert data into plotly format. traceList = [] for i, c in enumerate(data): # Get a list of x,y coordinates for the points in this cluster. cluster_data = [] for point in c.points: cluster_data.append(point.coords) trace = {} centroid = {} if dimensions == 2: # Convert our list of x,y's into an x list and a y list. trace['x'], trace['y'] = zip(*cluster_data) trace['mode'] = 'markers' trace['marker'] = {} trace['marker']['symbol'] = i trace['marker']['size'] = 12 trace['name'] = "Cluster " + str(i) traceList.append(Scatter(**trace)) # Centroid (A trace of length 1) centroid['x'] = [c.centroid.coords[0]] centroid['y'] = [c.centroid.coords[1]] centroid['mode'] = 'markers' centroid['marker'] = {} centroid['marker']['symbol'] = i centroid['marker']['color'] = 'rgb(200,10,10)' centroid['name'] = "Centroid " + str(i) traceList.append(Scatter(**centroid)) else: symbols = ["circle", "square", "diamond", "circle-open", "square-open", "diamond-open", "cross", "x"] symbol_count = len(symbols) symbol_index = i % len(symbols) if i > symbol_count: print "Warning: Not enough marker symbols to go around, expect repeats" # Convert our list of x,y,z's into an x list, a y list, and a z list. trace['x'], trace['y'], trace['z'] = zip(*cluster_data) trace['mode'] = 'markers' trace['marker'] = {} trace['marker']['symbol'] = symbols[i] trace['marker']['size'] = 12 trace['name'] = "Cluster " + str(i) traceList.append(Scatter3d(**trace)) # Centroid (A trace of length 1) centroid['x'] = [c.centroid.coords[0]] centroid['y'] = [c.centroid.coords[1]] centroid['z'] = [c.centroid.coords[2]] centroid['mode'] = 'markers' centroid['marker'] = {} centroid['marker']['symbol'] = symbols[i] centroid['marker']['color'] = 'rgb(200,10,10)' centroid['name'] = "Centroid " + str(i) traceList.append(Scatter3d(**centroid)) title = "K-means clustering with %s clusters" % str(len(data)) plotly.offline.plot({ "data": traceList, "layout": Layout(title=title) }) if __name__ == "__main__": main() -
iandanforth revised this gist
Jun 25, 2013 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -68,7 +68,7 @@ def main(): # Display clusters using plotly for 2d data # This uses the 'open' command on a URL and may only work on OSX. if dimensions == 2 and PLOTLY_USERNAME: print "Plotting points, launching browser ..." plotClusters(clusters) class Point: -
iandanforth created this gist
Jun 25, 2013 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,270 @@ ############################################################################# # Full Imports import sys import math import random import subprocess """ This is a pure Python implementation of the K-Means Clustering algorithmn. The original can be found here: http://pandoricweb.tumblr.com/post/8646701677/python-implementation-of-the-k-means-clustering I have refactored the code and added comments to aid in readability. After reading through this code you should understand clearly how K-means works. If not, feel free to email me with questions and suggestions. (iandanforth at gmail) This script specifically avoids using numpy or other more obscure libraries. It is meant to be *clear* not fast. I have also added integration with the plot.ly plotting service. If you put in your (free) plot.ly credentials below, it will automatically plot the discovered clusters and their centroids. To use plotly integration you will need to: 1. Get a username/key from www.plot.ly/api and enter them below 2. Install the plotly module: pip install plotly """ PLOTLY_USERNAME = None PLOTLY_KEY = None if PLOTLY_USERNAME: from plotly import plotly def main(): # How many points are in our dataset? num_points = 10 # For each of those points how many dimensions do they have? dimensions = 2 # Bounds for the values of those points in each dimension lower = 0 upper = 200 # The K in k-means. How many clusters do we assume exist? num_clusters = 3 # When do we say the optimization has 'converged' and stop updating clusters opt_cutoff = 0.5 # Generate some points points = [makeRandomPoint(dimensions, lower, upper) for i in xrange(num_points)] # Cluster those data! clusters = kmeans(points, num_clusters, opt_cutoff) # Print our clusters for i,c in enumerate(clusters): for p in c.points: print " Cluster: ", i, "\t Point :", p # Display clusters using plotly for 2d data # This uses the 'open' command on a URL and may only work on OSX. if dimensions == 2 and PLOTLY_USERNAME: "Plotting points, launching browser ..." plotClusters(clusters) class Point: ''' An point in n dimensional space ''' def __init__(self, coords): ''' coords - A list of values, one per dimension ''' self.coords = coords self.n = len(coords) def __repr__(self): return str(self.coords) class Cluster: ''' A set of points and their centroid ''' def __init__(self, points): ''' points - A list of point objects ''' if len(points) == 0: raise Exception("ILLEGAL: empty cluster") # The points that belong to this cluster self.points = points # The dimensionality of the points in this cluster self.n = points[0].n # Assert that all points are of the same dimensionality for p in points: if p.n != self.n: raise Exception("ILLEGAL: wrong dimensions") # Set up the initial centroid (this is usually based off one point) self.centroid = self.calculateCentroid() def __repr__(self): ''' String representation of this object ''' return str(self.points) def update(self, points): ''' Returns the distance between the previous centroid and the new after recalculating and storing the new centroid. ''' old_centroid = self.centroid self.points = points self.centroid = self.calculateCentroid() shift = getDistance(old_centroid, self.centroid) return shift def calculateCentroid(self): ''' Finds a virtual center point for a group of n-dimensional points ''' numPoints = len(self.points) # Get a list of all coordinates in this cluster coords = [p.coords for p in self.points] # Reformat that so all x's are together, all y'z etc. unzipped = zip(*coords) # Calculate the mean for each dimension centroid_coords = [math.fsum(dList)/numPoints for dList in unzipped] return Point(centroid_coords) def kmeans(points, k, cutoff): # Pick out k random points to use as our initial centroids initial = random.sample(points, k) # Create k clusters using those centroids clusters = [Cluster([p]) for p in initial] # Loop through the dataset until the clusters stabilize loopCounter = 0 while True: # Create a list of lists to hold the points in each cluster lists = [ [] for c in clusters] clusterCount = len(clusters) # Start counting loops loopCounter += 1 # For every point in the dataset ... for p in points: # Get the distance between that point and the centroid of the first # cluster. smallest_distance = getDistance(p, clusters[0].centroid) # Set the cluster this point belongs to clusterIndex = 0 # For the remainder of the clusters ... for i in range(clusterCount - 1): # calculate the distance of that point to each other cluster's # centroid. distance = getDistance(p, clusters[i+1].centroid) # If it's closer to that cluster's centroid update what we # think the smallest distance is, and set the point to belong # to that cluster if distance < smallest_distance: smallest_distance = distance clusterIndex = i+1 lists[clusterIndex].append(p) # Set our biggest_shift to zero for this iteration biggest_shift = 0.0 # As many times as there are clusters ... for i in range(clusterCount): # Calculate how far the centroid moved in this iteration shift = clusters[i].update(lists[i]) # Keep track of the largest move from all cluster centroid updates biggest_shift = max(biggest_shift, shift) # If the centroids have stopped moving much, say we're done! if biggest_shift < cutoff: print "Converged after %s iterations" % loopCounter break return clusters def getDistance(a, b): ''' Euclidean distance between two n-dimensional points. Note: This can be very slow and does not scale well ''' if a.n != b.n: raise Exception("ILLEGAL: non comparable points") ret = reduce(lambda x,y: x + pow((a.coords[y]-b.coords[y]), 2),range(a.n),0.0) return math.sqrt(ret) def makeRandomPoint(n, lower, upper): ''' Returns a Point object with n dimensions and values between lower and upper in each of those dimensions ''' p = Point([random.uniform(lower, upper) for i in range(n)]) return p def plotClusters(data): ''' Use the plotly API to plot data from clusters. Gets a plot URL from plotly and then uses subprocess to 'open' that URL from the command line. This should open your default web browser. ''' # List of symbols each cluster will be displayed using symbols = ['circle', 'cross', 'triangle-up', 'square'] # Convert data into plotly format. traceList = [] for i, c in enumerate(data): data = [] for p in c.points: data.append(p.coords) # Data trace = {} trace['x'], trace['y'] = zip(*data) trace['marker'] = {} trace['marker']['symbol'] = symbols[i] trace['name'] = "Cluster " + str(i) traceList.append(trace) # Centroid (A trace of length 1) centroid = {} centroid['x'] = [c.centroid.coords[0]] centroid['y'] = [c.centroid.coords[1]] centroid['marker'] = {} centroid['marker']['symbol'] = symbols[i] centroid['marker']['color'] = 'rgb(200,10,10)' centroid['name'] = "Centroid " + str(i) traceList.append(centroid) # Log in to plotly py = plotly(username=PLOTLY_USERNAME, key=PLOTLY_KEY) # Style the chart datastyle = {'mode':'markers', 'type':'scatter', 'marker':{'line':{'width':0}, 'size':12, 'opacity':0.6, 'color':'rgb(74, 134, 232)'}} resp = py.plot(*traceList, style = datastyle) # Display that plot in a browser cmd = "open " + resp['url'] subprocess.call(cmd, shell=True) if __name__ == "__main__": main()