Skip to content

Instantly share code, notes, and snippets.

@rcarmo
Created March 4, 2025 23:11
Show Gist options
  • Select an option

  • Save rcarmo/52da274506bc6d3262ba6aed4e94e46a to your computer and use it in GitHub Desktop.

Select an option

Save rcarmo/52da274506bc6d3262ba6aed4e94e46a to your computer and use it in GitHub Desktop.

Revisions

  1. rcarmo created this gist Mar 4, 2025.
    104 changes: 104 additions & 0 deletions kmeans.js
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,104 @@
    function initCentroids(pts, k) {
    var randPts = pts.slice(0);
    randPts.sort(() => Math.floor(Math.random() * pts.length));
    return randPts.slice(0, k);
    }

    function dist(v1, v2) {
    var sum = 0;
    for (var i = 0; i < v1.length; i++) {
    if (i !== 0) sum += Math.pow(v2[i] - v1[i], 2);
    }
    return Math.sqrt(sum);
    }

    function findNearestCentroid(pt, cents) {
    var minDist = Infinity,
    idx = 0;

    for (var i = 0; i < cents.length; i++) {
    var d = dist(pt, cents[i]);
    if (d < minDist) {
    minDist = d;
    idx = i;
    }
    }
    return idx;
    }

    function iterate(pts, k, cents) {
    var clusters = new Array(k);
    var zeros = [];
    for (var i = 0; i < pts[0].length; i++) {
    zeros.push(0);
    }
    var sums = [];
    for (var i = 0; i < k; i++) {
    sums[i] = zeros.slice(0);
    }

    var clusterIds = new Array(pts.length);

    for (var i = 0; i < pts.length; i++) {
    var pt = pts[i].slice(0);
    var clusterId = findNearestCentroid(pt, cents);

    clusterIds[i] = clusterId;

    if (!clusters[clusterId]) clusters[clusterId] = [];
    clusters[clusterId].push(pt);

    for (var j = 0; j < pt.length; j++) {
    sums[clusterId][j] += pt[j];
    }
    }

    var maxDist = 0;

    for (var i = 0; i < k; i++) {
    var size = clusters[i] ? clusters[i].length : 0;

    for (var j = 0; j < sums[i].length; j++) {
    sums[i][j] = sums[i][j] / size;
    }
    var centDist = dist(sums[i], cents[i]);
    if (centDist > maxDist) maxDist = centDist;
    }

    if (maxDist <= 0.5) {
    return clusterIds;
    }

    for (var i = 0; i < k; i++) {
    cents[i] = sums[i].slice(0);
    }
    return iterate(pts, k, cents);
    }

    function kmeans(pts, k) {
    if (!pts || !k) {
    throw new Error("Provide 2 arguments: points and clusters");
    }

    if (!pts.length || !pts[0].length) {
    throw new Error("Points array cannot be empty");
    }

    if (k < 1 || k > pts.length) {
    throw new Error("K must be between 1 and the number of points");
    }

    var cents = initCentroids(pts, k);
    return iterate(pts, k, cents);
    }

    try {
    var clusterIds = kmeans(msg.payload, msg.clusters || 4);
    msg = {
    payload: clusterIds,
    };
    return msg;
    } catch (error) {
    console.error(error);
    return null;
    }