Mon 21 Jul 22:43:21 CEST 2025
This commit is contained in:
parent
65fcffb881
commit
6717a5344f
101
js/ml/kmeans.js
Normal file
101
js/ml/kmeans.js
Normal file
|
@ -0,0 +1,101 @@
|
|||
/**
|
||||
* Created by joonkukang on 2014. 1. 16..
|
||||
*/
|
||||
var math = Require('ml/math')
|
||||
var Kmeans = module.exports;
|
||||
|
||||
/**
 * Partition the rows of a data set into k clusters with the k-means algorithm.
 *
 * @param {Object} options
 * @param {Array} options.data - Rows to cluster. The length of the first row is
 *     used as the dimensionality of every row (rows are assumed to be numeric
 *     arrays of equal length -- confirm against callers).
 * @param {number} options.k - Number of clusters / centroids.
 * @param {Function|Object} [options.distance] - Forwarded to getDistanceFunction.
 * @param {number} options.epochs - Number of assign/update iterations. When it
 *     is omitted the loop body never runs and every returned cluster is empty.
 * @param {boolean} [options.init_using_data=true] - Forwarded to getRandomMeans
 *     to choose how the initial centroids are produced.
 * @returns {{clusters: Array, means: Array}} `clusters[c]` lists the indices
 *     (into `data`) of the rows assigned to centroid `c` during the last epoch;
 *     `means[c]` is the position of centroid `c`.
 */
Kmeans.cluster = function(options) {
    var data = options['data'];
    var k = options['k'];
    var distance = getDistanceFunction(options['distance']);
    var epochs = options['epochs'];
    var init_using_data = options['init_using_data'];
    // Only fall back to the default when the caller did not supply a value.
    // (A stray ';' after this condition used to force the flag to true always.)
    if (typeof init_using_data === "undefined") {
        init_using_data = true;
    }
    var means = getRandomMeans(data, k, init_using_data);

    var epoch, i, j, l;

    // Start with k empty clusters so the result is well-formed even when no
    // epoch runs.
    var clusters = [];
    for (i = 0; i < k; i++) {
        clusters.push([]);
    }

    for (epoch = 0; epoch < epochs; epoch++) {
        // Assignments are rebuilt from scratch on every epoch.
        clusters = [];
        for (i = 0; i < k; i++) {
            clusters.push([]);
        }

        // Find which centroid is the closest for each row.
        for (i = 0; i < data.length; i++) {
            var bestmatch = 0;
            // Cache the best distance instead of recomputing it per comparison.
            var bestDistance = distance(means[0], data[i]);
            for (j = 1; j < k; j++) {
                var candidate = distance(means[j], data[i]);
                // Strict '<' keeps the lowest-index centroid on ties.
                if (candidate < bestDistance) {
                    bestDistance = candidate;
                    bestmatch = j;
                }
            }
            clusters[bestmatch].push(i);
        }

        // Move the centroids to the average of their members.
        for (i = 0; i < k; i++) {
            var members = clusters[i];
            // A centroid with no members keeps its previous position.
            if (members.length > 0) {
                var dims = data[0].length;
                var avgs = [];
                for (j = 0; j < dims; j++) {
                    avgs.push(0.0);
                }
                for (j = 0; j < members.length; j++) {
                    for (l = 0; l < dims; l++) {
                        avgs[l] += data[members[j]][l];
                    }
                }
                for (j = 0; j < dims; j++) {
                    avgs[j] /= members.length;
                }
                means[i] = avgs;
            }
        }
    }

    return {
        clusters : clusters,
        means : means
    };
};
|
||||
|
||||
/**
 * Produce k initial centroids for k-means.
 *
 * @param {Array} data - Rows of the data set; the length of the first row is
 *     used as the dimensionality.
 * @param {number} k - Number of centroids to produce.
 * @param {boolean} init_using_data - When truthy, the centroids are copies of k
 *     rows selected through math.range / math.shuffle. Otherwise each
 *     coordinate is drawn with Math.random() between the minimum and maximum
 *     of that column.
 * @returns {Array} An array of k centroids.
 * @throws {RangeError} When init_using_data is truthy and k exceeds the number
 *     of rows, because there are not enough rows to seed every centroid.
 */
var getRandomMeans = function(data,k, init_using_data) {
    var clusters = [];
    // Declared once up front: both branches below use these counters.
    var i, j;

    if (init_using_data) {
        if (k > data.length) {
            throw new RangeError(
                'Kmeans: cannot pick ' + k + ' initial means from ' +
                data.length + ' data rows');
        }
        var cluster_index = math.range(data.length);
        cluster_index = math.shuffle(cluster_index);
        for (i = 0; i < k; i++) {
            // Copy the row so a returned mean never aliases the caller's data.
            clusters.push(Array.prototype.slice.call(data[cluster_index[i]]));
        }
    } else {
        // Collect the [min, max] interval of every column.
        var ranges = [];
        for (i = 0; i < data[0].length; i++) {
            var min = data[0][i];
            var max = data[0][i];
            for (j = 0; j < data.length; j++) {
                if (data[j][i] < min) min = data[j][i];
                if (data[j][i] > max) max = data[j][i];
            }
            ranges.push([min, max]);
        }
        // Draw each coordinate from within its column's interval.
        for (i = 0; i < k; i++) {
            var cluster = [];
            for (j = 0; j < data[0].length; j++) {
                cluster.push(Math.random() * (ranges[j][1] - ranges[j][0]) + ranges[j][0]);
            }
            clusters.push(cluster);
        }
    }
    return clusters;
};
|
||||
|
||||
|
||||
/**
 * Resolve the `distance` option into a distance function.
 *
 * @param {Function|Object|null} [options] - Either a ready-made distance
 *     function, an object whose `type` is 'euclidean' or 'pearson', or
 *     undefined/null to get the default.
 * @returns {Function} The supplied function, math.euclidean (also the
 *     default), or math.pearson.
 * @throws {Error} When `options` is neither a function nor an object with a
 *     recognised `type`; previously this case silently returned undefined.
 */
function getDistanceFunction(options) {
    // null is handled like undefined; reading options['type'] on it would throw.
    if (typeof options === 'undefined' || options === null) {
        return math.euclidean;
    }
    if (typeof options === 'function') {
        return options;
    }
    if (options['type'] === 'euclidean') {
        return math.euclidean;
    }
    if (options['type'] === 'pearson') {
        return math.pearson;
    }
    throw new Error(
        'Kmeans: unsupported distance option; expected a function or an ' +
        'object whose type is "euclidean" or "pearson"');
}
|
Loading…
Reference in New Issue
Block a user