102 lines
3.0 KiB
JavaScript
102 lines
3.0 KiB
JavaScript
/**
|
|
* Created by joonkukang on 2014. 1. 16..
|
|
*/
|
|
var math = Require('ml/math')
|
|
var Kmeans = module.exports;
|
|
|
|
Kmeans.cluster = function(options) {
|
|
var data = options['data'];
|
|
var k = options['k'];
|
|
var distance = getDistanceFunction(options['distance']);
|
|
var epochs = options['epochs'];
|
|
var init_using_data = options['init_using_data'];
|
|
if(typeof init_using_data === "undefined");
|
|
init_using_data = true;
|
|
var means = getRandomMeans(data,k, init_using_data);
|
|
|
|
var epoch, i, j, l;
|
|
var clusters = [];
|
|
for(i=0 ; i<k ; i++)
|
|
clusters.push([]);
|
|
|
|
for(epoch=0 ; epoch<epochs ; epoch++) {
|
|
clusters = [];
|
|
for(i=0 ; i<k ; i++)
|
|
clusters.push([]);
|
|
|
|
// Find which centroid is the closest for each row
|
|
for(i=0 ; i<data.length ; i++) {
|
|
var bestmatch = 0;
|
|
for(j=0 ; j<k ; j++) {
|
|
if(distance(means[j],data[i]) < distance(means[bestmatch],data[i])) bestmatch = j;
|
|
}
|
|
clusters[bestmatch].push(i);
|
|
}
|
|
|
|
// Move the centroids to the average of their members
|
|
for(i=0 ; i<k ; i++) {
|
|
var avgs = [];
|
|
for(j=0 ; j<data[0].length ; j++)
|
|
avgs.push(0.0);
|
|
if(clusters[i].length > 0) {
|
|
for(j=0 ; j<clusters[i].length ; j++) {
|
|
for(l=0 ; l<data[0].length ; l++) {
|
|
avgs[l] += data[clusters[i][j]][l];
|
|
}
|
|
}
|
|
for(j=0 ; j<data[0].length ; j++) {
|
|
avgs[j] /= clusters[i].length;
|
|
}
|
|
means[i] = avgs;
|
|
}
|
|
}
|
|
}
|
|
return {
|
|
clusters : clusters,
|
|
means : means
|
|
};
|
|
}
|
|
|
|
var getRandomMeans = function(data,k, init_using_data) {
|
|
var clusters = [];
|
|
if(init_using_data) {
|
|
var cluster_index = math.range(data.length);
|
|
cluster_index = math.shuffle(cluster_index);
|
|
for(i=0 ; i<k ; i++) {
|
|
clusters.push(data[cluster_index[i]]);
|
|
}
|
|
} else {
|
|
var i,j;
|
|
var ranges = [];
|
|
for(i=0 ; i<data[0].length ; i++) {
|
|
var min = data[0][i] , max = data[0][i];
|
|
for(j=0 ; j<data.length ; j++) {
|
|
if(data[j][i] < min) min = data[j][i];
|
|
if(data[j][i] > max) max = data[j][i];
|
|
}
|
|
ranges.push([min,max]);
|
|
}
|
|
for(i=0 ; i<k ; i++) {
|
|
var cluster = [];
|
|
for(j=0 ; j<data[0].length;j++) {
|
|
cluster.push(Math.random() * (ranges[j][1] - ranges[j][0]) + ranges[j][0]);
|
|
}
|
|
clusters.push(cluster);
|
|
}
|
|
}
|
|
return clusters;
|
|
}
|
|
|
|
|
|
function getDistanceFunction(options) {
|
|
if(typeof options === 'undefined') {
|
|
return math.euclidean;
|
|
} else if (typeof options === 'function') {
|
|
return options;
|
|
} else if (options['type'] === 'euclidean') {
|
|
return math.euclidean;
|
|
} else if (options['type'] === 'pearson') {
|
|
return math.pearson;
|
|
}
|
|
}
|