diff --git a/js/ml/pca.js b/js/ml/pca.js new file mode 100644 index 0000000..bfe6eb4 --- /dev/null +++ b/js/ml/pca.js @@ -0,0 +1,560 @@ +// https://github.com/bitanath/pca +var PCA = (function () { + var options = {}; + /** + * The first step is to subtract the mean and center data + * + * @param {Array} matrix - data in an mXn matrix format + * @returns + */ + function computeDeviationMatrix(matrix) { + var unit = unitSquareMatrix(matrix.length); + return subtract(matrix, scale(multiply(unit, matrix), 1 / matrix.length)); + } + /** + * Computes variance from deviation + * + * @param {Array} deviation - data minus mean as calculated from computeDeviationMatrix + * @returns + */ + function computeDeviationScores(deviation) { + var devSumOfSquares = multiply(transpose(deviation), deviation); + return devSumOfSquares; + } + /** + * Calculates the var covar square matrix using either population or sample + * + * @param {Array} devSumOfSquares + * @param {boolean} sample - true/false whether data is from sample or not + * @returns + */ + function computeVarianceCovariance(devSumOfSquares, sample) { + var varianceCovariance; + if (sample) + varianceCovariance = scale(devSumOfSquares, 1 / (devSumOfSquares.length - 1)); + else + varianceCovariance = scale(devSumOfSquares, 1 / (devSumOfSquares.length)); + return varianceCovariance; + } + /** + * Matrix is the deviation sum of squares as computed earlier + * + * @param {Array} matrix - output of computeDeviationScores + * @returns + */ + function computeSVD(matrix) { + var result = svd(matrix); + if (options.verbose) console.log(result) + var eigenvectors = result.U; + var eigenvalues = result.S; + var results = eigenvalues.map(function (value, i) { + var obj = {}; + obj.eigenvalue = value; + obj.vector = eigenvectors.map(function (vector, j) { + return -1 * vector[i]; //HACK prevent completely negative vectors + }); + return obj; + }); + return results; + } + /** + * Get reduced dataset after removing some dimensions + * + * @param {Array} data - initial matrix started out with + * @param {rest} vectors - eigenvectors selected as part of process + * @returns + */ + function computeAdjustedData(data) { + for (var _len = arguments.length, vectorObjs = new Array(_len > 1 ? _len - 1 : 0), + _key = 1; _key < _len; _key++) { + vectorObjs[_key - 1] = arguments[_key]; + } + //FIXME no need to transpose vectors since they're already in row normal form + var vectors = vectorObjs.map(function(v){return v.vector}); + var matrixMinusMean = computeDeviationMatrix(data); + var adjustedData = multiply(vectors, transpose(matrixMinusMean)); + var unit = unitSquareMatrix(data.length); + var avgData = scale(multiply(unit, data), -1 / data.length); //NOTE get the averages to add back + + var formattedAdjustData = formatData(adjustedData, 2); + return { + adjustedData: adjustedData, + formattedAdjustedData: formattedAdjustData, + avgData: avgData, + selectedVectors: vectors + }; + } + + /** + * Get original data set from reduced data set (decompress) + * @param {*} adjustedData = formatted or unformatted adjusted data + * @param {*} vectors = selectedVectors + * @param {*} avgData = avgData + */ + function computeOriginalData(adjustedData, vectors, avgData) { + var originalWithoutMean = transpose(multiply(transpose(vectors), adjustedData)); + var originalWithMean = subtract(originalWithoutMean, avgData); + var formattedData = formatData(originalWithMean, 2); + return { + originalData: originalWithMean, + formattedOriginalData: formattedData + } + } + + /** + * Get percentage explained, or loss + * @param {*} vectors + * @param {*} selected + */ + function computePercentageExplained(vectors) { + for (var _len = arguments.length, selected = new Array(_len > 1 ? _len - 1 : 0), + _key = 1; _key < _len; _key++) { + selected[_key - 1] = arguments[_key]; + } + var total = vectors.map(function (v) { + return v.eigenvalue + }).reduce(function (a, b) { + return a + b; + }); + var explained = selected.map(function (v) { + return v.eigenvalue + }).reduce(function (a, b) { + return a + b; + }); + return (explained / total); + } + + function getEigenVectors(data) { + return computeSVD(computeVarianceCovariance(computeDeviationScores(computeDeviationMatrix(data)), false)); + } + + function analyseTopResult(data) { + var eigenVectors = getEigenVectors(data); + var sorted = eigenVectors.sort(function (a, b) { + return b.eigenvalue - a.eigenvalue; + }); + console.log('Sorted Vectors', sorted); + var selected = sorted[0].vector; + return computeAdjustedData(data, selected); + } + + function formatData(data, precision) { + var TEN = Math.pow(10, precision || 2); + return data.map(function (d, i) { + return d.map(function (n) { + return Math.round(n * TEN) / TEN; + }) + }) + } + /** + * Multiplies AxB, where A and B are matrices of nXm and mXn dimensions + * @param {} a + * @param {*} b + */ + function multiply(a, b) { + if (!a[0] || !b[0] || !a.length || !b.length) { + throw new Error('Both A and B should be matrices'); + } + + if (b.length !== a[0].length) { + throw new Error('Columns in A should be the same as the number of rows in B'); + } + var product = []; + + for (var i = 0; i < a.length; i++) { + product[i] = []; //initialize a new row + for (var j = 0; j < b[0].length; j++) { + for (var k = 0; k < a[0].length; k++) { + (product[i])[j] = !!(product[i])[j] ? (product[i])[j] + (a[i])[k] * (b[k])[j] : (a[i])[k] * (b[k])[j]; + } + } + } + return product; + } + /** + * Utility function to subtract matrix b from a + * + * @param {any} a + * @param {any} b + * @returns + */ + function subtract(a, b) { + if (!(a.length === b.length && a[0].length === b[0].length)) + throw new Error('Both A and B should have the same dimensions'); + var result = []; + for (var i = 0; i < a.length; i++) { + result[i] = []; + for (var j = 0; j < b[0].length; j++) { + (result[i])[j] = (a[i])[j] - (b[i])[j]; + } + } + return result; + } + /** + * Multiplies a matrix into a factor + * + * @param {any} matrix + * @param {any} factor + * @returns + */ + function scale(matrix, factor) { + var result = []; + for (var i = 0; i < matrix.length; i++) { + result[i] = []; + for (var j = 0; j < matrix[0].length; j++) { + (result[i])[j] = (matrix[i])[j] * factor; + } + } + return result; + } + + /** + * Generates a unit square matrix + * @param {*} rows = number of rows to fill + */ + function unitSquareMatrix(rows) { + var result = []; + for (var i = 0; i < rows; i++) { + result[i] = []; + for (var j = 0; j < rows; j++) { + (result[i])[j] = 1; + } + } + return result; + } + /** + * Transposes a matrix, converts rows to columns + * @param {*} matrix + */ + function transpose(matrix) { + var operated = clone(matrix); + return operated[0].map(function (m, c) { + return matrix.map(function (r) { + return r[c]; + }); + }); + } + /** + * Deep Clones a matrix + * @param {*} arr + */ + function clone(arr) { + var string = JSON.stringify(arr); + var result = JSON.parse(string); + return result; + } + + /** + * Compute the thin SVD from G. H. Golub and C. Reinsch, Numer. Math. 14, 403-420 (1970) + * From the Numeric JS Implementation Copyright (C) 2011 by Sébastien Loisel + * The C implementation from which this has been taken may be found here: http://www.public.iastate.edu/~dicook/JSS/paper/code/svd.c + * @param {*} A = m*n matrix + */ + function svd(A) { + var temp; + var prec = Math.pow(2, -52) // assumes double prec + var tolerance = 1.e-64 / prec; + var itmax = 50; + var c = 0; + var i = 0; + var j = 0; + var k = 0; + var l = 0; + var u = clone(A); + var m = u.length; + var n = u[0].length; + + if (m < n) throw "Need more rows than columns" + + var e = new Array(n); //vector1 + var q = new Array(n); //vector2 + for (i = 0; i < n; i++) e[i] = q[i] = 0.0; + var v = rep([n, n], 0); + + function pythag(a, b) { + a = Math.abs(a) + b = Math.abs(b) + if (a > b) + return a * Math.sqrt(1.0 + (b * b / a / a)) + else if (b == 0.0) + return a + return b * Math.sqrt(1.0 + (a * a / b / b)) + } + + //rep function + function rep(s, v, k) { + if (typeof k === "undefined") { + k = 0; + } + var n = s[k], + ret = Array(n), + i; + if (k === s.length - 1) { + for (i = n - 2; i >= 0; i -= 2) { + ret[i + 1] = v; + ret[i] = v; + } + if (i === -1) { + ret[0] = v; + } + return ret; + } + for (i = n - 1; i >= 0; i--) { + ret[i] = rep(s, v, k + 1); + } + return ret; + } + + //Householder's reduction to bidiagonal form + + var f = 0.0; + var g = 0.0; + var h = 0.0; + var x = 0.0; + var y = 0.0; + var z = 0.0; + var s = 0.0; + + for (i = 0; i < n; i++) { + e[i] = g; //vector + s = 0.0; //sum + l = i + 1; //stays i+1 + for (j = i; j < m; j++) + s += (u[j][i] * u[j][i]); + if (s <= tolerance) + g = 0.0; + else { + f = u[i][i]; + g = Math.sqrt(s); + if (f >= 0.0) g = -g; + h = f * g - s + u[i][i] = f - g; + for (j = l; j < n; j++) { + s = 0.0 + for (k = i; k < m; k++) + s += u[k][i] * u[k][j] + f = s / h + for (k = i; k < m; k++) + u[k][j] += f * u[k][i] + } + } + q[i] = g + s = 0.0 + for (j = l; j < n; j++) + s = s + u[i][j] * u[i][j] + if (s <= tolerance) + g = 0.0 + else { + f = u[i][i + 1] + g = Math.sqrt(s) + if (f >= 0.0) g = -g + h = f * g - s + u[i][i + 1] = f - g; + for (j = l; j < n; j++) e[j] = u[i][j] / h + for (j = l; j < m; j++) { + s = 0.0 + for (k = l; k < n; k++) + s += (u[j][k] * u[i][k]) + for (k = l; k < n; k++) + u[j][k] += s * e[k] + } + } + y = Math.abs(q[i]) + Math.abs(e[i]) + if (y > x) + x = y + } + + // accumulation of right hand transformations + for (i = n - 1; i != -1; i += -1) { + if (g != 0.0) { + h = g * u[i][i + 1] + for (j = l; j < n; j++) + v[j][i] = u[i][j] / h //u is array, v is square of columns + for (j = l; j < n; j++) { + s = 0.0 + for (k = l; k < n; k++) + s += u[i][k] * v[k][j] + for (k = l; k < n; k++) + v[k][j] += (s * v[k][i]) + } + } + for (j = l; j < n; j++) { + v[i][j] = 0; + v[j][i] = 0; + } + v[i][i] = 1; + g = e[i] + l = i + } + + // accumulation of left hand transformations + for (i = n - 1; i != -1; i += -1) { + l = i + 1 + g = q[i] + for (j = l; j < n; j++) + u[i][j] = 0; + if (g != 0.0) { + h = u[i][i] * g + for (j = l; j < n; j++) { + s = 0.0 + for (k = l; k < m; k++) s += u[k][i] * u[k][j]; + f = s / h + for (k = i; k < m; k++) u[k][j] += f * u[k][i]; + } + for (j = i; j < m; j++) u[j][i] = u[j][i] / g; + } else + for (j = i; j < m; j++) u[j][i] = 0; + u[i][i] += 1; + } + + // diagonalization of the bidiagonal form + prec = prec * x + for (k = n - 1; k != -1; k += -1) { + for (var iteration = 0; iteration < itmax; iteration++) { // test f splitting + var test_convergence = false + for (l = k; l != -1; l += -1) { + if (Math.abs(e[l]) <= prec) { + test_convergence = true + break + } + if (Math.abs(q[l - 1]) <= prec) + break + } + if (!test_convergence) { // cancellation of e[l] if l>0 + c = 0.0 + s = 1.0 + var l1 = l - 1 + for (i = l; i < k + 1; i++) { + f = s * e[i] + e[i] = c * e[i] + if (Math.abs(f) <= prec) + break + g = q[i] + h = pythag(f, g) + q[i] = h + c = g / h + s = -f / h + for (j = 0; j < m; j++) { + y = u[j][l1] + z = u[j][i] + u[j][l1] = y * c + (z * s) + u[j][i] = -y * s + (z * c) + } + } + } + // test f convergence + z = q[k] + if (l == k) { //convergence + if (z < 0.0) { //q[k] is made non-negative + q[k] = -z + for (j = 0; j < n; j++) + v[j][k] = -v[j][k] + } + break //break out of iteration loop and move on to next k value + } + if (iteration >= itmax - 1) + throw 'Error: no convergence.' + // shift from bottom 2x2 minor + x = q[l] + y = q[k - 1] + g = e[k - 1] + h = e[k] + f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0 * h * y) + g = pythag(f, 1.0) + if (f < 0.0) + f = ((x - z) * (x + z) + h * (y / (f - g) - h)) / x + else + f = ((x - z) * (x + z) + h * (y / (f + g) - h)) / x + // next QR transformation + c = 1.0 + s = 1.0 + for (i = l + 1; i < k + 1; i++) { + g = e[i] + y = q[i] + h = s * g + g = c * g + z = pythag(f, h) + e[i - 1] = z + c = f / z + s = h / z + f = x * c + g * s + g = -x * s + g * c + h = y * s + y = y * c + for (j = 0; j < n; j++) { + x = v[j][i - 1] + z = v[j][i] + v[j][i - 1] = x * c + z * s + v[j][i] = -x * s + z * c + } + z = pythag(f, h) + q[i - 1] = z + c = f / z + s = h / z + f = c * g + s * y + x = -s * g + c * y + for (j = 0; j < m; j++) { + y = u[j][i - 1] + z = u[j][i] + u[j][i - 1] = y * c + z * s + u[j][i] = -y * s + z * c + } + } + e[l] = 0.0 + e[k] = f + q[k] = x + } + } + + for (i = 0; i < q.length; i++) + if (q[i] < prec) q[i] = 0 + + //sort eigenvalues + for (i = 0; i < n; i++) { + for (j = i - 1; j >= 0; j--) { + if (q[j] < q[i]) { + c = q[j] + q[j] = q[i] + q[i] = c + for (k = 0; k < u.length; k++) { + temp = u[k][i]; + u[k][i] = u[k][j]; + u[k][j] = temp; + } + for (k = 0; k < v.length; k++) { + temp = v[k][i]; + v[k][i] = v[k][j]; + v[k][j] = temp; + } + i = j + } + } + } + + return { + U: u, + S: q, + V: v + } + } + + return { + computeDeviationScores: computeDeviationScores, + computeDeviationMatrix: computeDeviationMatrix, + computeSVD: computeSVD, + computePercentageExplained: computePercentageExplained, + computeOriginalData: computeOriginalData, + computeVarianceCovariance: computeVarianceCovariance, + computeAdjustedData: computeAdjustedData, + getEigenVectors: getEigenVectors, + analyseTopResult: analyseTopResult, + transpose: transpose, + multiply: multiply, + clone: clone, + scale: scale, + options:options + } +})(); + +if(typeof module !== 'undefined') +module.exports = PCA;