/*
 * Provenance: this file was recovered from a whitespace-collapsed git
 * format-patch. The patch header is preserved here for reference.
 *
 *   From 9493e0954bfb37c1a30ecb3aa8d659058ffa19fd Mon Sep 17 00:00:00 2001
 *   From: sbosse
 *   Date: Mon, 21 Jul 2025 23:08:02 +0200
 *   Subject: [PATCH] Mon 21 Jul 22:43:21 CEST 2025
 *   ---
 *   js/ml/knn.js | 503 +++++++++++++++++++++++++++++++++++++++++++++++++++
 *   1 file changed, 503 insertions(+)
 *   create mode 100644 js/ml/knn.js
 *
 *   diff --git a/js/ml/knn.js b/js/ml/knn.js
 *   new file mode 100644
 *   index 0000000..2821ec3
 *   --- /dev/null
 *   +++ b/js/ml/knn.js
 *   @@ -0,0 +1,503 @@
 */

/**
 ** ==============================
 ** O O O OOOO
 ** O O O O O O
 ** O O O O O O
 ** OOOO OOOO O OOO OOOO
 ** O O O O O O O
 ** O O O O O O O
 ** OOOO OOOO O O OOOO
 ** ==============================
 ** Dr. Stefan Bosse http://www.bsslab.de
 **
 ** COPYRIGHT: THIS SOFTWARE, EXECUTABLE AND SOURCE CODE IS OWNED
 ** BY THE AUTHOR(S).
 ** THIS SOURCE CODE MAY NOT BE COPIED, EXTRACTED,
 ** MODIFIED, OR OTHERWISE USED IN A CONTEXT
 ** OUTSIDE OF THE SOFTWARE SYSTEM.
 **
 ** $AUTHORS: Ankit Kuwadekar, Stefan Bosse
 ** $INITIAL: (C) 2014, Ankit Kuwadekar
 ** $MODIFIED: (C) 2006-2019 bLAB by sbosse
 ** $VERSION: 1.2.1
 **
 ** $INFO:
 **
 ** KNN: k-nearest-neighbour Algorithm
 ** A General purpose k-nearest neighbor classifier algorithm based on the
 ** k-d tree Javascript library developed by Ubilabs.
 **
 ** Portable models (KNN/KNN2)
 **
 ** $ENDOFINFO
 */
var options = {
  version:'1.2.1'
}
var Comp = Require('com/compat');
var math = Require('ml/math');
var euclideanDistance = math.euclidean;

/*
 * Original code from:
 *
 * k-d Tree JavaScript - V 1.01
 *
 * https://github.com/ubilabs/kd-tree-javascript
 *
 * @author Mircea Pricop , 2012
 * @author Martin Kleppe , 2012
 * @author Ubilabs http://ubilabs.net, 2012
 * @license MIT License
 */

/**
 * Create a k-d tree node as a plain object (no prototype methods, so the
 * structure stays a simple data record).
 *
 * @param {Array} obj - the point stored in this node
 * @param {number} dimension - index into the tree's dimension list used to split at this node
 * @param {Object|null} parent - parent node, or null for the root
 * @return {Object} node with fields obj, left, right, parent, dimension
 */
function Node(obj, dimension, parent) {
  var N = {}
  N.obj = obj;
  N.left = null;
  N.right = null;
  N.parent = parent;
  N.dimension = dimension;
  return N;
}

/* KDTree
 *
 */

/**
 * Create a k-d tree record. Operations on it live in KDTree.code and take
 * the record as first argument.
 *
 * @param {Array[]|Object} points - either an array of points (a tree is
 *        built; the array is sorted in place while building), or a
 *        pre-built root node carrying a `dimensions` property (the tree is
 *        adopted and its parent links are restored)
 * @param {function} metric - distance function called as metric(a, b)
 * @return {Object} record with fields dimensions, root, metric
 */
function KDTree(points, metric) {
  // if (!(this instanceof KDTree)) return new KDTree(points, metric);
  // If points is not an array, assume we're loading a pre-built tree
  var K ={}
  if (!Array.isArray(points)) {
    K.dimensions = points.dimensions;
    K.root = points;
    restoreParent(K.root);
  } else {
    // One split dimension per component of the first point.
    K.dimensions = new Array(points[0].length);
    for (var i = 0; i < K.dimensions.length; i++) {
      K.dimensions[i] = i;
    }
    K.root = buildTree(points, 0, null, K.dimensions);
  }
  K.metric = metric;
  return K;
}

// Operations on a KDTree record (the record is passed explicitly instead of
// using `this`).
KDTree.code = {
  /**
   * Find up to maxNodes stored points closest to `point`.
   *
   * @param {Object} K - tree record created by KDTree()
   * @param {Array} point - query point
   * @param {number} maxNodes - maximum number of neighbours to return
   * @param {number} [maxDistance] - if given (truthy), only points closer
   *        than this distance are returned
   * @return {Array[]} list of [storedPoint, distance] pairs, in the internal
   *         order of the heap (not sorted by distance)
   */
  nearest : function(K, point, maxNodes, maxDistance) {
    var metric = K.metric;
    var dimensions = K.dimensions;
    var i;

    // Scores are negated distances, so the heap top is the farthest of the
    // current best candidates.
    var bestNodes = BinaryHeap(
      function (e) {
        return -e[1];
      }
    );

    function nearestSearch(node) {
      var dimension = dimensions[node.dimension];
      var ownDistance = metric(point, node.obj);
      var linearPoint = {};
      var bestChild,
          linearDistance,
          otherChild,
          i;
      // Insert a candidate and drop the farthest one if the heap overflows.
      function saveNode(node, distance) {
        BinaryHeap.code.push(bestNodes,[node, distance]);
        if (BinaryHeap.code.size(bestNodes) > maxNodes) {
          BinaryHeap.code.pop(bestNodes);
        }
      }

      // Project the query onto the node's splitting plane: take the query's
      // coordinate in the split dimension and the node's own coordinates in
      // all other dimensions.
      // NOTE(review): linearPoint is a plain object (no `length`); what the
      // metric returns for it depends on the metric implementation - confirm.
      for (i = 0; i < dimensions.length; i += 1) {
        if (i === node.dimension) {
          linearPoint[dimensions[i]] = point[dimensions[i]];
        } else {
          linearPoint[dimensions[i]] = node.obj[dimensions[i]];
        }
      }

      linearDistance = metric(linearPoint, node.obj);
      // Leaf: just consider the node itself.
      if (node.right === null && node.left === null) {
        if (BinaryHeap.code.size(bestNodes) < maxNodes || ownDistance < BinaryHeap.code.peek(bestNodes)[1]) {
          saveNode(node, ownDistance);
        }
        return;
      }

      // Descend first into the child on the query's side of the split.
      if (node.right === null) {
        bestChild = node.left;
      } else if (node.left === null) {
        bestChild = node.right;
      } else {
        if (point[dimension] < node.obj[dimension]) {
          bestChild = node.left;
        } else {
          bestChild = node.right;
        }
      }

      nearestSearch(bestChild);

      if (BinaryHeap.code.size(bestNodes) < maxNodes || ownDistance < BinaryHeap.code.peek(bestNodes)[1]) {
        saveNode(node, ownDistance);
      }

      // Visit the other side only if the splitting plane is closer than the
      // current farthest candidate (or the candidate list is not full yet).
      if (BinaryHeap.code.size(bestNodes) < maxNodes || Math.abs(linearDistance) < BinaryHeap.code.peek(bestNodes)[1]) {
        if (bestChild === node.left) {
          otherChild = node.right;
        } else {
          otherChild = node.left;
        }
        if (otherChild !== null) {
          nearestSearch(otherChild);
        }
      }
    }

    // Pre-fill the heap with placeholders at maxDistance so that only nodes
    // closer than maxDistance can displace them.
    if (maxDistance) {
      for (i = 0; i < maxNodes; i += 1) {
        BinaryHeap.code.push(bestNodes,[null, maxDistance]);
      }
    }

    if (K.root) {
      nearestSearch(K.root);
    }

    // Unwrap nodes to their points; surviving placeholders (null) are skipped.
    var result = [];
    for (i = 0; i < Math.min(maxNodes, bestNodes.content.length); i += 1) {
      if (bestNodes.content[i][0]) {
        result.push([bestNodes.content[i][0].obj, bestNodes.content[i][1]]);
      }
    }
    return result;
  }
}

/**
 * Recursively build a k-d (sub)tree: sort the points along the split
 * dimension for this depth, store the median in the node and recurse on the
 * two halves.
 *
 * @param {Array[]} points - points of this subtree (sorted in place)
 * @param {number} depth - depth of the node to create; selects the split dimension
 * @param {Object|null} parent - parent node
 * @param {number[]} dimensions - list of coordinate indices to cycle through
 * @return {Object|null} root node of the subtree, or null for no points
 */
function buildTree(points, depth, parent, dimensions) {
  var dim = depth % dimensions.length;

  if (points.length === 0) {
    return null;
  }
  if (points.length === 1) {
    return Node(points[0], dim, parent);
  }

  // The comparator must RETURN the difference. Without the return it yields
  // undefined, the array stays in input order and the "median" below is not
  // a median, so the tree is not ordered along the split dimension.
  points.sort(function (a, b) {
    return a[dimensions[dim]] - b[dimensions[dim]];
  });

  var median = Math.floor(points.length / 2);
  var node = Node(points[median], dim, parent);
  node.left = buildTree(points.slice(0, median), depth + 1, node, dimensions);
  node.right = buildTree(points.slice(median + 1), depth + 1, node, dimensions);

  return node;
}

/**
 * Re-establish the `parent` links below `root` (used when adopting a
 * pre-built tree whose nodes only carry left/right links).
 *
 * @param {Object} root - node whose descendants get their parent set
 */
function restoreParent(root) {
  if (root.left) {
    root.left.parent = root;
    restoreParent(root.left);
  }

  if (root.right) {
    root.right.parent = root;
    restoreParent(root.right);
  }
}
/** BinaryHeap
 *
 */

// Binary heap implementation from:
// http://eloquentjavascript.net/appendix2.html
/**
 * Create a binary min-heap record ordered by scoreFunction(element); the
 * element with the smallest score is at content[0]. Operations live in
 * BinaryHeap.code and take the record as first argument.
 *
 * @param {function} scoreFunction - maps an element to its numeric score
 * @return {Object} record with fields content (array) and scoreFunction
 */
function BinaryHeap (scoreFunction) {
  var B={}
  //if (!(this instanceof BinaryHeap)) return new BinaryHeap (scoreFunction);
  B.content = [];
  B.scoreFunction = scoreFunction;
  return B;
}


BinaryHeap.code = {
  /** Add an element to heap B. */
  push : function(B,element) {
    // Add the new element to the end of the array.
    B.content.push(element);
    // Allow it to bubble up.
    BinaryHeap.code.bubbleUp(B,B.content.length - 1);
  },
  /** Remove and return the element with the smallest score (undefined if B is empty). */
  pop : function(B) {
    // Store the first element so we can return it later.
    var result = B.content[0];
    // Get the element at the end of the array.
    var end = B.content.pop();
    // If there are any elements left, put the end element at the
    // start, and let it sink down.
    if (B.content.length > 0) {
      B.content[0] = end;
      BinaryHeap.code.sinkDown(B,0);
    }
    return result;
  },
  /** Return the element with the smallest score without removing it. */
  peek : function(B) {
    return B.content[0];
  },
  /** Return the number of elements in B. */
  size : function(B) {
    return B.content.length;
  },
  /** Move the element at index n towards the root until its parent's score is not greater. */
  bubbleUp : function(B,n) {
    // Fetch the element that has to be moved.
    var element = B.content[n];
    // When at 0, an element can not go up any further.
    while (n > 0) {
      // Compute the parent element's index, and fetch it.
      var parentN = Math.floor((n + 1) / 2) - 1;
      var parent = B.content[parentN];
      // Swap the elements if the parent is greater.
      if (B.scoreFunction(element) < B.scoreFunction(parent)) {
        B.content[parentN] = element;
        B.content[n] = parent;
        // Update 'n' to continue at the new position.
        n = parentN;
      } else { // Found a parent that is less, no need to move it further.
        break;
      }
    }
  },
  /** Move the element at index n towards the leaves until no child has a smaller score. */
  sinkDown : function(B,n) {
    // Look up the target element and its score.
    var length = B.content.length;
    var element = B.content[n];
    var elemScore = B.scoreFunction(element);

    while (true) {
      // Compute the indices of the child elements.
      var child2N = (n + 1) * 2;
      var child1N = child2N - 1;
      // This is used to store the new position of the element,
      // if any.
      var swap = null;
      // If the first child exists (is inside the array)...
      if (child1N < length) {
        // Look it up and compute its score.
        var child1 = B.content[child1N];
        var child1Score = B.scoreFunction(child1);
        // If the score is less than our element's, we need to swap.
        if (child1Score < elemScore) {
          swap = child1N;
        }
      }
      // Do the same checks for the other child.
      if (child2N < length) {
        var child2 = B.content[child2N];
        var child2Score = B.scoreFunction(child2);
        if (child2Score < (swap === null ? elemScore : child1Score)) {
          swap = child2N;
        }
      }

      // If the element needs to be moved, swap it, and continue.
      if (swap !== null) {
        B.content[n] = B.content[swap];
        B.content[swap] = element;
        n = swap;
      } else {
        // Otherwise, we are done.
        break;
      }
    }
  }
}

/** KNN
 *
 */

/**
 ** Create a KNN model record. Operations live in KNN.code.
 **
 ** Training mode:
 **   typeof @dataset = number [] []
 **   typeof @labels = number []
 **   typeof @options = { distance?:function, k?:number }
 **   k defaults to (number of distinct labels + 1). Each training row is
 **   copied and its label appended as last component before the k-d tree
 **   is built.
 **
 ** Restore mode (dataset === true):
 **   @labels is a saved model { kdTree, k, classes, isEuclidean } and
 **   @options is either the distance function itself or an options object
 **   { distance?:function }.
 **   NOTE(review): upstream ml-knn passes the distance function itself as
 **   third argument when loading - confirm what callers of this port pass.
 */
function KNN(dataset, labels, options) {
  var L = {}
  if (!options) options={};
  if (dataset === true) {
    var model = labels;
    // The tree needs a callable metric. Previously `options` was forwarded
    // unchanged, so a missing or object-valued third argument produced a
    // tree whose metric was a plain object and failed on first use.
    var metric = typeof options === 'function'
      ? options
      : getDistanceFunction(options.distance);
    L.kdTree = KDTree(model.kdTree, metric);
    L.k = model.k;
    L.distance = metric;
    L.classes = new Set(model.classes);
    L.isEuclidean = model.isEuclidean;
    return L;
  }
  var classes = new Set(labels);

  var distance = getDistanceFunction(options.distance),
      k = options.k||classes.size + 1;

  // Copy the rows so that appending the label does not modify the caller's data.
  var points = new Array(dataset.length);
  for (var i = 0; i < points.length; ++i) {
    points[i] = dataset[i].slice();
  }

  for (i = 0; i < labels.length; ++i) {
    points[i].push(labels[i]);
  }

  L.kdTree = KDTree(points, distance);
  L.k = k;
  L.distance = distance;
  L.classes = classes;
  L.isEuclidean = distance === euclideanDistance;
  return L;
}


KNN.code = {
  /**
   * Predicts the output given the matrix to predict.
   * @param {Object} L - model record created by KNN()
   * @param {Array} dataset - a single case (number []) or a matrix of cases (number [] [])
   * @return {*|Array} the predicted label, or one predicted label per row
   * @throws {TypeError} if dataset is neither an array of numbers nor a matrix
   */
  predict : function(L,dataset) {
    if (Array.isArray(dataset)) {
      if (typeof dataset[0] === 'number') {
        return getSinglePrediction(L, dataset);
      } else if (Array.isArray(dataset[0]) && typeof dataset[0][0] === 'number') {
        var predictions = new Array(dataset.length);
        for (var i = 0; i < dataset.length; i++) {
          predictions[i] = getSinglePrediction(L, dataset[i]);
        }
        return predictions;
      }
    }
    throw new TypeError('dataset to predict must be an array or a matrix');
  }
}

/**
 * Predict the label of one case from its knn.k nearest stored points.
 * If some class collects more than two of the neighbours, the first class to
 * reach the highest count wins; otherwise the class of the single nearest
 * neighbour is returned.
 *
 * @param {Object} knn - model record created by KNN()
 * @param {number[]} currentCase - the case to classify
 * @return {*} the predicted label
 */
function getSinglePrediction(knn, currentCase) {
  var nearestPoints = KDTree.code.nearest(knn.kdTree, currentCase, knn.k);
  var pointsPerClass = {};
  var predictedClassMin = null;
  var predictedClassMax = null;
  var maxPoints = -1;
  var minDistance = 1E30;

  // The label is stored as the last component of every tree point.
  var lastElement = nearestPoints[0][0].length - 1;
  //for (var element of knn.classes) {
  //  pointsPerClass[element] = 0;
  //}
  forof(knn.classes,function (element) {
    pointsPerClass[element] = 0;
  });
  for (var i = 0; i < nearestPoints.length; ++i) {
    var currentClass = nearestPoints[i][0][lastElement];
    var currentPoints = ++pointsPerClass[currentClass];
    // Either use majority of points matching a class or the nearest points
    if (currentPoints > maxPoints) {
      predictedClassMax = currentClass;
      maxPoints = currentPoints;
    }
    if (nearestPoints[i][1] < minDistance) {
      predictedClassMin = currentClass;
      minDistance = nearestPoints[i][1];
    }
  }
  return maxPoints>2?predictedClassMax:predictedClassMin;
}



/** Create a simple KNN (2)
 *
 *  typeof @options = {x:number [] [],y: number [], k?:number, distance?, weightf?}
 *
 *  Returns a plain model record; k defaults to 3. The y array is stored
 *  both as model.y and model.target.
 */
var KNN2 = function (options) {
  var model={}
  // if (!(this instanceof KNN2)) return new KNN2(options);
  model.x = options.x;
  model.y = options.y;
  model.target = options.y;
  model.k = options.k || 3
  model.distance = getDistanceFunction(options.distance);
  model.weightf = getWeightedFunction(options.weightf);
  return model
}

/** Make a prediction
 *
 * FIXME(review): the definition of KNN2.code.predict is cut off in this copy
 * of the file (the text ends in the middle of the first `for` header), and
 * everything after it - presumably including the helpers getDistanceFunction,
 * getWeightedFunction and forof referenced above - is missing. It is not
 * reconstructed here to avoid guessing; restore it from the original
 * js/ml/knn.js. The surviving fragment, verbatim:
 *
 *   KNN2.code = {
 *     predict : function (model,data) {
 *       var x = data;
 *       var k = model.k;
 *       var weightf = model.weightf;
 *       var distance = model.distance;
 *       var distanceList = [];
 *       var i;
 *       for(i=0; i
 */