From c70c265e206dbc8e9771f735423944d9ef72c29e Mon Sep 17 00:00:00 2001 From: sbosse Date: Mon, 21 Jul 2025 23:07:26 +0200 Subject: [PATCH] Mon 21 Jul 22:43:21 CEST 2025 --- js/ml/id3.js.bak | 296 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 296 insertions(+) create mode 100644 js/ml/id3.js.bak diff --git a/js/ml/id3.js.bak b/js/ml/id3.js.bak new file mode 100644 index 0000000..b468392 --- /dev/null +++ b/js/ml/id3.js.bak @@ -0,0 +1,296 @@ +/** + ** ============================== + ** O O O OOOO + ** O O O O O O + ** O O O O O O + ** OOOO OOOO O OOO OOOO + ** O O O O O O O + ** O O O O O O O + ** OOOO OOOO O O OOOO + ** ============================== + ** Dr. Stefan Bosse http://www.bsslab.de + ** + ** COPYRIGHT: THIS SOFTWARE, EXECUTABLE AND SOURCE CODE IS OWNED + ** BY THE AUTHOR(S). + ** THIS SOURCE CODE MAY NOT BE COPIED, EXTRACTED, + ** MODIFIED, OR OTHERWISE USED IN A CONTEXT + ** OUTSIDE OF THE SOFTWARE SYSTEM. + ** + ** $AUTHORS: Ankit Kuwadekar, Stefan Bosse + ** $INITIAL: (C) 2014, Ankit Kuwadekar + ** $MODIFIED: (C) 2006-2018 bLAB by sbosse + ** $VERSION: 1.2.1 + ** + ** $INFO: + ** + ** ID3 Decision Tree Algorithm supporting categorical values only + ** Portable model + ** + ** $ENDOFINFO + */ +var Io = Require('com/io'); +var Comp = Require('com/compat'); +var current=none; +var Aios=none; + + +/** + * Map of valid tree node types + * @constant + * @static + */ +var NODE_TYPES = { + RESULT: 'result', + FEATURE: 'feature', + FEATURE_VALUE: 'feature_value' +}; + +function isEqual(a,b) { return a==b } + +/** + * Predicts class for sample + */ +function predict(model,sample) { + var root = model; + while (root.type !== NODE_TYPES.RESULT) { + var attr = root.name; + var sampleVal = sample[attr]; + var childNode = Comp.array.find(root.vals, function(node) { + return node.name == sampleVal + }); + if (childNode){ + root = childNode.child; + } else { + root = root.vals[0].child; + } + } + return root.val; +}; + +/** + * Evalutes prediction accuracy on samples + */ +function evaluate(model,target,samples) { + + var total = 0; + var correct = 0; + + Comp.array.iter(samples, function(s) { + total++; + var pred = predict(model,s); + var actual = s[target]; + if (isEqual(pred,actual)) { + correct++; + } + }); + + return correct / total; +}; + +/** + * Creates a new tree + */ +function createTree(data, target, features) { + var targets = Comp.array.unique(Comp.array.pluck(data, target)); + + if (targets.length == 1) { + return { + type: NODE_TYPES.RESULT, + val: targets[0], + name: targets[0], + alias: targets[0] + randomUUID() + }; + } + + if (features.length == 0) { + var topTarget = mostCommon(targets); + return { + type: NODE_TYPES.RESULT, + val: topTarget, + name: topTarget, + alias: topTarget + randomUUID() + }; + } + + var bestFeature = maxGain(data, target, features); + var remainingFeatures = Comp.array.without(features, bestFeature); + var possibleValues = Comp.array.unique(Comp.array.pluck(data, bestFeature)); + + var node = { + name: bestFeature, + alias: bestFeature + randomUUID() + }; + + node.type = NODE_TYPES.FEATURE; + node.vals = Comp.array.map(possibleValues, function(v) { + var _newS = data.filter(function(x) { + return x[bestFeature] == v + }); + + var child_node = { + name: v, + alias: v + randomUUID(), + type: NODE_TYPES.FEATURE_VALUE + }; + + child_node.child = createTree(_newS, target, remainingFeatures); + return child_node; + }); + + return node; +} + +/** + * Computes Max gain across features to determine best split + * @private + */ +function maxGain(data, target, features) { + var gains=[]; + var maxgain= Comp.array.max(features, function(element) { + var g = gain(data, target, element); + gains.push(element+':'+g); + return g; + }); + return maxgain; +} + +/** + * Computes entropy of a list + * @private + */ +function entropy(vals) { + var uniqueVals = Comp.array.unique(vals); + var probs = uniqueVals.map(function(x) { + return prob(x, vals) + }); + + var logVals = probs.map(function(p) { + return -p * log2(p) + }); + + return logVals.reduce(function(a, b) { + return a + b + }, 0); +} + +/** + * Computes gain + * @private + */ +function gain(data, target, feature) { + var attrVals = Comp.array.unique(Comp.array.pluck(data, feature)); + var setEntropy = entropy(Comp.array.pluck(data, target)); + var setSize = data.length; + + var entropies = attrVals.map(function(n) { + var subset = data.filter(function(x) { + return x[feature] === n + }); + + return (subset.length / setSize) * entropy(Comp.array.pluck(subset, target)); + }); + + // var entropyData = entropyV(Comp.array.pluck(data, feature),eps); + // console.log('Feat '+feature+':'+entropyData); + var sumOfEntropies = entropies.reduce(function(a, b) { + return a + b + }, 0); + return setEntropy - sumOfEntropies; +} + +/** + * Computes probability of of a given value existing in a given list + * @private + */ +function prob(value, list) { + var occurrences = Comp.array.filter(list, function(element) { + return element === value + }); + + var numOccurrences = occurrences.length; + var numElements = list.length; + return numOccurrences / numElements; +} + +/** + * Computes Log with base-2 + * @private + */ +function log2(n) { + return Math.log(n) / Math.log(2); +} + +/** + * Finds element with highest occurrence in a list + * @private + */ +function mostCommon(list) { + var elementFrequencyMap = {}; + var largestFrequency = -1; + var mostCommonElement = null; + + list.forEach(function(element) { + var elementFrequency = (elementFrequencyMap[element] || 0) + 1; + elementFrequencyMap[element] = elementFrequency; + + if (largestFrequency < elementFrequency) { + mostCommonElement = element; + largestFrequency = elementFrequency; + } + }); + + return mostCommonElement; +} + +/** + * Generates random UUID + * @private + */ +function randomUUID() { + return "_r" + Math.random().toString(32).slice(2); +} + +function depth(model) { + switch (model.type) { + case NODE_TYPES.RESULT: return 1; + case NODE_TYPES.FEATURE: + return 1+Comp.array.max(model.vals,function (val) { + return depth(val); + }); + case NODE_TYPES.FEATURE_VALUE: + return 1+depth(model.child); + } + return 0; +} + +function print(model) { + var line='',sep; + switch (model.type) { + case NODE_TYPES.RESULT: + return ' -> '+model.name; + case NODE_TYPES.FEATURE: + line='('+model.name+'?'; + sep=''; + Comp.array.iter(model.vals,function (v) { + line += sep+print(v); + sep=','; + }); + return line+')'; + case NODE_TYPES.FEATURE_VALUE: + return ' '+model.name+':'+print(model.child); + } + return 0; +} + + +module.exports = { + NODE_TYPES:NODE_TYPES, + createTree:createTree, + depth:depth, + entropy:entropy, + evaluate:evaluate, + predict:predict, + print:print, + current:function (module) { current=module.current; Aios=module;} +}; +