/**
 ** ==============================
 ** O O O OOOO
 ** O O O O O O
 ** O O O O O O
 ** OOOO OOOO O OOO OOOO
 ** O O O O O O O
 ** O O O O O O O
 ** OOOO OOOO O O OOOO
 ** ==============================
 ** Dr. Stefan Bosse http://www.bsslab.de
 **
 ** COPYRIGHT: THIS SOFTWARE, EXECUTABLE AND SOURCE CODE IS OWNED
 ** BY THE AUTHOR(S).
 ** THIS SOURCE CODE MAY NOT BE COPIED, EXTRACTED,
 ** MODIFIED, OR OTHERWISE USED IN A CONTEXT
 ** OUTSIDE OF THE SOFTWARE SYSTEM.
 **
 ** $AUTHORS: Ankit Kuwadekar, Stefan Bosse
 ** $INITIAL: (C) 2014, Ankit Kuwadekar
 ** $MODIFIED: (C) 2006-2018 bLAB by sbosse
 ** $VERSION: 1.3.2
 **
 ** $INFO:
 **
 ** ICE: C45/ID3 Decision Tree Algorithm supporting feature variables with eps intervals
 **
 ** Portable model
 **
 ** New:
 ** typeof eps = number | [epsx1:number,epsx2:number,..]
 **
 ** $ENDOFINFO
 */
var Io = Require('com/io');
var Comp = Require('com/compat');
var current=none;
var Aios=none;
var that;

/**
 * Map of valid tree node types
 * @constant
 * @static
 */
var NODE_TYPES = {
  RESULT: 'result',
  FEATURE: 'feature',
  FEATURE_VALUE: 'feature_value'
};

var NL = '\n';

/**
 * Recursively builds a decision tree.
 *
 * All statistics helpers are taken from `that.ml`, which this module exports
 * as an empty object; presumably the embedding ML module fills it in before
 * `create` is called - TODO confirm against the loader.
 *
 * @param {Array} data - training rows, in whatever form
 *        `ml.stats.utils.column` and `ml.stats.splitEps` accept
 * @param {string} target - name of the target variable
 * @param {Array} features - feature names still available for splitting
 * @param {number|Array<number>} eps - a single eps for all features, or one
 *        eps per entry of `features` (same order)
 * @returns {Object} a RESULT node `{type,name}` or a FEATURE node
 *          `{type,name,index,eps,vals}` whose `vals` are FEATURE_VALUE nodes
 *          `{val,eps,type,child}`
 */
function createTree(data, target, features, eps) {
  var ml = that.ml;
  var targetColumn = ml.stats.utils.column(data, target);
  var targets = ml.stats.unique(targetColumn);

  // Only one distinct target value left: this branch is decided.
  if (targets.length === 1) {
    return {
      type: NODE_TYPES.RESULT,
      name: targets[0]
    };
  }

  // No feature left to split on: fall back to a majority vote. The vote must
  // run over the full target column; on the de-duplicated `targets` list
  // every value occurs exactly once and the vote would be meaningless.
  if (features.length === 0) {
    return {
      type: NODE_TYPES.RESULT,
      name: ml.stats.mostCommon(targetColumn)
    };
  }

  var split = ml.stats.splitEps(data, features, target, targets, eps);
  var bestFeature = split.feature;
  var index = features.indexOf(bestFeature);
  var remainingFeatures = split.remainingFeatures;
  // Keep a per-feature eps array aligned with the reduced feature list.
  var remainingEps = typeof eps === 'number'
    ? eps
    : remainingFeatures.map(function (v) { return eps[features.indexOf(v)]; });
  var featureEps = ml.stats.utils.selectEps(eps, index);

  var node = {
    type: NODE_TYPES.FEATURE,
    name: bestFeature,
    index: index,
    eps: featureEps
  };

  node.vals = split.choices.map(function (c) {
    return {
      val: c.val,
      eps: featureEps,
      type: NODE_TYPES.FEATURE_VALUE,
      child: createTree(c.data, target, remainingFeatures, remainingEps)
    };
  });
  return node;
}

/**
 * Computes the depth of a (sub)tree. RESULT, FEATURE and FEATURE_VALUE nodes
 * each count as one level; a node of unknown type yields 0.
 *
 * @param {Object} model - tree node
 * @returns {number} number of levels below and including `model`
 */
function depth(model) {
  switch (model.type) {
    case NODE_TYPES.RESULT:
      return 1;
    case NODE_TYPES.FEATURE:
      return 1 + Comp.array.max(model.vals.map(function (val) {
        return depth(val);
      }));
    case NODE_TYPES.FEATURE_VALUE:
      return 1 + depth(model.child);
  }
  return 0;
}

/**
 * Summarizes a tree.
 *
 * @param {Object} model - tree root
 * @returns {{depth:number, nodes:number, vars:Array}} tree depth, number of
 *          FEATURE nodes, and the distinct feature names in first-occurrence
 *          order
 */
function info(model) {
  var vl = vars(model);
  return {
    depth: depth(model),
    nodes: vl.length,
    // Plain ES5 de-duplication; avoids depending on a patched Array.prototype.
    vars: vl.filter(function (name, i) { return vl.indexOf(name) === i; })
  };
}

/**
 * Walks the tree for one sample. At every FEATURE node the branch whose `val`
 * is numerically closest to the sample value is taken (kNN approximation),
 * and the distance, scaled by eps, lowers the returned probability.
 *
 * NOTE(review): the same `eps` argument is applied on every level although
 * each node also stores its own `eps`; `predict` passes only the root's
 * value - confirm this is intended for per-feature eps arrays.
 *
 * @param {Object} model - tree root
 * @param {Object} sample - maps feature name to value
 * @param {number} [prob] - start probability; any falsy value is treated as 1
 * @param {number|Array<number>} eps - eps passed to `selectEps` together with
 *        the node's `index`
 * @returns {{value:*, prob:number}} predicted target value and fit quality
 * @throws {Error} if a FEATURE node has no value branches
 */
function predictEps(model, sample, prob, eps) {
  var root = model;
  if (!prob) prob = 1;
  while (root.type !== NODE_TYPES.RESULT) {
    var sampleVal = sample[root.name];
    // kNN approximation: pick the branch with the smallest distance.
    var best = null;
    root.vals.forEach(function (node) {
      var fit = Math.abs(node.val - sampleVal);
      if (!best || fit < best.fit) best = { fit: fit, node: node };
    });
    if (!best) {
      // Previously this fell through to root.vals[0].child and died with an
      // opaque TypeError; fail with a message that names the broken node.
      throw new Error('ICE predict: feature node "' + root.name + '" has no value branches');
    }
    // Fit quality propagation. An exact match carries no penalty; skipping
    // the division also avoids 0/0 = NaN when eps is 0 or missing.
    if (best.fit !== 0) {
      prob = prob * (1 - Math.abs(best.fit / that.ml.stats.utils.selectEps(eps, root.index)) / 4);
    }
    root = best.node.child;
  }
  return { value: root.name, prob: prob };
}

/**
 * Renders a tree as indented text, one node per line.
 *
 * NOTE(review): the indentation helper and the RESULT case were truncated in
 * the reviewed source and have been reconstructed; confirm the '-> ' prefix
 * against the upstream file.
 *
 * @param {Object} model - tree node; a falsy value prints as ''
 * @param {number} [indent=0] - number of leading spaces for this node
 * @returns {string} textual representation ('model?' for an unknown node type)
 */
function printModel(model, indent) {
  var line = '';
  if (indent == null) indent = 0; // matches both undefined and null
  if (!model) return '';
  var sp = function () {
    var s = '';
    for (var i = 0; i < indent; i++) s += ' ';
    return s;
  };
  switch (model.type) {
    case NODE_TYPES.RESULT:
      return sp() + '-> ' + model.name + NL;
    case NODE_TYPES.FEATURE:
      line = sp() + '$' + model.name + '?' + NL;
      model.vals.forEach(function (v) {
        line += printModel(v, indent + 2);
      });
      return line;
    case NODE_TYPES.FEATURE_VALUE:
      // Show the interval [val-eps, val+eps] covered by this branch.
      line = sp() + '=[' + (model.val - model.eps) + ',' + (model.val + model.eps) + ']' + NL;
      return line + printModel(model.child, indent + 2);
  }
  return 'model?';
}

/**
 * Collects the names of all FEATURE nodes of a tree in pre-order; a feature
 * used in several branches is listed once per node.
 *
 * @param {Object} model - tree node
 * @returns {Array} feature names (empty for RESULT or unknown nodes)
 */
function vars(model) {
  switch (model.type) {
    case NODE_TYPES.RESULT:
      return [];
    case NODE_TYPES.FEATURE:
      return [model.name].concat(Comp.array.flatten(model.vals.map(vars)));
    case NODE_TYPES.FEATURE_VALUE:
      return vars(model.child);
  }
  return [];
}

/**
 * Public module interface. `ml` starts out empty and is read through `that`
 * by createTree and predictEps at call time.
 */
that = module.exports = {
  /** Builds a tree from `{data, target, features, eps}`. */
  create: function (options) {
    return createTree(options.data,
                      options.target,
                      options.features,
                      options.eps);
  },
  depth: depth,
  info: info,
  ml: {},
  /** Predicts one sample using the eps stored in the root node. */
  predict: function (model, sample) {
    return predictEps(model, sample, 1, model.eps);
  },
  print: printModel
};