/**
 ** ==============================
 ** O O O OOOO
 ** O O O O O O
 ** O O O O O O
 ** OOOO OOOO O OOO OOOO
 ** O O O O O O O
 ** O O O O O O O
 ** OOOO OOOO O O OOOO
 ** ==============================
 ** Dr. Stefan Bosse http://www.bsslab.de
 **
 ** COPYRIGHT: THIS SOFTWARE, EXECUTABLE AND SOURCE CODE IS OWNED
 ** BY THE AUTHOR(S).
 ** THIS SOURCE CODE MAY NOT BE COPIED, EXTRACTED,
 ** MODIFIED, OR OTHERWISE USED IN A CONTEXT
 ** OUTSIDE OF THE SOFTWARE SYSTEM.
 **
 ** $AUTHORS: Ankit Kuwadekar, Stefan Bosse
 ** $INITIAL: (C) 2014, Ankit Kuwadekar
 ** $MODIFIED: (C) 2006-2018 bLAB by sbosse
 ** $VERSION: 1.2.2
 **
 ** $INFO:
 **
 ** ICE: C45/ID3 Decision Tree Algorithm supporting feature variables with eps intervals
 **
 ** Portable model
 **
 ** TODO: independent eps for each feature variable
 ** typeof eps = number | [epsx1:number,epsx2:number,..]
 **
 ** $ENDOFINFO
 */
var Io = Require('com/io');
var Comp = Require('com/compat');
var current = none;
var Aios = none;
// Set to the exported module object at the bottom of the file; createTree
// reads its helper functions from that.ml.
var that;

/**
 * Map of valid tree node types
 * @constant
 * @static
 */
var NODE_TYPES = {
  RESULT: 'result',
  FEATURE: 'feature',
  FEATURE_VALUE: 'feature_value'
};

var NL = '\n';

/**
 * Recursively builds a decision tree from a data set.
 *
 * All statistics helpers are read from `that.ml.stats`. This module exports
 * `ml` as an empty object, so the embedding code has to install the helpers
 * (`unique`, `mostCommon`, `splitEps`, `utils.column`) before calling this.
 *
 * @param {Array}  data     training rows (layout as expected by ml.stats.utils.column / ml.stats.splitEps)
 * @param {*}      target   key of the target variable, passed through to the helpers
 * @param {Array}  features feature keys still available for splitting
 * @param {number} eps      value stored as `eps` on every FEATURE and FEATURE_VALUE node
 * @returns {Object} a RESULT node `{type,name}` or a FEATURE node `{type,name,eps,vals}`
 */
function createTree(data, target, features, eps) {
  var ml = that.ml;
  var column = ml.stats.utils.column(data, target);
  var targets = ml.stats.unique(column);

  // Only one distinct target value is left: this branch is a leaf.
  if (targets.length === 1) {
    return {
      type: NODE_TYPES.RESULT,
      name: targets[0]
    };
  }

  // No feature left to split on: fall back to a majority vote.
  if (features.length === 0) {
    // The vote runs over the full target column. Voting over `targets`
    // (already de-duplicated) would see every value exactly once.
    // NOTE(review): assumes ml.stats.mostCommon returns the most frequent
    // element of the list it is given - confirm against ml.stats.
    return {
      type: NODE_TYPES.RESULT,
      name: ml.stats.mostCommon(column)
    };
  }

  var split = ml.stats.splitEps(data, features, target, targets, eps);
  var remainingFeatures = split.remainingFeatures;

  var node = {
    type: NODE_TYPES.FEATURE,
    name: split.feature,
    eps: eps // TODO: eps[bestFeature]
  };

  // One FEATURE_VALUE child per choice returned by the split, each holding
  // the sub-tree built from that choice's data.
  node.vals = split.choices.map(function (c) {
    var child_node = {
      val: c.val,
      eps: eps, // TODO
      type: NODE_TYPES.FEATURE_VALUE
    };
    child_node.child = createTree(c.data, target, remainingFeatures, eps);
    return child_node;
  });

  return node;
}

/**
 * Computes the depth of a (sub-)tree, counting every node level
 * (RESULT, FEATURE and FEATURE_VALUE nodes each add 1).
 *
 * @param {Object} model tree node
 * @returns {number} depth; 0 for a node of unknown type
 */
function depth(model) {
  switch (model.type) {
    case NODE_TYPES.RESULT:
      return 1;
    case NODE_TYPES.FEATURE:
      // NOTE(review): presumably Comp.array.max returns the largest value
      // produced by the callback over model.vals - confirm in com/compat.
      return 1 + Comp.array.max(model.vals, function (val) {
        return depth(val);
      });
    case NODE_TYPES.FEATURE_VALUE:
      return 1 + depth(model.child);
  }
  return 0;
}

/**
 * Walks the tree for one sample and returns the predicted target value.
 *
 * At every FEATURE node the child whose `val` is numerically closest to the
 * sample's value is taken (first child wins on ties). The running
 * probability is multiplied by `1 - |fit/eps|/4`, where `fit` is the
 * distance to the chosen child's `val`.
 *
 * @param {Object} model  root node of the tree
 * @param {Object} sample feature values, indexed by feature name
 * @param {number} [prob] starting probability; any falsy value (including 0) is replaced by 1
 * @param {number} eps    divisor for the fit distance; if it is 0 or not a
 *                        number the resulting `prob` is not a finite number
 * @returns {{value:*, prob:number}} predicted value and propagated probability;
 *          `{value: undefined, prob: 0}` if a FEATURE node without children is reached
 */
function predictEps(model, sample, prob, eps) {
  var root = model;
  if (!prob) prob = 1;

  while (root.type !== NODE_TYPES.RESULT) {
    var sampleVal = sample[root.name];

    // kNN approximation: pick the child value closest to the sample value
    var best = null;
    root.vals.forEach(function (node) {
      var fit = Math.abs(node.val - sampleVal);
      if (!best || fit < best.fit) best = { fit: fit, node: node };
    });

    if (!best) {
      // `best` is only null when root.vals is empty, so there is no child to
      // descend into. Report "no prediction" instead of dereferencing
      // root.vals[0], which is undefined here.
      return { value: undefined, prob: 0 };
    }

    // with fit quality propagation
    prob = prob * (1 - Math.abs(best.fit / eps) / 4);
    root = best.node.child;
  }

  return { value: root.name, prob: prob };
}

/**
 * Renders a (sub-)tree as indented text, one node per line.
 *
 *   RESULT        ->  "-> <name>"
 *   FEATURE       ->  "$<name>?"            children indented by 2
 *   FEATURE_VALUE ->  "=[val-eps,val+eps]"  child indented by 2
 *
 * @param {Object} model    tree node; a falsy value yields ''
 * @param {number} [indent] number of leading spaces, default 0
 * @returns {string} text representation, or 'model?' for an unknown node type
 */
function printModel(model, indent) {
  var line = '';
  if (indent == undefined) indent = 0;
  if (!model) return '';

  // Builds the leading whitespace for the current indentation level.
  var sp = function () {
    var s = '';
    for (var i = 0; i < indent; i++) s += ' ';
    return s;
  };

  switch (model.type) {
    case NODE_TYPES.RESULT:
      return sp() + '-> ' + model.name + NL;
    case NODE_TYPES.FEATURE:
      line = sp() + '$' + model.name + '?' + NL;
      model.vals.forEach(function (v) {
        line += printModel(v, indent + 2);
      });
      return line;
    case NODE_TYPES.FEATURE_VALUE:
      line = sp() + '=[' + (model.val - model.eps) + ',' + (model.val + model.eps) + ']' + NL;
      return line + printModel(model.child, indent + 2);
  }
  return 'model?';
}

/**
 * Public module interface.
 *
 *   create(options)       build a tree from options.data / .target / .features / .eps
 *   depth(model)          depth of a tree
 *   ml                    helper namespace read by createTree; empty here
 *   predict(model,sample) predict with starting probability 1 and the root node's eps
 *   print(model,indent)   text rendering of a tree
 */
that = module.exports = {
  create: function (options) {
    return createTree(options.data, options.target, options.features, options.eps);
  },
  depth: depth,
  ml: {},
  predict: function (model, sample) {
    return predictEps(model, sample, 1, model.eps);
  },
  print: printModel
};