199 lines
5.0 KiB
JavaScript
199 lines
5.0 KiB
JavaScript
/**
|
|
** ==============================
|
|
** O O O OOOO
|
|
** O O O O O O
|
|
** O O O O O O
|
|
** OOOO OOOO O OOO OOOO
|
|
** O O O O O O O
|
|
** O O O O O O O
|
|
** OOOO OOOO O O OOOO
|
|
** ==============================
|
|
** Dr. Stefan Bosse http://www.bsslab.de
|
|
**
|
|
** COPYRIGHT: THIS SOFTWARE, EXECUTABLE AND SOURCE CODE IS OWNED
|
|
** BY THE AUTHOR(S).
|
|
** THIS SOURCE CODE MAY NOT BE COPIED, EXTRACTED,
|
|
** MODIFIED, OR OTHERWISE USED IN A CONTEXT
|
|
** OUTSIDE OF THE SOFTWARE SYSTEM.
|
|
**
|
|
** $AUTHORS: Ankit Kuwadekar, Stefan Bosse
|
|
** $INITIAL: (C) 2014, Ankit Kuwadekar
|
|
** $MODIFIED: (C) 2006-2018 bLAB by sbosse
|
|
** $VERSION: 1.3.2
|
|
**
|
|
** $INFO:
|
|
**
|
|
** ICE: C45/ID3 Decision Tree Algorithm supporting feature variables with eps intervals
|
|
**
|
|
** Portable model
|
|
**
|
|
** New:
|
|
** typeof eps = number | [epsx1:number,epsx2:number,..]
|
|
**
|
|
** $ENDOFINFO
|
|
*/
|
|
var Io = Require('com/io');
|
|
var Comp = Require('com/compat');
|
|
var current=none;
|
|
var Aios=none;
|
|
var that;
|
|
|
|
/**
|
|
* Map of valid tree node types
|
|
* @constant
|
|
* @static
|
|
*/
|
|
var NODE_TYPES = {
|
|
RESULT: 'result',
|
|
FEATURE: 'feature',
|
|
FEATURE_VALUE: 'feature_value'
|
|
};
|
|
|
|
var NL ='\n'
|
|
|
|
/**
|
|
* Creates a new tree
|
|
*/
|
|
function createTree(data, target, features, eps) {
|
|
var ml = that.ml;
|
|
var targets = ml.stats.unique(ml.stats.utils.column(data, target));
|
|
if (targets.length == 1) {
|
|
return {
|
|
type: NODE_TYPES.RESULT,
|
|
name: targets[0],
|
|
};
|
|
}
|
|
|
|
if (features.length == 0) {
|
|
var topTarget = ml.stats.mostCommon(targets);
|
|
return {
|
|
type: NODE_TYPES.RESULT,
|
|
name: topTarget,
|
|
};
|
|
}
|
|
|
|
|
|
var split = ml.stats.splitEps(data,features,target,targets,eps);
|
|
var bestFeature = split.feature;
|
|
var index = features.indexOf(bestFeature);
|
|
var remainingFeatures = split.remainingFeatures;
|
|
var remainingEps =
|
|
typeof eps == 'number'?eps:remainingFeatures.map(function (v) { return eps[features.indexOf(v)] });
|
|
var possibleValues = split.possibleValues;
|
|
|
|
var node = {
|
|
type: NODE_TYPES.FEATURE,
|
|
name: bestFeature,
|
|
index: index,
|
|
eps: that.ml.stats.utils.selectEps(eps,index)
|
|
};
|
|
|
|
node.vals = split.choices.map(function (c) {
|
|
var child_node = {
|
|
val : c.val,
|
|
eps : that.ml.stats.utils.selectEps(eps,index),
|
|
type: NODE_TYPES.FEATURE_VALUE
|
|
};
|
|
|
|
child_node.child = createTree(c.data, target, remainingFeatures, remainingEps);
|
|
return child_node;
|
|
|
|
})
|
|
return node;
|
|
}
|
|
|
|
|
|
function depth(model) {
|
|
switch (model.type) {
|
|
case NODE_TYPES.RESULT: return 1;
|
|
case NODE_TYPES.FEATURE:
|
|
return 1+Comp.array.max(model.vals.map(function (val) {
|
|
return depth(val);
|
|
}));
|
|
case NODE_TYPES.FEATURE_VALUE:
|
|
return 1+depth(model.child);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
function info(model) {
|
|
var vl = vars(model);
|
|
return {
|
|
depth:depth(model),
|
|
nodes:vl.length,
|
|
vars:vl.unique(),
|
|
}
|
|
}
|
|
|
|
function predictEps(model,sample,prob,eps) {
|
|
var root = model;
|
|
if (!prob) prob=1;
|
|
while (root.type !== NODE_TYPES.RESULT) {
|
|
var attr = root.name;
|
|
var sampleVal = sample[attr];
|
|
// kNN approximation
|
|
var childNode = null;
|
|
root.vals.forEach(function(node) {
|
|
var fit=Math.abs(node.val-sampleVal);
|
|
if (!childNode || fit < childNode.fit) childNode={fit:fit,node:node};
|
|
});
|
|
if (childNode){
|
|
// with fit quality propagation
|
|
prob = prob * (1-Math.abs(childNode.fit/that.ml.stats.utils.selectEps(eps,root.index))/4)
|
|
root = childNode.node.child;
|
|
} else {
|
|
root = root.vals[0].child;
|
|
}
|
|
}
|
|
return {value:root.name,prob:prob};
|
|
};
|
|
|
|
|
|
function printModel(model,indent) {
|
|
var line='',sep;
|
|
if (indent==undefined) indent=0;
|
|
if (!model) return '';
|
|
var sp = function () {var s=''; for(var i=0;i<indent;i++) s+=' '; return s};
|
|
switch (model.type) {
|
|
case NODE_TYPES.RESULT:
|
|
return sp()+'-> '+model.name+NL;
|
|
case NODE_TYPES.FEATURE:
|
|
line=sp()+'$'+model.name+'?'+NL;
|
|
model.vals.forEach(function (v) {
|
|
line += printModel(v,indent+2);
|
|
});
|
|
return line;
|
|
case NODE_TYPES.FEATURE_VALUE:
|
|
line=sp()+'=['+(model.val-model.eps)+','+(model.val+model.eps)+']'+NL;
|
|
return line+printModel(model.child,indent+2);
|
|
}
|
|
return 'model?';
|
|
}
|
|
|
|
function vars(model) {
|
|
switch (model.type) {
|
|
case NODE_TYPES.RESULT: return [];
|
|
case NODE_TYPES.FEATURE:
|
|
return [model.name].concat(Comp.array.flatten(model.vals.map(vars)));
|
|
case NODE_TYPES.FEATURE_VALUE:
|
|
return vars(model.child);
|
|
}
|
|
return [];
|
|
}
|
|
|
|
that = module.exports = {
|
|
create: function (options) {
|
|
return createTree(options.data,
|
|
options.target,
|
|
options.features,
|
|
options.eps)
|
|
},
|
|
depth:depth,
|
|
info:info,
|
|
ml:{},
|
|
predict:function (model,sample) {
|
|
return predictEps(model,sample,1,model.eps)
|
|
},
|
|
print:printModel,
|
|
}
|