175 lines
		
	
	
		
			4.3 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
			
		
		
	
	
			175 lines
		
	
	
		
			4.3 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
| /**
 | |
|  **      ==============================
 | |
|  **       O           O      O   OOOO
 | |
|  **       O           O     O O  O   O
 | |
|  **       O           O     O O  O   O
 | |
|  **       OOOO   OOOO O     OOO  OOOO
 | |
|  **       O   O       O    O   O O   O
 | |
|  **       O   O       O    O   O O   O
 | |
|  **       OOOO        OOOO O   O OOOO
 | |
|  **      ==============================
 | |
|  **      Dr. Stefan Bosse http://www.bsslab.de
 | |
|  **
 | |
|  **      COPYRIGHT: THIS SOFTWARE, EXECUTABLE AND SOURCE CODE IS OWNED
 | |
|  **                 BY THE AUTHOR(S).
 | |
|  **                 THIS SOURCE CODE MAY NOT BE COPIED, EXTRACTED,
 | |
|  **                 MODIFIED, OR OTHERWISE USED IN A CONTEXT
 | |
|  **                 OUTSIDE OF THE SOFTWARE SYSTEM.
 | |
|  **
 | |
|  **    $AUTHORS:     Ankit Kuwadekar, Stefan Bosse
 | |
|  **    $INITIAL:     (C) 2014, Ankit Kuwadekar
 | |
|  **    $MODIFIED:    (C) 2006-2018 bLAB by sbosse
 | |
|  **    $VERSION:     1.2.2
 | |
|  **
 | |
|  **    $INFO:
 | |
|  **
 | |
|  ** ICE: C45/ID3 Decision Tree Algorithm supporting feature variables with eps intervals
 | |
|  **
 | |
|  ** Portable model
 | |
|  **
 | |
|  ** TODO: independent eps for each feature variable 
 | |
|  **        typeof eps = number | [epsx1:number,epsx2:number,..]
 | |
|  **
 | |
|  **    $ENDOFINFO
 | |
|  */
 | |
// External modules. `Require` is not defined in this file; it is expected
// to be provided by the surrounding runtime.
var Io   = Require('com/io');
var Comp = Require('com/compat');

// Module-level state. `none` is likewise supplied from outside this file.
var current = none;
var Aios    = none;

// Forward declaration: receives the exported API object at the bottom of
// the file, so that createTree can reach the injected `ml` helpers.
var that;
 | |
| 
 | |
/**
 * Tags stored in the `type` field of every tree node.
 *
 *  RESULT         leaf node carrying the predicted target in `name`
 *  FEATURE        inner node that tests the feature named in `name`
 *  FEATURE_VALUE  branch below a FEATURE node (one per split value)
 *
 * @constant
 * @static
 */
var NODE_TYPES = {
  "RESULT":        "result",
  "FEATURE":       "feature",
  "FEATURE_VALUE": "feature_value"
};

// Line terminator used by printModel when rendering a tree as text.
var NL = "\n";
 | |
| 
 | |
/**
 * Recursively builds a decision tree from training rows.
 *
 * Recursion stops with a RESULT leaf when all rows share one target value,
 * or when no features are left to split on; in the latter case the leaf
 * carries the most frequent target value among the rows.
 * Otherwise the split is delegated to `ml.stats.splitEps`, and one
 * FEATURE_VALUE branch is created per entry of the returned `choices`.
 *
 * All statistics helpers are taken from `that.ml`, which must be populated
 * by the embedding code before this function is called.
 *
 * @param {Array}  data     - training rows (read via ml.stats.utils.column)
 * @param {string} target   - name of the target column
 * @param {Array}  features - names of the features still available
 * @param {*}      eps      - interval width forwarded unchanged to
 *                            ml.stats.splitEps and stored on every node
 *                            (a single number according to the file header)
 * @returns {Object} RESULT node `{type,name}` or FEATURE node
 *                   `{type,name,eps,vals}` with `vals` holding
 *                   FEATURE_VALUE nodes `{val,eps,type,child}`
 */
function createTree(data, target, features, eps) {
  var ml = that.ml;
  // Keep the raw target column: the majority vote below needs the
  // duplicates that unique() removes.
  // NOTE(review): assumes column() returns one entry per row - confirm.
  var labels = ml.stats.utils.column(data, target);
  var targets = ml.stats.unique(labels);

  // Pure node: every row has the same target value.
  if (targets.length == 1) {
    return {
      type: NODE_TYPES.RESULT,
      name: targets[0]
    };
  }

  // No feature left to split on: fall back to the majority target.
  // Voting over the deduplicated `targets` list would give every label a
  // count of one, so the vote is taken over the full column instead.
  if (features.length == 0) {
    return {
      type: NODE_TYPES.RESULT,
      name: ml.stats.mostCommon(labels)
    };
  }

  var split = ml.stats.splitEps(data, features, target, targets, eps);
  var remainingFeatures = split.remainingFeatures;

  var node = {
    type: NODE_TYPES.FEATURE,
    name: split.feature,
    eps: eps  // TODO: eps[bestFeature] once per-feature eps is supported
  };

  // One branch per split choice; each branch recurses on its own rows.
  node.vals = split.choices.map(function (c) {
    return {
      val: c.val,
      eps: eps, // TODO: per-feature eps
      type: NODE_TYPES.FEATURE_VALUE,
      child: createTree(c.data, target, remainingFeatures, eps)
    };
  });
  return node;
}
 | |
| 
 | |
| 
 | |
/**
 * Returns the number of node levels below and including `model`.
 *
 * A RESULT leaf counts as 1. A FEATURE node adds 1 to the value that
 * Comp.array.max yields for its branches, and a FEATURE_VALUE branch adds
 * 1 to the depth of its child. Any other node type yields 0.
 *
 * @param {Object} model - tree node with a `type` field
 * @returns {number}
 */
function depth(model) {
  var kind = model.type;

  if (kind === NODE_TYPES.RESULT) {
    return 1;
  }
  if (kind === NODE_TYPES.FEATURE) {
    var branchDepth = function (branch) {
      return depth(branch);
    };
    return 1 + Comp.array.max(model.vals, branchDepth);
  }
  if (kind === NODE_TYPES.FEATURE_VALUE) {
    return 1 + depth(model.child);
  }
  // Unknown node type.
  return 0;
}
 | |
| 
 | |
/**
 * Classifies `sample` by walking the tree from `model` down to a RESULT leaf.
 *
 * At every FEATURE node the branch whose `val` is numerically closest to
 * the sample's value for that feature is followed (nearest-value
 * approximation). Each step scales the running probability by a fit
 * factor `1 - |fit/eps|/4`, where `fit` is the distance to the chosen
 * branch value.
 *
 * The factor is clamped at 0: a distance above 4*eps must not produce a
 * negative probability, and two such steps must not multiply back into a
 * positive one. An exact match (fit of 0) always yields factor 1, also
 * for eps = 0 where the plain formula would compute 0/0.
 * If `eps` is not a number, or the sample value is missing or
 * non-numeric, the probability becomes NaN.
 *
 * @param {Object} model  - root node of the (sub)tree
 * @param {Object} sample - maps feature names to values
 * @param {number} [prob] - starting probability; any falsy value
 *                          (including 0) is replaced by 1
 * @param {number} eps    - interval width used to rate the fit
 * @returns {{value: *, prob: number}} name of the reached leaf and the
 *          accumulated probability
 * @throws {TypeError} if a FEATURE node has an empty `vals` array
 */
function predictEps(model,sample,prob,eps) {
  var root = model;
  if (!prob) prob = 1;

  while (root.type !== NODE_TYPES.RESULT) {
    var sampleVal = sample[root.name];

    // Pick the branch with the smallest distance to the sample value.
    var best = null;
    root.vals.forEach(function (node) {
      var fit = Math.abs(node.val - sampleVal);
      if (!best || fit < best.fit) best = {fit: fit, node: node};
    });

    if (!best) {
      // Previously this fell through to root.vals[0].child and failed
      // with an opaque "cannot read property of undefined".
      throw new TypeError('predictEps: feature node "' + root.name +
                          '" has no value branches');
    }

    // Fit quality propagation. The fit===0 shortcut avoids 0/0 for eps=0.
    var ratio = best.fit === 0 ? 0 : Math.abs(best.fit / eps);
    prob = prob * Math.max(0, 1 - ratio / 4);

    root = best.node.child;
  }
  return {value: root.name, prob: prob};
}
 | |
| 
 | |
| 
 | |
/**
 * Renders a tree as indented text, one node per line.
 *
 *  RESULT         "-> <name>"
 *  FEATURE        "$<name>?" followed by all branches, indented by 2 more
 *  FEATURE_VALUE  "=[val-eps,val+eps]" followed by the child, indented by 2 more
 *
 * @param {Object} model    - tree node; a falsy value renders as ''
 * @param {number} [indent] - number of leading spaces, defaults to 0
 * @returns {string} the rendered text, or 'model?' for an unknown node type
 */
function printModel(model,indent) {
  if (indent==undefined) indent=0;
  if (!model) return '';

  // Leading blanks for the current nesting level.
  var pad = '';
  while (pad.length < indent) pad += ' ';

  var kind = model.type;

  if (kind === NODE_TYPES.RESULT) {
    return pad + '-> ' + model.name + NL;
  }

  if (kind === NODE_TYPES.FEATURE) {
    var head = pad + '$' + model.name + '?' + NL;
    var branches = model.vals.map(function (branch) {
      return printModel(branch, indent + 2);
    });
    return head + branches.join('');
  }

  if (kind === NODE_TYPES.FEATURE_VALUE) {
    var lower = model.val - model.eps;
    var upper = model.val + model.eps;
    return pad + '=[' + lower + ',' + upper + ']' + NL +
           printModel(model.child, indent + 2);
  }

  return 'model?';
}
 | |
| 
 | |
| 
 | |
/**
 * Public API of the module.
 *
 *  create(options)      builds a tree from options.data, options.target,
 *                       options.features and options.eps
 *  depth(model)         depth of a tree
 *  ml                   initially empty; createTree reads its statistics
 *                       helpers from here, so it has to be filled in from
 *                       outside before create() is used
 *  predict(model,sample) classifies a sample, starting with probability 1
 *                       and the eps stored on the root node
 *  print(model,indent)  text rendering of a tree
 */
var api = {};

api.create = function (options) {
  var data = options.data;
  var target = options.target;
  var features = options.features;
  var eps = options.eps;
  return createTree(data, target, features, eps);
};

api.depth = depth;

api.ml = {};

api.predict = function (model,sample) {
  return predictEps(model, sample, 1, model.eps);
};

api.print = printModel;

that = module.exports = api;
 |