// MIT License // Random Forest Trees (only binary classifier) // Andrej Karpathy // @blab+ // https://github.com/karpathy/forestjs // NOTE(review): this copy appears to have lost its line breaks and some spans of text (see the inline notes); restore it from the upstream file before relying on it. var RandomForest = function(options) { var L = {}; return L } RandomForest.code = { /* data is a 2D array of size N x D of examples. labels is a 1D array of labels (only -1 or 1 for now). In the future this will support multiclass or maybe even regression. options.numTrees can be used to customize the number of trees to train (default = 100). options.maxDepth is the maximum depth of each tree in the forest (default = 4). options.numTries is the number of random hypotheses generated at each node during training (default = 10). options.trainFun is a function with signature "function myWeakTrain(data, labels, ix, options)". Here, ix is a list of indices into data of the instances that should be paid attention to. Everything not in the list should be ignored. This is done for efficiency. The function should return a model where you store variables (e.g. model = {}; model.myvar = 5;). This will be passed to testFun. options.testFun is a function with signature "function myWeakTest(inst, model)" where inst is a 1D array specifying an example, and model will be the same model that you return in options.trainFun. For example, model.myvar will be 5. See decisionStumpTrain() and decisionStumpTest() below for an example. */ train: function(L, data, labels, options) { options = options || {}; L.options = options; L.numTrees = options.numTrees || 100; // initialize many trees and train them all independently L.trees= new Array(L.numTrees); for(var i=0;i /* NOTE(review): text appears to be missing here; the loop header is incomplete and the code resumes inside a format string, presumably near the end of decisionStumpTrain. Restore from upstream before editing. */ %f, %f. Gain %f", thr, H, LH, RH, informationGain); if(informationGain > bestGain || i === 0) { bestGain= informationGain; bestThr= thr; } } /* NOTE(review): model is assigned without a declaration in the visible text, which would create an implicit global; confirm against the full function. */ model= {}; model.thr= bestThr; model.ri= ri; return model; } // returns a decision (1 or -1) for a single data instance function decisionStumpTest(inst, model) { if(!model) { // this is a leaf that never received any data... return 1; } return inst[model.ri] < model.thr ? 
1 : -1; } // returns model. Code duplication with decisionStumpTrain :( function decision2DStumpTrain(data, labels, ix, options) { options = options || {}; var numtries = options.numTries || 10; // choose two dimensions (0 and 1 by default, random ones when the data has more than 2 columns) and pick a best split var N= ix.length; var ri1= 0; var ri2= 1; if(data[0].length > 2) { // more than 2D data. Pick 2 random dimensions ri1= randi(0, data[0].length); ri2= randi(0, data[0].length); while(ri2 == ri1) ri2= randi(0, data[0].length); // must be distinct! } // evaluate class entropy of incoming data var H= entropy(labels, ix); var bestGain=0; var bestw1, bestw2, bestthr; var dots= new Array(ix.length); for(var i=0;i /* NOTE(review): text appears to be missing here; the loop header is incomplete and the code resumes inside a format string. Restore from upstream before editing. */ %f, %f. Gain %f", thr, H, LH, RH, informationGain); if(informationGain > bestGain || i === 0) { bestGain= informationGain; bestw1= w1; bestw2= w2; bestthr= dotthr; } } /* NOTE(review): model is assigned without a declaration in the visible text, which would create an implicit global; confirm against the full function. */ model= {}; model.w1= bestw1; model.w2= bestw2; model.dotthr= bestthr; return model; } // returns label (1 or -1) for a single data instance function decision2DStumpTest(inst, model) { if(!model) { // this is a leaf that never received any data... return 1; } /* NOTE(review): this reads inst[0] and inst[1], while training may pick random dimensions ri1 and ri2 that are not stored on the model; confirm whether this is intended for data with more than 2 dimensions. */ return inst[0]*model.w1 + inst[1]*model.w2 < model.dotthr ? 1 : -1; } // Misc utility functions function entropy(labels, ix) { var N= ix.length; var p=0.0; for(var i=0;i