/*** https://github.com/karpathy/convnetjs ***/ var convnet={REVISION: 'ALPHA'} module.exports=convnet; "use strict"; /*** convnet_util ***/ // Random number utilities var return_v = false; var v_val = 0.0; var gaussRandom = function() { if(return_v) { return_v = false; return v_val; } var u = 2*Math.random()-1; var v = 2*Math.random()-1; var r = u*u + v*v; if(r == 0 || r > 1) return gaussRandom(); var c = Math.sqrt(-2*Math.log(r)/r); v_val = v*c; // cache this return_v = true; return u*c; } var randf = function(a, b) { return Math.random()*(b-a)+a; } var randi = function(a, b) { return Math.floor(Math.random()*(b-a)+a); } var randn = function(mu, std){ return mu+gaussRandom()*std; } // Array utilities var zeros = function(n) { if(typeof(n)==='undefined' || isNaN(n)) { return []; } if(typeof ArrayBuffer === 'undefined') { // lacking browser support var arr = new Array(n); for(var i=0;i maxv) { maxv = w[i]; maxi = i; } if(w[i] < minv) { minv = w[i]; mini = i; } } return {maxi: maxi, maxv: maxv, mini: mini, minv: minv, dv:maxv-minv}; } // create random permutation of numbers, in range [0...n-1] var randperm = function(n) { var i = n, j = 0, temp; var array = []; for(var q=0;qright var augment = function(V, crop, dx, dy, fliplr) { // note assumes square outputs of size crop x crop if(typeof(fliplr)==='undefined') var fliplr = false; if(typeof(dx)==='undefined') var dx = convnet.randi(0, V.sx - crop); if(typeof(dy)==='undefined') var dy = convnet.randi(0, V.sy - crop); // randomly sample a crop in the input volume var W; if(crop !== V.sx || dx!==0 || dy!==0) { W = new Vol(crop, crop, V.depth, 0.0); for(var x=0;x=V.sx || y+dy<0 || y+dy>=V.sy) continue; // oob for(var d=0;d=0 && oy=0 && ox=0 && oy=0 && ox=0 && oy=0 && ox a) { a = v; winx=ox; winy=oy;} } } } this.switchx[n] = winx; this.switchy[n] = winy; n++; A.set(ax, ay, d, a); } } } this.out_act = A; return this.out_act; }, backward: function() { // pooling layers have no parameters, so simply compute // gradient wrt data here var V = this.in_act; V.dw = convnet.zeros(V.w.length); // zero out gradient wrt data var A = this.out_act; // computed in forward pass var n = 0; for(var d=0;d amax) amax = as[i]; } // compute exponentials (carefully to not blow up) var es = convnet.zeros(this.out_depth); var esum = 0.0; for(var i=0;i 0) { // violating dimension, apply loss x.dw[i] += 1; x.dw[y] -= 1; loss += ydiff; } } return loss; }, getParamsAndGrads: function() { return []; }, toJSON: function() { var json = {}; json.out_depth = this.out_depth; json.out_sx = this.out_sx; json.out_sy = this.out_sy; json.layer_type = this.layer_type; json.num_inputs = this.num_inputs; return json; }, fromJSON: function(json) { this.out_depth = json.out_depth; this.out_sx = json.out_sx; this.out_sy = json.out_sy; this.layer_type = json.layer_type; this.num_inputs = json.num_inputs; } } convnet.RegressionLayer = RegressionLayer; convnet.SoftmaxLayer = SoftmaxLayer; convnet.SVMLayer = SVMLayer; /*** convnet_layers_nonlinearities ***/ // Implements ReLU nonlinearity elementwise // x -> max(0, x) // the output is in [0, inf) var ReluLayer = function(opt) { var opt = opt || {}; // computed this.out_sx = opt.in_sx; this.out_sy = opt.in_sy; this.out_depth = opt.in_depth; this.layer_type = 'relu'; } ReluLayer.prototype = { forward: function(V, is_training) { this.in_act = V; var V2 = V.clone(); var N = V.w.length; var V2w = V2.w; for(var i=0;i 1/(1+e^(-x)) // so the output is between 0 and 1. 
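// A minimal sketch (illustrative only) of what the element-wise forward/backward passes of this
// layer compute, writing x[i] for an input weight, y[i] for the output, and dy[i] for the upstream gradient:
//   forward:  y[i]  = 1 / (1 + Math.exp(-x[i]))
//   backward: dx[i] = y[i] * (1 - y[i]) * dy[i]   // chain rule through the sigmoid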
var SigmoidLayer = function(opt) { var opt = opt || {}; // computed this.out_sx = opt.in_sx; this.out_sy = opt.in_sy; this.out_depth = opt.in_depth; this.layer_type = 'sigmoid'; } SigmoidLayer.prototype = { forward: function(V, is_training) { this.in_act = V; var V2 = V.cloneAndZero(); var N = V.w.length; var V2w = V2.w; var Vw = V.w; for(var i=0;i max(x) // where x is a vector of size group_size. Ideally of course, // the input size should be exactly divisible by group_size var MaxoutLayer = function(opt) { var opt = opt || {}; // required this.group_size = typeof opt.group_size !== 'undefined' ? opt.group_size : 2; // computed this.out_sx = opt.in_sx; this.out_sy = opt.in_sy; this.out_depth = Math.floor(opt.in_depth / this.group_size); this.layer_type = 'maxout'; this.switches = convnet.zeros(this.out_sx*this.out_sy*this.out_depth); // useful for backprop } MaxoutLayer.prototype = { forward: function(V, is_training) { this.in_act = V; var N = this.out_depth; var V2 = new Vol(this.out_sx, this.out_sy, this.out_depth, 0.0); // optimization branch. If we're operating on 1D arrays we dont have // to worry about keeping track of x,y,d coordinates inside // input volumes. In convnets we do :( if(this.out_sx === 1 && this.out_sy === 1) { for(var i=0;i a) { a = a2; ai = j; } } V2.w[i] = a; this.switches[i] = ix + ai; } } else { var n=0; // counter for switches for(var x=0;x a) { a = a2; ai = j; } } V2.set(x,y,i,a); this.switches[n] = ix + ai; n++; } } } } this.out_act = V2; return this.out_act; }, backward: function() { var V = this.in_act; // we need to set dw of this var V2 = this.out_act; var N = this.out_depth; V.dw = convnet.zeros(V.w.length); // zero out gradient wrt data // pass the gradient through the appropriate switch if(this.out_sx === 1 && this.out_sy === 1) { for(var i=0;i tanh(x) // so the output is between -1 and 1. var TanhLayer = function(opt) { var opt = opt || {}; // computed this.out_sx = opt.in_sx; this.out_sy = opt.in_sy; this.out_depth = opt.in_depth; this.layer_type = 'tanh'; } TanhLayer.prototype = { forward: function(V, is_training) { this.in_act = V; var V2 = V.cloneAndZero(); var N = V.w.length; for(var i=0;i= 2, 'Error! At least one input layer and one loss layer are required.'); assert(defs[0].type === 'input', 'Error! 
First layer must be the input layer, to declare size of inputs'); // desugar layer_defs for adding activation, dropout layers etc var desugar = function() { var new_defs = []; for(var i=0;i0) { var prev = this.layers[i-1]; def.in_sx = prev.out_sx; def.in_sy = prev.out_sy; def.in_depth = prev.out_depth; } switch(def.type) { case 'fc': this.layers.push(new convnet.FullyConnLayer(def)); break; case 'lrn': this.layers.push(new convnet.LocalResponseNormalizationLayer(def)); break; case 'dropout': this.layers.push(new convnet.DropoutLayer(def)); break; case 'input': this.layers.push(new convnet.InputLayer(def)); break; case 'softmax': this.layers.push(new convnet.SoftmaxLayer(def)); break; case 'regression': this.layers.push(new convnet.RegressionLayer(def)); break; case 'conv': this.layers.push(new convnet.ConvLayer(def)); break; case 'pool': this.layers.push(new convnet.PoolLayer(def)); break; case 'relu': this.layers.push(new convnet.ReluLayer(def)); break; case 'sigmoid': this.layers.push(new convnet.SigmoidLayer(def)); break; case 'tanh': this.layers.push(new convnet.TanhLayer(def)); break; case 'maxout': this.layers.push(new convnet.MaxoutLayer(def)); break; case 'svm': this.layers.push(new convnet.SVMLayer(def)); break; default: console.log('ERROR: UNRECOGNIZED LAYER TYPE: ' + def.type); } } }, // forward prop the network. // The trainer class passes is_training = true, but when this function is // called from outside (not from the trainer), it defaults to prediction mode forward: function(V, is_training) { if(typeof(is_training) === 'undefined') is_training = false; var act = this.layers[0].forward(V, is_training); for(var i=1;i=0;i--) { // first layer assumed input this.layers[i].backward(); } return loss; }, getParamsAndGrads: function() { // accumulate parameters and gradients for the entire network var response = []; for(var i=0;i maxv) { maxv = p[i]; maxi = i;} } return maxi; // return index of the class with highest class probability }, toJSON: function() { var json = {}; json.layers = []; for(var i=0;i 0.0)) { // only vanilla sgd doesnt need either lists // momentum needs gsum // adagrad needs gsum // adam and adadelta needs gsum and xsum for(var i=0;i 0 ? 1 : -1); var l2grad = l2_decay * (p[j]); var gij = (l2grad + l1grad + g[j]) / this.batch_size; // raw batch gradient var gsumi = this.gsum[i]; var xsumi = this.xsum[i]; if(this.method === 'adam') { // adam update gsumi[j] = gsumi[j] * this.beta1 + (1- this.beta1) * gij; // update biased first moment estimate xsumi[j] = xsumi[j] * this.beta2 + (1-this.beta2) * gij * gij; // update biased second moment estimate var biasCorr1 = gsumi[j] * (1 - Math.pow(this.beta1, this.k)); // correct bias first moment estimate var biasCorr2 = xsumi[j] * (1 - Math.pow(this.beta2, this.k)); // correct bias second moment estimate var dx = - this.learning_rate * biasCorr1 / (Math.sqrt(biasCorr2) + this.eps); p[j] += dx; } else if(this.method === 'adagrad') { // adagrad update gsumi[j] = gsumi[j] + gij * gij; var dx = - this.learning_rate / Math.sqrt(gsumi[j] + this.eps) * gij; p[j] += dx; } else if(this.method === 'windowgrad') { // this is adagrad but with a moving window weighted average // so the gradient is not accumulated over the entire history of the run. // it's also referred to as Idea #1 in Zeiler paper on Adadelta. Seems reasonable to me! 
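// Sketched in equation form (gij is the raw batch gradient computed above; ro and eps are trainer options),
// the windowgrad step applied below is roughly:
//   cache = ro * cache + (1 - ro) * gij^2
//   dx    = - learning_rate * gij / Math.sqrt(cache + eps)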
gsumi[j] = this.ro * gsumi[j] + (1-this.ro) * gij * gij; var dx = - this.learning_rate / Math.sqrt(gsumi[j] + this.eps) * gij; // eps added for better conditioning p[j] += dx; } else if(this.method === 'adadelta') { gsumi[j] = this.ro * gsumi[j] + (1-this.ro) * gij * gij; var dx = - Math.sqrt((xsumi[j] + this.eps)/(gsumi[j] + this.eps)) * gij; xsumi[j] = this.ro * xsumi[j] + (1-this.ro) * dx * dx; // yes, xsum lags behind gsum by 1. p[j] += dx; } else if(this.method === 'nesterov') { var dx = gsumi[j]; gsumi[j] = gsumi[j] * this.momentum + this.learning_rate * gij; dx = this.momentum * dx - (1.0 + this.momentum) * gsumi[j]; p[j] += dx; } else { // assume SGD if(this.momentum > 0.0) { // momentum update var dx = this.momentum * gsumi[j] - this.learning_rate * gij; // step gsumi[j] = dx; // back this up for next iteration of momentum p[j] += dx; // apply corrected gradient } else { // vanilla sgd p[j] += - this.learning_rate * gij; } } g[j] = 0.0; // zero out gradient so that we can begin accumulating anew } } } // appending softmax_loss for backwards compatibility, but from now on we will always use cost_loss // in future, TODO: have to completely redo the way loss is done around the network as currently // loss is a bit of a hack. Ideally, user should specify arbitrary number of loss functions on any layer // and it should all be computed correctly and automatically. return {fwd_time: fwd_time, bwd_time: bwd_time, l2_decay_loss: l2_decay_loss, l1_decay_loss: l1_decay_loss, cost_loss: cost_loss, softmax_loss: cost_loss, loss: cost_loss + l1_decay_loss + l2_decay_loss} } } convnet.Trainer = Trainer; convnet.SGDTrainer = Trainer; // backwards compatibility /*** convnet_magicnets ***/ // used utilities, make explicit local references var randf = convnet.randf; var randi = convnet.randi; var Net = convnet.Net; var Trainer = convnet.Trainer; var maxmin = convnet.maxmin; var randperm = convnet.randperm; var weightedSample = convnet.weightedSample; var getopt = convnet.getopt; var arrUnique = convnet.arrUnique; /* A MagicNet takes data: a list of convnetjs.Vol(), and labels which for now are assumed to be class indeces 0..K. MagicNet then: - creates data folds for cross-validation - samples candidate networks - evaluates candidate networks on all data folds - produces predictions by model-averaging the best networks */ var MagicNet = function(data, labels, opt) { var opt = opt || {}; if(typeof data === 'undefined') { data = []; } if(typeof labels === 'undefined') { labels = []; } // required inputs this.data = data; // store these pointers to data this.labels = labels; // optional inputs this.train_ratio = getopt(opt, 'train_ratio', 0.7); this.num_folds = getopt(opt, 'num_folds', 10); this.num_candidates = getopt(opt, 'num_candidates', 50); // we evaluate several in parallel // how many epochs of data to train every network? for every fold? // higher values mean higher accuracy in final results, but more expensive this.num_epochs = getopt(opt, 'num_epochs', 50); // number of best models to average during prediction. 
  // Usually higher = better
  this.ensemble_size = getopt(opt, 'ensemble_size', 10);

  // candidate parameters
  this.batch_size_min = getopt(opt, 'batch_size_min', 10);
  this.batch_size_max = getopt(opt, 'batch_size_max', 300);
  this.l2_decay_min = getopt(opt, 'l2_decay_min', -4);
  this.l2_decay_max = getopt(opt, 'l2_decay_max', 2);
  this.learning_rate_min = getopt(opt, 'learning_rate_min', -4);
  this.learning_rate_max = getopt(opt, 'learning_rate_max', 0);
  this.momentum_min = getopt(opt, 'momentum_min', 0.9);
  this.momentum_max = getopt(opt, 'momentum_max', 0.9);
  this.neurons_min = getopt(opt, 'neurons_min', 5);
  this.neurons_max = getopt(opt, 'neurons_max', 30);

  // computed
  this.folds = []; // data fold indices, gets filled by sampleFolds()
  this.candidates = []; // candidate networks that are being currently evaluated
  this.evaluated_candidates = []; // history of all candidates that were fully evaluated on all folds
  this.unique_labels = arrUnique(labels);
  this.iter = 0; // iteration counter, goes from 0 -> num_epochs * num_training_data
  this.foldix = 0; // index of active fold

  // callbacks
  this.finish_fold_callback = null;
  this.finish_batch_callback = null;

  // initializations
  if(this.data.length > 0) {
    this.sampleFolds();
    this.sampleCandidates();
  }
};

MagicNet.prototype = {
  // sets this.folds to a sampling of this.num_folds folds
  sampleFolds: function() {
    var N = this.data.length;
    var num_train = Math.floor(this.train_ratio * N);
    this.folds = []; // flush folds, if any
    for(var i=0;i
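<this.num_folds;i++) { // loop bound and body below are a hedged best-guess sketch of the fold sampling
      var p = randperm(N);
      // assumed fold layout: the first num_train permuted indices train, the remainder test
      this.folds.push({train_ix: p.slice(0, num_train), test_ix: p.slice(num_train, N)});
    }
  },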