// jam/js/ml/convnet.js

/*** https://github.com/karpathy/convnetjs ***/
"use strict";
var convnet = {REVISION: 'ALPHA'};
module.exports = convnet;
/*** convnet_util ***/
// Random number utilities
var return_v = false;
var v_val = 0.0;
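// gaussRandom() samples a standard normal N(0,1) with the Marsaglia polar method:
// it rejection-samples a point (u,v) in the unit disk, produces two independent
// gaussian values, returns one and caches the other in v_val for the next call.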
var gaussRandom = function() {
if(return_v) {
return_v = false;
return v_val;
}
var u = 2*Math.random()-1;
var v = 2*Math.random()-1;
var r = u*u + v*v;
if(r == 0 || r > 1) return gaussRandom();
var c = Math.sqrt(-2*Math.log(r)/r);
v_val = v*c; // cache this
return_v = true;
return u*c;
}
var randf = function(a, b) { return Math.random()*(b-a)+a; }
var randi = function(a, b) { return Math.floor(Math.random()*(b-a)+a); }
var randn = function(mu, std){ return mu+gaussRandom()*std; }
// Array utilities
var zeros = function(n) {
if(typeof(n)==='undefined' || isNaN(n)) { return []; }
if(typeof ArrayBuffer === 'undefined') {
// lacking browser support
var arr = new Array(n);
for(var i=0;i<n;i++) { arr[i]= 0; }
return arr;
} else {
return new Float64Array(n);
}
}
var arrContains = function(arr, elt) {
for(var i=0,n=arr.length;i<n;i++) {
if(arr[i]===elt) return true;
}
return false;
}
var arrUnique = function(arr) {
var b = [];
for(var i=0,n=arr.length;i<n;i++) {
if(!arrContains(b, arr[i])) {
b.push(arr[i]);
}
}
return b;
}
// return max and min (and their indices) of a given array
var maxmin = function(w) {
if(w.length === 0) { return {}; } // degenerate case: empty input
var maxv = w[0];
var minv = w[0];
var maxi = 0;
var mini = 0;
var n = w.length;
for(var i=1;i<n;i++) {
if(w[i] > maxv) { maxv = w[i]; maxi = i; }
if(w[i] < minv) { minv = w[i]; mini = i; }
}
return {maxi: maxi, maxv: maxv, mini: mini, minv: minv, dv:maxv-minv};
}
// create random permutation of numbers, in range [0...n-1]
var randperm = function(n) {
var i = n,
j = 0,
temp;
var array = [];
for(var q=0;q<n;q++)array[q]=q;
while (i--) {
j = Math.floor(Math.random() * (i+1));
temp = array[i];
array[i] = array[j];
array[j] = temp;
}
return array;
}
// sample from list lst according to probabilities in list probs
// the two lists are of same size, and probs adds up to 1
var weightedSample = function(lst, probs) {
var p = randf(0, 1.0);
var cumprob = 0.0;
for(var k=0,n=lst.length;k<n;k++) {
cumprob += probs[k];
if(p < cumprob) { return lst[k]; }
}
}
// syntactic sugar function for getting default parameter values
var getopt = function(opt, field_name, default_value) {
if(typeof field_name === 'string') {
// case of single string
return (typeof opt[field_name] !== 'undefined') ? opt[field_name] : default_value;
} else {
// assume we are given a list of string instead
var ret = default_value;
for(var i=0;i<field_name.length;i++) {
var f = field_name[i];
if (typeof opt[f] !== 'undefined') {
ret = opt[f]; // overwrite return value
}
}
return ret;
}
}
function assert(condition, message) {
if (!condition) {
message = message || "Assertion failed";
if (typeof Error !== "undefined") {
throw new Error(message);
}
throw message; // Fallback
}
}
convnet.randf = randf;
convnet.randi = randi;
convnet.randn = randn;
convnet.zeros = zeros;
convnet.maxmin = maxmin;
convnet.randperm = randperm;
convnet.weightedSample = weightedSample;
convnet.arrUnique = arrUnique;
convnet.arrContains = arrContains;
convnet.getopt = getopt;
convnet.assert = assert;
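// A minimal usage sketch of the utilities above (illustrative only, kept in a
// comment so nothing runs at load time):
/*
var opt = { width: 24 };
convnet.getopt(opt, ['sx', 'width'], 1);  // 24: the last defined alias in the list wins
convnet.getopt(opt, 'height', 1);         // 1: falls back to the default value
var w = convnet.zeros(10);                // Float64Array of 10 zeros (plain Array if ArrayBuffer is missing)
var order = convnet.randperm(5);          // e.g. [3,0,4,1,2]
var pick = convnet.weightedSample(['a','b','c'], [0.5,0.3,0.2]); // 'a' roughly half the time
*/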
/*** convnet_vol ***/
// Vol is the basic building block of all data in a net.
// it is essentially just a 3D volume of numbers, with a
// width (sx), height (sy), and depth (depth).
// it is used to hold data for all filters, all volumes,
// all weights, and also stores all gradients w.r.t.
// the data. c is optionally a value to initialize the volume
// with. If c is missing, fills the Vol with random numbers.
var Vol = function(sx, sy, depth, c) {
// this is how you check if a variable is an array. Oh, Javascript :)
if(Object.prototype.toString.call(sx) === '[object Array]') {
// we were given a list in sx, assume 1D volume and fill it up
this.sx = 1;
this.sy = 1;
this.depth = sx.length;
// we have to do the following copy because we want to use
// fast typed arrays, not an ordinary javascript array
this.w = convnet.zeros(this.depth);
this.dw = convnet.zeros(this.depth);
for(var i=0;i<this.depth;i++) {
this.w[i] = sx[i];
}
} else {
// we were given dimensions of the vol
this.sx = sx;
this.sy = sy;
this.depth = depth;
var n = sx*sy*depth;
this.w = convnet.zeros(n);
this.dw = convnet.zeros(n);
if(typeof c === 'undefined') {
// weight normalization is done to equalize the output
// variance of every neuron, otherwise neurons with a lot
// of incoming connections have outputs of larger variance
var scale = Math.sqrt(1.0/(sx*sy*depth));
for(var i=0;i<n;i++) {
this.w[i] = convnet.randn(0.0, scale);
}
} else {
for(var i=0;i<n;i++) {
this.w[i] = c;
}
}
}
}
Vol.prototype = {
get: function(x, y, d) {
var ix=((this.sx * y)+x)*this.depth+d;
return this.w[ix];
},
set: function(x, y, d, v) {
var ix=((this.sx * y)+x)*this.depth+d;
this.w[ix] = v;
},
add: function(x, y, d, v) {
var ix=((this.sx * y)+x)*this.depth+d;
this.w[ix] += v;
},
get_grad: function(x, y, d) {
var ix = ((this.sx * y)+x)*this.depth+d;
return this.dw[ix];
},
set_grad: function(x, y, d, v) {
var ix = ((this.sx * y)+x)*this.depth+d;
this.dw[ix] = v;
},
add_grad: function(x, y, d, v) {
var ix = ((this.sx * y)+x)*this.depth+d;
this.dw[ix] += v;
},
cloneAndZero: function() { return new Vol(this.sx, this.sy, this.depth, 0.0)},
clone: function() {
var V = new Vol(this.sx, this.sy, this.depth, 0.0);
var n = this.w.length;
for(var i=0;i<n;i++) { V.w[i] = this.w[i]; }
return V;
},
addFrom: function(V) { for(var k=0;k<this.w.length;k++) { this.w[k] += V.w[k]; }},
addFromScaled: function(V, a) { for(var k=0;k<this.w.length;k++) { this.w[k] += a*V.w[k]; }},
setConst: function(a) { for(var k=0;k<this.w.length;k++) { this.w[k] = a; }},
toJSON: function() {
// todo: we may want to only save d most significant digits to save space
var json = {}
json.sx = this.sx;
json.sy = this.sy;
json.depth = this.depth;
json.w = this.w;
return json;
// we wont back up gradients to save space
},
fromJSON: function(json) {
this.sx = json.sx;
this.sy = json.sy;
this.depth = json.depth;
var n = this.sx*this.sy*this.depth;
this.w = convnet.zeros(n);
this.dw = convnet.zeros(n);
// copy over the elements.
for(var i=0;i<n;i++) {
this.w[i] = json.w[i];
}
}
}
convnet.Vol = Vol;
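// Usage sketch for Vol (illustrative only): construct from explicit dimensions
// or from a plain 1D array, then address entries by (x, y, depth).
/*
var v = new convnet.Vol(3, 3, 2);          // 3x3x2 volume, weights gaussian-initialized
v.set(0, 1, 0, 5.0);
v.get(0, 1, 0);                            // 5.0
var x = new convnet.Vol([1.2, -0.5, 3.0]); // 1x1x3 volume holding the given values
x.w.length;                                // 3
*/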
/*** convnet_vol_util ***/
var Vol = convnet.Vol; // convenience
// Volume utilities
// intended for use with data augmentation
// crop is the size of output
// dx,dy are offset wrt incoming volume, of the shift
// fliplr is boolean on whether we also want to flip left<->right
var augment = function(V, crop, dx, dy, fliplr) {
// note assumes square outputs of size crop x crop
if(typeof(fliplr)==='undefined') var fliplr = false;
if(typeof(dx)==='undefined') var dx = convnet.randi(0, V.sx - crop);
if(typeof(dy)==='undefined') var dy = convnet.randi(0, V.sy - crop);
// randomly sample a crop in the input volume
var W;
if(crop !== V.sx || dx!==0 || dy!==0) {
W = new Vol(crop, crop, V.depth, 0.0);
for(var x=0;x<crop;x++) {
for(var y=0;y<crop;y++) {
if(x+dx<0 || x+dx>=V.sx || y+dy<0 || y+dy>=V.sy) continue; // oob
for(var d=0;d<V.depth;d++) {
W.set(x,y,d,V.get(x+dx,y+dy,d)); // copy data over
}
}
}
} else {
W = V;
}
if(fliplr) {
// flip volume horizontally
var W2 = W.cloneAndZero();
for(var x=0;x<W.sx;x++) {
for(var y=0;y<W.sy;y++) {
for(var d=0;d<W.depth;d++) {
W2.set(x,y,d,W.get(W.sx - x - 1,y,d)); // copy data over
}
}
}
W = W2; //swap
}
return W;
}
// img is a DOM element that contains a loaded image
// returns a Vol of size (W, H, 4). 4 is for RGBA
var img_to_vol = function(img, convert_grayscale) {
if(typeof(convert_grayscale)==='undefined') var convert_grayscale = false;
var canvas = document.createElement('canvas');
canvas.width = img.width;
canvas.height = img.height;
var ctx = canvas.getContext("2d");
// due to a Firefox bug
try {
ctx.drawImage(img, 0, 0);
} catch (e) {
if (e.name === "NS_ERROR_NOT_AVAILABLE") {
// sometimes happens, lets just abort
return false;
} else {
throw e;
}
}
try {
var img_data = ctx.getImageData(0, 0, canvas.width, canvas.height);
} catch (e) {
if(e.name === 'IndexSizeError') {
return false; // not sure what causes this sometimes but okay abort
} else {
throw e;
}
}
// prepare the input: get pixels and normalize them
var p = img_data.data;
var W = img.width;
var H = img.height;
var pv = []
for(var i=0;i<p.length;i++) {
pv.push(p[i]/255.0-0.5); // normalize image pixels to [-0.5, 0.5]
}
var x = new Vol(W, H, 4, 0.0); //input volume (image)
x.w = pv;
if(convert_grayscale) {
// flatten into depth=1 array
var x1 = new Vol(W, H, 1, 0.0);
for(var i=0;i<W;i++) {
for(var j=0;j<H;j++) {
x1.set(i,j,0,x.get(i,j,0));
}
}
x = x1;
}
return x;
}
convnet.augment = augment;
convnet.img_to_vol = img_to_vol;
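// Usage sketch for augment (illustrative only): sample a random crop of an
// input volume, optionally mirrored, as typically done for data augmentation.
/*
var V = new convnet.Vol(32, 32, 3);               // e.g. a 32x32 RGB image volume
var crop = convnet.augment(V, 24);                // random 24x24 crop (dx, dy sampled internally)
var flipped = convnet.augment(V, 24, 0, 0, true); // crop anchored at (0,0), flipped left<->right
*/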
/*** convnet_layers_dotproducts ***/
// This file contains all layers that do dot products with input,
// but usually in a different connectivity pattern and weight sharing
// schemes:
// - FullyConn is fully connected dot products
// - ConvLayer does convolutions (so weight sharing spatially)
// putting them together in one file because they are very similar
var ConvLayer = function(opt) {
var opt = opt || {};
// required
this.out_depth = opt.filters;
this.sx = opt.sx; // filter size. Should be odd if possible, it's cleaner.
this.in_depth = opt.in_depth;
this.in_sx = opt.in_sx;
this.in_sy = opt.in_sy;
// optional
this.sy = typeof opt.sy !== 'undefined' ? opt.sy : this.sx;
this.stride = typeof opt.stride !== 'undefined' ? opt.stride : 1; // stride at which we apply filters to input volume
this.pad = typeof opt.pad !== 'undefined' ? opt.pad : 0; // amount of 0 padding to add around borders of input volume
this.l1_decay_mul = typeof opt.l1_decay_mul !== 'undefined' ? opt.l1_decay_mul : 0.0;
this.l2_decay_mul = typeof opt.l2_decay_mul !== 'undefined' ? opt.l2_decay_mul : 1.0;
// computed
// note we are doing floor, so if the strided convolution of the filter doesnt fit into the input
// volume exactly, the output volume will be trimmed and not contain the (incomplete) computed
// final application.
this.out_sx = Math.floor((this.in_sx + this.pad * 2 - this.sx) / this.stride + 1);
this.out_sy = Math.floor((this.in_sy + this.pad * 2 - this.sy) / this.stride + 1);
this.layer_type = 'conv';
// initializations
var bias = typeof opt.bias_pref !== 'undefined' ? opt.bias_pref : 0.0;
this.filters = [];
for(var i=0;i<this.out_depth;i++) { this.filters.push(new Vol(this.sx, this.sy, this.in_depth)); }
this.biases = new Vol(1, 1, this.out_depth, bias);
}
ConvLayer.prototype = {
forward: function(V, is_training) {
// optimized code by @mdda that achieves 2x speedup over previous version
this.in_act = V;
var A = new Vol(this.out_sx |0, this.out_sy |0, this.out_depth |0, 0.0);
var V_sx = V.sx |0;
var V_sy = V.sy |0;
var xy_stride = this.stride |0;
for(var d=0;d<this.out_depth;d++) {
var f = this.filters[d];
var x = -this.pad |0;
var y = -this.pad |0;
for(var ay=0; ay<this.out_sy; y+=xy_stride,ay++) { // xy_stride
x = -this.pad |0;
for(var ax=0; ax<this.out_sx; x+=xy_stride,ax++) { // xy_stride
// convolve centered at this particular location
var a = 0.0;
for(var fy=0;fy<f.sy;fy++) {
var oy = y+fy; // coordinates in the original input array coordinates
for(var fx=0;fx<f.sx;fx++) {
var ox = x+fx;
if(oy>=0 && oy<V_sy && ox>=0 && ox<V_sx) {
for(var fd=0;fd<f.depth;fd++) {
// avoid function call overhead (x2) for efficiency, compromise modularity :(
a += f.w[((f.sx * fy)+fx)*f.depth+fd] * V.w[((V_sx * oy)+ox)*V.depth+fd];
}
}
}
}
a += this.biases.w[d];
A.set(ax, ay, d, a);
}
}
}
this.out_act = A;
return this.out_act;
},
backward: function() {
var V = this.in_act;
V.dw = convnet.zeros(V.w.length); // zero out gradient wrt bottom data, we're about to fill it
var V_sx = V.sx |0;
var V_sy = V.sy |0;
var xy_stride = this.stride |0;
for(var d=0;d<this.out_depth;d++) {
var f = this.filters[d];
var x = -this.pad |0;
var y = -this.pad |0;
for(var ay=0; ay<this.out_sy; y+=xy_stride,ay++) { // xy_stride
x = -this.pad |0;
for(var ax=0; ax<this.out_sx; x+=xy_stride,ax++) { // xy_stride
// convolve centered at this particular location
var chain_grad = this.out_act.get_grad(ax,ay,d); // gradient from above, from chain rule
for(var fy=0;fy<f.sy;fy++) {
var oy = y+fy; // coordinates in the original input array coordinates
for(var fx=0;fx<f.sx;fx++) {
var ox = x+fx;
if(oy>=0 && oy<V_sy && ox>=0 && ox<V_sx) {
for(var fd=0;fd<f.depth;fd++) {
// avoid function call overhead (x2) for efficiency, compromise modularity :(
var ix1 = ((V_sx * oy)+ox)*V.depth+fd;
var ix2 = ((f.sx * fy)+fx)*f.depth+fd;
f.dw[ix2] += V.w[ix1]*chain_grad;
V.dw[ix1] += f.w[ix2]*chain_grad;
}
}
}
}
this.biases.dw[d] += chain_grad;
}
}
}
},
getParamsAndGrads: function() {
var response = [];
for(var i=0;i<this.out_depth;i++) {
response.push({params: this.filters[i].w, grads: this.filters[i].dw, l2_decay_mul: this.l2_decay_mul, l1_decay_mul: this.l1_decay_mul});
}
response.push({params: this.biases.w, grads: this.biases.dw, l1_decay_mul: 0.0, l2_decay_mul: 0.0});
return response;
},
toJSON: function() {
var json = {};
json.sx = this.sx; // filter size in x, y dims
json.sy = this.sy;
json.stride = this.stride;
json.in_depth = this.in_depth;
json.out_depth = this.out_depth;
json.out_sx = this.out_sx;
json.out_sy = this.out_sy;
json.layer_type = this.layer_type;
json.l1_decay_mul = this.l1_decay_mul;
json.l2_decay_mul = this.l2_decay_mul;
json.pad = this.pad;
json.filters = [];
for(var i=0;i<this.filters.length;i++) {
json.filters.push(this.filters[i].toJSON());
}
json.biases = this.biases.toJSON();
return json;
},
fromJSON: function(json) {
this.out_depth = json.out_depth;
this.out_sx = json.out_sx;
this.out_sy = json.out_sy;
this.layer_type = json.layer_type;
this.sx = json.sx; // filter size in x, y dims
this.sy = json.sy;
this.stride = json.stride;
this.in_depth = json.in_depth; // depth of input volume
this.filters = [];
this.l1_decay_mul = typeof json.l1_decay_mul !== 'undefined' ? json.l1_decay_mul : 1.0;
this.l2_decay_mul = typeof json.l2_decay_mul !== 'undefined' ? json.l2_decay_mul : 1.0;
this.pad = typeof json.pad !== 'undefined' ? json.pad : 0;
for(var i=0;i<json.filters.length;i++) {
var v = new Vol(0,0,0,0);
v.fromJSON(json.filters[i]);
this.filters.push(v);
}
this.biases = new Vol(0,0,0,0);
this.biases.fromJSON(json.biases);
}
}
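// Worked example of the output-size arithmetic above (a sketch, not executed):
// out_sx = floor((in_sx + 2*pad - sx)/stride + 1), so a 32x32x3 input with 5x5
// filters, pad 2 and stride 1 gives floor((32 + 4 - 5)/1 + 1) = 32, i.e. the
// spatial size is preserved while out_depth equals the number of filters.
/*
var conv = new convnet.ConvLayer({in_sx:32, in_sy:32, in_depth:3,
                                  filters:16, sx:5, stride:1, pad:2});
conv.out_sx;    // 32
conv.out_sy;    // 32
conv.out_depth; // 16
*/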
var FullyConnLayer = function(opt) {
var opt = opt || {};
// required
// ok fine we will allow 'filters' as the word as well
this.out_depth = typeof opt.num_neurons !== 'undefined' ? opt.num_neurons : opt.filters;
// optional
this.l1_decay_mul = typeof opt.l1_decay_mul !== 'undefined' ? opt.l1_decay_mul : 0.0;
this.l2_decay_mul = typeof opt.l2_decay_mul !== 'undefined' ? opt.l2_decay_mul : 1.0;
// computed
this.num_inputs = opt.in_sx * opt.in_sy * opt.in_depth;
this.out_sx = 1;
this.out_sy = 1;
this.layer_type = 'fc';
// initializations
var bias = typeof opt.bias_pref !== 'undefined' ? opt.bias_pref : 0.0;
this.filters = [];
for(var i=0;i<this.out_depth ;i++) { this.filters.push(new Vol(1, 1, this.num_inputs)); }
this.biases = new Vol(1, 1, this.out_depth, bias);
}
FullyConnLayer.prototype = {
forward: function(V, is_training) {
this.in_act = V;
var A = new Vol(1, 1, this.out_depth, 0.0);
var Vw = V.w;
for(var i=0;i<this.out_depth;i++) {
var a = 0.0;
var wi = this.filters[i].w;
for(var d=0;d<this.num_inputs;d++) {
a += Vw[d] * wi[d]; // for efficiency use Vols directly for now
}
a += this.biases.w[i];
A.w[i] = a;
}
this.out_act = A;
return this.out_act;
},
backward: function() {
var V = this.in_act;
V.dw = convnet.zeros(V.w.length); // zero out the gradient in input Vol
// compute gradient wrt weights and data
for(var i=0;i<this.out_depth;i++) {
var tfi = this.filters[i];
var chain_grad = this.out_act.dw[i];
for(var d=0;d<this.num_inputs;d++) {
V.dw[d] += tfi.w[d]*chain_grad; // grad wrt input data
tfi.dw[d] += V.w[d]*chain_grad; // grad wrt params
}
this.biases.dw[i] += chain_grad;
}
},
getParamsAndGrads: function() {
var response = [];
for(var i=0;i<this.out_depth;i++) {
response.push({params: this.filters[i].w, grads: this.filters[i].dw, l1_decay_mul: this.l1_decay_mul, l2_decay_mul: this.l2_decay_mul});
}
response.push({params: this.biases.w, grads: this.biases.dw, l1_decay_mul: 0.0, l2_decay_mul: 0.0});
return response;
},
toJSON: function() {
var json = {};
json.out_depth = this.out_depth;
json.out_sx = this.out_sx;
json.out_sy = this.out_sy;
json.layer_type = this.layer_type;
json.num_inputs = this.num_inputs;
json.l1_decay_mul = this.l1_decay_mul;
json.l2_decay_mul = this.l2_decay_mul;
json.filters = [];
for(var i=0;i<this.filters.length;i++) {
json.filters.push(this.filters[i].toJSON());
}
json.biases = this.biases.toJSON();
return json;
},
fromJSON: function(json) {
this.out_depth = json.out_depth;
this.out_sx = json.out_sx;
this.out_sy = json.out_sy;
this.layer_type = json.layer_type;
this.num_inputs = json.num_inputs;
this.l1_decay_mul = typeof json.l1_decay_mul !== 'undefined' ? json.l1_decay_mul : 1.0;
this.l2_decay_mul = typeof json.l2_decay_mul !== 'undefined' ? json.l2_decay_mul : 1.0;
this.filters = [];
for(var i=0;i<json.filters.length;i++) {
var v = new Vol(0,0,0,0);
v.fromJSON(json.filters[i]);
this.filters.push(v);
}
this.biases = new Vol(0,0,0,0);
this.biases.fromJSON(json.biases);
}
}
convnet.ConvLayer = ConvLayer;
convnet.FullyConnLayer = FullyConnLayer;
/*** convnet_layers_pool ***/
var PoolLayer = function(opt) {
var opt = opt || {};
// required
this.sx = opt.sx; // filter size
this.in_depth = opt.in_depth;
this.in_sx = opt.in_sx;
this.in_sy = opt.in_sy;
// optional
this.sy = typeof opt.sy !== 'undefined' ? opt.sy : this.sx;
this.stride = typeof opt.stride !== 'undefined' ? opt.stride : 2;
this.pad = typeof opt.pad !== 'undefined' ? opt.pad : 0; // amount of 0 padding to add around borders of input volume
// computed
this.out_depth = this.in_depth;
this.out_sx = Math.floor((this.in_sx + this.pad * 2 - this.sx) / this.stride + 1);
this.out_sy = Math.floor((this.in_sy + this.pad * 2 - this.sy) / this.stride + 1);
this.layer_type = 'pool';
// store switches for x,y coordinates for where the max comes from, for each output neuron
this.switchx = convnet.zeros(this.out_sx*this.out_sy*this.out_depth);
this.switchy = convnet.zeros(this.out_sx*this.out_sy*this.out_depth);
}
PoolLayer.prototype = {
forward: function(V, is_training) {
this.in_act = V;
var A = new Vol(this.out_sx, this.out_sy, this.out_depth, 0.0);
var n=0; // a counter for switches
for(var d=0;d<this.out_depth;d++) {
var x = -this.pad;
var y = -this.pad;
for(var ax=0; ax<this.out_sx; x+=this.stride,ax++) {
y = -this.pad;
for(var ay=0; ay<this.out_sy; y+=this.stride,ay++) {
// convolve centered at this particular location
var a = -99999; // hopefully small enough ;\
var winx=-1,winy=-1;
for(var fx=0;fx<this.sx;fx++) {
for(var fy=0;fy<this.sy;fy++) {
var oy = y+fy;
var ox = x+fx;
if(oy>=0 && oy<V.sy && ox>=0 && ox<V.sx) {
var v = V.get(ox, oy, d);
// perform max pooling and store pointers to where
// the max came from. This will speed up backprop
// and can help make nice visualizations in future
if(v > a) { a = v; winx=ox; winy=oy;}
}
}
}
this.switchx[n] = winx;
this.switchy[n] = winy;
n++;
A.set(ax, ay, d, a);
}
}
}
this.out_act = A;
return this.out_act;
},
backward: function() {
// pooling layers have no parameters, so simply compute
// gradient wrt data here
var V = this.in_act;
V.dw = convnet.zeros(V.w.length); // zero out gradient wrt data
var A = this.out_act; // computed in forward pass
var n = 0;
for(var d=0;d<this.out_depth;d++) {
var x = -this.pad;
var y = -this.pad;
for(var ax=0; ax<this.out_sx; x+=this.stride,ax++) {
y = -this.pad;
for(var ay=0; ay<this.out_sy; y+=this.stride,ay++) {
var chain_grad = this.out_act.get_grad(ax,ay,d);
V.add_grad(this.switchx[n], this.switchy[n], d, chain_grad);
n++;
}
}
}
},
getParamsAndGrads: function() {
return [];
},
toJSON: function() {
var json = {};
json.sx = this.sx;
json.sy = this.sy;
json.stride = this.stride;
json.in_depth = this.in_depth;
json.out_depth = this.out_depth;
json.out_sx = this.out_sx;
json.out_sy = this.out_sy;
json.layer_type = this.layer_type;
json.pad = this.pad;
return json;
},
fromJSON: function(json) {
this.out_depth = json.out_depth;
this.out_sx = json.out_sx;
this.out_sy = json.out_sy;
this.layer_type = json.layer_type;
this.sx = json.sx;
this.sy = json.sy;
this.stride = json.stride;
this.in_depth = json.in_depth;
this.pad = typeof json.pad !== 'undefined' ? json.pad : 0; // backwards compatibility
this.switchx = convnet.zeros(this.out_sx*this.out_sy*this.out_depth); // need to re-init these appropriately
this.switchy = convnet.zeros(this.out_sx*this.out_sy*this.out_depth);
}
}
convnet.PoolLayer = PoolLayer;
/*** convnet_layers_input ***/
var getopt = convnet.getopt;
var InputLayer = function(opt) {
var opt = opt || {};
// required: depth
this.out_depth = getopt(opt, ['out_depth', 'depth'], 0);
// optional: default these dimensions to 1
this.out_sx = getopt(opt, ['out_sx', 'sx', 'width'], 1);
this.out_sy = getopt(opt, ['out_sy', 'sy', 'height'], 1);
// computed
this.layer_type = 'input';
}
InputLayer.prototype = {
forward: function(V, is_training) {
this.in_act = V;
this.out_act = V;
return this.out_act; // simply identity function for now
},
backward: function() { },
getParamsAndGrads: function() {
return [];
},
toJSON: function() {
var json = {};
json.out_depth = this.out_depth;
json.out_sx = this.out_sx;
json.out_sy = this.out_sy;
json.layer_type = this.layer_type;
return json;
},
fromJSON: function(json) {
this.out_depth = json.out_depth;
this.out_sx = json.out_sx;
this.out_sy = json.out_sy;
this.layer_type = json.layer_type;
}
}
convnet.InputLayer = InputLayer;
/*** convnet_layers_loss ***/
// Layers that implement a loss. Currently these are the layers that
// can initiate a backward() pass. In future we probably want a more
// flexible system that can accommodate multiple losses to do multi-task
// learning, and stuff like that. But for now, one of the layers in this
// file must be the final layer in a Net.
// This is a classifier, with N discrete classes from 0 to N-1
// it gets a stream of N incoming numbers and computes the softmax
// function (exponentiate and normalize to sum to 1 as probabilities should)
var SoftmaxLayer = function(opt) {
var opt = opt || {};
// computed
this.num_inputs = opt.in_sx * opt.in_sy * opt.in_depth;
this.out_depth = this.num_inputs;
this.out_sx = 1;
this.out_sy = 1;
this.layer_type = 'softmax';
}
SoftmaxLayer.prototype = {
forward: function(V, is_training) {
this.in_act = V;
var A = new Vol(1, 1, this.out_depth, 0.0);
// compute max activation
var as = V.w;
var amax = V.w[0];
for(var i=1;i<this.out_depth;i++) {
if(as[i] > amax) amax = as[i];
}
// compute exponentials (carefully to not blow up)
var es = convnet.zeros(this.out_depth);
var esum = 0.0;
for(var i=0;i<this.out_depth;i++) {
var e = Math.exp(as[i] - amax);
esum += e;
es[i] = e;
}
// normalize and output to sum to one
for(var i=0;i<this.out_depth;i++) {
es[i] /= esum;
A.w[i] = es[i];
}
this.es = es; // save these for backprop
this.out_act = A;
return this.out_act;
},
backward: function(y) {
// compute and accumulate gradient wrt weights and bias of this layer
var x = this.in_act;
x.dw = convnet.zeros(x.w.length); // zero out the gradient of input Vol
for(var i=0;i<this.out_depth;i++) {
var indicator = i === y ? 1.0 : 0.0;
var mul = -(indicator - this.es[i]);
x.dw[i] = mul;
}
// loss is the class negative log likelihood
return -Math.log(this.es[y]);
},
getParamsAndGrads: function() {
return [];
},
toJSON: function() {
var json = {};
json.out_depth = this.out_depth;
json.out_sx = this.out_sx;
json.out_sy = this.out_sy;
json.layer_type = this.layer_type;
json.num_inputs = this.num_inputs;
return json;
},
fromJSON: function(json) {
this.out_depth = json.out_depth;
this.out_sx = json.out_sx;
this.out_sy = json.out_sy;
this.layer_type = json.layer_type;
this.num_inputs = json.num_inputs;
}
}
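// Worked example (not executed): for raw scores [1, 2, 3] the forward pass
// outputs exp(s_i - max)/sum, roughly [0.090, 0.245, 0.665]; backward(y) then
// sets the input gradient to (p_i - 1{i === y}) and returns the negative log
// likelihood -log(p_y), about 0.41 for y = 2.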
// implements an L2 regression cost layer,
// so penalizes \sum_i(||x_i - y_i||^2), where x is its input
// and y is the user-provided array of "correct" values.
var RegressionLayer = function(opt) {
var opt = opt || {};
// computed
this.num_inputs = opt.in_sx * opt.in_sy * opt.in_depth;
this.out_depth = this.num_inputs;
this.out_sx = 1;
this.out_sy = 1;
this.layer_type = 'regression';
}
RegressionLayer.prototype = {
forward: function(V, is_training) {
this.in_act = V;
this.out_act = V;
return V; // identity function
},
// y is a list here of size num_inputs
// or it can be a number if only one value is regressed
// or it can be a struct {dim: i, val: x} where we only want to
// regress on dimension i and asking it to have value x
backward: function(y) {
// compute and accumulate gradient wrt weights and bias of this layer
var x = this.in_act;
x.dw = convnet.zeros(x.w.length); // zero out the gradient of input Vol
var loss = 0.0;
if(y instanceof Array || y instanceof Float64Array) {
for(var i=0;i<this.out_depth;i++) {
var dy = x.w[i] - y[i];
x.dw[i] = dy;
loss += 0.5*dy*dy;
}
} else if(typeof y === 'number') {
// lets hope that only one number is being regressed
var dy = x.w[0] - y;
x.dw[0] = dy;
loss += 0.5*dy*dy;
} else {
// assume it is a struct with entries .dim and .val
// and we pass gradient only along dimension dim to be equal to val
var i = y.dim;
var yi = y.val;
var dy = x.w[i] - yi;
x.dw[i] = dy;
loss += 0.5*dy*dy;
}
return loss;
},
getParamsAndGrads: function() {
return [];
},
toJSON: function() {
var json = {};
json.out_depth = this.out_depth;
json.out_sx = this.out_sx;
json.out_sy = this.out_sy;
json.layer_type = this.layer_type;
json.num_inputs = this.num_inputs;
return json;
},
fromJSON: function(json) {
this.out_depth = json.out_depth;
this.out_sx = json.out_sx;
this.out_sy = json.out_sy;
this.layer_type = json.layer_type;
this.num_inputs = json.num_inputs;
}
}
var SVMLayer = function(opt) {
var opt = opt || {};
// computed
this.num_inputs = opt.in_sx * opt.in_sy * opt.in_depth;
this.out_depth = this.num_inputs;
this.out_sx = 1;
this.out_sy = 1;
this.layer_type = 'svm';
}
SVMLayer.prototype = {
forward: function(V, is_training) {
this.in_act = V;
this.out_act = V; // nothing to do, output raw scores
return V;
},
backward: function(y) {
// compute and accumulate gradient wrt weights and bias of this layer
var x = this.in_act;
x.dw = convnet.zeros(x.w.length); // zero out the gradient of input Vol
// we're using structured loss here, which means that the score
// of the ground truth should be higher than the score of any other
// class, by a margin
var yscore = x.w[y]; // score of ground truth
var margin = 1.0;
var loss = 0.0;
for(var i=0;i<this.out_depth;i++) {
if(y === i) { continue; }
var ydiff = -yscore + x.w[i] + margin;
if(ydiff > 0) {
// violating dimension, apply loss
x.dw[i] += 1;
x.dw[y] -= 1;
loss += ydiff;
}
}
return loss;
},
getParamsAndGrads: function() {
return [];
},
toJSON: function() {
var json = {};
json.out_depth = this.out_depth;
json.out_sx = this.out_sx;
json.out_sy = this.out_sy;
json.layer_type = this.layer_type;
json.num_inputs = this.num_inputs;
return json;
},
fromJSON: function(json) {
this.out_depth = json.out_depth;
this.out_sx = json.out_sx;
this.out_sy = json.out_sy;
this.layer_type = json.layer_type;
this.num_inputs = json.num_inputs;
}
}
convnet.RegressionLayer = RegressionLayer;
convnet.SoftmaxLayer = SoftmaxLayer;
convnet.SVMLayer = SVMLayer;
/*** convnet_layers_nonlinearities ***/
// Implements ReLU nonlinearity elementwise
// x -> max(0, x)
// the output is in [0, inf)
var ReluLayer = function(opt) {
var opt = opt || {};
// computed
this.out_sx = opt.in_sx;
this.out_sy = opt.in_sy;
this.out_depth = opt.in_depth;
this.layer_type = 'relu';
}
ReluLayer.prototype = {
forward: function(V, is_training) {
this.in_act = V;
var V2 = V.clone();
var N = V.w.length;
var V2w = V2.w;
for(var i=0;i<N;i++) {
if(V2w[i] < 0) V2w[i] = 0; // threshold at 0
}
this.out_act = V2;
return this.out_act;
},
backward: function() {
var V = this.in_act; // we need to set dw of this
var V2 = this.out_act;
var N = V.w.length;
V.dw = convnet.zeros(N); // zero out gradient wrt data
for(var i=0;i<N;i++) {
if(V2.w[i] <= 0) V.dw[i] = 0; // threshold
else V.dw[i] = V2.dw[i];
}
},
getParamsAndGrads: function() {
return [];
},
toJSON: function() {
var json = {};
json.out_depth = this.out_depth;
json.out_sx = this.out_sx;
json.out_sy = this.out_sy;
json.layer_type = this.layer_type;
return json;
},
fromJSON: function(json) {
this.out_depth = json.out_depth;
this.out_sx = json.out_sx;
this.out_sy = json.out_sy;
this.layer_type = json.layer_type;
}
}
// Implements the sigmoid nonlinearity elementwise
// x -> 1/(1+e^(-x))
// so the output is between 0 and 1.
var SigmoidLayer = function(opt) {
var opt = opt || {};
// computed
this.out_sx = opt.in_sx;
this.out_sy = opt.in_sy;
this.out_depth = opt.in_depth;
this.layer_type = 'sigmoid';
}
SigmoidLayer.prototype = {
forward: function(V, is_training) {
this.in_act = V;
var V2 = V.cloneAndZero();
var N = V.w.length;
var V2w = V2.w;
var Vw = V.w;
for(var i=0;i<N;i++) {
V2w[i] = 1.0/(1.0+Math.exp(-Vw[i]));
}
this.out_act = V2;
return this.out_act;
},
backward: function() {
var V = this.in_act; // we need to set dw of this
var V2 = this.out_act;
var N = V.w.length;
V.dw = convnet.zeros(N); // zero out gradient wrt data
for(var i=0;i<N;i++) {
var v2wi = V2.w[i];
V.dw[i] = v2wi * (1.0 - v2wi) * V2.dw[i];
}
},
getParamsAndGrads: function() {
return [];
},
toJSON: function() {
var json = {};
json.out_depth = this.out_depth;
json.out_sx = this.out_sx;
json.out_sy = this.out_sy;
json.layer_type = this.layer_type;
return json;
},
fromJSON: function(json) {
this.out_depth = json.out_depth;
this.out_sx = json.out_sx;
this.out_sy = json.out_sy;
this.layer_type = json.layer_type;
}
}
// Implements the maxout nonlinearity, which computes
// x -> max(x)
// where x is a vector of size group_size. Ideally of course,
// the input size should be exactly divisible by group_size
var MaxoutLayer = function(opt) {
var opt = opt || {};
// required
this.group_size = typeof opt.group_size !== 'undefined' ? opt.group_size : 2;
// computed
this.out_sx = opt.in_sx;
this.out_sy = opt.in_sy;
this.out_depth = Math.floor(opt.in_depth / this.group_size);
this.layer_type = 'maxout';
this.switches = convnet.zeros(this.out_sx*this.out_sy*this.out_depth); // useful for backprop
}
MaxoutLayer.prototype = {
forward: function(V, is_training) {
this.in_act = V;
var N = this.out_depth;
var V2 = new Vol(this.out_sx, this.out_sy, this.out_depth, 0.0);
// optimization branch. If we're operating on 1D arrays we dont have
// to worry about keeping track of x,y,d coordinates inside
// input volumes. In convnets we do :(
if(this.out_sx === 1 && this.out_sy === 1) {
for(var i=0;i<N;i++) {
var ix = i * this.group_size; // base index offset
var a = V.w[ix];
var ai = 0;
for(var j=1;j<this.group_size;j++) {
var a2 = V.w[ix+j];
if(a2 > a) {
a = a2;
ai = j;
}
}
V2.w[i] = a;
this.switches[i] = ix + ai;
}
} else {
var n=0; // counter for switches
for(var x=0;x<V.sx;x++) {
for(var y=0;y<V.sy;y++) {
for(var i=0;i<N;i++) {
var ix = i * this.group_size;
var a = V.get(x, y, ix);
var ai = 0;
for(var j=1;j<this.group_size;j++) {
var a2 = V.get(x, y, ix+j);
if(a2 > a) {
a = a2;
ai = j;
}
}
V2.set(x,y,i,a);
this.switches[n] = ix + ai;
n++;
}
}
}
}
this.out_act = V2;
return this.out_act;
},
backward: function() {
var V = this.in_act; // we need to set dw of this
var V2 = this.out_act;
var N = this.out_depth;
V.dw = convnet.zeros(V.w.length); // zero out gradient wrt data
// pass the gradient through the appropriate switch
if(this.out_sx === 1 && this.out_sy === 1) {
for(var i=0;i<N;i++) {
var chain_grad = V2.dw[i];
V.dw[this.switches[i]] = chain_grad;
}
} else {
// bleh okay, lets do this the hard way
var n=0; // counter for switches
for(var x=0;x<V2.sx;x++) {
for(var y=0;y<V2.sy;y++) {
for(var i=0;i<N;i++) {
var chain_grad = V2.get_grad(x,y,i);
V.set_grad(x,y,this.switches[n],chain_grad);
n++;
}
}
}
}
},
getParamsAndGrads: function() {
return [];
},
toJSON: function() {
var json = {};
json.out_depth = this.out_depth;
json.out_sx = this.out_sx;
json.out_sy = this.out_sy;
json.layer_type = this.layer_type;
json.group_size = this.group_size;
return json;
},
fromJSON: function(json) {
this.out_depth = json.out_depth;
this.out_sx = json.out_sx;
this.out_sy = json.out_sy;
this.layer_type = json.layer_type;
this.group_size = json.group_size;
this.switches = convnet.zeros(this.out_sx*this.out_sy*this.out_depth); // re-init switches to one per output unit, matching the constructor
}
}
// a helper function: Math.tanh only became part of ECMAScript in ES2015 (ES6), so compute it from exp for older engines.
function tanh(x) {
var y = Math.exp(2 * x);
return (y - 1) / (y + 1);
}
// Implements the tanh nonlinearity elementwise
// x -> tanh(x)
// so the output is between -1 and 1.
var TanhLayer = function(opt) {
var opt = opt || {};
// computed
this.out_sx = opt.in_sx;
this.out_sy = opt.in_sy;
this.out_depth = opt.in_depth;
this.layer_type = 'tanh';
}
TanhLayer.prototype = {
forward: function(V, is_training) {
this.in_act = V;
var V2 = V.cloneAndZero();
var N = V.w.length;
for(var i=0;i<N;i++) {
V2.w[i] = tanh(V.w[i]);
}
this.out_act = V2;
return this.out_act;
},
backward: function() {
var V = this.in_act; // we need to set dw of this
var V2 = this.out_act;
var N = V.w.length;
V.dw = convnet.zeros(N); // zero out gradient wrt data
for(var i=0;i<N;i++) {
var v2wi = V2.w[i];
V.dw[i] = (1.0 - v2wi * v2wi) * V2.dw[i];
}
},
getParamsAndGrads: function() {
return [];
},
toJSON: function() {
var json = {};
json.out_depth = this.out_depth;
json.out_sx = this.out_sx;
json.out_sy = this.out_sy;
json.layer_type = this.layer_type;
return json;
},
fromJSON: function(json) {
this.out_depth = json.out_depth;
this.out_sx = json.out_sx;
this.out_sy = json.out_sy;
this.layer_type = json.layer_type;
}
}
convnet.TanhLayer = TanhLayer;
convnet.MaxoutLayer = MaxoutLayer;
convnet.ReluLayer = ReluLayer;
convnet.SigmoidLayer = SigmoidLayer;
/*** convnet_layers_dropout ***/
// An inefficient dropout layer
// Note this is not the most efficient implementation, since the layer before
// computed all these activations and now we're just going to drop them :(
// same goes for backward pass. Also, if we wanted to be efficient at test time
// we could equivalently be clever and upscale during train and copy pointers during test
// todo: make more efficient.
var DropoutLayer = function(opt) {
var opt = opt || {};
// computed
this.out_sx = opt.in_sx;
this.out_sy = opt.in_sy;
this.out_depth = opt.in_depth;
this.layer_type = 'dropout';
this.drop_prob = typeof opt.drop_prob !== 'undefined' ? opt.drop_prob : 0.5;
this.dropped = convnet.zeros(this.out_sx*this.out_sy*this.out_depth);
}
DropoutLayer.prototype = {
forward: function(V, is_training) {
this.in_act = V;
if(typeof(is_training)==='undefined') { is_training = false; } // default is prediction mode
var V2 = V.clone();
var N = V.w.length;
if(is_training) {
// do dropout
for(var i=0;i<N;i++) {
if(Math.random()<this.drop_prob) { V2.w[i]=0; this.dropped[i] = true; } // drop!
else {this.dropped[i] = false;}
}
} else {
// scale the activations during prediction
for(var i=0;i<N;i++) { V2.w[i]*=this.drop_prob; }
}
this.out_act = V2;
return this.out_act; // dummy identity function for now
},
backward: function() {
var V = this.in_act; // we need to set dw of this
var chain_grad = this.out_act;
var N = V.w.length;
V.dw = convnet.zeros(N); // zero out gradient wrt data
for(var i=0;i<N;i++) {
if(!(this.dropped[i])) {
V.dw[i] = chain_grad.dw[i]; // copy over the gradient
}
}
},
getParamsAndGrads: function() {
return [];
},
toJSON: function() {
var json = {};
json.out_depth = this.out_depth;
json.out_sx = this.out_sx;
json.out_sy = this.out_sy;
json.layer_type = this.layer_type;
json.drop_prob = this.drop_prob;
return json;
},
fromJSON: function(json) {
this.out_depth = json.out_depth;
this.out_sx = json.out_sx;
this.out_sy = json.out_sy;
this.layer_type = json.layer_type;
this.drop_prob = json.drop_prob;
this.dropped = convnet.zeros(this.out_sx*this.out_sy*this.out_depth); // re-init the dropout mask storage so training can resume after deserialization
}
}
convnet.DropoutLayer = DropoutLayer;
/*** convnet_layers_normalization ***/
// a somewhat experimental layer for now. I think it works, but I'm not 100% sure:
// the gradient check is a bit funky. I'll look into this a bit later.
// Local Response Normalization in window, along depths of volumes
var LocalResponseNormalizationLayer = function(opt) {
var opt = opt || {};
// required
this.k = opt.k;
this.n = opt.n;
this.alpha = opt.alpha;
this.beta = opt.beta;
// computed
this.out_sx = opt.in_sx;
this.out_sy = opt.in_sy;
this.out_depth = opt.in_depth;
this.layer_type = 'lrn';
// checks
if(this.n%2 === 0) { console.log('WARNING n should be odd for LRN layer'); }
}
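// For reference, the forward pass below computes, per position (x,y) and channel i:
//   out(x,y,i) = a(x,y,i) / (k + (alpha/n) * sum_{j in window of n around i} a(x,y,j)^2)^beta
// and caches the un-exponentiated denominator in S_cache_ for use in backward().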
LocalResponseNormalizationLayer.prototype = {
forward: function(V, is_training) {
this.in_act = V;
var A = V.cloneAndZero();
this.S_cache_ = V.cloneAndZero();
var n2 = Math.floor(this.n/2);
for(var x=0;x<V.sx;x++) {
for(var y=0;y<V.sy;y++) {
for(var i=0;i<V.depth;i++) {
var ai = V.get(x,y,i);
// normalize in a window of size n
var den = 0.0;
for(var j=Math.max(0,i-n2);j<=Math.min(i+n2,V.depth-1);j++) {
var aa = V.get(x,y,j);
den += aa*aa;
}
den *= this.alpha / this.n;
den += this.k;
this.S_cache_.set(x,y,i,den); // will be useful for backprop
den = Math.pow(den, this.beta);
A.set(x,y,i,ai/den);
}
}
}
this.out_act = A;
return this.out_act; // dummy identity function for now
},
backward: function() {
// evaluate gradient wrt data
var V = this.in_act; // we need to set dw of this
V.dw = convnet.zeros(V.w.length); // zero out gradient wrt data
var A = this.out_act; // computed in forward pass
var n2 = Math.floor(this.n/2);
for(var x=0;x<V.sx;x++) {
for(var y=0;y<V.sy;y++) {
for(var i=0;i<V.depth;i++) {
var chain_grad = this.out_act.get_grad(x,y,i);
var S = this.S_cache_.get(x,y,i);
var SB = Math.pow(S, this.beta);
var SB2 = SB*SB;
var ai = V.get(x,y,i); // the activation whose normalized output we are backpropping through
// distribute the gradient over the window of size n using
// d(a_i/S^beta)/d(a_j) = (delta_ij*S^beta - a_i*beta*S^(beta-1)*(alpha/n)*2*a_j) / S^(2*beta)
for(var j=Math.max(0,i-n2);j<=Math.min(i+n2,V.depth-1);j++) {
var aj = V.get(x,y,j);
var g = -ai*this.beta*Math.pow(S,this.beta-1)*this.alpha/this.n*2*aj;
if(j===i) g+= SB;
g /= SB2;
g *= chain_grad;
V.add_grad(x,y,j,g);
}
}
}
}
},
getParamsAndGrads: function() { return []; },
toJSON: function() {
var json = {};
json.k = this.k;
json.n = this.n;
json.alpha = this.alpha; // normalize by size
json.beta = this.beta;
json.out_sx = this.out_sx;
json.out_sy = this.out_sy;
json.out_depth = this.out_depth;
json.layer_type = this.layer_type;
return json;
},
fromJSON: function(json) {
this.k = json.k;
this.n = json.n;
this.alpha = json.alpha; // normalize by size
this.beta = json.beta;
this.out_sx = json.out_sx;
this.out_sy = json.out_sy;
this.out_depth = json.out_depth;
this.layer_type = json.layer_type;
}
}
convnet.LocalResponseNormalizationLayer = LocalResponseNormalizationLayer;
/*** convnet_net ***/
var assert = convnet.assert;
// Net manages a set of layers
// For now constraints: Simple linear order of layers, first layer input last layer a cost layer
var Net = function(options) {
this.layers = [];
}
Net.prototype = {
// takes a list of layer definitions and creates the network layer objects
makeLayers: function(defs) {
// few checks
assert(defs.length >= 2, 'Error! At least one input layer and one loss layer are required.');
assert(defs[0].type === 'input', 'Error! First layer must be the input layer, to declare size of inputs');
// desugar layer_defs for adding activation, dropout layers etc
var desugar = function() {
var new_defs = [];
for(var i=0;i<defs.length;i++) {
var def = defs[i];
if(def.type==='softmax' || def.type==='svm') {
// add an fc layer here, there is no reason the user should
// have to worry about this and we almost always want to
new_defs.push({type:'fc', num_neurons: def.num_classes});
}
if(def.type==='regression') {
// add an fc layer here, there is no reason the user should
// have to worry about this and we almost always want to
new_defs.push({type:'fc', num_neurons: def.num_neurons});
}
if((def.type==='fc' || def.type==='conv')
&& typeof(def.bias_pref) === 'undefined'){
def.bias_pref = 0.0;
if(typeof def.activation !== 'undefined' && def.activation === 'relu') {
def.bias_pref = 0.1; // relus like a bit of positive bias to get gradients early
// otherwise it's technically possible that a relu unit will never turn on (by chance)
// and will never get any gradient and never contribute any computation. Dead relu.
}
}
new_defs.push(def);
if(typeof def.activation !== 'undefined') {
if(def.activation==='relu') { new_defs.push({type:'relu'}); }
else if (def.activation==='sigmoid') { new_defs.push({type:'sigmoid'}); }
else if (def.activation==='tanh') { new_defs.push({type:'tanh'}); }
else if (def.activation==='maxout') {
// create maxout activation, and pass along group size, if provided
var gs = typeof def.group_size !== 'undefined' ? def.group_size : 2;
new_defs.push({type:'maxout', group_size:gs});
}
else { console.log('ERROR unsupported activation ' + def.activation); }
}
if(typeof def.drop_prob !== 'undefined' && def.type !== 'dropout') {
new_defs.push({type:'dropout', drop_prob: def.drop_prob});
}
}
return new_defs;
}
defs = desugar(defs);
// create the layers
this.layers = [];
for(var i=0;i<defs.length;i++) {
var def = defs[i];
if(i>0) {
var prev = this.layers[i-1];
def.in_sx = prev.out_sx;
def.in_sy = prev.out_sy;
def.in_depth = prev.out_depth;
}
switch(def.type) {
case 'fc': this.layers.push(new convnet.FullyConnLayer(def)); break;
case 'lrn': this.layers.push(new convnet.LocalResponseNormalizationLayer(def)); break;
case 'dropout': this.layers.push(new convnet.DropoutLayer(def)); break;
case 'input': this.layers.push(new convnet.InputLayer(def)); break;
case 'softmax': this.layers.push(new convnet.SoftmaxLayer(def)); break;
case 'regression': this.layers.push(new convnet.RegressionLayer(def)); break;
case 'conv': this.layers.push(new convnet.ConvLayer(def)); break;
case 'pool': this.layers.push(new convnet.PoolLayer(def)); break;
case 'relu': this.layers.push(new convnet.ReluLayer(def)); break;
case 'sigmoid': this.layers.push(new convnet.SigmoidLayer(def)); break;
case 'tanh': this.layers.push(new convnet.TanhLayer(def)); break;
case 'maxout': this.layers.push(new convnet.MaxoutLayer(def)); break;
case 'svm': this.layers.push(new convnet.SVMLayer(def)); break;
default: console.log('ERROR: UNRECOGNIZED LAYER TYPE: ' + def.type);
}
}
},
// forward prop the network.
// The trainer class passes is_training = true, but when this function is
// called from outside (not from the trainer), it defaults to prediction mode
forward: function(V, is_training) {
if(typeof(is_training) === 'undefined') is_training = false;
var act = this.layers[0].forward(V, is_training);
for(var i=1;i<this.layers.length;i++) {
act = this.layers[i].forward(act, is_training);
}
return act;
},
getCostLoss: function(V, y) {
this.forward(V, false);
var N = this.layers.length;
var loss = this.layers[N-1].backward(y);
return loss;
},
// backprop: compute gradients wrt all parameters
backward: function(y) {
var N = this.layers.length;
var loss = this.layers[N-1].backward(y); // last layer assumed to be loss layer
for(var i=N-2;i>=0;i--) { // first layer assumed input
this.layers[i].backward();
}
return loss;
},
getParamsAndGrads: function() {
// accumulate parameters and gradients for the entire network
var response = [];
for(var i=0;i<this.layers.length;i++) {
var layer_response = this.layers[i].getParamsAndGrads();
for(var j=0;j<layer_response.length;j++) {
response.push(layer_response[j]);
}
}
return response;
},
getPrediction: function() {
// this is a convenience function for returning the argmax
// prediction, assuming the last layer of the net is a softmax
var S = this.layers[this.layers.length-1];
assert(S.layer_type === 'softmax', 'getPrediction function assumes softmax as last layer of the net!');
var p = S.out_act.w;
var maxv = p[0];
var maxi = 0;
for(var i=1;i<p.length;i++) {
if(p[i] > maxv) { maxv = p[i]; maxi = i;}
}
return maxi; // return index of the class with highest class probability
},
toJSON: function() {
var json = {};
json.layers = [];
for(var i=0;i<this.layers.length;i++) {
json.layers.push(this.layers[i].toJSON());
}
return json;
},
fromJSON: function(json) {
this.layers = [];
for(var i=0;i<json.layers.length;i++) {
var Lj = json.layers[i]
var t = Lj.layer_type;
var L;
if(t==='input') { L = new convnet.InputLayer(); }
if(t==='relu') { L = new convnet.ReluLayer(); }
if(t==='sigmoid') { L = new convnet.SigmoidLayer(); }
if(t==='tanh') { L = new convnet.TanhLayer(); }
if(t==='dropout') { L = new convnet.DropoutLayer(); }
if(t==='conv') { L = new convnet.ConvLayer(); }
if(t==='pool') { L = new convnet.PoolLayer(); }
if(t==='lrn') { L = new convnet.LocalResponseNormalizationLayer(); }
if(t==='softmax') { L = new convnet.SoftmaxLayer(); }
if(t==='regression') { L = new convnet.RegressionLayer(); }
if(t==='fc') { L = new convnet.FullyConnLayer(); }
if(t==='maxout') { L = new convnet.MaxoutLayer(); }
if(t==='svm') { L = new convnet.SVMLayer(); }
L.fromJSON(Lj);
this.layers.push(L);
}
}
}
convnet.Net = Net;
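// Usage sketch for Net (illustrative only): declare layer defs, build the net,
// and run a forward pass. makeLayers() expands the activation/dropout sugar and
// inserts the fc layer that classifier heads need.
/*
var layer_defs = [];
layer_defs.push({type:'input', out_sx:1, out_sy:1, out_depth:2});
layer_defs.push({type:'fc', num_neurons:20, activation:'relu'});
layer_defs.push({type:'softmax', num_classes:10});
var net = new convnet.Net();
net.makeLayers(layer_defs);
var x = new convnet.Vol([0.3, -1.2]);  // a 1x1x2 input
var probs = net.forward(x);            // Vol of 10 class probabilities
var cls = net.getPrediction();         // index of the most probable class
*/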
/*** convnet_trainers ***/
var Trainer = function(net, options) {
this.net = net;
var options = options || {};
this.learning_rate = typeof options.learning_rate !== 'undefined' ? options.learning_rate : 0.01;
this.l1_decay = typeof options.l1_decay !== 'undefined' ? options.l1_decay : 0.0;
this.l2_decay = typeof options.l2_decay !== 'undefined' ? options.l2_decay : 0.0;
this.batch_size = typeof options.batch_size !== 'undefined' ? options.batch_size : 1;
this.method = typeof options.method !== 'undefined' ? options.method : 'sgd'; // sgd/adam/adagrad/adadelta/windowgrad/nesterov
this.momentum = typeof options.momentum !== 'undefined' ? options.momentum : 0.9;
this.ro = typeof options.ro !== 'undefined' ? options.ro : 0.95; // used in adadelta
this.eps = typeof options.eps !== 'undefined' ? options.eps : 1e-8; // used in adam or adadelta
this.beta1 = typeof options.beta1 !== 'undefined' ? options.beta1 : 0.9; // used in adam
this.beta2 = typeof options.beta2 !== 'undefined' ? options.beta2 : 0.999; // used in adam
this.k = 0; // iteration counter
this.gsum = []; // last iteration gradients (used for momentum calculations)
this.xsum = []; // used in adam or adadelta
// check if regression is expected
if(this.net.layers[this.net.layers.length - 1].layer_type === "regression")
this.regression = true;
else
this.regression = false;
}
Trainer.prototype = {
train: function(x, y) {
var start = new Date().getTime();
this.net.forward(x, true); // also set the flag that lets the net know we're just training
var end = new Date().getTime();
var fwd_time = end - start;
var start = new Date().getTime();
var cost_loss = this.net.backward(y);
var l2_decay_loss = 0.0;
var l1_decay_loss = 0.0;
var end = new Date().getTime();
var bwd_time = end - start;
if(this.regression && y.constructor !== Array)
console.log("Warning: a regression net requires an array as training output vector.");
this.k++;
if(this.k % this.batch_size === 0) {
var pglist = this.net.getParamsAndGrads();
// initialize lists for accumulators. Will only be done once on first iteration
if(this.gsum.length === 0 && (this.method !== 'sgd' || this.momentum > 0.0)) {
// only vanilla sgd doesnt need either lists
// momentum needs gsum
// adagrad needs gsum
// adam and adadelta needs gsum and xsum
for(var i=0;i<pglist.length;i++) {
this.gsum.push(convnet.zeros(pglist[i].params.length));
if(this.method === 'adam' || this.method === 'adadelta') {
this.xsum.push(convnet.zeros(pglist[i].params.length));
} else {
this.xsum.push([]); // conserve memory
}
}
}
// perform an update for all sets of weights
for(var i=0;i<pglist.length;i++) {
var pg = pglist[i]; // param, gradient, other options in future (custom learning rate etc)
var p = pg.params;
var g = pg.grads;
// learning rate for some parameters.
var l2_decay_mul = typeof pg.l2_decay_mul !== 'undefined' ? pg.l2_decay_mul : 1.0;
var l1_decay_mul = typeof pg.l1_decay_mul !== 'undefined' ? pg.l1_decay_mul : 1.0;
var l2_decay = this.l2_decay * l2_decay_mul;
var l1_decay = this.l1_decay * l1_decay_mul;
var plen = p.length;
for(var j=0;j<plen;j++) {
l2_decay_loss += l2_decay*p[j]*p[j]/2; // accumulate weight decay loss
l1_decay_loss += l1_decay*Math.abs(p[j]);
var l1grad = l1_decay * (p[j] > 0 ? 1 : -1);
var l2grad = l2_decay * (p[j]);
var gij = (l2grad + l1grad + g[j]) / this.batch_size; // raw batch gradient
var gsumi = this.gsum[i];
var xsumi = this.xsum[i];
if(this.method === 'adam') {
// adam update
gsumi[j] = gsumi[j] * this.beta1 + (1- this.beta1) * gij; // update biased first moment estimate
xsumi[j] = xsumi[j] * this.beta2 + (1-this.beta2) * gij * gij; // update biased second moment estimate
var biasCorr1 = gsumi[j] / (1 - Math.pow(this.beta1, this.k)); // bias-corrected first moment estimate
var biasCorr2 = xsumi[j] / (1 - Math.pow(this.beta2, this.k)); // bias-corrected second moment estimate
var dx = - this.learning_rate * biasCorr1 / (Math.sqrt(biasCorr2) + this.eps);
p[j] += dx;
} else if(this.method === 'adagrad') {
// adagrad update
gsumi[j] = gsumi[j] + gij * gij;
var dx = - this.learning_rate / Math.sqrt(gsumi[j] + this.eps) * gij;
p[j] += dx;
} else if(this.method === 'windowgrad') {
// this is adagrad but with a moving window weighted average
// so the gradient is not accumulated over the entire history of the run.
// it's also referred to as Idea #1 in Zeiler paper on Adadelta. Seems reasonable to me!
gsumi[j] = this.ro * gsumi[j] + (1-this.ro) * gij * gij;
var dx = - this.learning_rate / Math.sqrt(gsumi[j] + this.eps) * gij; // eps added for better conditioning
p[j] += dx;
} else if(this.method === 'adadelta') {
gsumi[j] = this.ro * gsumi[j] + (1-this.ro) * gij * gij;
var dx = - Math.sqrt((xsumi[j] + this.eps)/(gsumi[j] + this.eps)) * gij;
xsumi[j] = this.ro * xsumi[j] + (1-this.ro) * dx * dx; // yes, xsum lags behind gsum by 1.
p[j] += dx;
} else if(this.method === 'nesterov') {
var dx = gsumi[j];
gsumi[j] = gsumi[j] * this.momentum + this.learning_rate * gij;
dx = this.momentum * dx - (1.0 + this.momentum) * gsumi[j];
p[j] += dx;
} else {
// assume SGD
if(this.momentum > 0.0) {
// momentum update
var dx = this.momentum * gsumi[j] - this.learning_rate * gij; // step
gsumi[j] = dx; // back this up for next iteration of momentum
p[j] += dx; // apply corrected gradient
} else {
// vanilla sgd
p[j] += - this.learning_rate * gij;
}
}
g[j] = 0.0; // zero out gradient so that we can begin accumulating anew
}
}
}
// appending softmax_loss for backwards compatibility, but from now on we will always use cost_loss
// in future, TODO: have to completely redo the way loss is done around the network as currently
// loss is a bit of a hack. Ideally, user should specify arbitrary number of loss functions on any layer
// and it should all be computed correctly and automatically.
return {fwd_time: fwd_time, bwd_time: bwd_time,
l2_decay_loss: l2_decay_loss, l1_decay_loss: l1_decay_loss,
cost_loss: cost_loss, softmax_loss: cost_loss,
loss: cost_loss + l1_decay_loss + l2_decay_loss}
}
}
convnet.Trainer = Trainer;
convnet.SGDTrainer = Trainer; // backwards compatibility
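// Usage sketch for Trainer (illustrative only): each train() call does a
// forward and backward pass and, every batch_size calls, applies a parameter
// update with the chosen method. net, train_data and train_labels below are
// hypothetical (a built Net, an array of Vols, and an array of class indices).
/*
var trainer = new convnet.Trainer(net, {method:'adadelta', l2_decay:0.001, batch_size:10});
for(var epoch=0; epoch<10; epoch++) {
  for(var i=0; i<train_data.length; i++) {
    var stats = trainer.train(train_data[i], train_labels[i]);
    // stats.loss = cost_loss + l1_decay_loss + l2_decay_loss
  }
}
*/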
/*** convnet_magicnets ***/
// used utilities, make explicit local references
var randf = convnet.randf;
var randi = convnet.randi;
var Net = convnet.Net;
var Trainer = convnet.Trainer;
var maxmin = convnet.maxmin;
var randperm = convnet.randperm;
var weightedSample = convnet.weightedSample;
var getopt = convnet.getopt;
var arrUnique = convnet.arrUnique;
/*
A MagicNet takes data: a list of convnetjs.Vol(), and labels
which for now are assumed to be class indices 0..K. MagicNet then:
- creates data folds for cross-validation
- samples candidate networks
- evaluates candidate networks on all data folds
- produces predictions by model-averaging the best networks
*/
var MagicNet = function(data, labels, opt) {
var opt = opt || {};
if(typeof data === 'undefined') { data = []; }
if(typeof labels === 'undefined') { labels = []; }
// required inputs
this.data = data; // store these pointers to data
this.labels = labels;
// optional inputs
this.train_ratio = getopt(opt, 'train_ratio', 0.7);
this.num_folds = getopt(opt, 'num_folds', 10);
this.num_candidates = getopt(opt, 'num_candidates', 50); // we evaluate several in parallel
// how many epochs of data to train every network? for every fold?
// higher values mean higher accuracy in final results, but more expensive
this.num_epochs = getopt(opt, 'num_epochs', 50);
// number of best models to average during prediction. Usually higher = better
this.ensemble_size = getopt(opt, 'ensemble_size', 10);
// candidate parameters
this.batch_size_min = getopt(opt, 'batch_size_min', 10);
this.batch_size_max = getopt(opt, 'batch_size_max', 300);
this.l2_decay_min = getopt(opt, 'l2_decay_min', -4);
this.l2_decay_max = getopt(opt, 'l2_decay_max', 2);
this.learning_rate_min = getopt(opt, 'learning_rate_min', -4);
this.learning_rate_max = getopt(opt, 'learning_rate_max', 0);
this.momentum_min = getopt(opt, 'momentum_min', 0.9);
this.momentum_max = getopt(opt, 'momentum_max', 0.9);
this.neurons_min = getopt(opt, 'neurons_min', 5);
this.neurons_max = getopt(opt, 'neurons_max', 30);
// computed
this.folds = []; // data fold indices, gets filled by sampleFolds()
this.candidates = []; // candidate networks that are being currently evaluated
this.evaluated_candidates = []; // history of all candidates that were fully evaluated on all folds
this.unique_labels = arrUnique(labels);
this.iter = 0; // iteration counter, goes from 0 -> num_epochs * num_training_data
this.foldix = 0; // index of active fold
// callbacks
this.finish_fold_callback = null;
this.finish_batch_callback = null;
// initializations
if(this.data.length > 0) {
this.sampleFolds();
this.sampleCandidates();
}
};
MagicNet.prototype = {
// sets this.folds to a sampling of this.num_folds folds
sampleFolds: function() {
var N = this.data.length;
var num_train = Math.floor(this.train_ratio * N);
this.folds = []; // flush folds, if any
for(var i=0;i<this.num_folds;i++) {
var p = randperm(N);
this.folds.push({train_ix: p.slice(0, num_train), test_ix: p.slice(num_train, N)});
}
},
// returns a random candidate network
sampleCandidate: function() {
var input_depth = this.data[0].w.length;
var num_classes = this.unique_labels.length;
// sample network topology and hyperparameters
var layer_defs = [];
layer_defs.push({type:'input', out_sx:1, out_sy:1, out_depth: input_depth});
var nwl = weightedSample([0,1,2,3], [0.2, 0.3, 0.3, 0.2]); // prefer nets with 1,2 hidden layers
for(var q=0;q<nwl;q++) {
var ni = randi(this.neurons_min, this.neurons_max);
var act = ['tanh','maxout','relu'][randi(0,3)];
if(randf(0,1)<0.5) {
var dp = Math.random();
layer_defs.push({type:'fc', num_neurons: ni, activation: act, drop_prob: dp});
} else {
layer_defs.push({type:'fc', num_neurons: ni, activation: act});
}
}
layer_defs.push({type:'softmax', num_classes: num_classes});
var net = new Net();
net.makeLayers(layer_defs);
// sample training hyperparameters
var bs = randi(this.batch_size_min, this.batch_size_max); // batch size
var l2 = Math.pow(10, randf(this.l2_decay_min, this.l2_decay_max)); // l2 weight decay
var lr = Math.pow(10, randf(this.learning_rate_min, this.learning_rate_max)); // learning rate
var mom = randf(this.momentum_min, this.momentum_max); // momentum. Lets just use 0.9, works okay usually ;p
var tp = randf(0,1); // trainer type
var trainer_def;
if(tp<0.33) {
trainer_def = {method:'adadelta', batch_size:bs, l2_decay:l2};
} else if(tp<0.66) {
trainer_def = {method:'adagrad', learning_rate: lr, batch_size:bs, l2_decay:l2};
} else {
trainer_def = {method:'sgd', learning_rate: lr, momentum: mom, batch_size:bs, l2_decay:l2};
}
var trainer = new Trainer(net, trainer_def);
var cand = {};
cand.acc = [];
cand.accv = 0; // this will be maintained as sum(acc) for convenience
cand.layer_defs = layer_defs;
cand.trainer_def = trainer_def;
cand.net = net;
cand.trainer = trainer;
return cand;
},
// sets this.candidates with this.num_candidates candidate nets
sampleCandidates: function() {
this.candidates = []; // flush, if any
for(var i=0;i<this.num_candidates;i++) {
var cand = this.sampleCandidate();
this.candidates.push(cand);
}
},
step: function() {
// run an example through current candidate
this.iter++;
// step all candidates on a random data point
var fold = this.folds[this.foldix]; // active fold
var dataix = fold.train_ix[randi(0, fold.train_ix.length)];
for(var k=0;k<this.candidates.length;k++) {
var x = this.data[dataix];
var l = this.labels[dataix];
this.candidates[k].trainer.train(x, l);
}
// process consequences: sample new folds, or candidates
var lastiter = this.num_epochs * fold.train_ix.length;
if(this.iter >= lastiter) {
// finished evaluation of this fold. Get final validation
// accuracies, record them, and go on to next fold.
var val_acc = this.evalValErrors();
for(var k=0;k<this.candidates.length;k++) {
var c = this.candidates[k];
c.acc.push(val_acc[k]);
c.accv += val_acc[k];
}
this.iter = 0; // reset step number
this.foldix++; // increment fold
if(this.finish_fold_callback !== null) {
this.finish_fold_callback();
}
if(this.foldix >= this.folds.length) {
// we finished all folds as well! Record these candidates
// and sample new ones to evaluate.
for(var k=0;k<this.candidates.length;k++) {
this.evaluated_candidates.push(this.candidates[k]);
}
// sort evaluated candidates according to accuracy achieved
this.evaluated_candidates.sort(function(a, b) {
return (a.accv / a.acc.length)
> (b.accv / b.acc.length)
? -1 : 1;
});
// and clip only to the top few ones (lets place limit at 3*ensemble_size)
// otherwise there are concerns with keeping these all in memory
// if MagicNet is being evaluated for a very long time
if(this.evaluated_candidates.length > 3 * this.ensemble_size) {
this.evaluated_candidates = this.evaluated_candidates.slice(0, 3 * this.ensemble_size);
}
if(this.finish_batch_callback !== null) {
this.finish_batch_callback();
}
this.sampleCandidates(); // begin with new candidates
this.foldix = 0; // reset this
} else {
// we will go on to another fold. reset all candidates nets
for(var k=0;k<this.candidates.length;k++) {
var c = this.candidates[k];
var net = new Net();
net.makeLayers(c.layer_defs);
var trainer = new Trainer(net, c.trainer_def);
c.net = net;
c.trainer = trainer;
}
}
}
},
evalValErrors: function() {
// evaluate candidates on validation data and return performance of current networks
// as simple list
var vals = [];
var fold = this.folds[this.foldix]; // active fold
for(var k=0;k<this.candidates.length;k++) {
var net = this.candidates[k].net;
var v = 0.0;
for(var q=0;q<fold.test_ix.length;q++) {
var x = this.data[fold.test_ix[q]];
var l = this.labels[fold.test_ix[q]];
net.forward(x);
var yhat = net.getPrediction();
v += (yhat === l ? 1.0 : 0.0); // 0 1 loss
}
v /= fold.test_ix.length; // normalize
vals.push(v);
}
return vals;
},
// returns prediction scores for given test data point, as Vol
// uses an averaged prediction from the best ensemble_size models
// x is a Vol.
predict_soft: function(data) {
// forward prop the best networks
// and accumulate probabilities at the last layer into an output Vol
var eval_candidates = [];
var nv = 0;
if(this.evaluated_candidates.length === 0) {
// not sure what to do here, first batch of nets hasnt evaluated yet
// lets just predict with current candidates.
nv = this.candidates.length;
eval_candidates = this.candidates;
} else {
// forward prop the best networks from evaluated_candidates
nv = Math.min(this.ensemble_size, this.evaluated_candidates.length);
eval_candidates = this.evaluated_candidates
}
// forward nets of all candidates and average the predictions
var xout, n;
for(var j=0;j<nv;j++) {
var net = eval_candidates[j].net;
var x = net.forward(data);
if(j===0) {
xout = x;
n = x.w.length;
} else {
// add it on
for(var d=0;d<n;d++) {
xout.w[d] += x.w[d];
}
}
}
// produce average
for(var d=0;d<n;d++) {
xout.w[d] /= nv;
}
return xout;
},
predict: function(data) {
var xout = this.predict_soft(data);
if(xout.w.length !== 0) {
var stats = maxmin(xout.w);
var predicted_label = stats.maxi;
} else {
var predicted_label = -1; // error out
}
return predicted_label;
},
toJSON: function() {
// dump the top ensemble_size networks as a list
var nv = Math.min(this.ensemble_size, this.evaluated_candidates.length);
var json = {};
json.nets = [];
for(var i=0;i<nv;i++) {
json.nets.push(this.evaluated_candidates[i].net.toJSON());
}
return json;
},
fromJSON: function(json) {
this.ensemble_size = json.nets.length;
this.evaluated_candidates = [];
for(var i=0;i<this.ensemble_size;i++) {
var net = new Net();
net.fromJSON(json.nets[i]);
var dummy_candidate = {};
dummy_candidate.net = net;
this.evaluated_candidates.push(dummy_candidate);
}
},
// callback functions
// called when a fold is finished, while evaluating a batch
onFinishFold: function(f) { this.finish_fold_callback = f; },
// called when a batch of candidates has finished evaluating
onFinishBatch: function(f) { this.finish_batch_callback = f; }
};
convnet.MagicNet = MagicNet;
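// Usage sketch for MagicNet (illustrative only): hand it a list of Vols and
// class labels, call step() repeatedly, and predict with the resulting
// ensemble. train_data, train_labels and test_example are hypothetical.
/*
var magicNet = new convnet.MagicNet(train_data, train_labels);
magicNet.onFinishBatch(function() { console.log('finished evaluating a batch of candidates'); });
setInterval(function() { magicNet.step(); }, 0);  // keep stepping; candidates train and get cross-validated
var predicted = magicNet.predict(test_example);   // class index, or -1 if there is nothing to predict with
*/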