/*** https://github.com/karpathy/convnetjs ***/
|
|
|
|
"use strict";

var convnet = {REVISION: 'ALPHA'};

module.exports = convnet;
|
|
|
|
/*** convnet_util ***/
|
|
// Random number utilities
|
|
var return_v = false;
|
|
var v_val = 0.0;
|
|
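// gaussRandom samples a standard normal via the Marsaglia polar method: it draws pairs of
// uniforms in (-1,1), rejects points outside the unit disk, and caches the second gaussian
// of each pair in v_val for the next call.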
var gaussRandom = function() {
|
|
if(return_v) {
|
|
return_v = false;
|
|
return v_val;
|
|
}
|
|
var u = 2*Math.random()-1;
|
|
var v = 2*Math.random()-1;
|
|
var r = u*u + v*v;
|
|
if(r == 0 || r > 1) return gaussRandom();
|
|
var c = Math.sqrt(-2*Math.log(r)/r);
|
|
v_val = v*c; // cache this
|
|
return_v = true;
|
|
return u*c;
|
|
}
|
|
var randf = function(a, b) { return Math.random()*(b-a)+a; }
|
|
var randi = function(a, b) { return Math.floor(Math.random()*(b-a)+a); }
|
|
var randn = function(mu, std){ return mu+gaussRandom()*std; }
|
|
|
|
// Array utilities
|
|
var zeros = function(n) {
|
|
if(typeof(n)==='undefined' || isNaN(n)) { return []; }
|
|
if(typeof ArrayBuffer === 'undefined') {
|
|
// lacking browser support
|
|
var arr = new Array(n);
|
|
for(var i=0;i<n;i++) { arr[i]= 0; }
|
|
return arr;
|
|
} else {
|
|
return new Float64Array(n);
|
|
}
|
|
}
|
|
|
|
var arrContains = function(arr, elt) {
|
|
for(var i=0,n=arr.length;i<n;i++) {
|
|
if(arr[i]===elt) return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
var arrUnique = function(arr) {
|
|
var b = [];
|
|
for(var i=0,n=arr.length;i<n;i++) {
|
|
if(!arrContains(b, arr[i])) {
|
|
b.push(arr[i]);
|
|
}
|
|
}
|
|
return b;
|
|
}
|
|
|
|
// return max and min of a given non-empty array.
|
|
var maxmin = function(w) {
|
|
if(w.length === 0) { return {}; } // ... ;s
|
|
var maxv = w[0];
|
|
var minv = w[0];
|
|
var maxi = 0;
|
|
var mini = 0;
|
|
var n = w.length;
|
|
for(var i=1;i<n;i++) {
|
|
if(w[i] > maxv) { maxv = w[i]; maxi = i; }
|
|
if(w[i] < minv) { minv = w[i]; mini = i; }
|
|
}
|
|
return {maxi: maxi, maxv: maxv, mini: mini, minv: minv, dv:maxv-minv};
|
|
}
|
|
|
|
// create random permutation of numbers, in range [0...n-1]
|
|
var randperm = function(n) {
|
|
var i = n,
|
|
j = 0,
|
|
temp;
|
|
var array = [];
|
|
for(var q=0;q<n;q++)array[q]=q;
|
|
while (i--) {
|
|
j = Math.floor(Math.random() * (i+1));
|
|
temp = array[i];
|
|
array[i] = array[j];
|
|
array[j] = temp;
|
|
}
|
|
return array;
|
|
}
|
|
|
|
// sample from list lst according to probabilities in list probs
|
|
// the two lists are of same size, and probs adds up to 1
|
|
var weightedSample = function(lst, probs) {
|
|
var p = randf(0, 1.0);
|
|
var cumprob = 0.0;
|
|
for(var k=0,n=lst.length;k<n;k++) {
|
|
cumprob += probs[k];
|
|
if(p < cumprob) { return lst[k]; }
|
|
}
|
|
}
|
|
|
|
// syntactic sugar function for getting default parameter values
|
|
var getopt = function(opt, field_name, default_value) {
|
|
if(typeof field_name === 'string') {
|
|
// case of single string
|
|
return (typeof opt[field_name] !== 'undefined') ? opt[field_name] : default_value;
|
|
} else {
|
|
// assume we are given a list of strings instead
|
|
var ret = default_value;
|
|
for(var i=0;i<field_name.length;i++) {
|
|
var f = field_name[i];
|
|
if (typeof opt[f] !== 'undefined') {
|
|
ret = opt[f]; // overwrite return value
|
|
}
|
|
}
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
function assert(condition, message) {
|
|
if (!condition) {
|
|
message = message || "Assertion failed";
|
|
if (typeof Error !== "undefined") {
|
|
throw new Error(message);
|
|
}
|
|
throw message; // Fallback
|
|
}
|
|
}
|
|
|
|
convnet.randf = randf;
|
|
convnet.randi = randi;
|
|
convnet.randn = randn;
|
|
convnet.zeros = zeros;
|
|
convnet.maxmin = maxmin;
|
|
convnet.randperm = randperm;
|
|
convnet.weightedSample = weightedSample;
|
|
convnet.arrUnique = arrUnique;
|
|
convnet.arrContains = arrContains;
|
|
convnet.getopt = getopt;
|
|
convnet.assert = assert;
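
// Usage sketch for the utilities exported above (not invoked anywhere; the function name
// and option values are illustrative only):
var example_util_usage = function() {
var opts = {stride: 2};
var stride = convnet.getopt(opts, 'stride', 1);        // 2, found in opts
var pad = convnet.getopt(opts, ['pad', 'padding'], 0); // 0, falls back to the default
var noise = convnet.randn(0.0, 0.01);                  // gaussian sample, mean 0, std 0.01
var buf = convnet.zeros(10);                           // Float64Array of 10 zeros
return {stride: stride, pad: pad, noise: noise, buf: buf};
};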
|
|
|
|
/*** convnet_vol ***/
|
|
// Vol is the basic building block of all data in a net.
|
|
// it is essentially just a 3D volume of numbers, with a
|
|
// width (sx), height (sy), and depth (depth).
|
|
// it is used to hold data for all filters, all volumes,
|
|
// all weights, and also stores all gradients w.r.t.
|
|
// the data. c is optionally a value to initialize the volume
|
|
// with. If c is missing, fills the Vol with random numbers.
|
|
var Vol = function(sx, sy, depth, c) {
|
|
// this is how you check if a variable is an array. Oh, Javascript :)
|
|
if(Object.prototype.toString.call(sx) === '[object Array]') {
|
|
// we were given a list in sx, assume 1D volume and fill it up
|
|
this.sx = 1;
|
|
this.sy = 1;
|
|
this.depth = sx.length;
|
|
// we have to do the following copy because we want to use
|
|
// fast typed arrays, not an ordinary javascript array
|
|
this.w = convnet.zeros(this.depth);
|
|
this.dw = convnet.zeros(this.depth);
|
|
for(var i=0;i<this.depth;i++) {
|
|
this.w[i] = sx[i];
|
|
}
|
|
} else {
|
|
// we were given dimensions of the vol
|
|
this.sx = sx;
|
|
this.sy = sy;
|
|
this.depth = depth;
|
|
var n = sx*sy*depth;
|
|
this.w = convnet.zeros(n);
|
|
this.dw = convnet.zeros(n);
|
|
if(typeof c === 'undefined') {
|
|
// weight normalization is done to equalize the output
|
|
// variance of every neuron, otherwise neurons with a lot
|
|
// of incoming connections have outputs of larger variance
|
|
var scale = Math.sqrt(1.0/(sx*sy*depth));
|
|
for(var i=0;i<n;i++) {
|
|
this.w[i] = convnet.randn(0.0, scale);
|
|
}
|
|
} else {
|
|
for(var i=0;i<n;i++) {
|
|
this.w[i] = c;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
Vol.prototype = {
|
|
get: function(x, y, d) {
|
|
var ix=((this.sx * y)+x)*this.depth+d;
|
|
return this.w[ix];
|
|
},
|
|
set: function(x, y, d, v) {
|
|
var ix=((this.sx * y)+x)*this.depth+d;
|
|
this.w[ix] = v;
|
|
},
|
|
add: function(x, y, d, v) {
|
|
var ix=((this.sx * y)+x)*this.depth+d;
|
|
this.w[ix] += v;
|
|
},
|
|
get_grad: function(x, y, d) {
|
|
var ix = ((this.sx * y)+x)*this.depth+d;
|
|
return this.dw[ix];
|
|
},
|
|
set_grad: function(x, y, d, v) {
|
|
var ix = ((this.sx * y)+x)*this.depth+d;
|
|
this.dw[ix] = v;
|
|
},
|
|
add_grad: function(x, y, d, v) {
|
|
var ix = ((this.sx * y)+x)*this.depth+d;
|
|
this.dw[ix] += v;
|
|
},
|
|
cloneAndZero: function() { return new Vol(this.sx, this.sy, this.depth, 0.0)},
|
|
clone: function() {
|
|
var V = new Vol(this.sx, this.sy, this.depth, 0.0);
|
|
var n = this.w.length;
|
|
for(var i=0;i<n;i++) { V.w[i] = this.w[i]; }
|
|
return V;
|
|
},
|
|
addFrom: function(V) { for(var k=0;k<this.w.length;k++) { this.w[k] += V.w[k]; }},
|
|
addFromScaled: function(V, a) { for(var k=0;k<this.w.length;k++) { this.w[k] += a*V.w[k]; }},
|
|
setConst: function(a) { for(var k=0;k<this.w.length;k++) { this.w[k] = a; }},
|
|
|
|
toJSON: function() {
|
|
// todo: we may want to only save d most significant digits to save space
|
|
var json = {}
|
|
json.sx = this.sx;
|
|
json.sy = this.sy;
|
|
json.depth = this.depth;
|
|
json.w = this.w;
|
|
return json;
|
|
// we wont back up gradients to save space
|
|
},
|
|
fromJSON: function(json) {
|
|
this.sx = json.sx;
|
|
this.sy = json.sy;
|
|
this.depth = json.depth;
|
|
|
|
var n = this.sx*this.sy*this.depth;
|
|
this.w = convnet.zeros(n);
|
|
this.dw = convnet.zeros(n);
|
|
// copy over the elements.
|
|
for(var i=0;i<n;i++) {
|
|
this.w[i] = json.w[i];
|
|
}
|
|
}
|
|
}
|
|
|
|
convnet.Vol = Vol;
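
// Usage sketch for Vol (not invoked anywhere; names are illustrative). A Vol built from
// dimensions without c is filled with scaled gaussian noise; the (x, y, d) accessors index
// into the flat w array, and a plain array produces a 1x1xN volume.
var example_vol_usage = function() {
var v = new Vol(3, 3, 2, 0.0);      // 3x3x2 volume initialized to zeros
v.set(1, 2, 0, 5.0);
var val = v.get(1, 2, 0);           // 5.0
var v1d = new Vol([0.1, 0.2, 0.3]); // 1x1x3 volume copied from a list
return {v: v, val: val, v1d: v1d};
};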
|
|
|
|
/*** convnet_vol_util ***/
|
|
var Vol = convnet.Vol; // convenience
|
|
|
|
// Volume utilities
|
|
// intended for use with data augmentation
|
|
// crop is the size of output
|
|
// dx,dy are the offsets of the crop (the shift) w.r.t. the incoming volume
|
|
// fliplr is boolean on whether we also want to flip left<->right
|
|
var augment = function(V, crop, dx, dy, fliplr) {
|
|
// note assumes square outputs of size crop x crop
|
|
if(typeof(fliplr)==='undefined') var fliplr = false;
|
|
if(typeof(dx)==='undefined') var dx = convnet.randi(0, V.sx - crop);
|
|
if(typeof(dy)==='undefined') var dy = convnet.randi(0, V.sy - crop);
|
|
|
|
// randomly sample a crop in the input volume
|
|
var W;
|
|
if(crop !== V.sx || dx!==0 || dy!==0) {
|
|
W = new Vol(crop, crop, V.depth, 0.0);
|
|
for(var x=0;x<crop;x++) {
|
|
for(var y=0;y<crop;y++) {
|
|
if(x+dx<0 || x+dx>=V.sx || y+dy<0 || y+dy>=V.sy) continue; // oob
|
|
for(var d=0;d<V.depth;d++) {
|
|
W.set(x,y,d,V.get(x+dx,y+dy,d)); // copy data over
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
W = V;
|
|
}
|
|
|
|
if(fliplr) {
|
|
// flip volume horizontally
|
|
var W2 = W.cloneAndZero();
|
|
for(var x=0;x<W.sx;x++) {
|
|
for(var y=0;y<W.sy;y++) {
|
|
for(var d=0;d<W.depth;d++) {
|
|
W2.set(x,y,d,W.get(W.sx - x - 1,y,d)); // copy data over
|
|
}
|
|
}
|
|
}
|
|
W = W2; //swap
|
|
}
|
|
return W;
|
|
}
|
|
|
|
// img is a DOM element that contains a loaded image
|
|
// returns a Vol of size (W, H, 4). 4 is for RGBA
|
|
var img_to_vol = function(img, convert_grayscale) {
|
|
|
|
if(typeof(convert_grayscale)==='undefined') var convert_grayscale = false;
|
|
|
|
var canvas = document.createElement('canvas');
|
|
canvas.width = img.width;
|
|
canvas.height = img.height;
|
|
var ctx = canvas.getContext("2d");
|
|
|
|
// due to a Firefox bug
|
|
try {
|
|
ctx.drawImage(img, 0, 0);
|
|
} catch (e) {
|
|
if (e.name === "NS_ERROR_NOT_AVAILABLE") {
|
|
// sometimes happens, lets just abort
|
|
return false;
|
|
} else {
|
|
throw e;
|
|
}
|
|
}
|
|
|
|
try {
|
|
var img_data = ctx.getImageData(0, 0, canvas.width, canvas.height);
|
|
} catch (e) {
|
|
if(e.name === 'IndexSizeError') {
|
|
return false; // not sure what causes this sometimes but okay abort
|
|
} else {
|
|
throw e;
|
|
}
|
|
}
|
|
|
|
// prepare the input: get pixels and normalize them
|
|
var p = img_data.data;
|
|
var W = img.width;
|
|
var H = img.height;
|
|
var pv = []
|
|
for(var i=0;i<p.length;i++) {
|
|
pv.push(p[i]/255.0-0.5); // normalize image pixels to [-0.5, 0.5]
|
|
}
|
|
var x = new Vol(W, H, 4, 0.0); //input volume (image)
|
|
x.w = pv;
|
|
|
|
if(convert_grayscale) {
|
|
// flatten into depth=1 array
|
|
var x1 = new Vol(W, H, 1, 0.0);
|
|
for(var i=0;i<W;i++) {
|
|
for(var j=0;j<H;j++) {
|
|
x1.set(i,j,0,x.get(i,j,0));
|
|
}
|
|
}
|
|
x = x1;
|
|
}
|
|
|
|
return x;
|
|
}
|
|
|
|
convnet.augment = augment;
|
|
convnet.img_to_vol = img_to_vol;
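
// Usage sketch for augment (not invoked anywhere): take a random 24x24 crop of a 28x28
// input, or a fixed crop with a left-right flip, as done for data augmentation.
var example_augment_usage = function() {
var x = new Vol(28, 28, 1);
var cropped = convnet.augment(x, 24);             // random dx, dy offsets
var flipped = convnet.augment(x, 24, 0, 0, true); // fixed offsets, mirrored horizontally
return {cropped: cropped, flipped: flipped};
};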
|
|
|
|
|
|
/*** convnet_layers_dotproducts ***/
|
|
// This file contains all layers that do dot products with input,
|
|
// but usually in a different connectivity pattern and weight sharing
|
|
// schemes:
|
|
// - FullyConn is fully connected dot products
|
|
// - ConvLayer does convolutions (so weight sharing spatially)
|
|
// putting them together in one file because they are very similar
|
|
var ConvLayer = function(opt) {
|
|
var opt = opt || {};
|
|
|
|
// required
|
|
this.out_depth = opt.filters;
|
|
this.sx = opt.sx; // filter size. Should be odd if possible, it's cleaner.
|
|
this.in_depth = opt.in_depth;
|
|
this.in_sx = opt.in_sx;
|
|
this.in_sy = opt.in_sy;
|
|
|
|
// optional
|
|
this.sy = typeof opt.sy !== 'undefined' ? opt.sy : this.sx;
|
|
this.stride = typeof opt.stride !== 'undefined' ? opt.stride : 1; // stride at which we apply filters to input volume
|
|
this.pad = typeof opt.pad !== 'undefined' ? opt.pad : 0; // amount of 0 padding to add around borders of input volume
|
|
this.l1_decay_mul = typeof opt.l1_decay_mul !== 'undefined' ? opt.l1_decay_mul : 0.0;
|
|
this.l2_decay_mul = typeof opt.l2_decay_mul !== 'undefined' ? opt.l2_decay_mul : 1.0;
|
|
|
|
// computed
|
|
// note we are doing floor, so if the strided convolution of the filter doesnt fit into the input
|
|
// volume exactly, the output volume will be trimmed and not contain the (incomplete) computed
|
|
// final application.
|
|
this.out_sx = Math.floor((this.in_sx + this.pad * 2 - this.sx) / this.stride + 1);
|
|
this.out_sy = Math.floor((this.in_sy + this.pad * 2 - this.sy) / this.stride + 1);
|
|
this.layer_type = 'conv';
|
|
|
|
// initializations
|
|
var bias = typeof opt.bias_pref !== 'undefined' ? opt.bias_pref : 0.0;
|
|
this.filters = [];
|
|
for(var i=0;i<this.out_depth;i++) { this.filters.push(new Vol(this.sx, this.sy, this.in_depth)); }
|
|
this.biases = new Vol(1, 1, this.out_depth, bias);
|
|
}
|
|
ConvLayer.prototype = {
|
|
forward: function(V, is_training) {
|
|
// optimized code by @mdda that achieves 2x speedup over previous version
|
|
|
|
this.in_act = V;
|
|
var A = new Vol(this.out_sx |0, this.out_sy |0, this.out_depth |0, 0.0);
|
|
|
|
var V_sx = V.sx |0;
|
|
var V_sy = V.sy |0;
|
|
var xy_stride = this.stride |0;
|
|
|
|
for(var d=0;d<this.out_depth;d++) {
|
|
var f = this.filters[d];
|
|
var x = -this.pad |0;
|
|
var y = -this.pad |0;
|
|
for(var ay=0; ay<this.out_sy; y+=xy_stride,ay++) { // xy_stride
|
|
x = -this.pad |0;
|
|
for(var ax=0; ax<this.out_sx; x+=xy_stride,ax++) { // xy_stride
|
|
|
|
// convolve centered at this particular location
|
|
var a = 0.0;
|
|
for(var fy=0;fy<f.sy;fy++) {
|
|
var oy = y+fy; // coordinates in the original input array coordinates
|
|
for(var fx=0;fx<f.sx;fx++) {
|
|
var ox = x+fx;
|
|
if(oy>=0 && oy<V_sy && ox>=0 && ox<V_sx) {
|
|
for(var fd=0;fd<f.depth;fd++) {
|
|
// avoid function call overhead (x2) for efficiency, compromise modularity :(
|
|
a += f.w[((f.sx * fy)+fx)*f.depth+fd] * V.w[((V_sx * oy)+ox)*V.depth+fd];
|
|
}
|
|
}
|
|
}
|
|
}
|
|
a += this.biases.w[d];
|
|
A.set(ax, ay, d, a);
|
|
}
|
|
}
|
|
}
|
|
this.out_act = A;
|
|
return this.out_act;
|
|
},
|
|
backward: function() {
|
|
|
|
var V = this.in_act;
|
|
V.dw = convnet.zeros(V.w.length); // zero out gradient wrt bottom data, we're about to fill it
|
|
|
|
var V_sx = V.sx |0;
|
|
var V_sy = V.sy |0;
|
|
var xy_stride = this.stride |0;
|
|
|
|
for(var d=0;d<this.out_depth;d++) {
|
|
var f = this.filters[d];
|
|
var x = -this.pad |0;
|
|
var y = -this.pad |0;
|
|
for(var ay=0; ay<this.out_sy; y+=xy_stride,ay++) { // xy_stride
|
|
x = -this.pad |0;
|
|
for(var ax=0; ax<this.out_sx; x+=xy_stride,ax++) { // xy_stride
|
|
|
|
// convolve centered at this particular location
|
|
var chain_grad = this.out_act.get_grad(ax,ay,d); // gradient from above, from chain rule
|
|
for(var fy=0;fy<f.sy;fy++) {
|
|
var oy = y+fy; // coordinates in the original input array coordinates
|
|
for(var fx=0;fx<f.sx;fx++) {
|
|
var ox = x+fx;
|
|
if(oy>=0 && oy<V_sy && ox>=0 && ox<V_sx) {
|
|
for(var fd=0;fd<f.depth;fd++) {
|
|
// avoid function call overhead (x2) for efficiency, compromise modularity :(
|
|
var ix1 = ((V_sx * oy)+ox)*V.depth+fd;
|
|
var ix2 = ((f.sx * fy)+fx)*f.depth+fd;
|
|
f.dw[ix2] += V.w[ix1]*chain_grad;
|
|
V.dw[ix1] += f.w[ix2]*chain_grad;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
this.biases.dw[d] += chain_grad;
|
|
}
|
|
}
|
|
}
|
|
},
|
|
getParamsAndGrads: function() {
|
|
var response = [];
|
|
for(var i=0;i<this.out_depth;i++) {
|
|
response.push({params: this.filters[i].w, grads: this.filters[i].dw, l2_decay_mul: this.l2_decay_mul, l1_decay_mul: this.l1_decay_mul});
|
|
}
|
|
response.push({params: this.biases.w, grads: this.biases.dw, l1_decay_mul: 0.0, l2_decay_mul: 0.0});
|
|
return response;
|
|
},
|
|
toJSON: function() {
|
|
var json = {};
|
|
json.sx = this.sx; // filter size in x, y dims
|
|
json.sy = this.sy;
|
|
json.stride = this.stride;
|
|
json.in_depth = this.in_depth;
|
|
json.out_depth = this.out_depth;
|
|
json.out_sx = this.out_sx;
|
|
json.out_sy = this.out_sy;
|
|
json.layer_type = this.layer_type;
|
|
json.l1_decay_mul = this.l1_decay_mul;
|
|
json.l2_decay_mul = this.l2_decay_mul;
|
|
json.pad = this.pad;
|
|
json.filters = [];
|
|
for(var i=0;i<this.filters.length;i++) {
|
|
json.filters.push(this.filters[i].toJSON());
|
|
}
|
|
json.biases = this.biases.toJSON();
|
|
return json;
|
|
},
|
|
fromJSON: function(json) {
|
|
this.out_depth = json.out_depth;
|
|
this.out_sx = json.out_sx;
|
|
this.out_sy = json.out_sy;
|
|
this.layer_type = json.layer_type;
|
|
this.sx = json.sx; // filter size in x, y dims
|
|
this.sy = json.sy;
|
|
this.stride = json.stride;
|
|
this.in_depth = json.in_depth; // depth of input volume
|
|
this.filters = [];
|
|
this.l1_decay_mul = typeof json.l1_decay_mul !== 'undefined' ? json.l1_decay_mul : 1.0;
|
|
this.l2_decay_mul = typeof json.l2_decay_mul !== 'undefined' ? json.l2_decay_mul : 1.0;
|
|
this.pad = typeof json.pad !== 'undefined' ? json.pad : 0;
|
|
for(var i=0;i<json.filters.length;i++) {
|
|
var v = new Vol(0,0,0,0);
|
|
v.fromJSON(json.filters[i]);
|
|
this.filters.push(v);
|
|
}
|
|
this.biases = new Vol(0,0,0,0);
|
|
this.biases.fromJSON(json.biases);
|
|
}
|
|
}
|
|
|
|
var FullyConnLayer = function(opt) {
|
|
var opt = opt || {};
|
|
|
|
// required
|
|
// ok fine we will allow 'filters' as the word as well
|
|
this.out_depth = typeof opt.num_neurons !== 'undefined' ? opt.num_neurons : opt.filters;
|
|
|
|
// optional
|
|
this.l1_decay_mul = typeof opt.l1_decay_mul !== 'undefined' ? opt.l1_decay_mul : 0.0;
|
|
this.l2_decay_mul = typeof opt.l2_decay_mul !== 'undefined' ? opt.l2_decay_mul : 1.0;
|
|
|
|
// computed
|
|
this.num_inputs = opt.in_sx * opt.in_sy * opt.in_depth;
|
|
this.out_sx = 1;
|
|
this.out_sy = 1;
|
|
this.layer_type = 'fc';
|
|
|
|
// initializations
|
|
var bias = typeof opt.bias_pref !== 'undefined' ? opt.bias_pref : 0.0;
|
|
this.filters = [];
|
|
for(var i=0;i<this.out_depth ;i++) { this.filters.push(new Vol(1, 1, this.num_inputs)); }
|
|
this.biases = new Vol(1, 1, this.out_depth, bias);
|
|
}
|
|
|
|
FullyConnLayer.prototype = {
|
|
forward: function(V, is_training) {
|
|
this.in_act = V;
|
|
var A = new Vol(1, 1, this.out_depth, 0.0);
|
|
var Vw = V.w;
|
|
for(var i=0;i<this.out_depth;i++) {
|
|
var a = 0.0;
|
|
var wi = this.filters[i].w;
|
|
for(var d=0;d<this.num_inputs;d++) {
|
|
a += Vw[d] * wi[d]; // for efficiency use Vols directly for now
|
|
}
|
|
a += this.biases.w[i];
|
|
A.w[i] = a;
|
|
}
|
|
this.out_act = A;
|
|
return this.out_act;
|
|
},
|
|
backward: function() {
|
|
var V = this.in_act;
|
|
V.dw = convnet.zeros(V.w.length); // zero out the gradient in input Vol
|
|
|
|
// compute gradient wrt weights and data
|
|
for(var i=0;i<this.out_depth;i++) {
|
|
var tfi = this.filters[i];
|
|
var chain_grad = this.out_act.dw[i];
|
|
for(var d=0;d<this.num_inputs;d++) {
|
|
V.dw[d] += tfi.w[d]*chain_grad; // grad wrt input data
|
|
tfi.dw[d] += V.w[d]*chain_grad; // grad wrt params
|
|
}
|
|
this.biases.dw[i] += chain_grad;
|
|
}
|
|
},
|
|
getParamsAndGrads: function() {
|
|
var response = [];
|
|
for(var i=0;i<this.out_depth;i++) {
|
|
response.push({params: this.filters[i].w, grads: this.filters[i].dw, l1_decay_mul: this.l1_decay_mul, l2_decay_mul: this.l2_decay_mul});
|
|
}
|
|
response.push({params: this.biases.w, grads: this.biases.dw, l1_decay_mul: 0.0, l2_decay_mul: 0.0});
|
|
return response;
|
|
},
|
|
toJSON: function() {
|
|
var json = {};
|
|
json.out_depth = this.out_depth;
|
|
json.out_sx = this.out_sx;
|
|
json.out_sy = this.out_sy;
|
|
json.layer_type = this.layer_type;
|
|
json.num_inputs = this.num_inputs;
|
|
json.l1_decay_mul = this.l1_decay_mul;
|
|
json.l2_decay_mul = this.l2_decay_mul;
|
|
json.filters = [];
|
|
for(var i=0;i<this.filters.length;i++) {
|
|
json.filters.push(this.filters[i].toJSON());
|
|
}
|
|
json.biases = this.biases.toJSON();
|
|
return json;
|
|
},
|
|
fromJSON: function(json) {
|
|
this.out_depth = json.out_depth;
|
|
this.out_sx = json.out_sx;
|
|
this.out_sy = json.out_sy;
|
|
this.layer_type = json.layer_type;
|
|
this.num_inputs = json.num_inputs;
|
|
this.l1_decay_mul = typeof json.l1_decay_mul !== 'undefined' ? json.l1_decay_mul : 1.0;
|
|
this.l2_decay_mul = typeof json.l2_decay_mul !== 'undefined' ? json.l2_decay_mul : 1.0;
|
|
this.filters = [];
|
|
for(var i=0;i<json.filters.length;i++) {
|
|
var v = new Vol(0,0,0,0);
|
|
v.fromJSON(json.filters[i]);
|
|
this.filters.push(v);
|
|
}
|
|
this.biases = new Vol(0,0,0,0);
|
|
this.biases.fromJSON(json.biases);
|
|
}
|
|
}
|
|
|
|
convnet.ConvLayer = ConvLayer;
|
|
convnet.FullyConnLayer = FullyConnLayer;
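
// Standalone usage sketch (not invoked anywhere; sizes are illustrative). In practice the
// in_sx/in_sy/in_depth fields are filled in by Net.makeLayers, but the layers also work alone.
// A 5x5 conv with stride 1 and pad 2 preserves spatial size: floor((32 + 2*2 - 5)/1 + 1) = 32.
var example_dotproduct_layers = function() {
var conv = new ConvLayer({in_sx: 32, in_sy: 32, in_depth: 3, sx: 5, filters: 16, stride: 1, pad: 2});
var h = conv.forward(new Vol(32, 32, 3)); // Vol of size 32x32x16
var fc = new FullyConnLayer({in_sx: 32, in_sy: 32, in_depth: 16, num_neurons: 10});
return fc.forward(h);                     // Vol of size 1x1x10
};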
|
|
|
|
|
|
/*** convnet_layers_pool ***/
|
|
var PoolLayer = function(opt) {
|
|
|
|
var opt = opt || {};
|
|
|
|
// required
|
|
this.sx = opt.sx; // filter size
|
|
this.in_depth = opt.in_depth;
|
|
this.in_sx = opt.in_sx;
|
|
this.in_sy = opt.in_sy;
|
|
|
|
// optional
|
|
this.sy = typeof opt.sy !== 'undefined' ? opt.sy : this.sx;
|
|
this.stride = typeof opt.stride !== 'undefined' ? opt.stride : 2;
|
|
this.pad = typeof opt.pad !== 'undefined' ? opt.pad : 0; // amount of 0 padding to add around borders of input volume
|
|
|
|
// computed
|
|
this.out_depth = this.in_depth;
|
|
this.out_sx = Math.floor((this.in_sx + this.pad * 2 - this.sx) / this.stride + 1);
|
|
this.out_sy = Math.floor((this.in_sy + this.pad * 2 - this.sy) / this.stride + 1);
|
|
this.layer_type = 'pool';
|
|
// store switches for x,y coordinates for where the max comes from, for each output neuron
|
|
this.switchx = convnet.zeros(this.out_sx*this.out_sy*this.out_depth);
|
|
this.switchy = convnet.zeros(this.out_sx*this.out_sy*this.out_depth);
|
|
}
|
|
|
|
PoolLayer.prototype = {
|
|
forward: function(V, is_training) {
|
|
this.in_act = V;
|
|
|
|
var A = new Vol(this.out_sx, this.out_sy, this.out_depth, 0.0);
|
|
|
|
var n=0; // a counter for switches
|
|
for(var d=0;d<this.out_depth;d++) {
|
|
var x = -this.pad;
|
|
var y = -this.pad;
|
|
for(var ax=0; ax<this.out_sx; x+=this.stride,ax++) {
|
|
y = -this.pad;
|
|
for(var ay=0; ay<this.out_sy; y+=this.stride,ay++) {
|
|
|
|
// convolve centered at this particular location
|
|
var a = -99999; // hopefully small enough ;\
|
|
var winx=-1,winy=-1;
|
|
for(var fx=0;fx<this.sx;fx++) {
|
|
for(var fy=0;fy<this.sy;fy++) {
|
|
var oy = y+fy;
|
|
var ox = x+fx;
|
|
if(oy>=0 && oy<V.sy && ox>=0 && ox<V.sx) {
|
|
var v = V.get(ox, oy, d);
|
|
// perform max pooling and store pointers to where
|
|
// the max came from. This will speed up backprop
|
|
// and can help make nice visualizations in future
|
|
if(v > a) { a = v; winx=ox; winy=oy;}
|
|
}
|
|
}
|
|
}
|
|
this.switchx[n] = winx;
|
|
this.switchy[n] = winy;
|
|
n++;
|
|
A.set(ax, ay, d, a);
|
|
}
|
|
}
|
|
}
|
|
this.out_act = A;
|
|
return this.out_act;
|
|
},
|
|
backward: function() {
|
|
// pooling layers have no parameters, so simply compute
|
|
// gradient wrt data here
|
|
var V = this.in_act;
|
|
V.dw = convnet.zeros(V.w.length); // zero out gradient wrt data
|
|
var A = this.out_act; // computed in forward pass
|
|
|
|
var n = 0;
|
|
for(var d=0;d<this.out_depth;d++) {
|
|
var x = -this.pad;
|
|
var y = -this.pad;
|
|
for(var ax=0; ax<this.out_sx; x+=this.stride,ax++) {
|
|
y = -this.pad;
|
|
for(var ay=0; ay<this.out_sy; y+=this.stride,ay++) {
|
|
|
|
var chain_grad = this.out_act.get_grad(ax,ay,d);
|
|
V.add_grad(this.switchx[n], this.switchy[n], d, chain_grad);
|
|
n++;
|
|
|
|
}
|
|
}
|
|
}
|
|
},
|
|
getParamsAndGrads: function() {
|
|
return [];
|
|
},
|
|
toJSON: function() {
|
|
var json = {};
|
|
json.sx = this.sx;
|
|
json.sy = this.sy;
|
|
json.stride = this.stride;
|
|
json.in_depth = this.in_depth;
|
|
json.out_depth = this.out_depth;
|
|
json.out_sx = this.out_sx;
|
|
json.out_sy = this.out_sy;
|
|
json.layer_type = this.layer_type;
|
|
json.pad = this.pad;
|
|
return json;
|
|
},
|
|
fromJSON: function(json) {
|
|
this.out_depth = json.out_depth;
|
|
this.out_sx = json.out_sx;
|
|
this.out_sy = json.out_sy;
|
|
this.layer_type = json.layer_type;
|
|
this.sx = json.sx;
|
|
this.sy = json.sy;
|
|
this.stride = json.stride;
|
|
this.in_depth = json.in_depth;
|
|
this.pad = typeof json.pad !== 'undefined' ? json.pad : 0; // backwards compatibility
|
|
this.switchx = convnet.zeros(this.out_sx*this.out_sy*this.out_depth); // need to re-init these appropriately
|
|
this.switchy = convnet.zeros(this.out_sx*this.out_sy*this.out_depth);
|
|
}
|
|
}
|
|
|
|
convnet.PoolLayer = PoolLayer;
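
// Usage sketch (not invoked anywhere): 2x2 max pooling with stride 2 halves each spatial
// dimension and keeps depth, e.g. 32x32x16 -> 16x16x16.
var example_pool_layer = function() {
var pool = new PoolLayer({in_sx: 32, in_sy: 32, in_depth: 16, sx: 2, stride: 2});
return pool.forward(new Vol(32, 32, 16)); // Vol of size 16x16x16
};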
|
|
|
|
|
|
/*** convnet_layers_input ***/
|
|
var getopt = convnet.getopt;
|
|
|
|
var InputLayer = function(opt) {
|
|
var opt = opt || {};
|
|
|
|
// required: depth
|
|
this.out_depth = getopt(opt, ['out_depth', 'depth'], 0);
|
|
|
|
// optional: default these dimensions to 1
|
|
this.out_sx = getopt(opt, ['out_sx', 'sx', 'width'], 1);
|
|
this.out_sy = getopt(opt, ['out_sy', 'sy', 'height'], 1);
|
|
|
|
// computed
|
|
this.layer_type = 'input';
|
|
}
|
|
InputLayer.prototype = {
|
|
forward: function(V, is_training) {
|
|
this.in_act = V;
|
|
this.out_act = V;
|
|
return this.out_act; // simply identity function for now
|
|
},
|
|
backward: function() { },
|
|
getParamsAndGrads: function() {
|
|
return [];
|
|
},
|
|
toJSON: function() {
|
|
var json = {};
|
|
json.out_depth = this.out_depth;
|
|
json.out_sx = this.out_sx;
|
|
json.out_sy = this.out_sy;
|
|
json.layer_type = this.layer_type;
|
|
return json;
|
|
},
|
|
fromJSON: function(json) {
|
|
this.out_depth = json.out_depth;
|
|
this.out_sx = json.out_sx;
|
|
this.out_sy = json.out_sy;
|
|
this.layer_type = json.layer_type;
|
|
}
|
|
}
|
|
|
|
convnet.InputLayer = InputLayer;
|
|
|
|
|
|
/*** convnet_layers_loss ***/
|
|
// Layers that implement a loss. Currently these are the layers that
|
|
// can initiate a backward() pass. In future we probably want a more
|
|
// flexible system that can accommodate multiple losses to do multi-task
|
|
// learning, and stuff like that. But for now, one of the layers in this
|
|
// file must be the final layer in a Net.
|
|
|
|
// This is a classifier, with N discrete classes from 0 to N-1
|
|
// it gets a stream of N incoming numbers and computes the softmax
|
|
// function (exponentiate and normalize to sum to 1 as probabilities should)
|
|
var SoftmaxLayer = function(opt) {
|
|
var opt = opt || {};
|
|
|
|
// computed
|
|
this.num_inputs = opt.in_sx * opt.in_sy * opt.in_depth;
|
|
this.out_depth = this.num_inputs;
|
|
this.out_sx = 1;
|
|
this.out_sy = 1;
|
|
this.layer_type = 'softmax';
|
|
}
|
|
|
|
SoftmaxLayer.prototype = {
|
|
forward: function(V, is_training) {
|
|
this.in_act = V;
|
|
|
|
var A = new Vol(1, 1, this.out_depth, 0.0);
|
|
|
|
// compute max activation
|
|
var as = V.w;
|
|
var amax = V.w[0];
|
|
for(var i=1;i<this.out_depth;i++) {
|
|
if(as[i] > amax) amax = as[i];
|
|
}
|
|
|
|
// compute exponentials (carefully to not blow up)
|
|
var es = convnet.zeros(this.out_depth);
|
|
var esum = 0.0;
|
|
for(var i=0;i<this.out_depth;i++) {
|
|
var e = Math.exp(as[i] - amax);
|
|
esum += e;
|
|
es[i] = e;
|
|
}
|
|
|
|
// normalize and output to sum to one
|
|
for(var i=0;i<this.out_depth;i++) {
|
|
es[i] /= esum;
|
|
A.w[i] = es[i];
|
|
}
|
|
|
|
this.es = es; // save these for backprop
|
|
this.out_act = A;
|
|
return this.out_act;
|
|
},
|
|
backward: function(y) {
|
|
|
|
// compute and accumulate gradient wrt weights and bias of this layer
|
|
var x = this.in_act;
|
|
x.dw = convnet.zeros(x.w.length); // zero out the gradient of input Vol
|
|
|
|
for(var i=0;i<this.out_depth;i++) {
|
|
var indicator = i === y ? 1.0 : 0.0;
|
|
var mul = -(indicator - this.es[i]);
|
|
x.dw[i] = mul;
|
|
}
|
|
|
|
// loss is the class negative log likelihood
|
|
return -Math.log(this.es[y]);
|
|
},
|
|
getParamsAndGrads: function() {
|
|
return [];
|
|
},
|
|
toJSON: function() {
|
|
var json = {};
|
|
json.out_depth = this.out_depth;
|
|
json.out_sx = this.out_sx;
|
|
json.out_sy = this.out_sy;
|
|
json.layer_type = this.layer_type;
|
|
json.num_inputs = this.num_inputs;
|
|
return json;
|
|
},
|
|
fromJSON: function(json) {
|
|
this.out_depth = json.out_depth;
|
|
this.out_sx = json.out_sx;
|
|
this.out_sy = json.out_sy;
|
|
this.layer_type = json.layer_type;
|
|
this.num_inputs = json.num_inputs;
|
|
}
|
|
}
|
|
|
|
// implements an L2 regression cost layer,
|
|
// so penalizes \sum_i(||x_i - y_i||^2), where x is its input
|
|
// and y is the user-provided array of "correct" values.
|
|
var RegressionLayer = function(opt) {
|
|
var opt = opt || {};
|
|
|
|
// computed
|
|
this.num_inputs = opt.in_sx * opt.in_sy * opt.in_depth;
|
|
this.out_depth = this.num_inputs;
|
|
this.out_sx = 1;
|
|
this.out_sy = 1;
|
|
this.layer_type = 'regression';
|
|
}
|
|
|
|
RegressionLayer.prototype = {
|
|
forward: function(V, is_training) {
|
|
this.in_act = V;
|
|
this.out_act = V;
|
|
return V; // identity function
|
|
},
|
|
// y is a list here of size num_inputs
|
|
// or it can be a number if only one value is regressed
|
|
// or it can be a struct {dim: i, val: x} where we only want to
|
|
// regress on dimension i and asking it to have value x
|
|
backward: function(y) {
|
|
|
|
// compute and accumulate gradient wrt weights and bias of this layer
|
|
var x = this.in_act;
|
|
x.dw = convnet.zeros(x.w.length); // zero out the gradient of input Vol
|
|
var loss = 0.0;
|
|
if(y instanceof Array || y instanceof Float64Array) {
|
|
for(var i=0;i<this.out_depth;i++) {
|
|
var dy = x.w[i] - y[i];
|
|
x.dw[i] = dy;
|
|
loss += 0.5*dy*dy;
|
|
}
|
|
} else if(typeof y === 'number') {
|
|
// lets hope that only one number is being regressed
|
|
var dy = x.w[0] - y;
|
|
x.dw[0] = dy;
|
|
loss += 0.5*dy*dy;
|
|
} else {
|
|
// assume it is a struct with entries .dim and .val
|
|
// and we pass gradient only along dimension dim to be equal to val
|
|
var i = y.dim;
|
|
var yi = y.val;
|
|
var dy = x.w[i] - yi;
|
|
x.dw[i] = dy;
|
|
loss += 0.5*dy*dy;
|
|
}
|
|
return loss;
|
|
},
|
|
getParamsAndGrads: function() {
|
|
return [];
|
|
},
|
|
toJSON: function() {
|
|
var json = {};
|
|
json.out_depth = this.out_depth;
|
|
json.out_sx = this.out_sx;
|
|
json.out_sy = this.out_sy;
|
|
json.layer_type = this.layer_type;
|
|
json.num_inputs = this.num_inputs;
|
|
return json;
|
|
},
|
|
fromJSON: function(json) {
|
|
this.out_depth = json.out_depth;
|
|
this.out_sx = json.out_sx;
|
|
this.out_sy = json.out_sy;
|
|
this.layer_type = json.layer_type;
|
|
this.num_inputs = json.num_inputs;
|
|
}
|
|
}
|
|
|
|
var SVMLayer = function(opt) {
|
|
var opt = opt || {};
|
|
|
|
// computed
|
|
this.num_inputs = opt.in_sx * opt.in_sy * opt.in_depth;
|
|
this.out_depth = this.num_inputs;
|
|
this.out_sx = 1;
|
|
this.out_sy = 1;
|
|
this.layer_type = 'svm';
|
|
}
|
|
|
|
SVMLayer.prototype = {
|
|
forward: function(V, is_training) {
|
|
this.in_act = V;
|
|
this.out_act = V; // nothing to do, output raw scores
|
|
return V;
|
|
},
|
|
backward: function(y) {
|
|
|
|
// compute and accumulate gradient wrt weights and bias of this layer
|
|
var x = this.in_act;
|
|
x.dw = convnet.zeros(x.w.length); // zero out the gradient of input Vol
|
|
|
|
// we're using structured loss here, which means that the score
|
|
// of the ground truth should be higher than the score of any other
|
|
// class, by a margin
|
|
var yscore = x.w[y]; // score of ground truth
|
|
var margin = 1.0;
|
|
var loss = 0.0;
|
|
for(var i=0;i<this.out_depth;i++) {
|
|
if(y === i) { continue; }
|
|
var ydiff = -yscore + x.w[i] + margin;
|
|
if(ydiff > 0) {
|
|
// violating dimension, apply loss
|
|
x.dw[i] += 1;
|
|
x.dw[y] -= 1;
|
|
loss += ydiff;
|
|
}
|
|
}
|
|
|
|
return loss;
|
|
},
|
|
getParamsAndGrads: function() {
|
|
return [];
|
|
},
|
|
toJSON: function() {
|
|
var json = {};
|
|
json.out_depth = this.out_depth;
|
|
json.out_sx = this.out_sx;
|
|
json.out_sy = this.out_sy;
|
|
json.layer_type = this.layer_type;
|
|
json.num_inputs = this.num_inputs;
|
|
return json;
|
|
},
|
|
fromJSON: function(json) {
|
|
this.out_depth = json.out_depth;
|
|
this.out_sx = json.out_sx;
|
|
this.out_sy = json.out_sy;
|
|
this.layer_type = json.layer_type;
|
|
this.num_inputs = json.num_inputs;
|
|
}
|
|
}
|
|
|
|
convnet.RegressionLayer = RegressionLayer;
|
|
convnet.SoftmaxLayer = SoftmaxLayer;
|
|
convnet.SVMLayer = SVMLayer;
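
// Usage sketch of a loss layer on its own (not invoked anywhere): forward turns raw scores
// into probabilities, backward(y) writes gradients into the input Vol and returns the loss.
var example_softmax_loss = function() {
var softmax = new SoftmaxLayer({in_sx: 1, in_sy: 1, in_depth: 3});
var probs = softmax.forward(new Vol([1.0, 2.0, 0.5])); // probs.w sums to 1
var nll = softmax.backward(1);                         // -log(probs.w[1]), loss for class 1
return {probs: probs, nll: nll};
};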
|
|
|
|
|
|
/*** convnet_layers_nonlinearities ***/
|
|
// Implements ReLU nonlinearity elementwise
|
|
// x -> max(0, x)
|
|
// the output is in [0, inf)
|
|
var ReluLayer = function(opt) {
|
|
var opt = opt || {};
|
|
|
|
// computed
|
|
this.out_sx = opt.in_sx;
|
|
this.out_sy = opt.in_sy;
|
|
this.out_depth = opt.in_depth;
|
|
this.layer_type = 'relu';
|
|
}
|
|
ReluLayer.prototype = {
|
|
forward: function(V, is_training) {
|
|
this.in_act = V;
|
|
var V2 = V.clone();
|
|
var N = V.w.length;
|
|
var V2w = V2.w;
|
|
for(var i=0;i<N;i++) {
|
|
if(V2w[i] < 0) V2w[i] = 0; // threshold at 0
|
|
}
|
|
this.out_act = V2;
|
|
return this.out_act;
|
|
},
|
|
backward: function() {
|
|
var V = this.in_act; // we need to set dw of this
|
|
var V2 = this.out_act;
|
|
var N = V.w.length;
|
|
V.dw = convnet.zeros(N); // zero out gradient wrt data
|
|
for(var i=0;i<N;i++) {
|
|
if(V2.w[i] <= 0) V.dw[i] = 0; // threshold
|
|
else V.dw[i] = V2.dw[i];
|
|
}
|
|
},
|
|
getParamsAndGrads: function() {
|
|
return [];
|
|
},
|
|
toJSON: function() {
|
|
var json = {};
|
|
json.out_depth = this.out_depth;
|
|
json.out_sx = this.out_sx;
|
|
json.out_sy = this.out_sy;
|
|
json.layer_type = this.layer_type;
|
|
return json;
|
|
},
|
|
fromJSON: function(json) {
|
|
this.out_depth = json.out_depth;
|
|
this.out_sx = json.out_sx;
|
|
this.out_sy = json.out_sy;
|
|
this.layer_type = json.layer_type;
|
|
}
|
|
}
|
|
|
|
// Implements Sigmoid nonlinearity elementwise
|
|
// x -> 1/(1+e^(-x))
|
|
// so the output is between 0 and 1.
|
|
var SigmoidLayer = function(opt) {
|
|
var opt = opt || {};
|
|
|
|
// computed
|
|
this.out_sx = opt.in_sx;
|
|
this.out_sy = opt.in_sy;
|
|
this.out_depth = opt.in_depth;
|
|
this.layer_type = 'sigmoid';
|
|
}
|
|
SigmoidLayer.prototype = {
|
|
forward: function(V, is_training) {
|
|
this.in_act = V;
|
|
var V2 = V.cloneAndZero();
|
|
var N = V.w.length;
|
|
var V2w = V2.w;
|
|
var Vw = V.w;
|
|
for(var i=0;i<N;i++) {
|
|
V2w[i] = 1.0/(1.0+Math.exp(-Vw[i]));
|
|
}
|
|
this.out_act = V2;
|
|
return this.out_act;
|
|
},
|
|
backward: function() {
|
|
var V = this.in_act; // we need to set dw of this
|
|
var V2 = this.out_act;
|
|
var N = V.w.length;
|
|
V.dw = convnet.zeros(N); // zero out gradient wrt data
|
|
for(var i=0;i<N;i++) {
|
|
var v2wi = V2.w[i];
|
|
V.dw[i] = v2wi * (1.0 - v2wi) * V2.dw[i];
|
|
}
|
|
},
|
|
getParamsAndGrads: function() {
|
|
return [];
|
|
},
|
|
toJSON: function() {
|
|
var json = {};
|
|
json.out_depth = this.out_depth;
|
|
json.out_sx = this.out_sx;
|
|
json.out_sy = this.out_sy;
|
|
json.layer_type = this.layer_type;
|
|
return json;
|
|
},
|
|
fromJSON: function(json) {
|
|
this.out_depth = json.out_depth;
|
|
this.out_sx = json.out_sx;
|
|
this.out_sy = json.out_sy;
|
|
this.layer_type = json.layer_type;
|
|
}
|
|
}
|
|
|
|
// Implements Maxout nonlinearity that computes
|
|
// x -> max(x)
|
|
// where x is a vector of size group_size. Ideally of course,
|
|
// the input size should be exactly divisible by group_size
|
|
var MaxoutLayer = function(opt) {
|
|
var opt = opt || {};
|
|
|
|
// required
|
|
this.group_size = typeof opt.group_size !== 'undefined' ? opt.group_size : 2;
|
|
|
|
// computed
|
|
this.out_sx = opt.in_sx;
|
|
this.out_sy = opt.in_sy;
|
|
this.out_depth = Math.floor(opt.in_depth / this.group_size);
|
|
this.layer_type = 'maxout';
|
|
|
|
this.switches = convnet.zeros(this.out_sx*this.out_sy*this.out_depth); // useful for backprop
|
|
}
|
|
MaxoutLayer.prototype = {
|
|
forward: function(V, is_training) {
|
|
this.in_act = V;
|
|
var N = this.out_depth;
|
|
var V2 = new Vol(this.out_sx, this.out_sy, this.out_depth, 0.0);
|
|
|
|
// optimization branch. If we're operating on 1D arrays we dont have
|
|
// to worry about keeping track of x,y,d coordinates inside
|
|
// input volumes. In convnets we do :(
|
|
if(this.out_sx === 1 && this.out_sy === 1) {
|
|
for(var i=0;i<N;i++) {
|
|
var ix = i * this.group_size; // base index offset
|
|
var a = V.w[ix];
|
|
var ai = 0;
|
|
for(var j=1;j<this.group_size;j++) {
|
|
var a2 = V.w[ix+j];
|
|
if(a2 > a) {
|
|
a = a2;
|
|
ai = j;
|
|
}
|
|
}
|
|
V2.w[i] = a;
|
|
this.switches[i] = ix + ai;
|
|
}
|
|
} else {
|
|
var n=0; // counter for switches
|
|
for(var x=0;x<V.sx;x++) {
|
|
for(var y=0;y<V.sy;y++) {
|
|
for(var i=0;i<N;i++) {
|
|
var ix = i * this.group_size;
|
|
var a = V.get(x, y, ix);
|
|
var ai = 0;
|
|
for(var j=1;j<this.group_size;j++) {
|
|
var a2 = V.get(x, y, ix+j);
|
|
if(a2 > a) {
|
|
a = a2;
|
|
ai = j;
|
|
}
|
|
}
|
|
V2.set(x,y,i,a);
|
|
this.switches[n] = ix + ai;
|
|
n++;
|
|
}
|
|
}
|
|
}
|
|
|
|
}
|
|
this.out_act = V2;
|
|
return this.out_act;
|
|
},
|
|
backward: function() {
|
|
var V = this.in_act; // we need to set dw of this
|
|
var V2 = this.out_act;
|
|
var N = this.out_depth;
|
|
V.dw = convnet.zeros(V.w.length); // zero out gradient wrt data
|
|
|
|
// pass the gradient through the appropriate switch
|
|
if(this.out_sx === 1 && this.out_sy === 1) {
|
|
for(var i=0;i<N;i++) {
|
|
var chain_grad = V2.dw[i];
|
|
V.dw[this.switches[i]] = chain_grad;
|
|
}
|
|
} else {
|
|
// bleh okay, lets do this the hard way
|
|
var n=0; // counter for switches
|
|
for(var x=0;x<V2.sx;x++) {
|
|
for(var y=0;y<V2.sy;y++) {
|
|
for(var i=0;i<N;i++) {
|
|
var chain_grad = V2.get_grad(x,y,i);
|
|
V.set_grad(x,y,this.switches[n],chain_grad);
|
|
n++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
},
|
|
getParamsAndGrads: function() {
|
|
return [];
|
|
},
|
|
toJSON: function() {
|
|
var json = {};
|
|
json.out_depth = this.out_depth;
|
|
json.out_sx = this.out_sx;
|
|
json.out_sy = this.out_sy;
|
|
json.layer_type = this.layer_type;
|
|
json.group_size = this.group_size;
|
|
return json;
|
|
},
|
|
fromJSON: function(json) {
|
|
this.out_depth = json.out_depth;
|
|
this.out_sx = json.out_sx;
|
|
this.out_sy = json.out_sy;
|
|
this.layer_type = json.layer_type;
|
|
this.group_size = json.group_size;
|
|
this.switches = convnet.zeros(this.out_sx*this.out_sy*this.out_depth); // one switch per output unit, matching the constructor
|
|
}
|
|
}
|
|
|
|
// a helper function, since Math.tanh is only part of ECMAScript from ES2015 (ES6) onwards.
|
|
function tanh(x) {
|
|
var y = Math.exp(2 * x);
|
|
return (y - 1) / (y + 1);
|
|
}
|
|
// Implements Tanh nonlinearity elementwise
|
|
// x -> tanh(x)
|
|
// so the output is between -1 and 1.
|
|
var TanhLayer = function(opt) {
|
|
var opt = opt || {};
|
|
|
|
// computed
|
|
this.out_sx = opt.in_sx;
|
|
this.out_sy = opt.in_sy;
|
|
this.out_depth = opt.in_depth;
|
|
this.layer_type = 'tanh';
|
|
}
|
|
TanhLayer.prototype = {
|
|
forward: function(V, is_training) {
|
|
this.in_act = V;
|
|
var V2 = V.cloneAndZero();
|
|
var N = V.w.length;
|
|
for(var i=0;i<N;i++) {
|
|
V2.w[i] = tanh(V.w[i]);
|
|
}
|
|
this.out_act = V2;
|
|
return this.out_act;
|
|
},
|
|
backward: function() {
|
|
var V = this.in_act; // we need to set dw of this
|
|
var V2 = this.out_act;
|
|
var N = V.w.length;
|
|
V.dw = convnet.zeros(N); // zero out gradient wrt data
|
|
for(var i=0;i<N;i++) {
|
|
var v2wi = V2.w[i];
|
|
V.dw[i] = (1.0 - v2wi * v2wi) * V2.dw[i];
|
|
}
|
|
},
|
|
getParamsAndGrads: function() {
|
|
return [];
|
|
},
|
|
toJSON: function() {
|
|
var json = {};
|
|
json.out_depth = this.out_depth;
|
|
json.out_sx = this.out_sx;
|
|
json.out_sy = this.out_sy;
|
|
json.layer_type = this.layer_type;
|
|
return json;
|
|
},
|
|
fromJSON: function(json) {
|
|
this.out_depth = json.out_depth;
|
|
this.out_sx = json.out_sx;
|
|
this.out_sy = json.out_sy;
|
|
this.layer_type = json.layer_type;
|
|
}
|
|
}
|
|
|
|
convnet.TanhLayer = TanhLayer;
|
|
convnet.MaxoutLayer = MaxoutLayer;
|
|
convnet.ReluLayer = ReluLayer;
|
|
convnet.SigmoidLayer = SigmoidLayer;
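
// Usage sketch (not invoked anywhere): nonlinearity layers are elementwise and shape-preserving.
// ReLU thresholds at zero, and its backward pass only routes gradient where the output was positive.
var example_relu_layer = function() {
var relu = new ReluLayer({in_sx: 1, in_sy: 1, in_depth: 3});
return relu.forward(new Vol([-1.0, 0.5, 2.0])); // output weights are [0, 0.5, 2]
};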
|
|
|
|
|
|
|
|
|
|
/*** convnet_layers_dropout ***/
|
|
// An inefficient dropout layer
|
|
// Note this is not the most efficient implementation, since the layer before
|
|
// computed all these activations and now we're just going to drop them :(
|
|
// same goes for backward pass. Also, if we wanted to be efficient at test time
|
|
// we could equivalently be clever and upscale during train and copy pointers during test
|
|
// todo: make more efficient.
|
|
var DropoutLayer = function(opt) {
|
|
var opt = opt || {};
|
|
|
|
// computed
|
|
this.out_sx = opt.in_sx;
|
|
this.out_sy = opt.in_sy;
|
|
this.out_depth = opt.in_depth;
|
|
this.layer_type = 'dropout';
|
|
this.drop_prob = typeof opt.drop_prob !== 'undefined' ? opt.drop_prob : 0.5;
|
|
this.dropped = convnet.zeros(this.out_sx*this.out_sy*this.out_depth);
|
|
}
|
|
DropoutLayer.prototype = {
|
|
forward: function(V, is_training) {
|
|
this.in_act = V;
|
|
if(typeof(is_training)==='undefined') { is_training = false; } // default is prediction mode
|
|
var V2 = V.clone();
|
|
var N = V.w.length;
|
|
if(is_training) {
|
|
// do dropout
|
|
for(var i=0;i<N;i++) {
|
|
if(Math.random()<this.drop_prob) { V2.w[i]=0; this.dropped[i] = true; } // drop!
|
|
else {this.dropped[i] = false;}
|
|
}
|
|
} else {
|
|
// scale the activations during prediction
|
|
for(var i=0;i<N;i++) { V2.w[i]*=this.drop_prob; }
|
|
}
|
|
this.out_act = V2;
|
|
return this.out_act; // dummy identity function for now
|
|
},
|
|
backward: function() {
|
|
var V = this.in_act; // we need to set dw of this
|
|
var chain_grad = this.out_act;
|
|
var N = V.w.length;
|
|
V.dw = convnet.zeros(N); // zero out gradient wrt data
|
|
for(var i=0;i<N;i++) {
|
|
if(!(this.dropped[i])) {
|
|
V.dw[i] = chain_grad.dw[i]; // copy over the gradient
|
|
}
|
|
}
|
|
},
|
|
getParamsAndGrads: function() {
|
|
return [];
|
|
},
|
|
toJSON: function() {
|
|
var json = {};
|
|
json.out_depth = this.out_depth;
|
|
json.out_sx = this.out_sx;
|
|
json.out_sy = this.out_sy;
|
|
json.layer_type = this.layer_type;
|
|
json.drop_prob = this.drop_prob;
|
|
return json;
|
|
},
|
|
fromJSON: function(json) {
|
|
this.out_depth = json.out_depth;
|
|
this.out_sx = json.out_sx;
|
|
this.out_sy = json.out_sy;
|
|
this.layer_type = json.layer_type;
|
|
this.drop_prob = json.drop_prob;
|
|
}
|
|
}
|
|
|
|
convnet.DropoutLayer = DropoutLayer;
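
// Usage sketch (not invoked anywhere): dropout behaves differently at train and test time.
// With is_training = true roughly drop_prob of the activations are zeroed at random; with
// is_training = false (the default) every activation is scaled by drop_prob instead.
var example_dropout_layer = function() {
var drop = new DropoutLayer({in_sx: 1, in_sy: 1, in_depth: 4, drop_prob: 0.5});
var v = new Vol([1.0, 2.0, 3.0, 4.0]);
var train_out = drop.forward(v, true); // some entries zeroed
var test_out = drop.forward(v, false); // every entry multiplied by 0.5
return {train_out: train_out, test_out: test_out};
};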
|
|
|
|
/*** convnet_layers_normalization ***/
|
|
// a bit experimental layer for now. I think it works but I'm not 100% sure;
// the gradient check is a bit funky. I'll look into this a bit later.
|
|
// Local Response Normalization in window, along depths of volumes
|
|
var LocalResponseNormalizationLayer = function(opt) {
|
|
var opt = opt || {};
|
|
|
|
// required
|
|
this.k = opt.k;
|
|
this.n = opt.n;
|
|
this.alpha = opt.alpha;
|
|
this.beta = opt.beta;
|
|
|
|
// computed
|
|
this.out_sx = opt.in_sx;
|
|
this.out_sy = opt.in_sy;
|
|
this.out_depth = opt.in_depth;
|
|
this.layer_type = 'lrn';
|
|
|
|
// checks
|
|
if(this.n%2 === 0) { console.log('WARNING n should be odd for LRN layer'); }
|
|
}
|
|
LocalResponseNormalizationLayer.prototype = {
|
|
forward: function(V, is_training) {
|
|
this.in_act = V;
|
|
|
|
var A = V.cloneAndZero();
|
|
this.S_cache_ = V.cloneAndZero();
|
|
var n2 = Math.floor(this.n/2);
|
|
for(var x=0;x<V.sx;x++) {
|
|
for(var y=0;y<V.sy;y++) {
|
|
for(var i=0;i<V.depth;i++) {
|
|
|
|
var ai = V.get(x,y,i);
|
|
|
|
// normalize in a window of size n
|
|
var den = 0.0;
|
|
for(var j=Math.max(0,i-n2);j<=Math.min(i+n2,V.depth-1);j++) {
|
|
var aa = V.get(x,y,j);
|
|
den += aa*aa;
|
|
}
|
|
den *= this.alpha / this.n;
|
|
den += this.k;
|
|
this.S_cache_.set(x,y,i,den); // will be useful for backprop
|
|
den = Math.pow(den, this.beta);
|
|
A.set(x,y,i,ai/den);
|
|
}
|
|
}
|
|
}
|
|
|
|
this.out_act = A;
|
|
return this.out_act; // dummy identity function for now
|
|
},
|
|
backward: function() {
|
|
// evaluate gradient wrt data
|
|
var V = this.in_act; // we need to set dw of this
|
|
V.dw = convnet.zeros(V.w.length); // zero out gradient wrt data
|
|
var A = this.out_act; // computed in forward pass
|
|
|
|
var n2 = Math.floor(this.n/2);
|
|
for(var x=0;x<V.sx;x++) {
|
|
for(var y=0;y<V.sy;y++) {
|
|
for(var i=0;i<V.depth;i++) {
|
|
|
|
var chain_grad = this.out_act.get_grad(x,y,i);
|
|
var S = this.S_cache_.get(x,y,i);
|
|
var SB = Math.pow(S, this.beta);
|
|
var SB2 = SB*SB;
|
|
|
|
// normalize in a window of size n
|
|
for(var j=Math.max(0,i-n2);j<=Math.min(i+n2,V.depth-1);j++) {
|
|
var aj = V.get(x,y,j);
|
|
var g = -aj*this.beta*Math.pow(S,this.beta-1)*this.alpha/this.n*2*aj;
|
|
if(j===i) g+= SB;
|
|
g /= SB2;
|
|
g *= chain_grad;
|
|
V.add_grad(x,y,j,g);
|
|
}
|
|
|
|
}
|
|
}
|
|
}
|
|
},
|
|
getParamsAndGrads: function() { return []; },
|
|
toJSON: function() {
|
|
var json = {};
|
|
json.k = this.k;
|
|
json.n = this.n;
|
|
json.alpha = this.alpha; // normalize by size
|
|
json.beta = this.beta;
|
|
json.out_sx = this.out_sx;
|
|
json.out_sy = this.out_sy;
|
|
json.out_depth = this.out_depth;
|
|
json.layer_type = this.layer_type;
|
|
return json;
|
|
},
|
|
fromJSON: function(json) {
|
|
this.k = json.k;
|
|
this.n = json.n;
|
|
this.alpha = json.alpha; // normalize by size
|
|
this.beta = json.beta;
|
|
this.out_sx = json.out_sx;
|
|
this.out_sy = json.out_sy;
|
|
this.out_depth = json.out_depth;
|
|
this.layer_type = json.layer_type;
|
|
}
|
|
}
|
|
|
|
convnet.LocalResponseNormalizationLayer = LocalResponseNormalizationLayer;
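
// Usage sketch (not invoked anywhere; the hyperparameter values are illustrative, not prescribed).
// LRN normalizes each activation by the summed squares of its depth-wise neighbours:
// out(x,y,i) = a_i / (k + (alpha/n) * sum_{j in window of n around i} a_j^2)^beta
var example_lrn_layer = function() {
var lrn = new LocalResponseNormalizationLayer({in_sx: 8, in_sy: 8, in_depth: 16,
k: 2, n: 5, alpha: 1e-4, beta: 0.75});
return lrn.forward(new Vol(8, 8, 16));
};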
|
|
|
|
|
|
|
|
/*** convnet_net ***/
|
|
var assert = convnet.assert;
|
|
|
|
// Net manages a set of layers
|
|
// For now the constraints are: a simple linear order of layers, with the first layer an input layer and the last layer a cost (loss) layer
|
|
var Net = function(options) {
|
|
this.layers = [];
|
|
}
|
|
|
|
Net.prototype = {
|
|
|
|
// takes a list of layer definitions and creates the network layer objects
|
|
makeLayers: function(defs) {
|
|
|
|
// few checks
|
|
assert(defs.length >= 2, 'Error! At least one input layer and one loss layer are required.');
|
|
assert(defs[0].type === 'input', 'Error! First layer must be the input layer, to declare size of inputs');
|
|
|
|
// desugar layer_defs for adding activation, dropout layers etc
|
|
var desugar = function() {
|
|
var new_defs = [];
|
|
for(var i=0;i<defs.length;i++) {
|
|
var def = defs[i];
|
|
|
|
if(def.type==='softmax' || def.type==='svm') {
|
|
// add an fc layer here, there is no reason the user should
|
|
// have to worry about this and we almost always want to
|
|
new_defs.push({type:'fc', num_neurons: def.num_classes});
|
|
}
|
|
|
|
if(def.type==='regression') {
|
|
// add an fc layer here, there is no reason the user should
|
|
// have to worry about this and we almost always want to
|
|
new_defs.push({type:'fc', num_neurons: def.num_neurons});
|
|
}
|
|
|
|
if((def.type==='fc' || def.type==='conv')
|
|
&& typeof(def.bias_pref) === 'undefined'){
|
|
def.bias_pref = 0.0;
|
|
if(typeof def.activation !== 'undefined' && def.activation === 'relu') {
|
|
def.bias_pref = 0.1; // relus like a bit of positive bias to get gradients early
|
|
// otherwise it's technically possible that a relu unit will never turn on (by chance)
|
|
// and will never get any gradient and never contribute any computation. Dead relu.
|
|
}
|
|
}
|
|
|
|
new_defs.push(def);
|
|
|
|
if(typeof def.activation !== 'undefined') {
|
|
if(def.activation==='relu') { new_defs.push({type:'relu'}); }
|
|
else if (def.activation==='sigmoid') { new_defs.push({type:'sigmoid'}); }
|
|
else if (def.activation==='tanh') { new_defs.push({type:'tanh'}); }
|
|
else if (def.activation==='maxout') {
|
|
// create maxout activation, and pass along group size, if provided
|
|
var gs = typeof def.group_size !== 'undefined' ? def.group_size : 2;
|
|
new_defs.push({type:'maxout', group_size:gs});
|
|
}
|
|
else { console.log('ERROR unsupported activation ' + def.activation); }
|
|
}
|
|
if(typeof def.drop_prob !== 'undefined' && def.type !== 'dropout') {
|
|
new_defs.push({type:'dropout', drop_prob: def.drop_prob});
|
|
}
|
|
|
|
}
|
|
return new_defs;
|
|
}
|
|
defs = desugar(defs);
|
|
|
|
// create the layers
|
|
this.layers = [];
|
|
for(var i=0;i<defs.length;i++) {
|
|
var def = defs[i];
|
|
if(i>0) {
|
|
var prev = this.layers[i-1];
|
|
def.in_sx = prev.out_sx;
|
|
def.in_sy = prev.out_sy;
|
|
def.in_depth = prev.out_depth;
|
|
}
|
|
|
|
switch(def.type) {
|
|
case 'fc': this.layers.push(new convnet.FullyConnLayer(def)); break;
|
|
case 'lrn': this.layers.push(new convnet.LocalResponseNormalizationLayer(def)); break;
|
|
case 'dropout': this.layers.push(new convnet.DropoutLayer(def)); break;
|
|
case 'input': this.layers.push(new convnet.InputLayer(def)); break;
|
|
case 'softmax': this.layers.push(new convnet.SoftmaxLayer(def)); break;
|
|
case 'regression': this.layers.push(new convnet.RegressionLayer(def)); break;
|
|
case 'conv': this.layers.push(new convnet.ConvLayer(def)); break;
|
|
case 'pool': this.layers.push(new convnet.PoolLayer(def)); break;
|
|
case 'relu': this.layers.push(new convnet.ReluLayer(def)); break;
|
|
case 'sigmoid': this.layers.push(new convnet.SigmoidLayer(def)); break;
|
|
case 'tanh': this.layers.push(new convnet.TanhLayer(def)); break;
|
|
case 'maxout': this.layers.push(new convnet.MaxoutLayer(def)); break;
|
|
case 'svm': this.layers.push(new convnet.SVMLayer(def)); break;
|
|
default: console.log('ERROR: UNRECOGNIZED LAYER TYPE: ' + def.type);
|
|
}
|
|
}
|
|
},
|
|
|
|
// forward prop the network.
|
|
// The trainer class passes is_training = true, but when this function is
|
|
// called from outside (not from the trainer), it defaults to prediction mode
|
|
forward: function(V, is_training) {
|
|
if(typeof(is_training) === 'undefined') is_training = false;
|
|
var act = this.layers[0].forward(V, is_training);
|
|
for(var i=1;i<this.layers.length;i++) {
|
|
act = this.layers[i].forward(act, is_training);
|
|
}
|
|
return act;
|
|
},
|
|
|
|
getCostLoss: function(V, y) {
|
|
this.forward(V, false);
|
|
var N = this.layers.length;
|
|
var loss = this.layers[N-1].backward(y);
|
|
return loss;
|
|
},
|
|
|
|
// backprop: compute gradients wrt all parameters
|
|
backward: function(y) {
|
|
var N = this.layers.length;
|
|
var loss = this.layers[N-1].backward(y); // last layer assumed to be loss layer
|
|
for(var i=N-2;i>=0;i--) { // first layer assumed input
|
|
this.layers[i].backward();
|
|
}
|
|
return loss;
|
|
},
|
|
getParamsAndGrads: function() {
|
|
// accumulate parameters and gradients for the entire network
|
|
var response = [];
|
|
for(var i=0;i<this.layers.length;i++) {
|
|
var layer_response = this.layers[i].getParamsAndGrads();
for(var j=0;j<layer_response.length;j++) {
response.push(layer_response[j]);
|
|
}
|
|
}
|
|
return response;
|
|
},
|
|
getPrediction: function() {
|
|
// this is a convenience function for returning the argmax
|
|
// prediction, assuming the last layer of the net is a softmax
|
|
var S = this.layers[this.layers.length-1];
|
|
assert(S.layer_type === 'softmax', 'getPrediction function assumes softmax as last layer of the net!');
|
|
|
|
var p = S.out_act.w;
|
|
var maxv = p[0];
|
|
var maxi = 0;
|
|
for(var i=1;i<p.length;i++) {
|
|
if(p[i] > maxv) { maxv = p[i]; maxi = i;}
|
|
}
|
|
return maxi; // return index of the class with highest class probability
|
|
},
|
|
toJSON: function() {
|
|
var json = {};
|
|
json.layers = [];
|
|
for(var i=0;i<this.layers.length;i++) {
|
|
json.layers.push(this.layers[i].toJSON());
|
|
}
|
|
return json;
|
|
},
|
|
fromJSON: function(json) {
|
|
this.layers = [];
|
|
for(var i=0;i<json.layers.length;i++) {
|
|
var Lj = json.layers[i]
|
|
var t = Lj.layer_type;
|
|
var L;
|
|
if(t==='input') { L = new convnet.InputLayer(); }
|
|
if(t==='relu') { L = new convnet.ReluLayer(); }
|
|
if(t==='sigmoid') { L = new convnet.SigmoidLayer(); }
|
|
if(t==='tanh') { L = new convnet.TanhLayer(); }
|
|
if(t==='dropout') { L = new convnet.DropoutLayer(); }
|
|
if(t==='conv') { L = new convnet.ConvLayer(); }
|
|
if(t==='pool') { L = new convnet.PoolLayer(); }
|
|
if(t==='lrn') { L = new convnet.LocalResponseNormalizationLayer(); }
|
|
if(t==='softmax') { L = new convnet.SoftmaxLayer(); }
|
|
if(t==='regression') { L = new convnet.RegressionLayer(); }
|
|
if(t==='fc') { L = new convnet.FullyConnLayer(); }
|
|
if(t==='maxout') { L = new convnet.MaxoutLayer(); }
|
|
if(t==='svm') { L = new convnet.SVMLayer(); }
|
|
L.fromJSON(Lj);
|
|
this.layers.push(L);
|
|
}
|
|
}
|
|
}
|
|
|
|
convnet.Net = Net;
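
// End-to-end usage sketch (not invoked anywhere; the architecture is illustrative). Networks
// are normally built from layer definitions, which makeLayers() desugars: it inserts the fc
// layer in front of the loss and expands the 'activation' field into separate layers.
var example_net_usage = function() {
var layer_defs = [];
layer_defs.push({type: 'input', out_sx: 24, out_sy: 24, out_depth: 1});
layer_defs.push({type: 'conv', sx: 5, filters: 8, stride: 1, pad: 2, activation: 'relu'});
layer_defs.push({type: 'pool', sx: 2, stride: 2});
layer_defs.push({type: 'softmax', num_classes: 10});
var net = new Net();
net.makeLayers(layer_defs);
var probs = net.forward(new Vol(24, 24, 1)); // 1x1x10 Vol of class probabilities
return {probs: probs, predicted: net.getPrediction()}; // predicted = index of the most likely class
};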
|
|
|
|
|
|
/*** convnet_trainers ***/
|
|
var Trainer = function(net, options) {
|
|
|
|
this.net = net;
|
|
|
|
var options = options || {};
|
|
this.learning_rate = typeof options.learning_rate !== 'undefined' ? options.learning_rate : 0.01;
|
|
this.l1_decay = typeof options.l1_decay !== 'undefined' ? options.l1_decay : 0.0;
|
|
this.l2_decay = typeof options.l2_decay !== 'undefined' ? options.l2_decay : 0.0;
|
|
this.batch_size = typeof options.batch_size !== 'undefined' ? options.batch_size : 1;
|
|
this.method = typeof options.method !== 'undefined' ? options.method : 'sgd'; // sgd/adam/adagrad/adadelta/windowgrad/nesterov
|
|
|
|
this.momentum = typeof options.momentum !== 'undefined' ? options.momentum : 0.9;
|
|
this.ro = typeof options.ro !== 'undefined' ? options.ro : 0.95; // used in adadelta
|
|
this.eps = typeof options.eps !== 'undefined' ? options.eps : 1e-8; // used in adam or adadelta
|
|
this.beta1 = typeof options.beta1 !== 'undefined' ? options.beta1 : 0.9; // used in adam
|
|
this.beta2 = typeof options.beta2 !== 'undefined' ? options.beta2 : 0.999; // used in adam
|
|
|
|
this.k = 0; // iteration counter
|
|
this.gsum = []; // last iteration gradients (used for momentum calculations)
|
|
this.xsum = []; // used in adam or adadelta
|
|
|
|
// check if regression is expected
|
|
if(this.net.layers[this.net.layers.length - 1].layer_type === "regression")
|
|
this.regression = true;
|
|
else
|
|
this.regression = false;
|
|
}
|
|
|
|
Trainer.prototype = {
|
|
train: function(x, y) {
|
|
|
|
var start = new Date().getTime();
|
|
this.net.forward(x, true); // also set the flag that lets the net know we're just training
|
|
var end = new Date().getTime();
|
|
var fwd_time = end - start;
|
|
|
|
var start = new Date().getTime();
|
|
var cost_loss = this.net.backward(y);
|
|
var l2_decay_loss = 0.0;
|
|
var l1_decay_loss = 0.0;
|
|
var end = new Date().getTime();
|
|
var bwd_time = end - start;
|
|
|
|
if(this.regression && y.constructor !== Array)
|
|
console.log("Warning: a regression net requires an array as training output vector.");
|
|
|
|
this.k++;
|
|
if(this.k % this.batch_size === 0) {
|
|
|
|
var pglist = this.net.getParamsAndGrads();
|
|
|
|
// initialize lists for accumulators. Will only be done once on first iteration
|
|
if(this.gsum.length === 0 && (this.method !== 'sgd' || this.momentum > 0.0)) {
|
|
// only vanilla sgd doesnt need either lists
|
|
// momentum needs gsum
|
|
// adagrad needs gsum
|
|
// adam and adadelta needs gsum and xsum
|
|
for(var i=0;i<pglist.length;i++) {
|
|
this.gsum.push(convnet.zeros(pglist[i].params.length));
|
|
if(this.method === 'adam' || this.method === 'adadelta') {
|
|
this.xsum.push(convnet.zeros(pglist[i].params.length));
|
|
} else {
|
|
this.xsum.push([]); // conserve memory
|
|
}
|
|
}
|
|
}
|
|
|
|
// perform an update for all sets of weights
|
|
for(var i=0;i<pglist.length;i++) {
|
|
var pg = pglist[i]; // param, gradient, other options in future (custom learning rate etc)
|
|
var p = pg.params;
|
|
var g = pg.grads;
|
|
|
|
// per-parameter l1/l2 decay multipliers (e.g. biases are excluded from weight decay).
|
|
var l2_decay_mul = typeof pg.l2_decay_mul !== 'undefined' ? pg.l2_decay_mul : 1.0;
|
|
var l1_decay_mul = typeof pg.l1_decay_mul !== 'undefined' ? pg.l1_decay_mul : 1.0;
|
|
var l2_decay = this.l2_decay * l2_decay_mul;
|
|
var l1_decay = this.l1_decay * l1_decay_mul;
|
|
|
|
var plen = p.length;
|
|
for(var j=0;j<plen;j++) {
|
|
l2_decay_loss += l2_decay*p[j]*p[j]/2; // accumulate weight decay loss
|
|
l1_decay_loss += l1_decay*Math.abs(p[j]);
|
|
var l1grad = l1_decay * (p[j] > 0 ? 1 : -1);
|
|
var l2grad = l2_decay * (p[j]);
|
|
|
|
var gij = (l2grad + l1grad + g[j]) / this.batch_size; // raw batch gradient
|
|
|
|
var gsumi = this.gsum[i];
|
|
var xsumi = this.xsum[i];
|
|
if(this.method === 'adam') {
|
|
// adam update
|
|
gsumi[j] = gsumi[j] * this.beta1 + (1- this.beta1) * gij; // update biased first moment estimate
|
|
xsumi[j] = xsumi[j] * this.beta2 + (1-this.beta2) * gij * gij; // update biased second moment estimate
|
|
var biasCorr1 = gsumi[j] / (1 - Math.pow(this.beta1, this.k)); // bias-corrected first moment estimate
var biasCorr2 = xsumi[j] / (1 - Math.pow(this.beta2, this.k)); // bias-corrected second moment estimate
|
|
var dx = - this.learning_rate * biasCorr1 / (Math.sqrt(biasCorr2) + this.eps);
|
|
p[j] += dx;
|
|
          } else if(this.method === 'adagrad') {
            // adagrad update
            gsumi[j] = gsumi[j] + gij * gij;
            var dx = - this.learning_rate / Math.sqrt(gsumi[j] + this.eps) * gij;
            p[j] += dx;
          } else if(this.method === 'windowgrad') {
            // this is adagrad but with a moving window weighted average
            // so the gradient is not accumulated over the entire history of the run.
            // it's also referred to as Idea #1 in the Zeiler paper on Adadelta. Seems reasonable to me!
            gsumi[j] = this.ro * gsumi[j] + (1-this.ro) * gij * gij;
            var dx = - this.learning_rate / Math.sqrt(gsumi[j] + this.eps) * gij; // eps added for better conditioning
            p[j] += dx;
          } else if(this.method === 'adadelta') {
            gsumi[j] = this.ro * gsumi[j] + (1-this.ro) * gij * gij;
            var dx = - Math.sqrt((xsumi[j] + this.eps)/(gsumi[j] + this.eps)) * gij;
            xsumi[j] = this.ro * xsumi[j] + (1-this.ro) * dx * dx; // yes, xsum lags behind gsum by 1.
            p[j] += dx;
          } else if(this.method === 'nesterov') {
            var dx = gsumi[j];
            gsumi[j] = gsumi[j] * this.momentum + this.learning_rate * gij;
            dx = this.momentum * dx - (1.0 + this.momentum) * gsumi[j];
            p[j] += dx;
          } else {
            // assume SGD
            if(this.momentum > 0.0) {
              // momentum update
              var dx = this.momentum * gsumi[j] - this.learning_rate * gij; // step
              gsumi[j] = dx; // back this up for next iteration of momentum
              p[j] += dx; // apply corrected gradient
            } else {
              // vanilla sgd
              p[j] += - this.learning_rate * gij;
            }
          }
          g[j] = 0.0; // zero out gradient so that we can begin accumulating anew
        }
      }
    }

    // appending softmax_loss for backwards compatibility, but from now on we will always use cost_loss
    // in future, TODO: have to completely redo the way loss is done around the network as currently
    // loss is a bit of a hack. Ideally, user should specify arbitrary number of loss functions on any layer
    // and it should all be computed correctly and automatically.
    return {fwd_time: fwd_time, bwd_time: bwd_time,
            l2_decay_loss: l2_decay_loss, l1_decay_loss: l1_decay_loss,
            cost_loss: cost_loss, softmax_loss: cost_loss,
            loss: cost_loss + l1_decay_loss + l2_decay_loss}
  }
}

convnet.Trainer = Trainer;
convnet.SGDTrainer = Trainer; // backwards compatibility

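// Usage sketch (illustrative, not part of the library): assumes `net` is a network
// built elsewhere with convnet.Net and makeLayers, `x` is an input convnet.Vol and
// `y` is its class index.
//
//   var trainer = new convnet.Trainer(net, {method: 'adadelta', batch_size: 10, l2_decay: 0.001});
//   var stats = trainer.train(x, y);
//   // stats holds fwd_time, bwd_time, cost_loss, the l1/l2 decay losses, and their sum in stats.loss
//   console.log('loss: ' + stats.loss);
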
/*** convnet_magicnets ***/
// used utilities, make explicit local references
var randf = convnet.randf;
var randi = convnet.randi;
var Net = convnet.Net;
var Trainer = convnet.Trainer;
var maxmin = convnet.maxmin;
var randperm = convnet.randperm;
var weightedSample = convnet.weightedSample;
var getopt = convnet.getopt;
var arrUnique = convnet.arrUnique;

/*
A MagicNet takes data: a list of convnetjs.Vol(), and labels
which for now are assumed to be class indices 0..K. MagicNet then:
- creates data folds for cross-validation
- samples candidate networks
- evaluates candidate networks on all data folds
- produces predictions by model-averaging the best networks
*/
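// Usage sketch (illustrative, not part of the library): `train_data` is assumed to be
// an array of convnet.Vol and `train_labels` an array of class indices.
//
//   var magicNet = new convnet.MagicNet(train_data, train_labels, {ensemble_size: 5});
//   for(var i=0;i<100000;i++) { magicNet.step(); } // samples, trains and cross-validates candidates
//   var predicted_class = magicNet.predict(test_vol); // model-averaged prediction for a Vol
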
var MagicNet = function(data, labels, opt) {
  var opt = opt || {};
  if(typeof data === 'undefined') { data = []; }
  if(typeof labels === 'undefined') { labels = []; }

  // required inputs
  this.data = data; // store these pointers to data
  this.labels = labels;

  // optional inputs
  this.train_ratio = getopt(opt, 'train_ratio', 0.7);
  this.num_folds = getopt(opt, 'num_folds', 10);
  this.num_candidates = getopt(opt, 'num_candidates', 50); // we evaluate several in parallel
  // how many epochs of data to train every network? for every fold?
  // higher values mean higher accuracy in final results, but more expensive
  this.num_epochs = getopt(opt, 'num_epochs', 50);
  // number of best models to average during prediction. Usually higher = better
  this.ensemble_size = getopt(opt, 'ensemble_size', 10);

  // candidate parameters
  this.batch_size_min = getopt(opt, 'batch_size_min', 10);
  this.batch_size_max = getopt(opt, 'batch_size_max', 300);
  this.l2_decay_min = getopt(opt, 'l2_decay_min', -4); // log10 exponent; see sampleCandidate()
  this.l2_decay_max = getopt(opt, 'l2_decay_max', 2);
  this.learning_rate_min = getopt(opt, 'learning_rate_min', -4); // log10 exponent; see sampleCandidate()
  this.learning_rate_max = getopt(opt, 'learning_rate_max', 0);
  this.momentum_min = getopt(opt, 'momentum_min', 0.9);
  this.momentum_max = getopt(opt, 'momentum_max', 0.9);
  this.neurons_min = getopt(opt, 'neurons_min', 5);
  this.neurons_max = getopt(opt, 'neurons_max', 30);

  // computed
  this.folds = []; // data fold indices, gets filled by sampleFolds()
  this.candidates = []; // candidate networks that are currently being evaluated
  this.evaluated_candidates = []; // history of all candidates that were fully evaluated on all folds
  this.unique_labels = arrUnique(labels);
  this.iter = 0; // iteration counter, goes from 0 -> num_epochs * num_training_data
  this.foldix = 0; // index of active fold

  // callbacks
  this.finish_fold_callback = null;
  this.finish_batch_callback = null;

  // initializations
  if(this.data.length > 0) {
    this.sampleFolds();
    this.sampleCandidates();
  }
};

MagicNet.prototype = {

  // sets this.folds to a sampling of this.num_folds folds
  sampleFolds: function() {
    var N = this.data.length;
    var num_train = Math.floor(this.train_ratio * N);
    this.folds = []; // flush folds, if any
    for(var i=0;i<this.num_folds;i++) {
      var p = randperm(N);
      this.folds.push({train_ix: p.slice(0, num_train), test_ix: p.slice(num_train, N)});
    }
  },

  // returns a random candidate network
  sampleCandidate: function() {
    var input_depth = this.data[0].w.length;
    var num_classes = this.unique_labels.length;

    // sample network topology and hyperparameters
    var layer_defs = [];
    layer_defs.push({type:'input', out_sx:1, out_sy:1, out_depth: input_depth});
    var nwl = weightedSample([0,1,2,3], [0.2, 0.3, 0.3, 0.2]); // prefer nets with 1,2 hidden layers
    for(var q=0;q<nwl;q++) {
      var ni = randi(this.neurons_min, this.neurons_max);
      var act = ['tanh','maxout','relu'][randi(0,3)];
      if(randf(0,1)<0.5) {
        var dp = Math.random();
        layer_defs.push({type:'fc', num_neurons: ni, activation: act, drop_prob: dp});
      } else {
        layer_defs.push({type:'fc', num_neurons: ni, activation: act});
      }
    }
    layer_defs.push({type:'softmax', num_classes: num_classes});
    var net = new Net();
    net.makeLayers(layer_defs);

    // sample training hyperparameters
    var bs = randi(this.batch_size_min, this.batch_size_max); // batch size
    var l2 = Math.pow(10, randf(this.l2_decay_min, this.l2_decay_max)); // l2 weight decay
    var lr = Math.pow(10, randf(this.learning_rate_min, this.learning_rate_max)); // learning rate
    var mom = randf(this.momentum_min, this.momentum_max); // momentum. Let's just use 0.9, works okay usually ;p
    var tp = randf(0,1); // trainer type
    var trainer_def;
    if(tp<0.33) {
      trainer_def = {method:'adadelta', batch_size:bs, l2_decay:l2};
    } else if(tp<0.66) {
      trainer_def = {method:'adagrad', learning_rate: lr, batch_size:bs, l2_decay:l2};
    } else {
      trainer_def = {method:'sgd', learning_rate: lr, momentum: mom, batch_size:bs, l2_decay:l2};
    }

    var trainer = new Trainer(net, trainer_def);

    var cand = {};
    cand.acc = [];
    cand.accv = 0; // this will be maintained as sum(acc) for convenience
    cand.layer_defs = layer_defs;
    cand.trainer_def = trainer_def;
    cand.net = net;
    cand.trainer = trainer;
    return cand;
  },

  // sets this.candidates with this.num_candidates candidate nets
  sampleCandidates: function() {
    this.candidates = []; // flush, if any
    for(var i=0;i<this.num_candidates;i++) {
      var cand = this.sampleCandidate();
      this.candidates.push(cand);
    }
  },

  step: function() {

    // run an example through current candidate
    this.iter++;

    // step all candidates on a random data point
    var fold = this.folds[this.foldix]; // active fold
    var dataix = fold.train_ix[randi(0, fold.train_ix.length)];
    for(var k=0;k<this.candidates.length;k++) {
      var x = this.data[dataix];
      var l = this.labels[dataix];
      this.candidates[k].trainer.train(x, l);
    }

    // process consequences: sample new folds, or candidates
    var lastiter = this.num_epochs * fold.train_ix.length;
    if(this.iter >= lastiter) {
      // finished evaluation of this fold. Get final validation
      // accuracies, record them, and go on to next fold.
      var val_acc = this.evalValErrors();
      for(var k=0;k<this.candidates.length;k++) {
        var c = this.candidates[k];
        c.acc.push(val_acc[k]);
        c.accv += val_acc[k];
      }
      this.iter = 0; // reset step number
      this.foldix++; // increment fold

      if(this.finish_fold_callback !== null) {
        this.finish_fold_callback();
      }

      if(this.foldix >= this.folds.length) {
        // we finished all folds as well! Record these candidates
        // and sample new ones to evaluate.
        for(var k=0;k<this.candidates.length;k++) {
          this.evaluated_candidates.push(this.candidates[k]);
        }
        // sort evaluated candidates according to accuracy achieved
        this.evaluated_candidates.sort(function(a, b) {
          return (a.accv / a.acc.length)
                 > (b.accv / b.acc.length)
                 ? -1 : 1;
        });
        // and clip only to the top few ones (let's place the limit at 3*ensemble_size)
        // otherwise there are concerns with keeping these all in memory
        // if MagicNet is being evaluated for a very long time
        if(this.evaluated_candidates.length > 3 * this.ensemble_size) {
          this.evaluated_candidates = this.evaluated_candidates.slice(0, 3 * this.ensemble_size);
        }
        if(this.finish_batch_callback !== null) {
          this.finish_batch_callback();
        }
        this.sampleCandidates(); // begin with new candidates
        this.foldix = 0; // reset this
      } else {
        // we will go on to another fold. reset all candidate nets
        for(var k=0;k<this.candidates.length;k++) {
          var c = this.candidates[k];
          var net = new Net();
          net.makeLayers(c.layer_defs);
          var trainer = new Trainer(net, c.trainer_def);
          c.net = net;
          c.trainer = trainer;
        }
      }
    }
  },

  evalValErrors: function() {
    // evaluate candidates on validation data and return performance of current networks
    // as simple list
    var vals = [];
    var fold = this.folds[this.foldix]; // active fold
    for(var k=0;k<this.candidates.length;k++) {
      var net = this.candidates[k].net;
      var v = 0.0;
      for(var q=0;q<fold.test_ix.length;q++) {
        var x = this.data[fold.test_ix[q]];
        var l = this.labels[fold.test_ix[q]];
        net.forward(x);
        var yhat = net.getPrediction();
        v += (yhat === l ? 1.0 : 0.0); // 0 1 loss
      }
      v /= fold.test_ix.length; // normalize
      vals.push(v);
    }
    return vals;
  },

  // returns prediction scores for given test data point, as Vol
  // uses an averaged prediction from the best ensemble_size models
  // data is a Vol.
  predict_soft: function(data) {
    // forward prop the best networks
    // and accumulate probabilities at last layer into an output Vol

    var eval_candidates = [];
    var nv = 0;
    if(this.evaluated_candidates.length === 0) {
      // not sure what to do here, first batch of nets hasn't been evaluated yet
      // lets just predict with current candidates.
      nv = this.candidates.length;
      eval_candidates = this.candidates;
    } else {
      // forward prop the best networks from evaluated_candidates
      nv = Math.min(this.ensemble_size, this.evaluated_candidates.length);
      eval_candidates = this.evaluated_candidates;
    }

    // forward nets of all candidates and average the predictions
    var xout, n;
    for(var j=0;j<nv;j++) {
      var net = eval_candidates[j].net;
      var x = net.forward(data);
      if(j===0) {
        xout = x;
        n = x.w.length;
      } else {
        // add it on
        for(var d=0;d<n;d++) {
          xout.w[d] += x.w[d];
        }
      }
    }
    // produce average
    for(var d=0;d<n;d++) {
      xout.w[d] /= nv;
    }
    return xout;
  },

  predict: function(data) {
    var xout = this.predict_soft(data);
    if(xout.w.length !== 0) {
      var stats = maxmin(xout.w);
      var predicted_label = stats.maxi;
    } else {
      var predicted_label = -1; // error out
    }
    return predicted_label;

  },

  toJSON: function() {
    // dump the top ensemble_size networks as a list
    var nv = Math.min(this.ensemble_size, this.evaluated_candidates.length);
    var json = {};
    json.nets = [];
    for(var i=0;i<nv;i++) {
      json.nets.push(this.evaluated_candidates[i].net.toJSON());
    }
    return json;
  },

  fromJSON: function(json) {
    this.ensemble_size = json.nets.length;
    this.evaluated_candidates = [];
    for(var i=0;i<this.ensemble_size;i++) {
      var net = new Net();
      net.fromJSON(json.nets[i]);
      var dummy_candidate = {};
      dummy_candidate.net = net;
      this.evaluated_candidates.push(dummy_candidate);
    }
  },

  // callback functions
  // called when a fold is finished, while evaluating a batch
  onFinishFold: function(f) { this.finish_fold_callback = f; },
  // called when a batch of candidates has finished evaluating
  onFinishBatch: function(f) { this.finish_batch_callback = f; }

};

convnet.MagicNet = MagicNet;
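
// Serialization sketch (illustrative, not part of the library): the best networks can be
// saved and restored with toJSON()/fromJSON(); `magicNet` refers to a trained MagicNet
// as in the usage sketch above, and `test_vol` to a convnet.Vol.
//
//   var json = magicNet.toJSON();            // the top ensemble_size nets as plain objects
//   var restored = new convnet.MagicNet();   // empty MagicNet; no data is needed for inference
//   restored.fromJSON(json);
//   var label = restored.predict(test_vol);  // predictions average the restored ensemble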