diff --git a/js/ml/convnet.js b/js/ml/convnet.js new file mode 100644 index 0000000..2dc0763 --- /dev/null +++ b/js/ml/convnet.js @@ -0,0 +1,2159 @@ + +/*** https://github.com/karpathy/convnetjs ***/ + +var convnet={REVISION: 'ALPHA'} +module.exports=convnet; +"use strict"; + +/*** convnet_util ***/ + // Random number utilities + var return_v = false; + var v_val = 0.0; + var gaussRandom = function() { + if(return_v) { + return_v = false; + return v_val; + } + var u = 2*Math.random()-1; + var v = 2*Math.random()-1; + var r = u*u + v*v; + if(r == 0 || r > 1) return gaussRandom(); + var c = Math.sqrt(-2*Math.log(r)/r); + v_val = v*c; // cache this + return_v = true; + return u*c; + } + var randf = function(a, b) { return Math.random()*(b-a)+a; } + var randi = function(a, b) { return Math.floor(Math.random()*(b-a)+a); } + var randn = function(mu, std){ return mu+gaussRandom()*std; } + + // Array utilities + var zeros = function(n) { + if(typeof(n)==='undefined' || isNaN(n)) { return []; } + if(typeof ArrayBuffer === 'undefined') { + // lacking browser support + var arr = new Array(n); + for(var i=0;i maxv) { maxv = w[i]; maxi = i; } + if(w[i] < minv) { minv = w[i]; mini = i; } + } + return {maxi: maxi, maxv: maxv, mini: mini, minv: minv, dv:maxv-minv}; + } + + // create random permutation of numbers, in range [0...n-1] + var randperm = function(n) { + var i = n, + j = 0, + temp; + var array = []; + for(var q=0;qright + var augment = function(V, crop, dx, dy, fliplr) { + // note assumes square outputs of size crop x crop + if(typeof(fliplr)==='undefined') var fliplr = false; + if(typeof(dx)==='undefined') var dx = convnet.randi(0, V.sx - crop); + if(typeof(dy)==='undefined') var dy = convnet.randi(0, V.sy - crop); + + // randomly sample a crop in the input volume + var W; + if(crop !== V.sx || dx!==0 || dy!==0) { + W = new Vol(crop, crop, V.depth, 0.0); + for(var x=0;x=V.sx || y+dy<0 || y+dy>=V.sy) continue; // oob + for(var d=0;d=0 && oy=0 && ox=0 && oy=0 && ox=0 && oy=0 && ox a) { a = v; winx=ox; winy=oy;} + } + } + } + this.switchx[n] = winx; + this.switchy[n] = winy; + n++; + A.set(ax, ay, d, a); + } + } + } + this.out_act = A; + return this.out_act; + }, + backward: function() { + // pooling layers have no parameters, so simply compute + // gradient wrt data here + var V = this.in_act; + V.dw = convnet.zeros(V.w.length); // zero out gradient wrt data + var A = this.out_act; // computed in forward pass + + var n = 0; + for(var d=0;d amax) amax = as[i]; + } + + // compute exponentials (carefully to not blow up) + var es = convnet.zeros(this.out_depth); + var esum = 0.0; + for(var i=0;i 0) { + // violating dimension, apply loss + x.dw[i] += 1; + x.dw[y] -= 1; + loss += ydiff; + } + } + + return loss; + }, + getParamsAndGrads: function() { + return []; + }, + toJSON: function() { + var json = {}; + json.out_depth = this.out_depth; + json.out_sx = this.out_sx; + json.out_sy = this.out_sy; + json.layer_type = this.layer_type; + json.num_inputs = this.num_inputs; + return json; + }, + fromJSON: function(json) { + this.out_depth = json.out_depth; + this.out_sx = json.out_sx; + this.out_sy = json.out_sy; + this.layer_type = json.layer_type; + this.num_inputs = json.num_inputs; + } + } + + convnet.RegressionLayer = RegressionLayer; + convnet.SoftmaxLayer = SoftmaxLayer; + convnet.SVMLayer = SVMLayer; + + +/*** convnet_layers_nonlinearities ***/ + // Implements ReLU nonlinearity elementwise + // x -> max(0, x) + // the output is in [0, inf) + var ReluLayer = function(opt) { + var opt = opt || {}; + 
+ // computed + this.out_sx = opt.in_sx; + this.out_sy = opt.in_sy; + this.out_depth = opt.in_depth; + this.layer_type = 'relu'; + } + ReluLayer.prototype = { + forward: function(V, is_training) { + this.in_act = V; + var V2 = V.clone(); + var N = V.w.length; + var V2w = V2.w; + for(var i=0;i 1/(1+e^(-x)) + // so the output is between 0 and 1. + var SigmoidLayer = function(opt) { + var opt = opt || {}; + + // computed + this.out_sx = opt.in_sx; + this.out_sy = opt.in_sy; + this.out_depth = opt.in_depth; + this.layer_type = 'sigmoid'; + } + SigmoidLayer.prototype = { + forward: function(V, is_training) { + this.in_act = V; + var V2 = V.cloneAndZero(); + var N = V.w.length; + var V2w = V2.w; + var Vw = V.w; + for(var i=0;i max(x) + // where x is a vector of size group_size. Ideally of course, + // the input size should be exactly divisible by group_size + var MaxoutLayer = function(opt) { + var opt = opt || {}; + + // required + this.group_size = typeof opt.group_size !== 'undefined' ? opt.group_size : 2; + + // computed + this.out_sx = opt.in_sx; + this.out_sy = opt.in_sy; + this.out_depth = Math.floor(opt.in_depth / this.group_size); + this.layer_type = 'maxout'; + + this.switches = convnet.zeros(this.out_sx*this.out_sy*this.out_depth); // useful for backprop + } + MaxoutLayer.prototype = { + forward: function(V, is_training) { + this.in_act = V; + var N = this.out_depth; + var V2 = new Vol(this.out_sx, this.out_sy, this.out_depth, 0.0); + + // optimization branch. If we're operating on 1D arrays we dont have + // to worry about keeping track of x,y,d coordinates inside + // input volumes. In convnets we do :( + if(this.out_sx === 1 && this.out_sy === 1) { + for(var i=0;i a) { + a = a2; + ai = j; + } + } + V2.w[i] = a; + this.switches[i] = ix + ai; + } + } else { + var n=0; // counter for switches + for(var x=0;x a) { + a = a2; + ai = j; + } + } + V2.set(x,y,i,a); + this.switches[n] = ix + ai; + n++; + } + } + } + + } + this.out_act = V2; + return this.out_act; + }, + backward: function() { + var V = this.in_act; // we need to set dw of this + var V2 = this.out_act; + var N = this.out_depth; + V.dw = convnet.zeros(V.w.length); // zero out gradient wrt data + + // pass the gradient through the appropriate switch + if(this.out_sx === 1 && this.out_sy === 1) { + for(var i=0;i tanh(x) + // so the output is between -1 and 1. + var TanhLayer = function(opt) { + var opt = opt || {}; + + // computed + this.out_sx = opt.in_sx; + this.out_sy = opt.in_sy; + this.out_depth = opt.in_depth; + this.layer_type = 'tanh'; + } + TanhLayer.prototype = { + forward: function(V, is_training) { + this.in_act = V; + var V2 = V.cloneAndZero(); + var N = V.w.length; + for(var i=0;i= 2, 'Error! At least one input layer and one loss layer are required.'); + assert(defs[0].type === 'input', 'Error! 
First layer must be the input layer, to declare size of inputs'); + + // desugar layer_defs for adding activation, dropout layers etc + var desugar = function() { + var new_defs = []; + for(var i=0;i0) { + var prev = this.layers[i-1]; + def.in_sx = prev.out_sx; + def.in_sy = prev.out_sy; + def.in_depth = prev.out_depth; + } + + switch(def.type) { + case 'fc': this.layers.push(new convnet.FullyConnLayer(def)); break; + case 'lrn': this.layers.push(new convnet.LocalResponseNormalizationLayer(def)); break; + case 'dropout': this.layers.push(new convnet.DropoutLayer(def)); break; + case 'input': this.layers.push(new convnet.InputLayer(def)); break; + case 'softmax': this.layers.push(new convnet.SoftmaxLayer(def)); break; + case 'regression': this.layers.push(new convnet.RegressionLayer(def)); break; + case 'conv': this.layers.push(new convnet.ConvLayer(def)); break; + case 'pool': this.layers.push(new convnet.PoolLayer(def)); break; + case 'relu': this.layers.push(new convnet.ReluLayer(def)); break; + case 'sigmoid': this.layers.push(new convnet.SigmoidLayer(def)); break; + case 'tanh': this.layers.push(new convnet.TanhLayer(def)); break; + case 'maxout': this.layers.push(new convnet.MaxoutLayer(def)); break; + case 'svm': this.layers.push(new convnet.SVMLayer(def)); break; + default: console.log('ERROR: UNRECOGNIZED LAYER TYPE: ' + def.type); + } + } + }, + + // forward prop the network. + // The trainer class passes is_training = true, but when this function is + // called from outside (not from the trainer), it defaults to prediction mode + forward: function(V, is_training) { + if(typeof(is_training) === 'undefined') is_training = false; + var act = this.layers[0].forward(V, is_training); + for(var i=1;i=0;i--) { // first layer assumed input + this.layers[i].backward(); + } + return loss; + }, + getParamsAndGrads: function() { + // accumulate parameters and gradients for the entire network + var response = []; + for(var i=0;i maxv) { maxv = p[i]; maxi = i;} + } + return maxi; // return index of the class with highest class probability + }, + toJSON: function() { + var json = {}; + json.layers = []; + for(var i=0;i 0.0)) { + // only vanilla sgd doesnt need either lists + // momentum needs gsum + // adagrad needs gsum + // adam and adadelta needs gsum and xsum + for(var i=0;i 0 ? 1 : -1); + var l2grad = l2_decay * (p[j]); + + var gij = (l2grad + l1grad + g[j]) / this.batch_size; // raw batch gradient + + var gsumi = this.gsum[i]; + var xsumi = this.xsum[i]; + if(this.method === 'adam') { + // adam update + gsumi[j] = gsumi[j] * this.beta1 + (1- this.beta1) * gij; // update biased first moment estimate + xsumi[j] = xsumi[j] * this.beta2 + (1-this.beta2) * gij * gij; // update biased second moment estimate + var biasCorr1 = gsumi[j] * (1 - Math.pow(this.beta1, this.k)); // correct bias first moment estimate + var biasCorr2 = xsumi[j] * (1 - Math.pow(this.beta2, this.k)); // correct bias second moment estimate + var dx = - this.learning_rate * biasCorr1 / (Math.sqrt(biasCorr2) + this.eps); + p[j] += dx; + } else if(this.method === 'adagrad') { + // adagrad update + gsumi[j] = gsumi[j] + gij * gij; + var dx = - this.learning_rate / Math.sqrt(gsumi[j] + this.eps) * gij; + p[j] += dx; + } else if(this.method === 'windowgrad') { + // this is adagrad but with a moving window weighted average + // so the gradient is not accumulated over the entire history of the run. + // it's also referred to as Idea #1 in Zeiler paper on Adadelta. Seems reasonable to me! 
+ gsumi[j] = this.ro * gsumi[j] + (1-this.ro) * gij * gij; + var dx = - this.learning_rate / Math.sqrt(gsumi[j] + this.eps) * gij; // eps added for better conditioning + p[j] += dx; + } else if(this.method === 'adadelta') { + gsumi[j] = this.ro * gsumi[j] + (1-this.ro) * gij * gij; + var dx = - Math.sqrt((xsumi[j] + this.eps)/(gsumi[j] + this.eps)) * gij; + xsumi[j] = this.ro * xsumi[j] + (1-this.ro) * dx * dx; // yes, xsum lags behind gsum by 1. + p[j] += dx; + } else if(this.method === 'nesterov') { + var dx = gsumi[j]; + gsumi[j] = gsumi[j] * this.momentum + this.learning_rate * gij; + dx = this.momentum * dx - (1.0 + this.momentum) * gsumi[j]; + p[j] += dx; + } else { + // assume SGD + if(this.momentum > 0.0) { + // momentum update + var dx = this.momentum * gsumi[j] - this.learning_rate * gij; // step + gsumi[j] = dx; // back this up for next iteration of momentum + p[j] += dx; // apply corrected gradient + } else { + // vanilla sgd + p[j] += - this.learning_rate * gij; + } + } + g[j] = 0.0; // zero out gradient so that we can begin accumulating anew + } + } + } + + // appending softmax_loss for backwards compatibility, but from now on we will always use cost_loss + // in future, TODO: have to completely redo the way loss is done around the network as currently + // loss is a bit of a hack. Ideally, user should specify arbitrary number of loss functions on any layer + // and it should all be computed correctly and automatically. + return {fwd_time: fwd_time, bwd_time: bwd_time, + l2_decay_loss: l2_decay_loss, l1_decay_loss: l1_decay_loss, + cost_loss: cost_loss, softmax_loss: cost_loss, + loss: cost_loss + l1_decay_loss + l2_decay_loss} + } + } + + convnet.Trainer = Trainer; + convnet.SGDTrainer = Trainer; // backwards compatibility + + +/*** convnet_magicnets ***/ + // used utilities, make explicit local references + var randf = convnet.randf; + var randi = convnet.randi; + var Net = convnet.Net; + var Trainer = convnet.Trainer; + var maxmin = convnet.maxmin; + var randperm = convnet.randperm; + var weightedSample = convnet.weightedSample; + var getopt = convnet.getopt; + var arrUnique = convnet.arrUnique; + + /* + A MagicNet takes data: a list of convnetjs.Vol(), and labels + which for now are assumed to be class indeces 0..K. MagicNet then: + - creates data folds for cross-validation + - samples candidate networks + - evaluates candidate networks on all data folds + - produces predictions by model-averaging the best networks + */ + var MagicNet = function(data, labels, opt) { + var opt = opt || {}; + if(typeof data === 'undefined') { data = []; } + if(typeof labels === 'undefined') { labels = []; } + + // required inputs + this.data = data; // store these pointers to data + this.labels = labels; + + // optional inputs + this.train_ratio = getopt(opt, 'train_ratio', 0.7); + this.num_folds = getopt(opt, 'num_folds', 10); + this.num_candidates = getopt(opt, 'num_candidates', 50); // we evaluate several in parallel + // how many epochs of data to train every network? for every fold? + // higher values mean higher accuracy in final results, but more expensive + this.num_epochs = getopt(opt, 'num_epochs', 50); + // number of best models to average during prediction. 
Usually higher = better + this.ensemble_size = getopt(opt, 'ensemble_size', 10); + + // candidate parameters + this.batch_size_min = getopt(opt, 'batch_size_min', 10); + this.batch_size_max = getopt(opt, 'batch_size_max', 300); + this.l2_decay_min = getopt(opt, 'l2_decay_min', -4); + this.l2_decay_max = getopt(opt, 'l2_decay_max', 2); + this.learning_rate_min = getopt(opt, 'learning_rate_min', -4); + this.learning_rate_max = getopt(opt, 'learning_rate_max', 0); + this.momentum_min = getopt(opt, 'momentum_min', 0.9); + this.momentum_max = getopt(opt, 'momentum_max', 0.9); + this.neurons_min = getopt(opt, 'neurons_min', 5); + this.neurons_max = getopt(opt, 'neurons_max', 30); + + // computed + this.folds = []; // data fold indices, gets filled by sampleFolds() + this.candidates = []; // candidate networks that are being currently evaluated + this.evaluated_candidates = []; // history of all candidates that were fully evaluated on all folds + this.unique_labels = arrUnique(labels); + this.iter = 0; // iteration counter, goes from 0 -> num_epochs * num_training_data + this.foldix = 0; // index of active fold + + // callbacks + this.finish_fold_callback = null; + this.finish_batch_callback = null; + + // initializations + if(this.data.length > 0) { + this.sampleFolds(); + this.sampleCandidates(); + } + }; + + MagicNet.prototype = { + + // sets this.folds to a sampling of this.num_folds folds + sampleFolds: function() { + var N = this.data.length; + var num_train = Math.floor(this.train_ratio * N); + this.folds = []; // flush folds, if any + for(var i=0;i
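Usage note (outside the patch): below is a minimal sketch of how the exported module could be driven from Node once this file lands. It only leans on names that appear in the patch (convnet.Vol, convnet.Net, convnet.Trainer, net.makeLayers, net.forward, trainer.train); the require path, layer sizes, and trainer hyperparameters are illustrative assumptions, and the fc/softmax shorthand assumes the desugaring step behaves as in upstream convnetjs.

// Sketch only: assumes the file is required relative to the repo root,
// and that all hyperparameters below are placeholders, not defaults from this patch.
var convnet = require('./js/ml/convnet.js');

// Two-dimensional input -> fully connected hidden layer -> 2-class softmax.
var layer_defs = [];
layer_defs.push({type: 'input', out_sx: 1, out_sy: 1, out_depth: 2});
layer_defs.push({type: 'fc', num_neurons: 20, activation: 'relu'});
layer_defs.push({type: 'softmax', num_classes: 2});

var net = new convnet.Net();
net.makeLayers(layer_defs);

var trainer = new convnet.Trainer(net, {
  method: 'adadelta', learning_rate: 0.01, l2_decay: 0.001, batch_size: 10
});

// One training example as a 1x1x2 volume.
var x = new convnet.Vol(1, 1, 2, 0.0);
x.w[0] = 0.5; x.w[1] = -1.3;

var stats = trainer.train(x, 0);   // class label 0; returns an object with loss and timing stats
var probs = net.forward(x);        // out_act of the softmax layer; probs.w holds class probabilities
console.log('p(class 0) =', probs.w[0], 'loss =', stats.loss);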