diff --git a/js/ml/ml.js.bak b/js/ml/ml.js.bak new file mode 100644 index 0000000..e2c7fe7 --- /dev/null +++ b/js/ml/ml.js.bak @@ -0,0 +1,1281 @@ +/** + ** ============================== + ** O O O OOOO + ** O O O O O O + ** O O O O O O + ** OOOO OOOO O OOO OOOO + ** O O O O O O O + ** O O O O O O O + ** OOOO OOOO O O OOOO + ** ============================== + ** Dr. Stefan Bosse http://www.bsslab.de + ** + ** COPYRIGHT: THIS SOFTWARE, EXECUTABLE AND SOURCE CODE IS OWNED + ** BY THE AUTHOR(S). + ** THIS SOURCE CODE MAY NOT BE COPIED, EXTRACTED, + ** MODIFIED, OR OTHERWISE USED IN A CONTEXT + ** OUTSIDE OF THE SOFTWARE SYSTEM. + ** + ** $AUTHORS: Stefan Bosse + ** $INITIAL: (C) 2006-2020 BSSLAB + ** $CREATED: 8-2-16 by sbosse. + ** $VERSION: 1.16.2 + ** + ** $INFO: + ** + ** JavaScript AIOS Machine Learning API + ** + ** type algorithm = {'dti','dt','id3','c45','kmeans','knn','knn2','mlp','slp','rl','svm','txt','cnn'} + ** + ** + ** id3: Symbolic Decision Tree algorithm + ** ------------------------------------- + ** + ** typeof @options = { + ** algorithm='id3', + ** data:{x1:number,x2:number,..,y:*} [] + ** target:string is e.g. 'y' + ** features: string [] is e.g. ['x1','x2',..] + ** } + ** + ** ice: decision tree algorithm supporting numbers with eps intervals (hybrid C45/ID3) + ** ------------------------------------- + ** + ** General feature variable set: + ** + ** typeof @options = { + ** algorithm='dt', + ** data:{x1:number,x2:number,..,y:*} [], + ** target:string is e.g. 'y', + ** features: string [] is e.g. ['x1','x2',..], + ** eps:number is e.g. '5', + ** } + ** + ** dti: interval decision tree algorithm + ** ------------------------------------- + ** + ** General feature variable set: + ** + ** typeof @options = { + ** algorithm='dti', + ** data:{x1:number,x2:number,..,y:*} [] + ** target:string is e.g. 'y' + ** features: string [] is e.g. ['x1','x2',..] + ** eps:number is e.g. 
'5', + ** maxdepth:number, + ** } + ** + ** Or vector feature variables (i.e., features=[0,1,2,...n-1], target=n): + ** + ** typeof @options = { + ** algorithm='dti', + ** x:* [] [], + ** y:* [], + ** eps:number is e.g. '5', + ** maxdepth:number, + ** } + ** + ** knn: k-Nearest-Neighbour Algorithm + ** ---------------------------------- + ** + ** typeof @options = { + ** algorithm='knn', + ** x: number [][], + ** y: * [] + ** } + ** + ** mlp: multi layer perceptron Algorithm + ** ---------------------------------- + ** + ** typeof @options = { + ** algorithm='mlp', + ** x: number [][], + ** y: number [] [] | * [], + ** hidden_layers?:number [], + ** lr?:number, + ** epochs?:number, + ** labels?:string [], + ** features?: string [], + ** normalize?, + ** verbose?:number + ** } + ** + ** + ** cnn: Convolutional Neural Network for numerical (2D) data + ** ------------------------------------- + ** + ** General feature variable set: + ** + ** typeof @options = { + ** algorithm='cnn', + ** data:{x:[]|[][],y:'a} [] + ** layers: layer [], + ** trainer:trainer, + ** } + ** type layer = + ** {type:'input', out_sx:number, out_sy:number, out_depth:number} | // Input Layer + ** {type:'conv', sx:number, filters:number, stride:number, pad:number, activation:string} | // Convolution Layer + ** {type:'pool', sx:number, stride:number} | // Pooling Layer + ** {type:'softmax', num_classes:number} | // Classifier Layers + ** {type:'svm', num_classes:number} | // Classifier Layers + ** {type:'fc', num_neurons:number, activation:string} // Fully Connected Layer + ** + ** typeof activation = 'relu'| 'maxout' | 'sigmoid' | 'tanh' ..
// Some definitions: identifiers used as model.algorithm / options.algorithm,
// names of distance functions, and reinforcement-learning agent kinds.
var ML = {
  // Algorithms
  ANN:'ann',      // neataptic NN
  C45:'c45',
  CNN:'cnn',
  // 'dt' is documented in the file header as the algorithm name of the eps decision tree
  // and is used as a switch label (case ML.DT) next to ML.ICE. Without this key those
  // labels evaluate to undefined and can never match 'dt'.
  DT:'dt',
  ICE:'ice',      // ICE ID3/C45 eps
  DTI:'dti',
  ID3:'id3',
  KMN:'kmeans',
  KNN:'knn',
  KNN2:'knn2',
  MLP:'mlp',
  RF:'rf',        // Random Forest
  RL:'rl',        // Reinforcement Learner
  SLP:'slp',      // Synonym for MLP (but single layer)
  SVM:'svm',
  TXT:'txt',
  // Some Functions
  EUCL:'euclidean',
  PEAR:'pearson',

  // RL agents
  DPAgent:'DPAgent',
  TDAgent:'TDAgent',
  DQNAgent:'DQNAgent',
};
// Builds a new object holding only the attributes of row that are named in features.
// Attributes missing in row are still created (with value undefined).
function objSlice(row,features) {
  return features.reduce(function (picked,name) {
    picked[name]=row[name];
    return picked;
  },{});
}
/* Inverse of scale(): maps scaled value(s) back to the original range.
**
** scale computes   shift+(v-off)*k
** unscale computes (v-shift)/k+off
**
** @param vrow   number or number [] (scaled value or row of scaled values)
** @param scala  {k:number|number[], off:number|number[], shift:number} or a falsy value
** @returns vrow unchanged if scala is falsy; the unscaled number for a number;
**          a new array of unscaled numbers for an array (element i uses k[i]/off[i]
**          if k is an array); undefined for any other kind of vrow.
*/
function unscale(vrow,scala) {
  if (!scala) return vrow;
  if (typeof vrow == 'number') {
    if (typeof scala.k == 'number')
      return (vrow-scala.shift)/scala.k+scala.off
    else
      // a single number is treated as the first (only) column
      return (vrow-scala.shift)/scala.k[0]+scala.off[0]
  }
  // Row of values: mirror the array handling of scale() instead of silently returning undefined
  if (Array.isArray(vrow)) {
    if (typeof scala.k == 'number')
      return vrow.map(function (col) {
        return (col-scala.shift)/scala.k+scala.off })
    else
      return vrow.map(function (col,i) {
        return (col-scala.shift)/scala.k[i]+scala.off[i] })
  }
}
(Comp.obj.isArray(data) && typeof data[0] != 'object') return {data:data}; + if (Comp.obj.isObject(data) && options.features!=undefined) { + return { data:data.map(function (row) { + return scale(objSlice(row,options.features),scala) })}; + } + break; + case 'm': + if (Comp.obj.isMatrix(data)) return {data:data}; + if (Comp.obj.isObject(row) && options.features!=undefined) { + return { data:data.map(function (row) { + return scale(obj2Array(row,options.features),scala) })}; + } + break; + case 'xmy': + if (Comp.obj.isObject(row) && options.features!=undefined && options.target!=undefined) { + return { x:data.map(function (row) { + return scale(obj2Array(row,options.features),scala) }), + y:data.map(function (row) { return row[options.target]})}; + } + break; + case 'io': + if (Comp.obj.isArray(row) && options.target!=undefined) { + // number [][] + if (Number(options.target)==row.length-1) { + _data=data.map(function (row) { return { input:scale(row.slice(0,options.target),scala), + output:array(row[options.target]) }}); + return _data + } + } else if (Comp.obj.isObject(row) && options.target!=undefined && options.features!=undefined) { + _data=data.map(function (row) { return { input:scale(obj2Array(row,options.features),scala), + output:array(row[options.target]) }}); + return _data + } + + break; + } + } else if (data.x && data.y) { + if (Comp.obj.isArray(data.x) && Comp.obj.isArray(data.y)) { + row=data.x[0]; + switch (need) { + case 'io': + if (Comp.obj.isArray(row)) { + // number [][] + _data=data.x.map(function (row, rowi) { return { input:scale(row,scala), + output:array(data.y[rowi]) }}); + return _data + } + if (Comp.obj.isObject(row) && options.features!=undefined) { + _data=data.x.map(function (row, rowi) { return { input:scale(obj2Array(row,options.features),scala), + output:array(data.y[rowi]) }}); + return _data + } + break; + case 'xm': + if (Comp.obj.isArray(row)) return data.x; + break; + case 'xmy': + if (Comp.obj.isArray(row)) return { x:data.x, 
y:data.y}; + break; + case 'xmya': + if (Comp.obj.isArray(row)) return { x:data.x, y:data.y.map(array)}; + break; + case 'd': + return data.x.map(function (row,rowi) { + var newrow={}; + if (options.features && options.target) { + options.features.forEach(function (f,coli) { + newrow[f]=row[coli]; + }); + newrow[options.target]=data.y[rowi]; + } else { + row.forEach(function (col,f) { + newrow[String(f)]=col; + }); + newrow[String(row.length)]=data.y[rowi]; + } + return newrow; + }) + break; + } + } + } +} + + + +// Agent AIOS API +var ml = { + // only RL + action : function (model,arg) { + switch (model.algorithm) { + // Selects and returns next action from set of actions + case ML.RL: + switch (model.kind) { + case ML.DQNAgent: + // arg == state array + return model.actions[RL.DQNAgent.code.act(model,arg)]; + break; + case ML.DPAgent: + // arg == state (integer number) + return model.actions[RL.DPAgent.code.act(model,arg)]; + break; + case ML.TDAgent: + // arg == state (integer number) + return model.actions[RL.TDAgent.code.act(model,arg)]; + break; + } + break; + } + }, + /** Classification (prediction): Apply sample data to learned model. + * Returns prediction result. 
+ * + */ + classify: function (model,samples) { + var x,solutions,result; + switch (model.algorithm) { + + case ML.ANN: + if (Comp.obj.isArray(samples)) + return samples.map(function (sample) { + return model.network.activate(sample) + }); + else + return model.network.activate(samples); + + case ML.CNN: + if (Comp.obj.isMatrix(samples)) + return samples.map(function (sample) { + return CNN.predict(model,sample); + }); + else + return CNN.predict(model,samples); + break; + + case ML.C45: + // Sample row format: [x1,x2,..,xn] + if (Comp.obj.isMatrix(samples)) { + return samples.map(function (sample) { + return C45.classify(model,sample); + }); + } else if (Comp.obj.isArray(samples) && !Comp.obj.isObj(samples[0])) { + return C45.classify(model,samples); + } else if (Comp.obj.isArray(samples) && Comp.obj.isObj(samples[0])) { + return samples.map(function (sample) { + return C45.classify(model,sample); + }); + } else if (Comp.obj.isObj(samples)) { + return C45.classify(model,samples); + } + break; + + case ML.DT: + case ML.ICE: + if (Comp.obj.isMatrix(samples) || + Comp.obj.isArray(samples) && Comp.obj.isObj(samples[0])) + return samples.map(function (sample) { + return ICE.predict(model,sample) + }); + else + return ICE.predict(model,samples); + + case ML.DTI: + if (Comp.obj.isMatrix(samples)) + return samples.map(function (sample) { + return DTI.predict(model,sample) + }); + else + return DTI.predict(model,samples); + + case ML.ID3: + if (Comp.obj.isArray(samples)) + return samples.map(function (sample) { + return ID3.predict(model,sample) + }); + else + return ID3.predict(model,samples); + + case ML.KNN: + if (Comp.obj.isMatrix(samples)) + return KNN.predict(model,samples); + else if (Comp.obj.isArray(samples) && Comp.obj.isObj(samples[0])) + return KNN.predict(model,samples.map(function (sample) { + return obj2Array(sample,model.features)})); + else if (Comp.obj.isObj(samples)) + return KNN.predict(model,obj2Array(samples,model.features)); + else + return 
KNN.predict(model,samples); + break; + + case ML.KNN2: + if (Comp.obj.isMatrix(samples)) + return samples.map(function (sample) { + return KNN.predict2(model,sample); + }); + else if (Comp.obj.isArray(samples) && Comp.obj.isObj(samples[0])) + return samples.map(function (sample) { + return KNN.predict2(model,obj2Array(sample,model.features)) + }) + else if (Comp.obj.isObj(samples)) + return KNN.predict2(model,obj2Array(samples,model.features)); + else + return KNN.predict2(model,samples); + break; + + case ML.KMN: + return model.clusters + break; + + case ML.RF: + if (model.labels) { + if (Comp.obj.isMatrix(samples)) { + return samples.map(function (sample) { + return model.rfs.map(function (rf) { + return RF.code.predictOne(rf,sample); + }).map(function (v,i) { + return { value:model.labels[i], prob:v } + }) + }); + } else if (Comp.obj.isArray(samples) && typeof samples[0] == 'number') { + return model.rfs.map(function (rf) { + return RF.code.predictOne(rf,samples); + }).map(function (v,i) { + return { value:model.labels[i], prob:v } + }) + } // TODO + } else { + // Sample row format: [x1,x2,..,xn] + if (Comp.obj.isMatrix(samples)) { + return samples.map(function (sample) { + return RF.code.predictOne(model,sample); + }); + } else if (Comp.obj.isArray(samples) && typeof samples[0] == 'number') { + return RF.predictOne(model,samples); + } // TODO + } + // preprocess(samples,'m') + break; + + case ML.SVM: + if (!model._labels) { + // Single SVM + if (Comp.obj.isMatrix(samples)) + return samples.map(function (sample) { + return SVM.code.predict(model,sample); + }); + else + return SVM.code.predict(model,samples); + } else { + // Multi SVM + if (Comp.obj.isMatrix(samples)) + return samples.map(function (sample) { + solutions=model.svms.map(function (svm,index) { + if (svm.threshold==false) + return SVM.code.predict(svm,sample) + else + return SVM.code.predict(svm,sample); + }); + return solutions.map(function (v,i) { return { value:model._labels[i], prob:v } }); + }); 
+ else { + solutions=model.svms.map(function (svm,index) { + if (svm.threshold==false) + return SVM.code.predict(svm,samples) + else + return SVM.code.predict(svm,samples)==1; + }) + return solutions.map(function (v,i) { return { value:model._labels[i], prob:v } }); + } + } + break; + + case ML.SLP: + case ML.MLP: + if (Comp.obj.isMatrix(samples)) { + x=samples; + if (model.xscale) + x=x.map(function (row) { return scale(row,model.xscale) }); + result = model.labels?MLP.code.predict(model,x).map(function (r) { + var o={}; + r.forEach(function (v,i) { o[model.labels[i]]=v }); + return o; + }):relax(MLP.code.predict(model,x)); + } else if (Comp.obj.isArray(samples)) { + x=samples; + if (model.xscale) + x=scale(x,model.xscale); + result = model.labels?MLP.code.predict(model,[x]).map(function (r) { + var o={}; + r.forEach(function (v,i) { o[model.labels[i]]=v }); + return o; + })[0]:relax(MLP.code.predict(model,[x])[0]); + } else if (Comp.obj.isObj(samples) && model.features) { + x=model.features.map(function (f) { return samples[f] }); + if (model.xscale) + x=scale(x,model.xscale); + result = model.labels?MLP.code.predict(model,[x]).map(function (r) { + var o={}; + r.forEach(function (v,i) { o[model.labels[i]]=v }); + return o; + })[0]:relax(MLP.code.predict(model,[x])[0]); + } + if (Comp.obj.isArray(result)) { + return model.yscale?result.map(function (y) { return unscale(y,model.yscale) }):result; + } else { + + } + break; + + case ML.TXT: + // typeof options = {data: string []} + if (Comp.obj.isArray(samples)) + return samples.map(function (sample) { return TXT.classify(model,sample) }); + else + return TXT.classify(model,samples); + break; + + } + }, + + compact: function (model) { + switch (model.algorithm) { + case ML.DTI: + default: + return DTI.compactTree(model); + } + }, + + depth: function (model) { + switch (model.algorithm) { + case ML.DTI: + return DTI.depth(model); + case ML.DT: + case ML.ICE: + return ICE.depth(model); + case ML.C45: + return 
C45.depth(model); + case ML.ID3: + return ID3.depth(model); + } + }, + + + evaluate: function (model,target,samples) { + switch (model.algorithm) { + case ML.DTI: + default: + return DTI.evaluate(model,target,samples); + } + }, + + info: function (model) { + switch (model.algorithm) { + case ML.C45: + return C45.info(model); + case ML.DT: + case ML.ICE: + return ICE.info(model); + case ML.ID3: + return ID3.info(model); + } + }, + /** Learning: Create a classification model from training data (or an empty model that can be updated) + * + */ + learn: function (options) { + var model,data,data2,x,y,features,featureTypes,test,target, + result,cols,n_ins,n_outs,x,y,xscale,xoffset,xshift,yscale,yoffset,yshift,key,err, + t0=Io.time(); + if (options==_) options={}; + switch (options.algorithm) { + + case ML.ANN: + // typeof options = { x,y,features?,target?,layers:number [], trainerror:number} + data = preprocess(options,'io',options); + model={}; + model.algorithm=options.algorithm + if (!options.layers) options.layers=[] + if (data) + model.network = new ANN.Network(options.layers[0],options.layers[options.layers.length-1]); + else throw 'ML.learn.ANN: Invalid options'; + model.network.evolve(data,options); + model.time=Io.time()-t0; + return model; + break; + + + case ML.CNN: + // typeof options = {x:[][],y:[],..} + model = CNN.create(options); + model.algorithm=options.algorithm; + model.time=Io.time()-t0; + return model; + break; + + case ML.C45: + // typeof options = {data: {}[], target:string, features: string []} | + // {data: [][], target?:string, features?: string []} | + // {x: number [][], y:[]} | + // {data: {x,y}[] } + var model = C45.create(); + if (options.x && options.y) { + features=options.x[0].map(function (col,i) { return String(i) }); + featureTypes=options.x[0].map(function (col,i) { return 'number' }); + data=options.x.map(function (row,i) { row=row.slice(); row.push(options.y[i]); return row}); + target='y'; + } else if (options.data && 
Comp.obj.isMatrix(options.data)) { + data=options.data; + features=options.features||options.data[0].slice(0,-1).map(function (col,i) { return String(i) }); + featureTypes=options.data[0].slice(0,-1).map(function (col,i) { return typeof col == 'number'?'number':'category' }); + target=options.target||'y'; + } else if (options.data && Comp.obj.isObj(options.data[0]) && options.data[0].x && options.data[0].y!=undefined) { + data=options.data.map(function (row) { return row.x.concat(row.y) }); + features=options.features||options.data[0].x.slice(0,-1).map(function (col,i) { return String(i) }); + featureTypes=options.data[0].x.slice(0,-1).map(function (col,i) { return typeof col == 'number'?'number':'category' }); + target=options.target||'y'; + } else if (options.data && Comp.obj.isArray(options.data) && Comp.obj.isObj(options.data[0]) && + options.target && options.features) { + rowNames=Comp.obj.isArray(options.target)?options.features.concat(options.target): + options.features.concat([options.target]); + data=options.data.map(function (row) { return obj2Array(row,rowNames) }) + features=options.features; + featureTypes=data[0].slice(0,-1).map(function (col,i) { return typeof col == 'number'?'number':'category' }); + target=options.target; + } else throw 'ML.learn.C45: Invalid options'; + + C45.train(model,{ + data: data, + target: target, + features: features, + featureTypes: featureTypes + }); + model.algorithm=options.algorithm + model.time=Io.time()-t0; + return model; + break; + + + case ML.DTI: + // typeof options = {data: {}[], target:string, features: string [], eps;number, maxdepth} | + // {x: number [][], y:[], eps;number, maxdepth} + if (options.eps==_) options.eps=0; + if (options.maxdepth==_) options.maxdepth=20; + if (options.data && options.target && options.features) + model = DTI.create(options); + else if (options.x && options.y) { + if (options.x.length != options.y.length) throw 'ML.learn.DTI: X and Y vector have different length'; + 
data=options.x.map(function (row,i) { row=row.slice(); row.push(options.y[i]); return row}); + features=Comp.array.init(data[0].length-1,function (i) { return String(i)}); + target=String(data[0].length-1); + // console.log(data,features,target) + model = DTI.create({ + data:data, + features:features, + target:target, + eps:options.eps, + maxdepth:options.maxdepth + }); + } else throw 'ML.learn.DTI: Invalid options'; + model.algorithm=options.algorithm; + model.time=Io.time()-t0; + return model; + + + case ML.ICE: + case ML.DT: + if (options.eps==_) options.eps=0; + if (options.data && options.target && options.features) + model = ICE.create(options); + else if (options.x && options.y) { + if (options.x.length != options.y.length) throw 'ML.learn.ICE: X and Y vector have different length'; + data=options.x.map(function (row,i) { row=row.slice(); row.push(options.y[i]); return row}); + features=Comp.array.init(data[0].length-1,function (i) { return String(i)}); + target=String(data[0].length-1); + model = ICE.create({ + data:data, + features:features, + target:target, + eps:options.eps, + }); + } else throw 'ML.learn.ICE: Invalid options'; + model.algorithm=options.algorithm; + model.eps=options.eps; + model.time=Io.time()-t0; + return model; + break; + + case ML.ID3: + if (options.data && options.target && options.features) + model = ID3.createTree(options.data,options.target, + options.features); + else throw 'ML.learn.ID3: Invalid options'; + model.algorithm=options.algorithm + model.time=Io.time()-t0; + return model; + break; + + case ML.KNN: + // typeof @options = {data: {}[]|[][], distance?:function|string,k?:number} + // typeof @options = {x:number [][], y:number [], + // distance?:function|string,k?:number} + if (options.features && options.target) target=options.target,features = options.features; + else { + features = []; + if (options.data) { + for(key in options.data[0]) features.push(key); + target = features.pop() + } else if (options.x) { + for(key in 
options.x[0]) features.push('x'+key); + target='y'; + } + } + if (options.data && Comp.obj.isObj(options.data[0])) { + x = options.data.map(function (row) { return obj2Array(row,features) }); + y = options.data.map(function (row) { return row[target] }) + } else if (options.data && Comp.obj.isMatrix(options.data)) { + x = options.data,map(function (row) { return row.slice(0,row.length-1) }); + y = options.data,map(function (row) { return row[row.length-1] }); + } else if (options.x && options.y) { + x = options.x; + y = options.y; + } + model = KNN.create( + x, + y, + { + distance:options.distance, + k:options.k + }); + model.algorithm = options.algorithm + model.features = features + model.target = target + model.time=Io.time()-t0; + return model; + break; + + case ML.KNN2: + // typeof @options = {data: {}[]|[][], distance?:function|string,k?:number} + // typeof @options = {x:number [][], y:number [], + // distance?:function|string,k?:number} + if (options.features && options.target) target=options.target,features = options.features; + else { + features = []; + if (options.data) { + for(key in options.data[0]) features.push(key); + target = features.pop() + } else if (options.x) { + for(key in options.x[0]) features.push('x'+key); + target='y'; + } + } + if (options.data && Comp.obj.isObj(options.data[0])) { + x = options.data.map(function (row) { return obj2Array(row,features) }); + y = options.data.map(function (row) { return row[target] }) + } else if (options.data && Comp.obj.isMatrix(options.data)) { + x = options.data,map(function (row) { return row.slice(0,row.length-1) }); + y = options.data,map(function (row) { return row[row.length-1] }); + } else if (options.x && options.y) { + x = options.x; + y = options.y; + } + model = KNN.create2( + { + x : x, + y : y, + distance:options.distance, + k:options.k + }); + model.algorithm=options.algorithm + model.features = features + model.target = target + model.time=Io.time()-t0; + return model; + break; + + case 
ML.KMN: + if (options.data && Comp.obj.isMatrix(options.data)) { + data=options.data; + } + model = KMN.cluster({ + data:data, + k:options.k, + distance:options.distance, + epochs:options.epochs, + }) + model.algorithm=options.algorithm + model.data = data + model.time=Io.time()-t0; + return model; + break; + + case ML.RF: + var model={}; + // Single Binary RF (y={-1,1}) or Multi-RF (y:string is in labels) + // typeof options = {data: {}[], target:string, features: string []} | + // {data: [][], target?:string, features?: string []} | + // {x: number [][], y: {-1,1} []} | + // {data: {x,y}[] } + // {data: {x,y}[], labels: string [] } + if (!options.x || !options.y) throw 'ML.learn.RF: Invalid options'; + // data=preprocess(data,'xmy',{features:features,target:target}) + data={x:options.x,y:options.y}; // TODO + if (options.labels) { + // multi-RF + model.labels = options.labels; + model.rfs = model.labels.map (function (label) { return RF() }); + model.rfs.forEach (function (rf,i) { + var y = data.y.map(function (label) { return label==model.labels[i]?1:-1} ); + RF.code.train(rf,options.x,y,{ + numTrees:options.numTrees, + maxDepth:options.maxDepth, + numTries:options.numTries, + type:options.weakType, + }); + }); + } else { + model = RF(); + features=options.x[0].map(function (col,i) { return String(i) }); + target='y'; + + RF.code.train(model, + options.x, + options.y, + { + numTrees:options.numTrees, + maxDepth:options.maxDepth, + numTries:options.numTries, + type:options.weakType, + }); + } + model.algorithm=options.algorithm + model.time=Io.time()-t0; + return model; + break; + + case ML.RL: + // Create learner instance + model = {} + options.environment=checkOptions(options.environment,{}); + options.environment.getMaxNumActions= + checkOption(options.environment.getMaxNumActions, + function () { return options.actions.length }) + options.environment.getNumStates= + checkOption(options.environment.getNumStates, + function () { return options.states.length }) 
+ var allowedActions=checkOption(options.environment.allowedActions, function () { return options.actions }); + options.environment.allowedActions= + // Ensure that allowedActions return number array! + function (state) { + return allowedActions(state).map(function (a) { + return options.actions.indexOf(a) + }) + } + var nextState = options.environment.nextState; + if (nextState) { + options.environment.nextState = function (state,action) { + return nextState(state,options.actions[action]) + } + } + switch (options.kind) { + case ML.DQNAgent: + model = RL.DQNAgent( + options.environment, + { + alpha:options.alpha,gamma:options.gamma,epsilon:options.epsilon, + experience_add_every:options.experience_add_every, + experience_size:options.experience_size, + learning_steps_per_iteration:options.learning_steps_per_iteration, + tderror_clamp:options.tderror_clamp, + num_hidden_units:options.num_hidden_units, + update:options.update, + } + ) + break; + case ML.DPAgent: + model = RL.DPAgent( + options.environment, + {alpha:options.alpha,beta:options.beta,gamma:options.gamma, + epsilon:options.epsilon,lambda:options.lambda} + ) + break; + case ML.TDAgent: + model = RL.TDAgent( + options.environment, + // specs + {alpha:options.alpha,beta:options.beta,gamma:options.gamma, + epsilon:options.epsilon,lambda:options.lambda, + replacing_traces:options.replacing_traces, + smooth_policy_update:options.smooth_policy_update, + update:options.update, + planN:options.planN} + ) + break; + } + model.algorithm = options.algorithm; + model.kind = options.kind; + if (options.actions) model.actions = options.actions; + if (options.states) model.states = options.states; + if (options.rewards) model.rewards = options.rewards; + return model; + break; + + + + case ML.SLP: + case ML.MLP: + // typeof options = {x: number [][], + // y: number number [][] | string [], + // hidden_layers?:[],epochs?:number, + // labels?:string [], features?: string [], + // regression?, + // normalize?, bipolar?, 
eps?:number | number [], verbose?} + // + // y and MLP(learn) requires [[p1,p2,..],[p1,p2,..],..] with 0>=p>=1 + // p:label probability + x=options.x; + if (Comp.obj.isArray(options.x) && typeof options.x[0] == 'number') + x=wrap(options.x); + if (Comp.obj.isMatrix(options.y)) + y=options.y; + else if (Comp.obj.isArray(options.y) && typeof options.y[0] == 'number') + y=wrap(options.y); + else if (Comp.obj.isArray(options.y) && options.labels) { + y=options.y.map(function (l1) { + return options.labels.map(function (l2) { + return l1==l2?1:0; + }); + }); + } else throw 'ML.learn.MLP: invalid options'; + if (options.normalize) { + // normalize each variable independently!? + var max=x[0].map(function (col) { return col}), + min=x[0].map(function (col) { return col}); + x.forEach(function (row) { row.forEach(function (col,i) { + max[i]=Math.max(max[i],col); + min[i]=Math.min(min[i],col) }) }); + xshift=options.bipolar?-1:0; + xscale=max.map(function (x,i) { return (xshift?2:1)/((x-min[i])==0?1:x-min[i])}); + xoffset=min; + x=x.map(function (row) { return row.map(function (col,i) { return xshift+(col-xoffset[i])*xscale[i] }) }); + if (options.regression) { + // scale y, too, [0,1] + max=y[0].map(function (col) { return col}); + min=y[0].map(function (col) { return col}); + y.forEach(function (row) { row.forEach(function (col,i) { + max[i]=Math.max(max[i],col); + min[i]=Math.min(min[i],col) }) }); + + yshift=options.bipolar?-1:0; + yscale=max.map(function (x,i) { return (yshift?2:1)/((x-min[i])==0?1:x-min[i])}); + yoffset=min; + y=y.map(function (row) { return row.map(function (col,i) { return yshift+(col-yoffset[i])*yscale[i] }) }); + } + } + + model = MLP({ + input : x, + output : y, + n_ins : x[0].length, + n_outs : y[0].length, + hidden_layer_sizes:options.algorithm==ML.SLP?[]:(options.hidden_layers||[]) + }); + model.algorithm=options.algorithm; + model.labels=options.labels; + model.features=options.features; + 
model.xscale=options.normalize?{k:xscale,off:xoffset,shift:xshift}:undefined; + model.yscale=options.normalize&&options.regression?{k:yscale,off:yoffset,shift:yshift}:undefined; + model.nOutputs=y[0].length; + + MLP.code.set(model,'log level',options.verbose||0); // 0 : nothing, 1 : info, 2 : warning. + MLP.code.train(model,{ + epochs : options.epochs||20000 + }); + model.time=Io.time()-t0; + return model; + break; + + case ML.SVM: + // typeof options = {x: number [][], + // y: ({-1,1}|string) [], + // labels?:string|number [], + // threshold?:number|false, + // C?:numer,tol?:number,max_passes?:number,alpha_tol?:number,kernel?:{}} + + // If classes then multi-SVM (one for each class to be separated)! + if (!options.labels) { + model = SVM({ + x:options.x, + y:options.y, + threshold:options.threshold, + }); + model.algorithm=options.algorithm + SVM.code.train(model,{ + C:options.C||1.0, + tol:options.tol||1e-4, + max_passes:options.max_passes||20, + alpha_tol:options.alpha_tol||1e-5, + kernel:options.kernel + }); + } else { + model={}; + model.algorithm=options.algorithm; + model._labels=options.labels; + model.svms=options.labels.map(function (cl) { + return SVM({ + x:options.x, + y:options.y.map(function (y) { return y==cl?1:-1 }), + threshold:options.threshold, + }); + }); + + model.svms.forEach(function (svm) { + SVM.code.train(svm,{ + C:options.C||1.0, + tol:options.tol||1e-4, + max_passes:options.max_passes||20, + alpha_tol:options.alpha_tol||1e-5, + kernel:options.kernel + }); + }); + // Create one SVM for each class + // Transform y vector + } + model.time=Io.time()-t0; + return model; + break; + + case ML.TXT: + // typeof options = {data: string []} + model = TXT.create(options.data,{ + }); + model.algorithm=options.algorithm + return model; + break; + } + }, + + preprocess:preprocess, + + print: function (model,indent,compact) { + switch (model.algorithm) { + case ML.DTI: + return DTI.print(model,indent,compact); + case ML.DT: + case ML.ICE: + return 
ICE.print(model,indent); + case ML.C45: + return C45.print(model,indent); + case ML.ID3: + return ID3.print(model,indent); + } + }, + + // Only text module + similarity : TXT.similarity, + + stats : STAT, + + // Check model consistency + test: function (model,samples) { + var x,y,data,res,p=0.0; + switch (model.algorithm) { + + case ML.ANN: + data=preprocess(samples,'xmya',{features:model.features,target:model.target}); + // TODO + break; + + case ML.C45: + // Sample row format: [x1,x2,..,y] + if (Comp.obj.isMatrix(samples)) { + samples.forEach(function (sample) { + x=sample.slice(0,sample.length-1); + y=sample[sample.length-1]; + res= C45.classify(model,x); + if (res==y) p += 1; + }); + return p/samples.length; + } else if (Comp.obj.isArray(samples)) { + x=samples.slice(0,samples.length-1); + y=samples[samples.length-1]; + res = C45.classify(model,x); + return res==y?1.0:0.0 + } else if (Comp.obj.isObj(samples) && model.features) { + } + break; + + case ML.TXT: + var model = model.string?{ data : [model.string] }:model; + if (Comp.obj.isArray(samples)) + return samples.map(function (sample) { + return TXT.classify(model,sample).match + }); + else + return TXT.classify(model,samples).match; + break; + + + } + }, + + + /** Update a learned model + * + */ + update: function (model,options) { + switch (model.algorithm||options.algorithm) { + + case ML.RL: + switch (model.kind) { + case ML.DQNAgent: + return RL.DQNAgent.code.learn(model,options); + break; + case ML.DPAgent: + return RL.DPAgent.code.learn(model,options); + break; + case ML.TDAgent: + return RL.TDAgent.code.learn(model,options); + break; + } + break; + + case ML.DTI: + // typeof @options = {data: number [][], target:string, features: string [], eps?:number, maxdepth?:number} | + // {x: number [][], y:[], eps?:number, maxdepth?:number} + if (options.eps==_) options.eps=0; + if (options.maxdepth==_) options.maxdepth=20; + if (options.data && options.target && options.features) + model = 
DTI.update(model,options); + else if (options.x && options.y) { + if (options.x.length != options.y.length) throw 'ML.update.DTI: X and Y vector have different length'; + data=options.x.slice(); + data=data.map(function (row,i) {row.push(options.y[i]); return row}); + features=Comp.array.init(data[0].length-1,function (i) { return String(i)}); + target=String(data[0].length-1); + console.log(data,features,target) + model = DTI.update(model,{ + data:data, + features:features, + target:target, + eps:options.eps, + maxdepth:options.maxdepth + }); + } else throw 'ML.update.DTI: Invalid options'; + + model.algorithm=options.algorithm; + return model; + + case ML.CNN: + break; + } + }, + ML:ML, +}; + +ICE.ml=ml; +CNN.ml=ml; +ml.predict=ml.classify; +ml.train=ml.learn; +ml.best=ml.stats.utils.best; + +module.exports = { + agent:ml, + classify:ml.classify, + column:ml.column, + compact:ml.compact, + depth:ml.depth, + entropy:STAT.entropy, + entropyN:STAT.entropyN, + entropyDep:STAT.entropyDep, + evaluate:ml.evaluate, + info:ml.info, + learn:ml.learn, + options:options, + preprocess:preprocess, + print:ml.print, + stats:STAT, + test:ml.test, + unique:ml.unique, + update:ml.update, + ML:ML, + current:function (module) { current=module.current; Aios=module; } +}