// jam/js/ml/ml.js.bak
//
// 1282 lines
// 44 KiB
// JavaScript

/**
** ==============================
** O O O OOOO
** O O O O O O
** O O O O O O
** OOOO OOOO O OOO OOOO
** O O O O O O O
** O O O O O O O
** OOOO OOOO O O OOOO
** ==============================
** Dr. Stefan Bosse http://www.bsslab.de
**
** COPYRIGHT: THIS SOFTWARE, EXECUTABLE AND SOURCE CODE IS OWNED
** BY THE AUTHOR(S).
** THIS SOURCE CODE MAY NOT BE COPIED, EXTRACTED,
** MODIFIED, OR OTHERWISE USED IN A CONTEXT
** OUTSIDE OF THE SOFTWARE SYSTEM.
**
** $AUTHORS: Stefan Bosse
** $INITIAL: (C) 2006-2020 BSSLAB
** $CREATED: 8-2-16 by sbosse.
** $VERSION: 1.16.2
**
** $INFO:
**
** JavaScript AIOS Machine Learning API
**
** type algorithm = {'ann','dti','dt','ice','id3','c45','kmeans','knn','knn2','mlp','slp','rf','rl','svm','txt','cnn'}
**
**
** id3: Symbolic Decision Tree algorithm
** -------------------------------------
**
** typeof @options = {
** algorithm='id3',
** data:{x1:number,x2:number,..,y:*} []
** target:string is e.g. 'y'
** features: string [] is e.g. ['x1','x2',..]
** }
**
** ice: decision tree algorithm supporting numbers with eps intervals (hybrid C45/ID3)
** -------------------------------------
**
** General feature variable set:
**
** typeof @options = {
** algorithm='dt',
** data:{x1:number,x2:number,..,y:*} [],
** target:string is e.g. 'y',
** features: string [] is e.g. ['x1','x2',..],
** eps:number is e.g. '5',
** }
**
** dti: interval decision tree algorithm
** -------------------------------------
**
** General feature variable set:
**
** typeof @options = {
** algorithm='dti',
** data:{x1:number,x2:number,..,y:*} []
** target:string is e.g. 'y'
** features: string [] is e.g. ['x1','x2',..]
** eps:number is e.g. '5',
** maxdepth:number,
** }
**
** Or vector feature variables (i.e., features=[0,1,2,...n-1], target=n):
**
** typeof @options = {
** algorithm='dti',
** x:* [] [],
** y:* [],
** eps:number is e.g. '5',
** maxdepth:number,
** }
**
** knn: k-Nearest-Neighbour Algorithm
** ----------------------------------
**
** typeof @options = {
** algorithm='knn',
** x: number [][],
** y: * []
** }
**
** mlp: multi layer perceptron Algorithm
** ----------------------------------
**
** typeof @options = {
** algorithm='mlp',
** x: number [][],
** y: number [] [] | * [],
** hidden_layers?:number [],
** lr?:number,
** epochs?:number,
** labels?:string [],
** features?: string [],
** normalize?,
** verbose?:number
** }
**
**
** cnn: Convolutional Neural Network for numerical (2D) data
** -------------------------------------
**
** General feature variable set:
**
** typeof @options = {
** algorithm='cnn',
** data:{x:[]|[][],y:'a} []
** layers: layer [],
** trainer:trainer,
** }
** type layer =
** {type:'input', out_sx:number, out_sy:number, out_depth:number} | // Input Layer
** {type:'conv', sx:number, filters:number, stride:number, pad:number, activation:string} | // Convolution Layer
** {type:'pool', sx:number, stride:number} | // Pooling Layer
** {type:'softmax', num_classes:number} | // Classifier Layers
** {type:'svm', num_classes:number} | // Classifier Layers
** {type:'fc', num_neurons:number, activation:string} // Fully Connected Layer
**
** typeof activation = 'relu'| 'maxout' | 'sigmoid' | 'tanh' ..
**
** type trainer =
** {method: 'sgd', learning_rate:number, momentum: number, batch_size:number, l2_decay:number} |
** {method: 'adadelta', learning_rate:number, eps: number, ro:number, batch_size:number, l2_decay:number} |
** {method: 'adam', learning_rate:number, eps: number, beta1: number, beta2: number, batch_size: number, l2_decay:number} |
** ..
**
** text: text analysis (similarity checking)
** -----------------------------------------
** classify(model,string) -> {match:number [0..1],string:string }
** learn({algorithm:ML.TXT, data:string []}) -> model
** test({algorithm:ML.TXT,string:string}|model,string) -> number [0..1]
** similarity(string,string) -> number [0..1]
**
** $ENDOFINFO
*/
var Io = Require('com/io');
var Comp = Require('com/compat');
var ICE = Require('ml/ice'); // ICE ID3/C45 eps
var DTI = Require('ml/dti');
var KNN = Require('ml/knn');
var KMN = Require('ml/kmeans');
var SVM = Require('ml/svm');
var MLP = Require('ml/mlp');
var ID3 = Require('ml/id3');
var C45 = Require('ml/C45');
var TXT = Require('ml/text');
var RF = Require('ml/rf');
var RL = Require('ml/rl');
var STAT= Require('ml/stats');
var CNN = Require('ml/cnn');
var ANN = Require('ml/ann');
var PCA = Require('ml/pca');
var current=none;
var Aios=none;
// Module options; currently only carries the module version string.
var options = {};
options.version = '1.16.2';
// Symbolic names used throughout this module: algorithm identifiers
// (compared against model.algorithm / options.algorithm), distance
// function names and reinforcement learner agent kinds.
var ML = {
  // Algorithms
  ANN:'ann',    // neataptic NN
  C45:'c45',
  CNN:'cnn',
  DT:'dt',      // Synonym for ICE (the header documents algorithm='dt');
                // the switch statements of this module reference ML.DT
  ICE:'ice',    // ICE ID3/C45 eps
  DTI:'dti',
  ID3:'id3',
  KMN:'kmeans',
  KNN:'knn',
  KNN2:'knn2',
  MLP:'mlp',
  RF:'rf',      // Random Forest
  RL:'rl',      // Reinforcement Learner
  SLP:'slp',    // Synonym for MLP (but single layer)
  SVM:'svm',
  TXT:'txt',
  // Some Functions
  EUCL:'euclidean',
  PEAR:'pearson',
  // RL agents
  DPAgent:'DPAgent',
  TDAgent:'TDAgent',
  DQNAgent:'DQNAgent',
};
/**
 * Logarithm of n to the base 2, computed as ln(n)/ln(2).
 * @private
 */
function log2(n) {
  var numerator = Math.log(n);
  var denominator = Math.log(2);
  return numerator / denominator;
}
// Projects a record onto a value vector: result[i] = row[features[i]]
function obj2Array(row,features) {
  function pick(attr) {
    return row[attr];
  }
  var values = features.map(pick);
  return values;
}
// Returns a new record that contains only the listed attributes of row
// (attributes missing in row are present with value undefined)
function objSlice(row,features) {
  return features.reduce(function (slice,attr) {
    slice[attr] = row[attr];
    return slice;
  }, {});
}
// Unwraps a single-column matrix: [v][] -> v[]
// Anything else (non-matrix, or rows with more than one column) is returned unchanged.
function relax(mat) {
  var singleColumn = Comp.obj.isMatrix(mat) && mat[0].length==1;
  if (!singleColumn) return mat;
  return mat.map(function (row) {
    return row[0];
  });
}
// Wraps a flat vector into a single-column matrix: v[] -> [v][]
// A value that already is a matrix is returned unchanged.
function wrap(mat) {
  if (Comp.obj.isMatrix(mat)) return mat;
  return mat.map(function (value) {
    return [value];
  });
}
/* Common data transformation between different formats
**
** 1a. need='xy': data={$x:'a,$y:'b}[] -> {x:{$x} [], y:'b[]}
** 1b. need='xy': data=('a|'b)[][] -> {x:'a [][], y:'b[]}
** 1c. need='xry': data=('a|'b)[][] -> {x:{$x} [], y:'b[]}
** 1d. need='io': data=number[][] -> {input:number, output:number} []
** 1e. need='io': data={$x:number,$y:number}[] -> {input:number, output:number} []
** 2. need='xmy': data={$x:'a,$y:'b}[] -> {x:'a [][], y:'b[]}
** 3. need='d': data={x:'a[][],y:'b[]} -> {data:{$x:'a,$y:'b}[][]}
** 4. need='dm': data={x:'a[][],y:'b[]} -> {data:('a|'b)[][]}
** 5. need='m': data={$x:'a}[] -> 'a [][]
** 6. need='a': data={$x:'a} -> 'a []
** typeof options = {
** scale: {k:number, off:number, shift:number} is transformation of input data,
** xscale: {k:number, off:number, shift:number} is transformation of input data,
** yscale: {k:number, off:number, shift:number} is transformation of output data,
** features : string [] is feature variable list,
** target: string is output variable,
** }
**/
// Linear transformation of a number or a vector of numbers:
//   v' = scala.shift + (v - scala.off) * scala.k
// scala.k/scala.off are either numbers (same transformation for all columns)
// or arrays (one entry per column; a single number uses entry 0).
// Without scala the input is returned unchanged.
function scale(vrow,scala) {
  if (!scala) return vrow;
  var uniform = typeof scala.k == 'number';
  function transform(value,index) {
    if (uniform) return scala.shift+(value-scala.off)*scala.k;
    return scala.shift+(value-scala.off[index])*scala.k[index];
  }
  if (typeof vrow == 'number') return transform(vrow,0);
  return vrow.map(function (col,i) {
    return transform(col,i);
  });
}
// Inverse of scale(): maps a scaled number or vector back to the original range,
//   v = (v' - scala.shift) / scala.k + scala.off
// scala.k/scala.off are either numbers (same transformation for all columns)
// or arrays (one entry per column; a single number uses entry 0).
// Without scala the input is returned unchanged. Vectors are transformed
// column-wise (they previously fell through and returned undefined);
// values that are neither numbers nor arrays are returned unchanged.
function unscale(vrow,scala) {
  if (!scala) return vrow;
  var uniform = typeof scala.k == 'number';
  function invert(value,index) {
    if (uniform) return (value-scala.shift)/scala.k+scala.off;
    return (value-scala.shift)/scala.k[index]+scala.off[index];
  }
  if (typeof vrow == 'number') return invert(vrow,0);
  if (Array.isArray(vrow)) return vrow.map(function (col,i) {
    return invert(col,i);
  });
  return vrow;
}
// Converts training/sample data between the container formats used by the
// learners. `need` selects the requested output format, `options` supplies
// features/target names and an optional input scaling (options.scale or
// options.xscale, applied via scale()).
// Two input families are handled:
//  - data is an array of rows (row arrays or row objects)
//  - data is an object with array members x and y
// Returns the converted data, or undefined if no branch matches the
// combination of input format, need and options (the function then simply
// falls off its end).
function preprocess(data,need,options) {
var row,x,y,_data;
options=options||{};
var scala=options.scale || options.xscale;
// wraps a scalar output value into a one-element array
function array(data) {
return Comp.obj.isArray(data)?data:[data]
}
if (Comp.obj.isArray(data)) {
// Row-oriented input; the first row decides the row format
row=data[0];
switch (need) {
case 'xy':
case 'xry':
// -> {x,y}; both target and features must be given
if (options.target!=undefined && options.features!=undefined) {
if (Comp.obj.isArray(row) && need=='xy') {
// array rows: only handled if the target is the last column
if (Number(options.target)==row.length-1) {
x=data.map(function (row) { return scale(row.slice(0,options.target),scala) });
y=data.map(function (row) { return row[options.target] })
}
} else if (Comp.obj.isObj(row)) {
if (typeof options.target == 'string') {
// NOTE(review): scale() receives an object here; with a scaling set it calls
// .map on it - confirm that object rows are never combined with scale/xscale
x=data.map(function (row) { return scale(objSlice(row,options.features),scala) });
y=data.map(function (row) { return row[options.target] });
}
}
}
if (x && y) return {x:x,y:y}
break;
case 'a':
// flat array of non-object values is passed through as {data}
if (Comp.obj.isArray(data) && typeof data[0] != 'object') return {data:data};
// NOTE(review): data is an array in this branch; whether Comp.obj.isObject
// accepts arrays is not visible here - confirm
if (Comp.obj.isObject(data) && options.features!=undefined) {
return { data:data.map(function (row) {
return scale(objSlice(row,options.features),scala) })};
}
break;
case 'm':
// -> {data: matrix}
if (Comp.obj.isMatrix(data)) return {data:data};
if (Comp.obj.isObject(row) && options.features!=undefined) {
return { data:data.map(function (row) {
return scale(obj2Array(row,options.features),scala) })};
}
break;
case 'xmy':
// object rows -> {x: matrix, y: vector}
if (Comp.obj.isObject(row) && options.features!=undefined && options.target!=undefined) {
return { x:data.map(function (row) {
return scale(obj2Array(row,options.features),scala) }),
y:data.map(function (row) { return row[options.target]})};
}
break;
case 'io':
// -> {input:[],output:[]} [] (note: returned directly, not wrapped in an object)
if (Comp.obj.isArray(row) && options.target!=undefined) {
// number [][]
if (Number(options.target)==row.length-1) {
_data=data.map(function (row) { return { input:scale(row.slice(0,options.target),scala),
output:array(row[options.target]) }});
return _data
}
} else if (Comp.obj.isObject(row) && options.target!=undefined && options.features!=undefined) {
_data=data.map(function (row) { return { input:scale(obj2Array(row,options.features),scala),
output:array(row[options.target]) }});
return _data
}
break;
}
} else if (data.x && data.y) {
// Column-oriented input {x,y}
if (Comp.obj.isArray(data.x) && Comp.obj.isArray(data.y)) {
row=data.x[0];
switch (need) {
case 'io':
if (Comp.obj.isArray(row)) {
// number [][]
_data=data.x.map(function (row, rowi) { return { input:scale(row,scala),
output:array(data.y[rowi]) }});
return _data
}
if (Comp.obj.isObject(row) && options.features!=undefined) {
_data=data.x.map(function (row, rowi) { return { input:scale(obj2Array(row,options.features),scala),
output:array(data.y[rowi]) }});
return _data
}
break;
case 'xm':
// x matrix only (no scaling applied)
if (Comp.obj.isArray(row)) return data.x;
break;
case 'xmy':
// pass-through (no scaling applied)
if (Comp.obj.isArray(row)) return { x:data.x, y:data.y};
break;
case 'xmya':
// like 'xmy', but each y value is wrapped into an array
if (Comp.obj.isArray(row)) return { x:data.x, y:data.y.map(array)};
break;
case 'd':
// -> row objects; column names are taken from options.features/options.target
// if both are given, else the column index is used as name (target = row.length)
return data.x.map(function (row,rowi) {
var newrow={};
if (options.features && options.target) {
options.features.forEach(function (f,coli) {
newrow[f]=row[coli];
});
newrow[options.target]=data.y[rowi];
} else {
row.forEach(function (col,f) {
newrow[String(f)]=col;
});
newrow[String(row.length)]=data.y[rowi];
}
return newrow;
})
break;
}
}
}
}
// Agent AIOS API
var ml = {
// only RL
action : function (model,arg) {
switch (model.algorithm) {
// Selects and returns next action from set of actions
case ML.RL:
switch (model.kind) {
case ML.DQNAgent:
// arg == state array
return model.actions[RL.DQNAgent.code.act(model,arg)];
break;
case ML.DPAgent:
// arg == state (integer number)
return model.actions[RL.DPAgent.code.act(model,arg)];
break;
case ML.TDAgent:
// arg == state (integer number)
return model.actions[RL.TDAgent.code.act(model,arg)];
break;
}
break;
}
},
/** Classification (prediction): Apply sample data to learned model.
* Returns prediction result.
*
*/
classify: function (model,samples) {
var x,solutions,result;
switch (model.algorithm) {
case ML.ANN:
if (Comp.obj.isArray(samples))
return samples.map(function (sample) {
return model.network.activate(sample)
});
else
return model.network.activate(samples);
case ML.CNN:
if (Comp.obj.isMatrix(samples))
return samples.map(function (sample) {
return CNN.predict(model,sample);
});
else
return CNN.predict(model,samples);
break;
case ML.C45:
// Sample row format: [x1,x2,..,xn]
if (Comp.obj.isMatrix(samples)) {
return samples.map(function (sample) {
return C45.classify(model,sample);
});
} else if (Comp.obj.isArray(samples) && !Comp.obj.isObj(samples[0])) {
return C45.classify(model,samples);
} else if (Comp.obj.isArray(samples) && Comp.obj.isObj(samples[0])) {
return samples.map(function (sample) {
return C45.classify(model,sample);
});
} else if (Comp.obj.isObj(samples)) {
return C45.classify(model,samples);
}
break;
case ML.DT:
case ML.ICE:
if (Comp.obj.isMatrix(samples) ||
Comp.obj.isArray(samples) && Comp.obj.isObj(samples[0]))
return samples.map(function (sample) {
return ICE.predict(model,sample)
});
else
return ICE.predict(model,samples);
case ML.DTI:
if (Comp.obj.isMatrix(samples))
return samples.map(function (sample) {
return DTI.predict(model,sample)
});
else
return DTI.predict(model,samples);
case ML.ID3:
if (Comp.obj.isArray(samples))
return samples.map(function (sample) {
return ID3.predict(model,sample)
});
else
return ID3.predict(model,samples);
case ML.KNN:
if (Comp.obj.isMatrix(samples))
return KNN.predict(model,samples);
else if (Comp.obj.isArray(samples) && Comp.obj.isObj(samples[0]))
return KNN.predict(model,samples.map(function (sample) {
return obj2Array(sample,model.features)}));
else if (Comp.obj.isObj(samples))
return KNN.predict(model,obj2Array(samples,model.features));
else
return KNN.predict(model,samples);
break;
case ML.KNN2:
if (Comp.obj.isMatrix(samples))
return samples.map(function (sample) {
return KNN.predict2(model,sample);
});
else if (Comp.obj.isArray(samples) && Comp.obj.isObj(samples[0]))
return samples.map(function (sample) {
return KNN.predict2(model,obj2Array(sample,model.features))
})
else if (Comp.obj.isObj(samples))
return KNN.predict2(model,obj2Array(samples,model.features));
else
return KNN.predict2(model,samples);
break;
case ML.KMN:
return model.clusters
break;
case ML.RF:
if (model.labels) {
if (Comp.obj.isMatrix(samples)) {
return samples.map(function (sample) {
return model.rfs.map(function (rf) {
return RF.code.predictOne(rf,sample);
}).map(function (v,i) {
return { value:model.labels[i], prob:v }
})
});
} else if (Comp.obj.isArray(samples) && typeof samples[0] == 'number') {
return model.rfs.map(function (rf) {
return RF.code.predictOne(rf,samples);
}).map(function (v,i) {
return { value:model.labels[i], prob:v }
})
} // TODO
} else {
// Sample row format: [x1,x2,..,xn]
if (Comp.obj.isMatrix(samples)) {
return samples.map(function (sample) {
return RF.code.predictOne(model,sample);
});
} else if (Comp.obj.isArray(samples) && typeof samples[0] == 'number') {
return RF.predictOne(model,samples);
} // TODO
}
// preprocess(samples,'m')
break;
case ML.SVM:
if (!model._labels) {
// Single SVM
if (Comp.obj.isMatrix(samples))
return samples.map(function (sample) {
return SVM.code.predict(model,sample);
});
else
return SVM.code.predict(model,samples);
} else {
// Multi SVM
if (Comp.obj.isMatrix(samples))
return samples.map(function (sample) {
solutions=model.svms.map(function (svm,index) {
if (svm.threshold==false)
return SVM.code.predict(svm,sample)
else
return SVM.code.predict(svm,sample);
});
return solutions.map(function (v,i) { return { value:model._labels[i], prob:v } });
});
else {
solutions=model.svms.map(function (svm,index) {
if (svm.threshold==false)
return SVM.code.predict(svm,samples)
else
return SVM.code.predict(svm,samples)==1;
})
return solutions.map(function (v,i) { return { value:model._labels[i], prob:v } });
}
}
break;
case ML.SLP:
case ML.MLP:
if (Comp.obj.isMatrix(samples)) {
x=samples;
if (model.xscale)
x=x.map(function (row) { return scale(row,model.xscale) });
result = model.labels?MLP.code.predict(model,x).map(function (r) {
var o={};
r.forEach(function (v,i) { o[model.labels[i]]=v });
return o;
}):relax(MLP.code.predict(model,x));
} else if (Comp.obj.isArray(samples)) {
x=samples;
if (model.xscale)
x=scale(x,model.xscale);
result = model.labels?MLP.code.predict(model,[x]).map(function (r) {
var o={};
r.forEach(function (v,i) { o[model.labels[i]]=v });
return o;
})[0]:relax(MLP.code.predict(model,[x])[0]);
} else if (Comp.obj.isObj(samples) && model.features) {
x=model.features.map(function (f) { return samples[f] });
if (model.xscale)
x=scale(x,model.xscale);
result = model.labels?MLP.code.predict(model,[x]).map(function (r) {
var o={};
r.forEach(function (v,i) { o[model.labels[i]]=v });
return o;
})[0]:relax(MLP.code.predict(model,[x])[0]);
}
if (Comp.obj.isArray(result)) {
return model.yscale?result.map(function (y) { return unscale(y,model.yscale) }):result;
} else {
}
break;
case ML.TXT:
// typeof options = {data: string []}
if (Comp.obj.isArray(samples))
return samples.map(function (sample) { return TXT.classify(model,sample) });
else
return TXT.classify(model,samples);
break;
}
},
compact: function (model) {
switch (model.algorithm) {
case ML.DTI:
default:
return DTI.compactTree(model);
}
},
depth: function (model) {
switch (model.algorithm) {
case ML.DTI:
return DTI.depth(model);
case ML.DT:
case ML.ICE:
return ICE.depth(model);
case ML.C45:
return C45.depth(model);
case ML.ID3:
return ID3.depth(model);
}
},
evaluate: function (model,target,samples) {
switch (model.algorithm) {
case ML.DTI:
default:
return DTI.evaluate(model,target,samples);
}
},
info: function (model) {
switch (model.algorithm) {
case ML.C45:
return C45.info(model);
case ML.DT:
case ML.ICE:
return ICE.info(model);
case ML.ID3:
return ID3.info(model);
}
},
/** Learning: Create a classification model from training data (or an empty model that can be updated)
*
*/
learn: function (options) {
var model,data,data2,x,y,features,featureTypes,test,target,
result,cols,n_ins,n_outs,x,y,xscale,xoffset,xshift,yscale,yoffset,yshift,key,err,
t0=Io.time();
if (options==_) options={};
switch (options.algorithm) {
case ML.ANN:
// typeof options = { x,y,features?,target?,layers:number [], trainerror:number}
data = preprocess(options,'io',options);
model={};
model.algorithm=options.algorithm
if (!options.layers) options.layers=[]
if (data)
model.network = new ANN.Network(options.layers[0],options.layers[options.layers.length-1]);
else throw 'ML.learn.ANN: Invalid options';
model.network.evolve(data,options);
model.time=Io.time()-t0;
return model;
break;
case ML.CNN:
// typeof options = {x:[][],y:[],..}
model = CNN.create(options);
model.algorithm=options.algorithm;
model.time=Io.time()-t0;
return model;
break;
case ML.C45:
// typeof options = {data: {}[], target:string, features: string []} |
// {data: [][], target?:string, features?: string []} |
// {x: number [][], y:[]} |
// {data: {x,y}[] }
var model = C45.create();
if (options.x && options.y) {
features=options.x[0].map(function (col,i) { return String(i) });
featureTypes=options.x[0].map(function (col,i) { return 'number' });
data=options.x.map(function (row,i) { row=row.slice(); row.push(options.y[i]); return row});
target='y';
} else if (options.data && Comp.obj.isMatrix(options.data)) {
data=options.data;
features=options.features||options.data[0].slice(0,-1).map(function (col,i) { return String(i) });
featureTypes=options.data[0].slice(0,-1).map(function (col,i) { return typeof col == 'number'?'number':'category' });
target=options.target||'y';
} else if (options.data && Comp.obj.isObj(options.data[0]) && options.data[0].x && options.data[0].y!=undefined) {
data=options.data.map(function (row) { return row.x.concat(row.y) });
features=options.features||options.data[0].x.slice(0,-1).map(function (col,i) { return String(i) });
featureTypes=options.data[0].x.slice(0,-1).map(function (col,i) { return typeof col == 'number'?'number':'category' });
target=options.target||'y';
} else if (options.data && Comp.obj.isArray(options.data) && Comp.obj.isObj(options.data[0]) &&
options.target && options.features) {
rowNames=Comp.obj.isArray(options.target)?options.features.concat(options.target):
options.features.concat([options.target]);
data=options.data.map(function (row) { return obj2Array(row,rowNames) })
features=options.features;
featureTypes=data[0].slice(0,-1).map(function (col,i) { return typeof col == 'number'?'number':'category' });
target=options.target;
} else throw 'ML.learn.C45: Invalid options';
C45.train(model,{
data: data,
target: target,
features: features,
featureTypes: featureTypes
});
model.algorithm=options.algorithm
model.time=Io.time()-t0;
return model;
break;
case ML.DTI:
// typeof options = {data: {}[], target:string, features: string [], eps;number, maxdepth} |
// {x: number [][], y:[], eps;number, maxdepth}
if (options.eps==_) options.eps=0;
if (options.maxdepth==_) options.maxdepth=20;
if (options.data && options.target && options.features)
model = DTI.create(options);
else if (options.x && options.y) {
if (options.x.length != options.y.length) throw 'ML.learn.DTI: X and Y vector have different length';
data=options.x.map(function (row,i) { row=row.slice(); row.push(options.y[i]); return row});
features=Comp.array.init(data[0].length-1,function (i) { return String(i)});
target=String(data[0].length-1);
// console.log(data,features,target)
model = DTI.create({
data:data,
features:features,
target:target,
eps:options.eps,
maxdepth:options.maxdepth
});
} else throw 'ML.learn.DTI: Invalid options';
model.algorithm=options.algorithm;
model.time=Io.time()-t0;
return model;
case ML.ICE:
case ML.DT:
if (options.eps==_) options.eps=0;
if (options.data && options.target && options.features)
model = ICE.create(options);
else if (options.x && options.y) {
if (options.x.length != options.y.length) throw 'ML.learn.ICE: X and Y vector have different length';
data=options.x.map(function (row,i) { row=row.slice(); row.push(options.y[i]); return row});
features=Comp.array.init(data[0].length-1,function (i) { return String(i)});
target=String(data[0].length-1);
model = ICE.create({
data:data,
features:features,
target:target,
eps:options.eps,
});
} else throw 'ML.learn.ICE: Invalid options';
model.algorithm=options.algorithm;
model.eps=options.eps;
model.time=Io.time()-t0;
return model;
break;
case ML.ID3:
if (options.data && options.target && options.features)
model = ID3.createTree(options.data,options.target,
options.features);
else throw 'ML.learn.ID3: Invalid options';
model.algorithm=options.algorithm
model.time=Io.time()-t0;
return model;
break;
case ML.KNN:
// typeof @options = {data: {}[]|[][], distance?:function|string,k?:number}
// typeof @options = {x:number [][], y:number [],
// distance?:function|string,k?:number}
if (options.features && options.target) target=options.target,features = options.features;
else {
features = [];
if (options.data) {
for(key in options.data[0]) features.push(key);
target = features.pop()
} else if (options.x) {
for(key in options.x[0]) features.push('x'+key);
target='y';
}
}
if (options.data && Comp.obj.isObj(options.data[0])) {
x = options.data.map(function (row) { return obj2Array(row,features) });
y = options.data.map(function (row) { return row[target] })
} else if (options.data && Comp.obj.isMatrix(options.data)) {
x = options.data,map(function (row) { return row.slice(0,row.length-1) });
y = options.data,map(function (row) { return row[row.length-1] });
} else if (options.x && options.y) {
x = options.x;
y = options.y;
}
model = KNN.create(
x,
y,
{
distance:options.distance,
k:options.k
});
model.algorithm = options.algorithm
model.features = features
model.target = target
model.time=Io.time()-t0;
return model;
break;
case ML.KNN2:
// typeof @options = {data: {}[]|[][], distance?:function|string,k?:number}
// typeof @options = {x:number [][], y:number [],
// distance?:function|string,k?:number}
if (options.features && options.target) target=options.target,features = options.features;
else {
features = [];
if (options.data) {
for(key in options.data[0]) features.push(key);
target = features.pop()
} else if (options.x) {
for(key in options.x[0]) features.push('x'+key);
target='y';
}
}
if (options.data && Comp.obj.isObj(options.data[0])) {
x = options.data.map(function (row) { return obj2Array(row,features) });
y = options.data.map(function (row) { return row[target] })
} else if (options.data && Comp.obj.isMatrix(options.data)) {
x = options.data,map(function (row) { return row.slice(0,row.length-1) });
y = options.data,map(function (row) { return row[row.length-1] });
} else if (options.x && options.y) {
x = options.x;
y = options.y;
}
model = KNN.create2(
{
x : x,
y : y,
distance:options.distance,
k:options.k
});
model.algorithm=options.algorithm
model.features = features
model.target = target
model.time=Io.time()-t0;
return model;
break;
case ML.KMN:
if (options.data && Comp.obj.isMatrix(options.data)) {
data=options.data;
}
model = KMN.cluster({
data:data,
k:options.k,
distance:options.distance,
epochs:options.epochs,
})
model.algorithm=options.algorithm
model.data = data
model.time=Io.time()-t0;
return model;
break;
case ML.RF:
var model={};
// Single Binary RF (y={-1,1}) or Multi-RF (y:string is in labels)
// typeof options = {data: {}[], target:string, features: string []} |
// {data: [][], target?:string, features?: string []} |
// {x: number [][], y: {-1,1} []} |
// {data: {x,y}[] }
// {data: {x,y}[], labels: string [] }
if (!options.x || !options.y) throw 'ML.learn.RF: Invalid options';
// data=preprocess(data,'xmy',{features:features,target:target})
data={x:options.x,y:options.y}; // TODO
if (options.labels) {
// multi-RF
model.labels = options.labels;
model.rfs = model.labels.map (function (label) { return RF() });
model.rfs.forEach (function (rf,i) {
var y = data.y.map(function (label) { return label==model.labels[i]?1:-1} );
RF.code.train(rf,options.x,y,{
numTrees:options.numTrees,
maxDepth:options.maxDepth,
numTries:options.numTries,
type:options.weakType,
});
});
} else {
model = RF();
features=options.x[0].map(function (col,i) { return String(i) });
target='y';
RF.code.train(model,
options.x,
options.y,
{
numTrees:options.numTrees,
maxDepth:options.maxDepth,
numTries:options.numTries,
type:options.weakType,
});
}
model.algorithm=options.algorithm
model.time=Io.time()-t0;
return model;
break;
case ML.RL:
// Create learner instance
model = {}
options.environment=checkOptions(options.environment,{});
options.environment.getMaxNumActions=
checkOption(options.environment.getMaxNumActions,
function () { return options.actions.length })
options.environment.getNumStates=
checkOption(options.environment.getNumStates,
function () { return options.states.length })
var allowedActions=checkOption(options.environment.allowedActions, function () { return options.actions });
options.environment.allowedActions=
// Ensure that allowedActions return number array!
function (state) {
return allowedActions(state).map(function (a) {
return options.actions.indexOf(a)
})
}
var nextState = options.environment.nextState;
if (nextState) {
options.environment.nextState = function (state,action) {
return nextState(state,options.actions[action])
}
}
switch (options.kind) {
case ML.DQNAgent:
model = RL.DQNAgent(
options.environment,
{
alpha:options.alpha,gamma:options.gamma,epsilon:options.epsilon,
experience_add_every:options.experience_add_every,
experience_size:options.experience_size,
learning_steps_per_iteration:options.learning_steps_per_iteration,
tderror_clamp:options.tderror_clamp,
num_hidden_units:options.num_hidden_units,
update:options.update,
}
)
break;
case ML.DPAgent:
model = RL.DPAgent(
options.environment,
{alpha:options.alpha,beta:options.beta,gamma:options.gamma,
epsilon:options.epsilon,lambda:options.lambda}
)
break;
case ML.TDAgent:
model = RL.TDAgent(
options.environment,
// specs
{alpha:options.alpha,beta:options.beta,gamma:options.gamma,
epsilon:options.epsilon,lambda:options.lambda,
replacing_traces:options.replacing_traces,
smooth_policy_update:options.smooth_policy_update,
update:options.update,
planN:options.planN}
)
break;
}
model.algorithm = options.algorithm;
model.kind = options.kind;
if (options.actions) model.actions = options.actions;
if (options.states) model.states = options.states;
if (options.rewards) model.rewards = options.rewards;
return model;
break;
case ML.SLP:
case ML.MLP:
// typeof options = {x: number [][],
// y: number number [][] | string [],
// hidden_layers?:[],epochs?:number,
// labels?:string [], features?: string [],
// regression?,
// normalize?, bipolar?, eps?:number | number [], verbose?}
//
// y and MLP(learn) requires [[p1,p2,..],[p1,p2,..],..] with 0>=p>=1
// p:label probability
x=options.x;
if (Comp.obj.isArray(options.x) && typeof options.x[0] == 'number')
x=wrap(options.x);
if (Comp.obj.isMatrix(options.y))
y=options.y;
else if (Comp.obj.isArray(options.y) && typeof options.y[0] == 'number')
y=wrap(options.y);
else if (Comp.obj.isArray(options.y) && options.labels) {
y=options.y.map(function (l1) {
return options.labels.map(function (l2) {
return l1==l2?1:0;
});
});
} else throw 'ML.learn.MLP: invalid options';
if (options.normalize) {
// normalize each variable independently!?
var max=x[0].map(function (col) { return col}),
min=x[0].map(function (col) { return col});
x.forEach(function (row) { row.forEach(function (col,i) {
max[i]=Math.max(max[i],col);
min[i]=Math.min(min[i],col) }) });
xshift=options.bipolar?-1:0;
xscale=max.map(function (x,i) { return (xshift?2:1)/((x-min[i])==0?1:x-min[i])});
xoffset=min;
x=x.map(function (row) { return row.map(function (col,i) { return xshift+(col-xoffset[i])*xscale[i] }) });
if (options.regression) {
// scale y, too, [0,1]
max=y[0].map(function (col) { return col});
min=y[0].map(function (col) { return col});
y.forEach(function (row) { row.forEach(function (col,i) {
max[i]=Math.max(max[i],col);
min[i]=Math.min(min[i],col) }) });
yshift=options.bipolar?-1:0;
yscale=max.map(function (x,i) { return (yshift?2:1)/((x-min[i])==0?1:x-min[i])});
yoffset=min;
y=y.map(function (row) { return row.map(function (col,i) { return yshift+(col-yoffset[i])*yscale[i] }) });
}
}
model = MLP({
input : x,
output : y,
n_ins : x[0].length,
n_outs : y[0].length,
hidden_layer_sizes:options.algorithm==ML.SLP?[]:(options.hidden_layers||[])
});
model.algorithm=options.algorithm;
model.labels=options.labels;
model.features=options.features;
model.xscale=options.normalize?{k:xscale,off:xoffset,shift:xshift}:undefined;
model.yscale=options.normalize&&options.regression?{k:yscale,off:yoffset,shift:yshift}:undefined;
model.nOutputs=y[0].length;
MLP.code.set(model,'log level',options.verbose||0); // 0 : nothing, 1 : info, 2 : warning.
MLP.code.train(model,{
epochs : options.epochs||20000
});
model.time=Io.time()-t0;
return model;
break;
case ML.SVM:
// typeof options = {x: number [][],
// y: ({-1,1}|string) [],
// labels?:string|number [],
// threshold?:number|false,
// C?:numer,tol?:number,max_passes?:number,alpha_tol?:number,kernel?:{}}
// If classes then multi-SVM (one for each class to be separated)!
if (!options.labels) {
model = SVM({
x:options.x,
y:options.y,
threshold:options.threshold,
});
model.algorithm=options.algorithm
SVM.code.train(model,{
C:options.C||1.0,
tol:options.tol||1e-4,
max_passes:options.max_passes||20,
alpha_tol:options.alpha_tol||1e-5,
kernel:options.kernel
});
} else {
model={};
model.algorithm=options.algorithm;
model._labels=options.labels;
model.svms=options.labels.map(function (cl) {
return SVM({
x:options.x,
y:options.y.map(function (y) { return y==cl?1:-1 }),
threshold:options.threshold,
});
});
model.svms.forEach(function (svm) {
SVM.code.train(svm,{
C:options.C||1.0,
tol:options.tol||1e-4,
max_passes:options.max_passes||20,
alpha_tol:options.alpha_tol||1e-5,
kernel:options.kernel
});
});
// Create one SVM for each class
// Transform y vector
}
model.time=Io.time()-t0;
return model;
break;
case ML.TXT:
// typeof options = {data: string []}
model = TXT.create(options.data,{
});
model.algorithm=options.algorithm
return model;
break;
}
},
preprocess:preprocess,
print: function (model,indent,compact) {
switch (model.algorithm) {
case ML.DTI:
return DTI.print(model,indent,compact);
case ML.DT:
case ML.ICE:
return ICE.print(model,indent);
case ML.C45:
return C45.print(model,indent);
case ML.ID3:
return ID3.print(model,indent);
}
},
// Only text module
similarity : TXT.similarity,
stats : STAT,
// Check model consistency
test: function (model,samples) {
var x,y,data,res,p=0.0;
switch (model.algorithm) {
case ML.ANN:
data=preprocess(samples,'xmya',{features:model.features,target:model.target});
// TODO
break;
case ML.C45:
// Sample row format: [x1,x2,..,y]
if (Comp.obj.isMatrix(samples)) {
samples.forEach(function (sample) {
x=sample.slice(0,sample.length-1);
y=sample[sample.length-1];
res= C45.classify(model,x);
if (res==y) p += 1;
});
return p/samples.length;
} else if (Comp.obj.isArray(samples)) {
x=samples.slice(0,samples.length-1);
y=samples[samples.length-1];
res = C45.classify(model,x);
return res==y?1.0:0.0
} else if (Comp.obj.isObj(samples) && model.features) {
}
break;
case ML.TXT:
var model = model.string?{ data : [model.string] }:model;
if (Comp.obj.isArray(samples))
return samples.map(function (sample) {
return TXT.classify(model,sample).match
});
else
return TXT.classify(model,samples).match;
break;
}
},
/** Update a learned model
*
*/
update: function (model,options) {
switch (model.algorithm||options.algorithm) {
case ML.RL:
switch (model.kind) {
case ML.DQNAgent:
return RL.DQNAgent.code.learn(model,options);
break;
case ML.DPAgent:
return RL.DPAgent.code.learn(model,options);
break;
case ML.TDAgent:
return RL.TDAgent.code.learn(model,options);
break;
}
break;
case ML.DTI:
// typeof @options = {data: number [][], target:string, features: string [], eps?:number, maxdepth?:number} |
// {x: number [][], y:[], eps?:number, maxdepth?:number}
if (options.eps==_) options.eps=0;
if (options.maxdepth==_) options.maxdepth=20;
if (options.data && options.target && options.features)
model = DTI.update(model,options);
else if (options.x && options.y) {
if (options.x.length != options.y.length) throw 'ML.update.DTI: X and Y vector have different length';
data=options.x.slice();
data=data.map(function (row,i) {row.push(options.y[i]); return row});
features=Comp.array.init(data[0].length-1,function (i) { return String(i)});
target=String(data[0].length-1);
console.log(data,features,target)
model = DTI.update(model,{
data:data,
features:features,
target:target,
eps:options.eps,
maxdepth:options.maxdepth
});
} else throw 'ML.update.DTI: Invalid options';
model.algorithm=options.algorithm;
return model;
case ML.CNN:
break;
}
},
ML:ML,
};
// Back references: the ICE and CNN modules get access to this API
[ICE,CNN].forEach(function (mod) {
  mod.ml=ml;
});
// Aliases
ml.predict=ml.classify;
ml.train=ml.learn;
ml.best=ml.stats.utils.best;
// Public module interface. agent is the complete ml API object; most other
// entries are direct references to its operations or to the STAT module.
module.exports = {
agent:ml,
classify:ml.classify,
// NOTE(review): no column member is defined in the ml object literal of this
// file, so this exports undefined unless it is added elsewhere - confirm
column:ml.column,
compact:ml.compact,
depth:ml.depth,
entropy:STAT.entropy,
entropyN:STAT.entropyN,
entropyDep:STAT.entropyDep,
evaluate:ml.evaluate,
info:ml.info,
learn:ml.learn,
options:options,
preprocess:preprocess,
print:ml.print,
stats:STAT,
test:ml.test,
// NOTE(review): no unique member is defined in the ml object literal of this
// file, so this exports undefined unless it is added elsewhere - confirm
unique:ml.unique,
update:ml.update,
ML:ML,
// Stores the given module in Aios and its current attribute in current
current:function (module) { current=module.current; Aios=module; }
}