/*
 * js/test/learn.js
 *
 * Recovered from a git patch that adds this file (new file, 431 lines):
 *   commit:  3c36c8f54aecfcbb2a9990bf3304c7009c22a287
 *   From:    sbosse
 *   Date:    Mon, 21 Jul 2025 23:13:39 +0200
 *   Subject: [PATCH] Mon 21 Jul 22:43:21 CEST 2025
 *
 * Test script for an interval (epsilon) based decision tree learner:
 *  1. reads the matrices 'sensorsA<k>' and 'sensorsB<k>' (k=0..5) from the database,
 *  2. cuts a region of interest around roi0 out of each matrix and adds noise,
 *  3. learns a tree that maps the feature values to the class 'L<k>',
 *  4. prints the tree and the predictions for a second, independently noised sample set.
 *
 * NOTE(review): `Require` and `none` are not defined in this file; they are presumably
 * installed as globals by the module loader required below -- confirm.
 */
global.TOP='/home/sbosse/proj/jam/js';
require(TOP+'/top/module')([process.cwd(),TOP]);
var Comp = Require('com/compat');
var Io = Require('com/io');
var Aios = Require('jam/aios');
var Db = Require('db/db');
var Fs = require('fs');
var db = Db.Sqlc('/tmp/sqld',1);
var Ml = Require('ml/ml');
var ml = Ml.agent;
var _ = Require('ml/lodash');

var repl;
db.init();
var datasets=[0,1,2,3,4,5];
var features=[];
var eps=10;               // half width of the value interval used by the learner
var class_name='load';    // name of the target (class) attribute
var roid = 1;             // the region of interest spans roi0 +/- roid cells
var roi0 = {x:3,y:4};     // centre cell of the region of interest

var dataset=0;
var training_data=[];
var model;

/**
 * Entropy of a list of numerical values. The probability of each distinct value
 * is computed by probEps, i.e. all list elements within +/-eps count as occurrences.
 * @param {Array} vals list of values
 * @param {number} eps half width of the matching interval
 * @returns {number} sum of -p*log2(p) over the distinct values
 */
function entropyEps(vals,eps) {
  var uniqueVals = _.unique(vals);
  var probs = uniqueVals.map(function (x) {
    return probEps(x, vals, eps);
  });
  var logVals = probs.map(function (p) {
    return -p * log2(p);
  });
  return logVals.reduce(function (a, b) {
    return a + b;
  }, 0);
}

/**
 * Relative frequency of a value in a list with an additional 2*eps interval:
 * every element in [value-eps,value+eps] counts as an occurrence.
 * Only applicable to numerical values.
 * @param {number} value
 * @param {Array} list
 * @param {number} eps
 * @returns {number} occurrences divided by the list length
 */
function probEps(value, list, eps) {
  var occurrences = _.filter(list, function (element) {
    return (element >= (value-eps)) && (element <= (value+eps));
  });
  return occurrences.length / list.length;
}

/**
 * Computes Log with base-2
 * @private
 */
function log2(n) {
  return Math.log(n) / Math.log(2);
}


var NODE_TYPES = {
  RESULT: 'result',
  FEATURE: 'feature',
  FEATURE_RANGE: 'feature_range',
  FEATURE_VALUE: 'feature_value'
};

/** Creates a leaf node carrying the class name `key`. */
function Result(key) {
  return {
    type:NODE_TYPES.RESULT,
    name:key
  };
}

/** Creates an inner node testing feature `name`; `vals` is the list of Value nodes. */
function Feature(name,vals) {
  return {
    type:NODE_TYPES.FEATURE,
    name:name,
    vals:vals
  };
}

/**
 * Creates a value node (one branch of a feature node).
 * A value can be a scalar or a range {a,b} object.
 */
function Value(val,child) {
  return {
    type:NODE_TYPES.FEATURE_VALUE,
    val:val,
    child:child
  };
}

/**
 * Renders a (sub)tree as a one-line string.
 * @param {object} model tree node created by Result, Feature or Value
 * @returns {string} '' for a missing node, 'model?' for an unknown node type
 */
function print(model) {
  var line='',sep;
  if (!model) return '';
  switch (model.type) {
    case NODE_TYPES.RESULT:
      return ' -> '+model.name;
    case NODE_TYPES.FEATURE:
      line='('+model.name+'?';
      sep='';
      Comp.array.iter(model.vals,function (v) {
        line += sep+print(v);
        sep=',';
      });
      return line+')';
    case NODE_TYPES.FEATURE_VALUE:
      return ' '+(model.val.a==undefined?model.val:'['+model.val.a+','+model.val.b+']')+
             ':'+print(model.child);
  }
  return 'model?';
}


/**
 * Finds element with highest occurrence in a list
 * (the first one reaching the highest count wins ties).
 * @private
 */
function mostCommon(list) {
  var elementFrequencyMap = {};
  var largestFrequency = -1;
  var mostCommonElement = null;

  list.forEach(function (element) {
    var elementFrequency = (elementFrequencyMap[element] || 0) + 1;
    elementFrequencyMap[element] = elementFrequency;

    if (largestFrequency < elementFrequency) {
      mostCommonElement = element;
      largestFrequency = elementFrequency;
    }
  });

  return mostCommonElement;
}

/** Adds two values. TODO: only scalar operands are handled; ranges {a,b} are not. */
function addVal(v1,v2) {
  return v1+v2;
}

/** Lower bound of a value: the value itself for a scalar, v.a for a range {a,b}. */
function lowerBound(v) {
  if (v.a==undefined) return v; else return v.a;
}

/** Upper bound of a value: the value itself for a scalar, v.b for a range {a,b}. */
function upperBound(v) {
  if (v.b==undefined) return v; else return v.b;
}

/**
 * Bounding range {a,b} of a value list and an optional additional value.
 * @param {Array} vl list of scalars or ranges
 * @param {*} [v] additional scalar or range to include
 */
function Bounds(vl,v) {
  if (vl.length==0) return {a:v,b:v};
  else if (v==undefined) return {a:Min(vl),b:Max(vl)};
  else return {a:Min([Min(vl),v]),b:Max([Max(vl),v])};
}

/** True if the upper bound of one value lies inside the bounds of the other value. */
function overlap(v1,v2) {
  return (upperBound(v1) >= lowerBound(v2) && upperBound(v1) <= upperBound(v2)) ||
         (upperBound(v2) >= lowerBound(v1) && upperBound(v2) <= upperBound(v1));
}

/** True if `v` overlaps at least one value of the list `vl`. */
function containsVal(vl,v) {
  for (var i=0;i<vl.length;i++) {
    if (overlap(v,vl[i])) return true;
  }
  return false;
}

/**
 * Smallest lower bound of a list of scalars or ranges.
 * @returns {*} `none` for an empty list
 */
function Min(vals) {
  var min=none;
  Comp.array.iter(vals,function (val) {
    var lo=lowerBound(val);
    if (min==none || lo<min) min=lo;
  });
  return min;
}

/**
 * Largest upper bound of a list of scalars or ranges.
 * @returns {*} `none` for an empty list
 */
function Max(vals) {
  var max=none;
  Comp.array.iter(vals,function (val) {
    // for a range the upper bound val.b (not val.a) is the candidate maximum
    var hi=upperBound(val);
    if (max==none || hi>max) max=hi;
  });
  return max;
}

/** Centre of a value: the value itself for a scalar, (a+b)/2 for a range. */
function centerVal(v) {
  if (v.a==undefined) return v; else return (v.a+v.b)/2;
}

/** Absolute distance between the centres of two values. */
function distanceVal (v1,v2) {
  return Math.abs(centerVal(v1)-centerVal(v2));
}


/**
 * Rates all features of a data set.
 * @param {Array} data list of samples (objects keyed by feature name)
 * @param {string} target name of the class attribute (not used by the rating)
 * @param {Array} features list of feature names
 * @param {number} eps interval half width passed to entropyEps
 * @returns {Array} one record {e,d,range:{a,b},name} per feature, sorted by
 *          decreasing entropy e; d is the mean squared deviation of the value
 *          centres from their mean
 */
function getBestFeatures(data,target,features,eps) {
  var bestfeatures=[];
  function deviation(vals) {
    var n = vals.length;
    var mu=Comp.array.sum(vals,function (val) {
      return (lowerBound(val)+upperBound(val))/2;
    })/n;
    var dev=Comp.array.sum(vals,function (val) {
      return Math.pow(((lowerBound(val)+upperBound(val))/2)-mu,2);
    })/n;
    return dev;
  }
  for (var k=0;k<features.length;k++) {
    var vals=_.pluck(data, features[k]);
    bestfeatures.push({
      e:entropyEps(vals,eps),
      d:deviation(vals),
      range:{a:Min(vals),b:Max(vals)},
      name:features[k]
    });
  }
  // highest entropy first; equal entropies compare as equal
  bestfeatures.sort(function (ef1,ef2) {
    return ef2.e-ef1.e;
  });
  return bestfeatures;
}

/**
 * All values of one feature in increasing value order.
 * NOTE(review): assumes Comp.array.sort follows the Array.prototype.sort
 * comparator contract (negative, zero, positive) -- confirm.
 */
function getPossibleValues(data,feature) {
  return Comp.array.sort(_.pluck(data, feature), function (x,y) {
    if (upperBound(x) < lowerBound(y)) return -1;
    if (upperBound(y) < lowerBound(x)) return 1;
    return 0; // equal scalars or overlapping ranges
  });
}

/**
 * Adds one sample to the global training data.
 * TODO: merge a data set with an existing one for a specific key; create value ranges.
 */
function add_training_set(set) {
  training_data.push(set);
}

/**
 * Splits an ordered value list into partitions. A value stays in the current
 * partition as long as its upper bound is below the upper bound of its
 * predecessor shifted by 2*eps; otherwise a new partition is started.
 * @param {Array} vals ordered values (see getPossibleValues)
 * @param {number} eps
 * @returns {Array} list of partitions (arrays of values)
 */
function partitionVals(vals,eps) {
  var partitions=[];
  var partition=[];
  for (var i=0;i<vals.length;i++) {
    var val0=vals[i];
    var val1=vals[i-1]; // undefined for the first element

    if (val1==undefined) partition.push(val0);
    else if (upperBound(val0) < upperBound(addVal(val1,2*eps))) partition.push(val0);
    else {
      partitions.push(partition);
      partition=[val0];
    }
  }
  if (partition.length>0) partitions.push(partition);
  return partitions;
}

/**
 * Creates a new tree
 * @param {Array} data list of samples (objects keyed by feature and target name)
 * @param {string} target name of the class attribute
 * @param {Array} features names of the features that may still be tested
 * @param {number} eps interval half width
 * @returns {object} Result node or Feature node with Value children
 */
function createTree(data, target, features, eps) {
  var targetVals = _.pluck(data, target);
  var targets = _.unique(targetVals);

  console.log(data);
  console.log(features);

  if (targets.length == 1) return Result(targets[0]);

  if (features.length == 0) {
    // majority vote over all samples (not over the de-duplicated class list)
    return Result(mostCommon(targetVals));
  }
  var bestFeatures = getBestFeatures(data, target, features, eps);
  var bestFeature = bestFeatures[0];
  var possibleValues = getPossibleValues(data,bestFeature.name);
  var vals=[];
  var remainingFeatures;

  // names of all rated features except the one that is split on
  function featuresWithout(name) {
    return Comp.array.filtermap(bestFeatures,function (feat) {
      if (feat.name!=name) return feat.name;
      else return none;
    });
  }

  var partitions=partitionVals(possibleValues,eps);

  console.log(partitions);
  console.log(bestFeatures);

  if (partitions.length==1) {
    // no further 2*eps separation possible, find best feature by largest distance:
    // re-sort the best feature list with respect to value deviation
    bestFeatures.sort(function (ef1,ef2) {
      return ef2.d-ef1.d;
    });
    bestFeature = bestFeatures[0];
    // the feature list of the children must exclude the feature finally chosen here
    remainingFeatures = featuresWithout(bestFeature.name);
    possibleValues = getPossibleValues(data,bestFeature.name);
    Comp.array.iter(possibleValues,function (val,i) {
      var subset = data.filter(function (x) {
        console.log(x[bestFeature.name],val,overlap(val,x[bestFeature.name]))

        return overlap(val,x[bestFeature.name]);
      });
      var child_node = Value(val);
      child_node.child = createTree(subset, target, remainingFeatures, eps);
      vals.push(child_node);
    });
  } else {
    remainingFeatures = featuresWithout(bestFeature.name);
    Comp.array.iter(partitions,function (partition,i) {
      var subset = data.filter(function (x) {
        return containsVal(partition,x[bestFeature.name]);
      });
      var bounds = Bounds(partition);
      // without an eps margin a single-valued partition is kept as a scalar,
      // otherwise the branch covers the partition bounds widened by eps
      var child_node = Value((eps==0 && bounds.a===bounds.b)?
                             bounds.a:
                             {a:bounds.a-eps,b:bounds.b+eps});
      child_node.child = createTree(subset, target, remainingFeatures, eps);
      vals.push(child_node);
    });
  }

  return Feature(bestFeature.name,vals);
}

/**
 * Depth of a (sub)tree: 0 for a result node, 1 + the value computed by
 * Comp.array.max over the child depths for a feature node.
 */
function depth(model) {
  switch (model.type) {
    case NODE_TYPES.RESULT: return 0;
    case NODE_TYPES.FEATURE:
      return 1+Comp.array.max(model.vals,function (val) {
        return depth(val);
      });
    case NODE_TYPES.FEATURE_VALUE:
      return depth(model.child);
  }
  return 0;
}

/**
 * Comma separated list of the result names of all leaves of a (sub)tree.
 * @returns {string} '' for a missing node, 'result?' for an unknown node type
 */
function results(model) {
  var line='',sep;
  if (!model) return '';
  switch (model.type) {
    case NODE_TYPES.RESULT:
      return model.name;
    case NODE_TYPES.FEATURE:
      sep='';
      line='';
      Comp.array.iter(model.vals,function (v) {
        line += sep+results(v);
        sep=',';
      });
      return line;
    case NODE_TYPES.FEATURE_VALUE:
      return results(model.child);
  }
  return 'result?';
}


/**
 * Finds the list element whose value is nearest to a sample value
 * (distance between value centres, see distanceVal).
 * @param {Array} vals candidates
 * @param {*} sample scalar or range
 * @param {function} [fun] maps a candidate to its value; default is the candidate itself
 * @returns {*} the nearest candidate or `none` for an empty list
 */
function nearestVal(vals,sample,fun) {
  var best=none;
  for (var i=0;i<vals.length;i++) {
    var d=distanceVal(fun?fun(vals[i]):vals[i],sample);
    if (best==none || best.d > d) best={v:vals[i],d:d};
  }
  if (best) return best.v;
  else return none;
}

/**
 * Classifies a sample by descending the tree; at each feature node the branch
 * with the nearest value is followed.
 * @param {object} model tree created by createTree
 * @param {object} sample object keyed by feature name
 * @returns {*} result name, or `none` if no result node is reached
 */
function predict(model,sample) {
  var root = model;
  while (root && root.type !== NODE_TYPES.RESULT) {
    var sampleVal = sample[root.name];
    // a sample without the tested feature cannot be classified
    if (sampleVal==undefined) return none;
    var childNode = nearestVal(root.vals,sampleVal,function (node) {
      return node.val;
    });

    if (childNode) {
      root = childNode.child;
    } else {
      root = none;
    }
  }
  if (root) return root.name||root.val;
  else return none;
}

// From here on `datasets` holds the prediction samples, one per data set
datasets=[];
var noise=function () {
  return Comp.random.range(-eps/2,eps/2);
};
var roiSize=2*roid+1; // number of cells of the region of interest per dimension

for (dataset=0;dataset<=5;dataset++) {
  var training_set={};
  var data={};
  var matA = db.readMatrix('sensorsA'+dataset);
  var matB = db.readMatrix('sensorsB'+dataset);
  var matAs = Aios.aios.matrix(roiSize,roiSize);
  var matBs = Aios.aios.matrix(roiSize,roiSize);

  var n=0;
  features=[];
  for (var j = roi0.y-roid;j <= (roi0.y+roid);j++) {
    for (var i = roi0.x-roid;i <= (roi0.x+roid);i++) {
      matAs[j-(roi0.y-roid)][i-(roi0.x-roid)]=matA[j][i];
      matBs[j-(roi0.y-roid)][i-(roi0.x-roid)]=matB[j][i];
      // training and prediction samples get independent noise
      training_set['A'+n]=matA[j][i]+noise();
      training_set['B'+n]=matB[j][i]+noise();
      data['A'+n]=matA[j][i]+noise();
      data['B'+n]=matB[j][i]+noise();
      features.push('A'+n);
      features.push('B'+n);
      n++;
    }
  }
  training_set[class_name]='L'+dataset;
  add_training_set(training_set);
  datasets.push(data);
}

var bestfeatures=getBestFeatures(training_data, class_name, features, eps);
var classes=_.unique(_.pluck(training_data, class_name));
model = createTree(training_data,class_name,features,eps);
console.log(print(model));
console.log(depth(model));
console.log(results(model));
for (var k=0;k<datasets.length;k++) {
  console.log(predict(model,datasets[k]));
}