Tue 27 Aug 00:14:56 CEST 2024
parent 2bb30de385
commit 00ee12d73c

test/test-rl3.js  171 lines  Normal file

@@ -0,0 +1,171 @@
// Maze of Torment World
// Deep-Q Learning (DQN)

var height=7,width=7,start,dest;
// 0: free place, 1: start, 2: destination, -1: wall
var f=0,s=1,d=2,w=-1
var maze = [
  [s,f,w,d,w,f,f],
  [f,f,w,f,w,f,f],
  [f,f,w,f,f,f,f],
  [f,f,w,w,w,f,f],
  [f,f,f,f,f,f,f],
  [f,f,f,f,w,w,w],
  [f,w,f,f,f,f,f],
]
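// Orientation note: maze[j][i] addresses row j (the y coordinate) and
// column i (the x coordinate); the start sits in the upper-left corner
// and the destination at column 3 of the top row.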
// world states
var states = []
maze.forEach(function (row,j) {
  states=states.concat(row)
  row.forEach(function (cell,i) {
    if (cell==s) start=i+j*width;
    if (cell==d) dest={x:i,y:j}
  })
})
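// Derived from the maze above: the 7x7 grid flattens to 49 states in
// row-major order, cell (x,y) -> state x+y*width. Here start becomes
// state 0 (upper-left corner) and dest becomes {x:3,y:0}, i.e. state 3.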
var way = []
// Reset the walked-path trace and the per-episode counters;
// with pr set, print the path of the finished episode first.
function reset (pr) {
  if (pr) print(way.join('\n'))
  way = maze.map(function (row) {
    return row.map(function (col) { return col==s?1:(col==w?'w':0) })})
  env.steps=0;
  env.good=0;
  env.error=0;
  env.iteration++;
}
var actions = ['left','right','up','down']

// Agent sensor states (perception):
// distances {N,S,W,E} to the nearest wall or boundary, plus the
// Euclidean distance to the destination
var sensors = [0,0,0,0,0]
var env = {};

env.steps = 0;     // moves made in the current episode
env.iteration = 0; // episodes completed
env.error = 0;     // invalid moves (wall hit / out of bounds)
env.good = 0;      // valid moves
env.last = 0;

// required by the learner: the sensor vector is the DQN input,
// so the state dimension is its length (5)
env.getNumStates      = function() { return sensors.length /*!!*/ }
env.getMaxNumActions  = function() { return actions.length; }
// internals
env.nextState = function(state,action) {
  var nx, ny, nextstate;
  var x = env.stox(state);
  var y = env.stoy(state);
  // compute the target cell of the move
  switch (action) {
    case 'left'  : nx=x-1; ny=y; break;
    case 'right' : nx=x+1; ny=y; break;
    case 'up'    : ny=y-1; nx=x; break;
    case 'down'  : ny=y+1; nx=x; break;
  }
  nextstate = env.xytos(nx,ny);
  if (nx<0 || ny<0 || nx >= width || ny >= height ||
      states[nextstate]==w) {
    nextstate=-1; // invalid move: outside the world or into a wall
    return nextstate;
  }
  way[ny][nx]=1;
  env.steps++;
  return nextstate;
}
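// Example (derived from the maze above): from the start state 0,
// 'down' moves to cell (0,1) and returns state 7; from state 1,
// 'right' would enter the wall at (2,0) and returns -1.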
env.reward = function (state,action,nextstate) {
  // reward of being in s, taking action a, and ending up in ns
  var reward;
  var dist1=Math.sqrt(Math.pow(dest.x-env.stox(nextstate),2)+
                      Math.pow(dest.y-env.stoy(nextstate),2))
  var dist2=Math.sqrt(Math.pow(dest.x-env.stox(state),2)+
                      Math.pow(dest.y-env.stoy(state),2))
  if (nextstate==env.laststate) reward = -10; // avoid ping-pong moves
  else if (nextstate==-1) reward = -100; // wall hit or outside world
  else if (dist1 < 1) reward = 100-env.steps/10; // destination found
  else reward = (dist1-dist2)<0?dist1/10:-dist1/10; // on the way: small bonus for getting closer, small penalty otherwise
  env.laststate=nextstate;
  return reward;
}
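// Worked example: from the start (state 0) moving 'right' to state 1,
// dist1 = |(3,0)-(1,0)| = 2 and dist2 = |(3,0)-(0,0)| = 3; the agent got
// closer, so reward = dist1/10 = 0.2.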
// Update sensors
env.perception = function (state) {
  var i,
      dist=Math.sqrt(Math.pow(dest.x-env.stox(state),2)+
                     Math.pow(dest.y-env.stoy(state),2)),
      x = env.stox(state),
      y = env.stoy(state),
      sensors = [0,0,0,0,dist]; // N S W E + destination distance (local, shadows the global)
  // Distances to obstacles in the four directions
  for(i=y;i>0;i--) { if (states[env.xytos(x,i)]==w) break }
  sensors[0]=y-i-1;
  for(i=y;i<height;i++) { if (states[env.xytos(x,i)]==w) break }
  sensors[1]=i-y-1;
  for(i=x;i>0;i--) { if (states[env.xytos(i,y)]==w) break }
  sensors[2]=x-i-1;
  for(i=x;i<width;i++) { if (states[env.xytos(i,y)]==w) break }
  sensors[3]=i-x-1;
  return sensors
}
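// Worked example: at state 17 (x=3,y=2) the column above is free, a wall
// sits directly below at (3,3) and directly left at (2,2), and the row is
// free to the right edge; with |dest-(3,2)| = 2 this yields
// perception(17) == [1,0,0,3,2].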
// utils: conversions between state index and grid coordinates
env.stox = function (s)    { return s % width }
env.stoy = function (s)    { return Math.floor(s / width) }
env.xytos = function (x,y) { return x+y*width }
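// e.g. xytos(3,1) == 10, and back again: stox(10) == 3, stoy(10) == 1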

reset()

// create the DQN agent
var model = ml.learn({
  algorithm   : ml.ML.RL,
  kind        : ml.ML.DQNAgent,
  actions     : actions,

  // specs
  update : 'qlearn', // qlearn | sarsa
  gamma : 0.9, // discount factor, [0, 1)
  epsilon : 0.2, // initial epsilon for the epsilon-greedy policy, [0, 1)
  alpha : 0.005, // value function learning rate
  experience_add_every : 5, // number of time steps before we add another experience to replay memory
  experience_size : 10000, // size of the experience replay memory
  learning_steps_per_iteration : 5,
  tderror_clamp : 1.0, // clamp the TD error for robustness
  num_hidden_units : 100, // number of neurons in the hidden layer

  environment : env
});
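// Background (illustrative sketch only, not the library's internal code):
// with update:'qlearn' the agent learns action values via
//   Q(s,a) += alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
// and selects actions epsilon-greedily, roughly like this:
function epsilonGreedy (qvalues, epsilon) {
  if (Math.random() < epsilon)                        // explore
    return Math.floor(Math.random()*qvalues.length);
  var best = 0;                                       // exploit: argmax Q
  for (var a=1; a<qvalues.length; a++)
    if (qvalues[a]>qvalues[best]) best=a;
  return best;
}
// e.g. epsilonGreedy([0.1,0.5,0.2,0.0],0.2) picks index 1 ('right')
// with probability 0.8 + 0.2/4 = 0.85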

print(model)
print(toJSON(model).length+' Bytes')

var state = start;  // world state: the start cell in the upper left corner

// The agent searches for the destination by (initially) random walk.
// Whenever the destination is found, it jumps back to the start.
later(1,function(task){ // start the learning loop
  sensors = env.perception(state);
  var action = ml.action(model,sensors); // the sensor vector is the DQN input
  // ... execute the action in the environment and get the reward
  var ns = env.nextState(state,action);
  var reward = env.reward(state,action,ns)
  if (states[ns]==d) {
    // destination found
    print('iteration='+env.iteration,', reward='+reward,' action: steps='+env.good,'error='+env.error+' tderror='+
          model.tderror)
    ns=start;
    reset(true);
  }
  if (ns==-1) env.error++;
  else env.good++;
  // print(state,ns,sensors,reward)
  ml.update(model,reward)
  state = ns==-1?state:ns
  // state = ns==-1?start:ns
  if (reward > 10) {
    // the big terminal reward signals a short successful run:
    // save the model and stop this task
    save('/tmp/rl.json',model);
    print('continue with test-rl4.js ...')
    kill(task);
  }
  return true
});
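// Possible follow-up (sketch only): test-rl4.js presumably reloads the
// saved model and walks the maze greedily without further training.
// 'load' is an assumed counterpart to save() here; adjust it to the
// runtime's actual API before use:
// var trained = load('/tmp/rl.json');
// var s2 = start;
// while (states[s2]!=d) {
//   var a = ml.action(trained, env.perception(s2));
//   var n = env.nextState(s2, a);
//   if (n==-1) break; // wall hit: policy not yet reliable
//   s2 = n;
// }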