Tue 27 Aug 00:14:56 CEST 2024
This commit is contained in:
parent f8c169076d
commit a6e752e33c

test/test-rl1.js  134  Normal file

@@ -0,0 +1,134 @@
// Maze of Torment World
// Temporal Difference Learning (TD)

var height=7,width=7,start=0;
// 0: free place, 1: start, 2: destination, -1: wall
var f=0,s=1,d=2,w=-1
var maze = [
  [s,f,w,d,w,f,f],
  [f,f,w,f,w,f,f],
  [f,f,w,f,f,f,f],
  [f,f,w,w,w,f,f],
  [f,f,f,f,f,f,f],
  [f,f,f,f,w,w,w],
  [f,w,f,f,f,f,f],
]

var states = []
maze.forEach(function (row) {
  states=states.concat(row)
})
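
// Note (sketch): states is just the maze flattened row-major, so a cell at
// (x,y) has state index y*width+x; e.g. the destination d at maze[0][3]
// becomes state 0*7+3 = 3.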

var way = []
function reset (pr) {
  if (pr) print(way.join('\n'))
  way = maze.map(function (row) {
    return row.map(function (col) { return col==s?1:(col==w?'w':0) })})
  env.steps=0;
}
var actions = ['left','right','up','down']

var env = {};

env.steps = 0;
env.iteration = 0;

// required by learner
env.getNumStates      = function() { return height*width; }
env.getMaxNumActions  = function() { return actions.length; }
env.nextState = function(state,action) {
  var nx, ny, nextstate;
  var x = env.stox(state);
  var y = env.stoy(state);
  switch (states[state]) {
    case f:
    case s:
      // free place to move around
      switch (action) {
        case 'left'  : nx=x-1; ny=y; break;
        case 'right' : nx=x+1; ny=y; break;
        case 'up'    : ny=y-1; nx=x; break;
        case 'down'  : ny=y+1; nx=x; break;
      }
      nextstate = ny*width+nx;
      way[ny][nx]=1;
      env.steps++;
      break;
    case w:
      // wall! oh no! Should not happen - see below
      // print('Back to start...')
      nextstate=start;
      reset(false)
      env.iteration++;
      break;
    case d:
      // agent wins! teleport to start
      print('['+env.iteration+'] Found destination !!!!!!! steps='+env.steps)
      reset(true);
      nextstate=start;
      env.iteration++;
      break;
  }
  // print(state,action,nextstate)
  return nextstate;
}
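
// Note: nextState assumes (nx,ny) stays on the grid; that holds as long as the
// learner only picks from env.allowedActions(state), which clips moves at the
// border (see below). An off-grid move would make way[ny] undefined and throw.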
env.reward = function (state,action,nextstate) {
  // reward of being in s, taking action a, and ending up in ns
  var reward;
  // If the destination was found, weight the reward with the number of steps
  // to return the best reward for the shortest path
  if (states[state]==d) reward = 1.0-(env.steps/100)
  else if (states[state]==w) reward = -1;
  else reward = 0;
  return reward;
}
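
// Note: reward() only inspects its first argument; in the learning loop below
// it is called as env.reward(ns), so the agent is effectively rewarded for the
// state it lands in, not the state it leaves.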
env.allowedActions    = function(state) {
  var x = env.stox(state), y = env.stoy(state);
  var actions=[];
  if (x>0) actions.push('left');
  if (y>0) actions.push('up');
  if (x<width-1) actions.push('right');
  if (y<height-1) actions.push('down');
  return actions
}
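
// Note: allowedActions only clips at the grid border; walls are not excluded,
// so the agent can still step onto a w cell, collect the -1 reward, and get
// reset to the start by nextState.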

// utils
env.stox = function (s) { return s % width }
env.stoy = function (s) { return Math.floor(s / width) }
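
// Quick sanity check (illustrative): stox/stoy invert the row-major index,
// e.g. for state 10: stox(10) == 3, stoy(10) == 1, and 1*width+3 == 10.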

reset()

// create the TD agent
var model = ml.learn({
  algorithm   : ml.ML.RL,
  kind        : ml.ML.TDAgent,
  actions     : actions,

  // specs
  alpha       : 0.1,  // value function learning rate
  beta        : 0.2,  // learning rate for smooth policy update
  epsilon     : 0.2,  // initial epsilon for epsilon-greedy policy, [0, 1)
  gamma       : 0.5,  // discount factor, [0, 1)
  lambda      : 0,    // eligibility trace decay, [0,1). 0 = no eligibility traces
  planN       : 5,    // number of planning steps per iteration. 0 = no planning
  replacing_traces : true,
  smooth_policy_update : false,
  update : 'qlearn',  // 'qlearn' or 'sarsa'

  environment : env
});
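
// With update:'qlearn' the TD target is presumably the standard Q-learning one,
//   Q(s,a) += alpha * (r + gamma * max_a' Q(s',a') - Q(s,a)),
// while 'sarsa' uses the Q-value of the action actually taken instead of the max;
// lambda and replacing_traces control eligibility traces, planN the Dyna-style
// planning sweeps. These are the textbook semantics, assumed for this ml library.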

print(model)
print(toJSON(model).length+' Bytes')

var state = start;  // upper left corner
var timer = setInterval(function(){ // start the learning loop
  var action = ml.action(model,state); // state is an integer index into the grid
  // ... execute the action in the environment and collect the reward
  // print(state,action,states[state])
  var ns = env.nextState(state,action);
  var reward = env.reward(ns)-0.01 // small step penalty to favour short paths
  ml.update(model,reward)
  state = ns
}, 1);
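
// The interval above runs indefinitely; a stop condition could be added inside
// the callback, e.g. (hypothetical episode budget):
//   if (env.iteration >= 1000) clearInterval(timer);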