// Maze of Torment World
// Deep-Q Learning (DQN)

var height=7, width=7, start, dest;

// Cell codes: 0: free place, 1: start, 2: destination, -1: wall
var f=0, s=1, d=2, w=-1;

var maze = [
  [s,f,w,d,w,f,f],
  [f,f,w,f,w,f,f],
  [f,f,w,f,f,f,f],
  [f,f,w,w,w,f,f],
  [f,f,f,f,f,f,f],
  [f,f,f,f,w,w,w],
  [f,w,f,f,f,f,f]
];
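
// The same maze as a picture (S=start, D=destination, #=wall, .=free),
// derived from the array above:
//
//   S . # D # . .
//   . . # . # . .
//   . . # . . . .
//   . . # # # . .
//   . . . . . . .
//   . . . . # # #
//   . # . . . . .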

// World states: row-major flattening of the maze grid
var states = [];
maze.forEach(function (row,j) {
  states=states.concat(row);
  row.forEach(function (cell,i) {
    if (cell==s) start=i+j*width;  // start as a linear state index
    if (cell==d) dest={x:i,y:j};   // destination as an (x,y) coordinate
  });
});

// Map of visited cells, printed after each completed run
var way = [];
function reset (pr) {
  if (pr) print(way.join('\n'));
  way = maze.map(function (row) {
    return row.map(function (col) { return col==s?1:(col==w?'w':0) });
  });
  env.steps=0;
  env.good=0;
  env.error=0;
  env.iteration++;
}

var actions = ['left','right','up','down'];

// Agent sensor state (perception):
// distances {N,S,W,E} to the nearest wall, plus the distance to the destination
var sensors = [0,0,0,0,0];

var env = {};

env.steps     = 0;  // moves in the current run
env.iteration = 0;  // completed runs
env.error     = 0;  // invalid moves (wall hit or outside the world)
env.good      = 0;  // valid moves
env.laststate = 0;  // previous state, used to penalise ping-pong moves

// Required by the learner
env.getNumStates     = function() { return sensors.length; }  // size of the input vector
env.getMaxNumActions = function() { return actions.length; }

// Internals
env.nextState = function (state,action) {
  var nx, ny, nextstate;
  var x = env.stox(state);
  var y = env.stoy(state);
  switch (action) {
    case 'left'  : nx=x-1; ny=y; break;
    case 'right' : nx=x+1; ny=y; break;
    case 'up'    : ny=y-1; nx=x; break;
    case 'down'  : ny=y+1; nx=x; break;
  }
  nextstate = env.xytos(nx,ny);
  // Invalid move: outside the world or into a wall
  if (nx<0 || ny<0 || nx >= width || ny >= height ||
      states[nextstate]==w) {
    nextstate=-1;
    return nextstate;
  }
  way[ny][nx]=1;  // mark the cell as visited
  env.steps++;
  return nextstate;
}
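
// For example, starting in the upper left corner (state 0):
// nextState(0,'right') returns state 1, while nextState(0,'up')
// leaves the world and returns -1.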

env.reward = function (state,action,nextstate) {
  // Reward of being in state s, taking action a, and ending up in ns
  var reward;
  var dist1 = Math.sqrt(Math.pow(dest.x-env.stox(nextstate),2)+
                        Math.pow(dest.y-env.stoy(nextstate),2));
  var dist2 = Math.sqrt(Math.pow(dest.x-env.stox(state),2)+
                        Math.pow(dest.y-env.stoy(state),2));
  if (nextstate==env.laststate) reward = -10;               // avoid ping-pong moves
  else if (nextstate==-1)       reward = -100;              // wall hit or outside world
  else if (dist1 < 1)           reward = 100-env.steps/10;  // destination found
  else reward = (dist1-dist2)<0 ? dist1/10 : -dist1/10;     // shaping reward on the way
  env.laststate=nextstate;
  return reward;
}
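
// Example shaping step, derived from the maze above: from the start (0,0),
// 'right' moves to (1,0); the distance to the destination (3,0) shrinks
// from 3 to 2, so the reward is dist1/10 = 0.2.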

// Update sensors
env.perception = function (state) {
  var i,
      dist = Math.sqrt(Math.pow(dest.x-env.stox(state),2)+
                       Math.pow(dest.y-env.stoy(state),2)),
      x = env.stox(state),
      y = env.stoy(state),
      sensors = [0,0,0,0,dist]; // N S W E + distance to destination
  // Number of free cells towards the nearest obstacle in each direction
  for(i=y;i>0;i--)      { if (states[env.xytos(x,i)]==w) break }
  sensors[0]=y-i-1;
  for(i=y;i<height;i++) { if (states[env.xytos(x,i)]==w) break }
  sensors[1]=i-y-1;
  for(i=x;i>0;i--)      { if (states[env.xytos(i,y)]==w) break }
  sensors[2]=x-i-1;
  for(i=x;i<width;i++)  { if (states[env.xytos(i,y)]==w) break }
  sensors[3]=i-x-1;
  return sensors;
}
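
// At the start cell (0,0), for example, this yields sensors = [-1,6,-1,1,3]:
// the N and W scans hit the world border immediately (-1), there is no wall
// below (6), the first wall to the E is one free cell away (1), and the
// destination is 3 cells away.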

// Utils: conversion between linear state index and (x,y) coordinates
env.stox  = function (s)   { return s % width }
env.stoy  = function (s)   { return Math.floor(s / width) }
env.xytos = function (x,y) { return x+y*width }
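
// Example: with width=7, state 10 maps to x = 10 % 7 = 3 and
// y = floor(10/7) = 1, and xytos(3,1) = 3+1*7 = 10 recovers the index.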

reset();

// Create the DQN agent (here loaded from a previously stored model file)
var model = load('/tmp/rl.json');

print(model);
print(toJSON(model).length+' Bytes');

var state = start;  // world state, starting in the upper left corner

// The agent searches for the destination, initially by random walk.
// When the destination is found, it jumps back to the start.
later(1,function(task){ // start the learning loop
  sensors = env.perception(state);
  var action = ml.action(model,sensors);  // sensors is the state vector
  // Execute the action in the environment and get the reward
  var ns = env.nextState(state,action);
  var reward = env.reward(state,action,ns);
  if (states[ns]==d) {
    // Destination found
    print('iteration='+env.iteration+', reward='+reward+', steps='+env.good+
          ', error='+env.error+', tderror='+model.tderror);
    ns=start;
    reset(true);
  }
  if (ns==-1) env.error++;
  else env.good++;
  // print(state,ns,sensors,reward)
  ml.update(model,reward);
  state = ns==-1?state:ns;    // stay in place after an invalid move
  // state = ns==-1?start:ns; // alternative: restart after an invalid move
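  // Stop criterion: with reward = 100-steps/10 at the destination, only runs
  // of 15 steps or fewer exceed 98.4 (100-1.5 = 98.5), which corresponds to
  // the shortest possible path through this maze.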
  if (reward > 98.4) {
    save('/tmp/rl.json',model);  // persist the trained model
    kill(task);                  // and stop the learning loop
  }
  return true;
});