// Maze of Torment World
// Dynamic Programming (DP)

var height=7, width=7, start=0;
var UPDATES=15

// cell markers -- 0: free place, 1: start, 2: destination, -1: wall
var f=0, s=1, d=2, w=-1
var maze = [
  [s,f,w,d,w,f,f],
  [f,f,w,f,w,f,f],
  [f,f,w,f,f,f,f],
  [f,f,w,w,w,f,f],
  [f,f,f,f,f,f,f],
  [f,f,f,f,w,w,w],
  [f,w,f,f,f,f,f],
]

// flatten the maze into a linear state array
var states = []
maze.forEach(function (row) { states=states.concat(row) })
// -1 for walls, +1 for the destination, 0 everywhere else
var rewards = states.map(function (cell) { return cell==w?-1:(cell==d?1:0) })
var actions = ['left','right','up','down']

var env = {};
env.steps = 0;
env.iteration = 0;
var way = []

// reset the visited-cells map; optionally print the path walked so far
function reset (pr) {
  if (pr) print(way.join('\n'))
  way = maze.map(function (row) { return row.map(function (col) {
    return col==s?1:(col==w?'w':0)
  })})
  env.steps=0;
}

// required by the learner
env.getNumStates = function () { return height*width; }
env.getMaxNumActions = function () { return actions.length; }

env.nextState = function (state, action, pr) {
  var nx, ny, nextstate;
  var x = env.stox(state);
  var y = env.stoy(state);
  switch (states[state]) {
    case f:
    case s:
      // free place to move around
      switch (action) {
        case 'left'  : nx=x-1; ny=y; break;
        case 'right' : nx=x+1; ny=y; break;
        case 'up'    : ny=y-1; nx=x; break;
        case 'down'  : ny=y+1; nx=x; break;
      }
      nextstate = ny*width+nx;
      way[ny][nx]=1;
      env.steps++;
      break;
    case w:
      // cliff! oh no! Should not happen - see below
      // print('Back to start...')
      nextstate=start;
      reset()
      env.iteration++;
      break;
    case d:
      // agent wins! teleport back to start
      if (pr) print('['+env.iteration+'] Found destination !!!!!!! steps='+env.steps)
      reset(pr)
      nextstate=start;
      env.iteration++;
      break;
  }
  // print(state,action,nextstate)
  return nextstate;
}

env.reward = function (state, action, nextstate) {
  // reward of being in state, taking action, and ending up in nextstate.
  // Weighting the destination reward by env.steps would favour the shortest
  // path; here the plain reward table is used for all states.
  return rewards[state];
}

// only moves that stay inside the grid are allowed; walking into walls
// is discouraged by their negative reward instead
env.allowedActions = function (state) {
  var x = env.stox(state), y = env.stoy(state);
  var allowed = [];
  if (x>0)        allowed.push('left');
  if (y>0)        allowed.push('up');
  if (x<width-1)  allowed.push('right');
  if (y<height-1) allowed.push('down');
  return allowed
}

// utils: state index <-> grid coordinates
env.stox = function (st) { return st % width }
env.stoy = function (st) { return Math.floor(st / width) }

// create the DP agent
var model = ml.learn({
  algorithm   : ml.ML.RL,
  kind        : ml.ML.DPAgent,
  actions     : actions,
  gamma       : 0.9,   // discount factor, [0, 1)
  environment : env
});
print(model)
print(toJSON(model).length+' Bytes')

reset()
var state = start;   // upper left corner
for (var i=0; i<UPDATES; i++) ml.update(model)
print('Required '+env.iteration+' iterations')
reset()

// replay loop: follow the learned policy step by step
var timer = setInterval(function () {
  var action = ml.action(model, state);   // state is an integer index
  //... execute action in environment and get the reward
  // print(state,action,states[state])
  var ns = env.nextState(state, action, true);
  //var reward = env.reward(ns)-0.01
  //ml.update(model)
  state = ns
}, 100);
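
// --- Illustration only (not executed) ---------------------------------------
// A DPAgent is assumed to plan by synchronous value iteration over this
// tabular model: V(s) <- max_a [ r(s) + gamma * V(s') ]. The helpers sweep(),
// greedyAction() and nextIndex() below are hypothetical sketches, not part of
// the ml API; terminal handling (the teleport from d back to start) is
// omitted for brevity.
var MOVES = { left:[-1,0], right:[1,0], up:[0,-1], down:[0,1] }

function nextIndex (st, a) {
  // deterministic grid move, mirroring env.nextState for free cells
  var x = env.stox(st), y = env.stoy(st)
  return (y+MOVES[a][1])*width + (x+MOVES[a][0])
}

function sweep (V, gamma) {
  // one full value-iteration backup over all non-wall states
  return V.map(function (v, st) {
    if (states[st]==w) return v
    var best = -Infinity
    env.allowedActions(st).forEach(function (a) {
      var q = rewards[st] + gamma*V[nextIndex(st,a)]
      if (q > best) best = q
    })
    return best
  })
}

function greedyAction (V, st) {
  // pick the action leading to the highest-valued neighbour,
  // roughly what ml.action(model, state) is assumed to return
  var best = -Infinity, pick
  env.allowedActions(st).forEach(function (a) {
    var val = V[nextIndex(st,a)]
    if (val > best) { best = val; pick = a }
  })
  return pick
}

// Example usage:
//   var V = states.map(function () { return 0 })
//   for (var k=0; k<UPDATES; k++) V = sweep(V, 0.9)
//   print(greedyAction(V, start))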