# ----------
# User Instructions:
#
# Implement the function optimum_policy2D below.
#
# You are given a car in a grid with initial state
# init. Your task is to compute and return the car's
# optimal path to the position specified in goal;
# the costs for each motion are as defined in cost.
#
# There are four motion directions: up, left, down, and right.
# Increasing the index in this array corresponds to making a
# left turn, and decreasing the index corresponds to making a
# right turn.
#
# [Dean added:] FOR MORE DETAILS:
#
# direct link to Udacity's course module, by Sebastian Thrun
# https://classroom.udacity.com/courses/cs373/lessons/48646841/concepts/486468400923
#
# if the above link doesn't work, below are direct links to the course videos on YouTube.
# Quiz video: https://www.youtube.com/watch?v=bQA2ELDNmmg
# Answer video: https://www.youtube.com/watch?v=rH5DKpwYQLY
#

import time  # for performance timing purpose

forward = [[-1,  0],  # go up
           [ 0, -1],  # go left
           [ 1,  0],  # go down
           [ 0,  1]]  # go right
forward_name = ['up', 'left', 'down', 'right']

# action has 3 values: right turn, no turn, left turn
action = [-1, 0, 1]
action_name = ['R', '#', 'L']
cost = [2, 1, 20]  # cost has 3 values, corresponding to making
                   # a right turn, no turn, and a left turn
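
# [Added illustration:] a quick sanity check of the turn convention described
# above -- starting from orientation 0 ('up'), a left turn (+1 on the index)
# yields 'left', and a right turn (-1) wraps around to 'right'.
assert forward_name[(0 + 1) % len(forward)] == 'left'
assert forward_name[(0 - 1) % len(forward)] == 'right'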

# EXAMPLE INPUTS:
# grid format:
#     0 = navigable space
#     1 = unnavigable space, i.e., a wall
grid = [[1, 1, 1, 0, 0, 0],
        [1, 1, 1, 0, 1, 0],
        [0, 0, 0, 0, 0, 0],
        [1, 1, 1, 0, 1, 1],
        [1, 1, 1, 0, 1, 1]]

init = [4, 3, 0]  # init state, given in the form [row, col, direction]
                  # direction = 0: up
                  #             1: left
                  #             2: down
                  #             3: right

goal = [2, 0]  # given in the form [row, col] (direction is unconstrained)

# EXAMPLE OUTPUT:
# calling optimum_policy2D with the given parameters should return
# [[' ', ' ', ' ', 'R', '#', 'R'],
#  [' ', ' ', ' ', '#', ' ', '#'],
#  ['*', '#', '#', '#', '#', 'R'],
#  [' ', ' ', ' ', '#', ' ', ' '],
#  [' ', ' ', ' ', '#', ' ', ' ']]
# ----------

# -------------------------------------------------------------------
# Udacity's "brute force" method. Very computationally inefficient,
# but gets the job done.
# -------------------------------------------------------------------
def optimum_policy2D_udacity(grid, init, goal, cost):
    value = [[[999 for r in range(len(grid[0]))] for c in range(len(grid))],
             [[999 for r in range(len(grid[0]))] for c in range(len(grid))],
             [[999 for r in range(len(grid[0]))] for c in range(len(grid))],
             [[999 for r in range(len(grid[0]))] for c in range(len(grid))]]
    policy = [[[' ' for r in range(len(grid[0]))] for c in range(len(grid))],
              [[' ' for r in range(len(grid[0]))] for c in range(len(grid))],
              [[' ' for r in range(len(grid[0]))] for c in range(len(grid))],
              [[' ' for r in range(len(grid[0]))] for c in range(len(grid))]]
    policy2D = [[' ' for r in range(len(grid[0]))] for c in range(len(grid))]

    # First, generate the policy 'potential field'(*) as a 3D array...
    # (*) a physics concept that I borrowed
    change = True
    while change:
        change = False
        for x in range(len(grid)):
            for y in range(len(grid[0])):
                for o in range(len(forward)):  # orientation
                    if x == goal[0] and y == goal[1]:  # at goal location
                        if value[o][x][y] > 0:
                            value[o][x][y] = 0     # set goal grid value to zero
                            policy[o][x][y] = '*'  # to denote the goal location
                            change = True
                    elif grid[x][y] == 0:  # navigable grid...
                        for a in range(len(action)):  # search adjacent grids (xy2) where we *could* step forward to
                            o2 = (o + action[a]) % len(forward)
                            x2 = x + forward[o2][0]
                            y2 = y + forward[o2][1]
                            # Check: is it within the grid world & navigable?
                            if x2 >= 0 and x2 < len(grid) and \
                               y2 >= 0 and y2 < len(grid[0]) and \
                               grid[x2][y2] == 0:
                                v2 = value[o2][x2][y2] + cost[a]  # potential (lower?) cost back at xy, given value at xy2 + cost of the move
                                if v2 < value[o][x][y]:  # reduce value at xy if possible
                                    value[o][x][y] = v2
                                    policy[o][x][y] = action_name[a]
                                    change = True

    # --- Now that we have the potential field, propagate from the init state to produce the actual policy ---
    # set state to init state
    x = init[0]
    y = init[1]
    o = init[2]
    policy2D[x][y] = policy[o][x][y]
    while policy[o][x][y] != '*':
        # extract the action index for the current policy
        a = action_name.index(policy[o][x][y])
        # propagate to the new state according to the potential field
        o2 = (o + action[a]) % len(forward)
        x2 = x + forward[o2][0]
        y2 = y + forward[o2][1]
        # update policy2D at the new state
        policy2D[x2][y2] = policy[o2][x2][y2]
        # update state
        o = o2
        x = x2
        y = y2

    return policy2D
    # return [policy2D, policy]  # debug - to see the 3D policy

# -------------------------------------------------------------------
# My method. Inspired by the more efficient 'open item list' approach
# from the A* search solution, but walking backwards from the goal instead.
# -------------------------------------------------------------------
def optimum_policy2D(grid, init, goal, cost):
    value = [[[999 for r in range(len(grid[0]))] for c in range(len(grid))],
             [[999 for r in range(len(grid[0]))] for c in range(len(grid))],
             [[999 for r in range(len(grid[0]))] for c in range(len(grid))],
             [[999 for r in range(len(grid[0]))] for c in range(len(grid))]]
    policy = [[[' ' for r in range(len(grid[0]))] for c in range(len(grid))],
              [[' ' for r in range(len(grid[0]))] for c in range(len(grid))],
              [[' ' for r in range(len(grid[0]))] for c in range(len(grid))],
              [[' ' for r in range(len(grid[0]))] for c in range(len(grid))]]
    policy2D = [[' ' for r in range(len(grid[0]))] for c in range(len(grid))]

    open = []  # expansion list of 'open' grids
    x = goal[0]
    y = goal[1]
    v = 0
    # orientation doesn't matter at the goal spot, thus append all orientations to the open list
    for o in range(len(forward)):
        value[o][x][y] = v
        policy[o][x][y] = '*'
        open.append([v, o, x, y])

    done = False
    while not done:
        if len(open) == 0:  # expansion list empty, we're done!
            done = True
        else:
            open.sort(reverse=True)
            next = open.pop()
            v = next[0]  # value/cost
            o = next[1]  # orientation
            x = next[2]  # X position
            y = next[3]  # Y position
            # Since the forward-movement sequence is defined as: turn first, then step...
            # given 'o' at the current xy, we could ONLY have COME FROM grid xy2 via direction 'o'
            x2 = x - forward[o][0]
            y2 = y - forward[o][1]
            # Check: is it within the grid world & navigable?
            if 0 <= x2 and x2 < len(grid) and \
               0 <= y2 and y2 < len(grid[0]) and \
               grid[x2][y2] == 0:
                for a in range(len(action)):  # search all (turning) actions to get to where we were
                    o2 = (o - action[a]) % len(forward)  # back out the turning action
                    v2 = v + cost[a]  # value at xy2
                    if v2 < value[o2][x2][y2]:
                        value[o2][x2][y2] = v2
                        policy[o2][x2][y2] = action_name[a]
                        open.append([v2, o2, x2, y2])

    # --- Now that we have the potential field, propagate from the init state ---
    # set state to init state
    x = init[0]
    y = init[1]
    o = init[2]
    policy2D[x][y] = policy[o][x][y]
    while policy[o][x][y] != '*':
        # extract the action index for the current policy
        a = action_name.index(policy[o][x][y])
        # propagate to the new state according to the potential field
        o2 = (o + action[a]) % len(forward)
        x2 = x + forward[o2][0]
        y2 = y + forward[o2][1]
        # update policy2D at the new state
        policy2D[x2][y2] = policy[o2][x2][y2]
        # update state
        o = o2
        x = x2
        y = y2

    return policy2D
    # return [policy2D, policy]  # debug - to see the 3D policy

# [result_u, result_u3d] = optimum_policy2D_udacity(grid, init, goal, cost)  # debug
t0 = time.time()
result_u = optimum_policy2D_udacity(grid, init, goal, cost)
t1 = time.time()

print('Udacity brute-force method')
print('time = {0:.2f} ms'.format((t1 - t0) * 1000))
print('\n'.join(str(p) for p in result_u))
# debug - print out the 3D policy
# for o, r in enumerate(result_u3d):
#     print('Orientation = ', o)
#     print('\n'.join(str(p) for p in r))

# [result, result3d] = optimum_policy2D(grid, init, goal, cost)  # debug
t0 = time.time()
result = optimum_policy2D(grid, init, goal, cost)
t1 = time.time()

print("Dean's method")
print('time = {0:.2f} ms'.format((t1 - t0) * 1000))
print('\n'.join(str(p) for p in result))
# debug - print out the 3D policy
# for o, r in enumerate(result3d):
#     print('Orientation = ', o)
#     print('\n'.join(str(p) for p in r))

This is interesting. I like how you handled that. I guess it's like learning backwards using memory.
Whoa, apparently gist comments do not trigger notifications! Didn't know this comment was made until I checked back manually.
So after going through this additional quiz (AI4Robotics -> Prob Set 4 -> Quiz 5: Stochastic Motion), I realized that my "faster" algorithm cannot be guaranteed to arrive at the correct solution! ...I was just lucky earlier.
My method uses a best-first exploration front through the nodes, but explores backwards & outwards from the goal node. It ASSUMES a node's value depends ONLY on nodes already within the EXPLORED space. The rules of the earlier example happened to satisfy this assumption.
For problems where a node's value also depends on nodes in the unexplored space, my method breaks down... as there's no easy way to guarantee a revisit of the affected nodes (see the sketch below).
Oh well...
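To sketch the dependency I mean (this is just an illustrative snippet, with made-up names, not the course's code): with stochastic motion a cell's value is an expectation over several possible successor cells, so it can depend on cells the backwards sweep hasn't expanded and settled yet.

```python
# Illustrative only -- names here are hypothetical, not from the course code.
# With deterministic motion, a cell's value depends on ONE successor:
#     value[s] = step_cost + value[next(s)]
# With stochastic motion, it is an expectation over SEVERAL successors:
def stochastic_cell_value(step_cost, outcomes):
    # outcomes: list of (probability, successor_value) pairs
    return step_cost + sum(p * v for p, v in outcomes)

# If any of those successor values later improves (because it was still in the
# unexplored space when this cell was popped), this cell's value is stale --
# which is exactly what breaks the single-pass backwards expansion.
```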
I didn't like the brute-force method in the classroom solution. Decided to extend the best-first open-set approach taught in A*, but walk it backwards from the goal and explore the entire state space that could reach the goal.
The resulting performance was ~5x faster than the brute-force approach.
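One possible further optimization (just a sketch under the same open-list idea, not something the gist actually implements): the version above re-sorts the whole open list on every iteration, whereas a heap-based priority queue gives the same lowest-cost-first pop without the repeated full sort.

```python
import heapq

def pop_lowest_cost(open_heap):
    # open_heap holds [value, orientation, x, y] entries, maintained as a heap
    return heapq.heappop(open_heap)  # always returns the smallest-value entry

# usage sketch:
# open_heap = []
# heapq.heappush(open_heap, [0, o, goal[0], goal[1]])
# v, o, x, y = pop_lowest_cost(open_heap)
```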