@AeroGeekDean
Last active March 19, 2021 06:48
Attempt at an improved algorithm for the Udacity AI4Robotics 'Search' LeftTurnPolicy quiz (Dynamic Programming). Failed attempt; my analysis is in the comments below.
# ----------
# User Instructions:
#
# Implement the function optimum_policy2D below.
#
# You are given a car in a grid with initial state
# init. Your task is to compute and return the car's
# optimal path to the position specified in goal;
# the costs for each motion are as defined in cost.
#
# There are four motion directions: up, left, down, and right.
# Increasing the index in this array corresponds to making a
# left turn, and decreasing the index corresponds to making a
# right turn.
#
# [Dean added:] FOR MORE DETAILS:
#
# direct link to Udacity's course module, by Sebastian Thrun
# https://classroom.udacity.com/courses/cs373/lessons/48646841/concepts/486468400923
#
# if the above link doesn't work, below are direct links to the course videos on YouTube.
# Quiz video: https://www.youtube.com/watch?v=bQA2ELDNmmg
# Answer video: https://www.youtube.com/watch?v=rH5DKpwYQLY
#
import time # for performance timing purpose
forward = [[-1,  0], # go up
           [ 0, -1], # go left
           [ 1,  0], # go down
           [ 0,  1]] # go right
forward_name = ['up', 'left', 'down', 'right']
# action has 3 values: right turn, no turn, left turn
action = [-1, 0, 1]
action_name = ['R', '#', 'L']
cost = [2, 1, 20] # cost has 3 values, corresponding to making
                  # a right turn, no turn, and a left turn
# EXAMPLE INPUTS:
# grid format:
# 0 = navigable space
# 1 = unnavigable space, i.e. a wall
grid = [[1, 1, 1, 0, 0, 0],
        [1, 1, 1, 0, 1, 0],
        [0, 0, 0, 0, 0, 0],
        [1, 1, 1, 0, 1, 1],
        [1, 1, 1, 0, 1, 1]]
init = [4, 3, 0] # Init state, given in the form [row,col,direction]
                 # direction = 0: up
                 #             1: left
                 #             2: down
                 #             3: right
goal = [2, 0] # given in the form [row,col] (direction is unconstrained)
# EXAMPLE OUTPUT:
# calling optimum_policy2D with the given parameters should return
# [[' ', ' ', ' ', 'R', '#', 'R'],
#  [' ', ' ', ' ', '#', ' ', '#'],
#  ['*', '#', '#', '#', '#', 'R'],
#  [' ', ' ', ' ', '#', ' ', ' '],
#  [' ', ' ', ' ', '#', ' ', ' ']]
# ----------
# -------------------------------------------------------------------
# Udacity's "brute force" method. Very computationally inefficient, but gets the job done.
# -------------------------------------------------------------------
def optimum_policy2D_udacity(grid,init,goal,cost):
    value = [[[999 for r in range(len(grid[0]))] for c in range(len(grid))],
             [[999 for r in range(len(grid[0]))] for c in range(len(grid))],
             [[999 for r in range(len(grid[0]))] for c in range(len(grid))],
             [[999 for r in range(len(grid[0]))] for c in range(len(grid))]]
    policy = [[[' ' for r in range(len(grid[0]))] for c in range(len(grid))],
              [[' ' for r in range(len(grid[0]))] for c in range(len(grid))],
              [[' ' for r in range(len(grid[0]))] for c in range(len(grid))],
              [[' ' for r in range(len(grid[0]))] for c in range(len(grid))]]
    policy2D = [[' ' for r in range(len(grid[0]))] for c in range(len(grid))]

    # First, generate the policy 'potential field'(*) as a 3D array...
    # (*) a physics concept that I borrowed
    change = True
    while change:
        change = False
        for x in range(len(grid)):
            for y in range(len(grid[0])):
                for o in range(len(forward)): # Orientation
                    if x == goal[0] and y == goal[1]: # at goal location
                        if value[o][x][y] > 0:
                            value[o][x][y] = 0    # set goal grid value to zero
                            policy[o][x][y] = '*' # to denote at goal location
                            change = True
                    elif grid[x][y] == 0: # navigable grid...
                        for a in range(len(action)): # search around for adjacent grids (xy2) where we *could* step forward to
                            o2 = (o + action[a]) % len(forward)
                            x2 = x + forward[o2][0]
                            y2 = y + forward[o2][1]
                            # Check: is within grid world & navigable?
                            if x2 >= 0 and x2 < len(grid) and \
                               y2 >= 0 and y2 < len(grid[0]) and \
                               grid[x2][y2] == 0:
                                v2 = value[o2][x2][y2] + cost[a] # potential (lower?) cost back at xy, given value at xy2 + cost of move
                                if v2 < value[o][x][y]: # reduce value at xy if possible
                                    value[o][x][y] = v2
                                    policy[o][x][y] = action_name[a]
                                    change = True

    # --- Now we have the potential field, propagate from init state to produce actual policy ---
    # set state to init state
    x = init[0]
    y = init[1]
    o = init[2]
    policy2D[x][y] = policy[o][x][y]
    while policy[o][x][y] != '*':
        # extract action index for the current policy
        a = action_name.index(policy[o][x][y])
        # propagate to new state according to potential field
        o2 = (o + action[a]) % len(forward)
        x2 = x + forward[o2][0]
        y2 = y + forward[o2][1]
        # update policy2D at new state
        policy2D[x2][y2] = policy[o2][x2][y2]
        # update state
        o = o2
        x = x2
        y = y2
    return policy2D
    # return [policy2D, policy] # debug - to see 3D policy
# -------------------------------------------------------------------
# My method, inspired by the more efficient 'open list' approach from the
# A* search solution, but walking backwards from the goal instead.
# -------------------------------------------------------------------
def optimum_policy2D(grid,init,goal,cost):
    value = [[[999 for r in range(len(grid[0]))] for c in range(len(grid))],
             [[999 for r in range(len(grid[0]))] for c in range(len(grid))],
             [[999 for r in range(len(grid[0]))] for c in range(len(grid))],
             [[999 for r in range(len(grid[0]))] for c in range(len(grid))]]
    policy = [[[' ' for r in range(len(grid[0]))] for c in range(len(grid))],
              [[' ' for r in range(len(grid[0]))] for c in range(len(grid))],
              [[' ' for r in range(len(grid[0]))] for c in range(len(grid))],
              [[' ' for r in range(len(grid[0]))] for c in range(len(grid))]]
    policy2D = [[' ' for r in range(len(grid[0]))] for c in range(len(grid))]

    open = [] # expansion list of 'open' grids

    x = goal[0]
    y = goal[1]
    v = 0
    # orientation doesn't matter at the goal spot, thus append all orientations into the open list
    for o in range(len(forward)):
        value[o][x][y] = v
        policy[o][x][y] = '*'
        open.append([v,o,x,y])

    done = False
    while not done:
        if len(open) == 0: # expansion list empty, we're done!
            done = True
        else:
            open.sort(reverse=True)
            next = open.pop() # pop the lowest-cost open entry
            v = next[0] # value/cost
            o = next[1] # orientation
            x = next[2] # X position
            y = next[3] # Y position
            # Since fwd movement sequence is defined as: turn first, then step...
            # given 'o' at current xy, we could ONLY have COME FROM grid xy2 via direction 'o'
            x2 = x - forward[o][0]
            y2 = y - forward[o][1]
            # Check: is within grid world & navigable?
            if 0 <= x2 and x2 < len(grid) and \
               0 <= y2 and y2 < len(grid[0]) and \
               grid[x2][y2] == 0:
                for a in range(len(action)): # search all (turning) actions to get to where we were
                    o2 = (o - action[a]) % len(forward) # back out the turning action
                    v2 = v + cost[a] # value at xy2
                    if v2 < value[o2][x2][y2]:
                        value[o2][x2][y2] = v2
                        policy[o2][x2][y2] = action_name[a]
                        open.append([v2,o2,x2,y2])

    # --- Now we have the potential field, propagate from init state ---
    # set state to init state
    x = init[0]
    y = init[1]
    o = init[2]
    policy2D[x][y] = policy[o][x][y]
    while policy[o][x][y] != '*':
        # extract action index for the current policy
        a = action_name.index(policy[o][x][y])
        # propagate to new state according to potential field
        o2 = (o + action[a]) % len(forward)
        x2 = x + forward[o2][0]
        y2 = y + forward[o2][1]
        # update policy2D at new state
        policy2D[x2][y2] = policy[o2][x2][y2]
        # update state
        o = o2
        x = x2
        y = y2
    return policy2D
    # return [policy2D, policy] # debug - to see 3D policy
# [result_u, result_u3d] = optimum_policy2D_udacity(grid,init,goal,cost) # debug
t0 = time.time()
result_u = optimum_policy2D_udacity(grid,init,goal,cost)
t1 = time.time()

print 'Udacity brute-force method'
print 'time = {0:.2f} ms'.format((t1-t0)*1000)
print '\n'.join(str(p) for p in result_u)
# debug - print out 3D policy
# for o, r in enumerate(result_u3d):
#     print 'Orientation = ', o
#     print '\n'.join(str(p) for p in r)
print

# [result, result3d] = optimum_policy2D(grid,init,goal,cost) # debug
t0 = time.time()
result = optimum_policy2D(grid,init,goal,cost)
t1 = time.time()

print "Dean's method"
print 'time = {0:.2f} ms'.format((t1-t0)*1000)
print '\n'.join(str(p) for p in result)
# debug - print out 3D policy
# for o, r in enumerate(result3d):
#     print 'Orientation = ', o
#     print '\n'.join(str(p) for p in r)
@AeroGeekDean (Author)

I didn't like the brute-force method in the classroom solution, so I decided to extend the best-first open-set approach taught for A*, but walk it backwards from the goal and explore the entire state space that could reach the goal.

The resulting performance was ~5x faster than the brute-force approach.
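
As an aside, the same backward expansion can be written with Python's heapq instead of re-sorting the whole open list every iteration. Below is a minimal illustrative sketch (not the gist code itself); it assumes the forward/action/action_name/cost tables defined above and skips the final 2D policy rollout, which would be identical to the gist's.

import heapq

def backward_value_sketch(grid, goal, cost):
    # Same state space as the gist: value[orientation][row][col]
    rows, cols = len(grid), len(grid[0])
    value = [[[999] * cols for _ in range(rows)] for _ in range(len(forward))]
    policy = [[[' '] * cols for _ in range(rows)] for _ in range(len(forward))]
    open_heap = []
    for o in range(len(forward)):                  # goal orientation is unconstrained
        value[o][goal[0]][goal[1]] = 0
        policy[o][goal[0]][goal[1]] = '*'
        heapq.heappush(open_heap, (0, o, goal[0], goal[1]))
    while open_heap:
        v, o, x, y = heapq.heappop(open_heap)      # cheapest open entry first
        if v > value[o][x][y]:                     # stale entry, already improved
            continue
        x2 = x - forward[o][0]                     # cell we could have come FROM
        y2 = y - forward[o][1]
        if 0 <= x2 < rows and 0 <= y2 < cols and grid[x2][y2] == 0:
            for a in range(len(action)):
                o2 = (o - action[a]) % len(forward)    # back out the turning action
                v2 = v + cost[a]
                if v2 < value[o2][x2][y2]:
                    value[o2][x2][y2] = v2
                    policy[o2][x2][y2] = action_name[a]
                    heapq.heappush(open_heap, (v2, o2, x2, y2))
    return value, policy

The heap keeps each pop at O(log n), and the stale-entry check replaces the sort(reverse=True)/pop() pattern used above.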

@Tachyon5

This is interesting. I like how you handled that. I guess it's like learning backwards using memory.

@AeroGeekDean (Author)

Whoa, apparently gist comments do not trigger notifications! Didn't know this comment was made until I checked back manually.

@AeroGeekDean (Author)

So after going through an additional quiz (AI4Robotics -> Problem Set 4 -> Quiz 5: Stochastic Motion), I realized that my "faster" algorithm cannot be guaranteed to arrive at the correct solution! ...I was just lucky earlier.

My method uses a best-first exploration front through the nodes, but explores backwards & outwards from the goal node. It ASSUMES a node's value depends ONLY on nodes that are already within the EXPLORED space, which the rules of the earlier example happened to satisfy.

For problems where a node's value also depends on nodes in the unexplored space, my method breaks down... as there's no easy way to guarantee a revisit of the affected nodes.
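
To make that concrete: in the stochastic-motion quiz, a cell's expected cost is a probability-weighted sum over ALL of its possible successor cells, including ones a backward best-first front has not reached yet. The repeated full sweeps of the brute-force method keep revisiting every cell until nothing changes, which is why they stay correct there. A rough, hypothetical sketch of that style of update (success_prob and collision_cost are made-up illustration parameters, not from the quiz; it reuses the forward table above):

def stochastic_value_sketch(grid, goal, step_cost=1.0, success_prob=0.5,
                            collision_cost=100.0):
    rows, cols = len(grid), len(grid[0])
    fail_prob = (1.0 - success_prob) / 2.0  # veer one heading left or right of the intent
    value = [[1000.0] * cols for _ in range(rows)]
    change = True
    while change:                           # keep sweeping every cell until nothing improves
        change = False
        for x in range(rows):
            for y in range(cols):
                if [x, y] == goal:
                    if value[x][y] > 0.0:
                        value[x][y] = 0.0
                        change = True
                elif grid[x][y] == 0:
                    for i in range(len(forward)):   # each intended heading
                        # expected cost = step cost + weighted value of ALL outcomes,
                        # some of which a backward front may not have explored yet
                        v2 = step_cost
                        for off, p in [(0, success_prob), (-1, fail_prob), (1, fail_prob)]:
                            o2 = (i + off) % len(forward)
                            x2 = x + forward[o2][0]
                            y2 = y + forward[o2][1]
                            if 0 <= x2 < rows and 0 <= y2 < cols and grid[x2][y2] == 0:
                                v2 += p * value[x2][y2]
                            else:
                                v2 += p * collision_cost
                        if v2 < value[x][y]:
                            value[x][y] = v2
                            change = True
    return value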

Oh well...
