Last active: November 4, 2021 15:03
Revisions
zkytony revised this gist
Nov 4, 2021. 1 changed file with 0 additions and 2 deletions.
Diff hunk @@ -70,8 +70,6 @@ in the __main__ block: removed the initial belief defined at the top of the block (b0 = pomdp_py.Histogram({"hungry": 0.5, "sated": 0.5})), leaving only the {"hungry": 0.22, "sated": 0.78} initial belief used to construct the agent.
zkytony renamed this gist
Nov 4, 2021. 1 changed file with 0 additions and 0 deletions.
File renamed without changes.
zkytony created this gist
Nov 4, 2021.
"""
Example of defining a small, tabular POMDP and solving it
using Cassandra's pomdp-solve value iteration solver.
Refer to documentation:
https://h2r.github.io/pomdp-py/html/examples.external_solvers.html
"""
import pomdp_py


def cryingbaby():
    """This is a POMDP defined in the Algorithms for Decision Making book
    by M. J. Kochenderfer et al. in section F.7"""
    S = ['hungry', 'sated']
    A = ['feed', 'sing', 'ignore']
    Z = ['crying', 'quiet']

    T = pomdp_py.TabularTransitionModel({
        # state, action, next state
        ('hungry', 'feed', 'sated'): 1.0,
        ('hungry', 'feed', 'hungry'): 0.0,
        ('hungry', 'sing', 'hungry'): 1.0,
        ('hungry', 'sing', 'sated'): 0.0,
        ('hungry', 'ignore', 'hungry'): 1.0,
        ('hungry', 'ignore', 'sated'): 0.0,
        ('sated', 'feed', 'sated'): 1.0,
        ('sated', 'feed', 'hungry'): 0.0,
        ('sated', 'sing', 'hungry'): 0.1,
        ('sated', 'sing', 'sated'): 0.9,
        ('sated', 'ignore', 'hungry'): 0.1,
        ('sated', 'ignore', 'sated'): 0.9
    })

    O = pomdp_py.TabularObservationModel({
        # state, action, observation
        ('hungry', 'feed', 'crying'): 0.8,
        ('hungry', 'feed', 'quiet'): 0.2,
        ('hungry', 'sing', 'crying'): 0.9,
        ('hungry', 'sing', 'quiet'): 0.1,
        ('hungry', 'ignore', 'crying'): 0.8,
        ('hungry', 'ignore', 'quiet'): 0.2,
        ('sated', 'feed', 'crying'): 0.1,
        ('sated', 'feed', 'quiet'): 0.9,
        ('sated', 'sing', 'crying'): 0.1,
        ('sated', 'sing', 'quiet'): 0.9,
        ('sated', 'ignore', 'crying'): 0.1,
        ('sated', 'ignore', 'quiet'): 0.9,
    })

    R = pomdp_py.TabularRewardModel({
        # state, action
        ('hungry', 'feed'): -10 - 5,
        ('hungry', 'sing'): -10 - 0.5,
        ('hungry', 'ignore'): -10,
        ('sated', 'feed'): -5,
        ('sated', 'sing'): -0.5,
        ('sated', 'ignore'): 0
    })

    gamma = 0.9
    return S, A, Z, T, O, R, gamma


if __name__ == "__main__":
    b0 = pomdp_py.Histogram({"hungry": 0.5,
                             "sated": 0.5})
    S, A, Z, T, O, R, gamma = cryingbaby()
    pi = pomdp_py.UniformPolicyModel(A)
    b0 = pomdp_py.Histogram({"hungry": 0.22,
                             "sated": 0.78})
    agent = pomdp_py.Agent(b0, pi, T, O, R)

    horizon = 5
    filename = "cryingbaby.POMDP"
    pomdp_py.to_pomdp_file(agent, filename, discount_factor=gamma)

    # path to the pomdp-solve binary
    pomdp_solve_path = "/home/kaiyuzh/software/pomdp-solve-5.4/src/pomdp-solve"
    policy = pomdp_py.vi_pruning(agent, pomdp_solve_path, discount_factor=gamma,
                                 options=["-horizon", horizon],
                                 remove_generated_files=False,
                                 return_policy_graph=False)
    print(pomdp_py.value(agent.belief, S, A, Z, T, O, R, gamma, horizon=horizon))

    state = "hungry"  # true initial state
    for step in range(10):
        action = policy.plan(agent)
        next_state = T.sample(state, action)
        reward = R.sample(state, action, next_state)
        observation = O.sample(next_state, action)

        # print
        print(f"step = {step+1}"
              f"\t|\taction: {action}"
              f"\t|\tobservation: {observation}"
              f"\t|\tstate: {state} "
              f"\t|\treward: {reward}"
              f"\t|\tbelief: {agent.belief}")

        # update agent belief
        next_belief = pomdp_py.belief_update(agent.belief, action, observation,
                                             T, O)
        agent.set_belief(pomdp_py.Histogram(next_belief))

        # apply state transition to the environment
        state = next_state
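As a sanity check on the belief trace printed by the simulation loop above, here is a minimal standalone sketch of the Bayes-filter update that pomdp_py.belief_update performs, written with plain dictionaries over the same crying baby tables. The names STATES, T_TABLE, O_TABLE, and belief_update_manual are illustrative only; they are not part of pomdp_py or of the gist.

# Minimal sketch of the exact belief update for the tabular crying baby POMDP:
#   b'(s') proportional to O(o | s', a) * sum_s T(s' | s, a) * b(s), then normalize.

STATES = ["hungry", "sated"]

# Transition probabilities copied from the POMDP above: (state, action, next state)
T_TABLE = {
    ("hungry", "feed", "sated"): 1.0, ("hungry", "feed", "hungry"): 0.0,
    ("hungry", "sing", "hungry"): 1.0, ("hungry", "sing", "sated"): 0.0,
    ("hungry", "ignore", "hungry"): 1.0, ("hungry", "ignore", "sated"): 0.0,
    ("sated", "feed", "sated"): 1.0, ("sated", "feed", "hungry"): 0.0,
    ("sated", "sing", "hungry"): 0.1, ("sated", "sing", "sated"): 0.9,
    ("sated", "ignore", "hungry"): 0.1, ("sated", "ignore", "sated"): 0.9,
}

# Observation probabilities copied from the POMDP above: (next state, action, observation)
O_TABLE = {
    ("hungry", "feed", "crying"): 0.8, ("hungry", "feed", "quiet"): 0.2,
    ("hungry", "sing", "crying"): 0.9, ("hungry", "sing", "quiet"): 0.1,
    ("hungry", "ignore", "crying"): 0.8, ("hungry", "ignore", "quiet"): 0.2,
    ("sated", "feed", "crying"): 0.1, ("sated", "feed", "quiet"): 0.9,
    ("sated", "sing", "crying"): 0.1, ("sated", "sing", "quiet"): 0.9,
    ("sated", "ignore", "crying"): 0.1, ("sated", "ignore", "quiet"): 0.9,
}


def belief_update_manual(belief, action, observation):
    """Exact Bayes-filter belief update over the two crying baby states."""
    new_belief = {}
    for sp in STATES:
        # predicted probability of reaching sp, weighted by the likelihood
        # of the received observation in sp
        pred = sum(T_TABLE[(s, action, sp)] * belief[s] for s in STATES)
        new_belief[sp] = O_TABLE[(sp, action, observation)] * pred
    total = sum(new_belief.values())
    return {s: p / total for s, p in new_belief.items()}


if __name__ == "__main__":
    b = {"hungry": 0.22, "sated": 0.78}
    # e.g., the agent ignores the baby and then hears crying
    print(belief_update_manual(b, "ignore", "crying"))

With the initial belief used in the gist, ignoring the baby and observing crying shifts probability mass sharply toward "hungry", which matches the qualitative behavior one would expect from the tables above.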