@zkytony
Last active November 4, 2021 15:03
Revisions

  1. zkytony revised this gist Nov 4, 2021. 1 changed file (cryingbaby.py), 0 additions and 2 deletions: the first b0 = pomdp_py.Histogram({"hungry": 0.5, "sated": 0.5}) definition in the __main__ block was removed, since it is immediately overwritten by the 0.22/0.78 belief a few lines below.
  2. zkytony renamed this gist Nov 4, 2021 (gistfile1.txt → cryingbaby.py); file contents unchanged.
  3. zkytony created this gist Nov 4, 2021, adding the 115-line file gistfile1.txt:
    """
    Example of defining a small, tabular POMDP and solving
    it using Cassandra's pomdp-solve value iteration solver.

    Refer to documentation:
    https://h2r.github.io/pomdp-py/html/examples.external_solvers.html
    """
    import pomdp_py

    def cryingbaby():
    """This is a POMDP defined in the Algorithms for Decision Making book
    by M. J. Kochenderfer et al. in section F.7"""
    S = ['hungry', 'sated']
    A = ['feed', 'sing', 'ignore']
    Z = ['crying', 'quiet']
    T = pomdp_py.TabularTransitionModel({
    # state, action, next state
    ('hungry', 'feed', 'sated'): 1.0,
    ('hungry', 'feed', 'hungry'): 0.0,

    ('hungry', 'sing', 'hungry'): 1.0,
    ('hungry', 'sing', 'sated'): 0.0,

    ('hungry', 'ignore', 'hungry'): 1.0,
    ('hungry', 'ignore', 'sated'): 0.0,

    ('sated', 'feed', 'sated'): 1.0,
    ('sated', 'feed', 'hungry'): 0.0,

    ('sated', 'sing', 'hungry'): 0.1,
    ('sated', 'sing', 'sated'): 0.9,

    ('sated', 'ignore', 'hungry'): 0.1,
    ('sated', 'ignore', 'sated'): 0.9
    })

    O = pomdp_py.TabularObservationModel({
    # state, action, observation
    ('hungry', 'feed', 'crying'): 0.8,
    ('hungry', 'feed', 'quiet'): 0.2,

    ('hungry', 'sing', 'crying'): 0.9,
    ('hungry', 'sing', 'quiet'): 0.1,

    ('hungry', 'ignore', 'crying'): 0.8,
    ('hungry', 'ignore', 'quiet'): 0.2,

    ('sated', 'feed', 'crying'): 0.1,
    ('sated', 'feed', 'quiet'): 0.9,

    ('sated', 'sing', 'crying'): 0.1,
    ('sated', 'sing', 'quiet'): 0.9,

    ('sated', 'ignore', 'crying'): 0.1,
    ('sated', 'ignore', 'quiet'): 0.9,
    })

    R = pomdp_py.TabularRewardModel({
    # state, action
    ('hungry', 'feed'): -10 - 5,
    ('hungry', 'sing'): -10 - 0.5,
    ('hungry', 'ignore'): -10,

    ('sated', 'feed'): -5,
    ('sated', 'sing'): -0.5,
    ('sated', 'ignore'): 0
    })

    gamma = 0.9
    return S, A, Z, T, O, R, gamma

    if __name__ == "__main__":
    b0 = pomdp_py.Histogram({"hungry": 0.5,
    "sated": 0.5})
    S, A, Z, T, O, R, gamma = cryingbaby()
    pi = pomdp_py.UniformPolicyModel(A)
    b0 = pomdp_py.Histogram({"hungry": 0.22,
    "sated": 0.78})
    agent = pomdp_py.Agent(b0, pi, T, O, R)
    horizon = 5

    filename = "cryingbaby.POMDP"
    pomdp_py.to_pomdp_file(agent, filename, discount_factor=gamma)

    # path to the pomdp-solve binary
    pomdp_solve_path = "/home/kaiyuzh/software/pomdp-solve-5.4/src/pomdp-solve"
    policy = pomdp_py.vi_pruning(agent, pomdp_solve_path,
    discount_factor=gamma,
    options=["-horizon", horizon],
    remove_generated_files=False,
    return_policy_graph=False)

    print(pomdp_py.value(agent.belief, S, A, Z, T, O, R, gamma, horizon=horizon))

    state = "hungry" # true initial state
    for step in range(10):
    action = policy.plan(agent)
    next_state = T.sample(state, action)
    reward = R.sample(state, action, next_state)
    observation = O.sample(next_state, action)

    # print
    print(f"step = {step+1}"
    f"\t|\taction: {action}"
    f"\t|\tobservation: {observation}"
    f"\t|\tstate: {state} "
    f"\t|\treward: {reward}"
    f"\t|\tbelief: {agent.belief}")

    # update agent belief
    next_belief = pomdp_py.belief_update(agent.belief, action, observation, T, O)
    agent.set_belief(pomdp_py.Histogram(next_belief))

    # apply state transition to the environment
    state = next_state
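Note: running this script requires pomdp_py and a local build of Cassandra's pomdp-solve; pomdp_solve_path above points to the author's machine and should be changed to wherever the binary lives on yours.

As a sanity check on the belief values printed in the loop, here is a minimal sketch of the Bayes filter that a tabular POMDP belief update performs, b'(s') proportional to O(o | s', a) * sum_s T(s' | s, a) * b(s), written with plain dicts and independent of pomdp_py. The bayes_update helper and the T_ignore / O_ignore tables are introduced here purely for illustration; the probabilities are copied from cryingbaby() above, and this is not claimed to be how pomdp_py.belief_update is implemented internally.

def bayes_update(belief, action, observation, T_probs, O_probs, states):
    """Standard POMDP belief update:
    b'(s') is proportional to O(o | s', a) * sum_s T(s' | s, a) * b(s)."""
    unnormalized = {
        sp: O_probs[(sp, action, observation)]
            * sum(T_probs[(s, action, sp)] * belief[s] for s in states)
        for sp in states
    }
    total = sum(unnormalized.values())
    return {s: p / total for s, p in unnormalized.items()}

# entries for the 'ignore' action, copied from the tables in cryingbaby()
T_ignore = {('hungry', 'ignore', 'hungry'): 1.0, ('hungry', 'ignore', 'sated'): 0.0,
            ('sated', 'ignore', 'hungry'): 0.1, ('sated', 'ignore', 'sated'): 0.9}
O_ignore = {('hungry', 'ignore', 'crying'): 0.8, ('hungry', 'ignore', 'quiet'): 0.2,
            ('sated', 'ignore', 'crying'): 0.1, ('sated', 'ignore', 'quiet'): 0.9}

# start from the uniform belief, ignore the baby, and hear crying:
# hungry: 0.8 * (1.0*0.5 + 0.1*0.5) = 0.44, sated: 0.1 * (0.0*0.5 + 0.9*0.5) = 0.045,
# which normalizes to roughly {'hungry': 0.907, 'sated': 0.093}
b1 = bayes_update({"hungry": 0.5, "sated": 0.5}, "ignore", "crying",
                  T_ignore, O_ignore, ["hungry", "sated"])
print(b1)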