@zkytony
Last active November 4, 2021 15:03
Revisions

  1. zkytony revised this gist Nov 4, 2021. 1 changed file (cryingbaby.py), 0 additions and 2 deletions: the first b0 = pomdp_py.Histogram({"hungry": 0.5, "sated": 0.5}) definition in the __main__ block was removed, since it is immediately overwritten by the 0.22/0.78 belief a few lines below.
  2. zkytony renamed this gist Nov 4, 2021 (gistfile1.txt → cryingbaby.py); file contents unchanged.
  3. zkytony created this gist Nov 4, 2021, adding the 115-line file gistfile1.txt:
    """
    Example of defining a small, tabular POMDP and solving
    it using Cassandra's pomdp-solve value iteration solver.

    Refer to documentation:
    https://h2r.github.io/pomdp-py/html/examples.external_solvers.html
    """
    import pomdp_py

    def cryingbaby():
    """This is a POMDP defined in the Algorithms for Decision Making book
    by M. J. Kochenderfer et al. in section F.7"""
    S = ['hungry', 'sated']
    A = ['feed', 'sing', 'ignore']
    Z = ['crying', 'quiet']
    T = pomdp_py.TabularTransitionModel({
    # state, action, next state
    ('hungry', 'feed', 'sated'): 1.0,
    ('hungry', 'feed', 'hungry'): 0.0,

    ('hungry', 'sing', 'hungry'): 1.0,
    ('hungry', 'sing', 'sated'): 0.0,

    ('hungry', 'ignore', 'hungry'): 1.0,
    ('hungry', 'ignore', 'sated'): 0.0,

    ('sated', 'feed', 'sated'): 1.0,
    ('sated', 'feed', 'hungry'): 0.0,

    ('sated', 'sing', 'hungry'): 0.1,
    ('sated', 'sing', 'sated'): 0.9,

    ('sated', 'ignore', 'hungry'): 0.1,
    ('sated', 'ignore', 'sated'): 0.9
    })

    O = pomdp_py.TabularObservationModel({
    # state, action, observation
    ('hungry', 'feed', 'crying'): 0.8,
    ('hungry', 'feed', 'quiet'): 0.2,

    ('hungry', 'sing', 'crying'): 0.9,
    ('hungry', 'sing', 'quiet'): 0.1,

    ('hungry', 'ignore', 'crying'): 0.8,
    ('hungry', 'ignore', 'quiet'): 0.2,

    ('sated', 'feed', 'crying'): 0.1,
    ('sated', 'feed', 'quiet'): 0.9,

    ('sated', 'sing', 'crying'): 0.1,
    ('sated', 'sing', 'quiet'): 0.9,

    ('sated', 'ignore', 'crying'): 0.1,
    ('sated', 'ignore', 'quiet'): 0.9,
    })

    R = pomdp_py.TabularRewardModel({
    # state, action
    ('hungry', 'feed'): -10 - 5,
    ('hungry', 'sing'): -10 - 0.5,
    ('hungry', 'ignore'): -10,

    ('sated', 'feed'): -5,
    ('sated', 'sing'): -0.5,
    ('sated', 'ignore'): 0
    })

    gamma = 0.9
    return S, A, Z, T, O, R, gamma

    if __name__ == "__main__":
    b0 = pomdp_py.Histogram({"hungry": 0.5,
    "sated": 0.5})
    S, A, Z, T, O, R, gamma = cryingbaby()
    pi = pomdp_py.UniformPolicyModel(A)
    b0 = pomdp_py.Histogram({"hungry": 0.22,
    "sated": 0.78})
    agent = pomdp_py.Agent(b0, pi, T, O, R)
    horizon = 5

    filename = "cryingbaby.POMDP"
    pomdp_py.to_pomdp_file(agent, filename, discount_factor=gamma)

    # path to the pomdp-solve binary
    pomdp_solve_path = "/home/kaiyuzh/software/pomdp-solve-5.4/src/pomdp-solve"
    policy = pomdp_py.vi_pruning(agent, pomdp_solve_path,
    discount_factor=gamma,
    options=["-horizon", horizon],
    remove_generated_files=False,
    return_policy_graph=False)

    print(pomdp_py.value(agent.belief, S, A, Z, T, O, R, gamma, horizon=horizon))

    state = "hungry" # true initial state
    for step in range(10):
    action = policy.plan(agent)
    next_state = T.sample(state, action)
    reward = R.sample(state, action, next_state)
    observation = O.sample(next_state, action)

    # print
    print(f"step = {step+1}"
    f"\t|\taction: {action}"
    f"\t|\tobservation: {observation}"
    f"\t|\tstate: {state} "
    f"\t|\treward: {reward}"
    f"\t|\tbelief: {agent.belief}")

    # update agent belief
    next_belief = pomdp_py.belief_update(agent.belief, action, observation, T, O)
    agent.set_belief(pomdp_py.Histogram(next_belief))

    # apply state transition to the environment
    state = next_state
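Note: running this script requires pomdp_py and a local build of Cassandra's pomdp-solve; pomdp_solve_path above points to the author's machine and should be changed to wherever the binary lives on yours.

As a sanity check on the belief values printed in the loop, here is a minimal sketch of the Bayes filter that a tabular POMDP belief update performs, b'(s') proportional to O(o | s', a) * sum_s T(s' | s, a) * b(s), written with plain dicts and independent of pomdp_py. The bayes_update helper and the T_ignore / O_ignore tables are introduced here purely for illustration; the probabilities are copied from cryingbaby() above, and this is not claimed to be how pomdp_py.belief_update is implemented internally.

def bayes_update(belief, action, observation, T_probs, O_probs, states):
    """Standard POMDP belief update:
    b'(s') is proportional to O(o | s', a) * sum_s T(s' | s, a) * b(s)."""
    unnormalized = {
        sp: O_probs[(sp, action, observation)]
            * sum(T_probs[(s, action, sp)] * belief[s] for s in states)
        for sp in states
    }
    total = sum(unnormalized.values())
    return {s: p / total for s, p in unnormalized.items()}

# entries for the 'ignore' action, copied from the tables in cryingbaby()
T_ignore = {('hungry', 'ignore', 'hungry'): 1.0, ('hungry', 'ignore', 'sated'): 0.0,
            ('sated', 'ignore', 'hungry'): 0.1, ('sated', 'ignore', 'sated'): 0.9}
O_ignore = {('hungry', 'ignore', 'crying'): 0.8, ('hungry', 'ignore', 'quiet'): 0.2,
            ('sated', 'ignore', 'crying'): 0.1, ('sated', 'ignore', 'quiet'): 0.9}

# start from the uniform belief, ignore the baby, and hear crying:
# hungry: 0.8 * (1.0*0.5 + 0.1*0.5) = 0.44, sated: 0.1 * (0.0*0.5 + 0.9*0.5) = 0.045,
# which normalizes to roughly {'hungry': 0.907, 'sated': 0.093}
b1 = bayes_update({"hungry": 0.5, "sated": 0.5}, "ignore", "crying",
                  T_ignore, O_ignore, ["hungry", "sated"])
print(b1)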