"""Replay a saved Tensorforce agent on the ``Simple-v0`` Gym environment.

Usage:
    python <this script> path/to/checkpoint

The script loads the agent from the given checkpoint directory, runs a single
episode of at most 2000 steps, prints every transition, and finally prints the
accumulated reward.
"""
import sys

import gym
from gym.envs.registration import register
from tensorforce.environments import Environment
from tensorforce.agents import Agent

if len(sys.argv) < 2:
    print("Usage: python {} path/to/checkpoint".format(sys.argv[0]))
    # sys.exit (not the site-provided exit()) with a non-zero status so that
    # shells and calling scripts can detect the usage error.
    sys.exit(1)

ckpt = sys.argv[1]

# The entry point refers to class SimpleEnv in a module named ``simple``;
# that module has to be importable when gym.make() runs.
env_name = 'Simple-v0'
register(id=env_name, entry_point="simple:SimpleEnv")

env = gym.make(env_name)
environment = Environment.create(environment=env)

try:
    print("Loading", ckpt)
    agent = Agent.load(
        directory=ckpt, format='checkpoint', environment=environment
    )

    try:
        new_observation = environment.reset()
        rewards = 0.

        for i in range(2000):
            # Evaluation-only step: no observe() call is paired with this
            # act(), hence independent=True; deterministic=True is requested
            # for the action selection.
            actions = agent.act(
                states=new_observation, independent=True, deterministic=True
            )
            new_observation, done, reward = environment.execute(actions)
            print("Action {} State {} Reward {}".format(
                actions, new_observation, reward))
            rewards += reward

            if done:
                print("Died on {} timestep".format(i))
                break

        print("Total reward of {}".format(rewards))
    finally:
        # Close the agent even if the rollout raised.
        agent.close()
finally:
    # Close the environment last, mirroring the original shutdown order.
    environment.close()