"""Train and then evaluate a Tensorforce agent on the custom 'Simple-v0' Gym environment."""

from contextlib import ExitStack

import gym
from gym.envs.registration import register
from tensorforce.agents import Agent
from tensorforce.environments import Environment
from tensorforce.execution import Runner

env_name = 'Simple-v0'

# Register the custom environment under ``env_name`` at import time so that
# the Gym-backed ``Environment.create`` call in ``main`` can look it up by id.
# NOTE(review): the entry point presumably resolves to class ``SimpleEnv`` in a
# local module ``simple`` -- confirm that module is importable from here.
register(id=env_name, entry_point="simple:SimpleEnv")


def main():
    """Create the environment and agent, train for 1000 episodes, evaluate for 100.

    Every object that exposes ``close()`` is registered on an ``ExitStack`` as
    soon as it has been created, so cleanup runs in reverse creation order
    (runner, agent, environment) even when creation of a later object or one
    of the runs raises.
    """
    with ExitStack() as cleanup:
        environment = Environment.create(
            environment='gym',
            level=env_name,
            max_episode_timesteps=2000,
        )
        cleanup.callback(environment.close)

        agent = Agent.create(
            agent='tensorforce',
            environment=environment,
            update=100,
            optimizer=dict(optimizer='adam', learning_rate=1e-3),
            objective='policy_gradient',
            reward_estimation=dict(horizon=10),
            # Checkpoint settings handed to the agent's saver.
            saver=dict(
                directory='simple_env_checkpt',
                frequency=25,
                max_checkpoints=20,
            ),
        )
        cleanup.callback(agent.close)

        runner = Runner(agent=agent, environment=environment)
        cleanup.callback(runner.close)

        # Training run, followed by a run with evaluation=True.
        runner.run(num_episodes=1000)
        runner.run(num_episodes=100, evaluation=True)


if __name__ == '__main__':
    main()