import random as _stdlib_random
from random import random

import gym
from gym import spaces
import numpy as np


class SimpleEnv(gym.Env):
    """Minimal one-dimensional environment with two discrete actions.

    The environment keeps a single scalar ``state``. Action ``1`` raises
    it by ``0.1``; any other valid action (``0``) lowers it by
    ``random() * 0.2``. The reward of a step is ``state - 0.5``, except
    on the step that ends the episode, where it is ``0``. The episode
    ends once the state leaves the interval ``[0, 0.98]``.

    This class uses the classic Gym API: ``step`` returns a 4-tuple and
    ``reset`` returns only the observation.
    """

    def __init__(self):
        super().__init__()
        self.action_space = spaces.Discrete(2)
        # Bounds cover the extreme post-step values: a state just above 0
        # can drop by almost 0.2, and a state of 0.98 can rise by 0.1.
        self.observation_space = spaces.Box(
            np.array([-0.2]), np.array([1.08]), dtype=np.float32
        )
        self.reset()

    def _take_action(self, action):
        """Apply ``action`` to the internal state.

        Raises:
            AssertionError: if ``action`` is not in ``action_space``.
        """
        assert self.action_space.contains(action), (
            "invalid action: {!r}".format(action)
        )
        if action == 1:
            self.state += 0.1
        else:
            self.state -= random() * 0.2

    def step(self, action):
        """Advance the environment by one step.

        Returns:
            A tuple ``(observation, reward, done, info)`` where ``info``
            is always an empty dict.
        """
        self._take_action(action)
        self._reward = self.state - 0.5
        ob = self.get_state()

        # Game over: the state left the allowed interval, and the
        # terminal step carries no reward.
        done = self.state > 0.98 or self.state < 0.
        if done:
            self._reward = 0.
        return ob, self._reward, done, {}

    def get_state(self):
        """Return the current state as a one-element float32 array.

        The dtype matches the one declared on ``observation_space`` so
        the observation is consistent with the advertised space.
        """
        return np.array([self.state], dtype=np.float32)

    def reset(self):
        """Reset state to ``0.25`` and reward to ``0``; return the observation."""
        self.state = 0.25
        self._reward = 0.
        return self.get_state()

    def render(self, mode="human"):
        """Print the current state and the last reward to stdout.

        ``mode`` is accepted for API compatibility and is not used.
        """
        print(f"{self.state:.2f}, Reward: {self._reward:.2f}")

    def seed(self, seed=1):
        """Seed the stdlib and NumPy global random number generators.

        The module-level name ``random`` in this file is the *function*
        ``random.random``, which has no ``seed`` attribute, so seeding
        must go through the stdlib module itself.

        Returns:
            A one-element list containing the seed that was applied.
        """
        _stdlib_random.seed(seed)
        np.random.seed(seed)
        return [seed]