klezVirus · November 14, 2022 08:10 · Jul 6, 2021 · Jul 6, 2021
diff --git a/.py → rl_glue.py b/.py → rl_glue.py
diff --git a/.py b/.py
@@ -0,0 +1,184 @@
class RLGlue:
    """Glue object that wires one environment and one agent together.

    Args:
        env_class: a class (or zero-argument callable) producing the
            environment; it is instantiated once in the constructor.
        agent_class: a class (or zero-argument callable) producing the
            agent; it is instantiated once in the constructor.

    The counters (``total_reward``, ``num_steps``, ``num_episodes``) stay
    ``None`` until ``rl_init`` is called.
    """

    def __init__(self, env_class, agent_class):
        self.environment = env_class()
        self.agent = agent_class()

        self.total_reward = None
        self.last_action = None
        self.num_steps = None
        self.num_episodes = None

    def rl_init(self, agent_init_info=None, env_init_info=None):
        """Initialise the environment and the agent and zero all counters.

        Args:
            agent_init_info (dict, optional): forwarded to ``agent_init``.
                A fresh empty dict is used when omitted.
            env_init_info (dict, optional): forwarded to ``env_init``.
                A fresh empty dict is used when omitted.
        """
        # A new dict per call: a shared ``{}`` default would leak any
        # mutation made by one agent/environment into every later call.
        if env_init_info is None:
            env_init_info = {}
        if agent_init_info is None:
            agent_init_info = {}

        self.environment.env_init(env_init_info)
        self.agent.agent_init(agent_init_info)

        self.total_reward = 0.0
        self.num_steps = 0
        self.num_episodes = 0

    def rl_start(self, agent_start_info=None, env_start_info=None):
        """Start an episode: first observation, then the agent's first action.

        Args:
            agent_start_info: accepted for interface compatibility; unused.
            env_start_info: accepted for interface compatibility; unused.

        Returns:
            tuple: (state, action)
        """
        first_state = self.environment.env_start()
        self.last_action = self.agent.agent_start(first_state)

        return (first_state, self.last_action)

    def rl_agent_start(self, observation):
        """Start the agent directly.

        Args:
            observation: the first observation from the environment.

        Returns:
            The action chosen by the agent.
        """
        return self.agent.agent_start(observation)

    def rl_agent_step(self, reward, observation):
        """Let the agent take one step directly.

        Args:
            reward (float): the reward received for the last action.
            observation: the state observation handed to the agent.

        Returns:
            The action chosen by the agent.
        """
        return self.agent.agent_step(reward, observation)

    def rl_agent_end(self, reward):
        """Tell the agent the episode has terminated.

        Args:
            reward (float): the reward received on termination.
        """
        self.agent.agent_end(reward)

    def rl_env_start(self):
        """Start the environment directly.

        Resets ``total_reward`` to 0.0 and ``num_steps`` to 1 before
        asking the environment for its first observation.

        Returns:
            The first observation returned by ``env_start``.
        """
        self.total_reward = 0.0
        self.num_steps = 1

        return self.environment.env_start()

    def rl_env_step(self, action):
        """Advance the environment directly by one step.

        Args:
            action: the action to apply.

        Returns:
            (float, state, Boolean): reward, state observation, boolean
                indicating termination, exactly as returned by ``env_step``.
        """
        outcome = self.environment.env_step(action)
        reward, _, terminal = outcome

        self.total_reward += reward

        # A terminal transition closes an episode instead of counting
        # as a step.
        if terminal:
            self.num_episodes += 1
        else:
            self.num_steps += 1

        return outcome

    def rl_step(self):
        """Take one glue step: environment step, then agent step or end.

        Uses the action stored by the previous ``rl_start``/``rl_step``.

        Returns:
            (float, state, action, Boolean): reward, last state observation,
                last action (``None`` on termination), boolean indicating
                termination.
        """
        reward, state, terminal = self.environment.env_step(self.last_action)

        self.total_reward += reward

        if terminal:
            self.num_episodes += 1
            self.agent.agent_end(reward)
            return (reward, state, None, terminal)

        self.num_steps += 1
        self.last_action = self.agent.agent_step(reward, state)
        return (reward, state, self.last_action, terminal)

    def rl_cleanup(self):
        """Clean up the environment, then the agent."""
        self.environment.env_cleanup()
        self.agent.agent_cleanup()

    def rl_agent_message(self, message):
        """Send a message to the agent.

        Args:
            message: the message (or question) to send to the agent.

        Returns:
            The agent's reply.
        """
        return self.agent.agent_message(message)

    def rl_env_message(self, message):
        """Send a message to the environment.

        Args:
            message: the message (or question) to send to the environment.

        Returns:
            The environment's reply.
        """
        return self.environment.env_message(message)

    def rl_episode(self, max_steps_this_episode):
        """Run one episode until termination or until the step cap is hit.

        Args:
            max_steps_this_episode (int): maximum number of non-terminal
                steps to take in this episode; 0 means no limit.

        Returns:
            Boolean: True if the episode ended in a terminal state, False
                if it was cut off by the step cap.
        """
        is_terminal = False

        self.rl_start()

        # ``num_steps`` is not reset by ``rl_start``, so the cap has to be
        # measured relative to its value when this episode began; comparing
        # the raw counter would starve every episode after the first.
        steps_at_start = self.num_steps

        while not is_terminal and (
            max_steps_this_episode == 0
            or self.num_steps - steps_at_start < max_steps_this_episode
        ):
            is_terminal = self.rl_step()[3]

        return is_terminal

    def rl_return(self):
        """Return the accumulated reward.

        Returns:
            float: the reward summed since the counters were last reset.
        """
        return self.total_reward

    def rl_num_steps(self):
        """Return the step counter.

        Returns:
            int: the number of non-terminal steps counted since the
                counters were last reset.
        """
        return self.num_steps

    def rl_num_episodes(self):
        """Return the episode counter.

        Returns:
            int: the number of terminal transitions seen since ``rl_init``.
        """
        return self.num_episodes
No results found