# One stored transition. The field order here is the order in which
# ReplayMemory.sample() unzips a batch, so it must not be rearranged.
Experience = namedtuple(
    'Experience',
    field_names=['state', 'action', 'reward', 'done', 'new_state'],
)


class ReplayMemory:
    """Bounded buffer of ``Experience`` records with uniform random sampling.

    Records are kept in a ``collections.deque`` created with
    ``maxlen=capacity``: once the buffer is full, each new append discards
    the oldest record.
    """

    def __init__(self, capacity):
        """Create an empty buffer.

        Args:
            capacity: Maximum number of records to retain; passed straight
                to ``deque(maxlen=...)``.
        """
        self.buffer = deque(maxlen=capacity)

    def __len__(self):
        """Return the number of records currently stored."""
        return len(self.buffer)

    def append(self, experience):
        """Add one record, evicting the oldest one if the buffer is full.

        Args:
            experience: The record to store. ``sample`` unpacks each record
                into five fields, so it should be an ``Experience`` (or any
                5-item sequence in the same field order).
        """
        self.buffer.append(experience)

    def sample(self, batch_size=32):
        """Draw ``batch_size`` distinct records uniformly at random.

        Args:
            batch_size: Number of records to draw. Must be at least 1 and
                no larger than the number of records currently stored.

        Returns:
            A 5-tuple ``(states, actions, rewards, dones, next_states)`` of
            NumPy arrays, each built from the corresponding field of the
            sampled records.

        Raises:
            ValueError: If ``batch_size`` is not positive or exceeds the
                number of stored records.
        """
        stored = len(self.buffer)
        # Validate up front: a zero-size draw would otherwise fail later with
        # an unrelated "not enough values to unpack" error, and the other
        # invalid sizes would depend on NumPy's internal checks.
        if batch_size <= 0:
            raise ValueError(
                f"batch_size must be a positive integer, got {batch_size!r}"
            )
        if batch_size > stored:
            raise ValueError(
                f"cannot sample {batch_size} experiences from a buffer "
                f"holding only {stored}"
            )

        # replace=False guarantees the same record is never drawn twice.
        indices = np.random.choice(stored, batch_size, replace=False)
        # NOTE: deque indexing away from the ends is O(n), so each lookup
        # below gets slower as the buffer grows.
        batch = [self.buffer[idx] for idx in indices]

        # Transpose the list of records into one tuple per field.
        states, actions, rewards, dones, next_states = zip(*batch)
        return (
            np.array(states),
            np.array(actions),
            np.array(rewards),
            np.array(dones),
            np.array(next_states),
        )