# very messy. you can ignore this file. lots of different stats recorded.
#!/usr/bin/env python3
import json
import os
import pickle
import subprocess
import threading
import time

import torch
import torch.nn.functional as F

# 12x6 grid -> 1920x1080, so per-tile = 160x180
GRID_COLS, GRID_ROWS = 12, 6
TILE_W, TILE_H = 160, 180
OUT_W, OUT_H = GRID_COLS * TILE_W, GRID_ROWS * TILE_H


def _episode_path(game_id, suffix):
    """Return the path under ``episodes_tmp/`` for *game_id* plus *suffix*.

    Slashes in the game id are replaced by ``__`` so the id is a single
    file-name component.
    """
    return f"episodes_tmp/{game_id.replace('/', '__')}{suffix}"


# filenames identical to originals
def csvf(g):
    """Path of the per-step raw CSV log for game id *g*."""
    return _episode_path(g, "_rawer.csv")


def epf(g):
    """Path of the per-episode reward log for game id *g* (one value per line)."""
    return _episode_path(g, "_raw.jsonl")


def lpf(g):
    """Path of the per-episode length log for game id *g* (one value per line)."""
    return _episode_path(g, ".lenl")


# thread-local per-env logger state
_tl = threading.local()


def _append_episode_stats(st):
    """Append the current episode reward and length of *st* to its episode files.

    Only writes; resetting the counters is left to the caller.
    """
    with open(epf(st.game_id), "a") as f:
        f.write(f"{st.ep}\n")
    with open(lpf(st.game_id), "a") as f:
        f.write(f"{st.ep_len}\n")


def bind_logger(game_id, g_idx, info_s):
    """Initialise the calling thread's logger state for one environment.

    Args:
        game_id: Game identifier; used to derive all log file names.
        g_idx: Row of ``info_s`` that this environment accumulates into.
        info_s: Indexable counter tensor updated in place by ``log_step``
            (columns 0..3: reward, frames, terminations, truncations).

    The ``savetriples`` environment variable (integer > 0) additionally
    enables pickled (obs, action, reward) recording under ``recording/``.
    """
    os.makedirs("episodes_tmp", exist_ok=True)
    # episode files are created lazily on write; touch not required for identical behavior
    st = _tl

    # Re-binding on the same thread must not leak the previously opened CSV.
    previous_csv = getattr(st, "csvff", None)
    if previous_csv is not None:
        previous_csv.close()

    st.game_id = game_id
    st.g_idx = g_idx
    st.info_s = info_s
    st.ep = 0.0
    st.ep_len = 0
    st.last_action = 0
    st.savetriples = int(os.getenv('savetriples', 0)) > 0
    if st.savetriples:
        os.makedirs('recording', exist_ok=True)
        st.triplepath = f"recording/{game_id.split('/')[-1]}_triples.pkl"
    st.csvff = open(csvf(game_id), 'a')


def log_step(action, obs, rew, term, trunc):
    """Record one environment step for the logger bound to this thread.

    Args:
        action: Action taken this step; 0 is treated as the "no penalty" action.
        obs: Observation; only used (via ``obs.copy()``) when triple saving is on.
        rew: Raw reward for the step.
        term: Truthy when the episode terminated on this step.
        trunc: Truthy when the episode was truncated on this step.
    """
    st = _tl

    # 1) write raw csv exactly as before
    st.csvff.write(f"{action},{rew},{term},{trunc}\n")

    # 2) if episode ended, flush previous ep stats BEFORE adding current step
    #    (matches old ordering)
    if term or trunc:
        _append_episode_stats(st)
        st.ep = 0.0
        st.ep_len = 0

    # 3) optional triples with raw rew, and obs.copy()
    if st.savetriples:
        with open(st.triplepath, 'ab+') as f:
            pickle.dump((obs.copy(), action, rew), f)

    # 4) reward shaping identical to original: clip to [-1, 1], then a small
    #    penalty whenever the action changes to something other than 0
    shaped = max(-1.0, min(1.0, rew))
    if action != st.last_action and action != 0:
        shaped -= 0.0001
    st.last_action = action

    # 5) accumulate episode stats and global counters
    st.ep += shaped
    st.ep_len += 1
    st.info_s[st.g_idx, 0].add_(float(shaped))  # accumulated reward
    st.info_s[st.g_idx, 1].add_(1)  # accumulated frames
    if term:
        st.info_s[st.g_idx, 2].add_(1)  # accumulated terminations
    if trunc:
        st.info_s[st.g_idx, 3].add_(1)  # accumulated truncations


def log_close():
    """Write out any partially accumulated episode and close the raw CSV.

    Closing is best-effort: a missing handle (logger never bound) or an OS
    error while closing is ignored so teardown does not mask other errors.
    """
    st = _tl
    try:
        if st.ep_len:
            _append_episode_stats(st)
    finally:
        try:
            st.csvff.close()
        except (AttributeError, OSError):
            pass  # best-effort close


def _prep_tiles(obs_s):
    """Downscale up to the first 64 observations to tile size.

    Args:
        obs_s: Batch of channel-last images. The original note says
            (N, 250, 160, 3) uint8 on cuda; that is not checked here.

    Returns:
        uint8 tensor of shape (n, TILE_H, TILE_W, C) with n = min(64, N).
    """
    n = min(64, obs_s.shape[0])
    # channel-first float copy, as required by F.interpolate
    x = obs_s[:n].permute(0, 3, 1, 2).contiguous().to(torch.float32)
    x = F.interpolate(x, size=(TILE_H, TILE_W), mode='area')
    # back to channel-last uint8
    x = x.clamp(0, 255).to(torch.uint8).permute(0, 2, 3, 1).contiguous()
    return x


def _composite_grid(tiles):
    """Place tiles row-major into an (OUT_H, OUT_W, 3) uint8 canvas.

    Layout fills all cells except the bottom row outer 4+4 (eight blanks),
    leaving 64 usable cells. Tiles beyond the usable cells are ignored and
    unused cells stay zero. The canvas lives on ``tiles.device``.

    Args:
        tiles: Tensor of shape (k, TILE_H, TILE_W, 3).
    """
    k = tiles.shape[0]
    grid = torch.zeros((OUT_H, OUT_W, 3), dtype=torch.uint8, device=tiles.device)

    vi = 0  # index of the next tile to place
    for cell in range(GRID_COLS * GRID_ROWS):
        if vi >= k:
            break  # nothing left to place; remaining cells stay blank
        r, c = divmod(cell, GRID_COLS)
        # blanks at bottom row left 4 and right 4
        if r == GRID_ROWS - 1 and (c <= 3 or c >= 8):
            continue
        y0, x0 = r * TILE_H, c * TILE_W
        grid[y0:y0 + TILE_H, x0:x0 + TILE_W] = tiles[vi]
        vi += 1
    return grid


def _stats_header_and_banner(games):
    """Create empty episode files, reset ``simplestats.csv`` and print the banner.

    Args:
        games: Iterable of game ids, in the column order used for the CSV.
    """
    os.makedirs("episodes_tmp", exist_ok=True)
    for g in games:
        # touch both per-episode files so later readers always find them
        open(epf(g), "a").close()
        open(lpf(g), "a").close()

    # header line for simplestats.csv
    short_names = (g.split('/')[-1].replace('-v5', '') for g in games)
    with open("simplestats.csv", "w") as f:
        f.write("ts," + ",".join(short_names) + '\n')

    # console header
    print(f"{'time_s,':>8} {'game,':<26} {'steps,':>11} {'reward'}")


def _write_stats_row(info_s, games, first_start_at):
    """Print per-game totals and append one reward-per-step row to the CSV.

    Args:
        info_s: Counter tensor (column 0 reward, column 1 steps).
        games: Game ids, one per row of ``info_s``.
        first_start_at: ``time.time()`` value that elapsed seconds are measured from.
    """
    stats = info_s.clone().cpu()
    ts = int(time.time() - first_start_at)

    for i, game in enumerate(games):
        steps, reward = stats[i, 1].item(), stats[i, 0].item()
        print(f"{ts:>7,}, {game:<25}, {int(steps):>10,}, {reward:>12.2f}")

    total_rewards, total_steps = stats[:, 0], stats[:, 1]
    # games with no steps yet report 0 instead of a division result
    reward_per_step = torch.where(total_steps > 0, total_rewards / total_steps, 0.0)
    with open("simplestats.csv", "a") as f:
        f.write(f"{ts}," + ",".join(f"{v:.4f}" for v in reward_per_step) + '\n')


def _read_values(path, cast):
    """Read *path* and return ``cast(line)`` for every non-blank line."""
    with open(path) as f:
        return [cast(line) for line in f if line.strip()]


def _final_scoring(info_s, games):
    """Compute end-of-run scores and dump them, plus episode logs, as JSON.

    Writes ``badnesses.json``, ``badness.json``,
    ``all_episode_rewards_raw.json`` and ``mean_episode_reward_raw.json``
    in the current directory.

    Args:
        info_s: Counter tensor (columns: reward, steps, terminations, truncations).
        games: Game ids whose episode files are read back.
    """
    # --- FINAL SCORE ---
    print("\n--- FINAL SCORE ---")
    stats = info_s.clone().cpu()
    total_rewards, total_steps = stats[:, 0], stats[:, 1]
    num_resets = stats[:, 2] + stats[:, 3]
    adj_rewards = total_rewards - 5.0 * num_resets  # each reset costs 5 reward
    # environments with 72000 steps or fewer score 0 reward per step
    reward_per_step = torch.where(total_steps > 72000, adj_rewards / total_steps, 0.0)
    env_badnesses = torch.where(reward_per_step <= 0, 1e7, 1.0 / reward_per_step)
    env_badnesses = env_badnesses.clamp(1e-6, 1e7)
    run_badness = torch.exp(torch.log(env_badnesses).mean()).item()  # geometric mean
    env_badnesses = env_badnesses.tolist()

    with open('badnesses.json', 'w') as f:
        json.dump(env_badnesses, f)
    with open('badness.json', 'w') as f:
        json.dump(run_badness, f)

    # Read the episode logs through context managers so no handle is leaked.
    all_ep = {
        g: {
            "rewards": _read_values(epf(g), float),
            "lengths": _read_values(lpf(g), int),
        }
        for g in games
    }
    with open('all_episode_rewards_raw.json', 'w') as f:
        json.dump(all_ep, f)

    mean_ep = {
        g: (sum(v["rewards"]) / len(v["rewards"]) if v["rewards"] else 0.0)
        for g, v in all_ep.items()
    }
    with open('mean_episode_reward_raw.json', 'w') as f:
        json.dump(mean_ep, f)

    print(f"{env_badnesses=}")  # ignore
    print(f"{run_badness=}")  # ignore
    # SCORING HAS CHANGED: there is a new scoring function outside this file.
    # basically just the total reward of the top E episodes in the run. and
    # some other stuff.


def bg_record_proc(obs_s, info_s, shutdown, games, first_start_at, out_path="12x6_1080_30.mp4"):
    """Stream a tiled video of the observations to ffmpeg and log stats.

    Runs until ``shutdown`` is set: every 1/30 s a composite frame is piped
    to an ffmpeg subprocess, and roughly every 15 s a stats row is written.
    After the loop, ffmpeg is finalised and final scoring runs.

    Args:
        obs_s: Observation batch read afresh each frame. The original note
            says cuda uint8 (N, 250, 160, 3); not verified here.
        info_s: Counter tensor shared with the env loggers.
        shutdown: Object with ``is_set()`` that signals the loop to stop.
        games: Game ids, one per row of ``info_s``.
        first_start_at: ``time.time()`` reference for elapsed-time reporting.
        out_path: Output video file passed to ffmpeg.

    Raises:
        KeyError / ValueError: if ``CUDA_VISIBLE_DEVICES`` is unset or is not
            a single integer.
    """
    fps = 30
    gpuidx = int(os.environ['CUDA_VISIBLE_DEVICES'])
    print(f'bg_record_proc: {gpuidx=}')

    _stats_header_and_banner(games)

    cmd = [
        "ffmpeg", "-hide_banner", "-loglevel", "error", "-y",
        "-hwaccel", "cuda", "-hwaccel_output_format", "cuda",
        "-f", "rawvideo", "-vcodec", "rawvideo",
        "-s", f"{OUT_W}x{OUT_H}", "-pix_fmt", "rgb24", "-r", str(fps),
        "-i", "-", "-an", "-r", str(fps),
        "-c:v", "h264_nvenc", "-preset", "p3", "-pix_fmt", "yuv420p",
        "-movflags", "+faststart",
        out_path,
    ]
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE)

    period = 1.0 / float(fps)
    next_due = time.time()
    next_stats_due = time.time() + 15.0

    try:
        while not shutdown.is_set():
            now = time.time()
            if now < next_due:
                time.sleep(next_due - now)

            # video frame
            tiles = _prep_tiles(obs_s)
            frame = _composite_grid(tiles)  # (1080,1920,3) u8
            buf = frame.cpu().numpy().tobytes()
            p.stdin.write(buf)
            next_due += period

            # periodic stats every ~15s, identical formatting/behavior
            if now >= next_stats_due:
                _write_stats_row(info_s, games, first_start_at)
                next_stats_due += 15.0
    finally:
        # finalize ffmpeg even if the frame loop raised, so the encoder is
        # never left running with an open pipe
        try:
            p.stdin.close()
        except BrokenPipeError:
            pass  # ffmpeg already exited; still reap it below
        p.wait()

    # small grace to let env threads flush ep files in log_close
    time.sleep(2.0)

    _final_scoring(info_s, games)