# very messy. you can ignore this file. lots of different stats recorded.


#!/usr/bin/env python3
import os, time, subprocess, threading, pickle, json
import torch
import torch.nn.functional as F

# Video mosaic geometry: a 12-column by 6-row grid of 160x180 px tiles,
# which multiplies out to a 1920x1080 output frame.
GRID_COLS = 12
GRID_ROWS = 6
TILE_W = 160
TILE_H = 180
OUT_W = GRID_COLS * TILE_W
OUT_H = GRID_ROWS * TILE_H

# Per-game output paths under episodes_tmp/. Every '/' in the game id is
# replaced with '__' so the id maps onto one flat filename. The suffixes are
# kept exactly as they were so existing files keep being appended to.
def _episode_file(g, suffix):
    """Return ``episodes_tmp/<game id with '/' -> '__'><suffix>``."""
    return f"episodes_tmp/{g.replace('/','__')}{suffix}"


def csvf(g):
    """Return the path of the per-step CSV log for game id *g*."""
    return _episode_file(g, "_rawer.csv")


def epf(g):
    """Return the path of the per-episode reward log (one value per line)."""
    return _episode_file(g, "_raw.jsonl")


def lpf(g):
    """Return the path of the per-episode length log (one value per line)."""
    return _episode_file(g, ".lenl")

# thread-local per-env logger state: bind_logger() populates it and
# log_step()/log_close() read it back. Attributes set on a threading.local
# are only visible to the thread that set them, so all three calls for one
# env have to be made from that env's own thread.
_tl = threading.local()

def bind_logger(game_id, g_idx, info_s):
    """Initialise the calling thread's logger state for one environment.

    Args:
        game_id: game identifier; used to derive the log file names.
        g_idx: first index into ``info_s`` selecting this env's counters.
        info_s: counter container that ``log_step`` updates in place.

    Side effects:
        Creates ``episodes_tmp/`` if needed and opens the per-step CSV in
        append mode (the handle is kept until ``log_close``). When the
        ``savetriples`` environment variable parses to an integer > 0,
        also creates ``recording/`` and remembers the pickle path used by
        ``log_step``.
    """
    os.makedirs("episodes_tmp", exist_ok=True)

    # The per-episode files are not touched here; they come into existence
    # the first time something is appended to them.
    state = _tl
    state.game_id, state.g_idx, state.info_s = game_id, g_idx, info_s

    # Running totals for the episode in progress, plus the previous action
    # (needed for the action-switch penalty in log_step).
    state.ep, state.ep_len, state.last_action = 0.0, 0, 0

    triples_flag = int(os.getenv('savetriples', 0))
    state.savetriples = triples_flag > 0
    if state.savetriples:
        os.makedirs('recording', exist_ok=True)
        state.triplepath = f"recording/{game_id.split('/')[-1]}_triples.pkl"

    state.csvff = open(csvf(game_id), 'a')

def log_step(action, obs, rew, term, trunc):
    """Record one environment step for the env bound to the calling thread.

    Args:
        action: action taken this step; logged and compared with the
            previous action for the switch penalty.
        obs: observation for this step; only used (through ``obs.copy()``)
            when triple recording is enabled.
        rew: raw reward for this step.
        term: truthy if the episode terminated on this step.
        trunc: truthy if the episode was truncated on this step.

    Note:
        On a terminating/truncating step the episode totals are written and
        reset *before* this step is accumulated, so the ending step's shaped
        reward and length count are carried into the next episode's totals.
        This ordering is intentional and kept for compatibility.
    """
    state = _tl

    # Raw per-step row; always carries the unshaped reward.
    state.csvff.write(f"{action},{rew},{term},{trunc}\n")

    # Episode boundary: emit the totals gathered so far, then start over.
    if term or trunc:
        with open(epf(state.game_id), "a") as reward_file:
            reward_file.write(f"{state.ep}\n")
        with open(lpf(state.game_id), "a") as length_file:
            length_file.write(f"{state.ep_len}\n")
        state.ep, state.ep_len = 0.0, 0

    # Optional (obs, action, raw reward) recording, appended as one pickle
    # record per step.
    if state.savetriples:
        with open(state.triplepath, 'ab+') as triple_file:
            pickle.dump((obs.copy(), action, rew), triple_file)

    # Shaped reward: clip to [-1, 1], then charge a small penalty whenever
    # the action changes to anything other than action 0.
    capped = min(1.0, rew)
    shaped = max(-1.0, capped)
    if action != state.last_action and action != 0:
        shaped -= 0.0001
    state.last_action = action

    # Per-episode running totals.
    state.ep += shaped
    state.ep_len += 1

    # Shared counters for this env, updated in place:
    # column 0 = shaped reward, 1 = frames, 2 = terminations, 3 = truncations.
    counters, slot = state.info_s, state.g_idx
    counters[slot, 0].add_(float(shaped))
    counters[slot, 1].add_(1)
    if term:
        counters[slot, 2].add_(1)
    if trunc:
        counters[slot, 3].add_(1)

def log_close():
    """Flush the in-progress episode and close the calling thread's CSV log.

    If any steps have been accumulated since the last episode boundary, the
    partial episode's reward total and length are appended to the episode
    files. The CSV handle is then closed on a best-effort basis: any
    exception raised while closing it is deliberately ignored.
    """
    state = _tl
    try:
        if state.ep_len:
            pending = ((epf, state.ep), (lpf, state.ep_len))
            for path_for, value in pending:
                with open(path_for(state.game_id), "a") as out:
                    out.write(f"{value}\n")
    finally:
        # Always attempt the close, even if the flush above failed.
        try:
            state.csvff.close()
        except Exception:
            pass

def _prep_tiles(obs_s):
    """Downscale up to the first 64 observations to tile size.

    Args:
        obs_s: channels-last image batch indexed (N, H, W, C). The original
            note describes it as a (N, 250, 160, 3) uint8 CUDA tensor --
            the caller is expected to honour that.

    Returns:
        A contiguous uint8 tensor of shape (n, TILE_H, TILE_W, C) with
        n = min(64, N).
    """
    count = min(64, obs_s.shape[0])
    selected = obs_s[:count]

    # interpolate() wants channels-first floating point input.
    channels_first = selected.permute(0, 3, 1, 2).contiguous()
    as_float = channels_first.to(dtype=torch.float32)
    resized = F.interpolate(as_float, size=(TILE_H, TILE_W), mode='area')

    # Back to 8-bit (the cast truncates any fraction) and channels-last.
    as_bytes = resized.clamp(0, 255).to(dtype=torch.uint8)
    return as_bytes.permute(0, 2, 3, 1).contiguous()

def _composite_grid(tiles):
    """Paste tiles into a black OUT_H x OUT_W mosaic frame.

    Tiles are placed in row-major order. In the bottom row the four
    leftmost and four rightmost columns (col <= 3 or col >= 8) are left
    blank, so those cells are skipped when assigning tiles. Cells without a
    tile stay black; tiles beyond the number of usable cells are ignored.

    Args:
        tiles: tensor indexed (k, TILE_H, TILE_W, 3); its device is used for
            the output frame.

    Returns:
        uint8 tensor of shape (OUT_H, OUT_W, 3) on ``tiles.device``.
    """
    canvas = torch.zeros((OUT_H, OUT_W, 3), dtype=torch.uint8, device=tiles.device)

    # Usable (row, col) cells in fill order, with the bottom-row gaps removed.
    bottom = GRID_ROWS - 1
    slots = [
        (row, col)
        for row in range(GRID_ROWS)
        for col in range(GRID_COLS)
        if not (row == bottom and (col <= 3 or col >= 8))
    ]

    available = tiles.shape[0]
    for idx, (row, col) in enumerate(slots[:available]):
        top, left = row * TILE_H, col * TILE_W
        canvas[top:top + TILE_H, left:left + TILE_W] = tiles[idx]
    return canvas

def _stats_header_and_banner(games):
    """Create the stats files and print the console column header.

    For every game the per-episode reward and length files are created if
    missing (existing content is left alone). ``simplestats.csv`` is
    truncated and given a header row of ``ts`` followed by one short name
    per game: the last '/'-separated component with '-v5' removed.
    """
    os.makedirs("episodes_tmp", exist_ok=True)
    for game in games:
        for path in (epf(game), lpf(game)):
            # Append mode creates the file without disturbing existing data.
            with open(path, "a"):
                pass

    short_names = [game.split('/')[-1].replace('-v5','') for game in games]
    with open("simplestats.csv", "w") as out:
        out.write("ts," + ",".join(short_names) + '\n')

    # Column header for the rows printed by the periodic stats dump.
    print(f"{'time_s,':>8} {'game,':<26} {'steps,':>11} {'reward'}")

def _write_stats_row(info_s, games, first_start_at):
    """Print per-game totals and append one row to ``simplestats.csv``.

    Args:
        info_s: counter tensor; column 0 is read as accumulated reward and
            column 1 as accumulated steps, one row per game.
        games: game names, in the same order as the rows of ``info_s``.
        first_start_at: ``time.time()``-style start timestamp; the row's
            ``ts`` is the whole number of seconds elapsed since then.

    The CSV row holds ``ts`` followed by reward-per-step for every row of
    ``info_s`` (0.0 where no steps have been recorded yet).
    """
    # Work on a CPU copy so the live counters are not read repeatedly.
    snapshot = info_s.clone().cpu()
    elapsed = int(time.time() - first_start_at)

    for row, name in enumerate(games):
        n_steps = snapshot[row, 1].item()
        total = snapshot[row, 0].item()
        print(f"{elapsed:>7,}, {name:<25}, {int(n_steps):>10,}, {total:>12.2f}")

    rewards = snapshot[:, 0]
    steps = snapshot[:, 1]
    per_step = torch.where(steps > 0, rewards / steps, 0.0)
    cells = [f"{value:.4f}" for value in per_step]
    with open("simplestats.csv", "a") as out:
        out.write(f"{elapsed}," + ",".join(cells) + '\n')

def _final_scoring(info_s, games):
    """Compute the end-of-run "badness" scores and dump episode summaries.

    Args:
        info_s: counter tensor, one row per env; columns are read as
            0 = accumulated reward, 1 = steps, 2 = terminations,
            3 = truncations.
        games: game ids whose per-episode reward/length files are read.

    Files written (current working directory):
        badnesses.json                per-env badness list
        badness.json                  geometric mean of the per-env values
        all_episode_rewards_raw.json  {game: {"rewards": [...], "lengths": [...]}}
        mean_episode_reward_raw.json  {game: mean episode reward, 0.0 if none}
    """
    # --- FINAL SCORE ---
    print("\n--- FINAL SCORE ---")
    stats = info_s.clone().cpu()
    total_rewards, total_steps = stats[:, 0], stats[:, 1]

    # Every reset (termination or truncation) costs 5.0 reward.
    num_resets = stats[:, 2] + stats[:, 3]
    adj_rewards = total_rewards - 5.0 * num_resets

    # Envs that did not exceed 72000 steps score 0 reward per step.
    reward_per_step = torch.where(total_steps > 72000, adj_rewards / total_steps, 0.0)

    # Badness is the inverse reward rate; non-positive rates get the worst
    # value, and everything is clamped so the log below stays finite.
    env_badnesses = torch.where(reward_per_step <= 0, 1e7, 1.0 / reward_per_step)
    env_badnesses = env_badnesses.clamp(1e-6, 1e7)
    run_badness = torch.exp(torch.log(env_badnesses).mean()).item() # geometric mean
    env_badnesses = env_badnesses.tolist()
    with open('badnesses.json','w') as f: json.dump(env_badnesses, f)
    with open('badness.json','w') as f: json.dump(run_badness, f)

    # Read the per-episode logs back (one number per non-blank line). The
    # files are opened in `with` blocks so the handles are closed right away
    # instead of being left for the garbage collector.
    all_ep = {}
    for g in games:
        with open(epf(g)) as f:
            rewards = [float(x) for x in f if x.strip()]
        with open(lpf(g)) as f:
            lengths = [int(x) for x in f if x.strip()]
        all_ep[g] = {"rewards": rewards, "lengths": lengths}
    with open('all_episode_rewards_raw.json','w') as f: json.dump(all_ep, f)

    mean_ep = {g: (sum(v["rewards"])/len(v["rewards"]) if v["rewards"] else 0.0) for g, v in all_ep.items()}
    with open('mean_episode_reward_raw.json','w') as f: json.dump(mean_ep, f)
    print(f"{env_badnesses=}") # ignore
    print(f"{run_badness=}") # ignore
    # NOTE: scoring has changed. The current scoring function lives outside
    # this file; it is basically the total reward of the top E episodes in
    # the run, plus some other adjustments.

def bg_record_proc(obs_s, info_s, shutdown, games, first_start_at, out_path="12x6_1080_30.mp4"):
    """Background recorder: stream a tiled video to ffmpeg and log stats.

    Until ``shutdown`` is set, a mosaic frame is built from ``obs_s`` about
    30 times per second and piped to an ffmpeg (h264_nvenc) process writing
    ``out_path``; roughly every 15 seconds a stats row is written as well.
    After shutdown, ffmpeg is finalised and the final scoring is run.

    Args:
        obs_s: live observation tensor, re-read on every frame (the original
            note describes it as a CUDA uint8 tensor of (N, 250, 160, 3)).
        info_s: shared counter tensor passed to the stats/scoring helpers.
        shutdown: object with ``is_set()`` (e.g. an Event) that ends the loop.
        games: game ids, in the row order of ``info_s``.
        first_start_at: ``time.time()``-style run start timestamp.
        out_path: output video file.
    """
    fps = 30

    # Diagnostic only. CUDA_VISIBLE_DEVICES may be unset or hold something
    # other than one integer (a device list or UUIDs), so fall back to
    # printing the raw value instead of crashing the recorder.
    visible = os.environ.get('CUDA_VISIBLE_DEVICES')
    try:
        gpuidx = int(visible)
    except (TypeError, ValueError):
        gpuidx = visible
    print(f'bg_record_proc: {gpuidx=}')

    _stats_header_and_banner(games)
    cmd = [
        "ffmpeg","-hide_banner","-loglevel","error","-y",
        "-hwaccel", "cuda", "-hwaccel_output_format", "cuda",
        "-f","rawvideo","-vcodec","rawvideo",
        "-s", f"{OUT_W}x{OUT_H}",
        "-pix_fmt","rgb24",
        "-r", str(fps),
        "-i","-",
        "-an","-r", str(fps),
        "-c:v","h264_nvenc","-preset","p3","-pix_fmt","yuv420p","-movflags","+faststart",
        out_path
    ]
    p = subprocess.Popen(cmd, stdin=subprocess.PIPE)

    period = 1.0 / float(fps)
    next_due = time.time()
    next_stats_due = time.time() + 15.0
    try:
        while not shutdown.is_set():
            now = time.time()
            if now < next_due:
                time.sleep(next_due - now)
            # video frame
            tiles = _prep_tiles(obs_s)
            frame = _composite_grid(tiles)  # (OUT_H, OUT_W, 3) u8
            buf = frame.cpu().numpy().tobytes()
            p.stdin.write(buf)
            # Advance by a fixed period (not from "now") so the long-run
            # frame rate stays at fps even when individual frames run late.
            next_due += period
            # periodic stats every ~15s
            if now >= next_stats_due:
                _write_stats_row(info_s, games, first_start_at)
                next_stats_due += 15.0
    finally:
        # Always finalise ffmpeg, including when the loop raised (e.g. a
        # broken pipe because ffmpeg exited), so the child is never left
        # running with an open stdin.
        try:
            p.stdin.close()
        except OSError as exc:
            print(f'bg_record_proc: closing ffmpeg stdin failed: {exc!r}')
        returncode = p.wait()
        if returncode:
            print(f'bg_record_proc: ffmpeg exited with code {returncode}')

    # small grace to let env threads flush ep files in log_close
    time.sleep(2.0)
    _final_scoring(info_s, games)