Skip to content

Instantly share code, notes, and snippets.

View amulil's full-sized avatar
:octocat:
Focusing

amulil

:octocat:
Focusing
View GitHub Profile
@amulil
amulil / grpo_demo.py
Created February 18, 2025 15:18 — forked from willccbb/grpo_demo.py
GRPO Llama-1B
# train_grpo.py
#
# See https://github.com/willccbb/verifiers for ongoing developments
#
import re
import torch
from datasets import load_dataset, Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import LoraConfig
from trl import GRPOConfig, GRPOTrainer
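# NOTE: the two values below are placeholders, not usable settings. Replace
# them with your actual cluster name and controller hostname before use.
ClusterName=config your cluster name
SlurmctldHost=config your host name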
MpiDefault=pmix
ProctrackType=proctrack/linuxproc
ReturnToService=1
SlurmctldPidFile=/var/run/slurmctld.pid
SlurmctldPort=8086
SlurmdPidFile=/var/run/slurmd.pid