Skip to content

Instantly share code, notes, and snippets.

View omar-florez's full-sized avatar

Omar U. Florez omar-florez

  • Twitter - ML Research
  • San Francisco, California, USA
View GitHub Profile
@omar-florez
omar-florez / grpo_demo.py
Created March 27, 2025 20:09 — forked from willccbb/grpo_demo.py
GRPO Llama-1B
# train_grpo.py
#
# See https://github.com/willccbb/verifiers for ongoing developments
#
import re
import torch
from datasets import load_dataset, Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import LoraConfig
from trl import GRPOConfig, GRPOTrainer
#!/bin/sh
# Install Homebrew unless a brew executable already exists at
# /usr/local/bin/brew. Takes no arguments.
setup_brew () {
    # `!`, `[` and `]` must be separate, space-delimited words in sh.
    # Written as `![-f ...]` the shell looks up a command literally named
    # `![-f`, the lookup fails, and the installer below is never reached.
    if ! [ -f "/usr/local/bin/brew" ]; then
        # Download the installer script with curl and run it through ruby.
        # NOTE(review): this is the legacy mxcl/homebrew installer URL;
        # confirm it still resolves before relying on it.
        /usr/bin/ruby -e "$(/usr/bin/curl -fsSL https://raw.github.com/mxcl/homebrew/master/Library/Contributions/install_homebrew.rb)"
    fi
}
setup_ipython () {
brew install readline
#!/bin/sh
# Install Homebrew unless a brew executable already exists at
# /usr/local/bin/brew. Takes no arguments.
setup_brew () {
    # The test needs whitespace around `!`, `[` and `]`; without it the
    # whole `![-f` token is treated as a command name that does not exist,
    # so the condition is never true and nothing gets installed.
    if ! [ -f "/usr/local/bin/brew" ]; then
        # Fetch the installer script via curl and hand it to ruby to execute.
        # NOTE(review): legacy mxcl/homebrew installer location; verify the
        # URL is still served before depending on this path.
        /usr/bin/ruby -e "$(/usr/bin/curl -fsSL https://raw.github.com/mxcl/homebrew/master/Library/Contributions/install_homebrew.rb)"
    fi
}
setup_ipython () {
brew install readline