Skip to content

Instantly share code, notes, and snippets.

@jarrelscy
jarrelscy / grpo_demo.py
Created March 19, 2025 21:13 — forked from willccbb/grpo_demo.py
GRPO Llama-1B
# train_grpo.py
#
# See https://github.com/willccbb/verifiers for ongoing developments
#
import re
import torch
from datasets import load_dataset, Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import LoraConfig
from trl import GRPOConfig, GRPOTrainer
@jarrelscy
jarrelscy / train.py
Created February 11, 2025 23:17
Sample Unsloth GRPO
# Script preamble: load Unsloth, patch GRPO support into FastLanguageModel,
# then set up Weights & Biases logging. Everything here runs at import time.
# NOTE(review): PatchFastRL is called between the imports, presumably so the
# patch is in place before any trainer code is loaded — confirm against the
# Unsloth documentation before reordering these lines.
from unsloth import FastLanguageModel, PatchFastRL
PatchFastRL("GRPO", FastLanguageModel)
from unsloth import is_bfloat16_supported
import torch
import wandb # Import wandb for logging
# Initialize wandb under the "unsloth-grpo" project. This is a module-level
# side effect: it executes as soon as the script is run or imported.
wandb.init(project="unsloth-grpo")
# Creates a storage pool from the poolable physical disks and defines one SSD
# tier and one HDD tier in that pool, both using the Mirror resiliency setting.

# Friendly names used for the objects created below.
# NOTE(review): $TieredSpaceName is not referenced in the lines visible here;
# presumably a later step creates the tiered space with it — confirm.
$StoragePoolName = "MyStoragePool"
$TieredSpaceName = "MyTieredSpace"
$SSDTierName = "SSDTier"
$HDDTierName = "HDDTier"
# Select disks that can be pooled and whose MediaType is not UnSpecified.
$PhysicalDisks = (Get-PhysicalDisk -CanPool $True | Where MediaType -NE UnSpecified)
# Friendly name of the storage subsystem that will host the pool.
# NOTE(review): assumes Get-StorageSubSystem returns exactly one subsystem;
# with several, this would hold more than one name — confirm on the target host.
$SubSysName = (Get-StorageSubSystem).FriendlyName
# Create the pool from the selected disks on that subsystem.
New-StoragePool -PhysicalDisks $PhysicalDisks -StorageSubSystemFriendlyName $SubSysName -FriendlyName $StoragePoolName
# Print the name and media type of every disk now in the pool
# (presumably a sanity check before the media-type-specific tiers are created).
Get-StoragePool -FriendlyName $StoragePoolName | Get-PhysicalDisk | Select FriendlyName, MediaType
# Define the SSD and HDD tiers in the pool; both use Mirror resiliency with
# -PhysicalDiskRedundancy 1. The tier objects are kept in variables for later use.
$SSDTier = New-StorageTier -StoragePoolFriendlyName $StoragePoolName -FriendlyName $SSDTierName -MediaType SSD -ResiliencySettingName Mirror -PhysicalDiskRedundancy 1
$HDDTier = New-StorageTier -StoragePoolFriendlyName $StoragePoolName -FriendlyName $HDDTierName -MediaType HDD -ResiliencySettingName Mirror -PhysicalDiskRedundancy 1