Skip to content

Instantly share code, notes, and snippets.

View darrenangle's full-sized avatar
🏋️

darren angle darrenangle

🏋️
View GitHub Profile
@darrenangle
darrenangle / grpo_demo.py
Created January 28, 2025 23:48 — forked from willccbb/grpo_demo.py
GRPO Llama-1B
# train_grpo.py
import re
from datasets import load_dataset, Dataset
from transformers import AutoTokenizer
from peft import LoraConfig
from trl import GRPOConfig, GRPOTrainer
# Load and prep dataset
SYSTEM_PROMPT = """
@darrenangle
darrenangle / choice_tree.py
Created June 3, 2024 13:44 — forked from wassname/choice_tree.py
For Hugging Face transformers, sometimes you want to constrain output to a JSON schema and record the probabilities of the choices/enums. I use it when rating or judging. It's much more efficient than sampling multiple times.
from jaxtyping import Float, Int
import torch
from torch.nn import functional as F
from torch import Tensor
from typing import List, Callable, Tuple, Dict, Optional
import pandas as pd
from transformers import AutoModelForCausalLM, AutoTokenizer
def get_valid_next_choices(choices_tokens, current_tokens):
@darrenangle
darrenangle / vllm.py
Last active March 20, 2024 16:56
vllm server wrapper with auto-restart, plus a vllm node script that retries requests with exponential backoff
import subprocess
import time
import re
import signal
import sys
import select
import os
def start_server():
command = [
@darrenangle
darrenangle / embed_and_load_arxiv.py
Created January 24, 2024 13:09
embed and load 2.5M arxiv abstracts into qdrant
import json
import numpy as np
from FlagEmbedding import FlagModel
from qdrant_client import QdrantClient
from qdrant_client.http.models import Batch, VectorParams, Distance
import uuid
# Initialize the Qdrant client
client = QdrantClient(host="localhost", port=6333)
@darrenangle
darrenangle / tools.txt
Last active May 5, 2024 20:44
A prompt template for tool use with open LLMs, tested with Mistral, OpenChat, and SOLAR.
# WHO ARE YOU
- You are an AI trained to call functions in order to solve problems.
- You are an expert in XML outputs and well-formatted JSON.
# YOUR TASK
- You will be given a scenario that requires a decision.
- After thinking quietly about the scenario and reflecting on all of your options, you will respond by using a tool.