This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # train_grpo.py | |
| import re | |
| import torch | |
| from datasets import load_dataset, Dataset | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| from peft import LoraConfig | |
| from trl import GRPOConfig, GRPOTrainer | |
| # Load and prep dataset |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # .github/workflows/changelog_generator.yml | |
| name: Generate Changelog and Post to Slack | |
| on: | |
| schedule: | |
| # This will run every Friday at 3 PM UTC | |
| - cron: "0 15 * * 5" | |
| jobs: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| {"problem_type": "dense_la", "language": "cpp", "name": "00_dense_la_lu_decomp", "parallelism_model": "cuda", "prompt": "/* Factorize the matrix A into A=LU where L is a lower triangular matrix and U is an upper triangular matrix.\n Store the results for L and U into the original matrix A. \n A is an NxN matrix stored in row-major.\n Use CUDA to compute in parallel. The kernel is launched on an NxN grid of threads.\n Example:\n\n input: [[4, 3], [6, 3]]\n output: [[4, 3], [1.5, -1.5]]\n*/\n__global__ void luFactorize(double *A, size_t N) {"} | |
| {"problem_type": "dense_la", "language": "cpp", "name": "01_dense_la_solve", "parallelism_model": "cuda", "prompt": "/* Solve the linear system Ax=b for x.\n A is an NxN matrix in row-major. x and b have N elements.\n Use CUDA to compute in parallel. The kernel is launched on an NxN grid of threads.\n Example:\n \n input: A=[[1,4,2], [1,2,3], [2,1,3]] b=[11, 11, 13]\n output: x=[3, 1, 2]\n*/\n__global__ void solveLinearSystem(const double *A, const |