Skip to content

Instantly share code, notes, and snippets.

@ilyakhov
ilyakhov / pytorch_trainer.py
Created June 27, 2023 16:25
Toy PyTorch model trainer implementation
from tqdm.auto import tqdm
class Trainer:
def __init__(
self,
model,
train_loader,
test_loader,
criterion,
optimizer,
@ilyakhov
ilyakhov / gpt_ppl.py
Created March 21, 2023 14:28
Autoregressive LLM Perplexity Score Evaluation
from transformers import GPT2LMHeadModel, GPT2TokenizerFast
import torch
from tqdm import tqdm
# Module-level setup: choose the device and load the GPT-2 model and tokenizer
# once at import time so they can be passed to the scoring function.
# The model is placed on the CPU.
device = "cpu"
# Hugging Face checkpoint name; the trailing comment is the leftover suffix of
# the "gpt2-large" variant.
model_id = "gpt2" #-large"
# Load the pretrained language-model head for `model_id` and move it to `device`.
gpt2_model = GPT2LMHeadModel.from_pretrained(model_id).to(device)
# Load the tokenizer from the same checkpoint name as the model.
gpt2_tokenizer = GPT2TokenizerFast.from_pretrained(model_id)
def gpt2_ppl_score(model, tokenizer, sequence):
@ilyakhov
ilyakhov / load_dotenv.sh
Created November 29, 2021 20:32 — forked from mihow/load_dotenv.sh
Load environment variables from dotenv / .env file in Bash
# Load variables from a dotenv (.env) file in the current directory into the
# environment of the current shell.
#
# The guard must be `-f` (file exists). The previous `! -f` test was inverted:
# it attempted the export only when .env was missing (so `cat` failed) and
# skipped it whenever the file was actually present.
if [ -f .env ]
then
  # xargs joins the file's lines into one list of KEY=VALUE words; the command
  # substitution is left unquoted on purpose so `export` receives each word
  # as a separate argument.
  export $(cat .env | xargs)
fi
@ilyakhov
ilyakhov / rank_metrics.py
Created January 15, 2021 09:27 — forked from bwhite/rank_metrics.py
Ranking Metrics
"""Information Retrieval metrics
Useful Resources:
http://www.cs.utexas.edu/~mooney/ir-course/slides/Evaluation.ppt
http://www.nii.ac.jp/TechReports/05-014E.pdf
http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
http://hal.archives-ouvertes.fr/docs/00/72/67/60/PDF/07-busa-fekete.pdf
Learning to Rank for Information Retrieval (Tie-Yan Liu)
"""
import numpy as np
@ilyakhov
ilyakhov / features_percentiles.py
Created August 29, 2017 11:01
Calculate the percentile (decile) distribution of feature columns
from collections import defaultdict
import numpy as np
def get_features_percentiles(DF, features_cols, percentiles=range(10, 101, 10)):
    """Compute rounded percentile values for each requested column.

    Args:
        DF: Any object indexable by column name (``DF[col]``) whose items
            are accepted by ``np.percentile``.
        features_cols: Iterable of column names to summarise.
        percentiles: Percentile levels to evaluate, each in ``[0, 100]``.
            Defaults to the deciles 10, 20, ..., 100.

    Returns:
        A ``defaultdict(dict)`` mapping each column name to a dict of
        ``{percentile_level: value rounded to 3 decimals}``. Looking up a
        column that was not requested yields an empty dict.
    """
    # Materialise once: the levels are reused for every column (and as the
    # dict keys), and a one-shot iterator would otherwise be exhausted after
    # the first column.
    levels = tuple(percentiles)
    column2percentiles = defaultdict(dict)
    for col in features_cols:
        values = np.percentile(DF[col], levels)
        column2percentiles[col] = {
            level: round(value, 3) for level, value in zip(levels, values)
        }
    return column2percentiles