Luca Massaron (lmassaron)
"Frustra fit per plura quod potest fieri per pauciora" (It is futile to do with more what can be done with less.)
@lmassaron
lmassaron / token_analysis.py
Last active September 27, 2025 08:42
Token analysis
# %%
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
from transformers import AutoTokenizer
# %%
model_name = "google/gemma-3-1b-it"
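
The gist preview cuts off after model_name; a minimal continuation sketch, assuming the analysis loads the tokenizer and histograms token counts per text (the sample texts below are hypothetical, not from the gist):

# %%
tokenizer = AutoTokenizer.from_pretrained(model_name)

texts = ["Frustra fit per plura quod potest fieri per pauciora",
         "Entities should not be multiplied beyond necessity."]
token_counts = [len(tokenizer.encode(text)) for text in tqdm(texts)]

# %%
pd.Series(token_counts).plot.hist(bins=20)
plt.xlabel("Tokens per text")
plt.show()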
@lmassaron
lmassaron / personal-assistant-for-knowledge-management-based-on-gemini-on-vertex-ai.ipynb
Last active February 26, 2025 18:07
Personal Assistant for knowledge management based on Gemini on Vertex AI.ipynb
(Notebook preview not available.)
import math

class RunningStats:
    """Welford's online algorithm for numerically stable running statistics."""

    def __init__(self):
        self.count = 0
        self.mean = 0.0
        self.M2 = 0.0  # running sum of squared deviations from the mean

    def update(self, value):
        self.count += 1
        delta = value - self.mean
        self.mean += delta / self.count
        self.M2 += delta * (value - self.mean)
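
A hedged sketch of the usual Welford accessors plus a usage example (the method names are assumptions, not shown in the gist preview):

    def variance(self):
        # Sample variance; needs at least two observations.
        return self.M2 / (self.count - 1) if self.count > 1 else float("nan")

    def std(self):
        return math.sqrt(self.variance())

# Usage:
stats = RunningStats()
for value in (2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0):
    stats.update(value)
print(stats.mean, stats.variance())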
@lmassaron
lmassaron / decorator_example
Created May 15, 2023 06:31
An example of how to build a decorator
import functools

def decorator(func_to_decorate):
    """
    @functools.wraps(func) updates .__name__ and .__doc__ so that code
    completion works in editors and you can pull up documentation.
    """
    @functools.wraps(func_to_decorate)
    def wrapper(*args, **kwargs):
        # Do something before the wrapped call
        result = func_to_decorate(*args, **kwargs)
        # Do something after the wrapped call
        return result
    return wrapper
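
A usage sketch (hypothetical function, not in the gist preview):

@decorator
def greet(name):
    """Return a greeting."""
    return f"Hello, {name}!"

print(greet("Luca"))   # the wrapped call still works
print(greet.__name__)  # 'greet', preserved by functools.wraps
print(greet.__doc__)   # docstring preserved as well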
@lmassaron
lmassaron / theils_u
Created March 19, 2023 12:30
Theil's U & Cramer's V
import numpy as np
import pandas as pd
import scipy.stats as ss

def cramers_v(x, y):
    """Bias-corrected Cramer's V between two categorical series."""
    confusion_matrix = pd.crosstab(x, y)
    chi2 = ss.chi2_contingency(confusion_matrix)[0]
    n = confusion_matrix.sum().sum()
    phi2 = chi2 / n
    r, k = confusion_matrix.shape
    phi2corr = max(0, phi2 - ((k - 1) * (r - 1)) / (n - 1))
    rcorr = r - ((r - 1) ** 2) / (n - 1)
    kcorr = k - ((k - 1) ** 2) / (n - 1)
    return np.sqrt(phi2corr / min((kcorr - 1), (rcorr - 1)))
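
The gist title also names Theil's U, which isn't visible in the preview; a minimal sketch of the uncertainty coefficient, assuming the usual entropy-based definition:

import math
from collections import Counter

def conditional_entropy(x, y):
    """H(X|Y): entropy of x conditioned on y."""
    y_counts = Counter(y)
    xy_counts = Counter(zip(x, y))
    total = len(x)
    entropy = 0.0
    for (x_val, y_val), count in xy_counts.items():
        p_xy = count / total
        p_y = y_counts[y_val] / total
        entropy -= p_xy * math.log(p_xy / p_y)
    return entropy

def theils_u(x, y):
    """Theil's U of x given y: (H(X) - H(X|Y)) / H(X), in [0, 1]."""
    h_x_given_y = conditional_entropy(x, y)
    counts = Counter(x)
    total = len(x)
    h_x = -sum((c / total) * math.log(c / total) for c in counts.values())
    return 1.0 if h_x == 0 else (h_x - h_x_given_y) / h_x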
@lmassaron
lmassaron / gist:700cad521198798cfb901ba9a711ae80
Created October 27, 2022 09:36
Fixed effects in linear regression
https://lost-stats.github.io/Model_Estimation/OLS/fixed_effects_in_linear_regression.html
Dealing with zeros and ones in a beta regression
------------------------------------------------
Smithson, M. & Verkuilen, J. (2006). A better lemon squeezer? Maximum-likelihood
regression with beta-distributed dependent variables. Psychological Methods, 11(1),
54–71. DOI: 10.1037/1082-989X.11.1.54
https://stats.stackexchange.com/questions/31300/dealing-with-0-1-values-in-a-beta-regression

See also: zero-one inflated beta regression.
from scipy.stats import beta
import numpy as np

data = np.array([0.0, 0.0, 0.1, 0.1, 0.2, 0.4, 0.5, 0.7, 0.8, 0.8, 0.9, 1.0, 1.0, 1.0])

# Nudge exact 0s and 1s into the open interval (0, 1), since the beta
# density is undefined at the boundaries.
eps = 0.000001
data[data == 0.0] += eps
data[data == 1.0] -= eps

# Fit a beta distribution with location and scale fixed to (0, 1).
a, b, loc, scale = beta.fit(data, floc=0, fscale=1)
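
A hedged sketch of the Smithson & Verkuilen (2006) "lemon squeezer" transformation cited above, which compresses [0, 1] into the open interval instead of nudging boundary values by eps:

# y' = (y * (n - 1) + 0.5) / n keeps every value strictly inside (0, 1).
n = len(data)
squeezed = (data * (n - 1) + 0.5) / n
a_sq, b_sq, _, _ = beta.fit(squeezed, floc=0, fscale=1)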
import tensorflow as tf

# Poly-1 losses from "PolyLoss" (Leng et al., ICLR 2022).
def poly1_cross_entropy(logits, labels, epsilon=1.0):
    # pt, CE, and Poly1 have shape [batch].
    pt = tf.reduce_sum(labels * tf.nn.softmax(logits), axis=-1)
    CE = tf.nn.softmax_cross_entropy_with_logits(labels, logits)
    Poly1 = CE + epsilon * (1 - pt)
    return Poly1

def poly1_focal_loss(logits, labels, epsilon=1.0, gamma=2.0):
    # p, pt, FL, and Poly1 have shape [batch, num of classes].
    p = tf.math.sigmoid(logits)
    pt = labels * p + (1 - labels) * (1 - p)
    FL = -tf.math.pow(1 - pt, gamma) * tf.math.log(pt)  # standard focal term
    Poly1 = FL + epsilon * tf.math.pow(1 - pt, gamma + 1)
    return Poly1
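
A usage sketch with toy tensors (hypothetical values, not from the gist):

logits = tf.constant([[2.0, 0.5, -1.0]])
labels = tf.constant([[1.0, 0.0, 0.0]])
print(poly1_cross_entropy(logits, labels))  # shape [1]
print(poly1_focal_loss(logits, labels))     # shape [1, 3]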
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier

class ClassifierTransformer(BaseEstimator, TransformerMixin):
    """
    Turns a classifier's class estimates into features for a regression
    problem, using out-of-fold (oof) predictions.
    """

    def __init__(self, estimator=None, n_classes=2, cv=3):
        self.estimator = estimator
        self.n_classes = n_classes
        self.cv = cv
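
The preview stops inside __init__; a hedged sketch of how fit/transform might continue, assuming the continuous target is binned into quantile classes and the transform emits out-of-fold class probabilities (every method body below is an assumption, not from the gist):

    # Assumes module-level: import numpy as np
    # and: from sklearn.model_selection import cross_val_predict
    def _bin_target(self, y):
        # Discretize the continuous target into n_classes quantile bins.
        cuts = np.percentile(y, np.linspace(0, 100, self.n_classes + 1)[1:-1])
        return np.digitize(y, cuts)

    def fit(self, X, y=None):
        self.y_classes_ = self._bin_target(y)
        self.estimator.fit(X, self.y_classes_)
        return self

    def transform(self, X):
        return self.estimator.predict_proba(X)

    def fit_transform(self, X, y=None):
        # Out-of-fold probabilities avoid leaking the training labels.
        self.fit(X, y)
        return cross_val_predict(self.estimator, X, self.y_classes_,
                                 cv=self.cv, method="predict_proba")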