Created
September 8, 2024 14:14
-
-
Save BexTuychiev/0159e9dd4cbba8b7903368ebfd31c44c to your computer and use it in GitHub Desktop.
Revisions
-
BexTuychiev created this gist
Sep 8, 2024 .There are no files selected for viewing
"""Benchmark LightGBM vs. XGBoost training runtime and test accuracy across
increasing synthetic dataset sizes, logging both metric series to Neptune as
two separate, comparable runs."""

import os
import time
import warnings

import neptune
from dotenv import load_dotenv
from lightgbm import LGBMClassifier
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
# tqdm.auto picks the notebook widget inside Jupyter and a console bar
# otherwise; tqdm.notebook fails outside Jupyter / without ipywidgets.
from tqdm.auto import tqdm
from xgboost import XGBClassifier

load_dotenv()
warnings.filterwarnings("ignore")

# Benchmark configuration: dataset sizes swept from MIN_SAMPLES to MAX_SAMPLES.
MIN_SAMPLES = 1000
MAX_SAMPLES = 20000
STEP = 1000
# Fixed seed so dataset generation and the split are reproducible — the
# models were already seeded with 42, the data pipeline was not.
RANDOM_STATE = 42


def create_run(name):
    """Initialize and return a Neptune run identified by ``name``.

    Credentials are read from the NEPTUNE_API_TOKEN / NEPTUNE_PROJECT_NAME
    environment variables (populated from .env by ``load_dotenv`` above).
    """
    return neptune.init_run(
        project=os.getenv("NEPTUNE_PROJECT_NAME"),
        api_token=os.getenv("NEPTUNE_API_TOKEN"),
        custom_run_id=name,
    )


def _timed_fit_score(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` and return ``(training_runtime_seconds, test_accuracy)``."""
    start = time.time()
    model.fit(X_train, y_train)
    runtime = time.time() - start
    accuracy = accuracy_score(y_test, model.predict(X_test))
    return runtime, accuracy


def main():
    """Run the full benchmark sweep and log results to Neptune."""
    # One Neptune run per library so the two metric series can be compared.
    lgbm_run = create_run("LightGBM")
    xgb_run = create_run("XGBoost")

    for sample_size in tqdm(range(MIN_SAMPLES, MAX_SAMPLES + STEP, STEP)):
        # Fresh synthetic dataset for each size; seeded for reproducibility.
        X, y = make_classification(n_samples=sample_size, random_state=RANDOM_STATE)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, stratify=y, random_state=RANDOM_STATE
        )

        xgb_runtime, xgb_accuracy = _timed_fit_score(
            XGBClassifier(random_state=42, verbosity=0),
            X_train, X_test, y_train, y_test,
        )
        lgbm_runtime, lgbm_accuracy = _timed_fit_score(
            LGBMClassifier(random_state=42, verbosity=-1),
            X_train, X_test, y_train, y_test,
        )

        # Log each metric against the dataset size as the series step.
        lgbm_run["metrics/comparison/runtime"].append(lgbm_runtime, step=sample_size)
        lgbm_run["metrics/comparison/accuracy"].append(lgbm_accuracy, step=sample_size)
        xgb_run["metrics/comparison/accuracy"].append(xgb_accuracy, step=sample_size)
        xgb_run["metrics/comparison/runtime"].append(xgb_runtime, step=sample_size)

    # Flush buffered metrics and close both runs.
    xgb_run.stop()
    lgbm_run.stop()


if __name__ == "__main__":
    main()