Skip to content

Instantly share code, notes, and snippets.

@BexTuychiev
Created September 8, 2024 14:14
Show Gist options
  • Save BexTuychiev/0159e9dd4cbba8b7903368ebfd31c44c to your computer and use it in GitHub Desktop.
Save BexTuychiev/0159e9dd4cbba8b7903368ebfd31c44c to your computer and use it in GitHub Desktop.

Revisions

  1. BexTuychiev created this gist Sep 8, 2024.
    70 changes: 70 additions & 0 deletions xgb_lgbm_neptune.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,70 @@
    import os
    import time
    import warnings
    import neptune

    from dotenv import load_dotenv
    from lightgbm import LGBMClassifier
    from xgboost import XGBClassifier
    from sklearn.datasets import make_classification
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import accuracy_score
    from tqdm.notebook import tqdm

    # Let python-dotenv populate the process environment, then silence every
    # warning category for the rest of the session.
    load_dotenv()
    warnings.simplefilter('ignore')

    # Neptune target project and credentials, read from the environment.
    # Each value is None when its variable is not set.
    api_token = os.environ.get("NEPTUNE_API_TOKEN")
    project = os.environ.get("NEPTUNE_PROJECT_NAME")


    def create_run(name):
        """Start a Neptune run identified by ``name``.

        The project and API token are read from the ``NEPTUNE_PROJECT_NAME``
        and ``NEPTUNE_API_TOKEN`` environment variables each time the
        function is called.

        Args:
            name: Value forwarded to ``neptune.init_run`` as ``custom_run_id``.

        Returns:
            The object returned by ``neptune.init_run``.
        """
        # NOTE(review): ``name`` is sent as ``custom_run_id`` rather than as
        # the run's display name -- presumably so repeated executions map to
        # the same run; confirm this is intended.
        init_kwargs = {
            "project": os.getenv("NEPTUNE_PROJECT_NAME"),
            "api_token": os.getenv("NEPTUNE_API_TOKEN"),
            "custom_run_id": name,
        }
        return neptune.init_run(**init_kwargs)
    # Creating two separate experiments, one per library, so their metric
    # series can be compared against each other.
    lgbm_run = create_run('LightGBM')
    xgb_run = create_run('XGBoost')

    # Configuration for our custom dataset: sample sizes sweep from
    # min_samples up to and including max_samples, in increments of step.
    min_samples = 1000
    max_samples = 20000
    step = 1000

    # try/finally guarantees both runs are stopped even when data generation,
    # training, or logging raises part-way through the sweep.
    try:
        for sample_size in tqdm(range(min_samples, max_samples + step, step)):
            # Generating the dataset of custom sample size.
            # NOTE(review): no random_state is passed to make_classification
            # or train_test_split, so the data differs between executions.
            X, y = make_classification(n_samples=sample_size)
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=0.2, stratify=y
            )

            # XGBoost training. Only fit() is timed; perf_counter() is used
            # because it is a monotonic, high-resolution clock intended for
            # measuring elapsed intervals, unlike the wall-clock time.time().
            xgb_model = XGBClassifier(random_state=42, verbosity=0)
            start = time.perf_counter()
            xgb_model.fit(X_train, y_train)
            end = time.perf_counter()

            xgb_runtime = end - start
            xgb_accuracy = accuracy_score(y_test, xgb_model.predict(X_test))

            # LightGBM training, timed the same way on the same split.
            lgbm_model = LGBMClassifier(random_state=42, verbosity=-1)

            start = time.perf_counter()
            lgbm_model.fit(X_train, y_train)
            end = time.perf_counter()

            lgbm_runtime = end - start
            lgbm_accuracy = accuracy_score(y_test, lgbm_model.predict(X_test))

            # Logging: both runs write to the same field paths, with the
            # sample size as the series step.
            lgbm_run["metrics/comparison/runtime"].append(lgbm_runtime, step=sample_size)
            lgbm_run["metrics/comparison/accuracy"].append(lgbm_accuracy, step=sample_size)
            xgb_run["metrics/comparison/accuracy"].append(xgb_accuracy, step=sample_size)
            xgb_run["metrics/comparison/runtime"].append(xgb_runtime, step=sample_size)
    finally:
        xgb_run.stop()
        lgbm_run.stop()