Skip to content

Instantly share code, notes, and snippets.

@dtrizna
Last active September 21, 2022 06:28
Show Gist options
  • Select an option

  • Save dtrizna/eabf6d9be2862afcf40e92c76ea3d6f0 to your computer and use it in GitHub Desktop.

Select an option

Save dtrizna/eabf6d9be2862afcf40e92c76ea3d6f0 to your computer and use it in GitHub Desktop.

Revisions

  1. dtrizna revised this gist Sep 21, 2022. 1 changed file with 0 additions and 1 deletion.
    1 change: 0 additions & 1 deletion cross_validation_xgbclassifier.py
    Original file line number Diff line number Diff line change
    @@ -1,4 +1,3 @@
    from xgboost import XGBClassifier
    from sklearn.model_selection import cross_validate
    from sklearn.model_selection import StratifiedKFold

  2. dtrizna revised this gist Sep 21, 2022. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion cross_validation_xgbclassifier.py
    Original file line number Diff line number Diff line change
    @@ -15,4 +15,4 @@ def print_scores(cv):
    cv[key] = cross_validate(xgb_model, X[key], y, cv=skf, scoring=metrics)

    print(f"{key}:")
    print_scores(cv)
    print_scores(cv[key])
  3. dtrizna revised this gist Sep 21, 2022. 1 changed file with 9 additions and 1 deletion.
    10 changes: 9 additions & 1 deletion cross_validation_xgbclassifier.py
    Original file line number Diff line number Diff line change
    @@ -2,9 +2,17 @@
    from sklearn.model_selection import cross_validate
    from sklearn.model_selection import StratifiedKFold

    def print_scores(cv):
        """Print the mean of every test metric in a cross-validation result.

        Args:
            cv: Mapping from result name to a sequence of per-fold values,
                such as the dict returned by
                ``sklearn.model_selection.cross_validate``. Only entries whose
                name starts with ``"test_"`` are reported; other entries are
                ignored.

        Returns:
            None. One tab-indented line is printed per test metric, with the
            mean formatted to two decimals, followed by a blank line.
        """
        prefix = "test_"
        for name, fold_scores in cv.items():
            if not name.startswith(prefix):
                continue
            # Slice the prefix off rather than calling str.strip("test_"):
            # strip() removes any of the characters 't', 'e', 's', '_' from
            # BOTH ends, which mangles names such as "test_specificity".
            metric = name[len(prefix):]
            # Average each metric on its own so an empty mapping simply
            # prints nothing instead of raising inside a stacked np.mean.
            print(f"\tAverage {metric:<10} over all folds: {np.mean(fold_scores):.2f}")
        print()

    cv = {}
    metrics = ["accuracy", "precision", "recall", "f1", "roc_auc"]
    for key in ["HashingVectorizer", "TfidfVectorizer"]:
    xgb_model = XGBClassifier(n_estimators=100, use_label_encoder=False, eval_metric="logloss")
    skf = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)
    cv[key] = cross_validate(xgb_model, X[key], y, cv=skf, scoring=metrics)
    cv[key] = cross_validate(xgb_model, X[key], y, cv=skf, scoring=metrics)

    print(f"{key}:")
    print_scores(cv)
  4. dtrizna revised this gist Sep 21, 2022. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion cross_validation_xgbclassifier.py
    Original file line number Diff line number Diff line change
    @@ -6,5 +6,5 @@
    metrics = ["accuracy", "precision", "recall", "f1", "roc_auc"]
    for key in ["HashingVectorizer", "TfidfVectorizer"]:
    xgb_model = XGBClassifier(n_estimators=100, use_label_encoder=False, eval_metric="logloss")
    skf = StratifiedKFold(n_splits=5, random_state=RANDOM_SEED, shuffle=True)
    skf = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)
    cv[key] = cross_validate(xgb_model, X[key], y, cv=skf, scoring=metrics)
  5. dtrizna revised this gist Sep 21, 2022. 1 changed file with 1 addition and 0 deletions.
    1 change: 1 addition & 0 deletions cross_validation_xgbclassifier.py
    Original file line number Diff line number Diff line change
    @@ -3,6 +3,7 @@
    from sklearn.model_selection import StratifiedKFold

    cv = {}
    metrics = ["accuracy", "precision", "recall", "f1", "roc_auc"]
    for key in ["HashingVectorizer", "TfidfVectorizer"]:
    xgb_model = XGBClassifier(n_estimators=100, use_label_encoder=False, eval_metric="logloss")
    skf = StratifiedKFold(n_splits=5, random_state=RANDOM_SEED, shuffle=True)
  6. dtrizna revised this gist Sep 21, 2022. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion cross_validation_xgbclassifier.py
    Original file line number Diff line number Diff line change
    @@ -3,7 +3,7 @@
    from sklearn.model_selection import StratifiedKFold

    cv = {}
    for key in ["HashingVectorizer", "TF-IDF", "One-Hot"]:
    for key in ["HashingVectorizer", "TfidfVectorizer"]:
    xgb_model = XGBClassifier(n_estimators=100, use_label_encoder=False, eval_metric="logloss")
    skf = StratifiedKFold(n_splits=5, random_state=RANDOM_SEED, shuffle=True)
    cv[key] = cross_validate(xgb_model, X[key], y, cv=skf, scoring=metrics)
  7. dtrizna created this gist Sep 20, 2022.
    9 changes: 9 additions & 0 deletions cross_validation_xgbclassifier.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,9 @@
    from xgboost import XGBClassifier
    from sklearn.model_selection import cross_validate
    from sklearn.model_selection import StratifiedKFold

    # Cross-validation results, stored under the same strings used to index X.
    cv = {}
    # NOTE(review): X, y, metrics and RANDOM_SEED are not defined anywhere in
    # this snippet; presumably they come from the surrounding session
    # (e.g. earlier notebook cells) -- confirm before running it stand-alone.
    for key in ["HashingVectorizer", "TF-IDF", "One-Hot"]:
    # A new, unfitted classifier is created for every entry of X.
    xgb_model = XGBClassifier(n_estimators=100, use_label_encoder=False, eval_metric="logloss")
    # Shuffled 5-fold stratified splitter, seeded with RANDOM_SEED.
    skf = StratifiedKFold(n_splits=5, random_state=RANDOM_SEED, shuffle=True)
    # Keep the whole cross_validate result for X[key] under the same key.
    cv[key] = cross_validate(xgb_model, X[key], y, cv=skf, scoring=metrics)