Skip to content

Instantly share code, notes, and snippets.

@Polaris000
Created November 4, 2022 16:49
Show Gist options
  • Select an option

  • Save Polaris000/ec437e80718851d51d46223c2675fca0 to your computer and use it in GitHub Desktop.

Select an option

Save Polaris000/ec437e80718851d51d46223c2675fca0 to your computer and use it in GitHub Desktop.
def auc_recall_at_k(y_true, y_conf):
    """
    Compute the normalized area under the Recall@k curve.

    Datapoints are ranked by descending confidence. Recall@k is the share
    of all positives (entries of ``y_true`` equal to 1) found among the top
    ``k`` ranked datapoints, for ``k = 1 .. n``. The area under that curve
    is divided by the area under the ideal curve ``min(1, k / n_positives)``
    (every positive ranked first), so a perfect ranking scores 1.0.

    y_true: Array-like (list, numpy array, pandas Series, ...) of expected
        labels; entries equal to 1 are the positives (good leads).
    y_conf: Array-like of the model's confidence scores, one per
        datapoint, in the same order as y_true.
    Returns: AUC-Recall@k (float). NaN when there are no positive targets,
        or when the ideal curve has zero area (a single datapoint).
    Raises: ValueError if the inputs are not one-dimensional sequences of
        equal length.
    """
    labels = np.asarray(y_true)
    is_positive = labels == 1
    n_positive = int(np.count_nonzero(is_positive))

    # if there are no positive targets (good leads),
    # auc becomes invalid
    if n_positive == 0:
        return np.nan

    scores = np.asarray(y_conf, dtype=float)
    if labels.ndim != 1 or labels.shape != scores.shape:
        raise ValueError(
            "y_true and y_conf must be one-dimensional and of equal length"
        )

    # Rank by descending confidence. A stable sort keeps tied scores in
    # their input order, so the result is deterministic.
    ranking = np.argsort(-scores, kind="stable")

    # recall@k for every k at once: running count of positives in the
    # top-k, divided by the total number of positives.
    recall_at_k = np.cumsum(is_positive[ranking]) / n_positive

    # ideal recall@k: all positives ranked first, so recall grows by
    # 1 / n_positive per step until it saturates at 1.
    ideal_recall_at_k = np.minimum(
        1.0, np.arange(1, labels.size + 1) / n_positive
    )

    # np.trapz was deprecated in NumPy 2.0 in favour of np.trapezoid;
    # prefer the new name but keep working on older NumPy versions.
    trapezoid = getattr(np, "trapezoid", None) or np.trapz

    ideal_area = trapezoid(ideal_recall_at_k)
    if ideal_area == 0:
        # A single datapoint spans no interval, so both areas are zero and
        # the ratio is undefined.
        return np.nan

    # Computing our final metric by getting the proportion of the areas
    # under these two curves
    return float(trapezoid(recall_at_k) / ideal_area)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment