Polaris000 · November 4, 2022 16:52 · Nov 4, 2022
diff --git a/numpy_implemenation_without_df.py b/numpy_implemenation_without_df.py
@@ -0,0 +1,32 @@
def auc_recall_at_k_np_no_df(y_true, y_conf):
    """
    Experiment #3:
    --------------
    Compute the normalised AUC under the Recall@k curve using
    numpy's functions only (no intermediate dataframe).

    Datapoints are ranked by descending confidence. Recall@k is the
    fraction of all positives (label == 1) found in the top k ranks.
    The area under that curve (trapezoidal rule, unit spacing) is
    divided by the area under the ideal curve min(1, k / n_positives),
    i.e. the curve obtained when every positive outranks every
    negative.

    y_true: Array-like of labels; entries equal to 1 are positives
    y_conf: Array-like of the model's confidence scores, one per
            datapoint (same length as y_true)

    Returns: AUC-Recall@k (float). np.nan when there are no positive
             labels, or when the ideal area is zero (fewer than two
             datapoints), since the ratio is undefined.

    Raises: ValueError if y_true and y_conf differ in length.
    """
    # Local import: the file this function lives in has no module-level
    # numpy import, so bring the name into scope here.
    import numpy as np

    # np.trapz was deprecated in NumPy 2.0 in favour of np.trapezoid;
    # fall back to the old name only on versions that lack the new one.
    trapezoid = getattr(np, "trapezoid", None)
    if trapezoid is None:
        trapezoid = np.trapz

    # Accept plain lists/tuples as well as numpy arrays.
    is_positive = np.asarray(y_true) == 1
    scores = np.asarray(y_conf)

    if len(is_positive) != len(scores):
        raise ValueError(
            "y_true and y_conf must have the same length, got "
            f"{len(is_positive)} and {len(scores)}"
        )

    # if there are no positive targets (good leads),
    # auc becomes invalid
    n_positive = is_positive.sum()
    if n_positive == 0:
        return np.nan

    # positives flagged in order of descending confidence
    ranked_hits = is_positive[np.argsort(scores)[::-1]]

    # calculating recall@k based on sorted ranking
    recall_at_k = ranked_hits.cumsum() / n_positive

    # calculating ideal recall@k: one new positive per rank until all
    # positives are found, then flat at 1
    ideal_recall_at_k = np.minimum(
        1.0,
        np.arange(1, len(ranked_hits) + 1) / n_positive,
    )

    # With a single datapoint both areas are zero; return NaN directly
    # instead of relying on a 0/0 division (which emits a RuntimeWarning).
    ideal_area = trapezoid(ideal_recall_at_k)
    if ideal_area == 0:
        return np.nan

    return trapezoid(recall_at_k) / ideal_area
No results found