Polaris000 · November 4, 2022 16:49
diff --git a/baseline.py b/baseline.py
 def auc_recall_at_k(y_true, y_conf):
    """
    Compute the normalized area under the Recall@k curve.

    Datapoints are ranked by descending confidence. Recall@k is the share
    of all positive targets (label == 1) found within the top k datapoints.
    The area under that curve is divided by the area under the ideal curve
    (every positive ranked first), so a perfect ranking scores 1.0.

    y_true: A list or numpy array of expected predictions
            (1 marks a positive target)
    y_conf: A list or numpy array of the model's confidence
            scores for each datapoint, same length as y_true

    Returns: AUC-Recall@k (float); np.nan when there are no positive
             targets or when there is only a single datapoint
    Raises: ValueError if y_true and y_conf differ in length
    """
    # np.asarray accepts lists and arrays alike; the previous
    # `y_true.count(1)` only worked on lists and raised AttributeError
    # for the numpy arrays this function is documented to take.
    expected = np.asarray(y_true).ravel()
    is_positive = expected == 1
    n_positive = int(np.count_nonzero(is_positive))

    # if there are no positive targets (good leads),
    # auc becomes invalid
    if n_positive == 0:
        return np.nan

    conf = np.asarray(y_conf, dtype=float).ravel()
    if conf.size != expected.size:
        raise ValueError(
            "y_true and y_conf must have the same length, "
            f"got {expected.size} and {conf.size}"
        )

    # Rank by descending confidence. A stable sort keeps tied scores in
    # input order, so the metric is deterministic.
    order = np.argsort(-conf, kind="stable")

    # recall@k for every k in one pass: running count of positives seen
    # so far, divided by the total number of positives.
    recall_at_k = np.cumsum(is_positive[order]) / n_positive

    # ideal recall@k: every positive is ranked first, so recall grows by
    # 1/n_positive per step until it saturates at 1.
    ideal_recall_at_k = np.minimum(
        1.0, np.arange(1, expected.size + 1) / n_positive
    )

    # np.trapz was renamed np.trapezoid in NumPy 2.0; use whichever exists.
    trapezoid = getattr(np, "trapezoid", None) or np.trapz

    ideal_area = trapezoid(ideal_recall_at_k)
    if ideal_area == 0:
        # A single datapoint spans no area; the ratio would be 0/0.
        return np.nan

    # Computing our final metric by getting the proportion of the areas
    # under these two curves
    return float(trapezoid(recall_at_k) / ideal_area)
	def auc_recall_at_k(y_true, y_conf):
	    """
	    Compute the normalized area under the Recall@k curve.

	    Datapoints are ranked by descending confidence. Recall@k is the share
	    of all positive targets (label == 1) found within the top k datapoints.
	    The area under that curve is divided by the area under the ideal curve
	    (every positive ranked first), so a perfect ranking scores 1.0.

	    y_true: A list or numpy array of expected predictions
	            (1 marks a positive target)
	    y_conf: A list or numpy array of the model's confidence
	            scores for each datapoint, same length as y_true

	    Returns: AUC-Recall@k (float); np.nan when there are no positive
	             targets or when there is only a single datapoint
	    Raises: ValueError if y_true and y_conf differ in length
	    """
	    # np.asarray accepts lists and arrays alike; the previous
	    # `y_true.count(1)` only worked on lists and raised AttributeError
	    # for the numpy arrays this function is documented to take.
	    expected = np.asarray(y_true).ravel()
	    is_positive = expected == 1
	    n_positive = int(np.count_nonzero(is_positive))

	    # if there are no positive targets (good leads),
	    # auc becomes invalid
	    if n_positive == 0:
	        return np.nan

	    conf = np.asarray(y_conf, dtype=float).ravel()
	    if conf.size != expected.size:
	        raise ValueError(
	            "y_true and y_conf must have the same length, "
	            f"got {expected.size} and {conf.size}"
	        )

	    # Rank by descending confidence. A stable sort keeps tied scores in
	    # input order, so the metric is deterministic.
	    order = np.argsort(-conf, kind="stable")

	    # recall@k for every k in one pass: running count of positives seen
	    # so far, divided by the total number of positives.
	    recall_at_k = np.cumsum(is_positive[order]) / n_positive

	    # ideal recall@k: every positive is ranked first, so recall grows by
	    # 1/n_positive per step until it saturates at 1.
	    ideal_recall_at_k = np.minimum(
	        1.0, np.arange(1, expected.size + 1) / n_positive
	    )

	    # np.trapz was renamed np.trapezoid in NumPy 2.0; use whichever exists.
	    trapezoid = getattr(np, "trapezoid", None) or np.trapz

	    ideal_area = trapezoid(ideal_recall_at_k)
	    if ideal_area == 0:
	        # A single datapoint spans no area; the ratio would be 0/0.
	        return np.nan

	    # Computing our final metric by getting the proportion of the areas
	    # under these two curves
	    return float(trapezoid(recall_at_k) / ideal_area)
No results found