Skip to content

Instantly share code, notes, and snippets.

@asifr
Created December 3, 2020 16:46
Show Gist options
  • Save asifr/9115fe8420139e90ca6dd1a6f690a3d4 to your computer and use it in GitHub Desktop.
Save asifr/9115fe8420139e90ca6dd1a6f690a3d4 to your computer and use it in GitHub Desktop.

Revisions

  1. asifr created this gist Dec 3, 2020.
    25 changes: 25 additions & 0 deletions outlier_detect.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,25 @@
    import numpy as np

    def outlier_detect(data, threshold=1, method="IQR"):
        """Flag outliers in ``data`` and report the cut-off bounds that were used.

        Three rules are supported, each defining a ``[lower, upper]`` band; values
        strictly outside the band are flagged:

        * ``"IQR"`` -- ``Q1 - threshold * IQR`` .. ``Q3 + threshold * IQR``
        * ``"STD"`` -- ``mean -/+ threshold * std`` (population std, ``ddof=0``)
        * ``"MAD"`` -- ``median -/+ threshold * MAD / 0.6745``, i.e. the points
          whose modified z-score ``0.6745 * (x - median) / MAD`` exceeds
          ``threshold`` in absolute value.

        NaN entries are ignored when computing the statistics and are never
        flagged as outliers.

        Args:
            data: Array-like of numbers (list, NumPy array, pandas Series, ...).
            threshold: Multiplier applied to the spread measure of the method.
            method: One of ``"IQR"``, ``"STD"`` or ``"MAD"``.

        Returns:
            A tuple ``(outlier_index, (upper, lower))`` where ``outlier_index`` is
            a boolean NumPy array with the same shape as ``data`` (``True`` marks
            an outlier) and ``upper`` / ``lower`` are the band limits. For empty
            input the mask is empty and both limits are NaN.

        Raises:
            AssertionError: If ``method`` is not one of the supported names.
        """
        if method not in ("IQR", "STD", "MAD"):
            # Raised explicitly instead of using ``assert`` so the check is not
            # stripped under ``python -O``; the exception type is kept so that
            # existing callers catching AssertionError keep working.
            raise AssertionError("Method must be one of IQR|STD|MAD")

        values = np.asarray(data, dtype=float)
        if values.size == 0:
            # No statistics can be computed; nothing can be an outlier.
            return np.zeros(values.shape, dtype=bool), (np.nan, np.nan)

        if method == "IQR":
            q1, q3 = np.nanquantile(values, [0.25, 0.75])
            margin = threshold * (q3 - q1)
            lower, upper = q1 - margin, q3 + margin
        elif method == "STD":
            mean = np.nanmean(values)
            margin = threshold * np.nanstd(values)
            lower, upper = mean - margin, mean + margin
        else:  # "MAD"
            median = np.nanmedian(values)
            mad = np.nanmedian(np.abs(values - median))
            # |0.6745 * (x - median) / MAD| > threshold is the same test as
            # |x - median| > threshold * MAD / 0.6745, which also avoids dividing
            # by a zero MAD. 0.6745 is the 0.75 quantile of the standard normal.
            margin = threshold * mad / 0.6745
            lower, upper = median - margin, median + margin

        # Element-wise mask; comparisons against NaN are False, so NaNs stay
        # unflagged.
        outlier_index = (values > upper) | (values < lower)
        return outlier_index, (upper, lower)