Skip to content

Instantly share code, notes, and snippets.

@asifr
Created December 3, 2020 16:46
Show Gist options
  • Save asifr/9115fe8420139e90ca6dd1a6f690a3d4 to your computer and use it in GitHub Desktop.
Save asifr/9115fe8420139e90ca6dd1a6f690a3d4 to your computer and use it in GitHub Desktop.
import numpy as np
def outlier_detect(data, threshold=1, method="IQR"):
    """Flag outliers in ``data`` using an IQR, standard-deviation or MAD rule.

    Parameters
    ----------
    data : array-like
        Numeric values (list, NumPy array, pandas Series, ...). The input is
        converted to a float NumPy array; NaN entries are ignored when the
        statistics are computed and are never flagged as outliers.
    threshold : float, default 1
        Multiplier applied to the spread measure:

        * ``"IQR"`` -- points outside ``[Q1 - threshold*IQR, Q3 + threshold*IQR]``
        * ``"STD"`` -- points outside ``mean +/- threshold*std``
        * ``"MAD"`` -- points whose absolute modified z-score
          ``0.6745 * (x - median) / MAD`` exceeds ``threshold``
    method : {"IQR", "STD", "MAD"}, default "IQR"
        Detection rule to apply.

    Returns
    -------
    outlier_index : numpy.ndarray of bool
        Element-wise mask with the same shape as ``data``; ``True`` marks an
        outlier.
    bounds : tuple
        ``(upper, lower)`` cut-offs for ``"IQR"`` and ``"STD"``. For ``"MAD"``
        the tuple is ``(mad, mad)`` (the median absolute deviation repeated),
        which is what this function has always returned for that method.

    Raises
    ------
    AssertionError
        If ``method`` is not one of the supported names.
    """
    # Raised explicitly (rather than via ``assert``) so the check still runs
    # under ``python -O`` while keeping the exception type callers already see.
    if method not in ("IQR", "STD", "MAD"):
        raise AssertionError("Method must be one of IQR|STD|MAD")

    # Work on a float ndarray so lists, arrays and Series all behave the same.
    values = np.asarray(data, dtype=float)

    if method == "MAD":
        median = np.nanmedian(values)
        median_absolute_deviation = np.nanmedian(np.abs(values - median))
        # A zero MAD yields inf (flagged) for points off the median and NaN
        # (not flagged) for points on it; silence the resulting warnings.
        with np.errstate(divide="ignore", invalid="ignore"):
            modified_z_scores = (
                0.6745 * (values - median) / median_absolute_deviation
            )
            outlier_index = np.abs(modified_z_scores) > threshold
        return outlier_index, (median_absolute_deviation, median_absolute_deviation)

    if method == "IQR":
        q1, q3 = np.nanquantile(values, [0.25, 0.75])
        spread = q3 - q1
        lower = q1 - spread * threshold
        upper = q3 + spread * threshold
    else:  # method == "STD"
        center = np.nanmean(values)
        spread = np.nanstd(values)
        upper = center + threshold * spread
        lower = center - threshold * spread

    upper_lower = (upper, lower)
    # Element-wise mask; comparisons against NaN are False, so NaNs are kept.
    with np.errstate(invalid="ignore"):
        outlier_index = (values > upper) | (values < lower)
    return outlier_index, upper_lower
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment