In [1]:
import numpy as np

# Sample data with outliers
data = np.array([1, 200, 3, 10, 4, 50, 6, 9, 3, 100])

# One-liner: Robust scaling using MAD
scaled = (data - np.median(data)) / np.median(np.abs(data - np.median(data)))
print(scaled)


[-1.44444444 42.77777778 -1. 0.55555556 -0.77777778 9.44444444
 -0.33333333 0.33333333 -1. 20.55555556]


In [2]:
# Sample continuous data (e.g., customer ages)
ages = np.array([18, 25, 35, 22, 45, 67, 23, 29, 34, 56, 41, 38, 52, 28, 33])

# One-liner: Create 4 equal-frequency bins
binned = np.digitize(ages, np.percentile(ages, [25, 50, 75])) - 1
print(binned)


[-1 -1 1 -1 2 2 -1 0 1 2 1 1 2 0 0]


In [3]:
# Original features (e.g., temperature, humidity)
X = np.array([[20, 65], [25, 70], [30, 45], [22, 80]])

# One-liner: Generate degree-2 polynomial features
poly_features = np.column_stack([X[:, [i, j]].prod(axis=1) for i in range(X.shape[1]) for j in range(i, X.shape[1])])
print(poly_features)

[[ 400 1300 4225]
 [ 625 1750 4900]
 [ 900 1350 2025]
 [ 484 1760 6400]]


In [4]:
# Time series data (e.g., daily sales)
sales = np.array([100, 98, 120,130, 74, 145, 110, 140, 65, 105, 135])

lags = np.column_stack([np.roll(sales, shift) for shift in [1, 2, 3]])[3:]
print(lags)


[[120 98 100]
 [130 120 98]
 [ 74 130 120]
 [145 74 130]
 [110 145 74]
 [140 110 145]
 [ 65 140 110]
 [105 65 140]]


In [5]:
# Categorical data (e.g., product categories)
categories = np.array([0, 1, 2, 1, 0, 2, 3, 1])

# One-liner: One-hot encode
one_hot = (categories[:, None] == np.arange(categories.max() + 1)).astype(int)
print(one_hot)


[[1 0 0 0]
 [0 1 0 0]
 [0 0 1 0]
 [0 1 0 0]
 [1 0 0 0]
 [0 0 1 0]
 [0 0 0 1]
 [0 1 0 0]]


In [6]:
# Coordinate data
locations = np.array([[40.7128, -74.0060],
 [34.0522, -118.2437],
 [41.8781, -87.6298],
 [29.7604, -95.3698]])
reference = np.array([39.7392, -104.9903])

# One-liner: Calculate Euclidean distances from reference point
distances = np.sqrt(((locations - reference) ** 2).sum(axis=1))
print(distances)


[30.99959263 14.42201722 17.4917653 13.86111358]


In [7]:
# Sample features (e.g., price, quality, brand_score)
features = np.array([[10, 8, 7], [15, 9, 6], [12, 7, 8], [20, 10, 9]])

# One-liner: Create all pairwise interactions
interactions = np.array([features[:, i] * features[:, j]
 for i in range(features.shape[1])
 for j in range(i+1, features.shape[1])]).T
print(interactions)


[[ 80 70 56]
 [135 90 54]
 [ 84 96 56]
 [200 180 90]]


In [9]:
# Noisy signal data (e.g., stock prices, sensor readings)
signal = np.array([10, 27, 12, 18, 11, 19, 20, 26, 12, 19, 25, 31, 28])
window_size = 4

# One-liner: Create rolling mean features
rolling_mean = np.convolve(signal, np.ones(window_size)/window_size, mode='valid')
print(rolling_mean)


[16.75 17. 15. 17. 19. 19.25 19.25 20.5 21.75 25.75]


In [10]:
# Data with potential outliers (e.g., transaction amounts)
amounts = np.array([25, 30, 28, 32, 500, 29, 31, 27, 33, 26])

# One-liner: Create outlier indicator features
outlier_flags = ((amounts < np.percentile(amounts, 5)) |
 (amounts > np.percentile(amounts, 95))).astype(int)
print(outlier_flags)

[1 0 0 0 1 0 0 0 0 0]


In [11]:
# Categorical data (e.g., product categories)
categories = np.array(['Electronics', 'Books', 'Electronics', 'Clothing',
 'Books', 'Electronics', 'Home', 'Books'])

# One-liner: Frequency encode
unique_cats, counts = np.unique(categories, return_counts=True)
freq_encoded = np.array([counts[np.where(unique_cats == cat)[0][0]] for cat in categories])
print(freq_encoded)

[3 3 3 1 3 3 1 3]
