Limitations:
- for binary classification/OVR/OVO only
- suitable for documents that are not too long
Advantage:
- takes the class label into consideration, correcting the inappropriate scaling by IDF
- better than TF-IDF in most benchmarks
Practical advice:
| # Setup snippet: non-interactive backend, an ffmpeg movie writer, and a figure. | |
| import numpy as np | |
| import matplotlib | |
| # Select the Agg backend here, before pyplot is imported below. | |
| matplotlib.use("Agg") | |
| import matplotlib.pyplot as plt | |
| import matplotlib.animation as manimation | |
| # Look up the writer class registered under the name 'ffmpeg' in matplotlib's writer registry. | |
| FFMpegWriter = manimation.writers['ffmpeg'] | |
| # Writer instance configured for 5 frames per second. | |
| writer = FFMpegWriter(fps=5) | |
| # 12 x 8 figure; presumably the frames are drawn on it and saved via `writer` (not shown here). | |
| fig = plt.figure(figsize=(12, 8)) |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| # here's the precomputed histogram via `plt.hist` or `np.histogram` | |
| bins = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9]).astype(float) | |
| counts = np.array([5, 3, 4, 5, 6, 1, 3, 7]).astype(float) | |
| assert len(bins) == len(counts) + 1 | |
| # recover | |
| centroids = (bins[1:] + bins[:-1]) / 2 | |
| counts_, bins_, _ = plt.hist(centroids, bins=len(counts), |