'''
Non-parametric computation of entropy and mutual information.

Adapted by G Varoquaux for code created by R Brette, itself from several
papers (see references in the code). These computations rely on
nearest-neighbor statistics.
'''
import numpy as np

from scipy.special import gamma, psi
from scipy import ndimage
from scipy.linalg import det
from numpy import pi

from sklearn.neighbors import NearestNeighbors

__all__ = ['entropy', 'mutual_information', 'entropy_gaussian']

EPS = np.finfo(float).eps


def nearest_distances(X, k=1):
    '''
    X = array(N, M)
    N = number of points
    M = number of dimensions

    returns the distance to the kth nearest neighbor for every point in X
    '''
    # Ask for k + 1 neighbors because the nearest neighbor of each point
    # is the point itself (at distance 0)
    knn = NearestNeighbors(n_neighbors=k + 1)
    knn.fit(X)
    d, _ = knn.kneighbors(X)  # the first nearest neighbor is itself
    return d[:, -1]  # returns the distance to the kth nearest neighbor


def entropy_gaussian(C):
    '''
    Entropy of a gaussian variable with covariance matrix C
    '''
    if np.isscalar(C):  # C is the variance
        return .5 * (1 + np.log(2 * pi)) + .5 * np.log(C)
    else:
        n = C.shape[0]  # dimension
        return .5 * n * (1 + np.log(2 * pi)) + .5 * np.log(abs(det(C)))


def entropy(X, k=1):
    ''' Returns the entropy of X.

    Parameters
    ===========

    X : array-like, shape (n_samples, n_features)
        The data the entropy of which is computed

    k : int, optional
        number of nearest neighbors for density estimation

    Notes
    ======

    Kozachenko, L. F. & Leonenko, N. N. 1987 Sample estimate of entropy
    of a random vector. Probl. Inf. Transm. 23, 95-101.
    See also: Evans, D. 2008 A computationally efficient estimator for
    mutual information, Proc. R. Soc. A 464 (2093), 1203-1215.
    and:
    Kraskov A, Stogbauer H, Grassberger P. (2004). Estimating mutual
    information. Phys Rev E 69(6 Pt 2):066138.
    '''

    # Distance to the kth nearest neighbor of each point
    r = nearest_distances(X, k)
    n, d = X.shape
    volume_unit_ball = (pi ** (.5 * d)) / gamma(.5 * d + 1)
    '''
    F. Perez-Cruz, (2008). Estimation of Information Theoretic Measures
    for Continuous Random Variables. Advances in Neural Information
    Processing Systems 21 (NIPS). Vancouver (Canada), December.

    return d*mean(log(r))+log(volume_unit_ball)+log(n-1)-log(k)
    '''
    return (d * np.mean(np.log(r + np.finfo(X.dtype).eps))
            + np.log(volume_unit_ball) + psi(n) - psi(k))


def mutual_information(variables, k=1):
    '''
    Returns the mutual information between any number of variables.
    Each variable is a matrix X = array(n_samples, n_features)
    where
      n = number of samples
      dx, dy = number of dimensions

    Optionally, the following keyword argument can be specified:
      k = number of nearest neighbors for density estimation

    Example: mutual_information((X, Y)), mutual_information((X, Y, Z), k=5)
    '''
    if len(variables) < 2:
        raise AttributeError(
            "Mutual information must involve at least 2 variables")
    all_vars = np.hstack(variables)
    return (sum([entropy(X, k=k) for X in variables])
            - entropy(all_vars, k=k))
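
# --------------------------------------------------------------------------
# Illustrative usage sketch, not part of the original module: a quick sanity
# check that the k-NN estimator returns a mutual information close to zero
# for two independent samples. The function name `_demo_independent_mi`, the
# sample size, and the seed are arbitrary choices made for this example.
def _demo_independent_mi(n_samples=10000, k=5, seed=0):
    """Estimate the MI of two independent Gaussian samples (expected ~0)."""
    rng = np.random.RandomState(seed)
    X = rng.randn(n_samples, 1)
    Y = rng.randn(n_samples, 1)
    mi = mutual_information((X, Y), k=k)
    print('MI of independent variables (should be close to 0):', mi)
    return mi
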
def mutual_information_2d(x, y, sigma=1, normalized=False):
    """
    Computes (normalized) mutual information between two 1D variates from
    a joint histogram.

    Parameters
    ----------
    x : 1D array
        first variable
    y : 1D array
        second variable
    sigma : float
        sigma for Gaussian smoothing of the joint histogram
    normalized : bool
        if True, return the normalized mutual information of
        Studholme et al. (1998) instead of the raw mutual information

    Returns
    -------
    nmi : float
        the computed similarity measure
    """
    bins = (256, 256)

    jh = np.histogram2d(x, y, bins=bins)[0]

    # smooth the jh with a gaussian filter of given sigma
    ndimage.gaussian_filter(jh, sigma=sigma, mode='constant', output=jh)

    # compute marginal histograms
    jh = jh + EPS
    sh = np.sum(jh)
    jh = jh / sh
    s1 = np.sum(jh, axis=0).reshape((-1, jh.shape[0]))
    s2 = np.sum(jh, axis=1).reshape((jh.shape[1], -1))

    # Normalised Mutual Information of:
    # Studholme, Hill & Hawkes (1998).
    # "A normalized entropy measure of 3-D medical image alignment".
    # in Proc. Medical Imaging 1998, vol. 3338, San Diego, CA, pp. 132-143.
    if normalized:
        mi = ((np.sum(s1 * np.log(s1)) + np.sum(s2 * np.log(s2)))
              / np.sum(jh * np.log(jh))) - 1
    else:
        mi = (np.sum(jh * np.log(jh)) - np.sum(s1 * np.log(s1))
              - np.sum(s2 * np.log(s2)))

    return mi


###############################################################################
# Tests

def test_entropy():
    # Testing against correlated Gaussian variables
    # (analytical results are known)
    # Entropy of a 3-dimensional gaussian variable
    rng = np.random.RandomState(0)
    n = 50000
    d = 3
    P = np.array([[1, 0, 0], [0, 1, .5], [0, 0, 1]])
    C = np.dot(P, P.T)
    Y = rng.randn(d, n)
    X = np.dot(P, Y)
    H_th = entropy_gaussian(C)
    H_est = entropy(X.T, k=5)
    # Our estimated entropy should always be less than the actual one
    # (entropy estimation undershoots) but not too much
    np.testing.assert_array_less(H_est, H_th)
    np.testing.assert_array_less(.9 * H_th, H_est)


def test_mutual_information():
    # Mutual information between two correlated gaussian variables
    # Entropy of a 2-dimensional gaussian variable
    n = 50000
    rng = np.random.RandomState(0)
    # P = np.random.randn(2, 2)
    P = np.array([[1, 0], [0.5, 1]])
    C = np.dot(P, P.T)
    U = rng.randn(2, n)
    Z = np.dot(P, U).T
    X = Z[:, 0]
    X = X.reshape(len(X), 1)
    Y = Z[:, 1]
    Y = Y.reshape(len(Y), 1)
    # in nats
    MI_est = mutual_information((X, Y), k=5)
    MI_th = (entropy_gaussian(C[0, 0])
             + entropy_gaussian(C[1, 1])
             - entropy_gaussian(C)
             )
    # Our estimator should undershoot once again: it will undershoot more
    # for the 2D estimation than for the 1D estimation
    print(MI_est, MI_th)
    np.testing.assert_array_less(MI_est, MI_th)
    np.testing.assert_array_less(MI_th, MI_est + .3)


def test_degenerate():
    # Test that our estimators are well-behaved with regards to
    # degenerate solutions
    rng = np.random.RandomState(0)
    x = rng.randn(50000)
    X = np.c_[x, x]
    assert np.isfinite(entropy(X))
    assert np.isfinite(mutual_information((x[:, np.newaxis],
                                           x[:, np.newaxis])))
    assert 2.9 < mutual_information_2d(x, x) < 3.1


def test_mutual_information_2d():
    # Mutual information between two correlated gaussian variables
    # Entropy of a 2-dimensional gaussian variable
    n = 50000
    rng = np.random.RandomState(0)
    # P = np.random.randn(2, 2)
    P = np.array([[1, 0], [.9, .1]])
    C = np.dot(P, P.T)
    U = rng.randn(2, n)
    Z = np.dot(P, U).T
    X = Z[:, 0]
    X = X.reshape(len(X), 1)
    Y = Z[:, 1]
    Y = Y.reshape(len(Y), 1)
    # in nats
    MI_est = mutual_information_2d(X.ravel(), Y.ravel())
    MI_th = (entropy_gaussian(C[0, 0])
             + entropy_gaussian(C[1, 1])
             - entropy_gaussian(C)
             )
    print(MI_est, MI_th)
    # Our estimator should undershoot once again: it will undershoot more
    # for the 2D estimation than for the 1D estimation
    np.testing.assert_array_less(MI_est, MI_th)
    np.testing.assert_array_less(MI_th, MI_est + .2)
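
# Illustrative sketch, not part of the original test suite: exercises the
# `normalized=True` branch of mutual_information_2d, which the original tests
# do not cover. The test name and the loose comparison are assumptions made
# for this example; it is not called from the __main__ block below.
def test_mutual_information_2d_normalized():
    # Strongly correlated variables should score higher than independent ones
    n = 50000
    rng = np.random.RandomState(0)
    x = rng.randn(n)
    y_corr = .9 * x + .1 * rng.randn(n)   # strongly correlated with x
    y_indep = rng.randn(n)                # independent of x
    nmi_corr = mutual_information_2d(x, y_corr, normalized=True)
    nmi_indep = mutual_information_2d(x, y_indep, normalized=True)
    assert np.isfinite(nmi_corr) and np.isfinite(nmi_indep)
    assert nmi_indep < nmi_corr
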

if __name__ == '__main__':
    # Run our tests
    test_entropy()
    test_mutual_information()
    test_degenerate()
    test_mutual_information_2d()