Revisions
GaelVaroquaux revised this gist
Jul 30, 2019. 1 changed file with 3 additions and 3 deletions.
@@ -28,7 +28,7 @@ def nearest_distances(X, k=1):
     returns the distance to the kth nearest neighbor for every point in X
     '''
-    knn = NearestNeighbors(n_neighbors=k)
+    knn = NearestNeighbors(n_neighbors=k + 1)
     knn.fit(X)
     d, _ = knn.kneighbors(X) # the first nearest neighbor is itself
     return d[:, -1] # returns the distance to the kth nearest neighbor
@@ -201,7 +201,7 @@ def test_mutual_information():
              )
     # Our estimator should undershoot once again: it will undershoot more
     # for the 2D estimation that for the 1D estimation
-    print MI_est, MI_th
+    print((MI_est, MI_th))
     np.testing.assert_array_less(MI_est, MI_th)
     np.testing.assert_array_less(MI_th, MI_est + .3)
@@ -238,7 +238,7 @@ def test_mutual_information_2d():
              + entropy_gaussian(C[1, 1])
              - entropy_gaussian(C)
              )
-    print MI_est, MI_th
+    print((MI_est, MI_th))
     # Our estimator should undershoot once again: it will undershoot more
     # for the 2D estimation that for the 1D estimation
     np.testing.assert_array_less(MI_est, MI_th)
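The substantive fix in this revision is the n_neighbors=k + 1 change: when a NearestNeighbors model is queried on the same points it was fitted on, scikit-learn returns each point as its own nearest neighbor at distance zero, so the kth genuine neighbor sits in the last of k + 1 columns. A minimal sketch of that behaviour (the data and the value of k are illustrative, not part of the gist):

import numpy as np
from sklearn.neighbors import NearestNeighbors

rng = np.random.RandomState(0)
X = rng.randn(1000, 2)
k = 3

# Query the fit points themselves: column 0 holds the self-distance (0.0),
# column k holds the distance to the kth genuine nearest neighbor.
knn = NearestNeighbors(n_neighbors=k + 1).fit(X)
d, _ = knn.kneighbors(X)
print(d[:, 0].max())   # ~0.0, every point is its own first neighbor
print(d[:, -1][:5])    # the distances used by nearest_distances(X, k)

The remaining two changes simply port the print statements to Python 3 syntax.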
GaelVaroquaux revised this gist
Sep 16, 2014. 1 changed file with 88 additions and 0 deletions.
@@ -9,13 +9,16 @@
 import numpy as np

 from scipy.special import gamma,psi
+from scipy import ndimage
 from scipy.linalg import det
 from numpy import pi

 from sklearn.neighbors import NearestNeighbors

 __all__=['entropy', 'mutual_information', 'entropy_gaussian']

+EPS = np.finfo(float).eps
+

 def nearest_distances(X, k=1):
     '''
@@ -102,6 +105,61 @@ def mutual_information(variables, k=1):
         - entropy(all_vars, k=k))


+def mutual_information_2d(x, y, sigma=1, normalized=False):
+    """
+    Computes (normalized) mutual information between two 1D variate from a
+    joint histogram.
+
+    Parameters
+    ----------
+    x : 1D array
+        first variable
+    y : 1D array
+        second variable
+    sigma: float
+        sigma for Gaussian smoothing of the joint histogram
+
+    Returns
+    -------
+    nmi: float
+        the computed similariy measure
+    """
+    bins = (256, 256)
+
+    jh = np.histogram2d(x, y, bins=bins)[0]
+
+    # smooth the jh with a gaussian filter of given sigma
+    ndimage.gaussian_filter(jh, sigma=sigma, mode='constant', output=jh)
+
+    # compute marginal histograms
+    jh = jh + EPS
+    sh = np.sum(jh)
+    jh = jh / sh
+    s1 = np.sum(jh, axis=0).reshape((-1, jh.shape[0]))
+    s2 = np.sum(jh, axis=1).reshape((jh.shape[1], -1))
+
+    # Normalised Mutual Information of:
+    # Studholme, jhill & jhawkes (1998).
+    # "A normalized entropy measure of 3-D medical image alignment".
+    # in Proc. Medical Imaging 1998, vol. 3338, San Diego, CA, pp. 132-143.
+    if normalized:
+        mi = ((np.sum(s1 * np.log(s1)) + np.sum(s2 * np.log(s2)))
+              / np.sum(jh * np.log(jh))) - 1
+    else:
+        mi = (np.sum(jh * np.log(jh)) - np.sum(s1 * np.log(s1))
+              - np.sum(s2 * np.log(s2)))
+
+    return mi
+
+
+###############################################################################
+# Tests
+
 def test_entropy():
     # Testing against correlated Gaussian variables
     # (analytical results are known)
@@ -157,9 +215,39 @@ def test_degenerate():
     assert np.isfinite(entropy(X))
     assert np.isfinite(mutual_information((x[:, np.newaxis],
                                            x[:, np.newaxis])))
+    assert 2.9 < mutual_information_2d(x, x) < 3.1
+
+
+def test_mutual_information_2d():
+    # Mutual information between two correlated gaussian variables
+    # Entropy of a 2-dimensional gaussian variable
+    n = 50000
+    rng = np.random.RandomState(0)
+    #P = np.random.randn(2, 2)
+    P = np.array([[1, 0], [.9, .1]])
+    C = np.dot(P, P.T)
+    U = rng.randn(2, n)
+    Z = np.dot(P, U).T
+    X = Z[:, 0]
+    X = X.reshape(len(X), 1)
+    Y = Z[:, 1]
+    Y = Y.reshape(len(Y), 1)
+    # in bits
+    MI_est = mutual_information_2d(X.ravel(), Y.ravel())
+    MI_th = (entropy_gaussian(C[0, 0])
+             + entropy_gaussian(C[1, 1])
+             - entropy_gaussian(C)
+             )
+    print MI_est, MI_th
+    # Our estimator should undershoot once again: it will undershoot more
+    # for the 2D estimation that for the 1D estimation
+    np.testing.assert_array_less(MI_est, MI_th)
+    np.testing.assert_array_less(MI_th, MI_est + .2)
+
+
 if __name__ == '__main__':
     # Run our tests
     test_entropy()
     test_mutual_information()
     test_degenerate()
+    test_mutual_information_2d()
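This revision adds mutual_information_2d, a histogram-based alternative to the k-NN estimator: it bins the two 1D variables into a 256 x 256 joint histogram, smooths it with a Gaussian filter of width sigma, and computes MI (or Studholme's normalized MI) from the joint and marginal distributions. A hedged usage sketch follows, assuming the gist is saved as mutual_info.py on the import path (an assumed name); natural logarithms are used throughout, so the result is in nats.

import numpy as np
from mutual_info import mutual_information_2d   # assumed module name

rng = np.random.RandomState(0)
n = 50000
rho = 0.8
x = rng.randn(n)
y = rho * x + np.sqrt(1 - rho ** 2) * rng.randn(n)   # correlation rho with x

mi_est = mutual_information_2d(x, y, sigma=1)
mi_th = -0.5 * np.log(1 - rho ** 2)   # closed form for a bivariate Gaussian
print(mi_est, mi_th)   # binning and smoothing bias the estimate somewhat,
                       # but it should land near the analytical value

# normalized=True returns Studholme's normalized MI instead, a similarity
# score commonly used for image registration
nmi = mutual_information_2d(x, y, normalized=True)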
GaelVaroquaux revised this gist
Sep 15, 2014. 1 changed file with 6 additions and 4 deletions.
@@ -1,8 +1,10 @@
 '''
+Non-parametric computation of entropy and mutual-information
+
+Adapted by G Varoquaux for code created by R Brette, itself
+from several papers (see in the code).
+
+These computations rely on nearest-neighbor statistics
 '''
 import numpy as np
@@ -160,4 +162,4 @@ def test_degenerate():
     # Run our tests
     test_entropy()
     test_mutual_information()
     test_degenerate()
GaelVaroquaux created this gist
Sep 15, 2014.
@@ -0,0 +1,163 @@
'''
Information theory measures.

Adapted by R Brette from several papers (see in the code).

Uses the ANN wrapper in scikits.
'''
import numpy as np

from scipy.special import gamma,psi
from scipy.linalg import det
from numpy import pi

from sklearn.neighbors import NearestNeighbors

__all__=['entropy', 'mutual_information', 'entropy_gaussian']


def nearest_distances(X, k=1):
    '''
    X = array(N,M)
    N = number of points
    M = number of dimensions

    returns the distance to the kth nearest neighbor for every point in X
    '''
    knn = NearestNeighbors(n_neighbors=k)
    knn.fit(X)
    d, _ = knn.kneighbors(X) # the first nearest neighbor is itself
    return d[:, -1] # returns the distance to the kth nearest neighbor


def entropy_gaussian(C):
    '''
    Entropy of a gaussian variable with covariance matrix C
    '''
    if np.isscalar(C): # C is the variance
        return .5*(1 + np.log(2*pi)) + .5*np.log(C)
    else:
        n = C.shape[0] # dimension
        return .5*n*(1 + np.log(2*pi)) + .5*np.log(abs(det(C)))


def entropy(X, k=1):
    ''' Returns the entropy of the X.

    Parameters
    ===========
    X : array-like, shape (n_samples, n_features)
        The data the entropy of which is computed
    k : int, optional
        number of nearest neighbors for density estimation

    Notes
    ======
    Kozachenko, L. F. & Leonenko, N. N. 1987 Sample estimate of entropy
    of a random vector. Probl. Inf. Transm. 23, 95-101.
    See also: Evans, D. 2008 A computationally efficient estimator for
    mutual information, Proc. R. Soc. A 464 (2093), 1203-1215.
    and:
    Kraskov A, Stogbauer H, Grassberger P. (2004). Estimating mutual
    information. Phys Rev E 69(6 Pt 2):066138.
    '''

    # Distance to kth nearest neighbor
    r = nearest_distances(X, k) # squared distances
    n, d = X.shape
    volume_unit_ball = (pi**(.5*d)) / gamma(.5*d + 1)
    '''
    F. Perez-Cruz, (2008). Estimation of Information Theoretic Measures
    for Continuous Random Variables. Advances in Neural Information
    Processing Systems 21 (NIPS). Vancouver (Canada), December.

    return d*mean(log(r))+log(volume_unit_ball)+log(n-1)-log(k)
    '''
    return (d*np.mean(np.log(r + np.finfo(X.dtype).eps))
            + np.log(volume_unit_ball) + psi(n) - psi(k))


def mutual_information(variables, k=1):
    '''
    Returns the mutual information between any number of variables.
    Each variable is a matrix X = array(n_samples, n_features)
    where
      n = number of samples
      dx,dy = number of dimensions

    Optionally, the following keyword argument can be specified:
      k = number of nearest neighbors for density estimation

    Example: mutual_information((X, Y)), mutual_information((X, Y, Z), k=5)
    '''
    if len(variables) < 2:
        raise AttributeError(
                "Mutual information must involve at least 2 variables")
    all_vars = np.hstack(variables)
    return (sum([entropy(X, k=k) for X in variables])
            - entropy(all_vars, k=k))


def test_entropy():
    # Testing against correlated Gaussian variables
    # (analytical results are known)
    # Entropy of a 3-dimensional gaussian variable
    rng = np.random.RandomState(0)
    n = 50000
    d = 3
    P = np.array([[1, 0, 0], [0, 1, .5], [0, 0, 1]])
    C = np.dot(P, P.T)
    Y = rng.randn(d, n)
    X = np.dot(P, Y)
    H_th = entropy_gaussian(C)
    H_est = entropy(X.T, k=5)
    # Our estimated entropy should always be less that the actual one
    # (entropy estimation undershoots) but not too much
    np.testing.assert_array_less(H_est, H_th)
    np.testing.assert_array_less(.9*H_th, H_est)


def test_mutual_information():
    # Mutual information between two correlated gaussian variables
    # Entropy of a 2-dimensional gaussian variable
    n = 50000
    rng = np.random.RandomState(0)
    #P = np.random.randn(2, 2)
    P = np.array([[1, 0], [0.5, 1]])
    C = np.dot(P, P.T)
    U = rng.randn(2, n)
    Z = np.dot(P, U).T
    X = Z[:, 0]
    X = X.reshape(len(X), 1)
    Y = Z[:, 1]
    Y = Y.reshape(len(Y), 1)
    # in bits
    MI_est = mutual_information((X, Y), k=5)
    MI_th = (entropy_gaussian(C[0, 0])
             + entropy_gaussian(C[1, 1])
             - entropy_gaussian(C)
             )
    # Our estimator should undershoot once again: it will undershoot more
    # for the 2D estimation that for the 1D estimation
    print MI_est, MI_th
    np.testing.assert_array_less(MI_est, MI_th)
    np.testing.assert_array_less(MI_th, MI_est + .3)


def test_degenerate():
    # Test that our estimators are well-behaved with regards to
    # degenerate solutions
    rng = np.random.RandomState(0)
    x = rng.randn(50000)
    X = np.c_[x, x]
    assert np.isfinite(entropy(X))
    assert np.isfinite(mutual_information((x[:, np.newaxis],
                                           x[:, np.newaxis])))


if __name__ == '__main__':
    # Run our tests
    test_entropy()
    test_mutual_information()
    test_degenerate()
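For completeness, a hedged usage sketch of the k-NN estimators defined above, again assuming the file is importable as mutual_info.py (an assumed name). All logarithms are natural, so the estimates are in nats, and, as the test comments note, the estimators tend to undershoot the analytical values slightly.

import numpy as np
from mutual_info import entropy, entropy_gaussian, mutual_information

rng = np.random.RandomState(0)
P = np.array([[1, 0], [0.5, 1]])        # mixing matrix, covariance C = P P^T
C = np.dot(P, P.T)
X = np.dot(P, rng.randn(2, 50000)).T    # samples shaped (n_samples, n_features)

# Entropy of the 2D Gaussian sample vs the closed-form Gaussian entropy
print(entropy(X, k=5), entropy_gaussian(C))

# Mutual information between the two columns vs its analytical value
mi_est = mutual_information((X[:, :1], X[:, 1:]), k=5)
mi_th = (entropy_gaussian(C[0, 0]) + entropy_gaussian(C[1, 1])
         - entropy_gaussian(C))
print(mi_est, mi_th)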