Created
November 3, 2011 23:11
-
-
Save diogojc/1338222 to your computer and use it in GitHub Desktop.
Revisions
-
diogojc revised this gist
Nov 5, 2011 . 1 changed file with 24 additions and 16 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,28 +1,35 @@ import numpy as np from scipy.sparse import csc_matrix def pageRank(G, s = .85, maxerr = .001): """ Computes the pagerank for each of the n states. Used in webpage ranking and text summarization using unweighted or weighted transitions respectively. Args ---------- G: matrix representing state transitions Gij can be a boolean or non negative real number representing the transition weight from state i to j. Kwargs ---------- s: probability of following a transition. 1-s probability of teleporting to another state. Defaults to 0.85 maxerr: if the sum of pageranks between iterations is bellow this we will have converged. Defaults to 0.001 """ n = G.shape[0] # transform G into markov matrix M M = csc_matrix(G,dtype=np.float) rsums = np.array(M.sum(1))[:,0] ri, ci = M.nonzero() M.data /= rsums[ri] # bool array of sink states sink = rsums==0 @@ -34,16 +41,16 @@ def pageRank(G, s = .85, maxerr = .0001): # calculate each pagerank at a time for i in xrange(0,n): # inlinks of state i Ii = np.array(M[:,i].todense())[:,0] # account for sink states Si = sink / float(n) # account for teleportation to state i Ti = np.ones(n) / float(n) r[i] = ro.dot( Ii*s + Si*s + Ti*(1-s) ) # return normalized pagerank return r/sum(r) @@ -57,4 +64,5 @@ def pageRank(G, s = .85, maxerr = .0001): [0,0,0,0,0,0,1], [0,0,0,0,0,1,1], [0,0,0,1,1,0,1]]) print pageRank(G,s=.86) -
diogojc created this gist
Nov 3, 2011 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,60 @@ import numpy as np from scipy.sparse import csc_matrix def pageRank(G, s = .85, maxerr = .0001): """ Computes the pagerank for each of the n states Parameters ---------- G: matrix representing state transitions Gij is a binary value representing a transition from state i to j. s: probability of following a transition. 1-s probability of teleporting to another state. maxerr: if the sum of pageranks between iterations is bellow this we will have converged. """ n = G.shape[0] # transform G into markov matrix A A = csc_matrix(G,dtype=np.float) rsums = np.array(A.sum(1))[:,0] ri, ci = A.nonzero() A.data /= rsums[ri] # bool array of sink states sink = rsums==0 # Compute pagerank r until we converge ro, r = np.zeros(n), np.ones(n) while np.sum(np.abs(r-ro)) > maxerr: ro = r.copy() # calculate each pagerank at a time for i in xrange(0,n): # inlinks of state i Ai = np.array(A[:,i].todense())[:,0] # account for sink states Di = sink / float(n) # account for teleportation to state i Ei = np.ones(n) / float(n) r[i] = ro.dot( Ai*s + Di*s + Ei*(1-s) ) # return normalized pagerank return r/float(sum(r)) if __name__=='__main__': # Example extracted from 'Introduction to Information Retrieval' G = np.array([[0,0,1,0,0,0,0], [0,1,1,0,0,0,0], [1,0,1,1,0,0,0], [0,0,0,1,1,0,0], [0,0,0,0,0,0,1], [0,0,0,0,0,1,1], [0,0,0,1,1,0,1]]) print pageRank(G,s=.86)