diogojc · November 3, 2011 23:11 · Nov 5, 2011 · Nov 3, 2011
diff --git a/pagerank.py b/pagerank.py
@@ -1,28 +1,35 @@
 import numpy as np
 from scipy.sparse import csc_matrix
 
-def pageRank(G, s = .85, maxerr = .0001):
+def pageRank(G, s = .85, maxerr = .001):
     """
-    Computes the pagerank for each of the n states
+    Computes the pagerank for each of the n states.
 
-    Parameters
+    Used in webpage ranking and text summarization using unweighted
+    or weighted transitions respectively.
+
+
+    Args
     ----------
     G: matrix representing state transitions
-       Gij is a binary value representing a transition from state i to j.
+       Gij can be a boolean or non negative real number representing the
+       transition weight from state i to j.
 
+    Kwargs
+    ----------
     s: probability of following a transition. 1-s probability of teleporting
-       to another state.
+       to another state. Defaults to 0.85
 
     maxerr: if the sum of pageranks between iterations is bellow this we will
-            have converged.
+            have converged. Defaults to 0.001
     """
     n = G.shape[0]
 
-    # transform G into markov matrix A
-    A = csc_matrix(G,dtype=np.float)
-    rsums = np.array(A.sum(1))[:,0]
-    ri, ci = A.nonzero()
-    A.data /= rsums[ri]
+    # transform G into markov matrix M
+    M = csc_matrix(G,dtype=np.float)
+    rsums = np.array(M.sum(1))[:,0]
+    ri, ci = M.nonzero()
+    M.data /= rsums[ri]
 
     # bool array of sink states
     sink = rsums==0
@@ -34,16 +41,16 @@ def pageRank(G, s = .85, maxerr = .0001):
         # calculate each pagerank at a time
         for i in xrange(0,n):
             # inlinks of state i
-            Ai = np.array(A[:,i].todense())[:,0]
+            Ii = np.array(M[:,i].todense())[:,0]
             # account for sink states
-            Di = sink / float(n)
+            Si = sink / float(n)
             # account for teleportation to state i
-            Ei = np.ones(n) / float(n)
+            Ti = np.ones(n) / float(n)
 
-            r[i] = ro.dot( Ai*s + Di*s + Ei*(1-s) )
+            r[i] = ro.dot( Ii*s + Si*s + Ti*(1-s) )
 
     # return normalized pagerank
-    return r/float(sum(r))
+    return r/sum(r)
 
 
 
@@ -57,4 +64,5 @@ def pageRank(G, s = .85, maxerr = .0001):
                   [0,0,0,0,0,0,1],
                   [0,0,0,0,0,1,1],
                   [0,0,0,1,1,0,1]])
+
     print pageRank(G,s=.86)
diff --git a/pagerank.py b/pagerank.py
@@ -0,0 +1,60 @@
+import numpy as np
+from scipy.sparse import csc_matrix
+
+def pageRank(G, s = .85, maxerr = .0001):
+    """
+    Computes the pagerank for each of the n states
+
+    Parameters
+    ----------
+    G: matrix representing state transitions
+       Gij is a binary value representing a transition from state i to j.
+
+    s: probability of following a transition. 1-s probability of teleporting
+       to another state.
+
+    maxerr: if the sum of pageranks between iterations is bellow this we will
+            have converged.
+    """
+    n = G.shape[0]
+
+    # transform G into markov matrix A
+    A = csc_matrix(G,dtype=np.float)
+    rsums = np.array(A.sum(1))[:,0]
+    ri, ci = A.nonzero()
+    A.data /= rsums[ri]
+
+    # bool array of sink states
+    sink = rsums==0
+
+    # Compute pagerank r until we converge
+    ro, r = np.zeros(n), np.ones(n)
+    while np.sum(np.abs(r-ro)) > maxerr:
+        ro = r.copy()
+        # calculate each pagerank at a time
+        for i in xrange(0,n):
+            # inlinks of state i
+            Ai = np.array(A[:,i].todense())[:,0]
+            # account for sink states
+            Di = sink / float(n)
+            # account for teleportation to state i
+            Ei = np.ones(n) / float(n)
+
+            r[i] = ro.dot( Ai*s + Di*s + Ei*(1-s) )
+
+    # return normalized pagerank
+    return r/float(sum(r))
+
+
+
+
+if __name__=='__main__':
+    # Example extracted from 'Introduction to Information Retrieval'
+    G = np.array([[0,0,1,0,0,0,0],
+                  [0,1,1,0,0,0,0],
+                  [1,0,1,1,0,0,0],
+                  [0,0,0,1,1,0,0],
+                  [0,0,0,0,0,0,1],
+                  [0,0,0,0,0,1,1],
+                  [0,0,0,1,1,0,1]])
+    print pageRank(G,s=.86)