Skip to content

Instantly share code, notes, and snippets.

@diogojc
Created November 3, 2011 23:11
Show Gist options
  • Save diogojc/1338222 to your computer and use it in GitHub Desktop.
Save diogojc/1338222 to your computer and use it in GitHub Desktop.

Revisions

  1. diogojc revised this gist Nov 5, 2011. 1 changed file with 24 additions and 16 deletions.
    40 changes: 24 additions & 16 deletions pagerank.py
    Original file line number Diff line number Diff line change
    @@ -1,28 +1,35 @@
    import numpy as np
    from scipy.sparse import csc_matrix

    def pageRank(G, s = .85, maxerr = .0001):
    def pageRank(G, s = .85, maxerr = .001):
    """
    Computes the pagerank for each of the n states
    Computes the pagerank for each of the n states.
    Parameters
    Used in webpage ranking and text summarization using unweighted
    or weighted transitions respectively.
    Args
    ----------
    G: matrix representing state transitions
    Gij is a binary value representing a transition from state i to j.
    Gij can be a boolean or non negative real number representing the
    transition weight from state i to j.
    Kwargs
    ----------
    s: probability of following a transition. 1-s probability of teleporting
    to another state.
    to another state. Defaults to 0.85
    maxerr: if the sum of pageranks between iterations is bellow this we will
    have converged.
    have converged. Defaults to 0.001
    """
    n = G.shape[0]

    # transform G into markov matrix A
    A = csc_matrix(G,dtype=np.float)
    rsums = np.array(A.sum(1))[:,0]
    ri, ci = A.nonzero()
    A.data /= rsums[ri]
    # transform G into markov matrix M
    M = csc_matrix(G,dtype=np.float)
    rsums = np.array(M.sum(1))[:,0]
    ri, ci = M.nonzero()
    M.data /= rsums[ri]

    # bool array of sink states
    sink = rsums==0
    @@ -34,16 +41,16 @@ def pageRank(G, s = .85, maxerr = .0001):
    # calculate each pagerank at a time
    for i in xrange(0,n):
    # inlinks of state i
    Ai = np.array(A[:,i].todense())[:,0]
    Ii = np.array(M[:,i].todense())[:,0]
    # account for sink states
    Di = sink / float(n)
    Si = sink / float(n)
    # account for teleportation to state i
    Ei = np.ones(n) / float(n)
    Ti = np.ones(n) / float(n)

    r[i] = ro.dot( Ai*s + Di*s + Ei*(1-s) )
    r[i] = ro.dot( Ii*s + Si*s + Ti*(1-s) )

    # return normalized pagerank
    return r/float(sum(r))
    return r/sum(r)



    @@ -57,4 +64,5 @@ def pageRank(G, s = .85, maxerr = .0001):
    [0,0,0,0,0,0,1],
    [0,0,0,0,0,1,1],
    [0,0,0,1,1,0,1]])

    print pageRank(G,s=.86)
  2. diogojc created this gist Nov 3, 2011.
    60 changes: 60 additions & 0 deletions pagerank.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,60 @@
    import numpy as np
    from scipy.sparse import csc_matrix

    def pageRank(G, s = .85, maxerr = .0001):
    """
    Computes the pagerank for each of the n states
    Parameters
    ----------
    G: matrix representing state transitions
    Gij is a binary value representing a transition from state i to j.
    s: probability of following a transition. 1-s probability of teleporting
    to another state.
    maxerr: if the sum of pageranks between iterations is bellow this we will
    have converged.
    """
    n = G.shape[0]

    # transform G into markov matrix A
    A = csc_matrix(G,dtype=np.float)
    rsums = np.array(A.sum(1))[:,0]
    ri, ci = A.nonzero()
    A.data /= rsums[ri]

    # bool array of sink states
    sink = rsums==0

    # Compute pagerank r until we converge
    ro, r = np.zeros(n), np.ones(n)
    while np.sum(np.abs(r-ro)) > maxerr:
    ro = r.copy()
    # calculate each pagerank at a time
    for i in xrange(0,n):
    # inlinks of state i
    Ai = np.array(A[:,i].todense())[:,0]
    # account for sink states
    Di = sink / float(n)
    # account for teleportation to state i
    Ei = np.ones(n) / float(n)

    r[i] = ro.dot( Ai*s + Di*s + Ei*(1-s) )

    # return normalized pagerank
    return r/float(sum(r))




    if __name__=='__main__':
    # Example extracted from 'Introduction to Information Retrieval'
    G = np.array([[0,0,1,0,0,0,0],
    [0,1,1,0,0,0,0],
    [1,0,1,1,0,0,0],
    [0,0,0,1,1,0,0],
    [0,0,0,0,0,0,1],
    [0,0,0,0,0,1,1],
    [0,0,0,1,1,0,1]])
    print pageRank(G,s=.86)