chyikwei · August 29, 2015 14:07
diff --git a/original b/original
 File: lda.py
 Function: _dirichlet_expectation at line 24
 Total time: 8.96912 s

 Line #      Hits         Time  Per Hit   % Time  Line Contents
 ==============================================================
    24                                           @profile
    25                                           def _dirichlet_expectation(alpha):
    26                                               """
    27                                               For a vector theta ~ Dir(alpha), computes E[log(theta)] given alpha.
    28                                               """
    29    379947       411076      1.1      4.6      if (len(alpha.shape) == 1):
    30    379940      8545062     22.5     95.3          return(psi(alpha) - psi(np.sum(alpha)))
    31         7        12980   1854.3      0.1      return(psi(alpha) - psi(np.sum(alpha, 1))[:, np.newaxis])

 File: lda.py
 Function: _update_gamma at line 33
 Total time: 37.1273 s

 Line #      Hits         Time  Per Hit   % Time  Line Contents
 ==============================================================
    33                                           @profile
    34                                           def _update_gamma(X, expElogbeta, alpha, rng, max_iters,
    35                                                             meanchangethresh, cal_delta):
    36                                               """
    37                                               E-step: update latent variable gamma
    38                                               """
    39
    40         2            8      4.0      0.0      n_docs, n_vocabs = X.shape
    41         2            4      2.0      0.0      n_topics = expElogbeta.shape[0]
    42
    43                                               # gamma is non-normalized topic distribution
    44         2         5032   2516.0      0.0      gamma = rng.gamma(100., 1. / 100., (n_docs, n_topics))
    45         2         5883   2941.5      0.0      expElogtheta = np.exp(_dirichlet_expectation(gamma))
    46                                               # diff on component (only calculated when cal_delta is True)
    47         2           70     35.0      0.0      delta_component = np.zeros(expElogbeta.shape) if cal_delta else None
    48
    49         2            3      1.5      0.0      X_data = X.data
    50         2            2      1.0      0.0      X_indices = X.indices
    51         2            2      1.0      0.0      X_indptr = X.indptr
    52
    53      8002        12721      1.6      0.0      for d in xrange(n_docs):
    54      8000        25173      3.1      0.1          ids = X_indices[X_indptr[d]:X_indptr[d + 1]]
    55      8000        19870      2.5      0.1          cnts = X_data[X_indptr[d]:X_indptr[d + 1]]
    56      8000        30900      3.9      0.1          gammad = gamma[d, :]
    57      8000        26641      3.3      0.1          expElogthetad = expElogtheta[d, :]
    58      8000       104626     13.1      0.3          expElogbetad = expElogbeta[:, ids]
    59                                                   # The optimal phi_{dwk} is proportional to
    60                                                   # expElogthetad_k * expElogbetad_w. phinorm is the normalizer.
    61      8000        79777     10.0      0.2          phinorm = np.dot(expElogthetad, expElogbetad) + 1e-100
    62
    63                                                   # Iterate between gamma and phi until convergence
    64    381325       467124      1.2      1.3          for it in xrange(0, max_iters):
    65    379940       565605      1.5      1.5              lastgamma = gammad
    66                                                       # We represent phi implicitly to save memory and time.
    67                                                       # Substituting the value of the optimal phi back into
    68                                                       # the update for gamma gives this update. Cf. Lee&Seung 2001.
    69    379940       428819      1.1      1.2              gammad = alpha + expElogthetad * \
    70    379940      5605904     14.8     15.1                  np.dot(cnts / phinorm, expElogbetad.T)
    71    379940     12712990     33.5     34.2              expElogthetad = np.exp(_dirichlet_expectation(gammad))
    72    379940      3375137      8.9      9.1              phinorm = np.dot(expElogthetad, expElogbetad) + 1e-100
    73
    74    379940     12524287     33.0     33.7              meanchange = np.mean(abs(gammad - lastgamma))
    75    379940       620657      1.6      1.7              if (meanchange < meanchangethresh):
    76      6615         8065      1.2      0.0                  break
    77      8000        50140      6.3      0.1          gamma[d, :] = gammad
    78                                                   # Contribution of document d to the expected sufficient
    79                                                   # statistics for the M step.
    80      8000         9904      1.2      0.0          if cal_delta:
    81      8000       447906     56.0      1.2              delta_component[:, ids] += np.outer(expElogthetad, cnts / phinorm)
    82
    83         2            3      1.5      0.0      return (gamma, delta_component)
	File: lda.py
	Function: _dirichlet_expectation at line 24
	Total time: 8.96912 s

	Line # Hits Time Per Hit % Time Line Contents
	==============================================================
	24 @profile
	25 def _dirichlet_expectation(alpha):
	26 """
	27 For a vector theta ~ Dir(alpha), computes E[log(theta)] given alpha.
	28 """
	29 379947 411076 1.1 4.6 if (len(alpha.shape) == 1):
	30 379940 8545062 22.5 95.3 return(psi(alpha) - psi(np.sum(alpha)))
	31 7 12980 1854.3 0.1 return(psi(alpha) - psi(np.sum(alpha, 1))[:, np.newaxis])

	File: lda.py
	Function: _update_gamma at line 33
	Total time: 37.1273 s

	Line # Hits Time Per Hit % Time Line Contents
	==============================================================
	33 @profile
	34 def _update_gamma(X, expElogbeta, alpha, rng, max_iters,
	35 meanchangethresh, cal_delta):
	36 """
	37 E-step: update latent variable gamma
	38 """
	39
	40 2 8 4.0 0.0 n_docs, n_vocabs = X.shape
	41 2 4 2.0 0.0 n_topics = expElogbeta.shape[0]
	42
	43 # gamma is non-normalized topic distribution
	44 2 5032 2516.0 0.0 gamma = rng.gamma(100., 1. / 100., (n_docs, n_topics))
	45 2 5883 2941.5 0.0 expElogtheta = np.exp(_dirichlet_expectation(gamma))
	46 # diff on component (only calculated when cal_delta is True)
	47 2 70 35.0 0.0 delta_component = np.zeros(expElogbeta.shape) if cal_delta else None
	48
	49 2 3 1.5 0.0 X_data = X.data
	50 2 2 1.0 0.0 X_indices = X.indices
	51 2 2 1.0 0.0 X_indptr = X.indptr
	52
	53 8002 12721 1.6 0.0 for d in xrange(n_docs):
	54 8000 25173 3.1 0.1 ids = X_indices[X_indptr[d]:X_indptr[d + 1]]
	55 8000 19870 2.5 0.1 cnts = X_data[X_indptr[d]:X_indptr[d + 1]]
	56 8000 30900 3.9 0.1 gammad = gamma[d, :]
	57 8000 26641 3.3 0.1 expElogthetad = expElogtheta[d, :]
	58 8000 104626 13.1 0.3 expElogbetad = expElogbeta[:, ids]
	59 # The optimal phi_{dwk} is proportional to
	60 # expElogthetad_k * expElogbetad_w. phinorm is the normalizer.
	61 8000 79777 10.0 0.2 phinorm = np.dot(expElogthetad, expElogbetad) + 1e-100
	62
	63 # Iterate between gamma and phi until convergence
	64 381325 467124 1.2 1.3 for it in xrange(0, max_iters):
	65 379940 565605 1.5 1.5 lastgamma = gammad
	66 # We represent phi implicitly to save memory and time.
	67 # Substituting the value of the optimal phi back into
	68 # the update for gamma gives this update. Cf. Lee&Seung 2001.
	69 379940 428819 1.1 1.2 gammad = alpha + expElogthetad * \
	70 379940 5605904 14.8 15.1 np.dot(cnts / phinorm, expElogbetad.T)
	71 379940 12712990 33.5 34.2 expElogthetad = np.exp(_dirichlet_expectation(gammad))
	72 379940 3375137 8.9 9.1 phinorm = np.dot(expElogthetad, expElogbetad) + 1e-100
	73
	74 379940 12524287 33.0 33.7 meanchange = np.mean(abs(gammad - lastgamma))
	75 379940 620657 1.6 1.7 if (meanchange < meanchangethresh):
	76 6615 8065 1.2 0.0 break
	77 8000 50140 6.3 0.1 gamma[d, :] = gammad
	78 # Contribution of document d to the expected sufficient
	79 # statistics for the M step.
	80 8000 9904 1.2 0.0 if cal_delta:
	81 8000 447906 56.0 1.2 delta_component[:, ids] += np.outer(expElogthetad, cnts / phinorm)
	82
	83 2 3 1.5 0.0 return (gamma, delta_component)
No results found