danielvarga · June 9, 2017 22:22
diff --git a/lamblinsNearestNeighbors.py b/lamblinsNearestNeighbors.py
 # Implementing lamblin's workaround at https://github.com/Theano/Theano/issues/1399

 # BEFORE:
 # bestIndices = T.argmin(s, axis=0)
 #   Time in Function.fn.__call__: 3.870916e-02s (99.867%)
 # <% time> <sum %> <apply time> <time per call> <type> <#call> <#apply> <Class name>
 #   77.7%    77.7%       0.029s       2.93e-02s     C        1       1   theano.tensor.basic.MaxAndArgmax
 #    7.0%    84.7%       0.003s       2.64e-03s     C        1       1   theano.sandbox.cuda.basic_ops.HostFromGpu
 #    6.9%    91.6%       0.003s       1.30e-03s     C        2       2   theano.sandbox.cuda.basic_ops.GpuFromHost
 #    6.2%    97.8%       0.002s       2.35e-03s     C        1       1   theano.sandbox.cuda.blas.GpuDot22Scalar

 # AFTER:
 # bestIndices = T.cast( ( T.arange(n).dimshuffle(0, 'x') * T.cast(T.eq(s, s.min(axis=0, keepdims=True)), 'float32') ).sum(axis=0), 'int32')
 #   Time in 1 calls to Function.__call__: 1.155996e-02s
 # <% time> <sum %> <apply time> <time per call> <type> <#call> <#apply> <Class name>
 #   39.9%    39.9%       0.004s       1.06e-03s     C        4       4   theano.sandbox.cuda.basic_ops.GpuCAReduce
 #   23.3%    63.2%       0.002s       1.24e-03s     C        2       2   theano.sandbox.cuda.basic_ops.GpuFromHost
 #   21.8%    85.0%       0.002s       2.32e-03s     C        1       1   theano.sandbox.cuda.blas.GpuDot22Scalar
 #   14.6%    99.6%       0.002s       3.88e-04s     C        4       4   theano.sandbox.cuda.basic_ops.GpuElemwise

 import numpy as np
 import theano
 import theano.tensor as T


 def randomMatrix(n, f):
    return np.random.normal(size=n*f).astype(np.float32).reshape((n, f))

 n = 5000 # number of candidates
 m = 1000 # number of targets
 f = 500  # number of features

 x = T.matrix('x') # candidates
 y = T.matrix('y') # targets

 xL2S = T.sum(x*x, axis=-1) # [n]
 yL2S = T.sum(y*y, axis=-1) # [m]
 xL2SM = T.zeros((m, n)) + xL2S # broadcasting, [m, n]
 yL2SM = T.zeros((n, m)) + yL2S # # broadcasting, [n, m]
 squaredPairwiseDistances = xL2SM.T + yL2SM - 2.0*T.dot(x, y.T) # [n, m]

 np.random.seed(1)

 N = randomMatrix(n, f)
 M = randomMatrix(m, f)

 lamblinsTrick = True

 if lamblinsTrick:
    s = squaredPairwiseDistances
    bestIndices = T.cast( ( T.arange(n).dimshuffle(0, 'x') * T.cast(T.eq(s, s.min(axis=0, keepdims=True)), 'float32') ).sum(axis=0), 'int32')
 else:
    bestIndices = T.argmin(squaredPairwiseDistances, axis=0)

 nearests_fn = theano.function([x, y], bestIndices, profile=True)

 print nearests_fn(N, M).sum()
diff --git a/nearestNeighbors.py b/nearestNeighbors.py
 import numpy as np
 import theano
 import theano.tensor as T


 def randomMatrix(n, f):
    return np.random.normal(size=n*f).astype(np.float32).reshape((n, f))

 n = 5000 # number of candidates
 m = 1000 # number of targets
 f = 500  # number of features

 x = T.matrix('x') # candidates
 y = T.matrix('y') # targets

 xL2S = T.sum(x*x, axis=-1) # [n]
 yL2S = T.sum(y*y, axis=-1) # [m]
 xL2SM = T.zeros((m, n)) + xL2S # broadcasting, [m, n]
 yL2SM = T.zeros((n, m)) + yL2S # # broadcasting, [n, m]
 squaredPairwiseDistances = xL2SM.T + yL2SM - 2.0*T.dot(x, y.T) # [n, m]
 bestIndices = T.argmin(squaredPairwiseDistances, axis=0)

 nearests_fn = theano.function([x, y], bestIndices, profile=True)

 print nearests_fn(randomMatrix(n, f), randomMatrix(m, f)).shape
	# Implementing lamblin's workaround at https://github.com/Theano/Theano/issues/1399

	# BEFORE:
	# bestIndices = T.argmin(s, axis=0)
	# Time in Function.fn.__call__: 3.870916e-02s (99.867%)
	# <% time> <sum %> <apply time> <time per call> <type> <#call> <#apply> <Class name>
	# 77.7% 77.7% 0.029s 2.93e-02s C 1 1 theano.tensor.basic.MaxAndArgmax
	# 7.0% 84.7% 0.003s 2.64e-03s C 1 1 theano.sandbox.cuda.basic_ops.HostFromGpu
	# 6.9% 91.6% 0.003s 1.30e-03s C 2 2 theano.sandbox.cuda.basic_ops.GpuFromHost
	# 6.2% 97.8% 0.002s 2.35e-03s C 1 1 theano.sandbox.cuda.blas.GpuDot22Scalar

	# AFTER:
	# bestIndices = T.cast( ( T.arange(n).dimshuffle(0, 'x') * T.cast(T.eq(s, s.min(axis=0, keepdims=True)), 'float32') ).sum(axis=0), 'int32')
	# Time in 1 calls to Function.__call__: 1.155996e-02s
	# <% time> <sum %> <apply time> <time per call> <type> <#call> <#apply> <Class name>
	# 39.9% 39.9% 0.004s 1.06e-03s C 4 4 theano.sandbox.cuda.basic_ops.GpuCAReduce
	# 23.3% 63.2% 0.002s 1.24e-03s C 2 2 theano.sandbox.cuda.basic_ops.GpuFromHost
	# 21.8% 85.0% 0.002s 2.32e-03s C 1 1 theano.sandbox.cuda.blas.GpuDot22Scalar
	# 14.6% 99.6% 0.002s 3.88e-04s C 4 4 theano.sandbox.cuda.basic_ops.GpuElemwise

	import numpy as np
	import theano
	import theano.tensor as T


	def randomMatrix(n, f):
	return np.random.normal(size=n*f).astype(np.float32).reshape((n, f))

	n = 5000 # number of candidates
	m = 1000 # number of targets
	f = 500 # number of features

	x = T.matrix('x') # candidates
	y = T.matrix('y') # targets

	xL2S = T.sum(x*x, axis=-1) # [n]
	yL2S = T.sum(y*y, axis=-1) # [m]
	xL2SM = T.zeros((m, n)) + xL2S # broadcasting, [m, n]
	yL2SM = T.zeros((n, m)) + yL2S # # broadcasting, [n, m]
	squaredPairwiseDistances = xL2SM.T + yL2SM - 2.0*T.dot(x, y.T) # [n, m]

	np.random.seed(1)

	N = randomMatrix(n, f)
	M = randomMatrix(m, f)

	lamblinsTrick = True

	if lamblinsTrick:
	s = squaredPairwiseDistances
	bestIndices = T.cast( ( T.arange(n).dimshuffle(0, 'x') * T.cast(T.eq(s, s.min(axis=0, keepdims=True)), 'float32') ).sum(axis=0), 'int32')
	else:
	bestIndices = T.argmin(squaredPairwiseDistances, axis=0)

	nearests_fn = theano.function([x, y], bestIndices, profile=True)

	print nearests_fn(N, M).sum()
No results found