-- Max Jaderberg 4/9/13
-- GPU Effectiveness test

require 'torch'
require 'sys'
require 'nn'
require 'xlua'

cmd = torch.CmdLine()
cmd:text()
cmd:text('GPU Benchmark. Max Jaderberg.')
cmd:text()
cmd:text('Options:')
cmd:option('-size', 24, 'size of images')
cmd:option('-N', 1000, 'number of images')
cmd:option('-threads', 2, 'number of threads')
cmd:text()
opt = cmd:parse(arg or {})

torch.setnumthreads(opt.threads)
torch.setdefaulttensortype("torch.FloatTensor")

-- Input size
sz = opt.size
finalsize = (sz - 4)/2 -- for a 5x5 convolution followed by 2x2 pooling
Ntest = opt.N

-- Test input
x = torch.rand(Ntest, 3, sz, sz)

-- Simple network: conv -> tanh -> LP pooling -> two fully connected layers
model = nn.Sequential()
model:add(nn.SpatialConvolution(3, 64, 5, 5))
model:add(nn.Tanh())
model:add(nn.SpatialLPPooling(64, 2, 2, 2, 2, 2))
model:add(nn.Reshape(64*finalsize*finalsize))
model:add(nn.Linear(64*finalsize*finalsize, 128))
model:add(nn.Tanh())
model:add(nn.Linear(128, 2))
model:add(nn.Tanh())

criterion = nn.MSECriterion()

-- Test output
y = torch.rand(Ntest, 2)

-- Test routine: one forward/backward pass per image
local runtest = function()
   print('==> Type is '..x:type())
   for i = 1,x:size(1) do
      xlua.progress(i, x:size(1))
      local yp = model:forward(x[i])
      local err = criterion:forward(yp, y[i])
      local df_do = criterion:backward(yp, y[i])
      model:backward(x[i], df_do)
   end
end

-- CPU TEST
cputime0 = sys.clock()
runtest()
cputime1 = sys.clock()
cputime = cputime1 - cputime0
print('CPU Time: '.. (cputime*1000) .. 'ms')

-- GPU TEST
require 'cunn'
x = x:cuda()
y = y:cuda()
model:cuda()
criterion:cuda()

gputime0 = sys.clock()
runtest()
cutorch.synchronize() -- wait for queued GPU kernels to finish before stopping the timer
gputime1 = sys.clock()
gputime = gputime1 - gputime0
print('GPU Time: '.. (gputime*1000) .. 'ms')

print('------------------')
print('GPU speedup: '..cputime/gputime..'x')
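
-- Example invocation (a sketch; the filename gpu_benchmark.lua and the use of
-- the Torch 'th' launcher are assumptions, not part of the original script):
--   th gpu_benchmark.lua -size 32 -N 2000 -threads 4
-- Any of the -size, -N and -threads options declared via torch.CmdLine above
-- can be overridden this way; with no arguments the defaults (24, 1000, 2) apply.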