dofile('./nn/rop.lua')
-- dofile('./rop_0.lua')
-- include('SUMCriterion.lua')

-- Finite-difference check of the R-operator (Hessian-vector product)
-- implementation in rop.lua: compare Hv computed analytically via
-- rForward/rBackward against a finite-difference estimate of H*v.

local input_size = 3
local target_size = 4
local hidden_size = 2
-- local mini_batch_size = 11

local input = torch.randn(input_size)
local target = torch.rand(target_size)
-- local target = 2

local eps = 1e-6

-------------------------------------------------------------------------------
-------------------------- 1: Hv via the R-op ----------------------------------
-------------------------------------------------------------------------------
local model = nn.Sequential()
model:add(nn.Linear(input_size, target_size))
-- model:add(nn.Tanh())
-- model:add(nn.Linear(hidden_size, target_size))
-- model:add(nn.Sigmoid())
-- model:add(nn.Linear(target_size, target_size))
-- model:add(nn.Sigmoid())
model:add(nn.LogSoftMax())

local criterion = nn.MSECriterion()
-- local criterion = SUMCriterion()
-- local criterion = nn.ClassNLLCriterion()

-- rop.lua's getParameters() returns the flattened parameters and gradients
-- plus their R-op counterparts; rParameters holds the direction v.
local parameters, gradParameters, rParameters, rGradParameters = model:getParameters()
parameters:randn(parameters:size())
rParameters:randn(parameters:size())
-- rParameters:fill(0)
-- rParameters[1] = 1
local parameters_backup = parameters:clone()
local rParameters_backup = rParameters:clone()

-- Ordinary forward/backward pass: gradParameters ends up holding grad f(theta).
local pred = model:forward(input)
local obj = criterion:forward(pred, target)
local df_do = criterion:backward(pred, target)
-- df_do:mul(mini_batch_size*target_size/2)
-- local df_do = torch.ones(mini_batch_size, target_size)
model:backward(input, df_do)

-- R-op passes. rBackward takes (input, rInput, gradOutput, rGradOutput):
-- rInput is always zero, gradOutput comes from criterion:backward, and
-- rGradOutput comes from criterion:rBackward.
local r_pred = model:rForward(input)
local rGradOutput = criterion:rBackward(r_pred, target)
-- local rGradOutput = torch.zeros(mini_batch_size, target_size)
model:rBackward(input, torch.zeros(input_size), df_do, rGradOutput)

-- After the R-op backward pass, rGradParameters holds Hv.
g1 = gradParameters:clone()
Hv1 = rGradParameters:clone()

-------------------------------------------------------------------------------
-------------------------- 2: Hv via finite differences ------------------------
-------------------------------------------------------------------------------
local model2 = nn.Sequential()
model2:add(nn.Linear(input_size, target_size))
-- model2:add(nn.Tanh())
-- model2:add(nn.Linear(hidden_size, target_size))
-- model2:add(nn.Sigmoid())
-- model2:add(nn.Linear(target_size, target_size))
-- model2:add(nn.Sigmoid())
model2:add(nn.LogSoftMax())

local criterion2 = nn.MSECriterion()
-- local criterion2 = SUMCriterion()
-- local criterion2 = nn.ClassNLLCriterion()

local parameters2, gradParameters2, rParameters2, rGradParameters2 = model2:getParameters()
-- Perturb the parameters along v: parameters2 = parameters + eps * v.
parameters2:mul(rParameters_backup, eps)
parameters2:add(parameters_backup)
rParameters2:copy(rParameters_backup)
-- rParameters2 = rParameters_backup:clone()

local pred2 = model2:forward(input)
local obj2 = criterion2:forward(pred2, target)
local df_do2 = criterion2:backward(pred2, target)
-- df_do2:mul(mini_batch_size*target_size/2)
-- local df_do2 = torch.ones(mini_batch_size, target_size)
model2:backward(input, df_do2)

local r_pred2 = model2:rForward(input)
local rGradOutput2 = criterion2:rBackward(r_pred2, target)
-- local rGradOutput2 = torch.zeros(mini_batch_size, target_size)
model2:rBackward(input, torch.zeros(input_size), df_do2, rGradOutput2)

g2 = gradParameters2:clone()
Hv2 = rGradParameters2:clone()
-- Hv2 = torch.Tensor(rGradParameters2:size())
-- Hv2:copy(rGradParameters2)

-- One-sided finite-difference estimate of the Hessian-vector product.
Hv_estimate = g2:add(-g1):div(eps)
-- Hv_estimate:div(mini_batch_size)
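-- Why this check works (Pearlmutter's trick): for a loss f with gradient
-- g(theta), the Hessian-vector product satisfies
--   H*v = lim_{eps -> 0} ( g(theta + eps*v) - g(theta) ) / eps,
-- so the one-sided difference (g2 - g1) / eps above approximates H*v with
-- O(eps) truncation error. Hv1 from section 1 is the exact R-op value, so
-- the two should agree up to truncation and floating-point noise.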
diff = torch.add(Hv_estimate, -Hv1):norm()
print("diff: " .. diff)
-- print("Hv1 norm: " .. Hv1:norm())
-- print("Hv2 norm: " .. Hv2:norm())

-- Diagnostics: norms of the differences between the two models' states.
print(torch.add(parameters, -parameters2):norm())
print(torch.add(gradParameters, -gradParameters2):norm())
print(torch.add(rParameters, -rParameters2):norm())
print(torch.add(rGradParameters, -rGradParameters2):norm())

assert(diff < 1e-4, 'r-op check failed')
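-- A minimal sketch (not part of the original check) wrapping the R-op pass
-- above into a reusable Hessian-vector-product helper. It assumes the
-- rop.lua API exercised in this file: getParameters() returning the four
-- flattened tensors, and model:rForward/model:rBackward with the signatures
-- used above. The function name is hypothetical; the flattened tensors are
-- passed in because getParameters() should only be called once per model.
local function hessianVectorProduct(model, criterion, x, y, v,
                                    gradParams, rParams, rGradParams)
   rParams:copy(v)                 -- load the direction v
   gradParams:zero()               -- backward accumulates, so clear first
   rGradParams:zero()
   local out = model:forward(x)
   criterion:forward(out, y)
   local gradOut = criterion:backward(out, y)
   model:backward(x, gradOut)      -- gradParams <- grad f(theta)
   local rOut = model:rForward(x)  -- R-op forward pass
   local rGradOut = criterion:rBackward(rOut, y)
   model:rBackward(x, torch.zeros(x:size()), gradOut, rGradOut)
   return rGradParams:clone()      -- rGradParams now holds H*v
end

-- Example use, e.g. as the matrix-vector product inside a Hessian-free /
-- truncated-Newton optimizer:
-- local Hv = hessianVectorProduct(model, criterion, input, target,
--                                 rParameters_backup,
--                                 gradParameters, rParameters, rGradParameters)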