Skip to content

Instantly share code, notes, and snippets.

@cswhjiang
Created October 9, 2015 22:30
Show Gist options
  • Save cswhjiang/a57e51d53fb45d850cc2 to your computer and use it in GitHub Desktop.

Revisions

  1. cswhjiang revised this gist Oct 9, 2015. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions check_logsoftmax.lua
    Original file line number Diff line number Diff line change
    @@ -1,5 +1,5 @@
    --dofile('./nn/rop.lua')
    dofile('./rop_0.lua')
    dofile('./nn/rop.lua')
    -- dofile('./rop_0.lua')
    --include('SUMCriterion.lua')

    local input_size = 3
  2. cswhjiang created this gist Oct 9, 2015.
    113 changes: 113 additions & 0 deletions check_logsoftmax.lua
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,113 @@
    -- Finite-difference check of the R-operator (Hessian-vector product, Hv)
    -- for a Linear -> LogSoftMax model under MSE loss.
    --
    -- Idea: for loss gradient g(theta) and a direction v,
    --   (g(theta + eps*v) - g(theta)) / eps  ≈  H(theta) * v
    -- Model 1 evaluates g and Hv analytically at theta via the R-op
    -- (rForward/rBackward); model 2 re-evaluates g at theta + eps*v.
    -- The two estimates of Hv must agree up to O(eps).
    --
    -- NOTE(review): rForward/rBackward and the 4-value getParameters come
    -- from the R-op extension loaded below, not stock torch/nn — confirm
    -- rop_0.lua (or nn/rop.lua) is on the path before running.
    --dofile('./nn/rop.lua')
    dofile('./rop_0.lua')
    --include('SUMCriterion.lua')

    local input_size = 3
    local target_size = 4
    local hidden_size = 2
    --local mini_batch_size = 11

    local input = torch.randn(input_size)
    local target = torch.rand(target_size)
    --local target = 2

    -- Perturbation size for the finite-difference approximation.
    local eps = 1e-6

    -------------------------------------------------------------------------------
    -------------------------- 1: analytic Hv at theta ----------------------------
    -------------------------------------------------------------------------------
    local model = nn.Sequential()
    model:add(nn.Linear(input_size, target_size))
    -- model:add(nn.Tanh())
    -- model:add(nn.Linear(hidden_size, target_size))
    -- model:add(nn.Sigmoid())
    -- model:add(nn.Linear(target_size, target_size))
    -- model:add(nn.Sigmoid())
    model:add(nn.LogSoftMax())
    local criterion = nn.MSECriterion()
    --local criterion = SUMCriterion()
    --local criterion = nn.ClassNLLCriterion()

    -- R-op extension returns flat views of (theta, dL/dtheta, v, Hv).
    local parameters, gradParameters, rParameters, rGradParameters = model:getParameters()

    -- Random starting point theta and random direction v.
    parameters:randn(parameters:size())
    rParameters:randn(parameters:size())
    --rParameters:fill(0)
    --rParameters[1] = 1
    -- Keep copies so model 2 can be placed at theta + eps*v exactly.
    local parameters_backup = parameters:clone()
    local rParameters_backup = rParameters:clone()

    local pred = model:forward(input)
    local obj = criterion:forward(pred, target)

    local df_do = criterion:backward(pred, target)
    --df_do:mul(mini_batch_size*target_size/2)
    --local df_do = torch.ones(mini_batch_size,target_size )
    model:backward(input, df_do)

    local r_pred = model:rForward(input)
    -- rBackward arguments: input, rInput, gradOutput, rGradOutput.
    -- rInput is always 0 here (we perturb parameters, not the input);
    -- gradOutput comes from criterion:backward, rGradOutput from criterion:rBackward.
    local rGradOutput = criterion:rBackward(r_pred, target)
    --local rGradOutput = torch.zeros( mini_batch_size, target_size)
    model:rBackward(input, torch.zeros(input_size), df_do, rGradOutput) -- compute Hv

    local g1 = gradParameters:clone()   -- g(theta)
    local Hv1 = rGradParameters:clone() -- analytic Hv

    -------------------------------------------------------------------------------
    -------------------------- 2: gradient at theta + eps*v -----------------------
    -------------------------------------------------------------------------------
    local model2 = nn.Sequential()
    model2:add(nn.Linear(input_size, target_size))
    -- model2:add(nn.Tanh())
    -- model2:add(nn.Linear(hidden_size, target_size))
    -- model2:add(nn.Sigmoid())
    -- model2:add(nn.Linear(target_size, target_size))
    -- model2:add(nn.Sigmoid())
    model2:add(nn.LogSoftMax())
    local criterion2 = nn.MSECriterion()
    --local criterion2 = SUMCriterion()
    --local criterion2 = nn.ClassNLLCriterion()
    local parameters2, gradParameters2, rParameters2, rGradParameters2 = model2:getParameters()

    -- parameters2 = parameters_backup + eps * rParameters_backup  (theta + eps*v)
    parameters2:mul(rParameters_backup, eps)
    parameters2:add(parameters_backup)
    rParameters2:copy(rParameters_backup)
    --rParameters2 = rParameters_backup:clone()

    local pred2 = model2:forward(input)
    local obj2 = criterion2:forward(pred2, target)

    local df_do2 = criterion2:backward(pred2, target)
    --df_do2:mul(mini_batch_size*target_size/2)
    --local df_do2 = torch.ones(mini_batch_size,target_size )
    model2:backward(input, df_do2)

    local r_pred2 = model2:rForward(input)
    local rGradOutput2 = criterion2:rBackward(r_pred2, target)
    --local rGradOutput2 = torch.zeros( mini_batch_size, target_size)
    model2:rBackward(input, torch.zeros(input_size), df_do2, rGradOutput2)

    local g2 = gradParameters2:clone()   -- g(theta + eps*v)
    local Hv2 = rGradParameters2:clone() -- analytic Hv at the perturbed point (unused below)
    --Hv2 = torch.Tensor(rGradParameters2:size())
    --Hv2:copy(rGradParameters2)

    -- Finite-difference estimate; note g2 is mutated in place here.
    local Hv_estimate = g2:add(-g1):div(eps)
    --Hv_estimate:div(mini_batch_size)
    -- Compute the discrepancy once and reuse it for both the report and the check.
    local diff = torch.add(Hv_estimate, -Hv1):norm()
    print("diff: " .. diff)

    --print("Hv1 norm: " .. Hv1:norm())
    --print("Hv2 norm: " .. Hv2:norm())

    -- Sanity prints: parameter/gradient tensors of the two models should differ
    -- only by the eps-sized perturbation.
    print(torch.add(parameters,-parameters2):norm())
    print(torch.add(gradParameters,-gradParameters2):norm())
    print(torch.add(rParameters,-rParameters2):norm())
    print(torch.add(rGradParameters,-rGradParameters2):norm())

    assert(diff < 1e-4,'r-op check failed')