
@AcOfSJ
Forked from jiayuzhou/check_grad.m
Created September 18, 2017 01:28

Revisions

  1. @jiayuzhou jiayuzhou revised this gist Dec 2, 2015. 2 changed files with 8 additions and 5 deletions.
    7 changes: 6 additions & 1 deletion check_grad.m
    Original file line number Diff line number Diff line change
    @@ -3,8 +3,13 @@ function check_grad(f, x0, varargin)
    % INPUT
    % f - a function handle of f(x) that returns function values and gradients given parameter x
% x0 - the location near which the gradient will be evaluated.
    %

% For a correct gradient, the displayed ratio should be near 1.0
    %
    % to check why the code works there is a useful link:
    % http://ufldl.stanford.edu/tutorial/supervised/DebuggingGradientChecking/
    %
    % Jiayu, Dec 2, 2015

    delta = rand(size(x0));
    delta = delta ./ norm(delta);
    6 changes: 2 additions & 4 deletions check_grad_example.m → check_grad_example_matrix.m
    @@ -1,7 +1,5 @@
    function check_grad_example(feature_dim, dic_size, sample_size)
% an example of check_grad on dictionary learning (a special version where
% an additional scalar is learned; the scalar is just an example and is not
% necessary in this setting):
    function check_grad_example_matrix(feature_dim, dic_size, sample_size)
    % an example of check_grad on the dictionary learning:
    % min_{alpha, X} || R - alpha * D * X ||_F^2
    %
    % by Jiayu Zhou. July 9, 2015.
  2. @jiayuzhou jiayuzhou revised this gist Dec 2, 2015. 1 changed file with 34 additions and 0 deletions.
    34 changes: 34 additions & 0 deletions check_grad_example_vector.m
    @@ -0,0 +1,34 @@
    function check_grad_example_vector(feature_dim, sample_size)
    % an example of check_grad on the Lasso
    % min_{x} || A * x - y ||_F^2
    %
    % by Jiayu Zhou. Dec 2, 2015.

    if nargin < 1, feature_dim = 500; end
if nargin < 2, sample_size = 30; end

    A = randn(sample_size, feature_dim);
    y = randn(sample_size, 1);
    x0 = rand(feature_dim, 1);

    % closure on the constant variables.
    test_func = @(x) dic_obj(x, A, y);

    % perform testing.
    check_grad(test_func, x0)


    function [f, g] = dic_obj(x, A, y)
    % The function value and gradient of the following objective
    % min_{x} || A * x - y ||_F^2
    % where
% INPUT
% x - the search point
% A, y - the data matrix and the target vector
% OUTPUT
% f - function value
% g - the gradient

    Axy = (A * x - y);
    g = A' * Axy;
    f = 0.5 * sum(Axy.^2);
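For reference, a minimal NumPy translation of this Lasso objective/gradient pair (the data shapes and the one-coordinate finite-difference spot check are illustrative, not part of the gist):

```python
import numpy as np

def lasso_obj(x, A, y):
    # f(x) = 0.5 * ||A x - y||^2 and its gradient A' (A x - y)
    r = A @ x - y
    return 0.5 * r @ r, A.T @ r

# finite-difference spot check on one coordinate
rng = np.random.default_rng(0)
A = rng.standard_normal((30, 50))
y = rng.standard_normal(30)
x = rng.random(50)
_, g = lasso_obj(x, A, y)

h, i = 1e-6, 7
e = np.zeros_like(x); e[i] = 1.0
num = (lasso_obj(x + h * e, A, y)[0] - lasso_obj(x - h * e, A, y)[0]) / (2 * h)
```

Because the objective is quadratic, the central difference `num` should agree with `g[i]` up to rounding error.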
  3. @jiayuzhou jiayuzhou revised this gist Jul 10, 2015. 1 changed file with 55 additions and 0 deletions.
    55 changes: 55 additions & 0 deletions check_grad_example.m
    @@ -0,0 +1,55 @@
    function check_grad_example(feature_dim, dic_size, sample_size)
% an example of check_grad on dictionary learning (a special version where
% an additional scalar is learned; the scalar is just an example and is not
% necessary in this setting):
    % min_{alpha, X} || R - alpha * D * X ||_F^2
    %
    % by Jiayu Zhou. July 9, 2015.

    if nargin < 1, feature_dim = 50; end
    if nargin < 2, dic_size = 20; end
    if nargin < 3, sample_size = 30; end

    Rdata = randn(feature_dim, sample_size);
    Dic = randn(feature_dim, dic_size);
vect0 = rand(dic_size * sample_size + 1, 1);

    % closure on the constant variables.
test_func = @(x) dic_obj(x, Dic, Rdata);

    % perform testing.
    check_grad(test_func, vect0)


    function [f, g] = dic_obj(variable_vect, D, R)
    % The function value and gradient of the following objective
    % min_{alpha, X} || R - alpha * D * X ||_F^2
    % where
% INPUT
% variable_vect - the search point, packed as [X(:); alpha]
% OUTPUT
% f - function value
% g - the vectorized gradient
    % f - function value
    % g - the vectorized gradient

% the dictionary and sample sizes
    dic_size = size(D, 2);
    sample_size = size(R, 2);

    % reshape variables
    a = variable_vect(end);
    X = reshape(variable_vect(1:end-1), [ dic_size, sample_size] );

    aDX = a * D * X;
    RaDX = R - aDX;

    % compute the objective
    f = sum(sum((RaDX).^2));

    % compute gradients
    grad_X = - (2 * a) * D' * RaDX;
    grad_a = - 2 * sum(sum((RaDX' * D)' .* X));
    %grad_a = - 2 * trace((RaDX' * D) * X); % less efficient but readable version

    % the vectorized gradient
    g = [grad_X(:); grad_a];
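A NumPy sketch of the same packed objective may help clarify the reshape and the two gradient terms; note the `order="F"` (column-major) reshape, which mirrors MATLAB's `X(:)`. The data sizes below are illustrative:

```python
import numpy as np

def dic_obj(v, D, R):
    # f(X, a) = ||R - a * D * X||_F^2, variables packed column-major
    # as [X(:); a] to mirror the MATLAB reshape above.
    dic_size, sample_size = D.shape[1], R.shape[1]
    a = v[-1]
    X = v[:-1].reshape((dic_size, sample_size), order="F")
    RaDX = R - a * (D @ X)
    f = np.sum(RaDX ** 2)
    grad_X = -2.0 * a * (D.T @ RaDX)        # d f / d X
    grad_a = -2.0 * np.sum(RaDX * (D @ X))  # d f / d a
    return f, np.concatenate([grad_X.ravel(order="F"), [grad_a]])

rng = np.random.default_rng(0)
D = rng.standard_normal((5, 3))
R = rng.standard_normal((5, 4))
v = rng.random(3 * 4 + 1)
f, g = dic_obj(v, D, R)
```

The objective is quadratic in each individual coordinate (with the others held fixed), so a central difference should match every entry of `g` to rounding precision.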
  4. @jiayuzhou jiayuzhou revised this gist Jul 10, 2015. 1 changed file with 2 additions and 0 deletions.
    2 changes: 2 additions & 0 deletions check_grad.m
    @@ -3,6 +3,8 @@ function check_grad(f, x0, varargin)
    % INPUT
    % f - a function handle of f(x) that returns function values and gradients given parameter x
% x0 - the location near which the gradient will be evaluated.
    %
% For a correct gradient, the displayed ratio should be near 1.0

    delta = rand(size(x0));
    delta = delta ./ norm(delta);
  5. @jiayuzhou jiayuzhou created this gist Jul 10, 2015.
    19 changes: 19 additions & 0 deletions check_grad.m
    @@ -0,0 +1,19 @@
    function check_grad(f, x0, varargin)
    % a simple function that checks the correctness of gradient.
    % INPUT
    % f - a function handle of f(x) that returns function values and gradients given parameter x
% x0 - the location near which the gradient will be evaluted.

    delta = rand(size(x0));
    delta = delta ./ norm(delta);
    epsilon = 10.^[-7:-1];

    [f0, df0] = feval(f, x0, varargin{:});

    for i = 1:length(epsilon)
    [f_left] = feval(f, x0-epsilon(i)*delta, varargin{:});
    [f_right] = feval(f, x0+epsilon(i)*delta, varargin{:});
    ys(i) = (f_right - f_left) / 2;
    ys_hat(i) = df0' * epsilon(i)*delta;
fprintf('epsilon: %e , ratio: %e \n', epsilon(i), ys(i) / ys_hat(i));
    end
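For readers outside MATLAB, a minimal NumPy sketch of the same check (the quadratic self-test at the end is illustrative, not part of the gist):

```python
import numpy as np

def check_grad(f, x0, seed=0):
    # Perturb x0 along a random unit direction and compare the central
    # difference (f_right - f_left) / 2 against the first-order prediction
    # eps * g' * delta; the ratio should be near 1.0 for a correct gradient.
    rng = np.random.default_rng(seed)
    delta = rng.random(x0.shape)
    delta /= np.linalg.norm(delta)
    _, g0 = f(x0)
    ratios = []
    for eps in 10.0 ** np.arange(-7, 0):
        f_left, _ = f(x0 - eps * delta)
        f_right, _ = f(x0 + eps * delta)
        ratios.append(((f_right - f_left) / 2) / (eps * (g0 @ delta)))
        print(f"epsilon: {eps:.0e}, ratio: {ratios[-1]:.6f}")
    return ratios

# quick self-test on f(x) = 0.5 * ||x||^2, whose gradient is x
ratios = check_grad(lambda x: (0.5 * np.sum(x ** 2), x), np.ones(5))
```

Returning the ratios (in addition to printing them) makes the check easy to assert on in automated tests.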