
Commit 7122ef0

Michael Lou committed
Compute Numerical Gradient and Gradient checking
1 parent d542a89

3 files changed: +111 -3 lines changed


check_numerical_gradient.py

Lines changed: 49 additions & 0 deletions
@@ -0,0 +1,49 @@
+import numpy as np
+
+from compute_numerical_gradient import compute_numerical_gradient
+
+
+def simple_quadratic_function(x):
+    # This function accepts a 2D vector as input.
+    # Its outputs are:
+    #   value: h(x1, x2) = x1^2 + 3*x1*x2
+    #   grad: a 2x1 vector giving the partial derivatives of h with respect to x1 and x2
+    # Note that when we pass simple_quadratic_function to check_numerical_gradient, we assume
+    # that compute_numerical_gradient uses only the first returned value of this function.
+    value = x[0]**2 + 3*x[0]*x[1]
+
+    grad = np.empty_like(x)
+    grad[0] = 2*x[0] + 3*x[1]
+    grad[1] = 3*x[0]
+
+    return value, grad
+
+
+def check_numerical_gradient():
+    # This code can be used to check your numerical gradient implementation
+    # in compute_numerical_gradient.py.
+    # It analytically evaluates the gradient of a very simple function called
+    # simple_quadratic_function (see above) and compares the result with your numerical
+    # solution. Your numerical gradient implementation is incorrect if
+    # your numerical solution deviates too much from the analytical solution.
+
+    # Evaluate the function and gradient at x = [4, 10] (here, x is a 2D vector).
+    x = np.array([4.0, 10.0])
+    value, grad = simple_quadratic_function(x)
+
+    # Use your code to numerically compute the gradient of simple_quadratic_function at x.
+    # (Here simple_quadratic_function is passed as a function object.)
+    numgrad = compute_numerical_gradient(simple_quadratic_function, x)
+
+    # Visually examine the two gradient computations. The two columns
+    # you get should be very similar.
+    print(numgrad, grad)
+    print('The above two columns you get should be very similar.\n'
+          '(Left - your numerical gradient, Right - analytical gradient)\n')
+
+    # Evaluate the norm of the difference between the two solutions.
+    # If you have a correct implementation, and assuming you used EPSILON = 0.0001
+    # in compute_numerical_gradient.py, then diff below should be about 2.1452e-12.
+    diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
+    print(diff)
+    print('Norm of the difference between numerical and analytical gradients (should be < 1e-9)\n')
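
For context on the numbers above: at x = [4, 10] the analytical gradient works out to [2*4 + 3*10, 3*4] = [38, 12], and a correct centered-difference implementation should match it to roughly 1e-12 relative error. A minimal standalone sketch of the same check, assuming only NumPy (the names here are illustrative, not part of the commit):

import numpy as np

def h(x):
    # The same toy function as above: h(x1, x2) = x1^2 + 3*x1*x2
    return x[0]**2 + 3*x[0]*x[1]

x = np.array([4.0, 10.0])
grad = np.array([2*x[0] + 3*x[1], 3*x[0]])   # analytical gradient: [38., 12.]

eps = 1e-4
numgrad = np.empty_like(x)
for i in range(x.size):
    e = np.zeros_like(x)
    e[i] = eps
    numgrad[i] = (h(x + e) - h(x - e)) / (2*eps)   # centered difference

print(numgrad)   # approximately [38. 12.]
print(np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad))   # ~1e-12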

compute_numerical_gradient.py

Lines changed: 32 additions & 0 deletions
@@ -0,0 +1,32 @@
+import numpy as np
+
+
+def compute_numerical_gradient(func, theta):
+    # theta: a vector of parameters
+    # func: a function that outputs a real number. Calling value, _ = func(theta) returns
+    #       the function value at theta.
+
+    # Initialize numgrad
+    numgrad = np.empty_like(theta)
+
+    # Instructions:
+    # Implement numerical gradient checking, and return the result in numgrad.
+    # (See Section 2.3 of the lecture notes.)
+    # You should write code so that numgrad[i] is (the numerical approximation to) the
+    # partial derivative of func with respect to the i-th input argument, evaluated at theta.
+    # I.e., numgrad[i] should be (approximately) the partial derivative of func with
+    # respect to theta[i].
+    #
+    # Hint: You will probably want to compute the elements of numgrad one at a time.
+    EPSILON = 1e-4
+    for i in range(theta.size):
+        theta_i = theta[i]
+        theta[i] = theta_i + EPSILON
+        val_plus = func(theta)[0]
+        theta[i] = theta_i - EPSILON
+        val_minus = func(theta)[0]
+        numgrad[i] = (val_plus - val_minus) / (2*EPSILON)
+        # restore theta[i]
+        theta[i] = theta_i
+
+    return numgrad
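
A usage note on the interface above (illustrative, not part of the commit): because the loop reads func(theta)[0], anything passed as func must return a tuple or sequence whose first element is the scalar function value. A minimal sketch under that assumption:

import numpy as np
from compute_numerical_gradient import compute_numerical_gradient

# Hypothetical test function: f(theta) = sum(theta^2), so the true gradient is 2*theta.
# The second tuple element is a placeholder so that func(theta)[0] picks out the value.
func = lambda theta: ((theta**2).sum(), None)

theta = np.array([1.0, -2.0, 3.0])
numgrad = compute_numerical_gradient(func, theta)
print(numgrad)    # approximately [ 2. -4.  6.]
print(2*theta)    # analytical gradient, for comparison

Note that the implementation perturbs theta in place and restores each entry afterwards, so the caller's theta is unchanged on return.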

train.py

Lines changed: 30 additions & 3 deletions
@@ -6,6 +6,8 @@
 from sample_images import sample_images
 from display_network import display_network
 from sparse_autoencoder_cost import sparse_autoencoder_cost
+from check_numerical_gradient import check_numerical_gradient
+from compute_numerical_gradient import compute_numerical_gradient
 
 
 def initialize_parameters(hidden_size, visible_size):
@@ -43,8 +45,8 @@ def train():
     # display a random sample of 200 patches from the dataset
 
     patches = sample_images()
-    list = [randint(0, patches.shape[0]-1) for i in xrange(64)]
-    display_network(patches[list, :], 8)
+    # list = [randint(0, patches.shape[0]-1) for i in xrange(64)]
+    # display_network(patches[list, :], 8)
 
     # Obtain random parameters theta
     theta = initialize_parameters(hidden_size, visible_size)
@@ -77,7 +79,32 @@ def train():
 
     cost, grad = sparse_autoencoder_cost(theta, visible_size, hidden_size, decay_lambda, sparsity_param, beta, patches)
 
-
+    ## STEP 3: Gradient Checking
+    #
+    # Hint: If you are debugging your code, performing gradient checking on smaller models
+    # and smaller training sets (e.g., using only 10 training examples and 1-2 hidden
+    # units) may speed things up.
+
+    # First, let's make sure your numerical gradient computation is correct for a
+    # simple function. After you have implemented compute_numerical_gradient,
+    # run the following:
+    check_numerical_gradient()
+
+    # Now we can use it to check your cost function and derivative calculations
+    # for the sparse autoencoder.
+    func = lambda x: sparse_autoencoder_cost(x, visible_size, hidden_size,
+                                             decay_lambda, sparsity_param, beta, patches)
+    numgrad = compute_numerical_gradient(func, theta)
+
+    # Use this to visually compare the gradients side by side.
+    print(numgrad, grad)
+
+    # Compare the numerically computed gradients with the ones obtained from backpropagation.
+    diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
+    # Should be small. In our implementation, these values are usually less than 1e-9.
+    print(diff)
+
+    # When you get this working, congratulations!
 
 
 if __name__ == "__main__":
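
Following the hint in STEP 3, gradient checking runs much faster on a shrunken problem, since compute_numerical_gradient calls the cost function twice per parameter. A rough sketch of what that might look like inside train(), reusing the variables defined above; the small_* names are hypothetical, and the slicing assumes patches stores one training example per row:

# Hypothetical shrunken configuration for a quick gradient check.
small_visible, small_hidden = 8, 2
small_patches = patches[:10, :small_visible]   # only 10 training examples
small_theta = initialize_parameters(small_hidden, small_visible)
small_func = lambda x: sparse_autoencoder_cost(x, small_visible, small_hidden,
                                               decay_lambda, sparsity_param, beta, small_patches)
print(compute_numerical_gradient(small_func, small_theta))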
