
Commit 73a3f68

Separating backprop and SGD more cleanly
1 parent 4834742 commit 73a3f68

File tree: 1 file changed

code/network_basic.py: 45 additions, 35 deletions
@@ -59,52 +59,62 @@ def SGD(self, training_data, epochs, mini_batch_size, eta,
                 training_data[k:k+mini_batch_size]
                 for k in xrange(0, n, mini_batch_size)]
             for mini_batch in mini_batches:
-                self.backprop(mini_batch, eta)
+                self.update_mini_batch(mini_batch, eta)
             if test_data:
                 print "Epoch {}: {} / {}".format(
                     j, self.evaluate(test_data), n_test)
             else:
                 print "Epoch %s complete" % j
 
-    def backprop(self, training_data, eta):
-        """Update the network's weights and biases by applying a single
-        iteration of gradient descent using backpropagation. The
-        ``training_data`` is a list of tuples ``(x, y)``. It need not
-        include the entire training data set --- it might be a
-        mini-batch, or even a single training example. ``eta`` is the
-        learning rate."""
+    def update_mini_batch(self, mini_batch, eta):
+        """Update the network's weights and biases by applying a
+        gradient descent using backpropagation to a single mini batch.
+        The ``mini_batch`` is a list of tuples ``(x, y)``, and ``eta``
+        is the learning rate."""
         nabla_b = [np.zeros(b.shape) for b in self.biases]
         nabla_w = [np.zeros(w.shape) for w in self.weights]
-        for x, y in training_data:
-            # feedforward
-            activation = x
-            activations = [x] # list to store all the activations, layer by layer
-            zs = [] # list to store all the z vectors, layer by layer
-            for b, w in zip(self.biases, self.weights):
-                z = np.dot(w, activation)+b
-                zs.append(z)
-                activation = sigmoid_vec(z)
-                activations.append(activation)
-            # backward pass
-            delta = self.cost_derivative(activations[-1], y) * \
-                sigmoid_prime_vec(zs[-1])
-            nabla_b[-1] += delta
-            nabla_w[-1] += np.dot(delta, activations[-2].transpose())
-            # Note that the variable l in the loop below is used a
-            # little differently to the notation in Chapter 2 of the book.
-            # Here, l = 1 means the last layer of neurons, l = 2 is the
-            # second-last layer, and so on. It's a renumbering of the
-            # scheme used in the book, used here to take advantage of the
-            # fact that Python can use negative indices in lists.
-            for l in xrange(2, self.num_layers):
-                z = zs[-l]
-                spv = sigmoid_prime_vec(z)
-                delta = np.dot(self.weights[-l+1].transpose(), delta) * spv
-                nabla_b[-l] += delta
-                nabla_w[-l] += np.dot(delta, activations[-l-1].transpose())
+        for x, y in mini_batch:
+            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
+            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
+            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
         self.weights = [w-eta*nw for w, nw in zip(self.weights, nabla_w)]
         self.biases = [b-eta*nb for b, nb in zip(self.biases, nabla_b)]
 
+    def backprop(self, x, y):
+        """Return a tuple ``(nabla_b, nabla_w)`` representing the
+        gradient for the cost function C_x. ``nabla_b`` and
+        ``nabla_w`` are layer-by-layer lists of numpy arrays, similar
+        to ``self.biases`` and ``self.weights``."""
+        nabla_b = [np.zeros(b.shape) for b in self.biases]
+        nabla_w = [np.zeros(w.shape) for w in self.weights]
+        # feedforward
+        activation = x
+        activations = [x] # list to store all the activations, layer by layer
+        zs = [] # list to store all the z vectors, layer by layer
+        for b, w in zip(self.biases, self.weights):
+            z = np.dot(w, activation)+b
+            zs.append(z)
+            activation = sigmoid_vec(z)
+            activations.append(activation)
+        # backward pass
+        delta = self.cost_derivative(activations[-1], y) * \
+            sigmoid_prime_vec(zs[-1])
+        nabla_b[-1] = delta
+        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
+        # Note that the variable l in the loop below is used a little
+        # differently to the notation in Chapter 2 of the book. Here,
+        # l = 1 means the last layer of neurons, l = 2 is the
+        # second-last layer, and so on. It's a renumbering of the
+        # scheme in the book, used here to take advantage of the fact
+        # that Python can use negative indices in lists.
+        for l in xrange(2, self.num_layers):
+            z = zs[-l]
+            spv = sigmoid_prime_vec(z)
+            delta = np.dot(self.weights[-l+1].transpose(), delta) * spv
+            nabla_b[-l] = delta
+            nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
+        return (nabla_b, nabla_w)
+
     def evaluate(self, test_data):
         """Return the number of test inputs for which the neural
         network outputs the correct result. Note that the neural
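For reference, the update applied by update_mini_batch corresponds to the following rule, sketched in the book's notation; note that at this commit the summed gradient is scaled by the learning rate eta alone, with no division by the mini-batch size:

w \rightarrow w' = w - \eta \sum_{x \in \text{mini batch}} \frac{\partial C_x}{\partial w},
\qquad
b \rightarrow b' = b - \eta \sum_{x \in \text{mini batch}} \frac{\partial C_x}{\partial b}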

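The new backprop method returns the per-example gradient computed with the standard backpropagation equations of Chapter 2, roughly as below (book notation: \sigma is the sigmoid, z^l and a^l are the weighted inputs and activations of layer l, and L is the output layer; the negative-index loop in the code is just a renumbering of the layer index l):

\delta^L = \nabla_a C_x \odot \sigma'(z^L)

\delta^l = \bigl( (w^{l+1})^T \delta^{l+1} \bigr) \odot \sigma'(z^l)

\frac{\partial C_x}{\partial b^l} = \delta^l,
\qquad
\frac{\partial C_x}{\partial w^l} = \delta^l (a^{l-1})^T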
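A minimal usage sketch of the refactored interface. This is hypothetical: it assumes code/network_basic.py is importable as network_basic, that the Network constructor takes a list of layer sizes as in the book's Chapter 1 code, and it uses random toy data in place of MNIST; only backprop and update_mini_batch are exercised, exactly as they are defined in the diff above.

import numpy as np
import network_basic  # assumed import name for code/network_basic.py

# Hypothetical toy mini-batch: 10 examples with 784-dimensional inputs
# and 10-dimensional one-hot targets, stored as numpy column vectors,
# matching the (x, y) tuple format described in the docstrings above.
mini_batch = []
for _ in xrange(10):
    x = np.random.randn(784, 1)
    y = np.zeros((10, 1))
    y[np.random.randint(10)] = 1.0
    mini_batch.append((x, y))

# Assumed constructor: a list of layer sizes, here 784-30-10.
net = network_basic.Network([784, 30, 10])

# backprop returns the per-example gradient as layer-by-layer lists of
# numpy arrays, shaped like net.biases and net.weights.
nabla_b, nabla_w = net.backprop(*mini_batch[0])
assert [nb.shape for nb in nabla_b] == [b.shape for b in net.biases]
assert [nw.shape for nw in nabla_w] == [w.shape for w in net.weights]

# update_mini_batch sums those per-example gradients over the batch
# and applies a single gradient-descent step with learning rate eta.
net.update_mini_batch(mini_batch, eta=3.0)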