@@ -59,52 +59,62 @@ def SGD(self, training_data, epochs, mini_batch_size, eta,
                 training_data[k:k+mini_batch_size]
                 for k in xrange(0, n, mini_batch_size)]
             for mini_batch in mini_batches:
-                self.backprop(mini_batch, eta)
+                self.update_mini_batch(mini_batch, eta)
             if test_data:
                 print "Epoch {}: {} / {}".format(
                     j, self.evaluate(test_data), n_test)
             else:
                 print "Epoch %s complete" % j

-    def backprop(self, training_data, eta):
-        """Update the network's weights and biases by applying a single
-        iteration of gradient descent using backpropagation. The
-        ``training_data`` is a list of tuples ``(x, y)``. It need not
-        include the entire training data set --- it might be a
-        mini-batch, or even a single training example. ``eta`` is the
-        learning rate."""
+    def update_mini_batch(self, mini_batch, eta):
+        """Update the network's weights and biases by applying
+        gradient descent using backpropagation to a single mini batch.
+        The ``mini_batch`` is a list of tuples ``(x, y)``, and ``eta``
+        is the learning rate."""
         nabla_b = [np.zeros(b.shape) for b in self.biases]
         nabla_w = [np.zeros(w.shape) for w in self.weights]
-        for x, y in training_data:
-            # feedforward
-            activation = x
-            activations = [x] # list to store all the activations, layer by layer
-            zs = [] # list to store all the z vectors, layer by layer
-            for b, w in zip(self.biases, self.weights):
-                z = np.dot(w, activation)+b
-                zs.append(z)
-                activation = sigmoid_vec(z)
-                activations.append(activation)
-            # backward pass
-            delta = self.cost_derivative(activations[-1], y) * \
-                sigmoid_prime_vec(zs[-1])
-            nabla_b[-1] += delta
-            nabla_w[-1] += np.dot(delta, activations[-2].transpose())
-            # Note that the variable l in the loop below is used a
-            # little differently to the notation in Chapter 2 of the book.
-            # Here, l = 1 means the last layer of neurons, l = 2 is the
-            # second-last layer, and so on. It's a renumbering of the
-            # scheme used in the book, used here to take advantage of the
-            # fact that Python can use negative indices in lists.
-            for l in xrange(2, self.num_layers):
-                z = zs[-l]
-                spv = sigmoid_prime_vec(z)
-                delta = np.dot(self.weights[-l+1].transpose(), delta) * spv
-                nabla_b[-l] += delta
-                nabla_w[-l] += np.dot(delta, activations[-l-1].transpose())
+        for x, y in mini_batch:
+            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
+            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
+            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
         self.weights = [w-eta*nw for w, nw in zip(self.weights, nabla_w)]
         self.biases = [b-eta*nb for b, nb in zip(self.biases, nabla_b)]

+    def backprop(self, x, y):
+        """Return a tuple ``(nabla_b, nabla_w)`` representing the
+        gradient for the cost function C_x. ``nabla_b`` and
+        ``nabla_w`` are layer-by-layer lists of numpy arrays, similar
+        to ``self.biases`` and ``self.weights``."""
+        nabla_b = [np.zeros(b.shape) for b in self.biases]
+        nabla_w = [np.zeros(w.shape) for w in self.weights]
+        # feedforward
+        activation = x
+        activations = [x] # list to store all the activations, layer by layer
+        zs = [] # list to store all the z vectors, layer by layer
+        for b, w in zip(self.biases, self.weights):
+            z = np.dot(w, activation)+b
+            zs.append(z)
+            activation = sigmoid_vec(z)
+            activations.append(activation)
+        # backward pass
+        delta = self.cost_derivative(activations[-1], y) * \
+            sigmoid_prime_vec(zs[-1])
+        nabla_b[-1] = delta
+        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
+        # Note that the variable l in the loop below is used a little
+        # differently to the notation in Chapter 2 of the book. Here,
+        # l = 1 means the last layer of neurons, l = 2 is the
+        # second-last layer, and so on. It's a renumbering of the
+        # scheme in the book, used here to take advantage of the fact
+        # that Python can use negative indices in lists.
+        for l in xrange(2, self.num_layers):
+            z = zs[-l]
+            spv = sigmoid_prime_vec(z)
+            delta = np.dot(self.weights[-l+1].transpose(), delta) * spv
+            nabla_b[-l] = delta
+            nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
+        return (nabla_b, nabla_w)
+
     def evaluate(self, test_data):
         """Return the number of test inputs for which the neural
         network outputs the correct result. Note that the neural
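For reference, the step that the new ``update_mini_batch`` takes is plain gradient descent on the summed per-example gradients: each call to ``self.backprop(x, y)`` contributes one gradient, the contributions are accumulated into ``nabla_b``/``nabla_w``, and the parameters are then moved by ``eta`` times that sum (this version does not divide by ``len(mini_batch)``). Below is a minimal standalone sketch of that accumulate-then-step pattern, not the repository's code: the array shapes and the ``fake_grads`` list are made up purely for illustration, with toy numpy arrays standing in for ``self.weights``.

import numpy as np

eta = 3.0
# Toy stand-ins for self.weights: one 3x2 and one 1x3 weight matrix.
weights = [np.random.randn(3, 2), np.random.randn(1, 3)]
nabla_w = [np.zeros(w.shape) for w in weights]

# Pretend each entry came from a call like self.backprop(x, y)
# (hypothetical values; backprop itself is not reproduced here).
fake_grads = [[0.1 * np.ones(w.shape) for w in weights] for _ in range(10)]

# Accumulate the per-example gradients, as update_mini_batch does.
for delta_nabla_w in fake_grads:
    nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]

# Take one gradient-descent step; note that eta scales the raw sum
# of gradients, not the mini-batch average.
weights = [w - eta * nw for w, nw in zip(weights, nabla_w)]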