
Commit 0edd84e

Rationalized the data format in mnist_loader, and minor improvements and simplifications for network_basic
1 parent 372a869 commit 0edd84e


2 files changed: +50 -62 lines changed


code/mnist_loader.py

Lines changed: 38 additions & 36 deletions
@@ -3,10 +3,9 @@
 ~~~~~~~~~~~~

 A library to load the MNIST image data. For details of the data
-structures that are returned, see the doc string for ``load_data``.
-The library also contains a helper method ``load_data_nn`` which
-returns the data in a format well adapted for use with our neural
-network code.
+structures that are returned, see the doc strings for ``load_data``
+and ``load_data_wrapper``. In practice, ``load_data_wrapper`` is the
+function usually called by our neural network code.

 Note that the code requires the file ``../data/mnist.pkl``. This is not
 included in the repository. It may be downloaded from:
@@ -40,46 +39,49 @@ def load_data():
     The ``validation_data`` and ``test_data`` are similar, except
     each contains only 10,000 images.

-    Note that the format the data is returned in is well adapted for
-    use by scikit-learn's SVM method, but not so well adapted for our
-    neural network code. For that, see the wrapper function
-    ``load_data_nn``.
+    This is a nice and convenient data format, but for use in neural
+    networks it's actually helpful to modify the format of the
+    ``training_data`` a little. That's done in the wrapper function
+    ``load_data_wrapper()``, see below.
     """
     f = open('../data/mnist.pkl', 'rb')
     training_data, validation_data, test_data = cPickle.load(f)
     f.close()
     return (training_data, validation_data, test_data)

-def load_data_nn():
-    """Return a tuple containing ``(training_data, test_inputs,
-    actual_test_results)`` from the MNIST data. The tuples are in a
-    format optimized for use by our neural network code. This
-    function makes use of ``load_data()``, but does some additional
-    processing to put the data in the right format.
-
-    ``training_data`` is a list containing 50,000 2-tuples ``(x, y)``.
-    ``x`` is a 784-dimensional numpy.ndarray containing the input
-    image. ``y`` is a 10-dimensional numpy.ndarray representing the
-    unit vector corresponding to the correct digit for ``x``.
-
-    ``test_inputs`` is a list containing 10,000 x 784-dimensional
-    numpy.ndarray objects, representing test images.
-
-    ``actual_test_results`` is a list containing the 10,000 digit
-    values (integers) corresponding to the ``test_inputs``.
-
-    Obviously, we're using slightly different formats for the training
-    and test data. These formats turn out to be the most convenient
-    for use in our neural network code."""
-    training_data, validation_data, test_data = load_data()
-    inputs = [np.reshape(x, (784, 1)) for x in training_data[0]]
-    results = [vectorized_result(y) for y in training_data[1]]
-    training_data = zip(inputs, results)
-    test_inputs = [np.reshape(x, (784, 1)) for x in test_data[0]]
-    return (training_data, test_inputs, test_data[1])
+def load_data_wrapper():
+    """Return a tuple containing ``(training_data, validation_data,
+    test_data)``. Based on ``load_data``, but the format is a little more
+    convenient for use in neural networks.
+
+    In particular, ``training_data`` is a list containing 50,000
+    2-tuples ``(x, y)``. ``x`` is a 784-dimensional numpy.ndarray
+    containing the input image. ``y`` is a 10-dimensional
+    numpy.ndarray representing the unit vector corresponding to the
+    correct digit for ``x``.
+
+    ``validation_data`` and ``test_data`` are lists containing 10,000
+    2-tuples ``(x, y)``. In each case, ``x`` is a 784-dimensional
+    numpy.ndarray containing the input image, and ``y`` is the
+    corresponding classification, i.e., the digit values (integers)
+    corresponding to ``x``.
+
+    Obviously, this means we're using slightly different formats for
+    the training data and the validation / test data. These formats
+    turn out to be the most convenient for use in our neural network
+    code."""
+    tr_d, va_d, te_d = load_data()
+    training_inputs = [np.reshape(x, (784, 1)) for x in tr_d[0]]
+    training_results = [vectorized_result(y) for y in tr_d[1]]
+    training_data = zip(training_inputs, training_results)
+    validation_inputs = [np.reshape(x, (784, 1)) for x in va_d[0]]
+    validation_data = zip(validation_inputs, va_d[1])
+    test_inputs = [np.reshape(x, (784, 1)) for x in te_d[0]]
+    test_data = zip(test_inputs, te_d[1])
+    return (training_data, validation_data, test_data)

 def vectorized_result(j):
-    """ Return a 10-dimensional unit vector with a 1.0 in the jth
+    """Return a 10-dimensional unit vector with a 1.0 in the jth
     position and zeroes elsewhere. This is used to convert a digit
     (0...9) into a corresponding desired output from the neural
     network."""
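
For readers trying out the revised loader, here is a minimal sketch of how the new format might be inspected. It is not part of the commit: it assumes ``../data/mnist.pkl`` has been downloaded as described in the docstring and that ``mnist_loader`` is on the Python path, and the expected layouts are read off the docstring above.

    # Hypothetical sanity check of the new data format; not part of this commit.
    import mnist_loader

    training_data, validation_data, test_data = mnist_loader.load_data_wrapper()

    x, y = training_data[0]
    print x.shape    # (784, 1): one input image as a column vector
    print y.shape    # a 10-dimensional unit vector marking the correct digit

    vx, vy = validation_data[0]
    print vx.shape   # (784, 1), same layout as the training inputs
    print vy         # an integer label 0..9, not a unit vector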

code/network_basic.py

Lines changed: 12 additions & 26 deletions
@@ -42,7 +42,7 @@ def feedforward(self, a):
         return a

     def SGD(self, training_data, epochs, mini_batch_size, eta,
-            lmbda, test=False, test_data=None)
+            lmbda, test=False, test_data=None):
         """Train the neural network using mini-batch stochastic
         gradient descent. The ``training_data`` is a list of tuples
         ``(x, y)`` representing the training inputs and the desired
@@ -64,16 +64,16 @@ def SGD(self, training_data, epochs, mini_batch_size, eta,
                 self.backprop(mini_batch, n, eta, lmbda)
             if test:
                 print "Epoch {}: {} / {}".format(
-                    j, self.evaluate(test_inputs, actual_test_results), n_test)
+                    j, self.evaluate(test_data), n_test)
             else:
                 print "Epoch %s complete" % j

-    def backprop(self, training_data, T, eta, lmbda):
+    def backprop(self, training_data, n, eta, lmbda):
         """Update the network's weights and biases by applying a
         single iteration of gradient descent using backpropagation.
         The ``training_data`` is a list of tuples ``(x, y)``. It need
         not include the entire training data set --- it might be a
-        mini-batch, or even a single training example. ``T`` is the
+        mini-batch, or even a single training example. ``n`` is the
         size of the total training set (which may not be the same as
         the size of ``training_data``). The other parameters are
         self-explanatory."""
@@ -108,19 +108,18 @@ def backprop(self, training_data, T, eta, lmbda):
                 nabla_b[-l] += delta
                 nabla_w[-l] += np.dot(delta, activations[-l-1].transpose())
         # Add the regularization terms to the gradient for the weights
-        nabla_w = [nw+(lmbda*B/T)*w for nw, w in zip(nabla_w, self.weights)]
+        nabla_w = [nw+(lmbda*B/n)*w for nw, w in zip(nabla_w, self.weights)]
         self.weights = [w-eta*nw for w, nw in zip(self.weights, nabla_w)]
         self.biases = [b-eta*nb for b, nb in zip(self.biases, nabla_b)]

-    def evaluate(self, test_inputs, actual_test_results):
-        """Return the number of ``test_inputs`` for which the neural
-        network outputs the correct result, i.e., the same result as
-        given in ``actual_test_results``. Note that the neural
+    def evaluate(self, test_data):
+        """Return the number of test inputs for which the neural
+        network outputs the correct result. Note that the neural
         network's output is assumed to be the index of whichever
         neuron in the final layer has the highest activation."""
-        test_results = [np.argmax(self.feedforward(x)) for x in test_inputs]
+        test_results = [np.argmax(self.feedforward(x)) for x in test_data[0]]
         return sum(int(x == y)
-                   for x, y in zip(test_results, actual_test_results))
+                   for x, y in zip(test_results, test_data[1]))

     def cost(self, x, y):
         """Return the quadratic cost associated to the network, with
@@ -135,23 +134,10 @@ def cost_derivative(self, output_activations, y):
         between the output activations and the desired output, ``y``."""
         return (output_activations-y)

-    def evaluate_training_results(self, training_data):
-        """Return the number of elements of the ``training_data`` that
-        are correctly classified."""
-        training_results = [np.argmax(self.feedforward(x[0])) for x in
-                            training_data]
-        actual_training_results = [np.argmax(x[1]) for x in training_data]
-        return sum(int(x == y)
-                   for x, y in zip(training_results, actual_training_results))
-
 #### Miscellaneous functions
 def sigmoid(z):
-    """The sigmoid function. Note that it checks to see whether ``z``
-    is very negative, to avoid overflow errors in the exponential
-    function. No corresponding test of ``z`` being very positive is
-    necessary --- ordinary Python arithmetic deals just fine with that
-    case."""
-    return 0.0 if z < -700 else 1.0/(1.0+np.exp(-z))
+    """The sigmoid function."""
+    return 1.0/(1.0+np.exp(-z))

 sigmoid_vec = np.vectorize(sigmoid)
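
As a usage note for the revised interfaces, a training run might be wired up as sketched below. This is not part of the commit: the layer sizes and hyperparameter values are illustrative, the ``Network(sizes)`` constructor is assumed from the surrounding file since it is not shown in the diff, and because the new ``evaluate`` reads ``test_data[0]`` and ``test_data[1]``, the raw tuple-of-arrays ``test_data`` from ``load_data`` is passed for evaluation rather than the zipped pairs built by ``load_data_wrapper``.

    # Hypothetical end-to-end run against the updated signatures; not part of this commit.
    import mnist_loader
    import network_basic

    # Zipped (x, y) pairs for training, as produced by load_data_wrapper ...
    training_data, _, _ = mnist_loader.load_data_wrapper()
    # ... but the raw (images, labels) tuple for evaluation, since evaluate()
    # indexes test_data[0] and test_data[1].
    _, _, test_data = mnist_loader.load_data()

    net = network_basic.Network([784, 30, 10])    # illustrative layer sizes
    net.SGD(training_data, 30, 10, 3.0, 0.01,     # epochs, mini-batch size, eta, lmbda
            test=True, test_data=test_data)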