
Commit 0edd84e

Rationalized the data format in mnist_loader, and minor improvements and simplifications for network_basic
1 parent 372a869 commit 0edd84e


2 files changed: +50 -62 lines changed


code/mnist_loader.py

Lines changed: 38 additions & 36 deletions
@@ -3,10 +3,9 @@
 ~~~~~~~~~~~~

 A library to load the MNIST image data. For details of the data
-structures that are returned, see the doc string for ``load_data``.
-The library also contains a helper method ``load_data_nn`` which
-returns the data in a format well adapted for use with our neural
-network code.
+structures that are returned, see the doc strings for ``load_data``
+and ``load_data_wrapper``. In practice, ``load_data_wrapper`` is the
+function usually called by our neural network code.

 Note that the code requires the file ``../data/mnist.pkl``. This is not
 included in the repository. It may be downloaded from:
@@ -40,46 +39,49 @@ def load_data():
     The ``validation_data`` and ``test_data`` are similar, except
     each contains only 10,000 images.

-    Note that the format the data is returned in is well adapted for
-    use by scikit-learn's SVM method, but not so well adapted for our
-    neural network code. For that, see the wrapper function
-    ``load_data_nn``.
+    This is a nice and convenient data format, but for use in neural
+    networks it's actually helpful to modify the format of the
+    ``training_data`` a little. That's done in the wrapper function
+    ``load_data_wrapper()``, see below.
     """
     f = open('../data/mnist.pkl', 'rb')
     training_data, validation_data, test_data = cPickle.load(f)
     f.close()
     return (training_data, validation_data, test_data)

-def load_data_nn():
-    """Return a tuple containing ``(training_data, test_inputs,
-    actual_test_results)`` from the MNIST data. The tuples are in a
-    format optimized for use by our neural network code. This
-    function makes use of ``load_data()``, but does some additional
-    processing to put the data in the right format.
-
-    ``training_data`` is a list containing 50,000 2-tuples ``(x, y)``.
-    ``x`` is a 784-dimensional numpy.ndarray containing the input
-    image. ``y`` is a 10-dimensional numpy.ndarray representing the
-    unit vector corresponding to the correct digit for ``x``.
-
-    ``test_inputs`` is a list containing 10,000 x 784-dimensional
-    numpy.ndarray objects, representing test images.
-
-    ``actual_test_results`` is a list containing the 10,000 digit
-    values (integers) corresponding to the ``test_inputs``.
-
-    Obviously, we're using slightly different formats for the training
-    and test data. These formats turn out to be the most convenient
-    for use in our neural network code."""
-    training_data, validation_data, test_data = load_data()
-    inputs = [np.reshape(x, (784, 1)) for x in training_data[0]]
-    results = [vectorized_result(y) for y in training_data[1]]
-    training_data = zip(inputs, results)
-    test_inputs = [np.reshape(x, (784, 1)) for x in test_data[0]]
-    return (training_data, test_inputs, test_data[1])
+def load_data_wrapper():
+    """Return a tuple containing ``(training_data, validation_data,
+    test_data)``. Based on ``load_data``, but the format is a little more
+    convenient for use in neural networks.
+
+    In particular, ``training_data`` is a list containing 50,000
+    2-tuples ``(x, y)``. ``x`` is a 784-dimensional numpy.ndarray
+    containing the input image. ``y`` is a 10-dimensional
+    numpy.ndarray representing the unit vector corresponding to the
+    correct digit for ``x``.
+
+    ``validation_data`` and ``test_data`` are lists containing 10,000
+    2-tuples ``(x, y)``. In each case, ``x`` is a 784-dimensional
+    numpy.ndarray containing the input image, and ``y`` is the
+    corresponding classification, i.e., the digit values (integers)
+    corresponding to ``x``.
+
+    Obviously, this means we're using slightly different formats for
+    the training data and the validation / test data. These formats
+    turn out to be the most convenient for use in our neural network
+    code."""
+    tr_d, va_d, te_d = load_data()
+    training_inputs = [np.reshape(x, (784, 1)) for x in tr_d[0]]
+    training_results = [vectorized_result(y) for y in tr_d[1]]
+    training_data = zip(training_inputs, training_results)
+    validation_inputs = [np.reshape(x, (784, 1)) for x in va_d[0]]
+    validation_data = zip(validation_inputs, va_d[1])
+    test_inputs = [np.reshape(x, (784, 1)) for x in te_d[0]]
+    test_data = zip(test_inputs, te_d[1])
+    return (training_data, validation_data, test_data)

 def vectorized_result(j):
-    """ Return a 10-dimensional unit vector with a 1.0 in the jth
+    """Return a 10-dimensional unit vector with a 1.0 in the jth
     position and zeroes elsewhere. This is used to convert a digit
     (0...9) into a corresponding desired output from the neural
     network."""
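
For readers trying out the revised loader, here is a minimal sketch of how the new format might be inspected. It is not part of the commit: it assumes ``../data/mnist.pkl`` has been downloaded as described in the docstring and that ``mnist_loader`` is on the Python path, and the expected layouts are read off the docstring above.

    # Hypothetical sanity check of the new data format; not part of this commit.
    import mnist_loader

    training_data, validation_data, test_data = mnist_loader.load_data_wrapper()

    x, y = training_data[0]
    print x.shape    # (784, 1): one input image as a column vector
    print y.shape    # a 10-dimensional unit vector marking the correct digit

    vx, vy = validation_data[0]
    print vx.shape   # (784, 1), same layout as the training inputs
    print vy         # an integer label 0..9, not a unit vector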

code/network_basic.py

Lines changed: 12 additions & 26 deletions
@@ -42,7 +42,7 @@ def feedforward(self, a):
         return a

     def SGD(self, training_data, epochs, mini_batch_size, eta,
-            lmbda, test=False, test_data=None)
+            lmbda, test=False, test_data=None):
         """Train the neural network using mini-batch stochastic
         gradient descent. The ``training_data`` is a list of tuples
         ``(x, y)`` representing the training inputs and the desired
@@ -64,16 +64,16 @@ def SGD(self, training_data, epochs, mini_batch_size, eta,
                 self.backprop(mini_batch, n, eta, lmbda)
             if test:
                 print "Epoch {}: {} / {}".format(
-                    j, self.evaluate(test_inputs, actual_test_results), n_test)
+                    j, self.evaluate(test_data), n_test)
             else:
                 print "Epoch %s complete" % j

-    def backprop(self, training_data, T, eta, lmbda):
+    def backprop(self, training_data, n, eta, lmbda):
         """Update the network's weights and biases by applying a
         single iteration of gradient descent using backpropagation.
         The ``training_data`` is a list of tuples ``(x, y)``. It need
         not include the entire training data set --- it might be a
-        mini-batch, or even a single training example. ``T`` is the
+        mini-batch, or even a single training example. ``n`` is the
         size of the total training set (which may not be the same as
         the size of ``training_data``). The other parameters are
         self-explanatory."""
@@ -108,19 +108,18 @@ def backprop(self, training_data, T, eta, lmbda):
                 nabla_b[-l] += delta
                 nabla_w[-l] += np.dot(delta, activations[-l-1].transpose())
         # Add the regularization terms to the gradient for the weights
-        nabla_w = [nw+(lmbda*B/T)*w for nw, w in zip(nabla_w, self.weights)]
+        nabla_w = [nw+(lmbda*B/n)*w for nw, w in zip(nabla_w, self.weights)]
         self.weights = [w-eta*nw for w, nw in zip(self.weights, nabla_w)]
         self.biases = [b-eta*nb for b, nb in zip(self.biases, nabla_b)]

-    def evaluate(self, test_inputs, actual_test_results):
-        """Return the number of ``test_inputs`` for which the neural
-        network outputs the correct result, i.e., the same result as
-        given in ``actual_test_results``. Note that the neural
+    def evaluate(self, test_data):
+        """Return the number of test inputs for which the neural
+        network outputs the correct result. Note that the neural
         network's output is assumed to be the index of whichever
         neuron in the final layer has the highest activation."""
-        test_results = [np.argmax(self.feedforward(x)) for x in test_inputs]
+        test_results = [np.argmax(self.feedforward(x)) for x in test_data[0]]
         return sum(int(x == y)
-                   for x, y in zip(test_results, actual_test_results))
+                   for x, y in zip(test_results, test_data[1]))

     def cost(self, x, y):
         """Return the quadratic cost associated to the network, with
@@ -135,23 +134,10 @@ def cost_derivative(self, output_activations, y):
         between the output activations and the desired output, ``y``."""
         return (output_activations-y)

-    def evaluate_training_results(self, training_data):
-        """Return the number of elements of the ``training_data`` that
-        are correctly classified."""
-        training_results = [np.argmax(self.feedforward(x[0])) for x in
-                            training_data]
-        actual_training_results = [np.argmax(x[1]) for x in training_data]
-        return sum(int(x == y)
-                   for x, y in zip(training_results, actual_training_results))
-
 #### Miscellaneous functions
 def sigmoid(z):
-    """The sigmoid function. Note that it checks to see whether ``z``
-    is very negative, to avoid overflow errors in the exponential
-    function. No corresponding test of ``z`` being very positive is
-    necessary --- ordinary Python arithmetic deals just fine with that
-    case."""
-    return 0.0 if z < -700 else 1.0/(1.0+np.exp(-z))
+    """The sigmoid function."""
+    return 1.0/(1.0+np.exp(-z))

 sigmoid_vec = np.vectorize(sigmoid)
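
As a usage note for the revised interfaces, a training run might be wired up as sketched below. This is not part of the commit: the layer sizes and hyperparameter values are illustrative, the ``Network(sizes)`` constructor is assumed from the surrounding file since it is not shown in the diff, and because the new ``evaluate`` reads ``test_data[0]`` and ``test_data[1]``, the raw tuple-of-arrays ``test_data`` from ``load_data`` is passed for evaluation rather than the zipped pairs built by ``load_data_wrapper``.

    # Hypothetical end-to-end run against the updated signatures; not part of this commit.
    import mnist_loader
    import network_basic

    # Zipped (x, y) pairs for training, as produced by load_data_wrapper ...
    training_data, _, _ = mnist_loader.load_data_wrapper()
    # ... but the raw (images, labels) tuple for evaluation, since evaluate()
    # indexes test_data[0] and test_data[1].
    _, _, test_data = mnist_loader.load_data()

    net = network_basic.Network([784, 30, 10])    # illustrative layer sizes
    net.SGD(training_data, 30, 10, 3.0, 0.01,     # epochs, mini-batch size, eta, lmbda
            test=True, test_data=test_data)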