
Commit 372a869

Adding network_basic.py
1 parent bcfcf75 commit 372a869

File tree

1 file changed: +162 -0 lines changed


code/network_basic.py

Lines changed: 162 additions & 0 deletions
@@ -0,0 +1,162 @@
"""
network_basic
~~~~~~~~~~~~~~

A module to implement the stochastic gradient descent learning
algorithm for a feedforward neural network. Gradients are calculated
using backpropagation. Note that I have focused on making the code
simple, easily readable, and easily modifiable. It is not optimized,
and omits many desirable features.
"""

#### Libraries
# Standard library
import random

# Third-party libraries
import numpy as np

class Network():

    def __init__(self, sizes):
        """The list ``sizes`` contains the number of neurons in the
        respective layers of the network. For example, if the list
        was [2, 3, 1] then it would be a three-layer network, with the
        first layer containing 2 neurons, the second layer 3 neurons,
        and the third layer 1 neuron. The biases and weights for the
        network are initialized randomly, using a Gaussian
        distribution with mean 0, and variance 1. Note that the first
        layer is assumed to be an input layer, and by convention we
        won't set any biases for those neurons, since biases are only
        ever used in computing the outputs from later layers."""
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(sizes[:-1], sizes[1:])]
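        # For example, Network([2, 3, 1]) gives biases with shapes
        # (3, 1) and (1, 1), and weights with shapes (3, 2) and (1, 3),
        # so np.dot(w, a) carries a column vector of activations from
        # each layer to the next.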

    def feedforward(self, a):
        "Return the output of the network if ``a`` is input."
        for b, w in zip(self.biases, self.weights):
            a = sigmoid_vec(np.dot(w, a)+b)
        return a

    def SGD(self, training_data, epochs, mini_batch_size, eta,
            lmbda, test=False, test_data=None):
        """Train the neural network using mini-batch stochastic
        gradient descent. The ``training_data`` is a list of tuples
        ``(x, y)`` representing the training inputs and the desired
        outputs. The other non-optional parameters are
        self-explanatory. Set ``test`` to ``True`` to evaluate the
        network against the test data after each epoch, and to print
        out partial progress. This is useful for tracking progress,
        but slows things down substantially. If ``test`` is set, then
        appropriate ``test_data`` must be supplied.
        """
        if test:
            # ``test_data`` is assumed to be a list of ``(x, y)`` pairs,
            # where ``y`` is the correct label index expected by
            # ``self.evaluate``.
            test_inputs = [x for x, y in test_data]
            actual_test_results = [y for x, y in test_data]
            n_test = len(test_data)
        n = len(training_data)
        for j in xrange(epochs):
            random.shuffle(training_data)
            mini_batches = [
                training_data[k:k+mini_batch_size]
                for k in xrange(0, len(training_data), mini_batch_size)]
            for mini_batch in mini_batches:
                self.backprop(mini_batch, n, eta, lmbda)
            if test:
                print "Epoch {}: {} / {}".format(
                    j, self.evaluate(test_inputs, actual_test_results), n_test)
            else:
                print "Epoch %s complete" % j

    def backprop(self, training_data, T, eta, lmbda):
        """Update the network's weights and biases by applying a
        single iteration of gradient descent using backpropagation.
        The ``training_data`` is a list of tuples ``(x, y)``. It need
        not include the entire training data set --- it might be a
        mini-batch, or even a single training example. ``T`` is the
        size of the total training set (which may not be the same as
        the size of ``training_data``). The other parameters are
        self-explanatory."""
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        B = len(training_data)
        for x, y in training_data:
            # feedforward
            activation = x
            activations = [x] # list to store all the activations
            zs = [] # list to store all the z vectors
            for b, w in zip(self.biases, self.weights):
                z = np.dot(w, activation)+b
                zs.append(z)
                activation = sigmoid_vec(z)
                activations.append(activation)
            # backward pass
            delta = self.cost_derivative(activations[-1], y) * \
                sigmoid_prime_vec(zs[-1])
            nabla_b[-1] += delta
            nabla_w[-1] += np.dot(delta, activations[-2].transpose())
            # Note that the variable l in the loop below is used a
            # little differently to the book. Here, l = 1 means the
            # last layer of neurons, l = 2 is the second-last layer,
            # and so on. It's a renumbering of the scheme used in the
            # book, used to take advantage of the fact that Python can
            # use negative indices in lists.
            for l in xrange(2, self.num_layers):
                z = zs[-l]
                spv = sigmoid_prime_vec(z)
                delta = np.dot(self.weights[-l+1].transpose(), delta) * spv
                nabla_b[-l] += delta
                nabla_w[-l] += np.dot(delta, activations[-l-1].transpose())
        # Add the regularization terms to the gradient for the weights
        nabla_w = [nw+(lmbda*B/T)*w for nw, w in zip(nabla_w, self.weights)]
        self.weights = [w-eta*nw for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b-eta*nb for b, nb in zip(self.biases, nabla_b)]
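        # Taken together, the updates above implement
        #     w -> w - eta*(sum_x dC_x/dw + (lmbda*B/T)*w)
        #     b -> b - eta*(sum_x dC_x/db)
        # with the sums running over the mini-batch. No average over the
        # mini-batch is taken, so the effective step size scales with
        # ``len(training_data)``.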

    def evaluate(self, test_inputs, actual_test_results):
        """Return the number of ``test_inputs`` for which the neural
        network outputs the correct result, i.e., the same result as
        given in ``actual_test_results``. Note that the neural
        network's output is assumed to be the index of whichever
        neuron in the final layer has the highest activation."""
        test_results = [np.argmax(self.feedforward(x)) for x in test_inputs]
        return sum(int(x == y)
                   for x, y in zip(test_results, actual_test_results))

    def cost(self, x, y):
        """Return the quadratic cost associated to the network, with
        input ``x`` and desired output ``y``. Note that there is no
        regularization."""
        return np.sum((self.feedforward(x)-y)**2)/2.0

    def cost_derivative(self, output_activations, y):
        """Return the vector of partial derivatives \partial C_x /
        \partial a for the output activations, ``a``. For the
        unregularized quadratic cost this is just the difference
        between the output activations and the desired output, ``y``."""
        return (output_activations-y)

    def evaluate_training_results(self, training_data):
        """Return the number of elements of the ``training_data`` that
        are correctly classified."""
        training_results = [np.argmax(self.feedforward(x[0])) for x in
                            training_data]
        actual_training_results = [np.argmax(x[1]) for x in training_data]
        return sum(int(x == y)
                   for x, y in zip(training_results, actual_training_results))

#### Miscellaneous functions
def sigmoid(z):
    """The sigmoid function. Note that it checks to see whether ``z``
    is very negative, to avoid overflow errors in the exponential
    function. No corresponding test of ``z`` being very positive is
    necessary --- ordinary Python arithmetic deals just fine with that
    case."""
    return 0.0 if z < -700 else 1.0/(1.0+np.exp(-z))

sigmoid_vec = np.vectorize(sigmoid)

def sigmoid_prime(z):
    """Derivative of the sigmoid function."""
    return sigmoid(z)*(1-sigmoid(z))

sigmoid_prime_vec = np.vectorize(sigmoid_prime)
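
A minimal usage sketch, not part of the committed file: the module targets Python 2 (print statements, xrange), so the sketch assumes a Python 2 interpreter running in the code/ directory so that network_basic is importable. The data, the [4, 10, 2] architecture, the make_example helper, and the hyperparameters are invented for illustration; random vectors labelled by the sign of their sum stand in for real data so the snippet is self-contained. It trains the network with SGD and reports test accuracy after each epoch via the ``test`` option.

import numpy as np

from network_basic import Network

rng = np.random.RandomState(0)

def make_example():
    # A 4-dimensional input column vector and its integer label.
    x = rng.randn(4, 1)
    return x, int(x.sum() > 0)

# Training pairs use one-hot column vectors as targets, matching backprop.
training_data = []
for _ in range(50):
    x, label = make_example()
    y = np.zeros((2, 1))
    y[label] = 1.0
    training_data.append((x, y))

# Test pairs keep the integer label, which evaluate compares against
# np.argmax of the network's output.
test_data = [make_example() for _ in range(20)]

net = Network([4, 10, 2])
net.SGD(training_data, epochs=5, mini_batch_size=10, eta=0.5,
        lmbda=0.0, test=True, test_data=test_data)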
