Commit 454cb2c

Adding dropout
1 parent 2ae7c75 commit 454cb2c

File tree: 2 files changed, +284 -58 lines

src/conv.py

Lines changed: 192 additions & 12 deletions
@@ -9,30 +9,47 @@
 """
 
+from collections import Counter
+
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+import numpy as np
+import theano
+import theano.tensor as T
+
 import network3
 from network3 import sigmoid, tanh, ReLU, Network
 from network3 import ConvPoolLayer, FullyConnectedLayer, SoftmaxLayer
+
 training_data, validation_data, test_data = network3.load_data_shared()
 mini_batch_size = 10
 
-def shallow():
-    for j in range(3):
+def shallow(n=3, epochs=60):
+    nets = []
+    for j in range(n):
         print "A shallow net with 100 hidden neurons"
         net = Network([
             FullyConnectedLayer(n_in=784, n_out=100),
             SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
-        net.SGD(training_data, 60, mini_batch_size, 0.1, validation_data, test_data)
+        net.SGD(
+            training_data, epochs, mini_batch_size, 0.1,
+            validation_data, test_data)
+        nets.append(net)
+    return nets
 
-def basic_conv():
-    for j in range(3):
+def basic_conv(n=3, epochs=60):
+    for j in range(n):
         print "Conv + FC architecture"
         net = Network([
             ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
                           filter_shape=(20, 1, 5, 5),
                           poolsize=(2, 2)),
             FullyConnectedLayer(n_in=20*12*12, n_out=100),
             SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
-        net.SGD(training_data, 60, mini_batch_size, 0.1, validation_data, test_data)
+        net.SGD(
+            training_data, epochs, mini_batch_size, 0.1, validation_data, test_data)
+    return net
 
 def omit_FC():
     for j in range(3):
@@ -43,6 +60,7 @@ def omit_FC():
                           poolsize=(2, 2)),
             SoftmaxLayer(n_in=20*12*12, n_out=10)], mini_batch_size)
         net.SGD(training_data, 60, mini_batch_size, 0.1, validation_data, test_data)
+    return net
 
 def dbl_conv(activation_fn=sigmoid):
     for j in range(3):
@@ -59,8 +77,14 @@ def dbl_conv(activation_fn=sigmoid):
             FullyConnectedLayer(
                 n_in=40*4*4, n_out=100, activation_fn=activation_fn),
             SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
-        net.SGD(training_data, 60, mini_batch_size, 0.1, validation_data, test_data)
+        net.SGD(training_data, 60, mini_batch_size, 0.1, validation_data, test_data)
+    return net
 
+# The following experiment was eventually omitted from the chapter,
+# but I've left it in here, since it's an important negative result:
+# basic L2 regularization didn't help much.  The reason (I believe) is
+# that using convolutional-pooling layers is already a pretty strong
+# regularizer.
 def regularized_dbl_conv():
     for lmbda in [0.00001, 0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0]:
         for j in range(3):
@@ -96,11 +120,15 @@ def dbl_conv_relu():
 #### Some subsequent functions may make use of the expanded MNIST
 #### data.  That can be generated by running expand_mnist.py.
 
-def expanded_data():
+def expanded_data(n=100):
+    """n is the number of neurons in the fully-connected layer.  We'll try
+    n=100, 300, and 1000.
+
+    """
     expanded_training_data, _, _ = network3.load_data_shared(
         "../data/mnist_expanded.pkl.gz")
     for j in range(3):
-        print "Training with expanded data, run num %s" % j
+        print "Training with expanded data, %s neurons in the FC layer, run num %s" % (n, j)
         net = Network([
             ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
                           filter_shape=(20, 1, 5, 5),
@@ -110,8 +138,160 @@ def expanded_data():
                           filter_shape=(40, 20, 5, 5),
                           poolsize=(2, 2),
                           activation_fn=ReLU),
-            FullyConnectedLayer(n_in=40*4*4, n_out=100, activation_fn=ReLU),
-            SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
-        net.SGD(expanded_training_data, 20, mini_batch_size, 0.03,
+            FullyConnectedLayer(n_in=40*4*4, n_out=n, activation_fn=ReLU),
+            SoftmaxLayer(n_in=n, n_out=10)], mini_batch_size)
+        net.SGD(expanded_training_data, 60, mini_batch_size, 0.03,
+                validation_data, test_data, lmbda=0.1)
+    return net
+
+def expanded_data_double_fc(n=100):
+    """n is the number of neurons in both fully-connected layers.  We'll
+    try n=100, 300, and 1000.
+
+    """
+    expanded_training_data, _, _ = network3.load_data_shared(
+        "../data/mnist_expanded.pkl.gz")
+    for j in range(3):
+        print "Training with expanded data, %s neurons in two FC layers, run num %s" % (n, j)
+        net = Network([
+            ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
+                          filter_shape=(20, 1, 5, 5),
+                          poolsize=(2, 2),
+                          activation_fn=ReLU),
+            ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12),
+                          filter_shape=(40, 20, 5, 5),
+                          poolsize=(2, 2),
+                          activation_fn=ReLU),
+            FullyConnectedLayer(n_in=40*4*4, n_out=n, activation_fn=ReLU),
+            FullyConnectedLayer(n_in=n, n_out=n, activation_fn=ReLU),
+            SoftmaxLayer(n_in=n, n_out=10)], mini_batch_size)
+        net.SGD(expanded_training_data, 60, mini_batch_size, 0.03,
                 validation_data, test_data, lmbda=0.1)
+
+def double_fc_dropout(p0, p1, p2, repetitions):
+    expanded_training_data, _, _ = network3.load_data_shared(
+        "../data/mnist_expanded.pkl.gz")
+    nets = []
+    for j in range(repetitions):
+        print "\n\nTraining using a dropout network with parameters ", p0, p1, p2
+        print "Training with expanded data, run num %s" % j
+        net = Network([
+            ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
+                          filter_shape=(20, 1, 5, 5),
+                          poolsize=(2, 2),
+                          activation_fn=ReLU),
+            ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12),
+                          filter_shape=(40, 20, 5, 5),
+                          poolsize=(2, 2),
+                          activation_fn=ReLU),
+            FullyConnectedLayer(
+                n_in=40*4*4, n_out=1000, activation_fn=ReLU, p_dropout=p0),
+            FullyConnectedLayer(
+                n_in=1000, n_out=1000, activation_fn=ReLU, p_dropout=p1),
+            SoftmaxLayer(n_in=1000, n_out=10, p_dropout=p2)], mini_batch_size)
+        net.SGD(expanded_training_data, 40, mini_batch_size, 0.03,
+                validation_data, test_data)
+        nets.append(net)
+    return nets
+
+def ensemble(nets):
+    """Takes as input a list of nets, and then computes the accuracy on
+    the test data when classifications are computed by taking a vote
+    amongst the nets.  Returns a tuple containing a list of indices
+    for test data which is erroneously classified, and a list of the
+    corresponding erroneous predictions.
+
+    Note that this is a quick-and-dirty kluge: it'd be more reusable
+    (and faster) to define a Theano function taking the vote.  But
+    this works.
+
+    """
 
+    test_x, test_y = test_data
+    for net in nets:
+        i = T.lscalar() # mini-batch index
+        net.test_mb_predictions = theano.function(
+            [i], net.layers[-1].y_out,
+            givens={
+                net.x:
+                test_x[i*net.mini_batch_size: (i+1)*net.mini_batch_size]
+            })
+        net.test_predictions = list(np.concatenate(
+            [net.test_mb_predictions(i) for i in xrange(1000)]))
+    all_test_predictions = zip(*[net.test_predictions for net in nets])
+    def plurality(p): return Counter(p).most_common(1)[0][0]
+    plurality_test_predictions = [plurality(p)
+                                  for p in all_test_predictions]
+    test_y_eval = test_y.eval()
+    error_locations = [j for j in xrange(10000)
+                       if plurality_test_predictions[j] != test_y_eval[j]]
+    erroneous_predictions = [plurality(all_test_predictions[j])
+                             for j in error_locations]
+    print "Accuracy is {:.2%}".format((1-len(error_locations)/10000.0))
+    return error_locations, erroneous_predictions
+
+def plot_errors(error_locations, erroneous_predictions=None):
+    test_x, test_y = test_data[0].eval(), test_data[1].eval()
+    fig = plt.figure()
+    error_images = [np.array(test_x[i]).reshape(28, -1) for i in error_locations]
+    n = min(40, len(error_locations))
+    for j in range(n):
+        ax = plt.subplot2grid((5, 8), (j/8, j % 8))
+        ax.matshow(error_images[j], cmap = matplotlib.cm.binary)
+        ax.text(24, 5, test_y[error_locations[j]])
+        if erroneous_predictions:
+            ax.text(24, 24, erroneous_predictions[j])
+        plt.xticks(np.array([]))
+        plt.yticks(np.array([]))
+    plt.tight_layout()
+    return plt
+
+def plot_filters(net, layer, x, y):
+    """Plot the filters for net after the (convolutional) layer number
+    layer.  They are plotted in x by y format.  So, for example, if we
+    have 20 filters after layer 0, then we can call plot_filters(net, 0, 5, 4)
+    to get a 5 by 4 plot of all filters."""
+    filters = net.layers[layer].w.eval()
+    fig = plt.figure()
+    for j in range(len(filters)):
+        ax = fig.add_subplot(y, x, j+1)
+        ax.matshow(filters[j][0], cmap = matplotlib.cm.binary)
+        plt.xticks(np.array([]))
+        plt.yticks(np.array([]))
+    plt.tight_layout()
+    return plt
+
+
+#### Helper method to run all experiments in the book
+
+def run_experiments():
+    """Run the experiments described in the book.  Note that the later
+    experiments require access to the expanded training data, which
+    can be generated by running expand_mnist.py.
+
+    """
+    shallow()
+    basic_conv()
+    omit_FC()
+    dbl_conv(activation_fn=sigmoid)
+    # omitted, but still interesting: regularized_dbl_conv()
+    dbl_conv_relu()
+    expanded_data(n=100)
+    expanded_data(n=300)
+    expanded_data(n=1000)
+    expanded_data_double_fc(n=100)
+    expanded_data_double_fc(n=300)
+    expanded_data_double_fc(n=1000)
+    nets = double_fc_dropout(0.5, 0.5, 0.5, 5)
+    # plot the erroneous digits in the ensemble of nets just trained
+    error_locations, erroneous_predictions = ensemble(nets)
+    plt = plot_errors(error_locations, erroneous_predictions)
+    plt.savefig("ensemble_errors.png")
+    # plot the filters learned by the first of the nets just trained
+    plt = plot_filters(nets[0], 0, 5, 4)
+    plt.savefig("net_full_layer_0.png")
+    plt = plot_filters(nets[0], 1, 8, 5)
+    plt.savefig("net_full_layer_1.png")

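A quick check on the layer shapes used throughout the diff: each ConvPoolLayer performs a valid 5x5 convolution followed by 2x2 max-pooling, so 28x28 images become 24x24 feature maps and then 12x12 after the first layer, and 12x12 becomes 8x8 and then 4x4 after the second, which is where n_in=40*4*4 comes from. The helper below is illustrative only, not part of the commit:

def conv_pool_out(size, filter_size=5, pool=2):
    # a valid convolution shrinks each side by (filter_size - 1);
    # non-overlapping pooling then divides it by pool
    return (size - filter_size + 1) // pool

print conv_pool_out(28)                  # 12, after the first conv-pool layer
print conv_pool_out(conv_pool_out(28))   # 4, hence n_in = 40*4*4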
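The p_dropout arguments threaded through FullyConnectedLayer and SoftmaxLayer above are implemented in the commit's other changed file (presumably network3.py, not shown on this page). Here is a minimal NumPy sketch of the standard dropout rule those parameters imply; the function names are hypothetical and no claim is made about the actual Theano implementation:

import numpy as np

def dropout_forward(activations, p_dropout):
    # training pass: zero each unit independently with probability p_dropout
    mask = np.random.binomial(1, 1.0 - p_dropout, size=activations.shape)
    return activations * mask

def test_time_weights(w, p_dropout):
    # test pass: run the full layer, scaling the weights by (1 - p_dropout)
    # so expected pre-activations match those seen during training
    return (1.0 - p_dropout) * w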
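And a tiny standalone illustration of the Counter-based plurality vote that ensemble() uses to combine the dropout nets' predictions (note that Counter breaks ties arbitrarily):

from collections import Counter

def plurality(p): return Counter(p).most_common(1)[0][0]

votes = [(3, 3, 5), (8, 8, 8), (4, 9, 9)]  # per-image predictions from 3 nets
print [plurality(p) for p in votes]        # prints [3, 8, 9]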