
"""
+ from collections import Counter
+
+ import matplotlib
+ matplotlib.use('Agg')
+ import matplotlib.pyplot as plt
+ import numpy as np
+ import theano
+ import theano.tensor as T
+
import network3
from network3 import sigmoid, tanh, ReLU, Network
from network3 import ConvPoolLayer, FullyConnectedLayer, SoftmaxLayer
+
training_data, validation_data, test_data = network3.load_data_shared()
mini_batch_size = 10

- def shallow():
-     for j in range(3):
+ def shallow(n=3, epochs=60):
+     nets = []
+     for j in range(n):
        print "A shallow net with 100 hidden neurons"
        net = Network([
            FullyConnectedLayer(n_in=784, n_out=100),
            SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
-         net.SGD(training_data, 60, mini_batch_size, 0.1, validation_data, test_data)
+         net.SGD(
+             training_data, epochs, mini_batch_size, 0.1,
+             validation_data, test_data)
+         nets.append(net)
+     return nets

- def basic_conv():
-     for j in range(3):
+ def basic_conv(n=3, epochs=60):
+     for j in range(n):
        print "Conv + FC architecture"
        net = Network([
            ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
                          filter_shape=(20, 1, 5, 5),
                          poolsize=(2, 2)),
            FullyConnectedLayer(n_in=20*12*12, n_out=100),
            SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
-         net.SGD(training_data, 60, mini_batch_size, 0.1, validation_data, test_data)
+         net.SGD(
+             training_data, epochs, mini_batch_size, 0.1, validation_data, test_data)
+     return net

def omit_FC():
    for j in range(3):
@@ -43,6 +60,7 @@ def omit_FC():
                          poolsize=(2, 2)),
            SoftmaxLayer(n_in=20*12*12, n_out=10)], mini_batch_size)
        net.SGD(training_data, 60, mini_batch_size, 0.1, validation_data, test_data)
+     return net

def dbl_conv(activation_fn=sigmoid):
    for j in range(3):
@@ -59,8 +77,14 @@ def dbl_conv(activation_fn=sigmoid):
            FullyConnectedLayer(
                n_in=40*4*4, n_out=100, activation_fn=activation_fn),
            SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
-         net.SGD(training_data, 60, mini_batch_size, 0.1, validation_data, test_data)
+         net.SGD(training_data, 60, mini_batch_size, 0.1, validation_data, test_data)
+     return net

+ # The following experiment was eventually omitted from the chapter,
+ # but I've left it in here, since it's an important negative result:
+ # basic l2 regularization didn't help much. The reason (I believe) is
+ # that using convolutional-pooling layers is already a pretty strong
+ # regularizer.
def regularized_dbl_conv():
    for lmbda in [0.00001, 0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0]:
        for j in range(3):
@@ -96,11 +120,15 @@ def dbl_conv_relu():
#### Some subsequent functions may make use of the expanded MNIST
#### data. That can be generated by running expand_mnist.py.

- def expanded_data():
+ def expanded_data(n=100):
+     """n is the number of neurons in the fully-connected layer. We'll try
+     n=100, 300, and 1000.
+
+     """
    expanded_training_data, _, _ = network3.load_data_shared(
        "../data/mnist_expanded.pkl.gz")
    for j in range(3):
-         print "Training with expanded data, run num %s" % j
+         print "Training with expanded data, %s neurons in the FC layer, run num %s" % (n, j)
        net = Network([
            ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
                          filter_shape=(20, 1, 5, 5),
@@ -110,8 +138,160 @@ def expanded_data():
                          filter_shape=(40, 20, 5, 5),
                          poolsize=(2, 2),
                          activation_fn=ReLU),
-             FullyConnectedLayer(n_in=40*4*4, n_out=100, activation_fn=ReLU),
-             SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
-         net.SGD(expanded_training_data, 20, mini_batch_size, 0.03,
+             FullyConnectedLayer(n_in=40*4*4, n_out=n, activation_fn=ReLU),
+             SoftmaxLayer(n_in=n, n_out=10)], mini_batch_size)
+         net.SGD(expanded_training_data, 60, mini_batch_size, 0.03,
+                 validation_data, test_data, lmbda=0.1)
+     return net
+
+ def expanded_data_double_fc(n=100):
+     """n is the number of neurons in both fully-connected layers. We'll
+     try n=100, 300, and 1000.
+
+     """
+     expanded_training_data, _, _ = network3.load_data_shared(
+         "../data/mnist_expanded.pkl.gz")
+     for j in range(3):
+         print "Training with expanded data, %s neurons in two FC layers, run num %s" % (n, j)
+         net = Network([
+             ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
+                           filter_shape=(20, 1, 5, 5),
+                           poolsize=(2, 2),
+                           activation_fn=ReLU),
+             ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12),
+                           filter_shape=(40, 20, 5, 5),
+                           poolsize=(2, 2),
+                           activation_fn=ReLU),
+             FullyConnectedLayer(n_in=40*4*4, n_out=n, activation_fn=ReLU),
+             FullyConnectedLayer(n_in=n, n_out=n, activation_fn=ReLU),
+             SoftmaxLayer(n_in=n, n_out=10)], mini_batch_size)
+         net.SGD(expanded_training_data, 60, mini_batch_size, 0.03,
                validation_data, test_data, lmbda=0.1)
+
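A note on the function that follows (inferred from the layer definitions below, not stated in the commit): p0 and p1 are the dropout probabilities passed to the two fully-connected layers, p2 is the dropout probability for the softmax layer, and repetitions is the number of independently trained nets returned. The call used further down in run_experiments is:

    nets = double_fc_dropout(0.5, 0.5, 0.5, 5)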
+ def double_fc_dropout(p0, p1, p2, repetitions):
+     expanded_training_data, _, _ = network3.load_data_shared(
+         "../data/mnist_expanded.pkl.gz")
+     nets = []
+     for j in range(repetitions):
+         print "\n\nTraining using a dropout network with parameters ", p0, p1, p2
+         print "Training with expanded data, run num %s" % j
+         net = Network([
+             ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
+                           filter_shape=(20, 1, 5, 5),
+                           poolsize=(2, 2),
+                           activation_fn=ReLU),
+             ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12),
+                           filter_shape=(40, 20, 5, 5),
+                           poolsize=(2, 2),
+                           activation_fn=ReLU),
+             FullyConnectedLayer(
+                 n_in=40*4*4, n_out=1000, activation_fn=ReLU, p_dropout=p0),
+             FullyConnectedLayer(
+                 n_in=1000, n_out=1000, activation_fn=ReLU, p_dropout=p1),
+             SoftmaxLayer(n_in=1000, n_out=10, p_dropout=p2)], mini_batch_size)
+         net.SGD(expanded_training_data, 40, mini_batch_size, 0.03,
+                 validation_data, test_data)
+         nets.append(net)
+     return nets
+
+ def ensemble(nets):
+     """Takes as input a list of nets, and then computes the accuracy on
+     the test data when classifications are computed by taking a vote
+     amongst the nets. Returns a tuple containing a list of indices
+     for test data which is erroneously classified, and a list of the
+     corresponding erroneous predictions.
+
+     Note that this is a quick-and-dirty kluge: it'd be more reusable
+     (and faster) to define a Theano function taking the vote. But
+     this works.
+
+     """

+     test_x, test_y = test_data
+     for net in nets:
+         i = T.lscalar() # mini-batch index
+         net.test_mb_predictions = theano.function(
+             [i], net.layers[-1].y_out,
+             givens={
+                 net.x:
+                 test_x[i*net.mini_batch_size: (i+1)*net.mini_batch_size]
+             })
+         net.test_predictions = list(np.concatenate(
+             [net.test_mb_predictions(i) for i in xrange(1000)]))
+     all_test_predictions = zip(*[net.test_predictions for net in nets])
+     def plurality(p): return Counter(p).most_common(1)[0][0]
+     plurality_test_predictions = [plurality(p)
+                                   for p in all_test_predictions]
+     test_y_eval = test_y.eval()
+     error_locations = [j for j in xrange(10000)
+                        if plurality_test_predictions[j] != test_y_eval[j]]
+     erroneous_predictions = [plurality(all_test_predictions[j])
+                              for j in error_locations]
+     print "Accuracy is {:.2%}".format((1 - len(error_locations)/10000.0))
+     return error_locations, erroneous_predictions
+
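The plurality vote above simply picks the most common prediction among the nets for each test image. A minimal standalone sketch of that step (not from this commit, with made-up votes), reusing only the Counter import:

    from collections import Counter
    votes = (3, 3, 5)  # hypothetical predictions for one image from three nets
    print Counter(votes).most_common(1)[0][0]  # prints 3, the plurality choice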
+ def plot_errors(error_locations, erroneous_predictions=None):
+     test_x, test_y = test_data[0].eval(), test_data[1].eval()
+     fig = plt.figure()
+     error_images = [np.array(test_x[i]).reshape(28, -1) for i in error_locations]
+     n = min(40, len(error_locations))
+     for j in range(n):
+         ax = plt.subplot2grid((5, 8), (j/8, j % 8))
+         ax.matshow(error_images[j], cmap=matplotlib.cm.binary)
+         ax.text(24, 5, test_y[error_locations[j]])
+         if erroneous_predictions:
+             ax.text(24, 24, erroneous_predictions[j])
+         plt.xticks(np.array([]))
+         plt.yticks(np.array([]))
+     plt.tight_layout()
+     return plt
+
+ def plot_filters(net, layer, x, y):
+
+     """Plot the filters for net after the (convolutional) layer number
+     layer. They are plotted in x by y format. So, for example, if we
+     have 20 filters after layer 0, then we can call plot_filters(net, 0, 5, 4) to
+     get a 5 by 4 plot of all filters."""
+     filters = net.layers[layer].w.eval()
+     fig = plt.figure()
+     for j in range(len(filters)):
+         ax = fig.add_subplot(y, x, j + 1)
+         ax.matshow(filters[j][0], cmap=matplotlib.cm.binary)
+         plt.xticks(np.array([]))
+         plt.yticks(np.array([]))
+     plt.tight_layout()
+     return plt
+
+
+ #### Helper method to run all experiments in the book
+
+ def run_experiments():
+
+     """Run the experiments described in the book. Note that the later
+     experiments require access to the expanded training data, which
+     can be generated by running expand_mnist.py.
+
+     """
+     shallow()
+     basic_conv()
+     omit_FC()
+     dbl_conv(activation_fn=sigmoid)
+     # omitted, but still interesting: regularized_dbl_conv()
+     dbl_conv_relu()
+     expanded_data(n=100)
+     expanded_data(n=300)
+     expanded_data(n=1000)
+     expanded_data_double_fc(n=100)
+     expanded_data_double_fc(n=300)
+     expanded_data_double_fc(n=1000)
+     nets = double_fc_dropout(0.5, 0.5, 0.5, 5)
+     # plot the erroneous digits in the ensemble of nets just trained
+     error_locations, erroneous_predictions = ensemble(nets)
+     plt = plot_errors(error_locations, erroneous_predictions)
+     plt.savefig("ensemble_errors.png")
+     # plot the filters learned by the first of the nets just trained
+     plt = plot_filters(nets[0], 0, 5, 4)
+     plt.savefig("net_full_layer_0.png")
+     plt = plot_filters(nets[0], 1, 8, 5)
+     plt.savefig("net_full_layer_1.png")
+
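A natural command-line entry point for the script is a standard main guard; a minimal sketch (not part of this commit), assuming the expanded MNIST data has already been generated with expand_mnist.py:

    if __name__ == "__main__":
        run_experiments()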