@@ -19,10 +19,11 @@
 focused on making the code simple, easily readable, and easily
 modifiable. It is not optimized, and omits many desirable features.
 
-This program is loosely based on the Theano documentation on
+This program incorporates ideas from the Theano documentation on
 convolutional neural nets (notably,
-http://deeplearning.net/tutorial/lenet.html ), as well as some ideas
-of Chris Olah (http://colah.github.io ).
+http://deeplearning.net/tutorial/lenet.html ), from Misha Denil's
+implementation of dropout (https://github.com/mdenil/dropout ), and
+from Chris Olah (http://colah.github.io ).
 
 
 """
@@ -100,20 +101,16 @@ def __init__(self, layers, mini_batch_size):
         self.output_dropout = self.layers[-1].output_dropout
 
     def SGD(self, training_data, epochs, mini_batch_size, eta,
-            validation_data=None, test_data=None, lmbda=0.0):
+            validation_data, test_data, lmbda=0.0):
         """Train the network using mini-batch stochastic gradient descent."""
         training_x, training_y = training_data
-        if validation_data:
-            validation_x, validation_y = validation_data
-        if test_data:
-            test_x, test_y = test_data
+        validation_x, validation_y = validation_data
+        test_x, test_y = test_data
 
         # compute number of minibatches for training, validation and testing
         num_training_batches = size(training_data)/mini_batch_size
-        if validation_data:
-            num_validation_batches = size(validation_data)/mini_batch_size
-        if test_data:
-            num_test_batches = size(test_data)/mini_batch_size
+        num_validation_batches = size(validation_data)/mini_batch_size
+        num_test_batches = size(test_data)/mini_batch_size
 
         # define the (regularized) cost function, symbolic gradients, and updates
         l2_norm_squared = sum([(layer.w**2).sum() for layer in self.layers])
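
With this change `validation_data` and `test_data` become required positional arguments rather than optional ones, which is why the `if validation_data:` / `if test_data:` guards can be dropped throughout. A minimal usage sketch (the layer classes and `load_data_shared` are assumed from the rest of network3.py; the hyperparameters are illustrative only):

    # Sketch only -- names and hyperparameters assumed, not part of this diff.
    training_data, validation_data, test_data = load_data_shared()
    net = Network([
        FullyConnectedLayer(n_in=784, n_out=100),
        SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size=10)
    net.SGD(training_data, 60, 10, 0.1,
            validation_data, test_data, lmbda=5.0)
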
@@ -134,39 +131,37 @@ def SGD(self, training_data, epochs, mini_batch_size, eta,
                 self.y:
                 training_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
             })
-        if validation_data:
-            validate_mb_accuracy = theano.function(
-                [i], self.layers[-1].accuracy(self.y),
-                givens={
-                    self.x:
-                    validation_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
-                    self.y:
-                    validation_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
-                })
-        if test_data:
-            test_mb_accuracy = theano.function(
-                [i], self.layers[-1].accuracy(self.y),
-                givens={
-                    self.x:
-                    test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
-                    self.y:
-                    test_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
-                })
-            self.test_mb_predictions = theano.function(
-                [i], self.layers[-1].y_out,
-                givens={
-                    self.x:
-                    test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
-                })
+        validate_mb_accuracy = theano.function(
+            [i], self.layers[-1].accuracy(self.y),
+            givens={
+                self.x:
+                validation_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
+                self.y:
+                validation_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
+            })
+        test_mb_accuracy = theano.function(
+            [i], self.layers[-1].accuracy(self.y),
+            givens={
+                self.x:
+                test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
+                self.y:
+                test_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
+            })
+        self.test_mb_predictions = theano.function(
+            [i], self.layers[-1].y_out,
+            givens={
+                self.x:
+                test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
+            })
 
         # Do the actual training
-        if validation_data: best_validation_accuracy = 0.0
+        best_validation_accuracy = 0.0
         for epoch in xrange(epochs):
             for minibatch_index in xrange(num_training_batches):
                 iteration = num_training_batches*epoch+minibatch_index
                 if iteration % 1000 == 0:
                     print("Training mini-batch number {0}".format(iteration))
                 cost_ij = train_mb(minibatch_index)
-                if validation_data and (iteration+1) % num_training_batches == 0:
+                if (iteration+1) % num_training_batches == 0:
                     validation_accuracy = np.mean(
                         [validate_mb_accuracy(j) for j in xrange(num_validation_batches)])
                     print("Epoch {0}: validation accuracy {1:.2%}".format(
@@ -178,16 +173,12 @@ def SGD(self, training_data, epochs, mini_batch_size, eta,
                         if test_data:
                             test_accuracy = np.mean(
                                 [test_mb_accuracy(j) for j in xrange(num_test_batches)])
-                            print('The corresponding test accuracy is {0:.2%}'.format(test_accuracy))
+                            print('The corresponding test accuracy is {0:.2%}'.format(
+                                test_accuracy))
         print("Finished training network.")
-        if validation_data:
-            print("Best validation accuracy of {0:.2%} obtained at iteration {1}".format(best_validation_accuracy, best_iteration))
-        if test_data:
-            print("Corresponding test accuracy of {0:.2%}".format(test_accuracy))
-
-    def log_likelihood(self):
-        "Return the log-likelihood cost."
-        return -T.mean(T.log(self.output)[T.arange(self.y.shape[0]), self.y])
+        print("Best validation accuracy of {0:.2%} obtained at iteration {1}".format(
+            best_validation_accuracy, best_iteration))
+        print("Corresponding test accuracy of {0:.2%}".format(test_accuracy))
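
The removed `log_likelihood` relied on Theano's integer advanced indexing to pick out each example's log-probability for its true class. The same trick in plain NumPy, for reference:

    # NumPy analogue of -T.mean(T.log(output)[T.arange(y.shape[0]), y]).
    import numpy as np

    output = np.array([[0.7, 0.2, 0.1],   # softmax outputs for 2 examples
                       [0.1, 0.8, 0.1]])
    y = np.array([0, 1])                  # true class of each example
    picked = np.log(output)[np.arange(y.shape[0]), y]
    print(-picked.mean())                 # log-likelihood cost, ~0.290
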
 
 
 #### Define layer types
@@ -213,8 +204,6 @@ def __init__(self, filter_shape, image_shape, poolsize=(2, 2),
         x pooling sizes.
 
         """
-        self.inpt = None
-        self.output = None
         self.filter_shape = filter_shape
         self.image_shape = image_shape
         self.poolsize = poolsize
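
For reference, a `poolsize` of (2, 2) downsamples each feature map by taking the max over non-overlapping 2x2 blocks; the effect in plain NumPy:

    # Illustration only: what poolsize=(2, 2) does to a 4x4 feature map.
    import numpy as np

    fm = np.arange(16).reshape(4, 4)
    pooled = fm.reshape(2, 2, 2, 2).max(axis=(1, 3))
    print(pooled)   # [[ 5  7] [13 15]]: the max of each 2x2 block
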
@@ -251,8 +240,6 @@ def __init__(self, n_in, n_out, activation_fn=sigmoid, p_dropout=0.0):
         self.n_out = n_out
         self.activation_fn = activation_fn
         self.p_dropout = p_dropout
-        self.inpt = None
-        self.output = None
         # Initialize weights and biases
         self.w = theano.shared(
             np.asarray(
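
The `theano.shared(np.asarray(` statement is cut off by the diff context window. For orientation, a typical continuation (an assumption, not shown in this hunk) draws Gaussian weights scaled by 1/sqrt(n_out):

    # Assumed continuation, illustration only -- the hunk ends mid-statement.
    self.w = theano.shared(
        np.asarray(
            np.random.normal(
                loc=0.0, scale=np.sqrt(1.0/n_out), size=(n_in, n_out)),
            dtype=theano.config.floatX),
        name='w', borrow=True)
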
@@ -273,11 +260,8 @@ def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
         self.y_out = T.argmax(self.output, axis=1)
         self.inpt_dropout = dropout_layer(
             inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
-        self.output_dropout = self.activation_fn(T.dot(self.inpt_dropout, self.w) + self.b)
-
-    def cost(self, net):
-        "Return the cross-entropy cost."
-        return T.nnet.binary_crossentropy(self.output, net.y).mean()
+        self.output_dropout = self.activation_fn(
+            T.dot(self.inpt_dropout, self.w) + self.b)
 
     def accuracy(self, y):
         "Return the accuracy for the mini-batch."
@@ -286,8 +270,6 @@ def accuracy(self, y):
 class SoftmaxLayer():
 
     def __init__(self, n_in, n_out, p_dropout=0.0):
-        self.inpt = None
-        self.output = None
         self.n_in = n_in
         self.n_out = n_out
         self.p_dropout = p_dropout