
Commit e1f1bf2

Adding network3.py and expand_mnist.py

1 parent 8037bb5

3 files changed: +367 -1 lines changed

.gitignore

Lines changed: 3 additions & 1 deletion
@@ -2,4 +2,6 @@
 *.org
 *.pkl
 *.pyc
-.DS_Store
+.DS_Store
+loc.py
+src/ec2

src/expand_mnist.py

Lines changed: 60 additions & 0 deletions
"""expand_mnist.py
~~~~~~~~~~~~~~~~~~

Take the 50,000 MNIST training images, and create an expanded set of
250,000 images, by displacing each training image up, down, left and
right, by one pixel. Save the resulting file to
../data/mnist_expanded.pkl.gz.

Note that this program is memory intensive, and may not run on small
systems.

"""

from __future__ import print_function

#### Libraries

# Standard library
import cPickle
import gzip
import os.path
import random

# Third-party libraries
import numpy as np

print("Expanding the MNIST training set")

if os.path.exists("../data/mnist_expanded.pkl.gz"):
    print("The expanded training set already exists. Exiting.")
else:
    f = gzip.open("../data/mnist.pkl.gz", 'rb')
    training_data, validation_data, test_data = cPickle.load(f)
    f.close()
    expanded_training_pairs = []
    j = 0 # counter
    for x, y in zip(training_data[0], training_data[1]):
        expanded_training_pairs.append((x, y))
        image = np.reshape(x, (-1, 28))
        j += 1
        if j % 1000 == 0: print("Expanding image number", j)
        # iterate over data telling us the details of how to
        # do the displacement
        for d, axis, index_position, index in [
                (1, 0, "first", 0),
                (-1, 0, "first", 27),
                (1, 1, "last", 0),
                (-1, 1, "last", 27)]:
            new_img = np.roll(image, d, axis)
            if index_position == "first":
                new_img[index, :] = np.zeros(28)
            else:
                new_img[:, index] = np.zeros(28)
            expanded_training_pairs.append((np.reshape(new_img, 784), y))
    random.shuffle(expanded_training_pairs)
    expanded_training_data = [list(d) for d in zip(*expanded_training_pairs)]
    print("Saving expanded data. This may take a few minutes.")
    f = gzip.open("../data/mnist_expanded.pkl.gz", "w")
    cPickle.dump((expanded_training_data, validation_data, test_data), f)
    f.close()
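
A minimal sketch of the displacement trick the loop above relies on, shown on a small illustrative 4x4 array rather than a real 28x28 MNIST image: np.roll shifts the pixels by one position along an axis, and the row (or column) that wrapped around from the other side is then blanked out.

    import numpy as np

    image = np.arange(16).reshape(4, 4)   # stand-in for a 28x28 MNIST image

    # d=1 on axis 0: shift every row down by one pixel
    shifted = np.roll(image, 1, axis=0)
    # row 0 now holds the pixels that wrapped around from the bottom row;
    # zero it, just as the script does with new_img[index, :] = np.zeros(28)
    shifted[0, :] = 0

    print(shifted)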

src/network3.py

Lines changed: 304 additions & 0 deletions
"""network3.py
~~~~~~~~~~~~~~

A Theano-based program for training and running simple neural
networks.

Supports several layer types (fully connected, convolutional, max
pooling, softmax), and activation functions (sigmoid, tanh, and
rectified linear units, with more easily added).

When run on a CPU, this program is much faster than network.py and
network2.py. However, unlike network.py and network2.py it can also
be run on a GPU, which makes it faster still.

Because the code is based on Theano, the code is different in many
ways from network.py and network2.py. However, where possible I have
tried to maintain consistency with the earlier programs. In
particular, the API is similar to network2.py. Note that I have
focused on making the code simple, easily readable, and easily
modifiable. It is not optimized, and omits many desirable features.

"""

#### Libraries
# Standard library
import cPickle
import gzip

# Third-party libraries
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nnet import conv
from theano.tensor.nnet import softmax
from theano.tensor.signal import downsample

# Activation functions for neurons
def linear(z): return z
def ReLU(z): return T.maximum(0, z)
from theano.tensor.nnet import sigmoid
from theano.tensor import tanh


#### Constants
GPU = False
if GPU:
    print "Trying to run under a GPU. If this is not desired, then modify "+\
        "network3.py\nto set the GPU flag to False."
    try: theano.config.device = 'gpu'
    except: pass # it's already set
    theano.config.floatX = 'float32'

def example(mini_batch_size=10):
    print("Loading the MNIST data")
    training_data, validation_data, test_data = load_data_shared("../data/mnist.pkl.gz")
    print("Building the network")
    net = create_net(10)
    print("Training the network")
    try:
        net.SGD(training_data, 200, mini_batch_size, 0.1,
                validation_data, test_data, lmbda=1.0)
    except KeyboardInterrupt:
        pass
    return net

def create_net(mini_batch_size=10, activation_fn=tanh):
    return Network(
        [ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28), filter_shape=(20, 1, 5, 5), poolsize=(2, 2), activation_fn=activation_fn),
         #ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12), filter_shape=(40, 20, 5, 5), poolsize=(2, 2), activation_fn=activation_fn),
         #FullyConnectedLayer(n_in=40*4*4, n_out=100, mini_batch_size=mini_batch_size, activation_fn=activation_fn),
         #FullyConnectedLayer(n_in=784, n_out=100, mini_batch_size=mini_batch_size, activation_fn=activation_fn),
         #FullyConnectedLayer(n_in=20*12*12, n_out=100, mini_batch_size=mini_batch_size),
         #FullyConnectedLayer(n_in=100, n_out=100, mini_batch_size=mini_batch_size, activation_fn=activation_fn),
         #SoftmaxLayer(n_in=100, n_out=10, mini_batch_size=mini_batch_size)], mini_batch_size)
         SoftmaxLayer(n_in=20*12*12, n_out=10)], mini_batch_size)

#### Load the MNIST data
def load_data_shared(filename="../data/mnist.pkl.gz"):
    f = gzip.open(filename, 'rb')
    training_data, validation_data, test_data = cPickle.load(f)
    f.close()
    def shared(data):
        """Place the data into shared variables. This allows Theano to copy
        the data to the GPU, if one is available.

        """
        shared_x = theano.shared(
            np.asarray(data[0], dtype=theano.config.floatX), borrow=True)
        shared_y = theano.shared(
            np.asarray(data[1], dtype=theano.config.floatX), borrow=True)
        return shared_x, T.cast(shared_y, "int32")
    return [shared(training_data), shared(validation_data), shared(test_data)]

#### Main class used to construct and train networks
class Network():

    def __init__(self, layers, mini_batch_size):
        """Takes a list of `layers`, describing the network architecture, and
        a value for the `mini_batch_size` to be used during training
        by stochastic gradient descent.

        """
        self.layers = layers
        self.mini_batch_size = mini_batch_size
        self.params = [param for layer in self.layers for param in layer.params]
        self.x = T.matrix("x")
        self.y = T.ivector("y")
        init_layer = self.layers[0]
        init_layer.set_inpt(self.x, mini_batch_size)
        for j in xrange(1, len(self.layers)):
            prev_layer, layer = self.layers[j-1], self.layers[j]
            layer.set_inpt(prev_layer.output, mini_batch_size)
        self.output = self.layers[-1].output

    def SGD(self, training_data, epochs, mini_batch_size, eta,
            validation_data, test_data, lmbda=0.0):
        """Train the network using mini-batch stochastic gradient descent."""
        training_x, training_y = training_data
        validation_x, validation_y = validation_data
        test_x, test_y = test_data

        # compute number of minibatches for training, validation and testing
        num_training_batches = size(training_data)/mini_batch_size
        num_validation_batches = size(validation_data)/mini_batch_size
        num_test_batches = size(test_data)/mini_batch_size

        # define the (regularized) cost function, symbolic gradients, and updates
        l2_norm_squared = sum([(layer.w**2).sum() for layer in self.layers])
        cost = self.log_likelihood()+0.5*lmbda*l2_norm_squared/num_training_batches
        grads = T.grad(cost, self.params)
        updates = [(param, param-eta*grad)
                   for param, grad in zip(self.params, grads)]

        # define functions to train a mini-batch, and to compute the
        # accuracy in validation and test mini-batches.
        i = T.lscalar() # mini-batch index
        train_mb = theano.function(
            [i], cost, updates=updates,
            givens={
                self.x:
                training_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                training_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        validate_mb_accuracy = theano.function(
            [i], self.layers[-1].accuracy(self.y),
            givens={
                self.x:
                validation_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                validation_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })
        test_mb_accuracy = theano.function(
            [i], self.layers[-1].accuracy(self.y),
            givens={
                self.x:
                test_x[i*self.mini_batch_size: (i+1)*self.mini_batch_size],
                self.y:
                test_y[i*self.mini_batch_size: (i+1)*self.mini_batch_size]
            })

        # Do the actual training
        best_validation_accuracy = 0.0
        for epoch in xrange(epochs):
            for minibatch_index in xrange(num_training_batches):
                iteration = num_training_batches*epoch+minibatch_index
                if iteration % 1000 == 0:
                    print("Training mini-batch number {0}".format(iteration))
                cost_ij = train_mb(minibatch_index)
                if (iteration+1) % num_training_batches == 0:
                    validation_accuracy = np.mean(
                        [validate_mb_accuracy(j) for j in xrange(num_validation_batches)])
                    print("Epoch {0}: validation accuracy {1:.2%}".format(
                        epoch, validation_accuracy))
                    if validation_accuracy >= best_validation_accuracy:
                        print("This is the best validation accuracy to date.")
                        best_validation_accuracy = validation_accuracy
                        best_iteration = iteration
                        test_accuracy = np.mean(
                            [test_mb_accuracy(j) for j in xrange(num_test_batches)])
                        print('The corresponding test accuracy is {0:.2%}'.format(
                            test_accuracy))
        print("Finished training network.")
        print("Best validation accuracy of {0:.2%} obtained at iteration {1}".format(
            best_validation_accuracy, best_iteration))
        print("Corresponding test accuracy of {0:.2%}".format(test_accuracy))

    def log_likelihood(self):
        "Return the log-likelihood cost."
        return -T.mean(T.log(self.output)[T.arange(self.y.shape[0]), self.y])


#### Define layer types

class ConvPoolLayer():
    """Used to create a combination of a convolutional and a max-pooling
    layer. A more sophisticated implementation would separate the
    two, but for our purposes we'll always use them together, and it
    simplifies the code, so it makes sense to combine them.

    """

    def __init__(self, filter_shape, image_shape, poolsize=(2, 2),
                 activation_fn=sigmoid):
        """`filter_shape` is a tuple of length 4, whose entries are the number
        of filters, the number of input feature maps, the filter height, and the
        filter width.

        `image_shape` is a tuple of length 4, whose entries are the
        mini-batch size, the number of input feature maps, the image
        height, and the image width.

        `poolsize` is a tuple of length 2, whose entries are the y and
        x pooling sizes.

        """
        self.inpt = None
        self.output = None
        self.filter_shape = filter_shape
        self.image_shape = image_shape
        self.poolsize = poolsize
        self.activation_fn = activation_fn
        # initialize weights and biases
        n_out = (filter_shape[0]*np.prod(filter_shape[2:])/np.prod(poolsize))
        self.w = theano.shared(
            np.asarray(
                np.random.normal(loc=0, scale=np.sqrt(1.0/n_out), size=filter_shape),
                dtype=theano.config.floatX),
            borrow=True)
        self.b = theano.shared(
            np.asarray(
                np.random.normal(loc=0, scale=1.0, size=(filter_shape[0],)),
                dtype=theano.config.floatX),
            borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, mini_batch_size):
        self.inpt = inpt.reshape(self.image_shape)
        conv_out = conv.conv2d(
            input=self.inpt, filters=self.w, filter_shape=self.filter_shape,
            image_shape=self.image_shape)
        pooled_out = downsample.max_pool_2d(
            input=conv_out, ds=self.poolsize, ignore_border=True)
        self.output = self.activation_fn(
            pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))


class FullyConnectedLayer():

    def __init__(self, n_in, n_out, mini_batch_size=10, activation_fn=sigmoid):
        self.n_in = n_in
        self.n_out = n_out
        self.activation_fn = activation_fn
        self.inpt = None
        self.output = None
        # Initialize weights and biases
        self.w = theano.shared(
            np.asarray(
                np.random.normal(
                    loc=0.0, scale=np.sqrt(1.0/n_out), size=(n_in, n_out)),
                dtype=theano.config.floatX),
            name='w', borrow=True)
        self.b = theano.shared(
            np.asarray(np.random.normal(loc=0.0, scale=1.0, size=(n_out,)),
                       dtype=theano.config.floatX),
            name='b', borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, mini_batch_size):
        self.mini_batch_size = mini_batch_size
        self.inpt = inpt.reshape((self.mini_batch_size, self.n_in))
        # use the reshaped input, so the layer also works after a conv-pool layer
        self.output = self.activation_fn(T.dot(self.inpt, self.w)+self.b)

class SoftmaxLayer():

    def __init__(self, n_in, n_out):
        self.inpt = None
        self.output = None
        self.n_in = n_in
        self.n_out = n_out
        # Initialize weights and biases
        self.w = theano.shared(
            np.zeros((n_in, n_out), dtype=theano.config.floatX),
            name='w', borrow=True)
        self.b = theano.shared(
            np.zeros((n_out,), dtype=theano.config.floatX),
            name='b', borrow=True)
        self.params = [self.w, self.b]

    def set_inpt(self, inpt, mini_batch_size):
        self.mini_batch_size = mini_batch_size
        self.inpt = inpt.reshape((self.mini_batch_size, self.n_in))
        self.output = softmax(T.dot(self.inpt, self.w)+self.b)
        self.y_out = T.argmax(self.output, axis=1)

    def accuracy(self, y):
        "Return the accuracy for the mini-batch."
        return T.mean(T.eq(y, self.y_out))


#### Miscellanea
def size(data):
    "Return the size of the dataset `data`."
    return data[0].get_value(borrow=True).shape[0]
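
A rough usage sketch, not the author's prescribed invocation but one way to drive the API above. It assumes Python 2 with Theano installed, that it is run from the src directory, and that ../data/mnist.pkl.gz exists; the epoch count and learning rate are illustrative.

    from network3 import Network, ConvPoolLayer, SoftmaxLayer, load_data_shared

    training_data, validation_data, test_data = load_data_shared("../data/mnist.pkl.gz")

    # Same architecture as create_net(): one conv/pool layer feeding a softmax layer.
    # n_in=20*12*12 because 28x28 images with 5x5 filters give 24x24 feature maps,
    # which 2x2 max-pooling reduces to 12x12, across 20 feature maps.
    mini_batch_size = 10
    net = Network(
        [ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
                       filter_shape=(20, 1, 5, 5), poolsize=(2, 2)),
         SoftmaxLayer(n_in=20*12*12, n_out=10)], mini_batch_size)

    # 60 epochs, learning rate 0.1, L2 regularization lmbda=1.0
    net.SGD(training_data, 60, mini_batch_size, 0.1,
            validation_data, test_data, lmbda=1.0)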
