Commit 4e8c4d9

Tracking gradients in different layers, and serializing some MNIST images to JSON
1 parent: ffce979

11 files changed: +156, -16 lines

fig/data_1000.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

fig/generate_gradient.py

Lines changed: 105 additions & 15 deletions
@@ -7,23 +7,113 @@
 
 """
 
-import numpy as np
-
+#### Libraries
+# Standard library
+import json
+import math
+import random
+import shutil
 import sys
 sys.path.append("../code/")
-import mnist_loader
-training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
 
+# My library
+import mnist_loader
 import network2
-net = network2.Network([784, 30, 30, 10])
-nabla_b_results = [net.backprop(x, y)[0] for x, y in training_data[:1000]]
-def sum(a, b): return [x+y for (x, y) in zip(a, b)]
-gradient = reduce(sum, nabla_b_results)
-average_gradient = [(np.reshape(g, len(g))/1000).tolist() for g in gradient]
-# Discard all but the first 6 terms in each layer, discard the output layer
-abbreviated_gradient = [ag[:6] for ag in average_gradient[:-1]]
 
-import json
-f = open("initial_gradient.json", "w")
-json.dump(abbreviated_gradient, f)
-f.close()
+# Third-party libraries
+import matplotlib.pyplot as plt
+import numpy as np
+
+def main():
+    # Load the data
+    full_td, _, _ = mnist_loader.load_data_wrapper()
+    td = full_td[:1000] # Just use the first 1000 items of training data
+    epochs = 500 # Number of epochs to train for
+
+    print "\nTwo hidden layers:"
+    net = network2.Network([784, 30, 30, 10])
+    initial_norms(td, net)
+    abbreviated_gradient = [
+        ag[:6] for ag in get_average_gradient(net, td)[:-1]]
+    print "Saving the averaged gradient for the top six neurons in each "+\
+        "layer.\nWARNING: This will affect the look of the book, so be "+\
+        "sure to check the\nrelevant material (early chapter 5)."
+    f = open("initial_gradient.json", "w")
+    json.dump(abbreviated_gradient, f)
+    f.close()
+    shutil.copy("initial_gradient.json", "../../js/initial_gradient.json")
+    training(td, net, epochs, "norms_during_training_2_layers.json")
+    plot_training(
+        epochs, "norms_during_training_2_layers.json", 2)
+
+    print "\nThree hidden layers:"
+    net = network2.Network([784, 30, 30, 30, 10])
+    initial_norms(td, net)
+    training(td, net, epochs, "norms_during_training_3_layers.json")
+    plot_training(
+        epochs, "norms_during_training_3_layers.json", 3)
+
+    print "\nFour hidden layers:"
+    net = network2.Network([784, 30, 30, 30, 30, 10])
+    initial_norms(td, net)
+    training(td, net, epochs,
+             "norms_during_training_4_layers.json")
+    plot_training(
+        epochs, "norms_during_training_4_layers.json", 4)
+
+def initial_norms(training_data, net):
+    average_gradient = get_average_gradient(net, training_data)
+    norms = [list_norm(avg) for avg in average_gradient[:-1]]
+    print "Average gradient for the hidden layers: "+str(norms)
+
+def training(training_data, net, epochs, filename):
+    norms = []
+    for j in range(epochs):
+        average_gradient = get_average_gradient(net, training_data)
+        norms.append([list_norm(avg) for avg in average_gradient[:-1]])
+        print "Epoch: %s" % j
+        net.SGD(training_data, 1, 1000, 0.1, lmbda=5.0)
+    f = open(filename, "w")
+    json.dump(norms, f)
+    f.close()
+
+def plot_training(epochs, filename, num_layers):
+    f = open(filename, "r")
+    norms = json.load(f)
+    f.close()
+    fig = plt.figure()
+    ax = fig.add_subplot(111)
+    colors = ["#2A6EA6", "#FFA933", "#FF5555", "#55FF55", "#5555FF"]
+    for j in range(num_layers):
+        ax.plot(np.arange(epochs),
+                [n[j] for n in norms],
+                color=colors[j],
+                label="Hidden layer %s" % (j+1,))
+    ax.set_xlim([0, epochs])
+    ax.grid(True)
+    ax.set_xlabel('Number of epochs of training')
+    ax.set_title('Speed of learning: %s hidden layers' % num_layers)
+    ax.set_yscale('log')
+    plt.legend(loc="upper right")
+    fig_filename = "training_speed_%s_layers.png" % num_layers
+    plt.savefig(fig_filename)
+    shutil.copy(fig_filename, "../../images/"+fig_filename)
+    plt.show()
+
+def get_average_gradient(net, training_data):
+    nabla_b_results = [net.backprop(x, y)[0] for x, y in training_data]
+    gradient = list_sum(nabla_b_results)
+    return [(np.reshape(g, len(g))/len(training_data)).tolist()
+            for g in gradient]
+
+def zip_sum(a, b):
+    return [x+y for (x, y) in zip(a, b)]
+
+def list_sum(l):
+    return reduce(zip_sum, l)
+
+def list_norm(l):
+    return math.sqrt(sum([x*x for x in l]))
+
+if __name__ == "__main__":
+    main()
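For reference, the "speed of learning" that training() records is just the Euclidean norm of each hidden layer's averaged bias gradient (the delta error terms). A minimal, self-contained sketch of that computation, using random stand-in gradients rather than the output of net.backprop:

import math

import numpy as np

def average_gradient(nabla_b_results):
    # One entry per training example; each entry is a list of per-layer
    # bias-gradient vectors, as network2's backprop would return.
    n = len(nabla_b_results)
    return [sum(layer) / n for layer in zip(*nabla_b_results)]

def layer_norms(avg_gradient):
    # Norm of each hidden layer's averaged gradient; the output layer
    # is dropped, as in the script above.
    return [math.sqrt(np.sum(g * g)) for g in avg_gradient[:-1]]

# Stand-in data: 1000 examples for a [784, 30, 30, 10] network
fake = [[np.random.randn(30), np.random.randn(30), np.random.randn(10)]
        for _ in range(1000)]
print(layer_norms(average_gradient(fake)))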

fig/initial_gradient.json

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-[[-0.00015957744575933252, -7.109660645293893e-06, 0.00029705824697869363, -0.005433034945183055, -0.000601732153598837, -0.00031497136113071197], [-0.004743788813673901, -0.003335113231382309, -0.006826947354624844, 0.001668145239275299, -0.013916515462361398, 0.002312540509777085], [0.05364605271597593, -0.0057698230441689275, -0.010571727068813678, 0.07860259192197483, 0.014443898612513025, -0.019157824473129328]]
+[[-0.003970677333144113, -0.0031684316985881185, 0.008103235909196014, 0.012598010584130365, -0.026465907331998335, 0.0017583319323150341], [0.04152906589960523, 0.044025552524932406, -0.009669682279354514, 0.046736871369353235, 0.03877302528270452, 0.012336459551975156]]
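The regenerated file now holds one row of six values per hidden layer of the [784, 30, 30, 10] network: the output layer is discarded and each layer is truncated to its first six neurons. A quick sanity check, assuming the file sits in the working directory:

import json

with open("initial_gradient.json") as f:
    gradient = json.load(f)

# Two hidden layers, six neurons each
assert len(gradient) == 2
assert all(len(row) == 6 for row in gradient)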

fig/norms_during_training_2_layers.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

fig/norms_during_training_3_layers.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

fig/norms_during_training_4_layers.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.
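These diffs aren't rendered, but the structure of the files follows from training() above: a list with one entry per epoch, each entry holding the gradient norm of every hidden layer. A hypothetical check against the two-layer file:

import json

with open("norms_during_training_2_layers.json") as f:
    norms = json.load(f)

# 500 epochs; one norm per hidden layer (3 and 4 for the other two files)
assert len(norms) == 500
assert all(len(epoch_norms) == 2 for epoch_norms in norms)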

fig/serialize_images_to_json.py

Lines changed: 46 additions & 0 deletions
@@ -0,0 +1,46 @@
+"""
+serialize_images_to_json
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Utility to serialize parts of the training and validation data to JSON,
+for use with Javascript. """
+
+#### Libraries
+# Standard library
+import json
+import sys
+
+# My library
+sys.path.append('../code/')
+import mnist_loader
+
+# Third-party libraries
+import numpy as np
+
+
+# Number of training and validation data images to serialize
+NTD = 1000
+NVD = 100
+
+training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
+
+def make_data_integer(td):
+    # This will be slow, due to the loop. It'd be better if numpy did
+    # this directly. But numpy.rint followed by tolist() doesn't
+    # convert to a standard Python int.
+    return [int(x) for x in (td*256).reshape(784).tolist()]
+
+data = {"training": [
+        {"x": [x[0] for x in training_data[j][0].tolist()],
+         "y": [y[0] for y in training_data[j][1].tolist()]}
+        for j in xrange(NTD)],
+        "validation": [
+        {"x": [x[0] for x in validation_data[j][0].tolist()],
+         "y": validation_data[j][1]}
+        for j in xrange(NVD)]}
+
+f = open("data_1000.json", "w")
+json.dump(data, f)
+f.close()
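A sketch of how the serialized data_1000.json might be read back on the Python side (a Javascript consumer would do the analogous JSON.parse); this assumes the script above has already been run in the current directory:

import json

import numpy as np

with open("data_1000.json") as f:
    data = json.load(f)

# NTD training entries and NVD validation entries, per the constants above
example = data["training"][0]
image = np.array(example["x"]).reshape(28, 28)  # 784 pixel intensities
label = np.argmax(example["y"])  # a training "y" is a one-hot list of length 10
print(label, image.shape)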

fig/test.png (17.9 KB)

fig/training_speed_2_layers.png (42.3 KB)

fig/training_speed_3_layers.png (43.3 KB)

fig/training_speed_4_layers.png (49.7 KB)

0 commit comments