 
 """
 
-import numpy as np
-
+#### Libraries
+# Standard library
+import json
+import math
+import random
+import shutil
 import sys
 sys.path.append("../code/")
-import mnist_loader
-training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
 
+# My library
+import mnist_loader
 import network2
-net = network2.Network([784, 30, 30, 10])
-nabla_b_results = [net.backprop(x, y)[0] for x, y in training_data[:1000]]
-def sum(a, b): return [x+y for (x, y) in zip(a, b)]
-gradient = reduce(sum, nabla_b_results)
-average_gradient = [(np.reshape(g, len(g))/1000).tolist() for g in gradient]
-# Discard all but the first 6 terms in each layer, discard the output layer
-abbreviated_gradient = [ag[:6] for ag in average_gradient[:-1]]
 
-import json
-f = open("initial_gradient.json", "w")
-json.dump(abbreviated_gradient, f)
-f.close()
+# Third-party libraries
+import matplotlib.pyplot as plt
+import numpy as np
+
+def main():
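+    # Compare the speed of learning in the hidden layers of networks
+    # with two, three and four hidden layers, saving the gradient data
+    # and the plots used in the book.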
+    # Load the data
+    full_td, _, _ = mnist_loader.load_data_wrapper()
+    td = full_td[:1000] # Just use the first 1000 items of training data
+    epochs = 500 # Number of epochs to train for
+
+    print "\nTwo hidden layers:"
+    net = network2.Network([784, 30, 30, 10])
+    initial_norms(td, net)
+    abbreviated_gradient = [
+        ag[:6] for ag in get_average_gradient(net, td)[:-1]]
+    print "Saving the averaged gradient for the top six neurons in each "+\
+        "layer.\nWARNING: This will affect the look of the book, so be "+\
+        "sure to check the\nrelevant material (early chapter 5)."
+    f = open("initial_gradient.json", "w")
+    json.dump(abbreviated_gradient, f)
+    f.close()
+    shutil.copy("initial_gradient.json", "../../js/initial_gradient.json")
+    training(td, net, epochs, "norms_during_training_2_layers.json")
+    plot_training(
+        epochs, "norms_during_training_2_layers.json", 2)
+
+    print "\nThree hidden layers:"
+    net = network2.Network([784, 30, 30, 30, 10])
+    initial_norms(td, net)
+    training(td, net, epochs, "norms_during_training_3_layers.json")
+    plot_training(
+        epochs, "norms_during_training_3_layers.json", 3)
+
+    print "\nFour hidden layers:"
+    net = network2.Network([784, 30, 30, 30, 30, 10])
+    initial_norms(td, net)
+    training(td, net, epochs,
+             "norms_during_training_4_layers.json")
+    plot_training(
+        epochs, "norms_during_training_4_layers.json", 4)
+
+def initial_norms(training_data, net):
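+    # Print how large the average gradient is in each hidden layer,
+    # as a per-layer norm (the output layer is excluded).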
+    average_gradient = get_average_gradient(net, training_data)
+    norms = [list_norm(avg) for avg in average_gradient[:-1]]
+    print "Norms of the average gradient for the hidden layers: "+str(norms)
+
+def training(training_data, net, epochs, filename):
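+    # Record the per-layer norms of the average gradient at the start
+    # of each epoch, training for one epoch between measurements, and
+    # save the record to filename as JSON.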
+    norms = []
+    for j in range(epochs):
+        average_gradient = get_average_gradient(net, training_data)
+        norms.append([list_norm(avg) for avg in average_gradient[:-1]])
+        print "Epoch: %s" % j
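+        # One epoch of training; the mini-batch size (1000) equals the
+        # size of the truncated training set used in main, so this is
+        # effectively full-batch gradient descent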
+        net.SGD(training_data, 1, 1000, 0.1, lmbda=5.0)
+    f = open(filename, "w")
+    json.dump(norms, f)
+    f.close()
+
+def plot_training(epochs, filename, num_layers):
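+    # Plot the recorded gradient norms against epochs on a log scale,
+    # one curve per hidden layer, then save and display the figure.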
+    f = open(filename, "r")
+    norms = json.load(f)
+    f.close()
+    fig = plt.figure()
+    ax = fig.add_subplot(111)
+    colors = ["#2A6EA6", "#FFA933", "#FF5555", "#55FF55", "#5555FF"]
+    for j in range(num_layers):
+        ax.plot(np.arange(epochs),
+                [n[j] for n in norms],
+                color=colors[j],
+                label="Hidden layer %s" % (j+1,))
+    ax.set_xlim([0, epochs])
+    ax.grid(True)
+    ax.set_xlabel('Number of epochs of training')
+    ax.set_title('Speed of learning: %s hidden layers' % num_layers)
+    ax.set_yscale('log')
+    plt.legend(loc="upper right")
+    fig_filename = "training_speed_%s_layers.png" % num_layers
+    plt.savefig(fig_filename)
+    shutil.copy(fig_filename, "../../images/"+fig_filename)
+    plt.show()
+
+def get_average_gradient(net, training_data):
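+    # Backpropagate each training example and average the bias
+    # gradients over the data set, returning one flattened list of
+    # components per layer.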
+    nabla_b_results = [net.backprop(x, y)[0] for x, y in training_data]
+    gradient = list_sum(nabla_b_results)
+    return [(np.reshape(g, len(g))/len(training_data)).tolist()
+            for g in gradient]
+
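+# Helper functions: element-wise sum of two lists, element-wise sum of
+# a list of lists, and the Euclidean (L2) norm of a list.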
+def zip_sum(a, b):
+    return [x+y for (x, y) in zip(a, b)]
+
+def list_sum(l):
+    return reduce(zip_sum, l)
+
+def list_norm(l):
+    return math.sqrt(sum([x*x for x in l]))
+
+if __name__ == "__main__":
+    main()
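A quick sanity check on the saved data, once the script has been run from its own directory so that ../code/ is importable (a minimal sketch; it assumes the two-hidden-layer run above has completed):

    import json
    norms = json.load(open("norms_during_training_2_layers.json"))
    # norms[j][l] is the norm of the average gradient in hidden layer
    # l+1 at the start of epoch j
    print norms[0]   # per-layer gradient scales before training
    print norms[-1]  # ...and at the start of the final epoch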