+from sklearn.datasets import fetch_mldata
+import numpy as np
+from scipy.optimize import minimize
+from random import shuffle
+
+from sparse_autoencoder.display_network import display_network
+from sparse_autoencoder.sparse_autoencoder_cost import initialize_parameters, sparse_autoencoder_cost_and_grad
+from softmax_regression.softmax_train import softmax_train
+from softmax_regression.softmax_predict import softmax_predict
+
+from feed_forward_autoencoder import feed_forward_autoencoder
## CS294A/CS294W Self-taught Learning Exercise

# Instructions

# allow your sparse autoencoder to get good filters; you do not need to
# change the parameters below.

-inputSize = 28 * 28;
-numLabels = 5;
-hiddenSize = 200;
-sparsityParam = 0.1; # desired average activation of the hidden units.
-                     # (This was denoted by the Greek alphabet rho, which looks like a lower-case "p",
-                     # in the lecture notes).
-lambda = 3e-3;       # weight decay parameter
-beta = 3;            # weight of sparsity penalty term
-maxIter = 400;
+input_size = 28 * 28
+num_labels = 5
+hidden_size = 200
+sparsity_param = 0.1    # desired average activation of the hidden units.
+                        # (This was denoted by the Greek letter rho, which looks like a lower-case "p",
+                        # in the lecture notes).
+decay_lambda = 3e-3     # weight decay parameter
+beta = 3                # weight of sparsity penalty term
+max_iter = 400
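
Editor's note: for reference, these hyper-parameters enter the sparse-autoencoder objective minimized in Step 2. In the UFLDL lecture notes the cost has the form below (up to the exact averaging constants used by this repo's sparse_autoencoder_cost_and_grad, which are not verified here); lambda is decay_lambda, rho is sparsity_param and beta is beta above:

    J(W,b) = \frac{1}{m}\sum_{i=1}^{m}\tfrac{1}{2}\,\lVert h_{W,b}(x^{(i)}) - x^{(i)}\rVert^2
             + \frac{\lambda}{2}\sum_{l}\lVert W^{(l)}\rVert_F^2
             + \beta \sum_{j=1}^{\mathrm{hidden\_size}} \mathrm{KL}\big(\rho \,\|\, \hat{\rho}_j\big),
    \qquad
    \mathrm{KL}\big(\rho \,\|\, \hat{\rho}_j\big) = \rho\log\frac{\rho}{\hat{\rho}_j} + (1-\rho)\log\frac{1-\rho}{1-\hat{\rho}_j}

where \hat{\rho}_j is the mean activation of hidden unit j over the training set.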

## ======================================================================
# STEP 1: Load data from the MNIST database

# change it.

# Load MNIST database files
-mnistData = loadMNISTImages('mnist/train-images-idx3-ubyte');
-mnistLabels = loadMNISTLabels('mnist/train-labels-idx1-ubyte');
+mnist = fetch_mldata('MNIST original', data_home='../data/')
+images = np.float32(mnist.data)/255.0
+labels = mnist.target

# Set Unlabeled Set (All Images)

# Simulate a Labeled and Unlabeled set
-labeledSet = find(mnistLabels >= 0 & mnistLabels <= 4);
-unlabeledSet = find(mnistLabels >= 5);
-
-numTrain = round(numel(labeledSet)/2);
-trainSet = labeledSet(1:numTrain);
-testSet = labeledSet(numTrain+1:end);
-
-unlabeledData = mnistData(:, unlabeledSet);
-
-trainData = mnistData(:, trainSet);
-trainLabels = mnistLabels(trainSet)' + 1; # Shift Labels to the Range 1-5
+labeled_set = np.where((labels >= 0) & (labels <= 4))[0]   # digits 0-4 form the labeled set (num_labels = 5)
+unlabeled_set = np.where(labels >= 5)[0]                   # digits 5-9 are used as unlabeled data

-testData = mnistData(:, testSet);
-testLabels = mnistLabels(testSet)' + 1; # Shift Labels to the Range 1-5
+unlabeled_data = images[unlabeled_set]

# Output Some Statistics
-fprintf('# examples in unlabeled set: #d\n', size(unlabeledData, 2));
-fprintf('# examples in supervised training set: #d\n\n', size(trainData, 2));
-fprintf('# examples in supervised testing set: #d\n\n', size(testData, 2));
+print '# examples in unlabeled set: {0}'.format(unlabeled_data.shape[0])

## ======================================================================
# STEP 2: Train the sparse autoencoder
# This trains the sparse autoencoder on the unlabeled training
# images.

-# Randomly initialize the parameters
-theta = initializeParameters(hiddenSize, inputSize);
-
-## ----------------- YOUR CODE HERE ----------------------
-# Find opttheta by running the sparse autoencoder on
-# unlabeledTrainingImages
-
-opttheta = theta;
-
+trained_theta_file = '../data/opttheta.npy'
+TRAIN = False
+if TRAIN:
+    # Randomly initialize the parameters
+    theta = initialize_parameters(input_size, hidden_size)
+
+    # Find opttheta by running the sparse autoencoder on
+    # the unlabeled training images (unlabeled_data)
+    func_args = (input_size, hidden_size, decay_lambda, sparsity_param, beta, unlabeled_data)
+    res = minimize(sparse_autoencoder_cost_and_grad, x0=theta, args=func_args, method='L-BFGS-B',
+                   jac=True, options={'maxiter': max_iter, 'disp': True})
+    opttheta = res.x
+    np.save(trained_theta_file, opttheta)
+else:
+    opttheta = np.load(trained_theta_file)
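
Editor's note: because jac=True is passed, minimize expects sparse_autoencoder_cost_and_grad to return a (cost, gradient) pair on every call, with the gradient flattened to the same shape as theta. As a hedged aside (not part of the original script), a quick convergence check using standard scipy OptimizeResult fields could be added at the end of the TRAIN branch:

    # only valid inside the TRAIN branch above, where `res` exists
    print 'converged: {0}, final cost: {1:.4f}, iterations: {2}'.format(
        res.success, res.fun, res.nit)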

## -----------------------------------------------------

# Visualize weights
-W1 = reshape(opttheta(1:hiddenSize * inputSize), hiddenSize, inputSize);
-display_network(W1');
+w1 = opttheta[0: hidden_size*input_size].reshape((input_size, hidden_size))
+display_network(w1.T, save_figure_path='../data/stl.png')

##======================================================================
## STEP 3: Extract Features from the Supervised Dataset
#
# You need to complete the code in feedForwardAutoencoder.m so that the
# following command will extract features from the data.

-trainFeatures = feedForwardAutoencoder(opttheta, hiddenSize, inputSize, ...
-                                       trainData);
+num_train = labeled_set.shape[0] // 2
+indices = list(range(labeled_set.shape[0]))
+shuffle(indices)
+train_set = labeled_set[indices[0:num_train]]
+test_set = labeled_set[indices[num_train:]]
+
+train_data = images[train_set]
+train_labels = labels[train_set]   # digit labels 0-4; no shift needed (the MATLAB version shifted to 1-5)

-testFeatures = feedForwardAutoencoder(opttheta, hiddenSize, inputSize, ...
-                                      testData);
+test_data = images[test_set]
+test_labels = labels[test_set]     # digit labels 0-4; no shift needed (the MATLAB version shifted to 1-5)
+
+print '# examples in supervised training set: {0}'.format(train_data.shape[0])
+print '# examples in supervised testing set: {0}'.format(test_data.shape[0])
+
+train_features = feed_forward_autoencoder(opttheta, hidden_size, input_size, train_data)
+
+test_features = feed_forward_autoencoder(opttheta, hidden_size, input_size, test_data)
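
Editor's note: the two feature-extraction calls above go through feed_forward_autoencoder, the forward pass that the exercise (feedForwardAutoencoder.m in the MATLAB original) asks you to implement. A rough sketch only, assuming the usual UFLDL packing order W1, W2, b1, b2 inside theta and the (input_size, hidden_size) layout for W1 that the visualization code above also assumes:

    def feed_forward_autoencoder_sketch(theta, hidden_size, visible_size, data):
        # data: (num_examples, visible_size) -> returns (num_examples, hidden_size)
        w1_len = visible_size * hidden_size
        w1 = theta[0:w1_len].reshape((visible_size, hidden_size))
        b1 = theta[2 * w1_len: 2 * w1_len + hidden_size]   # b1 follows W1 and W2 in the assumed packing
        return 1.0 / (1.0 + np.exp(-(data.dot(w1) + b1)))  # sigmoid hidden-layer activations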

##======================================================================
## STEP 4: Train the softmax classifier
-
-softmaxModel = struct;
-## ----------------- YOUR CODE HERE ----------------------
# Use softmaxTrain.m from the previous exercise to train a multi-class
# classifier.

# You need to compute softmaxModel using softmaxTrain on trainFeatures and
# trainLabels

-
-## -----------------------------------------------------
-
+num_classes = 10            # only digits 0-4 occur in train_labels; the extra output classes are unused
+decay_lambda = 1e-4         # weight decay for the softmax (re-binds decay_lambda to a smaller value)
+options = {'maxiter': 100}
+softmax_model = softmax_train(hidden_size, num_classes, decay_lambda, train_features, train_labels, options)

##======================================================================
## STEP 5: Testing

-## ----------------- YOUR CODE HERE ----------------------
# Compute Predictions on the test set (testFeatures) using softmaxPredict
# and softmaxModel
-
+pred = softmax_predict(softmax_model, test_features)
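
Editor's note: for reference, softmax prediction only needs the class with the largest score. A standalone sketch follows; the internal structure of softmax_model in this repo is an assumption here (taken to be a plain (num_classes, num_features) weight matrix) and may differ from the actual implementation:

    def softmax_predict_sketch(theta, features):
        # theta: (num_classes, num_features), features: (num_examples, num_features)
        scores = features.dot(theta.T)        # unnormalised class scores
        return np.argmax(scores, axis=1)      # the softmax normalisation does not change the argmax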

## -----------------------------------------------------
-
# Classification Score
-fprintf('Test Accuracy: #f##\n', 100*mean(pred(:) == testLabels(:)));
+acc = np.mean(test_labels == pred)
+print 'Test Accuracy: {0:.3f}\n'.format(100*acc)

# (note: the original MATLAB exercise shifted the labels by 1 so that digit 0
# corresponded to label 1; here the 0-4 digit labels are used directly)
|