
Commit ac28ed7

committed
Self-Taught Learning
1 parent 209a200 commit ac28ed7

7 files changed: +95, -115 lines

self_taught_learning/feed_forward_autoencoder.py

Lines changed: 4 additions & 4 deletions
@@ -12,14 +12,14 @@ def feed_forward_autoencoder(theta, hidden_size, visible_size, data):
     # We first convert theta to the (W1, W2, b1, b2) matrix/vector format, so that this
     # follows the notation convention of the lecture notes.
-    W1 = reshape(theta(1:hiddenSize*visibleSize), hiddenSize, visibleSize);
-    b1 = theta(2*hiddenSize*visibleSize+1:2*hiddenSize*visibleSize+hiddenSize);
+    num_combinations = visible_size*hidden_size
+    w1 = theta[0:num_combinations].reshape((visible_size, hidden_size))
+    b1 = theta[2*num_combinations:2*num_combinations+hidden_size]

     # Instructions: Compute the activation of the hidden layer for the Sparse Autoencoder.
-
+    activation = sigmoid(np.dot(data, w1)+b1)

     #-------------------------------------------------------------------
-
     return activation
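Note: the added activation line relies on a `sigmoid` helper and a NumPy import defined elsewhere in the module (they are not part of this hunk). A minimal sketch of the assumed helper, for reference only:

    import numpy as np

    def sigmoid(z):
        # element-wise logistic function; maps any real value into (0, 1)
        return 1.0 / (1.0 + np.exp(-z))

With `data` laid out one example per row, `np.dot(data, w1) + b1` has shape (num_examples, hidden_size), so the function returns one hidden-layer feature vector per example.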

self_taught_learning/stl_exercise.py

Lines changed: 70 additions & 81 deletions
@@ -1,3 +1,15 @@
+from sklearn.datasets import fetch_mldata
+import numpy as np
+from scipy.optimize import minimize
+from random import shuffle
+
+from sparse_autoencoder.display_network import display_network
+from sparse_autoencoder.sparse_autoencoder_cost import initialize_parameters, sparse_autoencoder_cost_and_grad
+from softmax_regression.softmax_train import softmax_train
+from softmax_regression.softmax_predict import softmax_predict
+
+from feed_forward_autoencoder import feed_forward_autoencoder
+
 ## CS294A/CS294W Self-taught Learning Exercise

 # Instructions
@@ -13,15 +25,15 @@
 # allow your sparse autoencoder to get good filters; you do not need to
 # change the parameters below.

-inputSize = 28 * 28;
-numLabels = 5;
-hiddenSize = 200;
-sparsityParam = 0.1; # desired average activation of the hidden units.
-# (This was denoted by the Greek alphabet rho, which looks like a lower-case "p",
-# in the lecture notes).
-lambda = 3e-3; # weight decay parameter
-beta = 3; # weight of sparsity penalty term
-maxIter = 400;
+input_size = 28 * 28
+num_labels = 5
+hidden_size = 200
+sparsity_param = 0.1  # desired average activation of the hidden units.
+# (This was denoted by the Greek alphabet rho, which looks like a lower-case "p",
+# in the lecture notes).
+decay_lambda = 3e-3  # weight decay parameter
+beta = 3  # weight of sparsity penalty term
+max_iter = 400
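One rename in this block is forced rather than stylistic: `lambda` is a reserved keyword in Python, so the weight decay parameter becomes `decay_lambda`; the remaining changes are straight camelCase-to-snake_case translations of the Matlab names.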

 ## ======================================================================
 # STEP 1: Load data from the MNIST database
@@ -31,77 +43,75 @@
 # change it.

 # Load MNIST database files
-mnistData = loadMNISTImages('mnist/train-images-idx3-ubyte');
-mnistLabels = loadMNISTLabels('mnist/train-labels-idx1-ubyte');
+mnist = fetch_mldata('MNIST original', data_home='../data/')
+images = np.float32(mnist.data)/255.0
+labels = mnist.target

 # Set Unlabeled Set (All Images)

 # Simulate a Labeled and Unlabeled set
-labeledSet = find(mnistLabels >= 0 & mnistLabels <= 4);
-unlabeledSet = find(mnistLabels >= 5);
-
-numTrain = round(numel(labeledSet)/2);
-trainSet = labeledSet(1:numTrain);
-testSet = labeledSet(numTrain+1:end);
-
-unlabeledData = mnistData(:, unlabeledSet);
-
-trainData = mnistData(:, trainSet);
-trainLabels = mnistLabels(trainSet)' + 1; # Shift Labels to the Range 1-5
+labeled_set = np.where((labels >= 0) & (labels <= 5))[0]
+unlabeled_set = np.where(labels >= 6)[0]

-testData = mnistData(:, testSet);
-testLabels = mnistLabels(testSet)' + 1; # Shift Labels to the Range 1-5
+unlabeled_data = images[unlabeled_set]

 # Output Some Statistics
-fprintf('# examples in unlabeled set: #d\n', size(unlabeledData, 2));
-fprintf('# examples in supervised training set: #d\n\n', size(trainData, 2));
-fprintf('# examples in supervised testing set: #d\n\n', size(testData, 2));
+print '# examples in unlabeled set: {0}'.format(unlabeled_data.shape[0])
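For orientation (not part of the commit): `fetch_mldata('MNIST original')` returns the digits as flat uint8 vectors, hence the division by 255.0 to put pixel values in [0, 1], and the two `np.where` calls split every example into either the labeled or the unlabeled pool. A small sanity-check sketch under those assumptions:

    # images: one 28*28 = 784-pixel row per example, values in [0, 1]
    assert images.shape[1] == 28 * 28
    # every example falls into exactly one of the two index sets
    assert labeled_set.shape[0] + unlabeled_set.shape[0] == labels.shape[0]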

 ## ======================================================================
 # STEP 2: Train the sparse autoencoder
 # This trains the sparse autoencoder on the unlabeled training
 # images.

-# Randomly initialize the parameters
-theta = initializeParameters(hiddenSize, inputSize);
-
-## ----------------- YOUR CODE HERE ----------------------
-# Find opttheta by running the sparse autoencoder on
-# unlabeledTrainingImages
-
-opttheta = theta;
-
-
-
-
-
-
-
-
+trained_theta_file = '../data/opttheta.npy'
+TRAIN = False
+if TRAIN:
+    # Randomly initialize the parameters
+    theta = initialize_parameters(input_size, hidden_size)
+
+    # Find opttheta by running the sparse autoencoder on
+    # unlabeledTrainingImages
+    func_args = (input_size, hidden_size, decay_lambda, sparsity_param, beta, unlabeled_data)
+    res = minimize(sparse_autoencoder_cost_and_grad, x0=theta, args=func_args, method='L-BFGS-B',
+                   jac=True, options={'maxiter': max_iter, 'disp': True})
+    opttheta = res.x
+    np.save(trained_theta_file, opttheta)
+else:
+    opttheta = np.load(trained_theta_file)

 ## -----------------------------------------------------

 # Visualize weights
-W1 = reshape(opttheta(1:hiddenSize * inputSize), hiddenSize, inputSize);
-display_network(W1');
+w1 = opttheta[0: hidden_size*input_size].reshape((input_size, hidden_size))
+display_network(w1.T, save_figure_path='../data/stl.png')
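A note on the L-BFGS-B call above: with `jac=True`, `scipy.optimize.minimize` treats the objective as returning the cost and its gradient together as a `(cost, grad)` pair, which is the contract `sparse_autoencoder_cost_and_grad` follows (see its `return cost, grad` further down in this commit). A self-contained sketch of that calling convention on a toy quadratic:

    import numpy as np
    from scipy.optimize import minimize

    def cost_and_grad(x):
        # toy objective 0.5*||x||^2; its gradient is simply x
        return 0.5 * np.dot(x, x), x

    res = minimize(cost_and_grad, x0=np.ones(3), method='L-BFGS-B', jac=True)
    # res.x holds the optimized vector, analogous to opttheta above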

 ##======================================================================
 ## STEP 3: Extract Features from the Supervised Dataset
 #
 # You need to complete the code in feedForwardAutoencoder.m so that the
 # following command will extract features from the data.

-trainFeatures = feedForwardAutoencoder(opttheta, hiddenSize, inputSize, ...
-                                       trainData);
+num_train = np.round(labeled_set.shape[0]/2)
+indices = [i for i in xrange(labeled_set.shape[0])]
+shuffle(indices)
+train_set = labeled_set[indices[0:num_train]]
+test_set = labeled_set[indices[num_train:]]
+
+train_data = images[train_set]
+train_labels = labels[train_set]  # Shift Labels to the Range 1-5

-testFeatures = feedForwardAutoencoder(opttheta, hiddenSize, inputSize, ...
-                                      testData);
+test_data = images[test_set]
+test_labels = labels[test_set]  # Shift Labels to the Range 1-5
+
+print '# examples in supervised training set: {0}'.format(train_data.shape[0])
+print '# examples in supervised testing set: {0}'.format(test_data.shape[0])
+
+train_features = feed_forward_autoencoder(opttheta, hidden_size, input_size, train_data)
+
+test_features = feed_forward_autoencoder(opttheta, hidden_size, input_size, test_data)

 ##======================================================================
 ## STEP 4: Train the softmax classifier
-
-softmaxModel = struct;
-## ----------------- YOUR CODE HERE ----------------------
 # Use softmaxTrain.m from the previous exercise to train a multi-class
 # classifier.
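The point of STEP 3 is that the classifier never sees raw pixels again: `train_features` and `test_features` have `hidden_size` (200) columns of autoencoder activations instead of the 784 pixel columns, which is why `softmax_train` in the next hunk is given `hidden_size` as its input dimension. Roughly:

    # train_data:     (num_train, 784)  raw pixels
    # train_features: (num_train, 200)  hidden-layer activations
    # test_features:  (num_test,  200)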

@@ -110,43 +120,22 @@
 # You need to compute softmaxModel using softmaxTrain on trainFeatures and
 # trainLabels

-
-
-
-
-
-
-
-
-
-## -----------------------------------------------------
-
+num_classes = 10
+decay_lambda = 1e-4
+options = {'maxiter': 100}
+softmax_model = softmax_train(hidden_size, num_classes, decay_lambda, train_features, train_labels, options)

 ##======================================================================
 ## STEP 5: Testing

-## ----------------- YOUR CODE HERE ----------------------
 # Compute Predictions on the test set (testFeatures) using softmaxPredict
 # and softmaxModel
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+pred = softmax_predict(softmax_model, test_features)

 ## -----------------------------------------------------
-
 # Classification Score
-fprintf('Test Accuracy: #f##\n', 100*mean(pred(:) == testLabels(:)));
+acc = np.mean(test_labels == pred)
+print 'Test Accuracy: {0:.3f}\n'.format(100*acc)

 # (note that we shift the labels by 1, so that digit 0 now corresponds to
 # label 1)

softmax_regression/softmax_cost.py

Lines changed: 2 additions & 3 deletions
@@ -38,12 +38,11 @@ def softmax_cost_and_grad(theta, num_classes, input_size, decay_lambda, data, la
            decay_lambda/2.0*np.dot(theta.flatten(), theta.flatten())

     # compute gradient
-    thetagrad = -np.dot(data.T, ground_truth-hypothesis)/num_cases + \
-                decay_lambda*theta
+    thetagrad = -np.dot(data.T, ground_truth-hypothesis)/num_cases + decay_lambda*theta

     # ------------------------------------------------------------------
     # Unroll the gradient matrices into a vector for minFunc
-    grad = thetagrad.flatten()
+    grad = thetagrad.ravel()

     return cost, grad
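The flatten-to-ravel switch here (and in sparse_autoencoder_cost.py below) is a small memory optimization rather than a behavior change: `ndarray.flatten()` always allocates a copy, while `ndarray.ravel()` returns a view when the array is already contiguous, so the dot products and `np.concatenate` see the same values without the extra allocation. A quick illustration:

    import numpy as np

    a = np.zeros((2, 3))
    assert a.flatten().base is None   # flatten always copies
    assert a.ravel().base is a        # ravel can return a view of a contiguous array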

softmax_regression/softmax_exercise.py

Lines changed: 1 addition & 1 deletion
@@ -120,7 +120,7 @@
 pred = softmax_predict(softmax_model, input_data)

 acc = np.mean(labels == pred)
-print ('Accuracy: #0.3f##\n', acc * 100)
+print 'Accuracy: {0:.3f}\n'.format(acc * 100)

 # Accuracy is the proportion of correctly classified images
 # After 100 iterations, the results for our implementation were:

sparse_autoencoder/display_network.py

Lines changed: 1 addition & 1 deletion
@@ -47,7 +47,7 @@ def display_network(data, cols=-1, opt_normalize=True, opt_graycolor=True, save_
     k = 0
     for i in xrange(m):
         for j in xrange(n):
-            if k > num:
+            if k >= num:
                 continue
             if opt_normalize:
                 clim = np.amax(np.absolute(data[k, :]))

sparse_autoencoder/sparse_autoencoder_cost.py

Lines changed: 16 additions & 24 deletions
@@ -1,4 +1,5 @@
 from math import sqrt
+import gc

 import numpy as np

@@ -65,47 +66,38 @@ def sparse_autoencoder_cost_and_grad(theta, visible_size, hidden_size, decay_lam

     # feedforward pass
     a1 = data
-    z2 = np.dot(a1, w1)+b1
-    a2 = sigmoid(z2)
-    z3 = np.dot(a2, w2)+b2
-    a3 = sigmoid(z3)
+    a2 = sigmoid(np.dot(a1, w1)+b1)
+    a3 = sigmoid(np.dot(a2, w2)+b2)

     # compute all deltas
     # output layer
-    prime3 = np.multiply(a3, (1.0-a3))
-    delta3 = -np.multiply(y-a3, prime3)
+    delta3 = (a3-y)*a3*(1.0-a3)
     # hidden layer
     one_over_m = 1.0/np.float32(data.shape[0])
     sparsity_avg = one_over_m*np.sum(a2, axis=0)
     sparsity_term = -sparsity_param/sparsity_avg+(1.0-sparsity_param)/(1.0-sparsity_avg)
-    prime2 = np.multiply(a2, (1.0-a2))
-    delta2 = np.multiply(np.dot(delta3, np.transpose(w2)) + beta*sparsity_term, prime2)
-
-    # compute partial gradient
-    w1grad_p = np.dot(a1.T, delta2)
-    w2grad_p = np.dot(a2.T, delta3)
-    b1grad_p = delta2
-    b2grad_p = delta3
+    delta2 = (np.dot(delta3, w2.T) + beta*sparsity_term)*a2*(1.0-a2)
+    del sparsity_term
+    gc.collect()

     # compute gradient
-    w1grad = one_over_m*w1grad_p + decay_lambda*w1
-    w2grad = one_over_m*w2grad_p + decay_lambda*w2
-    b1grad = one_over_m*np.sum(b1grad_p, axis=0)
-    b2grad = one_over_m*np.sum(b2grad_p, axis=0)
+    w1grad = one_over_m*np.dot(a1.T, delta2) + decay_lambda*w1
+    w2grad = one_over_m*np.dot(a2.T, delta3) + decay_lambda*w2
+    b1grad = one_over_m*np.sum(delta2, axis=0)
+    b2grad = one_over_m*np.sum(delta3, axis=0)

     # compute cost
-    error_flatten = (a3-y).flatten()
-    w1_flatten = w1.flatten()
-    w2_flatten = w2.flatten()
-    cost = np.dot(error_flatten, error_flatten)*one_over_m/2.0 + \
-           decay_lambda*(np.dot(w1_flatten, w1_flatten)+np.dot(w2_flatten, w2_flatten))/2.0 + \
+    w1_ravel = w1.ravel()
+    w2_ravel = w2.ravel()
+    cost = np.dot((a3-y).ravel(), (a3-y).ravel())*one_over_m/2.0 + \
+           decay_lambda*(np.dot(w1_ravel, w1_ravel)+np.dot(w2_ravel, w2_ravel))/2.0 + \
           beta*(np.sum(sparsity_param*np.log(sparsity_param/sparsity_avg) +
                        (1.0-sparsity_param)*np.log((1.0-sparsity_param)/(1.0-sparsity_avg))))

     # After computing the cost and gradient, we will convert the gradients back
     # to a vector format (suitable for minFunc). Specifically, we will unroll
     # your gradient matrices into a vector.
-    grad = np.concatenate((w1grad.flatten(), w2grad.flatten(), b1grad.flatten(), b2grad.flatten()))
+    grad = np.concatenate((w1grad.ravel(), w2grad.ravel(), b1grad.ravel(), b2grad.ravel()))

     return cost, grad
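Since the backpropagation above was rewritten (for example, `delta3 = (a3-y)*a3*(1.0-a3)` is algebraically the same as the old `-np.multiply(y-a3, prime3)` with `prime3 = a3*(1.0-a3)`), a numerical gradient check is the standard way to confirm the refactor preserved the gradient. A self-contained sketch; the helper name `numerical_gradient` is hypothetical and not part of this repository, and it only assumes the cost function returns a `(cost, grad)` pair:

    import numpy as np

    def numerical_gradient(cost_fn, theta, eps=1e-4):
        # central finite differences, one coordinate at a time
        grad = np.zeros_like(theta)
        for i in xrange(theta.shape[0]):
            step = np.zeros_like(theta)
            step[i] = eps
            grad[i] = (cost_fn(theta + step)[0] - cost_fn(theta - step)[0]) / (2.0 * eps)
        return grad

    # usage sketch on a tiny problem (sizes illustrative):
    # func = lambda t: sparse_autoencoder_cost_and_grad(t, 8, 5, decay_lambda,
    #                                                   sparsity_param, beta, small_data)
    # np.linalg.norm(numerical_gradient(func, theta) - func(theta)[1]) should be ~1e-9 or smaller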

sparse_autoencoder/train.py

Lines changed: 1 addition & 1 deletion
@@ -100,7 +100,7 @@ def train():

     # STEP 5: Visualization
     w1 = res.x[0: hidden_size*visible_size].reshape((visible_size, hidden_size))
-    display_network(np.transpose(w1), 5, save_figure_path='../data/sparse_autoencoder.png')
+    display_network(w1.T, 5, save_figure_path='../data/sparse_autoencoder.png')


 if __name__ == "__main__":
