
Commit 91e1c4c

Format
1 parent ac28ed7 commit 91e1c4c

12 files changed, +112 -113 lines changed

self_taught_learning/feed_forward_autoencoder.py

Lines changed: 4 additions & 4 deletions
@@ -12,12 +12,12 @@ def feed_forward_autoencoder(theta, hidden_size, visible_size, data):
     # We first convert theta to the (W1, W2, b1, b2) matrix/vector format, so that this
     # follows the notation convention of the lecture notes.
 
-    num_combinations = visible_size*hidden_size
+    num_combinations = visible_size * hidden_size
     w1 = theta[0:num_combinations].reshape((visible_size, hidden_size))
-    b1 = theta[2*num_combinations:2*num_combinations+hidden_size]
+    b1 = theta[2 * num_combinations:2 * num_combinations + hidden_size]
 
-    # Instructions: Compute the activation of the hidden layer for the Sparse Autoencoder.
-    activation = sigmoid(np.dot(data, w1)+b1)
+    # Instructions: Compute the activation of the hidden layer for the Sparse Autoencoder.
+    activation = sigmoid(np.dot(data, w1) + b1)
 
     #-------------------------------------------------------------------
     return activation
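
For reference, the hidden-layer activation computed here is a single affine map followed by the logistic sigmoid. A minimal standalone sketch of the same step, with a hypothetical sigmoid helper and toy shapes that are not part of this commit:

import numpy as np

def sigmoid(z):
    # logistic nonlinearity used by the sparse autoencoder
    return 1.0 / (1.0 + np.exp(-z))

# toy sizes, chosen only for illustration
visible_size, hidden_size = 64, 25
data = np.random.rand(100, visible_size)                # 100 examples, one per row
w1 = 0.01 * np.random.randn(visible_size, hidden_size)
b1 = np.zeros(hidden_size)

# same computation as the line changed above
activation = sigmoid(np.dot(data, w1) + b1)             # shape (100, hidden_size)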

self_taught_learning/stl_exercise.py

Lines changed: 16 additions & 15 deletions
@@ -1,16 +1,17 @@
+from random import shuffle
+
 from sklearn.datasets import fetch_mldata
 import numpy as np
 from scipy.optimize import minimize
-from random import shuffle
 
 from sparse_autoencoder.display_network import display_network
 from sparse_autoencoder.sparse_autoencoder_cost import initialize_parameters, sparse_autoencoder_cost_and_grad
 from softmax_regression.softmax_train import softmax_train
 from softmax_regression.softmax_predict import softmax_predict
-
 from feed_forward_autoencoder import feed_forward_autoencoder
 
-## CS294A/CS294W Self-taught Learning Exercise
+
+# # CS294A/CS294W Self-taught Learning Exercise
 
 # Instructions
 # ------------
@@ -28,11 +29,11 @@
 input_size = 28 * 28
 num_labels = 5
 hidden_size = 200
-sparsity_param = 0.1 # desired average activation of the hidden units.
-# (This was denoted by the Greek alphabet rho, which looks like a lower-case "p",
-# in the lecture notes).
-decay_lambda = 3e-3 # weight decay parameter
-beta = 3 # weight of sparsity penalty term
+sparsity_param = 0.1  # desired average activation of the hidden units.
+# (This was denoted by the Greek alphabet rho, which looks like a lower-case "p",
+# in the lecture notes).
+decay_lambda = 3e-3  # weight decay parameter
+beta = 3  # weight of sparsity penalty term
 max_iter = 400
 
 ## ======================================================================
@@ -44,7 +45,7 @@
 
 # Load MNIST database files
 mnist = fetch_mldata('MNIST original', data_home='../data/')
-images = np.float32(mnist.data)/255.0
+images = np.float32(mnist.data) / 255.0
 labels = mnist.target
 
 # Set Unlabeled Set (All Images)
@@ -73,7 +74,7 @@
     # unlabeledTrainingImages
     func_args = (input_size, hidden_size, decay_lambda, sparsity_param, beta, unlabeled_data)
     res = minimize(sparse_autoencoder_cost_and_grad, x0=theta, args=func_args, method='L-BFGS-B',
-                   jac=True, options={'maxiter': max_iter, 'disp': True})
+                   jac=True, options={'maxiter': max_iter, 'disp': True})
     opttheta = res.x
     np.save(trained_theta_file, opttheta)
 else:
@@ -82,7 +83,7 @@
 ## -----------------------------------------------------
 
 # Visualize weights
-w1 = opttheta[0: hidden_size*input_size].reshape((input_size, hidden_size))
+w1 = opttheta[0: hidden_size * input_size].reshape((input_size, hidden_size))
 display_network(w1.T, save_figure_path='../data/stl.png')
 
 ##======================================================================
@@ -91,17 +92,17 @@
 # You need to complete the code in feedForwardAutoencoder.m so that the
 # following command will extract features from the data.
 
-num_train = np.round(labeled_set.shape[0]/2)
+num_train = np.round(labeled_set.shape[0] / 2)
 indices = [i for i in xrange(labeled_set.shape[0])]
 shuffle(indices)
 train_set = labeled_set[indices[0:num_train]]
 test_set = labeled_set[indices[num_train:]]
 
 train_data = images[train_set]
-train_labels = labels[train_set] # Shift Labels to the Range 1-5
+train_labels = labels[train_set]  # Shift Labels to the Range 1-5
 
 test_data = images[test_set]
-test_labels = labels[test_set] # Shift Labels to the Range 1-5
+test_labels = labels[test_set]  # Shift Labels to the Range 1-5
 
 print '# examples in supervised training set: {0}'.format(train_data.shape[0])
 print '# examples in supervised testing set: {0}'.format(test_data.shape[0])
@@ -135,7 +136,7 @@
 ## -----------------------------------------------------
 # Classification Score
 acc = np.mean(test_labels == pred)
-print 'Test Accuracy: {0:.3f}\n'.format(100*acc)
+print 'Test Accuracy: {0:.3f}\n'.format(100 * acc)
 
 # (note that we shift the labels by 1, so that digit 0 now corresponds to
 # label 1)
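
Taken together, stl_exercise.py follows the usual self-taught learning recipe: fit a sparse autoencoder on the unlabeled digits, push the labeled train/test splits through feed_forward_autoencoder to get features, then train and score a softmax classifier. A compressed sketch of that flow using the signatures visible in this diff; the softmax hyperparameters and the feature-variable names below are placeholders, not values taken from the script:

# assumes opttheta, hidden_size, input_size, num_labels, train_data,
# train_labels, test_data and test_labels as defined earlier in the script
train_features = feed_forward_autoencoder(opttheta, hidden_size, input_size, train_data)
test_features = feed_forward_autoencoder(opttheta, hidden_size, input_size, test_data)

softmax_model = softmax_train(hidden_size, num_labels, 1e-4,
                              train_features, train_labels, {'maxiter': 100})
pred = softmax_predict(softmax_model, test_features)
accuracy = 100 * np.mean(test_labels == pred)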

softmax_regression/softmax_cost.py

Lines changed: 8 additions & 8 deletions
@@ -6,18 +6,18 @@ def softmax_cost_and_grad(theta, num_classes, input_size, decay_lambda, data, labels):
     # input_size - the size N of the input vector
     # decay_lambda - weight decay parameter
     # data - the N x M input matrix, where each row data(i, :) corresponds to
-    #        a single test set
+    #        a single test set
     # labels - an 1 x N matrix containing the labels corresponding for the input data
     #
-
+
     # Unroll the parameters from theta
     theta = theta.reshape((input_size, num_classes))
-
+
     num_cases = data.shape[0]
 
     ground_truth = np.zeros((num_cases, num_classes))
     ground_truth[xrange(num_cases), labels.tolist()] = 1
-
+
     thetagrad = np.empty_like(theta)
 
     # Instructions: Compute the cost and gradient for softmax regression.
@@ -31,14 +31,14 @@ def softmax_cost_and_grad(theta, num_classes, input_size, decay_lambda, data, labels):
     exp_theta_x = np.exp(theta_x)
 
     # compute h(x)
-    hypothesis = exp_theta_x/np.sum(exp_theta_x, axis=1, keepdims=True)
+    hypothesis = exp_theta_x / np.sum(exp_theta_x, axis=1, keepdims=True)
 
     # compute cost
-    cost = -np.sum(ground_truth*np.log(hypothesis))/num_cases + \
-           decay_lambda/2.0*np.dot(theta.flatten(), theta.flatten())
+    cost = -np.sum(ground_truth * np.log(hypothesis)) / num_cases + \
+           decay_lambda / 2.0 * np.dot(theta.flatten(), theta.flatten())
 
     # compute gradient
-    thetagrad = -np.dot(data.T, ground_truth-hypothesis)/num_cases + decay_lambda*theta
+    thetagrad = -np.dot(data.T, ground_truth - hypothesis) / num_cases + decay_lambda * theta
 
     # ------------------------------------------------------------------
     # Unroll the gradient matrices into a vector for minFunc
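
The reformatted lines implement the standard softmax pieces: hypothesis is the row-normalized exp(theta_x), the cost is the mean cross-entropy plus an L2 penalty, and the gradient is -data.T (ground_truth - hypothesis) / num_cases + decay_lambda * theta. A tiny self-contained check on random data (all shapes and values are illustrative only):

import numpy as np

num_cases, input_size, num_classes = 5, 4, 3
data = np.random.randn(num_cases, input_size)
labels = np.random.randint(num_classes, size=num_cases)
theta = 0.005 * np.random.randn(input_size, num_classes)
decay_lambda = 1e-4

ground_truth = np.zeros((num_cases, num_classes))
ground_truth[np.arange(num_cases), labels] = 1

theta_x = np.dot(data, theta)
theta_x -= np.max(theta_x, axis=1, keepdims=True)   # numerical stabilization; not in the file above
exp_theta_x = np.exp(theta_x)
hypothesis = exp_theta_x / np.sum(exp_theta_x, axis=1, keepdims=True)

cost = -np.sum(ground_truth * np.log(hypothesis)) / num_cases + \
       decay_lambda / 2.0 * np.dot(theta.flatten(), theta.flatten())
thetagrad = -np.dot(data.T, ground_truth - hypothesis) / num_cases + decay_lambda * theta
assert np.allclose(np.sum(hypothesis, axis=1), 1.0)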

softmax_regression/softmax_exercise.py

Lines changed: 10 additions & 10 deletions
@@ -6,10 +6,10 @@
 from softmax_cost import softmax_cost, softmax_cost_and_grad
 from softmax_train import softmax_train
 from softmax_predict import softmax_predict
-
 from sparse_autoencoder.compute_numerical_gradient import compute_numerical_gradient
 
-# Instructions
+
+# Instructions
 # ------------
 #
 # This file contains code that helps you get started on the
@@ -26,10 +26,10 @@
 # to be used more generally on any arbitrary input.
 # We also initialise some parameters used for tuning the model.
 
-input_size = 28 * 28 # Size of input vector (MNIST images are 28x28)
-num_classes = 10 # Number of classes (MNIST images fall into 10 classes)
+input_size = 28 * 28  # Size of input vector (MNIST images are 28x28)
+num_classes = 10  # Number of classes (MNIST images fall into 10 classes)
 
-decay_lambda = 1e-4 # Weight decay parameter
+decay_lambda = 1e-4  # Weight decay parameter
 
 #======================================================================
 # STEP 1: Load data
@@ -47,7 +47,7 @@
 mnist = fetch_mldata('MNIST original', data_home='../data/')
 
 # scale the pixel values to the range [0,1]
-images = np.float32(mnist.data)/255.0
+images = np.float32(mnist.data) / 255.0
 labels = mnist.target
 
 input_data = images
@@ -56,14 +56,14 @@
 # in order to speed up gradient checking.
 # Here, we create synthetic dataset using random data for testing
 
-DEBUG = False # Set DEBUG to true when debugging.
+DEBUG = False  # Set DEBUG to true when debugging.
 if DEBUG:
     input_size = 8
     input_data = np.random.randn(100, 8)
     labels = np.random.randint(10, size=(100, ))
 
 # Randomly initialise theta
-theta = 0.005 * np.random.randn(input_size*num_classes, )
+theta = 0.005 * np.random.randn(input_size * num_classes, )
 
 #======================================================================
 # STEP 2: Implement softmaxCost
@@ -87,7 +87,7 @@
 print numgrad, grad
 
 # Compare numerically computed gradients with those computed analytically
-diff = np.linalg.norm(numgrad-grad)/np.linalg.norm(numgrad+grad)
+diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
 print diff
 # The difference should be small.
 # In our implementation, these values are usually less than 1e-7.
@@ -103,7 +103,7 @@
 
 options = {'maxiter': 100}
 softmax_model = softmax_train(input_size, num_classes, decay_lambda, input_data, labels, options)
-
+
 # Although we only use 100 iterations here to train a classifier for the
 # MNIST data set, in practice, training for more iterations is usually
 # beneficial.
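
The DEBUG branch above exists so the analytic gradient from softmax_cost_and_grad can be checked against compute_numerical_gradient on a small synthetic problem before the full MNIST run. A hedged sketch of that check; the lambda wrapper and taking element [0] of the return value are assumptions about how the helpers are meant to be combined:

import numpy as np

input_size, num_classes, decay_lambda = 8, 10, 1e-4
input_data = np.random.randn(100, input_size)
labels = np.random.randint(num_classes, size=(100, ))
theta = 0.005 * np.random.randn(input_size * num_classes, )

cost, grad = softmax_cost_and_grad(theta, num_classes, input_size,
                                   decay_lambda, input_data, labels)
numgrad = compute_numerical_gradient(
    lambda t: softmax_cost_and_grad(t, num_classes, input_size,
                                    decay_lambda, input_data, labels)[0],
    theta)

# relative error; a correct implementation should give something well below 1e-7
diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)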

softmax_regression/softmax_predict.py

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@
 def softmax_predict(softmax_model, data):
     # softmaxModel - model trained using softmaxTrain
     # data - the N x M input matrix, where each column data(:, i) corresponds to
-    #        a single test set
+    #        a single test set
     #
     # Your code should produce the prediction matrix
     # pred, where pred(i) is argmax_c P(y(c) | x(i)).
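
The prediction step reduces to an argmax over per-class scores; because softmax is monotone, the normalization can be skipped. A minimal sketch of one possible completion, assuming the trained parameters are stored in the model dict under a hypothetical 'opt_theta' key with shape (N, num_classes):

import numpy as np

def softmax_predict_sketch(softmax_model, data):
    # data: one example per row, matching the layout used in softmax_cost.py
    theta = softmax_model['opt_theta']   # hypothetical storage layout
    scores = np.dot(data, theta)         # num_examples x num_classes
    return np.argmax(scores, axis=1)     # pred[i] = argmax_c P(y = c | x(i))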

softmax_regression/softmax_train.py

Lines changed: 2 additions & 2 deletions
@@ -13,7 +13,7 @@ def softmax_train(input_size, num_classes, decay_lambda, input_data, labels, options):
     # numClasses: the number of classes
     # lambda: weight decay parameter
     # inputData: an N by M matrix containing the input data, such that
-    #            inputData(:, c) is the cth input
+    #            inputData(:, c) is the cth input
     # labels: M by 1 matrix containing the class labels for the
     #         corresponding inputs. labels(c) is the class label for
     #         the cth input
@@ -24,7 +24,7 @@ def softmax_train(input_size, num_classes, decay_lambda, input_data, labels, options):
     options['maxiter'] = 400
 
     # initialize parameters
-    theta = 0.005 * np.random.randn(input_size*num_classes, )
+    theta = 0.005 * np.random.randn(input_size * num_classes, )
 
     # Use minFunc to minimize the function
     options['disp'] = True
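
After this initialization, training presumably hands softmax_cost_and_grad to scipy.optimize.minimize with L-BFGS-B and jac=True, the same pattern stl_exercise.py uses for the sparse autoencoder. A hedged sketch of that step; the returned model layout mirrors the hypothetical 'opt_theta' key from the softmax_predict sketch above and is an assumption, not this repo's structure:

from scipy.optimize import minimize

# sketch of the remainder of softmax_train, after theta has been initialized
res = minimize(softmax_cost_and_grad, x0=theta,
               args=(num_classes, input_size, decay_lambda, input_data, labels),
               method='L-BFGS-B', jac=True,
               options={'maxiter': options['maxiter'], 'disp': options['disp']})

softmax_model = {'opt_theta': res.x.reshape((input_size, num_classes)),
                 'input_size': input_size,
                 'num_classes': num_classes}   # hypothetical model layout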

sparse_autoencoder/check_numerical_gradient.py

Lines changed: 5 additions & 5 deletions
@@ -6,18 +6,18 @@
 def simple_quadratic_function(x):
     # this function accepts a 2D vector as input.
     # Its outputs are:
-    #   value: h(x1, x2) = x1^2 + 3*x1*x2
+    #   value: h(x1, x2) = x1^2 + 3*x1*x2
     # grad: A 2x1 vector that gives the partial derivatives of h with respect to x1 and x2
     # Note that when we pass simple_quadratic_function(x) to check_numerical_gradient, we're assuming
     # that compute_numerical_gradient will use only the first returned value of this function.
-    value = x[0]**2 + 3*x[0]*x[1]
+    value = x[0] ** 2 + 3 * x[0] * x[1]
     return value
 
 
 def simple_quadratic_function_grad(x):
     grad = np.empty_like(x)
-    grad[0] = 2*x[0] + 3*x[1]
-    grad[1] = 3*x[0]
+    grad[0] = 2 * x[0] + 3 * x[1]
+    grad[1] = 3 * x[0]
     return grad
 
 
@@ -46,6 +46,6 @@ def check_numerical_gradient():
     # Evaluate the norm of the difference between two solutions.
     # If you have a correct implementation, and assuming you used EPSILON = 0.0001
     # in computeNumericalGradient.m, then diff below should be 2.1452e-12
-    diff = np.linalg.norm(numgrad-grad)/np.linalg.norm(numgrad+grad)
+    diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
     print diff
     print 'Norm of the difference between numerical and analytical gradient (should be < 1e-9)\n\n'
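
For h(x1, x2) = x1^2 + 3*x1*x2 the analytic gradient is (2*x1 + 3*x2, 3*x1), so the whole check can be reproduced in a few lines with central differences; the test point below is arbitrary, not necessarily the one check_numerical_gradient uses:

import numpy as np

def h(x):
    return x[0] ** 2 + 3 * x[0] * x[1]

def h_grad(x):
    return np.array([2 * x[0] + 3 * x[1], 3 * x[0]])

x = np.array([4.0, 10.0])
epsilon = 1e-4
numgrad = np.zeros_like(x)
for i in range(x.size):
    e = np.zeros_like(x)
    e[i] = epsilon
    numgrad[i] = (h(x + e) - h(x - e)) / (2 * epsilon)

# relative error; should come out around 1e-12 for this smooth function
diff = np.linalg.norm(numgrad - h_grad(x)) / np.linalg.norm(numgrad + h_grad(x))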

sparse_autoencoder/compute_numerical_gradient.py

Lines changed: 4 additions & 4 deletions
@@ -16,20 +16,20 @@ def compute_numerical_gradient(func, theta):
     # partial derivative of func with respect to the i-th input argument, evaluated at theta.
     # I.e., numgrad(i) should be the (approximately) the partial derivative of func with
     # respect to theta(i).
-    #
+    #
     # Hint: You will probably want to compute the elements of numgrad one at a time.
     epsilon = 1e-4
     for i in xrange(theta.size):
         # temporarily save the value
         theta_i = theta[i]
         # temporarily increase the value
-        theta[i] = theta_i+epsilon
+        theta[i] = theta_i + epsilon
         val_plus = func(theta)
         # temporarily decrease the value
-        theta[i] = theta_i-epsilon
+        theta[i] = theta_i - epsilon
         val_minus = func(theta)
         # compute numerical gradient
-        numgrad[i] = (val_plus-val_minus)/(epsilon*2)
+        numgrad[i] = (val_plus - val_minus) / (epsilon * 2)
         # restore theta
         theta[i] = theta_i
 
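The loop above is the central-difference approximation numgrad[i] ≈ (J(theta + eps*e_i) - J(theta - eps*e_i)) / (2*eps), perturbing theta in place and restoring it so func always sees a vector of the original shape. A self-contained check of that pattern on a function with a known gradient (the quadratic below is illustrative, not from the repo):

import numpy as np

def func(theta):
    return np.sum(theta ** 2)      # exact gradient is 2 * theta

theta = np.random.randn(6)
numgrad = np.zeros_like(theta)
epsilon = 1e-4
for i in range(theta.size):
    theta_i = theta[i]             # save the original value
    theta[i] = theta_i + epsilon
    val_plus = func(theta)
    theta[i] = theta_i - epsilon
    val_minus = func(theta)
    numgrad[i] = (val_plus - val_minus) / (epsilon * 2)
    theta[i] = theta_i             # restore

assert np.allclose(numgrad, 2 * theta, atol=1e-6)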

sparse_autoencoder/display_network.py

Lines changed: 10 additions & 9 deletions
@@ -1,9 +1,9 @@
-from math import sqrt, ceil, floor
-
 import numpy as np
 import matplotlib.pyplot as plt
 import matplotlib.cm as cm
 
+from math import sqrt, ceil, floor
+
 
 def display_network(data, cols=-1, opt_normalize=True, opt_graycolor=True, save_figure_path=None):
     # This function visualizes filters in matrix A. Each row of A is a
@@ -25,21 +25,21 @@ def display_network(data, cols=-1, opt_normalize=True, opt_graycolor=True, save_figure_path=None):
     sz = int(sqrt(area))
     buf = 1
     if cols < 0:
-        if floor(sqrt(num))**2 != num:
+        if floor(sqrt(num)) ** 2 != num:
             n = ceil(sqrt(num))
-            while num % n != 0 and n < 1.2*sqrt(num):
+            while num % n != 0 and n < 1.2 * sqrt(num):
                 n += 1
-            m = ceil(num/n)
+            m = ceil(num / n)
         else:
             n = sqrt(num)
             m = n
     else:
         n = cols
-        m = ceil(num/n)
+        m = ceil(num / n)
     n = int(n)
     m = int(m)
 
-    array = -np.ones((buf+m*(sz+buf), buf+n*(sz+buf)))
+    array = -np.ones((buf + m * (sz + buf), buf + n * (sz + buf)))
 
     if not opt_graycolor:
         array *= 0.1
@@ -53,14 +53,15 @@ def display_network(data, cols=-1, opt_normalize=True, opt_graycolor=True, save_figure_path=None):
                 clim = np.amax(np.absolute(data[k, :]))
             else:
                 clim = np.amax(np.absolute(data))
-            array[buf+i*(sz+buf):buf+i*(sz+buf)+sz, buf+j*(sz+buf):buf+j*(sz+buf)+sz] = data[k, :].reshape([sz, sz])/clim
+            array[buf + i * (sz + buf):buf + i * (sz + buf) + sz,
+                  buf + j * (sz + buf):buf + j * (sz + buf) + sz] = data[k, :].reshape([sz, sz]) / clim
             k += 1
 
     # simulate imagesc
     ax = plt.figure().gca()
     pix_width = 5
     h, w = array.shape
-    exts = (0, pix_width*w, 0, pix_width*h)
+    exts = (0, pix_width * w, 0, pix_width * h)
     if opt_graycolor:
         ax.imshow(array, interpolation='nearest', extent=exts, cmap=cm.gray)
     else:
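
As called from stl_exercise.py, display_network expects one filter per row, which is why the (input_size x hidden_size) weight matrix is transposed before plotting. A short usage sketch with random weights standing in for the trained ones (it assumes the repo's package layout is importable):

import numpy as np
from sparse_autoencoder.display_network import display_network

input_size, hidden_size = 28 * 28, 200
w1 = 0.01 * np.random.randn(input_size, hidden_size)   # stand-in for the trained weights

# each of the 200 rows of w1.T becomes a 28x28 tile in the saved grid
display_network(w1.T, save_figure_path='../data/stl.png')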
