
Commit 91e1c4c

Format
1 parent ac28ed7 commit 91e1c4c

12 files changed, +112 -113 lines changed

self_taught_learning/feed_forward_autoencoder.py

Lines changed: 4 additions & 4 deletions
@@ -12,12 +12,12 @@ def feed_forward_autoencoder(theta, hidden_size, visible_size, data):
     # We first convert theta to the (W1, W2, b1, b2) matrix/vector format, so that this
     # follows the notation convention of the lecture notes.
 
-    num_combinations = visible_size*hidden_size
+    num_combinations = visible_size * hidden_size
     w1 = theta[0:num_combinations].reshape((visible_size, hidden_size))
-    b1 = theta[2*num_combinations:2*num_combinations+hidden_size]
+    b1 = theta[2 * num_combinations:2 * num_combinations + hidden_size]
 
-    # Instructions: Compute the activation of the hidden layer for the Sparse Autoencoder.
-    activation = sigmoid(np.dot(data, w1)+b1)
+    # Instructions: Compute the activation of the hidden layer for the Sparse Autoencoder.
+    activation = sigmoid(np.dot(data, w1) + b1)
 
     #-------------------------------------------------------------------
     return activation
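
For reference, the hidden-layer activation computed here is a single affine map followed by the logistic sigmoid. A minimal standalone sketch of the same step, with a hypothetical sigmoid helper and toy shapes that are not part of this commit:

import numpy as np

def sigmoid(z):
    # logistic nonlinearity used by the sparse autoencoder
    return 1.0 / (1.0 + np.exp(-z))

# toy sizes, chosen only for illustration
visible_size, hidden_size = 64, 25
data = np.random.rand(100, visible_size)                # 100 examples, one per row
w1 = 0.01 * np.random.randn(visible_size, hidden_size)
b1 = np.zeros(hidden_size)

# same computation as the line changed above
activation = sigmoid(np.dot(data, w1) + b1)             # shape (100, hidden_size)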

self_taught_learning/stl_exercise.py

Lines changed: 16 additions & 15 deletions
@@ -1,16 +1,17 @@
+from random import shuffle
+
 from sklearn.datasets import fetch_mldata
 import numpy as np
 from scipy.optimize import minimize
-from random import shuffle
 
 from sparse_autoencoder.display_network import display_network
 from sparse_autoencoder.sparse_autoencoder_cost import initialize_parameters, sparse_autoencoder_cost_and_grad
 from softmax_regression.softmax_train import softmax_train
 from softmax_regression.softmax_predict import softmax_predict
-
 from feed_forward_autoencoder import feed_forward_autoencoder
 
-## CS294A/CS294W Self-taught Learning Exercise
+
+# # CS294A/CS294W Self-taught Learning Exercise
 
 # Instructions
 # ------------
@@ -28,11 +29,11 @@
 input_size = 28 * 28
 num_labels = 5
 hidden_size = 200
-sparsity_param = 0.1 # desired average activation of the hidden units.
-# (This was denoted by the Greek alphabet rho, which looks like a lower-case "p",
-# in the lecture notes).
-decay_lambda = 3e-3 # weight decay parameter
-beta = 3 # weight of sparsity penalty term
+sparsity_param = 0.1  # desired average activation of the hidden units.
+# (This was denoted by the Greek alphabet rho, which looks like a lower-case "p",
+# in the lecture notes).
+decay_lambda = 3e-3  # weight decay parameter
+beta = 3  # weight of sparsity penalty term
 max_iter = 400
 
 ## ======================================================================
@@ -44,7 +45,7 @@
 
 # Load MNIST database files
 mnist = fetch_mldata('MNIST original', data_home='../data/')
-images = np.float32(mnist.data)/255.0
+images = np.float32(mnist.data) / 255.0
 labels = mnist.target
 
 # Set Unlabeled Set (All Images)
@@ -73,7 +74,7 @@
     # unlabeledTrainingImages
     func_args = (input_size, hidden_size, decay_lambda, sparsity_param, beta, unlabeled_data)
     res = minimize(sparse_autoencoder_cost_and_grad, x0=theta, args=func_args, method='L-BFGS-B',
-                   jac=True, options={'maxiter': max_iter, 'disp': True})
+                   jac=True, options={'maxiter': max_iter, 'disp': True})
     opttheta = res.x
     np.save(trained_theta_file, opttheta)
 else:
@@ -82,7 +83,7 @@
 ## -----------------------------------------------------
 
 # Visualize weights
-w1 = opttheta[0: hidden_size*input_size].reshape((input_size, hidden_size))
+w1 = opttheta[0: hidden_size * input_size].reshape((input_size, hidden_size))
 display_network(w1.T, save_figure_path='../data/stl.png')
 
 ##======================================================================
@@ -91,17 +92,17 @@
 # You need to complete the code in feedForwardAutoencoder.m so that the
 # following command will extract features from the data.
 
-num_train = np.round(labeled_set.shape[0]/2)
+num_train = np.round(labeled_set.shape[0] / 2)
 indices = [i for i in xrange(labeled_set.shape[0])]
 shuffle(indices)
 train_set = labeled_set[indices[0:num_train]]
 test_set = labeled_set[indices[num_train:]]
 
 train_data = images[train_set]
-train_labels = labels[train_set] # Shift Labels to the Range 1-5
+train_labels = labels[train_set]  # Shift Labels to the Range 1-5
 
 test_data = images[test_set]
-test_labels = labels[test_set] # Shift Labels to the Range 1-5
+test_labels = labels[test_set]  # Shift Labels to the Range 1-5
 
 print '# examples in supervised training set: {0}'.format(train_data.shape[0])
 print '# examples in supervised testing set: {0}'.format(test_data.shape[0])
@@ -135,7 +136,7 @@
 ## -----------------------------------------------------
 # Classification Score
 acc = np.mean(test_labels == pred)
-print 'Test Accuracy: {0:.3f}\n'.format(100*acc)
+print 'Test Accuracy: {0:.3f}\n'.format(100 * acc)
 
 # (note that we shift the labels by 1, so that digit 0 now corresponds to
 # label 1)
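
Taken together, stl_exercise.py follows the usual self-taught learning recipe: fit a sparse autoencoder on the unlabeled digits, push the labeled train/test splits through feed_forward_autoencoder to get features, then train and score a softmax classifier. A compressed sketch of that flow using the signatures visible in this diff; the softmax hyperparameters and the feature-variable names below are placeholders, not values taken from the script:

# assumes opttheta, hidden_size, input_size, num_labels, train_data,
# train_labels, test_data and test_labels as defined earlier in the script
train_features = feed_forward_autoencoder(opttheta, hidden_size, input_size, train_data)
test_features = feed_forward_autoencoder(opttheta, hidden_size, input_size, test_data)

softmax_model = softmax_train(hidden_size, num_labels, 1e-4,
                              train_features, train_labels, {'maxiter': 100})
pred = softmax_predict(softmax_model, test_features)
accuracy = 100 * np.mean(test_labels == pred)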

softmax_regression/softmax_cost.py

Lines changed: 8 additions & 8 deletions
@@ -6,18 +6,18 @@ def softmax_cost_and_grad(theta, num_classes, input_size, decay_lambda, data, labels):
     # input_size - the size N of the input vector
     # decay_lambda - weight decay parameter
     # data - the N x M input matrix, where each row data(i, :) corresponds to
-    #        a single test set
+    #        a single test set
     # labels - an 1 x N matrix containing the labels corresponding for the input data
     #
-
+
     # Unroll the parameters from theta
     theta = theta.reshape((input_size, num_classes))
-
+
     num_cases = data.shape[0]
 
     ground_truth = np.zeros((num_cases, num_classes))
     ground_truth[xrange(num_cases), labels.tolist()] = 1
-
+
     thetagrad = np.empty_like(theta)
 
     # Instructions: Compute the cost and gradient for softmax regression.
@@ -31,14 +31,14 @@ def softmax_cost_and_grad(theta, num_classes, input_size, decay_lambda, data, labels):
     exp_theta_x = np.exp(theta_x)
 
     # compute h(x)
-    hypothesis = exp_theta_x/np.sum(exp_theta_x, axis=1, keepdims=True)
+    hypothesis = exp_theta_x / np.sum(exp_theta_x, axis=1, keepdims=True)
 
     # compute cost
-    cost = -np.sum(ground_truth*np.log(hypothesis))/num_cases + \
-           decay_lambda/2.0*np.dot(theta.flatten(), theta.flatten())
+    cost = -np.sum(ground_truth * np.log(hypothesis)) / num_cases + \
+           decay_lambda / 2.0 * np.dot(theta.flatten(), theta.flatten())
 
     # compute gradient
-    thetagrad = -np.dot(data.T, ground_truth-hypothesis)/num_cases + decay_lambda*theta
+    thetagrad = -np.dot(data.T, ground_truth - hypothesis) / num_cases + decay_lambda * theta
 
     # ------------------------------------------------------------------
     # Unroll the gradient matrices into a vector for minFunc
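
The reformatted lines implement the standard softmax pieces: hypothesis is the row-normalized exp(theta_x), the cost is the mean cross-entropy plus an L2 penalty, and the gradient is -data.T (ground_truth - hypothesis) / num_cases + decay_lambda * theta. A tiny self-contained check on random data (all shapes and values are illustrative only):

import numpy as np

num_cases, input_size, num_classes = 5, 4, 3
data = np.random.randn(num_cases, input_size)
labels = np.random.randint(num_classes, size=num_cases)
theta = 0.005 * np.random.randn(input_size, num_classes)
decay_lambda = 1e-4

ground_truth = np.zeros((num_cases, num_classes))
ground_truth[np.arange(num_cases), labels] = 1

theta_x = np.dot(data, theta)
theta_x -= np.max(theta_x, axis=1, keepdims=True)   # numerical stabilization; not in the file above
exp_theta_x = np.exp(theta_x)
hypothesis = exp_theta_x / np.sum(exp_theta_x, axis=1, keepdims=True)

cost = -np.sum(ground_truth * np.log(hypothesis)) / num_cases + \
       decay_lambda / 2.0 * np.dot(theta.flatten(), theta.flatten())
thetagrad = -np.dot(data.T, ground_truth - hypothesis) / num_cases + decay_lambda * theta
assert np.allclose(np.sum(hypothesis, axis=1), 1.0)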

softmax_regression/softmax_exercise.py

Lines changed: 10 additions & 10 deletions
@@ -6,10 +6,10 @@
 from softmax_cost import softmax_cost, softmax_cost_and_grad
 from softmax_train import softmax_train
 from softmax_predict import softmax_predict
-
 from sparse_autoencoder.compute_numerical_gradient import compute_numerical_gradient
 
-# Instructions
+
+# Instructions
 # ------------
 #
 # This file contains code that helps you get started on the
@@ -26,10 +26,10 @@
 # to be used more generally on any arbitrary input.
 # We also initialise some parameters used for tuning the model.
 
-input_size = 28 * 28 # Size of input vector (MNIST images are 28x28)
-num_classes = 10 # Number of classes (MNIST images fall into 10 classes)
+input_size = 28 * 28  # Size of input vector (MNIST images are 28x28)
+num_classes = 10  # Number of classes (MNIST images fall into 10 classes)
 
-decay_lambda = 1e-4 # Weight decay parameter
+decay_lambda = 1e-4  # Weight decay parameter
 
 #======================================================================
 # STEP 1: Load data
@@ -47,7 +47,7 @@
 mnist = fetch_mldata('MNIST original', data_home='../data/')
 
 # scale the pixel values to the range [0,1]
-images = np.float32(mnist.data)/255.0
+images = np.float32(mnist.data) / 255.0
 labels = mnist.target
 
 input_data = images
@@ -56,14 +56,14 @@
 # in order to speed up gradient checking.
 # Here, we create synthetic dataset using random data for testing
 
-DEBUG = False # Set DEBUG to true when debugging.
+DEBUG = False  # Set DEBUG to true when debugging.
 if DEBUG:
     input_size = 8
     input_data = np.random.randn(100, 8)
     labels = np.random.randint(10, size=(100, ))
 
 # Randomly initialise theta
-theta = 0.005 * np.random.randn(input_size*num_classes, )
+theta = 0.005 * np.random.randn(input_size * num_classes, )
 
 #======================================================================
 # STEP 2: Implement softmaxCost
@@ -87,7 +87,7 @@
 print numgrad, grad
 
 # Compare numerically computed gradients with those computed analytically
-diff = np.linalg.norm(numgrad-grad)/np.linalg.norm(numgrad+grad)
+diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
 print diff
 # The difference should be small.
 # In our implementation, these values are usually less than 1e-7.
@@ -103,7 +103,7 @@
 
 options = {'maxiter': 100}
 softmax_model = softmax_train(input_size, num_classes, decay_lambda, input_data, labels, options)
-
+
 # Although we only use 100 iterations here to train a classifier for the
 # MNIST data set, in practice, training for more iterations is usually
 # beneficial.
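
The DEBUG branch above exists so the analytic gradient from softmax_cost_and_grad can be checked against compute_numerical_gradient on a small synthetic problem before the full MNIST run. A hedged sketch of that check; the lambda wrapper and taking element [0] of the return value are assumptions about how the helpers are meant to be combined:

import numpy as np

input_size, num_classes, decay_lambda = 8, 10, 1e-4
input_data = np.random.randn(100, input_size)
labels = np.random.randint(num_classes, size=(100, ))
theta = 0.005 * np.random.randn(input_size * num_classes, )

cost, grad = softmax_cost_and_grad(theta, num_classes, input_size,
                                   decay_lambda, input_data, labels)
numgrad = compute_numerical_gradient(
    lambda t: softmax_cost_and_grad(t, num_classes, input_size,
                                    decay_lambda, input_data, labels)[0],
    theta)

# relative error; a correct implementation should give something well below 1e-7
diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)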

softmax_regression/softmax_predict.py

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@
 def softmax_predict(softmax_model, data):
     # softmaxModel - model trained using softmaxTrain
     # data - the N x M input matrix, where each column data(:, i) corresponds to
-    #        a single test set
+    #        a single test set
     #
     # Your code should produce the prediction matrix
     # pred, where pred(i) is argmax_c P(y(c) | x(i)).
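
The prediction step reduces to an argmax over per-class scores; because softmax is monotone, the normalization can be skipped. A minimal sketch of one possible completion, assuming the trained parameters are stored in the model dict under a hypothetical 'opt_theta' key with shape (N, num_classes):

import numpy as np

def softmax_predict_sketch(softmax_model, data):
    # data: one example per row, matching the layout used in softmax_cost.py
    theta = softmax_model['opt_theta']   # hypothetical storage layout
    scores = np.dot(data, theta)         # num_examples x num_classes
    return np.argmax(scores, axis=1)     # pred[i] = argmax_c P(y = c | x(i))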

softmax_regression/softmax_train.py

Lines changed: 2 additions & 2 deletions
@@ -13,7 +13,7 @@ def softmax_train(input_size, num_classes, decay_lambda, input_data, labels, options):
     # numClasses: the number of classes
     # lambda: weight decay parameter
     # inputData: an N by M matrix containing the input data, such that
-    #            inputData(:, c) is the cth input
+    #            inputData(:, c) is the cth input
     # labels: M by 1 matrix containing the class labels for the
     #         corresponding inputs. labels(c) is the class label for
     #         the cth input
@@ -24,7 +24,7 @@ def softmax_train(input_size, num_classes, decay_lambda, input_data, labels, options):
     options['maxiter'] = 400
 
     # initialize parameters
-    theta = 0.005 * np.random.randn(input_size*num_classes, )
+    theta = 0.005 * np.random.randn(input_size * num_classes, )
 
     # Use minFunc to minimize the function
     options['disp'] = True
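
After this initialization, training presumably hands softmax_cost_and_grad to scipy.optimize.minimize with L-BFGS-B and jac=True, the same pattern stl_exercise.py uses for the sparse autoencoder. A hedged sketch of that step; the returned model layout mirrors the hypothetical 'opt_theta' key from the softmax_predict sketch above and is an assumption, not this repo's structure:

from scipy.optimize import minimize

# sketch of the remainder of softmax_train, after theta has been initialized
res = minimize(softmax_cost_and_grad, x0=theta,
               args=(num_classes, input_size, decay_lambda, input_data, labels),
               method='L-BFGS-B', jac=True,
               options={'maxiter': options['maxiter'], 'disp': options['disp']})

softmax_model = {'opt_theta': res.x.reshape((input_size, num_classes)),
                 'input_size': input_size,
                 'num_classes': num_classes}   # hypothetical model layout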

sparse_autoencoder/check_numerical_gradient.py

Lines changed: 5 additions & 5 deletions
@@ -6,18 +6,18 @@
 def simple_quadratic_function(x):
     # this function accepts a 2D vector as input.
     # Its outputs are:
-    #   value: h(x1, x2) = x1^2 + 3*x1*x2
+    #   value: h(x1, x2) = x1^2 + 3*x1*x2
     # grad: A 2x1 vector that gives the partial derivatives of h with respect to x1 and x2
     # Note that when we pass simple_quadratic_function(x) to check_numerical_gradient, we're assuming
     # that compute_numerical_gradient will use only the first returned value of this function.
-    value = x[0]**2 + 3*x[0]*x[1]
+    value = x[0] ** 2 + 3 * x[0] * x[1]
     return value
 
 
 def simple_quadratic_function_grad(x):
     grad = np.empty_like(x)
-    grad[0] = 2*x[0] + 3*x[1]
-    grad[1] = 3*x[0]
+    grad[0] = 2 * x[0] + 3 * x[1]
+    grad[1] = 3 * x[0]
     return grad
 
 
@@ -46,6 +46,6 @@ def check_numerical_gradient():
     # Evaluate the norm of the difference between two solutions.
     # If you have a correct implementation, and assuming you used EPSILON = 0.0001
     # in computeNumericalGradient.m, then diff below should be 2.1452e-12
-    diff = np.linalg.norm(numgrad-grad)/np.linalg.norm(numgrad+grad)
+    diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
     print diff
     print 'Norm of the difference between numerical and analytical gradient (should be < 1e-9)\n\n'
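
For h(x1, x2) = x1^2 + 3*x1*x2 the analytic gradient is (2*x1 + 3*x2, 3*x1), so the whole check can be reproduced in a few lines with central differences; the test point below is arbitrary, not necessarily the one check_numerical_gradient uses:

import numpy as np

def h(x):
    return x[0] ** 2 + 3 * x[0] * x[1]

def h_grad(x):
    return np.array([2 * x[0] + 3 * x[1], 3 * x[0]])

x = np.array([4.0, 10.0])
epsilon = 1e-4
numgrad = np.zeros_like(x)
for i in range(x.size):
    e = np.zeros_like(x)
    e[i] = epsilon
    numgrad[i] = (h(x + e) - h(x - e)) / (2 * epsilon)

# relative error; should come out around 1e-12 for this smooth function
diff = np.linalg.norm(numgrad - h_grad(x)) / np.linalg.norm(numgrad + h_grad(x))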

sparse_autoencoder/compute_numerical_gradient.py

Lines changed: 4 additions & 4 deletions
@@ -16,20 +16,20 @@ def compute_numerical_gradient(func, theta):
     # partial derivative of func with respect to the i-th input argument, evaluated at theta.
     # I.e., numgrad(i) should be the (approximately) the partial derivative of func with
     # respect to theta(i).
-    #
+    #
     # Hint: You will probably want to compute the elements of numgrad one at a time.
     epsilon = 1e-4
     for i in xrange(theta.size):
         # temporarily save the value
         theta_i = theta[i]
         # temporarily increase the value
-        theta[i] = theta_i+epsilon
+        theta[i] = theta_i + epsilon
         val_plus = func(theta)
         # temporarily decrease the value
-        theta[i] = theta_i-epsilon
+        theta[i] = theta_i - epsilon
         val_minus = func(theta)
         # compute numerical gradient
-        numgrad[i] = (val_plus-val_minus)/(epsilon*2)
+        numgrad[i] = (val_plus - val_minus) / (epsilon * 2)
         # restore theta
         theta[i] = theta_i
 
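The loop above is the central-difference approximation numgrad[i] ≈ (J(theta + eps*e_i) - J(theta - eps*e_i)) / (2*eps), perturbing theta in place and restoring it so func always sees a vector of the original shape. A self-contained check of that pattern on a function with a known gradient (the quadratic below is illustrative, not from the repo):

import numpy as np

def func(theta):
    return np.sum(theta ** 2)      # exact gradient is 2 * theta

theta = np.random.randn(6)
numgrad = np.zeros_like(theta)
epsilon = 1e-4
for i in range(theta.size):
    theta_i = theta[i]             # save the original value
    theta[i] = theta_i + epsilon
    val_plus = func(theta)
    theta[i] = theta_i - epsilon
    val_minus = func(theta)
    numgrad[i] = (val_plus - val_minus) / (epsilon * 2)
    theta[i] = theta_i             # restore

assert np.allclose(numgrad, 2 * theta, atol=1e-6)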

sparse_autoencoder/display_network.py

Lines changed: 10 additions & 9 deletions
@@ -1,9 +1,9 @@
-from math import sqrt, ceil, floor
-
 import numpy as np
 import matplotlib.pyplot as plt
 import matplotlib.cm as cm
 
+from math import sqrt, ceil, floor
+
 
 def display_network(data, cols=-1, opt_normalize=True, opt_graycolor=True, save_figure_path=None):
     # This function visualizes filters in matrix A. Each row of A is a
@@ -25,21 +25,21 @@ def display_network(data, cols=-1, opt_normalize=True, opt_graycolor=True, save_figure_path=None):
     sz = int(sqrt(area))
     buf = 1
     if cols < 0:
-        if floor(sqrt(num))**2 != num:
+        if floor(sqrt(num)) ** 2 != num:
             n = ceil(sqrt(num))
-            while num % n != 0 and n < 1.2*sqrt(num):
+            while num % n != 0 and n < 1.2 * sqrt(num):
                 n += 1
-            m = ceil(num/n)
+            m = ceil(num / n)
         else:
             n = sqrt(num)
             m = n
     else:
         n = cols
-        m = ceil(num/n)
+        m = ceil(num / n)
     n = int(n)
     m = int(m)
 
-    array = -np.ones((buf+m*(sz+buf), buf+n*(sz+buf)))
+    array = -np.ones((buf + m * (sz + buf), buf + n * (sz + buf)))
 
     if not opt_graycolor:
         array *= 0.1
@@ -53,14 +53,15 @@ def display_network(data, cols=-1, opt_normalize=True, opt_graycolor=True, save_figure_path=None):
                 clim = np.amax(np.absolute(data[k, :]))
             else:
                 clim = np.amax(np.absolute(data))
-            array[buf+i*(sz+buf):buf+i*(sz+buf)+sz, buf+j*(sz+buf):buf+j*(sz+buf)+sz] = data[k, :].reshape([sz, sz])/clim
+            array[buf + i * (sz + buf):buf + i * (sz + buf) + sz,
+                  buf + j * (sz + buf):buf + j * (sz + buf) + sz] = data[k, :].reshape([sz, sz]) / clim
             k += 1
 
     # simulate imagesc
     ax = plt.figure().gca()
     pix_width = 5
     h, w = array.shape
-    exts = (0, pix_width*w, 0, pix_width*h)
+    exts = (0, pix_width * w, 0, pix_width * h)
     if opt_graycolor:
         ax.imshow(array, interpolation='nearest', extent=exts, cmap=cm.gray)
     else:
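
As called from stl_exercise.py, display_network expects one filter per row, which is why the (input_size x hidden_size) weight matrix is transposed before plotting. A short usage sketch with random weights standing in for the trained ones (it assumes the repo's package layout is importable):

import numpy as np
from sparse_autoencoder.display_network import display_network

input_size, hidden_size = 28 * 28, 200
w1 = 0.01 * np.random.randn(input_size, hidden_size)   # stand-in for the trained weights

# each of the 200 rows of w1.T becomes a 28x28 tile in the saved grid
display_network(w1.T, save_figure_path='../data/stl.png')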
