# CS294A/CS294W Softmax Exercise

# fetch_mldata was removed from scikit-learn; fetch_openml is used instead below
from sklearn.datasets import fetch_openml
import numpy as np

# Instructions
# ------------
#
# This file contains code that helps you get started on the
# softmax exercise. You will need to write the softmax cost function
# softmaxCost and the softmax prediction function softmaxPredict.
# For this exercise, you will not need to change any code in this file,
# or any other files other than those mentioned above.
# (However, you may be required to do so in later exercises.)

#======================================================================
# STEP 0: Initialise constants and parameters
#
# Here we define and initialise some constants which allow your code
# to be used more generally on any arbitrary input.
# We also initialise some parameters used for tuning the model.

inputSize = 28 * 28  # Size of input vector (MNIST images are 28x28)
numClasses = 10      # Number of classes (MNIST images fall into 10 classes)

decayLambda = 1e-4   # Weight decay parameter

#======================================================================
# STEP 1: Load data
#
# In this section, we load the input and output data.
# For softmax regression on MNIST pixels,
# the input data is the images, and
# the output data is the labels.
#

# The data are downloaded via scikit-learn and cached under data_home;
# change data_home if you keep the cached files somewhere else.

mnist = fetch_openml('mnist_784', version=1, data_home='./data/', as_frame=False)

# Scale the pixel values to the range [0,1]
images = np.float32(mnist.data) / 255.0
# OpenML stores the labels as strings, so convert them to integers
labels = mnist.target.astype(np.int64)
# Remap 0 to 10 so that the labels take values 1..10
labels[labels == 0] = 10

# The OpenML copy of MNIST keeps the 60000 training images first and the
# 10000 test images last; train on the training split only.
inputData = images[:60000]
labels = labels[:60000]
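
# Optional sanity check (a sketch; the expected shapes assume the standard
# 60000-image MNIST training split and the [0, 1] pixel scaling above).
assert inputData.shape == (60000, 28 * 28)
assert 0.0 <= inputData.min() and inputData.max() <= 1.0
assert set(np.unique(labels)) <= set(range(1, 11))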

# For debugging purposes, you may wish to reduce the size of the input data
# in order to speed up gradient checking.
# Here, we create a small synthetic dataset of random data for testing.

DEBUG = True  # Set DEBUG to True when debugging.
if DEBUG:
    inputSize = 8
    inputData = np.random.randn(100, 8)
    # Random labels in 1..10, matching the label convention used above
    labels = np.random.randint(1, numClasses + 1, size=(100, 1))

# Randomly initialise theta
theta = 0.005 * np.random.randn(numClasses * inputSize, 1)

#======================================================================
# STEP 2: Implement softmaxCost
#
# Implement the softmaxCost function, which returns the cost and gradient.

#cost, grad = softmaxCost(theta, numClasses, inputSize, decayLambda, inputData, labels)
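
# One possible sketch of the softmaxCost called above (an illustration only;
# the exercise expects you to write your own). It implements the
# weight-decayed softmax cost
#   J(theta) = -(1/m) sum_i sum_k 1{labels_i == k} log p(k | x_i)
#              + (decay/2) * ||theta||^2
# assuming one example per row of `data` and labels in 1..numClasses.
def softmaxCost(theta, numClasses, inputSize, decay, data, labels):
    theta = theta.reshape(numClasses, inputSize)        # one weight row per class
    numExamples = data.shape[0]

    # Indicator matrix: groundTruth[k, i] == 1 iff example i has label k + 1
    groundTruth = np.zeros((numClasses, numExamples))
    groundTruth[np.asarray(labels, dtype=int).ravel() - 1,
                np.arange(numExamples)] = 1.0

    # Class scores, shifted for numerical stability before exponentiating
    scores = theta.dot(data.T)                          # (numClasses, numExamples)
    scores -= scores.max(axis=0, keepdims=True)
    probs = np.exp(scores)
    probs /= probs.sum(axis=0, keepdims=True)

    # Cross-entropy term plus the weight-decay penalty, and the matching gradient
    cost = (-np.sum(groundTruth * np.log(probs)) / numExamples
            + 0.5 * decay * np.sum(theta ** 2))
    grad = -(groundTruth - probs).dot(data) / numExamples + decay * theta
    return cost, grad.reshape(-1, 1)                    # gradient shaped like theta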

#======================================================================
# STEP 3: Gradient checking
#
# As with any learning algorithm, you should always check that your
# gradients are correct before learning the parameters.
#

#if DEBUG:
#    numGrad = computeNumericalGradient(
#        lambda x: softmaxCost(x, numClasses, inputSize, decayLambda, inputData, labels),
#        theta)
#
#    # Use this to visually compare the gradients side by side
#    print(np.hstack((numGrad, grad)))
#
#    # Compare numerically computed gradients with those computed analytically
#    diff = np.linalg.norm(numGrad - grad) / np.linalg.norm(numGrad + grad)
#    print(diff)
#    # The difference should be small.
#    # In our implementation, these values are usually less than 1e-7.
#
#    # When your gradients are correct, congratulations!
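
# A minimal sketch (an assumption, not the course-provided helper) of the
# centred-difference checker referenced above: J is any callable returning
# (cost, grad), theta is a column vector, and each coordinate is perturbed
# by +/- epsilon to approximate the partial derivative numerically.
def computeNumericalGradient(J, theta, epsilon=1e-4):
    numGrad = np.zeros_like(theta)
    perturb = np.zeros_like(theta)
    for i in range(theta.size):
        perturb.flat[i] = epsilon
        costPlus, _ = J(theta + perturb)
        costMinus, _ = J(theta - perturb)
        numGrad.flat[i] = (costPlus - costMinus) / (2.0 * epsilon)
        perturb.flat[i] = 0.0
    return numGrad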

#======================================================================
# STEP 4: Learning parameters
#
# Once you have verified that your gradients are correct,
# you can start training your softmax regression code using softmaxTrain
# (which wraps an off-the-shelf optimiser, in place of the original
# exercise's minFunc).

#options = {'maxIter': 100}
#softmaxModel = softmaxTrain(inputSize, numClasses, decayLambda,
#                            inputData, labels, options)

# Although we only use 100 iterations here to train a classifier for the
# MNIST data set, in practice, training for more iterations is usually
# beneficial.
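
# A minimal sketch of the softmaxTrain used in the commented call above
# (an assumption: it substitutes scipy.optimize.minimize with L-BFGS-B for
# the original minFunc, and it relies on a softmaxCost with the signature
# used in this file). 'options' is treated as a plain dict like {'maxIter': 100}.
from scipy.optimize import minimize

def softmaxTrain(inputSize, numClasses, decay, data, labels, options=None):
    maxIter = (options or {}).get('maxIter', 100)
    theta0 = 0.005 * np.random.randn(numClasses * inputSize)

    def objective(t):
        # minimize works with flat vectors; softmaxCost expects a column vector
        cost, grad = softmaxCost(t.reshape(-1, 1), numClasses, inputSize,
                                 decay, data, labels)
        return cost, grad.ravel()

    result = minimize(objective, theta0, jac=True, method='L-BFGS-B',
                      options={'maxiter': maxIter})
    return {'optTheta': result.x.reshape(numClasses, inputSize),
            'inputSize': inputSize, 'numClasses': numClasses}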

#======================================================================
# STEP 5: Testing
#
# You should now test your model against the test images.
# To do this, you will first need to write the softmaxPredict function,
# which should return predictions given a softmax model and the input data.

# The OpenML copy of MNIST stores the 10000 test images after the 60000
# training images, so the test split is the final 10000 rows.
#inputData = images[60000:]
#labels = mnist.target.astype(np.int64)[60000:]
#labels[labels == 0] = 10    # Remap 0 to 10

# You will have to implement the softmaxPredict function
#pred = softmaxPredict(softmaxModel, inputData)

#acc = np.mean(labels == pred)
#print('Accuracy: {:.3f}%'.format(acc * 100))
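
# A minimal sketch of softmaxPredict (an assumption, not the reference
# solution): predictions are the class with the highest score under the
# learned weights, and the +1 keeps the 1..10 label convention used above.
# It assumes the dict layout returned by the softmaxTrain sketch.
def softmaxPredict(softmaxModel, data):
    scores = softmaxModel['optTheta'].dot(data.T)   # (numClasses, numExamples)
    return np.argmax(scores, axis=0) + 1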

# Accuracy is the proportion of correctly classified images
# After 100 iterations, the results for our implementation were:
#
# Accuracy: 92.200%
#
# If your values are too low (accuracy less than 0.91), you should check
# your code for errors, and make sure you are training on the
# entire data set of 60000 28x28 training images
# (unless you modified the loading code, this should be the case)