@@ -61,10 +61,12 @@ In this section, you will download the zipped MNIST dataset files originally sto
61
61
**1.** Define a variable that maps each MNIST training/test image and label set to the name of its file:
62
62
63
63
``` {code-cell} ipython3
64
- filename = [["training_images", "train-images-idx3-ubyte.gz"], # 60,000 training images.
65
- ["test_images", "t10k-images-idx3-ubyte.gz"], # 10,000 test images.
66
- ["training_labels", "train-labels-idx1-ubyte.gz"], # 60,000 training labels.
67
- ["test_labels", "t10k-labels-idx1-ubyte.gz"]] # 10,000 test labels.
64
+ data_sources = {
65
+ "training_images": "train-images-idx3-ubyte.gz", # 60,000 training images.
66
+ "test_images": "t10k-images-idx3-ubyte.gz", # 10,000 test images.
67
+ "training_labels": "train-labels-idx1-ubyte.gz", # 60,000 training labels.
68
+ "test_labels": "t10k-labels-idx1-ubyte.gz" # 10,000 test labels.
69
+ }
68
70
```
69
71
70
72
**2.** Load the data. First check if the data is stored locally; if not, then
@@ -82,11 +84,11 @@ headers = {
82
84
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0"
83
85
}
84
86
85
- for name in filename :
86
- fpath = os.path.join(data_dir, name[1] )
87
+ for fname in data_sources.values() :
88
+ fpath = os.path.join(data_dir, fname )
87
89
if not os.path.exists(fpath):
88
- print("Downloading file: " + name[1] )
89
- resp = requests.get(base_url + name[1] , headers=headers, stream=True)
90
+ print("Downloading file: " + fname )
91
+ resp = requests.get(base_url + fname , headers=headers, stream=True)
90
92
with open(fpath, "wb") as fh:
91
93
for chunk in resp.iter_content(chunk_size=128):
92
94
fh.write(chunk)
@@ -101,13 +103,13 @@ import numpy as np
101
103
mnist_dataset = {}
102
104
103
105
# Images
104
- for name in filename[:2] :
105
- with gzip.open(os.path.join(data_dir, name[1 ]), 'rb') as mnist_file:
106
- mnist_dataset[name[0] ] = np.frombuffer(mnist_file.read(), np.uint8, offset=16).reshape(-1, 28*28)
106
+ for key in ("training_images", "test_images") :
107
+ with gzip.open(os.path.join(data_dir, data_sources[key ]), 'rb') as mnist_file:
108
+ mnist_dataset[key ] = np.frombuffer(mnist_file.read(), np.uint8, offset=16).reshape(-1, 28*28)
107
109
# Labels
108
- for name in filename[-2:] :
109
- with gzip.open(os.path.join(data_dir, name[1 ]), 'rb') as mnist_file:
110
- mnist_dataset[name[0] ] = np.frombuffer(mnist_file.read(), np.uint8, offset=8)
110
+ for key in ("training_labels", "test_labels") :
111
+ with gzip.open(os.path.join(data_dir, data_sources[key ]), 'rb') as mnist_file:
112
+ mnist_dataset[key ] = np.frombuffer(mnist_file.read(), np.uint8, offset=8)
111
113
```
112
114
113
115
**4.** Split the data into training and test sets using the standard notation of `x` for data and `y` for labels, calling the training and test set images `x_train` and `x_test`, and the labels `y_train` and `y_test`:
0 commit comments