Skip to content

Commit 4f7d724

Browse files
committed
Use dict for data src instead of list-of-list.
1 parent 1b6b189 commit 4f7d724

File tree

1 file changed

+16
-14
lines changed

1 file changed

+16
-14
lines changed

content/tutorial-deep-learning-on-mnist.md

+16-14
Original file line number | Diff line number | Diff line change
@@ -61,10 +61,12 @@ In this section, you will download the zipped MNIST dataset files originally sto
6161
**1.** Define a variable to store the training/test image/label file names of the MNIST dataset in a dictionary:
6262

6363
```{code-cell} ipython3
64-
filename = [["training_images", "train-images-idx3-ubyte.gz"], # 60,000 training images.
65-
["test_images", "t10k-images-idx3-ubyte.gz"], # 10,000 test images.
66-
["training_labels", "train-labels-idx1-ubyte.gz"], # 60,000 training labels.
67-
["test_labels", "t10k-labels-idx1-ubyte.gz"]] # 10,000 test labels.
64+
data_sources = {
65+
"training_images": "train-images-idx3-ubyte.gz", # 60,000 training images.
66+
"test_images": "t10k-images-idx3-ubyte.gz", # 10,000 test images.
67+
"training_labels": "train-labels-idx1-ubyte.gz", # 60,000 training labels.
68+
"test_labels": "t10k-labels-idx1-ubyte.gz" # 10,000 test labels.
69+
}
6870
```
6971

7072
**2.** Load the data. First check if the data is stored locally; if not, then
@@ -82,11 +84,11 @@ headers = {
8284
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0"
8385
}
8486
85-
for name in filename:
86-
fpath = os.path.join(data_dir, name[1])
87+
for fname in data_sources.values():
88+
fpath = os.path.join(data_dir, fname)
8789
if not os.path.exists(fpath):
88-
print("Downloading file: " + name[1])
89-
resp = requests.get(base_url + name[1], headers=headers, stream=True)
90+
print("Downloading file: " + fname)
91+
resp = requests.get(base_url + fname, headers=headers, stream=True)
9092
with open(fpath, "wb") as fh:
9193
for chunk in resp.iter_content(chunk_size=128):
9294
fh.write(chunk)
@@ -101,13 +103,13 @@ import numpy as np
101103
mnist_dataset = {}
102104
103105
# Images
104-
for name in filename[:2]:
105-
with gzip.open(os.path.join(data_dir, name[1]), 'rb') as mnist_file:
106-
mnist_dataset[name[0]] = np.frombuffer(mnist_file.read(), np.uint8, offset=16).reshape(-1, 28*28)
106+
for key in ("training_images", "test_images"):
107+
with gzip.open(os.path.join(data_dir, data_sources[key]), 'rb') as mnist_file:
108+
mnist_dataset[key] = np.frombuffer(mnist_file.read(), np.uint8, offset=16).reshape(-1, 28*28)
107109
# Labels
108-
for name in filename[-2:]:
109-
with gzip.open(os.path.join(data_dir, name[1]), 'rb') as mnist_file:
110-
mnist_dataset[name[0]] = np.frombuffer(mnist_file.read(), np.uint8, offset=8)
110+
for key in ("training_labels", "test_labels"):
111+
with gzip.open(os.path.join(data_dir, data_sources[key]), 'rb') as mnist_file:
112+
mnist_dataset[key] = np.frombuffer(mnist_file.read(), np.uint8, offset=8)
111113
```
112114

113115
**4.** Split the data into training and test sets using the standard notation of `x` for data and `y` for labels, calling the training and test set images `x_train` and `x_test`, and the labels `y_train` and `y_test`:

0 commit comments

Comments
 (0)