From 6ac8d09cc68c229c3ae5583394e019671566c015 Mon Sep 17 00:00:00 2001 From: Page David Date: Sun, 10 Jun 2018 07:03:24 +0800 Subject: [PATCH 01/21] add batch info input in the load func --- mnist/loader.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/mnist/loader.py b/mnist/loader.py index 89d19fb..8cd2ff1 100644 --- a/mnist/loader.py +++ b/mnist/loader.py @@ -193,7 +193,11 @@ def opener(self, path_fn, *args, **kwargs): else: return open(path_fn, *args, **kwargs) - def load(self, path_img, path_lbl): + def load(self, path_img, path_lbl, batch=None): + if type(batch) is not list or len(batch) is not 2: + raise ValueError('batch should be a 1-D list' + '(start_point, batch_size)') + with self.opener(path_lbl, 'rb') as file: magic, size = struct.unpack(">II", file.read(8)) if magic != 2049: @@ -210,6 +214,10 @@ def load(self, path_img, path_lbl): image_data = array("B", file.read()) + if batch is not None: + image_data = image_data[batch[0], batch[0]+batch[1]] + size = batch[1] + images = [] for i in range(size): images.append([0] * rows * cols) From ca565d8b54ec7d58e85bc8bd0794bff6a6f186fd Mon Sep 17 00:00:00 2001 From: Page David Date: Sun, 10 Jun 2018 10:17:06 +0800 Subject: [PATCH 02/21] add batch loading and bug fix --- mnist/loader.py | 50 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 4 deletions(-) diff --git a/mnist/loader.py b/mnist/loader.py index 8cd2ff1..4c319a7 100644 --- a/mnist/loader.py +++ b/mnist/loader.py @@ -130,6 +130,45 @@ def load_training(self): return self.train_images, self.train_labels + def load_training_in_batchs(self, batch_size): + if type(batch_size) is not int: + raise ValueError('batch_size must be a int number') + batch_sp = 0 + self._get_dataset_size(os.path.join(self.path, self.train_img_fname), + os.path.join(self.path, self.train_lbl_fname)) + + while True: + if batch_sp + batch_size > self.dataset_size: + batch_sp = 0 + ims, labels = self.load( + os.path.join(self.path, self.train_img_fname), + os.path.join(self.path, self.train_lbl_fname), + batch=[batch_sp, batch_size]) + + self.train_images = self.process_images(ims) + self.train_labels = self.process_labels(labels) + + batch_sp += batch_size + yield self.train_images, self.train_labels + + def _get_dataset_size(self, path_img, path_lbl): + with self.opener(path_lbl, 'rb') as file: + magic, lb_size = struct.unpack(">II", file.read(8)) + if magic != 2049: + raise ValueError('Magic number mismatch, expected 2049,' + 'got {}'.format(magic)) + + with self.opener(path_img, 'rb') as file: + magic, im_size = struct.unpack(">II", file.read(8)) + if magic != 2051: + raise ValueError('Magic number mismatch, expected 2051,' + 'got {}'.format(magic)) + + if lb_size != im_size: + raise ValueError('image size is not equal to label size') + + self.dataset_size = lb_size + def process_images(self, images): if self.return_type is 'lists': return self.process_images_to_lists(images) @@ -194,9 +233,10 @@ def opener(self, path_fn, *args, **kwargs): return open(path_fn, *args, **kwargs) def load(self, path_img, path_lbl, batch=None): - if type(batch) is not list or len(batch) is not 2: - raise ValueError('batch should be a 1-D list' - '(start_point, batch_size)') + if batch is not None: + if type(batch) is not list or len(batch) is not 2: + raise ValueError('batch should be a 1-D list' + '(start_point, batch_size)') with self.opener(path_lbl, 'rb') as file: magic, size = struct.unpack(">II", file.read(8)) @@ -215,7 +255,9 @@ def load(self, path_img, path_lbl, batch=None): image_data = array("B", file.read()) if batch is not None: - image_data = image_data[batch[0], batch[0]+batch[1]] + image_data = image_data[batch[0] * rows * cols:\ + (batch[0] + batch[1]) * rows * cols] + labels = labels[batch[0]: batch[0] + batch[1]] size = batch[1] images = [] From 2321cb7b2b5eff913db4dc98b87335576a7e2d10 Mon Sep 17 00:00:00 2001 From: Richard Marko Date: Mon, 6 Aug 2018 17:45:15 +0200 Subject: [PATCH 03/21] readme: add note about filenames Closes #19. --- README.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.rst b/README.rst index fd2a2f7..545dbe1 100644 --- a/README.rst +++ b/README.rst @@ -46,6 +46,9 @@ To enable loading of gzip-ed files use:: mndata.gz = True +Library tries to load files named `t10k-images-idx3-ubyte` `train-labels-idx1-ubyte` `train-images-idx3-ubyte` and `t10k-labels-idx1-ubyte`. +If loading throws an exception check if these names match. + EMNIST ------ From 8769f3f10c30977e0f916c4c7a9259f8a909c2c6 Mon Sep 17 00:00:00 2001 From: Richard Marko Date: Mon, 6 Aug 2018 17:45:26 +0200 Subject: [PATCH 04/21] readme: add fancy example! --- README.rst | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/README.rst b/README.rst index 545dbe1..413867e 100644 --- a/README.rst +++ b/README.rst @@ -87,3 +87,39 @@ Notes This package doesn't use `numpy` by design as when I've tried to find a working implementation all of them were based on some archaic version of `numpy` and none of them worked. This loads data files with `struct.unpack` instead. + +Example +------- + +:: + $ PYTHONPATH=. ./bin/mnist_preview + Showing num: 3 + + ............................ + ............................ + ............................ + ............................ + ............................ + ............................ + .............@@@@@.......... + ..........@@@@@@@@@@........ + .......@@@@@@......@@....... + .......@@@........@@@....... + .................@@......... + ................@@@......... + ...............@@@@@........ + .............@@@............ + .............@.......@...... + .....................@...... + .....................@@..... + ....................@@...... + ...................@@@...... + .................@@@@....... + ................@@@@........ + ....@........@@@@@.......... + ....@@@@@@@@@@@@............ + ......@@@@@@................ + ............................ + ............................ + ............................ + ............................ From 9ce11be7e46cbbb58aa77c914bef89b1b26223ba Mon Sep 17 00:00:00 2001 From: Richard Marko Date: Mon, 6 Aug 2018 17:46:38 +0200 Subject: [PATCH 05/21] fix all shebangs --- bin/emnist_preview | 2 +- bin/mnist_preview | 2 +- get_data.sh | 2 +- get_emnist_data.sh | 2 +- test.sh | 2 +- tests/loader.py | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/bin/emnist_preview b/bin/emnist_preview index 290ef38..fd1fbef 100755 --- a/bin/emnist_preview +++ b/bin/emnist_preview @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import random import argparse diff --git a/bin/mnist_preview b/bin/mnist_preview index 047f1ce..0b9fab1 100755 --- a/bin/mnist_preview +++ b/bin/mnist_preview @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 import random import argparse diff --git a/get_data.sh b/get_data.sh index 50dbe93..70c84a1 100755 --- a/get_data.sh +++ b/get_data.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash if [ -d data ]; then echo "data directory already present, exiting" diff --git a/get_emnist_data.sh b/get_emnist_data.sh index d773f0b..d17a3bb 100755 --- a/get_emnist_data.sh +++ b/get_emnist_data.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash if [ -d emnist_data ]; then echo "emnist_data directory already present, exiting" diff --git a/test.sh b/test.sh index 13524c6..c326c32 100755 --- a/test.sh +++ b/test.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash dfile='data/t10k-images-idx3-ubyte' diff --git a/tests/loader.py b/tests/loader.py index 941f69a..35d9619 100755 --- a/tests/loader.py +++ b/tests/loader.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python import os import sys import logging From f7ef3c5c8e605d672f509e5642651cf714c7ee6a Mon Sep 17 00:00:00 2001 From: Richard Marko Date: Mon, 6 Aug 2018 18:30:16 +0200 Subject: [PATCH 06/21] fix batch loading termination, add test --- mnist/loader.py | 14 ++++++++++---- tests/loader.py | 9 +++++++++ 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/mnist/loader.py b/mnist/loader.py index 4c319a7..6e6fca8 100644 --- a/mnist/loader.py +++ b/mnist/loader.py @@ -130,16 +130,15 @@ def load_training(self): return self.train_images, self.train_labels - def load_training_in_batchs(self, batch_size): + def load_training_in_batches(self, batch_size): if type(batch_size) is not int: raise ValueError('batch_size must be a int number') batch_sp = 0 + last = False self._get_dataset_size(os.path.join(self.path, self.train_img_fname), os.path.join(self.path, self.train_lbl_fname)) while True: - if batch_sp + batch_size > self.dataset_size: - batch_sp = 0 ims, labels = self.load( os.path.join(self.path, self.train_img_fname), os.path.join(self.path, self.train_lbl_fname), @@ -148,9 +147,16 @@ def load_training_in_batchs(self, batch_size): self.train_images = self.process_images(ims) self.train_labels = self.process_labels(labels) - batch_sp += batch_size yield self.train_images, self.train_labels + if last: + break + + batch_sp += batch_size + if batch_sp + batch_size > self.dataset_size: + last = True + batch_size = self.dataset_size - batch_sp + def _get_dataset_size(self, path_img, path_lbl): with self.opener(path_lbl, 'rb') as file: magic, lb_size = struct.unpack(">II", file.read(8)) diff --git a/tests/loader.py b/tests/loader.py index 35d9619..4dba411 100755 --- a/tests/loader.py +++ b/tests/loader.py @@ -35,6 +35,15 @@ def test_gzip(self): self.assertEqual(len(train_img), len(train_label)) self.assertEqual(len(train_img), 60000) + def test_batches(self): + mn = mnist.MNIST(DATA_PATH) + total = 0 + + for images, labels in mn.load_training_in_batches(11000): + total += len(images) + + self.assertEqual(total, 60000) + if __name__ == "__main__": logging.basicConfig(level=logging.INFO) unittest.main() From 5126044347451dda8c4b5a21127b042e2be592b8 Mon Sep 17 00:00:00 2001 From: Richard Marko Date: Mon, 6 Aug 2018 18:30:41 +0200 Subject: [PATCH 07/21] bump to 0.6 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index f25fa6e..1d7c0b4 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ from distutils.core import setup NAME = 'python-mnist' -VERSION = '0.5' +VERSION = '0.6' # Compile the list of packages available, because distutils doesn't have # an easy way to do this. From 127ce38399c89412588638be1c0aa13121bf920b Mon Sep 17 00:00:00 2001 From: Richard Marko Date: Mon, 6 Aug 2018 18:35:36 +0200 Subject: [PATCH 08/21] fix readme --- README.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/README.rst b/README.rst index 413867e..b361438 100644 --- a/README.rst +++ b/README.rst @@ -92,6 +92,7 @@ Example ------- :: + $ PYTHONPATH=. ./bin/mnist_preview Showing num: 3 From a2cf3728bc9cd88cd5dd62c76a946ba4923f9cd1 Mon Sep 17 00:00:00 2001 From: Page David Date: Tue, 7 Aug 2018 11:57:18 +0800 Subject: [PATCH 09/21] add packer --- mnist/__init__.py | 3 ++- mnist/packer.py | 62 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 mnist/packer.py diff --git a/mnist/__init__.py b/mnist/__init__.py index cc2d654..36bbf48 100644 --- a/mnist/__init__.py +++ b/mnist/__init__.py @@ -1,3 +1,4 @@ from .loader import MNIST +from .packer import label_packer, data_packer -__all__ = [MNIST, ] +__all__ = [MNIST, label_packer, data_packer] diff --git a/mnist/packer.py b/mnist/packer.py new file mode 100644 index 0000000..b7edbdf --- /dev/null +++ b/mnist/packer.py @@ -0,0 +1,62 @@ +import gzip +import os +import struct + + +def _binary_writter(data, filepath): + with open(filepath, 'wb') as file: + file.write(data) + + +def _gzip_writter(data, filepath): + with gzip.open(filepath, 'wb') as file: + file.write(data) + + +def data_packer(path, filename, imgs, gzip=False, + magic=2051, rows=28, cols=28): + data = b'' + data += struct.pack(">IIII", magic, len(imgs), rows, cols) + + to_list = list() + if type(imgs).__name__ == 'array': + to_list = list(imgs) + elif type(imgs).__name__ == 'ndarray': + to_list = list(imgs) + elif type(imgs).__name__ == 'list': + to_list = imgs + else: + raise TypeError('Unsupported data type.') + + for i in to_list: + pack_format = '>' + 'B' * len(i) + data += struct.pack(pack_format, *i) + + if gzip: + _gzip_writter(data, os.path.join(path, filename)) + else: + _binary_writter(data, os.path.join(path, filename)) + + +def label_packer(path, filename, label, + gzip=False, magic=2049): + data = b'' + data += struct.pack(">II", magic, len(label)) + + to_list = list() + if type(label).__name__ == 'array': + to_list = list(label) + elif type(label).__name__ == 'ndarray': + to_list = list(label) + elif type(label).__name__ == 'list': + to_list = label + else: + raise TypeError('Unsupported label type.') + + pack_format = '>' + 'B' * len(to_list) + data += struct.pack(pack_format, *to_list) + + if gzip: + _gzip_writter(data, os.path.join(path, filename)) + else: + _binary_writter(data, os.path.join(path, filename)) From b281650d46078dc4c40693eb8d519ceec4c65233 Mon Sep 17 00:00:00 2001 From: Page David Date: Tue, 7 Aug 2018 12:05:43 +0800 Subject: [PATCH 10/21] rename datapacker to imgpacker --- mnist/__init__.py | 4 ++-- mnist/packer.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mnist/__init__.py b/mnist/__init__.py index 36bbf48..e4dfc3c 100644 --- a/mnist/__init__.py +++ b/mnist/__init__.py @@ -1,4 +1,4 @@ from .loader import MNIST -from .packer import label_packer, data_packer +from .packer import label_packer, img_packer -__all__ = [MNIST, label_packer, data_packer] +__all__ = [MNIST, label_packer, img_packer] diff --git a/mnist/packer.py b/mnist/packer.py index b7edbdf..251e316 100644 --- a/mnist/packer.py +++ b/mnist/packer.py @@ -13,8 +13,8 @@ def _gzip_writter(data, filepath): file.write(data) -def data_packer(path, filename, imgs, gzip=False, - magic=2051, rows=28, cols=28): +def img_packer(path, filename, imgs, gzip=False, + magic=2051, rows=28, cols=28): data = b'' data += struct.pack(">IIII", magic, len(imgs), rows, cols) From 3d51121004db0210a99fd20e40984346742ff08d Mon Sep 17 00:00:00 2001 From: Page David Date: Tue, 2 Oct 2018 22:30:39 +0800 Subject: [PATCH 11/21] add emnist rotating binary --- bin/emnist_repack | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100755 bin/emnist_repack diff --git a/bin/emnist_repack b/bin/emnist_repack new file mode 100755 index 0000000..56dc7ec --- /dev/null +++ b/bin/emnist_repack @@ -0,0 +1,43 @@ +#!/usr/bin/env python3 + +import argparse +import os.path +from mnist import MNIST +from mnist import img_packer + +if __name__ == '__main__': + + parser = argparse.ArgumentParser() + parser.add_argument("--data", default="./emnist_data", + help="Path to MNIST data dir") + parser.add_argument("--output", default=None, + help="Where to save result") + + args = parser.parse_args() + + DATASETS = ["balanced", "byclass", "bymerge", + "digits", "letters", "mnist"] + + mn = MNIST(args.data) + + if not args.output: + dest = args.data + train_img_fname = 'rf_' + mn.train_img_fname + test_img_fname = 'rf_' + mn.test_img_fname + else: + dest = args.output + train_img_fname = mn.train_img_fname + test_img_fname = mn.test_img_fname + + for dt_name in DATASETS: + mn.select_emnist(dt_name) + + tra_img, _ = mn.load_training() + + print("========procesing {} dataset========".format(dt_name)) + img_packer(dest, train_img_fname, + tra_img, gzip=True) + + tes_img, _ = mn.load_testing() + img_packer(dest, test_img_fname, + tes_img, gzip=True) From c8046d7b52bc982f5159baa6f0489dd8e50a130a Mon Sep 17 00:00:00 2001 From: Page David Date: Tue, 2 Oct 2018 22:34:58 +0800 Subject: [PATCH 12/21] put status output before load --- bin/emnist_repack | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/emnist_repack b/bin/emnist_repack index 56dc7ec..eb96600 100755 --- a/bin/emnist_repack +++ b/bin/emnist_repack @@ -32,9 +32,9 @@ if __name__ == '__main__': for dt_name in DATASETS: mn.select_emnist(dt_name) - tra_img, _ = mn.load_training() - print("========procesing {} dataset========".format(dt_name)) + + tra_img, _ = mn.load_training() img_packer(dest, train_img_fname, tra_img, gzip=True) From 25baccbca9512603c4e13b09622a8098137227ca Mon Sep 17 00:00:00 2001 From: Richard Marko Date: Sun, 1 Mar 2020 14:12:35 +0100 Subject: [PATCH 13/21] fix emnist data URL --- get_emnist_data.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/get_emnist_data.sh b/get_emnist_data.sh index d17a3bb..eaabf37 100755 --- a/get_emnist_data.sh +++ b/get_emnist_data.sh @@ -7,7 +7,8 @@ fi mkdir emnist_data pushd emnist_data -wget http://biometrics.nist.gov/cs_links/EMNIST/gzip.zip +#wget http://biometrics.nist.gov/cs_links/EMNIST/gzip.zip +wget http://www.itl.nist.gov/iaui/vip/cs_links/EMNIST/gzip.zip unzip gzip.zip rm -f gzip.zip mv gzip/* . From 34853d4b8667f85881f183a6f9aeded7ed6c4b48 Mon Sep 17 00:00:00 2001 From: Richard Marko Date: Sun, 1 Mar 2020 14:15:14 +0100 Subject: [PATCH 14/21] rename get_data.sh to bin/mnist_get_data.sh, get_emnist_data.sh to bin/emnist_get_data.sh --- README.rst | 4 ++-- get_emnist_data.sh => bin/emnist_get_data.sh | 0 get_data.sh => bin/mnist_get_data.sh | 0 test.sh | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) rename get_emnist_data.sh => bin/emnist_get_data.sh (100%) rename get_data.sh => bin/mnist_get_data.sh (100%) diff --git a/README.rst b/README.rst index b361438..a745025 100644 --- a/README.rst +++ b/README.rst @@ -18,7 +18,7 @@ Usage - ``cd python-mnist`` - Get MNIST data:: - ./get_data.sh + ./bin/mnist_get_data.sh - Check preview with:: @@ -54,7 +54,7 @@ EMNIST - Get EMNIST data:: - ./get_emnist_data.sh + ./bin/emnist_get_data.sh - Check preview with:: diff --git a/get_emnist_data.sh b/bin/emnist_get_data.sh similarity index 100% rename from get_emnist_data.sh rename to bin/emnist_get_data.sh diff --git a/get_data.sh b/bin/mnist_get_data.sh similarity index 100% rename from get_data.sh rename to bin/mnist_get_data.sh diff --git a/test.sh b/test.sh index c326c32..4ccbb0d 100755 --- a/test.sh +++ b/test.sh @@ -9,7 +9,7 @@ if [ $? -ne 0 ]; then fi if [ ! -f $dfile ]; then - echo "MNIST data not found, fetch with get_data.sh script" + echo "MNIST data not found, fetch with ./bin/mnist_get_data.sh script" echo "was looking for $dfile" exit 1 fi From 657ed839ca0352f94981ccb50b273a1c8d929732 Mon Sep 17 00:00:00 2001 From: Richard Marko Date: Sun, 1 Mar 2020 14:17:59 +0100 Subject: [PATCH 15/21] Add scripts to setup.py includig all bin/ scripts Closes #24. --- setup.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/setup.py b/setup.py index 1d7c0b4..3b2b80e 100644 --- a/setup.py +++ b/setup.py @@ -51,6 +51,14 @@ package_dir={'mnist': 'mnist'}, packages=packages, package_data={'mnist': data_files}, + scripts=[ + 'bin/emnist_preview', + 'bin/emnist_repack', + 'bin/emnist_get_data.sh', + + 'bin/mnist_preview', + 'bin/mnist_get_data.sh', + ], classifiers=[ 'Development Status :: 5 - Production/Stable', From 003f299056bca6e38bea02d60ccf74c843b04bdb Mon Sep 17 00:00:00 2001 From: Richard Marko Date: Sun, 1 Mar 2020 15:03:20 +0100 Subject: [PATCH 16/21] reformat readme --- README.rst | 174 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 101 insertions(+), 73 deletions(-) diff --git a/README.rst b/README.rst index a745025..a0f0f57 100644 --- a/README.rst +++ b/README.rst @@ -3,124 +3,152 @@ python-mnist Simple MNIST and EMNIST data parser written in pure Python. -MNIST is a database of handwritten digits available on http://yann.lecun.com/exdb/mnist/. -EMNIST is an extended MNIST database https://www.nist.gov/itl/iad/image-group/emnist-dataset. +MNIST is a database of handwritten digits available on +http://yann.lecun.com/exdb/mnist/. EMNIST is an extended MNIST database +https://www.nist.gov/itl/iad/image-group/emnist-dataset. Requirements ------------ -- Python 2 or Python 3 +- Python 2 or Python 3 Usage ----- -- ``git clone https://github.com/sorki/python-mnist`` -- ``cd python-mnist`` -- Get MNIST data:: +- ``git clone https://github.com/sorki/python-mnist`` - ./bin/mnist_get_data.sh +- ``cd python-mnist`` -- Check preview with:: +- Get MNIST data: - PYTHONPATH=. ./bin/mnist_preview + :: + ./bin/mnist_get_data.sh + +- Check preview with: + + :: + + PYTHONPATH=. ./bin/mnist_preview Installation ------------ -Get the package from PyPi:: +Get the package from PyPi: - pip install python-mnist +:: -or install with ``setup.py``:: + pip install python-mnist - python setup.py install +or install with ``setup.py``: -Code sample:: +:: - from mnist import MNIST - mndata = MNIST('./dir_with_mnist_data_files') - images, labels = mndata.load_training() + python setup.py install -To enable loading of gzip-ed files use:: +Code sample: - mndata.gz = True +:: + + from mnist import MNIST + mndata = MNIST('./dir_with_mnist_data_files') + images, labels = mndata.load_training() -Library tries to load files named `t10k-images-idx3-ubyte` `train-labels-idx1-ubyte` `train-images-idx3-ubyte` and `t10k-labels-idx1-ubyte`. -If loading throws an exception check if these names match. +To enable loading of gzip-ed files use: + +:: + + mndata.gz = True + +Library tries to load files named t10k-images-idx3-ubyte +train-labels-idx1-ubyte train-images-idx3-ubyte and +t10k-labels-idx1-ubyte. If loading throws an exception check if these +names match. EMNIST ------ -- Get EMNIST data:: +- Get EMNIST data: + + :: + + ./bin/emnist_get_data.sh - ./bin/emnist_get_data.sh +- Check preview with: -- Check preview with:: + :: - PYTHONPATH=. ./bin/emnist_preview + PYTHONPATH=. ./bin/emnist_preview -To use EMNIST datasets you need to call:: +To use EMNIST datasets you need to call: - mndata.select_emnist('digits') +:: + + mndata.select_emnist('digits') -Where `digits` is one of the available EMNIST datasets. You can choose from +Where digits is one of the available EMNIST datasets. You can choose +from - - balanced - - byclass - - bymerge - - digits - - letters - - mnist + - balanced + - byclass + - bymerge + - digits + - letters + - mnist -EMNIST loader uses gziped files by default, this can be disabled by by setting:: +EMNIST loader uses gziped files by default, this can be disabled by by +setting: + +:: - mndata.gz = False + mndata.gz = False -You also need to unpack EMNIST files as `get_emnist_data.sh` script won't do it for you. -EMNIST loader also needs to mirror and rotate images so it is a bit slower (If this is an -issue for you, you should repack the data to avoid mirroring and rotation on each load). +You also need to unpack EMNIST files as bin/emnist_get_data.sh script +won't do it for you. EMNIST loader also needs to mirror and rotate +images so it is a bit slower (If this is an issue for you, you should +repack the data to avoid mirroring and rotation on each load). Notes ----- -This package doesn't use `numpy` by design as when I've tried to find a working implementation -all of them were based on some archaic version of `numpy` and none of them worked. This loads -data files with `struct.unpack` instead. +This package doesn't use numpy by design as when I've tried to find a +working implementation all of them were based on some archaic version of +numpy and none of them worked. This loads data files with struct.unpack +instead. Example ------- :: - $ PYTHONPATH=. ./bin/mnist_preview - Showing num: 3 - - ............................ - ............................ - ............................ - ............................ - ............................ - ............................ - .............@@@@@.......... - ..........@@@@@@@@@@........ - .......@@@@@@......@@....... - .......@@@........@@@....... - .................@@......... - ................@@@......... - ...............@@@@@........ - .............@@@............ - .............@.......@...... - .....................@...... - .....................@@..... - ....................@@...... - ...................@@@...... - .................@@@@....... - ................@@@@........ - ....@........@@@@@.......... - ....@@@@@@@@@@@@............ - ......@@@@@@................ - ............................ - ............................ - ............................ - ............................ + $ PYTHONPATH=. ./bin/mnist_preview + Showing num: 3 + + ............................ + ............................ + ............................ + ............................ + ............................ + ............................ + .............@@@@@.......... + ..........@@@@@@@@@@........ + .......@@@@@@......@@....... + .......@@@........@@@....... + .................@@......... + ................@@@......... + ...............@@@@@........ + .............@@@............ + .............@.......@...... + .....................@...... + .....................@@..... + ....................@@...... + ...................@@@...... + .................@@@@....... + ................@@@@........ + ....@........@@@@@.......... + ....@@@@@@@@@@@@............ + ......@@@@@@................ + ............................ + ............................ + ............................ + ............................ From 2e61c1a6964cb9d7422fe4c2fa04f958f9355527 Mon Sep 17 00:00:00 2001 From: Richard Marko Date: Sun, 1 Mar 2020 15:06:21 +0100 Subject: [PATCH 17/21] fix mail --- AUTHORS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index 6a7f3c9..8d1ea89 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1 +1 @@ -Richard Marko +Richard Marko From 7864655c9c8fbe17765fd42af0a7d436c43de027 Mon Sep 17 00:00:00 2001 From: Richard Marko Date: Sun, 1 Mar 2020 15:06:54 +0100 Subject: [PATCH 18/21] bump to 0.7 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 3b2b80e..1e81b28 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ from distutils.core import setup NAME = 'python-mnist' -VERSION = '0.6' +VERSION = '0.7' # Compile the list of packages available, because distutils doesn't have # an easy way to do this. From 86d854097c4f63c73a59596158c64fb8f79e6e81 Mon Sep 17 00:00:00 2001 From: Richard Marko Date: Sun, 1 Mar 2020 16:27:44 +0100 Subject: [PATCH 19/21] add meta.yaml for conda Now also at https://anaconda.org/sorki/python-mnist Closes #22. --- meta.yaml | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 meta.yaml diff --git a/meta.yaml b/meta.yaml new file mode 100644 index 0000000..caa96e3 --- /dev/null +++ b/meta.yaml @@ -0,0 +1,27 @@ +{% set name = "python-mnist" %} +{% set version = "0.7" %} + +package: + name: "{{ name|lower }}" + version: "{{ version }}" + +source: + url: https://pypi.io/packages/source/{{ name[0] }}/{{ name }}/{{ name }}-{{ version }}.tar.gz + sha256: a0cced01e83b5b844cff86109280df7a672a8e4e38fc19fa68999a17f8a9fbd8 + +build: + number: 0 + script: "{{ PYTHON }} -m pip install . --no-deps --ignore-installed -vv " + +requirements: + host: + - pip + - python + run: + - python + +about: + home: https://github.com/sorki/python-mnist + license: BSD + license_family: BSD + summary: Simple MNIST and EMNIST data parser written in pure Python From 23d1ebf88586e0aa3175816bee10d079eeb5b2a1 Mon Sep 17 00:00:00 2001 From: Richard Marko Date: Sun, 1 Mar 2020 16:39:33 +0100 Subject: [PATCH 20/21] conda: add build: noarch_python: True --- meta.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/meta.yaml b/meta.yaml index caa96e3..e813749 100644 --- a/meta.yaml +++ b/meta.yaml @@ -12,6 +12,7 @@ source: build: number: 0 script: "{{ PYTHON }} -m pip install . --no-deps --ignore-installed -vv " + noarch_python: True requirements: host: From 42d036d2f16e07f15de3850aa3d965818f60526a Mon Sep 17 00:00:00 2001 From: Richard Marko Date: Wed, 13 Oct 2021 13:40:23 +0200 Subject: [PATCH 21/21] README: quotes --- README.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.rst b/README.rst index a0f0f57..68626aa 100644 --- a/README.rst +++ b/README.rst @@ -60,9 +60,9 @@ To enable loading of gzip-ed files use: mndata.gz = True -Library tries to load files named t10k-images-idx3-ubyte -train-labels-idx1-ubyte train-images-idx3-ubyte and -t10k-labels-idx1-ubyte. If loading throws an exception check if these +Library tries to load files named `t10k-images-idx3-ubyte` +`train-labels-idx1-ubyte` `train-images-idx3-ubyte` and +`t10k-labels-idx1-ubyte`. If loading throws an exception check if these names match. EMNIST @@ -103,7 +103,7 @@ setting: mndata.gz = False -You also need to unpack EMNIST files as bin/emnist_get_data.sh script +You also need to unpack EMNIST files as `bin/emnist_get_data.sh` script won't do it for you. EMNIST loader also needs to mirror and rotate images so it is a bit slower (If this is an issue for you, you should repack the data to avoid mirroring and rotation on each load). @@ -111,9 +111,9 @@ repack the data to avoid mirroring and rotation on each load). Notes ----- -This package doesn't use numpy by design as when I've tried to find a +This package doesn't use `numpy` by design as when I've tried to find a working implementation all of them were based on some archaic version of -numpy and none of them worked. This loads data files with struct.unpack +numpy and none of them worked. This loads data files with `struct.unpack` instead. Example