
Commit 90bb4de

New Examples (aymericdamien#160)
* Added basic models examples (kmeans, random forest, ...)
* Added API examples (layers, estimator, ...)
* Added other examples (Multi-GPU, build a dataset, ...)
* Notebook refactoring with new header and more details
1 parent 4e829a6 commit 90bb4de


56 files changed: 6451 additions & 1524 deletions

README.md

Lines changed: 62 additions & 82 deletions
Large diffs are not rendered by default.

examples/2_BasicModels/kmeans.py

Lines changed: 86 additions & 0 deletions
@@ -0,0 +1,86 @@
""" K-Means.

Implement K-Means algorithm with TensorFlow, and apply it to classify
handwritten digit images. This example is using the MNIST database of
handwritten digits as training samples (http://yann.lecun.com/exdb/mnist/).

Note: This example requires TensorFlow v1.1.0 or over.

Author: Aymeric Damien
Project: https://github.com/aymericdamien/TensorFlow-Examples/
"""

from __future__ import print_function

import numpy as np
import tensorflow as tf
from tensorflow.contrib.factorization import KMeans

# Ignore all GPUs, tf k-means does not benefit from it.
import os
os.environ["CUDA_VISIBLE_DEVICES"] = ""

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
full_data_x = mnist.train.images

# Parameters
num_steps = 50  # Total steps to train
batch_size = 1024  # The number of samples per batch
k = 25  # The number of clusters
num_classes = 10  # The 10 digits
num_features = 784  # Each image is 28x28 pixels

# Input images
X = tf.placeholder(tf.float32, shape=[None, num_features])
# Labels (for assigning a label to a centroid and testing)
Y = tf.placeholder(tf.float32, shape=[None, num_classes])

# K-Means Parameters
kmeans = KMeans(inputs=X, num_clusters=k, distance_metric='cosine',
                use_mini_batch=True)

# Build KMeans graph
(all_scores, cluster_idx, scores, cluster_centers_initialized, init_op,
 train_op) = kmeans.training_graph()
cluster_idx = cluster_idx[0]  # fix for cluster_idx being a tuple
avg_distance = tf.reduce_mean(scores)

# Initialize the variables (i.e. assign their default value)
init_vars = tf.global_variables_initializer()

# Start TensorFlow session
sess = tf.Session()

# Run the initializer
sess.run(init_vars, feed_dict={X: full_data_x})
sess.run(init_op, feed_dict={X: full_data_x})

# Training
for i in range(1, num_steps + 1):
    _, d, idx = sess.run([train_op, avg_distance, cluster_idx],
                         feed_dict={X: full_data_x})
    if i % 10 == 0 or i == 1:
        print("Step %i, Avg Distance: %f" % (i, d))

# Assign a label to each centroid
# Count total number of labels per centroid, using the label of each training
# sample to their closest centroid (given by 'idx')
counts = np.zeros(shape=(k, num_classes))
for i in range(len(idx)):
    counts[idx[i]] += mnist.train.labels[i]
# Assign the most frequent label to the centroid
labels_map = [np.argmax(c) for c in counts]
labels_map = tf.convert_to_tensor(labels_map)

# Evaluation ops
# Lookup: centroid_id -> label
cluster_label = tf.nn.embedding_lookup(labels_map, cluster_idx)
# Compute accuracy
correct_prediction = tf.equal(cluster_label, tf.cast(tf.argmax(Y, 1), tf.int32))
accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Test Model
test_x, test_y = mnist.test.images, mnist.test.labels
print("Test Accuracy:", sess.run(accuracy_op, feed_dict={X: test_x, Y: test_y}))

examples/2_BasicModels/linear_regression.py

Lines changed: 4 additions & 2 deletions
@@ -41,11 +41,13 @@
 # Note, minimize() knows to modify W and b because Variable objects are trainable=True by default
 optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

-# Initializing the variables
+# Initialize the variables (i.e. assign their default value)
 init = tf.global_variables_initializer()

-# Launch the graph
+# Start training
 with tf.Session() as sess:
+
+    # Run the initializer
     sess.run(init)

     # Fit all training data
examples/2_BasicModels/logistic_regression.py

Lines changed: 4 additions & 2 deletions
@@ -37,11 +37,13 @@
 # Gradient Descent
 optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

-# Initializing the variables
+# Initialize the variables (i.e. assign their default value)
 init = tf.global_variables_initializer()

-# Launch the graph
+# Start training
 with tf.Session() as sess:
+
+    # Run the initializer
     sess.run(init)

     # Training cycle

examples/2_BasicModels/nearest_neighbor.py

Lines changed: 4 additions & 2 deletions
@@ -32,11 +32,13 @@

 accuracy = 0.

-# Initializing the variables
+# Initialize the variables (i.e. assign their default value)
 init = tf.global_variables_initializer()

-# Launch the graph
+# Start training
 with tf.Session() as sess:
+
+    # Run the initializer
     sess.run(init)

     # loop over test data
examples/2_BasicModels/random_forest.py

Lines changed: 75 additions & 0 deletions

@@ -0,0 +1,75 @@
""" Random Forest.

Implement Random Forest algorithm with TensorFlow, and apply it to classify
handwritten digit images. This example is using the MNIST database of
handwritten digits as training samples (http://yann.lecun.com/exdb/mnist/).

Author: Aymeric Damien
Project: https://github.com/aymericdamien/TensorFlow-Examples/
"""

from __future__ import print_function

import tensorflow as tf
from tensorflow.contrib.tensor_forest.python import tensor_forest

# Ignore all GPUs, tf random forest does not benefit from it.
import os
os.environ["CUDA_VISIBLE_DEVICES"] = ""

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=False)

# Parameters
num_steps = 500  # Total steps to train
batch_size = 1024  # The number of samples per batch
num_classes = 10  # The 10 digits
num_features = 784  # Each image is 28x28 pixels
num_trees = 10
max_nodes = 1000

# Input and Target data
X = tf.placeholder(tf.float32, shape=[None, num_features])
# For random forest, labels must be integers (the class id)
Y = tf.placeholder(tf.int32, shape=[None])

# Random Forest Parameters
hparams = tensor_forest.ForestHParams(num_classes=num_classes,
                                      num_features=num_features,
                                      num_trees=num_trees,
                                      max_nodes=max_nodes).fill()

# Build the Random Forest
forest_graph = tensor_forest.RandomForestGraphs(hparams)
# Get training graph and loss
train_op = forest_graph.training_graph(X, Y)
loss_op = forest_graph.training_loss(X, Y)

# Measure the accuracy
infer_op = forest_graph.inference_graph(X)
correct_prediction = tf.equal(tf.argmax(infer_op, 1), tf.cast(Y, tf.int64))
accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Initialize the variables (i.e. assign their default value)
init_vars = tf.global_variables_initializer()

# Start TensorFlow session
sess = tf.Session()

# Run the initializer
sess.run(init_vars)

# Training
for i in range(1, num_steps + 1):
    # Prepare Data
    # Get the next batch of MNIST data
    batch_x, batch_y = mnist.train.next_batch(batch_size)
    _, l = sess.run([train_op, loss_op], feed_dict={X: batch_x, Y: batch_y})
    if i % 50 == 0 or i == 1:
        acc = sess.run(accuracy_op, feed_dict={X: batch_x, Y: batch_y})
        print('Step %i, Loss: %f, Acc: %f' % (i, l, acc))

# Test Model
test_x, test_y = mnist.test.images, mnist.test.labels
print("Test Accuracy:", sess.run(accuracy_op, feed_dict={X: test_x, Y: test_y}))
examples/3_NeuralNetworks/autoencoder.py

Lines changed: 76 additions & 50 deletions

@@ -1,13 +1,18 @@
-# -*- coding: utf-8 -*-
-
 """ Auto Encoder Example.
-Using an auto encoder on MNIST handwritten digits.
+
+Build a 2 layers auto-encoder with TensorFlow to compress images to a
+lower latent space and then reconstruct them.
+
 References:
     Y. LeCun, L. Bottou, Y. Bengio, and P. Haffner. "Gradient-based
     learning applied to document recognition." Proceedings of the IEEE,
     86(11):2278-2324, November 1998.
+
 Links:
     [MNIST Dataset] http://yann.lecun.com/exdb/mnist/
+
+Author: Aymeric Damien
+Project: https://github.com/aymericdamien/TensorFlow-Examples/
 """
 from __future__ import division, print_function, absolute_import

@@ -17,37 +22,37 @@

 # Import MNIST data
 from tensorflow.examples.tutorials.mnist import input_data
-mnist = input_data.read_data_sets("MNIST_data", one_hot=True)
+mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

-# Parameters
+# Training Parameters
 learning_rate = 0.01
-training_epochs = 20
+num_steps = 30000
 batch_size = 256
-display_step = 1
+
+display_step = 1000
 examples_to_show = 10

 # Network Parameters
-n_hidden_1 = 256 # 1st layer num features
-n_hidden_2 = 128 # 2nd layer num features
-n_input = 784 # MNIST data input (img shape: 28*28)
+num_hidden_1 = 256 # 1st layer num features
+num_hidden_2 = 128 # 2nd layer num features (the latent dim)
+num_input = 784 # MNIST data input (img shape: 28*28)

 # tf Graph input (only pictures)
-X = tf.placeholder("float", [None, n_input])
+X = tf.placeholder("float", [None, num_input])

 weights = {
-    'encoder_h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
-    'encoder_h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
-    'decoder_h1': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_1])),
-    'decoder_h2': tf.Variable(tf.random_normal([n_hidden_1, n_input])),
+    'encoder_h1': tf.Variable(tf.random_normal([num_input, num_hidden_1])),
+    'encoder_h2': tf.Variable(tf.random_normal([num_hidden_1, num_hidden_2])),
+    'decoder_h1': tf.Variable(tf.random_normal([num_hidden_2, num_hidden_1])),
+    'decoder_h2': tf.Variable(tf.random_normal([num_hidden_1, num_input])),
 }
 biases = {
-    'encoder_b1': tf.Variable(tf.random_normal([n_hidden_1])),
-    'encoder_b2': tf.Variable(tf.random_normal([n_hidden_2])),
-    'decoder_b1': tf.Variable(tf.random_normal([n_hidden_1])),
-    'decoder_b2': tf.Variable(tf.random_normal([n_input])),
+    'encoder_b1': tf.Variable(tf.random_normal([num_hidden_1])),
+    'encoder_b2': tf.Variable(tf.random_normal([num_hidden_2])),
+    'decoder_b1': tf.Variable(tf.random_normal([num_hidden_1])),
+    'decoder_b2': tf.Variable(tf.random_normal([num_input])),
 }

-
 # Building the encoder
 def encoder(x):
     # Encoder Hidden layer with sigmoid activation #1

@@ -79,38 +84,59 @@ def decoder(x):
 y_true = X

 # Define loss and optimizer, minimize the squared error
-cost = tf.reduce_mean(tf.pow(y_true - y_pred, 2))
-optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(cost)
+loss = tf.reduce_mean(tf.pow(y_true - y_pred, 2))
+optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(loss)

-# Initializing the variables
+# Initialize the variables (i.e. assign their default value)
 init = tf.global_variables_initializer()

-# Launch the graph
+# Start Training
+# Start a new TF session
 with tf.Session() as sess:
+
+    # Run the initializer
     sess.run(init)
-    total_batch = int(mnist.train.num_examples/batch_size)
-    # Training cycle
-    for epoch in range(training_epochs):
-        # Loop over all batches
-        for i in range(total_batch):
-            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
-            # Run optimization op (backprop) and cost op (to get loss value)
-            _, c = sess.run([optimizer, cost], feed_dict={X: batch_xs})
-        # Display logs per epoch step
-        if epoch % display_step == 0:
-            print("Epoch:", '%04d' % (epoch+1),
-                  "cost=", "{:.9f}".format(c))
-
-    print("Optimization Finished!")
-
-    # Applying encode and decode over test set
-    encode_decode = sess.run(
-        y_pred, feed_dict={X: mnist.test.images[:examples_to_show]})
-    # Compare original images with their reconstructions
-    f, a = plt.subplots(2, 10, figsize=(10, 2))
-    for i in range(examples_to_show):
-        a[0][i].imshow(np.reshape(mnist.test.images[i], (28, 28)))
-        a[1][i].imshow(np.reshape(encode_decode[i], (28, 28)))
-    f.show()
-    plt.draw()
-    plt.waitforbuttonpress()
+
+    # Training
+    for i in range(1, num_steps+1):
+        # Prepare Data
+        # Get the next batch of MNIST data (only images are needed, not labels)
+        batch_x, _ = mnist.train.next_batch(batch_size)
+
+        # Run optimization op (backprop) and cost op (to get loss value)
+        _, l = sess.run([optimizer, loss], feed_dict={X: batch_x})
+        # Display logs per step
+        if i % display_step == 0 or i == 1:
+            print('Step %i: Minibatch Loss: %f' % (i, l))
+
+    # Testing
+    # Encode and decode images from test set and visualize their reconstruction.
+    n = 4
+    canvas_orig = np.empty((28 * n, 28 * n))
+    canvas_recon = np.empty((28 * n, 28 * n))
+    for i in range(n):
+        # MNIST test set
+        batch_x, _ = mnist.test.next_batch(n)
+        # Encode and decode the digit image
+        g = sess.run(decoder_op, feed_dict={X: batch_x})
+
+        # Display original images
+        for j in range(n):
+            # Draw the original digits
+            canvas_orig[i * 28:(i + 1) * 28, j * 28:(j + 1) * 28] = \
+                batch_x[j].reshape([28, 28])
+        # Display reconstructed images
+        for j in range(n):
+            # Draw the reconstructed digits
+            canvas_recon[i * 28:(i + 1) * 28, j * 28:(j + 1) * 28] = \
+                g[j].reshape([28, 28])
+
+    print("Original Images")
+    plt.figure(figsize=(n, n))
+    plt.imshow(canvas_orig, origin="upper", cmap="gray")
+    plt.show()
+
+    print("Reconstructed Images")
+    plt.figure(figsize=(n, n))
+    plt.imshow(canvas_recon, origin="upper", cmap="gray")
+    plt.show()
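
The encoder and decoder bodies fall outside the hunks shown above (they are unchanged by this commit). For context, a minimal sketch consistent with the weight/bias names and the "sigmoid activation" comment visible in the diff; an approximation, not the literal file contents:

# Two fully connected layers with sigmoid activations, mirroring the
# weights/biases dictionaries defined above
def encoder(x):
    layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['encoder_h1']),
                                   biases['encoder_b1']))
    layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['encoder_h2']),
                                   biases['encoder_b2']))
    return layer_2

def decoder(x):
    layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['decoder_h1']),
                                   biases['decoder_b1']))
    layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['decoder_h2']),
                                   biases['decoder_b2']))
    return layer_2

# Model wiring, as referenced by decoder_op / y_pred in the training code above
encoder_op = encoder(X)
decoder_op = decoder(encoder_op)
y_pred = decoder_op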
