From ccc7d403d731e819dd591aeeee585bda08b25d91 Mon Sep 17 00:00:00 2001 From: vaxin Date: Wed, 19 Oct 2016 16:47:26 +0800 Subject: [PATCH 01/10] first commit --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index edc03d30..f46dea23 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,4 @@ +# 原项目的中文化伴读 # TensorFlow Examples TensorFlow Tutorial with popular machine learning algorithms implementation. This tutorial was designed for easily diving into TensorFlow, through examples. From 9ac85d57eea1ac7a794f389ed3c48a249105044d Mon Sep 17 00:00:00 2001 From: vaxin Date: Wed, 19 Oct 2016 17:06:59 +0800 Subject: [PATCH 02/10] comment nearest_neighbor --- examples/2_BasicModels/nearest_neighbor.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/examples/2_BasicModels/nearest_neighbor.py b/examples/2_BasicModels/nearest_neighbor.py index f11efcdb..86433bf3 100644 --- a/examples/2_BasicModels/nearest_neighbor.py +++ b/examples/2_BasicModels/nearest_neighbor.py @@ -1,3 +1,4 @@ +# - coding: utf-8 - ''' A nearest neighbor learning algorithm example using TensorFlow library. This example is using the MNIST database of handwritten digits @@ -7,8 +8,16 @@ Project: https://github.com/aymericdamien/TensorFlow-Examples/ ''' +''' +最近邻算法 +基本思想: +计算测试对象与训练集中所有对象的距离,找出与他距离最近的那个对象,对应的标签就是我们预测的标签 +''' + +# __future__是一个编译器兼容性标示,为了后续版本的兼容做出的一个声明,比如这个print_function就是告诉编译器,print方法要以函数的方式对待,不能像现在的版本这样可以不用括号,因为后续的版本已经取消了,不带对号的使用方式 from __future__ import print_function + import numpy as np import tensorflow as tf @@ -21,11 +30,18 @@ Xte, Yte = mnist.test.next_batch(200) #200 for testing # tf Graph Input +# None表示不知这个维度有多少 即是一个有784列,但不知道行数的矩阵 xtr = tf.placeholder("float", [None, 784]) xte = tf.placeholder("float", [784]) # Nearest Neighbor calculation using L1 Distance # Calculate L1 Distance +# L1距离就是 |x - y|,就是两变量相减的绝对值 +# 注意也可以写成 +# distance = tf.reduce_sum(tf.abs(xtr -xte), reduction_indices = 1) +# 其中xtr和xte的定义你会发现shape不同,怎么回事呢? 
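(editor's aside, not part of the patch) A minimal standalone sketch of the broadcasting behaviour the comment above asks about, shown here with NumPy; TensorFlow applies the same rule when a [784] vector is subtracted from a [None, 784] matrix (test.py later in this series demonstrates it with placeholders). Shapes and values below are illustrative only.

import numpy as np

xtr = np.array([[2, 2, 2], [3, 3, 3]])   # stands in for the [None, 784] training batch
xte = np.array([1, 1, 1])                # stands in for the single [784] test sample

diff = np.abs(xtr - xte)                 # xte is broadcast across every row of xtr
print(diff)                              # [[1 1 1] [2 2 2]]

# Summing along axis 1 (reduction_indices=1 in TensorFlow 0.x) gives one L1 distance per
# training sample; axis 0 would instead collapse the rows and sum down each column.
print(np.sum(diff, axis=1))              # [3 6]
print(np.sum(diff, axis=0))              # [3 3 3]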
+# 实验:[ [ 2, 2, 2 ], [ 3, 3, 3] ] - [ 1, 1, 1] = [ [ 1, 1, 1], [ 2, 2, 2 ] ],明白了吧,就是自动遍历能力,遍历xtr里的每一个第二位元素与xte做差 +# reduction_indices 的作用时压缩求和的步骤,比如 redution = 0 就是纵向求和,=1就是横向求和,如果是[ 0, 1 ]就是先纵向求和,再横向求和,对于二维数据而言,得到的就是一个值 distance = tf.reduce_sum(tf.abs(tf.add(xtr, tf.neg(xte))), reduction_indices=1) # Prediction: Get min distance index (Nearest neighbor) pred = tf.arg_min(distance, 0) @@ -42,11 +58,13 @@ # loop over test data for i in range(len(Xte)): # Get nearest neighbor + # 这里Xte[i,:]直接写成Xte[i]就可以, nn_index返回的就是找到的那个最近对象的索引 nn_index = sess.run(pred, feed_dict={xtr: Xtr, xte: Xte[i, :]}) # Get nearest neighbor class label and compare it to its true label print("Test", i, "Prediction:", np.argmax(Ytr[nn_index]), \ "True Class:", np.argmax(Yte[i])) # Calculate accuracy + # argmax作用就是表示向量转索引 如: [ 0, 0, 1, 0 ] --> 2 if np.argmax(Ytr[nn_index]) == np.argmax(Yte[i]): accuracy += 1./len(Xte) print("Done!") From 4b8afe3b945616d9a6e948a35cca68d1abaa6d4d Mon Sep 17 00:00:00 2001 From: vaxin Date: Thu, 20 Oct 2016 11:09:11 +0800 Subject: [PATCH 03/10] finish 2_BasicModels --- examples/2_BasicModels/rand.py | 14 ++++++++++ examples/2_BasicModels/test.py | 38 +++++++++++++++++++++++++++ examples/2_BasicModels/zuilinjin.py | 40 +++++++++++++++++++++++++++++ 3 files changed, 92 insertions(+) create mode 100644 examples/2_BasicModels/rand.py create mode 100644 examples/2_BasicModels/test.py create mode 100644 examples/2_BasicModels/zuilinjin.py diff --git a/examples/2_BasicModels/rand.py b/examples/2_BasicModels/rand.py new file mode 100644 index 00000000..7247dab3 --- /dev/null +++ b/examples/2_BasicModels/rand.py @@ -0,0 +1,14 @@ +import numpy as np +rng = np.random +print rng.randn() +print rng.randn(1) +print rng.randn(5) +print rng.randn(1, 1) +print rng.randn(2, 3) + +import tensorflow as tf +# illegal print tf.truncated_normal() +# illegal print tf.truncated_normal(1) +print tf.truncated_normal([ 1 ]) +print tf.truncated_normal([ 2, 2 ]) + diff --git a/examples/2_BasicModels/test.py b/examples/2_BasicModels/test.py new file mode 100644 index 00000000..4a1c66ea --- /dev/null +++ b/examples/2_BasicModels/test.py @@ -0,0 +1,38 @@ +import tensorflow as tf + +a = tf.placeholder(tf.int32, [None, 4]) +b = tf.placeholder(tf.int32, [4]) + +c = a - b + +feed = { + a: [ [ 2, 2, 2, 2 ], [ 3, 3, 3, 3 ], [ 4, 4, 4, 4 ] ], + b: [ 1, 1, 1, 1 ] +} +sess = tf.Session() +print(sess.run(c, feed_dict = feed)) + +print 'reduce to 1' +e = tf.reduce_sum(c, reduction_indices = 1) +print(sess.run(e, feed_dict = feed)) + +print 'reduce to 0' +d = tf.reduce_sum(c, reduction_indices = 0) +print(sess.run(d, feed_dict = feed)) + + +print 'reduce to [0, 1]' +d = tf.reduce_sum(c, reduction_indices = [0, 1]) +print(sess.run(d, feed_dict = feed)) + +print 'reduce to [1, 0]' +d = tf.reduce_sum(c, reduction_indices = [1, 0]) +print(sess.run(d, feed_dict = feed)) + +print 'reduce to [0, 0]' +d = tf.reduce_sum(c, reduction_indices = [0, 0]) +print(sess.run(d, feed_dict = feed)) + +print 'reduce to [1, 1]' +d = tf.reduce_sum(c, reduction_indices = [1, 1]) +print(sess.run(d, feed_dict = feed)) diff --git a/examples/2_BasicModels/zuilinjin.py b/examples/2_BasicModels/zuilinjin.py new file mode 100644 index 00000000..c4f2c011 --- /dev/null +++ b/examples/2_BasicModels/zuilinjin.py @@ -0,0 +1,40 @@ +from __future__ import print_function + +import tensorflow as tf +import numpy as np +from tensorflow.examples.tutorials.mnist import input_data + +mnist = input_data.read_data_sets('MNIST_data', one_hot = True) + +train_image_set, 
train_label_set = mnist.train.next_batch(500) +test_image_set, test_label_set = mnist.test.next_batch(10) + + +# placeholder, Graph Input +train_images = tf.placeholder(tf.float32, [ None, 784 ]) +test_image = tf.placeholder(tf.float32, [ 784 ]) + + +# L1 distance +# the - will adapts different dimensions and than will iterates the train_images to minus test_image one by one +distance = tf.reduce_sum(tf.abs(train_images - test_image), reduction_indices = 1) + +# find the nearest training example, Graph, not np.argmin +predict_index = tf.arg_min(distance, 0) + +init = tf.initialize_all_variables() + +sess = tf.Session() +sess.run(init) + +accuracy = 0 +for i in range(len(test_image_set)): + best_neighbour_index = sess.run(predict_index, feed_dict={ train_images: train_image_set, test_image: test_image_set[i] }) + + #print("test", i, "Prediction:", train_label_set[best_neighbour_index], "True Class:", test_label_set[i]) + # vector representation --> index e.g. [ 0, 1, 0, 0] --> 1, [ 0, 0, 0, 1 ] --> 3 + print("test", i, "Prediction:", np.argmax(train_label_set[best_neighbour_index]), "True Class:", np.argmax(test_label_set[i])) + if np.argmax(train_label_set[best_neighbour_index]) == np.argmax(test_label_set[i]): + accuracy += 1. / len(test_image_set) + +print("Done!", "Accuracy:", accuracy) From 862947ebe4ba2a3683afaa94cd23a3f1345935b8 Mon Sep 17 00:00:00 2001 From: vaxin Date: Thu, 20 Oct 2016 11:09:33 +0800 Subject: [PATCH 04/10] finish 2_BasicModels --- examples/2_BasicModels/linear_regression.py | 34 ++++++++++++++++++- examples/2_BasicModels/logistic_regression.py | 14 ++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/examples/2_BasicModels/linear_regression.py b/examples/2_BasicModels/linear_regression.py index bcb49358..91bcd4bc 100644 --- a/examples/2_BasicModels/linear_regression.py +++ b/examples/2_BasicModels/linear_regression.py @@ -5,11 +5,19 @@ Project: https://github.com/aymericdamien/TensorFlow-Examples/ ''' +''' +线性回归 +一条直线模型产生的点,我戏称这条直线是样本集的母线,一般来说会围绕在母线附近,回归的目的针对当前样本集找出一条最优可能的直线做他们的母线,从概率上将就是使得P(这条直线|这个样本集)的值是最大的 +''' + from __future__ import print_function import tensorflow as tf import numpy +''' 绘图库,方便的模型、过程和结果可视化 ''' import matplotlib.pyplot as plt + +''' numpy提供的随机函数,特别方便,有各种分布类型可选 ''' rng = numpy.random # Parameters @@ -25,19 +33,43 @@ n_samples = train_X.shape[0] # tf Graph Input +# 写tf.float32最好 X = tf.placeholder("float") Y = tf.placeholder("float") # Set model weights +''' +randn标准正态分布随机发生器,randn() 只产生一个数,randn(1)则产生一个一维数组,只包含一个数, randn(d0, d1, d2, ...)可以产生多维数组 +Variable(初始化值) +也可以使用tf.truncated_normal + +''' W = tf.Variable(rng.randn(), name="weight") b = tf.Variable(rng.randn(), name="bias") # Construct a linear model +# pred = X * W + b pred = tf.add(tf.mul(X, W), b) # Mean squared error +''' + 最小二乘作为成本函数, 找个一个直线的最佳参数组合 cost = tf.reduce_sum(tf.pow(pred-Y, 2))/(2*n_samples) -# Gradient descent + Gradient descent + 梯度下降优化器 + 也可以尝试AdamOptimizer + @@Optimizer + +全部的优化器 + @@GradientDescentOptimizer + @@AdadeltaOptimizer + @@AdagradOptimizer + @@AdagradDAOptimizer + @@MomentumOptimizer + @@AdamOptimizer + @@FtrlOptimizer + @@RMSPropOptimizer +''' optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost) # Initializing the variables diff --git a/examples/2_BasicModels/logistic_regression.py b/examples/2_BasicModels/logistic_regression.py index c2af99c0..64803264 100644 --- a/examples/2_BasicModels/logistic_regression.py +++ b/examples/2_BasicModels/logistic_regression.py @@ -1,3 +1,4 @@ +# coding:utf-8 ''' A logistic regression 
learning algorithm example using TensorFlow library. This example is using the MNIST database of handwritten digits @@ -15,6 +16,7 @@ from tensorflow.examples.tutorials.mnist import input_data mnist = input_data.read_data_sets("/tmp/data/", one_hot=True) +''' 对MNIST进行逻辑回归训练 ''' # Parameters learning_rate = 0.01 training_epochs = 25 @@ -30,9 +32,18 @@ b = tf.Variable(tf.zeros([10])) # Construct model +''' +softmax模型 +softmax(logits, dim = -1, name = None) +softmax = exp(logits) / reduce_sum(exp(logits), dim) +softmax回归是logistic模型在分类问题上的推广,logistic回归是一个二元分类问题,softmax回归是一个多元分类问题 +所以softmax用在二元分类时就是logistic回归了,因而tensorflow只需要提供一个softmax函数就够了 +这哥们这次终于有+号了,你咋不用tf.add了呢? +''' pred = tf.nn.softmax(tf.matmul(x, W) + b) # Softmax # Minimize error using cross entropy +# 使用最小化交叉熵做来cost cost = tf.reduce_mean(-tf.reduce_sum(y*tf.log(pred), reduction_indices=1)) # Gradient Descent optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost) @@ -45,13 +56,16 @@ sess.run(init) # Training cycle + # 做25轮训练,每轮用全部样本做分批梯度下降训练 for epoch in range(training_epochs): avg_cost = 0. total_batch = int(mnist.train.num_examples/batch_size) + # 每批次100个样本 # Loop over all batches for i in range(total_batch): batch_xs, batch_ys = mnist.train.next_batch(batch_size) # Run optimization op (backprop) and cost op (to get loss value) + # run的时候顺便也要把cost计算出来 _, c = sess.run([optimizer, cost], feed_dict={x: batch_xs, y: batch_ys}) # Compute average loss From f4729d5ef7e4238912422b9b26977f73f3b9890e Mon Sep 17 00:00:00 2001 From: vaxin Date: Thu, 20 Oct 2016 20:12:58 +0800 Subject: [PATCH 05/10] finish cnn --- examples/3_NeuralNetworks/autoencoder.py | 7 ++++ .../3_NeuralNetworks/convolutional_network.py | 20 ++++++++++- .../3_NeuralNetworks/multilayer_perceptron.py | 35 +++++++++++++++++++ .../3_NeuralNetworks/recurrent_network.py | 3 ++ 4 files changed, 64 insertions(+), 1 deletion(-) diff --git a/examples/3_NeuralNetworks/autoencoder.py b/examples/3_NeuralNetworks/autoencoder.py index f87f6b23..f424cf35 100644 --- a/examples/3_NeuralNetworks/autoencoder.py +++ b/examples/3_NeuralNetworks/autoencoder.py @@ -8,6 +8,8 @@ 86(11):2278-2324, November 1998. Links: [MNIST Dataset] http://yann.lecun.com/exdb/mnist/ + +自动编码器 """ from __future__ import division, print_function, absolute_import @@ -49,6 +51,7 @@ # Building the encoder +''' 网络的构建与之前的MLP基本相似 ''' def encoder(x): # Encoder Hidden layer with sigmoid activation #1 layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['encoder_h1']), @@ -70,6 +73,7 @@ def decoder(x): return layer_2 # Construct model +''' 既有编码器,又有解码器,编码器和解码器中间夹着中间层,中间层就是我们训练出来的关键特征层 ''' encoder_op = encoder(X) decoder_op = decoder(encoder_op) @@ -79,7 +83,9 @@ def decoder(x): y_true = X # Define loss and optimizer, minimize the squared error +''' 最小二乘做cost,为了保证输入和输出的相同,这和交叉熵的目的是不同的,搞清楚哦 ''' cost = tf.reduce_mean(tf.pow(y_true - y_pred, 2)) +''' RMSPropOptimizer什么东西? 
''' optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(cost) # Initializing the variables @@ -108,6 +114,7 @@ def decoder(x): y_pred, feed_dict={X: mnist.test.images[:examples_to_show]}) # Compare original images with their reconstructions f, a = plt.subplots(2, 10, figsize=(10, 2)) + ''' 对比显示处理前和处理后的图像 ''' for i in range(examples_to_show): a[0][i].imshow(np.reshape(mnist.test.images[i], (28, 28))) a[1][i].imshow(np.reshape(encode_decode[i], (28, 28))) diff --git a/examples/3_NeuralNetworks/convolutional_network.py b/examples/3_NeuralNetworks/convolutional_network.py index 81461237..e75954ea 100644 --- a/examples/3_NeuralNetworks/convolutional_network.py +++ b/examples/3_NeuralNetworks/convolutional_network.py @@ -1,3 +1,4 @@ +#coding:utf-8 ''' A Convolutional Network implementation example using TensorFlow library. This example is using the MNIST database of handwritten digits @@ -5,6 +6,7 @@ Author: Aymeric Damien Project: https://github.com/aymericdamien/TensorFlow-Examples/ +卷积神经网络,由于权重的减少,可以支持更大规模的深度网络的训练 ''' from __future__ import print_function @@ -17,6 +19,7 @@ # Parameters learning_rate = 0.001 +''' 居然要这么大的迭代次数 ''' training_iters = 200000 batch_size = 128 display_step = 10 @@ -29,17 +32,24 @@ # tf Graph input x = tf.placeholder(tf.float32, [None, n_input]) y = tf.placeholder(tf.float32, [None, n_classes]) + +''' keep_prob用于dropout,dropout的目的是减少过拟合,他的实现方法是在训练的过程中,随机的去掉一些链接,这个keep_prob算是一个hyper parameter超级参数,有很多经验值可用 ''' keep_prob = tf.placeholder(tf.float32) #dropout (keep probability) # Create some wrappers for simplicity +''' 生成一个卷积层, stride代表卷积核的每次滑动距离 ''' def conv2d(x, W, b, strides=1): # Conv2D wrapper, with bias and relu activation + ''' + conv2d详解 + TODO + ''' x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME') x = tf.nn.bias_add(x, b) return tf.nn.relu(x) - +''' 池化层,这是一个2x2的池化,也就是前一层每四个神经元的输出映射到下一层的一个神经元的输入,进而将神经元数量压缩到原来的四分之一,减少后续层处理问题所需的计算量 ''' def maxpool2d(x, k=2): # MaxPool2D wrapper return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1], @@ -47,6 +57,12 @@ def maxpool2d(x, k=2): # Create model +''' +该卷积网络由卷积层+2x2池化层+卷积层+2x2池化层+一层全连接+输出层组成 +dropout发生在全连接层 +因而由四组权重值:分别是卷积1层权重,卷积2层权重,全连接层权重,输出层权重 +http://neuralnetworksanddeeplearning.com/chap6.html +''' def conv_net(x, weights, biases, dropout): # Reshape input picture x = tf.reshape(x, shape=[-1, 28, 28, 1]) @@ -74,6 +90,7 @@ def conv_net(x, weights, biases, dropout): return out # Store layers weight & bias +''' 卷积核大小5x5,通过两次池化28x28的输入变成了7x7的输入,全连接层有1024个输出,一般理解为1024个高维特征''' weights = { # 5x5 conv, 1 input, 32 outputs 'wc1': tf.Variable(tf.random_normal([5, 5, 1, 32])), @@ -96,6 +113,7 @@ def conv_net(x, weights, biases, dropout): pred = conv_net(x, weights, biases, keep_prob) # Define loss and optimizer +''' 依旧使用softmax交叉熵cost和Adam优化器 ''' cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y)) optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) diff --git a/examples/3_NeuralNetworks/multilayer_perceptron.py b/examples/3_NeuralNetworks/multilayer_perceptron.py index b5c990f3..8cbee4d9 100644 --- a/examples/3_NeuralNetworks/multilayer_perceptron.py +++ b/examples/3_NeuralNetworks/multilayer_perceptron.py @@ -1,3 +1,4 @@ +#coding:utf-8 ''' A Multilayer Perceptron implementation example using TensorFlow library. 
This example is using the MNIST database of handwritten digits @@ -5,6 +6,8 @@ Author: Aymeric Damien Project: https://github.com/aymericdamien/TensorFlow-Examples/ +多层感知器模型应该是这几个文件中的第一个应该学习的文件,是最早提出来的深度神经网络 +简称MLP ''' from __future__ import print_function @@ -35,16 +38,22 @@ # Create model def multilayer_perceptron(x, weights, biases): # Hidden layer with RELU activation + ''' x * weights_h1 + b1 + relu做激活函数, 计算量小,效果也很好 + ''' layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1']) layer_1 = tf.nn.relu(layer_1) # Hidden layer with RELU activation + ''' layer_1 * weights_h2 + b2 ''' layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2']) layer_2 = tf.nn.relu(layer_2) # Output layer with linear activation + ''' layer_2 * weight_out + out ''' out_layer = tf.matmul(layer_2, weights['out']) + biases['out'] return out_layer # Store layers weight & bias +''' 随机初始化权重 ''' weights = { 'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])), 'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])), @@ -60,7 +69,32 @@ def multilayer_perceptron(x, weights, biases): pred = multilayer_perceptron(x, weights, biases) # Define loss and optimizer +''' +softmax_cross_entropy_with_logits详解 + +softmax的效果就是将输入的数据压扁以使得sum(outputs) = 1 +如: +a = tf.constant(np.array([[.1, .3, .5, .9]])) +print s.run(tf.nn.softmax(a)) +[[ 0.16838508 0.205666 0.25120102 0.37474789]] +相反:softmax_cross_entropy_with_logits计算softmax处理后的值的交叉熵 +mid = softmax(inputs) +outputs = cross_entropy(mid) + +The cross entropy is a summary metric - it sums across the elements. The output of tf.nn.softmax_cross_entropy_with_logits on a shape [2,5] tensor is of shape [2,1] (the first dimension is treated as the batch). +交叉熵是一种系统的度量标准,总结整个系统内元素之间的某种关系。 +shape是[2, 5]的向量通过softmax_cross_entropy_with_logits计算后得到的是一个[2, 1]的向量,第一个维度默认当做batch对待 + +logits代表没有正则化的数据,输出之和一般也不等于1,不能解释为概率,所以定义softmax(logits)的输入就是logits,logits通过softmax处理后就变成了一种概率表示,加和为1 + +softmax是一个计算函数,softmax_cross_entropy_with_logits是一个训练中才使用的softmax的成本计算函数 +''' cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y)) + +''' +Adam优化器比梯度下降优化器要快 +''' +#optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost) optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) # Initializing the variables @@ -71,6 +105,7 @@ def multilayer_perceptron(x, weights, biases): sess.run(init) # Training cycle + # 做15次迭代训练 for epoch in range(training_epochs): avg_cost = 0. total_batch = int(mnist.train.num_examples/batch_size) diff --git a/examples/3_NeuralNetworks/recurrent_network.py b/examples/3_NeuralNetworks/recurrent_network.py index 7fcd0a82..02d6c681 100644 --- a/examples/3_NeuralNetworks/recurrent_network.py +++ b/examples/3_NeuralNetworks/recurrent_network.py @@ -5,6 +5,7 @@ Author: Aymeric Damien Project: https://github.com/aymericdamien/TensorFlow-Examples/ +终于来到RNN ''' from __future__ import print_function @@ -21,6 +22,8 @@ To classify images using a recurrent neural network, we consider every image row as a sequence of pixels. Because MNIST image shape is 28*28px, we will then handle 28 sequences of 28 steps for every sample. +作者将RNN用在了MNIST上,看看能玩出什么花样? 
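(editor's aside, not part of the patch) A small NumPy sketch of what the softmax_cross_entropy_with_logits notes in multilayer_perceptron.py above describe: softmax squashes each row of logits into a probability distribution, and the cross entropy then yields one scalar loss per row (a length-2 vector for a [2, 5] logits batch). The logits row below reuses the [.1, .3, .5, .9] example quoted in that comment; the labels are an illustrative assumption.

import numpy as np

def softmax(logits):
    # subtract the row-wise max for numerical stability; each row of the result sums to 1
    shifted = logits - logits.max(axis=1, keepdims=True)
    e = np.exp(shifted)
    return e / e.sum(axis=1, keepdims=True)

logits = np.array([[0.1, 0.3, 0.5, 0.9],
                   [2.0, 1.0, 0.1, 0.0]])
labels = np.array([[0.0, 0.0, 0.0, 1.0],    # one-hot true classes (illustrative)
                   [1.0, 0.0, 0.0, 0.0]])

probs = softmax(logits)
print(probs[0])                             # ~[0.168, 0.206, 0.251, 0.375], matching the note above

xent = -np.sum(labels * np.log(probs), axis=1)
print(xent.shape)                           # (2,) -- one cross-entropy value per example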
+将图像的每一行当成一个sequence,28行对应28个sequences ''' # Parameters From 9fac00983308d0ee32c063a625aae5fc51f15354 Mon Sep 17 00:00:00 2001 From: vaxin Date: Thu, 20 Oct 2016 20:27:17 +0800 Subject: [PATCH 06/10] done --- examples/3_NeuralNetworks/recurrent_network.py | 7 ++++++- examples/4_Utils/save_restore_model.py | 1 + examples/4_Utils/tensorboard_advanced.py | 1 + examples/4_Utils/tensorboard_basic.py | 3 +++ examples/5_MultiGPU/multigpu_basics.py | 1 + 5 files changed, 12 insertions(+), 1 deletion(-) diff --git a/examples/3_NeuralNetworks/recurrent_network.py b/examples/3_NeuralNetworks/recurrent_network.py index 02d6c681..46f9afdf 100644 --- a/examples/3_NeuralNetworks/recurrent_network.py +++ b/examples/3_NeuralNetworks/recurrent_network.py @@ -5,12 +5,15 @@ Author: Aymeric Damien Project: https://github.com/aymericdamien/TensorFlow-Examples/ -终于来到RNN + +终于来到RNN,其实这里就是LSTM +官方教程: https://www.tensorflow.org/versions/r0.11/tutorials/recurrent/index.html ''' from __future__ import print_function import tensorflow as tf +''' RNN包 ''' from tensorflow.python.ops import rnn, rnn_cell import numpy as np @@ -22,6 +25,7 @@ To classify images using a recurrent neural network, we consider every image row as a sequence of pixels. Because MNIST image shape is 28*28px, we will then handle 28 sequences of 28 steps for every sample. + 作者将RNN用在了MNIST上,看看能玩出什么花样? 将图像的每一行当成一个sequence,28行对应28个sequences ''' @@ -65,6 +69,7 @@ def RNN(x, weights, biases): x = tf.split(0, n_steps, x) # Define a lstm cell with tensorflow + ''' lstm_size = n_hidden''' lstm_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0) # Get lstm cell output diff --git a/examples/4_Utils/save_restore_model.py b/examples/4_Utils/save_restore_model.py index d1e31781..f840867d 100644 --- a/examples/4_Utils/save_restore_model.py +++ b/examples/4_Utils/save_restore_model.py @@ -67,6 +67,7 @@ def multilayer_perceptron(x, weights, biases): init = tf.initialize_all_variables() # 'Saver' op to save and restore all the variables +''' 整个文件的关键点 tf.train.Saver() save, restore两个方法''' saver = tf.train.Saver() # Running first session diff --git a/examples/4_Utils/tensorboard_advanced.py b/examples/4_Utils/tensorboard_advanced.py index ee6cd150..1943c1f2 100644 --- a/examples/4_Utils/tensorboard_advanced.py +++ b/examples/4_Utils/tensorboard_advanced.py @@ -90,6 +90,7 @@ def multilayer_perceptron(x, weights, biases): # Initializing the variables init = tf.initialize_all_variables() +''' 可视化手段 ''' # Create a summary to monitor cost tensor tf.scalar_summary("loss", loss) # Create a summary to monitor accuracy tensor diff --git a/examples/4_Utils/tensorboard_basic.py b/examples/4_Utils/tensorboard_basic.py index c690d549..5b7059f0 100644 --- a/examples/4_Utils/tensorboard_basic.py +++ b/examples/4_Utils/tensorboard_basic.py @@ -34,6 +34,7 @@ # Construct model and encapsulating all ops into scopes, making # Tensorboard's Graph visualization more convenient +''' scope的概念 ''' with tf.name_scope('Model'): # Model pred = tf.nn.softmax(tf.matmul(x, W) + b) # Softmax @@ -58,6 +59,8 @@ # Merge all summaries into a single op merged_summary_op = tf.merge_all_summaries() +''' 方便的管理tensor们 ''' + # Launch the graph with tf.Session() as sess: sess.run(init) diff --git a/examples/5_MultiGPU/multigpu_basics.py b/examples/5_MultiGPU/multigpu_basics.py index b31120fa..ab4fa953 100644 --- a/examples/5_MultiGPU/multigpu_basics.py +++ b/examples/5_MultiGPU/multigpu_basics.py @@ -69,6 +69,7 @@ def matpow(M, n): Multi GPU computing ''' # GPU:0 computes A^n +''' 将一个变量绑定到一个特定设备上,如CPU或GPU 
''' with tf.device('/gpu:0'): # Compute A^n and store result in c2 a = tf.placeholder(tf.float32, [10000, 10000]) From 8f95c6ef9d757bf93a0224354171d583f0fa9bac Mon Sep 17 00:00:00 2001 From: vaxin Date: Tue, 15 Nov 2016 17:49:45 +0800 Subject: [PATCH 07/10] add vae --- .../varational_autoencoder.py | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 examples/3_NeuralNetworks/varational_autoencoder.py diff --git a/examples/3_NeuralNetworks/varational_autoencoder.py b/examples/3_NeuralNetworks/varational_autoencoder.py new file mode 100644 index 00000000..4e95b146 --- /dev/null +++ b/examples/3_NeuralNetworks/varational_autoencoder.py @@ -0,0 +1,61 @@ +# -*- coding: utf-8 -*- + +""" Varational Auto Encoder Example. +Using an auto encoder on MNIST handwritten digits. +References: + Y. LeCun, L. Bottou, Y. Bengio, and P. Haffner. "Gradient-based + learning applied to document recognition." Proceedings of the IEEE, + 86(11):2278-2324, November 1998. +Links: + [MNIST Dataset] http://yann.lecun.com/exdb/mnist/ + +变分自动编码器 +""" +from __future__ import division, print_function, absolute_import + +import tensorflow as tf +import numpy as np +import matplotlib.pyplot as plt + +# Import MNIST data +from tensorflow.examples.tutorials.mnist import input_data +mnist = input_data.read_data_sets("/tmp/data/", one_hot=True) + +# 样本集X +n_input = 784 # 28 * 28 +X = tf.placeholder(tf.float32, [ None, n_input ]) + +# Encoder + +## \mu(X) +W_mu = +b_mu = +mu = tf.matmul(X, W_mu) + b_mu + +## \Sigma(X) +W_sigma = +b_sigma = +sigma = tf.matmul(X, W_sigma) + b_sigma + +## KLD = D[N(mu(X), sigma(X))||N(0, I)] = 1/2 * sum(sigma_i + mu_i^2 - log(sigma_i) - 1) +KLD = 0.5 * tf.reduce_sum(sigma + tf.pow(mu, 2) - tf.log(sigma) - 1, reduction_indices = 1) # reduction_indices = 1代表按照每个样本计算一条KLD + + +# epsilon = N(0, I) 采样模块 +epsilon = tf.random_normal(tf.shape(sigma), name = 'epsilon') + +# z = mu + sigma^ 0.5 * epsilon +z = mu + tf.pow(sigma, 0.5) * epsilon + +# Decoder ||f(z) - X|| ^ 2 重建的X与X的欧式距离,更加成熟的做法是使用crossentropy +W_decoder +b_decoder +reconstructed_X = tf.matmul(z, W_decoder) + b_decoder + + +reconstruction_loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(reconstructed_X, X), reduction_indices = 1) + +loss = tf.reduce_mean(reconstruction_loss + KLD) + + +# minimize(loss) From 6c093f2d4e75a5d7b71ed940f47b3e98ee9f3b2b Mon Sep 17 00:00:00 2001 From: vaxin Date: Tue, 15 Nov 2016 19:21:16 +0800 Subject: [PATCH 08/10] add vae --- .../variational_autoencoder.py | 87 +++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 examples/3_NeuralNetworks/variational_autoencoder.py diff --git a/examples/3_NeuralNetworks/variational_autoencoder.py b/examples/3_NeuralNetworks/variational_autoencoder.py new file mode 100644 index 00000000..8ba823b1 --- /dev/null +++ b/examples/3_NeuralNetworks/variational_autoencoder.py @@ -0,0 +1,87 @@ +# -*- coding: utf-8 -*- + +""" Varational Auto Encoder Example. +Using an auto encoder on MNIST handwritten digits. +References: + Y. LeCun, L. Bottou, Y. Bengio, and P. Haffner. "Gradient-based + learning applied to document recognition." Proceedings of the IEEE, + 86(11):2278-2324, November 1998. 
+Links: + [MNIST Dataset] http://yann.lecun.com/exdb/mnist/ + +变分自动编码器 +""" +from __future__ import division, print_function, absolute_import + +import tensorflow as tf +import numpy as np +import matplotlib.pyplot as plt + +#Import MNIST data +from tensorflow.examples.tutorials.mnist import input_data +mnist = input_data.read_data_sets("/tmp/data/", one_hot=True) + +# 基础模块 + +class Layer: + def __init__(self, input, n_output): + self.input = input + W = tf.Variable(tf.random_normal([ int(self.input.get_shape()[1]), n_output ], stddev = 0.001))#tf.shape(input)[0] + b = tf.Variable(tf.constant(0., shape = [ n_output ])) + + self.raw_output = tf.matmul(input, W) + b + self.output = tf.nn.relu(self.raw_output) + + +# 样本集X +n_X = 784 # 28 * 28 +n_z = 100 +X = tf.placeholder(tf.float32, shape = [ None, n_X ]) + +# Encoder + +## \mu(X) 采用二层网络 +ENCODER_HIDDEN_COUNT = 400 +mu = Layer(Layer(X, ENCODER_HIDDEN_COUNT).output, n_z).raw_output + +## \Sigma(X) 采用二层网络 +sigma = Layer(Layer(X, ENCODER_HIDDEN_COUNT).output, n_z).raw_output + +## KLD = D[N(mu(X), sigma(X))||N(0, I)] = 1/2 * sum(sigma_i + mu_i^2 - log(sigma_i) - 1) +KLD = 0.5 * tf.reduce_sum(sigma + tf.pow(mu, 2) - tf.log(sigma) - 1, reduction_indices = 1) # reduction_indices = 1代表按照每个样本计算一条KLD + + +# epsilon = N(0, I) 采样模块 +epsilon = tf.random_normal(tf.shape(sigma), name = 'epsilon') + +# z = mu + sigma^ 0.5 * epsilon +z = mu + tf.pow(sigma, 0.5) * epsilon + +# Decoder ||f(z) - X|| ^ 2 重建的X与X的欧式距离,更加成熟的做法是使用crossentropy +def buildDecoderNetwork(z): + # 构建一个二层神经网络,因为二层神经网络可以逼近任何函数 + DECODER_HIDDEN_COUNT = 400 + layer1 = Layer(z, DECODER_HIDDEN_COUNT) + layer2 = Layer(layer1.output, n_X) + return layer2.raw_output + +reconstructed_X = buildDecoderNetwork(z) + +reconstruction_loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(reconstructed_X, X), reduction_indices = 1) + +loss = tf.reduce_mean(reconstruction_loss + KLD) + +# minimize loss +n_steps = 100000 +learning_rate = 0.01 +batch_size = 100 + +optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss) +with tf.Session() as sess: + sess.run(tf.initialize_all_variables()) + for step in xrange(1, n_steps): + batch_x, batch_y = mnist.train.next_batch(batch_size) + _, l = sess.run([ optimizer, loss ], feed_dict = { X: batch_x }) + + if step % 100 == 0: + print('Step', step, ', Loss:', l) From 59b51ce45151645a7ee2296dbd362e758d6efb00 Mon Sep 17 00:00:00 2001 From: vaxin Date: Tue, 15 Nov 2016 19:28:48 +0800 Subject: [PATCH 09/10] done --- .../variational_autoencoder.py | 22 +++++++------------ 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/examples/3_NeuralNetworks/variational_autoencoder.py b/examples/3_NeuralNetworks/variational_autoencoder.py index 8ba823b1..67a4993d 100644 --- a/examples/3_NeuralNetworks/variational_autoencoder.py +++ b/examples/3_NeuralNetworks/variational_autoencoder.py @@ -1,14 +1,7 @@ # -*- coding: utf-8 -*- -""" Varational Auto Encoder Example. -Using an auto encoder on MNIST handwritten digits. -References: - Y. LeCun, L. Bottou, Y. Bengio, and P. Haffner. "Gradient-based - learning applied to document recognition." Proceedings of the IEEE, - 86(11):2278-2324, November 1998. -Links: - [MNIST Dataset] http://yann.lecun.com/exdb/mnist/ - +""" +Varational Auto Encoder Example. 
变分自动编码器 """ from __future__ import division, print_function, absolute_import @@ -26,7 +19,7 @@ class Layer: def __init__(self, input, n_output): self.input = input - W = tf.Variable(tf.random_normal([ int(self.input.get_shape()[1]), n_output ], stddev = 0.001))#tf.shape(input)[0] + W = tf.Variable(tf.truncated_normal([ int(self.input.get_shape()[1]), n_output ], stddev = 0.001))#tf.shape(input)[0] b = tf.Variable(tf.constant(0., shape = [ n_output ])) self.raw_output = tf.matmul(input, W) + b @@ -35,7 +28,7 @@ def __init__(self, input, n_output): # 样本集X n_X = 784 # 28 * 28 -n_z = 100 +n_z = 20 # latent variable count X = tf.placeholder(tf.float32, shape = [ None, n_X ]) # Encoder @@ -45,17 +38,18 @@ def __init__(self, input, n_output): mu = Layer(Layer(X, ENCODER_HIDDEN_COUNT).output, n_z).raw_output ## \Sigma(X) 采用二层网络 -sigma = Layer(Layer(X, ENCODER_HIDDEN_COUNT).output, n_z).raw_output +log_sigma = Layer(Layer(X, ENCODER_HIDDEN_COUNT).output, n_z).raw_output # 为了训练不出nan? 至少实验的时候,直接让这个网络代表sigma是算不出来的,请高人指点!!! +sigma = tf.exp(log_sigma) ## KLD = D[N(mu(X), sigma(X))||N(0, I)] = 1/2 * sum(sigma_i + mu_i^2 - log(sigma_i) - 1) -KLD = 0.5 * tf.reduce_sum(sigma + tf.pow(mu, 2) - tf.log(sigma) - 1, reduction_indices = 1) # reduction_indices = 1代表按照每个样本计算一条KLD +KLD = 0.5 * tf.reduce_sum(sigma + tf.pow(mu, 2) - log_sigma - 1, reduction_indices = 1) # reduction_indices = 1代表按照每个样本计算一条KLD # epsilon = N(0, I) 采样模块 epsilon = tf.random_normal(tf.shape(sigma), name = 'epsilon') # z = mu + sigma^ 0.5 * epsilon -z = mu + tf.pow(sigma, 0.5) * epsilon +z = mu + tf.exp(0.5 * log_sigma) * epsilon # Decoder ||f(z) - X|| ^ 2 重建的X与X的欧式距离,更加成熟的做法是使用crossentropy def buildDecoderNetwork(z): From 9ad905437326c48304223fd02f85b19d63d03665 Mon Sep 17 00:00:00 2001 From: vaxin Date: Tue, 15 Nov 2016 19:38:10 +0800 Subject: [PATCH 10/10] change read me --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index f46dea23..0c3f6316 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,7 @@ It is suitable for beginners who want to find clear and concise examples about T - Bidirectional Recurrent Neural Network (LSTM) ([notebook](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/notebooks/3_NeuralNetworks/bidirectional_rnn.ipynb)) ([code](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3_NeuralNetworks/bidirectional_rnn.py)) - Dynamic Recurrent Neural Network (LSTM) ([code](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3_NeuralNetworks/dynamic_rnn.py)) - AutoEncoder ([notebook](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/notebooks/3_NeuralNetworks/autoencoder.ipynb)) ([code](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3_NeuralNetworks/autoencoder.py)) +- Variational AutoEncoder(VAE) 新增的,就不提供ipynb了 #### 4 - Utilities - Save and Restore a model ([notebook](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/notebooks/4_Utils/save_restore_model.ipynb)) ([code](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/4_Utils/save_restore_model.py))
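(editor's aside, not part of the patch) A minimal NumPy sketch of the two pieces that patches 07-09 above revolve around: the closed-form KL divergence D[N(mu, sigma) || N(0, I)] and the reparameterization trick z = mu + sigma^0.5 * epsilon. As in patch 09, "sigma" denotes the variance rather than the standard deviation, and the encoder is assumed to emit log_sigma so that sigma = exp(log_sigma) stays positive, which is the change the author made after regressing sigma directly failed to train. The names mu, log_sigma and n_z mirror the patch; the batch size and random inputs are illustrative assumptions.

import numpy as np

batch, n_z = 4, 20
mu = np.random.randn(batch, n_z) * 0.1          # pretend encoder output: means
log_sigma = np.random.randn(batch, n_z) * 0.1   # pretend encoder output: log-variances
sigma = np.exp(log_sigma)                       # variance, guaranteed positive

# KLD per sample: 0.5 * sum_i(sigma_i + mu_i^2 - log(sigma_i) - 1); it is zero exactly
# when mu = 0 and sigma = 1, i.e. when the latent code matches the N(0, I) prior.
kld = 0.5 * np.sum(sigma + mu ** 2 - log_sigma - 1.0, axis=1)
print(kld.shape, bool(kld.min() >= 0.0))        # (4,) True

# Reparameterization: sample epsilon ~ N(0, I) outside the model so gradients can flow
# through mu and log_sigma; exp(0.5 * log_sigma) is the standard deviation sigma^0.5.
epsilon = np.random.randn(batch, n_z)
z = mu + np.exp(0.5 * log_sigma) * epsilon
print(z.shape)                                  # (4, 20)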