diff --git a/CNNs/mobilenet_v2.py b/CNNs/mobilenet_v2.py
new file mode 100644
index 0000000..23d6c0f
--- /dev/null
+++ b/CNNs/mobilenet_v2.py
@@ -0,0 +1,349 @@
+"""
+2018-11-24
+"""
+
+from collections import namedtuple
+import copy
+
+import tensorflow as tf
+
+slim = tf.contrib.slim
+
+def _make_divisible(v, divisor, min_value=None):
+    """Round `v` to the nearest multiple of `divisor`, keeping the result at
+    least `min_value` and at least 90% of `v`."""
+    if min_value is None:
+        min_value = divisor
+    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
+    # Make sure that rounding down does not go down by more than 10%.
+    if new_v < 0.9 * v:
+        new_v += divisor
+    return new_v
+
+
+@slim.add_arg_scope
+def _depth_multiplier_func(params,
+                           multiplier,
+                           divisible_by=8,
+                           min_depth=8):
+    """Scale `num_outputs` in `params` by `multiplier`, keeping the result
+    divisible by `divisible_by` and at least `min_depth`."""
+    if 'num_outputs' not in params:
+        return
+    d = params['num_outputs']
+    params['num_outputs'] = _make_divisible(d * multiplier, divisible_by,
+                                            min_depth)
+
+def _fixed_padding(inputs, kernel_size, rate=1):
+    """Pads the input along the spatial dimensions independently of input size.
+    Pads the input such that if it was used in a convolution with 'VALID' padding,
+    the output would have the same dimensions as if the unpadded input was used
+    in a convolution with 'SAME' padding.
+    Args:
+        inputs: A tensor of size [batch, height_in, width_in, channels].
+        kernel_size: The kernel to be used in the conv2d or max_pool2d operation.
+        rate: An integer, rate for atrous convolution.
+    Returns:
+        output: A tensor of size [batch, height_out, width_out, channels] with the
+            input, either intact (if kernel_size == 1) or padded (if kernel_size > 1).
+    """
+    kernel_size_effective = [kernel_size[0] + (kernel_size[0] - 1) * (rate - 1),
+                             kernel_size[1] + (kernel_size[1] - 1) * (rate - 1)]
+    pad_total = [kernel_size_effective[0] - 1, kernel_size_effective[1] - 1]
+    pad_beg = [pad_total[0] // 2, pad_total[1] // 2]
+    pad_end = [pad_total[0] - pad_beg[0], pad_total[1] - pad_beg[1]]
+    padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg[0], pad_end[0]],
+                                    [pad_beg[1], pad_end[1]], [0, 0]])
+    return padded_inputs
+
+
+@slim.add_arg_scope
+def expanded_conv(x,
+                  num_outputs,
+                  expansion=6,
+                  stride=1,
+                  rate=1,
+                  normalizer_fn=slim.batch_norm,
+                  project_activation_fn=tf.identity,
+                  padding="SAME",
+                  scope=None):
+    """The expanded conv op in MobileNetV2:
+    1x1 conv -> depthwise 3x3 conv -> 1x1 linear conv
+    """
+    with tf.variable_scope(scope, default_name="expanded_conv") as s, \
+            tf.name_scope(s.original_name_scope):
+        prev_depth = x.get_shape().as_list()[3]
+        # the number of filters of the expansion conv
+        inner_size = prev_depth * expansion
+        net = x
+        # insert the expansion conv only when inner_size > prev_depth
+        if inner_size > prev_depth:
+            net = slim.conv2d(net, inner_size, 1, normalizer_fn=normalizer_fn,
+                              scope="expand")
+        # depthwise conv
+        net = slim.separable_conv2d(net, num_outputs=None, kernel_size=3,
+                                    depth_multiplier=1, stride=stride,
+                                    rate=rate, normalizer_fn=normalizer_fn,
+                                    padding=padding, scope="depthwise")
+        # linear projection
+        net = slim.conv2d(net, num_outputs, 1, normalizer_fn=normalizer_fn,
+                          activation_fn=project_activation_fn, scope="project")
+
+        # residual connection
+        if stride == 1 and net.get_shape().as_list()[-1] == prev_depth:
+            net += x
+
+        return net
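+
+
+# Illustrative shape walk-through of one block (example values, not part of
+# the network definition): a 56x56x24 input with expansion=6 and stride=1 is
+# expanded by the 1x1 conv to 56x56x144, kept at 56x56x144 by the depthwise
+# 3x3 conv, projected back to 56x56x24 by the linear 1x1 conv, and, since
+# stride == 1 and the input/output depths match, the input is added back as a
+# residual. The channel rounding above behaves the same way: with a width
+# multiplier of 0.75, _make_divisible(32 * 0.75, 8) == 24 and
+# _make_divisible(16 * 0.75, 8) == 16 (12 rounds up to 16, and the 10% guard
+# ensures no layer shrinks by much more than 10%).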
+
+def global_pool(x, pool_op=tf.nn.avg_pool):
+    """Applies average pooling to produce a 1x1 output.
+    NOTE: This function is functionally equivalent to reduce_mean, but it uses
+    a baked-in average pool, which has better support across hardware.
+    Args:
+        x: input tensor
+        pool_op: pooling op (avg pool is default)
+    Returns:
+        a tensor of shape batch_size x 1 x 1 x depth.
+    """
+    shape = x.get_shape().as_list()
+    if shape[1] is None or shape[2] is None:
+        kernel_size = tf.convert_to_tensor(
+            [1, tf.shape(x)[1], tf.shape(x)[2], 1])
+    else:
+        kernel_size = [1, shape[1], shape[2], 1]
+    output = pool_op(x, ksize=kernel_size, strides=[1, 1, 1, 1], padding='VALID')
+    # Recover the static output shape when the input shape is unknown.
+    output.set_shape([None, 1, 1, None])
+    return output
+
+
+_Op = namedtuple("Op", ['op', 'params', 'multiplier_func'])
+
+def op(op_func, **params):
+    return _Op(op=op_func, params=params,
+               multiplier_func=_depth_multiplier_func)
+
+
+# The MobileNetV2 layer table (Table 2 of the paper), one op per layer.
+CONV_DEF = [op(slim.conv2d, num_outputs=32, stride=2, kernel_size=3),
+            op(expanded_conv, num_outputs=16, expansion=1),
+            op(expanded_conv, num_outputs=24, stride=2),
+            op(expanded_conv, num_outputs=24, stride=1),
+            op(expanded_conv, num_outputs=32, stride=2),
+            op(expanded_conv, num_outputs=32, stride=1),
+            op(expanded_conv, num_outputs=32, stride=1),
+            op(expanded_conv, num_outputs=64, stride=2),
+            op(expanded_conv, num_outputs=64, stride=1),
+            op(expanded_conv, num_outputs=64, stride=1),
+            op(expanded_conv, num_outputs=64, stride=1),
+            op(expanded_conv, num_outputs=96, stride=1),
+            op(expanded_conv, num_outputs=96, stride=1),
+            op(expanded_conv, num_outputs=96, stride=1),
+            op(expanded_conv, num_outputs=160, stride=2),
+            op(expanded_conv, num_outputs=160, stride=1),
+            op(expanded_conv, num_outputs=160, stride=1),
+            op(expanded_conv, num_outputs=320, stride=1),
+            op(slim.conv2d, num_outputs=1280, stride=1, kernel_size=1),
+            ]
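+
+# Sanity check of the table above (illustrative, assuming a 224x224 input):
+# the strides multiply to 2 * 2 * 2 * 2 * 2 = 32, so the final 1x1 conv sees
+# a 7x7x1280 feature map (224 / 32 = 7), matching Table 2 of the paper, e.g.:
+#   import functools
+#   functools.reduce(lambda s, d: s * d.params.get('stride', 1), CONV_DEF, 1)
+#   # -> 32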
+
+
+def mobilenet_arg_scope(is_training=True,
+                        weight_decay=0.00004,
+                        stddev=0.09,
+                        dropout_keep_prob=0.8,
+                        bn_decay=0.997):
+    """Defines the Mobilenet default arg scope.
+    Usage:
+        with slim.arg_scope(mobilenet_arg_scope()):
+            logits = mobilenetv2(input_tensor)
+        # the network created will be trainable with dropout/batch norm
+        # initialized appropriately.
+    Args:
+        is_training: if set to False this will ensure that all customizations are
+            set to non-training mode. This might be helpful for code that is reused
+            across both training/evaluation, but most of the time an arg scope with
+            is_training=False is not needed.
+        weight_decay: The weight decay to use for regularizing the model.
+        stddev: Standard deviation for initialization; if negative, uses Xavier.
+        dropout_keep_prob: dropout keep probability (not set if equal to None).
+        bn_decay: decay for the batch norm moving averages.
+    Returns:
+        An argument scope to use via arg_scope.
+    """
+    # Note: do not introduce parameters that would change the inference
+    # model here (for example whether to use bias); modify CONV_DEF instead.
+    batch_norm_params = {
+        'center': True,
+        'scale': True,
+        'decay': bn_decay,
+        'is_training': is_training
+    }
+    if stddev < 0:
+        weight_initializer = slim.initializers.xavier_initializer()
+    else:
+        weight_initializer = tf.truncated_normal_initializer(stddev=stddev)
+
+    # Set weight_decay for weights in Conv and FC layers.
+    with slim.arg_scope(
+            [slim.conv2d, slim.fully_connected, slim.separable_conv2d],
+            weights_initializer=weight_initializer,
+            normalizer_fn=slim.batch_norm,
+            activation_fn=tf.nn.relu6), \
+            slim.arg_scope([slim.batch_norm], **batch_norm_params), \
+            slim.arg_scope([slim.dropout], is_training=is_training,
+                           keep_prob=dropout_keep_prob), \
+            slim.arg_scope([slim.conv2d, slim.separable_conv2d],
+                           biases_initializer=None,
+                           padding="SAME"), \
+            slim.arg_scope([slim.conv2d],
+                           weights_regularizer=slim.l2_regularizer(weight_decay)), \
+            slim.arg_scope([slim.separable_conv2d], weights_regularizer=None) as s:
+        return s
+
+
+def mobilenetv2(x,
+                num_classes=1001,
+                depth_multiplier=1.0,
+                scope='MobilenetV2',
+                finegrain_classification_mode=False,
+                min_depth=8,
+                divisible_by=8,
+                output_stride=None,
+                ):
+    """Mobilenet v2
+    Args:
+        x: The input tensor
+        num_classes: number of classes
+        depth_multiplier: The multiplier applied to scale the number of
+            channels in each layer. Note: this is called depth multiplier in the
+            paper but the name is kept for consistency with slim's model builder.
+        scope: Scope of the operator
+        finegrain_classification_mode: When set to True, the model
+            will keep the last layer large even for small multipliers.
+            The paper suggests that this improves performance on ImageNet-type problems.
+        min_depth: If provided, ensures that all layers have at least that
+            many channels after application of the depth multiplier.
+        divisible_by: If provided, ensures that the number of channels in all
+            layers is divisible by this number.
+        output_stride: If provided, the maximum ratio of input to output spatial
+            resolution; atrous convolution is used to keep it from being exceeded.
+    Returns:
+        logits, or (net, end_points) when num_classes is None or 0.
+    """
+    conv_defs = CONV_DEF
+
+    # keep the last conv layer's channel count large
+    if finegrain_classification_mode:
+        conv_defs = copy.deepcopy(conv_defs)
+        if depth_multiplier < 1:
+            conv_defs[-1].params['num_outputs'] /= depth_multiplier
+
+    depth_args = {}
+    # NB: do not set depth_args unless they are provided, to avoid overriding
+    # whatever default depth_multiplier might have thanks to arg_scope.
+    if min_depth is not None:
+        depth_args['min_depth'] = min_depth
+    if divisible_by is not None:
+        depth_args['divisible_by'] = divisible_by
+
+    with slim.arg_scope([_depth_multiplier_func], **depth_args):
+        with tf.variable_scope(scope, default_name='Mobilenet'):
+            # The current_stride variable keeps track of the output stride of the
+            # activations, i.e., the running product of convolution strides up to the
+            # current network layer. This allows us to invoke atrous convolution
+            # whenever applying the next convolution would result in the activations
+            # having output stride larger than the target output_stride.
+            current_stride = 1
+
+            # The atrous convolution rate parameter.
+            rate = 1
+
+            net = x
+            end_points = {}
+            for i, opdef in enumerate(conv_defs):
+                params = dict(opdef.params)
+                opdef.multiplier_func(params, depth_multiplier)
+                stride = params.get('stride', 1)
+                if output_stride is not None and current_stride == output_stride:
+                    # If we have reached the target output_stride, then we need to employ
+                    # atrous convolution with stride=1 and multiply the atrous rate by the
+                    # current unit's stride for use in subsequent layers.
+                    layer_stride = 1
+                    layer_rate = rate
+                    rate *= stride
+                else:
+                    layer_stride = stride
+                    layer_rate = 1
+                    current_stride *= stride
+                # Update params.
+                params['stride'] = layer_stride
+                # Only insert rate into params if rate > 1.
+                if layer_rate > 1:
+                    params['rate'] = layer_rate
+
+                try:
+                    net = opdef.op(net, **params)
+                except Exception:
+                    raise ValueError('Failed to create op %i: %r params: %r' % (i, opdef, params))
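+
+            # Worked example of the atrous logic above (illustrative): with
+            # output_stride=16, downsampling proceeds normally until
+            # current_stride == 16; the stride-2 conv of the 160-channel block
+            # is then forced to layer_stride=1, and every layer after it runs
+            # with atrous rate 2, so the feature map never drops below 1/16 of
+            # the input resolution.
+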
+            with tf.variable_scope('Logits'):
+                net = global_pool(net)
+                end_points['global_pool'] = net
+                if not num_classes:
+                    return net, end_points
+                net = slim.dropout(net, scope='Dropout')
+                # 1 x 1 x num_classes
+                # Note: legacy scope name.
+                logits = slim.conv2d(
+                    net,
+                    num_classes, [1, 1],
+                    activation_fn=None,
+                    normalizer_fn=None,
+                    biases_initializer=tf.zeros_initializer(),
+                    scope='Conv2d_1c_1x1')
+
+            logits = tf.squeeze(logits, [1, 2])
+
+            return logits
+
+
+if __name__ == "__main__":
+    import cv2
+    import numpy as np
+
+    inputs = tf.placeholder(tf.uint8, [None, None, 3])
+    images = tf.expand_dims(inputs, 0)
+    images = tf.cast(images, tf.float32) / 128. - 1
+    images.set_shape((None, None, None, 3))
+    images = tf.image.resize_images(images, (224, 224))
+
+    with slim.arg_scope(mobilenet_arg_scope(is_training=False)):
+        logits = mobilenetv2(images)
+
+    # Restore using the exponential moving average since it produces (1.5-2%)
+    # higher accuracy.
+    ema = tf.train.ExponentialMovingAverage(0.999)
+    vars_to_restore = ema.variables_to_restore()
+
+    saver = tf.train.Saver(vars_to_restore)
+
+    checkpoint_path = r"C:\Users\xiaoh\Desktop\temp\mobilenet_v2_1.0_224\mobilenet_v2_1.0_224.ckpt"
+    image_file = "C:/Users/xiaoh/Desktop/temp/pandas.jpg"
+    with tf.Session() as sess:
+        saver.restore(sess, checkpoint_path)
+
+        img = cv2.imread(image_file)
+        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+
+        print(np.argmax(sess.run(logits, feed_dict={inputs: img})[0]))
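+
+        # Illustrative extra step (assumed, not in the original demo): also
+        # report the top-5 class indices; note that index 0 is the background
+        # class in the 1001-class slim checkpoints.
+        top5 = sess.run(tf.nn.top_k(logits, k=5), feed_dict={inputs: img})
+        print("Top-5 class indices:", top5.indices[0])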
diff --git a/README.md b/README.md
index 45575ec..b1ebefe 100644
--- a/README.md
+++ b/README.md
@@ -18,6 +18,7 @@ Note: the project aims at imitating the well-implemented algorithms in [Deep Lea
 ### CNN Models
 - MobileNet [[self](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/CNNs/MobileNet.py) [paper](https://arxiv.org/abs/1704.04861) [ref](https://github.com/Zehaos/MobileNet/blob/master/nets/mobilenet.py)]
+- MobileNetv2 [[self](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/CNNs/mobilenet_v2.py) [paper](https://arxiv.org/pdf/1801.04381.pdf) [ref](https://github.com/tensorflow/models/tree/master/research/slim/nets/mobilenet)]
 - SqueezeNet [[self](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/CNNs/SqueezeNet.py) [paper](https://arxiv.org/abs/1602.07360)]
 - ResNet [[self](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/CNNs/ResNet50.py) [caffe ref](https://github.com/KaimingHe/deep-residual-networks) [paper1](https://arxiv.org/abs/1512.03385) [paper2](https://arxiv.org/abs/1603.05027)]
 - ShuffleNet [[self](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/CNNs/ShuffleNet.py) by pytorch [paper](http://cn.arxiv.org/pdf/1707.01083v2)]
diff --git a/examples/cnn_setence_classification/text_cnn.py b/examples/cnn_setence_classification/text_cnn.py
index f186faf..3518a5c 100644
--- a/examples/cnn_setence_classification/text_cnn.py
+++ b/examples/cnn_setence_classification/text_cnn.py
@@ -81,7 +81,7 @@ def __init__(self, seq_len, vocab_size, embedding_size, filter_sizes, num_filter
             pooled_outputs.append(pool_output) # [None, 1, 1, num_filters]
         # Combine all pooled features
         num_filters_total = num_filters * len(filter_sizes)
-        self.h_pool = tf.concat(3, pooled_outputs) # [None, 1, 1, num_filters_total]
+        self.h_pool = tf.concat(pooled_outputs, 3) # [None, 1, 1, num_filters_total]
         self.h_pool_flat = tf.reshape(self.h_pool, shape=[-1, num_filters_total]) # [None, num_filters_total]

         # The dropout layer
@@ -100,7 +100,7 @@ def __init__(self, seq_len, vocab_size, embedding_size, filter_sizes, num_filter

         # The loss
         with tf.name_scope("loss"):
-            losses = tf.nn.softmax_cross_entropy_with_logits(self.scores, self.y)
+            losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores, labels=self.y)
             self.loss = tf.reduce_mean(losses) + L2_loss * l2_reg_lambda

         # Accuracy
diff --git a/examples/cnn_setence_classification/train_cnn.py b/examples/cnn_setence_classification/train_cnn.py
index aefa2b5..bc58dc0 100644
--- a/examples/cnn_setence_classification/train_cnn.py
+++ b/examples/cnn_setence_classification/train_cnn.py
@@ -1,74 +1,74 @@
-"""
-Test the TextRNN class
-2016/12/22
-"""
-import os
-import sys
-import numpy as np
-import tensorflow as tf
-from sklearn.model_selection import train_test_split
-from tensorflow.contrib import learn
-
-from data_helpers import load_data_and_labels, batch_iter
-from text_cnn import TextCNN
-
-
-# Load original data
-path = sys.path[0]
-pos_filename = path + "/data/rt-polarity.pos"
-neg_filename = path + "/data/rt-polarity.neg"
-
-X_data, y_data = load_data_and_labels(pos_filename, neg_filename)
-max_document_length = max([len(sen.split(" ")) for sen in X_data])
-print("Max_document_length:,", max_document_length)
-# Create the vacabulary
-vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length)
-# The idx data
-x = np.array(list(vocab_processor.fit_transform(X_data)), dtype=np.float32)
-y = np.array(y_data, dtype=np.int32)
-vocabulary_size = len(vocab_processor.vocabulary_)
-print("The size of vocabulary:", vocabulary_size)
-# Split the data
-X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=1111)
-print("X_train shape {0}, y_train shape {1}".format(X_train.shape, y_train.shape))
-print("X_test shape {0}, y_test shape {1}".format(X_test.shape, y_test.shape))
-
-# The parameters of RNN
-seq_len = X_train.shape[1]
-vocab_size = vocabulary_size
-embedding_size = 128
-filter_sizes = [2, 3, 4]
-num_filters = 128
-num_classes = y_train.shape[1]
-l2_reg_lambda = 0.0
-
-# Construct RNN model
-text_rnn_model = TextCNN(seq_len=seq_len, vocab_size=vocab_size, embedding_size=embedding_size, filter_sizes=
-                         filter_sizes, num_filters=num_filters, num_classes=num_classes)
-loss = text_rnn_model.loss
-train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
-accuracy = text_rnn_model.accuracy
-# The parameters for training
-batch_size = 64
-training_epochs = 10
-dispaly_every = 1
-dropout_keep_prob = 0.5
-
-batch_num = int(X_train.shape[0]/batch_size)
-
-sess = tf.Session()
-sess.run(tf.global_variables_initializer())
-print("Starting training...")
-for epoch in range(training_epochs):
-    avg_cost = 0
-    for batch in range(batch_num):
-        _, cost = sess.run([train_op, loss], feed_dict={text_rnn_model.x: X_train[batch*batch_size:(batch+1)*batch_size],
-                                                        text_rnn_model.y: y_train[batch*batch_size:(batch+1)*batch_size],
-                                                        text_rnn_model.dropout_keep_prob:dropout_keep_prob})
-        avg_cost += cost
-    if epoch % dispaly_every == 0:
-        cost, acc = sess.run([loss, accuracy], feed_dict={text_rnn_model.x: X_test,
-                                                          text_rnn_model.y: y_test,
-                                                          text_rnn_model.dropout_keep_prob: 1.0})
-        print("\nEpoch {0} : loss {1}, accuracy {2}".format(epoch, cost, acc))
-
+"""
+Test the TextCNN class
+2016/12/22
+"""
+import os
+import sys
+
+import numpy as np
+import tensorflow as tf
+from sklearn.model_selection import train_test_split
+from tensorflow.contrib import learn
+
+from data_helpers import load_data_and_labels, batch_iter
+from text_cnn import TextCNN
+
+# Load original data
+path = sys.path[0]
+pos_filename = path + "/data/rt-polarity.pos"
+neg_filename = path + "/data/rt-polarity.neg"
+
+X_data, y_data = load_data_and_labels(pos_filename, neg_filename)
+max_document_length = max([len(sen.split(" ")) for sen in X_data])
+print("Max_document_length:", max_document_length)
+# Create the vocabulary
+vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length)
+# The index data
+x = np.array(list(vocab_processor.fit_transform(X_data)), dtype=np.float32)
+y = np.array(y_data, dtype=np.int32)
+vocabulary_size = len(vocab_processor.vocabulary_)
+print("The size of vocabulary:", vocabulary_size)
+# Split the data
+X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=1111)
+print("X_train shape {0}, y_train shape {1}".format(X_train.shape, y_train.shape))
+print("X_test shape {0}, y_test shape {1}".format(X_test.shape, y_test.shape))
+
+# The parameters of the CNN
+seq_len = X_train.shape[1]
+vocab_size = vocabulary_size
+embedding_size = 128
+filter_sizes = [2, 3, 4]
+num_filters = 128
+num_classes = y_train.shape[1]
+l2_reg_lambda = 0.0
+
+# Construct the CNN model
+text_cnn_model = TextCNN(seq_len=seq_len, vocab_size=vocab_size, embedding_size=embedding_size,
+                         filter_sizes=filter_sizes, num_filters=num_filters, num_classes=num_classes)
+loss = text_cnn_model.loss
+train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
+accuracy = text_cnn_model.accuracy
+# The parameters for training
+batch_size = 64
+training_epochs = 10
+display_every = 1
+dropout_keep_prob = 0.5
+
+batch_num = int(X_train.shape[0] / batch_size)
+
+sess = tf.Session()
+sess.run(tf.global_variables_initializer())
+print("Starting training...")
+for epoch in range(training_epochs):
+    avg_cost = 0
+    for batch in range(batch_num):
+        _, cost = sess.run([train_op, loss],
+                           feed_dict={text_cnn_model.x: X_train[batch*batch_size:(batch+1)*batch_size],
+                                      text_cnn_model.y: y_train[batch*batch_size:(batch+1)*batch_size],
+                                      text_cnn_model.dropout_keep_prob: dropout_keep_prob})
+        avg_cost += cost
+    if epoch % display_every == 0:
+        cost, acc = sess.run([loss, accuracy], feed_dict={text_cnn_model.x: X_test,
+                                                          text_cnn_model.y: y_test,
+                                                          text_cnn_model.dropout_keep_prob: 1.0})
+        print("\nEpoch {0} : loss {1}, accuracy {2}".format(epoch, cost, acc))
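+
+# Illustrative wrap-up (assumed, not in the original script): evaluate once
+# more after the final epoch and release the session explicitly.
+final_cost, final_acc = sess.run([loss, accuracy],
+                                 feed_dict={text_cnn_model.x: X_test,
+                                            text_cnn_model.y: y_test,
+                                            text_cnn_model.dropout_keep_prob: 1.0})
+print("Final: loss {0}, accuracy {1}".format(final_cost, final_acc))
+sess.close()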