diff --git a/CNNs/densenet.py b/CNNs/densenet.py new file mode 100644 index 0000000..8f70424 --- /dev/null +++ b/CNNs/densenet.py @@ -0,0 +1,228 @@ +""" +DenseNet, original: https://github.com/pytorch/vision/blob/master/torchvision/models/densenet.py +""" +import re +from collections import OrderedDict + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.model_zoo as model_zoo +import torchvision.transforms as transforms + +from PIL import Image +import numpy as np + +model_urls = { + 'densenet121': 'https://download.pytorch.org/models/densenet121-a639ec97.pth', + 'densenet169': 'https://download.pytorch.org/models/densenet169-b2777c0a.pth', + 'densenet201': 'https://download.pytorch.org/models/densenet201-c1103571.pth', + 'densenet161': 'https://download.pytorch.org/models/densenet161-8d451a50.pth', +} + + +class _DenseLayer(nn.Sequential): + """Basic unit of DenseBlock (using bottleneck layer) """ + def __init__(self, num_input_features, growth_rate, bn_size, drop_rate): + super(_DenseLayer, self).__init__() + self.add_module("norm1", nn.BatchNorm2d(num_input_features)) + self.add_module("relu1", nn.ReLU(inplace=True)) + self.add_module("conv1", nn.Conv2d(num_input_features, bn_size*growth_rate, + kernel_size=1, stride=1, bias=False)) + self.add_module("norm2", nn.BatchNorm2d(bn_size*growth_rate)) + self.add_module("relu2", nn.ReLU(inplace=True)) + self.add_module("conv2", nn.Conv2d(bn_size*growth_rate, growth_rate, + kernel_size=3, stride=1, padding=1, bias=False)) + self.drop_rate = drop_rate + + def forward(self, x): + new_features = super(_DenseLayer, self).forward(x) + if self.drop_rate > 0: + new_features = F.dropout(new_features, p=self.drop_rate, training=self.training) + return torch.cat([x, new_features], 1) + +class _DenseBlock(nn.Sequential): + """DenseBlock""" + def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate): + super(_DenseBlock, self).__init__() + for i in range(num_layers): + layer = _DenseLayer(num_input_features+i*growth_rate, growth_rate, bn_size, + drop_rate) + self.add_module("denselayer%d" % (i+1,), layer) + + +class _Transition(nn.Sequential): + """Transition layer between two adjacent DenseBlock""" + def __init__(self, num_input_feature, num_output_features): + super(_Transition, self).__init__() + self.add_module("norm", nn.BatchNorm2d(num_input_feature)) + self.add_module("relu", nn.ReLU(inplace=True)) + self.add_module("conv", nn.Conv2d(num_input_feature, num_output_features, + kernel_size=1, stride=1, bias=False)) + self.add_module("pool", nn.AvgPool2d(2, stride=2)) + + +class DenseNet(nn.Module): + "DenseNet-BC model" + def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16), num_init_features=64, + bn_size=4, compression_rate=0.5, drop_rate=0, num_classes=1000): + """ + :param growth_rate: (int) number of filters used in DenseLayer, `k` in the paper + :param block_config: (list of 4 ints) number of layers in each DenseBlock + :param num_init_features: (int) number of filters in the first Conv2d + :param bn_size: (int) the factor using in the bottleneck layer + :param compression_rate: (float) the compression rate used in Transition Layer + :param drop_rate: (float) the drop rate after each DenseLayer + :param num_classes: (int) number of classes for classification + """ + super(DenseNet, self).__init__() + # first Conv2d + self.features = nn.Sequential(OrderedDict([ + ("conv0", nn.Conv2d(3, num_init_features, kernel_size=7, stride=2, 
padding=3, bias=False)), + ("norm0", nn.BatchNorm2d(num_init_features)), + ("relu0", nn.ReLU(inplace=True)), + ("pool0", nn.MaxPool2d(3, stride=2, padding=1)) + ])) + + # DenseBlock + num_features = num_init_features + for i, num_layers in enumerate(block_config): + block = _DenseBlock(num_layers, num_features, bn_size, growth_rate, drop_rate) + self.features.add_module("denseblock%d" % (i + 1), block) + num_features += num_layers*growth_rate + if i != len(block_config) - 1: + transition = _Transition(num_features, int(num_features*compression_rate)) + self.features.add_module("transition%d" % (i + 1), transition) + num_features = int(num_features * compression_rate) + + # final bn+ReLU + self.features.add_module("norm5", nn.BatchNorm2d(num_features)) + self.features.add_module("relu5", nn.ReLU(inplace=True)) + + # classification layer + self.classifier = nn.Linear(num_features, num_classes) + + # params initialization + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1) + elif isinstance(m, nn.Linear): + nn.init.constant_(m.bias, 0) + + def forward(self, x): + features = self.features(x) + out = F.avg_pool2d(features, 7, stride=1).view(features.size(0), -1) + out = self.classifier(out) + return out + +class DenseNet_MNIST(nn.Module): + """DenseNet for MNIST dataset""" + def __init__(self, growth_rate=12, block_config=(6, 6, 6), num_init_features=16, + bn_size=4, compression_rate=0.5, drop_rate=0, num_classes=10): + """ + :param growth_rate: (int) number of filters used in DenseLayer, `k` in the paper + :param block_config: (list of 2 ints) number of layers in each DenseBlock + :param num_init_features: (int) number of filters in the first Conv2d + :param bn_size: (int) the factor using in the bottleneck layer + :param compression_rate: (float) the compression rate used in Transition Layer + :param drop_rate: (float) the drop rate after each DenseLayer + :param num_classes: (int) number of classes for classification + """ + super(DenseNet_MNIST, self).__init__() + # first Conv2d + self.features = nn.Sequential(OrderedDict([ + ("conv0", nn.Conv2d(1, num_init_features, kernel_size=3, stride=1, padding=1, bias=False)), + ("norm0", nn.BatchNorm2d(num_init_features)), + ("relu0", nn.ReLU(inplace=True)), + ])) + + # DenseBlock + num_features = num_init_features + for i, num_layers in enumerate(block_config): + block = _DenseBlock(num_layers, num_features, bn_size, growth_rate, drop_rate) + self.features.add_module("denseblock%d" % (i + 1), block) + num_features += num_layers * growth_rate + if i != len(block_config) - 1: + transition = _Transition(num_features, int(num_features * compression_rate)) + self.features.add_module("transition%d" % (i + 1), transition) + num_features = int(num_features * compression_rate) + + # final bn+ReLU + self.features.add_module("norm5", nn.BatchNorm2d(num_features)) + self.features.add_module("relu5", nn.ReLU(inplace=True)) + + # classification layer + self.classifier = nn.Linear(num_features, num_classes) + + # params initialization + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1) + elif isinstance(m, nn.Linear): + nn.init.constant_(m.bias, 0) + + def forward(self, x): + features = self.features(x) + out = F.avg_pool2d(features, 7, stride=1).view(features.size(0), -1) + out = 
self.classifier(out) + return out + + +def densenet121(pretrained=False, **kwargs): + """DenseNet121""" + model = DenseNet(num_init_features=64, growth_rate=32, block_config=(6, 12, 24, 16), + **kwargs) + + if pretrained: + # '.'s are no longer allowed in module names, but pervious _DenseLayer + # has keys 'norm.1', 'relu.1', 'conv.1', 'norm.2', 'relu.2', 'conv.2'. + # They are also in the checkpoints in model_urls. This pattern is used + # to find such keys. + pattern = re.compile( + r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$') + state_dict = model_zoo.load_url(http://github.com/model_urls['densenet121']) + for key in list(state_dict.keys()): + res = pattern.match(key) + if res: + new_key = res.group(1) + res.group(2) + state_dict[new_key] = state_dict[key] + del state_dict[key] + model.load_state_dict(state_dict) + return model + +if __name__ == "__main__": + densenet = densenet121(pretrained=True) + densenet.eval() + + img = Image.open("./images/cat.jpg") + + trans_ops = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + ]) + + images = trans_ops(img).view(-1, 3, 224, 224) + print(images) + outputs = densenet(images) + + _, predictions = outputs.topk(5, dim=1) + + labels = list(map(lambda s: s.strip(), open("./data/imagenet/synset_words.txt").readlines())) + for idx in predictions.numpy()[0]: + print("Predicted labels:", labels[idx]) + + + + + + + diff --git a/CNNs/mobilenet_v2.py b/CNNs/mobilenet_v2.py new file mode 100644 index 0000000..23d6c0f --- /dev/null +++ b/CNNs/mobilenet_v2.py @@ -0,0 +1,349 @@ +""" +2018-11-24 +""" + +from collections import namedtuple +import copy + +import tensorflow as tf + +slim = tf.contrib.slim + +def _make_divisible(v, divisor, min_value=None): + """make `v` is divided exactly by `divisor`, but keep the min_value""" + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +@slim.add_arg_scope +def _depth_multiplier_func(params, + multiplier, + divisible_by=8, + min_depth=8): + """get the new channles""" + if 'num_outputs' not in params: + return + d = params['num_outputs'] + params['num_outputs'] = _make_divisible(d * multiplier, divisible_by, + min_depth) + +def _fixed_padding(inputs, kernel_size, rate=1): + """Pads the input along the spatial dimensions independently of input size. + Pads the input such that if it was used in a convolution with 'VALID' padding, + the output would have the same dimensions as if the unpadded input was used + in a convolution with 'SAME' padding. + Args: + inputs: A tensor of size [batch, height_in, width_in, channels]. + kernel_size: The kernel to be used in the conv2d or max_pool2d operation. + rate: An integer, rate for atrous convolution. + Returns: + output: A tensor of size [batch, height_out, width_out, channels] with the + input, either intact (if kernel_size == 1) or padded (if kernel_size > 1). 
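+  For example, kernel_size=[3, 3] with rate=2 gives an effective kernel of
+  3 + (3 - 1) * (2 - 1) = 5, so pad_total is 4 and 2 rows/columns of zeros are
+  added on every spatial side before the 'VALID' convolution.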
+ """ + kernel_size_effective = [kernel_size[0] + (kernel_size[0] - 1) * (rate - 1), + kernel_size[0] + (kernel_size[0] - 1) * (rate - 1)] + pad_total = [kernel_size_effective[0] - 1, kernel_size_effective[1] - 1] + pad_beg = [pad_total[0] // 2, pad_total[1] // 2] + pad_end = [pad_total[0] - pad_beg[0], pad_total[1] - pad_beg[1]] + padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg[0], pad_end[0]], + [pad_beg[1], pad_end[1]], [0, 0]]) + return padded_inputs + + +@slim.add_arg_scope +def expanded_conv(x, + num_outputs, + expansion=6, + stride=1, + rate=1, + normalizer_fn=slim.batch_norm, + project_activation_fn=tf.identity, + padding="SAME", + scope=None): + """The expand conv op in MobileNetv2 + 1x1 conv -> depthwise 3x3 conv -> 1x1 linear conv + """ + with tf.variable_scope(scope, default_name="expanded_conv") as s, \ + tf.name_scope(s.original_name_scope): + prev_depth = x.get_shape().as_list()[3] + # the filters of expanded conv + inner_size = prev_depth * expansion + net = x + # only inner_size > prev_depth, use expanded conv + if inner_size > prev_depth: + net = slim.conv2d(net, inner_size, 1, normalizer_fn=normalizer_fn, + scope="expand") + # depthwise conv + net = slim.separable_conv2d(net, num_outputs=None, kernel_size=3, + depth_multiplier=1, stride=stride, + rate=rate, normalizer_fn=normalizer_fn, + padding=padding, scope="depthwise") + # projection + net = slim.conv2d(net, num_outputs, 1, normalizer_fn=normalizer_fn, + activation_fn=project_activation_fn, scope="project") + + # residual connection + if stride == 1 and net.get_shape().as_list()[-1] == prev_depth: + net += x + + return net + +def global_pool(x, pool_op=tf.nn.avg_pool): + """Applies avg pool to produce 1x1 output. + NOTE: This function is funcitonally equivalenet to reduce_mean, but it has + baked in average pool which has better support across hardware. + Args: + input_tensor: input tensor + pool_op: pooling op (avg pool is default) + Returns: + a tensor batch_size x 1 x 1 x depth. + """ + shape = x.get_shape().as_list() + if shape[1] is None or shape[2] is None: + kernel_size = tf.convert_to_tensor( + [1, tf.shape(x)[1], tf.shape(x)[2], 1]) + else: + kernel_size = [1, shape[1], shape[2], 1] + output = pool_op(x, ksize=kernel_size, strides=[1, 1, 1, 1], padding='VALID') + # Recover output shape, for unknown shape. 
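+  # set_shape only attaches static shape information (it never reshapes the
+  # tensor): after a full-size 'VALID' pool the spatial dims are known to be
+  # 1x1 even when the input height/width are dynamic.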
+ output.set_shape([None, 1, 1, None]) + return output + + +_Op = namedtuple("Op", ['op', 'params', 'multiplier_func']) + +def op(op_func, **params): + return _Op(op=op_func, params=params, + multiplier_func=_depth_multiplier_func) + + +CONV_DEF = [op(slim.conv2d, num_outputs=32, stride=2, kernel_size=3), + op(expanded_conv, num_outputs=16, expansion=1), + op(expanded_conv, num_outputs=24, stride=2), + op(expanded_conv, num_outputs=24, stride=1), + op(expanded_conv, num_outputs=32, stride=2), + op(expanded_conv, num_outputs=32, stride=1), + op(expanded_conv, num_outputs=32, stride=1), + op(expanded_conv, num_outputs=64, stride=2), + op(expanded_conv, num_outputs=64, stride=1), + op(expanded_conv, num_outputs=64, stride=1), + op(expanded_conv, num_outputs=64, stride=1), + op(expanded_conv, num_outputs=96, stride=1), + op(expanded_conv, num_outputs=96, stride=1), + op(expanded_conv, num_outputs=96, stride=1), + op(expanded_conv, num_outputs=160, stride=2), + op(expanded_conv, num_outputs=160, stride=1), + op(expanded_conv, num_outputs=160, stride=1), + op(expanded_conv, num_outputs=320, stride=1), + op(slim.conv2d, num_outputs=1280, stride=1, kernel_size=1), + ] + + +def mobilenet_arg_scope(is_training=True, + weight_decay=0.00004, + stddev=0.09, + dropout_keep_prob=0.8, + bn_decay=0.997): + """Defines Mobilenet default arg scope. + Usage: + with tf.contrib.slim.arg_scope(mobilenet.training_scope()): + logits, endpoints = mobilenet_v2.mobilenet(input_tensor) + # the network created will be trainble with dropout/batch norm + # initialized appropriately. + Args: + is_training: if set to False this will ensure that all customizations are + set to non-training mode. This might be helpful for code that is reused + across both training/evaluation, but most of the time training_scope with + value False is not needed. If this is set to None, the parameters is not + added to the batch_norm arg_scope. + weight_decay: The weight decay to use for regularizing the model. + stddev: Standard deviation for initialization, if negative uses xavier. + dropout_keep_prob: dropout keep probability (not set if equals to None). + bn_decay: decay for the batch norm moving averages (not set if equals to + None). + Returns: + An argument scope to use via arg_scope. + """ + # Note: do not introduce parameters that would change the inference + # model here (for example whether to use bias), modify conv_def instead. + batch_norm_params = { + 'center': True, + 'scale': True, + 'decay': bn_decay, + 'is_training': is_training + } + if stddev < 0: + weight_intitializer = slim.initializers.xavier_initializer() + else: + weight_intitializer = tf.truncated_normal_initializer(stddev=stddev) + + # Set weight_decay for weights in Conv and FC layers. 
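+  # The nested scopes below set, in order: the weight initializer, batch norm
+  # and ReLU6 as defaults for conv/fc/separable convs; the batch_norm
+  # parameters; dropout's is_training/keep_prob; bias-free SAME-padded
+  # convolutions; and L2 weight decay for regular conv2d only (depthwise
+  # weights stay unregularized).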
+ with slim.arg_scope( + [slim.conv2d, slim.fully_connected, slim.separable_conv2d], + weights_initializer=weight_intitializer, + normalizer_fn=slim.batch_norm, + activation_fn=tf.nn.relu6), \ + slim.arg_scope([slim.batch_norm], **batch_norm_params), \ + slim.arg_scope([slim.dropout], is_training=is_training, + keep_prob=dropout_keep_prob), \ + slim.arg_scope([slim.conv2d, slim.separable_conv2d], + biases_initializer=None, + padding="SAME"), \ + slim.arg_scope([slim.conv2d], + weights_regularizer=slim.l2_regularizer(weight_decay)), \ + slim.arg_scope([slim.separable_conv2d], weights_regularizer=None) as s: + return s + + +def mobilenetv2(x, + num_classes=1001, + depth_multiplier=1.0, + scope='MobilenetV2', + finegrain_classification_mode=False, + min_depth=8, + divisible_by=8, + output_stride=None, + ): + """Mobilenet v2 + Args: + x: The input tensor + num_classes: number of classes + depth_multiplier: The multiplier applied to scale number of + channels in each layer. Note: this is called depth multiplier in the + paper but the name is kept for consistency with slim's model builder. + scope: Scope of the operator + finegrain_classification_mode: When set to True, the model + will keep the last layer large even for small multipliers. + The paper suggests that it improves performance for ImageNet-type of problems. + min_depth: If provided, will ensure that all layers will have that + many channels after application of depth multiplier. + divisible_by: If provided will ensure that all layers # channels + will be divisible by this number. + """ + conv_defs = CONV_DEF + + # keep the last conv layer very larger channel + if finegrain_classification_mode: + conv_defs = copy.deepcopy(conv_defs) + if depth_multiplier < 1: + conv_defs[-1].params['num_outputs'] /= depth_multiplier + + depth_args = {} + # NB: do not set depth_args unless they are provided to avoid overriding + # whatever default depth_multiplier might have thanks to arg_scope. + if min_depth is not None: + depth_args['min_depth'] = min_depth + if divisible_by is not None: + depth_args['divisible_by'] = divisible_by + + with slim.arg_scope([_depth_multiplier_func], **depth_args): + with tf.variable_scope(scope, default_name='Mobilenet'): + # The current_stride variable keeps track of the output stride of the + # activations, i.e., the running product of convolution strides up to the + # current network layer. This allows us to invoke atrous convolution + # whenever applying the next convolution would result in the activations + # having output stride larger than the target output_stride. + current_stride = 1 + + # The atrous convolution rate parameter. + rate = 1 + + net = x + # Insert default parameters before the base scope which includes + # any custom overrides set in mobilenet. + end_points = {} + scopes = {} + for i, opdef in enumerate(conv_defs): + params = dict(opdef.params) + opdef.multiplier_func(params, depth_multiplier) + stride = params.get('stride', 1) + if output_stride is not None and current_stride == output_stride: + # If we have reached the target output_stride, then we need to employ + # atrous convolution with stride=1 and multiply the atrous rate by the + # current unit's stride for use in subsequent layers. + layer_stride = 1 + layer_rate = rate + rate *= stride + else: + layer_stride = stride + layer_rate = 1 + current_stride *= stride + # Update params. + params['stride'] = layer_stride + # Only insert rate to params if rate > 1. 
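+        # (a rate of 1 is a plain convolution; a dilated kernel is only needed
+        # once output_stride has been reached and strides are traded for rates)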
+ if layer_rate > 1: + params['rate'] = layer_rate + + try: + net = opdef.op(net, **params) + except Exception: + raise ValueError('Failed to create op %i: %r params: %r' % (i, opdef, params)) + + with tf.variable_scope('Logits'): + net = global_pool(net) + end_points['global_pool'] = net + if not num_classes: + return net, end_points + net = slim.dropout(net, scope='Dropout') + # 1 x 1 x num_classes + # Note: legacy scope name. + logits = slim.conv2d( + net, + num_classes, [1, 1], + activation_fn=None, + normalizer_fn=None, + biases_initializer=tf.zeros_initializer(), + scope='Conv2d_1c_1x1') + + logits = tf.squeeze(logits, [1, 2]) + + return logits + + +if __name__ == "__main__": + import cv2 + import numpy as np + + inputs = tf.placeholder(tf.uint8, [None, None, 3]) + images = tf.expand_dims(inputs, 0) + images = tf.cast(images, tf.float32) / 128. - 1 + images.set_shape((None, None, None, 3)) + images = tf.image.resize_images(images, (224, 224)) + + with slim.arg_scope(mobilenet_arg_scope(is_training=False)): + logits = mobilenetv2(images) + + # Restore using exponential moving average since it produces (1.5-2%) higher + # accuracy + ema = tf.train.ExponentialMovingAverage(0.999) + vars = ema.variables_to_restore() + + saver = tf.train.Saver(vars) + + print(len(tf.global_variables())) + for var in tf.global_variables(): + print(var) + checkpoint_path = r"C:\Users\xiaoh\Desktop\temp\mobilenet_v2_1.0_224\mobilenet_v2_1.0_224.ckpt" + image_file = "C:/Users/xiaoh/Desktop/temp/pandas.jpg" + with tf.Session() as sess: + saver.restore(sess, checkpoint_path) + + img = cv2.imread(image_file) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + print(np.argmax(sess.run(logits, feed_dict={inputs: img})[0])) + + + + + + + + + + diff --git a/CNNs/shufflenet_v2.py b/CNNs/shufflenet_v2.py new file mode 100644 index 0000000..dbb25c9 --- /dev/null +++ b/CNNs/shufflenet_v2.py @@ -0,0 +1,243 @@ +""" +The implement of shufflenet_v2 by Keras +""" + +import tensorflow as tf +from tensorflow.keras.layers import Conv2D, DepthwiseConv2D +from tensorflow.keras.layers import MaxPool2D, GlobalAveragePooling2D, Dense +from tensorflow.keras.layers import BatchNormalization, Activation + + +def channle_shuffle(inputs, group): + """Shuffle the channel + Args: + inputs: 4D Tensor + group: int, number of groups + Returns: + Shuffled 4D Tensor + """ + in_shape = inputs.get_shape().as_list() + h, w, in_channel = in_shape[1:] + assert in_channel % group == 0 + l = tf.reshape(inputs, [-1, h, w, in_channel // group, group]) + l = tf.transpose(l, [0, 1, 2, 4, 3]) + l = tf.reshape(l, [-1, h, w, in_channel]) + + return l + +class Conv2D_BN_ReLU(tf.keras.Model): + """Conv2D -> BN -> ReLU""" + def __init__(self, channel, kernel_size=1, stride=1): + super(Conv2D_BN_ReLU, self).__init__() + + self.conv = Conv2D(channel, kernel_size, strides=stride, + padding="SAME", use_bias=False) + self.bn = BatchNormalization(axis=-1, momentum=0.9, epsilon=1e-5) + self.relu = Activation("relu") + + def call(self, inputs, training=True): + x = self.conv(inputs) + x = self.bn(x, training=training) + x = self.relu(x) + return x + +class DepthwiseConv2D_BN(tf.keras.Model): + """DepthwiseConv2D -> BN""" + def __init__(self, kernel_size=3, stride=1): + super(DepthwiseConv2D_BN, self).__init__() + + self.dconv = DepthwiseConv2D(kernel_size, strides=stride, + depth_multiplier=1, + padding="SAME", use_bias=False) + self.bn = BatchNormalization(axis=-1, momentum=0.9, epsilon=1e-5) + + def call(self, inputs, training=True): + x = self.dconv(inputs) + x = 
self.bn(x, training=training) + return x + + +class ShufflenetUnit1(tf.keras.Model): + def __init__(self, out_channel): + """The unit of shufflenetv2 for stride=1 + Args: + out_channel: int, number of channels + """ + super(ShufflenetUnit1, self).__init__() + + assert out_channel % 2 == 0 + self.out_channel = out_channel + + self.conv1_bn_relu = Conv2D_BN_ReLU(out_channel // 2, 1, 1) + self.dconv_bn = DepthwiseConv2D_BN(3, 1) + self.conv2_bn_relu = Conv2D_BN_ReLU(out_channel // 2, 1, 1) + + def call(self, inputs, training=False): + # split the channel + shortcut, x = tf.split(inputs, 2, axis=3) + + x = self.conv1_bn_relu(x, training=training) + x = self.dconv_bn(x, training=training) + x = self.conv2_bn_relu(x, training=training) + + x = tf.concat([shortcut, x], axis=3) + x = channle_shuffle(x, 2) + return x + +class ShufflenetUnit2(tf.keras.Model): + """The unit of shufflenetv2 for stride=2""" + def __init__(self, in_channel, out_channel): + super(ShufflenetUnit2, self).__init__() + + assert out_channel % 2 == 0 + self.in_channel = in_channel + self.out_channel = out_channel + + self.conv1_bn_relu = Conv2D_BN_ReLU(out_channel // 2, 1, 1) + self.dconv_bn = DepthwiseConv2D_BN(3, 2) + self.conv2_bn_relu = Conv2D_BN_ReLU(out_channel - in_channel, 1, 1) + + # for shortcut + self.shortcut_dconv_bn = DepthwiseConv2D_BN(3, 2) + self.shortcut_conv_bn_relu = Conv2D_BN_ReLU(in_channel, 1, 1) + + def call(self, inputs, training=False): + shortcut, x = inputs, inputs + + x = self.conv1_bn_relu(x, training=training) + x = self.dconv_bn(x, training=training) + x = self.conv2_bn_relu(x, training=training) + + shortcut = self.shortcut_dconv_bn(shortcut, training=training) + shortcut = self.shortcut_conv_bn_relu(shortcut, training=training) + + x = tf.concat([shortcut, x], axis=3) + x = channle_shuffle(x, 2) + return x + +class ShufflenetStage(tf.keras.Model): + """The stage of shufflenet""" + def __init__(self, in_channel, out_channel, num_blocks): + super(ShufflenetStage, self).__init__() + + self.in_channel = in_channel + self.out_channel = out_channel + + self.ops = [] + for i in range(num_blocks): + if i == 0: + op = ShufflenetUnit2(in_channel, out_channel) + else: + op = ShufflenetUnit1(out_channel) + self.ops.append(op) + + def call(self, inputs, training=False): + x = inputs + for op in self.ops: + x = op(x, training=training) + return x + + +class ShuffleNetv2(tf.keras.Model): + """Shufflenetv2""" + def __init__(self, num_classes, first_channel=24, channels_per_stage=(116, 232, 464)): + super(ShuffleNetv2, self).__init__() + + self.num_classes = num_classes + + self.conv1_bn_relu = Conv2D_BN_ReLU(first_channel, 3, 2) + self.pool1 = MaxPool2D(3, strides=2, padding="SAME") + self.stage2 = ShufflenetStage(first_channel, channels_per_stage[0], 4) + self.stage3 = ShufflenetStage(channels_per_stage[0], channels_per_stage[1], 8) + self.stage4 = ShufflenetStage(channels_per_stage[1], channels_per_stage[2], 4) + self.conv5_bn_relu = Conv2D_BN_ReLU(1024, 1, 1) + self.gap = GlobalAveragePooling2D() + self.linear = Dense(num_classes) + + def call(self, inputs, training=False): + x = self.conv1_bn_relu(inputs, training=training) + x = self.pool1(x) + x = self.stage2(x, training=training) + x = self.stage3(x, training=training) + x = self.stage4(x, training=training) + x = self.conv5_bn_relu(x, training=training) + x = self.gap(x) + x = self.linear(x) + return x + + +if __name__ =="__main__": + """ + inputs = tf.placeholder(tf.float32, [None, 224, 224, 3]) + + model = ShuffleNetv2(1000) + outputs = 
model(inputs) + + print(model.summary()) + + with tf.Session() as sess: + pass + + + vars = [] + for v in tf.global_variables(): + + vars.append((v.name, v)) + print(v.name) + print(len(vars)) + + + import numpy as np + + path = "C:/models/ShuffleNetV2-1x.npz" + weights = np.load(path) + np_vars = [] + for k in weights: + k_ = k.replace("beta", "gbeta") + k_ = k_.replace("/dconv", "/conv10_dconv") + k_ = k_.replace("shortcut_dconv", "shortcut_a_dconv") + k_ = k_.replace("conv5", "su_conv5") + k_ = k_.replace("linear", "t_linear") + np_vars.append((k_, weights[k])) + np_vars.sort(key=lambda x: x[0]) + + for k, _ in np_vars: + print(k) + + saver = tf.train.Saver(tf.global_variables()) + with tf.Session() as sess: + sess.run(tf.global_variables_initializer()) + + assign_ops = [] + for id in range(len(vars)): + print(vars[id][0], np_vars[id][0]) + assign_ops.append(tf.assign(vars[id][1], np_vars[id][1])) + + sess.run(assign_ops) + saver.save(sess, "./models/shufflene_v2_1.0.ckpt") + + model.save("./models/shufflenet_v2_1.0.hdf5") + + """ + + import numpy as np + from tensorflow.keras.preprocessing import image + from tensorflow.keras.applications.densenet import preprocess_input, decode_predictions + + img_path = './images/cat.jpg' + img = image.load_img(img_path, target_size=(224, 224)) + x = image.img_to_array(img) + x = np.expand_dims(x, axis=0) + x = preprocess_input(x) + + inputs = tf.placeholder(tf.float32, [None, 224, 224, 3]) + model = ShuffleNetv2(1000) + outputs = model(inputs, training=False) + outputs = tf.nn.softmax(outputs) + + saver = tf.train.Saver() + with tf.Session() as sess: + saver.restore(sess, "./models/shufflene_v2_1.0.ckpt") + preds = sess.run(outputs, feed_dict={inputs: x}) + print(decode_predictions(preds, top=3)[0]) + diff --git a/ObjectDetections/yolo2/config.py b/ObjectDetections/yolo2/config.py new file mode 100644 index 0000000..ad7fa91 --- /dev/null +++ b/ObjectDetections/yolo2/config.py @@ -0,0 +1,25 @@ +""" +Yolov2 anchors and coco classes +""" + +""" +anchors = [[0.738768, 0.874946], + [2.42204, 2.65704], + [4.30971, 7.04493], + [10.246, 4.59428], + [12.6868, 11.8741]] +""" +anchors = [[0.57273, 0.677385], + [1.87446, 2.06253], + [3.33843, 5.47434], + [7.88282, 3.52778], + [9.77052, 9.16828]] + +def read_coco_labels(): + f = open("./data/coco_classes.txt") + class_names = [] + for l in f.readlines(): + class_names.append(l[:-1]) + return class_names + +class_names = read_coco_labels() \ No newline at end of file diff --git a/ObjectDetections/yolo2/data/coco_classes.txt b/ObjectDetections/yolo2/data/coco_classes.txt new file mode 100644 index 0000000..ca76c80 --- /dev/null +++ b/ObjectDetections/yolo2/data/coco_classes.txt @@ -0,0 +1,80 @@ +person +bicycle +car +motorbike +aeroplane +bus +train +truck +boat +traffic light +fire hydrant +stop sign +parking meter +bench +bird +cat +dog +horse +sheep +cow +elephant +bear +zebra +giraffe +backpack +umbrella +handbag +tie +suitcase +frisbee +skis +snowboard +sports ball +kite +baseball bat +baseball glove +skateboard +surfboard +tennis racket +bottle +wine glass +cup +fork +knife +spoon +bowl +banana +apple +sandwich +orange +broccoli +carrot +hot dog +pizza +donut +cake +chair +sofa +pottedplant +bed +diningtable +toilet +tvmonitor +laptop +mouse +remote +keyboard +cell phone +microwave +oven +toaster +sink +refrigerator +book +clock +vase +scissors +teddy bear +hair drier +toothbrush diff --git a/ObjectDetections/yolo2/demo.py b/ObjectDetections/yolo2/demo.py new file mode 100644 index 0000000..4a7183c --- 
/dev/null +++ b/ObjectDetections/yolo2/demo.py @@ -0,0 +1,50 @@ +""" +Demo for yolov2 +""" + +import numpy as np +import tensorflow as tf +import cv2 +from PIL import Image + +from model import darknet +from detect_ops import decode +from utils import preprocess_image, postprocess, draw_detection +from config import anchors, class_names + + +input_size = (416, 416) +image_file = "./images/car.jpg" +image = cv2.imread(image_file) +image_shape = image.shape[:2] +image_cp = preprocess_image(image, input_size) +""" +image = Image.open(image_file) +image_cp = image.resize(input_size, Image.BICUBIC) +image_cp = np.array(image_cp, dtype=np.float32)/255.0 +image_cp = np.expand_dims(image_cp, 0) +#print(image_cp) +""" + + +images = tf.placeholder(tf.float32, [1, input_size[0], input_size[1], 3]) +detection_feat = darknet(images) +feat_sizes = input_size[0] // 32, input_size[1] // 32 +detection_results = decode(detection_feat, feat_sizes, len(class_names), anchors) + +checkpoint_path = "./checkpoint_dir/yolo2_coco.ckpt" +saver = tf.train.Saver() +with tf.Session() as sess: + saver.restore(sess, checkpoint_path) + bboxes, obj_probs, class_probs = sess.run(detection_results, feed_dict={images: image_cp}) + +bboxes, scores, class_inds = postprocess(bboxes, obj_probs, class_probs, + image_shape=image_shape) +img_detection = draw_detection(image, bboxes, scores, class_inds, class_names) +cv2.imwrite("detection.jpg", img_detection) +cv2.imshow("detection results", img_detection) + +cv2.waitKey(0) + + + diff --git a/ObjectDetections/yolo2/detect_ops.py b/ObjectDetections/yolo2/detect_ops.py new file mode 100644 index 0000000..6060ece --- /dev/null +++ b/ObjectDetections/yolo2/detect_ops.py @@ -0,0 +1,39 @@ +""" +Detection ops for Yolov2 +""" + +import tensorflow as tf +import numpy as np + + +def decode(detection_feat, feat_sizes=(13, 13), num_classes=80, + anchors=None): + """decode from the detection feature""" + H, W = feat_sizes + num_anchors = len(anchors) + detetion_results = tf.reshape(detection_feat, [-1, H * W, num_anchors, + num_classes + 5]) + + bbox_xy = tf.nn.sigmoid(detetion_results[:, :, :, 0:2]) + bbox_wh = tf.exp(detetion_results[:, :, :, 2:4]) + obj_probs = tf.nn.sigmoid(detetion_results[:, :, :, 4]) + class_probs = tf.nn.softmax(detetion_results[:, :, :, 5:]) + + anchors = tf.constant(anchors, dtype=tf.float32) + + height_ind = tf.range(H, dtype=tf.float32) + width_ind = tf.range(W, dtype=tf.float32) + x_offset, y_offset = tf.meshgrid(height_ind, width_ind) + x_offset = tf.reshape(x_offset, [1, -1, 1]) + y_offset = tf.reshape(y_offset, [1, -1, 1]) + + # decode + bbox_x = (bbox_xy[:, :, :, 0] + x_offset) / W + bbox_y = (bbox_xy[:, :, :, 1] + y_offset) / H + bbox_w = bbox_wh[:, :, :, 0] * anchors[:, 0] / W * 0.5 + bbox_h = bbox_wh[:, :, :, 1] * anchors[:, 1] / H * 0.5 + + bboxes = tf.stack([bbox_x - bbox_w, bbox_y - bbox_h, + bbox_x + bbox_w, bbox_y + bbox_h], axis=3) + + return bboxes, obj_probs, class_probs diff --git a/ObjectDetections/yolo2/loss.py b/ObjectDetections/yolo2/loss.py new file mode 100644 index 0000000..931359f --- /dev/null +++ b/ObjectDetections/yolo2/loss.py @@ -0,0 +1,86 @@ +""" +Loss function for YOLOv2 +""" + +import numpy as np +import tensorflow as tf + +def compute_loss(predictions, targets, anchors, scales, num_classes=20, feat_sizes=(13, 13)): + """ + Compute the loss of Yolov2 for training + """ + H, W = feat_sizes + C = num_classes + B = len(anchors) + anchors = tf.constant(anchors, dtype=tf.float32) + anchors = tf.reshape(anchors, [1, 1, B, 2]) + + sprob, 
sconf, snoob, scoor = scales # the scales for different parts + + _coords = targets["coords"] # ground truth [-1, H*W, B, 4] + _probs = targets["probs"] # class probability [-1, H*W, B, C] one hot + _confs = targets["confs"] # 1 for object, 0 for background, [-1, H*W, B] + + # decode the net output + predictions = tf.reshape(predictions, [-1, H, W, B, (5 + C)]) + coords = predictions[:, :, :, :, 0:4] # t_x, t_y, t_w, t_h + coords = tf.reshape(coords, [-1, H*W, B, 4]) + coords_xy = tf.nn.sigmoid(coords[:, :, :, 0:2]) # (0, 1) relative cell top left + coords_wh = tf.sqrt(tf.exp(coords[:, :, :, 2:4]) * anchors / + np.reshape([W, H], [1, 1, 1, 2])) # sqrt of w, h (0, 1) + coords = tf.concat([coords_xy, coords_wh], axis=3) # [batch_size, H*W, B, 4] + + confs = tf.nn.sigmoid(predictions[:, :, :, :, 4]) # object confidence + confs = tf.reshape(confs, [-1, H*W, B, 1]) + + probs = tf.nn.softmax(predictions[:, :, :, :, 5:]) # class probability + probs = tf.reshape(probs, [-1, H*W, B, C]) + + preds = tf.concat([coords, confs, probs], axis=3) # [-1, H*W, B, (4+1+C)] + + # match ground truths with anchors (predictions in fact) + # assign ground truths to the predictions with the best IOU (select 1 among 5 anchors) + wh = tf.pow(coords[:, :, :, 2:4], 2) * np.reshape([W, H], [1, 1, 1, 2]) + areas = wh[:, :, :, 0] * wh[:, :, :, 1] + centers = coords[:, :, :, 0:2] + up_left, down_right = centers - (wh * 0.5), centers + (wh * 0.5) + + # the ground truth + _wh = tf.pow(_coords[:, :, :, 2:4], 2) * np.reshape([W, H], [1, 1, 1, 2]) + _areas = _wh[:, :, :, 0] * _wh[:, :, :, 1] + _centers = _coords[:, :, :, 0:2] + _up_left, _down_right = _centers - (_wh * 0.5), _centers + (_wh * 0.5) + + # compute IOU + inter_upleft = tf.maximum(up_left, _up_left) + inter_downright = tf.minimum(down_right, _down_right) + inter_wh = tf.maximum(inter_downright - inter_upleft, 0.0) + intersects = inter_wh[:, :, :, 0] * inter_wh[:, :, :, 1] + ious = tf.truediv(intersects, areas + _areas - intersects) + + best_iou_mask = tf.equal(ious, tf.reduce_max(ious, axis=2, keep_dims=True)) + best_iou_mask = tf.cast(best_iou_mask, tf.float32) + mask = best_iou_mask * _confs # [-1, H*W, B] + mask = tf.expand_dims(mask, -1) # [-1, H*W, B, 1] + + # compute weight terms + confs_w = snoob * (1 - mask) + sconf * mask + coords_w = scoor * mask + probs_w = sprob * mask + weights = tf.concat([coords_w, confs_w, probs_w], axis=3) + + truths = tf.concat([_coords, tf.expand_dims(_confs, -1), _probs], 3) + + loss = tf.pow(preds - truths, 2) * weights + loss = tf.reduce_sum(loss, axis=[1, 2, 3]) + loss = 0.5 * tf.reduce_mean(loss) + return loss + + + + + + + + + diff --git a/ObjectDetections/yolo2/model.png b/ObjectDetections/yolo2/model.png new file mode 100644 index 0000000..07ab142 Binary files /dev/null and b/ObjectDetections/yolo2/model.png differ diff --git a/ObjectDetections/yolo2/model.py b/ObjectDetections/yolo2/model.py new file mode 100644 index 0000000..697dd37 --- /dev/null +++ b/ObjectDetections/yolo2/model.py @@ -0,0 +1,89 @@ +""" +YOLOv2 implemented by Tensorflow, only for predicting +""" +import os + +import numpy as np +import tensorflow as tf + + + +######## basic layers ####### + +def leaky_relu(x): + return tf.nn.leaky_relu(x, alpha=0.1, name="leaky_relu") + +# Conv2d +def conv2d(x, filters, size, pad=0, stride=1, batch_normalize=1, + activation=leaky_relu, use_bias=False, name="conv2d"): + if pad > 0: + x = tf.pad(x, [[0, 0], [pad, pad], [pad, pad], [0, 0]]) + out = tf.layers.conv2d(x, filters, size, strides=stride, padding="VALID", + 
activation=None, use_bias=use_bias, name=name) + if batch_normalize == 1: + out = tf.layers.batch_normalization(out, axis=-1, momentum=0.9, + training=False, name=name+"_bn") + if activation: + out = activation(out) + return out + +# maxpool2d +def maxpool(x, size=2, stride=2, name="maxpool"): + return tf.layers.max_pooling2d(x, size, stride) + +# reorg layer +def reorg(x, stride): + return tf.extract_image_patches(x, [1, stride, stride, 1], + [1, stride, stride, 1], [1,1,1,1], padding="VALID") + + +def darknet(images, n_last_channels=425): + """Darknet19 for YOLOv2""" + net = conv2d(images, 32, 3, 1, name="conv1") + net = maxpool(net, name="pool1") + net = conv2d(net, 64, 3, 1, name="conv2") + net = maxpool(net, name="pool2") + net = conv2d(net, 128, 3, 1, name="conv3_1") + net = conv2d(net, 64, 1, name="conv3_2") + net = conv2d(net, 128, 3, 1, name="conv3_3") + net = maxpool(net, name="pool3") + net = conv2d(net, 256, 3, 1, name="conv4_1") + net = conv2d(net, 128, 1, name="conv4_2") + net = conv2d(net, 256, 3, 1, name="conv4_3") + net = maxpool(net, name="pool4") + net = conv2d(net, 512, 3, 1, name="conv5_1") + net = conv2d(net, 256, 1, name="conv5_2") + net = conv2d(net, 512, 3, 1, name="conv5_3") + net = conv2d(net, 256, 1, name="conv5_4") + net = conv2d(net, 512, 3, 1, name="conv5_5") + shortcut = net + net = maxpool(net, name="pool5") + net = conv2d(net, 1024, 3, 1, name="conv6_1") + net = conv2d(net, 512, 1, name="conv6_2") + net = conv2d(net, 1024, 3, 1, name="conv6_3") + net = conv2d(net, 512, 1, name="conv6_4") + net = conv2d(net, 1024, 3, 1, name="conv6_5") + # --------- + net = conv2d(net, 1024, 3, 1, name="conv7_1") + net = conv2d(net, 1024, 3, 1, name="conv7_2") + # shortcut + shortcut = conv2d(shortcut, 64, 1, name="conv_shortcut") + shortcut = reorg(shortcut, 2) + net = tf.concat([shortcut, net], axis=-1) + net = conv2d(net, 1024, 3, 1, name="conv8") + # detection layer + net = conv2d(net, n_last_channels, 1, batch_normalize=0, + activation=None, use_bias=True, name="conv_dec") + return net + + + +if __name__ == "__main__": + x = tf.random_normal([1, 416, 416, 3]) + model = darknet(x) + + saver = tf.train.Saver() + with tf.Session() as sess: + saver.restore(sess, "./checkpoint_dir/yolo2_coco.ckpt") + print(sess.run(model).shape) + diff --git a/ObjectDetections/yolo2/utils.py b/ObjectDetections/yolo2/utils.py new file mode 100644 index 0000000..5821a3b --- /dev/null +++ b/ObjectDetections/yolo2/utils.py @@ -0,0 +1,163 @@ +""" +Help functions for YOLOv2 +""" +import random +import colorsys + +import cv2 +import numpy as np + + + +############## preprocess image ################## + + +def preprocess_image(image, image_size=(416, 416)): + """Preprocess a image to inference""" + image_cp = np.copy(image).astype(np.float32) + # resize the image + image_rgb = cv2.cvtColor(image_cp, cv2.COLOR_BGR2RGB) + image_resized = cv2.resize(image_rgb, image_size) + # normalize + image_normalized = image_resized.astype(np.float32) / 255.0 + # expand the batch_size dim + image_expanded = np.expand_dims(image_normalized, axis=0) + return image_expanded + +def postprocess(bboxes, obj_probs, class_probs, image_shape=(416, 416), + threshold=0.5): + """post process the detection results""" + bboxes = np.reshape(bboxes, [-1, 4]) + bboxes[:, 0::2] *= float(image_shape[1]) + bboxes[:, 1::2] *= float(image_shape[0]) + bboxes = bboxes.astype(np.int32) + + # clip the bboxs + bbox_ref = [0, 0, image_shape[1] - 1, image_shape[0] - 1] + bboxes = bboxes_clip(bbox_ref, bboxes) + + obj_probs = 
np.reshape(obj_probs, [-1]) + class_probs = np.reshape(class_probs, [len(obj_probs), -1]) + class_inds = np.argmax(class_probs, axis=1) + class_probs = class_probs[np.arange(len(obj_probs)), class_inds] + scores = obj_probs * class_probs + + # filter bboxes with scores > threshold + keep_inds = scores > threshold + bboxes = bboxes[keep_inds] + scores = scores[keep_inds] + class_inds = class_inds[keep_inds] + + # sort top K + class_inds, scores, bboxes = bboxes_sort(class_inds, scores, bboxes) + # nms + class_inds, scores, bboxes = bboxes_nms(class_inds, scores, bboxes) + + return bboxes, scores, class_inds + +def draw_detection(im, bboxes, scores, cls_inds, labels, thr=0.3): + # for display + ############################ + # Generate colors for drawing bounding boxes. + hsv_tuples = [(x / float(len(labels)), 1., 1.) + for x in range(len(labels))] + colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples)) + colors = list( + map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), + colors)) + random.seed(10101) # Fixed seed for consistent colors across runs. + random.shuffle(colors) # Shuffle colors to decorrelate adjacent classes. + random.seed(None) # Reset seed to default. + # draw image + imgcv = np.copy(im) + h, w, _ = imgcv.shape + for i, box in enumerate(bboxes): + if scores[i] < thr: + continue + cls_indx = cls_inds[i] + + thick = int((h + w) / 300) + cv2.rectangle(imgcv, + (box[0], box[1]), (box[2], box[3]), + colors[cls_indx], thick) + mess = '%s: %.3f' % (labels[cls_indx], scores[i]) + if box[1] < 20: + text_loc = (box[0] + 2, box[1] + 15) + else: + text_loc = (box[0], box[1] - 10) + cv2.putText(imgcv, mess, text_loc, + cv2.FONT_HERSHEY_SIMPLEX, 1e-3 * h, colors[cls_indx], thick // 3) + + return imgcv + + +############## process bboxes ################## +def bboxes_clip(bbox_ref, bboxes): + """Clip bounding boxes with respect to reference bbox. + """ + bboxes = np.copy(bboxes) + bboxes = np.transpose(bboxes) + bbox_ref = np.transpose(bbox_ref) + bboxes[0] = np.maximum(bboxes[0], bbox_ref[0]) + bboxes[1] = np.maximum(bboxes[1], bbox_ref[1]) + bboxes[2] = np.minimum(bboxes[2], bbox_ref[2]) + bboxes[3] = np.minimum(bboxes[3], bbox_ref[3]) + bboxes = np.transpose(bboxes) + return bboxes + +def bboxes_sort(classes, scores, bboxes, top_k=400): + """Sort bounding boxes by decreasing order and keep only the top_k + """ + # if priority_inside: + # inside = (bboxes[:, 0] > margin) & (bboxes[:, 1] > margin) & \ + # (bboxes[:, 2] < 1-margin) & (bboxes[:, 3] < 1-margin) + # idxes = np.argsort(-scores) + # inside = inside[idxes] + # idxes = np.concatenate([idxes[inside], idxes[~inside]]) + idxes = np.argsort(-scores) + classes = classes[idxes][:top_k] + scores = scores[idxes][:top_k] + bboxes = bboxes[idxes][:top_k] + return classes, scores, bboxes + +def bboxes_iou(bboxes1, bboxes2): + """Computing iou between bboxes1 and bboxes2. + Note: bboxes1 and bboxes2 can be multi-dimensional, but should broacastable. + """ + bboxes1 = np.transpose(bboxes1) + bboxes2 = np.transpose(bboxes2) + # Intersection bbox and volume. + int_ymin = np.maximum(bboxes1[0], bboxes2[0]) + int_xmin = np.maximum(bboxes1[1], bboxes2[1]) + int_ymax = np.minimum(bboxes1[2], bboxes2[2]) + int_xmax = np.minimum(bboxes1[3], bboxes2[3]) + + int_h = np.maximum(int_ymax - int_ymin, 0.) + int_w = np.maximum(int_xmax - int_xmin, 0.) + int_vol = int_h * int_w + # Union volume. 
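+    # IoU = intersection / (area1 + area2 - intersection)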
+ vol1 = (bboxes1[2] - bboxes1[0]) * (bboxes1[3] - bboxes1[1]) + vol2 = (bboxes2[2] - bboxes2[0]) * (bboxes2[3] - bboxes2[1]) + iou = int_vol / (vol1 + vol2 - int_vol) + return iou + +def bboxes_nms(classes, scores, bboxes, nms_threshold=0.5): + """Apply non-maximum selection to bounding boxes. + """ + keep_bboxes = np.ones(scores.shape, dtype=np.bool) + for i in range(scores.size-1): + if keep_bboxes[i]: + # Computer overlap with bboxes which are following. + overlap = bboxes_iou(bboxes[i], bboxes[(i+1):]) + # Overlap threshold for keeping + checking part of the same class + keep_overlap = np.logical_or(overlap < nms_threshold, classes[(i+1):] != classes[i]) + keep_bboxes[(i+1):] = np.logical_and(keep_bboxes[(i+1):], keep_overlap) + + idxes = np.where(keep_bboxes) + return classes[idxes], scores[idxes], bboxes[idxes] + + + + + + diff --git a/README.md b/README.md index bd61406..b1ebefe 100644 --- a/README.md +++ b/README.md @@ -18,13 +18,17 @@ Note: the project aims at imitating the well-implemented algorithms in [Deep Lea ### CNN Models - MobileNet [[self](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/CNNs/MobileNet.py) [paper](https://arxiv.org/abs/1704.04861) [ref](https://github.com/Zehaos/MobileNet/blob/master/nets/mobilenet.py)] +- MobileNetv2 [[self](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/CNNs/mobilenet_v2.py) [paper](https://arxiv.org/pdf/1801.04381.pdf) [ref](https://github.com/tensorflow/models/tree/master/research/slim/nets/mobilenet)] - SqueezeNet [[self](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/CNNs/SqueezeNet.py) [paper](https://arxiv.org/abs/1602.07360)] - ResNet [[self](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/CNNs/ResNet50.py) [caffe ref](https://github.com/KaimingHe/deep-residual-networks) [paper1](https://arxiv.org/abs/1512.03385) [paper2](https://arxiv.org/abs/1603.05027)] - ShuffleNet [[self](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/CNNs/ShuffleNet.py) by pytorch [paper](http://cn.arxiv.org/pdf/1707.01083v2)] +- ShuffleNetv2 [[self](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/CNNs/shufflenet_v2.py) [ref](https://github.com/tensorpack/tensorpack/blob/master/examples/ImageNetModels/shufflenet.py) [paper](https://arxiv.org/abs/1807.11164)] +- DenseNet [[self](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/CNNs/densenet.py) [pytorch_ref](https://github.com/pytorch/vision/blob/master/torchvision/models/densenet.py) [paper](https://arxiv.org/abs/1608.06993)] ### Object detection - YOLOv1 [[self](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/ObjectDetections/yolo/yolo_tf.py) [paper](https://arxiv.org/abs/1506.02640) [ref](https://github.com/gliese581gg/YOLO_tensorflow)] - SSD [[self](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/ObjectDetections/SSD/SSD_demo.py) [paper](https://arxiv.org/pdf/1611.10012.pdf) [slides](http://www.cs.unc.edu/~wliu/papers/ssd_eccv2016_slide.pdf) [cafe](https://github.com/weiliu89/caffe/tree/ssd) [TF](https://arxiv.org/abs/1512.02325) [pytorch](https://github.com/amdegroot/ssd.pytorch) ] +- YOLOv2 [[self](https://github.com/xiaohu2015/DeepLearning_tutorials/tree/master/ObjectDetections/yolo2) [paper](https://arxiv.org/abs/1612.08242) [ref](https://github.com/yhcc/yolo2)] ### Practical examples You can find more practical examples with tensorflow here: diff --git a/examples/cnn_setence_classification/text_cnn.py 
b/examples/cnn_setence_classification/text_cnn.py index f186faf..3518a5c 100644 --- a/examples/cnn_setence_classification/text_cnn.py +++ b/examples/cnn_setence_classification/text_cnn.py @@ -81,7 +81,7 @@ def __init__(self, seq_len, vocab_size, embedding_size, filter_sizes, num_filter pooled_outputs.append(pool_output) # [None, 1, 1, num_filters] # Combine all pooled features num_filters_total = num_filters * len(filter_sizes) - self.h_pool = tf.concat(3, pooled_outputs) # [None, 1, 1, num_filters_total] + self.h_pool = tf.concat( pooled_outputs,3) # [None, 1, 1, num_filters_total] self.h_pool_flat = tf.reshape(self.h_pool, shape=[-1, num_filters_total]) # [None, num_filters_total] # The dropout layer @@ -100,7 +100,7 @@ def __init__(self, seq_len, vocab_size, embedding_size, filter_sizes, num_filter # The loss with tf.name_scope("loss"): - losses = tf.nn.softmax_cross_entropy_with_logits(self.scores, self.y) + losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores, labels=self.y) self.loss = tf.reduce_mean(losses) + L2_loss * l2_reg_lambda # Accuracy diff --git a/examples/cnn_setence_classification/train_cnn.py b/examples/cnn_setence_classification/train_cnn.py index aefa2b5..bc58dc0 100644 --- a/examples/cnn_setence_classification/train_cnn.py +++ b/examples/cnn_setence_classification/train_cnn.py @@ -1,74 +1,74 @@ -""" -Test the TextRNN class -2016/12/22 -""" -import os -import sys -import numpy as np -import tensorflow as tf -from sklearn.model_selection import train_test_split -from tensorflow.contrib import learn - -from data_helpers import load_data_and_labels, batch_iter -from text_cnn import TextCNN - - -# Load original data -path = sys.path[0] -pos_filename = path + "/data/rt-polarity.pos" -neg_filename = path + "/data/rt-polarity.neg" - -X_data, y_data = load_data_and_labels(pos_filename, neg_filename) -max_document_length = max([len(sen.split(" ")) for sen in X_data]) -print("Max_document_length:,", max_document_length) -# Create the vacabulary -vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length) -# The idx data -x = np.array(list(vocab_processor.fit_transform(X_data)), dtype=np.float32) -y = np.array(y_data, dtype=np.int32) -vocabulary_size = len(vocab_processor.vocabulary_) -print("The size of vocabulary:", vocabulary_size) -# Split the data -X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=1111) -print("X_train shape {0}, y_train shape {1}".format(X_train.shape, y_train.shape)) -print("X_test shape {0}, y_test shape {1}".format(X_test.shape, y_test.shape)) - -# The parameters of RNN -seq_len = X_train.shape[1] -vocab_size = vocabulary_size -embedding_size = 128 -filter_sizes = [2, 3, 4] -num_filters = 128 -num_classes = y_train.shape[1] -l2_reg_lambda = 0.0 - -# Construct RNN model -text_rnn_model = TextCNN(seq_len=seq_len, vocab_size=vocab_size, embedding_size=embedding_size, filter_sizes= - filter_sizes, num_filters=num_filters, num_classes=num_classes) -loss = text_rnn_model.loss -train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss) -accuracy = text_rnn_model.accuracy -# The parameters for training -batch_size = 64 -training_epochs = 10 -dispaly_every = 1 -dropout_keep_prob = 0.5 - -batch_num = int(X_train.shape[0]/batch_size) - -sess = tf.Session() -sess.run(tf.global_variables_initializer()) -print("Starting training...") -for epoch in range(training_epochs): - avg_cost = 0 - for batch in range(batch_num): - _, cost = sess.run([train_op, loss], feed_dict={text_rnn_model.x: 
X_train[batch*batch_size:(batch+1)*batch_size], - text_rnn_model.y: y_train[batch*batch_size:(batch+1)*batch_size], - text_rnn_model.dropout_keep_prob:dropout_keep_prob}) - avg_cost += cost - if epoch % dispaly_every == 0: - cost, acc = sess.run([loss, accuracy], feed_dict={text_rnn_model.x: X_test, - text_rnn_model.y: y_test, - text_rnn_model.dropout_keep_prob: 1.0}) - print("\nEpoch {0} : loss {1}, accuracy {2}".format(epoch, cost, acc)) - +""" +Test the TextRNN class +2016/12/22 +""" +import os +import sys +import numpy as np +import tensorflow as tf +from sklearn.model_selection import train_test_split +from tensorflow.contrib import learn + +from data_helpers import load_data_and_labels, batch_iter +from text_cnn import TextCNN +import pudb;pu.db + +# Load original data +path = sys.path[0] +pos_filename = path + "/data/rt-polarity.pos" +neg_filename = path + "/data/rt-polarity.neg" + +X_data, y_data = load_data_and_labels(pos_filename, neg_filename) +max_document_length = max([len(sen.split(" ")) for sen in X_data]) +print("Max_document_length:,", max_document_length) +# Create the vacabulary +vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length) +# The idx data +x = np.array(list(vocab_processor.fit_transform(X_data)), dtype=np.float32) +y = np.array(y_data, dtype=np.int32) +vocabulary_size = len(vocab_processor.vocabulary_) +print("The size of vocabulary:", vocabulary_size) +# Split the data +X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=1111) +print("X_train shape {0}, y_train shape {1}".format(X_train.shape, y_train.shape)) +print("X_test shape {0}, y_test shape {1}".format(X_test.shape, y_test.shape)) + +# The parameters of RNN +seq_len = X_train.shape[1] +vocab_size = vocabulary_size +embedding_size = 128 +filter_sizes = [2, 3, 4] +num_filters = 128 +num_classes = y_train.shape[1] +l2_reg_lambda = 0.0 + +# Construct RNN model +text_rnn_model = TextCNN(seq_len=seq_len, vocab_size=vocab_size, embedding_size=embedding_size, filter_sizes= + filter_sizes, num_filters=num_filters, num_classes=num_classes) +loss = text_rnn_model.loss +train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss) +accuracy = text_rnn_model.accuracy +# The parameters for training +batch_size = 64 +training_epochs = 10 +dispaly_every = 1 +dropout_keep_prob = 0.5 + +batch_num = int(X_train.shape[0]/batch_size) + +sess = tf.Session() +sess.run(tf.global_variables_initializer()) +print("Starting training...") +for epoch in range(training_epochs): + avg_cost = 0 + for batch in range(batch_num): + _, cost = sess.run([train_op, loss], feed_dict={text_rnn_model.x: X_train[batch*batch_size:(batch+1)*batch_size], + text_rnn_model.y: y_train[batch*batch_size:(batch+1)*batch_size], + text_rnn_model.dropout_keep_prob:dropout_keep_prob}) + avg_cost += cost + if epoch % dispaly_every == 0: + cost, acc = sess.run([loss, accuracy], feed_dict={text_rnn_model.x: X_test, + text_rnn_model.y: y_test, + text_rnn_model.dropout_keep_prob: 1.0}) + print("\nEpoch {0} : loss {1}, accuracy {2}".format(epoch, cost, acc)) +