"""
SSD net (VGG-based) 300x300
"""
from collections import namedtuple

import numpy as np
import tensorflow as tf

from ssd_layers import conv2d, max_pool2d, l2norm, dropout, \
    pad2d, ssd_multibox_layer
from ssd_anchors import ssd_anchors_all_layers

# SSD parameters
SSDParams = namedtuple('SSDParameters', ['img_shape',      # input image size: 300x300
                                         'num_classes',    # number of classes: 20 + 1 (background)
                                         'no_annotation_label',
                                         'feat_layers',    # names of the feature layers used for detection
                                         'feat_shapes',    # feature map sizes of the detection layers
                                         'anchor_size_bounds',  # lower and upper bounds of the anchor sizes
                                         'anchor_sizes',    # anchor sizes per detection layer
                                         'anchor_ratios',   # anchor aspect ratios per detection layer
                                         'anchor_steps',    # cell size (in pixels) per detection layer
                                         'anchor_offset',   # offset of the anchor center within a cell
                                         'normalizations',  # L2 normalization scale per detection layer (-1: none)
                                         'prior_scaling'    # scaling applied to the encoded box coordinates
                                         ])
class SSD(object):
    """SSD net 300"""
    def __init__(self, is_training=True):
        self.is_training = is_training
        self.threshold = 0.5  # class score threshold
        self.ssd_params = SSDParams(img_shape=(300, 300),
                                    num_classes=21,
                                    no_annotation_label=21,
                                    feat_layers=["block4", "block7", "block8", "block9", "block10", "block11"],
                                    feat_shapes=[(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)],
                                    anchor_size_bounds=[0.15, 0.90],  # differs from the original paper
                                    anchor_sizes=[(21., 45.),
                                                  (45., 99.),
                                                  (99., 153.),
                                                  (153., 207.),
                                                  (207., 261.),
                                                  (261., 315.)],
                                    anchor_ratios=[[2, .5],
                                                   [2, .5, 3, 1. / 3],
                                                   [2, .5, 3, 1. / 3],
                                                   [2, .5, 3, 1. / 3],
                                                   [2, .5],
                                                   [2, .5]],
                                    anchor_steps=[8, 16, 32, 64, 100, 300],
                                    anchor_offset=0.5,
                                    normalizations=[20, -1, -1, -1, -1, -1],
                                    prior_scaling=[0.1, 0.1, 0.2, 0.2]
                                    )

        predictions, logits, locations = self._build_net()
        # self._update_feat_shapes_from_net(predictions)
        classes, scores, bboxes = self._bboxes_select(predictions, locations)
        self._classes = classes
        self._scores = scores
        self._bboxes = bboxes

    def _build_net(self):
        """Construct the SSD net"""
        self.end_points = {}  # record the detection layers output
        self._images = tf.placeholder(tf.float32, shape=[None, self.ssd_params.img_shape[0],
                                                          self.ssd_params.img_shape[1], 3])
        with tf.variable_scope("ssd_300_vgg"):
            # original VGG layers
            # block 1
            net = conv2d(self._images, 64, 3, scope="conv1_1")
            net = conv2d(net, 64, 3, scope="conv1_2")
            self.end_points["block1"] = net
            net = max_pool2d(net, 2, scope="pool1")
            # block 2
            net = conv2d(net, 128, 3, scope="conv2_1")
            net = conv2d(net, 128, 3, scope="conv2_2")
            self.end_points["block2"] = net
            net = max_pool2d(net, 2, scope="pool2")
            # block 3
            net = conv2d(net, 256, 3, scope="conv3_1")
            net = conv2d(net, 256, 3, scope="conv3_2")
            net = conv2d(net, 256, 3, scope="conv3_3")
            self.end_points["block3"] = net
            net = max_pool2d(net, 2, scope="pool3")
            # block 4
            net = conv2d(net, 512, 3, scope="conv4_1")
            net = conv2d(net, 512, 3, scope="conv4_2")
            net = conv2d(net, 512, 3, scope="conv4_3")
            self.end_points["block4"] = net
            net = max_pool2d(net, 2, scope="pool4")
            # block 5
            net = conv2d(net, 512, 3, scope="conv5_1")
            net = conv2d(net, 512, 3, scope="conv5_2")
            net = conv2d(net, 512, 3, scope="conv5_3")
            self.end_points["block5"] = net
            net = max_pool2d(net, 3, stride=1, scope="pool5")

            # additional SSD layers
            # block 6: dilated (atrous) convolution
            net = conv2d(net, 1024, 3, dilation_rate=6, scope="conv6")
            self.end_points["block6"] = net
            # net = dropout(net, is_training=self.is_training)
            # block 7
            net = conv2d(net, 1024, 1, scope="conv7")
            self.end_points["block7"] = net
            # block 8
            net = conv2d(net, 256, 1, scope="conv8_1x1")
            net = conv2d(pad2d(net, 1), 512, 3, stride=2, scope="conv8_3x3",
                         padding="valid")
            self.end_points["block8"] = net
            # block 9
            net = conv2d(net, 128, 1, scope="conv9_1x1")
            net = conv2d(pad2d(net, 1), 256, 3, stride=2, scope="conv9_3x3",
                         padding="valid")
            self.end_points["block9"] = net
            # block 10
            net = conv2d(net, 128, 1, scope="conv10_1x1")
            net = conv2d(net, 256, 3, scope="conv10_3x3", padding="valid")
            self.end_points["block10"] = net
            # block 11
            net = conv2d(net, 128, 1, scope="conv11_1x1")
            net = conv2d(net, 256, 3, scope="conv11_3x3", padding="valid")
            self.end_points["block11"] = net

            # class and location predictions
            predictions = []
            logits = []
            locations = []
            for i, layer in enumerate(self.ssd_params.feat_layers):
                cls, loc = ssd_multibox_layer(self.end_points[layer], self.ssd_params.num_classes,
                                              self.ssd_params.anchor_sizes[i],
                                              self.ssd_params.anchor_ratios[i],
                                              self.ssd_params.normalizations[i], scope=layer + "_box")
                predictions.append(tf.nn.softmax(cls))
                logits.append(cls)
                locations.append(loc)
            return predictions, logits, locations

    def _update_feat_shapes_from_net(self, predictions):
        """Obtain the feature shapes from the prediction layers"""
        new_feat_shapes = []
        for l in predictions:
            new_feat_shapes.append(l.get_shape().as_list()[1:])
        # namedtuple._replace returns a new tuple; re-assign it, otherwise the update is lost
        self.ssd_params = self.ssd_params._replace(feat_shapes=new_feat_shapes)

    def anchors(self):
        """Get the SSD anchors for all feature layers"""
        return ssd_anchors_all_layers(self.ssd_params.img_shape,
                                      self.ssd_params.feat_shapes,
                                      self.ssd_params.anchor_sizes,
                                      self.ssd_params.anchor_ratios,
                                      self.ssd_params.anchor_steps,
                                      self.ssd_params.anchor_offset,
                                      np.float32)

    def _bboxes_decode_layer(self, feat_locations, anchor_bboxes, prior_scaling):
        """
        Decode the feat locations of one layer
        params:
         feat_locations: 5D Tensor, [batch_size, size, size, n_anchors, 4]
         anchor_bboxes: list of Tensors (y, x, h, w)
                        shape: [size, size, 1], [size, size, 1], [n_anchors], [n_anchors]
         prior_scaling: list of 4 floats
        """
        yref, xref, href, wref = anchor_bboxes
        # compute center, height and width from the encoded offsets:
        # centers are shifted relative to the anchor, sizes are decoded with exp()
        cx = feat_locations[:, :, :, :, 0] * wref * prior_scaling[0] + xref
        cy = feat_locations[:, :, :, :, 1] * href * prior_scaling[1] + yref
        w = wref * tf.exp(feat_locations[:, :, :, :, 2] * prior_scaling[2])
        h = href * tf.exp(feat_locations[:, :, :, :, 3] * prior_scaling[3])
        # compute boxes coordinates (ymin, xmin, ymax, xmax)
        bboxes = tf.stack([cy - h / 2., cx - w / 2.,
                           cy + h / 2., cx + w / 2.], axis=-1)
        # shape [batch_size, size, size, n_anchors, 4]
        return bboxes

    def _bboxes_select_layer(self, feat_predictions, feat_locations, anchor_bboxes,
                             prior_scaling):
        """Select boxes from one feat layer, only for batch_size=1"""
        n_bboxes = np.prod(feat_predictions.get_shape().as_list()[1:-1])
        # decode the locations
        bboxes = self._bboxes_decode_layer(feat_locations, anchor_bboxes, prior_scaling)
        bboxes = tf.reshape(bboxes, [n_bboxes, 4])
        predictions = tf.reshape(feat_predictions, [n_bboxes, self.ssd_params.num_classes])
        # remove the background predictions
        sub_predictions = predictions[:, 1:]
        # choose the max score class
        classes = tf.argmax(sub_predictions, axis=1) + 1  # class labels
        scores = tf.reduce_max(sub_predictions, axis=1)   # max class scores
        # box selection: keep boxes whose best class score exceeds the threshold
        filter_mask = scores > self.threshold
        classes = tf.boolean_mask(classes, filter_mask)
        scores = tf.boolean_mask(scores, filter_mask)
        bboxes = tf.boolean_mask(bboxes, filter_mask)
        return classes, scores, bboxes

    def _bboxes_select(self, predictions, locations):
        """Select all bbox predictions, only for batch_size=1"""
        anchor_bboxes_list = self.anchors()
        classes_list = []
        scores_list = []
        bboxes_list = []
        # select bboxes for each feat layer
        for n in range(len(predictions)):
            anchor_bboxes = list(map(tf.convert_to_tensor, anchor_bboxes_list[n]))
            classes, scores, bboxes = self._bboxes_select_layer(predictions[n],
                            locations[n], anchor_bboxes, self.ssd_params.prior_scaling)
            classes_list.append(classes)
            scores_list.append(scores)
            bboxes_list.append(bboxes)
        # combine all feat layers
        classes = tf.concat(classes_list, axis=0)
        scores = tf.concat(scores_list, axis=0)
        bboxes = tf.concat(bboxes_list, axis=0)
        return classes, scores, bboxes

    def images(self):
        """Return the input image placeholder"""
        return self._images

    def detections(self):
        """Return the (classes, scores, bboxes) detection tensors"""
        return self._classes, self._scores, self._bboxes


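# The class above only thresholds detections by score, so overlapping boxes from
# different anchors are all kept. The helper below is a minimal post-processing
# sketch (not part of the original model): it applies a single class-agnostic pass
# of TensorFlow's built-in non-max suppression; max_output_size and iou_threshold
# are illustrative defaults, and a per-class loop would be closer to the usual SSD
# pipeline.
def bboxes_nms(classes, scores, bboxes, max_output_size=200, iou_threshold=0.45):
    """Apply non-max suppression to the thresholded detections (batch_size=1)."""
    # bboxes are already in (ymin, xmin, ymax, xmax) order, which is the layout
    # tf.image.non_max_suppression expects
    keep = tf.image.non_max_suppression(bboxes, scores, max_output_size,
                                        iou_threshold=iou_threshold)
    return (tf.gather(classes, keep), tf.gather(scores, keep),
            tf.gather(bboxes, keep))

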
if __name__ == "__main__":
    ssd = SSD()
    sess = tf.Session()
    saver_ = tf.train.Saver()
    saver_.restore(sess, "../SSD-Tensorflow-master/ssd_checkpoints/ssd_vgg_300_weights.ckpt")
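
    # Hedged usage sketch: one forward pass to fetch the thresholded detections.
    # A dummy input is used only to show the feed/run mechanics; a real image should
    # be resized to 300x300 and preprocessed the same way the checkpoint was trained
    # (typically RGB with the VGG channel means subtracted; verify this against your
    # training pipeline before trusting the outputs).
    dummy_image = np.zeros((1, 300, 300, 3), dtype=np.float32)
    classes, scores, bboxes = sess.run(ssd.detections(),
                                       feed_dict={ssd.images(): dummy_image})
    print("classes:", classes)
    print("scores:", scores)
    print("bboxes (ymin, xmin, ymax, xmax):", bboxes)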