| 
 | 1 | +"""  | 
 | 2 | +SSD net (vgg_based) 300x300  | 
 | 3 | +"""  | 
 | 4 | +from collections import namedtuple  | 
 | 5 | + | 
 | 6 | +import numpy as np  | 
 | 7 | +import tensorflow as tf  | 
 | 8 | + | 
 | 9 | +from ssd_layers import conv2d, max_pool2d, l2norm, dropout, \  | 
 | 10 | +    pad2d, ssd_multibox_layer  | 
 | 11 | +from ssd_anchors import ssd_anchors_all_layers  | 
 | 12 | + | 
 | 13 | +# SSD parameters  | 
 | 14 | +SSDParams = namedtuple('SSDParameters', ['img_shape',  # the input image size: 300x300  | 
 | 15 | +                                         'num_classes',  # number of classes: 20+1  | 
 | 16 | +                                         'no_annotation_label',  | 
 | 17 | +                                         'feat_layers', # list of names of layer for detection  | 
 | 18 | +                                         'feat_shapes', # list of feature map sizes of layer for detection  | 
 | 19 | +                                         'anchor_size_bounds', # the down and upper bounds of anchor sizes  | 
 | 20 | +                                         'anchor_sizes',   # list of anchor sizes of layer for detection  | 
 | 21 | +                                         'anchor_ratios',  # list of rations used in layer for detection  | 
 | 22 | +                                         'anchor_steps',   # list of cell size (pixel size) of layer for detection  | 
 | 23 | +                                         'anchor_offset',  # the center point offset  | 
 | 24 | +                                         'normalizations', # list of normalizations of layer for detection  | 
 | 25 | +                                         'prior_scaling'   #  | 
 | 26 | +                                         ])  | 
 | 27 | +class SSD(object):  | 
 | 28 | +    """SSD net 300"""  | 
 | 29 | +    def __init__(self, is_training=True):  | 
 | 30 | +        self.is_training = is_training  | 
 | 31 | +        self.threshold = 0.5  # class score threshold  | 
 | 32 | +        self.ssd_params = SSDParams(img_shape=(300, 300),  | 
 | 33 | +                                    num_classes=21,  | 
 | 34 | +                                    no_annotation_label=21,  | 
 | 35 | +                                    feat_layers=["block4", "block7", "block8", "block9", "block10", "block11"],  | 
 | 36 | +                                    feat_shapes=[(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)],  | 
 | 37 | +                                    anchor_size_bounds=[0.15, 0.90],  # diff from the original paper  | 
 | 38 | +                                    anchor_sizes=[(21., 45.),  | 
 | 39 | +                                                  (45., 99.),  | 
 | 40 | +                                                  (99., 153.),  | 
 | 41 | +                                                  (153., 207.),  | 
 | 42 | +                                                  (207., 261.),  | 
 | 43 | +                                                  (261., 315.)],  | 
 | 44 | +                                    anchor_ratios=[[2, .5],  | 
 | 45 | +                                                   [2, .5, 3, 1. / 3],  | 
 | 46 | +                                                   [2, .5, 3, 1. / 3],  | 
 | 47 | +                                                   [2, .5, 3, 1. / 3],  | 
 | 48 | +                                                   [2, .5],  | 
 | 49 | +                                                   [2, .5]],  | 
 | 50 | +                                    anchor_steps=[8, 16, 32, 64, 100, 300],  | 
 | 51 | +                                    anchor_offset=0.5,  | 
 | 52 | +                                    normalizations=[20, -1, -1, -1, -1, -1],  | 
 | 53 | +                                    prior_scaling=[0.1, 0.1, 0.2, 0.2]  | 
 | 54 | +                                    )  | 
 | 55 | + | 
 | 56 | +        predictions, logits, locations = self._built_net()  | 
 | 57 | +        #self._update_feat_shapes_from_net()  | 
 | 58 | +        classes, scores, bboxes = self._bboxes_select(predictions, locations)  | 
 | 59 | +        self._classes = classes  | 
 | 60 | +        self._scores = scores  | 
 | 61 | +        self._bboxes = bboxes  | 
 | 62 | + | 
 | 63 | +    def _built_net(self):  | 
 | 64 | +        """Construct the SSD net"""  | 
 | 65 | +        self.end_points = {}  # record the detection layers output  | 
 | 66 | +        self._images = tf.placeholder(tf.float32, shape=[None, self.ssd_params.img_shape[0],  | 
 | 67 | +                                                        self.ssd_params.img_shape[1], 3])  | 
 | 68 | +        with tf.variable_scope("ssd_300_vgg"):  | 
 | 69 | +            # original vgg layers  | 
 | 70 | +            # block 1  | 
 | 71 | +            net = conv2d(self._images, 64, 3, scope="conv1_1")  | 
 | 72 | +            net = conv2d(net, 64, 3, scope="conv1_2")  | 
 | 73 | +            self.end_points["block1"] = net  | 
 | 74 | +            net = max_pool2d(net, 2, scope="pool1")  | 
 | 75 | +            # block 2  | 
 | 76 | +            net = conv2d(net, 128, 3, scope="conv2_1")  | 
 | 77 | +            net = conv2d(net, 128, 3, scope="conv2_2")  | 
 | 78 | +            self.end_points["block2"] = net  | 
 | 79 | +            net = max_pool2d(net, 2, scope="pool2")  | 
 | 80 | +            # block 3  | 
 | 81 | +            net = conv2d(net, 256, 3, scope="conv3_1")  | 
 | 82 | +            net = conv2d(net, 256, 3, scope="conv3_2")  | 
 | 83 | +            net = conv2d(net, 256, 3, scope="conv3_3")  | 
 | 84 | +            self.end_points["block3"] = net  | 
 | 85 | +            net = max_pool2d(net, 2, scope="pool3")  | 
 | 86 | +            # block 4  | 
 | 87 | +            net = conv2d(net, 512, 3, scope="conv4_1")  | 
 | 88 | +            net = conv2d(net, 512, 3, scope="conv4_2")  | 
 | 89 | +            net = conv2d(net, 512, 3, scope="conv4_3")  | 
 | 90 | +            self.end_points["block4"] = net  | 
 | 91 | +            net = max_pool2d(net, 2, scope="pool4")  | 
 | 92 | +            # block 5  | 
 | 93 | +            net = conv2d(net, 512, 3, scope="conv5_1")  | 
 | 94 | +            net = conv2d(net, 512, 3, scope="conv5_2")  | 
 | 95 | +            net = conv2d(net, 512, 3, scope="conv5_3")  | 
 | 96 | +            self.end_points["block5"] = net  | 
 | 97 | +            print(net)  | 
 | 98 | +            net = max_pool2d(net, 3, stride=1, scope="pool5")  | 
 | 99 | +            print(net)  | 
 | 100 | + | 
 | 101 | +            # additional SSD layers  | 
 | 102 | +            # block 6: use dilate conv  | 
 | 103 | +            net = conv2d(net, 1024, 3, dilation_rate=6, scope="conv6")  | 
 | 104 | +            self.end_points["block6"] = net  | 
 | 105 | +            #net = dropout(net, is_training=self.is_training)  | 
 | 106 | +            # block 7  | 
 | 107 | +            net = conv2d(net, 1024, 1, scope="conv7")  | 
 | 108 | +            self.end_points["block7"] = net  | 
 | 109 | +            # block 8  | 
 | 110 | +            net = conv2d(net, 256, 1, scope="conv8_1x1")  | 
 | 111 | +            net = conv2d(pad2d(net, 1), 512, 3, stride=2, scope="conv8_3x3",  | 
 | 112 | +                         padding="valid")  | 
 | 113 | +            self.end_points["block8"] = net  | 
 | 114 | +            # block 9  | 
 | 115 | +            net = conv2d(net, 128, 1, scope="conv9_1x1")  | 
 | 116 | +            net = conv2d(pad2d(net, 1), 256, 3, stride=2, scope="conv9_3x3",  | 
 | 117 | +                         padding="valid")  | 
 | 118 | +            self.end_points["block9"] = net  | 
 | 119 | +            # block 10  | 
 | 120 | +            net = conv2d(net, 128, 1, scope="conv10_1x1")  | 
 | 121 | +            net = conv2d(net, 256, 3, scope="conv10_3x3", padding="valid")  | 
 | 122 | +            self.end_points["block10"] = net  | 
 | 123 | +            # block 11  | 
 | 124 | +            net = conv2d(net, 128, 1, scope="conv11_1x1")  | 
 | 125 | +            net = conv2d(net, 256, 3, scope="conv11_3x3", padding="valid")  | 
 | 126 | +            self.end_points["block11"] = net  | 
 | 127 | + | 
 | 128 | +            # class and location predictions  | 
 | 129 | +            predictions = []  | 
 | 130 | +            logits = []  | 
 | 131 | +            locations = []  | 
 | 132 | +            for i, layer in enumerate(self.ssd_params.feat_layers):  | 
 | 133 | +                cls, loc = ssd_multibox_layer(self.end_points[layer], self.ssd_params.num_classes,  | 
 | 134 | +                                              self.ssd_params.anchor_sizes[i],  | 
 | 135 | +                                              self.ssd_params.anchor_ratios[i],  | 
 | 136 | +                                              self.ssd_params.normalizations[i], scope=layer+"_box")  | 
 | 137 | +                predictions.append(tf.nn.softmax(cls))  | 
 | 138 | +                logits.append(cls)  | 
 | 139 | +                locations.append(loc)  | 
 | 140 | +            return predictions, logits, locations  | 
 | 141 | + | 
 | 142 | +    def _update_feat_shapes_from_net(self, predictions):  | 
 | 143 | +        """ Obtain the feature shapes from the prediction layers"""  | 
 | 144 | +        new_feat_shapes = []  | 
 | 145 | +        for l in predictions:  | 
 | 146 | +            new_feat_shapes.append(l.get_shape().as_list()[1:])  | 
 | 147 | +        self.ssd_params._replace(feat_shapes=new_feat_shapes)  | 
 | 148 | + | 
 | 149 | +    def anchors(self):  | 
 | 150 | +        """Get sSD anchors"""  | 
 | 151 | +        return ssd_anchors_all_layers(self.ssd_params.img_shape,  | 
 | 152 | +                                      self.ssd_params.feat_shapes,  | 
 | 153 | +                                      self.ssd_params.anchor_sizes,  | 
 | 154 | +                                      self.ssd_params.anchor_ratios,  | 
 | 155 | +                                      self.ssd_params.anchor_steps,  | 
 | 156 | +                                      self.ssd_params.anchor_offset,  | 
 | 157 | +                                      np.float32)  | 
 | 158 | + | 
 | 159 | +    def _bboxes_decode_layer(self, feat_locations, anchor_bboxes, prior_scaling):  | 
 | 160 | +        """  | 
 | 161 | +        Decode the feat location of one layer  | 
 | 162 | +        params:  | 
 | 163 | +         feat_locations: 5D Tensor, [batch_size, size, size, n_anchors, 4]  | 
 | 164 | +         anchor_bboxes: list of Tensors(y, x, w, h)  | 
 | 165 | +                        shape: [size,size,1], [size, size,1], [n_anchors], [n_anchors]  | 
 | 166 | +         prior_scaling: list of 4 floats  | 
 | 167 | +        """  | 
 | 168 | +        yref, xref, href, wref = anchor_bboxes  | 
 | 169 | +        print(yref)  | 
 | 170 | +        # Compute center, height and width  | 
 | 171 | +        cx = feat_locations[:, :, :, :, 0] * wref * prior_scaling[0] + xref  | 
 | 172 | +        cy = feat_locations[:, :, :, :, 1] * href * prior_scaling[1] + yref  | 
 | 173 | +        w = wref * tf.exp(feat_locations[:, :, :, :, 2] * prior_scaling[2])  | 
 | 174 | +        h = href * tf.exp(feat_locations[:, :, :, :, 3] * prior_scaling[3])  | 
 | 175 | +        # compute boxes coordinates (ymin, xmin, ymax,,xmax)  | 
 | 176 | +        bboxes = tf.stack([cy - h / 2., cx - w / 2.,  | 
 | 177 | +                           cy + h / 2., cx + w / 2.], axis=-1)  | 
 | 178 | +        # shape [batch_size, size, size, n_anchors, 4]  | 
 | 179 | +        return bboxes  | 
 | 180 | + | 
 | 181 | +    def _bboxes_select_layer(self, feat_predictions, feat_locations, anchor_bboxes,  | 
 | 182 | +                             prior_scaling):  | 
 | 183 | +        """Select boxes from the feat layer, only for bacth_size=1"""  | 
 | 184 | +        n_bboxes = np.product(feat_predictions.get_shape().as_list()[1:-1])  | 
 | 185 | +        # decode the location  | 
 | 186 | +        bboxes = self._bboxes_decode_layer(feat_locations, anchor_bboxes, prior_scaling)  | 
 | 187 | +        bboxes = tf.reshape(bboxes, [n_bboxes, 4])  | 
 | 188 | +        predictions = tf.reshape(feat_predictions, [n_bboxes, self.ssd_params.num_classes])  | 
 | 189 | +        # remove the background predictions  | 
 | 190 | +        sub_predictions = predictions[:, 1:]  | 
 | 191 | +        # choose the max score class  | 
 | 192 | +        classes = tf.argmax(sub_predictions, axis=1) + 1  # class labels  | 
 | 193 | +        scores = tf.reduce_max(sub_predictions, axis=1)   # max_class scores  | 
 | 194 | +        # Boxes selection: use threshold  | 
 | 195 | +        filter_mask = scores > self.threshold  | 
 | 196 | +        classes = tf.boolean_mask(classes, filter_mask)  | 
 | 197 | +        scores = tf.boolean_mask(scores, filter_mask)  | 
 | 198 | +        bboxes = tf.boolean_mask(bboxes, filter_mask)  | 
 | 199 | +        return classes, scores, bboxes  | 
 | 200 | + | 
 | 201 | +    def _bboxes_select(self, predictions, locations):  | 
 | 202 | +        """Select all bboxes predictions, only for bacth_size=1"""  | 
 | 203 | +        anchor_bboxes_list = self.anchors()  | 
 | 204 | +        classes_list = []  | 
 | 205 | +        scores_list = []  | 
 | 206 | +        bboxes_list = []  | 
 | 207 | +        # select bboxes for each feat layer  | 
 | 208 | +        for n in range(len(predictions)):  | 
 | 209 | +            anchor_bboxes = list(map(tf.convert_to_tensor, anchor_bboxes_list[n]))  | 
 | 210 | +            classes, scores, bboxes = self._bboxes_select_layer(predictions[n],  | 
 | 211 | +                            locations[n], anchor_bboxes, self.ssd_params.prior_scaling)  | 
 | 212 | +            classes_list.append(classes)  | 
 | 213 | +            scores_list.append(scores)  | 
 | 214 | +            bboxes_list.append(bboxes)  | 
 | 215 | +        # combine all feat layers  | 
 | 216 | +        classes = tf.concat(classes_list, axis=0)  | 
 | 217 | +        scores = tf.concat(scores_list, axis=0)  | 
 | 218 | +        bboxes = tf.concat(bboxes_list, axis=0)  | 
 | 219 | +        return classes, scores, bboxes  | 
 | 220 | + | 
 | 221 | +    def images(self):  | 
 | 222 | +        return self._images  | 
 | 223 | + | 
 | 224 | +    def detections(self):  | 
 | 225 | +        return self._classes, self._scores, self._bboxes  | 
 | 226 | + | 
 | 227 | + | 
 | 228 | +if __name__ == "__main__":  | 
 | 229 | +    ssd = SSD()  | 
 | 230 | +    sess = tf.Session()  | 
 | 231 | +    saver_ = tf.train.Saver()  | 
 | 232 | +    saver_.restore(sess, "../SSD-Tensorflow-master/ssd_checkpoints/ssd_vgg_300_weights.ckpt")  | 
 | 233 | + | 
0 commit comments