|
| 1 | +""" |
| 2 | +SSD net (vgg_based) 300x300 |
| 3 | +""" |
| 4 | +from collections import namedtuple |
| 5 | + |
| 6 | +import numpy as np |
| 7 | +import tensorflow as tf |
| 8 | + |
| 9 | +from ssd_layers import conv2d, max_pool2d, l2norm, dropout, \ |
| 10 | + pad2d, ssd_multibox_layer |
| 11 | +from ssd_anchors import ssd_anchors_all_layers |
| 12 | + |
| 13 | +# SSD parameters |
# Immutable container for every hyper-parameter of the SSD network.
# The per-layer lists below are indexed in the same order as `feat_layers`.
SSDParams = namedtuple(
    'SSDParameters',
    [
        'img_shape',            # input image size: 300x300
        'num_classes',          # number of classes: 20 + 1
        'no_annotation_label',
        'feat_layers',          # names of the layers used for detection
        'feat_shapes',          # feature-map size of each detection layer
        'anchor_size_bounds',   # lower and upper bounds of the anchor sizes
        'anchor_sizes',         # anchor sizes of each detection layer
        'anchor_ratios',        # aspect ratios used in each detection layer
        'anchor_steps',         # cell size (in pixels) of each detection layer
        'anchor_offset',        # offset of the anchor center point
        'normalizations',       # normalization setting of each detection layer
        'prior_scaling',        # 4 scaling factors applied to predicted box offsets when decoding
    ],
)
class SSD(object):
    """SSD 300x300 detection network on a VGG-based backbone.

    Constructing an instance builds the complete inference graph: the image
    placeholder, the convolutional network, one multibox head per detection
    layer, and the box decoding / score-threshold selection.

    The selection step reshapes the batch dimension away, so the tensors
    returned by `detections()` are only valid for batch_size=1.
    """

    def __init__(self, is_training=True, threshold=0.5):
        """Build the network and the detection outputs.

        params:
            is_training: stored on the instance; its only consumer (dropout
                after block 6) is currently disabled.
            threshold: minimum class score for a box to be kept.
        """
        self.is_training = is_training
        self.threshold = threshold  # class score threshold
        self.ssd_params = SSDParams(
            img_shape=(300, 300),
            num_classes=21,
            no_annotation_label=21,
            feat_layers=["block4", "block7", "block8", "block9", "block10", "block11"],
            feat_shapes=[(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)],
            anchor_size_bounds=[0.15, 0.90],  # diff from the original paper
            anchor_sizes=[(21., 45.),
                          (45., 99.),
                          (99., 153.),
                          (153., 207.),
                          (207., 261.),
                          (261., 315.)],
            anchor_ratios=[[2, .5],
                           [2, .5, 3, 1. / 3],
                           [2, .5, 3, 1. / 3],
                           [2, .5, 3, 1. / 3],
                           [2, .5],
                           [2, .5]],
            anchor_steps=[8, 16, 32, 64, 100, 300],
            anchor_offset=0.5,
            normalizations=[20, -1, -1, -1, -1, -1],
            prior_scaling=[0.1, 0.1, 0.2, 0.2]
        )

        # The logits are returned by _built_net but are not needed here.
        predictions, _, locations = self._built_net()
        # NOTE: feat_shapes come from the hard-coded configuration above; the
        # refresh from the built graph (_update_feat_shapes_from_net) is not
        # invoked.
        classes, scores, bboxes = self._bboxes_select(predictions, locations)
        self._classes = classes
        self._scores = scores
        self._bboxes = bboxes

    def _built_net(self):
        """Construct the SSD net.

        Creates the input placeholder, records the output of every block in
        `self.end_points`, and attaches a multibox head to each layer named
        in `ssd_params.feat_layers`.

        returns:
            (predictions, logits, locations): three lists with one entry per
            detection layer; `predictions` is the softmax of `logits`.
        """
        self.end_points = {}  # record the detection layers output
        self._images = tf.placeholder(
            tf.float32,
            shape=[None, self.ssd_params.img_shape[0],
                   self.ssd_params.img_shape[1], 3])
        with tf.variable_scope("ssd_300_vgg"):
            # original vgg layers
            # block 1
            net = conv2d(self._images, 64, 3, scope="conv1_1")
            net = conv2d(net, 64, 3, scope="conv1_2")
            self.end_points["block1"] = net
            net = max_pool2d(net, 2, scope="pool1")
            # block 2
            net = conv2d(net, 128, 3, scope="conv2_1")
            net = conv2d(net, 128, 3, scope="conv2_2")
            self.end_points["block2"] = net
            net = max_pool2d(net, 2, scope="pool2")
            # block 3
            net = conv2d(net, 256, 3, scope="conv3_1")
            net = conv2d(net, 256, 3, scope="conv3_2")
            net = conv2d(net, 256, 3, scope="conv3_3")
            self.end_points["block3"] = net
            net = max_pool2d(net, 2, scope="pool3")
            # block 4
            net = conv2d(net, 512, 3, scope="conv4_1")
            net = conv2d(net, 512, 3, scope="conv4_2")
            net = conv2d(net, 512, 3, scope="conv4_3")
            self.end_points["block4"] = net
            net = max_pool2d(net, 2, scope="pool4")
            # block 5
            net = conv2d(net, 512, 3, scope="conv5_1")
            net = conv2d(net, 512, 3, scope="conv5_2")
            net = conv2d(net, 512, 3, scope="conv5_3")
            self.end_points["block5"] = net
            # pool5 uses a 3x3 window with stride 1 (unlike pool1-pool4)
            net = max_pool2d(net, 3, stride=1, scope="pool5")

            # additional SSD layers
            # block 6: use dilated conv
            net = conv2d(net, 1024, 3, dilation_rate=6, scope="conv6")
            self.end_points["block6"] = net
            # NOTE: dropout(net, is_training=self.is_training) is disabled here
            # block 7
            net = conv2d(net, 1024, 1, scope="conv7")
            self.end_points["block7"] = net
            # block 8
            net = conv2d(net, 256, 1, scope="conv8_1x1")
            net = conv2d(pad2d(net, 1), 512, 3, stride=2, scope="conv8_3x3",
                         padding="valid")
            self.end_points["block8"] = net
            # block 9
            net = conv2d(net, 128, 1, scope="conv9_1x1")
            net = conv2d(pad2d(net, 1), 256, 3, stride=2, scope="conv9_3x3",
                         padding="valid")
            self.end_points["block9"] = net
            # block 10
            net = conv2d(net, 128, 1, scope="conv10_1x1")
            net = conv2d(net, 256, 3, scope="conv10_3x3", padding="valid")
            self.end_points["block10"] = net
            # block 11
            net = conv2d(net, 128, 1, scope="conv11_1x1")
            net = conv2d(net, 256, 3, scope="conv11_3x3", padding="valid")
            self.end_points["block11"] = net

            # class and location predictions
            predictions = []
            logits = []
            locations = []
            for i, layer in enumerate(self.ssd_params.feat_layers):
                cls, loc = ssd_multibox_layer(self.end_points[layer],
                                              self.ssd_params.num_classes,
                                              self.ssd_params.anchor_sizes[i],
                                              self.ssd_params.anchor_ratios[i],
                                              self.ssd_params.normalizations[i],
                                              scope=layer + "_box")
                predictions.append(tf.nn.softmax(cls))
                logits.append(cls)
                locations.append(loc)
            return predictions, logits, locations

    def _update_feat_shapes_from_net(self, predictions):
        """Obtain the feature shapes from the prediction layers.

        Replaces `ssd_params.feat_shapes` with the static shape of each
        prediction tensor, batch dimension dropped.

        params:
            predictions: iterable of tensors exposing `get_shape().as_list()`
        """
        new_feat_shapes = [pred.get_shape().as_list()[1:] for pred in predictions]
        # namedtuple._replace returns a NEW tuple; it must be assigned back,
        # otherwise the update is silently lost.
        self.ssd_params = self.ssd_params._replace(feat_shapes=new_feat_shapes)

    def anchors(self):
        """Get the SSD anchors of all detection layers."""
        return ssd_anchors_all_layers(self.ssd_params.img_shape,
                                      self.ssd_params.feat_shapes,
                                      self.ssd_params.anchor_sizes,
                                      self.ssd_params.anchor_ratios,
                                      self.ssd_params.anchor_steps,
                                      self.ssd_params.anchor_offset,
                                      np.float32)

    def _bboxes_decode_layer(self, feat_locations, anchor_bboxes, prior_scaling):
        """
        Decode the feat location of one layer
        params:
         feat_locations: 5D Tensor, [batch_size, size, size, n_anchors, 4]
         anchor_bboxes: list of Tensors (y, x, h, w), unpacked in that order
                        shape: [size, size, 1], [size, size, 1], [n_anchors], [n_anchors]
                        (shapes as documented by the original author - TODO confirm
                        against ssd_anchors_all_layers)
         prior_scaling: list of 4 floats
        returns:
         Tensor of boxes (ymin, xmin, ymax, xmax) stacked on the last axis
        """
        yref, xref, href, wref = anchor_bboxes
        # Compute center, height and width
        cx = feat_locations[:, :, :, :, 0] * wref * prior_scaling[0] + xref
        cy = feat_locations[:, :, :, :, 1] * href * prior_scaling[1] + yref
        w = wref * tf.exp(feat_locations[:, :, :, :, 2] * prior_scaling[2])
        h = href * tf.exp(feat_locations[:, :, :, :, 3] * prior_scaling[3])
        # compute boxes coordinates (ymin, xmin, ymax, xmax)
        bboxes = tf.stack([cy - h / 2., cx - w / 2.,
                           cy + h / 2., cx + w / 2.], axis=-1)
        # shape [batch_size, size, size, n_anchors, 4]
        return bboxes

    def _bboxes_select_layer(self, feat_predictions, feat_locations, anchor_bboxes,
                             prior_scaling):
        """Select boxes from the feat layer, only for batch_size=1.

        returns:
            (classes, scores, bboxes) restricted to the boxes whose best
            non-background score is above `self.threshold`.
        """
        # Number of boxes = product of every static dim except batch and class.
        # np.prod is used because the np.product alias was removed in NumPy 2.0.
        n_bboxes = np.prod(feat_predictions.get_shape().as_list()[1:-1])
        # decode the location
        bboxes = self._bboxes_decode_layer(feat_locations, anchor_bboxes, prior_scaling)
        bboxes = tf.reshape(bboxes, [n_bboxes, 4])
        predictions = tf.reshape(feat_predictions, [n_bboxes, self.ssd_params.num_classes])
        # remove the background predictions (class index 0)
        sub_predictions = predictions[:, 1:]
        # choose the max score class; +1 undoes the background-column removal
        classes = tf.argmax(sub_predictions, axis=1) + 1  # class labels
        scores = tf.reduce_max(sub_predictions, axis=1)  # max_class scores
        # Boxes selection: use threshold
        filter_mask = scores > self.threshold
        classes = tf.boolean_mask(classes, filter_mask)
        scores = tf.boolean_mask(scores, filter_mask)
        bboxes = tf.boolean_mask(bboxes, filter_mask)
        return classes, scores, bboxes

    def _bboxes_select(self, predictions, locations):
        """Select all bboxes predictions, only for batch_size=1.

        Runs the per-layer selection on every detection layer and
        concatenates the results along axis 0.
        """
        anchor_bboxes_list = self.anchors()
        classes_list = []
        scores_list = []
        bboxes_list = []
        # select bboxes for each feat layer
        for n, feat_predictions in enumerate(predictions):
            anchor_bboxes = [tf.convert_to_tensor(anchor)
                             for anchor in anchor_bboxes_list[n]]
            classes, scores, bboxes = self._bboxes_select_layer(
                feat_predictions, locations[n], anchor_bboxes,
                self.ssd_params.prior_scaling)
            classes_list.append(classes)
            scores_list.append(scores)
            bboxes_list.append(bboxes)
        # combine all feat layers
        classes = tf.concat(classes_list, axis=0)
        scores = tf.concat(scores_list, axis=0)
        bboxes = tf.concat(bboxes_list, axis=0)
        return classes, scores, bboxes

    def images(self):
        """Return the input image placeholder."""
        return self._images

    def detections(self):
        """Return the (classes, scores, bboxes) detection tensors."""
        return self._classes, self._scores, self._bboxes
| 226 | + |
| 227 | + |
if __name__ == "__main__":
    # Smoke test: build the graph and restore the pretrained SSD-300 weights.
    ssd = SSD()
    # The context manager guarantees the session (and the resources it
    # holds) is released even if restoring the checkpoint fails.
    with tf.Session() as sess:
        saver_ = tf.train.Saver()
        saver_.restore(sess, "../SSD-Tensorflow-master/ssd_checkpoints/ssd_vgg_300_weights.ckpt")
| 233 | + |
0 commit comments