Skip to content

Commit 86248df

Browse files
authored
add ssd inference
1 parent 1134291 commit 86248df

File tree

7 files changed

+761
-0
lines changed

7 files changed

+761
-0
lines changed

ObjectDetections/SSD/SSD_demo.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
"""
2+
SSD demo
3+
"""
4+
5+
import cv2
6+
import numpy as np
7+
import tensorflow as tf
8+
import matplotlib.image as mpimg
9+
10+
from ssd_300_vgg import SSD
11+
from utils import preprocess_image, process_bboxes
12+
from visualization import plt_bboxes
13+
14+
15+
16+
# Build the SSD graph once and fetch the input placeholder plus the
# detection output tensors (class ids, scores, boxes).
ssd_net = SSD()
classes, scores, bboxes = ssd_net.detections()
images = ssd_net.images()

ckpt_filename = './ssd_checkpoints/ssd_vgg_300_weights.ckpt'
img_path = './demo/dog.jpg'

# The context manager guarantees the session (and the resources it holds)
# is released even when restoring or inference raises.
with tf.Session() as sess:
    # Restore SSD model.
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(sess, ckpt_filename)

    img = cv2.imread(img_path)
    # cv2.imread signals a missing/unreadable file by returning None instead
    # of raising; fail here with a clear message rather than inside cvtColor.
    if img is None:
        raise IOError("Could not read image: {}".format(img_path))
    # OpenCV loads images as BGR; convert to RGB before preprocessing.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_preprocessed = preprocess_image(img)

    rclasses, rscores, rbboxes = sess.run([classes, scores, bboxes],
                                          feed_dict={images: img_preprocessed})

# Post-process the raw detections and draw them on the RGB image.
rclasses, rscores, rbboxes = process_bboxes(rclasses, rscores, rbboxes)

plt_bboxes(img, rclasses, rscores, rbboxes)
38+
39+
40+
Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
"""
2+
SSD net (vgg_based) 300x300
3+
"""
4+
from collections import namedtuple
5+
6+
import numpy as np
7+
import tensorflow as tf
8+
9+
from ssd_layers import conv2d, max_pool2d, l2norm, dropout, \
10+
pad2d, ssd_multibox_layer
11+
from ssd_anchors import ssd_anchors_all_layers
12+
13+
# SSD parameters
14+
# Immutable bundle of the hyper-parameters that describe one SSD configuration.
SSDParams = namedtuple(
    'SSDParameters',
    (
        'img_shape',            # input image size, e.g. (300, 300)
        'num_classes',          # number of classes: 20 + 1
        'no_annotation_label',
        'feat_layers',          # names of the layers used for detection
        'feat_shapes',          # feature-map size of each detection layer
        'anchor_size_bounds',   # lower and upper bounds of the anchor sizes
        'anchor_sizes',         # anchor sizes for each detection layer
        'anchor_ratios',        # aspect ratios for each detection layer
        'anchor_steps',         # cell size (in pixels) of each detection layer
        'anchor_offset',        # offset of the anchor centre inside a cell
        'normalizations',       # normalization setting of each detection layer
        'prior_scaling',        # four factors applied to the predicted offsets when decoding boxes
    ),
)
27+
class SSD(object):
    """SSD 300x300 detection network on a VGG-style backbone (TF1 graph mode).

    Constructing an instance builds the whole graph: the input placeholder,
    the convolutional layers, the per-layer class/location heads, and the
    box decoding plus score-threshold selection.  Use `images()` to get the
    placeholder to feed and `detections()` to get the output tensors.
    """

    def __init__(self, is_training=True):
        """Build the network and the detection outputs.

        Args:
            is_training: stored on the instance; the only consumer (dropout
                after block 6) is currently disabled.
        """
        self.is_training = is_training
        self.threshold = 0.5  # class score threshold
        self.ssd_params = SSDParams(
            img_shape=(300, 300),
            num_classes=21,
            no_annotation_label=21,
            feat_layers=["block4", "block7", "block8", "block9", "block10", "block11"],
            feat_shapes=[(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)],
            anchor_size_bounds=[0.15, 0.90],  # diff from the original paper
            anchor_sizes=[(21., 45.),
                          (45., 99.),
                          (99., 153.),
                          (153., 207.),
                          (207., 261.),
                          (261., 315.)],
            anchor_ratios=[[2, .5],
                           [2, .5, 3, 1. / 3],
                           [2, .5, 3, 1. / 3],
                           [2, .5, 3, 1. / 3],
                           [2, .5],
                           [2, .5]],
            anchor_steps=[8, 16, 32, 64, 100, 300],
            anchor_offset=0.5,
            normalizations=[20, -1, -1, -1, -1, -1],
            prior_scaling=[0.1, 0.1, 0.2, 0.2]
        )

        # The raw logits are returned by _built_net but not needed for inference.
        predictions, _, locations = self._built_net()
        # NOTE: feat_shapes is hard-coded above; it is not refreshed from the
        # graph here (see _update_feat_shapes_from_net).
        classes, scores, bboxes = self._bboxes_select(predictions, locations)
        self._classes = classes
        self._scores = scores
        self._bboxes = bboxes

    def _built_net(self):
        """Construct the SSD net.

        Returns:
            (predictions, logits, locations): three lists with one entry per
            layer named in `ssd_params.feat_layers`; `predictions` is the
            softmax of the corresponding `logits` entry.
        """
        self.end_points = {}  # record the detection layers output
        self._images = tf.placeholder(
            tf.float32,
            shape=[None, self.ssd_params.img_shape[0],
                   self.ssd_params.img_shape[1], 3])
        with tf.variable_scope("ssd_300_vgg"):
            # original vgg layers
            # block 1
            net = conv2d(self._images, 64, 3, scope="conv1_1")
            net = conv2d(net, 64, 3, scope="conv1_2")
            self.end_points["block1"] = net
            net = max_pool2d(net, 2, scope="pool1")
            # block 2
            net = conv2d(net, 128, 3, scope="conv2_1")
            net = conv2d(net, 128, 3, scope="conv2_2")
            self.end_points["block2"] = net
            net = max_pool2d(net, 2, scope="pool2")
            # block 3
            net = conv2d(net, 256, 3, scope="conv3_1")
            net = conv2d(net, 256, 3, scope="conv3_2")
            net = conv2d(net, 256, 3, scope="conv3_3")
            self.end_points["block3"] = net
            net = max_pool2d(net, 2, scope="pool3")
            # block 4
            net = conv2d(net, 512, 3, scope="conv4_1")
            net = conv2d(net, 512, 3, scope="conv4_2")
            net = conv2d(net, 512, 3, scope="conv4_3")
            self.end_points["block4"] = net
            net = max_pool2d(net, 2, scope="pool4")
            # block 5
            net = conv2d(net, 512, 3, scope="conv5_1")
            net = conv2d(net, 512, 3, scope="conv5_2")
            net = conv2d(net, 512, 3, scope="conv5_3")
            self.end_points["block5"] = net
            net = max_pool2d(net, 3, stride=1, scope="pool5")

            # additional SSD layers
            # block 6: use dilate conv
            net = conv2d(net, 1024, 3, dilation_rate=6, scope="conv6")
            self.end_points["block6"] = net
            # Dropout is intentionally disabled here (inference-only graph).
            # block 7
            net = conv2d(net, 1024, 1, scope="conv7")
            self.end_points["block7"] = net
            # block 8
            net = conv2d(net, 256, 1, scope="conv8_1x1")
            net = conv2d(pad2d(net, 1), 512, 3, stride=2, scope="conv8_3x3",
                         padding="valid")
            self.end_points["block8"] = net
            # block 9
            net = conv2d(net, 128, 1, scope="conv9_1x1")
            net = conv2d(pad2d(net, 1), 256, 3, stride=2, scope="conv9_3x3",
                         padding="valid")
            self.end_points["block9"] = net
            # block 10
            net = conv2d(net, 128, 1, scope="conv10_1x1")
            net = conv2d(net, 256, 3, scope="conv10_3x3", padding="valid")
            self.end_points["block10"] = net
            # block 11
            net = conv2d(net, 128, 1, scope="conv11_1x1")
            net = conv2d(net, 256, 3, scope="conv11_3x3", padding="valid")
            self.end_points["block11"] = net

            # class and location predictions
            predictions = []
            logits = []
            locations = []
            for i, layer in enumerate(self.ssd_params.feat_layers):
                cls, loc = ssd_multibox_layer(self.end_points[layer],
                                              self.ssd_params.num_classes,
                                              self.ssd_params.anchor_sizes[i],
                                              self.ssd_params.anchor_ratios[i],
                                              self.ssd_params.normalizations[i],
                                              scope=layer + "_box")
                predictions.append(tf.nn.softmax(cls))
                logits.append(cls)
                locations.append(loc)
            return predictions, logits, locations

    def _update_feat_shapes_from_net(self, predictions):
        """Replace `ssd_params.feat_shapes` with shapes read from the prediction layers.

        Args:
            predictions: iterable of objects exposing `get_shape().as_list()`;
                the leading (batch) dimension of each shape is dropped.
        """
        new_feat_shapes = [pred.get_shape().as_list()[1:] for pred in predictions]
        # namedtuple._replace returns a NEW tuple; it must be assigned back,
        # otherwise the update is silently lost.
        self.ssd_params = self.ssd_params._replace(feat_shapes=new_feat_shapes)

    def anchors(self):
        """Get the SSD anchors of every detection layer (NumPy float32 arrays)."""
        return ssd_anchors_all_layers(self.ssd_params.img_shape,
                                      self.ssd_params.feat_shapes,
                                      self.ssd_params.anchor_sizes,
                                      self.ssd_params.anchor_ratios,
                                      self.ssd_params.anchor_steps,
                                      self.ssd_params.anchor_offset,
                                      np.float32)

    def _bboxes_decode_layer(self, feat_locations, anchor_bboxes, prior_scaling):
        """Decode the predicted locations of one layer into box corners.

        Args:
            feat_locations: 5D Tensor, [batch_size, size, size, n_anchors, 4];
                the last axis is read as (cx, cy, w, h) offsets.
            anchor_bboxes: list of Tensors (y, x, h, w) with shapes
                [size, size, 1], [size, size, 1], [n_anchors], [n_anchors].
            prior_scaling: list of 4 floats scaling the four offsets.

        Returns:
            Tensor [batch_size, size, size, n_anchors, 4] holding
            (ymin, xmin, ymax, xmax).
        """
        yref, xref, href, wref = anchor_bboxes
        # Compute center, height and width
        cx = feat_locations[:, :, :, :, 0] * wref * prior_scaling[0] + xref
        cy = feat_locations[:, :, :, :, 1] * href * prior_scaling[1] + yref
        w = wref * tf.exp(feat_locations[:, :, :, :, 2] * prior_scaling[2])
        h = href * tf.exp(feat_locations[:, :, :, :, 3] * prior_scaling[3])
        # compute boxes coordinates (ymin, xmin, ymax, xmax)
        bboxes = tf.stack([cy - h / 2., cx - w / 2.,
                           cy + h / 2., cx + w / 2.], axis=-1)
        # shape [batch_size, size, size, n_anchors, 4]
        return bboxes

    def _bboxes_select_layer(self, feat_predictions, feat_locations, anchor_bboxes,
                             prior_scaling):
        """Select boxes from one feature layer; only valid for batch_size=1.

        Returns:
            (classes, scores, bboxes) restricted to the boxes whose best
            non-background score exceeds `self.threshold`.
        """
        # np.prod rather than np.product: the latter is a deprecated alias
        # that was removed in NumPy 2.0.
        n_bboxes = np.prod(feat_predictions.get_shape().as_list()[1:-1])
        # decode the location
        bboxes = self._bboxes_decode_layer(feat_locations, anchor_bboxes, prior_scaling)
        bboxes = tf.reshape(bboxes, [n_bboxes, 4])
        predictions = tf.reshape(feat_predictions, [n_bboxes, self.ssd_params.num_classes])
        # remove the background predictions (column 0)
        sub_predictions = predictions[:, 1:]
        # choose the max score class; +1 restores the original label index
        classes = tf.argmax(sub_predictions, axis=1) + 1  # class labels
        scores = tf.reduce_max(sub_predictions, axis=1)  # max_class scores
        # Boxes selection: use threshold
        filter_mask = scores > self.threshold
        classes = tf.boolean_mask(classes, filter_mask)
        scores = tf.boolean_mask(scores, filter_mask)
        bboxes = tf.boolean_mask(bboxes, filter_mask)
        return classes, scores, bboxes

    def _bboxes_select(self, predictions, locations):
        """Select boxes over all feature layers; only valid for batch_size=1.

        Returns:
            (classes, scores, bboxes) concatenated over every detection layer.
        """
        anchor_bboxes_list = self.anchors()
        classes_list = []
        scores_list = []
        bboxes_list = []
        # select bboxes for each feat layer
        for n, feat_predictions in enumerate(predictions):
            anchor_bboxes = list(map(tf.convert_to_tensor, anchor_bboxes_list[n]))
            classes, scores, bboxes = self._bboxes_select_layer(
                feat_predictions, locations[n], anchor_bboxes,
                self.ssd_params.prior_scaling)
            classes_list.append(classes)
            scores_list.append(scores)
            bboxes_list.append(bboxes)
        # combine all feat layers
        classes = tf.concat(classes_list, axis=0)
        scores = tf.concat(scores_list, axis=0)
        bboxes = tf.concat(bboxes_list, axis=0)
        return classes, scores, bboxes

    def images(self):
        """Return the input image placeholder."""
        return self._images

    def detections(self):
        """Return the (classes, scores, bboxes) output tensors."""
        return self._classes, self._scores, self._bboxes
226+
227+
228+
if __name__ == "__main__":
    # Smoke test: build the graph and check the checkpoint restores into it.
    ssd = SSD()
    # The context manager closes the session even if restoring fails.
    with tf.Session() as sess:
        saver_ = tf.train.Saver()
        saver_.restore(sess, "../SSD-Tensorflow-master/ssd_checkpoints/ssd_vgg_300_weights.ckpt")
233+
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
"""
2+
SSD anchors
3+
"""
4+
import math
5+
6+
import numpy as np
7+
8+
def ssd_size_bounds_to_values(size_bounds,
                              n_feat_layers,
                              img_shape=(300, 300)):
    """Compute the reference sizes of the anchor boxes from relative bounds.

    The absolute values are measured in pixels, based on the network
    default size (300 pixels).

    This function follows the computation performed in the original
    implementation of SSD in Caffe.

    Arguments:
      size_bounds: (lower, upper) bounds as fractions of the image size;
      n_feat_layers: number of detection layers; must be greater than 2
        because the step is divided by `n_feat_layers - 2`;
      img_shape: square image shape (height, width).

    Return:
      list with one (size, next_size) pair per scale, in pixels. The first
      entry is a list, the following ones are tuples. For each scale, the
      ratios only apply to the first value.
    """
    assert img_shape[0] == img_shape[1]

    img_size = img_shape[0]
    # Round before converting: a bound such as 0.29 is stored slightly below
    # its decimal value, so int(0.29 * 100) would truncate to 28, not 29.
    min_ratio = int(round(size_bounds[0] * 100))
    max_ratio = int(round(size_bounds[1] * 100))
    step = int(math.floor((max_ratio - min_ratio) / (n_feat_layers - 2)))
    # Start with the following smallest sizes.
    sizes = [[img_size * size_bounds[0] / 2, img_size * size_bounds[0]]]
    for ratio in range(min_ratio, max_ratio + 1, step):
        sizes.append((img_size * ratio / 100.,
                      img_size * (ratio + step) / 100.))
    return sizes
34+
35+
def ssd_anchor_one_layer(img_shape,
                         feat_shape,
                         sizes,
                         ratios,
                         step,
                         offset=0.5,
                         dtype=np.float32):
    """Compute the SSD default anchor boxes for one feature layer.

    Builds the grid of anchor centres and the height/width of every anchor,
    all expressed relative to the image shape.

    Arguments:
      img_shape: Image shape, used to normalise centres, heights and widths;
      feat_shape: Feature shape, used for the extent of the position grid;
      sizes: Absolute reference sizes;
      ratios: Aspect ratios to use on this layer;
      step: Cell size of this layer, in pixels;
      offset: Grid offset;
      dtype: NumPy dtype of the returned arrays.

    Return:
      y, x, h, w: centre grids of shape [rows, cols, 1] and per-anchor height
      and width vectors of length len(sizes) + len(ratios).
    """
    img_h, img_w = img_shape[0], img_shape[1]

    # Centre grid, following the SSD-Caffe convention of scaling the cell
    # index by the layer step rather than dividing by the feature shape.
    rows, cols = np.mgrid[0:feat_shape[0], 0:feat_shape[1]]
    y = (rows.astype(dtype) + offset) * step / img_h
    x = (cols.astype(dtype) + offset) * step / img_w

    # Trailing unit axis so the grids broadcast against the anchor vectors.
    y = y[..., np.newaxis]  # [size, size, 1]
    x = x[..., np.newaxis]  # [size, size, 1]

    # Relative height and width, in the order of the original SSD code.
    n_anchors = len(sizes) + len(ratios)
    h = np.zeros((n_anchors, ), dtype=dtype)  # [n_anchors]
    w = np.zeros((n_anchors, ), dtype=dtype)  # [n_anchors]

    # Slot 0: square anchor at the base size.
    base = sizes[0]
    h[0] = base / img_h
    w[0] = base / img_w
    first_ratio_slot = 1

    # Slot 1 (optional): square anchor at the geometric mean of both sizes.
    if len(sizes) > 1:
        geo_mean = math.sqrt(base * sizes[1])
        h[1] = geo_mean / img_h
        w[1] = geo_mean / img_w
        first_ratio_slot += 1

    # Remaining slots: base-size anchors stretched by each aspect ratio.
    for slot, ratio in enumerate(ratios, first_ratio_slot):
        root = math.sqrt(ratio)
        h[slot] = base / img_h / root
        w[slot] = base / img_w * root
    return y, x, h, w
88+
89+
90+
def ssd_anchors_all_layers(img_shape,
                           layers_shape,
                           anchor_sizes,
                           anchor_ratios,
                           anchor_steps,
                           offset=0.5,
                           dtype=np.float32):
    """Compute the anchor boxes of every feature layer.

    The per-layer lists (`anchor_sizes`, `anchor_ratios`, `anchor_steps`)
    are indexed by the position of each entry in `layers_shape`.

    Return:
      list with one `ssd_anchor_one_layer` result per feature layer.
    """
    return [
        ssd_anchor_one_layer(img_shape, feat_shape,
                             anchor_sizes[idx],
                             anchor_ratios[idx],
                             anchor_steps[idx],
                             offset=offset, dtype=dtype)
        for idx, feat_shape in enumerate(layers_shape)
    ]

0 commit comments

Comments
 (0)