ssd检测不到怎么办 ssd检测( 三 )


现在,让我们用以下代码对其进行总结 。
def create_prior_boxes(self):
"""
Create the 8732 prior (default) boxes for the SSD300, as defined in the paper.
:return: prior boxes in center-size coordinates, a tensor of dimensions (8732, 4)
"""
fmap_dims = {'conv4_3': 38,
'conv7': 19,
'conv8_2': 10,
'conv9_2': 5,
'conv10_2': 3,
'conv11_2': 1}
obj_scales = {'conv4_3': 0.1,
'conv7': 0.2,
'conv8_2': 0.375,
'conv9_2': 0.55,
'conv10_2': 0.725,
'conv11_2': 0.9}
"""
Note that we were considering four boxes in certain layers and six boxes in another layer. Now, if we want to
have four boxes, we remove the {3, 1/3} aspect ratios, else we consider all of the six possible boxes
"""
aspect_ratios = {'conv4_3': [1., 2., 0.5],
'conv7': [1., 2., 3., 0.5, .333],
'conv8_2': [1., 2., 3., 0.5, .333],
'conv9_2': [1., 2., 3., 0.5, .333],
'conv10_2': [1., 2., 0.5],
'conv11_2': [1., 2., 0.5]}
fmaps = list(fmap_dims.keys())
prior_boxes = []
self.prior_boxes_info = []
for k, fmap in enumerate(fmaps):
for i in range(fmap_dims[fmap]):
for j in range(fmap_dims[fmap]):
cx = (j + 0.5) / fmap_dims[fmap]
cy = (i + 0.5) / fmap_dims[fmap]
for ratio in aspect_ratios[fmap]:
prior_boxes.append([cx, cy, obj_scales[fmap] * sqrt(ratio), obj_scales[fmap] / sqrt(ratio)])
self.prior_boxes_info.append([fmap, i, j, ratio])
# For an aspect ratio of 1, use an additional prior whose scale is the geometric mean of the
# scale of the current feature map and the scale of the next feature map
if ratio == 1.:
try:
additional_scale = sqrt(obj_scales[fmap] * obj_scales[fmaps[k + 1]])
# For the last feature map, there is no "next" feature map
except IndexError:
additional_scale = 1.
prior_boxes.append([cx, cy, additional_scale, additional_scale])
self.prior_boxes_info.append([fmap, i, j, ratio])
prior_boxes = torch.FloatTensor(prior_boxes).to(self.device) # (8732, 4)
prior_boxes.clamp_(0, 1) # (8732, 4)
return prior_boxes
它为 SSD 做出的 8732 个预测返回 8732 个先验框 。
预测卷积class PredictionConvolutions(nn.Module):
"""
Convolutions to predict class scores and bounding boxes using lower and higher-level feature maps.
The bounding boxes are predicted as encoded offsets w.r.t each of the 8732 anchor boxes.
See 'cxcy_to_gcxgcy' in utils.py for the encoding definition.
The class scores represent the scores of each object class in each of the 8732 bounding boxes located.
A high score for 'background' = no object.
"""
def __init__(self, n_classes):
"""
:param n_classes: number of different types of objects
"""
super(PredictionConvolutions, self).__init__()
self.n_classes = n_classes
# Number of prior-boxes we are considering per position in each feature map
n_boxes = {'conv4_3': 4,
'conv7': 6,
'conv8_2': 6,
'conv9_2': 6,
'conv10_2': 4,
'conv11_2': 4}
# 4 prior-boxes implies we use 4 different aspect ratios, etc.
# Localization prediction convolutions (predict offsets w.r.t prior-boxes)
self.loc_conv4_3 = nn.Conv2d(512, n_boxes['conv4_3'] * 4, kernel_size=3, padding=1)
self.loc_conv7 = nn.Conv2d(1024, n_boxes['conv7'] * 4, kernel_size=3, padding=1)
self.loc_conv8_2 = nn.Conv2d(512, n_boxes['conv8_2'] * 4, kernel_size=3, padding=1)

推荐阅读