pytorch yolov3代码理解(1)
代码链接:https://github.com/BobLiu20/YOLOv3_PyTorch
1.解读nets部分
1.1 backbone:Darknet53
yolov3网络结构展示:
darknet.py
这段代码是我们的分类网络。
import torch
import torch.nn as nn
import math
from collections import OrderedDict
__all__ = ['darknet21', 'darknet53']
class BasicBlock(nn.Module):
'''
可以理解为每个residual模块中的前两层。分别是1x1和3x3卷积核。
planes为卷积核个数。
'''
def __init__(self, inplanes, planes):
super(BasicBlock, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes[0], kernel_size=1,
stride=1, padding=0, bias=False)
self.bn1 = nn.BatchNorm2d(planes[0])
self.relu1 = nn.LeakyReLU(0.1)
self.conv2 = nn.Conv2d(planes[0], planes[1], kernel_size=3,
stride=1, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes[1])
self.relu2 = nn.LeakyReLU(0.1)
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu1(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu2(out)
out += residual
return out
class DarkNet(nn.Module):
'''
layer[i]为5个residual模块。
isinstance()用来判断对象是不是已知的类型。
'''
def __init__(self, layers):
super(DarkNet, self).__init__()
self.inplanes = 32
self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(self.inplanes)
self.relu1 = nn.LeakyReLU(0.1)
self.layer1 = self._make_layer([32, 64], layers[0])
self.layer2 = self._make_layer([64, 128], layers[1])
self.layer3 = self._make_layer([128, 256], layers[2])
self.layer4 = self._make_layer([256, 512], layers[3])
self.layer5 = self._make_layer([512, 1024], layers[4])
self.layers_out_filters = [64, 128, 256, 512, 1024]
'''
for函数体中实现了什么?没有看懂。
'''
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
def _make_layer(self, planes, blocks):
'''
先将需要的每一个block搭建起来,block个数在后续的Darknet函数中加入。
'''
layers = []
# downsample
layers.append(("ds_conv", nn.Conv2d(self.inplanes, planes[1], kernel_size=3,
stride=2, padding=1, bias=False)))
layers.append(("ds_bn", nn.BatchNorm2d(planes[1])))
layers.append(("ds_relu", nn.LeakyReLU(0.1)))
# blocks
self.inplanes = planes[1]
for i in range(0, blocks):
layers.append(("residual_{}".format(i), BasicBlock(self.inplanes, planes)))
return nn.Sequential(OrderedDict(layers))
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu1(x)
x = self.layer1(x)
x = self.layer2(x)
out3 = self.layer3(x)
out4 = self.layer4(out3)
out5 = self.layer5(out4)
return out3, out4, out5
def darknet21(pretrained, **kwargs):
"""Constructs a darknet-21 model.
"""
model = DarkNet([1, 1, 2, 2, 1])
if pretrained:
if isinstance(pretrained, str):
model.load_state_dict(torch.load(pretrained))
else:
raise Exception("darknet request a pretrained path. got [{}]".format(pretrained))
return model
def darknet53(pretrained, **kwargs):
"""Constructs a darknet-53 model.
"""
model = DarkNet([1, 2, 8, 8, 4])
if pretrained:
if isinstance(pretrained, str):
model.load_state_dict(torch.load(pretrained))
else:
raise Exception("darknet request a pretrained path. got [{}]".format(pretrained))
return model
1.2 modelmain.py
这段代码接在分类网络后,即从上述网络结构的75层开始,是我们的yolo3检测网络部分。
import torch
import torch.nn as nn
from collections import OrderedDict
from .backbone import backbone_fn
class ModelMain(nn.Module):
'''
网络的74层输出13x13x1024。_out_filters为[64, 128, 256, 512, 1024]。
这段代码对应上述yolov3网络结构的图片,为了自己看的方便画的一个图。
'''
def __init__(self, config, is_training=True):
super(ModelMain, self).__init__()
self.config = config
self.training = is_training
self.model_params = config["model_params"]
# backbone
_backbone_fn = backbone_fn[self.model_params["backbone_name"]]
self.backbone = _backbone_fn(self.model_params["backbone_pretrained"])
_out_filters = self.backbone.layers_out_filters
# embedding0
final_out_filter0 = len(config["yolo"]["anchors"][0]) * (5 + config["yolo"]["classes"])
self.embedding0 = self._make_embedding([512, 1024], _out_filters[-1], final_out_filter0)
# embedding1
final_out_filter1 = len(config["yolo"]["anchors"][1]) * (5 + config["yolo"]["classes"])
self.embedding1_cbl = self._make_cbl(512, 256, 1)
self.embedding1_upsample = nn.Upsample(scale_factor=2, mode='nearest')
self.embedding1 = self._make_embedding([256, 512], _out_filters[-2] + 256, final_out_filter1)
# embedding2
final_out_filter2 = len(config["yolo"]["anchors"][2]) * (5 + config["yolo"]["classes"])
self.embedding2_cbl = self._make_cbl(256, 128, 1)
self.embedding2_upsample = nn.Upsample(scale_factor=2, mode='nearest')
self.embedding2 = self._make_embedding([128, 256], _out_filters[-3] + 128, final_out_filter2)
def _make_cbl(self, _in, _out, ks):
''' cbl = conv + batch_norm + leaky_relu
'''
pad = (ks - 1) // 2 if ks else 0
return nn.Sequential(OrderedDict([
("conv", nn.Conv2d(_in, _out, kernel_size=ks, stride=1, padding=pad, bias=False)),
("bn", nn.BatchNorm2d(_out)),
("relu", nn.LeakyReLU(0.1)),
]))
def _make_embedding(self, filters_list, in_filters, out_filter):
'''
该函数实现了convolution set部分加上后续的3x3和1x1。
'''
m = nn.ModuleList([
self._make_cbl(in_filters, filters_list[0], 1),
self._make_cbl(filters_list[0], filters_list[1], 3),
self._make_cbl(filters_list[1], filters_list[0], 1),
self._make_cbl(filters_list[0], filters_list[1], 3),
self._make_cbl(filters_list[1], filters_list[0], 1),
self._make_cbl(filters_list[0], filters_list[1], 3)])
m.add_module("conv_out", nn.Conv2d(filters_list[1], out_filter, kernel_size=1,
stride=1, padding=0, bias=True))
return m
def forward(self, x):
def _branch(_embedding, _in):
for i, e in enumerate(_embedding):
_in = e(_in)
if i == 4:
out_branch = _in
return _in, out_branch
# backbone
x2, x1, x0 = self.backbone(x)
# yolo branch 0
out0, out0_branch = _branch(self.embedding0, x0)
# yolo branch 1
x1_in = self.embedding1_cbl(out0_branch)
x1_in = self.embedding1_upsample(x1_in)
x1_in = torch.cat([x1_in, x1], 1)
out1, out1_branch = _branch(self.embedding1, x1_in)
# yolo branch 2
x2_in = self.embedding2_cbl(out1_branch)
x2_in = self.embedding2_upsample(x2_in)
x2_in = torch.cat([x2_in, x2], 1)
out2, out2_branch = _branch(self.embedding2, x2_in)
return out0, out1, out2
def load_darknet_weights(self, weights_path):
import numpy as np
#Open the weights file
fp = open(weights_path, "rb")
header = np.fromfile(fp, dtype=np.int32, count=5) # First five are header values
# Needed to write header when saving weights
weights = np.fromfile(fp, dtype=np.float32) # The rest are weights
print ("total len weights = ", weights.shape)
fp.close()
ptr = 0
all_dict = self.state_dict()
all_keys = self.state_dict().keys()
print (all_keys)
last_bn_weight = None
last_conv = None
for i, (k, v) in enumerate(all_dict.items()):
if 'bn' in k:
if 'weight' in k:
last_bn_weight = v
elif 'bias' in k:
num_b = v.numel()
vv = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(v)
v.copy_(vv)
print ("bn_bias: ", ptr, num_b, k)
ptr += num_b
# weight
v = last_bn_weight
num_b = v.numel()
vv = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(v)
v.copy_(vv)
print ("bn_weight: ", ptr, num_b, k)
ptr += num_b
last_bn_weight = None
elif 'running_mean' in k:
num_b = v.numel()
vv = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(v)
v.copy_(vv)
print ("bn_mean: ", ptr, num_b, k)
ptr += num_b
elif 'running_var' in k:
num_b = v.numel()
vv = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(v)
v.copy_(vv)
print ("bn_var: ", ptr, num_b, k)
ptr += num_b
# conv
v = last_conv
num_b = v.numel()
vv = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(v)
v.copy_(vv)
print ("conv wight: ", ptr, num_b, k)
ptr += num_b
last_conv = None
else:
raise Exception("Error for bn")
elif 'conv' in k:
if 'weight' in k:
last_conv = v
else:
num_b = v.numel()
vv = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(v)
v.copy_(vv)
print ("conv bias: ", ptr, num_b, k)
ptr += num_b
# conv
v = last_conv
num_b = v.numel()
vv = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(v)
v.copy_(vv)
print ("conv wight: ", ptr, num_b, k)
ptr += num_b
last_conv = None
print("Total ptr = ", ptr)
print("real size = ", weights.shape)
if __name__ == "__main__":
config = {"model_params": {"backbone_name": "darknet_53"}}
m = ModelMain(config)
x = torch.randn(1, 3, 416, 416)
y0, y1, y2 = m(x)
print(y0.size())
print(y1.size())
print(y2.size())