使用 maskrcnn-benchmark 训练自己的 COCO 格式数据集
1.COCO 数据集制作
采用VIA标注polygon导出相应的.csv数据。
注意:segmentation 的格式是 [[x0,y0,x1,y1,x2,y2,x3,y3,x4,y4]],即 x、y 坐标交替排列的一维列表,外面再包一层列表。
遇到的坑:"annotations" 字段中的 "segmentation" 是一个二维数组(大概是考虑到某个实例可能由互不相连的好几个部分组成)。
#!/usr/bin/env python3
import datetime
import json
import os
import re
import fnmatch
from PIL import Image
import numpy as np
import pandas as pd
import cv2
import getArea
#%%
# Base locations used by the converter.
ROOT_DIR = 'train'
IMAGE_DIR = '/media/ices18/Data/sms/shiyou_segment_model/data/JPEGImages'
# ANNOTATION_DIR = os.path.join(ROOT_DIR, "annotations")

# Dataset-level metadata; the creation timestamp is taken once, when the
# module is loaded.
INFO = dict(
    description="Example Dataset",
    url="https://github.com/waspinator/pycococreator",
    version="0.1.0",
    year=2019,
    contributor="mingshan",
    date_created=datetime.datetime.utcnow().isoformat(' '),
)

# Single license entry referenced by id from every image record.
LICENSES = [
    dict(
        id=1,
        name="Attribution-NonCommercial-ShareAlike License",
        url="http://creativecommons.org/licenses/by-nc-sa/2.0/",
    ),
]

# Class names accepted from the annotation CSV files.
CATEGORIES_NAMES = [
    'car', 'truck', 'build', 'collapse', 'river',
    'dig', 'mound', 'brick', 'tube', 'pole',
]

# Image files that are skipped during conversion (damaged images).
TRUNCATED_NAMES = [
    '10052.jpg', '14191.jpg', '14383.jpg', '13285.jpg', '12048.jpg',
]
# (name, supercategory) pairs; the 1-based position in this table is the
# COCO category id.
_CATEGORY_TABLE = (
    ('car', 'vehicle'),
    ('truck', 'vehicle'),
    ('build', 'engineer'),
    ('river', 'disaster'),
    ('collapse', 'disaster'),
    ('mound', 'engineer'),
    ('brick', 'engineer'),
    ('tube', 'engineer'),
    ('dig', 'engineer'),
    ('pole', 'engineer'),
)

# COCO "categories" section: one dict per class with id, name and
# supercategory.
CATEGORIES = [
    {'id': category_id, 'name': name, 'supercategory': supercategory}
    for category_id, (name, supercategory) in enumerate(_CATEGORY_TABLE, start=1)
]
#%%
def filter_for_jpeg(root, files):
    """Return ``root``-joined paths of the entries in *files* that look like JPEGs.

    Each name is joined onto *root* first; the joined path is then kept only
    if it matches the ``*.jpeg`` or ``*.jpg`` glob (case-sensitive).
    """
    jpeg_pattern = r'|'.join(fnmatch.translate(glob) for glob in ('*.jpeg', '*.jpg'))
    joined_paths = (os.path.join(root, name) for name in files)
    return [path for path in joined_paths if re.match(jpeg_pattern, path)]
#%%
def get_segmenation(coord_x, coord_y):
    """Interleave x and y coordinates into COCO polygon form.

    Returns a list holding one flat list ``[x0, y0, x1, y1, ...]``. Pairing
    stops at the shorter of the two inputs.
    """
    interleaved = [value for pair in zip(coord_x, coord_y) for value in pair]
    return [interleaved]
def create_image_info(image_id, file_name, image_size,
                      date_captured=None,
                      license_id=1, coco_url="", flickr_url=""):
    """Build one entry for the COCO ``images`` list.

    Args:
        image_id: Identifier stored under ``"id"``.
        file_name: Image file name stored under ``"file_name"``.
        image_size: ``(width, height)`` pair, e.g. ``PIL.Image.size``.
        date_captured: Timestamp string. When omitted (``None``) the current
            UTC time is used, formatted as ``YYYY-MM-DD HH:MM:SS[.ffffff]``.
        license_id: Id of the license entry this image refers to.
        coco_url: Stored unchanged under ``"coco_url"``.
        flickr_url: Stored unchanged under ``"flickr_url"``.

    Returns:
        dict with the keys ``id``, ``file_name``, ``width``, ``height``,
        ``date_captured``, ``license``, ``coco_url`` and ``flickr_url``.
    """
    if date_captured is None:
        # Resolve the timestamp per call. A default written in the signature
        # is evaluated only once, when the function is defined, so every image
        # would otherwise share the module-import time.
        now_utc = datetime.datetime.now(datetime.timezone.utc)
        # Drop tzinfo so the text keeps the naive-UTC format used before.
        date_captured = now_utc.replace(tzinfo=None).isoformat(' ')

    return {
        "id": image_id,
        "file_name": file_name,
        "width": image_size[0],
        "height": image_size[1],
        "date_captured": date_captured,
        "license": license_id,
        "coco_url": coco_url,
        "flickr_url": flickr_url,
    }
#%%
def create_annotation_info(annotation_id, image_id, category_info_id, all_x, all_y,
                           image_size=None, tolerance=2, bounding_box=None):
    """Build one entry for the COCO ``annotations`` list from a polygon.

    Args:
        annotation_id: Identifier stored under ``"id"``.
        image_id: Id of the image the polygon belongs to.
        category_info_id: Category id stored under ``"category_id"``.
        all_x: Polygon x coordinates.
        all_y: Polygon y coordinates, in the same vertex order as *all_x*.
        image_size: Accepted for signature compatibility; not used.
        tolerance: Accepted for signature compatibility; not used.
        bounding_box: Optional ``[x, y, width, height]``. When ``None`` the
            axis-aligned box enclosing the polygon is computed.

    Returns:
        The annotation dict, or ``None`` when the polygon has fewer than
        three vertices (no area can be computed for it).
    """
    # A polygon needs at least three vertices. Returning None lets the caller
    # skip the region instead of aborting the whole conversion.
    if len(all_x) < 3 or len(all_y) < 3:
        return None

    # Honour a caller-supplied box; only derive one when none was given.
    if bounding_box is None:
        x_min, y_min = min(all_x), min(all_y)
        bounding_box = [x_min, y_min, max(all_x) - x_min, max(all_y) - y_min]

    return {
        "id": annotation_id,
        "image_id": image_id,
        "category_id": category_info_id,
        "iscrowd": 0,
        "area": getArea.GetAreaOfPolyGon(all_x, all_y),
        "bbox": bounding_box,
        "segmentation": get_segmenation(all_x, all_y),
    }
#%%
def main(csv_path='/media/ices18/Data/sms/shiyou_benchmark/shiyou_seg/csv_train/',
         image_dir=None,
         output_path='/media/ices18/Data/sms/shiyou_benchmark/shiyou_seg/instances_train2017.json'):
    """Convert VIA polygon CSV exports into one COCO instances JSON file.

    Every file in *csv_path* is read as a CSV with a header row. Rows are
    grouped by their ``filename`` column. Each group whose first row has
    ``region_count > 0`` becomes one image record, and each polygon region
    with a known class name becomes one annotation record.

    Args:
        csv_path: Directory holding the CSV files.
        image_dir: Directory holding the images; defaults to ``IMAGE_DIR``.
        output_path: Destination of the generated JSON file.
    """
    if image_dir is None:
        image_dir = IMAGE_DIR

    coco_output = {
        "info": INFO,
        "licenses": LICENSES,
        "categories": CATEGORIES,
        "images": [],
        "annotations": [],
    }
    image_id = 1
    segmentation_id = 1

    # Read all CSV files first and concatenate once. Growing a DataFrame
    # inside the loop copies the accumulated rows on every iteration.
    frames = [
        pd.read_csv(os.path.join(csv_path, csv_name), header=0)
        for csv_name in os.listdir(csv_path)
    ]
    df_re = pd.concat(frames) if frames else pd.DataFrame(columns=['filename'])
    print(df_re)

    category_ids = {category['name']: category['id'] for category in CATEGORIES}

    for img_name, group in df_re.groupby('filename'):
        if img_name in TRUNCATED_NAMES:  # skip damaged images
            continue
        if not group.iloc[0]['region_count'] > 0:
            continue

        # Only the size is needed; close the file handle right away.
        with Image.open(os.path.join(image_dir, img_name)) as image:
            image_size = image.size
        coco_output["images"].append(create_image_info(image_id, img_name, image_size))
        print(group)

        for _, region in group.iterrows():
            if region['region_attributes'] == '{}':
                continue
            # The attribute columns hold JSON text. They are parsed with
            # json.loads rather than eval so CSV content is never executed.
            class_name = json.loads(region['region_attributes']).get('name')
            if class_name not in CATEGORIES_NAMES:
                continue
            shape = json.loads(region['region_shape_attributes'])
            if 'all_points_x' not in shape:  # not a polygon region
                continue

            annotation_info = create_annotation_info(
                segmentation_id, image_id, category_ids[class_name],
                shape['all_points_x'], shape['all_points_y'],
                image_size, tolerance=2)
            if annotation_info is not None:
                coco_output["annotations"].append(annotation_info)
                segmentation_id = segmentation_id + 1

        image_id = image_id + 1

    with open(output_path, 'w') as output_json_file:
        json.dump(coco_output, output_json_file)


if __name__ == "__main__":
    main()
# -*- coding: cp936 -*-
import math
class Point():
    """Plain 2-D point with ``x`` and ``y`` attributes."""

    def __init__(self, x, y):
        self.x, self.y = x, y
def GetAreaOfPolyGon(points_x, points_y):
    """Return the area enclosed by a simple polygon.

    The vertices are ``(points_x[i], points_y[i])`` in drawing order, either
    clockwise or counter-clockwise. The area is computed with the shoelace
    formula, so convex and concave polygons are both handled.

    The previous triangle-fan loop always used vertices 1 and 2 instead of
    vertices ``i`` and ``i + 1``. It therefore returned ``(n - 2)`` times the
    area of the first triangle for any polygon.

    Args:
        points_x: x coordinates of the vertices.
        points_y: y coordinates of the vertices, same length as *points_x*.

    Returns:
        The non-negative area as a float.

    Raises:
        Exception: If fewer than three vertices are supplied.
    """
    vertex_count = len(points_x)
    if vertex_count < 3:
        raise Exception("error")

    twice_signed_area = 0
    for i in range(vertex_count):
        j = (i + 1) % vertex_count  # wrap around to close the polygon
        twice_signed_area += points_x[i] * points_y[j] - points_x[j] * points_y[i]
    return abs(twice_signed_area) / 2
def GetAreaOfTriangle(p1, p2, p3):
    """Return the (non-negative) area of the triangle ``p1``, ``p2``, ``p3``.

    Each argument is an object with numeric ``x`` and ``y`` attributes.

    The area is half the magnitude of the cross product of two edge vectors.
    The earlier Heron's-formula version took the square root of a product
    that floating-point rounding can make slightly negative for
    near-collinear points, which makes ``math.sqrt`` raise ``ValueError``.
    """
    cross = (p2.x - p1.x) * (p3.y - p1.y) - (p2.y - p1.y) * (p3.x - p1.x)
    return abs(cross) / 2
def GetLineLength(p1, p2):
    """Return the Euclidean distance between ``p1`` and ``p2``.

    Both arguments are objects with numeric ``x`` and ``y`` attributes.
    """
    delta_x = p1.x - p2.x
    delta_y = p1.y - p2.y
    squared_distance = math.pow(delta_x, 2) + math.pow(delta_y, 2)
    return math.sqrt(squared_distance)
2.COCO数据集可视化
from pycocotools.coco import COCO
import numpy as np
import skimage.io as io
import matplotlib.pyplot as plt
import pylab
from PIL import Image
from scipy import misc
# Default figure size for the plots created below.
pylab.rcParams['figure.figsize'] = (10.0, 8.0)
annFile = '/media/ices18/Data/sms/shiyou_segment_model/maskrcnn-benchmark/datasets/coco/annotations/instances_train2017.json'

# Initialize the COCO API for instance annotations.
coco = COCO(annFile)

# Collect the category names, then the supercategory names. They are only
# needed by the disabled prints below.
cats = coco.loadCats(coco.getCatIds())
nms = [cat['name'] for cat in cats]
# print 'COCO categories: \n\n', ' '.join(nms)
nms = {cat['supercategory'] for cat in cats}
# print 'COCO supercategories: \n', ' '.join(nms)

# Pick one random image that contains the requested categories.
catIds = coco.getCatIds(catNms=['truck', 'car', 'build'])
imgIds = coco.getImgIds(catIds=catIds)
random_index = np.random.randint(0, len(imgIds))
img = coco.loadImgs(imgIds[random_index])[0]

# Load the image file.
I = io.imread('/media/ices18/Data/sms/shiyou_segment_model/data/JPEGImages/%s' % (img['file_name']))
plt.figure()
plt.axis('off')

# Drop index 1 along the first axis, then rebuild an array from the entries
# of I[0]. NOTE(review): this presumably targets files that decode to more
# than one frame; confirm against the actual images.
I = np.delete(I, [1], axis=0)
first_entry = I[0]
Img = np.array([np.array(first_entry[k]) for k in range(len(first_entry))])

# Show the image and draw its instance annotations on top.
plt.imshow(Img)
plt.axis('off')
annIds = coco.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=None)
anns = coco.loadAnns(annIds)
print(len(anns))
coco.showAnns(anns)
plt.show()
3.模型准备
直接使用官方提供的 Dockerfile 创建容器,此处务必使用 PyTorch 1.0.0 nightly(使用 PyTorch 1.0.1 时,即使编译通过,训练时也会报错)。
defaults.py修改_C.DATALOADER.NUM_WORKERS=0(因为在docker里面训练,资源有限)
import os
from yacs.config import CfgNode as CN
# Root configuration node; every option below is attached to `_C`.
_C = CN()
# Model-level options.
_C.MODEL = CN()
_C.MODEL.RPN_ONLY = False
_C.MODEL.MASK_ON = False
_C.MODEL.DEVICE = "cuda"
_C.MODEL.META_ARCHITECTURE = "GeneralizedRCNN"
_C.MODEL.WEIGHT = ""
# Input options.
_C.INPUT = CN()
_C.INPUT.MIN_SIZE_TRAIN = 800  # minimum image size for the training set
_C.INPUT.MAX_SIZE_TRAIN = 1333  # maximum image size for the training set
_C.INPUT.MIN_SIZE_TEST = 800
_C.INPUT.MAX_SIZE_TEST = 1333
_C.INPUT.PIXEL_MEAN = [102.9801, 115.9465, 122.7717]
_C.INPUT.PIXEL_STD = [1., 1., 1.]
_C.INPUT.TO_BGR255 = True
# Dataset names; left empty here.
_C.DATASETS = CN()
_C.DATASETS.TRAIN = ()  # provided by the model configuration file
_C.DATASETS.TEST = ()
# Data-loader options.
_C.DATALOADER = CN()
# Number of worker threads used for data loading. Set to 0 here because
# training runs inside a Docker container with limited resources.
_C.DATALOADER.NUM_WORKERS = 0
_C.DATALOADER.SIZE_DIVISIBILITY = 0
_C.DATALOADER.ASPECT_RATIO_GROUPING = True
# Backbone options.
_C.MODEL.BACKBONE = CN()
_C.MODEL.BACKBONE.CONV_BODY = "R-50-C4"
_C.MODEL.BACKBONE.FREEZE_CONV_BODY_AT = 2
_C.MODEL.BACKBONE.OUT_CHANNELS = 256 * 4
# RPN options.
_C.MODEL.RPN = CN()
_C.MODEL.RPN.USE_FPN = False
_C.MODEL.RPN.ANCHOR_SIZES = (32, 64, 128, 256, 512)
_C.MODEL.RPN.ANCHOR_STRIDE = (16,)
_C.MODEL.RPN.ASPECT_RATIOS = (0.5, 1.0, 2.0)
_C.MODEL.RPN.STRADDLE_THRESH = 0
_C.MODEL.RPN.FG_IOU_THRESHOLD = 0.7
_C.MODEL.RPN.BG_IOU_THRESHOLD = 0.3
_C.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 256
_C.MODEL.RPN.POSITIVE_FRACTION = 0.5
_C.MODEL.RPN.PRE_NMS_TOP_N_TRAIN = 12000
_C.MODEL.RPN.PRE_NMS_TOP_N_TEST = 6000
_C.MODEL.RPN.POST_NMS_TOP_N_TRAIN = 2000
_C.MODEL.RPN.POST_NMS_TOP_N_TEST = 1000
_C.MODEL.RPN.NMS_THRESH = 0.7
_C.MODEL.RPN.MIN_SIZE = 0
_C.MODEL.RPN.FPN_POST_NMS_TOP_N_TRAIN = 2000
_C.MODEL.RPN.FPN_POST_NMS_TOP_N_TEST = 2000
# ROI-heads options.
_C.MODEL.ROI_HEADS = CN()
_C.MODEL.ROI_HEADS.USE_FPN = False
_C.MODEL.ROI_HEADS.FG_IOU_THRESHOLD = 0.5
_C.MODEL.ROI_HEADS.BG_IOU_THRESHOLD = 0.5
_C.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS = (10., 10., 5., 5.)
_C.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
_C.MODEL.ROI_HEADS.POSITIVE_FRACTION = 0.25
_C.MODEL.ROI_HEADS.SCORE_THRESH = 0.05
_C.MODEL.ROI_HEADS.NMS = 0.5
_C.MODEL.ROI_HEADS.DETECTIONS_PER_IMG = 100
# Box-head options.
_C.MODEL.ROI_BOX_HEAD = CN()
_C.MODEL.ROI_BOX_HEAD.FEATURE_EXTRACTOR = "ResNet50Conv5ROIFeatureExtractor"
_C.MODEL.ROI_BOX_HEAD.PREDICTOR = "FastRCNNPredictor"
_C.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION = 14
_C.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO = 0
_C.MODEL.ROI_BOX_HEAD.POOLER_SCALES = (1.0 / 16,)
# Number of classes INCLUDING background. The COCO default is 81 (80 + 1).
# The CATEGORIES list of the converter script declares 10 categories
# (ids 1-10), so the value must be 10 + 1 = 11.
_C.MODEL.ROI_BOX_HEAD.NUM_CLASSES = 11
_C.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM = 1024
# Mask-head options.
_C.MODEL.ROI_MASK_HEAD = CN()
_C.MODEL.ROI_MASK_HEAD.FEATURE_EXTRACTOR = "ResNet50Conv5ROIFeatureExtractor"
_C.MODEL.ROI_MASK_HEAD.PREDICTOR = "MaskRCNNC4Predictor"
_C.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION = 14
_C.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO = 0
_C.MODEL.ROI_MASK_HEAD.POOLER_SCALES = (1.0 / 16,)
_C.MODEL.ROI_MASK_HEAD.MLP_HEAD_DIM = 1024
_C.MODEL.ROI_MASK_HEAD.CONV_LAYERS = (256, 256, 256, 256)
_C.MODEL.ROI_MASK_HEAD.RESOLUTION = 14
_C.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR = True
# ResNet options.
_C.MODEL.RESNETS = CN()
_C.MODEL.RESNETS.NUM_GROUPS = 1
_C.MODEL.RESNETS.WIDTH_PER_GROUP = 64
_C.MODEL.RESNETS.STRIDE_IN_1X1 = True
_C.MODEL.RESNETS.TRANS_FUNC = "BottleneckWithFixedBatchNorm"
_C.MODEL.RESNETS.STEM_FUNC = "StemWithFixedBatchNorm"
_C.MODEL.RESNETS.RES5_DILATION = 1
_C.MODEL.RESNETS.RES2_OUT_CHANNELS = 256
_C.MODEL.RESNETS.STEM_OUT_CHANNELS = 64
# Solver (optimizer / schedule) options.
_C.SOLVER = CN()
_C.SOLVER.MAX_ITER = 40000  # maximum number of iterations
_C.SOLVER.BASE_LR = 0.02  # initial learning rate; usually set in the model configuration file
_C.SOLVER.BIAS_LR_FACTOR = 2
_C.SOLVER.MOMENTUM = 0.9
_C.SOLVER.WEIGHT_DECAY = 0.0005
_C.SOLVER.WEIGHT_DECAY_BIAS = 0
_C.SOLVER.GAMMA = 0.1
_C.SOLVER.STEPS = (30000,)
_C.SOLVER.WARMUP_FACTOR = 1.0 / 3
_C.SOLVER.WARMUP_ITERS = 500  # number of warm-up iterations; the learning rate is lower while the iteration count is below this value
_C.SOLVER.WARMUP_METHOD = "constant"  # warm-up strategy: either 'constant' or 'linear'
_C.SOLVER.CHECKPOINT_PERIOD = 2000  # interval, in steps, between checkpoints
_C.SOLVER.IMS_PER_BATCH = 1  # number of images in one batch
# Test-time options.
_C.TEST = CN()
_C.TEST.EXPECTED_RESULTS = []
_C.TEST.EXPECTED_RESULTS_SIGMA_TOL = 4
_C.TEST.IMS_PER_BATCH = 1
_C.OUTPUT_DIR = "output"  # mainly used as the output directory for checkpoints and inference results
# Path of paths_catalog.py, resolved next to this file.
_C.PATHS_CATALOG = os.path.join(os.path.dirname(__file__), "paths_catalog.py")
paths_catalog.py:修改数据集路径
模型配置文件在启动训练时由 --config-file 参数指定,在 configs 子目录下默认提供了 mask_rcnn 和 faster_rcnn 框架基于不同骨干网络的 YAML 格式配置文件。这里选用 e2e_mask_rcnn_R_101_FPN_1x.yaml,也就是使用 mask_rcnn 检测模型,骨干网络使用 ResNet101-FPN,配置详情如下(根据自己的数据集作相应的调整):
# Nesting restored: without indentation the keys collapse into one flat map
# with duplicates (USE_FPN, POOLER_SCALES, ...) and the file cannot be loaded
# as a model configuration.
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
  BACKBONE:
    CONV_BODY: "R-101-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True  # use the feature pyramid (FPN): proposals are extracted from several feature maps
    ANCHOR_STRIDE: (4, 8, 16, 32, 64)  # anchor strides
    PRE_NMS_TOP_N_TRAIN: 2000  # training: number of proposals kept before NMS
    PRE_NMS_TOP_N_TEST: 1000  # testing: number of proposals kept before NMS
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
  ROI_MASK_HEAD:
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
    PREDICTOR: "MaskRCNNC4Predictor"
    POOLER_RESOLUTION: 14
    POOLER_SAMPLING_RATIO: 2
    RESOLUTION: 28
    SHARE_BOX_FEATURE_EXTRACTOR: False
  # Keep the mask branch enabled (the stock value). The quoted listing had
  # False, which trains detection only and ignores the polygon
  # "segmentation" annotations prepared in step 1.
  MASK_ON: True
DATASETS:
  TRAIN: ("coco_2014_train",)  # note the training / test dataset names:
  TEST: ("coco_2014_val",)  # they must match the DATASETS entries in paths_catalog.py
DATALOADER:
  SIZE_DIVISIBILITY: 32
SOLVER:
  BASE_LR: 0.01  # initial learning rate; the framework defines its own schedule for adjusting it
  WEIGHT_DECAY: 0.0001
  # STEPS lists the iterations at which the learning rate is decayed.
  # Example from the quoted configuration: 149898 images with a decay every
  # 4 epochs gives 149898 * 4 = 599592 and 149898 * 8 = 1199184.
  STEPS: (599592, 1199184)
  MAX_ITER: 1300000  # maximum number of iterations
4.开始训练
# Start training with the R-101-FPN Mask R-CNN config file. The trailing
# KEY VALUE pairs (SOLVER.* / TEST.*) are extra options passed to train_net.py;
# presumably they override the values in the config file (confirm in train_net.py).
python tools/train_net.py --config-file "configs/e2e_mask_rcnn_R_101_FPN_1x.yaml" SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.0025 SOLVER.MAX_ITER 720000 SOLVER.STEPS "(480000, 640000)" TEST.IMS_PER_BATCH 1