benchmark mask训练自己的COCO数据集

1.COCO 数据集制作
采用VIA标注polygon导出相应的.csv数据。
注意:segmentation的格式是[[x0,y0,x1,y1,x2,y2,x3,y3,x4,y4]]
遇到的坑:"annotations"字段的"segmentation"是一个二维数组(大概是考虑到某个实例可能由互不相连的几个部分组成)

#!/usr/bin/env python3
import datetime
import json
import os
import re
import fnmatch
from PIL import Image
import numpy as np
import pandas as pd
import cv2
import getArea

#%%
# Name of the dataset split; only referenced by the commented-out line below.
ROOT_DIR = 'train'
# Directory the source images are opened from when building the "images" list.
IMAGE_DIR = '/media/ices18/Data/sms/shiyou_segment_model/data/JPEGImages'
#ANNOTATION_DIR = os.path.join(ROOT_DIR, "annotations")

# Top-level "info" section of the generated COCO JSON.
# "date_created" is evaluated once, when this module is loaded.
INFO = {
    "description": "Example Dataset",
    "url": "https://github.com/waspinator/pycococreator",
    "version": "0.1.0",
    "year": 2019,
    "contributor": "mingshan",
    "date_created": datetime.datetime.utcnow().isoformat(' ')
}

# Top-level "licenses" section; images reference the entry by its "id".
LICENSES = [
    {
        "id": 1,
        "name": "Attribution-NonCommercial-ShareAlike License",
        "url": "http://creativecommons.org/licenses/by-nc-sa/2.0/"
    }
]
# Class names accepted from the annotation CSVs. The order here does NOT
# define the category ids; ids are looked up by name in CATEGORIES below.
CATEGORIES_NAMES=['car','truck','build','collapse','river','dig','mound','brick','tube','pole']
# Image files that main() skips (marked there as damaged images).
TRUNCATED_NAMES=['10052.jpg','14191.jpg','14383.jpg','13285.jpg','12048.jpg']
# Top-level "categories" section: one entry per class with its numeric id.
CATEGORIES = [
    {
        'id': 1,
        'name': 'car',
        'supercategory': 'vehicle',
    },
    {
        'id': 2,
        'name': 'truck',
        'supercategory': 'vehicle',
    },
    {
        'id': 3,
        'name': 'build',
        'supercategory': 'engineer',
    },
    {
        'id': 4,
        'name': 'river',
        'supercategory': 'disaster',
    },
    {
        'id': 5,
        'name': 'collapse',
        'supercategory': 'disaster',
    },
    {
        'id': 6,
        'name': 'mound',
        'supercategory': 'engineer',
    },
    {
        'id': 7,
        'name': 'brick',
        'supercategory': 'engineer',
    },
    {
        'id': 8,
        'name': 'tube',
        'supercategory': 'engineer',
    },
    {
        'id': 9,
        'name': 'dig',
        'supercategory': 'engineer',
    },
    {
        'id': 10,
        'name': 'pole',
        'supercategory': 'engineer',
    }
]
#%%
def filter_for_jpeg(root, files):
    """Return the paths under *root* that look like JPEG files.

    Every entry of *files* is joined onto *root*; a joined path is kept only
    when it matches the glob ``*.jpeg`` or ``*.jpg`` (case-sensitive).
    """
    globs = ['*.jpeg', '*.jpg']
    # One alternation regex built from the translated glob patterns.
    pattern = r'|'.join(fnmatch.translate(glob) for glob in globs)
    candidates = (os.path.join(root, name) for name in files)
    return [path for path in candidates if re.match(pattern, path)]
#%% 
def get_segmenation(coord_x, coord_y):
    """Interleave x and y coordinates into a COCO-style polygon list.

    Returns a list holding one flat list ``[x0, y0, x1, y1, ...]``. Pairs are
    formed with ``zip``, so surplus values in the longer input are dropped.
    """
    flat = [value for pair in zip(coord_x, coord_y) for value in pair]
    return [flat]

def create_image_info(image_id, file_name, image_size,
                      date_captured=None,
                      license_id=1, coco_url="", flickr_url=""):
    """Build one entry for the "images" list of a COCO annotation file.

    Args:
        image_id: Unique id of the image inside the dataset.
        file_name: File name stored in the entry.
        image_size: ``(width, height)`` pair, e.g. ``PIL.Image.size``.
        date_captured: Timestamp string. When omitted (``None``) the current
            UTC time is used, formatted like ``datetime.isoformat(' ')``.
        license_id: Id of the license entry the image refers to.
        coco_url: Stored verbatim under "coco_url".
        flickr_url: Stored verbatim under "flickr_url".

    Returns:
        A dict with the keys id, file_name, width, height, date_captured,
        license, coco_url and flickr_url.
    """
    if date_captured is None:
        # Resolve the timestamp per call. A default written in the signature
        # is evaluated only once, when the function is defined, so every
        # image would silently share the import-time timestamp.
        now_utc = datetime.datetime.now(datetime.timezone.utc)
        # Drop tzinfo so the text keeps the naive-UTC format used before.
        date_captured = now_utc.replace(tzinfo=None).isoformat(' ')

    image_info = {
            "id": image_id,
            "file_name": file_name,
            "width": image_size[0],
            "height": image_size[1],
            "date_captured": date_captured,
            "license": license_id,
            "coco_url": coco_url,
            "flickr_url": flickr_url
    }
    return image_info
#%%
def create_annotation_info(annotation_id, image_id, category_info_id, all_x, all_y, 
                           image_size=None, tolerance=2, bounding_box=None):
    """Build one entry for the "annotations" list of a COCO annotation file.

    Args:
        annotation_id: Unique id of the annotation.
        image_id: Id of the image the polygon belongs to.
        category_info_id: Category id stored under "category_id".
        all_x: x coordinates of the polygon vertices.
        all_y: y coordinates of the polygon vertices.
        image_size: Accepted but not used by this function.
        tolerance: Accepted but not used by this function.
        bounding_box: Accepted but ignored; the box is always recomputed
            from ``all_x`` / ``all_y``.

    Returns:
        A dict with the keys id, image_id, category_id, iscrowd, area,
        bbox (``[x_min, y_min, width, height]``) and segmentation.
    """
    polygon = get_segmenation(all_x, all_y)

    # Axis-aligned box around the vertices, in COCO [x, y, w, h] layout.
    left, top = min(all_x), min(all_y)
    box = [left, top, max(all_x) - left, max(all_y) - top]

    return {
        "id": annotation_id,
        "image_id": image_id,
        "category_id": category_info_id,
        "iscrowd": 0,
        "area": getArea.GetAreaOfPolyGon(all_x, all_y),
        "bbox": box,
        "segmentation": polygon
    }
#%%      
def _parse_via_attributes(raw):
    """Decode one attribute cell of the annotation CSV into a Python object."""
    # NOTE(security): the original code ran eval() on this CSV text, which
    # executes arbitrary expressions from the data file. The cells are
    # presumably JSON objects (VIA export), so parse them as JSON and fall
    # back to a literal-only parser for Python-style dict text.
    import ast
    try:
        return json.loads(raw)
    except ValueError:
        return ast.literal_eval(raw)


def main():
    """Convert the VIA polygon CSV exports into one COCO instances JSON file.

    Reads every file in the CSV directory, groups the rows by image file
    name, writes one "images" entry per image that has regions and one
    "annotations" entry per polygon whose class is in CATEGORIES_NAMES, then
    dumps the result to ``instances_train2017.json``.
    """
    coco_output = {
        "info": INFO,
        "licenses": LICENSES,
        "categories": CATEGORIES,
        "images": [],
        "annotations": []
    }

    image_id = 1
    segmentation_id = 1

    csv_path='/media/ices18/Data/sms/shiyou_benchmark/shiyou_seg/csv_train/'
    # Read all CSVs first and concatenate once; concatenating inside the loop
    # copies the accumulated frame on every iteration.
    frames = [pd.read_csv(os.path.join(csv_path, csv_name), header=0)
              for csv_name in sorted(os.listdir(csv_path))]
    df_re = pd.concat(frames) if frames else pd.DataFrame(columns=['filename'])
    print (df_re)

    # name -> id lookup; iterating reversed keeps the FIRST entry per name,
    # matching the original "first match" list lookup.
    category_ids = {cat['name']: cat['id'] for cat in reversed(CATEGORIES)}

    for img_name, group in df_re.groupby('filename'):
        if img_name in TRUNCATED_NAMES:  # skip damaged images
            continue
        if not group.iloc[0]['region_count'] > 0:
            continue

        # Only the size is needed; close the file handle right away.
        with Image.open(os.path.join(IMAGE_DIR, img_name)) as image:
            image_size = image.size
        coco_output["images"].append(
            create_image_info(image_id, img_name, image_size))

        print (group)

        for _, row in group.iterrows():
            raw_attributes = row['region_attributes']
            if raw_attributes == '{}':
                continue
            # .get(): a region without a "name" attribute is skipped
            # instead of raising KeyError.
            class_name = _parse_via_attributes(raw_attributes).get('name')
            if class_name not in CATEGORIES_NAMES:
                continue

            shape = _parse_via_attributes(row['region_shape_attributes'])
            # Non-polygon shapes carry no vertex lists; skip them.
            if 'all_points_x' not in shape or 'all_points_y' not in shape:
                continue
            all_x = shape['all_points_x']
            all_y = shape['all_points_y']
            if len(all_x) < 3:
                # The area helper rejects such polygons; skip the region
                # rather than aborting the whole export.
                print ('skip degenerate polygon in', img_name)
                continue

            annotation_info = create_annotation_info(
                segmentation_id, image_id, category_ids[class_name],
                all_x, all_y, image_size, tolerance=2)
            coco_output["annotations"].append(annotation_info)
            segmentation_id = segmentation_id + 1
        image_id = image_id + 1

    with open('/media/ices18/Data/sms/shiyou_benchmark/shiyou_seg/instances_train2017.json','w') as output_json_file:
        json.dump(coco_output, output_json_file)


if __name__ == "__main__":
    main()

getArea.py

# -*- coding: cp936 -*-
import math

class Point:
    """Plain 2-D coordinate holder with public ``x`` and ``y`` attributes."""

    def __init__(self, x, y):
        self.x, self.y = x, y

def GetAreaOfPolyGon(points_x, points_y):
    """Return the area of a simple polygon given by its vertex coordinates.

    Uses the shoelace formula, so the result is correct for convex and
    concave polygons and for either winding direction.

    The previous implementation always measured the triangle formed by the
    first three vertices and summed it once per remaining vertex, which is
    only correct for triangles (and, by coincidence, parallelograms).

    Args:
        points_x: x coordinates of the vertices, in boundary order.
        points_y: y coordinates of the vertices, same order and length.

    Returns:
        The non-negative polygon area as a float.

    Raises:
        ValueError: if the coordinate lists differ in length or describe
            fewer than 3 vertices.
    """
    if len(points_x) != len(points_y):
        raise ValueError("points_x and points_y must have the same length")
    vertices = list(zip(points_x, points_y))
    if len(vertices) < 3:
        raise ValueError(
            "a polygon needs at least 3 vertices, got %d" % len(vertices))

    # Pair every vertex with its successor, wrapping back to the first one.
    successors = vertices[1:] + vertices[:1]
    twice_signed_area = sum(x0 * y1 - x1 * y0
                            for (x0, y0), (x1, y1) in zip(vertices, successors))
    return abs(twice_signed_area) / 2

def GetAreaOfTriangle(p1,p2,p3):
    """Return the (non-negative) area of the triangle p1-p2-p3.

    Each argument is any object exposing numeric ``x`` and ``y`` attributes.

    The area is half the magnitude of the cross product of the edge vectors
    p1->p2 and p1->p3. This replaces Heron's formula, whose radicand can
    turn slightly negative through rounding for nearly collinear points and
    then makes ``math.sqrt`` raise ``ValueError``.
    """
    cross = (p2.x - p1.x) * (p3.y - p1.y) - (p2.y - p1.y) * (p3.x - p1.x)
    return abs(cross) / 2

def GetLineLength(p1,p2):
    """Return the Euclidean distance between points ``p1`` and ``p2``.

    Both arguments are objects exposing numeric ``x`` and ``y`` attributes.
    """
    dx_squared = math.pow(p1.x - p2.x, 2)
    dy_squared = math.pow(p1.y - p2.y, 2)
    return math.sqrt(dx_squared + dy_squared)

2.COCO数据集可视化

from pycocotools.coco import COCO
import numpy as np
import skimage.io as io
import matplotlib.pyplot as plt
import pylab
from PIL import Image
from scipy import misc

# Sanity-check script: load the generated COCO annotation file, pick one
# random image containing the selected categories and draw its annotations.
pylab.rcParams['figure.figsize'] = (10.0, 8.0)

annFile='/media/ices18/Data/sms/shiyou_segment_model/maskrcnn-benchmark/datasets/coco/annotations/instances_train2017.json'
# initialize COCO api for instance annotations
coco=COCO(annFile)
# display COCO categories and supercategories
cats = coco.loadCats(coco.getCatIds())
nms=[cat['name'] for cat in cats]
#print 'COCO categories: \n\n', ' '.join(nms)

# nms is reused: it now holds the distinct supercategory names.
nms = set([cat['supercategory'] for cat in cats])
#print 'COCO supercategories: \n', ' '.join(nms)

# get all images containing given categories, select one at random
catIds = coco.getCatIds(catNms=['truck','car','build'])
imgIds = coco.getImgIds(catIds=catIds )
img = coco.loadImgs(imgIds[np.random.randint(0,len(imgIds))])[0]

# load and display image
I = io.imread('/media/ices18/Data/sms/shiyou_segment_model/data/JPEGImages/%s'%(img['file_name']))
plt.figure(); plt.axis('off')
# Drop index 1 along axis 0, then rebuild an array from the elements of I[0].
# NOTE(review): this only yields a full picture if io.imread returned a stack
# of frames (frame 0 is kept), presumably for multi-frame JPEGs; for a plain
# (H, W, 3) array it would keep just the first pixel row. Confirm with the data.
I=np.delete(I, [1], axis=0) 
Img=[]
for i in range(len(I[0])):
    Img.append(np.array(I[0][i]))
Img=np.array(Img)
#plt.imshow(Img)
#plt.show()

# load and display instance annotations
plt.imshow(Img); plt.axis('off')
annIds = coco.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=None)
anns = coco.loadAnns(annIds)
# Number of annotations found for the chosen image and categories.
print(len(anns))
coco.showAnns(anns)
plt.show()

3.模型准备
直接使用官方po的docker file创建容器,此处一定务必要用pytorch 1.0.0 nightly(用pytorch1.0.1的时候即使编译通过train时也会报错)
defaults.py修改_C.DATALOADER.NUM_WORKERS=0(因为在docker里面训练,资源有限)

import os
from yacs.config import CfgNode as CN
# Default configuration tree (yacs CfgNode); values here are overridden by
# the YAML model config file and by command-line options.
_C = CN()
_C.MODEL = CN()
_C.MODEL.RPN_ONLY = False
_C.MODEL.MASK_ON = False
_C.MODEL.DEVICE = "cuda" 
_C.MODEL.META_ARCHITECTURE = "GeneralizedRCNN"
_C.MODEL.WEIGHT = ""
_C.INPUT = CN()
_C.INPUT.MIN_SIZE_TRAIN = 800  # minimum image size for the training set
_C.INPUT.MAX_SIZE_TRAIN = 1333 # maximum image size for the training set
_C.INPUT.MIN_SIZE_TEST = 800
_C.INPUT.MAX_SIZE_TEST = 1333
_C.INPUT.PIXEL_MEAN = [102.9801, 115.9465, 122.7717]
_C.INPUT.PIXEL_STD = [1., 1., 1.]
_C.INPUT.TO_BGR255 = True
_C.DATASETS = CN()
_C.DATASETS.TRAIN = () # given in the model config file
_C.DATASETS.TEST = ()
_C.DATALOADER = CN()
# NOTE(review): the accompanying text says to set this to 0 when training
# inside Docker (limited resources), but the listing still shows 4 - confirm.
_C.DATALOADER.NUM_WORKERS = 4 # number of workers started for data loading
_C.DATALOADER.SIZE_DIVISIBILITY = 0
_C.DATALOADER.ASPECT_RATIO_GROUPING = True
_C.MODEL.BACKBONE = CN()
_C.MODEL.BACKBONE.CONV_BODY = "R-50-C4"
_C.MODEL.BACKBONE.FREEZE_CONV_BODY_AT = 2
_C.MODEL.BACKBONE.OUT_CHANNELS = 256 * 4
_C.MODEL.RPN = CN()
_C.MODEL.RPN.USE_FPN = False
_C.MODEL.RPN.ANCHOR_SIZES = (32, 64, 128, 256, 512)
_C.MODEL.RPN.ANCHOR_STRIDE = (16,)
_C.MODEL.RPN.ASPECT_RATIOS = (0.5, 1.0, 2.0)
_C.MODEL.RPN.STRADDLE_THRESH = 0
_C.MODEL.RPN.FG_IOU_THRESHOLD = 0.7
_C.MODEL.RPN.BG_IOU_THRESHOLD = 0.3
_C.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 256
_C.MODEL.RPN.POSITIVE_FRACTION = 0.5
_C.MODEL.RPN.PRE_NMS_TOP_N_TRAIN = 12000
_C.MODEL.RPN.PRE_NMS_TOP_N_TEST = 6000
_C.MODEL.RPN.POST_NMS_TOP_N_TRAIN = 2000
_C.MODEL.RPN.POST_NMS_TOP_N_TEST = 1000
_C.MODEL.RPN.NMS_THRESH = 0.7
_C.MODEL.RPN.MIN_SIZE = 0
_C.MODEL.RPN.FPN_POST_NMS_TOP_N_TRAIN = 2000
_C.MODEL.RPN.FPN_POST_NMS_TOP_N_TEST = 2000
_C.MODEL.ROI_HEADS = CN()
_C.MODEL.ROI_HEADS.USE_FPN = False
_C.MODEL.ROI_HEADS.FG_IOU_THRESHOLD = 0.5
_C.MODEL.ROI_HEADS.BG_IOU_THRESHOLD = 0.5
_C.MODEL.ROI_HEADS.BBOX_REG_WEIGHTS = (10., 10., 5., 5.)
_C.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
_C.MODEL.ROI_HEADS.POSITIVE_FRACTION = 0.25
_C.MODEL.ROI_HEADS.SCORE_THRESH = 0.05
_C.MODEL.ROI_HEADS.NMS = 0.5
_C.MODEL.ROI_HEADS.DETECTIONS_PER_IMG = 100
_C.MODEL.ROI_BOX_HEAD = CN()
_C.MODEL.ROI_BOX_HEAD.FEATURE_EXTRACTOR = "ResNet50Conv5ROIFeatureExtractor"
_C.MODEL.ROI_BOX_HEAD.PREDICTOR = "FastRCNNPredictor"
_C.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION = 14
_C.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO = 0
_C.MODEL.ROI_BOX_HEAD.POOLER_SCALES = (1.0 / 16,)
# Number of dataset classes. The default is 81 because COCO has 80 classes
# + 1 (background); the author's dataset has only 4 classes, so 5 with background.
# NOTE(review): the conversion script in this document defines 10 categories,
# which would presumably require 11 here - confirm before training.
_C.MODEL.ROI_BOX_HEAD.NUM_CLASSES = 5
_C.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM = 1024
_C.MODEL.ROI_MASK_HEAD = CN()
_C.MODEL.ROI_MASK_HEAD.FEATURE_EXTRACTOR = "ResNet50Conv5ROIFeatureExtractor"
_C.MODEL.ROI_MASK_HEAD.PREDICTOR = "MaskRCNNC4Predictor"
_C.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION = 14
_C.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO = 0
_C.MODEL.ROI_MASK_HEAD.POOLER_SCALES = (1.0 / 16,)
_C.MODEL.ROI_MASK_HEAD.MLP_HEAD_DIM = 1024
_C.MODEL.ROI_MASK_HEAD.CONV_LAYERS = (256, 256, 256, 256)
_C.MODEL.ROI_MASK_HEAD.RESOLUTION = 14
_C.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR = True
_C.MODEL.RESNETS = CN()
_C.MODEL.RESNETS.NUM_GROUPS = 1
_C.MODEL.RESNETS.WIDTH_PER_GROUP = 64
_C.MODEL.RESNETS.STRIDE_IN_1X1 = True
_C.MODEL.RESNETS.TRANS_FUNC = "BottleneckWithFixedBatchNorm"
_C.MODEL.RESNETS.STEM_FUNC = "StemWithFixedBatchNorm"
_C.MODEL.RESNETS.RES5_DILATION = 1
_C.MODEL.RESNETS.RES2_OUT_CHANNELS = 256
_C.MODEL.RESNETS.STEM_OUT_CHANNELS = 64
_C.SOLVER = CN()
_C.SOLVER.MAX_ITER = 40000 # maximum number of iterations
_C.SOLVER.BASE_LR = 0.02 # initial learning rate; usually set in the model config file
_C.SOLVER.BIAS_LR_FACTOR = 2
_C.SOLVER.MOMENTUM = 0.9
_C.SOLVER.WEIGHT_DECAY = 0.0005
_C.SOLVER.WEIGHT_DECAY_BIAS = 0
_C.SOLVER.GAMMA = 0.1
_C.SOLVER.STEPS = (30000,)
_C.SOLVER.WARMUP_FACTOR = 1.0 / 3
_C.SOLVER.WARMUP_ITERS = 500 # warm-up iterations; the learning rate is lower while the iteration count is below this value
_C.SOLVER.WARMUP_METHOD = "constant" # warm-up strategy: either 'constant' or 'linear'
_C.SOLVER.CHECKPOINT_PERIOD = 2000 # interval (in steps) between checkpoints
_C.SOLVER.IMS_PER_BATCH = 1 # number of images in one batch
_C.TEST = CN()
_C.TEST.EXPECTED_RESULTS = []
_C.TEST.EXPECTED_RESULTS_SIGMA_TOL = 4
_C.TEST.IMS_PER_BATCH = 1
_C.OUTPUT_DIR = "output" # mainly the output directory for checkpoints and inference results
_C.PATHS_CATALOG = os.path.join(os.path.dirname(__file__), "paths_catalog.py")

path_catalog:修改数据路径

模型配置文件在启动训练时由--config-file参数指定,在config子目录下默认提供了mask_rcnn和faster_rcnn框架不同骨干网的基于YAML格式的配置文件。这里选用的是e2e_mask_rcnn_R_101_FPN_1x.yaml,也就是使用mask_rcnn检测模型,骨干网使用ResNet101-FPN,配置详情如下(根据自己的数据集作相应的调整):

MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  WEIGHT: "catalog://ImageNetPretrained/MSRA/R-101"
  BACKBONE:
    CONV_BODY: "R-101-FPN"
    OUT_CHANNELS: 256
  RPN:
    USE_FPN: True # whether to use FPN (feature pyramid network); True extracts candidate regions from several feature maps
    ANCHOR_STRIDE: (4, 8, 16, 32, 64) # anchor strides
    PRE_NMS_TOP_N_TRAIN: 2000 # training: number of candidate regions kept before NMS
    PRE_NMS_TOP_N_TEST: 1000 # testing: number of candidate regions kept before NMS
    POST_NMS_TOP_N_TEST: 1000
    FPN_POST_NMS_TOP_N_TEST: 1000
  ROI_HEADS:
    USE_FPN: True
  ROI_BOX_HEAD:
    POOLER_RESOLUTION: 7
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    POOLER_SAMPLING_RATIO: 2
    FEATURE_EXTRACTOR: "FPN2MLPFeatureExtractor"
    PREDICTOR: "FPNPredictor"
  ROI_MASK_HEAD:
    POOLER_SCALES: (0.25, 0.125, 0.0625, 0.03125)
    FEATURE_EXTRACTOR: "MaskRCNNFPNFeatureExtractor"
    PREDICTOR: "MaskRCNNC4Predictor"
    POOLER_RESOLUTION: 14
    POOLER_SAMPLING_RATIO: 2
    RESOLUTION: 28
    SHARE_BOX_FEATURE_EXTRACTOR: False
  # NOTE(review): this document is about training masks; with MASK_ON False
  # the mask branch is presumably disabled - confirm this is intended.
  MASK_ON: False # default is True; changed to False here because the author did not use the segmentation feature
DATASETS:
  TRAIN: ("coco_2014_train",) # note the names of the training and test sets:
  TEST: ("coco_2014_val",) # they must match the DATASETS entries in paths_catalog.py
DATALOADER:
  SIZE_DIVISIBILITY: 32
SOLVER:
  BASE_LR: 0.01 # initial learning rate; several adjustment strategies exist, this framework defines its own
  WEIGHT_DECAY: 0.0001
  # STEPS sets the iteration boundaries at which the learning rate is decayed.
  # Example from the author's dataset: 149898 images, decay planned every 4 epochs, hence the values below.
  STEPS: (599592, 1199184) 
  MAX_ITER: 1300000 # maximum number of iterations

4.开始训练

python tools/train_net.py --config-file "configs/e2e_mask_rcnn_R_101_FPN_1x.yaml" SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.0025 SOLVER.MAX_ITER 720000 SOLVER.STEPS "(480000, 640000)" TEST.IMS_PER_BATCH 1