Object Detection with Mask RCNN

1. Environment Setup

  • Windows 10, PyCharm
  • tensorflow-gpu, Keras
  • Implements object detection on a single image as well as real-time detection from the webcam.

2. Issue Encountered

  • Running the program in PyCharm raised the following error: Loaded runtime CuDNN library: 7401 (compatibility version 7400) but source was compiled with 7003 (compatibility version 7000). If using a binary install, upgrade your CuDNN library to match. If building from sources, make sure the library loaded at runtime matches a compatible version specified during compile configuration.
  • **Fix:** installing tensorflow-gpu through PyCharm pulls in the latest release by default, which does not match the CuDNN version this model setup expects. Pin the deep-learning libraries to a specific version when installing; here tensorflow-gpu 1.5.0 is used instead (e.g. pip install tensorflow-gpu==1.5.0). A quick sanity check is shown below.
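  • A minimal sanity check (assuming tensorflow-gpu 1.5.0 and a matching Keras build are installed in the PyCharm interpreter) to confirm the pinned version is the one actually loaded at runtime:
# Quick check that the pinned TensorFlow build is the one actually in use.
import tensorflow as tf
import keras

print("TensorFlow:", tf.__version__)           # expected: 1.5.0
print("Keras:", keras.__version__)
print("GPU available:", tf.test.is_gpu_available())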

3. Single-Image Detection

  • The code below is a simple example: after specifying the class IDs of the targets to detect, it produces a result image with each detected object framed by a bounding box.
import os, random
import numpy as np
import cv2
import mrcnn.config
from mrcnn.model import MaskRCNN
from pathlib import Path
from mrcnn import visualize

class MaskRCNNConfig(mrcnn.config.Config):
    NAME = "coco_pretrained_model_config"
    IMAGES_PER_GPU = 1
    GPU_COUNT = 1
    NUM_CLASSES = 1 + 80  # COCO dataset has 80 classes + one background class
    DETECTION_MIN_CONFIDENCE = 0.6
# COCO Class names
# Index of the class in the list is its ID. For example, to get ID of
# the teddy bear class, use: class_names.index('teddy bear')


ROOT_DIR = Path(".")        # Root directory of the project
MODEL_DIR = "./logs"        # Directory to save logs and trained model
COCO_MODEL_PATH = "data/mask_rcnn_coco.h5"      # Local path to trained weights file
IMAGE_DIR = "data/images"       # Directory of images to run detection on
model = MaskRCNN(mode="inference", model_dir=MODEL_DIR, config=MaskRCNNConfig())
model.load_weights(COCO_MODEL_PATH, by_name=True)

class_names = ['BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane','bus', 'train', 'truck',
               'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',
               'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
               'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
               'kite', 'baseball bat', 'baseball glove', 'skateboard','surfboard', 'tennis racket',
               'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
               'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
               'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse',
               'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
               'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
               'toothbrush']
def get_object_boxes(boxes, class_ids):
    object_ID = [1, 3, 10]    # indices in class_names of the classes to detect: person, car, traffic light
    object_boxes = []
    text = []
    for i, box in enumerate(boxes):
        if class_ids[i] in object_ID:
            object_boxes.append(box)
            text.append(class_names[class_ids[i]])
    return np.array(object_boxes), text

def image_detect():
    # Randomly pick a file from IMAGE_DIR (uncomment to use)
    # file_names = next(os.walk(IMAGE_DIR))[2]
    # path = os.path.join(IMAGE_DIR, random.choice(file_names))
    # pic_name = path.split('\\')[-1]
    # result_path = "data/images_marked/" + pic_name

    # Test a single specified image
    file_names = "my_pic1.jpg"
    path = "data/images/" + file_names
    result_path = "data/images_marked/" + file_names

    image = cv2.imread(path)
    rgb_image = image[:, :, ::-1]
    results = model.detect([rgb_image], verbose=0)
    r = results[0]
    # visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'],class_names, r['scores'])
    object_boxes, text = get_object_boxes(r['rois'], r['class_ids'])
    # print(text)
    print("Objects found in the picture:")
    for i, box in enumerate(object_boxes):       # Draw each box on the frame
        print(text[i], ': ', box)
        y1, x1, y2, x2 = box
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 1)
        font = cv2.FONT_HERSHEY_DUPLEX      # Write the class label just inside the box
        cv2.putText(image, text[i], (x1 + 6, y2 - 6), font, 0.3, (255, 255, 255))

    cv2.imshow('Result', image)     # Show the result image on screen
    cv2.imwrite(result_path, image)
    cv2.waitKey(0)                  # Wait for a key press before closing the window
    cv2.destroyAllWindows()

if __name__ == "__main__":
    image_detect()
  • Running the code above saves the result image to the specified folder and also prints the bounding-box coordinates of each detected target (a small example of using those coordinates follows the screenshots note below).
  • Detection result screenshots (images not included here).
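  • As a small illustration (an added sketch, not part of the original script): Mask R-CNN returns each box in (y1, x1, y2, x2) order, so the detected objects can be cropped straight out of the image with NumPy slicing, for example inside image_detect():
# Hypothetical follow-up: save a crop of each detected object, reusing the
# object_boxes and text variables computed above in image_detect().
for i, (y1, x1, y2, x2) in enumerate(object_boxes):
    crop = image[y1:y2, x1:x2]          # rows are y, columns are x
    cv2.imwrite("data/images_marked/crop_%d_%s.jpg" % (i, text[i]), crop)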

4. Real-Time Detection with the Webcam

import numpy as np
import cv2
import mrcnn.config
from mrcnn.model import MaskRCNN
from pathlib import Path

class MaskRCNNConfig(mrcnn.config.Config):
    NAME = "coco_pretrained_model_config"
    IMAGES_PER_GPU = 1
    GPU_COUNT = 1
    NUM_CLASSES = 1 + 80  # COCO dataset has 80 classes + one background class
    DETECTION_MIN_CONFIDENCE = 0.6
# COCO Class names
# Index of the class in the list is its ID. For example, to get ID of
# the teddy bear class, use: class_names.index('teddy bear')
class_names = ['BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
               'bus', 'train', 'truck', 'boat', 'traffic light',
               'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',
               'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear',
               'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
               'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
               'kite', 'baseball bat', 'baseball glove', 'skateboard',
               'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
               'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
               'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
               'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
               'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
               'keyboard', 'cell phone', 'microwave', 'oven', 'toaster',
               'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
               'teddy bear', 'hair drier', 'toothbrush']


ROOT_DIR = Path(".")        # Root directory of the project
MODEL_DIR = "./logs"        # Directory to save logs and trained model
COCO_MODEL_PATH = "data/mask_rcnn_coco.h5"      # Local path to trained weights file
IMAGE_DIR = "data/images"       # Directory of images to run detection on
VIDEO_SOURCE = "data/parking.mp4"   # Video file or camera to process - set this to 0 to use your webcam instead of a video file

model = MaskRCNN(mode="inference", model_dir=MODEL_DIR, config=MaskRCNNConfig())
model.load_weights(COCO_MODEL_PATH, by_name=True)

# Filter a list of Mask R-CNN detection results to get only the detected cars / trucks
def get_object_boxes(boxes, class_ids):
    # object_ID = [1,22,27,42,74,40,68,65]    # indices in class_names of the classes to detect
    object_ID = list(range(81))               # keep all 80 COCO classes (index 0 is background)
    object_boxes = []
    text = []
    for i, box in enumerate(boxes):
        if class_ids[i] in object_ID:
            object_boxes.append(box)
            text.append(class_names[class_ids[i]])
    return np.array(object_boxes), text

def video_detect():
    video_capture = cv2.VideoCapture(0)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')    # 'mp4v' matches the .mp4 container ('MJPG' does not)
    out = cv2.VideoWriter('data/output.mp4', fourcc, 20.0, (640, 480), True)   # frame size must match the capture resolution
    while video_capture.isOpened():
        success, frame = video_capture.read()
        if not success:
            break
        rgb_image = frame[:, :, ::-1]  # Convert the image from BGR color (which OpenCV uses) to RGB color
        results = model.detect([rgb_image], verbose=0)  # Run the image through the Mask R-CNN model to get results.
        r = results[0]
        object_boxes, text = get_object_boxes(r['rois'], r['class_ids'])
        for i, box in enumerate(object_boxes):  # Draw each box on the frame
            print(text[i], ': ', box)
            y1, x1, y2, x2 = box
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 1)
            font = cv2.FONT_HERSHEY_DUPLEX      # Write the class label just inside the box
            cv2.putText(frame, text[i], (x1 + 6, y2 - 6), font, 0.3, (255, 255, 255))
        out.write(frame)
        cv2.imshow('Video', frame)
        key = cv2.waitKey(1)
        if key == ord('q'):
            break

    # Clean up everything when finished
    video_capture.release()
    out.release()
    cv2.destroyAllWindows()

if __name__ == "__main__":
    video_detect()
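  • Note that VIDEO_SOURCE is defined but the script hard-codes the webcam (device 0). A possible tweak (an assumption, not part of the original script) is to open VIDEO_SOURCE instead, so the same loop works on data/parking.mp4 or, with VIDEO_SOURCE = 0, on the webcam:
# Hypothetical variant of the first lines of video_detect(): open whatever
# VIDEO_SOURCE points to instead of hard-coding the webcam device.
VIDEO_SOURCE = 0            # 0 = default webcam, or a path such as "data/parking.mp4"
video_capture = cv2.VideoCapture(VIDEO_SOURCE)
if not video_capture.isOpened():
    raise RuntimeError("Could not open video source: %r" % VIDEO_SOURCE)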
  • Video result screenshots (images not included here).