第五章 TensorFlow Object Detection API+双目视觉=实时物体定位+检测
本章节是第三章和第四章的结合,旨在实现实时物体的定位+检测。
直接上代码:
# -*- coding: utf-8 -*-
"""
Created on Mon Jul 16 17:28:57 2018
@author: bjw
"""
import os
import cv2
import sys
#import time
#import argparse
#import multiprocessing
import numpy as np
import tensorflow as tf
#from matplotlib import pyplot as plt
import camera_configs
from IPython import get_ipython
get_ipython().run_line_magic('matplotlib', 'inline')
sys.path.append("..")
#cv2.namedWindow("left")
#cv2.namedWindow("right")
cv2.namedWindow("depth")
cv2.moveWindow("left", 0, 0)
cv2.moveWindow("right", 640, 0)
cv2.createTrackbar("num", "depth", 2, 10, lambda x: None)
cv2.createTrackbar("blockSize", "depth", 5, 255, lambda x: None)
# 添加点击事件,打印当前点的距离q
def callbackFunc(e, x, y, f, p):
if e == cv2.EVENT_LBUTTONDOWN:
print (threeD[y][x])
cv2.setMouseCallback("depth", callbackFunc, None)
cap = cv2.VideoCapture(0)
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util
CWD_PATH = os.getcwd()
PATH_TO_CKPT = os.path.join(CWD_PATH,'ssd_mobilenet_v1_coco_2017_11_17','frozen_inference_graph.pb')
# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join(CWD_PATH,'data', 'mscoco_label_map.pbtxt')
NUM_CLASSES = 90
# Loading label map
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES,use_display_name=True)
category_index = label_map_util.create_category_index(categories)
#Load a frozen TF model
detection_graph = tf.Graph()
with detection_graph.as_default():
od_graph_def = tf.GraphDef()
with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
serialized_graph = fid.read()
od_graph_def.ParseFromString(serialized_graph)
tf.import_graph_def(od_graph_def, name='')
with detection_graph.as_default():
with tf.Session(graph=detection_graph) as sess:
while True:
ret1, frame = cap.read()
#ret2, frame2 = camera2.read()
#if not ret1 or not ret2:
if ret1 != True:
break
cv2.resize(frame,(2560,360),interpolation=cv2.INTER_LINEAR)
dsize = (1280, 360)
imagedst = cv2.resize(frame,dsize,interpolation=cv2.INTER_LINEAR)
frame1 = imagedst[0:360,0:640]
frame2 = imagedst[0:360,640:1280]
# 根据更正map对图片进行重构
img1_rectified = cv2.remap(frame1, camera_configs.left_map1, camera_configs.left_map2, cv2.INTER_LINEAR)
img2_rectified = cv2.remap(frame2, camera_configs.right_map1, camera_configs.right_map2, cv2.INTER_LINEAR)
# 将图片置为灰度图,为StereoBM作准备
imgL = cv2.cvtColor(img1_rectified, cv2.COLOR_BGR2GRAY)
imgR = cv2.cvtColor(img2_rectified, cv2.COLOR_BGR2GRAY)
# 两个trackbar用来调节不同的参数查看效果
num = cv2.getTrackbarPos("num", "depth")
blockSize = cv2.getTrackbarPos("blockSize", "depth")
if blockSize % 2 == 0:
blockSize += 1
if blockSize < 5:
blockSize = 5
# 根据Block Maching方法生成差异图(opencv里也提供了SGBM/Semi-Global Block Matching算法,有兴趣可以试试)
stereo = cv2.StereoBM_create(numDisparities = 16*num,
blockSize = 31)
disparity = stereo.compute(imgL, imgR)
disp = cv2.normalize(disparity, disparity, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8U)
# 将图片扩展至3d空间中,其z方向的值则为当前的距离
threeD = cv2.reprojectImageTo3D(disparity.astype(np.float32)/16., camera_configs.Q)
#cv2.imshow("left", img1_rectified)
#cv2.imshow("right", img2_rectified)
cv2.imshow("depth", disp)
key = cv2.waitKey(1)
if key == ord("q"):
break
elif key == ord("s"):
cv2.imwrite(path_BM_left, imgL)
cv2.imwrite(path_BM_right, imgR)
cv2.imwrite(path_BM_depth, disp)
#ret, image_np = cap.read()
# 扩展维度,应为模型期待: [1, None, None, 3]
image_np_expanded = np.expand_dims(img1_rectified, axis=0)
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
# 每个框代表一个物体被侦测到
boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
#每个分值代表侦测到物体的可信度.
scores = detection_graph.get_tensor_by_name('detection_scores:0')
classes = detection_graph.get_tensor_by_name('detection_classes:0')
num_detections = detection_graph.get_tensor_by_name('num_detections:0')
# 执行侦测任务.
(boxes, scores, classes, num_detections) = sess.run(
[boxes, scores, classes, num_detections],
feed_dict={image_tensor: image_np_expanded})
#打印识别物体的个数
final_score = np.squeeze(scores)
final_box = np.squeeze(boxes)
count = 0
for i in range(20):
if scores is None or final_score[i] > 0.5:
count = count + 1
#print(final_box[i])
#print(finalbox[count])
Center = [((final_box[i][2]-final_box[i][0])/2+final_box[i][0])*360,
((final_box[i][3]-final_box[i][1])/2+final_box[i][1])*640]
#print(Center)
y = int(Center[0])
x = int(Center[1])
depth = threeD[y][x]
#print(depth)
cv2.putText(img1_rectified,str(depth[2]) + "mm",(x,y),cv2.FONT_HERSHEY_SIMPLEX,0.6,(0, 255, 255),2,cv2.FONT_HERSHEY_SIMPLEX)
#print(count)
# 检测结果的可视化
vis_util.visualize_boxes_and_labels_on_image_array(
img1_rectified,
np.squeeze(boxes),
np.squeeze(classes).astype(np.int32),
np.squeeze(scores),
category_index,
use_normalized_coordinates=True,
line_thickness=8)
cv2.imshow('object detection', cv2.resize(img1_rectified,(640,360)))
if cv2.waitKey(25) & 0xFF ==ord('q'):
cv2.destroyAllWindows()
break
cap.release()
cv2.destroyAllWindows()
测试结果:
因为是晚上,视差图效果不太好 。