from __future__ import division
import os
import random
import sys
import logging
import cv2
import fire
import numpy as np
import tensorflow as tf
import time
import networks
import commons
from boundingbox import BoundingBox, Coordinate
from configs import ADNetConf
from networks import ADNetwork
from pystopwatch import StopWatchManager
_log_level = logging.DEBUG
_logger = logging.getLogger('ADNetRunner')
_logger.setLevel(_log_level)
ch = logging.StreamHandler(sys.stdout)
ch.setLevel(_log_level)
formatter = logging.Formatter('[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s')
ch.setFormatter(formatter)
_logger.addHandler(ch)
class ADNetRunner:
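    """ADNet tracker runner: builds the network graph, fine-tunes on the
    first frame of a sequence, then tracks frame by frame with periodic
    online fine-tuning and redetection on failure."""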
MAX_BATCHSIZE = 512
def __init__(self):
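        # placeholders: 112x112 RGB patch, flattened one-hot action history
        # (110-d), action/class labels, and the train/eval switch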
self.tensor_input = tf.placeholder(tf.float32, shape=(None, 112, 112, 3), name='patch')
self.tensor_action_history = tf.placeholder(tf.float32, shape=(None, 1, 1, 110), name='action_history')
self.tensor_lb_action = tf.placeholder(tf.int32, shape=(None, ), name='lb_action')
self.tensor_lb_class = tf.placeholder(tf.int32, shape=(None, ), name='lb_class')
self.tensor_is_training = tf.placeholder(tf.bool, name='is_training')
self.learning_rate_placeholder = tf.placeholder(tf.float32, [], name='learning_rate')
self.persistent_sess = tf.Session(config=tf.ConfigProto(
inter_op_parallelism_threads=1,
intra_op_parallelism_threads=1
))
self.adnet = ADNetwork(self.learning_rate_placeholder)
self.adnet.create_network(self.tensor_input, self.tensor_lb_action, self.tensor_lb_class, self.tensor_action_history, self.tensor_is_training)
        if 'ADNET_MODEL_PATH' in os.environ:
self.adnet.read_original_weights(self.persistent_sess, os.environ['ADNET_MODEL_PATH'])
else:
self.adnet.read_original_weights(self.persistent_sess)
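        # most-recent-first buffer of past action indices; the _old copy
        # preserves the pre-failure history for redetection scoring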
self.action_histories = np.array([0] * ADNetConf.get()['action_history'], dtype=np.int8)
self.action_histories_old = np.array([0] * ADNetConf.get()['action_history'], dtype=np.int8)
self.histories = []
self.iteration = 0
self.imgwh = None
self.callback_redetection = self.redetection_by_sampling
self.failed_cnt = 0
self.latest_score = 0
self.stopwatch = StopWatchManager()
def by_dataset(self, vid_path='./data/freeman1/'):
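        """Track one video directory: fine-tune on the ground-truth box of
        the first frame, then predict a box for every frame and report FPS."""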
assert os.path.exists(vid_path)
gt_boxes = BoundingBox.read_vid_gt(vid_path)
curr_bbox = None
self.stopwatch.start('total')
        _logger.info('---- start dataset: %d frames' % len(gt_boxes))
for idx, gt_box in enumerate(gt_boxes):
img = commons.imread(os.path.join(vid_path, 'img', '%04d.jpg' % (idx + 1)))
self.imgwh = Coordinate.get_imgwh(img)
if idx == 0:
# initialization : initial fine-tuning
self.initial_finetune(img, gt_box)
curr_bbox = gt_box
# tracking
predicted_box = self.tracking(img, curr_bbox)
self.show(img, gt_box=gt_box, predicted_box=predicted_box)
curr_bbox = predicted_box
self.stopwatch.stop('total')
_logger.info('----')
_logger.info(self.stopwatch)
_logger.info('%.3f FPS' % (len(gt_boxes) / self.stopwatch.get_elapsed('total')))
def show(self, img, delay=1, predicted_box=None, gt_box=None):
if isinstance(img, str):
img = commons.imread(img)
if gt_box is not None:
gt_box.draw(img, BoundingBox.COLOR_GT)
if predicted_box is not None:
predicted_box.draw(img, BoundingBox.COLOR_PREDICT)
cv2.imshow('result', img)
cv2.waitKey(delay)
def _get_features(self, samples):
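        # run the conv part of the network in MAX_BATCHSIZE chunks and
        # collect one feature map per input patch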
feats = []
for batch in commons.chunker(samples, ADNetRunner.MAX_BATCHSIZE):
feats_batch = self.persistent_sess.run(self.adnet.layer_feat, feed_dict={
self.adnet.input_tensor: batch
})
feats.extend(feats_batch)
return feats
def initial_finetune(self, img, detection_box):
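        # first-frame adaptation: sample pos/neg boxes around the detection,
        # cache their conv features, then fine-tune the fc layers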
self.stopwatch.start('initial_finetune')
t = time.time()
# generate samples
pos_num, neg_num = ADNetConf.g()['initial_finetune']['pos_num'], ADNetConf.g()['initial_finetune']['neg_num']
pos_boxes, neg_boxes = detection_box.get_posneg_samples(self.imgwh, pos_num, neg_num, use_whole=True)
pos_lb_action = BoundingBox.get_action_labels(pos_boxes, detection_box)
        feats = self._get_features([commons.extract_region(img, box) for box in pos_boxes])
for box, feat in zip(pos_boxes, feats):
box.feat = feat
        feats = self._get_features([commons.extract_region(img, box) for box in neg_boxes])
for box, feat in zip(neg_boxes, feats):
box.feat = feat
# train_fc_finetune_hem
self._finetune_fc(
img, pos_boxes, neg_boxes, pos_lb_action,
ADNetConf.get()['initial_finetune']['learning_rate'],
ADNetConf.get()['initial_finetune']['iter']
)
self.histories.append((pos_boxes, neg_boxes, pos_lb_action, np.copy(img), self.iteration))
        _logger.info('ADNetRunner.initial_finetune t=%.3f' % (time.time() - t))
self.stopwatch.stop('initial_finetune')
    def _finetune_fc(self, img, pos_boxes, neg_boxes, pos_lb_action, learning_rate, num_iter, iter_score=1):
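        # fine-tune the fc layers on cached conv features: the action branch
        # trains on positives every step; the confidence branch trains on
        # positives plus hard-mined negatives every `iter_score` steps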
BATCHSIZE = ADNetConf.g()['minibatch_size']
def get_img(idx, posneg):
if isinstance(img, tuple):
return img[posneg][idx]
return img
pos_samples = [commons.extract_region(get_img(i, 0), box) for i, box in enumerate(pos_boxes)]
neg_samples = [commons.extract_region(get_img(i, 1), box) for i, box in enumerate(neg_boxes)]
# pos_feats, neg_feats = self._get_features(pos_samples), self._get_features(neg_samples)
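        # debug view: show the 50 most recent positive/negative patches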
commons.imshow_grid('pos', pos_samples[-50:], 10, 5)
commons.imshow_grid('neg', neg_samples[-50:], 10, 5)
cv2.waitKey(1)
        for i in range(num_iter):
batch_idxs = commons.random_idxs(len(pos_boxes), BATCHSIZE)
batch_feats = [x.feat for x in commons.choices_by_idx(pos_boxes, batch_idxs)]
batch_lb_action = commons.choices_by_idx(pos_lb_action, batch_idxs)
self.persistent_sess.run(
self.adnet.weighted_grads_op1,
feed_dict={
self.adnet.layer_feat: batch_feats,
self.adnet.label_tensor: batch_lb_action,
self.adnet.action_history_tensor: np.zeros(shape=(BATCHSIZE, 1, 1, 110)),
self.learning_rate_placeholder: learning_rate,
self.tensor_is_training: True
}
)
if i % iter_score == 0:
# training score auxiliary(fc2)
# -- hard score example mining
scores = []
for batch_neg in commons.chunker([x.feat for x in neg_boxes], ADNetRunner.MAX_BATCHSIZE):
scores_batch = self.persistent_sess.run(
self.adnet.layer_scores,
feed_dict={
self.adnet.layer_feat: batch_neg,
self.adnet.action_history_tensor: np.zeros(shape=(len(batch_neg), 1, 1, 110)),
self.learning_rate_placeholder: learning_rate,
self.tensor_is_training: False
}
)
scores.extend(scores_batch)
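                # rank negatives by their positive-class score (x[1][1]);
                # the highest-scoring negatives are the hardest examples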
                desc_order_idx = [i[0] for i in sorted(enumerate(scores), reverse=True, key=lambda x: x[1][1])]
# -- train
batch_feats_neg = [x.feat for x in commons.choices_by_idx(neg_boxes, desc_order_idx[:BATCHSIZE])]
self.persistent_sess.run(
self.adnet.weighted_grads_op2,
feed_dict={
self.adnet.layer_feat: batch_feats + batch_feats_neg,
self.adnet.class_tensor: [1]*len(batch_feats) + [0]*len(batch_feats_neg),
self.adnet.action_history_tensor: np.zeros(shape=(len(batch_feats)+len(batch_feats_neg), 1, 1, 110)),
self.learning_rate_placeholder: learning_rate,
self.tensor_is_training: True
}
)
def tracking(self, img, curr_bbox):
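        # per-frame tracking: repeatedly pick the best action and move the
        # box until stop is chosen, the box oscillates, or confidence drops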
self.iteration += 1
is_tracked = True
boxes = []
self.latest_score = -1
self.stopwatch.start('tracking.do_action')
for track_i in range(ADNetConf.get()['predict']['num_action']):
patch = commons.extract_region(img, curr_bbox)
# forward with image & action history
actions, classes = self.persistent_sess.run(
[self.adnet.layer_actions, self.adnet.layer_scores],
feed_dict={
self.adnet.input_tensor: [patch],
self.adnet.action_history_tensor: [commons.onehot_flatten(self.action_histories)],
self.tensor_is_training: False
}
)
latest_score = classes[0][1]
if latest_score < ADNetConf.g()['predict']['thresh_fail']:
is_tracked = False
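                # keep a copy of the history from before the failure;
                # redetection scores its candidates against this old history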
self.action_histories_old = np.copy(self.action_histories)
self.action_histories = np.insert(self.action_histories, 0, 12)[:-1]
break
else:
self.failed_cnt = 0
self.latest_score = latest_score
# move box
action_idx = np.argmax(actions[0])
self.action_histories = np.insert(self.action_histories, 0, action_idx)[:-1]
prev_bbox = curr_bbox
curr_bbox = curr_bbox.do_action(self.imgwh, action_idx)
if action_idx != ADNetwork.ACTION_IDX_STOP:
if prev_bbox == curr_bbox:
print('action idx', action_idx)
print(prev_bbox)
print(curr_bbox)
raise Exception('box not moved.')
# oscillation check
if action_idx != ADNetwork.ACTION_IDX_STOP and curr_bbox in boxes:
action_idx = ADNetwork.ACTION_IDX_STOP
if action_idx == ADNetwork.ACTION_IDX_STOP:
break
boxes.append(curr_bbox)
self.stopwatch.stop('tracking.do_action')
# redetection when tracking failed
new_score = 0.0
if not is_tracked:
self.failed_cnt += 1
# run redetection callback function
new_box, new_score = self.callback_redetection(curr_bbox, img)
if new_box is not None:
curr_bbox = new_box
patch = commons.extract_region(img, curr_bbox)
            _logger.debug('redetection success=%s' % (new_box is not None))
# save samples
if is_tracked or new_score > ADNetConf.g()['predict']['thresh_success']:
self.stopwatch.start('tracking.save_samples.roi')
imgwh = Coordinate.get_imgwh(img)
pos_num, neg_num = ADNetConf.g()['finetune']['pos_num'], ADNetConf.g()['finetune']['neg_num']
pos_boxes, neg_boxes = curr_bbox.get_posneg_samples(
imgwh, pos_num, neg_num, use_whole=False,
pos_thresh=ADNetConf.g()['finetune']['pos_thresh'],
neg_thresh=ADNetConf.g()['finetune']['neg_thresh'],
uniform_translation_f=2,
uniform_scale_f=5
)
self.stopwatch.stop('tracking.save_samples.roi')
self.stopwatch.start('tracking.save_samples.feat')
            feats = self._get_features([commons.extract_region(img, box) for box in pos_boxes])
for box, feat in zip(pos_boxes, feats):
box.feat = feat
            feats = self._get_features([commons.extract_region(img, box) for box in neg_boxes])
for box, feat in zip(neg_boxes, feats):
box.feat = feat
pos_lb_action = BoundingBox.get_action_labels(pos_boxes, curr_bbox)
self.histories.append((pos_boxes, neg_boxes, pos_lb_action, np.copy(img), self.iteration))
# clear old ones
self.histories = self.histories[-ADNetConf.g()['finetune']['long_term']:]
self.stopwatch.stop('tracking.save_samples.feat')
# online finetune
        if self.iteration % ADNetConf.g()['finetune']['interval'] == 0 or not is_tracked:
img_pos, img_neg = [], []
pos_boxes, neg_boxes, pos_lb_action = [], [], []
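            # positives: long-term history while tracking, short-term after a
            # failure; negatives always come from short-term history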
pos_term = 'long_term' if is_tracked else 'short_term'
for i in range(ADNetConf.g()['finetune'][pos_term]):
if i >= len(self.histories):
break
pos_boxes.extend(self.histories[-(i+1)][0])
pos_lb_action.extend(self.histories[-(i+1)][2])
img_pos.extend([self.histories[-(i+1)][3]]*len(self.histories[-(i+1)][0]))
for i in range(ADNetConf.g()['finetune']['short_term']):
if i >= len(self.histories):
break
neg_boxes.extend(self.histories[-(i+1)][1])
img_neg.extend([self.histories[-(i+1)][3]]*len(self.histories[-(i+1)][1]))
self.stopwatch.start('tracking.online_finetune')
self._finetune_fc(
(img_pos, img_neg), pos_boxes, neg_boxes, pos_lb_action,
ADNetConf.get()['finetune']['learning_rate'],
ADNetConf.get()['finetune']['iter']
)
_logger.debug('finetuned')
self.stopwatch.stop('tracking.online_finetune')
cv2.imshow('patch', patch)
return curr_bbox
def redetection_by_sampling(self, prev_box, img):
"""
default redetection method
"""
imgwh = Coordinate.get_imgwh(img)
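        # widen the gaussian sampling spread as consecutive failures
        # accumulate, capped at 1.5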
translation_f = min(1.5, 0.6 * 1.15**self.failed_cnt)
candidates = prev_box.gen_noise_samples(imgwh, 'gaussian', ADNetConf.g()['redetection']['samples'],
gaussian_translation_f=translation_f)
scores = []
for c_batch in commons.chunker(candidates, ADNetRunner.MAX_BATCHSIZE):
samples = [commons.extract_region(img, box) for box in c_batch]
classes = self.persistent_sess.run(
self.adnet.layer_scores,
feed_dict={
self.adnet.input_tensor: samples,
self.adnet.action_history_tensor: [commons.onehot_flatten(self.action_histories_old)]*len(c_batch),
self.tensor_is_training: False
}
)
scores.extend([x[1] for x in classes])
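        # accept redetection only when the mean score of the 5 best
        # candidates beats the latest confident score; return their mean box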
        top5_idx = [i[0] for i in sorted(enumerate(scores), reverse=True, key=lambda x: x[1])][:5]
        mean_score = sum(scores[x] for x in top5_idx) / 5.0
        if mean_score >= self.latest_score:
            mean_box = candidates[top5_idx[0]]
            for idx in top5_idx[1:]:
                mean_box += candidates[idx]
            return mean_box / 5.0, mean_score
return None, 0.0
def __del__(self):
self.persistent_sess.close()
if __name__ == '__main__':
ADNetConf.get('./conf/repo.yaml')
random.seed(1258)
np.random.seed(1258)
tf.set_random_seed(1258)
fire.Fire(ADNetRunner)