YOLO v3代码学习
本人使用的版本是https://github.com/AlexeyAB/darknet
源码在darknet-master\src目录下
还记得我们用YOLO v3训练自己的数据集的过程,控制台下使用以下代码进行训练
darknet.exe detector train data/obj.data yolo-obj.cfg darknet53.conv.74
上面代码的意思是启动darknet.exe这个文件并以后面作为参数开始运行。参数位置对应,如下:
argv[1] = detector ; argv[2] = train ; argv[3] = data/obj.data ; argv[4] = yolo-obj.cfg ; argv[5] = darknet53.conv.74 ;
1、darknet.exe就是对应darknet.c的main函数
/*
 * Entry point of the darknet command-line tool.
 *
 * Every non-NULL argument is first passed through strip_args().  The GPU
 * index is read from "-i" (default 0); "-nogpu" is rejected with a message
 * and exit(-1).  The sub-command named by argv[1] is then dispatched to the
 * matching run_* / helper function.  Unknown sub-commands are reported on
 * stderr.
 *
 * Returns 0 in all non-exiting paths.
 */
int main(int argc, char **argv)
{
#ifdef _DEBUG
    _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
#endif

    int i;
    for (i = 0; i < argc; ++i) {
        if (!argv[i]) continue;
        strip_args(argv[i]);
    }

    if(argc < 2){
        fprintf(stderr, "usage: %s <function>\n", argv[0]);
        return 0;
    }
    gpu_index = find_int_arg(argc, argv, "-i", 0);
    if(find_arg(argc, argv, "-nogpu")) {
        gpu_index = -1;
        printf("\n Currently Darknet doesn't support -nogpu flag. If you want to use CPU - please compile Darknet with GPU=0 in the Makefile, or compile darknet_no_gpu.sln on Windows.\n");
        exit(-1);
    }

#ifndef GPU
    gpu_index = -1;
#else
    if(gpu_index >= 0){
        cuda_set_device(gpu_index);
        check_error(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync));
    }
#endif

    if (0 == strcmp(argv[1], "average")){
        average(argc, argv);
    } else if (0 == strcmp(argv[1], "yolo")){
        run_yolo(argc, argv);
    } else if (0 == strcmp(argv[1], "voxel")){
        run_voxel(argc, argv);
    } else if (0 == strcmp(argv[1], "super")){
        run_super(argc, argv);
    } else if (0 == strcmp(argv[1], "detector")){
        run_detector(argc, argv);
    } else if (0 == strcmp(argv[1], "detect")){
        float thresh = find_float_arg(argc, argv, "-thresh", .24);
        /* NOTE(review): the value is looked up but the call below passes a
         * hard-coded 1 in what appears to be the ext_output position; the
         * lookup is kept because find_arg() may consume the flag from argv
         * (not visible here) - confirm before removing. */
        int ext_output = find_arg(argc, argv, "-ext_output");
        char *filename = (argc > 4) ? argv[4]: 0;
        test_detector("cfg/coco.data", argv[2], argv[3], filename, thresh, 0.5, 0, 1, 0, NULL);
    } else if (0 == strcmp(argv[1], "cifar")){
        run_cifar(argc, argv);
    } else if (0 == strcmp(argv[1], "go")){
        run_go(argc, argv);
    } else if (0 == strcmp(argv[1], "rnn")){
        run_char_rnn(argc, argv);
    } else if (0 == strcmp(argv[1], "vid")){
        run_vid_rnn(argc, argv);
    } else if (0 == strcmp(argv[1], "coco")){
        run_coco(argc, argv);
    } else if (0 == strcmp(argv[1], "classify")){
        predict_classifier("cfg/imagenet1k.data", argv[2], argv[3], argv[4], 5);
    } else if (0 == strcmp(argv[1], "classifier")){
        run_classifier(argc, argv);
    } else if (0 == strcmp(argv[1], "art")){
        run_art(argc, argv);
    } else if (0 == strcmp(argv[1], "tag")){
        run_tag(argc, argv);
    } else if (0 == strcmp(argv[1], "compare")){
        run_compare(argc, argv);
    } else if (0 == strcmp(argv[1], "dice")){
        run_dice(argc, argv);
    } else if (0 == strcmp(argv[1], "writing")){
        run_writing(argc, argv);
    } else if (0 == strcmp(argv[1], "3d")){
        composite_3d(argv[2], argv[3], argv[4], (argc > 5) ? atof(argv[5]) : 0);
    } else if (0 == strcmp(argv[1], "test")){
        test_resize(argv[2]);
    } else if (0 == strcmp(argv[1], "captcha")){
        run_captcha(argc, argv);
    } else if (0 == strcmp(argv[1], "nightmare")){
        run_nightmare(argc, argv);
    } else if (0 == strcmp(argv[1], "rgbgr")){
        rgbgr_net(argv[2], argv[3], argv[4]);
    } else if (0 == strcmp(argv[1], "reset")){
        reset_normalize_net(argv[2], argv[3], argv[4]);
    } else if (0 == strcmp(argv[1], "denormalize")){
        denormalize_net(argv[2], argv[3], argv[4]);
    } else if (0 == strcmp(argv[1], "statistics")){
        statistics_net(argv[2], argv[3]);
    } else if (0 == strcmp(argv[1], "normalize")){
        normalize_net(argv[2], argv[3], argv[4]);
    } else if (0 == strcmp(argv[1], "rescale")){
        rescale_net(argv[2], argv[3], argv[4]);
    } else if (0 == strcmp(argv[1], "ops")){
        operations(argv[2]);
    } else if (0 == strcmp(argv[1], "speed")){
        speed(argv[2], (argc > 3 && argv[3]) ? atoi(argv[3]) : 0);
    } else if (0 == strcmp(argv[1], "oneoff")){
        oneoff(argv[2], argv[3], argv[4]);
    } else if (0 == strcmp(argv[1], "partial")){
        /* atoi() on a missing argument is undefined behaviour, so require
         * all four operands before touching argv[5]. */
        if (argc < 6) {
            fprintf(stderr, "usage: %s partial <cfg> <weights> <outfile> <max>\n", argv[0]);
            return 0;
        }
        partial(argv[2], argv[3], argv[4], atoi(argv[5]));
    } else if (0 == strcmp(argv[1], "visualize")){
        visualize(argv[2], (argc > 3) ? argv[3] : 0);
    } else if (0 == strcmp(argv[1], "imtest")){
        test_resize(argv[2]);
    } else {
        fprintf(stderr, "Not an option: %s\n", argv[1]);
    }
    return 0;
}
2、根据参数argv[1]可以看出,将执行run_detector(argc, argv),它在detector.c文件里
/*
 * Handles the "detector" sub-command: parses the optional flags, then
 * dispatches on argv[2] (test / train / valid / recall / map / calc_anchors /
 * demo).
 *
 * Positional arguments: argv[3] = data file, argv[4] = cfg file,
 * argv[5] = weights (optional), argv[6] = input file name (optional).
 *
 * "-gpus a,b,c" selects several devices; without it the single device
 * stored in the global gpu_index is used.
 */
void run_detector(int argc, char **argv)
{
    /* find_arg() yields non-zero when the flag is present on the command
     * line, 0 otherwise (same for every find_arg call below). */
    int dont_show = find_arg(argc, argv, "-dont_show");
    int show = find_arg(argc, argv, "-show");
    int calc_map = find_arg(argc, argv, "-map");
    check_mistakes = find_arg(argc, argv, "-check_mistakes");
    int mjpeg_port = find_int_arg(argc, argv, "-mjpeg_port", -1);
    int json_port = find_int_arg(argc, argv, "-json_port", -1);
    char *out_filename = find_char_arg(argc, argv, "-out_filename", 0);
    char *outfile = find_char_arg(argc, argv, "-out", 0);
    char *prefix = find_char_arg(argc, argv, "-prefix", 0);
    float thresh = find_float_arg(argc, argv, "-thresh", .25);    // 0.24
    float iou_thresh = find_float_arg(argc, argv, "-iou_thresh", .5);    // 0.5 for mAP
    float hier_thresh = find_float_arg(argc, argv, "-hier", .5);
    int cam_index = find_int_arg(argc, argv, "-c", 0);
    int frame_skip = find_int_arg(argc, argv, "-s", 0);
    int num_of_clusters = find_int_arg(argc, argv, "-num_of_clusters", 5);
    int width = find_int_arg(argc, argv, "-width", -1);
    int height = find_int_arg(argc, argv, "-height", -1);
    // extended output in test mode (output of rect bound coords)
    // and for recall mode (extended output table-like format with results for best_class fit)
    int ext_output = find_arg(argc, argv, "-ext_output");
    int save_labels = find_arg(argc, argv, "-save_labels");
    if (argc < 4) {
        fprintf(stderr, "usage: %s %s [train/test/valid/demo/map] [data] [cfg] [weights (optional)]\n", argv[0], argv[1]);
        return;
    }

    char *gpu_list = find_char_arg(argc, argv, "-gpus", 0);
    int *gpus = 0;
    int *gpus_heap = 0;   /* non-NULL only when the id array was calloc'ed */
    int gpu = 0;
    int ngpus = 0;
    if (gpu_list) {
        printf("%s\n", gpu_list);
        int len = strlen(gpu_list);
        ngpus = 1;
        int i;
        for (i = 0; i < len; ++i) {
            if (gpu_list[i] == ',') ++ngpus;
        }
        gpus_heap = calloc(ngpus, sizeof(int));
        if (!gpus_heap) {
            fprintf(stderr, "run_detector: out of memory for GPU list\n");
            return;
        }
        gpus = gpus_heap;
        /* Walk the comma-separated list.  strchr() returns NULL after the
         * last entry, so stop there instead of computing NULL + 1. */
        const char *cursor = gpu_list;
        for (i = 0; i < ngpus; ++i) {
            gpus[i] = atoi(cursor);
            const char *comma = strchr(cursor, ',');
            if (!comma) break;
            cursor = comma + 1;
        }
    }
    else {
        /* No -gpus list: use the single device from the global gpu_index. */
        gpu = gpu_index;
        gpus = &gpu;
        ngpus = 1;
    }

    int clear = find_arg(argc, argv, "-clear");

    char *datacfg = argv[3];
    char *cfg = argv[4];
    char *weights = (argc > 5) ? argv[5] : 0;
    /* Drop a trailing carriage return (0x0d) from the weights path. */
    if (weights)
        if (strlen(weights) > 0)
            if (weights[strlen(weights) - 1] == 0x0d) weights[strlen(weights) - 1] = 0;
    char *filename = (argc > 6) ? argv[6] : 0;
    if (0 == strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh, dont_show, ext_output, save_labels, outfile);
    else if (0 == strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear, dont_show, calc_map, mjpeg_port);
    else if (0 == strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights, outfile);
    else if (0 == strcmp(argv[2], "recall")) validate_detector_recall(datacfg, cfg, weights);
    else if (0 == strcmp(argv[2], "map")) validate_detector_map(datacfg, cfg, weights, thresh, iou_thresh, NULL);
    else if (0 == strcmp(argv[2], "calc_anchors")) calc_anchors(datacfg, num_of_clusters, width, height, show);
    else if (0 == strcmp(argv[2], "demo")) {
        list *options = read_data_cfg(datacfg);
        int classes = option_find_int(options, "classes", 20);
        char *name_list = option_find_str(options, "names", "data/names.list");
        char **names = get_labels(name_list);
        /* Same trailing-CR cleanup for the input file name. */
        if (filename)
            if (strlen(filename) > 0)
                if (filename[strlen(filename) - 1] == 0x0d) filename[strlen(filename) - 1] = 0;
        demo(cfg, weights, thresh, hier_thresh, cam_index, filename, names, classes, frame_skip, prefix, out_filename,
            mjpeg_port, json_port, dont_show, ext_output);

        free_list_contents_kvp(options);
        free_list(options);
    }
    else printf(" There isn't such command: %s", argv[2]);

    /* Release the id array if it was heap-allocated (free(NULL) is a no-op). */
    free(gpus_heap);
}
3、参数argv[2]是train,所以接下来执行train_detector(datacfg, cfg, weights, gpus, ngpus, clear, dont_show, calc_map, mjpeg_port);
也是在detector.c文件中
void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int dont_show, int calc_map, int mjpeg_port)
{
list *options = read_data_cfg(datacfg);//读入obj.data配置文件
//查找配置文件里train的值,如果没有则使用第三个参数"data/train.txt"作为默认值
char *train_images = option_find_str(options, "train", "data/train.txt");
char *valid_images = option_find_str(options, "valid", train_images);
char *backup_directory = option_find_str(options, "backup", "/backup/");
int train_images_num = 0;
network net_map;
if (calc_map) {
FILE* valid_file = fopen(valid_images, "r");
if (!valid_file) {
printf("\n Error: There is no %s file for mAP calculation!\n Don't use -map flag.\n Or set valid=%s in your %s file. \n", valid_images, train_images, datacfg);
getchar();
exit(-1);
}
else fclose(valid_file);
//读入train.txt文件中每行的图片地址
list *plist = get_paths(train_images);
train_images_num = plist->size;
free_list(plist);
cuda_set_device(gpus[0]);
printf(" Prepare additional network for mAP calculation...\n");
//解析yolo-obj.cfg配置文件(下面讲)
net_map = parse_network_cfg_custom(cfgfile, 1);
int k; // free memory unnecessary arrays
for (k = 0; k < net_map.n; ++k) {
free_layer(net_map.layers[k]);
}
#ifdef GPU
cuda_free(net_map.workspace);
cuda_free(net_map.input_state_gpu);
if (*net_map.input16_gpu) cuda_free(*net_map.input16_gpu);
if (*net_map.output16_gpu) cuda_free(*net_map.output16_gpu);
#else
free(net_map.workspace);
#endif
}
srand(time(0));//随机数种子,配合下面的rand()实现随机数的产生
char *base = basecfg(cfgfile);
printf("%s\n", base);
float avg_loss = -1;
network *nets = calloc(ngpus, sizeof(network));
srand(time(0));
int seed = rand();
int i;
for (i = 0; i < ngpus; ++i) {
srand(seed);
#ifdef GPU
cuda_set_device(gpus[i]);
#endif
nets[i] = parse_network_cfg(cfgfile);
if (weightfile) {
load_weights(&nets[i], weightfile);
}
if (clear) *nets[i].seen = 0;
nets[i].learning_rate *= ngpus;
}
srand(time(0));
network net = nets[0];
const int actual_batch_size = net.batch * net.subdivisions;
if (actual_batch_size == 1) {
printf("\n Error: You set incorrect value batch=1 for Training! You should set batch=64 subdivision=64 \n");
getchar();
}
else if (actual_batch_size < 64) {
printf("\n Warning: You set batch=%d lower than 64! It is recommended to set batch=64 subdivision=64 \n", actual_batch_size);
}
int imgs = net.batch * net.subdivisions * ngpus;
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
data train, buffer;
layer l = net.layers[net.n - 1];
int classes = l.classes;
float jitter = l.jitter;//抖动产生额外的数据
list *plist = get_paths(train_images);//得到训练数据路径
//int N = plist->size;
char **paths = (char **)list_to_array(plist);
int init_w = net.w;
int init_h = net.h;
int iter_save, iter_save_last, iter_map;
iter_save = get_current_batch(net);
iter_save_last = get_current_batch(net);
iter_map = get_current_batch(net);
float mean_average_precision = -1;
load_args args = { 0 };
args.w = net.w;
args.h = net.h;
args.c = net.c;
args.paths = paths;
args.n = imgs;
args.m = plist->size;
args.classes = classes;
args.flip = net.flip;
args.jitter = jitter;
args.num_boxes = l.max_boxes;
args.small_object = net.small_object;
args.d = &buffer;
args.type = DETECTION_DATA;
args.threads = 64; // 16 or 64
//数据扩增,角度,曝光,饱和,灰度
args.angle = net.angle;
args.exposure = net.exposure;
args.saturation = net.saturation;
args.hue = net.hue;
#ifdef OPENCV
args.threads = 3 * ngpus; // Amazon EC2 Tesla V100: p3.2xlarge (8 logical cores) - p3.16xlarge
//args.threads = 12 * ngpus; // Ryzen 7 2700X (16 logical cores)
IplImage* img = NULL;
float max_img_loss = 5;
int number_of_lines = 100;
int img_size = 1000;
img = draw_train_chart(max_img_loss, net.max_batches, number_of_lines, img_size, dont_show);
#endif //OPENCV
pthread_t load_thread = load_data(args);
double time;
int count = 0;
//while(i*imgs < N*120){
//这里的net.max_batches就是你yolo-obj.cfg里定义的max_batches的值
while (get_current_batch(net) < net.max_batches) {
//是否多尺度训练
if (l.random && count++ % 10 == 0) {
printf("Resizing\n");
//int dim = (rand() % 12 + (init_w/32 - 5)) * 32; // +-160
//int dim = (rand() % 4 + 16) * 32;
//if (get_current_batch(net)+100 > net.max_batches) dim = 544;
//int random_val = rand() % 12;
//int dim_w = (random_val + (init_w / 32 - 5)) * 32; // +-160
//int dim_h = (random_val + (init_h / 32 - 5)) * 32; // +-160
float random_val = rand_scale(1.4); // *x or /x
int dim_w = roundl(random_val*init_w / 32 + 1) * 32;
int dim_h = roundl(random_val*init_h / 32 + 1) * 32;
// at the beginning
if (avg_loss < 0) {
dim_w = roundl(1.4*init_w / 32 + 1) * 32;
dim_h = roundl(1.4*init_h / 32 + 1) * 32;
}
if (dim_w < 32) dim_w = 32;
if (dim_h < 32) dim_h = 32;
printf("%d x %d \n", dim_w, dim_h);
args.w = dim_w;
args.h = dim_h;
pthread_join(load_thread, 0);
train = buffer;
free_data(train);
load_thread = load_data(args);
for (i = 0; i < ngpus; ++i) {
resize_network(nets + i, dim_w, dim_h);
}
net = nets[0];
}
time = what_time_is_it_now();
//线程相关
pthread_join(load_thread, 0);
train = buffer;
load_thread = load_data(args);
/*
int k;
for(k = 0; k < l.max_boxes; ++k){
box b = float_to_box(train.y.vals[10] + 1 + k*5);
if(!b.x) break;
printf("loaded: %f %f %f %f\n", b.x, b.y, b.w, b.h);
}
image im = float_to_image(448, 448, 3, train.X.vals[10]);
int k;
for(k = 0; k < l.max_boxes; ++k){
box b = float_to_box(train.y.vals[10] + 1 + k*5);
printf("%d %d %d %d\n", truth.x, truth.y, truth.w, truth.h);
draw_bbox(im, b, 8, 1,0,0);
}
save_image(im, "truth11");
*/
printf("Loaded: %lf seconds\n", (what_time_is_it_now() - time));
time = what_time_is_it_now();
float loss = 0;
#ifdef GPU
if (ngpus == 1) {
loss = train_network(net, train);
}
else {
loss = train_networks(nets, ngpus, train, 4);
}
#else
//训练的函数,返回损失值(下面讲)
loss = train_network(net, train);
#endif
if (avg_loss < 0 || avg_loss != avg_loss) avg_loss = loss; // if(-inf or nan)
avg_loss = avg_loss*.9 + loss*.1;
i = get_current_batch(net);
if (mean_average_precision > 0) {
printf("\n Last accuracy [email protected] = %2.2f %% ", mean_average_precision*100);
}
if (net.cudnn_half) {
if (i < net.burn_in * 3) printf("\n Tensor Cores are disabled until the first %d iterations are reached.", 3 * net.burn_in);
else printf("\n Tensor Cores are used.");
}
//输出损失等数值
printf("\n %d: %f, %f avg loss, %f rate, %lf seconds, %d images\n", get_current_batch(net), loss, avg_loss, get_current_rate(net), (what_time_is_it_now() - time), i*imgs);
int draw_precision = 0;
int calc_map_for_each = 4 * train_images_num / (net.batch * net.subdivisions); // calculate mAP for each 4 Epochs
if (calc_map && (i >= (iter_map + calc_map_for_each) || i == net.max_batches) && i >= net.burn_in && i >= 1000) {
if (l.random) {
printf("Resizing to initial size: %d x %d \n", init_w, init_h);
args.w = init_w;
args.h = init_h;
pthread_join(load_thread, 0);
train = buffer;
load_thread = load_data(args);
int k;
for (k = 0; k < ngpus; ++k) {
resize_network(nets + k, init_w, init_h);
}
net = nets[0];
}
// combine Training and Validation networks
network net_combined = combine_train_valid_networks(net, net_map);
iter_map = i;
mean_average_precision = validate_detector_map(datacfg, cfgfile, weightfile, 0.25, 0.5, &net_combined);
printf("\n mean_average_precision = %f \n", mean_average_precision);
draw_precision = 1;
}
#ifdef OPENCV
//画出损失函数图
draw_train_loss(img, img_size, avg_loss, max_img_loss, i, net.max_batches, mean_average_precision, draw_precision, dont_show, mjpeg_port);
#endif // OPENCV
//if (i % 1000 == 0 || (i < 1000 && i % 100 == 0)) {
//if (i % 100 == 0) {
//每1000轮保存一次权重文件,名字为yolo-obj(i).weights
if (i >= (iter_save + 1000)) {
iter_save = i;
#ifdef GPU
if (ngpus != 1) sync_nets(nets, ngpus, 0);
#endif
char buff[256];
sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
save_weights(net, buff);
}
//每100轮保存一次权重文件,名字为yolo-obj_last.weights
if (i >= (iter_save_last + 100)) {
iter_save_last = i;
#ifdef GPU
if (ngpus != 1) sync_nets(nets, ngpus, 0);
#endif
char buff[256];
sprintf(buff, "%s/%s_last.weights", backup_directory, base, i);
save_weights(net, buff);
}
free_data(train);
}
#ifdef GPU
if (ngpus != 1) sync_nets(nets, ngpus, 0);
#endif
char buff[256];
sprintf(buff, "%s/%s_final.weights", backup_directory, base);
save_weights(net, buff);
#ifdef OPENCV
cvReleaseImage(&img);
cvDestroyAllWindows();
#endif
// free memory
pthread_join(load_thread, 0);
free_data(buffer);
free(base);
free(paths);
free_list_contents(plist);
free_list(plist);
free_list_contents_kvp(options);
free_list(options);
for (i = 0; i < ngpus; ++i) free_network(nets[i]);
free(nets);
//free_network(net);
}
上面调用了很多函数,如parse_network_cfg_custom、train_network等,下面我们分别看看,
4、首先是parse_network_cfg_custom(char *filename, int batch),它在parser.c文件中
/*
 * Builds a network from a cfg file.
 *
 * filename - path of the cfg file, read with read_cfg()
 * batch    - when > 0, overrides the batch size taken from the [net] section
 *
 * The first section must be [net]/[network]; every following section is
 * turned into one layer.  While walking the sections the function tracks the
 * largest layer workspace, input and output sizes and uses them afterwards
 * to allocate the shared workspace (and, in GPU builds, the input buffers).
 * Returns the populated network by value.
 */
network parse_network_cfg_custom(char *filename, int batch)
{
list *sections = read_cfg(filename);
node *n = sections->front;
if(!n) error("Config file has no sections");
/* One layer per section, excluding the leading [net] section. */
network net = make_network(sections->size - 1);
net.gpu_index = gpu_index;
size_params params;
section *s = (section *)n->val;
list *options = s->options;
if(!is_network(s)) error("First section must be [net] or [network]");
parse_net_options(options, &net);
/* Seed the per-layer parameters with the network input dimensions. */
params.h = net.h;
params.w = net.w;
params.c = net.c;
params.inputs = net.inputs;
if (batch > 0) net.batch = batch;
params.batch = net.batch;
params.time_steps = net.time_steps;
params.net = net;
float bflops = 0;
size_t workspace_size = 0;
size_t max_inputs = 0;
size_t max_outputs = 0;
n = n->next;
int count = 0;
free_section(s);
fprintf(stderr, "layer filters size input output\n");
/* Walk the remaining sections: print the layer index and build the layer. */
while(n){
params.index = count;
fprintf(stderr, "%4d ", count);
s = (section *)n->val;
options = s->options;
layer l = {0};
LAYER_TYPE lt = string_to_layer_type(s->type);
if(lt == CONVOLUTIONAL){
/* Original author's note: most sections in the cfg are [convolutional]. */
l = parse_convolutional(options, params);
}else if(lt == LOCAL){
l = parse_local(options, params);
}else if(lt == ACTIVE){
l = parse_activation(options, params);
}else if(lt == RNN){
l = parse_rnn(options, params);
}else if(lt == GRU){
l = parse_gru(options, params);
}else if(lt == CRNN){
l = parse_crnn(options, params);
}else if(lt == CONNECTED){
l = parse_connected(options, params);
}else if(lt == CROP){
l = parse_crop(options, params);
}else if(lt == COST){
l = parse_cost(options, params);
}else if(lt == REGION){
l = parse_region(options, params);
}else if (lt == YOLO) {
l = parse_yolo(options, params);
}else if(lt == DETECTION){
l = parse_detection(options, params);
}else if(lt == SOFTMAX){
l = parse_softmax(options, params);
net.hierarchy = l.softmax_tree;
}else if(lt == NORMALIZATION){
l = parse_normalization(options, params);
}else if(lt == BATCHNORM){
l = parse_batchnorm(options, params);
}else if(lt == MAXPOOL){
l = parse_maxpool(options, params);
}else if(lt == REORG){
l = parse_reorg(options, params); }
else if (lt == REORG_OLD) {
l = parse_reorg_old(options, params);
}else if(lt == AVGPOOL){
l = parse_avgpool(options, params);
}else if(lt == ROUTE){
l = parse_route(options, params, net);
/* Clear use_bin_output on every layer this route reads from. */
int k;
for (k = 0; k < l.n; ++k) net.layers[l.input_layers[k]].use_bin_output = 0;
}else if (lt == UPSAMPLE) {
l = parse_upsample(options, params, net);
}else if(lt == SHORTCUT){
l = parse_shortcut(options, params, net);
/* Clear use_bin_output on the previous layer and on the shortcut source. */
net.layers[count - 1].use_bin_output = 0;
net.layers[l.index].use_bin_output = 0;
}else if(lt == DROPOUT){
l = parse_dropout(options, params);
/* The dropout layer shares output/delta buffers with the previous layer. */
l.output = net.layers[count-1].output;
l.delta = net.layers[count-1].delta;
#ifdef GPU
l.output_gpu = net.layers[count-1].output_gpu;
l.delta_gpu = net.layers[count-1].delta_gpu;
#endif
}else{
fprintf(stderr, "Type not recognized: %s\n", s->type);
}
/* Options read for every layer type, with their defaults. */
l.onlyforward = option_find_int_quiet(options, "onlyforward", 0);
l.stopbackward = option_find_int_quiet(options, "stopbackward", 0);
l.dontload = option_find_int_quiet(options, "dontload", 0);
l.dontloadscales = option_find_int_quiet(options, "dontloadscales", 0);
l.learning_rate_scale = option_find_float_quiet(options, "learning_rate", 1);
option_unused(options);
net.layers[count] = l;
/* Track the maxima used for the allocations after the loop. */
if (l.workspace_size > workspace_size) workspace_size = l.workspace_size;
if (l.inputs > max_inputs) max_inputs = l.inputs;
if (l.outputs > max_outputs) max_outputs = l.outputs;
free_section(s);
n = n->next;
++count;
if(n){
/* The next layer's input is this layer's output. */
params.h = l.out_h;
params.w = l.out_w;
params.c = l.out_c;
params.inputs = l.outputs;
}
if (l.bflops > 0) bflops += l.bflops;
}
free_list(sections);
net.outputs = get_network_output_size(net);
net.output = get_network_output(net);
printf("Total BFLOPS %5.3f \n", bflops);
if(workspace_size){
//printf("%ld\n", workspace_size);
#ifdef GPU
get_cuda_stream();
get_cuda_memcpy_stream();
if(gpu_index >= 0){
net.workspace = cuda_make_array(0, workspace_size/sizeof(float) + 1);
int size = get_network_input_size(net) * net.batch;
net.input_state_gpu = cuda_make_array(0, size);
/* Try a cudaHostAlloc buffer first; fall back to calloc if that fails. */
if (cudaSuccess == cudaHostAlloc(&net.input_pinned_cpu, size*sizeof(float), cudaHostRegisterMapped)) net.input_pinned_cpu_flag = 1;
else net.input_pinned_cpu = calloc(size, sizeof(float));
// pre-allocate memory for inference on Tensor Cores (fp16)
if (net.cudnn_half) {
*net.max_input16_size = max_inputs;
check_error(cudaMalloc((void **)net.input16_gpu, *net.max_input16_size * sizeof(short))); //sizeof(half)
*net.max_output16_size = max_outputs;
check_error(cudaMalloc((void **)net.output16_gpu, *net.max_output16_size * sizeof(short))); //sizeof(half)
}
}else {
net.workspace = calloc(1, workspace_size);
}
#else
net.workspace = calloc(1, workspace_size);
#endif
}
/* Warn when a YOLO/REGION/DETECTION network has an input size that is not a multiple of 32. */
LAYER_TYPE lt = net.layers[net.n - 1].type;
if ((net.w % 32 != 0 || net.h % 32 != 0) && (lt == YOLO || lt == REGION || lt == DETECTION)) {
printf("\n Warning: width=%d and height=%d in cfg-file must be divisible by 32 for default networks Yolo v1/v2/v3!!! \n\n",
net.w, net.h);
}
return net;
}
parse_convolutional(options, params)负责解析cfg配置文件中的[convolutional]段(cfg配置文件中大部分层都是[convolutional]),其中:
size表示卷积核大小,filters是过滤器(卷积核)数量,batch_normalize表示是否进行批归一化处理,stride是步长,pad是外围填充层数,activation是激活函数的类型。
其它shortcut、route、yolo等类似
5、下面再来看train_network(network net, data d),它在network.c文件中
/*
 * Trains `net` on every mini-batch contained in `d`.
 *
 * The row count of d.X must be a multiple of net.batch (asserted).  Each
 * mini-batch is copied into scratch buffers with get_next_batch() and handed
 * to train_network_datum(); the per-batch errors are accumulated.
 *
 * Returns the accumulated error divided by the number of rows processed.
 */
float train_network(network net, data d)
{
    assert(d.X.rows % net.batch == 0);

    int batch = net.batch;
    int steps = d.X.rows / batch;

    /* Scratch buffers holding one mini-batch of inputs and of truths. */
    float *inputs = calloc(batch*d.X.cols, sizeof(float));
    float *truths = calloc(batch*d.y.cols, sizeof(float));

    float total = 0;
    int step = 0;
    while (step < steps) {
        get_next_batch(d, batch, step*batch, inputs, truths);
        float step_err = train_network_datum(net, inputs, truths);
        total += step_err;
        ++step;
    }

    free(inputs);
    free(truths);
    return (float)total/(steps*batch);
}
6、其中,train_network_datum()细节如下:
/*
 * Runs one forward/backward pass of `net` on a single mini-batch.
 *
 *   x - input values for the mini-batch
 *   y - truth values for the mini-batch
 *
 * In GPU builds with gpu_index >= 0 the work is delegated to
 * train_network_datum_gpu().  Otherwise the "seen" counter is advanced by
 * net.batch, the forward and backward passes are run, and the weights are
 * updated once every net.subdivisions mini-batches.
 *
 * Returns the network cost after the pass.
 */
float train_network_datum(network net, float *x, float *y)
{
#ifdef GPU
    if(gpu_index >= 0) return train_network_datum_gpu(net, x, y);
#endif
    /* Zero-initialise so that fields not assigned below are not left
     * indeterminate when the state is passed by value. */
    network_state state = {0};
    *net.seen += net.batch;
    state.index = 0;
    state.net = net;
    state.input = x;
    state.delta = 0;
    state.truth = y;
    state.train = 1;
    forward_network(net, state);    /* forward pass */
    backward_network(net, state);   /* backward pass */
    float error = get_network_cost(net);
    /* Apply the update only after a full set of subdivisions has been seen. */
    if(((*net.seen)/net.batch)%net.subdivisions == 0) update_network(net);
    return error;
}
到这里就能基本修改实现自己的功能了。
还有很多细节,有空再更新了!