tensorflow系列之_pspnet网络
图像分割领域的语义分割网络 PSPNet,基于像素级的分类,提出了一种金字塔池化模块,金字塔层级的数量和每一层的大小都可以进行调整
主体结构是一个提取特征的基础网络(resnet/mobilenet)+金字塔池化。
项目中为了减小网络参数大小和部署方便,backbone采用的mobilenetv1。
mobilenet_v1.py代码(mobilenet_v1输出 f1,f2,f3,f4,f5不同尺度的特征)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @time: 2020/6/22 19:27
"""MobileNet v1 backbone that exposes five multi-scale feature maps.

The classification head (global pooling, squeeze, fully connected layer and
softmax) is intentionally not built; the network is used only as a feature
extractor and returns the feature maps f1..f5.
"""
import tensorflow as tf
import tensorflow.contrib.slim as slim


def mobilenet(inputs,
              num_classes=100,
              is_training=True,
              width_multiplier=1,
              scope='mobilenet'):
    """Build the MobileNet v1 feature extractor.

    :param inputs: input tensor laid out as [batch_size, h, w, c]
    :param num_classes: unused; kept so existing callers keep working
        (the classification head is not built)
    :param is_training: forwarded to every batch-norm layer
    :param width_multiplier: scales the number of filters of every layer
    :param scope: name of the enclosing variable scope
    :return: tuple ``(inputs, [f1, f2, f3, f4, f5])`` where ``inputs`` is the
        tensor that was passed in and f1..f5 are the outputs of the pointwise
        convolutions of blocks conv_ds_2, 4, 6, 12 and 14. These tensors are
        taken before the batch norm / ReLU that follows each pointwise
        convolution.
    """

    def _depthwise_separable_conv(inputs,
                                  num_pwc_filters,
                                  width_multiplier,
                                  sc,
                                  downsample=False):
        """Depthwise-separable convolution block.

        :param inputs: input feature map
        :param num_pwc_filters: number of pointwise (1x1) filters before
            applying ``width_multiplier``
        :param width_multiplier: filter-count multiplier
        :param sc: scope-name prefix for the layers of this block
        :param downsample: use stride 2 in the depthwise convolution
        :return: output of the batch norm that follows the pointwise conv
        """
        num_pwc_filters = int(round(num_pwc_filters * width_multiplier))
        _stride = 2 if downsample else 1

        # num_outputs=None makes slim skip the pointwise step, so this layer
        # is the depthwise 3x3 convolution only.
        depthwise_conv = slim.separable_convolution2d(
            inputs,
            num_outputs=None,
            stride=_stride,
            depth_multiplier=1,
            kernel_size=[3, 3],
            scope=sc + '/depthwise_conv')
        bn = slim.batch_norm(depthwise_conv, scope=sc + '/dw_batch_norm')

        # The pointwise convolution is an ordinary 1x1 convolution.
        pointwise_conv = slim.conv2d(
            bn,
            num_pwc_filters,
            kernel_size=[1, 1],
            scope=sc + '/pointwise_conv')
        bn = slim.batch_norm(pointwise_conv, scope=sc + '/pw_batch_norm')
        return bn

    with tf.variable_scope(scope) as net_scope:
        end_points_collection = net_scope.name + '_end_points'
        with slim.arg_scope([slim.conv2d, slim.separable_convolution2d],
                            activation_fn=None,
                            outputs_collections=[end_points_collection]):
            with slim.arg_scope([slim.batch_norm],
                                is_training=is_training,
                                activation_fn=tf.nn.relu,
                                fused=True):
                # [batch_size, h/2, w/2, 32]
                net = slim.conv2d(inputs, int(round(32 * width_multiplier)), [3, 3],
                                  stride=2, padding='SAME', scope='conv_1')
                net = slim.batch_norm(net, scope='conv_1/batch_norm')
                # [batch_size, h/2, w/2, 64]
                net = _depthwise_separable_conv(net, 64, width_multiplier, sc='conv_ds_2')
                # downsample -> [batch_size, h/4, w/4, 128]
                net = _depthwise_separable_conv(net, 128, width_multiplier,
                                                downsample=True, sc='conv_ds_3')
                # [batch_size, h/4, w/4, 128]
                net = _depthwise_separable_conv(net, 128, width_multiplier, sc='conv_ds_4')
                # downsample -> [batch_size, h/8, w/8, 256]
                net = _depthwise_separable_conv(net, 256, width_multiplier,
                                                downsample=True, sc='conv_ds_5')
                # [batch_size, h/8, w/8, 256]
                net = _depthwise_separable_conv(net, 256, width_multiplier, sc='conv_ds_6')
                # downsample -> [batch_size, h/16, w/16, 512]
                net = _depthwise_separable_conv(net, 512, width_multiplier,
                                                downsample=True, sc='conv_ds_7')
                # Five blocks that keep [batch_size, h/16, w/16, 512].
                net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_8')
                net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_9')
                net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_10')
                net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_11')
                net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_12')
                # downsample -> [batch_size, h/32, w/32, 1024]
                net = _depthwise_separable_conv(net, 1024, width_multiplier,
                                                downsample=True, sc='conv_ds_13')
                # [batch_size, h/32, w/32, 1024]
                net = _depthwise_separable_conv(net, 1024, width_multiplier, sc='conv_ds_14')

                # The original 7x7 average pool is not built: its result was
                # discarded, and it made graph construction fail whenever the
                # last feature map was smaller than 7x7.

        # Every conv layer registered its output in the collection under the
        # name of its variable scope; turn that into a dict for lookup.
        end_points = slim.utils.convert_collection_to_dict(end_points_collection)

    # Build the keys from the actual scope name so that a non-default
    # ``scope`` argument works as well.
    prefix = net_scope.name
    f1 = end_points[prefix + '/conv_ds_2/pointwise_conv']   # 224 input: [None, 112, 112, 64]
    f2 = end_points[prefix + '/conv_ds_4/pointwise_conv']   # 224 input: [None, 56, 56, 128]
    f3 = end_points[prefix + '/conv_ds_6/pointwise_conv']   # 224 input: [None, 28, 28, 256]
    f4 = end_points[prefix + '/conv_ds_12/pointwise_conv']  # 224 input: [None, 14, 14, 512]
    f5 = end_points[prefix + '/conv_ds_14/pointwise_conv']  # 224 input: [None, 7, 7, 1024]
    return inputs, [f1, f2, f3, f4, f5]


mobilenet.default_image_size = 224


def mobilenet_arg_scope(weight_decay=0.0):
    """Return the default arg scope for the convolution layers.

    :param weight_decay: L2 regularisation factor applied to the weights
    :return: the slim arg scope object
    """
    with slim.arg_scope(
            [slim.convolution2d, slim.separable_convolution2d],
            weights_initializer=slim.initializers.xavier_initializer(),
            biases_initializer=slim.init_ops.zeros_initializer(),
            weights_regularizer=slim.l2_regularizer(weight_decay)) as sc:
        return sc


if __name__ == '__main__':
    batch_size = 5
    height, width = 224, 224
    num_classes = 100

    inputs = tf.random_uniform((batch_size, height, width, 3))
    inputs_2, [f1, f2, f3, f4, f5] = mobilenet(inputs, num_classes)

    # Print the symbolic tensors (name, static shape, dtype).
    for name, feature in zip(('f1', 'f2', 'f3', 'f4', 'f5'), (f1, f2, f3, f4, f5)):
        print('%s: %s' % (name, feature))

    # Variables exist only after the graph has been built, so initialise them
    # here; the context manager closes the session afterwards.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run([f1, f2, f3, f4, f5])
打印出f1,f2,f3,f4,f5的shape
f1: Tensor("mobilenet/conv_ds_2/pointwise_conv/BiasAdd:0", shape=(5, 112, 112, 64), dtype=float32)
f2: Tensor("mobilenet/conv_ds_4/pointwise_conv/BiasAdd:0", shape=(5, 56, 56, 128), dtype=float32)
f3: Tensor("mobilenet/conv_ds_6/pointwise_conv/BiasAdd:0", shape=(5, 28, 28, 256), dtype=float32)
f4: Tensor("mobilenet/conv_ds_12/pointwise_conv/BiasAdd:0", shape=(5, 14, 14, 512), dtype=float32)
f5: Tensor("mobilenet/conv_ds_14/pointwise_conv/BiasAdd:0", shape=(5, 7, 7, 1024), dtype=float32)
PSPnet部分的代码
pspnet_model.py
import tensorflow as tf
from tensorflow.contrib import slim
# NOTE(review): the encoder is expected to return (inputs, [f1, f2, f3, f4, f5]);
# confirm that nets.mobilenet_v1 really exports a callable named `mobilenet_v1`.
from nets.mobilenet_v1 import mobilenet_v1 as get_mobilenet_encoder
import numpy as np


def resize_image(input_images, s):
    """Resize a 4-D image tensor by integer or fractional scale factors.

    :param input_images: 4-D tensor; the static sizes of axes 1 and 2 are read
        as height and width, so they must be known at graph-construction time
    :param s: pair (height_factor, width_factor), e.g. (2, 2) doubles the size
    :return: the resized tensor
    """
    static_shape = input_images.get_shape().as_list()
    h, w = static_shape[1], static_shape[2]
    h_ratio, w_ratio = s[0], s[1]
    new_h = int(h * h_ratio)
    new_w = int(w * w_ratio)
    return tf.image.resize_images(input_images, size=(new_h, new_w))


def pool_block(x, pool_factor, IMAGE_ORDERING='NHWC'):
    """Pool a feature map at one pyramid scale and resize it back.

    The map is average-pooled with kernel == stride == size / pool_factor,
    passed through a 1x1 convolution (512 filters), batch norm and ReLU, and
    then enlarged by the pooling stride.

    :param x: 4-D feature map
    :param pool_factor: pyramid scale; the pooled map has roughly
        pool_factor x pool_factor cells
    :param IMAGE_ORDERING: 'NHWC' or 'NCHW'; only selects which axes are read
        as height and width.
        NOTE(review): the slim layers and resize_image below are called with
        their default data layout, so 'NCHW' looks unsupported in practice --
        confirm before relying on it.
    :return: tensor enlarged back by the pooling stride
    :raises ValueError: if IMAGE_ORDERING is neither 'NHWC' nor 'NCHW'
    """
    dims = x.get_shape().as_list()
    if IMAGE_ORDERING == 'NHWC':
        h, w = dims[1], dims[2]
    elif IMAGE_ORDERING == 'NCHW':
        h, w = dims[2], dims[3]
    else:
        raise ValueError('IMAGE_ORDERING must be "NHWC" or "NCHW", got %r'
                         % (IMAGE_ORDERING,))

    pool_size = [int(np.round(float(h) / pool_factor)),
                 int(np.round(float(w) / pool_factor))]
    strides = pool_size

    x = slim.avg_pool2d(x, kernel_size=pool_size, stride=strides, padding='SAME')
    x = slim.conv2d(x, 512, kernel_size=(1, 1), stride=1, padding='SAME')
    x = slim.batch_norm(x)
    x = tf.nn.relu(x)
    # Scale back up by the stride that was used for pooling.
    x = resize_image(x, strides)
    return x


def _pyramid_pool(features, pool_factors):
    """Concatenate `features` with its pooled versions along the channel axis."""
    branches = [features]
    for factor in pool_factors:
        branches.append(pool_block(features, factor))
    return tf.concat(branches, axis=-1)


def pspnet(input_images, num_classes, input_height=576, input_width=576):
    """Build the PSPNet-style segmentation head on top of the encoder.

    Pyramid pooling is applied to f5, the result is upsampled and merged with
    f4, pooled again, upsampled and merged with f3, pooled a third time and
    finally projected to `num_classes` channels.

    NOTE(review): slim.conv2d applies ReLU by default, so the hidden 1x1
    convolutions below are Conv-ReLU-BN rather than Conv-BN; this is kept
    as-is. The batch-norm layers use slim's default `is_training`; confirm
    before using this graph for inference.

    :param input_images: 4-D image tensor passed to the encoder
    :param num_classes: number of output classes
    :param input_height: image height, must be a multiple of 192
    :param input_width: image width, must be a multiple of 192
    :return: logits reshaped to
        [-1, (input_height * input_width) // 16, num_classes]
    """
    assert input_height % 192 == 0, 'input_height must be a multiple of 192'
    assert input_width % 192 == 0, 'input_width must be a multiple of 192'

    img_input, levels = get_mobilenet_encoder(inputs=input_images,
                                              num_classes=num_classes)
    [f1, f2, f3, f4, f5] = levels

    pool_factors = [1, 2, 3, 6]

    # Stage 1: pyramid pooling on f5, then upsample x2 and merge with f4.
    o = _pyramid_pool(f5, pool_factors)
    o = slim.conv2d(o, 512, kernel_size=(1, 1), stride=1, padding='SAME')
    o = slim.batch_norm(o)
    o = resize_image(o, (2, 2))
    o = tf.concat([o, f4], axis=-1)
    o = slim.conv2d(o, 512, kernel_size=(1, 1), stride=1, padding='SAME')
    o = slim.batch_norm(o)
    o = tf.nn.relu(o)

    # Stage 2: pyramid pooling on the merged map, upsample x2, merge with f3.
    o = _pyramid_pool(o, pool_factors)
    o = slim.conv2d(o, 512, kernel_size=(1, 1), stride=1, padding='SAME')
    o = slim.batch_norm(o)
    o = resize_image(o, (2, 2))
    o = tf.concat([o, f3], axis=-1)
    o = slim.conv2d(o, 512, kernel_size=(1, 1), stride=1, padding='SAME')
    o = slim.batch_norm(o)
    o = tf.nn.relu(o)

    # Stage 3: final pyramid pooling.
    o = _pyramid_pool(o, pool_factors)
    o = slim.conv2d(o, 512, kernel_size=(1, 1), stride=1, padding='SAME')
    o = slim.batch_norm(o)
    o = tf.nn.relu(o)

    # Classification layer. activation_fn=None is required here: with slim's
    # default ReLU the returned logits would be clamped to be non-negative.
    o = slim.conv2d(o, num_classes, kernel_size=(3, 3), stride=1,
                    padding='SAME', activation_fn=None)
    o = resize_image(o, (2, 2))

    # Flatten the spatial axes; softmax is left to the caller / loss function.
    o = tf.reshape(o, (-1, (input_height * input_width) // 16, num_classes))
    out_put = o
    return out_put


if __name__ == '__main__':
    input_images = tf.ones((1, 576, 576, 3), dtype=tf.float32)
    out_put = pspnet(input_images, num_classes=100, input_height=576, input_width=576)
    print('output:', out_put)