tensorflow系列之_pspnet网络

语义分割网络 PSPNet(Pyramid Scene Parsing Network)对图像进行像素级分类,并提出了金字塔池化模块(Pyramid Pooling Module);金字塔层级的数量和每一层的池化尺寸都可以进行调整。

tensorflow系列之_pspnet网络

主体结构是一个提取特征的基础网络(resnet/mobilenet)+金字塔池化。

项目中为了减小网络参数大小和部署方便,backbone采用的mobilenetv1。

mobilenet_v1.py代码(mobilenet_v1输出 f1,f2,f3,f4,f5不同尺度的特征)

#!/usr/bin/env python 
# -*- coding: utf-8 -*-
# @time: 2020/6/22 19:27

"""
标准的mobilenet_v1
只能接受输入224*224大小的图片
输出 f1,f2,f3,f4,f5不同尺度的特征图

"""

import tensorflow as tf
import tensorflow.contrib.slim as slim

def mobilenet(inputs,
              num_classes=100,
              is_training=True,
              width_multiplier=1,
              scope='mobilenet'):
    """Build the MobileNet-v1 convolutional trunk and return multi-scale features.

    Only the feature extractor is built: no pooling, fully connected or
    softmax head is created. The returned features are the raw outputs of
    the selected pointwise (1x1) convolutions, i.e. taken *before* the
    batch-norm/ReLU that follows them, because the convolutions are created
    with ``activation_fn=None`` and it is the convolution outputs that are
    collected as end points.

    :param inputs: 4-D image tensor, [batch_size, h, w, c].
    :param num_classes: unused by the trunk; kept so existing callers that
        pass it keep working.
    :param is_training: forwarded to every ``slim.batch_norm`` layer.
    :param width_multiplier: scales the number of filters of every layer
        (the product is rounded to the nearest integer).
    :param scope: name of the variable scope the network is built in.
    :return: tuple ``(inputs, [f1, f2, f3, f4, f5])`` where ``inputs`` is the
        tensor that was passed in and ``f1``..``f5`` are feature maps at
        1/2, 1/4, 1/8, 1/16 and 1/32 of the input resolution with
        64, 128, 256, 512 and 1024 channels (times ``width_multiplier``).
    """
    def _depthwise_separable_conv(inputs,
                                  num_pwc_filters,
                                  width_multiplier,
                                  sc,
                                  downsample=False):
        """Depthwise-separable convolution: 3x3 depthwise + BN, 1x1 pointwise + BN.

        :param inputs: 4-D input tensor.
        :param num_pwc_filters: number of pointwise (1x1) filters before
            ``width_multiplier`` is applied.
        :param width_multiplier: filter-count multiplier.
        :param sc: scope prefix used to name the four layers.
        :param downsample: when True the depthwise convolution uses stride 2.
        :return: output of the batch norm that follows the pointwise conv.
        """
        # int() keeps the filter count an integer on Python 2 as well, where
        # round() returns a float.
        num_pwc_filters = int(round(num_pwc_filters * width_multiplier))
        _stride = 2 if downsample else 1

        # num_outputs=None makes slim skip the pointwise step, so this layer
        # is the depthwise convolution only.
        depthwise_conv = slim.separable_convolution2d(inputs,
                                                      num_outputs=None,
                                                      stride=_stride,
                                                      depth_multiplier=1,
                                                      kernel_size=[3, 3],
                                                      scope=sc + '/depthwise_conv')
        bn = slim.batch_norm(depthwise_conv, scope=sc + '/dw_batch_norm')

        # The pointwise convolution is an ordinary 1x1 convolution.
        pointwise_conv = slim.conv2d(bn,
                                     num_pwc_filters,
                                     kernel_size=[1, 1],
                                     scope=sc + '/pointwise_conv')
        return slim.batch_norm(pointwise_conv, scope=sc + '/pw_batch_norm')

    # (layer scope, pointwise filters, downsample) for conv_ds_2 .. conv_ds_14.
    # The trailing comment is the spatial size relative to the input.
    ds_layers = [
        ('conv_ds_2', 64, False),     # h/2,  w/2
        ('conv_ds_3', 128, True),     # h/4,  w/4
        ('conv_ds_4', 128, False),    # h/4,  w/4
        ('conv_ds_5', 256, True),     # h/8,  w/8
        ('conv_ds_6', 256, False),    # h/8,  w/8
        ('conv_ds_7', 512, True),     # h/16, w/16
        ('conv_ds_8', 512, False),    # h/16, w/16
        ('conv_ds_9', 512, False),    # h/16, w/16
        ('conv_ds_10', 512, False),   # h/16, w/16
        ('conv_ds_11', 512, False),   # h/16, w/16
        ('conv_ds_12', 512, False),   # h/16, w/16
        ('conv_ds_13', 1024, True),   # h/32, w/32
        ('conv_ds_14', 1024, False),  # h/32, w/32
    ]

    with tf.variable_scope(scope) as var_scope:
        end_points_collection = var_scope.name + '_end_points'
        with slim.arg_scope([slim.conv2d, slim.separable_convolution2d],
                            activation_fn=None,
                            outputs_collections=[end_points_collection]):
            with slim.arg_scope([slim.batch_norm],
                                is_training=is_training,
                                activation_fn=tf.nn.relu,
                                fused=True):
                net = slim.conv2d(inputs, int(round(32 * width_multiplier)), [3, 3],
                                  stride=2, padding='SAME',
                                  scope='conv_1')  # [batch_size, h/2, w/2, 32]
                net = slim.batch_norm(net, scope='conv_1/batch_norm')

                for layer_scope, num_filters, downsample in ds_layers:
                    net = _depthwise_separable_conv(net, num_filters, width_multiplier,
                                                    sc=layer_scope,
                                                    downsample=downsample)
                # No pooling head is built here: its output was never
                # returned, and a fixed 7x7 pooling window fails at graph
                # construction once the last feature map is smaller than
                # 7x7 (for example a 192x192 input).

        end_points = slim.utils.convert_collection_to_dict(end_points_collection)

        # End points are keyed by the full variable-scope name of the layer,
        # so build the keys from the actual scope instead of assuming
        # 'mobilenet'; a custom `scope` or an enclosing variable scope would
        # otherwise raise KeyError.
        prefix = var_scope.name + '/'
        f1 = end_points[prefix + 'conv_ds_2/pointwise_conv']   # e.g. [None, 112, 112, 64]
        f2 = end_points[prefix + 'conv_ds_4/pointwise_conv']   # e.g. [None, 56, 56, 128]
        f3 = end_points[prefix + 'conv_ds_6/pointwise_conv']   # e.g. [None, 28, 28, 256]
        f4 = end_points[prefix + 'conv_ds_12/pointwise_conv']  # e.g. [None, 14, 14, 512]
        f5 = end_points[prefix + 'conv_ds_14/pointwise_conv']  # e.g. [None, 7, 7, 1024]

    return inputs, [f1, f2, f3, f4, f5]
mobilenet.default_image_size = 224

def mobilenet_arg_scope(weight_decay=0.0):
    """Return the default slim arg_scope for the convolution layers.

    The scope applies to ``slim.convolution2d`` and
    ``slim.separable_convolution2d`` and sets a Xavier weight initializer,
    a zeros bias initializer and an L2 weight regularizer.

    :param weight_decay: scale passed to ``slim.l2_regularizer``.
    :return: the arg_scope object, to be entered with ``slim.arg_scope(...)``.
    """
    conv_ops = [slim.convolution2d, slim.separable_convolution2d]
    conv_defaults = {
        'weights_initializer': slim.initializers.xavier_initializer(),
        'biases_initializer': slim.init_ops.zeros_initializer(),
        'weights_regularizer': slim.l2_regularizer(weight_decay),
    }
    with slim.arg_scope(conv_ops, **conv_defaults) as arg_sc:
        return arg_sc


if __name__ == '__main__':
    # Smoke test: build the encoder on a random batch and print the five
    # feature tensors it returns.
    batch_size = 5
    height, width = 224, 224
    num_classes = 100

    inputs = tf.random_uniform((batch_size, height, width, 3))
    inputs_2, [f1, f2, f3, f4, f5] = mobilenet(inputs, num_classes)

    for name, feature in zip(('f1', 'f2', 'f3', 'f4', 'f5'),
                             (f1, f2, f3, f4, f5)):
        print('%s: %s' % (name, feature))

    # The variables only exist once mobilenet() has built the graph, so the
    # initializer has to be created and run afterwards; running it first
    # would initialize nothing.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

 

打印出f1,f2,f3,f4,f5的shape

f1: Tensor("mobilenet/conv_ds_2/pointwise_conv/BiasAdd:0", shape=(5, 112, 112, 64), dtype=float32)
f2: Tensor("mobilenet/conv_ds_4/pointwise_conv/BiasAdd:0", shape=(5, 56, 56, 128), dtype=float32)
f3: Tensor("mobilenet/conv_ds_6/pointwise_conv/BiasAdd:0", shape=(5, 28, 28, 256), dtype=float32)
f4: Tensor("mobilenet/conv_ds_12/pointwise_conv/BiasAdd:0", shape=(5, 14, 14, 512), dtype=float32)
f5: Tensor("mobilenet/conv_ds_14/pointwise_conv/BiasAdd:0", shape=(5, 7, 7, 1024), dtype=float32)

 

 

PSPnet部分的代码

pspnet_model.py

import tensorflow as tf
from tensorflow.contrib import  slim
from nets.mobilenet_v1 import mobilenet_v1 as  get_mobilenet_encoder
import  numpy as np


#
# MERGE_AXIS = -1 # 通道维度上链接


def resize_image(input_images,s):
    """Resize a 4-D image tensor by a (height, width) scale factor.

    The target size is computed from the tensor's static shape, so axes 1
    and 2 (height and width) must be statically known.

    :param input_images: 4-D tensor whose axes 1 and 2 are height and width.
    :param s: pair ``(h_scale, w_scale)``, e.g. ``(2, 2)`` doubles both sides.
    :return: the tensor resized with ``tf.image.resize_images``.
    """
    static_shape = input_images.get_shape().as_list()
    # int() truncates, so fractional target sizes are rounded down.
    target_size = (int(static_shape[1] * s[0]), int(static_shape[2] * s[1]))
    return tf.image.resize_images(input_images, size=target_size)

# Pooling at different scales: each call builds one pyramid-pooling branch.
def pool_block(x, pool_factor, IMAGE_ORDERING='NHWC'):
    """Build one pyramid-pooling branch.

    The feature map is average-pooled into roughly ``pool_factor`` x
    ``pool_factor`` bins, projected to 512 channels with a 1x1 convolution,
    batch-normalised, passed through ReLU and resized back to the spatial
    size of the input so it can be concatenated with it.

    :param x: 4-D feature map with statically known height and width.
    :param pool_factor: number of bins per spatial side, e.g. 1, 2, 3 or 6.
    :param IMAGE_ORDERING: ``'NHWC'`` (default) or ``'NCHW'``.
    :return: tensor with the same layout and spatial size as ``x`` and
        512 channels.
    :raises ValueError: if ``IMAGE_ORDERING`` is not supported or the
        spatial size of ``x`` is not statically known.
    """
    if IMAGE_ORDERING == 'NHWC':
        h_axis, w_axis = 1, 2
    elif IMAGE_ORDERING == 'NCHW':
        h_axis, w_axis = 2, 3
    else:
        # Previously this fell through and failed later on unbound h/w.
        raise ValueError(
            "IMAGE_ORDERING must be 'NHWC' or 'NCHW', got %r" % (IMAGE_ORDERING,))

    static_shape = x.get_shape().as_list()
    h, w = static_shape[h_axis], static_shape[w_axis]
    if h is None or w is None:
        raise ValueError(
            'pool_block needs a statically known spatial size, got shape %s'
            % (static_shape,))

    # Window == stride, so the pooled map has about pool_factor bins per
    # side (e.g. an 18x18 map gives windows of 18, 9, 6, 3 for factors
    # 1, 2, 3, 6). Clamp to 1 so a factor larger than the map cannot
    # produce an empty window.
    pool_size = [max(1, int(np.round(float(h) / pool_factor))),
                 max(1, int(np.round(float(w) / pool_factor)))]

    # The layers are told the layout explicitly so the NCHW branch pools and
    # normalises over the same axes that h and w were read from.
    x = slim.avg_pool2d(x, kernel_size=pool_size, stride=pool_size,
                        padding='SAME', data_format=IMAGE_ORDERING)
    # NOTE(review): slim.conv2d keeps its default ReLU here, so the order is
    # conv -> ReLU -> BN -> ReLU; left unchanged to preserve the model.
    x = slim.conv2d(x, 512, kernel_size=(1, 1), stride=1, padding='SAME',
                    data_format=IMAGE_ORDERING)
    x = slim.batch_norm(x, data_format=IMAGE_ORDERING)
    x = tf.nn.relu(x)

    # Resize straight back to the input size rather than
    # pooled_size * stride: the two agree when the size divides evenly, and
    # only the former still matches the input when it does not.
    if IMAGE_ORDERING == 'NCHW':
        # tf.image.resize_images works on channels-last tensors.
        x = tf.transpose(x, [0, 2, 3, 1])
        x = tf.image.resize_images(x, size=(h, w))
        x = tf.transpose(x, [0, 3, 1, 2])
    else:
        x = tf.image.resize_images(x, size=(h, w))

    return x


def _pyramid_concat(features, pool_factors):
    """Concatenate `features` with one pool_block branch per pooling factor."""
    branches = [features] + [pool_block(features, p) for p in pool_factors]
    return tf.concat(branches, axis=-1)


def _upsample_and_fuse(features, skip):
    """Project to 512 channels, upsample 2x, concatenate `skip`, project again."""
    # NOTE(review): slim.conv2d keeps its default ReLU in these projections,
    # exactly as in the original code.
    features = slim.conv2d(features, 512, kernel_size=(1, 1), stride=1, padding='SAME')
    features = slim.batch_norm(features)
    features = resize_image(features, (2, 2))
    features = tf.concat([features, skip], axis=-1)
    features = slim.conv2d(features, 512, kernel_size=(1, 1), stride=1, padding='SAME')
    features = slim.batch_norm(features)
    return tf.nn.relu(features)


def pspnet(input_images, num_classes, input_height=576, input_width=576,
           is_training=True):
    """Build the PSPNet-style segmentation head on top of the encoder.

    Pyramid pooling is applied to the deepest encoder feature (f5); the
    result is upsampled and fused with f4, pooled again, upsampled and fused
    with f3, pooled a third time and classified per pixel. With a 576x576
    input the classifier runs at 72x72 and its output is upsampled to
    144x144, i.e. 1/4 of the input resolution.

    NOTE(review): the encoder is imported at the top of the file as
    ``mobilenet_v1`` from ``nets.mobilenet_v1``; confirm that the module
    exposes that name and returns ``(inputs, [f1, f2, f3, f4, f5])``.

    :param input_images: 4-D image tensor, [batch, height, width, channels].
    :param num_classes: number of segmentation classes.
    :param input_height: expected input height; must be a multiple of 192.
    :param input_width: expected input width; must be a multiple of 192.
    :param is_training: batch-norm mode for the encoder and the head. The
        default ``True`` matches the previous behaviour.
    :return: raw (pre-softmax) class scores of shape
        [batch, out_height * out_width, num_classes].
    """
    assert input_height % 192 == 0, 'input_height must be a multiple of 192'
    assert input_width % 192 == 0, 'input_width must be a multiple of 192'

    _, levels = get_mobilenet_encoder(inputs=input_images,
                                      num_classes=num_classes,
                                      is_training=is_training)
    # Only the three deepest feature maps are used by the head.
    _, _, f3, f4, f5 = levels

    pool_factors = [1, 2, 3, 6]

    # The arg_scope also reaches the batch norms created inside pool_block.
    with slim.arg_scope([slim.batch_norm], is_training=is_training):
        # Pyramid pooling on f5, then fuse with f4 at 1/16 resolution.
        o = _pyramid_concat(f5, pool_factors)
        o = _upsample_and_fuse(o, f4)

        # Pyramid pooling on the fused map, then fuse with f3 at 1/8.
        o = _pyramid_concat(o, pool_factors)
        o = _upsample_and_fuse(o, f3)

        # Final pyramid pooling and projection at 1/8 resolution.
        o = _pyramid_concat(o, pool_factors)
        o = slim.conv2d(o, 512, kernel_size=(1, 1), stride=1, padding='SAME')
        o = slim.batch_norm(o)
        o = tf.nn.relu(o)

        # Per-pixel classifier. activation_fn=None is required: slim.conv2d
        # applies ReLU by default, which would clip the returned logits at
        # zero.
        o = slim.conv2d(o, num_classes, kernel_size=(3, 3), stride=1,
                        padding='SAME', activation_fn=None)
        o = resize_image(o, (2, 2))  # 1/8 -> 1/4 of the input resolution

    # Flatten the spatial axes using the real tensor size. Trusting
    # input_height/input_width alone would silently fold pixels into the
    # batch axis whenever they disagree with input_images.
    out_h, out_w = o.get_shape().as_list()[1:3]
    if out_h is None or out_w is None:
        num_pixels = (input_height * input_width) // 16
    else:
        num_pixels = out_h * out_w
    out_put = tf.reshape(o, (-1, num_pixels, num_classes))
    return out_put



if __name__ == '__main__':
    # Smoke test: build the network once on a constant 576x576 RGB image.
    input_images = tf.ones([1, 576, 576, 3], tf.float32)
    out_put = pspnet(
        input_images,
        num_classes=100,
        input_height=576,
        input_width=576,
    )