TensorFlow实践(10)——卷积神经网络模型LeNet5

(一)前 言

卷积神经网络(Convolutional Neural Networks, CNN)是一类包含卷积或相关计算且具有深度结构的前馈神经网络(Feedforward Neural Networks),是深度学习(deep learning)的代表算法之一。由于卷积神经网络能够进行平移不变分类(shift-invariant classification),因此在文献中也被称为“平移不变人工神经网络(Shift-Invariant Artificial Neural Networks, SIANN)”。卷积神经网络仿造生物的视知觉(visual perception)机制构建,可以进行监督学习和非监督学习,其隐含层内的卷积参数共享和层间连接的稀疏性使得卷积神经网络能够以较小的计算量对格点化(grid-like topology)特征,例如像素和音频进行学习、有稳定的效果且对数据没有额外的特征工程(feature engineering)要求。对卷积神经网络的研究始于二十世纪80至90年代,时间延迟网络和LeNet-5是最早被证实有效的卷积神经网络算法;在二十一世纪后,随着数值计算设备的改进,卷积神经网络得到了快速发展,并被大量应用于计算机视觉、自然语言处理等领域。
——引用自百度百科

本节我们要使用TensorFlow实现LeNet5模型,这是一个非常经典的卷积模型,它的结构图如下所示:
TensorFlow实践(10)——卷积神经网络模型LeNet5
本文实现的LeNet5模型一共包含三个卷积层(Conv1、Conv2、Conv3)、两个池化层、一个全连接层和一个输出层

(二)问题描述

使用卷积神经网络识别MNIST手写体数字数据集:
TensorFlow实践(10)——卷积神经网络模型LeNet5

(三)LeNet5模型的TensorFlow实现

(1)模型参数设置

# Number of optimisation steps the training loop runs
train_steps = 1000
# Learning rate handed to the optimiser
learning_rate = 0.01

(2)导入数据

import tensorflow.examples.tutorials.mnist.input_data as input_data
# Load MNIST with one-hot encoded labels; the first argument (the data
# directory) is left as an empty string here.
mnist = input_data.read_data_sets(
    '',
    one_hot=True,
)

(3)定义相关函数

# Convolution layer
def conv(input, filter_shape, bias_shape, strides_shape):
    """Build a SAME-padded 2-D convolution followed by a sigmoid activation.

    The kernel and bias are obtained through ``tf.get_variable`` under the
    names "filter" and "bias", both initialised from a truncated normal
    distribution. Callers in this file wrap each call in its own
    ``tf.variable_scope``.

    Args:
        input: Tensor passed to ``tf.nn.conv2d``.
        filter_shape: Shape of the "filter" variable.
        bias_shape: Shape of the "bias" variable.
        strides_shape: ``strides`` argument of ``tf.nn.conv2d``.

    Returns:
        ``sigmoid(conv2d(input, filter) + bias)``.
    """
    make_init = tf.truncated_normal_initializer
    kernel = tf.get_variable("filter", shape=filter_shape, initializer=make_init())
    offset = tf.get_variable("bias", shape=bias_shape, initializer=make_init())
    feature_map = tf.nn.conv2d(input, kernel, strides=strides_shape, padding='SAME')
    return tf.nn.sigmoid(feature_map + offset)

# Max-pooling layer
def pooling(input, ksize_shape, strides_shape):
    """Return the SAME-padded max-pool of ``input``.

    Args:
        input: Tensor passed to ``tf.nn.max_pool``.
        ksize_shape: ``ksize`` (pooling window) argument.
        strides_shape: ``strides`` argument.

    Returns:
        The tensor produced by ``tf.nn.max_pool``.
    """
    return tf.nn.max_pool(
        input,
        ksize=ksize_shape,
        strides=strides_shape,
        padding='SAME',
    )

# Fully connected layer
def connection(input, weight_shape, bias_shape, flat_shape):
    """Flatten ``input`` and apply a sigmoid-activated dense layer.

    The "weight" and "bias" variables are obtained through
    ``tf.get_variable`` and initialised from a truncated normal distribution.

    Args:
        input: Tensor to flatten.
        weight_shape: Shape of the "weight" matrix.
        bias_shape: Shape of the "bias" vector.
        flat_shape: Target shape handed to ``tf.reshape`` before the matmul.

    Returns:
        ``sigmoid(reshape(input, flat_shape) @ weight + bias)``.
    """
    make_init = tf.truncated_normal_initializer
    w = tf.get_variable("weight", shape=weight_shape, initializer=make_init())
    b = tf.get_variable("bias", shape=bias_shape, initializer=make_init())

    flattened = tf.reshape(input, flat_shape)
    return tf.nn.sigmoid(tf.matmul(flattened, w) + b)

(4)模型构建

# Placeholders for flattened images (784 values) and one-hot labels (10 classes)
with tf.name_scope('Input'):
    x_data = tf.placeholder(dtype=tf.float32, shape=[None, 784])
    y_data = tf.placeholder(dtype=tf.float32, shape=[None, 10])
    # Turn every 784-vector back into a 28x28 single-channel image
    x_image = tf.reshape(x_data, shape=[-1, 28, 28, 1])

# 5x5 convolution, 1 -> 6 channels, stride 1
with tf.variable_scope('Conv1'):
    conv1_output = conv(
        input=x_image,
        filter_shape=[5, 5, 1, 6],
        bias_shape=[6],
        strides_shape=[1, 1, 1, 1],
    )

# 2x2 max-pool with stride 2
with tf.variable_scope('Pooling1'):
    pooling1_output = pooling(
        input=conv1_output,
        ksize_shape=[1, 2, 2, 1],
        strides_shape=[1, 2, 2, 1],
    )

# 5x5 convolution, 6 -> 16 channels, stride 1
with tf.variable_scope('Conv2'):
    conv2_output = conv(
        input=pooling1_output,
        filter_shape=[5, 5, 6, 16],
        bias_shape=[16],
        strides_shape=[1, 1, 1, 1],
    )

# 2x2 max-pool with stride 2
with tf.variable_scope('Pooling2'):
    pooling2_output = pooling(
        input=conv2_output,
        ksize_shape=[1, 2, 2, 1],
        strides_shape=[1, 2, 2, 1],
    )

# 5x5 convolution, 16 -> 120 channels, stride 1
with tf.variable_scope('Conv3'):
    conv3_output = conv(
        input=pooling2_output,
        filter_shape=[5, 5, 16, 120],
        bias_shape=[120],
        strides_shape=[1, 1, 1, 1],
    )

# Flatten the 7x7x120 feature maps and map them to 80 hidden units
with tf.variable_scope('Connection'):
    connection_output = connection(
        input=conv3_output,
        weight_shape=[7*7*120, 80],
        bias_shape=[80],
        flat_shape=[-1, 7*7*120],
    )

# Linear layer 80 -> 10 followed by softmax, so y_model holds class probabilities
with tf.name_scope('Output'):
    weight = tf.Variable(tf.truncated_normal([80, 10]), dtype=tf.float32)
    bias = tf.Variable(tf.truncated_normal([10]), dtype=tf.float32)
    scores = tf.add(tf.matmul(connection_output, weight), bias)
    y_model = tf.nn.softmax(scores)

(5)定义损失函数和训练精度

# Cross-entropy is used as the loss function
with tf.name_scope('Loss'):
    # y_model is already a softmax output, so the cross-entropy is computed
    # directly from the probabilities. Do not feed y_model to
    # tf.nn.softmax_cross_entropy_with_logits_v2: that op expects unscaled
    # logits (it applies softmax itself), with the one-hot targets as `labels`.
    clipped_probs = tf.clip_by_value(y_model, 1e-10, 1.0)  # avoid log(0)
    loss = tf.reduce_mean(-tf.reduce_sum(y_data * tf.log(clipped_probs), axis=1))
    tf.summary.scalar('The variation of the loss', loss)

with tf.name_scope('Accuracy'):
    # A sample counts as correct when the most probable class equals the label
    prediction = tf.equal(tf.argmax(y_model, 1), tf.argmax(y_data, 1))
    accuracy = tf.reduce_mean(tf.cast(prediction, tf.float32))
    tf.summary.scalar('The variation of the accuracy', accuracy)

(6)选择优化器及定义训练操作

# Adam is the optimiser chosen here
with tf.name_scope('Train'):
    optimizer = tf.train.AdamOptimizer(learning_rate)
    train_op = optimizer.minimize(loss)

(7)创建会话进行训练

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter("logs/", sess.graph)
    merged = tf.summary.merge_all()
    # The whole test set is fed at every evaluation point
    test_feed = {x_data: mnist.test.images, y_data: mnist.test.labels}
    # Test accuracy sampled every 50 training steps
    a = []
    try:
        for step in range(train_steps):
            # Draw a fresh mini-batch on every step; fetching it once before
            # the loop would train on the same 200 samples for the whole run.
            batch_x, batch_y = mnist.train.next_batch(200)
            sess.run(train_op, feed_dict={x_data: batch_x, y_data: batch_y})
            if step % 50 == 0:
                # A single evaluation yields both the summary and the accuracy
                summary, acc = sess.run([merged, accuracy], feed_dict=test_feed)
                print(acc)
                a.append(acc)
                writer.add_summary(summary, step)
    finally:
        # Flush and close the event file even if training is interrupted
        writer.close()

    plt.plot(a)
    plt.title('The variation of the acuracy')
    plt.xlabel('The sampling point')
    plt.ylabel('Accuracy')
    plt.tight_layout()
    # Save before show(): after the window is closed the figure may be empty
    plt.savefig('mnist_cnn_acc.png', dpi = 200)
    plt.show()

(8)完整代码

import tensorflow as tf
import matplotlib.pyplot as plt
import time
from tensorflow.examples.tutorials.mnist import input_data

# Number of optimisation steps the training loop runs
train_steps = 1000
# Learning rate handed to the optimiser
learning_rate = 0.01

# Convolution layer
def conv(input, filter_shape, bias_shape, strides_shape):
    """Build a SAME-padded 2-D convolution followed by a sigmoid activation.

    The kernel and bias are obtained through ``tf.get_variable`` under the
    names "filter" and "bias", both initialised from a truncated normal
    distribution. Callers in this file wrap each call in its own
    ``tf.variable_scope``.

    Args:
        input: Tensor passed to ``tf.nn.conv2d``.
        filter_shape: Shape of the "filter" variable.
        bias_shape: Shape of the "bias" variable.
        strides_shape: ``strides`` argument of ``tf.nn.conv2d``.

    Returns:
        ``sigmoid(conv2d(input, filter) + bias)``.
    """
    make_init = tf.truncated_normal_initializer
    kernel = tf.get_variable("filter", shape=filter_shape, initializer=make_init())
    offset = tf.get_variable("bias", shape=bias_shape, initializer=make_init())
    feature_map = tf.nn.conv2d(input, kernel, strides=strides_shape, padding='SAME')
    return tf.nn.sigmoid(feature_map + offset)

# Max-pooling layer
def pooling(input, ksize_shape, strides_shape):
    """Return the SAME-padded max-pool of ``input``.

    Args:
        input: Tensor passed to ``tf.nn.max_pool``.
        ksize_shape: ``ksize`` (pooling window) argument.
        strides_shape: ``strides`` argument.

    Returns:
        The tensor produced by ``tf.nn.max_pool``.
    """
    return tf.nn.max_pool(
        input,
        ksize=ksize_shape,
        strides=strides_shape,
        padding='SAME',
    )

# Fully connected layer
def connection(input, weight_shape, bias_shape, flat_shape):
    """Flatten ``input`` and apply a sigmoid-activated dense layer.

    The "weight" and "bias" variables are obtained through
    ``tf.get_variable`` and initialised from a truncated normal distribution.

    Args:
        input: Tensor to flatten.
        weight_shape: Shape of the "weight" matrix.
        bias_shape: Shape of the "bias" vector.
        flat_shape: Target shape handed to ``tf.reshape`` before the matmul.

    Returns:
        ``sigmoid(reshape(input, flat_shape) @ weight + bias)``.
    """
    make_init = tf.truncated_normal_initializer
    w = tf.get_variable("weight", shape=weight_shape, initializer=make_init())
    b = tf.get_variable("bias", shape=bias_shape, initializer=make_init())

    flattened = tf.reshape(input, flat_shape)
    return tf.nn.sigmoid(tf.matmul(flattened, w) + b)

# Load MNIST with one-hot encoded labels.
# NOTE(review): the directory below is a machine-specific absolute path;
# adjust it when running on another machine.
mnist = input_data.read_data_sets(
    'C:/Users/12394/PycharmProjects/Machine Learning/MNIST_data',
    one_hot=True,
)

# Placeholders for flattened images (784 values) and one-hot labels (10 classes)
with tf.name_scope('Input'):
    x_data = tf.placeholder(dtype=tf.float32, shape=[None, 784])
    y_data = tf.placeholder(dtype=tf.float32, shape=[None, 10])
    # Turn every 784-vector back into a 28x28 single-channel image
    x_image = tf.reshape(x_data, shape=[-1, 28, 28, 1])

# 5x5 convolution, 1 -> 6 channels, stride 1
with tf.variable_scope('Conv1'):
    conv1_output = conv(
        input=x_image,
        filter_shape=[5, 5, 1, 6],
        bias_shape=[6],
        strides_shape=[1, 1, 1, 1],
    )

# 2x2 max-pool with stride 2
with tf.variable_scope('Pooling1'):
    pooling1_output = pooling(
        input=conv1_output,
        ksize_shape=[1, 2, 2, 1],
        strides_shape=[1, 2, 2, 1],
    )

# 5x5 convolution, 6 -> 16 channels, stride 1
with tf.variable_scope('Conv2'):
    conv2_output = conv(
        input=pooling1_output,
        filter_shape=[5, 5, 6, 16],
        bias_shape=[16],
        strides_shape=[1, 1, 1, 1],
    )

# 2x2 max-pool with stride 2
with tf.variable_scope('Pooling2'):
    pooling2_output = pooling(
        input=conv2_output,
        ksize_shape=[1, 2, 2, 1],
        strides_shape=[1, 2, 2, 1],
    )

# 5x5 convolution, 16 -> 120 channels, stride 1
with tf.variable_scope('Conv3'):
    conv3_output = conv(
        input=pooling2_output,
        filter_shape=[5, 5, 16, 120],
        bias_shape=[120],
        strides_shape=[1, 1, 1, 1],
    )

# Flatten the 7x7x120 feature maps and map them to 80 hidden units
with tf.variable_scope('Connection'):
    connection_output = connection(
        input=conv3_output,
        weight_shape=[7*7*120, 80],
        bias_shape=[80],
        flat_shape=[-1, 7*7*120],
    )

# Linear layer 80 -> 10 followed by softmax, so y_model holds class probabilities
with tf.name_scope('Output'):
    weight = tf.Variable(tf.truncated_normal([80, 10]), dtype=tf.float32)
    bias = tf.Variable(tf.truncated_normal([10]), dtype=tf.float32)
    scores = tf.add(tf.matmul(connection_output, weight), bias)
    y_model = tf.nn.softmax(scores)

# Cross-entropy is used as the loss function
with tf.name_scope('Loss'):
    # y_model is already a softmax output, so the cross-entropy is computed
    # directly from the probabilities. Do not feed y_model to
    # tf.nn.softmax_cross_entropy_with_logits_v2: that op expects unscaled
    # logits (it applies softmax itself), with the one-hot targets as `labels`.
    clipped_probs = tf.clip_by_value(y_model, 1e-10, 1.0)  # avoid log(0)
    loss = tf.reduce_mean(-tf.reduce_sum(y_data * tf.log(clipped_probs), axis=1))
    tf.summary.scalar('The variation of the loss', loss)

with tf.name_scope('Accuracy'):
    # A sample counts as correct when the most probable class equals the label
    prediction = tf.equal(tf.argmax(y_model, 1), tf.argmax(y_data, 1))
    accuracy = tf.reduce_mean(tf.cast(prediction, tf.float32))
    tf.summary.scalar('The variation of the accuracy', accuracy)

# Adam is the optimiser chosen here
with tf.name_scope('Train'):
    optimizer = tf.train.AdamOptimizer(learning_rate)
    train_op = optimizer.minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter("logs/", sess.graph)
    merged = tf.summary.merge_all()
    # The whole test set is fed at every evaluation point
    test_feed = {x_data: mnist.test.images, y_data: mnist.test.labels}
    # Test accuracy sampled every 50 training steps
    a = []
    try:
        for step in range(train_steps):
            # Draw a fresh mini-batch on every step; fetching it once before
            # the loop would train on the same 200 samples for the whole run.
            batch_x, batch_y = mnist.train.next_batch(200)
            sess.run(train_op, feed_dict={x_data: batch_x, y_data: batch_y})
            if step % 50 == 0:
                # A single evaluation yields both the summary and the accuracy
                summary, acc = sess.run([merged, accuracy], feed_dict=test_feed)
                print(acc)
                a.append(acc)
                writer.add_summary(summary, step)
    finally:
        # Flush and close the event file even if training is interrupted
        writer.close()

    # Plot how the sampled test accuracy evolves during training
    plt.plot(a)
    plt.title('The variation of the acuracy')
    plt.xlabel('The sampling point')
    plt.ylabel('Accuracy')
    plt.tight_layout()
    # Save before show(): after the window is closed the figure may be empty
    plt.savefig('mnist_cnn_acc.png', dpi = 200)
    plt.show()

(9)Tensorboard可视化

使用tensorboard读取训练日志,得到如下计算图:
TensorFlow实践(10)——卷积神经网络模型LeNet5

(四)总 结

本文介绍了如何使用TensorFlow建立LeNet5卷积神经网络模型,读者可通过修改卷积核大小、池化方法、数据批处理数、训练迭代次数、学习率等因素调整模型,有任何的问题请在评论区留言,我会尽快回复,谢谢支持。