DeepLearning: Building a Multi-Layer Neural Network
Building a multi-layer neural network step by step to recognize cats.
Import packages
import numpy as np
import h5py
import matplotlib.pyplot as plt
import cv2  # used at the end to load and resize a custom test image
Utility functions
sigmoid
def sigmoid(Z):
    '''
    Z - linear output of the current layer
    A - sigmoid activation of Z
    cache - Z, stored as the activation_cache for backpropagation
    '''
    A = 1 / (1 + np.exp(-Z))
    cache = Z
    return A, cache
sigmoid backward
def sigmoid_backward(dA, cache):
    '''
    dA - gradient of the cost with respect to the activation of the current layer
    cache - Z stored during the forward pass (the activation_cache)
    dZ - gradient of the cost with respect to Z
    '''
    Z = cache
    s = 1 / (1 + np.exp(-Z))
    dZ = dA * s * (1 - s)
    assert(dZ.shape == Z.shape)
    return dZ
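sigmoid_backward works because the sigmoid's derivative can be written in terms of the sigmoid itself, so only the cached Z is needed:

$$\sigma(z) = \frac{1}{1+e^{-z}}, \qquad \sigma'(z) = \sigma(z)\,\bigl(1-\sigma(z)\bigr)$$

By the chain rule, dZ = dA * σ'(Z), which is exactly the dA * s * (1 - s) line above.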
relu
def relu(Z):
    '''
    Z - linear output of the current layer
    A - ReLU activation, max(0, Z)
    cache - Z, stored as the activation_cache for backpropagation
    '''
    A = np.maximum(0, Z)
    assert(A.shape == Z.shape)
    cache = Z
    return A, cache
relu backward
def relu_backward(dA, cache):
    '''
    dA - gradient of the cost with respect to the activation of the current layer
    cache - Z stored during the forward pass (the activation_cache)
    dZ - gradient of the cost with respect to Z
    '''
    Z = cache
    dZ = np.array(dA, copy=True)  # the gradient passes through unchanged where Z > 0
    dZ[Z <= 0] = 0                # and is zeroed where Z <= 0
    assert(dZ.shape == Z.shape)
    return dZ
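As a quick sanity check of both activations (the input values here are made up for illustration):

Z = np.array([[-1.0, 0.0, 2.0]])
A_sig, _ = sigmoid(Z)   # approximately [[0.269, 0.5, 0.881]]
A_rel, _ = relu(Z)      # [[0.0, 0.0, 2.0]]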
Initialize parameters
def init_params(layer_dims):
    '''
    layer_dims - a list [n0, ..., nL] with the number of units in each layer, from layer 0 (input) to layer L
    params - a dict holding W1, b1, ..., WL, bL
    '''
    np.random.seed(3)
    L = len(layer_dims)
    params = {}
    for l in range(1, L):
        # scale the weights by 1/sqrt(units in the previous layer) to keep activations from exploding or vanishing
        params['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l - 1]) / np.sqrt(layer_dims[l - 1])
        params['b' + str(l)] = np.zeros((layer_dims[l], 1))
        assert(params['W' + str(l)].shape == (layer_dims[l], layer_dims[l - 1]))
        assert(params['b' + str(l)].shape == (layer_dims[l], 1))
    return params
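A minimal shape check on toy layer sizes (the [5, 4, 1] network below is made up for illustration; the real sizes come later in layers_dims):

params = init_params([5, 4, 1])
print(params['W1'].shape, params['b1'].shape)  # (4, 5) (4, 1)
print(params['W2'].shape, params['b2'].shape)  # (1, 4) (1, 1)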
Forward propagation
def forward_weight(A_prev, W, b):
    '''
    A_prev - activations of the previous layer
    W - weight matrix of the current layer
    b - bias vector of the current layer
    Z - linear (pre-activation) output of the current layer
    cache - (A_prev, W, b), stored for backpropagation
    '''
    Z = np.dot(W, A_prev) + b
    assert(Z.shape == (W.shape[0], A_prev.shape[1]))
    cache = (A_prev, W, b)
    return Z, cache
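In equation form, this is the linear step for layer l, where the bias of shape (n_l, 1) broadcasts across the m example columns:

$$Z^{[l]} = W^{[l]} A^{[l-1]} + b^{[l]}$$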
def forward_activation(A_prev, W, b, activation):
    '''
    A_prev - activations of the previous layer
    W - weight matrix of the current layer
    b - bias vector of the current layer
    activation - which activation function to use, 'sigmoid' or 'relu'
    A - activations of the current layer
    cache - (weight_cache, activation_cache), where
        weight_cache contains (A_prev, W, b)
        activation_cache contains Z
    '''
    Z, weight_cache = forward_weight(A_prev, W, b)
    if activation == 'sigmoid':
        A, activation_cache = sigmoid(Z)
    elif activation == 'relu':
        A, activation_cache = relu(Z)
    cache = (weight_cache, activation_cache)
    return A, cache
def forward_propagation(X, params):
    '''
    X - training data, shape (n0, m)
    params - the W and b parameters for layers 1 to L
    AL - activations of the last layer (the network output)
    caches - list of the caches from layers 1 to L
    '''
    caches = []
    A = X
    L = len(params) // 2  # each layer contributes one W and one b
    # layers 1 to L-1 use ReLU
    for l in range(1, L):
        A_prev = A
        A, cache = forward_activation(A_prev, params['W' + str(l)], params['b' + str(l)], 'relu')
        caches.append(cache)
    # the output layer L uses sigmoid
    AL, cache = forward_activation(A, params['W' + str(L)], params['b' + str(L)], 'sigmoid')
    caches.append(cache)
    assert(AL.shape == (1, X.shape[1]))
    return AL, caches
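An end-to-end shape check with toy sizes (all values here are illustrative, not from the cat dataset):

params_toy = init_params([5, 4, 1])   # toy network: 5 inputs, one hidden layer, 1 output
X_toy = np.random.randn(5, 10)        # 10 fake examples with 5 features each
AL_toy, caches_toy = forward_propagation(X_toy, params_toy)
print(AL_toy.shape, len(caches_toy))  # (1, 10) 2: one output per example, one cache per layer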
Compute the cost
def compute_cost(AL, Y):
    '''
    AL - activations of the last layer (the predictions)
    Y - labels of the training data, shape (1, m)
    cost - the cross-entropy cost, a scalar
    '''
    m = Y.shape[1]
    cost = -np.sum(np.multiply(np.log(AL), Y) + np.multiply(np.log(1 - AL), 1 - Y)) / m
    cost = np.squeeze(cost)
    assert(cost.shape == ())
    return cost
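This is the cross-entropy cost averaged over the m training examples:

$$J = -\frac{1}{m}\sum_{i=1}^{m}\left[\,y^{(i)}\log a^{[L](i)} + (1-y^{(i)})\log\!\left(1-a^{[L](i)}\right)\right]$$

np.squeeze turns the 1×1 result of the vectorized computation into a plain scalar.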
Backward propagation
def back_propagation(dZ, cache):
    '''
    dZ - gradient of the cost with respect to Z of the current layer
    cache - (A_prev, W, b) stored during the forward pass
    dA_prev, dW, db - gradients with respect to A_prev, W and b
    '''
    A_prev, W, b = cache
    m = A_prev.shape[1]
    dW = np.dot(dZ, A_prev.T) / m
    db = dZ.sum(axis=1, keepdims=True) / m
    dA_prev = np.dot(W.T, dZ)
    assert(dW.shape == W.shape)
    assert(db.shape == b.shape)
    assert(dA_prev.shape == A_prev.shape)
    return dA_prev, dW, db
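The three gradient lines are the matrix form of the linear-step derivatives, with the 1/m factor averaging over the m examples:

$$dW^{[l]} = \frac{1}{m}\,dZ^{[l]} A^{[l-1]\,T}, \qquad db^{[l]} = \frac{1}{m}\sum_{i=1}^{m} dZ^{[l](i)}, \qquad dA^{[l-1]} = W^{[l]\,T} dZ^{[l]}$$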
def back_activation(dA, cache, activation):
    '''
    dA - gradient of the cost with respect to the activation of the current layer
    cache - (weight_cache, activation_cache) from the forward pass
    activation - which activation function was used, 'sigmoid' or 'relu'
    '''
    weight_cache, activation_cache = cache
    if activation == 'sigmoid':
        dZ = sigmoid_backward(dA, activation_cache)
    elif activation == 'relu':
        dZ = relu_backward(dA, activation_cache)
    dA_prev, dW, db = back_propagation(dZ, weight_cache)
    return dA_prev, dW, db
Compute gradients
def compute_grads(AL, Y, caches):
    '''
    AL - the network output; Y - the labels; caches - the forward caches for layers 1 to L
    grads - dict of gradients; note the indexing convention here: grads["dA" + str(l)]
    stores the gradient passed back out of layer l (i.e. dA of layer l-1)
    '''
    grads = {}
    L = len(caches)
    Y = Y.reshape(AL.shape)
    # derivative of the cross-entropy cost with respect to AL
    dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
    current_cache = caches[L - 1]
    grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = back_activation(dAL, current_cache, "sigmoid")
    for l in reversed(range(L - 1)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = back_activation(grads["dA" + str(l + 2)], current_cache, "relu")
        grads["dA" + str(l + 1)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp
    return grads
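Backpropagation starts from the derivative of the cross-entropy cost with respect to the output AL, computed elementwise:

$$dA^{[L]} = -\left(\frac{Y}{A^{[L]}} - \frac{1-Y}{1-A^{[L]}}\right)$$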
Update parameters
def update_params(params, grads, learning_rate):
    '''
    One step of gradient descent on every W and b.
    '''
    L = len(params) // 2
    for l in range(L):
        params["W" + str(l + 1)] = params["W" + str(l + 1)] - learning_rate * grads['dW' + str(l + 1)]
        params["b" + str(l + 1)] = params["b" + str(l + 1)] - learning_rate * grads['db' + str(l + 1)]
    return params
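Each pass is one step of plain gradient descent with learning rate $\alpha$:

$$W^{[l]} := W^{[l]} - \alpha\, dW^{[l]}, \qquad b^{[l]} := b^{[l]} - \alpha\, db^{[l]}$$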
Build the L-layer neural network
def L_layer_model(X, Y, layers_dims, n_iterations, learning_rate, print_cost=False, plot_image=False):
    '''
    Train the L-layer network: forward pass, cost, backward pass, parameter update, repeated n_iterations times.
    '''
    np.random.seed(1)
    costs = []
    params = init_params(layers_dims)
    for iters in range(n_iterations):
        AL, caches = forward_propagation(X, params)
        cost = compute_cost(AL, Y)
        costs.append(cost)
        grads = compute_grads(AL, Y, caches)
        params = update_params(params, grads, learning_rate)
        if print_cost and iters % 100 == 0:
            print("Iteration {}: cost {}".format(iters, cost))
    if plot_image:
        plt.figure(figsize=(12, 6))
        plt.plot(range(n_iterations), costs)
        plt.title("Learning rate=" + str(learning_rate))
        plt.xlabel("iterations")
        plt.ylabel("cost")
        plt.show()
    return params
Predict
def predict(X, params, threshold):
    '''
    Run a forward pass and label an example 1 (cat) when its output exceeds threshold.
    '''
    AL, caches = forward_propagation(X, params)
    y_pred = np.where(AL > threshold, 1, 0)
    return y_pred
Compute accuracy
def score(y_pred, Y):
    '''
    Fraction of predictions matching the labels (equivalent to np.mean(y_pred == Y)).
    '''
    scores = len(Y[y_pred - Y == 0]) / Y.shape[1]
    return scores
Load the data
def load_dataset():
train_dataset = h5py.File('datasets/train_catvnoncat.h5', "r")
train_set_x_orig = np.array(train_dataset["train_set_x"][:]) # your train set features
train_set_y_orig = np.array(train_dataset["train_set_y"][:]) # your train set labels
test_dataset = h5py.File('datasets/test_catvnoncat.h5', "r")
test_set_x_orig = np.array(test_dataset["test_set_x"][:]) # your test set features
test_set_y_orig = np.array(test_dataset["test_set_y"][:]) # your test set labels
classes = np.array(test_dataset["list_classes"][:]) # the list of classes
train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))
return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes
Preprocess the data
train_set_x_orig , train_set_y , test_set_x_orig , test_set_y , classes = load_dataset()
train_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T
test_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T
train_x = train_x_flatten / 255  # scale pixel values to [0, 1]
train_y = train_set_y
test_x = test_x_flatten / 255  # the test set must be scaled the same way as the training set
test_y = test_set_y
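After flattening, each 64×64×3 image becomes one column of 12288 values scaled to [0, 1]. With the standard catvnoncat.h5 files (209 training images and 50 test images), the shapes should come out as:

print(train_x.shape, train_y.shape)  # (12288, 209) (1, 209)
print(test_x.shape, test_y.shape)    # (12288, 50) (1, 50)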
Training
# Set hyperparameters
layers_dims=[12288,20,7,5,1]
n_iterations=2500
learning_rate=0.0075
threshold=0.5
params=L_layer_model(train_x,train_y,layers_dims,n_iterations,learning_rate,print_cost=True,plot_image=True)
Iteration 0: cost 0.715731513413713
Iteration 100: cost 0.6747377593469114
Iteration 200: cost 0.6603365433622127
Iteration 300: cost 0.6462887802148751
Iteration 400: cost 0.6298131216927773
Iteration 500: cost 0.6060056229265339
Iteration 600: cost 0.5690041263975134
Iteration 700: cost 0.519796535043806
Iteration 800: cost 0.46415716786282285
Iteration 900: cost 0.40842030048298916
Iteration 1000: cost 0.37315499216069037
Iteration 1100: cost 0.3057237457304711
Iteration 1200: cost 0.26810152847740837
Iteration 1300: cost 0.23872474827672607
Iteration 1400: cost 0.20632263257914712
Iteration 1500: cost 0.17943886927493552
Iteration 1600: cost 0.15798735818801324
Iteration 1700: cost 0.14240413012274078
Iteration 1800: cost 0.128651659978866
Iteration 1900: cost 0.11244314998157877
Iteration 2000: cost 0.08505631034970913
Iteration 2100: cost 0.05758391198608711
Iteration 2200: cost 0.04456753454695279
Iteration 2300: cost 0.038082751665983655
Iteration 2400: cost 0.034410749018407606
train_y_pred=predict(train_x,params,threshold)
test_y_pred=predict(test_x,params,threshold)
train_score = score(train_y_pred, train_y)
test_score = score(test_y_pred, test_y)
print("Training set accuracy:", train_score)
print("Test set accuracy:", test_score)
Training set accuracy: 0.9952153110047847
Test set accuracy: 0.78
Display the misclassified images
def print_mistake_imag(img_data, y_true, y_pred):
    '''Show every test image whose prediction disagrees with its label.'''
    plt.figure(figsize=(12, 6))
    mistake_index = np.where(y_pred != y_true)  # catch both kinds of error, not just false positives
    width = len(mistake_index[1])
    classes = ['non-cat', 'cat']
    for i, m_index in enumerate(mistake_index[1]):
        plt.subplot(1, width, i + 1)
        plt.imshow(img_data[m_index])
        string = 'Prediction:' + classes[y_pred[0][m_index]] + '\n' + 'True:' + classes[y_true[0][m_index]]
        plt.title(string)
print_mistake_imag(test_set_x_orig, test_y, test_y_pred)  # argument order: labels, then predictions
Test the model on your own image
def discern_cat(impath, params):
    classes = ['non-cat', 'cat']
    img = cv2.imread(impath)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; convert to RGB like the training data
    img = cv2.resize(img, (64, 64))
    plt.figure(figsize=(5, 5))
    plt.imshow(img)
    img_array = img.reshape(-1, 1) / 255  # flatten to (12288, 1) and scale, matching the training preprocessing
    reason = predict(img_array, params, 0.5)[0][0]
    print('This is a ' + classes[reason])
path='my_image/1.jpg'
discern_cat(path,params)
This is a cat
Full code: https://github.com/TaoistNie/DeepLearning/tree/master/DNN