DeepLearning: Building a Multi-Layer Neural Network
Building a multi-layer neural network step by step to recognize cats.
Import packages
import numpy as np
import h5py
import matplotlib.pyplot as plt
import cv2  # used at the end to load and resize a custom test image
Utility functions
sigmoid
def sigmoid(Z):
    '''
    Z - linear output of the current layer
    A - sigmoid activation of Z
    cache - Z, stored as the activation_cache for backpropagation
    '''
    A = 1 / (1 + np.exp(-Z))
    cache = Z
    return A, cache
sigmoid backward
def sigmoid_backward(dA, cache):
    '''
    dA - gradient of the cost with respect to the activation of the current layer
    cache - Z stored during the forward pass (the activation_cache)
    dZ - gradient of the cost with respect to Z
    '''
    Z = cache
    s = 1 / (1 + np.exp(-Z))
    dZ = dA * s * (1 - s)
    assert(dZ.shape == Z.shape)
    return dZ
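sigmoid_backward works because the sigmoid's derivative can be written in terms of the sigmoid itself, so only the cached Z is needed:

$$\sigma(z) = \frac{1}{1+e^{-z}}, \qquad \sigma'(z) = \sigma(z)\,\bigl(1-\sigma(z)\bigr)$$

By the chain rule, dZ = dA * σ'(Z), which is exactly the dA * s * (1 - s) line above.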
relu
def relu(Z):
    '''
    Z - linear output of the current layer
    A - ReLU activation, max(0, Z)
    cache - Z, stored as the activation_cache for backpropagation
    '''
    A = np.maximum(0, Z)
    assert(A.shape == Z.shape)
    cache = Z
    return A, cache
relu backward
def relu_backward(dA, cache):
    '''
    dA - gradient of the cost with respect to the activation of the current layer
    cache - Z stored during the forward pass (the activation_cache)
    dZ - gradient of the cost with respect to Z
    '''
    Z = cache
    dZ = np.array(dA, copy=True)  # the gradient passes through unchanged where Z > 0
    dZ[Z <= 0] = 0                # and is zeroed where Z <= 0
    assert(dZ.shape == Z.shape)
    return dZ
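As a quick sanity check of both activations (the input values here are made up for illustration):

Z = np.array([[-1.0, 0.0, 2.0]])
A_sig, _ = sigmoid(Z)   # approximately [[0.269, 0.5, 0.881]]
A_rel, _ = relu(Z)      # [[0.0, 0.0, 2.0]]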
Initialize parameters
def init_params(layer_dims):
    '''
    layer_dims - a list [n0, ..., nL] with the number of units in each layer, from layer 0 (input) to layer L
    params - a dict holding W1, b1, ..., WL, bL
    '''
    np.random.seed(3)
    L = len(layer_dims)
    params = {}
    for l in range(1, L):
        # scale the weights by 1/sqrt(units in the previous layer) to keep activations from exploding or vanishing
        params['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l - 1]) / np.sqrt(layer_dims[l - 1])
        params['b' + str(l)] = np.zeros((layer_dims[l], 1))
        assert(params['W' + str(l)].shape == (layer_dims[l], layer_dims[l - 1]))
        assert(params['b' + str(l)].shape == (layer_dims[l], 1))
    return params
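A minimal shape check on toy layer sizes (the [5, 4, 1] network below is made up for illustration; the real sizes come later in layers_dims):

params = init_params([5, 4, 1])
print(params['W1'].shape, params['b1'].shape)  # (4, 5) (4, 1)
print(params['W2'].shape, params['b2'].shape)  # (1, 4) (1, 1)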
Forward propagation
def forward_weight(A_prev, W, b):
    '''
    A_prev - activations of the previous layer
    W - weight matrix of the current layer
    b - bias vector of the current layer
    Z - linear (pre-activation) output of the current layer
    cache - (A_prev, W, b), stored for backpropagation
    '''
    Z = np.dot(W, A_prev) + b
    assert(Z.shape == (W.shape[0], A_prev.shape[1]))
    cache = (A_prev, W, b)
    return Z, cache
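In equation form, this is the linear step for layer l, where the bias of shape (n_l, 1) broadcasts across the m example columns:

$$Z^{[l]} = W^{[l]} A^{[l-1]} + b^{[l]}$$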
def forward_activation(A_prev, W, b, activation):
    '''
    A_prev - activations of the previous layer
    W - weight matrix of the current layer
    b - bias vector of the current layer
    activation - which activation function to use, 'sigmoid' or 'relu'
    A - activations of the current layer
    cache - (weight_cache, activation_cache), where
        weight_cache contains (A_prev, W, b)
        activation_cache contains Z
    '''
    Z, weight_cache = forward_weight(A_prev, W, b)
    if activation == 'sigmoid':
        A, activation_cache = sigmoid(Z)
    elif activation == 'relu':
        A, activation_cache = relu(Z)
    cache = (weight_cache, activation_cache)
    return A, cache
def forward_propagation(X, params):
    '''
    X - training data, shape (n0, m)
    params - the W and b parameters for layers 1 to L
    AL - activations of the last layer (the network output)
    caches - list of the caches from layers 1 to L
    '''
    caches = []
    A = X
    L = len(params) // 2  # each layer contributes one W and one b
    # layers 1 to L-1 use ReLU
    for l in range(1, L):
        A_prev = A
        A, cache = forward_activation(A_prev, params['W' + str(l)], params['b' + str(l)], 'relu')
        caches.append(cache)
    # the output layer L uses sigmoid
    AL, cache = forward_activation(A, params['W' + str(L)], params['b' + str(L)], 'sigmoid')
    caches.append(cache)
    assert(AL.shape == (1, X.shape[1]))
    return AL, caches
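An end-to-end shape check with toy sizes (all values here are illustrative, not from the cat dataset):

params_toy = init_params([5, 4, 1])   # toy network: 5 inputs, one hidden layer, 1 output
X_toy = np.random.randn(5, 10)        # 10 fake examples with 5 features each
AL_toy, caches_toy = forward_propagation(X_toy, params_toy)
print(AL_toy.shape, len(caches_toy))  # (1, 10) 2: one output per example, one cache per layer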
Compute the cost
def compute_cost(AL, Y):
    '''
    AL - activations of the last layer (the predictions)
    Y - labels of the training data, shape (1, m)
    cost - the cross-entropy cost, a scalar
    '''
    m = Y.shape[1]
    cost = -np.sum(np.multiply(np.log(AL), Y) + np.multiply(np.log(1 - AL), 1 - Y)) / m
    cost = np.squeeze(cost)
    assert(cost.shape == ())
    return cost
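This is the cross-entropy cost averaged over the m training examples:

$$J = -\frac{1}{m}\sum_{i=1}^{m}\left[\,y^{(i)}\log a^{[L](i)} + (1-y^{(i)})\log\!\left(1-a^{[L](i)}\right)\right]$$

np.squeeze turns the 1×1 result of the vectorized computation into a plain scalar.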
Backward propagation
def back_propagation(dZ, cache):
    '''
    dZ - gradient of the cost with respect to Z of the current layer
    cache - (A_prev, W, b) stored during the forward pass
    dA_prev, dW, db - gradients with respect to A_prev, W and b
    '''
    A_prev, W, b = cache
    m = A_prev.shape[1]
    dW = np.dot(dZ, A_prev.T) / m
    db = dZ.sum(axis=1, keepdims=True) / m
    dA_prev = np.dot(W.T, dZ)
    assert(dW.shape == W.shape)
    assert(db.shape == b.shape)
    assert(dA_prev.shape == A_prev.shape)
    return dA_prev, dW, db
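The three gradient lines are the matrix form of the linear-step derivatives, with the 1/m factor averaging over the m examples:

$$dW^{[l]} = \frac{1}{m}\,dZ^{[l]} A^{[l-1]\,T}, \qquad db^{[l]} = \frac{1}{m}\sum_{i=1}^{m} dZ^{[l](i)}, \qquad dA^{[l-1]} = W^{[l]\,T} dZ^{[l]}$$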
def back_activation(dA, cache, activation):
    '''
    dA - gradient of the cost with respect to the activation of the current layer
    cache - (weight_cache, activation_cache) from the forward pass
    activation - which activation function was used, 'sigmoid' or 'relu'
    '''
    weight_cache, activation_cache = cache
    if activation == 'sigmoid':
        dZ = sigmoid_backward(dA, activation_cache)
    elif activation == 'relu':
        dZ = relu_backward(dA, activation_cache)
    dA_prev, dW, db = back_propagation(dZ, weight_cache)
    return dA_prev, dW, db
Compute gradients
def compute_grads(AL, Y, caches):
    '''
    AL - the network output; Y - the labels; caches - the forward caches for layers 1 to L
    grads - dict of gradients; note the indexing convention here: grads["dA" + str(l)]
    stores the gradient passed back out of layer l (i.e. dA of layer l-1)
    '''
    grads = {}
    L = len(caches)
    Y = Y.reshape(AL.shape)
    # derivative of the cross-entropy cost with respect to AL
    dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
    current_cache = caches[L - 1]
    grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = back_activation(dAL, current_cache, "sigmoid")
    for l in reversed(range(L - 1)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = back_activation(grads["dA" + str(l + 2)], current_cache, "relu")
        grads["dA" + str(l + 1)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp
    return grads
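Backpropagation starts from the derivative of the cross-entropy cost with respect to the output AL, computed elementwise:

$$dA^{[L]} = -\left(\frac{Y}{A^{[L]}} - \frac{1-Y}{1-A^{[L]}}\right)$$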
Update parameters
def update_params(params, grads, learning_rate):
    '''
    One step of gradient descent on every W and b.
    '''
    L = len(params) // 2
    for l in range(L):
        params["W" + str(l + 1)] = params["W" + str(l + 1)] - learning_rate * grads['dW' + str(l + 1)]
        params["b" + str(l + 1)] = params["b" + str(l + 1)] - learning_rate * grads['db' + str(l + 1)]
    return params
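Each pass is one step of plain gradient descent with learning rate $\alpha$:

$$W^{[l]} := W^{[l]} - \alpha\, dW^{[l]}, \qquad b^{[l]} := b^{[l]} - \alpha\, db^{[l]}$$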
Build the L-layer neural network
def L_layer_model(X, Y, layers_dims, n_iterations, learning_rate, print_cost=False, plot_image=False):
    '''
    Train the L-layer network: forward pass, cost, backward pass, parameter update, repeated n_iterations times.
    '''
    np.random.seed(1)
    costs = []
    params = init_params(layers_dims)
    for iters in range(n_iterations):
        AL, caches = forward_propagation(X, params)
        cost = compute_cost(AL, Y)
        costs.append(cost)
        grads = compute_grads(AL, Y, caches)
        params = update_params(params, grads, learning_rate)
        if print_cost and iters % 100 == 0:
            print("Iteration {}: cost {}".format(iters, cost))
    if plot_image:
        plt.figure(figsize=(12, 6))
        plt.plot(range(n_iterations), costs)
        plt.title("Learning rate=" + str(learning_rate))
        plt.xlabel("iterations")
        plt.ylabel("cost")
        plt.show()
    return params
Predict
def predict(X, params, threshold):
    '''
    Run a forward pass and label an example 1 (cat) when its output exceeds threshold.
    '''
    AL, caches = forward_propagation(X, params)
    y_pred = np.where(AL > threshold, 1, 0)
    return y_pred
Compute accuracy
def score(y_pred, Y):
    '''
    Fraction of predictions matching the labels (equivalent to np.mean(y_pred == Y)).
    '''
    scores = len(Y[y_pred - Y == 0]) / Y.shape[1]
    return scores
Load the data
def load_dataset():
train_dataset = h5py.File('datasets/train_catvnoncat.h5', "r")
train_set_x_orig = np.array(train_dataset["train_set_x"][:]) # your train set features
train_set_y_orig = np.array(train_dataset["train_set_y"][:]) # your train set labels
test_dataset = h5py.File('datasets/test_catvnoncat.h5', "r")
test_set_x_orig = np.array(test_dataset["test_set_x"][:]) # your test set features
test_set_y_orig = np.array(test_dataset["test_set_y"][:]) # your test set labels
classes = np.array(test_dataset["list_classes"][:]) # the list of classes
train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))
return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes
Preprocess the data
train_set_x_orig , train_set_y , test_set_x_orig , test_set_y , classes = load_dataset()
train_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T
test_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T
train_x = train_x_flatten / 255  # scale pixel values to [0, 1]
train_y = train_set_y
test_x = test_x_flatten / 255  # the test set must be scaled the same way as the training set
test_y = test_set_y
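After flattening, each 64×64×3 image becomes one column of 12288 values scaled to [0, 1]. With the standard catvnoncat.h5 files (209 training images and 50 test images), the shapes should come out as:

print(train_x.shape, train_y.shape)  # (12288, 209) (1, 209)
print(test_x.shape, test_y.shape)    # (12288, 50) (1, 50)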
Training
# Set hyperparameters
layers_dims=[12288,20,7,5,1]
n_iterations=2500
learning_rate=0.0075
threshold=0.5
params=L_layer_model(train_x,train_y,layers_dims,n_iterations,learning_rate,print_cost=True,plot_image=True)
Iteration 0: cost 0.715731513413713
Iteration 100: cost 0.6747377593469114
Iteration 200: cost 0.6603365433622127
Iteration 300: cost 0.6462887802148751
Iteration 400: cost 0.6298131216927773
Iteration 500: cost 0.6060056229265339
Iteration 600: cost 0.5690041263975134
Iteration 700: cost 0.519796535043806
Iteration 800: cost 0.46415716786282285
Iteration 900: cost 0.40842030048298916
Iteration 1000: cost 0.37315499216069037
Iteration 1100: cost 0.3057237457304711
Iteration 1200: cost 0.26810152847740837
Iteration 1300: cost 0.23872474827672607
Iteration 1400: cost 0.20632263257914712
Iteration 1500: cost 0.17943886927493552
Iteration 1600: cost 0.15798735818801324
Iteration 1700: cost 0.14240413012274078
Iteration 1800: cost 0.128651659978866
Iteration 1900: cost 0.11244314998157877
Iteration 2000: cost 0.08505631034970913
Iteration 2100: cost 0.05758391198608711
Iteration 2200: cost 0.04456753454695279
Iteration 2300: cost 0.038082751665983655
Iteration 2400: cost 0.034410749018407606
train_y_pred=predict(train_x,params,threshold)
test_y_pred=predict(test_x,params,threshold)
train_score = score(train_y_pred, train_y)
test_score = score(test_y_pred, test_y)
print("Training set accuracy:", train_score)
print("Test set accuracy:", test_score)
Training set accuracy: 0.9952153110047847
Test set accuracy: 0.78
Display the misclassified images
def print_mistake_imag(img_data, y_true, y_pred):
    '''Show every test image whose prediction disagrees with its label.'''
    plt.figure(figsize=(12, 6))
    mistake_index = np.where(y_pred != y_true)  # catch both kinds of error, not just false positives
    width = len(mistake_index[1])
    classes = ['non-cat', 'cat']
    for i, m_index in enumerate(mistake_index[1]):
        plt.subplot(1, width, i + 1)
        plt.imshow(img_data[m_index])
        string = 'Prediction:' + classes[y_pred[0][m_index]] + '\n' + 'True:' + classes[y_true[0][m_index]]
        plt.title(string)
print_mistake_imag(test_set_x_orig, test_y, test_y_pred)  # argument order: labels, then predictions
Test the model on your own image
def discern_cat(impath, params):
    classes = ['non-cat', 'cat']
    img = cv2.imread(impath)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; convert to RGB like the training data
    img = cv2.resize(img, (64, 64))
    plt.figure(figsize=(5, 5))
    plt.imshow(img)
    img_array = img.reshape(-1, 1) / 255  # flatten to (12288, 1) and scale, matching the training preprocessing
    reason = predict(img_array, params, 0.5)[0][0]
    print('This is a ' + classes[reason])
path='my_image/1.jpg'
discern_cat(path,params)
This is a cat
Full code: https://github.com/TaoistNie/DeepLearning/tree/master/DNN