逻辑回归-线性决策边界(python3版本)
导入相关库
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
导入数据
# Load the comma-separated training file (it has no header row) and give the
# three positional columns readable names.
# NOTE(review): the path below looks like a placeholder; point it at a local copy.
fpath = r'.../ex2data1.txt'
df = pd.read_table(fpath, sep=',', header=None, engine='python').rename(
    columns={0: 'Exam_1', 1: 'Exam_2', 2: 'Admitted'}
)
数据预处理
# Min-max scale every column of df into the [0, 1] interval.
df_norm = (df - df.min()) / (df.max() - df.min())
数据可视化
# Figure 1: scatter the raw exam scores, one series per admission outcome.
for admitted_value, scatter_style in (
    (0, dict(edgecolors='k', color='y', label='Not Admitted')),
    (1, dict(marker='+', color='k', label='Admitted')),
):
    group = df[df['Admitted'] == admitted_value]
    plt.scatter(group['Exam_1'], group['Exam_2'], **scatter_style)
plt.legend(loc='upper right')
plt.xlabel('Exam 1 Score')
plt.ylabel('Exam 2 Score')
plt.title('Figure 1:Scatter plot of training data')
Sigmoid Function
- hypothesis: $h_\theta(x) = g(\theta^T x)$
- sigmoid function: $g(z) = \dfrac{1}{1 + e^{-z}}$
# Sigmoid (logistic) function
def sgd_f(theta_mtr, x_mtr):
    """Return the sigmoid of the linear scores ``x_mtr * theta_mtr.T``.

    Despite the ``sgd`` prefix, this is the logistic function
    g(z) = 1 / (1 + exp(-z)), not a gradient-descent step.

    Args:
        theta_mtr: Parameter row vector of shape (1, n).
        x_mtr: Design matrix of shape (m, n), one sample per row.

    Returns:
        An ``np.matrix`` of shape (m, 1) holding g(x * theta^T) per sample.
    """
    # Coerce to np.matrix so that `*` is always a matrix product; with plain
    # ndarrays `*` would broadcast element-wise and yield the wrong shape.
    z = np.asmatrix(x_mtr) * np.asmatrix(theta_mtr).T
    g = 1 / (1 + np.exp(-z))
    return g
# Cost function
def cost_f(theta_mtr, x_mtr, y_mtr):
    """Return the logistic-regression cost (negative mean log-likelihood).

    Args:
        theta_mtr: Parameter row vector of shape (1, n).
        x_mtr: Design matrix of shape (m, n), one sample per row.
        y_mtr: Label column vector of shape (m, 1).

    Returns:
        The scalar cost J(theta).
    """
    m = x_mtr.shape[0]
    h = sgd_f(theta_mtr, x_mtr)
    # Keep h strictly inside (0, 1): a saturated sigmoid would otherwise give
    # log(0) = -inf and 0 * -inf = nan, which breaks the convergence test.
    tiny = np.finfo(float).eps
    h = np.clip(h, tiny, 1 - tiny)
    # np.matrix makes `*` a matrix product, so each term is a 1x1 sum over samples.
    y = np.asmatrix(y_mtr)
    lh = y.T * np.log(h) + (1 - y).T * np.log(1 - h)
    # Negate the log-likelihood so that gradient descent can minimise a
    # convex function instead of maximising the likelihood.
    J = -lh / m
    return J[0, 0]
梯度法
# Gradient of the cost with respect to theta
def derv_theta(theta_mtr, x_mtr, y_mtr):
    """Return (1/m) * (h - y)^T * X, the step direction used to update theta.

    Args:
        theta_mtr: Current parameter row vector.
        x_mtr: Two-dimensional sample matrix, one sample per row.
        y_mtr: Labels matching the rows of ``x_mtr``.

    Returns:
        The gradient as a row vector with one entry per column of ``x_mtr``.
    """
    sample_count, _ = x_mtr.shape
    residual = sgd_f(theta_mtr, x_mtr) - y_mtr
    weight = 1 / sample_count
    return weight * (residual.T * x_mtr)
# Batch gradient descent
def grsdient_bgd(x_mtr, y_mtr, theta_mtr, alpha, epsilon, MaxIter):
    """Run batch gradient descent until the cost stops changing.

    Args:
        x_mtr: Design matrix, one sample per row.
        y_mtr: Label column vector.
        theta_mtr: Initial parameter row vector.
        alpha: Learning rate.
        epsilon: Stop once the cost changes by less than this between
            consecutive iterations.
        MaxIter: Upper bound on the number of iterations.

    Returns:
        A tuple ``(J_lst, theta_mtr_lst, n_iter)``. ``J_lst[k]`` and
        ``theta_mtr_lst[k]`` are the cost and the parameters *before* the
        k-th update, so the parameters produced by the last update are not
        appended. ``n_iter`` is the number of updates performed and equals
        the length of both lists.
    """
    J0 = 0
    J_lst = []
    theta_mtr_lst = []
    for _ in range(MaxIter):
        J = cost_f(theta_mtr, x_mtr, y_mtr)
        if abs(J - J0) < epsilon:
            break
        theta_mtr_lst.append(theta_mtr)
        theta_mtr = theta_mtr - alpha * derv_theta(theta_mtr, x_mtr, y_mtr)
        J0 = J
        J_lst.append(J)
    # Count completed updates from the history itself; this also stays
    # defined when MaxIter is 0 and the loop body never runs.
    n_iter = len(J_lst)
    print('MaxIteration Num is %d' % n_iter)
    return J_lst, theta_mtr_lst, n_iter
# Stochastic gradient descent
def grsdient_sgd(x_mtr, y_mtr, theta_mtr, alpha, epsilon, MaxIter):
    """Run per-sample gradient descent until the cost stops changing.

    Each outer iteration evaluates the cost on the full data set and then
    updates theta once per sample, visiting the rows in their stored order.

    Args:
        x_mtr: Design matrix, one sample per row.
        y_mtr: Label column vector.
        theta_mtr: Initial parameter row vector.
        alpha: Learning rate.
        epsilon: Stop once the full-data cost changes by less than this
            between consecutive outer iterations.
        MaxIter: Upper bound on the number of outer iterations.

    Returns:
        A tuple ``(J_lst, theta_mtr_lst, n_iter)``. ``J_lst[k]`` and
        ``theta_mtr_lst[k]`` are the cost and the parameters at the start of
        the k-th outer iteration. ``n_iter`` is the number of outer
        iterations performed and equals the length of both lists.
    """
    m = x_mtr.shape[0]
    J0 = 0
    J_lst = []
    theta_mtr_lst = []
    for _ in range(MaxIter):
        J = cost_f(theta_mtr, x_mtr, y_mtr)
        if abs(J - J0) < epsilon:
            break
        theta_mtr_lst.append(theta_mtr)
        for i in range(m):
            theta_mtr = theta_mtr - alpha * derv_theta(theta_mtr, x_mtr[i, :], y_mtr[i, :])
        J0 = J
        J_lst.append(J)
    # Count completed outer iterations from the history itself; this also
    # stays defined when MaxIter is 0 and the loop body never runs.
    n_iter = len(J_lst)
    print('MaxIteration Num is %d' % n_iter)
    return J_lst, theta_mtr_lst, n_iter
if __name__ == '__main__':
    # Hyper-parameters: learning rate, convergence tolerance, iteration cap.
    alpha = 0.1
    epsilon = 1e-7
    MaxIter = 15000
    m, n = df.iloc[:, :2].shape
    # theta: one weight per feature plus the intercept, initialised to zero
    theta_mtr = np.zeros([1, n + 1])
    # x matrix: scaled features with a leading column of ones for the intercept
    x_mtr = np.matrix(df_norm.iloc[:, :2].values)
    x0 = np.matrix(np.ones(m))
    x_mtr = np.hstack([x0.T, x_mtr])
    # y matrix: every column from index 2 onward of the unscaled frame
    y_mtr = np.matrix(df.iloc[:, 2:].values)
    # Cost at the initial theta
    J = cost_f(theta_mtr, x_mtr, y_mtr)
    J_lst, theta_mtr_lst, Maxiternum = grsdient_bgd(x_mtr, y_mtr, theta_mtr, alpha, epsilon, MaxIter)
线性决策边界
# Solve for the coefficients of the linear decision boundary.
# The boundary is where theta0 + theta1*x1 + theta2*x2 = 0, i.e.
# x2 = -(theta0/theta2) - (theta1/theta2)*x1 in the scaled feature space.
line_theta = theta_mtr_lst[-1]
line_param_0 = line_theta[0, 0] / -line_theta[0, 2]
line_param_1 = line_theta[0, 1] / -line_theta[0, 2]
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111)
# plot classify line
plot_x = np.linspace(0, 0.9, 100)
plot_y = line_param_1 * plot_x + line_param_0
# Undo the min-max scaling so the line is drawn in original score units.
plot_x = plot_x * (df['Exam_1'].max() - df['Exam_1'].min()) + df['Exam_1'].min()
plot_y = plot_y * (df['Exam_2'].max() - df['Exam_2'].min()) + df['Exam_2'].min()
ax.plot(plot_x, plot_y, 'r-', label='classify line')
# plot original data
ax.scatter(df[df['Admitted'] == 0]['Exam_1'], df[df['Admitted'] == 0]['Exam_2'],
           edgecolors='k', color='y', label='Not Admitted')
ax.scatter(df[df['Admitted'] == 1]['Exam_1'], df[df['Admitted'] == 1]['Exam_2'],
           marker='+', color='k', label='Admitted')
ax.set(xlim=[30, 100], ylim=[30, 100], title='Figure 2: Training data with decision boundary',
       xlabel='Exam 1 Score', ylabel='Exam 2 Score')
plt.legend(loc='upper right')
预测
# Predict
def peedic_f(exam1, exam2):
    """Classify one applicant from two raw exam scores.

    The scores are min-max scaled with the column ranges of ``df`` and
    evaluated with the last entry of ``theta_mtr_lst``.

    Args:
        exam1: Raw score of the first exam.
        exam2: Raw score of the second exam.

    Returns:
        1 if the predicted probability is greater than 0.5, otherwise 0.
    """
    final_theta = theta_mtr_lst[-1]
    lo1, hi1 = df['Exam_1'].min(), df['Exam_1'].max()
    lo2, hi2 = df['Exam_2'].min(), df['Exam_2'].max()
    scaled_1 = (exam1 - lo1) / (hi1 - lo1)
    scaled_2 = (exam2 - lo2) / (hi2 - lo2)
    # The leading 1 is the intercept term.
    features = np.matrix([1, scaled_1, scaled_2])
    prob = sgd_f(final_theta, features)[0, 0]
    return 1 if prob > 0.5 else 0
# Label every training row with the classifier's prediction.
df['Predicted_label'] = df[['Exam_1', 'Exam_2']].apply(lambda scores: peedic_f(*scores), axis=1)
# Rows where the prediction disagrees with the true label (a bare expression,
# so the result is not stored).
df.loc[df['Admitted'].ne(df['Predicted_label'])]
- 利用得到的逻辑回归作为分类器对原始数据是否录取做预测,可以看出有8个数据预测结果与实际结果不符合,这8个数据刚好和上图中8个误分类的点一一对应。
3个$\theta$值在迭代过程中的变化
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111)
# Split the recorded parameter history into one series per component.
theta0_lst = [snapshot[0, 0] for snapshot in theta_mtr_lst]
theta1_lst = [snapshot[0, 1] for snapshot in theta_mtr_lst]
theta2_lst = [snapshot[0, 2] for snapshot in theta_mtr_lst]
# x axis: iteration numbers
IterNum = list(range(Maxiternum))
for series, line_fmt, legend_text in (
    (theta0_lst, 'r-', r'$\theta_0$'),
    (theta1_lst, 'b-', r'$\theta_1$'),
    (theta2_lst, 'g-', r'$\theta_2$'),
):
    ax.plot(IterNum, series, line_fmt, label=legend_text)
ax.set(xlabel='Number of Iteration', ylabel=r'$\theta$', title=r'Relation between Number of Interation and $\theta$')
plt.legend()
- 从图中可以看出,$\theta$ 的取值先增大后降低,最后3个 $\theta$ 值趋于定值。
⚠️博主非相关专业出身,转专业自学,写此博客纯为交流和分享,有错误之处请在留言处指出,谢谢!
⚠️参考博客《斯坦福机器学习笔记》:https://yoyoyohamapi.gitbooks.io/mit-ml/content/
⚠️吴恩达《机器学习》课后作业,源数据下载:https://github.com/nsoojin/coursera-ml-py。