6 LogisticRegression(ML100Days by Avik-Jain)
# Logistic-regression demo on Social_Network_Ads.csv.
#
# Steps: load two feature columns and the label column, split 75/25, fit a
# StandardScaler -> degree-2 PolynomialFeatures -> LogisticRegression
# pipeline, print accuracy and the confusion matrix, then draw the predicted
# class regions together with every sample.
import numpy as np
import pandas as pd
from sklearn import preprocessing  # unused below; kept in case other code relies on it
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.pipeline import Pipeline
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

# Location of the input CSV; edit this to point at a local copy.
DATA_PATH = '/home/joye/datasets/Social_Network_Ads.csv'

# 1. Load the data. Only columns 2 and 3 are used as features (the axis
#    labels further down call them age and salary); the last column is the
#    class label.
dataset = pd.read_csv(DATA_PATH)
X = dataset.iloc[:, 2:4].values
Y = dataset.iloc[:, -1].values
print(X)

# Fixed random_state keeps the 75/25 split reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.25, random_state=0)

# 2. Pipeline. Feature scaling is a pipeline step rather than a separate
#    pre-processing pass, so the scaler is fitted on the training split only
#    and is merely applied to whatever is later passed to predict().
lr = Pipeline([
    ('sc', StandardScaler()),
    ('poly', PolynomialFeatures(degree=2)),
    ('clf', LogisticRegression()),
])
lr.fit(X_train, Y_train)
y_test_hat = lr.predict(X_test)

# 3. Accuracy and confusion matrix on the held-out split.
acc = accuracy_score(Y_test, y_test_hat)
print(acc)
cm = confusion_matrix(Y_test, y_test_hat)
print(cm)

# 4. Visualisation.
# Font setting so the non-ASCII plot title can be rendered; with that font
# active, fall back to the ASCII hyphen for minus signs on the axes.
mpl.rcParams['font.sans-serif'] = ['simHei']
mpl.rcParams['axes.unicode_minus'] = False
cm_light = mpl.colors.ListedColormap(['#77E0A0', '#FF8080'])  # region fill, one colour per label
cm_dark = mpl.colors.ListedColormap(['g', 'r'])  # sample markers, one colour per label

N, M = 500, 500  # number of grid samples along feature 0 / feature 1
x1_min, x1_max = X[:, 0].min(), X[:, 0].max()  # range of column 0
x2_min, x2_max = X[:, 1].min(), X[:, 1].max()  # range of column 1
t1 = np.linspace(x1_min, x1_max, N)
t2 = np.linspace(x2_min, x2_max, M)
x1, x2 = np.meshgrid(t1, t2)  # grid of sampling points covering the data range
x_test = np.stack((x1.ravel(), x2.ravel()), axis=1)  # one (feature 0, feature 1) row per grid point
y_hat = lr.predict(x_test)  # predicted label for every grid point
y_hat = y_hat.reshape(x1.shape)  # back to the grid's shape for plotting

plt.figure(facecolor='w')
plt.pcolormesh(x1, x2, y_hat, cmap=cm_light)  # predicted class regions
plt.scatter(X_train[:, 0], X_train[:, 1], s=50, c=Y_train, edgecolors='k',
            cmap=cm_dark)
plt.scatter(X_test[:, 0], X_test[:, 1], s=50, c=Y_test, edgecolors='k',
            cmap=cm_dark)
plt.xlabel('age')
plt.ylabel('salary')
plt.title('SUV购买')
patchs = [
    mpatches.Patch(color='#77E0A0', label='0'),
    mpatches.Patch(color='#FF8080', label='1'),
]
plt.legend(handles=patchs, fancybox=True, framealpha=0.8, loc='upper right')
plt.show()
-------------------------------------------------------
结果:
accuracy = 0.92
confusion_matrix = [[64, 4], [4, 28]]