logistic

数据的可视化和梯度上升法

import matplotlib.pyplot as plt

定义数据集加载函数

def loadDataSet(filename='testSet.txt'):
    """Load a whitespace-separated, two-feature labelled data set.

    Every non-blank line of the file is expected to hold three fields:
    ``x1 x2 label``.

    Args:
        filename: Path of the text file to read. Defaults to
            ``'testSet.txt'`` in the current working directory.

    Returns:
        A ``(dataMat, labelMat)`` tuple. ``dataMat`` is a list of
        ``[1, x1, x2]`` rows (the leading 1 is the constant term that
        multiplies the intercept weight) and ``labelMat`` is the list of
        integer labels, in file order.
    """
    dataMat = []
    labelMat = []
    # The context manager closes the handle even when a line fails to parse.
    with open(filename) as f:
        for line in f:
            line_list = line.split()
            if not line_list:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            dataMat.append([1, float(line_list[0]), float(line_list[1])])
            labelMat.append(int(line_list[2]))
    return dataMat, labelMat

定义 sigmoid 函数

def sigmoid(inX):
    """Return the logistic function 1 / (1 + e^(-inX)), applied element-wise.

    ``inX`` may be a scalar or any NumPy array/matrix; the result has the
    same shape as ``np.exp(-inX)``.
    """
    denominator = 1 + np.exp(-inX)
    return 1.0 / denominator
import numpy as np
# Load the samples and split their two feature columns by class label so
# that each class can be drawn in its own colour.
dataMat, labelMat = loadDataSet()
dataMatArr = np.array(dataMat)
# Expose the array under both names: the row count below reads it as
# `dataArr`, which was previously never bound and raised NameError.
dataArr = dataMatArr
n = np.shape(dataArr)[0]
x_cord_1 = []; y_cord_1 = []  # samples labelled 1
x_cord_2 = []; y_cord_2 = []  # all other samples
for i in range(n):
    # Column 0 is the constant 1 added by loadDataSet(); columns 1 and 2
    # are the two features.
    if int(labelMat[i]) == 1:
        x_cord_1.append(dataArr[i, 1])
        y_cord_1.append(dataArr[i, 2])
    else:
        x_cord_2.append(dataArr[i, 1])
        y_cord_2.append(dataArr[i, 2])

绘制图像

# Draw both classes on a single axes: label-1 samples in red, the rest in
# green.
fig = plt.figure()
# 111 = split the canvas into a 1x1 grid and take its first cell (cells are
# counted left-to-right, top-to-bottom).
ax = fig.add_subplot(111)
for cls_x, cls_y, cls_color in (
    (x_cord_1, y_cord_1, 'red'),
    (x_cord_2, y_cord_2, 'green'),
):
    ax.scatter(cls_x, cls_y, s=30, c=cls_color)
<matplotlib.collections.PathCollection at 0x8db64a8>

logistic

logistic

实现梯度上升算法函数

def gradAscent(dataMatIn, classLabels, alpha=0.001, maxCycles=500):
    """Fit logistic-regression weights by batch gradient ascent.

    Every iteration uses the whole data set: it computes the sigmoid of the
    current linear scores, takes the difference between the labels and
    those predictions, and moves the weights along ``X^T * error``.

    Args:
        dataMatIn: 2-D array-like of shape (m, n), one sample per row.
        classLabels: Sequence of m labels, one per sample.
        alpha: Step size applied to each update (default 0.001).
        maxCycles: Number of full-batch iterations (default 500).

    Returns:
        An ``np.matrix`` of shape (n, 1) holding the fitted weights.
    """
    # Convert the inputs to NumPy matrices so that `*` below means matrix
    # multiplication. np.asmatrix is used because the `np.mat` alias was
    # removed in NumPy 2.0.
    dataMatrix = np.asmatrix(dataMatIn)
    labelMatrix = np.asmatrix(classLabels).transpose()  # (m, 1) column
    m, n = np.shape(dataMatrix)
    weights = np.ones((n, 1))
    for k in range(maxCycles):
        h = sigmoid(dataMatrix * weights)  # (m, 1) predictions
        error = labelMatrix - h
        weights = weights + alpha * dataMatrix.transpose() * error
    return weights
# Train on the samples returned by loadDataSet(); gradAscent converts the
# plain list to a matrix itself, so no separate array is needed here.
weights = gradAscent(dataMat, labelMat)
weights
matrix([[ 4.12414349],
        [ 0.48007329],
        [-0.6168482 ]])

分析数据,画出决策边界

def plotBestFit(weights):
    """Scatter the data set and draw the fitted linear decision boundary.

    The samples are re-read with ``loadDataSet()``; label-1 samples are
    drawn in red and all others in green. The boundary is the line where
    ``w0 + w1 * x1 + w2 * x2 = 0``, plotted for x1 in [-3, 3).

    Args:
        weights: The three weights ``w0, w1, w2``. Any array-like is
            accepted: a flat sequence, a (3, 1) ndarray, or the
            ``np.matrix`` returned by ``gradAscent``.
    """
    # Flatten to a 1-D ndarray so that indexing yields scalars. Without
    # this, an np.matrix argument makes `weights[1] * x` a matrix product
    # instead of an element-wise scaling.
    w = np.asarray(weights, dtype=float).ravel()

    dataMat, labelMat = loadDataSet()
    dataArr = np.array(dataMat)
    x_cord_1 = []; y_cord_1 = []  # samples labelled 1
    x_cord_2 = []; y_cord_2 = []  # all other samples
    for row, label in zip(dataArr, labelMat):
        if int(label) == 1:
            x_cord_1.append(row[1])
            y_cord_1.append(row[2])
        else:
            x_cord_2.append(row[1])
            y_cord_2.append(row[2])

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(x_cord_1, y_cord_1, s=30, c='red')
    ax.scatter(x_cord_2, y_cord_2, s=30, c='green')

    x = np.arange(-3, 3, 0.1)
    # Solve w0 * x0 + w1 * x1 + w2 * x2 = 0 for x2, with x0 = 1 (the
    # constant column that loadDataSet() prepends to every sample).
    y = (-w[0] - w[1] * x) / w[2]
    ax.plot(x, y)
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()
# getA() is the inverse of building a matrix with np.mat(): it returns the
# NumPy matrix as a plain ndarray.
weights_array = weights.getA()
plotBestFit(weights_array)

logistic