Implementing Logistic Regression (LR) in Python: Solving the "RuntimeWarning: overflow encountered in exp" Problem
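Before the full implementation, here is a minimal reproduction of the warning. For a large negative input, the naive sigmoid evaluates np.exp(-in_x) with a huge positive argument; float64 overflows once the argument exceeds roughly 709, so NumPy emits the warning and returns inf. The input value below is only illustrative:

import numpy as np

def naive_sigmoid(in_x):
    return 1.0 / (1 + np.exp(-in_x))

naive_sigmoid(-1000)  # RuntimeWarning: overflow encountered in exp; 1/(1+inf) then evaluates to 0.0

The fixed implementation follows.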
import numpy as np

filepath = r'C:\Users\Administrator\Desktop\ML\machinelearninginaction-master\machinelearninginaction-master\Ch05'

def load_dataset():
    data_mat = []
    label_mat = []
    fr = open(filepath + '/testSet.txt')
    for line in fr.readlines():
        line = line.strip().split()
        data_mat.append([1.0, float(line[0]), float(line[1])])  # the leading 1.0 is the constant (bias) term
        label_mat.append(int(line[2]))
    fr.close()
    return data_mat, label_mat
def sigmoid(in_x):  # fixes: RuntimeWarning: overflow encountered in exp
    # return 1.0/(1 + np.exp(-in_x))  # naive version: overflows when in_x is a large negative number
    # numerically stable version: only ever exponentiate a non-positive argument
    if in_x >= 0:
        return 1.0/(1 + np.exp(-in_x))
    else:
        return np.exp(in_x)/(1 + np.exp(in_x))
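The branching version above only works for scalar inputs, which is fine for stocGradAscent1 below since it calls sigmoid on the scalar sum(...). The commented-out batch grad_ascent, however, passes a whole column vector through sigmoid, where the scalar if test would fail. A minimal vectorized sketch using a boolean mask, so np.exp never sees a large positive argument (sigmoid_vec is a name introduced here, not from the book):

def sigmoid_vec(in_x):
    # element-wise numerically stable sigmoid for array inputs
    in_x = np.asarray(in_x, dtype=float)
    out = np.empty_like(in_x)
    pos = in_x >= 0
    out[pos] = 1.0 / (1.0 + np.exp(-in_x[pos]))
    exp_x = np.exp(in_x[~pos])        # in_x < 0 here, so exp_x lies in (0, 1): no overflow
    out[~pos] = exp_x / (1.0 + exp_x)
    return out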
# # Batch gradient ascent (matrix version, kept commented out in favor of the stochastic version below)
# def grad_ascent(datamat_in, class_labels):
#     datamat = np.mat(datamat_in)
#     label_mat = np.mat(class_labels).transpose()
#     m, n = np.shape(datamat)
#     alpha = 0.001
#     max_cycle = 500
#     weights = np.ones((n, 1))
#     for k in range(max_cycle):
#         h = sigmoid(datamat*weights)  # needs a vectorized sigmoid, e.g. the sigmoid_vec sketch above
#         error = label_mat - h
#         weights = weights + alpha * datamat.transpose() * error
#     return weights
def plotBestFit(wei):
    import matplotlib.pyplot as plt
    # plt.scatter(x=np.array(data)[:, 1], y=np.array(data)[:, 2], s=30, c=label)
    # weights = wei.getA()  # needed when wei is a np.matrix returned by grad_ascent
    weights = wei
    dataMat, labelMat = load_dataset()
    dataArr = np.array(dataMat)
    n = np.shape(dataArr)[0]
    xcord1 = []; ycord1 = []
    xcord2 = []; ycord2 = []
    for i in range(n):
        if int(labelMat[i]) == 1:
            xcord1.append(dataArr[i, 1]); ycord1.append(dataArr[i, 2])
        else:
            xcord2.append(dataArr[i, 1]); ycord2.append(dataArr[i, 2])
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
    ax.scatter(xcord2, ycord2, s=30, c='green')
    x = dataArr[:, 1]
    # x = np.arange(-3.0, 3.0, 0.1)
    y = (-weights[0] - weights[1] * x)/weights[2]  # decision boundary: w0 + w1*x1 + w2*x2 = 0
    ax.plot(x, y)
    plt.xlabel('X1'); plt.ylabel('X2')
    plt.show()
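A note on the commented-out wei.getA() line: grad_ascent returns a np.matrix, and getA() converts it to a plain ndarray so that weights[0] indexes a scalar. With stocGradAscent1 below the weights are already an ndarray, so no conversion is needed. If you restore the batch version, the call would look like this sketch (assuming grad_ascent and a vectorized sigmoid are active):

data_mat, label_mat = load_dataset()
# weights = grad_ascent(data_mat, label_mat)  # returns np.matrix
# plotBestFit(weights.getA())                 # getA() converts np.matrix -> ndarray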
# # Stochastic gradient ascent (basic version)
# def stocGradAscent0(dataMat, classLabels):
#     m, n = np.shape(dataMat)
#     alpha = 0.01
#     weights = np.ones(n)
#     for i in range(m):
#         h = sigmoid(sum(dataMat[i] * weights))
#         error = classLabels[i] - h
#         weights = weights + alpha * error * dataMat[i]
#     return weights
# Improved stochastic gradient ascent
def stocGradAscent1(dataMatrix, classLabels, numIter=150):
    m, n = np.shape(dataMatrix)
    weights = np.ones(n)  # initialize to all ones
    for j in range(numIter):
        dataIndex = list(range(m))  # changed from the book's range(m): in Python 3, del cannot be applied to a range object
        for i in range(m):
            # alpha decreases with each iteration but never reaches 0 because of the constant term
            alpha = 4/(1.0+j+i) + 0.0001
            randIndex = int(np.random.uniform(0, len(dataIndex)))  # book's code indexes dataMatrix[randIndex] directly;
            h = sigmoid(sum(dataMatrix[randIndex]*weights))        # dataIndex only controls how many updates run per pass
            error = classLabels[randIndex] - h
            weights = weights + alpha * error * dataMatrix[randIndex]
            del(dataIndex[randIndex])
    return weights
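A quick end-to-end run on the testSet.txt data (assuming the file exists at filepath), training with the improved stochastic gradient ascent and plotting the resulting decision boundary:

data_arr, label_mat = load_dataset()
weights = stocGradAscent1(np.array(data_arr), label_mat, numIter=150)
plotBestFit(weights)  # weights is an ndarray, so no getA() conversion is needed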
def classifyVector(inX, weights):
    prob = sigmoid(sum(inX * weights))
    if prob > 0.5:
        return 1.0
    else:
        return 0.0
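classifyVector simply thresholds the predicted probability at 0.5. For example, reusing the weights trained above (the feature values here are made up for illustration):

sample = np.array([1.0, 0.5, -1.2])     # hypothetical sample: bias term plus two features
label = classifyVector(sample, weights)  # 1.0 if sigmoid(w . x) > 0.5, else 0.0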
def colicTest():
    frTrain = open(filepath + '/horseColicTraining.txt')
    frTest = open(filepath + '/horseColicTest.txt')
    trainingSet = []; trainingLabels = []
    for line in frTrain.readlines():
        currentline = line.strip().split('\t')
        lineArr = []
        for i in range(21):
            lineArr.append(float(currentline[i]))
        trainingSet.append(lineArr)
        trainingLabels.append(float(currentline[21]))
    trainingWeights = stocGradAscent1(np.array(trainingSet), trainingLabels, 500)
    errorCount = 0; numTestVec = 0.0
    for line in frTest.readlines():
        numTestVec += 1.0
        currentline = line.strip().split('\t')
        lineArr = []
        for i in range(21):
            lineArr.append(float(currentline[i]))
        if int(classifyVector(np.array(lineArr), trainingWeights)) != int(currentline[21]):
            errorCount += 1
    errorRate = float(errorCount)/numTestVec
    return errorRate
def multiTest():
    numTests = 10; errorSum = 0.0
    for k in range(numTests):
        errorSum += colicTest()
    print("after %d iterations the average error rate is: %f" % (numTests, errorSum/float(numTests)))
The final average error rate is about 33.3%.