# Time-series forecasting -- summarized by the author from many references; corrections are welcome!

# -*- coding: utf-8 -*-
"""
Created on Wed Sep 19 13:32:47 2018

@author: Administrator
"""


import matplotlib.pyplot as plt
import math

from keras.models import Sequential
#from keras.layers import Dropout
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import numpy as np
import copy
import pandas as pd


#import statsmodels.tsa.stattools as ts
#from statsmodels.graphics.tsaplots import plot_pacf,plot_acf
# Load the raw series. The CSV must contain a "Time" column; it becomes the
# DatetimeIndex and is then removed from the value columns.
df1 = pd.read_csv('C:\\Users\\Administrator\\Desktop\\01.csv')
df1.index = pd.DatetimeIndex(df1["Time"])  # use the timestamps as the index
del df1["Time"]

# Drop rows whose timestamp was already seen (the first occurrence is kept).
# To inspect the dropped rows interactively: df1[df1.index.duplicated()]
df2 = df1[~df1.index.duplicated()]
name = list(df2.columns)

# Re-align onto a regular one-minute grid over the fixed observation window;
# timestamps missing from the CSV become NaN rows. 'min' is the canonical
# minute alias (the legacy 't'/'T' spelling is deprecated in recent pandas).
df3 = df2.reindex(
    pd.date_range(start='2018-06-17 16:00', end='2018-07-08 16:00', freq='min')
)
#print(df3[df3.isnull().values==True])
df4 = df3.fillna(df3[name].mean())  # replace each NaN with its column mean
#df4.plot()  # plot df4


# ADF unit-root test
#result = ts.adfuller(df4[name].values.ravel(), 1)
#print (result)
# Autocorrelation check
#plot_acf(df4[name].values.ravel())

# Plain float32 NumPy array of the cleaned values, consumed by the model code.
dataset = df4.values
dataset = dataset.astype('float32')
def create_train_dataset(dataset, look_back):
    """Build sliding-window samples from the first column of ``dataset``.

    Sample ``i`` is ``dataset[i:i + look_back, 0]`` and its target is the
    value that immediately follows the window, ``dataset[i + look_back, 0]``.

    Args:
        dataset: 2-D array-like; only column 0 is used.
        look_back: Non-negative window length (number of past values per
            sample).

    Returns:
        A ``(dataX, dataY)`` tuple of NumPy arrays with shapes
        ``(n_samples, look_back)`` and ``(n_samples,)``, where
        ``n_samples = max(len(dataset) - look_back, 0)``. ``dataX`` is always
        2-D, even when there are no complete windows, so callers can rely on
        ``dataX.shape[1]``.
    """
    series = np.asarray(dataset)[:, 0]
    n_samples = max(len(series) - look_back, 0)

    # Preallocate with an explicit 2-D shape so the "no samples" case still
    # yields (0, look_back) rather than a 1-D empty array.
    dataX = np.empty((n_samples, look_back), dtype=series.dtype)
    for start in range(n_samples):
        dataX[start] = series[start:start + look_back]

    # Targets are the series shifted by look_back; copy so the result does
    # not alias the caller's array.
    dataY = series[look_back:look_back + n_samples].copy()
    return dataX, dataY
    
# Seed NumPy's global RNG.
# NOTE(review): this may not make Keras training reproducible on every
# backend -- confirm whether the backend needs its own seed.
np.random.seed(7)

# Scale the data to [0, 1]. `dataset1` keeps an untouched scaled copy for
# plotting, because `dataset` is overwritten during the recursive forecast.
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)
dataset1 = copy.deepcopy(dataset)

# Chronological split: first 67% for training, the remainder for testing.
# `train` and `test` are basic slices of `dataset`, i.e. views that see any
# later in-place change to `dataset`.
train_size = int(len(dataset) * 0.67)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size, :], dataset[train_size:len(dataset), :]

look_back = 8000

# Every forecast window below starts at index len(train) - look_back + i,
# so the training part must be longer than one window.
if train_size <= look_back:
    raise ValueError(
        'training set has %d rows but look_back is %d; '
        'need more data or a smaller look_back' % (train_size, look_back)
    )

trainX, trainY = create_train_dataset(train, look_back)
# Keras LSTM input is (samples, timesteps, features); the whole window is
# fed as the feature vector of a single timestep.
trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))

# Network architecture
model = Sequential()
model.add(LSTM(10, input_shape=(1, look_back)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='Adam')
model.fit(trainX, trainY, epochs=3, batch_size=200, verbose=2)

trainPredict0 = model.predict(trainX)

# Save the true test values BEFORE the loop below overwrites them through
# `dataset` (of which `test` is a view). The reshape assumes a single column.
testY = test.reshape(len(test),)
testY = copy.deepcopy(testY)

# Recursive multi-step forecast over the test range: predict one step, write
# the prediction back into `dataset`, and let later windows read it.
y = []
for i in range(len(test)):
    trainxx = dataset[len(train) - look_back + i:len(train) + i, 0]
    trainxx = trainxx.reshape(1, 1, look_back)
    # verbose=0 avoids one progress bar per step on Keras versions where
    # predict() is verbose by default.
    b = model.predict(trainxx, verbose=0)
    y.append(b)
    # `b` has shape (1, 1); store the scalar explicitly instead of relying on
    # implicit array-to-scalar conversion.
    dataset[len(train) + i, 0] = b[0, 0]

# Back to original units. Targets are passed to the scaler as a column and
# transposed afterwards so trainY / testY keep their (1, n) shape.
testPredict0 = np.array(y).reshape(len(test), 1)
trainPredict = scaler.inverse_transform(trainPredict0)
trainY = scaler.inverse_transform(trainY.reshape(-1, 1)).T
testPredict = scaler.inverse_transform(testPredict0)
testY = scaler.inverse_transform(testY.reshape(-1, 1)).T

trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:, 0]))
print('Train Score: %.6f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:, 0]))
print('Test Score: %.6f RMSE' % (testScore))

# NaN-padded arrays so each prediction curve lines up with the original
# series on the x axis. Training predictions start after the first window.
trainPredictPlot = np.full_like(dataset, np.nan)
trainPredictPlot[look_back:len(trainPredict) + look_back, :] = trainPredict

testPredictPlot = np.full_like(dataset, np.nan)
testPredictPlot[len(train):len(dataset), :] = testPredict

plt.plot(scaler.inverse_transform(dataset1))
plt.plot(trainPredictPlot)
plt.plot(testPredictPlot)
# Needed for the figure to appear when run as a plain script.
plt.show()
   



#model.add(Dropout(0.2))
#model.add(LSTM(10, return_sequences=True))
#model.add(Dropout(0.2))
#model.add(LSTM(4, return_sequences=False))
#model.add(Dropout(0.2))
#model.add(LSTM(32))
#model.add(Dropout(0.2))
#model.add(LSTM(6))
#model.add(Dense(16))
#model.add(Dropout(0.2))              

#b=test[8976:9976]
#c=b.reshape(1,1000)
#e=np.append(a,c,axis=0)
#f=e.reshape(1000,1,1000)
#pre=model.predict(f)
#
#predict=scaler.inverse_transform(pre)
#
#dataset=scaler.inverse_transform(dataset)
#
#array1= np.zeros((31229,1), dtype=np.float)
#TruePlot = numpy.empty_like(array)
#TruePlot[:, :] = numpy.nan
#TruePlot[0:30229, :] = dataset
#
#
#PredictPlot = numpy.empty_like(array)
#PredictPlot[:, :] = numpy.nan
#PredictPlot[30229:31229, :] = predict
#
#plt.plot(TruePlot)
#plt.plot(PredictPlot)
#plt.show()





# shift train predictions for plotting
#trainPredictPlot = numpy.empty_like(dataset)
#trainPredictPlot[:, :] = numpy.nan
#trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict
#trainPredictPlot[0:len(trainPredict), :] = trainPredict
# #shift test predictions for plotting
#testPredictPlot = numpy.empty_like(dataset)
#testPredictPlot[:, :] = numpy.nan
#testPredictPlot[len(trainPredict)+look_back:len(dataset)-look_back, :] = testPredict
#testPredictPlot[len(trainPredict):len(dataset), :] = testPredict
#plot baseline and predictions
#plt.plot(scaler.inverse_transform(dataset))
#plt.plot(trainPredictPlot)
#plt.plot(scaler.inverse_transform(train)+scaler.inverse_transform(test[:1000,0]))




#plt.plot(scaler.inverse_transform(np.hstack((train[:,0],test[:8976,0]))))
#plt.plot(testPredictPlot[29229:30229])
#plt.show()

#
#
#test1=[]
#for i in range(200):
#    pre = scaler.inverse_transform(model.predict(testX))
#    c=testX[1:].reshape(9974,1)
#    e=scaler.inverse_transform(c)
#    d=pre[-1].reshape(1,1)
#    test1.append(pre[-1])
#    f=np.vstack((e,d))
#    testX=scaler.fit_transform(f)
#    testX=numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
#    #testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
#plt.plot(scaler.inverse_transform(dataset))
#plt.plot(test1, color='r')
#plt.plot(pre, color='r')
#a=numpy.array(test1).reshape(2,1)
#b=dataset
#plt.plot(list(range(len(b))), b, color='b')
#plt.plot(list(range(len(b), len(b) + len(a))), a, color='r')
#             
              
              

 

 

 

# Time-series forecasting -- summarized by the author from many references; corrections are welcome!