Python 时间序列分析
关注微信号:小程在线
关注CSDN博客:程志伟的博客
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
sns.set_style(style='whitegrid')
# Load the daily quote table; at this point `date` is an ordinary column of strings.
goertek = pd.read_csv(filepath_or_buffer='H:/0date/gertek_2016_d.csv')
goertek.head(n=5)
Out[3]:
date open high ... v_ma5 v_ma10 v_ma20
0 2016-12-23 3333.40 3334.89 ... 93437446.4 110550392.0 139164898.4
1 2016-12-22 3335.12 3340.36 ... 92103587.2 115836250.4 141973738.0
2 2016-12-21 3313.75 3342.48 ... 96640270.4 118346918.4 144647190.8
3 2016-12-20 3328.30 3329.10 ... 99790819.2 118955928.8 147353865.6
4 2016-12-19 3342.34 3344.86 ... 106497921.6 121751879.2 151401558.8
[5 rows x 14 columns]
# Inspect the data type of the `date` column (the column itself is a pandas Series).
type(goertek['date'])
Out[4]: pandas.core.series.Series
# Inspect a single element of the column: before conversion it is a plain str.
type(goertek['date'][0])
Out[5]: str
# Parse the string dates into pandas Timestamps and write them back to the column.
goertek['date'] = goertek['date'].pipe(pd.to_datetime)
# Re-check the element type after the conversion.
type(goertek['date'][0])
Out[6]: pandas._libs.tslibs.timestamps.Timestamp
# Promote the `date` column to the row index so the frame becomes a time series.
goertek = goertek.set_index(keys='date')
goertek.head(n=5)
Out[7]:
open high close ... v_ma5 v_ma10 v_ma20
date ...
2016-12-23 3333.40 3334.89 3307.60 ... 93437446.4 110550392.0 139164898.4
2016-12-22 3335.12 3340.36 3335.67 ... 92103587.2 115836250.4 141973738.0
2016-12-21 3313.75 3342.48 3338.54 ... 96640270.4 118346918.4 144647190.8
2016-12-20 3328.30 3329.10 3309.06 ... 99790819.2 118955928.8 147353865.6
2016-12-19 3342.34 3344.86 3328.98 ... 106497921.6 121751879.2 151401558.8
[5 rows x 13 columns]
# Confirm the index type after set_index.
type(goertek.index)
Out[8]: pandas.core.indexes.datetimes.DatetimeIndex
# Same load in a single step: use `date` as the index and let read_csv
# try to parse the index as dates (parse_dates=True).
goertek = pd.read_csv(
    'H:/0date/gertek_2016_d.csv',
    parse_dates=True,
    index_col='date',
)
goertek.head(n=5)
Out[9]:
open high close ... v_ma5 v_ma10 v_ma20
date ...
2016-12-23 3333.40 3334.89 3307.60 ... 93437446.4 110550392.0 139164898.4
2016-12-22 3335.12 3340.36 3335.67 ... 92103587.2 115836250.4 141973738.0
2016-12-21 3313.75 3342.48 3338.54 ... 96640270.4 118346918.4 144647190.8
2016-12-20 3328.30 3329.10 3309.06 ... 99790819.2 118955928.8 147353865.6
2016-12-19 3342.34 3344.86 3328.98 ... 106497921.6 121751879.2 151401558.8
[5 rows x 13 columns]
# Confirm the index type produced by read_csv(index_col=..., parse_dates=True).
type(goertek.index)
Out[10]: pandas.core.indexes.datetimes.DatetimeIndex
#------------DateOffset and Timedelta-----------
# Build the date with the standard-library `datetime` class (imported at the
# top of the file). The `pd.datetime` alias is deprecated and is no longer
# available in recent pandas releases.
lastday=datetime(2016,12,31)
lastday
__main__:1: FutureWarning: The pandas.datetime class is deprecated and will be removed from pandas in a future version. Import from datetime module instead.
Out[11]: datetime.datetime(2016, 12, 31, 0, 0)
# Shift the date forward by one calendar day with a DateOffset;
# the sum is a pandas Timestamp.
newyear = pd.DateOffset(days=1) + lastday
newyear
Out[47]: Timestamp('2017-01-01 00:00:00')
# Current local time. Uses the standard-library `datetime` class (imported at
# the top of the file) instead of the deprecated `pd.datetime` alias.
today=datetime.now()
today
__main__:1: FutureWarning: The pandas.datetime class is deprecated and will be removed from pandas in a future version. Import from datetime module instead.
Out[12]: datetime.datetime(2020, 2, 21, 11, 5, 26, 681789)
# Shift forward by one week.
today+pd.DateOffset(weeks=1)
Out[13]: Timestamp('2020-02-28 11:05:26.681789')
# Shift by (2 years + 6 months) applied twice, i.e. 5 years in total.
today+2*pd.DateOffset(years=2, months=6)
Out[14]: Timestamp('2025-02-21 11:05:26.681789')
import datetime
# A fixed interval of seven days, expressed as a standard-library timedelta.
weekDelta = datetime.timedelta(days=7)
weekDelta
Out[16]: datetime.timedelta(days=7)
# Move the current time seven days later.
# `datetime` is the module at this point (rebound by the preceding
# `import datetime`), so the class is reached as `datetime.datetime`.
# This replaces the deprecated `pd.datetime` alias.
today=datetime.datetime.now()
today+weekDelta
__main__:1: FutureWarning: The pandas.datetime class is deprecated and will be removed from pandas in a future version. Import from datetime module instead.
Out[17]: datetime.datetime(2020, 2, 28, 11, 10, 35, 497125)
#---------------------------- time-series functions -------------------
# Put the rows in chronological order (sort_index sorts ascending by default).
goertek = goertek.sort_index()
goertek.head(n=5)
Out[18]:
open high close ... v_ma5 v_ma10 v_ma20
date ...
2016-01-04 3725.856 3726.245 3469.066 ... 115749635.2 145030968.0 138741052.8
2016-01-05 3382.177 3518.217 3478.780 ... 117375344.0 139499139.2 141152744.8
2016-01-06 3482.406 3543.739 3539.808 ... 126191462.4 136065387.2 142161998.0
2016-01-07 3481.150 3481.150 3294.384 ... 113886000.0 119773936.0 138700858.8
2016-01-08 3371.871 3418.851 3361.563 ... 130703180.8 123651742.4 141988050.4
[5 rows x 13 columns]
# Show the last rows to confirm the most recent dates now come last.
goertek.tail()
Out[19]:
open high close ... v_ma5 v_ma10 v_ma20
date ...
2016-12-19 3342.34 3344.86 3328.98 ... 106497921.6 121751879.2 151401558.8
2016-12-20 3328.30 3329.10 3309.06 ... 99790819.2 118955928.8 147353865.6
2016-12-21 3313.75 3342.48 3338.54 ... 96640270.4 118346918.4 144647190.8
2016-12-22 3335.12 3340.36 3335.67 ... 92103587.2 115836250.4 141973738.0
2016-12-23 3333.40 3334.89 3307.60 ... 93437446.4 110550392.0 139164898.4
[5 rows x 13 columns]
# shift moves the values down by one row while the index stays put,
# so the first row becomes NaN.
goertek.shift(periods=1).head(n=3)
Out[20]:
open high close ... v_ma5 v_ma10 v_ma20
date ...
2016-01-04 NaN NaN NaN ... NaN NaN NaN
2016-01-05 3725.856 3726.245 3469.066 ... 115749635.2 145030968.0 138741052.8
2016-01-06 3382.177 3518.217 3478.780 ... 117375344.0 139499139.2 141152744.8
[3 rows x 13 columns]
# Re-index to business-month-end frequency ('BM'). Month-end dates that are
# absent from the data show up as NaN rows (2016-10-31 in the output).
# NOTE(review): newer pandas releases appear to rename this alias to 'BME' — confirm
# against the installed version before changing it.
goertek.asfreq('BM')
Out[21]:
open high close ... v_ma5 v_ma10 v_ma20
date ...
2016-01-29 2855.598 2965.309 2946.090 ... 110646072.0 111380396.8 122499499.2
2016-02-29 2939.580 2939.875 2877.467 ... 123694110.4 121202825.6 108902741.2
2016-03-31 3229.200 3241.927 3218.088 ... 107497318.4 138788992.8 129922554.0
2016-04-29 3153.537 3171.289 3156.745 ... 62888933.6 83644518.0 103044368.2
2016-05-31 3068.596 3170.926 3169.559 ... 75895244.0 69714819.6 74371733.0
2016-06-30 3152.833 3163.720 3153.921 ... 92700076.8 83469470.0 84343779.4
2016-07-29 3217.192 3225.418 3203.930 ... 102453550.4 95620234.4 112470896.4
2016-08-31 3310.565 3333.972 3327.793 ... 82890593.6 95886160.0 107540506.4
2016-09-30 3240.177 3260.940 3253.284 ... 54814162.4 57857660.8 72046803.6
2016-10-31 NaN NaN NaN ... NaN NaN NaN
2016-11-30 3557.640 3567.070 3538.000 ... 184444368.0 163708126.4 151030986.4
[11 rows x 13 columns]
# Upsample to hourly frequency, forward-filling each new hour with the
# most recent earlier row.
# NOTE(review): newer pandas releases appear to prefer the lowercase alias 'h' — confirm
# against the installed version before changing it.
goertek.asfreq('H',method='ffill')
Out[22]:
open high ... v_ma10 v_ma20
date ...
2016-01-04 00:00:00 3725.856 3726.245 ... 145030968.0 138741052.8
2016-01-04 01:00:00 3725.856 3726.245 ... 145030968.0 138741052.8
2016-01-04 02:00:00 3725.856 3726.245 ... 145030968.0 138741052.8
2016-01-04 03:00:00 3725.856 3726.245 ... 145030968.0 138741052.8
2016-01-04 04:00:00 3725.856 3726.245 ... 145030968.0 138741052.8
... ... ... ... ...
2016-12-22 20:00:00 3335.120 3340.360 ... 115836250.4 141973738.0
2016-12-22 21:00:00 3335.120 3340.360 ... 115836250.4 141973738.0
2016-12-22 22:00:00 3335.120 3340.360 ... 115836250.4 141973738.0
2016-12-22 23:00:00 3335.120 3340.360 ... 115836250.4 141973738.0
2016-12-23 00:00:00 3333.400 3334.890 ... 110550392.0 139164898.4
[8497 rows x 13 columns]
# Load the second quote file with `date` parsed as a DatetimeIndex.
goertek_m = pd.read_csv(
    'H:/0date/gertek_2016.csv',
    index_col='date',
    parse_dates=True,
)
goertek_m.head(n=5)
Out[23]:
open high close ... v_ma5 v_ma10 v_ma20
date ...
2016-12-22 15:00:00 3334.41 3335.63 3335.63 ... 5323810.0 4528870.0 5701990.0
2016-12-22 14:45:00 3339.31 3340.04 3334.36 ... 4604820.0 4151560.0 5685350.0
2016-12-22 14:30:00 3334.99 3339.39 3339.31 ... 4160060.0 4118640.0 5785230.0
2016-12-22 14:15:00 3331.56 3336.99 3335.10 ... 3909410.0 4263560.0 5786760.0
2016-12-22 14:00:00 3328.97 3331.21 3331.21 ... 3858020.0 4628960.0 5906510.0
[5 rows x 13 columns]
# Group the rows into 30-minute bins. This only builds a Resampler object;
# an aggregation such as .mean() still has to be applied to get values.
goertek_resample = goertek_m.resample(rule='30Min')
type(goertek_resample)
Out[24]: pandas.core.resample.DatetimeIndexResampler
# Average every column within each 30-minute bin; bins with no rows come out as NaN.
goertek_resample.mean()
Out[25]:
open high ... v_ma10 v_ma20
date ...
2016-10-10 10:30:00 3263.620 3272.490 ... 5132490.0 3723360.0
2016-10-10 11:00:00 3277.510 3280.080 ... 5362790.0 3780180.0
2016-10-10 11:30:00 3279.470 3292.100 ... 5557140.0 3905820.0
2016-10-10 12:00:00 NaN NaN ... NaN NaN
2016-10-10 12:30:00 NaN NaN ... NaN NaN
... ... ... ... ...
2016-12-22 13:00:00 3327.580 3328.270 ... 6539310.0 6348340.0
2016-12-22 13:30:00 3324.625 3328.825 ... 5723880.0 6146430.0
2016-12-22 14:00:00 3330.265 3334.100 ... 4446260.0 5846635.0
2016-12-22 14:30:00 3337.150 3339.715 ... 4135100.0 5735290.0
2016-12-22 15:00:00 3334.410 3335.630 ... 4528870.0 5701990.0
[3514 rows x 13 columns]
#-------------------股票数据案例----------------------
import tushare as ts
from datetime import datetime
# Stock codes of the four car makers, in the order they are fetched below.
auto_list = ['000625', '600104', '002594', '601238']
# Download each stock's history for 2016-01-01 .. 2016-12-23 via tushare,
# one frame per code.
changan = ts.get_h_data(auto_list[0], '2016-01-01', '2016-12-23')
sqjt = ts.get_h_data(auto_list[1], '2016-01-01', '2016-12-23')
byd = ts.get_h_data(auto_list[2], '2016-01-01', '2016-12-23')
gqjt = ts.get_h_data(auto_list[3], '2016-01-01', '2016-12-23')
# Load the quotes for `changan` from a local CSV with `date` as a DatetimeIndex.
# This rebinds `changan`, so whatever it referred to before is replaced.
changan = pd.read_csv(
    'H:/0date/changan.csv',
    parse_dates=True,
    index_col='date',
)
changan.head(n=5)
Out[36]:
open high close low volume amount
date
2016-12-23 15.25 15.25 15.03 14.99 17339349.0 261627029.0
2016-12-22 15.25 15.35 15.26 15.11 21042253.0 320306792.0
2016-12-21 15.12 15.27 15.25 15.09 18803404.0 285494781.0
2016-12-20 15.19 15.22 15.06 14.98 16265938.0 245299075.0
2016-12-19 15.28 15.33 15.17 15.12 14068049.0 214149866.0
# Summary statistics (count, mean, std, min, quartiles, max) for every numeric column.
changan.describe()
Out[37]:
open high ... volume amount
count 219.000000 219.000000 ... 2.190000e+02 2.190000e+02
mean 14.965479 15.166027 ... 2.967817e+07 4.572622e+08
std 0.974644 0.977192 ... 1.816421e+07 2.902145e+08
min 13.180000 13.380000 ... 8.074326e+06 1.254679e+08
25% 13.970000 14.190000 ... 1.739413e+07 2.612642e+08
50% 15.280000 15.500000 ... 2.552496e+07 3.804228e+08
75% 15.715000 15.910000 ... 3.458674e+07 5.210668e+08
max 16.710000 17.260000 ... 1.360965e+08 2.020822e+09
[8 rows x 6 columns]
# Index range, column dtypes and non-null counts. The index runs from
# 2016-12-23 back to 2016-01-04, i.e. the rows are stored newest-first.
changan.info()
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 219 entries, 2016-12-23 to 2016-01-04
Data columns (total 6 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 open 219 non-null float64
1 high 219 non-null float64
2 close 219 non-null float64
3 low 219 non-null float64
4 volume 219 non-null float64
5 amount 219 non-null float64
dtypes: float64(6)
memory usage: 12.0 KB
# Closing price over time.
changan['close'].plot(legend=True,figsize=(10,4))
Out[39]: <matplotlib.axes._subplots.AxesSubplot at 0x222e15c4f60>

# Trading volume over time.
changan['volume'].plot(legend=True,figsize=(10,4))
Out[40]: <matplotlib.axes._subplots.AxesSubplot at 0x222e190cb38>

# Moving averages of the closing price over several window lengths (in rows).
ma_day = [10,20,50]
# The CSV is stored newest-first (see changan.info()), and rolling() works on
# row order. Sort chronologically first, otherwise each window would average
# the *following* trading days rather than the preceding ones. Sorting an
# already sorted frame is a no-op, so re-running this cell is safe.
changan = changan.sort_index()
for ma in ma_day:
    column_name = "MA for %s days" %(str(ma))
    changan[column_name] = changan['close'].rolling(window=ma).mean()
# Derive the plotted columns from ma_day so the plot follows any change to the windows.
ma_columns = ["MA for %s days" %(str(ma)) for ma in ma_day]
changan[['close'] + ma_columns].plot(subplots=False,figsize=(10,4))
Out[42]: <matplotlib.axes._subplots.AxesSubplot at 0x222e1974518>

# Daily return: percentage change of the closing price from one trading day to the next.
# pct_change() compares each row with the row *before* it, and the CSV is stored
# newest-first (see changan.info()). Sort chronologically first so the value is
# the change relative to the previous day, not the next one. Sorting an
# already sorted frame is a no-op, so re-running this cell is safe.
changan = changan.sort_index()
changan['Daily Return'] = changan['close'].pct_change()
changan['Daily Return'].plot(figsize=(10,4),legend=True,linestyle='--',marker='o')
Out[43]: <matplotlib.axes._subplots.AxesSubplot at 0x222e1a53550>

# Distribution of the daily returns (NaN rows dropped); the shape is roughly normal.
# NOTE(review): seaborn appears to have deprecated distplot in favour of
# histplot/displot in later releases — confirm against the installed version.
sns.distplot(changan['Daily Return'].dropna(),bins=100, color = 'purple');

# The same distribution as a plain pandas histogram with 100 bins.
changan['Daily Return'].hist(bins=100)
Out[45]: <matplotlib.axes._subplots.AxesSubplot at 0x222e1c3ef28>