from selenium import webdriver
from lxml import etree
import json
import os
import time
def write_to_file(content, p_name):
    """Append ``content`` as one JSON line to today's data file for a product.

    The target file is ``../20190708期货数据/<YYYYMMDD><p_name>.txt`` (relative
    to the current working directory), opened in append mode as UTF-8.

    Args:
        content: Any JSON-serialisable object; written on a single line with
            non-ASCII characters kept as-is (``ensure_ascii=False``).
        p_name: Product name used as the suffix of the file name.
    """
    new_time = time.strftime('%Y%m%d')
    out_dir = '../20190708期货数据'
    # Create the output directory on demand so a missing directory does not
    # abort the write with FileNotFoundError.
    os.makedirs(out_dir, exist_ok=True)
    # The ``with`` block closes the file; no explicit close() is needed.
    with open(out_dir + '/' + new_time + p_name + '.txt', 'a', encoding='utf-8') as f:
        f.write(json.dumps(content, ensure_ascii=False) + '\n')
def get_html(url, s_name, p_Name):
    """Load ``url`` in the shared browser, run a search, and return the page HTML.

    Uses the module-level ``driver`` (a Selenium WebDriver). The exchange name
    is typed into the element with id ``futures_exchange``, the product name
    into ``futures_variety``, and the ``btnSearch`` element is clicked.

    Args:
        url: Address of the page to open.
        s_name: Exchange name sent to the ``futures_exchange`` input.
        p_Name: Product name sent to the ``futures_variety`` input.

    Returns:
        The page source after the search was triggered, or ``None`` if any
        step raised an exception.
    """
    try:
        driver.get(url=url)
        input0 = driver.find_element_by_id('futures_exchange')
        input0.send_keys(s_name)
        input1 = driver.find_element_by_id('futures_variety')
        input1.send_keys(p_Name)
        # Optional: select a specific date before searching.
        # driver.execute_script("arguments[0].value=arguments[1]", driver.find_element_by_id("inputDate"),
        #                       "2019-07-08")
        input3 = driver.find_element_by_id('btnSearch')
        # Fixed one-second pauses before and after the click; presumably to
        # let the page settle and the results render -- TODO confirm.
        time.sleep(1)
        input3.click()
        time.sleep(1)
        html = driver.page_source
        return html
    except Exception as exc:
        # Catch Exception rather than everything, so Ctrl-C / SystemExit still
        # propagate, and report the cause instead of hiding it.
        print('没能打开浏览器', exc)
        return None
# Path from the page wrapper down to the container that holds every board.
_PAGE_TO_MAIN = (
    "/div[@id='page']/div[@class='main']/div[@class='framecontent']"
    "/div[@class='sitebody']/div[@class='maincont']/div[@class='IFcontentBox']"
    "/div[@class='content']/div[2]/div[@id='mainContent']"
)
# Root anchored at the document element (used by almost every query).
_ABS_MAIN = "/html/body" + _PAGE_TO_MAIN
# Root that matches the page wrapper anywhere in the document; only the first
# board's title query uses this form.
_ANY_MAIN = "/" + _PAGE_TO_MAIN

# One entry per board, in output order:
# (root for the title query, board container below mainContent,
#  <li> step of the header row, id of the <ul> holding the data rows)
_BOARDS = (
    # Volume leaderboard
    (_ANY_MAIN, "/div[@class='IFcb1'][2]", "li[@class='IFbb']", "ulCjl"),
    # Long-position leaderboard
    (_ABS_MAIN, "/div[@class='IFcb2'][3]", "li[@class='IFbb']", "ulDtcc"),
    # Short-position leaderboard
    (_ABS_MAIN, "/div[@class='IFcb2'][4]", "li", "ulKtcc"),
    # Net-long leaderboard
    (_ABS_MAIN, "/div[@id='otherList']/div[@class='IFcb1'][1]", "li", "ulJdt"),
    # Long-position-increase leaderboard
    (_ABS_MAIN, "/div[@id='otherList']/div[@class='IFcb2'][1]", "li[@class='IFbb']", "ulDtzc"),
    # Long-position-decrease leaderboard
    (_ABS_MAIN, "/div[@id='otherList']/div[@class='IFcb2'][2]", "li", "ulDtjc"),
    # Net-short leaderboard.
    # NOTE(review): these selectors are identical to the long-position-decrease
    # entry above, so this board currently yields the same data twice. It looks
    # like a copy-paste slip; the intended container / <ul> id must be checked
    # against the live page before changing it.
    (_ABS_MAIN, "/div[@id='otherList']/div[@class='IFcb2'][2]", "li", "ulDtjc"),
    # Short-position-increase leaderboard
    (_ABS_MAIN, "/div[@id='otherList']/div[@class='IFcb2'][3]", "li", "ulKtzc"),
    # Short-position-decrease leaderboard
    (_ABS_MAIN, "/div[@id='otherList']/div[@class='IFcb2'][4]", "li", "ulKtjc"),
)


def _scrape_board(et, title_root, board, header_li, list_id):
    """Return ``[title_texts, header_texts, row, row, ...]`` for one board."""
    lists_root = _ABS_MAIN + board + "/div[@class='IFUlDiv']"
    rows = [
        et.xpath(title_root + board + "/div[@class='IFtit']/text()"),
        et.xpath(lists_root + "/ul[1]/" + header_li + "/span/text()"),
    ]
    cells = et.xpath(lists_root + "/ul[@id='" + list_id + "']/li//text()")
    # Group the flat list of text nodes into chunks of four (the last chunk
    # may be shorter); presumably one chunk per table row -- TODO confirm.
    rows.extend(cells[start:start + 4] for start in range(0, len(cells), 4))
    return rows


def parse_one_page(html):
    """Extract the nine leaderboard tables from a result page.

    Args:
        html: HTML source of the page, passed to ``lxml.etree.HTML``.

    Returns:
        A list with nine entries, one per board in the order of ``_BOARDS``.
        Each entry is a list whose first item is the list of title text nodes,
        whose second item is the list of header text nodes, followed by the
        data text nodes grouped four at a time.
    """
    et = etree.HTML(html)
    return [_scrape_board(et, *spec) for spec in _BOARDS]
def Main():
    """Scrape every configured product of every exchange and store the result.

    For each (exchange, product) pair the search page is fetched with
    ``get_html``, parsed with ``parse_one_page``, and every item of every
    parsed board is appended to the product's file via ``write_to_file``.
    A failure for one product is reported and the loop moves on to the next.
    """
    # Exchange name -> product names typed into the page's search form.
    exchanges = {
        '上海期货交易所':['沪银','沪铝','沪金','沥青','沪铜','燃油','热卷','镍','沪铅','螺纹钢','橡胶','锡','纸浆','沪锌'],
        '大连商品期货交易所':['豆一','豆二','玉米','玉米淀粉','乙二醇','铁矿石','焦炭','鸡蛋','焦煤','塑料','豆粕','棕榈','聚丙烯','PVC','豆油'],
        '郑州商品交易所':['郑煤','PTA','白糖','锰硅','硅铁','菜籽','菜粕','普麦','菜油','甲醇','晚籼','粳稻','玻璃','棉纱','红枣','郑棉','苹果']
    }
    # The same page serves every query, so build the URL once.
    url = 'http://data.eastmoney.com/futures/dl/data.html'
    for key, value in exchanges.items():
        for p in value:
            html = get_html(url, key, p)
            print('--- 正在解析网站 ---')
            try:
                ls = parse_one_page(html)
                for i in ls:
                    for j in i:
                        write_to_file(j, p)
            except Exception as exc:
                # Keep going with the next product, but say which one failed
                # and why instead of discarding the error.
                print('爬虫出错了', p, exc)
            else:
                print(p, "---写入成功---")
if __name__ == '__main__':
    # Poll the wall clock once per second and start the scrape at 16:31.
    while True:
        n = time.strftime('%H:%M:%S')
        print(n)
        time.sleep(1)
        # Match on the minute rather than the exact second: sleep(1) plus loop
        # overhead can step over "16:31:00", which would silently skip the run.
        if n.startswith("16:31"):
            # Raw string: "\c" is not a valid escape sequence in a normal literal.
            path = r'D:\chromedriver\chromedriver.exe'
            # Bound unconditionally; the rename below always needs it.
            new_time = time.strftime('%Y%m%d')
            # Module-level name: get_html() reads ``driver`` as a global.
            driver = webdriver.Chrome(path)
            try:
                # Check the same location that is created and later renamed.
                if not os.path.exists('../20190708期货数据'):
                    os.mkdir('../' + '20190708期货数据')
                Main()
                os.rename('../20190708期货数据', r'D:\Code\Spider\Date\期货数据\\' + new_time + '20190708期货数据')
            finally:
                # quit() ends the whole WebDriver session (close() only closes
                # the current window), and runs even if the scrape failed.
                driver.quit()
            break