用Python爬取51job保存到CSV

# Imports: HTTP client (requests), HTML parser (lxml) and CSV writer (csv)
import requests
from lxml import etree
import csv

def spider(url_):
    """Download ``url_`` and return the page parsed as an lxml HTML tree.

    The request is sent with the module-level ``headers`` dict.

    Args:
        url_: Absolute URL of the page to fetch.

    Returns:
        The root element returned by ``etree.HTML`` for the decoded body.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with a 4xx/5xx status.
    """
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url_, headers=headers, timeout=10)
    # Fail loudly on an HTTP error instead of parsing an error page as results.
    response.raise_for_status()
    # Force GBK decoding (presumably the charset the site serves -- confirm
    # if the scraped text comes out garbled).
    response.encoding = 'gbk'
    return etree.HTML(response.text)

# Fetch one listing page and store every job row it contains
def _first_text(node, path, default=''):
    """Return the first result of ``node.xpath(path)``, or ``default`` if none."""
    found = node.xpath(path)
    return found[0] if found else default


def download(list_url):
    """Scrape one search-result page and append each job row to the CSV file.

    Every ``div.el`` inside ``div.dw_table`` is treated as one job posting.
    The job title, company, location, salary and publish date are extracted
    and handed to ``csv_writer`` as one row.

    Args:
        list_url: URL of the search-result page to scrape.
    """
    selector = spider(list_url)
    # All job entries on the page.
    rows = selector.xpath('//div[@class="dw_table"]/div[@class="el"]')
    for div in rows:
        # Job title: text of the first link in the row. Rows without a
        # titled link are skipped instead of raising IndexError.
        anchors = div.xpath('p/span/a')
        if not anchors:
            continue
        titles = anchors[0].xpath('text()')
        if not titles:
            continue
        name = titles[0].strip()

        company = _first_text(div, 'span[@class="t2"]/a/text()')  # employer
        place = _first_text(div, 'span[@class="t3"]/text()')  # location
        # Salary and publish date keep the original placeholder values
        # when the cell is empty.
        money = _first_text(div, 'span[@class="t4"]/text()', '面议')
        posted = _first_text(div, 'span[@class="t5"]/text()', '没有时间')

        csv_writer([name, company, place, money, posted])

# Append one row to the CSV output file
def csv_writer(item):
    """Append ``item`` as a single row to ``51job.csv``.

    The file is opened in append mode on every call, so rows accumulate
    across calls and across runs of the script.

    Args:
        item: Iterable of field values making up one CSV row.
    """
    # newline='' lets the csv module control line endings itself.
    with open('51job.csv', 'a', encoding='utf-8', newline='') as csvfile:
        writer = csv.writer(csvfile)
        try:
            writer.writerow(item)
        except (csv.Error, UnicodeError) as e:
            # Best effort: report a row that cannot be serialised and carry
            # on, but let real I/O failures (disk full, permissions) surface.
            print(e)

# Request headers sent with every page fetch. The Host header is pinned to
# search.51job.com, so spider() should only be given URLs on that host.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36',
    'Host': 'search.51job.com'
}

# Search-result page to scrape (presumably a "python" keyword search, going
# by the URL path -- confirm the meaning of the other path/query fields).
url_list = 'https://search.51job.com/list/070400,000000,0000,00,9,99,python,2,1.html?lang=c&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&ord_field=0&dibiaoid=0&line=&welfare='

# Only scrape when run as a script, so importing this module has no
# network or file side effects.
if __name__ == '__main__':
    download(url_list)

#代码图片供参考
用Python爬取51job保存到CSV

用Python爬取51job保存到CSV

最后保存到CSV中的内容
用Python爬取51job保存到CSV

用Python爬取51job保存到CSV

相关推荐