Python爬虫学习(四)抓取猫眼电影的top100
利用 requests 库加上 BeautifulSoup 完成,代码如下:
import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent sent with every request made by get_page().
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'
}

# Seconds to wait for the server before giving up; without a timeout
# requests.get() may block forever on a stalled connection.
_REQUEST_TIMEOUT = 10


def get_page(offset):
    """Download one page of the Maoyan board and return its HTML.

    Args:
        offset: Value for the ``offset`` query parameter (the script calls
            this with 0, 10, ..., 90).

    Returns:
        The response body as text when the server answers with HTTP 200,
        otherwise ``None`` (non-200 status, timeout, or any other
        ``requests`` network error).
    """
    url = 'http://maoyan.com/board/4?offset=' + str(offset)
    try:
        response = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)
    except requests.RequestException:
        # Keep the "None means failure" contract instead of crashing the
        # whole crawl on a single network hiccup.
        return None
    if response.status_code == 200:
        return response.text
    return None


def _tag_text(tag, strip=False):
    """Return the single string inside *tag*, or '' when it is unavailable.

    ``tag`` may be ``None`` (element not found) and ``tag.string`` may be
    ``None`` (the tag does not contain exactly one string); both cases
    yield an empty string so callers never hit ``AttributeError``.
    """
    text = tag.string if tag is not None else None
    if text is None:
        return ''
    return text.strip() if strip else text


def check(html):
    """Parse one board page and print name, stars and release time per movie.

    Args:
        html: HTML text as returned by ``get_page``. ``None`` is accepted
            and treated as "nothing to parse" (nothing is printed).

    Each ``movie-item-info`` element is turned into one output row, so a
    movie with a missing field gets an empty column instead of shifting
    the columns of every following movie.
    """
    if html is None:
        return

    soup = BeautifulSoup(html, 'lxml')
    rows = []
    for div in soup.find_all(attrs={'class': 'movie-item-info'}):
        name = _tag_text(div.find(attrs={'class': 'name'}))
        # Only the "star" text is stripped, matching the original output.
        star = _tag_text(div.find(attrs={'class': 'star'}), strip=True)
        release_time = _tag_text(div.find(attrs={'class': 'releasetime'}))
        rows.append((name, star, release_time))

    print('名字 主演 上映时间 ')
    for name, star, release_time in rows:
        print(name, ' ', star, ' ', release_time)


if __name__ == '__main__':
    # Ten requests with offsets 0, 10, ..., 90.
    for page in range(10):
        offset = page * 10
        html = get_page(offset)
        if html is None:
            # Report and skip failed pages rather than passing None on.
            print('offset={} 页面获取失败,已跳过'.format(offset))
            continue
        check(html)
运行结果