python爬虫网页

 首先是pip安装:把Python安装目录下Scripts目录中的pip安装器拖进cmd中,然后在后面输入 install 加上要安装的库名(例如 requests、beautifulsoup4)

import requests
import bs4
# Fetch the first page of the Douban movie Top 250 list and print each title.
res = requests.get("https://movie.douban.com/top250", timeout=10)
# Fail loudly on an HTTP error instead of silently parsing an error page.
res.raise_for_status()
soup = bs4.BeautifulSoup(res.text, "html.parser")  # name the parser explicitly
# The keyword is class_ (trailing underscore) because class is reserved in Python.
targets = soup.find_all("div", class_="hd")
for each in targets:
    # Skip any block that lacks the expected <a><span> structure.
    if each.a is None or each.a.span is None:
        continue
    print(each.a.span.text)

python爬虫网页


运行后返回了第1页的电影标题内容:

肖申克的救赎
霸王别姬
这个杀手不太冷

阿甘正传

这是豆瓣读书Top250的排名,在上面代码的基础上做了小修改,加了文本文件输出

import requests
from bs4 import BeautifulSoup as bs
# Scrape the Douban book Top 250 list page by page into `books`.
depth = 10  # the list spans 10 pages
books = []  # one [name, author, price, rate, info, adr] list per book
for page in range(depth):
    # The `start` query parameter advances by 25 for each page.
    url = 'https://book.douban.com/top250?start=' + str(page * 25)
    response = requests.get(url, timeout=10)
    # Fail loudly on an HTTP error instead of silently parsing an error page.
    response.raise_for_status()
    soup = bs(response.text, 'html.parser')
    for cell in soup.find_all('td'):
        link = cell.a
        # Only cells whose first link carries a title attribute are book entries.
        name = link.get('title') if link is not None else None
        if name is None:
            continue
        # The <p> text is '/'-separated: the first field is used as the
        # author and the last as the price; strip the padding around '/'.
        detail = cell.p.text if cell.p is not None else ''
        fields = detail.split('/')
        author = fields[0].strip()
        price = fields[-1].strip()
        # Rating and one-line review may be absent; fall back to a placeholder
        # rather than crashing or swallowing unrelated errors.
        rating_tag = cell.find('span', {'class': 'rating_nums'})
        rate = rating_tag.text if rating_tag is not None else '无信息'
        quote_tag = cell.find('span', {'class': 'inq'})
        info = quote_tag.text if quote_tag is not None else '无信息'
        adr = link.get('href')
        books.append([name, author, price, rate, info, adr])
# Write every collected book to a UTF-8 text file, ranked from 1 in list order.
# The template is built once; each entry of `books` supplies the six fields
# (name, author, price, rating, one-line review, link) in that order.
booklist = "Top:{}\n书名:{}\n作者:{}\n定价:{}\n评分:{}\n一句话书评:{}\n信息查阅:{}\n"
with open("豆瓣读书250.txt", "w", encoding="utf-8") as f:
    for count, each in enumerate(books, start=1):
        f.write(booklist.format(count, each[0], each[1], each[2], each[3], each[4], each[5]))