面向对象爬虫使用继承的思想爬取网站主页(分享代码与思维导图)
父类(保存为 spider/V1.py,供子类通过 from spider.V1 import spider_v1 导入):
import requests
class spider_v1:
    """Minimal page fetcher that sends a browser-like User-Agent header."""

    def __init__(self):
        # Headers sent with every request made through ``bb``.
        self.headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/73.0.3683.86 Safari/537.36"
            )
        }

    def bb(self, url):
        """Fetch *url* with an HTTP GET and return the body as text.

        The response is always decoded as GBK, regardless of the charset
        the server declares.

        Args:
            url: Address of the page to download.

        Returns:
            The response body decoded as GBK.
        """
        # ``headers`` must be passed by keyword: the second positional
        # parameter of ``requests.get`` is ``params`` (the query string),
        # so passing the dict positionally would never set the User-Agent.
        response = requests.get(url, headers=self.headers)
        response.encoding = "GBK"
        return response.text
# Demo entry point: runs only when this module is executed directly,
# not when the subclass module imports ``spider_v1`` from it.
if __name__ == "__main__":
    url = "http://www.51testing.com"
    spider1 = spider_v1()
    res = spider1.bb(url)
    print(res)
子类:
import requests
from spider.V1 import spider_v1
class spider_v2(spider_v1):
    """Spider that remembers a target URL and inherits fetching from spider_v1."""

    def __init__(self, url):
        """Initialise the parent state, then store the target URL.

        Args:
            url: Address this spider is associated with; kept on ``self.url``.
        """
        # Run the parent initialiser so the inherited ``bb`` method finds
        # the state it expects on the instance.
        super().__init__()
        self.url = url
# Demo entry point: runs only when this module is executed directly.
if __name__ == "__main__":
    url = "http://www.51testing.com"
    spider2 = spider_v2(url)
    # ``bb`` is inherited unchanged from spider_v1, so it still takes the
    # URL explicitly.
    res = spider2.bb(url)
    print(res)