from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import requests
import os
import re
import time
class Spider:
    """Scrape images from the enterdesk gallery page and save them to disk.

    The gallery page is opened in a Chrome browser driven by Selenium, the
    page is scrolled several times so additional thumbnails are appended,
    and every ``<img>`` found inside the gallery container is downloaded
    with ``requests`` into ``directory``.
    """

    # Gallery landing page opened by get_html().
    url = "https://mm.enterdesk.com/"
    # Folder (relative to the working directory) that receives the downloads.
    directory = "images2"
    # Raw string so the backslash is not treated as an (invalid) string
    # escape; the value is unchanged. Not referenced by the methods below.
    pages_pattern = r'\/([0-9]+?).html'
    # Seconds to wait on a single image request before giving up on it.
    request_timeout = 10

    @staticmethod
    def _safe_filename(title):
        """Return *title* with characters that are unsafe in file names replaced.

        Path separators, reserved punctuation and control characters are
        collapsed into a single underscore. ``None`` or a blank title yields
        an empty string.
        """
        if not title:
            return ""
        return re.sub(r'[\\/:*?"<>|\x00-\x1f]+', "_", title).strip()

    def get_html(self):
        """Load the gallery page and collect the image elements it shows.

        Returns:
            A list of dicts with the keys ``'url'`` (the image ``src``
            attribute) and ``'title'`` (the image ``title`` attribute).
            Either value may be ``None`` when the attribute is absent.
        """
        browser = webdriver.Chrome()
        try:
            browser.get(Spider.url)
            wait = WebDriverWait(browser, 10)
            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, ".egeli_pic_m")))
            for i in range(6):
                # Scroll to the bottom, then pause before the next scroll so
                # the page has time to append more entries.
                browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
                print("第"+str(i+1)+"进行下拉操作")
                time.sleep(3)
            container = browser.find_element(By.CSS_SELECTOR, ".egeli_pic_m")
            elements = container.find_elements(By.CSS_SELECTOR, ".egeli_pic_li dl dd img")
            # Read the attributes while the browser session is still alive.
            return [
                {'url': element.get_attribute("src"), 'title': element.get_attribute("title")}
                for element in elements
            ]
        finally:
            # Always shut the browser down, even when the wait times out.
            browser.quit()

    def get_image(self, images):
        """Download every entry of *images* into ``Spider.directory``.

        Args:
            images: iterable of dicts as produced by :meth:`get_html`.

        Entries without a URL, entries whose target file already exists and
        entries whose download fails are skipped; the number of files that
        were actually written is printed at the end.
        """
        saved = 0
        os.makedirs(Spider.directory, exist_ok=True)
        for img in images:
            src = img.get('url')
            if not src:
                continue
            name = Spider._safe_filename(img.get('title'))
            if not name:
                # No usable title: fall back to the file stem of the URL.
                stem = os.path.splitext(os.path.basename(src.split('?', 1)[0]))[0]
                name = Spider._safe_filename(stem)
            if not name:
                continue
            path = os.path.join(Spider.directory, name + '.jpg')
            if os.path.exists(path):
                continue
            try:
                res = requests.get(src, timeout=Spider.request_timeout)
                res.raise_for_status()
            except requests.RequestException as exc:
                # One bad image must not abort the remaining downloads.
                print("下载失败，已跳过: %s (%s)" % (src, exc))
                continue
            with open(path, 'wb') as f:
                f.write(res.content)
            saved += 1
        print("本次共保存%s张图片"%saved)

    def go(self):
        """Run one full scrape-and-download pass and print the elapsed time."""
        start = time.time()
        images = self.get_html()
        self.get_image(images)
        end = time.time()
        print("本次总共耗时:",end-start)
# Script entry: build a Spider and run one scrape-and-download pass.
# NOTE(review): this runs at import time as well; consider guarding it with
# `if __name__ == "__main__":` if the module is ever imported.
spider = Spider()
spider.go()