2021年最新Python爬蟲教程+實戰項目案例(新增scrapy部分+分布式爬

p31 去除爬取子頁面的代碼直接爬取圖片。 精簡了代碼,減少了requests的請求。
# Download every lazy-loaded image listed on a single umei.cc gallery page.
#
# The script fetches the listing page, collects the <img class="lazy"> tags
# inside the "item_list infinite_scroll" container, and saves the file that
# each tag's "data-original" attribute points to into SAVE_DIR.
import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Folder the images are written to.
SAVE_DIR = 'D:\\python_homework\\src\\爬取內(nèi)容\\plcture'
# Seconds to wait for each HTTP request, so a stalled server cannot hang the
# script forever.
TIMEOUT = 10

url = 'https://www.umei.cc/weimeitupian/xiaoqingxintupian/'
url_2 = 'https://www.umei.cc'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36 SLBrowser/8.0.0.9071 SLBChan/105'
}

# makedirs (not mkdir): creates the missing parent folders as well as the
# "plcture" leaf the files are actually written into, and exist_ok=True lets
# the script be re-run without failing on an existing folder.
os.makedirs(SAVE_DIR, exist_ok=True)

# Send the same browser-like headers for the listing page as for the images.
response = requests.get(url, headers=headers, timeout=TIMEOUT)
response.raise_for_status()
response.encoding = 'utf-8'
main_page = BeautifulSoup(response.text, "html.parser")

container = main_page.find("div", class_='item_list infinite_scroll')
if container is None:
    # find() returns None when the page layout differs from what is expected;
    # stop with a readable message instead of an AttributeError.
    raise SystemExit("image list container not found on " + url)
imglist = container.find_all("img", class_="lazy")

for i in imglist:
    src = i.get("data-original")
    if not src:
        # Tag carries no lazy-load URL: nothing to download.
        continue

    name = src.split("/")[-1]
    if not name:
        # URL ends with "/", so there is no file name to save under.
        continue

    # Absolute URLs pass through urljoin unchanged; a site-relative path (if
    # the page ever uses one) is resolved against the site root.
    img_url = urljoin(url_2, src)
    try:
        img_response = requests.get(img_url, headers=headers, timeout=TIMEOUT)
        img_response.raise_for_status()
    except requests.RequestException as err:
        # Skip this image rather than saving an error page as a picture or
        # aborting the remaining downloads.
        print("{} download failed: {}".format(img_url, err))
        continue

    # The with-block closes the file; no explicit close() is needed.
    with open(os.path.join(SAVE_DIR, name), 'wb') as fp:
        fp.write(img_response.content)
    print(name + "下載完成!")
標簽: