Python爬蟲實戰教程:批量爬取某網站圖片
2023-02-17 02:26 作者:我將放逐這個世界 | 我要投稿

import os
import re
import time

import requests


def get_html_str(url, head, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        head: Mapping of HTTP request headers to send with the request.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the crawl forever.

    Returns:
        The decoded response body (``Response.text``).

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    resp = requests.get(url=url, headers=head, timeout=timeout).text
    return resp


def main():
    """Prompt for a page range and download the images of every listed post.

    For each listing page in the range, every post link is fetched, a
    folder named after the post title is created under ``weimei/``, and
    the images referenced by the post are saved there as ``1.png``,
    ``2.png``, ... Posts whose folder already exists are skipped.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/103.0.5060.114 Safari/537.36 Edg/103.0.1264.62 '
    }

    # Patterns are loop-invariant, so compile them once up front.
    post_link_re = re.compile(r'class="media-content" href="(.*?)"')
    post_title_re = re.compile(r'<h1 class="post-title mb-3">(.*?)</h1>')
    img_src_re = re.compile(r'src="(.*?)"')

    # Characters stripped from the title before it is used as a folder name.
    strip_from_title = str.maketrans('', '', '/|:')

    start = input("請輸入起始頁[start,end]:")
    end = input("請輸入終止頁:")

    for page in range(int(start), int(end) + 1):
        base_url = "https://www.vmgirls.net/page/{}?ref=www.xike.store".format(page)
        listing_html = get_html_str(base_url, headers)

        for post_url in post_link_re.findall(listing_html):
            post_html = get_html_str(post_url, headers)

            titles = post_title_re.findall(post_html)
            if not titles:
                # No title heading on this page: nothing to name the folder
                # after, so skip it instead of crashing on an empty list.
                continue

            # str methods return a new string; the result must be kept,
            # otherwise the title is used unsanitized.
            file_title = titles[-1].translate(strip_from_title)
            file_path = 'weimei/' + file_title
            if os.path.exists(file_path):
                print('文件夾已存在')
                continue
            # makedirs also creates the parent 'weimei' directory on first run.
            os.makedirs(file_path)

            img_urls = img_src_re.findall(post_html)
            # The first two src matches are skipped and numbering starts at 1,
            # exactly as the original index arithmetic did.
            # NOTE(review): confirm skipping the second match is intended
            # and not an off-by-one.
            for k, img_url in enumerate(img_urls[2:], start=1):
                img_name = str(k) + ".png"
                img = requests.get(url=img_url, headers=headers, timeout=10).content
                with open(os.path.join(file_path, img_name), 'wb') as fp:
                    fp.write(img)
                print(f"第{k}張下載成功")
                # Short pause between downloads to go easy on the server.
                time.sleep(0.3)


if __name__ == '__main__':
    main()
# 沒難度
標簽: