尚硅谷Python爬蟲教程小白零基礎速通(含python基礎+爬蟲案例)

P76 爬取星巴克圖片源碼:
"""Download every product image from the Starbucks China menu page.

The menu page is fetched once, each product's image URL is read from the
inline ``background-image`` style of its preview element, and the images are
saved concurrently (one thread per image) into ``../starbucks_images``.
"""
import os
import re
import threading
import urllib.request as ur
from urllib.parse import urljoin

MENU_URL = 'https://www.starbucks.com.cn/menu/'
OUTPUT_DIR = '../starbucks_images'

# Matches a CSS ``url(...)`` token whose argument is double-quoted,
# single-quoted or unquoted; group 2 is the bare URL.
_CSS_URL_RE = re.compile(r"""url\(\s*(['"]?)(.*?)\1\s*\)""")


def image_url_from_style(style, base_url=MENU_URL):
    """Return the absolute image URL referenced by an inline CSS style.

    Args:
        style: Value of a ``style`` attribute, e.g.
            ``background-image: url("/images/products/x.jpg")``.
        base_url: Page URL that relative image paths are resolved against.

    Returns:
        The absolute image URL, or ``None`` when *style* contains no
        non-empty ``url(...)`` token.
    """
    match = _CSS_URL_RE.search(style)
    if match is None or not match.group(2):
        return None
    # urljoin handles site-relative paths as well as URLs that are already
    # absolute, which fixed-offset string slicing cannot do.
    return urljoin(base_url, match.group(2))


def build_filename(name, directory=OUTPUT_DIR):
    """Return the path under *directory* where the image for *name* is saved.

    A "/" in the product name would be read as a path separator, so it is
    replaced with " or ".
    """
    return f'{directory}/{name.replace("/", " or ")}.jpg'


def fetch_menu_entries(page_url=MENU_URL):
    """Fetch the menu page and return ``(style, name)`` pairs.

    Styles come from ``div`` elements with class ``preview circle`` and names
    from ``strong`` elements. The two lists are paired positionally, and the
    pairing stops at the shorter list.
    """
    # lxml is a third-party package; importing it here keeps the pure helpers
    # in this module importable on machines where it is not installed.
    from lxml import etree

    # The context manager closes the HTTP response once the body is read.
    with ur.urlopen(page_url) as response:
        content = response.read().decode('utf-8')

    tree = etree.HTML(content)
    # Extract the image address attribute and the product names.
    style_list = tree.xpath("//div[@class='preview circle']/@style")
    name_list = tree.xpath("//strong/text()")
    return list(zip(style_list, name_list))


def download_image(image_url, filename, index):
    """Thread worker: save *image_url* to *filename* and report the outcome.

    Args:
        image_url: Absolute URL of the image to download.
        filename: Destination path of the downloaded file.
        index: 1-based position of the image, used only in the messages.

    Exceptions raised by the download are caught and printed instead of
    being propagated.
    """
    try:
        ur.urlretrieve(image_url, filename)
        print(f"下載圖片{index}: {filename} 完成")
    except Exception as e:
        print(f"下載圖片{index}: {filename} 時(shí)出錯(cuò): {str(e)}")


def main():
    """Download all menu images, one thread per image, and wait for them."""
    entries = fetch_menu_entries()

    # Create the directory that the images are saved into.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # Download the images with multiple threads.
    threads = []
    for index, (style, name) in enumerate(entries, start=1):
        filename = build_filename(name)
        image_url = image_url_from_style(style)
        if image_url is None:
            # Report the entry instead of requesting a malformed URL.
            print(f"下載圖片{index}: {filename} 時(shí)出錯(cuò): "
                  f"no url(...) found in style {style!r}")
            continue
        thread = threading.Thread(
            target=download_image,
            args=(image_url, filename, index),
        )
        threads.append(thread)
        thread.start()

    # Wait for every thread to finish.
    for thread in threads:
        thread.join()

    print("圖片下載完成")


if __name__ == "__main__":
    main()
標簽: