【Python超強爬蟲5天速成】學完可以接任何爬蟲副業!來者不拒!目前B站最完整

# La la la -- here is my homework submission.
#
# Wallpaper scraper for the "meinv" listing on desk.zol.com.cn.
# Flow: fetch the listing page -> collect album links -> for each album page,
# read the embedded `deskPicArr` JavaScript array -> download every image at
# its original size into the local `wallpaper/` directory.

import json
import os
import re
import sys
from urllib.parse import urljoin

import requests
from lxml import etree

# Request headers sent with every HTTP call.
# NOTE(review): the cookie looks like a captured browser session and has
# presumably expired -- confirm whether the site still requires it.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.50',
    'cookie': 'lv=1676985897; vn=1; ip_ck=4MCF4f//j7QuNzc0NjU4LjE2NzY5ODU4OTc=; Hm_lvt_ae5edc2bc4fc71370807f6187f0a2dd0=1676985897; Adshow=0; Hm_lpvt_ae5edc2bc4fc71370807f6187f0a2dd0=1676987325; questionnaire_pv=1676937645',
}

# Listing page the crawl starts from.
url = 'https://desk.zol.com.cn/meinv/'

# Directory that downloaded images are written to.
OUTPUT_DIR = 'wallpaper'

# Seconds to wait for a server response before giving up on a request.
REQUEST_TIMEOUT = 15

# Captures the value assigned to `var deskPicArr` in an album page's inline
# script. Compiled once at import time instead of on every get_imgs() call.
DESK_PIC_ARR_RE = re.compile(
    r'var deskPicArr.*?=(?P<deskPicArr>.*?);', re.S
)


def get_home_page(url):
    """Fetch *url* and return its body decoded as GB2312 text.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-success HTTP status.
    """
    res = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of parsing an error page as content.
    res.raise_for_status()
    res.encoding = 'GB2312'
    return res.text


def get_urls(html):
    """Return the album link hrefs found in the listing page *html*."""
    et = etree.HTML(html)
    return et.xpath(
        '/html/body/div/div/ul[@class="pic-list2 clearfix"]/li/a/@href'
    )


def get_imgs(urls):
    """Return the original-size image URLs listed on one album page.

    Args:
        urls: URL of a single album page. The plural name is kept for
            backward compatibility with existing callers.

    Returns:
        A list of image URLs with the ``##SIZE##`` placeholder replaced by
        each entry's ``oriSize``. The list is empty when the page has no
        ``deskPicArr`` assignment or the array has no usable ``list``.

    Raises:
        requests.RequestException: if the album page cannot be fetched.
        ValueError: if the captured ``deskPicArr`` text is not valid JSON.
    """
    html = get_home_page(urls)
    match = DESK_PIC_ARR_RE.search(html)
    if match is None:
        # Page layout differs from what the pattern expects; nothing to do.
        return []

    desk_pic = json.loads(match.group('deskPicArr').strip())
    if not isinstance(desk_pic, dict):
        return []

    imgsrcs = []
    for item in desk_pic.get('list') or []:
        ori_size = item.get('oriSize')
        imgsrc = item.get('imgsrc')
        # Skip incomplete entries rather than crashing on str.replace(None).
        if not ori_size or not imgsrc:
            continue
        imgsrcs.append(imgsrc.replace('##SIZE##', ori_size))
    return imgsrcs


def download(imgsrc):
    """Download *imgsrc* into ``wallpaper/`` under its URL basename.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-success HTTP status (so no error page is saved as an image).
    """
    res = requests.get(url=imgsrc, headers=headers, timeout=REQUEST_TIMEOUT)
    res.raise_for_status()
    name = imgsrc.split('/')[-1]
    # The target directory is not guaranteed to exist on a fresh checkout.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    with open(os.path.join(OUTPUT_DIR, name), 'wb') as f:
        f.write(res.content)


def run():
    """Crawl the listing page and download every image of every album.

    A failure on one album or one image is reported on stderr and skipped so
    that the rest of the crawl can continue.
    """
    html = get_home_page(url)
    album_hrefs = get_urls(html)
    # The first two links are skipped, as in the original script.
    # NOTE(review): presumably they are not regular album entries -- confirm.
    for href in album_hrefs[2:]:
        # urljoin handles root-relative hrefs ("/bizhi/...") like the old
        # string concatenation did, and also absolute or relative ones.
        album_url = urljoin(url, href)
        try:
            imgsrcs = get_imgs(album_url)
        except (requests.RequestException, ValueError) as exc:
            print(f'skipping album {album_url}: {exc}', file=sys.stderr)
            continue
        for imgsrc in imgsrcs:
            try:
                download(imgsrc)
            except requests.RequestException as exc:
                print(f'skipping image {imgsrc}: {exc}', file=sys.stderr)


if __name__ == '__main__':
    run()
標簽: