【爬蟲完整版】學(xué)完可以接任何爬蟲副業(yè)單!來者不拒!目前B站最完整的python學(xué)

"""Scrape produce wholesale quotes from lvguo.net and save them to 價(jià)格表.csv."""
from bs4 import BeautifulSoup
import requests
from fake_useragent import UserAgent
import csv

# Spoof a regular Firefox User-Agent so the site does not reject the request.
ua = UserAgent()
headers = {"User-Agent": ua.firefox}

url = 'https://www.lvguo.net/baojia'
# timeout prevents the script from hanging forever on a dead connection
resp = requests.get(url, headers=headers, timeout=10)
resp.raise_for_status()  # fail loudly on HTTP errors instead of parsing an error page

# features="html.parser" selects the stdlib HTML parser for the response body
page = BeautifulSoup(resp.text, features="html.parser")

# The quotes live in <table class="bjtbl">; [1:] skips the header row.
table = page.find('table', attrs={"class": "bjtbl"})
rows = table.find_all('tr')[1:]

# "with" guarantees the file is closed even if parsing raises halfway through.
# newline='' is required by the csv module; utf-8-sig keeps Excel happy with Chinese text.
with open('價(jià)格表.csv', mode='w', newline='', encoding='utf-8-sig') as f:
    writer = csv.writer(f)
    for row in rows:
        cells = row.find_all('td')
        if len(cells) < 4:
            continue  # skip malformed rows instead of crashing with IndexError
        date = cells[0].text       # renamed from `time`/`type` to stop shadowing builtins
        address = cells[1].text
        name = cells[2].text
        # The 4th cell looks like "品種:價(jià)格\n單位" — split it once into category and price.
        lines = cells[3].text.split("\n")
        head = lines[0].split(':')
        category = head[0]
        if len(head) > 1 and len(lines) > 1:
            price = head[1] + lines[1]
        else:
            price = cells[3].text  # unexpected layout: keep the raw text rather than crash
        writer.writerow([date, address, name, category, price])
標(biāo)簽: