# 最新版 (latest version)

import time

import requests
from bs4 import BeautifulSoup as bs
from selenium import webdriver
from selenium.webdriver.common.by import By
# Movie page whose short-comment list is scraped; also sent as the Referer.
SUBJECT_URL = "https://movie.douban.com/subject/33455421/?tag=%E7%83%AD%E9%97%A8&from=gaia"

# Seconds to wait for the browser to finish navigating before reading its URL.
PAGE_SETTLE_SECONDS = 3

# Upper bound (seconds) for the HTTP fetch so a stalled connection cannot hang
# the whole run.
REQUEST_TIMEOUT = 10

# Cookies captured from a browser session and replayed with every request.
# The original literal listed __utmc/__utmz/__utma/__utmb twice; a dict keeps
# only the last value per key, so only those effective values are listed here.
# NOTE(review): these are hard-coded session values and will eventually go
# stale - confirm whether they should be refreshed or taken from the driver.
COOKIES = {
    'll': '"118254"',
    'bid': 'aXdoJiY-aR0',
    'ap_v': '0,6.0',
    '_pk_id.100001.4cf6': '302c3bdefe2a6c04.1685689908.',
    '__utmc': '223695111',
    '__utmz': '223695111.1685689908.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic',
    '__yadk_uid': 'hD9K77Vag4LPaL52WBu3tIOusW1m831I',
    '_vwo_uuid_v2': 'DA61E56E708BE65DB3A6C5A77FB5C29CB|e38615be2b4802b5e32f0f56bc7adc44',
    '__gads': 'ID=ba12cee8a1683aac-22eb64aab1e10079:T=1685689921:RT=1685690733:S=ALNI_MZlB3mZ0tgPF9c5nBhX4c6PBYxZNA',
    '__gpi': 'UID=00000c0e8f42923b:T=1685689921:RT=1685690733:S=ALNI_MaxDBxu9Ora9k5hJt6hWuENVsK1FA',
    '__utma': '223695111.960813586.1685689908.1685689908.1685693862.2',
    '__utmb': '223695111.0.10.1685693862',
    '_pk_ref.100001.4cf6': '%5B%22%22%2C%22%22%2C1685693862%2C%22https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DHA_VPCeMD3sZH-y0jBKk29963IqtrUqQVP56SxjwgBVGpmM7MzaGMT8yzKr116Iw%26wd%3D%26eqid%3De0e168f600022f96000000066479962a%22%5D',
    '_pk_ses.100001.4cf6': '1',
}

# Browser-like request headers sent with every page fetch.
HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'Referer': SUBJECT_URL,
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-User': '?1',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36',
    'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
}


def _text_of(tag):
    """Return the text of *tag*, or an empty string when the tag is missing."""
    return tag.get_text() if tag is not None else ''


def _title_of(tag):
    """Return the ``title`` attribute of *tag*, or None when unavailable."""
    return tag.get('title') if tag is not None else None


def parse_comments(html):
    """Extract one row per comment from a comments-page HTML document.

    Args:
        html: The page markup as text.

    Returns:
        A list of ``[name, star, time, location, comment]`` lists, one per
        element with class ``comment-info``. ``star`` and ``time`` are the
        ``title`` attributes of the rating / comment-time elements (None when
        absent); the other fields are element text ('' when absent).
    """
    soup = bs(html, 'lxml')
    rows = []
    for info in soup.find_all(class_='comment-info'):
        # Each field is looked up relative to its own comment instead of by
        # position in page-wide lists: a comment without a rating or location
        # would otherwise shift every later value onto the wrong comment (or
        # raise IndexError).
        # NOTE(review): assumes the author link, rating, time and location
        # live inside the comment-info element and that the comment text
        # (class "short") follows it in the document - confirm against the
        # live page markup.
        rows.append([
            _text_of(info.find('a')),
            _title_of(info.find('span', class_='rating')),
            _title_of(info.find(class_='comment-time')),
            _text_of(info.find(class_='comment-location')),
            _text_of(info.find_next(class_='short')),
        ])
    return rows


def save_comments(path, rows):
    """Append every row to *path*, each followed by a blank line.

    Args:
        path: Destination text file; opened in append mode as UTF-8.
        rows: Iterable of comment rows; each is written via ``str(row)``.

    The file is opened once and all available rows are written, so a page
    holding fewer than 20 comments no longer raises IndexError. Nothing is
    created when *rows* is empty.
    """
    rows = list(rows)
    if not rows:
        return
    with open(path, 'a', encoding='utf-8') as f:
        f.writelines(str(row) + '\n\n' for row in rows)


def main():
    """Prompt for a page count, then scrape that many comment pages.

    The browser is used only for navigation; each page is re-fetched with
    ``requests`` using the browser's current URL, parsed, printed, and saved
    to ``comment<N>.txt`` (N is the zero-based page index).

    Raises:
        ValueError: If the entered page count is not an integer.
        requests.HTTPError: If a page fetch returns an error status.
    """
    # Parse the count before launching the browser so bad input fails fast.
    page_count = int(input('請輸入爬取頁面的數量:'))

    driver = webdriver.Chrome()
    try:
        driver.get(SUBJECT_URL)
        # find_element_by_xpath was removed in Selenium 4.3; By.XPATH is the
        # supported form. The link text is matched in Simplified Chinese.
        # NOTE(review): assumes the site serves Simplified text, so the
        # Traditional variant would never match - confirm on the live page.
        driver.find_element(By.XPATH, "//a[contains(text(),' 条')]").click()

        for page_index in range(page_count):
            time.sleep(PAGE_SETTLE_SECONDS)
            # Follow the most recently opened window/tab.
            driver.switch_to.window(driver.window_handles[-1])

            response = requests.get(
                driver.current_url,
                cookies=COOKIES,
                headers=HEADERS,
                timeout=REQUEST_TIMEOUT,
            )
            # Fail loudly on a blocked/expired session instead of silently
            # parsing an error page.
            response.raise_for_status()

            rows = parse_comments(response.text)
            print(rows)
            save_comments('comment{}.txt'.format(page_index), rows)

            if page_index == page_count - 1:
                # No further page is needed; skip the pointless navigation.
                break

            time.sleep(PAGE_SETTLE_SECONDS)
            # find_elements returns an empty list (rather than raising) when
            # there is no "next page" link, i.e. the last page was reached.
            next_links = driver.find_elements(By.XPATH, "//a[contains(text(),'后页')]")
            if not next_links:
                break
            next_links[0].click()
    finally:
        # Always release the browser process, even if scraping fails.
        driver.quit()


if __name__ == "__main__":
    main()