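# (stray text "呃呃呃" found at the top of the file; kept as a comment so it is not executed as a name lookup)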
import time

import requests
from bs4 import BeautifulSoup as bs
from selenium import webdriver
from selenium.webdriver.common.by import By
# Scrape comment pages: open the movie page in Chrome, click through to the
# comment list, then for each requested page re-download the browser's current
# URL with `requests`, parse it with BeautifulSoup, print the rows and append
# them to comment<N>.txt (N = zero-based page index).

MOVIE_URL = "https://movie.douban.com/subject/33455421/?tag=%E7%83%AD%E9%97%A8&from=gaia"

# The same XPath is used both to open the comment list and to advance a page.
# NOTE(review): ' 條' is a Traditional-Chinese character; confirm it matches the
# link text the site actually serves, otherwise the lookup finds nothing.
PAGE_LINK_XPATH = "//a[contains(text(),' 條')]"

# At most this many rows are written to each output file.
ROWS_PER_FILE = 20

# Seconds to wait for the HTTP download before giving up instead of hanging.
REQUEST_TIMEOUT = 30

# Cookies captured from a browser session. The capture contained two values
# for each of __utmc/__utmz/__utma/__utmb; a dict can hold only one per key, so
# only the value that was effectively used (the later one) is kept here.
COOKIES = {
    'll': '"118254"',
    'bid': 'aXdoJiY-aR0',
    'ap_v': '0,6.0',
    '_pk_id.100001.4cf6': '302c3bdefe2a6c04.1685689908.',
    '__utmc': '223695111',
    '__utmz': '223695111.1685689908.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic',
    '__yadk_uid': 'hD9K77Vag4LPaL52WBu3tIOusW1m831I',
    '_vwo_uuid_v2': 'DA61E56E708BE65DB3A6C5A77FB5C29CB|e38615be2b4802b5e32f0f56bc7adc44',
    '__gads': 'ID=ba12cee8a1683aac-22eb64aab1e10079:T=1685689921:RT=1685690733:S=ALNI_MZlB3mZ0tgPF9c5nBhX4c6PBYxZNA',
    '__gpi': 'UID=00000c0e8f42923b:T=1685689921:RT=1685690733:S=ALNI_MaxDBxu9Ora9k5hJt6hWuENVsK1FA',
    '__utma': '223695111.960813586.1685689908.1685689908.1685693862.2',
    '__utmb': '223695111.0.10.1685693862',
    '_pk_ref.100001.4cf6': '%5B%22%22%2C%22%22%2C1685693862%2C%22https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DHA_VPCeMD3sZH-y0jBKk29963IqtrUqQVP56SxjwgBVGpmM7MzaGMT8yzKr116Iw%26wd%3D%26eqid%3De0e168f600022f96000000066479962a%22%5D',
    '_pk_ses.100001.4cf6': '1',
}

# Browser-like request headers; cookies are passed separately via COOKIES.
HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'Referer': MOVIE_URL,
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-User': '?1',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36',
    'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
}


def _parse_comments(soup):
    """Return one ``[name, star, time, address, comment]`` list per comment in *soup*.

    Each field is collected page-wide and the lists are matched up by
    position, so the k-th row combines the k-th match of every selector.
    ``zip`` stops at the shortest list, so a page where some field is missing
    yields fewer rows instead of raising ``IndexError``.
    """
    # Every selector is evaluated once per page rather than once per comment.
    infos = soup.find_all(attrs={'class': {'comment-info'}})
    names = soup.find_all('a', attrs={'class': {''}})
    ratings = soup.find_all('span', attrs={'class': {'rating'}})
    times = soup.find_all(attrs={'class': {'comment-time'}})
    locations = soup.find_all(attrs={'class': {'comment-location'}})
    texts = soup.find_all(attrs={'class': {'short'}})
    return [
        [
            name.get_text(),
            rating.get('title'),
            comment_time.get('title'),
            location.get_text(),
            text.get_text(),
        ]
        for _info, name, rating, comment_time, location, text in zip(
            infos, names, ratings, times, locations, texts
        )
    ]


# input() returns a string; range() needs an int. A non-numeric answer raises
# ValueError here, before any browser is started.
page_count = int(input('請輸入爬取頁面的數(shù)量:'))

driver = webdriver.Chrome()
try:
    driver.get(MOVIE_URL)
    driver.find_element(By.XPATH, PAGE_LINK_XPATH).click()

    for page_index in range(page_count):
        # Fixed pause after each click; presumably lets the new page load.
        time.sleep(3)
        # Work on the most recently opened window/tab.
        driver.switch_to.window(driver.window_handles[-1])
        page_url = driver.current_url

        response = requests.get(
            page_url,
            cookies=COOKIES,
            headers=HEADERS,
            timeout=REQUEST_TIMEOUT,
        )
        rows = _parse_comments(bs(response.text, 'lxml'))
        print(rows)

        # Open the file once per page and write only the rows that exist.
        with open('comment{}.txt'.format(page_index), 'a', encoding='utf-8') as f:
            for row in rows[:ROWS_PER_FILE]:
                f.write(str(row))
                f.write('\n\n')

        # No need to advance past the last requested page.
        if page_index < page_count - 1:
            driver.find_element(By.XPATH, PAGE_LINK_XPATH).click()
finally:
    # Always shut the browser/driver process down, even after an error.
    driver.quit()