【python】爬取京東華碩筆記本自營店代碼
配合視頻使用(空間里有,還沒過審核就沒鏈接)

import urllib.request
from bs4 import BeautifulSoup
import re
import pandas
import time
time.sleep(2)
import json
# Text file holding one product URL per line (the scraper's input).
INPUT_PATH = 'C:/Users/28051/Desktop/pclinks.txt'
# CSV file the collected table is written to.
OUTPUT_PATH = 'C:/Users/28051/Desktop/pc.csv'

# Captures the SKU id from a product page URL such as
# http://item.jd.com/<sku>.html (http or https).
SKU_PATTERN = re.compile(r'https?://item\.jd\.com/(.*?)\.html')

# Matches a JSONP envelope ``callback( ... );`` and captures the payload.
JSONP_PATTERN = re.compile(r'^\s*[\w$.]+\s*\((.*)\)\s*;?\s*$', re.DOTALL)

# Header attached to every request so the client identifies as a browser
# instead of using urllib's default User-Agent.
HEADERS = ('User-Agent',
           'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36')

PRICE_URL = 'https://p.3.cn/prices/mgets?skuIds=J_'
COMMENT_URL = ('https://club.jd.com/comment/skuProductPageComments.action'
               '?callback=fetchJSON_comment98&productId={sku}'
               '&score=0&sortType=5&page=0&pageSize=10&isShadowSku=0&fold=1')

# Column order of the resulting table.
COLUMNS = ['url', 'id', 'info', 'pp', 'pm', 'pop', 'commentcount', 'goodrate']


def parse_url_lines(text):
    """Return the non-blank lines of *text*, stripped of surrounding whitespace.

    Blank lines (including the empty entry produced by a trailing newline)
    are dropped, so the result does not depend on the file having a fixed
    number of lines.
    """
    return [line.strip() for line in text.splitlines() if line.strip()]


def read_urls(path):
    """Read the product URLs stored one per line in the UTF-8 file *path*."""
    with open(path, 'r', encoding='utf-8') as url_file:
        return parse_url_lines(url_file.read())


def extract_sku_id(url):
    """Return the SKU id embedded in a product *url*, or ``None`` if absent."""
    match = SKU_PATTERN.search(url)
    return match.group(1) if match else None


def strip_jsonp(payload):
    """Return the JSON text wrapped inside a JSONP response.

    Only the outer ``callback(`` prefix and ``)``/``);`` suffix are removed;
    any ``);`` occurring inside the JSON itself is left intact. Text that is
    not wrapped in a callback is returned unchanged.
    """
    match = JSONP_PATTERN.match(payload)
    return match.group(1) if match else payload


def fetch(url, timeout=None):
    """Download *url* with the browser User-Agent and return the raw bytes.

    ``timeout`` is in seconds; ``None`` means no explicit timeout.
    The response is always closed, even if reading fails.
    """
    request = urllib.request.Request(url, headers=dict([HEADERS]))
    with urllib.request.urlopen(request, timeout=timeout) as response:
        return response.read()


def fetch_parameter_text(url):
    """Return the text of the product-parameter list on the page at *url*.

    When the page contains several matching lists the last one is used.
    Returns an empty string when the page has no such list, so that every
    URL still yields exactly one value.
    """
    soup = BeautifulSoup(fetch(url), 'lxml')
    lists = soup.find_all('ul', {'class': 'parameter2 p-parameter-list'})
    if not lists:
        return ''
    return lists[-1].get_text().strip()


def fetch_prices(sku_id):
    """Return the ``(p, m, op)`` price fields reported for *sku_id*.

    Per the original author's notes: ``p`` is the current price, ``m`` the
    highest price and ``op`` the list (guide) price.
    """
    record = json.loads(fetch(PRICE_URL + sku_id, timeout=5))[0]
    return record['p'], record['m'], record['op']


def fetch_comment_summary(sku_id):
    """Return ``(commentCount, goodRate)`` from the comment summary of *sku_id*.

    The endpoint answers with GBK-encoded JSONP. The original author's notes
    also reference generalCount, generalRate, poorCount, videoCount and
    showCount in the same summary object; they are not collected here.
    """
    payload = fetch(COMMENT_URL.format(sku=sku_id)).decode('GBK')
    summary = json.loads(strip_jsonp(payload))['productCommentSummary']
    return summary['commentCount'], summary['goodRate']


def scrape(urls):
    """Collect one row of data per recognisable product URL in *urls*.

    Each row is built in full before it is stored, so all columns always
    have the same length. URLs without a SKU id are reported and skipped.
    """
    rows = []
    for url in urls:
        sku_id = extract_sku_id(url)
        if sku_id is None:
            print('skipping unrecognised URL:', url)
            continue
        print(url)
        info = fetch_parameter_text(url)
        price_now, price_max, price_list = fetch_prices(sku_id)
        comment_count, good_rate = fetch_comment_summary(sku_id)
        rows.append({
            'url': url,
            'id': sku_id,
            'info': info,
            'pp': price_now,
            'pm': price_max,
            'pop': price_list,
            'commentcount': comment_count,
            'goodrate': good_rate,
        })
        print(len(rows))
    return rows


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH):
    """Scrape every URL listed in *input_path* and save the table as CSV.

    The CSV is written with a UTF-8 byte-order mark (``utf-8-sig``).
    Returns the resulting DataFrame.
    """
    urls = read_urls(input_path)
    print(len(urls))
    frame = pandas.DataFrame(scrape(urls), columns=COLUMNS)
    frame.to_csv(output_path, encoding='utf-8-sig')
    print(frame)
    return frame


if __name__ == '__main__':
    main()