# 把爬取到的數據存儲到數據庫中
import pymysql
import re
import requests
# Fetch the HTML source of the search-result page.
url='https://search.jd.com/Search?keyword=ctf&enc=utf-8&wq=ctf'
headers={'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0'}
# Without a timeout, requests waits forever if the server never answers.
res=requests.get(url,headers=headers,timeout=10).text
def 過濾出需要的數據(res):
    """Extract product titles and prices from search-result HTML.

    Args:
        res: HTML text of the page.

    Returns:
        A ``(titles, prices)`` tuple.  ``titles`` holds the cleaned text of
        every ``<em>`` element that survives filtering; ``prices`` holds the
        inner text of every ``<i data-price=...>`` element.  The two lists
        are built independently, so their lengths are not guaranteed to
        match.
    """
    em_bodies = re.findall(r'<em>(.*?)</em>', res, re.S)
    prices = re.findall(r'<i data-price=.*?>(.*?)</i>', res, re.S)

    titles = []
    for body in em_bodies:
        text = ''
        if '¥' in body and len(body) > 1:
            text = body
        if '<font' in body:
            # Drop every <font ...> / </font> pair, not only the first
            # opening-tag spelling that happens to occur.
            text = re.sub(r'<font.*?>|</font>', '', body)
        if '<img' in body:
            # re.sub removes the tag itself.  str.strip() would treat the tag
            # as a *set of characters* and eat letters off both ends of the
            # title; it also needed a findall()[0] that raised IndexError
            # when the working text held no <img> tag.
            text = re.sub(r'<img.*?>', '', text)
        if '<span' in body:
            text = re.sub(r'<span.*?>|</span>', '', text)
        text = text.strip()
        # Entries of length 0 or 1 (e.g. a lone currency sign) are noise.
        if len(text) > 1:
            titles.append(text)
    return titles, prices
def MySQL操作(lis,s2):
    """Store scraped titles and prices in MySQL, then serve lookups.

    Creates the database ``商品查詢系統`` and the table ``ctf書籍`` if they do
    not exist yet, inserts one row per (title, price) pair, commits, and
    then repeatedly prompts for a title fragment and prints the matching
    rows until the user enters ``exit``.

    Args:
        lis: Product titles.
        s2: Product prices, paired with ``lis`` by position.
    """
    db = pymysql.connect(host='localhost', port=3306, user='root',
                         password='', charset='utf8')
    try:
        cur = db.cursor()
        try:
            # "if not exists" lets the script be run more than once.
            cur.execute("create database if not exists 商品查詢系統 character set utf8")
            cur.execute('use 商品查詢系統')
            cur.execute('create table if not exists ctf書籍'
                        '(書名 varchar(100),價格 varchar(100))')

            # Placeholders let the driver quote the values, so a title that
            # contains a quote neither breaks the statement nor injects SQL.
            # zip() stops at the shorter list, so unequal lengths no longer
            # raise IndexError.
            cur.executemany(
                "insert into ctf書籍(書名,價格) values (%s,%s)",
                list(zip(lis, s2)),
            )
            db.commit()

            while True:
                keyword = input('輸入書名:')
                # Leave before querying so "exit" is not searched as a title.
                if keyword == 'exit':
                    break
                cur.execute(
                    "select * from ctf書籍 where 書名 like %s",
                    (f"%{keyword}%",),
                )
                for row in cur.fetchall():
                    print(row[0], row[1])
        finally:
            cur.close()
    finally:
        # Close the connection even when a statement above fails.
        db.close()
# Parse titles and prices out of the fetched page, then store and query them.
x=過濾出需要的數據(res)
MySQL操作(x[0], x[1])
