最美情侣中文字幕电影,在线麻豆精品传媒,在线网站高清黄,久久黄色视频

歡迎光臨散文網 會員登陸 & 注冊

Python課程天花板,Python入門+Python爬蟲+Python數據分析

2023-07-06 16:23 作者:可愛不是可受  | 我要投稿

跟著前幾個視頻做了一下豆瓣Top250的網絡爬蟲,自己小改了一下代碼,大部分還是老師的源代碼

from bs4 import BeautifulSoup

import re

import urllib.request,urllib.error

import xlwt

import sqlite3

def main():
    """Scrape the Douban Top 250 listing and save the rows to a workbook.

    Passes the listing's base URL to getData and hands the returned rows
    to saveData.
    """
    # Listing URL without the offset value; getData appends the start index.
    baseurl = "https://movie.douban.com/top250?start="
    Datalist = getData(baseurl)
    # Plain relative name: same location as ".\\Data.xls" on Windows, and
    # also a sane file name on other platforms.
    savepath = "Data.xls"
    saveData(Datalist, savepath)


# Precompiled regular expressions, applied to the HTML text of one movie entry.

# Link to the movie's detail page.
findlink = re.compile(r'<a href="(.*?)">')

# Poster image URL; re.S lets "." match newlines as well.
findImgSrc = re.compile(r'<img.*src="(.*?)"', re.S)

# Movie title(s).
findTitle = re.compile(r'<span class="title">(.*)</span>')

# Average rating.
findRating = re.compile(r'<span class="rating_num" property="v:average">(.*)</span>')

# Number of ratings. The literal must be the bare label text: parenthesised
# romanization inside it would act as extra capture groups and change what
# findall returns. NOTE(review): assumes the page serves the simplified
# label - confirm against live HTML.
findJudge = re.compile(r'<span>(\d*)人评价</span>')

# One-line summary (tagline) of the movie.
findInq = re.compile(r'<span class="inq">(.*)</span>')

# Paragraph holding the movie's related information; may span several lines.
findBd = re.compile(r'<p class="">(.*?)</p>', re.S)

# Scrape the listing pages.

def _first(matches, default=" "):
    """Return the first element of *matches*, or *default* when it is empty."""
    return matches[0] if matches else default


def _parse_item(item_html):
    """Extract the eight output fields from the HTML text of one movie entry.

    Returns a list of strings in this order: detail link, image link,
    first title, second title, rating, number of ratings, summary,
    related information. Missing fields are filled with " " (or "" for the
    related-information text) so every row has the same length.
    """
    # Each cell must be a scalar string: take the first match instead of
    # storing the whole findall() list, which a spreadsheet cell cannot hold.
    link = _first(re.findall(findlink, item_html))
    img_src = _first(re.findall(findImgSrc, item_html))

    titles = re.findall(findTitle, item_html)
    first_title = _first(titles)
    # A second title is recorded only when exactly two titles are present;
    # non-breaking spaces around it are removed.
    second_title = titles[1].replace("\xa0", "") if len(titles) == 2 else " "

    rating = _first(re.findall(findRating, item_html))
    judge = _first(re.findall(findJudge, item_html))

    # Full stops in the summary are replaced with spaces.
    inq = _first(re.findall(findInq, item_html)).replace("。", " ")

    # Work on the matched text itself (not on str(list)), so no list
    # brackets/quotes or escaped "\\n" sequences leak into the cell.
    bd = _first(re.findall(findBd, item_html), "")
    bd = re.sub(r"<br\s*/?>\s*", " ", bd)
    bd = bd.replace("\n", "").replace("\xa0", "")

    return [link, img_src, first_title, second_title, rating, judge, inq,
            bd.strip()]


def getData(baseurl, pages=10, page_size=25):
    """Download the listing pages and parse every movie entry on them.

    Args:
        baseurl: Listing URL ending right before the numeric start offset.
        pages: Number of consecutive pages to fetch (default 10).
        page_size: Offset step between pages (default 25).

    Returns:
        A list with one 8-element list of strings per movie entry found.
    """
    Datalist = []
    for page in range(pages):
        html = askUrl(baseurl + str(page * page_size))
        if not html:
            # askUrl returns "" when the request failed; nothing to parse.
            continue
        soup = BeautifulSoup(html, "html.parser")
        Datalist.extend(
            _parse_item(str(item))
            for item in soup.find_all("div", class_="item")
        )
    return Datalist


# Save the scraped rows.

def saveData(datalist, savepath):
    """Write the scraped rows to an .xls workbook saved at *savepath*.

    Args:
        datalist: Sequence of rows; each row is a sequence of cell values.
            Only as many cells as there are header columns are written.
        savepath: Destination path passed to the workbook's save().
    """
    print("save....")
    book = xlwt.Workbook(encoding="utf-8")
    sheet = book.add_sheet('豆瓣電影Top250', cell_overwrite_ok=True)
    col = ("電影鏈接", "封面圖片鏈接", "電影中文名", "電影外文名",
           "評分", "評價數", "概況", "相關信息")
    # Header row.
    for j, header in enumerate(col):
        sheet.write(0, j, header)
    # Iterate over the rows actually present rather than a fixed count, so a
    # partially failed scrape does not end in an IndexError.
    for i, data in enumerate(datalist):
        print("第%d條" % i)
        for j, value in enumerate(data[:len(col)]):
            sheet.write(i + 1, j, value)
    book.save(savepath)


def askUrl(url):
    """Fetch *url* and return the response body decoded as UTF-8.

    Args:
        url: Address to request.

    Returns:
        The decoded body, or "" when the request raises
        urllib.error.URLError (its code and/or reason are printed).
    """
    head = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
    }  # Send a browser-like User-Agent with the request.
    request = urllib.request.Request(url, headers=head)
    html = ""
    try:
        # The context manager closes the response even if read/decode fails.
        with urllib.request.urlopen(request) as response:
            html = response.read().decode("utf-8")
    except urllib.error.URLError as e:
        if hasattr(e, "code"):
            print(e.code)
        if hasattr(e, "reason"):
            print(e.reason)
    return html

# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

Python課程天花板,Python入門+Python爬蟲+Python數據分析的評論 (共 條)

分享到微博請遵守國家法律
吐鲁番市| 沐川县| 苍溪县| 满洲里市| 九江市| 滕州市| 龙门县| 博爱县| 新密市| 田阳县| 澄迈县| 周宁县| 吉林省| 连城县| 涞源县| 洛阳市| 鄂尔多斯市| 镇远县| 平顶山市| 乌兰浩特市| 蓝山县| 宿松县| 孟村| 郁南县| 牙克石市| 郓城县| 清原| 东明县| 长沙县| 出国| 柯坪县| 雷州市| 太原市| 西畴县| 黄石市| 华亭县| 闸北区| 桃江县| 申扎县| 手游| 罗定市|