# 獲得b站彈幕并分析生成詞云-以露米煙火之下為例
# 分析網頁源代碼 "pages":[{"cid":860252148,"  此處獲取視頻彈幕cid代碼
# 例:<d p="57.01800,1,25,16777215,1665738755,0,579ea34d,1163059996662086144,10">煙火 fy 煙火</d>
# url = https://comment.bilibili.com/{}.xml    # {}填充cid

# 使用的庫
import requests
from lxml import etree
from imageio.v2 import imread
import jieba
import wordcloud

# 爬取保存彈幕信息網頁  請求標頭見瀏覽器
def crawler(cid=860252148):
    """Download the danmaku (bullet-comment) XML of one video and save it.

    The response body is written unchanged to ``煙火之下.xml`` (UTF-8).

    Args:
        cid: Comment id of the video. It fills the ``{}`` placeholder of the
            comment endpoint ``https://comment.bilibili.com/{}.xml``. The
            default is the example cid from the notes at the top of the file.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = 'https://comment.bilibili.com/{}.xml'.format(cid)
    # The empty values are placeholders: copy the real ones from the
    # request headers shown in the browser's developer tools.
    headers = {
        'authority': 'www.bilibili.com',
        'accept': '',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'cookie': '',
        'user-agent': '',
    }
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url=url, headers=headers, timeout=10)
    response.encoding = 'utf-8'
    print(response.status_code)
    # Fail loudly instead of saving an error page as if it were danmaku XML.
    response.raise_for_status()
    with open('煙火之下.xml', 'w', encoding='utf-8') as fw:
        fw.write(response.text)

# 解析xml文本轉化txt
def xml_parse():
    """Extract the danmaku text from ``煙火之下.xml`` into ``彈幕.txt``.

    Each non-empty ``<d>`` element under the root contributes one line of
    output. The root tag is printed as a quick sanity check of the download.
    """
    tree = etree.parse('煙火之下.xml')
    root = tree.getroot()
    print('i:', root.tag)
    # Only <d> elements carry comment text. Selecting them explicitly keeps
    # the text of any other child of the root out of the corpus, and replaces
    # the deprecated getchildren() call.
    # Empty elements have text None, which writelines() would reject.
    lines = [d.text + '\n' for d in root.findall('d') if d.text]
    with open('彈幕.txt', 'w', encoding='utf-8') as f:
        # One comment per line so neighbouring comments do not fuse together.
        f.writelines(lines)

# 生成詞云  此處請自行準備mask圖片,本實踐圖片為一只大鳥。
def word_cloud_pic():
    """Render a word cloud of the danmaku in ``彈幕.txt`` to ``彈幕.png``.

    The text is segmented with jieba and drawn inside the shape given by the
    mask image ``1.png``, using the ``simkai.ttf`` font.
    """
    stopwords = {'chun', 'a'}
    mask = imread('1.png')
    with open('彈幕.txt', 'r', encoding='utf-8') as f:
        danmaku_text = f.read()
    words = jieba.lcut(danmaku_text)
    # WordCloud tokenises on word boundaries, so the jieba tokens must be
    # space-separated; joining with '' would undo the segmentation.
    txt = ' '.join(words)
    wo = wordcloud.WordCloud(
        width=800,
        height=600,
        background_color='white',
        font_path="simkai.ttf",
        stopwords=stopwords,
        max_font_size=32,
        min_font_size=4,
        font_step=1,
        max_words=100,
        mask=mask,
    )
    wo.generate(txt)
    wo.to_file('彈幕.png')

# 運行程序
if __name__ == '__main__':
    # Pipeline: download the XML, extract the comment text, draw the cloud.
    crawler()
    xml_parse()
    word_cloud_pic()

# 私貨  關注露米Lumi_Official喵,謝謝了喵!!!

