Scrapy 返回中文亂碼
對於 Scrapy 返回的亂碼數據,剛開始在 settings.py 中配置了 FEED_EXPORT_ENCODING = 'utf-8',發現還是不起作用(該設置只影響導出文件的編碼,不影響響應內容的解碼),
於是想到了中間件:在請求返回的時候,對返回的內容進行轉碼處理。
def process_response(self, request, response, spider):
    """Re-wrap the downloaded response so its body is decoded as UTF-8.

    Called with the response returned from the downloader.

    Args:
        request: The request that produced ``response``.
        response: The response handed back by the downloader.
        spider: The spider this response is intended for (unused here).

    Returns:
        An ``HtmlResponse`` with the same URL, status, headers, flags and
        body as ``response``, with its encoding forced to ``'utf-8'``.
    """
    # Carry over the real status, headers and flags instead of reporting
    # every page as a bare 200, so HTTP errors remain visible downstream.
    return HtmlResponse(
        url=response.url,
        status=response.status,
        headers=response.headers,
        flags=response.flags,
        request=request,
        body=response.body,
        encoding='utf-8',
    )
在 spider 的 custom_settings 中的 DOWNLOADER_MIDDLEWARES 中開啟這個中間件,即可對返回內容進行轉碼操作:
# Per-spider settings overrides; the DOWNLOADER_MIDDLEWARES entry switches on
# the downloader middleware that re-encodes responses.
custom_settings = {
    'ITEM_PIPELINES': {'AggProject.pipelines.AggprojectPipeline': 100},
    'DOWNLOADER_MIDDLEWARES': {
        'AggProject.middlewares.ZyZhanDownloaderMiddleware': 543,
    },
    'DEFAULT_REQUEST_HEADERS': {
        # Long header values are split with implicit string concatenation;
        # the resulting strings are unchanged.
        'Accept': (
            'text/html,application/xhtml+xml,application/xml;q=0.9,'
            'image/webp,image/apng,*/*;q=0.8'
        ),
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7,en-GB;q=0.6',
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/78.0.3904.108 Safari/537.36'
        ),
    },
    'REDIRECT_ENABLED': True,
    'COOKIES_ENABLED': False,
    'DOWNLOAD_DELAY': 1.5,
    'CONCURRENT_REQUESTS': 6,
    'RETRY_ENABLED': True,
    'RETRY_TIMES': 2,
    'DEPTH_LIMIT': 3,
}
鏈接:https://www.dianjilingqu.com/489522.html