# 網頁爬取程序1.0源碼
# 本次使用的是Python語言
# 源碼:
import tkinter as tk
from tkinter import ttk
import requests
import urllib3
import time
import random
# Suppress urllib3's InsecureRequestWarning, which is otherwise emitted for
# every request made with certificate verification turned off (verify=False).
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Event handler for the start button
def crawl_website():
    """Download the page at the entered URL and save its source as HTML.

    Reads the URL and the output file name from the ``url_entry`` and
    ``filename_entry`` widgets, fetches the page with ``requests`` and writes
    the decoded text to the file as UTF-8. Progress and failures are reported
    through ``update_status_bar``.

    Returns:
        None. Request errors (``requests.RequestException``) and file errors
        (``OSError``) are caught and shown in the status bar instead of being
        raised.
    """
    url = url_entry.get().strip()
    filename = filename_entry.get().strip()

    # Without this guard an empty file name would silently produce ".html".
    if not url or not filename:
        update_status_bar("請輸入網址和保存文件名.")
        return

    # Append the default suffix when the name does not already end in .html.
    if not filename.lower().endswith('.html'):
        filename += '.html'

    update_status_bar("開始爬取網頁...")
    try:
        # NOTE(security): verify=False disables TLS certificate validation, so
        # the server's identity is not checked. Kept because ignoring
        # certificate errors is this tool's stated behaviour.
        # The timeout keeps an unresponsive server from freezing the GUI
        # indefinitely, since this handler runs on the Tk event-loop thread.
        response = requests.get(url, verify=False, timeout=30)
    except requests.RequestException as exc:
        update_status_bar(f"網頁爬取失敗: {exc}")
        return
    # Only announce completion once the request has actually finished.
    update_status_bar("完成頁面爬取.")

    try:
        with open(filename, 'w', encoding='utf-8') as file:
            file.write(response.text)
    except OSError as exc:
        update_status_bar(f"文件保存失敗: {exc}")
        return

    update_status_bar(f"網頁爬取成功并保存到文件: {filename}")
# Create the main window
window = tk.Tk()
window.title("網站源代碼爬取器")
window.geometry("400x300")

# Labels and entry boxes for the URL (row 0) and the output file name (row 1)
tk.Label(window, text="網址:").grid(row=0, column=0, padx=10, pady=10)
url_entry = tk.Entry(window, width=30)
url_entry.grid(row=0, column=1, padx=10, pady=10)
tk.Label(window, text="保存文件名:").grid(row=1, column=0, padx=10, pady=10)
filename_entry = tk.Entry(window, width=30)
filename_entry.grid(row=1, column=1, padx=10, pady=10)

# Start button: clicking it runs crawl_website
start_button = ttk.Button(window, text="開始", command=crawl_website)
start_button.grid(row=2, column=0, columnspan=2, padx=10, pady=10)

# Status bar: a left-aligned label bound to status_var, spanning both columns
status_var = tk.StringVar()
status_bar = ttk.Label(window, textvariable=status_var, anchor=tk.W)
status_bar.grid(row=3, column=0, columnspan=2, sticky=tk.W+tk.E)
# Update the status bar
def update_status_bar(text: str) -> None:
    """Show *text* in the status bar and redraw it right away.

    Args:
        text: Message to display; it is stored in ``status_var``, which the
            ``status_bar`` label is bound to.
    """
    status_var.set(text)
    # Flush pending redraw work so the new message becomes visible while the
    # calling handler is still running. update_idletasks() is used instead of
    # update() because update() also dispatches queued user events, which
    # would let a second button click re-enter the handler mid-run.
    status_bar.update_idletasks()
# Enter the Tk event loop; this call blocks until the main window is closed.
window.mainloop()
# 如有問題私信UP