備份5(python爬蟲任務定時啟動)
記錄自己的學習過程用,以下代碼有定時模塊,爬蟲模塊,數據寫入模塊
#! /usr/bin/python3
# coding=utf-8
import re, os
import datetime
import csv
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
from selenium import webdriver
from selenium.webdriver import Chrome
import time
import schedule
# Make sure the output directory exists before any CSV file is written.
# os.makedirs does this without spawning a shell and works on every
# platform, unlike the POSIX-only `mkdir -p` command; exist_ok=True keeps
# the call harmless when the directory is already there.
os.makedirs('./數(shù)據(jù)', exist_ok=True)
# Chrome binary location used by the original script.  It is only applied
# when the file actually exists, so machines with Chrome installed elsewhere
# keep using Selenium's default lookup.
_CHROME_BINARY = r'/opt/apps/cn.google.chrome/files/google-chrome'

# Pattern used by web1: the volume value is followed by the unit "億 ".
# Compiled once at import time instead of on every scheduled run.
_STATS_PATTERN_WEB1 = re.compile(
    r'">24h最低</span><span class="value">(?P<low>.*?)</span></div>.*?">24h最高</span><span class="value">(?P<hight>.*?)</span></div>.*?</span><span class="value">(?P<number>.*?)億 </span></div>.*?<span class="label">24h額</span><span class="value">(?P<money>.*?)</span></div>',
    re.S)

# Pattern used by web2: the volume value sits under the "24h量" label and is
# followed by the unit "萬 ".
_STATS_PATTERN_WEB2 = re.compile(
    r'">24h最低</span><span class="value">(?P<low>.*?)</span></div>.*?">24h最高</span><span class="value">(?P<hight>.*?)</span></div>.*?class="label">24h量.*?</span><span class="value">(?P<number>.*?)萬 </span></div>.*?">24h額</span><span class="value">(?P<money>.*?)</span></div>',
    re.S)


def _scrape_to_csv(name, url, pattern):
    """Load *url* in Chrome, extract the 24h statistics and append them to the CSV.

    Args:
        name: Label for the pair; used in the console banner and in the CSV
            file name ``./數(shù)據(jù)/{name}數(shù)據(jù)匯總.csv``.
        url: Page to open in the browser.
        pattern: Compiled regex with the named groups ``low``, ``hight``,
            ``number`` and ``money``.

    Returns:
        The number of rows appended to the CSV file (0 when the pattern did
        not match the page source).
    """
    now_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    path_csv = f"./數(shù)據(jù)/{name}數(shù)據(jù)匯總.csv"

    # ChromeOptions must be instantiated and handed to the driver; assigning
    # attributes on the class itself has no effect on the launched browser.
    options = webdriver.ChromeOptions()
    if os.path.isfile(_CHROME_BINARY):
        options.binary_location = _CHROME_BINARY

    web = Chrome(options=options)
    try:
        web.maximize_window()
        web.get(url)
        time.sleep(3)  # give the page's scripts time to render the figures
        print(f"====={name}=====")
        html = web.page_source
    finally:
        # quit() ends the whole driver session (close() only closes the
        # window), and the finally clause guarantees it also runs when
        # loading the page fails, so no browser processes pile up.
        web.quit()

    rows = []
    for match in pattern.finditer(html):
        low = match.group('low')
        hight = match.group('hight')
        number = match.group('number')
        money = match.group('money')
        print(low, hight, number, money)
        rows.append([low, hight, number, money, now_time])

    if rows:
        # Append mode alone is enough; nothing is ever read back from the file.
        with open(path_csv, "a", newline="") as outcsv:
            csv.writer(outcsv).writerows(rows)

    time.sleep(1)  # short pause before the next browser is started
    return len(rows)


def web1(name, url):
    """Scrape one trading page whose 24h volume is shown with the unit "億".

    Appends one row (low, high, volume, turnover, timestamp) per regex match
    to ``./數(shù)據(jù)/{name}數(shù)據(jù)匯總.csv``.

    Args:
        name: Label for the pair, e.g. ``"SHIB"``.
        url: Trading page URL to open.
    """
    _scrape_to_csv(name, url, _STATS_PATTERN_WEB1)
    print(f"所有{name}數(shù)據(jù)已經(jīng)導(dǎo)出完成!??!")


def web2(name, url):
    """Scrape one trading page whose 24h volume is shown with the unit "萬".

    Appends one row (low, high, volume, turnover, timestamp) per regex match
    to ``./數(shù)據(jù)/{name}數(shù)據(jù)匯總.csv``.

    Args:
        name: Label for the pair, e.g. ``"BTC"``.
        url: Trading page URL to open.
    """
    _scrape_to_csv(name, url, _STATS_PATTERN_WEB2)
    print(f"所有{name}數(shù)據(jù)已經(jīng)導(dǎo)出完成!!!")
def man1():
    """Collect data for the pairs handled by ``web1``.

    For every configured pair the CSV file is created with a header row if
    it does not exist yet, and then the page is scraped.  Scraping happens on
    every call, including the one that creates the file, so the first
    scheduled run already records a sample.
    """
    names1 = {
        "SHIB": "https://www.okex.com/trade-spot/shib-usdt",
    }
    for name, url in names1.items():
        path_csv = f"./數(shù)據(jù)/{name}數(shù)據(jù)匯總.csv"
        if not os.path.exists(path_csv):
            # newline="" is what the csv module expects for files it writes;
            # the with-block closes the file even if writing fails.
            with open(path_csv, mode="w", newline="") as f:
                # Five columns, matching the rows web1 appends (the last
                # field of every row is the sampling timestamp).
                csv.writer(f).writerow(
                    ["24H最低", "24H最高", f"24H量({name})", "24H額", "時間"])
        web1(name, url)
def man2():
    """Collect data for the pairs handled by ``web2``.

    For every configured pair the CSV file is created with a header row if
    it does not exist yet, and then the page is scraped.  Scraping happens on
    every call, including the one that creates the file, so the first
    scheduled run already records a sample.
    """
    names2 = {
        "ICP": "https://www.okex.com/trade-spot/icp-usdt",
        "OMG": "https://www.okex.com/trade-spot/omg-usdt",
        "Doge": "https://www.okex.com/trade-spot/doge-usdt",
        "xch": "https://www.okex.com/trade-spot/xch-usdt",
        "BTC": "https://www.okex.com/trade-spot/btc-usdt",
        "ETH": "https://www.okex.com/trade-spot/eth-usdt",
    }
    for name, url in names2.items():
        path_csv = f"./數(shù)據(jù)/{name}數(shù)據(jù)匯總.csv"
        if not os.path.exists(path_csv):
            # newline="" is what the csv module expects for files it writes;
            # the with-block closes the file even if writing fails.
            with open(path_csv, mode="w", newline="") as f:
                # Five columns, matching the rows web2 appends (the last
                # field of every row is the sampling timestamp).
                csv.writer(f).writerow(
                    ["24H最低", "24H最高", f"24H量({name})", "24H額", "時間"])
        web2(name, url)
def man():
    """Run one complete collection pass: man1's pairs first, then man2's."""
    for collect in (man1, man2):
        collect()
if __name__ == '__main__':
    # Register the collection pass to run every ten minutes.
    ten_minute_job = schedule.every(10).minutes
    ten_minute_job.do(man)

    # Poll the scheduler once per second, forever; run_pending() executes
    # the job whenever it has become due.
    while True:
        schedule.run_pending()
        time.sleep(1)
以下是時間管理模塊