# 不要把遺憾留在CSGO ("Don't leave regrets in CSGO") — stray note; commented out so the file parses.

import pandas as pd
from sklearn import preprocessing
from sklearn.feature_selection import mutual_info_classif
from sklearn.feature_selection import SelectKBest
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
# Imports for the intrusion-detection pipeline are above.
# Load the training and test datasets.
data_train=pd.read_csv("Train_data.csv") # training rows; includes the "class" label column used below
data_test=pd.read_csv("Test_data.csv") # test rows -- NOTE(review): never used later in this chunk, verify intent
def encoding(df):
    """Label-encode every string (object-dtype) column of *df* in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose object columns are replaced by integer codes.
    """
    # The original body had lost its indentation (SyntaxError); structure restored.
    for col in df.columns:
        if df[col].dtype == 'object':
            # A fresh encoder per column: codes are assigned independently.
            label_encoder = preprocessing.LabelEncoder()
            df[col] = label_encoder.fit_transform(df[col])
# Label-encode all string columns of the training data in place.
encoding(data_train)
X = data_train.drop(["class"], axis=1) # feature matrix: every column except the target
y = data_train["class"] # target labels
# Feature selection
select_best_cols = SelectKBest(mutual_info_classif, k=25) # keep the 25 features with highest mutual information w.r.t. y
select_best_cols.fit(X, y) # score each feature against the labels
selected_features = X.columns[select_best_cols.get_support()] # names of the selected columns
X = X[selected_features] # keep only the selected features
# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# 30% of the rows are held out for evaluation; random_state fixes the shuffle.
# Standardization: fit statistics on the training split only to avoid leakage.
sc = StandardScaler() # per-feature zero mean / unit variance
X_train=sc.fit_transform(X_train) # fit on and transform the training features
X_test=sc.transform(X_test) # reuse the training-split mean/std on the test features
def classalgo_test(x_train, x_test, y_train, y_test):
    """Train a RandomForestClassifier and return its test accuracy.

    Parameters
    ----------
    x_train, x_test : array-like
        Standardized feature matrices for the train/test splits.
    y_train, y_test : array-like
        Matching labels.

    Returns
    -------
    str
        Accuracy on the test split formatted to two decimals
        (string return kept for backward compatibility).
    """
    # The original body had lost its indentation (SyntaxError); structure
    # restored, and the redundant `algo = rfc` alias dropped.
    rfc = RandomForestClassifier()
    rfc.fit(x_train, y_train)          # fit on the training split
    y_test_pred = rfc.predict(x_test)  # predict the held-out split
    return "{:.2f}".format(accuracy_score(y_test, y_test_pred))
# Train and evaluate the random forest on the split above.
a = classalgo_test(X_train, X_test, y_train, y_test) # accuracy as a 2-decimal string
print(a) # report accuracy on the held-out 30%
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# Load the SMS dataset (latin-1 tolerates the non-UTF-8 bytes in this file).
data = pd.read_csv("sms_spam.csv", encoding='ISO-8859-1')
# Vocabulary of words seen in the training messages (filled below).
words = set()
# Clean the text column.
column = 'text'
data[column] = data[column].str.lower()  # lowercase everything
# regex=True is required: since pandas 2.0 str.replace defaults to a literal
# match, which would leave the punctuation/digits untouched.
data[column] = data[column].str.replace('[^a-zA-Z ]', '', regex=True)  # keep letters and spaces only
data[column] = data[column].str.strip()  # trim surrounding whitespace
# Hold out 10% of messages for evaluation.
X_train, X_test, Y_train, Y_test = train_test_split(data["text"], data["type"], test_size=0.1, random_state=42)
# Build a bag-of-words representation from the training messages only.
# (The original loop body had lost its indentation — SyntaxError.)
for doc in X_train:
    words.update(doc.split())  # grow the vocabulary from training text
word_list = list(words)  # freeze one column ordering for both matrices


def _count_matrix(docs):
    """Return an (n_docs, n_words) matrix of per-message token counts.

    Counting split tokens (not str.count) avoids the original bug of
    matching substrings, e.g. counting 'he' inside 'the'.
    """
    rows = []
    for doc in docs:
        tokens = doc.split()
        rows.append([tokens.count(word) for word in word_list])
    return np.array(rows)


X_train_counts = _count_matrix(X_train)  # training feature matrix
X_test_counts = _count_matrix(X_test)    # test feature matrix
model = MultinomialNB()  # multinomial NB suits non-negative count features
model.fit(X_train_counts, Y_train)
# Evaluate on the held-out 10%.
Y_pred = model.predict(X_test_counts)
accuracy = accuracy_score(Y_test, Y_pred)
# Report
print(f'{accuracy:.2f}')
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
# Lookup tables mapping original string values to their encoded codes.
str_list = []      # string category values, across all object columns
digital_list = []  # matching integer codes, same order as str_list
my_dict = None     # str -> code mapping, built below
# Load the data.
crime_data = pd.read_csv('crime.csv')
# Deduplicate and drop incomplete rows.  These pandas methods return NEW
# frames, so the result must be assigned back — the original discarded it,
# making both calls no-ops.
crime_data = crime_data.drop_duplicates()
crime_data = crime_data.dropna()
# Encode every string column and record the value<->code pairs.
for col in crime_data.columns:
    if crime_data[col].dtypes == 'object':
        le = LabelEncoder()
        crime_data[col] = le.fit_transform(crime_data[col])
        # le.classes_[i] is encoded as code i, so extending both lists from
        # classes_ keeps strings and codes aligned.  (The original zipped
        # appearance-ordered uniques against sorted codes and could misalign.)
        str_list.extend(le.classes_)
        digital_list.extend(range(len(le.classes_)))
# NOTE(review): values from *all* string columns share one dict, so a value
# appearing in two columns keeps only the last pairing — same as the original.
my_dict = dict(zip(str_list, digital_list))
# Features and target.
x = crime_data.loc[:, ['NEIGHBOURHOOD', 'MONTH']]
y = crime_data.loc[:, 'TYPE']
# 70/30 split with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=10)
# Fit a decision tree classifier.
dtc = DecisionTreeClassifier()
dtc.fit(x_train, y_train)
# Example query: the Sunset neighbourhood in March.  MONTH is numeric in the
# training frame, so pass the int 3 rather than the string '3' the original
# used (mixed dtypes made the query column object-typed).
new_data = {"NEIGHBOURHOOD": my_dict['Sunset'], "MONTH": 3}
prediction = dtc.predict(pd.DataFrame([new_data]))
# Map the predicted code back to its string label.
outcome = None  # fallback so the print below cannot hit an undefined name
predicted_code = prediction[0]  # predict() returns an array; compare scalars
for k, v in my_dict.items():
    if v == predicted_code:
        outcome = k
        break
# Print the prediction (user-facing string kept byte-identical).
print('根據(jù)預(yù)測可能的犯罪類型是:', outcome)
import math
import os
import sys
import pickle
import numpy as np
from numpy import *
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV