最美情侣中文字幕电影,在线麻豆精品传媒,在线网站高清黄,久久黄色视频

歡迎光臨散文網 會員登陸 & 注冊

不要把遺憾留在CSGO

2023-06-30 10:25 作者:I春樹I  | 我要投稿

import pandas as pd

from sklearn import preprocessing

from sklearn.feature_selection import mutual_info_classif

from sklearn.feature_selection import SelectKBest

from sklearn.model_selection import train_test_split

from sklearn.preprocessing import StandardScaler

from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import accuracy_score


# Import the required libraries (the import statements above).


# Load the training and test data sets.

data_train=pd.read_csv("Train_data.csv") # training data, read from CSV into the DataFrame data_train

data_test=pd.read_csv("Test_data.csv") # test data, read from CSV into the DataFrame data_test


def encoding(df):
    """Label-encode every string (``object`` dtype) column of *df* in place.

    Each object column is replaced by the integer codes produced by a fresh
    ``LabelEncoder`` fitted on that column. Columns of any other dtype are
    left untouched.

    Args:
        df: DataFrame to encode. It is modified in place.

    Returns:
        A dict mapping each encoded column name to its fitted
        ``LabelEncoder``, so callers can decode values with
        ``inverse_transform`` or apply the same encoding to another frame.
        The dict is empty when *df* has no object columns. Callers that
        ignore the return value behave exactly as before.
    """
    encoders = {}
    for col in df.columns:
        if df[col].dtype == 'object':
            label_encoder = preprocessing.LabelEncoder()
            df[col] = label_encoder.fit_transform(df[col])
            encoders[col] = label_encoder
    return encoders


# Label-encode the string columns of the training data (in place).
encoding(data_train)

X = data_train.drop(["class"], axis=1)  # feature columns
y = data_train["class"]  # target labels

# Split into training and test sets first (30% held out, fixed seed).
# Feature selection is fitted afterwards on the training portion only;
# fitting it on the full data would let the held-out labels influence
# which features are kept and inflate the reported accuracy.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Feature selection: keep the (up to) 25 features with the highest mutual
# information with the target. k is capped at the number of available
# columns so narrow data sets do not make SelectKBest fail or warn.
select_best_cols = SelectKBest(mutual_info_classif, k=min(25, X_train.shape[1]))
select_best_cols.fit(X_train, y_train)
selected_features = X.columns[select_best_cols.get_support()]  # names of the kept columns

# Restrict every view of the features to the selected columns.
X = X[selected_features]
X_train = X_train[selected_features]
X_test = X_test[selected_features]

# Standardise the features; the scaler is fitted on the training set only
# and then applied unchanged to the test set.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)


def classalgo_test(x_train, x_test, y_train, y_test, random_state=42):
    """Train a random forest and return its accuracy on the test set.

    Args:
        x_train: Training features.
        x_test: Test features.
        y_train: Training labels.
        y_test: True labels for ``x_test``.
        random_state: Seed passed to ``RandomForestClassifier``. Defaults to
            42 so repeated runs report the same accuracy; pass ``None`` to
            get an unseeded forest.

    Returns:
        The test accuracy formatted as a string with two decimals.
    """
    rfc = RandomForestClassifier(random_state=random_state)
    rfc.fit(x_train, y_train)
    y_test_pred = rfc.predict(x_test)
    return "{:.2f}".format(accuracy_score(y_test, y_test_pred))


# Train and evaluate the classifier on the prepared split, then print the
# resulting accuracy string.
a = classalgo_test(
    x_train=X_train,
    x_test=X_test,
    y_train=y_train,
    y_test=y_test,
)
print(a)



import pandas as pd

import numpy as np

from sklearn.model_selection import train_test_split

from sklearn.naive_bayes import MultinomialNB

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


# Load the data set (the CSV is read as ISO-8859-1).

data = pd.read_csv("sms_spam.csv", encoding='ISO-8859-1')


# Keyword vocabulary for the spam filter; starts out empty.

words = set()


# Clean the message text.
column = 'text'

data[column] = data[column].str.lower()  # normalise to lower case

# Keep only letters and spaces. regex=True must be explicit: since
# pandas 2.0 str.replace treats the pattern as a literal string by default,
# in which case this call would silently remove nothing.
data[column] = data[column].str.replace('[^a-zA-Z ]', '', regex=True)

data[column] = data[column].str.strip()  # drop leading/trailing whitespace


# Split into training and test sets (10% held out for testing, fixed seed).

X_train, X_test, Y_train, Y_test = train_test_split(data["text"], data["type"], test_size=0.1, random_state=42)


# Build the model.

# Collect the vocabulary from the training messages only.
for doc in X_train:
    words.update(doc.split())

# Fix the column order once so the training and test matrices line up and
# the layout does not depend on set iteration order.
vocabulary = sorted(words)
word_index = {word: idx for idx, word in enumerate(vocabulary)}


def _count_matrix(docs):
    """Return a (len(docs), len(vocabulary)) matrix of whole-word counts.

    Tokens are obtained with ``str.split``. Counting tokens instead of using
    ``str.count`` avoids substring matches (e.g. "a" matching every letter
    "a") and needs one pass per document rather than one scan per
    vocabulary word.
    """
    docs = list(docs)
    counts = np.zeros((len(docs), len(vocabulary)), dtype=np.int64)
    for row, doc in enumerate(docs):
        for token in doc.split():
            col = word_index.get(token)
            if col is not None:  # words unseen during training have no column
                counts[row, col] += 1
    return counts


X_train_counts = _count_matrix(X_train)  # training feature matrix
X_test_counts = _count_matrix(X_test)  # test feature matrix

model = MultinomialNB()  # naive Bayes classifier over the word counts
model.fit(X_train_counts, Y_train)


# Evaluate the model: predict labels for the test matrix and compare them
# with the true labels.
Y_pred = model.predict(X_test_counts)
accuracy = accuracy_score(Y_test, Y_pred)

# Print the accuracy with two decimals.
print(format(accuracy, '.2f'))




import pandas as pd

from sklearn.model_selection import train_test_split

from sklearn.tree import DecisionTreeClassifier

from sklearn.metrics import accuracy_score

from sklearn.preprocessing import LabelEncoder


# Bookkeeping for the label encoding.
str_list = []  # original string values, grouped per encoded column
digital_list = []  # integer codes, aligned index-for-index with str_list
my_dict = None  # flat {string value: integer code} mapping across all encoded columns
encoders = {}  # fitted LabelEncoder per encoded column, used to encode/decode values


# Load the data.
crime_data = pd.read_csv('crime.csv')


# Remove duplicate rows and rows with missing values. Both methods return a
# new DataFrame, so the result has to be assigned; calling them without
# assignment leaves crime_data unchanged. Cleaning happens before encoding
# so missing values never reach the encoder.
crime_data = crime_data.drop_duplicates().dropna()


# Label-encode every string column and record the mapping that was used.
for col in crime_data.columns:
    if crime_data[col].dtypes == 'object':
        le = LabelEncoder()
        crime_data[col] = le.fit_transform(crime_data[col])
        encoders[col] = le
        # le.classes_[i] is the string that was encoded as i, so these two
        # lists stay aligned by construction.
        str_list.extend(le.classes_)
        digital_list.extend(range(len(le.classes_)))

# Flat lookup kept for convenience. Codes restart at 0 for every column and
# equal strings in different columns overwrite each other, so use
# `encoders[col]` whenever the column matters.
my_dict = dict(zip(str_list, digital_list))


# Features and label.
x = crime_data.loc[:, ['NEIGHBOURHOOD', 'MONTH']]
y = crime_data.loc[:, 'TYPE']


# Split into training and test sets (30% held out, fixed seed).
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=10)


# Train a decision tree classifier.
dtc = DecisionTreeClassifier()
dtc.fit(x_train, y_train)


# Example query: neighbourhood 'Sunset' in month 3. The neighbourhood is
# encoded with that column's own encoder and the month is passed as a number.
new_data = {
    "NEIGHBOURHOOD": encoders['NEIGHBOURHOOD'].transform(['Sunset'])[0],
    "MONTH": 3,
}


# Predict the crime type for the example.
prediction = dtc.predict(pd.DataFrame([new_data]))


# Decode the prediction with the TYPE column's encoder. Scanning my_dict for
# a matching code could return a value from another column, because every
# column's codes start at 0.
type_encoder = encoders.get('TYPE')
if type_encoder is not None:
    outcome = type_encoder.inverse_transform(prediction)[0]
else:
    # TYPE was not a string column, so the prediction is already the label.
    outcome = prediction[0]


# Print the predicted crime type.
print('根據預測可能的犯罪類型是:', outcome)



import math

import os

import sys

import pickle

import numpy as np

from numpy import *

from sklearn.svm import SVC

from sklearn.model_selection import GridSearchCV


不要把遺憾留在CSGO的評論 (共 條)

分享到微博請遵守國家法律
西乌珠穆沁旗| 留坝县| 丰台区| 玛沁县| 蛟河市| 鄂伦春自治旗| 阳新县| 寿光市| 库车县| 大荔县| 沾益县| 福安市| 边坝县| 石棉县| 仁寿县| 宾阳县| 石棉县| 楚雄市| 莱州市| 三江| 乐业县| 海兴县| 阿荣旗| 宜君县| 梁平县| 通山县| 迁西县| 农安县| 陈巴尔虎旗| 阜新| 三河市| 沧州市| 枣庄市| 五常市| 蒙城县| 固始县| 隆化县| 榆中县| 青田县| 五寨县| 昌邑市|