Fully Commented Source Code for Single-Neuron Logistic Classification (Logistic Regression)

test.py
# -*- coding: utf-8 -*-
from logistic_regression import LogisticRegressionClassifier


# Read data from a file.
# Columns in each line are separated by \t; the last column is the class label.
def load_data_set(datafile):
    feature_data = []
    label_data = []
    lines = 0
    with open(datafile, 'r') as fr_file:
        for each_line in fr_file:
            lines = lines + 1
            one_line = each_line.split('\t')
            # print('len of one_line = ', len(one_line))
            temp_arr = []
            # For every column in this line (the sample data has 21+1 columns);
            # -1 because the last column is the label, the rest are features.
            for i in range(len(one_line) - 1):
                # Collect this line into a one-dimensional list
                temp_arr.append(float(one_line[i]))
            # Stack the lines into a two-dimensional list
            feature_data.append(temp_arr)
            # The last column of each line is the label.
            # strip() removes leading/trailing characters (whitespace by default).
            label_data.append(int(float(one_line[-1].strip())))
    # print('lines = ', lines)
    return feature_data, label_data  # both return values are lists


def main():
    # Names of the training and test files to read
    train_file = r"data\train1.txt"
    test_file = r"data\test1.txt"
    # train_x holds the feature data, train_y the labels; both are lists
    train_x, train_y = load_data_set(train_file)
    test_x, test_y = load_data_set(test_file)
    print('train_x = ', train_x)
    print('train_y = ', train_y)
    print('test_x = ', test_x)
    print('test_y = ', test_y)
    # Instantiate the classifier
    classifier = LogisticRegressionClassifier()
    # Train and get the learned weights.
    # alpha is the step size (learning rate); max_cycles is the maximum number of iterations.
    weigh = classifier.fit(train_x, train_y, alpha=0.1, max_cycles=100)
    # Run prediction on the test set
    classifier.predict(test_x, test_y, weigh)


# Entry point
if __name__ == "__main__":
    main()
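To try the classifier without preparing the data files first, a minimal sketch of my own (assuming logistic_regression.py from the listing below is on the import path; the four rows are the sample data shown at the end of this post) looks like this:

from logistic_regression import LogisticRegressionClassifier

train_x = [[2.0, 1.0], [3.0, 2.0], [1.0, 2.0], [1.0, 3.0]]
train_y = [1, 1, 0, 0]

clf = LogisticRegressionClassifier()
weigh = clf.fit(train_x, train_y, alpha=0.1, max_cycles=100)
# Reusing the training rows as "test" data here purely for illustration
clf.predict(train_x, train_y, weigh)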
logistic_regression.py
# -*- coding: utf-8 -*-
# !/usr/bin/python
import numpy as np


class LogisticRegressionClassifier():
    def __init__(self):
        self._alpha = None

    # Train the model with gradient descent; if another optimisation method is
    # preferred, this is the place to change it.
    def fit(self, train_x, train_y, alpha=0.01, max_cycles=100):
        return self._grad_descent(train_x, train_y, alpha, max_cycles)

    # alpha is the step size (learning rate); max_cycles is the maximum number of iterations
    def _grad_descent(self, feat_data, label_data, alpha, max_cycles):
        data_mat = np.mat(feat_data)  # size: m*n
        label_mat = np.mat(label_data).transpose()  # size: m*1; transpose() transposes the matrix
        # m rows of data, n feature columns
        m, n = np.shape(data_mat)
        weigh = np.ones((n, 1))
        # For each iteration
        for i in range(max_cycles):
            # Forward pass
            # Matrix product: (m*n) * (n*1) -- feature matrix times weight matrix = m*1 result
            hx = self._sigmoid(data_mat * weigh)
            # Error matrix, m*1
            error = label_mat - hx
            # new weights = old weights + learning rate * weight update
            weigh = weigh + alpha * data_mat.transpose() * error  # adjust the coefficients from the error (gradient update)
        print('weigh = ', weigh)
        return weigh

    # Classify with the learned parameters
    def predict(self, test_x, test_y, weigh):
        data_mat = np.mat(test_x)  # size: m*n
        label_mat = np.mat(test_y).transpose()  # size: m*1; transpose() transposes the matrix
        hx = self._sigmoid(data_mat * weigh)  # size: m*1, forward pass
        print('hx = ', hx)
        m = len(hx)
        error = 0.0
        # For every predicted value
        for i in range(m):
            # Threshold at 0.5; int() truncates the matrix element to an integer label
            if float(hx[i]) > 0.5:
                print('\n', str(i + 1) + '-th sample ', int(label_mat[i]), 'is classified as: 1', end='')
                # If the i-th true label is not 1, the prediction is wrong
                if int(label_mat[i]) != 1:
                    error += 1.0
                    print(" classify error.", end='')
            else:
                print('\n', str(i + 1) + '-th sample ', int(label_mat[i]), 'is classified as: 0', end='')
                if int(label_mat[i]) != 0:
                    error += 1.0
                    print(" classify error.", end='')
        # number of errors / total number of samples
        error_rate = error / m
        print('\n', "error rate is:", "%.4f" % error_rate)
        return error_rate

    # The classic sigmoid function
    def _sigmoid(self, fx):
        return 1.0 / (1 + np.exp(-fx))
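The weight update in _grad_descent is the vectorised rule w <- w + alpha * X^T (y - sigmoid(X w)), which is one step of gradient ascent on the log-likelihood of the logistic model (equivalently, gradient descent on the negative log-likelihood). The following standalone sketch is not part of the original code; it uses a made-up tiny X and y in the same format as the sample data to check that expression against a finite-difference gradient:

import numpy as np

def log_likelihood(w, X, y):
    p = 1.0 / (1.0 + np.exp(-X.dot(w)))
    return np.sum(y * np.log(p) + (1 - y) * np.log(1 - p))

X = np.array([[2.0, 1.0], [3.0, 2.0], [1.0, 2.0], [1.0, 3.0]])
y = np.array([1.0, 1.0, 0.0, 0.0])
w = np.ones(2)  # same starting point as _grad_descent

# Analytic gradient used by the update rule: X^T (y - sigmoid(X w))
analytic = X.T.dot(y - 1.0 / (1.0 + np.exp(-X.dot(w))))

# Central finite differences, one coordinate at a time
eps = 1e-6
numeric = np.array([
    (log_likelihood(w + eps * e, X, y) - log_likelihood(w - eps * e, X, y)) / (2 * eps)
    for e in np.eye(2)
])

print('analytic gradient :', analytic)
print('numerical gradient:', numeric)  # the two vectors should match closely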
Training and test file data and format (tab-separated)
2 1 1
3 2 1
1 2 0
1 3 0
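A small helper, not part of the original post, that writes rows in this tab-separated format into the data\train1.txt and data\test1.txt files that test.py expects could look like this; the same four sample rows are written to both files purely for illustration:

import os

rows = [
    (2, 1, 1),
    (3, 2, 1),
    (1, 2, 0),
    (1, 3, 0),
]

os.makedirs('data', exist_ok=True)
for name in ('train1.txt', 'test1.txt'):
    with open(os.path.join('data', name), 'w') as f:
        for row in rows:
            # columns separated by tabs, last column is the class label
            f.write('\t'.join(str(v) for v in row) + '\n')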