最美情侣中文字幕电影,在线麻豆精品传媒,在线网站高清黄,久久黄色视频

歡迎光臨散文網 會員登陸 & 注冊

簡單的SVM支持向量機源代碼注釋

2017-12-10 15:28 作者:licuihe  | 我要投稿

 

支持向量機

 

CV2庫:

https://www.lfd.uci.edu/~gohlke/pythonlibs/ 

 


 

# coding: utf-8
"""
Support vector machine (SVM).
Used to tell apart the handwritten letters a, b and c.
"""

from scipy.misc import imread
import CV2
import numpy as np
from sklearn import svm
import matplotlib.pyplot as plt


def boundaries(binarized, axis, too_small=10):
    """
    Find the start/stop indices of the occupied bands along one axis.

    A position along `axis` counts as occupied when the corresponding slice
    of `binarized` contains at least one non-zero cell. Runs of consecutive
    occupied positions form a band; bands that are not longer than
    `too_small` are discarded as noise.

    Args:
        binarized: 2-D array whose non-zero (True) cells mark foreground.
        axis: 0 to locate bands of rows, 1 to locate bands of columns.
        too_small: bands whose length is not greater than this many
            positions are dropped (default 10).

    Returns:
        A list of (start, stop) index pairs in increasing order. `stop` is
        exclusive, so `arr[start:stop]` selects exactly the band.
    """
    # Summing over the *other* axis collapses the image to one value per
    # position along `axis`; [1, 0][axis] performs that axis swap.
    occupied = np.sum(binarized, axis=[1, 0][axis]) > 0

    # Mark every position where occupancy flips relative to its predecessor.
    # Position 0 keeps its own value, so a band touching the leading edge
    # still produces a start marker there.
    edges = occupied.copy()
    edges[1:] = np.logical_xor(occupied[1:], occupied[:-1])
    change = np.nonzero(edges)[0]

    # An odd number of flips means the last band runs to the trailing edge
    # and never "closes"; close it at the end of the array so starts and
    # stops pair up.
    if len(change) % 2:
        change = np.append(change, len(occupied))

    starts = change[::2]
    stops = change[1::2]
    keep = (stops - starts) > too_small
    # Materialise the pairs so the result can be iterated more than once.
    return list(zip(starts[keep], stops[keep]))


def separate(abc_list):
    """
    Crop one glyph image out of every text row found in a grayscale image.

    The image is binarized at the midpoint between its darkest and brightest
    pixel (pixels below the midpoint count as ink). `boundaries` then locates
    the row bands, and inside each band the column bands; only the first
    column band of each row is cropped.

    Args:
        abc_list: 2-D array of grayscale pixel values.

    Returns:
        A list of 2-D arrays, one per row band, holding the cropped region
        of the input image divided by 255.
    """
    source = abc_list.copy()
    full_scale = 255.
    # Midpoint between the extreme intensities serves as the ink threshold.
    cutoff = (np.max(abc_list) + np.min(abc_list)) / 2.0
    ink = abc_list < cutoff

    glyphs = []
    for top, bottom in boundaries(ink, axis=0):
        band = ink[top:bottom, :]
        # Transpose the (start, stop) pairs into all starts / all stops.
        lefts, rights = zip(*boundaries(band, axis=1))
        patch = source[top:bottom, lefts[0]:rights[0]]
        glyphs.append(np.array(patch / full_scale))
    return glyphs


def partition(data_set, label_set, training_ratio):
    """
    Split class-grouped samples into a training part and a test part.

    The samples are expected to be grouped by class, with every class
    occupying a contiguous run of equal length
    (`len(data_set) // number_of_distinct_labels` rows). Within each class
    the first `int(rows_per_class * training_ratio)` rows go to the training
    set and the remaining rows go to the test set.

    Args:
        data_set: 2-D array-like, one sample per row.
        label_set: 1-D array-like with one label per row of `data_set`.
        training_ratio: fraction (0..1) of each class used for training.

    Returns:
        (train_data, train_label, test_data, test_label). Both data arrays
        are float arrays; the label arrays keep the dtype of `label_set`.
        Classes appear in their input order. Within a class the test rows
        are stored in reverse input order.
    """
    data_set = np.asarray(data_set)
    label_set = np.asarray(label_set)

    label_num = len(np.unique(label_set))
    sum_num = len(data_set) // label_num  # rows per class
    train_num = int(sum_num * training_ratio)
    test_num = sum_num - train_num
    feature_num = data_set.shape[1]

    _train_label = np.empty(label_num * train_num, dtype=label_set.dtype)
    _test_label = np.empty(label_num * test_num, dtype=label_set.dtype)
    _train_data = np.empty([len(_train_label), feature_num])
    _test_data = np.empty([len(_test_label), feature_num])

    for _target in range(label_num):
        # Rows [start, stop) belong to this class. Indexing from `start`
        # (not `start - 1`) keeps every sample paired with its own label.
        start = _target * sum_num
        split = start + train_num
        stop = start + sum_num
        class_label = label_set[start]

        train_slot = slice(_target * train_num, (_target + 1) * train_num)
        test_slot = slice(_target * test_num, (_target + 1) * test_num)

        _train_label[train_slot] = class_label
        _train_data[train_slot] = data_set[start:split]
        _test_label[test_slot] = class_label
        # Reversed so the per-class test ordering matches the layout this
        # function has always produced.
        _test_data[test_slot] = data_set[split:stop][::-1]
    return _train_data, _train_label, _test_data, _test_label


def main():
    """
    Train an SVM on handwritten letters and report its test accuracy.

    Reads "a.png", "b.png" and "c.png" from the working directory, crops the
    glyphs of each image with `separate`, resizes every glyph to
    `resized_w` x `resized_h` pixels, splits the samples with `partition`,
    fits an RBF-kernel `svm.SVC` and prints every test prediction followed
    by summary statistics. When `visualize` is true, every second test
    sample is also plotted with its predicted letter.
    """
    # --- parameters ---
    visualize = True
    training_percent = .15
    resized_w = 5
    resized_h = 5
    class_count = 3  # only the first three letters (a, b, c) are used
    grid_rows, grid_cols = 6, 5  # subplot grid used for the visualisation

    letters = [chr(code) for code in range(ord('a'), ord('z') + 1)]

    # --- read the handwritten images ---
    original_pic = [imread(letters[i] + ".png", flatten=True)
                    for i in range(class_count)]

    # --- crop every glyph and bring it to a common size ---
    # NOTE(review): OpenCV's Python module is normally named `cv2`
    # (lowercase); confirm the file-level `import CV2` resolves.
    separated_pic = [separate(picture) for picture in original_pic]
    resized_pic = [CV2.resize(img, (resized_w, resized_h))
                   for glyphs in separated_pic
                   for img in glyphs]

    # --- assemble the feature matrix and the labels ---
    # One flattened glyph per row.
    data = np.asarray(resized_pic).reshape((-1, resized_w * resized_h))
    # Each glyph is labelled with the code point of the letter whose image
    # it was cropped from, however many glyphs each image yields.
    label = np.concatenate([
        np.full(len(glyphs), ord(letter), dtype=int)
        for letter, glyphs in zip(letters, separated_pic)
    ])

    train_data, train_label, test_data, test_label = partition(data, label, training_percent)

    # --- train the classifier and measure its accuracy ---
    # http://scikit-learn.org/stable/modules/svm.html
    # C: penalty term; kernel: rbf = exp(-gamma|u-v|^2); gamma: kernel parameter
    clf = svm.SVC(C=1.0, kernel='rbf', gamma='auto')
    clf.fit(train_data, train_label)
    predict = clf.predict(test_data)
    correct = int(np.sum(predict == test_label))

    # --- print the results ---
    for i in range(len(predict)):
        print(i + 1, chr(test_label[i]), '->', chr(predict[i]))
    label_count = len(np.unique(label))
    per_class = int(len(data) / label_count)
    print("總數(shù)據(jù)量: %d類標(biāo)號(hào) * %d個(gè)樣本" % (label_count, per_class))
    train_data_num = int(per_class * training_percent)
    test_data_num = int(per_class - train_data_num)
    print("訓(xùn)練數(shù)據(jù)集 %d, " % train_data_num, end='')
    print("測(cè)試數(shù)據(jù)集 %d" % test_data_num, end='')
    print("(訓(xùn)練數(shù)據(jù)占總數(shù)據(jù){0}%)".format(training_percent * 100))
    print("測(cè)試集正確率{0}%".format(100 * correct / len(predict)))

    # --- visualisation ---
    if visualize:
        test_images = test_data.reshape(-1, resized_h, resized_w)
        # Every second test sample, capped at what the subplot grid holds.
        shown = list(zip(test_images, predict))[::2][:grid_rows * grid_cols]
        for index, (image, predicted) in enumerate(shown):
            plt.subplot(grid_rows, grid_cols, index + 1)
            plt.axis('off')
            plt.imshow(image, cmap='gray', interpolation='nearest')
            plt.title('predict:%s' % chr(predicted))
        plt.show()


# Run the demo only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

 

文件名是a.png


簡單的SVM支持向量機源代碼注釋的評論 (共 條)

分享到微博請遵守國家法律
南岸区| 南城县| 容城县| 阿坝县| 泉州市| 东阿县| 清苑县| 延寿县| 修武县| 安徽省| 铅山县| 黄浦区| 辽阳市| 星子县| 侯马市| 忻州市| 盱眙县| 固安县| 贡山| 西盟| 萨嘎县| 荔波县| 原平市| 馆陶县| 广宁县| 化德县| 田阳县| 漠河县| 墨玉县| 全南县| 谢通门县| 酒泉市| 景德镇市| 郴州市| 曲靖市| 莆田市| 调兵山市| 额敏县| 咸宁市| 广灵县| 休宁县|