YOLOV1_pytorch_(2)
## 3 預測
### 3.1 預測流程
1. 圖片預處理
2. 預測
3. 解碼
4. 畫框
(1) 預測
predict.py
```python
import cv2
import numpy as np
import torch
import torchvision.transforms as transforms
from torch.autograd import Variable

from resnet_yolo import resnet50
# Names of the 20 Pascal VOC object classes. A class id is the position of
# its name in this tuple; there is no background entry, so id 0 is 'aeroplane'.
VOC_CLASSES = (
    'aeroplane', 'bicycle', 'bird', 'boat',
    'bottle', 'bus', 'car', 'cat', 'chair',
    'cow', 'diningtable', 'dog', 'horse',
    'motorbike', 'person', 'pottedplant',
    'sheep', 'sofa', 'train', 'tvmonitor')

# Drawing colours, looked up with VOC_CLASSES.index(class_name).
# The list holds 21 triples, one more than there are classes, so the last
# entry is never selected by that lookup.
Color = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0], [0, 0, 128],
         [128, 0, 128], [0, 128, 128], [128, 128, 128], [64, 0, 0], [192, 0, 0],
         [64, 128, 0], [192, 128, 0], [64, 0, 128], [192, 0, 128], [64, 128, 128],
         [192, 128, 128], [0, 64, 0], [128, 64, 0], [0, 192, 0], [128, 192, 0], [0, 64, 128]]
def nms(bboxes, scores, threshold=0.5):
    """Greedy non-maximum suppression.

    Repeatedly keeps the highest-scoring remaining box and discards every
    other remaining box whose IoU with it is greater than ``threshold``.

    Args:
        bboxes: tensor of shape (n, 4) holding ``[x1, y1, x2, y2]`` per row.
        scores: tensor of shape (n,) with one score per box.
        threshold: boxes with IoU <= threshold against the kept box survive
            to the next round.

    Returns:
        1-D ``torch.long`` tensor with the indices of the kept boxes, ordered
        by descending score. Empty when ``scores`` is empty.
    """
    x1 = bboxes[:, 0]
    y1 = bboxes[:, 1]
    x2 = bboxes[:, 2]
    y2 = bboxes[:, 3]
    # 1 area of every box
    areas = (x2 - x1) * (y2 - y1)
    # 2 box indices sorted by descending score
    _, order = scores.sort(0, descending=True)
    order = order.reshape(-1)  # keep `order` 1-D so slicing below is always valid
    keep = []
    while order.numel() > 0:
        # 3 take the best remaining box
        i = int(order[0])
        keep.append(i)
        if order.numel() == 1:
            break
        rest = order[1:]
        # 4 IoU of the remaining boxes with the kept box. The intersection's
        #   top-left corner is the element-wise max of the top-left corners and
        #   its bottom-right corner the element-wise min of the bottom-right
        #   corners (hence x2[i] / y2[i] as the upper clamp bounds).
        xx1 = x1[rest].clamp(min=float(x1[i]))
        yy1 = y1[rest].clamp(min=float(y1[i]))
        xx2 = x2[rest].clamp(max=float(x2[i]))
        yy2 = y2[rest].clamp(max=float(y2[i]))
        w = (xx2 - xx1).clamp(min=0)
        h = (yy2 - yy1).clamp(min=0)
        inter = w * h
        iou = inter / (areas[i] + areas[rest] - inter)
        # 5 drop the boxes that overlap the kept box too much
        survivors = torch.nonzero(iou <= threshold).reshape(-1)
        # 6 the survivors are the candidates of the next round
        order = rest[survivors]
    return torch.tensor(keep, dtype=torch.long)
def decoder(pred):
    """Turn a raw network output into NMS-filtered detections.

    After ``squeeze(0)`` the prediction is indexed as ``[row, col, channel]``:
    channels 0-3 and 5-8 hold the two boxes of a cell as ``(cx, cy, w, h)``,
    channels 4 and 9 their confidences, and channels 10 onwards the class
    scores. The original comments give the shape as 1x7x7x30.

    Args:
        pred: network output tensor for a single image. It is not modified.

    Returns:
        Tuple ``(boxes, cls_indexs, probs)``: ``boxes`` is (n, 4) with
        ``[x1, y1, x2, y2]`` in the same normalised units as the prediction,
        ``cls_indexs`` is (n,) long class ids and ``probs`` is (n,)
        confidence * best class score. When no box passes the thresholds a
        single all-zero placeholder detection (class 0, probability 0) is
        returned.
    """
    grid_num = 7
    boxes = []
    cls_indexs = []
    probs = []
    cell_size = 1. / grid_num
    pred = pred.detach().squeeze(0)  # 7x7x30
    # 1 pre-filter by box confidence; the globally most confident box is
    #   always kept as a candidate, even below the 0.1 threshold
    contain = torch.stack((pred[:, :, 4], pred[:, :, 9]), 2)  # [7, 7, 2]
    mask = (contain > 0.1) | (contain == contain.max())  # [7, 7, 2]
    for i in range(grid_num):
        for j in range(grid_num):
            for b in range(2):
                if not mask[i, j, b]:
                    continue
                box = pred[i, j, b * 5:b * 5 + 4]
                contain_prob = pred[i, j, b * 5 + 4].reshape(1)
                # top-left corner of cell (row i, col j), as (x, y)
                xy = pred.new_tensor([j, i]) * cell_size
                # 2 decode: cell-relative centre -> image-relative centre,
                #   then [cx, cy, w, h] -> [x1, y1, x2, y2]. New tensors are
                #   built here so the caller's prediction is left untouched.
                cxcy = box[:2] * cell_size + xy
                half_wh = 0.5 * box[2:]
                box_xy = torch.cat((cxcy - half_wh, cxcy + half_wh))
                max_prob, cls_index = torch.max(pred[i, j, 10:], 0)
                # 3 filter by the final probability
                prob = contain_prob * max_prob
                if float(prob[0]) > 0.1:
                    boxes.append(box_xy.view(1, 4))
                    cls_indexs.append(cls_index.reshape(1))
                    probs.append(prob)
    if not boxes:
        # placeholder so callers always receive at least one row
        boxes = torch.zeros((1, 4))
        probs = torch.zeros(1)
        cls_indexs = torch.zeros(1, dtype=torch.long)
    else:
        boxes = torch.cat(boxes, 0)  # (n, 4)
        probs = torch.cat(probs, 0)  # (n,)
        cls_indexs = torch.cat(cls_indexs, 0)  # (n,)
    # 4 non-maximum suppression
    keep = nms(boxes, probs)
    return boxes[keep], cls_indexs[keep], probs[keep]
def predict_gpu(model, image_name, root_path='/Users/ls/PycharmProjects/YOLOV1_LS/VOCdevkit/VOC2007/JPEGImages/'):
    """Run the detector on one image and return its detections.

    Despite the name, nothing here moves data to a GPU; the ``.cuda()`` call
    is commented out in the original, so the input tensor stays on the CPU.

    Args:
        model: callable network; it is called with a 1x3x448x448 float
            tensor and its output is passed to ``decoder``.
        image_name: file name of the image, appended to ``root_path``.
        root_path: directory prefix (including the trailing separator).

    Returns:
        List with one entry per detection:
        ``[(x1, y1), (x2, y2), class_name, image_name, prob]`` with the
        corners in pixel coordinates of the original image.

    Raises:
        FileNotFoundError: if the image cannot be read.
    """
    image_path = root_path + image_name
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising
        raise FileNotFoundError('cannot read image: ' + image_path)
    # 1 pre-processing
    h, w, _ = image.shape
    img = cv2.resize(image, (448, 448))  # fixed network input size
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR
    mean = (123, 117, 104)  # RGB
    img = img - np.array(mean, dtype=np.float32)  # subtract the mean
    transform = transforms.Compose([transforms.ToTensor(), ])
    img = transform(img)  # HWC -> CHW tensor
    # 2 prediction; no_grad replaces the removed `volatile=True` flag
    with torch.no_grad():
        pred = model(img[None, :, :, :])  # 1x7x7x30
    pred = pred.cpu()
    # 3 decoding
    boxes, cls_indexs, probs = decoder(pred)
    result = []
    for box, cls_index, prob in zip(boxes, cls_indexs, probs):
        # box coordinates are normalised; scale back to the original image
        x1 = int(box[0] * w)
        x2 = int(box[2] * w)
        y1 = int(box[1] * h)
        y2 = int(box[3] * h)
        result.append([(x1, y1), (x2, y2), VOC_CLASSES[int(cls_index)], image_name, float(prob)])
    return result
if __name__ == '__main__':
    model = resnet50()
    print('load model...')
    # NOTE(review): the checkpoint load is commented out, so the model runs
    # with whatever weights resnet50() provides; confirm this is intended.
    # model.load_state_dict(torch.load('best.pth'))
    model.eval()
    # model.cuda()
    image_name = '000015.jpg'
    # NOTE(review): the image drawn on is read from the working directory,
    # while predict_gpu reads root_path + image_name; confirm both refer to
    # the same file.
    image = cv2.imread(image_name)
    if image is None:
        # cv2.imread returns None instead of raising when the file is unreadable
        raise FileNotFoundError('cannot read image: ' + image_name)
    print('predicting...')
    result = predict_gpu(model, image_name)
    # 4 draw the boxes and their labels
    for left_up, right_bottom, class_name, _, prob in result:
        color = Color[VOC_CLASSES.index(class_name)]
        cv2.rectangle(image, left_up, right_bottom, color, 2)
        label = class_name + str(round(prob, 2))
        text_size, baseline = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.4, 1)
        p1 = (left_up[0], left_up[1] - text_size[1])
        # filled rectangle behind the label text
        cv2.rectangle(image, (p1[0] - 2 // 2, p1[1] - 2 - baseline), (p1[0] + text_size[0], p1[1] + text_size[1]), color, -1)
        cv2.putText(image, label, (p1[0], p1[1] + baseline), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255, 255, 255), 1, 8)
    cv2.imwrite('result.jpg', image)
```
(2)NMS
NMS的目的是根據IoU刪除重複的預測框,原理是同一物體的預測框IoU大,不同物體的預測框IoU小。
I. 計算預測框面積。
II. 對置信度降序排列,返回排序下標。把排序後置信度最高的預測框取出,計算剩餘框與該框的IoU。
III. 根據閾值篩選保留的框,並對保留的框做同樣的操作,直到只剩一個框時停止。
```python
def nms(bboxes, scores, threshold=0.5):
    """Greedy non-maximum suppression.

    Repeatedly keeps the highest-scoring remaining box and discards every
    other remaining box whose IoU with it is greater than ``threshold``.

    Args:
        bboxes: tensor of shape (n, 4) holding ``[x1, y1, x2, y2]`` per row.
        scores: tensor of shape (n,) with one score per box.
        threshold: boxes with IoU <= threshold against the kept box survive
            to the next round.

    Returns:
        1-D ``torch.long`` tensor with the indices of the kept boxes, ordered
        by descending score. Empty when ``scores`` is empty.
    """
    x1 = bboxes[:, 0]
    y1 = bboxes[:, 1]
    x2 = bboxes[:, 2]
    y2 = bboxes[:, 3]
    # 1 area of every box
    areas = (x2 - x1) * (y2 - y1)
    # 2 box indices sorted by descending score
    _, order = scores.sort(0, descending=True)
    order = order.reshape(-1)  # keep `order` 1-D so slicing below is always valid
    keep = []
    while order.numel() > 0:
        # 3 take the best remaining box
        i = int(order[0])
        keep.append(i)
        if order.numel() == 1:
            break
        rest = order[1:]
        # 4 IoU of the remaining boxes with the kept box. The intersection's
        #   top-left corner is the element-wise max of the top-left corners and
        #   its bottom-right corner the element-wise min of the bottom-right
        #   corners (hence x2[i] / y2[i] as the upper clamp bounds).
        xx1 = x1[rest].clamp(min=float(x1[i]))
        yy1 = y1[rest].clamp(min=float(y1[i]))
        xx2 = x2[rest].clamp(max=float(x2[i]))
        yy2 = y2[rest].clamp(max=float(y2[i]))
        w = (xx2 - xx1).clamp(min=0)
        h = (yy2 - yy1).clamp(min=0)
        inter = w * h
        iou = inter / (areas[i] + areas[rest] - inter)
        # 5 drop the boxes that overlap the kept box too much
        survivors = torch.nonzero(iou <= threshold).reshape(-1)
        # 6 the survivors are the candidates of the next round
        order = rest[survivors]
    return torch.tensor(keep, dtype=torch.long)
```
(3)解碼
I. 取出預測值中的置信度,根據置信度閾值初篩預測框。
II. 遍歷輸出特徵圖的行、列以及每個網格的框,取出對應的預測框、類別概率。根據預測偏移計算預測框的中心點。預測類別概率與置信度的乘積作為最終的預測概率,再根據最終的預測概率設置閾值篩選一遍框。
III. 根據預測框和物體類別概率進行非極大值抑制,輸出符合條件的預測值。
```python
def decoder(pred):
    """Turn a raw network output into NMS-filtered detections.

    After ``squeeze(0)`` the prediction is indexed as ``[row, col, channel]``:
    channels 0-3 and 5-8 hold the two boxes of a cell as ``(cx, cy, w, h)``,
    channels 4 and 9 their confidences, and channels 10 onwards the class
    scores. The original comments give the shape as 1x7x7x30.

    Args:
        pred: network output tensor for a single image. It is not modified.

    Returns:
        Tuple ``(boxes, cls_indexs, probs)``: ``boxes`` is (n, 4) with
        ``[x1, y1, x2, y2]`` in the same normalised units as the prediction,
        ``cls_indexs`` is (n,) long class ids and ``probs`` is (n,)
        confidence * best class score. When no box passes the thresholds a
        single all-zero placeholder detection (class 0, probability 0) is
        returned.
    """
    grid_num = 7
    boxes = []
    cls_indexs = []
    probs = []
    cell_size = 1. / grid_num
    pred = pred.detach().squeeze(0)  # 7x7x30
    # 1 pre-filter by box confidence; the globally most confident box is
    #   always kept as a candidate, even below the 0.1 threshold
    contain = torch.stack((pred[:, :, 4], pred[:, :, 9]), 2)  # [7, 7, 2]
    mask = (contain > 0.1) | (contain == contain.max())  # [7, 7, 2]
    for i in range(grid_num):
        for j in range(grid_num):
            for b in range(2):
                if not mask[i, j, b]:
                    continue
                box = pred[i, j, b * 5:b * 5 + 4]
                contain_prob = pred[i, j, b * 5 + 4].reshape(1)
                # top-left corner of cell (row i, col j), as (x, y)
                xy = pred.new_tensor([j, i]) * cell_size
                # 2 decode: cell-relative centre -> image-relative centre,
                #   then [cx, cy, w, h] -> [x1, y1, x2, y2]. New tensors are
                #   built here so the caller's prediction is left untouched.
                cxcy = box[:2] * cell_size + xy
                half_wh = 0.5 * box[2:]
                box_xy = torch.cat((cxcy - half_wh, cxcy + half_wh))
                max_prob, cls_index = torch.max(pred[i, j, 10:], 0)
                # 3 filter by the final probability
                prob = contain_prob * max_prob
                if float(prob[0]) > 0.1:
                    boxes.append(box_xy.view(1, 4))
                    cls_indexs.append(cls_index.reshape(1))
                    probs.append(prob)
    if not boxes:
        # placeholder so callers always receive at least one row
        boxes = torch.zeros((1, 4))
        probs = torch.zeros(1)
        cls_indexs = torch.zeros(1, dtype=torch.long)
    else:
        boxes = torch.cat(boxes, 0)  # (n, 4)
        probs = torch.cat(probs, 0)  # (n,)
        cls_indexs = torch.cat(cls_indexs, 0)  # (n,)
    # 4 non-maximum suppression
    keep = nms(boxes, probs)
    return boxes[keep], cls_indexs[keep], probs[keep]
```