camera.py
import time
import random
import argparse
import pickle as pkl

import torch
import cv2

from utility import *  # expected to provide load_classes and write_results
from yolo import Darknet

def arg_parse():
    """Parse command-line arguments."""
    parser = argparse.ArgumentParser(description='YOLO v3 Real Time Detection')
    parser.add_argument("--confidence", dest="confidence",
                        help="Object confidence used to filter predictions", default=0.25)
    parser.add_argument("--nms_thresh", dest="nms_thresh",
                        help="NMS threshold", default=0.4)
    parser.add_argument("--reso", dest="reso",
                        help="Input resolution of the network. Increase to improve accuracy, decrease to improve speed.",
                        default="160", type=str)
    return parser.parse_args()

def prep_image(img, inp_dim):
    """Convert a numpy frame (BGR, HWC) into a normalized PyTorch tensor (RGB, NCHW)."""
    orig_im = img
    dim = orig_im.shape[1], orig_im.shape[0]
    img = cv2.resize(orig_im, (inp_dim, inp_dim))
    img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy()  # BGR -> RGB, HWC -> CHW
    img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
    return img_, orig_im, dim

def write(x, img):
    """Draw one detection (bounding box plus class label) on img."""
    # Cast tensor coordinates to plain ints, as OpenCV drawing calls expect.
    c1 = (int(x[1]), int(x[2]))
    c2 = (int(x[3]), int(x[4]))
    cls = int(x[-1])
    label = "{0}".format(classes[cls])
    color = random.choice(colors)
    cv2.rectangle(img, c1, c2, color, 1)
    t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
    c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
    cv2.rectangle(img, c1, c2, color, -1)
    cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4),
                cv2.FONT_HERSHEY_PLAIN, 1, [225, 255, 255], 1)
    return img

if __name__ == "__main__":
    cfgfile = "cfg/yolov3.cfg"
    weightsfile = "weight/yolov3.weights"
    num_classes = 80

    args = arg_parse()
    confidence = float(args.confidence)
    nms_thresh = float(args.nms_thresh)
    bbox_attrs = 5 + num_classes

    # The original references `device` without defining it in this file;
    # define it explicitly in case utility does not export one.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = Darknet(cfgfile).to(device)
    model.load_weights(weightsfile)
    model.network_info["height"] = args.reso
    inp_dim = int(model.network_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32
    model.eval()

    # Load class names and box colors once, before the capture loop.
    classes = load_classes('data/coco.names')
    colors = pkl.load(open("color/pallete", "rb"))

    cap = cv2.VideoCapture(0)
    assert cap.isOpened(), 'Cannot capture source'

    frames = 0
    start = time.time()
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        img, orig_im, dim = prep_image(frame, inp_dim)
        img = img.to(device)  # reassign: .to() is not in-place
        with torch.no_grad():
            output = model(img)
        output = write_results(output, confidence, num_classes, nms_conf=nms_thresh)

        # write_results returns an int when nothing survives the thresholds;
        # only rescale and draw boxes when there are real detections.
        if type(output) != int:
            # Map box coordinates from network space back to the frame.
            output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0, float(inp_dim)) / inp_dim
            output[:, [1, 3]] *= frame.shape[1]
            output[:, [2, 4]] *= frame.shape[0]
            list(map(lambda x: write(x, orig_im), output))

        cv2.imshow("frame", orig_im)
        key = cv2.waitKey(1)
        if key & 0xFF == ord('q'):
            break
        frames += 1
        print("FPS of the video is {:5.2f}".format(frames / (time.time() - start)))

    cap.release()
    cv2.destroyAllWindows()
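
For reference, a minimal invocation sketch (assuming the cfg/yolov3.cfg, weight/yolov3.weights, data/coco.names, and color/pallete paths above exist relative to the working directory, and that a webcam is available at index 0):

python camera.py --confidence 0.25 --nms_thresh 0.4 --reso 160

Press q in the display window to quit; the script prints a running FPS estimate to stdout.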