Skip to content

Commit 1aea74c

Browse files
VELCpro, pre-commit-ci[bot], glenn-jocher
authored
Add new --vid-stride inference parameter for videos (#9256)
* fps feature/skip frame added
* [pre-commit.ci] auto fixes from pre-commit.com hooks
  for more information, see https://pre-commit.ci
* predict.py updates
* Update dataloaders.py
  Signed-off-by: Glenn Jocher <[email protected]>
* Update dataloaders.py
  Signed-off-by: Glenn Jocher <[email protected]>
* remove unused attribute
  Signed-off-by: Glenn Jocher <[email protected]>
* Cleanup
  Signed-off-by: Glenn Jocher <[email protected]>
* [pre-commit.ci] auto fixes from pre-commit.com hooks
  for more information, see https://pre-commit.ci
* Update predict.py
  Signed-off-by: Glenn Jocher <[email protected]>
* Update detect.py
  Signed-off-by: Glenn Jocher <[email protected]>
* Update dataloaders.py
  Signed-off-by: Glenn Jocher <[email protected]>
* Rename skip_frame to vid_stride
* cleanup
* cleanup2
  Signed-off-by: Glenn Jocher <[email protected]>

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Glenn Jocher <[email protected]>
1 parent e45d335 commit 1aea74c

File tree

3 files changed

+17
-10
lines changed

3 files changed

+17
-10
lines changed

classify/predict.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -66,6 +66,7 @@ def run(
6666
exist_ok=False, # existing project/name ok, do not increment
6767
half=False, # use FP16 half-precision inference
6868
dnn=False, # use OpenCV DNN for ONNX inference
69+
vid_stride=1, # video frame-rate stride
6970
):
7071
source = str(source)
7172
save_img = not nosave and not source.endswith('.txt') # save inference images
@@ -88,10 +89,10 @@ def run(
8889
# Dataloader
8990
if webcam:
9091
view_img = check_imshow()
91-
dataset = LoadStreams(source, img_size=imgsz, transforms=classify_transforms(imgsz[0]))
92+
dataset = LoadStreams(source, img_size=imgsz, transforms=classify_transforms(imgsz[0]), vid_stride=vid_stride)
9293
bs = len(dataset) # batch_size
9394
else:
94-
dataset = LoadImages(source, img_size=imgsz, transforms=classify_transforms(imgsz[0]))
95+
dataset = LoadImages(source, img_size=imgsz, transforms=classify_transforms(imgsz[0]), vid_stride=vid_stride)
9596
bs = 1 # batch_size
9697
vid_path, vid_writer = [None] * bs, [None] * bs
9798

@@ -196,6 +197,7 @@ def parse_opt():
196197
parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
197198
parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
198199
parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
200+
parser.add_argument('--vid-stride', type=int, default=1, help='video frame-rate stride')
199201
opt = parser.parse_args()
200202
opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand
201203
print_args(vars(opt))

detect.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -74,6 +74,7 @@ def run(
7474
hide_conf=False, # hide confidences
7575
half=False, # use FP16 half-precision inference
7676
dnn=False, # use OpenCV DNN for ONNX inference
77+
vid_stride=1, # video frame-rate stride
7778
):
7879
source = str(source)
7980
save_img = not nosave and not source.endswith('.txt') # save inference images
@@ -96,10 +97,10 @@ def run(
9697
# Dataloader
9798
if webcam:
9899
view_img = check_imshow()
99-
dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt)
100+
dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
100101
bs = len(dataset) # batch_size
101102
else:
102-
dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
103+
dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
103104
bs = 1 # batch_size
104105
vid_path, vid_writer = [None] * bs, [None] * bs
105106

@@ -236,6 +237,7 @@ def parse_opt():
236237
parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences')
237238
parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
238239
parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
240+
parser.add_argument('--vid-stride', type=int, default=1, help='video frame-rate stride')
239241
opt = parser.parse_args()
240242
opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand
241243
print_args(vars(opt))

utils/dataloaders.py

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -187,7 +187,7 @@ def __iter__(self):
187187

188188
class LoadImages:
189189
# YOLOv5 image/video dataloader, i.e. `python detect.py --source image.jpg/vid.mp4`
190-
def __init__(self, path, img_size=640, stride=32, auto=True, transforms=None):
190+
def __init__(self, path, img_size=640, stride=32, auto=True, transforms=None, vid_stride=1):
191191
files = []
192192
for p in sorted(path) if isinstance(path, (list, tuple)) else [path]:
193193
p = str(Path(p).resolve())
@@ -212,6 +212,7 @@ def __init__(self, path, img_size=640, stride=32, auto=True, transforms=None):
212212
self.mode = 'image'
213213
self.auto = auto
214214
self.transforms = transforms # optional
215+
self.vid_stride = vid_stride # video frame-rate stride
215216
if any(videos):
216217
self._new_video(videos[0]) # new video
217218
else:
@@ -232,6 +233,7 @@ def __next__(self):
232233
# Read video
233234
self.mode = 'video'
234235
ret_val, im0 = self.cap.read()
236+
self.cap.set(cv2.CAP_PROP_POS_FRAMES, self.vid_stride * (self.frame + 1)) # read at vid_stride
235237
while not ret_val:
236238
self.count += 1
237239
self.cap.release()
@@ -242,7 +244,7 @@ def __next__(self):
242244
ret_val, im0 = self.cap.read()
243245

244246
self.frame += 1
245-
# im0 = self._cv2_rotate(im0) # for use if cv2 auto rotation is False
247+
# im0 = self._cv2_rotate(im0) # for use if cv2 autorotation is False
246248
s = f'video {self.count + 1}/{self.nf} ({self.frame}/{self.frames}) {path}: '
247249

248250
else:
@@ -265,7 +267,7 @@ def _new_video(self, path):
265267
# Create a new video capture object
266268
self.frame = 0
267269
self.cap = cv2.VideoCapture(path)
268-
self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
270+
self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride)
269271
self.orientation = int(self.cap.get(cv2.CAP_PROP_ORIENTATION_META)) # rotation degrees
270272
# self.cap.set(cv2.CAP_PROP_ORIENTATION_AUTO, 0) # disable https://github.com/ultralytics/yolov5/issues/8493
271273

@@ -285,11 +287,12 @@ def __len__(self):
285287

286288
class LoadStreams:
287289
# YOLOv5 streamloader, i.e. `python detect.py --source 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP streams`
288-
def __init__(self, sources='streams.txt', img_size=640, stride=32, auto=True, transforms=None):
290+
def __init__(self, sources='streams.txt', img_size=640, stride=32, auto=True, transforms=None, vid_stride=1):
289291
torch.backends.cudnn.benchmark = True # faster for fixed-size inference
290292
self.mode = 'stream'
291293
self.img_size = img_size
292294
self.stride = stride
295+
self.vid_stride = vid_stride # video frame-rate stride
293296
sources = Path(sources).read_text().rsplit() if Path(sources).is_file() else [sources]
294297
n = len(sources)
295298
self.sources = [clean_str(x) for x in sources] # clean source names for later
@@ -329,11 +332,11 @@ def __init__(self, sources='streams.txt', img_size=640, stride=32, auto=True, tr
329332

330333
def update(self, i, cap, stream):
331334
# Read stream `i` frames in daemon thread
332-
n, f, read = 0, self.frames[i], 1 # frame number, frame array, inference every 'read' frame
335+
n, f = 0, self.frames[i] # frame number, frame array
333336
while cap.isOpened() and n < f:
334337
n += 1
335338
cap.grab() # .read() = .grab() followed by .retrieve()
336-
if n % read == 0:
339+
if n % self.vid_stride == 0:
337340
success, im = cap.retrieve()
338341
if success:
339342
self.imgs[i] = im

0 commit comments

Comments
 (0)