Commit 7507d31

nextqa integration
1 parent 74b3032 commit 7507d31

2 files changed: +69 -2 lines changed


ns_vfs/api/run_with_nsvqa.py

Lines changed: 5 additions & 2 deletions
@@ -10,6 +10,7 @@
 from ns_vfs.percepter.single_vision_percepter import SingleVisionPercepter
 from ns_vfs.validator import FrameValidator
 from ns_vfs.dataloader.longvideobench import LongVideoBench
+from ns_vfs.dataloader.nextqa import NextQA


 def run_nsvs_nsvqa(
@@ -102,12 +103,14 @@ def run_nsvs_nsvqa(


 if __name__ == "__main__":
-    input_data_path = "/nas/mars/experiment_result/nsvqa/1_puls/longvideobench/longvideobench-outputs-fixed-specs-v2.json"
+    # input_data_path = "/nas/mars/experiment_result/nsvqa/1_puls/longvideobench/longvideobench-outputs-fixed-specs-v2.json"
+    input_data_path = "/nas/mars/experiment_result/nsvqa/1_puls/next-dataset/nextqa-outputs.json"
     with open(input_data_path, 'r', encoding='utf-8') as f:
         data = json.load(f)

     for sample in data:
-        loader = LongVideoBench(sample["video_path"], sample["subtitle_path"])
+        # loader = LongVideoBench(sample["video_path"], sample["subtitle_path"])
+        loader = NextQA(sample["video_path"], sample["subtitle_path"])
         nsvqa_input = loader.load_all()
         extracted = sample["video_path"].split('/')[-1].split('.')[0]
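The two commented-out lines switch datasets by hand. A minimal sketch of an explicit switch instead; the --dataset flag and LOADERS mapping are illustrative assumptions, not part of this commit:

# Hypothetical alternative to toggling loaders by commenting lines in and out.
# Assumes both loaders accept (video_path, subtitle_path) as in the loop above.
import argparse

from ns_vfs.dataloader.longvideobench import LongVideoBench
from ns_vfs.dataloader.nextqa import NextQA

LOADERS = {"longvideobench": LongVideoBench, "nextqa": NextQA}

parser = argparse.ArgumentParser()
parser.add_argument("--dataset", choices=sorted(LOADERS), default="nextqa")
args = parser.parse_args()

loader_cls = LOADERS[args.dataset]
# loader = loader_cls(sample["video_path"], sample["subtitle_path"])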

ns_vfs/dataloader/nextqa.py

Lines changed: 64 additions & 0 deletions
@@ -0,0 +1,64 @@
+from typing import List, Dict, Union
+import numpy as np
+import json
+import cv2
+
+from ns_vfs.dataloader._base import DatasetLoader
+
+class NextQA(DatasetLoader):
+    def _parse_timestamp(self, ts: str) -> float:
+        """
+        Parse a timestamp like "HH:MM:SS.mmm" into total seconds as float.
+        """
+        h, m, s = ts.split(':')
+        return int(h) * 3600 + int(m) * 60 + float(s)
+
+    def load_all(self, sample_fps: int = 2, chunk_size: int = 10) -> List[Dict[str, Union[List[np.ndarray], None]]]:
+        """
+        Load a video and subtitles, sample at `sample_fps` frames/sec, group every
+        `chunk_size` frames into one dict, and attach subtitles overlapping each chunk.
+
+        Returns:
+            List of dicts of the form:
+            [
+                {'frames': [f1, f2, ..., f10], 'subtitle': None},
+                {'frames': [f11, ..., f20], 'subtitle': None},
+                ...
+            ]
+        """
+
+        # --- 1) Open video and get duration ---
+        cap = cv2.VideoCapture(self.video_path)
+        if not cap.isOpened():
+            raise IOError(f"Cannot open video: {self.video_path}")
+        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        vid_fps = cap.get(cv2.CAP_PROP_FPS)
+        duration_sec = total_frames / vid_fps
+
+        # --- 2) Sample frames at regular intervals ---
+        interval = 1.0 / sample_fps
+        timestamps = np.arange(0, duration_sec, interval)
+
+        sampled = []
+        for t in timestamps:
+            cap.set(cv2.CAP_PROP_POS_MSEC, t * 1000)
+            ret, frame = cap.read()
+            if not ret:
+                break
+            sampled.append((t, frame.copy()))
+        cap.release()
+
+        chunks: List[Dict[str, Union[List[np.ndarray], None]]] = []
+        for i in range(0, len(sampled), chunk_size):
+            chunk = sampled[i:i + chunk_size]
+            if not chunk:
+                continue
+
+            frames = [f for (_, f) in chunk]
+
+            chunks.append({
+                'frames': frames,
+                'subtitle': None
+            })
+
+        return chunks
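A minimal usage sketch of the new loader; the constructor signature matches the (video_path, subtitle_path) call in run_with_nsvqa.py, and the file paths here are placeholders:

# Hypothetical usage; paths are placeholders, not from this commit.
from ns_vfs.dataloader.nextqa import NextQA

loader = NextQA("sample_video.mp4", "sample_subtitles.json")
chunks = loader.load_all(sample_fps=2, chunk_size=10)  # 2 frames/sec, 10 frames/chunk
for i, chunk in enumerate(chunks):
    # Each full chunk spans ~5 seconds of video (10 frames at 2 fps).
    print(i, len(chunk["frames"]), chunk["subtitle"])

Note that as committed, _parse_timestamp is never called and every chunk's 'subtitle' stays None, so the docstring's "attach subtitles overlapping each chunk" is not yet implemented; the json import is likewise unused, suggesting subtitle handling is planned as follow-up work.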
