
Commit 6e33dcf

Rename video to videos, reducing confusion.
std::vector<ov::Tensor> videos: the std::vector holds multiple videos, and each ov::Tensor has layout [N, H, W, C], where N is the number of frames of that video.

Signed-off-by: xipingya <[email protected]>
1 parent bbbef65 commit 6e33dcf
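
For reference, a minimal usage sketch of the renamed API. The generate() call and the ov::genai::videos property come from the headers changed below; the model directory, device, and tensor shape are illustrative assumptions.

#include "openvino/genai/visual_language/pipeline.hpp"

int main() {
    // Assumed model directory and device; adjust for your setup.
    ov::genai::VLMPipeline pipe("./vlm_model_dir", "CPU");

    // One video is one uint8 ov::Tensor with layout [N, H, W, C];
    // N is the frame count (16 frames of 224x224 RGB here, purely as an example).
    ov::Tensor clip(ov::element::u8, {16, 224, 224, 3});

    // std::vector<ov::Tensor> holds multiple videos; the property is now named "videos".
    std::vector<ov::Tensor> video_tensors{clip};
    auto result = pipe.generate("Describe the video.", ov::genai::videos(video_tensors));
    return 0;
}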

File tree

13 files changed (+34, -33 lines)


src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp

Lines changed: 1 addition & 1 deletion
@@ -173,7 +173,7 @@ class OPENVINO_GENAI_EXPORTS ContinuousBatchingPipeline {
     GenerationHandle add_request(uint64_t request_id,
                                  const std::string& prompt,
                                  const std::vector<ov::Tensor>& images,
-                                 const std::vector<ov::Tensor>& video,
+                                 const std::vector<ov::Tensor>& videos,
                                  const ov::genai::GenerationConfig& sampling_params);

     void step();

src/cpp/include/openvino/genai/visual_language/pipeline.hpp

Lines changed: 4 additions & 3 deletions
@@ -104,9 +104,10 @@ class OPENVINO_GENAI_EXPORTS VLMPipeline {

     /// @brief Generate a response given a prompt and any number of
     /// uint8 RGB images with [NHWC] or [HWC] layout.
+    /// Or uint8 RGB video frames with [NHWC] layout, first dim means frames number.
     /// @param prompt A prompt to respond to.
     /// @param images Images to be prepended to a prompt.
-    /// @param video Video frames to be prepended to a prompt.
+    /// @param videos Multiple videos, each providing multiple frames, to be prepended to a prompt.
     /// @param generation_config A config to follow for text generation.
     /// @param streamer A streamer to acquire intermediate result.
     /// @return A string generated by a model.
@@ -263,9 +264,9 @@ class OPENVINO_GENAI_EXPORTS VLMPipeline {
  * utils that allow to use generate() in the following way:
  * pipe.generate(prompt, ov::genai::image(image_tensor)).
  * pipe.generate(prompt, ov::genai::images(image_tensors)).
- * pipe.generate(prompt, ov::genai::video(video_tensors)).
+ * pipe.generate(prompt, ov::genai::videos(videos_tensors)).
  */
 static constexpr ov::Property<ov::Tensor> image{"image"};
 static constexpr ov::Property<std::vector<ov::Tensor>> images{"images"};
-static constexpr ov::Property<std::vector<ov::Tensor>> video{"video"};
+static constexpr ov::Property<std::vector<ov::Tensor>> videos{"videos"};
 }

src/cpp/src/continuous_batching/pipeline.cpp

Lines changed: 2 additions & 2 deletions
@@ -249,9 +249,9 @@ GenerationHandle ContinuousBatchingPipeline::add_request(uint64_t request_id,
 GenerationHandle ContinuousBatchingPipeline::add_request(uint64_t request_id,
                                                          const std::string& prompt,
                                                          const std::vector<ov::Tensor>& images,
-                                                         const std::vector<ov::Tensor>& video,
+                                                         const std::vector<ov::Tensor>& videos,
                                                          const ov::genai::GenerationConfig& sampling_params) {
-    return m_impl->add_request(request_id, prompt, images, video, sampling_params);
+    return m_impl->add_request(request_id, prompt, images, videos, sampling_params);
 }

 void ContinuousBatchingPipeline::step() {
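
A hedged call-site sketch for the renamed parameter. Pipeline construction and the generation loop are omitted; cb_pipe, clip, and the request id are assumptions, while the add_request signature and ov::genai::greedy() are part of the existing API.

    // clip: uint8 ov::Tensor with layout [N, H, W, C] holding the frames of one video.
    std::vector<ov::Tensor> images;              // no standalone images in this request
    std::vector<ov::Tensor> videos{clip};        // one video; more tensors mean more videos
    ov::genai::GenerationHandle handle = cb_pipe.add_request(
        /*request_id=*/0, "Summarize the clip.", images, videos, ov::genai::greedy());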

src/cpp/src/continuous_batching/pipeline_base.cpp

Lines changed: 2 additions & 2 deletions
@@ -323,7 +323,7 @@ GenerationHandle ContinuousBatchingPipeline::IContinuousBatchingPipeline::add_re
     uint64_t request_id,
     const std::string& prompt,
     const std::vector<ov::Tensor>& images,
-    const std::vector<ov::Tensor>& video,
+    const std::vector<ov::Tensor>& videos,
     GenerationConfig sampling_params) {
     OPENVINO_ASSERT(m_model_input_type == ModelInputType::EMBEDDINGS, "Model doesn't support embeddings.");

@@ -335,7 +335,7 @@ GenerationHandle ContinuousBatchingPipeline::IContinuousBatchingPipeline::add_re

     auto encoded_images = m_inputs_embedder->encode_images(images);
     std::vector<std::vector<ov::genai::EncodedImage>> encoded_videos;
-    for (auto& vd : video) {
+    for (auto& vd : videos) {
         auto encoded_vd = m_inputs_embedder->encode_video({vd});
         encoded_videos.push_back(encoded_vd);
     }

src/cpp/src/continuous_batching/pipeline_base.hpp

Lines changed: 1 addition & 1 deletion
@@ -101,7 +101,7 @@ class ContinuousBatchingPipeline::IContinuousBatchingPipeline {
     GenerationHandle add_request(uint64_t request_id,
                                  const std::string& prompt,
                                  const std::vector<ov::Tensor>& images,
-                                 const std::vector<ov::Tensor>& video,
+                                 const std::vector<ov::Tensor>& videos,
                                  GenerationConfig sampling_params);

     /**

src/cpp/src/visual_language/continuous_batching_adapter.hpp

Lines changed: 2 additions & 2 deletions
@@ -53,13 +53,13 @@ class ov::genai::VLMPipeline::VLMContinuousBatchingAdapter : public ov::genai::V
     VLMDecodedResults generate(
         const std::string& prompt,
         const std::vector<ov::Tensor>& images,
-        const std::vector<ov::Tensor>& video,
+        const std::vector<ov::Tensor>& videos,
         GenerationConfig generation_config,
         const StreamerVariant& streamer
     ) override {
         auto start_time = std::chrono::steady_clock::now();
         auto images_vec = images.size() == 0u ? std::vector<std::vector<ov::Tensor>>{} : std::vector<std::vector<ov::Tensor>>{images};
-        auto video_vec = video.size() == 0u ? std::vector<std::vector<ov::Tensor>>{} : std::vector<std::vector<ov::Tensor>>{video};
+        auto video_vec = videos.size() == 0u ? std::vector<std::vector<ov::Tensor>>{} : std::vector<std::vector<ov::Tensor>>{videos};
         auto result = m_impl.generate({prompt}, images_vec, video_vec, {generation_config}, streamer)[0];
         auto stop_time = std::chrono::steady_clock::now();

src/cpp/src/visual_language/inputs_embedder.cpp

Lines changed: 1 addition & 1 deletion
@@ -186,7 +186,7 @@ ov::Tensor InputsEmbedder::IInputsEmbedder::get_inputs_embeds(
 }

 std::vector<ov::genai::EncodedImage> InputsEmbedder::IInputsEmbedder::encode_video(const std::vector<ov::Tensor>& videos) {
-    OPENVINO_THROW("Current model doesn't support video preprocess currently. Input images are processed as separate images.");
+    OPENVINO_THROW("Current model doesn't support videos preprocess currently. Input images are processed as separate images.");
 }

 NormlizedPrompt InputsEmbedder::IInputsEmbedder::normalize_prompt(

src/cpp/src/visual_language/pipeline.cpp

Lines changed: 4 additions & 4 deletions
@@ -163,7 +163,7 @@ class VLMPipeline::VLMPipelineImpl : public VLMPipelineBase{
     VLMDecodedResults generate(
         const std::string& prompt,
         const std::vector<ov::Tensor>& images,
-        const std::vector<ov::Tensor>& video,
+        const std::vector<ov::Tensor>& videos,
         GenerationConfig generation_config,
         const StreamerVariant& streamer
     ) override {
@@ -196,7 +196,7 @@ class VLMPipeline::VLMPipelineImpl : public VLMPipelineBase{

         auto encoded_images = m_inputs_embedder->encode_images(images);
         std::vector<std::vector<ov::genai::EncodedImage>> encoded_videos;
-        for (auto& vd : video) {
+        for (auto& vd : videos) {
             auto encoded_vd = m_inputs_embedder->encode_video({vd});
             encoded_videos.push_back(encoded_vd);
         }
@@ -470,11 +470,11 @@ VLMPipeline::~VLMPipeline() = default;
 VLMDecodedResults VLMPipeline::generate(
     const std::string& prompt,
     const std::vector<ov::Tensor>& images,
-    const std::vector<ov::Tensor>& video,
+    const std::vector<ov::Tensor>& videos,
     const GenerationConfig& generation_config,
     const StreamerVariant& streamer
 ) {
-    return m_pimpl->generate(prompt, images, video, generation_config, streamer);
+    return m_pimpl->generate(prompt, images, videos, generation_config, streamer);
 }

 VLMDecodedResults VLMPipeline::generate(

src/cpp/src/visual_language/pipeline_base.hpp

Lines changed: 8 additions & 8 deletions
@@ -30,7 +30,7 @@ class ov::genai::VLMPipeline::VLMPipelineBase {
     virtual VLMDecodedResults generate(
         const std::string& prompt,
         const std::vector<ov::Tensor>& images,
-        const std::vector<ov::Tensor>& video,
+        const std::vector<ov::Tensor>& videos,
         GenerationConfig generation_config,
         const StreamerVariant& streamer
     ) = 0;
@@ -41,7 +41,7 @@ class ov::genai::VLMPipeline::VLMPipelineBase {
     ) {
         auto image = config_map.find(ov::genai::image.name());
         auto images = config_map.find(ov::genai::images.name());
-        auto video = config_map.find(ov::genai::video.name());
+        auto videos = config_map.find(ov::genai::videos.name());

         ov::genai::OptionalGenerationConfig config_arg = utils::get_config_from_map(config_map);
         GenerationConfig config = (config_arg.has_value()) ? *config_arg : get_generation_config();
@@ -64,13 +64,13 @@ class ov::genai::VLMPipeline::VLMPipelineBase {
             }
         }

-        if (config_map.end() != video) {
-            if (video->second.is<std::vector<ov::Tensor>>()) {
-                video_rgbs = video->second.as<std::vector<ov::Tensor>>();
-            } else if (video->second.is<ov::Tensor>()) {
-                video_rgbs = {video->second.as<ov::Tensor>()};
+        if (config_map.end() != videos) {
+            if (videos->second.is<std::vector<ov::Tensor>>()) {
+                video_rgbs = videos->second.as<std::vector<ov::Tensor>>();
+            } else if (videos->second.is<ov::Tensor>()) {
+                video_rgbs = {videos->second.as<ov::Tensor>()};
             } else {
-                OPENVINO_THROW("Unknown video type.");
+                OPENVINO_THROW("Unknown videos type.");
             }
         }
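
The lookup above accepts the "videos" entry either as a list of tensors or as a single tensor. A small sketch of both map forms; pipe and clip are assumed to exist as in the earlier example, while videos.name() and the AnyMap generate() overload are the existing API.

    // Single video passed directly as one ov::Tensor.
    ov::AnyMap single{{ov::genai::videos.name(), clip}};

    // Several videos passed as std::vector<ov::Tensor>.
    ov::AnyMap several{{ov::genai::videos.name(), std::vector<ov::Tensor>{clip, clip}}};

    auto out = pipe.generate("What happens in these clips?", several);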

src/cpp/src/visual_language/qwen2vl/classes.cpp

Lines changed: 2 additions & 2 deletions
@@ -1006,9 +1006,9 @@ ov::Tensor InputsEmbedderQwen2VL::get_inputs_embeds(const std::string& unified_p
     return qwen2_vl_utils::merge_text_and_image_embeddings(input_ids, text_embeds, merged_image_embeddings_tensor, image_pad_token_id, video_pad_token_id);
 }

-std::vector<ov::genai::EncodedImage> InputsEmbedderQwen2VL::encode_video(const std::vector<ov::Tensor>& video) {
+std::vector<ov::genai::EncodedImage> InputsEmbedderQwen2VL::encode_video(const std::vector<ov::Tensor>& videos) {
     std::vector<EncodedImage> embeds;
-    for (const ov::Tensor& single_video : video) {
+    for (const ov::Tensor& single_video : videos) {
         std::vector<ov::Tensor> single_frames = to_single_image_tensors({single_video});
         auto embeds_video = m_vision_encoder->encode_frames(single_frames);
         embeds.insert(embeds.end(), embeds_video.begin(), embeds_video.end());
