@@ -182,11 +182,15 @@ ov::Tensor InputsEmbedder::IInputsEmbedder::get_inputs_embeds(
182
182
bool recalculate_merged_embeddings,
183
183
const std::vector<size_t >& images_sequence,
184
184
const std::vector<size_t >& videos_sequence) {
185
- OPENVINO_THROW (" Current model doesn't support video preprocess currently. Input images are processed as separate images." );
185
+ if (videos.size () > 0 ) {
186
+ OPENVINO_THROW (" The model doesn't support 'videos' preprocessing yet. Please use 'images' instead." );
187
+ } else {
188
+ return get_inputs_embeds (prompt, images, metrics, recalculate_merged_embeddings, images_sequence);
189
+ }
186
190
}
187
191
188
192
/// Default video-encoding hook of the embedder interface.
///
/// This implementation never produces a result: it unconditionally raises via
/// OPENVINO_THROW, using the same wording as the other video-related fallbacks
/// in this file so callers see one consistent diagnostic.
/// NOTE(review): presumably overridden by embedders that support video input - confirm.
///
/// @param videos Video tensors supplied by the caller; not inspected here.
/// @throws Whatever OPENVINO_THROW raises, on every call.
std::vector<ov::genai::EncodedImage> InputsEmbedder::IInputsEmbedder::encode_video(const std::vector<ov::Tensor>& videos) {
    OPENVINO_THROW(" The model doesn't support 'videos' preprocessing yet. Please use 'images' instead.");
}
191
195
192
196
NormlizedPrompt InputsEmbedder::IInputsEmbedder::normalize_prompt (
@@ -195,7 +199,13 @@ NormlizedPrompt InputsEmbedder::IInputsEmbedder::normalize_prompt(
195
199
size_t video_base_id,
196
200
const std::vector<EncodedImage>& images,
197
201
const std::vector<std::vector<EncodedImage>>& videos) const {
198
- OPENVINO_THROW (" Current model doesn't support video preprocess currently. Input images are processed as separate images." );
202
+ if (videos.size () > 0 ) {
203
+ OPENVINO_THROW (" The model doesn't support 'videos' preprocessing yet. Please use 'images' instead." );
204
+ } else {
205
+ NormlizedPrompt norm_prompt;
206
+ std::tie (norm_prompt.unified_prompt , norm_prompt.images_sequence ) = normalize_prompt (prompt, base_id, images);
207
+ return norm_prompt;
208
+ }
199
209
}
200
210
201
211
std::pair<ov::Tensor, ov::Tensor> InputsEmbedder::IInputsEmbedder::get_inputs_embeds_with_token_type_ids (
@@ -207,6 +217,21 @@ std::pair<ov::Tensor, ov::Tensor> InputsEmbedder::IInputsEmbedder::get_inputs_em
207
217
OPENVINO_THROW (" This model does not support token_type_ids." );
208
218
}
209
219
220
+ std::pair<ov::Tensor, ov::Tensor> InputsEmbedder::IInputsEmbedder::get_inputs_embeds_with_token_type_ids (
221
+ const std::string& prompt,
222
+ const std::vector<EncodedImage>& images,
223
+ const std::vector<std::vector<ov::genai::EncodedImage>>& videos,
224
+ VLMPerfMetrics& metrics,
225
+ bool recalculate_merged_embeddings,
226
+ const std::vector<size_t >& image_sequence,
227
+ const std::vector<size_t >& videos_sequence) {
228
+ if (videos.size () > 0 ) {
229
+ OPENVINO_THROW (" The model doesn't support 'videos' preprocessing yet. Please use 'images' instead." );
230
+ } else {
231
+ return get_inputs_embeds_with_token_type_ids (prompt, images, metrics, recalculate_merged_embeddings, image_sequence);
232
+ }
233
+ }
234
+
210
235
/// Reports whether this embedder provides token_type_ids.
/// This implementation always answers `false`.
bool InputsEmbedder::IInputsEmbedder::has_token_type_ids() const {
    return false;
}
211
236
212
237
/// Public InputsEmbedder class
@@ -303,6 +328,18 @@ std::pair<ov::Tensor, ov::Tensor> InputsEmbedder::get_inputs_embeds_with_token_t
303
328
prompt, images, metrics, recalculate_merged_embeddings, image_sequence);
304
329
}
305
330
331
+ std::pair<ov::Tensor, ov::Tensor> InputsEmbedder::get_inputs_embeds_with_token_type_ids (
332
+ const std::string& prompt,
333
+ const std::vector<EncodedImage>& images,
334
+ const std::vector<std::vector<ov::genai::EncodedImage>>& videos,
335
+ VLMPerfMetrics& metrics,
336
+ bool recalculate_merged_embeddings,
337
+ const std::vector<size_t >& image_sequence,
338
+ const std::vector<size_t >& videos_sequence) {
339
+ return m_impl->get_inputs_embeds_with_token_type_ids (
340
+ prompt, images, videos, metrics, recalculate_merged_embeddings, image_sequence, videos_sequence);
341
+ }
342
+
306
343
bool InputsEmbedder::has_token_type_ids () const {
307
344
return m_impl->has_token_type_ids ();
308
345
}
0 commit comments