                                                     GPTQMarlinConfig)
 from vllm.model_executor.layers.sampler import SamplerOutput, get_sampler
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
+from vllm.model_executor.models.module_mapping import MultiModelKeys
 from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.multimodal.inputs import (ImageItem, ModalityData,
                                     MultiModalFieldConfig, MultiModalKwargs,
@@ -926,15 +927,23 @@ class Qwen2VLForConditionalGeneration(nn.Module, SupportsMultiModal,
     }
 
     # LoRA specific attributes
-    # TODO Support LoRA for the visual encoder in the future.
     supported_lora_modules = [
         "qkv_proj",
         "o_proj",
         "gate_up_proj",
         "down_proj",
+        # vision tower
+        "qkv",
+        "attn.proj",  # Distinguish patch_embed.proj
+        "fc1",
+        "fc2",
+        # projector
+        "mlp.0",
+        "mlp.2"
     ]
     embedding_modules = {}
     embedding_padding_modules = []
+
     # To ensure correct weight loading and mapping.
     hf_to_vllm_mapper = WeightsMapper(orig_to_new_prefix={
         "lm_head.": "language_model.lm_head.",
@@ -1231,3 +1240,12 @@ def load_weights(self, weights: Iterable[Tuple[str,
 
         loader = AutoWeightsLoader(self)
         return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper)
+
+    def get_mm_mapping(self) -> MultiModelKeys:
+        """
+        Get the module prefix in multimodal models
+        """
+        return MultiModelKeys.from_string_field(
+            language_model="language_model",
+            connector="visual.",
+            tower_model="visual.merger.")
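The new get_mm_mapping() hook exposes which module-name prefixes belong to the language model, the connector, and the vision tower, so that multimodal-aware LoRA logic can route adapter weights to the right sub-model. The snippet below is a minimal, self-contained sketch of how such a prefix mapping might be consumed: the MMKeys named tuple and the classify_module helper are hypothetical stand-ins rather than vLLM's actual MultiModelKeys API, and only the three prefix strings are taken from the diff above.

from typing import NamedTuple, List

# Hypothetical stand-in for MultiModelKeys: three groups of module-name
# prefixes, mirroring the values returned by get_mm_mapping() in the diff.
class MMKeys(NamedTuple):
    language_model: List[str]
    connector: List[str]
    tower_model: List[str]

mm_keys = MMKeys(language_model=["language_model"],
                 connector=["visual."],
                 tower_model=["visual.merger."])

def classify_module(name: str, keys: MMKeys) -> str:
    """Assign a module name to a component, checking the most specific
    prefixes first so "visual.merger.*" is not swallowed by "visual."."""
    for group in ("tower_model", "connector", "language_model"):
        if any(name.startswith(prefix) for prefix in getattr(keys, group)):
            return group
    return "language_model"  # default bucket for anything unmatched

# e.g. "visual.merger.mlp.0" -> "tower_model"
#      "language_model.model.layers.0.self_attn.qkv_proj" -> "language_model"
print(classify_module("visual.merger.mlp.0", mm_keys))

The ordering in classify_module matters only because "visual.merger." is a sub-prefix of "visual."; any longest-prefix-match scheme would give the same result.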