zzzyq
diff --git a/pyproject.toml b/pyproject.toml
Lines changed: 0 additions & 1 deletion
diff --git a/vllm/model_executor/models/arctic.py b/vllm/model_executor/models/arctic.py
Lines changed: 7 additions & 6 deletions
diff --git a/vllm/model_executor/models/aria.py b/vllm/model_executor/models/aria.py
Lines changed: 10 additions & 10 deletions
diff --git a/vllm/model_executor/models/aya_vision.py b/vllm/model_executor/models/aya_vision.py
Lines changed: 4 additions & 4 deletions
diff --git a/vllm/model_executor/models/baichuan.py b/vllm/model_executor/models/baichuan.py
Lines changed: 8 additions & 7 deletions
diff --git a/vllm/model_executor/models/bamba.py b/vllm/model_executor/models/bamba.py
Lines changed: 8 additions & 7 deletions
diff --git a/vllm/model_executor/models/bart.py b/vllm/model_executor/models/bart.py
Lines changed: 4 additions & 3 deletions
diff --git a/vllm/model_executor/models/bert.py b/vllm/model_executor/models/bert.py
Lines changed: 7 additions & 6 deletions
diff --git a/vllm/model_executor/models/bert_with_rope.py b/vllm/model_executor/models/bert_with_rope.py
Lines changed: 13 additions & 12 deletions
@@ -77,7 +77,6 @@ exclude = [
 "vllm/engine/**/*.py" = ["UP006", "UP035"]
 "vllm/executor/**/*.py" = ["UP006", "UP035"]
 "vllm/model_executor/model_loader/**/*.py" = ["UP006", "UP035"]
-"vllm/model_executor/models/**/*.py" = ["UP006", "UP035"]
 "vllm/prompt_adapter/**/*.py" = ["UP006", "UP035"]
 "vllm/spec_decode/**/*.py" = ["UP006", "UP035"]
 "vllm/worker/**/*.py" = ["UP006", "UP035"]
 
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 """Inference-only Snowflake Arctic model."""
-from typing import Iterable, List, Optional, Set, Tuple, Union
+from collections.abc import Iterable
+from typing import Optional, Union
 
 import torch
 from torch import nn
@@ -458,17 +459,17 @@ def compute_logits(
                                        sampling_metadata)
         return logits
 
-    def load_weights(self, weights: Iterable[Tuple[str,
-                                                   torch.Tensor]]) -> Set[str]:
+    def load_weights(self, weights: Iterable[tuple[str,
+                                                   torch.Tensor]]) -> set[str]:
         stacked_params_mapping = [
             # (param_name, shard_name, shard_id)
             ("qkv_proj", "q_proj", "q"),
             ("qkv_proj", "k_proj", "k"),
             ("qkv_proj", "v_proj", "v"),
         ]
 
-        mlp_params_mapping: List[Tuple[str, str, int]] = []
-        expert_params_mapping: List[Tuple[str, str, int]] = []
+        mlp_params_mapping: list[tuple[str, str, int]] = []
+        expert_params_mapping: list[tuple[str, str, int]] = []
         num_layers = self.config.num_hidden_layers
 
         for layer in range(num_layers):
@@ -497,7 +498,7 @@ def load_weights(self, weights: Iterable[Tuple[str,
                         ("ws", f"experts.{expert_id}.w3.weight", expert_id))
 
         params_dict = dict(self.named_parameters())
-        loaded_params: Set[str] = set()
+        loaded_params: set[str] = set()
 
         logger.info(
             "It will take ~10 minutes loading from the 16-bit weights. "
 
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 from collections.abc import Iterable, Mapping, Sequence
-from typing import List, Optional, Set, Tuple, TypedDict, Union
+from typing import Optional, TypedDict, Union
 
 import torch
 import torch.nn as nn
@@ -66,16 +66,16 @@ def __init__(
         # Identity layer
         self.post_layernorm = nn.Identity()
 
-    def load_weights(self, weights: Iterable[Tuple[str,
-                                                   torch.Tensor]]) -> Set[str]:
+    def load_weights(self, weights: Iterable[tuple[str,
+                                                   torch.Tensor]]) -> set[str]:
         stacked_params_mapping = [
             # (param_name, shard_name, shard_id)
             ("qkv_proj", "q_proj", "q"),
             ("qkv_proj", "k_proj", "k"),
             ("qkv_proj", "v_proj", "v"),
         ]
         params_dict = dict(self.named_parameters())
-        loaded_params: Set[str] = set()
+        loaded_params: set[str] = set()
         for name, loaded_weight in weights:
 
             # NOTE: post_layernorm is not used in Aria
@@ -326,8 +326,8 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
 
     # Adapted from LlamaModel.load_weights with the modification of adding
     # the expert weights mapping to `stacked_params_mapping`
-    def load_weights(self, weights: Iterable[Tuple[str,
-                                                   torch.Tensor]]) -> Set[str]:
+    def load_weights(self, weights: Iterable[tuple[str,
+                                                   torch.Tensor]]) -> set[str]:
         stacked_params_mapping = [
             # (param_name, shard_name, shard_id)
             (".qkv_proj", ".q_proj", "q"),
@@ -339,7 +339,7 @@ def load_weights(self, weights: Iterable[Tuple[str,
             ("experts.w2_weight", "experts.fc2.weight", 'w2'),
         ]
         params_dict = dict(self.named_parameters())
-        loaded_params: Set[str] = set()
+        loaded_params: set[str] = set()
         for name, loaded_weight in weights:
             if "rotary_emb.inv_freq" in name:
                 continue
@@ -528,7 +528,7 @@ def __init__(
                                                 self.vocab_size, logit_scale)
 
     def _validate_image_sizes(
-            self, images: List[torch.Tensor]) -> List[torch.Tensor]:
+            self, images: list[torch.Tensor]) -> list[torch.Tensor]:
         if not all(img.shape == images[0].shape for img in images):
             raise ValueError("All images must be the same size")
         return images
@@ -578,7 +578,7 @@ def _create_patch_attention_mask(
 
     def _process_image_input(
         self, image_input: AriaImagePixelInputs
-    ) -> Tuple[torch.Tensor, torch.Tensor]:
+    ) -> tuple[torch.Tensor, torch.Tensor]:
         assert self.vision_tower is not None
 
         pixel_values = image_input['pixel_values']
@@ -651,6 +651,6 @@ def compute_logits(self, hidden_states: torch.Tensor,
                                        sampling_metadata)
         return logits
 
-    def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
         loader = AutoWeightsLoader(self)
         loader.load_weights(weights, mapper=self.hf_to_vllm_mapper)
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0 Adapted from
 # https://github.com/huggingface/transformers/tree/main/src/transformers/models/aya_vision
-from typing import (Iterable, Literal, Mapping, Optional, Sequence, Set, Tuple,
-                    TypedDict, Union, cast)
+from collections.abc import Iterable, Mapping, Sequence
+from typing import Literal, Optional, TypedDict, Union, cast
 
 import torch
 from torch import nn
@@ -315,8 +315,8 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
     def dtype(self):
         return next(self.parameters()).dtype
 
-    def load_weights(self, weights: Iterable[Tuple[str,
-                                                   torch.Tensor]]) -> Set[str]:
+    def load_weights(self, weights: Iterable[tuple[str,
+                                                   torch.Tensor]]) -> set[str]:
         loader = AutoWeightsLoader(self)
         return loader.load_weights(weights)
 
 
@@ -20,7 +20,8 @@
 # limitations under the License.
 """Inference-only BaiChuan model compatible with HuggingFace weights."""
 import math
-from typing import Iterable, Optional, Set, Tuple, Union
+from collections.abc import Iterable
+from typing import Optional, Union
 
 import torch
 from torch import nn
@@ -230,7 +231,7 @@ def forward(
         positions: torch.Tensor,
         hidden_states: torch.Tensor,
         residual: Optional[torch.Tensor],
-    ) -> Tuple[torch.Tensor, torch.Tensor]:
+    ) -> tuple[torch.Tensor, torch.Tensor]:
         # Self Attention
         if residual is None:
             residual = hidden_states
@@ -320,15 +321,15 @@ def forward(
         hidden_states, _ = self.norm(hidden_states, residual)
         return hidden_states
 
-    def load_weights(self, weights: Iterable[Tuple[str,
-                                                   torch.Tensor]]) -> Set[str]:
+    def load_weights(self, weights: Iterable[tuple[str,
+                                                   torch.Tensor]]) -> set[str]:
         stacked_params_mapping = [
             # (param_name, shard_name, shard_id)
             ("gate_up_proj", "gate_proj", 0),
             ("gate_up_proj", "up_proj", 1),
         ]
         params_dict = dict(self.named_parameters())
-        loaded_params: Set[str] = set()
+        loaded_params: set[str] = set()
         for name, loaded_weight in weights:
             if "rotary_emb.inv_freq" in name:
                 continue
@@ -421,8 +422,8 @@ def compute_logits(
                                        sampling_metadata)
         return logits
 
-    def load_weights(self, weights: Iterable[Tuple[str,
-                                                   torch.Tensor]]) -> Set[str]:
+    def load_weights(self, weights: Iterable[tuple[str,
+                                                   torch.Tensor]]) -> set[str]:
         loader = AutoWeightsLoader(self)
         return loader.load_weights(weights)
 
 
@@ -1,7 +1,8 @@
 # SPDX-License-Identifier: Apache-2.0
 """Inference-only Bamba model."""
 # Added by the IBM Team, 2024
-from typing import Iterable, Optional, Set, Tuple
+from collections.abc import Iterable
+from typing import Optional
 
 import torch
 from torch import nn
@@ -355,8 +356,8 @@ def forward(
         hidden_states, _ = self.final_layernorm(hidden_states, residual)
         return hidden_states
 
-    def load_weights(self, weights: Iterable[Tuple[str,
-                                                   torch.Tensor]]) -> Set[str]:
+    def load_weights(self, weights: Iterable[tuple[str,
+                                                   torch.Tensor]]) -> set[str]:
         stacked_params_mapping = [
             # (param_name, shard_name, shard_id)
             ("qkv_proj", "q_proj", "q"),
@@ -367,7 +368,7 @@ def load_weights(self, weights: Iterable[Tuple[str,
         ]
 
         params_dict = dict(self.named_parameters())
-        loaded_params: Set[str] = set()
+        loaded_params: set[str] = set()
         for name, loaded_weight in weights:
             if "rotary_emb.inv_freq" in name:
                 continue
@@ -495,7 +496,7 @@ def get_seqlen_agnostic_capture_inputs(self, batch_size: int):
         return self.mamba_cache.get_seqlen_agnostic_capture_inputs(batch_size)
 
     def _get_mamba_cache_shape(
-            self) -> Tuple[Tuple[int, int], Tuple[int, int]]:
+            self) -> tuple[tuple[int, int], tuple[int, int]]:
         world_size = get_tensor_model_parallel_world_size()
         hidden_size = self.config.hidden_size
 
@@ -535,7 +536,7 @@ def compute_logits(
                                        sampling_metadata)
         return logits
 
-    def load_weights(self, weights: Iterable[Tuple[str,
-                                                   torch.Tensor]]) -> Set[str]:
+    def load_weights(self, weights: Iterable[tuple[str,
+                                                   torch.Tensor]]) -> set[str]:
         loader = AutoWeightsLoader(self)
         return loader.load_weights(weights)
@@ -19,7 +19,8 @@
 # limitations under the License.
 """PyTorch BART model."""
 import math
-from typing import Iterable, Optional, Tuple
+from collections.abc import Iterable
+from typing import Optional
 
 import torch
 from torch import nn
@@ -859,14 +860,14 @@ def _rename_key(self, key: str):
     def _rename_stacked_param(
         self,
         name: str,
-    ) -> Tuple[str, Optional[str]]:
+    ) -> tuple[str, Optional[str]]:
         for key, mapping in self.stacked_params_mapping.items():
             if key in name:
                 name = name.replace(key, mapping["param_name"])
                 return name, mapping["shard_id"]
         return name, None
 
-    def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
 
         model_params_dict = dict(self.model.named_parameters())
         top_params_dict = dict(self.named_parameters())
 
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
-from typing import Iterable, Optional, Set, Tuple
+from collections.abc import Iterable
+from typing import Optional
 
 import torch
 from torch import nn
@@ -349,8 +350,8 @@ def forward(
                 token_type_ids=token_type_ids)
         return self.encoder(hidden_states)
 
-    def load_weights(self, weights: Iterable[Tuple[str,
-                                                   torch.Tensor]]) -> Set[str]:
+    def load_weights(self, weights: Iterable[tuple[str,
+                                                   torch.Tensor]]) -> set[str]:
         stacked_params_mapping = [
             # (param_name, shard_name, shard_id)
             ("qkv_proj", "query", "q"),
@@ -359,7 +360,7 @@ def load_weights(self, weights: Iterable[Tuple[str,
         ]
 
         params_dict = dict(self.named_parameters())
-        loaded_params: Set[str] = set()
+        loaded_params: set[str] = set()
         for name, loaded_weight in weights:
             if self.pooler is None and "pooler" in name:
                 continue
@@ -424,7 +425,7 @@ def pooler(
     ) -> Optional[PoolerOutput]:
         return self._pooler(hidden_states, pooling_metadata)
 
-    def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
         weights = self.hf_to_vllm_mapper.apply(weights)
         weights = ((name, data) for name, data in weights
                    if not name.startswith("lm_head."))
@@ -472,7 +473,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         self._pooler = CrossEncodingPooler(config, self.classifier,
                                            self.bert.pooler)
 
-    def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
+    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
 
         self_weights = []
 
 
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
-from typing import Iterable, Optional, Set, Tuple
+from collections.abc import Iterable
+from typing import Optional
 
 import torch
 from torch import nn
@@ -208,7 +209,7 @@ def __init__(self, hidden_size: int, moe_num_experts: int, moe_top_k: int):
 
     def forward(
         self, x: torch.Tensor
-    ) -> Tuple[torch.Tensor, torch.Tensor, torch.LongTensor]:
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.LongTensor]:
         weights = self.layer(x.view(-1, x.shape[-1]))[0].softmax(
             dim=-1, dtype=torch.float32)
         top_weights, top_experts = torch.topk(weights, self.moe_top_k, dim=-1)
@@ -428,8 +429,8 @@ def forward(
                                             token_type_ids=token_type_ids)
         return self.encoder(positions, hidden_states)
 
-    def load_weights(self, weights: Iterable[Tuple[str,
-                                                   torch.Tensor]]) -> Set[str]:
+    def load_weights(self, weights: Iterable[tuple[str,
+                                                   torch.Tensor]]) -> set[str]:
         weights = self.hf_to_vllm_mapper.apply(weights)
 
         if self.config.hidden_act in ["silu", "geglu"]:
@@ -442,7 +443,7 @@ def load_weights(self, weights: Iterable[Tuple[str,
             stacked_params_mapping = []
 
         params_dict = dict(self.named_parameters())
-        loaded_params: Set[str] = set()
+        loaded_params: set[str] = set()
         for name, loaded_weight in weights:
             if "pooler" in name:
                 continue
@@ -567,7 +568,7 @@ def config_verify(self, vllm_config):
         }
         return config
 
-    def split_up_gate_proj(self, weights: Iterable[Tuple[str, torch.Tensor]]):
+    def split_up_gate_proj(self, weights: Iterable[tuple[str, torch.Tensor]]):
         n = "mlp.up_gate_proj"
         for name, weight in weights:
             if n in name:
@@ -578,14 +579,14 @@ def split_up_gate_proj(self, weights: Iterable[Tuple[str, torch.Tensor]]):
                 yield name, weight
 
     def ignore_unnecessary_layers(self,
-                                  weights: Iterable[Tuple[str, torch.Tensor]]):
+                                  weights: Iterable[tuple[str, torch.Tensor]]):
         for name, weight in weights:
             if name.startswith("classifier"):
                 continue
             yield name, weight
 
-    def load_weights(self, weights: Iterable[Tuple[str,
-                                                   torch.Tensor]]) -> Set[str]:
+    def load_weights(self, weights: Iterable[tuple[str,
+                                                   torch.Tensor]]) -> set[str]:
         weights = self.ignore_unnecessary_layers(weights)
         weights = self.split_up_gate_proj(weights)
         return super().load_weights(weights)
@@ -664,7 +665,7 @@ def forward(
                                token_type_ids=token_type_ids)
 
     @torch.inference_mode()
-    def jina_merge_lora_weights(self, weights: Iterable[Tuple[str,
+    def jina_merge_lora_weights(self, weights: Iterable[tuple[str,
                                                               torch.Tensor]]):
         # use for jina-embeddings-v3
         # Merge Lora weights into a single weight tensor.
@@ -707,7 +708,7 @@ def jina_merge_lora_weights(self, weights: Iterable[Tuple[str,
 
         return [(name, weight) for name, weight in weights.items()]
 
-    def load_weights(self, weights: Iterable[Tuple[str,
-                                                   torch.Tensor]]) -> Set[str]:
+    def load_weights(self, weights: Iterable[tuple[str,
+                                                   torch.Tensor]]) -> set[str]:
         weights = self.jina_merge_lora_weights(weights)
         return super().load_weights(weights)