Skip to content

Commit 7588cd9

Browse files
hmellor authored and Yuqi Zhang committed
Update deprecated type hinting in models (vllm-project#18132)
Signed-off-by: Harry Mellor <[email protected]>
Signed-off-by: Yuqi Zhang <[email protected]>
1 parent 9e71dab commit 7588cd9

File tree

130 files changed

+971
-901
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

130 files changed

+971
-901
lines changed

pyproject.toml

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -77,7 +77,6 @@ exclude = [
7777
"vllm/engine/**/*.py" = ["UP006", "UP035"]
7878
"vllm/executor/**/*.py" = ["UP006", "UP035"]
7979
"vllm/model_executor/model_loader/**/*.py" = ["UP006", "UP035"]
80-
"vllm/model_executor/models/**/*.py" = ["UP006", "UP035"]
8180
"vllm/prompt_adapter/**/*.py" = ["UP006", "UP035"]
8281
"vllm/spec_decode/**/*.py" = ["UP006", "UP035"]
8382
"vllm/worker/**/*.py" = ["UP006", "UP035"]

vllm/model_executor/models/arctic.py

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
# SPDX-License-Identifier: Apache-2.0
22
"""Inference-only Snowflake Arctic model."""
3-
from typing import Iterable, List, Optional, Set, Tuple, Union
3+
from collections.abc import Iterable
4+
from typing import Optional, Union
45

56
import torch
67
from torch import nn
@@ -458,17 +459,17 @@ def compute_logits(
458459
sampling_metadata)
459460
return logits
460461

461-
def load_weights(self, weights: Iterable[Tuple[str,
462-
torch.Tensor]]) -> Set[str]:
462+
def load_weights(self, weights: Iterable[tuple[str,
463+
torch.Tensor]]) -> set[str]:
463464
stacked_params_mapping = [
464465
# (param_name, shard_name, shard_id)
465466
("qkv_proj", "q_proj", "q"),
466467
("qkv_proj", "k_proj", "k"),
467468
("qkv_proj", "v_proj", "v"),
468469
]
469470

470-
mlp_params_mapping: List[Tuple[str, str, int]] = []
471-
expert_params_mapping: List[Tuple[str, str, int]] = []
471+
mlp_params_mapping: list[tuple[str, str, int]] = []
472+
expert_params_mapping: list[tuple[str, str, int]] = []
472473
num_layers = self.config.num_hidden_layers
473474

474475
for layer in range(num_layers):
@@ -497,7 +498,7 @@ def load_weights(self, weights: Iterable[Tuple[str,
497498
("ws", f"experts.{expert_id}.w3.weight", expert_id))
498499

499500
params_dict = dict(self.named_parameters())
500-
loaded_params: Set[str] = set()
501+
loaded_params: set[str] = set()
501502

502503
logger.info(
503504
"It will take ~10 minutes loading from the 16-bit weights. "

vllm/model_executor/models/aria.py

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
# SPDX-License-Identifier: Apache-2.0
22
from collections.abc import Iterable, Mapping, Sequence
3-
from typing import List, Optional, Set, Tuple, TypedDict, Union
3+
from typing import Optional, TypedDict, Union
44

55
import torch
66
import torch.nn as nn
@@ -66,16 +66,16 @@ def __init__(
6666
# Identity layer
6767
self.post_layernorm = nn.Identity()
6868

69-
def load_weights(self, weights: Iterable[Tuple[str,
70-
torch.Tensor]]) -> Set[str]:
69+
def load_weights(self, weights: Iterable[tuple[str,
70+
torch.Tensor]]) -> set[str]:
7171
stacked_params_mapping = [
7272
# (param_name, shard_name, shard_id)
7373
("qkv_proj", "q_proj", "q"),
7474
("qkv_proj", "k_proj", "k"),
7575
("qkv_proj", "v_proj", "v"),
7676
]
7777
params_dict = dict(self.named_parameters())
78-
loaded_params: Set[str] = set()
78+
loaded_params: set[str] = set()
7979
for name, loaded_weight in weights:
8080

8181
# NOTE: post_layernorm is not used in Aria
@@ -326,8 +326,8 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
326326

327327
# Adapted from LlamaModel.load_weights with the modification of adding
328328
# the expert weights mapping to `stacked_params_mapping`
329-
def load_weights(self, weights: Iterable[Tuple[str,
330-
torch.Tensor]]) -> Set[str]:
329+
def load_weights(self, weights: Iterable[tuple[str,
330+
torch.Tensor]]) -> set[str]:
331331
stacked_params_mapping = [
332332
# (param_name, shard_name, shard_id)
333333
(".qkv_proj", ".q_proj", "q"),
@@ -339,7 +339,7 @@ def load_weights(self, weights: Iterable[Tuple[str,
339339
("experts.w2_weight", "experts.fc2.weight", 'w2'),
340340
]
341341
params_dict = dict(self.named_parameters())
342-
loaded_params: Set[str] = set()
342+
loaded_params: set[str] = set()
343343
for name, loaded_weight in weights:
344344
if "rotary_emb.inv_freq" in name:
345345
continue
@@ -528,7 +528,7 @@ def __init__(
528528
self.vocab_size, logit_scale)
529529

530530
def _validate_image_sizes(
531-
self, images: List[torch.Tensor]) -> List[torch.Tensor]:
531+
self, images: list[torch.Tensor]) -> list[torch.Tensor]:
532532
if not all(img.shape == images[0].shape for img in images):
533533
raise ValueError("All images must be the same size")
534534
return images
@@ -578,7 +578,7 @@ def _create_patch_attention_mask(
578578

579579
def _process_image_input(
580580
self, image_input: AriaImagePixelInputs
581-
) -> Tuple[torch.Tensor, torch.Tensor]:
581+
) -> tuple[torch.Tensor, torch.Tensor]:
582582
assert self.vision_tower is not None
583583

584584
pixel_values = image_input['pixel_values']
@@ -651,6 +651,6 @@ def compute_logits(self, hidden_states: torch.Tensor,
651651
sampling_metadata)
652652
return logits
653653

654-
def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
654+
def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
655655
loader = AutoWeightsLoader(self)
656656
loader.load_weights(weights, mapper=self.hf_to_vllm_mapper)

vllm/model_executor/models/aya_vision.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
# SPDX-License-Identifier: Apache-2.0 Adapted from
22
# https://github.com/huggingface/transformers/tree/main/src/transformers/models/aya_vision
3-
from typing import (Iterable, Literal, Mapping, Optional, Sequence, Set, Tuple,
4-
TypedDict, Union, cast)
3+
from collections.abc import Iterable, Mapping, Sequence
4+
from typing import Literal, Optional, TypedDict, Union, cast
55

66
import torch
77
from torch import nn
@@ -315,8 +315,8 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
315315
def dtype(self):
316316
return next(self.parameters()).dtype
317317

318-
def load_weights(self, weights: Iterable[Tuple[str,
319-
torch.Tensor]]) -> Set[str]:
318+
def load_weights(self, weights: Iterable[tuple[str,
319+
torch.Tensor]]) -> set[str]:
320320
loader = AutoWeightsLoader(self)
321321
return loader.load_weights(weights)
322322

vllm/model_executor/models/baichuan.py

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,8 @@
2020
# limitations under the License.
2121
"""Inference-only BaiChuan model compatible with HuggingFace weights."""
2222
import math
23-
from typing import Iterable, Optional, Set, Tuple, Union
23+
from collections.abc import Iterable
24+
from typing import Optional, Union
2425

2526
import torch
2627
from torch import nn
@@ -230,7 +231,7 @@ def forward(
230231
positions: torch.Tensor,
231232
hidden_states: torch.Tensor,
232233
residual: Optional[torch.Tensor],
233-
) -> Tuple[torch.Tensor, torch.Tensor]:
234+
) -> tuple[torch.Tensor, torch.Tensor]:
234235
# Self Attention
235236
if residual is None:
236237
residual = hidden_states
@@ -320,15 +321,15 @@ def forward(
320321
hidden_states, _ = self.norm(hidden_states, residual)
321322
return hidden_states
322323

323-
def load_weights(self, weights: Iterable[Tuple[str,
324-
torch.Tensor]]) -> Set[str]:
324+
def load_weights(self, weights: Iterable[tuple[str,
325+
torch.Tensor]]) -> set[str]:
325326
stacked_params_mapping = [
326327
# (param_name, shard_name, shard_id)
327328
("gate_up_proj", "gate_proj", 0),
328329
("gate_up_proj", "up_proj", 1),
329330
]
330331
params_dict = dict(self.named_parameters())
331-
loaded_params: Set[str] = set()
332+
loaded_params: set[str] = set()
332333
for name, loaded_weight in weights:
333334
if "rotary_emb.inv_freq" in name:
334335
continue
@@ -421,8 +422,8 @@ def compute_logits(
421422
sampling_metadata)
422423
return logits
423424

424-
def load_weights(self, weights: Iterable[Tuple[str,
425-
torch.Tensor]]) -> Set[str]:
425+
def load_weights(self, weights: Iterable[tuple[str,
426+
torch.Tensor]]) -> set[str]:
426427
loader = AutoWeightsLoader(self)
427428
return loader.load_weights(weights)
428429

vllm/model_executor/models/bamba.py

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,8 @@
11
# SPDX-License-Identifier: Apache-2.0
22
"""Inference-only Bamba model."""
33
# Added by the IBM Team, 2024
4-
from typing import Iterable, Optional, Set, Tuple
4+
from collections.abc import Iterable
5+
from typing import Optional
56

67
import torch
78
from torch import nn
@@ -355,8 +356,8 @@ def forward(
355356
hidden_states, _ = self.final_layernorm(hidden_states, residual)
356357
return hidden_states
357358

358-
def load_weights(self, weights: Iterable[Tuple[str,
359-
torch.Tensor]]) -> Set[str]:
359+
def load_weights(self, weights: Iterable[tuple[str,
360+
torch.Tensor]]) -> set[str]:
360361
stacked_params_mapping = [
361362
# (param_name, shard_name, shard_id)
362363
("qkv_proj", "q_proj", "q"),
@@ -367,7 +368,7 @@ def load_weights(self, weights: Iterable[Tuple[str,
367368
]
368369

369370
params_dict = dict(self.named_parameters())
370-
loaded_params: Set[str] = set()
371+
loaded_params: set[str] = set()
371372
for name, loaded_weight in weights:
372373
if "rotary_emb.inv_freq" in name:
373374
continue
@@ -495,7 +496,7 @@ def get_seqlen_agnostic_capture_inputs(self, batch_size: int):
495496
return self.mamba_cache.get_seqlen_agnostic_capture_inputs(batch_size)
496497

497498
def _get_mamba_cache_shape(
498-
self) -> Tuple[Tuple[int, int], Tuple[int, int]]:
499+
self) -> tuple[tuple[int, int], tuple[int, int]]:
499500
world_size = get_tensor_model_parallel_world_size()
500501
hidden_size = self.config.hidden_size
501502

@@ -535,7 +536,7 @@ def compute_logits(
535536
sampling_metadata)
536537
return logits
537538

538-
def load_weights(self, weights: Iterable[Tuple[str,
539-
torch.Tensor]]) -> Set[str]:
539+
def load_weights(self, weights: Iterable[tuple[str,
540+
torch.Tensor]]) -> set[str]:
540541
loader = AutoWeightsLoader(self)
541542
return loader.load_weights(weights)

vllm/model_executor/models/bart.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,8 @@
1919
# limitations under the License.
2020
"""PyTorch BART model."""
2121
import math
22-
from typing import Iterable, Optional, Tuple
22+
from collections.abc import Iterable
23+
from typing import Optional
2324

2425
import torch
2526
from torch import nn
@@ -859,14 +860,14 @@ def _rename_key(self, key: str):
859860
def _rename_stacked_param(
860861
self,
861862
name: str,
862-
) -> Tuple[str, Optional[str]]:
863+
) -> tuple[str, Optional[str]]:
863864
for key, mapping in self.stacked_params_mapping.items():
864865
if key in name:
865866
name = name.replace(key, mapping["param_name"])
866867
return name, mapping["shard_id"]
867868
return name, None
868869

869-
def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
870+
def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
870871

871872
model_params_dict = dict(self.model.named_parameters())
872873
top_params_dict = dict(self.named_parameters())

vllm/model_executor/models/bert.py

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
# SPDX-License-Identifier: Apache-2.0
22

3-
from typing import Iterable, Optional, Set, Tuple
3+
from collections.abc import Iterable
4+
from typing import Optional
45

56
import torch
67
from torch import nn
@@ -349,8 +350,8 @@ def forward(
349350
token_type_ids=token_type_ids)
350351
return self.encoder(hidden_states)
351352

352-
def load_weights(self, weights: Iterable[Tuple[str,
353-
torch.Tensor]]) -> Set[str]:
353+
def load_weights(self, weights: Iterable[tuple[str,
354+
torch.Tensor]]) -> set[str]:
354355
stacked_params_mapping = [
355356
# (param_name, shard_name, shard_id)
356357
("qkv_proj", "query", "q"),
@@ -359,7 +360,7 @@ def load_weights(self, weights: Iterable[Tuple[str,
359360
]
360361

361362
params_dict = dict(self.named_parameters())
362-
loaded_params: Set[str] = set()
363+
loaded_params: set[str] = set()
363364
for name, loaded_weight in weights:
364365
if self.pooler is None and "pooler" in name:
365366
continue
@@ -424,7 +425,7 @@ def pooler(
424425
) -> Optional[PoolerOutput]:
425426
return self._pooler(hidden_states, pooling_metadata)
426427

427-
def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
428+
def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
428429
weights = self.hf_to_vllm_mapper.apply(weights)
429430
weights = ((name, data) for name, data in weights
430431
if not name.startswith("lm_head."))
@@ -472,7 +473,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
472473
self._pooler = CrossEncodingPooler(config, self.classifier,
473474
self.bert.pooler)
474475

475-
def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
476+
def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
476477

477478
self_weights = []
478479

vllm/model_executor/models/bert_with_rope.py

Lines changed: 13 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
# SPDX-License-Identifier: Apache-2.0
2-
from typing import Iterable, Optional, Set, Tuple
2+
from collections.abc import Iterable
3+
from typing import Optional
34

45
import torch
56
from torch import nn
@@ -208,7 +209,7 @@ def __init__(self, hidden_size: int, moe_num_experts: int, moe_top_k: int):
208209

209210
def forward(
210211
self, x: torch.Tensor
211-
) -> Tuple[torch.Tensor, torch.Tensor, torch.LongTensor]:
212+
) -> tuple[torch.Tensor, torch.Tensor, torch.LongTensor]:
212213
weights = self.layer(x.view(-1, x.shape[-1]))[0].softmax(
213214
dim=-1, dtype=torch.float32)
214215
top_weights, top_experts = torch.topk(weights, self.moe_top_k, dim=-1)
@@ -428,8 +429,8 @@ def forward(
428429
token_type_ids=token_type_ids)
429430
return self.encoder(positions, hidden_states)
430431

431-
def load_weights(self, weights: Iterable[Tuple[str,
432-
torch.Tensor]]) -> Set[str]:
432+
def load_weights(self, weights: Iterable[tuple[str,
433+
torch.Tensor]]) -> set[str]:
433434
weights = self.hf_to_vllm_mapper.apply(weights)
434435

435436
if self.config.hidden_act in ["silu", "geglu"]:
@@ -442,7 +443,7 @@ def load_weights(self, weights: Iterable[Tuple[str,
442443
stacked_params_mapping = []
443444

444445
params_dict = dict(self.named_parameters())
445-
loaded_params: Set[str] = set()
446+
loaded_params: set[str] = set()
446447
for name, loaded_weight in weights:
447448
if "pooler" in name:
448449
continue
@@ -567,7 +568,7 @@ def config_verify(self, vllm_config):
567568
}
568569
return config
569570

570-
def split_up_gate_proj(self, weights: Iterable[Tuple[str, torch.Tensor]]):
571+
def split_up_gate_proj(self, weights: Iterable[tuple[str, torch.Tensor]]):
571572
n = "mlp.up_gate_proj"
572573
for name, weight in weights:
573574
if n in name:
@@ -578,14 +579,14 @@ def split_up_gate_proj(self, weights: Iterable[Tuple[str, torch.Tensor]]):
578579
yield name, weight
579580

580581
def ignore_unnecessary_layers(self,
581-
weights: Iterable[Tuple[str, torch.Tensor]]):
582+
weights: Iterable[tuple[str, torch.Tensor]]):
582583
for name, weight in weights:
583584
if name.startswith("classifier"):
584585
continue
585586
yield name, weight
586587

587-
def load_weights(self, weights: Iterable[Tuple[str,
588-
torch.Tensor]]) -> Set[str]:
588+
def load_weights(self, weights: Iterable[tuple[str,
589+
torch.Tensor]]) -> set[str]:
589590
weights = self.ignore_unnecessary_layers(weights)
590591
weights = self.split_up_gate_proj(weights)
591592
return super().load_weights(weights)
@@ -664,7 +665,7 @@ def forward(
664665
token_type_ids=token_type_ids)
665666

666667
@torch.inference_mode()
667-
def jina_merge_lora_weights(self, weights: Iterable[Tuple[str,
668+
def jina_merge_lora_weights(self, weights: Iterable[tuple[str,
668669
torch.Tensor]]):
669670
# use for jina-embeddings-v3
670671
# Merge Lora weights into a single weight tensor.
@@ -707,7 +708,7 @@ def jina_merge_lora_weights(self, weights: Iterable[Tuple[str,
707708

708709
return [(name, weight) for name, weight in weights.items()]
709710

710-
def load_weights(self, weights: Iterable[Tuple[str,
711-
torch.Tensor]]) -> Set[str]:
711+
def load_weights(self, weights: Iterable[tuple[str,
712+
torch.Tensor]]) -> set[str]:
712713
weights = self.jina_merge_lora_weights(weights)
713714
return super().load_weights(weights)

0 commit comments

Comments
 (0)