We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent fe53b79 commit f38df5a — Copy full SHA for f38df5a
vllm/model_executor/models/chatglm.py
@@ -75,12 +75,17 @@ def __init__(
75
linear_method=linear_method,
76
)
77
78
+ # https://huggingface.co/THUDM/chatglm3-6b-32k/blob/e210410255278dd9d74463cf396ba559c0ef801c/modeling_chatglm.py#L141
79
+ rope_ratio = getattr(config, "rope_ratio", 1.0)
80
+ max_positions = getattr(config, "seq_length", 8192)
81
self.attn = PagedAttentionWithRoPE(
82
self.num_heads,
83
self.head_dim,
84
self.scaling,
85
rotary_dim=self.head_dim // 2,
86
num_kv_heads=self.num_kv_heads,
87
+ max_position=max_positions,
88
+ base=10000 * rope_ratio,
89
is_neox_style=False,
90
91
0 commit comments