2 files changed: +506 -0 lines changed
server/text_generation_server/models: 2 files changed, +506 -0 lines changed
Columns: original file line number | diff line number | diff line change
152
152
from text_generation_server .models .custom_modeling .flash_qwen2_modeling import (
153
153
Qwen2ForCausalLM ,
154
154
)
155
+ from text_generation_server .models .custom_modeling .flash_qwen3_modeling import (
156
+ Qwen3ForCausalLM ,
157
+ )
155
158
from text_generation_server .models .custom_modeling .flash_mistral_modeling import (
156
159
FlashMistralForCausalLM ,
157
160
)
@@ -348,6 +351,11 @@ class ModelType(enum.Enum):
348
351
"name" : "Qwen 2" ,
349
352
"url" : "https://huggingface.co/collections/Qwen/qwen2-6659360b33528ced941e557f" ,
350
353
}
354
+ QWEN3 = {
355
+ "type" : "qwen3" ,
356
+ "name" : "Qwen 3" ,
357
+ "url" : "https://huggingface.co/collections/Qwen/qwen3-67c6c6f89c4f76621268bb6d" ,
358
+ }
351
359
QWEN2_VL = {
352
360
"type" : "qwen2_vl" ,
353
361
"name" : "Qwen 2 VL" ,
@@ -1470,6 +1478,40 @@ def get_model(
1470
1478
trust_remote_code = trust_remote_code ,
1471
1479
)
1472
1480
1481
+ if model_type == QWEN3 :
1482
+ if FLASH_ATTENTION :
1483
+ return FlashCausalLM (
1484
+ model_id = model_id ,
1485
+ model_class = Qwen3ForCausalLM ,
1486
+ revision = revision ,
1487
+ quantize = quantize ,
1488
+ speculator = speculator ,
1489
+ dtype = dtype ,
1490
+ kv_cache_dtype = kv_cache_dtype ,
1491
+ trust_remote_code = trust_remote_code ,
1492
+ lora_adapter_ids = lora_adapter_ids ,
1493
+ )
1494
+ elif FLASH_TRANSFORMERS_BACKEND :
1495
+ return TransformersFlashCausalLM .fallback (
1496
+ model_id ,
1497
+ revision ,
1498
+ quantize = quantize ,
1499
+ speculator = speculator ,
1500
+ dtype = dtype ,
1501
+ trust_remote_code = trust_remote_code ,
1502
+ )
1503
+ elif sharded :
1504
+ raise NotImplementedError (FLASH_ATT_ERROR_MESSAGE .format ("Sharded Qwen3" ))
1505
+ else :
1506
+ return CausalLM .fallback (
1507
+ model_id ,
1508
+ revision ,
1509
+ quantize = quantize ,
1510
+ speculator = speculator ,
1511
+ dtype = dtype ,
1512
+ trust_remote_code = trust_remote_code ,
1513
+ )
1514
+
1473
1515
if model_type == OPT :
1474
1516
return CausalLM (
1475
1517
model_id = model_id ,
You can’t perform that action at this time.
0 commit comments