Merged
1 change: 1 addition & 0 deletions tests/integration/test_lists/test-db/l0_b200.yml
@@ -40,6 +40,7 @@ l0_b200:
- unittest/_torch -k "not (modeling or multi_gpu or auto_deploy)"
- unittest/_torch -k "modeling_llama"
- unittest/_torch/modeling -k "modeling_mixtral"
- unittest/_torch/modeling -k "modeling_deepseek"
- unittest/_torch/auto_deploy/unit/singlegpu
- unittest/_torch/speculative/test_eagle3.py
- condition:
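This entry adds the DeepSeek modeling unit tests to the B200 (SM 100) pre-merge list, alongside the existing Mixtral entry. Locally it corresponds roughly to running pytest with -k "modeling_deepseek" against unittest/_torch/modeling; the exact mapping from test-db entries to pytest invocations is an assumption about the CI harness.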
87 changes: 87 additions & 0 deletions tests/unittest/_torch/modeling/test_modeling_deepseek.py
@@ -0,0 +1,87 @@
import json
import os
import shutil
from pathlib import Path

import pytest
from utils.llm_data import llm_models_root
from utils.util import getSMVersion

from tensorrt_llm import SamplingParams
from tensorrt_llm._torch import LLM
from tensorrt_llm._torch.pyexecutor.config import PyTorchConfig
from tensorrt_llm.llmapi import KvCacheConfig
from tensorrt_llm.llmapi.utils import get_total_gpu_memory


def process_and_copy_folder(src_folder, dst_folder):
    """Copy a checkpoint folder, shrinking config.json to 4 hidden layers
    and skipping the weight files (the test loads dummy weights instead)."""
    if os.path.exists(dst_folder):
        shutil.rmtree(dst_folder)
    os.makedirs(dst_folder)

for root, dirs, files in os.walk(src_folder):
rel_path = os.path.relpath(root, src_folder)
dest_dir = os.path.join(dst_folder, rel_path)

if not os.path.exists(dest_dir):
os.makedirs(dest_dir)

        for file in files:
            # Skip the large safetensors weight shards; they are not needed.
            if 'safetensor' in file:
                continue
            src_path = os.path.join(root, file)
            dest_path = os.path.join(dest_dir, file)

if file == 'config.json':
with open(src_path, 'r', encoding='utf-8') as f:
config = json.load(f)
config['num_hidden_layers'] = 4
with open(dest_path, 'w', encoding='utf-8') as f:
json.dump(config, f, indent=2, ensure_ascii=False)
else:
shutil.copy2(src_path, dest_path)
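
# A minimal sketch of the helper's effect (illustration only; the paths are
# hypothetical, and the test itself writes its copy under /tmp):
#
#   process_and_copy_folder("/models/DeepSeek-R1", "/tmp/DeepSeek-R1")
#   with open("/tmp/DeepSeek-R1/config.json") as f:
#       assert json.load(f)["num_hidden_layers"] == 4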


@pytest.mark.parametrize("model_name", ["DeepSeek-R1", "DeepSeek-R1-FP4"],
ids=["deepseekr1", "deepseekr1_fp4"])
def test_deepseek_trtllmgen(model_name):
    if getSMVersion() < 100:
        pytest.skip(f"Test requires SM 100 or newer, got SM {getSMVersion()}")

if get_total_gpu_memory(0) < 60 * 1024**3:
pytest.skip(f"Not enough GPU memory to run. {get_total_gpu_memory(0)}")

prompts = [
"The president of the United States is",
] * 4

pytorch_config = PyTorchConfig(
enable_overlap_scheduler=False,
use_cuda_graph=False,
kv_cache_dtype="auto",
attn_backend="TRTLLM",
        load_format="dummy",  # random-init weights; the safetensors were skipped above
moe_backend="TRTLLM",
)

model_dir = str(llm_models_root() / Path(f"DeepSeek-R1/{model_name}"))
assert Path(model_dir).exists()
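    # Build a 4-layer copy of the checkpoint without weight files (see helper above).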
tmp_model_dir = f"/tmp/{model_name}"
process_and_copy_folder(model_dir, tmp_model_dir)

llm = LLM(model=tmp_model_dir,
tensor_parallel_size=1,
enable_chunked_prefill=False,
pytorch_backend_config=pytorch_config,
moe_expert_parallel_size=-1,
moe_tensor_parallel_size=-1,
enable_attention_dp=False,
kv_cache_config=KvCacheConfig(enable_block_reuse=False))

sampling_params = SamplingParams(max_tokens=20)

    llm.generate(prompts, sampling_params)
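
For local reproduction on an SM 100 GPU, the new tests can be invoked directly; a minimal sketch, assuming llm_models_root() resolves the checkpoint directory (typically via the LLM_MODELS_ROOT environment variable, an assumption about the repo's test utilities):

import pytest

# Run only the FP4 variant of the new DeepSeek test; the ids come from the
# parametrize decorator above ("deepseekr1" and "deepseekr1_fp4").
pytest.main([
    "tests/unittest/_torch/modeling/test_modeling_deepseek.py",
    "-k", "deepseekr1_fp4",
])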