Commit 71524a1

[https://nvbugs/5419066][fix] Use trt flow LLM (NVIDIA#6467)
Signed-off-by: Ivy Zhang <[email protected]>
1 parent 48768fd commit 71524a1

File tree: 1 file changed (+2, -1)

tests/integration/defs/examples/test_llama.py

Lines changed: 2 additions & 1 deletion
@@ -4069,7 +4069,8 @@ def test_llm_api_lookahead_decoding_1gpu(model_name, model_path):
     """
     from defs.conftest import llm_models_root
 
-    from tensorrt_llm.llmapi import (LLM, BuildConfig, KvCacheConfig,
+    from tensorrt_llm._tensorrt_engine import LLM
+    from tensorrt_llm.llmapi import (BuildConfig, KvCacheConfig,
                                      LookaheadDecodingConfig, SamplingParams)
     build_config = BuildConfig(max_batch_size=128,
                                max_input_len=2048,
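
For context, a minimal sketch of how the switched import might be exercised by a lookahead-decoding test. This is not the test from the commit: the model path, the KV-cache and lookahead values, and the `speculative_config` parameter name are assumptions used for illustration, while the import lines mirror the diff above.

# Hypothetical sketch only; paths and config values below are assumed, not from this commit.
from tensorrt_llm._tensorrt_engine import LLM  # TRT-flow LLM, the import this commit switches to
from tensorrt_llm.llmapi import (BuildConfig, KvCacheConfig,
                                 LookaheadDecodingConfig, SamplingParams)

build_config = BuildConfig(max_batch_size=128,
                           max_input_len=2048)
kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.4)      # assumed fraction
lookahead_config = LookaheadDecodingConfig(max_window_size=4,      # assumed lookahead sizes
                                           max_ngram_size=4,
                                           max_verification_set_size=4)

llm = LLM(model="/path/to/llama/checkpoint",     # hypothetical model path
          build_config=build_config,
          kv_cache_config=kv_cache_config,
          speculative_config=lookahead_config)   # parameter name assumed

outputs = llm.generate(["The future of AI is"],
                       sampling_params=SamplingParams(max_tokens=32))
print(outputs[0].outputs[0].text)

The point of the change itself is narrow: importing LLM from tensorrt_llm._tensorrt_engine keeps the test on the TensorRT engine flow, while the remaining config classes continue to come from tensorrt_llm.llmapi.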
