NVIDIA · Tracin · Apr 21, 2025 · Apr 16, 2025 · Apr 17, 2025 · Apr 18, 2025
@@ -4249,7 +4249,7 @@ def test_llm_llama_v3_1_1node_multi_gpus(llama_example_root, llama_model_root,
     mmlu_cmd = generate_mmlu_cmd(example_root=llama_example_root,
                                  data_dir=mmlu_dataset_root,
                                  engine_dir=engine_dir,
-                                 tokenizer_dir=llama_model_root,
+                                 hf_model_dir=llama_model_root,
                                  enable_chunked_prefill=True)
     venv_check_call(llm_venv, mmlu_cmd)
 
@@ -4361,7 +4361,7 @@ def test_llm_llama_v3_1_2nodes_8gpus(test_type, llama_example_root,
         mmlu_cmd = generate_mmlu_cmd(example_root=llama_example_root,
                                      data_dir=mmlu_dataset_root,
                                      engine_dir=engine_dir,
-                                     tokenizer_dir=llama_model_root,
+                                     hf_model_dir=llama_model_root,
                                      enable_chunked_prefill=True)
         venv_check_call(llm_venv, mmlu_cmd)
 

@@ -182,7 +182,7 @@ def test_llm_mixtral_fp8_4gpus_summary(llama_example_root,
 
     print("Run mmlu...")
     mmlu_cmd = generate_mmlu_cmd(llama_example_root,
-                                 tokenizer_dir=llm_mixtral_model_root,
+                                 hf_model_dir=llm_mixtral_model_root,
                                  engine_dir=engine_dir,
                                  accuracy_threshold=70,
                                  data_dir=f"{llm_datasets_root}/mmlu")
@@ -247,7 +247,7 @@ def test_llm_mixtral_fp8_managed_weights_4gpus_summary(llama_example_root,
 
     print("Run mmlu...")
     mmlu_cmd = generate_mmlu_cmd(llama_example_root,
-                                 tokenizer_dir=llm_mixtral_model_root,
+                                 hf_model_dir=llm_mixtral_model_root,
                                  engine_dir=engine_dir,
                                  accuracy_threshold=70,
                                  data_dir=f"{llm_datasets_root}/mmlu")

@@ -417,7 +417,6 @@ examples/test_gpt.py::test_llm_gpt2_santacoder_1node_4gpus[parallel_build-enable
 examples/test_llama.py::test_llm_llama_v3_1_1node_multi_gpus[enable_gemm_allreduce_plugin-llama-3.1-405b-enable_fp8] SKIP (https://nvbugs/5219532)
 examples/test_llama.py::test_llm_llama_v3_1_1node_multi_gpus[enable_gemm_allreduce_plugin-llama-3.1-405b-fp8-disable_fp8] SKIP (https://nvbugs/5219532)
 examples/test_llama.py::test_llm_llama_v3_1_1node_multi_gpus[disable_gemm_allreduce_plugin-llama-3.1-70b-enable_fp8] SKIP (https://nvbugs/5219532)
-examples/test_llama.py::test_llm_llama_v3_1_1node_multi_gpus[enable_gemm_allreduce_plugin-llama-3.1-70b-disable_fp8] SKIP (https://nvbugs/5219533)
 examples/test_medusa.py::test_llama_medusa_1gpu[llama-v2-7b-hf] SKIP (https://nvbugs/5219534)
 examples/test_medusa.py::test_llama_medusa_1gpu[llama-3.2-1b] SKIP (https://nvbugs/5219534)
 examples/test_medusa.py::test_llama_medusa_1gpu[llama-3.1-8b] SKIP (https://nvbugs/5219534)
@@ -431,7 +430,6 @@ examples/test_eagle.py::test_llama_eagle_1gpu[llama-3.1-8b-eagle1] SKIP (https:/
 examples/test_eagle.py::test_mistral_eagle_1gpu[mistral-7b-v0.1-eagle1] SKIP (https://nvbugs/5219535)
 examples/test_eagle.py::test_llama_eagle_1gpu[llama-3.1-8b-eagle2] SKIP (https://nvbugs/5219535)
 examples/test_eagle.py::test_mistral_eagle_1gpu[mistral-7b-v0.1-eagle2] SKIP (https://nvbugs/5219535)
-examples/test_mixtral.py::test_llm_mixtral_fp8_4gpus_summary[Mixtral-8x22B-v0.1-nb:1] SKIP (https://nvbugs/5220758)
 examples/test_multimodal.py::test_llm_multimodal_general[VILA1.5-3b-pp:1-tp:1-float16-bs:8-cpp_e2e:False-nb:1] SKIP (https://nvbugs/5214239)
 examples/test_multimodal.py::test_llm_multimodal_general[VILA1.5-3b-pp:1-tp:1-float16-bs:1-cpp_e2e:True-nb:1] SKIP (https://nvbugs/5214239)
 examples/test_multimodal.py::test_llm_multimodal_general[VILA1.5-3b-pp:1-tp:1-float16-bs:8-cpp_e2e:True-nb:1] SKIP (https://nvbugs/5214239)