1 parent 24a3b57 commit 58ca798
tests/quantization/test_compressed_tensors.py
@@ -160,11 +160,4 @@ def test_compressed_tensors_kv_cache(vllm_runner):
     model_path = "nm-testing/TinyLlama-1.1B-compressed-tensors-kv-cache-scheme"
     with vllm_runner(model_path, kv_cache_dtype="fp8") as llm:
         output = llm.generate_greedy("Hello world!", max_tokens=20)
-    assert output
-
-
-def test_compressed_tensors_fused_moe(vllm_runner):
-    model_path = "nm-testing/Mixtral-8x7B-Instruct-v0.1-W4A16-channel-quantized"
-    with vllm_runner(model_path) as llm:
-        output = llm.generate_greedy("Hello world!", max_tokens=20)
-    assert output
+        assert output
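For orientation, the kv-cache test after this hunk applies would read roughly as in the sketch below. This is a reconstruction from the diff only: the original indentation of the re-added assert and anything outside the hunk (imports, decorators, other tests in tests/quantization/test_compressed_tensors.py) are assumptions, not part of this commit view.

# Reconstructed post-commit state of the kv-cache test (sketch, not verbatim source).
# vllm_runner is the pytest fixture used throughout this test file.
def test_compressed_tensors_kv_cache(vllm_runner):
    model_path = "nm-testing/TinyLlama-1.1B-compressed-tensors-kv-cache-scheme"
    # Load the model with an fp8 KV cache and run a short greedy generation.
    with vllm_runner(model_path, kv_cache_dtype="fp8") as llm:
        output = llm.generate_greedy("Hello world!", max_tokens=20)
        # Only checks that generation produced something, not its content.
        assert output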