Merged
Commits (187 total; diff shown from 182 commits)
252a0c7
wip
cadedaniel Apr 3, 2024
dd629d4
Merge remote-tracking branch 'upstream/main' into executor_base
cadedaniel Apr 3, 2024
a34800f
wip
cadedaniel Apr 3, 2024
09f30bd
wip
cadedaniel Apr 3, 2024
8b5bb8b
clean
cadedaniel Apr 4, 2024
6fd424f
wip
cadedaniel Apr 4, 2024
2a347bb
wip
cadedaniel Apr 4, 2024
658ff9b
wip
cadedaniel Apr 4, 2024
acee7be
wip
cadedaniel Apr 4, 2024
85760d6
wip
cadedaniel Apr 4, 2024
408b29d
wip
cadedaniel Apr 4, 2024
9d8fd69
Merge remote-tracking branch 'upstream/main' into executor_base
cadedaniel Apr 4, 2024
3149a03
wip
cadedaniel Apr 4, 2024
0c32e0a
wip
cadedaniel Apr 4, 2024
f64d5b1
wip
cadedaniel Apr 4, 2024
7207f0c
wip
cadedaniel Apr 4, 2024
0c4df0b
wip
cadedaniel Apr 4, 2024
2e355e7
wip
cadedaniel Apr 4, 2024
edb7f62
wip
cadedaniel Apr 4, 2024
48bb3e9
wip
cadedaniel Apr 4, 2024
7b39044
fix test
cadedaniel Apr 4, 2024
9e5f2fb
fix test
cadedaniel Apr 5, 2024
1a3e26e
fix test
cadedaniel Apr 5, 2024
cd2015c
fix test
cadedaniel Apr 5, 2024
d926034
fix
cadedaniel Apr 5, 2024
607f7e2
fix
cadedaniel Apr 5, 2024
e127bb7
fix
cadedaniel Apr 5, 2024
deaa8b0
fix
cadedaniel Apr 5, 2024
7817d61
clean
cadedaniel Apr 5, 2024
99823a3
clean
cadedaniel Apr 5, 2024
849bfe9
fix
cadedaniel Apr 5, 2024
951ba85
fix
cadedaniel Apr 5, 2024
38948df
speed up cpu test
cadedaniel Apr 5, 2024
397ec77
wip
cadedaniel Apr 5, 2024
23382b9
wip
cadedaniel Apr 5, 2024
7a0294c
clean
cadedaniel Apr 5, 2024
dcdca68
wip
cadedaniel Apr 5, 2024
ed58af2
remove
cadedaniel Apr 5, 2024
df8688e
Revert "more test speedup"
cadedaniel Apr 5, 2024
55a5203
wip
cadedaniel Apr 5, 2024
55d083b
wip
cadedaniel Apr 5, 2024
0814d24
wip
cadedaniel Apr 5, 2024
b18d00c
rename profile_num_available_blocks to get_max_allowed_kv_blocks
cadedaniel Apr 5, 2024
8fb7b9a
rename again
cadedaniel Apr 5, 2024
3bb9e6f
rename
cadedaniel Apr 5, 2024
edad09c
wip
cadedaniel Apr 5, 2024
f93c845
wip
cadedaniel Apr 5, 2024
d2d2218
wip
cadedaniel Apr 5, 2024
2f960e7
lint
cadedaniel Apr 5, 2024
68552e1
wip
cadedaniel Apr 5, 2024
42983ba
import order
cadedaniel Apr 5, 2024
2d5dbb8
fix
cadedaniel Apr 5, 2024
ae2f7e6
docstrings
cadedaniel Apr 5, 2024
c89bb75
Merge branch 'main' into executor_base
cadedaniel Apr 5, 2024
bf041d9
Merge remote-tracking branch 'upstream/main' into llm-engine-spec
cadedaniel Apr 5, 2024
fa8705d
wip
cadedaniel Apr 7, 2024
8495321
wip
cadedaniel Apr 7, 2024
b63975b
wip
cadedaniel Apr 7, 2024
cb23e8c
wip
cadedaniel Apr 7, 2024
143ca28
wip
cadedaniel Apr 7, 2024
d8d4725
fix
cadedaniel Apr 7, 2024
b2728e0
wip
cadedaniel Apr 7, 2024
6250f6c
assertion
cadedaniel Apr 7, 2024
a930755
fix
cadedaniel Apr 7, 2024
5b896a3
fix
cadedaniel Apr 7, 2024
bb43b53
lint
cadedaniel Apr 7, 2024
cde3160
fix
cadedaniel Apr 7, 2024
dd8aeff
fix
cadedaniel Apr 7, 2024
46e4847
test
cadedaniel Apr 7, 2024
8454edc
test fixes
cadedaniel Apr 7, 2024
819e656
lint
cadedaniel Apr 7, 2024
2b0d787
Merge remote-tracking branch 'upstream/main' into executor_base
cadedaniel Apr 7, 2024
67fd287
Merge remote-tracking branch 'upstream/main' into llm-engine-spec
cadedaniel Apr 7, 2024
c3449ba
Merge branch 'executor_base' into llm-engine-spec
cadedaniel Apr 7, 2024
d0fbe47
clean
cadedaniel Apr 7, 2024
5445af6
refactor out beam search model processor
cadedaniel Apr 7, 2024
632b439
fix
cadedaniel Apr 7, 2024
26e7368
dedup stop check
cadedaniel Apr 7, 2024
06e7c01
wip
cadedaniel Apr 7, 2024
184a52c
del
cadedaniel Apr 7, 2024
34468fe
rename
cadedaniel Apr 7, 2024
208c467
wip
cadedaniel Apr 8, 2024
3c6abcc
wip
cadedaniel Apr 8, 2024
bbbcef7
wip
cadedaniel Apr 8, 2024
b58762d
fix
cadedaniel Apr 8, 2024
8b500d4
wip
cadedaniel Apr 8, 2024
782ce22
unit tests for block decode
cadedaniel Apr 8, 2024
3062e1c
stop token ids
cadedaniel Apr 8, 2024
fba3b30
format
cadedaniel Apr 8, 2024
bda141f
fixing spec tests
cadedaniel Apr 8, 2024
49865fb
lint
cadedaniel Apr 8, 2024
1a17ed1
clean up gpu executor
cadedaniel Apr 8, 2024
dea67bb
wip
cadedaniel Apr 8, 2024
189d7eb
fix
cadedaniel Apr 8, 2024
a70a040
wip
cadedaniel Apr 8, 2024
3e1b8f5
detokenization
cadedaniel Apr 8, 2024
b9777a6
lint
cadedaniel Apr 8, 2024
29b4f12
docstrings
cadedaniel Apr 8, 2024
42aa0bc
fix
cadedaniel Apr 8, 2024
0ebd93b
more spec test
cadedaniel Apr 8, 2024
33a3d72
remove
cadedaniel Apr 8, 2024
15c942d
wip
cadedaniel Apr 8, 2024
063e34b
strip
cadedaniel Apr 8, 2024
672a855
print
cadedaniel Apr 8, 2024
8021b38
fix flaky test
cadedaniel Apr 8, 2024
8e93fff
reduce output len
cadedaniel Apr 8, 2024
d06e9a4
strip
cadedaniel Apr 8, 2024
ca516aa
pr feedback
cadedaniel Apr 9, 2024
91cf0fc
Merge branch 'executor_base' into llm-engine-spec
cadedaniel Apr 9, 2024
f6c7b2e
Zhuohan offline pr feedback
cadedaniel Apr 9, 2024
0283fae
Merge remote-tracking branch 'upstream/main' into llm-engine-spec
cadedaniel Apr 9, 2024
96f81c4
lint
cadedaniel Apr 9, 2024
a573e2c
sampler output gpu tensor
cadedaniel Apr 9, 2024
bb36081
remove mock in multi step worker
cadedaniel Apr 9, 2024
4c40eaf
remove mock tensors from target worker
cadedaniel Apr 9, 2024
2222017
bs1 correctness test passes
cadedaniel Apr 9, 2024
51a7eae
bs32 passes
cadedaniel Apr 9, 2024
1153cbe
adding more correctness tests
cadedaniel Apr 9, 2024
68072af
test with ignore_eos=False
cadedaniel Apr 9, 2024
71860e1
lint
cadedaniel Apr 9, 2024
4b2470b
fix multinomial sampling
cadedaniel Apr 9, 2024
5e9dafb
more tests, unfix examples test
cadedaniel Apr 10, 2024
9f42d5a
lint
cadedaniel Apr 10, 2024
399e7dd
clean
cadedaniel Apr 10, 2024
44c8195
failing test
cadedaniel Apr 10, 2024
a931ed0
many bs test
cadedaniel Apr 10, 2024
e36b352
remove logs
cadedaniel Apr 10, 2024
de16919
pr feedback
cadedaniel Apr 10, 2024
196a395
Merge branch 'llm-engine-spec' into spec-decode-sampler
cadedaniel Apr 10, 2024
d933e50
Merge branch 'main' into llm-engine-spec
cadedaniel Apr 11, 2024
d30c4a5
test larger bs, remove many distinct test due to numerical instability
cadedaniel Apr 11, 2024
d8f8d1f
lint
cadedaniel Apr 11, 2024
2bbc12c
wip validate acceptance rate
cadedaniel Apr 11, 2024
2d6112b
WIP chunked prefill work
cadedaniel Apr 16, 2024
b7887bc
block manager v2 + chunked prefill test
cadedaniel Apr 16, 2024
84de321
check stages instead of num uncomputed
cadedaniel Apr 16, 2024
a9ad5ed
spec decode test passes?
cadedaniel Apr 16, 2024
430eb7a
Merge branch 'llm-engine-spec' into spec-decode-sampler
cadedaniel Apr 16, 2024
2a19f5e
allow append empty tokens in block table
cadedaniel Apr 16, 2024
79325d3
Merge remote-tracking branch 'upstream/main' into llm-engine-spec
cadedaniel Apr 16, 2024
b6e9e82
rebase on stop string fixes
cadedaniel Apr 16, 2024
bf0c37c
test spec
cadedaniel Apr 16, 2024
ed084d7
Merge branch 'llm-engine-spec' into spec-decode-sampler
cadedaniel Apr 16, 2024
a158256
lint & mypy
cadedaniel Apr 16, 2024
0b57080
Merge branch 'llm-engine-spec' into spec-decode-sampler
cadedaniel Apr 16, 2024
5a69f6c
doc
cadedaniel Apr 16, 2024
def072d
Merge branch 'llm-engine-spec' into spec-decode-sampler
cadedaniel Apr 16, 2024
16c3ec4
remove
cadedaniel Apr 16, 2024
cd78aa3
Merge branch 'main-rebase' into spec-decode-sampler
cadedaniel Apr 16, 2024
b53f984
Merge remote-tracking branch 'upstream/main' into spec-decode-sampler
cadedaniel Apr 16, 2024
ce07d3d
lint
cadedaniel Apr 16, 2024
f7938d2
trimming tests
cadedaniel Apr 16, 2024
aa4b562
move cpu/amd tests to after wait
cadedaniel Apr 16, 2024
e831854
spec decode + preemption test
cadedaniel Apr 16, 2024
f23ea7f
try different block sizes
cadedaniel Apr 16, 2024
d66ce83
fix
cadedaniel Apr 16, 2024
009c27b
Merge remote-tracking branch 'upstream/main' into amd-test-after-wait
cadedaniel Apr 17, 2024
ec0adf8
skip speculation test does not crash
cadedaniel Apr 16, 2024
7ee67f9
speculative_max_model_len
cadedaniel Apr 17, 2024
8c1e2a7
lint
cadedaniel Apr 17, 2024
519f5aa
fix
cadedaniel Apr 18, 2024
3d1938c
Merge branch 'amd-test-after-wait' into spec-decode-sampler
cadedaniel Apr 18, 2024
0439b79
test_many_k
cadedaniel Apr 18, 2024
58ba3b6
fixes
cadedaniel Apr 18, 2024
be38a33
fix llmengine issue
cadedaniel Apr 18, 2024
36c741a
wip metrics
cadedaniel Apr 18, 2024
7297bae
lint
cadedaniel Apr 18, 2024
140f198
fix system efficiency metric
cadedaniel Apr 18, 2024
f9b8a68
debugging prints
cadedaniel Apr 19, 2024
c5af09e
Revert "debugging prints"
cadedaniel Apr 19, 2024
da95e22
disable bonus token
cadedaniel Apr 19, 2024
9f8ff56
cleanup
cadedaniel Apr 22, 2024
a937a49
lint
cadedaniel Apr 22, 2024
87e4128
metrics log
cadedaniel Apr 22, 2024
75b271e
fix
cadedaniel Apr 22, 2024
f274ed7
sampler cleanup
cadedaniel Apr 22, 2024
e9c6271
clean
cadedaniel Apr 22, 2024
fa2ff3e
some docs on testing methodology
cadedaniel Apr 22, 2024
1676607
fix
cadedaniel Apr 22, 2024
63059fe
Merge remote-tracking branch 'upstream/main' into spec-decode-sampler
cadedaniel Apr 22, 2024
5a51b82
mypy fix
cadedaniel Apr 22, 2024
c3d619e
rejection sampler test fix
cadedaniel Apr 22, 2024
7bfe6dd
pr feedback
cadedaniel Apr 22, 2024
c38aa97
break compatibility tests into own file
cadedaniel Apr 22, 2024
f300f08
remove unnecessary flags
cadedaniel Apr 22, 2024
5434d90
lint
cadedaniel Apr 22, 2024
824d44c
Merge remote-tracking branch 'upstream/main' into spec-decode-sampler
cadedaniel Apr 23, 2024
tests/samplers/test_rejection_sampler.py (8 changes: 6 additions & 2 deletions)

@@ -91,12 +91,16 @@ def test_correct_output_format(which_tokens_accepted: str, seed: int,
         bonus_token_ids,
     )
 
+    # Bonus tokens are currently disabled. Verify they're set to -1.
+    # See https://github.com/vllm-project/vllm/issues/4212
+    expected_bonus_token_ids = bonus_token_ids.clone() * 0 - 1
+
     if which_tokens_accepted == "all_tokens_accepted":
         # Expect all tokens to be equal to draft tokens.
         assert torch.equal(output_token_ids[:, :-1], draft_token_ids)
 
         # Expect all bonus tokens to be included.
-        assert torch.equal(output_token_ids[:, -1:], bonus_token_ids)
+        assert torch.equal(output_token_ids[:, -1:], expected_bonus_token_ids)
     elif which_tokens_accepted == "no_tokens_accepted":
         # Expect first token to be equal to recovered tokens.
         assert torch.equal(output_token_ids[:, 0], recovered_token_ids[:, 0])
@@ -106,7 +110,7 @@ def test_correct_output_format(which_tokens_accepted: str, seed: int,
                            torch.ones_like(output_token_ids[:, 1:]) * -1)
     elif which_tokens_accepted == "some_tokens_accepted":
         recovered_plus_bonus = torch.cat(
-            (recovered_token_ids, bonus_token_ids), dim=-1)
+            (recovered_token_ids, expected_bonus_token_ids), dim=-1)
         # Assert first rejected token is a recovered token or bonus token.
         assert torch.equal(
             recovered_plus_bonus[torch.arange(0, batch_size),
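In this hunk the test stops comparing the last output column against the raw bonus_token_ids and instead builds expected_bonus_token_ids filled with -1, since bonus tokens are currently disabled (see the vllm issue 4212 referenced in the comment). Below is a small self-contained sketch of that check, using illustrative shapes and random stand-ins for the fixture data rather than the real rejection sampler:

import torch

# Illustrative shapes: batch of 4 sequences, k = 3 speculative tokens each.
batch_size, k, vocab_size = 4, 3, 32_000

# Stand-ins for what the test fixtures would provide.
draft_token_ids = torch.randint(0, vocab_size, (batch_size, k))
bonus_token_ids = torch.randint(0, vocab_size, (batch_size, 1))

# With bonus tokens disabled, every bonus slot is expected to hold -1.
expected_bonus_token_ids = torch.full_like(bonus_token_ids, -1)
# Same value the diff computes via `bonus_token_ids.clone() * 0 - 1`.
assert torch.equal(expected_bonus_token_ids, bonus_token_ids.clone() * 0 - 1)

# Simulate the "all_tokens_accepted" layout: accepted draft tokens, then the bonus slot.
output_token_ids = torch.cat((draft_token_ids, expected_bonus_token_ids), dim=-1)

assert torch.equal(output_token_ids[:, :-1], draft_token_ids)
assert torch.equal(output_token_ids[:, -1:], expected_bonus_token_ids)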
tests/samplers/test_sampler.py (3 changes: 2 additions & 1 deletion)

@@ -636,7 +636,8 @@ def test_sampler_top_k_top_p(seed: int, device: str):
     def mock_sample(probs, *args, **kwargs):
         nonlocal sample_probs
         sample_probs = probs
-        return [[prob.topk(1, dim=-1).indices.tolist(), [0]] for prob in probs]
+        return ([[prob.topk(1, dim=-1).indices.tolist(), [0]]
+                 for prob in probs], None)
 
     with patch("vllm.model_executor.layers.sampler._sample", mock_sample):
         sampler(logits=fake_logits, sampling_metadata=sampling_metadata)
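The mock changes because the patched `_sample` is now expected to return a pair rather than a bare list of sample results; from the diff, the second element (left as None in the test) appears to be an optional tensor of sampled token ids. A sketch of a mock with the same return shape follows, under that assumption; `sampler`, `fake_logits`, and `sampling_metadata` are assumed to come from the surrounding test:

from unittest.mock import patch

def make_mock_sample(captured):
    """Build a stand-in for the patched `_sample` that records the probs it saw."""

    def mock_sample(probs, *args, **kwargs):
        captured["probs"] = probs  # keep the probabilities for later assertions
        # One greedy "sample" per sequence, paired with the (assumed optional)
        # sampled-token-id tensor slot, which this mock leaves as None.
        results = [[prob.topk(1, dim=-1).indices.tolist(), [0]] for prob in probs]
        return results, None

    return mock_sample

# Usage sketch; the patch target is taken from the diff above.
# captured = {}
# with patch("vllm.model_executor.layers.sampler._sample", make_mock_sample(captured)):
#     sampler(logits=fake_logits, sampling_metadata=sampling_metadata)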
tests/spec_decode/e2e/conftest.py (31 changes: 20 additions & 11 deletions)

@@ -6,28 +6,34 @@
 
 
 @pytest.fixture
-def baseline_llm_generator(common_llm_kwargs, per_test_common_llm_kwargs,
-                           baseline_llm_kwargs, seed):
-    return create_llm_generator(common_llm_kwargs, per_test_common_llm_kwargs,
+def baseline_llm_generator(request, common_llm_kwargs,
+                           per_test_common_llm_kwargs, baseline_llm_kwargs,
+                           seed):
+    return create_llm_generator("baseline", request, common_llm_kwargs,
+                                per_test_common_llm_kwargs,
                                 baseline_llm_kwargs, seed)
 
 
 @pytest.fixture
-def test_llm_generator(common_llm_kwargs, per_test_common_llm_kwargs,
+def test_llm_generator(request, common_llm_kwargs, per_test_common_llm_kwargs,
                        test_llm_kwargs, seed):
-    return create_llm_generator(common_llm_kwargs, per_test_common_llm_kwargs,
-                                test_llm_kwargs, seed)
+    return create_llm_generator("test", request, common_llm_kwargs,
+                                per_test_common_llm_kwargs, test_llm_kwargs,
+                                seed)
 
 
-def create_llm_generator(common_llm_kwargs, per_test_common_llm_kwargs,
-                         distinct_llm_kwargs, seed):
+def create_llm_generator(baseline_or_test, request, common_llm_kwargs,
+                         per_test_common_llm_kwargs, distinct_llm_kwargs,
+                         seed):
     kwargs = {
         **common_llm_kwargs,
         **per_test_common_llm_kwargs,
         **distinct_llm_kwargs,
     }
+    test_name = request.node.name
 
     def generator_inner():
+        print(f'Creating {baseline_or_test=} LLM for {test_name=}. {kwargs=}')
         llm = LLM(**kwargs)
 
         set_random_seed(seed)
@@ -36,6 +42,9 @@ def generator_inner():
         del llm
         cleanup()
 
-    for llm in generator_inner():
-        yield llm
-        del llm
+    def generator_outer():
+        for llm in generator_inner():
+            yield llm
+            del llm
+
+    return generator_outer
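The conftest refactor threads the pytest `request` object through so each LLM creation can be logged with the test name, and the helper now returns a factory function (`generator_outer`) instead of yielding directly, so the expensive LLM is only constructed when the test iterates the generator it obtains from the factory. Below is a condensed sketch of the same pattern; `FakeLLM`, `cleanup`, and the `model` kwarg are illustrative stand-ins, not vLLM's real API:

import pytest

class FakeLLM:
    """Stand-in for vllm.LLM, which is expensive to construct."""

    def __init__(self, **kwargs):
        self.kwargs = kwargs

def cleanup():
    """Stand-in for the test suite's GPU/memory cleanup helper."""

def create_llm_generator(baseline_or_test, request, **kwargs):
    test_name = request.node.name

    def generator_inner():
        print(f'Creating {baseline_or_test=} LLM for {test_name=}. {kwargs=}')
        llm = FakeLLM(**kwargs)
        yield llm
        del llm
        cleanup()

    def generator_outer():
        for llm in generator_inner():
            yield llm
            del llm

    # Returning the factory (rather than a live generator) defers construction
    # until the test actually iterates, and lets each caller obtain a fresh
    # generator from the fixture's return value.
    return generator_outer

@pytest.fixture
def test_llm_generator(request):
    return create_llm_generator("test", request, model="illustrative-model")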