Skip to content

Commit 46cb6ab

Browse files
committed
remove joins test
Signed-off-by: Sophie du Couédic <[email protected]>
1 parent ac373c0 commit 46cb6ab

File tree

1 file changed

+0
-279
lines changed

1 file changed

+0
-279
lines changed

tests/e2e/test_spyre_cb_scheduler_steps.py

Lines changed: 0 additions & 279 deletions
Original file line numberDiff line numberDiff line change
@@ -530,285 +530,6 @@ def test_two_sequences_finish_same_time_as_new_arrive(
530530
hf_results=hf_outputs)
531531

532532

533-
@pytest.mark.cb
534-
@pytest.mark.parametrize("model", get_spyre_model_list())
535-
@pytest.mark.parametrize("backend", get_spyre_backend_list())
536-
def test_new_sequence_joins_during_decode(model: str, backend: str,
537-
monkeypatch: pytest.MonkeyPatch):
538-
""" Scenario where a new sequence joins while decoding other sequences
539-
540-
Configuration:
541-
* max_num_seqs: 4
542-
* number of prompts: 4
543-
* 1: len = 49, max tokens = 119, step joining = 0
544-
* 2: len = 14, max tokens = 52, step joining = 0
545-
* 3: len = 89, max tokens = 104, step joining = 32
546-
* 4: len = 9, max tokens = 64, step joining = 131
547-
"""
548-
# TODO change to 65 max_tokens for last prompt if ever possible
549-
550-
seqs_max_tokens = [119, 52, 104, 64]
551-
prompts_lengths = [49, 14, 89, 9]
552-
steps_add_reqs = [0, 0, 32, 131]
553-
available_blocks = -1 # no restriction
554-
max_num_seqs = 4
555-
max_model_len = 256
556-
# check_output = backend == "sendnn"
557-
check_output = True
558-
559-
checked_steps = [
560-
{
561-
"step": 0,
562-
"tkv": 0,
563-
"waiting": ["0", "1"],
564-
"running": [],
565-
"request_outputs": [],
566-
"n_reserved_blocks": 0,
567-
"n_used_blocks": 0
568-
},
569-
{
570-
# Prefill sequence 0
571-
"step": 1,
572-
"tkv": 64,
573-
"waiting": ["1"],
574-
"running": ["0"],
575-
"request_outputs": ["0"],
576-
"n_reserved_blocks": 3, # prefill (1 block) + 118 decodes (2 blocks)
577-
"n_used_blocks": 1
578-
},
579-
{
580-
# Prefill sequence 1
581-
"step": 2,
582-
"tkv": 64,
583-
"waiting": [],
584-
"running": ["1", "0"],
585-
"request_outputs": ["1"],
586-
"n_reserved_blocks": 5, # prefill (1 block) + 51 decodes (1 block)
587-
"n_used_blocks": 2
588-
},
589-
{
590-
# Decode sequences 0 and 1
591-
"step": 3,
592-
"tkv": 65,
593-
"waiting": [],
594-
"running": ["1", "0"],
595-
"request_outputs": ["1", "0"],
596-
"n_reserved_blocks": 5,
597-
"n_used_blocks": 4 # 2 blocks extended, one for each sequence
598-
},
599-
{
600-
# Sequence 2 joins: one iteration in waiting queue
601-
"step": 32,
602-
"tkv": 94,
603-
"waiting": ["2"],
604-
"running": ["1", "0"],
605-
"request_outputs": ["1", "0"],
606-
"n_reserved_blocks": 5,
607-
"n_used_blocks": 4
608-
},
609-
{
610-
# Prefill sequence 2
611-
"step": 33,
612-
"tkv": 94,
613-
"waiting": [],
614-
"running": ["2", "1", "0"],
615-
"request_outputs": ["2"],
616-
"n_reserved_blocks": 9, # prefill (2 block) + 103 decode (2 block)
617-
"n_used_blocks": 6
618-
},
619-
{
620-
# Decode sequences 0, 1, and 2
621-
"step": 34,
622-
"tkv": 95,
623-
"waiting": [],
624-
"running": ["2", "1", "0"],
625-
"request_outputs": ["2", "1", "0"],
626-
"n_reserved_blocks": 9,
627-
"n_used_blocks": 6
628-
},
629-
{
630-
# Sequence 1 finishes at step 54
631-
# (start step + 2 prefills + 51 decodes - 1) = 2 + 2 + 51 - 1 = 54
632-
"step": 54,
633-
"tkv": 115,
634-
"waiting": [],
635-
"running": ["2", "0"],
636-
"request_outputs": ["2", "1", "0"],
637-
"finished_requests": ["1"],
638-
"n_reserved_blocks": 9,
639-
"n_used_blocks": 6
640-
},
641-
{
642-
# Decode sequences 0 and 2
643-
"step": 55,
644-
"tkv": 116,
645-
"waiting": [],
646-
"running": ["2", "0"],
647-
"request_outputs": ["2", "0"],
648-
"n_reserved_blocks": 7, # two blocks released
649-
"n_used_blocks": 4 # two blocks released
650-
},
651-
{
652-
# Decode sequences 0 and 2, tkv arrives to new block
653-
"step": 68,
654-
"tkv": 129,
655-
"waiting": [],
656-
"running": ["2", "0"],
657-
"request_outputs": ["2", "0"],
658-
"n_reserved_blocks": 7,
659-
"n_used_blocks": 6 # 2 blocks extended, one for each sequence
660-
},
661-
{
662-
# Sequence 0 finishes at step 121
663-
# (start step + 3 prefills + 118 decode - 1) = 1 + 3 + 118 - 1 = 121
664-
"step": 121,
665-
"tkv": 182,
666-
"waiting": [],
667-
"running": ["2"],
668-
"request_outputs": ["2", "0"],
669-
"finished_requests": ["0"],
670-
"n_reserved_blocks": 7,
671-
"n_used_blocks": 6
672-
},
673-
{
674-
# Decode sequence 2
675-
"step": 122,
676-
"tkv": 183,
677-
"waiting": [],
678-
"running": ["2"],
679-
"request_outputs": ["2"],
680-
"n_reserved_blocks": 4, # 3 blocks released
681-
"n_used_blocks": 3 # 3 blocks released
682-
},
683-
{
684-
# Sequence 3 joins: one iteration in waiting queue
685-
"step": 131,
686-
"tkv": 192,
687-
"waiting": ["3"],
688-
"running": ["2"],
689-
"request_outputs": ["2"],
690-
"n_reserved_blocks": 4,
691-
"n_used_blocks": 3
692-
},
693-
{
694-
# Prefill sequence 3
695-
"step": 132,
696-
"tkv": 192,
697-
"waiting": [],
698-
"running": ["3", "2"],
699-
"request_outputs": ["3"],
700-
"n_reserved_blocks": 8, # prefill (3 blocks) + 63 decode (1 block)
701-
"n_used_blocks": 6 # prefill (3 block)
702-
},
703-
{
704-
# Decode sequences 2 and 3
705-
"step": 133,
706-
"tkv": 193,
707-
"waiting": [],
708-
"running": ["3", "2"],
709-
"request_outputs": ["3", "2"],
710-
"n_reserved_blocks": 8,
711-
"n_used_blocks": 8 # 2 blocks extended, one for each sequence
712-
},
713-
{
714-
# Sequence 2 finishes at step 137
715-
# (start step + 2 prefills + 103 decodes - 1) = 33 + 2 + 103 - 1 = 137
716-
"step": 137,
717-
"tkv": 197,
718-
"waiting": [],
719-
"running": ["3"],
720-
"request_outputs": ["3", "2"],
721-
"finished_requests": ["2"],
722-
"n_reserved_blocks": 8,
723-
"n_used_blocks": 8
724-
},
725-
{
726-
# Decode sequence 3
727-
"step": 138,
728-
"tkv": 70,
729-
"waiting": [],
730-
"running": ["3"],
731-
"request_outputs": ["3"],
732-
# 6 blocks freed: finished sequence (4) + left padding stripping (2)
733-
"n_reserved_blocks": 2,
734-
"n_used_blocks": 2
735-
},
736-
{
737-
# Sequence 3 finishes at step 195
738-
# (start step + 1 prefill + 63 decodes - 1) = 132 + 1 + 63 - 1 = 195
739-
"step": 195,
740-
"tkv": 127,
741-
"waiting": [],
742-
"running": [],
743-
"request_outputs": ["3"],
744-
"finished_requests": ["3"],
745-
"n_reserved_blocks": 2,
746-
"n_used_blocks": 2
747-
},
748-
{
749-
# Tkv should be cleared one step later
750-
"step": 196,
751-
"tkv": 0,
752-
"waiting": [],
753-
"running": [],
754-
"request_outputs": [],
755-
"n_reserved_blocks": 0,
756-
"n_used_blocks": 0
757-
},
758-
# TODO this is when max_tokens = 65 for last prompt
759-
# {
760-
# # Sequence 3 finishes at step 196
761-
# # (start step + 1 prefill + 64 decodes - 1) = 132 + 1 + 64 - 1 = 196
762-
# "step": 196,
763-
# "tkv": 128,
764-
# "waiting": [],
765-
# "running": [],
766-
# "request_outputs": ["3"],
767-
# "finished_requests": ["3"],
768-
# "n_reserved_blocks": 2,
769-
# "n_used_blocks": 2
770-
# },
771-
# {
772-
# # Tkv should be cleared one step later
773-
# "step": 197,
774-
# "tkv": 0,
775-
# "waiting": [],
776-
# "running": [],
777-
# "request_outputs": [],
778-
# "n_reserved_blocks": 0,
779-
# "n_used_blocks": 0
780-
# },
781-
]
782-
783-
cb_outputs, prompts = check_scheduler_inference_steps(
784-
model=model,
785-
backend=backend,
786-
monkeypatch=monkeypatch,
787-
seqs_max_tokens=seqs_max_tokens,
788-
prompts_lengths=prompts_lengths,
789-
steps_add_reqs=steps_add_reqs,
790-
checked_steps=checked_steps,
791-
max_num_seqs=max_num_seqs,
792-
max_model_len=max_model_len,
793-
available_blocks=available_blocks,
794-
use_cb=True,
795-
collect_outputs=check_output,
796-
)
797-
798-
if check_output:
799-
hf_outputs = generate_hf_output(
800-
model=model,
801-
prompts=prompts,
802-
max_new_tokens=seqs_max_tokens,
803-
ignore_eos=True,
804-
)
805-
compare_results(model=model,
806-
tensor_parallel_size=1,
807-
backend=backend,
808-
vllm_results=cb_outputs,
809-
hf_results=hf_outputs)
810-
811-
812533
@pytest.mark.cb
813534
@pytest.mark.parametrize("model", get_spyre_model_list())
814535
@pytest.mark.parametrize("backend", get_spyre_backend_list())

0 commit comments

Comments
 (0)