File tree Expand file tree Collapse file tree 1 file changed +3
-2
lines changed Expand file tree Collapse file tree 1 file changed +3
-2
lines changed Original file line number Diff line number Diff line change 20
20
from vllm .attention .backends .utils import (PAD_SLOT_ID , compute_slot_mapping ,
21
21
compute_slot_mapping_start_idx ,
22
22
is_block_tables_empty )
23
+ from vllm .attention .ops .paged_attn import PagedAttention
23
24
from vllm .sequence import SequenceGroupMetadata
24
25
from vllm .utils import get_kv_cache_torch_dtype , make_tensor_with_pad
25
26
@@ -61,14 +62,14 @@ def swap_blocks(
61
62
dst_kv_cache : torch .Tensor ,
62
63
src_to_dst : torch .Tensor ,
63
64
) -> None :
64
- raise NotImplementedError
65
+ PagedAttention . swap_blocks ( src_kv_cache , dst_kv_cache , src_to_dst )
65
66
66
67
@staticmethod
67
68
def copy_blocks (
68
69
kv_caches : List [torch .Tensor ],
69
70
src_to_dists : torch .Tensor ,
70
71
) -> None :
71
- raise NotImplementedError
72
+ PagedAttention . copy_blocks ( kv_caches , src_to_dists )
72
73
73
74
@staticmethod
74
75
def get_supported_head_sizes () -> List [int ]:
You can’t perform that action at this time.
0 commit comments