-
Notifications
You must be signed in to change notification settings - Fork 3.1k
Implement LRU eviction policy for LoRA adapters #11041
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Fridge003
merged 24 commits into
sgl-project:main
from
ConnorLi96:feature/sglang_lora_lru
Oct 14, 2025
Merged
Changes from 16 commits
Commits
Show all changes
24 commits
Select commit
Hold shift + click to select a range
43dfe76
Implement LRU eviction policy for LoRA adapters
ConnorLi96 06eaf34
feat: Add LRU eviction policy with comprehensive unit tests
ConnorLi96 8e7afe1
style: Fix code formatting and spelling issues
ConnorLi96 e6ae718
change default fifo to lru
ConnorLi96 77d8051
address comments and optimize code
ConnorLi96 65628b5
correct the engine command help=
ConnorLi96 3d6fdd2
support integration test
ConnorLi96 f1c446e
evict None base model
ConnorLi96 bfe931f
add DEFAULT_LORA_EVICTION_POLICY
ConnorLi96 76290d0
address some comments
ConnorLi96 a0b3a74
add more unit tests
ConnorLi96 15e1d8a
Merge branch 'main' into feature/sglang_lora_lru
ConnorLi96 03584ba
Update server_args.py
ConnorLi96 ca58c7f
fix format
ConnorLi96 11a6a1b
Merge branch 'main' into feature/sglang_lora_lru
Fridge003 721421e
Merge branch 'main' into feature/sglang_lora_lru
ConnorLi96 f221f60
fix wrong import
ConnorLi96 0921776
Merge branch 'main' into feature/sglang_lora_lru
ConnorLi96 56efbfb
Merge branch 'main' into feature/sglang_lora_lru
Fridge003 94b8bd2
Merge branch 'main' into feature/sglang_lora_lru
ConnorLi96 9094954
delete integration test in test_lora_eviction_policy.py
ConnorLi96 9349cb7
update arguments
ConnorLi96 8ccef17
update server_arguments.md
ConnorLi96 ed5de18
update lora.ipynb
ConnorLi96 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
# Copyright 2023-2024 SGLang Team | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# ============================================================================== | ||
|
||
""" | ||
Eviction policies for LoRA adapter memory management. | ||
""" | ||
|
||
import logging | ||
import time | ||
from abc import ABC, abstractmethod | ||
from collections import OrderedDict | ||
from typing import Any, Dict, List, Optional, Set | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class EvictionPolicy(ABC):
    """Abstract base class for LoRA adapter eviction policies.

    A policy tracks adapters by their uid and, when asked, chooses which of a
    set of candidate uids should be evicted. The uid ``None`` is accepted
    everywhere because the concrete policies in this module use it to denote
    the base model (no adapter).
    """

    @abstractmethod
    def mark_used(self, uid: Optional[str]) -> None:
        """Marks an adapter as used.

        Args:
            uid: Identifier of the adapter that was just used, or ``None``.
        """

    @abstractmethod
    def select_victim(self, candidates: Set[Optional[str]]) -> Optional[str]:
        """Selects an adapter to evict from candidates.

        Args:
            candidates: The uids that are currently eligible for eviction.

        Returns:
            The uid chosen for eviction; it is one of ``candidates``.
        """

    @abstractmethod
    def remove(self, uid: Optional[str]) -> None:
        """Removes an adapter from the policy's tracking.

        Args:
            uid: Identifier of the adapter to stop tracking, or ``None``.
        """
|
||
|
||
class LRUEvictionPolicy(EvictionPolicy):
    """LRU eviction policy - evicts the least recently used adapter.

    Attributes are plain counters/containers that callers may inspect:
    ``access_order`` maps uid -> last access time and is ordered from least to
    most recently used, ``total_accesses`` counts ``mark_used`` calls with a
    non-None uid, and ``eviction_count`` counts victims handed out by
    ``select_victim``.
    """

    def __init__(self):
        # key=uid, value=last_access_time (time.monotonic()); oldest first.
        self.access_order = OrderedDict()
        self.total_accesses = 0
        self.eviction_count = 0

    def mark_used(self, uid: Optional[str]) -> None:
        """Record an access to ``uid``, making it the most recently used.

        ``None`` (the base model) is never tracked, so it is ignored here.
        """
        if uid is None:
            return

        current_time = time.monotonic()
        # Store the timestamp, then move the entry to the end (most recent);
        # a plain assignment would keep an existing key at its old position.
        self.access_order[uid] = current_time
        self.access_order.move_to_end(uid)
        self.total_accesses += 1
        logger.debug("LoRA %s marked as used at %s", uid, current_time)

    def select_victim(self, candidates: Set[Optional[str]]) -> Optional[str]:
        """Select the least recently used adapter from candidates.

        Args:
            candidates: The uids that are currently eligible for eviction.

        Returns:
            ``None`` if the base model is among the candidates, otherwise the
            least recently used tracked uid that is in ``candidates``.

        Raises:
            AssertionError: If no candidate is tracked by this policy (this
                includes an empty ``candidates`` set).
        """
        # Base model (currently None, will be replaced with special UID in future)
        # always has lowest priority - evict it first if available
        BASE_MODEL_UID = None  # TODO: Replace with special UID constant
        if BASE_MODEL_UID in candidates:
            logger.debug("Selected base model for eviction (LRU)")
            self.eviction_count += 1
            return BASE_MODEL_UID

        # Iterate through access_order (oldest first) to find LRU victim.
        # Nothing is mutated inside the loop, so no defensive copy is needed.
        for uid in self.access_order:
            if uid in candidates:
                logger.debug("Selected LoRA %s for eviction (LRU)", uid)
                self.eviction_count += 1
                return uid

        # Should never reach here if candidates is non-empty. Raise explicitly
        # instead of `assert False`: asserts are stripped under `python -O`,
        # which would make this method silently return None (the base model).
        raise AssertionError(
            f"Failed to select LRU victim from candidates: {candidates}"
        )

    def remove(self, uid: Optional[str]) -> None:
        """Stop tracking ``uid``; a no-op for ``None`` or an unknown uid."""
        if uid is None:
            return

        self.access_order.pop(uid, None)
        logger.debug("Removed LoRA %s from LRU tracking", uid)
|
||
|
||
class FIFOEvictionPolicy(EvictionPolicy):
    """FIFO eviction policy - for backward compatibility.

    Adapters are evicted in the order in which they were first marked as
    used; later accesses do not change an adapter's position.
    ``eviction_count`` counts victims handed out by ``select_victim``.
    """

    def __init__(self):
        # key=uid, value unused; OrderedDict maintains insertion order.
        self.insertion_order = OrderedDict()
        self.eviction_count = 0

    def mark_used(self, uid: Optional[str]) -> None:
        """For FIFO, we only track insertion order (not access time).

        The first call for a given uid records it; repeated calls and calls
        with ``None`` (the base model) leave the ordering untouched.
        """
        if uid is not None and uid not in self.insertion_order:
            # Value unused, OrderedDict tracks insertion order
            self.insertion_order[uid] = True

    def select_victim(self, candidates: Set[Optional[str]]) -> Optional[str]:
        """Select the first inserted adapter from candidates.

        Args:
            candidates: The uids that are currently eligible for eviction.

        Returns:
            ``None`` if the base model is among the candidates, otherwise the
            earliest-inserted tracked uid that is in ``candidates``.

        Raises:
            AssertionError: If no candidate is tracked by this policy (this
                includes an empty ``candidates`` set).
        """
        # Base model (currently None, will be replaced with special UID in future)
        # always has lowest priority - evict it first if available
        BASE_MODEL_UID = None  # TODO: Replace with special UID constant
        if BASE_MODEL_UID in candidates:
            logger.debug("Selected base model for eviction (FIFO)")
            self.eviction_count += 1
            return BASE_MODEL_UID

        # Iterate through insertion_order (oldest first) to find FIFO victim.
        # Nothing is mutated inside the loop, so no defensive copy is needed.
        for uid in self.insertion_order:
            if uid in candidates:
                logger.debug("Selected LoRA %s for eviction (FIFO)", uid)
                self.eviction_count += 1
                return uid

        # Should never reach here if candidates is non-empty. Raise explicitly
        # instead of `assert False`: asserts are stripped under `python -O`,
        # which would make this method silently return None (the base model).
        raise AssertionError(
            f"Failed to select FIFO victim from candidates: {candidates}"
        )

    def remove(self, uid: Optional[str]) -> None:
        """Stop tracking ``uid``; a no-op for ``None`` or an unknown uid."""
        if uid is not None:
            self.insertion_order.pop(uid, None)
|
||
|
||
def get_eviction_policy(policy_name: str) -> EvictionPolicy:
    """Build a fresh eviction policy instance for the given policy name.

    Args:
        policy_name: Either ``"fifo"`` or ``"lru"`` (matched exactly).

    Returns:
        A new instance of the policy class registered under ``policy_name``.

    Raises:
        ValueError: If ``policy_name`` is not one of the registered names.
    """
    registry = {
        "fifo": FIFOEvictionPolicy,
        "lru": LRUEvictionPolicy,
    }
    policy_cls = registry.get(policy_name)
    if policy_cls is None:
        raise ValueError(f"Unknown eviction policy: {policy_name}")
    return policy_cls()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.