Skip to content

Commit c6ff51b

Browse files
Convert cuda env tgi variables to lmi (#2013)
1 parent 3c95420 commit c6ff51b

File tree

1 file changed

+12
-0
lines changed

1 file changed

+12
-0
lines changed

serving/docker/dockerd-entrypoint-with-cuda-compat.sh

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,18 @@ translateTGIToLMI "SM_NUM_GPUS" "TENSOR_PARALLEL_DEGREE"
4747
# Map remaining TGI-style env vars onto their LMI equivalents
# (translateTGIToLMI is defined earlier in this script).
translateTGIToLMI "MAX_CONCURRENT_REQUESTS" "SERVING_JOB_QUEUE_SIZE"
translateTGIToLMI "MAX_BATCH_PREFILL_TOKENS" "OPTION_MAX_ROLLING_BATCH_PREFILL_TOKENS"
translateTGIToLMI "MAX_BATCH_SIZE" "OPTION_MAX_ROLLING_BATCH_SIZE"

#######################################
# Derive OPTION_ENFORCE_EAGER from the TGI ENABLE_CUDA_GRAPHS flag.
# CUDA graphs enabled => eager execution off, and vice versa.
# Globals:   ENABLE_CUDA_GRAPHS (read), OPTION_ENFORCE_EAGER (exported)
# Note:      an explicitly set OPTION_ENFORCE_EAGER always wins; the
#            truthiness check is case-insensitive (true/True/TRUE).
#######################################
set_enforce_eager_from_cuda_graphs() {
  # :- guards keep this safe under `set -u` when either var is unset.
  if [[ -n "${ENABLE_CUDA_GRAPHS:-}" && -z "${OPTION_ENFORCE_EAGER:-}" ]]; then
    if [[ "${ENABLE_CUDA_GRAPHS,,}" == "true" ]]; then
      export OPTION_ENFORCE_EAGER=false
    else
      export OPTION_ENFORCE_EAGER=true
    fi
  fi
}
set_enforce_eager_from_cuda_graphs

# CUDA_MEMORY_FRACTION maps to a backend-specific knob: TensorRT-LLM uses
# the KV-cache free-GPU-memory fraction, every other backend uses overall
# GPU memory utilization.
if [[ "${SERVING_FEATURES:-}" == "trtllm" ]]; then
  translateTGIToLMI "CUDA_MEMORY_FRACTION" "OPTION_KV_CACHE_FREE_GPU_MEM_FRACTION"
else
  translateTGIToLMI "CUDA_MEMORY_FRACTION" "OPTION_GPU_MEMORY_UTILIZATION"
fi
5062

5163
if [[ "$1" = "serve" ]]; then
5264
shift 1

0 commit comments

Comments
 (0)