1 file changed: +12 -0 lines changed
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,18 @@ translateTGIToLMI "SM_NUM_GPUS" "TENSOR_PARALLEL_DEGREE"
# Forward the request-queue and batching limits to their target option names.
# NOTE(review): translateTGIToLMI is defined outside this excerpt; presumably it
# copies the value of the first-named variable into the second-named one when
# the first is set -- confirm against its definition.
# The names are passed as exact identifiers (no padding inside the quotes) so
# that they match the real environment variable names.
translateTGIToLMI "MAX_CONCURRENT_REQUESTS" "SERVING_JOB_QUEUE_SIZE"
translateTGIToLMI "MAX_BATCH_PREFILL_TOKENS" "OPTION_MAX_ROLLING_BATCH_PREFILL_TOKENS"
translateTGIToLMI "MAX_BATCH_SIZE" "OPTION_MAX_ROLLING_BATCH_SIZE"
#######################################
# Derive OPTION_ENFORCE_EAGER from ENABLE_CUDA_GRAPHS.
# The two settings are inverses of each other:
#   ENABLE_CUDA_GRAPHS=true            -> OPTION_ENFORCE_EAGER=false
#   ENABLE_CUDA_GRAPHS=<anything else> -> OPTION_ENFORCE_EAGER=true
# Globals:
#   ENABLE_CUDA_GRAPHS   (read)    acted on only when set and non-empty
#   OPTION_ENFORCE_EAGER (written) exported; an existing non-empty value wins
# Arguments: none
# Returns:   0
#######################################
deriveEnforceEagerFromCudaGraphs() {
  # ${VAR:-} keeps both checks safe should the script ever run under `set -u`.
  # Nothing to do when the source flag is absent or the target was set
  # explicitly by the user.
  if [[ -z "${ENABLE_CUDA_GRAPHS:-}" || -n "${OPTION_ENFORCE_EAGER:-}" ]]; then
    return 0
  fi

  if [[ "${ENABLE_CUDA_GRAPHS}" == "true" ]]; then
    export OPTION_ENFORCE_EAGER=false
  else
    export OPTION_ENFORCE_EAGER=true
  fi
}
deriveEnforceEagerFromCudaGraphs
# CUDA_MEMORY_FRACTION is forwarded to a different target option depending on
# SERVING_FEATURES: exactly "trtllm" selects
# OPTION_KV_CACHE_FREE_GPU_MEM_FRACTION, anything else (including unset)
# selects OPTION_GPU_MEMORY_UTILIZATION.
# NOTE(review): translateTGIToLMI is defined outside this excerpt -- confirm
# its semantics there.
# ${SERVING_FEATURES:-} keeps the comparison safe under `set -u`.
if [[ "${SERVING_FEATURES:-}" == "trtllm" ]]; then
  translateTGIToLMI "CUDA_MEMORY_FRACTION" "OPTION_KV_CACHE_FREE_GPU_MEM_FRACTION"
else
  translateTGIToLMI "CUDA_MEMORY_FRACTION" "OPTION_GPU_MEMORY_UTILIZATION"
fi
5062
5163if [[ " $1 " = " serve" ]]; then
5264 shift 1
You can’t perform that action at this time.
0 commit comments