Skip to content

Commit a2723ef

Browse files
ckgresla authored and zhanghao.smooth committed
a convenient script for spinning up the API with Model Workers (lm-sys#2790)
1 parent 42303d1 commit a2723ef

File tree

1 file changed

+60
-0
lines changed

1 file changed

+60
-0
lines changed

build-api.sh

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
#!/bin/bash
# A rather convenient script for spinning up models behind screens: it starts
# the FastChat controller, the model workers and the OpenAI-compatible API
# server, each inside its own GNU screen session.
#
# Every setting below except PROJECT_DIR can be overridden from the
# environment, for example:
#   MODEL_PATH="HuggingFaceH4/zephyr-7b-beta" API_PORT_NUMBER=9000 ./build-api.sh

# Variables
# Directory the script was started from (not referenced by the rest of the
# script at the moment).
PROJECT_DIR="$(pwd)"
# Conda environment that gets activated inside every screen session.
CONDA_ENV_NAME="${CONDA_ENV_NAME:-fastchat}"

# Model handed to the workers via --model-path.
# Alternative: "HuggingFaceH4/zephyr-7b-beta" (beta is better than the alpha
# version, base model w/o quantization). It was previously assigned here and
# then immediately overwritten by the vicuna assignment, so it never took
# effect; select it through the MODEL_PATH environment variable instead.
MODEL_PATH="${MODEL_PATH:-lmsys/vicuna-7b-v1.5}"

# Host and port passed to the OpenAI-compatible API server.
API_HOST="${API_HOST:-0.0.0.0}"
API_PORT_NUMBER="${API_PORT_NUMBER:-8000}"
14+
15+
16+
# init the screens
#######################################
# Make sure a detached screen session with the given name exists.
#
# The name is compared exactly against the part after "<pid>." in the first
# column of each `screen -list` line. A plain substring grep would also match
# longer session names (e.g. "api" inside "my-api-test") or unrelated text
# such as the socket directory path, and would then wrongly skip creation.
#
# Arguments:
#   $1 - screen session name
# Outputs:
#   Progress messages on stdout; a failure message on stderr.
# Returns:
#   0 if the session already exists or was created, 1 if creation failed.
#######################################
check_and_create_screen() {
  local SCREENNAME="$1"
  if screen -list | awk -v want="$SCREENNAME" '
      { id = $1; sub(/^[0-9]+\./, "", id); if (id == want) found = 1 }
      END { exit !found }'; then
    echo "Screen session '$SCREENNAME' exists. Doing nothing."
  else
    echo "Screen session '$SCREENNAME' not found. Creating..."
    # Only report success when screen actually started the session.
    if screen -d -m -S "$SCREENNAME"; then
      echo "created!"
    else
      echo "Failed to create screen session '$SCREENNAME'." >&2
      return 1
    fi
  fi
}
27+
28+
# convenience function for sending commands to named screens
#######################################
# Type a command line into a named screen session via `screen -X stuff`.
#
# Arguments:
#   $1 - screen session name
#   $2 - command line to type into the session
#######################################
send_cmd() {
  local SCREENNAME="$1"
  local CMD="$2"
  # Double quotes are required here: the previous single-quoted '$2 \r' sent
  # the literal characters "$2" to the session instead of the command.
  # The trailing " \r" mirrors the direct `stuff` calls elsewhere in this
  # script, which rely on it to submit the typed line.
  screen -DRRS "$SCREENNAME" -X stuff "$CMD \r"
}
34+
35+
# Fixed list of screen session names for the small ("baby") API setup.
# Core services first.
SCREENNAMES=("controller" "api")
# Worker sessions are named after the device they are bound to; if 'd0' is
# the only worker it has full GPU access.
SCREENNAMES+=("worker-d0" "worker-d1")
43+
44+
# Create any missing session, then reset and activate the conda compute
# environment inside each one.
for session_name in "${SCREENNAMES[@]}"; do
  check_and_create_screen "$session_name"
  # Short pause before typing into the session.
  sleep 0.1
  # Deactivate first, then activate the configured environment.
  for conda_cmd in "conda deactivate" "conda activate $CONDA_ENV_NAME"; do
    screen -DRRS "$session_name" -X stuff "$conda_cmd \r"
  done
done
52+
53+
54+
# Send commands on a per-screen basis: each string below is typed into the
# matching screen session.

# Pieces shared by both model worker command lines.
worker_base="python3 -m fastchat.serve.model_worker --model-path $MODEL_PATH"
worker_tail="--conv-template one_shot --limit-worker-concurrency 1"

# Controller.
screen -DRRS controller -X stuff "python3 -m fastchat.serve.controller \r"

# Worker pinned to GPU 0; no explicit port or worker address is given.
screen -DRRS worker-d0 -X stuff "CUDA_VISIBLE_DEVICES=0 $worker_base $worker_tail \r"
# Worker pinned to GPU 1, with its own port and worker address.
screen -DRRS worker-d1 -X stuff "CUDA_VISIBLE_DEVICES=1 $worker_base --port 21003 --worker-address http://localhost:21003 $worker_tail \r"

# OpenAI-compatible API server.
screen -DRRS api -X stuff "python3 -m fastchat.serve.openai_api_server --host $API_HOST --port $API_PORT_NUMBER \r"

0 commit comments

Comments
 (0)