|
#!/bin/bash
# A rather convenient script for spinning up FastChat models behind screens.
# Requires: GNU screen, conda, and a conda env with fastchat installed.


# Variables
PROJECT_DIR="$(pwd)"          # directory the script is launched from
CONDA_ENV_NAME="fastchat"     # conda env activated inside every screen session

# Model served by both workers. Keep exactly one assignment active; the
# zephyr line is a ready-to-swap alternative (previously it was a live
# assignment that was silently overwritten by the vicuna line below).
#MODEL_PATH="HuggingFaceH4/zephyr-7b-beta" # beta is better than the alpha version, base model w/o quantization
MODEL_PATH="lmsys/vicuna-7b-v1.5"

# Bind address for the OpenAI-compatible API server
API_HOST="0.0.0.0"
API_PORT_NUMBER=8000
| 15 | + |
# init the screens
#
# check_and_create_screen NAME
#   Creates a detached screen session NAME unless one already exists.
#   `screen -list` lines look like "\t<pid>.<name>\t(Detached)", so we
#   match ".NAME" followed by whitespace; the original bare substring
#   grep would false-positive when one session name is a prefix of
#   another (e.g. "worker-d1" vs "worker-d10").
check_and_create_screen() {
    local SCREENNAME="$1"
    if screen -list | grep -q "\.${SCREENNAME}[[:space:]]"; then
        echo "Screen session '$SCREENNAME' exists. Doing nothing."
    else
        echo "Screen session '$SCREENNAME' not found. Creating..."
        screen -d -m -S "$SCREENNAME"
        echo "created!"
    fi
}
| 27 | + |
# convenience function for sending commands to named screens
#
# send_cmd NAME CMD
#   Types CMD followed by a carriage return into screen session NAME.
#   Fixes the original, which single-quoted '$2 \r' and therefore sent
#   the literal text "$2" instead of the command, and left $SCREENNAME
#   unquoted.
send_cmd() {
    local SCREENNAME="$1"
    local CMD="$2"
    # "\r" is passed through literally for screen's `stuff` to interpret
    # as Enter, matching the direct `-X stuff "... \r"` calls below.
    screen -DRRS "$SCREENNAME" -X stuff "$CMD \r"
}
| 34 | + |
# hardcoded names, for baby api
SCREENNAMES=(
    "controller"
    "api"
    # Worker screens include the devices they are bound to, if 'd0' is only worker it has full GPU access
    "worker-d0"
    "worker-d1"
)

# Make sure every session exists, then switch each one onto the compute
# conda environment (deactivate first so we start from a clean base env).
for session in "${SCREENNAMES[@]}"; do
    check_and_create_screen "$session"
    sleep 0.1   # give screen a moment to come up before stuffing keystrokes
    screen -DRRS "$session" -X stuff "conda deactivate \r"
    screen -DRRS "$session" -X stuff "conda activate $CONDA_ENV_NAME \r"
done
| 52 | + |
| 53 | + |
# Send Commands on a per Screen Basis

# Flags shared by both model workers; expands to the exact same command
# strings the screens received before.
WORKER_OPTS="--conv-template one_shot --limit-worker-concurrency 1"

screen -DRRS controller -X stuff "python3 -m fastchat.serve.controller \r"

# worker-d1 gets an explicit port/worker-address so it does not collide
# with worker-d0, which uses the defaults.
screen -DRRS worker-d0 -X stuff "CUDA_VISIBLE_DEVICES=0 python3 -m fastchat.serve.model_worker --model-path $MODEL_PATH $WORKER_OPTS \r"
screen -DRRS worker-d1 -X stuff "CUDA_VISIBLE_DEVICES=1 python3 -m fastchat.serve.model_worker --model-path $MODEL_PATH --port 21003 --worker-address http://localhost:21003 $WORKER_OPTS \r"

screen -DRRS api -X stuff "python3 -m fastchat.serve.openai_api_server --host $API_HOST --port $API_PORT_NUMBER \r"
0 commit comments