#!/bin/bash

# Compares VLM inference between vLLM and HuggingFace checkpoints
# Usage: ./scripts/compare_vllm.sh <model_path> [--max-tokens N] [--num-prompts N] [--prob-threshold F] [--seed N]

# Fail fast: abort on errors, unset variables, and failures inside pipelines.
set -euo pipefail

# Default values for the optional tuning parameters.
DEFAULT_MAX_TOKENS=20
DEFAULT_NUM_PROMPTS=100
DEFAULT_PROB_THRESHOLD=0.20
DEFAULT_SEED=42

# Parse arguments: the model path is required as the first positional argument.
if [ $# -lt 1 ]; then
    echo "Usage: $0 <model_path> [--max-tokens N] [--num-prompts N] [--prob-threshold F] [--seed N]" >&2
    echo "Example: $0 Qwen/Qwen2.5-VL-7B-Instruct" >&2
    echo "Example: $0 s3://ai2-oe-data/jakep/olmocr/model --max-tokens 50 --num-prompts 200" >&2
    exit 1
fi

MODEL_PATH="$1"
MAX_TOKENS="$DEFAULT_MAX_TOKENS"
NUM_PROMPTS="$DEFAULT_NUM_PROMPTS"
PROB_THRESHOLD="$DEFAULT_PROB_THRESHOLD"
SEED="$DEFAULT_SEED"

# Parse optional flag arguments; each flag must be followed by a value.
shift 1
while [[ $# -gt 0 ]]; do
    case "$1" in
        --max-tokens|--num-prompts|--prob-threshold|--seed)
            # Require an explicit value so 'shift 2' cannot run past argv
            # or silently assign an empty string.
            if [[ $# -lt 2 ]]; then
                echo "Option $1 requires a value" >&2
                exit 1
            fi
            case "$1" in
                --max-tokens)     MAX_TOKENS="$2" ;;
                --num-prompts)    NUM_PROMPTS="$2" ;;
                --prob-threshold) PROB_THRESHOLD="$2" ;;
                --seed)           SEED="$2" ;;
            esac
            shift 2
            ;;
        *)
            echo "Unknown option: $1" >&2
            exit 1
            ;;
    esac
done
54
# Refuse to run against a dirty working tree: the Beaker job is tagged with
# the current commit, so uncommitted changes would make results untraceable.
if ! git diff-index --quiet HEAD --; then
    echo "Error: There are uncommitted changes in the repository." >&2
    echo "Please commit or stash your changes before running the comparison." >&2
    echo "" >&2
    echo "Uncommitted changes:" >&2
    git status --short >&2
    exit 1
fi

# Use conda environment Python if available, otherwise fall back to system
# Python. ${CONDA_PREFIX:-} keeps this safe when the variable is unset.
if [ -n "${CONDA_PREFIX:-}" ]; then
    PYTHON="$CONDA_PREFIX/bin/python"
    echo "Using conda Python from: $CONDA_PREFIX"
else
    PYTHON="python"
    echo "Warning: No conda environment detected, using system Python"
fi

# Get the package version from olmocr/version.py.
VERSION=$("$PYTHON" -c 'import olmocr.version; print(olmocr.version.VERSION)')
echo "OlmOCR version: $VERSION"

# Get the first 10 characters of the current git commit hash.
GIT_HASH=$(git rev-parse HEAD | cut -c1-10)
echo "Git hash: $GIT_HASH"

# Get the current git branch name (used only for the experiment description).
GIT_BRANCH=$(git rev-parse --abbrev-ref HEAD)
echo "Git branch: $GIT_BRANCH"

# Compose the full image tag from version + commit so each build is unique
# per commit but stable across re-runs of the same commit.
IMAGE_TAG="olmocr-compare-vllm-${VERSION}-${GIT_HASH}"
echo "Building Docker image with tag: $IMAGE_TAG"

# Build the Docker image (amd64 to match the Beaker cluster hosts).
echo "Building Docker image..."
docker build --platform linux/amd64 -f ./Dockerfile -t "$IMAGE_TAG" .

# Resolve the Beaker username that owns the pushed image.
BEAKER_USER=$(beaker account whoami --format json | jq -r '.[0].name')
echo "Beaker user: $BEAKER_USER"

# Push the image to Beaker; a failure here is assumed to mean the image
# for this commit already exists, which is fine — we reuse it.
echo "Trying to push image to Beaker..."
if ! beaker image create --workspace ai2/oe-data-pdf --name "$IMAGE_TAG" "$IMAGE_TAG" 2>/dev/null; then
    echo "Warning: Beaker image with tag $IMAGE_TAG already exists. Using existing image."
fi
103
# Write the Beaker experiment launcher to a temp file. The quoted 'EOF'
# delimiter prevents any shell expansion inside the Python source.
cat <<'EOF' > /tmp/run_compare_vllm_experiment.py
import sys
from beaker import Beaker, ExperimentSpec, TaskSpec, TaskContext, ResultSpec, TaskResources, ImageSource, Priority, Constraints, EnvVar, DataMount

# Positional parameters forwarded from the shell wrapper.
image_tag = sys.argv[1]
beaker_user = sys.argv[2]
git_branch = sys.argv[3]
git_hash = sys.argv[4]
model_path = sys.argv[5]
max_tokens = sys.argv[6]
num_prompts = sys.argv[7]
prob_threshold = sys.argv[8]
seed = sys.argv[9]

# Initialize the Beaker client from the caller's environment/config.
b = Beaker.from_env(default_workspace="ai2/olmocr")

# Check whether a per-user AWS credentials secret exists; if so, the job
# mounts it so the comparison can read models from S3.
aws_creds_secret = f"{beaker_user}-AWS_CREDENTIALS_FILE"
try:
    # Try to fetch the secret purely to test for its existence.
    b.secret.get(aws_creds_secret, workspace="ai2/olmocr")
    has_aws_creds = True
    print(f"Found AWS credentials secret: {aws_creds_secret}")
except Exception:
    # except Exception (not bare except) so Ctrl-C / SystemExit still propagate.
    has_aws_creds = False
    print(f"AWS credentials secret not found: {aws_creds_secret}")

# Build the in-container command list for the comparison job.
commands = []
if has_aws_creds:
    commands.extend([
        "mkdir -p ~/.aws",
        'echo "$AWS_CREDENTIALS_FILE" > ~/.aws/credentials'
    ])

commands.extend([
    # Run the vLLM-vs-HF comparison itself.
    f'python -m olmocr.train.compare_vllm_checkpoint --model {model_path} --max-tokens {max_tokens} --num-prompts {num_prompts} --prob-threshold {prob_threshold} --seed {seed}'
])

# Assemble the task spec; env_vars are added below only when credentials exist.
task_spec_args = {
    "name": "olmocr-compare-vllm",
    "image": ImageSource(beaker=f"{beaker_user}/{image_tag}"),
    "command": [
        "bash", "-c",
        " && ".join(commands)
    ],
    "context": TaskContext(
        priority=Priority.normal,
        preemptible=True,
    ),
    "resources": TaskResources(gpu_count=1),
    "constraints": Constraints(cluster=["ai2/ceres-cirrascale", "ai2/jupiter-cirrascale-2"]),
    "result": ResultSpec(path="/noop-results"),
    "datasets": [
        DataMount.new(mount_path="/weka/oe-data-default", weka="oe-data-default"),
        DataMount.new(mount_path="/weka/oe-training-default", weka="oe-training-default"),
    ]
}

# Inject the AWS credentials secret as an env var only when it exists.
if has_aws_creds:
    task_spec_args["env_vars"] = [
        EnvVar(name="AWS_CREDENTIALS_FILE", secret=aws_creds_secret)
    ]

# Create the experiment spec; the description records branch/commit/model
# so the run is traceable from the Beaker UI.
experiment_spec = ExperimentSpec(
    description=f"OlmOCR vLLM vs HF Comparison - Branch: {git_branch}, Commit: {git_hash}, Model: {model_path}",
    budget="ai2/oe-data",
    tasks=[TaskSpec(**task_spec_args)],
)

# Submit the experiment and print a link to it.
experiment = b.experiment.create(spec=experiment_spec, workspace="ai2/olmocr")
print(f"Created comparison experiment: {experiment.id}")
print(f"View at: https://beaker.org/ex/{experiment.id}")
EOF
186
# Run the generated Python script to create the Beaker experiment.
echo "Creating Beaker experiment..."
echo "Comparing model: $MODEL_PATH"
echo "Max tokens: $MAX_TOKENS"
echo "Number of prompts: $NUM_PROMPTS"
echo "Probability threshold: $PROB_THRESHOLD"
echo "Random seed: $SEED"
# All arguments are quoted so model paths or values with spaces survive intact.
"$PYTHON" /tmp/run_compare_vllm_experiment.py "$IMAGE_TAG" "$BEAKER_USER" "$GIT_BRANCH" "$GIT_HASH" "$MODEL_PATH" "$MAX_TOKENS" "$NUM_PROMPTS" "$PROB_THRESHOLD" "$SEED"

# Clean up the temporary launcher script; -f keeps cleanup from failing
# under 'set -e' if the file is already gone.
rm -f /tmp/run_compare_vllm_experiment.py

echo "Comparison experiment submitted successfully!"