#!/bin/bash

# Runs an olmocr-bench run using the full pipeline (no fallback)
# Without model parameter (default behavior): uses the default image from hugging face
#   ./scripts/run_benchmark.sh
# With model parameter: for testing custom models
#   ./scripts/run_benchmark.sh --model your-model-name

# Exit on first error, treat unset variables as errors, and make a pipeline
# fail if any stage fails (the git-hash pipeline below relies on this).
set -euo pipefail

# Parse command line arguments.
# MODEL must start out truly empty: the launch step at the bottom of this
# script uses `[ -n "$MODEL" ]` to decide whether to forward --model.
MODEL=""
while [[ $# -gt 0 ]]; do
    case "$1" in
        --model)
            MODEL="$2"
            shift 2
            ;;
        *)
            echo "Unknown option: $1"
            echo "Usage: $0 [--model MODEL_NAME]"
            exit 1
            ;;
    esac
done
# Check for uncommitted changes — benchmark results are tagged with the git
# hash, so the working tree must exactly match a commit.
if ! git diff-index --quiet HEAD --; then
    echo "Error: There are uncommitted changes in the repository."
    echo "Please commit or stash your changes before running the benchmark."
    echo ""
    echo "Uncommitted changes:"
    git status --short
    exit 1
fi

# Use conda environment Python if available, otherwise use system Python.
# ${CONDA_PREFIX:-} avoids an unbound-variable error under set -u when no
# conda environment is active.
if [ -n "${CONDA_PREFIX:-}" ]; then
    PYTHON="$CONDA_PREFIX/bin/python"
    echo "Using conda Python from: $CONDA_PREFIX"
else
    PYTHON="python"
    echo "Warning: No conda environment detected, using system Python"
fi

# Get version from version.py
VERSION=$("$PYTHON" -c 'import olmocr.version; print(olmocr.version.VERSION)')
echo "OlmOCR version: $VERSION"

# Get first 10 characters of git hash
GIT_HASH=$(git rev-parse HEAD | cut -c1-10)
echo "Git hash: $GIT_HASH"

# Get current git branch name
GIT_BRANCH=$(git rev-parse --abbrev-ref HEAD)
echo "Git branch: $GIT_BRANCH"

# Create full image tag (version + short hash uniquely identifies the build)
IMAGE_TAG="olmocr-benchmark-${VERSION}-${GIT_HASH}"
echo "Building Docker image with tag: $IMAGE_TAG"

# Build the Docker image
echo "Building Docker image..."
docker build --platform linux/amd64 -f ./Dockerfile -t "$IMAGE_TAG" .

# Get Beaker username
BEAKER_USER=$(beaker account whoami --format json | jq -r '.[0].name')
echo "Beaker user: $BEAKER_USER"

# Push image to beaker; a pre-existing image with the same tag is fine since
# the tag already pins the exact version + commit.
echo "Trying to push image to Beaker..."
if ! beaker image create --workspace ai2/oe-data-pdf --name "$IMAGE_TAG" "$IMAGE_TAG" 2> /dev/null; then
    echo "Warning: Beaker image with tag $IMAGE_TAG already exists. Using existing image."
fi
# Create Python script to run beaker experiment.
# The quoted 'EOF' delimiter makes the heredoc body literal: nothing inside is
# expanded by the shell, so the Python f-strings and the $AWS_CREDENTIALS_FILE
# reference pass through untouched. The delimiter must match exactly —
# a trailing space on it would make bash swallow the rest of this file.
cat << 'EOF' > /tmp/run_benchmark_experiment.py
import sys
from beaker import Beaker, ExperimentSpec, TaskSpec, TaskContext, ResultSpec, TaskResources, ImageSource, Priority, Constraints, EnvVar

# Get image tag, beaker user, git branch, git hash, and optional model from command line
image_tag = sys.argv[1]
beaker_user = sys.argv[2]
git_branch = sys.argv[3]
git_hash = sys.argv[4]
model = sys.argv[5] if len(sys.argv) > 5 else None

# Initialize Beaker client
b = Beaker.from_env(default_workspace="ai2/olmocr")

# Build the pipeline command with optional model parameter
pipeline_cmd = "python -m olmocr.pipeline ./localworkspace --guided_decoding --markdown --pdfs ./olmOCR-bench/bench_data/pdfs/**/*.pdf"
if model:
    pipeline_cmd += f" --model {model}"

# Check if AWS credentials secret exists
aws_creds_secret = f"{beaker_user}-AWS_CREDENTIALS_FILE"
try:
    # Try to get the secret to see if it exists
    b.secret.get(aws_creds_secret, workspace="ai2/olmocr")
    has_aws_creds = True
    print(f"Found AWS credentials secret: {aws_creds_secret}")
except Exception:
    # Missing secret is expected for users without AWS access; run without it.
    has_aws_creds = False
    print(f"AWS credentials secret not found: {aws_creds_secret}")

# First experiment: Original benchmark job
commands = []
if has_aws_creds:
    commands.extend([
        "mkdir -p ~/.aws",
        'echo "$AWS_CREDENTIALS_FILE" > ~/.aws/credentials'
    ])
commands.extend([
    "git clone https://huggingface.co/datasets/allenai/olmOCR-bench",
    "cd olmOCR-bench && git lfs pull && cd ..",
    pipeline_cmd,
    "python olmocr/bench/scripts/workspace_to_bench.py localworkspace/ olmOCR-bench/bench_data/olmocr --bench-path ./olmOCR-bench/",
    "python -m olmocr.bench.benchmark --dir ./olmOCR-bench/bench_data"
])

# Build task spec with optional env vars
task_spec_args = {
    "name": "olmocr-benchmark",
    "image": ImageSource(beaker=f"{beaker_user}/{image_tag}"),
    "command": [
        "bash", "-c",
        " && ".join(commands)
    ],
    "context": TaskContext(
        priority=Priority.normal,
        preemptible=True,
    ),
    "resources": TaskResources(gpu_count=1),
    "constraints": Constraints(cluster=["ai2/ceres-cirrascale", "ai2/jupiter-cirrascale-2"]),
    "result": ResultSpec(path="/noop-results"),
}

# Add env vars if AWS credentials exist
if has_aws_creds:
    task_spec_args["env_vars"] = [
        EnvVar(name="AWS_CREDENTIALS_FILE", secret=aws_creds_secret)
    ]

# Create first experiment spec
experiment_spec = ExperimentSpec(
    description=f"OlmOCR Benchmark Run - Branch: {git_branch}, Commit: {git_hash}",
    budget="ai2/oe-data",
    tasks=[TaskSpec(**task_spec_args)],
)

# Create the first experiment
experiment = b.experiment.create(spec=experiment_spec, workspace="ai2/olmocr")
print(f"Created benchmark experiment: {experiment.id}")
print(f"View at: https://beaker.org/ex/{experiment.id}")
print("-------")
print("")

# Second experiment: Performance test job
perf_pipeline_cmd = "python -m olmocr.pipeline ./localworkspace --guided_decoding --markdown --pdfs s3://ai2-oe-data/jakep/olmocr/olmOCR-mix-0225/benchmark_set/*.pdf"
if model:
    perf_pipeline_cmd += f" --model {model}"

perf_commands = []
if has_aws_creds:
    perf_commands.extend([
        "mkdir -p ~/.aws",
        'echo "$AWS_CREDENTIALS_FILE" > ~/.aws/credentials'
    ])
perf_commands.append(perf_pipeline_cmd)

# Build performance task spec
perf_task_spec_args = {
    "name": "olmocr-performance",
    "image": ImageSource(beaker=f"{beaker_user}/{image_tag}"),
    "command": [
        "bash", "-c",
        " && ".join(perf_commands)
    ],
    "context": TaskContext(
        priority=Priority.normal,
        preemptible=True,
    ),
    "resources": TaskResources(gpu_count=1),
    "constraints": Constraints(cluster=["ai2/ceres-cirrascale", "ai2/jupiter-cirrascale-2"]),
    "result": ResultSpec(path="/noop-results"),
}

# Add env vars if AWS credentials exist
if has_aws_creds:
    perf_task_spec_args["env_vars"] = [
        EnvVar(name="AWS_CREDENTIALS_FILE", secret=aws_creds_secret)
    ]

# Create performance experiment spec
perf_experiment_spec = ExperimentSpec(
    description=f"OlmOCR Performance Test - Branch: {git_branch}, Commit: {git_hash}",
    budget="ai2/oe-data",
    tasks=[TaskSpec(**perf_task_spec_args)],
)

# Create the performance experiment
perf_experiment = b.experiment.create(spec=perf_experiment_spec, workspace="ai2/olmocr")
print(f"Created performance experiment: {perf_experiment.id}")
print(f"View at: https://beaker.org/ex/{perf_experiment.id}")
EOF
# Run the Python script to create the experiments.
# MODEL is only forwarded when --model was given; the experiment script treats
# a missing fifth argument as "use the default model".
echo "Creating Beaker experiments..."
if [ -n "$MODEL" ]; then
    echo "Using model: $MODEL"
    "$PYTHON" /tmp/run_benchmark_experiment.py "$IMAGE_TAG" "$BEAKER_USER" "$GIT_BRANCH" "$GIT_HASH" "$MODEL"
else
    "$PYTHON" /tmp/run_benchmark_experiment.py "$IMAGE_TAG" "$BEAKER_USER" "$GIT_BRANCH" "$GIT_HASH"
fi

# Clean up temporary file (-f keeps this idempotent if it is already gone)
rm -f /tmp/run_benchmark_experiment.py

echo "Benchmark experiments submitted successfully!"