#!/usr/bin/env bash
#
# Build the OlmOCR Docker image, publish it to Beaker, and launch a training
# job through gantry.
#
# Run from a directory that contains ./Dockerfile and is inside the git
# checkout: the image is built from "." and tagged with the current commit.
#
# Tools used: git, docker, beaker, jq, gantry, and a Python interpreter that
# can import the olmocr package.
#
# Environment:
#   CONDA_PREFIX  Optional. When non-empty, $CONDA_PREFIX/bin/python is used
#                 to read the package version; otherwise "python" from PATH.
#   run_name      Optional. Description and task name of the gantry run.
#                 Defaults to olmocr-train-<version>-<git hash>.

# -e: stop on the first failing command; -u: reject unset variables;
# pipefail: a failing git/beaker at the head of a pipeline is not masked by
# the cut/jq that follows it.
set -euo pipefail

# Use conda environment Python if available, otherwise use system Python
if [[ -n "${CONDA_PREFIX:-}" ]]; then
  PYTHON="${CONDA_PREFIX}/bin/python"
  echo "Using conda Python from: ${CONDA_PREFIX}"
else
  PYTHON="python"
  echo "Warning: No conda environment detected, using system Python" >&2
fi

# Get version from version.py
VERSION=$("$PYTHON" -c 'import olmocr.version; print(olmocr.version.VERSION)')
echo "OlmOCR version: ${VERSION}"

# Get first 10 characters of git hash
GIT_HASH=$(git rev-parse HEAD | cut -c1-10)
echo "Git hash: ${GIT_HASH}"

# Get current git branch name (informational only; not part of the tag)
GIT_BRANCH=$(git rev-parse --abbrev-ref HEAD)
echo "Git branch: ${GIT_BRANCH}"

# Create full image tag
IMAGE_TAG="olmocr-benchmark-${VERSION}-${GIT_HASH}"
echo "Building Docker image with tag: ${IMAGE_TAG}"

# Name shown for the gantry run. An exported run_name wins; otherwise derive
# one from the same version/hash pair that identifies the image.
run_name="${run_name:-olmocr-train-${VERSION}-${GIT_HASH}}"

# Build the Docker image
echo "Building Docker image..."
docker build --platform linux/amd64 -f ./Dockerfile -t "$IMAGE_TAG" .

# Get Beaker username
BEAKER_USER=$(beaker account whoami --format json | jq -r '.[0].name')
echo "Beaker user: ${BEAKER_USER}"

# Push image to beaker. This is deliberately best-effort: a failure does not
# abort the script, because the usual cause is that the image was already
# pushed for this version/commit. stderr is left visible so that any other
# cause (auth, network, quota) can be told apart from that case.
echo "Trying to push image to Beaker..."
if ! beaker image create --workspace ai2/oe-data-pdf --name "$IMAGE_TAG" "$IMAGE_TAG"; then
  echo "Warning: Could not create Beaker image ${IMAGE_TAG} (it may already exist). Using existing image." >&2
fi

# NOTE(review): the image is created in workspace ai2/oe-data-pdf while the
# run uses workspace ai2/olmocr, and --beaker-image is given the bare tag
# without a "${BEAKER_USER}/" prefix -- confirm that gantry resolves it.
gantry run \
  --description "${run_name}" \
  --task-name "${run_name}" \
  --allow-dirty \
  --host-networking \
  --workspace ai2/olmocr \
  --beaker-image "$IMAGE_TAG" \
  --pip gantry-train-requirements.txt \
  --priority normal \
  --gpus 8 \
  --preemptible \
  --cluster "ai2/jupiter-cirrascale-2" \
  --budget ai2/oe-data \
  --env LOG_FILTER_TYPE=local_rank0_only \
  --env OMP_NUM_THREADS=8 \
  --env BEAKER_USER_ID="${BEAKER_USER}" \
  --env-secret AWS_ACCESS_KEY_ID=S2_AWS_ACCESS_KEY_ID \
  --env-secret AWS_SECRET_ACCESS_KEY=S2_AWS_SECRET_ACCESS_KEY \
  --env-secret WANDB_API_KEY=JAKE_WANDB_API_KEY \
  --shared-memory 10GiB \
  --yes \
  -- /bin/bash -c "source scripts/beaker/jupiter-ib.sh && python -m olmocr.train.train --config olmocr/train/configs/example_config.yaml"