Commit a4752b5

Merge remote-tracking branch 'origin/main' into jakep/new_trainer
2 parents 9ef3fd7 + 61a13f9

3 files changed (README.md, olmocr/pipeline.py, pyproject.toml): +26 -6 lines changed

README.md

Lines changed: 10 additions & 6 deletions
@@ -138,7 +138,7 @@ conda activate olmocr
 pip install olmocr[bench]
 
 # For actually converting the files with your own GPU
-pip install olmocr.[gpu] --extra-index-url https://download.pytorch.org/whl/cu128
+pip install olmocr[gpu] --extra-index-url https://download.pytorch.org/whl/cu128
 
 # Recommended: Install flash infer for faster inference on GPU
 pip install https://download.pytorch.org/whl/cu128/flashinfer/flashinfer_python-0.2.5%2Bcu128torch2.7-cp38-abi3-linux_x86_64.whl
@@ -242,11 +242,11 @@ python -m olmocr.pipeline ./localworkspace --markdown --pdfs olmocr-sample.pdf
 
 ```bash
 python -m olmocr.pipeline --help
-usage: pipeline.py [-h] [--pdfs PDFS] [--workspace_profile WORKSPACE_PROFILE] [--pdf_profile PDF_PROFILE] [--pages_per_group PAGES_PER_GROUP]
-                   [--max_page_retries MAX_PAGE_RETRIES] [--max_page_error_rate MAX_PAGE_ERROR_RATE] [--workers WORKERS] [--apply_filter] [--stats] [--model MODEL]
-                   [--model_max_context MODEL_MAX_CONTEXT] [--model_chat_template MODEL_CHAT_TEMPLATE] [--target_longest_image_dim TARGET_LONGEST_IMAGE_DIM]
-                   [--target_anchor_text_len TARGET_ANCHOR_TEXT_LEN] [--beaker] [--beaker_workspace BEAKER_WORKSPACE] [--beaker_cluster BEAKER_CLUSTER]
-                   [--beaker_gpus BEAKER_GPUS] [--beaker_priority BEAKER_PRIORITY]
+usage: pipeline.py [-h] [--pdfs [PDFS ...]] [--workspace_profile WORKSPACE_PROFILE] [--pdf_profile PDF_PROFILE] [--pages_per_group PAGES_PER_GROUP] [--max_page_retries MAX_PAGE_RETRIES]
+                   [--max_page_error_rate MAX_PAGE_ERROR_RATE] [--workers WORKERS] [--apply_filter] [--stats] [--markdown] [--model MODEL] [--gpu-memory-utilization GPU_MEMORY_UTILIZATION]
+                   [--max_model_len MAX_MODEL_LEN] [--model_max_context MODEL_MAX_CONTEXT] [--model_chat_template MODEL_CHAT_TEMPLATE] [--target_longest_image_dim TARGET_LONGEST_IMAGE_DIM]
+                   [--target_anchor_text_len TARGET_ANCHOR_TEXT_LEN] [--beaker] [--beaker_workspace BEAKER_WORKSPACE] [--beaker_cluster BEAKER_CLUSTER] [--beaker_gpus BEAKER_GPUS]
+                   [--beaker_priority BEAKER_PRIORITY] [--port PORT] [--tensor-parallel-size TENSOR_PARALLEL_SIZE] [--data-parallel-size DATA_PARALLEL_SIZE]
                    workspace
 
 Manager for running millions of PDFs through a batch inference pipeline
@@ -273,6 +273,10 @@ options:
   --markdown            Also write natural text to markdown files preserving the folder structure of the input pdfs
   --model MODEL         List of paths where you can find the model to convert this pdf. You can specify several different paths here, and the script will try to use the
                         one which is fastest to access
+  --gpu-memory-utilization GPU_MEMORY_UTILIZATION
+                        Fraction of VRAM vLLM may pre-allocate for KV-cache (passed through to vllm serve).
+  --max_model_len MAX_MODEL_LEN
+                        Upper bound (tokens) vLLM will allocate KV-cache for; passed through to vllm serve as --max-model-len.
   --model_max_context MODEL_MAX_CONTEXT
                         Maximum context length that the model was fine tuned under
   --model_chat_template MODEL_CHAT_TEMPLATE

olmocr/pipeline.py

Lines changed: 14 additions & 0 deletions
@@ -584,6 +584,12 @@ async def vllm_server_task(model_name_or_path, args, semaphore):
         str(args.data_parallel_size),
     ]
 
+    if args.gpu_memory_utilization is not None:
+        cmd.extend(["--gpu-memory-utilization", str(args.gpu_memory_utilization)])
+
+    if args.max_model_len is not None:
+        cmd.extend(["--max-model-len", str(args.max_model_len)])
+
     proc = await asyncio.create_subprocess_exec(
         *cmd,
         stdout=asyncio.subprocess.PIPE,
@@ -1008,6 +1014,10 @@ async def main():
         help="List of paths where you can find the model to convert this pdf. You can specify several different paths here, and the script will try to use the one which is fastest to access",
         default="allenai/olmOCR-7B-0225-preview",
     )
+
+    parser.add_argument("--gpu-memory-utilization", type=float, help="Fraction of VRAM vLLM may pre-allocate for KV-cache (passed through to vllm serve).")
+    parser.add_argument("--max_model_len", type=int, help="Upper bound (tokens) vLLM will allocate KV-cache for; passed through to vllm serve as --max-model-len.")
+
     parser.add_argument("--model_max_context", type=int, default="8192", help="Maximum context length that the model was fine tuned under")
     parser.add_argument("--target_longest_image_dim", type=int, help="Dimension on longest side to use for rendering the pdf pages", default=1288)
     parser.add_argument("--target_anchor_text_len", type=int, help="Maximum amount of anchor text to use (characters), not used for new models", default=-1)
@@ -1028,6 +1038,10 @@ async def main():
     parser.add_argument("--data-parallel-size", "-dp", type=int, default=1, help="Data parallel size for vLLM")
     args = parser.parse_args()
 
+    logger.info(
+        "If you run out of GPU memory during start-up or get 'KV cache is larger than available memory' errors, retry with lower values, e.g. --gpu_memory_utilization 0.80 --max_model_len 16384"
+    )
+
     global workspace_s3, pdf_s3
     # set the global BASE_SERVER_PORT from args
     global BASE_SERVER_PORT
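For reference, the pass-through in vllm_server_task means the spawned server is started with the standard vllm serve flags of the same names, roughly as sketched below; the model path and values are illustrative, and other flags the pipeline adds (port, parallelism) are omitted.

```bash
# Rough sketch of the resulting server command when both options are set;
# model path and numeric values are illustrative only.
vllm serve allenai/olmOCR-7B-0225-preview \
  --gpu-memory-utilization 0.80 \
  --max-model-len 16384
```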

pyproject.toml

Lines changed: 2 additions & 0 deletions
@@ -132,6 +132,8 @@ olmocr = [
     "py.typed",
     "viewer/*.html",
     "eval/*.html",
+    "bench/katex/*.js",
+    "bench/katex/*.css",
 ]
 
 [tool.setuptools.dynamic]
