Skip to content

Commit c106114

Browse files
committed
Fix for language value "no" being parsed as a boolean in YAML
1 parent df52cb0 commit c106114

File tree

2 files changed

+32
-11
lines changed

2 files changed

+32
-11
lines changed

olmocr/bench/runners/run_transformers.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,17 @@
77
from PIL import Image
88
from transformers import (
99
AutoProcessor,
10-
Qwen2VLForConditionalGeneration,
1110
Qwen2_5_VLForConditionalGeneration,
11+
Qwen2VLForConditionalGeneration,
1212
)
1313

1414
from olmocr.data.renderpdf import render_pdf_to_base64png
1515
from olmocr.prompts.anchor import get_anchor_text
1616
from olmocr.prompts.prompts import (
1717
PageResponse,
1818
build_finetuning_prompt,
19-
build_openai_silver_data_prompt,
2019
build_no_anchoring_yaml_prompt,
20+
build_openai_silver_data_prompt,
2121
)
2222
from olmocr.train.dataloader import FrontMatterParser
2323

@@ -52,13 +52,10 @@ def run_transformers(
5252

5353
if _cached_model is None:
5454
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
55-
model_name,
56-
torch_dtype=torch.bfloat16,
57-
device_map="auto",
58-
attn_implementation="flash_attention_2"
55+
model_name, torch_dtype=torch.bfloat16, device_map="auto", attn_implementation="flash_attention_2"
5956
).eval()
6057
processor = AutoProcessor.from_pretrained(model_name)
61-
58+
6259
model = model.to(device)
6360

6461
_cached_model = model
@@ -69,7 +66,7 @@ def run_transformers(
6966

7067
# Convert the requested page (page_num) of the PDF to a base64-encoded PNG image.
7168
image_base64 = render_pdf_to_base64png(pdf_path, page_num=page_num, target_longest_image_dim=target_longest_image_dim)
72-
69+
7370
if prompt_template == "yaml":
7471
prompt = build_no_anchoring_yaml_prompt()
7572
else:

olmocr/train/dataloader.py

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,19 @@
99
from io import BytesIO
1010
from os import PathLike
1111
from pathlib import Path
12-
from typing import Any, Callable, Dict, List, Optional, Tuple, Type, TypeAlias
12+
from typing import (
13+
Any,
14+
Callable,
15+
Dict,
16+
List,
17+
Optional,
18+
Tuple,
19+
Type,
20+
TypeAlias,
21+
Union,
22+
get_args,
23+
get_origin,
24+
)
1325

1426
import numpy as np
1527
import yaml
@@ -166,6 +178,12 @@ class FrontMatterParser(PipelineStep):
166178

167179
front_matter_class: Optional[Type] = None
168180

181+
def _is_optional_str(self, field_type: Type) -> bool:
182+
"""Check if a type is Optional[str]."""
183+
origin = get_origin(field_type)
184+
args = get_args(field_type)
185+
return origin is Union and type(None) in args and str in args
186+
169187
def _extract_front_matter_and_text(self, markdown_content: str) -> tuple[Dict[str, Any], str]:
170188
"""Extract YAML front matter and text from markdown content."""
171189
if markdown_content.startswith("---\n"):
@@ -210,8 +228,14 @@ def _parse_front_matter(self, front_matter_dict: Dict[str, Any], text: str) -> A
210228
kwargs[field_name] = int(value)
211229
elif field_type is bool and isinstance(value, str):
212230
kwargs[field_name] = value.lower() == "true"
213-
elif field_type is Optional[str]:
214-
kwargs[field_name] = value if value else None
231+
elif self._is_optional_str(field_type):
232+
# YAML parses bare values such as 'no' as booleans (e.g., 'no' -> False); map any boolean to None
233+
if isinstance(value, bool):
234+
kwargs[field_name] = None
235+
elif isinstance(value, str):
236+
kwargs[field_name] = value if value else None
237+
else:
238+
kwargs[field_name] = None if not value else value
215239
else:
216240
kwargs[field_name] = value
217241

0 commit comments

Comments
 (0)