Skip to content

Commit 08b263b

Browse files
committed
Cumulative rotation support
1 parent 5e991b6 commit 08b263b

File tree

2 files changed

+213
-3
lines changed

2 files changed

+213
-3
lines changed

olmocr/pipeline.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ async def process_page(args, worker_id: int, pdf_orig_path: str, pdf_local_path:
217217
MODEL_MAX_CONTEXT = 16384
218218
TEMPERATURE_BY_ATTEMPT = [0.1, 0.1, 0.2, 0.3, 0.5, 0.8, 0.9, 1.0]
219219
exponential_backoffs = 0
220-
local_image_rotation = 0
220+
cumulative_rotation = 0 # Track cumulative rotation instead of local
221221
attempt = 0
222222
await tracker.track_work(worker_id, f"{pdf_orig_path}-{page_num}", "started")
223223

@@ -227,7 +227,7 @@ async def process_page(args, worker_id: int, pdf_orig_path: str, pdf_local_path:
227227
pdf_local_path,
228228
page_num,
229229
args.target_longest_image_dim,
230-
image_rotation=local_image_rotation,
230+
image_rotation=cumulative_rotation,
231231
)
232232
# Change temperature as number of attempts increases to overcome repetition issues at expense of quality
233233
query["temperature"] = TEMPERATURE_BY_ATTEMPT[lookup_attempt]
@@ -273,7 +273,9 @@ async def process_page(args, worker_id: int, pdf_orig_path: str, pdf_local_path:
273273
logger.info(
274274
f"Got invalid_page rotation for {pdf_orig_path}-{page_num} attempt {attempt}, retrying with {page_response.rotation_correction} rotation"
275275
)
276-
local_image_rotation = page_response.rotation_correction
276+
# Add the rotation correction to the cumulative rotation
277+
cumulative_rotation = (cumulative_rotation + page_response.rotation_correction) % 360
278+
logger.info(f"Cumulative rotation is now {cumulative_rotation} degrees")
277279
raise ValueError(f"invalid_page rotation for {pdf_orig_path}-{page_num}")
278280

279281
metrics.add_metrics(**{"completed_pages": 1, f"finished_on_attempt_{attempt}": 1})

tests/test_pipeline.py

Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,3 +296,211 @@ async def mock_build_page_query(local_pdf_path, page, target_longest_image_dim,
296296
# Verify tracker was called correctly
297297
mock_tracker.track_work.assert_any_call(0, "test-edgar-rotated90.pdf-1", "started")
298298
mock_tracker.track_work.assert_any_call(0, "test-edgar-rotated90.pdf-1", "finished")
299+
300+
@pytest.mark.asyncio
301+
async def test_process_page_with_cumulative_rotation(self):
302+
"""Test that process_page correctly accumulates rotations across multiple attempts."""
303+
304+
# Path to the test PDF (can use any test PDF)
305+
test_pdf_path = "tests/gnarly_pdfs/edgar-rotated90.pdf"
306+
307+
# Mock arguments
308+
args = MockArgs()
309+
310+
# Counter to track number of API calls
311+
call_count = 0
312+
313+
async def mock_apost(url, json_data):
314+
nonlocal call_count
315+
call_count += 1
316+
317+
# First call - model detects rotation is needed (90 degrees)
318+
if call_count == 1:
319+
response_content = """---
320+
primary_language: en
321+
is_rotation_valid: false
322+
rotation_correction: 90
323+
is_table: false
324+
is_diagram: false
325+
---
326+
327+
This document appears to be rotated and needs correction."""
328+
329+
# Second call - model still detects rotation is needed (another 90 degrees)
330+
elif call_count == 2:
331+
response_content = """---
332+
primary_language: en
333+
is_rotation_valid: false
334+
rotation_correction: 90
335+
is_table: false
336+
is_diagram: false
337+
---
338+
339+
Document still needs rotation."""
340+
341+
# Third call - after 180 total degrees of rotation, model says it's correct
342+
elif call_count == 3:
343+
response_content = """---
344+
primary_language: en
345+
is_rotation_valid: true
346+
rotation_correction: 0
347+
is_table: false
348+
is_diagram: false
349+
---
350+
351+
UNITED STATES
352+
SECURITIES AND EXCHANGE COMMISSION
353+
Washington, D.C. 20549
354+
355+
Document is now correctly oriented after 180 degree rotation."""
356+
357+
else:
358+
raise ValueError(f"Unexpected call count: {call_count}")
359+
360+
# Mock response structure
361+
response_body = {
362+
"choices": [{"message": {"content": response_content}, "finish_reason": "stop"}],
363+
"usage": {"prompt_tokens": 1000, "completion_tokens": 100, "total_tokens": 1100},
364+
}
365+
366+
return 200, json.dumps(response_body).encode()
367+
368+
# Mock the worker tracker
369+
mock_tracker = AsyncMock()
370+
371+
# Ensure the test PDF exists
372+
assert os.path.exists(test_pdf_path), f"Test PDF not found at {test_pdf_path}"
373+
374+
# Track calls to build_page_query
375+
build_page_query_calls = []
376+
original_build_page_query = build_page_query
377+
378+
async def mock_build_page_query(local_pdf_path, page, target_longest_image_dim, image_rotation=0):
379+
build_page_query_calls.append(image_rotation)
380+
return await original_build_page_query(local_pdf_path, page, target_longest_image_dim, image_rotation)
381+
382+
with patch("olmocr.pipeline.apost", side_effect=mock_apost):
383+
with patch("olmocr.pipeline.tracker", mock_tracker):
384+
with patch("olmocr.pipeline.build_page_query", side_effect=mock_build_page_query):
385+
result = await process_page(args=args, worker_id=0, pdf_orig_path="test-cumulative-rotation.pdf", pdf_local_path=test_pdf_path, page_num=1)
386+
387+
# Verify the result
388+
assert isinstance(result, PageResult)
389+
assert result.page_num == 1
390+
assert result.is_fallback == False
391+
assert result.response.is_rotation_valid == True
392+
assert result.response.rotation_correction == 0
393+
assert result.response.natural_text is not None
394+
assert "180 degree rotation" in result.response.natural_text
395+
396+
# Verify that exactly 3 API calls were made
397+
assert call_count == 3
398+
399+
# Verify build_page_query was called with correct cumulative rotations
400+
assert len(build_page_query_calls) == 3
401+
assert build_page_query_calls[0] == 0 # First call with no rotation
402+
assert build_page_query_calls[1] == 90 # Second call with 90 degree rotation
403+
assert build_page_query_calls[2] == 180 # Third call with cumulative 180 degree rotation
404+
405+
# Verify tracker was called correctly
406+
mock_tracker.track_work.assert_any_call(0, "test-cumulative-rotation.pdf-1", "started")
407+
mock_tracker.track_work.assert_any_call(0, "test-cumulative-rotation.pdf-1", "finished")
408+
409+
@pytest.mark.asyncio
410+
async def test_process_page_rotation_wraps_around(self):
411+
"""Test that cumulative rotation correctly wraps around at 360 degrees."""
412+
413+
# Path to the test PDF
414+
test_pdf_path = "tests/gnarly_pdfs/edgar-rotated90.pdf"
415+
416+
# Mock arguments
417+
args = MockArgs()
418+
419+
# Counter to track number of API calls
420+
call_count = 0
421+
422+
async def mock_apost(url, json_data):
423+
nonlocal call_count
424+
call_count += 1
425+
426+
# First call - model detects rotation is needed (270 degrees)
427+
if call_count == 1:
428+
response_content = """---
429+
primary_language: en
430+
is_rotation_valid: false
431+
rotation_correction: 270
432+
is_table: false
433+
is_diagram: false
434+
---
435+
436+
Document needs 270 degree rotation."""
437+
438+
# Second call - model detects more rotation is needed (180 degrees)
439+
# Total would be 450, but should wrap to 90
440+
elif call_count == 2:
441+
response_content = """---
442+
primary_language: en
443+
is_rotation_valid: false
444+
rotation_correction: 180
445+
is_table: false
446+
is_diagram: false
447+
---
448+
449+
Document needs additional rotation."""
450+
451+
# Third call - after wrapped rotation (90 degrees), model says it's correct
452+
elif call_count == 3:
453+
response_content = """---
454+
primary_language: en
455+
is_rotation_valid: true
456+
rotation_correction: 0
457+
is_table: false
458+
is_diagram: false
459+
---
460+
461+
Document correctly oriented at 90 degrees total rotation."""
462+
463+
else:
464+
raise ValueError(f"Unexpected call count: {call_count}")
465+
466+
# Mock response structure
467+
response_body = {
468+
"choices": [{"message": {"content": response_content}, "finish_reason": "stop"}],
469+
"usage": {"prompt_tokens": 1000, "completion_tokens": 100, "total_tokens": 1100},
470+
}
471+
472+
return 200, json.dumps(response_body).encode()
473+
474+
# Mock the worker tracker
475+
mock_tracker = AsyncMock()
476+
477+
# Ensure the test PDF exists
478+
assert os.path.exists(test_pdf_path), f"Test PDF not found at {test_pdf_path}"
479+
480+
# Track calls to build_page_query
481+
build_page_query_calls = []
482+
original_build_page_query = build_page_query
483+
484+
async def mock_build_page_query(local_pdf_path, page, target_longest_image_dim, image_rotation=0):
485+
build_page_query_calls.append(image_rotation)
486+
return await original_build_page_query(local_pdf_path, page, target_longest_image_dim, image_rotation)
487+
488+
with patch("olmocr.pipeline.apost", side_effect=mock_apost):
489+
with patch("olmocr.pipeline.tracker", mock_tracker):
490+
with patch("olmocr.pipeline.build_page_query", side_effect=mock_build_page_query):
491+
result = await process_page(args=args, worker_id=0, pdf_orig_path="test-rotation-wrap.pdf", pdf_local_path=test_pdf_path, page_num=1)
492+
493+
# Verify the result
494+
assert isinstance(result, PageResult)
495+
assert result.page_num == 1
496+
assert result.is_fallback == False
497+
assert result.response.is_rotation_valid == True
498+
499+
# Verify that exactly 3 API calls were made
500+
assert call_count == 3
501+
502+
# Verify build_page_query was called with correct cumulative rotations
503+
assert len(build_page_query_calls) == 3
504+
assert build_page_query_calls[0] == 0 # First call with no rotation
505+
assert build_page_query_calls[1] == 270 # Second call with 270 degree rotation
506+
assert build_page_query_calls[2] == 90 # Third call with wrapped rotation (270 + 180 = 450 % 360 = 90)

0 commit comments

Comments
 (0)