Skip to content

Commit a399741

Browse files
committed
Naming config entries better
1 parent 8e5e18f commit a399741

File tree

1 file changed

+8
-4
lines changed

1 file changed

+8
-4
lines changed

olmocr/train/configs/example_config.yaml

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,8 @@ model:
2727
dataset:
2828

2929
train:
30-
- root_dir: /weka/oe-data-default/jakep/olmOCR-mix-0225/processed_01_books_train_iabooks/
30+
- name: processed_01_books_train_iabooks
31+
root_dir: /weka/oe-data-default/jakep/olmOCR-mix-0225/processed_01_books_train_iabooks/
3132
pipeline: &basic_pipeline
3233
- name: FrontMatterParser
3334
front_matter_class: PageResponse
@@ -42,13 +43,16 @@ dataset:
4243
masking_index: -100
4344
end_of_message_token: "<|im_end|>"
4445
# Not putting in big bulk of data to speed up loading for debugging for now
45-
# - root_dir: /weka/oe-data-default/jakep/olmOCR-mix-0225/processed_00_documents_train_s2pdf/
46+
# - name: processed_00_documents_train_s2pdf
47+
# root_dir: /weka/oe-data-default/jakep/olmOCR-mix-0225/processed_00_documents_train_s2pdf/
4648
# pipeline: *basic_pipeline
4749

4850
eval:
49-
- root_dir: /weka/oe-data-default/jakep/olmOCR-mix-0225/processed_00_documents_eval_s2pdf/
51+
- name: processed_00_documents_eval_s2pdf
52+
root_dir: /weka/oe-data-default/jakep/olmOCR-mix-0225/processed_00_documents_eval_s2pdf/
5053
pipeline: *basic_pipeline
51-
- root_dir: /weka/oe-data-default/jakep/olmOCR-mix-0225/processed_01_books_eval_iabooks/
54+
- name: processed_01_books_eval_iabooks
55+
root_dir: /weka/oe-data-default/jakep/olmOCR-mix-0225/processed_01_books_eval_iabooks/
5256
pipeline: *basic_pipeline
5357

5458

0 commit comments

Comments
 (0)