File tree Expand file tree Collapse file tree 1 file changed +8
-4
lines changed Expand file tree Collapse file tree 1 file changed +8
-4
lines changed Original file line number Diff line number Diff line change 27
27
dataset :
28
28
29
29
train :
30
- - root_dir : /weka/oe-data-default/jakep/olmOCR-mix-0225/processed_01_books_train_iabooks/
30
+ - name : processed_01_books_train_iabooks
31
+ root_dir : /weka/oe-data-default/jakep/olmOCR-mix-0225/processed_01_books_train_iabooks/
31
32
pipeline : &basic_pipeline
32
33
- name : FrontMatterParser
33
34
front_matter_class : PageResponse
@@ -42,13 +43,16 @@ dataset:
42
43
masking_index : -100
43
44
end_of_message_token : " <|im_end|>"
44
45
# Not putting in big bulk of data to speed up loading for debugging for now
45
- # - root_dir: /weka/oe-data-default/jakep/olmOCR-mix-0225/processed_00_documents_train_s2pdf/
46
+ # - name: processed_00_documents_train_s2pdf
47
+ # root_dir: /weka/oe-data-default/jakep/olmOCR-mix-0225/processed_00_documents_train_s2pdf/
46
48
# pipeline: *basic_pipeline
47
49
48
50
eval :
49
- - root_dir : /weka/oe-data-default/jakep/olmOCR-mix-0225/processed_00_documents_eval_s2pdf/
51
+ - name : processed_00_documents_eval_s2pdf
52
+ root_dir : /weka/oe-data-default/jakep/olmOCR-mix-0225/processed_00_documents_eval_s2pdf/
50
53
pipeline : *basic_pipeline
51
- - root_dir : /weka/oe-data-default/jakep/olmOCR-mix-0225/processed_01_books_eval_iabooks/
54
+ - name : processed_01_books_eval_iabooks
55
+ root_dir : /weka/oe-data-default/jakep/olmOCR-mix-0225/processed_01_books_eval_iabooks/
52
56
pipeline : *basic_pipeline
53
57
54
58
You can’t perform that action at this time.
0 commit comments