Skip to content

Commit 2e42285

Browse files
authored
Single writer option (#79)
* save_checkpoint perf monitoring
* Disable checkpoint save on exit
* local rank arg
* Single writer option
1 parent 1aa971a commit 2e42285

File tree

2 files changed

+4
-0
lines changed

2 files changed

+4
-0
lines changed

fast_io/model_checkpoint/deepspeed_save_model.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,7 @@ def _get_ds_config(args, writer_type):
43 43
"io_buffer_size": args.io_buffer_mb * (1024**2),
44 44
"io_buffer_double": not args.single_io_buffer,
45 45
"show_statistics": not args.no_statistics,
46 +
"data_parallel": not args.single_writer
46 47
}
47 48

48 49
return ds_config

fast_io/model_checkpoint/save_model_utils.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -118,6 +118,9 @@ def parse_arguments():
118 118
action='store_true',
119 119
help='Disable double buffering of i/o buffer.')
120 120

121 +
122 +
parser.add_argument('--single_writer', action='store_true', help='Disable parallel rank writes of data parallel (replicated) state')
123 +
121 124
args = parser.parse_args()
122 125
print(f'args = {args}')
123 126
return args

0 commit comments

Comments
 (0)