Skip to content

Commit 56b6e3a

Browse files
committed
Create configs for polypythia
1 parent 0540615 commit 56b6e3a

File tree

10 files changed

+150
-108
lines changed

10 files changed

+150
-108
lines changed

configs/poly_pythia/2-8B.yml

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
{
2+
"pipe_parallel_size": 1,
3+
"model_parallel_size": 1,
4+
5+
"num_layers": 32,
6+
"hidden_size": 2560,
7+
"num_attention_heads": 32,
8+
"seq_length": 2048,
9+
"max_position_embeddings": 2048,
10+
"pos_emb": "rotary",
11+
"rotary_pct": 0.25,
12+
"no_weight_tying": true,
13+
"gpt_j_residual": true,
14+
"output_layer_parallelism": "column",
15+
16+
"attention_config": [[["flash"], 32]],
17+
18+
"scaled_upper_triang_masked_softmax_fusion": true,
19+
"bias_gelu_fusion": true,
20+
21+
"init_method": "small_init",
22+
"output_layer_init_method": "wang_init",
23+
24+
"optimizer": {
25+
"type": "Adam",
26+
"params": {
27+
"lr": 0.00016,
28+
"betas": [0.9, 0.95],
29+
"eps": 1.0e-8
30+
}
31+
},
32+
"min_lr": 0.000016,
33+
34+
"zero_optimization": {
35+
"stage": 1,
36+
"allgather_partitions": true,
37+
"allgather_bucket_size": 500000000,
38+
"overlap_comm": true,
39+
"reduce_scatter": true,
40+
"reduce_bucket_size": 500000000,
41+
"contiguous_gradients": true,
42+
"cpu_offload": false
43+
},
44+
45+
"train_micro_batch_size_per_gpu": 32,
46+
"gradient_accumulation_steps": 1,
47+
"data_impl": "mmap",
48+
"num_workers": 1,
49+
50+
"checkpoint_activations": true,
51+
"checkpoint_num_layers": 1,
52+
"partition_activations": true,
53+
"synchronize_each_layer": true,
54+
55+
"gradient_clipping": 1.0,
56+
"weight_decay": 0.1,
57+
"hidden_dropout": 0,
58+
"attention_dropout": 0,
59+
60+
"fp16": {
61+
"fp16": true,
62+
"enabled": true,
63+
"loss_scale": 0,
64+
"loss_scale_window": 1000,
65+
"initial_scale_power": 12,
66+
"hysteresis": 2,
67+
"min_loss_scale": 1
68+
},
69+
70+
"train_iters": 143000,
71+
"lr_decay_iters": 143000,
72+
73+
"distributed_backend": "nccl",
74+
"lr_decay_style": "cosine",
75+
"warmup": 0.01,
76+
"checkpoint_factor": 1000,
77+
"extra_save_iters": [0,1,2,4,8,16,32,64,128,256,512],
78+
"eval_interval": 40000,
79+
"eval_iters": 10,
80+
81+
"log_grad_norm": true,
82+
83+
"log_interval": 10,
84+
"steps_per_print": 10,
85+
"wall_clock_breakdown": true,
86+
87+
"tokenizer_type": "HFTokenizer",
88+
"vocab_file": "/data/neox_tokenizer/tokenizer.json",
89+
90+
"use_wandb": true,
91+
"wandb_host": "https://api.wandb.ai",
92+
"wandb_project": "pythia-extra-seeds",
93+
"wandb_team": "eleutherai",
94+
95+
"hostfile": "/workspace/hostfile",
96+
"deepspeed_mpi": true,
97+
"launcher": "openmpi",
98+
99+
"checkpoint_validation_with_forward_pass": false
100+
}

configs/poly_pythia/2-8B_Seed_1.yml

Lines changed: 2 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -1,112 +1,6 @@
1 1
{
2-
"pipe_parallel_size": 1,
3-
"model_parallel_size": 1,
4-
5-
"num_layers": 32,
6-
"hidden_size": 2560,
7-
"num_attention_heads": 32,
8-
"seq_length": 2048,
9-
"max_position_embeddings": 2048,
10-
"pos_emb": "rotary",
11-
"rotary_pct": 0.25,
12-
"no_weight_tying": true,
13-
"gpt_j_residual": true,
14-
"output_layer_parallelism": "column",
15-
16-
"attention_config": [[["flash"], 32]],
17-
18-
"scaled_upper_triang_masked_softmax_fusion": true,
19-
"bias_gelu_fusion": true,
20-
21-
"init_method": "small_init",
22-
"output_layer_init_method": "wang_init",
23-
24-
"optimizer": {
25-
"type": "Adam",
26-
"params": {
27-
"lr": 0.00016,
28-
"betas": [0.9, 0.95],
29-
"eps": 1.0e-8
30-
}
31-
},
32-
"min_lr": 0.000016,
33-
34-
"zero_optimization": {
35-
"stage": 1,
36-
"allgather_partitions": true,
37-
"allgather_bucket_size": 500000000,
38-
"overlap_comm": true,
39-
"reduce_scatter": true,
40-
"reduce_bucket_size": 500000000,
41-
"contiguous_gradients": true,
42-
"cpu_offload": false
43-
},
44-
45-
"train_micro_batch_size_per_gpu": 8,
46-
"gradient_accumulation_steps": 2,
47-
"data_impl": "mmap",
48-
"num_workers": 1,
49-
50-
"checkpoint_activations": true,
51-
"checkpoint_num_layers": 1,
52-
"partition_activations": true,
53-
"synchronize_each_layer": true,
54-
55-
"gradient_clipping": 1.0,
56-
"weight_decay": 0.1,
57-
"hidden_dropout": 0,
58-
"attention_dropout": 0,
59-
60-
"fp16": {
61-
"fp16": true,
62-
"enabled": true,
63-
"loss_scale": 0,
64-
"loss_scale_window": 1000,
65-
"initial_scale_power": 12,
66-
"hysteresis": 2,
67-
"min_loss_scale": 1
68-
},
69-
70-
"train_iters": 143000,
71-
"lr_decay_iters": 143000,
72-
"distributed_backend": "nccl",
73-
"lr_decay_style": "cosine",
74-
"warmup": 0.01,
75-
"checkpoint_factor": 1000,
76-
# "extra_save_iters": [0,1,2,4,8,16,32,64,128,256,512],
77-
"eval_interval": 40000,
78-
"eval_iters": 10,
79-
80-
"log_grad_norm": true,
81-
82-
"log_interval": 10,
83-
"steps_per_print": 10,
84-
"wall_clock_breakdown": true,
85-
86-
"tokenizer_type": "HFTokenizer",
87-
88-
# Run-Specific
89 2
"seed": 1,
90-
91-
# Test Dataset
92-
"data_path": "/data/enwik8/enwik8_text_document",
93-
"vocab_file": "/data/neox_tokenizer/tokenizer.json",
94-
95-
# Checkpoints
3+
"data_path": "/data/pile/datasets--EleutherAI--pile-preshuffled-seeds/snapshots/27a5768e205eab4c2ddec1f3d2ab8cd56ba85519/seed1/pile_20B_tokenizer_text_document",
96 4
"save": "/checkpoints/2-8b-seed-1",
97-
"load": "/checkpoints/2-8b-seed-1",
98-
"checkpoint_validation_with_forward_pass": False,
99-
100-
# Wandb
101-
"use_wandb": False,
102-
"wandb_host": "https://api.wandb.ai",
103-
"wandb_project": "pythia-extra-seeds",
104-
"wandb_team": "eleutherai",
105-
# "wandb_run_name": "2-8b-seed-1-enwik8", # Not supported in old NeoX
106-
107-
# Distributed Training
108-
"hostfile": "/workspace/hostfile",
109-
"deepspeed_mpi": True,
110-
"launcher": "openmpi",
111-
# "deepspeed_extra_args": { "ssh_port": 2222 }, # Not supported in old NeoX
5+
"load": "/checkpoints/2-8b-seed-1"
112 6
}

configs/poly_pythia/2-8B_Seed_2.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"seed": 2,
3+
"data_path": "/data/pile/datasets--EleutherAI--pile-preshuffled-seeds/snapshots/27a5768e205eab4c2ddec1f3d2ab8cd56ba85519/seed2/pile_20B_tokenizer_text_document",
4+
"save": "/checkpoints/2-8b-seed-2",
5+
"load": "/checkpoints/2-8b-seed-2"
6+
}

configs/poly_pythia/2-8B_Seed_3.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"seed": 3,
3+
"data_path": "/data/pile/datasets--EleutherAI--pile-preshuffled-seeds/snapshots/27a5768e205eab4c2ddec1f3d2ab8cd56ba85519/seed3/pile_20B_tokenizer_text_document",
4+
"save": "/checkpoints/2-8b-seed-3",
5+
"load": "/checkpoints/2-8b-seed-3"
6+
}

configs/poly_pythia/2-8B_Seed_4.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"seed": 4,
3+
"data_path": "/data/pile/datasets--EleutherAI--pile-preshuffled-seeds/snapshots/27a5768e205eab4c2ddec1f3d2ab8cd56ba85519/seed4/pile_20B_tokenizer_text_document",
4+
"save": "/checkpoints/2-8b-seed-4",
5+
"load": "/checkpoints/2-8b-seed-4"
6+
}

configs/poly_pythia/2-8B_Seed_5.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"seed": 5,
3+
"data_path": "/data/pile/datasets--EleutherAI--pile-preshuffled-seeds/snapshots/27a5768e205eab4c2ddec1f3d2ab8cd56ba85519/seed5/pile_20B_tokenizer_text_document",
4+
"save": "/checkpoints/2-8b-seed-5",
5+
"load": "/checkpoints/2-8b-seed-5"
6+
}

configs/poly_pythia/2-8B_Seed_6.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"seed": 6,
3+
"data_path": "/data/pile/datasets--EleutherAI--pile-preshuffled-seeds/snapshots/27a5768e205eab4c2ddec1f3d2ab8cd56ba85519/seed6/pile_20B_tokenizer_text_document",
4+
"save": "/checkpoints/2-8b-seed-6",
5+
"load": "/checkpoints/2-8b-seed-6"
6+
}

configs/poly_pythia/2-8B_Seed_7.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"seed": 7,
3+
"data_path": "/data/pile/datasets--EleutherAI--pile-preshuffled-seeds/snapshots/27a5768e205eab4c2ddec1f3d2ab8cd56ba85519/seed7/pile_20B_tokenizer_text_document",
4+
"save": "/checkpoints/2-8b-seed-7",
5+
"load": "/checkpoints/2-8b-seed-7"
6+
}

configs/poly_pythia/2-8B_Seed_8.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"seed": 8,
3+
"data_path": "/data/pile/datasets--EleutherAI--pile-preshuffled-seeds/snapshots/27a5768e205eab4c2ddec1f3d2ab8cd56ba85519/seed8/pile_20B_tokenizer_text_document",
4+
"save": "/checkpoints/2-8b-seed-8",
5+
"load": "/checkpoints/2-8b-seed-8"
6+
}

configs/poly_pythia/2-8B_Seed_9.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"seed": 9,
3+
"data_path": "/data/pile/datasets--EleutherAI--pile-preshuffled-seeds/snapshots/27a5768e205eab4c2ddec1f3d2ab8cd56ba85519/seed9/pile_20B_tokenizer_text_document",
4+
"save": "/checkpoints/2-8b-seed-9",
5+
"load": "/checkpoints/2-8b-seed-9"
6+
}

0 commit comments

Comments
 (0)