
Commit 783b9cb

Merge branch 'main' into fix/typo-in-readme
2 parents: cfe4a3c + 6627eef

13 files changed: +201161, -50023 lines


CHANGELOG.md

Lines changed: 3 additions & 1 deletion
@@ -12,10 +12,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Add GSM8K to in-loop evals (BPB over correct continuation)
 - Support for specifying custom dataset objects in the `data` section of the config file.
 - Added OLMo2-0425-1B configs for public usage.
-
+- Added a .csv file of olmo-mix1124 csvgz files.
 ### Fixed
 
 - Changed a Union definition to be compatible with Python 3.9
+- Changed hf_olmo conversion to use backwards-compatible logic via `OLMo.from_checkpoint`.
+- fix save_overwrite pass
 
 
 ## [v0.6.1](https://github.com/allenai/OLMo/releases/tag/v0.6.1) - 2025-01-22

README.md

Lines changed: 2 additions & 1 deletion
@@ -104,6 +104,7 @@ Stage 1 is the biggest stage, where we train on 4T or 5T tokens on largely web-b
 | Training config | [OLMo2-1B-stage1.yaml](configs/official-0425/OLMo2-1B-stage1.yaml) |[OLMo2-7B-stage1.yaml](configs/official-1124/OLMo2-7B-stage1.yaml) | [OLMo2-13B-stage1.yaml](configs/official-1124/OLMo2-13B-stage1.yaml) | |
 | WandB | [wandb.ai/OLMo2-1B](https://api.wandb.ai/links/ai2-llm/izdtrtu0)|[wandb.ai/OLMo2-7B](https://wandb.ai/ai2-llm/OLMo-2-1124-7B/reports/OLMo-2-7B-Nov-2024--VmlldzoxMDUzMzE1OA) | [wandb.ai/OLMo2-13B](https://wandb.ai/ai2-llm/OLMo-2-1124-13B/reports/OLMo-2-13B-Nov-2024--VmlldzoxMDUzMjQxNg) |
 
+You can find the .csv.gz files containing the training data [here](configs/official-1124/provenance.csv).
 
 ### Stage 2 for the 1B
 
@@ -142,7 +143,7 @@ on 300B high quality tokens. Then we average ("soup") the models.
 | random seed 2662, 300B | [stage2-ingredient4-step11931-tokens300B](https://huggingface.co/allenai/OLMo-2-1124-13B/tree/stage2-ingredient4-step35773-tokens300B) | [OLMo2-13B-stage2-seed2662-300B.yaml](configs/official-1124/OLMo2-13B-stage2-seed2662-300B.yaml) | [wandb.ai/OLMo2-13B](https://wandb.ai/ai2-llm/OLMo-2-1124-13B/reports/OLMo-2-13B-Nov-2024--VmlldzoxMDUzMjQxNg) |
 | **final souped model** | [main](https://huggingface.co/allenai/OLMo-2-1124-13B/tree/main) | no config, we just averaged the weights in Python | |
 
-The training configs linked here are set up to download the latest checkpoint after stage 1, and start training from there.
+The training configs linked here are set up to download the latest checkpoints after stage 1, and start training from there.
 
 > Note: You can find all the information about the 32B in the [OLMo-core](https://github.com/allenai/OLMo-core) repository.
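The 13B table above says the final model was produced with "no config, we just averaged the weights in Python." As a rough illustration of that kind of soup, here is a minimal sketch that averages plain PyTorch state dicts; the helper and file names are illustrative, not taken from this repository, and it assumes all ingredients share the same architecture.

```python
import torch


def soup(checkpoint_paths):
    """Element-wise average of parameters from checkpoints with identical architectures."""
    state_dicts = [torch.load(p, map_location="cpu") for p in checkpoint_paths]
    averaged = {}
    for key in state_dicts[0]:
        # Stack the same tensor from every ingredient and take the mean.
        averaged[key] = torch.stack([sd[key].float() for sd in state_dicts]).mean(dim=0)
    return averaged


# Hypothetical usage with the four stage-2 ingredients:
# souped = soup(["ingredient1.pt", "ingredient2.pt", "ingredient3.pt", "ingredient4.pt"])
# torch.save(souped, "souped.pt")
```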

configs/official-1124/provenance.csv

Lines changed: 1120 additions & 0 deletions
Large diffs are not rendered by default.
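Since the new file is not rendered here, a quick way to inspect it locally is sketched below. The column layout is not visible in this diff, so the snippet reads rows generically instead of assuming header names.

```python
import csv

# Sketch: peek at the first few rows of the newly added provenance listing.
with open("configs/official-1124/provenance.csv", newline="") as f:
    for i, row in enumerate(csv.DictReader(f)):
        print(row)
        if i >= 4:
            break
```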

hf_olmo/convert_olmo_to_hf.py

Lines changed: 3 additions & 1 deletion
@@ -17,6 +17,7 @@
 from hf_olmo.tokenization_olmo_fast import OLMoTokenizerFast
 from olmo import ModelConfig, Tokenizer, TrainConfig
 from olmo.checkpoint import build_sharded_checkpointer
+from olmo.model import OLMo
 from olmo.util import _get_s3_client
 
 logger = logging.getLogger(__name__)
@@ -70,7 +71,8 @@ def write_model(checkpoint_dir: str, ignore_olmo_compatibility: bool = False):
     old_model_path = os.path.join(checkpoint_dir, "model.pt")
     new_model_path = os.path.join(checkpoint_dir, "pytorch_model.bin")
 
-    state_dict = torch.load(old_model_path, map_location="cpu")
+    # Loading the checkpoint using `OLMo.from_checkpoint` handles backwards compatibility logic.
+    state_dict = OLMo.from_checkpoint(checkpoint_dir).state_dict()
 
     # this takes care of the case where the model was saved with a different prefix,
     # typically due to unsharding.
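The second hunk is the substance of the changelog's "backwards-compatible logic" entry: instead of `torch.load`-ing `model.pt` directly, the converter now asks `OLMo.from_checkpoint` for the model and exports its state dict. A minimal sketch of that flow, using only what the hunk shows and omitting the key-prefix cleanup the real `write_model` performs afterwards (the helper name is illustrative):

```python
import os

import torch

from olmo.model import OLMo


def export_state_dict(checkpoint_dir: str) -> str:
    """Sketch: re-serialize an OLMo checkpoint as pytorch_model.bin for the HF wrapper."""
    # OLMo.from_checkpoint applies the library's backwards-compatibility fixes,
    # so older checkpoint layouts load the same way as current ones.
    state_dict = OLMo.from_checkpoint(checkpoint_dir).state_dict()
    new_model_path = os.path.join(checkpoint_dir, "pytorch_model.bin")
    torch.save(state_dict, new_model_path)
    return new_model_path
```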

hf_olmo/modeling_olmo.py

Lines changed: 4 additions & 3 deletions
@@ -3,7 +3,7 @@
 from typing import Callable, List, Optional, Tuple, Union
 
 import torch
-from transformers import PreTrainedModel
+from transformers import GenerationMixin, PreTrainedModel
 from transformers.cache_utils import Cache
 from transformers.modeling_outputs import CausalLMOutputWithPast
 from transformers.models.auto import AutoModelForCausalLM
@@ -38,7 +38,7 @@ def create_model_config_from_pretrained_config(config: OLMoConfig):
     return model_config
 
 
-class OLMoForCausalLM(PreTrainedModel):
+class OLMoForCausalLM(PreTrainedModel, GenerationMixin):
     """
     Extremely barebones HF model wrapper.
     """
@@ -143,7 +143,8 @@ def forward(
             hidden_states=hidden_states,
         )
 
-    def can_generate(self) -> bool:
+    @classmethod
+    def can_generate(cls) -> bool:
         return True
 
     def prepare_inputs_for_generation(
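Recent transformers releases no longer expose `.generate()` through `PreTrainedModel` alone (the test fixture below is bumped to 4.52.0.dev0), so the wrapper now mixes in `GenerationMixin` explicitly, and `can_generate` becomes a classmethod because transformers queries it on the class rather than on an instance. A hedged usage sketch; the checkpoint path is a placeholder, not something shipped in this commit:

```python
import hf_olmo  # noqa: F401  # importing registers the OLMo wrapper with the Auto classes
from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder path: any checkpoint produced by hf_olmo/convert_olmo_to_hf.py.
checkpoint = "path/to/converted-olmo-checkpoint"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint)

inputs = tokenizer("Language modeling is ", return_tensors="pt")
# .generate() is supplied by GenerationMixin; without the explicit mixin,
# newer transformers versions cannot generate with this wrapper.
output_ids = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```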

olmo/checkpoint.py

Lines changed: 6 additions & 1 deletion
@@ -1926,18 +1926,23 @@ def save_checkpoint(
         (checkpoint_dir / "model").mkdir(exist_ok=True, parents=True)
         (checkpoint_dir / "optim").mkdir(exist_ok=True, parents=True)
         (checkpoint_dir / "train").mkdir(exist_ok=True, parents=True)
+        barrier()
 
         wait_for(
             lambda: (checkpoint_dir / "model").exists(), "Waiting for checkpoint model directory", timeout=10.0
         )
+
         wait_for(
             lambda: (checkpoint_dir / "optim").exists(), "Waiting for checkpoint optim directory", timeout=10.0
         )
+
         wait_for(
             lambda: (checkpoint_dir / "train").exists(), "Waiting for checkpoint train directory", timeout=10.0
         )
 
-        local_files_created = save_model_and_optim_state(checkpoint_dir, dist_model, optim)
+        local_files_created = save_model_and_optim_state(
+            checkpoint_dir, dist_model, optim, save_overwrite=self.cfg.save_overwrite
+        )
         if upload_to is not None:
             for path in local_files_created:
                 path = Path(path)
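Two fixes land in this hunk. The new `barrier()` keeps ranks from racing past directory creation: without it, a fast rank can reach the `wait_for` checks, or start writing shards, before the directories exist on a shared filesystem. Separately, `save_model_and_optim_state` now receives `save_overwrite=self.cfg.save_overwrite`, which is what the changelog's "fix save_overwrite pass" entry refers to. A minimal sketch of the synchronization pattern, assuming a standard `torch.distributed` setup (the helper name is illustrative):

```python
from pathlib import Path

import torch.distributed as dist


def prepare_checkpoint_dirs(checkpoint_dir: Path) -> None:
    """Sketch: create checkpoint subdirectories, then synchronize all ranks."""
    for sub in ("model", "optim", "train"):
        (checkpoint_dir / sub).mkdir(exist_ok=True, parents=True)
    # Without this barrier, ranks that did not create the directories may look
    # for them, or begin writing into them, before they are visible.
    if dist.is_available() and dist.is_initialized():
        dist.barrier()
```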

test_fixtures/test-olmo-model/config.json

Lines changed: 1 addition & 1 deletion
@@ -44,7 +44,7 @@
   "rope_theta": 10000,
   "scale_emb_init": false,
   "scale_logits": false,
-  "transformers_version": "4.44.2",
+  "transformers_version": "4.52.0.dev0",
   "use_cache": true,
   "vocab_size": 50257,
   "weight_tying": true
Binary file not shown (838 Bytes).
