Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion model2vec/distill/distillation.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,13 @@ def distill_from_model(

model_name = getattr(model, "name_or_path", "")

config = {"tokenizer_name": model_name, "apply_pca": pca_dims, "apply_zipf": apply_zipf}
config = {
"tokenizer_name": model_name,
"apply_pca": pca_dims,
"apply_zipf": apply_zipf,
"hidden_dim": pca_dims,
"seq_length": 1000000, # Set this to a high value since we don't have a sequence length limit.
}
# Get the language from the model card
info = model_info(model_name)
language = info.cardData.get("language")
Expand Down