We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 75ef00a commit 19e70b9 (Copy full SHA for 19e70b9)
model2vec/distill/distillation.py
@@ -125,7 +125,13 @@ def distill_from_model(
125
126
model_name = getattr(model, "name_or_path", "")
127
128
- config = {"tokenizer_name": model_name, "apply_pca": pca_dims, "apply_zipf": apply_zipf}
+ config = {
129
+ "tokenizer_name": model_name,
130
+ "apply_pca": pca_dims,
131
+ "apply_zipf": apply_zipf,
132
+ "hidden_dim": embeddings.shape[1],
133
+ "seq_length": 1000000, # Set this to a high value since we don't have a sequence length limit.
134
+ }
135
# Get the language from the model card
136
info = model_info(model_name)
137
language = info.cardData.get("language")
0 commit comments