@@ -99,14 +99,14 @@
   "llm.prediction.tools/serverPageDescriptionAddon": "Pass this through the request body as `tools` when using the server API",
   "llm.prediction.promptTemplate/title": "Prompt Template",
   "llm.prediction.promptTemplate/subTitle": "The format in which messages in chat are sent to the model. Changing this may introduce unexpected behavior - make sure you know what you're doing!",
-  "llm.prediction.mlx.speculativeDecoding.numDraftTokensExact/title": "Draft Tokens to Generate",
-  "llm.prediction.mlx.speculativeDecoding.numDraftTokensExact/subTitle": "The number of tokens to generate with the draft model per main model token. Find the sweet spot of compute vs. reward",
-  "llm.prediction.llama.speculativeDecoding.minContinueDraftingProbability/title": "Drafting Probability Cutoff",
-  "llm.prediction.llama.speculativeDecoding.minContinueDraftingProbability/subTitle": "Continue drafting until a token's probability falls below this threshold. Higher values generally mean lower risk, lower reward",
-  "llm.prediction.llama.speculativeDecoding.minDraftLengthToConsider/title": "Min Draft Size",
-  "llm.prediction.llama.speculativeDecoding.minDraftLengthToConsider/subTitle": "Drafts smaller than this will be ignored by the main model. Higher values generally mean lower risk, lower reward",
-  "llm.prediction.llama.speculativeDecoding.maxTokensToDraft/title": "Max Draft Size",
-  "llm.prediction.llama.speculativeDecoding.maxTokensToDraft/subTitle": "Max number of tokens allowed in a draft. Ceiling if all token probs are > the cutoff. Lower values generally mean lower risk, lower reward",
+  "llm.prediction.speculativeDecoding.numDraftTokensExact/title": "Draft Tokens to Generate",
+  "llm.prediction.speculativeDecoding.numDraftTokensExact/subTitle": "The number of tokens to generate with the draft model per main model token. Find the sweet spot of compute vs. reward",
+  "llm.prediction.speculativeDecoding.minContinueDraftingProbability/title": "Drafting Probability Cutoff",
+  "llm.prediction.speculativeDecoding.minContinueDraftingProbability/subTitle": "Continue drafting until a token's probability falls below this threshold. Higher values generally mean lower risk, lower reward",
+  "llm.prediction.speculativeDecoding.minDraftLengthToConsider/title": "Min Draft Size",
+  "llm.prediction.speculativeDecoding.minDraftLengthToConsider/subTitle": "Drafts smaller than this will be ignored by the main model. Higher values generally mean lower risk, lower reward",
+  "llm.prediction.speculativeDecoding.maxTokensToDraft/title": "Max Draft Size",
+  "llm.prediction.speculativeDecoding.maxTokensToDraft/subTitle": "Max number of tokens allowed in a draft. Ceiling if all token probs are > the cutoff. Lower values generally mean lower risk, lower reward",
   "llm.prediction.speculativeDecoding.draftModel/title": "Draft Model",
   "llm.prediction.reasoning.parsing/title": "Reasoning Section Parsing",
   "llm.prediction.reasoning.parsing/subTitle": "How to parse reasoning sections in the model's output",