Commit 4440caf

integrate groq / cerebras to the self-hosting (#466)
1 parent 8046065 commit 4440caf

11 files changed: +138 −1 lines

refact_known_models/passthrough.py

Lines changed: 88 additions & 0 deletions
@@ -133,4 +133,92 @@
         "pp1000t_generated": 15_000, # $15.00 / 1M tokens (2024 oct)
         "filter_caps": ["chat", "tools"],
     },
+    "groq-llama-3.1-8b": {
+        "backend": "litellm",
+        "provider": "groq",
+        "tokenizer_path": "Xenova/Meta-Llama-3.1-Tokenizer",
+        "resolve_as": "groq/llama-3.1-8b-instant",
+        "T": 128_000,
+        "T_out": 8000,
+        "pp1000t_prompt": 150,
+        "pp1000t_generated": 600, # TODO: don't know the price
+        "filter_caps": ["chat"],
+    },
+    "groq-llama-3.1-70b": {
+        "backend": "litellm",
+        "provider": "groq",
+        "tokenizer_path": "Xenova/Meta-Llama-3.1-Tokenizer",
+        "resolve_as": "groq/llama-3.1-70b-versatile",
+        "T": 128_000,
+        "T_out": 8000,
+        "pp1000t_prompt": 150,
+        "pp1000t_generated": 600, # TODO: don't know the price
+        "filter_caps": ["chat"],
+    },
+    "groq-llama-3.2-1b": {
+        "backend": "litellm",
+        "provider": "groq",
+        "tokenizer_path": "Xenova/Meta-Llama-3.1-Tokenizer",
+        "resolve_as": "groq/llama-3.2-1b-preview",
+        "T": 128_000,
+        "T_out": 8000,
+        "pp1000t_prompt": 150,
+        "pp1000t_generated": 600, # TODO: don't know the price
+        "filter_caps": ["chat"],
+    },
+    "groq-llama-3.2-3b": {
+        "backend": "litellm",
+        "provider": "groq",
+        "tokenizer_path": "Xenova/Meta-Llama-3.1-Tokenizer",
+        "resolve_as": "groq/llama-3.2-3b-preview",
+        "T": 128_000,
+        "T_out": 8000,
+        "pp1000t_prompt": 150,
+        "pp1000t_generated": 600, # TODO: don't know the price
+        "filter_caps": ["chat"],
+    },
+    "groq-llama-3.2-11b-vision": {
+        "backend": "litellm",
+        "provider": "groq",
+        "tokenizer_path": "Xenova/Meta-Llama-3.1-Tokenizer",
+        "resolve_as": "groq/llama-3.2-11b-vision-preview",
+        "T": 128_000,
+        "T_out": 8000,
+        "pp1000t_prompt": 150,
+        "pp1000t_generated": 600, # TODO: don't know the price
+        "filter_caps": ["chat"],
+    },
+    "groq-llama-3.2-90b-vision": {
+        "backend": "litellm",
+        "provider": "groq",
+        "tokenizer_path": "Xenova/Meta-Llama-3.1-Tokenizer",
+        "resolve_as": "groq/llama-3.2-90b-vision-preview",
+        "T": 128_000,
+        "T_out": 8000,
+        "pp1000t_prompt": 150,
+        "pp1000t_generated": 600, # TODO: don't know the price
+        "filter_caps": ["chat"],
+    },
+    "cerebras-llama3.1-8b": {
+        "backend": "litellm",
+        "provider": "cerebras",
+        "tokenizer_path": "Xenova/Meta-Llama-3.1-Tokenizer",
+        "resolve_as": "cerebras/llama3.1-8b",
+        "T": 8192,
+        "T_out": 4096,
+        "pp1000t_prompt": 150,
+        "pp1000t_generated": 600, # TODO: don't know the price
+        "filter_caps": ["chat"],
+    },
+    "cerebras-llama3.1-70b": {
+        "backend": "litellm",
+        "provider": "cerebras",
+        "tokenizer_path": "Xenova/Meta-Llama-3.1-Tokenizer",
+        "resolve_as": "cerebras/llama3.1-70b",
+        "T": 8192,
+        "T_out": 4096,
+        "pp1000t_prompt": 150,
+        "pp1000t_generated": 600, # TODO: don't know the price
+        "filter_caps": ["chat"],
+    }
 }
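
Each new entry follows the existing passthrough convention: `"backend": "litellm"` plus a `resolve_as` name that litellm understands, so a request for `groq-llama-3.1-8b` is forwarded as `groq/llama-3.1-8b-instant`. Below is a minimal sketch of that resolution, assuming the provider API key is already exported (as the `_integrations_env_setup` change further down does); the `MODELS` dict is a trimmed, hypothetical stand-in for the real table in `passthrough.py`:

```python
# Hypothetical, trimmed stand-in for the passthrough table in refact_known_models/passthrough.py
import litellm

MODELS = {
    "groq-llama-3.1-8b": {
        "backend": "litellm",
        "provider": "groq",
        "resolve_as": "groq/llama-3.1-8b-instant",
        "T": 128_000,      # context window
        "T_out": 8000,     # max generated tokens
        "filter_caps": ["chat"],
    },
}

def chat(model_key: str, messages: list) -> str:
    rec = MODELS[model_key]
    resolved = rec.get("resolve_as", model_key)  # same fallback the webgui uses
    # litellm picks up GROQ_API_KEY / CEREBRAS_API_KEY from the environment
    resp = litellm.completion(model=resolved, messages=messages, max_tokens=rec["T_out"])
    return resp.choices[0].message.content

# print(chat("groq-llama-3.1-8b", [{"role": "user", "content": "hello"}]))
```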

refact_utils/finetune/utils.py

Lines changed: 6 additions & 0 deletions
@@ -107,6 +107,12 @@ def _add_results_for_passthrough_provider(provider: str) -> None:
     if data.get('anthropic_api_enable'):
         _add_results_for_passthrough_provider('anthropic')
 
+    if data.get('cerebras_api_enable'):
+        _add_results_for_passthrough_provider('cerebras')
+
+    if data.get('groq_api_enable'):
+        _add_results_for_passthrough_provider('groq')
+
     for k, v in data.get("model_assign", {}).items():
         if model_dict := [d for d in data['models'] if d['name'] == k]:
             model_dict = model_dict[0]
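
The two new `if` blocks mirror the existing OpenAI/Anthropic handling: when a provider is enabled in the inference config, every passthrough model carrying that `provider` field gets added to the result set. A rough sketch of the pattern, with `passthrough_mini_db`, `results`, and `data` as hypothetical stand-ins for the real structures in `refact_utils`:

```python
# Hypothetical stand-ins; the real code lives in refact_utils/finetune/utils.py
passthrough_mini_db = {
    "groq-llama-3.1-8b": {"provider": "groq", "filter_caps": ["chat"]},
    "cerebras-llama3.1-8b": {"provider": "cerebras", "filter_caps": ["chat"]},
}
results = {}

def _add_results_for_passthrough_provider(provider: str) -> None:
    # pick up every passthrough model that belongs to the enabled provider
    for name, info in passthrough_mini_db.items():
        if info.get("provider") == provider:
            results[name] = info

data = {"cerebras_api_enable": True, "groq_api_enable": False}
if data.get("cerebras_api_enable"):
    _add_results_for_passthrough_provider("cerebras")
if data.get("groq_api_enable"):
    _add_results_for_passthrough_provider("groq")
```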

refact_webgui/webgui/selfhost_fastapi_completions.py

Lines changed: 6 additions & 1 deletion
@@ -231,6 +231,8 @@ def _integrations_env_setup(env_var_name: str, api_key_name: str, api_enable_nam
         litellm.modify_params = True # NOTE: for Anthropic API
         _integrations_env_setup("OPENAI_API_KEY", "openai_api_key", "openai_api_enable")
         _integrations_env_setup("ANTHROPIC_API_KEY", "anthropic_api_key", "anthropic_api_enable")
+        _integrations_env_setup("GROQ_API_KEY", "groq_api_key", "groq_api_enable")
+        _integrations_env_setup("CEREBRAS_API_KEY", "cerebras_api_key", "cerebras_api_enable")
 
     def _models_available_dict_rewrite(self, models_available: List[str]) -> Dict[str, Any]:
         rewrite_dict = {}
@@ -609,7 +611,10 @@ async def chat_completion_streamer():
             log(err_msg)
             yield prefix + json.dumps({"error": err_msg}) + postfix
 
-        if model_dict.get('backend') == 'litellm' and (model_name := model_dict.get('resolve_as', post.model)) in litellm.model_list:
+        if model_dict.get('backend') == 'litellm':
+            model_name = model_dict.get('resolve_as', post.model)
+            if model_name not in litellm.model_list:
+                log(f"warning: requested model {model_name} is not in the litellm.model_list (this might not be the issue for some providers)")
             log(f"chat/completions: model resolve {post.model} -> {model_name}")
             prompt_tokens_n = litellm.token_counter(model_name, messages=messages)
             if post.tools:
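
Two behaviors change here: the stored Groq/Cerebras keys are exported as the environment variables litellm expects (`GROQ_API_KEY`, `CEREBRAS_API_KEY`), and an unknown `resolve_as` name no longer short-circuits the request — it only logs a warning, since some provider models may be absent from `litellm.model_list`. A condensed sketch of both pieces, with `integrations` and `log` as hypothetical placeholders for the webgui's own config store and logger (the real `_integrations_env_setup` body is not shown in this diff):

```python
# Condensed sketch; placeholder names, not the webgui's actual classes
import os
import litellm

integrations = {"groq_api_key": "gsk-...", "groq_api_enable": True}  # placeholder store

def _integrations_env_setup(env_var_name: str, api_key_name: str, api_enable_name: str) -> None:
    api_key = integrations.get(api_key_name, "")
    enabled = integrations.get(api_enable_name, False)
    # export the key only when the integration is switched on
    os.environ[env_var_name] = api_key if (api_key and enabled) else ""

_integrations_env_setup("GROQ_API_KEY", "groq_api_key", "groq_api_enable")
_integrations_env_setup("CEREBRAS_API_KEY", "cerebras_api_key", "cerebras_api_enable")

def resolve_for_litellm(model_dict: dict, requested: str, log=print) -> str:
    model_name = model_dict.get("resolve_as", requested)
    if model_name not in litellm.model_list:
        # warn instead of refusing: newer provider models can be missing from litellm's list
        log(f"warning: requested model {model_name} is not in the litellm.model_list")
    return model_name
```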

refact_webgui/webgui/selfhost_model_assigner.py

Lines changed: 4 additions & 0 deletions
@@ -184,6 +184,8 @@ def first_run(self):
             },
             "openai_api_enable": False,
             "anthropic_api_enable": False,
+            "groq_api_enable": False,
+            "cerebras_api_enable": False,
         }
         self.models_to_watchdog_configs(default_config)
 
@@ -255,6 +257,8 @@ def models_info(self):
     def model_assignment(self):
         if os.path.exists(env.CONFIG_INFERENCE):
             j = json.load(open(env.CONFIG_INFERENCE, "r"))
+            j["groq_api_enable"] = j.get("groq_api_enable", False)
+            j["cerebras_api_enable"] = j.get("cerebras_api_enable", False)
         else:
             j = {"model_assign": {}}
 
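
`first_run()` now writes the two new flags into the default config, and `model_assignment()` back-fills them when reading an older `CONFIG_INFERENCE` file that predates this commit, so the rest of the webgui can rely on the keys being present. For example, under that defaulting rule:

```python
import json

# an older inference config written before this commit (illustrative content)
j = json.loads('{"model_assign": {}, "openai_api_enable": true, "anthropic_api_enable": false}')

# same back-filling as model_assignment(): missing flags default to False
j["groq_api_enable"] = j.get("groq_api_enable", False)
j["cerebras_api_enable"] = j.get("cerebras_api_enable", False)

assert j["groq_api_enable"] is False and j["cerebras_api_enable"] is False
```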

refact_webgui/webgui/selfhost_queue.py

Lines changed: 4 additions & 0 deletions
@@ -64,6 +64,10 @@ def _add_models_for_passthrough_provider(provider):
             _add_models_for_passthrough_provider('openai')
         if j.get("anthropic_api_enable"):
             _add_models_for_passthrough_provider('anthropic')
+        if j.get("groq_api_enable"):
+            _add_models_for_passthrough_provider('groq')
+        if j.get("cerebras_api_enable"):
+            _add_models_for_passthrough_provider('cerebras')
 
         return self._models_available
 

refact_webgui/webgui/static/tab-model-hosting.html

Lines changed: 8 additions & 0 deletions
@@ -38,6 +38,14 @@ <h3>3rd Party APIs</h3>
                 <input class="form-check-input" type="checkbox" role="switch" id="enable_anthropic">
                 <label class="form-check-label" for="enable_anthropic">Enable Anthropic API</label>
             </div>
+            <div class="form-check form-switch">
+                <input class="form-check-input" type="checkbox" role="switch" id="enable_groq">
+                <label class="form-check-label" for="enable_groq">Enable Groq API</label>
+            </div>
+            <div class="form-check form-switch">
+                <input class="form-check-input" type="checkbox" role="switch" id="enable_cerebras">
+                <label class="form-check-label" for="enable_cerebras">Enable Cerebras API</label>
+            </div>
             <div class="chat-enabler-status">
                 To enable Chat GPT add your API key in the <span id="redirect2credentials" class="main-tab-button fake-link" data-tab="settings">API Keys tab</span>.
             </div>

refact_webgui/webgui/static/tab-model-hosting.js

Lines changed: 6 additions & 0 deletions
@@ -117,6 +117,8 @@ function get_models()
 
     integration_switch_init('enable_chat_gpt', models_data['openai_api_enable']);
     integration_switch_init('enable_anthropic', models_data['anthropic_api_enable']);
+    integration_switch_init('enable_groq', models_data['groq_api_enable']);
+    integration_switch_init('enable_cerebras', models_data['cerebras_api_enable']);
 
     const more_gpus_notification = document.querySelector('.model-hosting-error');
     if(data.hasOwnProperty('more_models_than_gpus') && data.more_models_than_gpus) {
@@ -140,12 +142,16 @@ function get_models()
 function save_model_assigned() {
     const openai_enable = document.querySelector('#enable_chat_gpt');
     const anthropic_enable = document.querySelector('#enable_anthropic');
+    const groq_enable = document.querySelector('#enable_groq');
+    const cerebras_enable = document.querySelector('#enable_cerebras');
     const data = {
         model_assign: {
             ...models_data.model_assign,
         },
         openai_api_enable: openai_enable.checked,
         anthropic_api_enable: anthropic_enable.checked,
+        groq_api_enable: groq_enable.checked,
+        cerebras_api_enable: cerebras_enable.checked,
     };
     console.log(data);
     fetch("/tab-host-models-assign", {

refact_webgui/webgui/static/tab-settings.html

Lines changed: 4 additions & 0 deletions
@@ -6,6 +6,10 @@ <h2>API Integrations</h2>
             <input type="text" name="openai_api_key" value="" class="form-control" id="openai_api_key">
             <label for="anthropic_api_key" class="form-label mt-4">Anthropic API Key</label>
             <input type="text" name="anthropic_api_key" value="" class="form-control" id="anthropic_api_key">
+            <label for="groq_api_key" class="form-label mt-4">Groq API Key</label>
+            <input type="text" name="groq_api_key" value="" class="form-control" id="groq_api_key">
+            <label for="cerebras_api_key" class="form-label mt-4">Cerebras API Key</label>
+            <input type="text" name="cerebras_api_key" value="" class="form-control" id="cerebras_api_key">
             <!-- <div class="d-flex flex-row-reverse mt-3"><button type="button" class="btn btn-primary" id="integrations-save">Save</button></div>-->
         </div>
     </div>

refact_webgui/webgui/static/tab-settings.js

Lines changed: 8 additions & 0 deletions
@@ -172,6 +172,8 @@ function throw_int_saved_success_toast(msg) {
 function save_integration_api_keys() {
     const openai_api_key = document.getElementById('openai_api_key');
     const anthropic_api_key = document.getElementById('anthropic_api_key');
+    const groq_api_key = document.getElementById('groq_api_key');
+    const cerebras_api_key = document.getElementById('cerebras_api_key');
     const huggingface_api_key = document.getElementById('huggingface_api_key');
     fetch("/tab-settings-integrations-save", {
         method: "POST",
@@ -181,6 +183,8 @@ function save_integration_api_keys() {
         body: JSON.stringify({
             openai_api_key: openai_api_key.getAttribute('data-value'),
             anthropic_api_key: anthropic_api_key.getAttribute('data-value'),
+            groq_api_key: groq_api_key.getAttribute('data-value'),
+            cerebras_api_key: cerebras_api_key.getAttribute('data-value'),
             huggingface_api_key: huggingface_api_key.getAttribute('data-value'),
         })
     })
@@ -189,6 +193,8 @@ function save_integration_api_keys() {
         throw_int_saved_success_toast('API Key saved')
         openai_api_key.setAttribute('data-saved-value', openai_api_key.getAttribute('data-value'))
         anthropic_api_key.setAttribute('data-saved-value', anthropic_api_key.getAttribute('data-value'))
+        groq_api_key.setAttribute('data-saved-value', groq_api_key.getAttribute('data-value'))
+        cerebras_api_key.setAttribute('data-saved-value', cerebras_api_key.getAttribute('data-value'))
         huggingface_api_key.setAttribute('data-saved-value', huggingface_api_key.getAttribute('data-value'))
     });
 }
@@ -222,6 +228,8 @@ export function tab_settings_integrations_get() {
     .then(function(data) {
         integrations_input_init(document.getElementById('openai_api_key'), data['openai_api_key']);
         integrations_input_init(document.getElementById('anthropic_api_key'), data['anthropic_api_key']);
+        integrations_input_init(document.getElementById('groq_api_key'), data['groq_api_key']);
+        integrations_input_init(document.getElementById('cerebras_api_key'), data['cerebras_api_key']);
         integrations_input_init(document.getElementById('huggingface_api_key'), data['huggingface_api_key']);
     });
 }
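
On the settings side, the Groq and Cerebras keys travel in the same POST body as the existing ones to `/tab-settings-integrations-save`. A minimal sketch of that request from Python, again assuming a local server at `http://localhost:8008` and placeholder key values:

```python
import requests

keys = {
    "openai_api_key": "",
    "anthropic_api_key": "",
    "groq_api_key": "gsk-your-groq-key",          # placeholder values, not real keys
    "cerebras_api_key": "csk-your-cerebras-key",
    "huggingface_api_key": "",
}
r = requests.post("http://localhost:8008/tab-settings-integrations-save", json=keys, timeout=30)
r.raise_for_status()
```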

refact_webgui/webgui/tab_models_host.py

Lines changed: 2 additions & 0 deletions
@@ -42,6 +42,8 @@ class TabHostModelsAssign(BaseModel):
     # integrations
     openai_api_enable: bool = False
     anthropic_api_enable: bool = False
+    groq_api_enable: bool = False
+    cerebras_api_enable: bool = False
 
     model_config = ConfigDict(protected_namespaces=()) # avoiding model_ namespace protection
 
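
`TabHostModelsAssign` is the Pydantic model that validates the `/tab-host-models-assign` payload, so the two new booleans default to `False` for clients that omit them. A trimmed sketch of the relevant fields (the real class carries more than this hunk shows; the `model_assign` field type here is an assumption):

```python
from typing import Any, Dict
from pydantic import BaseModel, ConfigDict

class TabHostModelsAssign(BaseModel):
    model_assign: Dict[str, Any] = {}   # assumption: only the fields visible in this diff

    # integrations
    openai_api_enable: bool = False
    anthropic_api_enable: bool = False
    groq_api_enable: bool = False
    cerebras_api_enable: bool = False

    model_config = ConfigDict(protected_namespaces=())  # avoiding model_ namespace protection

# an old-style payload without the new flags still validates, defaulting them to False
req = TabHostModelsAssign(model_assign={}, openai_api_enable=True)
assert req.groq_api_enable is False and req.cerebras_api_enable is False
```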
