Skip to content

Commit 1973bbc

Browse files
committed
feat: support arbitrary attributes for speak provider
1 parent 052aadd commit 1973bbc

File tree

9 files changed

+115
-58
lines changed

9 files changed

+115
-58
lines changed

deepgram/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,6 @@
342342
Listen,
343343
ListenProvider,
344344
Speak,
345-
SpeakProvider,
346345
Header,
347346
Item,
348347
Properties,

deepgram/client.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -356,7 +356,6 @@
356356
Listen,
357357
ListenProvider,
358358
Speak,
359-
SpeakProvider,
360359
Header,
361360
Item,
362361
Properties,

deepgram/clients/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -365,7 +365,6 @@
365365
Listen,
366366
ListenProvider,
367367
Speak,
368-
SpeakProvider,
369368
Header,
370369
Item,
371370
Properties,

deepgram/clients/agent/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@
4040
Listen,
4141
ListenProvider,
4242
Speak,
43-
SpeakProvider,
4443
Header,
4544
Item,
4645
Properties,

deepgram/clients/agent/client.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@
3939
Listen as LatestListen,
4040
ListenProvider as LatestListenProvider,
4141
Speak as LatestSpeak,
42-
SpeakProvider as LatestSpeakProvider,
4342
Header as LatestHeader,
4443
Item as LatestItem,
4544
Properties as LatestProperties,
@@ -87,7 +86,6 @@
8786
Listen = LatestListen
8887
ListenProvider = LatestListenProvider
8988
Speak = LatestSpeak
90-
SpeakProvider = LatestSpeakProvider
9189
Header = LatestHeader
9290
Item = LatestItem
9391
Properties = LatestProperties

deepgram/clients/agent/v1/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@
4444
Listen,
4545
ListenProvider,
4646
Speak,
47-
SpeakProvider,
4847
Header,
4948
Item,
5049
Properties,

deepgram/clients/agent/v1/websocket/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@
3535
Listen,
3636
ListenProvider,
3737
Speak,
38-
SpeakProvider,
3938
Header,
4039
Item,
4140
Properties,

deepgram/clients/agent/v1/websocket/options.py

Lines changed: 15 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# Use of this source code is governed by a MIT license that can be found in the LICENSE file.
33
# SPDX-License-Identifier: MIT
44

5-
from typing import List, Optional, Union, Any, Tuple
5+
from typing import List, Optional, Union, Any, Tuple, Dict
66
import logging
77

88
from dataclasses import dataclass, field
@@ -167,52 +167,6 @@ class ThinkProvider(BaseResponse):
167167
)
168168

169169

170-
@dataclass
171-
class SpeakProvider(BaseResponse):
172-
"""
173-
This class defines the provider for the Speak model.
174-
"""
175-
176-
type: Optional[str] = field(default="deepgram")
177-
"""
178-
Deepgram OR OpenAI model to use.
179-
"""
180-
model: Optional[str] = field(
181-
default="aura-2-thalia-en",
182-
metadata=dataclass_config(exclude=lambda f: f is None),
183-
)
184-
"""
185-
ElevenLabs or Cartesia model to use.
186-
"""
187-
model_id: Optional[str] = field(
188-
default=None, metadata=dataclass_config(exclude=lambda f: f is None)
189-
)
190-
"""
191-
Cartesia voice configuration.
192-
"""
193-
voice: Optional[CartesiaVoice] = field(
194-
default=None, metadata=dataclass_config(exclude=lambda f: f is None)
195-
)
196-
"""
197-
Cartesia language.
198-
"""
199-
language: Optional[str] = field(
200-
default=None, metadata=dataclass_config(exclude=lambda f: f is None)
201-
)
202-
"""
203-
ElevenLabs language.
204-
"""
205-
language_code: Optional[str] = field(
206-
default=None, metadata=dataclass_config(exclude=lambda f: f is None)
207-
)
208-
209-
def __getitem__(self, key):
210-
_dict = self.to_dict()
211-
if "voice" in _dict and isinstance(_dict["voice"], dict):
212-
_dict["voice"] = CartesiaVoice.from_dict(_dict["voice"])
213-
return _dict[key]
214-
215-
216170
@dataclass
217171
class Think(BaseResponse):
218172
"""
@@ -264,15 +218,26 @@ class Speak(BaseResponse):
264218
This class defines any configuration settings for the Speak model.
265219
"""
266220

267-
provider: SpeakProvider = field(default_factory=SpeakProvider)
221+
provider: dict = field(default_factory=dict)
268222
endpoint: Optional[Endpoint] = field(
269223
default=None, metadata=dataclass_config(exclude=lambda f: f is None)
270224
)
271225

226+
def __post_init__(self):
    """
    Wrap ``self.provider`` so its keys can also be used as attributes.

    After this runs, ``self.provider`` is a ``dict`` subclass on which
    ``provider.key``, ``provider.key = value`` and ``del provider.key``
    operate on the underlying items, alongside normal dict access.

    Calling the method again on an already wrapped provider leaves it
    untouched (same object), and a ``None`` provider is replaced by an
    empty wrapper instead of raising ``TypeError``.
    """

    class AttrDict(dict):
        """dict whose items are also reachable through attribute syntax."""

        # Marker used to recognise wrappers built by earlier calls. This
        # class object is re-created on every call, so an isinstance()
        # check against it could never match an existing provider.
        _attr_access = True

        def __getattr__(self, name):
            # Only invoked when regular attribute lookup has failed.
            try:
                return self[name]
            except KeyError:
                # Attribute protocol expects AttributeError; hide the
                # internal KeyError from the traceback.
                raise AttributeError(name) from None

        def __setattr__(self, name, value):
            self[name] = value

        def __delattr__(self, name):
            try:
                del self[name]
            except KeyError:
                raise AttributeError(name) from None

    current = self.provider
    if getattr(type(current), "_attr_access", False):
        # Already wrapped: keep the same object so references stay valid.
        return
    self.provider = AttrDict(current if current is not None else {})
238+
272239
def __getitem__(self, key):
    """
    Look up ``key`` in the dictionary form of this object.

    The object is converted with ``to_dict()``; when the resulting
    ``"endpoint"`` entry is a dict it is rebuilt through
    ``Endpoint.from_dict`` before the lookup. Every other entry,
    including ``"provider"``, is returned exactly as ``to_dict()``
    produced it. A missing key raises ``KeyError``.
    """
    data = self.to_dict()
    endpoint = data.get("endpoint")
    if isinstance(endpoint, dict):
        data["endpoint"] = Endpoint.from_dict(endpoint)
    return data[key]

examples/agent/arbitrary_keys/main.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
# Copyright 2025 Deepgram SDK contributors. All Rights Reserved.
2+
# Use of this source code is governed by a MIT license that can be found in the LICENSE file.
3+
# SPDX-License-Identifier: MIT
4+
5+
# Import dependencies and set up the main function
6+
import requests
7+
import wave
8+
import io
9+
import time
10+
import os
11+
import json
12+
import threading
13+
from datetime import datetime
14+
15+
from deepgram import (
16+
DeepgramClient,
17+
DeepgramClientOptions,
18+
AgentWebSocketEvents,
19+
AgentKeepAlive,
20+
)
21+
from deepgram.clients.agent.v1.websocket.options import SettingsOptions
22+
23+
def main():
    """
    Open an agent WebSocket session whose speak provider carries an extra key.

    Reads the API key from the ``DEEPGRAM_API_KEY`` environment variable,
    builds a ``SettingsOptions`` object, sets ``arbitrary_key`` on the speak
    provider, registers handlers that print each event and append it to
    ``chatlog.txt``, then starts and finishes the connection. Any exception
    is reported on stdout rather than propagated.
    """

    def append_chatlog(line):
        # Single place for the log file name, mode and encoding.
        with open("chatlog.txt", "a", encoding="utf-8") as chatlog:
            chatlog.write(f"{line}\n")

    try:
        # Initialize the Voice Agent
        api_key = os.getenv("DEEPGRAM_API_KEY")
        if not api_key:
            raise ValueError("DEEPGRAM_API_KEY environment variable is not set")
        # The key itself is not printed.
        print("API Key found:")

        # Initialize Deepgram client
        config = DeepgramClientOptions(
            options={
                "keepalive": "true",
                # "speaker_playback": "true",
            },
        )
        deepgram = DeepgramClient(api_key, config)
        connection = deepgram.agent.websocket.v("1")
        print("Created WebSocket connection...")

        # Configure the Agent
        options = SettingsOptions()
        # Audio input configuration
        options.audio.input.encoding = "linear16"
        options.audio.input.sample_rate = 24000
        # Audio output configuration
        options.audio.output.encoding = "linear16"
        options.audio.output.sample_rate = 24000
        options.audio.output.container = "wav"
        # Agent configuration
        options.agent.language = "en"
        options.agent.listen.provider.type = "deepgram"
        options.agent.listen.provider.model = "nova-3"
        options.agent.think.provider.type = "open_ai"
        options.agent.think.provider.model = "gpt-4o-mini"
        options.agent.think.prompt = "You are a friendly AI assistant."
        options.agent.speak.provider.type = "deepgram"
        options.agent.speak.provider.model = "aura-2-thalia-en"
        options.agent.greeting = "Hello! How can I help you today?"
        # A key the SDK does not declare, set through attribute access.
        options.agent.speak.provider.arbitrary_key = "test"

        def on_welcome(self, welcome, **kwargs):
            print(f"Welcome message received: {welcome}")
            append_chatlog(f"Welcome message: {welcome}")

        def on_settings_applied(self, settings_applied, **kwargs):
            print(f"Settings applied: {settings_applied}")
            append_chatlog(f"Settings applied: {settings_applied}")

        def on_error(self, error, **kwargs):
            print(f"Error received: {error}")
            append_chatlog(f"Error: {error}")

        # Register handlers
        connection.on(AgentWebSocketEvents.Welcome, on_welcome)
        connection.on(AgentWebSocketEvents.SettingsApplied, on_settings_applied)
        connection.on(AgentWebSocketEvents.Error, on_error)
        print("Event handlers registered")

        # Start the connection
        print("Starting WebSocket connection...")
        print(options)
        if not connection.start(options):
            print("Failed to start connection")
            return
        print("WebSocket connection started successfully")

        # Cleanup
        connection.finish()
        print("Finished")

    except Exception as e:
        # Top-level boundary of an example script: report and exit quietly.
        print(f"Error: {str(e)}")


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)