Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 36 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

# Elevenlabs Conversational AI Swift SDK (experimental)

![convai222](https://github.com/user-attachments/assets/ca4fa726-5e98-4bbc-91b2-d055e957df7d)
Expand All @@ -8,6 +7,8 @@

ElevenLabs Conversational AI Swift SDK is a framework designed to integrate ElevenLabs' powerful conversational AI capabilities into your Swift applications. Leverage advanced audio processing and seamless WebSocket communication to create interactive and intelligent conversational voice experiences.

For detailed documentation, visit the [ElevenLabs Swift SDK documentation](https://elevenlabs.io/docs/conversational-ai/libraries/conversational-ai-sdk-swift).

> [!NOTE]
> This library is launching to primarily support Conversational AI. The support for speech synthesis and other more generic use cases is planned for the future.

Expand Down Expand Up @@ -75,6 +76,40 @@ Add the Elevenlabs Conversational AI Swift SDK to your project using Swift Packa
}
```

### Advanced Configuration

1. Using Client Tools

```swift
var clientTools = ElevenLabsSDK.ClientTools()
clientTools.register("weather", handler: { parameters async throws -> String? in
print("Weather parameters received:", parameters)
...
})

let conversation = try await ElevenLabsSDK.Conversation.startSession(
config: config,
callbacks: callbacks,
clientTools: clientTools
)
```

2. Using Overrides

```swift
let overrides = ElevenLabsSDK.ConversationConfigOverride(
agent: ElevenLabsSDK.AgentConfig(
prompt: ElevenLabsSDK.AgentPrompt(prompt: "You are a helpful assistant"),
language: .en
)
)

let config = ElevenLabsSDK.SessionConfig(
agentId: "your-agent-id",
overrides: overrides
)
```

### Manage the Session

- End Session
Expand Down
95 changes: 90 additions & 5 deletions Sources/ElevenLabsSwift/ElevenLabsSwift.swift
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,39 @@ public class ElevenLabsSDK {
Data(base64Encoded: base64)
}

// MARK: - Client Tools

/// A `@Sendable` asynchronous closure that receives a tool call's `Parameters`
/// and either returns an optional string result or throws.
public typealias ClientToolHandler = @Sendable (Parameters) async throws -> String?

/// String-keyed dictionary of untyped values handed to a `ClientToolHandler`.
public typealias Parameters = [String: Any]

/// A registry that maps tool names to the handlers that run them.
///
/// Handlers are added with `register(_:handler:)` and run by name through
/// `handle(_:parameters:)`. Every read and write of the underlying dictionary
/// happens while `lock` is held.
public struct ClientTools: Sendable {
    private var tools: [String: ClientToolHandler] = [:]
    private let lock = NSLock() // Serializes access to `tools`

    public init() {}

    /// Stores `handler` under `name`, replacing any handler already stored there.
    /// - Parameters:
    ///   - name: The tool name used later to look the handler up.
    ///   - handler: The closure to run when the tool is invoked.
    public mutating func register(_ name: String, handler: @escaping @Sendable ClientToolHandler) {
        lock.lock()
        defer { lock.unlock() }
        tools.updateValue(handler, forKey: name)
    }

    /// Runs the handler registered under `name` with the given parameters.
    /// - Parameters:
    ///   - name: The tool name to look up.
    ///   - parameters: Values forwarded unchanged to the handler.
    /// - Returns: Whatever the handler returns.
    /// - Throws: `ClientToolError.handlerNotFound` when nothing is registered
    ///   under `name`; otherwise any error thrown by the handler itself.
    public func handle(_ name: String, parameters: Parameters) async throws -> String? {
        guard let registered = registeredHandler(named: name) else {
            throw ClientToolError.handlerNotFound(name)
        }
        return try await registered(parameters)
    }

    /// Looks up a handler synchronously so the lock is released before the
    /// handler is awaited.
    private func registeredHandler(named name: String) -> ClientToolHandler? {
        lock.lock()
        defer { lock.unlock() }
        return tools[name]
    }
}

/// Errors raised while resolving or running a client tool.
///
/// Conforms to `LocalizedError` so that `localizedDescription` carries the
/// details of the failing case (for example the missing tool's name) instead
/// of Foundation's generic fallback text for plain `Error` enums.
public enum ClientToolError: Error, LocalizedError {
    /// No handler is registered under the associated tool name.
    case handlerNotFound(String)
    /// The parameters for a tool call were not usable.
    /// NOTE(review): presumably thrown by handlers themselves — confirm against callers.
    case invalidParameters
    /// A tool failed while running; the associated value describes the failure.
    /// NOTE(review): presumably thrown by handlers themselves — confirm against callers.
    case executionFailed(String)

    /// Human-readable description surfaced through `localizedDescription`.
    public var errorDescription: String? {
        switch self {
        case let .handlerNotFound(name):
            return "No client tool handler registered for '\(name)'"
        case .invalidParameters:
            return "Invalid parameters for client tool call"
        case let .executionFailed(reason):
            return "Client tool execution failed: \(reason)"
        }
    }
}

// MARK: - Audio Processing

public class AudioConcatProcessor {
Expand Down Expand Up @@ -190,14 +223,14 @@ public class ElevenLabsSDK {
public let overrides: ConversationConfigOverride?
public let customLlmExtraBody: [String: LlmExtraBodyValue]?

public init(signedUrl: String, overrides: ConversationConfigOverride? = nil, customLlmExtraBody: [String: LlmExtraBodyValue]? = nil) {
/// Creates a session configuration from `signedUrl`; `agentId` is set to `nil`.
///
/// - Parameters:
///   - signedUrl: Stored as `signedUrl`.
///   - overrides: Stored as `overrides`; defaults to `nil`.
///   - customLlmExtraBody: Stored as `customLlmExtraBody`; defaults to `nil`.
///   - clientTools: NOTE(review): this parameter has no internal name (`_`) and is never
///     stored or read in this initializer, so any value passed here is discarded.
public init(signedUrl: String, overrides: ConversationConfigOverride? = nil, customLlmExtraBody: [String: LlmExtraBodyValue]? = nil, clientTools _: ClientTools = ClientTools()) {
self.signedUrl = signedUrl
agentId = nil
self.overrides = overrides
self.customLlmExtraBody = customLlmExtraBody
}

public init(agentId: String, overrides: ConversationConfigOverride? = nil, customLlmExtraBody: [String: LlmExtraBodyValue]? = nil) {
public init(agentId: String, overrides: ConversationConfigOverride? = nil, customLlmExtraBody: [String: LlmExtraBodyValue]? = nil, clientTools _: ClientTools = ClientTools()) {
self.agentId = agentId
signedUrl = nil
self.overrides = overrides
Expand Down Expand Up @@ -559,6 +592,7 @@ public class ElevenLabsSDK {
private let input: Input
private let output: Output
private let callbacks: Callbacks
private let clientTools: ClientTools?

private let modeLock = NSLock()
private let statusLock = NSLock()
Expand Down Expand Up @@ -649,11 +683,12 @@ public class ElevenLabsSDK {
}
}

private init(connection: Connection, input: Input, output: Output, callbacks: Callbacks) {
private init(connection: Connection, input: Input, output: Output, callbacks: Callbacks, clientTools: ClientTools?) {
self.connection = connection
self.input = input
self.output = output
self.callbacks = callbacks
self.clientTools = clientTools

// Set the onProcess callback
audioConcatProcessor.onProcess = { [weak self] finished in
Expand All @@ -672,8 +707,9 @@ public class ElevenLabsSDK {
/// - Parameters:
/// - config: Session configuration
/// - callbacks: Callbacks for conversation events
/// - clientTools: Client tools callbacks (optional)
/// - Returns: A started `Conversation` instance
public static func startSession(config: SessionConfig, callbacks: Callbacks = Callbacks()) async throws -> Conversation {
public static func startSession(config: SessionConfig, callbacks: Callbacks = Callbacks(), clientTools: ClientTools? = nil) async throws -> Conversation {
// Step 1: Configure the audio session
try ElevenLabsSDK.configureAudioSession()

Expand All @@ -687,7 +723,7 @@ public class ElevenLabsSDK {
let output = try await Output.create(sampleRate: Double(connection.sampleRate))

// Step 5: Initialize the Conversation
let conversation = Conversation(connection: connection, input: input, output: output, callbacks: callbacks)
let conversation = Conversation(connection: connection, input: input, output: output, callbacks: callbacks, clientTools: clientTools)

// Step 6: Start the AVAudioEngine
try output.engine.start()
Expand Down Expand Up @@ -740,6 +776,9 @@ public class ElevenLabsSDK {
}

switch type {
case "client_tool_call":
handleClientToolCall(json)

case "interruption":
handleInterruptionEvent(json)

Expand Down Expand Up @@ -776,6 +815,52 @@ public class ElevenLabsSDK {
}
}

/// Handles a `client_tool_call` event: runs the named client tool and sends a
/// `client_tool_result` message back over the WebSocket.
///
/// - A malformed event, or parameters that cannot be serialized, are reported
///   through `callbacks.onError` and no result message is sent.
/// - When this conversation has no client tools, an error result is sent
///   (and `callbacks.onError` is invoked) instead of an empty success, so the
///   agent is not told that a tool ran when nothing was executed.
/// - Errors thrown while running the tool are sent back with `is_error: true`.
///
/// - Parameter json: The decoded event. It must contain a `client_tool_call`
///   object with `tool_name`, `tool_call_id` and `parameters` entries.
private func handleClientToolCall(_ json: [String: Any]) {
    guard let toolCall = json["client_tool_call"] as? [String: Any],
          let toolName = toolCall["tool_name"] as? String,
          let toolCallId = toolCall["tool_call_id"] as? String,
          let parameters = toolCall["parameters"] as? [String: Any]
    else {
        callbacks.onError("Invalid client tool call format", json)
        return
    }

    // Without any client tools the call cannot be executed; report that as an
    // error rather than letting optional chaining produce an empty "success".
    guard let clientTools = clientTools else {
        let message = "Client tool '\(toolName)' was requested but no client tools are registered"
        callbacks.onError(message, json)
        sendClientToolResult(toolCallId: toolCallId, result: message, isError: true)
        return
    }

    // `[String: Any]` is not Sendable, so hand the parameters to the Task as
    // serialized JSON `Data` and rebuild the dictionary inside it.
    let serializedParameters: Data
    do {
        serializedParameters = try JSONSerialization.data(withJSONObject: parameters, options: [])
    } catch {
        callbacks.onError("Failed to serialize parameters", error)
        return
    }

    Task { [clientTools, toolName, toolCallId, serializedParameters] in
        do {
            // Deserialize within the Task to pass into clientTools.handle
            let deserializedParameters = try JSONSerialization.jsonObject(with: serializedParameters) as? [String: Any] ?? [:]

            let result = try await clientTools.handle(toolName, parameters: deserializedParameters)

            // A handler may return nil; the result message then carries an empty string.
            sendClientToolResult(toolCallId: toolCallId, result: result ?? "", isError: false)
        } catch {
            sendClientToolResult(toolCallId: toolCallId, result: error.localizedDescription, isError: true)
        }
    }
}

/// Builds and sends a `client_tool_result` message for the given tool call.
private func sendClientToolResult(toolCallId: String, result: String, isError: Bool) {
    let response: [String: Any] = [
        "type": "client_tool_result",
        "tool_call_id": toolCallId,
        "result": result,
        "is_error": isError,
    ]
    sendWebSocketMessage(response)
}

private func handleInterruptionEvent(_ json: [String: Any]) {
guard let event = json["interruption_event"] as? [String: Any],
let eventId = event["event_id"] as? Int else { return }
Expand Down
Loading