Skip to content

Commit cb7b7c8

Browse files
fix: performance issue on startup (#69)
1 parent d51cbc0 commit cb7b7c8

File tree

4 files changed

+482
-128
lines changed

4 files changed

+482
-128
lines changed

Package.resolved

Lines changed: 5 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Sources/ElevenLabs/Conversation.swift

Lines changed: 142 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@ public final class Conversation: ObservableObject, RoomDelegate {
7070
throw ConversationError.alreadyActive
7171
}
7272

73+
let startTime = Date()
74+
print("[ElevenLabs-Timing] Starting conversation at \(startTime)")
75+
7376
state = .connecting
7477
self.options = options
7578

@@ -78,6 +81,8 @@ public final class Conversation: ObservableObject, RoomDelegate {
7881
self.deps = deps
7982

8083
// Acquire token / connection details
84+
let tokenFetchStart = Date()
85+
print("[ElevenLabs-Timing] Fetching token...")
8186
let connDetails: TokenService.ConnectionDetails
8287
do {
8388
connDetails = try await deps.tokenService.fetchConnectionDetails(configuration: auth)
@@ -93,14 +98,50 @@ public final class Conversation: ObservableObject, RoomDelegate {
9398
}
9499
}
95100

101+
print("[ElevenLabs-Timing] Token fetched in \(Date().timeIntervalSince(tokenFetchStart))s")
102+
96103
deps.connectionManager.onAgentReady = { [weak self, auth, options] in
97104
Task { @MainActor in
98105
guard let self else {
99106
return
100107
}
101108

102-
// Add minimal delay as safety buffer
103-
try? await Task.sleep(nanoseconds: 200_000_000) // 200ms delay
109+
print("[ElevenLabs-Timing] Agent ready callback triggered at \(Date().timeIntervalSince(startTime))s from start")
110+
111+
// Ensure room connection is fully complete before sending init
112+
// This prevents race condition where agent is ready but we can't publish data yet
113+
if let room = deps.connectionManager.room, room.connectionState == .connected {
114+
// Room is ready, proceed immediately
115+
print("[ElevenLabs-Timing] Room fully connected, proceeding...")
116+
} else {
117+
print("[ElevenLabs-Timing] Room not fully connected yet, waiting...")
118+
// Small delay to allow room connection to complete
119+
try? await Task.sleep(nanoseconds: 100_000_000) // 100ms
120+
if let room = deps.connectionManager.room, room.connectionState == .connected {
121+
print("[ElevenLabs-Timing] Room connected after wait")
122+
} else {
123+
print("[ElevenLabs-Timing] ⚠️ Room still not connected, proceeding anyway...")
124+
}
125+
}
126+
127+
print("[ElevenLabs-Timing] Sending conversation init...")
128+
129+
// Wait for data channel and agent to be fully ready (state-based, not time-based)
130+
let isReady = await self.waitForSystemReady()
131+
if isReady {
132+
print("[ElevenLabs-Timing] System confirmed ready for conversation init")
133+
// Add buffer based on whether agent was already there (fast path) or just joined
134+
let buffer = await self.determineOptimalBuffer()
135+
if buffer > 0 {
136+
print("[ElevenLabs-Timing] Adding \(Int(buffer))ms buffer for agent conversation handler readiness...")
137+
try? await Task.sleep(nanoseconds: UInt64(buffer * 1_000_000))
138+
print("[ElevenLabs-Timing] Buffer complete, sending conversation init")
139+
} else {
140+
print("[ElevenLabs-Timing] No buffer needed, sending conversation init immediately")
141+
}
142+
} else {
143+
print("[ElevenLabs-Timing] ⚠️ System readiness timeout, proceeding anyway")
144+
}
104145

105146
// Cancel any existing init attempt
106147
self.conversationInitTask?.cancel()
@@ -113,6 +154,7 @@ public final class Conversation: ObservableObject, RoomDelegate {
113154
// flip to .active once conversation init is sent
114155
self.state = .active(.init(agentId: self.extractAgentId(from: auth)))
115156
print("[ElevenLabs] State changed to active")
157+
print("[ElevenLabs-Timing] Total startup time: \(Date().timeIntervalSince(startTime))s")
116158
}
117159
}
118160

@@ -127,9 +169,12 @@ public final class Conversation: ObservableObject, RoomDelegate {
127169
}
128170

129171
// Connect room
172+
let connectionStart = Date()
173+
print("[ElevenLabs-Timing] Starting room connection...")
130174
do {
131175
try await deps.connectionManager.connect(details: connDetails,
132176
enableMic: !options.conversationOverrides.textOnly)
177+
print("[ElevenLabs-Timing] Room connected in \(Date().timeIntervalSince(connectionStart))s")
133178
} catch {
134179
// Convert connection errors to ConversationError
135180
throw ConversationError.connectionFailed(error)
@@ -400,16 +445,108 @@ public final class Conversation: ObservableObject, RoomDelegate {
400445
}
401446

402447
private func sendConversationInit(config: ConversationConfig) async throws {
448+
let initStart = Date()
403449
let initEvent = ConversationInitEvent(config: config)
404450
try await publish(.conversationInit(initEvent))
451+
print("[ElevenLabs-Timing] Conversation init sent in \(Date().timeIntervalSince(initStart))s")
452+
}
453+
454+
/// Determine optimal buffer time based on agent readiness pattern
455+
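/// Different agents need different buffer times for conversation processing readiness. Note: the returned value is in milliseconds (the caller multiplies it by 1_000_000 to get nanoseconds), not seconds.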
456+
private func determineOptimalBuffer() async -> TimeInterval {
457+
guard let room = deps?.connectionManager.room else { return 150.0 } // Default buffer if no room
458+
459+
// Check if we have any remote participants
460+
guard !room.remoteParticipants.isEmpty else {
461+
print("[ElevenLabs-Timing] No remote participants found, using longer buffer")
462+
return 200.0 // Longer wait if no agent detected
463+
}
464+
465+
// For now, we'll use a moderate buffer that should work for most cases
466+
// This is based on empirical observation that first messages arrive ~2-4s after conversation init
467+
// But we don't want to wait that long, so we'll use a compromise
468+
let buffer: TimeInterval = 150.0 // 150ms compromise between speed and reliability
469+
470+
print("[ElevenLabs-Timing] Determined optimal buffer: \(Int(buffer))ms")
471+
return buffer
472+
}
473+
474+
/// Wait for the system to be fully ready for conversation initialization
475+
/// Uses state-based detection instead of arbitrary delays
476+
private func waitForSystemReady(timeout: TimeInterval = 1.5) async -> Bool {
477+
let startTime = Date()
478+
let pollInterval: UInt64 = 50_000_000 // 50ms in nanoseconds
479+
let maxAttempts = Int(timeout * 1000 / 50) // Convert timeout to number of 50ms attempts
480+
481+
print("[ElevenLabs-Timing] Checking system readiness (state-based detection)...")
482+
483+
for attempt in 1 ... maxAttempts {
484+
// Check if we've exceeded timeout
485+
let elapsed = Date().timeIntervalSince(startTime)
486+
if elapsed > timeout {
487+
print("[ElevenLabs-Timing] System readiness timeout after \(String(format: "%.3f", elapsed))s")
488+
return false
489+
}
490+
491+
// Get room reference
492+
guard let room = deps?.connectionManager.room else {
493+
print("[ElevenLabs-Timing] Attempt \(attempt): No room available")
494+
try? await Task.sleep(nanoseconds: pollInterval)
495+
continue
496+
}
497+
498+
// Check 1: Room connection state
499+
guard room.connectionState == .connected else {
500+
print("[ElevenLabs-Timing] Attempt \(attempt): Room not connected (\(room.connectionState))")
501+
try? await Task.sleep(nanoseconds: pollInterval)
502+
continue
503+
}
504+
505+
// Check 2: Agent participant present
506+
guard !room.remoteParticipants.isEmpty else {
507+
print("[ElevenLabs-Timing] Attempt \(attempt): No remote participants")
508+
try? await Task.sleep(nanoseconds: pollInterval)
509+
continue
510+
}
511+
512+
// Check 3: Agent has published audio tracks (indicates full readiness)
513+
var agentHasAudioTrack = false
514+
for participant in room.remoteParticipants.values {
515+
if !participant.audioTracks.isEmpty {
516+
agentHasAudioTrack = true
517+
break
518+
}
519+
}
520+
521+
guard agentHasAudioTrack else {
522+
print("[ElevenLabs-Timing] Attempt \(attempt): Agent has no published audio tracks")
523+
try? await Task.sleep(nanoseconds: pollInterval)
524+
continue
525+
}
526+
527+
// Check 4: Data channel readiness (not tested directly; inferred from the checks above)
528+
// We'll assume if room is connected and agent is present with tracks, data channel is ready
529+
// This is a reasonable assumption since LiveKit handles data channel setup automatically
530+
531+
print("[ElevenLabs-Timing] ✅ System ready after \(String(format: "%.3f", elapsed))s (attempt \(attempt))")
532+
print("[ElevenLabs-Timing] - Room: connected")
533+
print("[ElevenLabs-Timing] - Remote participants: \(room.remoteParticipants.count)")
534+
print("[ElevenLabs-Timing] - Agent audio tracks: confirmed")
535+
536+
return true
537+
}
538+
539+
let elapsed = Date().timeIntervalSince(startTime)
540+
print("[ElevenLabs-Timing] System readiness check exhausted after \(String(format: "%.3f", elapsed))s")
541+
return false
405542
}
406543

407544
private func sendConversationInitWithRetry(config: ConversationConfig, maxAttempts: Int = 3) async {
408545
for attempt in 1 ... maxAttempts {
409-
// Exponential backoff: 0, 0.5, 1.0 seconds
546+
// More aggressive (linear, not exponential) backoff: 0, 100ms, 400ms
410547
if attempt > 1 {
411-
let delay = pow(2.0, Double(attempt - 2)) * 0.5 // 0.5, 1.0 seconds
412-
print("[Retry] Attempt \(attempt) of \(maxAttempts), exponential backoff delay: \(delay)s")
548+
let delay = Double(attempt - 1) * 0.1 + Double(attempt - 2) * 0.2 // 0.1s, 0.4s
549+
print("[Retry] Attempt \(attempt) of \(maxAttempts), backoff delay: \(delay)s")
413550
try? await Task.sleep(nanoseconds: UInt64(delay * 1_000_000_000))
414551
} else {
415552
print("[Retry] Attempt \(attempt) of \(maxAttempts), no delay")

0 commit comments

Comments
 (0)