Skip to content

Commit 927c905

Browse files
authored
Update voice API to version 8 (#3183)
1 parent e61eb51 commit 927c905

File tree

8 files changed

+131
-27
lines changed

8 files changed

+131
-27
lines changed

src/Discord.Net.Core/DiscordConfig.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ public class DiscordConfig
2626
/// An <see cref="int"/> representing the API version that Discord.Net uses to communicate with Discord's
2727
/// voice server.
2828
/// </returns>
29-
public const int VoiceAPIVersion = 3;
29+
public const int VoiceAPIVersion = 8;
3030
/// <summary>
3131
/// Gets the Discord.Net version, including the build number.
3232
/// </summary>
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
using Newtonsoft.Json;
2+
3+
namespace Discord.API.Voice
4+
{
5+
internal class HeartbeatParams
6+
{
7+
[JsonProperty("t")]
8+
public long Timestamp { get; set; }
9+
[JsonProperty("seq_ack")]
10+
public int SequenceAck { get; set; }
11+
}
12+
}

src/Discord.Net.WebSocket/API/Voice/VoiceOpCode.cs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,13 @@ internal enum VoiceOpCode : byte
2222
Hello = 8,
2323
/// <summary> C←S - Used to acknowledge a resumed connection. </summary>
2424
Resumed = 9,
25+
/// <summary> C←S - One or more clients have connected to the voice channel. </summary>
26+
ClientConnect = 11,
2527
/// <summary> C←S - Used to notify that a client has disconnected. </summary>
2628
ClientDisconnect = 13,
29+
/// <summary> C←S - Contains the flags of a user that connected to voice, also sent on initial connection for each existing user. </summary>
30+
ClientFlags = 18,
31+
/// <summary> C←S - Contains the platform type of a user that connected to voice, also sent on initial connection for each existing user. </summary>
32+
ClientPlatform = 20,
2733
}
2834
}

src/Discord.Net.WebSocket/Audio/AudioClient.cs

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -315,7 +315,7 @@ private async Task ProcessMessageAsync(VoiceOpCode opCode, object payload)
315315
_ssrc = data.SSRC;
316316

317317
if (!data.Modes.Contains(DiscordVoiceAPIClient.Mode))
318-
throw new InvalidOperationException($"Discord does not support {DiscordVoiceAPIClient.Mode}");
318+
throw new InvalidOperationException($"Discord does not support {DiscordVoiceAPIClient.Mode}. Available modes: {string.Join(", ", data.Modes)}");
319319

320320
ApiClient.SetUdpEndpoint(data.Ip, data.Port);
321321
await ApiClient.SendDiscoveryAsync(_ssrc).ConfigureAwait(false);
@@ -371,6 +371,9 @@ private async Task ProcessMessageAsync(VoiceOpCode opCode, object payload)
371371
await _speakingUpdatedEvent.InvokeAsync(data.UserId, data.Speaking);
372372
}
373373
break;
374+
case VoiceOpCode.ClientConnect:
375+
await _audioLogger.DebugAsync("Received ClientConnect").ConfigureAwait(false);
376+
break;
374377
case VoiceOpCode.ClientDisconnect:
375378
{
376379
await _audioLogger.DebugAsync("Received ClientDisconnect").ConfigureAwait(false);
@@ -391,6 +394,10 @@ private async Task ProcessMessageAsync(VoiceOpCode opCode, object payload)
391394
_ = _connection.CompleteAsync();
392395
}
393396
break;
397+
// Client flags and platform should be ignored: https://docs.discord.food/topics/voice-connections#client-connections
398+
case VoiceOpCode.ClientFlags:
399+
case VoiceOpCode.ClientPlatform:
400+
break;
394401
default:
395402
await _audioLogger.WarningAsync($"Unknown OpCode ({opCode})").ConfigureAwait(false);
396403
break;
@@ -513,7 +520,9 @@ private async Task RunHeartbeatAsync(int intervalMillis, CancellationToken cance
513520
_heartbeatTimes.Enqueue(now);
514521
try
515522
{
516-
await ApiClient.SendHeartbeatAsync().ConfigureAwait(false);
523+
// TODO: The last sequence number received should be sent.
524+
// https://discord.com/developers/docs/topics/voice-connections#buffered-resume
525+
await ApiClient.SendHeartbeatAsync(-1).ConfigureAwait(false);
517526
}
518527
catch (Exception ex)
519528
{

src/Discord.Net.WebSocket/Audio/Sodium/SecretBox.cs

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,54 @@
1-
using System;
21
using System.Runtime.InteropServices;
32
using System.Security;
43

54
namespace Discord.Audio
65
{
76
public unsafe static class SecretBox
87
{
9-
[DllImport("libsodium", EntryPoint = "crypto_secretbox_easy", CallingConvention = CallingConvention.Cdecl)]
10-
private static extern int SecretBoxEasy(byte* output, byte* input, long inputLength, byte[] nonce, byte[] secret);
11-
[DllImport("libsodium", EntryPoint = "crypto_secretbox_open_easy", CallingConvention = CallingConvention.Cdecl)]
12-
private static extern int SecretBoxOpenEasy(byte* output, byte* input, long inputLength, byte[] nonce, byte[] secret);
8+
[DllImport("libsodium", EntryPoint = "crypto_aead_xchacha20poly1305_ietf_encrypt", CallingConvention = CallingConvention.Cdecl)]
9+
private static extern int Encrypt(byte* ciphertext, out ulong ciphertextLength, byte* message, ulong messageLength, byte* ad, ulong adLength, byte* nsec, byte[] nonce, byte[] key);
1310

14-
public static int Encrypt(byte[] input, int inputOffset, int inputLength, byte[] output, int outputOffset, byte[] nonce, byte[] secret)
11+
[DllImport("libsodium", EntryPoint = "crypto_aead_xchacha20poly1305_ietf_decrypt", CallingConvention = CallingConvention.Cdecl)]
12+
private static extern int Decrypt(byte* plaintext, out ulong plaintextLength, byte* nsec, byte* ciphertext, ulong ciphertextLength, byte* ad, ulong adLength, byte[] nonce, byte[] key);
13+
14+
public static int Encrypt(byte[] input, int inputOffset, int inputLength, byte[] output, int outputOffset, byte[] header, byte[] nonce, byte[] key)
1515
{
1616
fixed (byte* inPtr = input)
1717
fixed (byte* outPtr = output)
18+
fixed (byte* adPtr = header)
1819
{
19-
int error = SecretBoxEasy(outPtr + outputOffset, inPtr + inputOffset, inputLength, nonce, secret);
20+
int error = Encrypt(
21+
outPtr + outputOffset, out ulong cipherLen,
22+
inPtr + inputOffset, (ulong)inputLength,
23+
adPtr, (ulong)header.Length,
24+
null, nonce, key
25+
);
26+
2027
if (error != 0)
21-
throw new SecurityException($"Sodium Error: {error}");
22-
return inputLength + 16;
28+
throw new SecurityException($"Sodium AEAD Error: {error}");
29+
30+
return (int)cipherLen;
2331
}
2432
}
25-
public static int Decrypt(byte[] input, int inputOffset, int inputLength, byte[] output, int outputOffset, byte[] nonce, byte[] secret)
33+
34+
public static int Decrypt(byte[] input, int inputOffset, int inputLength, byte[] output, int outputOffset, byte[] header, byte[] nonce, byte[] key)
2635
{
2736
fixed (byte* inPtr = input)
2837
fixed (byte* outPtr = output)
38+
fixed (byte* adPtr = header)
2939
{
30-
int error = SecretBoxOpenEasy(outPtr + outputOffset, inPtr + inputOffset, inputLength, nonce, secret);
40+
int error = Decrypt(
41+
outPtr + outputOffset, out ulong plainLen,
42+
null,
43+
inPtr + inputOffset, (ulong)inputLength,
44+
adPtr, (ulong)header.Length,
45+
nonce, key
46+
);
47+
3148
if (error != 0)
32-
throw new SecurityException($"Sodium Error: {error}");
33-
return inputLength - 16;
49+
throw new SecurityException($"Sodium AEAD Decrypt Error: {error}");
50+
51+
return (int)plainLen;
3452
}
3553
}
3654
}

src/Discord.Net.WebSocket/Audio/Streams/SodiumDecryptStream.cs

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ namespace Discord.Audio.Streams
99
/// </summary>
1010
public class SodiumDecryptStream : AudioOutStream
1111
{
12+
private const int RtpHeaderSize = 12;
13+
private const int NonceSize = 24;
14+
private const int NonceCounterSize = 4;
15+
1216
private readonly AudioClient _client;
1317
private readonly AudioStream _next;
1418
private readonly byte[] _nonce;
@@ -21,7 +25,7 @@ public SodiumDecryptStream(AudioStream next, IAudioClient client)
2125
{
2226
_next = next;
2327
_client = (AudioClient)client;
24-
_nonce = new byte[24];
28+
_nonce = new byte[NonceSize];
2529
}
2630

2731
public override Task WriteAsync(byte[] buffer, int offset, int count, CancellationToken cancelToken)
@@ -31,9 +35,27 @@ public override Task WriteAsync(byte[] buffer, int offset, int count, Cancellati
3135
if (_client.SecretKey == null)
3236
return Task.CompletedTask;
3337

34-
Buffer.BlockCopy(buffer, 0, _nonce, 0, 12); //Copy RTP header to nonce
35-
count = SecretBox.Decrypt(buffer, offset + 12, count - 12, buffer, offset + 12, _nonce, _client.SecretKey);
36-
return _next.WriteAsync(buffer, 0, count + 12, cancelToken);
38+
// Extract nonce from the payload.
39+
for (int i = 0; i < NonceCounterSize; i++ )
40+
_nonce[i] = buffer[offset + count - NonceCounterSize + i]; // The last NonceCounterSize bytes of the packet are the nonce prefix; copy them as-is (same byte order the encrypt side appends) and never read past offset + count
41+
42+
// Decrypt payload
43+
byte[] rtpHeader = new byte[RtpHeaderSize];
44+
Buffer.BlockCopy(buffer, offset, rtpHeader, 0, rtpHeader.Length);
45+
int payloadOffset = offset + rtpHeader.Length;
46+
int payloadLength = count - rtpHeader.Length - NonceCounterSize;
47+
int decryptedLength = SecretBox.Decrypt(
48+
buffer,
49+
payloadOffset,
50+
payloadLength,
51+
buffer,
52+
payloadOffset,
53+
rtpHeader,
54+
_nonce,
55+
_client.SecretKey);
56+
57+
int packageLength = rtpHeader.Length + decryptedLength;
58+
return _next.WriteAsync(buffer, offset, packageLength, cancelToken);
3759
}
3860

3961
public override Task FlushAsync(CancellationToken cancelToken)

src/Discord.Net.WebSocket/Audio/Streams/SodiumEncryptStream.cs

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,18 +9,23 @@ namespace Discord.Audio.Streams
99
/// </summary>
1010
public class SodiumEncryptStream : AudioOutStream
1111
{
12+
private const int RtpHeaderSize = 12;
13+
private const int NonceSize = 24;
14+
1215
private readonly AudioClient _client;
1316
private readonly AudioStream _next;
1417
private readonly byte[] _nonce;
1518
private bool _hasHeader;
1619
private ushort _nextSeq;
1720
private uint _nextTimestamp;
21+
private uint _nonceCounter;
1822

1923
public SodiumEncryptStream(AudioStream next, IAudioClient client)
2024
{
2125
_next = next;
2226
_client = (AudioClient)client;
23-
_nonce = new byte[24];
27+
_nonce = new byte[NonceSize];
28+
_nonceCounter = 0;
2429
}
2530

2631
/// <exception cref="InvalidOperationException">Header received with no payload.</exception>
@@ -46,10 +51,35 @@ public override async Task WriteAsync(byte[] buffer, int offset, int count, Canc
4651
if (_client.SecretKey == null)
4752
return;
4853

49-
Buffer.BlockCopy(buffer, offset, _nonce, 0, 12); //Copy nonce from RTP header
50-
count = SecretBox.Encrypt(buffer, offset + 12, count - 12, buffer, 12, _nonce, _client.SecretKey);
54+
// The first bytes of the nonce are the counter in big-endian.
55+
byte[] counterBytes = BitConverter.GetBytes(_nonceCounter);
56+
if (BitConverter.IsLittleEndian)
57+
Array.Reverse(counterBytes); // big-endian
58+
Buffer.BlockCopy(counterBytes, 0, _nonce, 0, counterBytes.Length); // Source offset is 0: 'offset' indexes the caller's buffer, not the 4-byte counter array
59+
if (++_nonceCounter >= uint.MaxValue)
60+
_nonceCounter = 0;
61+
62+
// Encrypt payload
63+
byte[] rtpHeader = new byte[RtpHeaderSize];
64+
Buffer.BlockCopy(buffer, offset, rtpHeader, 0, rtpHeader.Length);
65+
int payloadOffset = offset + rtpHeader.Length;
66+
int payloadLength = count - rtpHeader.Length;
67+
int encryptedLength = SecretBox.Encrypt(
68+
buffer,
69+
payloadOffset,
70+
payloadLength,
71+
buffer,
72+
payloadOffset,
73+
rtpHeader,
74+
_nonce,
75+
_client.SecretKey);
76+
77+
// Append nonce to encrypted payload
78+
Buffer.BlockCopy(counterBytes, 0, buffer, payloadOffset + encryptedLength, counterBytes.Length);
79+
int packageLength = rtpHeader.Length + encryptedLength + counterBytes.Length;
80+
5181
_next.WriteHeader(_nextSeq, _nextTimestamp, false);
52-
await _next.WriteAsync(buffer, 0, count + 12, cancelToken).ConfigureAwait(false);
82+
await _next.WriteAsync(buffer, offset, packageLength, cancelToken).ConfigureAwait(false);
5383
}
5484

5585
public override Task FlushAsync(CancellationToken cancelToken)

src/Discord.Net.WebSocket/DiscordVoiceApiClient.cs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ internal class DiscordVoiceAPIClient : IDisposable
1919
{
2020
#region DiscordVoiceAPIClient
2121
public const int MaxBitrate = 128 * 1024;
22-
public const string Mode = "xsalsa20_poly1305";
22+
public const string Mode = "aead_xchacha20_poly1305_rtpsize";
2323

2424
public event Func<string, string, double, Task> SentRequest { add { _sentRequestEvent.Add(value); } remove { _sentRequestEvent.Remove(value); } }
2525
private readonly AsyncEvent<Func<string, string, double, Task>> _sentRequestEvent = new AsyncEvent<Func<string, string, double, Task>>();
@@ -129,8 +129,15 @@ public async Task SendAsync(byte[] data, int offset, int bytes)
129129
#endregion
130130

131131
#region WebSocket
132-
public Task SendHeartbeatAsync(RequestOptions options = null)
133-
=> SendAsync(VoiceOpCode.Heartbeat, DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(), options: options);
132+
public Task SendHeartbeatAsync(int sequenceAck, RequestOptions options = null)
133+
{
134+
return SendAsync(VoiceOpCode.Heartbeat, new HeartbeatParams
135+
{
136+
Timestamp = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(),
137+
SequenceAck = sequenceAck
138+
},
139+
options: options);
140+
}
134141

135142
public Task SendIdentityAsync(ulong userId, string sessionId, string token)
136143
{

0 commit comments

Comments
 (0)