microsoft · dluc · Mar 7, 2024 · Mar 6, 2024
@@ -138,6 +138,11 @@ public async IAsyncEnumerable<string> GenerateTextAsync(
                 foreach (var s in options.StopSequences) { openaiOptions.StopSequences.Add(s); }
             }
 
+            if (options.TokenSelectionBiases is { Count: > 0 })
+            {
+                foreach (var (token, bias) in options.TokenSelectionBiases) { openaiOptions.TokenSelectionBiases.Add(token, (int)bias); }
+            }
+
             StreamingResponse<Completions>? response = await this._client.GetCompletionsStreamingAsync(openaiOptions, cancellationToken).ConfigureAwait(false);
             await foreach (Completions? completions in response.EnumerateValues().WithCancellation(cancellationToken).ConfigureAwait(false))
             {
@@ -165,6 +170,11 @@ public async IAsyncEnumerable<string> GenerateTextAsync(
                 foreach (var s in options.StopSequences) { openaiOptions.StopSequences.Add(s); }
             }
 
+            if (options.TokenSelectionBiases is { Count: > 0 })
+            {
+                foreach (var (token, bias) in options.TokenSelectionBiases) { openaiOptions.TokenSelectionBiases.Add(token, (int)bias); }
+            }
+
             openaiOptions.Messages.Add(new ChatRequestSystemMessage(prompt));
 
             StreamingResponse<StreamingChatCompletionsUpdate>? response = await this._client.GetChatCompletionsStreamingAsync(openaiOptions, cancellationToken).ConfigureAwait(false);

@@ -106,7 +106,7 @@ public IAsyncEnumerable<string> GenerateTextAsync(
             TopP = (float)options.TopP,
             PresencePenalty = (float)options.PresencePenalty,
             FrequencyPenalty = (float)options.FrequencyPenalty,
-            AntiPrompts = options.StopSequences.ToList(),
+            AntiPrompts = options.StopSequences?.ToList() ?? new(),
             LogitBias = new(),
             // RepeatLastTokensCount = 0, // [int] last n tokens to penalize (0 = disable penalty, -1 = context size)
             // TopK = 0, // [int] The number of highest probability vocabulary tokens to keep for top-k-filtering.
@@ -121,9 +121,12 @@ public IAsyncEnumerable<string> GenerateTextAsync(
             // Grammar = null // SafeLLamaGrammarHandle
         };
 
-        foreach (KeyValuePair<int, float> b in options.TokenSelectionBiases)
+        if (options.TokenSelectionBiases is { Count: > 0 })
         {
-            settings.LogitBias!.Add(b.Key, b.Value);
+            foreach (var (token, bias) in options.TokenSelectionBiases)
+            {
+                settings.LogitBias!.Add(token, bias);
+            }
         }
 
         return executor.InferAsync(prompt, settings, cancellationToken);

@@ -119,6 +119,11 @@ public async IAsyncEnumerable<string> GenerateTextAsync(
                 foreach (var s in options.StopSequences) { openaiOptions.StopSequences.Add(s); }
             }
 
+            if (options.TokenSelectionBiases is { Count: > 0 })
+            {
+                foreach (var (token, bias) in options.TokenSelectionBiases) { openaiOptions.TokenSelectionBiases.Add(token, (int)bias); }
+            }
+
             StreamingResponse<Completions>? response = await this._client.GetCompletionsStreamingAsync(openaiOptions, cancellationToken).ConfigureAwait(false);
             await foreach (Completions? completions in response.EnumerateValues().WithCancellation(cancellationToken).ConfigureAwait(false))
             {
@@ -146,6 +151,11 @@ public async IAsyncEnumerable<string> GenerateTextAsync(
                 foreach (var s in options.StopSequences) { openaiOptions.StopSequences.Add(s); }
             }
 
+            if (options.TokenSelectionBiases is { Count: > 0 })
+            {
+                foreach (var (token, bias) in options.TokenSelectionBiases) { openaiOptions.TokenSelectionBiases.Add(token, (int)bias); }
+            }
+
             openaiOptions.Messages.Add(new ChatRequestSystemMessage(prompt));
 
             StreamingResponse<StreamingChatCompletionsUpdate>? response = await this._client.GetChatCompletionsStreamingAsync(openaiOptions, cancellationToken).ConfigureAwait(false);

@@ -337,16 +337,15 @@ private IAsyncEnumerable<string> GenerateAnswerAsync(string question, string fac
 
         prompt = prompt.Replace("{{$notFound}}", this._config.EmptyAnswer, StringComparison.OrdinalIgnoreCase);
 
-        // TODO: receive options from API: https://github.com/microsoft/kernel-memory/issues/137
         var options = new TextGenerationOptions
         {
-            // Temperature = 0,
-            // TopP = 0,
-            // PresencePenalty = 0,
-            // FrequencyPenalty = 0,
+            Temperature = this._config.Temperature,
+            TopP = this._config.TopP,
+            PresencePenalty = this._config.PresencePenalty,
+            FrequencyPenalty = this._config.FrequencyPenalty,
             MaxTokens = this._config.AnswerTokens,
-            // StopSequences = null,
-            // TokenSelectionBiases = null
+            StopSequences = this._config.StopSequences,
+            TokenSelectionBiases = this._config.TokenSelectionBiases,
         };
 
         if (this._log.IsEnabled(LogLevel.Debug))