Commit 68ee7ef

Search client update (#344)
## Motivation and Context (Why the change? What's the scenario?)

With the exception of `MaxTokens`, the properties of the `TextGenerationOptions` object used by `AskAsync` are hard-coded.

## High level description (Approach, Design)

Complete the work started by PR #341: update SearchClient.cs and the corresponding text generators to use the new LLM request settings now available in SearchClientConfig.cs.
1 parent 023bdde commit 68ee7ef
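
As a hedged usage sketch (not code from this commit): after this change, the request settings can be supplied through `SearchClientConfig`. The property names below are exactly those read in the SearchClient.cs diff further down; the types and values are illustrative assumptions.

```csharp
// Illustrative only: property names come from the SearchClient.cs diff in this
// commit; the values (and the List/Dictionary types) are example assumptions.
var config = new SearchClientConfig
{
    Temperature = 0,
    TopP = 0,
    PresencePenalty = 0,
    FrequencyPenalty = 0,
    AnswerTokens = 300,                                   // becomes MaxTokens
    StopSequences = new List<string> { "###" },           // forwarded as-is
    TokenSelectionBiases = new Dictionary<int, float>(),  // token id -> bias
};
```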

File tree

4 files changed: +32 −10 lines

- extensions/AzureOpenAI/AzureOpenAITextGenerator.cs
- extensions/LlamaSharp/LlamaSharp/LlamaSharpTextGenerator.cs
- extensions/OpenAI/OpenAITextGenerator.cs
- service/Core/Search/SearchClient.cs

extensions/AzureOpenAI/AzureOpenAITextGenerator.cs (10 additions, 0 deletions)

```diff
@@ -138,6 +138,11 @@ public async IAsyncEnumerable<string> GenerateTextAsync(
         foreach (var s in options.StopSequences) { openaiOptions.StopSequences.Add(s); }
     }
 
+    if (options.TokenSelectionBiases is { Count: > 0 })
+    {
+        foreach (var (token, bias) in options.TokenSelectionBiases) { openaiOptions.TokenSelectionBiases.Add(token, (int)bias); }
+    }
+
     StreamingResponse<Completions>? response = await this._client.GetCompletionsStreamingAsync(openaiOptions, cancellationToken).ConfigureAwait(false);
     await foreach (Completions? completions in response.EnumerateValues().WithCancellation(cancellationToken).ConfigureAwait(false))
     {
@@ -165,6 +170,11 @@ public async IAsyncEnumerable<string> GenerateTextAsync(
         foreach (var s in options.StopSequences) { openaiOptions.StopSequences.Add(s); }
     }
 
+    if (options.TokenSelectionBiases is { Count: > 0 })
+    {
+        foreach (var (token, bias) in options.TokenSelectionBiases) { openaiOptions.TokenSelectionBiases.Add(token, (int)bias); }
+    }
+
     openaiOptions.Messages.Add(new ChatRequestSystemMessage(prompt));
 
     StreamingResponse<StreamingChatCompletionsUpdate>? response = await this._client.GetChatCompletionsStreamingAsync(openaiOptions, cancellationToken).ConfigureAwait(false);
```
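In both hunks the additions guard on `options.TokenSelectionBiases is { Count: > 0 }`, a C# property pattern that matches only a non-null, non-empty dictionary, so one expression covers both the null check and the emptiness check. A self-contained sketch of the same pattern:

```csharp
using System;
using System.Collections.Generic;

// Standalone illustration of the guard used above: a property pattern never
// matches a null reference, and `Count: > 0` rejects an empty dictionary.
Dictionary<int, float>? biases = null;
Console.WriteLine(biases is { Count: > 0 });   // False: null never matches

biases = new Dictionary<int, float> { [42] = -100f };
Console.WriteLine(biases is { Count: > 0 });   // True: non-null and non-empty
```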

extensions/LlamaSharp/LlamaSharp/LlamaSharpTextGenerator.cs (6 additions, 3 deletions)

```diff
@@ -106,7 +106,7 @@ public IAsyncEnumerable<string> GenerateTextAsync(
         TopP = (float)options.TopP,
         PresencePenalty = (float)options.PresencePenalty,
         FrequencyPenalty = (float)options.FrequencyPenalty,
-        AntiPrompts = options.StopSequences.ToList(),
+        AntiPrompts = options.StopSequences?.ToList() ?? new(),
         LogitBias = new(),
         // RepeatLastTokensCount = 0, // [int] last n tokens to penalize (0 = disable penalty, -1 = context size)
         // TopK = 0, // [int] The number of highest probability vocabulary tokens to keep for top-k-filtering.
@@ -121,9 +121,12 @@ public IAsyncEnumerable<string> GenerateTextAsync(
         // Grammar = null // SafeLLamaGrammarHandle
     };
 
-    foreach (KeyValuePair<int, float> b in options.TokenSelectionBiases)
+    if (options.TokenSelectionBiases is { Count: > 0 })
     {
-        settings.LogitBias!.Add(b.Key, b.Value);
+        foreach (var (token, bias) in options.TokenSelectionBiases)
+        {
+            settings.LogitBias!.Add(token, bias);
+        }
     }
 
     return executor.InferAsync(prompt, settings, cancellationToken);
```
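One detail worth noting across the three generators: `TokenSelectionBiases` carries `float` values (`KeyValuePair<int, float>` in the removed loop above). LlamaSharp's `LogitBias` accepts the float directly, while the OpenAI and Azure OpenAI hunks cast each value to `int` because that SDK's bias dictionary takes integers. A minimal sketch of the difference:

```csharp
using System;
using System.Collections.Generic;

// Token id 42 and bias -100.5 are arbitrary illustration values.
var biases = new Dictionary<int, float> { [42] = -100.5f };

foreach (var (token, bias) in biases)
{
    int openAiValue = (int)bias;   // OpenAI/Azure OpenAI hunks: cast to int (-100)
    float llamaValue = bias;       // LlamaSharp hunk: float stored as-is (-100.5)
    Console.WriteLine($"token {token}: openai={openAiValue}, llama={llamaValue}");
}
```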

extensions/OpenAI/OpenAITextGenerator.cs (10 additions, 0 deletions)

```diff
@@ -119,6 +119,11 @@ public async IAsyncEnumerable<string> GenerateTextAsync(
         foreach (var s in options.StopSequences) { openaiOptions.StopSequences.Add(s); }
     }
 
+    if (options.TokenSelectionBiases is { Count: > 0 })
+    {
+        foreach (var (token, bias) in options.TokenSelectionBiases) { openaiOptions.TokenSelectionBiases.Add(token, (int)bias); }
+    }
+
     StreamingResponse<Completions>? response = await this._client.GetCompletionsStreamingAsync(openaiOptions, cancellationToken).ConfigureAwait(false);
     await foreach (Completions? completions in response.EnumerateValues().WithCancellation(cancellationToken).ConfigureAwait(false))
     {
@@ -146,6 +151,11 @@ public async IAsyncEnumerable<string> GenerateTextAsync(
         foreach (var s in options.StopSequences) { openaiOptions.StopSequences.Add(s); }
     }
 
+    if (options.TokenSelectionBiases is { Count: > 0 })
+    {
+        foreach (var (token, bias) in options.TokenSelectionBiases) { openaiOptions.TokenSelectionBiases.Add(token, (int)bias); }
+    }
+
     openaiOptions.Messages.Add(new ChatRequestSystemMessage(prompt));
 
     StreamingResponse<StreamingChatCompletionsUpdate>? response = await this._client.GetChatCompletionsStreamingAsync(openaiOptions, cancellationToken).ConfigureAwait(false);
```

service/Core/Search/SearchClient.cs (6 additions, 7 deletions)

```diff
@@ -337,16 +337,15 @@ private IAsyncEnumerable<string> GenerateAnswerAsync(string question, string fac
 
     prompt = prompt.Replace("{{$notFound}}", this._config.EmptyAnswer, StringComparison.OrdinalIgnoreCase);
 
-    // TODO: receive options from API: https://github.com/microsoft/kernel-memory/issues/137
     var options = new TextGenerationOptions
     {
-        // Temperature = 0,
-        // TopP = 0,
-        // PresencePenalty = 0,
-        // FrequencyPenalty = 0,
+        Temperature = this._config.Temperature,
+        TopP = this._config.TopP,
+        PresencePenalty = this._config.PresencePenalty,
+        FrequencyPenalty = this._config.FrequencyPenalty,
         MaxTokens = this._config.AnswerTokens,
-        // StopSequences = null,
-        // TokenSelectionBiases = null
+        StopSequences = this._config.StopSequences,
+        TokenSelectionBiases = this._config.TokenSelectionBiases,
     };
 
     if (this._log.IsEnabled(LogLevel.Debug))
```
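For context, a sketch of how the options built here flow into the generators patched above. The `GenerateTextAsync(prompt, options, cancellationToken)` shape comes from the diff hunks; the interface name `ITextGenerator` and the helper itself are assumptions for illustration only.

```csharp
using System.Text;
using System.Threading;
using System.Threading.Tasks;

// Hypothetical helper, not from this commit: stream an answer from any of the
// patched generators. After this change, StopSequences and TokenSelectionBiases
// set on the options are actually forwarded to the underlying model.
static async Task<string> RenderAnswerAsync(
    ITextGenerator generator,            // interface name is an assumption
    string prompt,
    TextGenerationOptions options,
    CancellationToken cancellationToken)
{
    var answer = new StringBuilder();
    await foreach (string chunk in generator.GenerateTextAsync(prompt, options, cancellationToken))
    {
        answer.Append(chunk);
    }
    return answer.ToString();
}
```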
