# SPDX-FileCopyrightText: 2022-present deepset GmbH <[email protected]>
#
# SPDX-License-Identifier: Apache-2.0

import os
from typing import Any, Awaitable, Callable, Optional, Union

from openai.lib._pydantic import to_strict_json_schema
from pydantic import BaseModel

from haystack import component, default_from_dict, default_to_dict
from haystack.components.generators.chat import OpenAIResponsesChatGenerator
from haystack.dataclasses.streaming_chunk import StreamingCallbackT
from haystack.tools import ToolsType, deserialize_tools_or_toolset_inplace, serialize_tools_or_toolset
from haystack.utils import Secret, deserialize_callable, deserialize_secrets_inplace, serialize_callable


@component
class AzureOpenAIResponsesChatGenerator(OpenAIResponsesChatGenerator):
    """
    Completes chats using OpenAI's Responses API on Azure.

    It works with the gpt-5 and o-series models and supports streaming responses
    from the OpenAI API. It uses the [ChatMessage](https://docs.haystack.deepset.ai/docs/chatmessage)
    format for input and output.

    You can customize how the text is generated by passing parameters to the
    OpenAI API. Use the `**generation_kwargs` argument when you initialize
    the component or when you run it. Any parameter that works with
    `openai.responses.create` will work here too.

    For details on OpenAI API parameters, see
    [OpenAI documentation](https://platform.openai.com/docs/api-reference/responses).

    ### Usage example

    ```python
    from haystack.components.generators.chat import AzureOpenAIResponsesChatGenerator
    from haystack.dataclasses import ChatMessage

    messages = [ChatMessage.from_user("What's Natural Language Processing?")]

    client = AzureOpenAIResponsesChatGenerator(
        azure_endpoint="https://example-resource.azure.openai.com/",
        generation_kwargs={"reasoning": {"effort": "low", "summary": "auto"}}
    )
    response = client.run(messages)
    print(response)
    ```
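
    A sketch of structured output via a Pydantic model passed as `text_format`
    (assumes the deployed model supports structured outputs; `CityInfo` is a
    hypothetical schema used only for illustration):

    ```python
    from pydantic import BaseModel

    from haystack.components.generators.chat import AzureOpenAIResponsesChatGenerator
    from haystack.dataclasses import ChatMessage


    class CityInfo(BaseModel):
        city: str
        country: str


    client = AzureOpenAIResponsesChatGenerator(
        azure_endpoint="https://example-resource.azure.openai.com/",
        generation_kwargs={"text_format": CityInfo},
    )
    response = client.run([ChatMessage.from_user("Tell me about Paris.")])
    print(response["replies"][0].text)
    ```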
    """

    # ruff: noqa: PLR0913
    def __init__(
        self,
        *,
        api_key: Union[Secret, Callable[[], str], Callable[[], Awaitable[str]]] = Secret.from_env_var(
            "AZURE_OPENAI_API_KEY", strict=False
        ),
        azure_endpoint: Optional[str] = None,
        azure_deployment: str = "gpt-5-mini",
        streaming_callback: Optional[StreamingCallbackT] = None,
        organization: Optional[str] = None,
        generation_kwargs: Optional[dict[str, Any]] = None,
        timeout: Optional[float] = None,
        max_retries: Optional[int] = None,
        tools: Optional[ToolsType] = None,
        tools_strict: bool = False,
        http_client_kwargs: Optional[dict[str, Any]] = None,
    ):
        """
        Initialize the AzureOpenAIResponsesChatGenerator component.

        :param api_key: The API key to use for authentication. Can be:
            - A `Secret` object containing the API key.
            - A `Secret` object containing an [Azure Active Directory token](https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id).
            - A function that returns an Azure Active Directory token.
        :param azure_endpoint: The endpoint of the deployed model, for example `"https://example-resource.azure.openai.com/"`.
        :param azure_deployment: The deployment of the model, usually the model name.
        :param organization: Your organization ID, defaults to `None`. For help, see
            [Setting up your organization](https://platform.openai.com/docs/guides/production-best-practices/setting-up-your-organization).
        :param streaming_callback: A callback function called when a new token is received from the stream.
            It accepts [StreamingChunk](https://docs.haystack.deepset.ai/docs/data-classes#streamingchunk)
            as an argument.
        :param timeout: Timeout for OpenAI client calls. If not set, it defaults to the
            `OPENAI_TIMEOUT` environment variable, or 30 seconds.
        :param max_retries: Maximum number of retries to contact OpenAI after an internal error.
            If not set, it defaults to the `OPENAI_MAX_RETRIES` environment variable, or 5.
        :param generation_kwargs: Other parameters to use for the model. These parameters are sent
            directly to the OpenAI endpoint.
            See the OpenAI [documentation](https://platform.openai.com/docs/api-reference/responses) for
            more details. A short example sketch appears at the end of this docstring.
            Some of the supported parameters:
            - `temperature`: What sampling temperature to use. Higher values like 0.8 make the output more random,
                while lower values like 0.2 make it more focused and deterministic.
            - `top_p`: An alternative to sampling with temperature, called nucleus sampling, where the model
                considers the results of the tokens with top_p probability mass. For example, 0.1 means only the tokens
                comprising the top 10% probability mass are considered.
            - `previous_response_id`: The ID of the previous response.
                Use this to create multi-turn conversations.
            - `text_format`: A JSON schema or a Pydantic model that enforces the structure of the model's response.
                If provided, the output is always validated against this
                format (unless the model returns a tool call).
                For details, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs).
                Notes:
                - This parameter accepts Pydantic models and JSON schemas for the latest models, starting from GPT-4o.
                    Older models only support a basic version of structured outputs through `{"type": "json_object"}`.
                    For detailed information on JSON mode, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs#json-mode).
                - For structured outputs with streaming,
                    `text_format` must be a JSON schema and not a Pydantic model.
            - `reasoning`: A dictionary of parameters for reasoning. For example:
                - `summary`: The summary of the reasoning.
                - `effort`: The level of effort to put into the reasoning. Can be `low`, `medium`, or `high`.
                - `generate_summary`: Whether to generate a summary of the reasoning.
                Note: OpenAI does not return the reasoning tokens, but you can view the summary if it is enabled.
                For details, see the [OpenAI Reasoning documentation](https://platform.openai.com/docs/guides/reasoning).
        :param tools:
            A list of Tool and/or Toolset objects, or a single Toolset, for which the model can prepare calls.
        :param tools_strict:
            Whether to enable strict schema adherence for tool calls. If set to `True`, the model follows exactly
            the schema provided in the `parameters` field of the tool definition, but this may increase latency.
        :param http_client_kwargs:
            A dictionary of keyword arguments to configure a custom `httpx.Client` or `httpx.AsyncClient`.
            For more information, see the [HTTPX documentation](https://www.python-httpx.org/api/#client).
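
        A minimal sketch combining some of the parameters above (the endpoint and
        the `"resp_123"` response ID are placeholders; `generation_kwargs` can also
        be passed to `run`, as noted in the class docstring):

        ```python
        from haystack.dataclasses import ChatMessage

        client = AzureOpenAIResponsesChatGenerator(
            azure_endpoint="https://example-resource.azure.openai.com/",
            generation_kwargs={"reasoning": {"effort": "low", "summary": "auto"}},
        )
        result = client.run(
            [ChatMessage.from_user("Summarize that in one sentence.")],
            generation_kwargs={"previous_response_id": "resp_123"},
        )
        ```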
        """
        azure_endpoint = azure_endpoint or os.getenv("AZURE_OPENAI_ENDPOINT")
        if azure_endpoint is None:
            raise ValueError(
                "You must provide `azure_endpoint` or set the `AZURE_OPENAI_ENDPOINT` environment variable."
            )
        self._azure_endpoint = azure_endpoint
        self._azure_deployment = azure_deployment
        super().__init__(
            api_key=api_key,  # type: ignore[arg-type]
            model=self._azure_deployment,
            streaming_callback=streaming_callback,
            api_base_url=f"{self._azure_endpoint.rstrip('/')}/openai/v1",
            organization=organization,
            generation_kwargs=generation_kwargs,
            timeout=timeout,
            max_retries=max_retries,
            tools=tools,
            tools_strict=tools_strict,
            http_client_kwargs=http_client_kwargs,
        )

    def to_dict(self) -> dict[str, Any]:
        """
        Serialize this component to a dictionary.

        :returns:
            The serialized component as a dictionary.
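
        A round-trip sketch (assuming `client` was created as in the class
        docstring, with `AZURE_OPENAI_API_KEY` set in the environment):

        ```python
        data = client.to_dict()
        restored = AzureOpenAIResponsesChatGenerator.from_dict(data)
        ```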
        """
        callback_name = serialize_callable(self.streaming_callback) if self.streaming_callback else None

        # API key can be a secret or a callable
        serialized_api_key = (
            serialize_callable(self.api_key)
            if callable(self.api_key)
            else self.api_key.to_dict()
            if isinstance(self.api_key, Secret)
            else None
        )

        # If the response format is a Pydantic model, it's converted to OpenAI's JSON schema format.
        # If it's already a JSON schema (a dict), it's left as is.
        generation_kwargs = self.generation_kwargs.copy()
        response_format = generation_kwargs.get("response_format")
        # Guard with isinstance(..., type): issubclass raises TypeError on non-class values such as dict schemas
        if response_format and isinstance(response_format, type) and issubclass(response_format, BaseModel):
            json_schema = {
                "type": "json_schema",
                "json_schema": {
                    "name": response_format.__name__,
                    "strict": True,
                    "schema": to_strict_json_schema(response_format),
                },
            }
            generation_kwargs["response_format"] = json_schema

        # OpenAI/MCP tools are passed as a list of dictionaries
        serialized_tools: Union[dict[str, Any], list[dict[str, Any]], None]
        if self.tools and isinstance(self.tools, list) and isinstance(self.tools[0], dict):
            # mypy can't infer that self.tools is list[dict] here
            serialized_tools = self.tools  # type: ignore[assignment]
        else:
            serialized_tools = serialize_tools_or_toolset(self.tools)  # type: ignore[arg-type]

        return default_to_dict(
            self,
            azure_endpoint=self._azure_endpoint,
            api_key=serialized_api_key,
            azure_deployment=self._azure_deployment,
            streaming_callback=callback_name,
            organization=self.organization,
            generation_kwargs=generation_kwargs,
            timeout=self.timeout,
            max_retries=self.max_retries,
            tools=serialized_tools,
            tools_strict=self.tools_strict,
            http_client_kwargs=self.http_client_kwargs,
        )

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "AzureOpenAIResponsesChatGenerator":
        """
        Deserialize this component from a dictionary.

        :param data: The dictionary representation of this component.
        :returns:
            The deserialized component instance.
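
        A sketch of restoring a component whose `api_key` was a callable; the
        callable is stored by import path, so `get_token` must be importable at
        module level (it is a hypothetical token provider):

        ```python
        def get_token() -> str:
            # placeholder; a real implementation would return an Azure AD token
            return "dummy-token"

        client = AzureOpenAIResponsesChatGenerator(
            azure_endpoint="https://example-resource.azure.openai.com/",
            api_key=get_token,
        )
        restored = AzureOpenAIResponsesChatGenerator.from_dict(client.to_dict())
        ```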
        """
        serialized_api_key = data["init_parameters"].get("api_key")
        # If it's a dict, it's most likely a Secret
        if isinstance(serialized_api_key, dict):
            deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"])
        # If it's a str, it's most likely a serialized callable
        elif isinstance(serialized_api_key, str):
            data["init_parameters"]["api_key"] = deserialize_callable(serialized_api_key)

        # We only deserialize the tools if they are Haystack tools,
        # because OpenAI tools are not serialized in the same way
        tools = data["init_parameters"].get("tools")
        if tools and (
            (isinstance(tools, dict) and tools.get("type") == "haystack.tools.toolset.Toolset")
            or (isinstance(tools, list) and tools[0].get("type") == "haystack.tools.tool.Tool")
        ):
            deserialize_tools_or_toolset_inplace(data["init_parameters"], key="tools")

        init_params = data.get("init_parameters", {})
        serialized_callback_handler = init_params.get("streaming_callback")
        if serialized_callback_handler:
            data["init_parameters"]["streaming_callback"] = deserialize_callable(serialized_callback_handler)
        return default_from_dict(cls, data)