From 485a1c439eda5e954ee7d259c97a1bb7f48a52b9 Mon Sep 17 00:00:00 2001 From: gdammn Date: Mon, 1 Sep 2025 18:52:53 +0200 Subject: [PATCH 01/20] add support for previous_response_id from Responses API --- docs/models/openai.md | 20 +++ pydantic_ai_slim/pydantic_ai/models/openai.py | 21 +++ ...t_openai_invalid_previous_response_id.yaml | 66 +++++++++ .../test_openai_previous_response_id.yaml | 131 ++++++++++++++++++ tests/models/test_openai_responses.py | 38 +++++ tests/test_examples.py | 4 + 6 files changed, 280 insertions(+) create mode 100644 tests/models/cassettes/test_openai_responses/test_openai_invalid_previous_response_id.yaml create mode 100644 tests/models/cassettes/test_openai_responses/test_openai_previous_response_id.yaml diff --git a/docs/models/openai.md b/docs/models/openai.md index 86febbcc17..4ac3683d78 100644 --- a/docs/models/openai.md +++ b/docs/models/openai.md @@ -143,6 +143,26 @@ As of 7:48 AM on Wednesday, April 2, 2025, in Tokyo, Japan, the weather is cloud You can learn more about the differences between the Responses API and Chat Completions API in the [OpenAI API docs](https://platform.openai.com/docs/guides/responses-vs-chat-completions). +The Responses API also supports referencing earlier model responses in a new request. This is available through the `openai_previous_response_id` field in +[`OpenAIResponsesModelSettings`][pydantic_ai.models.openai.OpenAIResponsesModelSettings]. + +```python +from pydantic_ai import Agent +from pydantic_ai.models.openai import OpenAIResponsesModel, OpenAIResponsesModelSettings + +model = OpenAIResponsesModel('gpt-4o') +agent = Agent(model=model) + +result = agent.run_sync('The secret is 1234') +model_settings = OpenAIResponsesModelSettings( + openai_previous_response_id=result.all_messages()[-1].provider_response_id +) +result = agent.run_sync('What is the secret code?', model_settings=model_settings) +print(result.output) +#> 1234 +``` +By passing the `provider_response_id` from an earlier run, you can allow the model to build on its own prior reasoning without needing to resend the full message history. + ## OpenAI-compatible Models Many providers and models are compatible with the OpenAI API, and can be used with `OpenAIChatModel` in Pydantic AI. diff --git a/pydantic_ai_slim/pydantic_ai/models/openai.py b/pydantic_ai_slim/pydantic_ai/models/openai.py index 3249aab7a4..e9f8310ba0 100644 --- a/pydantic_ai_slim/pydantic_ai/models/openai.py +++ b/pydantic_ai_slim/pydantic_ai/models/openai.py @@ -222,6 +222,13 @@ class OpenAIResponsesModelSettings(OpenAIChatModelSettings, total=False): `medium`, and `high`. """ + openai_previous_response_id: str + """The identifier of the most recent response to include in the API request. + + This enables the model to reference previous reasoning traces. + See the [OpenAI Responses API documentation](https://platform.openai.com/docs/guides/responses) for more information. + """ + @dataclass(init=False) class OpenAIChatModel(Model): @@ -977,6 +984,18 @@ async def _responses_create( else: tool_choice = 'auto' + previous_response_id: str | None = None + for message in reversed(messages): + # Instead of sending the full message history, get provider_response_id + # (openai-compatible) from the latest matching ModelResponse and + # pass it to the next ModelRequest as previous_response_id to preserve context. + # Since the full history isn't needed, only the latest message is kept. 
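+            # If no matching OpenAI response is found, previous_response_id stays None,
+            # and the request falls back to the explicit openai_previous_response_id
+            # setting (or sends the full history unchanged).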
+ if isinstance(message, ModelResponse) and message.model_name: + if self._model_name in message.model_name: + previous_response_id = message.provider_response_id + messages = [messages[-1]] + break + instructions, openai_messages = await self._map_messages(messages, model_settings) reasoning = self._get_reasoning(model_settings) @@ -1027,6 +1046,8 @@ async def _responses_create( truncation=model_settings.get('openai_truncation', NOT_GIVEN), timeout=model_settings.get('timeout', NOT_GIVEN), service_tier=model_settings.get('openai_service_tier', NOT_GIVEN), + previous_response_id=previous_response_id + or model_settings.get('openai_previous_response_id', NOT_GIVEN), reasoning=reasoning, user=model_settings.get('openai_user', NOT_GIVEN), text=text or NOT_GIVEN, diff --git a/tests/models/cassettes/test_openai_responses/test_openai_invalid_previous_response_id.yaml b/tests/models/cassettes/test_openai_responses/test_openai_invalid_previous_response_id.yaml new file mode 100644 index 0000000000..345369eb40 --- /dev/null +++ b/tests/models/cassettes/test_openai_responses/test_openai_invalid_previous_response_id.yaml @@ -0,0 +1,66 @@ +interactions: +- request: + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-type: + - application/json + host: + - api.openai.com + method: POST + parsed_body: + input: + - content: What is the secret key again? + role: user + instructions: '' + model: gpt-5 + text: + format: + type: text + uri: https://api.openai.com/v1/responses + response: + headers: + content-type: + - application/json + parsed_body: + created_at: 1743075630 + error: null + id: resp_1234 + incomplete_details: null + instructions: '' + max_output_tokens: null + metadata: {} + model: gpt-5 + object: response + output: + - content: + - annotations: [] + text: "sesame" + type: output_text + id: msg_test_invalid_previous_response_id + role: assistant + status: completed + type: message + parallel_tool_calls: true + previous_response_id: null + reasoning: null + status: complete + status_details: null + tool_calls: null + total_tokens: 15 + usage: + input_tokens: 10 + input_tokens_details: + cached_tokens: 0 + output_tokens: 1 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 11 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/models/cassettes/test_openai_responses/test_openai_previous_response_id.yaml b/tests/models/cassettes/test_openai_responses/test_openai_previous_response_id.yaml new file mode 100644 index 0000000000..aa5aabd789 --- /dev/null +++ b/tests/models/cassettes/test_openai_responses/test_openai_previous_response_id.yaml @@ -0,0 +1,131 @@ +interactions: +- request: + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-type: + - application/json + host: + - api.openai.com + method: POST + parsed_body: + input: + - content: The secret key is sesame + role: user + instructions: '' + model: gpt-5 + text: + format: + type: text + uri: https://api.openai.com/v1/responses + response: + headers: + content-type: + - application/json + parsed_body: + created_at: 1743075629 + error: null + id: resp_1234 + incomplete_details: null + instructions: '' + max_output_tokens: null + metadata: {} + model: gpt-5 + object: response + output: + - content: + - annotations: [] + text: "Open sesame! What would you like to unlock?" 
+ type: output_text + id: msg_test_previous_response_id + role: assistant + status: completed + type: message + parallel_tool_calls: true + previous_response_id: null + reasoning: null + status: complete + status_details: null + tool_calls: null + total_tokens: 15 + usage: + input_tokens: 10 + input_tokens_details: + cached_tokens: 0 + output_tokens: 1 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 11 + status: + code: 200 + message: OK +- request: + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-type: + - application/json + host: + - api.openai.com + method: POST + parsed_body: + input: + - content: What is the secret key again? + role: user + instructions: '' + model: gpt-5 + text: + format: + type: text + previous_response_id: resp_1234 + uri: https://api.openai.com/v1/responses + response: + headers: + content-type: + - application/json + parsed_body: + created_at: 1743075630 + error: null + id: resp_5678 + incomplete_details: null + instructions: '' + max_output_tokens: null + metadata: {} + model: gpt-5 + object: response + output: + - content: + - annotations: [] + text: "sesame" + type: output_text + id: msg_test_previous_response_id + role: assistant + status: completed + type: message + parallel_tool_calls: true + previous_response_id: resp_1234 + reasoning: null + status: complete + status_details: null + tool_calls: null + total_tokens: 15 + usage: + input_tokens: 10 + input_tokens_details: + cached_tokens: 0 + output_tokens: 1 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 11 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/models/test_openai_responses.py b/tests/models/test_openai_responses.py index 5c44bdd246..a9b15ebe52 100644 --- a/tests/models/test_openai_responses.py +++ b/tests/models/test_openai_responses.py @@ -1180,6 +1180,44 @@ async def test_openai_responses_verbosity(allow_model_requests: None, openai_api assert result.output == snapshot('4') +@pytest.mark.vcr() +async def test_openai_previous_response_id(allow_model_requests: None, openai_api_key: str): + """Test if previous responses are detected via previous_response_id in settings""" + model = OpenAIResponsesModel('gpt-5', provider=OpenAIProvider(api_key=openai_api_key)) + agent = Agent(model=model) + result = await agent.run('The secret key is sesame') + settings = OpenAIResponsesModelSettings(openai_previous_response_id=result.all_messages()[-1].provider_response_id) # type: ignore + result = await agent.run('What is the secret code?', model_settings=settings) + assert result.output == snapshot('sesame') + + +@pytest.mark.vcr() +async def test_openai_invalid_previous_response_id(allow_model_requests: None, openai_api_key: str): + """Test if invalid previous response id is ignored when history contains non-OpenAI responses""" + history = [ + ModelRequest( + parts=[ + UserPromptPart( + content='The secret key is sesame', + ), + ], + ), + ModelResponse( + parts=[ + TextPart(content='Open sesame! 
What would you like to unlock?'), + ], + model_name='claude-3-5-sonnet-latest', + provider_name='anthropic', + provider_response_id='msg_1234', + ), + ] + + model = OpenAIResponsesModel('gpt-5', provider=OpenAIProvider(api_key=openai_api_key)) + agent = Agent(model=model) + result = await agent.run('What is the secret code?', message_history=history) + assert result.output == snapshot('sesame') + + async def test_openai_responses_usage_without_tokens_details(allow_model_requests: None): c = response_message( [ diff --git a/tests/test_examples.py b/tests/test_examples.py index 145b633b7f..86984d4971 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -622,6 +622,10 @@ async def model_logic( # noqa: C901 return ModelResponse(parts=list(response)) else: return ModelResponse(parts=[response]) + elif m.content == 'The secret is 1234': + return ModelResponse(parts=[TextPart('The secret is safe with me')]) + elif m.content == 'What is the secret code?': + return ModelResponse(parts=[TextPart('1234')]) elif isinstance(m, ToolReturnPart) and m.tool_name == 'roulette_wheel': win = m.content == 'winner' From 0027f6cb370bef5b81c4ca8897a1277ba797a74b Mon Sep 17 00:00:00 2001 From: gdammn Date: Mon, 1 Sep 2025 23:37:37 +0200 Subject: [PATCH 02/20] update link --- pydantic_ai_slim/pydantic_ai/models/openai.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pydantic_ai_slim/pydantic_ai/models/openai.py b/pydantic_ai_slim/pydantic_ai/models/openai.py index e9f8310ba0..8ed5b88923 100644 --- a/pydantic_ai_slim/pydantic_ai/models/openai.py +++ b/pydantic_ai_slim/pydantic_ai/models/openai.py @@ -226,7 +226,8 @@ class OpenAIResponsesModelSettings(OpenAIChatModelSettings, total=False): """The identifier of the most recent response to include in the API request. This enables the model to reference previous reasoning traces. - See the [OpenAI Responses API documentation](https://platform.openai.com/docs/guides/responses) for more information. + See the [OpenAI Responses API documentation](https://platform.openai.com/docs/guides/reasoning#keeping-reasoning-items-in-context) + for more information. """ From 22eefe9a89ce194f0fd207c395538af2fa859ceb Mon Sep 17 00:00:00 2001 From: gdammn Date: Thu, 4 Sep 2025 19:52:19 +0200 Subject: [PATCH 03/20] update logic and docs --- docs/models/openai.md | 2 + pydantic_ai_slim/pydantic_ai/models/openai.py | 42 +++++-- ...t_openai_invalid_previous_response_id.yaml | 66 ----------- tests/models/test_openai_responses.py | 111 ++++++++++++++++-- 4 files changed, 134 insertions(+), 87 deletions(-) delete mode 100644 tests/models/cassettes/test_openai_responses/test_openai_invalid_previous_response_id.yaml diff --git a/docs/models/openai.md b/docs/models/openai.md index 4ac3683d78..9bd4cbcf5a 100644 --- a/docs/models/openai.md +++ b/docs/models/openai.md @@ -162,6 +162,8 @@ print(result.output) #> 1234 ``` By passing the `provider_response_id` from an earlier run, you can allow the model to build on its own prior reasoning without needing to resend the full message history. +If message history is provided and all responses come from the same openai model, +only the latest request and the `previous_response_id` from the latest response are sent to the server for efficiency. 
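For example, reusing the message history from an earlier run (a minimal sketch; the output annotation is illustrative):

```python
from pydantic_ai import Agent
from pydantic_ai.models.openai import OpenAIResponsesModel

model = OpenAIResponsesModel('gpt-4o')
agent = Agent(model=model)

result = agent.run_sync('The secret is 1234')
# Every response in this history came from the same OpenAI model, so only the
# latest request is sent, along with the previous_response_id stored on the
# most recent response; the server supplies the earlier context.
result = agent.run_sync('What is the secret code?', message_history=result.all_messages())
print(result.output)
#> 1234
```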
## OpenAI-compatible Models diff --git a/pydantic_ai_slim/pydantic_ai/models/openai.py b/pydantic_ai_slim/pydantic_ai/models/openai.py index 8ed5b88923..a6e6dd28c8 100644 --- a/pydantic_ai_slim/pydantic_ai/models/openai.py +++ b/pydantic_ai_slim/pydantic_ai/models/openai.py @@ -985,17 +985,9 @@ async def _responses_create( else: tool_choice = 'auto' - previous_response_id: str | None = None - for message in reversed(messages): - # Instead of sending the full message history, get provider_response_id - # (openai-compatible) from the latest matching ModelResponse and - # pass it to the next ModelRequest as previous_response_id to preserve context. - # Since the full history isn't needed, only the latest message is kept. - if isinstance(message, ModelResponse) and message.model_name: - if self._model_name in message.model_name: - previous_response_id = message.provider_response_id - messages = [messages[-1]] - break + previous_response_id = model_settings.get('openai_previous_response_id') + if not previous_response_id: + messages, previous_response_id = self._get_response_id_and_trim(messages) instructions, openai_messages = await self._map_messages(messages, model_settings) reasoning = self._get_reasoning(model_settings) @@ -1047,8 +1039,7 @@ async def _responses_create( truncation=model_settings.get('openai_truncation', NOT_GIVEN), timeout=model_settings.get('timeout', NOT_GIVEN), service_tier=model_settings.get('openai_service_tier', NOT_GIVEN), - previous_response_id=previous_response_id - or model_settings.get('openai_previous_response_id', NOT_GIVEN), + previous_response_id=previous_response_id, reasoning=reasoning, user=model_settings.get('openai_user', NOT_GIVEN), text=text or NOT_GIVEN, @@ -1114,6 +1105,31 @@ def _map_tool_definition(self, f: ToolDefinition) -> responses.FunctionToolParam ), } + def _get_response_id_and_trim( + self, messages: list[ModelMessage] + ) -> tuple[list[ModelMessage], str | NotGiven | None]: + # If the message history contains only openai responses, + # we can limit the history to the most recent ModelRequest. + # The provider_response_id from the latest ModelResponse is + # then passed as previous_response_id to preserve context. + response_id = NOT_GIVEN + latest_model_response: list[ModelMessage] = [] + for m in messages: + # Openai may return a dated model_name that differs from self.model_name + # (e.g., "gpt-5" vs "gpt-5-2025-08-07"). + if isinstance(m, ModelResponse) and (self.model_name in m.model_name): # type: ignore + response_id = m.provider_response_id + elif isinstance(m, ModelRequest): + latest_model_response = [m] + else: + # Mixed model responses invalidate response_id, + # so the history is kept intact. 
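+                # (e.g. a ModelResponse produced by a different model or provider)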
+ response_id = NOT_GIVEN + break + if response_id: + messages = latest_model_response + return messages, response_id + async def _map_messages( # noqa: C901 self, messages: list[ModelMessage], model_settings: OpenAIResponsesModelSettings ) -> tuple[str | NotGiven, list[responses.ResponseInputItemParam]]: diff --git a/tests/models/cassettes/test_openai_responses/test_openai_invalid_previous_response_id.yaml b/tests/models/cassettes/test_openai_responses/test_openai_invalid_previous_response_id.yaml deleted file mode 100644 index 345369eb40..0000000000 --- a/tests/models/cassettes/test_openai_responses/test_openai_invalid_previous_response_id.yaml +++ /dev/null @@ -1,66 +0,0 @@ -interactions: -- request: - headers: - accept: - - application/json - accept-encoding: - - gzip, deflate - connection: - - keep-alive - content-type: - - application/json - host: - - api.openai.com - method: POST - parsed_body: - input: - - content: What is the secret key again? - role: user - instructions: '' - model: gpt-5 - text: - format: - type: text - uri: https://api.openai.com/v1/responses - response: - headers: - content-type: - - application/json - parsed_body: - created_at: 1743075630 - error: null - id: resp_1234 - incomplete_details: null - instructions: '' - max_output_tokens: null - metadata: {} - model: gpt-5 - object: response - output: - - content: - - annotations: [] - text: "sesame" - type: output_text - id: msg_test_invalid_previous_response_id - role: assistant - status: completed - type: message - parallel_tool_calls: true - previous_response_id: null - reasoning: null - status: complete - status_details: null - tool_calls: null - total_tokens: 15 - usage: - input_tokens: 10 - input_tokens_details: - cached_tokens: 0 - output_tokens: 1 - output_tokens_details: - reasoning_tokens: 0 - total_tokens: 11 - status: - code: 200 - message: OK -version: 1 diff --git a/tests/models/test_openai_responses.py b/tests/models/test_openai_responses.py index a9b15ebe52..5d3e89f9db 100644 --- a/tests/models/test_openai_responses.py +++ b/tests/models/test_openai_responses.py @@ -34,7 +34,7 @@ from ..conftest import IsDatetime, IsStr, TestEnv, try_import from ..parts_from_messages import part_types_from_messages -from .mock_openai import MockOpenAIResponses, response_message +from .mock_openai import NOT_GIVEN, MockOpenAIResponses, response_message with try_import() as imports_successful: from openai.types.responses.response_output_message import Content, ResponseOutputMessage, ResponseOutputText @@ -1191,14 +1191,13 @@ async def test_openai_previous_response_id(allow_model_requests: None, openai_ap assert result.output == snapshot('sesame') -@pytest.mark.vcr() -async def test_openai_invalid_previous_response_id(allow_model_requests: None, openai_api_key: str): +async def test_previous_response_id_mixed_model_history(allow_model_requests: None, openai_api_key: str): """Test if invalid previous response id is ignored when history contains non-OpenAI responses""" history = [ ModelRequest( parts=[ UserPromptPart( - content='The secret key is sesame', + content='The first secret key is sesame', ), ], ), @@ -1206,16 +1205,112 @@ async def test_openai_invalid_previous_response_id(allow_model_requests: None, o parts=[ TextPart(content='Open sesame! 
What would you like to unlock?'), ], + model_name='gpt-5', + provider_name='openai', + provider_response_id='resp_68b9bd97025c8195b443af591ca2345c08cb6072affe6099', + ), + ModelRequest( + parts=[ + UserPromptPart( + content='The second secret key is olives', + ), + ], + ), + ModelResponse( + parts=[ + TextPart(content='Understood'), + ], model_name='claude-3-5-sonnet-latest', provider_name='anthropic', - provider_response_id='msg_1234', + provider_response_id='msg_01XUQuedGz9gusk4xZm4gWJj', + ), + ModelRequest( + parts=[ + UserPromptPart( + content='what is the first secret key?', + ), + ], ), ] model = OpenAIResponsesModel('gpt-5', provider=OpenAIProvider(api_key=openai_api_key)) - agent = Agent(model=model) - result = await agent.run('What is the secret code?', message_history=history) - assert result.output == snapshot('sesame') + history, previous_response_id = model._get_response_id_and_trim(history) # type: ignore + assert previous_response_id == NOT_GIVEN + assert history == snapshot( + [ + ModelRequest(parts=[UserPromptPart(content='The first secret key is sesame', timestamp=IsDatetime())]), + ModelResponse( + parts=[TextPart(content='Open sesame! What would you like to unlock?')], + usage=RequestUsage(), + model_name='gpt-5', + timestamp=IsDatetime(), + provider_name='openai', + provider_response_id='resp_68b9bd97025c8195b443af591ca2345c08cb6072affe6099', + ), + ModelRequest(parts=[UserPromptPart(content='The second secret key is olives', timestamp=IsDatetime())]), + ModelResponse( + parts=[TextPart(content='Understood')], + usage=RequestUsage(), + model_name='claude-3-5-sonnet-latest', + timestamp=IsDatetime(), + provider_name='anthropic', + provider_response_id='msg_01XUQuedGz9gusk4xZm4gWJj', + ), + ModelRequest(parts=[UserPromptPart(content='what is the first secret key?', timestamp=IsDatetime())]), + ] + ) + + +async def test_previous_response_id_same_model_history(allow_model_requests: None, openai_api_key: str): + """Test if message history is trimmed when model responses are from same model""" + history = [ + ModelRequest( + parts=[ + UserPromptPart( + content='The first secret key is sesame', + ), + ], + ), + ModelResponse( + parts=[ + TextPart(content='Open sesame! 
What would you like to unlock?'), + ], + model_name='gpt-5', + provider_name='openai', + provider_response_id='resp_68b9bd97025c8195b443af591ca2345c08cb6072affe6099', + ), + ModelRequest( + parts=[ + UserPromptPart( + content='The second secret key is olives', + ), + ], + ), + ModelResponse( + parts=[ + TextPart(content='Understood'), + ], + model_name='gpt-5', + provider_name='openai', + provider_response_id='resp_68b9bda81f5c8197a5a51a20a9f4150a000497db2a4c777b', + ), + ModelRequest( + parts=[ + UserPromptPart( + content='what is the first secret key?', + ), + ], + ), + ] + + model = OpenAIResponsesModel('gpt-5', provider=OpenAIProvider(api_key=openai_api_key)) + history, previous_response_id = model._get_response_id_and_trim(history) # type: ignore + assert previous_response_id == 'resp_68b9bda81f5c8197a5a51a20a9f4150a000497db2a4c777b' + assert history == snapshot( + [ + ModelRequest(parts=[UserPromptPart(content='what is the first secret key?', timestamp=IsDatetime())]), + ] + ) async def test_openai_responses_usage_without_tokens_details(allow_model_requests: None): From 68da416d9fe513285f35aad744f7e298e2a59386 Mon Sep 17 00:00:00 2001 From: gdammn Date: Thu, 4 Sep 2025 20:04:28 +0200 Subject: [PATCH 04/20] update import in test --- tests/models/test_openai_responses.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/models/test_openai_responses.py b/tests/models/test_openai_responses.py index 5d3e89f9db..d19cc54117 100644 --- a/tests/models/test_openai_responses.py +++ b/tests/models/test_openai_responses.py @@ -4,6 +4,7 @@ import pytest from inline_snapshot import snapshot +from openai import NOT_GIVEN from pydantic import BaseModel from typing_extensions import TypedDict @@ -34,7 +35,7 @@ from ..conftest import IsDatetime, IsStr, TestEnv, try_import from ..parts_from_messages import part_types_from_messages -from .mock_openai import NOT_GIVEN, MockOpenAIResponses, response_message +from .mock_openai import MockOpenAIResponses, response_message with try_import() as imports_successful: from openai.types.responses.response_output_message import Content, ResponseOutputMessage, ResponseOutputText From 51b1f0a73ce806359f8becfe450a4066c7f43b2e Mon Sep 17 00:00:00 2001 From: gdammn Date: Thu, 4 Sep 2025 21:14:27 +0200 Subject: [PATCH 05/20] fix test --- tests/models/test_openai_responses.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/models/test_openai_responses.py b/tests/models/test_openai_responses.py index d19cc54117..8ffc4401a6 100644 --- a/tests/models/test_openai_responses.py +++ b/tests/models/test_openai_responses.py @@ -4,7 +4,6 @@ import pytest from inline_snapshot import snapshot -from openai import NOT_GIVEN from pydantic import BaseModel from typing_extensions import TypedDict @@ -1236,7 +1235,7 @@ async def test_previous_response_id_mixed_model_history(allow_model_requests: No model = OpenAIResponsesModel('gpt-5', provider=OpenAIProvider(api_key=openai_api_key)) history, previous_response_id = model._get_response_id_and_trim(history) # type: ignore - assert previous_response_id == NOT_GIVEN + assert not previous_response_id assert history == snapshot( [ ModelRequest(parts=[UserPromptPart(content='The first secret key is sesame', timestamp=IsDatetime())]), From 9f7736bd59e241166eca9bfcd38f71f37d881dd7 Mon Sep 17 00:00:00 2001 From: gdammn Date: Fri, 5 Sep 2025 18:54:22 +0200 Subject: [PATCH 06/20] add fixes for types --- pydantic_ai_slim/pydantic_ai/models/openai.py | 29 ++++++++++--------- 1 file changed, 15 
insertions(+), 14 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/models/openai.py b/pydantic_ai_slim/pydantic_ai/models/openai.py index a6e6dd28c8..343b123c5e 100644 --- a/pydantic_ai_slim/pydantic_ai/models/openai.py +++ b/pydantic_ai_slim/pydantic_ai/models/openai.py @@ -1105,29 +1105,30 @@ def _map_tool_definition(self, f: ToolDefinition) -> responses.FunctionToolParam ), } - def _get_response_id_and_trim( - self, messages: list[ModelMessage] - ) -> tuple[list[ModelMessage], str | NotGiven | None]: + def _get_response_id_and_trim(self, messages: list[ModelMessage]) -> tuple[list[ModelMessage], str | None]: # If the message history contains only openai responses, # we can limit the history to the most recent ModelRequest. # The provider_response_id from the latest ModelResponse is # then passed as previous_response_id to preserve context. - response_id = NOT_GIVEN - latest_model_response: list[ModelMessage] = [] + response_id = None + latest_model_request: ModelRequest | None = None for m in messages: # Openai may return a dated model_name that differs from self.model_name # (e.g., "gpt-5" vs "gpt-5-2025-08-07"). - if isinstance(m, ModelResponse) and (self.model_name in m.model_name): # type: ignore - response_id = m.provider_response_id + if isinstance(m, ModelResponse) and m.model_name: + if self.model_name in m.model_name: + response_id = m.provider_response_id + else: + # Mixed model responses invalidate response_id, + # so the history is kept intact. + response_id = None + break elif isinstance(m, ModelRequest): - latest_model_response = [m] + latest_model_request = m else: - # Mixed model responses invalidate response_id, - # so the history is kept intact. - response_id = NOT_GIVEN - break - if response_id: - messages = latest_model_response + pass + if response_id and latest_model_request: + messages = [latest_model_request] return messages, response_id async def _map_messages( # noqa: C901 From a0468add178597ddcd794ca33b6b92d0ea9682d6 Mon Sep 17 00:00:00 2001 From: Forge <64839751+GDaamn@users.noreply.github.com> Date: Fri, 5 Sep 2025 18:55:20 +0200 Subject: [PATCH 07/20] Update docs Co-authored-by: Douwe Maan --- docs/models/openai.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/models/openai.md b/docs/models/openai.md index 9bd4cbcf5a..5b969e1ecf 100644 --- a/docs/models/openai.md +++ b/docs/models/openai.md @@ -161,9 +161,11 @@ result = agent.run_sync('What is the secret code?', model_settings=model_setting print(result.output) #> 1234 ``` + By passing the `provider_response_id` from an earlier run, you can allow the model to build on its own prior reasoning without needing to resend the full message history. -If message history is provided and all responses come from the same openai model, -only the latest request and the `previous_response_id` from the latest response are sent to the server for efficiency. + +If message history is provided and all responses come from the same OpenAI model, +Pydantic AI will automatically only send the the latest request and the `previous_response_id` from the latest response to the API for efficiency. 
## OpenAI-compatible Models From e7d904b08531ed4b7627fbeff8143c8a9cc3bd26 Mon Sep 17 00:00:00 2001 From: gdammn Date: Fri, 5 Sep 2025 22:49:16 +0200 Subject: [PATCH 08/20] update conditional check --- pydantic_ai_slim/pydantic_ai/models/openai.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/models/openai.py b/pydantic_ai_slim/pydantic_ai/models/openai.py index 343b123c5e..8c1bfbefb7 100644 --- a/pydantic_ai_slim/pydantic_ai/models/openai.py +++ b/pydantic_ai_slim/pydantic_ai/models/openai.py @@ -1115,18 +1115,15 @@ def _get_response_id_and_trim(self, messages: list[ModelMessage]) -> tuple[list[ for m in messages: # Openai may return a dated model_name that differs from self.model_name # (e.g., "gpt-5" vs "gpt-5-2025-08-07"). - if isinstance(m, ModelResponse) and m.model_name: - if self.model_name in m.model_name: - response_id = m.provider_response_id - else: - # Mixed model responses invalidate response_id, - # so the history is kept intact. - response_id = None - break + if isinstance(m, ModelResponse) and m.model_name and (self.model_name in m.model_name): + response_id = m.provider_response_id elif isinstance(m, ModelRequest): latest_model_request = m else: - pass + # Mixed model responses invalidate response_id, + # so the history is kept intact. + response_id = None + break if response_id and latest_model_request: messages = [latest_model_request] return messages, response_id From dee583d9cce63e397d2e10ef9e4138553b9939d2 Mon Sep 17 00:00:00 2001 From: Forge <64839751+GDaamn@users.noreply.github.com> Date: Sat, 6 Sep 2025 00:05:41 +0200 Subject: [PATCH 09/20] update conditional check set response_id=None when there is a valid response_id but latest_model_request doesn't exist Co-authored-by: Douwe Maan --- pydantic_ai_slim/pydantic_ai/models/openai.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/models/openai.py b/pydantic_ai_slim/pydantic_ai/models/openai.py index 8c1bfbefb7..7314dc8194 100644 --- a/pydantic_ai_slim/pydantic_ai/models/openai.py +++ b/pydantic_ai_slim/pydantic_ai/models/openai.py @@ -1125,8 +1125,9 @@ def _get_response_id_and_trim(self, messages: list[ModelMessage]) -> tuple[list[ response_id = None break if response_id and latest_model_request: - messages = [latest_model_request] - return messages, response_id + return [latest_model_request], response_id + else: + return messages, None async def _map_messages( # noqa: C901 self, messages: list[ModelMessage], model_settings: OpenAIResponsesModelSettings From 09f4f8d4510cc426750775a9d9ccd3a3c6d53440 Mon Sep 17 00:00:00 2001 From: gdammn Date: Wed, 10 Sep 2025 14:53:50 +0200 Subject: [PATCH 10/20] update tests and docs --- docs/models/openai.md | 30 ++++++++- pydantic_ai_slim/pydantic_ai/models/openai.py | 14 ++-- ...openai_previous_response_id_auto_mode.yaml | 67 +++++++++++++++++++ tests/models/test_openai_responses.py | 47 ++++++++++++- 4 files changed, 150 insertions(+), 8 deletions(-) create mode 100644 tests/models/cassettes/test_openai_responses/test_openai_previous_response_id_auto_mode.yaml diff --git a/docs/models/openai.md b/docs/models/openai.md index 5b969e1ecf..0e02ad1567 100644 --- a/docs/models/openai.md +++ b/docs/models/openai.md @@ -143,6 +143,8 @@ As of 7:48 AM on Wednesday, April 2, 2025, in Tokyo, Japan, the weather is cloud You can learn more about the differences between the Responses API and Chat Completions API in the [OpenAI API 
docs](https://platform.openai.com/docs/guides/responses-vs-chat-completions). +#### Referencing earlier responses + The Responses API also supports referencing earlier model responses in a new request. This is available through the `openai_previous_response_id` field in [`OpenAIResponsesModelSettings`][pydantic_ai.models.openai.OpenAIResponsesModelSettings]. @@ -164,8 +166,32 @@ print(result.output) By passing the `provider_response_id` from an earlier run, you can allow the model to build on its own prior reasoning without needing to resend the full message history. -If message history is provided and all responses come from the same OpenAI model, -Pydantic AI will automatically only send the the latest request and the `previous_response_id` from the latest response to the API for efficiency. +Alternatively, `openai_previous_response_id` field also supports `auto` mode. When enabled, Pydantic AI automatically selects the latest request and the most recent `provider_response_id` from message history to send to OpenAI API, leveraging server-side history instead, for improved efficiency. If `openai_previous_response_id` is not set, full history is sent. + +```python +from pydantic_ai import Agent +from pydantic_ai.models.openai import OpenAIResponsesModel, OpenAIResponsesModelSettings + +model = OpenAIResponsesModel('gpt-4o') +agent = Agent(model=model) + +result1 = agent.run_sync('Tell me a joke.') +print(result1.output) +#> Did you hear about the toothpaste scandal? They called it Colgate. + +# When set to 'auto', only the latest request and the most recent provider_response_id +# from history is sent to OpenAI API. +model_settings = OpenAIResponsesModelSettings(openai_previous_response_id='auto') +result2 = agent.run_sync( + 'Explain?', + message_history=result1.new_messages(), + model_settings=model_settings +) +print(result2.output) +#> This is an excellent joke invented by Samuel Colvin, it needs no explanation. +``` +It is recommended to use `auto` mode only when the history comes from a single, uninterrupted run, +with all responses coming from the same OpenAI model (e.g like internal tool calls), as the server-side history will override any locally modified history. ## OpenAI-compatible Models diff --git a/pydantic_ai_slim/pydantic_ai/models/openai.py b/pydantic_ai_slim/pydantic_ai/models/openai.py index 7314dc8194..7fffde1339 100644 --- a/pydantic_ai_slim/pydantic_ai/models/openai.py +++ b/pydantic_ai_slim/pydantic_ai/models/openai.py @@ -222,9 +222,12 @@ class OpenAIResponsesModelSettings(OpenAIChatModelSettings, total=False): `medium`, and `high`. """ - openai_previous_response_id: str + openai_previous_response_id: Literal['auto'] | str """The identifier of the most recent response to include in the API request. + When set to `auto`, the request automatically uses the most recent + `provider_response_id` along with the latest request from the message history. + This enables the model to reference previous reasoning traces. See the [OpenAI Responses API documentation](https://platform.openai.com/docs/guides/reasoning#keeping-reasoning-items-in-context) for more information. 
@@ -984,11 +987,14 @@ async def _responses_create( tool_choice = 'required' else: tool_choice = 'auto' - + print(messages) + print('-------') previous_response_id = model_settings.get('openai_previous_response_id') - if not previous_response_id: + if previous_response_id == 'auto': messages, previous_response_id = self._get_response_id_and_trim(messages) - + print(messages) + print(previous_response_id) + print('==========') instructions, openai_messages = await self._map_messages(messages, model_settings) reasoning = self._get_reasoning(model_settings) diff --git a/tests/models/cassettes/test_openai_responses/test_openai_previous_response_id_auto_mode.yaml b/tests/models/cassettes/test_openai_responses/test_openai_previous_response_id_auto_mode.yaml new file mode 100644 index 0000000000..fa8a265e34 --- /dev/null +++ b/tests/models/cassettes/test_openai_responses/test_openai_previous_response_id_auto_mode.yaml @@ -0,0 +1,67 @@ +interactions: +- request: + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-type: + - application/json + host: + - api.openai.com + method: POST + parsed_body: + input: + - content: What is the first secret key? + role: user + instructions: '' + model: gpt-5 + text: + format: + type: text + previous_response_id: resp_68b9bda81f5c8197a5a51a20a9f4150a000497db2a4c777b + uri: https://api.openai.com/v1/responses + response: + headers: + content-type: + - application/json + parsed_body: + created_at: 1743075630 + error: null + id: resp_a4168b9bda81f5c8197a5a51a20a9f4150a000497db2a4c5 + incomplete_details: null + instructions: '' + max_output_tokens: null + metadata: {} + model: gpt-5 + object: response + output: + - content: + - annotations: [] + text: "sesame" + type: output_text + id: msg_test_previous_response_id_auto + role: assistant + status: completed + type: message + parallel_tool_calls: true + previous_response_id: resp_68b9bda81f5c8197a5a51a20a9f4150a000497db2a4c777b + reasoning: null + status: complete + status_details: null + tool_calls: null + total_tokens: 15 + usage: + input_tokens: 10 + input_tokens_details: + cached_tokens: 0 + output_tokens: 1 + output_tokens_details: + reasoning_tokens: 0 + total_tokens: 11 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/models/test_openai_responses.py b/tests/models/test_openai_responses.py index 8ffc4401a6..db70c8f77e 100644 --- a/tests/models/test_openai_responses.py +++ b/tests/models/test_openai_responses.py @@ -1191,7 +1191,50 @@ async def test_openai_previous_response_id(allow_model_requests: None, openai_ap assert result.output == snapshot('sesame') -async def test_previous_response_id_mixed_model_history(allow_model_requests: None, openai_api_key: str): +@pytest.mark.vcr() +async def test_openai_previous_response_id_auto_mode(allow_model_requests: None, openai_api_key: str): + """Test if invalid previous response id is ignored when history contains non-OpenAI responses""" + history = [ + ModelRequest( + parts=[ + UserPromptPart( + content='The first secret key is sesame', + ), + ], + ), + ModelResponse( + parts=[ + TextPart(content='Open sesame! 
What would you like to unlock?'), + ], + model_name='gpt-5', + provider_name='openai', + provider_response_id='resp_68b9bd97025c8195b443af591ca2345c08cb6072affe6099', + ), + ModelRequest( + parts=[ + UserPromptPart( + content='The second secret key is olives', + ), + ], + ), + ModelResponse( + parts=[ + TextPart(content='Understood'), + ], + model_name='gpt-5', + provider_name='openai', + provider_response_id='resp_68b9bda81f5c8197a5a51a20a9f4150a000497db2a4c777b', + ), + ] + + model = OpenAIResponsesModel('gpt-5', provider=OpenAIProvider(api_key=openai_api_key)) + agent = Agent(model=model) + settings = OpenAIResponsesModelSettings(openai_previous_response_id='auto') + result = await agent.run('what is the first secret key', message_history=history, model_settings=settings) + assert result.output == snapshot('sesame') + + +async def test_openai_previous_response_id_mixed_model_history(allow_model_requests: None, openai_api_key: str): """Test if invalid previous response id is ignored when history contains non-OpenAI responses""" history = [ ModelRequest( @@ -1261,7 +1304,7 @@ async def test_previous_response_id_mixed_model_history(allow_model_requests: No ) -async def test_previous_response_id_same_model_history(allow_model_requests: None, openai_api_key: str): +async def test_openai_previous_response_id_same_model_history(allow_model_requests: None, openai_api_key: str): """Test if message history is trimmed when model responses are from same model""" history = [ ModelRequest( From 29f453d063a85edca25bcccec9769959a575c383 Mon Sep 17 00:00:00 2001 From: gdammn Date: Wed, 10 Sep 2025 14:56:24 +0200 Subject: [PATCH 11/20] clean-up --- pydantic_ai_slim/pydantic_ai/models/openai.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/models/openai.py b/pydantic_ai_slim/pydantic_ai/models/openai.py index 7fffde1339..9bacf62ddb 100644 --- a/pydantic_ai_slim/pydantic_ai/models/openai.py +++ b/pydantic_ai_slim/pydantic_ai/models/openai.py @@ -987,14 +987,9 @@ async def _responses_create( tool_choice = 'required' else: tool_choice = 'auto' - print(messages) - print('-------') previous_response_id = model_settings.get('openai_previous_response_id') if previous_response_id == 'auto': messages, previous_response_id = self._get_response_id_and_trim(messages) - print(messages) - print(previous_response_id) - print('==========') instructions, openai_messages = await self._map_messages(messages, model_settings) reasoning = self._get_reasoning(model_settings) From 85d96a715b446436b17d2aa5c57af786dcc2ed94 Mon Sep 17 00:00:00 2001 From: gdammn Date: Thu, 11 Sep 2025 14:41:46 +0200 Subject: [PATCH 12/20] update logic update logic update logic update logic --- pydantic_ai_slim/pydantic_ai/models/openai.py | 31 +++++++-------- tests/models/test_openai_responses.py | 39 +++++++------------ 2 files changed, 29 insertions(+), 41 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/models/openai.py b/pydantic_ai_slim/pydantic_ai/models/openai.py index 9bacf62ddb..e274887c1a 100644 --- a/pydantic_ai_slim/pydantic_ai/models/openai.py +++ b/pydantic_ai_slim/pydantic_ai/models/openai.py @@ -987,9 +987,11 @@ async def _responses_create( tool_choice = 'required' else: tool_choice = 'auto' + previous_response_id = model_settings.get('openai_previous_response_id') if previous_response_id == 'auto': messages, previous_response_id = self._get_response_id_and_trim(messages) + instructions, openai_messages = await self._map_messages(messages, model_settings) reasoning = 
self._get_reasoning(model_settings) @@ -1107,26 +1109,21 @@ def _map_tool_definition(self, f: ToolDefinition) -> responses.FunctionToolParam } def _get_response_id_and_trim(self, messages: list[ModelMessage]) -> tuple[list[ModelMessage], str | None]: - # If the message history contains only openai responses, - # we can limit the history to the most recent ModelRequest. - # The provider_response_id from the latest ModelResponse is - # then passed as previous_response_id to preserve context. + # In `auto` mode, the history is trimmed up to (but not including) + # the latest ModelResponse with a valid `provider_response_id`. + # This is then passed as `previous_response_id` in the next request + # to maintain context along with the trimmed history. response_id = None - latest_model_request: ModelRequest | None = None - for m in messages: - # Openai may return a dated model_name that differs from self.model_name - # (e.g., "gpt-5" vs "gpt-5-2025-08-07"). - if isinstance(m, ModelResponse) and m.model_name and (self.model_name in m.model_name): + trimmed_messages: list[ModelMessage] = [] + for m in reversed(messages): + if isinstance(m, ModelResponse) and m.provider_name == self.system: response_id = m.provider_response_id - elif isinstance(m, ModelRequest): - latest_model_request = m - else: - # Mixed model responses invalidate response_id, - # so the history is kept intact. - response_id = None break - if response_id and latest_model_request: - return [latest_model_request], response_id + else: + trimmed_messages.append(m) + + if response_id and trimmed_messages: + return list(reversed(trimmed_messages)), response_id else: return messages, None diff --git a/tests/models/test_openai_responses.py b/tests/models/test_openai_responses.py index db70c8f77e..61f1e52717 100644 --- a/tests/models/test_openai_responses.py +++ b/tests/models/test_openai_responses.py @@ -1237,6 +1237,21 @@ async def test_openai_previous_response_id_auto_mode(allow_model_requests: None, async def test_openai_previous_response_id_mixed_model_history(allow_model_requests: None, openai_api_key: str): """Test if invalid previous response id is ignored when history contains non-OpenAI responses""" history = [ + # ModelRequest( + # parts=[ + # UserPromptPart( + # content='The first secret key is sesame', + # ), + # ], + # ), + # ModelResponse( + # parts=[ + # TextPart(content='Open sesame! What would you like to unlock?'), + # ], + # model_name='gpt-5', + # provider_name='openai', + # provider_response_id='resp_68b9bd97025c8195b443af591ca2345c08cb6072affe6099', + # ), ModelRequest( parts=[ UserPromptPart( @@ -1248,21 +1263,6 @@ async def test_openai_previous_response_id_mixed_model_history(allow_model_reque parts=[ TextPart(content='Open sesame! What would you like to unlock?'), ], - model_name='gpt-5', - provider_name='openai', - provider_response_id='resp_68b9bd97025c8195b443af591ca2345c08cb6072affe6099', - ), - ModelRequest( - parts=[ - UserPromptPart( - content='The second secret key is olives', - ), - ], - ), - ModelResponse( - parts=[ - TextPart(content='Understood'), - ], model_name='claude-3-5-sonnet-latest', provider_name='anthropic', provider_response_id='msg_01XUQuedGz9gusk4xZm4gWJj', @@ -1285,15 +1285,6 @@ async def test_openai_previous_response_id_mixed_model_history(allow_model_reque ModelResponse( parts=[TextPart(content='Open sesame! 
What would you like to unlock?')], usage=RequestUsage(), - model_name='gpt-5', - timestamp=IsDatetime(), - provider_name='openai', - provider_response_id='resp_68b9bd97025c8195b443af591ca2345c08cb6072affe6099', - ), - ModelRequest(parts=[UserPromptPart(content='The second secret key is olives', timestamp=IsDatetime())]), - ModelResponse( - parts=[TextPart(content='Understood')], - usage=RequestUsage(), model_name='claude-3-5-sonnet-latest', timestamp=IsDatetime(), provider_name='anthropic', From f6c55206036d37966b6ee5c782d13115e8d4aa7f Mon Sep 17 00:00:00 2001 From: gdammn Date: Thu, 11 Sep 2025 14:52:48 +0200 Subject: [PATCH 13/20] clean-up --- .../test_model_names/test_known_model_names.yaml | 6 +++--- tests/models/test_openai_responses.py | 15 --------------- 2 files changed, 3 insertions(+), 18 deletions(-) diff --git a/tests/models/cassettes/test_model_names/test_known_model_names.yaml b/tests/models/cassettes/test_model_names/test_known_model_names.yaml index 7d69d693d5..ff08e77042 100644 --- a/tests/models/cassettes/test_model_names/test_known_model_names.yaml +++ b/tests/models/cassettes/test_model_names/test_known_model_names.yaml @@ -99,7 +99,7 @@ interactions: alt-svc: - h3=":443"; ma=86400 content-length: - - '762' + - '99' content-type: - application/json referrer-policy: @@ -146,6 +146,6 @@ interactions: owned_by: Cerebras object: list status: - code: 200 - message: OK + code: 401 + message: Unauthorized version: 1 diff --git a/tests/models/test_openai_responses.py b/tests/models/test_openai_responses.py index 61f1e52717..4d78142109 100644 --- a/tests/models/test_openai_responses.py +++ b/tests/models/test_openai_responses.py @@ -1237,21 +1237,6 @@ async def test_openai_previous_response_id_auto_mode(allow_model_requests: None, async def test_openai_previous_response_id_mixed_model_history(allow_model_requests: None, openai_api_key: str): """Test if invalid previous response id is ignored when history contains non-OpenAI responses""" history = [ - # ModelRequest( - # parts=[ - # UserPromptPart( - # content='The first secret key is sesame', - # ), - # ], - # ), - # ModelResponse( - # parts=[ - # TextPart(content='Open sesame! 
What would you like to unlock?'), - # ], - # model_name='gpt-5', - # provider_name='openai', - # provider_response_id='resp_68b9bd97025c8195b443af591ca2345c08cb6072affe6099', - # ), ModelRequest( parts=[ UserPromptPart( From fc9cb257267302e80d0dad9d08d7be20235c0661 Mon Sep 17 00:00:00 2001 From: gdammn Date: Fri, 12 Sep 2025 11:54:21 +0200 Subject: [PATCH 14/20] clean-up --- .../cassettes/test_model_names/test_known_model_names.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/models/cassettes/test_model_names/test_known_model_names.yaml b/tests/models/cassettes/test_model_names/test_known_model_names.yaml index ff08e77042..7d69d693d5 100644 --- a/tests/models/cassettes/test_model_names/test_known_model_names.yaml +++ b/tests/models/cassettes/test_model_names/test_known_model_names.yaml @@ -99,7 +99,7 @@ interactions: alt-svc: - h3=":443"; ma=86400 content-length: - - '99' + - '762' content-type: - application/json referrer-policy: @@ -146,6 +146,6 @@ interactions: owned_by: Cerebras object: list status: - code: 401 - message: Unauthorized + code: 200 + message: OK version: 1 From 23f174e24999b92739ca50581330f40fb595913c Mon Sep 17 00:00:00 2001 From: Forge <64839751+GDaamn@users.noreply.github.com> Date: Fri, 12 Sep 2025 19:39:17 +0200 Subject: [PATCH 15/20] update docs Co-authored-by: Douwe Maan update docs Co-authored-by: Douwe Maan update docs Co-authored-by: Douwe Maan update docs Co-authored-by: Douwe Maan update docs Co-authored-by: Douwe Maan --- docs/models/openai.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/models/openai.md b/docs/models/openai.md index 0e02ad1567..970f26aff4 100644 --- a/docs/models/openai.md +++ b/docs/models/openai.md @@ -145,14 +145,14 @@ You can learn more about the differences between the Responses API and Chat Comp #### Referencing earlier responses -The Responses API also supports referencing earlier model responses in a new request. This is available through the `openai_previous_response_id` field in +The Responses API supports referencing earlier model responses in a new request using a `previous_response_id` parameter, to ensure the full [conversation state](https://platform.openai.com/docs/guides/conversation-state?api-mode=responses#passing-context-from-the-previous-response) including [reasoning items](https://platform.openai.com/docs/guides/reasoning#keeping-reasoning-items-in-context) are kept in context. This is available through the `openai_previous_response_id` field in [`OpenAIResponsesModelSettings`][pydantic_ai.models.openai.OpenAIResponsesModelSettings]. ```python from pydantic_ai import Agent from pydantic_ai.models.openai import OpenAIResponsesModel, OpenAIResponsesModelSettings -model = OpenAIResponsesModel('gpt-4o') +model = OpenAIResponsesModel('gpt-5') agent = Agent(model=model) result = agent.run_sync('The secret is 1234') @@ -166,13 +166,15 @@ print(result.output) By passing the `provider_response_id` from an earlier run, you can allow the model to build on its own prior reasoning without needing to resend the full message history. -Alternatively, `openai_previous_response_id` field also supports `auto` mode. When enabled, Pydantic AI automatically selects the latest request and the most recent `provider_response_id` from message history to send to OpenAI API, leveraging server-side history instead, for improved efficiency. If `openai_previous_response_id` is not set, full history is sent. 
+##### Automatically referencing earlier responses + +When the `openai_previous_response_id` field is set to `'auto'`, Pydantic AI will automatically select the most recent `provider_response_id` from message history and omit messages that came before it, letting the OpenAI SDK leverage server-side history instead for improved efficiency. ```python from pydantic_ai import Agent from pydantic_ai.models.openai import OpenAIResponsesModel, OpenAIResponsesModelSettings -model = OpenAIResponsesModel('gpt-4o') +model = OpenAIResponsesModel('gpt-5') agent = Agent(model=model) result1 = agent.run_sync('Tell me a joke.') @@ -190,8 +192,6 @@ result2 = agent.run_sync( print(result2.output) #> This is an excellent joke invented by Samuel Colvin, it needs no explanation. ``` -It is recommended to use `auto` mode only when the history comes from a single, uninterrupted run, -with all responses coming from the same OpenAI model (e.g like internal tool calls), as the server-side history will override any locally modified history. ## OpenAI-compatible Models From b1aba38b5f392095d82c6df29c5684891136e5de Mon Sep 17 00:00:00 2001 From: Forge <64839751+GDaamn@users.noreply.github.com> Date: Fri, 12 Sep 2025 19:41:19 +0200 Subject: [PATCH 16/20] update docstring Co-authored-by: Douwe Maan --- pydantic_ai_slim/pydantic_ai/models/openai.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/models/openai.py b/pydantic_ai_slim/pydantic_ai/models/openai.py index e274887c1a..51babcc9a8 100644 --- a/pydantic_ai_slim/pydantic_ai/models/openai.py +++ b/pydantic_ai_slim/pydantic_ai/models/openai.py @@ -223,12 +223,12 @@ class OpenAIResponsesModelSettings(OpenAIChatModelSettings, total=False): """ openai_previous_response_id: Literal['auto'] | str - """The identifier of the most recent response to include in the API request. + """The ID of a previous response from the model to use as the starting point for a continued conversation. - When set to `auto`, the request automatically uses the most recent - `provider_response_id` along with the latest request from the message history. + When set to `'auto'`, the request automatically uses the most recent + `provider_response_id` from the message history and omits earlier messages. - This enables the model to reference previous reasoning traces. + This enables the model to use server-side conversation state and faithfully reference previous reasoning. See the [OpenAI Responses API documentation](https://platform.openai.com/docs/guides/reasoning#keeping-reasoning-items-in-context) for more information. 
""" From 9779c74fb6e1c56516805c1b0b2685d473bcdeba Mon Sep 17 00:00:00 2001 From: gdammn Date: Fri, 12 Sep 2025 19:50:02 +0200 Subject: [PATCH 17/20] update function signature --- pydantic_ai_slim/pydantic_ai/models/openai.py | 16 +++++++++------- tests/models/test_openai_responses.py | 8 ++++---- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/models/openai.py b/pydantic_ai_slim/pydantic_ai/models/openai.py index 51babcc9a8..26845608c6 100644 --- a/pydantic_ai_slim/pydantic_ai/models/openai.py +++ b/pydantic_ai_slim/pydantic_ai/models/openai.py @@ -990,7 +990,7 @@ async def _responses_create( previous_response_id = model_settings.get('openai_previous_response_id') if previous_response_id == 'auto': - messages, previous_response_id = self._get_response_id_and_trim(messages) + previous_response_id, messages = self._get_previous_response_id_and_new_messages(messages) instructions, openai_messages = await self._map_messages(messages, model_settings) reasoning = self._get_reasoning(model_settings) @@ -1108,24 +1108,26 @@ def _map_tool_definition(self, f: ToolDefinition) -> responses.FunctionToolParam ), } - def _get_response_id_and_trim(self, messages: list[ModelMessage]) -> tuple[list[ModelMessage], str | None]: + def _get_previous_response_id_and_new_messages( + self, messages: list[ModelMessage] + ) -> tuple[str | None, list[ModelMessage]]: # In `auto` mode, the history is trimmed up to (but not including) # the latest ModelResponse with a valid `provider_response_id`. # This is then passed as `previous_response_id` in the next request # to maintain context along with the trimmed history. - response_id = None + previous_response_id = None trimmed_messages: list[ModelMessage] = [] for m in reversed(messages): if isinstance(m, ModelResponse) and m.provider_name == self.system: - response_id = m.provider_response_id + previous_response_id = m.provider_response_id break else: trimmed_messages.append(m) - if response_id and trimmed_messages: - return list(reversed(trimmed_messages)), response_id + if previous_response_id and trimmed_messages: + return previous_response_id, list(reversed(trimmed_messages)) else: - return messages, None + return None, messages async def _map_messages( # noqa: C901 self, messages: list[ModelMessage], model_settings: OpenAIResponsesModelSettings diff --git a/tests/models/test_openai_responses.py b/tests/models/test_openai_responses.py index 4d78142109..2b08708761 100644 --- a/tests/models/test_openai_responses.py +++ b/tests/models/test_openai_responses.py @@ -1262,9 +1262,9 @@ async def test_openai_previous_response_id_mixed_model_history(allow_model_reque ] model = OpenAIResponsesModel('gpt-5', provider=OpenAIProvider(api_key=openai_api_key)) - history, previous_response_id = model._get_response_id_and_trim(history) # type: ignore + previous_response_id, messages = model._get_previous_response_id_and_new_messages(history) # type: ignore assert not previous_response_id - assert history == snapshot( + assert messages == snapshot( [ ModelRequest(parts=[UserPromptPart(content='The first secret key is sesame', timestamp=IsDatetime())]), ModelResponse( @@ -1323,9 +1323,9 @@ async def test_openai_previous_response_id_same_model_history(allow_model_reques ] model = OpenAIResponsesModel('gpt-5', provider=OpenAIProvider(api_key=openai_api_key)) - history, previous_response_id = model._get_response_id_and_trim(history) # type: ignore + previous_response_id, messages = model._get_previous_response_id_and_new_messages(history) # 
type: ignore assert previous_response_id == 'resp_68b9bda81f5c8197a5a51a20a9f4150a000497db2a4c777b' - assert history == snapshot( + assert messages == snapshot( [ ModelRequest(parts=[UserPromptPart(content='what is the first secret key?', timestamp=IsDatetime())]), ] From ff81726726e5aeef92f0abb31c9f2a0b59da00a6 Mon Sep 17 00:00:00 2001 From: gdammn Date: Fri, 12 Sep 2025 19:55:28 +0200 Subject: [PATCH 18/20] update docstring --- pydantic_ai_slim/pydantic_ai/models/openai.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/models/openai.py b/pydantic_ai_slim/pydantic_ai/models/openai.py index 26845608c6..2b83fcad30 100644 --- a/pydantic_ai_slim/pydantic_ai/models/openai.py +++ b/pydantic_ai_slim/pydantic_ai/models/openai.py @@ -1111,10 +1111,11 @@ def _map_tool_definition(self, f: ToolDefinition) -> responses.FunctionToolParam def _get_previous_response_id_and_new_messages( self, messages: list[ModelMessage] ) -> tuple[str | None, list[ModelMessage]]: - # In `auto` mode, the history is trimmed up to (but not including) - # the latest ModelResponse with a valid `provider_response_id`. - # This is then passed as `previous_response_id` in the next request - # to maintain context along with the trimmed history. + # When `openai_previous_response_id` is set to 'auto', the most recent + # `provider_response_id` from the message history is selected and all + # earlier messages are omitted. This allows the OpenAI SDK to reuse + # server-side history for efficiency. The returned tuple contains the + # `previous_response_id` (if found) and the trimmed list of messages. previous_response_id = None trimmed_messages: list[ModelMessage] = [] for m in reversed(messages): From 40ba00d940f445bba3b4c45006f1336ca7b7399d Mon Sep 17 00:00:00 2001 From: gdammn Date: Fri, 12 Sep 2025 20:08:16 +0200 Subject: [PATCH 19/20] update code comments in docs --- docs/models/openai.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/models/openai.md b/docs/models/openai.md index 970f26aff4..988e9f4103 100644 --- a/docs/models/openai.md +++ b/docs/models/openai.md @@ -181,8 +181,8 @@ result1 = agent.run_sync('Tell me a joke.') print(result1.output) #> Did you hear about the toothpaste scandal? They called it Colgate. -# When set to 'auto', only the latest request and the most recent provider_response_id -# from history is sent to OpenAI API. +# When set to 'auto', the most recent provider_response_id +# and messages after it are sent as request. model_settings = OpenAIResponsesModelSettings(openai_previous_response_id='auto') result2 = agent.run_sync( 'Explain?', From cf19b2b27b7767a14db098000fe244456b1abc79 Mon Sep 17 00:00:00 2001 From: Douwe Maan Date: Fri, 12 Sep 2025 13:26:43 -0600 Subject: [PATCH 20/20] Update docs/models/openai.md --- docs/models/openai.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/models/openai.md b/docs/models/openai.md index 988e9f4103..e444d6d04e 100644 --- a/docs/models/openai.md +++ b/docs/models/openai.md @@ -168,7 +168,7 @@ By passing the `provider_response_id` from an earlier run, you can allow the mod ##### Automatically referencing earlier responses -When the `openai_previous_response_id` field is set to `'auto'`, Pydantic AI will automatically select the most recent `provider_response_id` from message history and omit messages that came before it, letting the OpenAI SDK leverage server-side history instead for improved efficiency. 
When the `openai_previous_response_id` field is set to `'auto'`, Pydantic AI automatically selects the most recent `provider_response_id` from the message history and omits the messages that came before it, letting the OpenAI API leverage server-side history instead of resending it, for improved efficiency.

```python
from pydantic_ai import Agent