diff --git a/docs/models/openai.md b/docs/models/openai.md
index 86febbcc17..e444d6d04e 100644
--- a/docs/models/openai.md
+++ b/docs/models/openai.md
@@ -143,6 +143,56 @@ As of 7:48 AM on Wednesday, April 2, 2025, in Tokyo, Japan, the weather is cloud
 You can learn more about the differences between the Responses API and Chat Completions API in the [OpenAI API docs](https://platform.openai.com/docs/guides/responses-vs-chat-completions).
 
+#### Referencing earlier responses
+
+The Responses API supports referencing earlier model responses in a new request using a `previous_response_id` parameter, to ensure the full [conversation state](https://platform.openai.com/docs/guides/conversation-state?api-mode=responses#passing-context-from-the-previous-response) including [reasoning items](https://platform.openai.com/docs/guides/reasoning#keeping-reasoning-items-in-context) is kept in context. This is available through the `openai_previous_response_id` field in
+[`OpenAIResponsesModelSettings`][pydantic_ai.models.openai.OpenAIResponsesModelSettings].
+
+```python
+from pydantic_ai import Agent
+from pydantic_ai.models.openai import OpenAIResponsesModel, OpenAIResponsesModelSettings
+
+model = OpenAIResponsesModel('gpt-5')
+agent = Agent(model=model)
+
+result = agent.run_sync('The secret is 1234')
+model_settings = OpenAIResponsesModelSettings(
+    openai_previous_response_id=result.all_messages()[-1].provider_response_id
+)
+result = agent.run_sync('What is the secret code?', model_settings=model_settings)
+print(result.output)
+#> 1234
+```
+
+By passing the `provider_response_id` from an earlier run, you allow the model to build on its own prior reasoning without resending the full message history.
+
+##### Automatically referencing earlier responses
+
+When the `openai_previous_response_id` field is set to `'auto'`, Pydantic AI automatically selects the most recent `provider_response_id` from the message history and omits the messages that came before it, letting the OpenAI API use its server-side conversation state instead for improved efficiency.
+
+```python
+from pydantic_ai import Agent
+from pydantic_ai.models.openai import OpenAIResponsesModel, OpenAIResponsesModelSettings
+
+model = OpenAIResponsesModel('gpt-5')
+agent = Agent(model=model)
+
+result1 = agent.run_sync('Tell me a joke.')
+print(result1.output)
+#> Did you hear about the toothpaste scandal? They called it Colgate.
+
+# When set to 'auto', only the most recent provider_response_id
+# and the messages that follow it are sent in the request.
+model_settings = OpenAIResponsesModelSettings(openai_previous_response_id='auto')
+result2 = agent.run_sync(
+    'Explain?',
+    message_history=result1.new_messages(),
+    model_settings=model_settings,
+)
+print(result2.output)
+#> This is an excellent joke invented by Samuel Colvin, it needs no explanation.
+```
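+
+If the most recent response in the message history was not generated by the OpenAI Responses API (for example, when the conversation includes responses from other providers), the `'auto'` setting falls back to sending the full message history without a `previous_response_id`.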
+
 ## OpenAI-compatible Models
 
 Many providers and models are compatible with the OpenAI API, and can be used with `OpenAIChatModel` in Pydantic AI.
diff --git a/pydantic_ai_slim/pydantic_ai/models/openai.py b/pydantic_ai_slim/pydantic_ai/models/openai.py
index 3249aab7a4..2b83fcad30 100644
--- a/pydantic_ai_slim/pydantic_ai/models/openai.py
+++ b/pydantic_ai_slim/pydantic_ai/models/openai.py
@@ -222,6 +222,17 @@ class OpenAIResponsesModelSettings(OpenAIChatModelSettings, total=False):
     `medium`, and `high`.
     """
 
+    openai_previous_response_id: Literal['auto'] | str
+    """The ID of a previous response from the model to use as the starting point for a continued conversation.
+
+    When set to `'auto'`, the request automatically uses the most recent
+    `provider_response_id` from the message history and omits earlier messages.
+
+    This enables the model to use server-side conversation state and faithfully reference its previous reasoning.
+    See the [OpenAI Responses API documentation](https://platform.openai.com/docs/guides/reasoning#keeping-reasoning-items-in-context)
+    for more information.
+    """
+
 
 @dataclass(init=False)
 class OpenAIChatModel(Model):
@@ -977,6 +988,10 @@ async def _responses_create(
         else:
             tool_choice = 'auto'
 
+        previous_response_id = model_settings.get('openai_previous_response_id')
+        if previous_response_id == 'auto':
+            previous_response_id, messages = self._get_previous_response_id_and_new_messages(messages)
+
         instructions, openai_messages = await self._map_messages(messages, model_settings)
         reasoning = self._get_reasoning(model_settings)
 
@@ -1027,6 +1042,7 @@
             truncation=model_settings.get('openai_truncation', NOT_GIVEN),
             timeout=model_settings.get('timeout', NOT_GIVEN),
             service_tier=model_settings.get('openai_service_tier', NOT_GIVEN),
+            previous_response_id=previous_response_id or NOT_GIVEN,
             reasoning=reasoning,
             user=model_settings.get('openai_user', NOT_GIVEN),
             text=text or NOT_GIVEN,
@@ -1092,6 +1108,28 @@ def _map_tool_definition(self, f: ToolDefinition) -> responses.FunctionToolParam
             ),
         }
 
+    def _get_previous_response_id_and_new_messages(
+        self, messages: list[ModelMessage]
+    ) -> tuple[str | None, list[ModelMessage]]:
+        # When `openai_previous_response_id` is set to 'auto', the most recent
+        # `provider_response_id` from the message history is selected and all
+        # earlier messages are omitted. This allows the OpenAI API to reuse
+        # server-side history for efficiency. The returned tuple contains the
+        # `previous_response_id` (if found) and the trimmed list of messages.
+        previous_response_id = None
+        trimmed_messages: list[ModelMessage] = []
+        for m in reversed(messages):
+            if isinstance(m, ModelResponse) and m.provider_name == self.system:
+                previous_response_id = m.provider_response_id
+                break
+            else:
+                trimmed_messages.append(m)
+
+        # If no response from this provider is found, or no messages follow it,
+        # fall back to sending the full history without a previous response ID.
+        if previous_response_id and trimmed_messages:
+            return previous_response_id, list(reversed(trimmed_messages))
+        else:
+            return None, messages
+
     async def _map_messages(  # noqa: C901
         self, messages: list[ModelMessage], model_settings: OpenAIResponsesModelSettings
     ) -> tuple[str | NotGiven, list[responses.ResponseInputItemParam]]:
diff --git a/tests/models/cassettes/test_openai_responses/test_openai_previous_response_id.yaml b/tests/models/cassettes/test_openai_responses/test_openai_previous_response_id.yaml
new file mode 100644
index 0000000000..aa5aabd789
--- /dev/null
+++ b/tests/models/cassettes/test_openai_responses/test_openai_previous_response_id.yaml
@@ -0,0 +1,131 @@
+interactions:
+- request:
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+    method: POST
+    parsed_body:
+      input:
+      - content: The secret key is sesame
+        role: user
+      instructions: ''
+      model: gpt-5
+      text:
+        format:
+          type: text
+    uri: https://api.openai.com/v1/responses
+  response:
+    headers:
+      content-type:
+      - application/json
+    parsed_body:
+      created_at: 1743075629
+      error: null
+      id: resp_1234
+      incomplete_details: null
+      instructions: ''
+      max_output_tokens: null
+      metadata: {}
+      model: gpt-5
+      object: response
+      output:
+      - content:
+        - annotations: []
+          text: "Open sesame! What would you like to unlock?"
+          type: output_text
+        id: msg_test_previous_response_id
+        role: assistant
+        status: completed
+        type: message
+      parallel_tool_calls: true
+      previous_response_id: null
+      reasoning: null
+      status: completed
+      status_details: null
+      tool_calls: null
+      total_tokens: 15
+      usage:
+        input_tokens: 10
+        input_tokens_details:
+          cached_tokens: 0
+        output_tokens: 1
+        output_tokens_details:
+          reasoning_tokens: 0
+        total_tokens: 11
+    status:
+      code: 200
+      message: OK
+- request:
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+    method: POST
+    parsed_body:
+      input:
+      - content: What is the secret code?
+        role: user
+      instructions: ''
+      model: gpt-5
+      text:
+        format:
+          type: text
+      previous_response_id: resp_1234
+    uri: https://api.openai.com/v1/responses
+  response:
+    headers:
+      content-type:
+      - application/json
+    parsed_body:
+      created_at: 1743075630
+      error: null
+      id: resp_5678
+      incomplete_details: null
+      instructions: ''
+      max_output_tokens: null
+      metadata: {}
+      model: gpt-5
+      object: response
+      output:
+      - content:
+        - annotations: []
+          text: "sesame"
+          type: output_text
+        id: msg_test_previous_response_id
+        role: assistant
+        status: completed
+        type: message
+      parallel_tool_calls: true
+      previous_response_id: resp_1234
+      reasoning: null
+      status: completed
+      status_details: null
+      tool_calls: null
+      total_tokens: 15
+      usage:
+        input_tokens: 10
+        input_tokens_details:
+          cached_tokens: 0
+        output_tokens: 1
+        output_tokens_details:
+          reasoning_tokens: 0
+        total_tokens: 11
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/models/cassettes/test_openai_responses/test_openai_previous_response_id_auto_mode.yaml b/tests/models/cassettes/test_openai_responses/test_openai_previous_response_id_auto_mode.yaml
new file mode 100644
index 0000000000..fa8a265e34
--- /dev/null
+++ b/tests/models/cassettes/test_openai_responses/test_openai_previous_response_id_auto_mode.yaml
@@ -0,0 +1,67 @@
+interactions:
+- request:
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+    method: POST
+    parsed_body:
+      input:
+      - content: what is the first secret key
+        role: user
+      instructions: ''
+      model: gpt-5
+      text:
+        format:
+          type: text
+      previous_response_id: resp_68b9bda81f5c8197a5a51a20a9f4150a000497db2a4c777b
+    uri: https://api.openai.com/v1/responses
+  response:
+    headers:
+      content-type:
+      - application/json
+    parsed_body:
+      created_at: 1743075630
+      error: null
+      id: resp_a4168b9bda81f5c8197a5a51a20a9f4150a000497db2a4c5
+      incomplete_details: null
+      instructions: ''
+      max_output_tokens: null
+      metadata: {}
+      model: gpt-5
+      object: response
+      output:
+      - content:
+        - annotations: []
+          text: "sesame"
+          type: output_text
+        id: msg_test_previous_response_id_auto
+        role: assistant
+        status: completed
+        type: message
+      parallel_tool_calls: true
+      previous_response_id: resp_68b9bda81f5c8197a5a51a20a9f4150a000497db2a4c777b
+      reasoning: null
+      status: completed
+      status_details: null
+      tool_calls: null
+      total_tokens: 15
+      usage:
+        input_tokens: 10
+        input_tokens_details:
+          cached_tokens: 0
+        output_tokens: 1
+        output_tokens_details:
+          reasoning_tokens: 0
+        total_tokens: 11
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/models/test_openai_responses.py b/tests/models/test_openai_responses.py
index 5c44bdd246..2b08708761 100644
--- a/tests/models/test_openai_responses.py
+++ b/tests/models/test_openai_responses.py
@@ -1180,6 +1180,158 @@ async def test_openai_responses_verbosity(allow_model_requests: None, openai_api
     assert result.output == snapshot('4')
 
 
+@pytest.mark.vcr()
+async def test_openai_previous_response_id(allow_model_requests: None, openai_api_key: str):
+    """Test that an explicit previous_response_id in settings lets the model use server-side conversation state"""
+    model = OpenAIResponsesModel('gpt-5', provider=OpenAIProvider(api_key=openai_api_key))
+    agent = Agent(model=model)
+    result = await agent.run('The secret key is sesame')
+    settings = OpenAIResponsesModelSettings(openai_previous_response_id=result.all_messages()[-1].provider_response_id)  # type: ignore
+    result = await agent.run('What is the secret code?', model_settings=settings)
+    assert result.output == snapshot('sesame')
+
+
+@pytest.mark.vcr()
+async def test_openai_previous_response_id_auto_mode(allow_model_requests: None, openai_api_key: str):
+    """Test that 'auto' selects the most recent provider_response_id and trims earlier messages from the request"""
+    history = [
+        ModelRequest(
+            parts=[
+                UserPromptPart(
+                    content='The first secret key is sesame',
+                ),
+            ],
+        ),
+        ModelResponse(
+            parts=[
+                TextPart(content='Open sesame! What would you like to unlock?'),
+            ],
+            model_name='gpt-5',
+            provider_name='openai',
+            provider_response_id='resp_68b9bd97025c8195b443af591ca2345c08cb6072affe6099',
+        ),
+        ModelRequest(
+            parts=[
+                UserPromptPart(
+                    content='The second secret key is olives',
+                ),
+            ],
+        ),
+        ModelResponse(
+            parts=[
+                TextPart(content='Understood'),
+            ],
+            model_name='gpt-5',
+            provider_name='openai',
+            provider_response_id='resp_68b9bda81f5c8197a5a51a20a9f4150a000497db2a4c777b',
+        ),
+    ]
+
+    model = OpenAIResponsesModel('gpt-5', provider=OpenAIProvider(api_key=openai_api_key))
+    agent = Agent(model=model)
+    settings = OpenAIResponsesModelSettings(openai_previous_response_id='auto')
+    result = await agent.run('what is the first secret key', message_history=history, model_settings=settings)
+    assert result.output == snapshot('sesame')
+
+
+async def test_openai_previous_response_id_mixed_model_history(allow_model_requests: None, openai_api_key: str):
+    """Test that 'auto' is ignored and the full history is sent when the latest response is not from OpenAI"""
+    history = [
+        ModelRequest(
+            parts=[
+                UserPromptPart(
+                    content='The first secret key is sesame',
+                ),
+            ],
+        ),
+        ModelResponse(
+            parts=[
+                TextPart(content='Open sesame! What would you like to unlock?'),
+            ],
+            model_name='claude-3-5-sonnet-latest',
+            provider_name='anthropic',
+            provider_response_id='msg_01XUQuedGz9gusk4xZm4gWJj',
+        ),
+        ModelRequest(
+            parts=[
+                UserPromptPart(
+                    content='what is the first secret key?',
+                ),
+            ],
+        ),
+    ]
+
+    model = OpenAIResponsesModel('gpt-5', provider=OpenAIProvider(api_key=openai_api_key))
+    previous_response_id, messages = model._get_previous_response_id_and_new_messages(history)  # type: ignore
+    assert not previous_response_id
+    assert messages == snapshot(
+        [
+            ModelRequest(parts=[UserPromptPart(content='The first secret key is sesame', timestamp=IsDatetime())]),
+            ModelResponse(
+                parts=[TextPart(content='Open sesame! What would you like to unlock?')],
+                usage=RequestUsage(),
+                model_name='claude-3-5-sonnet-latest',
+                timestamp=IsDatetime(),
+                provider_name='anthropic',
+                provider_response_id='msg_01XUQuedGz9gusk4xZm4gWJj',
+            ),
+            ModelRequest(parts=[UserPromptPart(content='what is the first secret key?', timestamp=IsDatetime())]),
+        ]
+    )
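+
+
+async def test_openai_previous_response_id_no_new_messages(allow_model_requests: None, openai_api_key: str):
+    """Test that 'auto' falls back to the full history when no messages follow the latest OpenAI response"""
+    history = [
+        ModelRequest(
+            parts=[
+                UserPromptPart(
+                    content='The secret key is sesame',
+                ),
+            ],
+        ),
+        ModelResponse(
+            parts=[
+                TextPart(content='Understood'),
+            ],
+            model_name='gpt-5',
+            provider_name='openai',
+            provider_response_id='resp_68b9bda81f5c8197a5a51a20a9f4150a000497db2a4c777b',
+        ),
+    ]
+
+    model = OpenAIResponsesModel('gpt-5', provider=OpenAIProvider(api_key=openai_api_key))
+    # With no new messages after the most recent OpenAI response, no
+    # previous_response_id is returned and the history is left untouched.
+    previous_response_id, messages = model._get_previous_response_id_and_new_messages(history)  # type: ignore
+    assert not previous_response_id
+    assert messages == history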
+
+
+async def test_openai_previous_response_id_same_model_history(allow_model_requests: None, openai_api_key: str):
+    """Test that message history is trimmed when all prior responses come from the same OpenAI model"""
+    history = [
+        ModelRequest(
+            parts=[
+                UserPromptPart(
+                    content='The first secret key is sesame',
+                ),
+            ],
+        ),
+        ModelResponse(
+            parts=[
+                TextPart(content='Open sesame! What would you like to unlock?'),
+            ],
+            model_name='gpt-5',
+            provider_name='openai',
+            provider_response_id='resp_68b9bd97025c8195b443af591ca2345c08cb6072affe6099',
+        ),
+        ModelRequest(
+            parts=[
+                UserPromptPart(
+                    content='The second secret key is olives',
+                ),
+            ],
+        ),
+        ModelResponse(
+            parts=[
+                TextPart(content='Understood'),
+            ],
+            model_name='gpt-5',
+            provider_name='openai',
+            provider_response_id='resp_68b9bda81f5c8197a5a51a20a9f4150a000497db2a4c777b',
+        ),
+        ModelRequest(
+            parts=[
+                UserPromptPart(
+                    content='what is the first secret key?',
+                ),
+            ],
+        ),
+    ]
+
+    model = OpenAIResponsesModel('gpt-5', provider=OpenAIProvider(api_key=openai_api_key))
+    previous_response_id, messages = model._get_previous_response_id_and_new_messages(history)  # type: ignore
+    assert previous_response_id == 'resp_68b9bda81f5c8197a5a51a20a9f4150a000497db2a4c777b'
+    assert messages == snapshot(
+        [
+            ModelRequest(parts=[UserPromptPart(content='what is the first secret key?', timestamp=IsDatetime())]),
+        ]
+    )
+
+
 async def test_openai_responses_usage_without_tokens_details(allow_model_requests: None):
     c = response_message(
         [
diff --git a/tests/test_examples.py b/tests/test_examples.py
index 145b633b7f..86984d4971 100644
--- a/tests/test_examples.py
+++ b/tests/test_examples.py
@@ -622,6 +622,10 @@ async def model_logic(  # noqa: C901
             return ModelResponse(parts=list(response))
         else:
             return ModelResponse(parts=[response])
+        elif m.content == 'The secret is 1234':
+            return ModelResponse(parts=[TextPart('The secret is safe with me')])
+        elif m.content == 'What is the secret code?':
+            return ModelResponse(parts=[TextPart('1234')])
     elif isinstance(m, ToolReturnPart) and m.tool_name == 'roulette_wheel':
         win = m.content == 'winner'