From 81cce2f9545bb8c5047a20d236764e2b182f6a7b Mon Sep 17 00:00:00 2001 From: adtyavrdhn Date: Wed, 18 Jun 2025 13:34:52 +0530 Subject: [PATCH 01/20] Scrubbing prompts and completions, sensitive data from logs --- pydantic_ai_slim/pydantic_ai/messages.py | 17 +++++++-- .../pydantic_ai/models/instrumented.py | 5 ++- tests/models/test_instrumented.py | 38 +++++++++++++++++++ 3 files changed, 55 insertions(+), 5 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/messages.py b/pydantic_ai_slim/pydantic_ai/messages.py index e468fc99aa..127ac884f9 100644 --- a/pydantic_ai_slim/pydantic_ai/messages.py +++ b/pydantic_ai_slim/pydantic_ai/messages.py @@ -75,6 +75,8 @@ class SystemPromptPart: """Part type identifier, this is available on all parts as a discriminator.""" def otel_event(self, _settings: InstrumentationSettings) -> Event: + if not _settings.include_sensitive_content: + return Event('gen_ai.system.message', body={'content': 'SCRUBBED', 'role': 'system'}) return Event('gen_ai.system.message', body={'content': self.content, 'role': 'system'}) __repr__ = _utils.dataclasses_no_defaults_repr @@ -353,6 +355,8 @@ class UserPromptPart: """Part type identifier, this is available on all parts as a discriminator.""" def otel_event(self, settings: InstrumentationSettings) -> Event: + if not settings.include_sensitive_content: + return Event('gen_ai.user.message', body={'content': 'SCRUBBED', 'role': 'user'}) # Redacting the content content: str | list[dict[str, Any] | str] if isinstance(self.content, str): content = self.content @@ -415,7 +419,12 @@ def model_response_object(self) -> dict[str, Any]: def otel_event(self, _settings: InstrumentationSettings) -> Event: return Event( 'gen_ai.tool.message', - body={'content': self.content, 'role': 'tool', 'id': self.tool_call_id, 'name': self.tool_name}, + body={ + 'content': self.content if _settings.include_sensitive_content else 'SCRUBBED', + 'role': 'tool', + 'id': self.tool_call_id, + 'name': self.tool_name, + }, ) __repr__ = _utils.dataclasses_no_defaults_repr @@ -478,7 +487,7 @@ def otel_event(self, _settings: InstrumentationSettings) -> Event: return Event( 'gen_ai.tool.message', body={ - 'content': self.model_response(), + 'content': self.model_response() if _settings.include_sensitive_content else 'SCRUBBED', 'role': 'tool', 'id': self.tool_call_id, 'name': self.tool_name, @@ -629,7 +638,7 @@ class ModelResponse: vendor_id: str | None = None """Vendor ID as specified by the model provider. This can be used to track the specific request to the model.""" - def otel_events(self) -> list[Event]: + def otel_events(self, _settings: InstrumentationSettings) -> list[Event]: """Return OpenTelemetry events for the response.""" result: list[Event] = [] @@ -655,7 +664,7 @@ def new_event_body(): elif isinstance(part, TextPart): if body.get('content'): body = new_event_body() - body['content'] = part.content + body['content'] = part.content if _settings.include_sensitive_content else 'SCRUBBED' return result diff --git a/pydantic_ai_slim/pydantic_ai/models/instrumented.py b/pydantic_ai_slim/pydantic_ai/models/instrumented.py index c99804ea33..0da2ef2ef7 100644 --- a/pydantic_ai_slim/pydantic_ai/models/instrumented.py +++ b/pydantic_ai_slim/pydantic_ai/models/instrumented.py @@ -92,6 +92,7 @@ def __init__( meter_provider: MeterProvider | None = None, event_logger_provider: EventLoggerProvider | None = None, include_binary_content: bool = True, + include_sensitive_content: bool = True, ): """Create instrumentation options. @@ -109,6 +110,7 @@ def __init__( Calling `logfire.configure()` sets the global event logger provider, so most users don't need this. This is only used if `event_mode='logs'`. include_binary_content: Whether to include binary content in the instrumentation events. + include_sensitive_content: Whether to include prompt and completion messages in the instrumentation events. """ from pydantic_ai import __version__ @@ -121,6 +123,7 @@ def __init__( self.event_logger = event_logger_provider.get_event_logger(scope_name, __version__) self.event_mode = event_mode self.include_binary_content = include_binary_content + self.include_sensitive_content = include_sensitive_content # As specified in the OpenTelemetry GenAI metrics spec: # https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/#metric-gen_aiclienttokenusage @@ -161,7 +164,7 @@ def messages_to_otel_events(self, messages: list[ModelMessage]) -> list[Event]: if hasattr(part, 'otel_event'): message_events.append(part.otel_event(self)) elif isinstance(message, ModelResponse): # pragma: no branch - message_events = message.otel_events() + message_events = message.otel_events(self) for event in message_events: event.attributes = { 'gen_ai.message.index': message_index, diff --git a/tests/models/test_instrumented.py b/tests/models/test_instrumented.py index f7caad3992..2b93862ac5 100644 --- a/tests/models/test_instrumented.py +++ b/tests/models/test_instrumented.py @@ -827,3 +827,41 @@ def test_messages_to_otel_events_without_binary_content(document_content: Binary } ] ) + + +def test_messages_to_otel_events_without_prompts_and_completions(): + messages: list[ModelMessage] = [ + ModelRequest(parts=[SystemPromptPart('system_prompt')]), + ModelResponse(parts=[TextPart('text1')]), + ModelRequest(parts=[UserPromptPart('user_prompt')]), + ModelResponse(parts=[TextPart('text2')]), + ] + settings = InstrumentationSettings(include_sensitive_content=False) + assert [InstrumentedModel.event_to_dict(e) for e in settings.messages_to_otel_events(messages)] == snapshot( + [ + { + 'content': 'SCRUBBED', + 'role': 'system', + 'gen_ai.message.index': 0, + 'event.name': 'gen_ai.system.message', + }, + { + 'content': 'SCRUBBED', + 'role': 'assistant', + 'gen_ai.message.index': 1, + 'event.name': 'gen_ai.assistant.message', + }, + { + 'content': 'SCRUBBED', + 'role': 'user', + 'gen_ai.message.index': 2, + 'event.name': 'gen_ai.user.message', + }, + { + 'content': 'SCRUBBED', + 'role': 'assistant', + 'gen_ai.message.index': 3, + 'event.name': 'gen_ai.assistant.message', + }, + ] + ) From 5989ad27ed7c701a44e7df09aea03d62a0fd4650 Mon Sep 17 00:00:00 2001 From: adtyavrdhn Date: Wed, 18 Jun 2025 13:39:39 +0530 Subject: [PATCH 02/20] Removing comment --- pydantic_ai_slim/pydantic_ai/messages.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydantic_ai_slim/pydantic_ai/messages.py b/pydantic_ai_slim/pydantic_ai/messages.py index 127ac884f9..d9f4b06332 100644 --- a/pydantic_ai_slim/pydantic_ai/messages.py +++ b/pydantic_ai_slim/pydantic_ai/messages.py @@ -356,7 +356,7 @@ class UserPromptPart: def otel_event(self, settings: InstrumentationSettings) -> Event: if not settings.include_sensitive_content: - return Event('gen_ai.user.message', body={'content': 'SCRUBBED', 'role': 'user'}) # Redacting the content + return Event('gen_ai.user.message', body={'content': 'SCRUBBED', 'role': 'user'}) content: str | list[dict[str, Any] | str] if isinstance(self.content, str): content = self.content From c7cd7b8a21250b6cef131a1cea64c39b1e16e521 Mon Sep 17 00:00:00 2001 From: adtyavrdhn Date: Wed, 18 Jun 2025 13:43:35 +0530 Subject: [PATCH 03/20] Adding test for all events --- tests/models/test_instrumented.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/models/test_instrumented.py b/tests/models/test_instrumented.py index 2b93862ac5..3f29530a14 100644 --- a/tests/models/test_instrumented.py +++ b/tests/models/test_instrumented.py @@ -835,6 +835,8 @@ def test_messages_to_otel_events_without_prompts_and_completions(): ModelResponse(parts=[TextPart('text1')]), ModelRequest(parts=[UserPromptPart('user_prompt')]), ModelResponse(parts=[TextPart('text2')]), + ModelRequest(parts=[ToolReturnPart('tool', 'tool_return_content', 'tool_call_1')]), + ModelRequest(parts=[RetryPromptPart('retry_prompt', tool_name='tool', tool_call_id='tool_call_2')]), ] settings = InstrumentationSettings(include_sensitive_content=False) assert [InstrumentedModel.event_to_dict(e) for e in settings.messages_to_otel_events(messages)] == snapshot( @@ -863,5 +865,21 @@ def test_messages_to_otel_events_without_prompts_and_completions(): 'gen_ai.message.index': 3, 'event.name': 'gen_ai.assistant.message', }, + { + 'content': 'SCRUBBED', + 'role': 'tool', + 'id': 'tool_call_1', + 'name': 'tool', + 'gen_ai.message.index': 4, + 'event.name': 'gen_ai.tool.message', + }, + { + 'content': 'SCRUBBED', + 'role': 'tool', + 'id': 'tool_call_2', + 'name': 'tool', + 'gen_ai.message.index': 5, + 'event.name': 'gen_ai.tool.message', + }, ] ) From 810b1db60be2b8ab10f5b0f5b8aacce947ffef64 Mon Sep 17 00:00:00 2001 From: adtyavrdhn Date: Wed, 18 Jun 2025 13:51:18 +0530 Subject: [PATCH 04/20] Adding example to docs --- docs/logfire.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/logfire.md b/docs/logfire.md index 2fc6c3d62a..5a3caf6a31 100644 --- a/docs/logfire.md +++ b/docs/logfire.md @@ -305,3 +305,15 @@ agent = Agent('gpt-4o', instrument=instrumentation_settings) # or to instrument all agents: Agent.instrument_all(instrumentation_settings) ``` + +### Excluding sensitive content + +```python {title="exluding_sensitive_content.py"} +from pydantic_ai.agent import Agent, InstrumentationSettings + +instrumentation_settings = InstrumentationSettings(include_sensitive_content=False) + +agent = Agent('gpt-4o', instrument=instrumentation_settings) +# or to instrument all agents: +Agent.instrument_all(instrumentation_settings) +``` From 8ebee59ece3c8896561a16db69f1567a40952a6a Mon Sep 17 00:00:00 2001 From: adtyavrdhn Date: Thu, 19 Jun 2025 18:16:35 +0530 Subject: [PATCH 05/20] renaming to include content --- docs/logfire.md | 2 +- pydantic_ai_slim/pydantic_ai/messages.py | 10 +++++----- pydantic_ai_slim/pydantic_ai/models/instrumented.py | 6 +++--- tests/models/test_instrumented.py | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/logfire.md b/docs/logfire.md index 5a3caf6a31..3b1d8ee7b6 100644 --- a/docs/logfire.md +++ b/docs/logfire.md @@ -311,7 +311,7 @@ Agent.instrument_all(instrumentation_settings) ```python {title="exluding_sensitive_content.py"} from pydantic_ai.agent import Agent, InstrumentationSettings -instrumentation_settings = InstrumentationSettings(include_sensitive_content=False) +instrumentation_settings = InstrumentationSettings(include_content=False) agent = Agent('gpt-4o', instrument=instrumentation_settings) # or to instrument all agents: diff --git a/pydantic_ai_slim/pydantic_ai/messages.py b/pydantic_ai_slim/pydantic_ai/messages.py index d9f4b06332..2460ad6cfa 100644 --- a/pydantic_ai_slim/pydantic_ai/messages.py +++ b/pydantic_ai_slim/pydantic_ai/messages.py @@ -75,7 +75,7 @@ class SystemPromptPart: """Part type identifier, this is available on all parts as a discriminator.""" def otel_event(self, _settings: InstrumentationSettings) -> Event: - if not _settings.include_sensitive_content: + if not _settings.include_content: return Event('gen_ai.system.message', body={'content': 'SCRUBBED', 'role': 'system'}) return Event('gen_ai.system.message', body={'content': self.content, 'role': 'system'}) @@ -355,7 +355,7 @@ class UserPromptPart: """Part type identifier, this is available on all parts as a discriminator.""" def otel_event(self, settings: InstrumentationSettings) -> Event: - if not settings.include_sensitive_content: + if not settings.include_content: return Event('gen_ai.user.message', body={'content': 'SCRUBBED', 'role': 'user'}) content: str | list[dict[str, Any] | str] if isinstance(self.content, str): @@ -420,7 +420,7 @@ def otel_event(self, _settings: InstrumentationSettings) -> Event: return Event( 'gen_ai.tool.message', body={ - 'content': self.content if _settings.include_sensitive_content else 'SCRUBBED', + 'content': self.content if _settings.include_content else 'SCRUBBED', 'role': 'tool', 'id': self.tool_call_id, 'name': self.tool_name, @@ -487,7 +487,7 @@ def otel_event(self, _settings: InstrumentationSettings) -> Event: return Event( 'gen_ai.tool.message', body={ - 'content': self.model_response() if _settings.include_sensitive_content else 'SCRUBBED', + 'content': self.model_response() if _settings.include_content else 'SCRUBBED', 'role': 'tool', 'id': self.tool_call_id, 'name': self.tool_name, @@ -664,7 +664,7 @@ def new_event_body(): elif isinstance(part, TextPart): if body.get('content'): body = new_event_body() - body['content'] = part.content if _settings.include_sensitive_content else 'SCRUBBED' + body['content'] = part.content if _settings.include_content else 'SCRUBBED' return result diff --git a/pydantic_ai_slim/pydantic_ai/models/instrumented.py b/pydantic_ai_slim/pydantic_ai/models/instrumented.py index 0da2ef2ef7..108f1a99cf 100644 --- a/pydantic_ai_slim/pydantic_ai/models/instrumented.py +++ b/pydantic_ai_slim/pydantic_ai/models/instrumented.py @@ -92,7 +92,7 @@ def __init__( meter_provider: MeterProvider | None = None, event_logger_provider: EventLoggerProvider | None = None, include_binary_content: bool = True, - include_sensitive_content: bool = True, + include_content: bool = True, ): """Create instrumentation options. @@ -110,7 +110,7 @@ def __init__( Calling `logfire.configure()` sets the global event logger provider, so most users don't need this. This is only used if `event_mode='logs'`. include_binary_content: Whether to include binary content in the instrumentation events. - include_sensitive_content: Whether to include prompt and completion messages in the instrumentation events. + include_content: Whether to include prompt and completion messages in the instrumentation events. """ from pydantic_ai import __version__ @@ -123,7 +123,7 @@ def __init__( self.event_logger = event_logger_provider.get_event_logger(scope_name, __version__) self.event_mode = event_mode self.include_binary_content = include_binary_content - self.include_sensitive_content = include_sensitive_content + self.include_content = include_content # As specified in the OpenTelemetry GenAI metrics spec: # https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/#metric-gen_aiclienttokenusage diff --git a/tests/models/test_instrumented.py b/tests/models/test_instrumented.py index 3f29530a14..aca55b983e 100644 --- a/tests/models/test_instrumented.py +++ b/tests/models/test_instrumented.py @@ -838,7 +838,7 @@ def test_messages_to_otel_events_without_prompts_and_completions(): ModelRequest(parts=[ToolReturnPart('tool', 'tool_return_content', 'tool_call_1')]), ModelRequest(parts=[RetryPromptPart('retry_prompt', tool_name='tool', tool_call_id='tool_call_2')]), ] - settings = InstrumentationSettings(include_sensitive_content=False) + settings = InstrumentationSettings(include_content=False) assert [InstrumentedModel.event_to_dict(e) for e in settings.messages_to_otel_events(messages)] == snapshot( [ { From 16a9920c3999a716cb511936fcaef762bee010ab Mon Sep 17 00:00:00 2001 From: adtyavrdhn Date: Thu, 19 Jun 2025 18:17:08 +0530 Subject: [PATCH 06/20] typo --- docs/logfire.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/logfire.md b/docs/logfire.md index 3b1d8ee7b6..bb2605f575 100644 --- a/docs/logfire.md +++ b/docs/logfire.md @@ -308,7 +308,7 @@ Agent.instrument_all(instrumentation_settings) ### Excluding sensitive content -```python {title="exluding_sensitive_content.py"} +```python {title="excluding_sensitive_content.py.py"} from pydantic_ai.agent import Agent, InstrumentationSettings instrumentation_settings = InstrumentationSettings(include_content=False) From 1d2ba54bbaa7d8700718db08e339bd4ce82190c9 Mon Sep 17 00:00:00 2001 From: adtyavrdhn Date: Thu, 19 Jun 2025 18:35:42 +0530 Subject: [PATCH 07/20] Removing content key altogether + refactoring to remove duplicated code --- pydantic_ai_slim/pydantic_ai/messages.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/messages.py b/pydantic_ai_slim/pydantic_ai/messages.py index 2460ad6cfa..280ba44827 100644 --- a/pydantic_ai_slim/pydantic_ai/messages.py +++ b/pydantic_ai_slim/pydantic_ai/messages.py @@ -74,10 +74,11 @@ class SystemPromptPart: part_kind: Literal['system-prompt'] = 'system-prompt' """Part type identifier, this is available on all parts as a discriminator.""" - def otel_event(self, _settings: InstrumentationSettings) -> Event: - if not _settings.include_content: - return Event('gen_ai.system.message', body={'content': 'SCRUBBED', 'role': 'system'}) - return Event('gen_ai.system.message', body={'content': self.content, 'role': 'system'}) + def otel_event(self, settings: InstrumentationSettings) -> Event: + return Event( + 'gen_ai.system.message', + body={'role': 'system', **({'content': self.content} if settings.include_content else {})}, + ) __repr__ = _utils.dataclasses_no_defaults_repr @@ -356,7 +357,7 @@ class UserPromptPart: def otel_event(self, settings: InstrumentationSettings) -> Event: if not settings.include_content: - return Event('gen_ai.user.message', body={'content': 'SCRUBBED', 'role': 'user'}) + return Event('gen_ai.user.message', body={'role': 'user'}) content: str | list[dict[str, Any] | str] if isinstance(self.content, str): content = self.content @@ -416,11 +417,11 @@ def model_response_object(self) -> dict[str, Any]: else: return {'return_value': tool_return_ta.dump_python(self.content, mode='json')} - def otel_event(self, _settings: InstrumentationSettings) -> Event: + def otel_event(self, settings: InstrumentationSettings) -> Event: return Event( 'gen_ai.tool.message', body={ - 'content': self.content if _settings.include_content else 'SCRUBBED', + **({'content': self.content} if settings.include_content else {}), 'role': 'tool', 'id': self.tool_call_id, 'name': self.tool_name, @@ -480,14 +481,14 @@ def model_response(self) -> str: description = f'{len(self.content)} validation errors: {json_errors.decode()}' return f'{description}\n\nFix the errors and try again.' - def otel_event(self, _settings: InstrumentationSettings) -> Event: + def otel_event(self, settings: InstrumentationSettings) -> Event: if self.tool_name is None: return Event('gen_ai.user.message', body={'content': self.model_response(), 'role': 'user'}) else: return Event( 'gen_ai.tool.message', body={ - 'content': self.model_response() if _settings.include_content else 'SCRUBBED', + **({'content': self.model_response()} if settings.include_content else {}), 'role': 'tool', 'id': self.tool_call_id, 'name': self.tool_name, @@ -638,7 +639,7 @@ class ModelResponse: vendor_id: str | None = None """Vendor ID as specified by the model provider. This can be used to track the specific request to the model.""" - def otel_events(self, _settings: InstrumentationSettings) -> list[Event]: + def otel_events(self, settings: InstrumentationSettings) -> list[Event]: """Return OpenTelemetry events for the response.""" result: list[Event] = [] @@ -664,7 +665,8 @@ def new_event_body(): elif isinstance(part, TextPart): if body.get('content'): body = new_event_body() - body['content'] = part.content if _settings.include_content else 'SCRUBBED' + if settings.include_content: + body['content'] = part.content return result From 15384f352fe39c448ae508d7eddbbaeb5846ead4 Mon Sep 17 00:00:00 2001 From: adtyavrdhn Date: Thu, 19 Jun 2025 18:38:48 +0530 Subject: [PATCH 08/20] Adding test for binary_content as well --- tests/models/test_instrumented.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/models/test_instrumented.py b/tests/models/test_instrumented.py index aca55b983e..4c8051ed66 100644 --- a/tests/models/test_instrumented.py +++ b/tests/models/test_instrumented.py @@ -829,7 +829,7 @@ def test_messages_to_otel_events_without_binary_content(document_content: Binary ) -def test_messages_to_otel_events_without_prompts_and_completions(): +def test_messages_to_otel_events_without_prompts_and_completions(document_content: BinaryContent): messages: list[ModelMessage] = [ ModelRequest(parts=[SystemPromptPart('system_prompt')]), ModelResponse(parts=[TextPart('text1')]), @@ -837,36 +837,32 @@ def test_messages_to_otel_events_without_prompts_and_completions(): ModelResponse(parts=[TextPart('text2')]), ModelRequest(parts=[ToolReturnPart('tool', 'tool_return_content', 'tool_call_1')]), ModelRequest(parts=[RetryPromptPart('retry_prompt', tool_name='tool', tool_call_id='tool_call_2')]), + ModelRequest(parts=[UserPromptPart(content=['user_prompt6', document_content])]), ] settings = InstrumentationSettings(include_content=False) assert [InstrumentedModel.event_to_dict(e) for e in settings.messages_to_otel_events(messages)] == snapshot( [ { - 'content': 'SCRUBBED', 'role': 'system', 'gen_ai.message.index': 0, 'event.name': 'gen_ai.system.message', }, { - 'content': 'SCRUBBED', 'role': 'assistant', 'gen_ai.message.index': 1, 'event.name': 'gen_ai.assistant.message', }, { - 'content': 'SCRUBBED', 'role': 'user', 'gen_ai.message.index': 2, 'event.name': 'gen_ai.user.message', }, { - 'content': 'SCRUBBED', 'role': 'assistant', 'gen_ai.message.index': 3, 'event.name': 'gen_ai.assistant.message', }, { - 'content': 'SCRUBBED', 'role': 'tool', 'id': 'tool_call_1', 'name': 'tool', @@ -874,12 +870,16 @@ def test_messages_to_otel_events_without_prompts_and_completions(): 'event.name': 'gen_ai.tool.message', }, { - 'content': 'SCRUBBED', 'role': 'tool', 'id': 'tool_call_2', 'name': 'tool', 'gen_ai.message.index': 5, 'event.name': 'gen_ai.tool.message', }, + { + 'role': 'user', + 'gen_ai.message.index': 6, + 'event.name': 'gen_ai.user.message', + }, ] ) From 94e08dd0a2b0195cdac2552b7cdaa05dd0689f98 Mon Sep 17 00:00:00 2001 From: adtyavrdhn Date: Fri, 20 Jun 2025 22:00:31 +0530 Subject: [PATCH 09/20] Retaining content for the kinds of data --- pydantic_ai_slim/pydantic_ai/messages.py | 8 +++---- tests/models/test_instrumented.py | 28 +++++++++++++++++++++--- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/messages.py b/pydantic_ai_slim/pydantic_ai/messages.py index 280ba44827..909b0d8390 100644 --- a/pydantic_ai_slim/pydantic_ai/messages.py +++ b/pydantic_ai_slim/pydantic_ai/messages.py @@ -356,8 +356,6 @@ class UserPromptPart: """Part type identifier, this is available on all parts as a discriminator.""" def otel_event(self, settings: InstrumentationSettings) -> Event: - if not settings.include_content: - return Event('gen_ai.user.message', body={'role': 'user'}) content: str | list[dict[str, Any] | str] if isinstance(self.content, str): content = self.content @@ -365,12 +363,12 @@ def otel_event(self, settings: InstrumentationSettings) -> Event: content = [] for part in self.content: if isinstance(part, str): - content.append(part) + content.append(part if settings.include_content else {'kind': 'text'}) elif isinstance(part, (ImageUrl, AudioUrl, DocumentUrl, VideoUrl)): - content.append({'kind': part.kind, 'url': part.url}) + content.append({'kind': part.kind, **({'url': part.url} if settings.include_content else {})}) elif isinstance(part, BinaryContent): converted_part = {'kind': part.kind, 'media_type': part.media_type} - if settings.include_binary_content: + if settings.include_content and settings.include_binary_content: converted_part['binary_content'] = base64.b64encode(part.data).decode() content.append(converted_part) else: diff --git a/tests/models/test_instrumented.py b/tests/models/test_instrumented.py index 4c8051ed66..00e8f39a1c 100644 --- a/tests/models/test_instrumented.py +++ b/tests/models/test_instrumented.py @@ -829,15 +829,28 @@ def test_messages_to_otel_events_without_binary_content(document_content: Binary ) -def test_messages_to_otel_events_without_prompts_and_completions(document_content: BinaryContent): +def test_messages_without_content(document_content: BinaryContent): messages: list[ModelMessage] = [ ModelRequest(parts=[SystemPromptPart('system_prompt')]), ModelResponse(parts=[TextPart('text1')]), - ModelRequest(parts=[UserPromptPart('user_prompt')]), + ModelRequest( + parts=[ + UserPromptPart( + content=[ + 'user_prompt1', + VideoUrl('https://example.com/video.mp4'), + ImageUrl('https://example.com/image.png'), + AudioUrl('https://example.com/audio.mp3'), + DocumentUrl('https://example.com/document.pdf'), + document_content, + ] + ) + ] + ), ModelResponse(parts=[TextPart('text2')]), ModelRequest(parts=[ToolReturnPart('tool', 'tool_return_content', 'tool_call_1')]), ModelRequest(parts=[RetryPromptPart('retry_prompt', tool_name='tool', tool_call_id='tool_call_2')]), - ModelRequest(parts=[UserPromptPart(content=['user_prompt6', document_content])]), + ModelRequest(parts=[UserPromptPart(content=['user_prompt2', document_content])]), ] settings = InstrumentationSettings(include_content=False) assert [InstrumentedModel.event_to_dict(e) for e in settings.messages_to_otel_events(messages)] == snapshot( @@ -853,6 +866,14 @@ def test_messages_to_otel_events_without_prompts_and_completions(document_conten 'event.name': 'gen_ai.assistant.message', }, { + 'content': [ + {'kind': 'text'}, + {'kind': 'video-url'}, + {'kind': 'image-url'}, + {'kind': 'audio-url'}, + {'kind': 'document-url'}, + {'kind': 'binary', 'media_type': 'application/pdf'}, + ], 'role': 'user', 'gen_ai.message.index': 2, 'event.name': 'gen_ai.user.message', @@ -877,6 +898,7 @@ def test_messages_to_otel_events_without_prompts_and_completions(document_conten 'event.name': 'gen_ai.tool.message', }, { + 'content': [{'kind': 'text'}, {'kind': 'binary', 'media_type': 'application/pdf'}], 'role': 'user', 'gen_ai.message.index': 6, 'event.name': 'gen_ai.user.message', From 08189b12adc9c38343afc42c49941d5b3091a8f7 Mon Sep 17 00:00:00 2001 From: adtyavrdhn Date: Fri, 20 Jun 2025 23:14:08 +0530 Subject: [PATCH 10/20] Removing args from tool calls when include_content is false --- pydantic_ai_slim/pydantic_ai/_agent_graph.py | 8 +++++++- pydantic_ai_slim/pydantic_ai/agent.py | 1 + pydantic_ai_slim/pydantic_ai/tools.py | 5 +++-- tests/models/test_instrumented.py | 9 ++++++++- 4 files changed, 19 insertions(+), 4 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/_agent_graph.py b/pydantic_ai_slim/pydantic_ai/_agent_graph.py index 703e8d43c5..c570ae5967 100644 --- a/pydantic_ai_slim/pydantic_ai/_agent_graph.py +++ b/pydantic_ai_slim/pydantic_ai/_agent_graph.py @@ -30,6 +30,7 @@ if TYPE_CHECKING: from .mcp import MCPServer + from .models.instrumented import InstrumentationSettings __all__ = ( 'GraphAgentState', @@ -98,6 +99,7 @@ class GraphAgentDeps(Generic[DepsT, OutputDataT]): default_retries: int tracer: Tracer + instrumentation_settings: InstrumentationSettings | None = None prepare_tools: ToolsPrepareFunc[DepsT] | None = None @@ -666,6 +668,10 @@ async def process_function_tools( # noqa C901 user_parts: list[_messages.UserPromptPart] = [] + include_tool_args = ( + ctx.deps.instrumentation_settings is not None and ctx.deps.instrumentation_settings.include_content + ) + # Run all tool tasks in parallel results_by_index: dict[int, _messages.ModelRequestPart] = {} with ctx.deps.tracer.start_as_current_span( @@ -676,7 +682,7 @@ async def process_function_tools( # noqa C901 }, ): tasks = [ - asyncio.create_task(tool.run(call, run_context, ctx.deps.tracer), name=call.tool_name) + asyncio.create_task(tool.run(call, run_context, ctx.deps.tracer, include_tool_args), name=call.tool_name) for tool, call in calls_to_run ] diff --git a/pydantic_ai_slim/pydantic_ai/agent.py b/pydantic_ai_slim/pydantic_ai/agent.py index d9bdc96c3d..9968b182e3 100644 --- a/pydantic_ai_slim/pydantic_ai/agent.py +++ b/pydantic_ai_slim/pydantic_ai/agent.py @@ -696,6 +696,7 @@ async def get_instructions(run_context: RunContext[AgentDepsT]) -> str | None: tracer=tracer, prepare_tools=self._prepare_tools, get_instructions=get_instructions, + instrumentation_settings=instrumentation_settings, ) start_node = _agent_graph.UserPromptNode[AgentDepsT]( user_prompt=user_prompt, diff --git a/pydantic_ai_slim/pydantic_ai/tools.py b/pydantic_ai_slim/pydantic_ai/tools.py index bb34015191..1785438531 100644 --- a/pydantic_ai_slim/pydantic_ai/tools.py +++ b/pydantic_ai_slim/pydantic_ai/tools.py @@ -372,6 +372,7 @@ async def run( message: _messages.ToolCallPart, run_context: RunContext[AgentDepsT], tracer: Tracer, + include_tool_args: bool = False, ) -> _messages.ToolReturnPart | _messages.RetryPromptPart: """Run the tool function asynchronously. @@ -383,14 +384,14 @@ async def run( 'gen_ai.tool.name': self.name, # NOTE: this means `gen_ai.tool.call.id` will be included even if it was generated by pydantic-ai 'gen_ai.tool.call.id': message.tool_call_id, - 'tool_arguments': message.args_as_json_str(), + **({'tool_arguments': message.args_as_json_str()} if include_tool_args else {}), 'logfire.msg': f'running tool: {self.name}', # add the JSON schema so these attributes are formatted nicely in Logfire 'logfire.json_schema': json.dumps( { 'type': 'object', 'properties': { - 'tool_arguments': {'type': 'object'}, + **({'tool_arguments': {'type': 'object'}} if include_tool_args else {}), 'gen_ai.tool.name': {}, 'gen_ai.tool.call.id': {}, }, diff --git a/tests/models/test_instrumented.py b/tests/models/test_instrumented.py index 00e8f39a1c..6ae7a02c86 100644 --- a/tests/models/test_instrumented.py +++ b/tests/models/test_instrumented.py @@ -847,7 +847,7 @@ def test_messages_without_content(document_content: BinaryContent): ) ] ), - ModelResponse(parts=[TextPart('text2')]), + ModelResponse(parts=[TextPart('text2'), ToolCallPart(tool_name='my_tool', args={'a': 13, 'b': 4})]), ModelRequest(parts=[ToolReturnPart('tool', 'tool_return_content', 'tool_call_1')]), ModelRequest(parts=[RetryPromptPart('retry_prompt', tool_name='tool', tool_call_id='tool_call_2')]), ModelRequest(parts=[UserPromptPart(content=['user_prompt2', document_content])]), @@ -880,6 +880,13 @@ def test_messages_without_content(document_content: BinaryContent): }, { 'role': 'assistant', + 'tool_calls': [ + { + 'id': IsStr(), + 'type': 'function', + 'function': {'name': 'my_tool', 'arguments': {'a': 13, 'b': 4}}, + } + ], 'gen_ai.message.index': 3, 'event.name': 'gen_ai.assistant.message', }, From 77e58d643ab6753ee364ab02b96313ac82881720 Mon Sep 17 00:00:00 2001 From: adtyavrdhn Date: Sat, 21 Jun 2025 17:53:04 +0530 Subject: [PATCH 11/20] Mistakes in docs --- docs/logfire.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/logfire.md b/docs/logfire.md index bb2605f575..e59d707e6d 100644 --- a/docs/logfire.md +++ b/docs/logfire.md @@ -308,8 +308,9 @@ Agent.instrument_all(instrumentation_settings) ### Excluding sensitive content -```python {title="excluding_sensitive_content.py.py"} -from pydantic_ai.agent import Agent, InstrumentationSettings +```python {title="excluding_sensitive_content.py"} +from pydantic_ai.agent import Agent +from pydantic_ai.models.instrumented import InstrumentationSettings instrumentation_settings = InstrumentationSettings(include_content=False) From 1031dab9a52f9ccf39d624a4105a34ad58f6a5a5 Mon Sep 17 00:00:00 2001 From: adtyavrdhn Date: Sat, 21 Jun 2025 22:54:41 +0530 Subject: [PATCH 12/20] Adding test to check include_content behaviour in the logfire spans --- tests/test_logfire.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tests/test_logfire.py b/tests/test_logfire.py index a862b05701..25d0b928b8 100644 --- a/tests/test_logfire.py +++ b/tests/test_logfire.py @@ -523,3 +523,34 @@ async def test_feedback(capfire: CaptureLogfire) -> None: }, ] ) + + +@pytest.mark.skipif(not logfire_installed, reason='logfire not installed') +@pytest.mark.parametrize('include_content', [True, False]) +def test_include_tool_args_span_attributes( + get_logfire_summary: Callable[[], LogfireSummary], + include_content: bool, +) -> None: + """Test that tool arguments are included/excluded in span attributes based on instrumentation settings.""" + + instrumentation_settings = InstrumentationSettings(include_content=include_content) + test_model = TestModel(seed=42) + my_agent = Agent(model=test_model, instrument=instrumentation_settings) + + @my_agent.tool_plain + async def add_numbers(x: int, y: int) -> int: + """Add two numbers together.""" + return x + y + + result = my_agent.run_sync('Add 42 and 42') + assert result.output == snapshot('{"add_numbers":84}') + + summary = get_logfire_summary() + + for _, attributes in summary.attributes.items(): + if attributes.get('gen_ai.tool.name') == 'add_numbers': + if include_content: + assert 'tool_arguments' in attributes + assert attributes['tool_arguments'] == snapshot('{"x":42,"y":42}') + else: + assert 'tool_arguments' not in attributes From c9a83e59c8034c256a8499f0dc99d90e88441f96 Mon Sep 17 00:00:00 2001 From: adtyavrdhn Date: Sat, 21 Jun 2025 22:55:02 +0530 Subject: [PATCH 13/20] Adding test to check include_content behaviour in the logfire spans --- tests/test_logfire.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_logfire.py b/tests/test_logfire.py index 25d0b928b8..f6df2cfe32 100644 --- a/tests/test_logfire.py +++ b/tests/test_logfire.py @@ -547,7 +547,7 @@ async def add_numbers(x: int, y: int) -> int: summary = get_logfire_summary() - for _, attributes in summary.attributes.items(): + for _span_id, attributes in summary.attributes.items(): if attributes.get('gen_ai.tool.name') == 'add_numbers': if include_content: assert 'tool_arguments' in attributes From ac4c40a5c70c3449035522f2e26e5f90819e95ed Mon Sep 17 00:00:00 2001 From: adtyavrdhn Date: Sat, 21 Jun 2025 23:18:58 +0530 Subject: [PATCH 14/20] Adding desc in logs --- docs/logfire.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/logfire.md b/docs/logfire.md index e59d707e6d..0afe353e41 100644 --- a/docs/logfire.md +++ b/docs/logfire.md @@ -308,6 +308,10 @@ Agent.instrument_all(instrumentation_settings) ### Excluding sensitive content +For privacy and security reasons, you may want to monitor your agent's behavior and performance without exposing sensitive user data or proprietary prompts in your observability platform. PydanticAI allows you to exclude the actual content from instrumentation events while preserving the structural information needed for debugging and monitoring. + +When `include_content=False` is set, PydanticAI will exclude sensitive content from OpenTelemetry events. + ```python {title="excluding_sensitive_content.py"} from pydantic_ai.agent import Agent from pydantic_ai.models.instrumented import InstrumentationSettings @@ -318,3 +322,5 @@ agent = Agent('gpt-4o', instrument=instrumentation_settings) # or to instrument all agents: Agent.instrument_all(instrumentation_settings) ``` + +This setting is particularly useful in production environments where compliance requirements or data sensitivity concerns make it necessary to limit what content is sent to your observability platform. From fb5361abfec2a84f0d7b8cbfa152ec60e8e41a4e Mon Sep 17 00:00:00 2001 From: adtyavrdhn Date: Mon, 23 Jun 2025 18:33:32 +0530 Subject: [PATCH 15/20] Resolving comments --- docs/logfire.md | 2 +- pydantic_ai_slim/pydantic_ai/_agent_graph.py | 4 ++-- pydantic_ai_slim/pydantic_ai/tools.py | 6 +++--- tests/test_logfire.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/logfire.md b/docs/logfire.md index 0afe353e41..7a33de5806 100644 --- a/docs/logfire.md +++ b/docs/logfire.md @@ -306,7 +306,7 @@ agent = Agent('gpt-4o', instrument=instrumentation_settings) Agent.instrument_all(instrumentation_settings) ``` -### Excluding sensitive content +### Excluding prompts and completions For privacy and security reasons, you may want to monitor your agent's behavior and performance without exposing sensitive user data or proprietary prompts in your observability platform. PydanticAI allows you to exclude the actual content from instrumentation events while preserving the structural information needed for debugging and monitoring. diff --git a/pydantic_ai_slim/pydantic_ai/_agent_graph.py b/pydantic_ai_slim/pydantic_ai/_agent_graph.py index 7370cfacf6..36a0b14cab 100644 --- a/pydantic_ai_slim/pydantic_ai/_agent_graph.py +++ b/pydantic_ai_slim/pydantic_ai/_agent_graph.py @@ -698,7 +698,7 @@ async def process_function_tools( # noqa C901 user_parts: list[_messages.UserPromptPart] = [] - include_tool_args = ( + include_content = ( ctx.deps.instrumentation_settings is not None and ctx.deps.instrumentation_settings.include_content ) @@ -712,7 +712,7 @@ async def process_function_tools( # noqa C901 }, ): tasks = [ - asyncio.create_task(tool.run(call, run_context, ctx.deps.tracer, include_tool_args), name=call.tool_name) + asyncio.create_task(tool.run(call, run_context, ctx.deps.tracer, include_content), name=call.tool_name) for tool, call in calls_to_run ] diff --git a/pydantic_ai_slim/pydantic_ai/tools.py b/pydantic_ai_slim/pydantic_ai/tools.py index 1785438531..72b43e66c6 100644 --- a/pydantic_ai_slim/pydantic_ai/tools.py +++ b/pydantic_ai_slim/pydantic_ai/tools.py @@ -372,7 +372,7 @@ async def run( message: _messages.ToolCallPart, run_context: RunContext[AgentDepsT], tracer: Tracer, - include_tool_args: bool = False, + include_content: bool = False, ) -> _messages.ToolReturnPart | _messages.RetryPromptPart: """Run the tool function asynchronously. @@ -384,14 +384,14 @@ async def run( 'gen_ai.tool.name': self.name, # NOTE: this means `gen_ai.tool.call.id` will be included even if it was generated by pydantic-ai 'gen_ai.tool.call.id': message.tool_call_id, - **({'tool_arguments': message.args_as_json_str()} if include_tool_args else {}), + **({'tool_arguments': message.args_as_json_str()} if include_content else {}), 'logfire.msg': f'running tool: {self.name}', # add the JSON schema so these attributes are formatted nicely in Logfire 'logfire.json_schema': json.dumps( { 'type': 'object', 'properties': { - **({'tool_arguments': {'type': 'object'}} if include_tool_args else {}), + **({'tool_arguments': {'type': 'object'}} if include_content else {}), 'gen_ai.tool.name': {}, 'gen_ai.tool.call.id': {}, }, diff --git a/tests/test_logfire.py b/tests/test_logfire.py index f6df2cfe32..949f6ab9d4 100644 --- a/tests/test_logfire.py +++ b/tests/test_logfire.py @@ -547,7 +547,7 @@ async def add_numbers(x: int, y: int) -> int: summary = get_logfire_summary() - for _span_id, attributes in summary.attributes.items(): + for attributes in summary.attributes.values(): if attributes.get('gen_ai.tool.name') == 'add_numbers': if include_content: assert 'tool_arguments' in attributes From c330c86c7180ee1b22228a5b20466bdb4cfccf4d Mon Sep 17 00:00:00 2001 From: adtyavrdhn Date: Mon, 23 Jun 2025 18:40:59 +0530 Subject: [PATCH 16/20] Asserting only one such span exists --- tests/test_logfire.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/tests/test_logfire.py b/tests/test_logfire.py index 949f6ab9d4..916380071a 100644 --- a/tests/test_logfire.py +++ b/tests/test_logfire.py @@ -547,10 +547,14 @@ async def add_numbers(x: int, y: int) -> int: summary = get_logfire_summary() - for attributes in summary.attributes.values(): - if attributes.get('gen_ai.tool.name') == 'add_numbers': - if include_content: - assert 'tool_arguments' in attributes - assert attributes['tool_arguments'] == snapshot('{"x":42,"y":42}') - else: - assert 'tool_arguments' not in attributes + tool_spans = [ + attributes for attributes in summary.attributes.values() if attributes.get('gen_ai.tool.name') == 'add_numbers' + ] + assert len(tool_spans) == 1 + + tool_attributes = tool_spans[0] + if include_content: + assert 'tool_arguments' in tool_attributes + assert tool_attributes['tool_arguments'] == snapshot('{"x":42,"y":42}') + else: + assert 'tool_arguments' not in tool_attributes From 1c75cfcf56ebd05460f758defb0b6925bc33f4ec Mon Sep 17 00:00:00 2001 From: adtyavrdhn Date: Mon, 23 Jun 2025 20:07:22 +0530 Subject: [PATCH 17/20] Adding that tool calls and args will also be excluded from the --- docs/logfire.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/logfire.md b/docs/logfire.md index 7a33de5806..20d3c8c8bc 100644 --- a/docs/logfire.md +++ b/docs/logfire.md @@ -310,7 +310,7 @@ Agent.instrument_all(instrumentation_settings) For privacy and security reasons, you may want to monitor your agent's behavior and performance without exposing sensitive user data or proprietary prompts in your observability platform. PydanticAI allows you to exclude the actual content from instrumentation events while preserving the structural information needed for debugging and monitoring. -When `include_content=False` is set, PydanticAI will exclude sensitive content from OpenTelemetry events. +When `include_content=False` is set, PydanticAI will exclude sensitive content from OpenTelemetry events, including user prompts and model completions, tool call arguments and responses, and any other message content. ```python {title="excluding_sensitive_content.py"} from pydantic_ai.agent import Agent From 4f3ecd130cbc64ec3820aa2273ea0e4f06081ba4 Mon Sep 17 00:00:00 2001 From: Aditya Vardhan <76904033+adtyavrdhn@users.noreply.github.com> Date: Wed, 25 Jun 2025 18:23:30 +0530 Subject: [PATCH 18/20] Update pydantic_ai_slim/pydantic_ai/models/instrumented.py Co-authored-by: Alex Hall --- pydantic_ai_slim/pydantic_ai/models/instrumented.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pydantic_ai_slim/pydantic_ai/models/instrumented.py b/pydantic_ai_slim/pydantic_ai/models/instrumented.py index 0a01c12b61..96859f9622 100644 --- a/pydantic_ai_slim/pydantic_ai/models/instrumented.py +++ b/pydantic_ai_slim/pydantic_ai/models/instrumented.py @@ -110,7 +110,8 @@ def __init__( Calling `logfire.configure()` sets the global event logger provider, so most users don't need this. This is only used if `event_mode='logs'`. include_binary_content: Whether to include binary content in the instrumentation events. - include_content: Whether to include prompt and completion messages in the instrumentation events. + include_content: Whether to include prompts, completions, and tool call arguments and responses + in the instrumentation events. """ from pydantic_ai import __version__ From 7e81361ffb5c97548e01834d1c1f4f61f53d7157 Mon Sep 17 00:00:00 2001 From: Aditya Vardhan <76904033+adtyavrdhn@users.noreply.github.com> Date: Wed, 25 Jun 2025 18:23:44 +0530 Subject: [PATCH 19/20] Update tests/test_logfire.py Co-authored-by: Alex Hall --- tests/test_logfire.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/test_logfire.py b/tests/test_logfire.py index 916380071a..df2fba6a6b 100644 --- a/tests/test_logfire.py +++ b/tests/test_logfire.py @@ -547,14 +547,11 @@ async def add_numbers(x: int, y: int) -> int: summary = get_logfire_summary() - tool_spans = [ + [tool_attributes] = [ attributes for attributes in summary.attributes.values() if attributes.get('gen_ai.tool.name') == 'add_numbers' ] - assert len(tool_spans) == 1 - tool_attributes = tool_spans[0] if include_content: - assert 'tool_arguments' in tool_attributes assert tool_attributes['tool_arguments'] == snapshot('{"x":42,"y":42}') else: assert 'tool_arguments' not in tool_attributes From 70a4161b012398ca6e5709030123fad3713f8ddd Mon Sep 17 00:00:00 2001 From: adtyavrdhn Date: Wed, 25 Jun 2025 18:28:00 +0530 Subject: [PATCH 20/20] Fix --- tests/test_logfire.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_logfire.py b/tests/test_logfire.py index 34edcc70ca..b0beef8597 100644 --- a/tests/test_logfire.py +++ b/tests/test_logfire.py @@ -549,7 +549,7 @@ async def add_numbers(x: int, y: int) -> int: summary = get_logfire_summary() - [tool_attributes] = [ + [tool_attributes] = [ attributes for attributes in summary.attributes.values() if attributes.get('gen_ai.tool.name') == 'add_numbers' ]