Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 59 additions & 2 deletions agent_memory_server/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,41 @@ def _calculate_messages_token_count(messages: list[MemoryMessage]) -> int:
return total_tokens


def _calculate_context_usage_percentage(
    messages: list[MemoryMessage],
    model_name: ModelNameLiteral | None,
    context_window_max: int | None,
) -> float | None:
    """
    Calculate the percentage of context window used before auto-summarization triggers.

    Args:
        messages: List of messages to calculate token count for
        model_name: The client's LLM model name for context window determination
        context_window_max: Direct specification of context window max tokens

    Returns:
        Percentage (0-100) of context used, or None if no model info provided
    """
    # Nothing to measure, or no way to determine a context window.
    if not messages or (not model_name and not context_window_max):
        return None

    # Calculate current token usage
    current_tokens = _calculate_messages_token_count(messages)

    # Get effective token limit for the client's model
    max_tokens = _get_effective_token_limit(model_name, context_window_max)

    # Use the same threshold as _summarize_working_memory (70% of context window)
    token_threshold = int(max_tokens * 0.7)

    # Guard against a degenerate window: a very small max_tokens (e.g.
    # context_window_max=1) makes the 70% threshold round down to 0, which
    # would raise ZeroDivisionError below. Any messages at all already exceed
    # such a threshold, so report full usage.
    if token_threshold <= 0:
        return 100.0

    # Calculate percentage of threshold used
    percentage = (current_tokens / token_threshold) * 100.0

    # Cap at 100% for display purposes
    return min(percentage, 100.0)


async def _summarize_working_memory(
memory: WorkingMemory,
model_name: ModelNameLiteral | None = None,
Expand Down Expand Up @@ -269,7 +304,18 @@ async def get_working_memory(

logger.debug(f"Working mem: {working_mem}")

return working_mem
# Calculate context usage percentage
context_usage_percentage = _calculate_context_usage_percentage(
messages=working_mem.messages,
model_name=model_name,
context_window_max=context_window_max,
)

# Return WorkingMemoryResponse with percentage
return WorkingMemoryResponse(
**working_mem.model_dump(),
context_usage_percentage=context_usage_percentage,
)


@router.put("/v1/working-memory/{session_id}", response_model=WorkingMemoryResponse)
Expand Down Expand Up @@ -348,7 +394,18 @@ async def put_working_memory(
namespace=updated_memory.namespace,
)

return updated_memory
# Calculate context usage percentage based on the final state (after potential summarization)
context_usage_percentage = _calculate_context_usage_percentage(
messages=updated_memory.messages,
model_name=model_name,
context_window_max=context_window_max,
)

# Return WorkingMemoryResponse with percentage
return WorkingMemoryResponse(
**updated_memory.model_dump(),
context_usage_percentage=context_usage_percentage,
)


@router.delete("/v1/working-memory/{session_id}", response_model=AckResponse)
Expand Down
5 changes: 5 additions & 0 deletions agent_memory_server/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,11 @@ class WorkingMemory(BaseModel):
class WorkingMemoryResponse(WorkingMemory):
    """Response containing working memory"""

    # Fraction of the auto-summarization token threshold consumed by the
    # session's messages, expressed as 0-100; None when the request supplied
    # neither a model name nor an explicit context window to measure against.
    context_usage_percentage: float | None = Field(
        default=None,
        description="Percentage of context window used before auto-summarization triggers (0-100)",
    )


class WorkingMemoryRequest(BaseModel):
"""Request parameters for working memory operations"""
Expand Down
18 changes: 9 additions & 9 deletions tests/test_full_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -773,9 +773,9 @@ async def test_memory_prompt_with_long_term_search(
)
for msg in messages
)
assert (
relevant_context_found
), f"No relevant memory context found in messages: {messages}"
assert relevant_context_found, (
f"No relevant memory context found in messages: {messages}"
)

# Cleanup
await client.delete_long_term_memories([m.id for m in test_memories])
Expand Down Expand Up @@ -1079,9 +1079,9 @@ async def test_full_workflow_integration(
)
print(f"No topic filter search results: {no_topic_search}")

assert (
len(search_results["memories"]) > 0
), f"No memories found in search results: {search_results}"
assert len(search_results["memories"]) > 0, (
f"No memories found in search results: {search_results}"
)

# 6. Test tool integration with a realistic scenario
tool_call = {
Expand Down Expand Up @@ -1126,9 +1126,9 @@ async def test_full_workflow_integration(
m for m in long_term_memories.memories if m.id.startswith(memory_id_prefix)
]

assert (
len(our_memories) == 0
), f"Expected 0 of our memories but found {len(our_memories)}: {our_memories}"
assert len(our_memories) == 0, (
f"Expected 0 of our memories but found {len(our_memories)}: {our_memories}"
)


@pytest.mark.integration
Expand Down
Loading