Support nested sub agents (#262)

aperkins1310 · web-flow · commit 3b6f9626e9fe · 2025-09-02T21:16:56.000-07:00
* Add hierarchical sub-agent support - Enhanced service.py to handle sub-agent message filtering - Added langgraph_supervisor_hierarchy_agent.py with 3-layer hierarchy example - Added comprehensive test for hierarchical agent message flow - Updated test fixtures for better import handling This builds on upstream's basic subgraph support (2e6c622) by adding: - Sophisticated node detection for supervisors and sub-agents - Proper message handling for handback tools and results - Support for nested agent hierarchies with proper naming conventions * Add hierarchical sub-agent UI support Enhanced Streamlit UI to properly display nested agent hierarchies: - Different visual indicators for sub-agents (💼) vs tools (🛠️) - Recursive handling of nested sub-agent transfers - Proper status container management for multi-level hierarchies - Support for transfer_back_to handoff messages - Expanded status containers for better visibility - Updated tests to match new UI labels This complements the service layer changes by providing a clear visual representation of complex agent hierarchies in the UI. * Add comprehensive UI tests for hierarchical sub-agents Added test fixtures and test cases to validate: - Multi-agent message fixtures for reusable test data - Hierarchical sub-agent UI rendering with proper status containers - Visual indicators (💼 for sub-agents, 🛠️ for tools) - Popover functionality for tool calls within sub-agents - Proper message flow through transfer_to/transfer_back_to patterns These tests ensure the UI correctly displays complex agent hierarchies with proper visual organization and user experience. * Add comprehensive hierarchical sub-agent UI test suite Added three critical test patterns for hierarchical sub-agents: 1. test_app_streaming_single_sub_agent: - Tests single sub-agent with multiple tool calls - Validates popover functionality for tools within sub-agents - Ensures proper status container organization 2. 
test_app_streaming_sequential_sub_agents: - Tests supervisor -> agent A -> supervisor -> agent C -> supervisor flow - Validates sequential agent handoffs with proper UI separation - Ensures multiple status containers are handled correctly 3. test_app_streaming_nested_sub_agents: - Tests true nesting: supervisor -> agent A -> agent B -> agent A -> supervisor - Validates recursive status container nesting - Ensures proper visual hierarchy for deeply nested agents These tests provide comprehensive coverage of all hierarchical patterns and ensure the UI correctly handles complex multi-agent workflows. * Remove unnecessary test * Unneeded extra check * Update streamlit version + agent desc * remove unneeded hardcoded behaviour * Revert unneeded change * Run precommit
diff --git a/.dockerignore b/.dockerignore
@@ -10,4 +10,4 @@ env
 venv
 .venv
 *.db
-privatecredentials/*
+privatecredentials/*
diff --git a/docs/File_Based_Credentials.md b/docs/File_Based_Credentials.md
@@ -1,4 +1,4 @@
-# File Based Crendentials 
+# File Based Credentials
 
 As you develop your agents, you might discover that you have credentials that you need to store on disk that you don't want stored in your Git Repo or baked into your container image.
 
@@ -7,13 +7,13 @@ Examples:
 - Certificates or private keys needed for communication with external APIs
 
 
-The `privatecredentials/` folder give you a quick place to put these files in development. 
+The `privatecredentials/` folder gives you a quick place to put these files in development.
 
 
 ## How it works
 
 *Protection*
-- The .dockerignore file excludes the entire folder to keep it out of the build process.  
+- The .dockerignore file excludes the entire folder to keep it out of the build process.
 - The .gitignore files only allows the `.gitkeep` file -- since git doesn't track empty folders.
 
 
@@ -34,7 +34,7 @@ The syncing feature of Docker Watch isn't used for these reasons:
 
 For each file based credential, do the following:
 1. Put the file (e.g. `example-creds.txt`) into the `privatecredentials/` folder
-2. In your `.env` file, create an environment variable for the credential (e.g `EXAMPLE_CREDENTIAL=/privatecredentials/example-creds.txt`) that your agent will use to reference the location at runtime 
+2. In your `.env` file, create an environment variable for the credential (e.g `EXAMPLE_CREDENTIAL=/privatecredentials/example-creds.txt`) that your agent will use to reference the location at runtime
 3. In your agent, use the environment variable wherever you need the path to the credential
 
 
@@ -72,5 +72,3 @@ There are a number of approaches:
 - Use the secrets management feature of your cloud hosting environment (Google Cloud Secrets, AWS Secrets Manager, etc)
 - Use a 3rd party secrets management platform
 - Manually place the credentials on your Docker hosts and mount volumes to map the credentials to the container (Less secure)
-
-
diff --git a/pyproject.toml b/pyproject.toml
@@ -52,7 +52,7 @@ dependencies = [
     "pyowm ~=3.3.0",
     "python-dotenv ~=1.0.1",
     "setuptools ~=75.6.0",
-    "streamlit ~=1.40.1",
+    "streamlit ~=1.46.0",
     "tiktoken >=0.8.0",
     "uvicorn ~=0.32.1",
 
@@ -77,7 +77,7 @@ client = [
     "httpx~=0.27.2",
     "pydantic ~=2.10.1",
     "python-dotenv ~=1.0.1",
-    "streamlit~=1.40.1",
+    "streamlit~=1.46.0",
 ]
 
 [tool.ruff]
diff --git a/src/agents/agents.py b/src/agents/agents.py
@@ -9,6 +9,7 @@
 from agents.interrupt_agent import interrupt_agent
 from agents.knowledge_base_agent import kb_agent
 from agents.langgraph_supervisor_agent import langgraph_supervisor_agent
+from agents.langgraph_supervisor_hierarchy_agent import langgraph_supervisor_hierarchy_agent
 from agents.rag_assistant import rag_assistant
 from agents.research_assistant import research_assistant
 from schema import AgentInfo
@@ -40,6 +41,10 @@ class Agent:
     "langgraph-supervisor-agent": Agent(
         description="A langgraph supervisor agent", graph=langgraph_supervisor_agent
     ),
+    "langgraph-supervisor-hierarchy-agent": Agent(
+        description="A langgraph supervisor agent with a nested hierarchy of agents",
+        graph=langgraph_supervisor_hierarchy_agent,
+    ),
     "interrupt-agent": Agent(description="An agent the uses interrupts.", graph=interrupt_agent),
     "knowledge-base-agent": Agent(
         description="A retrieval-augmented generation agent using Amazon Bedrock Knowledge Base",
diff --git a/src/agents/langgraph_supervisor_agent.py b/src/agents/langgraph_supervisor_agent.py
@@ -31,14 +31,14 @@ def web_search(query: str) -> str:
 math_agent = create_react_agent(
     model=model,
     tools=[add, multiply],
-    name="math_expert",
+    name="sub-agent-math_expert",
     prompt="You are a math expert. Always use one tool at a time.",
 ).with_config(tags=["skip_stream"])
 
 research_agent = create_react_agent(
     model=model,
     tools=[web_search],
-    name="research_expert",
+    name="sub-agent-research_expert",
     prompt="You are a world class researcher with access to web search. Do not do any math.",
 ).with_config(tags=["skip_stream"])
 
@@ -51,7 +51,9 @@ def web_search(query: str) -> str:
         "For current events, use research_agent. "
         "For math problems, use math_agent."
     ),
-    add_handoff_back_messages=False,
+    add_handoff_back_messages=True,
+    # UI now expects this to be True so we don't have to guess when a handoff back occurs
+    output_mode="full_history",  # otherwise when reloading conversations, the sub-agents' messages are not included
 )
 
 langgraph_supervisor_agent = workflow.compile()
diff --git a/src/agents/langgraph_supervisor_hierarchy_agent.py b/src/agents/langgraph_supervisor_hierarchy_agent.py
@@ -0,0 +1,46 @@
+from langgraph.prebuilt import create_react_agent
+from langgraph_supervisor import create_supervisor
+
+from agents.langgraph_supervisor_agent import add, multiply, web_search
+from core import get_model, settings
+
+model = get_model(settings.DEFAULT_MODEL)
+
+
+def workflow(chosen_model):
+    """Build the (uncompiled) top-level supervisor for a 3-layer agent hierarchy.
+
+    Layers, outermost first:
+      1. the main supervisor (default node name "supervisor"),
+      2. a research supervisor compiled under the name "sub-agent-research_expert",
+      3. a ReAct math agent named "sub-agent-math_expert".
+
+    The "supervisor" / "sub-agent" substrings in the node names are deliberate:
+    they identify each graph node's role to downstream consumers.
+
+    Args:
+        chosen_model: Chat model passed to every agent and supervisor in the hierarchy.
+
+    Returns:
+        The object returned by ``create_supervisor``; call ``.compile()`` on it
+        to obtain a runnable graph.
+    """
+    math_agent = create_react_agent(
+        model=chosen_model,
+        tools=[add, multiply],
+        name="sub-agent-math_expert",  # Identify the graph node as a sub-agent
+        prompt="You are a math expert. Always use one tool at a time.",
+    ).with_config(tags=["skip_stream"])
+
+    research_agent = (
+        create_supervisor(
+            [math_agent],
+            model=chosen_model,
+            tools=[web_search],
+            prompt="You are a world class researcher with access to web search. Do not do any math, you have a math expert for that. ",
+            supervisor_name="supervisor-research_expert",  # Identify the graph node as a supervisor to the math agent
+        )
+        .compile(
+            name="sub-agent-research_expert"
+        )  # Identify the graph node as a sub-agent to the main supervisor
+        .with_config(tags=["skip_stream"])
+    )  # Stream tokens are ignored for sub-agents in the UI
+
+    # Create supervisor workflow
+    return create_supervisor(
+        [research_agent],
+        model=chosen_model,
+        prompt=(
+            # Adjacent literals are concatenated verbatim, so each sentence
+            # must carry its own trailing space.
+            "You are a team supervisor managing a research expert with math capabilities. "
+            "For current events, use research_agent. "
+        ),
+        add_handoff_back_messages=True,
+        # UI now expects this to be True so we don't have to guess when a handoff back occurs
+        output_mode="full_history",  # otherwise when reloading conversations, the sub-agents' messages are not included
+    )  # default name for supervisor is "supervisor".
+
+
+langgraph_supervisor_hierarchy_agent = workflow(model).compile()
diff --git a/src/service/service.py b/src/service/service.py
@@ -233,17 +233,17 @@ async def message_generator(
                     updates = updates or {}
                     update_messages = updates.get("messages", [])
                     # special cases for using langgraph-supervisor library
-                    if node == "supervisor":
-                        # Get only the last ToolMessage since is it added by the
-                        # langgraph lib and not actual AI output so it won't be an
-                        # independent event
+                    if "supervisor" in node or "sub-agent" in node:
+                        # the only tools that come from the actual agent are the handoff and handback tools
                         if isinstance(update_messages[-1], ToolMessage):
-                            update_messages = [update_messages[-1]]
+                            if "sub-agent" in node and len(update_messages) > 1:
+                                # If this is a sub-agent, we want to keep the last 2 messages - the handback tool, and its result
+                                update_messages = update_messages[-2:]
+                            else:
+                                # If this is a supervisor, we want to keep the last message only - the handoff result. The tool comes from the 'agent' node.
+                                update_messages = [update_messages[-1]]
                         else:
                             update_messages = []
-
-                    if node in ("research_expert", "math_expert"):
-                        update_messages = []
                     new_messages.extend(update_messages)
 
             if stream_mode == "custom":
diff --git a/src/streamlit_app.py b/src/streamlit_app.py
@@ -325,19 +325,30 @@ async def draw_messages(
                         # correct status container.
                         call_results = {}
                         for tool_call in msg.tool_calls:
+                            # Use different labels for transfer vs regular tool calls
+                            if "transfer_to" in tool_call["name"]:
+                                label = f"""💼 Sub Agent: {tool_call["name"]}"""
+                            else:
+                                label = f"""🛠️ Tool Call: {tool_call["name"]}"""
+
                             status = st.status(
-                                f"""Tool Call: {tool_call["name"]}""",
+                                label,
                                 state="running" if is_new else "complete",
                             )
                             call_results[tool_call["id"]] = status
-                            status.write("Input:")
-                            status.write(tool_call["args"])
 
                         # Expect one ToolMessage for each tool call.
                         for tool_call in msg.tool_calls:
                             if "transfer_to" in tool_call["name"]:
-                                await handle_agent_msgs(messages_agen, call_results, is_new)
+                                status = call_results[tool_call["id"]]
+                                status.update(expanded=True)
+                                await handle_sub_agent_msgs(messages_agen, status, is_new)
                                 break
+
+                            # Only non-transfer tool calls reach this point
+                            status = call_results[tool_call["id"]]
+                            status.write("Input:")
+                            status.write(tool_call["args"])
                             tool_result: ChatMessage = await anext(messages_agen)
 
                             if tool_result.type != "tool":
@@ -417,58 +428,90 @@ async def handle_feedback() -> None:
         st.toast("Feedback recorded", icon=":material/reviews:")
 
 
-async def handle_agent_msgs(messages_agen, call_results, is_new):
+async def handle_sub_agent_msgs(messages_agen, status, is_new):
     """
     This function segregates agent output into a status container.
     It handles all messages after the initial tool call message
     until it reaches the final AI message.
+
+    Enhanced to support nested multi-agent hierarchies with handoff back messages.
+
+    Args:
+        messages_agen: Async generator of messages
+        status: the status container for the current agent
+        is_new: Whether messages are new or replayed
     """
     nested_popovers = {}
-    # looking for the Success tool call message
+
+    # looking for the transfer Success tool call message
     first_msg = await anext(messages_agen)
     if is_new:
         st.session_state.messages.append(first_msg)
-    status = call_results.get(getattr(first_msg, "tool_call_id", None))
-    # Process first message
-    if status and first_msg.content:
-        status.write(first_msg.content)
-        # Continue reading until finish_reason='stop'
+
+    # Continue reading until we get an explicit handoff back
     while True:
-        # Check for completion on current message
-        finish_reason = getattr(first_msg, "response_metadata", {}).get("finish_reason")
-        # Break out of status container if finish_reason is anything other than "tool_calls"
-        if finish_reason is not None and finish_reason != "tool_calls":
-            if status:
-                status.update(state="complete")
-            break
         # Read next message
         sub_msg = await anext(messages_agen)
+
         # this should only happen is skip_stream flag is removed
         # if isinstance(sub_msg, str):
         #     continue
+
         if is_new:
             st.session_state.messages.append(sub_msg)
 
+        # Handle tool results with nested popovers
         if sub_msg.type == "tool" and sub_msg.tool_call_id in nested_popovers:
             popover = nested_popovers[sub_msg.tool_call_id]
             popover.write("**Output:**")
             popover.write(sub_msg.content)
-            first_msg = sub_msg
             continue
-        # Display content and tool calls using the same status
+
+        # Handle transfer_back_to tool calls - these indicate a sub-agent is returning control
+        if (
+            hasattr(sub_msg, "tool_calls")
+            and sub_msg.tool_calls
+            and any("transfer_back_to" in tc.get("name", "") for tc in sub_msg.tool_calls)
+        ):
+            # Process transfer_back_to tool calls
+            for tc in sub_msg.tool_calls:
+                if "transfer_back_to" in tc.get("name", ""):
+                    # Read the corresponding tool result
+                    transfer_result = await anext(messages_agen)
+                    if is_new:
+                        st.session_state.messages.append(transfer_result)
+
+            # After processing transfer back, we're done with this agent
+            if status:
+                status.update(state="complete")
+            break
+
+        # Display content and tool calls in the same nested status
         if status:
             if sub_msg.content:
                 status.write(sub_msg.content)
+
             if hasattr(sub_msg, "tool_calls") and sub_msg.tool_calls:
                 for tc in sub_msg.tool_calls:
-                    popover = status.popover(f"{tc['name']}", icon="🛠️")
-                    popover.write(f"**Tool:** {tc['name']}")
-                    popover.write("**Input:**")
-                    popover.write(tc["args"])
-                    # Store the popover reference using the tool call ID
-                    nested_popovers[tc["id"]] = popover
-        # Update first_msg for next iteration
-        first_msg = sub_msg
+                    # Check if this is a nested transfer/delegate
+                    if "transfer_to" in tc["name"]:
+                        # Create a nested status container for the sub-agent
+                        nested_status = status.status(
+                            f"""💼 Sub Agent: {tc["name"]}""",
+                            state="running" if is_new else "complete",
+                            expanded=True,
+                        )
+
+                        # Recursively handle sub-agents of this sub-agent
+                        await handle_sub_agent_msgs(messages_agen, nested_status, is_new)
+                    else:
+                        # Regular tool call - create popover
+                        popover = status.popover(f"{tc['name']}", icon="🛠️")
+                        popover.write(f"**Tool:** {tc['name']}")
+                        popover.write("**Input:**")
+                        popover.write(tc["args"])
+                        # Store the popover reference using the tool call ID
+                        nested_popovers[tc["id"]] = popover
 
 
 if __name__ == "__main__":
diff --git a/tests/app/test_streamlit_app.py b/tests/app/test_streamlit_app.py
diff --git a/tests/service/test_service_e2e.py b/tests/service/test_service_e2e.py
diff --git a/tests/service/test_service_message_generator.py b/tests/service/test_service_message_generator.py
diff --git a/uv.lock b/uv.lock