Skip to content

Commit ee02b9f

Browse files
xunliulxl0413
and authored
feat: Generate a fallback report upon recursion limit hit (#838)
* finish handle_recursion_limit_fallback * fix * renmae test file * fix * doc --------- Co-authored-by: lxl0413 <lixinling2021@gmail.com>
1 parent: 9a34e32 · commit: ee02b9f

7 files changed

Lines changed: 895 additions & 12 deletions

File tree

docs/configuration_guide.md

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -305,6 +305,31 @@ Or via API request parameter:
305305

306306
---
307307

308+
## Recursion Fallback Configuration
309+
310+
When agents hit the recursion limit, DeerFlow can gracefully generate a summary of accumulated findings instead of failing (enabled by default).
311+
312+
### Configuration
313+
314+
In `conf.yaml`:
315+
```yaml
316+
ENABLE_RECURSION_FALLBACK: true
317+
```
318+
319+
### Recursion Limit
320+
321+
Set the maximum recursion limit via environment variable:
322+
```bash
323+
export AGENT_RECURSION_LIMIT=50 # default: 25
324+
```
325+
326+
Or in `.env`:
327+
```ini
328+
AGENT_RECURSION_LIMIT=50
329+
```
330+
331+
---
332+
308333
## RAG (Retrieval-Augmented Generation) Configuration
309334

310335
DeerFlow supports multiple RAG providers for document retrieval. Configure the RAG provider by setting environment variables.

src/config/configuration.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,9 @@ class Configuration:
6363
interrupt_before_tools: list[str] = field(
6464
default_factory=list
6565
) # List of tool names to interrupt before execution
66+
enable_recursion_fallback: bool = (
67+
True # Enable graceful fallback when recursion limit is reached
68+
)
6669

6770
@classmethod
6871
def from_runnable_config(

src/graph/nodes.py

Lines changed: 123 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -7,10 +7,11 @@
77
from functools import partial
88
from typing import Annotated, Any, Literal
99

10-
from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
10+
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage
1111
from langchain_core.runnables import RunnableConfig
1212
from langchain_core.tools import tool
1313
from langchain_mcp_adapters.client import MultiServerMCPClient
14+
from langgraph.errors import GraphRecursionError
1415
from langgraph.types import Command, interrupt
1516

1617
from src.agents import create_agent
@@ -19,7 +20,7 @@
1920
from src.config.configuration import Configuration
2021
from src.llms.llm import get_llm_by_type, get_llm_token_limit_by_type
2122
from src.prompts.planner_model import Plan
22-
from src.prompts.template import apply_prompt_template
23+
from src.prompts.template import apply_prompt_template, get_system_prompt_template
2324
from src.tools import (
2425
crawl_tool,
2526
get_retriever_tool,
@@ -929,6 +930,79 @@ def validate_web_search_usage(messages: list, agent_name: str = "agent") -> bool
929930
return web_search_used
930931

931932

933+
async def _handle_recursion_limit_fallback(
934+
messages: list,
935+
agent_name: str,
936+
current_step,
937+
state: State,
938+
) -> list:
939+
"""Handle GraphRecursionError with graceful fallback using LLM summary.
940+
941+
When the agent hits the recursion limit, this function generates a final output
942+
using only the observations already gathered, without calling any tools.
943+
944+
Args:
945+
messages: Messages accumulated during agent execution before hitting limit
946+
agent_name: Name of the agent that hit the limit
947+
current_step: The current step being executed
948+
state: Current workflow state
949+
950+
Returns:
951+
list: Messages including the accumulated messages plus the fallback summary
952+
953+
Raises:
954+
Exception: If the fallback LLM call fails
955+
"""
956+
logger.warning(
957+
f"Recursion limit reached for {agent_name} agent. "
958+
f"Attempting graceful fallback with {len(messages)} accumulated messages."
959+
)
960+
961+
if len(messages) == 0:
962+
return messages
963+
964+
cleared_messages = messages.copy()
965+
while len(cleared_messages) > 0 and cleared_messages[-1].type == "system":
966+
cleared_messages = cleared_messages[:-1]
967+
968+
# Prepare state for prompt template
969+
fallback_state = {
970+
"locale": state.get("locale", "en-US"),
971+
}
972+
973+
# Apply the recursion_fallback prompt template
974+
system_prompt = get_system_prompt_template(agent_name, fallback_state, None, fallback_state.get("locale", "en-US"))
975+
limit_prompt = get_system_prompt_template("recursion_fallback", fallback_state, None, fallback_state.get("locale", "en-US"))
976+
fallback_messages = cleared_messages + [
977+
SystemMessage(content=system_prompt),
978+
SystemMessage(content=limit_prompt)
979+
]
980+
981+
# Get the LLM without tools (strip all tools from binding)
982+
fallback_llm = get_llm_by_type(AGENT_LLM_MAP[agent_name])
983+
984+
# Call the LLM with the updated messages
985+
fallback_response = fallback_llm.invoke(fallback_messages)
986+
fallback_content = fallback_response.content
987+
988+
logger.info(
989+
f"Graceful fallback succeeded for {agent_name} agent. "
990+
f"Generated summary of {len(fallback_content)} characters."
991+
)
992+
993+
# Sanitize response
994+
fallback_content = sanitize_tool_response(str(fallback_content))
995+
996+
# Update the step with the fallback result
997+
current_step.execution_res = fallback_content
998+
999+
# Return the accumulated messages plus the fallback response
1000+
result_messages = list(cleared_messages)
1001+
result_messages.append(AIMessage(content=fallback_content, name=agent_name))
1002+
1003+
return result_messages
1004+
1005+
9321006
async def _execute_agent_step(
9331007
state: State, agent, agent_name: str, config: RunnableConfig = None
9341008
) -> Command[Literal["research_team"]]:
@@ -1049,11 +1123,51 @@ async def _execute_agent_step(
10491123
f"Context compression for {agent_name}: {len(compressed_state.get('messages', []))} messages, "
10501124
f"estimated tokens before: ~{token_count_before}, after: ~{token_count_after}"
10511125
)
1052-
1126+
10531127
try:
1054-
result = await agent.ainvoke(
1055-
input=agent_input, config={"recursion_limit": recursion_limit}
1056-
)
1128+
# Use stream from the start to capture messages in real-time
1129+
# This allows us to retrieve accumulated messages even if recursion limit is hit
1130+
accumulated_messages = []
1131+
for chunk in agent.stream(
1132+
input=agent_input,
1133+
config={"recursion_limit": recursion_limit},
1134+
stream_mode="values",
1135+
):
1136+
if isinstance(chunk, dict) and "messages" in chunk:
1137+
accumulated_messages = chunk["messages"]
1138+
1139+
# If we get here, execution completed successfully
1140+
result = {"messages": accumulated_messages}
1141+
except GraphRecursionError:
1142+
# Check if recursion fallback is enabled
1143+
configurable = Configuration.from_runnable_config(config) if config else Configuration()
1144+
1145+
if configurable.enable_recursion_fallback:
1146+
try:
1147+
# Call fallback with accumulated messages (function returns list of messages)
1148+
response_messages = await _handle_recursion_limit_fallback(
1149+
messages=accumulated_messages,
1150+
agent_name=agent_name,
1151+
current_step=current_step,
1152+
state=state,
1153+
)
1154+
1155+
# Create result dict so the code can continue normally from line 1178
1156+
result = {"messages": response_messages}
1157+
except Exception as fallback_error:
1158+
# If fallback fails, log and fall through to standard error handling
1159+
logger.error(
1160+
f"Recursion fallback failed for {agent_name} agent: {fallback_error}. "
1161+
"Falling back to standard error handling."
1162+
)
1163+
raise
1164+
else:
1165+
# Fallback disabled, let error propagate to standard handler
1166+
logger.info(
1167+
f"Recursion limit reached but graceful fallback is disabled. "
1168+
"Using standard error handling."
1169+
)
1170+
raise
10571171
except Exception as e:
10581172
import traceback
10591173

@@ -1088,8 +1202,10 @@ async def _execute_agent_step(
10881202
goto="research_team",
10891203
)
10901204

1205+
response_messages = result["messages"]
1206+
10911207
# Process the result
1092-
response_content = result["messages"][-1].content
1208+
response_content = response_messages[-1].content
10931209

10941210
# Sanitize response to remove extra tokens and truncate if needed
10951211
response_content = sanitize_tool_response(str(response_content))

src/prompts/recursion_fallback.md

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,16 @@
1+
---
2+
CURRENT_TIME: {{ CURRENT_TIME }}
3+
locale: {{ locale }}
4+
---
5+
6+
You have reached the maximum number of reasoning steps.
7+
8+
Using ONLY the tool observations already produced,
9+
write the final research report in EXACTLY the same format
10+
as you would normally output at the end of this task.
11+
12+
Do not call any tools.
13+
Do not add new information.
14+
If something is missing, state it explicitly.
15+
16+
Always output in the locale of **{{ locale }}**.

src/prompts/template.py

Lines changed: 26 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@
44
import dataclasses
55
import os
66
from datetime import datetime
7-
87
from jinja2 import Environment, FileSystemLoader, TemplateNotFound, select_autoescape
98
from langchain.agents import AgentState
109

@@ -61,6 +60,28 @@ def apply_prompt_template(
6160
Returns:
6261
List of messages with the system prompt as the first message
6362
"""
63+
try:
64+
system_prompt = get_system_prompt_template(prompt_name, state, configurable, locale)
65+
return [{"role": "system", "content": system_prompt}] + state["messages"]
66+
except Exception as e:
67+
raise ValueError(f"Error applying template {prompt_name} for locale {locale}: {e}")
68+
69+
def get_system_prompt_template(
70+
prompt_name: str, state: AgentState, configurable: Configuration = None, locale: str = "en-US"
71+
) -> str:
72+
"""
73+
Render and return the system prompt template with state and configuration variables.
74+
This function loads a Jinja2-based prompt template (with optional locale-specific
75+
variants), applies variables from the agent state and Configuration object, and
76+
returns the fully rendered system prompt string.
77+
Args:
78+
prompt_name: Name of the prompt template to load (without .md extension).
79+
state: Current agent state containing variables available to the template.
80+
configurable: Optional Configuration object providing additional template variables.
81+
locale: Language locale for template selection (e.g., en-US, zh-CN).
82+
Returns:
83+
The rendered system prompt string after applying all template variables.
84+
"""
6485
# Convert state to dict for template rendering
6586
state_vars = {
6687
"CURRENT_TIME": datetime.now().strftime("%a %b %d %Y %H:%M:%S %z"),
@@ -74,15 +95,15 @@ def apply_prompt_template(
7495
try:
7596
# Normalize locale format
7697
normalized_locale = locale.replace("-", "_") if locale and locale.strip() else "en_US"
77-
98+
7899
# Try locale-specific template first
79100
try:
80101
template = env.get_template(f"{prompt_name}.{normalized_locale}.md")
81102
except TemplateNotFound:
82103
# Fallback to English template
83104
template = env.get_template(f"{prompt_name}.md")
84-
105+
85106
system_prompt = template.render(**state_vars)
86-
return [{"role": "system", "content": system_prompt}] + state["messages"]
107+
return system_prompt
87108
except Exception as e:
88-
raise ValueError(f"Error applying template {prompt_name} for locale {locale}: {e}")
109+
raise ValueError(f"Error loading template {prompt_name} for locale {locale}: {e}")

0 commit comments

Comments (0)