Merge branch 'feature/tool-error-recovery' into bot_manager
This commit is contained in:
commit
2898c9c42d
@ -23,6 +23,7 @@ from .guideline_middleware import GuidelineMiddleware
|
||||
from .tool_output_length_middleware import ToolOutputLengthMiddleware
|
||||
from .tool_use_cleanup_middleware import ToolUseCleanupMiddleware
|
||||
from .tool_metrics_middleware import ToolMetricsMiddleware
|
||||
from .tool_error_recovery_middleware import ToolErrorRecoveryMiddleware
|
||||
from .filepath_fix_middleware import FilePathFixMiddleware
|
||||
from .mcp_trace_meta import patch_mcp_client_session_trace_meta
|
||||
from utils.settings import (
|
||||
@ -567,6 +568,7 @@ def create_custom_cli_agent(
|
||||
interrupt_on = _add_interrupt_on()
|
||||
|
||||
deepagent_middleware = [
|
||||
ToolErrorRecoveryMiddleware(), # Outermost: turn any tool exception into a ToolMessage so the agent can recover
|
||||
TodoListMiddleware(),
|
||||
FilePathFixMiddleware(), # Fix extra spaces in CJK file names within tool call arguments
|
||||
CustomFilesystemMiddleware(backend=composite_backend), # Use the custom FilesystemMiddleware with full SKILL.md reading support
|
||||
|
||||
98
agent/tool_error_recovery_middleware.py
Normal file
98
agent/tool_error_recovery_middleware.py
Normal file
@ -0,0 +1,98 @@
|
||||
"""Outermost middleware that converts tool exceptions into ToolMessage(status="error").
|
||||
|
||||
When a tool call raises (most commonly an MCP `ToolException` from
|
||||
`langchain_mcp_adapters`), LangGraph's default handler re-raises and breaks the
|
||||
agent stream, which in turn breaks the SSE response to the client. This
|
||||
middleware sits as the outermost wrapper around every tool call and converts any
|
||||
caught exception into a ToolMessage so the agent can keep looping and reply to
|
||||
the user in natural language about what went wrong.
|
||||
|
||||
`asyncio.CancelledError` is intentionally not caught — task cancellation must
|
||||
propagate. Metric emission (`ToolMetricsMiddleware`) still observes the inner
|
||||
`raise` because it sits *inside* this middleware in the chain.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import re
|
||||
from typing import Any, Callable
|
||||
|
||||
from langchain.agents.middleware import AgentMiddleware
|
||||
from langchain.tools.tool_node import ToolCallRequest
|
||||
from langchain_core.messages import ToolMessage
|
||||
|
||||
logger = logging.getLogger("app")
|
||||
|
||||
# Matches `text="..."` (or `text='...'`) inside MCP TextContent repr. Non-greedy
|
||||
# so each TextContent in a list is captured separately.
|
||||
_TEXT_CONTENT_PATTERN = re.compile(
|
||||
r"""TextContent\([^)]*?text=(?P<quote>['"])(?P<text>.*?)(?<!\\)(?P=quote)""",
|
||||
re.DOTALL,
|
||||
)
|
||||
|
||||
|
||||
class ToolErrorRecoveryMiddleware(AgentMiddleware):
|
||||
"""Catch tool-call exceptions and return them as error ToolMessages."""
|
||||
|
||||
def _extract_error_text(self, error: Exception) -> str:
|
||||
"""Pull human-readable text out of an exception.
|
||||
|
||||
MCP `ToolException` typically wraps a list of `TextContent` objects, so
|
||||
their string repr looks like `[TextContent(type='text', text="...", ...)]`.
|
||||
Strip the wrapper and keep just the inner `text` fields when present;
|
||||
otherwise fall back to `str(error)`.
|
||||
"""
|
||||
raw = str(error)
|
||||
matches = _TEXT_CONTENT_PATTERN.findall(raw)
|
||||
if matches:
|
||||
# findall returns list of tuples because of named groups; pick group 'text' (index 1).
|
||||
return "\n".join(text for _quote, text in matches if text)
|
||||
return raw
|
||||
|
||||
def _build_error_message(
|
||||
self,
|
||||
request: ToolCallRequest,
|
||||
error: Exception,
|
||||
) -> ToolMessage:
|
||||
tool_call = request.tool_call or {}
|
||||
tool_name = tool_call.get("name") or "unknown_tool"
|
||||
tool_call_id = tool_call.get("id") or ""
|
||||
|
||||
error_text = self._extract_error_text(error)
|
||||
content = f"Tool '{tool_name}' failed: {error_text}"
|
||||
|
||||
logger.warning(
|
||||
"Tool error recovered as ToolMessage: tool_name=%s error_type=%s",
|
||||
tool_name,
|
||||
type(error).__name__,
|
||||
)
|
||||
|
||||
return ToolMessage(
|
||||
content=content,
|
||||
tool_call_id=tool_call_id,
|
||||
name=tool_name,
|
||||
status="error",
|
||||
)
|
||||
|
||||
def wrap_tool_call(
|
||||
self,
|
||||
request: ToolCallRequest,
|
||||
handler: Callable[[ToolCallRequest], Any],
|
||||
) -> Any:
|
||||
try:
|
||||
return handler(request)
|
||||
except Exception as exc: # noqa: BLE001 — outermost recovery
|
||||
return self._build_error_message(request, exc)
|
||||
|
||||
async def awrap_tool_call(
|
||||
self,
|
||||
request: ToolCallRequest,
|
||||
handler: Callable[[ToolCallRequest], Any],
|
||||
) -> Any:
|
||||
try:
|
||||
return await handler(request)
|
||||
except asyncio.CancelledError:
|
||||
# Cancellation must propagate so the agent task can shut down cleanly.
|
||||
raise
|
||||
except Exception as exc: # noqa: BLE001 — outermost recovery
|
||||
return self._build_error_message(request, exc)
|
||||
Loading…
Reference in New Issue
Block a user