diff --git a/agent/deep_assistant.py b/agent/deep_assistant.py index 99158b5..643bdd5 100644 --- a/agent/deep_assistant.py +++ b/agent/deep_assistant.py @@ -22,6 +22,7 @@ from utils.fastapi_utils import detect_provider from .guideline_middleware import GuidelineMiddleware from .tool_output_length_middleware import ToolOutputLengthMiddleware from .tool_use_cleanup_middleware import ToolUseCleanupMiddleware +from .filepath_fix_middleware import FilePathFixMiddleware from utils.settings import ( SUMMARIZATION_MAX_TOKENS, SUMMARIZATION_TOKENS_TO_KEEP, @@ -535,6 +536,7 @@ def create_custom_cli_agent( deepagent_middleware = [ TodoListMiddleware(), + FilePathFixMiddleware(), # Remove spurious spaces in CJK file names inside tool-call arguments CustomFilesystemMiddleware(backend=composite_backend), # 使用自定义的 FilesystemMiddleware,支持 SKILL.md 完整读取 AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"), PatchToolCallsMiddleware(), diff --git a/agent/filepath_fix_middleware.py b/agent/filepath_fix_middleware.py new file mode 100644 index 0000000..d3b7c71 --- /dev/null +++ b/agent/filepath_fix_middleware.py @@ -0,0 +1,88 @@ +import re +import logging + +from langchain.agents.middleware import AgentMiddleware +from langchain.tools.tool_node import ToolCallRequest + +logger = logging.getLogger('app') + +# Common file extensions (lowercase, used for regex matching) +_FILE_EXT = ( + # Documents + r'txt|pdf|doc|docx|xls|xlsx|ppt|pptx|csv|md|rtf|odt|ods|odp' + r'|jtd|jtt|jaw' # JustSystems Ichitaro + # Images + r'|jpg|jpeg|png|gif|bmp|svg|webp|ico|tiff|tif' + # Audio + r'|mp3|wav|ogg|flac|aac|m4a' + # Video + r'|mp4|avi|mov|mkv|wmv|flv|webm' + # Archives + r'|zip|tar|gz|bz2|rar|7z|tgz' + # Code / data + r'|py|js|ts|html|htm|css|json|xml|yaml|yml|sql|sh|bat' + r'|java|c|cpp|h|rb|go|rs|php' + # Other + r'|log|ini|cfg|conf|toml|env|bin|exe|dmg|iso|db|sqlite|tmp|bak' +) + +_EXT_PATTERN = rf'\.(?:{_FILE_EXT})\b' + +# Anchor: a file extension or the directory separator / (so directory names in the middle of a path also match) +_ANCHOR = rf'(?:{_EXT_PATTERN}|/)' + +# Pattern 1: CJK character + whitespace + (fragment starting with an ASCII letter or digit + anchor) +# "農作業 2025.jtd" → "農作業2025.jtd" +# "ファイル report.txt" → 
"ファイルreport.txt" +# "/datasets/報告書 2025/file.txt" → "/datasets/報告書2025/file.txt" +_CJK_TO_ALNUM = re.compile( + rf'([\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff])\s+([a-zA-Z0-9][\w]*{_ANCHOR})' +) + +# Pattern 2: digit + whitespace + (fragment starting with a CJK character + anchor) +# "2025 報告書.txt" → "2025報告書.txt" +# "/datasets/2025 報告書/file.txt" → "/datasets/2025報告書/file.txt" +# Note: matches digits only, not letters, so that "cat 報告.txt" is not wrongly rewritten +# Note: (? str: + """Remove spurious spaces at CJK/non-CJK boundaries in file names and paths (re-applies the substitutions until the text stops changing).""" + prev = None + while prev != text: + prev = text + text = _CJK_TO_ALNUM.sub(r'\1\2', text) + text = _ALNUM_TO_CJK.sub(r'\1\2', text) + text = _PATH_LETTER.sub(r'\1\2', text) + return text + + +class FilePathFixMiddleware(AgentMiddleware): + """Remove spurious spaces from CJK file names/paths in tool-call arguments.""" + + def _fix_tool_call_args(self, request: ToolCallRequest) -> None: + args = request.tool_call.get('args', {}) + for key, value in args.items(): + if isinstance(value, str): + fixed = fix_filename_spacing(value) + if fixed != value: + logger.info(f"Filename spacing fix: args['{key}'] '{value}' -> '{fixed}'") + args[key] = fixed + + def wrap_tool_call(self, request, handler): + self._fix_tool_call_args(request) + return handler(request) + + async def awrap_tool_call(self, request, handler): + self._fix_tool_call_args(request) + return await handler(request)