Merge branch 'feature/moshui20260410-file-path-fix' into bot_manager
This commit is contained in:
commit
9825a43cad
@ -22,6 +22,7 @@ from utils.fastapi_utils import detect_provider
|
|||||||
from .guideline_middleware import GuidelineMiddleware
|
from .guideline_middleware import GuidelineMiddleware
|
||||||
from .tool_output_length_middleware import ToolOutputLengthMiddleware
|
from .tool_output_length_middleware import ToolOutputLengthMiddleware
|
||||||
from .tool_use_cleanup_middleware import ToolUseCleanupMiddleware
|
from .tool_use_cleanup_middleware import ToolUseCleanupMiddleware
|
||||||
|
from .filepath_fix_middleware import FilePathFixMiddleware
|
||||||
from utils.settings import (
|
from utils.settings import (
|
||||||
SUMMARIZATION_MAX_TOKENS,
|
SUMMARIZATION_MAX_TOKENS,
|
||||||
SUMMARIZATION_TOKENS_TO_KEEP,
|
SUMMARIZATION_TOKENS_TO_KEEP,
|
||||||
@ -535,6 +536,7 @@ def create_custom_cli_agent(
|
|||||||
|
|
||||||
deepagent_middleware = [
|
deepagent_middleware = [
|
||||||
TodoListMiddleware(),
|
TodoListMiddleware(),
|
||||||
|
FilePathFixMiddleware(), # 修复工具调用参数中 CJK 文件名的多余空格
|
||||||
CustomFilesystemMiddleware(backend=composite_backend), # 使用自定义的 FilesystemMiddleware,支持 SKILL.md 完整读取
|
CustomFilesystemMiddleware(backend=composite_backend), # 使用自定义的 FilesystemMiddleware,支持 SKILL.md 完整读取
|
||||||
AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"),
|
AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"),
|
||||||
PatchToolCallsMiddleware(),
|
PatchToolCallsMiddleware(),
|
||||||
|
|||||||
88
agent/filepath_fix_middleware.py
Normal file
88
agent/filepath_fix_middleware.py
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
import re
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from langchain.agents.middleware import AgentMiddleware
|
||||||
|
from langchain.tools.tool_node import ToolCallRequest
|
||||||
|
|
||||||
|
# Module logger; bound to the fixed logger name 'app' rather than this module's own name.
logger = logging.getLogger('app')
|
||||||
|
|
||||||
|
# 常见文件后缀(小写,用于正则匹配)
|
||||||
|
_FILE_EXT = (
|
||||||
|
# 文档
|
||||||
|
r'txt|pdf|doc|docx|xls|xlsx|ppt|pptx|csv|md|rtf|odt|ods|odp'
|
||||||
|
r'|jtd|jtt|jaw' # JustSystems Ichitaro
|
||||||
|
# 图片
|
||||||
|
r'|jpg|jpeg|png|gif|bmp|svg|webp|ico|tiff|tif'
|
||||||
|
# 音频
|
||||||
|
r'|mp3|wav|ogg|flac|aac|m4a'
|
||||||
|
# 视频
|
||||||
|
r'|mp4|avi|mov|mkv|wmv|flv|webm'
|
||||||
|
# 压缩
|
||||||
|
r'|zip|tar|gz|bz2|rar|7z|tgz'
|
||||||
|
# 代码/数据
|
||||||
|
r'|py|js|ts|html|htm|css|json|xml|yaml|yml|sql|sh|bat'
|
||||||
|
r'|java|c|cpp|h|rb|go|rs|php'
|
||||||
|
# 其他
|
||||||
|
r'|log|ini|cfg|conf|toml|env|bin|exe|dmg|iso|db|sqlite|tmp|bak'
|
||||||
|
)
|
||||||
|
|
||||||
|
_EXT_PATTERN = rf'\.(?:{_FILE_EXT})\b'
|
||||||
|
|
||||||
|
# 锚点:文件后缀 或 目录分隔符 /(用于匹配路径中间的目录名)
|
||||||
|
_ANCHOR = rf'(?:{_EXT_PATTERN}|/)'
|
||||||
|
|
||||||
|
# 模式1: CJK字符 + 空格 + (以字母数字开头的片段 + 锚点)
|
||||||
|
# "農作業 2025.jtd" → "農業2025.jtd"
|
||||||
|
# "ファイル report.txt" → "ファイルreport.txt"
|
||||||
|
# "/datasets/報告書 2025/file.txt" → "/datasets/報告書2025/file.txt"
|
||||||
|
_CJK_TO_ALNUM = re.compile(
|
||||||
|
rf'([\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff])\s+([a-zA-Z0-9][\w]*{_ANCHOR})'
|
||||||
|
)
|
||||||
|
|
||||||
|
# 模式2: 数字 + 空格 + (以CJK开头的片段 + 锚点)
|
||||||
|
# "2025 報告書.txt" → "2025報告書.txt"
|
||||||
|
# "/datasets/2025 報告書/file.txt" → "/datasets/2025報告書/file.txt"
|
||||||
|
# 注意: 只匹配数字不匹配字母,避免 "cat 報告.txt" 被误修
|
||||||
|
# 注意: (?<!\.) 排除 "xxx.py ファイル.txt" 这种跨参数的误匹配
|
||||||
|
_ALNUM_TO_CJK = re.compile(
|
||||||
|
rf'(?<!\.)(\d+)\s+([\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff][\w]*{_ANCHOR})'
|
||||||
|
)
|
||||||
|
|
||||||
|
# 模式3: 路径上下文中(/ 后面)的字母 + 空格 + (字母数字开头.后缀)
|
||||||
|
# "/data/report 2025年度.xlsx" → "/data/report2025年度.xlsx"
|
||||||
|
# 用 (?<=/) 确保只在路径中生效,避免 "cat report 2025.txt" 被误修
|
||||||
|
_PATH_LETTER = re.compile(
|
||||||
|
rf'(?<=/)([a-zA-Z][\w]*)\s+([a-zA-Z0-9][\w]*{_ANCHOR})'
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def fix_filename_spacing(text: str) -> str:
    """Remove stray whitespace at CJK / non-CJK boundaries in file names and paths.

    The three module-level patterns are applied in a fixed order
    (CJK→alnum, digits→CJK, path-letter) and the whole pass is repeated
    until a pass leaves the text unchanged, so joins enabled by an earlier
    substitution are picked up as well.

    Args:
        text: The string to clean up.

    Returns:
        The string with every matched gap removed.
    """
    previous = None
    while text != previous:
        previous = text
        # Each pattern captures the text on both sides of the gap; re-emitting
        # the two groups back to back drops the whitespace between them.
        for pattern in (_CJK_TO_ALNUM, _ALNUM_TO_CJK, _PATH_LETTER):
            text = pattern.sub(r'\1\2', text)
    return text
|
||||||
|
|
||||||
|
|
||||||
|
class FilePathFixMiddleware(AgentMiddleware):
    """Remove stray spaces from CJK file names/paths in tool-call arguments.

    Before a tool call is passed to the next handler, every top-level string
    argument is run through ``fix_filename_spacing``. Non-string values and
    values nested inside lists/dicts are left untouched.

    NOTE(review): arguments are not filtered by name, so free-text arguments
    are rewritten as well as path arguments — confirm that this is intended.
    """

    def _fix_tool_call_args(self, request: ToolCallRequest) -> None:
        """Rewrite the string arguments of ``request.tool_call`` in place.

        Args:
            request: The tool-call request whose ``tool_call['args']`` mapping
                is inspected and, where a fix applies, updated.
        """
        args = request.tool_call.get('args')
        if not args:
            # Missing, ``None`` or empty arguments: nothing to fix.
            return
        # Iterate over a snapshot so writing back into ``args`` cannot disturb iteration.
        for key, value in list(args.items()):
            if not isinstance(value, str):
                continue
            fixed = fix_filename_spacing(value)
            if fixed != value:
                # Lazy %-formatting: the message is only built if INFO is enabled.
                logger.info(
                    "Filename spacing fix: args['%s'] '%s' -> '%s'",
                    key, value, fixed,
                )
                args[key] = fixed

    def wrap_tool_call(self, request, handler):
        """Fix the request's arguments, then delegate to ``handler`` (sync path)."""
        self._fix_tool_call_args(request)
        return handler(request)

    async def awrap_tool_call(self, request, handler):
        """Fix the request's arguments, then await ``handler`` (async path)."""
        self._fix_tool_call_args(request)
        return await handler(request)
|
||||||
Loading…
Reference in New Issue
Block a user