qwen_agent/agent/filepath_fix_middleware.py
朱潮 425f3c5bb4 chore: replace Chinese comments and log messages with English
Convert all Chinese comments, docstrings, logger/print output,
HTTPException detail messages, and API response messages to English
across the entire codebase. Functional zh/ja localized strings
(e.g. prompt templates, timezone display names, date formats) are
preserved as-is.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-30 19:45:35 +08:00

89 lines
3.2 KiB
Python

import re
import logging
from langchain.agents.middleware import AgentMiddleware
from langchain.tools.tool_node import ToolCallRequest
logger = logging.getLogger('app')

# Alternation of lowercase file extensions (no leading dot), grouped by kind.
# The adjacent raw-string pieces are concatenated into a single regex fragment.
_FILE_EXT = (
    # Documents
    r'txt|pdf|doc|docx|xls|xlsx|ppt|pptx|csv|md|rtf|odt|ods|odp'
    # JustSystems Ichitaro
    r'|jtd|jtt|jaw'
    # Images
    r'|jpg|jpeg|png|gif|bmp|svg|webp|ico|tiff|tif'
    # Audio
    r'|mp3|wav|ogg|flac|aac|m4a'
    # Video
    r'|mp4|avi|mov|mkv|wmv|flv|webm'
    # Archives
    r'|zip|tar|gz|bz2|rar|7z|tgz'
    # Code / data
    r'|py|js|ts|html|htm|css|json|xml|yaml|yml|sql|sh|bat'
    r'|java|c|cpp|h|rb|go|rs|php'
    # Others
    r'|log|ini|cfg|conf|toml|env|bin|exe|dmg|iso|db|sqlite|tmp|bak'
)

# A literal dot, one of the extensions above, then a word boundary.
_EXT_PATTERN = rf'\.(?:{_FILE_EXT})\b'

# What must follow a joined segment: either a file extension or a directory
# separator "/", so directory names in the middle of a path qualify as well.
_ANCHOR = rf'(?:{_EXT_PATTERN}|/)'

# Rule 1: one CJK character, whitespace, then a segment that starts with an
# ASCII letter or digit and runs up to an anchor.
#   "農作業 2025.jtd"                 -> "農作業2025.jtd"
#   "ファイル report.txt"             -> "ファイルreport.txt"
#   "/datasets/報告書 2025/file.txt"  -> "/datasets/報告書2025/file.txt"
_CJK_TO_ALNUM = re.compile(
    rf'([\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff])\s+([a-zA-Z0-9][\w]*{_ANCHOR})'
)

# Rule 2: a run of digits, whitespace, then a segment that starts with a CJK
# character and runs up to an anchor.
#   "2025 報告書.txt"                 -> "2025報告書.txt"
#   "/datasets/2025 報告書/file.txt"  -> "/datasets/2025報告書/file.txt"
# Only digits are accepted on the left (never letters), so a command such as
# "cat 報告.txt" is left alone.
# The (?<!\.) lookbehind stops a match from starting on a digit that directly
# follows a dot, e.g. the "2" in "v1.2 報告.txt".
# NOTE(review): a run of two or more digits after a dot can still match from
# its second digit (e.g. "a.25 報告.txt") - confirm this is intended.
_ALNUM_TO_CJK = re.compile(
    rf'(?<!\.)(\d+)\s+([\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff][\w]*{_ANCHOR})'
)

# Rule 3: directly after a "/", a segment starting with an ASCII letter,
# whitespace, then a segment starting with an ASCII letter or digit and
# running up to an anchor.
#   "/data/report 2025年度.xlsx"      -> "/data/report2025年度.xlsx"
# The (?<=/) lookbehind restricts this rule to path context, so plain text
# such as "cat report 2025.txt" is not rewritten.
_PATH_LETTER = re.compile(
    rf'(?<=/)([a-zA-Z][\w]*)\s+([a-zA-Z0-9][\w]*{_ANCHOR})'
)
def fix_filename_spacing(text: str) -> str:
    """Remove stray whitespace at CJK/non-CJK boundaries in file names and paths.

    The three module-level spacing patterns are applied in a fixed order, and
    the whole pass is repeated until a pass leaves the string unchanged, so a
    join made by one pattern can expose a further match for another.

    Args:
        text: Text that may contain file names or paths.

    Returns:
        The text with every matched whitespace run removed.
    """
    spacing_rules = (_CJK_TO_ALNUM, _ALNUM_TO_CJK, _PATH_LETTER)
    previous = None
    while text != previous:
        previous = text
        for rule in spacing_rules:
            # Keep both captured sides and drop the whitespace between them.
            text = rule.sub(r'\1\2', text)
    return text
class FilePathFixMiddleware(AgentMiddleware):
    """Middleware that repairs spacing in CJK file names and paths in tool call arguments."""

    def _fix_tool_call_args(self, request: ToolCallRequest) -> None:
        """Rewrite the string arguments of ``request.tool_call`` in place.

        Each top-level string value under ``'args'`` is passed through
        ``fix_filename_spacing``. Values that change are logged at INFO level
        and stored back under the same key. Non-string values are left
        untouched, and nested containers are not traversed.
        """
        tool_args = request.tool_call.get('args', {})
        for key, value in tool_args.items():
            if not isinstance(value, str):
                continue
            fixed = fix_filename_spacing(value)
            if fixed == value:
                continue
            logger.info(f"Filename spacing fix: args['{key}'] '{value}' -> '{fixed}'")
            # Replacing the value of an existing key is safe while iterating.
            tool_args[key] = fixed

    def wrap_tool_call(self, request, handler):
        """Fix the request's arguments, then return ``handler(request)``."""
        self._fix_tool_call_args(request)
        return handler(request)

    async def awrap_tool_call(self, request, handler):
        """Fix the request's arguments, then await and return ``handler(request)``."""
        self._fix_tool_call_args(request)
        return await handler(request)