import re
import logging

from langchain.agents.middleware import AgentMiddleware
from langchain.tools.tool_node import ToolCallRequest

logger = logging.getLogger('app')

# Common file extensions in lowercase, used for regex matching.
# The fragments below are adjacent raw-string literals, so they concatenate
# into a single '|'-separated alternation body (no capturing group, no flags).
_FILE_EXT = (
    # Documents
    r'txt|pdf|doc|docx|xls|xlsx|ppt|pptx|csv|md|rtf|odt|ods|odp'
    r'|jtd|jtt|jaw'  # JustSystems Ichitaro
    # Images
    r'|jpg|jpeg|png|gif|bmp|svg|webp|ico|tiff|tif'
    # Audio
    r'|mp3|wav|ogg|flac|aac|m4a'
    # Video
    r'|mp4|avi|mov|mkv|wmv|flv|webm'
    # Archives
    r'|zip|tar|gz|bz2|rar|7z|tgz'
    # Code / data
    r'|py|js|ts|html|htm|css|json|xml|yaml|yml|sql|sh|bat'
    r'|java|c|cpp|h|rb|go|rs|php'
    # Others
    r'|log|ini|cfg|conf|toml|env|bin|exe|dmg|iso|db|sqlite|tmp|bak'
)

# A literal dot, one of the extensions above, then a word boundary. The
# trailing \b keeps a short extension (e.g. "c", "h") from matching as the
# prefix of a longer word.
_EXT_PATTERN = rf'\.(?:{_FILE_EXT})\b'

# Anchor: file extension or directory separator /, used to match directory names in the middle of a path.
_ANCHOR = rf'(?:{_EXT_PATTERN}|/)'

# Pattern 1: CJK character + spaces + (segment starting with alphanumeric + anchor)
#   "農作業 2025.jtd" → "農作業2025.jtd"
#   "ファイル report.txt" → "ファイルreport.txt"
#   "/datasets/報告書 2025/file.txt" → "/datasets/報告書2025/file.txt"
# Group 1 is the single CJK character before the whitespace (CJK Unified
# Ideographs U+4E00-U+9FFF, Hiragana U+3040-U+309F, Katakana U+30A0-U+30FF);
# group 2 is the ASCII-alphanumeric-led segment up to and including the anchor.
# Substituting r'\1\2' therefore drops only the whitespace between them.
_CJK_TO_ALNUM = re.compile(
    rf'([\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff])\s+([a-zA-Z0-9][\w]*{_ANCHOR})'
)

# Pattern 2: digits + spaces + (segment starting with CJK + anchor)
#   "2025 報告書.txt" → "2025報告書.txt"
#   "/datasets/2025 報告書/file.txt" → "/datasets/2025報告書/file.txt"
# Note: only match digits, not letters, to avoid incorrectly rewriting "cat 報告.txt".
# Note: (?
str: """Fix extra spaces around CJK/non-CJK boundaries in file names and paths until stable.""" prev = None while prev != text: prev = text text = _CJK_TO_ALNUM.sub(r'\1\2', text) text = _ALNUM_TO_CJK.sub(r'\1\2', text) text = _PATH_LETTER.sub(r'\1\2', text) return text class FilePathFixMiddleware(AgentMiddleware): """Fix extra spaces in CJK file names and paths inside tool call arguments.""" def _fix_tool_call_args(self, request: ToolCallRequest) -> None: args = request.tool_call.get('args', {}) for key, value in args.items(): if isinstance(value, str): fixed = fix_filename_spacing(value) if fixed != value: logger.info(f"Filename spacing fix: args['{key}'] '{value}' -> '{fixed}'") args[key] = fixed def wrap_tool_call(self, request, handler): self._fix_tool_call_args(request) return handler(request) async def awrap_tool_call(self, request, handler): self._fix_tool_call_args(request) return await handler(request)