From 9d2735a53c5adbc9a3ac760d857a5b13ef3cc992 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E6=BD=AE?= Date: Thu, 16 Oct 2025 21:42:18 +0800 Subject: [PATCH] add regex for multi search --- mcp/multi_keyword_search_server.py | 149 +++++++++++++++++++++++++---- 1 file changed, 130 insertions(+), 19 deletions(-) diff --git a/mcp/multi_keyword_search_server.py b/mcp/multi_keyword_search_server.py index 10ccfe4..616a6c4 100644 --- a/mcp/multi_keyword_search_server.py +++ b/mcp/multi_keyword_search_server.py @@ -9,7 +9,8 @@ import json import os import sys import asyncio -from typing import Any, Dict, List, Optional +import re +from typing import Any, Dict, List, Optional, Union def validate_file_path(file_path: str, allowed_dir: str) -> str: @@ -38,9 +39,48 @@ def get_allowed_directory(): return os.path.abspath(project_dir) +def is_regex_pattern(pattern: str) -> bool: + """检测字符串是否为正则表达式模式""" + # 检查 /pattern/ 格式 + if pattern.startswith('/') and pattern.endswith('/') and len(pattern) > 2: + return True + + # 检查 r"pattern" 或 r'pattern' 格式 + if pattern.startswith(('r"', "r'")) and pattern.endswith(('"', "'")) and len(pattern) > 3: + return True + + # 检查是否包含正则特殊字符 + regex_chars = {'*', '+', '?', '|', '(', ')', '[', ']', '{', '}', '^', '$', '\\', '.'} + return any(char in pattern for char in regex_chars) + + +def compile_pattern(pattern: str) -> Union[re.Pattern, str, None]: + """编译正则表达式模式,如果不是正则则返回原字符串""" + if not is_regex_pattern(pattern): + return pattern + + try: + # 处理 /pattern/ 格式 + if pattern.startswith('/') and pattern.endswith('/'): + regex_body = pattern[1:-1] + return re.compile(regex_body) + + # 处理 r"pattern" 或 r'pattern' 格式 + if pattern.startswith(('r"', "r'")) and pattern.endswith(('"', "'")): + regex_body = pattern[2:-1] + return re.compile(regex_body) + + # 直接编译包含正则字符的字符串 + return re.compile(pattern) + except re.error as e: + # 如果编译失败,返回None表示无效的正则 + print(f"警告: 正则表达式 '{pattern}' 编译失败: {e}") + return None + + def multi_keyword_search(keywords: List[str], file_paths: List[str], limit: int = 10, case_sensitive: bool = False) -> Dict[str, Any]: - """执行多关键词搜索""" + """执行多关键词和正则表达式搜索""" if not keywords: return { "content": [ @@ -61,6 +101,21 @@ def multi_keyword_search(keywords: List[str], file_paths: List[str], ] } + # 预处理和验证关键词中的正则表达式 + valid_keywords = [] + regex_errors = [] + + for keyword in keywords: + compiled = compile_pattern(keyword) + if compiled is None: + regex_errors.append(keyword) + else: + valid_keywords.append(keyword) + + if regex_errors: + error_msg = f"警告: 以下正则表达式编译失败,将被忽略: {', '.join(regex_errors)}" + print(error_msg) + # 处理项目目录限制 project_data_dir = get_allowed_directory() @@ -100,7 +155,7 @@ def multi_keyword_search(keywords: List[str], file_paths: List[str], for file_path in valid_paths: try: - results = search_keywords_in_file(file_path, keywords, case_sensitive) + results = search_keywords_in_file(file_path, valid_keywords, case_sensitive) all_results.extend(results) except Exception as e: continue @@ -122,10 +177,24 @@ def multi_keyword_search(keywords: List[str], file_paths: List[str], ] } - formatted_output = "\n".join([ - f"{result['line_number']}:match_count({result['match_count']}):{result['content']}" - for result in limited_results - ]) + # 增强格式化输出,显示匹配类型和详细信息 + formatted_lines = [] + for result in limited_results: + line_prefix = f"{result['line_number']}:match_count({result['match_count']}):" + + # 构建匹配详情 + match_details = [] + for pattern in result['matched_patterns']: + if pattern['type'] == 'regex': + match_details.append(f"[regex:{pattern['original']}={pattern['match']}]") + else: + match_details.append(f"[keyword:{pattern['match']}]") + + match_info = " ".join(match_details) if match_details else "" + formatted_line = f"{line_prefix}{match_info}:{result['content']}" if match_info else f"{line_prefix}{result['content']}" + formatted_lines.append(formatted_line) + + formatted_output = "\n".join(formatted_lines) return { "content": [ @@ -139,7 +208,7 @@ def multi_keyword_search(keywords: List[str], file_paths: List[str], def search_keywords_in_file(file_path: str, keywords: List[str], case_sensitive: bool) -> List[Dict[str, Any]]: - """搜索单个文件中的关键词""" + """搜索单个文件中的关键词和正则表达式""" results = [] try: @@ -148,27 +217,69 @@ def search_keywords_in_file(file_path: str, keywords: List[str], except Exception as e: return results - # 准备关键词(如果不区分大小写) - search_keywords = keywords if case_sensitive else [kw.lower() for kw in keywords] + # 预处理所有模式 + processed_patterns = [] + for keyword in keywords: + compiled = compile_pattern(keyword) + if compiled is not None: # 跳过无效的正则表达式 + processed_patterns.append({ + 'original': keyword, + 'pattern': compiled, + 'is_regex': isinstance(compiled, re.Pattern) + }) for line_number, line in enumerate(lines, 1): line_content = line.rstrip('\n\r') search_line = line_content if case_sensitive else line_content.lower() - # 统计匹配的关键词数量 - matched_keywords = [] - for i, keyword in enumerate(search_keywords): - if keyword in search_line: - matched_keywords.append(keywords[i]) # 使用原始关键词 + # 统计匹配的模式数量 + matched_patterns = [] + for pattern_info in processed_patterns: + pattern = pattern_info['pattern'] + is_regex = pattern_info['is_regex'] + + match_found = False + match_details = None + + if is_regex: + # 正则表达式匹配 + if case_sensitive: + match = pattern.search(line_content) + else: + # 对于不区分大小写的正则,需要重新编译 + if isinstance(pattern, re.Pattern): + # 创建不区分大小写的版本 + flags = pattern.flags | re.IGNORECASE + case_insensitive_pattern = re.compile(pattern.pattern, flags) + match = case_insensitive_pattern.search(line_content) + else: + match = pattern.search(search_line) + + if match: + match_found = True + match_details = match.group(0) + else: + # 普通字符串匹配 + search_keyword = pattern if case_sensitive else pattern.lower() + if search_keyword in search_line: + match_found = True + match_details = pattern + + if match_found: + matched_patterns.append({ + 'original': pattern_info['original'], + 'type': 'regex' if is_regex else 'keyword', + 'match': match_details + }) - match_count = len(matched_keywords) + match_count = len(matched_patterns) if match_count > 0: results.append({ 'line_number': line_number, 'content': line_content, 'match_count': match_count, - 'matched_keywords': matched_keywords, + 'matched_patterns': matched_patterns, 'file_path': file_path }) @@ -223,14 +334,14 @@ async def handle_request(request: Dict[str, Any]) -> Dict[str, Any]: "tools": [ { "name": "multi_keyword_search", - "description": "多关键词搜索工具,返回按匹配数量排序的结果。格式:[行号]:[匹配数量]:[行的原始内容]", + "description": "智能关键词和正则表达式混合搜索工具,返回按匹配数量排序的结果。支持普通关键词和正则表达式混合使用。正则表达式支持格式:/pattern/、r\"pattern\"或包含正则特殊字符的字符串。结果格式:[行号]:[匹配数量]:[匹配信息]:[行的原始内容]", "inputSchema": { "type": "object", "properties": { "keywords": { "type": "array", "items": {"type": "string"}, - "description": "要搜索的关键词数组" + "description": "要搜索的关键词和正则表达式数组。支持:1)普通关键词 2)/pattern/格式正则 3)r\"pattern\"格式正则 4)包含正则特殊字符的字符串" }, "file_paths": { "type": "array",