add regex for multi search
This commit is contained in:
parent
d0e3e62291
commit
9d2735a53c
@ -9,7 +9,8 @@ import json
|
|||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import asyncio
|
import asyncio
|
||||||
from typing import Any, Dict, List, Optional
|
import re
|
||||||
|
from typing import Any, Dict, List, Optional, Union
|
||||||
|
|
||||||
|
|
||||||
def validate_file_path(file_path: str, allowed_dir: str) -> str:
|
def validate_file_path(file_path: str, allowed_dir: str) -> str:
|
||||||
@ -38,9 +39,48 @@ def get_allowed_directory():
|
|||||||
return os.path.abspath(project_dir)
|
return os.path.abspath(project_dir)
|
||||||
|
|
||||||
|
|
||||||
|
def is_regex_pattern(pattern: str) -> bool:
    """Report whether *pattern* should be treated as a regular expression.

    A string is classified as a regex when any of the following holds:

    * it is wrapped in slashes (``/body/``) and is longer than two characters;
    * it starts with ``r"`` or ``r'``, ends with a quote character and is
      longer than three characters (the opening and closing quote characters
      are not required to match);
    * it contains at least one regex metacharacter (``.`` included).

    Args:
        pattern: The raw search term.

    Returns:
        True if the term looks like a regex, False for a plain keyword.
    """
    size = len(pattern)

    # Slash-delimited form, e.g. /body/ -- the length guard rejects "/" and "//".
    if size > 2 and pattern[0] == '/' and pattern[-1] == '/':
        return True

    # Raw-literal form, e.g. r"body" or r'body' -- the length guard rejects r"".
    opens_raw = pattern[:2] in ('r"', "r'")
    closes_quote = pattern[-1:] in ('"', "'")
    if size > 3 and opens_raw and closes_quote:
        return True

    # Otherwise a single metacharacter anywhere in the term is enough.
    metacharacters = frozenset('*+?|()[]{}^$\\.')
    return not metacharacters.isdisjoint(pattern)
|
||||||
|
|
||||||
|
|
||||||
|
def compile_pattern(pattern: str) -> Union[re.Pattern, str, None]:
    """Compile a search term into a regex, or pass it through as plain text.

    The term is first classified by ``is_regex_pattern``. Plain keywords are
    returned unchanged; regex terms have their ``/.../`` or ``r"..."`` /
    ``r'...'`` wrapper stripped (if present) and are compiled.

    Args:
        pattern: The raw search term.

    Returns:
        * the original string when the term is not a regex;
        * a compiled ``re.Pattern`` when the term is a valid regex;
        * ``None`` when the regex fails to compile.
    """
    if not is_regex_pattern(pattern):
        return pattern

    if pattern.startswith('/') and pattern.endswith('/'):
        # /body/ form: drop the surrounding slashes.
        regex_body = pattern[1:-1]
    elif pattern.startswith(('r"', "r'")) and pattern.endswith(('"', "'")):
        # r"body" / r'body' form: drop the r-prefix, opening and closing quote.
        regex_body = pattern[2:-1]
    else:
        # Bare string containing metacharacters: compile as-is.
        regex_body = pattern

    # Only re.compile can raise here, so keep the try body to that one call.
    try:
        return re.compile(regex_body)
    except re.error as e:
        # Diagnostics go to stderr, not stdout.
        # NOTE(review): this module appears to answer JSON requests; if the
        # responses are written to stdout, a stray warning there would corrupt
        # the stream -- confirm the transport.
        print(f"警告: 正则表达式 '{pattern}' 编译失败: {e}", file=sys.stderr)
        return None
|
||||||
|
|
||||||
|
|
||||||
def multi_keyword_search(keywords: List[str], file_paths: List[str],
|
def multi_keyword_search(keywords: List[str], file_paths: List[str],
|
||||||
limit: int = 10, case_sensitive: bool = False) -> Dict[str, Any]:
|
limit: int = 10, case_sensitive: bool = False) -> Dict[str, Any]:
|
||||||
"""执行多关键词搜索"""
|
"""执行多关键词和正则表达式搜索"""
|
||||||
if not keywords:
|
if not keywords:
|
||||||
return {
|
return {
|
||||||
"content": [
|
"content": [
|
||||||
@ -61,6 +101,21 @@ def multi_keyword_search(keywords: List[str], file_paths: List[str],
|
|||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# 预处理和验证关键词中的正则表达式
|
||||||
|
valid_keywords = []
|
||||||
|
regex_errors = []
|
||||||
|
|
||||||
|
for keyword in keywords:
|
||||||
|
compiled = compile_pattern(keyword)
|
||||||
|
if compiled is None:
|
||||||
|
regex_errors.append(keyword)
|
||||||
|
else:
|
||||||
|
valid_keywords.append(keyword)
|
||||||
|
|
||||||
|
if regex_errors:
|
||||||
|
error_msg = f"警告: 以下正则表达式编译失败,将被忽略: {', '.join(regex_errors)}"
|
||||||
|
print(error_msg)
|
||||||
|
|
||||||
# 处理项目目录限制
|
# 处理项目目录限制
|
||||||
project_data_dir = get_allowed_directory()
|
project_data_dir = get_allowed_directory()
|
||||||
|
|
||||||
@ -100,7 +155,7 @@ def multi_keyword_search(keywords: List[str], file_paths: List[str],
|
|||||||
|
|
||||||
for file_path in valid_paths:
|
for file_path in valid_paths:
|
||||||
try:
|
try:
|
||||||
results = search_keywords_in_file(file_path, keywords, case_sensitive)
|
results = search_keywords_in_file(file_path, valid_keywords, case_sensitive)
|
||||||
all_results.extend(results)
|
all_results.extend(results)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
continue
|
continue
|
||||||
@ -122,10 +177,24 @@ def multi_keyword_search(keywords: List[str], file_paths: List[str],
|
|||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
formatted_output = "\n".join([
|
# 增强格式化输出,显示匹配类型和详细信息
|
||||||
f"{result['line_number']}:match_count({result['match_count']}):{result['content']}"
|
formatted_lines = []
|
||||||
for result in limited_results
|
for result in limited_results:
|
||||||
])
|
line_prefix = f"{result['line_number']}:match_count({result['match_count']}):"
|
||||||
|
|
||||||
|
# 构建匹配详情
|
||||||
|
match_details = []
|
||||||
|
for pattern in result['matched_patterns']:
|
||||||
|
if pattern['type'] == 'regex':
|
||||||
|
match_details.append(f"[regex:{pattern['original']}={pattern['match']}]")
|
||||||
|
else:
|
||||||
|
match_details.append(f"[keyword:{pattern['match']}]")
|
||||||
|
|
||||||
|
match_info = " ".join(match_details) if match_details else ""
|
||||||
|
formatted_line = f"{line_prefix}{match_info}:{result['content']}" if match_info else f"{line_prefix}{result['content']}"
|
||||||
|
formatted_lines.append(formatted_line)
|
||||||
|
|
||||||
|
formatted_output = "\n".join(formatted_lines)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"content": [
|
"content": [
|
||||||
@ -139,7 +208,7 @@ def multi_keyword_search(keywords: List[str], file_paths: List[str],
|
|||||||
|
|
||||||
def search_keywords_in_file(file_path: str, keywords: List[str],
|
def search_keywords_in_file(file_path: str, keywords: List[str],
|
||||||
case_sensitive: bool) -> List[Dict[str, Any]]:
|
case_sensitive: bool) -> List[Dict[str, Any]]:
|
||||||
"""搜索单个文件中的关键词"""
|
"""搜索单个文件中的关键词和正则表达式"""
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -148,27 +217,69 @@ def search_keywords_in_file(file_path: str, keywords: List[str],
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
return results
|
return results
|
||||||
|
|
||||||
# 准备关键词(如果不区分大小写)
|
# 预处理所有模式
|
||||||
search_keywords = keywords if case_sensitive else [kw.lower() for kw in keywords]
|
processed_patterns = []
|
||||||
|
for keyword in keywords:
|
||||||
|
compiled = compile_pattern(keyword)
|
||||||
|
if compiled is not None: # 跳过无效的正则表达式
|
||||||
|
processed_patterns.append({
|
||||||
|
'original': keyword,
|
||||||
|
'pattern': compiled,
|
||||||
|
'is_regex': isinstance(compiled, re.Pattern)
|
||||||
|
})
|
||||||
|
|
||||||
for line_number, line in enumerate(lines, 1):
|
for line_number, line in enumerate(lines, 1):
|
||||||
line_content = line.rstrip('\n\r')
|
line_content = line.rstrip('\n\r')
|
||||||
search_line = line_content if case_sensitive else line_content.lower()
|
search_line = line_content if case_sensitive else line_content.lower()
|
||||||
|
|
||||||
# 统计匹配的关键词数量
|
# 统计匹配的模式数量
|
||||||
matched_keywords = []
|
matched_patterns = []
|
||||||
for i, keyword in enumerate(search_keywords):
|
for pattern_info in processed_patterns:
|
||||||
if keyword in search_line:
|
pattern = pattern_info['pattern']
|
||||||
matched_keywords.append(keywords[i]) # 使用原始关键词
|
is_regex = pattern_info['is_regex']
|
||||||
|
|
||||||
match_count = len(matched_keywords)
|
match_found = False
|
||||||
|
match_details = None
|
||||||
|
|
||||||
|
if is_regex:
|
||||||
|
# 正则表达式匹配
|
||||||
|
if case_sensitive:
|
||||||
|
match = pattern.search(line_content)
|
||||||
|
else:
|
||||||
|
# 对于不区分大小写的正则,需要重新编译
|
||||||
|
if isinstance(pattern, re.Pattern):
|
||||||
|
# 创建不区分大小写的版本
|
||||||
|
flags = pattern.flags | re.IGNORECASE
|
||||||
|
case_insensitive_pattern = re.compile(pattern.pattern, flags)
|
||||||
|
match = case_insensitive_pattern.search(line_content)
|
||||||
|
else:
|
||||||
|
match = pattern.search(search_line)
|
||||||
|
|
||||||
|
if match:
|
||||||
|
match_found = True
|
||||||
|
match_details = match.group(0)
|
||||||
|
else:
|
||||||
|
# 普通字符串匹配
|
||||||
|
search_keyword = pattern if case_sensitive else pattern.lower()
|
||||||
|
if search_keyword in search_line:
|
||||||
|
match_found = True
|
||||||
|
match_details = pattern
|
||||||
|
|
||||||
|
if match_found:
|
||||||
|
matched_patterns.append({
|
||||||
|
'original': pattern_info['original'],
|
||||||
|
'type': 'regex' if is_regex else 'keyword',
|
||||||
|
'match': match_details
|
||||||
|
})
|
||||||
|
|
||||||
|
match_count = len(matched_patterns)
|
||||||
|
|
||||||
if match_count > 0:
|
if match_count > 0:
|
||||||
results.append({
|
results.append({
|
||||||
'line_number': line_number,
|
'line_number': line_number,
|
||||||
'content': line_content,
|
'content': line_content,
|
||||||
'match_count': match_count,
|
'match_count': match_count,
|
||||||
'matched_keywords': matched_keywords,
|
'matched_patterns': matched_patterns,
|
||||||
'file_path': file_path
|
'file_path': file_path
|
||||||
})
|
})
|
||||||
|
|
||||||
@ -223,14 +334,14 @@ async def handle_request(request: Dict[str, Any]) -> Dict[str, Any]:
|
|||||||
"tools": [
|
"tools": [
|
||||||
{
|
{
|
||||||
"name": "multi_keyword_search",
|
"name": "multi_keyword_search",
|
||||||
"description": "多关键词搜索工具,返回按匹配数量排序的结果。格式:[行号]:[匹配数量]:[行的原始内容]",
|
"description": "智能关键词和正则表达式混合搜索工具,返回按匹配数量排序的结果。支持普通关键词和正则表达式混合使用。正则表达式支持格式:/pattern/、r\"pattern\"或包含正则特殊字符的字符串。结果格式:[行号]:[匹配数量]:[匹配信息]:[行的原始内容]",
|
||||||
"inputSchema": {
|
"inputSchema": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"keywords": {
|
"keywords": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {"type": "string"},
|
"items": {"type": "string"},
|
||||||
"description": "要搜索的关键词数组"
|
"description": "要搜索的关键词和正则表达式数组。支持:1)普通关键词 2)/pattern/格式正则 3)r\"pattern\"格式正则 4)包含正则特殊字符的字符串"
|
||||||
},
|
},
|
||||||
"file_paths": {
|
"file_paths": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user