Merge branch 'developing' into dev

This commit is contained in:
朱潮 2026-04-15 11:10:46 +08:00
commit 5bb09b22a5
11 changed files with 34 additions and 34 deletions

View File

@ -291,7 +291,7 @@ async def load_mcp_settings_async(config) -> List[Dict]:
# 计算 dataset_dir 用于替换 MCP 配置中的占位符 # 计算 dataset_dir 用于替换 MCP 配置中的占位符
# 只有当 project_dir 不为 None 时才计算 dataset_dir # 只有当 project_dir 不为 None 时才计算 dataset_dir
dataset_dir = os.path.join(project_dir, "dataset") if project_dir is not None else None dataset_dir = os.path.join(project_dir, "datasets") if project_dir is not None else None
# 替换 MCP 配置中的 {dataset_dir} 占位符 # 替换 MCP 配置中的 {dataset_dir} 占位符
if dataset_dir is None: if dataset_dir is None:
dataset_dir = "" dataset_dir = ""

View File

@ -54,7 +54,7 @@ When executing scripts from SKILL.md files, you MUST convert relative paths to a
**4. Workspace Directory Structure** **4. Workspace Directory Structure**
- **`{agent_dir_path}/skills/`** - Skill packages with embedded scripts - **`{agent_dir_path}/skills/`** - Skill packages with embedded scripts
- **`{agent_dir_path}/dataset/`** - Store file datasets and document data - **`{agent_dir_path}/datasets/`** - Store file datasets and document data
- **`{agent_dir_path}/executable_code/`** - Place generated executable scripts here (not skill scripts) - **`{agent_dir_path}/executable_code/`** - Place generated executable scripts here (not skill scripts)
- **`{agent_dir_path}/download/`** - Store downloaded files and content - **`{agent_dir_path}/download/`** - Store downloaded files and content
@ -75,7 +75,7 @@ When creating scripts in `executable_code/`, follow these organization rules:
**Path Examples:** **Path Examples:**
- Skill script: `{agent_dir_path}/skills/rag-retrieve/scripts/rag_retrieve.py` - Skill script: `{agent_dir_path}/skills/rag-retrieve/scripts/rag_retrieve.py`
- Dataset file: `{agent_dir_path}/dataset/document.txt` - Dataset file: `{agent_dir_path}/datasets/document.txt`
- Task-specific script: `{agent_dir_path}/executable_code/invoice_parser/parse.py` - Task-specific script: `{agent_dir_path}/executable_code/invoice_parser/parse.py`
- Temporary script (when needed): `{agent_dir_path}/executable_code/tmp/test.py` - Temporary script (when needed): `{agent_dir_path}/executable_code/tmp/test.py`
- Downloaded file: `{agent_dir_path}/download/report.pdf` - Downloaded file: `{agent_dir_path}/download/report.pdf`

View File

@ -213,7 +213,7 @@ async def reset_files_processing(dataset_id: str):
elif 'filename' in file_info: elif 'filename' in file_info:
# Fallback to old filename-based structure # Fallback to old filename-based structure
filename_without_ext = os.path.splitext(file_info['filename'])[0] filename_without_ext = os.path.splitext(file_info['filename'])[0]
dataset_dir = os.path.join("projects", "data", dataset_id, "dataset", filename_without_ext) dataset_dir = os.path.join("projects", "data", dataset_id, "datasets", filename_without_ext)
if remove_file_or_directory(dataset_dir): if remove_file_or_directory(dataset_dir):
removed_files.append(dataset_dir) removed_files.append(dataset_dir)
@ -232,7 +232,7 @@ async def reset_files_processing(dataset_id: str):
removed_files.append(files_dir) removed_files.append(files_dir)
# Also remove the entire dataset directory (clean up any remaining files) # Also remove the entire dataset directory (clean up any remaining files)
dataset_dir = os.path.join(project_dir, "dataset") dataset_dir = os.path.join(project_dir, "datasets")
if remove_file_or_directory(dataset_dir): if remove_file_or_directory(dataset_dir):
removed_files.append(dataset_dir) removed_files.append(dataset_dir)

View File

@ -33,8 +33,8 @@ async def list_all_projects():
# 统计文件数量 # 统计文件数量
file_count = 0 file_count = 0
if os.path.exists(os.path.join(item_path, "dataset")): if os.path.exists(os.path.join(item_path, "datasets")):
for root, dirs, files in os.walk(os.path.join(item_path, "dataset")): for root, dirs, files in os.walk(os.path.join(item_path, "datasets")):
file_count += len(files) file_count += len(files)
robot_projects.append({ robot_projects.append({

View File

@ -181,7 +181,7 @@ def process_files_async(
result_files = [] result_files = []
for key in processed_files_by_key.keys(): for key in processed_files_by_key.keys():
# 添加对应的dataset document.txt路径 # 添加对应的dataset document.txt路径
document_path = os.path.join("projects", "data", dataset_id, "dataset", key, "document.txt") document_path = os.path.join("projects", "data", dataset_id, "datasets", key, "document.txt")
if os.path.exists(document_path): if os.path.exists(document_path):
result_files.append(document_path) result_files.append(document_path)
@ -382,7 +382,7 @@ def process_files_incremental_async(
result_files = [] result_files = []
for key in processed_files_by_key.keys(): for key in processed_files_by_key.keys():
# 添加对应的dataset document.txt路径 # 添加对应的dataset document.txt路径
document_path = os.path.join("projects", "data", dataset_id, "dataset", key, "document.txt") document_path = os.path.join("projects", "data", dataset_id, "datasets", key, "document.txt")
if os.path.exists(document_path): if os.path.exists(document_path):
result_files.append(document_path) result_files.append(document_path)

View File

@ -25,7 +25,7 @@ def merge_documents_by_group(unique_id: str, group_name: str) -> Dict:
"""Merge all document.txt files in a group into a single document.""" """Merge all document.txt files in a group into a single document."""
processed_group_dir = os.path.join("projects", "data", unique_id, "processed", group_name) processed_group_dir = os.path.join("projects", "data", unique_id, "processed", group_name)
dataset_group_dir = os.path.join("projects", "data", unique_id, "dataset", group_name) dataset_group_dir = os.path.join("projects", "data", unique_id, "datasets", group_name)
os.makedirs(dataset_group_dir, exist_ok=True) os.makedirs(dataset_group_dir, exist_ok=True)
merged_document_path = os.path.join(dataset_group_dir, "document.txt") merged_document_path = os.path.join(dataset_group_dir, "document.txt")
@ -96,7 +96,7 @@ def merge_paginations_by_group(unique_id: str, group_name: str) -> Dict:
"""Merge all pagination.txt files in a group.""" """Merge all pagination.txt files in a group."""
processed_group_dir = os.path.join("projects", "data", unique_id, "processed", group_name) processed_group_dir = os.path.join("projects", "data", unique_id, "processed", group_name)
dataset_group_dir = os.path.join("projects", "data", unique_id, "dataset", group_name) dataset_group_dir = os.path.join("projects", "data", unique_id, "datasets", group_name)
os.makedirs(dataset_group_dir, exist_ok=True) os.makedirs(dataset_group_dir, exist_ok=True)
merged_pagination_path = os.path.join(dataset_group_dir, "pagination.txt") merged_pagination_path = os.path.join(dataset_group_dir, "pagination.txt")
@ -166,7 +166,7 @@ def merge_embeddings_by_group(unique_id: str, group_name: str) -> Dict:
"""Merge all embedding.pkl files in a group.""" """Merge all embedding.pkl files in a group."""
processed_group_dir = os.path.join("projects", "data", unique_id, "processed", group_name) processed_group_dir = os.path.join("projects", "data", unique_id, "processed", group_name)
dataset_group_dir = os.path.join("projects", "data", unique_id, "dataset", group_name) dataset_group_dir = os.path.join("projects", "data", unique_id, "datasets", group_name)
os.makedirs(dataset_group_dir, exist_ok=True) os.makedirs(dataset_group_dir, exist_ok=True)
merged_embedding_path = os.path.join(dataset_group_dir, "embedding.pkl") merged_embedding_path = os.path.join(dataset_group_dir, "embedding.pkl")
@ -379,7 +379,7 @@ def merge_all_data_by_group(unique_id: str, group_name: str) -> Dict:
def get_group_merge_status(unique_id: str, group_name: str) -> Dict: def get_group_merge_status(unique_id: str, group_name: str) -> Dict:
"""Get the status of merged data for a group.""" """Get the status of merged data for a group."""
dataset_group_dir = os.path.join("projects", "data", unique_id, "dataset", group_name) dataset_group_dir = os.path.join("projects", "data", unique_id, "datasets", group_name)
status = { status = {
"group_name": group_name, "group_name": group_name,
@ -423,7 +423,7 @@ def get_group_merge_status(unique_id: str, group_name: str) -> Dict:
def cleanup_dataset_group(unique_id: str, group_name: str) -> bool: def cleanup_dataset_group(unique_id: str, group_name: str) -> bool:
"""Clean up merged dataset files for a group.""" """Clean up merged dataset files for a group."""
dataset_group_dir = os.path.join("projects", "data", unique_id, "dataset", group_name) dataset_group_dir = os.path.join("projects", "data", unique_id, "datasets", group_name)
try: try:
if os.path.exists(dataset_group_dir): if os.path.exists(dataset_group_dir):

View File

@ -200,7 +200,7 @@ def generate_dataset_structure(unique_id: str) -> str:
add_directory_contents(processed_dir, "") add_directory_contents(processed_dir, "")
# Add dataset directory structure # Add dataset directory structure
dataset_dir = os.path.join(project_dir, "dataset") dataset_dir = os.path.join(project_dir, "datasets")
structure.append("\ndataset/") structure.append("\ndataset/")
add_directory_contents(dataset_dir, "") add_directory_contents(dataset_dir, "")
@ -224,7 +224,7 @@ def get_processing_status(unique_id: str) -> Dict:
"directories": { "directories": {
"files": os.path.exists(os.path.join(project_dir, "files")), "files": os.path.exists(os.path.join(project_dir, "files")),
"processed": os.path.exists(os.path.join(project_dir, "processed")), "processed": os.path.exists(os.path.join(project_dir, "processed")),
"dataset": os.path.exists(os.path.join(project_dir, "dataset")) "dataset": os.path.exists(os.path.join(project_dir, "datasets"))
}, },
"groups": {}, "groups": {},
"processing_log_exists": os.path.exists(os.path.join(project_dir, "processing_log.json")) "processing_log_exists": os.path.exists(os.path.join(project_dir, "processing_log.json"))
@ -245,7 +245,7 @@ def get_processing_status(unique_id: str) -> Dict:
} }
# Check merge status for each group # Check merge status for each group
dataset_dir = os.path.join(project_dir, "dataset") dataset_dir = os.path.join(project_dir, "datasets")
if os.path.exists(dataset_dir): if os.path.exists(dataset_dir):
for group_name in os.listdir(dataset_dir): for group_name in os.listdir(dataset_dir):
group_path = os.path.join(dataset_dir, group_name) group_path = os.path.join(dataset_dir, group_name)

View File

@ -228,7 +228,7 @@ def cleanup_orphaned_files(unique_id: str, changes: Dict) -> Dict[str, List[str]
removed_files[group_name].append("processed group directory") removed_files[group_name].append("processed group directory")
# Remove entire dataset/group directory # Remove entire dataset/group directory
dataset_group_dir = os.path.join(project_dir, "dataset", group_name) dataset_group_dir = os.path.join(project_dir, "datasets", group_name)
if os.path.exists(dataset_group_dir): if os.path.exists(dataset_group_dir):
shutil.rmtree(dataset_group_dir) shutil.rmtree(dataset_group_dir)
removed_files[group_name].append("dataset group directory") removed_files[group_name].append("dataset group directory")
@ -263,7 +263,7 @@ def ensure_directories(unique_id: str):
directories = [ directories = [
"files", "files",
"processed", "processed",
"dataset" "datasets"
] ]
for dir_name in directories: for dir_name in directories:

View File

@ -267,7 +267,7 @@ def get_project_statistics(unique_id: str) -> Dict:
} }
# Check each directory # Check each directory
directories = ["files", "processed", "dataset"] directories = ["files", "processed", "datasets"]
for dir_name in directories: for dir_name in directories:
dir_path = os.path.join(project_dir, dir_name) dir_path = os.path.join(project_dir, dir_name)

View File

@ -75,7 +75,7 @@ def generate_robot_directory_tree(robot_dir: str, robot_id: str, max_depth: int
return lines return lines
# 从dataset目录开始构建树 # 从dataset目录开始构建树
dataset_dir = os.path.join(robot_dir, "dataset") dataset_dir = os.path.join(robot_dir, "datasets")
tree_lines = [] tree_lines = []
if not os.path.exists(dataset_dir): if not os.path.exists(dataset_dir):
@ -165,7 +165,7 @@ def copy_dataset_folder(source_project_id: str, target_dataset_dir: Path, folder
} }
try: try:
source_folder = project_path / "data" / source_project_id / "dataset" / folder_name source_folder = project_path / "data" / source_project_id / "datasets" / folder_name
result["source_path"] = str(source_folder) result["source_path"] = str(source_folder)
if not source_folder.exists(): if not source_folder.exists():
@ -231,7 +231,7 @@ def generate_robot_readme(robot_id: str, dataset_ids: List[str], copy_results: L
readme_content += "## 数据集详情\n\n" readme_content += "## 数据集详情\n\n"
dataset_dir = robot_dir / "dataset" dataset_dir = robot_dir / "datasets"
if not dataset_dir.exists(): if not dataset_dir.exists():
readme_content += "No dataset files available.\n" readme_content += "No dataset files available.\n"
else: else:
@ -324,7 +324,7 @@ def create_robot_project(dataset_ids: List[str], bot_id: str, force_rebuild: boo
# 创建机器人目录结构(如果不存在) # 创建机器人目录结构(如果不存在)
robot_dir = _get_robot_dir(project_path, bot_id) robot_dir = _get_robot_dir(project_path, bot_id)
dataset_dir = robot_dir / "dataset" dataset_dir = robot_dir / "datasets"
scripts_dir = robot_dir / "scripts" scripts_dir = robot_dir / "scripts"
download_dir = robot_dir / "download" download_dir = robot_dir / "download"

View File

@ -62,7 +62,7 @@ def generate_directory_tree(project_dir: str, unique_id: str, max_depth: int = 3
return lines return lines
# Start building tree from dataset directory # Start building tree from dataset directory
dataset_dir = os.path.join(project_dir, "dataset") dataset_dir = os.path.join(project_dir, "datasets")
tree_lines = [] tree_lines = []
if not os.path.exists(dataset_dir): if not os.path.exists(dataset_dir):
@ -128,7 +128,7 @@ This project contains processed documents and their associated embeddings for se
""" """
dataset_dir = os.path.join(project_dir, "dataset") dataset_dir = os.path.join(project_dir, "datasets")
if not os.path.exists(dataset_dir): if not os.path.exists(dataset_dir):
readme_content += "No dataset files available.\n" readme_content += "No dataset files available.\n"
else: else:
@ -217,7 +217,7 @@ def get_project_status(unique_id: str) -> Dict:
# Collect document.txt files # Collect document.txt files
document_files = [] document_files = []
dataset_dir = os.path.join(project_dir, "dataset") dataset_dir = os.path.join(project_dir, "datasets")
if os.path.exists(dataset_dir): if os.path.exists(dataset_dir):
for root, dirs, files in os.walk(dataset_dir): for root, dirs, files in os.walk(dataset_dir):
for file in files: for file in files:
@ -321,7 +321,7 @@ def get_project_stats(unique_id: str) -> Dict:
# Check embeddings files # Check embeddings files
embedding_files = [] embedding_files = []
dataset_dir = os.path.join("projects", "data", unique_id, "dataset") dataset_dir = os.path.join("projects", "data", unique_id, "datasets")
if os.path.exists(dataset_dir): if os.path.exists(dataset_dir):
for root, dirs, files in os.walk(dataset_dir): for root, dirs, files in os.walk(dataset_dir):
for file in files: for file in files: