Merge branch 'developing' into dev

This commit is contained in:
朱潮 2026-04-15 11:10:46 +08:00
commit 5bb09b22a5
11 changed files with 34 additions and 34 deletions

View File

@@ -291,7 +291,7 @@ async def load_mcp_settings_async(config) -> List[Dict]:
# 计算 dataset_dir 用于替换 MCP 配置中的占位符
# 只有当 project_dir 不为 None 时才计算 dataset_dir
dataset_dir = os.path.join(project_dir, "dataset") if project_dir is not None else None
dataset_dir = os.path.join(project_dir, "datasets") if project_dir is not None else None
# 替换 MCP 配置中的 {dataset_dir} 占位符
if dataset_dir is None:
dataset_dir = ""

View File

@@ -54,7 +54,7 @@ When executing scripts from SKILL.md files, you MUST convert relative paths to a
**4. Workspace Directory Structure**
- **`{agent_dir_path}/skills/`** - Skill packages with embedded scripts
- **`{agent_dir_path}/dataset/`** - Store file datasets and document data
- **`{agent_dir_path}/datasets/`** - Store file datasets and document data
- **`{agent_dir_path}/executable_code/`** - Place generated executable scripts here (not skill scripts)
- **`{agent_dir_path}/download/`** - Store downloaded files and content
@@ -75,7 +75,7 @@ When creating scripts in `executable_code/`, follow these organization rules:
**Path Examples:**
- Skill script: `{agent_dir_path}/skills/rag-retrieve/scripts/rag_retrieve.py`
- Dataset file: `{agent_dir_path}/dataset/document.txt`
- Dataset file: `{agent_dir_path}/datasets/document.txt`
- Task-specific script: `{agent_dir_path}/executable_code/invoice_parser/parse.py`
- Temporary script (when needed): `{agent_dir_path}/executable_code/tmp/test.py`
- Downloaded file: `{agent_dir_path}/download/report.pdf`

View File

@@ -213,7 +213,7 @@ async def reset_files_processing(dataset_id: str):
elif 'filename' in file_info:
# Fallback to old filename-based structure
filename_without_ext = os.path.splitext(file_info['filename'])[0]
dataset_dir = os.path.join("projects", "data", dataset_id, "dataset", filename_without_ext)
dataset_dir = os.path.join("projects", "data", dataset_id, "datasets", filename_without_ext)
if remove_file_or_directory(dataset_dir):
removed_files.append(dataset_dir)
@@ -232,7 +232,7 @@ async def reset_files_processing(dataset_id: str):
removed_files.append(files_dir)
# Also remove the entire dataset directory (clean up any remaining files)
dataset_dir = os.path.join(project_dir, "dataset")
dataset_dir = os.path.join(project_dir, "datasets")
if remove_file_or_directory(dataset_dir):
removed_files.append(dataset_dir)
@@ -465,4 +465,4 @@ async def cleanup_tasks(older_than_days: int = 7):
except Exception as e:
logger.error(f"Error cleaning up tasks: {str(e)}")
raise HTTPException(status_code=500, detail=f"清理任务记录失败: {str(e)}")
raise HTTPException(status_code=500, detail=f"清理任务记录失败: {str(e)}")

View File

@@ -33,8 +33,8 @@ async def list_all_projects():
# 统计文件数量
file_count = 0
if os.path.exists(os.path.join(item_path, "dataset")):
for root, dirs, files in os.walk(os.path.join(item_path, "dataset")):
if os.path.exists(os.path.join(item_path, "datasets")):
for root, dirs, files in os.walk(os.path.join(item_path, "datasets")):
file_count += len(files)
robot_projects.append({
@@ -173,4 +173,4 @@ async def get_project_tasks(dataset_id: str):
except Exception as e:
logger.error(f"Error getting project tasks: {str(e)}")
raise HTTPException(status_code=500, detail=f"获取项目任务失败: {str(e)}")
raise HTTPException(status_code=500, detail=f"获取项目任务失败: {str(e)}")

View File

@@ -181,7 +181,7 @@ def process_files_async(
result_files = []
for key in processed_files_by_key.keys():
# 添加对应的dataset document.txt路径
document_path = os.path.join("projects", "data", dataset_id, "dataset", key, "document.txt")
document_path = os.path.join("projects", "data", dataset_id, "datasets", key, "document.txt")
if os.path.exists(document_path):
result_files.append(document_path)
@@ -382,7 +382,7 @@ def process_files_incremental_async(
result_files = []
for key in processed_files_by_key.keys():
# 添加对应的dataset document.txt路径
document_path = os.path.join("projects", "data", dataset_id, "dataset", key, "document.txt")
document_path = os.path.join("projects", "data", dataset_id, "datasets", key, "document.txt")
if os.path.exists(document_path):
result_files.append(document_path)
@@ -496,4 +496,4 @@ def cleanup_project_async(
"message": error_msg,
"dataset_id": dataset_id,
"error": str(e)
}
}

View File

@@ -25,7 +25,7 @@ def merge_documents_by_group(unique_id: str, group_name: str) -> Dict:
"""Merge all document.txt files in a group into a single document."""
processed_group_dir = os.path.join("projects", "data", unique_id, "processed", group_name)
dataset_group_dir = os.path.join("projects", "data", unique_id, "dataset", group_name)
dataset_group_dir = os.path.join("projects", "data", unique_id, "datasets", group_name)
os.makedirs(dataset_group_dir, exist_ok=True)
merged_document_path = os.path.join(dataset_group_dir, "document.txt")
@@ -96,7 +96,7 @@ def merge_paginations_by_group(unique_id: str, group_name: str) -> Dict:
"""Merge all pagination.txt files in a group."""
processed_group_dir = os.path.join("projects", "data", unique_id, "processed", group_name)
dataset_group_dir = os.path.join("projects", "data", unique_id, "dataset", group_name)
dataset_group_dir = os.path.join("projects", "data", unique_id, "datasets", group_name)
os.makedirs(dataset_group_dir, exist_ok=True)
merged_pagination_path = os.path.join(dataset_group_dir, "pagination.txt")
@@ -166,7 +166,7 @@ def merge_embeddings_by_group(unique_id: str, group_name: str) -> Dict:
"""Merge all embedding.pkl files in a group."""
processed_group_dir = os.path.join("projects", "data", unique_id, "processed", group_name)
dataset_group_dir = os.path.join("projects", "data", unique_id, "dataset", group_name)
dataset_group_dir = os.path.join("projects", "data", unique_id, "datasets", group_name)
os.makedirs(dataset_group_dir, exist_ok=True)
merged_embedding_path = os.path.join(dataset_group_dir, "embedding.pkl")
@@ -379,7 +379,7 @@ def merge_all_data_by_group(unique_id: str, group_name: str) -> Dict:
def get_group_merge_status(unique_id: str, group_name: str) -> Dict:
"""Get the status of merged data for a group."""
dataset_group_dir = os.path.join("projects", "data", unique_id, "dataset", group_name)
dataset_group_dir = os.path.join("projects", "data", unique_id, "datasets", group_name)
status = {
"group_name": group_name,
@@ -423,7 +423,7 @@ def get_group_merge_status(unique_id: str, group_name: str) -> Dict:
def cleanup_dataset_group(unique_id: str, group_name: str) -> bool:
"""Clean up merged dataset files for a group."""
dataset_group_dir = os.path.join("projects", "data", unique_id, "dataset", group_name)
dataset_group_dir = os.path.join("projects", "data", unique_id, "datasets", group_name)
try:
if os.path.exists(dataset_group_dir):

View File

@@ -200,7 +200,7 @@ def generate_dataset_structure(unique_id: str) -> str:
add_directory_contents(processed_dir, "")
# Add dataset directory structure
dataset_dir = os.path.join(project_dir, "dataset")
dataset_dir = os.path.join(project_dir, "datasets")
structure.append("\ndataset/")
add_directory_contents(dataset_dir, "")
@@ -224,7 +224,7 @@ def get_processing_status(unique_id: str) -> Dict:
"directories": {
"files": os.path.exists(os.path.join(project_dir, "files")),
"processed": os.path.exists(os.path.join(project_dir, "processed")),
"dataset": os.path.exists(os.path.join(project_dir, "dataset"))
"dataset": os.path.exists(os.path.join(project_dir, "datasets"))
},
"groups": {},
"processing_log_exists": os.path.exists(os.path.join(project_dir, "processing_log.json"))
@@ -245,7 +245,7 @@ def get_processing_status(unique_id: str) -> Dict:
}
# Check merge status for each group
dataset_dir = os.path.join(project_dir, "dataset")
dataset_dir = os.path.join(project_dir, "datasets")
if os.path.exists(dataset_dir):
for group_name in os.listdir(dataset_dir):
group_path = os.path.join(dataset_dir, group_name)
@@ -294,4 +294,4 @@ def remove_dataset_directory_by_key(unique_id: str, key: str):
shutil.rmtree(processed_group_path)
# Remove dataset directory
cleanup_dataset_group(unique_id, key)
cleanup_dataset_group(unique_id, key)

View File

@@ -228,7 +228,7 @@ def cleanup_orphaned_files(unique_id: str, changes: Dict) -> Dict[str, List[str]
removed_files[group_name].append("processed group directory")
# Remove entire dataset/group directory
dataset_group_dir = os.path.join(project_dir, "dataset", group_name)
dataset_group_dir = os.path.join(project_dir, "datasets", group_name)
if os.path.exists(dataset_group_dir):
shutil.rmtree(dataset_group_dir)
removed_files[group_name].append("dataset group directory")
@@ -263,9 +263,9 @@ def ensure_directories(unique_id: str):
directories = [
"files",
"processed",
"dataset"
"datasets"
]
for dir_name in directories:
dir_path = os.path.join(base_dir, dir_name)
os.makedirs(dir_path, exist_ok=True)
os.makedirs(dir_path, exist_ok=True)

View File

@@ -267,7 +267,7 @@ def get_project_statistics(unique_id: str) -> Dict:
}
# Check each directory
directories = ["files", "processed", "dataset"]
directories = ["files", "processed", "datasets"]
for dir_name in directories:
dir_path = os.path.join(project_dir, dir_name)
@@ -293,4 +293,4 @@ def get_project_statistics(unique_id: str) -> Dict:
"files": 0
}
return stats
return stats

View File

@@ -75,7 +75,7 @@ def generate_robot_directory_tree(robot_dir: str, robot_id: str, max_depth: int
return lines
# 从dataset目录开始构建树
dataset_dir = os.path.join(robot_dir, "dataset")
dataset_dir = os.path.join(robot_dir, "datasets")
tree_lines = []
if not os.path.exists(dataset_dir):
@@ -165,7 +165,7 @@ def copy_dataset_folder(source_project_id: str, target_dataset_dir: Path, folder
}
try:
source_folder = project_path / "data" / source_project_id / "dataset" / folder_name
source_folder = project_path / "data" / source_project_id / "datasets" / folder_name
result["source_path"] = str(source_folder)
if not source_folder.exists():
@@ -231,7 +231,7 @@ def generate_robot_readme(robot_id: str, dataset_ids: List[str], copy_results: L
readme_content += "## 数据集详情\n\n"
dataset_dir = robot_dir / "dataset"
dataset_dir = robot_dir / "datasets"
if not dataset_dir.exists():
readme_content += "No dataset files available.\n"
else:
@@ -324,7 +324,7 @@ def create_robot_project(dataset_ids: List[str], bot_id: str, force_rebuild: boo
# 创建机器人目录结构(如果不存在)
robot_dir = _get_robot_dir(project_path, bot_id)
dataset_dir = robot_dir / "dataset"
dataset_dir = robot_dir / "datasets"
scripts_dir = robot_dir / "scripts"
download_dir = robot_dir / "download"

View File

@@ -62,7 +62,7 @@ def generate_directory_tree(project_dir: str, unique_id: str, max_depth: int = 3
return lines
# Start building tree from dataset directory
dataset_dir = os.path.join(project_dir, "dataset")
dataset_dir = os.path.join(project_dir, "datasets")
tree_lines = []
if not os.path.exists(dataset_dir):
@@ -128,7 +128,7 @@ This project contains processed documents and their associated embeddings for se
"""
dataset_dir = os.path.join(project_dir, "dataset")
dataset_dir = os.path.join(project_dir, "datasets")
if not os.path.exists(dataset_dir):
readme_content += "No dataset files available.\n"
else:
@@ -217,7 +217,7 @@ def get_project_status(unique_id: str) -> Dict:
# Collect document.txt files
document_files = []
dataset_dir = os.path.join(project_dir, "dataset")
dataset_dir = os.path.join(project_dir, "datasets")
if os.path.exists(dataset_dir):
for root, dirs, files in os.walk(dataset_dir):
for file in files:
@@ -321,7 +321,7 @@ def get_project_stats(unique_id: str) -> Dict:
# Check embeddings files
embedding_files = []
dataset_dir = os.path.join("projects", "data", unique_id, "dataset")
dataset_dir = os.path.join("projects", "data", unique_id, "datasets")
if os.path.exists(dataset_dir):
for root, dirs, files in os.walk(dataset_dir):
for file in files: