Merge branch 'developing' into dev

commit 5bb09b22a5
@@ -291,7 +291,7 @@ async def load_mcp_settings_async(config) -> List[Dict]:
 
     # Compute dataset_dir to substitute the placeholder in the MCP configuration
     # Only compute dataset_dir when project_dir is not None
-    dataset_dir = os.path.join(project_dir, "dataset") if project_dir is not None else None
+    dataset_dir = os.path.join(project_dir, "datasets") if project_dir is not None else None
     # Substitute the {dataset_dir} placeholder in the MCP configuration
     if dataset_dir is None:
         dataset_dir = ""
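For context, the hunk above only renames the directory; the placeholder substitution it feeds is not shown in the diff. A minimal sketch of how a `{dataset_dir}` placeholder might be applied to loaded MCP settings, assuming a dict-shaped settings tree (the `substitute_dataset_dir` helper and the example entry are illustrative, not this repository's code):

```python
import os
from typing import Any

def substitute_dataset_dir(settings: Any, dataset_dir: str) -> Any:
    """Recursively replace the {dataset_dir} placeholder in a settings tree."""
    if isinstance(settings, str):
        return settings.replace("{dataset_dir}", dataset_dir)
    if isinstance(settings, list):
        return [substitute_dataset_dir(item, dataset_dir) for item in settings]
    if isinstance(settings, dict):
        return {k: substitute_dataset_dir(v, dataset_dir) for k, v in settings.items()}
    return settings

# Hypothetical inputs for illustration only.
project_dir = "projects/data/example-id"
dataset_dir = os.path.join(project_dir, "datasets") if project_dir is not None else ""
mcp_entry = {"args": ["--root", "{dataset_dir}"], "env": {"DATA": "{dataset_dir}"}}
print(substitute_dataset_dir(mcp_entry, dataset_dir))
```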
@@ -54,7 +54,7 @@ When executing scripts from SKILL.md files, you MUST convert relative paths to a
 **4. Workspace Directory Structure**
 
 - **`{agent_dir_path}/skills/`** - Skill packages with embedded scripts
-- **`{agent_dir_path}/dataset/`** - Store file datasets and document data
+- **`{agent_dir_path}/datasets/`** - Store file datasets and document data
 - **`{agent_dir_path}/executable_code/`** - Place generated executable scripts here (not skill scripts)
 - **`{agent_dir_path}/download/`** - Store downloaded files and content
 
@@ -75,7 +75,7 @@ When creating scripts in `executable_code/`, follow these organization rules:
 
 **Path Examples:**
 - Skill script: `{agent_dir_path}/skills/rag-retrieve/scripts/rag_retrieve.py`
-- Dataset file: `{agent_dir_path}/dataset/document.txt`
+- Dataset file: `{agent_dir_path}/datasets/document.txt`
 - Task-specific script: `{agent_dir_path}/executable_code/invoice_parser/parse.py`
 - Temporary script (when needed): `{agent_dir_path}/executable_code/tmp/test.py`
 - Downloaded file: `{agent_dir_path}/download/report.pdf`
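The rule cited in the hunk header above ("convert relative paths to absolute") pairs naturally with these examples. A small sketch of resolving a workspace-relative dataset path, assuming `agent_dir_path` is supplied by the runtime (the value here is a stand-in):

```python
from pathlib import Path

# Stand-in for the runtime-provided workspace root {agent_dir_path}.
agent_dir_path = Path("/tmp/agent_workspace")

# Resolve the workspace-relative dataset file to the absolute path scripts must use.
dataset_file = (agent_dir_path / "datasets" / "document.txt").resolve()
print(dataset_file)
```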
@@ -213,7 +213,7 @@ async def reset_files_processing(dataset_id: str):
         elif 'filename' in file_info:
             # Fallback to old filename-based structure
             filename_without_ext = os.path.splitext(file_info['filename'])[0]
-            dataset_dir = os.path.join("projects", "data", dataset_id, "dataset", filename_without_ext)
+            dataset_dir = os.path.join("projects", "data", dataset_id, "datasets", filename_without_ext)
             if remove_file_or_directory(dataset_dir):
                 removed_files.append(dataset_dir)
 
@@ -232,7 +232,7 @@ async def reset_files_processing(dataset_id: str):
             removed_files.append(files_dir)
 
         # Also remove the entire dataset directory (clean up any remaining files)
-        dataset_dir = os.path.join(project_dir, "dataset")
+        dataset_dir = os.path.join(project_dir, "datasets")
         if remove_file_or_directory(dataset_dir):
             removed_files.append(dataset_dir)
 
@@ -465,4 +465,4 @@ async def cleanup_tasks(older_than_days: int = 7):
 
     except Exception as e:
         logger.error(f"Error cleaning up tasks: {str(e)}")
-        raise HTTPException(status_code=500, detail=f"Failed to clean up task records: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Failed to clean up task records: {str(e)}")
@@ -33,8 +33,8 @@ async def list_all_projects():
 
             # Count the number of files
             file_count = 0
-            if os.path.exists(os.path.join(item_path, "dataset")):
-                for root, dirs, files in os.walk(os.path.join(item_path, "dataset")):
+            if os.path.exists(os.path.join(item_path, "datasets")):
+                for root, dirs, files in os.walk(os.path.join(item_path, "datasets")):
                     file_count += len(files)
 
             robot_projects.append({
@@ -173,4 +173,4 @@ async def get_project_tasks(dataset_id: str):
 
     except Exception as e:
         logger.error(f"Error getting project tasks: {str(e)}")
-        raise HTTPException(status_code=500, detail=f"Failed to get project tasks: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Failed to get project tasks: {str(e)}")
@@ -181,7 +181,7 @@ def process_files_async(
     result_files = []
     for key in processed_files_by_key.keys():
         # Add the corresponding dataset document.txt path
-        document_path = os.path.join("projects", "data", dataset_id, "dataset", key, "document.txt")
+        document_path = os.path.join("projects", "data", dataset_id, "datasets", key, "document.txt")
         if os.path.exists(document_path):
             result_files.append(document_path)
 
@@ -382,7 +382,7 @@ def process_files_incremental_async(
     result_files = []
     for key in processed_files_by_key.keys():
         # Add the corresponding dataset document.txt path
-        document_path = os.path.join("projects", "data", dataset_id, "dataset", key, "document.txt")
+        document_path = os.path.join("projects", "data", dataset_id, "datasets", key, "document.txt")
         if os.path.exists(document_path):
             result_files.append(document_path)
 
@@ -496,4 +496,4 @@ def cleanup_project_async(
             "message": error_msg,
             "dataset_id": dataset_id,
             "error": str(e)
-        }
+        }
@@ -25,7 +25,7 @@ def merge_documents_by_group(unique_id: str, group_name: str) -> Dict:
     """Merge all document.txt files in a group into a single document."""
 
     processed_group_dir = os.path.join("projects", "data", unique_id, "processed", group_name)
-    dataset_group_dir = os.path.join("projects", "data", unique_id, "dataset", group_name)
+    dataset_group_dir = os.path.join("projects", "data", unique_id, "datasets", group_name)
     os.makedirs(dataset_group_dir, exist_ok=True)
 
     merged_document_path = os.path.join(dataset_group_dir, "document.txt")
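The three merge hunks in this file follow the same pattern: build the group paths, ensure the `datasets/<group>/` directory, and write the merged artifact. A condensed sketch of the document case under those assumptions (the glob-based file discovery and the separator are guesses; the diff only shows the path setup):

```python
import glob
import os

def merge_documents_by_group(unique_id: str, group_name: str) -> str:
    """Concatenate every processed document.txt into datasets/<group>/document.txt."""
    processed_group_dir = os.path.join("projects", "data", unique_id, "processed", group_name)
    dataset_group_dir = os.path.join("projects", "data", unique_id, "datasets", group_name)
    os.makedirs(dataset_group_dir, exist_ok=True)

    merged_document_path = os.path.join(dataset_group_dir, "document.txt")
    parts = []
    # Assumed layout: processed/<group>/<item>/document.txt per source file.
    for path in sorted(glob.glob(os.path.join(processed_group_dir, "*", "document.txt"))):
        with open(path, encoding="utf-8") as fh:
            parts.append(fh.read())
    with open(merged_document_path, "w", encoding="utf-8") as fh:
        fh.write("\n\n".join(parts))
    return merged_document_path
```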
@@ -96,7 +96,7 @@ def merge_paginations_by_group(unique_id: str, group_name: str) -> Dict:
     """Merge all pagination.txt files in a group."""
 
     processed_group_dir = os.path.join("projects", "data", unique_id, "processed", group_name)
-    dataset_group_dir = os.path.join("projects", "data", unique_id, "dataset", group_name)
+    dataset_group_dir = os.path.join("projects", "data", unique_id, "datasets", group_name)
     os.makedirs(dataset_group_dir, exist_ok=True)
 
     merged_pagination_path = os.path.join(dataset_group_dir, "pagination.txt")
@@ -166,7 +166,7 @@ def merge_embeddings_by_group(unique_id: str, group_name: str) -> Dict:
     """Merge all embedding.pkl files in a group."""
 
     processed_group_dir = os.path.join("projects", "data", unique_id, "processed", group_name)
-    dataset_group_dir = os.path.join("projects", "data", unique_id, "dataset", group_name)
+    dataset_group_dir = os.path.join("projects", "data", unique_id, "datasets", group_name)
     os.makedirs(dataset_group_dir, exist_ok=True)
 
     merged_embedding_path = os.path.join(dataset_group_dir, "embedding.pkl")
@@ -379,7 +379,7 @@ def merge_all_data_by_group(unique_id: str, group_name: str) -> Dict:
 def get_group_merge_status(unique_id: str, group_name: str) -> Dict:
     """Get the status of merged data for a group."""
 
-    dataset_group_dir = os.path.join("projects", "data", unique_id, "dataset", group_name)
+    dataset_group_dir = os.path.join("projects", "data", unique_id, "datasets", group_name)
 
     status = {
         "group_name": group_name,
@@ -423,7 +423,7 @@ def get_group_merge_status(unique_id: str, group_name: str) -> Dict:
 def cleanup_dataset_group(unique_id: str, group_name: str) -> bool:
     """Clean up merged dataset files for a group."""
 
-    dataset_group_dir = os.path.join("projects", "data", unique_id, "dataset", group_name)
+    dataset_group_dir = os.path.join("projects", "data", unique_id, "datasets", group_name)
 
     try:
         if os.path.exists(dataset_group_dir):
@@ -200,7 +200,7 @@ def generate_dataset_structure(unique_id: str) -> str:
     add_directory_contents(processed_dir, "")
 
     # Add dataset directory structure
-    dataset_dir = os.path.join(project_dir, "dataset")
+    dataset_dir = os.path.join(project_dir, "datasets")
     structure.append("\ndataset/")
     add_directory_contents(dataset_dir, "")
 
@@ -224,7 +224,7 @@ def get_processing_status(unique_id: str) -> Dict:
         "directories": {
             "files": os.path.exists(os.path.join(project_dir, "files")),
             "processed": os.path.exists(os.path.join(project_dir, "processed")),
-            "dataset": os.path.exists(os.path.join(project_dir, "dataset"))
+            "dataset": os.path.exists(os.path.join(project_dir, "datasets"))
         },
         "groups": {},
         "processing_log_exists": os.path.exists(os.path.join(project_dir, "processing_log.json"))
@@ -245,7 +245,7 @@ def get_processing_status(unique_id: str) -> Dict:
     }
 
     # Check merge status for each group
-    dataset_dir = os.path.join(project_dir, "dataset")
+    dataset_dir = os.path.join(project_dir, "datasets")
     if os.path.exists(dataset_dir):
         for group_name in os.listdir(dataset_dir):
             group_path = os.path.join(dataset_dir, group_name)
@@ -294,4 +294,4 @@ def remove_dataset_directory_by_key(unique_id: str, key: str):
         shutil.rmtree(processed_group_path)
 
     # Remove dataset directory
-    cleanup_dataset_group(unique_id, key)
+    cleanup_dataset_group(unique_id, key)
@@ -228,7 +228,7 @@ def cleanup_orphaned_files(unique_id: str, changes: Dict) -> Dict[str, List[str]
             removed_files[group_name].append("processed group directory")
 
         # Remove entire dataset/group directory
-        dataset_group_dir = os.path.join(project_dir, "dataset", group_name)
+        dataset_group_dir = os.path.join(project_dir, "datasets", group_name)
         if os.path.exists(dataset_group_dir):
             shutil.rmtree(dataset_group_dir)
             removed_files[group_name].append("dataset group directory")
@@ -263,9 +263,9 @@ def ensure_directories(unique_id: str):
     directories = [
         "files",
         "processed",
-        "dataset"
+        "datasets"
     ]
 
     for dir_name in directories:
         dir_path = os.path.join(base_dir, dir_name)
-        os.makedirs(dir_path, exist_ok=True)
+        os.makedirs(dir_path, exist_ok=True)
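Pieced together, the helper in the hunk above is roughly the following; only the lines visible in the diff are confirmed, and the derivation of `base_dir` is an assumption:

```python
import os

def ensure_directories(unique_id: str) -> None:
    """Create the per-project files/, processed/, and datasets/ directories."""
    base_dir = os.path.join("projects", "data", unique_id)  # assumed project layout
    directories = [
        "files",
        "processed",
        "datasets"
    ]
    for dir_name in directories:
        dir_path = os.path.join(base_dir, dir_name)
        os.makedirs(dir_path, exist_ok=True)
```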
@@ -267,7 +267,7 @@ def get_project_statistics(unique_id: str) -> Dict:
     }
 
     # Check each directory
-    directories = ["files", "processed", "dataset"]
+    directories = ["files", "processed", "datasets"]
 
     for dir_name in directories:
         dir_path = os.path.join(project_dir, dir_name)
@@ -293,4 +293,4 @@ def get_project_statistics(unique_id: str) -> Dict:
             "files": 0
         }
 
-    return stats
+    return stats
@@ -75,7 +75,7 @@ def generate_robot_directory_tree(robot_dir: str, robot_id: str, max_depth: int
         return lines
 
     # Build the tree starting from the dataset directory
-    dataset_dir = os.path.join(robot_dir, "dataset")
+    dataset_dir = os.path.join(robot_dir, "datasets")
    tree_lines = []
 
     if not os.path.exists(dataset_dir):
@@ -165,7 +165,7 @@ def copy_dataset_folder(source_project_id: str, target_dataset_dir: Path, folder
         }
 
     try:
-        source_folder = project_path / "data" / source_project_id / "dataset" / folder_name
+        source_folder = project_path / "data" / source_project_id / "datasets" / folder_name
         result["source_path"] = str(source_folder)
 
         if not source_folder.exists():
@@ -231,7 +231,7 @@ def generate_robot_readme(robot_id: str, dataset_ids: List[str], copy_results: L
 
     readme_content += "## Dataset Details\n\n"
 
-    dataset_dir = robot_dir / "dataset"
+    dataset_dir = robot_dir / "datasets"
     if not dataset_dir.exists():
         readme_content += "No dataset files available.\n"
     else:
@@ -324,7 +324,7 @@ def create_robot_project(dataset_ids: List[str], bot_id: str, force_rebuild: boo
 
     # Create the robot directory structure (if it does not already exist)
     robot_dir = _get_robot_dir(project_path, bot_id)
-    dataset_dir = robot_dir / "dataset"
+    dataset_dir = robot_dir / "datasets"
     scripts_dir = robot_dir / "scripts"
     download_dir = robot_dir / "download"
 
@@ -62,7 +62,7 @@ def generate_directory_tree(project_dir: str, unique_id: str, max_depth: int = 3
         return lines
 
     # Start building tree from dataset directory
-    dataset_dir = os.path.join(project_dir, "dataset")
+    dataset_dir = os.path.join(project_dir, "datasets")
     tree_lines = []
 
     if not os.path.exists(dataset_dir):
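For orientation, a minimal sketch of a depth-limited tree walk like the one this function starts here; the indentation style and pruning logic are assumptions, since the diff confirms only the starting directory:

```python
import os

def sketch_directory_tree(project_dir: str, max_depth: int = 3) -> str:
    """Walk datasets/ and render a simple indented tree, pruning below max_depth."""
    dataset_dir = os.path.join(project_dir, "datasets")
    tree_lines = []
    if not os.path.exists(dataset_dir):
        return "(no datasets directory)"
    for root, dirs, files in os.walk(dataset_dir):
        depth = root[len(dataset_dir):].count(os.sep)
        if depth >= max_depth:
            dirs[:] = []  # stop descending past max_depth
            continue
        indent = "    " * depth
        tree_lines.append(f"{indent}{os.path.basename(root)}/")
        for name in sorted(files):
            tree_lines.append(f"{indent}    {name}")
    return "\n".join(tree_lines)
```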
@@ -128,7 +128,7 @@ This project contains processed documents and their associated embeddings for se
 
     """
 
-    dataset_dir = os.path.join(project_dir, "dataset")
+    dataset_dir = os.path.join(project_dir, "datasets")
     if not os.path.exists(dataset_dir):
         readme_content += "No dataset files available.\n"
     else:
@@ -217,7 +217,7 @@ def get_project_status(unique_id: str) -> Dict:
 
     # Collect document.txt files
     document_files = []
-    dataset_dir = os.path.join(project_dir, "dataset")
+    dataset_dir = os.path.join(project_dir, "datasets")
     if os.path.exists(dataset_dir):
         for root, dirs, files in os.walk(dataset_dir):
             for file in files:
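The hunk cuts off inside the walk; a plausible completion of the collection loop follows (the `document.txt` filter is an assumption based on the comment in the hunk):

```python
import os
from typing import List

def collect_document_files(project_dir: str) -> List[str]:
    """Gather every document.txt under the project's datasets directory."""
    document_files = []
    dataset_dir = os.path.join(project_dir, "datasets")
    if os.path.exists(dataset_dir):
        for root, dirs, files in os.walk(dataset_dir):
            for file in files:
                if file == "document.txt":  # assumed filter, not shown in the diff
                    document_files.append(os.path.join(root, file))
    return document_files
```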
@@ -321,7 +321,7 @@ def get_project_stats(unique_id: str) -> Dict:
 
     # Check embeddings files
     embedding_files = []
-    dataset_dir = os.path.join("projects", "data", unique_id, "dataset")
+    dataset_dir = os.path.join("projects", "data", unique_id, "datasets")
     if os.path.exists(dataset_dir):
         for root, dirs, files in os.walk(dataset_dir):
             for file in files: