Merge branch 'developing' into dev
This commit is contained in:
commit
5bb09b22a5
@@ -291,7 +291,7 @@ async def load_mcp_settings_async(config) -> List[Dict]:
|
|||||||
|
|
||||||
# 计算 dataset_dir 用于替换 MCP 配置中的占位符
|
# 计算 dataset_dir 用于替换 MCP 配置中的占位符
|
||||||
# 只有当 project_dir 不为 None 时才计算 dataset_dir
|
# 只有当 project_dir 不为 None 时才计算 dataset_dir
|
||||||
dataset_dir = os.path.join(project_dir, "dataset") if project_dir is not None else None
|
dataset_dir = os.path.join(project_dir, "datasets") if project_dir is not None else None
|
||||||
# 替换 MCP 配置中的 {dataset_dir} 占位符
|
# 替换 MCP 配置中的 {dataset_dir} 占位符
|
||||||
if dataset_dir is None:
|
if dataset_dir is None:
|
||||||
dataset_dir = ""
|
dataset_dir = ""
|
||||||
|
|||||||
@@ -54,7 +54,7 @@ When executing scripts from SKILL.md files, you MUST convert relative paths to a
|
|||||||
**4. Workspace Directory Structure**
|
**4. Workspace Directory Structure**
|
||||||
|
|
||||||
- **`{agent_dir_path}/skills/`** - Skill packages with embedded scripts
|
- **`{agent_dir_path}/skills/`** - Skill packages with embedded scripts
|
||||||
- **`{agent_dir_path}/dataset/`** - Store file datasets and document data
|
- **`{agent_dir_path}/datasets/`** - Store file datasets and document data
|
||||||
- **`{agent_dir_path}/executable_code/`** - Place generated executable scripts here (not skill scripts)
|
- **`{agent_dir_path}/executable_code/`** - Place generated executable scripts here (not skill scripts)
|
||||||
- **`{agent_dir_path}/download/`** - Store downloaded files and content
|
- **`{agent_dir_path}/download/`** - Store downloaded files and content
|
||||||
|
|
||||||
@@ -75,7 +75,7 @@ When creating scripts in `executable_code/`, follow these organization rules:
|
|||||||
|
|
||||||
**Path Examples:**
|
**Path Examples:**
|
||||||
- Skill script: `{agent_dir_path}/skills/rag-retrieve/scripts/rag_retrieve.py`
|
- Skill script: `{agent_dir_path}/skills/rag-retrieve/scripts/rag_retrieve.py`
|
||||||
- Dataset file: `{agent_dir_path}/dataset/document.txt`
|
- Dataset file: `{agent_dir_path}/datasets/document.txt`
|
||||||
- Task-specific script: `{agent_dir_path}/executable_code/invoice_parser/parse.py`
|
- Task-specific script: `{agent_dir_path}/executable_code/invoice_parser/parse.py`
|
||||||
- Temporary script (when needed): `{agent_dir_path}/executable_code/tmp/test.py`
|
- Temporary script (when needed): `{agent_dir_path}/executable_code/tmp/test.py`
|
||||||
- Downloaded file: `{agent_dir_path}/download/report.pdf`
|
- Downloaded file: `{agent_dir_path}/download/report.pdf`
|
||||||
|
|||||||
@@ -213,7 +213,7 @@ async def reset_files_processing(dataset_id: str):
|
|||||||
elif 'filename' in file_info:
|
elif 'filename' in file_info:
|
||||||
# Fallback to old filename-based structure
|
# Fallback to old filename-based structure
|
||||||
filename_without_ext = os.path.splitext(file_info['filename'])[0]
|
filename_without_ext = os.path.splitext(file_info['filename'])[0]
|
||||||
dataset_dir = os.path.join("projects", "data", dataset_id, "dataset", filename_without_ext)
|
dataset_dir = os.path.join("projects", "data", dataset_id, "datasets", filename_without_ext)
|
||||||
if remove_file_or_directory(dataset_dir):
|
if remove_file_or_directory(dataset_dir):
|
||||||
removed_files.append(dataset_dir)
|
removed_files.append(dataset_dir)
|
||||||
|
|
||||||
@@ -232,7 +232,7 @@ async def reset_files_processing(dataset_id: str):
|
|||||||
removed_files.append(files_dir)
|
removed_files.append(files_dir)
|
||||||
|
|
||||||
# Also remove the entire dataset directory (clean up any remaining files)
|
# Also remove the entire dataset directory (clean up any remaining files)
|
||||||
dataset_dir = os.path.join(project_dir, "dataset")
|
dataset_dir = os.path.join(project_dir, "datasets")
|
||||||
if remove_file_or_directory(dataset_dir):
|
if remove_file_or_directory(dataset_dir):
|
||||||
removed_files.append(dataset_dir)
|
removed_files.append(dataset_dir)
|
||||||
|
|
||||||
|
|||||||
@@ -33,8 +33,8 @@ async def list_all_projects():
|
|||||||
|
|
||||||
# 统计文件数量
|
# 统计文件数量
|
||||||
file_count = 0
|
file_count = 0
|
||||||
if os.path.exists(os.path.join(item_path, "dataset")):
|
if os.path.exists(os.path.join(item_path, "datasets")):
|
||||||
for root, dirs, files in os.walk(os.path.join(item_path, "dataset")):
|
for root, dirs, files in os.walk(os.path.join(item_path, "datasets")):
|
||||||
file_count += len(files)
|
file_count += len(files)
|
||||||
|
|
||||||
robot_projects.append({
|
robot_projects.append({
|
||||||
|
|||||||
@@ -181,7 +181,7 @@ def process_files_async(
|
|||||||
result_files = []
|
result_files = []
|
||||||
for key in processed_files_by_key.keys():
|
for key in processed_files_by_key.keys():
|
||||||
# 添加对应的dataset document.txt路径
|
# 添加对应的dataset document.txt路径
|
||||||
document_path = os.path.join("projects", "data", dataset_id, "dataset", key, "document.txt")
|
document_path = os.path.join("projects", "data", dataset_id, "datasets", key, "document.txt")
|
||||||
if os.path.exists(document_path):
|
if os.path.exists(document_path):
|
||||||
result_files.append(document_path)
|
result_files.append(document_path)
|
||||||
|
|
||||||
@@ -382,7 +382,7 @@ def process_files_incremental_async(
|
|||||||
result_files = []
|
result_files = []
|
||||||
for key in processed_files_by_key.keys():
|
for key in processed_files_by_key.keys():
|
||||||
# 添加对应的dataset document.txt路径
|
# 添加对应的dataset document.txt路径
|
||||||
document_path = os.path.join("projects", "data", dataset_id, "dataset", key, "document.txt")
|
document_path = os.path.join("projects", "data", dataset_id, "datasets", key, "document.txt")
|
||||||
if os.path.exists(document_path):
|
if os.path.exists(document_path):
|
||||||
result_files.append(document_path)
|
result_files.append(document_path)
|
||||||
|
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ def merge_documents_by_group(unique_id: str, group_name: str) -> Dict:
|
|||||||
"""Merge all document.txt files in a group into a single document."""
|
"""Merge all document.txt files in a group into a single document."""
|
||||||
|
|
||||||
processed_group_dir = os.path.join("projects", "data", unique_id, "processed", group_name)
|
processed_group_dir = os.path.join("projects", "data", unique_id, "processed", group_name)
|
||||||
dataset_group_dir = os.path.join("projects", "data", unique_id, "dataset", group_name)
|
dataset_group_dir = os.path.join("projects", "data", unique_id, "datasets", group_name)
|
||||||
os.makedirs(dataset_group_dir, exist_ok=True)
|
os.makedirs(dataset_group_dir, exist_ok=True)
|
||||||
|
|
||||||
merged_document_path = os.path.join(dataset_group_dir, "document.txt")
|
merged_document_path = os.path.join(dataset_group_dir, "document.txt")
|
||||||
@@ -96,7 +96,7 @@ def merge_paginations_by_group(unique_id: str, group_name: str) -> Dict:
|
|||||||
"""Merge all pagination.txt files in a group."""
|
"""Merge all pagination.txt files in a group."""
|
||||||
|
|
||||||
processed_group_dir = os.path.join("projects", "data", unique_id, "processed", group_name)
|
processed_group_dir = os.path.join("projects", "data", unique_id, "processed", group_name)
|
||||||
dataset_group_dir = os.path.join("projects", "data", unique_id, "dataset", group_name)
|
dataset_group_dir = os.path.join("projects", "data", unique_id, "datasets", group_name)
|
||||||
os.makedirs(dataset_group_dir, exist_ok=True)
|
os.makedirs(dataset_group_dir, exist_ok=True)
|
||||||
|
|
||||||
merged_pagination_path = os.path.join(dataset_group_dir, "pagination.txt")
|
merged_pagination_path = os.path.join(dataset_group_dir, "pagination.txt")
|
||||||
@@ -166,7 +166,7 @@ def merge_embeddings_by_group(unique_id: str, group_name: str) -> Dict:
|
|||||||
"""Merge all embedding.pkl files in a group."""
|
"""Merge all embedding.pkl files in a group."""
|
||||||
|
|
||||||
processed_group_dir = os.path.join("projects", "data", unique_id, "processed", group_name)
|
processed_group_dir = os.path.join("projects", "data", unique_id, "processed", group_name)
|
||||||
dataset_group_dir = os.path.join("projects", "data", unique_id, "dataset", group_name)
|
dataset_group_dir = os.path.join("projects", "data", unique_id, "datasets", group_name)
|
||||||
os.makedirs(dataset_group_dir, exist_ok=True)
|
os.makedirs(dataset_group_dir, exist_ok=True)
|
||||||
|
|
||||||
merged_embedding_path = os.path.join(dataset_group_dir, "embedding.pkl")
|
merged_embedding_path = os.path.join(dataset_group_dir, "embedding.pkl")
|
||||||
@@ -379,7 +379,7 @@ def merge_all_data_by_group(unique_id: str, group_name: str) -> Dict:
|
|||||||
def get_group_merge_status(unique_id: str, group_name: str) -> Dict:
|
def get_group_merge_status(unique_id: str, group_name: str) -> Dict:
|
||||||
"""Get the status of merged data for a group."""
|
"""Get the status of merged data for a group."""
|
||||||
|
|
||||||
dataset_group_dir = os.path.join("projects", "data", unique_id, "dataset", group_name)
|
dataset_group_dir = os.path.join("projects", "data", unique_id, "datasets", group_name)
|
||||||
|
|
||||||
status = {
|
status = {
|
||||||
"group_name": group_name,
|
"group_name": group_name,
|
||||||
@@ -423,7 +423,7 @@ def get_group_merge_status(unique_id: str, group_name: str) -> Dict:
|
|||||||
def cleanup_dataset_group(unique_id: str, group_name: str) -> bool:
|
def cleanup_dataset_group(unique_id: str, group_name: str) -> bool:
|
||||||
"""Clean up merged dataset files for a group."""
|
"""Clean up merged dataset files for a group."""
|
||||||
|
|
||||||
dataset_group_dir = os.path.join("projects", "data", unique_id, "dataset", group_name)
|
dataset_group_dir = os.path.join("projects", "data", unique_id, "datasets", group_name)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if os.path.exists(dataset_group_dir):
|
if os.path.exists(dataset_group_dir):
|
||||||
|
|||||||
@@ -200,7 +200,7 @@ def generate_dataset_structure(unique_id: str) -> str:
|
|||||||
add_directory_contents(processed_dir, "")
|
add_directory_contents(processed_dir, "")
|
||||||
|
|
||||||
# Add dataset directory structure
|
# Add dataset directory structure
|
||||||
dataset_dir = os.path.join(project_dir, "dataset")
|
dataset_dir = os.path.join(project_dir, "datasets")
|
||||||
structure.append("\ndataset/")
|
structure.append("\ndataset/")
|
||||||
add_directory_contents(dataset_dir, "")
|
add_directory_contents(dataset_dir, "")
|
||||||
|
|
||||||
@@ -224,7 +224,7 @@ def get_processing_status(unique_id: str) -> Dict:
|
|||||||
"directories": {
|
"directories": {
|
||||||
"files": os.path.exists(os.path.join(project_dir, "files")),
|
"files": os.path.exists(os.path.join(project_dir, "files")),
|
||||||
"processed": os.path.exists(os.path.join(project_dir, "processed")),
|
"processed": os.path.exists(os.path.join(project_dir, "processed")),
|
||||||
"dataset": os.path.exists(os.path.join(project_dir, "dataset"))
|
"dataset": os.path.exists(os.path.join(project_dir, "datasets"))
|
||||||
},
|
},
|
||||||
"groups": {},
|
"groups": {},
|
||||||
"processing_log_exists": os.path.exists(os.path.join(project_dir, "processing_log.json"))
|
"processing_log_exists": os.path.exists(os.path.join(project_dir, "processing_log.json"))
|
||||||
@@ -245,7 +245,7 @@ def get_processing_status(unique_id: str) -> Dict:
|
|||||||
}
|
}
|
||||||
|
|
||||||
# Check merge status for each group
|
# Check merge status for each group
|
||||||
dataset_dir = os.path.join(project_dir, "dataset")
|
dataset_dir = os.path.join(project_dir, "datasets")
|
||||||
if os.path.exists(dataset_dir):
|
if os.path.exists(dataset_dir):
|
||||||
for group_name in os.listdir(dataset_dir):
|
for group_name in os.listdir(dataset_dir):
|
||||||
group_path = os.path.join(dataset_dir, group_name)
|
group_path = os.path.join(dataset_dir, group_name)
|
||||||
|
|||||||
@@ -228,7 +228,7 @@ def cleanup_orphaned_files(unique_id: str, changes: Dict) -> Dict[str, List[str]
|
|||||||
removed_files[group_name].append("processed group directory")
|
removed_files[group_name].append("processed group directory")
|
||||||
|
|
||||||
# Remove entire dataset/group directory
|
# Remove entire dataset/group directory
|
||||||
dataset_group_dir = os.path.join(project_dir, "dataset", group_name)
|
dataset_group_dir = os.path.join(project_dir, "datasets", group_name)
|
||||||
if os.path.exists(dataset_group_dir):
|
if os.path.exists(dataset_group_dir):
|
||||||
shutil.rmtree(dataset_group_dir)
|
shutil.rmtree(dataset_group_dir)
|
||||||
removed_files[group_name].append("dataset group directory")
|
removed_files[group_name].append("dataset group directory")
|
||||||
@@ -263,7 +263,7 @@ def ensure_directories(unique_id: str):
|
|||||||
directories = [
|
directories = [
|
||||||
"files",
|
"files",
|
||||||
"processed",
|
"processed",
|
||||||
"dataset"
|
"datasets"
|
||||||
]
|
]
|
||||||
|
|
||||||
for dir_name in directories:
|
for dir_name in directories:
|
||||||
|
|||||||
@@ -267,7 +267,7 @@ def get_project_statistics(unique_id: str) -> Dict:
|
|||||||
}
|
}
|
||||||
|
|
||||||
# Check each directory
|
# Check each directory
|
||||||
directories = ["files", "processed", "dataset"]
|
directories = ["files", "processed", "datasets"]
|
||||||
|
|
||||||
for dir_name in directories:
|
for dir_name in directories:
|
||||||
dir_path = os.path.join(project_dir, dir_name)
|
dir_path = os.path.join(project_dir, dir_name)
|
||||||
|
|||||||
@@ -75,7 +75,7 @@ def generate_robot_directory_tree(robot_dir: str, robot_id: str, max_depth: int
|
|||||||
return lines
|
return lines
|
||||||
|
|
||||||
# 从dataset目录开始构建树
|
# 从dataset目录开始构建树
|
||||||
dataset_dir = os.path.join(robot_dir, "dataset")
|
dataset_dir = os.path.join(robot_dir, "datasets")
|
||||||
tree_lines = []
|
tree_lines = []
|
||||||
|
|
||||||
if not os.path.exists(dataset_dir):
|
if not os.path.exists(dataset_dir):
|
||||||
@@ -165,7 +165,7 @@ def copy_dataset_folder(source_project_id: str, target_dataset_dir: Path, folder
|
|||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
source_folder = project_path / "data" / source_project_id / "dataset" / folder_name
|
source_folder = project_path / "data" / source_project_id / "datasets" / folder_name
|
||||||
result["source_path"] = str(source_folder)
|
result["source_path"] = str(source_folder)
|
||||||
|
|
||||||
if not source_folder.exists():
|
if not source_folder.exists():
|
||||||
@@ -231,7 +231,7 @@ def generate_robot_readme(robot_id: str, dataset_ids: List[str], copy_results: L
|
|||||||
|
|
||||||
readme_content += "## 数据集详情\n\n"
|
readme_content += "## 数据集详情\n\n"
|
||||||
|
|
||||||
dataset_dir = robot_dir / "dataset"
|
dataset_dir = robot_dir / "datasets"
|
||||||
if not dataset_dir.exists():
|
if not dataset_dir.exists():
|
||||||
readme_content += "No dataset files available.\n"
|
readme_content += "No dataset files available.\n"
|
||||||
else:
|
else:
|
||||||
@@ -324,7 +324,7 @@ def create_robot_project(dataset_ids: List[str], bot_id: str, force_rebuild: boo
|
|||||||
|
|
||||||
# 创建机器人目录结构(如果不存在)
|
# 创建机器人目录结构(如果不存在)
|
||||||
robot_dir = _get_robot_dir(project_path, bot_id)
|
robot_dir = _get_robot_dir(project_path, bot_id)
|
||||||
dataset_dir = robot_dir / "dataset"
|
dataset_dir = robot_dir / "datasets"
|
||||||
scripts_dir = robot_dir / "scripts"
|
scripts_dir = robot_dir / "scripts"
|
||||||
download_dir = robot_dir / "download"
|
download_dir = robot_dir / "download"
|
||||||
|
|
||||||
|
|||||||
@@ -62,7 +62,7 @@ def generate_directory_tree(project_dir: str, unique_id: str, max_depth: int = 3
|
|||||||
return lines
|
return lines
|
||||||
|
|
||||||
# Start building tree from dataset directory
|
# Start building tree from dataset directory
|
||||||
dataset_dir = os.path.join(project_dir, "dataset")
|
dataset_dir = os.path.join(project_dir, "datasets")
|
||||||
tree_lines = []
|
tree_lines = []
|
||||||
|
|
||||||
if not os.path.exists(dataset_dir):
|
if not os.path.exists(dataset_dir):
|
||||||
@@ -128,7 +128,7 @@ This project contains processed documents and their associated embeddings for se
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
dataset_dir = os.path.join(project_dir, "dataset")
|
dataset_dir = os.path.join(project_dir, "datasets")
|
||||||
if not os.path.exists(dataset_dir):
|
if not os.path.exists(dataset_dir):
|
||||||
readme_content += "No dataset files available.\n"
|
readme_content += "No dataset files available.\n"
|
||||||
else:
|
else:
|
||||||
@@ -217,7 +217,7 @@ def get_project_status(unique_id: str) -> Dict:
|
|||||||
|
|
||||||
# Collect document.txt files
|
# Collect document.txt files
|
||||||
document_files = []
|
document_files = []
|
||||||
dataset_dir = os.path.join(project_dir, "dataset")
|
dataset_dir = os.path.join(project_dir, "datasets")
|
||||||
if os.path.exists(dataset_dir):
|
if os.path.exists(dataset_dir):
|
||||||
for root, dirs, files in os.walk(dataset_dir):
|
for root, dirs, files in os.walk(dataset_dir):
|
||||||
for file in files:
|
for file in files:
|
||||||
@@ -321,7 +321,7 @@ def get_project_stats(unique_id: str) -> Dict:
|
|||||||
|
|
||||||
# Check embeddings files
|
# Check embeddings files
|
||||||
embedding_files = []
|
embedding_files = []
|
||||||
dataset_dir = os.path.join("projects", "data", unique_id, "dataset")
|
dataset_dir = os.path.join("projects", "data", unique_id, "datasets")
|
||||||
if os.path.exists(dataset_dir):
|
if os.path.exists(dataset_dir):
|
||||||
for root, dirs, files in os.walk(dataset_dir):
|
for root, dirs, files in os.walk(dataset_dir):
|
||||||
for file in files:
|
for file in files:
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user