add symlink

朱潮 2025-12-23 17:36:45 +08:00
parent 61c6b69aa5
commit d3465d418e
9 changed files with 103 additions and 302 deletions

View File

@ -98,9 +98,7 @@ async def load_system_prompt_async(project_dir: str, language: str = None, syste
datetime_str = format_datetime_by_language(language) if language else format_datetime_by_language('en')
# If the {language} placeholder is present, use system_prompt directly
if robot_type == "deep_agent":
return None
elif robot_type == "general_agent" or robot_type == "catalog_agent":
if robot_type == "general_agent" or robot_type == "catalog_agent" or robot_type == "deep_agent":
"""
Prefer README.md from the project directory; fall back to the default system_prompt_{robot_type}.md only when it is missing
"""
@ -122,7 +120,15 @@ async def load_system_prompt_async(project_dir: str, language: str = None, syste
readme_path = os.path.join(project_dir, "README.md")
readme = await config_cache.get_text_file(readme_path) or ""
prompt = system_prompt_default.format(readme=str(readme), extra_prompt=system_prompt or "",language=language_display, user_identifier=user_identifier, datetime=datetime_str)
agent_dir_path = f"~/.deepagents/{bot_id}" #agent_dir_path 其实映射的就是 project_dir目录只是给ai看的目录路径
prompt = system_prompt_default.format(
readme=str(readme),
extra_prompt=system_prompt or "",
language=language_display,
user_identifier=user_identifier,
datetime=datetime_str,
agent_dir_path=agent_dir_path
)
elif system_prompt:
prompt = system_prompt.format(language=language_display, user_identifier=user_identifier, datetime=datetime_str)
return prompt or ""
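The `agent_dir_path` set in this hunk is only the path presented to the model; the files themselves live in the robot project directory, and the symlink added at the end of this commit is what makes the two coincide. A minimal sketch of that mapping (function names and the `projects/robot/{bot_id}` layout are illustrative, not taken verbatim from the code):

```python
import os
from pathlib import Path

def agent_visible_path(bot_id: str) -> str:
    # Path shown to the model in the system prompt (as in the hunk above).
    return f"~/.deepagents/{bot_id}"

def real_project_path(bot_id: str, project_root: Path = Path("projects")) -> Path:
    # Assumed on-disk location of the robot project.
    return project_root / "robot" / bot_id

if __name__ == "__main__":
    shown = agent_visible_path("demo-bot")
    # With the ~/.deepagents symlink in place, the displayed path resolves
    # to the same files as the real project directory.
    print(shown, "->", os.path.realpath(os.path.expanduser(shown)))
    print("expected target:", real_project_path("demo-bot").resolve())
```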

View File

@ -0,0 +1,62 @@
<env>
Working directory: {agent_dir_path}
</env>
### Current Working Directory
The filesystem backend is currently operating in: `{agent_dir_path}`
### File System and Paths
**IMPORTANT - Path Handling:**
- All file paths must be absolute paths (e.g., `{agent_dir_path}/file.txt`)
- Use the working directory from <env> to construct absolute paths
- Example: To create a file in your working directory, use `{agent_dir_path}/research_project/file.md`
- Never use relative paths - always construct full absolute paths
### Skills Directory
Your skills are stored at: `{agent_dir_path}/skills/`
Skills may contain scripts or supporting files. When executing skill scripts with bash, use the real filesystem path:
Example: `bash python {agent_dir_path}/skills/web-research/script.py`
### Human-in-the-Loop Tool Approval
Some tool calls require user approval before execution. When a tool call is rejected by the user:
1. Accept their decision immediately - do NOT retry the same command
2. Explain that you understand they rejected the action
3. Suggest an alternative approach or ask for clarification
4. Never attempt the exact same rejected command again
Respect the user's decisions and work with them collaboratively.
### Web Search Tool Usage
When you use the web_search tool:
1. The tool will return search results with titles, URLs, and content excerpts
2. You MUST read and process these results, then respond naturally to the user
3. NEVER show raw JSON or tool results directly to the user
4. Synthesize the information from multiple sources into a coherent answer
5. Cite your sources by mentioning page titles or URLs when relevant
6. If the search doesn't find what you need, explain what you found and ask clarifying questions
The user only sees your text responses - not tool results. Always provide a complete, natural language answer after using web_search.
### Todo List Management
When using the write_todos tool:
1. Keep the todo list MINIMAL - aim for 3-6 items maximum
2. Only create todos for complex, multi-step tasks that truly need tracking
3. Break down work into clear, actionable items without over-fragmenting
4. For simple tasks (1-2 steps), just do them directly without creating todos
5. When first creating a todo list for a task, ALWAYS ask the user if the plan looks good before starting work
- Create the todos, let them render, then ask: "Does this plan look good?" or similar
- Wait for the user's response before marking the first todo as in_progress
- If they want changes, adjust the plan accordingly
6. Update todo status promptly as you complete each item
The todo list is a planning tool - use it judiciously to avoid overwhelming the user with excessive task tracking.
## System Information
- **Current User**: {user_identifier}
- **Current Time**: {datetime}
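The template above is rendered with `str.format`; a hedged sketch of that call using the same keyword set the `load_system_prompt_async` hunk passes (the file path and values are assumptions, and unused keywords are simply ignored by `str.format`):

```python
from pathlib import Path

# Assumed location of the template shown above.
template = Path("prompts/system_prompt_deep_agent.md").read_text(encoding="utf-8")

prompt = template.format(
    readme="(project README contents)",
    extra_prompt="",
    language="English",
    user_identifier="alice",
    datetime="2025-12-23 17:36 +08:00",
    agent_dir_path="~/.deepagents/demo-bot",
)
print(prompt[:300])
```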

View File

@ -19,7 +19,6 @@ from .config import huey
from utils.file_utils import (
extract_zip_file,
get_file_hash,
is_file_already_processed,
load_processed_files_log,
save_processed_files_log,
get_document_preview

View File

@ -9,7 +9,6 @@ from .file_utils import (
remove_file_or_directory,
extract_zip_file,
get_document_preview,
is_file_already_processed,
load_processed_files_log,
save_processed_files_log
)
@ -44,11 +43,6 @@ from .agent_pool import (
release_agent_to_pool
)
from .organize_dataset_files import (
is_file_already_processed,
organize_single_project_files,
organize_dataset_files
)
from .api_models import (
Message,
@ -77,8 +71,6 @@ from .api_models import (
from .multi_project_manager import (
create_robot_project,
get_robot_project_info,
cleanup_robot_project,
get_unique_folder_name,
copy_dataset_folder,
generate_robot_readme
@ -96,7 +88,6 @@ __all__ = [
'remove_file_or_directory',
'extract_zip_file',
'get_document_preview',
'is_file_already_processed',
'load_processed_files_log',
'save_processed_files_log',
@ -122,10 +113,6 @@ __all__ = [
'get_agent_from_pool',
'release_agent_to_pool',
# organize_dataset_files
'is_file_already_processed',
'organize_single_project_files',
'organize_dataset_files',
# api_models
'Message',
@ -152,8 +139,6 @@ __all__ = [
# multi_project_manager
'create_robot_project',
'get_robot_project_info',
'cleanup_robot_project',
'get_unique_folder_name',
'copy_dataset_folder',
'generate_robot_readme',

View File

@ -373,7 +373,8 @@ def create_project_directory(dataset_ids: Optional[List[str]], bot_id: str, robo
try:
from utils.multi_project_manager import create_robot_project
return create_robot_project(dataset_ids, bot_id)
from pathlib import Path
return create_robot_project(dataset_ids, bot_id, Path("~", ".deepagents"))
except Exception as e:
logger.error(f"Error creating project directory: {e}")
return None
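One hedged aside on the new default: `pathlib` never expands `~` by itself, so `Path("~", ".deepagents")` keeps a literal tilde unless `expanduser()` is called somewhere downstream. A short illustration:

```python
from pathlib import Path

p = Path("~", ".deepagents")
print(p)               # ~/.deepagents  (literal tilde, not the home directory)
print(p.expanduser())  # e.g. /home/alice/.deepagents
```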

View File

@ -91,18 +91,6 @@ def get_document_preview(document_path: str, max_lines: int = 10) -> str:
return f"Error reading document: {str(e)}"
def is_file_already_processed(target_file: Path, pagination_file: Path, embeddings_file: Path) -> bool:
"""Check if a file has already been processed (document.txt, pagination.txt, and embeddings exist)"""
if not target_file.exists():
return False
# Check if pagination and embeddings files exist and are not empty
if pagination_file.exists() and embeddings_file.exists():
# Check file sizes to ensure they're not empty
if pagination_file.stat().st_size > 0 and embeddings_file.stat().st_size > 0:
return True
return False
def load_processed_files_log(unique_id: str) -> Dict[str, Dict]:

View File

@ -141,7 +141,7 @@ def get_unique_folder_name(target_dir: Path, original_name: str) -> str:
counter += 1
def copy_dataset_folder(source_project_id: str, target_dataset_dir: Path, folder_name: str) -> Dict:
def copy_dataset_folder(source_project_id: str, target_dataset_dir: Path, folder_name: str, project_path: Path) -> Dict:
"""
Copy a single project's dataset folder to the target directory
@ -149,6 +149,7 @@ def copy_dataset_folder(source_project_id: str, target_dataset_dir: Path, folder
source_project_id: Source project ID
target_dataset_dir: Target dataset directory
folder_name: Name of the folder to copy
project_path: Project path
Returns:
Dict: Copy result
@ -163,7 +164,7 @@ def copy_dataset_folder(source_project_id: str, target_dataset_dir: Path, folder
}
try:
source_folder = Path("projects") / "data" / source_project_id / "dataset" / folder_name
source_folder = project_path / "data" / source_project_id / "dataset" / folder_name
result["source_path"] = str(source_folder)
if not source_folder.exists():
@ -190,7 +191,7 @@ def copy_dataset_folder(source_project_id: str, target_dataset_dir: Path, folder
return result
def generate_robot_readme(robot_id: str, dataset_ids: List[str], copy_results: List[Dict]) -> str:
def generate_robot_readme(robot_id: str, dataset_ids: List[str], copy_results: List[Dict], project_path: Path) -> str:
"""
Generate the README.md file for the robot project
@ -202,10 +203,10 @@ def generate_robot_readme(robot_id: str, dataset_ids: List[str], copy_results: L
Returns:
str: Path to the README.md file
"""
readme_path = Path("projects") / "robot" / robot_id / "README.md"
readme_path = project_path / "robot" / robot_id / "README.md"
readme_path.parent.mkdir(parents=True, exist_ok=True)
robot_dir = Path("projects") / "robot" / robot_id
robot_dir = project_path / "robot" / robot_id
# Statistics
total_folders = len(copy_results)
@ -300,7 +301,7 @@ def generate_robot_readme(robot_id: str, dataset_ids: List[str], copy_results: L
return str(readme_path)
def should_rebuild_robot_project(dataset_ids: List[str], bot_id: str) -> bool:
def should_rebuild_robot_project(dataset_ids: List[str], bot_id: str, project_path: Path) -> bool:
"""
Check whether the robot project needs to be rebuilt
1. Check whether the robot project exists
@ -310,11 +311,12 @@ def should_rebuild_robot_project(dataset_ids: List[str], bot_id: str) -> bool:
Args:
dataset_ids: List of source project IDs
bot_id: Robot ID
project_path: Project path
Returns:
bool: Whether a rebuild is needed
"""
robot_dir = Path("projects") / "robot" / bot_id
robot_dir = project_path / "robot" / bot_id
# If the robot project does not exist, it needs to be created
if not robot_dir.exists():
@ -356,7 +358,7 @@ def should_rebuild_robot_project(dataset_ids: List[str], bot_id: str) -> bool:
# Check each source project's processing_log.json file
for source_project_id in dataset_ids:
log_file = Path("projects") / "data" / source_project_id / "processing_log.json"
log_file = project_path / "data" / source_project_id / "processing_log.json"
if not log_file.exists():
logger.info(f"Processing log file not found for project {source_project_id}, will rebuild")
@ -373,7 +375,7 @@ def should_rebuild_robot_project(dataset_ids: List[str], bot_id: str) -> bool:
return False
def create_robot_project(dataset_ids: List[str], bot_id: str, force_rebuild: bool = False) -> str:
def create_robot_project(dataset_ids: List[str], bot_id: str, force_rebuild: bool = False, project_path: Path = Path("projects")) -> str:
"""
Create a robot project, merging the dataset folders of multiple source projects
@ -386,15 +388,15 @@ def create_robot_project(dataset_ids: List[str], bot_id: str, force_rebuild: boo
str: Path to the robot project directory
"""
logger.info(f"Creating robot project: {bot_id} from sources: {dataset_ids}")
# Check whether a rebuild is needed
if not force_rebuild and not should_rebuild_robot_project(dataset_ids, bot_id):
robot_dir = Path("projects") / "robot" / bot_id
if not force_rebuild and not should_rebuild_robot_project(dataset_ids, bot_id, project_path):
robot_dir = project_path / "robot" / bot_id
logger.info(f"Using existing robot project: {robot_dir}")
return str(robot_dir)
# Create the robot directory structure
robot_dir = Path("projects") / "robot" / bot_id
robot_dir = project_path / "robot" / bot_id
dataset_dir = robot_dir / "dataset"
# Clean up the existing directory (if needed)
@ -411,7 +413,7 @@ def create_robot_project(dataset_ids: List[str], bot_id: str, force_rebuild: boo
for source_project_id in dataset_ids:
logger.info(f"\nProcessing source project: {source_project_id}")
source_dataset_dir = Path("projects") / "data" / source_project_id / "dataset"
source_dataset_dir = project_path / "data" / source_project_id / "dataset"
if not source_dataset_dir.exists():
logger.warning(f" Warning: Dataset directory not found for project {source_project_id}")
@ -426,7 +428,7 @@ def create_robot_project(dataset_ids: List[str], bot_id: str, force_rebuild: boo
# Copy each folder
for folder in folders:
result = copy_dataset_folder(source_project_id, dataset_dir, folder.name)
result = copy_dataset_folder(source_project_id, dataset_dir, folder.name, project_path)
copy_results.append(result)
# Save configuration info
@ -442,7 +444,7 @@ def create_robot_project(dataset_ids: List[str], bot_id: str, force_rebuild: boo
json.dump(config_data, f, ensure_ascii=False, indent=2)
# Generate the README
readme_path = generate_robot_readme(bot_id, dataset_ids, copy_results)
readme_path = generate_robot_readme(bot_id, dataset_ids, copy_results, project_path)
# Statistics
successful_copies = sum(1 for r in copy_results if r["success"])
@ -456,78 +458,6 @@ def create_robot_project(dataset_ids: List[str], bot_id: str, force_rebuild: boo
return str(robot_dir)
def get_robot_project_info(bot_id: str) -> Dict:
"""
Get robot project information
Args:
bot_id: Robot ID
Returns:
Dict: Robot project information
"""
robot_dir = Path("projects") / "robot" / bot_id
if not robot_dir.exists():
return {
"exists": False,
"bot_id": bot_id,
"error": "Robot project does not exist"
}
dataset_dir = robot_dir / "dataset"
readme_path = robot_dir / "README.md"
# Count folders
folder_count = 0
total_size = 0
if dataset_dir.exists():
for item in dataset_dir.iterdir():
if item.is_dir():
folder_count += 1
# Calculate folder size
for file_path in item.rglob('*'):
if file_path.is_file():
total_size += file_path.stat().st_size
return {
"exists": True,
"bot_id": bot_id,
"robot_dir": str(robot_dir),
"dataset_dir": str(dataset_dir),
"readme_exists": readme_path.exists(),
"folder_count": folder_count,
"total_size_bytes": total_size,
"total_size_mb": round(total_size / (1024 * 1024), 2)
}
def cleanup_robot_project(bot_id: str) -> bool:
"""
Clean up the robot project
Args:
bot_id: Robot ID
Returns:
bool: Whether cleanup succeeded
"""
try:
robot_dir = Path("projects") / "robot" / bot_id
if robot_dir.exists():
shutil.rmtree(robot_dir)
logger.info(f"Cleaned up robot project: {bot_id}")
return True
else:
logger.info(f"Robot project does not exist: {bot_id}")
return True
except Exception as e:
logger.error(f"Error cleaning up robot project {bot_id}: {str(e)}")
return False
if __name__ == "__main__":
# Test code
test_dataset_ids = ["test-project-1", "test-project-2"]
@ -536,5 +466,3 @@ if __name__ == "__main__":
robot_dir = create_robot_project(test_dataset_ids, test_bot_id)
logger.info(f"Created robot project at: {robot_dir}")
info = get_robot_project_info(test_bot_id)
logger.info(f"Robot project info: {json.dumps(info, indent=2, ensure_ascii=False)}")

View File

@ -1,179 +0,0 @@
#!/usr/bin/env python3
import os
import shutil
import logging
from pathlib import Path
# Configure logging
logger = logging.getLogger('app')
def is_file_already_processed(target_file: Path, pagination_file: Path, embeddings_file: Path) -> bool:
"""Check if a file has already been processed (document.txt, pagination.txt, and embeddings exist)"""
if not target_file.exists():
return False
# Check if pagination and embeddings files exist and are not empty
if pagination_file.exists() and embeddings_file.exists():
# Check file sizes to ensure they're not empty
if pagination_file.stat().st_size > 0 and embeddings_file.stat().st_size > 0:
return True
return False
def organize_single_project_files(unique_id: str, skip_processed=True):
"""Organize files for a single project from projects/data/{unique_id}/files to projects/data/{unique_id}/dataset/{file_name}/document.txt"""
project_dir = Path("projects") / "data" / unique_id
if not project_dir.exists():
logger.error(f"Project directory not found: {project_dir}")
return
logger.info(f"Organizing files for project: {unique_id} (skip_processed={skip_processed})")
files_dir = project_dir / "files"
dataset_dir = project_dir / "dataset"
# Check if files directory exists and has files
if not files_dir.exists():
logger.info(f" No files directory found, skipping...")
return
files = list(files_dir.glob("*"))
if not files:
logger.info(f" Files directory is empty, skipping...")
return
# Create dataset directory if it doesn't exist
dataset_dir.mkdir(exist_ok=True)
# Copy each file to its own directory
for file_path in files:
if file_path.is_file():
# Get filename without extension as directory name
file_name_without_ext = file_path.stem
target_dir = dataset_dir / file_name_without_ext
target_file = target_dir / "document.txt"
pagination_file = target_dir / "pagination.txt"
embeddings_file = target_dir / "embedding.pkl"
# Check if file is already processed
if skip_processed and is_file_already_processed(target_file, pagination_file, embeddings_file):
logger.info(f" Skipping already processed file: {file_path.name}")
continue
logger.info(f" Copying {file_path.name} -> {target_file.relative_to(project_dir)}")
# Create target directory
target_dir.mkdir(exist_ok=True)
# Copy and rename file
shutil.copy2(str(file_path), str(target_file))
print(f" Files remain in original location (copied to dataset structure)")
# Process each document.txt file: split pages and generate embeddings
if not skip_processed:
import sys
sys.path.append(os.path.join(os.path.dirname(__file__), 'embedding'))
from embedding import split_document_by_pages, embed_document
for file_path in files:
if file_path.is_file():
file_name_without_ext = file_path.stem
target_dir = dataset_dir / file_name_without_ext
document_file = target_dir / "document.txt"
pagination_file = target_dir / "pagination.txt"
embeddings_file = target_dir / "embedding.pkl"
# Skip if already processed
if is_file_already_processed(document_file, pagination_file, embeddings_file):
print(f" Skipping document processing for already processed file: {file_path.name}")
continue
# Split document by pages
print(f" Splitting pages for {document_file.name}")
try:
pages = split_document_by_pages(str(document_file), str(pagination_file))
print(f" Generated {len(pages)} pages")
except Exception as e:
print(f" Failed to split pages: {e}")
continue
# Generate embeddings
print(f" Generating embeddings for {document_file.name}")
try:
# Use paragraph chunking strategy with default settings
embedding_data = embed_document(
str(document_file),
str(embeddings_file),
chunking_strategy='paragraph'
)
if embedding_data:
print(f" Generated embeddings for {len(embedding_data['chunks'])} chunks")
else:
print(f" Failed to generate embeddings")
except Exception as e:
print(f" Failed to generate embeddings: {e}")
print(f" Document processing completed for project {unique_id}")
else:
print(f" Skipping document processing (skip_processed=True)")
def organize_dataset_files():
"""Move files from projects/data/{unique_id}/files to projects/data/{unique_id}/dataset/{file_name}/document.txt"""
projects_dir = Path("projects") / "data"
if not projects_dir.exists():
print("Projects directory not found")
return
# Get all project directories (exclude cache and other non-project dirs)
project_dirs = [d for d in projects_dir.iterdir()
if d.is_dir() and d.name != "_cache" and not d.name.startswith(".")]
for project_dir in project_dirs:
print(f"\nProcessing project: {project_dir.name}")
files_dir = project_dir / "files"
dataset_dir = project_dir / "dataset"
# Check if files directory exists and has files
if not files_dir.exists():
logger.info(f" No files directory found, skipping...")
continue
files = list(files_dir.glob("*"))
if not files:
logger.info(f" Files directory is empty, skipping...")
continue
# Create dataset directory if it doesn't exist
dataset_dir.mkdir(exist_ok=True)
# Move each file to its own directory
for file_path in files:
if file_path.is_file():
# Get filename without extension as directory name
file_name_without_ext = file_path.stem
target_dir = dataset_dir / file_name_without_ext
target_file = target_dir / "document.txt"
logger.info(f" Copying {file_path.name} -> {target_file.relative_to(project_dir)}")
# Create target directory
target_dir.mkdir(exist_ok=True)
# Copy and rename file
shutil.copy2(str(file_path), str(target_file))
print(f" Files remain in original location (copied to dataset structure)")
print("\nFile organization complete!")
if __name__ == "__main__":
organize_dataset_files()

View File

@ -54,6 +54,17 @@ def setup_deepagents_symlink():
logger.info(f"Removed existing symlink pointing to {target}")
# Create the symbolic link
# Check again before creating to handle race conditions
if deepagents_dir.is_symlink() or deepagents_dir.exists():
logger.warning(f"Path {deepagents_dir} exists, attempting to remove before symlink")
if deepagents_dir.is_symlink():
deepagents_dir.unlink()
elif deepagents_dir.is_dir():
import shutil
shutil.rmtree(str(deepagents_dir))
else:
deepagents_dir.unlink()
os.symlink(robot_dir, deepagents_dir, target_is_directory=True)
logger.info(f"Created symbolic link: {deepagents_dir} -> {robot_dir}")
return True
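After setup, the link can be checked with a small hedged snippet (the `~/.deepagents` location is assumed from the surrounding function; the real values of `deepagents_dir` and `robot_dir` come from code outside this hunk):

```python
import os
from pathlib import Path

link = Path("~/.deepagents").expanduser()  # assumed link location

if link.is_symlink():
    print(f"{link} -> {os.readlink(link)}")
    # resolve() follows the link, so children like <link>/<bot_id> read from
    # the real robot project directory.
    print("resolves to:", link.resolve())
else:
    print(f"{link} is not a symlink (or setup has not run yet)")
```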