diff --git a/agent/prompt_loader.py b/agent/prompt_loader.py
index ddd2bfc..bbb76f2 100644
--- a/agent/prompt_loader.py
+++ b/agent/prompt_loader.py
@@ -98,9 +98,7 @@ async def load_system_prompt_async(project_dir: str, language: str = None, syste
datetime_str = format_datetime_by_language(language) if language else format_datetime_by_language('en')
# If a {language} placeholder exists, use system_prompt directly
- if robot_type == "deep_agent":
- return None
- elif robot_type == "general_agent" or robot_type == "catalog_agent":
+ if robot_type in ("general_agent", "catalog_agent", "deep_agent"):
"""
Prefer README.md from the project directory; only fall back to the default system_prompt_{robot_type}.md when it is missing.
"""
@@ -122,7 +120,15 @@ async def load_system_prompt_async(project_dir: str, language: str = None, syste
readme_path = os.path.join(project_dir, "README.md")
readme = await config_cache.get_text_file(readme_path) or ""
- prompt = system_prompt_default.format(readme=str(readme), extra_prompt=system_prompt or "",language=language_display, user_identifier=user_identifier, datetime=datetime_str)
+ agent_dir_path = f"~/.deepagents/{bot_id}"  # agent_dir_path maps to the project_dir directory; it is just the path shown to the AI
+ prompt = system_prompt_default.format(
+ readme=str(readme),
+ extra_prompt=system_prompt or "",
+ language=language_display,
+ user_identifier=user_identifier,
+ datetime=datetime_str,
+ agent_dir_path=agent_dir_path
+ )
elif system_prompt:
prompt = system_prompt.format(language=language_display, user_identifier=user_identifier, datetime=datetime_str)
return prompt or ""
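A minimal sketch (not part of the patch) of how the new `{agent_dir_path}` placeholder gets filled in by `load_system_prompt_async`: Python's `str.format` ignores keyword arguments that have no matching placeholder, so a template like `system_prompt_deep_agent.md`, which omits `{readme}` and `{extra_prompt}`, still formats cleanly. The template excerpt and bot id below are hypothetical.

```python
# Hypothetical template excerpt mirroring prompt/system_prompt_deep_agent.md.
template = (
    "Working directory: {agent_dir_path}\n"
    "## System Information\n"
    "- **Current User**: {user_identifier}\n"
    "- **Current Time**: {datetime}\n"
)

bot_id = "demo-bot"  # hypothetical id
prompt = template.format(
    readme="",                       # no {readme} placeholder -> silently ignored
    extra_prompt="",                 # likewise ignored
    language="English",
    user_identifier="alice",
    datetime="2025-01-01 12:00",
    agent_dir_path=f"~/.deepagents/{bot_id}",  # virtual path shown to the model, not project_dir itself
)
print(prompt)
```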
diff --git a/prompt/system_prompt_deep_agent.md b/prompt/system_prompt_deep_agent.md
new file mode 100644
index 0000000..3ec5258
--- /dev/null
+++ b/prompt/system_prompt_deep_agent.md
@@ -0,0 +1,62 @@
+
+Working directory: {agent_dir_path}
+
+
+### Current Working Directory
+
+The filesystem backend is currently operating in: `{agent_dir_path}`
+
+### File System and Paths
+
+**IMPORTANT - Path Handling:**
+- All file paths must be absolute paths (e.g., `{agent_dir_path}/file.txt`)
+- Use the working directory above to construct absolute paths
+- Example: To create a file in your working directory, use `{agent_dir_path}/research_project/file.md`
+- Never use relative paths - always construct full absolute paths
+
+### Skills Directory
+
+Your skills are stored at: `{agent_dir_path}/skills/`
+Skills may contain scripts or supporting files. When executing skill scripts with bash, use the real filesystem path:
+Example (run via the bash tool): `python {agent_dir_path}/skills/web-research/script.py`
+
+### Human-in-the-Loop Tool Approval
+
+Some tool calls require user approval before execution. When a tool call is rejected by the user:
+1. Accept their decision immediately - do NOT retry the same command
+2. Explain that you understand they rejected the action
+3. Suggest an alternative approach or ask for clarification
+4. Never attempt the exact same rejected command again
+
+Respect the user's decisions and work with them collaboratively.
+
+### Web Search Tool Usage
+
+When you use the web_search tool:
+1. The tool will return search results with titles, URLs, and content excerpts
+2. You MUST read and process these results, then respond naturally to the user
+3. NEVER show raw JSON or tool results directly to the user
+4. Synthesize the information from multiple sources into a coherent answer
+5. Cite your sources by mentioning page titles or URLs when relevant
+6. If the search doesn't find what you need, explain what you found and ask clarifying questions
+
+The user only sees your text responses - not tool results. Always provide a complete, natural language answer after using web_search.
+
+### Todo List Management
+
+When using the write_todos tool:
+1. Keep the todo list MINIMAL - aim for 3-6 items maximum
+2. Only create todos for complex, multi-step tasks that truly need tracking
+3. Break down work into clear, actionable items without over-fragmenting
+4. For simple tasks (1-2 steps), just do them directly without creating todos
+5. When first creating a todo list for a task, ALWAYS ask the user if the plan looks good before starting work
+ - Create the todos, let them render, then ask: "Does this plan look good?" or similar
+ - Wait for the user's response before marking the first todo as in_progress
+ - If they want changes, adjust the plan accordingly
+6. Update todo status promptly as you complete each item
+
+The todo list is a planning tool - use it judiciously to avoid overwhelming the user with excessive task tracking.
+
+## System Information
+- **Current User**: {user_identifier}
+- **Current Time**: {datetime}
diff --git a/task_queue/tasks.py b/task_queue/tasks.py
index 50f04e6..7911599 100644
--- a/task_queue/tasks.py
+++ b/task_queue/tasks.py
@@ -19,7 +19,6 @@ from .config import huey
from utils.file_utils import (
extract_zip_file,
get_file_hash,
- is_file_already_processed,
load_processed_files_log,
save_processed_files_log,
get_document_preview
diff --git a/utils/__init__.py b/utils/__init__.py
index a3c12e0..0699d4b 100644
--- a/utils/__init__.py
+++ b/utils/__init__.py
@@ -9,7 +9,6 @@ from .file_utils import (
remove_file_or_directory,
extract_zip_file,
get_document_preview,
- is_file_already_processed,
load_processed_files_log,
save_processed_files_log
)
@@ -44,11 +43,6 @@ from .agent_pool import (
release_agent_to_pool
)
-from .organize_dataset_files import (
- is_file_already_processed,
- organize_single_project_files,
- organize_dataset_files
-)
from .api_models import (
Message,
@@ -77,8 +71,6 @@ from .api_models import (
from .multi_project_manager import (
create_robot_project,
- get_robot_project_info,
- cleanup_robot_project,
get_unique_folder_name,
copy_dataset_folder,
generate_robot_readme
@@ -96,7 +88,6 @@ __all__ = [
'remove_file_or_directory',
'extract_zip_file',
'get_document_preview',
- 'is_file_already_processed',
'load_processed_files_log',
'save_processed_files_log',
@@ -122,10 +113,6 @@ __all__ = [
'get_agent_from_pool',
'release_agent_to_pool',
- # organize_dataset_files
- 'is_file_already_processed',
- 'organize_single_project_files',
- 'organize_dataset_files',
# api_models
'Message',
@@ -152,8 +139,6 @@ __all__ = [
# multi_project_manager
'create_robot_project',
- 'get_robot_project_info',
- 'cleanup_robot_project',
'get_unique_folder_name',
'copy_dataset_folder',
'generate_robot_readme',
diff --git a/utils/fastapi_utils.py b/utils/fastapi_utils.py
index 3033796..e11e066 100644
--- a/utils/fastapi_utils.py
+++ b/utils/fastapi_utils.py
@@ -373,7 +373,8 @@ def create_project_directory(dataset_ids: Optional[List[str]], bot_id: str, robo
try:
from utils.multi_project_manager import create_robot_project
- return create_robot_project(dataset_ids, bot_id)
+ from pathlib import Path
+ return create_robot_project(dataset_ids, bot_id, project_path=Path("~", ".deepagents").expanduser())
except Exception as e:
logger.error(f"Error creating project directory: {e}")
return None
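Why the call above passes the root by keyword: in the updated `create_robot_project` signature (see utils/multi_project_manager.py later in this diff), the third positional parameter is `force_rebuild`, so a positionally passed `Path` would bind to it instead of `project_path`. A self-contained sketch with a stub that only mirrors the signature:

```python
from pathlib import Path

def create_robot_project(dataset_ids, bot_id, force_rebuild=False, project_path=Path("projects")):
    """Stub that mirrors the signature in utils/multi_project_manager.py."""
    return force_rebuild, project_path

# Passed positionally, the Path binds to force_rebuild (and is truthy),
# while project_path silently keeps its "projects" default:
print(create_robot_project([], "bot", Path("~", ".deepagents")))
# -> (PosixPath('~/.deepagents'), PosixPath('projects')) on POSIX

# Passed by keyword (and expanded), the path lands where intended:
print(create_robot_project([], "bot", project_path=Path("~", ".deepagents").expanduser()))
# -> (False, PosixPath('/home/<user>/.deepagents')), for example
```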
diff --git a/utils/file_utils.py b/utils/file_utils.py
index 8495d61..2cdb7c7 100644
--- a/utils/file_utils.py
+++ b/utils/file_utils.py
@@ -91,18 +91,6 @@ def get_document_preview(document_path: str, max_lines: int = 10) -> str:
return f"Error reading document: {str(e)}"
-def is_file_already_processed(target_file: Path, pagination_file: Path, embeddings_file: Path) -> bool:
- """Check if a file has already been processed (document.txt, pagination.txt, and embeddings exist)"""
- if not target_file.exists():
- return False
-
- # Check if pagination and embeddings files exist and are not empty
- if pagination_file.exists() and embeddings_file.exists():
- # Check file sizes to ensure they're not empty
- if pagination_file.stat().st_size > 0 and embeddings_file.stat().st_size > 0:
- return True
-
- return False
def load_processed_files_log(unique_id: str) -> Dict[str, Dict]:
diff --git a/utils/multi_project_manager.py b/utils/multi_project_manager.py
index 76cc16f..b7b167a 100644
--- a/utils/multi_project_manager.py
+++ b/utils/multi_project_manager.py
@@ -141,7 +141,7 @@ def get_unique_folder_name(target_dir: Path, original_name: str) -> str:
counter += 1
-def copy_dataset_folder(source_project_id: str, target_dataset_dir: Path, folder_name: str) -> Dict:
+def copy_dataset_folder(source_project_id: str, target_dataset_dir: Path, folder_name: str, project_path: Path) -> Dict:
"""
Copy a single project's dataset folder to the target directory
@@ -149,6 +149,7 @@ def copy_dataset_folder(source_project_id: str, target_dataset_dir: Path, folder
source_project_id: Source project ID
target_dataset_dir: Target dataset directory
folder_name: Name of the folder to copy
+ project_path: Project root path
Returns:
Dict: Copy result
@@ -163,7 +164,7 @@ def copy_dataset_folder(source_project_id: str, target_dataset_dir: Path, folder
}
try:
- source_folder = Path("projects") / "data" / source_project_id / "dataset" / folder_name
+ source_folder = project_path / "data" / source_project_id / "dataset" / folder_name
result["source_path"] = str(source_folder)
if not source_folder.exists():
@@ -190,7 +191,7 @@ def copy_dataset_folder(source_project_id: str, target_dataset_dir: Path, folder
return result
-def generate_robot_readme(robot_id: str, dataset_ids: List[str], copy_results: List[Dict]) -> str:
+def generate_robot_readme(robot_id: str, dataset_ids: List[str], copy_results: List[Dict], project_path: Path) -> str:
"""
Generate the README.md file for the robot project
@@ -202,10 +203,10 @@ def generate_robot_readme(robot_id: str, dataset_ids: List[str], copy_results: L
Returns:
str: Path to the README.md file
"""
- readme_path = Path("projects") / "robot" / robot_id / "README.md"
+ readme_path = project_path / "robot" / robot_id / "README.md"
readme_path.parent.mkdir(parents=True, exist_ok=True)
- robot_dir = Path("projects") / "robot" / robot_id
+ robot_dir = project_path / "robot" / robot_id
# Statistics
total_folders = len(copy_results)
@@ -300,7 +301,7 @@ def generate_robot_readme(robot_id: str, dataset_ids: List[str], copy_results: L
return str(readme_path)
-def should_rebuild_robot_project(dataset_ids: List[str], bot_id: str) -> bool:
+def should_rebuild_robot_project(dataset_ids: List[str], bot_id: str, project_path: Path) -> bool:
"""
Check whether the robot project needs to be rebuilt
1. Check whether the robot project exists
@@ -310,11 +311,12 @@ def should_rebuild_robot_project(dataset_ids: List[str], bot_id: str) -> bool:
Args:
dataset_ids: List of source project IDs
bot_id: Robot ID
+ project_path: Project root path
Returns:
bool: Whether a rebuild is needed
"""
- robot_dir = Path("projects") / "robot" / bot_id
+ robot_dir = project_path / "robot" / bot_id
# If the robot project does not exist, it needs to be created
if not robot_dir.exists():
@@ -356,7 +358,7 @@ def should_rebuild_robot_project(dataset_ids: List[str], bot_id: str) -> bool:
# Check each source project's processing_log.json file
for source_project_id in dataset_ids:
- log_file = Path("projects") / "data" / source_project_id / "processing_log.json"
+ log_file = project_path / "data" / source_project_id / "processing_log.json"
if not log_file.exists():
logger.info(f"Processing log file not found for project {source_project_id}, will rebuild")
@@ -373,7 +375,7 @@ def should_rebuild_robot_project(dataset_ids: List[str], bot_id: str) -> bool:
return False
-def create_robot_project(dataset_ids: List[str], bot_id: str, force_rebuild: bool = False) -> str:
+def create_robot_project(dataset_ids: List[str], bot_id: str, force_rebuild: bool = False, project_path: Path = Path("projects")) -> str:
"""
创建机器人项目,合并多个源项目的dataset文件夹
@@ -386,15 +388,15 @@ def create_robot_project(dataset_ids: List[str], bot_id: str, force_rebuild: boo
str: Path to the robot project directory
"""
logger.info(f"Creating robot project: {bot_id} from sources: {dataset_ids}")
-
+
# Check whether a rebuild is needed
- if not force_rebuild and not should_rebuild_robot_project(dataset_ids, bot_id):
- robot_dir = Path("projects") / "robot" / bot_id
+ if not force_rebuild and not should_rebuild_robot_project(dataset_ids, bot_id, project_path):
+ robot_dir = project_path / "robot" / bot_id
logger.info(f"Using existing robot project: {robot_dir}")
return str(robot_dir)
# Create the robot directory structure
- robot_dir = Path("projects") / "robot" / bot_id
+ robot_dir = project_path / "robot" / bot_id
dataset_dir = robot_dir / "dataset"
# Clean up the existing directory (if needed)
@@ -411,7 +413,7 @@ def create_robot_project(dataset_ids: List[str], bot_id: str, force_rebuild: boo
for source_project_id in dataset_ids:
logger.info(f"\nProcessing source project: {source_project_id}")
- source_dataset_dir = Path("projects") / "data" / source_project_id / "dataset"
+ source_dataset_dir = project_path / "data" / source_project_id / "dataset"
if not source_dataset_dir.exists():
logger.warning(f" Warning: Dataset directory not found for project {source_project_id}")
@@ -426,7 +428,7 @@ def create_robot_project(dataset_ids: List[str], bot_id: str, force_rebuild: boo
# Copy each folder
for folder in folders:
- result = copy_dataset_folder(source_project_id, dataset_dir, folder.name)
+ result = copy_dataset_folder(source_project_id, dataset_dir, folder.name, project_path)
copy_results.append(result)
# Save configuration data
@@ -442,7 +444,7 @@ def create_robot_project(dataset_ids: List[str], bot_id: str, force_rebuild: boo
json.dump(config_data, f, ensure_ascii=False, indent=2)
# Generate the README
- readme_path = generate_robot_readme(bot_id, dataset_ids, copy_results)
+ readme_path = generate_robot_readme(bot_id, dataset_ids, copy_results, project_path)
# Statistics
successful_copies = sum(1 for r in copy_results if r["success"])
@@ -456,78 +458,6 @@ def create_robot_project(dataset_ids: List[str], bot_id: str, force_rebuild: boo
return str(robot_dir)
-def get_robot_project_info(bot_id: str) -> Dict:
- """
- Get robot project information
-
- Args:
- bot_id: Robot ID
-
- Returns:
- Dict: Robot project information
- """
- robot_dir = Path("projects") / "robot" / bot_id
-
- if not robot_dir.exists():
- return {
- "exists": False,
- "bot_id": bot_id,
- "error": "Robot project does not exist"
- }
-
- dataset_dir = robot_dir / "dataset"
- readme_path = robot_dir / "README.md"
-
- # Count dataset folders
- folder_count = 0
- total_size = 0
- if dataset_dir.exists():
- for item in dataset_dir.iterdir():
- if item.is_dir():
- folder_count += 1
- # Compute the folder size
- for file_path in item.rglob('*'):
- if file_path.is_file():
- total_size += file_path.stat().st_size
-
- return {
- "exists": True,
- "bot_id": bot_id,
- "robot_dir": str(robot_dir),
- "dataset_dir": str(dataset_dir),
- "readme_exists": readme_path.exists(),
- "folder_count": folder_count,
- "total_size_bytes": total_size,
- "total_size_mb": round(total_size / (1024 * 1024), 2)
- }
-
-
-def cleanup_robot_project(bot_id: str) -> bool:
- """
- Clean up the robot project
-
- Args:
- bot_id: Robot ID
-
- Returns:
- bool: Whether cleanup succeeded
- """
- try:
- robot_dir = Path("projects") / "robot" / bot_id
-
- if robot_dir.exists():
- shutil.rmtree(robot_dir)
- logger.info(f"Cleaned up robot project: {bot_id}")
- return True
- else:
- logger.info(f"Robot project does not exist: {bot_id}")
- return True
-
- except Exception as e:
- logger.error(f"Error cleaning up robot project {bot_id}: {str(e)}")
- return False
-
-
if __name__ == "__main__":
# Test code
test_dataset_ids = ["test-project-1", "test-project-2"]
@@ -536,5 +466,3 @@ if __name__ == "__main__":
robot_dir = create_robot_project(test_dataset_ids, test_bot_id)
logger.info(f"Created robot project at: {robot_dir}")
- info = get_robot_project_info(test_bot_id)
- logger.info(f"Robot project info: {json.dumps(info, indent=2, ensure_ascii=False)}")
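A usage sketch of the new `project_path` parameter, assuming the caller wants the merged robot project under `~/.deepagents` instead of the old hard-coded `projects/` root; the dataset and bot ids are hypothetical.

```python
from pathlib import Path
from utils.multi_project_manager import create_robot_project

dataset_ids = ["proj-a", "proj-b"]     # hypothetical source project ids
bot_id = "bot-123"                     # hypothetical robot id
root = Path.home() / ".deepagents"     # replaces the former hard-coded Path("projects")

robot_dir = create_robot_project(dataset_ids, bot_id, project_path=root)

# Directory layout implied by the paths in this module:
#   {root}/data/{dataset_id}/dataset/...            source folders copied by copy_dataset_folder
#   {root}/data/{dataset_id}/processing_log.json    consulted by should_rebuild_robot_project
#   {root}/robot/{bot_id}/dataset/...               merged dataset for the robot
#   {root}/robot/{bot_id}/README.md                 written by generate_robot_readme
print(robot_dir)  # e.g. /home/<user>/.deepagents/robot/bot-123
```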
diff --git a/utils/organize_dataset_files.py b/utils/organize_dataset_files.py
deleted file mode 100644
index 4cff748..0000000
--- a/utils/organize_dataset_files.py
+++ /dev/null
@@ -1,179 +0,0 @@
-#!/usr/bin/env python3
-import os
-import shutil
-import logging
-from pathlib import Path
-
-# Configure logging
-logger = logging.getLogger('app')
-
-def is_file_already_processed(target_file: Path, pagination_file: Path, embeddings_file: Path) -> bool:
- """Check if a file has already been processed (document.txt, pagination.txt, and embeddings exist)"""
- if not target_file.exists():
- return False
-
- # Check if pagination and embeddings files exist and are not empty
- if pagination_file.exists() and embeddings_file.exists():
- # Check file sizes to ensure they're not empty
- if pagination_file.stat().st_size > 0 and embeddings_file.stat().st_size > 0:
- return True
-
- return False
-
-def organize_single_project_files(unique_id: str, skip_processed=True):
- """Organize files for a single project from projects/data/{unique_id}/files to projects/data/{unique_id}/dataset/{file_name}/document.txt"""
-
- project_dir = Path("projects") / "data" / unique_id
-
- if not project_dir.exists():
- logger.error(f"Project directory not found: {project_dir}")
- return
-
- logger.info(f"Organizing files for project: {unique_id} (skip_processed={skip_processed})")
-
- files_dir = project_dir / "files"
- dataset_dir = project_dir / "dataset"
-
- # Check if files directory exists and has files
- if not files_dir.exists():
- logger.info(f" No files directory found, skipping...")
- return
-
- files = list(files_dir.glob("*"))
- if not files:
- logger.info(f" Files directory is empty, skipping...")
- return
-
- # Create dataset directory if it doesn't exist
- dataset_dir.mkdir(exist_ok=True)
-
- # Copy each file to its own directory
- for file_path in files:
- if file_path.is_file():
- # Get filename without extension as directory name
- file_name_without_ext = file_path.stem
- target_dir = dataset_dir / file_name_without_ext
- target_file = target_dir / "document.txt"
- pagination_file = target_dir / "pagination.txt"
- embeddings_file = target_dir / "embedding.pkl"
-
- # Check if file is already processed
- if skip_processed and is_file_already_processed(target_file, pagination_file, embeddings_file):
- logger.info(f" Skipping already processed file: {file_path.name}")
- continue
-
- logger.info(f" Copying {file_path.name} -> {target_file.relative_to(project_dir)}")
-
- # Create target directory
- target_dir.mkdir(exist_ok=True)
-
- # Copy and rename file
- shutil.copy2(str(file_path), str(target_file))
-
- print(f" Files remain in original location (copied to dataset structure)")
-
- # Process each document.txt file: split pages and generate embeddings
- if not skip_processed:
- import sys
- sys.path.append(os.path.join(os.path.dirname(__file__), 'embedding'))
-
- from embedding import split_document_by_pages, embed_document
-
- for file_path in files:
- if file_path.is_file():
- file_name_without_ext = file_path.stem
- target_dir = dataset_dir / file_name_without_ext
- document_file = target_dir / "document.txt"
- pagination_file = target_dir / "pagination.txt"
- embeddings_file = target_dir / "embedding.pkl"
-
- # Skip if already processed
- if is_file_already_processed(document_file, pagination_file, embeddings_file):
- print(f" Skipping document processing for already processed file: {file_path.name}")
- continue
-
- # Split document by pages
- print(f" Splitting pages for {document_file.name}")
- try:
- pages = split_document_by_pages(str(document_file), str(pagination_file))
- print(f" Generated {len(pages)} pages")
- except Exception as e:
- print(f" Failed to split pages: {e}")
- continue
-
- # Generate embeddings
- print(f" Generating embeddings for {document_file.name}")
- try:
- # Use paragraph chunking strategy with default settings
- embedding_data = embed_document(
- str(document_file),
- str(embeddings_file),
- chunking_strategy='paragraph'
- )
-
- if embedding_data:
- print(f" Generated embeddings for {len(embedding_data['chunks'])} chunks")
- else:
- print(f" Failed to generate embeddings")
- except Exception as e:
- print(f" Failed to generate embeddings: {e}")
-
- print(f" Document processing completed for project {unique_id}")
- else:
- print(f" Skipping document processing (skip_processed=True)")
-
-
-def organize_dataset_files():
- """Move files from projects/data/{unique_id}/files to projects/data/{unique_id}/dataset/{file_name}/document.txt"""
-
- projects_dir = Path("projects") / "data"
-
- if not projects_dir.exists():
- print("Projects directory not found")
- return
-
- # Get all project directories (exclude cache and other non-project dirs)
- project_dirs = [d for d in projects_dir.iterdir()
- if d.is_dir() and d.name != "_cache" and not d.name.startswith(".")]
-
- for project_dir in project_dirs:
- print(f"\nProcessing project: {project_dir.name}")
-
- files_dir = project_dir / "files"
- dataset_dir = project_dir / "dataset"
-
- # Check if files directory exists and has files
- if not files_dir.exists():
- logger.info(f" No files directory found, skipping...")
- continue
-
- files = list(files_dir.glob("*"))
- if not files:
- logger.info(f" Files directory is empty, skipping...")
- continue
-
- # Create dataset directory if it doesn't exist
- dataset_dir.mkdir(exist_ok=True)
-
- # Move each file to its own directory
- for file_path in files:
- if file_path.is_file():
- # Get filename without extension as directory name
- file_name_without_ext = file_path.stem
- target_dir = dataset_dir / file_name_without_ext
- target_file = target_dir / "document.txt"
-
- logger.info(f" Copying {file_path.name} -> {target_file.relative_to(project_dir)}")
-
- # Create target directory
- target_dir.mkdir(exist_ok=True)
-
- # Copy and rename file
- shutil.copy2(str(file_path), str(target_file))
-
- print(f" Files remain in original location (copied to dataset structure)")
-
- print("\nFile organization complete!")
-
-if __name__ == "__main__":
- organize_dataset_files()
diff --git a/utils/symlink_utils.py b/utils/symlink_utils.py
index 0e83de4..bd7a3b0 100644
--- a/utils/symlink_utils.py
+++ b/utils/symlink_utils.py
@@ -54,6 +54,17 @@ def setup_deepagents_symlink():
logger.info(f"Removed existing symlink pointing to {target}")
# Create the symbolic link
+ # Check again before creating to handle race conditions
+ if deepagents_dir.is_symlink() or deepagents_dir.exists():
+ logger.warning(f"Path {deepagents_dir} exists, attempting to remove before symlink")
+ if deepagents_dir.is_symlink():
+ deepagents_dir.unlink()
+ elif deepagents_dir.is_dir():
+ import shutil
+ shutil.rmtree(str(deepagents_dir))
+ else:
+ deepagents_dir.unlink()
+
os.symlink(robot_dir, deepagents_dir, target_is_directory=True)
logger.info(f"Created symbolic link: {deepagents_dir} -> {robot_dir}")
return True