Merge branch 'master' into bot_manager

This commit is contained in:
朱潮 2026-02-28 23:28:56 +08:00
commit 9c258664f2

View File

@ -306,176 +306,39 @@ def _get_robot_dir(project_path: Path, bot_id: str) -> Path:
return project_path / "robot" / bot_id
def should_rebuild_robot_project(dataset_ids: List[str], bot_id: str, project_path: Path) -> bool:
    """
    Decide whether the robot project for *bot_id* must be (re)built.

    A rebuild is required when:
      1. the robot project directory does not exist yet;
      2. the cached robot_config.json is missing, unreadable, or its
         recorded dataset_ids differ from the current ones; or
      3. any source project's processing_log.json is newer than the
         robot project directory (or is missing entirely).

    Args:
        dataset_ids: source project IDs that feed this robot.
        bot_id: robot identifier.
        project_path: root path containing the "data" and "robot" trees.

    Returns:
        bool: True when the robot project must be rebuilt.
    """
    robot_dir = _get_robot_dir(project_path, bot_id)

    # No project directory at all: must create from scratch.
    if not robot_dir.exists():
        logger.info(f"Robot project does not exist, need to create: {bot_id}")
        return True

    # Without the config file we cannot trust the cached state.
    config_file = robot_dir / "robot_config.json"
    if not config_file.exists():
        logger.info(f"Robot config file not found, need to rebuild: {bot_id}")
        return True

    # A corrupt/unreadable config also forces a rebuild (cache is best-effort).
    try:
        with open(config_file, 'r', encoding='utf-8') as f:
            config = json.load(f)
        cached_dataset_ids = set(config.get("dataset_ids", []))
    except Exception as e:
        logger.error(f"Error reading robot config: {e}, need to rebuild")
        return True

    # Any difference in dataset_ids — added or removed — invalidates the cache.
    current_dataset_ids = set(dataset_ids)
    new_ids = current_dataset_ids - cached_dataset_ids
    if new_ids:
        logger.info(f"Found new dataset_ids: {new_ids}, need to rebuild")
        return True
    removed_ids = cached_dataset_ids - current_dataset_ids
    if removed_ids:
        logger.info(f"Removed dataset_ids: {removed_ids}, need to rebuild")
        return True

    # NOTE(review): a directory's mtime changes when entries are added or
    # removed, not when nested files are modified — assumed good enough
    # as a "last built" timestamp here; confirm if builds edit files in place.
    robot_mod_time = robot_dir.stat().st_mtime

    # Rebuild when any source project's processing log is newer than the
    # robot project, or when a log file is missing.
    for source_project_id in dataset_ids:
        log_file = project_path / "data" / source_project_id / "processing_log.json"
        if not log_file.exists():
            logger.info(f"Processing log file not found for project {source_project_id}, will rebuild")
            return True
        if log_file.stat().st_mtime > robot_mod_time:
            logger.info(f"Processing log updated for project {source_project_id}, need to rebuild")
            return True

    logger.info(f"Robot project {bot_id} is up to date, no rebuild needed")
    return False
def create_robot_project(dataset_ids: List[str], bot_id: str, force_rebuild: bool = False, project_path: Path = Path("projects"), skills: Optional[List[str]] = None) -> str:
    """
    Create a robot project by merging the dataset folders of multiple
    source projects.

    When the cached robot project is still current (see
    should_rebuild_robot_project) and force_rebuild is False, the existing
    project is reused; skills are refreshed either way when provided.

    Args:
        dataset_ids: source project IDs whose datasets are merged.
        bot_id: robot identifier.
        force_rebuild: when True, rebuild even if the cached project is current.
        project_path: root path containing the "data" and "robot" trees.
        skills: skill file names, e.g. ["rag-retrieve", "device_controller.zip"].

    Returns:
        str: path of the robot project directory.
    """
    logger.info(f"Creating robot project: {bot_id} from sources: {dataset_ids}, skills: {skills}")

    # Fast path: reuse the cached project when nothing changed.
    if not force_rebuild and not should_rebuild_robot_project(dataset_ids, bot_id, project_path):
        robot_dir = project_path / "robot" / bot_id
        logger.info(f"Using existing robot project: {robot_dir}")
        # Skills may still need to be refreshed even when the project is reused.
        if skills:
            _extract_skills_to_robot(bot_id, skills, project_path)
        return str(robot_dir)

    # Lay out the robot directory structure.
    robot_dir = _get_robot_dir(project_path, bot_id)
    dataset_dir = robot_dir / "dataset"
    scripts_dir = robot_dir / "scripts"
    download_dir = robot_dir / "download"

    # Remove any stale project so the rebuild starts from a clean slate.
    if robot_dir.exists():
        logger.info(f"Robot directory already exists, cleaning up: {robot_dir}")
        shutil.rmtree(robot_dir)

    for directory in (robot_dir, dataset_dir, scripts_dir, download_dir):
        directory.mkdir(parents=True, exist_ok=True)

    copy_results = []
    # Merge every source project's dataset folders into the robot project.
    for source_project_id in dataset_ids:
        logger.info(f"\nProcessing source project: {source_project_id}")
        source_dataset_dir = project_path / "data" / source_project_id / "dataset"
        if not source_dataset_dir.exists():
            logger.warning(f" Warning: Dataset directory not found for project {source_project_id}")
            continue
        folders = [f for f in source_dataset_dir.iterdir() if f.is_dir()]
        if not folders:
            logger.warning(f" Warning: No folders found in dataset directory for project {source_project_id}")
            continue
        for folder in folders:
            result = copy_dataset_folder(source_project_id, dataset_dir, folder.name, project_path)
            copy_results.append(result)

    # Persist the inputs of this build so the rebuild check can compare
    # against them next time.
    config_file = robot_dir / "robot_config.json"
    config_data = {
        "dataset_ids": dataset_ids,
        "bot_id": bot_id,
        "created_at": datetime.now().isoformat(),
        "total_folders": len(copy_results),
        "successful_copies": sum(1 for r in copy_results if r["success"])
    }
    with open(config_file, 'w', encoding='utf-8') as f:
        json.dump(config_data, f, ensure_ascii=False, indent=2)

    readme_path = generate_robot_readme(bot_id, dataset_ids, copy_results, project_path)

    # Summary statistics for the build log.
    successful_copies = sum(1 for r in copy_results if r["success"])
    logger.info(f"\nRobot project creation completed:")
    logger.info(f"  Robot directory: {robot_dir}")
    logger.info(f"  Total folders processed: {len(copy_results)}")
    logger.info(f"  Successful copies: {successful_copies}")
    logger.info(f"  Config saved: {config_file}")
    logger.info(f"  README generated: {readme_path}")

    # Unpack skill archives into the freshly built project.
    if skills:
        _extract_skills_to_robot(bot_id, skills, project_path)

    return str(robot_dir)