fix(skills): improve skill extraction and handling logic

- Refactor _extract_skills_to_robot to accept bot_id instead of robot_dir
- Add multi-directory skill search with priority order
- Switch from zip extraction to direct directory copying
- Add rag-retrieve skill directory
2026-01-07 14:56:10 +08:00 · 2026-01-07 14:56:10 +08:00 · f74f09c191
commit f74f09c191
parent 92c82c24a4
4 changed files with 355 additions and 29 deletions
--- a/skills/rag-retrieve/SKILL.md
+++ b/skills/rag-retrieve/SKILL.md
@ -0,0 +1,147 @@
+---
+name: rag-retrieve
+description: RAG retrieval skill for querying and retrieving relevant documents from knowledge base. Use this skill when users need to search documentation, retrieve knowledge base articles, or get context from a vector database. Supports semantic search with configurable top-k results.
+---
+
+# RAG Retrieve
+
+## Skill Structure
+
+This is a **self-contained skill package** that can be distributed independently. The skill includes its own scripts and configuration:
+
+```
+rag-retrieve/
+├── SKILL.md              # Core instruction file (this file)
+├── skill.yaml            # Skill metadata
+└── scripts/              # Executable scripts
+    └── rag_retrieve.py   # Main RAG retrieval script
+```
+
+## Overview
+
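+Query and retrieve relevant documents from a RAG (Retrieval-Augmented Generation) knowledge base using vector search. This skill provides semantic search with a configurable result limit. Multiple bot instances are supported: the target bot is not a command-line parameter; instead, the script reads `bot_id` from a `robot_config.json` file located three directory levels above the `scripts/` directory (that is, the directory that contains the skills folder).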
+
+## Required Parameters
+
+Before executing any retrieval, you MUST confirm the following required parameters with the user if they are not explicitly provided:
+
+| Parameter | Description | Type |
+|-----------|-------------|------|
+| **query** | Search query content | string |
+
+### Optional Parameters
+
+| Parameter | Description | Type | Default |
+|-----------|-------------|------|---------|
+| **top_k** | Maximum number of results | integer | 100 |
+
+### Confirmation Template
+
+When the required parameter is missing, ask the user:
+
+```
+I need some information to perform the RAG retrieval:
+
+1. Query: What would you like to search for?
+```
+
+## Quick Start
+
+Use the `scripts/rag_retrieve.py` script to execute RAG queries:
+
+```bash
+scripts/rag_retrieve.py --query "your search query"
+```
+
+## Usage Examples
+
+### Basic Query
+
+```bash
+scripts/rag_retrieve.py --query "How to configure authentication?"
+```
+
+### Search with Specific Top-K
+
+```bash
+scripts/rag_retrieve.py --query "API error handling" --top-k 50
+```
+
+### Common Use Cases
+
+**Scenario 1: Documentation Search**
+```bash
+scripts/rag_retrieve.py --query "deployment guide"
+```
+
+**Scenario 2: Troubleshooting**
+```bash
+scripts/rag_retrieve.py --query "connection timeout error"
+```
+
+**Scenario 3: Feature Information**
+```bash
+scripts/rag_retrieve.py --query "enterprise pricing plans"
+```
+
+## Script Usage
+
+### rag_retrieve.py
+
+Main script for executing RAG retrieval queries.
+
+```bash
+scripts/rag_retrieve.py [OPTIONS]
+```
+
+**Options:**
+
+| Option | Required | Description | Default |
+|--------|----------|-------------|---------|
+| `--query`, `-q` | Yes | Search query content | - |
+| `--top-k`, `-k` | No | Maximum number of results | 100 |
+
+**Examples:**
+
+```bash
+# Basic query
+scripts/rag_retrieve.py --query "authentication setup"
+
+# Custom top-k
+scripts/rag_retrieve.py --query "API reference" --top-k 20
+```
+
+## Common Workflows
+
+### Research Mode: Comprehensive Search
+
+```bash
+scripts/rag_retrieve.py --query "machine learning algorithms" --top-k 100
+```
+
+### Quick Answer Mode: Focused Search
+
+```bash
+scripts/rag_retrieve.py --query "password reset" --top-k 10
+```
+
+### Comparison Mode: Multiple Queries
+
+```bash
+# Search for related topics
+scripts/rag_retrieve.py --query "REST API" --top-k 30
+scripts/rag_retrieve.py --query "GraphQL API" --top-k 30
+```
+
+## Resources
+
+### scripts/rag_retrieve.py
+
+Executable Python script for RAG retrieval. Handles:
+- HTTP requests to RAG API
+- Authentication token generation
+- Configuration file loading
+- Error handling and reporting
+- Markdown response parsing
+
+The script can be executed directly without loading into context.
--- a/skills/rag-retrieve/scripts/rag_retrieve.py
+++ b/skills/rag-retrieve/scripts/rag_retrieve.py
@ -0,0 +1,144 @@
+#!/usr/bin/env python3
+"""
+RAG检索脚本
+调用本地RAG API进行文档检索
+"""
+
+import argparse
+import hashlib
+import json
+import os
+import sys
+
+try:
+    import requests
+except ImportError:
+    print("Error: requests module is required. Please install it with: pip install requests")
+    sys.exit(1)
+
+
+# Default configuration, overridable via the BACKEND_HOST and MASTERKEY environment variables
+DEFAULT_BACKEND_HOST = os.getenv("BACKEND_HOST", "https://api-dev.gptbase.ai")
+DEFAULT_MASTERKEY = os.getenv("MASTERKEY", "master")
+
+
+def load_config() -> dict:
+    """
+    从项目根目录的robot_config.json加载配置
+
+    Returns:
+        dict: 配置字典
+    """
+    print(os.path.dirname(__file__))
+    config_path = os.path.join(os.path.dirname(__file__), '..', '..', '..', 'robot_config.json')
+
+    if os.path.exists(config_path):
+        try:
+            with open(config_path, 'r', encoding='utf-8') as f:
+                return json.load(f)
+        except (json.JSONDecodeError, IOError) as e:
+            print(f"Warning: Failed to load config file: {e}", file=sys.stderr)
+
+    return {}
+
+
+def rag_retrieve(query: str, top_k: int = 100, config: dict = None) -> str:
+    """
+    调用RAG检索API
+
+    Args:
+        bot_id: Bot标识符（如果为None则从config读取）
+        query: 检索查询内容
+        top_k: 返回结果数量
+        config: 配置字典（可选）
+
+    Returns:
+        str: markdown格式的检索结果
+    """
+    if config is None:
+        config = {}
+
+    # 从config.env读取配置，如果没有则使用默认值
+    host =DEFAULT_BACKEND_HOST
+    masterkey = DEFAULT_MASTERKEY
+
+    bot_id = config.get('bot_id')
+
+    if not bot_id:
+        return "Error: bot_id is required"
+
+    if not query:
+        return "Error: query is required"
+
+    url = f"{host}/v1/rag_retrieve/{bot_id}"
+
+    # 生成认证token
+    token_input = f"{masterkey}:{bot_id}"
+    auth_token = hashlib.md5(token_input.encode()).hexdigest()
+
+    headers = {
+        "content-type": "application/json",
+        "authorization": f"Bearer {auth_token}"
+    }
+    data = {
+        "query": query,
+        "top_k": top_k
+    }
+
+    try:
+        response = requests.post(url, json=data, headers=headers, timeout=30)
+
+        if response.status_code != 200:
+            return f"Error: RAG API returned status code {response.status_code}. Response: {response.text}"
+
+        try:
+            response_data = response.json()
+        except json.JSONDecodeError as e:
+            return f"Error: Failed to parse API response as JSON. Error: {str(e)}, Raw response: {response.text}"
+
+        # 提取markdown字段
+        if "markdown" in response_data:
+            return response_data["markdown"]
+        else:
+            return f"Error: 'markdown' field not found in API response. Response: {json.dumps(response_data, indent=2, ensure_ascii=False)}"
+
+    except requests.exceptions.RequestException as e:
+        return f"Error: Failed to connect to RAG API. {str(e)}"
+    except Exception as e:
+        return f"Error: {str(e)}"
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="RAG检索工具 - 从知识库中检索相关文档"
+    )
+    parser.add_argument(
+        "--query",
+        "-q",
+        required=True,
+        help="检索查询内容"
+    )
+    parser.add_argument(
+        "--top-k",
+        "-k",
+        type=int,
+        default=100,
+        help="返回结果数量（默认：100）"
+    )
+
+    args = parser.parse_args()
+
+    # 加载配置
+    config = load_config()
+
+    result = rag_retrieve(
+        query=args.query,
+        top_k=args.top_k,
+        config=config
+    )
+
+    print(result)
+
+
+if __name__ == "__main__":
+    main()
--- a/skills/rag-retrieve/skill.yaml
+++ b/skills/rag-retrieve/skill.yaml
@ -0,0 +1,26 @@
+name: rag-retrieve
+version: 1.0.0
+description: RAG retrieval skill for querying and retrieving relevant documents from knowledge base using vector search
+author:
+  name: sparticle
+  email: support@gbase.ai
+license: MIT
+tags:
+  - rag
+  - retrieval
+  - vector-search
+  - knowledge-base
+runtime:
+  python: ">=3.7"
+  dependencies:
+    - requests
+entry_point: scripts/rag_retrieve.py
+config:
+  query:
+    type: string
+    required: true
+    description: Search query content
+  top_k:
+    type: integer
+    default: 100
+    description: Maximum number of results
--- a/utils/multi_project_manager.py
+++ b/utils/multi_project_manager.py
@ -407,7 +407,7 @@ def create_robot_project(dataset_ids: List[str], bot_id: str, force_rebuild: boo
        logger.info(f"Using existing robot project: {robot_dir}")
        # 即使使用现有项目，也要处理 skills（如果提供了）
        if skills:
-            _extract_skills_to_robot(robot_dir, skills, project_path)
+            _extract_skills_to_robot(bot_id, skills, project_path)
        return str(robot_dir)

    # 创建机器人目录结构
@ -479,7 +479,7 @@ def create_robot_project(dataset_ids: List[str], bot_id: str, force_rebuild: boo

    # 处理 skills 解压
    if skills:
-        _extract_skills_to_robot(robot_dir, skills, project_path)
+        _extract_skills_to_robot(bot_id, skills, project_path)

    return str(robot_dir)

@ -493,52 +493,61 @@ if __name__ == "__main__":
    logger.info(f"Created robot project at: {robot_dir}")


-def _extract_skills_to_robot(robot_dir: Path, skills: List[str], project_path: Path) -> None:
+def _extract_skills_to_robot(bot_id: str, skills: List[str], project_path: Path) -> None:
    """
-    解压 skills 到 robot 项目的 skills 文件夹
+    复制 skills 到 robot 项目的 skills 文件夹
+    - 如果是完整路径（如 "projects/uploads/xxx/skills/rag-retrieve_2.zip"），直接使用该路径
+    - 如果是简单名称（如 "rag-retrieve"），从以下目录按优先级顺序查找：
+      1. projects/uploads/{bot_id}/skills/
+      2. skills/
+
+    搜索目录优先级：先搜索 projects/uploads/{bot_id}/skills/，再搜索 skills/

    Args:
-        robot_dir: 机器人项目目录
-        skills: 技能文件名列表（如 ["rag-retrieve", "device_controller.zip"]）
+        bot_id: 机器人 ID
+        skills: 技能文件名列表（如 ["rag-retrieve", "projects/uploads/{bot_id}/skills/rag-retrieve"]）
        project_path: 项目路径
    """
    import zipfile

-    # skills 源目录在 projects/skills，需要通过解析软链接获取正确路径
-    # project_path 可能是 ~/.deepagents (软链接 -> projects/robot)
-    # 所以 skills 源目录是 project_path.resolve().parent / "skills"
-    skills_source_dir = project_path / "skills"
-    skills_target_dir = robot_dir / "skills"
+    # skills 源目录（按优先级顺序）
+    skills_source_dirs = [
+        project_path / "uploads" / bot_id / "skills",
+        Path("skills"),
+    ]
+    skills_target_dir = project_path / "robot" / bot_id / "skills"

-    # 先清空 skills_target_dir，然后重新解压
+    # 先清空 skills_target_dir，然后重新复制
    if skills_target_dir.exists():
        logger.info(f"  Removing existing skills directory: {skills_target_dir}")
        shutil.rmtree(skills_target_dir)

    skills_target_dir.mkdir(parents=True, exist_ok=True)
-    logger.info(f"Extracting skills to {skills_target_dir}")
+    logger.info(f"Copying skills to {skills_target_dir}")

    for skill in skills:
-        # 规范化文件名（确保有 .zip 后缀）
-        if not skill.endswith(".zip"):
-            skill_file = skill + ".zip"
-        else:
-            skill_file = skill
+        source_dir = None

-        skill_source_path = skills_source_dir / skill_file
+        # 简单名称：按优先级顺序在多个目录中查找
+        for base_dir in skills_source_dirs:
+            candidate_dir = base_dir / skill
+            if candidate_dir.exists():
+                source_dir = candidate_dir
+                logger.info(f"  Found skill '{skill}' in {base_dir}")
+                break

-        if not skill_source_path.exists():
-            logger.warning(f"  Skill file not found: {skill_source_path}")
+        if source_dir is None:
+            logger.warning(f"  Skill directory '{skill}' not found in any source directory: {[str(d) for d in skills_source_dirs]}")
            continue

-        # 获取解压后的文件夹名称（去掉 .zip 后缀）
-        folder_name = skill_file.replace(".zip", "")
-        extract_target = skills_target_dir / folder_name
+        if not source_dir.exists():
+            logger.warning(f"  Skill directory not found: {source_dir}")
+            continue
+
+        target_dir = skills_target_dir / os.path.basename(skill)

-        # 解压文件
        try:
-            with zipfile.ZipFile(skill_source_path, 'r') as zip_ref:
-                zip_ref.extractall(extract_target)
-            logger.info(f"  Extracted: {skill_file} -> {extract_target}")
+            shutil.copytree(source_dir, target_dir)
+            logger.info(f"  Copied: {source_dir} -> {target_dir}")
        except Exception as e:
-            logger.error(f"  Failed to extract {skill_file}: {e}")
+            logger.error(f"  Failed to copy {source_dir}: {e}")