diff --git a/skills/rag-retrieve/SKILL.md b/skills/rag-retrieve/SKILL.md new file mode 100644 index 0000000..0eb0fbb --- /dev/null +++ b/skills/rag-retrieve/SKILL.md @@ -0,0 +1,147 @@ +--- +name: rag-retrieve +description: RAG retrieval skill for querying and retrieving relevant documents from knowledge base. Use this skill when users need to search documentation, retrieve knowledge base articles, or get context from a vector database. Supports semantic search with configurable top-k results. +--- + +# RAG Retrieve + +## Skill Structure + +This is a **self-contained skill package** that can be distributed independently. The skill includes its own scripts and configuration: + +``` +rag-retrieve/ +├── SKILL.md # Core instruction file (this file) +├── skill.yaml # Skill metadata +├── scripts/ # Executable scripts +│ └── rag_retrieve.py # Main RAG retrieval script +``` + +## Overview + +Query and retrieve relevant documents from a RAG (Retrieval-Augmented Generation) knowledge base using vector search. This skill provides semantic search capabilities with support for multiple bot instances and configurable result limits. + +## Required Parameters + +Before executing any retrieval, you MUST confirm the following required parameters with the user if they are not explicitly provided: + +| Parameter | Description | Type | +|-----------|-------------|------| +| **query** | Search query content | string | + +### Optional Parameters + +| Parameter | Description | Type | Default | +|-----------|-------------|------|---------| +| **top_k** | Maximum number of results | integer | 100 | + +### Confirmation Template + +When the required parameter is missing, ask the user: + +``` +I need some information to perform the RAG retrieval: + +1. Query: What would you like to search for? 
+``` + +## Quick Start + +Use the `scripts/rag_retrieve.py` script to execute RAG queries: + +```bash +scripts/rag_retrieve.py --query "your search query" +``` + +## Usage Examples + +### Basic Query + +```bash +scripts/rag_retrieve.py --query "How to configure authentication?" +``` + +### Search with Specific Top-K + +```bash +scripts/rag_retrieve.py --query "API error handling" --top-k 50 +``` + +### Common Use Cases + +**Scenario 1: Documentation Search** +```bash +scripts/rag_retrieve.py --query "deployment guide" +``` + +**Scenario 2: Troubleshooting** +```bash +scripts/rag_retrieve.py --query "connection timeout error" +``` + +**Scenario 3: Feature Information** +```bash +scripts/rag_retrieve.py --query "enterprise pricing plans" +``` + +## Script Usage + +### rag_retrieve.py + +Main script for executing RAG retrieval queries. + +```bash +scripts/rag_retrieve.py [OPTIONS] +``` + +**Options:** + +| Option | Required | Description | Default | +|--------|----------|-------------|---------| +| `--query`, `-q` | Yes | Search query content | - | +| `--top-k`, `-k` | No | Maximum number of results | 100 | + +**Examples:** + +```bash +# Basic query +scripts/rag_retrieve.py --query "authentication setup" + +# Custom top-k +scripts/rag_retrieve.py --query "API reference" --top-k 20 +``` + +## Common Workflows + +### Research Mode: Comprehensive Search + +```bash +scripts/rag_retrieve.py --query "machine learning algorithms" --top-k 100 +``` + +### Quick Answer Mode: Focused Search + +```bash +scripts/rag_retrieve.py --query "password reset" --top-k 10 +``` + +### Comparison Mode: Multiple Queries + +```bash +# Search for related topics +scripts/rag_retrieve.py --query "REST API" --top-k 30 +scripts/rag_retrieve.py --query "GraphQL API" --top-k 30 +``` + +## Resources + +### scripts/rag_retrieve.py + +Executable Python script for RAG retrieval. 
# Default configuration; both values can be overridden through the environment.
DEFAULT_BACKEND_HOST = os.getenv("BACKEND_HOST", "https://api-dev.gptbase.ai")
# NOTE(review): "master" is only a placeholder fallback; real deployments are
# expected to provide MASTERKEY via the environment.
DEFAULT_MASTERKEY = os.getenv("MASTERKEY", "master")


def load_config(config_path=None) -> dict:
    """
    Load the robot configuration from ``robot_config.json``.

    Args:
        config_path: Optional explicit path to the configuration file. When
            omitted, ``robot_config.json`` is looked up three directories
            above the directory containing this script.

    Returns:
        dict: The parsed configuration, or an empty dict when the file is
        missing, unreadable, not valid JSON, or not a JSON object.
    """
    if config_path is None:
        config_path = os.path.join(
            os.path.dirname(__file__), '..', '..', '..', 'robot_config.json'
        )

    try:
        with open(config_path, 'r', encoding='utf-8') as f:
            loaded = json.load(f)
    except FileNotFoundError:
        # A missing config file is a normal situation, not worth a warning.
        return {}
    except (ValueError, OSError) as e:
        # ValueError covers both malformed JSON and undecodable bytes.
        # Diagnostics go to stderr so stdout only carries the retrieval result.
        print(f"Warning: Failed to load config file: {e}", file=sys.stderr)
        return {}

    if not isinstance(loaded, dict):
        # Callers use ``config.get(...)``; anything but an object is unusable.
        print(
            f"Warning: Config file {config_path} does not contain a JSON object; ignoring it",
            file=sys.stderr,
        )
        return {}

    return loaded


def rag_retrieve(query: str, top_k: int = 100, config: dict = None) -> str:
    """
    Call the RAG retrieval API and return its markdown result.

    Args:
        query: Search query text.
        top_k: Maximum number of results to request; must be a positive integer.
        config: Optional configuration dict. Its ``bot_id`` entry selects the
            bot whose knowledge base is queried.

    Returns:
        str: The ``markdown`` field of the API response on success, otherwise
        a human-readable message starting with ``"Error:"``. This function
        reports failures through its return value instead of raising.
    """
    if config is None:
        config = {}

    # Validate inputs before touching the network.
    bot_id = config.get('bot_id')
    if not bot_id:
        return "Error: bot_id is required"

    if not query or not query.strip():
        return "Error: query is required"

    if not isinstance(top_k, int) or top_k <= 0:
        return "Error: top_k must be a positive integer"

    # Host and master key come from the environment-backed module defaults.
    # Strip a trailing slash so the joined URL never contains "//v1".
    host = DEFAULT_BACKEND_HOST.rstrip('/')
    masterkey = DEFAULT_MASTERKEY

    url = f"{host}/v1/rag_retrieve/{bot_id}"

    # The bearer token is the MD5 hex digest of "<masterkey>:<bot_id>".
    token_input = f"{masterkey}:{bot_id}"
    auth_token = hashlib.md5(token_input.encode()).hexdigest()

    headers = {
        "content-type": "application/json",
        "authorization": f"Bearer {auth_token}"
    }
    data = {
        "query": query,
        "top_k": top_k
    }

    try:
        response = requests.post(url, json=data, headers=headers, timeout=30)

        if response.status_code != 200:
            return f"Error: RAG API returned status code {response.status_code}. Response: {response.text}"

        try:
            response_data = response.json()
        except ValueError as e:
            # Every JSON decode error raised by requests (stdlib json or
            # simplejson backed) derives from ValueError, so this branch is
            # taken regardless of the installed JSON backend.
            return f"Error: Failed to parse API response as JSON. Error: {str(e)}, Raw response: {response.text}"

        # Extract the markdown field; guard against non-object JSON bodies.
        if isinstance(response_data, dict) and "markdown" in response_data:
            return response_data["markdown"]
        return f"Error: 'markdown' field not found in API response. Response: {json.dumps(response_data, indent=2, ensure_ascii=False)}"

    except requests.exceptions.RequestException as e:
        return f"Error: Failed to connect to RAG API. {str(e)}"
    except Exception as e:
        # Top-level boundary: the caller always receives a string.
        return f"Error: {str(e)}"


def main(argv=None):
    """
    Command-line entry point: parse arguments, run the retrieval, print the result.

    Args:
        argv: Optional argument list; defaults to ``sys.argv[1:]`` when None.
    """
    parser = argparse.ArgumentParser(
        description="RAG检索工具 - 从知识库中检索相关文档"
    )
    parser.add_argument(
        "--query",
        "-q",
        required=True,
        help="检索查询内容"
    )
    parser.add_argument(
        "--top-k",
        "-k",
        type=int,
        default=100,
        help="返回结果数量(默认:100)"
    )

    args = parser.parse_args(argv)

    # Load the robot configuration (provides bot_id).
    config = load_config()

    result = rag_retrieve(
        query=args.query,
        top_k=args.top_k,
        config=config
    )

    print(result)


if __name__ == "__main__":
    main()
+ - requests +entry_point: scripts/rag_retrieve.py +config: + query: + type: string + required: true + description: Search query content + top_k: + type: integer + default: 100 + description: Maximum number of results diff --git a/utils/multi_project_manager.py b/utils/multi_project_manager.py index b651725..c70d773 100644 --- a/utils/multi_project_manager.py +++ b/utils/multi_project_manager.py @@ -407,7 +407,7 @@ def create_robot_project(dataset_ids: List[str], bot_id: str, force_rebuild: boo logger.info(f"Using existing robot project: {robot_dir}") # 即使使用现有项目,也要处理 skills(如果提供了) if skills: - _extract_skills_to_robot(robot_dir, skills, project_path) + _extract_skills_to_robot(bot_id, skills, project_path) return str(robot_dir) # 创建机器人目录结构 @@ -479,7 +479,7 @@ def create_robot_project(dataset_ids: List[str], bot_id: str, force_rebuild: boo # 处理 skills 解压 if skills: - _extract_skills_to_robot(robot_dir, skills, project_path) + _extract_skills_to_robot(bot_id, skills, project_path) return str(robot_dir) @@ -493,52 +493,61 @@ if __name__ == "__main__": logger.info(f"Created robot project at: {robot_dir}") -def _extract_skills_to_robot(robot_dir: Path, skills: List[str], project_path: Path) -> None: +def _extract_skills_to_robot(bot_id: str, skills: List[str], project_path: Path) -> None: """ - 解压 skills 到 robot 项目的 skills 文件夹 + 复制 skills 到 robot 项目的 skills 文件夹 + - 如果是完整路径(如 "projects/uploads/xxx/skills/rag-retrieve_2.zip"),直接使用该路径 + - 如果是简单名称(如 "rag-retrieve"),从以下目录按优先级顺序查找: + 1. projects/uploads/{bot_id}/skills/ + 2. 
skills/ + + 搜索目录优先级:先搜索 projects/uploads/{bot_id}/skills/,再搜索 skills/ Args: - robot_dir: 机器人项目目录 - skills: 技能文件名列表(如 ["rag-retrieve", "device_controller.zip"]) + bot_id: 机器人 ID + skills: 技能文件名列表(如 ["rag-retrieve", "projects/uploads/{bot_id}/skills/rag-retrieve"]) project_path: 项目路径 """ import zipfile - # skills 源目录在 projects/skills,需要通过解析软链接获取正确路径 - # project_path 可能是 ~/.deepagents (软链接 -> projects/robot) - # 所以 skills 源目录是 project_path.resolve().parent / "skills" - skills_source_dir = project_path / "skills" - skills_target_dir = robot_dir / "skills" + # skills 源目录(按优先级顺序) + skills_source_dirs = [ + project_path / "uploads" / bot_id / "skills", + Path("skills"), + ] + skills_target_dir = project_path / "robot" / bot_id / "skills" - # 先清空 skills_target_dir,然后重新解压 + # 先清空 skills_target_dir,然后重新复制 if skills_target_dir.exists(): logger.info(f" Removing existing skills directory: {skills_target_dir}") shutil.rmtree(skills_target_dir) skills_target_dir.mkdir(parents=True, exist_ok=True) - logger.info(f"Extracting skills to {skills_target_dir}") + logger.info(f"Copying skills to {skills_target_dir}") for skill in skills: - # 规范化文件名(确保有 .zip 后缀) - if not skill.endswith(".zip"): - skill_file = skill + ".zip" - else: - skill_file = skill + source_dir = None - skill_source_path = skills_source_dir / skill_file + # 简单名称:按优先级顺序在多个目录中查找 + for base_dir in skills_source_dirs: + candidate_dir = base_dir / skill + if candidate_dir.exists(): + source_dir = candidate_dir + logger.info(f" Found skill '{skill}' in {base_dir}") + break - if not skill_source_path.exists(): - logger.warning(f" Skill file not found: {skill_source_path}") + if source_dir is None: + logger.warning(f" Skill directory '{skill}' not found in any source directory: {[str(d) for d in skills_source_dirs]}") continue - # 获取解压后的文件夹名称(去掉 .zip 后缀) - folder_name = skill_file.replace(".zip", "") - extract_target = skills_target_dir / folder_name + if not source_dir.exists(): + logger.warning(f" Skill directory not found: 
{source_dir}") + continue + + target_dir = skills_target_dir / os.path.basename(skill) - # 解压文件 try: - with zipfile.ZipFile(skill_source_path, 'r') as zip_ref: - zip_ref.extractall(extract_target) - logger.info(f" Extracted: {skill_file} -> {extract_target}") + shutil.copytree(source_dir, target_dir) + logger.info(f" Copied: {source_dir} -> {target_dir}") except Exception as e: - logger.error(f" Failed to extract {skill_file}: {e}") + logger.error(f" Failed to copy {source_dir}: {e}")