fix(skills): improve skill extraction and handling logic

- Refactor _extract_skills_to_robot to accept bot_id instead of robot_dir
  - Add multi-directory skill search with priority order
  - Switch from zip extraction to direct directory copying
  - Add rag-retrieve skill directory
This commit is contained in:
朱潮 2026-01-07 14:56:10 +08:00
parent 92c82c24a4
commit f74f09c191
4 changed files with 355 additions and 29 deletions

View File

@ -0,0 +1,147 @@
---
name: rag-retrieve
description: RAG retrieval skill for querying and retrieving relevant documents from knowledge base. Use this skill when users need to search documentation, retrieve knowledge base articles, or get context from a vector database. Supports semantic search with configurable top-k results.
---
# RAG Retrieve
## Skill Structure
This is a **self-contained skill package** that can be distributed independently. The skill includes its own scripts and configuration:
```
rag-retrieve/
├── SKILL.md # Core instruction file (this file)
├── skill.yaml # Skill metadata
├── scripts/ # Executable scripts
│ └── rag_retrieve.py # Main RAG retrieval script
```
## Overview
Query and retrieve relevant documents from a RAG (Retrieval-Augmented Generation) knowledge base using vector search. This skill provides semantic search capabilities with support for multiple bot instances and configurable result limits.
## Required Parameters
Before executing any retrieval, you MUST confirm the following required parameters with the user if they are not explicitly provided:
| Parameter | Description | Type |
|-----------|-------------|------|
| **query** | Search query content | string |
### Optional Parameters
| Parameter | Description | Type | Default |
|-----------|-------------|------|---------|
| **top_k** | Maximum number of results | integer | 100 |
### Confirmation Template
When the required parameter is missing, ask the user:
```
I need some information to perform the RAG retrieval:
1. Query: What would you like to search for?
```
## Quick Start
Use the `scripts/rag_retrieve.py` script to execute RAG queries:
```bash
scripts/rag_retrieve.py --query "your search query"
```
## Usage Examples
### Basic Query
```bash
scripts/rag_retrieve.py --query "How to configure authentication?"
```
### Search with Specific Top-K
```bash
scripts/rag_retrieve.py --query "API error handling" --top-k 50
```
### Common Use Cases
**Scenario 1: Documentation Search**
```bash
scripts/rag_retrieve.py --query "deployment guide"
```
**Scenario 2: Troubleshooting**
```bash
scripts/rag_retrieve.py --query "connection timeout error"
```
**Scenario 3: Feature Information**
```bash
scripts/rag_retrieve.py --query "enterprise pricing plans"
```
## Script Usage
### rag_retrieve.py
Main script for executing RAG retrieval queries.
```bash
scripts/rag_retrieve.py [OPTIONS]
```
**Options:**
| Option | Required | Description | Default |
|--------|----------|-------------|---------|
| `--query`, `-q` | Yes | Search query content | - |
| `--top-k`, `-k` | No | Maximum number of results | 100 |
**Examples:**
```bash
# Basic query
scripts/rag_retrieve.py --query "authentication setup"
# Custom top-k
scripts/rag_retrieve.py --query "API reference" --top-k 20
```
## Common Workflows
### Research Mode: Comprehensive Search
```bash
scripts/rag_retrieve.py --query "machine learning algorithms" --top-k 100
```
### Quick Answer Mode: Focused Search
```bash
scripts/rag_retrieve.py --query "password reset" --top-k 10
```
### Comparison Mode: Multiple Queries
```bash
# Search for related topics
scripts/rag_retrieve.py --query "REST API" --top-k 30
scripts/rag_retrieve.py --query "GraphQL API" --top-k 30
```
## Resources
### scripts/rag_retrieve.py
Executable Python script for RAG retrieval. Handles:
- HTTP requests to RAG API
- Authentication token generation
- Configuration file loading
- Error handling and reporting
- Markdown response parsing
The script can be executed directly without loading into context.

View File

@ -0,0 +1,144 @@
#!/usr/bin/env python3
"""
RAG retrieval script.

Calls the local RAG API to retrieve relevant documents from the
knowledge base.
"""
import argparse
import hashlib
import json
import os
import sys

# requests is the only third-party dependency; fail fast with an
# actionable install hint rather than a raw ImportError traceback.
try:
    import requests
except ImportError:
    print("Error: requests module is required. Please install it with: pip install requests")
    sys.exit(1)

# Default configuration (overridable via environment variables).
DEFAULT_BACKEND_HOST = os.getenv("BACKEND_HOST", "https://api-dev.gptbase.ai")
DEFAULT_MASTERKEY = os.getenv("MASTERKEY", "master")
def load_config() -> dict:
    """
    Load configuration from robot_config.json at the project root.

    The file is looked up three directories above this script
    (skills/&lt;name&gt;/scripts/ -> project root) — TODO confirm this layout
    matches the deployed skill directory structure.

    Returns:
        dict: Parsed configuration, or an empty dict when the file is
        missing or cannot be read/parsed (a warning goes to stderr).
    """
    config_path = os.path.join(os.path.dirname(__file__), '..', '..', '..', 'robot_config.json')
    if os.path.exists(config_path):
        try:
            with open(config_path, 'r', encoding='utf-8') as f:
                return json.load(f)
        except (json.JSONDecodeError, IOError) as e:
            # Best-effort: a broken config degrades to defaults instead of crashing.
            print(f"Warning: Failed to load config file: {e}", file=sys.stderr)
    return {}
def rag_retrieve(query: str, top_k: int = 100, config: dict = None) -> str:
    """
    Call the RAG retrieval API and return the markdown-formatted result.

    Args:
        query: Search query content (required, non-empty).
        top_k: Maximum number of results to return.
        config: Optional configuration dict; must provide 'bot_id'.

    Returns:
        str: Markdown retrieval result on success, or an "Error: ..."
        message string on any failure — this function never raises.
    """
    if config is None:
        config = {}

    # Validate inputs before doing any work.
    bot_id = config.get('bot_id')
    if not bot_id:
        return "Error: bot_id is required"
    if not query:
        return "Error: query is required"

    host = DEFAULT_BACKEND_HOST
    masterkey = DEFAULT_MASTERKEY
    url = f"{host}/v1/rag_retrieve/{bot_id}"

    # Auth token is md5(masterkey:bot_id).
    # NOTE(review): md5 is weak for authentication tokens — consider an
    # HMAC-based scheme server-side.
    token_input = f"{masterkey}:{bot_id}"
    auth_token = hashlib.md5(token_input.encode()).hexdigest()
    headers = {
        "content-type": "application/json",
        "authorization": f"Bearer {auth_token}"
    }
    data = {
        "query": query,
        "top_k": top_k
    }

    try:
        response = requests.post(url, json=data, headers=headers, timeout=30)
        if response.status_code != 200:
            return f"Error: RAG API returned status code {response.status_code}. Response: {response.text}"
        try:
            response_data = response.json()
        except json.JSONDecodeError as e:
            return f"Error: Failed to parse API response as JSON. Error: {str(e)}, Raw response: {response.text}"
        # The API is expected to wrap the result in a 'markdown' field.
        if "markdown" in response_data:
            return response_data["markdown"]
        else:
            return f"Error: 'markdown' field not found in API response. Response: {json.dumps(response_data, indent=2, ensure_ascii=False)}"
    except requests.exceptions.RequestException as e:
        return f"Error: Failed to connect to RAG API. {str(e)}"
    except Exception as e:
        return f"Error: {str(e)}"
def main():
    """CLI entry point: parse arguments, load config, and print the result."""
    arg_parser = argparse.ArgumentParser(
        description="RAG检索工具 - 从知识库中检索相关文档"
    )
    arg_parser.add_argument(
        "--query", "-q",
        required=True,
        help="检索查询内容",
    )
    arg_parser.add_argument(
        "--top-k", "-k",
        type=int,
        default=100,
        help="返回结果数量默认100",
    )
    opts = arg_parser.parse_args()

    # Configuration (including bot_id) comes from robot_config.json.
    cfg = load_config()
    output = rag_retrieve(query=opts.query, top_k=opts.top_k, config=cfg)
    print(output)
# Script entry point when executed directly (e.g. scripts/rag_retrieve.py).
if __name__ == "__main__":
    main()

View File

@ -0,0 +1,26 @@
name: rag-retrieve
version: 1.0.0
description: RAG retrieval skill for querying and retrieving relevant documents from knowledge base using vector search
author:
name: sparticle
email: support@gbase.ai
license: MIT
tags:
- rag
- retrieval
- vector-search
- knowledge-base
runtime:
python: ">=3.7"
dependencies:
- requests
entry_point: scripts/rag_retrieve.py
config:
query:
type: string
required: true
description: Search query content
top_k:
type: integer
default: 100
description: Maximum number of results

View File

@ -407,7 +407,7 @@ def create_robot_project(dataset_ids: List[str], bot_id: str, force_rebuild: boo
logger.info(f"Using existing robot project: {robot_dir}")
# 即使使用现有项目,也要处理 skills如果提供了
if skills:
_extract_skills_to_robot(robot_dir, skills, project_path)
_extract_skills_to_robot(bot_id, skills, project_path)
return str(robot_dir)
# 创建机器人目录结构
@ -479,7 +479,7 @@ def create_robot_project(dataset_ids: List[str], bot_id: str, force_rebuild: boo
# 处理 skills 解压
if skills:
_extract_skills_to_robot(robot_dir, skills, project_path)
_extract_skills_to_robot(bot_id, skills, project_path)
return str(robot_dir)
@ -493,52 +493,61 @@ if __name__ == "__main__":
logger.info(f"Created robot project at: {robot_dir}")
def _extract_skills_to_robot(robot_dir: Path, skills: List[str], project_path: Path) -> None:
def _extract_skills_to_robot(bot_id: str, skills: List[str], project_path: Path) -> None:
"""
解压 skills robot 项目的 skills 文件夹
复制 skills robot 项目的 skills 文件夹
- 如果是完整路径 "projects/uploads/xxx/skills/rag-retrieve_2.zip"直接使用该路径
- 如果是简单名称 "rag-retrieve"从以下目录按优先级顺序查找
1. projects/uploads/{bot_id}/skills/
2. skills/
搜索目录优先级先搜索 projects/uploads/{bot_id}/skills/再搜索 skills/
Args:
robot_dir: 机器人项目目录
skills: 技能文件名列表 ["rag-retrieve", "device_controller.zip"]
bot_id: 机器人 ID
skills: 技能文件名列表 ["rag-retrieve", "projects/uploads/{bot_id}/skills/rag-retrieve"]
project_path: 项目路径
"""
import zipfile
# skills 源目录在 projects/skills需要通过解析软链接获取正确路径
# project_path 可能是 ~/.deepagents (软链接 -> projects/robot)
# 所以 skills 源目录是 project_path.resolve().parent / "skills"
skills_source_dir = project_path / "skills"
skills_target_dir = robot_dir / "skills"
# skills 源目录(按优先级顺序)
skills_source_dirs = [
project_path / "uploads" / bot_id / "skills",
Path("skills"),
]
skills_target_dir = project_path / "robot" / bot_id / "skills"
# 先清空 skills_target_dir然后重新解压
# 先清空 skills_target_dir然后重新复制
if skills_target_dir.exists():
logger.info(f" Removing existing skills directory: {skills_target_dir}")
shutil.rmtree(skills_target_dir)
skills_target_dir.mkdir(parents=True, exist_ok=True)
logger.info(f"Extracting skills to {skills_target_dir}")
logger.info(f"Copying skills to {skills_target_dir}")
for skill in skills:
# 规范化文件名(确保有 .zip 后缀)
if not skill.endswith(".zip"):
skill_file = skill + ".zip"
else:
skill_file = skill
source_dir = None
skill_source_path = skills_source_dir / skill_file
# 简单名称:按优先级顺序在多个目录中查找
for base_dir in skills_source_dirs:
candidate_dir = base_dir / skill
if candidate_dir.exists():
source_dir = candidate_dir
logger.info(f" Found skill '{skill}' in {base_dir}")
break
if not skill_source_path.exists():
logger.warning(f" Skill file not found: {skill_source_path}")
if source_dir is None:
logger.warning(f" Skill directory '{skill}' not found in any source directory: {[str(d) for d in skills_source_dirs]}")
continue
# 获取解压后的文件夹名称(去掉 .zip 后缀)
folder_name = skill_file.replace(".zip", "")
extract_target = skills_target_dir / folder_name
if not source_dir.exists():
logger.warning(f" Skill directory not found: {source_dir}")
continue
target_dir = skills_target_dir / os.path.basename(skill)
# 解压文件
try:
with zipfile.ZipFile(skill_source_path, 'r') as zip_ref:
zip_ref.extractall(extract_target)
logger.info(f" Extracted: {skill_file} -> {extract_target}")
shutil.copytree(source_dir, target_dir)
logger.info(f" Copied: {source_dir} -> {target_dir}")
except Exception as e:
logger.error(f" Failed to extract {skill_file}: {e}")
logger.error(f" Failed to copy {source_dir}: {e}")