qwen_agent/skills/developing/rag-retrieve-cli/scripts/rag_retrieve.py
朱潮 425f3c5bb4 chore: replace Chinese comments and log messages with English
Convert all Chinese comments, docstrings, logger/print output,
HTTPException detail messages, and API response messages to English
across the entire codebase. Functional zh/ja localized strings
(e.g. prompt templates, timezone display names, date formats) are
preserved as-is.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-30 19:45:35 +08:00

145 lines
3.7 KiB
Python

#!/usr/bin/env python3
"""
RAG retrieval script
Calls the RAG API on the configured backend host (BACKEND_HOST) for document retrieval
"""
import argparse
import hashlib
import json
import os
import sys
try:
import requests
except ImportError:
print("Error: requests module is required. Please install it with: pip install requests")
sys.exit(1)
# Default configuration, resolved once when the module is imported.
# Base URL of the RAG backend; the BACKEND_HOST environment variable
# overrides the built-in default.
DEFAULT_BACKEND_HOST = os.getenv("BACKEND_HOST", "https://api-dev.gptbase.ai")
# Key combined with the bot id to derive the bearer token sent to the API;
# the MASTERKEY environment variable overrides the built-in default.
DEFAULT_MASTERKEY = os.getenv("MASTERKEY", "master")
def load_config() -> dict:
    """
    Load configuration from robot_config.json, located three directories
    above the directory that contains this script.

    A missing file is not an error and silently yields an empty dict. Any
    other failure (unreadable file, bad encoding, invalid JSON, or a JSON
    document that is not an object) is reported on stderr and also yields
    an empty dict, so callers can always use ``.get`` on the result.

    Returns:
        dict: Configuration dictionary (empty when nothing usable was loaded)
    """
    config_path = os.path.join(os.path.dirname(__file__), '..', '..', '..', 'robot_config.json')
    try:
        with open(config_path, 'r', encoding='utf-8') as f:
            loaded = json.load(f)
    except FileNotFoundError:
        # The config file is optional.
        return {}
    except (ValueError, OSError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"Warning: Failed to load config file: {e}", file=sys.stderr)
        return {}
    if not isinstance(loaded, dict):
        # Valid JSON, but a list/string/number cannot serve as a config mapping.
        print(
            f"Warning: Failed to load config file: expected a JSON object, got {type(loaded).__name__}",
            file=sys.stderr,
        )
        return {}
    return loaded
def rag_retrieve(query: str, top_k: int = 100, config: dict = None) -> str:
    """
    Call the RAG retrieval API and return the retrieved content.

    The bot to query is read from ``config['bot_id']``. The backend host and
    master key come from the module-level DEFAULT_BACKEND_HOST and
    DEFAULT_MASTERKEY values.

    Args:
        query: Retrieval query content (must contain non-whitespace text)
        top_k: Number of results to return
        config: Configuration dictionary; must provide 'bot_id'

    Returns:
        str: Retrieval results in markdown format. Validation, request and
        response-parsing failures are reported as a string starting with
        "Error:" instead of being raised.
    """
    if config is None:
        config = {}

    # Validate inputs first so no request state is built for a doomed call.
    bot_id = config.get('bot_id')
    if not bot_id:
        return "Error: bot_id is required"
    if not query or not query.strip():
        return "Error: query is required"

    # Drop any trailing slash so the joined URL never contains "//v1".
    host = DEFAULT_BACKEND_HOST.rstrip("/")
    masterkey = DEFAULT_MASTERKEY
    url = f"{host}/v1/rag_retrieve/{bot_id}"

    # Authentication token: MD5 hex digest of "<masterkey>:<bot_id>".
    token_input = f"{masterkey}:{bot_id}"
    auth_token = hashlib.md5(token_input.encode()).hexdigest()

    headers = {
        "content-type": "application/json",
        "authorization": f"Bearer {auth_token}",
    }
    data = {
        "query": query,
        "top_k": top_k,
    }

    try:
        response = requests.post(url, json=data, headers=headers, timeout=30)

        if response.status_code != 200:
            return f"Error: RAG API returned status code {response.status_code}. Response: {response.text}"

        try:
            response_data = response.json()
        except ValueError as e:
            # ValueError is the common base of json.JSONDecodeError,
            # simplejson's decode error and requests' own JSONDecodeError,
            # so this branch is taken whichever JSON backend requests uses.
            return f"Error: Failed to parse API response as JSON. Error: {str(e)}, Raw response: {response.text}"

        # Extract the markdown field; a non-object body (e.g. null or a
        # list) is reported the same way as an object without the field.
        if isinstance(response_data, dict) and "markdown" in response_data:
            return response_data["markdown"]
        return f"Error: 'markdown' field not found in API response. Response: {json.dumps(response_data, indent=2, ensure_ascii=False)}"

    except requests.exceptions.RequestException as e:
        return f"Error: Failed to connect to RAG API. {str(e)}"
    except Exception as e:
        # Last-resort boundary: this tool reports failures as text.
        return f"Error: {str(e)}"
def main():
    """Command-line entry point: parse arguments, run the retrieval, print the result."""
    cli = argparse.ArgumentParser(
        description="RAG retrieval tool - retrieve relevant documents from the knowledge base"
    )
    cli.add_argument("--query", "-q", required=True, help="Retrieval query content")
    cli.add_argument(
        "--top-k", "-k", type=int, default=100, help="Number of results to return (default: 100)"
    )
    options = cli.parse_args()

    # Configuration is read only after the arguments have parsed successfully.
    settings = load_config()

    # The retrieval result (or its error text) is written to stdout as-is.
    print(rag_retrieve(query=options.query, top_k=options.top_k, config=settings))


if __name__ == "__main__":
    main()