Convert all Chinese comments, docstrings, logger/print output, HTTPException detail messages, and API response messages to English across the entire codebase. Functional zh/ja localized strings (e.g. prompt templates, timezone display names, date formats) are preserved as-is. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
145 lines
3.7 KiB
Python
145 lines
3.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
RAG retrieval script
|
|
Calls the local RAG API for document retrieval
|
|
"""
|
|
|
|
import argparse
|
|
import hashlib
|
|
import json
|
|
import os
|
|
import sys
|
|
|
|
try:
|
|
import requests
|
|
except ImportError:
|
|
print("Error: requests module is required. Please install it with: pip install requests")
|
|
sys.exit(1)
|
|
|
|
|
|
# Built-in defaults. Each value is taken from the process environment when the
# corresponding variable is set, otherwise the literal fallback is used.
# NOTE(review): the MASTERKEY fallback is a fixed literal -- confirm that
# deployments always override it through the environment.
DEFAULT_BACKEND_HOST = os.environ.get("BACKEND_HOST", "https://api-dev.gptbase.ai")
DEFAULT_MASTERKEY = os.environ.get("MASTERKEY", "master")
|
|
|
|
|
|
def load_config() -> dict:
    """
    Load configuration from robot_config.json in the project root directory.

    The file is looked up three directory levels above the directory that
    contains this script.

    Returns:
        dict: Configuration dictionary. An empty dict is returned when the
            file does not exist, cannot be read or decoded, is not valid
            JSON, or does not contain a JSON object at the top level.
    """
    config_path = os.path.join(os.path.dirname(__file__), '..', '..', '..', 'robot_config.json')

    # Open directly instead of checking os.path.exists() first: this avoids a
    # check-then-use race, and a missing file is simply "no configuration".
    try:
        with open(config_path, 'r', encoding='utf-8') as f:
            loaded = json.load(f)
    except FileNotFoundError:
        return {}
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        # Diagnostics go to stderr so stdout carries only the retrieval result.
        print(f"Warning: Failed to load config file: {e}", file=sys.stderr)
        return {}

    # Callers use dict methods on the result, so reject any other JSON value.
    if not isinstance(loaded, dict):
        print(
            f"Warning: Failed to load config file: expected a JSON object, got {type(loaded).__name__}",
            file=sys.stderr,
        )
        return {}

    return loaded
|
|
|
|
|
|
def rag_retrieve(query: str, top_k: int = 100, config: dict = None) -> str:
    """
    Call the RAG retrieval API.

    The bot identifier is taken from ``config['bot_id']``. The backend host
    and master key come from the module-level DEFAULT_BACKEND_HOST and
    DEFAULT_MASTERKEY values.

    Args:
        query: Retrieval query content
        top_k: Number of results to return
        config: Configuration dictionary (optional); must provide 'bot_id'

    Returns:
        str: Retrieval results in markdown format on success. Every failure
            (missing argument, non-200 status, unparsable or unexpected
            response, network error) is reported as a string that starts
            with "Error:" instead of raising.
    """
    if not isinstance(config, dict):
        config = {}

    # Validate the inputs before touching anything else.
    bot_id = config.get('bot_id')
    if not bot_id:
        return "Error: bot_id is required"

    if not query:
        return "Error: query is required"

    # Strip a trailing slash so a host such as "https://example.com/" does not
    # produce a double slash in the request path.
    host = DEFAULT_BACKEND_HOST.rstrip('/')
    masterkey = DEFAULT_MASTERKEY

    url = f"{host}/v1/rag_retrieve/{bot_id}"

    # Generate authentication token: hex MD5 digest of "<masterkey>:<bot_id>".
    # NOTE(review): the digest algorithm is kept as-is because the token
    # format is presumably dictated by the server -- confirm before changing.
    token_input = f"{masterkey}:{bot_id}"
    auth_token = hashlib.md5(token_input.encode()).hexdigest()

    headers = {
        "content-type": "application/json",
        "authorization": f"Bearer {auth_token}",
    }
    data = {
        "query": query,
        "top_k": top_k,
    }

    try:
        response = requests.post(url, json=data, headers=headers, timeout=30)

        if response.status_code != 200:
            return f"Error: RAG API returned status code {response.status_code}. Response: {response.text}"

        try:
            response_data = response.json()
        except ValueError as e:
            # ValueError is the common base of every JSON decode error that
            # response.json() can raise (stdlib json, simplejson, and the
            # requests wrapper), so a bad body is never misreported as a
            # connection failure by the RequestException handler below.
            return f"Error: Failed to parse API response as JSON. Error: {str(e)}, Raw response: {response.text}"

        # Extract the markdown field; the payload must be a JSON object.
        if isinstance(response_data, dict) and "markdown" in response_data:
            return response_data["markdown"]

        return f"Error: 'markdown' field not found in API response. Response: {json.dumps(response_data, indent=2, ensure_ascii=False)}"

    except requests.exceptions.RequestException as e:
        return f"Error: Failed to connect to RAG API. {str(e)}"
    except Exception as e:
        # Last-resort boundary: this function reports failures as strings.
        return f"Error: {str(e)}"
|
|
|
|
|
|
def main():
    """Command-line entry point: parse the options, run the retrieval, print the result."""
    cli = argparse.ArgumentParser(
        description="RAG retrieval tool - retrieve relevant documents from the knowledge base"
    )
    cli.add_argument("--query", "-q", required=True, help="Retrieval query content")
    cli.add_argument(
        "--top-k", "-k", type=int, default=100,
        help="Number of results to return (default: 100)",
    )
    options = cli.parse_args()

    # The configuration is loaded only after the arguments parsed successfully.
    print(rag_retrieve(query=options.query, top_k=options.top_k, config=load_config()))


if __name__ == "__main__":
    main()
|