diff --git a/agent/agent_config.py b/agent/agent_config.py
index f2f7e0b..f2e894c 100644
--- a/agent/agent_config.py
+++ b/agent/agent_config.py
@@ -32,6 +32,7 @@ class AgentConfig:
session_id: Optional[str] = None
dataset_ids: Optional[List[str]] = field(default_factory=list)
trace_id: Optional[str] = None # Request trace ID, obtained from the X-Request-ID header
+ request_started_at: Optional[float] = None
# Response control parameters
stream: bool = False
diff --git a/agent/deep_assistant.py b/agent/deep_assistant.py
index 86eafb1..085beeb 100644
--- a/agent/deep_assistant.py
+++ b/agent/deep_assistant.py
@@ -24,6 +24,7 @@ from .guideline_middleware import GuidelineMiddleware
from .tool_output_length_middleware import ToolOutputLengthMiddleware
from .tool_use_cleanup_middleware import ToolUseCleanupMiddleware
from .filepath_fix_middleware import FilePathFixMiddleware
+from .mcp_trace_meta import patch_mcp_client_session_trace_meta
from utils.settings import (
SUMMARIZATION_MAX_TOKENS,
SUMMARIZATION_TOKENS_TO_KEEP,
@@ -42,6 +43,7 @@ from .mem0_middleware import create_mem0_middleware
from .mem0_config import Mem0Config
from agent.prompt_loader import load_system_prompt_async, load_mcp_settings_async
from agent.agent_memory_cache import get_memory_cache_manager
+from .subagent_loader import load_subagents
from .checkpoint_manager import get_checkpointer_manager
from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver
from langgraph.checkpoint.memory import InMemorySaver
@@ -63,6 +65,8 @@ from deepagents.graph import BASE_AGENT_PROMPT
from deepagents_cli.local_context import LocalContextMiddleware
# Custom: FilesystemMiddleware with full SKILL.md reading support
from .custom_filesystem_middleware import CustomFilesystemMiddleware
+# Sub-agent support
+from deepagents.middleware.subagents import SubAgent, SubAgentMiddleware
# Global MemorySaver instance
# from langgraph.checkpoint.memory import MemorySaver
@@ -123,6 +127,7 @@ def read_system_prompt():
async def get_tools_from_mcp(mcp):
"""Extract tools from MCP configuration with caching."""
+ patch_mcp_client_session_trace_meta()
start_time = time.time()
# Defensive handling: ensure mcp is a non-empty list containing mcpServers
if not isinstance(mcp, list) or len(mcp) == 0 or "mcpServers" not in mcp[0]:
@@ -306,6 +311,15 @@ async def init_agent(config: AgentConfig):
sandbox, sandbox_type, workspace_root = await sandbox_task
logger.info(f"init_agent sandbox ready, elapsed: {time.time() - create_start:.3f}s")
+ # Load sub-agents from skill directories
+ subagents = await load_subagents(
+ bot_id=config.bot_id,
+ tools=mcp_tools,
+ model=llm_instance,
+ )
+ if subagents:
+ logger.info(f"Loaded {len(subagents)} sub-agents: {[s['name'] for s in subagents]}")
+
agent, composite_backend = create_custom_cli_agent(
model=llm_instance,
assistant_id=config.bot_id,
@@ -317,6 +331,7 @@ async def init_agent(config: AgentConfig):
checkpointer=checkpointer,
sandbox=sandbox,
sandbox_type=sandbox_type,
+ subagents=subagents if subagents else None,
shell_env={
k: v for k, v in {
"ASSISTANT_ID": str(config.bot_id),
@@ -385,6 +400,7 @@ def create_custom_cli_agent(
checkpointer: Checkpointer | None = None,
store: BaseStore | None = None,
shell_env: dict[str, str] | None = None,
+ subagents: list[SubAgent] | None = None,
) -> tuple[Pregel, CompositeBackend]:
"""Create a CLI-configured agent with custom workspace_root for shell commands.
@@ -521,9 +537,19 @@ def create_custom_cli_agent(
TodoListMiddleware(),
FilePathFixMiddleware(), # Fix extra spaces in CJK file names within tool call arguments
CustomFilesystemMiddleware(backend=composite_backend), # Use the custom FilesystemMiddleware with full SKILL.md reading support
+ ]
+ # Insert SubAgentMiddleware after FilesystemMiddleware (matches create_deep_agent ordering)
+ if subagents:
+ subagent_middleware = SubAgentMiddleware(
+ backend=composite_backend,
+ subagents=subagents,
+ )
+ deepagent_middleware.append(subagent_middleware)
+ logger.info(f"SubAgentMiddleware added with {len(subagents)} sub-agents: {[s['name'] for s in subagents]}")
+ deepagent_middleware.extend([
AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"),
PatchToolCallsMiddleware(),
- ]
+ ])
if agent_middleware:
deepagent_middleware.extend(agent_middleware)
if interrupt_on is not None:
diff --git a/agent/logging_handler.py b/agent/logging_handler.py
index c3e21e7..60aa886 100644
--- a/agent/logging_handler.py
+++ b/agent/logging_handler.py
@@ -1,6 +1,7 @@
"""Logging callback handler module."""
import logging
+import traceback
from typing import Any, Optional, Dict, List
from langchain_core.callbacks import BaseCallbackHandler
from langchain_core.messages import BaseMessage
@@ -80,4 +81,8 @@ class LoggingCallbackHandler(BaseCallbackHandler):
self, error: Exception, **kwargs: Any
) -> None:
"""Called when a tool invocation raises an error."""
- self.logger.error(f"❌ Tool Error: {error}")
+ self.logger.error(
+ "❌ Tool Error: %s\n%s",
+ repr(error),
+ "".join(traceback.format_exception(type(error), error, error.__traceback__)),
+ )
diff --git a/agent/mcp_trace_meta.py b/agent/mcp_trace_meta.py
new file mode 100644
index 0000000..f28c260
--- /dev/null
+++ b/agent/mcp_trace_meta.py
@@ -0,0 +1,98 @@
+import logging
+from functools import wraps
+from typing import Any
+
+try:
+ from mcp import ClientSession, types
+except ImportError:
+ from mcp.client.session import ClientSession
+ from mcp import types
+
+from utils.log_util.context import g
+
+logger = logging.getLogger("app")
+
+_PATCHED_ATTR = "_catalog_trace_meta_patched"
+_TRACE_META_TOOL_NAMES = {"rag_retrieve", "table_rag_retrieve"}
+
+
+def _get_trace_id() -> str:  # Trace id read from `g` (utils.log_util.context); "" when none is available.
+    try:
+        trace_id = getattr(g, "trace_id", "")  # An absent attribute defaults to "".
+    except (LookupError, KeyError):  # A lookup failure while reading `g` is treated as "no trace id".
+        return ""
+    return str(trace_id) if trace_id else ""  # Falsy values (None, "", 0) are normalized to "".
+
+
+def _get_tool_name(args: tuple[Any, ...], kwargs: dict[str, Any]) -> str:  # Tool name taken from a call_tool(*args, **kwargs) invocation.
+    name = args[0] if args else kwargs.get("name")  # First positional argument if any, else the "name" keyword.
+    return str(name) if name else ""  # A missing or falsy name yields "".
+
+
+def patch_mcp_client_session_trace_meta() -> None:
+    """Monkeypatch ClientSession.call_tool (once) so calls to tools in _TRACE_META_TOOL_NAMES carry the current trace id in ``meta``."""
+    if getattr(ClientSession.call_tool, _PATCHED_ATTR, False):  # Already patched: calling again is a no-op.
+        return
+
+    original_call_tool = ClientSession.call_tool
+
+    @wraps(original_call_tool)
+    async def call_tool_with_trace_meta(self: ClientSession, *args: Any, **kwargs: Any) -> Any:
+        tool_name = _get_tool_name(args, kwargs)
+        trace_id = _get_trace_id() if tool_name in _TRACE_META_TOOL_NAMES else ""  # Other tools pass through untouched.
+        if trace_id:
+            meta = kwargs.get("meta")
+            if isinstance(meta, dict):
+                meta = {**meta, "trace_id": meta.get("trace_id") or trace_id}  # Copy; a truthy caller-supplied trace_id is kept.
+            else:
+                meta = {"trace_id": trace_id}  # Missing or non-dict meta is replaced.
+            kwargs["meta"] = meta
+
+        try:
+            return await original_call_tool(self, *args, **kwargs)
+        except TypeError as exc:
+            if trace_id and "meta" in kwargs and "unexpected keyword argument" in str(exc):  # NOTE(review): matches any such TypeError, not only one naming `meta` — confirm.
+                return await _call_tool_with_meta_compat(self, *args, **kwargs)
+            raise
+
+    setattr(call_tool_with_trace_meta, _PATCHED_ATTR, True)  # Marker read by the guard at the top of this function.
+    ClientSession.call_tool = call_tool_with_trace_meta
+
+
+async def _call_tool_with_meta_compat(self: ClientSession, *args: Any, **kwargs: Any) -> Any:
+    """Send a tools/call request via send_request with ``_meta`` set, for MCP SDK versions before call_tool(meta=...)."""
+    name = _get_tool_name(args, kwargs)
+    if not name:
+        raise TypeError("call_tool() missing required argument: 'name'")
+
+    arguments = args[1] if len(args) > 1 else kwargs.get("arguments", kwargs.get("args"))  # NOTE(review): assumes positional order (name, arguments, read_timeout_seconds, progress_callback) — confirm against the SDK.
+    read_timeout_seconds = (
+        args[2] if len(args) > 2 else kwargs.get("read_timeout_seconds")
+    )
+    progress_callback = (
+        args[3] if len(args) > 3 else kwargs.get("progress_callback")
+    )
+    meta = kwargs.get("meta")
+
+    request_meta = meta if isinstance(meta, dict) else None  # Only a dict is forwarded as _meta.
+    result = await self.send_request(
+        types.ClientRequest(
+            types.CallToolRequest(
+                method="tools/call",
+                params=types.CallToolRequestParams(
+                    name=name,
+                    arguments=arguments,
+                    _meta=request_meta,
+                ),
+            )
+        ),
+        types.CallToolResult,
+        request_read_timeout_seconds=read_timeout_seconds,
+        progress_callback=progress_callback,
+    )
+
+    validate_tool_result = getattr(self, "_validate_tool_result", None)  # Optional hook; skipped when the session does not define it.
+    if validate_tool_result and not result.isError:
+        await validate_tool_result(name, result)
+
+    return result
diff --git a/agent/subagent_loader.py b/agent/subagent_loader.py
new file mode 100644
index 0000000..ed18fb9
--- /dev/null
+++ b/agent/subagent_loader.py
@@ -0,0 +1,188 @@
+"""Sub-agent loader for discovering and parsing sub-agent definitions from skill directories.
+
+Sub-agents are defined as markdown files with YAML frontmatter in skill directories:
+ projects/robot/{bot_id}/skills/{skill_name}/agents/*.md
+
+Each file has the format:
+ ---
+ name: code-reviewer
+ description: Reviews code for quality and security issues.
+ tools: rag_retrieve, table_rag_retrieve
+ ---
+
+ System prompt for the sub-agent...
+"""
+
+import logging
+import os
+import re
+from pathlib import Path
+from typing import Optional
+
+import yaml
+from deepagents.middleware.subagents import SubAgent
+from langchain.tools import BaseTool
+from langchain_core.language_models import BaseChatModel
+
+from agent.plugin_hook_loader import _get_skill_dirs
+
+logger = logging.getLogger('app')
+
+# Regex to extract YAML frontmatter and body from markdown files
+_FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n?(.*)$", re.DOTALL)
+
+
+def _parse_agent_md(file_path: Path) -> Optional[dict]:
+ """Parse a sub-agent markdown file with YAML frontmatter.
+
+ Args:
+ file_path: Path to the .md file.
+
+ Returns:
+ Dict with keys: name, description, system_prompt, tool_names (list[str] | None).
+ None if parsing fails.
+ """
+ try:
+ content = file_path.read_text(encoding="utf-8")
+ except OSError as e:
+ logger.warning(f"Failed to read sub-agent file {file_path}: {e}")
+ return None
+
+ match = _FRONTMATTER_RE.match(content)
+ if not match:
+ logger.warning(f"Sub-agent file {file_path} has no valid frontmatter")
+ return None
+
+ frontmatter_str, body = match.group(1), match.group(2)
+
+ try:
+ frontmatter = yaml.safe_load(frontmatter_str)
+ except yaml.YAMLError as e:
+ logger.warning(f"Invalid YAML in sub-agent file {file_path}: {e}")
+ return None
+
+ if not isinstance(frontmatter, dict):
+ logger.warning(f"Frontmatter in {file_path} is not a dict")
+ return None
+
+ name = frontmatter.get("name", "").strip() if isinstance(frontmatter.get("name"), str) else ""
+ description = frontmatter.get("description", "").strip() if isinstance(frontmatter.get("description"), str) else ""
+
+ if not name:
+ logger.warning(f"Sub-agent file {file_path} missing required 'name' field")
+ return None
+ if not description:
+ logger.warning(f"Sub-agent file {file_path} missing required 'description' field")
+ return None
+
+ # Parse optional tools field: comma-separated tool names
+ tool_names = None
+ tools_field = frontmatter.get("tools")
+ if tools_field is not None:
+ if isinstance(tools_field, str):
+ tool_names = [t.strip() for t in tools_field.split(",") if t.strip()]
+ elif isinstance(tools_field, list):
+ tool_names = [str(t).strip() for t in tools_field if str(t).strip()]
+ else:
+ logger.warning(f"Invalid 'tools' field in {file_path}, expected string or list")
+
+ return {
+ "name": name,
+ "description": description,
+ "system_prompt": body.strip(),
+ "tool_names": tool_names,
+ "source": str(file_path),
+ }
+
+
+def _filter_tools_by_names(all_tools: list[BaseTool], tool_names: list[str]) -> list[BaseTool]:
+ """Filter MCP tools by name whitelist.
+
+ Args:
+ all_tools: All available MCP tools.
+ tool_names: Whitelist of tool names to include.
+
+ Returns:
+ Filtered list of tools. Logs warning for names not found.
+ """
+ tool_lookup = {tool.name: tool for tool in all_tools}
+ filtered = []
+ for name in tool_names:
+ if name in tool_lookup:
+ filtered.append(tool_lookup[name])
+ else:
+ available = list(tool_lookup.keys())
+ logger.warning(f"Sub-agent tool '{name}' not found in MCP tools. Available: {available}")
+ return filtered
+
+
+async def load_subagents(
+ bot_id: str,
+ tools: list[BaseTool],
+ model: BaseChatModel,
+) -> list[SubAgent]:
+ """Load sub-agent definitions from skill directories.
+
+ Scans all skill directories for the given bot_id, looking for agents/*.md files
+ in each skill subdirectory.
+
+ Args:
+ bot_id: Bot identifier for locating skill directories.
+ tools: All available MCP tools for filtering.
+ model: The main agent's model, used by each sub-agent.
+
+ Returns:
+ List of SubAgent dicts. Empty list if no sub-agents found.
+ """
+ skill_dirs = _get_skill_dirs(bot_id)
+ parsed_agents: dict[str, dict] = {} # name -> parsed dict (last-wins for dedup)
+
+ for skill_dir in skill_dirs:
+ if not os.path.exists(skill_dir):
+ continue
+
+ for skill_name in os.listdir(skill_dir):
+ skill_path = os.path.join(skill_dir, skill_name)
+ if not os.path.isdir(skill_path):
+ continue
+
+ agents_dir = Path(skill_path) / "agents"
+ if not agents_dir.exists():
+ continue
+
+ for md_file in agents_dir.glob("*.md"):
+ parsed = _parse_agent_md(md_file)
+ if parsed is None:
+ continue
+
+ name = parsed["name"]
+ if name in parsed_agents:
+ logger.warning(
+ f"Duplicate sub-agent name '{name}': "
+ f"{parsed_agents[name]['source']} overridden by {parsed['source']}"
+ )
+ parsed_agents[name] = parsed
+
+ if not parsed_agents:
+ return []
+
+ # Build SubAgent dicts with model and filtered tools
+ subagents: list[SubAgent] = []
+ for name, parsed in parsed_agents.items():
+ # Filter tools: if tool_names specified, filter; otherwise inherit all
+ if parsed["tool_names"] is not None:
+ filtered_tools = _filter_tools_by_names(tools, parsed["tool_names"])
+ else:
+ filtered_tools = list(tools)
+
+ subagent: SubAgent = {
+ "name": name,
+ "description": parsed["description"],
+ "system_prompt": parsed["system_prompt"],
+ "model": model,
+ "tools": filtered_tools,
+ }
+ subagents.append(subagent)
+ logger.info(f"Loaded sub-agent '{name}' with {len(filtered_tools)} tools from {parsed['source']}")
+
+ return subagents
diff --git a/routes/chat.py b/routes/chat.py
index 47f6499..c8e902b 100644
--- a/routes/chat.py
+++ b/routes/chat.py
@@ -3,6 +3,7 @@ import os
import asyncio
import shutil
import time
+import traceback
from typing import Union, Optional, Any, List, Dict
from fastapi import APIRouter, HTTPException, Header, Body
from fastapi.responses import StreamingResponse
@@ -25,6 +26,7 @@ from agent.agent_config import AgentConfig
from agent.deep_assistant import init_agent
from utils.daytona_sync import sync_sandbox_to_local
from utils.settings import DAYTONA_ENABLED
+from utils.structured_log import emit_question_metric
router = APIRouter()
@@ -43,6 +45,7 @@ async def enhanced_generate_stream_response(
# Cancellation management
cancel_event = None
+ request_started_at = config.request_started_at or time.monotonic()
try:
# Create output queue and control events
@@ -89,6 +92,8 @@ async def enhanced_generate_stream_response(
logger.info(f"Starting agent stream response")
chunk_id = 0
message_tag = ""
+ last_answer_first_char_duration_ms = None
+ waiting_for_answer_first_char = False
agent, checkpointer, sandbox = await init_agent(config)
async for msg, metadata in agent.astream({"messages": config.messages}, stream_mode="messages", config=config.invoke_config(), max_tokens=MAX_OUTPUT_TOKENS):
# Check whether a cancellation signal was received
@@ -102,6 +107,7 @@ async def enhanced_generate_stream_response(
# Handle tool calls
if msg.tool_call_chunks:
message_tag = "TOOL_CALL"
+ waiting_for_answer_first_char = False
if config.tool_response:
for tool_call_chunk in msg.tool_call_chunks:
chunk_name = tool_call_chunk.get("name") if isinstance(tool_call_chunk, dict) else getattr(tool_call_chunk, "name", None)
@@ -120,12 +126,20 @@ async def enhanced_generate_stream_response(
continue
if meta_message_tag != message_tag:
message_tag = meta_message_tag
+ waiting_for_answer_first_char = meta_message_tag == "ANSWER"
new_content = f"[{meta_message_tag}]\n"
if msg.text:
+ if meta_message_tag == "ANSWER" and waiting_for_answer_first_char and msg.text.strip():
+ last_answer_first_char_duration_ms = max(
+ int((time.monotonic() - request_started_at) * 1000),
+ 0,
+ )
+ waiting_for_answer_first_char = False
new_content += msg.text
# Handle tool responses
elif isinstance(msg, ToolMessage) and msg.content:
message_tag = "TOOL_RESPONSE"
+ waiting_for_answer_first_char = False
if config.tool_response:
new_content = f"[{message_tag}] {msg.name}\n{msg.text}\n"
@@ -142,6 +156,25 @@ async def enhanced_generate_stream_response(
# Send final chunk
finish = "cancelled" if (cancel_event and cancel_event.is_set()) else "stop"
+ if last_answer_first_char_duration_ms is not None:
+ emit_question_metric(
+ stage="catalog_agent.final_answer_first_char",
+ status="cancel" if finish == "cancelled" else "success",
+ duration_ms=last_answer_first_char_duration_ms,
+ first_response_time_ms=last_answer_first_char_duration_ms,
+ trace_id=config.trace_id,
+ ai_id=config.bot_id,
+ session_id=config.session_id,
+ robot_type="agent",
+ model=config.model_name,
+ stream=config.stream,
+ extra={
+ "bot_id": config.bot_id,
+ "tool_response": config.tool_response,
+ "enable_thinking": config.enable_thinking,
+ "response_mode": "final_answer_first_char",
+ },
+ )
final_chunk = create_stream_chunk(f"chatcmpl-{chunk_id + 1}", config.model_name, finish_reason=finish)
await output_queue.put(("agent", f"data: {json.dumps(final_chunk, ensure_ascii=False)}\n\n"))
# ============ Execute PostAgent hooks ============
@@ -153,9 +186,11 @@ async def enhanced_generate_stream_response(
await output_queue.put(("agent_done", None))
except Exception as e:
- logger.error(f"Error in agent task: {e}")
+ logger.error(f"Error in agent task: {e}\n{traceback.format_exc()}")
# Send error information to the client
- await output_queue.put(("agent", f'data: {{"error": "{str(e)}"}}\n\n'))
+ await output_queue.put(
+ ("agent", f"data: {json.dumps({'error': str(e)}, ensure_ascii=False)}\n\n")
+ )
# Send completion signal to ensure the output controller exits normally
await output_queue.put(("agent_done", None))
@@ -511,6 +546,7 @@ async def chat_completions(request: ChatRequest, authorization: Optional[str] =
{"dataset_ids": ["project-123", "project-456"], "bot_id": "my-bot-002", "messages": [{"role": "user", "content": "Hello"}]}
{"dataset_ids": ["project-123"], "bot_id": "my-catalog-bot", "messages": [{"role": "user", "content": "Hello"}]}
"""
+ request_started_at = time.monotonic()
try:
# v1 endpoint: extract the API key from the Authorization header as the model API key
api_key = extract_api_key_from_auth(authorization)
@@ -531,6 +567,7 @@ async def chat_completions(request: ChatRequest, authorization: Optional[str] =
messages = process_messages(request.messages, request.language)
# Create AgentConfig object
config = await AgentConfig.from_v1_request(request, api_key, project_dir, generate_cfg, messages)
+ config.request_started_at = request_started_at
# Call the shared agent creation and response generation logic
return await create_agent_and_generate_response(config)
@@ -753,6 +790,7 @@ async def chat_completions_v2(request: ChatRequestV2, authorization: Optional[st
- Uses MD5 hash of MASTERKEY:bot_id for backend API authentication
- Optionally uses API key from bot config for model access
"""
+ request_started_at = time.monotonic()
try:
# Get bot_id (required parameter)
bot_id = request.bot_id
@@ -799,6 +837,7 @@ async def chat_completions_v2(request: ChatRequestV2, authorization: Optional[st
api_key = req_api_key if req_api_key and req_api_key != "whatever" else None
# Create AgentConfig object
config = await AgentConfig.from_v2_request(request, bot_config, project_dir, messages, generate_cfg, model_name=model_name, model_server=model_server, api_key=api_key)
+ config.request_started_at = request_started_at
# Call the shared agent creation and response generation logic
return await create_agent_and_generate_response(config)
diff --git a/skills/autoload/onprem/rag-retrieve/hooks/retrieval-policy-forbidden-self-knowledge.md b/skills/autoload/onprem/rag-retrieve/hooks/retrieval-policy-forbidden-self-knowledge.md
index b6c1296..7ea5e4b 100644
--- a/skills/autoload/onprem/rag-retrieve/hooks/retrieval-policy-forbidden-self-knowledge.md
+++ b/skills/autoload/onprem/rag-retrieve/hooks/retrieval-policy-forbidden-self-knowledge.md
@@ -14,7 +14,7 @@ For knowledge retrieval tasks, **this policy overrides generic codebase explorat
- **Prohibited answer source**: the model's own parametric knowledge, memory, prior world knowledge, intuition, common sense completion, or unsupported inference.
- **Prohibited tools**: `Glob`, `Read`, `LS`, Bash (`ls`, `find`, `cat`, `head`, `tail`, `grep`, etc.) — these are forbidden even when retrieval results are empty/insufficient, even if local files seem helpful.
-- **Allowed tools only**: skill-enabled retrieval tools, `table_rag_retrieve`, `rag_retrieve`. No other source for factual answering.
+- **Allowed tools only**: skill-enabled retrieval tools, `rag_retrieve`. No other source for factual answering.
- Local filesystem is a **prohibited** knowledge source, not merely non-recommended.
- Exception: user explicitly asks to read a specific local file as the task itself.
- If retrieval evidence is absent, insufficient, or ambiguous, **do not fill the gap with model knowledge**.
@@ -35,13 +35,18 @@ For any knowledge retrieval task:
Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe filesystem first.
1. **Skill-enabled retrieval tools** (use first when available)
-2. **`table_rag_retrieve`** or **`rag_retrieve`**:
- - Prefer `table_rag_retrieve` for: values, prices, quantities, specs, rankings, comparisons, lists, tables, name lookup, historical coverage, mixed/unclear cases.
- - Prefer `rag_retrieve` for: pure concept, definition, workflow, policy, or explanation questions only.
+2. **`rag_retrieve`**
- After each step, evaluate sufficiency before proceeding.
- Retrieval must happen **before** any factual answer generation.
+### First-Call Success Principle
+
+- The first retrieval call is expected to return sufficient results for most questions.
+- Your default assumption should be: **one call is enough**.
+- Additional calls are the exception, not the norm. Only retry when results are genuinely useless (empty, error, completely off-topic).
+- **Never retry just to "find better results" or "get more comprehensive coverage".** Good enough is sufficient.
+
## 4. Query Preparation
- Do NOT pass raw user question unless it already works well for retrieval.
@@ -50,27 +55,51 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi
## 5. Retrieval Breadth (`top_k`)
-- Apply `top_k` only to `rag_retrieve`. Use smallest sufficient value, expand if insufficient.
-- `30` for simple fact lookup → `50` for moderate synthesis/comparison → `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
-- Expansion order: `30 → 50 → 100`. If unsure, use `100`.
+- Apply `top_k` only to `rag_retrieve`. Choose the appropriate value upfront to maximize first-call success.
+- Use `50` for simple fact lookup or moderate synthesis, comparison, summarization, disambiguation.
+- Use `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
+- If unsure, use `50`. Escalate to `100` only on a retry call, and only when the first call's results are insufficient.
## 6. Result Evaluation
-Treat as insufficient if: empty, `Error:`, `no excel files found`, off-topic, missing core entity/scope, no usable evidence, partial coverage, truncated results, or claims required by the answer are not explicitly supported.
+**Maximum 3 retrieval calls per question.** After each call, evaluate immediately:
+
+### Sufficient — answer immediately, no more calls
+
+ANY of the following means results are sufficient — STOP and answer now:
+- The core entity/topic in the user's question appears in the results.
+- There is ANY direct or indirect evidence relevant to the user's question.
+- Results are partially relevant, even if not perfectly comprehensive.
+- You can compose a meaningful answer (even a partial one) from the retrieved content.
+
+**Anti-patterns — do NOT do these:**
+- ❌ "The results are good, but maybe different keywords could find something better."
+- ❌ "I have enough to answer, but let me try one more query to be thorough."
+- ❌ "The answer is here, but I want to double-check with a different query."
+- ❌ Calling retrieval again after you have already identified the answer in previous results.
+
+**If you can answer the question with current results, you MUST answer immediately. Period.**
+
+### Insufficient — the ONLY valid reasons to retry
+
+- Results are completely empty or contain only `Error:` messages.
+- ALL results are entirely off-topic with zero relevance to the user's question.
+- No usable evidence exists at all — you cannot form even a partial answer.
+
+**"Results are not detailed enough" is NOT a valid reason to retry.**
+**"Results might be incomplete" is NOT a valid reason to retry.**
## 7. Fallback and Sequential Retry
-On insufficient results, follow this sequence:
+On insufficient results, you may retry **up to 2 more times** (3 calls total):
-1. Rewrite query, retry same tool (once)
-2. Switch to next retrieval source in default order
-3. For `rag_retrieve`, expand `top_k`: `30 → 50 → 100`
-4. `table_rag_retrieve` insufficient → try `rag_retrieve`; `rag_retrieve` insufficient → try `table_rag_retrieve`
+1. Rewrite query, retry same tool.
+2. For `rag_retrieve`, escalate `top_k` to `100` on retry.
-- `table_rag_retrieve` internally falls back to `rag_retrieve` on `no excel files found`, but this does NOT change the higher-level order.
-- Say "no relevant information was found" **only after** exhausting all retrieval sources.
+- Say "no relevant information was found" **only after** exhausting all retries.
- Do NOT switch to local filesystem inspection at any point.
- Do NOT switch to model self-knowledge at any point.
+- Do NOT make more than 3 retrieval calls in total per question, counted across all retrieval tools.
## 8. Handling Missing or Partial Evidence
@@ -79,13 +108,7 @@ On insufficient results, follow this sequence:
- Prefer "the retrieved materials do not provide this information" over speculative completion.
- When user asks for a definitive answer but evidence is incomplete, state the limitation directly.
-## 9. Table RAG Result Handling
-
-- Follow all `[INSTRUCTION]` and `[EXTRA_INSTRUCTION]` in results.
-- If truncated: tell user total (`N+M`), displayed (`N`), omitted (`M`).
-- Cite sources using filenames from `file_ref_table`.
-
-## 10. Image Handling
+## 9. Image Handling
- The content returned by the `rag_retrieve` tool may include images.
- Each image is exclusively associated with its nearest text or sentence.
@@ -94,14 +117,7 @@ On insufficient results, follow this sequence:
- Each sentence or key point in the response should be accompanied by relevant images when they meet the established association criteria.
- Avoid placing all images at the end of the response.
-## 11. Citation Requirements
-
-- MUST generate `` tags when using retrieval results.
-- Place citations immediately after the paragraph or bullet list using the knowledge. Do NOT collect at end.
-- 1-2 citations per paragraph/bullet. At least 1 citation when using retrieved knowledge.
-- Do NOT cite claims that were not supported by retrieval.
-
-## 12. Self-Knowledge Prohibition
+## 10. Self-Knowledge Prohibition
This section applies whenever self-knowledge is disabled or forbidden for the current task.
@@ -111,19 +127,19 @@ This section applies whenever self-knowledge is disabled or forbidden for the cu
- The model must not supplement missing parts with general knowledge, conceptual explanation, common background, intuition, or likely completion.
- The model must not use self-knowledge to invent or complete private, internal, current, precise, or source-sensitive facts.
- The model must not use self-knowledge to invent or complete prices, fees, discounts, rankings, internal policies, user-specific details, current status, latest updates, exact numbers, dates, metrics, or specifications.
-- Retrieved facts must include citations.
- Unsupported parts must be stated as unavailable rather than guessed.
- If a paragraph would mix retrieved facts and unsupported completion, remove the unsupported completion.
- If evidence is incomplete, state the limitation explicitly.
-## 13. Pre-Reply Self-Check
+## 11. Pre-Reply Self-Check
Before replying to a knowledge retrieval task, verify:
- Used only whitelisted retrieval tools — no local filesystem inspection?
+- Called retrieval at most 3 times total (not more)?
+- Answered immediately when results were sufficient (did NOT call again unnecessarily)?
+- Called retrieval exactly once when first results were sufficient (did NOT retry unnecessarily)?
- Did retrieval happen before any factual answer drafting?
- Did every factual claim come from retrieved evidence rather than model knowledge?
-- Exhausted retrieval flow before concluding "not found"?
-- Citations placed immediately after each relevant paragraph?
- If any unsupported part remained, was it removed or explicitly marked unavailable?
If any answer is "no", correct the process first.
diff --git a/skills/autoload/onprem/rag-retrieve/hooks/retrieval-policy.md b/skills/autoload/onprem/rag-retrieve/hooks/retrieval-policy.md
index 1f0c1fe..61378ff 100644
--- a/skills/autoload/onprem/rag-retrieve/hooks/retrieval-policy.md
+++ b/skills/autoload/onprem/rag-retrieve/hooks/retrieval-policy.md
@@ -29,6 +29,13 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi
- Do NOT answer from model knowledge first.
- After each step, evaluate sufficiency before proceeding.
+### First-Call Success Principle
+
+- The first retrieval call is expected to return sufficient results for most questions.
+- Your default assumption should be: **one call is enough**.
+- Additional calls are the exception, not the norm. Only retry when results are genuinely useless (empty, error, completely off-topic).
+- **Never retry just to "find better results" or "get more comprehensive coverage".** Good enough is sufficient.
+
## 3. Query Preparation
- Do NOT pass raw user question unless it already works well for retrieval.
@@ -37,26 +44,53 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi
## 4. Retrieval Breadth (`top_k`)
-- Apply `top_k` only to `rag_retrieve`. Use smallest sufficient value, expand if insufficient.
-- `30` for simple fact lookup → `50` for moderate synthesis/comparison → `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
-- Expansion order: `30 → 50 → 100`. If unsure, use `100`.
+- Apply `top_k` only to `rag_retrieve`. Choose the appropriate value upfront to maximize first-call success.
+- Use `50` for simple fact lookup or moderate synthesis, comparison, summarization, disambiguation.
+- Use `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
+- If unsure, use `50`. Only escalate to `100` on the retry call if first results are insufficient.
## 5. Result Evaluation
-Treat as insufficient if: empty, `Error:`, `no excel files found`, off-topic, missing core entity/scope, no usable evidence, partial coverage, or truncated results.
+**Maximum 3 retrieval calls per question.** After each call, evaluate immediately:
+
+### Sufficient — answer immediately, no more calls
+
+ANY of the following means results are sufficient — STOP and answer now:
+- The core entity/topic in the user's question appears in the results.
+- There is ANY direct or indirect evidence relevant to the user's question.
+- Results are partially relevant, even if not perfectly comprehensive.
+- You can compose a meaningful answer (even a partial one) from the retrieved content.
+
+**Anti-patterns — do NOT do these:**
+- ❌ "The results are good, but maybe different keywords could find something better."
+- ❌ "I have enough to answer, but let me try one more query to be thorough."
+- ❌ "The answer is here, but I want to double-check with a different query."
+- ❌ Calling retrieval again after you have already identified the answer in previous results.
+
+**If you can answer the question with current results, you MUST answer immediately. Period.**
+
+### Insufficient — the ONLY valid reasons to retry
+
+- Results are completely empty or contain only `Error:` / `no excel files found` messages.
+- ALL results are entirely off-topic with zero relevance to the user's question.
+- No usable evidence exists at all — you cannot form even a partial answer.
+
+**"Results are not detailed enough" is NOT a valid reason to retry.**
+**"Results might be incomplete" is NOT a valid reason to retry.**
## 6. Fallback and Sequential Retry
-On insufficient results, follow this sequence:
+On insufficient results, you may retry **up to 2 more times** (3 calls total):
-1. Rewrite query, retry same tool (once)
-2. Switch to next retrieval source in default order
-3. For `rag_retrieve`, expand `top_k`: `30 → 50 → 100`
-4. `table_rag_retrieve` insufficient → try `rag_retrieve`; `rag_retrieve` insufficient → try `table_rag_retrieve`
+1. Rewrite query, retry same tool.
+2. Switch to next retrieval source in default order.
+3. For `rag_retrieve`, escalate `top_k` to `100` on retry.
+4. `table_rag_retrieve` insufficient → try `rag_retrieve`; `rag_retrieve` insufficient → try `table_rag_retrieve`.
- `table_rag_retrieve` internally falls back to `rag_retrieve` on `no excel files found`, but this does NOT change the higher-level order.
- Say "no relevant information was found" **only after** exhausting all retrieval sources.
- Do NOT switch to local filesystem inspection at any point.
+- Do NOT call any retrieval tool more than 3 times in total.
## 7. Table RAG Result Handling
@@ -99,7 +133,9 @@ This section applies only when self-knowledge is enabled.
Before replying to a knowledge retrieval task, verify:
- Used only whitelisted retrieval tools — no local filesystem inspection?
-- Exhausted retrieval flow before concluding "not found"?
+- Called retrieval at most 3 times total (not more)?
+- Answered immediately when results were sufficient (did NOT call again unnecessarily)?
+- Called retrieval exactly once when first results were sufficient (did NOT retry unnecessarily)?
- Citations placed immediately after each relevant paragraph?
- If self-knowledge was used, was it clearly separated from retrieved facts and limited to allowed supplement scope?
diff --git a/skills/autoload/onprem/rag-retrieve/rag_retrieve_server.py b/skills/autoload/onprem/rag-retrieve/rag_retrieve_server.py
index 2575644..6f308e2 100644
--- a/skills/autoload/onprem/rag-retrieve/rag_retrieve_server.py
+++ b/skills/autoload/onprem/rag-retrieve/rag_retrieve_server.py
@@ -73,7 +73,7 @@ Format: ``
"""
-def rag_retrieve(query: str, top_k: int = 100) -> Dict[str, Any]:
+def rag_retrieve(query: str, top_k: int = 100, trace_id: str = "") -> Dict[str, Any]:
"""Call the RAG retrieval API."""
try:
bot_id = ""
@@ -100,6 +100,8 @@ def rag_retrieve(query: str, top_k: int = 100) -> Dict[str, Any]:
"content-type": "application/json",
"authorization": f"Bearer {auth_token}"
}
+ if trace_id:
+ headers["X-Request-ID"] = trace_id
data = {
"query": query,
"top_k": top_k
@@ -172,7 +174,7 @@ def rag_retrieve(query: str, top_k: int = 100) -> Dict[str, Any]:
}
-def table_rag_retrieve(query: str) -> Dict[str, Any]:
+def table_rag_retrieve(query: str, trace_id: str = "") -> Dict[str, Any]:
"""Call the Table RAG retrieval API."""
try:
bot_id = ""
@@ -189,6 +191,8 @@ def table_rag_retrieve(query: str) -> Dict[str, Any]:
"content-type": "application/json",
"authorization": f"Bearer {auth_token}"
}
+ if trace_id:
+ headers["X-Request-ID"] = trace_id
data = {
"query": query,
}
@@ -220,7 +224,7 @@ def table_rag_retrieve(query: str) -> Dict[str, Any]:
if "markdown" in response_data:
markdown_content = response_data["markdown"]
if re.search(r"^no excel files found", markdown_content, re.IGNORECASE):
- rag_result = rag_retrieve(query)
+ rag_result = rag_retrieve(query, trace_id=trace_id)
content = rag_result.get("content", [])
if content and content[0].get("type") == "text":
content[0]["text"] = "No table_rag_retrieve results were found. The content below is the fallback result from rag_retrieve:\n\n" + content[0]["text"]
@@ -302,6 +306,8 @@ async def handle_request(request: Dict[str, Any]) -> Dict[str, Any]:
elif method == "tools/call":
tool_name = params.get("name")
arguments = params.get("arguments", {})
+ meta = params.get("_meta") or params.get("meta") or {}
+ trace_id = meta.get("trace_id", "") if isinstance(meta, dict) else ""
if tool_name == "rag_retrieve":
query = arguments.get("query", "")
@@ -310,7 +316,7 @@ async def handle_request(request: Dict[str, Any]) -> Dict[str, Any]:
if not query:
return create_error_response(request_id, -32602, "Missing required parameter: query")
- result = rag_retrieve(query, top_k)
+ result = rag_retrieve(query, top_k, trace_id)
return {
"jsonrpc": "2.0",
@@ -324,7 +330,7 @@ async def handle_request(request: Dict[str, Any]) -> Dict[str, Any]:
if not query:
return create_error_response(request_id, -32602, "Missing required parameter: query")
- result = table_rag_retrieve(query)
+ result = table_rag_retrieve(query, trace_id)
return {
"jsonrpc": "2.0",
diff --git a/skills/autoload/support/rag-retrieve/hooks/retrieval-policy-forbidden-self-knowledge.md b/skills/autoload/support/rag-retrieve/hooks/retrieval-policy-forbidden-self-knowledge.md
index b6c1296..061c855 100644
--- a/skills/autoload/support/rag-retrieve/hooks/retrieval-policy-forbidden-self-knowledge.md
+++ b/skills/autoload/support/rag-retrieve/hooks/retrieval-policy-forbidden-self-knowledge.md
@@ -42,6 +42,13 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi
- After each step, evaluate sufficiency before proceeding.
- Retrieval must happen **before** any factual answer generation.
+### First-Call Success Principle
+
+- The first retrieval call is expected to return sufficient results for most questions.
+- Your default assumption should be: **one call is enough**.
+- Additional calls are the exception, not the norm. Only retry when results are genuinely useless (empty, error, completely off-topic).
+- **Never retry just to "find better results" or "get more comprehensive coverage".** Good enough is sufficient.
+
## 4. Query Preparation
- Do NOT pass raw user question unless it already works well for retrieval.
@@ -50,27 +57,54 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi
## 5. Retrieval Breadth (`top_k`)
-- Apply `top_k` only to `rag_retrieve`. Use smallest sufficient value, expand if insufficient.
-- `30` for simple fact lookup → `50` for moderate synthesis/comparison → `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
-- Expansion order: `30 → 50 → 100`. If unsure, use `100`.
+- Apply `top_k` only to `rag_retrieve`. Choose the appropriate value upfront to maximize first-call success.
+- Use `50` for simple fact lookup or moderate synthesis, comparison, summarization, disambiguation.
+- Use `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
+- If unsure, use `50`. Only escalate to `100` on the retry call if first results are insufficient.
## 6. Result Evaluation
-Treat as insufficient if: empty, `Error:`, `no excel files found`, off-topic, missing core entity/scope, no usable evidence, partial coverage, truncated results, or claims required by the answer are not explicitly supported.
+**Maximum 3 retrieval calls per question.** After each call, evaluate immediately:
+
+### Sufficient — answer immediately, no more calls
+
+ANY of the following means results are sufficient — STOP and answer now:
+- The core entity/topic in the user's question appears in the results.
+- There is ANY direct or indirect evidence relevant to the user's question.
+- Results are partially relevant, even if not perfectly comprehensive.
+- You can compose a meaningful answer (even a partial one) from the retrieved content.
+
+**Anti-patterns — do NOT do these:**
+- ❌ "The results are good, but maybe different keywords could find something better."
+- ❌ "I have enough to answer, but let me try one more query to be thorough."
+- ❌ "The answer is here, but I want to double-check with a different query."
+- ❌ Calling retrieval again after you have already identified the answer in previous results.
+
+**If you can answer the question with current results, you MUST answer immediately. Period.**
+
+### Insufficient — the ONLY valid reasons to retry
+
+- Results are completely empty or contain only `Error:` / `no excel files found` messages.
+- ALL results are entirely off-topic with zero relevance to the user's question.
+- No usable evidence exists at all — you cannot form even a partial answer.
+
+**"Results are not detailed enough" is NOT a valid reason to retry.**
+**"Results might be incomplete" is NOT a valid reason to retry.**
## 7. Fallback and Sequential Retry
-On insufficient results, follow this sequence:
+On insufficient results, you may retry **up to 2 more times** (3 calls total):
-1. Rewrite query, retry same tool (once)
-2. Switch to next retrieval source in default order
-3. For `rag_retrieve`, expand `top_k`: `30 → 50 → 100`
-4. `table_rag_retrieve` insufficient → try `rag_retrieve`; `rag_retrieve` insufficient → try `table_rag_retrieve`
+1. Rewrite query, retry same tool.
+2. Switch to next retrieval source in default order.
+3. For `rag_retrieve`, escalate `top_k` to `100` on retry.
+4. `table_rag_retrieve` insufficient → try `rag_retrieve`; `rag_retrieve` insufficient → try `table_rag_retrieve`.
- `table_rag_retrieve` internally falls back to `rag_retrieve` on `no excel files found`, but this does NOT change the higher-level order.
-- Say "no relevant information was found" **only after** exhausting all retrieval sources.
+- Say "no relevant information was found" **only after** exhausting all retries.
- Do NOT switch to local filesystem inspection at any point.
- Do NOT switch to model self-knowledge at any point.
+- Do NOT call any retrieval tool more than 3 times in total.
## 8. Handling Missing or Partial Evidence
@@ -83,7 +117,6 @@ On insufficient results, follow this sequence:
- Follow all `[INSTRUCTION]` and `[EXTRA_INSTRUCTION]` in results.
- If truncated: tell user total (`N+M`), displayed (`N`), omitted (`M`).
-- Cite sources using filenames from `file_ref_table`.
## 10. Image Handling
@@ -94,14 +127,7 @@ On insufficient results, follow this sequence:
- Each sentence or key point in the response should be accompanied by relevant images when they meet the established association criteria.
- Avoid placing all images at the end of the response.
-## 11. Citation Requirements
-
-- MUST generate `` tags when using retrieval results.
-- Place citations immediately after the paragraph or bullet list using the knowledge. Do NOT collect at end.
-- 1-2 citations per paragraph/bullet. At least 1 citation when using retrieved knowledge.
-- Do NOT cite claims that were not supported by retrieval.
-
-## 12. Self-Knowledge Prohibition
+## 11. Self-Knowledge Prohibition
This section applies whenever self-knowledge is disabled or forbidden for the current task.
@@ -111,19 +137,19 @@ This section applies whenever self-knowledge is disabled or forbidden for the cu
- The model must not supplement missing parts with general knowledge, conceptual explanation, common background, intuition, or likely completion.
- The model must not use self-knowledge to invent or complete private, internal, current, precise, or source-sensitive facts.
- The model must not use self-knowledge to invent or complete prices, fees, discounts, rankings, internal policies, user-specific details, current status, latest updates, exact numbers, dates, metrics, or specifications.
-- Retrieved facts must include citations.
- Unsupported parts must be stated as unavailable rather than guessed.
- If a paragraph would mix retrieved facts and unsupported completion, remove the unsupported completion.
- If evidence is incomplete, state the limitation explicitly.
-## 13. Pre-Reply Self-Check
+## 12. Pre-Reply Self-Check
Before replying to a knowledge retrieval task, verify:
- Used only whitelisted retrieval tools — no local filesystem inspection?
+- Called retrieval at most 3 times total (not more)?
+- Answered immediately when results were sufficient (did NOT call again unnecessarily)?
+- Called retrieval exactly once when first results were sufficient (did NOT retry unnecessarily)?
- Did retrieval happen before any factual answer drafting?
- Did every factual claim come from retrieved evidence rather than model knowledge?
-- Exhausted retrieval flow before concluding "not found"?
-- Citations placed immediately after each relevant paragraph?
- If any unsupported part remained, was it removed or explicitly marked unavailable?
If any answer is "no", correct the process first.
diff --git a/skills/autoload/support/rag-retrieve/hooks/retrieval-policy.md b/skills/autoload/support/rag-retrieve/hooks/retrieval-policy.md
index 1f0c1fe..61378ff 100644
--- a/skills/autoload/support/rag-retrieve/hooks/retrieval-policy.md
+++ b/skills/autoload/support/rag-retrieve/hooks/retrieval-policy.md
@@ -29,6 +29,13 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi
- Do NOT answer from model knowledge first.
- After each step, evaluate sufficiency before proceeding.
+### First-Call Success Principle
+
+- The first retrieval call is expected to return sufficient results for most questions.
+- Your default assumption should be: **one call is enough**.
+- Additional calls are the exception, not the norm. Only retry when results are genuinely useless (empty, error, completely off-topic).
+- **Never retry just to "find better results" or "get more comprehensive coverage".** Good enough is sufficient.
+
## 3. Query Preparation
- Do NOT pass raw user question unless it already works well for retrieval.
@@ -37,26 +44,53 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi
## 4. Retrieval Breadth (`top_k`)
-- Apply `top_k` only to `rag_retrieve`. Use smallest sufficient value, expand if insufficient.
-- `30` for simple fact lookup → `50` for moderate synthesis/comparison → `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
-- Expansion order: `30 → 50 → 100`. If unsure, use `100`.
+- Apply `top_k` only to `rag_retrieve`. Choose the appropriate value upfront to maximize first-call success.
+- Use `50` for simple fact lookup or moderate synthesis, comparison, summarization, disambiguation.
+- Use `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
+- If unsure, use `50`. Only escalate to `100` on the retry call if first results are insufficient.
## 5. Result Evaluation
-Treat as insufficient if: empty, `Error:`, `no excel files found`, off-topic, missing core entity/scope, no usable evidence, partial coverage, or truncated results.
+**Maximum 3 retrieval calls per question.** After each call, evaluate immediately:
+
+### Sufficient — answer immediately, no more calls
+
+ANY of the following means results are sufficient — STOP and answer now:
+- The core entity/topic in the user's question appears in the results.
+- There is ANY direct or indirect evidence relevant to the user's question.
+- Results are partially relevant, even if not perfectly comprehensive.
+- You can compose a meaningful answer (even a partial one) from the retrieved content.
+
+**Anti-patterns — do NOT do these:**
+- ❌ "The results are good, but maybe different keywords could find something better."
+- ❌ "I have enough to answer, but let me try one more query to be thorough."
+- ❌ "The answer is here, but I want to double-check with a different query."
+- ❌ Calling retrieval again after you have already identified the answer in previous results.
+
+**If you can answer the question with current results, you MUST answer immediately. Period.**
+
+### Insufficient — the ONLY valid reasons to retry
+
+- Results are completely empty or contain only `Error:` / `no excel files found` messages.
+- ALL results are entirely off-topic with zero relevance to the user's question.
+- No usable evidence exists at all — you cannot form even a partial answer.
+
+**"Results are not detailed enough" is NOT a valid reason to retry.**
+**"Results might be incomplete" is NOT a valid reason to retry.**
## 6. Fallback and Sequential Retry
-On insufficient results, follow this sequence:
+On insufficient results, you may retry **up to 2 more times** (3 calls total):
-1. Rewrite query, retry same tool (once)
-2. Switch to next retrieval source in default order
-3. For `rag_retrieve`, expand `top_k`: `30 → 50 → 100`
-4. `table_rag_retrieve` insufficient → try `rag_retrieve`; `rag_retrieve` insufficient → try `table_rag_retrieve`
+1. Rewrite query, retry same tool.
+2. Switch to next retrieval source in default order.
+3. For `rag_retrieve`, escalate `top_k` to `100` on retry.
+4. `table_rag_retrieve` insufficient → try `rag_retrieve`; `rag_retrieve` insufficient → try `table_rag_retrieve`.
- `table_rag_retrieve` internally falls back to `rag_retrieve` on `no excel files found`, but this does NOT change the higher-level order.
- Say "no relevant information was found" **only after** exhausting all retrieval sources.
- Do NOT switch to local filesystem inspection at any point.
+- Do NOT call any retrieval tool more than 3 times in total.
## 7. Table RAG Result Handling
@@ -99,7 +133,9 @@ This section applies only when self-knowledge is enabled.
Before replying to a knowledge retrieval task, verify:
- Used only whitelisted retrieval tools — no local filesystem inspection?
-- Exhausted retrieval flow before concluding "not found"?
+- Called retrieval at most 3 times total (not more)?
+- Answered immediately when results were sufficient (did NOT call again unnecessarily)?
+- Called retrieval exactly once when first results were sufficient (did NOT retry unnecessarily)?
- Citations placed immediately after each relevant paragraph?
- If self-knowledge was used, was it clearly separated from retrieved facts and limited to allowed supplement scope?
diff --git a/skills/autoload/support/rag-retrieve/rag_retrieve_server.py b/skills/autoload/support/rag-retrieve/rag_retrieve_server.py
index 671a456..09e0924 100644
--- a/skills/autoload/support/rag-retrieve/rag_retrieve_server.py
+++ b/skills/autoload/support/rag-retrieve/rag_retrieve_server.py
@@ -73,7 +73,7 @@ Format: ``
"""
-def rag_retrieve(query: str, top_k: int = 100) -> Dict[str, Any]:
+def rag_retrieve(query: str, top_k: int = 100, trace_id: str = "") -> Dict[str, Any]:
"""Call the RAG retrieval API."""
try:
bot_id = ""
@@ -100,6 +100,8 @@ def rag_retrieve(query: str, top_k: int = 100) -> Dict[str, Any]:
"content-type": "application/json",
"authorization": f"Bearer {auth_token}"
}
+ if trace_id:
+ headers["X-Request-ID"] = trace_id
data = {
"query": query,
"top_k": top_k
@@ -172,7 +174,7 @@ def rag_retrieve(query: str, top_k: int = 100) -> Dict[str, Any]:
}
-def table_rag_retrieve(query: str) -> Dict[str, Any]:
+def table_rag_retrieve(query: str, trace_id: str = "") -> Dict[str, Any]:
"""Call the Table RAG retrieval API."""
try:
bot_id = ""
@@ -189,6 +191,8 @@ def table_rag_retrieve(query: str) -> Dict[str, Any]:
"content-type": "application/json",
"authorization": f"Bearer {auth_token}"
}
+ if trace_id:
+ headers["X-Request-ID"] = trace_id
data = {
"query": query,
}
@@ -220,7 +224,7 @@ def table_rag_retrieve(query: str) -> Dict[str, Any]:
if "markdown" in response_data:
markdown_content = response_data["markdown"]
if re.search(r"^no excel files found", markdown_content, re.IGNORECASE):
- rag_result = rag_retrieve(query)
+ rag_result = rag_retrieve(query, trace_id=trace_id)
content = rag_result.get("content", [])
if content and content[0].get("type") == "text":
content[0]["text"] = "No table_rag_retrieve results were found. The content below is the fallback result from rag_retrieve:\n\n" + content[0]["text"]
@@ -302,7 +306,9 @@ async def handle_request(request: Dict[str, Any]) -> Dict[str, Any]:
elif method == "tools/call":
tool_name = params.get("name")
arguments = params.get("arguments", {})
-
+ meta = params.get("_meta") or params.get("meta") or {}
+ trace_id = meta.get("trace_id", "") if isinstance(meta, dict) else ""
+
if tool_name == "rag_retrieve":
query = arguments.get("query", "")
top_k = arguments.get("top_k", 100)
@@ -310,7 +316,7 @@ async def handle_request(request: Dict[str, Any]) -> Dict[str, Any]:
if not query:
return create_error_response(request_id, -32602, "Missing required parameter: query")
- result = rag_retrieve(query, top_k)
+ result = rag_retrieve(query, top_k, trace_id)
return {
"jsonrpc": "2.0",
@@ -324,7 +330,7 @@ async def handle_request(request: Dict[str, Any]) -> Dict[str, Any]:
if not query:
return create_error_response(request_id, -32602, "Missing required parameter: query")
- result = table_rag_retrieve(query)
+ result = table_rag_retrieve(query, trace_id)
return {
"jsonrpc": "2.0",
diff --git a/skills/developing/pmda-drug-info/.claude-plugin/plugin.json b/skills/developing/pmda-drug-info/.claude-plugin/plugin.json
new file mode 100644
index 0000000..aa1055c
--- /dev/null
+++ b/skills/developing/pmda-drug-info/.claude-plugin/plugin.json
@@ -0,0 +1,21 @@
+{
+ "name": "pmda-drug-info",
+ "description": "PMDA drug information tools for Japanese pharmaceutical package insert queries. Provides drug search, master info, interactions, restrictions, dosing, and full-text chapter retrieval via PostgreSQL + OpenSearch.",
+ "hooks": {
+ "PrePrompt": [
+ {
+ "type": "command",
+ "command": "python hooks/pre_prompt.py"
+ }
+ ]
+ },
+ "mcpServers": {
+ "pmda_drug_info": {
+ "transport": "stdio",
+ "command": "python",
+ "args": [
+ "./pmda_server.py"
+ ]
+ }
+ }
+}
diff --git a/skills/developing/pmda-drug-info/agents/adverse-event.md b/skills/developing/pmda-drug-info/agents/adverse-event.md
new file mode 100644
index 0000000..f4be105
--- /dev/null
+++ b/skills/developing/pmda-drug-info/agents/adverse-event.md
@@ -0,0 +1,31 @@
+---
+name: adverse_event
+description: Reverse lookup drugs by adverse event name. Find which drugs have reported a specific side effect.
+ Invoke when the user asks "Which drugs cause Stevens-Johnson syndrome?" or "Drugs that prolong QT interval?".
+ Causal inference is prohibited — information presentation only.
+tools: search_section_text, search_drugs, get_drug_master, list_drug_chapters, read_drug_chapter
+---
+
+あなたは「副作用 → 該当薬剤の逆引き」専門の sub-agent です。
+
+【ツール戦略】
+1. `search_section_text(keyword=副作用名, section_filter="副作用")` で逆引き。
+ total_drugs は必ず本文中に明示する。
+2. 同義語が必要なケース:
+ "Stevens-Johnson" ⇔ "皮膚粘膜眼症候群" / "SJS"
+ "QT延長" ⇔ "Torsades de pointes"
+ "間質性肺炎" ⇔ "肺臓炎"
+ OpenSearch の synonym filter が自動展開するので 1 回の検索で OK。
+3. hit から代表薬を 3〜5 件選び、`read_drug_chapter` で 11.1 重大な副作用 / 11.2 その他の副作用
+ verbatim を引用。
+4. 因果推論("この薬がこの患者の症状を起こした")は **絶対しない**。
+ 情報提示のみ。
+
+【絶対ルール】
+1. ツール呼び出し必須。トレーニング知識・教科書・ガイドラインからの推測は禁止。
+2. 数値・固有名・条件は本文表現を改変せず逐語引用。
+3. 出典は **必ず** `[出典: <販売名> (yj_full=<yj_full>) / <章番号 章タイトル>]` の形式。
+ - fact 表 row には `_citation` フィールドが入っているので **そのまま転記**。
+ - `[出典: 薬品マスター]` `[出典: 添付文書]` 等の汎用出典は **絶対禁止**。
+ - read_drug_chapter で実際に読んだ section 以外の出典を捏造しない。
+4. 該当情報が無ければ "添付文書からは確認できません" と書く。
diff --git a/skills/developing/pmda-drug-info/agents/interaction.md b/skills/developing/pmda-drug-info/agents/interaction.md
new file mode 100644
index 0000000..b29e068
--- /dev/null
+++ b/skills/developing/pmda-drug-info/agents/interaction.md
@@ -0,0 +1,28 @@
+---
+name: interaction
+description: Investigate drug-drug interactions between two drugs, or list all interactions for a single drug.
+ Invoke when the user asks "Can drug A and B be used together?" or "What are the interactions of drug A?".
+tools: search_drugs, get_drug_master, get_drug_interactions, search_section_text, list_drug_chapters, read_drug_chapter
+---
+
+あなたは「薬剤間相互作用」専門の sub-agent です。
+
+【ツール戦略】
+- A・B 両薬の yj_code を `search_drugs` で取得。
+- `get_drug_interactions(drug_a_yj=A, drug_b_yj=B)` で双方向検索(A→B も B→A も拾える)。
+- ヒットしたら drug_a の側の出典 section(10.1 / 10.2)を `list_drug_chapters` + `read_drug_chapter` で
+ verbatim 取得。drug_b 側にも該当記載があるか確認。
+- ヒットゼロ → "添付文書上は併用禁忌・併用注意の明確な記載なし" と書く(自由記述/警告等は
+ 別途 `search_section_text(keyword=B薬名, section_filter="相互作用")` で念押し)。
+- 1 薬名のみ与えられた場合は `get_drug_interactions(drug_a_yj=...)` で全相互作用一覧。
+
+severity は本文の "併用禁忌" / "併用注意" の語をそのまま転記。
+
+【絶対ルール】
+1. ツール呼び出し必須。トレーニング知識・教科書・ガイドラインからの推測は禁止。
+2. 数値・固有名・条件は本文表現を改変せず逐語引用。
+3. 出典は **必ず** `[出典: <販売名> (yj_full=<yj_full>) / <章番号 章タイトル>]` の形式。
+ - fact 表 row には `_citation` フィールドが入っているので **そのまま転記**。
+ - `[出典: 薬品マスター]` `[出典: 添付文書]` 等の汎用出典は **絶対禁止**。
+ - read_drug_chapter で実際に読んだ section 以外の出典を捏造しない。
+4. 該当情報が無ければ "添付文書からは確認できません" と書く。
diff --git a/skills/developing/pmda-drug-info/agents/patient-specific.md b/skills/developing/pmda-drug-info/agents/patient-specific.md
new file mode 100644
index 0000000..49f5053
--- /dev/null
+++ b/skills/developing/pmda-drug-info/agents/patient-specific.md
@@ -0,0 +1,32 @@
+---
+name: patient_specific
+description: Determine drug administration feasibility and dosage adjustment for specific patient conditions (renal impairment, hepatic impairment, pregnancy, elderly, pediatric, allergy).
+ Invoke when the user asks "Can this drug be used in a patient with eGFR 25?", "Is it contraindicated in pregnancy?", etc.
+tools: search_drugs, get_drug_master, get_drug_restrictions, get_drug_dosing, list_drug_chapters, read_drug_chapter
+---
+
+あなたは「特定患者への投与可否・用量調整」専門の sub-agent です。
+
+【ツール戦略】
+1. 薬名から yj_code を `search_drugs` で取得。
+2. 患者条件を condition_type に対応付け:
+ - 腎機能 (eGFR/CrCl) → "腎機能障害"
+ - 肝機能 (Child-Pugh) → "肝機能障害"
+ - 妊娠/授乳 → "妊婦"/"授乳婦"
+ - 年齢 (小児/高齢) → "小児等"/"高齢者"
+ - アレルギー既往 → "過敏症"
+ - 合併症 (糖尿病/喘息など) → "疾患"
+3. `get_drug_restrictions(drug_yj=..., condition_type=...)` で該当 restriction を取得。
+ condition_params の数値(例: {"eGFR_max": 30})を必ず確認。
+4. `get_drug_dosing(drug_yj=..., patient_segment=...)` で患者層別用量を取得。
+5. 必要なら原文 `read_drug_chapter` で 9.x 章 verbatim 引用。
+6. 数値判定(例: eGFR=25 ⇔ eGFR_max=30 → 該当)を agent が責任もって行う。
+
+【絶対ルール】
+1. ツール呼び出し必須。トレーニング知識・教科書・ガイドラインからの推測は禁止。
+2. 数値・固有名・条件は本文表現を改変せず逐語引用。
+3. 出典は **必ず** `[出典: <販売名> (yj_full=<yj_full>) / <章番号 章タイトル>]` の形式。
+ - fact 表 row には `_citation` フィールドが入っているので **そのまま転記**。
+ - `[出典: 薬品マスター]` `[出典: 添付文書]` 等の汎用出典は **絶対禁止**。
+ - read_drug_chapter で実際に読んだ section 以外の出典を捏造しない。
+4. 該当情報が無ければ "添付文書からは確認できません" と書く。
diff --git a/skills/developing/pmda-drug-info/agents/single-drug.md b/skills/developing/pmda-drug-info/agents/single-drug.md
new file mode 100644
index 0000000..e5340a9
--- /dev/null
+++ b/skills/developing/pmda-drug-info/agents/single-drug.md
@@ -0,0 +1,26 @@
+---
+name: single_drug
+description: Answer factual questions about a single drug (brand name, generic name, indications, dosing, contraindications, side effects, etc.).
+ Invoke when the question is focused on one drug and requires detailed information from the package insert.
+tools: search_drugs, get_drug_master, get_drug_dosing, get_drug_restrictions, list_drug_chapters, read_drug_chapter
+---
+
+あなたは「単一薬の事実回答」専門の sub-agent です。
+
+【ツール戦略】
+1. 質問から薬名/yj_code を特定 → `search_drugs` または直接 yj_code が分かれば次へ。
+2. `get_drug_master(yj_code)` で基本情報(販売名・一般名・薬効分類・規制)を確定。
+3. 必要に応じて `get_drug_dosing` で用法用量、`get_drug_restrictions(drug_yj=...)` で禁忌・特定患者注意。
+4. 自由記述や上記テーブルに無い情報(例: 重大な副作用一覧、薬物動態の数値)は
+ `list_drug_chapters(yj_full)` → `read_drug_chapter(yj_full, section_title)` で原文取得。
+
+最終回答は箇条書き or 表で、各事実に出典を付ける。
+
+【絶対ルール】
+1. ツール呼び出し必須。トレーニング知識・教科書・ガイドラインからの推測は禁止。
+2. 数値・固有名・条件は本文表現を改変せず逐語引用。
+3. 出典は **必ず** `[出典: <販売名> (yj_full=<yj_full>) / <章番号 章タイトル>]` の形式。
+ - fact 表 row には `_citation` フィールドが入っているので **そのまま転記**。
+ - `[出典: 薬品マスター]` `[出典: 添付文書]` 等の汎用出典は **絶対禁止**。
+ - read_drug_chapter で実際に読んだ section 以外の出典を捏造しない。
+4. 該当情報が無ければ "添付文書からは確認できません" と書く。
diff --git a/skills/developing/pmda-drug-info/hooks/pmda-instructions.md b/skills/developing/pmda-drug-info/hooks/pmda-instructions.md
new file mode 100644
index 0000000..0a656be
--- /dev/null
+++ b/skills/developing/pmda-drug-info/hooks/pmda-instructions.md
@@ -0,0 +1,22 @@
+# PMDA Drug Information Tools
+
+You have access to Japanese pharmaceutical package insert (添付文書) data via the following tools.
+
+## Core Rules
+- **Tool calls are mandatory.** Never answer from training knowledge alone. All facts must come from tool results.
+- Cite sources in the format: `[出典: <販売名> (yj_full=<yj_full>) / <章番号 章タイトル>]`
+- Fact table rows include a `_citation` field — use it directly.
+- Generic citations like `[出典: 薬品マスター]` or `[出典: 添付文書]` are **prohibited**.
+- For urgent questions (suicide/drug abuse/severe acute symptoms), state: "緊急対応として担当医・薬剤師に直接相談してください"
+
+## When to Use Sub-agents (task tool)
+- **patient_specific**: Renal/hepatic/pregnancy/elderly/pediatric/allergy conditions × dosing decisions
+- **interaction**: Pairwise drug interaction investigation
+- **adverse_event**: Reverse lookup from adverse event name to drugs
+- **single_drug**: Detailed info not in fact tables (e.g., full adverse event list, pharmacokinetics)
+
+## Direct Tool Usage (do NOT delegate)
+- Simple lookups → use tools directly
+- Multi-drug comparisons → call tools sequentially, output as markdown table
+- Symptom → candidate drug reverse lookup → `search_section_text`
+- Mechanism/pharmacokinetics → `list_drug_chapters` + `read_drug_chapter`
diff --git a/skills/developing/pmda-drug-info/hooks/pre_prompt.py b/skills/developing/pmda-drug-info/hooks/pre_prompt.py
new file mode 100644
index 0000000..eb1e3ac
--- /dev/null
+++ b/skills/developing/pmda-drug-info/hooks/pre_prompt.py
@@ -0,0 +1,18 @@
+#!/usr/bin/env python3
+"""
+PrePrompt hook for PMDA drug info skill.
+Injects usage instructions for the drug information tools.
+"""
+import sys
+from pathlib import Path
+
+
+def main():  # Print the skill's usage instructions to stdout.
+ prompt_file = Path(__file__).parent / "pmda-instructions.md"  # file located next to this script
+ if prompt_file.exists():  # a missing file simply produces no output
+ print(prompt_file.read_text(encoding="utf-8"))
+ return 0
+
+
+if __name__ == '__main__':
+ sys.exit(main())  # main()'s return value is passed to sys.exit as the exit status
diff --git a/skills/developing/pmda-drug-info/mcp_common.py b/skills/developing/pmda-drug-info/mcp_common.py
new file mode 100644
index 0000000..0baeb01
--- /dev/null
+++ b/skills/developing/pmda-drug-info/mcp_common.py
@@ -0,0 +1,252 @@
+#!/usr/bin/env python3
+"""
+Shared utility functions for the MCP server.
+Provides common functionality for path handling, file validation, and request processing.
+"""
+
+import json
+import os
+import sys
+import asyncio
+from typing import Any, Dict, List, Optional, Union
+import re
+
+def get_allowed_directory():
+ """Get the directory that is allowed to be accessed."""
+ # Prefer dataset_dir passed through command-line arguments.
+ if len(sys.argv) > 1:
+ dataset_dir = sys.argv[1]
+ return os.path.abspath(dataset_dir)
+
+ # Read the project data directory from the environment variable.
+ project_dir = os.getenv("PROJECT_DATA_DIR", "./projects/data")
+ return os.path.abspath(project_dir)
+
+
+def resolve_file_path(file_path: str, default_subfolder: str = "default") -> str:
+ """
+ Resolve a file path, supporting both folder/document.txt and document.txt formats.
+
+ Args:
+ file_path: Input file path.
+ default_subfolder: Default subfolder name to use when only a filename is provided.
+
+ Returns:
+ The resolved full file path.
+ """
+ # If the path contains a folder separator, use it directly.
+ if '/' in file_path or '\\' in file_path:
+ clean_path = file_path.replace('\\', '/')
+
+ # Remove the projects/ prefix if it exists.
+ if clean_path.startswith('projects/'):
+ clean_path = clean_path[9:] # Remove the 'projects/' prefix.
+ elif clean_path.startswith('./projects/'):
+ clean_path = clean_path[11:] # Remove the './projects/' prefix.
+ else:
+ # If only a filename is provided, add the default subfolder.
+ clean_path = f"{default_subfolder}/{file_path}"
+
+ # Get the allowed directory.
+ project_data_dir = get_allowed_directory()
+
+ # Try to locate the file directly under the project directory.
+ full_path = os.path.join(project_data_dir, clean_path.lstrip('./'))
+ if os.path.exists(full_path):
+ return full_path
+
+ # If the direct path does not exist, try a recursive search.
+ found = find_file_in_project(clean_path, project_data_dir)
+ if found:
+ return found
+
+ # If this is a bare filename and it was not found under the default subfolder,
+ # try looking in the project root.
+ if '/' not in file_path and '\\' not in file_path:
+ root_path = os.path.join(project_data_dir, file_path)
+ if os.path.exists(root_path):
+ return root_path
+
+ raise FileNotFoundError(f"File not found: {file_path} (searched in {project_data_dir})")
+
+
+def find_file_in_project(filename: str, project_dir: str) -> Optional[str]:
+ """Recursively search for a file inside the project directory."""
+ # If filename includes a path, only search within the specified path.
+ if '/' in filename:
+ parts = filename.split('/')
+ target_file = parts[-1]
+ search_dir = os.path.join(project_dir, *parts[:-1])
+
+ if os.path.exists(search_dir):
+ target_path = os.path.join(search_dir, target_file)
+ if os.path.exists(target_path):
+ return target_path
+ else:
+ # For a bare filename, recursively search the whole project directory.
+ for root, dirs, files in os.walk(project_dir):
+ if filename in files:
+ return os.path.join(root, filename)
+ return None
+
+
+def load_tools_from_json(tools_file_name: str) -> List[Dict[str, Any]]:
+ """Load tool definitions from a JSON file."""
+ try:
+ tools_file = os.path.join(os.path.dirname(__file__), tools_file_name)
+ if os.path.exists(tools_file):
+ with open(tools_file, 'r', encoding='utf-8') as f:
+ return json.load(f)
+ else:
+ # If the JSON file does not exist, use the default definitions.
+ return []
+ except Exception as e:
+ print(f"Warning: Unable to load tool definition JSON file: {str(e)}")
+ return []
+
+
+def create_error_response(request_id: Any, code: int, message: str) -> Dict[str, Any]:
+ """Create a standardized error response."""
+ return {
+ "jsonrpc": "2.0",
+ "id": request_id,
+ "error": {
+ "code": code,
+ "message": message
+ }
+ }
+
+
+def create_success_response(request_id: Any, result: Any) -> Dict[str, Any]:
+ """Create a standardized success response."""
+ return {
+ "jsonrpc": "2.0",
+ "id": request_id,
+ "result": result
+ }
+
+
+def create_initialize_response(request_id: Any, server_name: str, server_version: str = "1.0.0") -> Dict[str, Any]:
+ """Create a standardized initialize response."""
+ return {
+ "jsonrpc": "2.0",
+ "id": request_id,
+ "result": {
+ "protocolVersion": "2024-11-05",
+ "capabilities": {
+ "tools": {}
+ },
+ "serverInfo": {
+ "name": server_name,
+ "version": server_version
+ }
+ }
+ }
+
+
+def create_ping_response(request_id: Any) -> Dict[str, Any]:
+ """Create a standardized ping response."""
+ return {
+ "jsonrpc": "2.0",
+ "id": request_id,
+ "result": {
+ "pong": True
+ }
+ }
+
+
+def create_tools_list_response(request_id: Any, tools: List[Dict[str, Any]]) -> Dict[str, Any]:
+ """Create a standardized tools/list response."""
+ return {
+ "jsonrpc": "2.0",
+ "id": request_id,
+ "result": {
+ "tools": tools
+ }
+ }
+
+
+def is_regex_pattern(pattern: str) -> bool:
+ """Check whether a string should be treated as a regular expression pattern."""
+ # Check the /pattern/ format.
+ if pattern.startswith('/') and pattern.endswith('/') and len(pattern) > 2:
+ return True
+
+ # Check the r"pattern" or r'pattern' format.
+ if pattern.startswith(('r"', "r'")) and pattern.endswith(('"', "'")) and len(pattern) > 3:
+ return True
+
+ # Check whether it contains regex metacharacters.
+ regex_chars = {'*', '+', '?', '|', '(', ')', '[', ']', '{', '}', '^', '$', '\\', '.'}
+ return any(char in pattern for char in regex_chars)
+
+
+def compile_pattern(pattern: str) -> Union[re.Pattern, str, None]:
+ """Compile a regex pattern, or return the original string if it is not regex."""
+ if not is_regex_pattern(pattern):
+ return pattern
+
+ try:
+ # Handle the /pattern/ format.
+ if pattern.startswith('/') and pattern.endswith('/'):
+ regex_body = pattern[1:-1]
+ return re.compile(regex_body)
+
+ # Handle the r"pattern" or r'pattern' format.
+ if pattern.startswith(('r"', "r'")) and pattern.endswith(('"', "'")):
+ regex_body = pattern[2:-1]
+ return re.compile(regex_body)
+
+ # Directly compile strings that contain regex metacharacters.
+ return re.compile(pattern)
+ except re.error as e:
+ # If compilation fails, return None to indicate an invalid regex.
+ print(f"Warning: Regular expression '{pattern}' compilation failed: {e}")
+ return None
+
+
+async def handle_mcp_streaming(request_handler):
+ """Handle the standard main loop for MCP requests."""
+ try:
+ while True:
+ # Read from stdin
+ line = await asyncio.get_event_loop().run_in_executor(None, sys.stdin.readline)
+ if not line:
+ break
+
+ line = line.strip()
+ if not line:
+ continue
+
+ try:
+ request = json.loads(line)
+ response = await request_handler(request)
+
+ # Write to stdout
+ sys.stdout.write(json.dumps(response, ensure_ascii=False) + "\n")
+ sys.stdout.flush()
+
+ except json.JSONDecodeError:
+ error_response = {
+ "jsonrpc": "2.0",
+ "error": {
+ "code": -32700,
+ "message": "Parse error"
+ }
+ }
+ sys.stdout.write(json.dumps(error_response, ensure_ascii=False) + "\n")
+ sys.stdout.flush()
+
+ except Exception as e:
+ error_response = {
+ "jsonrpc": "2.0",
+ "error": {
+ "code": -32603,
+ "message": f"Internal error: {str(e)}"
+ }
+ }
+ sys.stdout.write(json.dumps(error_response, ensure_ascii=False) + "\n")
+ sys.stdout.flush()
+
+ except KeyboardInterrupt:
+ pass
diff --git a/skills/developing/pmda-drug-info/pmda_server.py b/skills/developing/pmda-drug-info/pmda_server.py
new file mode 100644
index 0000000..0255adc
--- /dev/null
+++ b/skills/developing/pmda-drug-info/pmda_server.py
@@ -0,0 +1,533 @@
+#!/usr/bin/env python3
+"""
+PMDA drug information MCP server (mock data version).
+
+Provides drug search, master info, interactions, restrictions, dosing,
+and full-text chapter retrieval with mock data for testing.
+"""
+
+import asyncio
+import json
+import sys
+from typing import Any, Dict, Optional
+
+from mcp_common import (
+ create_error_response,
+ create_initialize_response,
+ create_ping_response,
+ create_tools_list_response,
+ load_tools_from_json,
+ handle_mcp_streaming,
+)
+
+
+def _dump(obj) -> str:
+ return json.dumps(obj, ensure_ascii=False)
+
+
+# ---------------------------------------------------------------------------
+# Mock data
+# ---------------------------------------------------------------------------
+
+MOCK_DRUG_MASTER = {
+ "2149039F1082": {
+ "yj_code": "2149039F1082",
+ "yj_full": "2149039F1082_1_17",
+ "brand_name": "ロサルタンK錠50mg「科研」",
+ "generic_name": "ロサルタンカリウム",
+ "category_code": "214",
+ "category_name": "アンジオテンシンII受容体拮抗薬",
+ "regulation": "劇薬, 処方箋医薬品",
+ "manufacturer": "科研製薬株式会社",
+ "revision_date": "2024-06",
+ },
+ "3399007H1021": {
+ "yj_code": "3399007H1021",
+ "yj_full": "3399007H1021_1_21",
+ "brand_name": "バイアスピリン錠100mg",
+ "generic_name": "アスピリン",
+ "category_code": "339",
+ "category_name": "血液・体液用薬",
+ "regulation": "処方箋医薬品",
+ "manufacturer": "バイエル薬品株式会社",
+ "revision_date": "2024-03",
+ },
+ "2179004F1026": {
+ "yj_code": "2179004F1026",
+ "yj_full": "2179004F1026_1_14",
+ "brand_name": "ノルバスク錠5mg",
+ "generic_name": "アムロジピンベシル酸塩",
+ "category_code": "217",
+ "category_name": "カルシウム拮抗薬",
+ "regulation": "処方箋医薬品",
+ "manufacturer": "ファイザー株式会社",
+ "revision_date": "2024-01",
+ },
+}
+
+MOCK_CATEGORIES = [
+ {"category_code": "214", "category_name": "アンジオテンシンII受容体拮抗薬", "level": "L2", "drug_count": 35},
+ {"category_code": "217", "category_name": "カルシウム拮抗薬", "level": "L2", "drug_count": 48},
+ {"category_code": "339", "category_name": "血液・体液用薬", "level": "L2", "drug_count": 22},
+ {"category_code": "612", "category_name": "消化性潰瘍用剤", "level": "L2", "drug_count": 40},
+]
+
+MOCK_INTERACTIONS = [
+ {
+ "drug_a_yj": "2149039F1082",
+ "drug_b_yj": "3399007H1021",
+ "drug_b_class": "アスピリン(抗血小板剤)",
+ "severity": "併用注意",
+ "mechanism": "ARBの降圧作用を減弱するおそれがある。また、腎機能低下・高カリウム血症のリスクを増大。",
+ "clinical_effect": "降圧効果の減弱、腎機能悪化、高カリウム血症に注意。",
+ "source_drug_yj": "2149039F1082",
+ "source_section": "10.2 併用注意",
+ },
+ {
+ "drug_a_yj": "3399007H1021",
+ "drug_b_yj": "2149039F1082",
+ "drug_b_class": "ロサルタンカリウム(ARB)",
+ "severity": "併用注意",
+ "mechanism": "アスピリンの副作用(消化性潰瘍、腎機能低下)を増強するおそれ。",
+ "clinical_effect": "消化性潰瘍、腎機能低下に注意。血清カリウム値の上昇に注意。",
+ "source_drug_yj": "3399007H1021",
+ "source_section": "10.2 併用注意",
+ },
+]
+
+MOCK_RESTRICTIONS = [
+ {
+ "drug_yj": "2149039F1082",
+ "condition_type": "腎機能障害",
+ "condition_text": "腎機能障害患者",
+ "condition_params": {"eGFR_max": 30},
+ "severity": "慎重投与",
+ "source_section": "9.2 腎機能障害患者",
+ },
+ {
+ "drug_yj": "2149039F1082",
+ "condition_type": "妊婦",
+ "condition_text": "妊娠中の女性",
+ "condition_params": {},
+ "severity": "禁忌",
+ "source_section": "9.5 妊婦",
+ },
+ {
+ "drug_yj": "2149039F1082",
+ "condition_type": "高齢者",
+ "condition_text": "高齢者(65歳以上)",
+ "condition_params": {},
+ "severity": "慎重投与",
+ "source_section": "9.8 高齢者",
+ },
+ {
+ "drug_yj": "3399007H1021",
+ "condition_type": "過敏症",
+ "condition_text": "本剤の成分に対し過敏症の既往歴のある患者",
+ "condition_params": {},
+ "severity": "禁忌",
+ "source_section": "2. 禁忌",
+ },
+]
+
+MOCK_DOSING = [
+ {
+ "drug_yj": "2149039F1082",
+ "patient_segment": "成人",
+ "segment_params": {},
+ "indication_code": "高血圧症",
+ "dose_amount": "50",
+ "dose_unit": "mg",
+ "frequency": "1日1回",
+ "duration": "",
+ "adjustment_text": "効果不十分な場合は100mgまで増量可",
+ "source_section": "6. 用法及び用量",
+ },
+ {
+ "drug_yj": "2149039F1082",
+ "patient_segment": "腎機能障害患者",
+ "segment_params": {"eGFR_max": 30},
+ "indication_code": "高血圧症",
+ "dose_amount": "25",
+ "dose_unit": "mg",
+ "frequency": "1日1回",
+ "duration": "",
+ "adjustment_text": "eGFR 30以下では用量を減ずること。血清カリウム・クレアチニンの推移に注意。",
+ "source_section": "9.2 腎機能障害患者",
+ },
+]
+
+MOCK_CHAPTERS = {
+ "2149039F1082_1_17": [
+ {"section_title": "1. 警告", "line_num": 1, "text_len": 120},
+ {"section_title": "2. 禁忌", "line_num": 5, "text_len": 80},
+ {"section_title": "4. 効能・効果", "line_num": 12, "text_len": 60},
+ {"section_title": "6. 用法及び用量", "line_num": 20, "text_len": 150},
+ {"section_title": "9.2 腎機能障害患者", "line_num": 45, "text_len": 200},
+ {"section_title": "9.5 妊婦", "line_num": 52, "text_len": 180},
+ {"section_title": "9.8 高齢者", "line_num": 60, "text_len": 100},
+ {"section_title": "10.2 併用注意", "line_num": 75, "text_len": 350},
+ {"section_title": "11.1 重大な副作用", "line_num": 90, "text_len": 400},
+ {"section_title": "11.2 その他の副作用", "line_num": 110, "text_len": 300},
+ ],
+ "3399007H1021_1_21": [
+ {"section_title": "1. 警告", "line_num": 1, "text_len": 100},
+ {"section_title": "2. 禁忌", "line_num": 4, "text_len": 90},
+ {"section_title": "4. 効能・効果", "line_num": 10, "text_len": 55},
+ {"section_title": "6. 用法及び用量", "line_num": 18, "text_len": 130},
+ {"section_title": "10.2 併用注意", "line_num": 70, "text_len": 300},
+ {"section_title": "11.1 重大な副作用", "line_num": 85, "text_len": 450},
+ {"section_title": "11.2 その他の副作用", "line_num": 105, "text_len": 280},
+ ],
+}
+
+MOCK_SECTION_TEXT = {
+ ("2149039F1082_1_17", "9.2 腎機能障害患者"): (
+ "9.2 腎機能障害患者\n"
+ "腎機能障害患者(eGFR 30 mL/min/1.73m²以下)には、ロサルタンカリウムの"
+ "投与開始用量を25mg/日とし、血清カリウム及び血清クレアチニンの推移に"
+ "十分注意すること。\n"
+ "【理由】腎機能障害患者では、本剤の投与により急速に腎機能が悪化する"
+ "おそれがある。また、高カリウム血症があらわれやすい。"
+ ),
+ ("2149039F1082_1_17", "9.5 妊婦"): (
+ "9.5 妊婦\n"
+ "妊婦又は妊娠している可能性のある女性には投与しないこと。\n"
+ "【理由】妊娠中期・末期にレニン-アンジオテンシン系に作用する薬剤を"
+ "投与された患者では、胎児の腎機能低下、羊水過少症、頭蓋の発育不全、"
+ "肺低形成等があらわれるおそれがある。"
+ ),
+ ("2149039F1082_1_17", "10.2 併用注意"): (
+ "10.2 併用注意\n"
+ "・アスピリン(抗血小板剤)\n"
+ " 【リスク】ARBの降圧作用を減弱するおそれがある。\n"
+ " 腎機能低下・高カリウム血症のリスクを増大。\n"
+ " 【措置】降圧効果の減弱、腎機能悪化、高カリウム血症に注意すること。"
+ ),
+ ("2149039F1082_1_17", "11.1 重大な副作用"): (
+ "11.1 重大な副作用\n"
+ "・血管浮腫(頻度不明):顔面、口唇、咽頭、舌等の腫脹があらわれた場合には"
+ "直ちに投与を中止し、適切な処置を行うこと。\n"
+ "・高カリウム血症(0.1%未満):血清カリウム値の上昇があらわれることがある。\n"
+ "・腎機能悪化(0.1%未満):BUN、クレアチニンの上昇があらわれることがある。"
+ ),
+ ("3399007H1021_1_21", "10.2 併用注意"): (
+ "10.2 併用注意\n"
+ "・ロサルタンカリウム(ARB)\n"
+ " 【リスク】アスピリンの副作用(消化性潰瘍、腎機能低下)を増強するおそれ。\n"
+ " 【措置】消化性潰瘍、腎機能低下に注意。血清カリウム値の上昇に注意すること。"
+ ),
+ ("3399007H1021_1_21", "11.1 重大な副作用"): (
+ "11.1 重大な副作用\n"
+ "・ショック、アナフィラキシー(頻度不明):呼吸困難、血圧低下等があらわれた\n"
+ " 場合には直ちに投与を中止し、適切な処置を行うこと。\n"
+ "・消化性潰瘍(0.1%未満):出血、穿孔があらわれることがある。\n"
+ "・腎機能障害(0.1%未満):急性腎不全があらわれることがある。"
+ ),
+}
+
+
+def _citation(drug_yj: str, section: Optional[str]) -> str:
+ drug = MOCK_DRUG_MASTER.get(drug_yj, {})
+ brand = drug.get("brand_name", "")
+ yj_full = drug.get("yj_full", drug_yj)
+ chap = section or "(章不明)"
+ return f"[出典: {brand} (yj_full={yj_full}) / {chap}]"
+
+
+# ---------------------------------------------------------------------------
+# Tool implementations (mock)
+# ---------------------------------------------------------------------------
+
+def _tool_search_drugs(query: str, kind: str = "auto", limit: int = 10) -> str:
+ results = []
+ for code, d in MOCK_DRUG_MASTER.items():
+ q = query.lower()
+ if (kind == "brand" and q in d["brand_name"].lower()) or \
+ (kind == "generic" and q in d["generic_name"].lower()) or \
+ (kind == "yj" and (q in d["yj_code"].lower() or q in d["yj_full"].lower())) or \
+ (kind == "auto" and (q in d["brand_name"].lower() or q in d["generic_name"].lower()
+ or q in d["yj_code"].lower() or q in d["yj_full"].lower())):
+ results.append({
+ "yj_full": d["yj_full"],
+ "yj_code": d["yj_code"],
+ "brand": d["brand_name"],
+ "generic": d["generic_name"],
+ "category": f"{d['category_code']} {d['category_name']}",
+ "score": 1.0,
+ })
+ return _dump(results[:limit])
+
+
+def _tool_list_categories() -> str:
+ return _dump(MOCK_CATEGORIES)
+
+
+def _tool_list_drugs_in_category(l2_code: str, limit_generics: int = 50) -> str:
+ results = []
+ seen_generics = set()
+ for code, d in MOCK_DRUG_MASTER.items():
+ if d["category_code"].startswith(l2_code) and d["generic_name"] not in seen_generics:
+ seen_generics.add(d["generic_name"])
+ results.append({
+ "generic_name": d["generic_name"],
+ "brands": [{"yj_code": d["yj_code"], "brand_name": d["brand_name"], "yj_full": d["yj_full"]}],
+ })
+ return _dump(results[:limit_generics])
+
+
+def _tool_get_drug_master(yj_code: str) -> str:
+ d = MOCK_DRUG_MASTER.get(yj_code)
+ if not d:
+ return _dump({"error": f"yj_code {yj_code} not found"})
+ result = dict(d)
+ result["_citation"] = f"[出典: {d['brand_name']} (yj_full={d['yj_full']}) / 添付文書冒頭]"
+ return _dump(result)
+
+
+def _tool_get_drug_interactions(
+ drug_a_yj: Optional[str] = None,
+ drug_b_yj: Optional[str] = None,
+ severity: Optional[str] = None,
+ keyword: Optional[str] = None,
+ limit: int = 30,
+) -> str:
+ results = []
+ for r in MOCK_INTERACTIONS:
+ if drug_a_yj and r["drug_a_yj"] != drug_a_yj:
+ continue
+ if drug_b_yj and r["drug_b_yj"] != drug_b_yj:
+ continue
+ if severity and r["severity"] != severity:
+ continue
+ if keyword and keyword.lower() not in (
+ (r.get("drug_b_class") or "").lower()
+ + (r.get("mechanism") or "").lower()
+ + (r.get("clinical_effect") or "").lower()
+ ):
+ continue
+ results.append({**r, "_citation": _citation(r["source_drug_yj"], r["source_section"])})
+ return _dump(results[:limit])
+
+
+def _tool_get_drug_restrictions(
+ drug_yj: Optional[str] = None,
+ condition_type: Optional[str] = None,
+ severity: Optional[str] = None,
+ keyword: Optional[str] = None,
+ limit: int = 30,
+) -> str:
+ results = []
+ for r in MOCK_RESTRICTIONS:
+ if drug_yj and r["drug_yj"] != drug_yj:
+ continue
+ if condition_type and r["condition_type"] != condition_type:
+ continue
+ if severity and r["severity"] != severity:
+ continue
+ if keyword and keyword.lower() not in (r.get("condition_text") or "").lower():
+ continue
+ results.append({**r, "_citation": _citation(r["drug_yj"], r["source_section"])})
+ return _dump(results[:limit])
+
+
+def _tool_get_drug_dosing(
+ drug_yj: str,
+ patient_segment: Optional[str] = None,
+ limit: int = 20,
+) -> str:
+ results = []
+ for r in MOCK_DOSING:
+ if r["drug_yj"] != drug_yj:
+ continue
+ if patient_segment and r["patient_segment"] != patient_segment:
+ continue
+ results.append({**r, "_citation": _citation(drug_yj, r["source_section"])})
+ return _dump(results[:limit])
+
+
+def _tool_search_section_text(
+ keyword: str,
+ section_filter: str = "",
+ limit: int = 30,
+) -> str:
+ if not keyword.strip():
+ return _dump({"keyword": keyword, "total_drugs": 0, "shown": 0, "hits": []})
+
+ # Simple mock: search through section text
+ hits_out = []
+ for (yj_full, section_title), text in MOCK_SECTION_TEXT.items():
+ if section_filter and section_filter not in section_title:
+ continue
+ if keyword.lower() in text.lower():
+ drug = None
+ for d in MOCK_DRUG_MASTER.values():
+ if d["yj_full"] == yj_full:
+ drug = d
+ break
+ if not drug:
+ continue
+ brand = drug["brand_name"]
+ # Deduplicate by yj_full
+ existing = [h for h in hits_out if h["yj_full"] == yj_full]
+ if existing:
+ existing[0]["matches"].append({
+ "section_title": section_title,
+ "snippet": text[:160],
+ })
+ continue
+ hits_out.append({
+ "yj_full": yj_full,
+ "brand": brand,
+ "generic": drug["generic_name"],
+ "l2": f"{drug['category_code']} {drug['category_name']}",
+ "matches": [{"section_title": section_title, "snippet": text[:160]}],
+ "_citation_template": f"[出典: {brand} (yj_full={yj_full}) / <該当章>]",
+ })
+
+ return _dump({
+ "keyword": keyword,
+ "section_filter": section_filter or None,
+ "total_drugs": len({h["yj_full"] for h in hits_out}),
+ "shown": len(hits_out),
+ "hits": hits_out[:limit],
+ })
+
+
+def _tool_list_drug_chapters(yj_full: str) -> str:
+ sections = MOCK_CHAPTERS.get(yj_full)
+ if not sections:
+ return _dump({"error": f"yj_full {yj_full} の章節が見つかりません。"})
+
+ drug = None
+ for d in MOCK_DRUG_MASTER.values():
+ if d["yj_full"] == yj_full:
+ drug = d
+ break
+
+ return _dump({
+ "yj_full": yj_full,
+ "brand": drug["brand_name"] if drug else "",
+ "generic": drug["generic_name"] if drug else "",
+ "n_sections": len(sections),
+ "sections": sections,
+ })
+
+
+def _tool_read_drug_chapter(yj_full: str, section_title: str) -> str:
+ text = MOCK_SECTION_TEXT.get((yj_full, section_title))
+ if text:
+ return text[:8000]
+ return _dump({
+ "error": f"section_title {section_title!r} は {yj_full} に存在しません。",
+ "hint": "list_drug_chapters で取得した sections[].section_title をそのまま渡してください。",
+ })
+
+
+# ---------------------------------------------------------------------------
+# MCP request handler
+# ---------------------------------------------------------------------------
+
+# Maps each MCP tool name to a callable that takes the raw "arguments" dict of
+# a tools/call request and returns the tool's text result. Each lambda pulls
+# the arguments it needs with dict.get, supplying the default used when the
+# caller omits the key.
+_TOOL_DISPATCH = {
+ "search_drugs": lambda args: _tool_search_drugs(
+ query=args.get("query", ""),
+ kind=args.get("kind", "auto"),
+ limit=args.get("limit", 10),
+ ),
+ "list_categories": lambda args: _tool_list_categories(),
+ "list_drugs_in_category": lambda args: _tool_list_drugs_in_category(
+ l2_code=args.get("l2_code", ""),
+ limit_generics=args.get("limit_generics", 50),
+ ),
+ "get_drug_master": lambda args: _tool_get_drug_master(
+ yj_code=args.get("yj_code", ""),
+ ),
+ "get_drug_interactions": lambda args: _tool_get_drug_interactions(
+ drug_a_yj=args.get("drug_a_yj"),
+ drug_b_yj=args.get("drug_b_yj"),
+ severity=args.get("severity"),
+ keyword=args.get("keyword"),
+ limit=args.get("limit", 30),
+ ),
+ "get_drug_restrictions": lambda args: _tool_get_drug_restrictions(
+ drug_yj=args.get("drug_yj"),
+ condition_type=args.get("condition_type"),
+ severity=args.get("severity"),
+ keyword=args.get("keyword"),
+ limit=args.get("limit", 30),
+ ),
+ "get_drug_dosing": lambda args: _tool_get_drug_dosing(
+ drug_yj=args.get("drug_yj", ""),
+ patient_segment=args.get("patient_segment"),
+ limit=args.get("limit", 20),
+ ),
+ "search_section_text": lambda args: _tool_search_section_text(
+ keyword=args.get("keyword", ""),
+ section_filter=args.get("section_filter", ""),
+ limit=args.get("limit", 30),
+ ),
+ "list_drug_chapters": lambda args: _tool_list_drug_chapters(
+ yj_full=args.get("yj_full", ""),
+ ),
+ "read_drug_chapter": lambda args: _tool_read_drug_chapter(
+ yj_full=args.get("yj_full", ""),
+ section_title=args.get("section_title", ""),
+ ),
+}
+
+
+async def handle_request(request: Dict[str, Any]) -> Dict[str, Any]:
+ """Handle an MCP request."""
+ try:
+ method = request.get("method")
+ params = request.get("params", {})
+ request_id = request.get("id")
+
+ if method == "initialize":
+ return create_initialize_response(request_id, "pmda-drug-info")
+
+ elif method == "ping":
+ return create_ping_response(request_id)
+
+ elif method == "tools/list":
+ tools = load_tools_from_json("pmda_tools.json")
+ return create_tools_list_response(request_id, tools)
+
+ elif method == "tools/call":
+ tool_name = params.get("name")
+ arguments = params.get("arguments", {})
+
+ if tool_name not in _TOOL_DISPATCH:
+ return create_error_response(request_id, -32601, f"Unknown tool: {tool_name}")
+
+ try:
+ result_text = _TOOL_DISPATCH[tool_name](arguments)
+ return {
+ "jsonrpc": "2.0",
+ "id": request_id,
+ "result": {
+ "content": [{"type": "text", "text": result_text}]
+ },
+ }
+ except Exception as e:
+ return {
+ "jsonrpc": "2.0",
+ "id": request_id,
+ "result": {
+ "content": [{"type": "text", "text": f"Error: {str(e)}"}]
+ },
+ }
+
+ else:
+ return create_error_response(request_id, -32601, f"Unknown method: {method}")
+
+ except Exception as e:
+ return create_error_response(request.get("id"), -32603, f"Internal error: {str(e)}")
+
+
+async def main():
+ await handle_mcp_streaming(handle_request)
+
+
+if __name__ == "__main__":
+ asyncio.run(main())
diff --git a/skills/developing/pmda-drug-info/pmda_tools.json b/skills/developing/pmda-drug-info/pmda_tools.json
new file mode 100644
index 0000000..75177a6
--- /dev/null
+++ b/skills/developing/pmda-drug-info/pmda_tools.json
@@ -0,0 +1,207 @@
+[
+ {
+ "name": "search_drugs",
+ "description": "Search drugs by brand name, generic name, or YJ code. Returns list of matching drugs with yj_code, brand name, generic name, and category.",
+ "inputSchema": {
+ "type": "object",
+ "properties": {
+ "query": {
+ "type": "string",
+ "description": "Search query: drug brand name, generic name, or YJ code."
+ },
+ "kind": {
+ "type": "string",
+ "enum": ["auto", "brand", "generic", "yj"],
+ "description": "Search type. 'auto' searches all fields.",
+ "default": "auto"
+ },
+ "limit": {
+ "type": "integer",
+ "description": "Maximum number of results.",
+ "default": 10
+ }
+ },
+ "required": ["query"]
+ }
+ },
+ {
+ "name": "list_categories",
+ "description": "List all L1/L2 drug categories (pharmacological classification) with drug counts per category.",
+ "inputSchema": {
+ "type": "object",
+ "properties": {}
+ }
+ },
+ {
+ "name": "list_drugs_in_category",
+ "description": "List all drugs (generic → brand names) under a specific L2 pharmacological category code.",
+ "inputSchema": {
+ "type": "object",
+ "properties": {
+ "l2_code": {
+ "type": "string",
+ "description": "3-digit L2 category code."
+ },
+ "limit_generics": {
+ "type": "integer",
+ "description": "Maximum number of generic names to return.",
+ "default": 50
+ }
+ },
+ "required": ["l2_code"]
+ }
+ },
+ {
+ "name": "get_drug_master",
+ "description": "Get basic information for a drug by yj_code: brand name, generic name, pharmacological category, regulatory classification, manufacturer, revision date.",
+ "inputSchema": {
+ "type": "object",
+ "properties": {
+ "yj_code": {
+ "type": "string",
+ "description": "12-character YJ code."
+ }
+ },
+ "required": ["yj_code"]
+ }
+ },
+ {
+ "name": "get_drug_interactions",
+ "description": "Search drug interactions. With drug_a only: all interactions for that drug. With both drug_a and drug_b: bidirectional interaction between A and B. Filter by severity (併用禁忌/併用注意) or keyword.",
+ "inputSchema": {
+ "type": "object",
+ "properties": {
+ "drug_a_yj": {
+ "type": "string",
+ "description": "12-character YJ code (yj_code, not yj_full) for drug A."
+ },
+ "drug_b_yj": {
+ "type": "string",
+ "description": "12-character YJ code (yj_code, not yj_full) for drug B (optional, for pairwise lookup)."
+ },
+ "severity": {
+ "type": "string",
+ "description": "Filter by severity: '併用禁忌' or '併用注意'."
+ },
+ "keyword": {
+ "type": "string",
+ "description": "Search keyword in drug_b_class, mechanism, or clinical_effect."
+ },
+ "limit": {
+ "type": "integer",
+ "description": "Maximum number of results.",
+ "default": 30
+ }
+ }
+ }
+ },
+ {
+ "name": "get_drug_restrictions",
+ "description": "Search drug restrictions (contraindications, precautions) by patient condition. condition_type options: 疾患, 腎機能障害, 肝機能障害, 生殖能, 妊婦, 授乳婦, 小児等, 高齢者, 過敏症, 遺伝子多型, その他. severity options: 禁忌, 原則禁忌, 慎重投与.",
+ "inputSchema": {
+ "type": "object",
+ "properties": {
+ "drug_yj": {
+ "type": "string",
+ "description": "12-character YJ code (yj_code, not yj_full) for the drug."
+ },
+ "condition_type": {
+ "type": "string",
+ "description": "Patient condition type to filter by."
+ },
+ "severity": {
+ "type": "string",
+ "description": "Filter by severity: 禁忌, 原則禁忌, or 慎重投与."
+ },
+ "keyword": {
+ "type": "string",
+ "description": "Search keyword in condition_text."
+ },
+ "limit": {
+ "type": "integer",
+ "description": "Maximum number of results.",
+ "default": 30
+ }
+ }
+ }
+ },
+ {
+ "name": "get_drug_dosing",
+ "description": "Get dosing information for a drug, optionally filtered by patient segment. patient_segment options: 成人, 小児等, 高齢者, 腎機能障害患者, 肝機能障害患者, 透析患者, 妊婦.",
+ "inputSchema": {
+ "type": "object",
+ "properties": {
+ "drug_yj": {
+ "type": "string",
+ "description": "12-character YJ code (yj_code, not yj_full) for the drug."
+ },
+ "patient_segment": {
+ "type": "string",
+ "description": "Patient segment to filter by (e.g., 成人, 高齢者, 腎機能障害患者)."
+ },
+ "limit": {
+ "type": "integer",
+ "description": "Maximum number of results.",
+ "default": 20
+ }
+ },
+ "required": ["drug_yj"]
+ }
+ },
+ {
+ "name": "search_section_text",
+ "description": "Full-text search in drug package insert sections. Returns matching sections with snippets. Use section_filter to narrow by chapter title (e.g., '副作用', '禁忌', '妊婦', '相互作用').",
+ "inputSchema": {
+ "type": "object",
+ "properties": {
+ "keyword": {
+ "type": "string",
+ "description": "Search keyword."
+ },
+ "section_filter": {
+ "type": "string",
+ "description": "Filter by section title substring (e.g., '副作用', '禁忌', '妊婦').",
+ "default": ""
+ },
+ "limit": {
+ "type": "integer",
+ "description": "Maximum number of results.",
+ "default": 30
+ }
+ },
+ "required": ["keyword"]
+ }
+ },
+ {
+ "name": "list_drug_chapters",
+ "description": "List all chapter titles for a drug's package insert. Use yj_full (full YJ code with revision suffix). Returns section titles with line numbers.",
+ "inputSchema": {
+ "type": "object",
+ "properties": {
+ "yj_full": {
+ "type": "string",
+ "description": "Full YJ code (with revision suffix, e.g., 3399007H1021_1_21)."
+ }
+ },
+ "required": ["yj_full"]
+ }
+ },
+ {
+ "name": "read_drug_chapter",
+ "description": "Read the verbatim text of a specific chapter from a drug's package insert. section_title must match exactly from list_drug_chapters output.",
+ "inputSchema": {
+ "type": "object",
+ "properties": {
+ "yj_full": {
+ "type": "string",
+ "description": "Full YJ code."
+ },
+ "section_title": {
+ "type": "string",
+ "description": "Exact section title from list_drug_chapters (e.g., '9.2 腎機能障害患者', '11.1 重大な副作用')."
+ }
+ },
+ "required": ["yj_full", "section_title"]
+ }
+ }
+]
diff --git a/skills/developing/rag-retrieve-no-citation/hooks/retrieval-policy-forbidden-self-knowledge.md b/skills/developing/rag-retrieve-no-citation/hooks/retrieval-policy-forbidden-self-knowledge.md
index 2e4d133..542cdd7 100644
--- a/skills/developing/rag-retrieve-no-citation/hooks/retrieval-policy-forbidden-self-knowledge.md
+++ b/skills/developing/rag-retrieve-no-citation/hooks/retrieval-policy-forbidden-self-knowledge.md
@@ -37,8 +37,15 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi
1. **Skill-enabled retrieval tools** (use first when available)
2. **`rag_retrieve`**
-- After each step, evaluate sufficiency before proceeding.
- Retrieval must happen **before** any factual answer generation.
+- After each step, evaluate sufficiency before proceeding.
+
+### First-Call Success Principle
+
+- The first retrieval call is expected to return sufficient results for most questions.
+- Your default assumption should be: **one call is enough**.
+- Additional calls are the exception, not the norm. Only retry when results are genuinely useless (empty, error, completely off-topic).
+- **Never retry just to "find better results" or "get more comprehensive coverage".** Good enough is sufficient.
## 4. Query Preparation
@@ -48,25 +55,51 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi
## 5. Retrieval Breadth (`top_k`)
-- Apply `top_k` only to `rag_retrieve`. Use smallest sufficient value, expand if insufficient.
-- `30` for simple fact lookup → `50` for moderate synthesis/comparison → `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
-- Expansion order: `30 → 50 → 100`. If unsure, use `100`.
+- Apply `top_k` only to `rag_retrieve`. Choose the appropriate value upfront to maximize first-call success.
+- Use `50` for simple fact lookup or moderate synthesis, comparison, summarization, disambiguation.
+- Use `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
+- If unsure, use `50`. Escalate to `100` only on a retry, and only if the first results were insufficient.
## 6. Result Evaluation
-Treat as insufficient if: empty, `Error:`, off-topic, missing core entity/scope, no usable evidence, partial coverage, truncated results, or claims required by the answer are not explicitly supported.
+**Maximum 3 retrieval calls per question.** After each call, evaluate immediately:
+
+### Sufficient — answer immediately, no more calls
+
+ANY of the following means results are sufficient — STOP and answer now:
+- The core entity/topic in the user's question appears in the results.
+- There is ANY direct or indirect evidence relevant to the user's question.
+- Results are partially relevant, even if not perfectly comprehensive.
+- You can compose a meaningful answer (even a partial one) from the retrieved content.
+
+**Anti-patterns — do NOT do these:**
+- ❌ "The results are good, but maybe different keywords could find something better."
+- ❌ "I have enough to answer, but let me try one more query to be thorough."
+- ❌ "The answer is here, but I want to double-check with a different query."
+- ❌ Calling retrieval again after you have already identified the answer in previous results.
+
+**If you can answer the question with current results, you MUST answer immediately. Period.**
+
+### Insufficient — the ONLY valid reasons to retry
+
+- Results are completely empty or contain only `Error:` messages.
+- ALL results are entirely off-topic with zero relevance to the user's question.
+- No usable evidence exists at all — you cannot form even a partial answer.
+
+**"Results are not detailed enough" is NOT a valid reason to retry.**
+**"Results might be incomplete" is NOT a valid reason to retry.**
## 7. Fallback and Sequential Retry
-On insufficient results, follow this sequence:
+On insufficient results, you may retry **up to 2 more times** (3 calls total):
-1. Rewrite query, retry same tool (once)
-2. Switch to next retrieval source in default order
-3. For `rag_retrieve`, expand `top_k`: `30 → 50 → 100`
+1. Rewrite query, retry same tool.
+2. For `rag_retrieve`, escalate `top_k` to `100` on retry.
-- Say "no relevant information was found" **only after** exhausting all retrieval sources.
+- Say "no relevant information was found" **only after** exhausting all retries.
- Do NOT switch to local filesystem inspection at any point.
- Do NOT switch to model self-knowledge at any point.
+- Do NOT make more than 3 retrieval calls in total per question (the limit is shared across all retrieval tools, not per tool).
## 8. Handling Missing or Partial Evidence
@@ -84,7 +117,6 @@ On insufficient results, follow this sequence:
- Each sentence or key point in the response should be accompanied by relevant images when they meet the established association criteria.
- Avoid placing all images at the end of the response.
-
## 10. Self-Knowledge Prohibition
This section applies whenever self-knowledge is disabled or forbidden for the current task.
@@ -103,9 +135,11 @@ This section applies whenever self-knowledge is disabled or forbidden for the cu
Before replying to a knowledge retrieval task, verify:
- Used only whitelisted retrieval tools — no local filesystem inspection?
+- Called retrieval at most 3 times total (not more)?
+- Answered immediately when results were sufficient (did NOT call again unnecessarily)?
+- If the first results were sufficient, was retrieval called exactly once (no unnecessary retry)?
- Did retrieval happen before any factual answer drafting?
- Did every factual claim come from retrieved evidence rather than model knowledge?
-- Exhausted retrieval flow before concluding "not found"?
- If any unsupported part remained, was it removed or explicitly marked unavailable?
If any answer is "no", correct the process first.
diff --git a/skills/developing/rag-retrieve-no-citation/hooks/retrieval-policy.md b/skills/developing/rag-retrieve-no-citation/hooks/retrieval-policy.md
index ce14d2b..8dd6843 100644
--- a/skills/developing/rag-retrieve-no-citation/hooks/retrieval-policy.md
+++ b/skills/developing/rag-retrieve-no-citation/hooks/retrieval-policy.md
@@ -27,6 +27,13 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi
- Do NOT answer from model knowledge first.
- After each step, evaluate sufficiency before proceeding.
+### First-Call Success Principle
+
+- The first retrieval call is expected to return sufficient results for most questions.
+- Your default assumption should be: **one call is enough**.
+- Additional calls are the exception, not the norm. Only retry when results are genuinely useless (empty, error, completely off-topic).
+- **Never retry just to "find better results" or "get more comprehensive coverage".** Good enough is sufficient.
+
## 3. Query Preparation
- Do NOT pass raw user question unless it already works well for retrieval.
@@ -35,24 +42,50 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi
## 4. Retrieval Breadth (`top_k`)
-- Apply `top_k` only to `rag_retrieve`. Use smallest sufficient value, expand if insufficient.
-- `30` for simple fact lookup → `50` for moderate synthesis/comparison → `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
-- Expansion order: `30 → 50 → 100`. If unsure, use `100`.
+- Apply `top_k` only to `rag_retrieve`. Choose the appropriate value upfront to maximize first-call success.
+- Use `50` for simple fact lookup or moderate synthesis, comparison, summarization, disambiguation.
+- Use `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
+- If unsure, use `50`. Escalate to `100` only on a retry, and only if the first results were insufficient.
## 5. Result Evaluation
-Treat as insufficient if: empty, `Error:`, off-topic, missing core entity/scope, no usable evidence, partial coverage, or truncated results.
+**Maximum 3 retrieval calls per question.** After each call, evaluate immediately:
+
+### Sufficient — answer immediately, no more calls
+
+ANY of the following means results are sufficient — STOP and answer now:
+- The core entity/topic in the user's question appears in the results.
+- There is ANY direct or indirect evidence relevant to the user's question.
+- Results are partially relevant, even if not perfectly comprehensive.
+- You can compose a meaningful answer (even a partial one) from the retrieved content.
+
+**Anti-patterns — do NOT do these:**
+- ❌ "The results are good, but maybe different keywords could find something better."
+- ❌ "I have enough to answer, but let me try one more query to be thorough."
+- ❌ "The answer is here, but I want to double-check with a different query."
+- ❌ Calling retrieval again after you have already identified the answer in previous results.
+
+**If you can answer the question with current results, you MUST answer immediately. Period.**
+
+### Insufficient — the ONLY valid reasons to retry
+
+- Results are completely empty or contain only `Error:` messages.
+- ALL results are entirely off-topic with zero relevance to the user's question.
+- No usable evidence exists at all — you cannot form even a partial answer.
+
+**"Results are not detailed enough" is NOT a valid reason to retry.**
+**"Results might be incomplete" is NOT a valid reason to retry.**
## 6. Fallback and Sequential Retry
-On insufficient results, follow this sequence:
+On insufficient results, you may retry **up to 2 more times** (3 calls total):
-1. Rewrite query, retry same tool (once)
-2. Switch to next retrieval source in default order
-3. For `rag_retrieve`, expand `top_k`: `30 → 50 → 100`
+1. Rewrite query, retry same tool.
+2. For `rag_retrieve`, escalate `top_k` to `100` on retry.
-- Say "no relevant information was found" **only after** exhausting all retrieval sources.
+- Say "no relevant information was found" **only after** exhausting all retries.
- Do NOT switch to local filesystem inspection at any point.
+- Do NOT make more than 3 retrieval calls in total per question (the limit is shared across all retrieval tools, not per tool).
## 7. Image Handling
@@ -81,7 +114,9 @@ This section applies only when self-knowledge is enabled.
Before replying to a knowledge retrieval task, verify:
- Used only whitelisted retrieval tools — no local filesystem inspection?
-- Exhausted retrieval flow before concluding "not found"?
+- Called retrieval at most 3 times total (not more)?
+- Answered immediately when results were sufficient (did NOT call again unnecessarily)?
+- If the first results were sufficient, was retrieval called exactly once (no unnecessary retry)?
- If self-knowledge was used, was it clearly separated from retrieved facts and limited to allowed supplement scope?
If any answer is "no", correct the process first.
diff --git a/skills/onprem/rag-retrieve-only/hooks/retrieval-policy-forbidden-self-knowledge.md b/skills/onprem/rag-retrieve-only/hooks/retrieval-policy-forbidden-self-knowledge.md
index 2b68869..7ea5e4b 100644
--- a/skills/onprem/rag-retrieve-only/hooks/retrieval-policy-forbidden-self-knowledge.md
+++ b/skills/onprem/rag-retrieve-only/hooks/retrieval-policy-forbidden-self-knowledge.md
@@ -40,6 +40,13 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi
- After each step, evaluate sufficiency before proceeding.
- Retrieval must happen **before** any factual answer generation.
+### First-Call Success Principle
+
+- The first retrieval call is expected to return sufficient results for most questions.
+- Your default assumption should be: **one call is enough**.
+- Additional calls are the exception, not the norm. Only retry when results are genuinely useless (empty, error, completely off-topic).
+- **Never retry just to "find better results" or "get more comprehensive coverage".** Good enough is sufficient.
+
## 4. Query Preparation
- Do NOT pass raw user question unless it already works well for retrieval.
@@ -48,25 +55,51 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi
## 5. Retrieval Breadth (`top_k`)
-- Apply `top_k` only to `rag_retrieve`. Use smallest sufficient value, expand if insufficient.
-- `30` for simple fact lookup → `50` for moderate synthesis/comparison → `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
-- Expansion order: `30 → 50 → 100`. If unsure, use `100`.
+- Apply `top_k` only to `rag_retrieve`. Choose the appropriate value upfront to maximize first-call success.
+- Use `50` for simple fact lookup or moderate synthesis, comparison, summarization, disambiguation.
+- Use `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
+- If unsure, use `50`. Escalate to `100` only on a retry, and only if the first results were insufficient.
## 6. Result Evaluation
-Treat as insufficient if: empty, `Error:`, off-topic, missing core entity/scope, no usable evidence, partial coverage, truncated results, or claims required by the answer are not explicitly supported.
+**Maximum 3 retrieval calls per question.** After each call, evaluate immediately:
+
+### Sufficient — answer immediately, no more calls
+
+ANY of the following means results are sufficient — STOP and answer now:
+- The core entity/topic in the user's question appears in the results.
+- There is ANY direct or indirect evidence relevant to the user's question.
+- Results are partially relevant, even if not perfectly comprehensive.
+- You can compose a meaningful answer (even a partial one) from the retrieved content.
+
+**Anti-patterns — do NOT do these:**
+- ❌ "The results are good, but maybe different keywords could find something better."
+- ❌ "I have enough to answer, but let me try one more query to be thorough."
+- ❌ "The answer is here, but I want to double-check with a different query."
+- ❌ Calling retrieval again after you have already identified the answer in previous results.
+
+**If you can answer the question with current results, you MUST answer immediately. Period.**
+
+### Insufficient — the ONLY valid reasons to retry
+
+- Results are completely empty or contain only `Error:` messages.
+- ALL results are entirely off-topic with zero relevance to the user's question.
+- No usable evidence exists at all — you cannot form even a partial answer.
+
+**"Results are not detailed enough" is NOT a valid reason to retry.**
+**"Results might be incomplete" is NOT a valid reason to retry.**
## 7. Fallback and Sequential Retry
-On insufficient results, follow this sequence:
+On insufficient results, you may retry **up to 2 more times** (3 calls total):
-1. Rewrite query, retry same tool (once)
-2. Switch to next retrieval source in default order
-3. For `rag_retrieve`, expand `top_k`: `30 → 50 → 100`
+1. Rewrite query, retry same tool.
+2. For `rag_retrieve`, escalate `top_k` to `100` on retry.
-- Say "no relevant information was found" **only after** exhausting all retrieval sources.
+- Say "no relevant information was found" **only after** exhausting all retries.
- Do NOT switch to local filesystem inspection at any point.
- Do NOT switch to model self-knowledge at any point.
+- Do NOT make more than 3 retrieval calls in total per question (the limit is shared across all retrieval tools, not per tool).
## 8. Handling Missing or Partial Evidence
@@ -84,14 +117,7 @@ On insufficient results, follow this sequence:
- Each sentence or key point in the response should be accompanied by relevant images when they meet the established association criteria.
- Avoid placing all images at the end of the response.
-## 10. Citation Requirements
-
-- MUST generate `` tags when using retrieval results.
-- Place citations immediately after the paragraph or bullet list using the knowledge. Do NOT collect at end.
-- 1-2 citations per paragraph/bullet. At least 1 citation when using retrieved knowledge.
-- Do NOT cite claims that were not supported by retrieval.
-
-## 11. Self-Knowledge Prohibition
+## 10. Self-Knowledge Prohibition
This section applies whenever self-knowledge is disabled or forbidden for the current task.
@@ -101,19 +127,19 @@ This section applies whenever self-knowledge is disabled or forbidden for the cu
- The model must not supplement missing parts with general knowledge, conceptual explanation, common background, intuition, or likely completion.
- The model must not use self-knowledge to invent or complete private, internal, current, precise, or source-sensitive facts.
- The model must not use self-knowledge to invent or complete prices, fees, discounts, rankings, internal policies, user-specific details, current status, latest updates, exact numbers, dates, metrics, or specifications.
-- Retrieved facts must include citations.
- Unsupported parts must be stated as unavailable rather than guessed.
- If a paragraph would mix retrieved facts and unsupported completion, remove the unsupported completion.
- If evidence is incomplete, state the limitation explicitly.
-## 12. Pre-Reply Self-Check
+## 11. Pre-Reply Self-Check
Before replying to a knowledge retrieval task, verify:
- Used only whitelisted retrieval tools — no local filesystem inspection?
+- Called retrieval at most 3 times total (not more)?
+- Answered immediately when results were sufficient (did NOT call again unnecessarily)?
+- If the first results were sufficient, was retrieval called exactly once (no unnecessary retry)?
- Did retrieval happen before any factual answer drafting?
- Did every factual claim come from retrieved evidence rather than model knowledge?
-- Exhausted retrieval flow before concluding "not found"?
-- Citations placed immediately after each relevant paragraph?
- If any unsupported part remained, was it removed or explicitly marked unavailable?
If any answer is "no", correct the process first.
diff --git a/skills/onprem/rag-retrieve-only/hooks/retrieval-policy.md b/skills/onprem/rag-retrieve-only/hooks/retrieval-policy.md
index 75195c8..7c69ded 100644
--- a/skills/onprem/rag-retrieve-only/hooks/retrieval-policy.md
+++ b/skills/onprem/rag-retrieve-only/hooks/retrieval-policy.md
@@ -27,6 +27,13 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi
- Do NOT answer from model knowledge first.
- After each step, evaluate sufficiency before proceeding.
+### First-Call Success Principle
+
+- The first retrieval call is expected to return sufficient results for most questions.
+- Your default assumption should be: **one call is enough**.
+- Additional calls are the exception, not the norm. Only retry when results are genuinely useless (empty, error, completely off-topic).
+- **Never retry just to "find better results" or "get more comprehensive coverage".** Good enough is sufficient.
+
## 3. Query Preparation
- Do NOT pass raw user question unless it already works well for retrieval.
@@ -35,24 +42,51 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi
## 4. Retrieval Breadth (`top_k`)
-- Apply `top_k` only to `rag_retrieve`. Use smallest sufficient value, expand if insufficient.
-- `30` for simple fact lookup → `50` for moderate synthesis/comparison → `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
-- Expansion order: `30 → 50 → 100`. If unsure, use `100`.
+- Apply `top_k` only to `rag_retrieve`. Choose the appropriate value upfront to maximize first-call success.
+- Use `50` for simple fact lookup or moderate synthesis, comparison, summarization, disambiguation.
+- Use `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
+- If unsure, use `50`. Escalate to `100` only on a retry, and only if the first results were insufficient.
## 5. Result Evaluation
-Treat as insufficient if: empty, `Error:`, off-topic, missing core entity/scope, no usable evidence, partial coverage, or truncated results.
+**Maximum 3 retrieval calls per question.** After each call, evaluate immediately:
+
+### Sufficient — answer immediately, no more calls
+
+ANY of the following means results are sufficient — STOP and answer now:
+- The core entity/topic in the user's question appears in the results.
+- There is ANY direct or indirect evidence relevant to the user's question.
+- Results are partially relevant, even if not perfectly comprehensive.
+- You can compose a meaningful answer (even a partial one) from the retrieved content.
+
+**Anti-patterns — do NOT do these:**
+- ❌ "The results are good, but maybe different keywords could find something better."
+- ❌ "I have enough to answer, but let me try one more query to be thorough."
+- ❌ "The answer is here, but I want to double-check with a different query."
+- ❌ Calling retrieval again after you have already identified the answer in previous results.
+
+**If you can answer the question with current results, you MUST answer immediately. Period.**
+
+### Insufficient — the ONLY valid reasons to retry
+
+- Results are completely empty or contain only `Error:` messages.
+- ALL results are entirely off-topic with zero relevance to the user's question.
+- No usable evidence exists at all — you cannot form even a partial answer.
+
+**"Results are not detailed enough" is NOT a valid reason to retry.**
+**"Results might be incomplete" is NOT a valid reason to retry.**
## 6. Fallback and Sequential Retry
-On insufficient results, follow this sequence:
+On insufficient results, you may retry **up to 2 more times** (3 calls total):
-1. Rewrite query, retry same tool (once)
-2. Switch to next retrieval source in default order
-3. For `rag_retrieve`, expand `top_k`: `30 → 50 → 100`
+1. Rewrite query, retry same tool.
+2. Switch to next retrieval source in default order.
+3. For `rag_retrieve`, escalate `top_k` to `100` on retry.
- Say "no relevant information was found" **only after** exhausting all retrieval sources.
- Do NOT switch to local filesystem inspection at any point.
+- Do NOT make more than 3 retrieval calls in total per question (the limit is shared across all retrieval tools, not per tool).
## 7. Image Handling
@@ -89,7 +123,9 @@ This section applies only when self-knowledge is enabled.
Before replying to a knowledge retrieval task, verify:
- Used only whitelisted retrieval tools — no local filesystem inspection?
-- Exhausted retrieval flow before concluding "not found"?
+- Called retrieval at most 3 times total (not more)?
+- Answered immediately when results were sufficient (did NOT call again unnecessarily)?
+- If the first results were sufficient, was retrieval called exactly once (no unnecessary retry)?
- Citations placed immediately after each relevant paragraph?
- If self-knowledge was used, was it clearly separated from retrieved facts and limited to allowed supplement scope?
diff --git a/skills/support/rag-retrieve-only/hooks/retrieval-policy-forbidden-self-knowledge.md b/skills/support/rag-retrieve-only/hooks/retrieval-policy-forbidden-self-knowledge.md
index 2b68869..061c855 100644
--- a/skills/support/rag-retrieve-only/hooks/retrieval-policy-forbidden-self-knowledge.md
+++ b/skills/support/rag-retrieve-only/hooks/retrieval-policy-forbidden-self-knowledge.md
@@ -14,7 +14,7 @@ For knowledge retrieval tasks, **this policy overrides generic codebase explorat
- **Prohibited answer source**: the model's own parametric knowledge, memory, prior world knowledge, intuition, common sense completion, or unsupported inference.
- **Prohibited tools**: `Glob`, `Read`, `LS`, Bash (`ls`, `find`, `cat`, `head`, `tail`, `grep`, etc.) — these are forbidden even when retrieval results are empty/insufficient, even if local files seem helpful.
-- **Allowed tools only**: skill-enabled retrieval tools, `rag_retrieve`. No other source for factual answering.
+- **Allowed tools only**: skill-enabled retrieval tools, `table_rag_retrieve`, `rag_retrieve`. No other source for factual answering.
- Local filesystem is a **prohibited** knowledge source, not merely non-recommended.
- Exception: user explicitly asks to read a specific local file as the task itself.
- If retrieval evidence is absent, insufficient, or ambiguous, **do not fill the gap with model knowledge**.
@@ -35,11 +35,20 @@ For any knowledge retrieval task:
Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe filesystem first.
1. **Skill-enabled retrieval tools** (use first when available)
-2. **`rag_retrieve`**
+2. **`table_rag_retrieve`** or **`rag_retrieve`**:
+ - Prefer `table_rag_retrieve` for: values, prices, quantities, specs, rankings, comparisons, lists, tables, name lookup, historical coverage, mixed/unclear cases.
+ - Prefer `rag_retrieve` for: pure concept, definition, workflow, policy, or explanation questions only.
- After each step, evaluate sufficiency before proceeding.
- Retrieval must happen **before** any factual answer generation.
+### First-Call Success Principle
+
+- The first retrieval call is expected to return sufficient results for most questions.
+- Your default assumption should be: **one call is enough**.
+- Additional calls are the exception, not the norm. Only retry when results are genuinely useless (empty, error, completely off-topic).
+- **Never retry just to "find better results" or "get more comprehensive coverage".** Good enough is sufficient.
+
## 4. Query Preparation
- Do NOT pass raw user question unless it already works well for retrieval.
@@ -48,25 +57,54 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi
## 5. Retrieval Breadth (`top_k`)
-- Apply `top_k` only to `rag_retrieve`. Use smallest sufficient value, expand if insufficient.
-- `30` for simple fact lookup → `50` for moderate synthesis/comparison → `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
-- Expansion order: `30 → 50 → 100`. If unsure, use `100`.
+- Apply `top_k` only to `rag_retrieve`. Choose the appropriate value upfront to maximize first-call success.
+- Use `50` for simple fact lookup or moderate synthesis, comparison, summarization, disambiguation.
+- Use `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
+- If unsure, use `50`. Escalate to `100` only on a retry, and only if the first results were insufficient.
## 6. Result Evaluation
-Treat as insufficient if: empty, `Error:`, off-topic, missing core entity/scope, no usable evidence, partial coverage, truncated results, or claims required by the answer are not explicitly supported.
+**Maximum 3 retrieval calls per question.** After each call, evaluate immediately:
+
+### Sufficient — answer immediately, no more calls
+
+ANY of the following means results are sufficient — STOP and answer now:
+- The core entity/topic in the user's question appears in the results.
+- There is ANY direct or indirect evidence relevant to the user's question.
+- Results are partially relevant, even if not perfectly comprehensive.
+- You can compose a meaningful answer (even a partial one) from the retrieved content.
+
+**Anti-patterns — do NOT do these:**
+- ❌ "The results are good, but maybe different keywords could find something better."
+- ❌ "I have enough to answer, but let me try one more query to be thorough."
+- ❌ "The answer is here, but I want to double-check with a different query."
+- ❌ Calling retrieval again after you have already identified the answer in previous results.
+
+**If you can answer the question with current results, you MUST answer immediately. Period.**
+
+### Insufficient — the ONLY valid reasons to retry
+
+- Results are completely empty or contain only `Error:` / `no excel files found` messages.
+- ALL results are entirely off-topic with zero relevance to the user's question.
+- No usable evidence exists at all — you cannot form even a partial answer.
+
+**"Results are not detailed enough" is NOT a valid reason to retry.**
+**"Results might be incomplete" is NOT a valid reason to retry.**
## 7. Fallback and Sequential Retry
-On insufficient results, follow this sequence:
+On insufficient results, you may retry **up to 2 more times** (3 calls total):
-1. Rewrite query, retry same tool (once)
-2. Switch to next retrieval source in default order
-3. For `rag_retrieve`, expand `top_k`: `30 → 50 → 100`
+1. Rewrite query, retry same tool.
+2. Switch to next retrieval source in default order.
+3. For `rag_retrieve`, escalate `top_k` to `100` on retry.
+4. `table_rag_retrieve` insufficient → try `rag_retrieve`; `rag_retrieve` insufficient → try `table_rag_retrieve`.
-- Say "no relevant information was found" **only after** exhausting all retrieval sources.
+- `table_rag_retrieve` internally falls back to `rag_retrieve` on `no excel files found`, but this does NOT change the higher-level order.
+- Say "no relevant information was found" **only after** exhausting all retries.
- Do NOT switch to local filesystem inspection at any point.
- Do NOT switch to model self-knowledge at any point.
+- Do NOT make more than 3 retrieval calls in total per question (the limit is shared across all retrieval tools, not per tool).
## 8. Handling Missing or Partial Evidence
@@ -75,7 +113,12 @@ On insufficient results, follow this sequence:
- Prefer "the retrieved materials do not provide this information" over speculative completion.
- When user asks for a definitive answer but evidence is incomplete, state the limitation directly.
-## 9. Image Handling
+## 9. Table RAG Result Handling
+
+- Follow all `[INSTRUCTION]` and `[EXTRA_INSTRUCTION]` in results.
+- If truncated: tell user total (`N+M`), displayed (`N`), omitted (`M`).
+
+## 10. Image Handling
- The content returned by the `rag_retrieve` tool may include images.
- Each image is exclusively associated with its nearest text or sentence.
@@ -84,13 +127,6 @@ On insufficient results, follow this sequence:
- Each sentence or key point in the response should be accompanied by relevant images when they meet the established association criteria.
- Avoid placing all images at the end of the response.
-## 10. Citation Requirements
-
-- MUST generate `` tags when using retrieval results.
-- Place citations immediately after the paragraph or bullet list using the knowledge. Do NOT collect at end.
-- 1-2 citations per paragraph/bullet. At least 1 citation when using retrieved knowledge.
-- Do NOT cite claims that were not supported by retrieval.
-
## 11. Self-Knowledge Prohibition
This section applies whenever self-knowledge is disabled or forbidden for the current task.
@@ -101,7 +137,6 @@ This section applies whenever self-knowledge is disabled or forbidden for the cu
- The model must not supplement missing parts with general knowledge, conceptual explanation, common background, intuition, or likely completion.
- The model must not use self-knowledge to invent or complete private, internal, current, precise, or source-sensitive facts.
- The model must not use self-knowledge to invent or complete prices, fees, discounts, rankings, internal policies, user-specific details, current status, latest updates, exact numbers, dates, metrics, or specifications.
-- Retrieved facts must include citations.
- Unsupported parts must be stated as unavailable rather than guessed.
- If a paragraph would mix retrieved facts and unsupported completion, remove the unsupported completion.
- If evidence is incomplete, state the limitation explicitly.
@@ -110,10 +145,11 @@ This section applies whenever self-knowledge is disabled or forbidden for the cu
Before replying to a knowledge retrieval task, verify:
- Used only whitelisted retrieval tools — no local filesystem inspection?
+- Called retrieval at most 3 times total (not more)?
+- Answered immediately when results were sufficient (did NOT call again unnecessarily)?
+- If the first results were sufficient, was retrieval called exactly once (no unnecessary retry)?
- Did retrieval happen before any factual answer drafting?
- Did every factual claim come from retrieved evidence rather than model knowledge?
-- Exhausted retrieval flow before concluding "not found"?
-- Citations placed immediately after each relevant paragraph?
- If any unsupported part remained, was it removed or explicitly marked unavailable?
If any answer is "no", correct the process first.
diff --git a/skills/support/rag-retrieve-only/hooks/retrieval-policy.md b/skills/support/rag-retrieve-only/hooks/retrieval-policy.md
index 75195c8..7c69ded 100644
--- a/skills/support/rag-retrieve-only/hooks/retrieval-policy.md
+++ b/skills/support/rag-retrieve-only/hooks/retrieval-policy.md
@@ -27,6 +27,13 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi
- Do NOT answer from model knowledge first.
- After each step, evaluate sufficiency before proceeding.
+### First-Call Success Principle
+
+- The first retrieval call is expected to return sufficient results for most questions.
+- Your default assumption should be: **one call is enough**.
+- Additional calls are the exception, not the norm. Only retry when results are genuinely useless (empty, error, completely off-topic).
+- **Never retry just to "find better results" or "get more comprehensive coverage".** Good enough is sufficient.
+
## 3. Query Preparation
- Do NOT pass raw user question unless it already works well for retrieval.
@@ -35,24 +42,51 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi
## 4. Retrieval Breadth (`top_k`)
-- Apply `top_k` only to `rag_retrieve`. Use smallest sufficient value, expand if insufficient.
-- `30` for simple fact lookup → `50` for moderate synthesis/comparison → `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
-- Expansion order: `30 → 50 → 100`. If unsure, use `100`.
+- Apply `top_k` only to `rag_retrieve`. Choose the appropriate value upfront to maximize first-call success.
+- Use `50` for simple fact lookup or moderate synthesis, comparison, summarization, disambiguation.
+- Use `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
+- If unsure, use `50`. Escalate to `100` only on a retry call, and only if the first results are insufficient.
## 5. Result Evaluation
-Treat as insufficient if: empty, `Error:`, off-topic, missing core entity/scope, no usable evidence, partial coverage, or truncated results.
+**Maximum 3 retrieval calls per question.** After each call, evaluate immediately:
+
+### Sufficient — answer immediately, no more calls
+
+ANY of the following means results are sufficient — STOP and answer now:
+- The core entity/topic in the user's question appears in the results.
+- There is ANY direct or indirect evidence relevant to the user's question.
+- Results are partially relevant, even if not perfectly comprehensive.
+- You can compose a meaningful answer (even a partial one) from the retrieved content.
+
+**Anti-patterns — do NOT do these:**
+- ❌ "The results are good, but maybe different keywords could find something better."
+- ❌ "I have enough to answer, but let me try one more query to be thorough."
+- ❌ "The answer is here, but I want to double-check with a different query."
+- ❌ Calling retrieval again after you have already identified the answer in previous results.
+
+**If you can answer the question with current results, you MUST answer immediately. Period.**
+
+### Insufficient — the ONLY valid reasons to retry
+
+- Results are completely empty or contain only `Error:` messages.
+- ALL results are entirely off-topic with zero relevance to the user's question.
+- No usable evidence exists at all — you cannot form even a partial answer.
+
+**"Results are not detailed enough" is NOT a valid reason to retry.**
+**"Results might be incomplete" is NOT a valid reason to retry.**
## 6. Fallback and Sequential Retry
-On insufficient results, follow this sequence:
+On insufficient results, you may retry **up to 2 more times** (3 calls total):
-1. Rewrite query, retry same tool (once)
-2. Switch to next retrieval source in default order
-3. For `rag_retrieve`, expand `top_k`: `30 → 50 → 100`
+1. Rewrite query, retry same tool.
+2. Switch to next retrieval source in default order.
+3. For `rag_retrieve`, escalate `top_k` to `100` on retry.
- Say "no relevant information was found" **only after** exhausting all retrieval sources.
- Do NOT switch to local filesystem inspection at any point.
+- Do NOT make more than 3 retrieval calls in total, counted across all retrieval tools.
## 7. Image Handling
@@ -89,7 +123,9 @@ This section applies only when self-knowledge is enabled.
Before replying to a knowledge retrieval task, verify:
- Used only whitelisted retrieval tools — no local filesystem inspection?
-- Exhausted retrieval flow before concluding "not found"?
+- Called retrieval at most 3 times total (not more)?
+- Answered immediately when results were sufficient (did NOT call again unnecessarily)?
+- Called retrieval exactly once when first results were sufficient (did NOT retry unnecessarily)?
- Citations placed immediately after each relevant paragraph?
- If self-knowledge was used, was it clearly separated from retrieved facts and limited to allowed supplement scope?
diff --git a/utils/structured_log.py b/utils/structured_log.py
new file mode 100644
index 0000000..596048f
--- /dev/null
+++ b/utils/structured_log.py
@@ -0,0 +1,69 @@
+import json
+import logging
+import time
+from typing import Any, Optional
+
+logger = logging.getLogger("app")  # metric records are written through the "app" logger
+
+SCHEMA_VERSION = 1  # copied into every metric payload as "schema_version"
+
+
+def _normalize_value(value: Any) -> Any:  # Return None/str/int/float/bool unchanged; convert any other value with str().
+ if value is None:
+ return None  # None is preserved so the caller can omit the field entirely
+ if isinstance(value, (str, int, float, bool)):
+ return value
+ return str(value)  # containers and arbitrary objects are stringified, not serialized structurally
+
+
+def emit_question_metric(  # Build one question-performance metric record and log it as a single compact JSON line at INFO level.
+ *,  # every argument is keyword-only
+ stage: str,
+ status: str,
+ duration_ms: Optional[int] = None,
+ first_response_time_ms: Optional[int] = None,
+ trace_id: Optional[str] = None,
+ ai_id: Optional[str] = None,
+ session_id: Optional[str] = None,
+ robot_type: Optional[str] = None,
+ model: Optional[str] = None,
+ stream: Optional[bool] = None,
+ error_type: Optional[str] = None,
+ extra: Optional[dict[str, Any]] = None,  # additional top-level fields; merged last, so a key here overwrites an earlier key of the same name
+) -> None:
+ payload: dict[str, Any] = {  # fields that are always present in the record
+ "schema_version": SCHEMA_VERSION,
+ "event": {
+ "kind": "metric",
+ "category": ["question"],
+ "action": "question_perf",
+ },
+ "stage": stage,
+ "status": status,
+ "observed_at": int(time.time() * 1000),  # current epoch time in whole milliseconds
+ "service": "catalog-agent",
+ }
+
+ optional_fields = {
+ "trace_id": trace_id,
+ "duration_ms": duration_ms,
+ "first_response_time_ms": first_response_time_ms,
+ "ai_id": ai_id,
+ "session_id": session_id,
+ "robot_type": robot_type,
+ "model": model,
+ "stream": stream,
+ "error_type": error_type,
+ }
+ for key, value in optional_fields.items():
+ normalized = _normalize_value(value)
+ if normalized is not None:  # arguments left as None are omitted from the payload
+ payload[key] = normalized
+
+ if extra:  # None or an empty dict contributes nothing
+ for key, value in extra.items():
+ normalized = _normalize_value(value)  # non-scalar extras end up as their str() form
+ if normalized is not None:
+ payload[key] = normalized
+
+ logger.info(json.dumps(payload, ensure_ascii=False, separators=(",", ":")))  # compact separators; non-ASCII characters are written unescaped