Merge branch 'developing' into bot_manager

2026-05-12 12:44:19 +08:00 · 2026-05-12 12:44:19 +08:00 · 718ec5302e
commit 718ec5302e
parent 2a83d0fa05 7b4f03d340
29 changed files with 2100 additions and 175 deletions
--- a/agent/agent_config.py
+++ b/agent/agent_config.py
@ -32,6 +32,7 @@ class AgentConfig:
    session_id: Optional[str] = None
    dataset_ids: Optional[List[str]] = field(default_factory=list)
    trace_id: Optional[str] = None  # Request trace ID, obtained from the X-Request-ID header
+    request_started_at: Optional[float] = None

    # Response control parameters
    stream: bool = False
--- a/agent/deep_assistant.py
+++ b/agent/deep_assistant.py
@ -24,6 +24,7 @@ from .guideline_middleware import GuidelineMiddleware
 from .tool_output_length_middleware import ToolOutputLengthMiddleware
 from .tool_use_cleanup_middleware import ToolUseCleanupMiddleware
 from .filepath_fix_middleware import FilePathFixMiddleware
+from .mcp_trace_meta import patch_mcp_client_session_trace_meta
 from utils.settings import (
    SUMMARIZATION_MAX_TOKENS,
    SUMMARIZATION_TOKENS_TO_KEEP,
@ -42,6 +43,7 @@ from .mem0_middleware import create_mem0_middleware
 from .mem0_config import Mem0Config
 from agent.prompt_loader import load_system_prompt_async, load_mcp_settings_async
 from agent.agent_memory_cache import get_memory_cache_manager
+from .subagent_loader import load_subagents
 from .checkpoint_manager import get_checkpointer_manager
 from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver
 from langgraph.checkpoint.memory import InMemorySaver
@ -63,6 +65,8 @@ from deepagents.graph import BASE_AGENT_PROMPT
 from deepagents_cli.local_context import LocalContextMiddleware
 # Custom: FilesystemMiddleware with full SKILL.md reading support
 from .custom_filesystem_middleware import CustomFilesystemMiddleware
+# Sub-agent support
+from deepagents.middleware.subagents import SubAgent, SubAgentMiddleware

 # Global MemorySaver instance
 # from langgraph.checkpoint.memory import MemorySaver
@ -123,6 +127,7 @@ def read_system_prompt():

 async def get_tools_from_mcp(mcp):
    """Extract tools from MCP configuration with caching."""
+    patch_mcp_client_session_trace_meta()
    start_time = time.time()
    # Defensive handling: ensure mcp is a non-empty list containing mcpServers
    if not isinstance(mcp, list) or len(mcp) == 0 or "mcpServers" not in mcp[0]:
@ -306,6 +311,15 @@ async def init_agent(config: AgentConfig):
    sandbox, sandbox_type, workspace_root = await sandbox_task
    logger.info(f"init_agent sandbox ready, elapsed: {time.time() - create_start:.3f}s")

+    # Load sub-agents from skill directories
+    subagents = await load_subagents(
+        bot_id=config.bot_id,
+        tools=mcp_tools,
+        model=llm_instance,
+    )
+    if subagents:
+        logger.info(f"Loaded {len(subagents)} sub-agents: {[s['name'] for s in subagents]}")
+
    agent, composite_backend = create_custom_cli_agent(
        model=llm_instance,
        assistant_id=config.bot_id,
@ -317,6 +331,7 @@ async def init_agent(config: AgentConfig):
        checkpointer=checkpointer,
        sandbox=sandbox,
        sandbox_type=sandbox_type,
+        subagents=subagents if subagents else None,
        shell_env={
            k: v for k, v in {
                "ASSISTANT_ID": str(config.bot_id),
@ -385,6 +400,7 @@ def create_custom_cli_agent(
    checkpointer: Checkpointer | None = None,
    store: BaseStore | None = None,
    shell_env: dict[str, str] | None = None,
+    subagents: list[SubAgent] | None = None,
 ) -> tuple[Pregel, CompositeBackend]:
    """Create a CLI-configured agent with custom workspace_root for shell commands.

@ -521,9 +537,19 @@ def create_custom_cli_agent(
        TodoListMiddleware(),
        FilePathFixMiddleware(),  # Fix extra spaces in CJK file names within tool call arguments
        CustomFilesystemMiddleware(backend=composite_backend),  # Use the custom FilesystemMiddleware with full SKILL.md reading support
+    ]
+    # Insert SubAgentMiddleware after FilesystemMiddleware (matches create_deep_agent ordering)
+    if subagents:
+        subagent_middleware = SubAgentMiddleware(
+            backend=composite_backend,
+            subagents=subagents,
+        )
+        deepagent_middleware.append(subagent_middleware)
+        logger.info(f"SubAgentMiddleware added with {len(subagents)} sub-agents: {[s['name'] for s in subagents]}")
+    deepagent_middleware.extend([
        AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"),
        PatchToolCallsMiddleware(),
-    ]
+    ])
    if agent_middleware:
        deepagent_middleware.extend(agent_middleware)
    if interrupt_on is not None:
--- a/agent/logging_handler.py
+++ b/agent/logging_handler.py
@ -1,6 +1,7 @@
 """Logging callback handler module."""

 import logging
+import traceback
 from typing import Any, Optional, Dict, List
 from langchain_core.callbacks import BaseCallbackHandler
 from langchain_core.messages import BaseMessage
@ -80,4 +81,8 @@ class LoggingCallbackHandler(BaseCallbackHandler):
        self, error: Exception, **kwargs: Any
    ) -> None:
        """Called when a tool invocation raises an error."""
-        self.logger.error(f"❌ Tool Error: {error}")
+        self.logger.error(
+            "❌ Tool Error: %s\n%s",
+            repr(error),
+            "".join(traceback.format_exception(type(error), error, error.__traceback__)),
+        )
--- a/agent/mcp_trace_meta.py
+++ b/agent/mcp_trace_meta.py
@ -0,0 +1,98 @@
+import logging
+from functools import wraps
+from typing import Any
+
+try:
+    from mcp import ClientSession, types
+except ImportError:
+    from mcp.client.session import ClientSession
+    from mcp import types
+
+from utils.log_util.context import g
+
+logger = logging.getLogger("app")
+
+_PATCHED_ATTR = "_catalog_trace_meta_patched"
+_TRACE_META_TOOL_NAMES = {"rag_retrieve", "table_rag_retrieve"}
+
+
+def _get_trace_id() -> str:
+    """Return the current trace id as a string, or "" when none is available.
+
+    The value is read from the ``trace_id`` attribute of the logging context
+    object ``g``. A missing or falsy value, or a lookup failure while reading
+    it, yields the empty string.
+    """
+    try:
+        current = getattr(g, "trace_id", "")
+    except (LookupError, KeyError):
+        # NOTE(review): presumably raised by ``g`` when no request context is
+        # bound — confirm against utils.log_util.context.
+        current = ""
+    if not current:
+        return ""
+    return str(current)
+
+
+def _get_tool_name(args: tuple[Any, ...], kwargs: dict[str, Any]) -> str:
+    """Extract the tool name from the arguments of a ``call_tool`` invocation.
+
+    Args:
+        args: Positional arguments of the call; the first one, when present,
+            is taken as the tool name.
+        kwargs: Keyword arguments of the call; ``name`` is consulted only when
+            there are no positional arguments.
+
+    Returns:
+        The name converted to ``str``, or "" when it is missing or falsy.
+    """
+    candidate = kwargs.get("name") if not args else args[0]
+    if not candidate:
+        return ""
+    return str(candidate)
+
+
+def patch_mcp_client_session_trace_meta() -> None:
+    """Attach catalog trace id to MCP tools/call params._meta.
+
+    Replaces ``ClientSession.call_tool`` with a wrapper that, for tools listed
+    in ``_TRACE_META_TOOL_NAMES``, adds the current trace id under the
+    ``trace_id`` key of the ``meta`` keyword argument. A ``trace_id`` already
+    present in a caller-supplied ``meta`` dict is kept.
+
+    The function is idempotent: the wrapper is tagged with ``_PATCHED_ATTR``
+    and later calls return early when that tag is found.
+    """
+    if getattr(ClientSession.call_tool, _PATCHED_ATTR, False):
+        return
+
+    original_call_tool = ClientSession.call_tool
+
+    @wraps(original_call_tool)
+    async def call_tool_with_trace_meta(self: ClientSession, *args: Any, **kwargs: Any) -> Any:
+        tool_name = _get_tool_name(args, kwargs)
+        trace_id = _get_trace_id() if tool_name in _TRACE_META_TOOL_NAMES else ""
+        if trace_id:
+            meta = kwargs.get("meta")
+            if isinstance(meta, dict):
+                # Copy so the caller's dict is not mutated; keep its trace_id if set.
+                meta = {**meta, "trace_id": meta.get("trace_id") or trace_id}
+            else:
+                meta = {"trace_id": trace_id}
+            kwargs["meta"] = meta
+
+        try:
+            return await original_call_tool(self, *args, **kwargs)
+        except TypeError as exc:
+            message = str(exc)
+            # Fall back only when the SDK rejected the ``meta`` keyword that
+            # this wrapper injected. Any other TypeError (a different bad
+            # keyword, or one raised deeper in the call) must propagate rather
+            # than trigger a second tools/call request.
+            if trace_id and "unexpected keyword argument" in message and "'meta'" in message:
+                return await _call_tool_with_meta_compat(self, *args, **kwargs)
+            raise
+
+    setattr(call_tool_with_trace_meta, _PATCHED_ATTR, True)
+    ClientSession.call_tool = call_tool_with_trace_meta
+
+
+async def _call_tool_with_meta_compat(self: ClientSession, *args: Any, **kwargs: Any) -> Any:
+    """Call tools/call with _meta for MCP SDK versions before call_tool(meta=...).
+
+    Rebuilds the request by hand from the arguments of a ``call_tool``
+    invocation and sends it through ``self.send_request``. Positional
+    arguments are read in the order name, arguments, read_timeout_seconds,
+    progress_callback; each falls back to the keyword of the same name
+    (``arguments`` additionally falls back to ``args``).
+
+    Raises:
+        TypeError: If no tool name was supplied.
+    """
+    tool_name = _get_tool_name(args, kwargs)
+    if not tool_name:
+        raise TypeError("call_tool() missing required argument: 'name'")
+
+    positional_count = len(args)
+
+    if positional_count > 1:
+        tool_arguments = args[1]
+    elif "arguments" in kwargs:
+        tool_arguments = kwargs["arguments"]
+    else:
+        tool_arguments = kwargs.get("args")
+
+    if positional_count > 2:
+        timeout = args[2]
+    else:
+        timeout = kwargs.get("read_timeout_seconds")
+
+    if positional_count > 3:
+        on_progress = args[3]
+    else:
+        on_progress = kwargs.get("progress_callback")
+
+    # Only a plain dict is forwarded as _meta; anything else is dropped.
+    supplied_meta = kwargs.get("meta")
+    request_meta = supplied_meta if isinstance(supplied_meta, dict) else None
+
+    params = types.CallToolRequestParams(
+        name=tool_name,
+        arguments=tool_arguments,
+        _meta=request_meta,
+    )
+    request = types.ClientRequest(
+        types.CallToolRequest(method="tools/call", params=params)
+    )
+    result = await self.send_request(
+        request,
+        types.CallToolResult,
+        request_read_timeout_seconds=timeout,
+        progress_callback=on_progress,
+    )
+
+    # The validation hook is looked up dynamically because not every session
+    # object provides it; error results are returned without validation.
+    validator = getattr(self, "_validate_tool_result", None)
+    if not validator or result.isError:
+        return result
+
+    await validator(tool_name, result)
+    return result
--- a/agent/subagent_loader.py
+++ b/agent/subagent_loader.py
@ -0,0 +1,188 @@
+"""Sub-agent loader for discovering and parsing sub-agent definitions from skill directories.
+
+Sub-agents are defined as markdown files with YAML frontmatter in skill directories:
+    projects/robot/{bot_id}/skills/{skill_name}/agents/*.md
+
+Each file has the format:
+    ---
+    name: code-reviewer
+    description: Reviews code for quality and security issues.
+    tools: rag_retrieve, table_rag_retrieve
+    ---
+
+    System prompt for the sub-agent...
+"""
+
+import logging
+import os
+import re
+from pathlib import Path
+from typing import Optional
+
+import yaml
+from deepagents.middleware.subagents import SubAgent
+from langchain.tools import BaseTool
+from langchain_core.language_models import BaseChatModel
+
+from agent.plugin_hook_loader import _get_skill_dirs
+
+logger = logging.getLogger('app')
+
+# Regex to extract YAML frontmatter and body from markdown files
+_FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n?(.*)$", re.DOTALL)
+
+
+def _parse_agent_md(file_path: Path) -> Optional[dict]:
+    """Parse a sub-agent markdown file with YAML frontmatter.
+
+    Args:
+        file_path: Path to the .md file.
+
+    Returns:
+        Dict with keys: name, description, system_prompt,
+        tool_names (list[str] | None) and source (the file path as a string).
+        None if the file cannot be read or decoded, has no frontmatter, holds
+        invalid or non-mapping YAML, or lacks a non-empty string 'name' or
+        'description'. Every failure is logged as a warning.
+    """
+    try:
+        # utf-8-sig decodes plain UTF-8 and also drops a leading BOM, which
+        # would otherwise keep the frontmatter regex from matching at offset 0.
+        content = file_path.read_text(encoding="utf-8-sig")
+    except (OSError, UnicodeDecodeError) as e:
+        # UnicodeDecodeError is a ValueError, not an OSError; left uncaught,
+        # one badly encoded file would abort loading of every sub-agent.
+        logger.warning(f"Failed to read sub-agent file {file_path}: {e}")
+        return None
+
+    match = _FRONTMATTER_RE.match(content)
+    if not match:
+        logger.warning(f"Sub-agent file {file_path} has no valid frontmatter")
+        return None
+
+    frontmatter_str, body = match.group(1), match.group(2)
+
+    try:
+        frontmatter = yaml.safe_load(frontmatter_str)
+    except yaml.YAMLError as e:
+        logger.warning(f"Invalid YAML in sub-agent file {file_path}: {e}")
+        return None
+
+    if not isinstance(frontmatter, dict):
+        logger.warning(f"Frontmatter in {file_path} is not a dict")
+        return None
+
+    # Non-string values (numbers, lists, null) are treated as missing.
+    raw_name = frontmatter.get("name")
+    name = raw_name.strip() if isinstance(raw_name, str) else ""
+    raw_description = frontmatter.get("description")
+    description = raw_description.strip() if isinstance(raw_description, str) else ""
+
+    if not name:
+        logger.warning(f"Sub-agent file {file_path} missing required 'name' field")
+        return None
+    if not description:
+        logger.warning(f"Sub-agent file {file_path} missing required 'description' field")
+        return None
+
+    # Parse optional tools field: comma-separated string or YAML list of names.
+    # tool_names stays None when the field is absent or has an unsupported type.
+    tool_names = None
+    tools_field = frontmatter.get("tools")
+    if tools_field is not None:
+        if isinstance(tools_field, str):
+            tool_names = [t.strip() for t in tools_field.split(",") if t.strip()]
+        elif isinstance(tools_field, list):
+            tool_names = [str(t).strip() for t in tools_field if str(t).strip()]
+        else:
+            logger.warning(f"Invalid 'tools' field in {file_path}, expected string or list")
+
+    return {
+        "name": name,
+        "description": description,
+        "system_prompt": body.strip(),
+        "tool_names": tool_names,
+        "source": str(file_path),
+    }
+
+
+def _filter_tools_by_names(all_tools: list[BaseTool], tool_names: list[str]) -> list[BaseTool]:
+    """Select the tools whose names appear in a whitelist.
+
+    Args:
+        all_tools: Every tool that may be selected from.
+        tool_names: Names to pick, in the order the result should follow.
+
+    Returns:
+        The matching tools, ordered as in ``tool_names``. A name with no
+        matching tool is skipped and reported with a warning.
+    """
+    by_name = {}
+    for tool in all_tools:
+        by_name[tool.name] = tool
+
+    selected = []
+    for wanted in tool_names:
+        if wanted not in by_name:
+            known = list(by_name)
+            logger.warning(f"Sub-agent tool '{wanted}' not found in MCP tools. Available: {known}")
+            continue
+        selected.append(by_name[wanted])
+    return selected
+
+
+async def load_subagents(
+    bot_id: str,
+    tools: list[BaseTool],
+    model: BaseChatModel,
+) -> list[SubAgent]:
+    """Load sub-agent definitions from skill directories.
+
+    Scans all skill directories for the given bot_id, looking for agents/*.md files
+    in each skill subdirectory. Skills and agent files are visited in sorted
+    order so that, when two files declare the same sub-agent name, the one
+    that wins is the same on every run.
+
+    Args:
+        bot_id: Bot identifier for locating skill directories.
+        tools: All available MCP tools for filtering.
+        model: The main agent's model, used by each sub-agent.
+
+    Returns:
+        List of SubAgent dicts. Empty list if no sub-agents found.
+    """
+    skill_dirs = _get_skill_dirs(bot_id)
+    parsed_agents: dict[str, dict] = {}  # name -> parsed dict (last-wins for dedup)
+
+    for skill_dir in skill_dirs:
+        if not os.path.exists(skill_dir):
+            continue
+
+        # os.listdir returns entries in arbitrary order; sort for determinism.
+        for skill_name in sorted(os.listdir(skill_dir)):
+            skill_path = os.path.join(skill_dir, skill_name)
+            if not os.path.isdir(skill_path):
+                continue
+
+            agents_dir = Path(skill_path) / "agents"
+            # is_dir() also rejects a regular file that happens to be named "agents".
+            if not agents_dir.is_dir():
+                continue
+
+            # glob order is filesystem-dependent as well; sort it too.
+            for md_file in sorted(agents_dir.glob("*.md")):
+                parsed = _parse_agent_md(md_file)
+                if parsed is None:
+                    continue
+
+                name = parsed["name"]
+                if name in parsed_agents:
+                    logger.warning(
+                        f"Duplicate sub-agent name '{name}': "
+                        f"{parsed_agents[name]['source']} overridden by {parsed['source']}"
+                    )
+                parsed_agents[name] = parsed
+
+    if not parsed_agents:
+        return []
+
+    # Build SubAgent dicts with model and filtered tools
+    subagents: list[SubAgent] = []
+    for name, parsed in parsed_agents.items():
+        # Filter tools: if tool_names specified, filter; otherwise inherit all
+        if parsed["tool_names"] is not None:
+            filtered_tools = _filter_tools_by_names(tools, parsed["tool_names"])
+        else:
+            # Copy so each sub-agent owns its list rather than sharing the caller's.
+            filtered_tools = list(tools)
+
+        subagent: SubAgent = {
+            "name": name,
+            "description": parsed["description"],
+            "system_prompt": parsed["system_prompt"],
+            "model": model,
+            "tools": filtered_tools,
+        }
+        subagents.append(subagent)
+        logger.info(f"Loaded sub-agent '{name}' with {len(filtered_tools)} tools from {parsed['source']}")
+
+    return subagents
--- a/routes/chat.py
+++ b/routes/chat.py
@ -3,6 +3,7 @@ import os
 import asyncio
 import shutil
 import time
+import traceback
 from typing import Union, Optional, Any, List, Dict
 from fastapi import APIRouter, HTTPException, Header, Body
 from fastapi.responses import StreamingResponse
@ -25,6 +26,7 @@ from agent.agent_config import AgentConfig
 from agent.deep_assistant import init_agent
 from utils.daytona_sync import sync_sandbox_to_local
 from utils.settings import DAYTONA_ENABLED
+from utils.structured_log import emit_question_metric

 router = APIRouter()

@ -43,6 +45,7 @@ async def enhanced_generate_stream_response(

    # Cancellation management
    cancel_event = None
+    request_started_at = config.request_started_at or time.monotonic()

    try:
        # Create output queue and control events
@ -89,6 +92,8 @@ async def enhanced_generate_stream_response(
                logger.info(f"Starting agent stream response")
                chunk_id = 0
                message_tag = ""
+                last_answer_first_char_duration_ms = None
+                waiting_for_answer_first_char = False
                agent, checkpointer, sandbox = await init_agent(config)
                async for msg, metadata in agent.astream({"messages": config.messages}, stream_mode="messages", config=config.invoke_config(), max_tokens=MAX_OUTPUT_TOKENS):
                    # Check whether a cancellation signal was received
@ -102,6 +107,7 @@ async def enhanced_generate_stream_response(
                        # Handle tool calls
                        if msg.tool_call_chunks:
                            message_tag = "TOOL_CALL"
+                            waiting_for_answer_first_char = False
                            if config.tool_response:
                                for tool_call_chunk in msg.tool_call_chunks:
                                    chunk_name = tool_call_chunk.get("name") if isinstance(tool_call_chunk, dict) else getattr(tool_call_chunk, "name", None)
@ -120,12 +126,20 @@ async def enhanced_generate_stream_response(
                                continue
                            if meta_message_tag != message_tag:
                                message_tag = meta_message_tag
+                                waiting_for_answer_first_char = meta_message_tag == "ANSWER"
                                new_content = f"[{meta_message_tag}]\n"
                            if msg.text:
+                                if meta_message_tag == "ANSWER" and waiting_for_answer_first_char and msg.text.strip():
+                                    last_answer_first_char_duration_ms = max(
+                                        int((time.monotonic() - request_started_at) * 1000),
+                                        0,
+                                    )
+                                    waiting_for_answer_first_char = False
                                new_content += msg.text
                    # Handle tool responses
                    elif isinstance(msg, ToolMessage) and msg.content:
                        message_tag = "TOOL_RESPONSE"
+                        waiting_for_answer_first_char = False
                        if config.tool_response:
                            new_content = f"[{message_tag}] {msg.name}\n{msg.text}\n"

@ -142,6 +156,25 @@ async def enhanced_generate_stream_response(

                # Send final chunk
                finish = "cancelled" if (cancel_event and cancel_event.is_set()) else "stop"
+                if last_answer_first_char_duration_ms is not None:
+                    emit_question_metric(
+                        stage="catalog_agent.final_answer_first_char",
+                        status="cancel" if finish == "cancelled" else "success",
+                        duration_ms=last_answer_first_char_duration_ms,
+                        first_response_time_ms=last_answer_first_char_duration_ms,
+                        trace_id=config.trace_id,
+                        ai_id=config.bot_id,
+                        session_id=config.session_id,
+                        robot_type="agent",
+                        model=config.model_name,
+                        stream=config.stream,
+                        extra={
+                            "bot_id": config.bot_id,
+                            "tool_response": config.tool_response,
+                            "enable_thinking": config.enable_thinking,
+                            "response_mode": "final_answer_first_char",
+                        },
+                    )
                final_chunk = create_stream_chunk(f"chatcmpl-{chunk_id + 1}", config.model_name, finish_reason=finish)
                await output_queue.put(("agent", f"data: {json.dumps(final_chunk, ensure_ascii=False)}\n\n"))
                # ============ Execute PostAgent hooks ============
@ -153,9 +186,11 @@ async def enhanced_generate_stream_response(
                await output_queue.put(("agent_done", None))

            except Exception as e:
-                logger.error(f"Error in agent task: {e}")
+                logger.error(f"Error in agent task: {e}\n{traceback.format_exc()}")
                # Send error information to the client
-                await output_queue.put(("agent", f'data: {{"error": "{str(e)}"}}\n\n'))
+                await output_queue.put(
+                    ("agent", f"data: {json.dumps({'error': str(e)}, ensure_ascii=False)}\n\n")
+                )
                # Send completion signal to ensure the output controller exits normally
                await output_queue.put(("agent_done", None))

@ -511,6 +546,7 @@ async def chat_completions(request: ChatRequest, authorization: Optional[str] =
        {"dataset_ids": ["project-123", "project-456"], "bot_id": "my-bot-002", "messages": [{"role": "user", "content": "Hello"}]}
        {"dataset_ids": ["project-123"], "bot_id": "my-catalog-bot",  "messages": [{"role": "user", "content": "Hello"}]}
    """
+    request_started_at = time.monotonic()
    try:
        # v1 endpoint: extract the API key from the Authorization header as the model API key
        api_key = extract_api_key_from_auth(authorization)
@ -531,6 +567,7 @@ async def chat_completions(request: ChatRequest, authorization: Optional[str] =
        messages = process_messages(request.messages, request.language)
        # Create AgentConfig object
        config = await AgentConfig.from_v1_request(request, api_key, project_dir, generate_cfg, messages)
+        config.request_started_at = request_started_at
        # Call the shared agent creation and response generation logic
        return await create_agent_and_generate_response(config)

@ -753,6 +790,7 @@ async def chat_completions_v2(request: ChatRequestV2, authorization: Optional[st
        - Uses MD5 hash of MASTERKEY:bot_id for backend API authentication
        - Optionally uses API key from bot config for model access
    """
+    request_started_at = time.monotonic()
    try:
        # Get bot_id (required parameter)
        bot_id = request.bot_id
@ -799,6 +837,7 @@ async def chat_completions_v2(request: ChatRequestV2, authorization: Optional[st
        api_key = req_api_key if req_api_key and req_api_key != "whatever" else None
        # Create AgentConfig object
        config = await AgentConfig.from_v2_request(request, bot_config, project_dir, messages, generate_cfg, model_name=model_name, model_server=model_server, api_key=api_key)
+        config.request_started_at = request_started_at
        # Call the shared agent creation and response generation logic
        return await create_agent_and_generate_response(config)

--- a/skills/autoload/onprem/rag-retrieve/hooks/retrieval-policy-forbidden-self-knowledge.md
+++ b/skills/autoload/onprem/rag-retrieve/hooks/retrieval-policy-forbidden-self-knowledge.md
@ -14,7 +14,7 @@ For knowledge retrieval tasks, **this policy overrides generic codebase explorat

 - **Prohibited answer source**: the model's own parametric knowledge, memory, prior world knowledge, intuition, common sense completion, or unsupported inference.
 - **Prohibited tools**: `Glob`, `Read`, `LS`, Bash (`ls`, `find`, `cat`, `head`, `tail`, `grep`, etc.) — these are forbidden even when retrieval results are empty/insufficient, even if local files seem helpful.
- **Allowed tools only**: skill-enabled retrieval tools, `table_rag_retrieve`, `rag_retrieve`. No other source for factual answering.
+- **Allowed tools only**: skill-enabled retrieval tools, `rag_retrieve`. No other source for factual answering.
 - Local filesystem is a **prohibited** knowledge source, not merely non-recommended.
 - Exception: user explicitly asks to read a specific local file as the task itself.
 - If retrieval evidence is absent, insufficient, or ambiguous, **do not fill the gap with model knowledge**.
@ -35,13 +35,18 @@ For any knowledge retrieval task:
 Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe filesystem first.

 1. **Skill-enabled retrieval tools** (use first when available)
-2. **`table_rag_retrieve`** or **`rag_retrieve`**:
-   - Prefer `table_rag_retrieve` for: values, prices, quantities, specs, rankings, comparisons, lists, tables, name lookup, historical coverage, mixed/unclear cases.
-   - Prefer `rag_retrieve` for: pure concept, definition, workflow, policy, or explanation questions only.
+2. **`rag_retrieve`**

 - After each step, evaluate sufficiency before proceeding.
 - Retrieval must happen **before** any factual answer generation.

+### First-Call Success Principle
+
+- The first retrieval call is expected to return sufficient results for most questions.
+- Your default assumption should be: **one call is enough**.
+- Additional calls are the exception, not the norm. Only retry when results are genuinely useless (empty, error, completely off-topic).
+- **Never retry just to "find better results" or "get more comprehensive coverage".** Good enough is sufficient.
+
 ## 4. Query Preparation

 - Do NOT pass raw user question unless it already works well for retrieval.
@ -50,27 +55,51 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi

 ## 5. Retrieval Breadth (`top_k`)

- Apply `top_k` only to `rag_retrieve`. Use smallest sufficient value, expand if insufficient.
- `30` for simple fact lookup → `50` for moderate synthesis/comparison → `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
- Expansion order: `30 → 50 → 100`. If unsure, use `100`.
+- Apply `top_k` only to `rag_retrieve`. Choose the appropriate value upfront to maximize first-call success.
+- Use `50` for simple fact lookup or moderate synthesis, comparison, summarization, disambiguation.
+- Use `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
+- If unsure, use `50`. Escalate to `100` only on a retry call, and only if the first results are insufficient.

 ## 6. Result Evaluation

-Treat as insufficient if: empty, `Error:`, `no excel files found`, off-topic, missing core entity/scope, no usable evidence, partial coverage, truncated results, or claims required by the answer are not explicitly supported.
+**Maximum 3 retrieval calls per question.** After each call, evaluate immediately:
+
+### Sufficient — answer immediately, no more calls
+
+ANY of the following means results are sufficient — STOP and answer now:
+- The core entity/topic in the user's question appears in the results.
+- There is ANY direct or indirect evidence relevant to the user's question.
+- Results are partially relevant, even if not perfectly comprehensive.
+- You can compose a meaningful answer (even a partial one) from the retrieved content.
+
+**Anti-patterns — do NOT do these:**
+- ❌ "The results are good, but maybe different keywords could find something better."
+- ❌ "I have enough to answer, but let me try one more query to be thorough."
+- ❌ "The answer is here, but I want to double-check with a different query."
+- ❌ Calling retrieval again after you have already identified the answer in previous results.
+
+**If you can answer the question with current results, you MUST answer immediately. Period.**
+
+### Insufficient — the ONLY valid reasons to retry
+
+- Results are completely empty or contain only `Error:` messages.
+- ALL results are entirely off-topic with zero relevance to the user's question.
+- No usable evidence exists at all — you cannot form even a partial answer.
+
+**"Results are not detailed enough" is NOT a valid reason to retry.**
+**"Results might be incomplete" is NOT a valid reason to retry.**

 ## 7. Fallback and Sequential Retry

-On insufficient results, follow this sequence:
+On insufficient results, you may retry **up to 2 more times** (3 calls total):

-1. Rewrite query, retry same tool (once)
-2. Switch to next retrieval source in default order
-3. For `rag_retrieve`, expand `top_k`: `30 → 50 → 100`
-4. `table_rag_retrieve` insufficient → try `rag_retrieve`; `rag_retrieve` insufficient → try `table_rag_retrieve`
+1. Rewrite query, retry same tool.
+2. For `rag_retrieve`, escalate `top_k` to `100` on retry.

- `table_rag_retrieve` internally falls back to `rag_retrieve` on `no excel files found`, but this does NOT change the higher-level order.
- Say "no relevant information was found" **only after** exhausting all retrieval sources.
+- Say "no relevant information was found" **only after** exhausting all retries.
 - Do NOT switch to local filesystem inspection at any point.
 - Do NOT switch to model self-knowledge at any point.
+- Do NOT call any retrieval tool more than 3 times in total.

 ## 8. Handling Missing or Partial Evidence

@ -79,13 +108,7 @@ On insufficient results, follow this sequence:
 - Prefer "the retrieved materials do not provide this information" over speculative completion.
 - When user asks for a definitive answer but evidence is incomplete, state the limitation directly.

-## 9. Table RAG Result Handling
-
- Follow all `[INSTRUCTION]` and `[EXTRA_INSTRUCTION]` in results.
- If truncated: tell user total (`N+M`), displayed (`N`), omitted (`M`).
- Cite sources using filenames from `file_ref_table`.
-
-## 10. Image Handling
+## 9. Image Handling

 - The content returned by the `rag_retrieve` tool may include images.
 - Each image is exclusively associated with its nearest text or sentence.
@ -94,14 +117,7 @@ On insufficient results, follow this sequence:
 - Each sentence or key point in the response should be accompanied by relevant images when they meet the established association criteria.
 - Avoid placing all images at the end of the response.

-## 11. Citation Requirements
-
- MUST generate `<CITATION ... />` tags when using retrieval results.
- Place citations immediately after the paragraph or bullet list using the knowledge. Do NOT collect at end.
- 1-2 citations per paragraph/bullet. At least 1 citation when using retrieved knowledge.
- Do NOT cite claims that were not supported by retrieval.
-
-## 12. Self-Knowledge Prohibition
+## 10. Self-Knowledge Prohibition

 This section applies whenever self-knowledge is disabled or forbidden for the current task.

@ -111,19 +127,19 @@ This section applies whenever self-knowledge is disabled or forbidden for the cu
 - The model must not supplement missing parts with general knowledge, conceptual explanation, common background, intuition, or likely completion.
 - The model must not use self-knowledge to invent or complete private, internal, current, precise, or source-sensitive facts.
 - The model must not use self-knowledge to invent or complete prices, fees, discounts, rankings, internal policies, user-specific details, current status, latest updates, exact numbers, dates, metrics, or specifications.
- Retrieved facts must include citations.
 - Unsupported parts must be stated as unavailable rather than guessed.
 - If a paragraph would mix retrieved facts and unsupported completion, remove the unsupported completion.
 - If evidence is incomplete, state the limitation explicitly.

-## 13. Pre-Reply Self-Check
+## 11. Pre-Reply Self-Check

 Before replying to a knowledge retrieval task, verify:
 - Used only whitelisted retrieval tools — no local filesystem inspection?
+- Called retrieval at most 3 times total (not more)?
+- Answered immediately when results were sufficient (did NOT call again unnecessarily)?
+- Called retrieval exactly once when first results were sufficient (did NOT retry unnecessarily)?
 - Did retrieval happen before any factual answer drafting?
 - Did every factual claim come from retrieved evidence rather than model knowledge?
- Exhausted retrieval flow before concluding "not found"?
- Citations placed immediately after each relevant paragraph?
 - If any unsupported part remained, was it removed or explicitly marked unavailable?

 If any answer is "no", correct the process first.
--- a/skills/autoload/onprem/rag-retrieve/hooks/retrieval-policy.md
+++ b/skills/autoload/onprem/rag-retrieve/hooks/retrieval-policy.md
@ -29,6 +29,13 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi
 - Do NOT answer from model knowledge first.
 - After each step, evaluate sufficiency before proceeding.

+### First-Call Success Principle
+
+- The first retrieval call is expected to return sufficient results for most questions.
+- Your default assumption should be: **one call is enough**.
+- Additional calls are the exception, not the norm. Only retry when results are genuinely useless (empty, error, completely off-topic).
+- **Never retry just to "find better results" or "get more comprehensive coverage".** Good enough is sufficient.
+
 ## 3. Query Preparation

 - Do NOT pass raw user question unless it already works well for retrieval.
@ -37,26 +44,53 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi

 ## 4. Retrieval Breadth (`top_k`)

- Apply `top_k` only to `rag_retrieve`. Use smallest sufficient value, expand if insufficient.
- `30` for simple fact lookup → `50` for moderate synthesis/comparison → `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
- Expansion order: `30 → 50 → 100`. If unsure, use `100`.
+- Apply `top_k` only to `rag_retrieve`. Choose the appropriate value upfront to maximize first-call success.
+- Use `50` for simple fact lookup or moderate synthesis, comparison, summarization, disambiguation.
+- Use `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
+- If unsure, use `50`. Only escalate to `100` on the retry call if first results are insufficient.

 ## 5. Result Evaluation

-Treat as insufficient if: empty, `Error:`, `no excel files found`, off-topic, missing core entity/scope, no usable evidence, partial coverage, or truncated results.
+**Maximum 3 retrieval calls per question.** After each call, evaluate immediately:
+
+### Sufficient — answer immediately, no more calls
+
+ANY of the following means results are sufficient — STOP and answer now:
+- The core entity/topic in the user's question appears in the results.
+- There is ANY direct or indirect evidence relevant to the user's question.
+- Results are partially relevant, even if not perfectly comprehensive.
+- You can compose a meaningful answer (even a partial one) from the retrieved content.
+
+**Anti-patterns — do NOT do these:**
+- ❌ "The results are good, but maybe different keywords could find something better."
+- ❌ "I have enough to answer, but let me try one more query to be thorough."
+- ❌ "The answer is here, but I want to double-check with a different query."
+- ❌ Calling retrieval again after you have already identified the answer in previous results.
+
+**If you can answer the question with current results, you MUST answer immediately. Period.**
+
+### Insufficient — the ONLY valid reasons to retry
+
+- Results are completely empty or contain only `Error:` / `no excel files found` messages.
+- ALL results are entirely off-topic with zero relevance to the user's question.
+- No usable evidence exists at all — you cannot form even a partial answer.
+
+**"Results are not detailed enough" is NOT a valid reason to retry.**
+**"Results might be incomplete" is NOT a valid reason to retry.**

 ## 6. Fallback and Sequential Retry

-On insufficient results, follow this sequence:
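+On insufficient results, you may retry **up to 2 more times** (3 calls total). The options below are listed in order of preference; you will not be able to use all of them within the cap: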

-1. Rewrite query, retry same tool (once)
-2. Switch to next retrieval source in default order
-3. For `rag_retrieve`, expand `top_k`: `30 → 50 → 100`
-4. `table_rag_retrieve` insufficient → try `rag_retrieve`; `rag_retrieve` insufficient → try `table_rag_retrieve`
+1. Rewrite query, retry same tool.
+2. Switch to next retrieval source in default order.
+3. For `rag_retrieve`, escalate `top_k` to `100` on retry.
+4. `table_rag_retrieve` insufficient → try `rag_retrieve`; `rag_retrieve` insufficient → try `table_rag_retrieve`.

 - `table_rag_retrieve` internally falls back to `rag_retrieve` on `no excel files found`, but this does NOT change the higher-level order.
 - Say "no relevant information was found" **only after** exhausting all retrieval sources.
 - Do NOT switch to local filesystem inspection at any point.
+- Do NOT make more than 3 retrieval calls in total per question, counted across all retrieval tools.

 ## 7. Table RAG Result Handling

@ -99,7 +133,9 @@ This section applies only when self-knowledge is enabled.

 Before replying to a knowledge retrieval task, verify:
 - Used only whitelisted retrieval tools — no local filesystem inspection?
- Exhausted retrieval flow before concluding "not found"?
+- Called retrieval at most 3 times total (not more)?
+- Answered immediately when results were sufficient (did NOT call again unnecessarily)?
+- Called retrieval exactly once when first results were sufficient (did NOT retry unnecessarily)?
 - Citations placed immediately after each relevant paragraph?
 - If self-knowledge was used, was it clearly separated from retrieved facts and limited to allowed supplement scope?

--- a/skills/autoload/onprem/rag-retrieve/rag_retrieve_server.py
+++ b/skills/autoload/onprem/rag-retrieve/rag_retrieve_server.py
@ -73,7 +73,7 @@ Format: `<CITATION file="file_id" filename="name.xlsx" sheet=1 rows=[2, 4] />`

 """

-def rag_retrieve(query: str, top_k: int = 100) -> Dict[str, Any]:
+def rag_retrieve(query: str, top_k: int = 100, trace_id: str = "") -> Dict[str, Any]:
    """Call the RAG retrieval API."""
    try:
        bot_id = ""
@ -100,6 +100,8 @@ def rag_retrieve(query: str, top_k: int = 100) -> Dict[str, Any]:
            "content-type": "application/json",
            "authorization": f"Bearer {auth_token}"
        }
+        if trace_id:
+            headers["X-Request-ID"] = trace_id
        data = {
            "query": query,
            "top_k": top_k
@ -172,7 +174,7 @@ def rag_retrieve(query: str, top_k: int = 100) -> Dict[str, Any]:
        }


-def table_rag_retrieve(query: str) -> Dict[str, Any]:
+def table_rag_retrieve(query: str, trace_id: str = "") -> Dict[str, Any]:
    """Call the Table RAG retrieval API."""
    try:
        bot_id = ""
@ -189,6 +191,8 @@ def table_rag_retrieve(query: str) -> Dict[str, Any]:
            "content-type": "application/json",
            "authorization": f"Bearer {auth_token}"
        }
+        if trace_id:
+            headers["X-Request-ID"] = trace_id
        data = {
            "query": query,
        }
@ -220,7 +224,7 @@ def table_rag_retrieve(query: str) -> Dict[str, Any]:
        if "markdown" in response_data:
            markdown_content = response_data["markdown"]
            if re.search(r"^no excel files found", markdown_content, re.IGNORECASE):
-                rag_result = rag_retrieve(query)
+                rag_result = rag_retrieve(query, trace_id=trace_id)
                content = rag_result.get("content", [])
                if content and content[0].get("type") == "text":
                    content[0]["text"] = "No table_rag_retrieve results were found. The content below is the fallback result from rag_retrieve:\n\n" + content[0]["text"]
@ -302,6 +306,8 @@ async def handle_request(request: Dict[str, Any]) -> Dict[str, Any]:
        elif method == "tools/call":
            tool_name = params.get("name")
            arguments = params.get("arguments", {})
+            meta = params.get("_meta") or params.get("meta") or {}
+            trace_id = meta.get("trace_id", "") if isinstance(meta, dict) else ""

            if tool_name == "rag_retrieve":
                query = arguments.get("query", "")
@ -310,7 +316,7 @@ async def handle_request(request: Dict[str, Any]) -> Dict[str, Any]:
                if not query:
                    return create_error_response(request_id, -32602, "Missing required parameter: query")

-                result = rag_retrieve(query, top_k)
+                result = rag_retrieve(query, top_k, trace_id)

                return {
                    "jsonrpc": "2.0",
@ -324,7 +330,7 @@ async def handle_request(request: Dict[str, Any]) -> Dict[str, Any]:
                if not query:
                    return create_error_response(request_id, -32602, "Missing required parameter: query")

-                result = table_rag_retrieve(query)
+                result = table_rag_retrieve(query, trace_id)

                return {
                    "jsonrpc": "2.0",
--- a/skills/autoload/support/rag-retrieve/hooks/retrieval-policy-forbidden-self-knowledge.md
+++ b/skills/autoload/support/rag-retrieve/hooks/retrieval-policy-forbidden-self-knowledge.md
@ -42,6 +42,13 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi
 - After each step, evaluate sufficiency before proceeding.
 - Retrieval must happen **before** any factual answer generation.

+### First-Call Success Principle
+
+- The first retrieval call is expected to return sufficient results for most questions.
+- Your default assumption should be: **one call is enough**.
+- Additional calls are the exception, not the norm. Only retry when results are genuinely useless (empty, error, completely off-topic).
+- **Never retry just to "find better results" or "get more comprehensive coverage".** Good enough is sufficient.
+
 ## 4. Query Preparation

 - Do NOT pass raw user question unless it already works well for retrieval.
@ -50,27 +57,54 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi

 ## 5. Retrieval Breadth (`top_k`)

- Apply `top_k` only to `rag_retrieve`. Use smallest sufficient value, expand if insufficient.
- `30` for simple fact lookup → `50` for moderate synthesis/comparison → `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
- Expansion order: `30 → 50 → 100`. If unsure, use `100`.
+- Apply `top_k` only to `rag_retrieve`. Choose the appropriate value upfront to maximize first-call success.
+- Use `50` for simple fact lookup or moderate synthesis, comparison, summarization, disambiguation.
+- Use `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
+- If unsure, use `50`. Only escalate to `100` on the retry call if first results are insufficient.

 ## 6. Result Evaluation

-Treat as insufficient if: empty, `Error:`, `no excel files found`, off-topic, missing core entity/scope, no usable evidence, partial coverage, truncated results, or claims required by the answer are not explicitly supported.
+**Maximum 3 retrieval calls per question.** After each call, evaluate immediately:
+
+### Sufficient — answer immediately, no more calls
+
+ANY of the following means results are sufficient — STOP and answer now:
+- The core entity/topic in the user's question appears in the results.
+- There is ANY direct or indirect evidence relevant to the user's question.
+- Results are partially relevant, even if not perfectly comprehensive.
+- You can compose a meaningful answer (even a partial one) from the retrieved content.
+
+**Anti-patterns — do NOT do these:**
+- ❌ "The results are good, but maybe different keywords could find something better."
+- ❌ "I have enough to answer, but let me try one more query to be thorough."
+- ❌ "The answer is here, but I want to double-check with a different query."
+- ❌ Calling retrieval again after you have already identified the answer in previous results.
+
+**If you can answer the question with current results, you MUST answer immediately. Period.**
+
+### Insufficient — the ONLY valid reasons to retry
+
+- Results are completely empty or contain only `Error:` / `no excel files found` messages.
+- ALL results are entirely off-topic with zero relevance to the user's question.
+- No usable evidence exists at all — you cannot form even a partial answer.
+
+**"Results are not detailed enough" is NOT a valid reason to retry.**
+**"Results might be incomplete" is NOT a valid reason to retry.**

 ## 7. Fallback and Sequential Retry

-On insufficient results, follow this sequence:
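+On insufficient results, you may retry **up to 2 more times** (3 calls total). The options below are listed in order of preference; you will not be able to use all of them within the cap: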

-1. Rewrite query, retry same tool (once)
-2. Switch to next retrieval source in default order
-3. For `rag_retrieve`, expand `top_k`: `30 → 50 → 100`
-4. `table_rag_retrieve` insufficient → try `rag_retrieve`; `rag_retrieve` insufficient → try `table_rag_retrieve`
+1. Rewrite query, retry same tool.
+2. Switch to next retrieval source in default order.
+3. For `rag_retrieve`, escalate `top_k` to `100` on retry.
+4. `table_rag_retrieve` insufficient → try `rag_retrieve`; `rag_retrieve` insufficient → try `table_rag_retrieve`.

 - `table_rag_retrieve` internally falls back to `rag_retrieve` on `no excel files found`, but this does NOT change the higher-level order.
- Say "no relevant information was found" **only after** exhausting all retrieval sources.
+- Say "no relevant information was found" **only after** exhausting all retries.
 - Do NOT switch to local filesystem inspection at any point.
 - Do NOT switch to model self-knowledge at any point.
+- Do NOT make more than 3 retrieval calls in total per question, counted across all retrieval tools.

 ## 8. Handling Missing or Partial Evidence

@ -83,7 +117,6 @@ On insufficient results, follow this sequence:

 - Follow all `[INSTRUCTION]` and `[EXTRA_INSTRUCTION]` in results.
 - If truncated: tell user total (`N+M`), displayed (`N`), omitted (`M`).
- Cite sources using filenames from `file_ref_table`.

 ## 10. Image Handling

@ -94,14 +127,7 @@ On insufficient results, follow this sequence:
 - Each sentence or key point in the response should be accompanied by relevant images when they meet the established association criteria.
 - Avoid placing all images at the end of the response.

-## 11. Citation Requirements
-
- MUST generate `<CITATION ... />` tags when using retrieval results.
- Place citations immediately after the paragraph or bullet list using the knowledge. Do NOT collect at end.
- 1-2 citations per paragraph/bullet. At least 1 citation when using retrieved knowledge.
- Do NOT cite claims that were not supported by retrieval.
-
-## 12. Self-Knowledge Prohibition
+## 11. Self-Knowledge Prohibition

 This section applies whenever self-knowledge is disabled or forbidden for the current task.

@ -111,19 +137,19 @@ This section applies whenever self-knowledge is disabled or forbidden for the cu
 - The model must not supplement missing parts with general knowledge, conceptual explanation, common background, intuition, or likely completion.
 - The model must not use self-knowledge to invent or complete private, internal, current, precise, or source-sensitive facts.
 - The model must not use self-knowledge to invent or complete prices, fees, discounts, rankings, internal policies, user-specific details, current status, latest updates, exact numbers, dates, metrics, or specifications.
- Retrieved facts must include citations.
 - Unsupported parts must be stated as unavailable rather than guessed.
 - If a paragraph would mix retrieved facts and unsupported completion, remove the unsupported completion.
 - If evidence is incomplete, state the limitation explicitly.

-## 13. Pre-Reply Self-Check
+## 12. Pre-Reply Self-Check

 Before replying to a knowledge retrieval task, verify:
 - Used only whitelisted retrieval tools — no local filesystem inspection?
+- Called retrieval at most 3 times total (not more)?
+- Answered immediately when results were sufficient (did NOT call again unnecessarily)?
+- Called retrieval exactly once when first results were sufficient (did NOT retry unnecessarily)?
 - Did retrieval happen before any factual answer drafting?
 - Did every factual claim come from retrieved evidence rather than model knowledge?
- Exhausted retrieval flow before concluding "not found"?
- Citations placed immediately after each relevant paragraph?
 - If any unsupported part remained, was it removed or explicitly marked unavailable?

 If any answer is "no", correct the process first.
--- a/skills/autoload/support/rag-retrieve/hooks/retrieval-policy.md
+++ b/skills/autoload/support/rag-retrieve/hooks/retrieval-policy.md
@ -29,6 +29,13 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi
 - Do NOT answer from model knowledge first.
 - After each step, evaluate sufficiency before proceeding.

+### First-Call Success Principle
+
+- The first retrieval call is expected to return sufficient results for most questions.
+- Your default assumption should be: **one call is enough**.
+- Additional calls are the exception, not the norm. Only retry when results are genuinely useless (empty, error, completely off-topic).
+- **Never retry just to "find better results" or "get more comprehensive coverage".** Good enough is sufficient.
+
 ## 3. Query Preparation

 - Do NOT pass raw user question unless it already works well for retrieval.
@ -37,26 +44,53 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi

 ## 4. Retrieval Breadth (`top_k`)

- Apply `top_k` only to `rag_retrieve`. Use smallest sufficient value, expand if insufficient.
- `30` for simple fact lookup → `50` for moderate synthesis/comparison → `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
- Expansion order: `30 → 50 → 100`. If unsure, use `100`.
+- Apply `top_k` only to `rag_retrieve`. Choose the appropriate value upfront to maximize first-call success.
+- Use `50` for simple fact lookup or moderate synthesis, comparison, summarization, disambiguation.
+- Use `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
+- If unsure, use `50`. Only escalate to `100` on the retry call if first results are insufficient.

 ## 5. Result Evaluation

-Treat as insufficient if: empty, `Error:`, `no excel files found`, off-topic, missing core entity/scope, no usable evidence, partial coverage, or truncated results.
+**Maximum 3 retrieval calls per question.** After each call, evaluate immediately:
+
+### Sufficient — answer immediately, no more calls
+
+ANY of the following means results are sufficient — STOP and answer now:
+- The core entity/topic in the user's question appears in the results.
+- There is ANY direct or indirect evidence relevant to the user's question.
+- Results are partially relevant, even if not perfectly comprehensive.
+- You can compose a meaningful answer (even a partial one) from the retrieved content.
+
+**Anti-patterns — do NOT do these:**
+- ❌ "The results are good, but maybe different keywords could find something better."
+- ❌ "I have enough to answer, but let me try one more query to be thorough."
+- ❌ "The answer is here, but I want to double-check with a different query."
+- ❌ Calling retrieval again after you have already identified the answer in previous results.
+
+**If you can answer the question with current results, you MUST answer immediately. Period.**
+
+### Insufficient — the ONLY valid reasons to retry
+
+- Results are completely empty or contain only `Error:` / `no excel files found` messages.
+- ALL results are entirely off-topic with zero relevance to the user's question.
+- No usable evidence exists at all — you cannot form even a partial answer.
+
+**"Results are not detailed enough" is NOT a valid reason to retry.**
+**"Results might be incomplete" is NOT a valid reason to retry.**

 ## 6. Fallback and Sequential Retry

-On insufficient results, follow this sequence:
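+On insufficient results, you may retry **up to 2 more times** (3 calls total). The options below are listed in order of preference; you will not be able to use all of them within the cap: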

-1. Rewrite query, retry same tool (once)
-2. Switch to next retrieval source in default order
-3. For `rag_retrieve`, expand `top_k`: `30 → 50 → 100`
-4. `table_rag_retrieve` insufficient → try `rag_retrieve`; `rag_retrieve` insufficient → try `table_rag_retrieve`
+1. Rewrite query, retry same tool.
+2. Switch to next retrieval source in default order.
+3. For `rag_retrieve`, escalate `top_k` to `100` on retry.
+4. `table_rag_retrieve` insufficient → try `rag_retrieve`; `rag_retrieve` insufficient → try `table_rag_retrieve`.

 - `table_rag_retrieve` internally falls back to `rag_retrieve` on `no excel files found`, but this does NOT change the higher-level order.
 - Say "no relevant information was found" **only after** exhausting all retrieval sources.
 - Do NOT switch to local filesystem inspection at any point.
+- Do NOT make more than 3 retrieval calls in total per question, counted across all retrieval tools.

 ## 7. Table RAG Result Handling

@ -99,7 +133,9 @@ This section applies only when self-knowledge is enabled.

 Before replying to a knowledge retrieval task, verify:
 - Used only whitelisted retrieval tools — no local filesystem inspection?
- Exhausted retrieval flow before concluding "not found"?
+- Called retrieval at most 3 times total (not more)?
+- Answered immediately when results were sufficient (did NOT call again unnecessarily)?
+- Called retrieval exactly once when first results were sufficient (did NOT retry unnecessarily)?
 - Citations placed immediately after each relevant paragraph?
 - If self-knowledge was used, was it clearly separated from retrieved facts and limited to allowed supplement scope?

--- a/skills/autoload/support/rag-retrieve/rag_retrieve_server.py
+++ b/skills/autoload/support/rag-retrieve/rag_retrieve_server.py
@ -73,7 +73,7 @@ Format: `<CITATION file="file_id" filename="name.xlsx" sheet=1 rows=[2, 4] />`

 """

-def rag_retrieve(query: str, top_k: int = 100) -> Dict[str, Any]:
+def rag_retrieve(query: str, top_k: int = 100, trace_id: str = "") -> Dict[str, Any]:
    """Call the RAG retrieval API."""
    try:
        bot_id = ""
@ -100,6 +100,8 @@ def rag_retrieve(query: str, top_k: int = 100) -> Dict[str, Any]:
            "content-type": "application/json",
            "authorization": f"Bearer {auth_token}"
        }
+        if trace_id:
+            headers["X-Request-ID"] = trace_id
        data = {
            "query": query,
            "top_k": top_k
@ -172,7 +174,7 @@ def rag_retrieve(query: str, top_k: int = 100) -> Dict[str, Any]:
        }


-def table_rag_retrieve(query: str) -> Dict[str, Any]:
+def table_rag_retrieve(query: str, trace_id: str = "") -> Dict[str, Any]:
    """Call the Table RAG retrieval API."""
    try:
        bot_id = ""
@ -189,6 +191,8 @@ def table_rag_retrieve(query: str) -> Dict[str, Any]:
            "content-type": "application/json",
            "authorization": f"Bearer {auth_token}"
        }
+        if trace_id:
+            headers["X-Request-ID"] = trace_id
        data = {
            "query": query,
        }
@ -220,7 +224,7 @@ def table_rag_retrieve(query: str) -> Dict[str, Any]:
        if "markdown" in response_data:
            markdown_content = response_data["markdown"]
            if re.search(r"^no excel files found", markdown_content, re.IGNORECASE):
-                rag_result = rag_retrieve(query)
+                rag_result = rag_retrieve(query, trace_id=trace_id)
                content = rag_result.get("content", [])
                if content and content[0].get("type") == "text":
                    content[0]["text"] = "No table_rag_retrieve results were found. The content below is the fallback result from rag_retrieve：\n\n" + content[0]["text"]
@ -302,7 +306,9 @@ async def handle_request(request: Dict[str, Any]) -> Dict[str, Any]:
        elif method == "tools/call":
            tool_name = params.get("name")
            arguments = params.get("arguments", {})
-            
+            meta = params.get("_meta") or params.get("meta") or {}
+            trace_id = meta.get("trace_id", "") if isinstance(meta, dict) else ""
+
            if tool_name == "rag_retrieve":
                query = arguments.get("query", "")
                top_k = arguments.get("top_k", 100)
@ -310,7 +316,7 @@ async def handle_request(request: Dict[str, Any]) -> Dict[str, Any]:
                if not query:
                    return create_error_response(request_id, -32602, "Missing required parameter: query")

-                result = rag_retrieve(query, top_k)
+                result = rag_retrieve(query, top_k, trace_id)

                return {
                    "jsonrpc": "2.0",
@ -324,7 +330,7 @@ async def handle_request(request: Dict[str, Any]) -> Dict[str, Any]:
                if not query:
                    return create_error_response(request_id, -32602, "Missing required parameter: query")

-                result = table_rag_retrieve(query)
+                result = table_rag_retrieve(query, trace_id)

                return {
                    "jsonrpc": "2.0",
--- a/skills/developing/pmda-drug-info/.claude-plugin/plugin.json
+++ b/skills/developing/pmda-drug-info/.claude-plugin/plugin.json
@ -0,0 +1,21 @@
+{
+  "name": "pmda-drug-info",
+  "description": "PMDA drug information tools for Japanese pharmaceutical package insert queries. Provides drug search, master info, interactions, restrictions, dosing, and full-text chapter retrieval via PostgreSQL + OpenSearch.",
+  "hooks": {
+    "PrePrompt": [
+      {
+        "type": "command",
+        "command": "python hooks/pre_prompt.py"
+      }
+    ]
+  },
+  "mcpServers": {
+    "pmda_drug_info": {
+      "transport": "stdio",
+      "command": "python",
+      "args": [
+        "./pmda_server.py"
+      ]
+    }
+  }
+}
--- a/skills/developing/pmda-drug-info/agents/adverse-event.md
+++ b/skills/developing/pmda-drug-info/agents/adverse-event.md
@ -0,0 +1,31 @@
+---
+name: adverse_event
+description: Reverse lookup drugs by adverse event name. Find which drugs have reported a specific side effect.
+             Invoke when the user asks "Which drugs cause Stevens-Johnson syndrome?" or "Drugs that prolong QT interval?".
+             Causal inference is prohibited — information presentation only.
+tools: search_section_text, search_drugs, get_drug_master, list_drug_chapters, read_drug_chapter
+---
+
+あなたは「副作用 → 該当薬剤の逆引き」専門の sub-agent です。
+
+【ツール戦略】
+1. `search_section_text(keyword=副作用名, section_filter="副作用")` で逆引き。
+   total_drugs は必ず本文中に明示する。
+2. 同義語が必要なケース:
+     "Stevens-Johnson" ⇔ "皮膚粘膜眼症候群" / "SJS"
+     "QT延長" ⇔ "Torsades de pointes"
+     "間質性肺炎" ⇔ "肺臓炎"
+   OpenSearch の synonym filter が自動展開するので 1 回の検索で OK。
+3. hit から代表薬を 3〜5 件選び、`read_drug_chapter` で 11.1 重大な副作用 / 11.2 その他の副作用
+   verbatim を引用。
+4. 因果推論（"この薬がこの患者の症状を起こした"）は **絶対しない**。
+   情報提示のみ。
+
+【絶対ルール】
+1. ツール呼び出し必須。トレーニング知識・教科書・ガイドラインからの推測は禁止。
+2. 数値・固有名・条件は本文表現を改変せず逐語引用。
+3. 出典は **必ず** `[出典: <販売名> (yj_full=<id>) / <章番号 章タイトル>]` の形式。
+   - fact 表 row には `_citation` フィールドが入っているので **そのまま転記**。
+   - `[出典: 薬品マスター]` `[出典: 添付文書]` 等の汎用出典は **絶対禁止**。
+   - read_drug_chapter で実際に読んだ section 以外の出典を捏造しない。
+4. 該当情報が無ければ "添付文書からは確認できません" と書く。
--- a/skills/developing/pmda-drug-info/agents/interaction.md
+++ b/skills/developing/pmda-drug-info/agents/interaction.md
@ -0,0 +1,28 @@
+---
+name: interaction
+description: Investigate drug-drug interactions between two drugs, or list all interactions for a single drug.
+             Invoke when the user asks "Can drug A and B be used together?" or "What are the interactions of drug A?".
+tools: search_drugs, get_drug_master, get_drug_interactions, search_section_text, list_drug_chapters, read_drug_chapter
+---
+
+あなたは「薬剤間相互作用」専門の sub-agent です。
+
+【ツール戦略】
+- A・B 両薬の yj_code を `search_drugs` で取得。
+- `get_drug_interactions(drug_a_yj=A, drug_b_yj=B)` で双方向検索（A→B も B→A も拾える）。
+- ヒットしたら drug_a の側の出典 section（10.1 / 10.2）を `list_drug_chapters` + `read_drug_chapter` で
+  verbatim 取得。drug_b 側にも該当記載があるか確認。
+- ヒットゼロ → "添付文書上は併用禁忌・併用注意の明確な記載なし" と書く（自由記述/警告等は
+  別途 `search_section_text(keyword=B薬名, section_filter="相互作用")` で念押し）。
+- 1 薬名のみ与えられた場合は `get_drug_interactions(drug_a_yj=...)` で全相互作用一覧。
+
+severity は本文の "併用禁忌" / "併用注意" の語をそのまま転記。
+
+【絶対ルール】
+1. ツール呼び出し必須。トレーニング知識・教科書・ガイドラインからの推測は禁止。
+2. 数値・固有名・条件は本文表現を改変せず逐語引用。
+3. 出典は **必ず** `[出典: <販売名> (yj_full=<id>) / <章番号 章タイトル>]` の形式。
+   - fact 表 row には `_citation` フィールドが入っているので **そのまま転記**。
+   - `[出典: 薬品マスター]` `[出典: 添付文書]` 等の汎用出典は **絶対禁止**。
+   - read_drug_chapter で実際に読んだ section 以外の出典を捏造しない。
+4. 該当情報が無ければ "添付文書からは確認できません" と書く。
--- a/skills/developing/pmda-drug-info/agents/patient-specific.md
+++ b/skills/developing/pmda-drug-info/agents/patient-specific.md
@ -0,0 +1,32 @@
+---
+name: patient_specific
+description: Determine drug administration feasibility and dosage adjustment for specific patient conditions (renal impairment, hepatic impairment, pregnancy, elderly, pediatric, allergy).
+             Invoke when the user asks "Can this drug be used in a patient with eGFR 25?", "Is it contraindicated in pregnancy?", etc.
+tools: search_drugs, get_drug_master, get_drug_restrictions, get_drug_dosing, list_drug_chapters, read_drug_chapter
+---
+
+あなたは「特定患者への投与可否・用量調整」専門の sub-agent です。
+
+【ツール戦略】
+1. 薬名から yj_code を `search_drugs` で取得。
+2. 患者条件を condition_type に対応付け:
+     - 腎機能 (eGFR/CrCl) → "腎機能障害"
+     - 肝機能 (Child-Pugh) → "肝機能障害"
+     - 妊娠/授乳 → "妊婦"/"授乳婦"
+     - 年齢 (小児/高齢) → "小児等"/"高齢者"
+     - アレルギー既往 → "過敏症"
+     - 合併症 (糖尿病/喘息など) → "疾患"
+3. `get_drug_restrictions(drug_yj=..., condition_type=...)` で該当 restriction を取得。
+   condition_params の数値（例: {"eGFR_max": 30}）を必ず確認。
+4. `get_drug_dosing(drug_yj=..., patient_segment=...)` で患者層別用量を取得。
+5. 必要なら原文 `read_drug_chapter` で 9.x 章 verbatim 引用。
+6. 数値判定（例: eGFR=25 ⇔ eGFR_max=30 → 該当）を agent が責任もって行う。
+
+【絶対ルール】
+1. ツール呼び出し必須。トレーニング知識・教科書・ガイドラインからの推測は禁止。
+2. 数値・固有名・条件は本文表現を改変せず逐語引用。
+3. 出典は **必ず** `[出典: <販売名> (yj_full=<id>) / <章番号 章タイトル>]` の形式。
+   - fact 表 row には `_citation` フィールドが入っているので **そのまま転記**。
+   - `[出典: 薬品マスター]` `[出典: 添付文書]` 等の汎用出典は **絶対禁止**。
+   - read_drug_chapter で実際に読んだ section 以外の出典を捏造しない。
+4. 該当情報が無ければ "添付文書からは確認できません" と書く。
--- a/skills/developing/pmda-drug-info/agents/single-drug.md
+++ b/skills/developing/pmda-drug-info/agents/single-drug.md
@ -0,0 +1,26 @@
+---
+name: single_drug
+description: Answer factual questions about a single drug (brand name, generic name, indications, dosing, contraindications, side effects, etc.).
+             Invoke when the question is focused on one drug and requires detailed information from the package insert.
+tools: search_drugs, get_drug_master, get_drug_dosing, get_drug_restrictions, list_drug_chapters, read_drug_chapter
+---
+
+あなたは「単一薬の事実回答」専門の sub-agent です。
+
+【ツール戦略】
+1. 質問から薬名/yj_code を特定 → `search_drugs` または直接 yj_code が分かれば次へ。
+2. `get_drug_master(yj_code)` で基本情報（販売名・一般名・薬効分類・規制）を確定。
+3. 必要に応じて `get_drug_dosing` で用法用量、`get_drug_restrictions(drug_yj=...)` で禁忌・特定患者注意。
+4. 自由記述や上記テーブルに無い情報（例: 重大な副作用一覧、薬物動態の数値）は
+   `list_drug_chapters(yj_full)` → `read_drug_chapter(yj_full, section_title)` で原文取得。
+
+最終回答は箇条書き or 表で、各事実に出典を付ける。
+
+【絶対ルール】
+1. ツール呼び出し必須。トレーニング知識・教科書・ガイドラインからの推測は禁止。
+2. 数値・固有名・条件は本文表現を改変せず逐語引用。
+3. 出典は **必ず** `[出典: <販売名> (yj_full=<id>) / <章番号 章タイトル>]` の形式。
+   - fact 表 row には `_citation` フィールドが入っているので **そのまま転記**。
+   - `[出典: 薬品マスター]` `[出典: 添付文書]` 等の汎用出典は **絶対禁止**。
+   - read_drug_chapter で実際に読んだ section 以外の出典を捏造しない。
+4. 該当情報が無ければ "添付文書からは確認できません" と書く。
--- a/skills/developing/pmda-drug-info/hooks/pmda-instructions.md
+++ b/skills/developing/pmda-drug-info/hooks/pmda-instructions.md
@ -0,0 +1,22 @@
+# PMDA Drug Information Tools
+
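+You have access to Japanese pharmaceutical package insert (添付文書) data via these tools: `search_drugs`, `get_drug_master`, `get_drug_dosing`, `get_drug_restrictions`, `get_drug_interactions`, `search_section_text`, `list_drug_chapters`, `read_drug_chapter`.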
+
+## Core Rules
+- **Tool calls are mandatory.** Never answer from training knowledge alone. All facts must come from tool results.
+- Cite sources in the format: `[出典: <販売名> (yj_full=<id>) / <章番号 章タイトル>]`
+- Fact table rows include a `_citation` field — use it directly.
+- Generic citations like `[出典: 薬品マスター]` or `[出典: 添付文書]` are **prohibited**.
+- For urgent questions (suicide/drug abuse/severe acute symptoms), state: "緊急対応として担当医・薬剤師に直接相談してください"
+
+## When to Use Sub-agents (task tool)
+- **patient_specific**: Renal/hepatic/pregnancy/elderly/pediatric/allergy conditions × dosing decisions
+- **interaction**: Pairwise drug interaction investigation
+- **adverse_event**: Reverse lookup from adverse event name to drugs
+- **single_drug**: Detailed info not in fact tables (e.g., full adverse event list, pharmacokinetics)
+
+## Direct Tool Usage (do NOT delegate)
+- Simple lookups → use tools directly
+- Multi-drug comparisons → call tools sequentially, output as markdown table
+- Symptom → candidate drug reverse lookup → `search_section_text`
+- Mechanism/pharmacokinetics → `list_drug_chapters` + `read_drug_chapter`
--- a/skills/developing/pmda-drug-info/hooks/pre_prompt.py
+++ b/skills/developing/pmda-drug-info/hooks/pre_prompt.py
@ -0,0 +1,18 @@
+#!/usr/bin/env python3
+"""
+PrePrompt hook for PMDA drug info skill.
+Injects usage instructions for the drug information tools.
+"""
+import sys
+from pathlib import Path
+
+
+def main():
+    prompt_file = Path(__file__).parent / "pmda-instructions.md"
+    if prompt_file.exists():
+        print(prompt_file.read_text(encoding="utf-8"))
+    return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main())
--- a/skills/developing/pmda-drug-info/mcp_common.py
+++ b/skills/developing/pmda-drug-info/mcp_common.py
@ -0,0 +1,252 @@
+#!/usr/bin/env python3
+"""
+Shared utility functions for the MCP server.
+Provides common functionality for path handling, file validation, and request processing.
+"""
+
+import json
+import os
+import sys
+import asyncio
+from typing import Any, Dict, List, Optional, Union
+import re
+
+def get_allowed_directory():
+    """Get the directory that is allowed to be accessed."""
+    # Prefer dataset_dir passed through command-line arguments.
+    if len(sys.argv) > 1:
+        dataset_dir = sys.argv[1]
+        return os.path.abspath(dataset_dir)
+
+    # Read the project data directory from the environment variable.
+    project_dir = os.getenv("PROJECT_DATA_DIR", "./projects/data")
+    return os.path.abspath(project_dir)
+
+
+def resolve_file_path(file_path: str, default_subfolder: str = "default") -> str:
+    """
+    Resolve a file path, supporting both folder/document.txt and document.txt formats.
+
+    Args:
+        file_path: Input file path.
+        default_subfolder: Default subfolder name to use when only a filename is provided.
+
+    Returns:
+        The resolved full file path.
+    """
+    # If the path contains a folder separator, use it directly.
+    if '/' in file_path or '\\' in file_path:
+        clean_path = file_path.replace('\\', '/')
+
+        # Remove the projects/ prefix if it exists.
+        if clean_path.startswith('projects/'):
+            clean_path = clean_path[9:]  # Remove the 'projects/' prefix.
+        elif clean_path.startswith('./projects/'):
+            clean_path = clean_path[11:]  # Remove the './projects/' prefix.
+    else:
+        # If only a filename is provided, add the default subfolder.
+        clean_path = f"{default_subfolder}/{file_path}"
+
+    # Get the allowed directory.
+    project_data_dir = get_allowed_directory()
+
+    # Try to locate the file directly under the project directory.
+    full_path = os.path.join(project_data_dir, clean_path.lstrip('./'))
+    if os.path.exists(full_path):
+        return full_path
+
+    # If the direct path does not exist, try a recursive search.
+    found = find_file_in_project(clean_path, project_data_dir)
+    if found:
+        return found
+
+    # If this is a bare filename and it was not found under the default subfolder,
+    # try looking in the project root.
+    if '/' not in file_path and '\\' not in file_path:
+        root_path = os.path.join(project_data_dir, file_path)
+        if os.path.exists(root_path):
+            return root_path
+
+    raise FileNotFoundError(f"File not found: {file_path} (searched in {project_data_dir})")
+
+
+def find_file_in_project(filename: str, project_dir: str) -> Optional[str]:
+    """Recursively search for a file inside the project directory."""
+    # If filename includes a path, only search within the specified path.
+    if '/' in filename:
+        parts = filename.split('/')
+        target_file = parts[-1]
+        search_dir = os.path.join(project_dir, *parts[:-1])
+
+        if os.path.exists(search_dir):
+            target_path = os.path.join(search_dir, target_file)
+            if os.path.exists(target_path):
+                return target_path
+    else:
+        # For a bare filename, recursively search the whole project directory.
+        for root, dirs, files in os.walk(project_dir):
+            if filename in files:
+                return os.path.join(root, filename)
+    return None
+
+
+def load_tools_from_json(tools_file_name: str) -> List[Dict[str, Any]]:
+    """Load tool definitions from a JSON file."""
+    try:
+        tools_file = os.path.join(os.path.dirname(__file__), tools_file_name)
+        if os.path.exists(tools_file):
+            with open(tools_file, 'r', encoding='utf-8') as f:
+                return json.load(f)
+        else:
+            # If the JSON file does not exist, use the default definitions.
+            return []
+    except Exception as e:
+        print(f"Warning: Unable to load tool definition JSON file: {str(e)}")
+        return []
+
+
+def create_error_response(request_id: Any, code: int, message: str) -> Dict[str, Any]:
+    """Create a standardized error response."""
+    return {
+        "jsonrpc": "2.0",
+        "id": request_id,
+        "error": {
+            "code": code,
+            "message": message
+        }
+    }
+
+
+def create_success_response(request_id: Any, result: Any) -> Dict[str, Any]:
+    """Create a standardized success response."""
+    return {
+        "jsonrpc": "2.0",
+        "id": request_id,
+        "result": result
+    }
+
+
+def create_initialize_response(request_id: Any, server_name: str, server_version: str = "1.0.0") -> Dict[str, Any]:
+    """Create a standardized initialize response."""
+    return {
+        "jsonrpc": "2.0",
+        "id": request_id,
+        "result": {
+            "protocolVersion": "2024-11-05",
+            "capabilities": {
+                "tools": {}
+            },
+            "serverInfo": {
+                "name": server_name,
+                "version": server_version
+            }
+        }
+    }
+
+
+def create_ping_response(request_id: Any) -> Dict[str, Any]:
+    """Create a standardized ping response."""
+    return {
+        "jsonrpc": "2.0",
+        "id": request_id,
+        "result": {
+            "pong": True
+        }
+    }
+
+
+def create_tools_list_response(request_id: Any, tools: List[Dict[str, Any]]) -> Dict[str, Any]:
+    """Create a standardized tools/list response."""
+    return {
+        "jsonrpc": "2.0",
+        "id": request_id,
+        "result": {
+            "tools": tools
+        }
+    }
+
+
+def is_regex_pattern(pattern: str) -> bool:
+    """Check whether a string should be treated as a regular expression pattern."""
+    # Check the /pattern/ format.
+    if pattern.startswith('/') and pattern.endswith('/') and len(pattern) > 2:
+        return True
+
+    # Check the r"pattern" or r'pattern' format.
+    if pattern.startswith(('r"', "r'")) and pattern.endswith(('"', "'")) and len(pattern) > 3:
+        return True
+
+    # Check whether it contains regex metacharacters.
+    regex_chars = {'*', '+', '?', '|', '(', ')', '[', ']', '{', '}', '^', '$', '\\', '.'}
+    return any(char in pattern for char in regex_chars)
+
+
+def compile_pattern(pattern: str) -> Union[re.Pattern, str, None]:
+    """Compile a regex pattern, or return the original string if it is not regex."""
+    if not is_regex_pattern(pattern):
+        return pattern
+
+    try:
+        # Handle the /pattern/ format.
+        if pattern.startswith('/') and pattern.endswith('/'):
+            regex_body = pattern[1:-1]
+            return re.compile(regex_body)
+
+        # Handle the r"pattern" or r'pattern' format.
+        if pattern.startswith(('r"', "r'")) and pattern.endswith(('"', "'")):
+            regex_body = pattern[2:-1]
+            return re.compile(regex_body)
+
+        # Directly compile strings that contain regex metacharacters.
+        return re.compile(pattern)
+    except re.error as e:
+        # If compilation fails, return None to indicate an invalid regex.
+        print(f"Warning: Regular expression '{pattern}' compilation failed: {e}")
+        return None
+
+
+async def handle_mcp_streaming(request_handler):
+    """Handle the standard main loop for MCP requests."""
+    try:
+        while True:
+            # Read from stdin
+            line = await asyncio.get_event_loop().run_in_executor(None, sys.stdin.readline)
+            if not line:
+                break
+
+            line = line.strip()
+            if not line:
+                continue
+
+            try:
+                request = json.loads(line)
+                response = await request_handler(request)
+
+                # Write to stdout
+                sys.stdout.write(json.dumps(response, ensure_ascii=False) + "\n")
+                sys.stdout.flush()
+
+            except json.JSONDecodeError:
+                error_response = {
+                    "jsonrpc": "2.0",
+                    "error": {
+                        "code": -32700,
+                        "message": "Parse error"
+                    }
+                }
+                sys.stdout.write(json.dumps(error_response, ensure_ascii=False) + "\n")
+                sys.stdout.flush()
+
+            except Exception as e:
+                error_response = {
+                    "jsonrpc": "2.0",
+                    "error": {
+                        "code": -32603,
+                        "message": f"Internal error: {str(e)}"
+                    }
+                }
+                sys.stdout.write(json.dumps(error_response, ensure_ascii=False) + "\n")
+                sys.stdout.flush()
+
+    except KeyboardInterrupt:
+        pass
--- a/skills/developing/pmda-drug-info/pmda_server.py
+++ b/skills/developing/pmda-drug-info/pmda_server.py
@ -0,0 +1,533 @@
+#!/usr/bin/env python3
+"""
+PMDA drug information MCP server (mock data version).
+
+Provides drug search, master info, interactions, restrictions, dosing,
+and full-text chapter retrieval with mock data for testing.
+"""
+
+import asyncio
+import json
+import sys
+from typing import Any, Dict, Optional
+
+from mcp_common import (
+    create_error_response,
+    create_initialize_response,
+    create_ping_response,
+    create_tools_list_response,
+    load_tools_from_json,
+    handle_mcp_streaming,
+)
+
+
+def _dump(obj) -> str:
+    return json.dumps(obj, ensure_ascii=False)
+
+
+# ---------------------------------------------------------------------------
+# Mock data
+# ---------------------------------------------------------------------------
+
+MOCK_DRUG_MASTER = {
+    "2149039F1082": {
+        "yj_code": "2149039F1082",
+        "yj_full": "2149039F1082_1_17",
+        "brand_name": "ロサルタンK錠50mg「科研」",
+        "generic_name": "ロサルタンカリウム",
+        "category_code": "214",
+        "category_name": "アンジオテンシンII受容体拮抗薬",
+        "regulation": "劇薬, 処方箋医薬品",
+        "manufacturer": "科研製薬株式会社",
+        "revision_date": "2024-06",
+    },
+    "3399007H1021": {
+        "yj_code": "3399007H1021",
+        "yj_full": "3399007H1021_1_21",
+        "brand_name": "バイアスピリン錠100mg",
+        "generic_name": "アスピリン",
+        "category_code": "339",
+        "category_name": "血液・体液用薬",
+        "regulation": "処方箋医薬品",
+        "manufacturer": "バイエル薬品株式会社",
+        "revision_date": "2024-03",
+    },
+    "2179004F1026": {
+        "yj_code": "2179004F1026",
+        "yj_full": "2179004F1026_1_14",
+        "brand_name": "ノルバスク錠5mg",
+        "generic_name": "アムロジピンベシル酸塩",
+        "category_code": "217",
+        "category_name": "カルシウム拮抗薬",
+        "regulation": "処方箋医薬品",
+        "manufacturer": "ファイザー株式会社",
+        "revision_date": "2024-01",
+    },
+}
+
+MOCK_CATEGORIES = [
+    {"category_code": "214", "category_name": "アンジオテンシンII受容体拮抗薬", "level": "L2", "drug_count": 35},
+    {"category_code": "217", "category_name": "カルシウム拮抗薬", "level": "L2", "drug_count": 48},
+    {"category_code": "339", "category_name": "血液・体液用薬", "level": "L2", "drug_count": 22},
+    {"category_code": "612", "category_name": "消化性潰瘍用剤", "level": "L2", "drug_count": 40},
+]
+
+MOCK_INTERACTIONS = [
+    {
+        "drug_a_yj": "2149039F1082",
+        "drug_b_yj": "3399007H1021",
+        "drug_b_class": "アスピリン（抗血小板剤）",
+        "severity": "併用注意",
+        "mechanism": "ARBの降圧作用を減弱するおそれがある。また、腎機能低下・高カリウム血症のリスクを増大。",
+        "clinical_effect": "降圧効果の減弱、腎機能悪化、高カリウム血症に注意。",
+        "source_drug_yj": "2149039F1082",
+        "source_section": "10.2 併用注意",
+    },
+    {
+        "drug_a_yj": "3399007H1021",
+        "drug_b_yj": "2149039F1082",
+        "drug_b_class": "ロサルタンカリウム（ARB）",
+        "severity": "併用注意",
+        "mechanism": "アスピリンの副作用（消化性潰瘍、腎機能低下）を増強するおそれ。",
+        "clinical_effect": "消化性潰瘍、腎機能低下に注意。血清カリウム値の上昇に注意。",
+        "source_drug_yj": "3399007H1021",
+        "source_section": "10.2 併用注意",
+    },
+]
+
+MOCK_RESTRICTIONS = [
+    {
+        "drug_yj": "2149039F1082",
+        "condition_type": "腎機能障害",
+        "condition_text": "腎機能障害患者",
+        "condition_params": {"eGFR_max": 30},
+        "severity": "慎重投与",
+        "source_section": "9.2 腎機能障害患者",
+    },
+    {
+        "drug_yj": "2149039F1082",
+        "condition_type": "妊婦",
+        "condition_text": "妊娠中の女性",
+        "condition_params": {},
+        "severity": "禁忌",
+        "source_section": "9.5 妊婦",
+    },
+    {
+        "drug_yj": "2149039F1082",
+        "condition_type": "高齢者",
+        "condition_text": "高齢者（65歳以上）",
+        "condition_params": {},
+        "severity": "慎重投与",
+        "source_section": "9.8 高齢者",
+    },
+    {
+        "drug_yj": "3399007H1021",
+        "condition_type": "過敏症",
+        "condition_text": "本剤の成分に対し過敏症の既往歴のある患者",
+        "condition_params": {},
+        "severity": "禁忌",
+        "source_section": "2. 禁忌",
+    },
+]
+
+MOCK_DOSING = [
+    {
+        "drug_yj": "2149039F1082",
+        "patient_segment": "成人",
+        "segment_params": {},
+        "indication_code": "高血圧症",
+        "dose_amount": "50",
+        "dose_unit": "mg",
+        "frequency": "1日1回",
+        "duration": "",
+        "adjustment_text": "効果不十分な場合は100mgまで増量可",
+        "source_section": "6. 用法及び用量",
+    },
+    {
+        "drug_yj": "2149039F1082",
+        "patient_segment": "腎機能障害患者",
+        "segment_params": {"eGFR_max": 30},
+        "indication_code": "高血圧症",
+        "dose_amount": "25",
+        "dose_unit": "mg",
+        "frequency": "1日1回",
+        "duration": "",
+        "adjustment_text": "eGFR 30以下では用量を減ずること。血清カリウム・クレアチニンの推移に注意。",
+        "source_section": "9.2 腎機能障害患者",
+    },
+]
+
+MOCK_CHAPTERS = {
+    "2149039F1082_1_17": [
+        {"section_title": "1. 警告", "line_num": 1, "text_len": 120},
+        {"section_title": "2. 禁忌", "line_num": 5, "text_len": 80},
+        {"section_title": "4. 効能・効果", "line_num": 12, "text_len": 60},
+        {"section_title": "6. 用法及び用量", "line_num": 20, "text_len": 150},
+        {"section_title": "9.2 腎機能障害患者", "line_num": 45, "text_len": 200},
+        {"section_title": "9.5 妊婦", "line_num": 52, "text_len": 180},
+        {"section_title": "9.8 高齢者", "line_num": 60, "text_len": 100},
+        {"section_title": "10.2 併用注意", "line_num": 75, "text_len": 350},
+        {"section_title": "11.1 重大な副作用", "line_num": 90, "text_len": 400},
+        {"section_title": "11.2 その他の副作用", "line_num": 110, "text_len": 300},
+    ],
+    "3399007H1021_1_21": [
+        {"section_title": "1. 警告", "line_num": 1, "text_len": 100},
+        {"section_title": "2. 禁忌", "line_num": 4, "text_len": 90},
+        {"section_title": "4. 効能・効果", "line_num": 10, "text_len": 55},
+        {"section_title": "6. 用法及び用量", "line_num": 18, "text_len": 130},
+        {"section_title": "10.2 併用注意", "line_num": 70, "text_len": 300},
+        {"section_title": "11.1 重大な副作用", "line_num": 85, "text_len": 450},
+        {"section_title": "11.2 その他の副作用", "line_num": 105, "text_len": 280},
+    ],
+}
+
+MOCK_SECTION_TEXT = {
+    ("2149039F1082_1_17", "9.2 腎機能障害患者"): (
+        "9.2 腎機能障害患者\n"
+        "腎機能障害患者（eGFR 30 mL/min/1.73m²以下）には、ロサルタンカリウムの"
+        "投与開始用量を25mg/日とし、血清カリウム及び血清クレアチニンの推移に"
+        "十分注意すること。\n"
+        "【理由】腎機能障害患者では、本剤の投与により急速に腎機能が悪化する"
+        "おそれがある。また、高カリウム血症があらわれやすい。"
+    ),
+    ("2149039F1082_1_17", "9.5 妊婦"): (
+        "9.5 妊婦\n"
+        "妊婦又は妊娠している可能性のある女性には投与しないこと。\n"
+        "【理由】妊娠中期・末期にレニン-アンジオテンシン系に作用する薬剤を"
+        "投与された患者では、胎児の腎機能低下、羊水過少症、頭蓋の発育不全、"
+        "肺低形成等があらわれるおそれがある。"
+    ),
+    ("2149039F1082_1_17", "10.2 併用注意"): (
+        "10.2 併用注意\n"
+        "・アスピリン（抗血小板剤）\n"
+        "  【リスク】ARBの降圧作用を減弱するおそれがある。\n"
+        "  腎機能低下・高カリウム血症のリスクを増大。\n"
+        "  【措置】降圧効果の減弱、腎機能悪化、高カリウム血症に注意すること。"
+    ),
+    ("2149039F1082_1_17", "11.1 重大な副作用"): (
+        "11.1 重大な副作用\n"
+        "・血管浮腫（頻度不明）：顔面、口唇、咽頭、舌等の腫脹があらわれた場合には"
+        "直ちに投与を中止し、適切な処置を行うこと。\n"
+        "・高カリウム血症（0.1%未満）：血清カリウム値の上昇があらわれることがある。\n"
+        "・腎機能悪化（0.1%未満）：BUN、クレアチニンの上昇があらわれることがある。"
+    ),
+    ("3399007H1021_1_21", "10.2 併用注意"): (
+        "10.2 併用注意\n"
+        "・ロサルタンカリウム（ARB）\n"
+        "  【リスク】アスピリンの副作用（消化性潰瘍、腎機能低下）を増強するおそれ。\n"
+        "  【措置】消化性潰瘍、腎機能低下に注意。血清カリウム値の上昇に注意すること。"
+    ),
+    ("3399007H1021_1_21", "11.1 重大な副作用"): (
+        "11.1 重大な副作用\n"
+        "・ショック、アナフィラキシー（頻度不明）：呼吸困難、血圧低下等があらわれた\n"
+        "  場合には直ちに投与を中止し、適切な処置を行うこと。\n"
+        "・消化性潰瘍（0.1%未満）：出血、穿孔があらわれることがある。\n"
+        "・腎機能障害（0.1%未満）：急性腎不全があらわれることがある。"
+    ),
+}
+
+
+def _citation(drug_yj: str, section: Optional[str]) -> str:
+    drug = MOCK_DRUG_MASTER.get(drug_yj, {})
+    brand = drug.get("brand_name", "")
+    yj_full = drug.get("yj_full", drug_yj)
+    chap = section or "（章不明）"
+    return f"[出典: {brand} (yj_full={yj_full}) / {chap}]"
+
+
+# ---------------------------------------------------------------------------
+# Tool implementations (mock)
+# ---------------------------------------------------------------------------
+
+def _tool_search_drugs(query: str, kind: str = "auto", limit: int = 10) -> str:
+    results = []
+    for code, d in MOCK_DRUG_MASTER.items():
+        q = query.lower()
+        if (kind == "brand" and q in d["brand_name"].lower()) or \
+           (kind == "generic" and q in d["generic_name"].lower()) or \
+           (kind == "yj" and (q in d["yj_code"].lower() or q in d["yj_full"].lower())) or \
+           (kind == "auto" and (q in d["brand_name"].lower() or q in d["generic_name"].lower()
+                                or q in d["yj_code"].lower() or q in d["yj_full"].lower())):
+            results.append({
+                "yj_full": d["yj_full"],
+                "yj_code": d["yj_code"],
+                "brand": d["brand_name"],
+                "generic": d["generic_name"],
+                "category": f"{d['category_code']} {d['category_name']}",
+                "score": 1.0,
+            })
+    return _dump(results[:limit])
+
+
+def _tool_list_categories() -> str:
+    return _dump(MOCK_CATEGORIES)
+
+
+def _tool_list_drugs_in_category(l2_code: str, limit_generics: int = 50) -> str:
+    results = []
+    seen_generics = set()
+    for code, d in MOCK_DRUG_MASTER.items():
+        if d["category_code"].startswith(l2_code) and d["generic_name"] not in seen_generics:
+            seen_generics.add(d["generic_name"])
+            results.append({
+                "generic_name": d["generic_name"],
+                "brands": [{"yj_code": d["yj_code"], "brand_name": d["brand_name"], "yj_full": d["yj_full"]}],
+            })
+    return _dump(results[:limit_generics])
+
+
+def _tool_get_drug_master(yj_code: str) -> str:
+    d = MOCK_DRUG_MASTER.get(yj_code)
+    if not d:
+        return _dump({"error": f"yj_code {yj_code} not found"})
+    result = dict(d)
+    result["_citation"] = f"[出典: {d['brand_name']} (yj_full={d['yj_full']}) / 添付文書冒頭]"
+    return _dump(result)
+
+
+def _tool_get_drug_interactions(
+    drug_a_yj: Optional[str] = None,
+    drug_b_yj: Optional[str] = None,
+    severity: Optional[str] = None,
+    keyword: Optional[str] = None,
+    limit: int = 30,
+) -> str:
+    results = []
+    for r in MOCK_INTERACTIONS:
+        if drug_a_yj and r["drug_a_yj"] != drug_a_yj:
+            continue
+        if drug_b_yj and r["drug_b_yj"] != drug_b_yj:
+            continue
+        if severity and r["severity"] != severity:
+            continue
+        if keyword and keyword.lower() not in (
+            (r.get("drug_b_class") or "").lower()
+            + (r.get("mechanism") or "").lower()
+            + (r.get("clinical_effect") or "").lower()
+        ):
+            continue
+        results.append({**r, "_citation": _citation(r["source_drug_yj"], r["source_section"])})
+    return _dump(results[:limit])
+
+
+def _tool_get_drug_restrictions(
+    drug_yj: Optional[str] = None,
+    condition_type: Optional[str] = None,
+    severity: Optional[str] = None,
+    keyword: Optional[str] = None,
+    limit: int = 30,
+) -> str:
+    results = []
+    for r in MOCK_RESTRICTIONS:
+        if drug_yj and r["drug_yj"] != drug_yj:
+            continue
+        if condition_type and r["condition_type"] != condition_type:
+            continue
+        if severity and r["severity"] != severity:
+            continue
+        if keyword and keyword.lower() not in (r.get("condition_text") or "").lower():
+            continue
+        results.append({**r, "_citation": _citation(r["drug_yj"], r["source_section"])})
+    return _dump(results[:limit])
+
+
+def _tool_get_drug_dosing(
+    drug_yj: str,
+    patient_segment: Optional[str] = None,
+    limit: int = 20,
+) -> str:
+    results = []
+    for r in MOCK_DOSING:
+        if r["drug_yj"] != drug_yj:
+            continue
+        if patient_segment and r["patient_segment"] != patient_segment:
+            continue
+        results.append({**r, "_citation": _citation(drug_yj, r["source_section"])})
+    return _dump(results[:limit])
+
+
+def _tool_search_section_text(
+    keyword: str,
+    section_filter: str = "",
+    limit: int = 30,
+) -> str:
+    if not keyword.strip():
+        return _dump({"keyword": keyword, "total_drugs": 0, "shown": 0, "hits": []})
+
+    # Simple mock: search through section text
+    hits_out = []
+    for (yj_full, section_title), text in MOCK_SECTION_TEXT.items():
+        if section_filter and section_filter not in section_title:
+            continue
+        if keyword.lower() in text.lower():
+            drug = None
+            for d in MOCK_DRUG_MASTER.values():
+                if d["yj_full"] == yj_full:
+                    drug = d
+                    break
+            if not drug:
+                continue
+            brand = drug["brand_name"]
+            # Deduplicate by yj_full
+            existing = [h for h in hits_out if h["yj_full"] == yj_full]
+            if existing:
+                existing[0]["matches"].append({
+                    "section_title": section_title,
+                    "snippet": text[:160],
+                })
+                continue
+            hits_out.append({
+                "yj_full": yj_full,
+                "brand": brand,
+                "generic": drug["generic_name"],
+                "l2": f"{drug['category_code']} {drug['category_name']}",
+                "matches": [{"section_title": section_title, "snippet": text[:160]}],
+                "_citation_template": f"[出典: {brand} (yj_full={yj_full}) / <該当章>]",
+            })
+
+    return _dump({
+        "keyword": keyword,
+        "section_filter": section_filter or None,
+        "total_drugs": len({h["yj_full"] for h in hits_out}),
+        "shown": len(hits_out),
+        "hits": hits_out[:limit],
+    })
+
+
+def _tool_list_drug_chapters(yj_full: str) -> str:
+    sections = MOCK_CHAPTERS.get(yj_full)
+    if not sections:
+        return _dump({"error": f"yj_full {yj_full} の章節が見つかりません。"})
+
+    drug = None
+    for d in MOCK_DRUG_MASTER.values():
+        if d["yj_full"] == yj_full:
+            drug = d
+            break
+
+    return _dump({
+        "yj_full": yj_full,
+        "brand": drug["brand_name"] if drug else "",
+        "generic": drug["generic_name"] if drug else "",
+        "n_sections": len(sections),
+        "sections": sections,
+    })
+
+
+def _tool_read_drug_chapter(yj_full: str, section_title: str) -> str:
+    text = MOCK_SECTION_TEXT.get((yj_full, section_title))
+    if text:
+        return text[:8000]
+    return _dump({
+        "error": f"section_title {section_title!r} は {yj_full} に存在しません。",
+        "hint": "list_drug_chapters で取得した sections[].section_title をそのまま渡してください。",
+    })
+
+
+# ---------------------------------------------------------------------------
+# MCP request handler
+# ---------------------------------------------------------------------------
+
+# Maps each MCP tool name to a callable taking the raw ``arguments`` dict of a
+# ``tools/call`` request. Every entry extracts the arguments it needs with
+# ``args.get`` (supplying the defaults shown below) and returns the tool's
+# text result.
+_TOOL_DISPATCH = {
+    "search_drugs": lambda args: _tool_search_drugs(
+        query=args.get("query", ""),
+        kind=args.get("kind", "auto"),
+        limit=args.get("limit", 10),
+    ),
+    # Takes no arguments; ``args`` is ignored.
+    "list_categories": lambda args: _tool_list_categories(),
+    "list_drugs_in_category": lambda args: _tool_list_drugs_in_category(
+        l2_code=args.get("l2_code", ""),
+        limit_generics=args.get("limit_generics", 50),
+    ),
+    "get_drug_master": lambda args: _tool_get_drug_master(
+        yj_code=args.get("yj_code", ""),
+    ),
+    # All filters are optional and default to None (no filtering).
+    "get_drug_interactions": lambda args: _tool_get_drug_interactions(
+        drug_a_yj=args.get("drug_a_yj"),
+        drug_b_yj=args.get("drug_b_yj"),
+        severity=args.get("severity"),
+        keyword=args.get("keyword"),
+        limit=args.get("limit", 30),
+    ),
+    # All filters are optional and default to None (no filtering).
+    "get_drug_restrictions": lambda args: _tool_get_drug_restrictions(
+        drug_yj=args.get("drug_yj"),
+        condition_type=args.get("condition_type"),
+        severity=args.get("severity"),
+        keyword=args.get("keyword"),
+        limit=args.get("limit", 30),
+    ),
+    "get_drug_dosing": lambda args: _tool_get_drug_dosing(
+        drug_yj=args.get("drug_yj", ""),
+        patient_segment=args.get("patient_segment"),
+        limit=args.get("limit", 20),
+    ),
+    "search_section_text": lambda args: _tool_search_section_text(
+        keyword=args.get("keyword", ""),
+        section_filter=args.get("section_filter", ""),
+        limit=args.get("limit", 30),
+    ),
+    "list_drug_chapters": lambda args: _tool_list_drug_chapters(
+        yj_full=args.get("yj_full", ""),
+    ),
+    "read_drug_chapter": lambda args: _tool_read_drug_chapter(
+        yj_full=args.get("yj_full", ""),
+        section_title=args.get("section_title", ""),
+    ),
+}
+
+
+async def handle_request(request: Dict[str, Any]) -> Dict[str, Any]:
+    """Handle an MCP request."""
+    try:
+        method = request.get("method")
+        params = request.get("params", {})
+        request_id = request.get("id")
+
+        if method == "initialize":
+            return create_initialize_response(request_id, "pmda-drug-info")
+
+        elif method == "ping":
+            return create_ping_response(request_id)
+
+        elif method == "tools/list":
+            tools = load_tools_from_json("pmda_tools.json")
+            return create_tools_list_response(request_id, tools)
+
+        elif method == "tools/call":
+            tool_name = params.get("name")
+            arguments = params.get("arguments", {})
+
+            if tool_name not in _TOOL_DISPATCH:
+                return create_error_response(request_id, -32601, f"Unknown tool: {tool_name}")
+
+            try:
+                result_text = _TOOL_DISPATCH[tool_name](arguments)
+                return {
+                    "jsonrpc": "2.0",
+                    "id": request_id,
+                    "result": {
+                        "content": [{"type": "text", "text": result_text}]
+                    },
+                }
+            except Exception as e:
+                return {
+                    "jsonrpc": "2.0",
+                    "id": request_id,
+                    "result": {
+                        "content": [{"type": "text", "text": f"Error: {str(e)}"}]
+                    },
+                }
+
+        else:
+            return create_error_response(request_id, -32601, f"Unknown method: {method}")
+
+    except Exception as e:
+        return create_error_response(request.get("id"), -32603, f"Internal error: {str(e)}")
+
+
+async def main():
+    await handle_mcp_streaming(handle_request)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
--- a/skills/developing/pmda-drug-info/pmda_tools.json
+++ b/skills/developing/pmda-drug-info/pmda_tools.json
@ -0,0 +1,207 @@
+[
+  {
+    "name": "search_drugs",
+    "description": "Search drugs by brand name, generic name, or YJ code. Returns list of matching drugs with yj_code, brand name, generic name, and category.",
+    "inputSchema": {
+      "type": "object",
+      "properties": {
+        "query": {
+          "type": "string",
+          "description": "Search query: drug brand name, generic name, or YJ code."
+        },
+        "kind": {
+          "type": "string",
+          "enum": ["auto", "brand", "generic", "yj"],
+          "description": "Search type. 'auto' searches all fields.",
+          "default": "auto"
+        },
+        "limit": {
+          "type": "integer",
+          "description": "Maximum number of results.",
+          "default": 10
+        }
+      },
+      "required": ["query"]
+    }
+  },
+  {
+    "name": "list_categories",
+    "description": "List all L1/L2 drug categories (pharmacological classification) with drug counts per category.",
+    "inputSchema": {
+      "type": "object",
+      "properties": {}
+    }
+  },
+  {
+    "name": "list_drugs_in_category",
+    "description": "List all drugs (generic → brand names) under a specific L2 pharmacological category code.",
+    "inputSchema": {
+      "type": "object",
+      "properties": {
+        "l2_code": {
+          "type": "string",
+          "description": "3-digit L2 category code."
+        },
+        "limit_generics": {
+          "type": "integer",
+          "description": "Maximum number of generic names to return.",
+          "default": 50
+        }
+      },
+      "required": ["l2_code"]
+    }
+  },
+  {
+    "name": "get_drug_master",
+    "description": "Get basic information for a drug by yj_code: brand name, generic name, pharmacological category, regulatory classification, manufacturer, revision date.",
+    "inputSchema": {
+      "type": "object",
+      "properties": {
+        "yj_code": {
+          "type": "string",
+          "description": "12-character YJ code."
+        }
+      },
+      "required": ["yj_code"]
+    }
+  },
+  {
+    "name": "get_drug_interactions",
+    "description": "Search drug interactions. With drug_a only: all interactions for that drug. With both drug_a and drug_b: bidirectional interaction between A and B. Filter by severity (併用禁忌/併用注意) or keyword.",
+    "inputSchema": {
+      "type": "object",
+      "properties": {
+        "drug_a_yj": {
+          "type": "string",
+          "description": "YJ code for drug A."
+        },
+        "drug_b_yj": {
+          "type": "string",
+          "description": "YJ code for drug B (optional, for pairwise lookup)."
+        },
+        "severity": {
+          "type": "string",
+          "description": "Filter by severity: '併用禁忌' or '併用注意'."
+        },
+        "keyword": {
+          "type": "string",
+          "description": "Search keyword in drug_b_class, mechanism, or clinical_effect."
+        },
+        "limit": {
+          "type": "integer",
+          "description": "Maximum number of results.",
+          "default": 30
+        }
+      }
+    }
+  },
+  {
+    "name": "get_drug_restrictions",
+    "description": "Search drug restrictions (contraindications, precautions) by patient condition. condition_type options: 疾患, 腎機能障害, 肝機能障害, 生殖能, 妊婦, 授乳婦, 小児等, 高齢者, 過敏症, 遺伝子多型, その他. severity options: 禁忌, 原則禁忌, 慎重投与.",
+    "inputSchema": {
+      "type": "object",
+      "properties": {
+        "drug_yj": {
+          "type": "string",
+          "description": "YJ code for the drug."
+        },
+        "condition_type": {
+          "type": "string",
+          "description": "Patient condition type to filter by."
+        },
+        "severity": {
+          "type": "string",
+          "description": "Filter by severity: 禁忌, 原則禁忌, or 慎重投与."
+        },
+        "keyword": {
+          "type": "string",
+          "description": "Search keyword in condition_text."
+        },
+        "limit": {
+          "type": "integer",
+          "description": "Maximum number of results.",
+          "default": 30
+        }
+      }
+    }
+  },
+  {
+    "name": "get_drug_dosing",
+    "description": "Get dosing information for a drug, optionally filtered by patient segment. patient_segment options: 成人, 小児等, 高齢者, 腎機能障害患者, 肝機能障害患者, 透析患者, 妊婦.",
+    "inputSchema": {
+      "type": "object",
+      "properties": {
+        "drug_yj": {
+          "type": "string",
+          "description": "YJ code for the drug."
+        },
+        "patient_segment": {
+          "type": "string",
+          "description": "Patient segment to filter by (e.g., 成人, 高齢者, 腎機能障害患者)."
+        },
+        "limit": {
+          "type": "integer",
+          "description": "Maximum number of results.",
+          "default": 20
+        }
+      },
+      "required": ["drug_yj"]
+    }
+  },
+  {
+    "name": "search_section_text",
+    "description": "Full-text search in drug package insert sections. Returns matching sections with snippets. Use section_filter to narrow by chapter title (e.g., '副作用', '禁忌', '妊婦', '相互作用').",
+    "inputSchema": {
+      "type": "object",
+      "properties": {
+        "keyword": {
+          "type": "string",
+          "description": "Search keyword."
+        },
+        "section_filter": {
+          "type": "string",
+          "description": "Filter by section title substring (e.g., '副作用', '禁忌', '妊婦').",
+          "default": ""
+        },
+        "limit": {
+          "type": "integer",
+          "description": "Maximum number of results.",
+          "default": 30
+        }
+      },
+      "required": ["keyword"]
+    }
+  },
+  {
+    "name": "list_drug_chapters",
+    "description": "List all chapter titles for a drug's package insert. Use yj_full (full YJ code with revision suffix). Returns section titles with line numbers.",
+    "inputSchema": {
+      "type": "object",
+      "properties": {
+        "yj_full": {
+          "type": "string",
+          "description": "Full YJ code (with revision suffix, e.g., 3399007H1021_1_21)."
+        }
+      },
+      "required": ["yj_full"]
+    }
+  },
+  {
+    "name": "read_drug_chapter",
+    "description": "Read the verbatim text of a specific chapter from a drug's package insert. section_title must match exactly from list_drug_chapters output.",
+    "inputSchema": {
+      "type": "object",
+      "properties": {
+        "yj_full": {
+          "type": "string",
+          "description": "Full YJ code."
+        },
+        "section_title": {
+          "type": "string",
+          "description": "Exact section title from list_drug_chapters (e.g., '9.2 腎機能障害患者', '11.1 重大な副作用')."
+        }
+      },
+      "required": ["yj_full", "section_title"]
+    }
+  }
+]
--- a/skills/developing/rag-retrieve-no-citation/hooks/retrieval-policy-forbidden-self-knowledge.md
+++ b/skills/developing/rag-retrieve-no-citation/hooks/retrieval-policy-forbidden-self-knowledge.md
@ -37,8 +37,15 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi
 1. **Skill-enabled retrieval tools** (use first when available)
 2. **`rag_retrieve`**

- After each step, evaluate sufficiency before proceeding.
 - Retrieval must happen **before** any factual answer generation.
+- After each step, evaluate sufficiency before proceeding.
+
+### First-Call Success Principle
+
+- The first retrieval call is expected to return sufficient results for most questions.
+- Your default assumption should be: **one call is enough**.
+- Additional calls are the exception, not the norm. Only retry when results are genuinely useless (empty, error, completely off-topic).
+- **Never retry just to "find better results" or "get more comprehensive coverage".** Good enough is sufficient.

 ## 4. Query Preparation

@ -48,25 +55,51 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi

 ## 5. Retrieval Breadth (`top_k`)

- Apply `top_k` only to `rag_retrieve`. Use smallest sufficient value, expand if insufficient.
- `30` for simple fact lookup → `50` for moderate synthesis/comparison → `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
- Expansion order: `30 → 50 → 100`. If unsure, use `100`.
+- Apply `top_k` only to `rag_retrieve`. Choose the appropriate value upfront to maximize first-call success.
+- Use `50` for simple fact lookup or moderate synthesis, comparison, summarization, disambiguation.
+- Use `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
+- If unsure, use `50`. Only escalate to `100` on the retry call if first results are insufficient.

 ## 6. Result Evaluation

-Treat as insufficient if: empty, `Error:`, off-topic, missing core entity/scope, no usable evidence, partial coverage, truncated results, or claims required by the answer are not explicitly supported.
+**Maximum 3 retrieval calls per question.** After each call, evaluate immediately:
+
+### Sufficient — answer immediately, no more calls
+
+ANY of the following means results are sufficient — STOP and answer now:
+- The core entity/topic in the user's question appears in the results.
+- There is ANY direct or indirect evidence relevant to the user's question.
+- Results are partially relevant, even if not perfectly comprehensive.
+- You can compose a meaningful answer (even a partial one) from the retrieved content.
+
+**Anti-patterns — do NOT do these:**
+- ❌ "The results are good, but maybe different keywords could find something better."
+- ❌ "I have enough to answer, but let me try one more query to be thorough."
+- ❌ "The answer is here, but I want to double-check with a different query."
+- ❌ Calling retrieval again after you have already identified the answer in previous results.
+
+**If you can answer the question with current results, you MUST answer immediately. Period.**
+
+### Insufficient — the ONLY valid reasons to retry
+
+- Results are completely empty or contain only `Error:` messages.
+- ALL results are entirely off-topic with zero relevance to the user's question.
+- No usable evidence exists at all — you cannot form even a partial answer.
+
+**"Results are not detailed enough" is NOT a valid reason to retry.**
+**"Results might be incomplete" is NOT a valid reason to retry.**

 ## 7. Fallback and Sequential Retry

-On insufficient results, follow this sequence:
+On insufficient results, you may retry **up to 2 more times** (3 calls total):

-1. Rewrite query, retry same tool (once)
-2. Switch to next retrieval source in default order
-3. For `rag_retrieve`, expand `top_k`: `30 → 50 → 100`
+1. Rewrite query, retry same tool.
+2. For `rag_retrieve`, escalate `top_k` to `100` on retry.

- Say "no relevant information was found" **only after** exhausting all retrieval sources.
+- Say "no relevant information was found" **only after** exhausting all retries.
 - Do NOT switch to local filesystem inspection at any point.
 - Do NOT switch to model self-knowledge at any point.
+- Do NOT call any retrieval tool more than 3 times in total.

 ## 8. Handling Missing or Partial Evidence

@ -84,7 +117,6 @@ On insufficient results, follow this sequence:
 - Each sentence or key point in the response should be accompanied by relevant images when they meet the established association criteria.
 - Avoid placing all images at the end of the response.

-
 ## 10. Self-Knowledge Prohibition

 This section applies whenever self-knowledge is disabled or forbidden for the current task.
@ -103,9 +135,11 @@ This section applies whenever self-knowledge is disabled or forbidden for the cu

 Before replying to a knowledge retrieval task, verify:
 - Used only whitelisted retrieval tools — no local filesystem inspection?
+- Called retrieval at most 3 times total (not more)?
+- Answered immediately when results were sufficient (did NOT call again unnecessarily)?
+- Called retrieval exactly once when first results were sufficient (did NOT retry unnecessarily)?
 - Did retrieval happen before any factual answer drafting?
 - Did every factual claim come from retrieved evidence rather than model knowledge?
- Exhausted retrieval flow before concluding "not found"?
 - If any unsupported part remained, was it removed or explicitly marked unavailable?

 If any answer is "no", correct the process first.
--- a/skills/developing/rag-retrieve-no-citation/hooks/retrieval-policy.md
+++ b/skills/developing/rag-retrieve-no-citation/hooks/retrieval-policy.md
@ -27,6 +27,13 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi
 - Do NOT answer from model knowledge first.
 - After each step, evaluate sufficiency before proceeding.

+### First-Call Success Principle
+
+- The first retrieval call is expected to return sufficient results for most questions.
+- Your default assumption should be: **one call is enough**.
+- Additional calls are the exception, not the norm. Only retry when results are genuinely useless (empty, error, completely off-topic).
+- **Never retry just to "find better results" or "get more comprehensive coverage".** Good enough is sufficient.
+
 ## 3. Query Preparation

 - Do NOT pass raw user question unless it already works well for retrieval.
@ -35,24 +42,50 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi

 ## 4. Retrieval Breadth (`top_k`)

- Apply `top_k` only to `rag_retrieve`. Use smallest sufficient value, expand if insufficient.
- `30` for simple fact lookup → `50` for moderate synthesis/comparison → `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
- Expansion order: `30 → 50 → 100`. If unsure, use `100`.
+- Apply `top_k` only to `rag_retrieve`. Choose the appropriate value upfront to maximize first-call success.
+- Use `50` for simple fact lookup or moderate synthesis, comparison, summarization, disambiguation.
+- Use `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
+- If unsure, use `50`. Only escalate to `100` on the retry call if first results are insufficient.

 ## 5. Result Evaluation

-Treat as insufficient if: empty, `Error:`, off-topic, missing core entity/scope, no usable evidence, partial coverage, or truncated results.
+**Maximum 3 retrieval calls per question.** After each call, evaluate immediately:
+
+### Sufficient — answer immediately, no more calls
+
+ANY of the following means results are sufficient — STOP and answer now:
+- The core entity/topic in the user's question appears in the results.
+- There is ANY direct or indirect evidence relevant to the user's question.
+- Results are partially relevant, even if not perfectly comprehensive.
+- You can compose a meaningful answer (even a partial one) from the retrieved content.
+
+**Anti-patterns — do NOT do these:**
+- ❌ "The results are good, but maybe different keywords could find something better."
+- ❌ "I have enough to answer, but let me try one more query to be thorough."
+- ❌ "The answer is here, but I want to double-check with a different query."
+- ❌ Calling retrieval again after you have already identified the answer in previous results.
+
+**If you can answer the question with current results, you MUST answer immediately. Period.**
+
+### Insufficient — the ONLY valid reasons to retry
+
+- Results are completely empty or contain only `Error:` messages.
+- ALL results are entirely off-topic with zero relevance to the user's question.
+- No usable evidence exists at all — you cannot form even a partial answer.
+
+**"Results are not detailed enough" is NOT a valid reason to retry.**
+**"Results might be incomplete" is NOT a valid reason to retry.**

 ## 6. Fallback and Sequential Retry

-On insufficient results, follow this sequence:
+On insufficient results, you may retry **up to 2 more times** (3 calls total):

-1. Rewrite query, retry same tool (once)
-2. Switch to next retrieval source in default order
-3. For `rag_retrieve`, expand `top_k`: `30 → 50 → 100`
+1. Rewrite query, retry same tool.
+2. For `rag_retrieve`, escalate `top_k` to `100` on retry.

- Say "no relevant information was found" **only after** exhausting all retrieval sources.
+- Say "no relevant information was found" **only after** exhausting all retries.
 - Do NOT switch to local filesystem inspection at any point.
+- Do NOT call any retrieval tool more than 3 times in total.

 ## 7. Image Handling

@ -81,7 +114,9 @@ This section applies only when self-knowledge is enabled.

 Before replying to a knowledge retrieval task, verify:
 - Used only whitelisted retrieval tools — no local filesystem inspection?
- Exhausted retrieval flow before concluding "not found"?
+- Called retrieval at most 3 times total (not more)?
+- Answered immediately when results were sufficient (did NOT call again unnecessarily)?
+- Called retrieval exactly once when first results were sufficient (did NOT retry unnecessarily)?
 - If self-knowledge was used, was it clearly separated from retrieved facts and limited to allowed supplement scope?

 If any answer is "no", correct the process first.
--- a/skills/onprem/rag-retrieve-only/hooks/retrieval-policy-forbidden-self-knowledge.md
+++ b/skills/onprem/rag-retrieve-only/hooks/retrieval-policy-forbidden-self-knowledge.md
@ -40,6 +40,13 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi
 - After each step, evaluate sufficiency before proceeding.
 - Retrieval must happen **before** any factual answer generation.

+### First-Call Success Principle
+
+- The first retrieval call is expected to return sufficient results for most questions.
+- Your default assumption should be: **one call is enough**.
+- Additional calls are the exception, not the norm. Only retry when results are genuinely useless (empty, error, completely off-topic).
+- **Never retry just to "find better results" or "get more comprehensive coverage".** Good enough is sufficient.
+
 ## 4. Query Preparation

 - Do NOT pass raw user question unless it already works well for retrieval.
@ -48,25 +55,51 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi

 ## 5. Retrieval Breadth (`top_k`)

- Apply `top_k` only to `rag_retrieve`. Use smallest sufficient value, expand if insufficient.
- `30` for simple fact lookup → `50` for moderate synthesis/comparison → `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
- Expansion order: `30 → 50 → 100`. If unsure, use `100`.
+- Apply `top_k` only to `rag_retrieve`. Choose the appropriate value upfront to maximize first-call success.
+- Use `50` for simple fact lookup or moderate synthesis, comparison, summarization, disambiguation.
+- Use `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
+- If unsure, use `50`. Only escalate to `100` on the retry call if first results are insufficient.

 ## 6. Result Evaluation

-Treat as insufficient if: empty, `Error:`, off-topic, missing core entity/scope, no usable evidence, partial coverage, truncated results, or claims required by the answer are not explicitly supported.
+**Maximum 3 retrieval calls per question.** After each call, evaluate immediately:
+
+### Sufficient — answer immediately, no more calls
+
+ANY of the following means results are sufficient — STOP and answer now:
+- The core entity/topic in the user's question appears in the results.
+- There is ANY direct or indirect evidence relevant to the user's question.
+- Results are partially relevant, even if not perfectly comprehensive.
+- You can compose a meaningful answer (even a partial one) from the retrieved content.
+
+**Anti-patterns — do NOT do these:**
+- ❌ "The results are good, but maybe different keywords could find something better."
+- ❌ "I have enough to answer, but let me try one more query to be thorough."
+- ❌ "The answer is here, but I want to double-check with a different query."
+- ❌ Calling retrieval again after you have already identified the answer in previous results.
+
+**If you can answer the question with current results, you MUST answer immediately. Period.**
+
+### Insufficient — the ONLY valid reasons to retry
+
+- Results are completely empty or contain only `Error:` messages.
+- ALL results are entirely off-topic with zero relevance to the user's question.
+- No usable evidence exists at all — you cannot form even a partial answer.
+
+**"Results are not detailed enough" is NOT a valid reason to retry.**
+**"Results might be incomplete" is NOT a valid reason to retry.**

 ## 7. Fallback and Sequential Retry

-On insufficient results, follow this sequence:
+On insufficient results, you may retry **up to 2 more times** (3 calls total):

-1. Rewrite query, retry same tool (once)
-2. Switch to next retrieval source in default order
-3. For `rag_retrieve`, expand `top_k`: `30 → 50 → 100`
+1. Rewrite query, retry same tool.
+2. For `rag_retrieve`, escalate `top_k` to `100` on retry.

- Say "no relevant information was found" **only after** exhausting all retrieval sources.
+- Say "no relevant information was found" **only after** exhausting all retries.
 - Do NOT switch to local filesystem inspection at any point.
 - Do NOT switch to model self-knowledge at any point.
+- Do NOT call any retrieval tool more than 3 times in total.

 ## 8. Handling Missing or Partial Evidence

@ -84,14 +117,7 @@ On insufficient results, follow this sequence:
 - Each sentence or key point in the response should be accompanied by relevant images when they meet the established association criteria.
 - Avoid placing all images at the end of the response.

-## 10. Citation Requirements
-
- MUST generate `<CITATION ... />` tags when using retrieval results.
- Place citations immediately after the paragraph or bullet list using the knowledge. Do NOT collect at end.
- 1-2 citations per paragraph/bullet. At least 1 citation when using retrieved knowledge.
- Do NOT cite claims that were not supported by retrieval.
-
-## 11. Self-Knowledge Prohibition
+## 10. Self-Knowledge Prohibition

 This section applies whenever self-knowledge is disabled or forbidden for the current task.

@ -101,19 +127,19 @@ This section applies whenever self-knowledge is disabled or forbidden for the cu
 - The model must not supplement missing parts with general knowledge, conceptual explanation, common background, intuition, or likely completion.
 - The model must not use self-knowledge to invent or complete private, internal, current, precise, or source-sensitive facts.
 - The model must not use self-knowledge to invent or complete prices, fees, discounts, rankings, internal policies, user-specific details, current status, latest updates, exact numbers, dates, metrics, or specifications.
- Retrieved facts must include citations.
 - Unsupported parts must be stated as unavailable rather than guessed.
 - If a paragraph would mix retrieved facts and unsupported completion, remove the unsupported completion.
 - If evidence is incomplete, state the limitation explicitly.

-## 12. Pre-Reply Self-Check
+## 11. Pre-Reply Self-Check

 Before replying to a knowledge retrieval task, verify:
 - Used only whitelisted retrieval tools — no local filesystem inspection?
+- Called retrieval at most 3 times total (not more)?
+- Answered immediately when results were sufficient (did NOT call again unnecessarily)?
+- Called retrieval exactly once when first results were sufficient (did NOT retry unnecessarily)?
 - Did retrieval happen before any factual answer drafting?
 - Did every factual claim come from retrieved evidence rather than model knowledge?
- Exhausted retrieval flow before concluding "not found"?
- Citations placed immediately after each relevant paragraph?
 - If any unsupported part remained, was it removed or explicitly marked unavailable?

 If any answer is "no", correct the process first.
--- a/skills/onprem/rag-retrieve-only/hooks/retrieval-policy.md
+++ b/skills/onprem/rag-retrieve-only/hooks/retrieval-policy.md
@ -27,6 +27,13 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi
 - Do NOT answer from model knowledge first.
 - After each step, evaluate sufficiency before proceeding.

+### First-Call Success Principle
+
+- The first retrieval call is expected to return sufficient results for most questions.
+- Your default assumption should be: **one call is enough**.
+- Additional calls are the exception, not the norm. Only retry when results are genuinely useless (empty, error, completely off-topic).
+- **Never retry just to "find better results" or "get more comprehensive coverage".** Good enough is sufficient.
+
 ## 3. Query Preparation

 - Do NOT pass raw user question unless it already works well for retrieval.
@ -35,24 +42,51 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi

 ## 4. Retrieval Breadth (`top_k`)

- Apply `top_k` only to `rag_retrieve`. Use smallest sufficient value, expand if insufficient.
- `30` for simple fact lookup → `50` for moderate synthesis/comparison → `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
- Expansion order: `30 → 50 → 100`. If unsure, use `100`.
+- Apply `top_k` only to `rag_retrieve`. Choose the appropriate value upfront to maximize first-call success.
+- Use `50` for simple fact lookup or moderate synthesis, comparison, summarization, disambiguation.
+- Use `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
+- If unsure, use `50`. Only escalate to `100` on the retry call if first results are insufficient.

 ## 5. Result Evaluation

-Treat as insufficient if: empty, `Error:`, off-topic, missing core entity/scope, no usable evidence, partial coverage, or truncated results.
+**Maximum 3 retrieval calls per question.** After each call, evaluate immediately:
+
+### Sufficient — answer immediately, no more calls
+
+ANY of the following means results are sufficient — STOP and answer now:
+- The core entity/topic in the user's question appears in the results.
+- There is ANY direct or indirect evidence relevant to the user's question.
+- Results are partially relevant, even if not perfectly comprehensive.
+- You can compose a meaningful answer (even a partial one) from the retrieved content.
+
+**Anti-patterns — do NOT do these:**
+- ❌ "The results are good, but maybe different keywords could find something better."
+- ❌ "I have enough to answer, but let me try one more query to be thorough."
+- ❌ "The answer is here, but I want to double-check with a different query."
+- ❌ Calling retrieval again after you have already identified the answer in previous results.
+
+**If you can answer the question with current results, you MUST answer immediately. Period.**
+
+### Insufficient — the ONLY valid reasons to retry
+
+- Results are completely empty or contain only `Error:` messages.
+- ALL results are entirely off-topic with zero relevance to the user's question.
+- No usable evidence exists at all — you cannot form even a partial answer.
+
+**"Results are not detailed enough" is NOT a valid reason to retry.**
+**"Results might be incomplete" is NOT a valid reason to retry.**

 ## 6. Fallback and Sequential Retry

-On insufficient results, follow this sequence:
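+On insufficient results, you may retry **up to 2 more times** (3 calls total). Work through the options below in order, stopping as soon as results are sufficient or the 3-call limit is reached: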

-1. Rewrite query, retry same tool (once)
-2. Switch to next retrieval source in default order
-3. For `rag_retrieve`, expand `top_k`: `30 → 50 → 100`
+1. Rewrite query, retry same tool.
+2. Switch to next retrieval source in default order.
+3. For `rag_retrieve`, escalate `top_k` to `100` on retry.

 - Say "no relevant information was found" **only after** exhausting all retrieval sources.
 - Do NOT switch to local filesystem inspection at any point.
+- Do NOT call any retrieval tool more than 3 times in total.

 ## 7. Image Handling

@ -89,7 +123,9 @@ This section applies only when self-knowledge is enabled.

 Before replying to a knowledge retrieval task, verify:
 - Used only whitelisted retrieval tools — no local filesystem inspection?
- Exhausted retrieval flow before concluding "not found"?
+- Called retrieval at most 3 times total (not more)?
+- Answered immediately when results were sufficient (did NOT call again unnecessarily)?
+- Called retrieval exactly once when first results were sufficient (did NOT retry unnecessarily)?
 - Citations placed immediately after each relevant paragraph?
 - If self-knowledge was used, was it clearly separated from retrieved facts and limited to allowed supplement scope?

--- a/skills/support/rag-retrieve-only/hooks/retrieval-policy-forbidden-self-knowledge.md
+++ b/skills/support/rag-retrieve-only/hooks/retrieval-policy-forbidden-self-knowledge.md
@ -14,7 +14,7 @@ For knowledge retrieval tasks, **this policy overrides generic codebase explorat

 - **Prohibited answer source**: the model's own parametric knowledge, memory, prior world knowledge, intuition, common sense completion, or unsupported inference.
 - **Prohibited tools**: `Glob`, `Read`, `LS`, Bash (`ls`, `find`, `cat`, `head`, `tail`, `grep`, etc.) — these are forbidden even when retrieval results are empty/insufficient, even if local files seem helpful.
- **Allowed tools only**: skill-enabled retrieval tools, `rag_retrieve`. No other source for factual answering.
+- **Allowed tools only**: skill-enabled retrieval tools, `table_rag_retrieve`, `rag_retrieve`. No other source for factual answering.
 - Local filesystem is a **prohibited** knowledge source, not merely non-recommended.
 - Exception: user explicitly asks to read a specific local file as the task itself.
 - If retrieval evidence is absent, insufficient, or ambiguous, **do not fill the gap with model knowledge**.
@ -35,11 +35,20 @@ For any knowledge retrieval task:
 Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe filesystem first.

 1. **Skill-enabled retrieval tools** (use first when available)
-2. **`rag_retrieve`**
+2. **`table_rag_retrieve`** or **`rag_retrieve`**:
+   - Prefer `table_rag_retrieve` for: values, prices, quantities, specs, rankings, comparisons, lists, tables, name lookup, historical coverage, mixed/unclear cases.
+   - Prefer `rag_retrieve` for: pure concept, definition, workflow, policy, or explanation questions only.

 - After each step, evaluate sufficiency before proceeding.
 - Retrieval must happen **before** any factual answer generation.

+### First-Call Success Principle
+
+- The first retrieval call is expected to return sufficient results for most questions.
+- Your default assumption should be: **one call is enough**.
+- Additional calls are the exception, not the norm. Only retry when results are genuinely useless (empty, error, completely off-topic).
+- **Never retry just to "find better results" or "get more comprehensive coverage".** Good enough is sufficient.
+
 ## 4. Query Preparation

 - Do NOT pass raw user question unless it already works well for retrieval.
@ -48,25 +57,54 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi

 ## 5. Retrieval Breadth (`top_k`)

- Apply `top_k` only to `rag_retrieve`. Use smallest sufficient value, expand if insufficient.
- `30` for simple fact lookup → `50` for moderate synthesis/comparison → `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
- Expansion order: `30 → 50 → 100`. If unsure, use `100`.
+- Apply `top_k` only to `rag_retrieve`. Choose the appropriate value upfront to maximize first-call success.
+- Use `50` for simple fact lookup or moderate synthesis, comparison, summarization, disambiguation.
+- Use `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
+- If unsure, use `50`. Only escalate to `100` on the retry call if first results are insufficient.

 ## 6. Result Evaluation

-Treat as insufficient if: empty, `Error:`, off-topic, missing core entity/scope, no usable evidence, partial coverage, truncated results, or claims required by the answer are not explicitly supported.
+**Maximum 3 retrieval calls per question.** After each call, evaluate immediately:
+
+### Sufficient — answer immediately, no more calls
+
+ANY of the following means results are sufficient — STOP and answer now:
+- The core entity/topic in the user's question appears in the results.
+- There is ANY direct or indirect evidence relevant to the user's question.
+- Results are partially relevant, even if not perfectly comprehensive.
+- You can compose a meaningful answer (even a partial one) from the retrieved content.
+
+**Anti-patterns — do NOT do these:**
+- ❌ "The results are good, but maybe different keywords could find something better."
+- ❌ "I have enough to answer, but let me try one more query to be thorough."
+- ❌ "The answer is here, but I want to double-check with a different query."
+- ❌ Calling retrieval again after you have already identified the answer in previous results.
+
+**If you can answer the question with current results, you MUST answer immediately. Period.**
+
+### Insufficient — the ONLY valid reasons to retry
+
+- Results are completely empty or contain only `Error:` / `no excel files found` messages.
+- ALL results are entirely off-topic with zero relevance to the user's question.
+- No usable evidence exists at all — you cannot form even a partial answer.
+
+**"Results are not detailed enough" is NOT a valid reason to retry.**
+**"Results might be incomplete" is NOT a valid reason to retry.**

 ## 7. Fallback and Sequential Retry

-On insufficient results, follow this sequence:
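+On insufficient results, you may retry **up to 2 more times** (3 calls total). Work through the options below in order, stopping as soon as results are sufficient or the 3-call limit is reached: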

-1. Rewrite query, retry same tool (once)
-2. Switch to next retrieval source in default order
-3. For `rag_retrieve`, expand `top_k`: `30 → 50 → 100`
+1. Rewrite query, retry same tool.
+2. Switch to next retrieval source in default order.
+3. For `rag_retrieve`, escalate `top_k` to `100` on retry.
+4. `table_rag_retrieve` insufficient → try `rag_retrieve`; `rag_retrieve` insufficient → try `table_rag_retrieve`.

- Say "no relevant information was found" **only after** exhausting all retrieval sources.
+- `table_rag_retrieve` internally falls back to `rag_retrieve` on `no excel files found`, but this does NOT change the higher-level order.
+- Say "no relevant information was found" **only after** exhausting all retries.
 - Do NOT switch to local filesystem inspection at any point.
 - Do NOT switch to model self-knowledge at any point.
+- Do NOT call any retrieval tool more than 3 times in total.

 ## 8. Handling Missing or Partial Evidence

@ -75,7 +113,12 @@ On insufficient results, follow this sequence:
 - Prefer "the retrieved materials do not provide this information" over speculative completion.
 - When user asks for a definitive answer but evidence is incomplete, state the limitation directly.

-## 9. Image Handling
+## 9. Table RAG Result Handling
+
+- Follow all `[INSTRUCTION]` and `[EXTRA_INSTRUCTION]` in results.
+- If results are truncated, tell the user the total count (`N+M`), the number displayed (`N`), and the number omitted (`M`).
+
+## 10. Image Handling

 - The content returned by the `rag_retrieve` tool may include images.
 - Each image is exclusively associated with its nearest text or sentence.
@ -84,13 +127,6 @@ On insufficient results, follow this sequence:
 - Each sentence or key point in the response should be accompanied by relevant images when they meet the established association criteria.
 - Avoid placing all images at the end of the response.

-## 10. Citation Requirements
-
- MUST generate `<CITATION ... />` tags when using retrieval results.
- Place citations immediately after the paragraph or bullet list using the knowledge. Do NOT collect at end.
- 1-2 citations per paragraph/bullet. At least 1 citation when using retrieved knowledge.
- Do NOT cite claims that were not supported by retrieval.
-
 ## 11. Self-Knowledge Prohibition

 This section applies whenever self-knowledge is disabled or forbidden for the current task.
@ -101,7 +137,6 @@ This section applies whenever self-knowledge is disabled or forbidden for the cu
 - The model must not supplement missing parts with general knowledge, conceptual explanation, common background, intuition, or likely completion.
 - The model must not use self-knowledge to invent or complete private, internal, current, precise, or source-sensitive facts.
 - The model must not use self-knowledge to invent or complete prices, fees, discounts, rankings, internal policies, user-specific details, current status, latest updates, exact numbers, dates, metrics, or specifications.
- Retrieved facts must include citations.
 - Unsupported parts must be stated as unavailable rather than guessed.
 - If a paragraph would mix retrieved facts and unsupported completion, remove the unsupported completion.
 - If evidence is incomplete, state the limitation explicitly.
@ -110,10 +145,11 @@ This section applies whenever self-knowledge is disabled or forbidden for the cu

 Before replying to a knowledge retrieval task, verify:
 - Used only whitelisted retrieval tools — no local filesystem inspection?
+- Called retrieval at most 3 times total (not more)?
+- Answered immediately when results were sufficient (did NOT call again unnecessarily)?
+- Called retrieval exactly once when first results were sufficient (did NOT retry unnecessarily)?
 - Did retrieval happen before any factual answer drafting?
 - Did every factual claim come from retrieved evidence rather than model knowledge?
- Exhausted retrieval flow before concluding "not found"?
- Citations placed immediately after each relevant paragraph?
 - If any unsupported part remained, was it removed or explicitly marked unavailable?

 If any answer is "no", correct the process first.
--- a/skills/support/rag-retrieve-only/hooks/retrieval-policy.md
+++ b/skills/support/rag-retrieve-only/hooks/retrieval-policy.md
@ -27,6 +27,13 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi
 - Do NOT answer from model knowledge first.
 - After each step, evaluate sufficiency before proceeding.

+### First-Call Success Principle
+
+- The first retrieval call is expected to return sufficient results for most questions.
+- Your default assumption should be: **one call is enough**.
+- Additional calls are the exception, not the norm. Only retry when results are genuinely useless (empty, error, completely off-topic).
+- **Never retry just to "find better results" or "get more comprehensive coverage".** Good enough is sufficient.
+
 ## 3. Query Preparation

 - Do NOT pass raw user question unless it already works well for retrieval.
@ -35,24 +42,51 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi

 ## 4. Retrieval Breadth (`top_k`)

- Apply `top_k` only to `rag_retrieve`. Use smallest sufficient value, expand if insufficient.
- `30` for simple fact lookup → `50` for moderate synthesis/comparison → `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
- Expansion order: `30 → 50 → 100`. If unsure, use `100`.
+- Apply `top_k` only to `rag_retrieve`. Choose the appropriate value upfront to maximize first-call success.
+- Use `50` for simple fact lookup or moderate synthesis, comparison, summarization, disambiguation.
+- Use `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
+- If unsure, use `50`. Escalate to `100` only on a retry, when the first results are insufficient.

 ## 5. Result Evaluation

-Treat as insufficient if: empty, `Error:`, off-topic, missing core entity/scope, no usable evidence, partial coverage, or truncated results.
+**Maximum 3 retrieval calls per question.** After each call, evaluate immediately:
+
+### Sufficient — answer immediately, no more calls
+
+ANY of the following means results are sufficient — STOP and answer now:
+- The core entity/topic in the user's question appears in the results.
+- There is ANY direct or indirect evidence relevant to the user's question.
+- Results are partially relevant, even if not perfectly comprehensive.
+- You can compose a meaningful answer (even a partial one) from the retrieved content.
+
+**Anti-patterns — do NOT do these:**
+- ❌ "The results are good, but maybe different keywords could find something better."
+- ❌ "I have enough to answer, but let me try one more query to be thorough."
+- ❌ "The answer is here, but I want to double-check with a different query."
+- ❌ Calling retrieval again after you have already identified the answer in previous results.
+
+**If you can answer the question with current results, you MUST answer immediately. Period.**
+
+### Insufficient — the ONLY valid reasons to retry
+
+- Results are completely empty or contain only `Error:` messages.
+- ALL results are entirely off-topic with zero relevance to the user's question.
+- No usable evidence exists at all — you cannot form even a partial answer.
+
+**"Results are not detailed enough" is NOT a valid reason to retry.**
+**"Results might be incomplete" is NOT a valid reason to retry.**

 ## 6. Fallback and Sequential Retry

-On insufficient results, follow this sequence:
+On insufficient results, you may retry **up to 2 more times** (3 calls total):

-1. Rewrite query, retry same tool (once)
-2. Switch to next retrieval source in default order
-3. For `rag_retrieve`, expand `top_k`: `30 → 50 → 100`
+1. Rewrite query, retry same tool.
+2. Switch to next retrieval source in default order.
+3. For `rag_retrieve`, escalate `top_k` to `100` on retry.

 - Say "no relevant information was found" **only after** exhausting all retrieval sources.
 - Do NOT switch to local filesystem inspection at any point.
+- Do NOT make more than 3 retrieval calls in total per question, counted across all retrieval tools.

 ## 7. Image Handling

@ -89,7 +123,9 @@ This section applies only when self-knowledge is enabled.

 Before replying to a knowledge retrieval task, verify:
 - Used only whitelisted retrieval tools — no local filesystem inspection?
- Exhausted retrieval flow before concluding "not found"?
+- Called retrieval at most 3 times total (not more)?
+- Answered immediately when results were sufficient (did NOT call again unnecessarily)?
+- Called retrieval exactly once when first results were sufficient (did NOT retry unnecessarily)?
 - Citations placed immediately after each relevant paragraph?
 - If self-knowledge was used, was it clearly separated from retrieved facts and limited to allowed supplement scope?

--- a/utils/structured_log.py
+++ b/utils/structured_log.py
@ -0,0 +1,69 @@
+import json
+import logging
+import time
+from typing import Any, Optional
+
+# Logger named "app"; emit_question_metric writes its JSON lines through it.
+logger = logging.getLogger("app")
+
+# Emitted as "schema_version" in every metric payload built in this module.
+SCHEMA_VERSION = 1
+
+
+# Reduce a value to something json.dumps can serialize directly.
+# - None stays None.
+# - str / int / float / bool are returned unchanged.
+# - Any other type is replaced by its str() form.
+# NOTE(review): float NaN/inf pass through unchanged; with json.dumps'
+# default allow_nan=True they are written as NaN/Infinity, which is not
+# strict JSON -- confirm the log consumer tolerates that.
+def _normalize_value(value: Any) -> Any:
+    if value is None:
+        return None
+    if isinstance(value, (str, int, float, bool)):
+        return value
+    # Containers and arbitrary objects are flattened to text, not nested JSON.
+    return str(value)
+
+
+# Build one "question_perf" metric event and log it as a single compact JSON
+# line at INFO level on the module logger.
+#
+# All arguments are keyword-only. `stage` and `status` are always written;
+# every other named argument is written only when it is not None. Entries of
+# `extra` are merged in last under their own keys, again skipping None values.
+# Values are passed through _normalize_value before being stored.
+# Returns None; the only output is the log record.
+def emit_question_metric(
+    *,
+    stage: str,
+    status: str,
+    duration_ms: Optional[int] = None,
+    first_response_time_ms: Optional[int] = None,
+    trace_id: Optional[str] = None,
+    ai_id: Optional[str] = None,
+    session_id: Optional[str] = None,
+    robot_type: Optional[str] = None,
+    model: Optional[str] = None,
+    stream: Optional[bool] = None,
+    error_type: Optional[str] = None,
+    extra: Optional[dict[str, Any]] = None,
+) -> None:
+    # Fields present on every event, regardless of which optionals are given.
+    payload: dict[str, Any] = {
+        "schema_version": SCHEMA_VERSION,
+        "event": {
+            "kind": "metric",
+            "category": ["question"],
+            "action": "question_perf",
+        },
+        "stage": stage,
+        "status": status,
+        # Wall-clock time of emission, in integer milliseconds since the epoch.
+        "observed_at": int(time.time() * 1000),
+        "service": "catalog-agent",
+    }
+
+    optional_fields = {
+        "trace_id": trace_id,
+        "duration_ms": duration_ms,
+        "first_response_time_ms": first_response_time_ms,
+        "ai_id": ai_id,
+        "session_id": session_id,
+        "robot_type": robot_type,
+        "model": model,
+        "stream": stream,
+        "error_type": error_type,
+    }
+    # Omit unset optionals entirely instead of writing explicit nulls.
+    for key, value in optional_fields.items():
+        normalized = _normalize_value(value)
+        if normalized is not None:
+            payload[key] = normalized
+
+    # `extra` is applied last, so a key that matches an earlier one
+    # (including the fixed fields above) overwrites it.
+    # NOTE(review): confirm that overriding core fields via `extra` is intended.
+    if extra:
+        for key, value in extra.items():
+            normalized = _normalize_value(value)
+            if normalized is not None:
+                payload[key] = normalized
+
+    # ensure_ascii=False keeps non-ASCII text unescaped; the separators drop
+    # the default spaces after "," and ":" so the event is one compact line.
+    logger.info(json.dumps(payload, ensure_ascii=False, separators=(",", ":")))