diff --git a/agent/agent_config.py b/agent/agent_config.py index f2f7e0b..f2e894c 100644 --- a/agent/agent_config.py +++ b/agent/agent_config.py @@ -32,6 +32,7 @@ class AgentConfig: session_id: Optional[str] = None dataset_ids: Optional[List[str]] = field(default_factory=list) trace_id: Optional[str] = None # Request trace ID, obtained from the X-Request-ID header + request_started_at: Optional[float] = None # Response control parameters stream: bool = False diff --git a/agent/deep_assistant.py b/agent/deep_assistant.py index 86eafb1..085beeb 100644 --- a/agent/deep_assistant.py +++ b/agent/deep_assistant.py @@ -24,6 +24,7 @@ from .guideline_middleware import GuidelineMiddleware from .tool_output_length_middleware import ToolOutputLengthMiddleware from .tool_use_cleanup_middleware import ToolUseCleanupMiddleware from .filepath_fix_middleware import FilePathFixMiddleware +from .mcp_trace_meta import patch_mcp_client_session_trace_meta from utils.settings import ( SUMMARIZATION_MAX_TOKENS, SUMMARIZATION_TOKENS_TO_KEEP, @@ -42,6 +43,7 @@ from .mem0_middleware import create_mem0_middleware from .mem0_config import Mem0Config from agent.prompt_loader import load_system_prompt_async, load_mcp_settings_async from agent.agent_memory_cache import get_memory_cache_manager +from .subagent_loader import load_subagents from .checkpoint_manager import get_checkpointer_manager from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver from langgraph.checkpoint.memory import InMemorySaver @@ -63,6 +65,8 @@ from deepagents.graph import BASE_AGENT_PROMPT from deepagents_cli.local_context import LocalContextMiddleware # Custom: FilesystemMiddleware with full SKILL.md reading support from .custom_filesystem_middleware import CustomFilesystemMiddleware +# Sub-agent support +from deepagents.middleware.subagents import SubAgent, SubAgentMiddleware # Global MemorySaver instance # from langgraph.checkpoint.memory import MemorySaver @@ -123,6 +127,7 @@ def read_system_prompt(): async 
def get_tools_from_mcp(mcp): """Extract tools from MCP configuration with caching.""" + patch_mcp_client_session_trace_meta() start_time = time.time() # Defensive handling: ensure mcp is a non-empty list containing mcpServers if not isinstance(mcp, list) or len(mcp) == 0 or "mcpServers" not in mcp[0]: @@ -306,6 +311,15 @@ async def init_agent(config: AgentConfig): sandbox, sandbox_type, workspace_root = await sandbox_task logger.info(f"init_agent sandbox ready, elapsed: {time.time() - create_start:.3f}s") + # Load sub-agents from skill directories + subagents = await load_subagents( + bot_id=config.bot_id, + tools=mcp_tools, + model=llm_instance, + ) + if subagents: + logger.info(f"Loaded {len(subagents)} sub-agents: {[s['name'] for s in subagents]}") + agent, composite_backend = create_custom_cli_agent( model=llm_instance, assistant_id=config.bot_id, @@ -317,6 +331,7 @@ async def init_agent(config: AgentConfig): checkpointer=checkpointer, sandbox=sandbox, sandbox_type=sandbox_type, + subagents=subagents if subagents else None, shell_env={ k: v for k, v in { "ASSISTANT_ID": str(config.bot_id), @@ -385,6 +400,7 @@ def create_custom_cli_agent( checkpointer: Checkpointer | None = None, store: BaseStore | None = None, shell_env: dict[str, str] | None = None, + subagents: list[SubAgent] | None = None, ) -> tuple[Pregel, CompositeBackend]: """Create a CLI-configured agent with custom workspace_root for shell commands. 
@@ -521,9 +537,19 @@ def create_custom_cli_agent( TodoListMiddleware(), FilePathFixMiddleware(), # Fix extra spaces in CJK file names within tool call arguments CustomFilesystemMiddleware(backend=composite_backend), # Use the custom FilesystemMiddleware with full SKILL.md reading support + ] + # Insert SubAgentMiddleware after FilesystemMiddleware (matches create_deep_agent ordering) + if subagents: + subagent_middleware = SubAgentMiddleware( + backend=composite_backend, + subagents=subagents, + ) + deepagent_middleware.append(subagent_middleware) + logger.info(f"SubAgentMiddleware added with {len(subagents)} sub-agents: {[s['name'] for s in subagents]}") + deepagent_middleware.extend([ AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"), PatchToolCallsMiddleware(), - ] + ]) if agent_middleware: deepagent_middleware.extend(agent_middleware) if interrupt_on is not None: diff --git a/agent/logging_handler.py b/agent/logging_handler.py index c3e21e7..60aa886 100644 --- a/agent/logging_handler.py +++ b/agent/logging_handler.py @@ -1,6 +1,7 @@ """Logging callback handler module.""" import logging +import traceback from typing import Any, Optional, Dict, List from langchain_core.callbacks import BaseCallbackHandler from langchain_core.messages import BaseMessage @@ -80,4 +81,8 @@ class LoggingCallbackHandler(BaseCallbackHandler): self, error: Exception, **kwargs: Any ) -> None: """Called when a tool invocation raises an error.""" - self.logger.error(f"❌ Tool Error: {error}") + self.logger.error( + "❌ Tool Error: %s\n%s", + repr(error), + "".join(traceback.format_exception(type(error), error, error.__traceback__)), + ) diff --git a/agent/mcp_trace_meta.py b/agent/mcp_trace_meta.py new file mode 100644 index 0000000..f28c260 --- /dev/null +++ b/agent/mcp_trace_meta.py @@ -0,0 +1,98 @@ +import logging +from functools import wraps +from typing import Any + +try: + from mcp import ClientSession, types +except ImportError: + from mcp.client.session import 
# Marker attribute stored on the wrapper so that patching is applied only once.
_PATCHED_ATTR = "_catalog_trace_meta_patched"
# Only calls to these MCP tools get a trace id attached to their request meta.
_TRACE_META_TOOL_NAMES = {"rag_retrieve", "table_rag_retrieve"}


def _get_trace_id() -> str:
    """Return ``g.trace_id`` as a string, or ``""`` when it is unset or unreadable."""
    try:
        trace_id = getattr(g, "trace_id", "")
    except LookupError:  # KeyError is a LookupError subclass, so it is covered too
        # NOTE(review): presumably `g` is context-backed and raises when no
        # request context is active -- confirm against utils.log_util.context.
        return ""
    return str(trace_id) if trace_id else ""


def _get_tool_name(args: tuple[Any, ...], kwargs: dict[str, Any]) -> str:
    """Extract the tool name from ``call_tool`` arguments.

    The name is taken from the first positional argument when any positional
    arguments were given, otherwise from the ``name`` keyword. Returns ``""``
    when no (truthy) name is present.
    """
    name = args[0] if args else kwargs.get("name")
    return str(name) if name else ""


def _is_unsupported_meta_error(exc: BaseException) -> bool:
    """Return True when *exc* reports that the ``meta`` keyword was rejected.

    Checking for ``'meta'`` specifically keeps the compatibility fallback from
    firing on an unrelated "unexpected keyword argument" TypeError raised
    somewhere inside the original call, which would otherwise cause the tool
    to be invoked a second time.
    """
    message = str(exc)
    return "unexpected keyword argument" in message and "'meta'" in message


def patch_mcp_client_session_trace_meta() -> None:
    """Attach catalog trace id to MCP tools/call params._meta.

    Replaces ``ClientSession.call_tool`` with a wrapper that, for tools listed
    in ``_TRACE_META_TOOL_NAMES``, adds ``trace_id`` to the ``meta`` keyword
    (an existing truthy ``meta["trace_id"]`` is preserved). The function is
    idempotent: once the wrapper is installed, later calls return immediately.
    """
    if getattr(ClientSession.call_tool, _PATCHED_ATTR, False):
        return

    original_call_tool = ClientSession.call_tool

    @wraps(original_call_tool)
    async def call_tool_with_trace_meta(self: ClientSession, *args: Any, **kwargs: Any) -> Any:
        tool_name = _get_tool_name(args, kwargs)
        trace_id = _get_trace_id() if tool_name in _TRACE_META_TOOL_NAMES else ""
        if trace_id:
            meta = kwargs.get("meta")
            if isinstance(meta, dict):
                # Copy instead of mutating the caller's dict; keep its trace_id if set.
                meta = {**meta, "trace_id": meta.get("trace_id") or trace_id}
            else:
                meta = {"trace_id": trace_id}
            kwargs["meta"] = meta

        try:
            return await original_call_tool(self, *args, **kwargs)
        except TypeError as exc:
            # Fall back only when the wrapped call_tool itself does not accept
            # the `meta` keyword that was injected above.
            if trace_id and "meta" in kwargs and _is_unsupported_meta_error(exc):
                return await _call_tool_with_meta_compat(self, *args, **kwargs)
            raise

    setattr(call_tool_with_trace_meta, _PATCHED_ATTR, True)
    ClientSession.call_tool = call_tool_with_trace_meta


async def _call_tool_with_meta_compat(self: ClientSession, *args: Any, **kwargs: Any) -> Any:
    """Call tools/call with _meta for MCP SDK versions before call_tool(meta=...).

    Arguments are read positionally in the order ``name``, ``arguments``,
    ``read_timeout_seconds``, ``progress_callback``, falling back to the
    same-named keywords (``args`` is accepted as an alias for ``arguments``).

    Raises:
        TypeError: if no tool name was supplied.
    """
    name = _get_tool_name(args, kwargs)
    if not name:
        raise TypeError("call_tool() missing required argument: 'name'")

    arguments = args[1] if len(args) > 1 else kwargs.get("arguments", kwargs.get("args"))
    read_timeout_seconds = (
        args[2] if len(args) > 2 else kwargs.get("read_timeout_seconds")
    )
    progress_callback = (
        args[3] if len(args) > 3 else kwargs.get("progress_callback")
    )
    meta = kwargs.get("meta")

    # Anything that is not a dict is sent as "no meta".
    request_meta = meta if isinstance(meta, dict) else None
    result = await self.send_request(
        types.ClientRequest(
            types.CallToolRequest(
                method="tools/call",
                params=types.CallToolRequestParams(
                    name=name,
                    arguments=arguments,
                    _meta=request_meta,
                ),
            )
        ),
        types.CallToolResult,
        request_read_timeout_seconds=read_timeout_seconds,
        progress_callback=progress_callback,
    )

    # The validation hook is looked up dynamically because it may not exist on
    # this session object; it is only run for non-error results.
    validate_tool_result = getattr(self, "_validate_tool_result", None)
    if validate_tool_result and not result.isError:
        await validate_tool_result(name, result)

    return result
logger = logging.getLogger('app')

# Regex to extract YAML frontmatter and body from markdown files
_FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n?(.*)$", re.DOTALL)


def _parse_agent_md(file_path: Path) -> Optional[dict]:
    """Parse a sub-agent markdown file with YAML frontmatter.

    Args:
        file_path: Path to the .md file.

    Returns:
        Dict with keys: name, description, system_prompt, tool_names
        (list[str] | None) and source (the file path as a string).
        None if the file cannot be read or decoded, has no frontmatter, has
        invalid YAML, or lacks a non-empty string ``name`` / ``description``.
    """
    try:
        # utf-8-sig also strips a leading BOM, which would otherwise stop the
        # frontmatter regex from matching at the start of the file.
        content = file_path.read_text(encoding="utf-8-sig")
    except (OSError, UnicodeDecodeError) as e:
        # A badly encoded file must not abort loading of every other sub-agent.
        logger.warning(f"Failed to read sub-agent file {file_path}: {e}")
        return None

    match = _FRONTMATTER_RE.match(content)
    if not match:
        logger.warning(f"Sub-agent file {file_path} has no valid frontmatter")
        return None

    frontmatter_str, body = match.group(1), match.group(2)

    try:
        frontmatter = yaml.safe_load(frontmatter_str)
    except yaml.YAMLError as e:
        logger.warning(f"Invalid YAML in sub-agent file {file_path}: {e}")
        return None

    if not isinstance(frontmatter, dict):
        logger.warning(f"Frontmatter in {file_path} is not a dict")
        return None

    raw_name = frontmatter.get("name")
    raw_description = frontmatter.get("description")
    name = raw_name.strip() if isinstance(raw_name, str) else ""
    description = raw_description.strip() if isinstance(raw_description, str) else ""

    if not name:
        logger.warning(f"Sub-agent file {file_path} missing required 'name' field")
        return None
    if not description:
        logger.warning(f"Sub-agent file {file_path} missing required 'description' field")
        return None

    # Parse optional tools field: comma-separated string or YAML list of names.
    tool_names = None
    tools_field = frontmatter.get("tools")
    if isinstance(tools_field, str):
        tool_names = [t.strip() for t in tools_field.split(",") if t.strip()]
    elif isinstance(tools_field, list):
        # Skip empty YAML list items (None) instead of turning them into "None".
        tool_names = [str(t).strip() for t in tools_field if t is not None and str(t).strip()]
    elif tools_field is not None:
        # NOTE(review): tool_names stays None here, which callers treat as
        # "inherit all tools" -- confirm that failing open is intended.
        logger.warning(f"Invalid 'tools' field in {file_path}, expected string or list")

    return {
        "name": name,
        "description": description,
        "system_prompt": body.strip(),
        "tool_names": tool_names,
        "source": str(file_path),
    }


def _filter_tools_by_names(all_tools: list[BaseTool], tool_names: list[str]) -> list[BaseTool]:
    """Filter MCP tools by name whitelist.

    Args:
        all_tools: All available MCP tools.
        tool_names: Whitelist of tool names to include.

    Returns:
        Tools whose name is in the whitelist, in whitelist order, each at most
        once. A warning is logged for every requested name that is not found.
    """
    tool_lookup = {tool.name: tool for tool in all_tools}
    filtered = []
    seen = set()
    for name in tool_names:
        # A name repeated in the whitelist must not yield the same tool twice.
        if name in seen:
            continue
        seen.add(name)
        if name in tool_lookup:
            filtered.append(tool_lookup[name])
        else:
            logger.warning(
                f"Sub-agent tool '{name}' not found in MCP tools. Available: {list(tool_lookup)}"
            )
    return filtered


async def load_subagents(
    bot_id: str,
    tools: list[BaseTool],
    model: BaseChatModel,
) -> list[SubAgent]:
    """Load sub-agent definitions from skill directories.

    Scans all skill directories for the given bot_id, looking for agents/*.md files
    in each skill subdirectory. Directories and files are visited in sorted order
    so that, when two files declare the same name, which one wins is deterministic.

    Args:
        bot_id: Bot identifier for locating skill directories.
        tools: All available MCP tools for filtering.
        model: The main agent's model, used by each sub-agent.

    Returns:
        List of SubAgent dicts. Empty list if no sub-agents found.
    """
    skill_dirs = _get_skill_dirs(bot_id)
    parsed_agents: dict[str, dict] = {}  # name -> parsed dict (last-wins for dedup)

    for skill_dir in skill_dirs:
        # isdir (not exists) so a stray file at this path never reaches listdir.
        if not os.path.isdir(skill_dir):
            continue

        for skill_name in sorted(os.listdir(skill_dir)):
            agents_dir = Path(skill_dir) / skill_name / "agents"
            if not agents_dir.is_dir():
                continue

            for md_file in sorted(agents_dir.glob("*.md")):
                parsed = _parse_agent_md(md_file)
                if parsed is None:
                    continue

                name = parsed["name"]
                if name in parsed_agents:
                    logger.warning(
                        f"Duplicate sub-agent name '{name}': "
                        f"{parsed_agents[name]['source']} overridden by {parsed['source']}"
                    )
                parsed_agents[name] = parsed

    if not parsed_agents:
        return []

    # Build SubAgent dicts with model and filtered tools
    subagents: list[SubAgent] = []
    for name, parsed in parsed_agents.items():
        # Filter tools: if tool_names specified, filter; otherwise inherit all
        if parsed["tool_names"] is not None:
            filtered_tools = _filter_tools_by_names(tools, parsed["tool_names"])
        else:
            filtered_tools = list(tools)

        subagent: SubAgent = {
            "name": name,
            "description": parsed["description"],
            "system_prompt": parsed["system_prompt"],
            "model": model,
            "tools": filtered_tools,
        }
        subagents.append(subagent)
        logger.info(f"Loaded sub-agent '{name}' with {len(filtered_tools)} tools from {parsed['source']}")

    return subagents
sync_sandbox_to_local from utils.settings import DAYTONA_ENABLED +from utils.structured_log import emit_question_metric router = APIRouter() @@ -43,6 +45,7 @@ async def enhanced_generate_stream_response( # Cancellation management cancel_event = None + request_started_at = config.request_started_at or time.monotonic() try: # Create output queue and control events @@ -89,6 +92,8 @@ async def enhanced_generate_stream_response( logger.info(f"Starting agent stream response") chunk_id = 0 message_tag = "" + last_answer_first_char_duration_ms = None + waiting_for_answer_first_char = False agent, checkpointer, sandbox = await init_agent(config) async for msg, metadata in agent.astream({"messages": config.messages}, stream_mode="messages", config=config.invoke_config(), max_tokens=MAX_OUTPUT_TOKENS): # Check whether a cancellation signal was received @@ -102,6 +107,7 @@ async def enhanced_generate_stream_response( # Handle tool calls if msg.tool_call_chunks: message_tag = "TOOL_CALL" + waiting_for_answer_first_char = False if config.tool_response: for tool_call_chunk in msg.tool_call_chunks: chunk_name = tool_call_chunk.get("name") if isinstance(tool_call_chunk, dict) else getattr(tool_call_chunk, "name", None) @@ -120,12 +126,20 @@ async def enhanced_generate_stream_response( continue if meta_message_tag != message_tag: message_tag = meta_message_tag + waiting_for_answer_first_char = meta_message_tag == "ANSWER" new_content = f"[{meta_message_tag}]\n" if msg.text: + if meta_message_tag == "ANSWER" and waiting_for_answer_first_char and msg.text.strip(): + last_answer_first_char_duration_ms = max( + int((time.monotonic() - request_started_at) * 1000), + 0, + ) + waiting_for_answer_first_char = False new_content += msg.text # Handle tool responses elif isinstance(msg, ToolMessage) and msg.content: message_tag = "TOOL_RESPONSE" + waiting_for_answer_first_char = False if config.tool_response: new_content = f"[{message_tag}] {msg.name}\n{msg.text}\n" @@ -142,6 +156,25 @@ async 
def enhanced_generate_stream_response( # Send final chunk finish = "cancelled" if (cancel_event and cancel_event.is_set()) else "stop" + if last_answer_first_char_duration_ms is not None: + emit_question_metric( + stage="catalog_agent.final_answer_first_char", + status="cancel" if finish == "cancelled" else "success", + duration_ms=last_answer_first_char_duration_ms, + first_response_time_ms=last_answer_first_char_duration_ms, + trace_id=config.trace_id, + ai_id=config.bot_id, + session_id=config.session_id, + robot_type="agent", + model=config.model_name, + stream=config.stream, + extra={ + "bot_id": config.bot_id, + "tool_response": config.tool_response, + "enable_thinking": config.enable_thinking, + "response_mode": "final_answer_first_char", + }, + ) final_chunk = create_stream_chunk(f"chatcmpl-{chunk_id + 1}", config.model_name, finish_reason=finish) await output_queue.put(("agent", f"data: {json.dumps(final_chunk, ensure_ascii=False)}\n\n")) # ============ Execute PostAgent hooks ============ @@ -153,9 +186,11 @@ async def enhanced_generate_stream_response( await output_queue.put(("agent_done", None)) except Exception as e: - logger.error(f"Error in agent task: {e}") + logger.error(f"Error in agent task: {e}\n{traceback.format_exc()}") # Send error information to the client - await output_queue.put(("agent", f'data: {{"error": "{str(e)}"}}\n\n')) + await output_queue.put( + ("agent", f"data: {json.dumps({'error': str(e)}, ensure_ascii=False)}\n\n") + ) # Send completion signal to ensure the output controller exits normally await output_queue.put(("agent_done", None)) @@ -511,6 +546,7 @@ async def chat_completions(request: ChatRequest, authorization: Optional[str] = {"dataset_ids": ["project-123", "project-456"], "bot_id": "my-bot-002", "messages": [{"role": "user", "content": "Hello"}]} {"dataset_ids": ["project-123"], "bot_id": "my-catalog-bot", "messages": [{"role": "user", "content": "Hello"}]} """ + request_started_at = time.monotonic() try: # v1 
endpoint: extract the API key from the Authorization header as the model API key api_key = extract_api_key_from_auth(authorization) @@ -531,6 +567,7 @@ async def chat_completions(request: ChatRequest, authorization: Optional[str] = messages = process_messages(request.messages, request.language) # Create AgentConfig object config = await AgentConfig.from_v1_request(request, api_key, project_dir, generate_cfg, messages) + config.request_started_at = request_started_at # Call the shared agent creation and response generation logic return await create_agent_and_generate_response(config) @@ -753,6 +790,7 @@ async def chat_completions_v2(request: ChatRequestV2, authorization: Optional[st - Uses MD5 hash of MASTERKEY:bot_id for backend API authentication - Optionally uses API key from bot config for model access """ + request_started_at = time.monotonic() try: # Get bot_id (required parameter) bot_id = request.bot_id @@ -799,6 +837,7 @@ async def chat_completions_v2(request: ChatRequestV2, authorization: Optional[st api_key = req_api_key if req_api_key and req_api_key != "whatever" else None # Create AgentConfig object config = await AgentConfig.from_v2_request(request, bot_config, project_dir, messages, generate_cfg, model_name=model_name, model_server=model_server, api_key=api_key) + config.request_started_at = request_started_at # Call the shared agent creation and response generation logic return await create_agent_and_generate_response(config) diff --git a/skills/autoload/onprem/rag-retrieve/hooks/retrieval-policy-forbidden-self-knowledge.md b/skills/autoload/onprem/rag-retrieve/hooks/retrieval-policy-forbidden-self-knowledge.md index b6c1296..7ea5e4b 100644 --- a/skills/autoload/onprem/rag-retrieve/hooks/retrieval-policy-forbidden-self-knowledge.md +++ b/skills/autoload/onprem/rag-retrieve/hooks/retrieval-policy-forbidden-self-knowledge.md @@ -14,7 +14,7 @@ For knowledge retrieval tasks, **this policy overrides generic codebase explorat - **Prohibited answer 
source**: the model's own parametric knowledge, memory, prior world knowledge, intuition, common sense completion, or unsupported inference. - **Prohibited tools**: `Glob`, `Read`, `LS`, Bash (`ls`, `find`, `cat`, `head`, `tail`, `grep`, etc.) — these are forbidden even when retrieval results are empty/insufficient, even if local files seem helpful. -- **Allowed tools only**: skill-enabled retrieval tools, `table_rag_retrieve`, `rag_retrieve`. No other source for factual answering. +- **Allowed tools only**: skill-enabled retrieval tools, `rag_retrieve`. No other source for factual answering. - Local filesystem is a **prohibited** knowledge source, not merely non-recommended. - Exception: user explicitly asks to read a specific local file as the task itself. - If retrieval evidence is absent, insufficient, or ambiguous, **do not fill the gap with model knowledge**. @@ -35,13 +35,18 @@ For any knowledge retrieval task: Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe filesystem first. 1. **Skill-enabled retrieval tools** (use first when available) -2. **`table_rag_retrieve`** or **`rag_retrieve`**: - - Prefer `table_rag_retrieve` for: values, prices, quantities, specs, rankings, comparisons, lists, tables, name lookup, historical coverage, mixed/unclear cases. - - Prefer `rag_retrieve` for: pure concept, definition, workflow, policy, or explanation questions only. +2. **`rag_retrieve`** - After each step, evaluate sufficiency before proceeding. - Retrieval must happen **before** any factual answer generation. +### First-Call Success Principle + +- The first retrieval call is expected to return sufficient results for most questions. +- Your default assumption should be: **one call is enough**. +- Additional calls are the exception, not the norm. Only retry when results are genuinely useless (empty, error, completely off-topic). 
+- **Never retry just to "find better results" or "get more comprehensive coverage".** Good enough is sufficient. + ## 4. Query Preparation - Do NOT pass raw user question unless it already works well for retrieval. @@ -50,27 +55,51 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi ## 5. Retrieval Breadth (`top_k`) -- Apply `top_k` only to `rag_retrieve`. Use smallest sufficient value, expand if insufficient. -- `30` for simple fact lookup → `50` for moderate synthesis/comparison → `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline). -- Expansion order: `30 → 50 → 100`. If unsure, use `100`. +- Apply `top_k` only to `rag_retrieve`. Choose the appropriate value upfront to maximize first-call success. +- Use `50` for simple fact lookup or moderate synthesis, comparison, summarization, disambiguation. +- Use `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline). +- If unsure, use `50`. Only escalate to `100` on the retry call if first results are insufficient. ## 6. Result Evaluation -Treat as insufficient if: empty, `Error:`, `no excel files found`, off-topic, missing core entity/scope, no usable evidence, partial coverage, truncated results, or claims required by the answer are not explicitly supported. +**Maximum 3 retrieval calls per question.** After each call, evaluate immediately: + +### Sufficient — answer immediately, no more calls + +ANY of the following means results are sufficient — STOP and answer now: +- The core entity/topic in the user's question appears in the results. +- There is ANY direct or indirect evidence relevant to the user's question. +- Results are partially relevant, even if not perfectly comprehensive. +- You can compose a meaningful answer (even a partial one) from the retrieved content. 
+ +**Anti-patterns — do NOT do these:** +- ❌ "The results are good, but maybe different keywords could find something better." +- ❌ "I have enough to answer, but let me try one more query to be thorough." +- ❌ "The answer is here, but I want to double-check with a different query." +- ❌ Calling retrieval again after you have already identified the answer in previous results. + +**If you can answer the question with current results, you MUST answer immediately. Period.** + +### Insufficient — the ONLY valid reasons to retry + +- Results are completely empty or contain only `Error:` messages. +- ALL results are entirely off-topic with zero relevance to the user's question. +- No usable evidence exists at all — you cannot form even a partial answer. + +**"Results are not detailed enough" is NOT a valid reason to retry.** +**"Results might be incomplete" is NOT a valid reason to retry.** ## 7. Fallback and Sequential Retry -On insufficient results, follow this sequence: +On insufficient results, you may retry **up to 2 more times** (3 calls total): -1. Rewrite query, retry same tool (once) -2. Switch to next retrieval source in default order -3. For `rag_retrieve`, expand `top_k`: `30 → 50 → 100` -4. `table_rag_retrieve` insufficient → try `rag_retrieve`; `rag_retrieve` insufficient → try `table_rag_retrieve` +1. Rewrite query, retry same tool. +2. For `rag_retrieve`, escalate `top_k` to `100` on retry. -- `table_rag_retrieve` internally falls back to `rag_retrieve` on `no excel files found`, but this does NOT change the higher-level order. -- Say "no relevant information was found" **only after** exhausting all retrieval sources. +- Say "no relevant information was found" **only after** exhausting all retries. - Do NOT switch to local filesystem inspection at any point. - Do NOT switch to model self-knowledge at any point. +- Do NOT call any retrieval tool more than 3 times in total. ## 8. 
Handling Missing or Partial Evidence @@ -79,13 +108,7 @@ On insufficient results, follow this sequence: - Prefer "the retrieved materials do not provide this information" over speculative completion. - When user asks for a definitive answer but evidence is incomplete, state the limitation directly. -## 9. Table RAG Result Handling - -- Follow all `[INSTRUCTION]` and `[EXTRA_INSTRUCTION]` in results. -- If truncated: tell user total (`N+M`), displayed (`N`), omitted (`M`). -- Cite sources using filenames from `file_ref_table`. - -## 10. Image Handling +## 9. Image Handling - The content returned by the `rag_retrieve` tool may include images. - Each image is exclusively associated with its nearest text or sentence. @@ -94,14 +117,7 @@ On insufficient results, follow this sequence: - Each sentence or key point in the response should be accompanied by relevant images when they meet the established association criteria. - Avoid placing all images at the end of the response. -## 11. Citation Requirements - -- MUST generate `` tags when using retrieval results. -- Place citations immediately after the paragraph or bullet list using the knowledge. Do NOT collect at end. -- 1-2 citations per paragraph/bullet. At least 1 citation when using retrieved knowledge. -- Do NOT cite claims that were not supported by retrieval. - -## 12. Self-Knowledge Prohibition +## 10. Self-Knowledge Prohibition This section applies whenever self-knowledge is disabled or forbidden for the current task. @@ -111,19 +127,19 @@ This section applies whenever self-knowledge is disabled or forbidden for the cu - The model must not supplement missing parts with general knowledge, conceptual explanation, common background, intuition, or likely completion. - The model must not use self-knowledge to invent or complete private, internal, current, precise, or source-sensitive facts. 
- The model must not use self-knowledge to invent or complete prices, fees, discounts, rankings, internal policies, user-specific details, current status, latest updates, exact numbers, dates, metrics, or specifications. -- Retrieved facts must include citations. - Unsupported parts must be stated as unavailable rather than guessed. - If a paragraph would mix retrieved facts and unsupported completion, remove the unsupported completion. - If evidence is incomplete, state the limitation explicitly. -## 13. Pre-Reply Self-Check +## 11. Pre-Reply Self-Check Before replying to a knowledge retrieval task, verify: - Used only whitelisted retrieval tools — no local filesystem inspection? +- Called retrieval at most 3 times total (not more)? +- Answered immediately when results were sufficient (did NOT call again unnecessarily)? +- Called retrieval exactly once when first results were sufficient (did NOT retry unnecessarily)? - Did retrieval happen before any factual answer drafting? - Did every factual claim come from retrieved evidence rather than model knowledge? -- Exhausted retrieval flow before concluding "not found"? -- Citations placed immediately after each relevant paragraph? - If any unsupported part remained, was it removed or explicitly marked unavailable? If any answer is "no", correct the process first. diff --git a/skills/autoload/onprem/rag-retrieve/hooks/retrieval-policy.md b/skills/autoload/onprem/rag-retrieve/hooks/retrieval-policy.md index 1f0c1fe..61378ff 100644 --- a/skills/autoload/onprem/rag-retrieve/hooks/retrieval-policy.md +++ b/skills/autoload/onprem/rag-retrieve/hooks/retrieval-policy.md @@ -29,6 +29,13 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi - Do NOT answer from model knowledge first. - After each step, evaluate sufficiency before proceeding. +### First-Call Success Principle + +- The first retrieval call is expected to return sufficient results for most questions. 
+- Your default assumption should be: **one call is enough**. +- Additional calls are the exception, not the norm. Only retry when results are genuinely useless (empty, error, completely off-topic). +- **Never retry just to "find better results" or "get more comprehensive coverage".** Good enough is sufficient. + ## 3. Query Preparation - Do NOT pass raw user question unless it already works well for retrieval. @@ -37,26 +44,53 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi ## 4. Retrieval Breadth (`top_k`) -- Apply `top_k` only to `rag_retrieve`. Use smallest sufficient value, expand if insufficient. -- `30` for simple fact lookup → `50` for moderate synthesis/comparison → `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline). -- Expansion order: `30 → 50 → 100`. If unsure, use `100`. +- Apply `top_k` only to `rag_retrieve`. Choose the appropriate value upfront to maximize first-call success. +- Use `50` for simple fact lookup or moderate synthesis, comparison, summarization, disambiguation. +- Use `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline). +- If unsure, use `50`. Only escalate to `100` on the retry call if first results are insufficient. ## 5. Result Evaluation -Treat as insufficient if: empty, `Error:`, `no excel files found`, off-topic, missing core entity/scope, no usable evidence, partial coverage, or truncated results. +**Maximum 3 retrieval calls per question.** After each call, evaluate immediately: + +### Sufficient — answer immediately, no more calls + +ANY of the following means results are sufficient — STOP and answer now: +- The core entity/topic in the user's question appears in the results. +- There is ANY direct or indirect evidence relevant to the user's question. +- Results are partially relevant, even if not perfectly comprehensive. 
+- You can compose a meaningful answer (even a partial one) from the retrieved content. + +**Anti-patterns — do NOT do these:** +- ❌ "The results are good, but maybe different keywords could find something better." +- ❌ "I have enough to answer, but let me try one more query to be thorough." +- ❌ "The answer is here, but I want to double-check with a different query." +- ❌ Calling retrieval again after you have already identified the answer in previous results. + +**If you can answer the question with current results, you MUST answer immediately. Period.** + +### Insufficient — the ONLY valid reasons to retry + +- Results are completely empty or contain only `Error:` / `no excel files found` messages. +- ALL results are entirely off-topic with zero relevance to the user's question. +- No usable evidence exists at all — you cannot form even a partial answer. + +**"Results are not detailed enough" is NOT a valid reason to retry.** +**"Results might be incomplete" is NOT a valid reason to retry.** ## 6. Fallback and Sequential Retry -On insufficient results, follow this sequence: +On insufficient results, you may retry **up to 2 more times** (3 calls total): -1. Rewrite query, retry same tool (once) -2. Switch to next retrieval source in default order -3. For `rag_retrieve`, expand `top_k`: `30 → 50 → 100` -4. `table_rag_retrieve` insufficient → try `rag_retrieve`; `rag_retrieve` insufficient → try `table_rag_retrieve` +1. Rewrite query, retry same tool. +2. Switch to next retrieval source in default order. +3. For `rag_retrieve`, escalate `top_k` to `100` on retry. +4. `table_rag_retrieve` insufficient → try `rag_retrieve`; `rag_retrieve` insufficient → try `table_rag_retrieve`. - `table_rag_retrieve` internally falls back to `rag_retrieve` on `no excel files found`, but this does NOT change the higher-level order. - Say "no relevant information was found" **only after** exhausting all retrieval sources. - Do NOT switch to local filesystem inspection at any point. 
+- Do NOT call any retrieval tool more than 3 times in total. ## 7. Table RAG Result Handling @@ -99,7 +133,9 @@ This section applies only when self-knowledge is enabled. Before replying to a knowledge retrieval task, verify: - Used only whitelisted retrieval tools — no local filesystem inspection? -- Exhausted retrieval flow before concluding "not found"? +- Called retrieval at most 3 times total (not more)? +- Answered immediately when results were sufficient (did NOT call again unnecessarily)? +- Called retrieval exactly once when first results were sufficient (did NOT retry unnecessarily)? - Citations placed immediately after each relevant paragraph? - If self-knowledge was used, was it clearly separated from retrieved facts and limited to allowed supplement scope? diff --git a/skills/autoload/onprem/rag-retrieve/rag_retrieve_server.py b/skills/autoload/onprem/rag-retrieve/rag_retrieve_server.py index 2575644..6f308e2 100644 --- a/skills/autoload/onprem/rag-retrieve/rag_retrieve_server.py +++ b/skills/autoload/onprem/rag-retrieve/rag_retrieve_server.py @@ -73,7 +73,7 @@ Format: `` """ -def rag_retrieve(query: str, top_k: int = 100) -> Dict[str, Any]: +def rag_retrieve(query: str, top_k: int = 100, trace_id: str = "") -> Dict[str, Any]: """Call the RAG retrieval API.""" try: bot_id = "" @@ -100,6 +100,8 @@ def rag_retrieve(query: str, top_k: int = 100) -> Dict[str, Any]: "content-type": "application/json", "authorization": f"Bearer {auth_token}" } + if trace_id: + headers["X-Request-ID"] = trace_id data = { "query": query, "top_k": top_k @@ -172,7 +174,7 @@ def rag_retrieve(query: str, top_k: int = 100) -> Dict[str, Any]: } -def table_rag_retrieve(query: str) -> Dict[str, Any]: +def table_rag_retrieve(query: str, trace_id: str = "") -> Dict[str, Any]: """Call the Table RAG retrieval API.""" try: bot_id = "" @@ -189,6 +191,8 @@ def table_rag_retrieve(query: str) -> Dict[str, Any]: "content-type": "application/json", "authorization": f"Bearer {auth_token}" } + if 
trace_id: + headers["X-Request-ID"] = trace_id data = { "query": query, } @@ -220,7 +224,7 @@ def table_rag_retrieve(query: str) -> Dict[str, Any]: if "markdown" in response_data: markdown_content = response_data["markdown"] if re.search(r"^no excel files found", markdown_content, re.IGNORECASE): - rag_result = rag_retrieve(query) + rag_result = rag_retrieve(query, trace_id=trace_id) content = rag_result.get("content", []) if content and content[0].get("type") == "text": content[0]["text"] = "No table_rag_retrieve results were found. The content below is the fallback result from rag_retrieve:\n\n" + content[0]["text"] @@ -302,6 +306,8 @@ async def handle_request(request: Dict[str, Any]) -> Dict[str, Any]: elif method == "tools/call": tool_name = params.get("name") arguments = params.get("arguments", {}) + meta = params.get("_meta") or params.get("meta") or {} + trace_id = meta.get("trace_id", "") if isinstance(meta, dict) else "" if tool_name == "rag_retrieve": query = arguments.get("query", "") @@ -310,7 +316,7 @@ async def handle_request(request: Dict[str, Any]) -> Dict[str, Any]: if not query: return create_error_response(request_id, -32602, "Missing required parameter: query") - result = rag_retrieve(query, top_k) + result = rag_retrieve(query, top_k, trace_id) return { "jsonrpc": "2.0", @@ -324,7 +330,7 @@ async def handle_request(request: Dict[str, Any]) -> Dict[str, Any]: if not query: return create_error_response(request_id, -32602, "Missing required parameter: query") - result = table_rag_retrieve(query) + result = table_rag_retrieve(query, trace_id) return { "jsonrpc": "2.0", diff --git a/skills/autoload/support/rag-retrieve/hooks/retrieval-policy-forbidden-self-knowledge.md b/skills/autoload/support/rag-retrieve/hooks/retrieval-policy-forbidden-self-knowledge.md index b6c1296..061c855 100644 --- a/skills/autoload/support/rag-retrieve/hooks/retrieval-policy-forbidden-self-knowledge.md +++ 
b/skills/autoload/support/rag-retrieve/hooks/retrieval-policy-forbidden-self-knowledge.md @@ -42,6 +42,13 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi - After each step, evaluate sufficiency before proceeding. - Retrieval must happen **before** any factual answer generation. +### First-Call Success Principle + +- The first retrieval call is expected to return sufficient results for most questions. +- Your default assumption should be: **one call is enough**. +- Additional calls are the exception, not the norm. Only retry when results are genuinely useless (empty, error, completely off-topic). +- **Never retry just to "find better results" or "get more comprehensive coverage".** Good enough is sufficient. + ## 4. Query Preparation - Do NOT pass raw user question unless it already works well for retrieval. @@ -50,27 +57,54 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi ## 5. Retrieval Breadth (`top_k`) -- Apply `top_k` only to `rag_retrieve`. Use smallest sufficient value, expand if insufficient. -- `30` for simple fact lookup → `50` for moderate synthesis/comparison → `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline). -- Expansion order: `30 → 50 → 100`. If unsure, use `100`. +- Apply `top_k` only to `rag_retrieve`. Choose the appropriate value upfront to maximize first-call success. +- Use `50` for simple fact lookup or moderate synthesis, comparison, summarization, disambiguation. +- Use `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline). +- If unsure, use `50`. Only escalate to `100` on the retry call if first results are insufficient. ## 6. 
Result Evaluation -Treat as insufficient if: empty, `Error:`, `no excel files found`, off-topic, missing core entity/scope, no usable evidence, partial coverage, truncated results, or claims required by the answer are not explicitly supported. +**Maximum 3 retrieval calls per question.** After each call, evaluate immediately: + +### Sufficient — answer immediately, no more calls + +ANY of the following means results are sufficient — STOP and answer now: +- The core entity/topic in the user's question appears in the results. +- There is ANY direct or indirect evidence relevant to the user's question. +- Results are partially relevant, even if not perfectly comprehensive. +- You can compose a meaningful answer (even a partial one) from the retrieved content. + +**Anti-patterns — do NOT do these:** +- ❌ "The results are good, but maybe different keywords could find something better." +- ❌ "I have enough to answer, but let me try one more query to be thorough." +- ❌ "The answer is here, but I want to double-check with a different query." +- ❌ Calling retrieval again after you have already identified the answer in previous results. + +**If you can answer the question with current results, you MUST answer immediately. Period.** + +### Insufficient — the ONLY valid reasons to retry + +- Results are completely empty or contain only `Error:` / `no excel files found` messages. +- ALL results are entirely off-topic with zero relevance to the user's question. +- No usable evidence exists at all — you cannot form even a partial answer. + +**"Results are not detailed enough" is NOT a valid reason to retry.** +**"Results might be incomplete" is NOT a valid reason to retry.** ## 7. Fallback and Sequential Retry -On insufficient results, follow this sequence: +On insufficient results, you may retry **up to 2 more times** (3 calls total): -1. Rewrite query, retry same tool (once) -2. Switch to next retrieval source in default order -3. 
For `rag_retrieve`, expand `top_k`: `30 → 50 → 100` -4. `table_rag_retrieve` insufficient → try `rag_retrieve`; `rag_retrieve` insufficient → try `table_rag_retrieve` +1. Rewrite query, retry same tool. +2. Switch to next retrieval source in default order. +3. For `rag_retrieve`, escalate `top_k` to `100` on retry. +4. `table_rag_retrieve` insufficient → try `rag_retrieve`; `rag_retrieve` insufficient → try `table_rag_retrieve`. - `table_rag_retrieve` internally falls back to `rag_retrieve` on `no excel files found`, but this does NOT change the higher-level order. -- Say "no relevant information was found" **only after** exhausting all retrieval sources. +- Say "no relevant information was found" **only after** exhausting all retries. - Do NOT switch to local filesystem inspection at any point. - Do NOT switch to model self-knowledge at any point. +- Do NOT call any retrieval tool more than 3 times in total. ## 8. Handling Missing or Partial Evidence @@ -83,7 +117,6 @@ On insufficient results, follow this sequence: - Follow all `[INSTRUCTION]` and `[EXTRA_INSTRUCTION]` in results. - If truncated: tell user total (`N+M`), displayed (`N`), omitted (`M`). -- Cite sources using filenames from `file_ref_table`. ## 10. Image Handling @@ -94,14 +127,7 @@ On insufficient results, follow this sequence: - Each sentence or key point in the response should be accompanied by relevant images when they meet the established association criteria. - Avoid placing all images at the end of the response. -## 11. Citation Requirements - -- MUST generate `` tags when using retrieval results. -- Place citations immediately after the paragraph or bullet list using the knowledge. Do NOT collect at end. -- 1-2 citations per paragraph/bullet. At least 1 citation when using retrieved knowledge. -- Do NOT cite claims that were not supported by retrieval. - -## 12. Self-Knowledge Prohibition +## 11. 
Self-Knowledge Prohibition This section applies whenever self-knowledge is disabled or forbidden for the current task. @@ -111,19 +137,19 @@ This section applies whenever self-knowledge is disabled or forbidden for the cu - The model must not supplement missing parts with general knowledge, conceptual explanation, common background, intuition, or likely completion. - The model must not use self-knowledge to invent or complete private, internal, current, precise, or source-sensitive facts. - The model must not use self-knowledge to invent or complete prices, fees, discounts, rankings, internal policies, user-specific details, current status, latest updates, exact numbers, dates, metrics, or specifications. -- Retrieved facts must include citations. - Unsupported parts must be stated as unavailable rather than guessed. - If a paragraph would mix retrieved facts and unsupported completion, remove the unsupported completion. - If evidence is incomplete, state the limitation explicitly. -## 13. Pre-Reply Self-Check +## 12. Pre-Reply Self-Check Before replying to a knowledge retrieval task, verify: - Used only whitelisted retrieval tools — no local filesystem inspection? +- Called retrieval at most 3 times total (not more)? +- Answered immediately when results were sufficient (did NOT call again unnecessarily)? +- Called retrieval exactly once when first results were sufficient (did NOT retry unnecessarily)? - Did retrieval happen before any factual answer drafting? - Did every factual claim come from retrieved evidence rather than model knowledge? -- Exhausted retrieval flow before concluding "not found"? -- Citations placed immediately after each relevant paragraph? - If any unsupported part remained, was it removed or explicitly marked unavailable? If any answer is "no", correct the process first. 
diff --git a/skills/autoload/support/rag-retrieve/hooks/retrieval-policy.md b/skills/autoload/support/rag-retrieve/hooks/retrieval-policy.md index 1f0c1fe..61378ff 100644 --- a/skills/autoload/support/rag-retrieve/hooks/retrieval-policy.md +++ b/skills/autoload/support/rag-retrieve/hooks/retrieval-policy.md @@ -29,6 +29,13 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi - Do NOT answer from model knowledge first. - After each step, evaluate sufficiency before proceeding. +### First-Call Success Principle + +- The first retrieval call is expected to return sufficient results for most questions. +- Your default assumption should be: **one call is enough**. +- Additional calls are the exception, not the norm. Only retry when results are genuinely useless (empty, error, completely off-topic). +- **Never retry just to "find better results" or "get more comprehensive coverage".** Good enough is sufficient. + ## 3. Query Preparation - Do NOT pass raw user question unless it already works well for retrieval. @@ -37,26 +44,53 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi ## 4. Retrieval Breadth (`top_k`) -- Apply `top_k` only to `rag_retrieve`. Use smallest sufficient value, expand if insufficient. -- `30` for simple fact lookup → `50` for moderate synthesis/comparison → `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline). -- Expansion order: `30 → 50 → 100`. If unsure, use `100`. +- Apply `top_k` only to `rag_retrieve`. Choose the appropriate value upfront to maximize first-call success. +- Use `50` for simple fact lookup or moderate synthesis, comparison, summarization, disambiguation. +- Use `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline). +- If unsure, use `50`. Only escalate to `100` on the retry call if first results are insufficient. ## 5. 
Result Evaluation -Treat as insufficient if: empty, `Error:`, `no excel files found`, off-topic, missing core entity/scope, no usable evidence, partial coverage, or truncated results. +**Maximum 3 retrieval calls per question.** After each call, evaluate immediately: + +### Sufficient — answer immediately, no more calls + +ANY of the following means results are sufficient — STOP and answer now: +- The core entity/topic in the user's question appears in the results. +- There is ANY direct or indirect evidence relevant to the user's question. +- Results are partially relevant, even if not perfectly comprehensive. +- You can compose a meaningful answer (even a partial one) from the retrieved content. + +**Anti-patterns — do NOT do these:** +- ❌ "The results are good, but maybe different keywords could find something better." +- ❌ "I have enough to answer, but let me try one more query to be thorough." +- ❌ "The answer is here, but I want to double-check with a different query." +- ❌ Calling retrieval again after you have already identified the answer in previous results. + +**If you can answer the question with current results, you MUST answer immediately. Period.** + +### Insufficient — the ONLY valid reasons to retry + +- Results are completely empty or contain only `Error:` / `no excel files found` messages. +- ALL results are entirely off-topic with zero relevance to the user's question. +- No usable evidence exists at all — you cannot form even a partial answer. + +**"Results are not detailed enough" is NOT a valid reason to retry.** +**"Results might be incomplete" is NOT a valid reason to retry.** ## 6. Fallback and Sequential Retry -On insufficient results, follow this sequence: +On insufficient results, you may retry **up to 2 more times** (3 calls total): -1. Rewrite query, retry same tool (once) -2. Switch to next retrieval source in default order -3. For `rag_retrieve`, expand `top_k`: `30 → 50 → 100` -4. 
`table_rag_retrieve` insufficient → try `rag_retrieve`; `rag_retrieve` insufficient → try `table_rag_retrieve` +1. Rewrite query, retry same tool. +2. Switch to next retrieval source in default order. +3. For `rag_retrieve`, escalate `top_k` to `100` on retry. +4. `table_rag_retrieve` insufficient → try `rag_retrieve`; `rag_retrieve` insufficient → try `table_rag_retrieve`. - `table_rag_retrieve` internally falls back to `rag_retrieve` on `no excel files found`, but this does NOT change the higher-level order. -- Say "no relevant information was found" **only after** exhausting all retrieval sources. +- Say "no relevant information was found" **only after** exhausting all retries. - Do NOT switch to local filesystem inspection at any point. +- Do NOT call any retrieval tool more than 3 times in total. ## 7. Table RAG Result Handling @@ -99,7 +133,9 @@ This section applies only when self-knowledge is enabled. Before replying to a knowledge retrieval task, verify: - Used only whitelisted retrieval tools — no local filesystem inspection? -- Exhausted retrieval flow before concluding "not found"? +- Called retrieval at most 3 times total (not more)? +- Answered immediately when results were sufficient (did NOT call again unnecessarily)? +- Called retrieval exactly once when first results were sufficient (did NOT retry unnecessarily)? - Citations placed immediately after each relevant paragraph? - If self-knowledge was used, was it clearly separated from retrieved facts and limited to allowed supplement scope?
diff --git a/skills/autoload/support/rag-retrieve/rag_retrieve_server.py b/skills/autoload/support/rag-retrieve/rag_retrieve_server.py index 671a456..09e0924 100644 --- a/skills/autoload/support/rag-retrieve/rag_retrieve_server.py +++ b/skills/autoload/support/rag-retrieve/rag_retrieve_server.py @@ -73,7 +73,7 @@ Format: `` """ -def rag_retrieve(query: str, top_k: int = 100) -> Dict[str, Any]: +def rag_retrieve(query: str, top_k: int = 100, trace_id: str = "") -> Dict[str, Any]: """Call the RAG retrieval API.""" try: bot_id = "" @@ -100,6 +100,8 @@ def rag_retrieve(query: str, top_k: int = 100) -> Dict[str, Any]: "content-type": "application/json", "authorization": f"Bearer {auth_token}" } + if trace_id: + headers["X-Request-ID"] = trace_id data = { "query": query, "top_k": top_k @@ -172,7 +174,7 @@ def rag_retrieve(query: str, top_k: int = 100) -> Dict[str, Any]: } -def table_rag_retrieve(query: str) -> Dict[str, Any]: +def table_rag_retrieve(query: str, trace_id: str = "") -> Dict[str, Any]: """Call the Table RAG retrieval API.""" try: bot_id = "" @@ -189,6 +191,8 @@ def table_rag_retrieve(query: str) -> Dict[str, Any]: "content-type": "application/json", "authorization": f"Bearer {auth_token}" } + if trace_id: + headers["X-Request-ID"] = trace_id data = { "query": query, } @@ -220,7 +224,7 @@ def table_rag_retrieve(query: str) -> Dict[str, Any]: if "markdown" in response_data: markdown_content = response_data["markdown"] if re.search(r"^no excel files found", markdown_content, re.IGNORECASE): - rag_result = rag_retrieve(query) + rag_result = rag_retrieve(query, trace_id=trace_id) content = rag_result.get("content", []) if content and content[0].get("type") == "text": content[0]["text"] = "No table_rag_retrieve results were found. 
The content below is the fallback result from rag_retrieve:\n\n" + content[0]["text"] @@ -302,7 +306,9 @@ async def handle_request(request: Dict[str, Any]) -> Dict[str, Any]: elif method == "tools/call": tool_name = params.get("name") arguments = params.get("arguments", {}) - + meta = params.get("_meta") or params.get("meta") or {} + trace_id = meta.get("trace_id", "") if isinstance(meta, dict) else "" + if tool_name == "rag_retrieve": query = arguments.get("query", "") top_k = arguments.get("top_k", 100) @@ -310,7 +316,7 @@ async def handle_request(request: Dict[str, Any]) -> Dict[str, Any]: if not query: return create_error_response(request_id, -32602, "Missing required parameter: query") - result = rag_retrieve(query, top_k) + result = rag_retrieve(query, top_k, trace_id) return { "jsonrpc": "2.0", @@ -324,7 +330,7 @@ async def handle_request(request: Dict[str, Any]) -> Dict[str, Any]: if not query: return create_error_response(request_id, -32602, "Missing required parameter: query") - result = table_rag_retrieve(query) + result = table_rag_retrieve(query, trace_id) return { "jsonrpc": "2.0", diff --git a/skills/developing/pmda-drug-info/.claude-plugin/plugin.json b/skills/developing/pmda-drug-info/.claude-plugin/plugin.json new file mode 100644 index 0000000..aa1055c --- /dev/null +++ b/skills/developing/pmda-drug-info/.claude-plugin/plugin.json @@ -0,0 +1,21 @@ +{ + "name": "pmda-drug-info", + "description": "PMDA drug information tools for Japanese pharmaceutical package insert queries. 
Provides drug search, master info, interactions, restrictions, dosing, and full-text chapter retrieval via PostgreSQL + OpenSearch.", + "hooks": { + "PrePrompt": [ + { + "type": "command", + "command": "python hooks/pre_prompt.py" + } + ] + }, + "mcpServers": { + "pmda_drug_info": { + "transport": "stdio", + "command": "python", + "args": [ + "./pmda_server.py" + ] + } + } +} diff --git a/skills/developing/pmda-drug-info/agents/adverse-event.md b/skills/developing/pmda-drug-info/agents/adverse-event.md new file mode 100644 index 0000000..f4be105 --- /dev/null +++ b/skills/developing/pmda-drug-info/agents/adverse-event.md @@ -0,0 +1,31 @@ +--- +name: adverse_event +description: Reverse lookup drugs by adverse event name. Find which drugs have reported a specific side effect. + Invoke when the user asks "Which drugs cause Stevens-Johnson syndrome?" or "Drugs that prolong QT interval?". + Causal inference is prohibited — information presentation only. +tools: search_section_text, search_drugs, get_drug_master, list_drug_chapters, read_drug_chapter +--- + +あなたは「副作用 → 該当薬剤の逆引き」専門の sub-agent です。 + +【ツール戦略】 +1. `search_section_text(keyword=副作用名, section_filter="副作用")` で逆引き。 + total_drugs は必ず本文中に明示する。 +2. 同義語が必要なケース: + "Stevens-Johnson" ⇔ "皮膚粘膜眼症候群" / "SJS" + "QT延長" ⇔ "Torsades de pointes" + "間質性肺炎" ⇔ "肺臓炎" + OS の synonym filter が自動展開するので 1 回の検索で OK。 +3. hit から代表薬を 3〜5 件選び、`read_drug_chapter` で 11.1 重大な副作用 / 11.2 その他の副作用 + verbatim を引用。 +4. 因果推論("この薬がこの患者の症状を起こした")は **絶対しない**。 + 情報提示のみ。 + +【絶対ルール】 +1. ツール呼び出し必須。トレーニング知識・教科書・ガイドラインからの推測は禁止。 +2. 数値・固有名・条件は本文表現を改変せず逐語引用。 +3. 出典は **必ず** `[出典: <販売名> (yj_full=) / <章番号 章タイトル>]` の形式。 + - fact 表 row には `_citation` フィールドが入っているので **そのまま転記**。 + - `[出典: 薬品マスター]` `[出典: 添付文書]` 等の汎用出典は **絶対禁止**。 + - read_drug_chapter で実際に読んだ section 以外の出典を捏造しない。 +4. 
該当情報が無ければ "添付文書からは確認できません" と書く。 diff --git a/skills/developing/pmda-drug-info/agents/interaction.md b/skills/developing/pmda-drug-info/agents/interaction.md new file mode 100644 index 0000000..b29e068 --- /dev/null +++ b/skills/developing/pmda-drug-info/agents/interaction.md @@ -0,0 +1,28 @@ +--- +name: interaction +description: Investigate drug-drug interactions between two drugs, or list all interactions for a single drug. + Invoke when the user asks "Can drug A and B be used together?" or "What are the interactions of drug A?". +tools: search_drugs, get_drug_master, get_drug_interactions, search_section_text, list_drug_chapters, read_drug_chapter +--- + +あなたは「薬剤間相互作用」専門の sub-agent です。 + +【ツール戦略】 +- A・B 両薬の yj_code を `search_drugs` で取得。 +- `get_drug_interactions(drug_a_yj=A, drug_b_yj=B)` で双方向検索(A→B も B→A も拾える)。 +- ヒットしたら drug_a の側の出典 section(10.1 / 10.2)を `list_drug_chapters` + `read_drug_chapter` で + verbatim 取得。drug_b 側にも該当記載があるか確認。 +- ヒットゼロ → "添付文書上は併用禁忌・併用注意の明確な記載なし" と書く(自由記述/警告等は + 別途 `search_section_text(keyword=B薬名, section_filter="相互作用")` で念押し)。 +- 1 薬名のみ与えられた場合は `get_drug_interactions(drug_a_yj=...)` で全相互作用一覧。 + +severity は本文の "併用禁忌" / "併用注意" の語をそのまま転記。 + +【絶対ルール】 +1. ツール呼び出し必須。トレーニング知識・教科書・ガイドラインからの推測は禁止。 +2. 数値・固有名・条件は本文表現を改変せず逐語引用。 +3. 出典は **必ず** `[出典: <販売名> (yj_full=) / <章番号 章タイトル>]` の形式。 + - fact 表 row には `_citation` フィールドが入っているので **そのまま転記**。 + - `[出典: 薬品マスター]` `[出典: 添付文書]` 等の汎用出典は **絶対禁止**。 + - read_drug_chapter で実際に読んだ section 以外の出典を捏造しない。 +4. 
該当情報が無ければ "添付文書からは確認できません" と書く。 diff --git a/skills/developing/pmda-drug-info/agents/patient-specific.md b/skills/developing/pmda-drug-info/agents/patient-specific.md new file mode 100644 index 0000000..49f5053 --- /dev/null +++ b/skills/developing/pmda-drug-info/agents/patient-specific.md @@ -0,0 +1,32 @@ +--- +name: patient_specific +description: Determine drug administration feasibility and dosage adjustment for specific patient conditions (renal impairment, hepatic impairment, pregnancy, elderly, pediatric, allergy). + Invoke when the user asks "Can this drug be used in a patient with eGFR 25?", "Is it contraindicated in pregnancy?", etc. +tools: search_drugs, get_drug_master, get_drug_restrictions, get_drug_dosing, list_drug_chapters, read_drug_chapter +--- + +あなたは「特定患者への投与可否・用量調整」専門の sub-agent です。 + +【ツール戦略】 +1. 薬名から yj_code を `search_drugs` で取得。 +2. 患者条件を condition_type に対応付け: + - 腎機能 (eGFR/CrCl) → "腎機能障害" + - 肝機能 (Child-Pugh) → "肝機能障害" + - 妊娠/授乳 → "妊婦"/"授乳婦" + - 年齢 (小児/高齢) → "小児等"/"高齢者" + - アレルギー既往 → "過敏症" + - 合併症 (糖尿病/喘息など) → "疾患" +3. `get_drug_restrictions(drug_yj=..., condition_type=...)` で該当 restriction を取得。 + condition_params の数値(例: {"eGFR_max": 30})を必ず確認。 +4. `get_drug_dosing(drug_yj=..., patient_segment=...)` で患者層別用量を取得。 +5. 必要なら原文 `read_drug_chapter` で 9.x 章 verbatim 引用。 +6. 数値判定(例: eGFR=25 ⇔ eGFR_max=30 → 該当)を agent が責任もって行う。 + +【絶対ルール】 +1. ツール呼び出し必須。トレーニング知識・教科書・ガイドラインからの推測は禁止。 +2. 数値・固有名・条件は本文表現を改変せず逐語引用。 +3. 出典は **必ず** `[出典: <販売名> (yj_full=) / <章番号 章タイトル>]` の形式。 + - fact 表 row には `_citation` フィールドが入っているので **そのまま転記**。 + - `[出典: 薬品マスター]` `[出典: 添付文書]` 等の汎用出典は **絶対禁止**。 + - read_drug_chapter で実際に読んだ section 以外の出典を捏造しない。 +4. 
該当情報が無ければ "添付文書からは確認できません" と書く。 diff --git a/skills/developing/pmda-drug-info/agents/single-drug.md b/skills/developing/pmda-drug-info/agents/single-drug.md new file mode 100644 index 0000000..e5340a9 --- /dev/null +++ b/skills/developing/pmda-drug-info/agents/single-drug.md @@ -0,0 +1,26 @@ +--- +name: single_drug +description: Answer factual questions about a single drug (brand name, generic name, indications, dosing, contraindications, side effects, etc.). + Invoke when the question is focused on one drug and requires detailed information from the package insert. +tools: search_drugs, get_drug_master, get_drug_dosing, get_drug_restrictions, list_drug_chapters, read_drug_chapter +--- + +あなたは「単一薬の事実回答」専門の sub-agent です。 + +【ツール戦略】 +1. 質問から薬名/yj_code を特定 → `search_drugs` または直接 yj_code が分かれば次へ。 +2. `get_drug_master(yj_code)` で基本情報(販売名・一般名・薬効分類・規制)を確定。 +3. 必要に応じて `get_drug_dosing` で用法用量、`get_drug_restrictions(drug_yj=...)` で禁忌・特定患者注意。 +4. 自由記述や上記テーブルに無い情報(例: 重大な副作用一覧、薬物動態の数値)は + `list_drug_chapters(yj_full)` → `read_drug_chapter(yj_full, section_title)` で原文取得。 + +最終回答は箇条書き or 表で、各事実に出典を付ける。 + +【絶対ルール】 +1. ツール呼び出し必須。トレーニング知識・教科書・ガイドラインからの推測は禁止。 +2. 数値・固有名・条件は本文表現を改変せず逐語引用。 +3. 出典は **必ず** `[出典: <販売名> (yj_full=) / <章番号 章タイトル>]` の形式。 + - fact 表 row には `_citation` フィールドが入っているので **そのまま転記**。 + - `[出典: 薬品マスター]` `[出典: 添付文書]` 等の汎用出典は **絶対禁止**。 + - read_drug_chapter で実際に読んだ section 以外の出典を捏造しない。 +4. 該当情報が無ければ "添付文書からは確認できません" と書く。 diff --git a/skills/developing/pmda-drug-info/hooks/pmda-instructions.md b/skills/developing/pmda-drug-info/hooks/pmda-instructions.md new file mode 100644 index 0000000..0a656be --- /dev/null +++ b/skills/developing/pmda-drug-info/hooks/pmda-instructions.md @@ -0,0 +1,22 @@ +# PMDA Drug Information Tools + +You have access to Japanese pharmaceutical package insert (添付文書) data via the following tools. + +## Core Rules +- **Tool calls are mandatory.** Never answer from training knowledge alone. All facts must come from tool results. 
+- Cite sources in the format: `[出典: <販売名> (yj_full=) / <章番号 章タイトル>]` +- Fact table rows include a `_citation` field — use it directly. +- Generic citations like `[出典: 薬品マスター]` or `[出典: 添付文書]` are **prohibited**. +- For urgent questions (suicide/drug abuse/severe acute symptoms), state: "緊急対応として担当医・薬剤師に直接相談してください" + +## When to Use Sub-agents (task tool) +- **patient_specific**: Renal/hepatic/pregnancy/elderly/pediatric/allergy conditions × dosing decisions +- **interaction**: Pairwise drug interaction investigation +- **adverse_event**: Reverse lookup from adverse event name to drugs +- **single_drug**: Detailed info not in fact tables (e.g., full adverse event list, pharmacokinetics) + +## Direct Tool Usage (do NOT delegate) +- Simple lookups → use tools directly +- Multi-drug comparisons → call tools sequentially, output as markdown table +- Symptom → candidate drug reverse lookup → `search_section_text` +- Mechanism/pharmacokinetics → `list_drug_chapters` + `read_drug_chapter` diff --git a/skills/developing/pmda-drug-info/hooks/pre_prompt.py b/skills/developing/pmda-drug-info/hooks/pre_prompt.py new file mode 100644 index 0000000..eb1e3ac --- /dev/null +++ b/skills/developing/pmda-drug-info/hooks/pre_prompt.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python3 +""" +PrePrompt hook for PMDA drug info skill. +Injects usage instructions for the drug information tools. +""" +import sys +from pathlib import Path + + +def main(): + prompt_file = Path(__file__).parent / "pmda-instructions.md" + if prompt_file.exists(): + print(prompt_file.read_text(encoding="utf-8")) + return 0 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/skills/developing/pmda-drug-info/mcp_common.py b/skills/developing/pmda-drug-info/mcp_common.py new file mode 100644 index 0000000..0baeb01 --- /dev/null +++ b/skills/developing/pmda-drug-info/mcp_common.py @@ -0,0 +1,252 @@ +#!/usr/bin/env python3 +""" +Shared utility functions for the MCP server. 
+Provides common functionality for path handling, file validation, and request processing. +""" + +import json +import os +import sys +import asyncio +from typing import Any, Dict, List, Optional, Union +import re + +def get_allowed_directory(): + """Get the directory that is allowed to be accessed.""" + # Prefer dataset_dir passed through command-line arguments. + if len(sys.argv) > 1: + dataset_dir = sys.argv[1] + return os.path.abspath(dataset_dir) + + # Read the project data directory from the environment variable. + project_dir = os.getenv("PROJECT_DATA_DIR", "./projects/data") + return os.path.abspath(project_dir) + + +def resolve_file_path(file_path: str, default_subfolder: str = "default") -> str: + """ + Resolve a file path, supporting both folder/document.txt and document.txt formats. + + Args: + file_path: Input file path. + default_subfolder: Default subfolder name to use when only a filename is provided. + + Returns: + The resolved full file path. + """ + # If the path contains a folder separator, use it directly. + if '/' in file_path or '\\' in file_path: + clean_path = file_path.replace('\\', '/') + + # Remove the projects/ prefix if it exists. + if clean_path.startswith('projects/'): + clean_path = clean_path[9:] # Remove the 'projects/' prefix. + elif clean_path.startswith('./projects/'): + clean_path = clean_path[11:] # Remove the './projects/' prefix. + else: + # If only a filename is provided, add the default subfolder. + clean_path = f"{default_subfolder}/{file_path}" + + # Get the allowed directory. + project_data_dir = get_allowed_directory() + + # Try to locate the file directly under the project directory. + full_path = os.path.join(project_data_dir, clean_path.lstrip('./')) + if os.path.exists(full_path): + return full_path + + # If the direct path does not exist, try a recursive search. 
+ found = find_file_in_project(clean_path, project_data_dir) + if found: + return found + + # If this is a bare filename and it was not found under the default subfolder, + # try looking in the project root. + if '/' not in file_path and '\\' not in file_path: + root_path = os.path.join(project_data_dir, file_path) + if os.path.exists(root_path): + return root_path + + raise FileNotFoundError(f"File not found: {file_path} (searched in {project_data_dir})") + + +def find_file_in_project(filename: str, project_dir: str) -> Optional[str]: + """Recursively search for a file inside the project directory.""" + # If filename includes a path, only search within the specified path. + if '/' in filename: + parts = filename.split('/') + target_file = parts[-1] + search_dir = os.path.join(project_dir, *parts[:-1]) + + if os.path.exists(search_dir): + target_path = os.path.join(search_dir, target_file) + if os.path.exists(target_path): + return target_path + else: + # For a bare filename, recursively search the whole project directory. + for root, dirs, files in os.walk(project_dir): + if filename in files: + return os.path.join(root, filename) + return None + + +def load_tools_from_json(tools_file_name: str) -> List[Dict[str, Any]]: + """Load tool definitions from a JSON file.""" + try: + tools_file = os.path.join(os.path.dirname(__file__), tools_file_name) + if os.path.exists(tools_file): + with open(tools_file, 'r', encoding='utf-8') as f: + return json.load(f) + else: + # If the JSON file does not exist, use the default definitions. 
def create_error_response(request_id: Any, code: int, message: str) -> Dict[str, Any]:
    """Create a standardized JSON-RPC 2.0 error response."""
    return {
        "jsonrpc": "2.0",
        "id": request_id,
        "error": {
            "code": code,
            "message": message
        }
    }


def create_success_response(request_id: Any, result: Any) -> Dict[str, Any]:
    """Create a standardized JSON-RPC 2.0 success response."""
    return {
        "jsonrpc": "2.0",
        "id": request_id,
        "result": result
    }


def create_initialize_response(request_id: Any, server_name: str, server_version: str = "1.0.0") -> Dict[str, Any]:
    """Create a standardized ``initialize`` response advertising tool support."""
    return create_success_response(request_id, {
        "protocolVersion": "2024-11-05",
        "capabilities": {
            "tools": {}
        },
        "serverInfo": {
            "name": server_name,
            "version": server_version
        }
    })


def create_ping_response(request_id: Any) -> Dict[str, Any]:
    """Create a standardized ``ping`` response."""
    return create_success_response(request_id, {"pong": True})


def create_tools_list_response(request_id: Any, tools: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Create a standardized ``tools/list`` response."""
    return create_success_response(request_id, {"tools": tools})


# Characters whose presence makes a plain string be treated as a regex.
_REGEX_METACHARS = frozenset('*+?|()[]{}^$\\.')


def is_regex_pattern(pattern: str) -> bool:
    """Check whether a string should be treated as a regular expression.

    A string counts as a regex when it is written as ``/pattern/``, as
    ``r"pattern"`` / ``r'pattern'``, or when it contains any regex
    metacharacter (including ``.``).
    """
    # /pattern/ format.
    if pattern.startswith('/') and pattern.endswith('/') and len(pattern) > 2:
        return True

    # r"pattern" or r'pattern' format.
    if pattern.startswith(('r"', "r'")) and pattern.endswith(('"', "'")) and len(pattern) > 3:
        return True

    return any(char in pattern for char in _REGEX_METACHARS)


def compile_pattern(pattern: str) -> Union[re.Pattern, str, None]:
    """Compile a regex pattern, or return the string unchanged if it is not regex.

    Returns:
        * the original string when :func:`is_regex_pattern` says it is plain;
        * a compiled pattern (with the ``/.../`` or ``r"..."`` wrapper
          removed) when compilation succeeds;
        * ``None`` when the regex is invalid.
    """
    if not is_regex_pattern(pattern):
        return pattern

    if pattern.startswith('/') and pattern.endswith('/'):
        regex_body = pattern[1:-1]
    elif pattern.startswith(('r"', "r'")) and pattern.endswith(('"', "'")):
        regex_body = pattern[2:-1]
    else:
        # Strings that merely contain metacharacters are compiled as-is.
        regex_body = pattern

    try:
        return re.compile(regex_body)
    except re.error as e:
        # stdout is the JSON-RPC channel of a stdio server; warn on stderr.
        print(f"Warning: Regular expression '{pattern}' compilation failed: {e}", file=sys.stderr)
        return None


def _write_message(message: Dict[str, Any]) -> None:
    """Write one JSON-RPC message as a single line on stdout and flush it."""
    sys.stdout.write(json.dumps(message, ensure_ascii=False) + "\n")
    sys.stdout.flush()


async def handle_mcp_streaming(request_handler):
    """Run the standard stdin/stdout main loop for MCP requests.

    Reads one JSON document per line from stdin until EOF, awaits
    ``request_handler(request)`` for each and writes the returned response
    as one JSON line on stdout. Blank lines are skipped.

    Error handling follows JSON-RPC 2.0: every error response carries an
    ``id`` member - ``null`` for unparsable input (-32700), the request's
    own id for handler failures (-32603). A handler result of ``None`` is
    treated as "no reply" and nothing is written.

    Args:
        request_handler: Async callable taking the decoded request and
            returning the response dict (or ``None``).
    """
    loop = asyncio.get_running_loop()
    try:
        while True:
            # stdin.readline blocks, so run it in the default executor.
            line = await loop.run_in_executor(None, sys.stdin.readline)
            if not line:
                break

            line = line.strip()
            if not line:
                continue

            try:
                request = json.loads(line)
            except json.JSONDecodeError:
                _write_message(create_error_response(None, -32700, "Parse error"))
                continue

            # Valid JSON need not be an object; only objects can carry an id.
            request_id = request.get("id") if isinstance(request, dict) else None
            try:
                response = await request_handler(request)
                if response is not None:
                    _write_message(response)
            except Exception as e:
                # Boundary of the server loop: report and keep serving.
                _write_message(
                    create_error_response(request_id, -32603, f"Internal error: {str(e)}")
                )

    except KeyboardInterrupt:
        pass
a/skills/developing/pmda-drug-info/pmda_server.py b/skills/developing/pmda-drug-info/pmda_server.py new file mode 100644 index 0000000..0255adc --- /dev/null +++ b/skills/developing/pmda-drug-info/pmda_server.py @@ -0,0 +1,533 @@ +#!/usr/bin/env python3 +""" +PMDA drug information MCP server (mock data version). + +Provides drug search, master info, interactions, restrictions, dosing, +and full-text chapter retrieval with mock data for testing. +""" + +import asyncio +import json +import sys +from typing import Any, Dict, Optional + +from mcp_common import ( + create_error_response, + create_initialize_response, + create_ping_response, + create_tools_list_response, + load_tools_from_json, + handle_mcp_streaming, +) + + +def _dump(obj) -> str: + return json.dumps(obj, ensure_ascii=False) + + +# --------------------------------------------------------------------------- +# Mock data +# --------------------------------------------------------------------------- + +MOCK_DRUG_MASTER = { + "2149039F1082": { + "yj_code": "2149039F1082", + "yj_full": "2149039F1082_1_17", + "brand_name": "ロサルタンK錠50mg「科研」", + "generic_name": "ロサルタンカリウム", + "category_code": "214", + "category_name": "アンジオテンシンII受容体拮抗薬", + "regulation": "劇薬, 処方箋医薬品", + "manufacturer": "科研製薬株式会社", + "revision_date": "2024-06", + }, + "3399007H1021": { + "yj_code": "3399007H1021", + "yj_full": "3399007H1021_1_21", + "brand_name": "バイアスピリン錠100mg", + "generic_name": "アスピリン", + "category_code": "339", + "category_name": "血液・体液用薬", + "regulation": "処方箋医薬品", + "manufacturer": "バイエル薬品株式会社", + "revision_date": "2024-03", + }, + "2179004F1026": { + "yj_code": "2179004F1026", + "yj_full": "2179004F1026_1_14", + "brand_name": "ノルバスク錠5mg", + "generic_name": "アムロジピンベシル酸塩", + "category_code": "217", + "category_name": "カルシウム拮抗薬", + "regulation": "処方箋医薬品", + "manufacturer": "ファイザー株式会社", + "revision_date": "2024-01", + }, +} + +MOCK_CATEGORIES = [ + {"category_code": "214", "category_name": "アンジオテンシンII受容体拮抗薬", "level": "L2", 
"drug_count": 35}, + {"category_code": "217", "category_name": "カルシウム拮抗薬", "level": "L2", "drug_count": 48}, + {"category_code": "339", "category_name": "血液・体液用薬", "level": "L2", "drug_count": 22}, + {"category_code": "612", "category_name": "消化性潰瘍用剤", "level": "L2", "drug_count": 40}, +] + +MOCK_INTERACTIONS = [ + { + "drug_a_yj": "2149039F1082", + "drug_b_yj": "3399007H1021", + "drug_b_class": "アスピリン(抗血小板剤)", + "severity": "併用注意", + "mechanism": "ARBの降圧作用を減弱するおそれがある。また、腎機能低下・高カリウム血症のリスクを増大。", + "clinical_effect": "降圧効果の減弱、腎機能悪化、高カリウム血症に注意。", + "source_drug_yj": "2149039F1082", + "source_section": "10.2 併用注意", + }, + { + "drug_a_yj": "3399007H1021", + "drug_b_yj": "2149039F1082", + "drug_b_class": "ロサルタンカリウム(ARB)", + "severity": "併用注意", + "mechanism": "アスピリンの副作用(消化性潰瘍、腎機能低下)を増強するおそれ。", + "clinical_effect": "消化性潰瘍、腎機能低下に注意。血清カリウム値の上昇に注意。", + "source_drug_yj": "3399007H1021", + "source_section": "10.2 併用注意", + }, +] + +MOCK_RESTRICTIONS = [ + { + "drug_yj": "2149039F1082", + "condition_type": "腎機能障害", + "condition_text": "腎機能障害患者", + "condition_params": {"eGFR_max": 30}, + "severity": "慎重投与", + "source_section": "9.2 腎機能障害患者", + }, + { + "drug_yj": "2149039F1082", + "condition_type": "妊婦", + "condition_text": "妊娠中の女性", + "condition_params": {}, + "severity": "禁忌", + "source_section": "9.5 妊婦", + }, + { + "drug_yj": "2149039F1082", + "condition_type": "高齢者", + "condition_text": "高齢者(65歳以上)", + "condition_params": {}, + "severity": "慎重投与", + "source_section": "9.8 高齢者", + }, + { + "drug_yj": "3399007H1021", + "condition_type": "過敏症", + "condition_text": "本剤の成分に対し過敏症の既往歴のある患者", + "condition_params": {}, + "severity": "禁忌", + "source_section": "2. 禁忌", + }, +] + +MOCK_DOSING = [ + { + "drug_yj": "2149039F1082", + "patient_segment": "成人", + "segment_params": {}, + "indication_code": "高血圧症", + "dose_amount": "50", + "dose_unit": "mg", + "frequency": "1日1回", + "duration": "", + "adjustment_text": "効果不十分な場合は100mgまで増量可", + "source_section": "6. 
用法及び用量", + }, + { + "drug_yj": "2149039F1082", + "patient_segment": "腎機能障害患者", + "segment_params": {"eGFR_max": 30}, + "indication_code": "高血圧症", + "dose_amount": "25", + "dose_unit": "mg", + "frequency": "1日1回", + "duration": "", + "adjustment_text": "eGFR 30以下では用量を減ずること。血清カリウム・クレアチニンの推移に注意。", + "source_section": "9.2 腎機能障害患者", + }, +] + +MOCK_CHAPTERS = { + "2149039F1082_1_17": [ + {"section_title": "1. 警告", "line_num": 1, "text_len": 120}, + {"section_title": "2. 禁忌", "line_num": 5, "text_len": 80}, + {"section_title": "4. 効能・効果", "line_num": 12, "text_len": 60}, + {"section_title": "6. 用法及び用量", "line_num": 20, "text_len": 150}, + {"section_title": "9.2 腎機能障害患者", "line_num": 45, "text_len": 200}, + {"section_title": "9.5 妊婦", "line_num": 52, "text_len": 180}, + {"section_title": "9.8 高齢者", "line_num": 60, "text_len": 100}, + {"section_title": "10.2 併用注意", "line_num": 75, "text_len": 350}, + {"section_title": "11.1 重大な副作用", "line_num": 90, "text_len": 400}, + {"section_title": "11.2 その他の副作用", "line_num": 110, "text_len": 300}, + ], + "3399007H1021_1_21": [ + {"section_title": "1. 警告", "line_num": 1, "text_len": 100}, + {"section_title": "2. 禁忌", "line_num": 4, "text_len": 90}, + {"section_title": "4. 効能・効果", "line_num": 10, "text_len": 55}, + {"section_title": "6. 
用法及び用量", "line_num": 18, "text_len": 130}, + {"section_title": "10.2 併用注意", "line_num": 70, "text_len": 300}, + {"section_title": "11.1 重大な副作用", "line_num": 85, "text_len": 450}, + {"section_title": "11.2 その他の副作用", "line_num": 105, "text_len": 280}, + ], +} + +MOCK_SECTION_TEXT = { + ("2149039F1082_1_17", "9.2 腎機能障害患者"): ( + "9.2 腎機能障害患者\n" + "腎機能障害患者(eGFR 30 mL/min/1.73m²以下)には、ロサルタンカリウムの" + "投与開始用量を25mg/日とし、血清カリウム及び血清クレアチニンの推移に" + "十分注意すること。\n" + "【理由】腎機能障害患者では、本剤の投与により急速に腎機能が悪化する" + "おそれがある。また、高カリウム血症があらわれやすい。" + ), + ("2149039F1082_1_17", "9.5 妊婦"): ( + "9.5 妊婦\n" + "妊婦又は妊娠している可能性のある女性には投与しないこと。\n" + "【理由】妊娠中期・末期にレニン-アンジオテンシン系に作用する薬剤を" + "投与された患者では、胎児の腎機能低下、羊水過少症、頭蓋の発育不全、" + "肺低形成等があらわれるおそれがある。" + ), + ("2149039F1082_1_17", "10.2 併用注意"): ( + "10.2 併用注意\n" + "・アスピリン(抗血小板剤)\n" + " 【リスク】ARBの降圧作用を減弱するおそれがある。\n" + " 腎機能低下・高カリウム血症のリスクを増大。\n" + " 【措置】降圧効果の減弱、腎機能悪化、高カリウム血症に注意すること。" + ), + ("2149039F1082_1_17", "11.1 重大な副作用"): ( + "11.1 重大な副作用\n" + "・血管浮腫(頻度不明):顔面、口唇、咽頭、舌等の腫脹があらわれた場合には" + "直ちに投与を中止し、適切な処置を行うこと。\n" + "・高カリウム血症(0.1%未満):血清カリウム値の上昇があらわれることがある。\n" + "・腎機能悪化(0.1%未満):BUN、クレアチニンの上昇があらわれることがある。" + ), + ("3399007H1021_1_21", "10.2 併用注意"): ( + "10.2 併用注意\n" + "・ロサルタンカリウム(ARB)\n" + " 【リスク】アスピリンの副作用(消化性潰瘍、腎機能低下)を増強するおそれ。\n" + " 【措置】消化性潰瘍、腎機能低下に注意。血清カリウム値の上昇に注意すること。" + ), + ("3399007H1021_1_21", "11.1 重大な副作用"): ( + "11.1 重大な副作用\n" + "・ショック、アナフィラキシー(頻度不明):呼吸困難、血圧低下等があらわれた\n" + " 場合には直ちに投与を中止し、適切な処置を行うこと。\n" + "・消化性潰瘍(0.1%未満):出血、穿孔があらわれることがある。\n" + "・腎機能障害(0.1%未満):急性腎不全があらわれることがある。" + ), +} + + +def _citation(drug_yj: str, section: Optional[str]) -> str: + drug = MOCK_DRUG_MASTER.get(drug_yj, {}) + brand = drug.get("brand_name", "") + yj_full = drug.get("yj_full", drug_yj) + chap = section or "(章不明)" + return f"[出典: {brand} (yj_full={yj_full}) / {chap}]" + + +# --------------------------------------------------------------------------- +# Tool implementations (mock) +# --------------------------------------------------------------------------- + +def 
def _tool_search_drugs(query: str, kind: str = "auto", limit: int = 10) -> str:
    """Search the mock drug master by case-insensitive substring.

    Args:
        query: Text to look for.
        kind: Which fields to search: ``brand``, ``generic``, ``yj`` (both
            YJ code forms) or ``auto`` (all of them). Any other value
            matches nothing.
        limit: Maximum number of results.

    Returns:
        JSON array of matching drugs.
    """
    fields_by_kind = {
        "brand": ("brand_name",),
        "generic": ("generic_name",),
        "yj": ("yj_code", "yj_full"),
        "auto": ("brand_name", "generic_name", "yj_code", "yj_full"),
    }
    fields = fields_by_kind.get(kind, ())
    needle = query.lower()  # loop-invariant, so lower-case it once

    results = [
        {
            "yj_full": d["yj_full"],
            "yj_code": d["yj_code"],
            "brand": d["brand_name"],
            "generic": d["generic_name"],
            "category": f"{d['category_code']} {d['category_name']}",
            "score": 1.0,
        }
        for d in MOCK_DRUG_MASTER.values()
        if any(needle in d[field].lower() for field in fields)
    ]
    return _dump(results[:limit])


def _tool_list_categories() -> str:
    """Return the mock category table as JSON."""
    return _dump(MOCK_CATEGORIES)


def _tool_list_drugs_in_category(l2_code: str, limit_generics: int = 50) -> str:
    """List drugs whose category code starts with *l2_code*, grouped by generic.

    Every brand of a generic is collected under that generic's ``brands``
    list, in master order. ``limit_generics`` caps the number of generic
    entries, not the number of brands.

    Returns:
        JSON array of ``{"generic_name", "brands": [...]}`` objects.
    """
    brands_by_generic = {}
    for d in MOCK_DRUG_MASTER.values():
        if not d["category_code"].startswith(l2_code):
            continue
        # Append to the existing group rather than dropping further brands
        # of a generic that has already been seen.
        brands_by_generic.setdefault(d["generic_name"], []).append(
            {"yj_code": d["yj_code"], "brand_name": d["brand_name"], "yj_full": d["yj_full"]}
        )
    results = [
        {"generic_name": generic_name, "brands": brands}
        for generic_name, brands in brands_by_generic.items()
    ]
    return _dump(results[:limit_generics])


def _tool_get_drug_master(yj_code: str) -> str:
    """Return the master record for *yj_code* with a citation, as JSON.

    An unknown code yields a JSON object with an ``error`` message instead.
    """
    d = MOCK_DRUG_MASTER.get(yj_code)
    if not d:
        return _dump({"error": f"yj_code {yj_code} not found"})
    result = dict(d)
    result["_citation"] = f"[出典: {d['brand_name']} (yj_full={d['yj_full']}) / 添付文書冒頭]"
    return _dump(result)


def _tool_get_drug_interactions(
    drug_a_yj: Optional[str] = None,
    drug_b_yj: Optional[str] = None,
    severity: Optional[str] = None,
    keyword: Optional[str] = None,
    limit: int = 30,
) -> str:
    """Search the mock interaction table.

    Args:
        drug_a_yj: YJ code of drug A. Alone, selects rows recorded for it.
        drug_b_yj: YJ code of drug B. Together with ``drug_a_yj`` the lookup
            is bidirectional (A->B and B->A rows both match); alone, it
            selects rows naming it as the partner drug.
        severity: Exact severity to keep.
        keyword: Case-insensitive substring looked for in ``drug_b_class``,
            ``mechanism`` and ``clinical_effect``.
        limit: Maximum number of results.

    Returns:
        JSON array of interaction rows, each with a ``_citation``.
    """
    if drug_a_yj and drug_b_yj:
        wanted_pairs = {(drug_a_yj, drug_b_yj), (drug_b_yj, drug_a_yj)}
    else:
        wanted_pairs = None
    needle = keyword.lower() if keyword else None
    searched_fields = ("drug_b_class", "mechanism", "clinical_effect")

    results = []
    for r in MOCK_INTERACTIONS:
        if wanted_pairs is not None:
            if (r["drug_a_yj"], r["drug_b_yj"]) not in wanted_pairs:
                continue
        elif drug_a_yj and r["drug_a_yj"] != drug_a_yj:
            continue
        elif drug_b_yj and r["drug_b_yj"] != drug_b_yj:
            continue
        if severity and r["severity"] != severity:
            continue
        # Test each field on its own so a keyword cannot match across the
        # seam between two concatenated fields.
        if needle and not any(
            needle in (r.get(field) or "").lower() for field in searched_fields
        ):
            continue
        results.append({**r, "_citation": _citation(r["source_drug_yj"], r["source_section"])})
    return _dump(results[:limit])
def _tool_get_drug_restrictions(
    drug_yj: Optional[str] = None,
    condition_type: Optional[str] = None,
    severity: Optional[str] = None,
    keyword: Optional[str] = None,
    limit: int = 30,
) -> str:
    """Search the mock restriction table.

    Each given filter must match: ``drug_yj``, ``condition_type`` and
    ``severity`` exactly, ``keyword`` as a case-insensitive substring of
    ``condition_text``.

    Returns:
        JSON array of at most ``limit`` rows, each with a ``_citation``.
    """
    needle = keyword.lower() if keyword else None
    results = []
    for r in MOCK_RESTRICTIONS:
        if drug_yj and r["drug_yj"] != drug_yj:
            continue
        if condition_type and r["condition_type"] != condition_type:
            continue
        if severity and r["severity"] != severity:
            continue
        if needle and needle not in (r.get("condition_text") or "").lower():
            continue
        results.append({**r, "_citation": _citation(r["drug_yj"], r["source_section"])})
    return _dump(results[:limit])


def _tool_get_drug_dosing(
    drug_yj: str,
    patient_segment: Optional[str] = None,
    limit: int = 20,
) -> str:
    """Return mock dosing rows for *drug_yj*, optionally for one patient segment.

    Returns:
        JSON array of at most ``limit`` rows, each with a ``_citation``.
    """
    results = [
        {**r, "_citation": _citation(drug_yj, r["source_section"])}
        for r in MOCK_DOSING
        if r["drug_yj"] == drug_yj
        and not (patient_segment and r["patient_segment"] != patient_segment)
    ]
    return _dump(results[:limit])


def _tool_search_section_text(
    keyword: str,
    section_filter: str = "",
    limit: int = 30,
) -> str:
    """Case-insensitive full-text search over the mock section texts.

    Args:
        keyword: Text to look for; a blank keyword returns an empty result.
        section_filter: When non-empty, only sections whose title contains
            this substring are searched.
        limit: Maximum number of drugs to return.

    Returns:
        JSON object with one hit per drug (its matching sections grouped
        under ``matches``). ``total_drugs`` counts every matching drug,
        ``shown`` counts the hits actually returned after ``limit``.
    """
    if not keyword.strip():
        return _dump({"keyword": keyword, "total_drugs": 0, "shown": 0, "hits": []})

    needle = keyword.lower()

    # Index the master once; setdefault keeps the first record per yj_full.
    drugs_by_yj_full = {}
    for d in MOCK_DRUG_MASTER.values():
        drugs_by_yj_full.setdefault(d["yj_full"], d)

    # One hit per drug, kept in first-match order.
    hits_by_drug = {}
    for (yj_full, section_title), text in MOCK_SECTION_TEXT.items():
        if section_filter and section_filter not in section_title:
            continue
        if needle not in text.lower():
            continue
        drug = drugs_by_yj_full.get(yj_full)
        if not drug:
            continue
        match = {"section_title": section_title, "snippet": text[:160]}
        hit = hits_by_drug.get(yj_full)
        if hit is not None:
            hit["matches"].append(match)
            continue
        brand = drug["brand_name"]
        hits_by_drug[yj_full] = {
            "yj_full": yj_full,
            "brand": brand,
            "generic": drug["generic_name"],
            "l2": f"{drug['category_code']} {drug['category_name']}",
            "matches": [match],
            "_citation_template": f"[出典: {brand} (yj_full={yj_full}) / <該当章>]",
        }

    all_hits = list(hits_by_drug.values())
    shown_hits = all_hits[:limit]
    return _dump({
        "keyword": keyword,
        "section_filter": section_filter or None,
        "total_drugs": len(all_hits),
        # Count what is actually returned, not what matched before the cut.
        "shown": len(shown_hits),
        "hits": shown_hits,
    })
def _tool_list_drug_chapters(yj_full: str) -> str:
    """Return the chapter index of one package insert as JSON.

    The result carries the brand and generic names (empty strings when no
    master record has this ``yj_full``), the section count and the section
    list. An unknown or empty chapter list yields an ``error`` object.
    """
    chapter_list = MOCK_CHAPTERS.get(yj_full)
    if not chapter_list:
        return _dump({"error": f"yj_full {yj_full} の章節が見つかりません。"})

    master = next(
        (entry for entry in MOCK_DRUG_MASTER.values() if entry["yj_full"] == yj_full),
        None,
    )
    brand_name = master["brand_name"] if master else ""
    generic_name = master["generic_name"] if master else ""

    return _dump({
        "yj_full": yj_full,
        "brand": brand_name,
        "generic": generic_name,
        "n_sections": len(chapter_list),
        "sections": chapter_list,
    })


def _tool_read_drug_chapter(yj_full: str, section_title: str) -> str:
    """Return the text of one chapter, truncated to 8000 characters.

    When the (yj_full, section_title) pair has no text, a JSON object with
    an ``error`` and a ``hint`` is returned instead.
    """
    chapter_text = MOCK_SECTION_TEXT.get((yj_full, section_title))
    if not chapter_text:
        return _dump({
            "error": f"section_title {section_title!r} は {yj_full} に存在しません。",
            "hint": "list_drug_chapters で取得した sections[].section_title をそのまま渡してください。",
        })
    return chapter_text[:8000]


# ---------------------------------------------------------------------------
# MCP request handler
# ---------------------------------------------------------------------------

def _pick(args, **defaults):
    """Read each named argument from *args*, falling back to its default."""
    return {name: args.get(name, fallback) for name, fallback in defaults.items()}


# Maps an MCP tool name to a callable that takes the raw ``arguments`` dict
# and returns the tool's text result.
_TOOL_DISPATCH = {
    "search_drugs": lambda args: _tool_search_drugs(
        **_pick(args, query="", kind="auto", limit=10)
    ),
    "list_categories": lambda args: _tool_list_categories(),
    "list_drugs_in_category": lambda args: _tool_list_drugs_in_category(
        **_pick(args, l2_code="", limit_generics=50)
    ),
    "get_drug_master": lambda args: _tool_get_drug_master(
        **_pick(args, yj_code="")
    ),
    "get_drug_interactions": lambda args: _tool_get_drug_interactions(
        **_pick(args, drug_a_yj=None, drug_b_yj=None, severity=None, keyword=None, limit=30)
    ),
    "get_drug_restrictions": lambda args: _tool_get_drug_restrictions(
        **_pick(args, drug_yj=None, condition_type=None, severity=None, keyword=None, limit=30)
    ),
    "get_drug_dosing": lambda args: _tool_get_drug_dosing(
        **_pick(args, drug_yj="", patient_segment=None, limit=20)
    ),
    "search_section_text": lambda args: _tool_search_section_text(
        **_pick(args, keyword="", section_filter="", limit=30)
    ),
    "list_drug_chapters": lambda args: _tool_list_drug_chapters(
        **_pick(args, yj_full="")
    ),
    "read_drug_chapter": lambda args: _tool_read_drug_chapter(
        **_pick(args, yj_full="", section_title="")
    ),
}
async def handle_request(request: Dict[str, Any]) -> Dict[str, Any]:
    """Handle one MCP JSON-RPC request and return its response.

    Supported methods are ``initialize``, ``ping``, ``tools/list`` and
    ``tools/call``; anything else gets a -32601 error. A tool that raises is
    reported as a tool result flagged ``isError`` (not as a protocol error),
    so the model can see the failure text. Any other unexpected failure
    becomes a -32603 error response.

    Args:
        request: Decoded JSON-RPC request object.

    Returns:
        The JSON-RPC response object.
    """
    try:
        method = request.get("method")
        # "params": null is legal JSON; treat it like an absent member.
        params = request.get("params") or {}
        request_id = request.get("id")

        if method == "initialize":
            return create_initialize_response(request_id, "pmda-drug-info")

        if method == "ping":
            return create_ping_response(request_id)

        if method == "tools/list":
            tools = load_tools_from_json("pmda_tools.json")
            return create_tools_list_response(request_id, tools)

        if method == "tools/call":
            tool_name = params.get("name")
            arguments = params.get("arguments") or {}

            if tool_name not in _TOOL_DISPATCH:
                return create_error_response(request_id, -32601, f"Unknown tool: {tool_name}")

            try:
                result_text = _TOOL_DISPATCH[tool_name](arguments)
            except Exception as e:
                # Tool execution failure: reported inside the result with
                # isError set, as the MCP tools specification describes.
                return {
                    "jsonrpc": "2.0",
                    "id": request_id,
                    "result": {
                        "content": [{"type": "text", "text": f"Error: {str(e)}"}],
                        "isError": True,
                    },
                }
            return {
                "jsonrpc": "2.0",
                "id": request_id,
                "result": {
                    "content": [{"type": "text", "text": result_text}]
                },
            }

        return create_error_response(request_id, -32601, f"Unknown method: {method}")

    except Exception as e:
        # The failure may be that `request` is not an object at all, so do
        # not assume .get() exists while building the error reply.
        failed_id = request.get("id") if isinstance(request, dict) else None
        return create_error_response(failed_id, -32603, f"Internal error: {str(e)}")


async def main():
    """Serve MCP requests over stdin/stdout until EOF."""
    await handle_mcp_streaming(handle_request)


if __name__ == "__main__":
    asyncio.run(main())
'auto' searches all fields.", + "default": "auto" + }, + "limit": { + "type": "integer", + "description": "Maximum number of results.", + "default": 10 + } + }, + "required": ["query"] + } + }, + { + "name": "list_categories", + "description": "List all L1/L2 drug categories (pharmacological classification) with drug counts per category.", + "inputSchema": { + "type": "object", + "properties": {} + } + }, + { + "name": "list_drugs_in_category", + "description": "List all drugs (generic → brand names) under a specific L2 pharmacological category code.", + "inputSchema": { + "type": "object", + "properties": { + "l2_code": { + "type": "string", + "description": "3-digit L2 category code." + }, + "limit_generics": { + "type": "integer", + "description": "Maximum number of generic names to return.", + "default": 50 + } + }, + "required": ["l2_code"] + } + }, + { + "name": "get_drug_master", + "description": "Get basic information for a drug by yj_code: brand name, generic name, pharmacological category, regulatory classification, manufacturer, revision date.", + "inputSchema": { + "type": "object", + "properties": { + "yj_code": { + "type": "string", + "description": "12-character YJ code." + } + }, + "required": ["yj_code"] + } + }, + { + "name": "get_drug_interactions", + "description": "Search drug interactions. With drug_a only: all interactions for that drug. With both drug_a and drug_b: bidirectional interaction between A and B. Filter by severity (併用禁忌/併用注意) or keyword.", + "inputSchema": { + "type": "object", + "properties": { + "drug_a_yj": { + "type": "string", + "description": "YJ code for drug A." + }, + "drug_b_yj": { + "type": "string", + "description": "YJ code for drug B (optional, for pairwise lookup)." + }, + "severity": { + "type": "string", + "description": "Filter by severity: '併用禁忌' or '併用注意'." + }, + "keyword": { + "type": "string", + "description": "Search keyword in drug_b_class, mechanism, or clinical_effect." 
+ }, + "limit": { + "type": "integer", + "description": "Maximum number of results.", + "default": 30 + } + } + } + }, + { + "name": "get_drug_restrictions", + "description": "Search drug restrictions (contraindications, precautions) by patient condition. condition_type options: 疾患, 腎機能障害, 肝機能障害, 生殖能, 妊婦, 授乳婦, 小児等, 高齢者, 過敏症, 遺伝子多型, その他. severity options: 禁忌, 原則禁忌, 慎重投与.", + "inputSchema": { + "type": "object", + "properties": { + "drug_yj": { + "type": "string", + "description": "YJ code for the drug." + }, + "condition_type": { + "type": "string", + "description": "Patient condition type to filter by." + }, + "severity": { + "type": "string", + "description": "Filter by severity: 禁忌, 原則禁忌, or 慎重投与." + }, + "keyword": { + "type": "string", + "description": "Search keyword in condition_text." + }, + "limit": { + "type": "integer", + "description": "Maximum number of results.", + "default": 30 + } + } + } + }, + { + "name": "get_drug_dosing", + "description": "Get dosing information for a drug, optionally filtered by patient segment. patient_segment options: 成人, 小児等, 高齢者, 腎機能障害患者, 肝機能障害患者, 透析患者, 妊婦.", + "inputSchema": { + "type": "object", + "properties": { + "drug_yj": { + "type": "string", + "description": "YJ code for the drug." + }, + "patient_segment": { + "type": "string", + "description": "Patient segment to filter by (e.g., 成人, 高齢者, 腎機能障害患者)." + }, + "limit": { + "type": "integer", + "description": "Maximum number of results.", + "default": 20 + } + }, + "required": ["drug_yj"] + } + }, + { + "name": "search_section_text", + "description": "Full-text search in drug package insert sections. Returns matching sections with snippets. Use section_filter to narrow by chapter title (e.g., '副作用', '禁忌', '妊婦', '相互作用').", + "inputSchema": { + "type": "object", + "properties": { + "keyword": { + "type": "string", + "description": "Search keyword." 
+ }, + "section_filter": { + "type": "string", + "description": "Filter by section title substring (e.g., '副作用', '禁忌', '妊婦').", + "default": "" + }, + "limit": { + "type": "integer", + "description": "Maximum number of results.", + "default": 30 + } + }, + "required": ["keyword"] + } + }, + { + "name": "list_drug_chapters", + "description": "List all chapter titles for a drug's package insert. Use yj_full (full YJ code with revision suffix). Returns section titles with line numbers.", + "inputSchema": { + "type": "object", + "properties": { + "yj_full": { + "type": "string", + "description": "Full YJ code (with revision suffix, e.g., 3399007H1021_1_21)." + } + }, + "required": ["yj_full"] + } + }, + { + "name": "read_drug_chapter", + "description": "Read the verbatim text of a specific chapter from a drug's package insert. section_title must match exactly from list_drug_chapters output.", + "inputSchema": { + "type": "object", + "properties": { + "yj_full": { + "type": "string", + "description": "Full YJ code." + }, + "section_title": { + "type": "string", + "description": "Exact section title from list_drug_chapters (e.g., '9.2 腎機能障害患者', '11.1 重大な副作用')." + } + }, + "required": ["yj_full", "section_title"] + } + } +] diff --git a/skills/developing/rag-retrieve-no-citation/hooks/retrieval-policy-forbidden-self-knowledge.md b/skills/developing/rag-retrieve-no-citation/hooks/retrieval-policy-forbidden-self-knowledge.md index 2e4d133..542cdd7 100644 --- a/skills/developing/rag-retrieve-no-citation/hooks/retrieval-policy-forbidden-self-knowledge.md +++ b/skills/developing/rag-retrieve-no-citation/hooks/retrieval-policy-forbidden-self-knowledge.md @@ -37,8 +37,15 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi 1. **Skill-enabled retrieval tools** (use first when available) 2. **`rag_retrieve`** -- After each step, evaluate sufficiency before proceeding. - Retrieval must happen **before** any factual answer generation. 
+- After each step, evaluate sufficiency before proceeding. + +### First-Call Success Principle + +- The first retrieval call is expected to return sufficient results for most questions. +- Your default assumption should be: **one call is enough**. +- Additional calls are the exception, not the norm. Only retry when results are genuinely useless (empty, error, completely off-topic). +- **Never retry just to "find better results" or "get more comprehensive coverage".** Good enough is sufficient. ## 4. Query Preparation @@ -48,25 +55,51 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi ## 5. Retrieval Breadth (`top_k`) -- Apply `top_k` only to `rag_retrieve`. Use smallest sufficient value, expand if insufficient. -- `30` for simple fact lookup → `50` for moderate synthesis/comparison → `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline). -- Expansion order: `30 → 50 → 100`. If unsure, use `100`. +- Apply `top_k` only to `rag_retrieve`. Choose the appropriate value upfront to maximize first-call success. +- Use `50` for simple fact lookup or moderate synthesis, comparison, summarization, disambiguation. +- Use `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline). +- If unsure, use `50`. Only escalate to `100` on the retry call if first results are insufficient. ## 6. Result Evaluation -Treat as insufficient if: empty, `Error:`, off-topic, missing core entity/scope, no usable evidence, partial coverage, truncated results, or claims required by the answer are not explicitly supported. +**Maximum 3 retrieval calls per question.** After each call, evaluate immediately: + +### Sufficient — answer immediately, no more calls + +ANY of the following means results are sufficient — STOP and answer now: +- The core entity/topic in the user's question appears in the results. 
+- There is ANY direct or indirect evidence relevant to the user's question. +- Results are partially relevant, even if not perfectly comprehensive. +- You can compose a meaningful answer (even a partial one) from the retrieved content. + +**Anti-patterns — do NOT do these:** +- ❌ "The results are good, but maybe different keywords could find something better." +- ❌ "I have enough to answer, but let me try one more query to be thorough." +- ❌ "The answer is here, but I want to double-check with a different query." +- ❌ Calling retrieval again after you have already identified the answer in previous results. + +**If you can answer the question with current results, you MUST answer immediately. Period.** + +### Insufficient — the ONLY valid reasons to retry + +- Results are completely empty or contain only `Error:` messages. +- ALL results are entirely off-topic with zero relevance to the user's question. +- No usable evidence exists at all — you cannot form even a partial answer. + +**"Results are not detailed enough" is NOT a valid reason to retry.** +**"Results might be incomplete" is NOT a valid reason to retry.** ## 7. Fallback and Sequential Retry -On insufficient results, follow this sequence: +On insufficient results, you may retry **up to 2 more times** (3 calls total): -1. Rewrite query, retry same tool (once) -2. Switch to next retrieval source in default order -3. For `rag_retrieve`, expand `top_k`: `30 → 50 → 100` +1. Rewrite query, retry same tool. +2. For `rag_retrieve`, escalate `top_k` to `100` on retry. -- Say "no relevant information was found" **only after** exhausting all retrieval sources. +- Say "no relevant information was found" **only after** exhausting all retries. - Do NOT switch to local filesystem inspection at any point. - Do NOT switch to model self-knowledge at any point. +- Do NOT call any retrieval tool more than 3 times in total. ## 8. 
Handling Missing or Partial Evidence @@ -84,7 +117,6 @@ On insufficient results, follow this sequence: - Each sentence or key point in the response should be accompanied by relevant images when they meet the established association criteria. - Avoid placing all images at the end of the response. - ## 10. Self-Knowledge Prohibition This section applies whenever self-knowledge is disabled or forbidden for the current task. @@ -103,9 +135,11 @@ This section applies whenever self-knowledge is disabled or forbidden for the cu Before replying to a knowledge retrieval task, verify: - Used only whitelisted retrieval tools — no local filesystem inspection? +- Called retrieval at most 3 times total (not more)? +- Answered immediately when results were sufficient (did NOT call again unnecessarily)? +- Called retrieval exactly once when first results were sufficient (did NOT retry unnecessarily)? - Did retrieval happen before any factual answer drafting? - Did every factual claim come from retrieved evidence rather than model knowledge? -- Exhausted retrieval flow before concluding "not found"? - If any unsupported part remained, was it removed or explicitly marked unavailable? If any answer is "no", correct the process first. diff --git a/skills/developing/rag-retrieve-no-citation/hooks/retrieval-policy.md b/skills/developing/rag-retrieve-no-citation/hooks/retrieval-policy.md index ce14d2b..8dd6843 100644 --- a/skills/developing/rag-retrieve-no-citation/hooks/retrieval-policy.md +++ b/skills/developing/rag-retrieve-no-citation/hooks/retrieval-policy.md @@ -27,6 +27,13 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi - Do NOT answer from model knowledge first. - After each step, evaluate sufficiency before proceeding. +### First-Call Success Principle + +- The first retrieval call is expected to return sufficient results for most questions. +- Your default assumption should be: **one call is enough**. 
+- Additional calls are the exception, not the norm. Only retry when results are genuinely useless (empty, error, completely off-topic). +- **Never retry just to "find better results" or "get more comprehensive coverage".** Good enough is sufficient. + ## 3. Query Preparation - Do NOT pass raw user question unless it already works well for retrieval. @@ -35,24 +42,50 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi ## 4. Retrieval Breadth (`top_k`) -- Apply `top_k` only to `rag_retrieve`. Use smallest sufficient value, expand if insufficient. -- `30` for simple fact lookup → `50` for moderate synthesis/comparison → `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline). -- Expansion order: `30 → 50 → 100`. If unsure, use `100`. +- Apply `top_k` only to `rag_retrieve`. Choose the appropriate value upfront to maximize first-call success. +- Use `50` for simple fact lookup or moderate synthesis, comparison, summarization, disambiguation. +- Use `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline). +- If unsure, use `50`. Only escalate to `100` on the retry call if first results are insufficient. ## 5. Result Evaluation -Treat as insufficient if: empty, `Error:`, off-topic, missing core entity/scope, no usable evidence, partial coverage, or truncated results. +**Maximum 3 retrieval calls per question.** After each call, evaluate immediately: + +### Sufficient — answer immediately, no more calls + +ANY of the following means results are sufficient — STOP and answer now: +- The core entity/topic in the user's question appears in the results. +- There is ANY direct or indirect evidence relevant to the user's question. +- Results are partially relevant, even if not perfectly comprehensive. +- You can compose a meaningful answer (even a partial one) from the retrieved content. 
+ +**Anti-patterns — do NOT do these:** +- ❌ "The results are good, but maybe different keywords could find something better." +- ❌ "I have enough to answer, but let me try one more query to be thorough." +- ❌ "The answer is here, but I want to double-check with a different query." +- ❌ Calling retrieval again after you have already identified the answer in previous results. + +**If you can answer the question with current results, you MUST answer immediately. Period.** + +### Insufficient — the ONLY valid reasons to retry + +- Results are completely empty or contain only `Error:` messages. +- ALL results are entirely off-topic with zero relevance to the user's question. +- No usable evidence exists at all — you cannot form even a partial answer. + +**"Results are not detailed enough" is NOT a valid reason to retry.** +**"Results might be incomplete" is NOT a valid reason to retry.** ## 6. Fallback and Sequential Retry -On insufficient results, follow this sequence: +On insufficient results, you may retry **up to 2 more times** (3 calls total): -1. Rewrite query, retry same tool (once) -2. Switch to next retrieval source in default order -3. For `rag_retrieve`, expand `top_k`: `30 → 50 → 100` +1. Rewrite query, retry same tool. +2. For `rag_retrieve`, escalate `top_k` to `100` on retry. -- Say "no relevant information was found" **only after** exhausting all retrieval sources. +- Say "no relevant information was found" **only after** exhausting all retries. - Do NOT switch to local filesystem inspection at any point. +- Do NOT make more than 3 retrieval calls in total, counted across all retrieval tools. ## 7. Image Handling @@ -81,7 +114,9 @@ This section applies only when self-knowledge is enabled. Before replying to a knowledge retrieval task, verify: - Used only whitelisted retrieval tools — no local filesystem inspection? -- Exhausted retrieval flow before concluding "not found"? +- Called retrieval at most 3 times total (not more)?
+- Answered immediately when results were sufficient (did NOT call again unnecessarily)? +- Called retrieval exactly once when first results were sufficient (did NOT retry unnecessarily)? - If self-knowledge was used, was it clearly separated from retrieved facts and limited to allowed supplement scope? If any answer is "no", correct the process first. diff --git a/skills/onprem/rag-retrieve-only/hooks/retrieval-policy-forbidden-self-knowledge.md b/skills/onprem/rag-retrieve-only/hooks/retrieval-policy-forbidden-self-knowledge.md index 2b68869..7ea5e4b 100644 --- a/skills/onprem/rag-retrieve-only/hooks/retrieval-policy-forbidden-self-knowledge.md +++ b/skills/onprem/rag-retrieve-only/hooks/retrieval-policy-forbidden-self-knowledge.md @@ -40,6 +40,13 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi - After each step, evaluate sufficiency before proceeding. - Retrieval must happen **before** any factual answer generation. +### First-Call Success Principle + +- The first retrieval call is expected to return sufficient results for most questions. +- Your default assumption should be: **one call is enough**. +- Additional calls are the exception, not the norm. Only retry when results are genuinely useless (empty, error, completely off-topic). +- **Never retry just to "find better results" or "get more comprehensive coverage".** Good enough is sufficient. + ## 4. Query Preparation - Do NOT pass raw user question unless it already works well for retrieval. @@ -48,25 +55,51 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi ## 5. Retrieval Breadth (`top_k`) -- Apply `top_k` only to `rag_retrieve`. Use smallest sufficient value, expand if insufficient. -- `30` for simple fact lookup → `50` for moderate synthesis/comparison → `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline). -- Expansion order: `30 → 50 → 100`. If unsure, use `100`. 
+- Apply `top_k` only to `rag_retrieve`. Choose the appropriate value upfront to maximize first-call success. +- Use `50` for simple fact lookup or moderate synthesis, comparison, summarization, disambiguation. +- Use `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline). +- If unsure, use `50`. Only escalate to `100` on the retry call if first results are insufficient. ## 6. Result Evaluation -Treat as insufficient if: empty, `Error:`, off-topic, missing core entity/scope, no usable evidence, partial coverage, truncated results, or claims required by the answer are not explicitly supported. +**Maximum 3 retrieval calls per question.** After each call, evaluate immediately: + +### Sufficient — answer immediately, no more calls + +ANY of the following means results are sufficient — STOP and answer now: +- The core entity/topic in the user's question appears in the results. +- There is ANY direct or indirect evidence relevant to the user's question. +- Results are partially relevant, even if not perfectly comprehensive. +- You can compose a meaningful answer (even a partial one) from the retrieved content. + +**Anti-patterns — do NOT do these:** +- ❌ "The results are good, but maybe different keywords could find something better." +- ❌ "I have enough to answer, but let me try one more query to be thorough." +- ❌ "The answer is here, but I want to double-check with a different query." +- ❌ Calling retrieval again after you have already identified the answer in previous results. + +**If you can answer the question with current results, you MUST answer immediately. Period.** + +### Insufficient — the ONLY valid reasons to retry + +- Results are completely empty or contain only `Error:` messages. +- ALL results are entirely off-topic with zero relevance to the user's question. +- No usable evidence exists at all — you cannot form even a partial answer. 
+ +**"Results are not detailed enough" is NOT a valid reason to retry.** +**"Results might be incomplete" is NOT a valid reason to retry.** ## 7. Fallback and Sequential Retry -On insufficient results, follow this sequence: +On insufficient results, you may retry **up to 2 more times** (3 calls total): -1. Rewrite query, retry same tool (once) -2. Switch to next retrieval source in default order -3. For `rag_retrieve`, expand `top_k`: `30 → 50 → 100` +1. Rewrite query, retry same tool. +2. For `rag_retrieve`, escalate `top_k` to `100` on retry. -- Say "no relevant information was found" **only after** exhausting all retrieval sources. +- Say "no relevant information was found" **only after** exhausting all retries. - Do NOT switch to local filesystem inspection at any point. - Do NOT switch to model self-knowledge at any point. +- Do NOT make more than 3 retrieval calls in total, counted across all retrieval tools. ## 8. Handling Missing or Partial Evidence @@ -84,14 +117,7 @@ On insufficient results, follow this sequence: - Each sentence or key point in the response should be accompanied by relevant images when they meet the established association criteria. - Avoid placing all images at the end of the response. -## 10. Citation Requirements - -- MUST generate `` tags when using retrieval results. -- Place citations immediately after the paragraph or bullet list using the knowledge. Do NOT collect at end. -- 1-2 citations per paragraph/bullet. At least 1 citation when using retrieved knowledge. -- Do NOT cite claims that were not supported by retrieval. - -## 11. Self-Knowledge Prohibition +## 10. Self-Knowledge Prohibition This section applies whenever self-knowledge is disabled or forbidden for the current task. @@ -101,19 +127,19 @@ This section applies whenever self-knowledge is disabled or forbidden for the cu - The model must not supplement missing parts with general knowledge, conceptual explanation, common background, intuition, or likely completion.
- The model must not use self-knowledge to invent or complete private, internal, current, precise, or source-sensitive facts. - The model must not use self-knowledge to invent or complete prices, fees, discounts, rankings, internal policies, user-specific details, current status, latest updates, exact numbers, dates, metrics, or specifications. -- Retrieved facts must include citations. - Unsupported parts must be stated as unavailable rather than guessed. - If a paragraph would mix retrieved facts and unsupported completion, remove the unsupported completion. - If evidence is incomplete, state the limitation explicitly. -## 12. Pre-Reply Self-Check +## 11. Pre-Reply Self-Check Before replying to a knowledge retrieval task, verify: - Used only whitelisted retrieval tools — no local filesystem inspection? +- Called retrieval at most 3 times total (not more)? +- Answered immediately when results were sufficient (did NOT call again unnecessarily)? +- Called retrieval exactly once when first results were sufficient (did NOT retry unnecessarily)? - Did retrieval happen before any factual answer drafting? - Did every factual claim come from retrieved evidence rather than model knowledge? -- Exhausted retrieval flow before concluding "not found"? -- Citations placed immediately after each relevant paragraph? - If any unsupported part remained, was it removed or explicitly marked unavailable? If any answer is "no", correct the process first. diff --git a/skills/onprem/rag-retrieve-only/hooks/retrieval-policy.md b/skills/onprem/rag-retrieve-only/hooks/retrieval-policy.md index 75195c8..7c69ded 100644 --- a/skills/onprem/rag-retrieve-only/hooks/retrieval-policy.md +++ b/skills/onprem/rag-retrieve-only/hooks/retrieval-policy.md @@ -27,6 +27,13 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi - Do NOT answer from model knowledge first. - After each step, evaluate sufficiency before proceeding. 
+### First-Call Success Principle + +- The first retrieval call is expected to return sufficient results for most questions. +- Your default assumption should be: **one call is enough**. +- Additional calls are the exception, not the norm. Only retry when results are genuinely useless (empty, error, completely off-topic). +- **Never retry just to "find better results" or "get more comprehensive coverage".** Good enough is sufficient. + ## 3. Query Preparation - Do NOT pass raw user question unless it already works well for retrieval. @@ -35,24 +42,51 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi ## 4. Retrieval Breadth (`top_k`) -- Apply `top_k` only to `rag_retrieve`. Use smallest sufficient value, expand if insufficient. -- `30` for simple fact lookup → `50` for moderate synthesis/comparison → `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline). -- Expansion order: `30 → 50 → 100`. If unsure, use `100`. +- Apply `top_k` only to `rag_retrieve`. Choose the appropriate value upfront to maximize first-call success. +- Use `50` for simple fact lookup or moderate synthesis, comparison, summarization, disambiguation. +- Use `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline). +- If unsure, use `50`. Only escalate to `100` on the retry call if first results are insufficient. ## 5. Result Evaluation -Treat as insufficient if: empty, `Error:`, off-topic, missing core entity/scope, no usable evidence, partial coverage, or truncated results. +**Maximum 3 retrieval calls per question.** After each call, evaluate immediately: + +### Sufficient — answer immediately, no more calls + +ANY of the following means results are sufficient — STOP and answer now: +- The core entity/topic in the user's question appears in the results. +- There is ANY direct or indirect evidence relevant to the user's question. 
+- Results are partially relevant, even if not perfectly comprehensive. +- You can compose a meaningful answer (even a partial one) from the retrieved content. + +**Anti-patterns — do NOT do these:** +- ❌ "The results are good, but maybe different keywords could find something better." +- ❌ "I have enough to answer, but let me try one more query to be thorough." +- ❌ "The answer is here, but I want to double-check with a different query." +- ❌ Calling retrieval again after you have already identified the answer in previous results. + +**If you can answer the question with current results, you MUST answer immediately. Period.** + +### Insufficient — the ONLY valid reasons to retry + +- Results are completely empty or contain only `Error:` messages. +- ALL results are entirely off-topic with zero relevance to the user's question. +- No usable evidence exists at all — you cannot form even a partial answer. + +**"Results are not detailed enough" is NOT a valid reason to retry.** +**"Results might be incomplete" is NOT a valid reason to retry.** ## 6. Fallback and Sequential Retry -On insufficient results, follow this sequence: +On insufficient results, you may retry **up to 2 more times** (3 calls total): -1. Rewrite query, retry same tool (once) -2. Switch to next retrieval source in default order -3. For `rag_retrieve`, expand `top_k`: `30 → 50 → 100` +1. Rewrite query, retry same tool. +2. Switch to next retrieval source in default order. +3. For `rag_retrieve`, escalate `top_k` to `100` on retry. - Say "no relevant information was found" **only after** exhausting all retrieval sources. - Do NOT switch to local filesystem inspection at any point. +- Do NOT make more than 3 retrieval calls in total, counted across all retrieval tools. ## 7. Image Handling @@ -89,7 +123,9 @@ This section applies only when self-knowledge is enabled. Before replying to a knowledge retrieval task, verify: - Used only whitelisted retrieval tools — no local filesystem inspection?
-- Exhausted retrieval flow before concluding "not found"? +- Called retrieval at most 3 times total (not more)? +- Answered immediately when results were sufficient (did NOT call again unnecessarily)? +- Called retrieval exactly once when first results were sufficient (did NOT retry unnecessarily)? - Citations placed immediately after each relevant paragraph? - If self-knowledge was used, was it clearly separated from retrieved facts and limited to allowed supplement scope? diff --git a/skills/support/rag-retrieve-only/hooks/retrieval-policy-forbidden-self-knowledge.md b/skills/support/rag-retrieve-only/hooks/retrieval-policy-forbidden-self-knowledge.md index 2b68869..061c855 100644 --- a/skills/support/rag-retrieve-only/hooks/retrieval-policy-forbidden-self-knowledge.md +++ b/skills/support/rag-retrieve-only/hooks/retrieval-policy-forbidden-self-knowledge.md @@ -14,7 +14,7 @@ For knowledge retrieval tasks, **this policy overrides generic codebase explorat - **Prohibited answer source**: the model's own parametric knowledge, memory, prior world knowledge, intuition, common sense completion, or unsupported inference. - **Prohibited tools**: `Glob`, `Read`, `LS`, Bash (`ls`, `find`, `cat`, `head`, `tail`, `grep`, etc.) — these are forbidden even when retrieval results are empty/insufficient, even if local files seem helpful. -- **Allowed tools only**: skill-enabled retrieval tools, `rag_retrieve`. No other source for factual answering. +- **Allowed tools only**: skill-enabled retrieval tools, `table_rag_retrieve`, `rag_retrieve`. No other source for factual answering. - Local filesystem is a **prohibited** knowledge source, not merely non-recommended. - Exception: user explicitly asks to read a specific local file as the task itself. - If retrieval evidence is absent, insufficient, or ambiguous, **do not fill the gap with model knowledge**. @@ -35,11 +35,20 @@ For any knowledge retrieval task: Execute **sequentially, one at a time**. Do NOT run in parallel. 
Do NOT probe filesystem first. 1. **Skill-enabled retrieval tools** (use first when available) -2. **`rag_retrieve`** +2. **`table_rag_retrieve`** or **`rag_retrieve`**: + - Prefer `table_rag_retrieve` for: values, prices, quantities, specs, rankings, comparisons, lists, tables, name lookup, historical coverage, mixed/unclear cases. + - Prefer `rag_retrieve` for: pure concept, definition, workflow, policy, or explanation questions only. - After each step, evaluate sufficiency before proceeding. - Retrieval must happen **before** any factual answer generation. +### First-Call Success Principle + +- The first retrieval call is expected to return sufficient results for most questions. +- Your default assumption should be: **one call is enough**. +- Additional calls are the exception, not the norm. Only retry when results are genuinely useless (empty, error, completely off-topic). +- **Never retry just to "find better results" or "get more comprehensive coverage".** Good enough is sufficient. + ## 4. Query Preparation - Do NOT pass raw user question unless it already works well for retrieval. @@ -48,25 +57,54 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi ## 5. Retrieval Breadth (`top_k`) -- Apply `top_k` only to `rag_retrieve`. Use smallest sufficient value, expand if insufficient. -- `30` for simple fact lookup → `50` for moderate synthesis/comparison → `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline). -- Expansion order: `30 → 50 → 100`. If unsure, use `100`. +- Apply `top_k` only to `rag_retrieve`. Choose the appropriate value upfront to maximize first-call success. +- Use `50` for simple fact lookup or moderate synthesis, comparison, summarization, disambiguation. +- Use `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline). +- If unsure, use `50`. 
Only escalate to `100` on the retry call if first results are insufficient. ## 6. Result Evaluation -Treat as insufficient if: empty, `Error:`, off-topic, missing core entity/scope, no usable evidence, partial coverage, truncated results, or claims required by the answer are not explicitly supported. +**Maximum 3 retrieval calls per question.** After each call, evaluate immediately: + +### Sufficient — answer immediately, no more calls + +ANY of the following means results are sufficient — STOP and answer now: +- The core entity/topic in the user's question appears in the results. +- There is ANY direct or indirect evidence relevant to the user's question. +- Results are partially relevant, even if not perfectly comprehensive. +- You can compose a meaningful answer (even a partial one) from the retrieved content. + +**Anti-patterns — do NOT do these:** +- ❌ "The results are good, but maybe different keywords could find something better." +- ❌ "I have enough to answer, but let me try one more query to be thorough." +- ❌ "The answer is here, but I want to double-check with a different query." +- ❌ Calling retrieval again after you have already identified the answer in previous results. + +**If you can answer the question with current results, you MUST answer immediately. Period.** + +### Insufficient — the ONLY valid reasons to retry + +- Results are completely empty or contain only `Error:` / `no excel files found` messages. +- ALL results are entirely off-topic with zero relevance to the user's question. +- No usable evidence exists at all — you cannot form even a partial answer. + +**"Results are not detailed enough" is NOT a valid reason to retry.** +**"Results might be incomplete" is NOT a valid reason to retry.** ## 7. Fallback and Sequential Retry -On insufficient results, follow this sequence: +On insufficient results, you may retry **up to 2 more times** (3 calls total): -1. Rewrite query, retry same tool (once) -2. 
Switch to next retrieval source in default order -3. For `rag_retrieve`, expand `top_k`: `30 → 50 → 100` +1. Rewrite query, retry same tool. +2. Switch to next retrieval source in default order. +3. For `rag_retrieve`, escalate `top_k` to `100` on retry. +4. `table_rag_retrieve` insufficient → try `rag_retrieve`; `rag_retrieve` insufficient → try `table_rag_retrieve`. -- Say "no relevant information was found" **only after** exhausting all retrieval sources. +- `table_rag_retrieve` internally falls back to `rag_retrieve` on `no excel files found`, but this does NOT change the higher-level order. +- Say "no relevant information was found" **only after** exhausting all retries. - Do NOT switch to local filesystem inspection at any point. - Do NOT switch to model self-knowledge at any point. +- Do NOT make more than 3 retrieval calls in total, counted across all retrieval tools. ## 8. Handling Missing or Partial Evidence @@ -75,7 +113,12 @@ On insufficient results, follow this sequence: - Prefer "the retrieved materials do not provide this information" over speculative completion. - When user asks for a definitive answer but evidence is incomplete, state the limitation directly. -## 9. Image Handling +## 9. Table RAG Result Handling + +- Follow all `[INSTRUCTION]` and `[EXTRA_INSTRUCTION]` in results. +- If truncated: tell user total (`N+M`), displayed (`N`), omitted (`M`). + +## 10. Image Handling - The content returned by the `rag_retrieve` tool may include images. - Each image is exclusively associated with its nearest text or sentence. @@ -84,13 +127,6 @@ On insufficient results, follow this sequence: - Each sentence or key point in the response should be accompanied by relevant images when they meet the established association criteria. - Avoid placing all images at the end of the response. -## 10. Citation Requirements - -- MUST generate `` tags when using retrieval results. -- Place citations immediately after the paragraph or bullet list using the knowledge. Do NOT collect at end.
-- 1-2 citations per paragraph/bullet. At least 1 citation when using retrieved knowledge. -- Do NOT cite claims that were not supported by retrieval. - ## 11. Self-Knowledge Prohibition This section applies whenever self-knowledge is disabled or forbidden for the current task. @@ -101,7 +137,6 @@ This section applies whenever self-knowledge is disabled or forbidden for the cu - The model must not supplement missing parts with general knowledge, conceptual explanation, common background, intuition, or likely completion. - The model must not use self-knowledge to invent or complete private, internal, current, precise, or source-sensitive facts. - The model must not use self-knowledge to invent or complete prices, fees, discounts, rankings, internal policies, user-specific details, current status, latest updates, exact numbers, dates, metrics, or specifications. -- Retrieved facts must include citations. - Unsupported parts must be stated as unavailable rather than guessed. - If a paragraph would mix retrieved facts and unsupported completion, remove the unsupported completion. - If evidence is incomplete, state the limitation explicitly. @@ -110,10 +145,11 @@ This section applies whenever self-knowledge is disabled or forbidden for the cu Before replying to a knowledge retrieval task, verify: - Used only whitelisted retrieval tools — no local filesystem inspection? +- Called retrieval at most 3 times total (not more)? +- Answered immediately when results were sufficient (did NOT call again unnecessarily)? +- Called retrieval exactly once when first results were sufficient (did NOT retry unnecessarily)? - Did retrieval happen before any factual answer drafting? - Did every factual claim come from retrieved evidence rather than model knowledge? -- Exhausted retrieval flow before concluding "not found"? -- Citations placed immediately after each relevant paragraph? - If any unsupported part remained, was it removed or explicitly marked unavailable? 
If any answer is "no", correct the process first. diff --git a/skills/support/rag-retrieve-only/hooks/retrieval-policy.md b/skills/support/rag-retrieve-only/hooks/retrieval-policy.md index 75195c8..7c69ded 100644 --- a/skills/support/rag-retrieve-only/hooks/retrieval-policy.md +++ b/skills/support/rag-retrieve-only/hooks/retrieval-policy.md @@ -27,6 +27,13 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi - Do NOT answer from model knowledge first. - After each step, evaluate sufficiency before proceeding. +### First-Call Success Principle + +- The first retrieval call is expected to return sufficient results for most questions. +- Your default assumption should be: **one call is enough**. +- Additional calls are the exception, not the norm. Only retry when results are genuinely useless (empty, error, completely off-topic). +- **Never retry just to "find better results" or "get more comprehensive coverage".** Good enough is sufficient. + ## 3. Query Preparation - Do NOT pass raw user question unless it already works well for retrieval. @@ -35,24 +42,51 @@ Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe fi ## 4. Retrieval Breadth (`top_k`) -- Apply `top_k` only to `rag_retrieve`. Use smallest sufficient value, expand if insufficient. -- `30` for simple fact lookup → `50` for moderate synthesis/comparison → `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline). -- Expansion order: `30 → 50 → 100`. If unsure, use `100`. +- Apply `top_k` only to `rag_retrieve`. Choose the appropriate value upfront to maximize first-call success. +- Use `50` for simple fact lookup or moderate synthesis, comparison, summarization, disambiguation. +- Use `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline). +- If unsure, use `50`. Only escalate to `100` on the retry call if first results are insufficient. ## 5. 
Result Evaluation -Treat as insufficient if: empty, `Error:`, off-topic, missing core entity/scope, no usable evidence, partial coverage, or truncated results. +**Maximum 3 retrieval calls per question.** After each call, evaluate immediately: + +### Sufficient — answer immediately, no more calls + +ANY of the following means results are sufficient — STOP and answer now: +- The core entity/topic in the user's question appears in the results. +- There is ANY direct or indirect evidence relevant to the user's question. +- Results are partially relevant, even if not perfectly comprehensive. +- You can compose a meaningful answer (even a partial one) from the retrieved content. + +**Anti-patterns — do NOT do these:** +- ❌ "The results are good, but maybe different keywords could find something better." +- ❌ "I have enough to answer, but let me try one more query to be thorough." +- ❌ "The answer is here, but I want to double-check with a different query." +- ❌ Calling retrieval again after you have already identified the answer in previous results. + +**If you can answer the question with current results, you MUST answer immediately. Period.** + +### Insufficient — the ONLY valid reasons to retry + +- Results are completely empty or contain only `Error:` messages. +- ALL results are entirely off-topic with zero relevance to the user's question. +- No usable evidence exists at all — you cannot form even a partial answer. + +**"Results are not detailed enough" is NOT a valid reason to retry.** +**"Results might be incomplete" is NOT a valid reason to retry.** ## 6. Fallback and Sequential Retry -On insufficient results, follow this sequence: +On insufficient results, you may retry **up to 2 more times** (3 calls total): -1. Rewrite query, retry same tool (once) -2. Switch to next retrieval source in default order -3. For `rag_retrieve`, expand `top_k`: `30 → 50 → 100` +1. Rewrite query, retry same tool. +2. Switch to next retrieval source in default order. +3. 
For `rag_retrieve`, escalate `top_k` to `100` on retry. - Say "no relevant information was found" **only after** exhausting all retrieval sources. - Do NOT switch to local filesystem inspection at any point. +- Do NOT make more than 3 retrieval calls in total, counted across all retrieval tools. ## 7. Image Handling @@ -89,7 +123,9 @@ This section applies only when self-knowledge is enabled. Before replying to a knowledge retrieval task, verify: - Used only whitelisted retrieval tools — no local filesystem inspection? -- Exhausted retrieval flow before concluding "not found"? +- Called retrieval at most 3 times total (not more)? +- Answered immediately when results were sufficient (did NOT call again unnecessarily)? +- Called retrieval exactly once when first results were sufficient (did NOT retry unnecessarily)? - Citations placed immediately after each relevant paragraph? - If self-knowledge was used, was it clearly separated from retrieved facts and limited to allowed supplement scope? diff --git a/utils/structured_log.py b/utils/structured_log.py new file mode 100644 index 0000000..596048f --- /dev/null +++ b/utils/structured_log.py @@ -0,0 +1,69 @@ +import json +import logging +import time +from typing import Any, Optional + +logger = logging.getLogger("app") + +SCHEMA_VERSION = 1 + + +def _normalize_value(value: Any) -> Any: + if value is None: + return None + if isinstance(value, (str, int, float, bool)): + return value + return str(value) + + +def emit_question_metric( + *, + stage: str, + status: str, + duration_ms: Optional[int] = None, + first_response_time_ms: Optional[int] = None, + trace_id: Optional[str] = None, + ai_id: Optional[str] = None, + session_id: Optional[str] = None, + robot_type: Optional[str] = None, + model: Optional[str] = None, + stream: Optional[bool] = None, + error_type: Optional[str] = None, + extra: Optional[dict[str, Any]] = None, +) -> None: + payload: dict[str, Any] = { + "schema_version": SCHEMA_VERSION, + "event": { + "kind": "metric", + "category":
["question"], + "action": "question_perf", + }, + "stage": stage, + "status": status, + "observed_at": int(time.time() * 1000), + "service": "catalog-agent", + } + + optional_fields = { + "trace_id": trace_id, + "duration_ms": duration_ms, + "first_response_time_ms": first_response_time_ms, + "ai_id": ai_id, + "session_id": session_id, + "robot_type": robot_type, + "model": model, + "stream": stream, + "error_type": error_type, + } + for key, value in optional_fields.items(): + normalized = _normalize_value(value) + if normalized is not None: + payload[key] = normalized + + if extra: + for key, value in extra.items(): + normalized = _normalize_value(value) + if normalized is not None: + payload[key] = normalized + + logger.info(json.dumps(payload, ensure_ascii=False, separators=(",", ":")))