diff --git a/agent/deep_assistant.py b/agent/deep_assistant.py index abbe887..680c95e 100644 --- a/agent/deep_assistant.py +++ b/agent/deep_assistant.py @@ -45,6 +45,7 @@ from .mem0_config import Mem0Config from agent.prompt_loader import load_system_prompt_async, load_mcp_settings_async from agent.agent_memory_cache import get_memory_cache_manager from .subagent_loader import load_subagents +from agent.plugin_hook_loader import collect_main_agent_hidden_tools from .checkpoint_manager import get_checkpointer_manager from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver from langgraph.checkpoint.memory import InMemorySaver @@ -310,6 +311,15 @@ async def init_agent(config: AgentConfig): logger.info(f"Loaded {len(mcp_tools)} MCP tools") logger.info(f"init_agent mcp tools ready, elapsed: {time.time() - create_start:.3f}s") + # Build the main agent's tool list by hiding tools blacklisted in plugin.json. + # Sub-agents still receive the full mcp_tools set, so hidden tools remain usable by them. 
def collect_main_agent_hidden_tools(bot_id: str) -> set:
    """Collect tool names that must be hidden from the main agent.

    Scans every skill's ``.claude-plugin/plugin.json`` for a top-level
    "mainAgentHiddenTools" list and merges the entries into a single set.
    The caller removes these tools from the main agent's tool list while
    sub-agents keep receiving the full tool set.

    The scan is best-effort: a skill root that is missing, is not a
    directory, or cannot be listed is skipped, and a plugin.json that fails
    to load is logged and skipped, so one broken skill never prevents the
    agent from starting.

    Args:
        bot_id: Bot ID, forwarded to ``_get_skill_dirs`` to locate skill roots.

    Returns:
        set[str]: Union of all hidden tool names, whitespace-stripped.
        Empty set if none configured.
    """
    hidden_tools = set()

    for skill_dir in _get_skill_dirs(bot_id):
        # isdir, not exists: a regular file at this path would pass an
        # existence check and then make os.listdir raise NotADirectoryError.
        if not os.path.isdir(skill_dir):
            continue

        try:
            skill_names = os.listdir(skill_dir)
        except OSError as e:
            # Unreadable directory (permissions, removed mid-scan): skip it
            # rather than abort agent initialisation.
            logger.error(f"Failed to list skill directory {skill_dir}: {e}")
            continue

        for skill_name in skill_names:
            skill_path = os.path.join(skill_dir, skill_name)
            if not os.path.isdir(skill_path):
                continue

            plugin_json = os.path.join(skill_path, '.claude-plugin', 'plugin.json')
            if not os.path.exists(plugin_json):
                continue

            try:
                plugin_config = _load_plugin_config(plugin_json)
                names = plugin_config.get('mainAgentHiddenTools', [])
                if isinstance(names, list):
                    for name in names:
                        # Ignore non-string and blank entries instead of failing the skill.
                        if isinstance(name, str) and name.strip():
                            hidden_tools.add(name.strip())
                else:
                    logger.warning(
                        f"Invalid 'mainAgentHiddenTools' in {skill_name}, expected list"
                    )
            except Exception as e:
                # Deliberate catch-all: a malformed plugin.json must not break the scan.
                logger.error(f"Failed to load mainAgentHiddenTools from {skill_name}: {e}")

    return hidden_tools
class SubagentContextMiddleware(AgentMiddleware):
    """Publish this subagent's name on the request-scoped context `g`.

    One instance is created per subagent and carries that subagent's name.
    Around every model call and tool call (sync and async) the name is stored
    under the "subagent" context key, and the context captured beforehand is
    put back once the call finishes, whether it returned or raised.
    """

    def __init__(self, subagent_name: str) -> None:
        super().__init__()
        self._subagent_name = subagent_name

    def _enter(self) -> dict:
        """Install a context carrying the subagent name; return the prior snapshot."""
        # Snapshot the current context; if none has been set yet, start empty.
        try:
            snapshot = dict(g.get_context())
        except LookupError:
            snapshot = {}
        # Hand g a brand-new dict rather than assigning `g.subagent = name`.
        # Per the original author's note, GlobalContext mutates a shared dict in
        # place and asyncio task copies share that reference, so an in-place
        # write would leak across parallel sibling subagents and race on restore.
        g.update_context({**snapshot, _SUBAGENT_KEY: self._subagent_name})
        return snapshot

    def _exit(self, prev: dict) -> None:
        """Reinstall the snapshot taken by `_enter` (passed as a fresh copy)."""
        g.update_context(dict(prev))

    # ----- model call -----
    def wrap_model_call(
        self,
        request: ModelRequest,
        handler: Callable[[ModelRequest], ModelResponse],
    ) -> ModelResponse:
        """Run the synchronous model call with the subagent name in context."""
        saved = self._enter()
        try:
            response = handler(request)
        finally:
            self._exit(saved)
        return response

    async def awrap_model_call(
        self,
        request: ModelRequest,
        handler: Callable[[ModelRequest], Awaitable[ModelResponse]],
    ) -> ModelResponse:
        """Run the asynchronous model call with the subagent name in context."""
        saved = self._enter()
        try:
            response = await handler(request)
        finally:
            self._exit(saved)
        return response

    # ----- tool call -----
    def wrap_tool_call(
        self,
        request: ToolCallRequest,
        handler: Callable[[ToolCallRequest], Any],
    ) -> Any:
        """Run the synchronous tool call with the subagent name in context."""
        saved = self._enter()
        try:
            outcome = handler(request)
        finally:
            self._exit(saved)
        return outcome

    async def awrap_tool_call(
        self,
        request: ToolCallRequest,
        handler: Callable[[ToolCallRequest], Awaitable[Any]],
    ) -> Any:
        """Run the asynchronous tool call with the subagent name in context."""
        saved = self._enter()
        try:
            outcome = await handler(request)
        finally:
            self._exit(saved)
        return outcome
+ "middleware": [SubagentContextMiddleware(name)], } subagents.append(subagent) logger.info(f"Loaded sub-agent '{name}' with {len(filtered_tools)} tools from {parsed['source']}") diff --git a/skills/developing/pmda-drug-info/.claude-plugin/plugin.json b/skills/developing/pmda-drug-info/.claude-plugin/plugin.json index c709e99..65bb9ff 100644 --- a/skills/developing/pmda-drug-info/.claude-plugin/plugin.json +++ b/skills/developing/pmda-drug-info/.claude-plugin/plugin.json @@ -1,5 +1,6 @@ { "name": "pmda-drug-info", + "version": "0.1.0", "description": "PMDA drug information tools for Japanese pharmaceutical package insert queries. Provides drug search, master info, interactions, restrictions, dosing, and full-text chapter retrieval via PostgreSQL + OpenSearch.", "hooks": { "PrePrompt": [ @@ -9,14 +10,34 @@ } ] }, + "mainAgentHiddenTools": [ + "search_drugs", + "list_categories", + "list_drugs_in_category", + "get_drug_master", + "get_drug_interactions", + "get_drug_restrictions", + "get_drug_dosing", + "search_section_text", + "list_drug_chapters", + "read_drug_chapter" + ], "mcpServers": { "pmda_drug_info": { "transport": "stdio", - "command": "python", + "command": "python3", "args": [ "./pmda_server.py" - ] + ], + "env": { + "PMDA_PG_HOST": "postgres-db", + "PMDA_PG_PORT": "5432", + "PMDA_PG_DB": "gptbase", + "PMDA_PG_USER": "postgres", + "PMDA_PG_PASSWORD": "yRhnjSnhufuxNcCxFtPctXnTbAKS2jT2", + "PMDA_OPENSEARCH_URL": "http://admin:admin@opensearch-node:9200", + "PMDA_OS_INDEX": "pmda_sections" + } } - }, - "category": "Developer Tools" + } } diff --git a/skills/developing/pmda-drug-info/README.md b/skills/developing/pmda-drug-info/README.md new file mode 100644 index 0000000..94fcd2a --- /dev/null +++ b/skills/developing/pmda-drug-info/README.md @@ -0,0 +1,76 @@ +# pmda-drug-info — Claude Code MCP plugin + +PMDA 添付文書ベース医薬指導 Q&A の MCP plugin(hu-sandbox/pmda v2e 98/100 baseline)。 + +## アーキテクチャ + +``` +Claude Code (agent) → MCP stdio → pmda_server.py + ├─ PG 
queries (drug_master / interaction / restriction / dosing) + └─ OS search (pmda_sections, sudachi C-mode) +``` + +10 tools: `search_drugs`, `list_categories`, `list_drugs_in_category`, +`get_drug_master`, `get_drug_interactions`, `get_drug_restrictions`, +`get_drug_dosing`, `search_section_text`, `list_drug_chapters`, +`read_drug_chapter`. + +4 sub-agents(`agents/*.md`): `single_drug`, `interaction`, +`patient_specific`, `adverse_event`. + +## Plugin は独立配布 + +`mygpt.*` への依存なし。PG/OS への接続情報を環境変数で渡すだけで動く。 +`queries.py` / `db.py` / `os_client.py` / `taxonomy.py` / `drug_category.md` +は hu-sandbox/pmda からコピーした自己完結セット。 + +## 環境変数 + +``` +PMDA_PG_HOST Postgres ホスト (例: tunnel / pg.example.com) +PMDA_PG_PORT Postgres ポート (default 5432) +PMDA_PG_DB Postgres DB (例: gptbase) +PMDA_PG_USER Postgres ユーザ +PMDA_PG_PASSWORD Postgres パスワード + +PMDA_OPENSEARCH_URL OpenSearch URL (例: http://admin:admin@tunnel:9200) +PMDA_OS_INDEX OS index 名(default: pmda_sections) +``` + +## インストール + +```bash +pip install -r requirements.txt +``` + +Claude Code: + +```bash +# Plugin ディレクトリを Claude Code に登録(PROJECT 単位で) +/plugin install path/to/pmda-drug-info +``` + +`.claude-plugin/plugin.json` の `mcpServers.pmda_drug_info` が +`python ./pmda_server.py` を stdio MCP サーバとして起動する。 + +## データ準備 + +Plugin は読み取り専用。データ投入は gbase-onprem の folder-connector ++ PmdaXmlPipeline(`mygpt/plugins/pmda/pipeline.py`)が一括管理する。 + +- PG: aerich migration `migrations/models/263_*_add_pmda_tables.py` で 4 表作成 +- OS: `pmda_sections` index は `mygpt/plugins/pmda/os_index.py` の DDL を + pipeline 初期化時に適用 +- データ投入: folder-connector で PMDA XML を登録すると 9 ステップ + pipeline が OS bulk index + PG fact 抽出を実行 + +詳細は `docs/pmda-sync-flow.md` を参照。 + +## 動作確認 + +```bash +# stdio MCP リクエストを手動で投げる +echo '{"jsonrpc":"2.0","id":1,"method":"initialize"}' | python pmda_server.py +echo '{"jsonrpc":"2.0","id":2,"method":"tools/list"}' | python pmda_server.py +echo 
'{"jsonrpc":"2.0","id":3,"method":"tools/call","params":{"name":"search_drugs","arguments":{"query":"ロサルタン"}}}' | python pmda_server.py +``` diff --git a/skills/developing/pmda-drug-info/agents/adverse-event.md b/skills/developing/pmda-drug-info/agents/adverse-event.md index f4be105..f7ef51a 100644 --- a/skills/developing/pmda-drug-info/agents/adverse-event.md +++ b/skills/developing/pmda-drug-info/agents/adverse-event.md @@ -6,26 +6,31 @@ description: Reverse lookup drugs by adverse event name. Find which drugs have r tools: search_section_text, search_drugs, get_drug_master, list_drug_chapters, read_drug_chapter --- -あなたは「副作用 → 該当薬剤の逆引き」専門の sub-agent です。 +You are a sub-agent specialized in reverse lookup from an adverse event to the drugs that report it. -【ツール戦略】 -1. `search_section_text(keyword=副作用名, section_filter="副作用")` で逆引き。 - total_drugs は必ず本文中に明示する。 -2. 同義語が必要なケース: +## Tool Strategy +1. Reverse-lookup with `search_section_text(keyword=, section_filter="副作用")`. Always state `total_drugs` explicitly in the answer. +2. Synonyms are handled automatically — OpenSearch's synonym filter expands them in a single search, e.g.: "Stevens-Johnson" ⇔ "皮膚粘膜眼症候群" / "SJS" "QT延長" ⇔ "Torsades de pointes" "間質性肺炎" ⇔ "肺臓炎" - OS の synonym filter が自動展開するので 1 回の検索で OK。 -3. hit から代表薬を 3〜5 件選び、`read_drug_chapter` で 11.1 重大な副作用 / 11.2 その他の副作用 - verbatim を引用。 -4. 因果推論("この薬がこの患者の症状を起こした")は **絶対しない**。 - 情報提示のみ。 +3. From the hits, pick 3–5 representative drugs and quote "11.1 重大な副作用" / "11.2 その他の副作用" verbatim with `read_drug_chapter`. +4. NEVER make causal inferences (e.g. "this drug caused this patient's symptom"). Information presentation only. -【絶対ルール】 -1. ツール呼び出し必須。トレーニング知識・教科書・ガイドラインからの推測は禁止。 -2. 数値・固有名・条件は本文表現を改変せず逐語引用。 -3. 出典は **必ず** `[出典: <販売名> (yj_full=) / <章番号 章タイトル>]` の形式。 - - fact 表 row には `_citation` フィールドが入っているので **そのまま転記**。 - - `[出典: 薬品マスター]` `[出典: 添付文書]` 等の汎用出典は **絶対禁止**。 - - read_drug_chapter で実際に読んだ section 以外の出典を捏造しない。 -4. 
該当情報が無ければ "添付文書からは確認できません" と書く。 +## Absolute Rules +1. Tool calls are mandatory. Never infer from training knowledge, textbooks, or guidelines. +2. Quote numbers, proper nouns, and conditions verbatim from the source text — do not paraphrase. +3. Text citation is required, in exactly this format: `[出典: <販売名> (yj_full=<yj_full>) / <章番号 章タイトル>]`. + - Fact-table rows include a `_citation` field — copy it verbatim. + - Generic citations such as `[出典: 薬品マスター]` or `[出典: 添付文書]` are PROHIBITED. + - Never fabricate a citation for a section you did not actually read via `read_drug_chapter`. +4. If the information cannot be found, write "添付文書からは確認できません". + +## Citation Requirements (clickable `<CITATION>` tags) +Every tool result record ALSO ends with a `CITATION:` line — a pre-built `<CITATION>` clickable tag that the frontend PDF-highlight pipeline depends on. Your FINAL answer (the text returned to the main agent) MUST include these tags, in addition to the `[出典: ...]` text — otherwise the citation is not clickable and the tag is lost. +- Copy the record's `CITATION:` line VERBATIM (byte-for-byte) immediately after the fact-grounded paragraph or bullet. NEVER collect tags at the end of the answer. +- Do NOT add, modify, reorder, or remove any attribute. Do NOT construct a `<CITATION>` tag yourself. +- At most one `<CITATION>` per unique file. +- `read_drug_chapter` returns the `<CITATION>` already embedded in its header/footer — copy it as-is. +- Records without a `CITATION:` line → emit the `[出典: ...]` text only; never fabricate an empty tag. +- An answer that states facts but contains zero `<CITATION>` tags is a failed answer. 
diff --git a/skills/developing/pmda-drug-info/agents/interaction.md b/skills/developing/pmda-drug-info/agents/interaction.md index b29e068..e2dbbfb 100644 --- a/skills/developing/pmda-drug-info/agents/interaction.md +++ b/skills/developing/pmda-drug-info/agents/interaction.md @@ -5,24 +5,31 @@ description: Investigate drug-drug interactions between two drugs, or list all i tools: search_drugs, get_drug_master, get_drug_interactions, search_section_text, list_drug_chapters, read_drug_chapter --- -あなたは「薬剤間相互作用」専門の sub-agent です。 +You are a sub-agent specialized in drug-drug interactions. -【ツール戦略】 -- A・B 両薬の yj_code を `search_drugs` で取得。 -- `get_drug_interactions(drug_a_yj=A, drug_b_yj=B)` で双方向検索(A→B も B→A も拾える)。 -- ヒットしたら drug_a の側の出典 section(10.1 / 10.2)を `list_drug_chapters` + `read_drug_chapter` で - verbatim 取得。drug_b 側にも該当記載があるか確認。 -- ヒットゼロ → "添付文書上は併用禁忌・併用注意の明確な記載なし" と書く(自由記述/警告等は - 別途 `search_section_text(keyword=B薬名, section_filter="相互作用")` で念押し)。 -- 1 薬名のみ与えられた場合は `get_drug_interactions(drug_a_yj=...)` で全相互作用一覧。 +## Tool Strategy +- Get the yj_code of both drugs A and B with `search_drugs`. +- Search both directions with `get_drug_interactions(drug_a_yj=A, drug_b_yj=B)` (catches A→B and B→A). +- On a hit, retrieve the citing section on drug A's side (10.1 / 10.2) verbatim with `list_drug_chapters` + `read_drug_chapter`. Also check whether drug B's side carries a matching statement. +- On zero hits, write "添付文書上は併用禁忌・併用注意の明確な記載なし" (free-text warnings can be double-checked separately with `search_section_text(keyword=, section_filter="相互作用")`). +- If only one drug name is given, list all interactions with `get_drug_interactions(drug_a_yj=...)`. -severity は本文の "併用禁忌" / "併用注意" の語をそのまま転記。 +Copy the `severity` field verbatim using the source wording "併用禁忌" / "併用注意". -【絶対ルール】 -1. ツール呼び出し必須。トレーニング知識・教科書・ガイドラインからの推測は禁止。 -2. 数値・固有名・条件は本文表現を改変せず逐語引用。 -3. 
出典は **必ず** `[出典: <販売名> (yj_full=) / <章番号 章タイトル>]` の形式。 - - fact 表 row には `_citation` フィールドが入っているので **そのまま転記**。 - - `[出典: 薬品マスター]` `[出典: 添付文書]` 等の汎用出典は **絶対禁止**。 - - read_drug_chapter で実際に読んだ section 以外の出典を捏造しない。 -4. 該当情報が無ければ "添付文書からは確認できません" と書く。 +## Absolute Rules +1. Tool calls are mandatory. Never infer from training knowledge, textbooks, or guidelines. +2. Quote numbers, proper nouns, and conditions verbatim from the source text — do not paraphrase. +3. Text citation is required, in exactly this format: `[出典: <販売名> (yj_full=<yj_full>) / <章番号 章タイトル>]`. + - Fact-table rows include a `_citation` field — copy it verbatim. + - Generic citations such as `[出典: 薬品マスター]` or `[出典: 添付文書]` are PROHIBITED. + - Never fabricate a citation for a section you did not actually read via `read_drug_chapter`. +4. If the information cannot be found, write "添付文書からは確認できません". + +## Citation Requirements (clickable `<CITATION>` tags) +Every tool result record ALSO ends with a `CITATION:` line — a pre-built `<CITATION>` clickable tag that the frontend PDF-highlight pipeline depends on. Your FINAL answer (the text returned to the main agent) MUST include these tags, in addition to the `[出典: ...]` text — otherwise the citation is not clickable and the tag is lost. +- Copy the record's `CITATION:` line VERBATIM (byte-for-byte) immediately after the fact-grounded paragraph or bullet. NEVER collect tags at the end of the answer. +- Do NOT add, modify, reorder, or remove any attribute. Do NOT construct a `<CITATION>` tag yourself. +- At most one `<CITATION>` per unique file. +- `read_drug_chapter` returns the `<CITATION>` already embedded in its header/footer — copy it as-is. +- Records without a `CITATION:` line → emit the `[出典: ...]` text only; never fabricate an empty tag. +- An answer that states facts but contains zero `<CITATION>` tags is a failed answer. 
diff --git a/skills/developing/pmda-drug-info/agents/patient-specific.md b/skills/developing/pmda-drug-info/agents/patient-specific.md index 49f5053..42de12b 100644 --- a/skills/developing/pmda-drug-info/agents/patient-specific.md +++ b/skills/developing/pmda-drug-info/agents/patient-specific.md @@ -5,28 +5,36 @@ description: Determine drug administration feasibility and dosage adjustment for tools: search_drugs, get_drug_master, get_drug_restrictions, get_drug_dosing, list_drug_chapters, read_drug_chapter --- -あなたは「特定患者への投与可否・用量調整」専門の sub-agent です。 +You are a sub-agent specialized in administration feasibility and dosage adjustment for specific patients. -【ツール戦略】 -1. 薬名から yj_code を `search_drugs` で取得。 -2. 患者条件を condition_type に対応付け: - - 腎機能 (eGFR/CrCl) → "腎機能障害" - - 肝機能 (Child-Pugh) → "肝機能障害" - - 妊娠/授乳 → "妊婦"/"授乳婦" - - 年齢 (小児/高齢) → "小児等"/"高齢者" - - アレルギー既往 → "過敏症" - - 合併症 (糖尿病/喘息など) → "疾患" -3. `get_drug_restrictions(drug_yj=..., condition_type=...)` で該当 restriction を取得。 - condition_params の数値(例: {"eGFR_max": 30})を必ず確認。 -4. `get_drug_dosing(drug_yj=..., patient_segment=...)` で患者層別用量を取得。 -5. 必要なら原文 `read_drug_chapter` で 9.x 章 verbatim 引用。 -6. 数値判定(例: eGFR=25 ⇔ eGFR_max=30 → 該当)を agent が責任もって行う。 +## Tool Strategy +1. Get the yj_code from the drug name with `search_drugs`. +2. Map the patient condition to a `condition_type`: + - Renal function (eGFR/CrCl) → "腎機能障害" + - Hepatic function (Child-Pugh) → "肝機能障害" + - Pregnancy / lactation → "妊婦" / "授乳婦" + - Age (pediatric / elderly) → "小児等" / "高齢者" + - Allergy history → "過敏症" + - Comorbidity (diabetes, asthma, etc.) → "疾患" +3. Get the matching restriction with `get_drug_restrictions(drug_yj=..., condition_type=...)`. Always check the `condition_params` values (e.g. `{"eGFR_max": 30}`). +4. Get patient-segment dosing with `get_drug_dosing(drug_yj=..., patient_segment=...)`. +5. When needed, quote the 9.x chapter verbatim via `read_drug_chapter`. +6. The agent is responsible for the numeric judgment (e.g. 
eGFR=25 vs eGFR_max=30 → applies). -【絶対ルール】 -1. ツール呼び出し必須。トレーニング知識・教科書・ガイドラインからの推測は禁止。 -2. 数値・固有名・条件は本文表現を改変せず逐語引用。 -3. 出典は **必ず** `[出典: <販売名> (yj_full=) / <章番号 章タイトル>]` の形式。 - - fact 表 row には `_citation` フィールドが入っているので **そのまま転記**。 - - `[出典: 薬品マスター]` `[出典: 添付文書]` 等の汎用出典は **絶対禁止**。 - - read_drug_chapter で実際に読んだ section 以外の出典を捏造しない。 -4. 該当情報が無ければ "添付文書からは確認できません" と書く。 +## Absolute Rules +1. Tool calls are mandatory. Never infer from training knowledge, textbooks, or guidelines. +2. Quote numbers, proper nouns, and conditions verbatim from the source text — do not paraphrase. +3. Text citation is required, in exactly this format: `[出典: <販売名> (yj_full=<yj_full>) / <章番号 章タイトル>]`. + - Fact-table rows include a `_citation` field — copy it verbatim. + - Generic citations such as `[出典: 薬品マスター]` or `[出典: 添付文書]` are PROHIBITED. + - Never fabricate a citation for a section you did not actually read via `read_drug_chapter`. +4. If the information cannot be found, write "添付文書からは確認できません". + +## Citation Requirements (clickable `<CITATION>` tags) +Every tool result record ALSO ends with a `CITATION:` line — a pre-built `<CITATION>` clickable tag that the frontend PDF-highlight pipeline depends on. Your FINAL answer (the text returned to the main agent) MUST include these tags, in addition to the `[出典: ...]` text — otherwise the citation is not clickable and the tag is lost. +- Copy the record's `CITATION:` line VERBATIM (byte-for-byte) immediately after the fact-grounded paragraph or bullet. NEVER collect tags at the end of the answer. +- Do NOT add, modify, reorder, or remove any attribute. Do NOT construct a `<CITATION>` tag yourself. +- At most one `<CITATION>` per unique file. +- `read_drug_chapter` returns the `<CITATION>` already embedded in its header/footer — copy it as-is. +- Records without a `CITATION:` line → emit the `[出典: ...]` text only; never fabricate an empty tag. +- An answer that states facts but contains zero `<CITATION>` tags is a failed answer. 
diff --git a/skills/developing/pmda-drug-info/agents/single-drug.md b/skills/developing/pmda-drug-info/agents/single-drug.md index e5340a9..e6eb391 100644 --- a/skills/developing/pmda-drug-info/agents/single-drug.md +++ b/skills/developing/pmda-drug-info/agents/single-drug.md @@ -5,22 +5,30 @@ description: Answer factual questions about a single drug (brand name, generic n tools: search_drugs, get_drug_master, get_drug_dosing, get_drug_restrictions, list_drug_chapters, read_drug_chapter --- -あなたは「単一薬の事実回答」専門の sub-agent です。 +You are a sub-agent specialized in factual answers about a single drug. -【ツール戦略】 -1. 質問から薬名/yj_code を特定 → `search_drugs` または直接 yj_code が分かれば次へ。 -2. `get_drug_master(yj_code)` で基本情報(販売名・一般名・薬効分類・規制)を確定。 -3. 必要に応じて `get_drug_dosing` で用法用量、`get_drug_restrictions(drug_yj=...)` で禁忌・特定患者注意。 -4. 自由記述や上記テーブルに無い情報(例: 重大な副作用一覧、薬物動態の数値)は - `list_drug_chapters(yj_full)` → `read_drug_chapter(yj_full, section_title)` で原文取得。 +## Tool Strategy +1. Identify the drug name / yj_code from the question → use `search_drugs`, or go straight ahead if the yj_code is already known. +2. Confirm basic info (brand name, generic name, pharmacological category, regulation) with `get_drug_master(yj_code)`. +3. As needed, use `get_drug_dosing` for dosing and `get_drug_restrictions(drug_yj=...)` for contraindications / patient-specific precautions. +4. For free-text details not in the fact tables (e.g. the full list of serious adverse reactions, pharmacokinetic values), retrieve the source text with `list_drug_chapters(yj_full)` → `read_drug_chapter(yj_full, section_title)`. -最終回答は箇条書き or 表で、各事実に出典を付ける。 +Present the final answer as bullets or a table, attaching a citation to every fact. -【絶対ルール】 -1. ツール呼び出し必須。トレーニング知識・教科書・ガイドラインからの推測は禁止。 -2. 数値・固有名・条件は本文表現を改変せず逐語引用。 -3. 
出典は **必ず** `[出典: <販売名> (yj_full=) / <章番号 章タイトル>]` の形式。 - - fact 表 row には `_citation` フィールドが入っているので **そのまま転記**。 - - `[出典: 薬品マスター]` `[出典: 添付文書]` 等の汎用出典は **絶対禁止**。 - - read_drug_chapter で実際に読んだ section 以外の出典を捏造しない。 -4. 該当情報が無ければ "添付文書からは確認できません" と書く。 +## Absolute Rules +1. Tool calls are mandatory. Never infer from training knowledge, textbooks, or guidelines. +2. Quote numbers, proper nouns, and conditions verbatim from the source text — do not paraphrase. +3. Text citation is required, in exactly this format: `[出典: <販売名> (yj_full=<yj_full>) / <章番号 章タイトル>]`. + - Fact-table rows include a `_citation` field — copy it verbatim. + - Generic citations such as `[出典: 薬品マスター]` or `[出典: 添付文書]` are PROHIBITED. + - Never fabricate a citation for a section you did not actually read via `read_drug_chapter`. +4. If the information cannot be found, write "添付文書からは確認できません". + +## Citation Requirements (clickable `<CITATION>` tags) +Every tool result record ALSO ends with a `CITATION:` line — a pre-built `<CITATION>` clickable tag that the frontend PDF-highlight pipeline depends on. Your FINAL answer (the text returned to the main agent) MUST include these tags, in addition to the `[出典: ...]` text — otherwise the citation is not clickable and the tag is lost. +- Copy the record's `CITATION:` line VERBATIM (byte-for-byte) immediately after the fact-grounded paragraph or bullet. NEVER collect tags at the end of the answer. +- Do NOT add, modify, reorder, or remove any attribute. Do NOT construct a `<CITATION>` tag yourself. +- At most one `<CITATION>` per unique file. +- `read_drug_chapter` returns the `<CITATION>` already embedded in its header/footer — copy it as-is. +- Records without a `CITATION:` line → emit the `[出典: ...]` text only; never fabricate an empty tag. +- An answer that states facts but contains zero `<CITATION>` tags is a failed answer. 
diff --git a/skills/developing/pmda-drug-info/db.py b/skills/developing/pmda-drug-info/db.py new file mode 100644 index 0000000..0ecc0d1 --- /dev/null +++ b/skills/developing/pmda-drug-info/db.py @@ -0,0 +1,80 @@ +"""Postgres 连接 helper。 + +配置全部走环境变量,默认指向 docker-compose 起的本地实例: + + PMDA_PG_HOST (默认 localhost) + PMDA_PG_PORT (默认 5432) + PMDA_PG_DB (默认 pmda) + PMDA_PG_USER (默认 pmda) + PMDA_PG_PASSWORD (默认 pmda_local_dev — 仅本地开发,生产由 secret 注入) + +`connect()` 返回 psycopg3 connection(autocommit=False)。 +长时跑批时使用 `pool()` 取 ConnectionPool。 +""" +from __future__ import annotations + +import os +from contextlib import contextmanager +from typing import Iterator + +import psycopg +from psycopg import Connection +from psycopg_pool import ConnectionPool + +PG_HOST = os.environ.get("PMDA_PG_HOST", "localhost") +PG_PORT = int(os.environ.get("PMDA_PG_PORT", "5432")) +PG_DB = os.environ.get("PMDA_PG_DB", "pmda") +PG_USER = os.environ.get("PMDA_PG_USER", "pmda") +PG_PASSWORD = os.environ.get("PMDA_PG_PASSWORD", "pmda_local_dev") + + +def conninfo() -> str: + return ( + f"host={PG_HOST} port={PG_PORT} dbname={PG_DB} " + f"user={PG_USER} password={PG_PASSWORD}" + ) + + +def connect(*, autocommit: bool = False) -> Connection: + """Open a single connection. Caller is responsible for closing.""" + return psycopg.connect(conninfo(), autocommit=autocommit) + + +@contextmanager +def session(*, autocommit: bool = False) -> Iterator[Connection]: + """`with session() as conn:` — auto close on exit.""" + conn = connect(autocommit=autocommit) + try: + yield conn + if not autocommit: + conn.commit() + except Exception: + if not autocommit: + conn.rollback() + raise + finally: + conn.close() + + +_pool: ConnectionPool | None = None + + +def pool(min_size: int = 1, max_size: int = 8) -> ConnectionPool: + """Lazy-init module-level pool. 
Use for batch / agent-loop hot path.""" + global _pool + if _pool is None: + _pool = ConnectionPool( + conninfo(), + min_size=min_size, + max_size=max_size, + kwargs={"autocommit": False}, + open=True, + ) + return _pool + + +def close_pool() -> None: + global _pool + if _pool is not None: + _pool.close() + _pool = None diff --git a/skills/developing/pmda-drug-info/drug_category.md b/skills/developing/pmda-drug-info/drug_category.md new file mode 100644 index 0000000..85e6167 --- /dev/null +++ b/skills/developing/pmda-drug-info/drug_category.md @@ -0,0 +1,206 @@ + +- 11 中枢神経系用薬 + - 111 全身麻酔剤 + - 112 催眠鎮静剤,抗不安剤 + - 113 抗てんかん剤 + - 114 解熱鎮痛消炎剤 + - 115 興奮剤,覚醒剤 + - 116 抗パーキンソン剤 + - 117 精神神経用剤 + - 118 総合感冒剤 + - 119 その他の中枢神経系用薬 +- 12 末梢神経用薬 + - 121 局所麻酔剤 + - 122 骨格筋弛緩剤 + - 123 自律神経剤 + - 124 鎮けい剤 + - 125 発汗剤,止汗剤 + - 129 その他の末梢神経系用薬 +- 13 感覚器用薬 + - 131 眼科用剤 + - 132 耳鼻科用剤 + - 133 鎮暈剤 + - 139 その他の感覚器官用薬 +- 19 その他の神経系及び感覚器官用医薬品 +- 21 循環器官用薬 + - 211 強心剤 + - 212 不整脈用剤 + - 213 利尿剤 + - 214 血圧降下剤 + - 215 血管補強剤 + - 216 血管収縮剤 + - 217 血管拡張剤 + - 218 高脂血症用剤 + - 219 その他の循環器官用薬 +- 22 呼吸器官用薬 + - 221 呼吸促進剤 + - 222 鎮咳剤 + - 223 去たん剤 + - 224 鎮咳去たん剤 + - 225 気管支拡張剤 + - 226 含嗽剤 + - 229 その他の呼吸器官用薬 +- 23 消化器官用薬 + - 231 止しゃ剤,整腸剤 + - 232 消化性潰瘍用剤 + - 233 健胃消化剤 + - 234 制酸剤 + - 235 下剤,浣腸剤 + - 236 利胆剤 + - 237 複合胃腸剤 + - 239 その他の消化器官用薬 +- 24 ホルモン剤(抗ホルモン剤を含む) + - 241 脳下垂体ホルモン剤 + - 242 唾液腺ホルモン剤 + - 243 甲状腺,副甲状腺ホルモン剤 + - 244 たん白同化ステロイド剤 + - 245 副腎ホルモン剤 + - 246 男性ホルモン剤 + - 247 卵胞ホルモン及び黄体ホルモン剤 + - 248 混合ホルモン剤 + - 249 その他のホルモン剤(抗ホルモン剤を含む) +- 25 泌尿生殖器官及び肛門用薬 + - 251 泌尿器官用剤 + - 252 生殖器官用剤(性病予防剤を含む。) + - 253 子宮収縮剤 + - 254 避妊剤 + - 255 痔疾用剤 + - 259 その他の泌尿生殖器官及び肛門用薬 +- 26 外皮用薬 + - 261 外皮用殺菌消毒剤 + - 262 創傷保護剤 + - 263 化膿性疾患用剤 + - 264 鎮痛,鎮痒,収歛,消炎剤 + - 265 寄生性皮ふ疾患用剤 + - 266 皮ふ軟化剤(腐しょく剤を含む。) + - 267 毛髪用剤(発毛剤,脱毛剤,染毛剤,養毛剤 + - 268 浴剤 + - 269 その他の外皮用薬 +- 27 歯科口腔用薬 + - 271 歯科用局所麻酔剤 + - 272 歯髄失活剤 + - 273 歯科用鎮痛鎮静剤(根管及び齲窩消毒剤を含 + - 274 歯髄乾屍剤(根管充填剤を含む。) + - 275 歯髄覆たく剤 + - 276 歯科用抗生物質製剤 + - 279 その他の歯科口腔用薬 +- 29 その他の個々の器官系用医薬品 + - 290 
その他の個々の器官系用医薬品 +- 31 ビタミン剤 + - 311 ビタミンA及びD剤 + - 312 ビタミンB1剤 + - 313 ビタミンB剤(ビタミンB1剤を除く。) + - 314 ビタミンC剤 + - 315 ビタミンE剤 + - 316 ビタミンK剤 + - 317 混合ビタミン剤(ビタミンA・D混合製剤を除く) + - 319 その他のビタミン剤 +- 32 滋養強壮薬 + - 321 カルシウム剤 + - 322 無機質製剤 + - 323 糖類剤 + - 324 有機酸製剤 + - 325 たん白アミノ酸製剤 + - 326 臓器製剤 + - 327 乳幼児用剤 + - 329 その他の滋養強壮薬 +- 33 血液・体液用薬 + - 331 血液代用剤 + - 332 止血剤 + - 333 血液凝固阻止剤 + - 339 その他の血液・体液用薬 +- 34 人工透析用薬 + - 341 人工腎臓透析用剤 + - 342 腹膜透析用剤 + - 349 その他の人工透析用薬 +- 39 その他の代謝性医薬品 + - 391 肝臓疾患用剤 + - 392 解毒剤 + - 393 習慣性中毒用剤 + - 394 痛風治療剤 + - 395 酵素製剤 + - 396 糖尿病用剤 + - 397 総合代謝性製剤 + - 399 他に分類されない代謝性医薬品 +- 41 細胞賦活用薬 + - 411 クロロフィル製剤 + - 412 色素製剤 + - 419 その他の細胞賦活用薬 +- 42 腫瘍用薬 + - 421 アルキル化剤 + - 422 代謝拮抗剤 + - 423 抗腫瘍性抗生物質製剤 + - 424 抗腫瘍性植物成分製剤 + - 429 その他の腫瘍用薬 +- 43 放射性医薬品 + - 430 放射性医薬品 +- 44 アレルギー用薬 + - 441 抗ヒスタミン剤 + - 442 刺激療法剤 + - 443 非特異性免疫原製剤 + - 449 その他のアレルギー用薬 +- 49 その他の組織細胞機能用医薬品 + - 490 その他の組織細胞機能用医薬品 +- 51 生薬 + - 510 生薬 +- 52 漢方製剤 + - 520 漢方製剤 +- 59 その他の生薬及び漢方処方に基づく医薬品 + - 590 その他の生薬及び漢方処方に基づく医薬品 +- 61 抗生物質製剤 + - 611 主としてグラム陽性菌に作用するもの + - 612 主としてグラム陰性菌に作用するもの + - 613 主としてグラム陽性・陰性菌に作用するもの + - 614 主としてグラム陽性菌,マイコプラズマに作用するもの + - 615 主としてグラム陽性・陰性菌,リケッチア,クラミジアに作用するもの + - 616 主として抗酸菌に作用するもの + - 617 主としてカビに作用するもの + - 619 その他の抗生物質製剤(複合抗生物質製剤を含む) +- 62 化学療法剤 + - 621 サルファ剤 + - 622 抗結核剤 + - 623 抗ハンセン病剤 + - 624 合成抗菌剤 + - 625 抗ウイルス剤 + - 629 その他の化学療法剤 +- 63 生物学的製剤 + - 631 ワクチン類 + - 632 毒素及びトキソイド類 + - 633 抗毒素類及び抗レプトスピラ血清類 + - 634 血液製剤類 + - 635 生物学的試験用製剤類 + - 636 混合生物学的製剤 + - 639 その他の生物学的製剤 +- 64 寄生動物用薬 + - 641 抗原虫剤 + - 642 駆虫剤 + - 649 その他の寄生動物用薬 +- 69 その他の病原生物に対する医薬品 + - 690 その他の病原生物に対する医薬品 +- 71 調剤用薬 + - 711 賦形剤 + - 712 軟膏基剤 + - 713 溶解剤 + - 714 矯味,矯臭,着色剤 + - 715 乳化剤 + - 719 その他の調剤用薬 +- 72 診断用薬(体外診断用医薬品を除く) + - 721 X線造影剤 + - 722 機能検査用試薬 + - 729 その他の診断用薬 +- 73 公衆衛生用薬 + - 731 防腐剤 + - 732 防疫用殺菌消毒剤 + - 733 防虫剤 + - 734 殺虫剤 + - 735 殺そ剤 + - 739 その他の公衆衛生用薬 +- 79 その他の治療を主目的としない医薬品 + - 791 ばん創こう + - 799 他に分類されない治療を主目的としない医薬品 +- 81 アルカロイド系麻薬(天然麻薬) + - 811 アヘンアルカロイド系麻薬 + - 812 コカアルカロイド系製剤 + - 819 
その他のアルカロイド系麻薬(天然麻薬) +- 82 非アルカロイド系麻薬 + - 821 合成麻薬 diff --git a/skills/developing/pmda-drug-info/hooks/pmda-instructions.md b/skills/developing/pmda-drug-info/hooks/pmda-instructions.md index 0a656be..b23fe4c 100644 --- a/skills/developing/pmda-drug-info/hooks/pmda-instructions.md +++ b/skills/developing/pmda-drug-info/hooks/pmda-instructions.md @@ -2,19 +2,92 @@ You have access to Japanese pharmaceutical package insert (添付文書) data via the following tools. +## Tool output format + +Tools return **plain text**, not JSON. Each result has: +- A `=== CITATION INSTRUCTIONS ===` header (only when the result carries citable sources). +- A `Found N ...:` summary line, then one numbered record block per row. +- Inside each block: indented `label: value` fields, an optional `出典: [...]` line, and a + `CITATION: ` line (the pre-built clickable tag). + +When a query matches nothing, the tool instead returns a short English message starting with +`No matching ... were found` and **no** citation instructions. In that case tell the user no +relevant material was retrieved — **do NOT** invent or emit any `` tag. + ## Core Rules - **Tool calls are mandatory.** Never answer from training knowledge alone. All facts must come from tool results. -- Cite sources in the format: `[出典: <販売名> (yj_full=) / <章番号 章タイトル>]` -- Fact table rows include a `_citation` field — use it directly. +- Cite sources in the format: `[出典: <販売名> (yj_full=) / <章番号 章タイトル>]` — taken from each record's `出典:` line. - Generic citations like `[出典: 薬品マスター]` or `[出典: 添付文書]` are **prohibited**. - For urgent questions (suicide/drug abuse/severe acute symptoms), state: "緊急対応として担当医・薬剤師に直接相談してください" +## Clickable Citation ( tag) — MUST copy the record's `CITATION:` line + +After each fact-grounded paragraph or bullet list, copy that record's **`CITATION:` line VERBATIM**. Do NOT construct the tag yourself. + +### Why verbatim copy + +The tool already built the full `` string for you on the `CITATION:` line. 
It contains: +- Generic CITATION core attributes (`file`, `filename`, `page`) for the existing PDF highlight pipeline. +- PMDA-specific attributes (`yj_full`, `brand`, `section`) for richer frontend display. + +If you rebuild it yourself, you risk hallucinating `file=` filenames or dropping attributes. **Just copy the `CITATION:` line byte-for-byte** (drop the leading `CITATION: ` label, keep the `` tag). + +### Rules + +- Each record's `CITATION:` line is the complete `` string. +- **Emit it exactly as-is. Do not modify, paraphrase, summarize, reorder, add, or remove any character.** +- Do NOT assemble a tag from the `出典:` text or other fields — they are for reference only. +- If a record has **no** `CITATION:` line, emit only the `[出典: ...]` text — never invent any CITATION attributes. + +### Multiple citations within the same paragraph + +- Each fact record gets its own `` tag — emit the `CITATION:` line from that record. +- Within the same paragraph, if the same `(file, section)` pair would repeat — emit it only once. +- Same drug × different sections: one tag per section, back-to-back. +- Different drugs: each tag stands alone. + +### Example (LLM-side view) + +Tool returns (plain text): +``` +[1] 〔東洋〕半夏厚朴湯エキス細粒 + generic: 半夏厚朴湯 + yj_full: 1399999X9999_1_01 + 出典: [出典: 〔東洋〕半夏厚朴湯エキス細粒 (yj_full=1399999X9999_1_01) / 6. 用法及び用量] + CITATION: +``` + +Your reply (correct): +``` +用法は 1日3回。 + +``` + +Your reply (WRONG — reconstructed by hand): +``` +用法は 1日3回。 + ← hallucinated, missing attributes +``` + +## Citation Requirements + +- You MUST emit a `` tag whenever you use a tool result. Copy the record's `CITATION:` line verbatim — never construct one. +- Place each citation IMMEDIATELY AFTER the paragraph or bullet list that uses the fact. NEVER collect citations at the end of the response. +- At most one tag per unique file. At least one `` is required whenever the answer is grounded in tool results. 
+- An answer that states tool-grounded facts but contains zero `` tags is a failed answer. + ## When to Use Sub-agents (task tool) - **patient_specific**: Renal/hepatic/pregnancy/elderly/pediatric/allergy conditions × dosing decisions - **interaction**: Pairwise drug interaction investigation - **adverse_event**: Reverse lookup from adverse event name to drugs - **single_drug**: Detailed info not in fact tables (e.g., full adverse event list, pharmacokinetics) +### Sub-agent citation pass-through (CRITICAL) +- A sub-agent's returned text already contains `` tags built from the tools it called. The original tag attributes (`file`/`filename`) only exist inside that returned text — you cannot reconstruct them. +- You MUST preserve every `` tag from the sub-agent output VERBATIM and re-emit it in your final answer, keeping it immediately after the fact it supports. +- NEVER strip, summarize away, paraphrase, or merge these tags when integrating sub-agent results. +- A final answer that relies on sub-agent facts but contains zero `` tags is a failed answer. + ## Direct Tool Usage (do NOT delegate) - Simple lookups → use tools directly - Multi-drug comparisons → call tools sequentially, output as markdown table diff --git a/skills/developing/pmda-drug-info/os_client.py b/skills/developing/pmda-drug-info/os_client.py new file mode 100644 index 0000000..d86ce10 --- /dev/null +++ b/skills/developing/pmda-drug-info/os_client.py @@ -0,0 +1,157 @@ +"""OpenSearch `pmda_sections` index spec + client helper. 
+ +Mapping 与 wiki-skill 的 sudachi 配置共用 plugin(同一 OS 集群、同一 sudachi +core 字典)。每个 doc 对应一份说明书的一个章节节点,冗余存药品 metadata 以避 +免 JOIN(详见 design.md §2.1.2)。 + +环境变量: + OS_HOST (默认 http://localhost:9200,与 wiki-skill `_common.py` 一致) + PMDA_OS_INDEX (默认 pmda_sections) +""" +from __future__ import annotations + +import os + +from opensearchpy import OpenSearch + +# Plugin env vars: PMDA_OPENSEARCH_URL(推奨)/ OPENSEARCH_URL / OPENSEARCH_HOST +OS_HOST = ( + os.environ.get("PMDA_OPENSEARCH_URL") + or os.environ.get("OPENSEARCH_URL") + or os.environ.get("OPENSEARCH_HOST") + or "http://localhost:9200" +) +INDEX_NAME = os.environ.get("PMDA_OS_INDEX", "pmda_sections") + + +# ---- Mapping spec -------------------------------------------------------- + +INDEX_BODY: dict = { + "settings": { + "index": { + "number_of_shards": 1, + "number_of_replicas": 0, + "refresh_interval": "1s", + }, + "analysis": { + "tokenizer": { + "sudachi_tokenizer": { + "type": "sudachi_tokenizer", + "split_mode": "C", + "discard_punctuation": True, + } + }, + "filter": { + # "med_synonyms": { + # "type": "synonym", + # 初期最小集 — 命中错例后扩充。同义词条之间逗号分隔代表 + # 等价、空格视为词内字符。 + # "synonyms": [ + # "Stevens-Johnson, 皮膚粘膜眼症候群, SJS", + # "中毒性表皮壊死融解症, TEN, ライエル症候群", + # "QT延長, トルサード, Torsades de pointes", + # "間質性肺炎, 肺臓炎", + # "横紋筋融解症, ラブドミオリーシス", + # "アナフィラキシー, アナフィラキシーショック", + # "無顆粒球症, 顆粒球減少症", + # ], + # }, + "jp_pos": { + "type": "sudachi_part_of_speech", + }, + "jp_stop": { + "type": "sudachi_ja_stop", + }, + }, + "analyzer": { + "jp_med": { + "type": "custom", + # icu_normalizer はデフォルト image に未含、sudachi_ + # normalizedform で全角半角・正規化はカバーされる。 + "tokenizer": "sudachi_tokenizer", + "filter": [ + "sudachi_baseform", + "sudachi_normalizedform", + "jp_pos", + "jp_stop", + "lowercase", + ], + } + }, + }, + }, + "mappings": { + "properties": { + "yj_full": {"type": "keyword"}, + "yj_code": {"type": "keyword"}, + "l1_code": {"type": "keyword"}, + "l2_code": {"type": "keyword"}, + "l2_name": {"type": "keyword"}, + "category_name": 
{"type": "keyword"}, + "brand_names": {"type": "keyword"}, + "generic_name": {"type": "keyword"}, + "section_title": { + "type": "text", + "analyzer": "jp_med", + "fields": {"raw": {"type": "keyword"}}, + }, + "line_num": {"type": "integer"}, + "text": {"type": "text", "analyzer": "jp_med"}, + "revision_date": {"type": "date"}, + "_md_sha256": {"type": "keyword"}, + } + }, +} + + +# ---- Client -------------------------------------------------------------- + + +def client() -> OpenSearch: + """Return an OpenSearch client bound to OS_HOST.""" + return OpenSearch(hosts=[OS_HOST], http_compress=True, timeout=60) + + +# ---- 章節アクセス helpers(PageIndex 退役後の verbatim 取得経路) ------- + + +def list_drug_sections(yj_full: str, *, limit: int = 200) -> list[dict]: + """1 薬の全章節を line_num 昇順で返す。 + + 各 element: {section_title, line_num, text_len, brand, generic} + """ + cli = client() + resp = cli.search(index=INDEX_NAME, body={ + "size": min(limit, 500), + "_source": ["section_title", "line_num", "text", "brand_names", "generic_name"], + "query": {"term": {"yj_full": yj_full}}, + "sort": [{"line_num": "asc"}], + }) + out = [] + for h in resp["hits"]["hits"]: + s = h["_source"] + out.append({ + "section_title": s.get("section_title", ""), + "line_num": s.get("line_num"), + "text_len": len(s.get("text", "") or ""), + "brand": (s.get("brand_names") or [""])[0], + "generic": s.get("generic_name") or "", + }) + return out + + +def get_drug_section_text(yj_full: str, section_title: str) -> str: + """指定 (yj_full, section_title) の verbatim 章節 text。見つからなければ ""。""" + cli = client() + resp = cli.search(index=INDEX_NAME, body={ + "size": 1, + "_source": ["text"], + "query": {"bool": {"must": [ + {"term": {"yj_full": yj_full}}, + {"term": {"section_title.raw": section_title}}, + ]}}, + }) + hits = resp["hits"]["hits"] + if not hits: + return "" + return hits[0]["_source"].get("text", "") or "" diff --git a/skills/developing/pmda-drug-info/pmda_server.py 
b/skills/developing/pmda-drug-info/pmda_server.py index 0255adc..781a22b 100644 --- a/skills/developing/pmda-drug-info/pmda_server.py +++ b/skills/developing/pmda-drug-info/pmda_server.py @@ -1,15 +1,19 @@ #!/usr/bin/env python3 """ -PMDA drug information MCP server (mock data version). +PMDA drug information MCP server — 真实 PG / OS 查询版本(替换原 mock). -Provides drug search, master info, interactions, restrictions, dosing, -and full-text chapter retrieval with mock data for testing. +Plugin 自包含,不依赖 mygpt.* 任何模块。配置通过环境变量: + PMDA_PG_HOST / PMDA_PG_PORT / PMDA_PG_DB / PMDA_PG_USER / PMDA_PG_PASSWORD + PMDA_OPENSEARCH_URL (or OPENSEARCH_URL) / PMDA_OS_INDEX + +参考 hu-sandbox/pmda/agent/tools.py 的 10 个 tool 行为(98/100 v2e baseline). """ import asyncio -import json import sys -from typing import Any, Dict, Optional +from dataclasses import asdict +from decimal import Decimal +from typing import Any, Dict, List, Optional, Sequence, Tuple from mcp_common import ( create_error_response, @@ -20,269 +24,435 @@ from mcp_common import ( handle_mcp_streaming, ) - -def _dump(obj) -> str: - return json.dumps(obj, ensure_ascii=False) +from db import session +from queries import ( + drug_dosing_query, + drug_interaction_query, + drug_master_get, + drug_restriction_query, + list_categories_with_counts, + list_drugs_in_category as _sql_list_drugs_in_category, + search_drugs_in_db, +) +from os_client import client as os_client, INDEX_NAME as OS_INDEX_NAME # --------------------------------------------------------------------------- -# Mock data +# Plain-text rendering (agent-friendly tool output) +# +# 工具结果以纯文本返回(而非 JSON),降低 agent 的解析负担与 token 噪音。 +# CITATION enforcement 仍是工程化保证(不依赖 LLM 自觉): +# 1. `_CITE_INSTRUCTION_TEXT` 注入每个含可引用源的结果头部 (LLM 第一眼) +# 2. 每条记录末尾一行 `CITATION:` 镜像 `_cite._tag` (LLM 直接复制, 不用 traverse) +# 3. 
`read_drug_chapter` 三明治包装 raw markdown (tag 物理紧贴章节文本) +# 命中 0 条时返回英文 no-results 话术, 且 **不含** CITATION 指令 —— 避免诱导 +# agent 在无来源时编造引用。 # --------------------------------------------------------------------------- -MOCK_DRUG_MASTER = { - "2149039F1082": { - "yj_code": "2149039F1082", - "yj_full": "2149039F1082_1_17", - "brand_name": "ロサルタンK錠50mg「科研」", - "generic_name": "ロサルタンカリウム", - "category_code": "214", - "category_name": "アンジオテンシンII受容体拮抗薬", - "regulation": "劇薬, 処方箋医薬品", - "manufacturer": "科研製薬株式会社", - "revision_date": "2024-06", - }, - "3399007H1021": { - "yj_code": "3399007H1021", - "yj_full": "3399007H1021_1_21", - "brand_name": "バイアスピリン錠100mg", - "generic_name": "アスピリン", - "category_code": "339", - "category_name": "血液・体液用薬", - "regulation": "処方箋医薬品", - "manufacturer": "バイエル薬品株式会社", - "revision_date": "2024-03", - }, - "2179004F1026": { - "yj_code": "2179004F1026", - "yj_full": "2179004F1026_1_14", - "brand_name": "ノルバスク錠5mg", - "generic_name": "アムロジピンベシル酸塩", - "category_code": "217", - "category_name": "カルシウム拮抗薬", - "regulation": "処方箋医薬品", - "manufacturer": "ファイザー株式会社", - "revision_date": "2024-01", - }, -} +_CITE_INSTRUCTION_TEXT = ( + "=== CITATION INSTRUCTIONS ===\n" + "Each record below ends with a `CITATION:` line — a pre-built " + "`` tag the frontend PDF-highlight " + "pipeline depends on. When you use a fact from a record, copy that record's " + "`CITATION:` tag VERBATIM (byte-for-byte) immediately AFTER the paragraph or " + "bullet that states the fact. NEVER collect citations at the end. At most ONE " + "tag per unique file. Do NOT add, modify, reorder, remove attributes, or build " + "a tag yourself. Records without a `CITATION:` line carry no clickable source — " + "do NOT fabricate one. 
An answer that uses these facts but contains zero " + "`` tags is a FAILED answer.\n" + "==============================" +) -MOCK_CATEGORIES = [ - {"category_code": "214", "category_name": "アンジオテンシンII受容体拮抗薬", "level": "L2", "drug_count": 35}, - {"category_code": "217", "category_name": "カルシウム拮抗薬", "level": "L2", "drug_count": 48}, - {"category_code": "339", "category_name": "血液・体液用薬", "level": "L2", "drug_count": 22}, - {"category_code": "612", "category_name": "消化性潰瘍用剤", "level": "L2", "drug_count": 40}, -] -MOCK_INTERACTIONS = [ - { - "drug_a_yj": "2149039F1082", - "drug_b_yj": "3399007H1021", - "drug_b_class": "アスピリン(抗血小板剤)", - "severity": "併用注意", - "mechanism": "ARBの降圧作用を減弱するおそれがある。また、腎機能低下・高カリウム血症のリスクを増大。", - "clinical_effect": "降圧効果の減弱、腎機能悪化、高カリウム血症に注意。", - "source_drug_yj": "2149039F1082", - "source_section": "10.2 併用注意", - }, - { - "drug_a_yj": "3399007H1021", - "drug_b_yj": "2149039F1082", - "drug_b_class": "ロサルタンカリウム(ARB)", - "severity": "併用注意", - "mechanism": "アスピリンの副作用(消化性潰瘍、腎機能低下)を増強するおそれ。", - "clinical_effect": "消化性潰瘍、腎機能低下に注意。血清カリウム値の上昇に注意。", - "source_drug_yj": "3399007H1021", - "source_section": "10.2 併用注意", - }, -] +def _no_results(what: str) -> str: + """English no-results message — intentionally omits CITATION instructions. 
-MOCK_RESTRICTIONS = [ - { - "drug_yj": "2149039F1082", - "condition_type": "腎機能障害", - "condition_text": "腎機能障害患者", - "condition_params": {"eGFR_max": 30}, - "severity": "慎重投与", - "source_section": "9.2 腎機能障害患者", - }, - { - "drug_yj": "2149039F1082", - "condition_type": "妊婦", - "condition_text": "妊娠中の女性", - "condition_params": {}, - "severity": "禁忌", - "source_section": "9.5 妊婦", - }, - { - "drug_yj": "2149039F1082", - "condition_type": "高齢者", - "condition_text": "高齢者(65歳以上)", - "condition_params": {}, - "severity": "慎重投与", - "source_section": "9.8 高齢者", - }, - { - "drug_yj": "3399007H1021", - "condition_type": "過敏症", - "condition_text": "本剤の成分に対し過敏症の既往歴のある患者", - "condition_params": {}, - "severity": "禁忌", - "source_section": "2. 禁忌", - }, -] + Returned when a query matches 0 rows, so the agent tells the user nothing was + found instead of being pushed to emit a citation for a non-existent source. + """ + return ( + f"No matching {what} were found in the PMDA package-insert database.\n" + ) -MOCK_DOSING = [ - { - "drug_yj": "2149039F1082", - "patient_segment": "成人", - "segment_params": {}, - "indication_code": "高血圧症", - "dose_amount": "50", - "dose_unit": "mg", - "frequency": "1日1回", - "duration": "", - "adjustment_text": "効果不十分な場合は100mgまで増量可", - "source_section": "6. 用法及び用量", - }, - { - "drug_yj": "2149039F1082", - "patient_segment": "腎機能障害患者", - "segment_params": {"eGFR_max": 30}, - "indication_code": "高血圧症", - "dose_amount": "25", - "dose_unit": "mg", - "frequency": "1日1回", - "duration": "", - "adjustment_text": "eGFR 30以下では用量を減ずること。血清カリウム・クレアチニンの推移に注意。", - "source_section": "9.2 腎機能障害患者", - }, -] -MOCK_CHAPTERS = { - "2149039F1082_1_17": [ - {"section_title": "1. 警告", "line_num": 1, "text_len": 120}, - {"section_title": "2. 禁忌", "line_num": 5, "text_len": 80}, - {"section_title": "4. 効能・効果", "line_num": 12, "text_len": 60}, - {"section_title": "6. 
用法及び用量", "line_num": 20, "text_len": 150}, - {"section_title": "9.2 腎機能障害患者", "line_num": 45, "text_len": 200}, - {"section_title": "9.5 妊婦", "line_num": 52, "text_len": 180}, - {"section_title": "9.8 高齢者", "line_num": 60, "text_len": 100}, - {"section_title": "10.2 併用注意", "line_num": 75, "text_len": 350}, - {"section_title": "11.1 重大な副作用", "line_num": 90, "text_len": 400}, - {"section_title": "11.2 その他の副作用", "line_num": 110, "text_len": 300}, - ], - "3399007H1021_1_21": [ - {"section_title": "1. 警告", "line_num": 1, "text_len": 100}, - {"section_title": "2. 禁忌", "line_num": 4, "text_len": 90}, - {"section_title": "4. 効能・効果", "line_num": 10, "text_len": 55}, - {"section_title": "6. 用法及び用量", "line_num": 18, "text_len": 130}, - {"section_title": "10.2 併用注意", "line_num": 70, "text_len": 300}, - {"section_title": "11.1 重大な副作用", "line_num": 85, "text_len": 450}, - {"section_title": "11.2 その他の副作用", "line_num": 105, "text_len": 280}, - ], -} +def _fmt(value: Any) -> str: + """Render a single field value as compact text (Decimal → number).""" + if isinstance(value, Decimal): + value = float(value) + if isinstance(value, float) and value.is_integer(): + return str(int(value)) + return str(value) -MOCK_SECTION_TEXT = { - ("2149039F1082_1_17", "9.2 腎機能障害患者"): ( - "9.2 腎機能障害患者\n" - "腎機能障害患者(eGFR 30 mL/min/1.73m²以下)には、ロサルタンカリウムの" - "投与開始用量を25mg/日とし、血清カリウム及び血清クレアチニンの推移に" - "十分注意すること。\n" - "【理由】腎機能障害患者では、本剤の投与により急速に腎機能が悪化する" - "おそれがある。また、高カリウム血症があらわれやすい。" - ), - ("2149039F1082_1_17", "9.5 妊婦"): ( - "9.5 妊婦\n" - "妊婦又は妊娠している可能性のある女性には投与しないこと。\n" - "【理由】妊娠中期・末期にレニン-アンジオテンシン系に作用する薬剤を" - "投与された患者では、胎児の腎機能低下、羊水過少症、頭蓋の発育不全、" - "肺低形成等があらわれるおそれがある。" - ), - ("2149039F1082_1_17", "10.2 併用注意"): ( - "10.2 併用注意\n" - "・アスピリン(抗血小板剤)\n" - " 【リスク】ARBの降圧作用を減弱するおそれがある。\n" - " 腎機能低下・高カリウム血症のリスクを増大。\n" - " 【措置】降圧効果の減弱、腎機能悪化、高カリウム血症に注意すること。" - ), - ("2149039F1082_1_17", "11.1 重大な副作用"): ( - "11.1 重大な副作用\n" - "・血管浮腫(頻度不明):顔面、口唇、咽頭、舌等の腫脹があらわれた場合には" - "直ちに投与を中止し、適切な処置を行うこと。\n" - 
"・高カリウム血症(0.1%未満):血清カリウム値の上昇があらわれることがある。\n" - "・腎機能悪化(0.1%未満):BUN、クレアチニンの上昇があらわれることがある。" - ), - ("3399007H1021_1_21", "10.2 併用注意"): ( - "10.2 併用注意\n" - "・ロサルタンカリウム(ARB)\n" - " 【リスク】アスピリンの副作用(消化性潰瘍、腎機能低下)を増強するおそれ。\n" - " 【措置】消化性潰瘍、腎機能低下に注意。血清カリウム値の上昇に注意すること。" - ), - ("3399007H1021_1_21", "11.1 重大な副作用"): ( - "11.1 重大な副作用\n" - "・ショック、アナフィラキシー(頻度不明):呼吸困難、血圧低下等があらわれた\n" - " 場合には直ちに投与を中止し、適切な処置を行うこと。\n" - "・消化性潰瘍(0.1%未満):出血、穿孔があらわれることがある。\n" - "・腎機能障害(0.1%未満):急性腎不全があらわれることがある。" - ), -} + +def _tag_of(data: dict) -> Optional[str]: + """Pull the pre-built ```` tag out of a record.""" + return data.get("cite_emit") or (data.get("_cite") or {}).get("_tag") + + +def _render_records( + records: Sequence[dict], + *, + what: str, + header_title: str, + field_specs: Sequence[Tuple[str, str]], + title_key: Optional[str] = None, + with_citation: bool = True, +) -> str: + """Render a flat list of record dicts into agent-friendly plain text. + + Empty ``records`` → English no-results message (no CITATION instructions). + Otherwise: optional citation-instruction header, a ``header_title`` line, then + one block per record. ``field_specs`` is ``[(key, label), ...]`` controlling + field order/display; empty values are skipped. ``title_key`` (if given) is the + record's headline; each record's ``_citation`` text and CITATION tag are + appended when present. 
+ """ + if not records: + return _no_results(what) + + parts: List[str] = [] + if with_citation: + parts.append(_CITE_INSTRUCTION_TEXT) + parts.append(header_title) + + for idx, rec in enumerate(records, 1): + title = _fmt(rec.get(title_key)) if title_key and rec.get(title_key) else "" + lines = [f"[{idx}] {title}".rstrip()] + for key, label in field_specs: + value = rec.get(key) + if value in (None, "", [], {}): + continue + lines.append(f" {label}: {_fmt(value)}") + if rec.get("_citation"): + lines.append(f" 出典: {rec['_citation']}") + if with_citation: + tag = _tag_of(rec) + if tag: + lines.append(f" CITATION: {tag}") + parts.append("\n".join(lines)) + + return "\n\n".join(parts) + + +def _render_categories(data: Sequence[dict]) -> str: + """Render the L1/L2 category tree (navigation only — no citation source).""" + if not data: + return _no_results("categories") + lines: List[str] = ["Drug categories:"] + for l1 in data: + lines.append(f"\n■ {l1.get('l1_code', '')} {l1.get('l1_name', '')}".rstrip()) + for l2 in l1.get("l2", []): + lines.append( + f" - {l2.get('code', '')} {l2.get('name', '')} " + f"({l2.get('drug_count', 0)} drugs)" + ) + return "\n".join(lines) + + +def _render_drugs_in_category(data: dict) -> str: + """Render generic → [brand] listing for one L2 category (navigation only).""" + generics = data.get("generics") or [] + if not generics: + return _no_results("drugs in this category") + header = f"Category {data.get('l2_code', '')} {data.get('l2_name', '')}".rstrip() + lines: List[str] = [header] + for entry in generics: + lines.append(f"\n● {entry.get('generic', '')}".rstrip()) + for drug in entry.get("drugs", []): + if "_more" in drug: + lines.append(f" - {drug['_more']}") + else: + lines.append( + f" - {drug.get('brand', '')} (yj_full={drug.get('yj_full', '')})" + ) + if data.get("_more_generics"): + lines.append(f"\n(+{data['_more_generics']} more generics)") + return "\n".join(lines) + + +def _render_section_hits( + *, keyword: str, 
section_filter: str, total: int, hits: Sequence[dict] +) -> str: + """Render OpenSearch section-text hits with per-match snippets (carry tags).""" + shown = len(hits) + title = f'Found {total} drug(s) matching "{keyword}"' + if section_filter: + title += f' in sections like "{section_filter}"' + title += f" (showing {shown}):" + parts: List[str] = [_CITE_INSTRUCTION_TEXT, title] + for idx, hit in enumerate(hits, 1): + head = f"[{idx}] {hit.get('brand', '')} / {hit.get('generic', '')}".rstrip(" /") + l2 = hit.get("l2", "") + lines = [f"{head} ({l2})" if l2 else head] + lines.append(f" yj_full: {hit.get('yj_full', '')}") + for m in hit.get("matches", []): + lines.append(f" ▸ {m.get('section_title', '')}") + snippet = (m.get("snippet") or "").strip() + for sl in snippet.splitlines(): + lines.append(f" {sl}") + if hit.get("_citation_template"): + lines.append(f" 出典テンプレ: {hit['_citation_template']}") + tag = _tag_of(hit) + if tag: + lines.append(f" CITATION: {tag}") + parts.append("\n".join(lines)) + more = total - shown + if more > 0: + parts.append(f"(+{more} more drugs not shown)") + return "\n\n".join(parts) + + +def _render_chapters( + *, yj_full: str, brand: str, generic: str, sections: Sequence[dict] +) -> str: + """Render the chapter index for one drug; each chapter carries its own tag.""" + has_cite = any(_tag_of(s) for s in sections) + parts: List[str] = [] + if has_cite: + parts.append(_CITE_INSTRUCTION_TEXT) + parts.append( + f"{brand} / {generic} (yj_full={yj_full}) — {len(sections)} section(s):".lstrip( + " /" + ) + ) + block: List[str] = [] + for s in sections: + block.append( + f" - {s.get('section_title', '')} " + f"(line {s.get('line_num', 0)}, {s.get('text_len', 0)} chars)" + ) + tag = _tag_of(s) + if tag: + block.append(f" CITATION: {tag}") + parts.append("\n".join(block)) + return "\n\n".join(parts) + + +# --------------------------------------------------------------------------- +# 出典フォーマッタ(与 tools.py 一致) +# 
def _load_vf_lookup() -> dict:
    """Return the cached ``yj_full -> (vector_file_id, filename, section_to_page)`` map.

    The map is read once from the ``pmda_drug_vf`` table and kept in the
    module-level ``_VF_LOOKUP`` for the life of the process. Loading is
    best-effort: if the query fails (for instance because the table does not
    exist yet), whatever was collected so far — normally nothing — is cached
    and callers fall back to text-only citations. The failure is logged rather
    than swallowed, so an outage or misconfiguration is distinguishable from a
    legitimately empty table.

    Returns:
        The lookup dict; empty when the table is empty or could not be read.
    """
    global _VF_LOOKUP
    if _VF_LOOKUP is None:
        out: dict = {}
        try:
            with session() as conn, conn.cursor() as cur:
                cur.execute(
                    "SELECT yj_full, vector_file_id, filename, section_to_page "
                    "FROM pmda_drug_vf"
                )
                for yj_full, vf_id, fname, s2p in cur.fetchall():
                    out[yj_full] = (str(vf_id), fname or "", s2p or {})
        except Exception:
            # Deliberately broad: a citation lookup must never break a tool
            # call. Record the cause so the degradation is not invisible.
            import logging

            logging.getLogger(__name__).warning(
                "pmda_drug_vf lookup could not be loaded; "
                "citations will be text-only for this process",
                exc_info=True,
            )
        _VF_LOOKUP = out
    return _VF_LOOKUP
+ + 返回 ``{file_id, filename, page, yj_full, brand, section?}`` — 复用通用 + ```` 协议, 额外附加 PMDA + 专属属性 ``yj_full`` / ``brand`` / ``section``。 + + 核心属性 (通用 CITATION 协议): + - ``file_id`` : VectorFile.id (uuid), 通用 /pdf/highlight 用这个定位 PDF + - ``filename`` : VF 文件名, 通用 CITATION 展示用 + - ``page`` : PDF 页码 (0-based), 第一版固定 0 (后端 expand_pages 全文搜兜底) + + PMDA 额外属性 (前端可选读): + - ``yj_full`` : 厚労省 YJ コード (含枝番), 跨 vf_uuid 稳定的唯一 id + - ``brand`` : 表示用販売名 (drug_master.brand_name "/" 分隔的第一段) + - ``section?`` : fact 表 source_section 完整字符串 (例 "10.1 併用禁忌") + + 存在性验证 (硬要求, 缺一不返 _cite): + - brand lookup (drug_master) 找不到 → None + - vf_lookup (pmda_drug_vf) 找不到 → None (避免输出 空壳 tag) + + 返 None 时 caller 不附 _cite, LLM 看到没 _cite 就不会 emit citation — + 比 emit 一个无 file/filename 属性的空标签好(前端解析空标签会渲染成 + broken chip)。 + """ + brand = _load_brand_by_yj_full_lookup().get(yj_full) + if not brand: + return None + # 通用 CITATION 核心属性: file_id / filename 必须有, 否则不出 tag + vf_info = _load_vf_lookup().get(yj_full) + if not vf_info: + return None + vf_id, filename, _s2p = vf_info + cite: dict = { + "yj_full": yj_full, + "brand": brand, + "file_id": vf_id, + "filename": filename, + "page": 0, # 第一版固定 page 0, 后端 expand_pages 全文搜 + } + if section: + cite["section"] = section + # 工程化预制完整 tag 字符串, 让 LLM 只做复制粘贴, 不再自己拼 + cite["_tag"] = _citation_tag(cite) + return cite + + +def _cite_struct(drug_yj: str, section: Optional[str]) -> Optional[dict]: + """Return ``{file_id, filename, page, yj_full, brand, section?}`` for the ```` tag. + + Returns ``None`` when drug_master has no row for this yj (skill can still + emit the human ``[出典: ...]`` text). 
+ """ + drug_lk = _load_drug_lookup() + _, yj_full = drug_lk.get(drug_yj, ("", drug_yj)) + return _cite_struct_by_yj_full(yj_full, section) + + # --------------------------------------------------------------------------- -# Tool implementations (mock) +# Tool implementations (10 个) # --------------------------------------------------------------------------- def _tool_search_drugs(query: str, kind: str = "auto", limit: int = 10) -> str: - results = [] - for code, d in MOCK_DRUG_MASTER.items(): - q = query.lower() - if (kind == "brand" and q in d["brand_name"].lower()) or \ - (kind == "generic" and q in d["generic_name"].lower()) or \ - (kind == "yj" and (q in d["yj_code"].lower() or q in d["yj_full"].lower())) or \ - (kind == "auto" and (q in d["brand_name"].lower() or q in d["generic_name"].lower() - or q in d["yj_code"].lower() or q in d["yj_full"].lower())): - results.append({ - "yj_full": d["yj_full"], - "yj_code": d["yj_code"], - "brand": d["brand_name"], - "generic": d["generic_name"], - "category": f"{d['category_code']} {d['category_name']}", - "score": 1.0, - }) - return _dump(results[:limit]) + rows = search_drugs_in_db(query, kind=kind, limit=limit) + out = [] + for r in rows: + entry: dict = { + "yj_full": r.yj_full, + "yj_code": r.yj_code, + "brand": r.brand_name, + "generic": r.generic_name, + "category": f"{r.category_code} {r.category_name}".strip(), + "score": r.score, + } + cite = _cite_struct_by_yj_full(r.yj_full, section=None) + if cite is not None: + entry["_cite"] = cite + entry["cite_emit"] = cite["_tag"] # top-level mirror for LLM + out.append(entry) + return _render_records( + out, + what="drugs", + header_title=f"Found {len(out)} drug(s):", + title_key="brand", + field_specs=[ + ("generic", "generic"), + ("yj_full", "yj_full"), + ("yj_code", "yj_code"), + ("category", "category"), + ("score", "score"), + ], + ) def _tool_list_categories() -> str: - return _dump(MOCK_CATEGORIES) + return _render_categories(list_categories_with_counts()) 
def _tool_get_drug_master(yj_code: str) -> str:
    """Render the drug_master record for ``yj_code`` as plain text.

    Args:
        yj_code: Code handed to ``drug_master_get``.

    Returns:
        The ``_no_results`` message when no row exists; otherwise the text
        produced by ``_render_records`` for the single record, enriched with
        a human-readable ``_citation`` and, when a citation can be built,
        the ``_cite`` dict plus its ready-made tag under ``cite_emit``.
    """
    master = drug_master_get(yj_code)
    if master is None:
        return _no_results("drug master record")

    record = {
        **asdict(master),
        "_citation": f"[出典: {master.brand_name} (yj_full={master.yj_full}) / 添付文書冒頭]",
    }

    citation = _cite_struct(master.yj_code, section=None)
    if citation is not None:
        # cite_emit mirrors the tag at top level so the LLM can copy it verbatim.
        record.update({"_cite": citation, "cite_emit": citation["_tag"]})

    shown_fields = [
        ("generic_name_jp", "generic"),
        ("yj_full", "yj_full"),
        ("yj_code", "yj_code"),
        ("category_code", "category_code"),
        ("category_name", "category_name"),
        ("regulation", "regulation"),
        ("manufacturer", "manufacturer"),
        ("revision_date", "revision_date"),
    ]
    return _render_records(
        [record],
        what="drug master record",
        header_title="Drug master record:",
        title_key="brand_name",
        field_specs=shown_fields,
    )
r["severity"] != severity: - continue - if keyword and keyword.lower() not in ( - (r.get("drug_b_class") or "").lower() - + (r.get("mechanism") or "").lower() - + (r.get("clinical_effect") or "").lower() - ): - continue - results.append({**r, "_citation": _citation(r["source_drug_yj"], r["source_section"])}) - return _dump(results[:limit]) + rows = drug_interaction_query( + drug_a_yj=drug_a_yj, + drug_b_yj=drug_b_yj, + severity=severity, + keyword=keyword, + limit=limit, + ) + out = [] + for r in rows: + d = asdict(r) + d["_citation"] = _citation(r.source_drug_yj, r.source_section) + cite = _cite_struct(r.source_drug_yj, r.source_section) + if cite is not None: + d["_cite"] = cite + d["cite_emit"] = cite["_tag"] # top-level mirror for LLM + out.append(d) + return _render_records( + out, + what="drug interactions", + header_title=f"Found {len(out)} drug interaction(s):", + title_key="severity", + field_specs=[ + ("drug_a_yj", "drug_a_yj"), + ("drug_b_yj", "drug_b_yj"), + ("drug_b_class", "drug_b_class"), + ("mechanism", "mechanism"), + ("clinical_effect", "clinical_effect"), + ("source_section", "source_section"), + ], + ) def _tool_get_drug_restrictions( @@ -317,18 +501,34 @@ def _tool_get_drug_restrictions( keyword: Optional[str] = None, limit: int = 30, ) -> str: - results = [] - for r in MOCK_RESTRICTIONS: - if drug_yj and r["drug_yj"] != drug_yj: - continue - if condition_type and r["condition_type"] != condition_type: - continue - if severity and r["severity"] != severity: - continue - if keyword and keyword.lower() not in (r.get("condition_text") or "").lower(): - continue - results.append({**r, "_citation": _citation(r["drug_yj"], r["source_section"])}) - return _dump(results[:limit]) + rows = drug_restriction_query( + drug_yj=drug_yj, + condition_type=condition_type, + severity=severity, + keyword=keyword, + limit=limit, + ) + out = [] + for r in rows: + d = asdict(r) + d["_citation"] = _citation(r.drug_yj, r.source_section) + cite = _cite_struct(r.drug_yj, 
r.source_section) + if cite is not None: + d["_cite"] = cite + d["cite_emit"] = cite["_tag"] # top-level mirror for LLM + out.append(d) + return _render_records( + out, + what="drug restrictions", + header_title=f"Found {len(out)} drug restriction(s):", + title_key="condition_type", + field_specs=[ + ("drug_yj", "drug_yj"), + ("condition_text", "condition_text"), + ("severity", "severity"), + ("source_section", "source_section"), + ], + ) def _tool_get_drug_dosing( @@ -336,14 +536,37 @@ def _tool_get_drug_dosing( patient_segment: Optional[str] = None, limit: int = 20, ) -> str: - results = [] - for r in MOCK_DOSING: - if r["drug_yj"] != drug_yj: - continue - if patient_segment and r["patient_segment"] != patient_segment: - continue - results.append({**r, "_citation": _citation(drug_yj, r["source_section"])}) - return _dump(results[:limit]) + rows = drug_dosing_query( + drug_yj=drug_yj, + patient_segment=patient_segment, + limit=limit, + ) + out = [] + for r in rows: + d = asdict(r) + # Merge amount + unit into one readable "dose" field for plain-text output. 
+ if r.dose_amount is not None: + d["dose"] = f"{_fmt(r.dose_amount)}{r.dose_unit or ''}".strip() + d["_citation"] = _citation(r.drug_yj, r.source_section) + cite = _cite_struct(r.drug_yj, r.source_section) + if cite is not None: + d["_cite"] = cite + d["cite_emit"] = cite["_tag"] # top-level mirror for LLM + out.append(d) + return _render_records( + out, + what="dosing entries", + header_title=f"Found {len(out)} dosing entr{'y' if len(out) == 1 else 'ies'}:", + title_key="patient_segment", + field_specs=[ + ("indication_code", "indication_code"), + ("dose", "dose"), + ("frequency", "frequency"), + ("duration", "duration"), + ("adjustment_text", "adjustment"), + ("source_section", "source_section"), + ], + ) def _tool_search_section_text( @@ -352,80 +575,164 @@ def _tool_search_section_text( limit: int = 30, ) -> str: if not keyword.strip(): - return _dump({"keyword": keyword, "total_drugs": 0, "shown": 0, "hits": []}) - - # Simple mock: search through section text + return _no_results("sections") + size = min(max(1, limit), 100) + body: dict = { + "size": size, + "_source": ["yj_full", "brand_names", "generic_name", + "l2_code", "l2_name", "section_title", "line_num"], + "query": {"bool": {"must": [{"match": {"text": keyword}}]}}, + "collapse": { + "field": "yj_full", + "inner_hits": { + "name": "matches", + "size": 2, + "_source": ["section_title", "line_num"], + "highlight": {"fields": {"text": {"fragment_size": 160, "number_of_fragments": 1}}}, + }, + }, + "aggs": {"total_drugs": {"cardinality": {"field": "yj_full"}}}, + } + if section_filter: + body["query"]["bool"]["filter"] = [ + {"wildcard": {"section_title.raw": f"*{section_filter}*"}} + ] + resp = os_client().search(index=OS_INDEX_NAME, body=body) + total = int(resp["aggregations"]["total_drugs"]["value"]) hits_out = [] - for (yj_full, section_title), text in MOCK_SECTION_TEXT.items(): - if section_filter and section_filter not in section_title: - continue - if keyword.lower() in text.lower(): - drug = 
None - for d in MOCK_DRUG_MASTER.values(): - if d["yj_full"] == yj_full: - drug = d - break - if not drug: + for h in resp["hits"]["hits"]: + src = h.get("_source") or {} + inner = h.get("inner_hits", {}).get("matches", {}).get("hits", {}).get("hits", []) + brand = (src.get("brand_names") or [""])[0] + yj_full = src.get("yj_full") or "" + # Per-match snippet 自带对应 section 的 CITATION tag, + # LLM 复制 snippet 时自动带对的 section 标签 (而不是 hit 顶层粗粒度 tag)。 + matches = [] + seen = set() + for ih in inner: + ih_src = ih.get("_source") or {} + title = ih_src.get("section_title") or "" + if title in seen: continue - brand = drug["brand_name"] - # Deduplicate by yj_full - existing = [h for h in hits_out if h["yj_full"] == yj_full] - if existing: - existing[0]["matches"].append({ - "section_title": section_title, - "snippet": text[:160], - }) - continue - hits_out.append({ - "yj_full": yj_full, - "brand": brand, - "generic": drug["generic_name"], - "l2": f"{drug['category_code']} {drug['category_name']}", - "matches": [{"section_title": section_title, "snippet": text[:160]}], - "_citation_template": f"[出典: {brand} (yj_full={yj_full}) / <該当章>]", + seen.add(title) + hl = ih.get("highlight", {}).get("text", [""]) + snippet_text = hl[0] if hl else "" + inner_cite = _cite_struct_by_yj_full(yj_full, title) + inner_tag = (inner_cite or {}).get("_tag", "") + matches.append({ + "section_title": title, + "snippet": snippet_text + (f"\n{inner_tag}" if inner_tag else ""), }) - - return _dump({ - "keyword": keyword, - "section_filter": section_filter or None, - "total_drugs": len({h["yj_full"] for h in hits_out}), - "shown": len(hits_out), - "hits": hits_out[:limit], - }) + # Hit-level _cite per first-match section (legacy compatibility). 
def _tool_list_drug_chapters(yj_full: str) -> str:
    """List every chapter indexed for ``yj_full``, ordered by ``line_num`` ascending.

    Each listed section carries its ``section_title``, ``line_num`` and
    ``text_len`` (length of the indexed text), plus a citation
    (``_cite`` / ``cite_emit``) when one can be built for that section.

    Args:
        yj_full: Value matched exactly against the index's ``yj_full`` field.

    Returns:
        The ``_no_results`` message when the index has no hit; otherwise the
        text produced by ``_render_chapters``.
    """
    body = {
        "size": 200,
        "_source": ["section_title", "line_num", "brand_names", "generic_name", "text"],
        "query": {"term": {"yj_full": yj_full}},
        "sort": [{"line_num": "asc"}],
    }
    resp = os_client().search(index=OS_INDEX_NAME, body=body)
    hits = resp["hits"]["hits"]
    if not hits:
        return _no_results("chapters")

    # ``or`` fallbacks (not ``.get`` defaults) so that a field stored as an
    # explicit null is treated like a missing field instead of leaking None.
    head = hits[0].get("_source") or {}
    brand = (head.get("brand_names") or [""])[0]
    generic = head.get("generic_name") or ""

    sections = []
    for h in hits:
        src = h.get("_source") or {}
        section_title = src.get("section_title") or ""
        entry: dict = {
            "section_title": section_title,
            "line_num": src.get("line_num") or 0,
            # len(None) would raise TypeError for a null text field.
            "text_len": len(src.get("text") or ""),
        }
        cite = _cite_struct_by_yj_full(yj_full, section_title)
        if cite is not None:
            entry["_cite"] = cite
            entry["cite_emit"] = cite["_tag"]  # top-level mirror for LLM
        sections.append(entry)

    return _render_chapters(
        yj_full=yj_full, brand=brand, generic=generic, sections=sections
    )
+ ) # --------------------------------------------------------------------------- -# MCP request handler +# MCP dispatch # --------------------------------------------------------------------------- _TOOL_DISPATCH = { @@ -477,7 +784,6 @@ _TOOL_DISPATCH = { async def handle_request(request: Dict[str, Any]) -> Dict[str, Any]: - """Handle an MCP request.""" try: method = request.get("method") params = request.get("params", {}) @@ -485,44 +791,32 @@ async def handle_request(request: Dict[str, Any]) -> Dict[str, Any]: if method == "initialize": return create_initialize_response(request_id, "pmda-drug-info") - - elif method == "ping": + if method == "ping": return create_ping_response(request_id) - - elif method == "tools/list": + if method == "tools/list": tools = load_tools_from_json("pmda_tools.json") return create_tools_list_response(request_id, tools) - - elif method == "tools/call": + if method == "tools/call": tool_name = params.get("name") arguments = params.get("arguments", {}) - if tool_name not in _TOOL_DISPATCH: return create_error_response(request_id, -32601, f"Unknown tool: {tool_name}") - try: result_text = _TOOL_DISPATCH[tool_name](arguments) return { "jsonrpc": "2.0", "id": request_id, - "result": { - "content": [{"type": "text", "text": result_text}] - }, + "result": {"content": [{"type": "text", "text": result_text}]}, } except Exception as e: return { "jsonrpc": "2.0", "id": request_id, - "result": { - "content": [{"type": "text", "text": f"Error: {str(e)}"}] - }, + "result": {"content": [{"type": "text", "text": f"Error: {type(e).__name__}: {e}"}]}, } - - else: - return create_error_response(request_id, -32601, f"Unknown method: {method}") - + return create_error_response(request_id, -32601, f"Unknown method: {method}") except Exception as e: - return create_error_response(request.get("id"), -32603, f"Internal error: {str(e)}") + return create_error_response(request.get("id"), -32603, f"Internal error: {e}") async def main(): diff --git 
a/skills/developing/pmda-drug-info/pmda_tools.json b/skills/developing/pmda-drug-info/pmda_tools.json index 75177a6..d85aac2 100644 --- a/skills/developing/pmda-drug-info/pmda_tools.json +++ b/skills/developing/pmda-drug-info/pmda_tools.json @@ -11,7 +11,12 @@ }, "kind": { "type": "string", - "enum": ["auto", "brand", "generic", "yj"], + "enum": [ + "auto", + "brand", + "generic", + "yj" + ], "description": "Search type. 'auto' searches all fields.", "default": "auto" }, @@ -21,7 +26,9 @@ "default": 10 } }, - "required": ["query"] + "required": [ + "query" + ] } }, { @@ -48,7 +55,9 @@ "default": 50 } }, - "required": ["l2_code"] + "required": [ + "l2_code" + ] } }, { @@ -62,7 +71,9 @@ "description": "12-character YJ code." } }, - "required": ["yj_code"] + "required": [ + "yj_code" + ] } }, { @@ -145,7 +156,9 @@ "default": 20 } }, - "required": ["drug_yj"] + "required": [ + "drug_yj" + ] } }, { @@ -169,7 +182,9 @@ "default": 30 } }, - "required": ["keyword"] + "required": [ + "keyword" + ] } }, { @@ -183,7 +198,9 @@ "description": "Full YJ code (with revision suffix, e.g., 3399007H1021_1_21)." } }, - "required": ["yj_full"] + "required": [ + "yj_full" + ] } }, { @@ -201,7 +218,10 @@ "description": "Exact section title from list_drug_chapters (e.g., '9.2 腎機能障害患者', '11.1 重大な副作用')." 
} }, - "required": ["yj_full", "section_title"] + "required": [ + "yj_full", + "section_title" + ] } } ] diff --git a/skills/developing/pmda-drug-info/queries.py b/skills/developing/pmda-drug-info/queries.py new file mode 100644 index 0000000..d186cc4 --- /dev/null +++ b/skills/developing/pmda-drug-info/queries.py @@ -0,0 +1,427 @@ +"""SQL 查询接口(关系小库侧)。 + +Phase 1 已承接: + - `search_drugs` 的 商品名 / 一般名 / YJ 子串检索 + - `list_categories` 的 L1/L2 + drug_count + - `list_drugs_in_category` 的 一般名 → 販売名 + +后续 Phase 2 会接 drug_interaction / drug_restriction / drug_dosing。 +""" +from __future__ import annotations + +import re +from collections import OrderedDict +from dataclasses import dataclass +from pathlib import Path + +from taxonomy import load_taxonomy +from db import session + +# Plugin 自包含: drug_category.md 与 queries.py 同目录 +_TAXONOMY_PATH = Path(__file__).resolve().parent / "drug_category.md" +_TAXONOMY_CACHE = None + + +def _taxonomy(): + global _TAXONOMY_CACHE + if _TAXONOMY_CACHE is None: + _TAXONOMY_CACHE = load_taxonomy(_TAXONOMY_PATH) + return _TAXONOMY_CACHE + +# 12 字母数字 → YJ code 候选;前几位即足够触发自动 kind=yj 的判断 +_YJ_RE = re.compile(r"^[0-9A-Z]{4,12}$") + + +@dataclass(frozen=True) +class DrugHit: + yj_full: str + yj_code: str + brand_name: str # "/" 分隔多品名 + generic_name: str + category_code: str + category_name: str + score: float # 50-100 + + +def _detect_kind(q: str) -> str: + """auto-detect: pure alnum & uppercase 4+ chars → yj, otherwise any.""" + if _YJ_RE.match(q.upper()): + return "yj" + return "any" + + +def _score_expr(q_lower: str, q_like: str) -> str: + """Postgres expression returning relevance score 50–100.""" + # NB: doubles each pattern; psycopg expands %s positionally so caller + # must pass q_lower / q_like in matching repetitions. 
def search_drugs_in_db(
    query: str,
    *,
    kind: str = "auto",
    limit: int = 20,
) -> list[DrugHit]:
    """Search ``drug_master`` by brand name, generic name or YJ code.

    Drop-in replacement for the in-memory ``CorpusIndex.search``.

    Args:
        query: User search text; surrounding whitespace is stripped. An empty
            (or ``None``) query returns an empty list without touching the DB.
        kind: One of ``"auto"``, ``"brand"``, ``"generic"``, ``"yj"``.
            ``"auto"`` is resolved through ``_detect_kind``. Any value other
            than ``"yj"`` / ``"brand"`` / ``"generic"`` after that resolution
            takes the combined ("any") branch.
        limit: Bound as the SQL ``LIMIT`` value.

    Returns:
        ``DrugHit`` rows ordered by score descending (ties broken by the
        per-branch ``ORDER BY``). NULL text columns become ``""`` and a NULL
        score becomes ``0.0``.
    """
    q = (query or "").strip()
    if not q:
        return []
    if kind == "auto":
        kind = _detect_kind(q)

    # Three renderings of the query, bound positionally below:
    #   q_lower -> equality / prefix comparisons against lower(...)
    #   q_like  -> substring match via ILIKE
    #   q_upper -> YJ code comparisons
    q_lower = q.lower()
    q_like = f"%{q}%"
    q_upper = q.upper()

    if kind == "yj":
        # Placeholders: yj_code =, yj_full prefix, WHERE yj_code LIKE,
        # WHERE yj_full LIKE, LIMIT. '%%' is a literal '%' for the driver.
        sql = """
            SELECT yj_full, yj_code, brand_name, generic_name_jp,
                   category_code, category_name,
                   CASE WHEN yj_code = %s THEN 100.0
                        WHEN yj_full LIKE %s || '%%' THEN 95.0
                        ELSE 80.0 END AS score
            FROM drug_master
            WHERE yj_code LIKE %s OR yj_full LIKE %s
            ORDER BY score DESC, yj_full ASC
            LIMIT %s
        """
        params = (q_upper, q_upper, f"{q_upper}%", f"{q_upper}%", limit)
    elif kind == "brand":
        # Placeholders: exact match, prefix match, WHERE substring, LIMIT.
        sql = """
            SELECT yj_full, yj_code, brand_name, generic_name_jp,
                   category_code, category_name,
                   CASE WHEN lower(brand_name) = %s THEN 100.0
                        WHEN lower(brand_name) LIKE %s || '%%' THEN 90.0
                        ELSE 70.0 END AS score
            FROM drug_master
            WHERE brand_name ILIKE %s
            ORDER BY score DESC, length(brand_name) ASC, yj_full ASC
            LIMIT %s
        """
        params = (q_lower, q_lower, q_like, limit)
    elif kind == "generic":
        # Same shape as the brand branch, against generic_name_jp.
        sql = """
            SELECT yj_full, yj_code, brand_name, generic_name_jp,
                   category_code, category_name,
                   CASE WHEN lower(generic_name_jp) = %s THEN 95.0
                        WHEN lower(generic_name_jp) LIKE %s || '%%' THEN 85.0
                        ELSE 65.0 END AS score
            FROM drug_master
            WHERE generic_name_jp ILIKE %s
            ORDER BY score DESC, length(generic_name_jp) ASC, yj_full ASC
            LIMIT %s
        """
        params = (q_lower, q_lower, q_like, limit)
    else:  # any
        sql = f"""
            SELECT yj_full, yj_code, brand_name, generic_name_jp,
                   category_code, category_name,
                   {_score_expr(q_lower, q_like)} AS score
            FROM drug_master
            WHERE brand_name ILIKE %s OR generic_name_jp ILIKE %s
               OR yj_code LIKE %s OR yj_full LIKE %s
            ORDER BY score DESC, length(brand_name) ASC, yj_full ASC
            LIMIT %s
        """
        # Placeholder order inside _score_expr: brand =, brand LIKE, brand ILIKE,
        #                                       generic =, generic LIKE, generic ILIKE, yj_code =
        # followed by WHERE: brand ILIKE, generic ILIKE, yj_code LIKE, yj_full LIKE
        # and finally LIMIT. The tuple below must stay in exactly this order.
        params = (
            q_lower, q_lower, q_like,
            q_lower, q_lower, q_like,
            q_upper,
            q_like, q_like, f"{q_upper}%", f"{q_upper}%",
            limit,
        )

    with session() as conn, conn.cursor() as cur:
        cur.execute(sql, params)
        rows = cur.fetchall()

    # Column order matches the SELECT list shared by all four branches.
    return [
        DrugHit(
            yj_full=r[0],
            yj_code=r[1],
            brand_name=r[2] or "",
            generic_name=r[3] or "",
            category_code=r[4] or "",
            category_name=r[5] or "",
            score=float(r[6] or 0),
        )
        for r in rows
    ]
def list_drugs_in_category(
    l2_code: str,
    *,
    limit_generics: int = 50,
    brands_per_generic: int = 5,
) -> dict:
    """List "generic name -> [brand names]" for one L2 category.

    Args:
        l2_code: Category code matched exactly against
            ``drug_master.category_code``.
        limit_generics: Maximum number of generic-name groups in the result.
        brands_per_generic: Maximum number of brand entries listed per group.

    Returns:
        ``{"l2_code", "l2_name", "generics"}`` where each generic entry is
        ``{"generic", "drugs"}``. A truncated group ends with a
        ``{"_more": "+N more brands"}`` marker, and ``"_more_generics"`` is
        added when groups were dropped. ``l2_name`` is ``""`` when the code
        is not in the taxonomy.
    """
    category = _taxonomy().get(l2_code)

    with session() as conn, conn.cursor() as cur:
        cur.execute(
            "SELECT generic_name_jp, yj_full, brand_name "
            "FROM drug_master WHERE category_code = %s "
            "ORDER BY generic_name_jp, yj_full",
            (l2_code,),
        )
        fetched = cur.fetchall()

    # Group brands under their generic name, keeping first-seen (SQL) order.
    grouped: "OrderedDict[str, list[dict]]" = OrderedDict()
    for generic_name, full_code, brand_name in fetched:
        key = generic_name or "(一般名不明)"
        if key not in grouped:
            grouped[key] = []
        grouped[key].append({"brand": brand_name or "", "yj_full": full_code})

    generics: list[dict] = []
    for key, members in list(grouped.items())[:limit_generics]:
        listed = members[:brands_per_generic]  # slice copy; safe to extend
        hidden = len(members) - len(listed)
        if hidden > 0:
            listed.append({"_more": f"+{hidden} more brands"})
        generics.append({"generic": key, "drugs": listed})

    result = {
        "l2_code": l2_code,
        "l2_name": category.name if category else "",
        "generics": generics,
    }
    if len(grouped) > limit_generics:
        result["_more_generics"] = len(grouped) - limit_generics
    return result
def drug_interaction_query(
    drug_a_yj: str | None = None,
    drug_b_yj: str | None = None,
    *,
    severity: str | None = None,
    keyword: str | None = None,
    limit: int = 50,
) -> list[InteractionRow]:
    """Query ``drug_interaction`` with the given filters (combined with AND).

    Filter semantics:
        drug_a_yj only          -> rows whose ``drug_a_yj`` matches.
        drug_b_yj only          -> rows whose ``drug_b_yj`` matches.
        drug_a_yj + drug_b_yj   -> the pair in either direction (A->B or B->A).
        severity                -> exact match.
        keyword                 -> ILIKE substring on ``drug_b_class``,
                                   ``mechanism`` or ``clinical_effect``.

    Returns:
        Up to ``limit`` ``InteractionRow`` objects. An empty list is returned
        without querying when no filter is supplied.
    """
    # Each entry pairs one SQL condition with the values it binds, in order.
    conditions: list[tuple[str, list]] = []

    if drug_a_yj and drug_b_yj:
        conditions.append((
            "((drug_a_yj=%s AND drug_b_yj=%s) OR "
            "(drug_a_yj=%s AND drug_b_yj=%s))",
            [drug_a_yj, drug_b_yj, drug_b_yj, drug_a_yj],
        ))
    elif drug_a_yj:
        conditions.append(("drug_a_yj = %s", [drug_a_yj]))
    elif drug_b_yj:
        conditions.append(("drug_b_yj = %s", [drug_b_yj]))

    if severity:
        conditions.append(("severity = %s", [severity]))

    if keyword:
        pattern = f"%{keyword}%"
        conditions.append((
            "(drug_b_class ILIKE %s OR mechanism ILIKE %s "
            " OR clinical_effect ILIKE %s)",
            [pattern, pattern, pattern],
        ))

    if not conditions:
        return []

    where_sql = " AND ".join([clause for clause, _values in conditions])
    bound = [value for _clause, values in conditions for value in values]
    bound.append(limit)

    statement = (
        "SELECT id, drug_a_yj, drug_b_yj, drug_b_class, severity, "
        " mechanism, clinical_effect, source_section, source_drug_yj "
        "FROM drug_interaction WHERE " + where_sql +
        " ORDER BY severity, drug_b_class NULLS LAST LIMIT %s"
    )
    with session() as conn, conn.cursor() as cur:
        cur.execute(statement, bound)
        return [InteractionRow(*record) for record in cur.fetchall()]
def drug_restriction_query(
    drug_yj: str | None = None,
    *,
    condition_type: str | None = None,
    severity: str | None = None,
    keyword: str | None = None,
    limit: int = 50,
) -> list[RestrictionRow]:
    """Query ``drug_restriction`` with the given filters (combined with AND).

    ``drug_yj``, ``condition_type`` and ``severity`` are exact matches;
    ``keyword`` is an ILIKE substring match on ``condition_text``. Falsy
    filters are ignored.

    Returns:
        Up to ``limit`` ``RestrictionRow`` objects ordered by severity and
        condition type. An empty list is returned without querying when no
        filter is supplied.
    """
    # (SQL condition, bound value) in the order the clauses appear in WHERE.
    candidates = [
        ("drug_yj = %s", drug_yj),
        ("condition_type = %s", condition_type),
        ("severity = %s", severity),
        ("condition_text ILIKE %s", f"%{keyword}%" if keyword else None),
    ]
    active = [(clause, value) for clause, value in candidates if value]
    if not active:
        return []

    where_sql = " AND ".join([clause for clause, _value in active])
    bound = [value for _clause, value in active]
    bound.append(limit)

    statement = (
        "SELECT id, drug_yj, condition_type, condition_text, condition_params, "
        " severity, source_section "
        "FROM drug_restriction WHERE " + where_sql +
        " ORDER BY severity, condition_type LIMIT %s"
    )
    with session() as conn, conn.cursor() as cur:
        cur.execute(statement, bound)
        return [RestrictionRow(*record) for record in cur.fetchall()]
def load_taxonomy(path: Path | str = "pmda/drug_category.md") -> dict[str, L2]:
    """Parse the nested category markdown into a ``{l2_code: L2}`` mapping.

    Lines matching ``_L1_RE`` open a new top-level category; lines matching
    ``_L2_RE`` are recorded under the most recent top-level category. Every
    other line is ignored.

    Args:
        path: Location of the markdown file (read as UTF-8).

    Returns:
        Mapping from each L2 code to its ``L2`` record. A repeated L2 code
        keeps the last occurrence.

    Raises:
        ValueError: If an L2 row appears before any L1 row.
    """
    taxonomy: dict[str, L2] = {}
    parent = None  # (l1_code, l1_name) of the most recent top-level row

    text = Path(path).read_text(encoding="utf-8")
    for raw in text.splitlines():
        top = _L1_RE.match(raw)
        if top is not None:
            parent = (top.group(1), top.group(2).strip())
            continue

        nested = _L2_RE.match(raw)
        if nested is None:
            continue

        l2_code = nested.group(1)
        l2_name = nested.group(2).strip()
        if parent is None:
            raise ValueError(f"L2 row {l2_code} appears before any L1 in {path}")
        taxonomy[l2_code] = L2(
            code=l2_code, name=l2_name, l1_code=parent[0], l1_name=parent[1]
        )
    return taxonomy
l2_code: str) -> L2 | None: + """Return the L2 entry, or None if the YJ prefix isn't in the taxonomy.""" + return taxonomy.get(l2_code) + + +if __name__ == "__main__": + t = load_taxonomy() + print(f"Loaded {len(t)} L2 categories") + for code in ("111", "214", "421", "999"): + v = t.get(code) + print(f" {code} → {v}") diff --git a/skills/developing/rag-retrieve-disabled/.claude-plugin/plugin.json b/skills/developing/rag-retrieve-disabled/.claude-plugin/plugin.json new file mode 100644 index 0000000..0cc5e44 --- /dev/null +++ b/skills/developing/rag-retrieve-disabled/.claude-plugin/plugin.json @@ -0,0 +1,4 @@ +{ + "name": "rag-retrieve-disabled", + "description": "rag_retrieve, table_rag_retrieve and local file retrieval are disabled." +} diff --git a/skills/developing/rag-retrieve-disabled/README.md b/skills/developing/rag-retrieve-disabled/README.md new file mode 100644 index 0000000..acb9d35 --- /dev/null +++ b/skills/developing/rag-retrieve-disabled/README.md @@ -0,0 +1 @@ +# rag-retrieve-disabled diff --git a/utils/log_util/logger.py b/utils/log_util/logger.py index b6e98ba..a88f5ae 100644 --- a/utils/log_util/logger.py +++ b/utils/log_util/logger.py @@ -51,6 +51,13 @@ class Formatter(logging.Formatter): record.trace_id = getattr(g, "trace_id") except LookupError: record.trace_id = "N/A" + # Handle subagent - default to "main" for the orchestrator / no-context paths. + # Catch KeyError too: GlobalContext.__getattr__ raises KeyError on a missing key. 
+ if not hasattr(record, "subagent"): + try: + record.subagent = getattr(g, "subagent") + except (KeyError, LookupError): + record.subagent = "main" # Handle user_id # if not hasattr(record, "user_id"): # record.user_id = getattr(g, "user_id") @@ -65,7 +72,7 @@ class Formatter(logging.Formatter): def init_logger_once(name,level): logger = logging.getLogger(name) logger.setLevel(level=level) - formatter = Formatter("%(timestamp)s | %(levelname)-5s | %(trace_id)s | %(name)s:%(funcName)s:%(lineno)s - %(message)s", datefmt='%Y-%m-%d %H:%M:%S.%f') + formatter = Formatter("%(timestamp)s | %(levelname)-5s | %(trace_id)s | %(subagent)s | %(name)s:%(funcName)s:%(lineno)s - %(message)s", datefmt='%Y-%m-%d %H:%M:%S.%f') handler = logging.StreamHandler() handler.setFormatter(formatter) logger.addHandler(handler)