qwen_agent/skills/developing/pmda-drug-info/pmda_server.py

#!/usr/bin/env python3
"""
PMDA drug information MCP server — 真实 PG / OS 查询版本（替换原 mock）.

Plugin 自包含，不依赖 mygpt.* 任何模块。配置通过环境变量：
  PMDA_PG_HOST / PMDA_PG_PORT / PMDA_PG_DB / PMDA_PG_USER / PMDA_PG_PASSWORD
  PMDA_OPENSEARCH_URL (or OPENSEARCH_URL) / PMDA_OS_INDEX

参考 hu-sandbox/pmda/agent/tools.py 的 10 个 tool 行为（98/100 v2e baseline）.
"""

import asyncio
import sys
from dataclasses import asdict
from decimal import Decimal
from typing import Any, Dict, List, Optional, Sequence, Tuple

from mcp_common import (
    create_error_response,
    create_initialize_response,
    create_ping_response,
    create_tools_list_response,
    load_tools_from_json,
    handle_mcp_streaming,
)

from db import session
from queries import (
    drug_dosing_query,
    drug_interaction_query,
    drug_master_get,
    drug_restriction_query,
    list_categories_with_counts,
    list_drugs_in_category as _sql_list_drugs_in_category,
    search_drugs_in_db,
)
from os_client import client as os_client, INDEX_NAME as OS_INDEX_NAME


# ---------------------------------------------------------------------------
# Plain-text rendering (agent-friendly tool output)
#
# 工具结果以纯文本返回（而非 JSON），降低 agent 的解析负担与 token 噪音。
# CITATION enforcement 仍是工程化保证（不依赖 LLM 自觉）：
#   1. `_CITE_INSTRUCTION_TEXT` 注入每个含可引用源的结果头部 (LLM 第一眼)
#   2. 每条记录末尾一行 `CITATION:` 镜像 `_cite._tag` (LLM 直接复制, 不用 traverse)
#   3. `read_drug_chapter` 三明治包装 raw markdown (tag 物理紧贴章节文本)
# 命中 0 条时返回英文 no-results 话术, 且 **不含** CITATION 指令 —— 避免诱导
# agent 在无来源时编造引用。
# ---------------------------------------------------------------------------

_CITE_INSTRUCTION_TEXT = (
    "=== CITATION INSTRUCTIONS ===\n"
    "Each record below ends with a `CITATION:` line — a pre-built "
    "`<CITATION file=\"...\" filename=\"...\" />` tag the frontend PDF-highlight "
    "pipeline depends on. When you use a fact from a record, copy that record's "
    "`CITATION:` tag VERBATIM (byte-for-byte) immediately AFTER the paragraph or "
    "bullet that states the fact. NEVER collect citations at the end. At most ONE "
    "tag per unique file. Do NOT add, modify, reorder, remove attributes, or build "
    "a tag yourself. Records without a `CITATION:` line carry no clickable source — "
    "do NOT fabricate one. An answer that uses these facts but contains zero "
    "`<CITATION>` tags is a FAILED answer.\n"
    "=============================="
)


def _no_results(what: str) -> str:
    """English no-results message — intentionally omits CITATION instructions.

    Returned when a query matches 0 rows, so the agent tells the user nothing was
    found instead of being pushed to emit a citation for a non-existent source.
    """
    return (
        f"No matching {what} were found in the PMDA package-insert database.\n"
    )


def _fmt(value: Any) -> str:
    """Render a single field value as compact text (Decimal → number)."""
    if isinstance(value, Decimal):
        value = float(value)
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value)


def _tag_of(data: dict) -> Optional[str]:
    """Pull the pre-built ``<CITATION ... />`` tag out of a record."""
    return data.get("cite_emit") or (data.get("_cite") or {}).get("_tag")


def _render_records(
    records: Sequence[dict],
    *,
    what: str,
    header_title: str,
    field_specs: Sequence[Tuple[str, str]],
    title_key: Optional[str] = None,
    with_citation: bool = True,
) -> str:
    """Render a flat list of record dicts into agent-friendly plain text.

    Empty ``records`` → English no-results message (no CITATION instructions).
    Otherwise: optional citation-instruction header, a ``header_title`` line, then
    one block per record. ``field_specs`` is ``[(key, label), ...]`` controlling
    field order/display; empty values are skipped. ``title_key`` (if given) is the
    record's headline; each record's ``_citation`` text and CITATION tag are
    appended when present.
    """
    if not records:
        return _no_results(what)

    parts: List[str] = []
    if with_citation:
        parts.append(_CITE_INSTRUCTION_TEXT)
    parts.append(header_title)

    for idx, rec in enumerate(records, 1):
        title = _fmt(rec.get(title_key)) if title_key and rec.get(title_key) else ""
        lines = [f"[{idx}] {title}".rstrip()]
        for key, label in field_specs:
            value = rec.get(key)
            if value in (None, "", [], {}):
                continue
            lines.append(f"  {label}: {_fmt(value)}")
        if rec.get("_citation"):
            lines.append(f"  出典: {rec['_citation']}")
        if with_citation:
            tag = _tag_of(rec)
            if tag:
                lines.append(f"  CITATION: {tag}")
        parts.append("\n".join(lines))

    return "\n\n".join(parts)


def _render_categories(data: Sequence[dict]) -> str:
    """Render the L1/L2 category tree (navigation only — no citation source)."""
    if not data:
        return _no_results("categories")
    lines: List[str] = ["Drug categories:"]
    for l1 in data:
        lines.append(f"\n■ {l1.get('l1_code', '')} {l1.get('l1_name', '')}".rstrip())
        for l2 in l1.get("l2", []):
            lines.append(
                f"  - {l2.get('code', '')} {l2.get('name', '')} "
                f"({l2.get('drug_count', 0)} drugs)"
            )
    return "\n".join(lines)


def _render_drugs_in_category(data: dict) -> str:
    """Render generic → [brand] listing for one L2 category (navigation only)."""
    generics = data.get("generics") or []
    if not generics:
        return _no_results("drugs in this category")
    header = f"Category {data.get('l2_code', '')} {data.get('l2_name', '')}".rstrip()
    lines: List[str] = [header]
    for entry in generics:
        lines.append(f"\n● {entry.get('generic', '')}".rstrip())
        for drug in entry.get("drugs", []):
            if "_more" in drug:
                lines.append(f"  - {drug['_more']}")
            else:
                lines.append(
                    f"  - {drug.get('brand', '')} (yj_full={drug.get('yj_full', '')})"
                )
    if data.get("_more_generics"):
        lines.append(f"\n(+{data['_more_generics']} more generics)")
    return "\n".join(lines)


def _render_section_hits(
    *, keyword: str, section_filter: str, total: int, hits: Sequence[dict]
) -> str:
    """Render OpenSearch section-text hits with per-match snippets (carry tags)."""
    shown = len(hits)
    title = f'Found {total} drug(s) matching "{keyword}"'
    if section_filter:
        title += f' in sections like "{section_filter}"'
    title += f" (showing {shown}):"
    parts: List[str] = [_CITE_INSTRUCTION_TEXT, title]
    for idx, hit in enumerate(hits, 1):
        head = f"[{idx}] {hit.get('brand', '')} / {hit.get('generic', '')}".rstrip(" /")
        l2 = hit.get("l2", "")
        lines = [f"{head}  ({l2})" if l2 else head]
        lines.append(f"  yj_full: {hit.get('yj_full', '')}")
        for m in hit.get("matches", []):
            lines.append(f"  ▸ {m.get('section_title', '')}")
            snippet = (m.get("snippet") or "").strip()
            for sl in snippet.splitlines():
                lines.append(f"    {sl}")
        if hit.get("_citation_template"):
            lines.append(f"  出典テンプレ: {hit['_citation_template']}")
        tag = _tag_of(hit)
        if tag:
            lines.append(f"  CITATION: {tag}")
        parts.append("\n".join(lines))
    more = total - shown
    if more > 0:
        parts.append(f"(+{more} more drugs not shown)")
    return "\n\n".join(parts)


def _render_chapters(
    *, yj_full: str, brand: str, generic: str, sections: Sequence[dict]
) -> str:
    """Render the chapter index for one drug; each chapter carries its own tag."""
    has_cite = any(_tag_of(s) for s in sections)
    parts: List[str] = []
    if has_cite:
        parts.append(_CITE_INSTRUCTION_TEXT)
    parts.append(
        f"{brand} / {generic} (yj_full={yj_full}) — {len(sections)} section(s):".lstrip(
            " /"
        )
    )
    block: List[str] = []
    for s in sections:
        block.append(
            f"  - {s.get('section_title', '')} "
            f"(line {s.get('line_num', 0)}, {s.get('text_len', 0)} chars)"
        )
        tag = _tag_of(s)
        if tag:
            block.append(f"    CITATION: {tag}")
    parts.append("\n".join(block))
    return "\n\n".join(parts)


# ---------------------------------------------------------------------------
# 出典フォーマッタ（与 tools.py 一致）
# ---------------------------------------------------------------------------

_DRUG_LOOKUP: Optional[dict] = None
_VF_LOOKUP: Optional[dict] = None
_BRAND_BY_YJ_FULL: Optional[dict] = None


def _load_drug_lookup() -> dict:
    """yj_code → (brand_name, yj_full) 进程内缓存"""
    global _DRUG_LOOKUP
    if _DRUG_LOOKUP is None:
        with session() as conn, conn.cursor() as cur:
            cur.execute("SELECT yj_code, brand_name, yj_full FROM drug_master")
            _DRUG_LOOKUP = {
                row[0]: ((row[1] or ""), (row[2] or row[0]))
                for row in cur.fetchall()
            }
    return _DRUG_LOOKUP


def _load_brand_by_yj_full_lookup() -> dict:
    """yj_full → brand 表示名(多品名时取 "/" 分隔的第一段)。

    drug_master.brand_name 是多 brand 合并的字符串 (例
    "〔東洋〕半夏厚朴湯エキス細粒/〔松浦〕..."), <pmda_citation brand= /> 只用来
    给前端显示一个代表性的药品名,这里固定取第一段。
    """
    global _BRAND_BY_YJ_FULL
    if _BRAND_BY_YJ_FULL is None:
        with session() as conn, conn.cursor() as cur:
            cur.execute("SELECT yj_full, brand_name FROM drug_master")
            _BRAND_BY_YJ_FULL = {
                yj_full: ((brand or "").split("/", 1)[0].strip())
                for yj_full, brand in cur.fetchall()
                if yj_full
            }
    return _BRAND_BY_YJ_FULL


def _load_vf_lookup() -> dict:
    """yj_full → (vector_file_id, filename, section_to_page).

    Populated from ``pmda_drug_vf`` (written by gbase-onprem PmdaXmlPipeline).
    If the table is empty / not yet migrated, returns ``{}`` — citations then
    degrade to text-only (no ``<CITATION>`` tag emitted).
    """
    global _VF_LOOKUP
    if _VF_LOOKUP is None:
        out: dict = {}
        try:
            with session() as conn, conn.cursor() as cur:
                cur.execute(
                    "SELECT yj_full, vector_file_id, filename, section_to_page "
                    "FROM pmda_drug_vf"
                )
                for yj_full, vf_id, fname, s2p in cur.fetchall():
                    out[yj_full] = (str(vf_id), fname or "", s2p or {})
        except Exception:
            # Table not yet present — leave empty, downstream tools skip _cite.
            pass
        _VF_LOOKUP = out
    return _VF_LOOKUP


def _citation(drug_yj: str, section: Optional[str]) -> str:
    lk = _load_drug_lookup()
    brand, yj_full = lk.get(drug_yj, ("", drug_yj))
    chap = section or "（章不明）"
    return f"[出典: {brand} (yj_full={yj_full}) / {chap}]"


def _citation_tag(cite: dict) -> str:
    """Build the ``<CITATION file="..." filename="..." />`` string.

    精简版: **只输出 2 个属性 file + filename** — 减轻 LLM 负担 / 减少
    输出 token / 减少幻觉表面积。前端 PDF 高亮链路实际只用 file_id +
    text(段落正文),不依赖 page/yj_full/brand/section,所以 tag 里
    不再带这些(`_cite` 字典里仍保留, 给前端可选展示)。

    工程化预制, 让 LLM 直接照搬, 避免 LLM 自己拼字符串幻觉 file= 文件名。
    """
    from html import escape as _esc

    parts = []
    if cite.get("file_id"):
        parts.append(f'file="{_esc(str(cite["file_id"]), quote=True)}"')
    if cite.get("filename"):
        # 用 basename, 前端 chip 显示干净 — 完整 path 留在 _cite.filename
        bn = cite["filename"].rsplit("/", 1)[-1]
        parts.append(f'filename="{_esc(bn, quote=True)}"')
    return f"<CITATION {' '.join(parts)} />"


def _cite_struct_by_yj_full(yj_full: str, section: Optional[str]) -> Optional[dict]:
    """Build the ``_cite`` dict directly from a yj_full.

    返回 ``{file_id, filename, page, yj_full, brand, section?}`` — 复用通用
    ``<CITATION file="uuid" filename="name" page=N />`` 协议, 额外附加 PMDA
    专属属性 ``yj_full`` / ``brand`` / ``section``。

    核心属性 (通用 CITATION 协议):
      - ``file_id``   : VectorFile.id (uuid), 通用 /pdf/highlight 用这个定位 PDF
      - ``filename``  : VF 文件名, 通用 CITATION 展示用
      - ``page``      : PDF 页码 (0-based), 第一版固定 0 (后端 expand_pages 全文搜兜底)

    PMDA 额外属性 (前端可选读):
      - ``yj_full``   : 厚労省 YJ コード (含枝番), 跨 vf_uuid 稳定的唯一 id
      - ``brand``     : 表示用販売名 (drug_master.brand_name "/" 分隔的第一段)
      - ``section?``  : fact 表 source_section 完整字符串 (例 "10.1 併用禁忌")

    存在性验证 (硬要求, 缺一不返 _cite):
      - brand lookup (drug_master) 找不到 → None
      - vf_lookup (pmda_drug_vf) 找不到 → None (避免输出 <CITATION /> 空壳 tag)

    返 None 时 caller 不附 _cite, LLM 看到没 _cite 就不会 emit citation —
    比 emit 一个无 file/filename 属性的空标签好(前端解析空标签会渲染成
    broken chip)。
    """
    brand = _load_brand_by_yj_full_lookup().get(yj_full)
    if not brand:
        return None
    # 通用 CITATION 核心属性: file_id / filename 必须有, 否则不出 tag
    vf_info = _load_vf_lookup().get(yj_full)
    if not vf_info:
        return None
    vf_id, filename, _s2p = vf_info
    cite: dict = {
        "yj_full": yj_full,
        "brand": brand,
        "file_id": vf_id,
        "filename": filename,
        "page": 0,  # 第一版固定 page 0, 后端 expand_pages 全文搜
    }
    if section:
        cite["section"] = section
    # 工程化预制完整 tag 字符串, 让 LLM 只做复制粘贴, 不再自己拼
    cite["_tag"] = _citation_tag(cite)
    return cite


def _cite_struct(drug_yj: str, section: Optional[str]) -> Optional[dict]:
    """Return ``{file_id, filename, page, yj_full, brand, section?}`` for the ``<CITATION>`` tag.

    Returns ``None`` when drug_master has no row for this yj (skill can still
    emit the human ``[出典: ...]`` text).
    """
    drug_lk = _load_drug_lookup()
    _, yj_full = drug_lk.get(drug_yj, ("", drug_yj))
    return _cite_struct_by_yj_full(yj_full, section)


# ---------------------------------------------------------------------------
# Tool implementations (10 个)
# ---------------------------------------------------------------------------

def _tool_search_drugs(query: str, kind: str = "auto", limit: int = 10) -> str:
    rows = search_drugs_in_db(query, kind=kind, limit=limit)
    out = []
    for r in rows:
        entry: dict = {
            "yj_full": r.yj_full,
            "yj_code": r.yj_code,
            "brand": r.brand_name,
            "generic": r.generic_name,
            "category": f"{r.category_code} {r.category_name}".strip(),
            "score": r.score,
        }
        cite = _cite_struct_by_yj_full(r.yj_full, section=None)
        if cite is not None:
            entry["_cite"] = cite
            entry["cite_emit"] = cite["_tag"]  # top-level mirror for LLM
        out.append(entry)
    return _render_records(
        out,
        what="drugs",
        header_title=f"Found {len(out)} drug(s):",
        title_key="brand",
        field_specs=[
            ("generic", "generic"),
            ("yj_full", "yj_full"),
            ("yj_code", "yj_code"),
            ("category", "category"),
            ("score", "score"),
        ],
    )


def _tool_list_categories() -> str:
    return _render_categories(list_categories_with_counts())


def _tool_list_drugs_in_category(l2_code: str, limit_generics: int = 50) -> str:
    return _render_drugs_in_category(
        _sql_list_drugs_in_category(l2_code, limit_generics=limit_generics)
    )


def _tool_get_drug_master(yj_code: str) -> str:
    row = drug_master_get(yj_code)
    if row is None:
        return _no_results("drug master record")
    result = asdict(row)
    result["_citation"] = f"[出典: {row.brand_name} (yj_full={row.yj_full}) / 添付文書冒頭]"
    cite = _cite_struct(row.yj_code, section=None)
    if cite is not None:
        result["_cite"] = cite
        result["cite_emit"] = cite["_tag"]  # top-level mirror for LLM
    return _render_records(
        [result],
        what="drug master record",
        header_title="Drug master record:",
        title_key="brand_name",
        field_specs=[
            ("generic_name_jp", "generic"),
            ("yj_full", "yj_full"),
            ("yj_code", "yj_code"),
            ("category_code", "category_code"),
            ("category_name", "category_name"),
            ("regulation", "regulation"),
            ("manufacturer", "manufacturer"),
            ("revision_date", "revision_date"),
        ],
    )


def _tool_get_drug_interactions(
    drug_a_yj: Optional[str] = None,
    drug_b_yj: Optional[str] = None,
    severity: Optional[str] = None,
    keyword: Optional[str] = None,
    limit: int = 30,
) -> str:
    rows = drug_interaction_query(
        drug_a_yj=drug_a_yj,
        drug_b_yj=drug_b_yj,
        severity=severity,
        keyword=keyword,
        limit=limit,
    )
    out = []
    for r in rows:
        d = asdict(r)
        d["_citation"] = _citation(r.source_drug_yj, r.source_section)
        cite = _cite_struct(r.source_drug_yj, r.source_section)
        if cite is not None:
            d["_cite"] = cite
            d["cite_emit"] = cite["_tag"]  # top-level mirror for LLM
        out.append(d)
    return _render_records(
        out,
        what="drug interactions",
        header_title=f"Found {len(out)} drug interaction(s):",
        title_key="severity",
        field_specs=[
            ("drug_a_yj", "drug_a_yj"),
            ("drug_b_yj", "drug_b_yj"),
            ("drug_b_class", "drug_b_class"),
            ("mechanism", "mechanism"),
            ("clinical_effect", "clinical_effect"),
            ("source_section", "source_section"),
        ],
    )


def _tool_get_drug_restrictions(
    drug_yj: Optional[str] = None,
    condition_type: Optional[str] = None,
    severity: Optional[str] = None,
    keyword: Optional[str] = None,
    limit: int = 30,
) -> str:
    rows = drug_restriction_query(
        drug_yj=drug_yj,
        condition_type=condition_type,
        severity=severity,
        keyword=keyword,
        limit=limit,
    )
    out = []
    for r in rows:
        d = asdict(r)
        d["_citation"] = _citation(r.drug_yj, r.source_section)
        cite = _cite_struct(r.drug_yj, r.source_section)
        if cite is not None:
            d["_cite"] = cite
            d["cite_emit"] = cite["_tag"]  # top-level mirror for LLM
        out.append(d)
    return _render_records(
        out,
        what="drug restrictions",
        header_title=f"Found {len(out)} drug restriction(s):",
        title_key="condition_type",
        field_specs=[
            ("drug_yj", "drug_yj"),
            ("condition_text", "condition_text"),
            ("severity", "severity"),
            ("source_section", "source_section"),
        ],
    )


def _tool_get_drug_dosing(
    drug_yj: str,
    patient_segment: Optional[str] = None,
    limit: int = 20,
) -> str:
    rows = drug_dosing_query(
        drug_yj=drug_yj,
        patient_segment=patient_segment,
        limit=limit,
    )
    out = []
    for r in rows:
        d = asdict(r)
        # Merge amount + unit into one readable "dose" field for plain-text output.
        if r.dose_amount is not None:
            d["dose"] = f"{_fmt(r.dose_amount)}{r.dose_unit or ''}".strip()
        d["_citation"] = _citation(r.drug_yj, r.source_section)
        cite = _cite_struct(r.drug_yj, r.source_section)
        if cite is not None:
            d["_cite"] = cite
            d["cite_emit"] = cite["_tag"]  # top-level mirror for LLM
        out.append(d)
    return _render_records(
        out,
        what="dosing entries",
        header_title=f"Found {len(out)} dosing entr{'y' if len(out) == 1 else 'ies'}:",
        title_key="patient_segment",
        field_specs=[
            ("indication_code", "indication_code"),
            ("dose", "dose"),
            ("frequency", "frequency"),
            ("duration", "duration"),
            ("adjustment_text", "adjustment"),
            ("source_section", "source_section"),
        ],
    )


def _tool_search_section_text(
    keyword: str,
    section_filter: str = "",
    limit: int = 30,
) -> str:
    if not keyword.strip():
        return _no_results("sections")
    size = min(max(1, limit), 100)
    body: dict = {
        "size": size,
        "_source": ["yj_full", "brand_names", "generic_name",
                    "l2_code", "l2_name", "section_title", "line_num"],
        "query": {"bool": {"must": [{"match": {"text": keyword}}]}},
        "collapse": {
            "field": "yj_full",
            "inner_hits": {
                "name": "matches",
                "size": 2,
                "_source": ["section_title", "line_num"],
                "highlight": {"fields": {"text": {"fragment_size": 160, "number_of_fragments": 1}}},
            },
        },
        "aggs": {"total_drugs": {"cardinality": {"field": "yj_full"}}},
    }
    if section_filter:
        body["query"]["bool"]["filter"] = [
            {"wildcard": {"section_title.raw": f"*{section_filter}*"}}
        ]
    resp = os_client().search(index=OS_INDEX_NAME, body=body)
    total = int(resp["aggregations"]["total_drugs"]["value"])
    hits_out = []
    for h in resp["hits"]["hits"]:
        src = h.get("_source") or {}
        inner = h.get("inner_hits", {}).get("matches", {}).get("hits", {}).get("hits", [])
        brand = (src.get("brand_names") or [""])[0]
        yj_full = src.get("yj_full") or ""
        # Per-match snippet 自带对应 section 的 CITATION tag,
        # LLM 复制 snippet 时自动带对的 section 标签 (而不是 hit 顶层粗粒度 tag)。
        matches = []
        seen = set()
        for ih in inner:
            ih_src = ih.get("_source") or {}
            title = ih_src.get("section_title") or ""
            if title in seen:
                continue
            seen.add(title)
            hl = ih.get("highlight", {}).get("text", [""])
            snippet_text = hl[0] if hl else ""
            inner_cite = _cite_struct_by_yj_full(yj_full, title)
            inner_tag = (inner_cite or {}).get("_tag", "")
            matches.append({
                "section_title": title,
                "snippet": snippet_text + (f"\n{inner_tag}" if inner_tag else ""),
            })
        # Hit-level _cite per first-match section (legacy compatibility).
        cite_section = matches[0]["section_title"] if matches else None
        cite = _cite_struct_by_yj_full(yj_full, cite_section)
        hit_entry: dict = {
            "yj_full": yj_full,
            "brand": brand,
            "generic": src.get("generic_name") or "",
            "l2": f"{src.get('l2_code') or ''} {src.get('l2_name') or ''}".strip(),
            "matches": matches,
            "_citation_template": f"[出典: {brand} (yj_full={yj_full}) / <該当章>]",
        }
        if cite is not None:
            hit_entry["_cite"] = cite
            hit_entry["cite_emit"] = cite["_tag"]  # top-level mirror for LLM
        hits_out.append(hit_entry)
    if not hits_out:
        return _no_results("sections")
    return _render_section_hits(
        keyword=keyword,
        section_filter=section_filter,
        total=total,
        hits=hits_out,
    )


def _tool_list_drug_chapters(yj_full: str) -> str:
    """全章節 (section_title + line_num + text_len) for a yj_full（line_num 昇順）"""
    body = {
        "size": 200,
        "_source": ["section_title", "line_num", "brand_names", "generic_name", "text"],
        "query": {"term": {"yj_full": yj_full}},
        "sort": [{"line_num": "asc"}],
    }
    resp = os_client().search(index=OS_INDEX_NAME, body=body)
    hits = resp["hits"]["hits"]
    if not hits:
        return _no_results("chapters")

    head = hits[0].get("_source", {})
    brand = (head.get("brand_names") or [""])[0]
    generic = head.get("generic_name") or ""

    sections = []
    for h in hits:
        src = h.get("_source", {})
        section_title = src.get("section_title", "")
        entry: dict = {
            "section_title": section_title,
            "line_num": src.get("line_num", 0),
            "text_len": len(src.get("text", "")),
        }
        cite = _cite_struct_by_yj_full(yj_full, section_title)
        if cite is not None:
            entry["_cite"] = cite
            entry["cite_emit"] = cite["_tag"]  # top-level mirror for LLM
        sections.append(entry)

    return _render_chapters(
        yj_full=yj_full, brand=brand, generic=generic, sections=sections
    )


def _tool_read_drug_chapter(yj_full: str, section_title: str) -> str:
    """指定 (yj_full, section_title) の章節 markdown 全文（max 8000 字）。"""
    body = {
        "size": 1,
        "_source": ["text"],
        "query": {
            "bool": {
                "must": [
                    {"term": {"yj_full": yj_full}},
                    {"term": {"section_title.raw": section_title}},
                ]
            }
        },
    }
    resp = os_client().search(index=OS_INDEX_NAME, body=body)
    hits = resp["hits"]["hits"]
    if hits:
        src = hits[0].get("_source") or {}
        text = src.get("text", "")
        if text:
            # 三明治包装: header (CITATION reminder + tag) + body + footer (tag).
            # LLM 不读 body 就一定先读 header, 输出时复制段落自然带上 tag。
            # HTML comment 对 chat 渲染不可见, 但 LLM 在 sampling 时看得到。
            cite = _cite_struct_by_yj_full(yj_full, section_title)
            tag = (cite or {}).get("_tag", "")
            if tag:
                header = (
                    f"<!-- CITATION rule: copy the tag below verbatim into your answer -->\n"
                    f"{tag}\n\n"
                )
                footer = (
                    f"\n\n---\n"
                    f"If you use the content above in your answer, you MUST include this "
                    f"tag verbatim:\n"
                    f"{tag}\n"
                )
                return header + text[:8000] + footer
            return text[:8000]
    # Not a "no data" case but a parameter mismatch — keep the actionable hint.
    return (
        f'No section titled "{section_title}" exactly matches yj_full={yj_full}.\n'
        f"Hint: pass a sections[].section_title returned by list_drug_chapters "
        f"verbatim."
    )


# ---------------------------------------------------------------------------
# MCP dispatch
# ---------------------------------------------------------------------------

_TOOL_DISPATCH = {
    "search_drugs": lambda args: _tool_search_drugs(
        query=args.get("query", ""),
        kind=args.get("kind", "auto"),
        limit=args.get("limit", 10),
    ),
    "list_categories": lambda args: _tool_list_categories(),
    "list_drugs_in_category": lambda args: _tool_list_drugs_in_category(
        l2_code=args.get("l2_code", ""),
        limit_generics=args.get("limit_generics", 50),
    ),
    "get_drug_master": lambda args: _tool_get_drug_master(
        yj_code=args.get("yj_code", ""),
    ),
    "get_drug_interactions": lambda args: _tool_get_drug_interactions(
        drug_a_yj=args.get("drug_a_yj"),
        drug_b_yj=args.get("drug_b_yj"),
        severity=args.get("severity"),
        keyword=args.get("keyword"),
        limit=args.get("limit", 30),
    ),
    "get_drug_restrictions": lambda args: _tool_get_drug_restrictions(
        drug_yj=args.get("drug_yj"),
        condition_type=args.get("condition_type"),
        severity=args.get("severity"),
        keyword=args.get("keyword"),
        limit=args.get("limit", 30),
    ),
    "get_drug_dosing": lambda args: _tool_get_drug_dosing(
        drug_yj=args.get("drug_yj", ""),
        patient_segment=args.get("patient_segment"),
        limit=args.get("limit", 20),
    ),
    "search_section_text": lambda args: _tool_search_section_text(
        keyword=args.get("keyword", ""),
        section_filter=args.get("section_filter", ""),
        limit=args.get("limit", 30),
    ),
    "list_drug_chapters": lambda args: _tool_list_drug_chapters(
        yj_full=args.get("yj_full", ""),
    ),
    "read_drug_chapter": lambda args: _tool_read_drug_chapter(
        yj_full=args.get("yj_full", ""),
        section_title=args.get("section_title", ""),
    ),
}


async def handle_request(request: Dict[str, Any]) -> Dict[str, Any]:
    try:
        method = request.get("method")
        params = request.get("params", {})
        request_id = request.get("id")

        if method == "initialize":
            return create_initialize_response(request_id, "pmda-drug-info")
        if method == "ping":
            return create_ping_response(request_id)
        if method == "tools/list":
            tools = load_tools_from_json("pmda_tools.json")
            return create_tools_list_response(request_id, tools)
        if method == "tools/call":
            tool_name = params.get("name")
            arguments = params.get("arguments", {})
            if tool_name not in _TOOL_DISPATCH:
                return create_error_response(request_id, -32601, f"Unknown tool: {tool_name}")
            try:
                result_text = _TOOL_DISPATCH[tool_name](arguments)
                return {
                    "jsonrpc": "2.0",
                    "id": request_id,
                    "result": {"content": [{"type": "text", "text": result_text}]},
                }
            except Exception as e:
                return {
                    "jsonrpc": "2.0",
                    "id": request_id,
                    "result": {"content": [{"type": "text", "text": f"Error: {type(e).__name__}: {e}"}]},
                }
        return create_error_response(request_id, -32601, f"Unknown method: {method}")
    except Exception as e:
        return create_error_response(request.get("id"), -32603, f"Internal error: {e}")


async def main():
    await handle_mcp_streaming(handle_request)


if __name__ == "__main__":
    asyncio.run(main())