65 lines
1.9 KiB
Python
65 lines
1.9 KiB
Python
"""Drug-category taxonomy loader.
|
|
|
|
Reads `pmda/drug_category.md` (the cleaned-up nested list with codes) and
|
|
produces a `{l2_code: L2}` dict for joining with `DocMeta.l2_code` (first 3
|
|
chars of the YJ code).
|
|
|
|
Source markdown shape:
|
|
|
|
    - 11 中枢神経系用薬
      - 111 全身麻酔剤
      - 112 催眠鎮静剤,抗不安剤
    - 12 末梢神経用薬
      - 121 局所麻酔剤
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
|
|
# Top-level (L1) row, starting at column 0: `- {2-digit} {name}`.
# Group 1 is the two-digit code, group 2 the rest of the line (the name).
_L1_RE = re.compile(r"^- (\d{2})\s+(.+)$")
# Nested (L2) row, indented by exactly two spaces: `  - {3-digit} {name}`.
# Group 1 is the three-digit code, group 2 the rest of the line (the name).
_L2_RE = re.compile(r"^ {2}- (\d{3})\s+(.+)$")
|
|
|
|
|
|
@dataclass(frozen=True)
class L2:
    """A second-level drug category together with its parent L1 category.

    Instances are immutable (``frozen=True``).
    """

    code: str  # L2 code, e.g. "111"
    name: str  # L2 category name, e.g. "全身麻酔剤"
    l1_code: str  # code of the enclosing L1 category, e.g. "11"
    l1_name: str  # name of the enclosing L1 category, e.g. "中枢神経系用薬"
|
|
|
|
|
|
def load_taxonomy(path: Path | str = "pmda/drug_category.md") -> dict[str, L2]:
    """Parse the drug-category markdown list into a ``{l2_code: L2}`` dict.

    The file is scanned line by line. A line matching ``_L1_RE`` becomes the
    current top-level category; a line matching ``_L2_RE`` is stored under
    that category. Lines matching neither pattern are ignored.

    Args:
        path: Location of the markdown file.

    Returns:
        A dict keyed by the three-digit L2 code. If the same code appears
        more than once, the last occurrence wins.

    Raises:
        ValueError: If an L2 row appears before any L1 row.
        OSError: Propagated from reading the file (e.g. it does not exist).
    """
    # "utf-8-sig" decodes BOM-less UTF-8 exactly like "utf-8" but also drops
    # a leading byte-order mark. With plain "utf-8" the BOM stays glued to
    # the first line, so an L1 row on line 1 would not match and the first
    # L2 row after it would be rejected as "before any L1".
    text = Path(path).read_text(encoding="utf-8-sig")

    out: dict[str, L2] = {}
    current_l1_code = ""
    current_l1_name = ""
    for line in text.splitlines():
        m1 = _L1_RE.match(line)
        if m1:
            # New top-level category: remember it for the L2 rows that follow.
            current_l1_code, current_l1_name = m1.group(1), m1.group(2).strip()
            continue
        m2 = _L2_RE.match(line)
        if m2:
            code = m2.group(1)
            name = m2.group(2).strip()
            if not current_l1_code:
                raise ValueError(f"L2 row {code} appears before any L1 in {path}")
            out[code] = L2(
                code=code,
                name=name,
                l1_code=current_l1_code,
                l1_name=current_l1_name,
            )
    return out
|
|
|
|
|
|
def lookup(taxonomy: dict[str, L2], l2_code: str) -> L2 | None:
    """Fetch the taxonomy entry stored under *l2_code*.

    Returns ``None`` when the code has no entry in *taxonomy*.
    """
    entry = taxonomy.get(l2_code, None)
    return entry
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke check: load the default file and print a few sample codes.
    taxonomy = load_taxonomy()
    print(f"Loaded {len(taxonomy)} L2 categories")
    sample_codes = ("111", "214", "421", "999")
    for sample in sample_codes:
        # Codes absent from the taxonomy print as None.
        print(f" {sample} → {taxonomy.get(sample)}")
|