# qwen_agent/agent/subagent_loader.py

"""Sub-agent loader for discovering and parsing sub-agent definitions from skill directories.

Sub-agents are defined as markdown files with YAML frontmatter in skill directories:
    projects/robot/{bot_id}/skills/{skill_name}/agents/*.md

Each file has the format:
    ---
    name: code-reviewer
    description: Reviews code for quality and security issues.
    tools: rag_retrieve, table_rag_retrieve
    ---

    System prompt for the sub-agent...
"""

import logging
import os
import re
from pathlib import Path
from typing import Optional

import yaml
from deepagents.middleware.subagents import SubAgent
from langchain.tools import BaseTool
from langchain_core.language_models import BaseChatModel

from agent.plugin_hook_loader import _get_skill_dirs

# Module logger; obtained under the fixed name 'app' rather than __name__.
logger = logging.getLogger('app')

# Regex to extract YAML frontmatter and body from markdown files.
#   group(1): text between the opening '---' line and the first following
#             '\n---' (lazy match, so the earliest closing delimiter wins).
#   group(2): everything after the closing delimiter (may be empty).
# The pattern is anchored at the very start of the content, and re.DOTALL lets
# '.' span newlines so both groups can cover multiple lines.
_FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*\n?(.*)$", re.DOTALL)


def _parse_agent_md(file_path: Path) -> Optional[dict]:
    """Parse a sub-agent markdown file with YAML frontmatter.

    Args:
        file_path: Path to the .md file.

    Returns:
        Dict with keys: name, description, system_prompt, tool_names (list[str] | None).
        None if parsing fails.
    """
    try:
        content = file_path.read_text(encoding="utf-8")
    except OSError as e:
        logger.warning(f"Failed to read sub-agent file {file_path}: {e}")
        return None

    match = _FRONTMATTER_RE.match(content)
    if not match:
        logger.warning(f"Sub-agent file {file_path} has no valid frontmatter")
        return None

    frontmatter_str, body = match.group(1), match.group(2)

    try:
        frontmatter = yaml.safe_load(frontmatter_str)
    except yaml.YAMLError as e:
        logger.warning(f"Invalid YAML in sub-agent file {file_path}: {e}")
        return None

    if not isinstance(frontmatter, dict):
        logger.warning(f"Frontmatter in {file_path} is not a dict")
        return None

    name = frontmatter.get("name", "").strip() if isinstance(frontmatter.get("name"), str) else ""
    description = frontmatter.get("description", "").strip() if isinstance(frontmatter.get("description"), str) else ""

    if not name:
        logger.warning(f"Sub-agent file {file_path} missing required 'name' field")
        return None
    if not description:
        logger.warning(f"Sub-agent file {file_path} missing required 'description' field")
        return None

    # Parse optional tools field: comma-separated tool names
    tool_names = None
    tools_field = frontmatter.get("tools")
    if tools_field is not None:
        if isinstance(tools_field, str):
            tool_names = [t.strip() for t in tools_field.split(",") if t.strip()]
        elif isinstance(tools_field, list):
            tool_names = [str(t).strip() for t in tools_field if str(t).strip()]
        else:
            logger.warning(f"Invalid 'tools' field in {file_path}, expected string or list")

    return {
        "name": name,
        "description": description,
        "system_prompt": body.strip(),
        "tool_names": tool_names,
        "source": str(file_path),
    }


def _filter_tools_by_names(all_tools: list[BaseTool], tool_names: list[str]) -> list[BaseTool]:
    """Filter MCP tools by name whitelist.

    Args:
        all_tools: All available MCP tools.
        tool_names: Whitelist of tool names to include.

    Returns:
        Filtered list of tools. Logs warning for names not found.
    """
    tool_lookup = {tool.name: tool for tool in all_tools}
    filtered = []
    for name in tool_names:
        if name in tool_lookup:
            filtered.append(tool_lookup[name])
        else:
            available = list(tool_lookup.keys())
            logger.warning(f"Sub-agent tool '{name}' not found in MCP tools. Available: {available}")
    return filtered


async def load_subagents(
    bot_id: str,
    tools: list[BaseTool],
    model: BaseChatModel,
) -> list[SubAgent]:
    """Load sub-agent definitions from skill directories.

    Scans all skill directories for the given bot_id, looking for agents/*.md files
    in each skill subdirectory. Files that fail to parse are skipped.

    When several files declare the same sub-agent name, the one visited last
    wins and a warning is logged. Skill directories are visited in the order
    returned by _get_skill_dirs; within each, skill names and agent files are
    visited in sorted order so the winner is reproducible across runs.

    Args:
        bot_id: Bot identifier for locating skill directories.
        tools: All available MCP tools for filtering.
        model: The main agent's model, used by each sub-agent.

    Returns:
        List of SubAgent dicts. Empty list if no sub-agents found.
    """
    parsed_agents: dict[str, dict] = {}  # name -> parsed dict (last-wins for dedup)

    for skill_dir in _get_skill_dirs(bot_id):
        # isdir (not exists): a regular file at this path would make
        # os.listdir raise NotADirectoryError.
        if not os.path.isdir(skill_dir):
            continue

        # os.listdir and Path.glob yield entries in filesystem-dependent
        # order; sorting keeps duplicate resolution deterministic.
        for skill_name in sorted(os.listdir(skill_dir)):
            agents_dir = Path(skill_dir, skill_name, "agents")
            if not agents_dir.is_dir():
                continue

            for md_file in sorted(agents_dir.glob("*.md")):
                parsed = _parse_agent_md(md_file)
                if parsed is None:
                    continue

                name = parsed["name"]
                if name in parsed_agents:
                    logger.warning(
                        f"Duplicate sub-agent name '{name}': "
                        f"{parsed_agents[name]['source']} overridden by {parsed['source']}"
                    )
                parsed_agents[name] = parsed

    if not parsed_agents:
        return []

    # Build SubAgent dicts with model and filtered tools
    subagents: list[SubAgent] = []
    for name, parsed in parsed_agents.items():
        # Filter tools: if tool_names specified, filter; otherwise inherit all
        if parsed["tool_names"] is not None:
            filtered_tools = _filter_tools_by_names(tools, parsed["tool_names"])
        else:
            # Copy so the sub-agent does not share the caller's list object.
            filtered_tools = list(tools)

        subagent: SubAgent = {
            "name": name,
            "description": parsed["description"],
            "system_prompt": parsed["system_prompt"],
            "model": model,
            "tools": filtered_tools,
        }
        subagents.append(subagent)
        logger.info(f"Loaded sub-agent '{name}' with {len(filtered_tools)} tools from {parsed['source']}")

    return subagents