ask_user 工具现在只返回极短的固定响应 "Questions sent to user."，LLM 无需再处理完整的问题 JSON，从而减少了 token 消耗和延迟。

2026-05-18 11:37:23 +08:00 · 2026-05-18 11:37:23 +08:00 · 0bf7a87a0e
commit 0bf7a87a0e
parent 30389be119
2 changed files with 13 additions and 26 deletions
--- a/routes/chat.py
+++ b/routes/chat.py
@@ -144,12 +144,11 @@ async def enhanced_generate_stream_response(
                        is_ui_resource = (
                            msg.text
                            and msg.text.lstrip().startswith('{"')
-                            and (
+                            and '"ui://' in msg.text
-                                ('"ui://' in msg.text and ('"text/html' in msg.text or '"text/uri-list' in msg.text))
+                            and ('"text/html' in msg.text or '"text/uri-list' in msg.text)
                                or '"__ask_user__"' in msg.text
                            )
                        )
-                        if config.tool_response or is_ui_resource:
+                        is_ask_user = msg.name == 'ask_user'
                        if config.tool_response or is_ui_resource or is_ask_user:
                            new_content = f"[{message_tag}] {msg.name}\n{msg.text}\n"
                    # Collect full content
--- a/skills/common/mcp-ui/ui_render_server.py
+++ b/skills/common/mcp-ui/ui_render_server.py
@@ -21,32 +21,20 @@ from mcp_common import (
 )
-ASK_USER_MARKER = "__ask_user__"
+ASK_USER_RESPONSE = "Questions sent to user."
-def ask_user(questions: list) -> Dict[str, Any]:
+def ask_user() -> Dict[str, Any]:
-    """Create an ask_user response.
+    """Return a minimal fixed response for ask_user tool.
-    Args:
+    The actual questions/options are already in the TOOL_CALL arguments,
-        questions: List of dicts, each with "question", "options", and "multi_select".
+    so the frontend parses them directly from there. This response only
-
+    serves to acknowledge the tool call and minimize token usage in the
-    Returns a JSON structure with a marker so the backend can detect it
+    subsequent LLM inference round.
    and emit it as a special delta.ask_user event at the end of the stream.
    """
    normalized = []
    for q in questions:
        normalized.append({
            "question": q.get("question", ""),
            "options": q.get("options", []),
            "multi_select": q.get("multi_select", False),
        })
    payload = {
        "__type__": ASK_USER_MARKER,
        "questions": normalized,
    }
    return {
        "content": [
-            {"type": "text", "text": json.dumps(payload, ensure_ascii=False)}
+            {"type": "text", "text": ASK_USER_RESPONSE}
        ]
    }
@@ -168,7 +156,7 @@ async def handle_request(request: Dict[str, Any]) -> Dict[str, Any]:
                        request_id, -32602, "Missing required parameter: questions"
                    )
-                result = ask_user(questions)
+                result = ask_user()
                return {"jsonrpc": "2.0", "id": request_id, "result": result}
            else: