From d45079ca5538e8c3d1f44d33e50ad3efb4fb49f5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9C=B1=E6=BD=AE?= <zhuchaowe@users.noreply.github.com>
Date: Thu, 8 Jan 2026 22:56:43 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20=E5=B0=86system=5Fprompt=E8=A7=A3?=
 =?UTF-8?q?=E6=9E=90=E4=BB=8Emarkdown=E4=BB=A3=E7=A0=81=E5=9D=97=E6=94=B9?=
 =?UTF-8?q?=E4=B8=BAXML=E6=A0=87=E7=AD=BE=E6=A0=BC=E5=BC=8F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- agent_config.py:
  - enable_thinking判断从 ```guideline 改为 <guidelines>
  - 基于 bot_config 构建配置时新增 robot_type 读取（默认 general_agent），并将 catalog_agent 映射为 deep_agent
- fastapi_utils.py:
  - preamble解析从 ```preamble``` 改为 <preamble>
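  - guidelines/tools/scenarios/terms 块解析从 markdown 格式改为 XML 标签格式
    （注意：不再识别单数形式 guideline；每种标签仅解析第一个匹配；
    tools/scenarios 块现在也会加入 blocks_to_remove 并从 system_prompt 中移除）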
  - 移除不再使用的 parse_guidelines_text 函数

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 agent/agent_config.py  |   7 ++-
 utils/fastapi_utils.py | 102 +++++++++++++----------------------------
 2 files changed, 38 insertions(+), 71 deletions(-)
diff --git a/agent/agent_config.py b/agent/agent_config.py
index 4549f28..b3d2758 100644
--- a/agent/agent_config.py
+++ b/agent/agent_config.py
@@ -81,7 +81,7 @@ class AgentConfig:
             messages = []
 
         preamble_text, system_prompt = get_preamble_text(request.language, request.system_prompt)
-        enable_thinking = request.enable_thinking and "```guideline" in request.system_prompt
+        enable_thinking = request.enable_thinking and "<guidelines>" in request.system_prompt
 
         config = cls(
             bot_id=request.bot_id,
@@ -119,7 +119,10 @@ class AgentConfig:
             messages = []
         language = request.language or bot_config.get("language", "zh")
         preamble_text, system_prompt = get_preamble_text(language, bot_config.get("system_prompt"))
-        enable_thinking = request.enable_thinking and "```guideline" in bot_config.get("system_prompt")
+        robot_type = bot_config.get("robot_type", "general_agent")
+        if robot_type == "catalog_agent":
+            robot_type = "deep_agent"
+        enable_thinking = request.enable_thinking and "<guidelines>" in bot_config.get("system_prompt")
 
         config = cls(
             bot_id=request.bot_id,
diff --git a/utils/fastapi_utils.py b/utils/fastapi_utils.py
index e11e066..23ade5d 100644
--- a/utils/fastapi_utils.py
+++ b/utils/fastapi_utils.py
@@ -493,15 +493,15 @@ def get_language_text(language: str):
     return language_map.get(language.lower(), '')
 
 def get_preamble_text(language: str, system_prompt: str):
-    # 首先检查system_prompt中是否有preamble代码块
+    # 首先检查system_prompt中是否有preamble标签
     if system_prompt:
-        preamble_pattern = r'```preamble\s*\n(.*?)\n```'
+        preamble_pattern = r'<preamble>\s*(.*?)\s*</preamble>'
         preamble_matches = re.findall(preamble_pattern, system_prompt, re.DOTALL)
         if preamble_matches:
             # 提取preamble内容
             preamble_content = preamble_matches[0].strip()
             if preamble_content:
-                # 从system_prompt中删除preamble代码块
+                # 从system_prompt中删除preamble标签
                 cleaned_system_prompt = re.sub(preamble_pattern, '', system_prompt, flags=re.DOTALL)
                 return preamble_content, cleaned_system_prompt
 
@@ -697,27 +697,40 @@ def extract_block_from_system_prompt(system_prompt: str) -> tuple[str, str, str,
 
     terms_list = []
 
-    # 首先分割所有的代码块
-    block_pattern = r'```(\w+)\s*\n(.*?)\n```'
+    # 使用XML标签格式解析块
     blocks_to_remove = []
 
-    for match in re.finditer(block_pattern, system_prompt, re.DOTALL):
-        block_type, content = match.groups()
+    # 解析 <guidelines>
+    guidelines_pattern = r'<guidelines>\s*(.*?)\s*</guidelines>'
+    match = re.search(guidelines_pattern, system_prompt, re.DOTALL)
+    if match:
+        guidelines = match.group(1).strip()
+        blocks_to_remove.append(match.group(0))
 
-        if block_type == 'guideline' or block_type == 'guidelines':
-            guidelines = content.strip()
+    # 解析 <tools>
+    tools_pattern = r'<tools>\s*(.*?)\s*</tools>'
+    match = re.search(tools_pattern, system_prompt, re.DOTALL)
+    if match:
+        tools = match.group(1).strip()
+        blocks_to_remove.append(match.group(0))
+
+    # 解析 <scenarios>
+    scenarios_pattern = r'<scenarios>\s*(.*?)\s*</scenarios>'
+    match = re.search(scenarios_pattern, system_prompt, re.DOTALL)
+    if match:
+        scenarios = match.group(1).strip()
+        blocks_to_remove.append(match.group(0))
+
+    # 解析 <terms>
+    terms_pattern = r'<terms>\s*(.*?)\s*</terms>'
+    match = re.search(terms_pattern, system_prompt, re.DOTALL)
+    if match:
+        try:
+            terms = parse_terms_text(match.group(1).strip())
+            terms_list.extend(terms)
             blocks_to_remove.append(match.group(0))
-        elif block_type == 'tools':
-            tools = content.strip()
-        elif block_type == 'scenarios':
-            scenarios = content.strip()
-        elif block_type == 'terms':
-            try:
-                terms = parse_terms_text(content.strip())
-                terms_list.extend(terms)
-                blocks_to_remove.append(match.group(0))
-            except Exception as e:
-                logger.error(f"Error parsing terms: {e}")
+        except Exception as e:
+            logger.error(f"Error parsing terms: {e}")
 
     # 从system_prompt中移除这些已解析的块
     cleaned_prompt = system_prompt
@@ -729,55 +742,6 @@ def extract_block_from_system_prompt(system_prompt: str) -> tuple[str, str, str,
     return cleaned_prompt, guidelines, tools, scenarios, terms_list
 
 
-def parse_guidelines_text(text: str) -> List[Dict[str, Any]]:
-    """
-    解析guidelines文本，支持多种格式
-
-    Args:
-        text: guidelines文本内容
-
-    Returns:
-        List[Dict]: guidelines列表
-    """
-    guidelines = []
-
-    # 尝试解析JSON格式
-    if text.strip().startswith('[') or text.strip().startswith('{'):
-        try:
-            data = json.loads(text)
-            if isinstance(data, list):
-                for item in data:
-                    if isinstance(item, dict):
-                        guidelines.append(item)
-            elif isinstance(data, dict):
-                guidelines.append(data)
-            return guidelines
-        except json.JSONDecodeError:
-            pass
-
-    # 解析行格式，支持多种分隔符
-    lines = [line.strip() for line in text.split('\n') if line.strip()]
-
-    for line in lines:
-        # 跳过注释行
-        if line.startswith('#') or line.startswith('//'):
-            continue
-
-        # 尝试解析 "id) Condition: ... Action: ..." 格式
-        id_condition_action_pattern = r'(\d+)\)\s*Condition:\s*(.*?)\s*Action:\s*(.*?)(?:\s*Priority:\s*(\d+))?$'
-        match = re.match(id_condition_action_pattern, line, re.IGNORECASE)
-        if match:
-            guidelines.append({
-                'guideline_id': int(match.group(1)),
-                'condition': match.group(2).strip(),
-                'action': match.group(3).strip(),
-                'priority': int(match.group(4)) if match.group(4) else 1
-            })
-            continue
-
-    return guidelines
-
-
 def parse_terms_text(text: str) -> List[Dict[str, Any]]:
     """
     解析terms文本，支持多种格式