From 5e26d88d183286b329d6bc61733ed895df9bad52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E6=BD=AE?= Date: Fri, 28 Nov 2025 14:26:37 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E6=8E=A8=E7=90=86=E9=80=BB?= =?UTF-8?q?=E8=BE=91,=E6=8F=90=E5=8D=87=E6=8E=A8=E7=90=86=EF=BD=89?= =?UTF-8?q?=E9=80=9F=E5=BA=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- prompt/guideline_prompt.md | 26 ++++-------- prompt/preamble_prompt.md | 81 ++++++++++++++++++++------------------ routes/chat.py | 76 ++++++++++++++++++++++++++--------- utils/fastapi_utils.py | 48 +--------------------- 4 files changed, 108 insertions(+), 123 deletions(-) diff --git a/prompt/guideline_prompt.md b/prompt/guideline_prompt.md index b4117f8..2990a11 100644 --- a/prompt/guideline_prompt.md +++ b/prompt/guideline_prompt.md @@ -31,9 +31,7 @@ Examples of Guideline Match Evaluations: "checks": [ { "guideline_id": "", - "condition": "The customer asks for logistical or legal requirements.", - "rationale": "The customer now asked about visas and documents which are legal requirements", - "applies": true + "rationale": "The customer now asked about visas and documents which are legal requirements" } ] } @@ -58,15 +56,11 @@ Examples of Guideline Match Evaluations: "checks": [ { "guideline_id": "", - "condition": "The customer mentions a constraint that related to commitment to the course", - "rationale": "In the most recent message the customer mentions that they work full time which is a constraint", - "applies": true + "rationale": "In the most recent message the customer mentions that they work full time which is a constraint" }, { "guideline_id": "", - "condition": "The user expresses hesitation or self-doubt.", - "rationale": "In the most recent message the user still sounds hesitating about their fit to the course", - "applies": true + "rationale": "In the most recent message the user still sounds hesitating about their fit to the course" } ] } @@ 
-89,9 +83,7 @@ Examples of Guideline Match Evaluations: "checks": [ { "guideline_id": "", - "condition": "When the user is having a problem with login.", - "rationale": "In the most recent message the customer is still pursuing their login problem, making the mail access problem a sub-issue rather than a new topic", - "applies": true + "rationale": "In the most recent message the customer is still pursuing their login problem, making the mail access problem a sub-issue rather than a new topic" } ] } @@ -111,9 +103,7 @@ Examples of Guideline Match Evaluations: "checks": [ { "guideline_id": "", - "condition": "When the customer asks about how to return an item.", - "rationale": "In the most recent message the customer asks about what happens when they wore the item, which an inquiry regarding returning an item", - "applies": true + "rationale": "In the most recent message the customer asks about what happens when they wore the item, which an inquiry regarding returning an item" } ] } @@ -130,15 +120,13 @@ Guidelines List: OUTPUT FORMAT: The content in JSON format needs to be wrapped in "```json" and "```". -Only include guidelines that actually apply (applies: true). Do not include guidelines that don't match. +Only include guidelines that actually apply. ```json { "checks": [ { "guideline_id": "1", - "condition": "Specific condition description", - "rationale": "", - "applies": true + "rationale": "" } ] } diff --git a/prompt/preamble_prompt.md b/prompt/preamble_prompt.md index 1258222..df1ee01 100644 --- a/prompt/preamble_prompt.md +++ b/prompt/preamble_prompt.md @@ -1,42 +1,48 @@ -You are an AI agent that is expected to generate a preamble message for the customer. +You are a friendly AI assistant that generates natural preamble responses to acknowledge user messages before passing them to a more capable agent for detailed processing. -The actual message will be sent later by a smarter agent. Your job is only to generate the right preamble in order to save time. 
+## Scenario Analysis -## Scenario Detection Logic +Analyze the user's message to determine the appropriate response type: -FIRST, determine if this is a COMPLEX scenario that requires a preamble: +**Use a friendly preamble for:** +- Questions, information requests, or searches +- Task execution or action requests +- Knowledge retrieval from documents, databases, or web +- Problem-solving or troubleshooting requests +- Complex multi-step instructions +- Technical assistance needs -**Complex Scenarios (preamble needed):** -- Query scenarios: User is asking for information, searching, or looking up data -- Action scenarios: User wants to perform an operation, execute a task -- Knowledge retrieval scenarios: User needs to search knowledge base, documents、databases or Internet -- Problem-solving: User is reporting issues, asking for help with problems -- Complex requests: Multi-step tasks, detailed instructions needed +**Return "" for:** +- Simple greetings and farewells +- Basic acknowledgments (thanks, ok, etc.) +- Casual small talk +- Very brief or non-substantive messages -**ALL OTHER scenarios (output ""):** -- Simple greetings: "hi", "hello", "你好", "在吗", "早上好/晚上好" -- Simple acknowledgments: "thanks", "ok", "好的", "谢谢" -- Small talk: "how are you", "天气怎么样", "最近怎么样" -- Simple farewells: "bye", "goodbye", "再见" -- Any other scenarios not explicitly listed as complex +## Response Guidelines -## Preamble Selection +Generate a warm, natural preamble that: +- Acknowledges the user's message positively +- Shows you're ready to help +- Creates a friendly interaction flow +- Doesn't commit to specific answers (that's for the main agent) -ONLY IF this is a COMPLEX scenario, choose from these preamble messages. You must ONLY choose one of these: ### +**Reference these examples for inspiration:** {preamble_choices_text} -### -Basically, the preamble is something very short that continues the interaction naturally, without committing to any later action or response. 
-We leave that later response to another agent. Make sure you understand this. +**Your approach:** +- Match the tone to the user's message (formal/professional or casual/friendly) +- Use contextual awareness - consider what the user is asking about +- Keep it brief and welcoming +- Feel free to create natural variations beyond the examples +- In Chinese conversations, use appropriate conversational phrases +- Be helpful and encouraging -Instructions: -- For COMPLEX scenarios: - - Note that some of the choices are more generic, and some are more specific to a particular scenario. - - If you're unsure what to choose --> prefer to go with a more generic, bland choice. This should be 80% of cases. - Examples of generic choices: "Hey there!", "Just a moment.", "Hello.", "Got it." - - If you see clear value in saying something more specific and nuanced --> then go with a more specific choice. This should be 20% or less of cases. - Examples of specific choices: "Let me check that for you.", "Sorry to hear that.", "Thanks for your patience." -- For ALL OTHER scenarios: Always output preamble: "" +**Examples of good preambles:** +- "I'd be happy to help you with that!" +- "Let me look into that for you." +- "Thanks for reaching out - I'll assist you with this." +- "That's an interesting question! Let me help you find the answer." +- "I understand what you need. Let me get that information for you." Chat History: @@ -46,26 +52,25 @@ User's Last Message: {last_message} OUTPUT FORMAT: -You must now choose the preamble message. You must produce a JSON object with a single key, "preamble", holding the preamble message as a string, -EXACTLY as it is given (pay attention to subtleties like punctuation and copy your choice EXACTLY as it is given above).The content in JSON format needs to be wrapped in "```json" and "```". 
+Generate a JSON response with your preamble message: -For ALL OTHER scenarios: +For simple interactions (greetings, basic acknowledgments, small talk): ```json { "preamble": "" } ``` -For COMPLEX scenarios: +For substantive requests and questions: ```json { - "preamble": "Just a moment." + "preamble": "Your friendly, contextual preamble here" } ``` -You will now be given the current state of the interaction to which you must generate the next preamble message. +Remember to wrap your entire response in ```json ... ``` tags. + +Preamble LANGUAGE: {language} - - - +You will now be given the current state of the interaction to which you must generate the next preamble message. diff --git a/routes/chat.py b/routes/chat.py index 2b13eb6..37a2050 100644 --- a/routes/chat.py +++ b/routes/chat.py @@ -77,7 +77,7 @@ async def process_guidelines_and_terms( 公共函数:处理guideline分析和terms处理,返回agent和analysis结果 Returns: - tuple: (agent, processed_system_prompt, guideline_analysis, terms_analysis) + tuple: (agent, processed_system_prompt, guideline_reasoning, terms_analysis) """ # 提取system_prompt中的guideline和terms processed_system_prompt, guidelines_list, terms_list = extract_block_from_system_prompt(system_prompt) @@ -108,7 +108,8 @@ logger.error(f"Error removing terms cache file: {e}") # 处理guidelines - guideline_analysis = "" + guideline_reasoning = "" + active_guidelines = "" agent = None if guidelines_list: @@ -125,7 +126,7 @@ batch_guidelines = guidelines_list[i:i + guidelines_per_batch] batch_strings = [] for guideline in batch_guidelines: - guideline_str = f"{guideline['id']}) Condition: {guideline['condition']} Action: {guideline['action']}" + guideline_str = f"{guideline['guideline_id']}) Condition: {guideline['condition']} Action: {guideline['action']}" batch_strings.append(guideline_str) batches.append(batch_strings) @@ -179,14 +180,50 @@ 
logger.error(f"Guideline batch {i} failed: {result}") continue if result and isinstance(result, dict) and 'checks' in result: - applicable_checks = [check for check in result['checks'] if check.get('applies') is True] + applicable_checks = [check for check in result['checks']] all_checks.extend(applicable_checks) elif result and isinstance(result, str) and result.strip(): logger.info(f"Non-JSON result from batch {i}: {result}") if all_checks: - guideline_analysis = "\n".join([item["condition"]+":"+item["rationale"] for item in all_checks]) - logger.info(f"Guideline analysis completed: {len(guideline_analysis)} chars") + # 首先创建guideline_id到action字段的映射,同时支持string和int类型的guideline_id + guideline_map = {} + for guideline in guidelines_list: + guideline_id = guideline.get('guideline_id') + if guideline_id is not None: + # 同时存储字符串和整数类型的键,确保匹配成功 + guideline_map[str(guideline_id)] = guideline + + # 补全all_checks中缺失的action字段 + completed_checks = [] + for check in all_checks: + completed_check = check.copy() + guideline_id = check.get('guideline_id') + + # 如果action字段缺失或为空,从guidelines_list中获取 + if str(guideline_id) in guideline_map: + completed_check['action'] = guideline_map[str(guideline_id)].get('action', "") + completed_check['condition'] = guideline_map[str(guideline_id)].get('condition', "") + + completed_checks.append(completed_check) + + guideline_reasoning = "\n".join([item["rationale"] for item in completed_checks]) + + # 使用补全后的checks生成active_guidelines,添加安全检查 + active_guidelines_parts = [] + for item in completed_checks: + if 'action' in item and item['action']: + active_guidelines_parts.append( + "Condition:" + item["condition"] + "\nRationale:" + item["rationale"] + "\nAction:" + item["action"] + ) + else: + # 如果仍然缺少action字段,使用默认值 + active_guidelines_parts.append( + "Condition:" + item["condition"] + "\nRationale:" + item["rationale"] + "\nAction:无具体操作要求" + ) + active_guidelines = "\n".join(active_guidelines_parts) + + logger.info(f"Guideline analysis completed: 
{len(guideline_reasoning)} chars, processed {len(completed_checks)} checks") else: # 没有guidelines,直接创建agent @@ -204,7 +241,7 @@ async def process_guidelines_and_terms( user_identifier=user_identifier ) - return agent, processed_system_prompt, guideline_analysis, terms_analysis + return agent, processed_system_prompt, guideline_reasoning, active_guidelines async def enhanced_generate_stream_response( @@ -269,11 +306,11 @@ async def enhanced_generate_stream_response( logger.error(f"Error generating preamble text: {e}") # 等待guideline分析任务完成 - agent, _, guideline_analysis, _ = await guidelines_task + agent, system_prompt, guideline_reasoning, active_guidelines = await guidelines_task - # 立即发送guideline_analysis - if guideline_analysis: - guideline_content = get_content_from_messages([{"role": "assistant","reasoning_content": guideline_analysis+ "\n"}], tool_response=tool_response) + # 立即发送guideline_reasoning + if guideline_reasoning: + guideline_content = get_content_from_messages([{"role": "assistant","reasoning_content": guideline_reasoning+ "\n"}], tool_response=tool_response) chunk_data = create_stream_chunk(f"chatcmpl-guideline", model_name, guideline_content) yield f"data: {json.dumps(chunk_data, ensure_ascii=False)}\n\n" @@ -281,8 +318,8 @@ async def enhanced_generate_stream_response( final_messages = messages.copy() final_messages = append_user_last_message(final_messages, f"\n\nlanguage:{get_language_text(language)}") - if guideline_analysis: - final_messages = append_user_last_message(final_messages, f"\n\nActive Guidelines:\n{guideline_analysis}\nPlease follow these guidelines in your response.") + if active_guidelines: + final_messages = append_user_last_message(final_messages, f"\n\nActive Guidelines:\n{active_guidelines}\nPlease follow these guidelines in your response.") # 第三阶段:agent响应流式传输 logger.info(f"Starting agent stream response") @@ -373,14 +410,14 @@ async def create_agent_and_generate_response( ) - + _, system_prompt = get_preamble_text(language, 
system_prompt) # 使用公共函数处理所有逻辑 - agent, _, guideline_analysis, _ = await process_guidelines_and_terms( + agent, system_prompt, guideline_reasoning, active_guidelines = await process_guidelines_and_terms( bot_id=bot_id, api_key=api_key, model_name=model_name, model_server=model_server, - system_prompt=system_prompt or "", + system_prompt=system_prompt, messages=messages, agent_manager=agent_manager, project_dir=project_dir, @@ -395,9 +432,10 @@ async def create_agent_and_generate_response( final_messages = messages.copy() final_messages = append_user_last_message(final_messages, f"\n\nlanguage:{get_language_text(language)}") pre_message_list = [] - if guideline_analysis: - final_messages = append_user_last_message(final_messages, f"\n\nActive Guidelines:\n{guideline_analysis}\nPlease follow these guidelines in your response.") - pre_message_list.append({"role": "assistant","reasoning_content": guideline_analysis+ "\n"}) + if active_guidelines: + final_messages = append_user_last_message(final_messages, f"\n\nActive Guidelines:\n{active_guidelines}\nPlease follow these guidelines in your response.") + if guideline_reasoning: + pre_message_list.append({"role": "assistant","reasoning_content": guideline_reasoning+ "\n"}) # 非流式响应 agent_responses = agent.run_nonstream(final_messages) diff --git a/utils/fastapi_utils.py b/utils/fastapi_utils.py index fd9455b..dd372e6 100644 --- a/utils/fastapi_utils.py +++ b/utils/fastapi_utils.py @@ -749,45 +749,13 @@ def parse_guidelines_text(text: str) -> List[Dict[str, Any]]: match = re.match(id_condition_action_pattern, line, re.IGNORECASE) if match: guidelines.append({ - 'id': int(match.group(1)), + 'guideline_id': int(match.group(1)), 'condition': match.group(2).strip(), 'action': match.group(3).strip(), 'priority': int(match.group(4)) if match.group(4) else 1 }) continue - # 尝试解析 "condition -> action" 格式 - arrow_pattern = r'(?:\d+\)\s*)?(.*?)\s*->\s*(.*?)(?:\s*\[(\d+)\])?$' - match = re.match(arrow_pattern, line, re.IGNORECASE) - 
if match: - guidelines.append({ - 'id': len(guidelines) + 1, - 'condition': match.group(1).strip(), - 'action': match.group(2).strip(), - 'priority': int(match.group(3)) if match.group(3) else 1 - }) - continue - - # 尝试解析 "if condition then action" 格式 - if_then_pattern = r'(?:\d+\)\s*)?if\s+(.*?)\s+then\s+(.*?)(?:\s*\[(\d+)\])?$' - match = re.match(if_then_pattern, line, re.IGNORECASE) - if match: - guidelines.append({ - 'id': len(guidelines) + 1, - 'condition': match.group(1).strip(), - 'action': match.group(2).strip(), - 'priority': int(match.group(3)) if match.group(3) else 1 - }) - continue - - # 默认格式:整行作为action,condition为空 - guidelines.append({ - 'id': len(guidelines) + 1, - 'condition': '', - 'action': line.strip(), - 'priority': 1 - }) - return guidelines @@ -848,20 +816,6 @@ def parse_terms_text(text: str) -> List[Dict[str, Any]]: current_term = term_data continue - # 尝试解析 "| value" 格式(简化格式) - if line.startswith('|'): - parts = [p.strip() for p in line[1:].split('|', 2)] # 最多分割3部分 - if len(parts) >= 1: - if current_term: - terms.append(current_term) - current_term = {'name': parts[0]} - if len(parts) >= 2: - current_term['description'] = parts[1] - if len(parts) >= 3: - synonyms = re.split(r'[,;|]', parts[2]) - current_term['synonyms'] = [s.strip() for s in synonyms if s.strip()] - continue - # 添加最后一个term if current_term: terms.append(current_term)