preamble
parent 3973174c83
commit 95577c07a8

prompt/preamble_prompt.md | 41 (new file)
@@ -0,0 +1,41 @@
+You are an AI agent that is expected to generate a preamble message for the customer.
+
+The actual message will be sent later by a smarter agent. Your job is only to generate the right preamble in order to save time.
+
+These are the preamble messages you can choose from. You must ONLY choose one of these: ###
+
+{preamble_choices_text}
+
+###
+
+Basically, the preamble is something very short that continues the interaction naturally, without committing to any later action or response.
+We leave that later response to another agent. Make sure you understand this.
+
+Instructions:
+- Note that some of the choices are more generic, and some are more specific to a particular scenario.
+- If you're unsure what to choose --> prefer to go with a more generic, bland choice. This should be 80% of cases.
+  Examples of generic choices: "Hey there!", "Just a moment.", "Hello.", "Got it."
+- If you see clear value in saying something more specific and nuanced --> then go with a more specific choice. This should be 20% or less of cases.
+  Examples of specific choices: "Let me check that for you.", "Sorry to hear that.", "Thanks for your patience."
+
+Chat History:
+{chat_history}
+
+User's Last Message:
+{last_message}
+
+OUTPUT FORMAT:
+You must now choose the preamble message. Produce a JSON object with a single key, "preamble", holding the chosen message as a string, EXACTLY as it is given above (pay attention to subtleties like punctuation and copy your choice EXACTLY). The JSON content must be wrapped in "```json" and "```":
+```json
+{
+    "preamble": "Just a moment."
+}
+```
+
+You will now be given the current state of the interaction for which you must generate the next preamble message.
+{language}
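Since the template insists the choice be copied back character-for-character, a consumer of this output would typically verify the model's pick against the allowed list before using it. A minimal sketch under that assumption (the helper name and sample values are illustrative, not part of the commit):

```python
import json

def parse_and_validate_preamble(raw_json: str, allowed_choices: list[str]) -> str:
    """Return the chosen preamble only if it exactly matches one of the allowed choices."""
    choice = json.loads(raw_json).get("preamble", "")
    return choice if choice in allowed_choices else ""

allowed = ["Just a moment.", "Got it.", "Let me check that for you."]
print(parse_and_validate_preamble('{"preamble": "Just a moment."}', allowed))  # -> Just a moment.
print(parse_and_validate_preamble('{"preamble": "just a moment"}', allowed))   # -> "" (not an exact match)
```

The remaining hunks modify the Python router module that drives this template (the module's path is not shown in this capture).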
@@ -16,7 +16,7 @@ from utils.api_models import ChatRequestV2
 from utils.fastapi_utils import (
     process_messages, extract_block_from_system_prompt, format_messages_to_chat_history,
     create_project_directory, extract_api_key_from_auth, generate_v2_auth_token, fetch_bot_config,
-    call_guideline_llm, _get_optimal_batch_size, process_guideline_batch, get_content_from_messages
+    call_guideline_llm, _get_optimal_batch_size, process_guideline_batch, get_content_from_messages, call_preamble_llm, get_preamble_text, get_language_text
 )

 router = APIRouter()
@@ -56,7 +56,7 @@ def append_user_last_message(messages: list, content: str) -> bool:
     return messages


-async def generate_stream_response(agent, messages, thought_list, tool_response: bool, model: str):
+async def generate_stream_response(agent, messages, pre_message_list, tool_response: bool, model: str):
     """Generate a streaming response."""
     accumulated_content = ""
@@ -64,8 +64,8 @@ async def generate_stream_response(agent, messages, thought_list, tool_response:
     chunk_id = 0
     try:

-        if len(thought_list)>0:
-            accumulated_content = get_content_from_messages(thought_list, tool_response=tool_response)
+        if len(pre_message_list)>0:
+            accumulated_content = get_content_from_messages(pre_message_list, tool_response=tool_response)
             chunk_data = {
                 "id": f"chatcmpl-thought",
                 "object": "chat.completion.chunk",
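The chunk_data dict is cut off by the hunk boundary, but the visible fields follow the OpenAI-compatible chat.completion.chunk layout. Here is a sketch of what the initial pre-message chunk plausibly looks like; only "id" and "object" are taken from the diff, the remaining fields are assumptions:

```python
import json

# Illustrative shape of the first streamed chunk carrying the preamble / pre-message text.
# Only "id" and "object" appear in the hunk above; the other fields are assumed to follow the
# usual OpenAI-compatible chunk layout.
chunk_data = {
    "id": "chatcmpl-thought",
    "object": "chat.completion.chunk",
    "model": "example-model",  # stand-in for the `model` argument of generate_stream_response
    "choices": [{
        "index": 0,
        "delta": {"role": "assistant", "content": "Just a moment."},  # accumulated_content
        "finish_reason": None,
    }],
}
print(f"data: {json.dumps(chunk_data, ensure_ascii=False)}\n\n")  # typical SSE framing
```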
@@ -167,6 +167,13 @@ async def create_agent_and_generate_response(
     """Common logic for creating the agent and generating the response."""
     if generate_cfg is None:
         generate_cfg = {}
+    pre_message_list = []
+    query_text = get_user_last_message_content(messages)
+    chat_history = format_messages_to_chat_history(messages)
+    preamble_text = await call_preamble_llm(chat_history, query_text, get_preamble_text(language), language, model_name, api_key, model_server)
+
+    if preamble_text != '':
+        pre_message_list.append({"role": "assistant","content": preamble_text})

     # 1. Extract guideline and terms content from system_prompt
     system_prompt, guidelines_list, terms_list = extract_block_from_system_prompt(system_prompt)
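The arguments wired up here line up with the placeholders in prompt/preamble_prompt.md. A sketch of the substitution that call_preamble_llm (added later in this commit) performs with them, using illustrative stand-in values:

```python
# Hypothetical stand-alone rendering of the template with the arguments passed above.
from utils.fastapi_utils import get_preamble_text, get_language_text  # helpers added later in this commit

with open('./prompt/preamble_prompt.md', 'r', encoding='utf-8') as f:
    preamble_template = f.read()

chat_history = "user: my order is late\nassistant: let me check"  # illustrative values
query_text = "my order is late"
language = "en"

rendered_prompt = (
    preamble_template
    .replace('{preamble_choices_text}', get_preamble_text(language))
    .replace('{chat_history}', chat_history)
    .replace('{last_message}', query_text)
    .replace('{language}', get_language_text(language))
)
```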
@@ -175,8 +182,6 @@
     terms_analysis = ""
     if terms_list:
         logger.info(f"terms_list: {terms_list}")
-        # Extract the user's query text from messages for similarity retrieval
-        query_text = get_user_last_message_content(messages)
         # Process terms using embeddings
         try:
             from embedding.embedding import process_terms_with_embedding
@@ -231,9 +236,6 @@

     logger.info(f"Processing {guidelines_count} guidelines in {len(batches)} batches with {batch_count} concurrent batches")

-    # Prepare chat_history
-    chat_history = format_messages_to_chat_history(messages)
-
     # Run all tasks concurrently: guideline batch processing + agent creation
     tasks = []

@@ -313,32 +315,22 @@
         user_identifier=user_identifier
     )

-    if language:
-        # Append the reply language to the end of the last message
-        language_map = {
-            'zh': '请用中文回复',
-            'en': 'Please reply in English',
-            'ja': '日本語で回答してください',
-            'jp': '日本語で回答してください'
-        }
-        language_instruction = language_map.get(language.lower(), '')
-        if language_instruction:
-            messages = append_user_last_message(messages, f"\n\nlanguage:{language_instruction}")
+    messages = append_user_last_message(messages, f"\n\nlanguage:{get_language_text(language)}")

-    thought_list = []
     if guideline_analysis != '':
-        thought_list = [{"role": "assistant","reasoning_content": guideline_analysis}]
+        pre_message_list.append({"role": "assistant","reasoning_content": guideline_analysis})
     # Decide between a streaming and a non-streaming response based on the stream parameter
     if stream:
         return StreamingResponse(
-            generate_stream_response(agent, messages, thought_list, tool_response, model_name),
+            generate_stream_response(agent, messages, pre_message_list, tool_response, model_name),
             media_type="text/event-stream",
             headers={"Cache-Control": "no-cache", "Connection": "keep-alive"}
         )
     else:
         # Non-streaming response
         agent_responses = agent.run_nonstream(messages)
-        final_responses = thought_list+agent_responses
+        final_responses = pre_message_list+agent_responses
         if final_responses and len(final_responses) > 0:
             # Use get_content_from_messages to process the responses, honoring the tool_response parameter
             content = get_content_from_messages(final_responses, tool_response=tool_response)
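After this hunk, pre_message_list can hold up to two seed messages: the preamble (plain assistant content) and the guideline analysis (reasoning_content). A sketch of the shape handed to get_content_from_messages in the non-streaming branch, with illustrative values:

```python
# Illustrative contents of pre_message_list just before it is prepended to the agent output.
pre_message_list = [
    {"role": "assistant", "content": "Just a moment."},                             # from call_preamble_llm
    {"role": "assistant", "reasoning_content": "Guideline 3 applies because ..."},  # from guideline analysis
]

# Non-streaming path: the seed messages are simply concatenated ahead of the agent responses.
agent_responses = [{"role": "assistant", "content": "Here is what I found about your order ..."}]
final_responses = pre_message_list + agent_responses
```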
@@ -371,7 +371,7 @@ async def fetch_bot_config(bot_id: str) -> Dict[str, Any]:
     )


-def _sync_call_guideline_llm(llm_config, messages) -> str:
+def _sync_call_llm(llm_config, messages) -> str:
     """Helper that calls the LLM synchronously; runs in the thread pool."""
     llm_instance = TextChatAtOAI(llm_config)
     try:
@@ -397,6 +397,132 @@ def _sync_call_guideline_llm(llm_config, messages) -> str:
         logger.error(f"Error calling guideline LLM: {e}")
         return ""

+
+def get_language_text(language: str):
+    if language == "jp":
+        language = "ja"
+    language_map = {
+        'zh': '请用中文回复',
+        'en': 'Please reply in English',
+        'ja': '日本語で回答してください',
+    }
+    return language_map.get(language.lower(), '')
+
+
+def get_preamble_text(language: str):
+    if language == "jp":
+        language = "ja"
+    preamble_choices_map = {
+        'zh': [
+            "好的,让我来帮您看看。",
+            "明白了,请稍等。",
+            "好的,我理解了。",
+            "没问题,我来处理。",
+            "收到,正在为您查询。",
+            "了解,让我想想。",
+            "好的,我来帮您解答。",
+            "明白了,稍等片刻。",
+            "好的,正在处理中。",
+            "了解了,让我为您分析。"
+        ],
+        'en': [
+            "Just a moment.",
+            "Got it.",
+            "Let me check that for you.",
+            "Sorry to hear that.",
+            "Thanks for your patience.",
+            "I understand.",
+            "Let me help you with that.",
+            "Please wait a moment.",
+            "I'll look into that for you.",
+            "Gotcha, let me see.",
+            "Understood, one moment please.",
+            "I'll help you with this.",
+            "Let me figure that out.",
+            "Thanks for waiting.",
+            "I'll check on that."
+        ],
+        'ja': [
+            "少々お待ちください。",
+            "承知いたしました。",
+            "わかりました。",
+            "確認いたします。",
+            "少々お時間をください。",
+            "了解しました。",
+            "調べてみますね。",
+            "お待たせしました。",
+            "対応いたします。",
+            "わかりましたね。",
+            "承知しました。",
+            "確認させてください。",
+            "少々お待ちいただけますか。",
+            "お調べいたします。",
+            "対応いたしますね。"
+        ]
+    }
+    return "\n".join(preamble_choices_map.get(language.lower(), []))
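A quick sanity check of how the two helpers above behave; the 'jp' → 'ja' normalization and the newline-joined choice list come straight from the hunk, while the import path assumes the functions live in utils.fastapi_utils, as the import hunk earlier suggests:

```python
from utils.fastapi_utils import get_language_text, get_preamble_text

assert get_language_text("jp") == "日本語で回答してください"  # 'jp' is normalized to 'ja'
assert get_language_text("fr") == ""                            # unknown codes fall back to ''

choices = get_preamble_text("en").split("\n")
assert "Just a moment." in choices                              # choices come back newline-joined
```

The same hunk continues below with call_preamble_llm, which renders the template and parses the model's reply.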
+
+
+async def call_preamble_llm(chat_history: str, last_message: str, preamble_choices_text: str, language: str, model_name: str, api_key: str, model_server: str) -> str:
+    """Call the LLM to pick a preamble message.
+
+    Args:
+        chat_history: formatted chat history
+        last_message: the user's last message
+        preamble_choices_text: newline-joined preamble choices
+        language: reply language code
+        model_name: model name
+        api_key: API key
+        model_server: model server address
+
+    Returns:
+        str: the chosen preamble, or "" on failure
+    """
+    # Read the preamble prompt template
+    try:
+        with open('./prompt/preamble_prompt.md', 'r', encoding='utf-8') as f:
+            preamble_template = f.read()
+    except Exception as e:
+        logger.error(f"Error reading preamble prompt template: {e}")
+        return ""
+
+    # Fill in the template placeholders
+    system_prompt = preamble_template.replace('{preamble_choices_text}', preamble_choices_text).replace('{chat_history}', chat_history).replace('{last_message}', last_message).replace('{language}', get_language_text(language))
+    # Configure the LLM
+    llm_config = {
+        'model': model_name,
+        'api_key': api_key,
+        'model_server': model_server,  # use the model_server passed in by the caller
+    }
+
+    # Call the model
+    messages = [{'role': 'user', 'content': system_prompt}]
+
+    try:
+        # Limit concurrent API calls with the semaphore
+        async with api_semaphore:
+            # Run the synchronous HTTP call in the thread pool to avoid blocking the event loop
+            loop = asyncio.get_event_loop()
+            response = await loop.run_in_executor(thread_pool, _sync_call_llm, llm_config, messages)
+
+        # Extract the content wrapped in ```json ... ``` from the response
+        json_pattern = r'```json\s*\n(.*?)\n```'
+        json_matches = re.findall(json_pattern, response, re.DOTALL)
+
+        if json_matches:
+            try:
+                # Parse the first JSON object found
+                json_data = json.loads(json_matches[0])
+                logger.info("Successfully processed preamble")
+                return json_data["preamble"]  # return the parsed preamble
+            except json.JSONDecodeError as e:
+                logger.error(f"Error parsing JSON from preamble analysis: {e}")
+                return ""
+        else:
+            logger.warning("No JSON format found in preamble analysis")
+            return ""
+
+    except Exception as e:
+        logger.error(f"Error calling preamble LLM: {e}")
+        return ""
+
+
 async def call_guideline_llm(chat_history: str, guidelines_text: str, terms:str, model_name: str, api_key: str, model_server: str) -> str:
     """Call the LLM for guideline analysis
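For reference, a minimal sketch of the round trip call_preamble_llm expects: the regex is the one used in the function above, while the raw model reply is an illustrative example of the format the prompt template demands:

```python
import json
import re

# An illustrative raw model reply in the fenced JSON format required by the template.
raw_response = 'Sure.\n```json\n{\n    "preamble": "Just a moment."\n}\n```'

# Same extraction as in call_preamble_llm above.
json_pattern = r'```json\s*\n(.*?)\n```'
matches = re.findall(json_pattern, raw_response, re.DOTALL)
preamble = json.loads(matches[0])["preamble"] if matches else ""
print(preamble)  # -> Just a moment.
```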
@@ -437,7 +563,7 @@ async def call_guideline_llm(chat_history: str, guidelines_text: str, terms:str,
         async with api_semaphore:
             # Run the synchronous HTTP call in the thread pool to avoid blocking the event loop
             loop = asyncio.get_event_loop()
-            response = await loop.run_in_executor(thread_pool, _sync_call_guideline_llm, llm_config, messages)
+            response = await loop.run_in_executor(thread_pool, _sync_call_llm, llm_config, messages)
         return response

     except Exception as e: