diff --git a/prompt/preamble_prompt.md b/prompt/preamble_prompt.md
new file mode 100644
index 0000000..955d6c9
--- /dev/null
+++ b/prompt/preamble_prompt.md
@@ -0,0 +1,41 @@
+You are an AI agent that is expected to generate a preamble message for the customer.
+
+The actual message will be sent later by a smarter agent. Your job is only to generate the right preamble in order to save time.
+
+
+These are the preamble messages you can choose from. You must ONLY choose one of these: ###
+{preamble_choices_text}
+###
+
+The preamble is something very short that continues the interaction naturally, without committing to any later action or response.
+We leave that later response to another agent. Make sure you understand this.
+
+Instructions:
+- Note that some of the choices are more generic, and some are more specific to a particular scenario.
+- If you're unsure what to choose --> prefer to go with a more generic, bland choice. This should be 80% of cases.
+  Examples of generic choices: "Hey there!", "Just a moment.", "Hello.", "Got it."
+- If you see clear value in saying something more specific and nuanced --> then go with a more specific choice. This should be 20% or less of cases.
+  Examples of specific choices: "Let me check that for you.", "Sorry to hear that.", "Thanks for your patience."
+
+
+Chat History:
+{chat_history}
+
+User's Last Message:
+{last_message}
+
+OUTPUT FORMAT:
+You must now choose the preamble message. Produce a JSON object with a single key, "preamble", holding the preamble message as a string,
+EXACTLY as it is given above (pay attention to subtleties like punctuation and copy your choice verbatim). The JSON content must be wrapped in "```json" and "```".
+```json
+{
+  "preamble": "Just a moment."
+}
+```
+
+You will now be given the current state of the interaction, for which you must choose the next preamble message.
+{language}
+
+
+
+
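A note on how this template is consumed (the code appears later in this diff, in `call_preamble_llm`): the placeholders are filled with chained `str.replace` rather than `str.format`, since the template itself contains literal braces in the example JSON block. A minimal sketch, assuming it is run from the repo root so the file added above is readable; the choices, history, and last-message values are made up:

```python
# Illustrative only: fill the template's placeholders the same way call_preamble_llm does.
template = open('./prompt/preamble_prompt.md', encoding='utf-8').read()

choices = "Just a moment.\nGot it.\nLet me check that for you."   # made-up choices list
history = "user: my order is late"                                 # made-up chat history
last_msg = "my order is late"                                      # made-up last message

prompt = (template
          .replace('{preamble_choices_text}', choices)
          .replace('{chat_history}', history)
          .replace('{last_message}', last_msg)
          .replace('{language}', 'Please reply in English'))

# All placeholders are gone; the literal JSON braces in the example block are untouched.
assert '{chat_history}' not in prompt
```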
diff --git a/routes/chat.py b/routes/chat.py
index 919b1f0..479df05 100644
--- a/routes/chat.py
+++ b/routes/chat.py
@@ -16,7 +16,7 @@ from utils.api_models import ChatRequestV2
 from utils.fastapi_utils import (
     process_messages, extract_block_from_system_prompt, format_messages_to_chat_history,
     create_project_directory, extract_api_key_from_auth, generate_v2_auth_token, fetch_bot_config,
-    call_guideline_llm, _get_optimal_batch_size, process_guideline_batch, get_content_from_messages
+    call_guideline_llm, _get_optimal_batch_size, process_guideline_batch, get_content_from_messages, call_preamble_llm, get_preamble_text, get_language_text
 )
 
 router = APIRouter()
@@ -56,7 +56,7 @@ def append_user_last_message(messages: list, content: str) -> bool:
     return messages
 
 
-async def generate_stream_response(agent, messages, thought_list, tool_response: bool, model: str):
+async def generate_stream_response(agent, messages, pre_message_list, tool_response: bool, model: str):
     """生成流式响应"""
     accumulated_content = ""
 
@@ -64,8 +64,8 @@ async def generate_stream_response(agent, messages, thought_list, tool_response:
     chunk_id = 0
 
     try:
-        if len(thought_list)>0:
-            accumulated_content = get_content_from_messages(thought_list, tool_response=tool_response)
+        if len(pre_message_list)>0:
+            accumulated_content = get_content_from_messages(pre_message_list, tool_response=tool_response)
             chunk_data = {
                 "id": f"chatcmpl-thought",
                 "object": "chat.completion.chunk",
@@ -167,6 +167,13 @@ async def create_agent_and_generate_response(
     """创建agent并生成响应的公共逻辑"""
     if generate_cfg is None:
         generate_cfg = {}
+    pre_message_list = []
+    query_text = get_user_last_message_content(messages)
+    chat_history = format_messages_to_chat_history(messages)
+    preamble_text = await call_preamble_llm(chat_history, query_text, get_preamble_text(language), language, model_name, api_key, model_server)
+
+    if preamble_text != '':
+        pre_message_list.append({"role": "assistant","content": preamble_text})
 
     # 1. 从system_prompt提取guideline和terms内容
     system_prompt, guidelines_list, terms_list = extract_block_from_system_prompt(system_prompt)
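Worth noting about the hunk above: `call_preamble_llm` is awaited before the guideline batches and agent creation are set up, so its latency is added to every request rather than hidden behind the concurrent work that already exists further down in this function. Below is a sketch of how the call could be started early and awaited late with `asyncio.create_task`; the `fake_*` coroutines are stand-ins, not functions from this codebase, and this is an illustration rather than part of the patch:

```python
# Illustrative only -- overlapping the preamble call with the rest of the pipeline.
import asyncio

async def fake_call_preamble_llm() -> str:      # stand-in for call_preamble_llm(...)
    await asyncio.sleep(0.2)
    return "Just a moment."

async def fake_process_guidelines() -> str:     # stand-in for the guideline batch tasks
    await asyncio.sleep(0.5)
    return "guideline analysis"

async def build_pre_messages() -> list:
    # Kick off the preamble request without blocking the rest of the pipeline.
    preamble_task = asyncio.create_task(fake_call_preamble_llm())

    guideline_analysis = await fake_process_guidelines()

    pre_message_list = []
    preamble_text = await preamble_task          # usually already finished by now
    if preamble_text:
        pre_message_list.append({"role": "assistant", "content": preamble_text})
    if guideline_analysis:
        pre_message_list.append({"role": "assistant", "reasoning_content": guideline_analysis})
    return pre_message_list

if __name__ == "__main__":
    print(asyncio.run(build_pre_messages()))
```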
@@ -175,8 +182,6 @@ async def create_agent_and_generate_response(
     terms_analysis = ""
     if terms_list:
         logger.info(f"terms_list: {terms_list}")
-        # 从messages中提取用户的查询文本用于相似性检索
-        query_text = get_user_last_message_content(messages)
         # 使用embedding进行terms处理
         try:
             from embedding.embedding import process_terms_with_embedding
@@ -231,9 +236,6 @@
     logger.info(f"Processing {guidelines_count} guidelines in {len(batches)} batches with {batch_count} concurrent batches")
 
-    # 准备chat_history
-    chat_history = format_messages_to_chat_history(messages)
-
     # 并发执行所有任务:guideline批次处理 + agent创建
     tasks = []
 
 
@@ -313,32 +315,22 @@
         user_identifier=user_identifier
     )
 
-    if language:
-        # 在最后一条消息的末尾追加回复语言
-        language_map = {
-            'zh': '请用中文回复',
-            'en': 'Please reply in English',
-            'ja': '日本語で回答してください',
-            'jp': '日本語で回答してください'
-        }
-        language_instruction = language_map.get(language.lower(), '')
-        if language_instruction:
-            messages = append_user_last_message(messages, f"\n\nlanguage:{language_instruction}")
+    messages = append_user_last_message(messages, f"\n\nlanguage:{get_language_text(language)}")
+
-    thought_list = []
     if guideline_analysis != '':
-        thought_list = [{"role": "assistant","reasoning_content": guideline_analysis}]
+        pre_message_list.append({"role": "assistant","reasoning_content": guideline_analysis})
 
     # 根据stream参数决定返回流式还是非流式响应
     if stream:
         return StreamingResponse(
-            generate_stream_response(agent, messages, thought_list, tool_response, model_name),
+            generate_stream_response(agent, messages, pre_message_list, tool_response, model_name),
             media_type="text/event-stream",
             headers={"Cache-Control": "no-cache", "Connection": "keep-alive"}
         )
     else:
         # 非流式响应
         agent_responses = agent.run_nonstream(messages)
-        final_responses = thought_list+agent_responses
+        final_responses = pre_message_list+agent_responses
         if final_responses and len(final_responses) > 0:
             # 使用 get_content_from_messages 处理响应,支持 tool_response 参数
             content = get_content_from_messages(final_responses, tool_response=tool_response)
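To make the last hunk concrete: in the non-stream path the preamble message and the guideline reasoning are simply prepended to whatever the agent returns, and the combined list is rendered once. The `join_content` helper below is a simplified stand-in for `get_content_from_messages`, whose real implementation is not part of this diff:

```python
# Illustrative only: ordering of messages in the non-stream response after this change.

def join_content(messages: list, tool_response: bool = False) -> str:
    # Stand-in: concatenate assistant content fields; reasoning-only entries contribute nothing.
    return "".join(m.get("content", "") for m in messages if m.get("role") == "assistant")

pre_message_list = [
    {"role": "assistant", "content": "Just a moment."},                    # preamble
    {"role": "assistant", "reasoning_content": "guideline analysis ..."},  # thought
]
agent_responses = [{"role": "assistant", "content": "Here is the actual answer."}]

final_responses = pre_message_list + agent_responses
print(join_content(final_responses))   # -> "Just a moment.Here is the actual answer."
```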
please.", + "I'll help you with this.", + "Let me figure that out.", + "Thanks for waiting.", + "I'll check on that." + ], + 'ja': [ + "少々お待ちください。", + "承知いたしました。", + "わかりました。", + "確認いたします。", + "少々お時間をください。", + "了解しました。", + "調べてみますね。", + "お待たせしました。", + "対応いたします。", + "わかりましたね。", + "承知しました。", + "確認させてください。", + "少々お待ちいただけますか。", + "お調べいたします。", + "対応いたしますね。" + ] + }; + return "\n".join(preamble_choices_map.get(language.lower(), [])) + + +async def call_preamble_llm(chat_history: str, last_message: str, preamble_choices_text: str, language: str, model_name: str, api_key: str, model_server: str) -> str: + """调用大语言模型处理guideline分析 + + Args: + chat_history: 聊天历史记录 + guidelines_text: 指导原则文本 + model_name: 模型名称 + api_key: API密钥 + model_server: 模型服务器地址 + + Returns: + str: 模型响应结果 + """ + # 读取guideline提示词模板 + try: + with open('./prompt/preamble_prompt.md', 'r', encoding='utf-8') as f: + preamble_template = f.read() + except Exception as e: + logger.error(f"Error reading guideline prompt template: {e}") + return "" + + # 替换模板中的占位符 + system_prompt = preamble_template.replace('{preamble_choices_text}', preamble_choices_text).replace('{chat_history}', chat_history).replace('{last_message}', last_message).replace('{language}', get_language_text(language)) + # 配置LLM + llm_config = { + 'model': model_name, + 'api_key': api_key, + 'model_server': model_server, # 使用传入的model_server参数 + } + + # 调用模型 + messages = [{'role': 'user', 'content': system_prompt}] + + try: + # 使用信号量控制并发API调用数量 + async with api_semaphore: + # 使用线程池执行同步HTTP调用,避免阻塞事件循环 + loop = asyncio.get_event_loop() + response = await loop.run_in_executor(thread_pool, _sync_call_llm, llm_config, messages) + + # 从响应中提取 ```json 和 ``` 包裹的内容 + json_pattern = r'```json\s*\n(.*?)\n```' + json_matches = re.findall(json_pattern, response, re.DOTALL) + + if json_matches: + try: + # 解析第一个找到的JSON对象 + json_data = json.loads(json_matches[0]) + logger.info(f"Successfully processed preamble") + return json_data["preamble"] # 返回解析后的preamble + except json.JSONDecodeError as e: + logger.error(f"Error parsing JSON from preamble analysis: {e}") + return "" + else: + logger.warning(f"No JSON format found in preamble analysis") + return "" + + except Exception as e: + logger.error(f"Error calling guideline LLM: {e}") + return "" + + async def call_guideline_llm(chat_history: str, guidelines_text: str, terms:str, model_name: str, api_key: str, model_server: str) -> str: """调用大语言模型处理guideline分析 @@ -437,7 +563,7 @@ async def call_guideline_llm(chat_history: str, guidelines_text: str, terms:str, async with api_semaphore: # 使用线程池执行同步HTTP调用,避免阻塞事件循环 loop = asyncio.get_event_loop() - response = await loop.run_in_executor(thread_pool, _sync_call_guideline_llm, llm_config, messages) + response = await loop.run_in_executor(thread_pool, _sync_call_llm, llm_config, messages) return response except Exception as e: