diff --git a/prompt/preamble_prompt.md b/prompt/preamble_prompt.md index 9957500..4dfbc99 100644 --- a/prompt/preamble_prompt.md +++ b/prompt/preamble_prompt.md @@ -4,24 +4,25 @@ The actual message will be sent later by a smarter agent. Your job is only to ge ## Scenario Detection Logic -FIRST, determine if this is a SIMPLE CHAT scenario or COMPLEX scenario: - -**Simple Chat Scenarios (NO preamble needed - output ""):** -- Basic greetings: "hi", "hello", "你好", "在吗", "早上好/晚上好" -- Simple acknowledgments: "thanks", "ok", "好的", "谢谢" -- Small talk: "how are you", "天气怎么样", "最近怎么样" -- Simple farewells: "bye", "goodbye", "再见" +FIRST, determine if this is a COMPLEX scenario that requires a preamble: **Complex Scenarios (preamble needed):** - Query scenarios: User is asking for information, searching, or looking up data -- Action scenarios: User wants to perform an operation, execute a task +- Action scenarios: User wants to perform an operation, execute a task - Knowledge retrieval scenarios: User needs to search knowledge base, documents, or databases - Problem-solving: User is reporting issues, asking for help with problems - Complex requests: Multi-step tasks, detailed instructions needed +**ALL OTHER scenarios (output ""):** +- Simple greetings: "hi", "hello", "你好", "在吗", "早上好/晚上好" +- Simple acknowledgments: "thanks", "ok", "好的", "谢谢" +- Small talk: "how are you", "天气怎么样", "最近怎么样" +- Simple farewells: "bye", "goodbye", "再见" +- Any other scenarios not explicitly listed as complex + ## Preamble Selection -IF this is a COMPLEX scenario, choose from these preamble messages. You must ONLY choose one of these: ### +ONLY IF this is a COMPLEX scenario, choose from these preamble messages. You must ONLY choose one of these: ### {preamble_choices_text} ### @@ -29,13 +30,13 @@ Basically, the preamble is something very short that continues the interaction n We leave that later response to another agent. Make sure you understand this. 
Instructions: -- For SIMPLE CHAT scenarios: Always output preamble: "" - For COMPLEX scenarios: - Note that some of the choices are more generic, and some are more specific to a particular scenario. - If you're unsure what to choose --> prefer to go with a more generic, bland choice. This should be 80% of cases. Examples of generic choices: "Hey there!", "Just a moment.", "Hello.", "Got it." - If you see clear value in saying something more specific and nuanced --> then go with a more specific choice. This should be 20% or less of cases. Examples of specific choices: "Let me check that for you.", "Sorry to hear that.", "Thanks for your patience." +- For ALL OTHER scenarios: Always output preamble: "" Chat History: @@ -48,7 +49,7 @@ OUTPUT FORMAT: You must now choose the preamble message. You must produce a JSON object with a single key, "preamble", holding the preamble message as a string, EXACTLY as it is given (pay attention to subtleties like punctuation and copy your choice EXACTLY as it is given above). The content in JSON format needs to be wrapped in "```json" and "```". 
-For SIMPLE CHAT scenarios: +For ALL OTHER scenarios: ```json { "preamble": "" diff --git a/routes/chat.py b/routes/chat.py index bc901ee..e243900 100644 --- a/routes/chat.py +++ b/routes/chat.py @@ -311,45 +311,66 @@ async def enhanced_generate_stream_response( robot_type: str, project_dir: Optional[str], generate_cfg: Optional[dict], - user_identifier: Optional[str], - pre_message_list: Optional[list] + user_identifier: Optional[str] ): """增强的渐进式流式响应生成器""" try: - # 第一阶段:立即传输preamble_text - if pre_message_list: - chunk_data = { - "id": f"chatcmpl-preamble", - "object": "chat.completion.chunk", - "created": int(__import__('time').time()), - "model": model_name, - "choices": [{ - "index": 0, - "delta": { - "content": get_content_from_messages(pre_message_list, tool_response=tool_response) - }, - "finish_reason": None - }] - } - yield f"data: {json.dumps(chunk_data, ensure_ascii=False)}\n\n" + # 第一阶段:并行启动preamble_text生成和第二阶段处理 + query_text = get_user_last_message_content(messages) + chat_history = format_messages_to_chat_history(messages) - # 第二阶段:使用公共函数处理guideline分析和agent创建 - agent, _, guideline_analysis, _ = await process_guidelines_and_terms( - bot_id=bot_id, - api_key=api_key, - model_name=model_name, - model_server=model_server, - system_prompt=system_prompt, - messages=messages, - agent_manager=agent_manager, - project_dir=project_dir, - generate_cfg=generate_cfg, - language=language, - mcp_settings=mcp_settings, - robot_type=robot_type, - user_identifier=user_identifier + # 创建preamble_text生成任务 + preamble_task = asyncio.create_task( + call_preamble_llm(chat_history, query_text, get_preamble_text(language), language, model_name, api_key, model_server) ) + # 创建guideline分析和agent创建任务 + guidelines_task = asyncio.create_task( + process_guidelines_and_terms( + bot_id=bot_id, + api_key=api_key, + model_name=model_name, + model_server=model_server, + system_prompt=system_prompt, + messages=messages, + agent_manager=agent_manager, + project_dir=project_dir, + 
generate_cfg=generate_cfg, + language=language, + mcp_settings=mcp_settings, + robot_type=robot_type, + user_identifier=user_identifier + ) + ) + + # 等待preamble_text任务完成 + try: + preamble_text = await preamble_task + # 只有当preamble_text不为空且不为""时才输出 + if preamble_text and preamble_text.strip() and preamble_text != "": + chunk_data = { + "id": f"chatcmpl-preamble", + "object": "chat.completion.chunk", + "created": int(__import__('time').time()), + "model": model_name, + "choices": [{ + "index": 0, + "delta": { + "content": get_content_from_messages([{"role": "assistant","content": preamble_text + "\n"}], tool_response=tool_response) + }, + "finish_reason": None + }] + } + yield f"data: {json.dumps(chunk_data, ensure_ascii=False)}\n\n" + logger.info(f"Stream mode: Generated preamble text ({len(preamble_text)} chars)") + else: + logger.info("Stream mode: Skipped empty preamble text") + except Exception as e: + logger.error(f"Error generating preamble text: {e}") + + # 等待guideline分析任务完成 + agent, _, guideline_analysis, _ = await guidelines_task + # 立即发送guideline_analysis if guideline_analysis: chunk_data = { @@ -360,7 +381,7 @@ async def enhanced_generate_stream_response( "choices": [{ "index": 0, "delta": { - "content": get_content_from_messages([{"role": "assistant","reasoning_content": guideline_analysis}], tool_response=tool_response) + "content": get_content_from_messages([{"role": "assistant","reasoning_content": guideline_analysis+ "\n"}], tool_response=tool_response) }, "finish_reason": None }] @@ -462,21 +483,6 @@ async def create_agent_and_generate_response( if generate_cfg is None: generate_cfg = {} - pre_message_list = [] - # 只在stream=True时生成preamble_text - preamble_text = "" - if stream: - query_text = get_user_last_message_content(messages) - chat_history = format_messages_to_chat_history(messages) - preamble_text = await call_preamble_llm(chat_history, query_text, get_preamble_text(language), language, model_name, api_key, model_server) - # 
只有当preamble_text不为空且不为""时才添加到消息列表 - if preamble_text and preamble_text.strip() and preamble_text != "": - pre_message_list.append({"role": "assistant","content": preamble_text+"\n"}) - logger.info(f"Stream mode: Generated preamble text ({len(preamble_text)} chars)") - else: - logger.info("Stream mode: Skipped empty preamble text") - - # 如果是流式模式,使用增强的流式响应生成器 if stream: return StreamingResponse( @@ -494,8 +500,7 @@ async def create_agent_and_generate_response( robot_type=robot_type, project_dir=project_dir, generate_cfg=generate_cfg, - user_identifier=user_identifier, - pre_message_list=pre_message_list + user_identifier=user_identifier ), media_type="text/event-stream", headers={"Cache-Control": "no-cache", "Connection": "keep-alive"} @@ -526,7 +531,7 @@ async def create_agent_and_generate_response( if guideline_analysis: final_messages = append_user_last_message(final_messages, f"\n\nActive Guidelines:\n{guideline_analysis}\nPlease follow these guidelines in your response.") - pre_message_list.append({"role": "assistant","reasoning_content": guideline_analysis}) + pre_message_list.append({"role": "assistant","reasoning_content": guideline_analysis+ "\n"}) # 非流式响应 agent_responses = agent.run_nonstream(final_messages)