From f711fcec236a16f7e1144c084334417638d0c8d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E6=BD=AE?= Date: Tue, 21 Oct 2025 12:35:24 +0800 Subject: [PATCH] modify assistant --- modified_assistant.py | 71 +------------------------------------------ 1 file changed, 1 insertion(+), 70 deletions(-) diff --git a/modified_assistant.py b/modified_assistant.py index 78410ca..cc784a6 100644 --- a/modified_assistant.py +++ b/modified_assistant.py @@ -128,77 +128,8 @@ class ModifiedAssistant(Assistant): used_any_tool = True # 如果使用了工具,继续循环 - if used_any_tool: - continue - # 如果没有使用工具,无调用次数,已经执行了2次以上循环,退出循环 - if num_llm_calls_available ==0 or total_num_llm_calls_available - num_llm_calls_available >=2: + if not used_any_tool: break - # 如果没有使用工具,还有调用次数,并只执行了1次,就需要调用模型判断回答是否完整(修复部分case不执行工具调用就停止的问题) - if num_llm_calls_available > 0: - # 构建判断消息 - 使用英文系统提示词,并在用户提示中包含具体内容 - user_question = messages[-1].content if messages[-1].content else str(messages[-1]) - assistant_response = output[-1].content if hasattr(output[-1], 'content') and output[-1].content else str(output[-1]) - - judge_messages = [ - Message(role='system', content='''You are a professional conversation completeness evaluator. Your task is to determine whether an AI assistant has provided a complete and sufficient answer to a user's question based on strict evaluation criteria. - -## Evaluation Criteria - -### Complete Answer Characteristics (mark as "complete" if ANY of these are met): -1. **Information Completeness**: Provides core information users need, including specific parameters, specifications, data, or detailed descriptions -2. **Problem Resolution**: Directly addresses the user's question with clear answers or solutions -3. **Recommendation Adequacy**: When providing product/service recommendations, includes key information (model, parameters, price, features, etc.) -4. **Conclusion Clarity**: Provides clear conclusions, advice, or summaries - -### Incomplete Answer Characteristics (mark as "incomplete" if ANY of these are met): -1. **Information Gaps**: Obviously lacks key information needed to answer the question -2. **Vague Responses**: Uses evasive language and avoids specific questions -3. **Unfulfilled Promises**: Promises to provide more information but doesn't deliver -4. **Needs Follow-up**: Clearly indicates more information is needed to continue answering - -## Special Evaluation Rules -- **Recommendation Questions**: Consider complete if specific recommendations with key information are provided -- **Technical Questions**: Consider complete if technical details and specific data are provided -- **Comparison Questions**: Consider complete if valuable comparative information is provided -- **Inquiry Questions**: Consider complete if best effort answer is given based on available information - -## Output Format -Only output "complete" or "incomplete" with no additional text or explanation. - -Adhere strictly to these standards for objective evaluation.'''), - Message(role='user', content=f'''Please evaluate whether the following assistant response is complete based on the evaluation criteria. - -USER QUESTION: -{user_question} - -ASSISTANT RESPONSE: -{assistant_response} - -Based on the evaluation criteria, is this response complete? Only reply with "complete" or "incomplete".''') - ] - # 调用模型进行判断(不使用工具) - judge_stream = self._call_llm_with_retry(messages=judge_messages, functions=[], extra_generate_cfg={'lang': 'en'}) - judge_output = [] - for judge_msg in judge_stream: - if judge_msg: - judge_output.extend(judge_msg) - - # 分析判断结果 - 使用正则匹配检测 complete - is_complete = False - if judge_output: - judge_content = judge_output[-1].content if hasattr(judge_output[-1], 'content') else str(judge_output[-1]) - judge_content = judge_content.lower().strip() - print(judge_content) - # 使用正则匹配检测 complete - import re - - if re.search(r'incomplete', judge_content): - is_complete = False - else: - is_complete = True - # 如果回答完整,退出循环;否则继续 - if is_complete: - break yield response