modify assistant

2025-10-21 12:35:24 +08:00 · 2025-10-21 12:35:24 +08:00 · f711fcec23
commit f711fcec23
parent 71841ce7f5
1 changed file with 1 addition and 70 deletions
--- a/modified_assistant.py
+++ b/modified_assistant.py
@@ -128,77 +128,8 @@ class ModifiedAssistant(Assistant):
                        used_any_tool = True
                # 如果使用了工具，继续循环
-                if used_any_tool:
+                if not used_any_tool:
                    continue
                # 如果没有使用工具，无调用次数，已经执行了2次以上循环，退出循环
                if num_llm_calls_available ==0 or total_num_llm_calls_available - num_llm_calls_available >=2:
                    break
                # 如果没有使用工具，还有调用次数，并只执行了1次，就需要调用模型判断回答是否完整（修复部分case不执行工具调用就停止的问题）
                if num_llm_calls_available > 0:
                    # 构建判断消息 - 使用英文系统提示词，并在用户提示中包含具体内容
                    user_question = messages[-1].content if messages[-1].content else str(messages[-1])
                    assistant_response = output[-1].content if hasattr(output[-1], 'content') and output[-1].content else str(output[-1])
                    judge_messages = [
                        Message(role='system', content='''You are a professional conversation completeness evaluator. Your task is to determine whether an AI assistant has provided a complete and sufficient answer to a user's question based on strict evaluation criteria.
 ## Evaluation Criteria
 ### Complete Answer Characteristics (mark as "complete" if ANY of these are met):
 1. **Information Completeness**: Provides core information users need, including specific parameters, specifications, data, or detailed descriptions
 2. **Problem Resolution**: Directly addresses the user's question with clear answers or solutions
 3. **Recommendation Adequacy**: When providing product/service recommendations, includes key information (model, parameters, price, features, etc.)
 4. **Conclusion Clarity**: Provides clear conclusions, advice, or summaries
 ### Incomplete Answer Characteristics (mark as "incomplete" if ANY of these are met):
 1. **Information Gaps**: Obviously lacks key information needed to answer the question
 2. **Vague Responses**: Uses evasive language and avoids specific questions
 3. **Unfulfilled Promises**: Promises to provide more information but doesn't deliver
 4. **Needs Follow-up**: Clearly indicates more information is needed to continue answering
 ## Special Evaluation Rules
 - **Recommendation Questions**: Consider complete if specific recommendations with key information are provided
 - **Technical Questions**: Consider complete if technical details and specific data are provided  
 - **Comparison Questions**: Consider complete if valuable comparative information is provided
 - **Inquiry Questions**: Consider complete if best effort answer is given based on available information
 ## Output Format
 Only output "complete" or "incomplete" with no additional text or explanation.
 Adhere strictly to these standards for objective evaluation.'''),
                        Message(role='user', content=f'''Please evaluate whether the following assistant response is complete based on the evaluation criteria.
 USER QUESTION:
 {user_question}
 ASSISTANT RESPONSE:
 {assistant_response}
 Based on the evaluation criteria, is this response complete? Only reply with "complete" or "incomplete".''')
                    ]
                    # 调用模型进行判断（不使用工具）
                    judge_stream = self._call_llm_with_retry(messages=judge_messages, functions=[], extra_generate_cfg={'lang': 'en'})
                    judge_output = []
                    for judge_msg in judge_stream:
                        if judge_msg:
                            judge_output.extend(judge_msg)
                    # 分析判断结果 - 使用正则匹配检测 complete
                    is_complete = False
                    if judge_output:
                        judge_content = judge_output[-1].content if hasattr(judge_output[-1], 'content') else str(judge_output[-1])
                        judge_content = judge_content.lower().strip()
                        print(judge_content)
                        # 使用正则匹配检测 complete
                        import re
                        if re.search(r'incomplete', judge_content):
                            is_complete = False
                        else:
                            is_complete = True
                    # 如果回答完整，退出循环；否则继续
                    if is_complete:
                        break
        yield response