modify assistant

This commit is contained in:
朱潮 2025-10-21 12:35:24 +08:00
parent 71841ce7f5
commit f711fcec23

View File

@@ -128,77 +128,8 @@ class ModifiedAssistant(Assistant):
used_any_tool = True used_any_tool = True
# 如果使用了工具,继续循环 # 如果使用了工具,继续循环
if used_any_tool: if not used_any_tool:
continue
# 如果没有使用工具无调用次数已经执行了2次以上循环退出循环
if num_llm_calls_available ==0 or total_num_llm_calls_available - num_llm_calls_available >=2:
break break
# 如果没有使用工具还有调用次数并只执行了1次就需要调用模型判断回答是否完整修复部分case不执行工具调用就停止的问题
if num_llm_calls_available > 0:
# 构建判断消息 - 使用英文系统提示词,并在用户提示中包含具体内容
user_question = messages[-1].content if messages[-1].content else str(messages[-1])
assistant_response = output[-1].content if hasattr(output[-1], 'content') and output[-1].content else str(output[-1])
judge_messages = [
Message(role='system', content='''You are a professional conversation completeness evaluator. Your task is to determine whether an AI assistant has provided a complete and sufficient answer to a user's question based on strict evaluation criteria.
## Evaluation Criteria
### Complete Answer Characteristics (mark as "complete" if ANY of these are met):
1. **Information Completeness**: Provides core information users need, including specific parameters, specifications, data, or detailed descriptions
2. **Problem Resolution**: Directly addresses the user's question with clear answers or solutions
3. **Recommendation Adequacy**: When providing product/service recommendations, includes key information (model, parameters, price, features, etc.)
4. **Conclusion Clarity**: Provides clear conclusions, advice, or summaries
### Incomplete Answer Characteristics (mark as "incomplete" if ANY of these are met):
1. **Information Gaps**: Obviously lacks key information needed to answer the question
2. **Vague Responses**: Uses evasive language and avoids specific questions
3. **Unfulfilled Promises**: Promises to provide more information but doesn't deliver
4. **Needs Follow-up**: Clearly indicates more information is needed to continue answering
## Special Evaluation Rules
- **Recommendation Questions**: Consider complete if specific recommendations with key information are provided
- **Technical Questions**: Consider complete if technical details and specific data are provided
- **Comparison Questions**: Consider complete if valuable comparative information is provided
- **Inquiry Questions**: Consider complete if best effort answer is given based on available information
## Output Format
Only output "complete" or "incomplete" with no additional text or explanation.
Adhere strictly to these standards for objective evaluation.'''),
Message(role='user', content=f'''Please evaluate whether the following assistant response is complete based on the evaluation criteria.
USER QUESTION:
{user_question}
ASSISTANT RESPONSE:
{assistant_response}
Based on the evaluation criteria, is this response complete? Only reply with "complete" or "incomplete".''')
]
# 调用模型进行判断(不使用工具)
judge_stream = self._call_llm_with_retry(messages=judge_messages, functions=[], extra_generate_cfg={'lang': 'en'})
judge_output = []
for judge_msg in judge_stream:
if judge_msg:
judge_output.extend(judge_msg)
# 分析判断结果 - 使用正则匹配检测 complete
is_complete = False
if judge_output:
judge_content = judge_output[-1].content if hasattr(judge_output[-1], 'content') else str(judge_output[-1])
judge_content = judge_content.lower().strip()
print(judge_content)
# 使用正则匹配检测 complete
import re
if re.search(r'incomplete', judge_content):
is_complete = False
else:
is_complete = True
# 如果回答完整,退出循环;否则继续
if is_complete:
break
yield response yield response