Modify assistant: remove the LLM completeness-judge step and break out of the loop when no tool was used
This commit is contained in:
parent
71841ce7f5
commit
f711fcec23
@ -128,77 +128,8 @@ class ModifiedAssistant(Assistant):
|
|||||||
used_any_tool = True
|
used_any_tool = True
|
||||||
|
|
||||||
# 如果使用了工具,继续循环
|
# 如果使用了工具,继续循环
|
||||||
if used_any_tool:
|
if not used_any_tool:
|
||||||
continue
|
|
||||||
# 如果没有使用工具,无调用次数,已经执行了2次以上循环,退出循环
|
|
||||||
if num_llm_calls_available ==0 or total_num_llm_calls_available - num_llm_calls_available >=2:
|
|
||||||
break
|
break
|
||||||
# 如果没有使用工具,还有调用次数,并只执行了1次,就需要调用模型判断回答是否完整(修复部分case不执行工具调用就停止的问题)
|
|
||||||
if num_llm_calls_available > 0:
|
|
||||||
# 构建判断消息 - 使用英文系统提示词,并在用户提示中包含具体内容
|
|
||||||
user_question = messages[-1].content if messages[-1].content else str(messages[-1])
|
|
||||||
assistant_response = output[-1].content if hasattr(output[-1], 'content') and output[-1].content else str(output[-1])
|
|
||||||
|
|
||||||
judge_messages = [
|
|
||||||
Message(role='system', content='''You are a professional conversation completeness evaluator. Your task is to determine whether an AI assistant has provided a complete and sufficient answer to a user's question based on strict evaluation criteria.
|
|
||||||
|
|
||||||
## Evaluation Criteria
|
|
||||||
|
|
||||||
### Complete Answer Characteristics (mark as "complete" if ANY of these are met):
|
|
||||||
1. **Information Completeness**: Provides core information users need, including specific parameters, specifications, data, or detailed descriptions
|
|
||||||
2. **Problem Resolution**: Directly addresses the user's question with clear answers or solutions
|
|
||||||
3. **Recommendation Adequacy**: When providing product/service recommendations, includes key information (model, parameters, price, features, etc.)
|
|
||||||
4. **Conclusion Clarity**: Provides clear conclusions, advice, or summaries
|
|
||||||
|
|
||||||
### Incomplete Answer Characteristics (mark as "incomplete" if ANY of these are met):
|
|
||||||
1. **Information Gaps**: Obviously lacks key information needed to answer the question
|
|
||||||
2. **Vague Responses**: Uses evasive language and avoids specific questions
|
|
||||||
3. **Unfulfilled Promises**: Promises to provide more information but doesn't deliver
|
|
||||||
4. **Needs Follow-up**: Clearly indicates more information is needed to continue answering
|
|
||||||
|
|
||||||
## Special Evaluation Rules
|
|
||||||
- **Recommendation Questions**: Consider complete if specific recommendations with key information are provided
|
|
||||||
- **Technical Questions**: Consider complete if technical details and specific data are provided
|
|
||||||
- **Comparison Questions**: Consider complete if valuable comparative information is provided
|
|
||||||
- **Inquiry Questions**: Consider complete if best effort answer is given based on available information
|
|
||||||
|
|
||||||
## Output Format
|
|
||||||
Only output "complete" or "incomplete" with no additional text or explanation.
|
|
||||||
|
|
||||||
Adhere strictly to these standards for objective evaluation.'''),
|
|
||||||
Message(role='user', content=f'''Please evaluate whether the following assistant response is complete based on the evaluation criteria.
|
|
||||||
|
|
||||||
USER QUESTION:
|
|
||||||
{user_question}
|
|
||||||
|
|
||||||
ASSISTANT RESPONSE:
|
|
||||||
{assistant_response}
|
|
||||||
|
|
||||||
Based on the evaluation criteria, is this response complete? Only reply with "complete" or "incomplete".''')
|
|
||||||
]
|
|
||||||
# 调用模型进行判断(不使用工具)
|
|
||||||
judge_stream = self._call_llm_with_retry(messages=judge_messages, functions=[], extra_generate_cfg={'lang': 'en'})
|
|
||||||
judge_output = []
|
|
||||||
for judge_msg in judge_stream:
|
|
||||||
if judge_msg:
|
|
||||||
judge_output.extend(judge_msg)
|
|
||||||
|
|
||||||
# 分析判断结果 - 使用正则匹配检测 complete
|
|
||||||
is_complete = False
|
|
||||||
if judge_output:
|
|
||||||
judge_content = judge_output[-1].content if hasattr(judge_output[-1], 'content') else str(judge_output[-1])
|
|
||||||
judge_content = judge_content.lower().strip()
|
|
||||||
print(judge_content)
|
|
||||||
# 使用正则匹配检测 complete
|
|
||||||
import re
|
|
||||||
|
|
||||||
if re.search(r'incomplete', judge_content):
|
|
||||||
is_complete = False
|
|
||||||
else:
|
|
||||||
is_complete = True
|
|
||||||
# 如果回答完整,退出循环;否则继续
|
|
||||||
if is_complete:
|
|
||||||
break
|
|
||||||
|
|
||||||
yield response
|
yield response
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user