qwen_agent/promptfoo/novare/tests.yaml
2025-12-09 20:17:21 +08:00

79 lines
1.8 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Question (zh): "Where is Shimizu Taro?"
# Passes when the output matches "[TOOL_CALL]" followed by
# "find_employee_location".
# NOTE(review): indentation was reconstructed; use_history is assumed to
# belong under vars - confirm against the prompt/provider that reads it.
# NOTE(review): if the regex is evaluated without a dot-all flag, ".*" will
# not span newlines - confirm the tool name appears on the same line.
- vars:
    question: 清水太郎在哪里
    use_history: true
  assert:
    - type: regex
      value: '\[TOOL_CALL\].*find_employee_location'
# Question (zh): "Notify him of a meeting at 8 AM tomorrow."
# The question refers to "him" without naming anyone and sets
# use_history: true, so it presumably relies on the preceding turn.
# Rubric (zh): the reply asks whether to confirm sending.
# NOTE(review): indentation was reconstructed; use_history is assumed to
# belong under vars - confirm against the prompt/provider that reads it.
- vars:
    question: 通知他明天上午8点开会
    use_history: true
  assert:
    - type: llm-rubric
      value: 询问是否确认发送
# Question (zh): "Confirm."
# Passes when the output matches "[TOOL_CALL]" followed by
# "wowtalk_send_message_to_member".
# NOTE(review): indentation was reconstructed; use_history is assumed to
# belong under vars - confirm against the prompt/provider that reads it.
- vars:
    question: 确认
    use_history: true
  assert:
    - type: regex
      value: '\[TOOL_CALL\].*wowtalk_send_message_to_member'
# Question (zh): "Send him the lighting status of DefineRoom 4."
# Rubric (zh): the reply calls find_devices_by_room and
# dxcore_get_device_status to get the lighting status, then asks whether to
# confirm sending.
# NOTE(review): indentation was reconstructed; use_history is assumed to
# belong under vars - confirm against the prompt/provider that reads it.
- vars:
    question: 把DefineRoom 4的灯光状态发给他
    use_history: true
  assert:
    - type: llm-rubric
      value: 调用find_devices_by_room和dxcore_get_device_status获取灯光状态并询问是否确认发送。
# Question (zh): "Confirm."
# Passes when the output matches "[TOOL_CALL]" followed by
# "wowtalk_send_message_to_member".
# NOTE(review): indentation was reconstructed; use_history is assumed to
# belong under vars - confirm against the prompt/provider that reads it.
- vars:
    question: 确认
    use_history: true
  assert:
    - type: regex
      value: '\[TOOL_CALL\].*wowtalk_send_message_to_member'
# Question (zh): "Turn off the light of device 900142."
# Rubric (zh): the reply asks whether to confirm turning it off.
# NOTE(review): indentation was reconstructed; use_history is assumed to
# belong under vars - confirm against the prompt/provider that reads it.
- vars:
    question: 关闭设备900142的灯光
    use_history: true
  assert:
    - type: llm-rubric
      value: 询问是否确认关闭
# Question (zh): "Confirm."
# Passes when the output matches "[TOOL_CALL]" followed by
# "dxcore_update_device_status".
# NOTE(review): indentation was reconstructed; use_history is assumed to
# belong under vars - confirm against the prompt/provider that reads it.
- vars:
    question: 确认
    use_history: true
  assert:
    - type: regex
      value: '\[TOOL_CALL\].*dxcore_update_device_status'
# Question (zh): "Lighting status of Define Room1."
# use_history is false for this case.
# Passes when the output matches "[TOOL_CALL]" followed by
# "find_devices_by_room".
# NOTE(review): indentation was reconstructed; use_history is assumed to
# belong under vars - confirm against the prompt/provider that reads it.
- vars:
    question: Define Room1 的灯光状态
    use_history: false
  assert:
    - type: regex
      value: '\[TOOL_CALL\].*find_devices_by_room'
# Question (ja): "Operating status of 900142."
# use_history is false for this case.
# Passes when the output matches "[TOOL_CALL]" followed by
# "dxcore_get_device_status".
# NOTE(review): indentation was reconstructed; use_history is assumed to
# belong under vars - confirm against the prompt/provider that reads it.
- vars:
    question: 900142の稼働状況
    use_history: false
  assert:
    - type: regex
      value: '\[TOOL_CALL\].*dxcore_get_device_status'
# Question (zh): "Where is the restroom?"
# use_history is false for this case.
# Passes when the output matches "[TOOL_CALL]" followed by "rag_retrieve".
# NOTE(review): indentation was reconstructed; use_history is assumed to
# belong under vars - confirm against the prompt/provider that reads it.
- vars:
    question: 卫生间在哪里
    use_history: false
  assert:
    - type: regex
      value: '\[TOOL_CALL\].*rag_retrieve'
# Question (zh): "What should I do if I lost an item?"
# use_history is false for this case.
# Passes when the output matches "[TOOL_CALL]" followed by "rag_retrieve".
# NOTE(review): indentation was reconstructed; use_history is assumed to
# belong under vars - confirm against the prompt/provider that reads it.
- vars:
    question: 我丢了物品怎么办
    use_history: false
  assert:
    - type: regex
      value: '\[TOOL_CALL\].*rag_retrieve'
# Question (zh): "How much is a cup of coffee?"
# use_history is false for this case.
# Passes when the output matches "[TOOL_CALL]" followed by "rag_retrieve".
# NOTE(review): indentation was reconstructed; use_history is assumed to
# belong under vars - confirm against the prompt/provider that reads it.
- vars:
    question: 咖啡多少钱一杯
    use_history: false
  assert:
    - type: regex
      value: '\[TOOL_CALL\].*rag_retrieve'
# Question (zh): "Tomorrow's weather in Tokyo."
# use_history is false for this case.
# Passes when the output matches "[TOOL_CALL]" followed by
# "weather_get_by_location".
# NOTE(review): indentation was reconstructed; use_history is assumed to
# belong under vars - confirm against the prompt/provider that reads it.
- vars:
    question: 东京明天的天气
    use_history: false
  assert:
    - type: regex
      value: '\[TOOL_CALL\].*weather_get_by_location'