add promptfoo
This commit is contained in:
parent
ee41279569
commit
092f7de0c3
@ -1,9 +0,0 @@
|
|||||||
question,regex,llm-rubric,,,,,,,,,,,,,,,,,
|
|
||||||
清水太郎在哪里,\[TOOL_CALL\].*find_employee_location,,,,,,,,,,,,,,,,,,
|
|
||||||
通知他明天上午8点开会,,询问是否确认发送,,,,,,,,,,,,,,,,,
|
|
||||||
确认,\[TOOL_CALL\].*wowtalk_send_message_to_member,,,,,,,,,,,,,,,,,,
|
|
||||||
把DefineRoom 4的灯光状态发给他,,调用find_devices_by_room和dxcore_get_device_status获取灯光状态,并询问是否确认发送。,,,,,,,,,,,,,,,,,
|
|
||||||
确认,\[TOOL_CALL\].*wowtalk_send_message_to_member,,,,,,,,,,,,,,,,,,
|
|
||||||
900142の稼働状況,\[TOOL_CALL\].*dxcore_get_device_status,,,,,,,,,,,,,,,,,,
|
|
||||||
关闭设备900142的灯光,,询问是否确认关闭,,,,,,,,,,,,,,,,,
|
|
||||||
确认,\[TOOL_CALL\].*dxcore_update_device_status,,,,,,,,,,,,,,,,,,
|
|
||||||
|
@ -15,7 +15,8 @@ def csv_to_yaml(csv_file, yaml_file):
|
|||||||
if row['question']:
|
if row['question']:
|
||||||
test_case = {
|
test_case = {
|
||||||
'vars': {
|
'vars': {
|
||||||
'question': row['question'].strip()
|
'question': row['question'].strip(),
|
||||||
|
'use_history': True if row['use_history'] == "1" else False,
|
||||||
},
|
},
|
||||||
'assert':[]
|
'assert':[]
|
||||||
}
|
}
|
||||||
@ -41,5 +42,4 @@ def csv_to_yaml(csv_file, yaml_file):
|
|||||||
print(f"Converted {len(tests)} test cases from {csv_file} to {yaml_file}")
|
print(f"Converted {len(tests)} test cases from {csv_file} to {yaml_file}")
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
csv_to_yaml("conversation/tests.csv", "conversation/tests.yaml")
|
csv_to_yaml("novare/novare.csv", "novare/tests.yaml")
|
||||||
csv_to_yaml("query/tests.csv", "query/tests.yaml")
|
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
env: {}
|
env: {}
|
||||||
description: Novare Test
|
description: Novare Test - Unified Config for Both Single and Conversation
|
||||||
providers:
|
providers:
|
||||||
- id: openai:chat:qwen3
|
- id: openai:chat:qwen3
|
||||||
config:
|
config:
|
||||||
14
promptfoo/novare/novare.csv
Normal file
14
promptfoo/novare/novare.csv
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
question,use_history,regex,llm-rubric,,,,,,,,,,,,,,,,,
|
||||||
|
清水太郎在哪里,1,\[TOOL_CALL\].*find_employee_location,,,,,,,,,,,,,,,,,,
|
||||||
|
通知他明天上午8点开会,1,,询问是否确认发送,,,,,,,,,,,,,,,,,
|
||||||
|
确认,1,\[TOOL_CALL\].*wowtalk_send_message_to_member,,,,,,,,,,,,,,,,,,
|
||||||
|
把DefineRoom 4的灯光状态发给他,1,,调用find_devices_by_room和dxcore_get_device_status获取灯光状态,并询问是否确认发送。,,,,,,,,,,,,,,,,,
|
||||||
|
确认,1,\[TOOL_CALL\].*wowtalk_send_message_to_member,,,,,,,,,,,,,,,,,,
|
||||||
|
关闭设备900142的灯光,1,,询问是否确认关闭,,,,,,,,,,,,,,,,,
|
||||||
|
确认,1,\[TOOL_CALL\].*dxcore_update_device_status,,,,,,,,,,,,,,,,,,
|
||||||
|
Define Room1 的灯光状态,0,\[TOOL_CALL\].*find_devices_by_room,,,,,,,,,,,,,,,,,,
|
||||||
|
900142の稼働状況,0,\[TOOL_CALL\].*dxcore_get_device_status,,,,,,,,,,,,,,,,,,
|
||||||
|
卫生间在哪里,0,\[TOOL_CALL\].*rag_retrieve,,,,,,,,,,,,,,,,,,
|
||||||
|
我丢了物品怎么办,0,\[TOOL_CALL\].*rag_retrieve,,,,,,,,,,,,,,,,,,
|
||||||
|
咖啡多少钱一杯,0,\[TOOL_CALL\].*rag_retrieve,,,,,,,,,,,,,,,,,,
|
||||||
|
东京明天的天气,0,\[TOOL_CALL\].*weather_get_by_location,,,,,,,,,,,,,,,,,,
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
[
|
[
|
||||||
|
{% if use_history %}
|
||||||
{% for completion in _conversation %}
|
{% for completion in _conversation %}
|
||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
@ -13,4 +14,10 @@
|
|||||||
"role": "user",
|
"role": "user",
|
||||||
"content": "{{ question | encode }}"
|
"content": "{{ question | encode }}"
|
||||||
}
|
}
|
||||||
|
{% else %}
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "{{ question }}"
|
||||||
|
}
|
||||||
|
{% endif %}
|
||||||
]
|
]
|
||||||
@ -1,40 +1,78 @@
|
|||||||
- vars:
|
- vars:
|
||||||
question: 清水太郎在哪里
|
question: 清水太郎在哪里
|
||||||
|
use_history: true
|
||||||
assert:
|
assert:
|
||||||
- type: regex
|
- type: regex
|
||||||
value: \[TOOL_CALL\].*find_employee_location
|
value: \[TOOL_CALL\].*find_employee_location
|
||||||
- vars:
|
- vars:
|
||||||
question: 通知他明天上午8点开会
|
question: 通知他明天上午8点开会
|
||||||
|
use_history: true
|
||||||
assert:
|
assert:
|
||||||
- type: llm-rubric
|
- type: llm-rubric
|
||||||
value: 询问是否确认发送
|
value: 询问是否确认发送
|
||||||
- vars:
|
- vars:
|
||||||
question: 确认
|
question: 确认
|
||||||
|
use_history: true
|
||||||
assert:
|
assert:
|
||||||
- type: regex
|
- type: regex
|
||||||
value: \[TOOL_CALL\].*wowtalk_send_message_to_member
|
value: \[TOOL_CALL\].*wowtalk_send_message_to_member
|
||||||
- vars:
|
- vars:
|
||||||
question: 把DefineRoom 4的灯光状态发给他
|
question: 把DefineRoom 4的灯光状态发给他
|
||||||
|
use_history: true
|
||||||
assert:
|
assert:
|
||||||
- type: llm-rubric
|
- type: llm-rubric
|
||||||
value: 调用find_devices_by_room和dxcore_get_device_status获取灯光状态,并询问是否确认发送。
|
value: 调用find_devices_by_room和dxcore_get_device_status获取灯光状态,并询问是否确认发送。
|
||||||
- vars:
|
- vars:
|
||||||
question: 确认
|
question: 确认
|
||||||
|
use_history: true
|
||||||
assert:
|
assert:
|
||||||
- type: regex
|
- type: regex
|
||||||
value: \[TOOL_CALL\].*wowtalk_send_message_to_member
|
value: \[TOOL_CALL\].*wowtalk_send_message_to_member
|
||||||
- vars:
|
|
||||||
question: 900142の稼働状況
|
|
||||||
assert:
|
|
||||||
- type: regex
|
|
||||||
value: \[TOOL_CALL\].*dxcore_get_device_status
|
|
||||||
- vars:
|
- vars:
|
||||||
question: 关闭设备900142的灯光
|
question: 关闭设备900142的灯光
|
||||||
|
use_history: true
|
||||||
assert:
|
assert:
|
||||||
- type: llm-rubric
|
- type: llm-rubric
|
||||||
value: 询问是否确认关闭
|
value: 询问是否确认关闭
|
||||||
- vars:
|
- vars:
|
||||||
question: 确认
|
question: 确认
|
||||||
|
use_history: true
|
||||||
assert:
|
assert:
|
||||||
- type: regex
|
- type: regex
|
||||||
value: \[TOOL_CALL\].*dxcore_update_device_status
|
value: \[TOOL_CALL\].*dxcore_update_device_status
|
||||||
|
- vars:
|
||||||
|
question: Define Room1 的灯光状态
|
||||||
|
use_history: false
|
||||||
|
assert:
|
||||||
|
- type: regex
|
||||||
|
value: \[TOOL_CALL\].*find_devices_by_room
|
||||||
|
- vars:
|
||||||
|
question: 900142の稼働状況
|
||||||
|
use_history: false
|
||||||
|
assert:
|
||||||
|
- type: regex
|
||||||
|
value: \[TOOL_CALL\].*dxcore_get_device_status
|
||||||
|
- vars:
|
||||||
|
question: 卫生间在哪里
|
||||||
|
use_history: false
|
||||||
|
assert:
|
||||||
|
- type: regex
|
||||||
|
value: \[TOOL_CALL\].*rag_retrieve
|
||||||
|
- vars:
|
||||||
|
question: 我丢了物品怎么办
|
||||||
|
use_history: false
|
||||||
|
assert:
|
||||||
|
- type: regex
|
||||||
|
value: \[TOOL_CALL\].*rag_retrieve
|
||||||
|
- vars:
|
||||||
|
question: 咖啡多少钱一杯
|
||||||
|
use_history: false
|
||||||
|
assert:
|
||||||
|
- type: regex
|
||||||
|
value: \[TOOL_CALL\].*rag_retrieve
|
||||||
|
- vars:
|
||||||
|
question: 东京明天的天气
|
||||||
|
use_history: false
|
||||||
|
assert:
|
||||||
|
- type: regex
|
||||||
|
value: \[TOOL_CALL\].*weather_get_by_location
|
||||||
@ -1,26 +0,0 @@
|
|||||||
env: {}
|
|
||||||
description: Novare Test
|
|
||||||
providers:
|
|
||||||
- id: openai:chat:qwen3
|
|
||||||
config:
|
|
||||||
apiBaseUrl: https://catalog-agent-dev.gbase.ai/api/v2
|
|
||||||
apiKey: a21c99620a8ef61d69563afe05ccce89
|
|
||||||
passthrough:
|
|
||||||
bot_id: 63069654-7750-409d-9a58-a0960d899a20
|
|
||||||
tool_response: true
|
|
||||||
language: zh
|
|
||||||
prompts:
|
|
||||||
- file://prompt.json
|
|
||||||
tests: file://tests.yaml
|
|
||||||
defaultTest:
|
|
||||||
options:
|
|
||||||
provider:
|
|
||||||
text:
|
|
||||||
id: openai:chat:qwen/qwen3-next-80b-a3b-instruct
|
|
||||||
config:
|
|
||||||
apiKey: sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26
|
|
||||||
apiBaseUrl: https://one.felo.me/v1
|
|
||||||
apiVersion: '2024-02-01'
|
|
||||||
evaluateOptions: {}
|
|
||||||
writeLatestResults: true
|
|
||||||
sharing: true
|
|
||||||
@ -1,6 +0,0 @@
|
|||||||
[
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": "{{ question }}"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
@ -1,6 +0,0 @@
|
|||||||
question,regex,llm-rubric,,,,,,,,,,,,,,,,,
|
|
||||||
Define Room1 的灯光状态,\[TOOL_CALL\].*find_devices_by_room,,,,,,,,,,,,,,,,,,
|
|
||||||
卫生间在哪里,\[TOOL_CALL\].*rag_retrieve,,,,,,,,,,,,,,,,,,
|
|
||||||
我丢了物品怎么办,\[TOOL_CALL\].*rag_retrieve,,,,,,,,,,,,,,,,,,
|
|
||||||
咖啡多少钱一杯,\[TOOL_CALL\].*rag_retrieve,,,,,,,,,,,,,,,,,,
|
|
||||||
东京明天的天气,\[TOOL_CALL\].*weather_get_by_location,,,,,,,,,,,,,,,,,,
|
|
||||||
|
@ -1,25 +0,0 @@
|
|||||||
- vars:
|
|
||||||
question: Define Room1 的灯光状态
|
|
||||||
assert:
|
|
||||||
- type: regex
|
|
||||||
value: \[TOOL_CALL\].*find_devices_by_room
|
|
||||||
- vars:
|
|
||||||
question: 卫生间在哪里
|
|
||||||
assert:
|
|
||||||
- type: regex
|
|
||||||
value: \[TOOL_CALL\].*rag_retrieve
|
|
||||||
- vars:
|
|
||||||
question: 我丢了物品怎么办
|
|
||||||
assert:
|
|
||||||
- type: regex
|
|
||||||
value: \[TOOL_CALL\].*rag_retrieve
|
|
||||||
- vars:
|
|
||||||
question: 咖啡多少钱一杯
|
|
||||||
assert:
|
|
||||||
- type: regex
|
|
||||||
value: \[TOOL_CALL\].*rag_retrieve
|
|
||||||
- vars:
|
|
||||||
question: 东京明天的天气
|
|
||||||
assert:
|
|
||||||
- type: regex
|
|
||||||
value: \[TOOL_CALL\].*weather_get_by_location
|
|
||||||
Loading…
Reference in New Issue
Block a user