add promptfoo
This commit is contained in:
parent
ee41279569
commit
092f7de0c3
@ -1,9 +0,0 @@
|
||||
question,regex,llm-rubric,,,,,,,,,,,,,,,,,
|
||||
清水太郎在哪里,\[TOOL_CALL\].*find_employee_location,,,,,,,,,,,,,,,,,,
|
||||
通知他明天上午8点开会,,询问是否确认发送,,,,,,,,,,,,,,,,,
|
||||
确认,\[TOOL_CALL\].*wowtalk_send_message_to_member,,,,,,,,,,,,,,,,,,
|
||||
把DefineRoom 4的灯光状态发给他,,调用find_devices_by_room和dxcore_get_device_status获取灯光状态,并询问是否确认发送。,,,,,,,,,,,,,,,,,
|
||||
确认,\[TOOL_CALL\].*wowtalk_send_message_to_member,,,,,,,,,,,,,,,,,,
|
||||
900142の稼働状況,\[TOOL_CALL\].*dxcore_get_device_status,,,,,,,,,,,,,,,,,,
|
||||
关闭设备900142的灯光,,询问是否确认关闭,,,,,,,,,,,,,,,,,
|
||||
确认,\[TOOL_CALL\].*dxcore_update_device_status,,,,,,,,,,,,,,,,,,
|
||||
|
@ -15,7 +15,8 @@ def csv_to_yaml(csv_file, yaml_file):
|
||||
if row['question']:
|
||||
test_case = {
|
||||
'vars': {
|
||||
'question': row['question'].strip()
|
||||
'question': row['question'].strip(),
|
||||
'use_history': True if row['use_history'] == "1" else False,
|
||||
},
|
||||
'assert':[]
|
||||
}
|
||||
@ -41,5 +42,4 @@ def csv_to_yaml(csv_file, yaml_file):
|
||||
print(f"Converted {len(tests)} test cases from {csv_file} to {yaml_file}")
|
||||
|
||||
if __name__ == '__main__':
|
||||
csv_to_yaml("conversation/tests.csv", "conversation/tests.yaml")
|
||||
csv_to_yaml("query/tests.csv", "query/tests.yaml")
|
||||
csv_to_yaml("novare/novare.csv", "novare/tests.yaml")
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
env: {}
|
||||
description: Novare Test
|
||||
description: Novare Test - Unified Config for Both Single and Conversation
|
||||
providers:
|
||||
- id: openai:chat:qwen3
|
||||
config:
|
||||
@ -25,4 +25,4 @@ defaultTest:
|
||||
apiVersion: '2024-02-01'
|
||||
evaluateOptions: {}
|
||||
writeLatestResults: true
|
||||
sharing: true
|
||||
sharing: true
|
||||
14
promptfoo/novare/novare.csv
Normal file
14
promptfoo/novare/novare.csv
Normal file
@ -0,0 +1,14 @@
|
||||
question,use_history,regex,llm-rubric,,,,,,,,,,,,,,,,,
|
||||
清水太郎在哪里,1,\[TOOL_CALL\].*find_employee_location,,,,,,,,,,,,,,,,,,
|
||||
通知他明天上午8点开会,1,,询问是否确认发送,,,,,,,,,,,,,,,,,
|
||||
确认,1,\[TOOL_CALL\].*wowtalk_send_message_to_member,,,,,,,,,,,,,,,,,,
|
||||
把DefineRoom 4的灯光状态发给他,1,,调用find_devices_by_room和dxcore_get_device_status获取灯光状态,并询问是否确认发送。,,,,,,,,,,,,,,,,,
|
||||
确认,1,\[TOOL_CALL\].*wowtalk_send_message_to_member,,,,,,,,,,,,,,,,,,
|
||||
关闭设备900142的灯光,1,,询问是否确认关闭,,,,,,,,,,,,,,,,,
|
||||
确认,1,\[TOOL_CALL\].*dxcore_update_device_status,,,,,,,,,,,,,,,,,,
|
||||
Define Room1 的灯光状态,0,\[TOOL_CALL\].*find_devices_by_room,,,,,,,,,,,,,,,,,,
|
||||
900142の稼働状況,0,\[TOOL_CALL\].*dxcore_get_device_status,,,,,,,,,,,,,,,,,,
|
||||
卫生间在哪里,0,\[TOOL_CALL\].*rag_retrieve,,,,,,,,,,,,,,,,,,
|
||||
我丢了物品怎么办,0,\[TOOL_CALL\].*rag_retrieve,,,,,,,,,,,,,,,,,,
|
||||
咖啡多少钱一杯,0,\[TOOL_CALL\].*rag_retrieve,,,,,,,,,,,,,,,,,,
|
||||
东京明天的天气,0,\[TOOL_CALL\].*weather_get_by_location,,,,,,,,,,,,,,,,,,
|
||||
|
@ -1,4 +1,5 @@
|
||||
[
|
||||
{% if use_history %}
|
||||
{% for completion in _conversation %}
|
||||
{
|
||||
"role": "user",
|
||||
@ -13,4 +14,10 @@
|
||||
"role": "user",
|
||||
"content": "{{ question | encode }}"
|
||||
}
|
||||
]
|
||||
{% else %}
|
||||
{
|
||||
"role": "user",
|
||||
"content": "{{ question }}"
|
||||
}
|
||||
{% endif %}
|
||||
]
|
||||
@ -1,40 +1,78 @@
|
||||
- vars:
|
||||
question: 清水太郎在哪里
|
||||
use_history: true
|
||||
assert:
|
||||
- type: regex
|
||||
value: \[TOOL_CALL\].*find_employee_location
|
||||
- vars:
|
||||
question: 通知他明天上午8点开会
|
||||
use_history: true
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
value: 询问是否确认发送
|
||||
- vars:
|
||||
question: 确认
|
||||
use_history: true
|
||||
assert:
|
||||
- type: regex
|
||||
value: \[TOOL_CALL\].*wowtalk_send_message_to_member
|
||||
- vars:
|
||||
question: 把DefineRoom 4的灯光状态发给他
|
||||
use_history: true
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
value: 调用find_devices_by_room和dxcore_get_device_status获取灯光状态,并询问是否确认发送。
|
||||
- vars:
|
||||
question: 确认
|
||||
use_history: true
|
||||
assert:
|
||||
- type: regex
|
||||
value: \[TOOL_CALL\].*wowtalk_send_message_to_member
|
||||
- vars:
|
||||
question: 900142の稼働状況
|
||||
assert:
|
||||
- type: regex
|
||||
value: \[TOOL_CALL\].*dxcore_get_device_status
|
||||
- vars:
|
||||
question: 关闭设备900142的灯光
|
||||
use_history: true
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
value: 询问是否确认关闭
|
||||
- vars:
|
||||
question: 确认
|
||||
use_history: true
|
||||
assert:
|
||||
- type: regex
|
||||
value: \[TOOL_CALL\].*dxcore_update_device_status
|
||||
- vars:
|
||||
question: Define Room1 的灯光状态
|
||||
use_history: false
|
||||
assert:
|
||||
- type: regex
|
||||
value: \[TOOL_CALL\].*find_devices_by_room
|
||||
- vars:
|
||||
question: 900142の稼働状況
|
||||
use_history: false
|
||||
assert:
|
||||
- type: regex
|
||||
value: \[TOOL_CALL\].*dxcore_get_device_status
|
||||
- vars:
|
||||
question: 卫生间在哪里
|
||||
use_history: false
|
||||
assert:
|
||||
- type: regex
|
||||
value: \[TOOL_CALL\].*rag_retrieve
|
||||
- vars:
|
||||
question: 我丢了物品怎么办
|
||||
use_history: false
|
||||
assert:
|
||||
- type: regex
|
||||
value: \[TOOL_CALL\].*rag_retrieve
|
||||
- vars:
|
||||
question: 咖啡多少钱一杯
|
||||
use_history: false
|
||||
assert:
|
||||
- type: regex
|
||||
value: \[TOOL_CALL\].*rag_retrieve
|
||||
- vars:
|
||||
question: 东京明天的天气
|
||||
use_history: false
|
||||
assert:
|
||||
- type: regex
|
||||
value: \[TOOL_CALL\].*weather_get_by_location
|
||||
@ -1,26 +0,0 @@
|
||||
env: {}
|
||||
description: Novare Test
|
||||
providers:
|
||||
- id: openai:chat:qwen3
|
||||
config:
|
||||
apiBaseUrl: https://catalog-agent-dev.gbase.ai/api/v2
|
||||
apiKey: a21c99620a8ef61d69563afe05ccce89
|
||||
passthrough:
|
||||
bot_id: 63069654-7750-409d-9a58-a0960d899a20
|
||||
tool_response: true
|
||||
language: zh
|
||||
prompts:
|
||||
- file://prompt.json
|
||||
tests: file://tests.yaml
|
||||
defaultTest:
|
||||
options:
|
||||
provider:
|
||||
text:
|
||||
id: openai:chat:qwen/qwen3-next-80b-a3b-instruct
|
||||
config:
|
||||
apiKey: sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26
|
||||
apiBaseUrl: https://one.felo.me/v1
|
||||
apiVersion: '2024-02-01'
|
||||
evaluateOptions: {}
|
||||
writeLatestResults: true
|
||||
sharing: true
|
||||
@ -1,6 +0,0 @@
|
||||
[
|
||||
{
|
||||
"role": "user",
|
||||
"content": "{{ question }}"
|
||||
}
|
||||
]
|
||||
@ -1,6 +0,0 @@
|
||||
question,regex,llm-rubric,,,,,,,,,,,,,,,,,
|
||||
Define Room1 的灯光状态,\[TOOL_CALL\].*find_devices_by_room,,,,,,,,,,,,,,,,,,
|
||||
卫生间在哪里,\[TOOL_CALL\].*rag_retrieve,,,,,,,,,,,,,,,,,,
|
||||
我丢了物品怎么办,\[TOOL_CALL\].*rag_retrieve,,,,,,,,,,,,,,,,,,
|
||||
咖啡多少钱一杯,\[TOOL_CALL\].*rag_retrieve,,,,,,,,,,,,,,,,,,
|
||||
东京明天的天气,\[TOOL_CALL\].*weather_get_by_location,,,,,,,,,,,,,,,,,,
|
||||
|
@ -1,25 +0,0 @@
|
||||
- vars:
|
||||
question: Define Room1 的灯光状态
|
||||
assert:
|
||||
- type: regex
|
||||
value: \[TOOL_CALL\].*find_devices_by_room
|
||||
- vars:
|
||||
question: 卫生间在哪里
|
||||
assert:
|
||||
- type: regex
|
||||
value: \[TOOL_CALL\].*rag_retrieve
|
||||
- vars:
|
||||
question: 我丢了物品怎么办
|
||||
assert:
|
||||
- type: regex
|
||||
value: \[TOOL_CALL\].*rag_retrieve
|
||||
- vars:
|
||||
question: 咖啡多少钱一杯
|
||||
assert:
|
||||
- type: regex
|
||||
value: \[TOOL_CALL\].*rag_retrieve
|
||||
- vars:
|
||||
question: 东京明天的天气
|
||||
assert:
|
||||
- type: regex
|
||||
value: \[TOOL_CALL\].*weather_get_by_location
|
||||
Loading…
Reference in New Issue
Block a user