remove guildline
This commit is contained in:
parent
7fbebef764
commit
55651c38d0
File diff suppressed because one or more lines are too long
@ -1,6 +1,9 @@
|
||||
question,regex,llm-rubric,,,,,,,,,,,,,,,,,
|
||||
田中花子在哪里,\[TOOL_CALL\].*find_employee_location,,,,,,,,,,,,,,,,,,
|
||||
通知她明天上午8点开会,,询问是否确认发送,,,,,,,,,,,,,,,,,
|
||||
清水太郎在哪里,\[TOOL_CALL\].*find_employee_location,,,,,,,,,,,,,,,,,,
|
||||
通知他明天上午8点开会,,询问是否确认发送,,,,,,,,,,,,,,,,,
|
||||
确认,\[TOOL_CALL\].*wowtalk_send_message_to_member,,,,,,,,,,,,,,,,,,
|
||||
把DefineRoom 4的灯光状态发给田中花子,,调用find_devices_by_room和dxcore_get_device_status获取灯光状态,并询问是否确认发送。,,,,,,,,,,,,,,,,,
|
||||
把DefineRoom 4的灯光状态发给他,,调用find_devices_by_room和dxcore_get_device_status获取灯光状态,并询问是否确认发送。,,,,,,,,,,,,,,,,,
|
||||
确认,\[TOOL_CALL\].*wowtalk_send_message_to_member,,,,,,,,,,,,,,,,,,
|
||||
900142の稼働状況,\[TOOL_CALL\].*dxcore_get_device_status,,,,,,,,,,,,,,,,,,
|
||||
关闭设备900142的灯光,,询问是否确认关闭,,,,,,,,,,,,,,,,,
|
||||
确认,\[TOOL_CALL\].*dxcore_update_device_status,,,,,,,,,,,,,,,,,,
|
||||
|
@ -1,10 +1,10 @@
|
||||
- vars:
|
||||
question: 田中花子在哪里
|
||||
question: 清水太郎在哪里
|
||||
assert:
|
||||
- type: regex
|
||||
value: \[TOOL_CALL\].*find_employee_location
|
||||
- vars:
|
||||
question: 通知她明天上午8点开会
|
||||
question: 通知他明天上午8点开会
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
value: 询问是否确认发送
|
||||
@ -14,7 +14,7 @@
|
||||
- type: regex
|
||||
value: \[TOOL_CALL\].*wowtalk_send_message_to_member
|
||||
- vars:
|
||||
question: 把DefineRoom 4的灯光状态发给田中花子
|
||||
question: 把DefineRoom 4的灯光状态发给他
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
value: 调用find_devices_by_room和dxcore_get_device_status获取灯光状态,并询问是否确认发送。
|
||||
@ -23,3 +23,18 @@
|
||||
assert:
|
||||
- type: regex
|
||||
value: \[TOOL_CALL\].*wowtalk_send_message_to_member
|
||||
- vars:
|
||||
question: 900142の稼働状況
|
||||
assert:
|
||||
- type: regex
|
||||
value: \[TOOL_CALL\].*dxcore_get_device_status
|
||||
- vars:
|
||||
question: 关闭设备900142的灯光
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
value: 询问是否确认关闭
|
||||
- vars:
|
||||
question: 确认
|
||||
assert:
|
||||
- type: regex
|
||||
value: \[TOOL_CALL\].*dxcore_update_device_status
|
||||
|
||||
@ -1,137 +0,0 @@
|
||||
{
|
||||
"evalId": "eval-kAj-2025-12-04T14:31:43",
|
||||
"results": {
|
||||
"version": 3,
|
||||
"timestamp": "2025-12-04T14:31:43.709Z",
|
||||
"prompts": [
|
||||
{
|
||||
"raw": "[{\"role\":\"user\",\"content\":\"{{ question }}\"}]",
|
||||
"label": "prompt.json: [{\"role\":\"user\",\"content\":\"{{ question }}\"}]",
|
||||
"id": "7fd9d6ab1656b5f683dd7d34fc535754cd42291c7b78f2aa5fd68b3e43dae7b6",
|
||||
"provider": "openai:chat:qwen3",
|
||||
"metrics": {
|
||||
"score": 0,
|
||||
"testPassCount": 0,
|
||||
"testFailCount": 0,
|
||||
"testErrorCount": 0,
|
||||
"assertPassCount": 0,
|
||||
"assertFailCount": 0,
|
||||
"totalLatencyMs": 0,
|
||||
"tokenUsage": {
|
||||
"prompt": 0,
|
||||
"completion": 0,
|
||||
"cached": 0,
|
||||
"total": 0,
|
||||
"numRequests": 0,
|
||||
"completionDetails": {
|
||||
"reasoning": 0,
|
||||
"acceptedPrediction": 0,
|
||||
"rejectedPrediction": 0
|
||||
},
|
||||
"assertions": {
|
||||
"total": 0,
|
||||
"prompt": 0,
|
||||
"completion": 0,
|
||||
"cached": 0,
|
||||
"numRequests": 0,
|
||||
"completionDetails": {
|
||||
"reasoning": 0,
|
||||
"acceptedPrediction": 0,
|
||||
"rejectedPrediction": 0
|
||||
}
|
||||
}
|
||||
},
|
||||
"namedScores": {},
|
||||
"namedScoresCount": {},
|
||||
"cost": 0
|
||||
}
|
||||
}
|
||||
],
|
||||
"results": [],
|
||||
"stats": {
|
||||
"successes": 0,
|
||||
"failures": 0,
|
||||
"errors": 0,
|
||||
"tokenUsage": {
|
||||
"prompt": 0,
|
||||
"completion": 0,
|
||||
"cached": 0,
|
||||
"total": 0,
|
||||
"numRequests": 0,
|
||||
"completionDetails": {
|
||||
"reasoning": 0,
|
||||
"acceptedPrediction": 0,
|
||||
"rejectedPrediction": 0
|
||||
},
|
||||
"assertions": {
|
||||
"total": 0,
|
||||
"prompt": 0,
|
||||
"completion": 0,
|
||||
"cached": 0,
|
||||
"numRequests": 0,
|
||||
"completionDetails": {
|
||||
"reasoning": 0,
|
||||
"acceptedPrediction": 0,
|
||||
"rejectedPrediction": 0
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"config": {
|
||||
"tags": {},
|
||||
"description": "Novare Test",
|
||||
"prompts": [
|
||||
"file:///Users/moshui/Documents/felo/qwen-agent/promptfoo/query/prompt.json"
|
||||
],
|
||||
"providers": [
|
||||
{
|
||||
"id": "openai:chat:qwen3",
|
||||
"config": {
|
||||
"apiBaseUrl": "https://catalog-agent-dev.gbase.ai/api/v2",
|
||||
"apiKey": "a21c99620a8ef61d69563afe05ccce89",
|
||||
"passthrough": {
|
||||
"bot_id": "63069654-7750-409d-9a58-a0960d899a20",
|
||||
"tool_response": true,
|
||||
"language": "zh"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"tests": [],
|
||||
"scenarios": [],
|
||||
"env": {},
|
||||
"sharing": true,
|
||||
"defaultTest": {
|
||||
"options": {
|
||||
"provider": {
|
||||
"text": {
|
||||
"id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct",
|
||||
"config": {
|
||||
"apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26",
|
||||
"apiBaseUrl": "https://one.felo.me/v1",
|
||||
"apiVersion": "2024-02-01"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"vars": {},
|
||||
"assert": [],
|
||||
"metadata": {}
|
||||
},
|
||||
"outputPath": [
|
||||
"result.json"
|
||||
],
|
||||
"extensions": [],
|
||||
"metadata": {}
|
||||
},
|
||||
"shareableUrl": null,
|
||||
"metadata": {
|
||||
"promptfooVersion": "0.117.11",
|
||||
"nodeVersion": "v20.10.0",
|
||||
"platform": "darwin",
|
||||
"arch": "x64",
|
||||
"exportedAt": "2025-12-04T14:31:43.766Z",
|
||||
"evaluationCreatedAt": "2025-12-04T14:31:43.709Z"
|
||||
}
|
||||
}
|
||||
@ -1,6 +1,4 @@
|
||||
question,regex,llm-rubric,,,,,,,,,,,,,,,,,
|
||||
940092の稼働状況,\[TOOL_CALL\].*dxcore_get_device_status,,,,,,,,,,,,,,,,,,
|
||||
关闭设备900142的灯光,\[TOOL_CALL\].*dxcore_update_device_status,,,,,,,,,,,,,,,,,,
|
||||
Define Room1 的灯光状态,\[TOOL_CALL\].*find_devices_by_room,,,,,,,,,,,,,,,,,,
|
||||
卫生间在哪里,\[TOOL_CALL\].*rag_retrieve,,,,,,,,,,,,,,,,,,
|
||||
我丢了物品怎么办,\[TOOL_CALL\].*rag_retrieve,,,,,,,,,,,,,,,,,,
|
||||
|
||||
|
@ -1,13 +1,3 @@
|
||||
- vars:
|
||||
question: 940092の稼働状況
|
||||
assert:
|
||||
- type: regex
|
||||
value: \[TOOL_CALL\].*dxcore_get_device_status
|
||||
- vars:
|
||||
question: 关闭设备900142的灯光
|
||||
assert:
|
||||
- type: regex
|
||||
value: \[TOOL_CALL\].*dxcore_update_device_status
|
||||
- vars:
|
||||
question: Define Room1 的灯光状态
|
||||
assert:
|
||||
|
||||
@ -659,6 +659,7 @@ def extract_block_from_system_prompt(system_prompt: str) -> tuple[str, str, str,
|
||||
|
||||
if block_type == 'guideline' or block_type == 'guidelines':
|
||||
guidelines = content.strip()
|
||||
blocks_to_remove.append(match.group(0))
|
||||
elif block_type == 'tools':
|
||||
tools = content.strip()
|
||||
elif block_type == 'scenarios':
|
||||
|
||||
Loading…
Reference in New Issue
Block a user