remove guideline
This commit is contained in:
parent
7fbebef764
commit
55651c38d0
File diff suppressed because one or more lines are too long
@ -1,6 +1,9 @@
|
|||||||
question,regex,llm-rubric,,,,,,,,,,,,,,,,,
|
question,regex,llm-rubric,,,,,,,,,,,,,,,,,
|
||||||
田中花子在哪里,\[TOOL_CALL\].*find_employee_location,,,,,,,,,,,,,,,,,,
|
清水太郎在哪里,\[TOOL_CALL\].*find_employee_location,,,,,,,,,,,,,,,,,,
|
||||||
通知她明天上午8点开会,,询问是否确认发送,,,,,,,,,,,,,,,,,
|
通知他明天上午8点开会,,询问是否确认发送,,,,,,,,,,,,,,,,,
|
||||||
确认,\[TOOL_CALL\].*wowtalk_send_message_to_member,,,,,,,,,,,,,,,,,,
|
确认,\[TOOL_CALL\].*wowtalk_send_message_to_member,,,,,,,,,,,,,,,,,,
|
||||||
把DefineRoom 4的灯光状态发给田中花子,,调用find_devices_by_room和dxcore_get_device_status获取灯光状态,并询问是否确认发送。,,,,,,,,,,,,,,,,,
|
把DefineRoom 4的灯光状态发给他,,调用find_devices_by_room和dxcore_get_device_status获取灯光状态,并询问是否确认发送。,,,,,,,,,,,,,,,,,
|
||||||
确认,\[TOOL_CALL\].*wowtalk_send_message_to_member,,,,,,,,,,,,,,,,,,
|
确认,\[TOOL_CALL\].*wowtalk_send_message_to_member,,,,,,,,,,,,,,,,,,
|
||||||
|
900142の稼働状況,\[TOOL_CALL\].*dxcore_get_device_status,,,,,,,,,,,,,,,,,,
|
||||||
|
关闭设备900142的灯光,,询问是否确认关闭,,,,,,,,,,,,,,,,,
|
||||||
|
确认,\[TOOL_CALL\].*dxcore_update_device_status,,,,,,,,,,,,,,,,,,
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
- vars:
|
- vars:
|
||||||
question: 田中花子在哪里
|
question: 清水太郎在哪里
|
||||||
assert:
|
assert:
|
||||||
- type: regex
|
- type: regex
|
||||||
value: \[TOOL_CALL\].*find_employee_location
|
value: \[TOOL_CALL\].*find_employee_location
|
||||||
- vars:
|
- vars:
|
||||||
question: 通知她明天上午8点开会
|
question: 通知他明天上午8点开会
|
||||||
assert:
|
assert:
|
||||||
- type: llm-rubric
|
- type: llm-rubric
|
||||||
value: 询问是否确认发送
|
value: 询问是否确认发送
|
||||||
@ -14,7 +14,7 @@
|
|||||||
- type: regex
|
- type: regex
|
||||||
value: \[TOOL_CALL\].*wowtalk_send_message_to_member
|
value: \[TOOL_CALL\].*wowtalk_send_message_to_member
|
||||||
- vars:
|
- vars:
|
||||||
question: 把DefineRoom 4的灯光状态发给田中花子
|
question: 把DefineRoom 4的灯光状态发给他
|
||||||
assert:
|
assert:
|
||||||
- type: llm-rubric
|
- type: llm-rubric
|
||||||
value: 调用find_devices_by_room和dxcore_get_device_status获取灯光状态,并询问是否确认发送。
|
value: 调用find_devices_by_room和dxcore_get_device_status获取灯光状态,并询问是否确认发送。
|
||||||
@ -23,3 +23,18 @@
|
|||||||
assert:
|
assert:
|
||||||
- type: regex
|
- type: regex
|
||||||
value: \[TOOL_CALL\].*wowtalk_send_message_to_member
|
value: \[TOOL_CALL\].*wowtalk_send_message_to_member
|
||||||
|
- vars:
|
||||||
|
question: 900142の稼働状況
|
||||||
|
assert:
|
||||||
|
- type: regex
|
||||||
|
value: \[TOOL_CALL\].*dxcore_get_device_status
|
||||||
|
- vars:
|
||||||
|
question: 关闭设备900142的灯光
|
||||||
|
assert:
|
||||||
|
- type: llm-rubric
|
||||||
|
value: 询问是否确认关闭
|
||||||
|
- vars:
|
||||||
|
question: 确认
|
||||||
|
assert:
|
||||||
|
- type: regex
|
||||||
|
value: \[TOOL_CALL\].*dxcore_update_device_status
|
||||||
|
|||||||
@ -1,137 +0,0 @@
|
|||||||
{
|
|
||||||
"evalId": "eval-kAj-2025-12-04T14:31:43",
|
|
||||||
"results": {
|
|
||||||
"version": 3,
|
|
||||||
"timestamp": "2025-12-04T14:31:43.709Z",
|
|
||||||
"prompts": [
|
|
||||||
{
|
|
||||||
"raw": "[{\"role\":\"user\",\"content\":\"{{ question }}\"}]",
|
|
||||||
"label": "prompt.json: [{\"role\":\"user\",\"content\":\"{{ question }}\"}]",
|
|
||||||
"id": "7fd9d6ab1656b5f683dd7d34fc535754cd42291c7b78f2aa5fd68b3e43dae7b6",
|
|
||||||
"provider": "openai:chat:qwen3",
|
|
||||||
"metrics": {
|
|
||||||
"score": 0,
|
|
||||||
"testPassCount": 0,
|
|
||||||
"testFailCount": 0,
|
|
||||||
"testErrorCount": 0,
|
|
||||||
"assertPassCount": 0,
|
|
||||||
"assertFailCount": 0,
|
|
||||||
"totalLatencyMs": 0,
|
|
||||||
"tokenUsage": {
|
|
||||||
"prompt": 0,
|
|
||||||
"completion": 0,
|
|
||||||
"cached": 0,
|
|
||||||
"total": 0,
|
|
||||||
"numRequests": 0,
|
|
||||||
"completionDetails": {
|
|
||||||
"reasoning": 0,
|
|
||||||
"acceptedPrediction": 0,
|
|
||||||
"rejectedPrediction": 0
|
|
||||||
},
|
|
||||||
"assertions": {
|
|
||||||
"total": 0,
|
|
||||||
"prompt": 0,
|
|
||||||
"completion": 0,
|
|
||||||
"cached": 0,
|
|
||||||
"numRequests": 0,
|
|
||||||
"completionDetails": {
|
|
||||||
"reasoning": 0,
|
|
||||||
"acceptedPrediction": 0,
|
|
||||||
"rejectedPrediction": 0
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"namedScores": {},
|
|
||||||
"namedScoresCount": {},
|
|
||||||
"cost": 0
|
|
||||||
}
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"results": [],
|
|
||||||
"stats": {
|
|
||||||
"successes": 0,
|
|
||||||
"failures": 0,
|
|
||||||
"errors": 0,
|
|
||||||
"tokenUsage": {
|
|
||||||
"prompt": 0,
|
|
||||||
"completion": 0,
|
|
||||||
"cached": 0,
|
|
||||||
"total": 0,
|
|
||||||
"numRequests": 0,
|
|
||||||
"completionDetails": {
|
|
||||||
"reasoning": 0,
|
|
||||||
"acceptedPrediction": 0,
|
|
||||||
"rejectedPrediction": 0
|
|
||||||
},
|
|
||||||
"assertions": {
|
|
||||||
"total": 0,
|
|
||||||
"prompt": 0,
|
|
||||||
"completion": 0,
|
|
||||||
"cached": 0,
|
|
||||||
"numRequests": 0,
|
|
||||||
"completionDetails": {
|
|
||||||
"reasoning": 0,
|
|
||||||
"acceptedPrediction": 0,
|
|
||||||
"rejectedPrediction": 0
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"config": {
|
|
||||||
"tags": {},
|
|
||||||
"description": "Novare Test",
|
|
||||||
"prompts": [
|
|
||||||
"file:///Users/moshui/Documents/felo/qwen-agent/promptfoo/query/prompt.json"
|
|
||||||
],
|
|
||||||
"providers": [
|
|
||||||
{
|
|
||||||
"id": "openai:chat:qwen3",
|
|
||||||
"config": {
|
|
||||||
"apiBaseUrl": "https://catalog-agent-dev.gbase.ai/api/v2",
|
|
||||||
"apiKey": "a21c99620a8ef61d69563afe05ccce89",
|
|
||||||
"passthrough": {
|
|
||||||
"bot_id": "63069654-7750-409d-9a58-a0960d899a20",
|
|
||||||
"tool_response": true,
|
|
||||||
"language": "zh"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"tests": [],
|
|
||||||
"scenarios": [],
|
|
||||||
"env": {},
|
|
||||||
"sharing": true,
|
|
||||||
"defaultTest": {
|
|
||||||
"options": {
|
|
||||||
"provider": {
|
|
||||||
"text": {
|
|
||||||
"id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct",
|
|
||||||
"config": {
|
|
||||||
"apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26",
|
|
||||||
"apiBaseUrl": "https://one.felo.me/v1",
|
|
||||||
"apiVersion": "2024-02-01"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"vars": {},
|
|
||||||
"assert": [],
|
|
||||||
"metadata": {}
|
|
||||||
},
|
|
||||||
"outputPath": [
|
|
||||||
"result.json"
|
|
||||||
],
|
|
||||||
"extensions": [],
|
|
||||||
"metadata": {}
|
|
||||||
},
|
|
||||||
"shareableUrl": null,
|
|
||||||
"metadata": {
|
|
||||||
"promptfooVersion": "0.117.11",
|
|
||||||
"nodeVersion": "v20.10.0",
|
|
||||||
"platform": "darwin",
|
|
||||||
"arch": "x64",
|
|
||||||
"exportedAt": "2025-12-04T14:31:43.766Z",
|
|
||||||
"evaluationCreatedAt": "2025-12-04T14:31:43.709Z"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@ -1,6 +1,4 @@
|
|||||||
question,regex,llm-rubric,,,,,,,,,,,,,,,,,
|
question,regex,llm-rubric,,,,,,,,,,,,,,,,,
|
||||||
940092の稼働状況,\[TOOL_CALL\].*dxcore_get_device_status,,,,,,,,,,,,,,,,,,
|
|
||||||
关闭设备900142的灯光,\[TOOL_CALL\].*dxcore_update_device_status,,,,,,,,,,,,,,,,,,
|
|
||||||
Define Room1 的灯光状态,\[TOOL_CALL\].*find_devices_by_room,,,,,,,,,,,,,,,,,,
|
Define Room1 的灯光状态,\[TOOL_CALL\].*find_devices_by_room,,,,,,,,,,,,,,,,,,
|
||||||
卫生间在哪里,\[TOOL_CALL\].*rag_retrieve,,,,,,,,,,,,,,,,,,
|
卫生间在哪里,\[TOOL_CALL\].*rag_retrieve,,,,,,,,,,,,,,,,,,
|
||||||
我丢了物品怎么办,\[TOOL_CALL\].*rag_retrieve,,,,,,,,,,,,,,,,,,
|
我丢了物品怎么办,\[TOOL_CALL\].*rag_retrieve,,,,,,,,,,,,,,,,,,
|
||||||
|
|||||||
|
@ -1,13 +1,3 @@
|
|||||||
- vars:
|
|
||||||
question: 940092の稼働状況
|
|
||||||
assert:
|
|
||||||
- type: regex
|
|
||||||
value: \[TOOL_CALL\].*dxcore_get_device_status
|
|
||||||
- vars:
|
|
||||||
question: 关闭设备900142的灯光
|
|
||||||
assert:
|
|
||||||
- type: regex
|
|
||||||
value: \[TOOL_CALL\].*dxcore_update_device_status
|
|
||||||
- vars:
|
- vars:
|
||||||
question: Define Room1 的灯光状态
|
question: Define Room1 的灯光状态
|
||||||
assert:
|
assert:
|
||||||
|
|||||||
@ -659,6 +659,7 @@ def extract_block_from_system_prompt(system_prompt: str) -> tuple[str, str, str,
|
|||||||
|
|
||||||
if block_type == 'guideline' or block_type == 'guidelines':
|
if block_type == 'guideline' or block_type == 'guidelines':
|
||||||
guidelines = content.strip()
|
guidelines = content.strip()
|
||||||
|
blocks_to_remove.append(match.group(0))
|
||||||
elif block_type == 'tools':
|
elif block_type == 'tools':
|
||||||
tools = content.strip()
|
tools = content.strip()
|
||||||
elif block_type == 'scenarios':
|
elif block_type == 'scenarios':
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user