Merge branch 'master' into onprem

朱潮 2025-12-05 13:16:04 +08:00
commit 7fbebef764
11 changed files with 2027 additions and 686 deletions

View File

@@ -13,23 +13,7 @@ prompts:
   - file://prompt.json
 nunjucksFilters:
   encode: ./encode.js
-tests:
-  - vars:
-      question: 清水太郎在哪里
-    assert:
-      - type: regex
-        value: \[TOOL_CALL\].*find_employee_location
-  - vars:
-      question: 通知他明天上午8点开会
-    assert:
-      - type: llm-rubric
-        value: 询问是否确认发送
-  - vars:
-      question: 确认
-    assert:
-      - type: regex
-        value: \[TOOL_CALL\].*wowtalk_send_message_to_member
+tests: file://tests.yaml
 defaultTest:
   options:
     provider:

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,6 @@
question,regex,llm-rubric,,,,,,,,,,,,,,,,,
田中花子在哪里,\[TOOL_CALL\].*find_employee_location,,,,,,,,,,,,,,,,,,
通知她明天上午8点开会,,询问是否确认发送,,,,,,,,,,,,,,,,,
确认,\[TOOL_CALL\].*wowtalk_send_message_to_member,,,,,,,,,,,,,,,,,,
把DefineRoom 4的灯光状态发给田中花子,,调用find_devices_by_room和dxcore_get_device_status获取灯光状态并询问是否确认发送。,,,,,,,,,,,,,,,,,
确认,\[TOOL_CALL\].*wowtalk_send_message_to_member,,,,,,,,,,,,,,,,,,

View File

@@ -0,0 +1,25 @@
- vars:
    question: 田中花子在哪里
  assert:
    - type: regex
      value: \[TOOL_CALL\].*find_employee_location
- vars:
    question: 通知她明天上午8点开会
  assert:
    - type: llm-rubric
      value: 询问是否确认发送
- vars:
    question: 确认
  assert:
    - type: regex
      value: \[TOOL_CALL\].*wowtalk_send_message_to_member
- vars:
    question: 把DefineRoom 4的灯光状态发给田中花子
  assert:
    - type: llm-rubric
      value: 调用find_devices_by_room和dxcore_get_device_status获取灯光状态并询问是否确认发送。
- vars:
    question: 确认
  assert:
    - type: regex
      value: \[TOOL_CALL\].*wowtalk_send_message_to_member

promptfoo/csv_to_yaml.py Normal file
View File

@@ -0,0 +1,45 @@
#!/usr/bin/env python3
import csv
import yaml
import sys


def csv_to_yaml(csv_file, yaml_file):
    """Convert CSV file to YAML format for promptfoo tests"""
    tests = []

    with open(csv_file, 'r', encoding='utf-8-sig') as f:
        reader = csv.DictReader(f)
        for row in reader:
            if row['question']:
                test_case = {
                    'vars': {
                        'question': row['question'].strip()
                    },
                    'assert': []
                }

                if row['regex'] and row['regex'].strip():
                    test_case['assert'].append({
                        'type': 'regex',
                        'value': row['regex'].strip()
                    })

                # Add llm-rubric if present
                if row['llm-rubric'] and row['llm-rubric'].strip():
                    test_case['assert'].append({
                        'type': 'llm-rubric',
                        'value': row['llm-rubric'].strip()
                    })

                tests.append(test_case)

    with open(yaml_file, 'w', encoding='utf-8') as f:
        yaml.dump(tests, f, default_flow_style=False, allow_unicode=True, sort_keys=False)

    print(f"Converted {len(tests)} test cases from {csv_file} to {yaml_file}")


if __name__ == '__main__':
    csv_to_yaml("conversation/tests.csv", "conversation/tests.yaml")
    csv_to_yaml("query/tests.csv", "query/tests.yaml")

View File

@@ -11,13 +11,7 @@ providers:
         language: zh
 prompts:
   - file://prompt.json
-tests:
-  - vars:
-      question: 940092の稼働状況
-    assert:
-      - type: regex
-        value: \[TOOL_CALL\].*dxcore_get_device_status
+tests: file://tests.yaml
 defaultTest:
   options:
     provider:

View File

@@ -1,8 +1,8 @@
{
-"evalId": "eval-rew-2025-12-04T11:49:09",
+"evalId": "eval-kAj-2025-12-04T14:31:43",
"results": {
"version": 3,
-"timestamp": "2025-12-04T11:49:09.282Z",
+"timestamp": "2025-12-04T14:31:43.709Z",
"prompts": [
{
"raw": "[{\"role\":\"user\",\"content\":\"{{ question }}\"}]",
@@ -10,19 +10,19 @@
"id": "7fd9d6ab1656b5f683dd7d34fc535754cd42291c7b78f2aa5fd68b3e43dae7b6",
"provider": "openai:chat:qwen3",
"metrics": {
-"score": 1,
-"testPassCount": 1,
+"score": 0,
+"testPassCount": 0,
"testFailCount": 0,
"testErrorCount": 0,
-"assertPassCount": 1,
+"assertPassCount": 0,
"assertFailCount": 0,
-"totalLatencyMs": 8731,
+"totalLatencyMs": 0,
"tokenUsage": {
-"prompt": 11,
-"completion": 1174,
+"prompt": 0,
+"completion": 0,
"cached": 0,
-"total": 1185,
-"numRequests": 1,
+"total": 0,
+"numRequests": 0,
"completionDetails": {
"reasoning": 0,
"acceptedPrediction": 0,
@@ -47,103 +47,17 @@
}
}
],
-"results": [
{
"cost": 0,
"gradingResult": {
"pass": true,
"score": 1,
"reason": "All assertions passed",
"namedScores": {},
"tokensUsed": {
"total": 0,
"prompt": 0,
"completion": 0,
"cached": 0,
"numRequests": 0
},
"componentResults": [
{
"pass": true,
"score": 1,
"reason": "Assertion passed",
"assertion": {
"type": "regex",
"value": "\\[TOOL_CALL\\].*dxcore_get_device_status"
}
}
],
"assertion": null
},
"id": "ec8dff77-79da-499f-aee0-f40727fa844e",
"latencyMs": 8731,
"namedScores": {},
"prompt": {
"raw": "[\n {\n \"role\": \"user\",\n \"content\": \"940092の稼働状況\"\n }\n]",
"label": "prompt.json: [{\"role\":\"user\",\"content\":\"{{ question }}\"}]"
},
"promptId": "7fd9d6ab1656b5f683dd7d34fc535754cd42291c7b78f2aa5fd68b3e43dae7b6",
"promptIdx": 0,
"provider": {
"id": "openai:chat:qwen3",
"label": ""
},
"response": {
"output": "[THINK]\n### 🎯 核心目标\n查询编号为“940092”的设备的当前运行状态如温度、湿度、运行模式等以了解其稼働状況工作状态。\n\n### 📋 现状分析\n- **关键信息**: \n - 用户查询的是“940092”的稼働状況该编号极可能为设备ID如空调或照明设备。 \n - 当前时间为2025年12月4日19:49属于晚间时段设备可能处于运行中。 \n - 用户未明确要求控制设备,仅要求查询状态,属于信息查询类请求。\n\n- **限制条件**: \n - 仅允许查询设备状态,不能擅自执行控制操作(如开关、调温等)。 \n - 必须使用指定工具且需优先使用精准匹配的设备ID查询工具。 \n - 所有输出语言必须为中文。\n\n- **可用资源**: \n - **Iot Control-dxcore_get_device_status**可通过设备ID直接查询设备当前状态温度、湿度、运行模式、风量等。 \n - **Iot Control-find_devices_by_room**仅在设备ID无效或模糊时作为备选但当前ID明确无需使用。 \n - 无需调用人员、消息、天气、知识库或搜索工具,因查询目标明确为设备状态。\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 Iot Control-dxcore_get_device_status传入参数 device_id: \"940092\",查询该设备的当前运行状态。\n\n[TOOL_CALL] Iot Control-dxcore_get_device_status\n{\"sensor_id\": 940092}\n[TOOL_RESPONSE] Iot Control-dxcore_get_device_status\n{\"success\":true,\"data\":[{\"Device.Info.Id\":940092,\"Device.Info.Type\":\"light\",\"Device.Info.Description\":\"A棟 2F コワーキングスペース①(丸)-01 照明\",\"Device.Info.Manufacturer\":\"Signify照明\",\"Device.Info.SerialNumber\":\"none\",\"Device.Info.Coordinates\":[0,0,0],\"Device.Info.FieldId\":\"\",\"Device.Status.Lighting\":3}],\"site_name\":\"shiomi\",\"device_type\":\"light\",\"sensor_id\":940092}\n[ANSWER]\nA栋2F协同空间①(丸)-01的照明设备运行正常当前亮度为3级。",
"tokenUsage": {
"total": 1185,
"prompt": 11,
"completion": 1174
},
"cached": false,
"finishReason": "stop"
},
"score": 1,
"success": true,
"testCase": {
"vars": {
"question": "940092の稼働状況"
},
"assert": [
{
"type": "regex",
"value": "\\[TOOL_CALL\\].*dxcore_get_device_status"
}
],
"options": {
"provider": {
"text": {
"id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct",
"config": {
"apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26",
"apiBaseUrl": "https://one.felo.me/v1",
"apiVersion": "2024-02-01"
}
}
}
},
"metadata": {}
},
"testIdx": 0,
"vars": {
"question": "940092の稼働状況"
},
"metadata": {
"_promptfooFileMetadata": {}
},
"failureReason": 0
}
-],
+"results": [],
"stats": { "stats": {
"successes": 1, "successes": 0,
"failures": 0, "failures": 0,
"errors": 0, "errors": 0,
"tokenUsage": { "tokenUsage": {
"prompt": 11, "prompt": 0,
"completion": 1174, "completion": 0,
"cached": 0, "cached": 0,
"total": 1185, "total": 0,
"numRequests": 1, "numRequests": 0,
"completionDetails": { "completionDetails": {
"reasoning": 0, "reasoning": 0,
"acceptedPrediction": 0, "acceptedPrediction": 0,
@@ -184,19 +98,7 @@
}
}
],
-"tests": [
{
"vars": {
"question": "940092の稼働状況"
},
"assert": [
{
"type": "regex",
"value": "\\[TOOL_CALL\\].*dxcore_get_device_status"
}
]
}
-],
+"tests": [],
"scenarios": [], "scenarios": [],
"env": {}, "env": {},
"sharing": true, "sharing": true,
@ -229,7 +131,7 @@
"nodeVersion": "v20.10.0", "nodeVersion": "v20.10.0",
"platform": "darwin", "platform": "darwin",
"arch": "x64", "arch": "x64",
"exportedAt": "2025-12-04T11:49:18.166Z", "exportedAt": "2025-12-04T14:31:43.766Z",
"evaluationCreatedAt": "2025-12-04T11:49:09.282Z" "evaluationCreatedAt": "2025-12-04T14:31:43.709Z"
} }
} }

View File

@@ -0,0 +1,8 @@
question,regex,llm-rubric,,,,,,,,,,,,,,,,,
940092の稼働状況,\[TOOL_CALL\].*dxcore_get_device_status,,,,,,,,,,,,,,,,,,
关闭设备900142的灯光,\[TOOL_CALL\].*dxcore_update_device_status,,,,,,,,,,,,,,,,,,
Define Room1 的灯光状态,\[TOOL_CALL\].*find_devices_by_room,,,,,,,,,,,,,,,,,,
卫生间在哪里,\[TOOL_CALL\].*rag_retrieve,,,,,,,,,,,,,,,,,,
我丢了物品怎么办,\[TOOL_CALL\].*rag_retrieve,,,,,,,,,,,,,,,,,,
咖啡多少钱一杯,\[TOOL_CALL\].*rag_retrieve,,,,,,,,,,,,,,,,,,
东京明天的天气,\[TOOL_CALL\].*weather_get_by_location,,,,,,,,,,,,,,,,,,

View File

@@ -0,0 +1,35 @@
- vars:
    question: 940092の稼働状況
  assert:
    - type: regex
      value: \[TOOL_CALL\].*dxcore_get_device_status
- vars:
    question: 关闭设备900142的灯光
  assert:
    - type: regex
      value: \[TOOL_CALL\].*dxcore_update_device_status
- vars:
    question: Define Room1 的灯光状态
  assert:
    - type: regex
      value: \[TOOL_CALL\].*find_devices_by_room
- vars:
    question: 卫生间在哪里
  assert:
    - type: regex
      value: \[TOOL_CALL\].*rag_retrieve
- vars:
    question: 我丢了物品怎么办
  assert:
    - type: regex
      value: \[TOOL_CALL\].*rag_retrieve
- vars:
    question: 咖啡多少钱一杯
  assert:
    - type: regex
      value: \[TOOL_CALL\].*rag_retrieve
- vars:
    question: 东京明天的天气
  assert:
    - type: regex
      value: \[TOOL_CALL\].*weather_get_by_location

View File

@@ -1,369 +0,0 @@
{
"evalId": "eval-0Pa-2025-12-04T11:23:59",
"results": {
"version": 3,
"timestamp": "2025-12-04T11:23:59.637Z",
"prompts": [
{
"raw": "[\n{% for message in messages %}\n {% set outer_loop = loop %}\n {% for role, content in message %}\n {\n \"role\": \"{{ role }}\",\n \"content\": \"{{ content }}\"\n }{% if not (loop.last and outer_loop.last) %},{% endif %}\n {% endfor %}\n{% endfor %}\n]\n",
"label": "prompt.json: [\n{% for message in messages %}\n {% set outer_loop = loop %}\n {% for role, content in message %}\n {\n \"role\": \"{{ role }}\",\n \"content\": \"{{ content }}\"\n }{% if not (loop.last and outer_loop.last) %},{% endif %}\n {% endfor %}\n{% endfor %}\n]\n",
"id": "31ba1cc91b4ca656f8bf91b71d90fd1c786f46919fd3e498accd274537581342",
"provider": "openai:chat:qwen3",
"metrics": {
"score": 2,
"testPassCount": 2,
"testFailCount": 0,
"testErrorCount": 0,
"assertPassCount": 2,
"assertFailCount": 0,
"totalLatencyMs": 62,
"tokenUsage": {
"prompt": 0,
"completion": 0,
"cached": 1877,
"total": 1877,
"numRequests": 2,
"completionDetails": {
"reasoning": 0,
"acceptedPrediction": 0,
"rejectedPrediction": 0
},
"assertions": {
"total": 808,
"prompt": 0,
"completion": 0,
"cached": 808,
"numRequests": 0,
"completionDetails": {
"reasoning": 0,
"acceptedPrediction": 0,
"rejectedPrediction": 0
}
}
},
"namedScores": {},
"namedScoresCount": {},
"cost": 0
}
}
],
"results": [
{
"cost": 0,
"gradingResult": {
"pass": true,
"score": 1,
"reason": "All assertions passed",
"namedScores": {},
"tokensUsed": {
"total": 808,
"prompt": 0,
"completion": 0,
"cached": 808,
"numRequests": 0
},
"componentResults": [
{
"assertion": {
"type": "llm-rubric",
"value": "询问是否确认发送"
},
"pass": true,
"score": 1,
"reason": "输出中明确提出了确认请求:'即将发送“明天上午8点开会”的消息给清水邦英是否确认',符合要求在执行消息通知前进行确认的流程",
"tokensUsed": {
"total": 808,
"prompt": 0,
"completion": 0,
"cached": 808,
"numRequests": 0,
"completionDetails": {
"reasoning": 0,
"acceptedPrediction": 0,
"rejectedPrediction": 0
}
}
}
],
"assertion": null
},
"id": "867656f1-edcd-41b9-b645-c9f5df2e2cc1",
"latencyMs": 46,
"namedScores": {},
"prompt": {
"raw": "[\n\n \n \n {\n \"role\": \"user\",\n \"content\": \"通知清水太郎明天上午8点开会\"\n }\n \n\n]\n",
"label": "prompt.json: [\n{% for message in messages %}\n {% set outer_loop = loop %}\n {% for role, content in message %}\n {\n \"role\": \"{{ role }}\",\n \"content\": \"{{ content }}\"\n }{% if not (loop.last and outer_loop.last) %},{% endif %}\n {% endfor %}\n{% endfor %}\n]\n"
},
"promptId": "31ba1cc91b4ca656f8bf91b71d90fd1c786f46919fd3e498accd274537581342",
"promptIdx": 0,
"provider": {
"id": "openai:chat:qwen3",
"label": ""
},
"response": {
"output": "[THINK]\n### 🎯 核心目标\n通知清水太郎明天上午8点开会。\n\n### 📋 现状分析\n- **关键信息**: \n - 用户指令为“通知清水太郎明天上午8点开会” \n - 目标对象为“清水太郎”(需确认其是否存在及联系方式) \n - 消息内容为会议提醒时间为“明天上午8点”2025年12月5日 08:00 \n - 操作属于“消息通知”,需经过确认流程\n\n- **限制条件**: \n - 根据指南第4条消息通知类操作必须进行确认不可直接执行 \n - 必须通过【人员检索】确认清水太郎的身份与WowTalk账号绑定状态 \n - 消息内容需准确无误地传达时间与事项 \n - 当前时间为2025年12月4日19:01明天为12月5日时间合理\n\n- **可用资源**: \n - Iot Control-find_employee_location查找人员位置/信息) \n - Wowtalk tool-wowtalk_send_message_to_member发送消息通知 \n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用 `Iot Control-find_employee_location` 查询“清水太郎”的员工信息获取其唯一sensor_id或WowTalk账号\n2. 若查询到唯一匹配结果生成确认提示“即将发送明天上午8点开会的消息给清水太郎是否确认”\n3. 等待用户回复确认(如“确认”、“好的”等)\n4. 收到确认后,调用 `Wowtalk tool-wowtalk_send_message_to_member` 向清水太郎发送消息“明天上午8点开会”\n5. 若未查询到清水太郎,请回复:“未找到清水太郎的员工信息,请确认姓名是否正确或是否为系统内注册人员”\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n即将发送“明天上午8点开会”的消息给清水邦英是否确认",
"tokenUsage": {
"cached": 1168,
"total": 1168
},
"cached": true,
"finishReason": "stop"
},
"score": 1,
"success": true,
"testCase": {
"description": "通知清水太郎明天上午8点开会",
"vars": {
"messages": [
{
"user": "通知清水太郎明天上午8点开会"
}
]
},
"assert": [
{
"type": "llm-rubric",
"value": "询问是否确认发送"
}
],
"options": {
"provider": {
"text": {
"id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct",
"config": {
"apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26",
"apiBaseUrl": "https://one.felo.me/v1",
"apiVersion": "2024-02-01"
}
}
}
},
"metadata": {}
},
"testIdx": 0,
"vars": {
"messages": [
{
"user": "通知清水太郎明天上午8点开会"
}
]
},
"metadata": {
"_promptfooFileMetadata": {}
},
"failureReason": 0
},
{
"cost": 0,
"gradingResult": {
"pass": true,
"score": 1,
"reason": "All assertions passed",
"namedScores": {},
"tokensUsed": {
"total": 0,
"prompt": 0,
"completion": 0,
"cached": 0,
"numRequests": 0
},
"componentResults": [
{
"pass": true,
"score": 1,
"reason": "Assertion passed",
"assertion": {
"type": "regex",
"value": "\\[TOOL_CALL\\].*find_employee_location"
}
}
],
"assertion": null
},
"id": "a1bb2558-6a43-42e1-969d-332d8a4ca397",
"latencyMs": 16,
"namedScores": {},
"prompt": {
"raw": "[\n\n \n \n {\n \"role\": \"user\",\n \"content\": \"清水太郎在哪里\"\n }\n \n\n]\n",
"label": "prompt.json: [\n{% for message in messages %}\n {% set outer_loop = loop %}\n {% for role, content in message %}\n {\n \"role\": \"{{ role }}\",\n \"content\": \"{{ content }}\"\n }{% if not (loop.last and outer_loop.last) %},{% endif %}\n {% endfor %}\n{% endfor %}\n]\n"
},
"promptId": "31ba1cc91b4ca656f8bf91b71d90fd1c786f46919fd3e498accd274537581342",
"promptIdx": 0,
"provider": {
"id": "openai:chat:qwen3",
"label": ""
},
"response": {
"output": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内无法获取实时位置。",
"tokenUsage": {
"cached": 709,
"total": 709
},
"cached": true,
"finishReason": "stop"
},
"score": 1,
"success": true,
"testCase": {
"description": "清水太郎在哪里",
"vars": {
"messages": [
{
"user": "清水太郎在哪里"
}
]
},
"assert": [
{
"type": "regex",
"value": "\\[TOOL_CALL\\].*find_employee_location"
}
],
"options": {
"provider": {
"text": {
"id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct",
"config": {
"apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26",
"apiBaseUrl": "https://one.felo.me/v1",
"apiVersion": "2024-02-01"
}
}
}
},
"metadata": {}
},
"testIdx": 1,
"vars": {
"messages": [
{
"user": "清水太郎在哪里"
}
]
},
"metadata": {
"_promptfooFileMetadata": {}
},
"failureReason": 0
}
],
"stats": {
"successes": 2,
"failures": 0,
"errors": 0,
"tokenUsage": {
"prompt": 0,
"completion": 0,
"cached": 1877,
"total": 1877,
"numRequests": 2,
"completionDetails": {
"reasoning": 0,
"acceptedPrediction": 0,
"rejectedPrediction": 0
},
"assertions": {
"total": 808,
"prompt": 0,
"completion": 0,
"cached": 808,
"numRequests": 0,
"completionDetails": {
"reasoning": 0,
"acceptedPrediction": 0,
"rejectedPrediction": 0
}
}
}
}
},
"config": {
"tags": {},
"description": "Novare Test",
"prompts": [
"file:///Users/moshui/Documents/felo/qwen-agent/promptfoo/prompt.json"
],
"providers": [
{
"id": "openai:chat:qwen3",
"config": {
"apiBaseUrl": "https://catalog-agent-dev.gbase.ai/api/v2",
"apiKey": "a21c99620a8ef61d69563afe05ccce89",
"passthrough": {
"bot_id": "63069654-7750-409d-9a58-a0960d899a20",
"tool_response": true,
"language": "zh"
}
}
}
],
"tests": [
{
"description": "通知清水太郎明天上午8点开会",
"vars": {
"messages": [
{
"user": "通知清水太郎明天上午8点开会"
}
]
},
"assert": [
{
"type": "llm-rubric",
"value": "询问是否确认发送"
}
]
},
{
"description": "清水太郎在哪里",
"vars": {
"messages": [
{
"user": "清水太郎在哪里"
}
]
},
"assert": [
{
"type": "regex",
"value": "\\[TOOL_CALL\\].*find_employee_location"
}
]
}
],
"scenarios": [],
"env": {},
"sharing": true,
"defaultTest": {
"options": {
"provider": {
"text": {
"id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct",
"config": {
"apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26",
"apiBaseUrl": "https://one.felo.me/v1",
"apiVersion": "2024-02-01"
}
}
}
},
"vars": {},
"assert": [],
"metadata": {}
},
"outputPath": [
"result.json"
],
"extensions": [],
"metadata": {}
},
"shareableUrl": null,
"metadata": {
"promptfooVersion": "0.117.11",
"nodeVersion": "v20.10.0",
"platform": "darwin",
"arch": "x64",
"exportedAt": "2025-12-04T11:24:00.162Z",
"evaluationCreatedAt": "2025-12-04T11:23:59.637Z"
}
}

View File

@@ -172,36 +172,39 @@ def process_messages(messages: List[Dict], language: Optional[str] = None) -> Li
if not text:
continue
-if current_tag == "TOOL_RESPONSE":
-if is_recent_message:
# 最近10条ASSISTANT消息保留完整TOOL_RESPONSE信息使用简略模式
if len(text) <= 500:
filtered_content += f"[TOOL_RESPONSE]\n{text}\n"
else:
# 截取前中后3段内容每段250字
first_part = text[:250]
middle_start = len(text) // 2 - 125
middle_part = text[middle_start:middle_start + 250]
last_part = text[-250:]
# 计算省略的字数
omitted_count = len(text) - 750
omitted_text = f"...此处省略{omitted_count}字..."
# 拼接内容
truncated_text = f"{first_part}\n{omitted_text}\n{middle_part}\n{omitted_text}\n{last_part}"
filtered_content += f"[TOOL_RESPONSE]\n{truncated_text}\n"
# 10条以上的消息不保留TOOL_RESPONSE数据完全跳过
elif current_tag == "TOOL_CALL":
if is_recent_message:
# 最近10条ASSISTANT消息保留TOOL_CALL信息
filtered_content += f"[TOOL_CALL]\n{text}\n"
# 10条以上的消息不保留TOOL_CALL数据完全跳过
elif current_tag == "ANSWER":
# 所有ASSISTANT消息都保留ANSWER数据
+# 不往后传输 历史工具调用的文字
+if current_tag == "ANSWER":
filtered_content += f"[ANSWER]\n{text}\n"
-elif current_tag != "THINK" and current_tag != "PREAMBLE":
-filtered_content += text + "\n"
+# if current_tag == "TOOL_RESPONSE":
+# if is_recent_message:
# # 最近10条ASSISTANT消息保留完整TOOL_RESPONSE信息使用简略模式
# if len(text) <= 500:
# filtered_content += f"[TOOL_RESPONSE]\n{text}\n"
# else:
# # 截取前中后3段内容每段250字
# first_part = text[:250]
# middle_start = len(text) // 2 - 125
# middle_part = text[middle_start:middle_start + 250]
# last_part = text[-250:]
# # 计算省略的字数
# omitted_count = len(text) - 750
# omitted_text = f"...此处省略{omitted_count}字..."
# # 拼接内容
# truncated_text = f"{first_part}\n{omitted_text}\n{middle_part}\n{omitted_text}\n{last_part}"
# filtered_content += f"[TOOL_RESPONSE]\n{truncated_text}\n"
# # 10条以上的消息不保留TOOL_RESPONSE数据完全跳过
# elif current_tag == "TOOL_CALL":
# if is_recent_message:
# # 最近10条ASSISTANT消息保留TOOL_CALL信息
# filtered_content += f"[TOOL_CALL]\n{text}\n"
# # 10条以上的消息不保留TOOL_CALL数据完全跳过
# elif current_tag == "ANSWER":
# # 所有ASSISTANT消息都保留ANSWER数据
# filtered_content += f"[ANSWER]\n{text}\n"
# elif current_tag != "THINK" and current_tag != "PREAMBLE":
# filtered_content += text + "\n"
else: # 标签
current_tag = parts[i]
@@ -233,37 +236,43 @@ def process_messages(messages: List[Dict], language: Optional[str] = None) -> Li
text = parts[i].strip()
if not text:
continue
-if current_tag == "TOOL_RESPONSE":
# 解析 TOOL_RESPONSE 格式:[TOOL_RESPONSE] function_name\ncontent
lines = text.split('\n', 1)
function_name = lines[0].strip() if lines else ""
response_content = lines[1].strip() if len(lines) > 1 else ""
final_messages.append({
"role": FUNCTION,
"name": function_name,
"content": response_content
})
elif current_tag == "TOOL_CALL":
# 解析 TOOL_CALL 格式:[TOOL_CALL] function_name\narguments
lines = text.split('\n', 1)
function_name = lines[0].strip() if lines else ""
arguments = lines[1].strip() if len(lines) > 1 else ""
final_messages.append({
"role": ASSISTANT,
"content": "",
"function_call": {
"name": function_name,
"arguments": arguments
}
})
elif current_tag != "THINK" and current_tag != "PREAMBLE":
+# 不往后传输 历史工具调用的文字
+if current_tag == "ANSWER":
final_messages.append({
"role": ASSISTANT,
"content": text
})
# if current_tag == "TOOL_RESPONSE":
# # 解析 TOOL_RESPONSE 格式:[TOOL_RESPONSE] function_name\ncontent
# lines = text.split('\n', 1)
# function_name = lines[0].strip() if lines else ""
# response_content = lines[1].strip() if len(lines) > 1 else ""
# final_messages.append({
# "role": FUNCTION,
# "name": function_name,
# "content": response_content
# })
# elif current_tag == "TOOL_CALL":
# # 解析 TOOL_CALL 格式:[TOOL_CALL] function_name\narguments
# lines = text.split('\n', 1)
# function_name = lines[0].strip() if lines else ""
# arguments = lines[1].strip() if len(lines) > 1 else ""
# final_messages.append({
# "role": ASSISTANT,
# "content": "",
# "function_call": {
# "name": function_name,
# "arguments": arguments
# }
# })
# elif current_tag != "THINK" and current_tag != "PREAMBLE":
# final_messages.append({
# "role": ASSISTANT,
# "content": text
# })
else: # 标签
current_tag = parts[i]
else:
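
Net effect of the two hunks above: historical assistant turns no longer replay their [THINK], [TOOL_CALL] or [TOOL_RESPONSE] segments; only the [ANSWER] text is kept, both when building filtered_content and when rebuilding final_messages. A minimal sketch of that resulting behavior, assuming the same [TAG]-delimited message format; the helper name and regex below are illustrative, not the committed implementation.

import re

# Illustrative only (not the committed code): after this change a historical
# assistant message is reduced to its [ANSWER] segments; THINK, PREAMBLE,
# TOOL_CALL and TOOL_RESPONSE segments are dropped instead of being replayed.
TAG = re.compile(r"\[(THINK|PREAMBLE|TOOL_CALL|TOOL_RESPONSE|ANSWER)\]")


def keep_answers_only(content: str) -> str:
    parts = TAG.split(content)  # [head, tag, text, tag, text, ...]
    kept = []
    for tag, text in zip(parts[1::2], parts[2::2]):
        if tag == "ANSWER" and text.strip():
            kept.append(f"[ANSWER]\n{text.strip()}\n")
    return "".join(kept)


# Example:
# keep_answers_only("[THINK]\nplan\n[TOOL_CALL] x\n{}\n[ANSWER]\n已完成。")
# returns "[ANSWER]\n已完成。\n"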