This commit is contained in:
朱潮 2025-12-04 21:05:31 +08:00
commit d3a4a84713
12 changed files with 1377 additions and 15 deletions

View File

@ -158,7 +158,7 @@ class ModifiedAssistant(Assistant):
response = []
# 保持原有的最大调用次数限制
total_num_llm_calls_available = self.MAX_LLM_CALL_PER_RUN if hasattr(self, 'MAX_LLM_CALL_PER_RUN') else 10
total_num_llm_calls_available = self.MAX_LLM_CALL_PER_RUN if hasattr(self, 'MAX_LLM_CALL_PER_RUN') else 50
num_llm_calls_available = total_num_llm_calls_available
while num_llm_calls_available > 0:
num_llm_calls_available -= 1

View File

@ -18,7 +18,7 @@
- **优先级排序**: 确定任务执行顺序
### 3. 计划阶段 (Planning)
- **步骤制定**: 详细的执行步骤,步骤数量根据实际需求调整,每个步骤应具体可执行
- **步骤制定**: 只需要列出详细的工具调用步骤即可,不需要考虑回复用户的话术,步骤数量根据实际需求调整。
请按照上述思考框架进行完整分析,确保理解目标、分析问题和制定计划,
---
@ -54,4 +54,14 @@
## 输出格式
按顺序输出执行步骤:
[按1,2,3...n顺序列出具体可执行的步骤]
### 🎯 核心目标
[简洁描述用户的主要目标]
### 📋 现状分析
- **关键信息**: [从聊天记录中提取的重要信息]
- **限制条件**: [需要遵守的规则和约束]
- **可用资源**: [可以利用的工具和资源]
### ⚡ 计划阶段
**执行步骤**:
[按1,2,3...n顺序列出执行步骤]

View File

@ -0,0 +1,44 @@
env: {}
description: Novare Test
providers:
- id: openai:chat:qwen3
config:
apiBaseUrl: https://catalog-agent-dev.gbase.ai/api/v2
apiKey: a21c99620a8ef61d69563afe05ccce89
passthrough:
bot_id: 63069654-7750-409d-9a58-a0960d899a20
tool_response: true
language: zh
prompts:
- file://prompt.json
nunjucksFilters:
encode: ./encode.js
tests:
- vars:
question: 清水太郎在哪里
assert:
- type: regex
value: \[TOOL_CALL\].*find_employee_location
- vars:
question: 通知他明天上午8点开会
assert:
- type: llm-rubric
value: 询问是否确认发送
- vars:
question: 确认
assert:
- type: regex
value: \[TOOL_CALL\].*wowtalk_send_message_to_member
defaultTest:
options:
provider:
text:
id: openai:chat:qwen/qwen3-next-80b-a3b-instruct
config:
apiKey: sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26
apiBaseUrl: https://one.felo.me/v1
apiVersion: '2024-02-01'
evaluateOptions: {}
writeLatestResults: true
sharing: true

View File

@ -0,0 +1,11 @@
module.exports = function (str) {
if (typeof str !== 'string') {
return str;
}
return str
.replace(/\\/g, '\\\\') // 转义反斜杠
.replace(/"/g, '\\"') // 转义双引号
.replace(/\n/g, '\\n') // 转义换行符
.replace(/\r/g, '\\r') // 转义回车符
.replace(/\t/g, '\\t'); // 转义制表符
};

View File

@ -0,0 +1,16 @@
[
{% for completion in _conversation %}
{
"role": "user",
"content": "{{ completion.input | encode }}"
},
{
"role": "assistant",
"content": "{{ completion.output | encode }}"
},
{% endfor %}
{
"role": "user",
"content": "{{ question | encode }}"
}
]

View File

@ -0,0 +1,612 @@
{
"evalId": "eval-l9e-2025-12-04T12:17:14",
"results": {
"version": 3,
"timestamp": "2025-12-04T12:17:14.502Z",
"prompts": [
{
"raw": "[\n{% for completion in _conversation %}\n {\n \"role\": \"user\",\n \"content\": \"{{ completion.input | encode }}\"\n },\n {\n \"role\": \"assistant\",\n \"content\": \"{{ completion.output | encode }}\"\n },\n{% endfor %}\n {\n \"role\": \"user\",\n \"content\": \"{{ question | encode }}\"\n }\n]\n",
"label": "prompt.json: [\n{% for completion in _conversation %}\n {\n \"role\": \"user\",\n \"content\": \"{{ completion.input | encode }}\"\n },\n {\n \"role\": \"assistant\",\n \"content\": \"{{ completion.output | encode }}\"\n },\n{% endfor %}\n {\n \"role\": \"user\",\n \"content\": \"{{ question | encode }}\"\n }\n]\n",
"id": "49ff9901f402b7e42593f2724f05794c15d1c1bbd5d7a46c3e58198f7bd02c7e",
"provider": "openai:chat:qwen3",
"metrics": {
"score": 3,
"testPassCount": 3,
"testFailCount": 0,
"testErrorCount": 0,
"assertPassCount": 3,
"assertFailCount": 0,
"totalLatencyMs": 18196,
"tokenUsage": {
"prompt": 596,
"completion": 2017,
"cached": 709,
"total": 3322,
"numRequests": 3,
"completionDetails": {
"reasoning": 0,
"acceptedPrediction": 0,
"rejectedPrediction": 0
},
"assertions": {
"total": 746,
"prompt": 697,
"completion": 49,
"cached": 0,
"numRequests": 0,
"completionDetails": {
"reasoning": 0,
"acceptedPrediction": 0,
"rejectedPrediction": 0
}
}
},
"namedScores": {},
"namedScoresCount": {},
"cost": 0
}
}
],
"results": [
{
"cost": 0,
"gradingResult": {
"pass": true,
"score": 1,
"reason": "All assertions passed",
"namedScores": {},
"tokensUsed": {
"total": 0,
"prompt": 0,
"completion": 0,
"cached": 0,
"numRequests": 0
},
"componentResults": [
{
"pass": true,
"score": 1,
"reason": "Assertion passed",
"assertion": {
"type": "regex",
"value": "\\[TOOL_CALL\\].*find_employee_location"
}
}
],
"assertion": null
},
"id": "7595e74d-334a-4ed9-b01c-8373f6e8dffc",
"latencyMs": 12,
"namedScores": {},
"prompt": {
"raw": "[\n\n {\n \"role\": \"user\",\n \"content\": \"清水太郎在哪里\"\n }\n]\n",
"label": "prompt.json: [\n{% for completion in _conversation %}\n {\n \"role\": \"user\",\n \"content\": \"{{ completion.input | encode }}\"\n },\n {\n \"role\": \"assistant\",\n \"content\": \"{{ completion.output | encode }}\"\n },\n{% endfor %}\n {\n \"role\": \"user\",\n \"content\": \"{{ question | encode }}\"\n }\n]\n"
},
"promptId": "49ff9901f402b7e42593f2724f05794c15d1c1bbd5d7a46c3e58198f7bd02c7e",
"promptIdx": 0,
"provider": {
"id": "openai:chat:qwen3",
"label": ""
},
"response": {
"output": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内无法获取实时位置。",
"tokenUsage": {
"cached": 709,
"total": 709
},
"cached": true,
"finishReason": "stop"
},
"score": 1,
"success": true,
"testCase": {
"vars": {
"question": "清水太郎在哪里",
"_conversation": []
},
"assert": [
{
"type": "regex",
"value": "\\[TOOL_CALL\\].*find_employee_location"
}
],
"options": {
"provider": {
"text": {
"id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct",
"config": {
"apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26",
"apiBaseUrl": "https://one.felo.me/v1",
"apiVersion": "2024-02-01"
}
}
}
},
"metadata": {}
},
"testIdx": 0,
"vars": {
"question": "清水太郎在哪里",
"_conversation": []
},
"metadata": {
"_promptfooFileMetadata": {}
},
"failureReason": 0
},
{
"cost": 0,
"gradingResult": {
"pass": true,
"score": 1,
"reason": "All assertions passed",
"namedScores": {},
"tokensUsed": {
"total": 746,
"prompt": 697,
"completion": 49,
"cached": 0,
"numRequests": 0
},
"componentResults": [
{
"assertion": {
"type": "llm-rubric",
"value": "询问是否确认发送"
},
"pass": true,
"score": 1,
"reason": "输出明确询问用户是否确认向清水邦英发送会议通知,符合'询问是否确认发送'的rubric要求。",
"tokensUsed": {
"total": 746,
"prompt": 697,
"completion": 49,
"cached": 0,
"numRequests": 0,
"completionDetails": {
"reasoning": 0,
"acceptedPrediction": 0,
"rejectedPrediction": 0
}
}
}
],
"assertion": null
},
"id": "92bba056-d302-4c3b-ac01-deb76e9e74d4",
"latencyMs": 9701,
"namedScores": {},
"prompt": {
"raw": "[\n\n {\n \"role\": \"user\",\n \"content\": \"清水太郎在哪里\"\n },\n {\n \"role\": \"assistant\",\n \"content\": \"[THINK]\\n### 🎯 核心目标\\n查找名为“清水太郎”的人员位置。\\n\\n### 📋 现状分析\\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\\n- **限制条件**: \\n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\\n- **可用资源**: \\n - 工具:`Iot Control-find_employee_location`(人员检索工具)\\n\\n### ⚡ 计划阶段\\n**执行步骤**:\\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\\\"清水太郎\\\"`,查询该人员的当前位置。\\n\\n[TOOL_CALL] Iot Control-find_employee_location\\n{\\\"name\\\": \\\"清水太郎\\\"}\\n[TOOL_RESPONSE] Iot Control-find_employee_location\\n{\\\"matched_count\\\":1,\\\"results\\\":[{\\\"name\\\":\\\"清水 邦英\\\",\\\"wowtalk_id\\\":\\\"u226160\\\",\\\"sensor_id\\\":\\\"3246789\\\",\\\"confidence\\\":85,\\\"location_status\\\":\\\"not_in_range\\\",\\\"error_message\\\":\\\"Sensor不在测位范围内或未检测到位置信号\\\",\\\"coordinates\\\":null,\\\"location\\\":null}]}\\n[ANSWER]\\n清水邦英先生目前未在测位范围内无法获取实时位置。\"\n },\n\n {\n \"role\": \"user\",\n \"content\": \"通知他明天上午8点开会\"\n }\n]\n",
"label": "prompt.json: [\n{% for completion in _conversation %}\n {\n \"role\": \"user\",\n \"content\": \"{{ completion.input | encode }}\"\n },\n {\n \"role\": \"assistant\",\n \"content\": \"{{ completion.output | encode }}\"\n },\n{% endfor %}\n {\n \"role\": \"user\",\n \"content\": \"{{ question | encode }}\"\n }\n]\n"
},
"promptId": "49ff9901f402b7e42593f2724f05794c15d1c1bbd5d7a46c3e58198f7bd02c7e",
"promptIdx": 0,
"provider": {
"id": "openai:chat:qwen3",
"label": ""
},
"response": {
"output": "[THINK]\n### 🎯 核心目标\n通知清水太郎明天上午8点开会但系统返回的是“清水邦英”而非“清水太郎”需确认人员身份并发送会议提醒。\n\n### 📋 现状分析\n- **关键信息**:\n - 用户查询“清水太郎”,但系统返回匹配人为“清水 邦英”(姓名相近但不完全一致)。\n - 清水邦英的传感器信号不在测位范围内,无法获取位置,但不影响消息通知。\n - 清水邦英的wowtalk_id为“u226160”可用于发送消息。\n - 用户意图是发送“明天上午8点开会”的通知。\n- **限制条件**:\n - 名字不完全匹配(太郎 vs 邦英),可能存在输入误差,需谨慎确认。\n - 消息通知属于“需要确认”的操作根据指南第4条不可直接发送。\n - 仅当确认人员为同一人时,才可发送通知。\n- **可用资源**:\n - Iot Control-find_employee_location查询人员信息\n - Wowtalk tool-wowtalk_send_message_to_member发送消息\n - 可用的wowtalk_idu226160清水邦英\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用 Iot Control-find_employee_location查询名称中包含“太郎”的员工确认是否存在“清水太郎”。\n2. 若步骤1返回结果为空或无匹配项则向用户发送确认请求“您要通知的是清水邦英wowtalk_id: u226160系统未查到清水太郎是否确认向清水邦英发送明天上午8点开会的通知”\n3. 等待用户明确回复“确认”或“是”。\n4. 收到确认后,调用 Wowtalk tool-wowtalk_send_message_to_member向 wowtalk_id: u226160 发送消息“明天上午8点开会”。\n\n[ANSWER]\n您要通知的是“清水邦英”wowtalk_id: u226160系统未查到“清水太郎”是否确认向清水邦英发送“明天上午8点开会”的通知",
"tokenUsage": {
"total": 1157,
"prompt": 261,
"completion": 896
},
"cached": false,
"finishReason": "stop"
},
"score": 1,
"success": true,
"testCase": {
"vars": {
"question": "通知他明天上午8点开会",
"_conversation": [
{
"prompt": [
{
"role": "user",
"content": "清水太郎在哪里"
}
],
"input": "清水太郎在哪里",
"output": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内无法获取实时位置。"
},
{
"prompt": [
{
"role": "user",
"content": "清水太郎在哪里"
},
{
"role": "assistant",
"content": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内无法获取实时位置。"
},
{
"role": "user",
"content": "通知他明天上午8点开会"
}
],
"input": "通知他明天上午8点开会",
"output": "[THINK]\n### 🎯 核心目标\n通知清水太郎明天上午8点开会但系统返回的是“清水邦英”而非“清水太郎”需确认人员身份并发送会议提醒。\n\n### 📋 现状分析\n- **关键信息**:\n - 用户查询“清水太郎”,但系统返回匹配人为“清水 邦英”(姓名相近但不完全一致)。\n - 清水邦英的传感器信号不在测位范围内,无法获取位置,但不影响消息通知。\n - 清水邦英的wowtalk_id为“u226160”可用于发送消息。\n - 用户意图是发送“明天上午8点开会”的通知。\n- **限制条件**:\n - 名字不完全匹配(太郎 vs 邦英),可能存在输入误差,需谨慎确认。\n - 消息通知属于“需要确认”的操作根据指南第4条不可直接发送。\n - 仅当确认人员为同一人时,才可发送通知。\n- **可用资源**:\n - Iot Control-find_employee_location查询人员信息\n - Wowtalk tool-wowtalk_send_message_to_member发送消息\n - 可用的wowtalk_idu226160清水邦英\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用 Iot Control-find_employee_location查询名称中包含“太郎”的员工确认是否存在“清水太郎”。\n2. 若步骤1返回结果为空或无匹配项则向用户发送确认请求“您要通知的是清水邦英wowtalk_id: u226160系统未查到清水太郎是否确认向清水邦英发送明天上午8点开会的通知”\n3. 等待用户明确回复“确认”或“是”。\n4. 收到确认后,调用 Wowtalk tool-wowtalk_send_message_to_member向 wowtalk_id: u226160 发送消息“明天上午8点开会”。\n\n[ANSWER]\n您要通知的是“清水邦英”wowtalk_id: u226160系统未查到“清水太郎”是否确认向清水邦英发送“明天上午8点开会”的通知"
}
]
},
"assert": [
{
"type": "llm-rubric",
"value": "询问是否确认发送"
}
],
"options": {
"provider": {
"text": {
"id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct",
"config": {
"apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26",
"apiBaseUrl": "https://one.felo.me/v1",
"apiVersion": "2024-02-01"
}
}
}
},
"metadata": {}
},
"testIdx": 1,
"vars": {
"question": "通知他明天上午8点开会",
"_conversation": [
{
"prompt": [
{
"role": "user",
"content": "清水太郎在哪里"
}
],
"input": "清水太郎在哪里",
"output": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内无法获取实时位置。"
},
{
"prompt": [
{
"role": "user",
"content": "清水太郎在哪里"
},
{
"role": "assistant",
"content": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内无法获取实时位置。"
},
{
"role": "user",
"content": "通知他明天上午8点开会"
}
],
"input": "通知他明天上午8点开会",
"output": "[THINK]\n### 🎯 核心目标\n通知清水太郎明天上午8点开会但系统返回的是“清水邦英”而非“清水太郎”需确认人员身份并发送会议提醒。\n\n### 📋 现状分析\n- **关键信息**:\n - 用户查询“清水太郎”,但系统返回匹配人为“清水 邦英”(姓名相近但不完全一致)。\n - 清水邦英的传感器信号不在测位范围内,无法获取位置,但不影响消息通知。\n - 清水邦英的wowtalk_id为“u226160”可用于发送消息。\n - 用户意图是发送“明天上午8点开会”的通知。\n- **限制条件**:\n - 名字不完全匹配(太郎 vs 邦英),可能存在输入误差,需谨慎确认。\n - 消息通知属于“需要确认”的操作根据指南第4条不可直接发送。\n - 仅当确认人员为同一人时,才可发送通知。\n- **可用资源**:\n - Iot Control-find_employee_location查询人员信息\n - Wowtalk tool-wowtalk_send_message_to_member发送消息\n - 可用的wowtalk_idu226160清水邦英\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用 Iot Control-find_employee_location查询名称中包含“太郎”的员工确认是否存在“清水太郎”。\n2. 若步骤1返回结果为空或无匹配项则向用户发送确认请求“您要通知的是清水邦英wowtalk_id: u226160系统未查到清水太郎是否确认向清水邦英发送明天上午8点开会的通知”\n3. 等待用户明确回复“确认”或“是”。\n4. 收到确认后,调用 Wowtalk tool-wowtalk_send_message_to_member向 wowtalk_id: u226160 发送消息“明天上午8点开会”。\n\n[ANSWER]\n您要通知的是“清水邦英”wowtalk_id: u226160系统未查到“清水太郎”是否确认向清水邦英发送“明天上午8点开会”的通知"
}
]
},
"metadata": {
"_promptfooFileMetadata": {}
},
"failureReason": 0
},
{
"cost": 0,
"gradingResult": {
"pass": true,
"score": 1,
"reason": "All assertions passed",
"namedScores": {},
"tokensUsed": {
"total": 0,
"prompt": 0,
"completion": 0,
"cached": 0,
"numRequests": 0
},
"componentResults": [
{
"pass": true,
"score": 1,
"reason": "Assertion passed",
"assertion": {
"type": "regex",
"value": "\\[TOOL_CALL\\].*wowtalk_send_message_to_member"
}
}
],
"assertion": null
},
"id": "647361f4-33c5-4f1a-aa29-edb5c0f51841",
"latencyMs": 8483,
"namedScores": {},
"prompt": {
"raw": "[\n\n {\n \"role\": \"user\",\n \"content\": \"清水太郎在哪里\"\n },\n {\n \"role\": \"assistant\",\n \"content\": \"[THINK]\\n### 🎯 核心目标\\n查找名为“清水太郎”的人员位置。\\n\\n### 📋 现状分析\\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\\n- **限制条件**: \\n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\\n- **可用资源**: \\n - 工具:`Iot Control-find_employee_location`(人员检索工具)\\n\\n### ⚡ 计划阶段\\n**执行步骤**:\\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\\\"清水太郎\\\"`,查询该人员的当前位置。\\n\\n[TOOL_CALL] Iot Control-find_employee_location\\n{\\\"name\\\": \\\"清水太郎\\\"}\\n[TOOL_RESPONSE] Iot Control-find_employee_location\\n{\\\"matched_count\\\":1,\\\"results\\\":[{\\\"name\\\":\\\"清水 邦英\\\",\\\"wowtalk_id\\\":\\\"u226160\\\",\\\"sensor_id\\\":\\\"3246789\\\",\\\"confidence\\\":85,\\\"location_status\\\":\\\"not_in_range\\\",\\\"error_message\\\":\\\"Sensor不在测位范围内或未检测到位置信号\\\",\\\"coordinates\\\":null,\\\"location\\\":null}]}\\n[ANSWER]\\n清水邦英先生目前未在测位范围内无法获取实时位置。\"\n },\n\n {\n \"role\": \"user\",\n \"content\": \"通知他明天上午8点开会\"\n },\n {\n \"role\": \"assistant\",\n \"content\": \"[THINK]\\n### 🎯 核心目标\\n通知清水太郎明天上午8点开会但系统返回的是“清水邦英”而非“清水太郎”需确认人员身份并发送会议提醒。\\n\\n### 📋 现状分析\\n- **关键信息**:\\n - 用户查询“清水太郎”,但系统返回匹配人为“清水 邦英”(姓名相近但不完全一致)。\\n - 清水邦英的传感器信号不在测位范围内,无法获取位置,但不影响消息通知。\\n - 清水邦英的wowtalk_id为“u226160”可用于发送消息。\\n - 用户意图是发送“明天上午8点开会”的通知。\\n- **限制条件**:\\n - 名字不完全匹配(太郎 vs 邦英),可能存在输入误差,需谨慎确认。\\n - 消息通知属于“需要确认”的操作根据指南第4条不可直接发送。\\n - 仅当确认人员为同一人时,才可发送通知。\\n- **可用资源**:\\n - Iot Control-find_employee_location查询人员信息\\n - Wowtalk tool-wowtalk_send_message_to_member发送消息\\n - 可用的wowtalk_idu226160清水邦英\\n\\n### ⚡ 计划阶段\\n**执行步骤**:\\n1. 调用 Iot Control-find_employee_location查询名称中包含“太郎”的员工确认是否存在“清水太郎”。\\n2. 若步骤1返回结果为空或无匹配项则向用户发送确认请求“您要通知的是清水邦英wowtalk_id: u226160系统未查到清水太郎是否确认向清水邦英发送明天上午8点开会的通知”\\n3. 等待用户明确回复“确认”或“是”。\\n4. 收到确认后,调用 Wowtalk tool-wowtalk_send_message_to_member向 wowtalk_id: u226160 发送消息“明天上午8点开会”。\\n\\n[ANSWER]\\n您要通知的是“清水邦英”wowtalk_id: u226160系统未查到“清水太郎”是否确认向清水邦英发送“明天上午8点开会”的通知\"\n },\n\n {\n \"role\": \"user\",\n \"content\": \"确认\"\n }\n]\n",
"label": "prompt.json: [\n{% for completion in _conversation %}\n {\n \"role\": \"user\",\n \"content\": \"{{ completion.input | encode }}\"\n },\n {\n \"role\": \"assistant\",\n \"content\": \"{{ completion.output | encode }}\"\n },\n{% endfor %}\n {\n \"role\": \"user\",\n \"content\": \"{{ question | encode }}\"\n }\n]\n"
},
"promptId": "49ff9901f402b7e42593f2724f05794c15d1c1bbd5d7a46c3e58198f7bd02c7e",
"promptIdx": 0,
"provider": {
"id": "openai:chat:qwen3",
"label": ""
},
"response": {
"output": "[THINK]\n### 🎯 核心目标\n向员工清水邦英wowtalk_id: u226160发送明天上午8点开会的通知。\n\n### 📋 现状分析\n- **关键信息**: \n - 用户最初询问“清水太郎”系统返回“清水邦英”为唯一匹配人confidence: 85%)。\n - 用户已确认“清水邦英”为通知对象并明确指令“通知他明天上午8点开会”。\n - 清水邦英的wowtalk_id为u226160具备消息通知的唯一身份标识。\n- **限制条件**:\n - 根据【消息通知】规范,发送通知前必须获得用户明确确认,当前用户已确认,可直接执行。\n - 仅可使用授权工具完成操作,不得使用非指定工具。\n - 通知内容需精确传达,不得增删语义。\n- **可用资源**:\n - Wowtalk tool-wowtalk_send_message_to_member用于向指定wowtalk_id发送消息。\n - Iot Control-find_employee_location已使用并确认人员身份无需重复调用。\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 Wowtalk tool-wowtalk_send_message_to_member参数为\n - wowtalk_id: \"u226160\"\n - message: \"明天上午8点开会\"\n\n[TOOL_CALL] Wowtalk tool-wowtalk_send_message_to_member\n{\"to_account\": \"u226160\", \"message_content\": \"明天上午8点开会\"}\n[TOOL_RESPONSE] Wowtalk tool-wowtalk_send_message_to_member\n{\"success\":false,\"error\":\"API调用失败: 个人账户不存在\",\"error_code\":108,\"error_type\":\"WowTalkAPIError\",\"response_data\":{\"result_code\":108,\"api_version\":\"1.0\",\"request_id\":\"shimztest19-1764850653920\",\"message\":\"The user does not exist.\",\"response_data\":\"\"},\"bot_id\":\"63069654-7750-409d-9a58-a0960d899a20\"}\n[ANSWER]\n通知失败该用户在 WowTalk 系统中不存在,请确认员工信息或检查账号是否正确。",
"tokenUsage": {
"total": 1456,
"prompt": 335,
"completion": 1121
},
"cached": false,
"finishReason": "stop"
},
"score": 1,
"success": true,
"testCase": {
"vars": {
"question": "确认",
"_conversation": [
{
"prompt": [
{
"role": "user",
"content": "清水太郎在哪里"
}
],
"input": "清水太郎在哪里",
"output": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内无法获取实时位置。"
},
{
"prompt": [
{
"role": "user",
"content": "清水太郎在哪里"
},
{
"role": "assistant",
"content": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内无法获取实时位置。"
},
{
"role": "user",
"content": "通知他明天上午8点开会"
}
],
"input": "通知他明天上午8点开会",
"output": "[THINK]\n### 🎯 核心目标\n通知清水太郎明天上午8点开会但系统返回的是“清水邦英”而非“清水太郎”需确认人员身份并发送会议提醒。\n\n### 📋 现状分析\n- **关键信息**:\n - 用户查询“清水太郎”,但系统返回匹配人为“清水 邦英”(姓名相近但不完全一致)。\n - 清水邦英的传感器信号不在测位范围内,无法获取位置,但不影响消息通知。\n - 清水邦英的wowtalk_id为“u226160”可用于发送消息。\n - 用户意图是发送“明天上午8点开会”的通知。\n- **限制条件**:\n - 名字不完全匹配(太郎 vs 邦英),可能存在输入误差,需谨慎确认。\n - 消息通知属于“需要确认”的操作根据指南第4条不可直接发送。\n - 仅当确认人员为同一人时,才可发送通知。\n- **可用资源**:\n - Iot Control-find_employee_location查询人员信息\n - Wowtalk tool-wowtalk_send_message_to_member发送消息\n - 可用的wowtalk_idu226160清水邦英\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用 Iot Control-find_employee_location查询名称中包含“太郎”的员工确认是否存在“清水太郎”。\n2. 若步骤1返回结果为空或无匹配项则向用户发送确认请求“您要通知的是清水邦英wowtalk_id: u226160系统未查到清水太郎是否确认向清水邦英发送明天上午8点开会的通知”\n3. 等待用户明确回复“确认”或“是”。\n4. 收到确认后,调用 Wowtalk tool-wowtalk_send_message_to_member向 wowtalk_id: u226160 发送消息“明天上午8点开会”。\n\n[ANSWER]\n您要通知的是“清水邦英”wowtalk_id: u226160系统未查到“清水太郎”是否确认向清水邦英发送“明天上午8点开会”的通知"
},
{
"prompt": [
{
"role": "user",
"content": "清水太郎在哪里"
},
{
"role": "assistant",
"content": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内无法获取实时位置。"
},
{
"role": "user",
"content": "通知他明天上午8点开会"
},
{
"role": "assistant",
"content": "[THINK]\n### 🎯 核心目标\n通知清水太郎明天上午8点开会但系统返回的是“清水邦英”而非“清水太郎”需确认人员身份并发送会议提醒。\n\n### 📋 现状分析\n- **关键信息**:\n - 用户查询“清水太郎”,但系统返回匹配人为“清水 邦英”(姓名相近但不完全一致)。\n - 清水邦英的传感器信号不在测位范围内,无法获取位置,但不影响消息通知。\n - 清水邦英的wowtalk_id为“u226160”可用于发送消息。\n - 用户意图是发送“明天上午8点开会”的通知。\n- **限制条件**:\n - 名字不完全匹配(太郎 vs 邦英),可能存在输入误差,需谨慎确认。\n - 消息通知属于“需要确认”的操作根据指南第4条不可直接发送。\n - 仅当确认人员为同一人时,才可发送通知。\n- **可用资源**:\n - Iot Control-find_employee_location查询人员信息\n - Wowtalk tool-wowtalk_send_message_to_member发送消息\n - 可用的wowtalk_idu226160清水邦英\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用 Iot Control-find_employee_location查询名称中包含“太郎”的员工确认是否存在“清水太郎”。\n2. 若步骤1返回结果为空或无匹配项则向用户发送确认请求“您要通知的是清水邦英wowtalk_id: u226160系统未查到清水太郎是否确认向清水邦英发送明天上午8点开会的通知”\n3. 等待用户明确回复“确认”或“是”。\n4. 收到确认后,调用 Wowtalk tool-wowtalk_send_message_to_member向 wowtalk_id: u226160 发送消息“明天上午8点开会”。\n\n[ANSWER]\n您要通知的是“清水邦英”wowtalk_id: u226160系统未查到“清水太郎”是否确认向清水邦英发送“明天上午8点开会”的通知"
},
{
"role": "user",
"content": "确认"
}
],
"input": "确认",
"output": "[THINK]\n### 🎯 核心目标\n向员工清水邦英wowtalk_id: u226160发送明天上午8点开会的通知。\n\n### 📋 现状分析\n- **关键信息**: \n - 用户最初询问“清水太郎”系统返回“清水邦英”为唯一匹配人confidence: 85%)。\n - 用户已确认“清水邦英”为通知对象并明确指令“通知他明天上午8点开会”。\n - 清水邦英的wowtalk_id为u226160具备消息通知的唯一身份标识。\n- **限制条件**:\n - 根据【消息通知】规范,发送通知前必须获得用户明确确认,当前用户已确认,可直接执行。\n - 仅可使用授权工具完成操作,不得使用非指定工具。\n - 通知内容需精确传达,不得增删语义。\n- **可用资源**:\n - Wowtalk tool-wowtalk_send_message_to_member用于向指定wowtalk_id发送消息。\n - Iot Control-find_employee_location已使用并确认人员身份无需重复调用。\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 Wowtalk tool-wowtalk_send_message_to_member参数为\n - wowtalk_id: \"u226160\"\n - message: \"明天上午8点开会\"\n\n[TOOL_CALL] Wowtalk tool-wowtalk_send_message_to_member\n{\"to_account\": \"u226160\", \"message_content\": \"明天上午8点开会\"}\n[TOOL_RESPONSE] Wowtalk tool-wowtalk_send_message_to_member\n{\"success\":false,\"error\":\"API调用失败: 个人账户不存在\",\"error_code\":108,\"error_type\":\"WowTalkAPIError\",\"response_data\":{\"result_code\":108,\"api_version\":\"1.0\",\"request_id\":\"shimztest19-1764850653920\",\"message\":\"The user does not exist.\",\"response_data\":\"\"},\"bot_id\":\"63069654-7750-409d-9a58-a0960d899a20\"}\n[ANSWER]\n通知失败该用户在 WowTalk 系统中不存在,请确认员工信息或检查账号是否正确。"
}
]
},
"assert": [
{
"type": "regex",
"value": "\\[TOOL_CALL\\].*wowtalk_send_message_to_member"
}
],
"options": {
"provider": {
"text": {
"id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct",
"config": {
"apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26",
"apiBaseUrl": "https://one.felo.me/v1",
"apiVersion": "2024-02-01"
}
}
}
},
"metadata": {}
},
"testIdx": 2,
"vars": {
"question": "确认",
"_conversation": [
{
"prompt": [
{
"role": "user",
"content": "清水太郎在哪里"
}
],
"input": "清水太郎在哪里",
"output": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内无法获取实时位置。"
},
{
"prompt": [
{
"role": "user",
"content": "清水太郎在哪里"
},
{
"role": "assistant",
"content": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内无法获取实时位置。"
},
{
"role": "user",
"content": "通知他明天上午8点开会"
}
],
"input": "通知他明天上午8点开会",
"output": "[THINK]\n### 🎯 核心目标\n通知清水太郎明天上午8点开会但系统返回的是“清水邦英”而非“清水太郎”需确认人员身份并发送会议提醒。\n\n### 📋 现状分析\n- **关键信息**:\n - 用户查询“清水太郎”,但系统返回匹配人为“清水 邦英”(姓名相近但不完全一致)。\n - 清水邦英的传感器信号不在测位范围内,无法获取位置,但不影响消息通知。\n - 清水邦英的wowtalk_id为“u226160”可用于发送消息。\n - 用户意图是发送“明天上午8点开会”的通知。\n- **限制条件**:\n - 名字不完全匹配(太郎 vs 邦英),可能存在输入误差,需谨慎确认。\n - 消息通知属于“需要确认”的操作根据指南第4条不可直接发送。\n - 仅当确认人员为同一人时,才可发送通知。\n- **可用资源**:\n - Iot Control-find_employee_location查询人员信息\n - Wowtalk tool-wowtalk_send_message_to_member发送消息\n - 可用的wowtalk_idu226160清水邦英\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用 Iot Control-find_employee_location查询名称中包含“太郎”的员工确认是否存在“清水太郎”。\n2. 若步骤1返回结果为空或无匹配项则向用户发送确认请求“您要通知的是清水邦英wowtalk_id: u226160系统未查到清水太郎是否确认向清水邦英发送明天上午8点开会的通知”\n3. 等待用户明确回复“确认”或“是”。\n4. 收到确认后,调用 Wowtalk tool-wowtalk_send_message_to_member向 wowtalk_id: u226160 发送消息“明天上午8点开会”。\n\n[ANSWER]\n您要通知的是“清水邦英”wowtalk_id: u226160系统未查到“清水太郎”是否确认向清水邦英发送“明天上午8点开会”的通知"
},
{
"prompt": [
{
"role": "user",
"content": "清水太郎在哪里"
},
{
"role": "assistant",
"content": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内无法获取实时位置。"
},
{
"role": "user",
"content": "通知他明天上午8点开会"
},
{
"role": "assistant",
"content": "[THINK]\n### 🎯 核心目标\n通知清水太郎明天上午8点开会但系统返回的是“清水邦英”而非“清水太郎”需确认人员身份并发送会议提醒。\n\n### 📋 现状分析\n- **关键信息**:\n - 用户查询“清水太郎”,但系统返回匹配人为“清水 邦英”(姓名相近但不完全一致)。\n - 清水邦英的传感器信号不在测位范围内,无法获取位置,但不影响消息通知。\n - 清水邦英的wowtalk_id为“u226160”可用于发送消息。\n - 用户意图是发送“明天上午8点开会”的通知。\n- **限制条件**:\n - 名字不完全匹配(太郎 vs 邦英),可能存在输入误差,需谨慎确认。\n - 消息通知属于“需要确认”的操作根据指南第4条不可直接发送。\n - 仅当确认人员为同一人时,才可发送通知。\n- **可用资源**:\n - Iot Control-find_employee_location查询人员信息\n - Wowtalk tool-wowtalk_send_message_to_member发送消息\n - 可用的wowtalk_idu226160清水邦英\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用 Iot Control-find_employee_location查询名称中包含“太郎”的员工确认是否存在“清水太郎”。\n2. 若步骤1返回结果为空或无匹配项则向用户发送确认请求“您要通知的是清水邦英wowtalk_id: u226160系统未查到清水太郎是否确认向清水邦英发送明天上午8点开会的通知”\n3. 等待用户明确回复“确认”或“是”。\n4. 收到确认后,调用 Wowtalk tool-wowtalk_send_message_to_member向 wowtalk_id: u226160 发送消息“明天上午8点开会”。\n\n[ANSWER]\n您要通知的是“清水邦英”wowtalk_id: u226160系统未查到“清水太郎”是否确认向清水邦英发送“明天上午8点开会”的通知"
},
{
"role": "user",
"content": "确认"
}
],
"input": "确认",
"output": "[THINK]\n### 🎯 核心目标\n向员工清水邦英wowtalk_id: u226160发送明天上午8点开会的通知。\n\n### 📋 现状分析\n- **关键信息**: \n - 用户最初询问“清水太郎”系统返回“清水邦英”为唯一匹配人confidence: 85%)。\n - 用户已确认“清水邦英”为通知对象并明确指令“通知他明天上午8点开会”。\n - 清水邦英的wowtalk_id为u226160具备消息通知的唯一身份标识。\n- **限制条件**:\n - 根据【消息通知】规范,发送通知前必须获得用户明确确认,当前用户已确认,可直接执行。\n - 仅可使用授权工具完成操作,不得使用非指定工具。\n - 通知内容需精确传达,不得增删语义。\n- **可用资源**:\n - Wowtalk tool-wowtalk_send_message_to_member用于向指定wowtalk_id发送消息。\n - Iot Control-find_employee_location已使用并确认人员身份无需重复调用。\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 Wowtalk tool-wowtalk_send_message_to_member参数为\n - wowtalk_id: \"u226160\"\n - message: \"明天上午8点开会\"\n\n[TOOL_CALL] Wowtalk tool-wowtalk_send_message_to_member\n{\"to_account\": \"u226160\", \"message_content\": \"明天上午8点开会\"}\n[TOOL_RESPONSE] Wowtalk tool-wowtalk_send_message_to_member\n{\"success\":false,\"error\":\"API调用失败: 个人账户不存在\",\"error_code\":108,\"error_type\":\"WowTalkAPIError\",\"response_data\":{\"result_code\":108,\"api_version\":\"1.0\",\"request_id\":\"shimztest19-1764850653920\",\"message\":\"The user does not exist.\",\"response_data\":\"\"},\"bot_id\":\"63069654-7750-409d-9a58-a0960d899a20\"}\n[ANSWER]\n通知失败该用户在 WowTalk 系统中不存在,请确认员工信息或检查账号是否正确。"
}
]
},
"metadata": {
"_promptfooFileMetadata": {}
},
"failureReason": 0
}
],
"stats": {
"successes": 3,
"failures": 0,
"errors": 0,
"tokenUsage": {
"prompt": 596,
"completion": 2017,
"cached": 709,
"total": 3322,
"numRequests": 3,
"completionDetails": {
"reasoning": 0,
"acceptedPrediction": 0,
"rejectedPrediction": 0
},
"assertions": {
"total": 746,
"prompt": 697,
"completion": 49,
"cached": 0,
"numRequests": 0,
"completionDetails": {
"reasoning": 0,
"acceptedPrediction": 0,
"rejectedPrediction": 0
}
}
}
}
},
"config": {
"tags": {},
"description": "Novare Test",
"prompts": [
"file:///Users/moshui/Documents/felo/qwen-agent/promptfoo/conversation/prompt.json"
],
"providers": [
{
"id": "openai:chat:qwen3",
"config": {
"apiBaseUrl": "https://catalog-agent-dev.gbase.ai/api/v2",
"apiKey": "a21c99620a8ef61d69563afe05ccce89",
"passthrough": {
"bot_id": "63069654-7750-409d-9a58-a0960d899a20",
"tool_response": true,
"language": "zh"
}
}
}
],
"tests": [
{
"vars": {
"question": "清水太郎在哪里"
},
"assert": [
{
"type": "regex",
"value": "\\[TOOL_CALL\\].*find_employee_location"
}
]
},
{
"vars": {
"question": "通知他明天上午8点开会"
},
"assert": [
{
"type": "llm-rubric",
"value": "询问是否确认发送"
}
]
},
{
"vars": {
"question": "确认"
},
"assert": [
{
"type": "regex",
"value": "\\[TOOL_CALL\\].*wowtalk_send_message_to_member"
}
]
}
],
"scenarios": [],
"env": {},
"sharing": true,
"defaultTest": {
"options": {
"provider": {
"text": {
"id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct",
"config": {
"apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26",
"apiBaseUrl": "https://one.felo.me/v1",
"apiVersion": "2024-02-01"
}
}
}
},
"vars": {},
"assert": [],
"metadata": {}
},
"outputPath": [
"result.json"
],
"extensions": [],
"metadata": {}
},
"shareableUrl": null,
"metadata": {
"promptfooVersion": "0.117.11",
"nodeVersion": "v20.10.0",
"platform": "darwin",
"arch": "x64",
"exportedAt": "2025-12-04T12:17:36.718Z",
"evaluationCreatedAt": "2025-12-04T12:17:14.502Z"
}
}

View File

@ -0,0 +1,32 @@
env: {}
description: Novare Test
providers:
- id: openai:chat:qwen3
config:
apiBaseUrl: https://catalog-agent-dev.gbase.ai/api/v2
apiKey: a21c99620a8ef61d69563afe05ccce89
passthrough:
bot_id: 63069654-7750-409d-9a58-a0960d899a20
tool_response: true
language: zh
prompts:
- file://prompt.json
tests:
- vars:
question: 940092の稼働状況
assert:
- type: regex
value: \[TOOL_CALL\].*dxcore_get_device_status
defaultTest:
options:
provider:
text:
id: openai:chat:qwen/qwen3-next-80b-a3b-instruct
config:
apiKey: sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26
apiBaseUrl: https://one.felo.me/v1
apiVersion: '2024-02-01'
evaluateOptions: {}
writeLatestResults: true
sharing: true

View File

@ -0,0 +1,6 @@
[
{
"role": "user",
"content": "{{ question }}"
}
]

235
promptfoo/query/result.json Normal file
View File

@ -0,0 +1,235 @@
{
"evalId": "eval-rew-2025-12-04T11:49:09",
"results": {
"version": 3,
"timestamp": "2025-12-04T11:49:09.282Z",
"prompts": [
{
"raw": "[{\"role\":\"user\",\"content\":\"{{ question }}\"}]",
"label": "prompt.json: [{\"role\":\"user\",\"content\":\"{{ question }}\"}]",
"id": "7fd9d6ab1656b5f683dd7d34fc535754cd42291c7b78f2aa5fd68b3e43dae7b6",
"provider": "openai:chat:qwen3",
"metrics": {
"score": 1,
"testPassCount": 1,
"testFailCount": 0,
"testErrorCount": 0,
"assertPassCount": 1,
"assertFailCount": 0,
"totalLatencyMs": 8731,
"tokenUsage": {
"prompt": 11,
"completion": 1174,
"cached": 0,
"total": 1185,
"numRequests": 1,
"completionDetails": {
"reasoning": 0,
"acceptedPrediction": 0,
"rejectedPrediction": 0
},
"assertions": {
"total": 0,
"prompt": 0,
"completion": 0,
"cached": 0,
"numRequests": 0,
"completionDetails": {
"reasoning": 0,
"acceptedPrediction": 0,
"rejectedPrediction": 0
}
}
},
"namedScores": {},
"namedScoresCount": {},
"cost": 0
}
}
],
"results": [
{
"cost": 0,
"gradingResult": {
"pass": true,
"score": 1,
"reason": "All assertions passed",
"namedScores": {},
"tokensUsed": {
"total": 0,
"prompt": 0,
"completion": 0,
"cached": 0,
"numRequests": 0
},
"componentResults": [
{
"pass": true,
"score": 1,
"reason": "Assertion passed",
"assertion": {
"type": "regex",
"value": "\\[TOOL_CALL\\].*dxcore_get_device_status"
}
}
],
"assertion": null
},
"id": "ec8dff77-79da-499f-aee0-f40727fa844e",
"latencyMs": 8731,
"namedScores": {},
"prompt": {
"raw": "[\n {\n \"role\": \"user\",\n \"content\": \"940092の稼働状況\"\n }\n]",
"label": "prompt.json: [{\"role\":\"user\",\"content\":\"{{ question }}\"}]"
},
"promptId": "7fd9d6ab1656b5f683dd7d34fc535754cd42291c7b78f2aa5fd68b3e43dae7b6",
"promptIdx": 0,
"provider": {
"id": "openai:chat:qwen3",
"label": ""
},
"response": {
"output": "[THINK]\n### 🎯 核心目标\n查询编号为“940092”的设备的当前运行状态如温度、湿度、运行模式等以了解其稼働状況工作状态。\n\n### 📋 现状分析\n- **关键信息**: \n - 用户查询的是“940092”的稼働状況该编号极可能为设备ID如空调或照明设备。 \n - 当前时间为2025年12月4日19:49属于晚间时段设备可能处于运行中。 \n - 用户未明确要求控制设备,仅要求查询状态,属于信息查询类请求。\n\n- **限制条件**: \n - 仅允许查询设备状态,不能擅自执行控制操作(如开关、调温等)。 \n - 必须使用指定工具且需优先使用精准匹配的设备ID查询工具。 \n - 所有输出语言必须为中文。\n\n- **可用资源**: \n - **Iot Control-dxcore_get_device_status**可通过设备ID直接查询设备当前状态温度、湿度、运行模式、风量等。 \n - **Iot Control-find_devices_by_room**仅在设备ID无效或模糊时作为备选但当前ID明确无需使用。 \n - 无需调用人员、消息、天气、知识库或搜索工具,因查询目标明确为设备状态。\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 Iot Control-dxcore_get_device_status传入参数 device_id: \"940092\",查询该设备的当前运行状态。\n\n[TOOL_CALL] Iot Control-dxcore_get_device_status\n{\"sensor_id\": 940092}\n[TOOL_RESPONSE] Iot Control-dxcore_get_device_status\n{\"success\":true,\"data\":[{\"Device.Info.Id\":940092,\"Device.Info.Type\":\"light\",\"Device.Info.Description\":\"A棟 2F コワーキングスペース①(丸)-01 照明\",\"Device.Info.Manufacturer\":\"Signify照明\",\"Device.Info.SerialNumber\":\"none\",\"Device.Info.Coordinates\":[0,0,0],\"Device.Info.FieldId\":\"\",\"Device.Status.Lighting\":3}],\"site_name\":\"shiomi\",\"device_type\":\"light\",\"sensor_id\":940092}\n[ANSWER]\nA栋2F协同空间①(丸)-01的照明设备运行正常当前亮度为3级。",
"tokenUsage": {
"total": 1185,
"prompt": 11,
"completion": 1174
},
"cached": false,
"finishReason": "stop"
},
"score": 1,
"success": true,
"testCase": {
"vars": {
"question": "940092の稼働状況"
},
"assert": [
{
"type": "regex",
"value": "\\[TOOL_CALL\\].*dxcore_get_device_status"
}
],
"options": {
"provider": {
"text": {
"id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct",
"config": {
"apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26",
"apiBaseUrl": "https://one.felo.me/v1",
"apiVersion": "2024-02-01"
}
}
}
},
"metadata": {}
},
"testIdx": 0,
"vars": {
"question": "940092の稼働状況"
},
"metadata": {
"_promptfooFileMetadata": {}
},
"failureReason": 0
}
],
"stats": {
"successes": 1,
"failures": 0,
"errors": 0,
"tokenUsage": {
"prompt": 11,
"completion": 1174,
"cached": 0,
"total": 1185,
"numRequests": 1,
"completionDetails": {
"reasoning": 0,
"acceptedPrediction": 0,
"rejectedPrediction": 0
},
"assertions": {
"total": 0,
"prompt": 0,
"completion": 0,
"cached": 0,
"numRequests": 0,
"completionDetails": {
"reasoning": 0,
"acceptedPrediction": 0,
"rejectedPrediction": 0
}
}
}
}
},
"config": {
"tags": {},
"description": "Novare Test",
"prompts": [
"file:///Users/moshui/Documents/felo/qwen-agent/promptfoo/query/prompt.json"
],
"providers": [
{
"id": "openai:chat:qwen3",
"config": {
"apiBaseUrl": "https://catalog-agent-dev.gbase.ai/api/v2",
"apiKey": "a21c99620a8ef61d69563afe05ccce89",
"passthrough": {
"bot_id": "63069654-7750-409d-9a58-a0960d899a20",
"tool_response": true,
"language": "zh"
}
}
}
],
"tests": [
{
"vars": {
"question": "940092の稼働状況"
},
"assert": [
{
"type": "regex",
"value": "\\[TOOL_CALL\\].*dxcore_get_device_status"
}
]
}
],
"scenarios": [],
"env": {},
"sharing": true,
"defaultTest": {
"options": {
"provider": {
"text": {
"id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct",
"config": {
"apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26",
"apiBaseUrl": "https://one.felo.me/v1",
"apiVersion": "2024-02-01"
}
}
}
},
"vars": {},
"assert": [],
"metadata": {}
},
"outputPath": [
"result.json"
],
"extensions": [],
"metadata": {}
},
"shareableUrl": null,
"metadata": {
"promptfooVersion": "0.117.11",
"nodeVersion": "v20.10.0",
"platform": "darwin",
"arch": "x64",
"exportedAt": "2025-12-04T11:49:18.166Z",
"evaluationCreatedAt": "2025-12-04T11:49:09.282Z"
}
}

369
promptfoo/result.json Normal file
View File

@ -0,0 +1,369 @@
{
"evalId": "eval-0Pa-2025-12-04T11:23:59",
"results": {
"version": 3,
"timestamp": "2025-12-04T11:23:59.637Z",
"prompts": [
{
"raw": "[\n{% for message in messages %}\n {% set outer_loop = loop %}\n {% for role, content in message %}\n {\n \"role\": \"{{ role }}\",\n \"content\": \"{{ content }}\"\n }{% if not (loop.last and outer_loop.last) %},{% endif %}\n {% endfor %}\n{% endfor %}\n]\n",
"label": "prompt.json: [\n{% for message in messages %}\n {% set outer_loop = loop %}\n {% for role, content in message %}\n {\n \"role\": \"{{ role }}\",\n \"content\": \"{{ content }}\"\n }{% if not (loop.last and outer_loop.last) %},{% endif %}\n {% endfor %}\n{% endfor %}\n]\n",
"id": "31ba1cc91b4ca656f8bf91b71d90fd1c786f46919fd3e498accd274537581342",
"provider": "openai:chat:qwen3",
"metrics": {
"score": 2,
"testPassCount": 2,
"testFailCount": 0,
"testErrorCount": 0,
"assertPassCount": 2,
"assertFailCount": 0,
"totalLatencyMs": 62,
"tokenUsage": {
"prompt": 0,
"completion": 0,
"cached": 1877,
"total": 1877,
"numRequests": 2,
"completionDetails": {
"reasoning": 0,
"acceptedPrediction": 0,
"rejectedPrediction": 0
},
"assertions": {
"total": 808,
"prompt": 0,
"completion": 0,
"cached": 808,
"numRequests": 0,
"completionDetails": {
"reasoning": 0,
"acceptedPrediction": 0,
"rejectedPrediction": 0
}
}
},
"namedScores": {},
"namedScoresCount": {},
"cost": 0
}
}
],
"results": [
{
"cost": 0,
"gradingResult": {
"pass": true,
"score": 1,
"reason": "All assertions passed",
"namedScores": {},
"tokensUsed": {
"total": 808,
"prompt": 0,
"completion": 0,
"cached": 808,
"numRequests": 0
},
"componentResults": [
{
"assertion": {
"type": "llm-rubric",
"value": "询问是否确认发送"
},
"pass": true,
"score": 1,
"reason": "输出中明确提出了确认请求:'即将发送“明天上午8点开会”的消息给清水邦英是否确认',符合要求在执行消息通知前进行确认的流程",
"tokensUsed": {
"total": 808,
"prompt": 0,
"completion": 0,
"cached": 808,
"numRequests": 0,
"completionDetails": {
"reasoning": 0,
"acceptedPrediction": 0,
"rejectedPrediction": 0
}
}
}
],
"assertion": null
},
"id": "867656f1-edcd-41b9-b645-c9f5df2e2cc1",
"latencyMs": 46,
"namedScores": {},
"prompt": {
"raw": "[\n\n \n \n {\n \"role\": \"user\",\n \"content\": \"通知清水太郎明天上午8点开会\"\n }\n \n\n]\n",
"label": "prompt.json: [\n{% for message in messages %}\n {% set outer_loop = loop %}\n {% for role, content in message %}\n {\n \"role\": \"{{ role }}\",\n \"content\": \"{{ content }}\"\n }{% if not (loop.last and outer_loop.last) %},{% endif %}\n {% endfor %}\n{% endfor %}\n]\n"
},
"promptId": "31ba1cc91b4ca656f8bf91b71d90fd1c786f46919fd3e498accd274537581342",
"promptIdx": 0,
"provider": {
"id": "openai:chat:qwen3",
"label": ""
},
"response": {
"output": "[THINK]\n### 🎯 核心目标\n通知清水太郎明天上午8点开会。\n\n### 📋 现状分析\n- **关键信息**: \n - 用户指令为“通知清水太郎明天上午8点开会” \n - 目标对象为“清水太郎”(需确认其是否存在及联系方式) \n - 消息内容为会议提醒时间为“明天上午8点”2025年12月5日 08:00 \n - 操作属于“消息通知”,需经过确认流程\n\n- **限制条件**: \n - 根据指南第4条消息通知类操作必须进行确认不可直接执行 \n - 必须通过【人员检索】确认清水太郎的身份与WowTalk账号绑定状态 \n - 消息内容需准确无误地传达时间与事项 \n - 当前时间为2025年12月4日19:01明天为12月5日时间合理\n\n- **可用资源**: \n - Iot Control-find_employee_location查找人员位置/信息) \n - Wowtalk tool-wowtalk_send_message_to_member发送消息通知 \n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用 `Iot Control-find_employee_location` 查询“清水太郎”的员工信息获取其唯一sensor_id或WowTalk账号\n2. 若查询到唯一匹配结果生成确认提示“即将发送明天上午8点开会的消息给清水太郎是否确认”\n3. 等待用户回复确认(如“确认”、“好的”等)\n4. 收到确认后,调用 `Wowtalk tool-wowtalk_send_message_to_member` 向清水太郎发送消息“明天上午8点开会”\n5. 若未查询到清水太郎,请回复:“未找到清水太郎的员工信息,请确认姓名是否正确或是否为系统内注册人员”\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n即将发送“明天上午8点开会”的消息给清水邦英是否确认",
"tokenUsage": {
"cached": 1168,
"total": 1168
},
"cached": true,
"finishReason": "stop"
},
"score": 1,
"success": true,
"testCase": {
"description": "通知清水太郎明天上午8点开会",
"vars": {
"messages": [
{
"user": "通知清水太郎明天上午8点开会"
}
]
},
"assert": [
{
"type": "llm-rubric",
"value": "询问是否确认发送"
}
],
"options": {
"provider": {
"text": {
"id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct",
"config": {
"apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26",
"apiBaseUrl": "https://one.felo.me/v1",
"apiVersion": "2024-02-01"
}
}
}
},
"metadata": {}
},
"testIdx": 0,
"vars": {
"messages": [
{
"user": "通知清水太郎明天上午8点开会"
}
]
},
"metadata": {
"_promptfooFileMetadata": {}
},
"failureReason": 0
},
{
"cost": 0,
"gradingResult": {
"pass": true,
"score": 1,
"reason": "All assertions passed",
"namedScores": {},
"tokensUsed": {
"total": 0,
"prompt": 0,
"completion": 0,
"cached": 0,
"numRequests": 0
},
"componentResults": [
{
"pass": true,
"score": 1,
"reason": "Assertion passed",
"assertion": {
"type": "regex",
"value": "\\[TOOL_CALL\\].*find_employee_location"
}
}
],
"assertion": null
},
"id": "a1bb2558-6a43-42e1-969d-332d8a4ca397",
"latencyMs": 16,
"namedScores": {},
"prompt": {
"raw": "[\n\n \n \n {\n \"role\": \"user\",\n \"content\": \"清水太郎在哪里\"\n }\n \n\n]\n",
"label": "prompt.json: [\n{% for message in messages %}\n {% set outer_loop = loop %}\n {% for role, content in message %}\n {\n \"role\": \"{{ role }}\",\n \"content\": \"{{ content }}\"\n }{% if not (loop.last and outer_loop.last) %},{% endif %}\n {% endfor %}\n{% endfor %}\n]\n"
},
"promptId": "31ba1cc91b4ca656f8bf91b71d90fd1c786f46919fd3e498accd274537581342",
"promptIdx": 0,
"provider": {
"id": "openai:chat:qwen3",
"label": ""
},
"response": {
"output": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内无法获取实时位置。",
"tokenUsage": {
"cached": 709,
"total": 709
},
"cached": true,
"finishReason": "stop"
},
"score": 1,
"success": true,
"testCase": {
"description": "清水太郎在哪里",
"vars": {
"messages": [
{
"user": "清水太郎在哪里"
}
]
},
"assert": [
{
"type": "regex",
"value": "\\[TOOL_CALL\\].*find_employee_location"
}
],
"options": {
"provider": {
"text": {
"id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct",
"config": {
"apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26",
"apiBaseUrl": "https://one.felo.me/v1",
"apiVersion": "2024-02-01"
}
}
}
},
"metadata": {}
},
"testIdx": 1,
"vars": {
"messages": [
{
"user": "清水太郎在哪里"
}
]
},
"metadata": {
"_promptfooFileMetadata": {}
},
"failureReason": 0
}
],
"stats": {
"successes": 2,
"failures": 0,
"errors": 0,
"tokenUsage": {
"prompt": 0,
"completion": 0,
"cached": 1877,
"total": 1877,
"numRequests": 2,
"completionDetails": {
"reasoning": 0,
"acceptedPrediction": 0,
"rejectedPrediction": 0
},
"assertions": {
"total": 808,
"prompt": 0,
"completion": 0,
"cached": 808,
"numRequests": 0,
"completionDetails": {
"reasoning": 0,
"acceptedPrediction": 0,
"rejectedPrediction": 0
}
}
}
}
},
"config": {
"tags": {},
"description": "Novare Test",
"prompts": [
"file:///Users/moshui/Documents/felo/qwen-agent/promptfoo/prompt.json"
],
"providers": [
{
"id": "openai:chat:qwen3",
"config": {
"apiBaseUrl": "https://catalog-agent-dev.gbase.ai/api/v2",
"apiKey": "a21c99620a8ef61d69563afe05ccce89",
"passthrough": {
"bot_id": "63069654-7750-409d-9a58-a0960d899a20",
"tool_response": true,
"language": "zh"
}
}
}
],
"tests": [
{
"description": "通知清水太郎明天上午8点开会",
"vars": {
"messages": [
{
"user": "通知清水太郎明天上午8点开会"
}
]
},
"assert": [
{
"type": "llm-rubric",
"value": "询问是否确认发送"
}
]
},
{
"description": "清水太郎在哪里",
"vars": {
"messages": [
{
"user": "清水太郎在哪里"
}
]
},
"assert": [
{
"type": "regex",
"value": "\\[TOOL_CALL\\].*find_employee_location"
}
]
}
],
"scenarios": [],
"env": {},
"sharing": true,
"defaultTest": {
"options": {
"provider": {
"text": {
"id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct",
"config": {
"apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26",
"apiBaseUrl": "https://one.felo.me/v1",
"apiVersion": "2024-02-01"
}
}
}
},
"vars": {},
"assert": [],
"metadata": {}
},
"outputPath": [
"result.json"
],
"extensions": [],
"metadata": {}
},
"shareableUrl": null,
"metadata": {
"promptfooVersion": "0.117.11",
"nodeVersion": "v20.10.0",
"platform": "darwin",
"arch": "x64",
"exportedAt": "2025-12-04T11:24:00.162Z",
"evaluationCreatedAt": "2025-12-04T11:23:59.637Z"
}
}

View File

@ -57,6 +57,25 @@ def append_user_last_message(messages: list, content: str) -> bool:
messages[-1]['content'] += content
return messages
def append_assistant_last_message(messages: list, content: str) -> bool:
"""向最后一条用户消息追加内容
Args:
messages: 消息列表
content: 要追加的内容
condition: 可选条件如果提供则检查消息角色是否匹配此条件
Returns:
bool: 是否成功追加内容
"""
if not messages or len(messages) == 0:
return messages
last_message = messages[-1]
if last_message and last_message.get('role') == 'assistant':
messages[-1]['content'] += content
else:
messages.append({"role":"assistant","content":content})
return messages
async def process_guidelines_and_terms(
@ -243,11 +262,12 @@ async def enhanced_generate_stream_response(
# 准备最终的消息
final_messages = messages.copy()
final_messages = append_user_last_message(final_messages, f"\n\nlanguage:{get_language_text(language)}")
if guideline_reasoning:
# 用###分割guideline_reasoning取最后一段作为Guidelines
final_messages = append_user_last_message(final_messages, f"\n\nGuidelines:\n{guideline_reasoning}\nPlease follow these guidelines step by step.")
guidelines_text = guideline_reasoning.split('###')[-1].strip() if guideline_reasoning else ""
final_messages = append_assistant_last_message(final_messages, f"language:{get_language_text(language)}\n\nGuidelines:\n{guidelines_text}\n I will follow these guidelines step by step.")
else:
final_messages = append_assistant_last_message(final_messages, f"\n\nlanguage:{get_language_text(language)}")
# 第三阶段agent响应流式传输
logger.info(f"Starting agent stream response")
@ -358,12 +378,14 @@ async def create_agent_and_generate_response(
# 准备最终的消息
final_messages = messages.copy()
final_messages = append_user_last_message(final_messages, f"\n\nlanguage:{get_language_text(language)}")
pre_message_list = []
if guideline_reasoning:
# 用###分割guideline_reasoning取最后一段作为Guidelines
final_messages = append_user_last_message(final_messages, f"\n\nGuidelines:\n{guideline_reasoning}\nPlease follow these guidelines step by step.")
guidelines_text = guideline_reasoning.split('###')[-1].strip() if guideline_reasoning else ""
final_messages = append_assistant_last_message(final_messages, f"language:{get_language_text(language)}\n\nGuidelines:\n{guidelines_text}\n I will follow these guidelines step by step.")
pre_message_list.append({"role": "assistant","reasoning_content": guideline_reasoning+ "\n"})
else:
final_messages = append_assistant_last_message(final_messages, f"\n\nlanguage:{get_language_text(language)}")
# 非流式响应
agent_responses = agent.run_nonstream(final_messages)

View File

@ -284,19 +284,24 @@ def format_messages_to_chat_history(messages: List[Dict[str, str]]) -> str:
Returns:
str: 格式化的聊天记录
"""
# 只取最后的6句消息
# 只取最后的15句消息
chat_history = []
for message in messages:
role = message.get('role', '')
content = message.get('content', '')
if len(content) > 0:
if role == 'user':
chat_history.append(f"user: {content}")
elif role == 'assistant':
if role == 'user':
chat_history.append(f"user: {content}")
elif role == FUNCTION:
chat_history.append(f"function_response: {content}")
elif role == ASSISTANT:
if len(content) >0:
chat_history.append(f"assistant: {content}")
if message.get('function_call'):
chat_history.append(f"function_call: {message.get('function_call').get('name')} ")
chat_history.append(f"{message.get('function_call').get('arguments')}")
recent_chat_history = chat_history[-6:] if len(chat_history) > 6 else chat_history
recent_chat_history = chat_history[-15:] if len(chat_history) > 15 else chat_history
print(f"recent_chat_history:{recent_chat_history}")
return "\n".join(recent_chat_history)