merge
This commit is contained in:
commit
d3a4a84713
@ -158,7 +158,7 @@ class ModifiedAssistant(Assistant):
|
||||
response = []
|
||||
|
||||
# 保持原有的最大调用次数限制
|
||||
total_num_llm_calls_available = self.MAX_LLM_CALL_PER_RUN if hasattr(self, 'MAX_LLM_CALL_PER_RUN') else 10
|
||||
total_num_llm_calls_available = self.MAX_LLM_CALL_PER_RUN if hasattr(self, 'MAX_LLM_CALL_PER_RUN') else 50
|
||||
num_llm_calls_available = total_num_llm_calls_available
|
||||
while num_llm_calls_available > 0:
|
||||
num_llm_calls_available -= 1
|
||||
|
||||
@ -18,7 +18,7 @@
|
||||
- **优先级排序**: 确定任务执行顺序
|
||||
|
||||
### 3. 计划阶段 (Planning)
|
||||
- **步骤制定**: 详细的执行步骤,步骤数量根据实际需求调整,每个步骤应具体可执行
|
||||
- **步骤制定**: 只需要列出详细的工具调用步骤即可,不需要考虑回复用户的话术,步骤数量根据实际需求调整。
|
||||
|
||||
请按照上述思考框架进行完整分析,确保理解目标、分析问题和制定计划,
|
||||
---
|
||||
@ -54,4 +54,14 @@
|
||||
## 输出格式
|
||||
按顺序输出执行步骤:
|
||||
|
||||
[按1,2,3...n顺序列出具体可执行的步骤]
|
||||
### 🎯 核心目标
|
||||
[简洁描述用户的主要目标]
|
||||
|
||||
### 📋 现状分析
|
||||
- **关键信息**: [从聊天记录中提取的重要信息]
|
||||
- **限制条件**: [需要遵守的规则和约束]
|
||||
- **可用资源**: [可以利用的工具和资源]
|
||||
|
||||
### ⚡ 计划阶段
|
||||
**执行步骤**:
|
||||
[按1,2,3...n顺序列出执行步骤]
|
||||
|
||||
44
promptfoo/conversation/config.yaml
Normal file
44
promptfoo/conversation/config.yaml
Normal file
@ -0,0 +1,44 @@
|
||||
env: {}
|
||||
description: Novare Test
|
||||
providers:
|
||||
- id: openai:chat:qwen3
|
||||
config:
|
||||
apiBaseUrl: https://catalog-agent-dev.gbase.ai/api/v2
|
||||
apiKey: a21c99620a8ef61d69563afe05ccce89
|
||||
passthrough:
|
||||
bot_id: 63069654-7750-409d-9a58-a0960d899a20
|
||||
tool_response: true
|
||||
language: zh
|
||||
prompts:
|
||||
- file://prompt.json
|
||||
nunjucksFilters:
|
||||
encode: ./encode.js
|
||||
tests:
|
||||
- vars:
|
||||
question: 清水太郎在哪里
|
||||
assert:
|
||||
- type: regex
|
||||
value: \[TOOL_CALL\].*find_employee_location
|
||||
- vars:
|
||||
question: 通知他明天上午8点开会
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
value: 询问是否确认发送
|
||||
- vars:
|
||||
question: 确认
|
||||
assert:
|
||||
- type: regex
|
||||
value: \[TOOL_CALL\].*wowtalk_send_message_to_member
|
||||
|
||||
defaultTest:
|
||||
options:
|
||||
provider:
|
||||
text:
|
||||
id: openai:chat:qwen/qwen3-next-80b-a3b-instruct
|
||||
config:
|
||||
apiKey: sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26
|
||||
apiBaseUrl: https://one.felo.me/v1
|
||||
apiVersion: '2024-02-01'
|
||||
evaluateOptions: {}
|
||||
writeLatestResults: true
|
||||
sharing: true
|
||||
11
promptfoo/conversation/encode.js
Normal file
11
promptfoo/conversation/encode.js
Normal file
@ -0,0 +1,11 @@
|
||||
module.exports = function (str) {
|
||||
if (typeof str !== 'string') {
|
||||
return str;
|
||||
}
|
||||
return str
|
||||
.replace(/\\/g, '\\\\') // 转义反斜杠
|
||||
.replace(/"/g, '\\"') // 转义双引号
|
||||
.replace(/\n/g, '\\n') // 转义换行符
|
||||
.replace(/\r/g, '\\r') // 转义回车符
|
||||
.replace(/\t/g, '\\t'); // 转义制表符
|
||||
};
|
||||
16
promptfoo/conversation/prompt.json
Normal file
16
promptfoo/conversation/prompt.json
Normal file
@ -0,0 +1,16 @@
|
||||
[
|
||||
{% for completion in _conversation %}
|
||||
{
|
||||
"role": "user",
|
||||
"content": "{{ completion.input | encode }}"
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "{{ completion.output | encode }}"
|
||||
},
|
||||
{% endfor %}
|
||||
{
|
||||
"role": "user",
|
||||
"content": "{{ question | encode }}"
|
||||
}
|
||||
]
|
||||
612
promptfoo/conversation/result.json
Normal file
612
promptfoo/conversation/result.json
Normal file
@ -0,0 +1,612 @@
|
||||
{
|
||||
"evalId": "eval-l9e-2025-12-04T12:17:14",
|
||||
"results": {
|
||||
"version": 3,
|
||||
"timestamp": "2025-12-04T12:17:14.502Z",
|
||||
"prompts": [
|
||||
{
|
||||
"raw": "[\n{% for completion in _conversation %}\n {\n \"role\": \"user\",\n \"content\": \"{{ completion.input | encode }}\"\n },\n {\n \"role\": \"assistant\",\n \"content\": \"{{ completion.output | encode }}\"\n },\n{% endfor %}\n {\n \"role\": \"user\",\n \"content\": \"{{ question | encode }}\"\n }\n]\n",
|
||||
"label": "prompt.json: [\n{% for completion in _conversation %}\n {\n \"role\": \"user\",\n \"content\": \"{{ completion.input | encode }}\"\n },\n {\n \"role\": \"assistant\",\n \"content\": \"{{ completion.output | encode }}\"\n },\n{% endfor %}\n {\n \"role\": \"user\",\n \"content\": \"{{ question | encode }}\"\n }\n]\n",
|
||||
"id": "49ff9901f402b7e42593f2724f05794c15d1c1bbd5d7a46c3e58198f7bd02c7e",
|
||||
"provider": "openai:chat:qwen3",
|
||||
"metrics": {
|
||||
"score": 3,
|
||||
"testPassCount": 3,
|
||||
"testFailCount": 0,
|
||||
"testErrorCount": 0,
|
||||
"assertPassCount": 3,
|
||||
"assertFailCount": 0,
|
||||
"totalLatencyMs": 18196,
|
||||
"tokenUsage": {
|
||||
"prompt": 596,
|
||||
"completion": 2017,
|
||||
"cached": 709,
|
||||
"total": 3322,
|
||||
"numRequests": 3,
|
||||
"completionDetails": {
|
||||
"reasoning": 0,
|
||||
"acceptedPrediction": 0,
|
||||
"rejectedPrediction": 0
|
||||
},
|
||||
"assertions": {
|
||||
"total": 746,
|
||||
"prompt": 697,
|
||||
"completion": 49,
|
||||
"cached": 0,
|
||||
"numRequests": 0,
|
||||
"completionDetails": {
|
||||
"reasoning": 0,
|
||||
"acceptedPrediction": 0,
|
||||
"rejectedPrediction": 0
|
||||
}
|
||||
}
|
||||
},
|
||||
"namedScores": {},
|
||||
"namedScoresCount": {},
|
||||
"cost": 0
|
||||
}
|
||||
}
|
||||
],
|
||||
"results": [
|
||||
{
|
||||
"cost": 0,
|
||||
"gradingResult": {
|
||||
"pass": true,
|
||||
"score": 1,
|
||||
"reason": "All assertions passed",
|
||||
"namedScores": {},
|
||||
"tokensUsed": {
|
||||
"total": 0,
|
||||
"prompt": 0,
|
||||
"completion": 0,
|
||||
"cached": 0,
|
||||
"numRequests": 0
|
||||
},
|
||||
"componentResults": [
|
||||
{
|
||||
"pass": true,
|
||||
"score": 1,
|
||||
"reason": "Assertion passed",
|
||||
"assertion": {
|
||||
"type": "regex",
|
||||
"value": "\\[TOOL_CALL\\].*find_employee_location"
|
||||
}
|
||||
}
|
||||
],
|
||||
"assertion": null
|
||||
},
|
||||
"id": "7595e74d-334a-4ed9-b01c-8373f6e8dffc",
|
||||
"latencyMs": 12,
|
||||
"namedScores": {},
|
||||
"prompt": {
|
||||
"raw": "[\n\n {\n \"role\": \"user\",\n \"content\": \"清水太郎在哪里\"\n }\n]\n",
|
||||
"label": "prompt.json: [\n{% for completion in _conversation %}\n {\n \"role\": \"user\",\n \"content\": \"{{ completion.input | encode }}\"\n },\n {\n \"role\": \"assistant\",\n \"content\": \"{{ completion.output | encode }}\"\n },\n{% endfor %}\n {\n \"role\": \"user\",\n \"content\": \"{{ question | encode }}\"\n }\n]\n"
|
||||
},
|
||||
"promptId": "49ff9901f402b7e42593f2724f05794c15d1c1bbd5d7a46c3e58198f7bd02c7e",
|
||||
"promptIdx": 0,
|
||||
"provider": {
|
||||
"id": "openai:chat:qwen3",
|
||||
"label": ""
|
||||
},
|
||||
"response": {
|
||||
"output": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内,无法获取实时位置。",
|
||||
"tokenUsage": {
|
||||
"cached": 709,
|
||||
"total": 709
|
||||
},
|
||||
"cached": true,
|
||||
"finishReason": "stop"
|
||||
},
|
||||
"score": 1,
|
||||
"success": true,
|
||||
"testCase": {
|
||||
"vars": {
|
||||
"question": "清水太郎在哪里",
|
||||
"_conversation": []
|
||||
},
|
||||
"assert": [
|
||||
{
|
||||
"type": "regex",
|
||||
"value": "\\[TOOL_CALL\\].*find_employee_location"
|
||||
}
|
||||
],
|
||||
"options": {
|
||||
"provider": {
|
||||
"text": {
|
||||
"id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct",
|
||||
"config": {
|
||||
"apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26",
|
||||
"apiBaseUrl": "https://one.felo.me/v1",
|
||||
"apiVersion": "2024-02-01"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"metadata": {}
|
||||
},
|
||||
"testIdx": 0,
|
||||
"vars": {
|
||||
"question": "清水太郎在哪里",
|
||||
"_conversation": []
|
||||
},
|
||||
"metadata": {
|
||||
"_promptfooFileMetadata": {}
|
||||
},
|
||||
"failureReason": 0
|
||||
},
|
||||
{
|
||||
"cost": 0,
|
||||
"gradingResult": {
|
||||
"pass": true,
|
||||
"score": 1,
|
||||
"reason": "All assertions passed",
|
||||
"namedScores": {},
|
||||
"tokensUsed": {
|
||||
"total": 746,
|
||||
"prompt": 697,
|
||||
"completion": 49,
|
||||
"cached": 0,
|
||||
"numRequests": 0
|
||||
},
|
||||
"componentResults": [
|
||||
{
|
||||
"assertion": {
|
||||
"type": "llm-rubric",
|
||||
"value": "询问是否确认发送"
|
||||
},
|
||||
"pass": true,
|
||||
"score": 1,
|
||||
"reason": "输出明确询问用户是否确认向清水邦英发送会议通知,符合'询问是否确认发送'的rubric要求。",
|
||||
"tokensUsed": {
|
||||
"total": 746,
|
||||
"prompt": 697,
|
||||
"completion": 49,
|
||||
"cached": 0,
|
||||
"numRequests": 0,
|
||||
"completionDetails": {
|
||||
"reasoning": 0,
|
||||
"acceptedPrediction": 0,
|
||||
"rejectedPrediction": 0
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"assertion": null
|
||||
},
|
||||
"id": "92bba056-d302-4c3b-ac01-deb76e9e74d4",
|
||||
"latencyMs": 9701,
|
||||
"namedScores": {},
|
||||
"prompt": {
|
||||
"raw": "[\n\n {\n \"role\": \"user\",\n \"content\": \"清水太郎在哪里\"\n },\n {\n \"role\": \"assistant\",\n \"content\": \"[THINK]\\n### 🎯 核心目标\\n查找名为“清水太郎”的人员位置。\\n\\n### 📋 现状分析\\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\\n- **限制条件**: \\n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\\n- **可用资源**: \\n - 工具:`Iot Control-find_employee_location`(人员检索工具)\\n\\n### ⚡ 计划阶段\\n**执行步骤**:\\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\\\"清水太郎\\\"`,查询该人员的当前位置。\\n\\n[TOOL_CALL] Iot Control-find_employee_location\\n{\\\"name\\\": \\\"清水太郎\\\"}\\n[TOOL_RESPONSE] Iot Control-find_employee_location\\n{\\\"matched_count\\\":1,\\\"results\\\":[{\\\"name\\\":\\\"清水 邦英\\\",\\\"wowtalk_id\\\":\\\"u226160\\\",\\\"sensor_id\\\":\\\"3246789\\\",\\\"confidence\\\":85,\\\"location_status\\\":\\\"not_in_range\\\",\\\"error_message\\\":\\\"Sensor不在测位范围内或未检测到位置信号\\\",\\\"coordinates\\\":null,\\\"location\\\":null}]}\\n[ANSWER]\\n清水邦英先生目前未在测位范围内,无法获取实时位置。\"\n },\n\n {\n \"role\": \"user\",\n \"content\": \"通知他明天上午8点开会\"\n }\n]\n",
|
||||
"label": "prompt.json: [\n{% for completion in _conversation %}\n {\n \"role\": \"user\",\n \"content\": \"{{ completion.input | encode }}\"\n },\n {\n \"role\": \"assistant\",\n \"content\": \"{{ completion.output | encode }}\"\n },\n{% endfor %}\n {\n \"role\": \"user\",\n \"content\": \"{{ question | encode }}\"\n }\n]\n"
|
||||
},
|
||||
"promptId": "49ff9901f402b7e42593f2724f05794c15d1c1bbd5d7a46c3e58198f7bd02c7e",
|
||||
"promptIdx": 0,
|
||||
"provider": {
|
||||
"id": "openai:chat:qwen3",
|
||||
"label": ""
|
||||
},
|
||||
"response": {
|
||||
"output": "[THINK]\n### 🎯 核心目标\n通知清水太郎明天上午8点开会,但系统返回的是“清水邦英”而非“清水太郎”,需确认人员身份并发送会议提醒。\n\n### 📋 现状分析\n- **关键信息**:\n - 用户查询“清水太郎”,但系统返回匹配人为“清水 邦英”(姓名相近但不完全一致)。\n - 清水邦英的传感器信号不在测位范围内,无法获取位置,但不影响消息通知。\n - 清水邦英的wowtalk_id为“u226160”,可用于发送消息。\n - 用户意图是发送“明天上午8点开会”的通知。\n- **限制条件**:\n - 名字不完全匹配(太郎 vs 邦英),可能存在输入误差,需谨慎确认。\n - 消息通知属于“需要确认”的操作(根据指南第4条),不可直接发送。\n - 仅当确认人员为同一人时,才可发送通知。\n- **可用资源**:\n - Iot Control-find_employee_location(查询人员信息)\n - Wowtalk tool-wowtalk_send_message_to_member(发送消息)\n - 可用的wowtalk_id:u226160(清水邦英)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用 Iot Control-find_employee_location,查询名称中包含“太郎”的员工,确认是否存在“清水太郎”。\n2. 若步骤1返回结果为空或无匹配项,则向用户发送确认请求:“您要通知的是‘清水邦英’(wowtalk_id: u226160)吗?系统未查到‘清水太郎’,是否确认向清水邦英发送‘明天上午8点开会’的通知?”\n3. 等待用户明确回复“确认”或“是”。\n4. 收到确认后,调用 Wowtalk tool-wowtalk_send_message_to_member,向 wowtalk_id: u226160 发送消息:“明天上午8点开会”。\n\n[ANSWER]\n您要通知的是“清水邦英”(wowtalk_id: u226160)吗?系统未查到“清水太郎”,是否确认向清水邦英发送“明天上午8点开会”的通知?",
|
||||
"tokenUsage": {
|
||||
"total": 1157,
|
||||
"prompt": 261,
|
||||
"completion": 896
|
||||
},
|
||||
"cached": false,
|
||||
"finishReason": "stop"
|
||||
},
|
||||
"score": 1,
|
||||
"success": true,
|
||||
"testCase": {
|
||||
"vars": {
|
||||
"question": "通知他明天上午8点开会",
|
||||
"_conversation": [
|
||||
{
|
||||
"prompt": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "清水太郎在哪里"
|
||||
}
|
||||
],
|
||||
"input": "清水太郎在哪里",
|
||||
"output": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内,无法获取实时位置。"
|
||||
},
|
||||
{
|
||||
"prompt": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "清水太郎在哪里"
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内,无法获取实时位置。"
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "通知他明天上午8点开会"
|
||||
}
|
||||
],
|
||||
"input": "通知他明天上午8点开会",
|
||||
"output": "[THINK]\n### 🎯 核心目标\n通知清水太郎明天上午8点开会,但系统返回的是“清水邦英”而非“清水太郎”,需确认人员身份并发送会议提醒。\n\n### 📋 现状分析\n- **关键信息**:\n - 用户查询“清水太郎”,但系统返回匹配人为“清水 邦英”(姓名相近但不完全一致)。\n - 清水邦英的传感器信号不在测位范围内,无法获取位置,但不影响消息通知。\n - 清水邦英的wowtalk_id为“u226160”,可用于发送消息。\n - 用户意图是发送“明天上午8点开会”的通知。\n- **限制条件**:\n - 名字不完全匹配(太郎 vs 邦英),可能存在输入误差,需谨慎确认。\n - 消息通知属于“需要确认”的操作(根据指南第4条),不可直接发送。\n - 仅当确认人员为同一人时,才可发送通知。\n- **可用资源**:\n - Iot Control-find_employee_location(查询人员信息)\n - Wowtalk tool-wowtalk_send_message_to_member(发送消息)\n - 可用的wowtalk_id:u226160(清水邦英)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用 Iot Control-find_employee_location,查询名称中包含“太郎”的员工,确认是否存在“清水太郎”。\n2. 若步骤1返回结果为空或无匹配项,则向用户发送确认请求:“您要通知的是‘清水邦英’(wowtalk_id: u226160)吗?系统未查到‘清水太郎’,是否确认向清水邦英发送‘明天上午8点开会’的通知?”\n3. 等待用户明确回复“确认”或“是”。\n4. 收到确认后,调用 Wowtalk tool-wowtalk_send_message_to_member,向 wowtalk_id: u226160 发送消息:“明天上午8点开会”。\n\n[ANSWER]\n您要通知的是“清水邦英”(wowtalk_id: u226160)吗?系统未查到“清水太郎”,是否确认向清水邦英发送“明天上午8点开会”的通知?"
|
||||
}
|
||||
]
|
||||
},
|
||||
"assert": [
|
||||
{
|
||||
"type": "llm-rubric",
|
||||
"value": "询问是否确认发送"
|
||||
}
|
||||
],
|
||||
"options": {
|
||||
"provider": {
|
||||
"text": {
|
||||
"id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct",
|
||||
"config": {
|
||||
"apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26",
|
||||
"apiBaseUrl": "https://one.felo.me/v1",
|
||||
"apiVersion": "2024-02-01"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"metadata": {}
|
||||
},
|
||||
"testIdx": 1,
|
||||
"vars": {
|
||||
"question": "通知他明天上午8点开会",
|
||||
"_conversation": [
|
||||
{
|
||||
"prompt": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "清水太郎在哪里"
|
||||
}
|
||||
],
|
||||
"input": "清水太郎在哪里",
|
||||
"output": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内,无法获取实时位置。"
|
||||
},
|
||||
{
|
||||
"prompt": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "清水太郎在哪里"
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内,无法获取实时位置。"
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "通知他明天上午8点开会"
|
||||
}
|
||||
],
|
||||
"input": "通知他明天上午8点开会",
|
||||
"output": "[THINK]\n### 🎯 核心目标\n通知清水太郎明天上午8点开会,但系统返回的是“清水邦英”而非“清水太郎”,需确认人员身份并发送会议提醒。\n\n### 📋 现状分析\n- **关键信息**:\n - 用户查询“清水太郎”,但系统返回匹配人为“清水 邦英”(姓名相近但不完全一致)。\n - 清水邦英的传感器信号不在测位范围内,无法获取位置,但不影响消息通知。\n - 清水邦英的wowtalk_id为“u226160”,可用于发送消息。\n - 用户意图是发送“明天上午8点开会”的通知。\n- **限制条件**:\n - 名字不完全匹配(太郎 vs 邦英),可能存在输入误差,需谨慎确认。\n - 消息通知属于“需要确认”的操作(根据指南第4条),不可直接发送。\n - 仅当确认人员为同一人时,才可发送通知。\n- **可用资源**:\n - Iot Control-find_employee_location(查询人员信息)\n - Wowtalk tool-wowtalk_send_message_to_member(发送消息)\n - 可用的wowtalk_id:u226160(清水邦英)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用 Iot Control-find_employee_location,查询名称中包含“太郎”的员工,确认是否存在“清水太郎”。\n2. 若步骤1返回结果为空或无匹配项,则向用户发送确认请求:“您要通知的是‘清水邦英’(wowtalk_id: u226160)吗?系统未查到‘清水太郎’,是否确认向清水邦英发送‘明天上午8点开会’的通知?”\n3. 等待用户明确回复“确认”或“是”。\n4. 收到确认后,调用 Wowtalk tool-wowtalk_send_message_to_member,向 wowtalk_id: u226160 发送消息:“明天上午8点开会”。\n\n[ANSWER]\n您要通知的是“清水邦英”(wowtalk_id: u226160)吗?系统未查到“清水太郎”,是否确认向清水邦英发送“明天上午8点开会”的通知?"
|
||||
}
|
||||
]
|
||||
},
|
||||
"metadata": {
|
||||
"_promptfooFileMetadata": {}
|
||||
},
|
||||
"failureReason": 0
|
||||
},
|
||||
{
|
||||
"cost": 0,
|
||||
"gradingResult": {
|
||||
"pass": true,
|
||||
"score": 1,
|
||||
"reason": "All assertions passed",
|
||||
"namedScores": {},
|
||||
"tokensUsed": {
|
||||
"total": 0,
|
||||
"prompt": 0,
|
||||
"completion": 0,
|
||||
"cached": 0,
|
||||
"numRequests": 0
|
||||
},
|
||||
"componentResults": [
|
||||
{
|
||||
"pass": true,
|
||||
"score": 1,
|
||||
"reason": "Assertion passed",
|
||||
"assertion": {
|
||||
"type": "regex",
|
||||
"value": "\\[TOOL_CALL\\].*wowtalk_send_message_to_member"
|
||||
}
|
||||
}
|
||||
],
|
||||
"assertion": null
|
||||
},
|
||||
"id": "647361f4-33c5-4f1a-aa29-edb5c0f51841",
|
||||
"latencyMs": 8483,
|
||||
"namedScores": {},
|
||||
"prompt": {
|
||||
"raw": "[\n\n {\n \"role\": \"user\",\n \"content\": \"清水太郎在哪里\"\n },\n {\n \"role\": \"assistant\",\n \"content\": \"[THINK]\\n### 🎯 核心目标\\n查找名为“清水太郎”的人员位置。\\n\\n### 📋 现状分析\\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\\n- **限制条件**: \\n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\\n- **可用资源**: \\n - 工具:`Iot Control-find_employee_location`(人员检索工具)\\n\\n### ⚡ 计划阶段\\n**执行步骤**:\\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\\\"清水太郎\\\"`,查询该人员的当前位置。\\n\\n[TOOL_CALL] Iot Control-find_employee_location\\n{\\\"name\\\": \\\"清水太郎\\\"}\\n[TOOL_RESPONSE] Iot Control-find_employee_location\\n{\\\"matched_count\\\":1,\\\"results\\\":[{\\\"name\\\":\\\"清水 邦英\\\",\\\"wowtalk_id\\\":\\\"u226160\\\",\\\"sensor_id\\\":\\\"3246789\\\",\\\"confidence\\\":85,\\\"location_status\\\":\\\"not_in_range\\\",\\\"error_message\\\":\\\"Sensor不在测位范围内或未检测到位置信号\\\",\\\"coordinates\\\":null,\\\"location\\\":null}]}\\n[ANSWER]\\n清水邦英先生目前未在测位范围内,无法获取实时位置。\"\n },\n\n {\n \"role\": \"user\",\n \"content\": \"通知他明天上午8点开会\"\n },\n {\n \"role\": \"assistant\",\n \"content\": \"[THINK]\\n### 🎯 核心目标\\n通知清水太郎明天上午8点开会,但系统返回的是“清水邦英”而非“清水太郎”,需确认人员身份并发送会议提醒。\\n\\n### 📋 现状分析\\n- **关键信息**:\\n - 用户查询“清水太郎”,但系统返回匹配人为“清水 邦英”(姓名相近但不完全一致)。\\n - 清水邦英的传感器信号不在测位范围内,无法获取位置,但不影响消息通知。\\n - 清水邦英的wowtalk_id为“u226160”,可用于发送消息。\\n - 用户意图是发送“明天上午8点开会”的通知。\\n- **限制条件**:\\n - 名字不完全匹配(太郎 vs 邦英),可能存在输入误差,需谨慎确认。\\n - 消息通知属于“需要确认”的操作(根据指南第4条),不可直接发送。\\n - 仅当确认人员为同一人时,才可发送通知。\\n- **可用资源**:\\n - Iot Control-find_employee_location(查询人员信息)\\n - Wowtalk tool-wowtalk_send_message_to_member(发送消息)\\n - 可用的wowtalk_id:u226160(清水邦英)\\n\\n### ⚡ 计划阶段\\n**执行步骤**:\\n1. 调用 Iot Control-find_employee_location,查询名称中包含“太郎”的员工,确认是否存在“清水太郎”。\\n2. 若步骤1返回结果为空或无匹配项,则向用户发送确认请求:“您要通知的是‘清水邦英’(wowtalk_id: u226160)吗?系统未查到‘清水太郎’,是否确认向清水邦英发送‘明天上午8点开会’的通知?”\\n3. 等待用户明确回复“确认”或“是”。\\n4. 收到确认后,调用 Wowtalk tool-wowtalk_send_message_to_member,向 wowtalk_id: u226160 发送消息:“明天上午8点开会”。\\n\\n[ANSWER]\\n您要通知的是“清水邦英”(wowtalk_id: u226160)吗?系统未查到“清水太郎”,是否确认向清水邦英发送“明天上午8点开会”的通知?\"\n },\n\n {\n \"role\": \"user\",\n \"content\": \"确认\"\n }\n]\n",
|
||||
"label": "prompt.json: [\n{% for completion in _conversation %}\n {\n \"role\": \"user\",\n \"content\": \"{{ completion.input | encode }}\"\n },\n {\n \"role\": \"assistant\",\n \"content\": \"{{ completion.output | encode }}\"\n },\n{% endfor %}\n {\n \"role\": \"user\",\n \"content\": \"{{ question | encode }}\"\n }\n]\n"
|
||||
},
|
||||
"promptId": "49ff9901f402b7e42593f2724f05794c15d1c1bbd5d7a46c3e58198f7bd02c7e",
|
||||
"promptIdx": 0,
|
||||
"provider": {
|
||||
"id": "openai:chat:qwen3",
|
||||
"label": ""
|
||||
},
|
||||
"response": {
|
||||
"output": "[THINK]\n### 🎯 核心目标\n向员工清水邦英(wowtalk_id: u226160)发送明天上午8点开会的通知。\n\n### 📋 现状分析\n- **关键信息**: \n - 用户最初询问“清水太郎”,系统返回“清水邦英”为唯一匹配人(confidence: 85%)。\n - 用户已确认“清水邦英”为通知对象,并明确指令:“通知他明天上午8点开会”。\n - 清水邦英的wowtalk_id为u226160,具备消息通知的唯一身份标识。\n- **限制条件**:\n - 根据【消息通知】规范,发送通知前必须获得用户明确确认,当前用户已确认,可直接执行。\n - 仅可使用授权工具完成操作,不得使用非指定工具。\n - 通知内容需精确传达,不得增删语义。\n- **可用资源**:\n - Wowtalk tool-wowtalk_send_message_to_member:用于向指定wowtalk_id发送消息。\n - Iot Control-find_employee_location:已使用并确认人员身份,无需重复调用。\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 Wowtalk tool-wowtalk_send_message_to_member,参数为:\n - wowtalk_id: \"u226160\"\n - message: \"明天上午8点开会\"\n\n[TOOL_CALL] Wowtalk tool-wowtalk_send_message_to_member\n{\"to_account\": \"u226160\", \"message_content\": \"明天上午8点开会\"}\n[TOOL_RESPONSE] Wowtalk tool-wowtalk_send_message_to_member\n{\"success\":false,\"error\":\"API调用失败: 个人账户不存在\",\"error_code\":108,\"error_type\":\"WowTalkAPIError\",\"response_data\":{\"result_code\":108,\"api_version\":\"1.0\",\"request_id\":\"shimztest19-1764850653920\",\"message\":\"The user does not exist.\",\"response_data\":\"\"},\"bot_id\":\"63069654-7750-409d-9a58-a0960d899a20\"}\n[ANSWER]\n通知失败:该用户在 WowTalk 系统中不存在,请确认员工信息或检查账号是否正确。",
|
||||
"tokenUsage": {
|
||||
"total": 1456,
|
||||
"prompt": 335,
|
||||
"completion": 1121
|
||||
},
|
||||
"cached": false,
|
||||
"finishReason": "stop"
|
||||
},
|
||||
"score": 1,
|
||||
"success": true,
|
||||
"testCase": {
|
||||
"vars": {
|
||||
"question": "确认",
|
||||
"_conversation": [
|
||||
{
|
||||
"prompt": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "清水太郎在哪里"
|
||||
}
|
||||
],
|
||||
"input": "清水太郎在哪里",
|
||||
"output": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内,无法获取实时位置。"
|
||||
},
|
||||
{
|
||||
"prompt": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "清水太郎在哪里"
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内,无法获取实时位置。"
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "通知他明天上午8点开会"
|
||||
}
|
||||
],
|
||||
"input": "通知他明天上午8点开会",
|
||||
"output": "[THINK]\n### 🎯 核心目标\n通知清水太郎明天上午8点开会,但系统返回的是“清水邦英”而非“清水太郎”,需确认人员身份并发送会议提醒。\n\n### 📋 现状分析\n- **关键信息**:\n - 用户查询“清水太郎”,但系统返回匹配人为“清水 邦英”(姓名相近但不完全一致)。\n - 清水邦英的传感器信号不在测位范围内,无法获取位置,但不影响消息通知。\n - 清水邦英的wowtalk_id为“u226160”,可用于发送消息。\n - 用户意图是发送“明天上午8点开会”的通知。\n- **限制条件**:\n - 名字不完全匹配(太郎 vs 邦英),可能存在输入误差,需谨慎确认。\n - 消息通知属于“需要确认”的操作(根据指南第4条),不可直接发送。\n - 仅当确认人员为同一人时,才可发送通知。\n- **可用资源**:\n - Iot Control-find_employee_location(查询人员信息)\n - Wowtalk tool-wowtalk_send_message_to_member(发送消息)\n - 可用的wowtalk_id:u226160(清水邦英)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用 Iot Control-find_employee_location,查询名称中包含“太郎”的员工,确认是否存在“清水太郎”。\n2. 若步骤1返回结果为空或无匹配项,则向用户发送确认请求:“您要通知的是‘清水邦英’(wowtalk_id: u226160)吗?系统未查到‘清水太郎’,是否确认向清水邦英发送‘明天上午8点开会’的通知?”\n3. 等待用户明确回复“确认”或“是”。\n4. 收到确认后,调用 Wowtalk tool-wowtalk_send_message_to_member,向 wowtalk_id: u226160 发送消息:“明天上午8点开会”。\n\n[ANSWER]\n您要通知的是“清水邦英”(wowtalk_id: u226160)吗?系统未查到“清水太郎”,是否确认向清水邦英发送“明天上午8点开会”的通知?"
|
||||
},
|
||||
{
|
||||
"prompt": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "清水太郎在哪里"
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内,无法获取实时位置。"
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "通知他明天上午8点开会"
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "[THINK]\n### 🎯 核心目标\n通知清水太郎明天上午8点开会,但系统返回的是“清水邦英”而非“清水太郎”,需确认人员身份并发送会议提醒。\n\n### 📋 现状分析\n- **关键信息**:\n - 用户查询“清水太郎”,但系统返回匹配人为“清水 邦英”(姓名相近但不完全一致)。\n - 清水邦英的传感器信号不在测位范围内,无法获取位置,但不影响消息通知。\n - 清水邦英的wowtalk_id为“u226160”,可用于发送消息。\n - 用户意图是发送“明天上午8点开会”的通知。\n- **限制条件**:\n - 名字不完全匹配(太郎 vs 邦英),可能存在输入误差,需谨慎确认。\n - 消息通知属于“需要确认”的操作(根据指南第4条),不可直接发送。\n - 仅当确认人员为同一人时,才可发送通知。\n- **可用资源**:\n - Iot Control-find_employee_location(查询人员信息)\n - Wowtalk tool-wowtalk_send_message_to_member(发送消息)\n - 可用的wowtalk_id:u226160(清水邦英)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用 Iot Control-find_employee_location,查询名称中包含“太郎”的员工,确认是否存在“清水太郎”。\n2. 若步骤1返回结果为空或无匹配项,则向用户发送确认请求:“您要通知的是‘清水邦英’(wowtalk_id: u226160)吗?系统未查到‘清水太郎’,是否确认向清水邦英发送‘明天上午8点开会’的通知?”\n3. 等待用户明确回复“确认”或“是”。\n4. 收到确认后,调用 Wowtalk tool-wowtalk_send_message_to_member,向 wowtalk_id: u226160 发送消息:“明天上午8点开会”。\n\n[ANSWER]\n您要通知的是“清水邦英”(wowtalk_id: u226160)吗?系统未查到“清水太郎”,是否确认向清水邦英发送“明天上午8点开会”的通知?"
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "确认"
|
||||
}
|
||||
],
|
||||
"input": "确认",
|
||||
"output": "[THINK]\n### 🎯 核心目标\n向员工清水邦英(wowtalk_id: u226160)发送明天上午8点开会的通知。\n\n### 📋 现状分析\n- **关键信息**: \n - 用户最初询问“清水太郎”,系统返回“清水邦英”为唯一匹配人(confidence: 85%)。\n - 用户已确认“清水邦英”为通知对象,并明确指令:“通知他明天上午8点开会”。\n - 清水邦英的wowtalk_id为u226160,具备消息通知的唯一身份标识。\n- **限制条件**:\n - 根据【消息通知】规范,发送通知前必须获得用户明确确认,当前用户已确认,可直接执行。\n - 仅可使用授权工具完成操作,不得使用非指定工具。\n - 通知内容需精确传达,不得增删语义。\n- **可用资源**:\n - Wowtalk tool-wowtalk_send_message_to_member:用于向指定wowtalk_id发送消息。\n - Iot Control-find_employee_location:已使用并确认人员身份,无需重复调用。\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 Wowtalk tool-wowtalk_send_message_to_member,参数为:\n - wowtalk_id: \"u226160\"\n - message: \"明天上午8点开会\"\n\n[TOOL_CALL] Wowtalk tool-wowtalk_send_message_to_member\n{\"to_account\": \"u226160\", \"message_content\": \"明天上午8点开会\"}\n[TOOL_RESPONSE] Wowtalk tool-wowtalk_send_message_to_member\n{\"success\":false,\"error\":\"API调用失败: 个人账户不存在\",\"error_code\":108,\"error_type\":\"WowTalkAPIError\",\"response_data\":{\"result_code\":108,\"api_version\":\"1.0\",\"request_id\":\"shimztest19-1764850653920\",\"message\":\"The user does not exist.\",\"response_data\":\"\"},\"bot_id\":\"63069654-7750-409d-9a58-a0960d899a20\"}\n[ANSWER]\n通知失败:该用户在 WowTalk 系统中不存在,请确认员工信息或检查账号是否正确。"
|
||||
}
|
||||
]
|
||||
},
|
||||
"assert": [
|
||||
{
|
||||
"type": "regex",
|
||||
"value": "\\[TOOL_CALL\\].*wowtalk_send_message_to_member"
|
||||
}
|
||||
],
|
||||
"options": {
|
||||
"provider": {
|
||||
"text": {
|
||||
"id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct",
|
||||
"config": {
|
||||
"apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26",
|
||||
"apiBaseUrl": "https://one.felo.me/v1",
|
||||
"apiVersion": "2024-02-01"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"metadata": {}
|
||||
},
|
||||
"testIdx": 2,
|
||||
"vars": {
|
||||
"question": "确认",
|
||||
"_conversation": [
|
||||
{
|
||||
"prompt": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "清水太郎在哪里"
|
||||
}
|
||||
],
|
||||
"input": "清水太郎在哪里",
|
||||
"output": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内,无法获取实时位置。"
|
||||
},
|
||||
{
|
||||
"prompt": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "清水太郎在哪里"
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内,无法获取实时位置。"
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "通知他明天上午8点开会"
|
||||
}
|
||||
],
|
||||
"input": "通知他明天上午8点开会",
|
||||
"output": "[THINK]\n### 🎯 核心目标\n通知清水太郎明天上午8点开会,但系统返回的是“清水邦英”而非“清水太郎”,需确认人员身份并发送会议提醒。\n\n### 📋 现状分析\n- **关键信息**:\n - 用户查询“清水太郎”,但系统返回匹配人为“清水 邦英”(姓名相近但不完全一致)。\n - 清水邦英的传感器信号不在测位范围内,无法获取位置,但不影响消息通知。\n - 清水邦英的wowtalk_id为“u226160”,可用于发送消息。\n - 用户意图是发送“明天上午8点开会”的通知。\n- **限制条件**:\n - 名字不完全匹配(太郎 vs 邦英),可能存在输入误差,需谨慎确认。\n - 消息通知属于“需要确认”的操作(根据指南第4条),不可直接发送。\n - 仅当确认人员为同一人时,才可发送通知。\n- **可用资源**:\n - Iot Control-find_employee_location(查询人员信息)\n - Wowtalk tool-wowtalk_send_message_to_member(发送消息)\n - 可用的wowtalk_id:u226160(清水邦英)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用 Iot Control-find_employee_location,查询名称中包含“太郎”的员工,确认是否存在“清水太郎”。\n2. 若步骤1返回结果为空或无匹配项,则向用户发送确认请求:“您要通知的是‘清水邦英’(wowtalk_id: u226160)吗?系统未查到‘清水太郎’,是否确认向清水邦英发送‘明天上午8点开会’的通知?”\n3. 等待用户明确回复“确认”或“是”。\n4. 收到确认后,调用 Wowtalk tool-wowtalk_send_message_to_member,向 wowtalk_id: u226160 发送消息:“明天上午8点开会”。\n\n[ANSWER]\n您要通知的是“清水邦英”(wowtalk_id: u226160)吗?系统未查到“清水太郎”,是否确认向清水邦英发送“明天上午8点开会”的通知?"
|
||||
},
|
||||
{
|
||||
"prompt": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "清水太郎在哪里"
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内,无法获取实时位置。"
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "通知他明天上午8点开会"
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "[THINK]\n### 🎯 核心目标\n通知清水太郎明天上午8点开会,但系统返回的是“清水邦英”而非“清水太郎”,需确认人员身份并发送会议提醒。\n\n### 📋 现状分析\n- **关键信息**:\n - 用户查询“清水太郎”,但系统返回匹配人为“清水 邦英”(姓名相近但不完全一致)。\n - 清水邦英的传感器信号不在测位范围内,无法获取位置,但不影响消息通知。\n - 清水邦英的wowtalk_id为“u226160”,可用于发送消息。\n - 用户意图是发送“明天上午8点开会”的通知。\n- **限制条件**:\n - 名字不完全匹配(太郎 vs 邦英),可能存在输入误差,需谨慎确认。\n - 消息通知属于“需要确认”的操作(根据指南第4条),不可直接发送。\n - 仅当确认人员为同一人时,才可发送通知。\n- **可用资源**:\n - Iot Control-find_employee_location(查询人员信息)\n - Wowtalk tool-wowtalk_send_message_to_member(发送消息)\n - 可用的wowtalk_id:u226160(清水邦英)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用 Iot Control-find_employee_location,查询名称中包含“太郎”的员工,确认是否存在“清水太郎”。\n2. 若步骤1返回结果为空或无匹配项,则向用户发送确认请求:“您要通知的是‘清水邦英’(wowtalk_id: u226160)吗?系统未查到‘清水太郎’,是否确认向清水邦英发送‘明天上午8点开会’的通知?”\n3. 等待用户明确回复“确认”或“是”。\n4. 收到确认后,调用 Wowtalk tool-wowtalk_send_message_to_member,向 wowtalk_id: u226160 发送消息:“明天上午8点开会”。\n\n[ANSWER]\n您要通知的是“清水邦英”(wowtalk_id: u226160)吗?系统未查到“清水太郎”,是否确认向清水邦英发送“明天上午8点开会”的通知?"
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "确认"
|
||||
}
|
||||
],
|
||||
"input": "确认",
|
||||
"output": "[THINK]\n### 🎯 核心目标\n向员工清水邦英(wowtalk_id: u226160)发送明天上午8点开会的通知。\n\n### 📋 现状分析\n- **关键信息**: \n - 用户最初询问“清水太郎”,系统返回“清水邦英”为唯一匹配人(confidence: 85%)。\n - 用户已确认“清水邦英”为通知对象,并明确指令:“通知他明天上午8点开会”。\n - 清水邦英的wowtalk_id为u226160,具备消息通知的唯一身份标识。\n- **限制条件**:\n - 根据【消息通知】规范,发送通知前必须获得用户明确确认,当前用户已确认,可直接执行。\n - 仅可使用授权工具完成操作,不得使用非指定工具。\n - 通知内容需精确传达,不得增删语义。\n- **可用资源**:\n - Wowtalk tool-wowtalk_send_message_to_member:用于向指定wowtalk_id发送消息。\n - Iot Control-find_employee_location:已使用并确认人员身份,无需重复调用。\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 Wowtalk tool-wowtalk_send_message_to_member,参数为:\n - wowtalk_id: \"u226160\"\n - message: \"明天上午8点开会\"\n\n[TOOL_CALL] Wowtalk tool-wowtalk_send_message_to_member\n{\"to_account\": \"u226160\", \"message_content\": \"明天上午8点开会\"}\n[TOOL_RESPONSE] Wowtalk tool-wowtalk_send_message_to_member\n{\"success\":false,\"error\":\"API调用失败: 个人账户不存在\",\"error_code\":108,\"error_type\":\"WowTalkAPIError\",\"response_data\":{\"result_code\":108,\"api_version\":\"1.0\",\"request_id\":\"shimztest19-1764850653920\",\"message\":\"The user does not exist.\",\"response_data\":\"\"},\"bot_id\":\"63069654-7750-409d-9a58-a0960d899a20\"}\n[ANSWER]\n通知失败:该用户在 WowTalk 系统中不存在,请确认员工信息或检查账号是否正确。"
|
||||
}
|
||||
]
|
||||
},
|
||||
"metadata": {
|
||||
"_promptfooFileMetadata": {}
|
||||
},
|
||||
"failureReason": 0
|
||||
}
|
||||
],
|
||||
"stats": {
|
||||
"successes": 3,
|
||||
"failures": 0,
|
||||
"errors": 0,
|
||||
"tokenUsage": {
|
||||
"prompt": 596,
|
||||
"completion": 2017,
|
||||
"cached": 709,
|
||||
"total": 3322,
|
||||
"numRequests": 3,
|
||||
"completionDetails": {
|
||||
"reasoning": 0,
|
||||
"acceptedPrediction": 0,
|
||||
"rejectedPrediction": 0
|
||||
},
|
||||
"assertions": {
|
||||
"total": 746,
|
||||
"prompt": 697,
|
||||
"completion": 49,
|
||||
"cached": 0,
|
||||
"numRequests": 0,
|
||||
"completionDetails": {
|
||||
"reasoning": 0,
|
||||
"acceptedPrediction": 0,
|
||||
"rejectedPrediction": 0
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"config": {
|
||||
"tags": {},
|
||||
"description": "Novare Test",
|
||||
"prompts": [
|
||||
"file:///Users/moshui/Documents/felo/qwen-agent/promptfoo/conversation/prompt.json"
|
||||
],
|
||||
"providers": [
|
||||
{
|
||||
"id": "openai:chat:qwen3",
|
||||
"config": {
|
||||
"apiBaseUrl": "https://catalog-agent-dev.gbase.ai/api/v2",
|
||||
"apiKey": "a21c99620a8ef61d69563afe05ccce89",
|
||||
"passthrough": {
|
||||
"bot_id": "63069654-7750-409d-9a58-a0960d899a20",
|
||||
"tool_response": true,
|
||||
"language": "zh"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"tests": [
|
||||
{
|
||||
"vars": {
|
||||
"question": "清水太郎在哪里"
|
||||
},
|
||||
"assert": [
|
||||
{
|
||||
"type": "regex",
|
||||
"value": "\\[TOOL_CALL\\].*find_employee_location"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"vars": {
|
||||
"question": "通知他明天上午8点开会"
|
||||
},
|
||||
"assert": [
|
||||
{
|
||||
"type": "llm-rubric",
|
||||
"value": "询问是否确认发送"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"vars": {
|
||||
"question": "确认"
|
||||
},
|
||||
"assert": [
|
||||
{
|
||||
"type": "regex",
|
||||
"value": "\\[TOOL_CALL\\].*wowtalk_send_message_to_member"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"scenarios": [],
|
||||
"env": {},
|
||||
"sharing": true,
|
||||
"defaultTest": {
|
||||
"options": {
|
||||
"provider": {
|
||||
"text": {
|
||||
"id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct",
|
||||
"config": {
|
||||
"apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26",
|
||||
"apiBaseUrl": "https://one.felo.me/v1",
|
||||
"apiVersion": "2024-02-01"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"vars": {},
|
||||
"assert": [],
|
||||
"metadata": {}
|
||||
},
|
||||
"outputPath": [
|
||||
"result.json"
|
||||
],
|
||||
"extensions": [],
|
||||
"metadata": {}
|
||||
},
|
||||
"shareableUrl": null,
|
||||
"metadata": {
|
||||
"promptfooVersion": "0.117.11",
|
||||
"nodeVersion": "v20.10.0",
|
||||
"platform": "darwin",
|
||||
"arch": "x64",
|
||||
"exportedAt": "2025-12-04T12:17:36.718Z",
|
||||
"evaluationCreatedAt": "2025-12-04T12:17:14.502Z"
|
||||
}
|
||||
}
|
||||
32
promptfoo/query/config.yaml
Normal file
32
promptfoo/query/config.yaml
Normal file
@ -0,0 +1,32 @@
|
||||
env: {}
|
||||
description: Novare Test
|
||||
providers:
|
||||
- id: openai:chat:qwen3
|
||||
config:
|
||||
apiBaseUrl: https://catalog-agent-dev.gbase.ai/api/v2
|
||||
apiKey: a21c99620a8ef61d69563afe05ccce89
|
||||
passthrough:
|
||||
bot_id: 63069654-7750-409d-9a58-a0960d899a20
|
||||
tool_response: true
|
||||
language: zh
|
||||
prompts:
|
||||
- file://prompt.json
|
||||
tests:
|
||||
- vars:
|
||||
question: 940092の稼働状況
|
||||
assert:
|
||||
- type: regex
|
||||
value: \[TOOL_CALL\].*dxcore_get_device_status
|
||||
|
||||
defaultTest:
|
||||
options:
|
||||
provider:
|
||||
text:
|
||||
id: openai:chat:qwen/qwen3-next-80b-a3b-instruct
|
||||
config:
|
||||
apiKey: sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26
|
||||
apiBaseUrl: https://one.felo.me/v1
|
||||
apiVersion: '2024-02-01'
|
||||
evaluateOptions: {}
|
||||
writeLatestResults: true
|
||||
sharing: true
|
||||
6
promptfoo/query/prompt.json
Normal file
6
promptfoo/query/prompt.json
Normal file
@ -0,0 +1,6 @@
|
||||
[
|
||||
{
|
||||
"role": "user",
|
||||
"content": "{{ question }}"
|
||||
}
|
||||
]
|
||||
235
promptfoo/query/result.json
Normal file
235
promptfoo/query/result.json
Normal file
@ -0,0 +1,235 @@
|
||||
{
|
||||
"evalId": "eval-rew-2025-12-04T11:49:09",
|
||||
"results": {
|
||||
"version": 3,
|
||||
"timestamp": "2025-12-04T11:49:09.282Z",
|
||||
"prompts": [
|
||||
{
|
||||
"raw": "[{\"role\":\"user\",\"content\":\"{{ question }}\"}]",
|
||||
"label": "prompt.json: [{\"role\":\"user\",\"content\":\"{{ question }}\"}]",
|
||||
"id": "7fd9d6ab1656b5f683dd7d34fc535754cd42291c7b78f2aa5fd68b3e43dae7b6",
|
||||
"provider": "openai:chat:qwen3",
|
||||
"metrics": {
|
||||
"score": 1,
|
||||
"testPassCount": 1,
|
||||
"testFailCount": 0,
|
||||
"testErrorCount": 0,
|
||||
"assertPassCount": 1,
|
||||
"assertFailCount": 0,
|
||||
"totalLatencyMs": 8731,
|
||||
"tokenUsage": {
|
||||
"prompt": 11,
|
||||
"completion": 1174,
|
||||
"cached": 0,
|
||||
"total": 1185,
|
||||
"numRequests": 1,
|
||||
"completionDetails": {
|
||||
"reasoning": 0,
|
||||
"acceptedPrediction": 0,
|
||||
"rejectedPrediction": 0
|
||||
},
|
||||
"assertions": {
|
||||
"total": 0,
|
||||
"prompt": 0,
|
||||
"completion": 0,
|
||||
"cached": 0,
|
||||
"numRequests": 0,
|
||||
"completionDetails": {
|
||||
"reasoning": 0,
|
||||
"acceptedPrediction": 0,
|
||||
"rejectedPrediction": 0
|
||||
}
|
||||
}
|
||||
},
|
||||
"namedScores": {},
|
||||
"namedScoresCount": {},
|
||||
"cost": 0
|
||||
}
|
||||
}
|
||||
],
|
||||
"results": [
|
||||
{
|
||||
"cost": 0,
|
||||
"gradingResult": {
|
||||
"pass": true,
|
||||
"score": 1,
|
||||
"reason": "All assertions passed",
|
||||
"namedScores": {},
|
||||
"tokensUsed": {
|
||||
"total": 0,
|
||||
"prompt": 0,
|
||||
"completion": 0,
|
||||
"cached": 0,
|
||||
"numRequests": 0
|
||||
},
|
||||
"componentResults": [
|
||||
{
|
||||
"pass": true,
|
||||
"score": 1,
|
||||
"reason": "Assertion passed",
|
||||
"assertion": {
|
||||
"type": "regex",
|
||||
"value": "\\[TOOL_CALL\\].*dxcore_get_device_status"
|
||||
}
|
||||
}
|
||||
],
|
||||
"assertion": null
|
||||
},
|
||||
"id": "ec8dff77-79da-499f-aee0-f40727fa844e",
|
||||
"latencyMs": 8731,
|
||||
"namedScores": {},
|
||||
"prompt": {
|
||||
"raw": "[\n {\n \"role\": \"user\",\n \"content\": \"940092の稼働状況\"\n }\n]",
|
||||
"label": "prompt.json: [{\"role\":\"user\",\"content\":\"{{ question }}\"}]"
|
||||
},
|
||||
"promptId": "7fd9d6ab1656b5f683dd7d34fc535754cd42291c7b78f2aa5fd68b3e43dae7b6",
|
||||
"promptIdx": 0,
|
||||
"provider": {
|
||||
"id": "openai:chat:qwen3",
|
||||
"label": ""
|
||||
},
|
||||
"response": {
|
||||
"output": "[THINK]\n### 🎯 核心目标\n查询编号为“940092”的设备的当前运行状态(如温度、湿度、运行模式等),以了解其稼働状況(工作状态)。\n\n### 📋 现状分析\n- **关键信息**: \n - 用户查询的是“940092”的稼働状況,该编号极可能为设备ID(如空调或照明设备)。 \n - 当前时间为2025年12月4日19:49,属于晚间时段,设备可能处于运行中。 \n - 用户未明确要求控制设备,仅要求查询状态,属于信息查询类请求。\n\n- **限制条件**: \n - 仅允许查询设备状态,不能擅自执行控制操作(如开关、调温等)。 \n - 必须使用指定工具,且需优先使用精准匹配的设备ID查询工具。 \n - 所有输出语言必须为中文。\n\n- **可用资源**: \n - **Iot Control-dxcore_get_device_status**:可通过设备ID直接查询设备当前状态(温度、湿度、运行模式、风量等)。 \n - **Iot Control-find_devices_by_room**:仅在设备ID无效或模糊时作为备选,但当前ID明确,无需使用。 \n - 无需调用人员、消息、天气、知识库或搜索工具,因查询目标明确为设备状态。\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 Iot Control-dxcore_get_device_status,传入参数 device_id: \"940092\",查询该设备的当前运行状态。\n\n[TOOL_CALL] Iot Control-dxcore_get_device_status\n{\"sensor_id\": 940092}\n[TOOL_RESPONSE] Iot Control-dxcore_get_device_status\n{\"success\":true,\"data\":[{\"Device.Info.Id\":940092,\"Device.Info.Type\":\"light\",\"Device.Info.Description\":\"A棟 2F コワーキングスペース①(丸)-01 照明\",\"Device.Info.Manufacturer\":\"Signify照明\",\"Device.Info.SerialNumber\":\"none\",\"Device.Info.Coordinates\":[0,0,0],\"Device.Info.FieldId\":\"\",\"Device.Status.Lighting\":3}],\"site_name\":\"shiomi\",\"device_type\":\"light\",\"sensor_id\":940092}\n[ANSWER]\nA栋2F协同空间①(丸)-01的照明设备运行正常,当前亮度为3级。",
|
||||
"tokenUsage": {
|
||||
"total": 1185,
|
||||
"prompt": 11,
|
||||
"completion": 1174
|
||||
},
|
||||
"cached": false,
|
||||
"finishReason": "stop"
|
||||
},
|
||||
"score": 1,
|
||||
"success": true,
|
||||
"testCase": {
|
||||
"vars": {
|
||||
"question": "940092の稼働状況"
|
||||
},
|
||||
"assert": [
|
||||
{
|
||||
"type": "regex",
|
||||
"value": "\\[TOOL_CALL\\].*dxcore_get_device_status"
|
||||
}
|
||||
],
|
||||
"options": {
|
||||
"provider": {
|
||||
"text": {
|
||||
"id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct",
|
||||
"config": {
|
||||
"apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26",
|
||||
"apiBaseUrl": "https://one.felo.me/v1",
|
||||
"apiVersion": "2024-02-01"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"metadata": {}
|
||||
},
|
||||
"testIdx": 0,
|
||||
"vars": {
|
||||
"question": "940092の稼働状況"
|
||||
},
|
||||
"metadata": {
|
||||
"_promptfooFileMetadata": {}
|
||||
},
|
||||
"failureReason": 0
|
||||
}
|
||||
],
|
||||
"stats": {
|
||||
"successes": 1,
|
||||
"failures": 0,
|
||||
"errors": 0,
|
||||
"tokenUsage": {
|
||||
"prompt": 11,
|
||||
"completion": 1174,
|
||||
"cached": 0,
|
||||
"total": 1185,
|
||||
"numRequests": 1,
|
||||
"completionDetails": {
|
||||
"reasoning": 0,
|
||||
"acceptedPrediction": 0,
|
||||
"rejectedPrediction": 0
|
||||
},
|
||||
"assertions": {
|
||||
"total": 0,
|
||||
"prompt": 0,
|
||||
"completion": 0,
|
||||
"cached": 0,
|
||||
"numRequests": 0,
|
||||
"completionDetails": {
|
||||
"reasoning": 0,
|
||||
"acceptedPrediction": 0,
|
||||
"rejectedPrediction": 0
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"config": {
|
||||
"tags": {},
|
||||
"description": "Novare Test",
|
||||
"prompts": [
|
||||
"file:///Users/moshui/Documents/felo/qwen-agent/promptfoo/query/prompt.json"
|
||||
],
|
||||
"providers": [
|
||||
{
|
||||
"id": "openai:chat:qwen3",
|
||||
"config": {
|
||||
"apiBaseUrl": "https://catalog-agent-dev.gbase.ai/api/v2",
|
||||
"apiKey": "a21c99620a8ef61d69563afe05ccce89",
|
||||
"passthrough": {
|
||||
"bot_id": "63069654-7750-409d-9a58-a0960d899a20",
|
||||
"tool_response": true,
|
||||
"language": "zh"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"tests": [
|
||||
{
|
||||
"vars": {
|
||||
"question": "940092の稼働状況"
|
||||
},
|
||||
"assert": [
|
||||
{
|
||||
"type": "regex",
|
||||
"value": "\\[TOOL_CALL\\].*dxcore_get_device_status"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"scenarios": [],
|
||||
"env": {},
|
||||
"sharing": true,
|
||||
"defaultTest": {
|
||||
"options": {
|
||||
"provider": {
|
||||
"text": {
|
||||
"id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct",
|
||||
"config": {
|
||||
"apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26",
|
||||
"apiBaseUrl": "https://one.felo.me/v1",
|
||||
"apiVersion": "2024-02-01"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"vars": {},
|
||||
"assert": [],
|
||||
"metadata": {}
|
||||
},
|
||||
"outputPath": [
|
||||
"result.json"
|
||||
],
|
||||
"extensions": [],
|
||||
"metadata": {}
|
||||
},
|
||||
"shareableUrl": null,
|
||||
"metadata": {
|
||||
"promptfooVersion": "0.117.11",
|
||||
"nodeVersion": "v20.10.0",
|
||||
"platform": "darwin",
|
||||
"arch": "x64",
|
||||
"exportedAt": "2025-12-04T11:49:18.166Z",
|
||||
"evaluationCreatedAt": "2025-12-04T11:49:09.282Z"
|
||||
}
|
||||
}
|
||||
369
promptfoo/result.json
Normal file
369
promptfoo/result.json
Normal file
@ -0,0 +1,369 @@
|
||||
{
|
||||
"evalId": "eval-0Pa-2025-12-04T11:23:59",
|
||||
"results": {
|
||||
"version": 3,
|
||||
"timestamp": "2025-12-04T11:23:59.637Z",
|
||||
"prompts": [
|
||||
{
|
||||
"raw": "[\n{% for message in messages %}\n {% set outer_loop = loop %}\n {% for role, content in message %}\n {\n \"role\": \"{{ role }}\",\n \"content\": \"{{ content }}\"\n }{% if not (loop.last and outer_loop.last) %},{% endif %}\n {% endfor %}\n{% endfor %}\n]\n",
|
||||
"label": "prompt.json: [\n{% for message in messages %}\n {% set outer_loop = loop %}\n {% for role, content in message %}\n {\n \"role\": \"{{ role }}\",\n \"content\": \"{{ content }}\"\n }{% if not (loop.last and outer_loop.last) %},{% endif %}\n {% endfor %}\n{% endfor %}\n]\n",
|
||||
"id": "31ba1cc91b4ca656f8bf91b71d90fd1c786f46919fd3e498accd274537581342",
|
||||
"provider": "openai:chat:qwen3",
|
||||
"metrics": {
|
||||
"score": 2,
|
||||
"testPassCount": 2,
|
||||
"testFailCount": 0,
|
||||
"testErrorCount": 0,
|
||||
"assertPassCount": 2,
|
||||
"assertFailCount": 0,
|
||||
"totalLatencyMs": 62,
|
||||
"tokenUsage": {
|
||||
"prompt": 0,
|
||||
"completion": 0,
|
||||
"cached": 1877,
|
||||
"total": 1877,
|
||||
"numRequests": 2,
|
||||
"completionDetails": {
|
||||
"reasoning": 0,
|
||||
"acceptedPrediction": 0,
|
||||
"rejectedPrediction": 0
|
||||
},
|
||||
"assertions": {
|
||||
"total": 808,
|
||||
"prompt": 0,
|
||||
"completion": 0,
|
||||
"cached": 808,
|
||||
"numRequests": 0,
|
||||
"completionDetails": {
|
||||
"reasoning": 0,
|
||||
"acceptedPrediction": 0,
|
||||
"rejectedPrediction": 0
|
||||
}
|
||||
}
|
||||
},
|
||||
"namedScores": {},
|
||||
"namedScoresCount": {},
|
||||
"cost": 0
|
||||
}
|
||||
}
|
||||
],
|
||||
"results": [
|
||||
{
|
||||
"cost": 0,
|
||||
"gradingResult": {
|
||||
"pass": true,
|
||||
"score": 1,
|
||||
"reason": "All assertions passed",
|
||||
"namedScores": {},
|
||||
"tokensUsed": {
|
||||
"total": 808,
|
||||
"prompt": 0,
|
||||
"completion": 0,
|
||||
"cached": 808,
|
||||
"numRequests": 0
|
||||
},
|
||||
"componentResults": [
|
||||
{
|
||||
"assertion": {
|
||||
"type": "llm-rubric",
|
||||
"value": "询问是否确认发送"
|
||||
},
|
||||
"pass": true,
|
||||
"score": 1,
|
||||
"reason": "输出中明确提出了确认请求:'即将发送“明天上午8点开会”的消息给清水邦英,是否确认?',符合要求在执行消息通知前进行确认的流程",
|
||||
"tokensUsed": {
|
||||
"total": 808,
|
||||
"prompt": 0,
|
||||
"completion": 0,
|
||||
"cached": 808,
|
||||
"numRequests": 0,
|
||||
"completionDetails": {
|
||||
"reasoning": 0,
|
||||
"acceptedPrediction": 0,
|
||||
"rejectedPrediction": 0
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"assertion": null
|
||||
},
|
||||
"id": "867656f1-edcd-41b9-b645-c9f5df2e2cc1",
|
||||
"latencyMs": 46,
|
||||
"namedScores": {},
|
||||
"prompt": {
|
||||
"raw": "[\n\n \n \n {\n \"role\": \"user\",\n \"content\": \"通知清水太郎明天上午8点开会\"\n }\n \n\n]\n",
|
||||
"label": "prompt.json: [\n{% for message in messages %}\n {% set outer_loop = loop %}\n {% for role, content in message %}\n {\n \"role\": \"{{ role }}\",\n \"content\": \"{{ content }}\"\n }{% if not (loop.last and outer_loop.last) %},{% endif %}\n {% endfor %}\n{% endfor %}\n]\n"
|
||||
},
|
||||
"promptId": "31ba1cc91b4ca656f8bf91b71d90fd1c786f46919fd3e498accd274537581342",
|
||||
"promptIdx": 0,
|
||||
"provider": {
|
||||
"id": "openai:chat:qwen3",
|
||||
"label": ""
|
||||
},
|
||||
"response": {
|
||||
"output": "[THINK]\n### 🎯 核心目标\n通知清水太郎明天上午8点开会。\n\n### 📋 现状分析\n- **关键信息**: \n - 用户指令为“通知清水太郎明天上午8点开会” \n - 目标对象为“清水太郎”(需确认其是否存在及联系方式) \n - 消息内容为会议提醒,时间为“明天上午8点”(2025年12月5日 08:00) \n - 操作属于“消息通知”,需经过确认流程\n\n- **限制条件**: \n - 根据指南第4条,消息通知类操作必须进行确认,不可直接执行 \n - 必须通过【人员检索】确认清水太郎的身份与WowTalk账号绑定状态 \n - 消息内容需准确无误地传达时间与事项 \n - 当前时间为2025年12月4日19:01,明天为12月5日,时间合理\n\n- **可用资源**: \n - Iot Control-find_employee_location(查找人员位置/信息) \n - Wowtalk tool-wowtalk_send_message_to_member(发送消息通知) \n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用 `Iot Control-find_employee_location` 查询“清水太郎”的员工信息,获取其唯一sensor_id或WowTalk账号\n2. 若查询到唯一匹配结果,生成确认提示:“即将发送‘明天上午8点开会’的消息给清水太郎,是否确认?”\n3. 等待用户回复确认(如“确认”、“好的”等)\n4. 收到确认后,调用 `Wowtalk tool-wowtalk_send_message_to_member` 向清水太郎发送消息:“明天上午8点开会”\n5. 若未查询到清水太郎,请回复:“未找到清水太郎的员工信息,请确认姓名是否正确或是否为系统内注册人员”\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n即将发送“明天上午8点开会”的消息给清水邦英,是否确认?",
|
||||
"tokenUsage": {
|
||||
"cached": 1168,
|
||||
"total": 1168
|
||||
},
|
||||
"cached": true,
|
||||
"finishReason": "stop"
|
||||
},
|
||||
"score": 1,
|
||||
"success": true,
|
||||
"testCase": {
|
||||
"description": "通知清水太郎明天上午8点开会",
|
||||
"vars": {
|
||||
"messages": [
|
||||
{
|
||||
"user": "通知清水太郎明天上午8点开会"
|
||||
}
|
||||
]
|
||||
},
|
||||
"assert": [
|
||||
{
|
||||
"type": "llm-rubric",
|
||||
"value": "询问是否确认发送"
|
||||
}
|
||||
],
|
||||
"options": {
|
||||
"provider": {
|
||||
"text": {
|
||||
"id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct",
|
||||
"config": {
|
||||
"apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26",
|
||||
"apiBaseUrl": "https://one.felo.me/v1",
|
||||
"apiVersion": "2024-02-01"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"metadata": {}
|
||||
},
|
||||
"testIdx": 0,
|
||||
"vars": {
|
||||
"messages": [
|
||||
{
|
||||
"user": "通知清水太郎明天上午8点开会"
|
||||
}
|
||||
]
|
||||
},
|
||||
"metadata": {
|
||||
"_promptfooFileMetadata": {}
|
||||
},
|
||||
"failureReason": 0
|
||||
},
|
||||
{
|
||||
"cost": 0,
|
||||
"gradingResult": {
|
||||
"pass": true,
|
||||
"score": 1,
|
||||
"reason": "All assertions passed",
|
||||
"namedScores": {},
|
||||
"tokensUsed": {
|
||||
"total": 0,
|
||||
"prompt": 0,
|
||||
"completion": 0,
|
||||
"cached": 0,
|
||||
"numRequests": 0
|
||||
},
|
||||
"componentResults": [
|
||||
{
|
||||
"pass": true,
|
||||
"score": 1,
|
||||
"reason": "Assertion passed",
|
||||
"assertion": {
|
||||
"type": "regex",
|
||||
"value": "\\[TOOL_CALL\\].*find_employee_location"
|
||||
}
|
||||
}
|
||||
],
|
||||
"assertion": null
|
||||
},
|
||||
"id": "a1bb2558-6a43-42e1-969d-332d8a4ca397",
|
||||
"latencyMs": 16,
|
||||
"namedScores": {},
|
||||
"prompt": {
|
||||
"raw": "[\n\n \n \n {\n \"role\": \"user\",\n \"content\": \"清水太郎在哪里\"\n }\n \n\n]\n",
|
||||
"label": "prompt.json: [\n{% for message in messages %}\n {% set outer_loop = loop %}\n {% for role, content in message %}\n {\n \"role\": \"{{ role }}\",\n \"content\": \"{{ content }}\"\n }{% if not (loop.last and outer_loop.last) %},{% endif %}\n {% endfor %}\n{% endfor %}\n]\n"
|
||||
},
|
||||
"promptId": "31ba1cc91b4ca656f8bf91b71d90fd1c786f46919fd3e498accd274537581342",
|
||||
"promptIdx": 0,
|
||||
"provider": {
|
||||
"id": "openai:chat:qwen3",
|
||||
"label": ""
|
||||
},
|
||||
"response": {
|
||||
"output": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内,无法获取实时位置。",
|
||||
"tokenUsage": {
|
||||
"cached": 709,
|
||||
"total": 709
|
||||
},
|
||||
"cached": true,
|
||||
"finishReason": "stop"
|
||||
},
|
||||
"score": 1,
|
||||
"success": true,
|
||||
"testCase": {
|
||||
"description": "清水太郎在哪里",
|
||||
"vars": {
|
||||
"messages": [
|
||||
{
|
||||
"user": "清水太郎在哪里"
|
||||
}
|
||||
]
|
||||
},
|
||||
"assert": [
|
||||
{
|
||||
"type": "regex",
|
||||
"value": "\\[TOOL_CALL\\].*find_employee_location"
|
||||
}
|
||||
],
|
||||
"options": {
|
||||
"provider": {
|
||||
"text": {
|
||||
"id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct",
|
||||
"config": {
|
||||
"apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26",
|
||||
"apiBaseUrl": "https://one.felo.me/v1",
|
||||
"apiVersion": "2024-02-01"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"metadata": {}
|
||||
},
|
||||
"testIdx": 1,
|
||||
"vars": {
|
||||
"messages": [
|
||||
{
|
||||
"user": "清水太郎在哪里"
|
||||
}
|
||||
]
|
||||
},
|
||||
"metadata": {
|
||||
"_promptfooFileMetadata": {}
|
||||
},
|
||||
"failureReason": 0
|
||||
}
|
||||
],
|
||||
"stats": {
|
||||
"successes": 2,
|
||||
"failures": 0,
|
||||
"errors": 0,
|
||||
"tokenUsage": {
|
||||
"prompt": 0,
|
||||
"completion": 0,
|
||||
"cached": 1877,
|
||||
"total": 1877,
|
||||
"numRequests": 2,
|
||||
"completionDetails": {
|
||||
"reasoning": 0,
|
||||
"acceptedPrediction": 0,
|
||||
"rejectedPrediction": 0
|
||||
},
|
||||
"assertions": {
|
||||
"total": 808,
|
||||
"prompt": 0,
|
||||
"completion": 0,
|
||||
"cached": 808,
|
||||
"numRequests": 0,
|
||||
"completionDetails": {
|
||||
"reasoning": 0,
|
||||
"acceptedPrediction": 0,
|
||||
"rejectedPrediction": 0
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"config": {
|
||||
"tags": {},
|
||||
"description": "Novare Test",
|
||||
"prompts": [
|
||||
"file:///Users/moshui/Documents/felo/qwen-agent/promptfoo/prompt.json"
|
||||
],
|
||||
"providers": [
|
||||
{
|
||||
"id": "openai:chat:qwen3",
|
||||
"config": {
|
||||
"apiBaseUrl": "https://catalog-agent-dev.gbase.ai/api/v2",
|
||||
"apiKey": "a21c99620a8ef61d69563afe05ccce89",
|
||||
"passthrough": {
|
||||
"bot_id": "63069654-7750-409d-9a58-a0960d899a20",
|
||||
"tool_response": true,
|
||||
"language": "zh"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"tests": [
|
||||
{
|
||||
"description": "通知清水太郎明天上午8点开会",
|
||||
"vars": {
|
||||
"messages": [
|
||||
{
|
||||
"user": "通知清水太郎明天上午8点开会"
|
||||
}
|
||||
]
|
||||
},
|
||||
"assert": [
|
||||
{
|
||||
"type": "llm-rubric",
|
||||
"value": "询问是否确认发送"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"description": "清水太郎在哪里",
|
||||
"vars": {
|
||||
"messages": [
|
||||
{
|
||||
"user": "清水太郎在哪里"
|
||||
}
|
||||
]
|
||||
},
|
||||
"assert": [
|
||||
{
|
||||
"type": "regex",
|
||||
"value": "\\[TOOL_CALL\\].*find_employee_location"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"scenarios": [],
|
||||
"env": {},
|
||||
"sharing": true,
|
||||
"defaultTest": {
|
||||
"options": {
|
||||
"provider": {
|
||||
"text": {
|
||||
"id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct",
|
||||
"config": {
|
||||
"apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26",
|
||||
"apiBaseUrl": "https://one.felo.me/v1",
|
||||
"apiVersion": "2024-02-01"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"vars": {},
|
||||
"assert": [],
|
||||
"metadata": {}
|
||||
},
|
||||
"outputPath": [
|
||||
"result.json"
|
||||
],
|
||||
"extensions": [],
|
||||
"metadata": {}
|
||||
},
|
||||
"shareableUrl": null,
|
||||
"metadata": {
|
||||
"promptfooVersion": "0.117.11",
|
||||
"nodeVersion": "v20.10.0",
|
||||
"platform": "darwin",
|
||||
"arch": "x64",
|
||||
"exportedAt": "2025-12-04T11:24:00.162Z",
|
||||
"evaluationCreatedAt": "2025-12-04T11:23:59.637Z"
|
||||
}
|
||||
}
|
||||
@ -57,6 +57,25 @@ def append_user_last_message(messages: list, content: str) -> bool:
|
||||
messages[-1]['content'] += content
|
||||
return messages
|
||||
|
||||
def append_assistant_last_message(messages: list, content: str) -> bool:
|
||||
"""向最后一条用户消息追加内容
|
||||
|
||||
Args:
|
||||
messages: 消息列表
|
||||
content: 要追加的内容
|
||||
condition: 可选条件,如果提供则检查消息角色是否匹配此条件
|
||||
|
||||
Returns:
|
||||
bool: 是否成功追加内容
|
||||
"""
|
||||
if not messages or len(messages) == 0:
|
||||
return messages
|
||||
last_message = messages[-1]
|
||||
if last_message and last_message.get('role') == 'assistant':
|
||||
messages[-1]['content'] += content
|
||||
else:
|
||||
messages.append({"role":"assistant","content":content})
|
||||
return messages
|
||||
|
||||
|
||||
async def process_guidelines_and_terms(
|
||||
@ -243,11 +262,12 @@ async def enhanced_generate_stream_response(
|
||||
|
||||
# 准备最终的消息
|
||||
final_messages = messages.copy()
|
||||
final_messages = append_user_last_message(final_messages, f"\n\nlanguage:{get_language_text(language)}")
|
||||
|
||||
if guideline_reasoning:
|
||||
# 用###分割guideline_reasoning,取最后一段作为Guidelines
|
||||
final_messages = append_user_last_message(final_messages, f"\n\nGuidelines:\n{guideline_reasoning}\nPlease follow these guidelines step by step.")
|
||||
guidelines_text = guideline_reasoning.split('###')[-1].strip() if guideline_reasoning else ""
|
||||
final_messages = append_assistant_last_message(final_messages, f"language:{get_language_text(language)}\n\nGuidelines:\n{guidelines_text}\n I will follow these guidelines step by step.")
|
||||
else:
|
||||
final_messages = append_assistant_last_message(final_messages, f"\n\nlanguage:{get_language_text(language)}")
|
||||
|
||||
# 第三阶段:agent响应流式传输
|
||||
logger.info(f"Starting agent stream response")
|
||||
@ -358,12 +378,14 @@ async def create_agent_and_generate_response(
|
||||
|
||||
# 准备最终的消息
|
||||
final_messages = messages.copy()
|
||||
final_messages = append_user_last_message(final_messages, f"\n\nlanguage:{get_language_text(language)}")
|
||||
pre_message_list = []
|
||||
if guideline_reasoning:
|
||||
# 用###分割guideline_reasoning,取最后一段作为Guidelines
|
||||
final_messages = append_user_last_message(final_messages, f"\n\nGuidelines:\n{guideline_reasoning}\nPlease follow these guidelines step by step.")
|
||||
guidelines_text = guideline_reasoning.split('###')[-1].strip() if guideline_reasoning else ""
|
||||
final_messages = append_assistant_last_message(final_messages, f"language:{get_language_text(language)}\n\nGuidelines:\n{guidelines_text}\n I will follow these guidelines step by step.")
|
||||
pre_message_list.append({"role": "assistant","reasoning_content": guideline_reasoning+ "\n"})
|
||||
else:
|
||||
final_messages = append_assistant_last_message(final_messages, f"\n\nlanguage:{get_language_text(language)}")
|
||||
|
||||
# 非流式响应
|
||||
agent_responses = agent.run_nonstream(final_messages)
|
||||
|
||||
@ -284,19 +284,24 @@ def format_messages_to_chat_history(messages: List[Dict[str, str]]) -> str:
|
||||
Returns:
|
||||
str: 格式化的聊天记录
|
||||
"""
|
||||
# 只取最后的6句消息
|
||||
# 只取最后的15句消息
|
||||
chat_history = []
|
||||
|
||||
for message in messages:
|
||||
role = message.get('role', '')
|
||||
content = message.get('content', '')
|
||||
if len(content) > 0:
|
||||
if role == 'user':
|
||||
chat_history.append(f"user: {content}")
|
||||
elif role == 'assistant':
|
||||
if role == 'user':
|
||||
chat_history.append(f"user: {content}")
|
||||
elif role == FUNCTION:
|
||||
chat_history.append(f"function_response: {content}")
|
||||
elif role == ASSISTANT:
|
||||
if len(content) >0:
|
||||
chat_history.append(f"assistant: {content}")
|
||||
if message.get('function_call'):
|
||||
chat_history.append(f"function_call: {message.get('function_call').get('name')} ")
|
||||
chat_history.append(f"{message.get('function_call').get('arguments')}")
|
||||
|
||||
recent_chat_history = chat_history[-6:] if len(chat_history) > 6 else chat_history
|
||||
recent_chat_history = chat_history[-15:] if len(chat_history) > 15 else chat_history
|
||||
print(f"recent_chat_history:{recent_chat_history}")
|
||||
return "\n".join(recent_chat_history)
|
||||
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user