From 25f8f7c98ffd6f3a941b31f78e9f9e47667a74e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E6=BD=AE?= Date: Thu, 4 Dec 2025 12:22:08 +0800 Subject: [PATCH 1/4] =?UTF-8?q?=E4=BF=AE=E6=94=B9guideline=5Fprompt?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- prompt/guideline_prompt.md | 4 +--- utils/fastapi_utils.py | 19 ++++++++++++------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/prompt/guideline_prompt.md b/prompt/guideline_prompt.md index a758551..3964a6b 100644 --- a/prompt/guideline_prompt.md +++ b/prompt/guideline_prompt.md @@ -19,7 +19,7 @@ ### 3. 计划阶段 (Planning) - **步骤制定**: 详细的执行步骤,步骤数量根据实际需求调整,每个步骤应具体可执行 -- **应急预案**: 准备应对可能的障碍 + --- @@ -65,5 +65,3 @@ ### ⚡ 计划阶段 **执行步骤**: [按1,2,3...n顺序列出执行步骤] -**应急预案**: -[按1,2,3...n列出可能障碍的应对方式] diff --git a/utils/fastapi_utils.py b/utils/fastapi_utils.py index 754fe8b..b6c924e 100644 --- a/utils/fastapi_utils.py +++ b/utils/fastapi_utils.py @@ -284,19 +284,24 @@ def format_messages_to_chat_history(messages: List[Dict[str, str]]) -> str: Returns: str: 格式化的聊天记录 """ - # 只取最后的6句消息 + # 只取最后的15句消息 chat_history = [] - for message in messages: role = message.get('role', '') content = message.get('content', '') - if len(content) > 0: - if role == 'user': - chat_history.append(f"user: {content}") - elif role == 'assistant': + if role == 'user': + chat_history.append(f"user: {content}") + elif role == FUNCTION: + chat_history.append(f"function_response: {content}") + elif role == ASSISTANT: + if len(content) >0: chat_history.append(f"assistant: {content}") + if message.get('function_call'): + chat_history.append(f"function_call: {message.get('function_call').get('name')} ") + chat_history.append(f"{message.get('function_call').get('arguments')}") - recent_chat_history = chat_history[-6:] if len(chat_history) > 6 else chat_history + recent_chat_history = chat_history[-15:] if len(chat_history) > 15 else chat_history + print(f"recent_chat_history:{recent_chat_history}") return "\n".join(recent_chat_history) From c58b20e56f2867fd2bb4e40989dfa135385c6469 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E6=BD=AE?= Date: Thu, 4 Dec 2025 13:50:25 +0800 Subject: [PATCH 2/4] =?UTF-8?q?=E4=BF=AE=E6=94=B9guideline=5Fprompt?= =?UTF-8?q?=EF=BC=9A=E5=8F=AA=E9=9C=80=E8=A6=81=E5=88=97=E5=87=BA=E8=AF=A6?= =?UTF-8?q?=E7=BB=86=E7=9A=84=E5=B7=A5=E5=85=B7=E8=B0=83=E7=94=A8=E6=AD=A5?= =?UTF-8?q?=E9=AA=A4=E5=8D=B3=E5=8F=AF=EF=BC=8C=E4=B8=8D=E9=9C=80=E8=A6=81?= =?UTF-8?q?=E8=80=83=E8=99=91=E5=9B=9E=E5=A4=8D=E7=94=A8=E6=88=B7=E7=9A=84?= =?UTF-8?q?=E8=AF=9D=E6=9C=AF=EF=BC=8C=E6=AD=A5=E9=AA=A4=E6=95=B0=E9=87=8F?= =?UTF-8?q?=E6=A0=B9=E6=8D=AE=E5=AE=9E=E9=99=85=E9=9C=80=E6=B1=82=E8=B0=83?= =?UTF-8?q?=E6=95=B4=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- prompt/guideline_prompt.md | 2 +- routes/chat.py | 30 +++++++++++++++++++++++++----- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/prompt/guideline_prompt.md b/prompt/guideline_prompt.md index 3964a6b..ee0f1e5 100644 --- a/prompt/guideline_prompt.md +++ b/prompt/guideline_prompt.md @@ -18,7 +18,7 @@ - **优先级排序**: 确定任务执行顺序 ### 3. 计划阶段 (Planning) -- **步骤制定**: 详细的执行步骤,步骤数量根据实际需求调整,每个步骤应具体可执行 +- **步骤制定**: 只需要列出详细的工具调用步骤即可,不需要考虑回复用户的话术,步骤数量根据实际需求调整。 --- diff --git a/routes/chat.py b/routes/chat.py index 1379722..12910f7 100644 --- a/routes/chat.py +++ b/routes/chat.py @@ -57,6 +57,25 @@ def append_user_last_message(messages: list, content: str) -> bool: messages[-1]['content'] += content return messages +def append_assistant_last_message(messages: list, content: str) -> bool: + """向最后一条用户消息追加内容 + + Args: + messages: 消息列表 + content: 要追加的内容 + condition: 可选条件,如果提供则检查消息角色是否匹配此条件 + + Returns: + bool: 是否成功追加内容 + """ + if not messages or len(messages) == 0: + return messages + last_message = messages[-1] + if last_message and last_message.get('role') == 'assistant': + messages[-1]['content'] += content + else: + messages.append({"role":"assistant","content":content}) + return messages async def process_guidelines_and_terms( @@ -243,12 +262,12 @@ async def enhanced_generate_stream_response( # 准备最终的消息 final_messages = messages.copy() - final_messages = append_user_last_message(final_messages, f"\n\nlanguage:{get_language_text(language)}") - if guideline_reasoning: # 用###分割guideline_reasoning,取最后一段作为Guidelines guidelines_text = guideline_reasoning.split('###')[-1].strip() if guideline_reasoning else "" - final_messages = append_user_last_message(final_messages, f"\n\nGuidelines:\n{guidelines_text}\nPlease follow these guidelines step by step.") + final_messages = append_assistant_last_message(final_messages, f"language:{get_language_text(language)}\n\nGuidelines:\n{guidelines_text}\n I will follow these guidelines step by step.") + else: + final_messages = append_assistant_last_message(final_messages, f"\n\nlanguage:{get_language_text(language)}") # 第三阶段:agent响应流式传输 logger.info(f"Starting agent stream response") @@ -359,13 +378,14 @@ async def create_agent_and_generate_response( # 准备最终的消息 final_messages = messages.copy() - final_messages = append_user_last_message(final_messages, f"\n\nlanguage:{get_language_text(language)}") pre_message_list = [] if guideline_reasoning: # 用###分割guideline_reasoning,取最后一段作为Guidelines guidelines_text = guideline_reasoning.split('###')[-1].strip() if guideline_reasoning else "" - final_messages = append_user_last_message(final_messages, f"\n\nGuidelines:\n{guidelines_text}\nPlease follow these guidelines step by step.") + final_messages = append_assistant_last_message(final_messages, f"language:{get_language_text(language)}\n\nGuidelines:\n{guidelines_text}\n I will follow these guidelines step by step.") pre_message_list.append({"role": "assistant","reasoning_content": guideline_reasoning+ "\n"}) + else: + final_messages = append_assistant_last_message(final_messages, f"\n\nlanguage:{get_language_text(language)}") # 非流式响应 agent_responses = agent.run_nonstream(final_messages) From aa51212849bdfd251284754c5ff2ec4b81183ac2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E6=BD=AE?= Date: Thu, 4 Dec 2025 14:10:47 +0800 Subject: [PATCH 3/4] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E9=BB=98=E8=AE=A4MAX=5FL?= =?UTF-8?q?LM=5FCALL=5FPER=5FRUN=E8=B0=83=E7=94=A8=E6=AC=A1=E6=95=B050?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- agent/modified_assistant.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agent/modified_assistant.py b/agent/modified_assistant.py index 21be174..9ed3ca7 100644 --- a/agent/modified_assistant.py +++ b/agent/modified_assistant.py @@ -158,7 +158,7 @@ class ModifiedAssistant(Assistant): response = [] # 保持原有的最大调用次数限制 - total_num_llm_calls_available = self.MAX_LLM_CALL_PER_RUN if hasattr(self, 'MAX_LLM_CALL_PER_RUN') else 10 + total_num_llm_calls_available = self.MAX_LLM_CALL_PER_RUN if hasattr(self, 'MAX_LLM_CALL_PER_RUN') else 50 num_llm_calls_available = total_num_llm_calls_available while num_llm_calls_available > 0: num_llm_calls_available -= 1 From 14973509211309c2f9ec716b97905ab5a701c21d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E6=BD=AE?= Date: Thu, 4 Dec 2025 21:02:01 +0800 Subject: [PATCH 4/4] promptfoo --- promptfoo/conversation/config.yaml | 44 +++ promptfoo/conversation/encode.js | 11 + promptfoo/conversation/prompt.json | 16 + promptfoo/conversation/result.json | 612 +++++++++++++++++++++++++++++ promptfoo/query/config.yaml | 32 ++ promptfoo/query/prompt.json | 6 + promptfoo/query/result.json | 235 +++++++++++ promptfoo/result.json | 369 +++++++++++++++++ 8 files changed, 1325 insertions(+) create mode 100644 promptfoo/conversation/config.yaml create mode 100644 promptfoo/conversation/encode.js create mode 100644 promptfoo/conversation/prompt.json create mode 100644 promptfoo/conversation/result.json create mode 100644 promptfoo/query/config.yaml create mode 100644 promptfoo/query/prompt.json create mode 100644 promptfoo/query/result.json create mode 100644 promptfoo/result.json diff --git a/promptfoo/conversation/config.yaml b/promptfoo/conversation/config.yaml new file mode 100644 index 0000000..6144f89 --- /dev/null +++ b/promptfoo/conversation/config.yaml @@ -0,0 +1,44 @@ +env: {} +description: Novare Test +providers: + - id: openai:chat:qwen3 + config: + apiBaseUrl: https://catalog-agent-dev.gbase.ai/api/v2 + apiKey: a21c99620a8ef61d69563afe05ccce89 + passthrough: + bot_id: 63069654-7750-409d-9a58-a0960d899a20 + tool_response: true + language: zh +prompts: + - file://prompt.json +nunjucksFilters: + encode: ./encode.js +tests: + - vars: + question: 清水太郎在哪里 + assert: + - type: regex + value: \[TOOL_CALL\].*find_employee_location + - vars: + question: 通知他明天上午8点开会 + assert: + - type: llm-rubric + value: 询问是否确认发送 + - vars: + question: 确认 + assert: + - type: regex + value: \[TOOL_CALL\].*wowtalk_send_message_to_member + +defaultTest: + options: + provider: + text: + id: openai:chat:qwen/qwen3-next-80b-a3b-instruct + config: + apiKey: sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26 + apiBaseUrl: https://one.felo.me/v1 + apiVersion: '2024-02-01' +evaluateOptions: {} +writeLatestResults: true +sharing: true diff --git a/promptfoo/conversation/encode.js b/promptfoo/conversation/encode.js new file mode 100644 index 0000000..acba61a --- /dev/null +++ b/promptfoo/conversation/encode.js @@ -0,0 +1,11 @@ +module.exports = function (str) { + if (typeof str !== 'string') { + return str; + } + return str + .replace(/\\/g, '\\\\') // 转义反斜杠 + .replace(/"/g, '\\"') // 转义双引号 + .replace(/\n/g, '\\n') // 转义换行符 + .replace(/\r/g, '\\r') // 转义回车符 + .replace(/\t/g, '\\t'); // 转义制表符 +}; diff --git a/promptfoo/conversation/prompt.json b/promptfoo/conversation/prompt.json new file mode 100644 index 0000000..94837f5 --- /dev/null +++ b/promptfoo/conversation/prompt.json @@ -0,0 +1,16 @@ +[ +{% for completion in _conversation %} + { + "role": "user", + "content": "{{ completion.input | encode }}" + }, + { + "role": "assistant", + "content": "{{ completion.output | encode }}" + }, +{% endfor %} + { + "role": "user", + "content": "{{ question | encode }}" + } +] diff --git a/promptfoo/conversation/result.json b/promptfoo/conversation/result.json new file mode 100644 index 0000000..52e441b --- /dev/null +++ b/promptfoo/conversation/result.json @@ -0,0 +1,612 @@ +{ + "evalId": "eval-l9e-2025-12-04T12:17:14", + "results": { + "version": 3, + "timestamp": "2025-12-04T12:17:14.502Z", + "prompts": [ + { + "raw": "[\n{% for completion in _conversation %}\n {\n \"role\": \"user\",\n \"content\": \"{{ completion.input | encode }}\"\n },\n {\n \"role\": \"assistant\",\n \"content\": \"{{ completion.output | encode }}\"\n },\n{% endfor %}\n {\n \"role\": \"user\",\n \"content\": \"{{ question | encode }}\"\n }\n]\n", + "label": "prompt.json: [\n{% for completion in _conversation %}\n {\n \"role\": \"user\",\n \"content\": \"{{ completion.input | encode }}\"\n },\n {\n \"role\": \"assistant\",\n \"content\": \"{{ completion.output | encode }}\"\n },\n{% endfor %}\n {\n \"role\": \"user\",\n \"content\": \"{{ question | encode }}\"\n }\n]\n", + "id": "49ff9901f402b7e42593f2724f05794c15d1c1bbd5d7a46c3e58198f7bd02c7e", + "provider": "openai:chat:qwen3", + "metrics": { + "score": 3, + "testPassCount": 3, + "testFailCount": 0, + "testErrorCount": 0, + "assertPassCount": 3, + "assertFailCount": 0, + "totalLatencyMs": 18196, + "tokenUsage": { + "prompt": 596, + "completion": 2017, + "cached": 709, + "total": 3322, + "numRequests": 3, + "completionDetails": { + "reasoning": 0, + "acceptedPrediction": 0, + "rejectedPrediction": 0 + }, + "assertions": { + "total": 746, + "prompt": 697, + "completion": 49, + "cached": 0, + "numRequests": 0, + "completionDetails": { + "reasoning": 0, + "acceptedPrediction": 0, + "rejectedPrediction": 0 + } + } + }, + "namedScores": {}, + "namedScoresCount": {}, + "cost": 0 + } + } + ], + "results": [ + { + "cost": 0, + "gradingResult": { + "pass": true, + "score": 1, + "reason": "All assertions passed", + "namedScores": {}, + "tokensUsed": { + "total": 0, + "prompt": 0, + "completion": 0, + "cached": 0, + "numRequests": 0 + }, + "componentResults": [ + { + "pass": true, + "score": 1, + "reason": "Assertion passed", + "assertion": { + "type": "regex", + "value": "\\[TOOL_CALL\\].*find_employee_location" + } + } + ], + "assertion": null + }, + "id": "7595e74d-334a-4ed9-b01c-8373f6e8dffc", + "latencyMs": 12, + "namedScores": {}, + "prompt": { + "raw": "[\n\n {\n \"role\": \"user\",\n \"content\": \"清水太郎在哪里\"\n }\n]\n", + "label": "prompt.json: [\n{% for completion in _conversation %}\n {\n \"role\": \"user\",\n \"content\": \"{{ completion.input | encode }}\"\n },\n {\n \"role\": \"assistant\",\n \"content\": \"{{ completion.output | encode }}\"\n },\n{% endfor %}\n {\n \"role\": \"user\",\n \"content\": \"{{ question | encode }}\"\n }\n]\n" + }, + "promptId": "49ff9901f402b7e42593f2724f05794c15d1c1bbd5d7a46c3e58198f7bd02c7e", + "promptIdx": 0, + "provider": { + "id": "openai:chat:qwen3", + "label": "" + }, + "response": { + "output": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内,无法获取实时位置。", + "tokenUsage": { + "cached": 709, + "total": 709 + }, + "cached": true, + "finishReason": "stop" + }, + "score": 1, + "success": true, + "testCase": { + "vars": { + "question": "清水太郎在哪里", + "_conversation": [] + }, + "assert": [ + { + "type": "regex", + "value": "\\[TOOL_CALL\\].*find_employee_location" + } + ], + "options": { + "provider": { + "text": { + "id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct", + "config": { + "apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26", + "apiBaseUrl": "https://one.felo.me/v1", + "apiVersion": "2024-02-01" + } + } + } + }, + "metadata": {} + }, + "testIdx": 0, + "vars": { + "question": "清水太郎在哪里", + "_conversation": [] + }, + "metadata": { + "_promptfooFileMetadata": {} + }, + "failureReason": 0 + }, + { + "cost": 0, + "gradingResult": { + "pass": true, + "score": 1, + "reason": "All assertions passed", + "namedScores": {}, + "tokensUsed": { + "total": 746, + "prompt": 697, + "completion": 49, + "cached": 0, + "numRequests": 0 + }, + "componentResults": [ + { + "assertion": { + "type": "llm-rubric", + "value": "询问是否确认发送" + }, + "pass": true, + "score": 1, + "reason": "输出明确询问用户是否确认向清水邦英发送会议通知,符合'询问是否确认发送'的rubric要求。", + "tokensUsed": { + "total": 746, + "prompt": 697, + "completion": 49, + "cached": 0, + "numRequests": 0, + "completionDetails": { + "reasoning": 0, + "acceptedPrediction": 0, + "rejectedPrediction": 0 + } + } + } + ], + "assertion": null + }, + "id": "92bba056-d302-4c3b-ac01-deb76e9e74d4", + "latencyMs": 9701, + "namedScores": {}, + "prompt": { + "raw": "[\n\n {\n \"role\": \"user\",\n \"content\": \"清水太郎在哪里\"\n },\n {\n \"role\": \"assistant\",\n \"content\": \"[THINK]\\n### 🎯 核心目标\\n查找名为“清水太郎”的人员位置。\\n\\n### 📋 现状分析\\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\\n- **限制条件**: \\n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\\n- **可用资源**: \\n - 工具:`Iot Control-find_employee_location`(人员检索工具)\\n\\n### ⚡ 计划阶段\\n**执行步骤**:\\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\\\"清水太郎\\\"`,查询该人员的当前位置。\\n\\n[TOOL_CALL] Iot Control-find_employee_location\\n{\\\"name\\\": \\\"清水太郎\\\"}\\n[TOOL_RESPONSE] Iot Control-find_employee_location\\n{\\\"matched_count\\\":1,\\\"results\\\":[{\\\"name\\\":\\\"清水 邦英\\\",\\\"wowtalk_id\\\":\\\"u226160\\\",\\\"sensor_id\\\":\\\"3246789\\\",\\\"confidence\\\":85,\\\"location_status\\\":\\\"not_in_range\\\",\\\"error_message\\\":\\\"Sensor不在测位范围内或未检测到位置信号\\\",\\\"coordinates\\\":null,\\\"location\\\":null}]}\\n[ANSWER]\\n清水邦英先生目前未在测位范围内,无法获取实时位置。\"\n },\n\n {\n \"role\": \"user\",\n \"content\": \"通知他明天上午8点开会\"\n }\n]\n", + "label": "prompt.json: [\n{% for completion in _conversation %}\n {\n \"role\": \"user\",\n \"content\": \"{{ completion.input | encode }}\"\n },\n {\n \"role\": \"assistant\",\n \"content\": \"{{ completion.output | encode }}\"\n },\n{% endfor %}\n {\n \"role\": \"user\",\n \"content\": \"{{ question | encode }}\"\n }\n]\n" + }, + "promptId": "49ff9901f402b7e42593f2724f05794c15d1c1bbd5d7a46c3e58198f7bd02c7e", + "promptIdx": 0, + "provider": { + "id": "openai:chat:qwen3", + "label": "" + }, + "response": { + "output": "[THINK]\n### 🎯 核心目标\n通知清水太郎明天上午8点开会,但系统返回的是“清水邦英”而非“清水太郎”,需确认人员身份并发送会议提醒。\n\n### 📋 现状分析\n- **关键信息**:\n - 用户查询“清水太郎”,但系统返回匹配人为“清水 邦英”(姓名相近但不完全一致)。\n - 清水邦英的传感器信号不在测位范围内,无法获取位置,但不影响消息通知。\n - 清水邦英的wowtalk_id为“u226160”,可用于发送消息。\n - 用户意图是发送“明天上午8点开会”的通知。\n- **限制条件**:\n - 名字不完全匹配(太郎 vs 邦英),可能存在输入误差,需谨慎确认。\n - 消息通知属于“需要确认”的操作(根据指南第4条),不可直接发送。\n - 仅当确认人员为同一人时,才可发送通知。\n- **可用资源**:\n - Iot Control-find_employee_location(查询人员信息)\n - Wowtalk tool-wowtalk_send_message_to_member(发送消息)\n - 可用的wowtalk_id:u226160(清水邦英)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用 Iot Control-find_employee_location,查询名称中包含“太郎”的员工,确认是否存在“清水太郎”。\n2. 若步骤1返回结果为空或无匹配项,则向用户发送确认请求:“您要通知的是‘清水邦英’(wowtalk_id: u226160)吗?系统未查到‘清水太郎’,是否确认向清水邦英发送‘明天上午8点开会’的通知?”\n3. 等待用户明确回复“确认”或“是”。\n4. 收到确认后,调用 Wowtalk tool-wowtalk_send_message_to_member,向 wowtalk_id: u226160 发送消息:“明天上午8点开会”。\n\n[ANSWER]\n您要通知的是“清水邦英”(wowtalk_id: u226160)吗?系统未查到“清水太郎”,是否确认向清水邦英发送“明天上午8点开会”的通知?", + "tokenUsage": { + "total": 1157, + "prompt": 261, + "completion": 896 + }, + "cached": false, + "finishReason": "stop" + }, + "score": 1, + "success": true, + "testCase": { + "vars": { + "question": "通知他明天上午8点开会", + "_conversation": [ + { + "prompt": [ + { + "role": "user", + "content": "清水太郎在哪里" + } + ], + "input": "清水太郎在哪里", + "output": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内,无法获取实时位置。" + }, + { + "prompt": [ + { + "role": "user", + "content": "清水太郎在哪里" + }, + { + "role": "assistant", + "content": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内,无法获取实时位置。" + }, + { + "role": "user", + "content": "通知他明天上午8点开会" + } + ], + "input": "通知他明天上午8点开会", + "output": "[THINK]\n### 🎯 核心目标\n通知清水太郎明天上午8点开会,但系统返回的是“清水邦英”而非“清水太郎”,需确认人员身份并发送会议提醒。\n\n### 📋 现状分析\n- **关键信息**:\n - 用户查询“清水太郎”,但系统返回匹配人为“清水 邦英”(姓名相近但不完全一致)。\n - 清水邦英的传感器信号不在测位范围内,无法获取位置,但不影响消息通知。\n - 清水邦英的wowtalk_id为“u226160”,可用于发送消息。\n - 用户意图是发送“明天上午8点开会”的通知。\n- **限制条件**:\n - 名字不完全匹配(太郎 vs 邦英),可能存在输入误差,需谨慎确认。\n - 消息通知属于“需要确认”的操作(根据指南第4条),不可直接发送。\n - 仅当确认人员为同一人时,才可发送通知。\n- **可用资源**:\n - Iot Control-find_employee_location(查询人员信息)\n - Wowtalk tool-wowtalk_send_message_to_member(发送消息)\n - 可用的wowtalk_id:u226160(清水邦英)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用 Iot Control-find_employee_location,查询名称中包含“太郎”的员工,确认是否存在“清水太郎”。\n2. 若步骤1返回结果为空或无匹配项,则向用户发送确认请求:“您要通知的是‘清水邦英’(wowtalk_id: u226160)吗?系统未查到‘清水太郎’,是否确认向清水邦英发送‘明天上午8点开会’的通知?”\n3. 等待用户明确回复“确认”或“是”。\n4. 收到确认后,调用 Wowtalk tool-wowtalk_send_message_to_member,向 wowtalk_id: u226160 发送消息:“明天上午8点开会”。\n\n[ANSWER]\n您要通知的是“清水邦英”(wowtalk_id: u226160)吗?系统未查到“清水太郎”,是否确认向清水邦英发送“明天上午8点开会”的通知?" + } + ] + }, + "assert": [ + { + "type": "llm-rubric", + "value": "询问是否确认发送" + } + ], + "options": { + "provider": { + "text": { + "id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct", + "config": { + "apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26", + "apiBaseUrl": "https://one.felo.me/v1", + "apiVersion": "2024-02-01" + } + } + } + }, + "metadata": {} + }, + "testIdx": 1, + "vars": { + "question": "通知他明天上午8点开会", + "_conversation": [ + { + "prompt": [ + { + "role": "user", + "content": "清水太郎在哪里" + } + ], + "input": "清水太郎在哪里", + "output": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内,无法获取实时位置。" + }, + { + "prompt": [ + { + "role": "user", + "content": "清水太郎在哪里" + }, + { + "role": "assistant", + "content": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内,无法获取实时位置。" + }, + { + "role": "user", + "content": "通知他明天上午8点开会" + } + ], + "input": "通知他明天上午8点开会", + "output": "[THINK]\n### 🎯 核心目标\n通知清水太郎明天上午8点开会,但系统返回的是“清水邦英”而非“清水太郎”,需确认人员身份并发送会议提醒。\n\n### 📋 现状分析\n- **关键信息**:\n - 用户查询“清水太郎”,但系统返回匹配人为“清水 邦英”(姓名相近但不完全一致)。\n - 清水邦英的传感器信号不在测位范围内,无法获取位置,但不影响消息通知。\n - 清水邦英的wowtalk_id为“u226160”,可用于发送消息。\n - 用户意图是发送“明天上午8点开会”的通知。\n- **限制条件**:\n - 名字不完全匹配(太郎 vs 邦英),可能存在输入误差,需谨慎确认。\n - 消息通知属于“需要确认”的操作(根据指南第4条),不可直接发送。\n - 仅当确认人员为同一人时,才可发送通知。\n- **可用资源**:\n - Iot Control-find_employee_location(查询人员信息)\n - Wowtalk tool-wowtalk_send_message_to_member(发送消息)\n - 可用的wowtalk_id:u226160(清水邦英)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用 Iot Control-find_employee_location,查询名称中包含“太郎”的员工,确认是否存在“清水太郎”。\n2. 若步骤1返回结果为空或无匹配项,则向用户发送确认请求:“您要通知的是‘清水邦英’(wowtalk_id: u226160)吗?系统未查到‘清水太郎’,是否确认向清水邦英发送‘明天上午8点开会’的通知?”\n3. 等待用户明确回复“确认”或“是”。\n4. 收到确认后,调用 Wowtalk tool-wowtalk_send_message_to_member,向 wowtalk_id: u226160 发送消息:“明天上午8点开会”。\n\n[ANSWER]\n您要通知的是“清水邦英”(wowtalk_id: u226160)吗?系统未查到“清水太郎”,是否确认向清水邦英发送“明天上午8点开会”的通知?" + } + ] + }, + "metadata": { + "_promptfooFileMetadata": {} + }, + "failureReason": 0 + }, + { + "cost": 0, + "gradingResult": { + "pass": true, + "score": 1, + "reason": "All assertions passed", + "namedScores": {}, + "tokensUsed": { + "total": 0, + "prompt": 0, + "completion": 0, + "cached": 0, + "numRequests": 0 + }, + "componentResults": [ + { + "pass": true, + "score": 1, + "reason": "Assertion passed", + "assertion": { + "type": "regex", + "value": "\\[TOOL_CALL\\].*wowtalk_send_message_to_member" + } + } + ], + "assertion": null + }, + "id": "647361f4-33c5-4f1a-aa29-edb5c0f51841", + "latencyMs": 8483, + "namedScores": {}, + "prompt": { + "raw": "[\n\n {\n \"role\": \"user\",\n \"content\": \"清水太郎在哪里\"\n },\n {\n \"role\": \"assistant\",\n \"content\": \"[THINK]\\n### 🎯 核心目标\\n查找名为“清水太郎”的人员位置。\\n\\n### 📋 现状分析\\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\\n- **限制条件**: \\n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\\n- **可用资源**: \\n - 工具:`Iot Control-find_employee_location`(人员检索工具)\\n\\n### ⚡ 计划阶段\\n**执行步骤**:\\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\\\"清水太郎\\\"`,查询该人员的当前位置。\\n\\n[TOOL_CALL] Iot Control-find_employee_location\\n{\\\"name\\\": \\\"清水太郎\\\"}\\n[TOOL_RESPONSE] Iot Control-find_employee_location\\n{\\\"matched_count\\\":1,\\\"results\\\":[{\\\"name\\\":\\\"清水 邦英\\\",\\\"wowtalk_id\\\":\\\"u226160\\\",\\\"sensor_id\\\":\\\"3246789\\\",\\\"confidence\\\":85,\\\"location_status\\\":\\\"not_in_range\\\",\\\"error_message\\\":\\\"Sensor不在测位范围内或未检测到位置信号\\\",\\\"coordinates\\\":null,\\\"location\\\":null}]}\\n[ANSWER]\\n清水邦英先生目前未在测位范围内,无法获取实时位置。\"\n },\n\n {\n \"role\": \"user\",\n \"content\": \"通知他明天上午8点开会\"\n },\n {\n \"role\": \"assistant\",\n \"content\": \"[THINK]\\n### 🎯 核心目标\\n通知清水太郎明天上午8点开会,但系统返回的是“清水邦英”而非“清水太郎”,需确认人员身份并发送会议提醒。\\n\\n### 📋 现状分析\\n- **关键信息**:\\n - 用户查询“清水太郎”,但系统返回匹配人为“清水 邦英”(姓名相近但不完全一致)。\\n - 清水邦英的传感器信号不在测位范围内,无法获取位置,但不影响消息通知。\\n - 清水邦英的wowtalk_id为“u226160”,可用于发送消息。\\n - 用户意图是发送“明天上午8点开会”的通知。\\n- **限制条件**:\\n - 名字不完全匹配(太郎 vs 邦英),可能存在输入误差,需谨慎确认。\\n - 消息通知属于“需要确认”的操作(根据指南第4条),不可直接发送。\\n - 仅当确认人员为同一人时,才可发送通知。\\n- **可用资源**:\\n - Iot Control-find_employee_location(查询人员信息)\\n - Wowtalk tool-wowtalk_send_message_to_member(发送消息)\\n - 可用的wowtalk_id:u226160(清水邦英)\\n\\n### ⚡ 计划阶段\\n**执行步骤**:\\n1. 调用 Iot Control-find_employee_location,查询名称中包含“太郎”的员工,确认是否存在“清水太郎”。\\n2. 若步骤1返回结果为空或无匹配项,则向用户发送确认请求:“您要通知的是‘清水邦英’(wowtalk_id: u226160)吗?系统未查到‘清水太郎’,是否确认向清水邦英发送‘明天上午8点开会’的通知?”\\n3. 等待用户明确回复“确认”或“是”。\\n4. 收到确认后,调用 Wowtalk tool-wowtalk_send_message_to_member,向 wowtalk_id: u226160 发送消息:“明天上午8点开会”。\\n\\n[ANSWER]\\n您要通知的是“清水邦英”(wowtalk_id: u226160)吗?系统未查到“清水太郎”,是否确认向清水邦英发送“明天上午8点开会”的通知?\"\n },\n\n {\n \"role\": \"user\",\n \"content\": \"确认\"\n }\n]\n", + "label": "prompt.json: [\n{% for completion in _conversation %}\n {\n \"role\": \"user\",\n \"content\": \"{{ completion.input | encode }}\"\n },\n {\n \"role\": \"assistant\",\n \"content\": \"{{ completion.output | encode }}\"\n },\n{% endfor %}\n {\n \"role\": \"user\",\n \"content\": \"{{ question | encode }}\"\n }\n]\n" + }, + "promptId": "49ff9901f402b7e42593f2724f05794c15d1c1bbd5d7a46c3e58198f7bd02c7e", + "promptIdx": 0, + "provider": { + "id": "openai:chat:qwen3", + "label": "" + }, + "response": { + "output": "[THINK]\n### 🎯 核心目标\n向员工清水邦英(wowtalk_id: u226160)发送明天上午8点开会的通知。\n\n### 📋 现状分析\n- **关键信息**: \n - 用户最初询问“清水太郎”,系统返回“清水邦英”为唯一匹配人(confidence: 85%)。\n - 用户已确认“清水邦英”为通知对象,并明确指令:“通知他明天上午8点开会”。\n - 清水邦英的wowtalk_id为u226160,具备消息通知的唯一身份标识。\n- **限制条件**:\n - 根据【消息通知】规范,发送通知前必须获得用户明确确认,当前用户已确认,可直接执行。\n - 仅可使用授权工具完成操作,不得使用非指定工具。\n - 通知内容需精确传达,不得增删语义。\n- **可用资源**:\n - Wowtalk tool-wowtalk_send_message_to_member:用于向指定wowtalk_id发送消息。\n - Iot Control-find_employee_location:已使用并确认人员身份,无需重复调用。\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 Wowtalk tool-wowtalk_send_message_to_member,参数为:\n - wowtalk_id: \"u226160\"\n - message: \"明天上午8点开会\"\n\n[TOOL_CALL] Wowtalk tool-wowtalk_send_message_to_member\n{\"to_account\": \"u226160\", \"message_content\": \"明天上午8点开会\"}\n[TOOL_RESPONSE] Wowtalk tool-wowtalk_send_message_to_member\n{\"success\":false,\"error\":\"API调用失败: 个人账户不存在\",\"error_code\":108,\"error_type\":\"WowTalkAPIError\",\"response_data\":{\"result_code\":108,\"api_version\":\"1.0\",\"request_id\":\"shimztest19-1764850653920\",\"message\":\"The user does not exist.\",\"response_data\":\"\"},\"bot_id\":\"63069654-7750-409d-9a58-a0960d899a20\"}\n[ANSWER]\n通知失败:该用户在 WowTalk 系统中不存在,请确认员工信息或检查账号是否正确。", + "tokenUsage": { + "total": 1456, + "prompt": 335, + "completion": 1121 + }, + "cached": false, + "finishReason": "stop" + }, + "score": 1, + "success": true, + "testCase": { + "vars": { + "question": "确认", + "_conversation": [ + { + "prompt": [ + { + "role": "user", + "content": "清水太郎在哪里" + } + ], + "input": "清水太郎在哪里", + "output": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内,无法获取实时位置。" + }, + { + "prompt": [ + { + "role": "user", + "content": "清水太郎在哪里" + }, + { + "role": "assistant", + "content": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内,无法获取实时位置。" + }, + { + "role": "user", + "content": "通知他明天上午8点开会" + } + ], + "input": "通知他明天上午8点开会", + "output": "[THINK]\n### 🎯 核心目标\n通知清水太郎明天上午8点开会,但系统返回的是“清水邦英”而非“清水太郎”,需确认人员身份并发送会议提醒。\n\n### 📋 现状分析\n- **关键信息**:\n - 用户查询“清水太郎”,但系统返回匹配人为“清水 邦英”(姓名相近但不完全一致)。\n - 清水邦英的传感器信号不在测位范围内,无法获取位置,但不影响消息通知。\n - 清水邦英的wowtalk_id为“u226160”,可用于发送消息。\n - 用户意图是发送“明天上午8点开会”的通知。\n- **限制条件**:\n - 名字不完全匹配(太郎 vs 邦英),可能存在输入误差,需谨慎确认。\n - 消息通知属于“需要确认”的操作(根据指南第4条),不可直接发送。\n - 仅当确认人员为同一人时,才可发送通知。\n- **可用资源**:\n - Iot Control-find_employee_location(查询人员信息)\n - Wowtalk tool-wowtalk_send_message_to_member(发送消息)\n - 可用的wowtalk_id:u226160(清水邦英)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用 Iot Control-find_employee_location,查询名称中包含“太郎”的员工,确认是否存在“清水太郎”。\n2. 若步骤1返回结果为空或无匹配项,则向用户发送确认请求:“您要通知的是‘清水邦英’(wowtalk_id: u226160)吗?系统未查到‘清水太郎’,是否确认向清水邦英发送‘明天上午8点开会’的通知?”\n3. 等待用户明确回复“确认”或“是”。\n4. 收到确认后,调用 Wowtalk tool-wowtalk_send_message_to_member,向 wowtalk_id: u226160 发送消息:“明天上午8点开会”。\n\n[ANSWER]\n您要通知的是“清水邦英”(wowtalk_id: u226160)吗?系统未查到“清水太郎”,是否确认向清水邦英发送“明天上午8点开会”的通知?" + }, + { + "prompt": [ + { + "role": "user", + "content": "清水太郎在哪里" + }, + { + "role": "assistant", + "content": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内,无法获取实时位置。" + }, + { + "role": "user", + "content": "通知他明天上午8点开会" + }, + { + "role": "assistant", + "content": "[THINK]\n### 🎯 核心目标\n通知清水太郎明天上午8点开会,但系统返回的是“清水邦英”而非“清水太郎”,需确认人员身份并发送会议提醒。\n\n### 📋 现状分析\n- **关键信息**:\n - 用户查询“清水太郎”,但系统返回匹配人为“清水 邦英”(姓名相近但不完全一致)。\n - 清水邦英的传感器信号不在测位范围内,无法获取位置,但不影响消息通知。\n - 清水邦英的wowtalk_id为“u226160”,可用于发送消息。\n - 用户意图是发送“明天上午8点开会”的通知。\n- **限制条件**:\n - 名字不完全匹配(太郎 vs 邦英),可能存在输入误差,需谨慎确认。\n - 消息通知属于“需要确认”的操作(根据指南第4条),不可直接发送。\n - 仅当确认人员为同一人时,才可发送通知。\n- **可用资源**:\n - Iot Control-find_employee_location(查询人员信息)\n - Wowtalk tool-wowtalk_send_message_to_member(发送消息)\n - 可用的wowtalk_id:u226160(清水邦英)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用 Iot Control-find_employee_location,查询名称中包含“太郎”的员工,确认是否存在“清水太郎”。\n2. 若步骤1返回结果为空或无匹配项,则向用户发送确认请求:“您要通知的是‘清水邦英’(wowtalk_id: u226160)吗?系统未查到‘清水太郎’,是否确认向清水邦英发送‘明天上午8点开会’的通知?”\n3. 等待用户明确回复“确认”或“是”。\n4. 收到确认后,调用 Wowtalk tool-wowtalk_send_message_to_member,向 wowtalk_id: u226160 发送消息:“明天上午8点开会”。\n\n[ANSWER]\n您要通知的是“清水邦英”(wowtalk_id: u226160)吗?系统未查到“清水太郎”,是否确认向清水邦英发送“明天上午8点开会”的通知?" + }, + { + "role": "user", + "content": "确认" + } + ], + "input": "确认", + "output": "[THINK]\n### 🎯 核心目标\n向员工清水邦英(wowtalk_id: u226160)发送明天上午8点开会的通知。\n\n### 📋 现状分析\n- **关键信息**: \n - 用户最初询问“清水太郎”,系统返回“清水邦英”为唯一匹配人(confidence: 85%)。\n - 用户已确认“清水邦英”为通知对象,并明确指令:“通知他明天上午8点开会”。\n - 清水邦英的wowtalk_id为u226160,具备消息通知的唯一身份标识。\n- **限制条件**:\n - 根据【消息通知】规范,发送通知前必须获得用户明确确认,当前用户已确认,可直接执行。\n - 仅可使用授权工具完成操作,不得使用非指定工具。\n - 通知内容需精确传达,不得增删语义。\n- **可用资源**:\n - Wowtalk tool-wowtalk_send_message_to_member:用于向指定wowtalk_id发送消息。\n - Iot Control-find_employee_location:已使用并确认人员身份,无需重复调用。\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 Wowtalk tool-wowtalk_send_message_to_member,参数为:\n - wowtalk_id: \"u226160\"\n - message: \"明天上午8点开会\"\n\n[TOOL_CALL] Wowtalk tool-wowtalk_send_message_to_member\n{\"to_account\": \"u226160\", \"message_content\": \"明天上午8点开会\"}\n[TOOL_RESPONSE] Wowtalk tool-wowtalk_send_message_to_member\n{\"success\":false,\"error\":\"API调用失败: 个人账户不存在\",\"error_code\":108,\"error_type\":\"WowTalkAPIError\",\"response_data\":{\"result_code\":108,\"api_version\":\"1.0\",\"request_id\":\"shimztest19-1764850653920\",\"message\":\"The user does not exist.\",\"response_data\":\"\"},\"bot_id\":\"63069654-7750-409d-9a58-a0960d899a20\"}\n[ANSWER]\n通知失败:该用户在 WowTalk 系统中不存在,请确认员工信息或检查账号是否正确。" + } + ] + }, + "assert": [ + { + "type": "regex", + "value": "\\[TOOL_CALL\\].*wowtalk_send_message_to_member" + } + ], + "options": { + "provider": { + "text": { + "id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct", + "config": { + "apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26", + "apiBaseUrl": "https://one.felo.me/v1", + "apiVersion": "2024-02-01" + } + } + } + }, + "metadata": {} + }, + "testIdx": 2, + "vars": { + "question": "确认", + "_conversation": [ + { + "prompt": [ + { + "role": "user", + "content": "清水太郎在哪里" + } + ], + "input": "清水太郎在哪里", + "output": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内,无法获取实时位置。" + }, + { + "prompt": [ + { + "role": "user", + "content": "清水太郎在哪里" + }, + { + "role": "assistant", + "content": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内,无法获取实时位置。" + }, + { + "role": "user", + "content": "通知他明天上午8点开会" + } + ], + "input": "通知他明天上午8点开会", + "output": "[THINK]\n### 🎯 核心目标\n通知清水太郎明天上午8点开会,但系统返回的是“清水邦英”而非“清水太郎”,需确认人员身份并发送会议提醒。\n\n### 📋 现状分析\n- **关键信息**:\n - 用户查询“清水太郎”,但系统返回匹配人为“清水 邦英”(姓名相近但不完全一致)。\n - 清水邦英的传感器信号不在测位范围内,无法获取位置,但不影响消息通知。\n - 清水邦英的wowtalk_id为“u226160”,可用于发送消息。\n - 用户意图是发送“明天上午8点开会”的通知。\n- **限制条件**:\n - 名字不完全匹配(太郎 vs 邦英),可能存在输入误差,需谨慎确认。\n - 消息通知属于“需要确认”的操作(根据指南第4条),不可直接发送。\n - 仅当确认人员为同一人时,才可发送通知。\n- **可用资源**:\n - Iot Control-find_employee_location(查询人员信息)\n - Wowtalk tool-wowtalk_send_message_to_member(发送消息)\n - 可用的wowtalk_id:u226160(清水邦英)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用 Iot Control-find_employee_location,查询名称中包含“太郎”的员工,确认是否存在“清水太郎”。\n2. 若步骤1返回结果为空或无匹配项,则向用户发送确认请求:“您要通知的是‘清水邦英’(wowtalk_id: u226160)吗?系统未查到‘清水太郎’,是否确认向清水邦英发送‘明天上午8点开会’的通知?”\n3. 等待用户明确回复“确认”或“是”。\n4. 收到确认后,调用 Wowtalk tool-wowtalk_send_message_to_member,向 wowtalk_id: u226160 发送消息:“明天上午8点开会”。\n\n[ANSWER]\n您要通知的是“清水邦英”(wowtalk_id: u226160)吗?系统未查到“清水太郎”,是否确认向清水邦英发送“明天上午8点开会”的通知?" + }, + { + "prompt": [ + { + "role": "user", + "content": "清水太郎在哪里" + }, + { + "role": "assistant", + "content": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内,无法获取实时位置。" + }, + { + "role": "user", + "content": "通知他明天上午8点开会" + }, + { + "role": "assistant", + "content": "[THINK]\n### 🎯 核心目标\n通知清水太郎明天上午8点开会,但系统返回的是“清水邦英”而非“清水太郎”,需确认人员身份并发送会议提醒。\n\n### 📋 现状分析\n- **关键信息**:\n - 用户查询“清水太郎”,但系统返回匹配人为“清水 邦英”(姓名相近但不完全一致)。\n - 清水邦英的传感器信号不在测位范围内,无法获取位置,但不影响消息通知。\n - 清水邦英的wowtalk_id为“u226160”,可用于发送消息。\n - 用户意图是发送“明天上午8点开会”的通知。\n- **限制条件**:\n - 名字不完全匹配(太郎 vs 邦英),可能存在输入误差,需谨慎确认。\n - 消息通知属于“需要确认”的操作(根据指南第4条),不可直接发送。\n - 仅当确认人员为同一人时,才可发送通知。\n- **可用资源**:\n - Iot Control-find_employee_location(查询人员信息)\n - Wowtalk tool-wowtalk_send_message_to_member(发送消息)\n - 可用的wowtalk_id:u226160(清水邦英)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用 Iot Control-find_employee_location,查询名称中包含“太郎”的员工,确认是否存在“清水太郎”。\n2. 若步骤1返回结果为空或无匹配项,则向用户发送确认请求:“您要通知的是‘清水邦英’(wowtalk_id: u226160)吗?系统未查到‘清水太郎’,是否确认向清水邦英发送‘明天上午8点开会’的通知?”\n3. 等待用户明确回复“确认”或“是”。\n4. 收到确认后,调用 Wowtalk tool-wowtalk_send_message_to_member,向 wowtalk_id: u226160 发送消息:“明天上午8点开会”。\n\n[ANSWER]\n您要通知的是“清水邦英”(wowtalk_id: u226160)吗?系统未查到“清水太郎”,是否确认向清水邦英发送“明天上午8点开会”的通知?" + }, + { + "role": "user", + "content": "确认" + } + ], + "input": "确认", + "output": "[THINK]\n### 🎯 核心目标\n向员工清水邦英(wowtalk_id: u226160)发送明天上午8点开会的通知。\n\n### 📋 现状分析\n- **关键信息**: \n - 用户最初询问“清水太郎”,系统返回“清水邦英”为唯一匹配人(confidence: 85%)。\n - 用户已确认“清水邦英”为通知对象,并明确指令:“通知他明天上午8点开会”。\n - 清水邦英的wowtalk_id为u226160,具备消息通知的唯一身份标识。\n- **限制条件**:\n - 根据【消息通知】规范,发送通知前必须获得用户明确确认,当前用户已确认,可直接执行。\n - 仅可使用授权工具完成操作,不得使用非指定工具。\n - 通知内容需精确传达,不得增删语义。\n- **可用资源**:\n - Wowtalk tool-wowtalk_send_message_to_member:用于向指定wowtalk_id发送消息。\n - Iot Control-find_employee_location:已使用并确认人员身份,无需重复调用。\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 Wowtalk tool-wowtalk_send_message_to_member,参数为:\n - wowtalk_id: \"u226160\"\n - message: \"明天上午8点开会\"\n\n[TOOL_CALL] Wowtalk tool-wowtalk_send_message_to_member\n{\"to_account\": \"u226160\", \"message_content\": \"明天上午8点开会\"}\n[TOOL_RESPONSE] Wowtalk tool-wowtalk_send_message_to_member\n{\"success\":false,\"error\":\"API调用失败: 个人账户不存在\",\"error_code\":108,\"error_type\":\"WowTalkAPIError\",\"response_data\":{\"result_code\":108,\"api_version\":\"1.0\",\"request_id\":\"shimztest19-1764850653920\",\"message\":\"The user does not exist.\",\"response_data\":\"\"},\"bot_id\":\"63069654-7750-409d-9a58-a0960d899a20\"}\n[ANSWER]\n通知失败:该用户在 WowTalk 系统中不存在,请确认员工信息或检查账号是否正确。" + } + ] + }, + "metadata": { + "_promptfooFileMetadata": {} + }, + "failureReason": 0 + } + ], + "stats": { + "successes": 3, + "failures": 0, + "errors": 0, + "tokenUsage": { + "prompt": 596, + "completion": 2017, + "cached": 709, + "total": 3322, + "numRequests": 3, + "completionDetails": { + "reasoning": 0, + "acceptedPrediction": 0, + "rejectedPrediction": 0 + }, + "assertions": { + "total": 746, + "prompt": 697, + "completion": 49, + "cached": 0, + "numRequests": 0, + "completionDetails": { + "reasoning": 0, + "acceptedPrediction": 0, + "rejectedPrediction": 0 + } + } + } + } + }, + "config": { + "tags": {}, + "description": "Novare Test", + "prompts": [ + "file:///Users/moshui/Documents/felo/qwen-agent/promptfoo/conversation/prompt.json" + ], + "providers": [ + { + "id": "openai:chat:qwen3", + "config": { + "apiBaseUrl": "https://catalog-agent-dev.gbase.ai/api/v2", + "apiKey": "a21c99620a8ef61d69563afe05ccce89", + "passthrough": { + "bot_id": "63069654-7750-409d-9a58-a0960d899a20", + "tool_response": true, + "language": "zh" + } + } + } + ], + "tests": [ + { + "vars": { + "question": "清水太郎在哪里" + }, + "assert": [ + { + "type": "regex", + "value": "\\[TOOL_CALL\\].*find_employee_location" + } + ] + }, + { + "vars": { + "question": "通知他明天上午8点开会" + }, + "assert": [ + { + "type": "llm-rubric", + "value": "询问是否确认发送" + } + ] + }, + { + "vars": { + "question": "确认" + }, + "assert": [ + { + "type": "regex", + "value": "\\[TOOL_CALL\\].*wowtalk_send_message_to_member" + } + ] + } + ], + "scenarios": [], + "env": {}, + "sharing": true, + "defaultTest": { + "options": { + "provider": { + "text": { + "id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct", + "config": { + "apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26", + "apiBaseUrl": "https://one.felo.me/v1", + "apiVersion": "2024-02-01" + } + } + } + }, + "vars": {}, + "assert": [], + "metadata": {} + }, + "outputPath": [ + "result.json" + ], + "extensions": [], + "metadata": {} + }, + "shareableUrl": null, + "metadata": { + "promptfooVersion": "0.117.11", + "nodeVersion": "v20.10.0", + "platform": "darwin", + "arch": "x64", + "exportedAt": "2025-12-04T12:17:36.718Z", + "evaluationCreatedAt": "2025-12-04T12:17:14.502Z" + } +} \ No newline at end of file diff --git a/promptfoo/query/config.yaml b/promptfoo/query/config.yaml new file mode 100644 index 0000000..45cc0ee --- /dev/null +++ b/promptfoo/query/config.yaml @@ -0,0 +1,32 @@ +env: {} +description: Novare Test +providers: + - id: openai:chat:qwen3 + config: + apiBaseUrl: https://catalog-agent-dev.gbase.ai/api/v2 + apiKey: a21c99620a8ef61d69563afe05ccce89 + passthrough: + bot_id: 63069654-7750-409d-9a58-a0960d899a20 + tool_response: true + language: zh +prompts: + - file://prompt.json +tests: + - vars: + question: 940092の稼働状況 + assert: + - type: regex + value: \[TOOL_CALL\].*dxcore_get_device_status + +defaultTest: + options: + provider: + text: + id: openai:chat:qwen/qwen3-next-80b-a3b-instruct + config: + apiKey: sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26 + apiBaseUrl: https://one.felo.me/v1 + apiVersion: '2024-02-01' +evaluateOptions: {} +writeLatestResults: true +sharing: true diff --git a/promptfoo/query/prompt.json b/promptfoo/query/prompt.json new file mode 100644 index 0000000..06f9b09 --- /dev/null +++ b/promptfoo/query/prompt.json @@ -0,0 +1,6 @@ +[ + { + "role": "user", + "content": "{{ question }}" + } +] diff --git a/promptfoo/query/result.json b/promptfoo/query/result.json new file mode 100644 index 0000000..eae5004 --- /dev/null +++ b/promptfoo/query/result.json @@ -0,0 +1,235 @@ +{ + "evalId": "eval-rew-2025-12-04T11:49:09", + "results": { + "version": 3, + "timestamp": "2025-12-04T11:49:09.282Z", + "prompts": [ + { + "raw": "[{\"role\":\"user\",\"content\":\"{{ question }}\"}]", + "label": "prompt.json: [{\"role\":\"user\",\"content\":\"{{ question }}\"}]", + "id": "7fd9d6ab1656b5f683dd7d34fc535754cd42291c7b78f2aa5fd68b3e43dae7b6", + "provider": "openai:chat:qwen3", + "metrics": { + "score": 1, + "testPassCount": 1, + "testFailCount": 0, + "testErrorCount": 0, + "assertPassCount": 1, + "assertFailCount": 0, + "totalLatencyMs": 8731, + "tokenUsage": { + "prompt": 11, + "completion": 1174, + "cached": 0, + "total": 1185, + "numRequests": 1, + "completionDetails": { + "reasoning": 0, + "acceptedPrediction": 0, + "rejectedPrediction": 0 + }, + "assertions": { + "total": 0, + "prompt": 0, + "completion": 0, + "cached": 0, + "numRequests": 0, + "completionDetails": { + "reasoning": 0, + "acceptedPrediction": 0, + "rejectedPrediction": 0 + } + } + }, + "namedScores": {}, + "namedScoresCount": {}, + "cost": 0 + } + } + ], + "results": [ + { + "cost": 0, + "gradingResult": { + "pass": true, + "score": 1, + "reason": "All assertions passed", + "namedScores": {}, + "tokensUsed": { + "total": 0, + "prompt": 0, + "completion": 0, + "cached": 0, + "numRequests": 0 + }, + "componentResults": [ + { + "pass": true, + "score": 1, + "reason": "Assertion passed", + "assertion": { + "type": "regex", + "value": "\\[TOOL_CALL\\].*dxcore_get_device_status" + } + } + ], + "assertion": null + }, + "id": "ec8dff77-79da-499f-aee0-f40727fa844e", + "latencyMs": 8731, + "namedScores": {}, + "prompt": { + "raw": "[\n {\n \"role\": \"user\",\n \"content\": \"940092の稼働状況\"\n }\n]", + "label": "prompt.json: [{\"role\":\"user\",\"content\":\"{{ question }}\"}]" + }, + "promptId": "7fd9d6ab1656b5f683dd7d34fc535754cd42291c7b78f2aa5fd68b3e43dae7b6", + "promptIdx": 0, + "provider": { + "id": "openai:chat:qwen3", + "label": "" + }, + "response": { + "output": "[THINK]\n### 🎯 核心目标\n查询编号为“940092”的设备的当前运行状态(如温度、湿度、运行模式等),以了解其稼働状況(工作状态)。\n\n### 📋 现状分析\n- **关键信息**: \n - 用户查询的是“940092”的稼働状況,该编号极可能为设备ID(如空调或照明设备)。 \n - 当前时间为2025年12月4日19:49,属于晚间时段,设备可能处于运行中。 \n - 用户未明确要求控制设备,仅要求查询状态,属于信息查询类请求。\n\n- **限制条件**: \n - 仅允许查询设备状态,不能擅自执行控制操作(如开关、调温等)。 \n - 必须使用指定工具,且需优先使用精准匹配的设备ID查询工具。 \n - 所有输出语言必须为中文。\n\n- **可用资源**: \n - **Iot Control-dxcore_get_device_status**:可通过设备ID直接查询设备当前状态(温度、湿度、运行模式、风量等)。 \n - **Iot Control-find_devices_by_room**:仅在设备ID无效或模糊时作为备选,但当前ID明确,无需使用。 \n - 无需调用人员、消息、天气、知识库或搜索工具,因查询目标明确为设备状态。\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 Iot Control-dxcore_get_device_status,传入参数 device_id: \"940092\",查询该设备的当前运行状态。\n\n[TOOL_CALL] Iot Control-dxcore_get_device_status\n{\"sensor_id\": 940092}\n[TOOL_RESPONSE] Iot Control-dxcore_get_device_status\n{\"success\":true,\"data\":[{\"Device.Info.Id\":940092,\"Device.Info.Type\":\"light\",\"Device.Info.Description\":\"A棟 2F コワーキングスペース①(丸)-01 照明\",\"Device.Info.Manufacturer\":\"Signify照明\",\"Device.Info.SerialNumber\":\"none\",\"Device.Info.Coordinates\":[0,0,0],\"Device.Info.FieldId\":\"\",\"Device.Status.Lighting\":3}],\"site_name\":\"shiomi\",\"device_type\":\"light\",\"sensor_id\":940092}\n[ANSWER]\nA栋2F协同空间①(丸)-01的照明设备运行正常,当前亮度为3级。", + "tokenUsage": { + "total": 1185, + "prompt": 11, + "completion": 1174 + }, + "cached": false, + "finishReason": "stop" + }, + "score": 1, + "success": true, + "testCase": { + "vars": { + "question": "940092の稼働状況" + }, + "assert": [ + { + "type": "regex", + "value": "\\[TOOL_CALL\\].*dxcore_get_device_status" + } + ], + "options": { + "provider": { + "text": { + "id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct", + "config": { + "apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26", + "apiBaseUrl": "https://one.felo.me/v1", + "apiVersion": "2024-02-01" + } + } + } + }, + "metadata": {} + }, + "testIdx": 0, + "vars": { + "question": "940092の稼働状況" + }, + "metadata": { + "_promptfooFileMetadata": {} + }, + "failureReason": 0 + } + ], + "stats": { + "successes": 1, + "failures": 0, + "errors": 0, + "tokenUsage": { + "prompt": 11, + "completion": 1174, + "cached": 0, + "total": 1185, + "numRequests": 1, + "completionDetails": { + "reasoning": 0, + "acceptedPrediction": 0, + "rejectedPrediction": 0 + }, + "assertions": { + "total": 0, + "prompt": 0, + "completion": 0, + "cached": 0, + "numRequests": 0, + "completionDetails": { + "reasoning": 0, + "acceptedPrediction": 0, + "rejectedPrediction": 0 + } + } + } + } + }, + "config": { + "tags": {}, + "description": "Novare Test", + "prompts": [ + "file:///Users/moshui/Documents/felo/qwen-agent/promptfoo/query/prompt.json" + ], + "providers": [ + { + "id": "openai:chat:qwen3", + "config": { + "apiBaseUrl": "https://catalog-agent-dev.gbase.ai/api/v2", + "apiKey": "a21c99620a8ef61d69563afe05ccce89", + "passthrough": { + "bot_id": "63069654-7750-409d-9a58-a0960d899a20", + "tool_response": true, + "language": "zh" + } + } + } + ], + "tests": [ + { + "vars": { + "question": "940092の稼働状況" + }, + "assert": [ + { + "type": "regex", + "value": "\\[TOOL_CALL\\].*dxcore_get_device_status" + } + ] + } + ], + "scenarios": [], + "env": {}, + "sharing": true, + "defaultTest": { + "options": { + "provider": { + "text": { + "id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct", + "config": { + "apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26", + "apiBaseUrl": "https://one.felo.me/v1", + "apiVersion": "2024-02-01" + } + } + } + }, + "vars": {}, + "assert": [], + "metadata": {} + }, + "outputPath": [ + "result.json" + ], + "extensions": [], + "metadata": {} + }, + "shareableUrl": null, + "metadata": { + "promptfooVersion": "0.117.11", + "nodeVersion": "v20.10.0", + "platform": "darwin", + "arch": "x64", + "exportedAt": "2025-12-04T11:49:18.166Z", + "evaluationCreatedAt": "2025-12-04T11:49:09.282Z" + } +} \ No newline at end of file diff --git a/promptfoo/result.json b/promptfoo/result.json new file mode 100644 index 0000000..cfb9b38 --- /dev/null +++ b/promptfoo/result.json @@ -0,0 +1,369 @@ +{ + "evalId": "eval-0Pa-2025-12-04T11:23:59", + "results": { + "version": 3, + "timestamp": "2025-12-04T11:23:59.637Z", + "prompts": [ + { + "raw": "[\n{% for message in messages %}\n {% set outer_loop = loop %}\n {% for role, content in message %}\n {\n \"role\": \"{{ role }}\",\n \"content\": \"{{ content }}\"\n }{% if not (loop.last and outer_loop.last) %},{% endif %}\n {% endfor %}\n{% endfor %}\n]\n", + "label": "prompt.json: [\n{% for message in messages %}\n {% set outer_loop = loop %}\n {% for role, content in message %}\n {\n \"role\": \"{{ role }}\",\n \"content\": \"{{ content }}\"\n }{% if not (loop.last and outer_loop.last) %},{% endif %}\n {% endfor %}\n{% endfor %}\n]\n", + "id": "31ba1cc91b4ca656f8bf91b71d90fd1c786f46919fd3e498accd274537581342", + "provider": "openai:chat:qwen3", + "metrics": { + "score": 2, + "testPassCount": 2, + "testFailCount": 0, + "testErrorCount": 0, + "assertPassCount": 2, + "assertFailCount": 0, + "totalLatencyMs": 62, + "tokenUsage": { + "prompt": 0, + "completion": 0, + "cached": 1877, + "total": 1877, + "numRequests": 2, + "completionDetails": { + "reasoning": 0, + "acceptedPrediction": 0, + "rejectedPrediction": 0 + }, + "assertions": { + "total": 808, + "prompt": 0, + "completion": 0, + "cached": 808, + "numRequests": 0, + "completionDetails": { + "reasoning": 0, + "acceptedPrediction": 0, + "rejectedPrediction": 0 + } + } + }, + "namedScores": {}, + "namedScoresCount": {}, + "cost": 0 + } + } + ], + "results": [ + { + "cost": 0, + "gradingResult": { + "pass": true, + "score": 1, + "reason": "All assertions passed", + "namedScores": {}, + "tokensUsed": { + "total": 808, + "prompt": 0, + "completion": 0, + "cached": 808, + "numRequests": 0 + }, + "componentResults": [ + { + "assertion": { + "type": "llm-rubric", + "value": "询问是否确认发送" + }, + "pass": true, + "score": 1, + "reason": "输出中明确提出了确认请求:'即将发送“明天上午8点开会”的消息给清水邦英,是否确认?',符合要求在执行消息通知前进行确认的流程", + "tokensUsed": { + "total": 808, + "prompt": 0, + "completion": 0, + "cached": 808, + "numRequests": 0, + "completionDetails": { + "reasoning": 0, + "acceptedPrediction": 0, + "rejectedPrediction": 0 + } + } + } + ], + "assertion": null + }, + "id": "867656f1-edcd-41b9-b645-c9f5df2e2cc1", + "latencyMs": 46, + "namedScores": {}, + "prompt": { + "raw": "[\n\n \n \n {\n \"role\": \"user\",\n \"content\": \"通知清水太郎明天上午8点开会\"\n }\n \n\n]\n", + "label": "prompt.json: [\n{% for message in messages %}\n {% set outer_loop = loop %}\n {% for role, content in message %}\n {\n \"role\": \"{{ role }}\",\n \"content\": \"{{ content }}\"\n }{% if not (loop.last and outer_loop.last) %},{% endif %}\n {% endfor %}\n{% endfor %}\n]\n" + }, + "promptId": "31ba1cc91b4ca656f8bf91b71d90fd1c786f46919fd3e498accd274537581342", + "promptIdx": 0, + "provider": { + "id": "openai:chat:qwen3", + "label": "" + }, + "response": { + "output": "[THINK]\n### 🎯 核心目标\n通知清水太郎明天上午8点开会。\n\n### 📋 现状分析\n- **关键信息**: \n - 用户指令为“通知清水太郎明天上午8点开会” \n - 目标对象为“清水太郎”(需确认其是否存在及联系方式) \n - 消息内容为会议提醒,时间为“明天上午8点”(2025年12月5日 08:00) \n - 操作属于“消息通知”,需经过确认流程\n\n- **限制条件**: \n - 根据指南第4条,消息通知类操作必须进行确认,不可直接执行 \n - 必须通过【人员检索】确认清水太郎的身份与WowTalk账号绑定状态 \n - 消息内容需准确无误地传达时间与事项 \n - 当前时间为2025年12月4日19:01,明天为12月5日,时间合理\n\n- **可用资源**: \n - Iot Control-find_employee_location(查找人员位置/信息) \n - Wowtalk tool-wowtalk_send_message_to_member(发送消息通知) \n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用 `Iot Control-find_employee_location` 查询“清水太郎”的员工信息,获取其唯一sensor_id或WowTalk账号\n2. 若查询到唯一匹配结果,生成确认提示:“即将发送‘明天上午8点开会’的消息给清水太郎,是否确认?”\n3. 等待用户回复确认(如“确认”、“好的”等)\n4. 收到确认后,调用 `Wowtalk tool-wowtalk_send_message_to_member` 向清水太郎发送消息:“明天上午8点开会”\n5. 若未查询到清水太郎,请回复:“未找到清水太郎的员工信息,请确认姓名是否正确或是否为系统内注册人员”\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n即将发送“明天上午8点开会”的消息给清水邦英,是否确认?", + "tokenUsage": { + "cached": 1168, + "total": 1168 + }, + "cached": true, + "finishReason": "stop" + }, + "score": 1, + "success": true, + "testCase": { + "description": "通知清水太郎明天上午8点开会", + "vars": { + "messages": [ + { + "user": "通知清水太郎明天上午8点开会" + } + ] + }, + "assert": [ + { + "type": "llm-rubric", + "value": "询问是否确认发送" + } + ], + "options": { + "provider": { + "text": { + "id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct", + "config": { + "apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26", + "apiBaseUrl": "https://one.felo.me/v1", + "apiVersion": "2024-02-01" + } + } + } + }, + "metadata": {} + }, + "testIdx": 0, + "vars": { + "messages": [ + { + "user": "通知清水太郎明天上午8点开会" + } + ] + }, + "metadata": { + "_promptfooFileMetadata": {} + }, + "failureReason": 0 + }, + { + "cost": 0, + "gradingResult": { + "pass": true, + "score": 1, + "reason": "All assertions passed", + "namedScores": {}, + "tokensUsed": { + "total": 0, + "prompt": 0, + "completion": 0, + "cached": 0, + "numRequests": 0 + }, + "componentResults": [ + { + "pass": true, + "score": 1, + "reason": "Assertion passed", + "assertion": { + "type": "regex", + "value": "\\[TOOL_CALL\\].*find_employee_location" + } + } + ], + "assertion": null + }, + "id": "a1bb2558-6a43-42e1-969d-332d8a4ca397", + "latencyMs": 16, + "namedScores": {}, + "prompt": { + "raw": "[\n\n \n \n {\n \"role\": \"user\",\n \"content\": \"清水太郎在哪里\"\n }\n \n\n]\n", + "label": "prompt.json: [\n{% for message in messages %}\n {% set outer_loop = loop %}\n {% for role, content in message %}\n {\n \"role\": \"{{ role }}\",\n \"content\": \"{{ content }}\"\n }{% if not (loop.last and outer_loop.last) %},{% endif %}\n {% endfor %}\n{% endfor %}\n]\n" + }, + "promptId": "31ba1cc91b4ca656f8bf91b71d90fd1c786f46919fd3e498accd274537581342", + "promptIdx": 0, + "provider": { + "id": "openai:chat:qwen3", + "label": "" + }, + "response": { + "output": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内,无法获取实时位置。", + "tokenUsage": { + "cached": 709, + "total": 709 + }, + "cached": true, + "finishReason": "stop" + }, + "score": 1, + "success": true, + "testCase": { + "description": "清水太郎在哪里", + "vars": { + "messages": [ + { + "user": "清水太郎在哪里" + } + ] + }, + "assert": [ + { + "type": "regex", + "value": "\\[TOOL_CALL\\].*find_employee_location" + } + ], + "options": { + "provider": { + "text": { + "id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct", + "config": { + "apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26", + "apiBaseUrl": "https://one.felo.me/v1", + "apiVersion": "2024-02-01" + } + } + } + }, + "metadata": {} + }, + "testIdx": 1, + "vars": { + "messages": [ + { + "user": "清水太郎在哪里" + } + ] + }, + "metadata": { + "_promptfooFileMetadata": {} + }, + "failureReason": 0 + } + ], + "stats": { + "successes": 2, + "failures": 0, + "errors": 0, + "tokenUsage": { + "prompt": 0, + "completion": 0, + "cached": 1877, + "total": 1877, + "numRequests": 2, + "completionDetails": { + "reasoning": 0, + "acceptedPrediction": 0, + "rejectedPrediction": 0 + }, + "assertions": { + "total": 808, + "prompt": 0, + "completion": 0, + "cached": 808, + "numRequests": 0, + "completionDetails": { + "reasoning": 0, + "acceptedPrediction": 0, + "rejectedPrediction": 0 + } + } + } + } + }, + "config": { + "tags": {}, + "description": "Novare Test", + "prompts": [ + "file:///Users/moshui/Documents/felo/qwen-agent/promptfoo/prompt.json" + ], + "providers": [ + { + "id": "openai:chat:qwen3", + "config": { + "apiBaseUrl": "https://catalog-agent-dev.gbase.ai/api/v2", + "apiKey": "a21c99620a8ef61d69563afe05ccce89", + "passthrough": { + "bot_id": "63069654-7750-409d-9a58-a0960d899a20", + "tool_response": true, + "language": "zh" + } + } + } + ], + "tests": [ + { + "description": "通知清水太郎明天上午8点开会", + "vars": { + "messages": [ + { + "user": "通知清水太郎明天上午8点开会" + } + ] + }, + "assert": [ + { + "type": "llm-rubric", + "value": "询问是否确认发送" + } + ] + }, + { + "description": "清水太郎在哪里", + "vars": { + "messages": [ + { + "user": "清水太郎在哪里" + } + ] + }, + "assert": [ + { + "type": "regex", + "value": "\\[TOOL_CALL\\].*find_employee_location" + } + ] + } + ], + "scenarios": [], + "env": {}, + "sharing": true, + "defaultTest": { + "options": { + "provider": { + "text": { + "id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct", + "config": { + "apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26", + "apiBaseUrl": "https://one.felo.me/v1", + "apiVersion": "2024-02-01" + } + } + } + }, + "vars": {}, + "assert": [], + "metadata": {} + }, + "outputPath": [ + "result.json" + ], + "extensions": [], + "metadata": {} + }, + "shareableUrl": null, + "metadata": { + "promptfooVersion": "0.117.11", + "nodeVersion": "v20.10.0", + "platform": "darwin", + "arch": "x64", + "exportedAt": "2025-12-04T11:24:00.162Z", + "evaluationCreatedAt": "2025-12-04T11:23:59.637Z" + } +} \ No newline at end of file