Merge branch 'master' into onprem

朱潮 2025-12-05 13:16:04 +08:00
commit 7fbebef764
11 changed files with 2027 additions and 686 deletions

View File

@@ -13,23 +13,7 @@ prompts:
   - file://prompt.json
 nunjucksFilters:
   encode: ./encode.js
-tests:
-  - vars:
-      question: 清水太郎在哪里
-    assert:
-      - type: regex
-        value: \[TOOL_CALL\].*find_employee_location
-  - vars:
-      question: 通知他明天上午8点开会
-    assert:
-      - type: llm-rubric
-        value: 询问是否确认发送
-  - vars:
-      question: 确认
-    assert:
-      - type: regex
-        value: \[TOOL_CALL\].*wowtalk_send_message_to_member
+tests: file://tests.yaml
 defaultTest:
   options:
     provider:

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,6 @@
question,regex,llm-rubric,,,,,,,,,,,,,,,,,
田中花子在哪里,\[TOOL_CALL\].*find_employee_location,,,,,,,,,,,,,,,,,,
通知她明天上午8点开会,,询问是否确认发送,,,,,,,,,,,,,,,,,
确认,\[TOOL_CALL\].*wowtalk_send_message_to_member,,,,,,,,,,,,,,,,,,
把DefineRoom 4的灯光状态发给田中花子,,调用find_devices_by_room和dxcore_get_device_status获取灯光状态并询问是否确认发送。,,,,,,,,,,,,,,,,,
确认,\[TOOL_CALL\].*wowtalk_send_message_to_member,,,,,,,,,,,,,,,,,,

View File

@@ -0,0 +1,25 @@
- vars:
    question: 田中花子在哪里
  assert:
    - type: regex
      value: \[TOOL_CALL\].*find_employee_location
- vars:
    question: 通知她明天上午8点开会
  assert:
    - type: llm-rubric
      value: 询问是否确认发送
- vars:
    question: 确认
  assert:
    - type: regex
      value: \[TOOL_CALL\].*wowtalk_send_message_to_member
- vars:
    question: 把DefineRoom 4的灯光状态发给田中花子
  assert:
    - type: llm-rubric
      value: 调用find_devices_by_room和dxcore_get_device_status获取灯光状态并询问是否确认发送。
- vars:
    question: 确认
  assert:
    - type: regex
      value: \[TOOL_CALL\].*wowtalk_send_message_to_member

promptfoo/csv_to_yaml.py Normal file
View File

@@ -0,0 +1,45 @@
#!/usr/bin/env python3
import csv
import yaml
import sys


def csv_to_yaml(csv_file, yaml_file):
    """Convert CSV file to YAML format for promptfoo tests"""
    tests = []

    with open(csv_file, 'r', encoding='utf-8-sig') as f:
        reader = csv.DictReader(f)
        for row in reader:
            if row['question']:
                test_case = {
                    'vars': {
                        'question': row['question'].strip()
                    },
                    'assert': []
                }

                if row['regex'] and row['regex'].strip():
                    test_case['assert'].append({
                        'type': 'regex',
                        'value': row['regex'].strip()
                    })

                # Add llm-rubric if present
                if row['llm-rubric'] and row['llm-rubric'].strip():
                    test_case['assert'].append({
                        'type': 'llm-rubric',
                        'value': row['llm-rubric'].strip()
                    })

                tests.append(test_case)

    with open(yaml_file, 'w', encoding='utf-8') as f:
        yaml.dump(tests, f, default_flow_style=False, allow_unicode=True, sort_keys=False)

    print(f"Converted {len(tests)} test cases from {csv_file} to {yaml_file}")


if __name__ == '__main__':
    csv_to_yaml("conversation/tests.csv", "conversation/tests.yaml")
    csv_to_yaml("query/tests.csv", "query/tests.yaml")

View File

@@ -11,13 +11,7 @@ providers:
         language: zh
 prompts:
   - file://prompt.json
-tests:
-  - vars:
-      question: 940092の稼働状況
-    assert:
-      - type: regex
-        value: \[TOOL_CALL\].*dxcore_get_device_status
+tests: file://tests.yaml
 defaultTest:
   options:
     provider:

View File

@@ -1,8 +1,8 @@
{
-"evalId": "eval-rew-2025-12-04T11:49:09",
+"evalId": "eval-kAj-2025-12-04T14:31:43",
"results": {
"version": 3,
-"timestamp": "2025-12-04T11:49:09.282Z",
+"timestamp": "2025-12-04T14:31:43.709Z",
"prompts": [
{
"raw": "[{\"role\":\"user\",\"content\":\"{{ question }}\"}]",
@@ -10,19 +10,19 @@
"id": "7fd9d6ab1656b5f683dd7d34fc535754cd42291c7b78f2aa5fd68b3e43dae7b6",
"provider": "openai:chat:qwen3",
"metrics": {
-"score": 1,
-"testPassCount": 1,
+"score": 0,
+"testPassCount": 0,
"testFailCount": 0,
"testErrorCount": 0,
-"assertPassCount": 1,
+"assertPassCount": 0,
"assertFailCount": 0,
-"totalLatencyMs": 8731,
+"totalLatencyMs": 0,
"tokenUsage": {
-"prompt": 11,
-"completion": 1174,
+"prompt": 0,
+"completion": 0,
"cached": 0,
-"total": 1185,
-"numRequests": 1,
+"total": 0,
+"numRequests": 0,
"completionDetails": {
"reasoning": 0,
"acceptedPrediction": 0,
@@ -47,103 +47,17 @@
}
}
],
-"results": [
{
"cost": 0,
"gradingResult": {
"pass": true,
"score": 1,
"reason": "All assertions passed",
"namedScores": {},
"tokensUsed": {
"total": 0,
"prompt": 0,
"completion": 0,
"cached": 0,
"numRequests": 0
},
"componentResults": [
{
"pass": true,
"score": 1,
"reason": "Assertion passed",
"assertion": {
"type": "regex",
"value": "\\[TOOL_CALL\\].*dxcore_get_device_status"
}
}
],
"assertion": null
},
"id": "ec8dff77-79da-499f-aee0-f40727fa844e",
"latencyMs": 8731,
"namedScores": {},
"prompt": {
"raw": "[\n {\n \"role\": \"user\",\n \"content\": \"940092の稼働状況\"\n }\n]",
"label": "prompt.json: [{\"role\":\"user\",\"content\":\"{{ question }}\"}]"
},
"promptId": "7fd9d6ab1656b5f683dd7d34fc535754cd42291c7b78f2aa5fd68b3e43dae7b6",
"promptIdx": 0,
"provider": {
"id": "openai:chat:qwen3",
"label": ""
},
"response": {
"output": "[THINK]\n### 🎯 核心目标\n查询编号为“940092”的设备的当前运行状态如温度、湿度、运行模式等以了解其稼働状況工作状态。\n\n### 📋 现状分析\n- **关键信息**: \n - 用户查询的是“940092”的稼働状況该编号极可能为设备ID如空调或照明设备。 \n - 当前时间为2025年12月4日19:49属于晚间时段设备可能处于运行中。 \n - 用户未明确要求控制设备,仅要求查询状态,属于信息查询类请求。\n\n- **限制条件**: \n - 仅允许查询设备状态,不能擅自执行控制操作(如开关、调温等)。 \n - 必须使用指定工具且需优先使用精准匹配的设备ID查询工具。 \n - 所有输出语言必须为中文。\n\n- **可用资源**: \n - **Iot Control-dxcore_get_device_status**可通过设备ID直接查询设备当前状态温度、湿度、运行模式、风量等。 \n - **Iot Control-find_devices_by_room**仅在设备ID无效或模糊时作为备选但当前ID明确无需使用。 \n - 无需调用人员、消息、天气、知识库或搜索工具,因查询目标明确为设备状态。\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 Iot Control-dxcore_get_device_status传入参数 device_id: \"940092\",查询该设备的当前运行状态。\n\n[TOOL_CALL] Iot Control-dxcore_get_device_status\n{\"sensor_id\": 940092}\n[TOOL_RESPONSE] Iot Control-dxcore_get_device_status\n{\"success\":true,\"data\":[{\"Device.Info.Id\":940092,\"Device.Info.Type\":\"light\",\"Device.Info.Description\":\"A棟 2F コワーキングスペース①(丸)-01 照明\",\"Device.Info.Manufacturer\":\"Signify照明\",\"Device.Info.SerialNumber\":\"none\",\"Device.Info.Coordinates\":[0,0,0],\"Device.Info.FieldId\":\"\",\"Device.Status.Lighting\":3}],\"site_name\":\"shiomi\",\"device_type\":\"light\",\"sensor_id\":940092}\n[ANSWER]\nA栋2F协同空间①(丸)-01的照明设备运行正常当前亮度为3级。",
"tokenUsage": {
"total": 1185,
"prompt": 11,
"completion": 1174
},
"cached": false,
"finishReason": "stop"
},
"score": 1,
"success": true,
"testCase": {
"vars": {
"question": "940092の稼働状況"
},
"assert": [
{
"type": "regex",
"value": "\\[TOOL_CALL\\].*dxcore_get_device_status"
}
],
"options": {
"provider": {
"text": {
"id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct",
"config": {
"apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26",
"apiBaseUrl": "https://one.felo.me/v1",
"apiVersion": "2024-02-01"
}
}
}
},
"metadata": {}
},
"testIdx": 0,
"vars": {
"question": "940092の稼働状況"
},
"metadata": {
"_promptfooFileMetadata": {}
},
"failureReason": 0
}
-],
+"results": [],
"stats": { "stats": {
"successes": 1, "successes": 0,
"failures": 0, "failures": 0,
"errors": 0, "errors": 0,
"tokenUsage": { "tokenUsage": {
"prompt": 11, "prompt": 0,
"completion": 1174, "completion": 0,
"cached": 0, "cached": 0,
"total": 1185, "total": 0,
"numRequests": 1, "numRequests": 0,
"completionDetails": { "completionDetails": {
"reasoning": 0, "reasoning": 0,
"acceptedPrediction": 0, "acceptedPrediction": 0,
@@ -184,19 +98,7 @@
}
}
],
-"tests": [
{
"vars": {
"question": "940092の稼働状況"
},
"assert": [
{
"type": "regex",
"value": "\\[TOOL_CALL\\].*dxcore_get_device_status"
}
]
}
-],
+"tests": [],
"scenarios": [], "scenarios": [],
"env": {}, "env": {},
"sharing": true, "sharing": true,
@ -229,7 +131,7 @@
"nodeVersion": "v20.10.0", "nodeVersion": "v20.10.0",
"platform": "darwin", "platform": "darwin",
"arch": "x64", "arch": "x64",
"exportedAt": "2025-12-04T11:49:18.166Z", "exportedAt": "2025-12-04T14:31:43.766Z",
"evaluationCreatedAt": "2025-12-04T11:49:09.282Z" "evaluationCreatedAt": "2025-12-04T14:31:43.709Z"
} }
} }

View File

@@ -0,0 +1,8 @@
question,regex,llm-rubric,,,,,,,,,,,,,,,,,
940092の稼働状況,\[TOOL_CALL\].*dxcore_get_device_status,,,,,,,,,,,,,,,,,,
关闭设备900142的灯光,\[TOOL_CALL\].*dxcore_update_device_status,,,,,,,,,,,,,,,,,,
Define Room1 的灯光状态,\[TOOL_CALL\].*find_devices_by_room,,,,,,,,,,,,,,,,,,
卫生间在哪里,\[TOOL_CALL\].*rag_retrieve,,,,,,,,,,,,,,,,,,
我丢了物品怎么办,\[TOOL_CALL\].*rag_retrieve,,,,,,,,,,,,,,,,,,
咖啡多少钱一杯,\[TOOL_CALL\].*rag_retrieve,,,,,,,,,,,,,,,,,,
东京明天的天气,\[TOOL_CALL\].*weather_get_by_location,,,,,,,,,,,,,,,,,,

View File

@@ -0,0 +1,35 @@
- vars:
    question: 940092の稼働状況
  assert:
    - type: regex
      value: \[TOOL_CALL\].*dxcore_get_device_status
- vars:
    question: 关闭设备900142的灯光
  assert:
    - type: regex
      value: \[TOOL_CALL\].*dxcore_update_device_status
- vars:
    question: Define Room1 的灯光状态
  assert:
    - type: regex
      value: \[TOOL_CALL\].*find_devices_by_room
- vars:
    question: 卫生间在哪里
  assert:
    - type: regex
      value: \[TOOL_CALL\].*rag_retrieve
- vars:
    question: 我丢了物品怎么办
  assert:
    - type: regex
      value: \[TOOL_CALL\].*rag_retrieve
- vars:
    question: 咖啡多少钱一杯
  assert:
    - type: regex
      value: \[TOOL_CALL\].*rag_retrieve
- vars:
    question: 东京明天的天气
  assert:
    - type: regex
      value: \[TOOL_CALL\].*weather_get_by_location

View File

@@ -1,369 +0,0 @@
{
"evalId": "eval-0Pa-2025-12-04T11:23:59",
"results": {
"version": 3,
"timestamp": "2025-12-04T11:23:59.637Z",
"prompts": [
{
"raw": "[\n{% for message in messages %}\n {% set outer_loop = loop %}\n {% for role, content in message %}\n {\n \"role\": \"{{ role }}\",\n \"content\": \"{{ content }}\"\n }{% if not (loop.last and outer_loop.last) %},{% endif %}\n {% endfor %}\n{% endfor %}\n]\n",
"label": "prompt.json: [\n{% for message in messages %}\n {% set outer_loop = loop %}\n {% for role, content in message %}\n {\n \"role\": \"{{ role }}\",\n \"content\": \"{{ content }}\"\n }{% if not (loop.last and outer_loop.last) %},{% endif %}\n {% endfor %}\n{% endfor %}\n]\n",
"id": "31ba1cc91b4ca656f8bf91b71d90fd1c786f46919fd3e498accd274537581342",
"provider": "openai:chat:qwen3",
"metrics": {
"score": 2,
"testPassCount": 2,
"testFailCount": 0,
"testErrorCount": 0,
"assertPassCount": 2,
"assertFailCount": 0,
"totalLatencyMs": 62,
"tokenUsage": {
"prompt": 0,
"completion": 0,
"cached": 1877,
"total": 1877,
"numRequests": 2,
"completionDetails": {
"reasoning": 0,
"acceptedPrediction": 0,
"rejectedPrediction": 0
},
"assertions": {
"total": 808,
"prompt": 0,
"completion": 0,
"cached": 808,
"numRequests": 0,
"completionDetails": {
"reasoning": 0,
"acceptedPrediction": 0,
"rejectedPrediction": 0
}
}
},
"namedScores": {},
"namedScoresCount": {},
"cost": 0
}
}
],
"results": [
{
"cost": 0,
"gradingResult": {
"pass": true,
"score": 1,
"reason": "All assertions passed",
"namedScores": {},
"tokensUsed": {
"total": 808,
"prompt": 0,
"completion": 0,
"cached": 808,
"numRequests": 0
},
"componentResults": [
{
"assertion": {
"type": "llm-rubric",
"value": "询问是否确认发送"
},
"pass": true,
"score": 1,
"reason": "输出中明确提出了确认请求:'即将发送“明天上午8点开会”的消息给清水邦英是否确认',符合要求在执行消息通知前进行确认的流程",
"tokensUsed": {
"total": 808,
"prompt": 0,
"completion": 0,
"cached": 808,
"numRequests": 0,
"completionDetails": {
"reasoning": 0,
"acceptedPrediction": 0,
"rejectedPrediction": 0
}
}
}
],
"assertion": null
},
"id": "867656f1-edcd-41b9-b645-c9f5df2e2cc1",
"latencyMs": 46,
"namedScores": {},
"prompt": {
"raw": "[\n\n \n \n {\n \"role\": \"user\",\n \"content\": \"通知清水太郎明天上午8点开会\"\n }\n \n\n]\n",
"label": "prompt.json: [\n{% for message in messages %}\n {% set outer_loop = loop %}\n {% for role, content in message %}\n {\n \"role\": \"{{ role }}\",\n \"content\": \"{{ content }}\"\n }{% if not (loop.last and outer_loop.last) %},{% endif %}\n {% endfor %}\n{% endfor %}\n]\n"
},
"promptId": "31ba1cc91b4ca656f8bf91b71d90fd1c786f46919fd3e498accd274537581342",
"promptIdx": 0,
"provider": {
"id": "openai:chat:qwen3",
"label": ""
},
"response": {
"output": "[THINK]\n### 🎯 核心目标\n通知清水太郎明天上午8点开会。\n\n### 📋 现状分析\n- **关键信息**: \n - 用户指令为“通知清水太郎明天上午8点开会” \n - 目标对象为“清水太郎”(需确认其是否存在及联系方式) \n - 消息内容为会议提醒时间为“明天上午8点”2025年12月5日 08:00 \n - 操作属于“消息通知”,需经过确认流程\n\n- **限制条件**: \n - 根据指南第4条消息通知类操作必须进行确认不可直接执行 \n - 必须通过【人员检索】确认清水太郎的身份与WowTalk账号绑定状态 \n - 消息内容需准确无误地传达时间与事项 \n - 当前时间为2025年12月4日19:01明天为12月5日时间合理\n\n- **可用资源**: \n - Iot Control-find_employee_location查找人员位置/信息) \n - Wowtalk tool-wowtalk_send_message_to_member发送消息通知 \n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用 `Iot Control-find_employee_location` 查询“清水太郎”的员工信息获取其唯一sensor_id或WowTalk账号\n2. 若查询到唯一匹配结果生成确认提示“即将发送明天上午8点开会的消息给清水太郎是否确认”\n3. 等待用户回复确认(如“确认”、“好的”等)\n4. 收到确认后,调用 `Wowtalk tool-wowtalk_send_message_to_member` 向清水太郎发送消息“明天上午8点开会”\n5. 若未查询到清水太郎,请回复:“未找到清水太郎的员工信息,请确认姓名是否正确或是否为系统内注册人员”\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n即将发送“明天上午8点开会”的消息给清水邦英是否确认",
"tokenUsage": {
"cached": 1168,
"total": 1168
},
"cached": true,
"finishReason": "stop"
},
"score": 1,
"success": true,
"testCase": {
"description": "通知清水太郎明天上午8点开会",
"vars": {
"messages": [
{
"user": "通知清水太郎明天上午8点开会"
}
]
},
"assert": [
{
"type": "llm-rubric",
"value": "询问是否确认发送"
}
],
"options": {
"provider": {
"text": {
"id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct",
"config": {
"apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26",
"apiBaseUrl": "https://one.felo.me/v1",
"apiVersion": "2024-02-01"
}
}
}
},
"metadata": {}
},
"testIdx": 0,
"vars": {
"messages": [
{
"user": "通知清水太郎明天上午8点开会"
}
]
},
"metadata": {
"_promptfooFileMetadata": {}
},
"failureReason": 0
},
{
"cost": 0,
"gradingResult": {
"pass": true,
"score": 1,
"reason": "All assertions passed",
"namedScores": {},
"tokensUsed": {
"total": 0,
"prompt": 0,
"completion": 0,
"cached": 0,
"numRequests": 0
},
"componentResults": [
{
"pass": true,
"score": 1,
"reason": "Assertion passed",
"assertion": {
"type": "regex",
"value": "\\[TOOL_CALL\\].*find_employee_location"
}
}
],
"assertion": null
},
"id": "a1bb2558-6a43-42e1-969d-332d8a4ca397",
"latencyMs": 16,
"namedScores": {},
"prompt": {
"raw": "[\n\n \n \n {\n \"role\": \"user\",\n \"content\": \"清水太郎在哪里\"\n }\n \n\n]\n",
"label": "prompt.json: [\n{% for message in messages %}\n {% set outer_loop = loop %}\n {% for role, content in message %}\n {\n \"role\": \"{{ role }}\",\n \"content\": \"{{ content }}\"\n }{% if not (loop.last and outer_loop.last) %},{% endif %}\n {% endfor %}\n{% endfor %}\n]\n"
},
"promptId": "31ba1cc91b4ca656f8bf91b71d90fd1c786f46919fd3e498accd274537581342",
"promptIdx": 0,
"provider": {
"id": "openai:chat:qwen3",
"label": ""
},
"response": {
"output": "[THINK]\n### 🎯 核心目标\n查找名为“清水太郎”的人员位置。\n\n### 📋 现状分析\n- **关键信息**: 用户询问“清水太郎在哪里”,明确指向查找某个人的当前位置。\n- **限制条件**: \n - 必须使用系统提供的工具进行人员检索,不能臆测或编造信息。\n - 所有查询类操作(如人员位置)无需用户额外确认,可直接执行。\n- **可用资源**: \n - 工具:`Iot Control-find_employee_location`(人员检索工具)\n\n### ⚡ 计划阶段\n**执行步骤**:\n1. 调用工具 `Iot Control-find_employee_location`,传入参数 `name=\"清水太郎\"`,查询该人员的当前位置。\n\n[TOOL_CALL] Iot Control-find_employee_location\n{\"name\": \"清水太郎\"}\n[TOOL_RESPONSE] Iot Control-find_employee_location\n{\"matched_count\":1,\"results\":[{\"name\":\"清水 邦英\",\"wowtalk_id\":\"u226160\",\"sensor_id\":\"3246789\",\"confidence\":85,\"location_status\":\"not_in_range\",\"error_message\":\"Sensor不在测位范围内或未检测到位置信号\",\"coordinates\":null,\"location\":null}]}\n[ANSWER]\n清水邦英先生目前未在测位范围内无法获取实时位置。",
"tokenUsage": {
"cached": 709,
"total": 709
},
"cached": true,
"finishReason": "stop"
},
"score": 1,
"success": true,
"testCase": {
"description": "清水太郎在哪里",
"vars": {
"messages": [
{
"user": "清水太郎在哪里"
}
]
},
"assert": [
{
"type": "regex",
"value": "\\[TOOL_CALL\\].*find_employee_location"
}
],
"options": {
"provider": {
"text": {
"id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct",
"config": {
"apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26",
"apiBaseUrl": "https://one.felo.me/v1",
"apiVersion": "2024-02-01"
}
}
}
},
"metadata": {}
},
"testIdx": 1,
"vars": {
"messages": [
{
"user": "清水太郎在哪里"
}
]
},
"metadata": {
"_promptfooFileMetadata": {}
},
"failureReason": 0
}
],
"stats": {
"successes": 2,
"failures": 0,
"errors": 0,
"tokenUsage": {
"prompt": 0,
"completion": 0,
"cached": 1877,
"total": 1877,
"numRequests": 2,
"completionDetails": {
"reasoning": 0,
"acceptedPrediction": 0,
"rejectedPrediction": 0
},
"assertions": {
"total": 808,
"prompt": 0,
"completion": 0,
"cached": 808,
"numRequests": 0,
"completionDetails": {
"reasoning": 0,
"acceptedPrediction": 0,
"rejectedPrediction": 0
}
}
}
}
},
"config": {
"tags": {},
"description": "Novare Test",
"prompts": [
"file:///Users/moshui/Documents/felo/qwen-agent/promptfoo/prompt.json"
],
"providers": [
{
"id": "openai:chat:qwen3",
"config": {
"apiBaseUrl": "https://catalog-agent-dev.gbase.ai/api/v2",
"apiKey": "a21c99620a8ef61d69563afe05ccce89",
"passthrough": {
"bot_id": "63069654-7750-409d-9a58-a0960d899a20",
"tool_response": true,
"language": "zh"
}
}
}
],
"tests": [
{
"description": "通知清水太郎明天上午8点开会",
"vars": {
"messages": [
{
"user": "通知清水太郎明天上午8点开会"
}
]
},
"assert": [
{
"type": "llm-rubric",
"value": "询问是否确认发送"
}
]
},
{
"description": "清水太郎在哪里",
"vars": {
"messages": [
{
"user": "清水太郎在哪里"
}
]
},
"assert": [
{
"type": "regex",
"value": "\\[TOOL_CALL\\].*find_employee_location"
}
]
}
],
"scenarios": [],
"env": {},
"sharing": true,
"defaultTest": {
"options": {
"provider": {
"text": {
"id": "openai:chat:qwen/qwen3-next-80b-a3b-instruct",
"config": {
"apiKey": "sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26",
"apiBaseUrl": "https://one.felo.me/v1",
"apiVersion": "2024-02-01"
}
}
}
},
"vars": {},
"assert": [],
"metadata": {}
},
"outputPath": [
"result.json"
],
"extensions": [],
"metadata": {}
},
"shareableUrl": null,
"metadata": {
"promptfooVersion": "0.117.11",
"nodeVersion": "v20.10.0",
"platform": "darwin",
"arch": "x64",
"exportedAt": "2025-12-04T11:24:00.162Z",
"evaluationCreatedAt": "2025-12-04T11:23:59.637Z"
}
}

View File

@@ -172,36 +172,39 @@ def process_messages(messages: List[Dict], language: Optional[str] = None) -> Li
if not text:
continue
-if current_tag == "TOOL_RESPONSE":
-if is_recent_message:
# 最近10条ASSISTANT消息保留完整TOOL_RESPONSE信息使用简略模式
if len(text) <= 500:
filtered_content += f"[TOOL_RESPONSE]\n{text}\n"
else:
# 截取前中后3段内容每段250字
first_part = text[:250]
middle_start = len(text) // 2 - 125
middle_part = text[middle_start:middle_start + 250]
last_part = text[-250:]
# 计算省略的字数
omitted_count = len(text) - 750
omitted_text = f"...此处省略{omitted_count}字..."
# 拼接内容
truncated_text = f"{first_part}\n{omitted_text}\n{middle_part}\n{omitted_text}\n{last_part}"
filtered_content += f"[TOOL_RESPONSE]\n{truncated_text}\n"
# 10条以上的消息不保留TOOL_RESPONSE数据完全跳过
elif current_tag == "TOOL_CALL":
if is_recent_message:
# 最近10条ASSISTANT消息保留TOOL_CALL信息
filtered_content += f"[TOOL_CALL]\n{text}\n"
# 10条以上的消息不保留TOOL_CALL数据完全跳过
elif current_tag == "ANSWER":
# 所有ASSISTANT消息都保留ANSWER数据
+# 不往后传输 历史工具调用的文字
+if current_tag == "ANSWER":
filtered_content += f"[ANSWER]\n{text}\n"
-elif current_tag != "THINK" and current_tag != "PREAMBLE":
-filtered_content += text + "\n"
+# if current_tag == "TOOL_RESPONSE":
+# if is_recent_message:
# # 最近10条ASSISTANT消息保留完整TOOL_RESPONSE信息使用简略模式
# if len(text) <= 500:
# filtered_content += f"[TOOL_RESPONSE]\n{text}\n"
# else:
# # 截取前中后3段内容每段250字
# first_part = text[:250]
# middle_start = len(text) // 2 - 125
# middle_part = text[middle_start:middle_start + 250]
# last_part = text[-250:]
# # 计算省略的字数
# omitted_count = len(text) - 750
# omitted_text = f"...此处省略{omitted_count}字..."
# # 拼接内容
# truncated_text = f"{first_part}\n{omitted_text}\n{middle_part}\n{omitted_text}\n{last_part}"
# filtered_content += f"[TOOL_RESPONSE]\n{truncated_text}\n"
# # 10条以上的消息不保留TOOL_RESPONSE数据完全跳过
# elif current_tag == "TOOL_CALL":
# if is_recent_message:
# # 最近10条ASSISTANT消息保留TOOL_CALL信息
# filtered_content += f"[TOOL_CALL]\n{text}\n"
# # 10条以上的消息不保留TOOL_CALL数据完全跳过
# elif current_tag == "ANSWER":
# # 所有ASSISTANT消息都保留ANSWER数据
# filtered_content += f"[ANSWER]\n{text}\n"
# elif current_tag != "THINK" and current_tag != "PREAMBLE":
# filtered_content += text + "\n"
else: # 标签
current_tag = parts[i]
@@ -233,37 +236,43 @@ def process_messages(messages: List[Dict], language: Optional[str] = None) -> Li
text = parts[i].strip()
if not text:
continue
-if current_tag == "TOOL_RESPONSE":
# 解析 TOOL_RESPONSE 格式:[TOOL_RESPONSE] function_name\ncontent
lines = text.split('\n', 1)
function_name = lines[0].strip() if lines else ""
response_content = lines[1].strip() if len(lines) > 1 else ""
final_messages.append({
"role": FUNCTION,
"name": function_name,
"content": response_content
})
elif current_tag == "TOOL_CALL":
# 解析 TOOL_CALL 格式:[TOOL_CALL] function_name\narguments
lines = text.split('\n', 1)
function_name = lines[0].strip() if lines else ""
arguments = lines[1].strip() if len(lines) > 1 else ""
final_messages.append({
"role": ASSISTANT,
"content": "",
"function_call": {
"name": function_name,
"arguments": arguments
}
})
elif current_tag != "THINK" and current_tag != "PREAMBLE":
+# 不往后传输 历史工具调用的文字
+if current_tag == "ANSWER":
final_messages.append({
"role": ASSISTANT,
"content": text
})
# if current_tag == "TOOL_RESPONSE":
# # 解析 TOOL_RESPONSE 格式:[TOOL_RESPONSE] function_name\ncontent
# lines = text.split('\n', 1)
# function_name = lines[0].strip() if lines else ""
# response_content = lines[1].strip() if len(lines) > 1 else ""
# final_messages.append({
# "role": FUNCTION,
# "name": function_name,
# "content": response_content
# })
# elif current_tag == "TOOL_CALL":
# # 解析 TOOL_CALL 格式:[TOOL_CALL] function_name\narguments
# lines = text.split('\n', 1)
# function_name = lines[0].strip() if lines else ""
# arguments = lines[1].strip() if len(lines) > 1 else ""
# final_messages.append({
# "role": ASSISTANT,
# "content": "",
# "function_call": {
# "name": function_name,
# "arguments": arguments
# }
# })
# elif current_tag != "THINK" and current_tag != "PREAMBLE":
# final_messages.append({
# "role": ASSISTANT,
# "content": text
# })
else: # 标签
current_tag = parts[i]
else:
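
Net effect of the two hunks above: historical assistant turns no longer replay their [THINK], [TOOL_CALL] or [TOOL_RESPONSE] segments; only the [ANSWER] text is kept, both when building filtered_content and when rebuilding final_messages. A minimal sketch of that resulting behavior, assuming the same [TAG]-delimited message format; the helper name and regex below are illustrative, not the committed implementation.

import re

# Illustrative only (not the committed code): after this change a historical
# assistant message is reduced to its [ANSWER] segments; THINK, PREAMBLE,
# TOOL_CALL and TOOL_RESPONSE segments are dropped instead of being replayed.
TAG = re.compile(r"\[(THINK|PREAMBLE|TOOL_CALL|TOOL_RESPONSE|ANSWER)\]")


def keep_answers_only(content: str) -> str:
    parts = TAG.split(content)  # [head, tag, text, tag, text, ...]
    kept = []
    for tag, text in zip(parts[1::2], parts[2::2]):
        if tag == "ANSWER" and text.strip():
            kept.append(f"[ANSWER]\n{text.strip()}\n")
    return "".join(kept)


# Example:
# keep_answers_only("[THINK]\nplan\n[TOOL_CALL] x\n{}\n[ANSWER]\n已完成。")
# returns "[ANSWER]\n已完成。\n"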