diff --git a/promptfoo/conversation/tests.csv b/promptfoo/conversation/tests.csv deleted file mode 100644 index 1fc3744..0000000 --- a/promptfoo/conversation/tests.csv +++ /dev/null @@ -1,9 +0,0 @@ -question,regex,llm-rubric,,,,,,,,,,,,,,,,, -清水太郎在哪里,\[TOOL_CALL\].*find_employee_location,,,,,,,,,,,,,,,,,, -通知他明天上午8点开会,,询问是否确认发送,,,,,,,,,,,,,,,,, -确认,\[TOOL_CALL\].*wowtalk_send_message_to_member,,,,,,,,,,,,,,,,,, -把DefineRoom 4的灯光状态发给他,,调用find_devices_by_room和dxcore_get_device_status获取灯光状态,并询问是否确认发送。,,,,,,,,,,,,,,,,, -确认,\[TOOL_CALL\].*wowtalk_send_message_to_member,,,,,,,,,,,,,,,,,, -900142の稼働状況,\[TOOL_CALL\].*dxcore_get_device_status,,,,,,,,,,,,,,,,,, -关闭设备900142的灯光,,询问是否确认关闭,,,,,,,,,,,,,,,,, -确认,\[TOOL_CALL\].*dxcore_update_device_status,,,,,,,,,,,,,,,,,, \ No newline at end of file diff --git a/promptfoo/csv_to_yaml.py b/promptfoo/csv_to_yaml.py index 14b89ed..af91187 100644 --- a/promptfoo/csv_to_yaml.py +++ b/promptfoo/csv_to_yaml.py @@ -15,7 +15,8 @@ def csv_to_yaml(csv_file, yaml_file): if row['question']: test_case = { 'vars': { - 'question': row['question'].strip() + 'question': row['question'].strip(), + 'use_history': True if row['use_history'] == "1" else False, }, 'assert':[] } @@ -41,5 +42,4 @@ def csv_to_yaml(csv_file, yaml_file): print(f"Converted {len(tests)} test cases from {csv_file} to {yaml_file}") if __name__ == '__main__': - csv_to_yaml("conversation/tests.csv", "conversation/tests.yaml") - csv_to_yaml("query/tests.csv", "query/tests.yaml") + csv_to_yaml("novare/novare.csv", "novare/tests.yaml") diff --git a/promptfoo/conversation/config.yaml b/promptfoo/novare/config.yaml similarity index 88% rename from promptfoo/conversation/config.yaml rename to promptfoo/novare/config.yaml index 463e5dd..45bcdab 100644 --- a/promptfoo/conversation/config.yaml +++ b/promptfoo/novare/config.yaml @@ -1,5 +1,5 @@ env: {} -description: Novare Test +description: Novare Test - Unified Config for Both Single and Conversation providers: - id: openai:chat:qwen3 config: @@ -25,4 +25,4 @@ defaultTest: apiVersion: '2024-02-01' evaluateOptions: {} writeLatestResults: true -sharing: true +sharing: true \ No newline at end of file diff --git a/promptfoo/conversation/encode.js b/promptfoo/novare/encode.js similarity index 100% rename from promptfoo/conversation/encode.js rename to promptfoo/novare/encode.js diff --git a/promptfoo/novare/novare.csv b/promptfoo/novare/novare.csv new file mode 100644 index 0000000..daa9e1d --- /dev/null +++ b/promptfoo/novare/novare.csv @@ -0,0 +1,14 @@ +question,use_history,regex,llm-rubric,,,,,,,,,,,,,,,,, +清水太郎在哪里,1,\[TOOL_CALL\].*find_employee_location,,,,,,,,,,,,,,,,,, +通知他明天上午8点开会,1,,询问是否确认发送,,,,,,,,,,,,,,,,, +确认,1,\[TOOL_CALL\].*wowtalk_send_message_to_member,,,,,,,,,,,,,,,,,, +把DefineRoom 4的灯光状态发给他,1,,调用find_devices_by_room和dxcore_get_device_status获取灯光状态,并询问是否确认发送。,,,,,,,,,,,,,,,,, +确认,1,\[TOOL_CALL\].*wowtalk_send_message_to_member,,,,,,,,,,,,,,,,,, +关闭设备900142的灯光,1,,询问是否确认关闭,,,,,,,,,,,,,,,,, +确认,1,\[TOOL_CALL\].*dxcore_update_device_status,,,,,,,,,,,,,,,,,, +Define Room1 的灯光状态,0,\[TOOL_CALL\].*find_devices_by_room,,,,,,,,,,,,,,,,,, +900142の稼働状況,0,\[TOOL_CALL\].*dxcore_get_device_status,,,,,,,,,,,,,,,,,, +卫生间在哪里,0,\[TOOL_CALL\].*rag_retrieve,,,,,,,,,,,,,,,,,, +我丢了物品怎么办,0,\[TOOL_CALL\].*rag_retrieve,,,,,,,,,,,,,,,,,, +咖啡多少钱一杯,0,\[TOOL_CALL\].*rag_retrieve,,,,,,,,,,,,,,,,,, +东京明天的天气,0,\[TOOL_CALL\].*weather_get_by_location,,,,,,,,,,,,,,,,,, \ No newline at end of file diff --git a/promptfoo/conversation/prompt.json b/promptfoo/novare/prompt.json similarity index 73% rename from promptfoo/conversation/prompt.json rename to promptfoo/novare/prompt.json index 94837f5..1881694 100644 --- a/promptfoo/conversation/prompt.json +++ b/promptfoo/novare/prompt.json @@ -1,4 +1,5 @@ [ +{% if use_history %} {% for completion in _conversation %} { "role": "user", @@ -13,4 +14,10 @@ "role": "user", "content": "{{ question | encode }}" } -] +{% else %} + { + "role": "user", + "content": "{{ question }}" + } +{% endif %} +] \ No newline at end of file diff --git a/promptfoo/conversation/tests.yaml b/promptfoo/novare/tests.yaml similarity index 54% rename from promptfoo/conversation/tests.yaml rename to promptfoo/novare/tests.yaml index 8633ac7..2551aa1 100644 --- a/promptfoo/conversation/tests.yaml +++ b/promptfoo/novare/tests.yaml @@ -1,40 +1,78 @@ - vars: question: 清水太郎在哪里 + use_history: true assert: - type: regex value: \[TOOL_CALL\].*find_employee_location - vars: question: 通知他明天上午8点开会 + use_history: true assert: - type: llm-rubric value: 询问是否确认发送 - vars: question: 确认 + use_history: true assert: - type: regex value: \[TOOL_CALL\].*wowtalk_send_message_to_member - vars: question: 把DefineRoom 4的灯光状态发给他 + use_history: true assert: - type: llm-rubric value: 调用find_devices_by_room和dxcore_get_device_status获取灯光状态,并询问是否确认发送。 - vars: question: 确认 + use_history: true assert: - type: regex value: \[TOOL_CALL\].*wowtalk_send_message_to_member -- vars: - question: 900142の稼働状況 - assert: - - type: regex - value: \[TOOL_CALL\].*dxcore_get_device_status - vars: question: 关闭设备900142的灯光 + use_history: true assert: - type: llm-rubric value: 询问是否确认关闭 - vars: question: 确认 + use_history: true assert: - type: regex value: \[TOOL_CALL\].*dxcore_update_device_status +- vars: + question: Define Room1 的灯光状态 + use_history: false + assert: + - type: regex + value: \[TOOL_CALL\].*find_devices_by_room +- vars: + question: 900142の稼働状況 + use_history: false + assert: + - type: regex + value: \[TOOL_CALL\].*dxcore_get_device_status +- vars: + question: 卫生间在哪里 + use_history: false + assert: + - type: regex + value: \[TOOL_CALL\].*rag_retrieve +- vars: + question: 我丢了物品怎么办 + use_history: false + assert: + - type: regex + value: \[TOOL_CALL\].*rag_retrieve +- vars: + question: 咖啡多少钱一杯 + use_history: false + assert: + - type: regex + value: \[TOOL_CALL\].*rag_retrieve +- vars: + question: 东京明天的天气 + use_history: false + assert: + - type: regex + value: \[TOOL_CALL\].*weather_get_by_location diff --git a/promptfoo/query/config.yaml b/promptfoo/query/config.yaml deleted file mode 100644 index 04481d6..0000000 --- a/promptfoo/query/config.yaml +++ /dev/null @@ -1,26 +0,0 @@ -env: {} -description: Novare Test -providers: - - id: openai:chat:qwen3 - config: - apiBaseUrl: https://catalog-agent-dev.gbase.ai/api/v2 - apiKey: a21c99620a8ef61d69563afe05ccce89 - passthrough: - bot_id: 63069654-7750-409d-9a58-a0960d899a20 - tool_response: true - language: zh -prompts: - - file://prompt.json -tests: file://tests.yaml -defaultTest: - options: - provider: - text: - id: openai:chat:qwen/qwen3-next-80b-a3b-instruct - config: - apiKey: sk-hsKClH0Z695EkK5fDdB2Ec2fE13f4fC1B627BdBb8e554b5b-26 - apiBaseUrl: https://one.felo.me/v1 - apiVersion: '2024-02-01' -evaluateOptions: {} -writeLatestResults: true -sharing: true diff --git a/promptfoo/query/prompt.json b/promptfoo/query/prompt.json deleted file mode 100644 index 06f9b09..0000000 --- a/promptfoo/query/prompt.json +++ /dev/null @@ -1,6 +0,0 @@ -[ - { - "role": "user", - "content": "{{ question }}" - } -] diff --git a/promptfoo/query/tests.csv b/promptfoo/query/tests.csv deleted file mode 100644 index cc01f3b..0000000 --- a/promptfoo/query/tests.csv +++ /dev/null @@ -1,6 +0,0 @@ -question,regex,llm-rubric,,,,,,,,,,,,,,,,, -Define Room1 的灯光状态,\[TOOL_CALL\].*find_devices_by_room,,,,,,,,,,,,,,,,,, -卫生间在哪里,\[TOOL_CALL\].*rag_retrieve,,,,,,,,,,,,,,,,,, -我丢了物品怎么办,\[TOOL_CALL\].*rag_retrieve,,,,,,,,,,,,,,,,,, -咖啡多少钱一杯,\[TOOL_CALL\].*rag_retrieve,,,,,,,,,,,,,,,,,, -东京明天的天气,\[TOOL_CALL\].*weather_get_by_location,,,,,,,,,,,,,,,,,, \ No newline at end of file diff --git a/promptfoo/query/tests.yaml b/promptfoo/query/tests.yaml deleted file mode 100644 index ce719e5..0000000 --- a/promptfoo/query/tests.yaml +++ /dev/null @@ -1,25 +0,0 @@ -- vars: - question: Define Room1 的灯光状态 - assert: - - type: regex - value: \[TOOL_CALL\].*find_devices_by_room -- vars: - question: 卫生间在哪里 - assert: - - type: regex - value: \[TOOL_CALL\].*rag_retrieve -- vars: - question: 我丢了物品怎么办 - assert: - - type: regex - value: \[TOOL_CALL\].*rag_retrieve -- vars: - question: 咖啡多少钱一杯 - assert: - - type: regex - value: \[TOOL_CALL\].*rag_retrieve -- vars: - question: 东京明天的天气 - assert: - - type: regex - value: \[TOOL_CALL\].*weather_get_by_location