qwen_agent/skills/developing/table-query/verify_table_query.sh
2026-06-07 08:58:22 +08:00

68 lines
2.7 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# Manual verification for the new table_query endpoints.
# Run this against an environment where the feature/table-query-split branch is
# deployed (e.g. dev). It calls search_tables directly, then prints ready-to-edit
# curl commands for get_schemas, run_sql and the legacy table_rag_retrieve so
# that run_sql output can be compared by hand against the legacy endpoint.
#
# Usage:
# HOST=https://api-dev.gptbase.ai BOT_ID=<bot> MASTERKEY=master ./verify_table_query.sh
# Optionally set QUERY=<question> to override the default question.
#
# Strict mode: abort on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Connection settings. Each one may be supplied through the environment; the
# default below is applied when the variable is unset or empty.
: "${HOST:=https://api-dev.gptbase.ai}"
# Default bot is the one from the slow-request log (has the 案1_売上明細 xlsx).
: "${BOT_ID:=c1fa021b-6c41-41d5-b1e6-adfb8896aaaa}"
: "${MASTERKEY:=master}"
: "${QUERY:=2025年4月〜6月の売上実績}"

# The bearer token is the hex MD5 digest of "<masterkey>:<bot_id>".
TOKEN="$(
  python3 -c '
import hashlib
import sys

material = ":".join(sys.argv[1:3])
print(hashlib.md5(material.encode()).hexdigest())
' "$MASTERKEY" "$BOT_ID"
)"

# Request headers used by every curl command in this script.
AUTH="authorization: Bearer ${TOKEN}"
CT="content-type: application/json"

# Banner showing the target, followed by one blank line.
printf '=== HOST=%s BOT_ID=%s ===\n\n' "$HOST" "$BOT_ID"
echo "### 1) search_tables ###"

#######################################
# Serialize the search_tables request body as JSON.
# The query is encoded by a JSON serializer instead of being spliced into a
# string, so quotes and backslashes in it cannot corrupt the request body.
# Non-ASCII text is emitted as UTF-8 (not \u escapes), so queries that need
# no escaping produce the same bytes as a hand-written body.
# Arguments: $1 - query text
#            $2 - top_k (integer, defaults to 20)
# Outputs:   the JSON document on stdout
#######################################
tq_search_payload() {
  # Force UTF-8 on stdout so the output does not depend on the caller's locale.
  PYTHONIOENCODING=utf-8:surrogateescape python3 -c '
import json
import sys

body = {"query": sys.argv[1], "top_k": int(sys.argv[2])}
print(json.dumps(body, ensure_ascii=False))
' "$1" "${2:-20}"
}

SEARCH_PAYLOAD="$(tq_search_payload "$QUERY" 20)"

# Step 1 is the only request this script sends itself; the response is
# pretty-printed. -S keeps curl's own error message visible despite -s.
curl -sS --request POST "$HOST/v1/table_query/search_tables/$BOT_ID" \
  --header "$AUTH" --header "$CT" \
  --data "$SEARCH_PAYLOAD" | python3 -m json.tool
echo
# Step 2 is not executed here: print a get_schemas command for the operator to
# edit (table names come from the step 1 output) and run by hand. Inside the
# here-document "\\" yields the single backslash that continues the printed
# command onto the next line.
cat <<EOF
### 2) get_schemas (EDIT --data table_names with names from step 1) ###
curl -s --request POST "$HOST/v1/table_query/get_schemas/$BOT_ID" \\
 --header "$AUTH" --header "$CT" \\
 --data '{"table_names": ["<TABLE_NAME_FROM_STEP_1>"], "sample_rows": 3}' | python3 -m json.tool

EOF
echo "### 3) run_sql (EDIT the sql to match the real table/columns from step 2) ###"

# Write the run_sql plan template to a file the operator edits afterwards.
# A fixed name under /tmp is predictable and can be pre-created or symlinked by
# another user on a shared host, so a unique file is created with mktemp unless
# the caller picks a location via PLAN_FILE. The file is deliberately NOT
# removed on exit: it must outlive this script so it can be edited and posted.
if [[ -z "${PLAN_FILE:-}" ]]; then
  PLAN_FILE="$(mktemp "${TMPDIR:-/tmp}/tq_plan.json.XXXXXX")" || {
    echo "error: could not create a temporary plan file" >&2
    exit 1
  }
fi

# Quoted delimiter: the template is written literally, with no expansion.
cat > "$PLAN_FILE" <<'JSON'
{
"queries": [
{
"step": 1,
"sql": "CREATE TEMP TABLE \"final_table_step1\" AS SELECT \"計上日\", \"得意先名\", \"売上金額\" FROM \"<TABLE_NAME>\" LIMIT 10",
"source_table_names": ["<TABLE_NAME>"],
"destine_table_name": "final_table_step1",
"destine_table_type": "final",
"destine_table_description": "sample rows"
}
]
}
JSON

# Step 3 is not executed here: print the command that posts the edited plan.
echo "Edit $PLAN_FILE (replace <TABLE_NAME>), then:"
echo "curl -s --request POST \"$HOST/v1/table_query/run_sql/$BOT_ID\" \\"
echo " --header \"$AUTH\" --header \"$CT\" \\"
# %q keeps the printed command pasteable even if the path needs quoting.
printf ' --data @%q | python3 -m json.tool\n' "$PLAN_FILE"
echo
echo "ASSERT: run_sql output 'knowledge' contains a '__src' column and 'file_ref_table'."
echo
echo "### 4) legacy table_rag_retrieve (parity reference, same question) ###"

#######################################
# Build the --data argument for the printed legacy command.
# The query is JSON-encoded and the result is quoted for the shell, so a query
# containing quotes or backslashes still yields a command that can be pasted
# verbatim. Queries that need no escaping print as '{"query": "<text>"}'.
# Arguments: $1 - query text
# Outputs:   one shell-quoted word on stdout
#######################################
tq_legacy_data_arg() {
  # Force UTF-8 on stdout so the output does not depend on the caller's locale.
  PYTHONIOENCODING=utf-8:surrogateescape python3 -c '
import json
import shlex
import sys

print(shlex.quote(json.dumps({"query": sys.argv[1]}, ensure_ascii=False)))
' "$1"
}

LEGACY_DATA_ARG="$(tq_legacy_data_arg "$QUERY")"

# Step 4 is not executed here: print the legacy command for the operator.
echo "curl -s --request POST \"$HOST/v1/table_rag_retrieve/$BOT_ID\" \\"
echo " --header \"$AUTH\" --header \"$CT\" \\"
printf ' --data %s | python3 -m json.tool\n' "$LEGACY_DATA_ARG"
echo
echo "Compare the __src tokens / result rows between #3 and #4 for the same SQL intent."