fix: list_datasets 改为本地表分页,修复 page_size 超限导致的 500 错误

去掉 service 层硬编码 page_size=1000 拉取 RAGFlow 全部数据集的写法
(超出 SDK 的 page_size 上限 100,导致 500 错误),改为直接从 user_datasets 表分页返回。

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
朱潮 2026-06-21 09:49:10 +08:00
parent 2898c9c42d
commit 86bf8285e5

View File

@@ -108,7 +108,7 @@ class KnowledgeBaseService:
search: str = None
) -> Dict[str, Any]:
"""
获取用户的数据集列表从本地数据库过滤
获取用户的数据集列表本地数据库分页
Args:
user_id: 用户 ID
@@ -123,10 +123,8 @@ class KnowledgeBaseService:
pool = get_db_pool_manager().pool
# 从本地数据库获取用户的数据集 ID 列表
async with pool.connection() as conn:
async with conn.cursor() as cursor:
# 构建查询条件
where_conditions = ["user_id = %s"]
params = [user_id]
@@ -136,49 +134,33 @@ class KnowledgeBaseService:
where_clause = " AND ".join(where_conditions)
# 获取总数
await cursor.execute(f"""
SELECT COUNT(*) FROM user_datasets
WHERE {where_clause}
""", params)
total = (await cursor.fetchone())[0]
# 获取分页数据
offset = (page - 1) * page_size
await cursor.execute(f"""
SELECT dataset_id, dataset_name, created_at
SELECT dataset_id, dataset_name, owner, created_at
FROM user_datasets
WHERE {where_clause}
ORDER BY created_at DESC
LIMIT %s OFFSET %s
""", params + [page_size, offset])
user_datasets = await cursor.fetchall()
rows = await cursor.fetchall()
if not user_datasets:
return {
"items": [],
"total": 0,
"page": page,
"page_size": page_size
items = [
{
"dataset_id": row[0],
"name": row[1],
"description": None,
"owner": row[2],
"created_at": row[3],
}
# 获取数据集 ID 列表,从 RAGFlow 获取详情
dataset_ids = [row[0] for row in user_datasets]
dataset_names = {row[0]: row[1] for row in user_datasets}
# 从 RAGFlow 获取完整的数据集信息
ragflow_result = await self.repository.list_datasets(
page=1,
page_size=1000 # 获取所有数据集,然后在本地过滤
)
# 过滤出属于该用户的数据集
user_dataset_ids_set = set(dataset_ids)
items = []
for item in ragflow_result["items"]:
if item.get("dataset_id") in user_dataset_ids_set:
items.append(item)
for row in rows
]
return {
"items": items,