Add diagnostic logging to EmbeddingSearch (log actual similarities against the threshold)
This commit is contained in:
parent
788491db61
commit
9d65e181eb
@ -212,6 +212,56 @@ class EmbeddingSearch(ISearch):
|
||||
from common.utils.logger import maxkb_logger
|
||||
maxkb_logger.info(f"EmbeddingSearch: Executing search with similarity threshold={similarity}, top_n={top_number}")
|
||||
|
||||
# First query all results without a similarity threshold to see what the actual similarities are
|
||||
test_sql = """
|
||||
SELECT
|
||||
paragraph_id,
|
||||
comprehensive_score,
|
||||
comprehensive_score as similarity
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT ON
|
||||
("paragraph_id") ( 1 - distince ),* ,(1 - distince) AS comprehensive_score
|
||||
FROM
|
||||
( SELECT *, ( embedding.embedding::vector(%s) <=> %s ) AS distince FROM embedding ${embedding_query} ORDER BY distince) TEMP
|
||||
ORDER BY
|
||||
paragraph_id,
|
||||
distince
|
||||
) DISTINCT_TEMP
|
||||
ORDER BY comprehensive_score DESC
|
||||
LIMIT %s
|
||||
"""
|
||||
|
||||
test_exec_sql, test_exec_params = generate_sql_by_query_dict({'embedding_query': query_set},
|
||||
select_string=test_sql,
|
||||
with_table_name=True)
|
||||
|
||||
# Query the results without the threshold
|
||||
test_results = select_list(test_exec_sql, [
|
||||
len(query_embedding),
|
||||
json.dumps(query_embedding),
|
||||
*test_exec_params,
|
||||
10 # 获取前10个结果
|
||||
])
|
||||
|
||||
if len(test_results) > 0:
|
||||
test_similarities = [r.get('similarity', 0) for r in test_results[:5]]
|
||||
maxkb_logger.info(f"Actual similarities (no threshold): {test_similarities}")
|
||||
maxkb_logger.info(f"Highest similarity: {test_similarities[0] if test_similarities else 0}, Required threshold: {similarity}")
|
||||
if test_similarities[0] < similarity:
|
||||
maxkb_logger.warning(f"Best similarity {test_similarities[0]} is below threshold {similarity}")
|
||||
# Fetch the paragraph content for inspection
|
||||
if len(test_results) > 0:
|
||||
paragraph_id = test_results[0].get('paragraph_id')
|
||||
from knowledge.models import Paragraph
|
||||
para = QuerySet(Paragraph).filter(id=paragraph_id).first()
|
||||
if para:
|
||||
maxkb_logger.info(f"Top paragraph content preview (first 200 chars): {para.content[:200]}...")
|
||||
maxkb_logger.info(f"Paragraph title: {para.title}, length: {len(para.content)}")
|
||||
else:
|
||||
maxkb_logger.warning("No embeddings found even without similarity threshold")
|
||||
|
||||
# Normal query (with the similarity threshold)
|
||||
exec_sql, exec_params = generate_sql_by_query_dict({'embedding_query': query_set},
|
||||
select_string=get_file_content(
|
||||
os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql',
|
||||
@ -228,10 +278,10 @@ class EmbeddingSearch(ISearch):
|
||||
top_number
|
||||
])
|
||||
|
||||
maxkb_logger.info(f"EmbeddingSearch results: {len(embedding_model)} embeddings found")
|
||||
maxkb_logger.info(f"EmbeddingSearch results: {len(embedding_model)} embeddings found (with threshold)")
|
||||
if len(embedding_model) > 0:
|
||||
similarities = [e.get('similarity', 0) for e in embedding_model[:3]]
|
||||
maxkb_logger.info(f"Top 3 similarities: {similarities}")
|
||||
maxkb_logger.info(f"Top 3 similarities above threshold: {similarities}")
|
||||
|
||||
return embedding_model
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user