Add diagnostic logging of unthresholded similarity scores to EmbeddingSearch
This commit is contained in:
parent
788491db61
commit
9d65e181eb
@ -212,6 +212,56 @@ class EmbeddingSearch(ISearch):
|
|||||||
from common.utils.logger import maxkb_logger
|
from common.utils.logger import maxkb_logger
|
||||||
maxkb_logger.info(f"EmbeddingSearch: Executing search with similarity threshold={similarity}, top_n={top_number}")
|
maxkb_logger.info(f"EmbeddingSearch: Executing search with similarity threshold={similarity}, top_n={top_number}")
|
||||||
|
|
||||||
|
# First query all results without a similarity threshold to see what the actual similarity scores are
|
||||||
|
test_sql = """
|
||||||
|
SELECT
|
||||||
|
paragraph_id,
|
||||||
|
comprehensive_score,
|
||||||
|
comprehensive_score as similarity
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT DISTINCT ON
|
||||||
|
("paragraph_id") ( 1 - distince ),* ,(1 - distince) AS comprehensive_score
|
||||||
|
FROM
|
||||||
|
( SELECT *, ( embedding.embedding::vector(%s) <=> %s ) AS distince FROM embedding ${embedding_query} ORDER BY distince) TEMP
|
||||||
|
ORDER BY
|
||||||
|
paragraph_id,
|
||||||
|
distince
|
||||||
|
) DISTINCT_TEMP
|
||||||
|
ORDER BY comprehensive_score DESC
|
||||||
|
LIMIT %s
|
||||||
|
"""
|
||||||
|
|
||||||
|
test_exec_sql, test_exec_params = generate_sql_by_query_dict({'embedding_query': query_set},
|
||||||
|
select_string=test_sql,
|
||||||
|
with_table_name=True)
|
||||||
|
|
||||||
|
# Fetch the results without applying a threshold
|
||||||
|
test_results = select_list(test_exec_sql, [
|
||||||
|
len(query_embedding),
|
||||||
|
json.dumps(query_embedding),
|
||||||
|
*test_exec_params,
|
||||||
|
10 # 获取前10个结果
|
||||||
|
])
|
||||||
|
|
||||||
|
if len(test_results) > 0:
|
||||||
|
test_similarities = [r.get('similarity', 0) for r in test_results[:5]]
|
||||||
|
maxkb_logger.info(f"Actual similarities (no threshold): {test_similarities}")
|
||||||
|
maxkb_logger.info(f"Highest similarity: {test_similarities[0] if test_similarities else 0}, Required threshold: {similarity}")
|
||||||
|
if test_similarities[0] < similarity:
|
||||||
|
maxkb_logger.warning(f"Best similarity {test_similarities[0]} is below threshold {similarity}")
|
||||||
|
# Fetch the paragraph content to inspect it
|
||||||
|
if len(test_results) > 0:
|
||||||
|
paragraph_id = test_results[0].get('paragraph_id')
|
||||||
|
from knowledge.models import Paragraph
|
||||||
|
para = QuerySet(Paragraph).filter(id=paragraph_id).first()
|
||||||
|
if para:
|
||||||
|
maxkb_logger.info(f"Top paragraph content preview (first 200 chars): {para.content[:200]}...")
|
||||||
|
maxkb_logger.info(f"Paragraph title: {para.title}, length: {len(para.content)}")
|
||||||
|
else:
|
||||||
|
maxkb_logger.warning("No embeddings found even without similarity threshold")
|
||||||
|
|
||||||
|
# Normal query (with the similarity threshold applied)
|
||||||
exec_sql, exec_params = generate_sql_by_query_dict({'embedding_query': query_set},
|
exec_sql, exec_params = generate_sql_by_query_dict({'embedding_query': query_set},
|
||||||
select_string=get_file_content(
|
select_string=get_file_content(
|
||||||
os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql',
|
os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql',
|
||||||
@ -228,10 +278,10 @@ class EmbeddingSearch(ISearch):
|
|||||||
top_number
|
top_number
|
||||||
])
|
])
|
||||||
|
|
||||||
maxkb_logger.info(f"EmbeddingSearch results: {len(embedding_model)} embeddings found")
|
maxkb_logger.info(f"EmbeddingSearch results: {len(embedding_model)} embeddings found (with threshold)")
|
||||||
if len(embedding_model) > 0:
|
if len(embedding_model) > 0:
|
||||||
similarities = [e.get('similarity', 0) for e in embedding_model[:3]]
|
similarities = [e.get('similarity', 0) for e in embedding_model[:3]]
|
||||||
maxkb_logger.info(f"Top 3 similarities: {similarities}")
|
maxkb_logger.info(f"Top 3 similarities above threshold: {similarities}")
|
||||||
|
|
||||||
return embedding_model
|
return embedding_model
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user