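fix: update SQL queries to improve similarity calculations and indexing (build the HNSW index with vector_cosine_ops instead of vector_l2_ops, and cast embeddings to vector(dims) in the `<=>` distance queries)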
This commit is contained in:
parent
b3a5dc4a1c
commit
4d18b78d29
@ -242,7 +242,7 @@ def create_knowledge_index(knowledge_id=None, document_id=None):
|
|||||||
if len(result) == 0:
|
if len(result) == 0:
|
||||||
return
|
return
|
||||||
dims = result[0]['dims']
|
dims = result[0]['dims']
|
||||||
sql = f"""CREATE INDEX "embedding_hnsw_idx_{k_id}" ON embedding USING hnsw ((embedding::vector({dims})) vector_l2_ops) WHERE knowledge_id = '{k_id}'"""
|
sql = f"""CREATE INDEX "embedding_hnsw_idx_{k_id}" ON embedding USING hnsw ((embedding::vector({dims})) vector_cosine_ops) WHERE knowledge_id = '{k_id}'"""
|
||||||
update_execute(sql, [])
|
update_execute(sql, [])
|
||||||
maxkb_logger.info(f'Created index for knowledge ID: {k_id}')
|
maxkb_logger.info(f'Created index for knowledge ID: {k_id}')
|
||||||
|
|
||||||
|
|||||||
@ -5,15 +5,17 @@ SELECT
|
|||||||
FROM
|
FROM
|
||||||
(
|
(
|
||||||
SELECT DISTINCT ON
|
SELECT DISTINCT ON
|
||||||
( "paragraph_id" ) ( similarity ),* ,
|
( "paragraph_id" ) ( 1 - distince + ts_similarity ) as similarity, *,
|
||||||
similarity AS comprehensive_score
|
(1 - distince + ts_similarity) AS comprehensive_score
|
||||||
FROM
|
FROM
|
||||||
(
|
(
|
||||||
SELECT
|
SELECT
|
||||||
*,
|
*,
|
||||||
(( 1 - ( embedding.embedding <=> %s ) )+ts_rank_cd( embedding.search_vector, websearch_to_tsquery('simple', %s ), 32 )) AS similarity
|
(embedding.embedding::vector(%s) <=> %s) as distince,
|
||||||
|
(ts_rank_cd( embedding.search_vector, websearch_to_tsquery('simple', %s ), 32 )) AS ts_similarity
|
||||||
FROM
|
FROM
|
||||||
embedding ${embedding_query}
|
embedding ${embedding_query}
|
||||||
|
ORDER BY distince
|
||||||
) TEMP
|
) TEMP
|
||||||
ORDER BY
|
ORDER BY
|
||||||
paragraph_id,
|
paragraph_id,
|
||||||
|
|||||||
@ -5,12 +5,12 @@ SELECT
|
|||||||
FROM
|
FROM
|
||||||
(
|
(
|
||||||
SELECT DISTINCT ON
|
SELECT DISTINCT ON
|
||||||
("paragraph_id") ( similarity ),* ,similarity AS comprehensive_score
|
("paragraph_id") ( 1 - distince ),* ,(1 - distince) AS comprehensive_score
|
||||||
FROM
|
FROM
|
||||||
( SELECT *, ( 1 - ( embedding.embedding <=> %s ) ) AS similarity FROM embedding ${embedding_query}) TEMP
|
( SELECT *, ( embedding.embedding::vector(%s) <=> %s ) AS distince FROM embedding ${embedding_query} ORDER BY distince) TEMP
|
||||||
ORDER BY
|
ORDER BY
|
||||||
paragraph_id,
|
paragraph_id,
|
||||||
similarity DESC
|
distince
|
||||||
) DISTINCT_TEMP
|
) DISTINCT_TEMP
|
||||||
WHERE comprehensive_score>%s
|
WHERE comprehensive_score>%s
|
||||||
ORDER BY comprehensive_score DESC
|
ORDER BY comprehensive_score DESC
|
||||||
|
|||||||
@ -172,8 +172,13 @@ class EmbeddingSearch(ISearch):
|
|||||||
os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql',
|
os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql',
|
||||||
'embedding_search.sql')),
|
'embedding_search.sql')),
|
||||||
with_table_name=True)
|
with_table_name=True)
|
||||||
embedding_model = select_list(exec_sql,
|
embedding_model = select_list(exec_sql, [
|
||||||
[json.dumps(query_embedding), *exec_params, similarity, top_number])
|
len(query_embedding),
|
||||||
|
json.dumps(query_embedding),
|
||||||
|
*exec_params,
|
||||||
|
similarity,
|
||||||
|
top_number
|
||||||
|
])
|
||||||
return embedding_model
|
return embedding_model
|
||||||
|
|
||||||
def support(self, search_mode: SearchMode):
|
def support(self, search_mode: SearchMode):
|
||||||
@ -193,8 +198,12 @@ class KeywordsSearch(ISearch):
|
|||||||
os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql',
|
os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql',
|
||||||
'keywords_search.sql')),
|
'keywords_search.sql')),
|
||||||
with_table_name=True)
|
with_table_name=True)
|
||||||
embedding_model = select_list(exec_sql,
|
embedding_model = select_list(exec_sql, [
|
||||||
[to_query(query_text), *exec_params, similarity, top_number])
|
to_query(query_text),
|
||||||
|
*exec_params,
|
||||||
|
similarity,
|
||||||
|
top_number
|
||||||
|
])
|
||||||
return embedding_model
|
return embedding_model
|
||||||
|
|
||||||
def support(self, search_mode: SearchMode):
|
def support(self, search_mode: SearchMode):
|
||||||
@ -214,9 +223,13 @@ class BlendSearch(ISearch):
|
|||||||
os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql',
|
os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql',
|
||||||
'blend_search.sql')),
|
'blend_search.sql')),
|
||||||
with_table_name=True)
|
with_table_name=True)
|
||||||
embedding_model = select_list(exec_sql,
|
embedding_model = select_list(exec_sql, [
|
||||||
[json.dumps(query_embedding), to_query(query_text), *exec_params, similarity,
|
len(query_embedding),
|
||||||
top_number])
|
json.dumps(query_embedding),
|
||||||
|
to_query(query_text),
|
||||||
|
*exec_params, similarity,
|
||||||
|
top_number
|
||||||
|
])
|
||||||
return embedding_model
|
return embedding_model
|
||||||
|
|
||||||
def support(self, search_mode: SearchMode):
|
def support(self, search_mode: SearchMode):
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user