feat: add new SQL queries for embedding and keyword searches

This commit is contained in:
CaptainB 2025-06-18 19:48:20 +08:00
parent a7ec04eb8b
commit 4a58954567
5 changed files with 80 additions and 3 deletions

View File

@ -0,0 +1,26 @@
/*
 * Blend search: returns the best-scoring embedding row per paragraph, where
 * the score is the sum of
 *   - a vector term: 1 minus the "<=>" distance between the stored embedding
 *     and the query vector (presumably the pgvector cosine-distance operator;
 *     confirm against the column type), and
 *   - a full-text term: ts_rank_cd over search_vector with normalization
 *     flag 32, which maps the rank to rank / (rank + 1).
 *
 * Result columns: paragraph_id, comprehensive_score, similarity (the last two
 * carry the same value).
 *
 * Bind placeholders occur in this textual order and must be supplied in the
 * same order: query vector, keyword query text, any values bound by the
 * substituted embedding_query fragment, minimum score (exclusive), row limit.
 *
 * NOTE(review): this file appears to be used as a text template by the Python
 * caller, so comments must stay free of bind-placeholder and template-marker
 * syntax.
 */
WITH scored AS (
    /* One row per matching embedding row; only the columns needed later. */
    SELECT
        embedding.paragraph_id,
        (( 1 - ( embedding.embedding <=> %s ) )+ts_rank_cd( embedding.search_vector, websearch_to_tsquery('simple', %s ), 32 )) AS similarity
    FROM
        embedding ${embedding_query}
),
best_per_paragraph AS (
    /*
     * Keep the highest-scoring row of each paragraph. NULLS LAST stops a row
     * with a NULL score from being picked ahead of rows with real scores
     * (DESC alone sorts NULLs first).
     */
    SELECT DISTINCT ON (paragraph_id)
        paragraph_id,
        similarity AS comprehensive_score
    FROM
        scored
    ORDER BY
        paragraph_id,
        similarity DESC NULLS LAST
)
SELECT
    paragraph_id,
    comprehensive_score,
    comprehensive_score AS similarity
FROM
    best_per_paragraph
WHERE
    comprehensive_score > %s
ORDER BY
    comprehensive_score DESC
LIMIT %s

View File

@ -0,0 +1,17 @@
/*
 * Embedding search: returns the best-scoring embedding row per paragraph,
 * scored as 1 minus the "<=>" distance between the stored embedding and the
 * query vector (presumably the pgvector cosine-distance operator; confirm
 * against the column type).
 *
 * Result columns: paragraph_id, comprehensive_score, similarity (the last two
 * carry the same value).
 *
 * Bind placeholders occur in this textual order and must be supplied in the
 * same order: query vector, any values bound by the substituted
 * embedding_query fragment, minimum score (exclusive), row limit.
 *
 * NOTE(review): this file appears to be used as a text template by the Python
 * caller, so comments must stay free of bind-placeholder and template-marker
 * syntax.
 */
WITH scored AS (
    /* One row per matching embedding row; only the columns needed later. */
    SELECT
        embedding.paragraph_id,
        ( 1 - ( embedding.embedding <=> %s ) ) AS similarity
    FROM
        embedding ${embedding_query}
),
best_per_paragraph AS (
    /*
     * Keep the highest-scoring row of each paragraph. NULLS LAST stops a row
     * with a NULL score from being picked ahead of rows with real scores
     * (DESC alone sorts NULLs first).
     */
    SELECT DISTINCT ON (paragraph_id)
        paragraph_id,
        similarity AS comprehensive_score
    FROM
        scored
    ORDER BY
        paragraph_id,
        similarity DESC NULLS LAST
)
SELECT
    paragraph_id,
    comprehensive_score,
    comprehensive_score AS similarity
FROM
    best_per_paragraph
WHERE
    comprehensive_score > %s
ORDER BY
    comprehensive_score DESC
LIMIT %s

View File

@ -0,0 +1,17 @@
/*
 * Embedding search: returns the best-scoring embedding row per paragraph,
 * scored as 1 minus the "<=>" distance between the stored embedding and the
 * query vector (presumably the pgvector cosine-distance operator; confirm
 * against the column type).
 *
 * Result columns: paragraph_id, comprehensive_score, similarity (the last two
 * carry the same value).
 *
 * Bind placeholders occur in this textual order and must be supplied in the
 * same order: query vector, any values bound by the substituted
 * embedding_query fragment, minimum score (exclusive), row limit.
 *
 * NOTE(review): this file appears to be used as a text template by the Python
 * caller, so comments must stay free of bind-placeholder and template-marker
 * syntax.
 */
WITH scored AS (
    /* One row per matching embedding row; only the columns needed later. */
    SELECT
        embedding.paragraph_id,
        ( 1 - ( embedding.embedding <=> %s ) ) AS similarity
    FROM
        embedding ${embedding_query}
),
best_per_paragraph AS (
    /*
     * Keep the highest-scoring row of each paragraph. NULLS LAST stops a row
     * with a NULL score from being picked ahead of rows with real scores
     * (DESC alone sorts NULLs first).
     */
    SELECT DISTINCT ON (paragraph_id)
        paragraph_id,
        similarity AS comprehensive_score
    FROM
        scored
    ORDER BY
        paragraph_id,
        similarity DESC NULLS LAST
)
SELECT
    paragraph_id,
    comprehensive_score,
    comprehensive_score AS similarity
FROM
    best_per_paragraph
WHERE
    comprehensive_score > %s
ORDER BY
    comprehensive_score DESC
LIMIT %s

View File

@ -0,0 +1,17 @@
/*
 * Keyword search: returns the best-scoring embedding row per paragraph,
 * scored by ts_rank_cd of search_vector against the keyword query parsed
 * with websearch_to_tsquery under the 'simple' configuration. Normalization
 * flag 32 maps the rank to rank / (rank + 1).
 *
 * Result columns: paragraph_id, comprehensive_score, similarity (the last two
 * carry the same value).
 *
 * Bind placeholders occur in this textual order and must be supplied in the
 * same order: keyword query text, any values bound by the substituted
 * keywords_query fragment, minimum score (exclusive), row limit.
 *
 * NOTE(review): this file appears to be used as a text template by the Python
 * caller, so comments must stay free of bind-placeholder and template-marker
 * syntax.
 */
WITH scored AS (
    /* One row per matching embedding row; only the columns needed later. */
    SELECT
        embedding.paragraph_id,
        ts_rank_cd(embedding.search_vector,websearch_to_tsquery('simple',%s),32) AS similarity
    FROM
        embedding ${keywords_query}
),
best_per_paragraph AS (
    /*
     * Keep the highest-scoring row of each paragraph. NULLS LAST stops a row
     * with a NULL score from being picked ahead of rows with real scores
     * (DESC alone sorts NULLs first).
     */
    SELECT DISTINCT ON (paragraph_id)
        paragraph_id,
        similarity AS comprehensive_score
    FROM
        scored
    ORDER BY
        paragraph_id,
        similarity DESC NULLS LAST
)
SELECT
    paragraph_id,
    comprehensive_score,
    comprehensive_score AS similarity
FROM
    best_per_paragraph
WHERE
    comprehensive_score > %s
ORDER BY
    comprehensive_score DESC
LIMIT %s

View File

@ -165,7 +165,7 @@ class EmbeddingSearch(ISearch):
search_mode: SearchMode):
exec_sql, exec_params = generate_sql_by_query_dict({'embedding_query': query_set},
select_string=get_file_content(
os.path.join(PROJECT_DIR, "apps", "embedding", 'sql',
os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql',
'embedding_search.sql')),
with_table_name=True)
embedding_model = select_list(exec_sql,
@ -186,7 +186,7 @@ class KeywordsSearch(ISearch):
search_mode: SearchMode):
exec_sql, exec_params = generate_sql_by_query_dict({'keywords_query': query_set},
select_string=get_file_content(
os.path.join(PROJECT_DIR, "apps", "embedding", 'sql',
os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql',
'keywords_search.sql')),
with_table_name=True)
embedding_model = select_list(exec_sql,
@ -207,7 +207,7 @@ class BlendSearch(ISearch):
search_mode: SearchMode):
exec_sql, exec_params = generate_sql_by_query_dict({'embedding_query': query_set},
select_string=get_file_content(
os.path.join(PROJECT_DIR, "apps", "embedding", 'sql',
os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql',
'blend_search.sql')),
with_table_name=True)
embedding_model = select_list(exec_sql,