feat: add new SQL queries for embedding and keyword searches
This commit is contained in:
parent
a7ec04eb8b
commit
4a58954567
26
apps/knowledge/sql/blend_search.sql
Normal file
26
apps/knowledge/sql/blend_search.sql
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
/*
 * Blend search: scores each embedding row as
 *     (1 - vector distance) + full-text rank
 * keeps the best-scoring row per paragraph_id, then returns the paragraphs
 * whose score exceeds a threshold, best first.
 *
 * Positional bind parameters, in textual order:
 *   1. query vector compared against embedding.embedding
 *      (NOTE(review): the operator is presumably pgvector cosine distance - confirm)
 *   2. query text handed to websearch_to_tsquery with the simple configuration
 *   3. whatever parameters the injected embedding_query fragment carries
 *      (presumably a WHERE clause built by the caller - confirm)
 *   4. exclusive lower bound for comprehensive_score
 *   5. row limit
 *
 * ts_rank_cd normalization flag 32 maps the rank to rank / (rank + 1).
 *
 * Only paragraph_id and the score are projected through the subqueries; the
 * outer query never reads any other column, so carrying the full row
 * (vector and tsvector included) through the DISTINCT ON sort was wasted work.
 *
 * NOTE(review): keep percent signs and template placeholders out of comments
 * in this file; the text appears to go through placeholder substitution and
 * percent-style parameter binding.
 */
SELECT
    paragraph_id,
    comprehensive_score,
    comprehensive_score AS similarity
FROM
    (
        /* One row per paragraph: the ORDER BY makes DISTINCT ON keep the highest score. */
        SELECT DISTINCT ON (paragraph_id)
            paragraph_id,
            similarity AS comprehensive_score
        FROM
            (
                SELECT
                    paragraph_id,
                    (
                        (1 - (embedding.embedding <=> %s))
                        + ts_rank_cd(embedding.search_vector, websearch_to_tsquery('simple', %s), 32)
                    ) AS similarity
                FROM
                    embedding ${embedding_query}
            ) AS scored
        ORDER BY
            paragraph_id,
            similarity DESC
    ) AS best_per_paragraph
WHERE
    comprehensive_score > %s
ORDER BY
    comprehensive_score DESC
LIMIT %s
|
||||||
17
apps/knowledge/sql/embedding_search.sql
Normal file
17
apps/knowledge/sql/embedding_search.sql
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
/*
 * Embedding search: scores each embedding row as (1 - vector distance),
 * keeps the best-scoring row per paragraph_id, then returns the paragraphs
 * whose score exceeds a threshold, best first.
 *
 * Positional bind parameters, in textual order:
 *   1. query vector compared against embedding.embedding
 *      (NOTE(review): the operator is presumably pgvector cosine distance - confirm)
 *   2. whatever parameters the injected embedding_query fragment carries
 *      (presumably a WHERE clause built by the caller - confirm)
 *   3. exclusive lower bound for comprehensive_score
 *   4. row limit
 *
 * Only paragraph_id and the score are projected through the subqueries; the
 * outer query never reads any other column, so carrying the full row
 * through the DISTINCT ON sort was wasted work.
 *
 * NOTE(review): keep percent signs and template placeholders out of comments
 * in this file; the text appears to go through placeholder substitution and
 * percent-style parameter binding.
 */
SELECT
    paragraph_id,
    comprehensive_score,
    comprehensive_score AS similarity
FROM
    (
        /* One row per paragraph: the ORDER BY makes DISTINCT ON keep the highest score. */
        SELECT DISTINCT ON (paragraph_id)
            paragraph_id,
            similarity AS comprehensive_score
        FROM
            (
                SELECT
                    paragraph_id,
                    (1 - (embedding.embedding <=> %s)) AS similarity
                FROM
                    embedding ${embedding_query}
            ) AS scored
        ORDER BY
            paragraph_id,
            similarity DESC
    ) AS best_per_paragraph
WHERE
    comprehensive_score > %s
ORDER BY
    comprehensive_score DESC
LIMIT %s
|
||||||
17
apps/knowledge/sql/hit_test.sql
Normal file
17
apps/knowledge/sql/hit_test.sql
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
/*
 * Hit test query: scores each embedding row as (1 - vector distance),
 * keeps the best-scoring row per paragraph_id, then returns the paragraphs
 * whose score exceeds a threshold, best first.
 *
 * Positional bind parameters, in textual order:
 *   1. query vector compared against embedding.embedding
 *      (NOTE(review): the operator is presumably pgvector cosine distance - confirm)
 *   2. whatever parameters the injected embedding_query fragment carries
 *      (presumably a WHERE clause built by the caller - confirm)
 *   3. exclusive lower bound for comprehensive_score
 *   4. row limit
 *
 * Only paragraph_id and the score are projected through the subqueries; the
 * outer query never reads any other column, so carrying the full row
 * through the DISTINCT ON sort was wasted work.
 *
 * NOTE(review): keep percent signs and template placeholders out of comments
 * in this file; the text appears to go through placeholder substitution and
 * percent-style parameter binding.
 */
SELECT
    paragraph_id,
    comprehensive_score,
    comprehensive_score AS similarity
FROM
    (
        /* One row per paragraph: the ORDER BY makes DISTINCT ON keep the highest score. */
        SELECT DISTINCT ON (paragraph_id)
            paragraph_id,
            similarity AS comprehensive_score
        FROM
            (
                SELECT
                    paragraph_id,
                    (1 - (embedding.embedding <=> %s)) AS similarity
                FROM
                    embedding ${embedding_query}
            ) AS scored
        ORDER BY
            paragraph_id,
            similarity DESC
    ) AS best_per_paragraph
WHERE
    comprehensive_score > %s
ORDER BY
    comprehensive_score DESC
LIMIT %s
|
||||||
17
apps/knowledge/sql/keywords_search.sql
Normal file
17
apps/knowledge/sql/keywords_search.sql
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
/*
 * Keyword search: scores each embedding row by the full-text rank of its
 * search_vector against the query text, keeps the best-scoring row per
 * paragraph_id, then returns the paragraphs whose score exceeds a threshold,
 * best first.
 *
 * Positional bind parameters, in textual order:
 *   1. query text handed to websearch_to_tsquery with the simple configuration
 *   2. whatever parameters the injected keywords_query fragment carries
 *      (presumably a WHERE clause built by the caller - confirm)
 *   3. exclusive lower bound for comprehensive_score
 *   4. row limit
 *
 * ts_rank_cd normalization flag 32 maps the rank to rank / (rank + 1).
 *
 * Only paragraph_id and the score are projected through the subqueries; the
 * outer query never reads any other column, so carrying the full row
 * through the DISTINCT ON sort was wasted work.
 *
 * NOTE(review): keep percent signs and template placeholders out of comments
 * in this file; the text appears to go through placeholder substitution and
 * percent-style parameter binding.
 */
SELECT
    paragraph_id,
    comprehensive_score,
    comprehensive_score AS similarity
FROM
    (
        /* One row per paragraph: the ORDER BY makes DISTINCT ON keep the highest score. */
        SELECT DISTINCT ON (paragraph_id)
            paragraph_id,
            similarity AS comprehensive_score
        FROM
            (
                SELECT
                    paragraph_id,
                    ts_rank_cd(embedding.search_vector, websearch_to_tsquery('simple', %s), 32) AS similarity
                FROM
                    embedding ${keywords_query}
            ) AS scored
        ORDER BY
            paragraph_id,
            similarity DESC
    ) AS best_per_paragraph
WHERE
    comprehensive_score > %s
ORDER BY
    comprehensive_score DESC
LIMIT %s
|
||||||
@ -165,7 +165,7 @@ class EmbeddingSearch(ISearch):
|
|||||||
search_mode: SearchMode):
|
search_mode: SearchMode):
|
||||||
exec_sql, exec_params = generate_sql_by_query_dict({'embedding_query': query_set},
|
exec_sql, exec_params = generate_sql_by_query_dict({'embedding_query': query_set},
|
||||||
select_string=get_file_content(
|
select_string=get_file_content(
|
||||||
os.path.join(PROJECT_DIR, "apps", "embedding", 'sql',
|
os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql',
|
||||||
'embedding_search.sql')),
|
'embedding_search.sql')),
|
||||||
with_table_name=True)
|
with_table_name=True)
|
||||||
embedding_model = select_list(exec_sql,
|
embedding_model = select_list(exec_sql,
|
||||||
@ -186,7 +186,7 @@ class KeywordsSearch(ISearch):
|
|||||||
search_mode: SearchMode):
|
search_mode: SearchMode):
|
||||||
exec_sql, exec_params = generate_sql_by_query_dict({'keywords_query': query_set},
|
exec_sql, exec_params = generate_sql_by_query_dict({'keywords_query': query_set},
|
||||||
select_string=get_file_content(
|
select_string=get_file_content(
|
||||||
os.path.join(PROJECT_DIR, "apps", "embedding", 'sql',
|
os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql',
|
||||||
'keywords_search.sql')),
|
'keywords_search.sql')),
|
||||||
with_table_name=True)
|
with_table_name=True)
|
||||||
embedding_model = select_list(exec_sql,
|
embedding_model = select_list(exec_sql,
|
||||||
@ -207,7 +207,7 @@ class BlendSearch(ISearch):
|
|||||||
search_mode: SearchMode):
|
search_mode: SearchMode):
|
||||||
exec_sql, exec_params = generate_sql_by_query_dict({'embedding_query': query_set},
|
exec_sql, exec_params = generate_sql_by_query_dict({'embedding_query': query_set},
|
||||||
select_string=get_file_content(
|
select_string=get_file_content(
|
||||||
os.path.join(PROJECT_DIR, "apps", "embedding", 'sql',
|
os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql',
|
||||||
'blend_search.sql')),
|
'blend_search.sql')),
|
||||||
with_table_name=True)
|
with_table_name=True)
|
||||||
embedding_model = select_list(exec_sql,
|
embedding_model = select_list(exec_sql,
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user