# NOTE(review): this is a git patch whose line breaks appear to have been
# collapsed into spaces (file headers, hunk headers and "+" lines share the
# same physical lines). The original layout of the context lines cannot be
# recovered from here, so the patch text below is left untouched.
#
# What the patch does:
# - Adds four SQL templates under apps/knowledge/sql/: blend_search.sql,
#   embedding_search.sql, hit_test.sql and keywords_search.sql.
# - All four templates share one shape:
#     * the innermost query selects every column of `embedding` plus a
#       computed `similarity`;
#     * DISTINCT ON ("paragraph_id") with ORDER BY paragraph_id,
#       similarity DESC keeps the highest-scoring row per paragraph;
#     * the outer query returns paragraph_id, comprehensive_score and
#       similarity (an alias of comprehensive_score), keeps only rows with
#       comprehensive_score > %s, sorts them descending and applies LIMIT %s.
# - How `similarity` is computed:
#     * embedding_search.sql / hit_test.sql: 1 - (embedding.embedding <=> %s)
#       (presumably pgvector's cosine-distance operator -- TODO confirm);
#     * keywords_search.sql: ts_rank_cd(embedding.search_vector,
#       websearch_to_tsquery('simple', %s), 32);
#     * blend_search.sql: the sum of the two expressions above.
# - `${embedding_query}` / `${keywords_query}` follow `FROM embedding`; the
#   pg_vector.py hunks pass a dict with the matching key to
#   generate_sql_by_query_dict (presumably expanded into the filter clause --
#   verify against that helper).
# - hit_test.sql holds the same query as embedding_search.sql; none of the
#   hunks in this patch reference it.
# - apps/knowledge/vector/pg_vector.py: EmbeddingSearch, KeywordsSearch and
#   BlendSearch now build the template path with "knowledge" instead of
#   "embedding" as the app directory segment.
diff --git a/apps/knowledge/sql/blend_search.sql b/apps/knowledge/sql/blend_search.sql new file mode 100644 index 00000000..afb1f004 --- /dev/null +++ b/apps/knowledge/sql/blend_search.sql @@ -0,0 +1,26 @@ +SELECT + paragraph_id, + comprehensive_score, + comprehensive_score AS similarity +FROM + ( + SELECT DISTINCT ON + ( "paragraph_id" ) ( similarity ),* , + similarity AS comprehensive_score + FROM + ( + SELECT + *, + (( 1 - ( embedding.embedding <=> %s ) )+ts_rank_cd( embedding.search_vector, websearch_to_tsquery('simple', %s ), 32 )) AS similarity + FROM + embedding ${embedding_query} + ) TEMP + ORDER BY + paragraph_id, + similarity DESC + ) DISTINCT_TEMP +WHERE + comprehensive_score >%s +ORDER BY + comprehensive_score DESC + LIMIT %s \ No newline at end of file diff --git a/apps/knowledge/sql/embedding_search.sql b/apps/knowledge/sql/embedding_search.sql new file mode 100644 index 00000000..ce3d4a58 --- /dev/null +++ b/apps/knowledge/sql/embedding_search.sql @@ -0,0 +1,17 @@ +SELECT + paragraph_id, + comprehensive_score, + comprehensive_score as similarity +FROM + ( + SELECT DISTINCT ON + ("paragraph_id") ( similarity ),* ,similarity AS comprehensive_score + FROM + ( SELECT *, ( 1 - ( embedding.embedding <=> %s ) ) AS similarity FROM embedding ${embedding_query}) TEMP + ORDER BY + paragraph_id, + similarity DESC + ) DISTINCT_TEMP +WHERE comprehensive_score>%s +ORDER BY comprehensive_score DESC +LIMIT %s \ No newline at end of file diff --git a/apps/knowledge/sql/hit_test.sql b/apps/knowledge/sql/hit_test.sql new file mode 100644 index 00000000..8feffc86 --- /dev/null +++ b/apps/knowledge/sql/hit_test.sql @@ -0,0 +1,17 @@ +SELECT + paragraph_id, + comprehensive_score, + comprehensive_score as similarity +FROM + ( + SELECT DISTINCT ON + ("paragraph_id") ( similarity ),* ,similarity AS comprehensive_score + FROM + ( SELECT *, ( 1 - ( embedding.embedding <=> %s ) ) AS similarity FROM embedding ${embedding_query} ) TEMP + ORDER BY + paragraph_id, + similarity DESC + 
) DISTINCT_TEMP +WHERE comprehensive_score>%s +ORDER BY comprehensive_score DESC +LIMIT %s \ No newline at end of file diff --git a/apps/knowledge/sql/keywords_search.sql b/apps/knowledge/sql/keywords_search.sql new file mode 100644 index 00000000..a27d0a69 --- /dev/null +++ b/apps/knowledge/sql/keywords_search.sql @@ -0,0 +1,17 @@ +SELECT + paragraph_id, + comprehensive_score, + comprehensive_score as similarity +FROM + ( + SELECT DISTINCT ON + ("paragraph_id") ( similarity ),* ,similarity AS comprehensive_score + FROM + ( SELECT *,ts_rank_cd(embedding.search_vector,websearch_to_tsquery('simple',%s),32) AS similarity FROM embedding ${keywords_query}) TEMP + ORDER BY + paragraph_id, + similarity DESC + ) DISTINCT_TEMP +WHERE comprehensive_score>%s +ORDER BY comprehensive_score DESC +LIMIT %s \ No newline at end of file diff --git a/apps/knowledge/vector/pg_vector.py b/apps/knowledge/vector/pg_vector.py index f6572c66..de912448 100644 --- a/apps/knowledge/vector/pg_vector.py +++ b/apps/knowledge/vector/pg_vector.py @@ -165,7 +165,7 @@ class EmbeddingSearch(ISearch): search_mode: SearchMode): exec_sql, exec_params = generate_sql_by_query_dict({'embedding_query': query_set}, select_string=get_file_content( - os.path.join(PROJECT_DIR, "apps", "embedding", 'sql', + os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql', 'embedding_search.sql')), with_table_name=True) embedding_model = select_list(exec_sql, @@ -186,7 +186,7 @@ class KeywordsSearch(ISearch): search_mode: SearchMode): exec_sql, exec_params = generate_sql_by_query_dict({'keywords_query': query_set}, select_string=get_file_content( - os.path.join(PROJECT_DIR, "apps", "embedding", 'sql', + os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql', 'keywords_search.sql')), with_table_name=True) embedding_model = select_list(exec_sql, @@ -207,7 +207,7 @@ class BlendSearch(ISearch): search_mode: SearchMode): exec_sql, exec_params = generate_sql_by_query_dict({'embedding_query': query_set}, 
select_string=get_file_content( - os.path.join(PROJECT_DIR, "apps", "embedding", 'sql', + os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql', 'blend_search.sql')), with_table_name=True) embedding_model = select_list(exec_sql,