fix: 【知识库】整体同步，只删除了没有同步

2024-01-29 17:07:07 +08:00 · 2024-01-29 17:07:07 +08:00 · 04f34d748e
commit 04f34d748e
parent 1254e5c5ff
3 changed files with 14 additions and 5 deletions
--- a/apps/common/event/common.py
+++ b/apps/common/event/common.py
@ -10,9 +10,18 @@ from concurrent.futures import ThreadPoolExecutor
 # Pool that runs the jobs submitted through the ``poxy`` decorator.
 work_thread_pool = ThreadPoolExecutor(max_workers=5)
 # Separate, smaller pool that runs the jobs submitted through ``embedding_poxy``.
 embedding_thread_pool = ThreadPoolExecutor(max_workers=3)
 def poxy(poxy_function):
     """Decorate ``poxy_function`` so that calling it schedules it on ``work_thread_pool``.

     The wrapper only submits the call to the pool; it does not wait for
     ``poxy_function`` to finish.

     :param poxy_function: callable that takes a single positional argument.
     :return: wrapper taking that one argument and returning the
         ``concurrent.futures.Future`` of the submitted call.
     """
     # Imported locally so the decorator does not rely on the module's import header.
     from functools import wraps

     @wraps(poxy_function)  # keep the wrapped function's __name__ / __doc__
     def inner(args):
         # Hand the Future back instead of dropping it: an exception raised
         # inside the job is stored on the Future and would otherwise be
         # impossible for the caller to observe.
         return work_thread_pool.submit(poxy_function, args)

     return inner
 def embedding_poxy(poxy_function):
     """Decorate ``poxy_function`` so that calling it schedules it on ``embedding_thread_pool``.

     The wrapper only submits the call to the pool; it does not wait for
     ``poxy_function`` to finish.

     :param poxy_function: callable that takes a single positional argument.
     :return: wrapper taking that one argument and returning the
         ``concurrent.futures.Future`` of the submitted call.
     """
     # Imported locally so the decorator does not rely on the module's import header.
     from functools import wraps

     @wraps(poxy_function)  # keep the wrapped function's __name__ / __doc__
     def inner(args):
         # Hand the Future back instead of dropping it: an exception raised
         # inside the job is stored on the Future and would otherwise be
         # impossible for the caller to observe.
         return embedding_thread_pool.submit(poxy_function, args)

     return inner
--- a/apps/common/event/listener_manage.py
+++ b/apps/common/event/listener_manage.py
@ -17,7 +17,7 @@ from django.db.models import QuerySet
 from common.config.embedding_config import VectorStore, EmbeddingModel
 from common.db.search import native_search, get_dynamics_model
-from common.event.common import poxy
+from common.event.common import poxy, embedding_poxy
 from common.util.file_util import get_file_content
 from common.util.fork import ForkManage, Fork
 from common.util.lock import try_lock, un_lock
@ -65,7 +65,7 @@ class ListenerManagement:
        VectorStore.get_embedding_vector().save(**args)
    @staticmethod
-    @poxy
+    @embedding_poxy
    def embedding_by_paragraph(paragraph_id):
        """
        向量化段落 根据段落id
@ -93,7 +93,7 @@ class ListenerManagement:
            max_kb.info(f'结束--->向量化段落:{paragraph_id}')
    @staticmethod
-    @poxy
+    @embedding_poxy
    def embedding_by_document(document_id):
        """
        向量化文档
@ -123,7 +123,7 @@ class ListenerManagement:
            max_kb.info(f"结束--->向量化文档:{document_id}")
    @staticmethod
-    @poxy
+    @embedding_poxy
    def embedding_by_dataset(dataset_id):
        """
        向量化知识库
--- a/apps/dataset/serializers/dataset_serializers.py
+++ b/apps/dataset/serializers/dataset_serializers.py
@ -503,7 +503,7 @@ class DataSetSerializers(serializers.ModelSerializer):
                        document_name = child_link.tag.text if child_link.tag is not None and len(
                            child_link.tag.text.strip()) > 0 else child_link.url
                        paragraphs = get_split_model('web.md').parse(response.content)
-                        first = QuerySet(Document).filter(meta__source_url=child_link.url).first()
+                        first = QuerySet(Document).filter(meta__source_url=child_link.url, dataset=dataset).first()
                        if first is not None:
                            # 如果存在,使用文档同步
                            DocumentSerializers.Sync(data={'document_id': first.id}).sync()