fix: 【知识库】整体同步,只删除了没有同步
This commit is contained in:
parent
1254e5c5ff
commit
04f34d748e
@ -10,9 +10,18 @@ from concurrent.futures import ThreadPoolExecutor
|
|||||||
|
|
||||||
work_thread_pool = ThreadPoolExecutor(5)
|
work_thread_pool = ThreadPoolExecutor(5)
|
||||||
|
|
||||||
|
embedding_thread_pool = ThreadPoolExecutor(3)
|
||||||
|
|
||||||
|
|
||||||
def poxy(poxy_function):
    """Decorator that runs ``poxy_function`` on ``work_thread_pool``.

    Calling the decorated function does not execute ``poxy_function`` in the
    calling thread; it submits the call to the module-level
    ``work_thread_pool`` executor and returns right away.

    :param poxy_function: callable taking a single positional argument.
    :return: wrapper taking that single argument and returning the
        ``concurrent.futures.Future`` for the submitted call. Callers that
        ignore the return value behave exactly as before.
    """
    # Local import so this block does not depend on a file-level import.
    import functools

    # Preserve the wrapped function's __name__/__doc__ for logging, debugging
    # and introspection of the decorated callables.
    @functools.wraps(poxy_function)
    def inner(args):
        # Hand the Future back instead of dropping it: an exception raised by
        # poxy_function is stored on the Future, so discarding it made task
        # failures impossible to observe from the caller.
        return work_thread_pool.submit(poxy_function, args)

    return inner
|
||||||
|
|
||||||
|
|
||||||
|
def embedding_poxy(poxy_function):
    """Decorator that runs ``poxy_function`` on ``embedding_thread_pool``.

    Calling the decorated function does not execute ``poxy_function`` in the
    calling thread; it submits the call to the module-level
    ``embedding_thread_pool`` executor and returns right away.

    :param poxy_function: callable taking a single positional argument.
    :return: wrapper taking that single argument and returning the
        ``concurrent.futures.Future`` for the submitted call. Callers that
        ignore the return value behave exactly as before.
    """
    # Local import so this block does not depend on a file-level import.
    import functools

    # Preserve the wrapped function's __name__/__doc__ for logging, debugging
    # and introspection of the decorated callables.
    @functools.wraps(poxy_function)
    def inner(args):
        # Hand the Future back instead of dropping it: an exception raised by
        # poxy_function is stored on the Future, so discarding it made task
        # failures impossible to observe from the caller.
        return embedding_thread_pool.submit(poxy_function, args)

    return inner
|
||||||
|
|||||||
@ -17,7 +17,7 @@ from django.db.models import QuerySet
|
|||||||
|
|
||||||
from common.config.embedding_config import VectorStore, EmbeddingModel
|
from common.config.embedding_config import VectorStore, EmbeddingModel
|
||||||
from common.db.search import native_search, get_dynamics_model
|
from common.db.search import native_search, get_dynamics_model
|
||||||
from common.event.common import poxy
|
from common.event.common import poxy, embedding_poxy
|
||||||
from common.util.file_util import get_file_content
|
from common.util.file_util import get_file_content
|
||||||
from common.util.fork import ForkManage, Fork
|
from common.util.fork import ForkManage, Fork
|
||||||
from common.util.lock import try_lock, un_lock
|
from common.util.lock import try_lock, un_lock
|
||||||
@ -65,7 +65,7 @@ class ListenerManagement:
|
|||||||
VectorStore.get_embedding_vector().save(**args)
|
VectorStore.get_embedding_vector().save(**args)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@poxy
|
@embedding_poxy
|
||||||
def embedding_by_paragraph(paragraph_id):
|
def embedding_by_paragraph(paragraph_id):
|
||||||
"""
|
"""
|
||||||
向量化段落 根据段落id
|
向量化段落 根据段落id
|
||||||
@ -93,7 +93,7 @@ class ListenerManagement:
|
|||||||
max_kb.info(f'结束--->向量化段落:{paragraph_id}')
|
max_kb.info(f'结束--->向量化段落:{paragraph_id}')
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@poxy
|
@embedding_poxy
|
||||||
def embedding_by_document(document_id):
|
def embedding_by_document(document_id):
|
||||||
"""
|
"""
|
||||||
向量化文档
|
向量化文档
|
||||||
@ -123,7 +123,7 @@ class ListenerManagement:
|
|||||||
max_kb.info(f"结束--->向量化文档:{document_id}")
|
max_kb.info(f"结束--->向量化文档:{document_id}")
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@poxy
|
@embedding_poxy
|
||||||
def embedding_by_dataset(dataset_id):
|
def embedding_by_dataset(dataset_id):
|
||||||
"""
|
"""
|
||||||
向量化知识库
|
向量化知识库
|
||||||
|
|||||||
@ -503,7 +503,7 @@ class DataSetSerializers(serializers.ModelSerializer):
|
|||||||
document_name = child_link.tag.text if child_link.tag is not None and len(
|
document_name = child_link.tag.text if child_link.tag is not None and len(
|
||||||
child_link.tag.text.strip()) > 0 else child_link.url
|
child_link.tag.text.strip()) > 0 else child_link.url
|
||||||
paragraphs = get_split_model('web.md').parse(response.content)
|
paragraphs = get_split_model('web.md').parse(response.content)
|
||||||
first = QuerySet(Document).filter(meta__source_url=child_link.url).first()
|
first = QuerySet(Document).filter(meta__source_url=child_link.url, dataset=dataset).first()
|
||||||
if first is not None:
|
if first is not None:
|
||||||
# 如果存在,使用文档同步
|
# 如果存在,使用文档同步
|
||||||
DocumentSerializers.Sync(data={'document_id': first.id}).sync()
|
DocumentSerializers.Sync(data={'document_id': first.id}).sync()
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user