fix: 批量删除文档时未删除关联的段落信息;添加关联问题时报错
This commit is contained in:
parent
a98b537beb
commit
67e6138066
@ -50,6 +50,7 @@ class ListenerManagement:
|
|||||||
embedding_by_dataset_signal = signal("embedding_by_dataset")
|
embedding_by_dataset_signal = signal("embedding_by_dataset")
|
||||||
embedding_by_document_signal = signal("embedding_by_document")
|
embedding_by_document_signal = signal("embedding_by_document")
|
||||||
delete_embedding_by_document_signal = signal("delete_embedding_by_document")
|
delete_embedding_by_document_signal = signal("delete_embedding_by_document")
|
||||||
|
delete_embedding_by_document_list_signal = signal("delete_embedding_by_document_list")
|
||||||
delete_embedding_by_dataset_signal = signal("delete_embedding_by_dataset")
|
delete_embedding_by_dataset_signal = signal("delete_embedding_by_dataset")
|
||||||
delete_embedding_by_paragraph_signal = signal("delete_embedding_by_paragraph")
|
delete_embedding_by_paragraph_signal = signal("delete_embedding_by_paragraph")
|
||||||
delete_embedding_by_source_signal = signal("delete_embedding_by_source")
|
delete_embedding_by_source_signal = signal("delete_embedding_by_source")
|
||||||
@ -144,6 +145,10 @@ class ListenerManagement:
|
|||||||
def delete_embedding_by_document(document_id):
|
def delete_embedding_by_document(document_id):
|
||||||
VectorStore.get_embedding_vector().delete_by_document_id(document_id)
|
VectorStore.get_embedding_vector().delete_by_document_id(document_id)
|
||||||
|
|
||||||
|
@staticmethod
def delete_embedding_by_document_list(document_id_list: List[str]):
    """
    Delete the stored vectors of every document in ``document_id_list``.

    :param document_id_list: ids of the documents whose vectors are removed
    """
    vector_store = VectorStore.get_embedding_vector()
    # The "bu" spelling is the method name the vector store declares.
    vector_store.delete_bu_document_id_list(document_id_list)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def delete_embedding_by_dataset(dataset_id):
|
def delete_embedding_by_dataset(dataset_id):
|
||||||
VectorStore.get_embedding_vector().delete_by_dataset_id(dataset_id)
|
VectorStore.get_embedding_vector().delete_by_dataset_id(dataset_id)
|
||||||
@ -201,6 +206,8 @@ class ListenerManagement:
|
|||||||
self.embedding_by_document)
|
self.embedding_by_document)
|
||||||
# 删除 向量 根据文档
|
# 删除 向量 根据文档
|
||||||
ListenerManagement.delete_embedding_by_document_signal.connect(self.delete_embedding_by_document)
|
ListenerManagement.delete_embedding_by_document_signal.connect(self.delete_embedding_by_document)
|
||||||
|
# 删除 向量 根据文档id列表
|
||||||
|
ListenerManagement.delete_embedding_by_document_list_signal.connect(self.delete_embedding_by_document_list)
|
||||||
# 删除 向量 根据知识库id
|
# 删除 向量 根据知识库id
|
||||||
ListenerManagement.delete_embedding_by_dataset_signal.connect(self.delete_embedding_by_dataset)
|
ListenerManagement.delete_embedding_by_dataset_signal.connect(self.delete_embedding_by_dataset)
|
||||||
# 删除向量 根据段落id
|
# 删除向量 根据段落id
|
||||||
|
|||||||
@ -547,7 +547,12 @@ class DocumentSerializers(ApiMixin, serializers.Serializer):
|
|||||||
if with_valid:
|
if with_valid:
|
||||||
BatchSerializer(data=instance).is_valid(model=Document, raise_exception=True)
|
BatchSerializer(data=instance).is_valid(model=Document, raise_exception=True)
|
||||||
self.is_valid(raise_exception=True)
|
self.is_valid(raise_exception=True)
|
||||||
QuerySet(Document).filter(id__in=instance.get('id_list')).delete()
|
document_id_list = instance.get("id_list")
|
||||||
|
QuerySet(Document).filter(id__in=document_id_list).delete()
|
||||||
|
QuerySet(Paragraph).filter(document_id__in=document_id_list).delete()
|
||||||
|
QuerySet(Problem).filter(document_id__in=document_id_list).delete()
|
||||||
|
# 删除向量库
|
||||||
|
ListenerManagement.delete_embedding_by_document_list_signal.send(document_id_list)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -9,6 +9,7 @@
|
|||||||
import uuid
|
import uuid
|
||||||
from typing import Dict
|
from typing import Dict
|
||||||
|
|
||||||
|
from django.db import transaction
|
||||||
from django.db.models import QuerySet
|
from django.db.models import QuerySet
|
||||||
from drf_yasg import openapi
|
from drf_yasg import openapi
|
||||||
from rest_framework import serializers
|
from rest_framework import serializers
|
||||||
@ -61,6 +62,7 @@ class ProblemSerializers(ApiMixin, serializers.Serializer):
|
|||||||
dataset_id=self.data.get('dataset_id')).exists():
|
dataset_id=self.data.get('dataset_id')).exists():
|
||||||
raise AppApiException(500, "段落id不正确")
|
raise AppApiException(500, "段落id不正确")
|
||||||
|
|
||||||
|
@transaction.atomic
|
||||||
def save(self, instance: Dict, with_valid=True, with_embedding=True):
|
def save(self, instance: Dict, with_valid=True, with_embedding=True):
|
||||||
if with_valid:
|
if with_valid:
|
||||||
self.is_valid()
|
self.is_valid()
|
||||||
|
|||||||
@ -51,8 +51,6 @@ class BaseVectorStore(ABC):
|
|||||||
|
|
||||||
def save(self, text, source_type: SourceType, dataset_id: str, document_id: str, paragraph_id: str, source_id: str,
|
def save(self, text, source_type: SourceType, dataset_id: str, document_id: str, paragraph_id: str, source_id: str,
|
||||||
is_active: bool,
|
is_active: bool,
|
||||||
star_num: int,
|
|
||||||
trample_num: int,
|
|
||||||
embedding=None):
|
embedding=None):
|
||||||
"""
|
"""
|
||||||
插入向量数据
|
插入向量数据
|
||||||
@ -64,16 +62,13 @@ class BaseVectorStore(ABC):
|
|||||||
:param is_active: 是否禁用
|
:param is_active: 是否禁用
|
||||||
:param embedding: 向量化处理器
|
:param embedding: 向量化处理器
|
||||||
:param paragraph_id 段落id
|
:param paragraph_id 段落id
|
||||||
:param star_num 点赞数量
|
|
||||||
:param trample_num 点踩数量
|
|
||||||
:return: bool
|
:return: bool
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if embedding is None:
|
if embedding is None:
|
||||||
embedding = EmbeddingModel.get_embedding_model()
|
embedding = EmbeddingModel.get_embedding_model()
|
||||||
self.save_pre_handler()
|
self.save_pre_handler()
|
||||||
self._save(text, source_type, dataset_id, document_id, paragraph_id, source_id, is_active, star_num,
|
self._save(text, source_type, dataset_id, document_id, paragraph_id, source_id, is_active, embedding)
|
||||||
trample_num, embedding)
|
|
||||||
|
|
||||||
def batch_save(self, data_list: List[Dict], embedding=None):
|
def batch_save(self, data_list: List[Dict], embedding=None):
|
||||||
# 获取锁
|
# 获取锁
|
||||||
@ -143,6 +138,10 @@ class BaseVectorStore(ABC):
|
|||||||
def delete_by_document_id(self, document_id: str):
|
def delete_by_document_id(self, document_id: str):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
def delete_bu_document_id_list(self, document_id_list: List[str]):
    """
    Delete the vectors of every document whose id is in ``document_id_list``.

    Abstract: each concrete vector store supplies its own implementation.

    :param document_id_list: ids of the documents whose vectors are removed
    """
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def delete_by_source_id(self, source_id: str, source_type: str):
|
def delete_by_source_id(self, source_id: str, source_type: str):
|
||||||
pass
|
pass
|
||||||
|
|||||||
@ -107,6 +107,9 @@ class PGVector(BaseVectorStore):
|
|||||||
QuerySet(Embedding).filter(document_id=document_id).delete()
|
QuerySet(Embedding).filter(document_id=document_id).delete()
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
def delete_bu_document_id_list(self, document_id_list: List[str]):
    """
    Delete the embedding rows of every document in ``document_id_list``.

    :param document_id_list: ids of the documents whose embeddings are removed
    :return: True once the delete has been issued, like the other
             ``delete_by_*`` methods of this store
    """
    # Django's ``delete()`` returns a ``(count, per_model)`` tuple; it is
    # dropped on purpose so callers get the same bool as the sibling deleters.
    QuerySet(Embedding).filter(document_id__in=document_id_list).delete()
    return True
|
||||||
|
|
||||||
def delete_by_source_id(self, source_id: str, source_type: str):
|
def delete_by_source_id(self, source_id: str, source_type: str):
|
||||||
QuerySet(Embedding).filter(source_id=source_id, source_type=source_type).delete()
|
QuerySet(Embedding).filter(source_id=source_id, source_type=source_type).delete()
|
||||||
return True
|
return True
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user