feat: implement Paragraph API for CRUD operations and batch deletion

This commit is contained in:
CaptainB 2025-05-07 12:02:51 +08:00
parent bcc7c1acf1
commit 10105ce5ab
5 changed files with 548 additions and 2 deletions

View File

@ -0,0 +1,208 @@
from drf_spectacular.types import OpenApiTypes
from drf_spectacular.utils import OpenApiParameter
from common.mixins.api_mixin import APIMixin
from common.result import DefaultResultSerializer, ResultSerializer
from knowledge.serializers.common import BatchSerializer
from knowledge.serializers.paragraph import ParagraphSerializer
from knowledge.serializers.problem import ProblemSerializer
class ParagraphReadResponse(ResultSerializer):
    """Response schema whose data part is a list of paragraphs."""

    @staticmethod
    def get_data():
        """Return a ``ParagraphSerializer`` configured with ``many=True``."""
        paragraph_list_serializer = ParagraphSerializer(many=True)
        return paragraph_list_serializer
class ParagraphReadAPI(APIMixin):
    """OpenAPI parameters and response for the paragraph list endpoint."""

    @staticmethod
    def get_parameters():
        """Return three required path parameters followed by two optional query filters."""
        # (name, description) pairs; every parameter is documented as a string.
        path_specs = (
            ("workspace_id", "工作空间id"),
            ("knowledge_id", "知识库id"),
            ("document_id", "文档id"),
        )
        query_specs = (
            ("title", "标题"),
            ("content", "内容"),
        )
        parameters = [
            OpenApiParameter(
                name=param_name,
                description=param_description,
                type=OpenApiTypes.STR,
                location='path',
                required=True,
            )
            for param_name, param_description in path_specs
        ]
        parameters.extend(
            OpenApiParameter(
                name=param_name,
                description=param_description,
                type=OpenApiTypes.STR,
                location='query',
                required=False,
            )
            for param_name, param_description in query_specs
        )
        return parameters

    @staticmethod
    def get_response():
        """Return the response serializer class for the list endpoint."""
        return ParagraphReadResponse
class ParagraphCreateAPI(APIMixin):
    """OpenAPI parameters, request body and response for paragraph creation."""

    @staticmethod
    def get_parameters():
        """Return the required workspace / knowledge / document path parameters."""
        path_specs = (
            ("workspace_id", "工作空间id"),
            ("knowledge_id", "知识库id"),
            ("document_id", "文档id"),
        )
        return [
            OpenApiParameter(
                name=param_name,
                description=param_description,
                type=OpenApiTypes.STR,
                location='path',
                required=True,
            )
            for param_name, param_description in path_specs
        ]

    @staticmethod
    def get_request():
        """Return the serializer class describing the request body."""
        return ParagraphSerializer

    @staticmethod
    def get_response():
        """Return the serializer class describing the response."""
        return ParagraphReadResponse
class ParagraphBatchDeleteAPI(ParagraphCreateAPI):
    """OpenAPI description for batch deletion; path parameters are inherited."""

    @staticmethod
    def get_response():
        """Return the serializer class describing the response."""
        return DefaultResultSerializer

    @staticmethod
    def get_request():
        """Return the serializer class describing the request body."""
        return BatchSerializer
class ParagraphGetAPI(APIMixin):
    """OpenAPI path parameters for endpoints that address a single paragraph.

    NOTE(review): no ``get_response`` is defined here, so callers get whatever
    ``APIMixin`` provides -- confirm that default is what the docs should show.
    """

    @staticmethod
    def get_parameters():
        """Return the four required path parameters, ending with ``paragraph_id``."""
        path_specs = (
            ("workspace_id", "工作空间id"),
            ("knowledge_id", "知识库id"),
            ("document_id", "文档id"),
            ("paragraph_id", "段落id"),
        )
        return [
            OpenApiParameter(
                name=param_name,
                description=param_description,
                type=OpenApiTypes.STR,
                location='path',
                required=True,
            )
            for param_name, param_description in path_specs
        ]
class ParagraphEditAPI(ParagraphGetAPI):
    """OpenAPI description for editing a paragraph; path parameters are inherited."""

    @staticmethod
    def get_response():
        """Return the serializer class describing the response."""
        return DefaultResultSerializer

    @staticmethod
    def get_request():
        """Return the serializer class describing the request body."""
        return ParagraphSerializer
class ProblemCreateAPI(ParagraphGetAPI):
    """OpenAPI description for adding a problem to a paragraph; path parameters are inherited."""

    @staticmethod
    def get_response():
        """Return the serializer class describing the response."""
        return DefaultResultSerializer

    @staticmethod
    def get_request():
        """Return the serializer class describing the request body."""
        return ProblemSerializer
class UnAssociationAPI(APIMixin):
    """OpenAPI path parameters for endpoints addressing a paragraph/problem pair."""

    @staticmethod
    def get_parameters():
        """Return the five required path parameters, ending with ``problem_id``."""
        path_specs = (
            ("workspace_id", "工作空间id"),
            ("knowledge_id", "知识库id"),
            ("document_id", "文档id"),
            ("paragraph_id", "段落id"),
            ("problem_id", "问题id"),
        )
        return [
            OpenApiParameter(
                name=param_name,
                description=param_description,
                type=OpenApiTypes.STR,
                location='path',
                required=True,
            )
            for param_name, param_description in path_specs
        ]
class AssociationAPI(UnAssociationAPI):
    """OpenAPI description for the association endpoint; identical to ``UnAssociationAPI``."""

View File

@ -8,15 +8,17 @@ from django.db.models import QuerySet, Count
from django.utils.translation import gettext_lazy as _ from django.utils.translation import gettext_lazy as _
from rest_framework import serializers from rest_framework import serializers
from common.db.search import page_search
from common.exception.app_exception import AppApiException from common.exception.app_exception import AppApiException
from common.utils.common import post from common.utils.common import post
from knowledge.models import Paragraph, Problem, Document, ProblemParagraphMapping, SourceType from knowledge.models import Paragraph, Problem, Document, ProblemParagraphMapping, SourceType
from knowledge.serializers.common import ProblemParagraphObject, ProblemParagraphManage, \ from knowledge.serializers.common import ProblemParagraphObject, ProblemParagraphManage, \
get_embedding_model_id_by_knowledge_id, update_document_char_length get_embedding_model_id_by_knowledge_id, update_document_char_length, BatchSerializer
from knowledge.serializers.problem import ProblemInstanceSerializer, ProblemSerializer, ProblemSerializers from knowledge.serializers.problem import ProblemInstanceSerializer, ProblemSerializer, ProblemSerializers
from knowledge.task.embedding import embedding_by_paragraph, enable_embedding_by_paragraph, \ from knowledge.task.embedding import embedding_by_paragraph, enable_embedding_by_paragraph, \
disable_embedding_by_paragraph, \ disable_embedding_by_paragraph, \
delete_embedding_by_paragraph, embedding_by_problem as embedding_by_problem_task delete_embedding_by_paragraph, embedding_by_problem as embedding_by_problem_task, delete_embedding_by_paragraph_ids, \
embedding_by_problem, delete_embedding_by_source
class ParagraphSerializer(serializers.ModelSerializer): class ParagraphSerializer(serializers.ModelSerializer):
@ -115,6 +117,7 @@ class ParagraphSerializers(serializers.Serializer):
).one(with_valid=True) ).one(with_valid=True)
class Operate(serializers.Serializer): class Operate(serializers.Serializer):
workspace_id = serializers.CharField(required=True, label=_('workspace id'))
# 段落id # 段落id
paragraph_id = serializers.UUIDField(required=True, label=_('paragraph id')) paragraph_id = serializers.UUIDField(required=True, label=_('paragraph id'))
# 知识库id # 知识库id
@ -282,6 +285,100 @@ class ParagraphSerializers(serializers.Serializer):
else: else:
return Problem(id=uuid.uuid7(), content=content, knowledge_id=knowledge_id) return Problem(id=uuid.uuid7(), content=content, knowledge_id=knowledge_id)
class Query(serializers.Serializer):
    """Look up the paragraphs of one document, optionally filtered by title/content."""
    knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id'))
    document_id = serializers.UUIDField(required=True, label=_('document id'))
    title = serializers.CharField(required=False, label=_('section title'))
    content = serializers.CharField(required=False)

    def get_query_set(self):
        """Build the filtered and ordered ``Paragraph`` queryset.

        ``title`` and ``content`` are applied as case-insensitive substring
        filters only when they are present in the validated data. Results are
        ordered by newest ``create_time`` first, then by ``id``.
        """
        query_set = QuerySet(model=Paragraph).filter(
            knowledge_id=self.data.get('knowledge_id'),
            document_id=self.data.get("document_id"),
        )
        if 'title' in self.data:
            query_set = query_set.filter(title__icontains=self.data.get('title'))
        if 'content' in self.data:
            query_set = query_set.filter(content__icontains=self.data.get('content'))
        # order_by() returns a new QuerySet instead of mutating in place, so its
        # result has to be the value we hand back; otherwise ordering is lost.
        return query_set.order_by('-create_time', 'id')

    def list(self):
        """Return every matching paragraph serialized with ``ParagraphSerializer``."""
        return [ParagraphSerializer(row).data for row in self.get_query_set()]

    def page(self, current_page, page_size):
        """Return one page of matching paragraphs via ``page_search``."""
        query_set = self.get_query_set()
        return page_search(current_page, page_size, query_set, lambda row: ParagraphSerializer(row).data)
class Association(serializers.Serializer):
    """Create or remove the mapping between a problem and a paragraph."""
    workspace_id = serializers.CharField(required=True, label=_('workspace id'))
    knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id'))
    problem_id = serializers.UUIDField(required=True, label=_('problem id'))
    document_id = serializers.UUIDField(required=True, label=_('document id'))
    paragraph_id = serializers.UUIDField(required=True, label=_('paragraph id'))

    def is_valid(self, *, raise_exception=True):
        """Validate the fields, then check that paragraph and problem exist.

        Field validation always raises on failure, regardless of
        ``raise_exception``.

        Returns:
            True when validation succeeds.

        Raises:
            AppApiException: if the paragraph or the problem is not found in
                the given knowledge base.
        """
        super().is_valid(raise_exception=True)
        knowledge_id = self.data.get('knowledge_id')
        paragraph_id = self.data.get('paragraph_id')
        problem_id = self.data.get("problem_id")
        if not QuerySet(Paragraph).filter(knowledge_id=knowledge_id, id=paragraph_id).exists():
            raise AppApiException(500, _('Paragraph does not exist'))
        if not QuerySet(Problem).filter(knowledge_id=knowledge_id, id=problem_id).exists():
            raise AppApiException(500, _('Problem does not exist'))
        # Serializer.is_valid() is expected to return a bool; keep that contract.
        return True

    def association(self, with_valid=True, with_embedding=True):
        """Persist a problem/paragraph mapping and optionally embed the problem text.

        Args:
            with_valid: run ``is_valid`` first.
            with_embedding: also call ``embedding_by_problem`` for the new mapping.

        Raises:
            AppApiException: if the problem cannot be found.
        """
        if with_valid:
            self.is_valid(raise_exception=True)
        problem = QuerySet(Problem).filter(id=self.data.get("problem_id")).first()
        if problem is None:
            # Only reachable when validation was skipped; fail with a clear
            # error instead of an AttributeError on ``problem.id``.
            raise AppApiException(500, _('Problem does not exist'))
        problem_paragraph_mapping = ProblemParagraphMapping(id=uuid.uuid7(),
                                                            document_id=self.data.get('document_id'),
                                                            paragraph_id=self.data.get('paragraph_id'),
                                                            knowledge_id=self.data.get('knowledge_id'),
                                                            problem_id=problem.id)
        problem_paragraph_mapping.save()
        if with_embedding:
            model_id = get_embedding_model_id_by_knowledge_id(self.data.get('knowledge_id'))
            embedding_by_problem({
                'text': problem.content,
                'is_active': True,
                'source_type': SourceType.PROBLEM,
                'source_id': problem_paragraph_mapping.id,
                'document_id': self.data.get('document_id'),
                'paragraph_id': self.data.get('paragraph_id'),
                'knowledge_id': self.data.get('knowledge_id'),
            }, model_id)

    def un_association(self, with_valid=True):
        """Delete the problem/paragraph mapping and its embedding.

        Args:
            with_valid: run ``is_valid`` first.

        Returns:
            True once the mapping and its embedding have been removed.

        Raises:
            AppApiException: if no mapping exists for the given paragraph and problem.
        """
        if with_valid:
            self.is_valid(raise_exception=True)
        problem_paragraph_mapping = QuerySet(ProblemParagraphMapping).filter(
            paragraph_id=self.data.get('paragraph_id'),
            knowledge_id=self.data.get('knowledge_id'),
            problem_id=self.data.get('problem_id')).first()
        if problem_paragraph_mapping is None:
            # Paragraph and problem can both exist without being linked.
            raise AppApiException(500, _('The problem is not associated with the paragraph'))
        # Capture the id before delete() so it can still be used for the embedding cleanup.
        problem_paragraph_mapping_id = problem_paragraph_mapping.id
        problem_paragraph_mapping.delete()
        delete_embedding_by_source(problem_paragraph_mapping_id)
        return True
class Batch(serializers.Serializer):
    """Batch operations on the paragraphs of one document."""
    knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id'))
    document_id = serializers.UUIDField(required=True, label=_('document id'))

    @transaction.atomic
    def batch_delete(self, instance: Dict, with_valid=True):
        """Delete the requested paragraphs together with their mappings and embeddings.

        Only paragraphs that belong to this serializer's ``knowledge_id`` and
        ``document_id`` are deleted; ids pointing at other documents are
        ignored, so the path a caller was authorised for bounds what is removed.

        Args:
            instance: request body; its ``id_list`` holds the paragraph ids.
            with_valid: validate ``instance`` and this serializer first.

        Returns:
            True when the operation completes.
        """
        if with_valid:
            BatchSerializer(data=instance).is_valid(model=Paragraph, raise_exception=True)
            self.is_valid(raise_exception=True)
        document_id = self.data.get('document_id')
        # Resolve the ids inside the addressed document before deleting anything,
        # so the follow-up cleanups operate on exactly the rows that were removed.
        paragraph_id_list = [
            str(paragraph_id)
            for paragraph_id in QuerySet(Paragraph).filter(
                id__in=instance.get("id_list"),
                knowledge_id=self.data.get('knowledge_id'),
                document_id=document_id,
            ).values_list('id', flat=True)
        ]
        if not paragraph_id_list:
            return True
        QuerySet(Paragraph).filter(id__in=paragraph_id_list).delete()
        delete_problems_and_mappings(paragraph_id_list)
        update_document_char_length(document_id)
        # Remove the corresponding entries from the vector store.
        delete_embedding_by_paragraph_ids(paragraph_id_list)
        return True
def delete_problems_and_mappings(paragraph_ids): def delete_problems_and_mappings(paragraph_ids):
problem_paragraph_mappings = ProblemParagraphMapping.objects.filter(paragraph_id__in=paragraph_ids) problem_paragraph_mappings = ProblemParagraphMapping.objects.filter(paragraph_id__in=paragraph_ids)

View File

@ -21,6 +21,12 @@ urlpatterns = [
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/refresh', views.DocumentView.Refresh.as_view()), path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/refresh', views.DocumentView.Refresh.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/cancel_task', views.DocumentView.CancelTask.as_view()), path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/cancel_task', views.DocumentView.CancelTask.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/cancel_task/batch', views.DocumentView.BatchCancelTask.as_view()), path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/cancel_task/batch', views.DocumentView.BatchCancelTask.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph', views.ParagraphView.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph/batch', views.ParagraphView.Batch.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph/<str:paragraph_id>', views.ParagraphView.Operate.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph/<str:paragraph_id>/problem', views.ParagraphView.Problem.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph/<str:paragraph_id>/problem/<str:problem_id>/association', views.ParagraphView.Association.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph/<str:paragraph_id>/problem/<str:problem_id>/unassociation', views.ParagraphView.UnAssociation.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<int:current_page>/<int:page_sige>', views.DocumentView.Page.as_view()), path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<int:current_page>/<int:page_sige>', views.DocumentView.Page.as_view()),
path('workspace/<str:workspace_id>/knowledge/<int:current_page>/<int:page_size>', views.KnowledgeView.Page.as_view()), path('workspace/<str:workspace_id>/knowledge/<int:current_page>/<int:page_size>', views.KnowledgeView.Page.as_view()),
] ]

View File

@ -1,2 +1,3 @@
from .document import * from .document import *
from .knowledge import * from .knowledge import *
from .paragraph import *

View File

@ -0,0 +1,234 @@
from django.utils.translation import gettext_lazy as _
from drf_spectacular.utils import extend_schema
from rest_framework.views import APIView
from rest_framework.views import Request
from common.auth import TokenAuth
from common.auth.authentication import has_permissions
from common.constants.permission_constants import PermissionConstants
from common.result import result
from common.utils.common import query_params_to_single_dict
from knowledge.api.paragraph import ParagraphReadAPI, ParagraphCreateAPI, ParagraphBatchDeleteAPI, ParagraphEditAPI, \
ParagraphGetAPI, ProblemCreateAPI, UnAssociationAPI, AssociationAPI
from knowledge.serializers.paragraph import ParagraphSerializers
class ParagraphView(APIView):
    """List and create the paragraphs of a document."""
    authentication_classes = [TokenAuth]

    @extend_schema(
        summary=_('Paragraph list'),
        description=_('Paragraph list'),
        operation_id=_('Paragraph list'),
        parameters=ParagraphReadAPI.get_parameters(),
        responses=ParagraphReadAPI.get_response(),
        tags=[_('Knowledge Base/Documentation/Paragraph')]
    )
    @has_permissions(PermissionConstants.DOCUMENT_READ.get_workspace_permission())
    def get(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str):
        """Return all paragraphs of the document that match the query filters."""
        # Path values are applied after the query parameters so they win on a name clash.
        payload = dict(
            query_params_to_single_dict(request.query_params),
            workspace_id=workspace_id,
            knowledge_id=knowledge_id,
            document_id=document_id,
        )
        query = ParagraphSerializers.Query(data=payload)
        query.is_valid(raise_exception=True)
        paragraphs = query.list()
        return result.success(paragraphs)

    @extend_schema(
        summary=_('Create Paragraph'),
        operation_id=_('Create Paragraph'),
        parameters=ParagraphCreateAPI.get_parameters(),
        request=ParagraphCreateAPI.get_request(),
        responses=ParagraphCreateAPI.get_response(),
        tags=[_('Knowledge Base/Documentation/Paragraph')]
    )
    @has_permissions(PermissionConstants.DOCUMENT_CREATE.get_workspace_permission())
    def post(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str):
        """Create a paragraph in the document from the request body."""
        creator = ParagraphSerializers.Create(
            data=dict(workspace_id=workspace_id, knowledge_id=knowledge_id, document_id=document_id)
        )
        created = creator.save(request.data)
        return result.success(created)
class Batch(APIView):
    """Batch operations on the paragraphs of a document."""
    authentication_classes = [TokenAuth]

    @extend_schema(
        methods=['DELETE'],
        summary=_('Batch Paragraph'),
        description=_('Batch Paragraph'),
        operation_id=_('Batch Paragraph'),
        parameters=ParagraphBatchDeleteAPI.get_parameters(),
        request=ParagraphBatchDeleteAPI.get_request(),
        responses=ParagraphBatchDeleteAPI.get_response(),
        tags=[_('Knowledge Base/Documentation/Paragraph')]
    )
    @has_permissions(PermissionConstants.DOCUMENT_EDIT.get_workspace_permission())
    def delete(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str):
        """Delete the paragraphs listed in the request body."""
        batch = ParagraphSerializers.Batch(
            data=dict(workspace_id=workspace_id, knowledge_id=knowledge_id, document_id=document_id)
        )
        outcome = batch.batch_delete(request.data)
        return result.success(outcome)
class Operate(APIView):
    """Read, modify or delete a single paragraph."""
    authentication_classes = [TokenAuth]

    @staticmethod
    def _validated_operate(workspace_id: str, knowledge_id: str, document_id: str, paragraph_id: str):
        """Build ``ParagraphSerializers.Operate`` from the path arguments and validate it."""
        operate = ParagraphSerializers.Operate(
            data={
                'workspace_id': workspace_id,
                "paragraph_id": paragraph_id,
                'knowledge_id': knowledge_id,
                'document_id': document_id
            }
        )
        operate.is_valid(raise_exception=True)
        return operate

    @extend_schema(
        methods=['PUT'],
        summary=_('Modify paragraph data'),
        description=_('Modify paragraph data'),
        operation_id=_('Modify paragraph data'),
        parameters=ParagraphEditAPI.get_parameters(),
        request=ParagraphEditAPI.get_request(),
        responses=ParagraphEditAPI.get_response(),
        tags=[_('Knowledge Base/Documentation/Paragraph')]
    )
    @has_permissions(PermissionConstants.DOCUMENT_EDIT.get_workspace_permission())
    def put(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str, paragraph_id: str):
        """Apply the request body as an edit to the paragraph."""
        operate = self._validated_operate(workspace_id, knowledge_id, document_id, paragraph_id)
        return result.success(operate.edit(request.data))

    @extend_schema(
        methods=['GET'],
        summary=_('Get paragraph details'),
        description=_('Get paragraph details'),
        operation_id=_('Get paragraph details'),
        parameters=ParagraphGetAPI.get_parameters(),
        responses=ParagraphGetAPI.get_response(),
        tags=[_('Knowledge Base/Documentation/Paragraph')]
    )
    # Read-only endpoint: require the same read permission as the paragraph
    # list view rather than edit permission.
    @has_permissions(PermissionConstants.DOCUMENT_READ.get_workspace_permission())
    def get(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str, paragraph_id: str):
        """Return the details of the paragraph."""
        operate = self._validated_operate(workspace_id, knowledge_id, document_id, paragraph_id)
        return result.success(operate.one())

    @extend_schema(
        methods=['DELETE'],
        summary=_('Delete paragraph'),
        description=_('Delete paragraph'),
        operation_id=_('Delete paragraph'),
        parameters=ParagraphGetAPI.get_parameters(),
        responses=ParagraphGetAPI.get_response(),
        tags=[_('Knowledge Base/Documentation/Paragraph')])
    @has_permissions(PermissionConstants.DOCUMENT_EDIT.get_workspace_permission())
    def delete(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str, paragraph_id: str):
        """Delete the paragraph."""
        operate = self._validated_operate(workspace_id, knowledge_id, document_id, paragraph_id)
        return result.success(operate.delete())
class Problem(APIView):
    """Add problems to a paragraph and list the problems attached to it."""
    authentication_classes = [TokenAuth]

    @extend_schema(
        methods=['POST'],
        summary=_('Add associated questions'),
        description=_('Add associated questions'),
        operation_id=_('Add associated questions'),
        parameters=ProblemCreateAPI.get_parameters(),
        request=ProblemCreateAPI.get_request(),
        responses=ProblemCreateAPI.get_response(),
        tags=[_('Knowledge Base/Documentation/Paragraph')]
    )
    @has_permissions(PermissionConstants.DOCUMENT_EDIT.get_workspace_permission())
    def post(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str, paragraph_id: str):
        """Create a problem for the paragraph from the request body."""
        return result.success(ParagraphSerializers.Problem(
            data={
                'workspace_id': workspace_id,
                "knowledge_id": knowledge_id,
                'document_id': document_id,
                'paragraph_id': paragraph_id
            }
        ).save(request.data, with_valid=True))

    @extend_schema(
        methods=['GET'],
        summary=_('Get a list of paragraph questions'),
        description=_('Get a list of paragraph questions'),
        operation_id=_('Get a list of paragraph questions'),
        parameters=ParagraphGetAPI.get_parameters(),
        responses=ParagraphGetAPI.get_response(),
        tags=[_('Knowledge Base/Documentation/Paragraph')]
    )
    # Read-only endpoint: require the same read permission as the paragraph
    # list view rather than edit permission.
    @has_permissions(PermissionConstants.DOCUMENT_READ.get_workspace_permission())
    def get(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str, paragraph_id: str):
        """Return the problems attached to the paragraph."""
        return result.success(ParagraphSerializers.Problem(
            data={
                'workspace_id': workspace_id,
                "knowledge_id": knowledge_id,
                'document_id': document_id,
                'paragraph_id': paragraph_id
            }
        ).list(with_valid=True))
class UnAssociation(APIView):
    """Remove the link between a problem and a paragraph."""
    authentication_classes = [TokenAuth]

    @extend_schema(
        methods=['PUT'],
        summary=_('Disassociation issue'),
        description=_('Disassociation issue'),
        operation_id=_('Disassociation issue'),
        parameters=UnAssociationAPI.get_parameters(),
        responses=UnAssociationAPI.get_response(),
        tags=[_('Knowledge Base/Documentation/Paragraph')]
    )
    @has_permissions(PermissionConstants.DOCUMENT_EDIT.get_workspace_permission())
    def put(self, request: Request,
            workspace_id: str, knowledge_id: str, document_id: str, paragraph_id: str, problem_id: str):
        """Detach the problem from the paragraph."""
        link = ParagraphSerializers.Association(
            data=dict(
                workspace_id=workspace_id,
                knowledge_id=knowledge_id,
                document_id=document_id,
                paragraph_id=paragraph_id,
                problem_id=problem_id,
            )
        )
        outcome = link.un_association()
        return result.success(outcome)
class Association(APIView):
    """Link an existing problem to a paragraph."""
    authentication_classes = [TokenAuth]

    @extend_schema(
        methods=['PUT'],
        summary=_('Related questions'),
        description=_('Related questions'),
        operation_id=_('Related questions'),
        parameters=AssociationAPI.get_parameters(),
        responses=AssociationAPI.get_response(),
        tags=[_('Knowledge Base/Documentation/Paragraph')]
    )
    @has_permissions(PermissionConstants.DOCUMENT_EDIT.get_workspace_permission())
    def put(self, request: Request,
            workspace_id: str, knowledge_id: str, document_id: str, paragraph_id: str, problem_id: str):
        """Attach the problem to the paragraph."""
        link = ParagraphSerializers.Association(
            data=dict(
                workspace_id=workspace_id,
                knowledge_id=knowledge_id,
                document_id=document_id,
                paragraph_id=paragraph_id,
                problem_id=problem_id,
            )
        )
        outcome = link.association()
        return result.success(outcome)