feat: enhance Document API with create and query functionalities

CaptainB 2025-05-06 11:05:34 +08:00
parent ba5028858c
commit 24e734fb36
9 changed files with 455 additions and 75 deletions

View File

@@ -4,29 +4,7 @@ from drf_spectacular.utils import OpenApiParameter
from common.mixins.api_mixin import APIMixin
from common.result import DefaultResultSerializer
from knowledge.serializers.common import BatchSerializer
from knowledge.serializers.document import DocumentCreateRequest, DocumentInstanceSerializer
class DocumentCreateAPI(APIMixin):
@staticmethod
def get_parameters():
return [
OpenApiParameter(
name="workspace_id",
description="工作空间id",
type=OpenApiTypes.STR,
location='path',
required=True,
)
]
@staticmethod
def get_request():
return DocumentCreateRequest
@staticmethod
def get_response():
return DefaultResultSerializer
from knowledge.serializers.document import DocumentInstanceSerializer
class DocumentSplitAPI(APIMixin):
@@ -127,3 +105,74 @@ class DocumentBatchCreateAPI(APIMixin):
@staticmethod
def get_response():
return DefaultResultSerializer
class DocumentCreateAPI(APIMixin):
@staticmethod
def get_parameters():
return [
OpenApiParameter(
name="workspace_id",
description="工作空间id",
type=OpenApiTypes.STR,
location='path',
required=True,
),
OpenApiParameter(
name="knowledge_id",
description="知识库id",
type=OpenApiTypes.STR,
location='path',
required=True,
),
]
@staticmethod
def get_request():
return DocumentInstanceSerializer
@staticmethod
def get_response():
return DefaultResultSerializer
class DocumentReadAPI(APIMixin):
@staticmethod
def get_parameters():
return [
OpenApiParameter(
name="workspace_id",
description="工作空间id",
type=OpenApiTypes.STR,
location='path',
required=True,
),
OpenApiParameter(
name="knowledge_id",
description="知识库id",
type=OpenApiTypes.STR,
location='path',
required=True,
),
OpenApiParameter(
name="document_id",
description="文档id",
type=OpenApiTypes.STR,
location='path',
required=True,
),
]
@staticmethod
def get_response():
return DefaultResultSerializer
class DocumentEditAPI(DocumentReadAPI):
@staticmethod
def get_request():
return DocumentInstanceSerializer
class DocumentDeleteAPI(DocumentReadAPI):
pass
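Note: these APIMixin subclasses are pure schema declarations; the view layer feeds them into drf-spectacular's extend_schema (see the views file at the end of this commit). A minimal sketch of the assumed contract, since common.mixins.api_mixin itself is not part of this diff:

class APIMixin:
    # assumed interface: each hook returns schema material for extend_schema, or None
    @staticmethod
    def get_parameters():
        return None   # -> list of OpenApiParameter, or None

    @staticmethod
    def get_request():
        return None   # -> request serializer class, or None

    @staticmethod
    def get_response():
        return None   # -> response serializer class, or None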

View File

@@ -1,16 +1,18 @@
import logging
import os
import traceback
from functools import reduce
from typing import Dict, List
import uuid_utils.compat as uuid
from celery_once import AlreadyQueued
from django.db import transaction
from django.db import transaction, models
from django.db.models import QuerySet, Model
from django.db.models.functions import Substr, Reverse
from django.utils.translation import gettext_lazy as _
from rest_framework import serializers
from common.db.search import native_search
from common.db.search import native_search, get_dynamics_model, native_page_search
from common.event import ListenerManagement
from common.event.common import work_thread_pool
from common.exception.app_exception import AppApiException
@@ -23,12 +25,15 @@ from common.handle.impl.text.xls_split_handle import XlsSplitHandle
from common.handle.impl.text.xlsx_split_handle import XlsxSplitHandle
from common.handle.impl.text.zip_split_handle import ZipSplitHandle
from common.utils.common import post, get_file_content, bulk_create_in_batches
from common.utils.fork import Fork
from common.utils.split_model import get_split_model
from knowledge.models import Knowledge, Paragraph, Problem, Document, KnowledgeType, ProblemParagraphMapping, State, \
TaskType, File
from knowledge.serializers.common import ProblemParagraphManage, BatchSerializer
from knowledge.serializers.common import ProblemParagraphManage, BatchSerializer, get_embedding_model_id_by_knowledge_id
from knowledge.serializers.paragraph import ParagraphSerializers, ParagraphInstanceSerializer, \
delete_problems_and_mappings
from knowledge.task import embedding_by_document, delete_embedding_by_document_list
from knowledge.task.embedding import embedding_by_document, delete_embedding_by_document_list, \
delete_embedding_by_document
from maxkb.const import PROJECT_DIR
default_split_handle = TextSplitHandle()
@@ -62,13 +67,6 @@ class DocumentInstanceSerializer(serializers.Serializer):
paragraphs = ParagraphInstanceSerializer(required=False, many=True, allow_null=True)
class DocumentCreateRequest(serializers.Serializer):
name = serializers.CharField(required=True, label=_('knowledge name'), max_length=64, min_length=1)
desc = serializers.CharField(required=True, label=_('knowledge description'), max_length=256, min_length=1)
embedding_model_id = serializers.UUIDField(required=True, label=_('embedding model'))
documents = DocumentInstanceSerializer(required=False, many=True)
class DocumentSplitRequest(serializers.Serializer):
file = serializers.ListField(required=True, label=_('file list'))
limit = serializers.IntegerField(required=False, label=_('limit'))
@@ -80,18 +78,153 @@ class DocumentSplitRequest(serializers.Serializer):
with_filter = serializers.BooleanField(required=False, label=_('Auto Clean'))
class DocumentBatchRequest(serializers.Serializer):
file = serializers.ListField(required=True, label=_('file list'))
limit = serializers.IntegerField(required=False, label=_('limit'))
patterns = serializers.ListField(
required=False,
child=serializers.CharField(required=True, label=_('patterns')),
label=_('patterns')
)
with_filter = serializers.BooleanField(required=False, label=_('Auto Clean'))
class DocumentSerializers(serializers.Serializer):
class Query(serializers.Serializer):
# knowledge base id
workspace_id = serializers.CharField(required=True, label=_('workspace id'))
knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id'))
name = serializers.CharField(required=False, max_length=128, min_length=1, label=_('document name'))
hit_handling_method = serializers.CharField(required=False, label=_('hit handling method'))
is_active = serializers.BooleanField(required=False, label=_('document is active'))
task_type = serializers.IntegerField(required=False, label=_('task type'))
status = serializers.CharField(required=False, label=_('status'))
order_by = serializers.CharField(required=False, label=_('order by'))
def get_query_set(self):
query_set = QuerySet(model=Document)
query_set = query_set.filter(**{'knowledge_id': self.data.get("knowledge_id")})
if 'name' in self.data and self.data.get('name') is not None:
query_set = query_set.filter(**{'name__icontains': self.data.get('name')})
if 'hit_handling_method' in self.data and self.data.get('hit_handling_method') is not None:
query_set = query_set.filter(**{'hit_handling_method': self.data.get('hit_handling_method')})
if 'is_active' in self.data and self.data.get('is_active') is not None:
query_set = query_set.filter(**{'is_active': self.data.get('is_active')})
if 'status' in self.data and self.data.get('status') is not None:
task_type = self.data.get('task_type')
status = self.data.get('status')
if task_type is not None:
query_set = query_set.annotate(
reversed_status=Reverse('status'),
task_type_status=Substr('reversed_status', TaskType(task_type).value, 1),
).filter(task_type_status=State(status).value).values('id')
else:
if status != State.SUCCESS.value:
query_set = query_set.filter(status__icontains=status)
else:
query_set = query_set.filter(status__iregex='^[2n]*$')
order_by = self.data.get('order_by', '')
order_by_query_set = QuerySet(model=get_dynamics_model(
{'char_length': models.CharField(), 'paragraph_count': models.IntegerField(),
"update_time": models.IntegerField(), 'create_time': models.DateTimeField()}))
if order_by:
order_by_query_set = order_by_query_set.order_by(order_by)
else:
order_by_query_set = order_by_query_set.order_by('-create_time', 'id')
return {
'document_custom_sql': query_set,
'order_by_query': order_by_query_set
}
def list(self, with_valid=False):
if with_valid:
self.is_valid(raise_exception=True)
query_set = self.get_query_set()
return native_search(query_set, select_string=get_file_content(
os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql', 'list_document.sql')))
def page(self, current_page, page_size):
query_set = self.get_query_set()
return native_page_search(current_page, page_size, query_set, select_string=get_file_content(
os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql', 'list_document.sql')))
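Note: the status filter relies on Document.status being stored as a string of per-task state codes read from the right-hand end; Reverse plus Substr(reversed, TaskType(task_type).value, 1) picks out the single character for the requested task type. A plain-Python sketch of that lookup (assumption: State.SUCCESS serializes to '2' and 'n' marks a slot with no task, which is what the '^[2n]*$' regex for the overall-success case suggests):

def task_state(status: str, task_type: int) -> str:
    # mirrors Reverse('status') + Substr(reversed_status, task_type, 1); Substr is 1-based
    return status[::-1][task_type - 1]

assert task_state('n12', task_type=1) == '2'   # slot 1 finished (assumed codes)
assert task_state('n12', task_type=2) == '1'   # slot 2 in another state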
class Sync(serializers.Serializer):
document_id = serializers.UUIDField(required=True, label=_('document id'))
def is_valid(self, *, raise_exception=False):
super().is_valid(raise_exception=True)
document_id = self.data.get('document_id')
first = QuerySet(Document).filter(id=document_id).first()
if first is None:
raise AppApiException(500, _('document id not exist'))
if first.type != KnowledgeType.WEB:
raise AppApiException(500, _('Synchronization is only supported for web site types'))
def sync(self, with_valid=True, with_embedding=True):
if with_valid:
self.is_valid(raise_exception=True)
document_id = self.data.get('document_id')
document = QuerySet(Document).filter(id=document_id).first()
state = State.SUCCESS
if document.type != KnowledgeType.WEB:
return True
try:
ListenerManagement.update_status(QuerySet(Document).filter(id=document_id),
TaskType.SYNC,
State.PENDING)
ListenerManagement.get_aggregation_document_status(document_id)()
source_url = document.meta.get('source_url')
selector_list = document.meta.get('selector').split(" ") if 'selector' in document.meta and document.meta.get('selector') is not None else []
result = Fork(source_url, selector_list).fork()
if result.status == 200:
# delete paragraphs
QuerySet(model=Paragraph).filter(document_id=document_id).delete()
# delete problems
QuerySet(model=ProblemParagraphMapping).filter(document_id=document_id).delete()
delete_problems_and_mappings([document_id])
# delete embeddings from the vector store
delete_embedding_by_document(document_id)
paragraphs = get_split_model('web.md').parse(result.content)
char_length = reduce(lambda x, y: x + y,
[len(p.get('content')) for p in paragraphs],
0)
QuerySet(Document).filter(id=document_id).update(char_length=char_length)
document_paragraph_model = DocumentSerializers.Create.get_paragraph_model(document, paragraphs)
paragraph_model_list = document_paragraph_model.get('paragraph_model_list')
problem_paragraph_object_list = document_paragraph_model.get('problem_paragraph_object_list')
problem_model_list, problem_paragraph_mapping_list = ProblemParagraphManage(
problem_paragraph_object_list, document.knowledge_id).to_problem_model_list()
# bulk insert paragraphs
QuerySet(Paragraph).bulk_create(paragraph_model_list) if len(paragraph_model_list) > 0 else None
# bulk insert problems
QuerySet(Problem).bulk_create(problem_model_list) if len(problem_model_list) > 0 else None
# insert problem-paragraph mappings
QuerySet(ProblemParagraphMapping).bulk_create(problem_paragraph_mapping_list) if len(problem_paragraph_mapping_list) > 0 else None
# vectorize (embedding)
if with_embedding:
embedding_model_id = get_embedding_model_id_by_knowledge_id(document.knowledge_id)
ListenerManagement.update_status(QuerySet(Document).filter(id=document_id),
TaskType.EMBEDDING,
State.PENDING)
ListenerManagement.update_status(QuerySet(Paragraph).filter(document_id=document_id),
TaskType.EMBEDDING,
State.PENDING)
ListenerManagement.get_aggregation_document_status(document_id)()
embedding_by_document.delay(document_id, embedding_model_id)
else:
state = State.FAILURE
except Exception as e:
logging.getLogger("max_kb_error").error(f'{str(e)}:{traceback.format_exc()}')
state = State.FAILURE
ListenerManagement.update_status(
QuerySet(Document).filter(id=document_id),
TaskType.SYNC,
state
)
ListenerManagement.update_status(
QuerySet(Paragraph).filter(document_id=document_id),
TaskType.SYNC,
state
)
ListenerManagement.get_aggregation_document_status(document_id)()
return True
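Note: Sync validates its own input, so it is safe to call directly. A usage sketch (assumption: doc_id is the UUID of an existing KnowledgeType.WEB document):

DocumentSerializers.Sync(data={'document_id': str(doc_id)}).sync(with_valid=True, with_embedding=True)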
class Operate(serializers.Serializer):
document_id = serializers.UUIDField(required=True, label=_('document id'))
knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id'))
@@ -148,6 +281,7 @@ class DocumentSerializers(serializers.Serializer):
raise AppApiException(500, _('The task is being executed, please do not send it repeatedly.'))
class Create(serializers.Serializer):
workspace_id = serializers.UUIDField(required=True, label=_('workspace id'))
knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id'))
def is_valid(self, *, raise_exception=False):
@@ -166,7 +300,7 @@ class DocumentSerializers(serializers.Serializer):
@transaction.atomic
def save(self, instance: Dict, with_valid=False, **kwargs):
if with_valid:
DocumentCreateRequest(data=instance).is_valid(raise_exception=True)
DocumentInstanceSerializer(data=instance).is_valid(raise_exception=True)
self.is_valid(raise_exception=True)
knowledge_id = self.data.get('knowledge_id')
document_paragraph_model = self.get_document_paragraph_model(knowledge_id, instance)

View File

@@ -16,7 +16,8 @@ from knowledge.models import Knowledge, KnowledgeScope, KnowledgeType, Document,
ProblemParagraphMapping, ApplicationKnowledgeMapping
from knowledge.serializers.common import ProblemParagraphManage, get_embedding_model_id_by_knowledge_id, MetaSerializer
from knowledge.serializers.document import DocumentSerializers
from knowledge.task import sync_web_knowledge, embedding_by_knowledge, delete_embedding_by_knowledge
from knowledge.task.embedding import embedding_by_knowledge, delete_embedding_by_knowledge
from knowledge.task.sync import sync_web_knowledge
from maxkb.conf import PROJECT_DIR

View File

@@ -10,12 +10,13 @@ from rest_framework import serializers
from common.exception.app_exception import AppApiException
from common.utils.common import post
from knowledge.models import Paragraph, Problem, Document, ProblemParagraphMapping
from knowledge.models import Paragraph, Problem, Document, ProblemParagraphMapping, SourceType
from knowledge.serializers.common import ProblemParagraphObject, ProblemParagraphManage, \
get_embedding_model_id_by_knowledge_id, update_document_char_length
from knowledge.serializers.problem import ProblemInstanceSerializer, ProblemSerializer
from knowledge.task import embedding_by_paragraph, enable_embedding_by_paragraph, disable_embedding_by_paragraph, \
delete_embedding_by_paragraph
from knowledge.serializers.problem import ProblemInstanceSerializer, ProblemSerializer, ProblemSerializers
from knowledge.task.embedding import embedding_by_paragraph, enable_embedding_by_paragraph, \
disable_embedding_by_paragraph, \
delete_embedding_by_paragraph, embedding_by_problem as embedding_by_problem_task
class ParagraphSerializer(serializers.ModelSerializer):
@@ -49,6 +50,70 @@ class ParagraphSerializers(serializers.Serializer):
allow_blank=True)
content = serializers.CharField(required=True, max_length=102400, label=_('section title'))
class Problem(serializers.Serializer):
knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id'))
document_id = serializers.UUIDField(required=True, label=_('document id'))
paragraph_id = serializers.UUIDField(required=True, label=_('paragraph id'))
def is_valid(self, *, raise_exception=False):
super().is_valid(raise_exception=True)
if not QuerySet(Paragraph).filter(id=self.data.get('paragraph_id')).exists():
raise AppApiException(500, _('Paragraph id does not exist'))
def list(self, with_valid=False):
"""
Get the problem list
:param with_valid: whether to validate first
:return: problem list
"""
if with_valid:
self.is_valid(raise_exception=True)
problem_paragraph_mapping = QuerySet(ProblemParagraphMapping).filter(
knowledge_id=self.data.get("knowledge_id"),
paragraph_id=self.data.get('paragraph_id'))
return [ProblemSerializer(row).data for row in
QuerySet(Problem).filter(id__in=[row.problem_id for row in problem_paragraph_mapping])]
@transaction.atomic
def save(self, instance: Dict, with_valid=True, with_embedding=True, embedding_by_problem=None):
if with_valid:
self.is_valid()
ProblemInstanceSerializer(data=instance).is_valid(raise_exception=True)
problem = QuerySet(Problem).filter(knowledge_id=self.data.get('knowledge_id'),
content=instance.get('content')).first()
if problem is None:
problem = Problem(id=uuid.uuid7(), knowledge_id=self.data.get('knowledge_id'),
content=instance.get('content'))
problem.save()
if QuerySet(ProblemParagraphMapping).filter(knowledge_id=self.data.get('knowledge_id'),
problem_id=problem.id,
paragraph_id=self.data.get('paragraph_id')).exists():
raise AppApiException(500, _('Already associated, please do not associate again'))
problem_paragraph_mapping = ProblemParagraphMapping(
id=uuid.uuid7(),
problem_id=problem.id,
document_id=self.data.get('document_id'),
paragraph_id=self.data.get('paragraph_id'),
knowledge_id=self.data.get('knowledge_id')
)
problem_paragraph_mapping.save()
model_id = get_embedding_model_id_by_knowledge_id(self.data.get('knowledge_id'))
if with_embedding:
embedding_by_problem_task({
'text': problem.content,
'is_active': True,
'source_type': SourceType.PROBLEM,
'source_id': problem_paragraph_mapping.id,
'document_id': self.data.get('document_id'),
'paragraph_id': self.data.get('paragraph_id'),
'knowledge_id': self.data.get('knowledge_id'),
}, model_id)
return ProblemSerializers.Operate(
data={'knowledge_id': self.data.get('knowledge_id'), 'problem_id': problem.id}
).one(with_valid=True)
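Note: save deduplicates by content — an existing Problem with the same text in the knowledge base is reused and only the mapping is created — and the embedding record uses the mapping id as its source_id, which is what ProblemSerializers.Operate.delete later collects as source_ids. A usage sketch (assumption: the three ids are existing, related UUIDs):

ParagraphSerializers.Problem(
    data={'knowledge_id': kid, 'document_id': did, 'paragraph_id': pid}
).save(instance={'content': 'How do I reset my password?'}, with_valid=True)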
class Operate(serializers.Serializer):
# paragraph id
paragraph_id = serializers.UUIDField(required=True, label=_('paragraph id'))

View File

@@ -1,7 +1,17 @@
import os
from typing import Dict
from django.db import transaction
from django.db.models import QuerySet
from django.utils.translation import gettext_lazy as _
from rest_framework import serializers
from knowledge.models import Problem
from common.db.search import native_search
from common.utils.common import get_file_content
from knowledge.models import Problem, ProblemParagraphMapping, Paragraph, Knowledge
from knowledge.serializers.common import get_embedding_model_id_by_knowledge_id
from knowledge.task.embedding import delete_embedding_by_source_ids, update_problem_embedding
from maxkb.const import PROJECT_DIR
class ProblemSerializer(serializers.ModelSerializer):
@@ -13,3 +23,55 @@ class ProblemSerializer(serializers.ModelSerializer):
class ProblemInstanceSerializer(serializers.Serializer):
id = serializers.CharField(required=False, label=_('problem id'))
content = serializers.CharField(required=True, max_length=256, label=_('content'))
class ProblemSerializers(serializers.Serializer):
class Operate(serializers.Serializer):
knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id'))
problem_id = serializers.UUIDField(required=True, label=_('problem id'))
def list_paragraph(self, with_valid=True):
if with_valid:
self.is_valid(raise_exception=True)
problem_paragraph_mapping = QuerySet(ProblemParagraphMapping).filter(
knowledge_id=self.data.get("knowledge_id"),
problem_id=self.data.get("problem_id")
)
if problem_paragraph_mapping is None or len(problem_paragraph_mapping) == 0:
return []
return native_search(
QuerySet(Paragraph).filter(id__in=[row.paragraph_id for row in problem_paragraph_mapping]),
select_string=get_file_content(
os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql', 'list_paragraph.sql')))
def one(self, with_valid=True):
if with_valid:
self.is_valid(raise_exception=True)
return ProblemInstanceSerializer(QuerySet(Problem).get(**{'id': self.data.get('problem_id')})).data
@transaction.atomic
def delete(self, with_valid=True):
if with_valid:
self.is_valid(raise_exception=True)
problem_paragraph_mapping_list = QuerySet(ProblemParagraphMapping).filter(
knowledge_id=self.data.get('knowledge_id'),
problem_id=self.data.get('problem_id'))
source_ids = [row.id for row in problem_paragraph_mapping_list]
problem_paragraph_mapping_list.delete()
QuerySet(Problem).filter(id=self.data.get('problem_id')).delete()
delete_embedding_by_source_ids(source_ids)
return True
@transaction.atomic
def edit(self, instance: Dict, with_valid=True):
if with_valid:
self.is_valid(raise_exception=True)
problem_id = self.data.get('problem_id')
knowledge_id = self.data.get('knowledge_id')
content = instance.get('content')
problem = QuerySet(Problem).filter(id=problem_id, knowledge_id=knowledge_id).first()
QuerySet(Knowledge).filter(id=knowledge_id)
problem.content = content
problem.save()
model_id = get_embedding_model_id_by_knowledge_id(knowledge_id)
update_problem_embedding(problem_id, content, model_id)
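Note: edit re-embeds through update_problem_embedding, while delete removes the mappings first and uses their ids to purge the vector store. A usage sketch (assumption: both ids exist and belong to the same knowledge base):

op = ProblemSerializers.Operate(data={'knowledge_id': kid, 'problem_id': pid})
op.edit({'content': 'updated wording'})
op.delete()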

View File

@@ -1,2 +0,0 @@
from .sync import *
from .embedding import *

View File

@@ -11,24 +11,28 @@ from django.utils.translation import gettext_lazy as _
from common.utils.fork import ChildLink, Fork
from common.utils.split_model import get_split_model
from knowledge.models.knowledge import KnowledgeType, Document, Knowledge, Status
from knowledge.serializers.document import DocumentSerializers
from knowledge.serializers.paragraph import ParagraphSerializers
max_kb_error = logging.getLogger("max_kb_error")
max_kb = logging.getLogger("max_kb")
def get_save_handler(knowledge_id, selector):
from knowledge.serializers import DocumentSerializers
def handler(child_link: ChildLink, response: Fork.Response):
if response.status == 200:
try:
document_name = child_link.tag.text if child_link.tag is not None and len(
child_link.tag.text.strip()) > 0 else child_link.url
paragraphs = get_split_model('web.md').parse(response.content)
DocumentSerializers.Create(data={'knowledge_id': knowledge_id}).save(
{'name': document_name, 'paragraphs': paragraphs,
'meta': {'source_url': child_link.url, 'selector': selector},
'type': KnowledgeType.WEB}, with_valid=True)
DocumentSerializers.Create(
data={'knowledge_id': knowledge_id}
).save({
'name': document_name,
'paragraphs': paragraphs,
'meta': {'source_url': child_link.url, 'selector': selector},
'type': KnowledgeType.WEB
}, with_valid=True)
except Exception as e:
logging.getLogger("max_kb_error").error(f'{str(e)}:{traceback.format_exc()}')
@@ -36,7 +40,6 @@ def get_save_handler(knowledge_id, selector):
def get_sync_handler(knowledge_id):
from knowledge.serializers import DocumentSerializers
knowledge = QuerySet(Knowledge).filter(id=knowledge_id).first()
def handler(child_link: ChildLink, response: Fork.Response):
@@ -52,10 +55,14 @@ def get_sync_handler(knowledge_id):
DocumentSerializers.Sync(data={'document_id': first.id}).sync()
else:
# insert
DocumentSerializers.Create(data={'knowledge_id': knowledge.id}).save(
{'name': document_name, 'paragraphs': paragraphs,
'meta': {'source_url': child_link.url.strip(), 'selector': knowledge.meta.get('selector')},
'type': KnowledgeType.WEB}, with_valid=True)
DocumentSerializers.Create(
data={'knowledge_id': knowledge.id}
).save({
'name': document_name,
'paragraphs': paragraphs,
'meta': {'source_url': child_link.url.strip(), 'selector': knowledge.meta.get('selector')},
'type': KnowledgeType.WEB
}, with_valid=True)
except Exception as e:
logging.getLogger("max_kb_error").error(f'{str(e)}:{traceback.format_exc()}')
@@ -63,8 +70,6 @@ def get_sync_web_document_handler(knowledge_id):
def get_sync_web_document_handler(knowledge_id):
from knowledge.serializers import DocumentSerializers
def handler(source_url: str, selector, response: Fork.Response):
if response.status == 200:
try:
@@ -88,7 +93,6 @@ def get_sync_web_document_handler(knowledge_id):
def save_problem(knowledge_id, document_id, paragraph_id, problem):
from knowledge.serializers import ParagraphSerializers
# print(f"knowledge_id: {knowledge_id}")
# print(f"document_id: {document_id}")
# print(f"paragraph_id: {paragraph_id}")
@@ -101,7 +105,11 @@ def save_problem(knowledge_id, document_id, paragraph_id, problem):
return
try:
ParagraphSerializers.Problem(
data={"knowledge_id": knowledge_id, 'document_id': document_id,
'paragraph_id': paragraph_id}).save(instance={"content": problem}, with_valid=True)
data={
"knowledge_id": knowledge_id,
'document_id': document_id,
'paragraph_id': paragraph_id
}
).save(instance={"content": problem}, with_valid=True)
except Exception as e:
max_kb_error.error(_('Association problem failed {error}').format(error=str(e)))

View File

@@ -8,7 +8,9 @@ urlpatterns = [
path('workspace/<str:workspace_id>/knowledge/base', views.KnowledgeBaseView.as_view()),
path('workspace/<str:workspace_id>/knowledge/web', views.KnowledgeWebView.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>', views.KnowledgeView.Operate.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document', views.DocumentView.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/split', views.DocumentView.Split.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/batch', views.DocumentView.Batch.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>', views.DocumentView.Operate.as_view()),
path('workspace/<str:workspace_id>/knowledge/<int:current_page>/<int:page_size>', views.KnowledgeView.Page.as_view()),
]
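Note: together these routes give the document resource full CRUD plus the split/batch helpers. A client-side sketch with requests (hypothetical: the host, URL prefix, 'Authorization' header format, and the 'data' response envelope are assumptions, not confirmed by this diff):

import requests

BASE = 'http://localhost:8080/api'      # hypothetical mount point
HEADERS = {'Authorization': '<token>'}  # assumption: TokenAuth reads this header
ws, kb = '<workspace-id>', '<knowledge-uuid>'

created = requests.post(f'{BASE}/workspace/{ws}/knowledge/{kb}/document',
                        json={'name': 'intro.md', 'paragraphs': []}, headers=HEADERS)
doc_id = created.json()['data']['id']   # assumption: result.success wraps the payload in 'data'
requests.get(f'{BASE}/workspace/{ws}/knowledge/{kb}/document/{doc_id}', headers=HEADERS)
requests.put(f'{BASE}/workspace/{ws}/knowledge/{kb}/document/{doc_id}',
             json={'name': 'intro-v2.md'}, headers=HEADERS)
requests.delete(f'{BASE}/workspace/{ws}/knowledge/{kb}/document/{doc_id}', headers=HEADERS)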

View File

@@ -8,28 +8,45 @@ from common.auth import TokenAuth
from common.auth.authentication import has_permissions
from common.constants.permission_constants import PermissionConstants
from common.result import result
from knowledge.api.document import DocumentSplitAPI, DocumentBatchAPI, DocumentBatchCreateAPI
from knowledge.api.document import DocumentSplitAPI, DocumentBatchAPI, DocumentBatchCreateAPI, DocumentCreateAPI, \
DocumentReadAPI, DocumentEditAPI, DocumentDeleteAPI
from knowledge.api.knowledge import KnowledgeTreeReadAPI
from knowledge.serializers.document import DocumentSerializers
from knowledge.serializers.knowledge import KnowledgeSerializer
class DocumentView(APIView):
authentication_classes = [TokenAuth]
@extend_schema(
methods=['POST'],
description=_('Create document'),
operation_id=_('Create document'),
request=DocumentCreateAPI.get_request(),
parameters=DocumentCreateAPI.get_parameters(),
responses=DocumentCreateAPI.get_response(),
tags=[_('Knowledge Base/Documentation')]
)
@has_permissions(PermissionConstants.DOCUMENT_CREATE.get_workspace_permission())
def post(self, request: Request, workspace_id: str, knowledge_id: str):
return result.success(
DocumentSerializers.Create(
data={'workspace_id': workspace_id, 'knowledge_id': knowledge_id},
).save(request.data))
@extend_schema(
methods=['GET'],
description=_('Get document'),
operation_id=_('Get document'),
parameters=KnowledgeTreeReadAPI.get_parameters(),
responses=KnowledgeTreeReadAPI.get_response(),
tags=[_('Knowledge Base')]
tags=[_('Knowledge Base/Documentation')]
)
@has_permissions(PermissionConstants.DOCUMENT_READ.get_workspace_permission())
def get(self, request: Request, workspace_id: str):
return result.success(KnowledgeSerializer.Query(
def get(self, request: Request, workspace_id: str, knowledge_id: str):
return result.success(DocumentSerializers.Query(
data={
'workspace_id': workspace_id,
'knowledge_id': knowledge_id,
'folder_id': request.query_params.get('folder_id'),
'name': request.query_params.get('name'),
'desc': request.query_params.get("desc"),
@@ -37,6 +54,50 @@ class DocumentView(APIView):
}
).list())
class Operate(APIView):
authentication_classes = [TokenAuth]
@extend_schema(
description=_('Get document details'),
operation_id=_('Get document details'),
parameters=DocumentReadAPI.get_parameters(),
responses=DocumentReadAPI.get_response(),
tags=[_('Knowledge Base/Documentation')]
)
@has_permissions(PermissionConstants.DOCUMENT_READ.get_workspace_permission())
def get(self, request: Request, knowledge_id: str, document_id: str):
operate = DocumentSerializers.Operate(data={'document_id': document_id, 'knowledge_id': knowledge_id})
operate.is_valid(raise_exception=True)
return result.success(operate.one())
@extend_schema(
description=_('Modify document'),
operation_id=_('Modify document'),
parameters=DocumentEditAPI.get_parameters(),
request=DocumentEditAPI.get_request(),
responses=DocumentEditAPI.get_response(),
tags=[_('Knowledge Base/Documentation')]
)
@has_permissions(PermissionConstants.DOCUMENT_EDIT.get_workspace_permission())
def put(self, request: Request, knowledge_id: str, document_id: str):
return result.success(
DocumentSerializers.Operate(data={'document_id': document_id, 'knowledge_id': knowledge_id}).edit(
request.data,
with_valid=True))
@extend_schema(
description=_('Delete document'),
operation_id=_('Delete document'),
parameters=DocumentDeleteAPI.get_parameters(),
responses=DocumentDeleteAPI.get_response(),
tags=[_('Knowledge Base/Documentation')]
)
@has_permissions(PermissionConstants.DOCUMENT_DELETE.get_workspace_permission())
def delete(self, request: Request, knowledge_id: str, document_id: str):
operate = DocumentSerializers.Operate(data={'document_id': document_id, 'knowledge_id': knowledge_id})
operate.is_valid(raise_exception=True)
return result.success(operate.delete())
class Split(APIView):
authentication_classes = [TokenAuth]
parser_classes = [MultiPartParser]