From 24e734fb369498d77710df086ac8fd0fd4a75527 Mon Sep 17 00:00:00 2001 From: CaptainB Date: Tue, 6 May 2025 11:05:34 +0800 Subject: [PATCH] feat: enhance Document API with create and query functionalities --- apps/knowledge/api/document.py | 95 ++++++++++--- apps/knowledge/serializers/document.py | 180 +++++++++++++++++++++--- apps/knowledge/serializers/knowledge.py | 3 +- apps/knowledge/serializers/paragraph.py | 73 +++++++++- apps/knowledge/serializers/problem.py | 64 ++++++++- apps/knowledge/task/__init__.py | 2 - apps/knowledge/task/handler.py | 40 +++--- apps/knowledge/urls.py | 2 + apps/knowledge/views/document.py | 71 +++++++++- 9 files changed, 455 insertions(+), 75 deletions(-) diff --git a/apps/knowledge/api/document.py b/apps/knowledge/api/document.py index 4ec48e10..737864d4 100644 --- a/apps/knowledge/api/document.py +++ b/apps/knowledge/api/document.py @@ -4,29 +4,7 @@ from drf_spectacular.utils import OpenApiParameter from common.mixins.api_mixin import APIMixin from common.result import DefaultResultSerializer from knowledge.serializers.common import BatchSerializer -from knowledge.serializers.document import DocumentCreateRequest, DocumentInstanceSerializer - - -class DocumentCreateAPI(APIMixin): - @staticmethod - def get_parameters(): - return [ - OpenApiParameter( - name="workspace_id", - description="工作空间id", - type=OpenApiTypes.STR, - location='path', - required=True, - ) - ] - - @staticmethod - def get_request(): - return DocumentCreateRequest - - @staticmethod - def get_response(): - return DefaultResultSerializer +from knowledge.serializers.document import DocumentInstanceSerializer class DocumentSplitAPI(APIMixin): @@ -127,3 +105,74 @@ class DocumentBatchCreateAPI(APIMixin): @staticmethod def get_response(): return DefaultResultSerializer + + +class DocumentCreateAPI(APIMixin): + @staticmethod + def get_parameters(): + return [ + OpenApiParameter( + name="workspace_id", + description="工作空间id", + type=OpenApiTypes.STR, + location='path', + required=True, + ), + OpenApiParameter( + name="knowledge_id", + description="知识库id", + type=OpenApiTypes.STR, + location='path', + required=True, + ), + ] + + @staticmethod + def get_request(): + return DocumentInstanceSerializer + + @staticmethod + def get_response(): + return DefaultResultSerializer + + +class DocumentReadAPI(APIMixin): + @staticmethod + def get_parameters(): + return [ + OpenApiParameter( + name="workspace_id", + description="工作空间id", + type=OpenApiTypes.STR, + location='path', + required=True, + ), + OpenApiParameter( + name="knowledge_id", + description="知识库id", + type=OpenApiTypes.STR, + location='path', + required=True, + ), + OpenApiParameter( + name="document_id", + description="文档id", + type=OpenApiTypes.STR, + location='path', + required=True, + ), + ] + + @staticmethod + def get_response(): + return DefaultResultSerializer + + +class DocumentEditAPI(DocumentReadAPI): + @staticmethod + def get_request(): + return DocumentInstanceSerializer + + +class DocumentDeleteAPI(DocumentReadAPI): + pass diff --git a/apps/knowledge/serializers/document.py b/apps/knowledge/serializers/document.py index 8c83fc9b..3d0d860a 100644 --- a/apps/knowledge/serializers/document.py +++ b/apps/knowledge/serializers/document.py @@ -1,16 +1,18 @@ +import logging import os +import traceback from functools import reduce from typing import Dict, List import uuid_utils.compat as uuid from celery_once import AlreadyQueued -from django.db import transaction +from django.db import transaction, models from django.db.models import QuerySet, Model from django.db.models.functions import Substr, Reverse from django.utils.translation import gettext_lazy as _ from rest_framework import serializers -from common.db.search import native_search +from common.db.search import native_search, get_dynamics_model, native_page_search from common.event import ListenerManagement from common.event.common import work_thread_pool from common.exception.app_exception import AppApiException @@ -23,12 +25,15 @@ from common.handle.impl.text.xls_split_handle import XlsSplitHandle from common.handle.impl.text.xlsx_split_handle import XlsxSplitHandle from common.handle.impl.text.zip_split_handle import ZipSplitHandle from common.utils.common import post, get_file_content, bulk_create_in_batches +from common.utils.fork import Fork +from common.utils.split_model import get_split_model from knowledge.models import Knowledge, Paragraph, Problem, Document, KnowledgeType, ProblemParagraphMapping, State, \ TaskType, File -from knowledge.serializers.common import ProblemParagraphManage, BatchSerializer +from knowledge.serializers.common import ProblemParagraphManage, BatchSerializer, get_embedding_model_id_by_knowledge_id from knowledge.serializers.paragraph import ParagraphSerializers, ParagraphInstanceSerializer, \ delete_problems_and_mappings -from knowledge.task import embedding_by_document, delete_embedding_by_document_list +from knowledge.task.embedding import embedding_by_document, delete_embedding_by_document_list, \ + delete_embedding_by_document from maxkb.const import PROJECT_DIR default_split_handle = TextSplitHandle() @@ -62,13 +67,6 @@ class DocumentInstanceSerializer(serializers.Serializer): paragraphs = ParagraphInstanceSerializer(required=False, many=True, allow_null=True) -class DocumentCreateRequest(serializers.Serializer): - name = serializers.CharField(required=True, label=_('knowledge name'), max_length=64, min_length=1) - desc = serializers.CharField(required=True, label=_('knowledge description'), max_length=256, min_length=1) - embedding_model_id = serializers.UUIDField(required=True, label=_('embedding model')) - documents = DocumentInstanceSerializer(required=False, many=True) - - class DocumentSplitRequest(serializers.Serializer): file = serializers.ListField(required=True, label=_('file list')) limit = serializers.IntegerField(required=False, label=_('limit')) @@ -80,18 +78,153 @@ class DocumentSplitRequest(serializers.Serializer): with_filter = serializers.BooleanField(required=False, label=_('Auto Clean')) -class DocumentBatchRequest(serializers.Serializer): - file = serializers.ListField(required=True, label=_('file list')) - limit = serializers.IntegerField(required=False, label=_('limit')) - patterns = serializers.ListField( - required=False, - child=serializers.CharField(required=True, label=_('patterns')), - label=_('patterns') - ) - with_filter = serializers.BooleanField(required=False, label=_('Auto Clean')) - - class DocumentSerializers(serializers.Serializer): + class Query(serializers.Serializer): + # 知识库id + workspace_id = serializers.CharField(required=True, label=_('workspace id')) + knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id')) + name = serializers.CharField(required=False, max_length=128, min_length=1, label=_('document name')) + hit_handling_method = serializers.CharField(required=False, label=_('hit handling method')) + is_active = serializers.BooleanField(required=False, label=_('document is active')) + task_type = serializers.IntegerField(required=False, label=_('task type')) + status = serializers.CharField(required=False, label=_('status')) + order_by = serializers.CharField(required=False, label=_('order by')) + + def get_query_set(self): + query_set = QuerySet(model=Document) + query_set = query_set.filter(**{'knowledge_id': self.data.get("knowledge_id")}) + if 'name' in self.data and self.data.get('name') is not None: + query_set = query_set.filter(**{'name__icontains': self.data.get('name')}) + if 'hit_handling_method' in self.data and self.data.get('hit_handling_method') is not None: + query_set = query_set.filter(**{'hit_handling_method': self.data.get('hit_handling_method')}) + if 'is_active' in self.data and self.data.get('is_active') is not None: + query_set = query_set.filter(**{'is_active': self.data.get('is_active')}) + if 'status' in self.data and self.data.get( + 'status') is not None: + task_type = self.data.get('task_type') + status = self.data.get( + 'status') + if task_type is not None: + query_set = query_set.annotate( + reversed_status=Reverse('status'), + task_type_status=Substr('reversed_status', TaskType(task_type).value, + 1), + ).filter(task_type_status=State(status).value).values('id') + else: + if status != State.SUCCESS.value: + query_set = query_set.filter(status__icontains=status) + else: + query_set = query_set.filter(status__iregex='^[2n]*$') + order_by = self.data.get('order_by', '') + order_by_query_set = QuerySet(model=get_dynamics_model( + {'char_length': models.CharField(), 'paragraph_count': models.IntegerField(), + "update_time": models.IntegerField(), 'create_time': models.DateTimeField()})) + if order_by: + order_by_query_set = order_by_query_set.order_by(order_by) + else: + order_by_query_set = order_by_query_set.order_by('-create_time', 'id') + return { + 'document_custom_sql': query_set, + 'order_by_query': order_by_query_set + } + + def list(self, with_valid=False): + if with_valid: + self.is_valid(raise_exception=True) + query_set = self.get_query_set() + return native_search(query_set, select_string=get_file_content( + os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql', 'list_document.sql'))) + + def page(self, current_page, page_size): + query_set = self.get_query_set() + return native_page_search(current_page, page_size, query_set, select_string=get_file_content( + os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql', 'list_document.sql'))) + + class Sync(serializers.Serializer): + document_id = serializers.UUIDField(required=True, label=_('document id')) + + def is_valid(self, *, raise_exception=False): + super().is_valid(raise_exception=True) + document_id = self.data.get('document_id') + first = QuerySet(Document).filter(id=document_id).first() + if first is None: + raise AppApiException(500, _('document id not exist')) + if first.type != KnowledgeType.WEB: + raise AppApiException(500, _('Synchronization is only supported for web site types')) + + def sync(self, with_valid=True, with_embedding=True): + if with_valid: + self.is_valid(raise_exception=True) + document_id = self.data.get('document_id') + document = QuerySet(Document).filter(id=document_id).first() + state = State.SUCCESS + if document.type != KnowledgeType.WEB: + return True + try: + ListenerManagement.update_status(QuerySet(Document).filter(id=document_id), + TaskType.SYNC, + State.PENDING) + ListenerManagement.get_aggregation_document_status(document_id)() + source_url = document.meta.get('source_url') + selector_list = document.meta.get('selector').split( + " ") if 'selector' in document.meta and document.meta.get('selector') is not None else [] + result = Fork(source_url, selector_list).fork() + if result.status == 200: + # 删除段落 + QuerySet(model=Paragraph).filter(document_id=document_id).delete() + # 删除问题 + QuerySet(model=ProblemParagraphMapping).filter(document_id=document_id).delete() + delete_problems_and_mappings([document_id]) + # 删除向量库 + delete_embedding_by_document(document_id) + paragraphs = get_split_model('web.md').parse(result.content) + char_length = reduce(lambda x, y: x + y, + [len(p.get('content')) for p in paragraphs], + 0) + QuerySet(Document).filter(id=document_id).update(char_length=char_length) + document_paragraph_model = DocumentSerializers.Create.get_paragraph_model(document, paragraphs) + + paragraph_model_list = document_paragraph_model.get('paragraph_model_list') + problem_paragraph_object_list = document_paragraph_model.get('problem_paragraph_object_list') + problem_model_list, problem_paragraph_mapping_list = ProblemParagraphManage( + problem_paragraph_object_list, document.knowledge_id).to_problem_model_list() + # 批量插入段落 + QuerySet(Paragraph).bulk_create(paragraph_model_list) if len(paragraph_model_list) > 0 else None + # 批量插入问题 + QuerySet(Problem).bulk_create(problem_model_list) if len(problem_model_list) > 0 else None + # 插入关联问题 + QuerySet(ProblemParagraphMapping).bulk_create(problem_paragraph_mapping_list) if len( + problem_paragraph_mapping_list) > 0 else None + # 向量化 + if with_embedding: + embedding_model_id = get_embedding_model_id_by_knowledge_id(document.knowledge_id) + ListenerManagement.update_status(QuerySet(Document).filter(id=document_id), + TaskType.EMBEDDING, + State.PENDING) + ListenerManagement.update_status(QuerySet(Paragraph).filter(document_id=document_id), + TaskType.EMBEDDING, + State.PENDING) + ListenerManagement.get_aggregation_document_status(document_id)() + embedding_by_document.delay(document_id, embedding_model_id) + + else: + state = State.FAILURE + except Exception as e: + logging.getLogger("max_kb_error").error(f'{str(e)}:{traceback.format_exc()}') + state = State.FAILURE + ListenerManagement.update_status( + QuerySet(Document).filter(id=document_id), + TaskType.SYNC, + state + ) + ListenerManagement.update_status( + QuerySet(Paragraph).filter(document_id=document_id), + TaskType.SYNC, + state + ) + ListenerManagement.get_aggregation_document_status(document_id)() + return True + class Operate(serializers.Serializer): document_id = serializers.UUIDField(required=True, label=_('document id')) knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id')) @@ -148,6 +281,7 @@ class DocumentSerializers(serializers.Serializer): raise AppApiException(500, _('The task is being executed, please do not send it repeatedly.')) class Create(serializers.Serializer): + workspace_id = serializers.UUIDField(required=True, label=_('workspace id')) knowledge_id = serializers.UUIDField(required=True, label=_('document id')) def is_valid(self, *, raise_exception=False): @@ -166,7 +300,7 @@ class DocumentSerializers(serializers.Serializer): @transaction.atomic def save(self, instance: Dict, with_valid=False, **kwargs): if with_valid: - DocumentCreateRequest(data=instance).is_valid(raise_exception=True) + DocumentInstanceSerializer(data=instance).is_valid(raise_exception=True) self.is_valid(raise_exception=True) knowledge_id = self.data.get('knowledge_id') document_paragraph_model = self.get_document_paragraph_model(knowledge_id, instance) diff --git a/apps/knowledge/serializers/knowledge.py b/apps/knowledge/serializers/knowledge.py index 0e22a74a..a0c659ea 100644 --- a/apps/knowledge/serializers/knowledge.py +++ b/apps/knowledge/serializers/knowledge.py @@ -16,7 +16,8 @@ from knowledge.models import Knowledge, KnowledgeScope, KnowledgeType, Document, ProblemParagraphMapping, ApplicationKnowledgeMapping from knowledge.serializers.common import ProblemParagraphManage, get_embedding_model_id_by_knowledge_id, MetaSerializer from knowledge.serializers.document import DocumentSerializers -from knowledge.task import sync_web_knowledge, embedding_by_knowledge, delete_embedding_by_knowledge +from knowledge.task.embedding import embedding_by_knowledge, delete_embedding_by_knowledge +from knowledge.task.sync import sync_web_knowledge from maxkb.conf import PROJECT_DIR diff --git a/apps/knowledge/serializers/paragraph.py b/apps/knowledge/serializers/paragraph.py index 80291d45..9f512bf3 100644 --- a/apps/knowledge/serializers/paragraph.py +++ b/apps/knowledge/serializers/paragraph.py @@ -10,12 +10,13 @@ from rest_framework import serializers from common.exception.app_exception import AppApiException from common.utils.common import post -from knowledge.models import Paragraph, Problem, Document, ProblemParagraphMapping +from knowledge.models import Paragraph, Problem, Document, ProblemParagraphMapping, SourceType from knowledge.serializers.common import ProblemParagraphObject, ProblemParagraphManage, \ get_embedding_model_id_by_knowledge_id, update_document_char_length -from knowledge.serializers.problem import ProblemInstanceSerializer, ProblemSerializer -from knowledge.task import embedding_by_paragraph, enable_embedding_by_paragraph, disable_embedding_by_paragraph, \ - delete_embedding_by_paragraph +from knowledge.serializers.problem import ProblemInstanceSerializer, ProblemSerializer, ProblemSerializers +from knowledge.task.embedding import embedding_by_paragraph, enable_embedding_by_paragraph, \ + disable_embedding_by_paragraph, \ + delete_embedding_by_paragraph, embedding_by_problem as embedding_by_problem_task class ParagraphSerializer(serializers.ModelSerializer): @@ -49,6 +50,70 @@ class ParagraphSerializers(serializers.Serializer): allow_blank=True) content = serializers.CharField(required=True, max_length=102400, label=_('section title')) + class Problem(serializers.Serializer): + knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id')) + document_id = serializers.UUIDField(required=True, label=_('document id')) + paragraph_id = serializers.UUIDField(required=True, label=_('paragraph id')) + + def is_valid(self, *, raise_exception=False): + super().is_valid(raise_exception=True) + if not QuerySet(Paragraph).filter(id=self.data.get('paragraph_id')).exists(): + raise AppApiException(500, _('Paragraph id does not exist')) + + def list(self, with_valid=False): + """ + 获取问题列表 + :param with_valid: 是否校验 + :return: 问题列表 + """ + if with_valid: + self.is_valid(raise_exception=True) + problem_paragraph_mapping = QuerySet(ProblemParagraphMapping).filter( + knowledge_id=self.data.get("knowledge_id"), + paragraph_id=self.data.get( + 'paragraph_id')) + return [ProblemSerializer(row).data for row in + QuerySet(Problem).filter(id__in=[row.problem_id for row in problem_paragraph_mapping])] + + @transaction.atomic + def save(self, instance: Dict, with_valid=True, with_embedding=True, embedding_by_problem=None): + if with_valid: + self.is_valid() + ProblemInstanceSerializer(data=instance).is_valid(raise_exception=True) + problem = QuerySet(Problem).filter(knowledge_id=self.data.get('knowledge_id'), + content=instance.get('content')).first() + if problem is None: + problem = Problem(id=uuid.uuid7(), knowledge_id=self.data.get('knowledge_id'), + content=instance.get('content')) + problem.save() + if QuerySet(ProblemParagraphMapping).filter(knowledge_id=self.data.get('knowledge_id'), + problem_id=problem.id, + paragraph_id=self.data.get('paragraph_id')).exists(): + raise AppApiException(500, _('Already associated, please do not associate again')) + problem_paragraph_mapping = ProblemParagraphMapping( + id=uuid.uuid7(), + problem_id=problem.id, + document_id=self.data.get('document_id'), + paragraph_id=self.data.get('paragraph_id'), + knowledge_id=self.data.get('knowledge_id') + ) + problem_paragraph_mapping.save() + model_id = get_embedding_model_id_by_knowledge_id(self.data.get('knowledge_id')) + if with_embedding: + embedding_by_problem_task({ + 'text': problem.content, + 'is_active': True, + 'source_type': SourceType.PROBLEM, + 'source_id': problem_paragraph_mapping.id, + 'document_id': self.data.get('document_id'), + 'paragraph_id': self.data.get('paragraph_id'), + 'knowledge_id': self.data.get('knowledge_id'), + }, model_id) + + return ProblemSerializers.Operate( + data={'knowledge_id': self.data.get('knowledge_id'), 'problem_id': problem.id} + ).one(with_valid=True) + class Operate(serializers.Serializer): # 段落id paragraph_id = serializers.UUIDField(required=True, label=_('paragraph id')) diff --git a/apps/knowledge/serializers/problem.py b/apps/knowledge/serializers/problem.py index 4b98e848..91855d4d 100644 --- a/apps/knowledge/serializers/problem.py +++ b/apps/knowledge/serializers/problem.py @@ -1,7 +1,17 @@ +import os +from typing import Dict + +from django.db import transaction +from django.db.models import QuerySet from django.utils.translation import gettext_lazy as _ from rest_framework import serializers -from knowledge.models import Problem +from common.db.search import native_search +from common.utils.common import get_file_content +from knowledge.models import Problem, ProblemParagraphMapping, Paragraph, Knowledge +from knowledge.serializers.common import get_embedding_model_id_by_knowledge_id +from knowledge.task.embedding import delete_embedding_by_source_ids, update_problem_embedding +from maxkb.const import PROJECT_DIR class ProblemSerializer(serializers.ModelSerializer): @@ -13,3 +23,55 @@ class ProblemSerializer(serializers.ModelSerializer): class ProblemInstanceSerializer(serializers.Serializer): id = serializers.CharField(required=False, label=_('problem id')) content = serializers.CharField(required=True, max_length=256, label=_('content')) + + +class ProblemSerializers(serializers.Serializer): + class Operate(serializers.Serializer): + knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id')) + problem_id = serializers.UUIDField(required=True, label=_('problem id')) + + def list_paragraph(self, with_valid=True): + if with_valid: + self.is_valid(raise_exception=True) + problem_paragraph_mapping = QuerySet(ProblemParagraphMapping).filter( + knowledge_id=self.data.get("knowledge_id"), + problem_id=self.data.get("problem_id") + ) + if problem_paragraph_mapping is None or len(problem_paragraph_mapping) == 0: + return [] + return native_search( + QuerySet(Paragraph).filter(id__in=[row.paragraph_id for row in problem_paragraph_mapping]), + select_string=get_file_content( + os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql', 'list_paragraph.sql'))) + + def one(self, with_valid=True): + if with_valid: + self.is_valid(raise_exception=True) + return ProblemInstanceSerializer(QuerySet(Problem).get(**{'id': self.data.get('problem_id')})).data + + @transaction.atomic + def delete(self, with_valid=True): + if with_valid: + self.is_valid(raise_exception=True) + problem_paragraph_mapping_list = QuerySet(ProblemParagraphMapping).filter( + knowledge_id=self.data.get('knowledge_id'), + problem_id=self.data.get('problem_id')) + source_ids = [row.id for row in problem_paragraph_mapping_list] + problem_paragraph_mapping_list.delete() + QuerySet(Problem).filter(id=self.data.get('problem_id')).delete() + delete_embedding_by_source_ids(source_ids) + return True + + @transaction.atomic + def edit(self, instance: Dict, with_valid=True): + if with_valid: + self.is_valid(raise_exception=True) + problem_id = self.data.get('problem_id') + knowledge_id = self.data.get('knowledge_id') + content = instance.get('content') + problem = QuerySet(Problem).filter(id=problem_id, knowledge_id=knowledge_id).first() + QuerySet(Knowledge).filter(id=knowledge_id) + problem.content = content + problem.save() + model_id = get_embedding_model_id_by_knowledge_id(knowledge_id) + update_problem_embedding(problem_id, content, model_id) diff --git a/apps/knowledge/task/__init__.py b/apps/knowledge/task/__init__.py index 5fe428b0..e69de29b 100644 --- a/apps/knowledge/task/__init__.py +++ b/apps/knowledge/task/__init__.py @@ -1,2 +0,0 @@ -from .sync import * -from .embedding import * diff --git a/apps/knowledge/task/handler.py b/apps/knowledge/task/handler.py index 9e1d77ea..3d8eae73 100644 --- a/apps/knowledge/task/handler.py +++ b/apps/knowledge/task/handler.py @@ -11,24 +11,28 @@ from django.utils.translation import gettext_lazy as _ from common.utils.fork import ChildLink, Fork from common.utils.split_model import get_split_model from knowledge.models.knowledge import KnowledgeType, Document, Knowledge, Status +from knowledge.serializers.document import DocumentSerializers +from knowledge.serializers.paragraph import ParagraphSerializers max_kb_error = logging.getLogger("max_kb_error") max_kb = logging.getLogger("max_kb") def get_save_handler(knowledge_id, selector): - from knowledge.serializers import DocumentSerializers - def handler(child_link: ChildLink, response: Fork.Response): if response.status == 200: try: document_name = child_link.tag.text if child_link.tag is not None and len( child_link.tag.text.strip()) > 0 else child_link.url paragraphs = get_split_model('web.md').parse(response.content) - DocumentSerializers.Create(data={'knowledge_id': knowledge_id}).save( - {'name': document_name, 'paragraphs': paragraphs, - 'meta': {'source_url': child_link.url, 'selector': selector}, - 'type': KnowledgeType.WEB}, with_valid=True) + DocumentSerializers.Create( + data={'knowledge_id': knowledge_id} + ).save({ + 'name': document_name, + 'paragraphs': paragraphs, + 'meta': {'source_url': child_link.url, 'selector': selector}, + 'type': KnowledgeType.WEB + }, with_valid=True) except Exception as e: logging.getLogger("max_kb_error").error(f'{str(e)}:{traceback.format_exc()}') @@ -36,7 +40,6 @@ def get_save_handler(knowledge_id, selector): def get_sync_handler(knowledge_id): - from knowledge.serializers import DocumentSerializers knowledge = QuerySet(Knowledge).filter(id=knowledge_id).first() def handler(child_link: ChildLink, response: Fork.Response): @@ -52,10 +55,14 @@ def get_sync_handler(knowledge_id): DocumentSerializers.Sync(data={'document_id': first.id}).sync() else: # 插入 - DocumentSerializers.Create(data={'knowledge_id': knowledge.id}).save( - {'name': document_name, 'paragraphs': paragraphs, - 'meta': {'source_url': child_link.url.strip(), 'selector': knowledge.meta.get('selector')}, - 'type': KnowledgeType.WEB}, with_valid=True) + DocumentSerializers.Create( + data={'knowledge_id': knowledge.id} + ).save({ + 'name': document_name, + 'paragraphs': paragraphs, + 'meta': {'source_url': child_link.url.strip(), 'selector': knowledge.meta.get('selector')}, + 'type': KnowledgeType.WEB + }, with_valid=True) except Exception as e: logging.getLogger("max_kb_error").error(f'{str(e)}:{traceback.format_exc()}') @@ -63,8 +70,6 @@ def get_sync_handler(knowledge_id): def get_sync_web_document_handler(knowledge_id): - from knowledge.serializers import DocumentSerializers - def handler(source_url: str, selector, response: Fork.Response): if response.status == 200: try: @@ -88,7 +93,6 @@ def get_sync_web_document_handler(knowledge_id): def save_problem(knowledge_id, document_id, paragraph_id, problem): - from knowledge.serializers import ParagraphSerializers # print(f"knowledge_id: {knowledge_id}") # print(f"document_id: {document_id}") # print(f"paragraph_id: {paragraph_id}") @@ -101,7 +105,11 @@ def save_problem(knowledge_id, document_id, paragraph_id, problem): return try: ParagraphSerializers.Problem( - data={"knowledge_id": knowledge_id, 'document_id': document_id, - 'paragraph_id': paragraph_id}).save(instance={"content": problem}, with_valid=True) + data={ + "knowledge_id": knowledge_id, + 'document_id': document_id, + 'paragraph_id': paragraph_id + } + ).save(instance={"content": problem}, with_valid=True) except Exception as e: max_kb_error.error(_('Association problem failed {error}').format(error=str(e))) diff --git a/apps/knowledge/urls.py b/apps/knowledge/urls.py index 5caa357d..277c0aa0 100644 --- a/apps/knowledge/urls.py +++ b/apps/knowledge/urls.py @@ -8,7 +8,9 @@ urlpatterns = [ path('workspace//knowledge/base', views.KnowledgeBaseView.as_view()), path('workspace//knowledge/web', views.KnowledgeWebView.as_view()), path('workspace//knowledge/', views.KnowledgeView.Operate.as_view()), + path('workspace//knowledge//document', views.DocumentView.as_view()), path('workspace//knowledge//document/split', views.DocumentView.Split.as_view()), path('workspace//knowledge//document/batch', views.DocumentView.Batch.as_view()), + path('workspace//knowledge//document/', views.DocumentView.Operate.as_view()), path('workspace//knowledge//', views.KnowledgeView.Page.as_view()), ] diff --git a/apps/knowledge/views/document.py b/apps/knowledge/views/document.py index 8b0b785d..926d1c1e 100644 --- a/apps/knowledge/views/document.py +++ b/apps/knowledge/views/document.py @@ -8,28 +8,45 @@ from common.auth import TokenAuth from common.auth.authentication import has_permissions from common.constants.permission_constants import PermissionConstants from common.result import result -from knowledge.api.document import DocumentSplitAPI, DocumentBatchAPI, DocumentBatchCreateAPI +from knowledge.api.document import DocumentSplitAPI, DocumentBatchAPI, DocumentBatchCreateAPI, DocumentCreateAPI, \ + DocumentReadAPI, DocumentEditAPI, DocumentDeleteAPI from knowledge.api.knowledge import KnowledgeTreeReadAPI from knowledge.serializers.document import DocumentSerializers -from knowledge.serializers.knowledge import KnowledgeSerializer class DocumentView(APIView): authentication_classes = [TokenAuth] + @extend_schema( + methods=['POST'], + description=_('Create document'), + operation_id=_('Create document'), + request=DocumentCreateAPI.get_request(), + parameters=DocumentCreateAPI.get_parameters(), + responses=DocumentCreateAPI.get_response(), + tags=[_('Knowledge Base/Documentation')] + ) + @has_permissions(PermissionConstants.DOCUMENT_CREATE.get_workspace_permission()) + def post(self, request: Request, workspace_id: str, knowledge_id: str): + return result.success( + DocumentSerializers.Create( + data={'workspace_id': workspace_id, 'knowledge_id': knowledge_id}, + ).save(request.data)) + @extend_schema( methods=['GET'], description=_('Get document'), operation_id=_('Get document'), parameters=KnowledgeTreeReadAPI.get_parameters(), responses=KnowledgeTreeReadAPI.get_response(), - tags=[_('Knowledge Base')] + tags=[_('Knowledge Base/Documentation')] ) @has_permissions(PermissionConstants.DOCUMENT_READ.get_workspace_permission()) - def get(self, request: Request, workspace_id: str): - return result.success(KnowledgeSerializer.Query( + def get(self, request: Request, workspace_id: str, knowledge_id: str): + return result.success(DocumentSerializers.Query( data={ 'workspace_id': workspace_id, + 'knowledge_id': knowledge_id, 'folder_id': request.query_params.get('folder_id'), 'name': request.query_params.get('name'), 'desc': request.query_params.get("desc"), @@ -37,6 +54,50 @@ class DocumentView(APIView): } ).list()) + class Operate(APIView): + authentication_classes = [TokenAuth] + + @extend_schema( + description=_('Get document details'), + operation_id=_('Get document details'), + parameters=DocumentReadAPI.get_parameters(), + responses=DocumentReadAPI.get_response(), + tags=[_('Knowledge Base/Documentation')] + ) + @has_permissions(PermissionConstants.DOCUMENT_READ.get_workspace_permission()) + def get(self, request: Request, knowledge_id: str, document_id: str): + operate = DocumentSerializers.Operate(data={'document_id': document_id, 'knowledge_id': knowledge_id}) + operate.is_valid(raise_exception=True) + return result.success(operate.one()) + + @extend_schema( + description=_('Modify document'), + operation_id=_('Modify document'), + parameters=DocumentEditAPI.get_parameters(), + request=DocumentEditAPI.get_request(), + responses=DocumentEditAPI.get_response(), + tags=[_('Knowledge Base/Documentation')] + ) + @has_permissions(PermissionConstants.DOCUMENT_EDIT.get_workspace_permission()) + def put(self, request: Request, knowledge_id: str, document_id: str): + return result.success( + DocumentSerializers.Operate(data={'document_id': document_id, 'knowledge_id': knowledge_id}).edit( + request.data, + with_valid=True)) + + @extend_schema( + description=_('Delete document'), + operation_id=_('Delete document'), + parameters=DocumentDeleteAPI.get_parameters(), + responses=DocumentDeleteAPI.get_response(), + tags=[_('Knowledge Base/Documentation')] + ) + @has_permissions(PermissionConstants.DOCUMENT_DELETE.get_workspace_permission()) + def delete(self, request: Request, knowledge_id: str, document_id: str): + operate = DocumentSerializers.Operate(data={'document_id': document_id, 'knowledge_id': knowledge_id}) + operate.is_valid(raise_exception=True) + return result.success(operate.delete()) + class Split(APIView): authentication_classes = [TokenAuth] parser_classes = [MultiPartParser]