From 77de4f63154eee659afb970844db232c4ef3b7e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E6=BD=AE?= Date: Thu, 18 Dec 2025 23:49:25 +0800 Subject: [PATCH] =?UTF-8?q?=E9=9F=B3=E8=A7=86=E9=A2=91=E6=94=AF=E6=8C=81?= =?UTF-8?q?=E5=88=86=E6=AE=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- apps/knowledge/serializers/document.py | 40 ++++++++++++-------------- apps/knowledge/views/document.py | 18 ++++++++++-- 2 files changed, 34 insertions(+), 24 deletions(-) diff --git a/apps/knowledge/serializers/document.py b/apps/knowledge/serializers/document.py index 5728b711..fc7ae6f8 100644 --- a/apps/knowledge/serializers/document.py +++ b/apps/knowledge/serializers/document.py @@ -880,9 +880,15 @@ class DocumentSerializers(serializers.Serializer): @post(post_function=post_embedding) @transaction.atomic def save(self, instance: Dict, with_valid=False, **kwargs): - # 确保分块参数使用正确的键名 - # 前端传递的是 limit, patterns, with_filter - # 我们需要将其转换为 split_limit, split_patterns, split_with_filter + # 从 self.data 中获取分块参数 + if self.data.get('limit') is not None: + instance['split_limit'] = self.data.get('limit') + if self.data.get('patterns') is not None: + instance['split_patterns'] = self.data.get('patterns') + if self.data.get('with_filter') is not None: + instance['split_with_filter'] = self.data.get('with_filter') + + # 同时也支持从 instance 中获取分块参数(向后兼容) if instance.get('limit') is not None: instance['split_limit'] = instance.get('limit') if instance.get('patterns') is not None: @@ -1256,30 +1262,22 @@ class DocumentSerializers(serializers.Serializer): from common.utils.logger import maxkb_logger import os - # 添加详细日志 - maxkb_logger.info(f"batch_save called with workspace_id: {workspace_id}, knowledge_id: {knowledge_id}") - maxkb_logger.info(f"instance_list contains {len(instance_list)} documents") + # 获取分块参数 + split_limit = self.data.get('limit') + split_patterns = self.data.get('patterns') + split_with_filter = self.data.get('with_filter') # 在处理文档之前,先确保所有分块参数都被正确转换 for idx, doc in enumerate(instance_list): # 确保分块参数使用正确的键名 # 前端传递的是 limit, patterns, with_filter # 我们需要将其转换为 split_limit, split_patterns, split_with_filter - if doc.get('limit') is not None: - doc['split_limit'] = doc.get('limit') - maxkb_logger.info(f"Document {idx}: Converting limit={doc.get('limit')} to split_limit") - if doc.get('patterns') is not None: - doc['split_patterns'] = doc.get('patterns') - maxkb_logger.info(f"Document {idx}: Converting patterns={doc.get('patterns')} to split_patterns") - if doc.get('with_filter') is not None: - doc['split_with_filter'] = doc.get('with_filter') - maxkb_logger.info(f"Document {idx}: Converting with_filter={doc.get('with_filter')} to split_with_filter") - - maxkb_logger.info(f"Document {idx}: {doc.keys()}") - if 'stt_model_id' in doc: - maxkb_logger.info(f" - stt_model_id present: {doc['stt_model_id']}") - if 'llm_model_id' in doc: - maxkb_logger.info(f" - llm_model_id present: {doc['llm_model_id']}") + if split_limit is not None: + doc['split_limit'] = split_limit + if split_patterns is not None: + doc['split_patterns'] = split_patterns + if split_with_filter is not None: + doc['split_with_filter'] = split_with_filter for document in instance_list: # 检查是否是MinerU类型的文档(需要同时有llm_model_id和vision_model_id) diff --git a/apps/knowledge/views/document.py b/apps/knowledge/views/document.py index bc5707e6..f89b60e9 100644 --- a/apps/knowledge/views/document.py +++ b/apps/knowledge/views/document.py @@ -45,10 +45,22 @@ class DocumentView(APIView): get_knowledge_operation_object(keywords.get('knowledge_id')), {'name': r.data.get('name')}), ) def post(self, request: Request, workspace_id: str, knowledge_id: str): + # 准备分块参数 + serializer_data = { + 'workspace_id': workspace_id, + 'knowledge_id': knowledge_id + } + + # 添加分块参数到 serializer_data + if 'limit' in request.data: + serializer_data['limit'] = request.data.get('limit') + if 'patterns' in request.data: + serializer_data['patterns'] = request.data.get('patterns') + if 'with_filter' in request.data: + serializer_data['with_filter'] = request.data.get('with_filter') + return result.success( - DocumentSerializers.Create( - data={'workspace_id': workspace_id, 'knowledge_id': knowledge_id}, - ).save(request.data)) + DocumentSerializers.Create(data=serializer_data).save(request.data)) @extend_schema( methods=['GET'],