From 8a183dd5cfe9490015c4adc3360b06661a02510b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E6=BD=AE?= Date: Thu, 18 Dec 2025 23:34:28 +0800 Subject: [PATCH] =?UTF-8?q?=E9=9F=B3=E8=A7=86=E9=A2=91=E6=94=AF=E6=8C=81?= =?UTF-8?q?=E5=88=86=E6=AE=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- apps/knowledge/serializers/document.py | 33 ++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/apps/knowledge/serializers/document.py b/apps/knowledge/serializers/document.py index d289f1ec..1e9eeb05 100644 --- a/apps/knowledge/serializers/document.py +++ b/apps/knowledge/serializers/document.py @@ -880,6 +880,21 @@ class DocumentSerializers(serializers.Serializer): @post(post_function=post_embedding) @transaction.atomic def save(self, instance: Dict, with_valid=False, **kwargs): + # Debug logging of the incoming chunking parameters + from common.utils.logger import maxkb_logger + maxkb_logger.info(f"Save called with instance keys: {list(instance.keys())}") + maxkb_logger.info(f"save method - limit: {instance.get('limit')}, patterns: {instance.get('patterns')}, with_filter: {instance.get('with_filter')}") + + # Make sure the chunking parameters use the expected key names + # The frontend sends limit, patterns, with_filter + # Copy them to split_limit, split_patterns, split_with_filter + if instance.get('limit') is not None: + instance['split_limit'] = instance.get('limit') + if instance.get('patterns') is not None: + instance['split_patterns'] = instance.get('patterns') + if instance.get('with_filter') is not None: + instance['split_with_filter'] = instance.get('with_filter') + if with_valid: DocumentInstanceSerializer(data=instance).is_valid(raise_exception=True) self.is_valid(raise_exception=True) @@ -947,6 +962,13 @@ class DocumentSerializers(serializers.Serializer): # 添加音视频STT模型参数到meta if instance.get('stt_model_id'): source_meta['stt_model_id'] = instance.get('stt_model_id') + # Add the chunking parameters to meta + if instance.get('split_limit') is not None: + source_meta['split_limit'] = instance.get('split_limit') +
if instance.get('split_patterns') is not None: + source_meta['split_patterns'] = instance.get('split_patterns') + if instance.get('split_with_filter') is not None: + source_meta['split_with_filter'] = instance.get('split_with_filter') meta = {**instance.get('meta'), **source_meta} if instance.get('meta') is not None else source_meta meta = convert_uuid_to_str(meta) @@ -1274,7 +1296,18 @@ class DocumentSerializers(serializers.Serializer): # 插入文档 for document in instance_list: + # Make sure the chunking parameters use the expected key names + # The frontend sends limit, patterns, with_filter + # Copy them to split_limit, split_patterns, split_with_filter + if document.get('limit') is not None: + document['split_limit'] = document.get('limit') + if document.get('patterns') is not None: + document['split_patterns'] = document.get('patterns') + if document.get('with_filter') is not None: + document['split_with_filter'] = document.get('with_filter') + maxkb_logger.info(f"Processing document: {document.get('name')}, paragraphs count: {len(document.get('paragraphs', []))}") + maxkb_logger.info(f"Document split params - split_limit: {document.get('split_limit')}, split_patterns: {document.get('split_patterns')}, split_with_filter: {document.get('split_with_filter')}") document_paragraph_dict_model = DocumentSerializers.Create.get_document_paragraph_model( knowledge_id, document