From e509a078a4adf29717413fcc953238b47d8cf85e Mon Sep 17 00:00:00 2001
From: 朱潮
Date: Thu, 18 Dec 2025 23:57:04 +0800
Subject: [PATCH] Support chunking for audio and video files
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 apps/knowledge/serializers/document.py | 28 ++++++++++++++++++++++++++
 apps/knowledge/views/document.py       | 18 +++++++++++++++++
 2 files changed, 46 insertions(+)

diff --git a/apps/knowledge/serializers/document.py b/apps/knowledge/serializers/document.py
index fc7ae6f8..43e387c0 100644
--- a/apps/knowledge/serializers/document.py
+++ b/apps/knowledge/serializers/document.py
@@ -880,21 +880,37 @@ class DocumentSerializers(serializers.Serializer):
     @post(post_function=post_embedding)
     @transaction.atomic
     def save(self, instance: Dict, with_valid=False, **kwargs):
+        # Detailed logging
+        from common.utils.logger import maxkb_logger
+        maxkb_logger.info(f"=== Save Method ===")
+        maxkb_logger.info(f"self.data: {self.data}")
+        maxkb_logger.info(f"instance keys: {list(instance.keys())}")
+        maxkb_logger.info(f"stt_model_id in instance: {instance.get('stt_model_id')}")
+        maxkb_logger.info(f"llm_model_id in instance: {instance.get('llm_model_id')}")
+        # Read the chunking parameters from self.data
         if self.data.get('limit') is not None:
             instance['split_limit'] = self.data.get('limit')
+            maxkb_logger.info(f"Set split_limit from self.data: {self.data.get('limit')}")
         if self.data.get('patterns') is not None:
             instance['split_patterns'] = self.data.get('patterns')
+            maxkb_logger.info(f"Set split_patterns from self.data: {self.data.get('patterns')}")
         if self.data.get('with_filter') is not None:
             instance['split_with_filter'] = self.data.get('with_filter')
+            maxkb_logger.info(f"Set split_with_filter from self.data: {self.data.get('with_filter')}")
         # Also accept the chunking parameters from instance (backward compatible)
         if instance.get('limit') is not None:
             instance['split_limit'] = instance.get('limit')
+            maxkb_logger.info(f"Set split_limit from instance: {instance.get('limit')}")
         if instance.get('patterns') is not None:
             instance['split_patterns'] = instance.get('patterns')
+            maxkb_logger.info(f"Set split_patterns from instance: {instance.get('patterns')}")
         if instance.get('with_filter') is not None:
             instance['split_with_filter'] = instance.get('with_filter')
+            maxkb_logger.info(f"Set split_with_filter from instance: {instance.get('with_filter')}")
+
+        maxkb_logger.info(f"Final instance split params - split_limit: {instance.get('split_limit')}, split_patterns: {instance.get('split_patterns')}, split_with_filter: {instance.get('split_with_filter')}")
 
         if with_valid:
             DocumentInstanceSerializer(data=instance).is_valid(raise_exception=True)
@@ -954,6 +970,12 @@ class DocumentSerializers(serializers.Serializer):
 
     @staticmethod
     def get_document_paragraph_model(knowledge_id, instance: Dict):
+        from common.utils.logger import maxkb_logger
+        maxkb_logger.info(f"=== get_document_paragraph_model ===")
+        maxkb_logger.info(f"instance split_limit: {instance.get('split_limit')}")
+        maxkb_logger.info(f"instance split_patterns: {instance.get('split_patterns')}")
+        maxkb_logger.info(f"instance split_with_filter: {instance.get('split_with_filter')}")
+
         source_meta = {'source_file_id': instance.get('source_file_id')} if instance.get('source_file_id') else {}
         # Add the MinerU model parameters to meta
         if instance.get('llm_model_id'):
@@ -966,13 +988,19 @@ class DocumentSerializers(serializers.Serializer):
         # Add the chunking parameters to meta
         if instance.get('split_limit') is not None:
             source_meta['split_limit'] = instance.get('split_limit')
+            maxkb_logger.info(f"Added split_limit to source_meta: {instance.get('split_limit')}")
         if instance.get('split_patterns') is not None:
             source_meta['split_patterns'] = instance.get('split_patterns')
+            maxkb_logger.info(f"Added split_patterns to source_meta: {instance.get('split_patterns')}")
         if instance.get('with_filter') is not None:
             source_meta['split_with_filter'] = instance.get('with_filter')
+            maxkb_logger.info(f"Added split_with_filter to source_meta: {instance.get('with_filter')}")
+
         meta = {**instance.get('meta'), **source_meta} if instance.get('meta') is not None else source_meta
         meta = convert_uuid_to_str(meta)
+        maxkb_logger.info(f"Final meta split params - split_limit: {meta.get('split_limit')}, split_patterns: {meta.get('split_patterns')}, split_with_filter: {meta.get('split_with_filter')}")
+
         document_model = Document(
             **{
                 'knowledge_id': knowledge_id,
diff --git a/apps/knowledge/views/document.py b/apps/knowledge/views/document.py
index f89b60e9..1100967f 100644
--- a/apps/knowledge/views/document.py
+++ b/apps/knowledge/views/document.py
@@ -45,6 +45,22 @@ class DocumentView(APIView):
             get_knowledge_operation_object(keywords.get('knowledge_id')),
             {'name': r.data.get('name')}),
     )
     def post(self, request: Request, workspace_id: str, knowledge_id: str):
+        # Logging
+        from common.utils.logger import maxkb_logger
+        maxkb_logger.info(f"=== Document Upload View ===")
+        maxkb_logger.info(f"request.data keys: {list(request.data.keys())}")
+        maxkb_logger.info(f"request.FILES keys: {list(request.FILES.keys())}")
+        if 'limit' in request.data:
+            maxkb_logger.info(f"request.data limit: {request.data.get('limit')}")
+        if 'patterns' in request.data:
+            maxkb_logger.info(f"request.data patterns: {request.data.get('patterns')}")
+        if 'with_filter' in request.data:
+            maxkb_logger.info(f"request.data with_filter: {request.data.get('with_filter')}")
+        if 'stt_model_id' in request.data:
+            maxkb_logger.info(f"request.data stt_model_id: {request.data.get('stt_model_id')}")
+        if 'llm_model_id' in request.data:
+            maxkb_logger.info(f"request.data llm_model_id: {request.data.get('llm_model_id')}")
+
         # Prepare the chunking parameters
         serializer_data = {
             'workspace_id': workspace_id,
@@ -59,6 +75,8 @@ class DocumentView(APIView):
         if 'with_filter' in request.data:
             serializer_data['with_filter'] = request.data.get('with_filter')
 
+        maxkb_logger.info(f"serializer_data: {serializer_data}")
+
         return result.success(
             DocumentSerializers.Create(data=serializer_data).save(request.data))
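
For reviewers who want to exercise the new parameters end to end, here is a minimal client-side sketch. The host, URL prefix, auth header, and all placeholder IDs are assumptions for illustration only; what comes from the patch are the form-field names (`limit`, `patterns`, `with_filter`, `stt_model_id`, `llm_model_id`), which `DocumentView.post` reads from `request.data`, copies into `serializer_data`, and which `save()` ultimately stores in the document meta as `split_limit` / `split_patterns` / `split_with_filter`.

```python
# Minimal sketch of an upload exercising the new chunking parameters.
# ASSUMPTIONS: host, route prefix, and bearer auth are illustrative only;
# the form-field names are the ones read by DocumentView.post above.
import requests

BASE_URL = "http://localhost:8080"      # assumed MaxKB host
workspace_id = "default"                # hypothetical workspace
knowledge_id = "<knowledge-uuid>"       # placeholder, not a real id

url = f"{BASE_URL}/api/workspace/{workspace_id}/knowledge/{knowledge_id}/document"

with open("lecture.mp4", "rb") as f:
    resp = requests.post(
        url,
        headers={"Authorization": "Bearer <token>"},  # assumed auth scheme
        data={
            "limit": 512,          # becomes instance['split_limit']
            "patterns": "\\n\\n",  # becomes instance['split_patterns']
            "with_filter": "true", # becomes instance['split_with_filter']
            "stt_model_id": "<stt-model-uuid>",  # speech-to-text model for audio/video
            "llm_model_id": "<llm-model-uuid>",
        },
        files={"file": f},
    )

resp.raise_for_status()
print(resp.json())
```

With the patch applied, a request like this should produce the `maxkb_logger.info` lines added above, tracing the parameters from the view through `save()` and `get_document_paragraph_model` into the final document meta.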