From 18068f76fff13bf820ac16deedae03c8e24a8d26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E6=BD=AE?= Date: Fri, 19 Dec 2025 00:14:30 +0800 Subject: [PATCH] =?UTF-8?q?=E9=9F=B3=E8=A7=86=E9=A2=91=E6=94=AF=E6=8C=81?= =?UTF-8?q?=E5=88=86=E6=AE=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- apps/knowledge/serializers/document.py | 22 ++++++++++++----- apps/knowledge/views/document.py | 31 +++++++++++++++++++++--- ui/src/views/document/UploadDocument.vue | 20 +++++++++++++-- 3 files changed, 61 insertions(+), 12 deletions(-) diff --git a/apps/knowledge/serializers/document.py b/apps/knowledge/serializers/document.py index 43e387c0..10b29e37 100644 --- a/apps/knowledge/serializers/document.py +++ b/apps/knowledge/serializers/document.py @@ -1297,15 +1297,25 @@ class DocumentSerializers(serializers.Serializer): # 在处理文档之前,先确保所有分块参数都被正确转换 for idx, doc in enumerate(instance_list): - # 确保分块参数使用正确的键名 - # 前端传递的是 limit, patterns, with_filter - # 我们需要将其转换为 split_limit, split_patterns, split_with_filter - if split_limit is not None: + # 如果文档对象中已经有分块参数(前端传递的),优先使用 + # 否则使用从 serializer_data 获取的参数 + if doc.get('split_limit') is not None: + maxkb_logger.info(f"Document {idx} already has split_limit: {doc.get('split_limit')}") + elif split_limit is not None: doc['split_limit'] = split_limit - if split_patterns is not None: + maxkb_logger.info(f"Document {idx}: Setting split_limit from serializer_data: {split_limit}") + + if doc.get('split_patterns') is not None: + maxkb_logger.info(f"Document {idx} already has split_patterns: {doc.get('split_patterns')}") + elif split_patterns is not None: doc['split_patterns'] = split_patterns - if split_with_filter is not None: + maxkb_logger.info(f"Document {idx}: Setting split_patterns from serializer_data: {split_patterns}") + + if doc.get('split_with_filter') is not None: + maxkb_logger.info(f"Document {idx} already has split_with_filter: {doc.get('split_with_filter')}") + elif split_with_filter is not None: doc['split_with_filter'] = split_with_filter + maxkb_logger.info(f"Document {idx}: Setting split_with_filter from serializer_data: {split_with_filter}") for document in instance_list: # 检查是否是MinerU类型的文档(需要同时有llm_model_id和vision_model_id) diff --git a/apps/knowledge/views/document.py b/apps/knowledge/views/document.py index 2d93a967..b84b99bb 100644 --- a/apps/knowledge/views/document.py +++ b/apps/knowledge/views/document.py @@ -434,6 +434,9 @@ class DocumentView(APIView): from common.utils.logger import maxkb_logger maxkb_logger.info(f"=== BatchCreate View ===") maxkb_logger.info(f"request.data type: {type(request.data)}") + maxkb_logger.info(f"request.query_params: {dict(request.query_params)}") + maxkb_logger.info(f"request.POST: {dict(request.POST)}") + if isinstance(request.data, list): maxkb_logger.info(f"request.data is list with {len(request.data)} items") # 检查第一个文档的参数 @@ -449,14 +452,34 @@ class DocumentView(APIView): 'workspace_id': workspace_id } - # 从第一个文档中提取分块参数(所有文档使用相同的分块设置) + # 尝试从多个地方获取分块参数 + # 1. 从 query_params 获取(GET 参数) + if 'limit' in request.query_params: + serializer_data['limit'] = request.query_params.get('limit') + maxkb_logger.info(f"Got limit from query_params: {serializer_data['limit']}") + if 'patterns' in request.query_params: + serializer_data['patterns'] = request.query_params.get('patterns') + if 'with_filter' in request.query_params: + serializer_data['with_filter'] = request.query_params.get('with_filter') + + # 2. 从 POST 数据获取(表单数据) + if 'limit' in request.POST: + serializer_data['limit'] = request.POST.get('limit') + maxkb_logger.info(f"Got limit from POST: {serializer_data['limit']}") + if 'patterns' in request.POST: + serializer_data['patterns'] = request.POST.getlist('patterns') + if 'with_filter' in request.POST: + serializer_data['with_filter'] = request.POST.get('with_filter') + + # 3. 从文档对象中获取 if isinstance(request.data, list) and len(request.data) > 0: first_doc = request.data[0] - if 'limit' in first_doc: + if 'limit' in first_doc and 'limit' not in serializer_data: serializer_data['limit'] = first_doc.get('limit') - if 'patterns' in first_doc: + maxkb_logger.info(f"Got limit from first doc: {serializer_data['limit']}") + if 'patterns' in first_doc and 'patterns' not in serializer_data: serializer_data['patterns'] = first_doc.get('patterns') - if 'with_filter' in first_doc: + if 'with_filter' in first_doc and 'with_filter' not in serializer_data: serializer_data['with_filter'] = first_doc.get('with_filter') maxkb_logger.info(f"BatchCreate serializer_data: {serializer_data}") diff --git a/ui/src/views/document/UploadDocument.vue b/ui/src/views/document/UploadDocument.vue index c940541e..225efb04 100644 --- a/ui/src/views/document/UploadDocument.vue +++ b/ui/src/views/document/UploadDocument.vue @@ -209,8 +209,24 @@ function submit() { } } // 传递分段规则(如果有) - if (SetRulesRef.value?.form?.patterns) { - doc.split_patterns = SetRulesRef.value.form.patterns + if (SetRulesRef.value?.form) { + // 传递patterns + if (SetRulesRef.value.form.patterns) { + doc.split_patterns = SetRulesRef.value.form.patterns + } + // 传递limit + if (SetRulesRef.value.form.limit !== undefined) { + doc.split_limit = SetRulesRef.value.form.limit + } + // 传递with_filter + if (SetRulesRef.value.form.with_filter !== undefined) { + doc.split_with_filter = SetRulesRef.value.form.with_filter + } + console.log('Media chunking params:', { + split_patterns: doc.split_patterns, + split_limit: doc.split_limit, + split_with_filter: doc.split_with_filter + }) } console.log('Final doc object for media:', doc) }