音视频支持分段
This commit is contained in:
parent
cefac6399d
commit
18068f76ff
@ -1297,15 +1297,25 @@ class DocumentSerializers(serializers.Serializer):
|
||||
|
||||
# 在处理文档之前,先确保所有分块参数都被正确转换
|
||||
for idx, doc in enumerate(instance_list):
|
||||
# 确保分块参数使用正确的键名
|
||||
# 前端传递的是 limit, patterns, with_filter
|
||||
# 我们需要将其转换为 split_limit, split_patterns, split_with_filter
|
||||
if split_limit is not None:
|
||||
# 如果文档对象中已经有分块参数(前端传递的),优先使用
|
||||
# 否则使用从 serializer_data 获取的参数
|
||||
if doc.get('split_limit') is not None:
|
||||
maxkb_logger.info(f"Document {idx} already has split_limit: {doc.get('split_limit')}")
|
||||
elif split_limit is not None:
|
||||
doc['split_limit'] = split_limit
|
||||
if split_patterns is not None:
|
||||
maxkb_logger.info(f"Document {idx}: Setting split_limit from serializer_data: {split_limit}")
|
||||
|
||||
if doc.get('split_patterns') is not None:
|
||||
maxkb_logger.info(f"Document {idx} already has split_patterns: {doc.get('split_patterns')}")
|
||||
elif split_patterns is not None:
|
||||
doc['split_patterns'] = split_patterns
|
||||
if split_with_filter is not None:
|
||||
maxkb_logger.info(f"Document {idx}: Setting split_patterns from serializer_data: {split_patterns}")
|
||||
|
||||
if doc.get('split_with_filter') is not None:
|
||||
maxkb_logger.info(f"Document {idx} already has split_with_filter: {doc.get('split_with_filter')}")
|
||||
elif split_with_filter is not None:
|
||||
doc['split_with_filter'] = split_with_filter
|
||||
maxkb_logger.info(f"Document {idx}: Setting split_with_filter from serializer_data: {split_with_filter}")
|
||||
|
||||
for document in instance_list:
|
||||
# 检查是否是MinerU类型的文档(需要同时有llm_model_id和vision_model_id)
|
||||
|
||||
@ -434,6 +434,9 @@ class DocumentView(APIView):
|
||||
from common.utils.logger import maxkb_logger
|
||||
maxkb_logger.info(f"=== BatchCreate View ===")
|
||||
maxkb_logger.info(f"request.data type: {type(request.data)}")
|
||||
maxkb_logger.info(f"request.query_params: {dict(request.query_params)}")
|
||||
maxkb_logger.info(f"request.POST: {dict(request.POST)}")
|
||||
|
||||
if isinstance(request.data, list):
|
||||
maxkb_logger.info(f"request.data is list with {len(request.data)} items")
|
||||
# 检查第一个文档的参数
|
||||
@ -449,14 +452,34 @@ class DocumentView(APIView):
|
||||
'workspace_id': workspace_id
|
||||
}
|
||||
|
||||
# 从第一个文档中提取分块参数(所有文档使用相同的分块设置)
|
||||
# 尝试从多个地方获取分块参数
|
||||
# 1. 从 query_params 获取(GET 参数)
|
||||
if 'limit' in request.query_params:
|
||||
serializer_data['limit'] = request.query_params.get('limit')
|
||||
maxkb_logger.info(f"Got limit from query_params: {serializer_data['limit']}")
|
||||
if 'patterns' in request.query_params:
|
||||
serializer_data['patterns'] = request.query_params.get('patterns')
|
||||
if 'with_filter' in request.query_params:
|
||||
serializer_data['with_filter'] = request.query_params.get('with_filter')
|
||||
|
||||
# 2. 从 POST 数据获取(表单数据)
|
||||
if 'limit' in request.POST:
|
||||
serializer_data['limit'] = request.POST.get('limit')
|
||||
maxkb_logger.info(f"Got limit from POST: {serializer_data['limit']}")
|
||||
if 'patterns' in request.POST:
|
||||
serializer_data['patterns'] = request.POST.getlist('patterns')
|
||||
if 'with_filter' in request.POST:
|
||||
serializer_data['with_filter'] = request.POST.get('with_filter')
|
||||
|
||||
# 3. 从文档对象中获取
|
||||
if isinstance(request.data, list) and len(request.data) > 0:
|
||||
first_doc = request.data[0]
|
||||
if 'limit' in first_doc:
|
||||
if 'limit' in first_doc and 'limit' not in serializer_data:
|
||||
serializer_data['limit'] = first_doc.get('limit')
|
||||
if 'patterns' in first_doc:
|
||||
maxkb_logger.info(f"Got limit from first doc: {serializer_data['limit']}")
|
||||
if 'patterns' in first_doc and 'patterns' not in serializer_data:
|
||||
serializer_data['patterns'] = first_doc.get('patterns')
|
||||
if 'with_filter' in first_doc:
|
||||
if 'with_filter' in first_doc and 'with_filter' not in serializer_data:
|
||||
serializer_data['with_filter'] = first_doc.get('with_filter')
|
||||
|
||||
maxkb_logger.info(f"BatchCreate serializer_data: {serializer_data}")
|
||||
|
||||
@ -209,9 +209,25 @@ function submit() {
|
||||
}
|
||||
}
|
||||
// 传递分段规则(如果有)
|
||||
if (SetRulesRef.value?.form?.patterns) {
|
||||
if (SetRulesRef.value?.form) {
|
||||
// 传递patterns
|
||||
if (SetRulesRef.value.form.patterns) {
|
||||
doc.split_patterns = SetRulesRef.value.form.patterns
|
||||
}
|
||||
// 传递limit
|
||||
if (SetRulesRef.value.form.limit !== undefined) {
|
||||
doc.split_limit = SetRulesRef.value.form.limit
|
||||
}
|
||||
// 传递with_filter
|
||||
if (SetRulesRef.value.form.with_filter !== undefined) {
|
||||
doc.split_with_filter = SetRulesRef.value.form.with_filter
|
||||
}
|
||||
console.log('Media chunking params:', {
|
||||
split_patterns: doc.split_patterns,
|
||||
split_limit: doc.split_limit,
|
||||
split_with_filter: doc.split_with_filter
|
||||
})
|
||||
}
|
||||
console.log('Final doc object for media:', doc)
|
||||
}
|
||||
documents.push(doc)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user