Audio/video: support segmentation
This commit is contained in:
parent 77de4f6315
commit e509a078a4
@@ -880,21 +880,37 @@ class DocumentSerializers(serializers.Serializer):
    @post(post_function=post_embedding)
    @transaction.atomic
    def save(self, instance: Dict, with_valid=False, **kwargs):
        # Add detailed logging
        from common.utils.logger import maxkb_logger
        maxkb_logger.info(f"=== Save Method ===")
        maxkb_logger.info(f"self.data: {self.data}")
        maxkb_logger.info(f"instance keys: {list(instance.keys())}")
        maxkb_logger.info(f"stt_model_id in instance: {instance.get('stt_model_id')}")
        maxkb_logger.info(f"llm_model_id in instance: {instance.get('llm_model_id')}")

        # Get the chunking parameters from self.data
        if self.data.get('limit') is not None:
            instance['split_limit'] = self.data.get('limit')
            maxkb_logger.info(f"Set split_limit from self.data: {self.data.get('limit')}")
        if self.data.get('patterns') is not None:
            instance['split_patterns'] = self.data.get('patterns')
            maxkb_logger.info(f"Set split_patterns from self.data: {self.data.get('patterns')}")
        if self.data.get('with_filter') is not None:
            instance['split_with_filter'] = self.data.get('with_filter')
            maxkb_logger.info(f"Set split_with_filter from self.data: {self.data.get('with_filter')}")

        # Also accept chunking parameters from instance (backward compatible)
        if instance.get('limit') is not None:
            instance['split_limit'] = instance.get('limit')
            maxkb_logger.info(f"Set split_limit from instance: {instance.get('limit')}")
        if instance.get('patterns') is not None:
            instance['split_patterns'] = instance.get('patterns')
            maxkb_logger.info(f"Set split_patterns from instance: {instance.get('patterns')}")
        if instance.get('with_filter') is not None:
            instance['split_with_filter'] = instance.get('with_filter')
            maxkb_logger.info(f"Set split_with_filter from instance: {instance.get('with_filter')}")

        maxkb_logger.info(f"Final instance split params - split_limit: {instance.get('split_limit')}, split_patterns: {instance.get('split_patterns')}, split_with_filter: {instance.get('split_with_filter')}")

        if with_valid:
            DocumentInstanceSerializer(data=instance).is_valid(raise_exception=True)
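Note on the hunk above: because the `instance` checks run after the `self.data` checks, a value already present on `instance` wins. A minimal standalone sketch of that precedence (the function and variable names here are illustrative, not part of the project):

# Illustrative sketch only: mirrors the precedence in the diff above,
# where instance-level values overwrite values taken from self.data.
def merge_split_params(serializer_data: dict, instance: dict) -> dict:
    merged = dict(instance)
    for src in (serializer_data, instance):  # instance wins because it is applied last
        for key, target in (('limit', 'split_limit'),
                            ('patterns', 'split_patterns'),
                            ('with_filter', 'split_with_filter')):
            if src.get(key) is not None:
                merged[target] = src[key]
    return merged

# Example: instance carries its own 'limit', so it overrides the serializer value.
print(merge_split_params({'limit': 512}, {'limit': 256}))  # {'limit': 256, 'split_limit': 256}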
@@ -954,6 +970,12 @@ class DocumentSerializers(serializers.Serializer):

    @staticmethod
    def get_document_paragraph_model(knowledge_id, instance: Dict):
        from common.utils.logger import maxkb_logger
        maxkb_logger.info(f"=== get_document_paragraph_model ===")
        maxkb_logger.info(f"instance split_limit: {instance.get('split_limit')}")
        maxkb_logger.info(f"instance split_patterns: {instance.get('split_patterns')}")
        maxkb_logger.info(f"instance split_with_filter: {instance.get('split_with_filter')}")

        source_meta = {'source_file_id': instance.get('source_file_id')} if instance.get('source_file_id') else {}
        # Add the MinerU model parameters to meta
        if instance.get('llm_model_id'):
@@ -966,13 +988,19 @@ class DocumentSerializers(serializers.Serializer):
        # Add the chunking parameters to meta
        if instance.get('split_limit') is not None:
            source_meta['split_limit'] = instance.get('split_limit')
            maxkb_logger.info(f"Added split_limit to source_meta: {instance.get('split_limit')}")
        if instance.get('split_patterns') is not None:
            source_meta['split_patterns'] = instance.get('split_patterns')
            maxkb_logger.info(f"Added split_patterns to source_meta: {instance.get('split_patterns')}")
        if instance.get('with_filter') is not None:
            source_meta['split_with_filter'] = instance.get('with_filter')
            maxkb_logger.info(f"Added split_with_filter to source_meta: {instance.get('with_filter')}")

        meta = {**instance.get('meta'), **source_meta} if instance.get('meta') is not None else source_meta
        meta = convert_uuid_to_str(meta)

        maxkb_logger.info(f"Final meta split params - split_limit: {meta.get('split_limit')}, split_patterns: {meta.get('split_patterns')}, split_with_filter: {meta.get('split_with_filter')}")

        document_model = Document(
            **{
                'knowledge_id': knowledge_id,
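The split parameters are persisted inside the document's `meta`, so a later chunking step can read them back. A rough sketch of such a consumer (the defaults and the helper name are assumptions, not taken from this commit):

# Hypothetical consumer: pulls the chunking settings back out of the stored meta.
# Default values are placeholders for illustration only.
def split_settings_from_meta(meta: dict) -> dict:
    return {
        'limit': meta.get('split_limit', 4096),          # max characters per chunk (assumed default)
        'patterns': meta.get('split_patterns') or [],    # regex delimiters, if any were supplied
        'with_filter': bool(meta.get('split_with_filter', True)),  # whether to drop noise lines
    }

settings = split_settings_from_meta({'split_limit': 512, 'split_patterns': ['\\n\\n']})
# -> {'limit': 512, 'patterns': ['\\n\\n'], 'with_filter': True}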
@@ -45,6 +45,22 @@ class DocumentView(APIView):
             get_knowledge_operation_object(keywords.get('knowledge_id')),
             {'name': r.data.get('name')}), )
    def post(self, request: Request, workspace_id: str, knowledge_id: str):
        # Add logging
        from common.utils.logger import maxkb_logger
        maxkb_logger.info(f"=== Document Upload View ===")
        maxkb_logger.info(f"request.data keys: {list(request.data.keys())}")
        maxkb_logger.info(f"request.FILES keys: {list(request.FILES.keys())}")
        if 'limit' in request.data:
            maxkb_logger.info(f"request.data limit: {request.data.get('limit')}")
        if 'patterns' in request.data:
            maxkb_logger.info(f"request.data patterns: {request.data.get('patterns')}")
        if 'with_filter' in request.data:
            maxkb_logger.info(f"request.data with_filter: {request.data.get('with_filter')}")
        if 'stt_model_id' in request.data:
            maxkb_logger.info(f"request.data stt_model_id: {request.data.get('stt_model_id')}")
        if 'llm_model_id' in request.data:
            maxkb_logger.info(f"request.data llm_model_id: {request.data.get('llm_model_id')}")

        # Prepare the chunking parameters
        serializer_data = {
            'workspace_id': workspace_id,
@@ -59,6 +75,8 @@ class DocumentView(APIView):
        if 'with_filter' in request.data:
            serializer_data['with_filter'] = request.data.get('with_filter')

        maxkb_logger.info(f"serializer_data: {serializer_data}")

        return result.success(
            DocumentSerializers.Create(data=serializer_data).save(request.data))
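For reference, a client would send these fields alongside the file in the multipart upload. A hedged example using the `requests` library; the endpoint path, header, and field values are assumptions, not confirmed by this diff:

import requests

# Hypothetical upload call; the URL, auth header, and field values are placeholders.
files = {'file': open('lecture.mp3', 'rb')}
data = {
    'limit': 512,                 # forwarded as split_limit
    'patterns': '\\n\\n',         # forwarded as split_patterns
    'with_filter': 'true',        # forwarded as split_with_filter
    'stt_model_id': '<stt-model-uuid>',
    'llm_model_id': '<llm-model-uuid>',
}
resp = requests.post(
    'http://localhost:8080/api/workspace/<workspace_id>/knowledge/<knowledge_id>/document',
    headers={'Authorization': 'Bearer <token>'},
    files=files,
    data=data,
)
print(resp.json())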