Support segmentation for audio and video
commit e509a078a4
parent 77de4f6315
@@ -880,21 +880,37 @@ class DocumentSerializers(serializers.Serializer):
         @post(post_function=post_embedding)
         @transaction.atomic
         def save(self, instance: Dict, with_valid=False, **kwargs):
+            # Detailed logging
+            from common.utils.logger import maxkb_logger
+            maxkb_logger.info(f"=== Save Method ===")
+            maxkb_logger.info(f"self.data: {self.data}")
+            maxkb_logger.info(f"instance keys: {list(instance.keys())}")
+            maxkb_logger.info(f"stt_model_id in instance: {instance.get('stt_model_id')}")
+            maxkb_logger.info(f"llm_model_id in instance: {instance.get('llm_model_id')}")
+
             # Read chunking parameters from self.data
             if self.data.get('limit') is not None:
                 instance['split_limit'] = self.data.get('limit')
+                maxkb_logger.info(f"Set split_limit from self.data: {self.data.get('limit')}")
             if self.data.get('patterns') is not None:
                 instance['split_patterns'] = self.data.get('patterns')
+                maxkb_logger.info(f"Set split_patterns from self.data: {self.data.get('patterns')}")
             if self.data.get('with_filter') is not None:
                 instance['split_with_filter'] = self.data.get('with_filter')
+                maxkb_logger.info(f"Set split_with_filter from self.data: {self.data.get('with_filter')}")
 
             # Also accept chunking parameters from instance (backward compatible)
             if instance.get('limit') is not None:
                 instance['split_limit'] = instance.get('limit')
+                maxkb_logger.info(f"Set split_limit from instance: {instance.get('limit')}")
             if instance.get('patterns') is not None:
                 instance['split_patterns'] = instance.get('patterns')
+                maxkb_logger.info(f"Set split_patterns from instance: {instance.get('patterns')}")
             if instance.get('with_filter') is not None:
                 instance['split_with_filter'] = instance.get('with_filter')
+                maxkb_logger.info(f"Set split_with_filter from instance: {instance.get('with_filter')}")
 
+            maxkb_logger.info(f"Final instance split params - split_limit: {instance.get('split_limit')}, split_patterns: {instance.get('split_patterns')}, split_with_filter: {instance.get('split_with_filter')}")
+
             if with_valid:
                 DocumentInstanceSerializer(data=instance).is_valid(raise_exception=True)
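For context, the mapping this hunk applies can be shown in isolation: the request-level keys `limit`, `patterns`, and `with_filter` are normalized onto the `split_*` names on `instance`, and because the backward-compatible block runs second, a bare `instance` key overrides the request value when both are present. The `resolve_split_params` helper below is a hypothetical stand-alone sketch (it does not exist in MaxKB); it only mirrors the key mapping and ordering of the diff above:

from typing import Dict

# Hypothetical helper mirroring the hunk's logic. The second pass over
# `instance` overwrites the first pass over `data`, so the
# backward-compatible instance values win when both are set.
def resolve_split_params(data: Dict, instance: Dict) -> Dict:
    mapping = {
        'limit': 'split_limit',
        'patterns': 'split_patterns',
        'with_filter': 'split_with_filter',
    }
    for source in (data, instance):  # later source overwrites earlier
        for raw_key, split_key in mapping.items():
            if source.get(raw_key) is not None:
                instance[split_key] = source.get(raw_key)
    return instance

# Example: the request's limit seeds split_limit, while a pattern already
# present on the instance takes the backward-compatible path.
doc = resolve_split_params({'limit': 512}, {'patterns': ['\\n+']})
assert doc['split_limit'] == 512
assert doc['split_patterns'] == ['\\n+']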
@@ -954,6 +970,12 @@ class DocumentSerializers(serializers.Serializer):
 
         @staticmethod
         def get_document_paragraph_model(knowledge_id, instance: Dict):
+            from common.utils.logger import maxkb_logger
+            maxkb_logger.info(f"=== get_document_paragraph_model ===")
+            maxkb_logger.info(f"instance split_limit: {instance.get('split_limit')}")
+            maxkb_logger.info(f"instance split_patterns: {instance.get('split_patterns')}")
+            maxkb_logger.info(f"instance split_with_filter: {instance.get('split_with_filter')}")
+
             source_meta = {'source_file_id': instance.get('source_file_id')} if instance.get('source_file_id') else {}
             # Add MinerU model parameters to meta
             if instance.get('llm_model_id'):
@@ -966,13 +988,19 @@ class DocumentSerializers(serializers.Serializer):
             # Add chunking parameters to meta
             if instance.get('split_limit') is not None:
                 source_meta['split_limit'] = instance.get('split_limit')
+                maxkb_logger.info(f"Added split_limit to source_meta: {instance.get('split_limit')}")
             if instance.get('split_patterns') is not None:
                 source_meta['split_patterns'] = instance.get('split_patterns')
+                maxkb_logger.info(f"Added split_patterns to source_meta: {instance.get('split_patterns')}")
             if instance.get('with_filter') is not None:
                 source_meta['split_with_filter'] = instance.get('with_filter')
+                maxkb_logger.info(f"Added split_with_filter to source_meta: {instance.get('with_filter')}")
 
             meta = {**instance.get('meta'), **source_meta} if instance.get('meta') is not None else source_meta
             meta = convert_uuid_to_str(meta)
+
+            maxkb_logger.info(f"Final meta split params - split_limit: {meta.get('split_limit')}, split_patterns: {meta.get('split_patterns')}, split_with_filter: {meta.get('split_with_filter')}")
+
             document_model = Document(
                 **{
                     'knowledge_id': knowledge_id,
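The meta assembly in this function follows a small, reusable pattern: collect the per-document keys into `source_meta`, let them win over any pre-existing `meta`, then stringify UUIDs so the dict stays JSON-serializable (note that the last check reads the raw `with_filter` key rather than `split_with_filter`, unlike its two siblings). A minimal self-contained sketch is below; `stringify_uuids` is a hypothetical stand-in for MaxKB's `convert_uuid_to_str`, assuming it recursively renders UUID values as strings:

import uuid
from typing import Any

# Hypothetical stand-in for convert_uuid_to_str: walks dicts/lists and
# converts UUID values to plain strings.
def stringify_uuids(value: Any) -> Any:
    if isinstance(value, uuid.UUID):
        return str(value)
    if isinstance(value, dict):
        return {k: stringify_uuids(v) for k, v in value.items()}
    if isinstance(value, list):
        return [stringify_uuids(v) for v in value]
    return value

instance = {'source_file_id': uuid.uuid4(), 'split_limit': 512, 'meta': {'lang': 'zh'}}
source_meta = {'source_file_id': instance['source_file_id']} if instance.get('source_file_id') else {}
if instance.get('split_limit') is not None:
    source_meta['split_limit'] = instance['split_limit']

# source_meta wins over pre-existing meta, mirroring the merge order above
meta = {**instance['meta'], **source_meta} if instance.get('meta') is not None else source_meta
meta = stringify_uuids(meta)
# meta == {'lang': 'zh', 'source_file_id': '<uuid as str>', 'split_limit': 512}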
@@ -45,6 +45,22 @@ class DocumentView(APIView):
                                       get_knowledge_operation_object(keywords.get('knowledge_id')),
                                       {'name': r.data.get('name')}), )
     def post(self, request: Request, workspace_id: str, knowledge_id: str):
+        # Request logging
+        from common.utils.logger import maxkb_logger
+        maxkb_logger.info(f"=== Document Upload View ===")
+        maxkb_logger.info(f"request.data keys: {list(request.data.keys())}")
+        maxkb_logger.info(f"request.FILES keys: {list(request.FILES.keys())}")
+        if 'limit' in request.data:
+            maxkb_logger.info(f"request.data limit: {request.data.get('limit')}")
+        if 'patterns' in request.data:
+            maxkb_logger.info(f"request.data patterns: {request.data.get('patterns')}")
+        if 'with_filter' in request.data:
+            maxkb_logger.info(f"request.data with_filter: {request.data.get('with_filter')}")
+        if 'stt_model_id' in request.data:
+            maxkb_logger.info(f"request.data stt_model_id: {request.data.get('stt_model_id')}")
+        if 'llm_model_id' in request.data:
+            maxkb_logger.info(f"request.data llm_model_id: {request.data.get('llm_model_id')}")
+
         # Prepare chunking parameters
         serializer_data = {
             'workspace_id': workspace_id,
@@ -59,6 +75,8 @@ class DocumentView(APIView):
         if 'with_filter' in request.data:
             serializer_data['with_filter'] = request.data.get('with_filter')
 
+        maxkb_logger.info(f"serializer_data: {serializer_data}")
+
         return result.success(
             DocumentSerializers.Create(data=serializer_data).save(request.data))
 
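Taken together, the view changes mean the upload endpoint now forwards `limit`, `patterns`, `with_filter`, `stt_model_id`, and `llm_model_id` from the multipart form into the serializer. A hedged client-side sketch with `requests` follows; the route, auth scheme, and payload encoding are not shown in this diff, so every name below is a placeholder:

import requests

# All values are assumptions: substitute your deployment's real route,
# credentials, and model IDs.
BASE_URL = 'http://localhost:8080'
UPLOAD_ROUTE = '<document-upload-route>'  # route for DocumentView.post, not shown here
API_KEY = '<api-key>'
STT_MODEL_ID = '<stt-model-uuid>'         # speech-to-text model for audio/video
LLM_MODEL_ID = '<llm-model-uuid>'         # model used by the MinerU path

resp = requests.post(
    f'{BASE_URL}/{UPLOAD_ROUTE}',
    headers={'Authorization': f'Bearer {API_KEY}'},
    data={
        'limit': 512,              # becomes split_limit downstream
        'patterns': '\\n\\n',      # becomes split_patterns
        'with_filter': 'true',     # becomes split_with_filter
        'stt_model_id': STT_MODEL_ID,
        'llm_model_id': LLM_MODEL_ID,
    },
    files={'file': open('lecture.mp4', 'rb')},  # handled via request.FILES
)
resp.raise_for_status()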