音视频支持分段
This commit is contained in:
parent
8b85ad33f0
commit
8a183dd5cf
@ -880,6 +880,21 @@ class DocumentSerializers(serializers.Serializer):
|
|||||||
@post(post_function=post_embedding)
|
@post(post_function=post_embedding)
|
||||||
@transaction.atomic
|
@transaction.atomic
|
||||||
def save(self, instance: Dict, with_valid=False, **kwargs):
|
def save(self, instance: Dict, with_valid=False, **kwargs):
|
||||||
|
# 添加调试日志
|
||||||
|
from common.utils.logger import maxkb_logger
|
||||||
|
maxkb_logger.info(f"Save called with instance keys: {list(instance.keys())}")
|
||||||
|
maxkb_logger.info(f"save method - limit: {instance.get('limit')}, split_patterns: {instance.get('patterns')}, with_filter: {instance.get('with_filter')}")
|
||||||
|
|
||||||
|
# 确保分块参数使用正确的键名
|
||||||
|
# 前端传递的是 limit, patterns, with_filter
|
||||||
|
# 我们需要将其转换为 split_limit, split_patterns, split_with_filter
|
||||||
|
if instance.get('limit') is not None:
|
||||||
|
instance['split_limit'] = instance.get('limit')
|
||||||
|
if instance.get('patterns') is not None:
|
||||||
|
instance['split_patterns'] = instance.get('patterns')
|
||||||
|
if instance.get('with_filter') is not None:
|
||||||
|
instance['split_with_filter'] = instance.get('with_filter')
|
||||||
|
|
||||||
if with_valid:
|
if with_valid:
|
||||||
DocumentInstanceSerializer(data=instance).is_valid(raise_exception=True)
|
DocumentInstanceSerializer(data=instance).is_valid(raise_exception=True)
|
||||||
self.is_valid(raise_exception=True)
|
self.is_valid(raise_exception=True)
|
||||||
@ -947,6 +962,13 @@ class DocumentSerializers(serializers.Serializer):
|
|||||||
# 添加音视频STT模型参数到meta
|
# 添加音视频STT模型参数到meta
|
||||||
if instance.get('stt_model_id'):
|
if instance.get('stt_model_id'):
|
||||||
source_meta['stt_model_id'] = instance.get('stt_model_id')
|
source_meta['stt_model_id'] = instance.get('stt_model_id')
|
||||||
|
# 添加分块参数到meta
|
||||||
|
if instance.get('split_limit') is not None:
|
||||||
|
source_meta['split_limit'] = instance.get('split_limit')
|
||||||
|
if instance.get('split_patterns') is not None:
|
||||||
|
source_meta['split_patterns'] = instance.get('split_patterns')
|
||||||
|
if instance.get('with_filter') is not None:
|
||||||
|
source_meta['split_with_filter'] = instance.get('with_filter')
|
||||||
meta = {**instance.get('meta'), **source_meta} if instance.get('meta') is not None else source_meta
|
meta = {**instance.get('meta'), **source_meta} if instance.get('meta') is not None else source_meta
|
||||||
meta = convert_uuid_to_str(meta)
|
meta = convert_uuid_to_str(meta)
|
||||||
|
|
||||||
@ -1274,7 +1296,18 @@ class DocumentSerializers(serializers.Serializer):
|
|||||||
|
|
||||||
# 插入文档
|
# 插入文档
|
||||||
for document in instance_list:
|
for document in instance_list:
|
||||||
|
# 确保分块参数使用正确的键名
|
||||||
|
# 前端传递的是 limit, patterns, with_filter
|
||||||
|
# 我们需要将其转换为 split_limit, split_patterns, split_with_filter
|
||||||
|
if document.get('limit') is not None:
|
||||||
|
document['split_limit'] = document.get('limit')
|
||||||
|
if document.get('patterns') is not None:
|
||||||
|
document['split_patterns'] = document.get('patterns')
|
||||||
|
if document.get('with_filter') is not None:
|
||||||
|
document['split_with_filter'] = document.get('with_filter')
|
||||||
|
|
||||||
maxkb_logger.info(f"Processing document: {document.get('name')}, paragraphs count: {len(document.get('paragraphs', []))}")
|
maxkb_logger.info(f"Processing document: {document.get('name')}, paragraphs count: {len(document.get('paragraphs', []))}")
|
||||||
|
maxkb_logger.info(f"Document split params - split_limit: {document.get('split_limit')}, split_patterns: {document.get('split_patterns')}, split_with_filter: {document.get('split_with_filter')}")
|
||||||
document_paragraph_dict_model = DocumentSerializers.Create.get_document_paragraph_model(
|
document_paragraph_dict_model = DocumentSerializers.Create.get_document_paragraph_model(
|
||||||
knowledge_id,
|
knowledge_id,
|
||||||
document
|
document
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user