From c035dc5f3906a1c6bef38c9d785f73c68535d61f Mon Sep 17 00:00:00 2001 From: wxg0103 <727495428@qq.com> Date: Wed, 16 Jul 2025 10:12:49 +0800 Subject: [PATCH] fix: remove empty lines from text before text-to-speech conversion --- apps/application/serializers/application.py | 5 +++-- apps/common/utils/common.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/apps/application/serializers/application.py b/apps/application/serializers/application.py index 8962ca34..c96cecb3 100644 --- a/apps/application/serializers/application.py +++ b/apps/application/serializers/application.py @@ -34,7 +34,7 @@ from common.database_model_manage.database_model_manage import DatabaseModelMana from common.db.search import native_search, native_page_search from common.exception.app_exception import AppApiException from common.field.common import UploadedFileField -from common.utils.common import get_file_content, restricted_loads, generate_uuid +from common.utils.common import get_file_content, restricted_loads, generate_uuid, _remove_empty_lines from knowledge.models import Knowledge, KnowledgeScope from knowledge.serializers.knowledge import KnowledgeSerializer, KnowledgeModelSerializer from maxkb.conf import PROJECT_DIR @@ -931,8 +931,9 @@ class ApplicationOperateSerializer(serializers.Serializer): if application.tts_model_enable: model = get_model_instance_by_model_workspace_id(application.tts_model_id, application.workspace_id, **application.tts_model_params_setting) + content = _remove_empty_lines(instance.get('text', '')) - return model.text_to_speech(instance.get('text')) + return model.text_to_speech(content) def play_demo_text(self, instance, with_valid=True): text = '你好,这里是语音播放测试' diff --git a/apps/common/utils/common.py b/apps/common/utils/common.py index 128f32da..18765d1f 100644 --- a/apps/common/utils/common.py +++ b/apps/common/utils/common.py @@ -118,7 +118,7 @@ def markdown_to_plain_text(md: str) -> str: # 使用正则表达式去除所有 HTML 标签 text = 
re.sub(r'<[^>]+>', '', text) # 先移除特定媒体标签(优先级高于通用HTML标签移除) - text = re.sub(r'<(audio|video)[^>]*>.*?</\1>', '', text, flags=re.DOTALL) # 匹配音频/视频标签 + text = re.sub(r'<(?:audio|video)(?:\s+[^>]*)?>[\s\S]*?(?:</(?:audio|video)>)?', '', text, flags=re.IGNORECASE) text = re.sub(r'<img[^>]*>', '', text) # 匹配图片标签 # 去除多余的空白字符(包括换行符、制表符等) text = re.sub(r'\s+', ' ', text)