#!/usr/bin/env python
"""Manual smoke test for the optimized ``smart_split_transcription`` method.

Runs ``MediaSplitHandle.smart_split_transcription`` against a handful of
inputs (short text, long text, overlap, overlap >= limit, text with line
breaks, empty text, single character) and prints what comes back.  Nothing
is asserted; the output is meant to be inspected by eye.
"""
import sys
import os

# Make this script's directory importable before the project import below.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from apps.common.handle.impl.media.media_split_handle import MediaSplitHandle


def _detect_overlap(first: str, second: str, max_len: int) -> int:
    """Return the longest k <= max_len where first's last k chars equal second's first k."""
    upper = min(max_len, len(first), len(second))
    for size in range(upper, 0, -1):
        if first[-size:] == second[:size]:
            return size
    return 0


def test_smart_split() -> None:
    """Exercise ``smart_split_transcription`` on several inputs and print the results."""
    handler = MediaSplitHandle()

    print("测试1: 短文本(≤ limit)")
    short_text = "这是一个短文本"
    result = handler.smart_split_transcription(short_text, limit=1000, overlap=100)
    print(f"输入长度: {len(short_text)}, 输出块数: {len(result)}")
    print(f"结果: {result}")
    print()

    print("测试2: 长文本(> limit)")
    long_text = "这是一段很长的文本。" * 200  # 10 characters x 200 = 2000 characters
    result = handler.smart_split_transcription(long_text, limit=1000, overlap=100)
    print(f"输入长度: {len(long_text)}, 输出块数: {len(result)}")
    print(f"第一块长度: {len(result[0])}")
    print(f"最后一块长度: {len(result[-1])}")
    print()

    print("测试3: 验证重叠")
    # Build a text out of an easily recognizable pattern.
    base = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    long_text = base * 50  # 1300 characters
    requested_overlap = 100
    result = handler.smart_split_transcription(
        long_text, limit=500, overlap=requested_overlap
    )
    print(f"输入长度: {len(long_text)}, 输出块数: {len(result)}")

    # Inspect the overlap between the first two chunks.
    if len(result) > 1:
        end_of_first = result[0][-50:]
        start_of_second = result[1][:50]
        print(f"\n第一块末尾50字符: {end_of_first}")
        print(f"第二块开头50字符: {start_of_second}")

        # Measure over a window as large as the requested overlap; comparing
        # only the 50-character previews could never report more than 50.
        # NOTE: the input repeats every 26 characters, so suffix/prefix matches
        # can also occur by coincidence; treat the number as indicative only.
        overlap_len = _detect_overlap(result[0], result[1], requested_overlap)
        print(f"检测到的重叠长度: {overlap_len}")
    print()

    print("测试4: 边界情况 - overlap >= limit")
    text = "测试文本" * 100
    result = handler.smart_split_transcription(text, limit=200, overlap=200)
    print(f"输入长度: {len(text)}, limit=200, overlap=200")
    print(f"输出块数: {len(result)}")
    for index, chunk in enumerate(result, start=1):
        print(f"块{index}长度: {len(chunk)}")
    print()

    print("测试5: 包含换行符的文本")
    text_with_newlines = "第一行\n第二行\n第三行\n" * 100
    result = handler.smart_split_transcription(
        text_with_newlines, limit=500, overlap=50
    )
    print(f"输入长度: {len(text_with_newlines)}, 输出块数: {len(result)}")
    # Evaluate outside the f-string: a backslash inside a replacement field is
    # a SyntaxError before Python 3.12, and the check must look for a real
    # newline character rather than a literal backslash followed by "n".
    keeps_newline = "\n" in result[0]
    print(f"第一块是否保留换行符: {keeps_newline}")
    print()

    print("测试6: 极端情况 - 空文本")
    result = handler.smart_split_transcription("", limit=1000, overlap=100)
    print(f"空文本输入,输出: {result}")
    print()

    print("测试7: 极端情况 - 单个字符")
    result = handler.smart_split_transcription("A", limit=1000, overlap=100)
    print(f"单字符输入,输出: {result}")


if __name__ == "__main__":
    test_smart_split()