def float32_to_int16_fast(self, float32_data: bytes) -> bytes:
    """Convert Float32 PCM sample bytes to Int16 PCM sample bytes.

    Samples are read in the machine's native byte order (``struct`` format
    ``f`` / ``numpy.float32``).  Trailing bytes that do not make up a whole
    4-byte sample are discarded.  Every sample is clamped to [-1.0, 1.0],
    multiplied by 32767 and truncated toward zero; NaN samples are emitted
    as 0.  The NumPy path and the pure-Python fallback apply the same rule,
    so they return identical bytes for identical input.

    Args:
        float32_data: Raw bytes containing 32-bit float samples.

    Returns:
        Raw bytes containing one signed 16-bit sample per input sample
        (``b''`` when no complete sample is present).
    """
    import math

    # Drop an incomplete trailing sample so both decoders see whole floats.
    usable = len(float32_data) - len(float32_data) % 4
    if usable != len(float32_data):
        float32_data = float32_data[:usable]
    if not usable:
        return b''

    if self._numpy_available:
        try:
            import numpy as np
            samples = np.frombuffer(float32_data, dtype=np.float32)
            # Casting NaN to int16 is undefined, so replace it with silence
            # first; infinities are pinned to full scale.
            samples = np.nan_to_num(samples, nan=0.0, posinf=1.0, neginf=-1.0)
            samples = np.clip(samples, -1.0, 1.0)
            return (samples * 32767).astype(np.int16).tobytes()
        except Exception as e:
            # Best-effort: any NumPy problem falls through to the
            # pure-Python implementation below instead of dropping audio.
            print(f"numpy转换失败,使用备用方法: {e}")

    float_values = struct.unpack(f'{usable // 4}f', float32_data)
    # Clamp BEFORE scaling: int() raises on NaN/inf, and clamping afterwards
    # would allow -32768 where the NumPy path yields -32767.
    int16_values = [
        0 if math.isnan(val) else int(max(-1.0, min(1.0, val)) * 32767)
        for val in float_values
    ]
    return struct.pack(f'{len(int16_values)}h', *int16_values)
转换将在播放时进行") + # 注意:这里不修改配置,而是在播放时进行转换 + self.needs_conversion = True else: print(f"使用原始输出配置: 格式={original_format}, 比特深度={original_bit_size}, 采样率={original_sample_rate}Hz") + self.needs_conversion = False def open_input_stream(self) -> pyaudio.Stream: """打开音频输入流""" @@ -144,9 +145,9 @@ class AudioDeviceManager: raise Exception("无法打开任何音频输出设备") def write_audio_data(self, audio_data: bytes) -> bool: - """写入音频数据,性能优化版本""" + """写入音频数据,高性能版本""" # 预缓冲:对于较小的音频数据,等待积累到一定大小再播放 - min_buffer_size = 1024 # 最小缓冲大小 + min_buffer_size = 2048 # 最小缓冲大小 if not hasattr(self, '_audio_buffer'): self._audio_buffer = b'' @@ -162,10 +163,18 @@ class AudioDeviceManager: # 继续等待更多数据 return True - # 直接写入pyaudio流,不做实时转换 + # 根据设备能力决定是否需要格式转换 + final_data = buffer_to_play + + # 如果需要格式转换,进行高效转换 + if hasattr(self, 'needs_conversion') and self.needs_conversion: + # 使用快速转换方法 + final_data = self.converter.float32_to_int16_fast(buffer_to_play) + + # 尝试写入pyaudio流 try: if self.output_stream: - self.output_stream.write(buffer_to_play) + self.output_stream.write(final_data) return True except Exception as e: print(f"pyaudio写入失败: {e}") @@ -175,8 +184,8 @@ class AudioDeviceManager: if enable_aplay and self.platform_config['fallback_to_aplay']: print("尝试使用aplay播放...") return self.player.play_audio( - audio_data=buffer_to_play, - format_type='Int16', # 现在统一使用Int16 + audio_data=final_data, + format_type='Int16', # 转换后的数据 sample_rate=self.output_config.sample_rate, channels=self.output_config.channels ) diff --git a/doubao/config.py b/doubao/config.py index 5c408b1..8dd0b5e 100644 --- a/doubao/config.py +++ b/doubao/config.py @@ -30,7 +30,7 @@ start_session_req = { "speaker": "zh_female_vv_jupiter_bigtts", # "speaker": "S_XXXXXX", // 指定自定义的复刻音色,需要填下character_manifest # "speaker": "ICL_zh_female_aojiaonvyou_tob" // 指定官方复刻音色,不需要填character_manifest - "audio_config": {"channel": 1, "format": "pcm", "sample_rate": 16000}, + "audio_config": {"channel": 1, "format": "pcm", "sample_rate": 24000}, }, "dialog": { "bot_name": 
"豆包", @@ -61,6 +61,6 @@ output_audio_config = { "chunk": 4096, # 增加缓冲区大小 "format": "pcm", "channels": 1, - "sample_rate": 16000, - "bit_size": pyaudio.paInt16, + "sample_rate": 24000, + "bit_size": pyaudio.paFloat32, # 服务器返回的是Float32格式 } diff --git a/doubao/test_audio_conversion.py b/doubao/test_audio_conversion.py new file mode 100644 index 0000000..c7cd02c --- /dev/null +++ b/doubao/test_audio_conversion.py @@ -0,0 +1,118 @@ +#!/usr/bin/env python3 +""" +音频转换测试脚本 +用于测试高性能音频格式转换 +""" + +import time +import struct +from audio_converter import AudioConverter + +def test_conversion_performance(): + """测试转换性能""" + print("=== 音频转换性能测试 ===") + + converter = AudioConverter() + + # 生成测试数据(1秒的24kHz Float32音频) + sample_rate = 24000 + duration = 1.0 # 1秒 + num_samples = int(sample_rate * duration) + + # 生成正弦波测试数据 + test_data = bytearray() + for i in range(num_samples): + # 生成440Hz正弦波 + value = 0.5 * (i / sample_rate * 440 * 2 * 3.14159) + sample = (value).astype('float32') if hasattr(value, 'astype') else float(value) + test_data.extend(struct.pack('f', sample)) + + test_data = bytes(test_data) + print(f"生成了 {len(test_data)} 字节的测试数据") + + # 测试转换性能 + start_time = time.time() + converted_data = converter.float32_to_int16_fast(test_data) + end_time = time.time() + + conversion_time = end_time - start_time + data_ratio = len(converted_data) / len(test_data) + + print(f"转换结果:") + print(f" 原始数据: {len(test_data)} 字节") + print(f" 转换后: {len(converted_data)} 字节") + print(f" 数据比例: {data_ratio:.2f}") + print(f" 转换时间: {conversion_time:.4f} 秒") + print(f" 转换速度: {len(test_data) / conversion_time / 1024:.1f} KB/s") + + # 验证转换质量 + print("\n=== 转换质量验证 ===") + + # 检查一些样本值 + original_samples = struct.unpack('10f', test_data[:40]) + converted_samples = struct.unpack('10h', converted_data[:20]) + + print("前10个样本的转换结果:") + for i, (orig, conv) in enumerate(zip(original_samples, converted_samples)): + expected = int(orig * 32767) + print(f" 样本{i}: {orig:.6f} -> {conv} (期望: {expected})") + + # 
def test_numpy_vs_python():
    """Benchmark the NumPy conversion path against the pure-Python fallback.

    Builds 2 seconds of random 24 kHz Float32 samples, then times 10
    conversions through ``float32_to_int16_fast`` twice: once on a converter
    left as constructed (NumPy path, when available) and once on a converter
    whose ``_numpy_available`` flag is forced to ``False`` so the
    struct-based fallback is what actually gets measured.  Results are
    printed; nothing is returned.
    """
    import random

    print("\n=== NumPy vs Python 性能对比 ===")

    sample_rate = 24000
    duration = 2.0  # seconds
    num_samples = int(sample_rate * duration)

    # Pack all samples with a single struct call instead of one per sample.
    test_data = struct.pack(
        f'{num_samples}f',
        *(random.uniform(-1.0, 1.0) for _ in range(num_samples)),
    )

    numpy_converter = AudioConverter()
    if numpy_converter._numpy_available:
        print("测试NumPy版本...")
        # perf_counter is monotonic and high-resolution, unlike time.time().
        start_time = time.perf_counter()
        for _ in range(10):
            numpy_converter.float32_to_int16_fast(test_data)
        numpy_time = time.perf_counter() - start_time
        print(f"NumPy版本: {numpy_time:.4f} 秒 (10次)")
    else:
        print("NumPy不可用")
        numpy_time = None

    # The static float32_to_int16 wrapper delegates to the fast method with
    # NumPy still enabled, so it cannot serve as the pure-Python baseline.
    # Disable NumPy on a dedicated instance to exercise the fallback.
    python_converter = AudioConverter()
    python_converter._numpy_available = False

    print("测试纯Python版本...")
    start_time = time.perf_counter()
    for _ in range(10):
        python_converter.float32_to_int16_fast(test_data)
    python_time = time.perf_counter() - start_time
    print(f"纯Python版本: {python_time:.4f} 秒 (10次)")

    # Skip the ratio when NumPy was not measured or the timer read zero.
    if numpy_time is not None and numpy_time > 0:
        speedup = python_time / numpy_time
        print(f"NumPy加速比: {speedup:.2f}x")