From 93a0b0a44653f63efb0586ee19cf392b47b4d853 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E6=BD=AE?= Date: Fri, 19 Sep 2025 20:04:09 +0800 Subject: [PATCH] audio --- .../audio_converter.cpython-312.pyc | Bin 15440 -> 16977 bytes doubao/audio_converter.py | 60 ++++++--- doubao/audio_manager.py | 47 ++++--- doubao/config.py | 6 +- doubao/test_audio_conversion.py | 118 ++++++++++++++++++ 5 files changed, 194 insertions(+), 37 deletions(-) create mode 100644 doubao/test_audio_conversion.py diff --git a/doubao/__pycache__/audio_converter.cpython-312.pyc b/doubao/__pycache__/audio_converter.cpython-312.pyc index c262fa62684e4401f9dc097a15962265df6e160b..b9a311e9413bfde9055a97193f769491538f12c8 100644 GIT binary patch delta 4440 zcmaJ^3vg7`89wLk?%ijyo9AW&Y&H)F8^S9fk^n*^K`AuqKp)1(vh2MH3!B~b>|H|Q z#zc!HDq`p<5+4*)tU*wMu^s53&?(b8<2XYMO1#qIOic=7XN`nae1BlV5g~cFJbSAX}OQ$+XpY z5GCMgv`V6E1G=rrD4Dk!8|@%B!G17;rIBw{6LnYQ!%GW z5HF{C4EI!~ghoAH$IiYghOM)ueBYePv5L#*})-V$3hF zOyH|RG@d{~pN~1AVOD^jwokg*?CW;85F`>s09|CvXta7hcX)?$+~1ya7G5IM>iI0E zV9WsRcL@i`JYh7GZPpyauV{{e=L#Pc;%HWY-~l*5SQhqiXx zGPm<3BH8u`umNcu%qepndCn3Nnn_4_s0=2sD3j8IB)c+YqHk94G_@yX(X~s5VS2W7 zyHnUc%}6?|FxNE&d@z*Hta5U%$=+#A4+VuBdxU11vc5{+5_ga_1e`frUxpBH=7J8B zl9&FLEGg4VWRH*@jgBrEI!uq5w!f&itu*cSA%TKP9hO$&&q#VDLg@!ITvj511cVGYd z`1UW~I{^VSvVR-k=<~-$2X@u?G)GlwQEh804f}IiXtXw}j-l0?AA@#TjtUV>}qz!j}PB)uGGlqy%d1n|QSN zQ}faR=Tu^l|3J||6X@Mu#*4+GQmsD&h9w#x-nz(__RQ|okbQFRBZGF|zS@uNQ-}FF zvAAmEHDXdwy^6$M#99hI8_-h@`-~yJKfZpM76Pf9>h13ywO5-1XfK z(5-#84g)qfAk4mkh0R6q(Ipb2!xoZxpn0FXxit_t>x%}ipo4AJfX>u{Y+f=^v z>*L*Dp4ck3R)@9TFd5ivggI&ZAoOk_q)|BGq%7xM2D! zU?xzai@=uUl@8^Wbz6s>1!tYJ2A#9|QbW$gr!9+F)EZ@=KNl^giyl_S38Z z)%s5%6LS)~6Tt_-haFowwZ$+J7zQpwXJq97s+q}&4MB)63v{#)Q0ZD$fqX2g4ATso zg)kGq?_evie;2~t2rChOgm5o_DkPdz^LQw+Vx-uEa1G#>GO`O86cnPBwu$J04j53F zSw-c6A7`$i)Wu(M&g^6^9BEu1i$%1msyY%%Pzi@xWMxB4QjJnLq_D{-W5NqcLOU|H z6bP&yVIIQ$2$;67)k*z)@sP1hCe8qCC7&6mUNRa@^Tr&a$9+_py?wHFiKvq|heL9mtjHlHcI^VSlfRN(Cu+q% zd*G|=LRy5^5zVbsQ%g*eBMA%+ZEukg=4H8l`uspc?jhrpcBC`{Ts-*@A!{!Z$nXbD zOe3%4ZqL+`R+M-YVGDrjY6)&x8I32EF?Xg_jJ;>dKFcTnsThTJ>joLC?Pt8nQ+*F}$6x)qMUqrZNY#`%Z z{?}P0wECUddyG!aDzrxxTGz8X`Js8u;t>?u$UXB*^0i2wutYVs2H__$%I}*m`O!#K zOem7f*#Fg)|84$^ML(#_?_iqoAESHWjPPd=5Axc9tsAJj?L2ML6nzXe!;pqz^C0$b z<;@{rs0P?hJ=d>8x!)tai-7Ch?9zvkhEU9(x_zJS(uW6TFMN}VrN}Db=c^y+)J|0^ z%2B`{Z=8Jw zydJn4@?$kG>%VaDAKelRM*{(WmL9}~IHjgpF7~jNt0F{9INHoG-7`q!gnfu`3gIJ! zKO=|;n%3GL;9B~|-Wc@XSr$X_NJfnE9qb%e7gc4nQ|kSOApX1Ez&(rS88K%63&JT_ AnE(I) delta 2860 zcmaKteQXrR6~K4)Zujnddw0ghAJ~{XJ2v;OK;P8}L2gy1H`5U$7V9zJqk zncc%Su`!WIZAF9+4Jjrq?UkYcC8eONTH2-rsehuV5UXvn{R8@eV@p-#K%`3jgY>;Q z+r?GWmG-x9=DnGDZ|1$(+lR@mLHA9Ut3ZIKr0b*pEiG5wK{7fH%~OIV^bu9)6IIe@ z(X6VdNl{U?9JXFUYk2x(=(TD#(Ac6@ReD5Y2aUX-u|1-3XjX@ydQ>}T9a@1}0Iw5X z1zwy-QC){U(!vtXA-BJfJtU2GQ>L1cp-Fbx`ca7!mZuH`x_(i302G+p(!;(ZttU&_ zO{vxIf-X~u##3RVxhbTEjc|mZ+1z7?uLa%*{cMfvXInQ2!idGN&1+T3!0Gv;%EOjR zVqTvAf-U;MpJ9t?J0J?H64z;>X&FCjG7BQv_BlrXu7GylE&jL^DPVXnvR7o9lRHO_)s^ysI>U1 zpsoKFa7g$<78K9;@T7D3#A~;mzHGw) zL}#|{^fQy?_2XMUQOf_QcyBA-tmF4eVActPz9?kF&T5gmayHjnq^}g*+6%96 zN`Rl3l4=a4HFm3LuohP{#bnyF&83$q@x3$)qBND*lZi$(%Dz=x8Zzauj-yPQk=iq4 zz&SX#B+@BrJVj}Wno^p=IMeaeU_?tBu~gEpP(LmiK&(bIAl4w(158nmnl?S0NXIpu zHnQu*m8B-pbOm(${N@11_<3;?>CV+I-A;(Y220kAnu>0OjaVe18T~296*U$ys0SyN zxNGz=T%{hd9MO%yi78eRbtB|?ECE5k3^*)&YOT8~EtwT%sbIpmFTm%4MUabl$7~^& zilt6cmCeiwC|oJI&%bk4*fkqq!=*9U^r_NzQi6qN%3WbP5KH>y{F)xtR=&x5k(gFp zi$`^=G{{vnw1+~mWXuSK4lHNsz*5J2Q)idTyByqg8`!ZMkCo=!%LwuIL5ru2DJ4>> z7T2*lVc+r*zR1>A_{m^yN5%Wrs(onsG+^e&D}vBFBS39llqm0RVAb9_Cnh3&717VO zc}q!z^?Q9X?`U90y){H*SG{FrDp;7FnBJvD!c|UsK7~p-;z)Z|=0Y^K8b~N#4k=!NOGk56Skxc9-aHXrTBe%$uy>+Igxucj|wzjtn|jp7c_{fH{0}aH@b_6zw(?af4 zKqD`s*U$N#^c!gK45A0|EMh0XwC+iz;`BvqZ$4x*93wuqwsu^0uNKu~RrUN#9zeWv>=I1?v_B^DMD7_4LB=6}?c5v-7BD0CLPuK9N z@!~L@(G*RDjSyZsbO6WUzEN9FT-Qj*an{@zAVIdfv1}twYI;Hj9SVih@$gUx0oNrj z4}J-q!;uSfEV?MKHm-tO<YSwpCJkKJ8gLn?EYubr`acvu*aDGuzRE7?z5_sVC{>Ls?k zt)`UM`9dhBbs!7}xSRF1ss11il66CcGQa{1vV{zm46?a^aA2N#Ce2*;E97pOtbT_up^tY`DD%Jnl}HK$R(&+ z%Kp;6b5{^W{+nHal8oS|ScXzPf@eLJgI_Wg_~RL&xqoE-%xlr65P|!+D5*{dE2W#L zVZRLipdMYr%RLc}B|{pe;yQB#golebmia5(z?(9 diff --git a/doubao/audio_converter.py b/doubao/audio_converter.py index c1c0f5d..3ddfdec 100644 --- a/doubao/audio_converter.py +++ b/doubao/audio_converter.py @@ -17,6 +17,20 @@ import wave class AudioConverter: """音频格式转换工具类""" + def __init__(self): + # 预分配转换缓冲区 + self._conversion_cache = {} + self._numpy_available = self._check_numpy() + self._conversion_buffer = bytearray(65536) # 64KB预分配缓冲区 + + def _check_numpy(self) -> bool: + """检查numpy是否可用""" + try: + import numpy as np + return True + except ImportError: + return False + @staticmethod def get_platform_info() -> dict: """获取平台信息""" @@ -26,28 +40,44 @@ class AudioConverter: 'platform': platform.platform() } - @staticmethod - def float32_to_int16(float32_data: bytes) -> bytes: - """将Float32格式转换为Int16格式""" + def float32_to_int16_fast(self, float32_data: bytes) -> bytes: + """高性能Float32到Int16转换""" if len(float32_data) % 4 != 0: - # 处理数据长度不是4的倍数的情况 float32_data = float32_data[:len(float32_data) - len(float32_data) % 4] - # 解包Float32数据 - float_values = struct.unpack(f'{len(float32_data) // 4}f', float32_data) + # 使用numpy进行快速转换(如果可用) + if self._numpy_available: + try: + import numpy as np + # 直接转换为numpy数组,避免多次解包 + float_array = np.frombuffer(float32_data, dtype=np.float32) + # 限制范围并转换 + float_array = np.clip(float_array, -1.0, 1.0) + int16_array = (float_array * 32767).astype(np.int16) + return int16_array.tobytes() + except Exception as e: + print(f"numpy转换失败,使用备用方法: {e}") - # 转换为Int16范围 - int16_values = [] - for val in float_values: - # 限制在[-1.0, 1.0]范围内 - val = max(-1.0, min(1.0, val)) - # 转换为Int16 - int16_val = int(val * 32767) - int16_values.append(int16_val) + # 高效的纯Python实现 + num_samples = len(float32_data) // 4 + if num_samples == 0: + return b'' - # 打包为Int16字节数据 + # 使用内存视图和struct模块进行高效转换 + float_values = struct.unpack(f'{num_samples}f', float32_data) + + # 使用列表推导式和生成器表达式提高性能 + int16_values = [max(-32768, min(32767, int(val * 32767))) for val in float_values] + + # 批量打包 return struct.pack(f'{len(int16_values)}h', *int16_values) + @staticmethod + def float32_to_int16(float32_data: bytes) -> bytes: + """将Float32格式转换为Int16格式(保持向后兼容)""" + converter = AudioConverter() + return converter.float32_to_int16_fast(float32_data) + @staticmethod def int16_to_float32(int16_data: bytes) -> bytes: """将Int16格式转换为Float32格式""" diff --git a/doubao/audio_manager.py b/doubao/audio_manager.py index c032e0a..6a0d2f6 100644 --- a/doubao/audio_manager.py +++ b/doubao/audio_manager.py @@ -66,12 +66,16 @@ class AudioDeviceManager: # 检查是否需要格式转换 needs_conversion = False - # 如果当前格式不被支持,且启用降级 + # 关键检查:如果当前格式不被支持,必须转换 if (self.output_config.bit_size == pyaudio.paFloat32 and - 'Float32' not in self.device_info['supported_formats'] and - fallback_int16): - needs_conversion = True + 'Float32' not in self.device_info['supported_formats']): + if fallback_int16: + needs_conversion = True + print(f"检测到平台不支持Float32格式,将进行实时转换") + else: + print(f"警告: 平台不支持Float32格式,但转换已禁用") + # 如果优先使用原生格式,且平台推荐格式不是当前格式 elif (prefer_native and self.device_info['recommended_format'] == 'Int16' and @@ -80,17 +84,14 @@ class AudioDeviceManager: needs_conversion = True if needs_conversion: - print(f"警告: 音频格式转换 {self.output_config.bit_size} -> Int16") - self.output_config.format = 'pcm' - self.output_config.bit_size = pyaudio.paInt16 - self.output_config.sample_rate = self.platform_config['recommended_sample_rate'] - - print(f"输出配置调整:") - print(f" 格式: {original_format} -> {self.output_config.format}") - print(f" 比特深度: {32 if original_bit_size == pyaudio.paFloat32 else 16} -> 16") - print(f" 采样率: {original_sample_rate} -> {self.output_config.sample_rate}Hz") + print(f"将使用实时格式转换: Float32 -> Int16") + print(f" 保持原始采样率: {original_sample_rate}Hz") + print(f" 转换将在播放时进行") + # 注意:这里不修改配置,而是在播放时进行转换 + self.needs_conversion = True else: print(f"使用原始输出配置: 格式={original_format}, 比特深度={original_bit_size}, 采样率={original_sample_rate}Hz") + self.needs_conversion = False def open_input_stream(self) -> pyaudio.Stream: """打开音频输入流""" @@ -144,9 +145,9 @@ class AudioDeviceManager: raise Exception("无法打开任何音频输出设备") def write_audio_data(self, audio_data: bytes) -> bool: - """写入音频数据,性能优化版本""" + """写入音频数据,高性能版本""" # 预缓冲:对于较小的音频数据,等待积累到一定大小再播放 - min_buffer_size = 1024 # 最小缓冲大小 + min_buffer_size = 2048 # 最小缓冲大小 if not hasattr(self, '_audio_buffer'): self._audio_buffer = b'' @@ -162,10 +163,18 @@ class AudioDeviceManager: # 继续等待更多数据 return True - # 直接写入pyaudio流,不做实时转换 + # 根据设备能力决定是否需要格式转换 + final_data = buffer_to_play + + # 如果需要格式转换,进行高效转换 + if hasattr(self, 'needs_conversion') and self.needs_conversion: + # 使用快速转换方法 + final_data = self.converter.float32_to_int16_fast(buffer_to_play) + + # 尝试写入pyaudio流 try: if self.output_stream: - self.output_stream.write(buffer_to_play) + self.output_stream.write(final_data) return True except Exception as e: print(f"pyaudio写入失败: {e}") @@ -175,8 +184,8 @@ class AudioDeviceManager: if enable_aplay and self.platform_config['fallback_to_aplay']: print("尝试使用aplay播放...") return self.player.play_audio( - audio_data=buffer_to_play, - format_type='Int16', # 现在统一使用Int16 + audio_data=final_data, + format_type='Int16', # 转换后的数据 sample_rate=self.output_config.sample_rate, channels=self.output_config.channels ) diff --git a/doubao/config.py b/doubao/config.py index 5c408b1..8dd0b5e 100644 --- a/doubao/config.py +++ b/doubao/config.py @@ -30,7 +30,7 @@ start_session_req = { "speaker": "zh_female_vv_jupiter_bigtts", # "speaker": "S_XXXXXX", // 指定自定义的复刻音色,需要填下character_manifest # "speaker": "ICL_zh_female_aojiaonvyou_tob" // 指定官方复刻音色,不需要填character_manifest - "audio_config": {"channel": 1, "format": "pcm", "sample_rate": 16000}, + "audio_config": {"channel": 1, "format": "pcm", "sample_rate": 24000}, }, "dialog": { "bot_name": "豆包", @@ -61,6 +61,6 @@ output_audio_config = { "chunk": 4096, # 增加缓冲区大小 "format": "pcm", "channels": 1, - "sample_rate": 16000, - "bit_size": pyaudio.paInt16, + "sample_rate": 24000, + "bit_size": pyaudio.paFloat32, # 服务器返回的是Float32格式 } diff --git a/doubao/test_audio_conversion.py b/doubao/test_audio_conversion.py new file mode 100644 index 0000000..c7cd02c --- /dev/null +++ b/doubao/test_audio_conversion.py @@ -0,0 +1,118 @@ +#!/usr/bin/env python3 +""" +音频转换测试脚本 +用于测试高性能音频格式转换 +""" + +import time +import struct +from audio_converter import AudioConverter + +def test_conversion_performance(): + """测试转换性能""" + print("=== 音频转换性能测试 ===") + + converter = AudioConverter() + + # 生成测试数据(1秒的24kHz Float32音频) + sample_rate = 24000 + duration = 1.0 # 1秒 + num_samples = int(sample_rate * duration) + + # 生成正弦波测试数据 + test_data = bytearray() + for i in range(num_samples): + # 生成440Hz正弦波 + value = 0.5 * (i / sample_rate * 440 * 2 * 3.14159) + sample = (value).astype('float32') if hasattr(value, 'astype') else float(value) + test_data.extend(struct.pack('f', sample)) + + test_data = bytes(test_data) + print(f"生成了 {len(test_data)} 字节的测试数据") + + # 测试转换性能 + start_time = time.time() + converted_data = converter.float32_to_int16_fast(test_data) + end_time = time.time() + + conversion_time = end_time - start_time + data_ratio = len(converted_data) / len(test_data) + + print(f"转换结果:") + print(f" 原始数据: {len(test_data)} 字节") + print(f" 转换后: {len(converted_data)} 字节") + print(f" 数据比例: {data_ratio:.2f}") + print(f" 转换时间: {conversion_time:.4f} 秒") + print(f" 转换速度: {len(test_data) / conversion_time / 1024:.1f} KB/s") + + # 验证转换质量 + print("\n=== 转换质量验证 ===") + + # 检查一些样本值 + original_samples = struct.unpack('10f', test_data[:40]) + converted_samples = struct.unpack('10h', converted_data[:20]) + + print("前10个样本的转换结果:") + for i, (orig, conv) in enumerate(zip(original_samples, converted_samples)): + expected = int(orig * 32767) + print(f" 样本{i}: {orig:.6f} -> {conv} (期望: {expected})") + + # 检查是否有明显错误 + errors = 0 + for orig, conv in zip(original_samples, converted_samples): + expected = int(orig * 32767) + if abs(conv - expected) > 1: # 允许1的误差 + errors += 1 + + if errors == 0: + print("✓ 转换质量验证通过") + else: + print(f"✗ 转换质量验证失败,{errors}个样本有误差") + +def test_numpy_vs_python(): + """测试numpy和纯Python实现的性能差异""" + print("\n=== NumPy vs Python 性能对比 ===") + + converter = AudioConverter() + + # 生成较大的测试数据 + sample_rate = 24000 + duration = 2.0 # 2秒 + num_samples = int(sample_rate * duration) + + # 生成测试数据 + import random + test_data = bytearray() + for _ in range(num_samples): + sample = random.uniform(-1.0, 1.0) + test_data.extend(struct.pack('f', sample)) + + test_data = bytes(test_data) + + # 测试NumPy版本 + if converter._numpy_available: + print("测试NumPy版本...") + start_time = time.time() + for _ in range(10): # 重复10次 + converter.float32_to_int16_fast(test_data) + numpy_time = time.time() - start_time + print(f"NumPy版本: {numpy_time:.4f} 秒 (10次)") + else: + print("NumPy不可用") + numpy_time = None + + # 测试纯Python版本 + print("测试纯Python版本...") + start_time = time.time() + for _ in range(10): # 重复10次 + AudioConverter.float32_to_int16(test_data) + python_time = time.time() - start_time + print(f"纯Python版本: {python_time:.4f} 秒 (10次)") + + if numpy_time: + speedup = python_time / numpy_time + print(f"NumPy加速比: {speedup:.2f}x") + +if __name__ == "__main__": + test_conversion_performance() + test_numpy_vs_python() \ No newline at end of file