diff --git a/doubao/__pycache__/audio_manager.cpython-312.pyc b/doubao/__pycache__/audio_manager.cpython-312.pyc index 5153c1e..4150946 100644 Binary files a/doubao/__pycache__/audio_manager.cpython-312.pyc and b/doubao/__pycache__/audio_manager.cpython-312.pyc differ diff --git a/doubao/__pycache__/config.cpython-312.pyc b/doubao/__pycache__/config.cpython-312.pyc index 229a6ce..8c35692 100644 Binary files a/doubao/__pycache__/config.cpython-312.pyc and b/doubao/__pycache__/config.cpython-312.pyc differ diff --git a/doubao/audio_manager.py b/doubao/audio_manager.py index c99cd75..c032e0a 100644 --- a/doubao/audio_manager.py +++ b/doubao/audio_manager.py @@ -144,30 +144,28 @@ class AudioDeviceManager: raise Exception("无法打开任何音频输出设备") def write_audio_data(self, audio_data: bytes) -> bool: - """写入音频数据,支持格式转换和多种播放方式""" - # 如果需要格式转换 - converted_data = audio_data - converted_format = None + """写入音频数据,性能优化版本""" + # 预缓冲:对于较小的音频数据,等待积累到一定大小再播放 + min_buffer_size = 1024 # 最小缓冲大小 - # 检查是否需要从Float32转换为Int16 - if (self.output_config.bit_size == pyaudio.paInt16 and - len(audio_data) % 4 == 0): # 可能是Float32数据 - - try: - # 检查是否为Float32数据(通过尝试解析) - import struct - test_sample = struct.unpack('f', audio_data[:4])[0] - if -1.0 <= test_sample <= 1.0: # 合理的Float32范围 - print("检测到Float32数据,转换为Int16格式") - converted_data = self.converter.float32_to_int16(audio_data) - converted_format = 'Int16' - except: - pass # 不是Float32数据,不进行转换 + if not hasattr(self, '_audio_buffer'): + self._audio_buffer = b'' - # 尝试直接写入pyaudio流 + # 累积音频数据 + self._audio_buffer += audio_data + + # 如果缓冲区足够大或者数据包较大,直接播放 + if len(self._audio_buffer) >= min_buffer_size or len(audio_data) > min_buffer_size: + buffer_to_play = self._audio_buffer + self._audio_buffer = b'' + else: + # 继续等待更多数据 + return True + + # 直接写入pyaudio流,不做实时转换 try: if self.output_stream: - self.output_stream.write(converted_data) + self.output_stream.write(buffer_to_play) return True except Exception as e: print(f"pyaudio写入失败: {e}") @@ -176,11 +174,9 @@ class AudioDeviceManager: enable_aplay = config.audio_config.get('enable_aplay_fallback', True) if enable_aplay and self.platform_config['fallback_to_aplay']: print("尝试使用aplay播放...") - format_type = converted_format if converted_format else ('Float32' if self.output_config.bit_size == pyaudio.paFloat32 else 'Int16') - return self.player.play_audio( - audio_data=converted_data, - format_type=format_type, + audio_data=buffer_to_play, + format_type='Int16', # 现在统一使用Int16 sample_rate=self.output_config.sample_rate, channels=self.output_config.channels ) @@ -257,9 +253,9 @@ class DialogSession: self.player_thread.start() def _audio_player_thread(self): - """音频播放线程""" - audio_playing_timeout = 1.0 # 1秒没有音频数据认为播放结束 - queue_check_interval = 0.1 # 每100ms检查一次队列状态 + """音频播放线程 - 性能优化版本""" + audio_playing_timeout = 0.5 # 0.5秒没有音频数据认为播放结束 + queue_check_interval = 0.05 # 每50ms检查一次队列状态,更低的延迟 while self.is_playing: try: @@ -307,6 +303,14 @@ class DialogSession: self.say_hello_completed = True print("say hello 音频播放完成") print("音频播放超时,恢复录音") + + # 刷新音频缓冲区 + if hasattr(self.audio_device, '_audio_buffer') and self.audio_device._audio_buffer: + print("刷新剩余音频缓冲区") + remaining_buffer = self.audio_device._audio_buffer + self.audio_device._audio_buffer = b'' + self.audio_device.write_audio_data(remaining_buffer) + # 直接发送静音数据,而不是在协程中发送 try: silence_data = b'\x00' * config.input_audio_config["chunk"] diff --git a/doubao/config.py b/doubao/config.py index 6a4e41c..5c408b1 100644 --- a/doubao/config.py +++ b/doubao/config.py @@ -30,7 +30,7 @@ start_session_req = { "speaker": "zh_female_vv_jupiter_bigtts", # "speaker": "S_XXXXXX", // 指定自定义的复刻音色,需要填下character_manifest # "speaker": "ICL_zh_female_aojiaonvyou_tob" // 指定官方复刻音色,不需要填character_manifest - "audio_config": {"channel": 1, "format": "pcm", "sample_rate": 24000}, + "audio_config": {"channel": 1, "format": "pcm", "sample_rate": 16000}, }, "dialog": { "bot_name": "豆包", @@ -58,9 +58,9 @@ input_audio_config = { } output_audio_config = { - "chunk": 3200, + "chunk": 4096, # 增加缓冲区大小 "format": "pcm", "channels": 1, - "sample_rate": 24000, - "bit_size": pyaudio.paFloat32, + "sample_rate": 16000, + "bit_size": pyaudio.paInt16, } diff --git a/doubao/output.pcm b/doubao/output.pcm index dfbbd20..345f5ee 100644 Binary files a/doubao/output.pcm and b/doubao/output.pcm differ