This commit is contained in:
朱潮 2025-09-19 20:01:03 +08:00
parent 1e0fd6e234
commit 1bb6a32dc2
5 changed files with 35 additions and 31 deletions

View File

@ -144,30 +144,28 @@ class AudioDeviceManager:
raise Exception("无法打开任何音频输出设备")
def write_audio_data(self, audio_data: bytes) -> bool:
"""写入音频数据,支持格式转换和多种播放方式"""
# 如果需要格式转换
converted_data = audio_data
converted_format = None
"""写入音频数据,性能优化版本"""
# 预缓冲:对于较小的音频数据,等待积累到一定大小再播放
min_buffer_size = 1024 # 最小缓冲大小
# 检查是否需要从Float32转换为Int16
if (self.output_config.bit_size == pyaudio.paInt16 and
len(audio_data) % 4 == 0): # 可能是Float32数据
try:
# 检查是否为Float32数据通过尝试解析
import struct
test_sample = struct.unpack('f', audio_data[:4])[0]
if -1.0 <= test_sample <= 1.0: # 合理的Float32范围
print("检测到Float32数据转换为Int16格式")
converted_data = self.converter.float32_to_int16(audio_data)
converted_format = 'Int16'
except:
pass # 不是Float32数据不进行转换
if not hasattr(self, '_audio_buffer'):
self._audio_buffer = b''
# 尝试直接写入pyaudio流
# 累积音频数据
self._audio_buffer += audio_data
# 如果缓冲区足够大或者数据包较大,直接播放
if len(self._audio_buffer) >= min_buffer_size or len(audio_data) > min_buffer_size:
buffer_to_play = self._audio_buffer
self._audio_buffer = b''
else:
# 继续等待更多数据
return True
# 直接写入pyaudio流不做实时转换
try:
if self.output_stream:
self.output_stream.write(converted_data)
self.output_stream.write(buffer_to_play)
return True
except Exception as e:
print(f"pyaudio写入失败: {e}")
@ -176,11 +174,9 @@ class AudioDeviceManager:
enable_aplay = config.audio_config.get('enable_aplay_fallback', True)
if enable_aplay and self.platform_config['fallback_to_aplay']:
print("尝试使用aplay播放...")
format_type = converted_format if converted_format else ('Float32' if self.output_config.bit_size == pyaudio.paFloat32 else 'Int16')
return self.player.play_audio(
audio_data=converted_data,
format_type=format_type,
audio_data=buffer_to_play,
format_type='Int16', # 现在统一使用Int16
sample_rate=self.output_config.sample_rate,
channels=self.output_config.channels
)
@ -257,9 +253,9 @@ class DialogSession:
self.player_thread.start()
def _audio_player_thread(self):
"""音频播放线程"""
audio_playing_timeout = 1.0 # 1秒没有音频数据认为播放结束
queue_check_interval = 0.1 # 每100ms检查一次队列状态
"""音频播放线程 - 性能优化版本"""
audio_playing_timeout = 0.5 # 0.5秒没有音频数据认为播放结束
queue_check_interval = 0.05 # 每50ms检查一次队列状态更低的延迟
while self.is_playing:
try:
@ -307,6 +303,14 @@ class DialogSession:
self.say_hello_completed = True
print("say hello 音频播放完成")
print("音频播放超时,恢复录音")
# 刷新音频缓冲区
if hasattr(self.audio_device, '_audio_buffer') and self.audio_device._audio_buffer:
print("刷新剩余音频缓冲区")
remaining_buffer = self.audio_device._audio_buffer
self.audio_device._audio_buffer = b''
self.audio_device.write_audio_data(remaining_buffer)
# 直接发送静音数据,而不是在协程中发送
try:
silence_data = b'\x00' * config.input_audio_config["chunk"]

View File

@ -30,7 +30,7 @@ start_session_req = {
"speaker": "zh_female_vv_jupiter_bigtts",
# "speaker": "S_XXXXXX", // 指定自定义的复刻音色,需要填下character_manifest
# "speaker": "ICL_zh_female_aojiaonvyou_tob" // 指定官方复刻音色不需要填character_manifest
"audio_config": {"channel": 1, "format": "pcm", "sample_rate": 24000},
"audio_config": {"channel": 1, "format": "pcm", "sample_rate": 16000},
},
"dialog": {
"bot_name": "豆包",
@ -58,9 +58,9 @@ input_audio_config = {
}
# Playback (output) audio settings, expressed as a plain dict.
# NOTE(review): "chunk", "sample_rate" and "bit_size" each appear twice below.
# This looks like a rendered diff whose +/- markers were stripped, so both the
# old and the new value are listed -- confirm against the real file.
# If evaluated as written, Python keeps the last value given for a repeated
# key, i.e. chunk=4096, sample_rate=16000, bit_size=pyaudio.paInt16.
output_audio_config = {
"chunk": 3200,
"chunk": 4096, # increased buffer size
"format": "pcm",
"channels": 1,
"sample_rate": 24000,
"bit_size": pyaudio.paFloat32,
"sample_rate": 16000,
"bit_size": pyaudio.paInt16,
}

Binary file not shown.