config
This commit is contained in:
parent
3958d2ff81
commit
cc79605405
Binary file not shown.
Binary file not shown.
@ -37,6 +37,13 @@ class AudioDeviceManager:
|
|||||||
self.audio_queue = None
|
self.audio_queue = None
|
||||||
self.recording = False
|
self.recording = False
|
||||||
|
|
||||||
|
# 音频缓冲播放相关
|
||||||
|
self.audio_buffer = bytes() # 使用不可变的bytes而不是bytearray
|
||||||
|
self.buffer_playback_threshold = self.output_config.sample_rate * 15 # 15秒的音频数据
|
||||||
|
self.min_buffer_size = self.output_config.sample_rate * 3 # 最小缓冲3秒
|
||||||
|
self.is_buffer_playing = False
|
||||||
|
self.last_play_time = 0
|
||||||
|
|
||||||
def open_input_stream(self):
|
def open_input_stream(self):
|
||||||
"""打开音频输入流"""
|
"""打开音频输入流"""
|
||||||
try:
|
try:
|
||||||
@ -87,7 +94,7 @@ class AudioDeviceManager:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
def play_audio(self, audio_data: bytes) -> None:
|
def play_audio(self, audio_data: bytes) -> None:
|
||||||
"""播放音频数据"""
|
"""播放音频数据 - 原有的实时播放方法"""
|
||||||
try:
|
try:
|
||||||
# 将字节数据转换为numpy数组
|
# 将字节数据转换为numpy数组
|
||||||
audio_array = np.frombuffer(audio_data, dtype=np.int16)
|
audio_array = np.frombuffer(audio_data, dtype=np.int16)
|
||||||
@ -98,6 +105,51 @@ class AudioDeviceManager:
|
|||||||
sd.wait() # 等待播放完成
|
sd.wait() # 等待播放完成
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"音频播放失败: {e}")
|
print(f"音频播放失败: {e}")
|
||||||
|
|
||||||
|
def buffer_audio(self, audio_data: bytes, *, max_idle_seconds: float = 5.0) -> bool:
    """Append a chunk to the playback buffer and report whether to play it.

    Args:
        audio_data: Raw audio bytes to append to ``self.audio_buffer``.
        max_idle_seconds: Once at least ``self.min_buffer_size`` bytes are
            buffered, playback is requested when more than this many
            seconds have passed since ``self.last_play_time``.

    Returns:
        True when the buffered data should be played now, False otherwise.
        False is also returned (after printing the error) when appending
        fails, e.g. because ``audio_data`` is not bytes-like.
    """
    try:
        # Concatenate into a fresh immutable bytes object rather than
        # extending a mutable buffer in place.
        self.audio_buffer = self.audio_buffer + audio_data

        # NOTE(review): both thresholds are compared against a byte count.
        # If they were derived as sample_rate * seconds, 16-bit samples make
        # the buffered duration shorter than the nominal one - confirm where
        # the thresholds are set.
        buffered = len(self.audio_buffer)
        if buffered >= self.buffer_playback_threshold:
            return True  # hard threshold reached

        # Soft trigger: enough data buffered and nothing played recently.
        idle_seconds = time.time() - self.last_play_time
        return buffered >= self.min_buffer_size and idle_seconds > max_idle_seconds
    except Exception as e:
        print(f"音频缓冲失败: {e}")
        return False
|
||||||
|
|
||||||
|
def play_buffered_audio(self) -> None:
    """Start non-blocking playback of the buffered PCM data.

    The buffer is read as interleaved 16-bit samples with
    ``self.output_config.channels`` channels. Only whole frames are handed
    to the sound device; trailing bytes that do not yet form a complete
    frame stay in ``self.audio_buffer`` so a later chunk can complete them.
    Without this, a misaligned buffer makes the int16 conversion raise and
    the same failure repeats on every call until the length lines up.

    On success the played bytes are removed from the buffer,
    ``self.last_play_time`` is refreshed and ``self.is_buffer_playing`` is
    set. On failure the error is printed and the buffer is left untouched.
    """
    try:
        pending = self.audio_buffer
        if not pending:
            return

        channels = self.output_config.channels
        frame_bytes = 2 * channels  # np.int16 is 2 bytes per sample
        playable = len(pending) - len(pending) % frame_bytes
        if playable == 0:
            # Not even one complete frame yet; wait for more data.
            return

        # Convert the frame-aligned prefix to a (frames, channels) array.
        audio_array = np.frombuffer(pending[:playable], dtype=np.int16)
        audio_array = audio_array.reshape(-1, channels)

        # Non-blocking: returns as soon as playback has been started.
        # NOTE(review): per the sounddevice docs, play() does not support
        # overlapping playbacks, so a call made while earlier audio is still
        # playing presumably cuts it short - confirm against the caller.
        sd.play(audio_array, samplerate=self.output_config.sample_rate)

        # Keep only the incomplete trailing frame (normally empty).
        self.audio_buffer = pending[playable:]
        self.last_play_time = time.time()
        self.is_buffer_playing = True

    except Exception as e:
        print(f"缓冲音频播放失败: {e}")
|
||||||
|
|
||||||
|
def clear_audio_buffer(self) -> None:
    """Drop all buffered audio and mark buffered playback as inactive."""
    self.is_buffer_playing = False
    self.audio_buffer = b""
|
||||||
|
|
||||||
def read_audio_data(self, frames: int) -> bytes:
|
def read_audio_data(self, frames: int) -> bytes:
|
||||||
"""读取音频数据"""
|
"""读取音频数据"""
|
||||||
@ -142,7 +194,7 @@ class DialogSession:
|
|||||||
mod: str
|
mod: str
|
||||||
|
|
||||||
def __init__(self, ws_config: Dict[str, Any], output_audio_format: str = "pcm", audio_file_path: str = "",
|
def __init__(self, ws_config: Dict[str, Any], output_audio_format: str = "pcm", audio_file_path: str = "",
|
||||||
mod: str = "audio", recv_timeout: int = 10):
|
mod: str = "audio", recv_timeout: int = 10, use_buffered_playback: bool = False):
|
||||||
self.audio_file_path = audio_file_path
|
self.audio_file_path = audio_file_path
|
||||||
self.recv_timeout = recv_timeout
|
self.recv_timeout = recv_timeout
|
||||||
self.is_audio_file_input = self.audio_file_path != ""
|
self.is_audio_file_input = self.audio_file_path != ""
|
||||||
@ -173,6 +225,10 @@ class DialogSession:
|
|||||||
self.last_recording_state = False # 上次录音状态
|
self.last_recording_state = False # 上次录音状态
|
||||||
self.say_hello_completed = False # say hello 是否已完成
|
self.say_hello_completed = False # say hello 是否已完成
|
||||||
|
|
||||||
|
# 音频缓冲播放相关
|
||||||
|
self.use_buffered_playback = use_buffered_playback # 根据参数启用缓冲播放模式
|
||||||
|
self.buffer_check_interval = 0.1 # 缓冲检查间隔
|
||||||
|
|
||||||
# 新增:音频输入流控制
|
# 新增:音频输入流控制
|
||||||
self.input_stream_paused = False # 输入流是否被暂停
|
self.input_stream_paused = False # 输入流是否被暂停
|
||||||
self.force_silence_mode = False # 强制静音模式
|
self.force_silence_mode = False # 强制静音模式
|
||||||
@ -196,12 +252,15 @@ class DialogSession:
|
|||||||
# 启动播放线程
|
# 启动播放线程
|
||||||
self.is_recording = True
|
self.is_recording = True
|
||||||
self.is_playing = True
|
self.is_playing = True
|
||||||
self.player_thread = threading.Thread(target=self._audio_player_thread)
|
if self.use_buffered_playback:
|
||||||
|
self.player_thread = threading.Thread(target=self._buffered_audio_player_thread)
|
||||||
|
else:
|
||||||
|
self.player_thread = threading.Thread(target=self._audio_player_thread)
|
||||||
self.player_thread.daemon = True
|
self.player_thread.daemon = True
|
||||||
self.player_thread.start()
|
self.player_thread.start()
|
||||||
|
|
||||||
def _audio_player_thread(self):
|
def _audio_player_thread(self):
|
||||||
"""音频播放线程"""
|
"""音频播放线程 - 原有的实时播放模式"""
|
||||||
audio_playing_timeout = 1.0 # 1秒没有音频数据认为播放结束
|
audio_playing_timeout = 1.0 # 1秒没有音频数据认为播放结束
|
||||||
queue_check_interval = 0.1 # 每100ms检查一次队列状态
|
queue_check_interval = 0.1 # 每100ms检查一次队列状态
|
||||||
|
|
||||||
@ -273,6 +332,93 @@ class DialogSession:
|
|||||||
self.is_playing_audio = False
|
self.is_playing_audio = False
|
||||||
self.is_recording_paused = False
|
self.is_recording_paused = False
|
||||||
time.sleep(0.1)
|
time.sleep(0.1)
|
||||||
|
|
||||||
|
def _buffered_audio_player_thread(self):
|
||||||
|
"""音频缓冲播放线程 - 新的缓冲播放模式"""
|
||||||
|
audio_playing_timeout = 2.0 # 2秒没有音频数据认为播放结束
|
||||||
|
buffer_check_interval = 0.05 # 每50ms检查一次缓冲区状态
|
||||||
|
|
||||||
|
print("启动缓冲音频播放线程")
|
||||||
|
|
||||||
|
while self.is_playing:
|
||||||
|
try:
|
||||||
|
current_time = time.time()
|
||||||
|
|
||||||
|
# 检查是否有新的音频数据
|
||||||
|
audio_data = None
|
||||||
|
try:
|
||||||
|
audio_data = self.audio_queue.get(timeout=buffer_check_interval)
|
||||||
|
except queue.Empty:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if audio_data is not None:
|
||||||
|
with self.audio_queue_lock:
|
||||||
|
# 接收到音频数据,更新播放状态
|
||||||
|
was_not_playing = not self.is_playing_audio
|
||||||
|
if was_not_playing:
|
||||||
|
# 从非播放状态进入播放状态
|
||||||
|
self.is_playing_audio = True
|
||||||
|
if not self.is_recording_paused:
|
||||||
|
self.is_recording_paused = True
|
||||||
|
print("缓冲播放开始,确认暂停录音")
|
||||||
|
|
||||||
|
# 更新最后音频时间
|
||||||
|
self.last_audio_time = current_time
|
||||||
|
|
||||||
|
# 播放前清理管道
|
||||||
|
if was_not_playing:
|
||||||
|
print("缓冲播放开始前,清理管道")
|
||||||
|
for _ in range(2):
|
||||||
|
self.audio_device.play_audio(b'\x00' * len(audio_data))
|
||||||
|
time.sleep(0.05)
|
||||||
|
|
||||||
|
# 缓冲音频数据
|
||||||
|
should_play = self.audio_device.buffer_audio(audio_data)
|
||||||
|
|
||||||
|
# 如果达到播放条件,播放缓冲的音频
|
||||||
|
if should_play:
|
||||||
|
print(f"播放缓冲音频,缓冲大小: {len(self.audio_device.audio_buffer)} 字节")
|
||||||
|
self.audio_device.play_buffered_audio()
|
||||||
|
|
||||||
|
else:
|
||||||
|
# 没有新的音频数据,检查是否超时
|
||||||
|
with self.audio_queue_lock:
|
||||||
|
if self.is_playing_audio:
|
||||||
|
if hasattr(self, 'last_audio_time') and current_time - self.last_audio_time > audio_playing_timeout:
|
||||||
|
# 超时检查:如果缓冲区有数据,先播放
|
||||||
|
if len(self.audio_device.audio_buffer) > 0:
|
||||||
|
print("播放超时,播放剩余缓冲音频")
|
||||||
|
self.audio_device.play_buffered_audio()
|
||||||
|
|
||||||
|
# 然后恢复录音状态
|
||||||
|
self.is_playing_audio = False
|
||||||
|
self.is_recording_paused = False
|
||||||
|
self.force_silence_mode = False
|
||||||
|
self.input_stream_paused = False
|
||||||
|
|
||||||
|
# 标记 say hello 完成
|
||||||
|
if hasattr(self, 'say_hello_completed') and not self.say_hello_completed:
|
||||||
|
self.say_hello_completed = True
|
||||||
|
print("缓冲播放 say hello 音频播放完成")
|
||||||
|
|
||||||
|
print("缓冲播放超时,恢复录音")
|
||||||
|
|
||||||
|
# 设置静音数据发送标志
|
||||||
|
try:
|
||||||
|
silence_data = b'\x00' * config.input_audio_config["chunk"]
|
||||||
|
self.silence_send_count = 2
|
||||||
|
self.should_send_silence = True
|
||||||
|
except Exception as e:
|
||||||
|
print(f"准备静音数据失败: {e}")
|
||||||
|
|
||||||
|
time.sleep(buffer_check_interval)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"缓冲音频播放错误: {e}")
|
||||||
|
with self.audio_queue_lock:
|
||||||
|
self.is_playing_audio = False
|
||||||
|
self.is_recording_paused = False
|
||||||
|
time.sleep(0.1)
|
||||||
|
|
||||||
# 移除了静音检测函数,避免干扰正常的音频处理
|
# 移除了静音检测函数,避免干扰正常的音频处理
|
||||||
|
|
||||||
@ -340,6 +486,10 @@ class DialogSession:
|
|||||||
self.audio_queue.get_nowait()
|
self.audio_queue.get_nowait()
|
||||||
except queue.Empty:
|
except queue.Empty:
|
||||||
continue
|
continue
|
||||||
|
# 如果是缓冲播放模式,也要清空音频设备缓冲区
|
||||||
|
if self.use_buffered_playback:
|
||||||
|
self.audio_device.clear_audio_buffer()
|
||||||
|
print("缓冲播放:清空音频设备缓冲区")
|
||||||
self.is_user_querying = True
|
self.is_user_querying = True
|
||||||
print("服务器准备接收用户输入")
|
print("服务器准备接收用户输入")
|
||||||
|
|
||||||
@ -380,6 +530,12 @@ class DialogSession:
|
|||||||
self.is_playing_audio = False
|
self.is_playing_audio = False
|
||||||
self.force_silence_mode = False # 关闭强制静音模式
|
self.force_silence_mode = False # 关闭强制静音模式
|
||||||
self.input_stream_paused = False # 恢复输入流
|
self.input_stream_paused = False # 恢复输入流
|
||||||
|
|
||||||
|
# 如果是缓冲播放模式,清空缓冲区
|
||||||
|
if self.use_buffered_playback:
|
||||||
|
self.audio_device.clear_audio_buffer()
|
||||||
|
print("缓冲播放:服务器响应完成,清空音频缓冲区")
|
||||||
|
|
||||||
if was_paused:
|
if was_paused:
|
||||||
print("服务器响应完成,立即恢复录音")
|
print("服务器响应完成,立即恢复录音")
|
||||||
# 设置标志发送静音数据
|
# 设置标志发送静音数据
|
||||||
|
|||||||
BIN
doubao/input.pcm
BIN
doubao/input.pcm
Binary file not shown.
@ -10,10 +10,11 @@ async def main() -> None:
|
|||||||
parser.add_argument("--audio", type=str, default="", help="audio file send to server, if not set, will use microphone input.")
|
parser.add_argument("--audio", type=str, default="", help="audio file send to server, if not set, will use microphone input.")
|
||||||
parser.add_argument("--mod",type=str,default="audio",help="Use mod to select plain text input mode or audio mode, the default is audio mode")
|
parser.add_argument("--mod",type=str,default="audio",help="Use mod to select plain text input mode or audio mode, the default is audio mode")
|
||||||
parser.add_argument("--recv_timeout",type=int,default=10,help="Timeout for receiving messages,value range [10,120]")
|
parser.add_argument("--recv_timeout",type=int,default=10,help="Timeout for receiving messages,value range [10,120]")
|
||||||
|
parser.add_argument("--buffered_playback",action="store_true",help="Enable buffered audio playback mode for better performance on low-end devices")
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
session = DialogSession(ws_config=config.ws_connect_config, output_audio_format=args.format, audio_file_path=args.audio,mod=args.mod,recv_timeout=args.recv_timeout)
|
session = DialogSession(ws_config=config.ws_connect_config, output_audio_format=args.format, audio_file_path=args.audio,mod=args.mod,recv_timeout=args.recv_timeout, use_buffered_playback=args.buffered_playback)
|
||||||
await session.start()
|
await session.start()
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
Binary file not shown.
Loading…
Reference in New Issue
Block a user