From 0ee0252c8a765a36077639d54bba72512077c516 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E6=BD=AE?= Date: Sun, 21 Sep 2025 20:03:20 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dtts=E6=9C=AA=E7=94=9F?= =?UTF-8?q?=E6=88=90=E5=B0=B1=E5=88=A4=E6=96=AD=E7=BB=93=E6=9D=9F=E7=9A=84?= =?UTF-8?q?=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- audio_processes.py | 218 +++++++++++++++++++++++++++++---------------- 1 file changed, 143 insertions(+), 75 deletions(-) diff --git a/audio_processes.py b/audio_processes.py index 8ce0222..fc4fda0 100644 --- a/audio_processes.py +++ b/audio_processes.py @@ -713,10 +713,11 @@ class OutputProcess: self.playback_cooldown_period = 0.05 # 播放冷却时间(秒)- 防止回声,减少到0.05秒 self.playback_completed = False # 播放完成标志 self.end_signal_received = False # 结束信号接收标志 + self.end_signal_time = 0 # 结束信号接收时间 # 智能缓冲系统 - 借鉴 recorder.py 的智能句子累积策略 self.preload_buffer = [] # 预加载缓冲区(保留用于音频块) - self.preload_size = 6 # 预加载6个音频块(增加缓冲减少卡顿) + self.preload_size = 3 # 预加载3个音频块(减少预加载时间,加快播放启动) # 简化的音频系统 - 直接使用预加载缓冲区 self.audio_queue_lock = None # 音频队列操作锁(在run方法中初始化) @@ -873,6 +874,8 @@ class OutputProcess: if not self.currently_playing: self.currently_playing = True self.last_audio_chunk_time = time.time() # 记录最后播放时间 + print(f"🎵 播放状态变化: currently_playing = True (开始播放)") + print(f"🎵 设置last_audio_chunk_time = {self.last_audio_chunk_time}") # 如果是第一次播放,不设置冷却期 if chunks_played == 0: @@ -917,6 +920,7 @@ class OutputProcess: len(self.preload_buffer) == 0 and time_since_last_chunk > 1.0): self.currently_playing = False + print(f"🎵 播放状态变化: currently_playing = False (播放缓冲区和预加载缓冲区都为空)") elif len(self.playback_buffer) == 0 and len(self.preload_buffer) > 0: # 播放缓冲区为空但预加载缓冲区有数据,自动转移数据 transfer_count = min(3, len(self.preload_buffer)) # 一次转移3个块 @@ -937,6 +941,7 @@ class OutputProcess: len(self.preload_buffer) == 0 and time_since_last_chunk > 1.5): # 异常情况下,等待更长时间且确保缓冲区为空 self.currently_playing = False + print(f"🎵 播放状态变化: currently_playing = False (异常情况处理,缓冲区为空且{time_since_last_chunk:.1f}秒无播放)") else: # 保持播放状态,继续处理 pass @@ -948,6 +953,7 @@ class OutputProcess: # 线程结束时确保状态正确 if self.currently_playing: self.currently_playing = False + print(f"🎵 播放状态变化: currently_playing = False (播放工作线程结束)") if playback_stream: try: playback_stream.stop_stream() @@ -977,9 +983,10 @@ class OutputProcess: # 1. 处理音频队列(数据接收) self._process_audio_queue() - # 2. 检查播放状态 + # 2. 检查播放状态 - 使用增强播放完成检测 if self.end_signal_received: - self._check_playback_completion() + if self._check_enhanced_playback_completion(): + self._finish_playback() # 3. 检查设备健康状态和冷却期 - 防止回声 current_time = time.time() @@ -1099,20 +1106,16 @@ class OutputProcess: print(f"📥 输出进程收到结束信号") end_signal_received = True self.end_signal_received = True + self.end_signal_time = time.time() # 记录收到结束信号的时间 - # 只有在有音频数据或者TTS缓冲区有内容时,才设置all_audio_received - # 这样可以避免在没有音频的情况下误判播放完成 - if (len(self.preload_buffer) > 0 or - len(self.playback_buffer) > 0 or - len(self.tts_buffer) > 0 or - self.tts_task_queue.qsize() > 0): - self.all_audio_received = True - print(f"📥 设置all_audio_received=True,检测到有待处理的数据") - else: - # 如果没有任何数据,可能是LLM响应为空或其他问题 - # 也设置为True,让播放完成检测处理这种情况 - self.all_audio_received = True - print(f"📥 警告:收到结束信号但没有检测到任何音频数据,可能存在问题") + # 延迟设置all_audio_received,确保音频真正开始播放后再设置 + # 暂时设置为False,让播放启动逻辑处理 + self.all_audio_received = False + print(f"📥 收到结束信号,状态变化:") + print(f" - end_signal_received: True") + print(f" - all_audio_received: False (延迟设置)") + print(f" - completion_sent: False") + print(f" - playback_completed: False") # 重置完成事件标记 self.completion_sent = False @@ -1206,14 +1209,21 @@ class OutputProcess: # 直接添加到预加载缓冲区 self.preload_buffer.append(audio_data) + print(f"🎵 音频数据已添加到预加载缓冲区,当前大小: {len(self.preload_buffer)}/{self.preload_size}") # 检查是否应该开始播放或补充播放缓冲区 if not self.is_playing and len(self.preload_buffer) >= self.preload_size: + print(f"🎵 预加载缓冲区达到{self.preload_size}个块,开始首次播放...") # 首次启动播放 self.playback_buffer.extend(self.preload_buffer) self.preload_buffer.clear() self.is_playing = True self.last_playback_time = 0 # 重置播放时间,避免立即触发冷却期 + + # 音频真正开始播放,设置all_audio_received为True + if self.end_signal_received and not self.all_audio_received: + self.all_audio_received = True + print(f"🎵 音频开始播放,设置all_audio_received=True") # 确保播放工作线程知道有数据要播放 if not self.currently_playing: # 播放工作线程会自动检测播放缓冲区并开始播放 @@ -1231,6 +1241,12 @@ class OutputProcess: self.preload_buffer.clear() self.is_playing = True self.last_playback_time = 0 + + # 强制转移后设置all_audio_received为True + if not self.all_audio_received: + self.all_audio_received = True + print(f"🎵 强制转移预加载缓冲区后,设置all_audio_received=True") + print(f"🎵 强制开始播放音频,播放缓冲区大小: {len(self.playback_buffer)}") else: @@ -1271,6 +1287,11 @@ class OutputProcess: self.preload_buffer.clear() self.is_playing = True self.last_playback_time = 0 # 重置播放时间,避免立即触发冷却期 + + # 音频真正开始播放,设置all_audio_received为True + if self.end_signal_received and not self.all_audio_received: + self.all_audio_received = True + print(f"🎵 音频开始播放(最小缓冲区模式),设置all_audio_received=True") print(f"🎵 开始播放音频(最小缓冲区满足)") print(f"🔍 已重置last_playback_time,避免立即触发冷却期") elif self.is_playing and len(self.playback_buffer) < 2 and len(self.preload_buffer) > 0: @@ -1356,6 +1377,7 @@ class OutputProcess: self.no_active_playback = False self.end_signal_received = False self.playback_completed = False + self.end_signal_time = 0 # 重置结束信号时间 print("📡 输出进程:已重置所有播放完成状态标志") @@ -1387,6 +1409,8 @@ class OutputProcess: if not self.end_signal_received: return False + print(f"🔍 增强播放完成检查开始...") + # 更新状态变量 self.pre_buffer_empty = (len(self.preload_buffer) == 0) self.playback_buffer_empty = (len(self.playback_buffer) == 0) @@ -1394,16 +1418,64 @@ class OutputProcess: tts_queue_size = self.tts_task_queue.qsize() + # 计算时间差 + current_time = time.time() + time_since_last_chunk = current_time - self.last_audio_chunk_time - # 添加时间维度检查 - if self.last_audio_chunk_time > 0: - time_since_last_chunk = time.time() - self.last_audio_chunk_time - else: - time_since_last_chunk = 0 + print(f"🔍 增强播放完成检查详情:") + print(f" - end_signal_received: {self.end_signal_received}") + print(f" - llm_generation_complete: {self.llm_generation_complete}") + print(f" - tts_generation_complete: {self.tts_generation_complete}") + print(f" - all_audio_received: {self.all_audio_received}") + print(f" - tts_queue_size: {tts_queue_size}") + print(f" - tts_buffer_size: {len(self.tts_buffer)}") + print(f" - playback_buffer: {len(self.playback_buffer)}") + print(f" - preload_buffer: {len(self.preload_buffer)}") + print(f" - currently_playing: {self.currently_playing}") + print(f" - is_playing: {self.is_playing}") + print(f" - last_audio_chunk_time: {self.last_audio_chunk_time}") + print(f" - time_since_last_chunk: {time_since_last_chunk:.3f}秒") - # 检查TTS是否正在生成 - 新增条件 - # TTS正在生成的条件:队列中有任务 或 TTS生成未完成 或 还有待处理的缓冲区内容 - tts_is_generating = (tts_queue_size > 0 or not self.tts_generation_complete or len(self.tts_buffer) > 0) + + # 检查TTS是否正在生成 - 修复逻辑 + # 如果TTS生成未完成但队列和缓冲区都为空,可能是状态标记错误,自动修正 + if not self.tts_generation_complete and tts_queue_size == 0 and len(self.tts_buffer) == 0: + print(f"🔧 检测到TTS生成状态异常:tts_generation_complete=False但无数据,自动修正为True") + self.tts_generation_complete = True + + # TTS正在生成的条件:队列中有任务 或 还有待处理的缓冲区内容 或 TTS生成未完成 + tts_is_generating = (tts_queue_size > 0 or len(self.tts_buffer) > 0 or not self.tts_generation_complete) + print(f" - tts_is_generating: {tts_is_generating}") + print(f" - pre_buffer_empty: {self.pre_buffer_empty}") + print(f" - playback_buffer_empty: {self.playback_buffer_empty}") + print(f" - no_active_playback: {self.no_active_playback}") + + # 特殊处理1:如果没有任何音频数据,暂时不设置all_audio_received,等待TTS生成 + # 注意:这里不自动设置all_audio_received,因为可能TTS还在生成中 + # 等待播放完成检测中的超时机制来处理这种情况 + + # 特殊处理2:如果all_audio_received为False但其他条件都满足,强制设置为True + print(f"🔍 检查特殊处理条件:") + print(f" - not all_audio_received: {not self.all_audio_received}") + print(f" - llm_generation_complete: {self.llm_generation_complete}") + print(f" - tts_generation_complete: {self.tts_generation_complete}") + print(f" - pre_buffer_empty: {self.pre_buffer_empty}") + print(f" - playback_buffer_empty: {self.playback_buffer_empty}") + print(f" - no_active_playback: {self.no_active_playback}") + print(f" - tts_queue_size == 0: {tts_queue_size == 0}") + print(f" - not tts_is_generating: {not tts_is_generating}") + + if (not self.all_audio_received and + self.llm_generation_complete and + self.tts_generation_complete and + self.pre_buffer_empty and + self.playback_buffer_empty and + self.no_active_playback and + tts_queue_size == 0 and + not tts_is_generating): + # 这种情况可能是音频播放开始后没有正确设置all_audio_received + print(f"🔧 检测到all_audio_received为False但其他播放条件已满足,强制设置为True") + self.all_audio_received = True # 检查是否所有条件都满足 - 使用更新的状态变量,添加TTS生成状态检查 all_conditions_met = ( @@ -1417,24 +1489,65 @@ class OutputProcess: not tts_is_generating # 新增:确保TTS不在生成中 ) + print(f"🔍 最终条件检查结果:") + print(f" - all_conditions_met: {all_conditions_met}") + print(f" - 各个条件详情:") + print(f" * llm_generation_complete: {self.llm_generation_complete}") + print(f" * tts_generation_complete: {self.tts_generation_complete}") + print(f" * all_audio_received: {self.all_audio_received}") + print(f" * pre_buffer_empty: {self.pre_buffer_empty}") + print(f" * playback_buffer_empty: {self.playback_buffer_empty}") + print(f" * no_active_playback: {self.no_active_playback}") + print(f" * tts_queue_size == 0: {tts_queue_size == 0}") + print(f" * not tts_is_generating: {not tts_is_generating}") + if all_conditions_met: + print(f"✅ 所有播放完成条件已满足,进行时间检查...") + print(f" - last_audio_chunk_time: {self.last_audio_chunk_time}") + print(f" - time_since_last_chunk: {time_since_last_chunk:.3f}秒") + print(f" - 需要等待时间: 1.0秒") + # 额外时间检查:确保音频真正播放完成 - if self.last_audio_chunk_time > 0 and time_since_last_chunk > 0.3: # 至少0.3秒没有新音频播放 + if self.last_audio_chunk_time > 0 and time_since_last_chunk > 1.0: # 至少1.0秒没有新音频播放(增加等待时间确保音频完成) print(f"✅ 所有播放完成条件已满足,且{time_since_last_chunk:.2f}秒无新音频,可以结束播放") return True + elif self.last_audio_chunk_time > 0 and time_since_last_chunk <= 1.0: + print(f"⏳ 所有条件满足但等待时间不足(已等待{time_since_last_chunk:.2f}秒,需要1.0秒)...") + return False elif self.last_audio_chunk_time == 0: + print(f"🔍 从未开始播放的情况,检查是否有音频数据...") # 如果从未开始播放,检查是否有音频数据 if len(self.playback_buffer) == 0 and len(self.preload_buffer) == 0: + print(f"🔍 播放缓冲区和预加载缓冲区都为空") # 检查是否有TTS任务在排队 if self.tts_task_queue.qsize() == 0 and len(self.tts_buffer) == 0: - # 真的没有任何数据,可能是LLM响应为空或TTS失败 - print(f"⚠️ 从未开始播放且无任何数据,可能LLM响应为空或TTS失败,强制结束") - return True + print(f"🔍 TTS队列和缓冲区都为空") + + # 检查是否刚刚收到结束信号,需要给TTS一些时间生成音频 + time_since_end_signal = time.time() - getattr(self, 'end_signal_time', 0) + if not hasattr(self, 'end_signal_time'): + self.end_signal_time = time.time() + + print(f"🔍 距离收到结束信号: {time_since_end_signal:.2f}秒") + + # 如果距离收到结束信号时间很短(< 5秒),可能是TTS还在生成中 + if time_since_end_signal < 5.0: + print(f"⏳ 刚收到结束信号,等待TTS生成音频(已等待{time_since_end_signal:.1f}秒)...") + return False + elif time_since_end_signal < 10.0: + print(f"⏳ 已等待{time_since_end_signal:.1f}秒,继续等待TTS生成...") + return False + else: + # 等待超过10秒,确实没有音频数据,可能是TTS失败 + print(f"⚠️ 已等待{time_since_end_signal:.1f}秒仍无音频数据,可能TTS失败,强制结束") + return True else: + print(f"🔍 还有TTS任务待处理: tts_queue_size={tts_queue_size}, tts_buffer_size={len(self.tts_buffer)}") # 还有TTS任务待处理 print(f"⏳ 从未开始播放但还有TTS任务待处理,等待TTS生成...") return False else: + print(f"🔍 还有音频数据: playback_buffer={len(self.playback_buffer)}, preload_buffer={len(self.preload_buffer)}") print(f"⏳ 从未开始播放但还有音频数据,等待播放开始...") return False else: @@ -1504,53 +1617,8 @@ class OutputProcess: return False - def _check_playback_completion(self): - """简化的播放完成状态检查""" - if not self.end_signal_received: - return - - tts_queue_size = self.tts_task_queue.qsize() - playback_queue_size = len(self.playback_buffer) + len(self.preload_buffer) - - # 调试日志:播放完成状态 - print(f"🔍 播放完成检查:") - print(f" - end_signal_received: {self.end_signal_received}") - print(f" - tts_queue_size: {tts_queue_size}") - print(f" - playback_buffer: {len(self.playback_buffer)}") - print(f" - preload_buffer: {len(self.preload_buffer)}") - print(f" - currently_playing: {self.currently_playing}") - print(f" - playback_completed: {self.playback_completed}") - print(f" - completion_sent: {self.completion_sent}") - - # 检查条件:所有队列为空且没有在播放 - if tts_queue_size == 0 and playback_queue_size == 0 and not self.currently_playing: - print(f"✅ 所有队列已清空且播放器空闲,触发播放完成") - print(f"📥 调用 _finish_playback() 前,completion_sent={self.completion_sent}") - self._finish_playback() - print(f"📥 调用 _finish_playback() 后,completion_sent={self.completion_sent}") - # 重置结束信号标志,防止重复触发 - self.end_signal_received = False - print(f"📥 已重置结束信号标志,防止重复触发") - return - - # 检查条件:所有队列为空但播放器还在播放(最后一个音频块) - if tts_queue_size == 0 and playback_queue_size == 0 and self.currently_playing: - print(f"⏳ 等待最后一个音频块播放完成...") - time.sleep(0.3) - if not self.currently_playing: - print(f"✅ 最后一个音频块播放完成,触发播放完成") - print(f"📥 调用 _finish_playback() 前,completion_sent={self.completion_sent}") - self._finish_playback() - print(f"📥 调用 _finish_playback() 后,completion_sent={self.completion_sent}") - # 重置结束信号标志,防止重复触发 - self.end_signal_received = False - print(f"📥 已重置结束信号标志,防止重复触发") - return - else: - print(f"⚠️ 最后一个音频块播放超时") - # 即使超时也要重置标志,防止重复检测 - self.end_signal_received = False - print(f"📥 已重置结束信号标志(超时情况)") + # 注意:简化播放完成检测已移除,统一使用增强播放完成检测 _check_enhanced_playback_completion() + def _cleanup(self): """清理资源"""