From 8003ca37997deebb249a5eb93256645e727f262e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E6=BD=AE?= Date: Sat, 20 Sep 2025 18:21:40 +0800 Subject: [PATCH] config --- recorder.py | 281 +++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 243 insertions(+), 38 deletions(-) diff --git a/recorder.py b/recorder.py index 63d8f51..28c46fd 100644 --- a/recorder.py +++ b/recorder.py @@ -18,6 +18,7 @@ import time import uuid import wave import argparse +import queue from io import BytesIO from urllib.parse import urlparse @@ -504,6 +505,207 @@ class EnergyBasedRecorder: except: pass + def play_audio_streaming(self, audio_chunks): + """Stream-play a list of audio chunks: preload the first two, then write the rest one by one.""" + try: + if not audio_chunks: + return False + + print("🔊 开始智能流式播放音频...") + + # Make sure audio input (recording) is stopped + if self.recording: + self.recording = False + self.recorded_frames = [] + self.recording_start_time = None + self.last_sound_time = None + + # Clear the buffers + self.pre_record_buffer = [] + self.energy_history = [] + self.zcr_history = [] + + # Close the input stream + if self.stream: + self.stream.stop_stream() + self.stream.close() + self.stream = None + + self.is_playing = True + time.sleep(0.3) # wait for the audio device to switch over + + # Create the playback stream with a larger buffer + playback_stream = self.audio.open( + format=self.FORMAT, + channels=self.CHANNELS, + rate=self.RATE, + output=True, + frames_per_buffer=2048 # larger buffer size + ) + + print("🚫 音频输入已关闭,开始智能流式播放") + + # Preload the first few audio chunks for smooth playback + preload_chunks = 2 + buffer_data = b'' + + # Preload phase + for i in range(min(preload_chunks, len(audio_chunks))): + if audio_chunks[i]: + buffer_data += audio_chunks[i] + progress = (i + 1) / len(audio_chunks) * 100 + print(f"\r📥 预加载音频: {progress:.1f}%", end='', flush=True) + + # Play the preloaded audio + if buffer_data: + playback_stream.write(buffer_data) + + # Continue with the remaining audio chunks + start_idx = preload_chunks + for i in range(start_idx, len(audio_chunks)): + if audio_chunks[i]: + playback_stream.write(audio_chunks[i]) + progress = (i + 1) / len(audio_chunks) * 100 + print(f"\r🔊 流式播放进度: {progress:.1f}%", end='', flush=True) + + # 
Stop and close the playback stream once everything has been written. NOTE(review): this cleanup is skipped when write() raises above, so the stream is left open on the error path; consider try/finally + playback_stream.stop_stream() + playback_stream.close() + + print("\n✅ 智能流式播放完成") + return True + + except Exception as e: + print(f"\n❌ 智能流式播放失败: {e}") + return False + finally: + self.is_playing = False + time.sleep(0.3) + + def play_audio_realtime(self, audio_queue): + """Real-time streaming playback: pull audio chunks from a queue and play each one immediately.""" + try: + print("🔊 启动实时音频播放器...") + + # Make sure audio input (recording) is stopped + if self.recording: + self.recording = False + self.recorded_frames = [] + self.recording_start_time = None + self.last_sound_time = None + + # Clear the buffers + self.pre_record_buffer = [] + self.energy_history = [] + self.zcr_history = [] + + # Close the input stream + if self.stream: + self.stream.stop_stream() + self.stream.close() + self.stream = None + + self.is_playing = True + time.sleep(0.3) # wait for the audio device to switch over + + # Create the playback stream + playback_stream = self.audio.open( + format=self.FORMAT, + channels=self.CHANNELS, + rate=self.RATE, + output=True, + frames_per_buffer=1024 # smaller buffer for a faster response + ) + + print("🚫 音频输入已关闭,实时播放器就绪") + + chunks_played = 0 + total_size = 0 + + # Keep pulling audio data from the queue and playing it + while True: + try: + # Use a timeout so the get() never blocks forever + chunk = audio_queue.get(timeout=1.0) + + if chunk is None: # end-of-stream sentinel + print("📥 收到播放结束信号") + break + + if chunk: # skip empty chunks + playback_stream.write(chunk) + chunks_played += 1 + total_size += len(chunk) + + # Show playback progress + print(f"\r🔊 实时播放: {chunks_played} 块 | {total_size / 1024:.1f} KB", end='', flush=True) + + audio_queue.task_done() + + except queue.Empty: + # Queue is empty: check whether audio is still being received + if not hasattr(self, '_receiving_audio') or not self._receiving_audio: + print("\n📡 音频接收完成,播放器结束") + break + continue + except Exception as e: + print(f"\n❌ 播放过程中出错: {e}") + break + + # Make sure the playback stream is closed properly + playback_stream.stop_stream() + playback_stream.close() + + print(f"\n✅ 实时播放完成: {chunks_played} 块, {total_size / 1024:.1f} KB") + return True + + except Exception as e: + print(f"\n❌ 实时播放失败: {e}") + return False + finally: + self.is_playing = False + time.sleep(0.3) + + def play_audio_hybrid(self, audio_chunks): + """Hybrid playback: choose between streaming playback and traditional file-based playback.""" + try: + if not 
audio_chunks: + return False + + # Pick the playback strategy from the chunk count and total size + total_size = sum(len(chunk) for chunk in audio_chunks) + chunk_count = len(audio_chunks) + + print(f"📊 音频分析: {chunk_count} 块, 总大小: {total_size / 1024:.1f} KB") + + # Decision rule: + # 1. Few chunks or a small total size: traditional playback (better audio quality) + # 2. Otherwise (more than 3 chunks and at least 50 KB): streaming playback (faster response) + if chunk_count <= 3 or total_size < 50 * 1024: # at most 3 chunks, or under 50 KB + print("🎵 选择传统播放模式(保证音质)") + # Merge all audio chunks + full_audio = b''.join(audio_chunks) + + # Save to a temporary file + temp_file = self.generate_tts_filename() + with open(temp_file, "wb") as f: + f.write(full_audio) + + # Play it the traditional way + success = self.play_audio_safe(temp_file, reopen_input=False) + + # Delete the temporary file + self._safe_delete_file(temp_file, "临时音频文件") + + return success + else: + print("⚡ 选择智能流式播放模式(快速响应)") + return self.play_audio_streaming(audio_chunks) + + except Exception as e: + print(f"❌ 混合播放失败: {e}") + return False + def play_audio_safe(self, filename, reopen_input=False): """安全的播放方式 - 使用系统播放器""" try: @@ -1157,15 +1359,12 @@ class EnergyBasedRecorder: return f"tts_response_{timestamp}.pcm" def text_to_speech(self, text): - """文本转语音""" + """Text-to-speech with real-time streaming playback.""" if not self.enable_tts: return None try: - print("🔊 开始文本转语音...") - - # 生成输出文件名 - output_file = self.generate_tts_filename() + print("🔊 开始文本转语音(实时流式播放)...") # 构建请求头 headers = { @@ -1194,6 +1393,16 @@ class EnergyBasedRecorder: } } + # Create the audio queue + audio_queue = queue.Queue() + + # Start the real-time playback thread + self._receiving_audio = True + player_thread = threading.Thread(target=self.play_audio_realtime, args=(audio_queue,)) + player_thread.daemon = True + player_thread.start() + print("🎵 实时播放器已启动") + # 发送请求 session = requests.Session() try: @@ -1202,11 +1411,15 @@ class EnergyBasedRecorder: if response.status_code != 200: print(f"❌ TTS请求失败: {response.status_code}") print(f"响应内容: {response.text}") + # Send the end-of-stream sentinel to the queue + audio_queue.put(None) return None - # 处理流式响应 - audio_data = bytearray() + # Handle the streaming response - real-time playback mode total_audio_size = 0 + chunk_count = 0 + + print("🔄 开始接收TTS音频流(实时播放)...") for chunk in 
response.iter_lines(decode_unicode=True): if not chunk: @@ -1219,55 +1432,47 @@ class EnergyBasedRecorder: chunk_audio = base64.b64decode(data["data"]) audio_size = len(chunk_audio) total_audio_size += audio_size - audio_data.extend(chunk_audio) + chunk_count += 1 + + # Queue the audio chunk for real-time playback + audio_queue.put(chunk_audio) + + # Show receive progress + print(f"\r📥 接收并播放: {chunk_count} 块 | {total_audio_size / 1024:.1f} KB", end='', flush=True) continue if data.get("code", 0) == 0 and "sentence" in data and data["sentence"]: - print("TTS句子信息:", data["sentence"]) + print(f"\n📝 TTS句子信息: {data['sentence']}") continue if data.get("code", 0) == 20000000: break if data.get("code", 0) > 0: - print(f"❌ TTS错误响应: {data}") + print(f"\n❌ TTS错误响应: {data}") break except json.JSONDecodeError: - print(f"❌ 解析TTS响应失败: {chunk}") + print(f"\n❌ 解析TTS响应失败: {chunk}") continue - # 保存音频文件 - if audio_data: - with open(output_file, "wb") as f: - f.write(audio_data) - print(f"✅ TTS音频已保存: {output_file}") - print(f"📁 文件大小: {len(audio_data) / 1024:.2f} KB") - - # 确保文件有正确的访问权限 - os.chmod(output_file, 0o644) - - # 播放生成的音频 - if hasattr(self, 'audio_player_available') and self.audio_player_available: - print("🔊 播放AI语音回复...") - self.play_audio_safe(output_file, reopen_input=False) - else: - print("ℹ️ 跳过播放TTS音频(无可用播放器)") - print(f"📁 TTS音频已保存到: {output_file}") - - # 播放完成后删除PCM文件 - self._safe_delete_file(output_file, "TTS音频文件") - - return output_file - else: - print("❌ 未接收到TTS音频数据") - # 尝试删除可能存在的空文件 - self._safe_delete_file(output_file, "空的TTS音频文件") - return None + print(f"\n✅ TTS音频接收完成: {chunk_count} 个音频块, 总大小: {total_audio_size / 1024:.1f} KB") + + # Wait for playback to finish. NOTE(review): the None sentinel is only queued in the finally block below and _receiving_audio is still True here, so the player thread appears unable to exit before this join times out; consider queueing None before joining + print("⏳ 等待音频播放完成...") + player_thread.join(timeout=5.0) + + # Generate a temporary file name to return. NOTE(review): nothing visible in this patch writes a file under this name; confirm callers do not try to open it + temp_file = self.generate_tts_filename() + + return temp_file finally: response.close() session.close() + # Make sure the playback thread ends + self._receiving_audio = False + audio_queue.put(None) except Exception as e: print(f"❌ TTS转换失败: {e}")