This commit is contained in:
朱潮 2025-09-20 18:21:40 +08:00
parent 0eb937f88b
commit 8003ca3799

View File

@ -18,6 +18,7 @@ import time
import uuid import uuid
import wave import wave
import argparse import argparse
import queue
from io import BytesIO from io import BytesIO
from urllib.parse import urlparse from urllib.parse import urlparse
@ -504,6 +505,207 @@ class EnergyBasedRecorder:
except: except:
pass pass
def play_audio_streaming(self, audio_chunks, preload_chunks=2, settle_delay=0.3):
    """Play an already-received sequence of audio chunks through an output stream.

    Recording state is reset and the input stream (if any) is closed before
    an output stream is opened via ``self.audio.open`` (presumably a PyAudio
    instance - confirm against the class constructor). The first
    ``preload_chunks`` chunks are merged into a single write, the remaining
    chunks are written one by one. Empty/None chunks are skipped.

    Args:
        audio_chunks: Sequence of bytes-like chunks; must support ``len()``
            and slicing.
        preload_chunks: Number of leading chunks merged into the first write.
        settle_delay: Seconds to sleep after closing the input stream and
            again after playback ends, giving the audio device time to switch.

    Returns:
        True when every chunk was written and the stream closed cleanly,
        False for empty input or when any step raised.
    """
    # Nothing to play: bail out before touching recorder state, so an empty
    # call neither clears is_playing for a player that is already running
    # nor pays the device-settle delay.
    if not audio_chunks:
        return False

    playback_stream = None
    try:
        print("🔊 开始智能流式播放音频...")

        # Make sure audio capture is stopped before the device is reused.
        if self.recording:
            self.recording = False
            self.recorded_frames = []
            self.recording_start_time = None
            self.last_sound_time = None

        # Drop buffered capture data and detection history.
        self.pre_record_buffer = []
        self.energy_history = []
        self.zcr_history = []

        # Close the input stream.
        if self.stream:
            self.stream.stop_stream()
            self.stream.close()
            self.stream = None

        self.is_playing = True
        time.sleep(settle_delay)  # wait for the audio device to switch

        # Open the output stream with a larger buffer.
        playback_stream = self.audio.open(
            format=self.FORMAT,
            channels=self.CHANNELS,
            rate=self.RATE,
            output=True,
            frames_per_buffer=2048,
        )
        print("🚫 音频输入已关闭,开始智能流式播放")

        total = len(audio_chunks)
        preload_count = max(0, min(preload_chunks, total))

        # Preload phase: merge the leading chunks into one write.
        preloaded = []
        for position, chunk in enumerate(audio_chunks[:preload_count], start=1):
            if chunk:
                preloaded.append(chunk)
                progress = position / total * 100
                print(f"\r📥 预加载音频: {progress:.1f}%", end='', flush=True)
        if preloaded:
            playback_stream.write(b''.join(preloaded))

        # Streaming phase: write the remaining chunks one at a time.
        remaining = enumerate(audio_chunks[preload_count:], start=preload_count + 1)
        for position, chunk in remaining:
            if chunk:
                playback_stream.write(chunk)
                progress = position / total * 100
                print(f"\r🔊 流式播放进度: {progress:.1f}%", end='', flush=True)

        # Normal shutdown; errors here are reported by the handler below.
        playback_stream.stop_stream()
        playback_stream.close()
        playback_stream = None

        print("\n✅ 智能流式播放完成")
        return True

    except Exception as e:
        print(f"\n❌ 智能流式播放失败: {e}")
        return False

    finally:
        if playback_stream is not None:
            # Failure path: release the output device. Best effort only -
            # the original error has already been reported above.
            try:
                playback_stream.close()
            except Exception:
                pass
        self.is_playing = False
        time.sleep(settle_delay)
def play_audio_realtime(self, audio_queue, poll_timeout=1.0, settle_delay=0.3):
    """Play audio chunks as they arrive on a queue.

    Recording state is reset and the input stream (if any) is closed, then an
    output stream is opened via ``self.audio.open`` (presumably a PyAudio
    instance - confirm against the class constructor). Chunks are taken from
    ``audio_queue`` and written immediately until either a ``None`` sentinel
    is dequeued, or the queue stays empty for ``poll_timeout`` seconds while
    ``self._receiving_audio`` is falsy or unset.

    Every dequeued item - including the sentinel and a chunk whose write
    fails - is acknowledged with ``task_done()`` so ``audio_queue.join()``
    cannot hang on this consumer.

    Args:
        audio_queue: ``queue.Queue``-like object yielding bytes chunks and a
            final ``None`` sentinel.
        poll_timeout: Seconds to block on each ``get`` before re-checking
            whether the producer is still active.
        settle_delay: Seconds to sleep after closing the input stream and
            again after playback ends, giving the audio device time to switch.

    Returns:
        True when playback ended normally, False when opening the stream,
        reading the queue or writing a chunk raised.
    """
    playback_stream = None
    try:
        print("🔊 启动实时音频播放器...")

        # Make sure audio capture is stopped before the device is reused.
        if self.recording:
            self.recording = False
            self.recorded_frames = []
            self.recording_start_time = None
            self.last_sound_time = None

        # Drop buffered capture data and detection history.
        self.pre_record_buffer = []
        self.energy_history = []
        self.zcr_history = []

        # Close the input stream.
        if self.stream:
            self.stream.stop_stream()
            self.stream.close()
            self.stream = None

        self.is_playing = True
        time.sleep(settle_delay)  # wait for the audio device to switch

        # Open the output stream; a smaller buffer keeps latency low.
        playback_stream = self.audio.open(
            format=self.FORMAT,
            channels=self.CHANNELS,
            rate=self.RATE,
            output=True,
            frames_per_buffer=1024,
        )
        print("🚫 音频输入已关闭,实时播放器就绪")

        chunks_played = 0
        total_size = 0
        playback_ok = True

        while True:
            try:
                # Bounded wait so a stalled producer cannot block forever.
                chunk = audio_queue.get(timeout=poll_timeout)
            except queue.Empty:
                # Stop only when the producer is done AND nothing slipped
                # into the queue between the timeout and this check.
                still_receiving = getattr(self, '_receiving_audio', False)
                if not still_receiving and audio_queue.empty():
                    print("\n📡 音频接收完成,播放器结束")
                    break
                continue

            try:
                if chunk is None:  # end-of-stream sentinel
                    print("📥 收到播放结束信号")
                    break
                if chunk:  # skip empty chunks
                    playback_stream.write(chunk)
                    chunks_played += 1
                    total_size += len(chunk)
                    print(f"\r🔊 实时播放: {chunks_played} 块 | {total_size / 1024:.1f} KB", end='', flush=True)
            except Exception as e:
                print(f"\n❌ 播放过程中出错: {e}")
                playback_ok = False
                break
            finally:
                # Acknowledge every dequeued item, whatever happened to it.
                audio_queue.task_done()

        # Normal shutdown; errors here are reported by the handler below.
        playback_stream.stop_stream()
        playback_stream.close()
        playback_stream = None

        if playback_ok:
            print(f"\n✅ 实时播放完成: {chunks_played} 块, {total_size / 1024:.1f} KB")
        return playback_ok

    except Exception as e:
        print(f"\n❌ 实时播放失败: {e}")
        return False

    finally:
        if playback_stream is not None:
            # Failure path: release the output device. Best effort only -
            # the original error has already been reported above.
            try:
                playback_stream.close()
            except Exception:
                pass
        self.is_playing = False
        time.sleep(settle_delay)
def play_audio_hybrid(self, audio_chunks):
    """Play audio chunks, choosing between file-based and streaming playback.

    None/empty chunks are ignored. When at most 3 chunks remain, or their
    combined size is below 50 KB, the chunks are merged, written to a
    temporary file named by ``self.generate_tts_filename()`` and played with
    ``self.play_audio_safe``; the temporary file is removed afterwards even
    if writing or playback raises. Otherwise the chunks are handed to
    ``self.play_audio_streaming``.

    Args:
        audio_chunks: Iterable of bytes chunks (None/empty entries allowed).

    Returns:
        The result of the selected playback method, or False when there is
        no audio data or any step raised.
    """
    try:
        if not audio_chunks:
            return False

        # Ignore None/empty chunks so len() and join() cannot fail on them;
        # the streaming player skips such chunks as well.
        playable = [chunk for chunk in audio_chunks if chunk]
        if not playable:
            return False

        total_size = sum(len(chunk) for chunk in playable)
        chunk_count = len(playable)
        print(f"📊 音频分析: {chunk_count} 块, 总大小: {total_size / 1024:.1f} KB")

        # Strategy:
        # 1. Few chunks (<= 3) or little data (< 50 KB): file-based playback.
        # 2. Otherwise: streaming playback for a faster response.
        if chunk_count <= 3 or total_size < 50 * 1024:
            print("🎵 选择传统播放模式(保证音质)")

            temp_file = self.generate_tts_filename()
            try:
                # Merge all chunks into one temporary file.
                with open(temp_file, "wb") as f:
                    f.write(b''.join(playable))
                return self.play_audio_safe(temp_file, reopen_input=False)
            finally:
                # Always clean up, including when writing or playback failed.
                self._safe_delete_file(temp_file, "临时音频文件")

        print("⚡ 选择智能流式播放模式(快速响应)")
        return self.play_audio_streaming(playable)

    except Exception as e:
        print(f"❌ 混合播放失败: {e}")
        return False
def play_audio_safe(self, filename, reopen_input=False): def play_audio_safe(self, filename, reopen_input=False):
"""安全的播放方式 - 使用系统播放器""" """安全的播放方式 - 使用系统播放器"""
try: try:
@ -1157,15 +1359,12 @@ class EnergyBasedRecorder:
return f"tts_response_{timestamp}.pcm" return f"tts_response_{timestamp}.pcm"
def text_to_speech(self, text): def text_to_speech(self, text):
"""文本转语音""" """文本转语音 - 真正实时流式播放"""
if not self.enable_tts: if not self.enable_tts:
return None return None
try: try:
print("🔊 开始文本转语音...") print("🔊 开始文本转语音(实时流式播放)...")
# 生成输出文件名
output_file = self.generate_tts_filename()
# 构建请求头 # 构建请求头
headers = { headers = {
@ -1194,6 +1393,16 @@ class EnergyBasedRecorder:
} }
} }
# 创建音频队列
audio_queue = queue.Queue()
# 启动实时播放线程
self._receiving_audio = True
player_thread = threading.Thread(target=self.play_audio_realtime, args=(audio_queue,))
player_thread.daemon = True
player_thread.start()
print("🎵 实时播放器已启动")
# 发送请求 # 发送请求
session = requests.Session() session = requests.Session()
try: try:
@ -1202,11 +1411,15 @@ class EnergyBasedRecorder:
if response.status_code != 200: if response.status_code != 200:
print(f"❌ TTS请求失败: {response.status_code}") print(f"❌ TTS请求失败: {response.status_code}")
print(f"响应内容: {response.text}") print(f"响应内容: {response.text}")
# 向队列发送结束信号
audio_queue.put(None)
return None return None
# 处理流式响应 # 处理流式响应 - 实时播放模式
audio_data = bytearray()
total_audio_size = 0 total_audio_size = 0
chunk_count = 0
print("🔄 开始接收TTS音频流实时播放...")
for chunk in response.iter_lines(decode_unicode=True): for chunk in response.iter_lines(decode_unicode=True):
if not chunk: if not chunk:
@ -1219,55 +1432,47 @@ class EnergyBasedRecorder:
chunk_audio = base64.b64decode(data["data"]) chunk_audio = base64.b64decode(data["data"])
audio_size = len(chunk_audio) audio_size = len(chunk_audio)
total_audio_size += audio_size total_audio_size += audio_size
audio_data.extend(chunk_audio) chunk_count += 1
# 将音频块放入队列进行实时播放
audio_queue.put(chunk_audio)
# 显示接收进度
print(f"\r📥 接收并播放: {chunk_count} 块 | {total_audio_size / 1024:.1f} KB", end='', flush=True)
continue continue
if data.get("code", 0) == 0 and "sentence" in data and data["sentence"]: if data.get("code", 0) == 0 and "sentence" in data and data["sentence"]:
print("TTS句子信息:", data["sentence"]) print(f"\n📝 TTS句子信息: {data['sentence']}")
continue continue
if data.get("code", 0) == 20000000: if data.get("code", 0) == 20000000:
break break
if data.get("code", 0) > 0: if data.get("code", 0) > 0:
print(f"❌ TTS错误响应: {data}") print(f"\n❌ TTS错误响应: {data}")
break break
except json.JSONDecodeError: except json.JSONDecodeError:
print(f"❌ 解析TTS响应失败: {chunk}") print(f"\n❌ 解析TTS响应失败: {chunk}")
continue continue
# 保存音频文件 print(f"\n✅ TTS音频接收完成: {chunk_count} 个音频块, 总大小: {total_audio_size / 1024:.1f} KB")
if audio_data:
with open(output_file, "wb") as f: # 等待播放完成
f.write(audio_data) print("⏳ 等待音频播放完成...")
print(f"✅ TTS音频已保存: {output_file}") player_thread.join(timeout=5.0)
print(f"📁 文件大小: {len(audio_data) / 1024:.2f} KB")
# 生成临时文件名用于返回
# 确保文件有正确的访问权限 temp_file = self.generate_tts_filename()
os.chmod(output_file, 0o644)
return temp_file
# 播放生成的音频
if hasattr(self, 'audio_player_available') and self.audio_player_available:
print("🔊 播放AI语音回复...")
self.play_audio_safe(output_file, reopen_input=False)
else:
print(" 跳过播放TTS音频无可用播放器")
print(f"📁 TTS音频已保存到: {output_file}")
# 播放完成后删除PCM文件
self._safe_delete_file(output_file, "TTS音频文件")
return output_file
else:
print("❌ 未接收到TTS音频数据")
# 尝试删除可能存在的空文件
self._safe_delete_file(output_file, "空的TTS音频文件")
return None
finally: finally:
response.close() response.close()
session.close() session.close()
# 确保播放线程结束
self._receiving_audio = False
audio_queue.put(None)
except Exception as e: except Exception as e:
print(f"❌ TTS转换失败: {e}") print(f"❌ TTS转换失败: {e}")