This commit is contained in:
朱潮 2025-09-20 18:21:40 +08:00
parent 0eb937f88b
commit 8003ca3799

View File

@ -18,6 +18,7 @@ import time
import uuid
import wave
import argparse
import queue
from io import BytesIO
from urllib.parse import urlparse
@ -504,6 +505,207 @@ class EnergyBasedRecorder:
except:
pass
def play_audio_streaming(self, audio_chunks, settle_delay=0.3):
    """Play a list of already-received audio chunks on a fresh output stream.

    Before playback the recording state is reset, the capture buffers are
    cleared and the input stream (``self.stream``) is stopped and closed.
    This method does not reopen the input stream afterwards.

    Args:
        audio_chunks: Sequence of bytes-like audio chunks. Falsy entries
            (``None`` / ``b''``) are skipped. Presumably raw PCM matching
            ``self.FORMAT`` / ``self.CHANNELS`` / ``self.RATE``, since the
            data is written to the output stream unchanged.
        settle_delay: Seconds to sleep after closing the input stream and
            again after playback, giving the audio device time to switch.

    Returns:
        True when all chunks were written and the output stream was closed
        cleanly; False when ``audio_chunks`` is empty or any step raised.
    """
    # Nothing to play: return before touching recorder state or sleeping.
    if not audio_chunks:
        return False

    playback_stream = None
    try:
        print("🔊 开始智能流式播放音频...")

        # Make sure audio input is stopped.
        if self.recording:
            self.recording = False
            self.recorded_frames = []
            self.recording_start_time = None
            self.last_sound_time = None

        # Clear the capture buffers.
        self.pre_record_buffer = []
        self.energy_history = []
        self.zcr_history = []

        # Close the input stream.
        if self.stream:
            self.stream.stop_stream()
            self.stream.close()
            self.stream = None

        self.is_playing = True
        time.sleep(settle_delay)  # wait for the audio device to switch

        # Open the playback stream with a larger buffer.
        playback_stream = self.audio.open(
            format=self.FORMAT,
            channels=self.CHANNELS,
            rate=self.RATE,
            output=True,
            frames_per_buffer=2048,
        )
        print("🚫 音频输入已关闭,开始智能流式播放")

        total_chunks = len(audio_chunks)
        preload_chunks = 2

        # Preload phase: merge the first chunks into a single write so
        # playback starts with a reasonably full buffer.
        preloaded = []
        for position, chunk in enumerate(audio_chunks[:preload_chunks], start=1):
            if chunk:
                preloaded.append(chunk)
                progress = position / total_chunks * 100
                print(f"\r📥 预加载音频: {progress:.1f}%", end='', flush=True)
        if preloaded:
            playback_stream.write(b''.join(preloaded))

        # Stream the remaining chunks one by one.
        remaining = enumerate(audio_chunks[preload_chunks:], start=preload_chunks + 1)
        for position, chunk in remaining:
            if chunk:
                playback_stream.write(chunk)
                progress = position / total_chunks * 100
                print(f"\r🔊 流式播放进度: {progress:.1f}%", end='', flush=True)

        # Stop and close on the success path so the completion message is
        # only printed once the stream has actually been shut down.
        playback_stream.stop_stream()
        playback_stream.close()
        playback_stream = None

        print("\n✅ 智能流式播放完成")
        return True
    except Exception as e:
        print(f"\n❌ 智能流式播放失败: {e}")
        return False
    finally:
        # Error path: the stream is still open, release the output device.
        if playback_stream is not None:
            try:
                playback_stream.close()
            except Exception as close_error:
                print(f"\n⚠️ 关闭播放流失败: {close_error}")
        self.is_playing = False
        time.sleep(settle_delay)
def play_audio_realtime(self, audio_queue, settle_delay=0.3):
    """Play audio chunks as they arrive on a queue.

    Before playback the recording state is reset, the capture buffers are
    cleared and the input stream (``self.stream``) is stopped and closed.
    The loop then pulls chunks from ``audio_queue`` and writes each one to
    a fresh output stream until it sees a ``None`` sentinel, or until the
    queue stays empty for one second while ``self._receiving_audio`` is
    unset or falsy.

    Args:
        audio_queue: ``queue.Queue``-like object yielding bytes-like audio
            chunks; ``None`` signals end of stream. ``task_done()`` is
            called exactly once for every item taken from it.
        settle_delay: Seconds to sleep after closing the input stream and
            again after playback, giving the audio device time to switch.

    Returns:
        True when playback ended normally; False when setup failed or a
        chunk could not be played.
    """
    playback_stream = None
    playback_ok = True
    try:
        print("🔊 启动实时音频播放器...")

        # Make sure audio input is stopped.
        if self.recording:
            self.recording = False
            self.recorded_frames = []
            self.recording_start_time = None
            self.last_sound_time = None

        # Clear the capture buffers.
        self.pre_record_buffer = []
        self.energy_history = []
        self.zcr_history = []

        # Close the input stream.
        if self.stream:
            self.stream.stop_stream()
            self.stream.close()
            self.stream = None

        self.is_playing = True
        time.sleep(settle_delay)  # wait for the audio device to switch

        # Open the playback stream; the smaller buffer lowers latency.
        playback_stream = self.audio.open(
            format=self.FORMAT,
            channels=self.CHANNELS,
            rate=self.RATE,
            output=True,
            frames_per_buffer=1024,
        )
        print("🚫 音频输入已关闭,实时播放器就绪")

        chunks_played = 0
        total_size = 0

        while True:
            try:
                # Use a timeout so a stalled producer cannot block forever.
                chunk = audio_queue.get(timeout=1.0)
            except queue.Empty:
                # Queue drained: stop only if the producer is finished.
                if not getattr(self, '_receiving_audio', False):
                    print("\n📡 音频接收完成,播放器结束")
                    break
                continue

            try:
                if chunk is None:  # end-of-stream sentinel
                    print("📥 收到播放结束信号")
                    break
                if chunk:  # skip empty chunks
                    playback_stream.write(chunk)
                    chunks_played += 1
                    total_size += len(chunk)
                    print(f"\r🔊 实时播放: {chunks_played} 块 | {total_size / 1024:.1f} KB", end='', flush=True)
            except Exception as e:
                print(f"\n❌ 播放过程中出错: {e}")
                playback_ok = False
                break
            finally:
                # Balance every successful get(), including the sentinel
                # and failed writes, so Queue.join() cannot hang.
                audio_queue.task_done()

        # Shut the stream down before reporting the result.
        playback_stream.stop_stream()
        playback_stream.close()
        playback_stream = None

        if playback_ok:
            print(f"\n✅ 实时播放完成: {chunks_played} 块, {total_size / 1024:.1f} KB")
        return playback_ok
    except Exception as e:
        print(f"\n❌ 实时播放失败: {e}")
        return False
    finally:
        # Error path: the stream is still open, release the output device.
        if playback_stream is not None:
            try:
                playback_stream.close()
            except Exception as close_error:
                print(f"\n⚠️ 关闭播放流失败: {close_error}")
        self.is_playing = False
        time.sleep(settle_delay)
def play_audio_hybrid(self, audio_chunks):
    """Pick a playback strategy for a list of audio chunks and run it.

    Falsy chunks (``None`` / ``b''``) are dropped first. If at most three
    playable chunks remain, or they total less than 50 KB, they are joined,
    written to a temporary file named by ``self.generate_tts_filename()``
    and played via ``self.play_audio_safe``; the temporary file is deleted
    afterwards even if playback raises. Otherwise the chunks are handed to
    ``self.play_audio_streaming``.

    Args:
        audio_chunks: Iterable of bytes-like audio chunks (may be empty or
            ``None``).

    Returns:
        The result of the selected player, or False when there is nothing
        to play or any step raised.
    """
    try:
        # Drop empty/None chunks up front so the size computation cannot
        # fail on entries the streaming player would have skipped anyway.
        playable = [chunk for chunk in (audio_chunks or []) if chunk]
        if not playable:
            return False

        total_size = sum(len(chunk) for chunk in playable)
        chunk_count = len(playable)
        print(f"📊 音频分析: {chunk_count} 块, 总大小: {total_size / 1024:.1f} KB")

        # Strategy:
        # 1. Few chunks or a small total size -> file-based playback.
        # 2. Many chunks and a large total size -> streaming playback.
        if chunk_count <= 3 or total_size < 50 * 1024:
            print("🎵 选择传统播放模式(保证音质)")
            temp_file = self.generate_tts_filename()
            try:
                with open(temp_file, "wb") as f:
                    f.write(b''.join(playable))
                return self.play_audio_safe(temp_file, reopen_input=False)
            finally:
                # Always remove the temporary file, even when playback fails.
                self._safe_delete_file(temp_file, "临时音频文件")

        print("⚡ 选择智能流式播放模式(快速响应)")
        return self.play_audio_streaming(playable)
    except Exception as e:
        print(f"❌ 混合播放失败: {e}")
        return False
def play_audio_safe(self, filename, reopen_input=False):
"""安全的播放方式 - 使用系统播放器"""
try:
@ -1157,15 +1359,12 @@ class EnergyBasedRecorder:
return f"tts_response_{timestamp}.pcm"
def text_to_speech(self, text):
"""文本转语音"""
"""文本转语音 - 真正实时流式播放"""
if not self.enable_tts:
return None
try:
print("🔊 开始文本转语音...")
# 生成输出文件名
output_file = self.generate_tts_filename()
print("🔊 开始文本转语音(实时流式播放)...")
# 构建请求头
headers = {
@ -1194,6 +1393,16 @@ class EnergyBasedRecorder:
}
}
# 创建音频队列
audio_queue = queue.Queue()
# 启动实时播放线程
self._receiving_audio = True
player_thread = threading.Thread(target=self.play_audio_realtime, args=(audio_queue,))
player_thread.daemon = True
player_thread.start()
print("🎵 实时播放器已启动")
# 发送请求
session = requests.Session()
try:
@ -1202,11 +1411,15 @@ class EnergyBasedRecorder:
if response.status_code != 200:
print(f"❌ TTS请求失败: {response.status_code}")
print(f"响应内容: {response.text}")
# 向队列发送结束信号
audio_queue.put(None)
return None
# 处理流式响应
audio_data = bytearray()
# 处理流式响应 - 实时播放模式
total_audio_size = 0
chunk_count = 0
print("🔄 开始接收TTS音频流实时播放...")
for chunk in response.iter_lines(decode_unicode=True):
if not chunk:
@ -1219,55 +1432,47 @@ class EnergyBasedRecorder:
chunk_audio = base64.b64decode(data["data"])
audio_size = len(chunk_audio)
total_audio_size += audio_size
audio_data.extend(chunk_audio)
chunk_count += 1
# 将音频块放入队列进行实时播放
audio_queue.put(chunk_audio)
# 显示接收进度
print(f"\r📥 接收并播放: {chunk_count} 块 | {total_audio_size / 1024:.1f} KB", end='', flush=True)
continue
if data.get("code", 0) == 0 and "sentence" in data and data["sentence"]:
print("TTS句子信息:", data["sentence"])
print(f"\n📝 TTS句子信息: {data['sentence']}")
continue
if data.get("code", 0) == 20000000:
break
if data.get("code", 0) > 0:
print(f"❌ TTS错误响应: {data}")
print(f"\n❌ TTS错误响应: {data}")
break
except json.JSONDecodeError:
print(f"❌ 解析TTS响应失败: {chunk}")
print(f"\n❌ 解析TTS响应失败: {chunk}")
continue
# 保存音频文件
if audio_data:
with open(output_file, "wb") as f:
f.write(audio_data)
print(f"✅ TTS音频已保存: {output_file}")
print(f"📁 文件大小: {len(audio_data) / 1024:.2f} KB")
# 确保文件有正确的访问权限
os.chmod(output_file, 0o644)
# 播放生成的音频
if hasattr(self, 'audio_player_available') and self.audio_player_available:
print("🔊 播放AI语音回复...")
self.play_audio_safe(output_file, reopen_input=False)
else:
print(" 跳过播放TTS音频无可用播放器")
print(f"📁 TTS音频已保存到: {output_file}")
# 播放完成后删除PCM文件
self._safe_delete_file(output_file, "TTS音频文件")
return output_file
else:
print("❌ 未接收到TTS音频数据")
# 尝试删除可能存在的空文件
self._safe_delete_file(output_file, "空的TTS音频文件")
return None
print(f"\n✅ TTS音频接收完成: {chunk_count} 个音频块, 总大小: {total_audio_size / 1024:.1f} KB")
# 等待播放完成
print("⏳ 等待音频播放完成...")
player_thread.join(timeout=5.0)
# 生成临时文件名用于返回
temp_file = self.generate_tts_filename()
return temp_file
finally:
response.close()
session.close()
# 确保播放线程结束
self._receiving_audio = False
audio_queue.put(None)
except Exception as e:
print(f"❌ TTS转换失败: {e}")