Added comprehensive debug logging to identify why audio playback ends without starting: - TTS text processing and buffer management - Audio chunk generation and transfer between buffers - TTS task queue management - Streaming text processing This will help identify if the issue is: 1. Empty text being sent to TTS 2. TTS generation failing 3. Audio data not being transferred between buffers 4. Premature completion detection 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
1793 lines
84 KiB
Python
1793 lines
84 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
|
||
"""
|
||
多进程音频处理模块
|
||
定义输入进程和输出进程的类
|
||
使用增强版语音检测器
|
||
"""
|
||
|
||
import multiprocessing as mp
|
||
import queue
|
||
import time
|
||
import threading
|
||
import numpy as np
|
||
import pyaudio
|
||
from enum import Enum
|
||
from dataclasses import dataclass
|
||
from typing import Optional, List, Dict, Any
|
||
import json
|
||
import wave
|
||
import os
|
||
from enhanced_voice_detector import EnhancedVoiceDetector
|
||
from process_logger import ProcessLogger
|
||
|
||
import requests
|
||
import base64
|
||
import gzip
|
||
|
||
class RecordingState(Enum):
|
||
"""录音状态枚举"""
|
||
IDLE = "idle"
|
||
RECORDING = "recording"
|
||
PROCESSING = "processing"
|
||
PLAYING = "playing"
|
||
|
||
@dataclass
|
||
class AudioSegment:
|
||
"""音频片段数据结构"""
|
||
audio_data: bytes
|
||
start_time: float
|
||
end_time: float
|
||
duration: float
|
||
metadata: Dict[str, Any] = None
|
||
|
||
@dataclass
|
||
class ControlCommand:
|
||
"""控制命令数据结构"""
|
||
command: str
|
||
parameters: Dict[str, Any] = None
|
||
|
||
@dataclass
|
||
class ProcessEvent:
|
||
"""进程事件数据结构"""
|
||
event_type: str
|
||
data: Optional[bytes] = None
|
||
metadata: Dict[str, Any] = None
|
||
|
||
class InputProcess:
|
||
"""输入进程 - 专门负责录音和语音检测"""
|
||
|
||
def __init__(self, command_queue: mp.Queue, event_queue: mp.Queue, config: Dict[str, Any] = None):
|
||
self.command_queue = command_queue # 主进程 → 输入进程
|
||
self.event_queue = event_queue # 输入进程 → 主进程
|
||
|
||
# 配置参数
|
||
self.config = config or self._get_default_config()
|
||
|
||
# 初始化日志记录器
|
||
self.logger = ProcessLogger("InputProcess")
|
||
|
||
# 音频参数
|
||
self.FORMAT = pyaudio.paInt16
|
||
self.CHANNELS = 1
|
||
self.RATE = 16000
|
||
self.CHUNK_SIZE = 1024
|
||
|
||
# 状态控制
|
||
self.recording_enabled = True # 是否允许录音
|
||
self.is_recording = False # 是否正在录音
|
||
self.recording_buffer = [] # 录音缓冲区
|
||
self.pre_record_buffer = [] # 预录音缓冲区
|
||
self.silence_start_time = None
|
||
self.recording_start_time = None
|
||
|
||
# 预录音参数
|
||
self.pre_record_duration = 2.0
|
||
self.pre_record_max_frames = int(self.pre_record_duration * self.RATE / self.CHUNK_SIZE)
|
||
|
||
# 增强语音检测器
|
||
self.voice_detector = EnhancedVoiceDetector(
|
||
sample_rate=self.RATE,
|
||
chunk_size=self.CHUNK_SIZE
|
||
)
|
||
|
||
# 静音检测参数
|
||
self.consecutive_silence_count = 0
|
||
self.silence_threshold_count = 30 # 约3秒
|
||
|
||
# PyAudio实例
|
||
self.audio = None
|
||
self.input_stream = None
|
||
|
||
# 运行状态
|
||
self.running = True
|
||
|
||
# TTS 工作线程
|
||
self.tts_worker_running = True
|
||
self.tts_worker_thread = None
|
||
self.tts_task_queue = mp.Queue(maxsize=10)
|
||
|
||
# TTS 配置
|
||
self.tts_url = "https://openspeech.bytedance.com/api/v3/tts/unidirectional"
|
||
self.tts_app_id = "8718217928"
|
||
self.tts_access_key = "ynJMX-5ix1FsJvswC9KTNlGUdubcchqc"
|
||
self.tts_resource_id = "volc.service_type.10029"
|
||
self.tts_app_key = "aGjiRDfUWi"
|
||
self.tts_speaker = config.get('tts_speaker', "zh_female_wanqudashu_moon_bigtts") if config else "zh_female_wanqudashu_moon_bigtts"
|
||
|
||
# 启动 TTS 工作线程
|
||
self._start_tts_worker()
|
||
|
||
def _get_default_config(self) -> Dict[str, Any]:
|
||
"""获取默认配置"""
|
||
return {
|
||
'zcr_min': 2400, # 适应16kHz采样率的ZCR最小值
|
||
'zcr_max': 12000, # 适应16kHz采样率的ZCR最大值
|
||
'min_recording_time': 2.0, # 最小录音时间
|
||
'max_recording_time': 30.0,
|
||
'silence_threshold': 3.0,
|
||
'pre_record_duration': 2.0
|
||
}
|
||
|
||
def run(self):
|
||
"""输入进程主循环"""
|
||
self.logger.info("输入进程启动")
|
||
self._setup_audio()
|
||
|
||
try:
|
||
while self.running:
|
||
# 1. 检查主进程命令
|
||
self._check_commands()
|
||
|
||
# 2. 如果允许录音,处理音频
|
||
if self.recording_enabled:
|
||
self._process_audio()
|
||
|
||
# 3. 短暂休眠,减少CPU占用
|
||
time.sleep(0.01)
|
||
|
||
except KeyboardInterrupt:
|
||
self.logger.info("输入进程收到中断信号")
|
||
except Exception as e:
|
||
self.logger.error(f"输入进程错误: {e}")
|
||
finally:
|
||
self._cleanup()
|
||
self.logger.info("输入进程退出")
|
||
|
||
def _setup_audio(self):
|
||
"""设置音频输入设备"""
|
||
try:
|
||
self.audio = pyaudio.PyAudio()
|
||
self.input_stream = self.audio.open(
|
||
format=self.FORMAT,
|
||
channels=self.CHANNELS,
|
||
rate=self.RATE,
|
||
input=True,
|
||
frames_per_buffer=self.CHUNK_SIZE
|
||
)
|
||
self.logger.info("音频设备初始化成功")
|
||
except Exception as e:
|
||
self.logger.error(f"音频设备初始化失败: {e}")
|
||
raise
|
||
|
||
def _cleanup_audio_stream(self):
|
||
"""清理音频流(但不关闭整个PyAudio实例)"""
|
||
try:
|
||
if self.input_stream:
|
||
print("🎙️ 输入进程:关闭音频输入流")
|
||
self.input_stream.stop_stream()
|
||
self.input_stream.close()
|
||
self.input_stream = None
|
||
except Exception as e:
|
||
self.logger.warning(f"关闭音频流时出错: {e}")
|
||
|
||
def _cleanup(self):
|
||
"""清理资源"""
|
||
print("🎙️ 输入进程:开始清理资源")
|
||
self._cleanup_audio_stream()
|
||
|
||
if self.audio:
|
||
try:
|
||
print("🎙️ 输入进程:终止PyAudio实例")
|
||
self.audio.terminate()
|
||
except:
|
||
pass
|
||
self.audio = None
|
||
|
||
print("🎙️ 输入进程:资源清理完成")
|
||
|
||
def _check_commands(self):
|
||
"""检查主进程控制命令"""
|
||
try:
|
||
while True:
|
||
command = self.command_queue.get_nowait()
|
||
|
||
if command.command == 'enable_recording':
|
||
if not self.recording_enabled:
|
||
# 从禁用到启用,需要重新初始化音频流
|
||
print("🎙️ 输入进程:重新启用录音功能,重新初始化音频流")
|
||
self._cleanup_audio_stream()
|
||
# 清空所有音频缓冲区,防止旧数据被录制
|
||
self.recording_buffer = []
|
||
self.pre_record_buffer = []
|
||
self.is_recording = False
|
||
self.silence_start_time = None
|
||
self.consecutive_silence_count = 0
|
||
print("🎙️ 输入进程:已清空所有音频缓冲区")
|
||
self._setup_audio()
|
||
self.recording_enabled = True
|
||
self.logger.info("录音功能已启用")
|
||
|
||
elif command.command == 'disable_recording':
|
||
self.recording_enabled = False
|
||
# 如果正在录音,立即停止并发送数据
|
||
if self.is_recording:
|
||
self._stop_recording()
|
||
# 禁用时关闭音频流,避免回声污染
|
||
self._cleanup_audio_stream()
|
||
self.logger.info("录音功能已禁用")
|
||
|
||
elif command.command == 'shutdown':
|
||
self.logger.info("收到关闭命令")
|
||
self.running = False
|
||
return
|
||
|
||
except queue.Empty:
|
||
pass
|
||
|
||
def _process_audio(self):
|
||
"""处理音频数据"""
|
||
try:
|
||
# 检查音频流是否存在
|
||
if not self.input_stream:
|
||
return
|
||
|
||
data = self.input_stream.read(self.CHUNK_SIZE, exception_on_overflow=False)
|
||
if len(data) == 0:
|
||
return
|
||
|
||
# 更新预录音缓冲区
|
||
self._update_pre_record_buffer(data)
|
||
|
||
# 使用增强语音检测器
|
||
detection_result = self.voice_detector.is_voice_advanced(data)
|
||
is_voice = detection_result['is_voice']
|
||
|
||
if self.is_recording:
|
||
# 录音模式
|
||
self.recording_buffer.append(data)
|
||
|
||
# 静音检测
|
||
if is_voice:
|
||
self.silence_start_time = None
|
||
self.consecutive_silence_count = 0
|
||
else:
|
||
self.consecutive_silence_count += 1
|
||
if self.silence_start_time is None:
|
||
self.silence_start_time = time.time()
|
||
|
||
# 检查是否应该停止录音
|
||
recording_duration = time.time() - self.recording_start_time
|
||
should_stop = False
|
||
|
||
# 静音检测
|
||
if (self.consecutive_silence_count >= self.silence_threshold_count and
|
||
recording_duration >= self.config['min_recording_time']):
|
||
should_stop = True
|
||
print(f"🎙️ 输入进程:静音检测触发停止录音")
|
||
|
||
# 最大时间检测
|
||
if recording_duration >= self.config['max_recording_time']:
|
||
should_stop = True
|
||
print(f"🎙️ 输入进程:达到最大录音时间")
|
||
|
||
if should_stop:
|
||
self._stop_recording()
|
||
else:
|
||
# 显示录音状态
|
||
confidence = detection_result.get('confidence', 0.0)
|
||
energy = detection_result.get('energy', 0.0)
|
||
zcr = detection_result.get('zcr', 0.0)
|
||
status = (f"\r🎙️ 录音中... {recording_duration:.1f}s | "
|
||
f"能量: {energy:.0f} | ZCR: {zcr:.0f} | "
|
||
f"语音: {is_voice} | 置信度: {confidence:.2f}")
|
||
print(status, end='', flush=True)
|
||
|
||
else:
|
||
# 监听模式
|
||
if is_voice:
|
||
# 检测到语音,开始录音
|
||
self._start_recording()
|
||
else:
|
||
# 显示监听状态
|
||
if detection_result['calibrating']:
|
||
progress = detection_result['calibration_progress'] * 100
|
||
status = f"\r🎙️ 校准中... {progress:.0f}%"
|
||
else:
|
||
confidence = detection_result.get('confidence', 0.0)
|
||
energy = detection_result.get('energy', 0.0)
|
||
zcr = detection_result.get('zcr', 0.0)
|
||
buffer_usage = len(self.pre_record_buffer) / self.pre_record_max_frames * 100
|
||
status = (f"\r🎙️ 监听中... 能量: {energy:.0f} | ZCR: {zcr:.0f} | "
|
||
f"语音: {is_voice} | 置信度: {confidence:.2f} | "
|
||
f"缓冲: {buffer_usage:.0f}%")
|
||
|
||
print(status, end='', flush=True)
|
||
|
||
except Exception as e:
|
||
print(f"🎙️ 输入进程音频处理错误: {e}")
|
||
# 如果音频流出现问题,尝试重新初始化
|
||
if "stream" in str(e).lower() or "audio" in str(e).lower():
|
||
print(f"🎙️ 输入进程:音频流异常,尝试重新初始化")
|
||
self._cleanup_audio_stream()
|
||
if self.recording_enabled:
|
||
self._setup_audio()
|
||
|
||
def _update_pre_record_buffer(self, audio_data: bytes):
|
||
"""更新预录音缓冲区"""
|
||
self.pre_record_buffer.append(audio_data)
|
||
|
||
# 保持缓冲区大小
|
||
if len(self.pre_record_buffer) > self.pre_record_max_frames:
|
||
self.pre_record_buffer.pop(0)
|
||
|
||
def _start_recording(self):
|
||
"""开始录音"""
|
||
if not self.recording_enabled:
|
||
return
|
||
|
||
self.is_recording = True
|
||
self.recording_buffer = []
|
||
self.recording_start_time = time.time()
|
||
self.silence_start_time = None
|
||
self.consecutive_silence_count = 0
|
||
|
||
# 将预录音缓冲区的内容添加到录音中
|
||
self.recording_buffer.extend(self.pre_record_buffer)
|
||
self.pre_record_buffer.clear()
|
||
|
||
print(f"🎙️ 输入进程:开始录音(包含预录音 {self.config['pre_record_duration']}秒)")
|
||
|
||
def _stop_recording(self):
|
||
"""停止录音并发送数据"""
|
||
if not self.is_recording:
|
||
return
|
||
|
||
self.is_recording = False
|
||
|
||
# 合并录音数据
|
||
if self.recording_buffer:
|
||
audio_data = b''.join(self.recording_buffer)
|
||
duration = len(audio_data) / (self.RATE * 2)
|
||
|
||
# 创建音频片段
|
||
segment = AudioSegment(
|
||
audio_data=audio_data,
|
||
start_time=self.recording_start_time,
|
||
end_time=time.time(),
|
||
duration=duration,
|
||
metadata={
|
||
'sample_rate': self.RATE,
|
||
'channels': self.CHANNELS,
|
||
'format': self.FORMAT,
|
||
'chunk_size': self.CHUNK_SIZE
|
||
}
|
||
)
|
||
|
||
# 保存录音文件(可选)
|
||
filename = self._save_recording(audio_data)
|
||
|
||
# 发送给主进程
|
||
self.event_queue.put(ProcessEvent(
|
||
event_type='recording_complete',
|
||
data=audio_data,
|
||
metadata={
|
||
'duration': duration,
|
||
'start_time': self.recording_start_time,
|
||
'filename': filename
|
||
}
|
||
))
|
||
|
||
print(f"📝 输入进程:录音完成,时长 {duration:.2f} 秒")
|
||
|
||
# 清空缓冲区
|
||
self.recording_buffer = []
|
||
self.pre_record_buffer = []
|
||
|
||
def _save_recording(self, audio_data: bytes) -> str:
|
||
"""保存录音文件"""
|
||
try:
|
||
timestamp = time.strftime("%Y%m%d_%H%M%S")
|
||
filename = f"recording_{timestamp}.wav"
|
||
|
||
with wave.open(filename, 'wb') as wf:
|
||
wf.setnchannels(self.CHANNELS)
|
||
wf.setsampwidth(self.audio.get_sample_size(self.FORMAT))
|
||
wf.setframerate(self.RATE)
|
||
wf.writeframes(audio_data)
|
||
|
||
print(f"💾 输入进程:录音已保存到 {filename}")
|
||
return filename
|
||
|
||
except Exception as e:
|
||
print(f"❌ 输入进程保存录音失败: {e}")
|
||
return None
|
||
|
||
def _cleanup(self):
|
||
"""清理资源"""
|
||
# 停止 TTS 工作线程
|
||
self.tts_worker_running = False
|
||
if self.tts_worker_thread:
|
||
self.tts_task_queue.put(None) # 发送结束信号
|
||
self.tts_worker_thread.join(timeout=2.0)
|
||
|
||
if self.input_stream:
|
||
try:
|
||
self.input_stream.stop_stream()
|
||
self.input_stream.close()
|
||
except:
|
||
pass
|
||
|
||
if self.audio:
|
||
try:
|
||
self.audio.terminate()
|
||
except:
|
||
pass
|
||
|
||
def _start_tts_worker(self):
|
||
"""启动TTS工作线程"""
|
||
self.tts_worker_thread = threading.Thread(target=self._tts_worker, daemon=True)
|
||
self.tts_worker_thread.start()
|
||
self.logger.info("TTS工作线程已启动")
|
||
|
||
def _tts_worker(self):
|
||
"""TTS工作线程 - 处理TTS任务队列"""
|
||
while self.tts_worker_running:
|
||
try:
|
||
# 从队列获取任务
|
||
task = self.tts_task_queue.get(timeout=1.0)
|
||
if task is None: # 结束信号
|
||
break
|
||
|
||
task_type, content = task
|
||
if task_type == "tts_sentence":
|
||
# 生成音频数据
|
||
self._generate_tts_audio(content)
|
||
|
||
except queue.Empty:
|
||
continue
|
||
except Exception as e:
|
||
self.logger.error(f"TTS工作线程错误: {e}")
|
||
time.sleep(0.1)
|
||
|
||
def _add_tts_task(self, content):
|
||
"""添加TTS任务到队列"""
|
||
print(f"🔊 OutputProcess添加TTS任务到队列: '{content[:30]}...' (队列大小: {self.tts_task_queue.qsize()})")
|
||
try:
|
||
self.tts_task_queue.put_nowait(("tts_sentence", content))
|
||
print(f"✅ OutputProcess TTS任务添加成功,队列大小: {self.tts_task_queue.qsize()}")
|
||
return True
|
||
except queue.Full:
|
||
print(f"❌ OutputProcess TTS任务队列已满,丢弃任务")
|
||
self.logger.warning("TTS任务队列已满,丢弃任务")
|
||
return False
|
||
|
||
def _generate_tts_audio(self, text):
|
||
"""生成TTS音频数据"""
|
||
try:
|
||
self.logger.info(f"生成TTS音频: {text[:50]}...")
|
||
|
||
# 构建请求头
|
||
headers = {
|
||
"X-Api-App-Id": self.tts_app_id,
|
||
"X-Api-Access-Key": self.tts_access_key,
|
||
"X-Api-Resource-Id": self.tts_resource_id,
|
||
"X-Api-App-Key": self.tts_app_key,
|
||
"Content-Type": "application/json",
|
||
"Connection": "keep-alive"
|
||
}
|
||
|
||
# 构建请求参数
|
||
payload = {
|
||
"user": {
|
||
"uid": "input_process_tts"
|
||
},
|
||
"req_params": {
|
||
"text": text,
|
||
"speaker": self.tts_speaker,
|
||
"audio_params": {
|
||
"format": "pcm",
|
||
"sample_rate": 16000,
|
||
"enable_timestamp": True
|
||
},
|
||
"additions": "{\"explicit_language\":\"zh\",\"disable_markdown_filter\":true, \"enable_timestamp\":true}\"}"
|
||
}
|
||
}
|
||
|
||
# 发送请求
|
||
session = requests.Session()
|
||
try:
|
||
response = session.post(self.tts_url, headers=headers, json=payload, stream=True)
|
||
|
||
if response.status_code != 200:
|
||
self.logger.error(f"TTS请求失败: {response.status_code}")
|
||
return False
|
||
|
||
# 处理流式响应
|
||
total_audio_size = 0
|
||
chunk_count = 0
|
||
|
||
self.logger.info("开始接收TTS音频流...")
|
||
|
||
for chunk in response.iter_lines(decode_unicode=True):
|
||
if not chunk:
|
||
continue
|
||
|
||
try:
|
||
data = json.loads(chunk)
|
||
|
||
if data.get("code", 0) == 0 and "data" in data and data["data"]:
|
||
chunk_audio = base64.b64decode(data["data"])
|
||
audio_size = len(chunk_audio)
|
||
total_audio_size += audio_size
|
||
chunk_count += 1
|
||
|
||
# 这里可以将音频数据发送到输出进程
|
||
# 或者直接处理音频数据
|
||
self.logger.debug(f"接收音频块: {audio_size} 字节")
|
||
|
||
# 减少进度显示频率
|
||
if chunk_count % 5 == 0:
|
||
progress = f"生成音频: {chunk_count} 块 | {total_audio_size / 1024:.1f} KB"
|
||
self.logger.info(progress)
|
||
continue
|
||
|
||
if data.get("code", 0) == 20000000:
|
||
break
|
||
|
||
if data.get("code", 0) > 0:
|
||
self.logger.error(f"TTS错误响应: {data}")
|
||
break
|
||
|
||
except json.JSONDecodeError:
|
||
continue
|
||
|
||
self.logger.info(f"TTS音频生成完成: {chunk_count} 块, 总大小: {total_audio_size / 1024:.1f} KB")
|
||
|
||
return chunk_count > 0
|
||
|
||
finally:
|
||
response.close()
|
||
session.close()
|
||
|
||
except Exception as e:
|
||
self.logger.error(f"TTS音频生成失败: {e}")
|
||
return False
|
||
|
||
def process_tts_request(self, text):
|
||
"""处理TTS请求的公共接口"""
|
||
return self._add_tts_task(text)
|
||
|
||
class OutputProcess:
|
||
"""输出进程 - 借鉴 recorder.py 的优秀架构"""
|
||
|
||
def __init__(self, audio_queue: mp.Queue, config: Dict[str, Any] = None, event_queue: mp.Queue = None):
|
||
print("🔊 OutputProcess __init__ 开始执行...")
|
||
self.audio_queue = audio_queue # 主进程 → 输出进程(统一音频播放队列)
|
||
self.event_queue = event_queue # 输出进程 → 主进程(事件通知)
|
||
self.config = config or self._get_default_config()
|
||
|
||
# 初始化日志记录器
|
||
self.logger = ProcessLogger("OutputProcess")
|
||
print("🔊 OutputProcess 基本初始化完成")
|
||
|
||
# 音频播放参数 - 完全借鉴 recorder.py 的优化参数
|
||
self.FORMAT = pyaudio.paInt16
|
||
self.CHANNELS = 1
|
||
self.RATE = 16000
|
||
self.CHUNK_SIZE = 2048 # 增加缓冲区大小,减少卡顿
|
||
|
||
# 播放状态管理 - 借鉴 recorder.py 的状态管理模式
|
||
self.is_playing = False
|
||
self.currently_playing = False # 当前是否正在播放(双重状态检查)
|
||
self.playback_buffer = [] # 播放缓冲区
|
||
self.total_chunks_played = 0
|
||
self.total_audio_size = 0
|
||
self.last_playback_time = 0 # 最后播放时间戳
|
||
self.playback_cooldown_period = 0.1 # 播放冷却时间(秒)- 防止回声,减少到0.1秒
|
||
self.playback_completed = False # 播放完成标志
|
||
self.end_signal_received = False # 结束信号接收标志
|
||
|
||
# 智能缓冲系统 - 借鉴 recorder.py 的智能句子累积策略
|
||
self.preload_buffer = [] # 预加载缓冲区(保留用于音频块)
|
||
self.preload_size = 3 # 预加载3个音频块
|
||
|
||
# 智能句子缓冲系统 - 从 recorder.py 借鉴的核心机制
|
||
self.tts_buffer = [] # 智能句子缓冲区
|
||
self.tts_buffer_max_size = 3 # 最多缓冲3个句子
|
||
self.tts_buffer_min_size = 1 # 最少1个句子
|
||
self.tts_accumulation_time = 0.2 # 200ms积累窗口
|
||
self.tts_last_trigger_time = 0 # 上次触发TTS的时间
|
||
self.tts_pending_sentences = [] # 待处理的句子
|
||
self.min_buffer_size = 1 # 最小缓冲区大小
|
||
|
||
self.audio_device_healthy = True # 音频设备健康状态
|
||
|
||
# 统一播放工作线程 - 核心改进
|
||
self.playback_worker_running = True
|
||
self.playback_worker_thread = None
|
||
|
||
# 播放完成检测
|
||
self.last_audio_time = 0 # 最后收到音频数据的时间
|
||
self.playback_timeout = 5.0 # 播放超时时间(秒)
|
||
self.completion_sent = False # 防止重复发送完成事件
|
||
|
||
# 增强的播放完成检测状态
|
||
self.llm_generation_complete = False # LLM生成是否完成
|
||
self.tts_generation_complete = False # TTS生成是否完成
|
||
self.all_audio_received = False # 所有音频数据是否已接收
|
||
self.pre_buffer_empty = False # 预缓冲区是否为空
|
||
self.playback_buffer_empty = False # 播放缓冲区是否为空
|
||
self.no_active_playback = False # 是否没有活跃的播放
|
||
self.last_audio_chunk_time = 0 # 最后一个音频块开始播放的时间,初始化为0表示尚未播放
|
||
|
||
# PyAudio实例
|
||
self.audio = None
|
||
self.output_stream = None
|
||
|
||
# 运行状态
|
||
self.running = True
|
||
|
||
# TTS 工作线程
|
||
self.tts_worker_running = True
|
||
self.tts_worker_thread = None
|
||
self.tts_task_queue = mp.Queue(maxsize=10)
|
||
|
||
# TTS 配置
|
||
self.tts_url = "https://openspeech.bytedance.com/api/v3/tts/unidirectional"
|
||
self.tts_app_id = "8718217928"
|
||
self.tts_access_key = "ynJMX-5ix1FsJvswC9KTNlGUdubcchqc"
|
||
self.tts_resource_id = "volc.service_type.10029"
|
||
self.tts_app_key = "aGjiRDfUWi"
|
||
self.tts_speaker = config.get('tts_speaker', "zh_female_wanqudashu_moon_bigtts") if config else "zh_female_wanqudashu_moon_bigtts"
|
||
|
||
# 启动工作线程 - 先启动播放线程,再启动TTS线程
|
||
print("🔊 准备启动播放工作线程...")
|
||
try:
|
||
self._start_playback_worker()
|
||
print("🔊 播放工作线程已启动,准备启动TTS工作线程...")
|
||
self._start_tts_worker()
|
||
print("🔊 TTS工作线程已启动")
|
||
except Exception as e:
|
||
print(f"❌ 工作线程启动失败: {e}")
|
||
import traceback
|
||
traceback.print_exc()
|
||
|
||
def _start_playback_worker(self):
|
||
"""启动播放工作线程 - 借鉴 recorder.py 的播放线程模式"""
|
||
print("🔊 创建播放工作线程...")
|
||
self.playback_worker_thread = threading.Thread(target=self._playback_worker, daemon=True)
|
||
print("🔊 启动播放工作线程...")
|
||
self.playback_worker_thread.start()
|
||
print("🔊 播放工作线程已启动")
|
||
self.logger.info("播放工作线程已启动")
|
||
|
||
def _playback_worker(self):
|
||
"""播放工作线程 - 专门负责音频播放,完全借鉴 recorder.py"""
|
||
print("🔊 播放工作线程开始运行...")
|
||
|
||
# 等待音频设备就绪和初始化完成
|
||
max_wait_time = 10 # 最多等待10秒
|
||
wait_start_time = time.time()
|
||
|
||
print(f"🔊 播放工作线程等待音频设备就绪... audio={self.audio is not None}, running={self.running}")
|
||
|
||
while (self.audio is None or not self.running) and (time.time() - wait_start_time) < max_wait_time:
|
||
time.sleep(0.1)
|
||
if (time.time() - wait_start_time) % 1 < 0.1: # 每秒打印一次状态
|
||
print(f"🔊 仍在等待音频设备就绪... audio={self.audio is not None}, running={self.running}")
|
||
|
||
if self.audio is None:
|
||
print("❌ 音频设备未就绪,播放工作线程退出")
|
||
self.logger.error("音频设备未就绪,播放工作线程退出")
|
||
return
|
||
|
||
print(f"✅ 音频设备已就绪,耗时: {time.time() - wait_start_time:.1f}秒")
|
||
|
||
print("🔊 音频设备已就绪,开始创建播放流")
|
||
|
||
# 创建音频播放流
|
||
playback_stream = None
|
||
try:
|
||
playback_stream = self.audio.open(
|
||
format=self.FORMAT,
|
||
channels=self.CHANNELS,
|
||
rate=self.RATE,
|
||
output=True,
|
||
frames_per_buffer=512,
|
||
start=True # 立即启动流
|
||
)
|
||
print("🔊 音频播放流已创建")
|
||
except Exception as e:
|
||
print(f"❌ 创建音频播放流失败: {e}")
|
||
self.logger.error(f"创建音频播放流失败: {e}")
|
||
return
|
||
|
||
chunks_played = 0
|
||
total_size = 0
|
||
|
||
try:
|
||
while self.playback_worker_running:
|
||
try:
|
||
# 从播放缓冲区获取音频数据
|
||
if self.playback_buffer:
|
||
audio_chunk = self.playback_buffer.pop(0)
|
||
|
||
if audio_chunk and len(audio_chunk) > 0:
|
||
# 检查播放冷却期
|
||
current_time = time.time()
|
||
time_since_last_play = current_time - self.last_playback_time
|
||
in_cooldown = (self.last_playback_time > 0 and
|
||
time_since_last_play < self.playback_cooldown_period)
|
||
|
||
if in_cooldown:
|
||
# 在冷却期内,跳过播放
|
||
if chunks_played == 0: # 只在第一次遇到冷却期时打印
|
||
print(f"🔊 播放冷却中,跳过播放 ({time_since_last_play:.1f}s < {self.playback_cooldown_period}s)")
|
||
self.logger.debug(f"播放冷却中,跳过播放 ({time_since_last_play:.1f}s < {self.playback_cooldown_period}s)")
|
||
continue
|
||
|
||
# 播放音频块
|
||
if chunks_played == 0: # 只在第一次播放时打印详细信息
|
||
print(f"🔊 开始播放音频块 {chunks_played + 1}")
|
||
print(f"🔍 播放工作线程检查: 音频块大小={len(audio_chunk)}字节, "
|
||
f"冷却期检查={in_cooldown}, 距离上次播放={time_since_last_play:.2f}s, "
|
||
f"冷却阈值={self.playback_cooldown_period}s")
|
||
|
||
# 确保播放状态正确
|
||
if not self.currently_playing:
|
||
self.currently_playing = True
|
||
self.last_audio_chunk_time = time.time() # 记录最后播放时间
|
||
print(f"🔊 播放工作线程:开始播放,设置 currently_playing = True")
|
||
|
||
# 如果是第一次播放,不设置冷却期
|
||
if chunks_played == 0:
|
||
self.last_playback_time = 0 # 第一次播放不触发冷却期
|
||
else:
|
||
self.last_playback_time = current_time # 更新最后播放时间
|
||
|
||
# 播放音频(同步阻塞,直到播放完成)
|
||
playback_stream.write(audio_chunk)
|
||
chunks_played += 1
|
||
total_size += len(audio_chunk)
|
||
|
||
# 减少进度显示频率
|
||
if chunks_played % 10 == 0 or chunks_played <= 3:
|
||
progress = f"🔊 播放工作: {chunks_played} 块 | {total_size / 1024:.1f} KB"
|
||
print(f"\r{progress}", end='', flush=True)
|
||
|
||
# 注意:不在这里重置 currently_playing 状态,保持为 True 直到真正确定播放完成
|
||
|
||
# 如果这是最后一个音频块,主动检查播放完成
|
||
if (len(self.playback_buffer) == 0 and
|
||
len(self.preload_buffer) == 0 and
|
||
self.tts_task_queue.qsize() == 0 and
|
||
not self.playback_completed): # 防止重复设置
|
||
print(f"🔊 播放工作线程:播放完成,设置播放完成标志")
|
||
print(f"🔊 播放工作线程:播放缓冲={len(self.playback_buffer)}, 预加载={len(self.preload_buffer)}, TTS队列={self.tts_task_queue.qsize()}")
|
||
self.playback_completed = True
|
||
print(f"🔊 播放工作线程:playback_completed标志已设置为{self.playback_completed}")
|
||
# 不在这里直接调用_finish_playback,让主处理循环处理
|
||
else:
|
||
# 空音频块,不改变播放状态,继续下一个
|
||
print(f"🔊 播放工作线程:遇到空音频块,跳过")
|
||
continue
|
||
else:
|
||
# 缓冲区为空,短暂休眠,减少CPU占用
|
||
# 只有在确定没有音频播放时才设置状态为 False
|
||
if self.currently_playing:
|
||
# 检查是否真的没有音频在播放
|
||
time_since_last_chunk = time.time() - self.last_audio_chunk_time
|
||
if time_since_last_chunk > 0.5: # 超过0.5秒没有播放新音频
|
||
self.currently_playing = False
|
||
print(f"🔊 播放工作线程:缓冲区为空且{time_since_last_chunk:.1f}秒无新音频,设置 currently_playing = False")
|
||
time.sleep(0.01)
|
||
continue
|
||
|
||
except Exception as e:
|
||
print(f"❌ 播放工作线程错误: {e}")
|
||
self.logger.error(f"播放工作线程错误: {e}")
|
||
# 异常情况下,只有在确定音频停止播放时才重置状态
|
||
if self.currently_playing:
|
||
time_since_last_chunk = time.time() - self.last_audio_chunk_time
|
||
if time_since_last_chunk > 1.0: # 异常情况下,等待更长时间
|
||
self.currently_playing = False
|
||
print(f"🔊 播放工作线程:异常情况下设置 currently_playing = False")
|
||
else:
|
||
print(f"🔊 播放工作线程:异常但保持 currently_playing = True(最后播放于{time_since_last_chunk:.1f}秒前)")
|
||
time.sleep(0.1)
|
||
|
||
print(f"\n✅ 播放工作线程结束: 总计 {chunks_played} 块, {total_size / 1024:.1f} KB")
|
||
|
||
finally:
|
||
# 线程结束时确保状态正确
|
||
if self.currently_playing:
|
||
self.currently_playing = False
|
||
print(f"🔊 播放工作线程:线程结束,设置 currently_playing = False")
|
||
if playback_stream:
|
||
try:
|
||
playback_stream.stop_stream()
|
||
playback_stream.close()
|
||
except:
|
||
pass
|
||
print("🔊 音频播放流已关闭")
|
||
|
||
def _get_default_config(self) -> Dict[str, Any]:
|
||
"""获取默认配置"""
|
||
return {
|
||
'buffer_size': 1000,
|
||
'show_progress': True,
|
||
'progress_interval': 100
|
||
}
|
||
|
||
def run(self):
|
||
"""输出进程主循环 - 借鉴 recorder.py 的优雅主循环"""
|
||
self.logger.info("输出进程启动")
|
||
self._setup_audio()
|
||
|
||
try:
|
||
while self.running:
|
||
# 1. 处理音频队列(数据接收)
|
||
self._process_audio_queue()
|
||
|
||
# 2. 检查设备健康状态和冷却期 - 防止回声
|
||
current_time = time.time()
|
||
time_since_last_play = current_time - self.last_playback_time
|
||
in_cooldown = (self.last_playback_time > 0 and
|
||
time_since_last_play < self.playback_cooldown_period)
|
||
|
||
# 检查TTS和播放状态
|
||
tts_active = not self.tts_task_queue.empty()
|
||
playback_active = self.currently_playing or len(self.playback_buffer) > 0
|
||
|
||
# 如果设备不健康、正在播放、TTS生成中、或在冷却期内,显示状态但继续处理播放完成检测
|
||
if not self.audio_device_healthy or tts_active or playback_active or in_cooldown:
|
||
# 显示播放状态
|
||
tts_queue_size = self.tts_task_queue.qsize()
|
||
queue_size = len(self.playback_buffer) + len(self.preload_buffer)
|
||
|
||
if not self.audio_device_healthy:
|
||
status = f"🔧 设备重置中... TTS: {tts_queue_size} 播放: {queue_size}"
|
||
elif tts_active:
|
||
status = f"🎵 TTS生成中... TTS: {tts_queue_size} 播放: {queue_size}"
|
||
elif in_cooldown:
|
||
cooldown_time = self.playback_cooldown_period - time_since_last_play
|
||
status = f"🔊 播放冷却中... {cooldown_time:.1f}s TTS: {tts_queue_size} 播放: {queue_size}"
|
||
else:
|
||
playing_status = "播放中" if self.currently_playing else "等待播放"
|
||
status = f"🔊 {playing_status}... TTS: {tts_queue_size} 播放: {queue_size}"
|
||
|
||
print(f"\r{status}", end='', flush=True)
|
||
|
||
# 关键修复:即使正在播放,也要检查播放完成
|
||
if self.end_signal_received:
|
||
# 使用增强的播放完成检测
|
||
if self._check_enhanced_playback_completion():
|
||
print(f"📥 播放状态时增强播放完成检测通过,处理结束信号")
|
||
self._finish_playback()
|
||
|
||
time.sleep(0.1) # 播放时增加延迟减少CPU使用
|
||
continue
|
||
|
||
# 3. 主动检查预加载缓冲区,确保音频能及时播放
|
||
if (not self.is_playing and
|
||
len(self.preload_buffer) >= self.min_buffer_size and
|
||
len(self.playback_buffer) == 0):
|
||
# 立即将预加载数据移到播放缓冲区
|
||
self.playback_buffer.extend(self.preload_buffer)
|
||
self.preload_buffer.clear()
|
||
self.is_playing = True
|
||
self.last_playback_time = time.time()
|
||
print(f"🎵 主循环检测:开始播放预加载音频,播放缓冲区大小: {len(self.playback_buffer)}")
|
||
|
||
# 4. 显示播放进度
|
||
self._show_progress()
|
||
|
||
# 5. 主动检查播放完成(无论什么状态都要检查)
|
||
if self.end_signal_received:
|
||
# 使用增强的播放完成检测
|
||
if self._check_enhanced_playback_completion():
|
||
print(f"📥 主循环增强播放完成检测通过,处理结束信号")
|
||
self._finish_playback()
|
||
|
||
# 6. 借鉴 recorder.py: 根据播放状态调整休眠时间,优化性能
|
||
if self.is_playing and (self.playback_buffer or self.preload_buffer):
|
||
time.sleep(0.005) # 播放时极短休眠,提高响应性
|
||
else:
|
||
time.sleep(0.02) # 非播放时稍长休眠,减少CPU占用
|
||
|
||
except KeyboardInterrupt:
|
||
self.logger.info("输出进程收到中断信号")
|
||
except Exception as e:
|
||
self.logger.error(f"输出进程错误: {e}")
|
||
import traceback
|
||
print(f"❌ 输出进程错误详情: {traceback.format_exc()}")
|
||
finally:
|
||
self._cleanup()
|
||
self.logger.info("输出进程退出")
|
||
|
||
def _setup_audio(self):
|
||
"""设置音频输出设备"""
|
||
try:
|
||
print("🔊 开始初始化音频设备...")
|
||
self.audio = pyaudio.PyAudio()
|
||
print(f"🔊 PyAudio实例已创建: {self.audio}")
|
||
|
||
# 主进程不需要创建输出流,由播放工作线程负责
|
||
# 这里只创建PyAudio实例供播放工作线程使用
|
||
self.output_stream = None # 标记为None,表明主进程不直接使用输出流
|
||
|
||
self.logger.info("音频设备初始化成功")
|
||
print("🔊 音频设备初始化完成")
|
||
except Exception as e:
|
||
self.logger.error(f"音频设备初始化失败: {e}")
|
||
print(f"❌ 音频设备初始化失败: {e}")
|
||
raise
|
||
|
||
def _process_audio_queue(self):
|
||
"""处理来自主进程的音频数据 - 借鉴 recorder.py 的优雅队列处理"""
|
||
try:
|
||
processed_count = 0
|
||
end_signal_received = False
|
||
|
||
while self.running:
|
||
try:
|
||
# 使用较短的超时,保持响应性
|
||
audio_data = self.audio_queue.get(timeout=0.05)
|
||
processed_count += 1
|
||
|
||
# 减少日志输出频率,提高性能
|
||
if processed_count <= 3 or processed_count % 50 == 0:
|
||
print(f"📥 输出进程收到第 {processed_count} 个数据包")
|
||
|
||
if audio_data is None:
|
||
# 结束信号处理
|
||
if end_signal_received:
|
||
print(f"📥 输出进程已收到过结束信号,忽略重复信号")
|
||
continue
|
||
|
||
print(f"📥 输出进程收到结束信号")
|
||
end_signal_received = True
|
||
self.end_signal_received = True
|
||
self.all_audio_received = True
|
||
|
||
# 重置完成事件标记
|
||
self.completion_sent = False
|
||
# 重置播放完成标志
|
||
self.playback_completed = False
|
||
print(f"📥 已重置所有播放完成相关标志")
|
||
|
||
# 使用增强的播放完成检测
|
||
if self._check_enhanced_playback_completion():
|
||
print(f"📥 增强播放完成检测通过,处理结束信号")
|
||
self._finish_playback()
|
||
return
|
||
else:
|
||
print(f"📥 延迟处理结束信号 - 等待LLM、TTS和播放完成")
|
||
# 重新放回队列,稍后重试
|
||
self.audio_queue.put(None)
|
||
time.sleep(0.05)
|
||
continue
|
||
|
||
if isinstance(audio_data, str) and audio_data.startswith("METADATA:"):
|
||
# 处理元数据
|
||
metadata = audio_data[9:]
|
||
print(f"📥 输出进程收到元数据: {metadata}")
|
||
continue
|
||
|
||
# 处理来自ControlSystem的流式文本命令
|
||
if isinstance(audio_data, str) and audio_data.startswith("STREAMING_TEXT:"):
|
||
# 流式文本处理 - 智能缓冲
|
||
streaming_text = audio_data[15:] # 移除 "STREAMING_TEXT:" 前缀
|
||
print(f"📥 输出进程收到流式文本: {streaming_text}")
|
||
self.process_streaming_text(streaming_text)
|
||
continue
|
||
|
||
if isinstance(audio_data, str) and audio_data.startswith("COMPLETE_TEXT:"):
|
||
# 完整文本处理 - 强制刷新缓冲区
|
||
complete_text = audio_data[14:] # 移除 "COMPLETE_TEXT:" 前缀
|
||
print(f"📥 输出进程收到完整文本: {complete_text}")
|
||
self.process_complete_text(complete_text)
|
||
continue
|
||
|
||
if isinstance(audio_data, str) and audio_data.startswith("FLUSH_TTS_BUFFER:"):
|
||
# 刷新TTS缓冲区 - 确保所有内容都被处理
|
||
print(f"📥 输出进程收到刷新缓冲区命令")
|
||
self._flush_tts_buffer()
|
||
continue
|
||
|
||
if isinstance(audio_data, str) and audio_data.startswith("LLM_COMPLETE:"):
|
||
# LLM生成完成信号
|
||
print(f"📥 输出进程收到LLM生成完成信号")
|
||
self.llm_generation_complete = True
|
||
continue
|
||
|
||
if isinstance(audio_data, str) and audio_data.startswith("TTS_COMPLETE:"):
|
||
# TTS生成完成信号
|
||
print(f"📥 输出进程收到TTS生成完成信号")
|
||
self.tts_generation_complete = True
|
||
continue
|
||
|
||
# 音频数据处理
|
||
if isinstance(audio_data, bytes):
|
||
# 更新最后收到音频数据的时间
|
||
self.last_audio_time = time.time()
|
||
|
||
# 减少日志输出,提高性能
|
||
if processed_count <= 3 or processed_count % 50 == 0:
|
||
print(f"📥 输出进程收到音频数据: {len(audio_data)} 字节")
|
||
|
||
# 直接添加到预加载缓冲区
|
||
print(f"🔍 添加音频到预加载缓冲区: 音频大小={len(audio_data)}字节, "
|
||
f"添加前预加载缓冲区大小={len(self.preload_buffer)}, "
|
||
f"添加前播放缓冲区大小={len(self.playback_buffer)}, "
|
||
f"is_playing={self.is_playing}")
|
||
self.preload_buffer.append(audio_data)
|
||
print(f"🔍 添加后预加载缓冲区大小={len(self.preload_buffer)}")
|
||
|
||
# 检查是否应该开始播放或补充播放缓冲区
|
||
if not self.is_playing and len(self.preload_buffer) >= self.preload_size:
|
||
# 首次启动播放
|
||
self.playback_buffer.extend(self.preload_buffer)
|
||
self.preload_buffer.clear()
|
||
self.is_playing = True
|
||
self.last_playback_time = 0 # 重置播放时间,避免立即触发冷却期
|
||
# 确保播放工作线程知道有数据要播放
|
||
if not self.currently_playing:
|
||
print(f"🔍 启动播放时确保 currently_playing = True")
|
||
# 播放工作线程会自动检测播放缓冲区并开始播放
|
||
print(f"🎵 开始播放音频(预加载完成),播放缓冲区大小: {len(self.playback_buffer)}")
|
||
print(f"🔍 已重置last_playback_time,避免立即触发冷却期")
|
||
elif self.is_playing and len(self.playback_buffer) < 3 and len(self.preload_buffer) > 0:
|
||
# 正在播放时,保持播放缓冲区有足够的数据
|
||
transfer_count = min(2, len(self.preload_buffer)) # 每次转移2个块
|
||
for _ in range(transfer_count):
|
||
if self.preload_buffer:
|
||
self.playback_buffer.append(self.preload_buffer.pop(0))
|
||
print(f"🔍 播放中补充数据: 转移{transfer_count}个块,播放缓冲区={len(self.playback_buffer)}, 预加载={len(self.preload_buffer)}")
|
||
elif end_signal_received and not self.is_playing and len(self.playback_buffer) == 0 and len(self.preload_buffer) > 0:
|
||
# 关键修复:收到结束信号后,如果播放缓冲区为空但预加载缓冲区有数据,强制转移
|
||
print(f"🔍 结束信号模式下强制转移数据: 预加载缓冲区有 {len(self.preload_buffer)} 个数据块")
|
||
self.playback_buffer.extend(self.preload_buffer)
|
||
self.preload_buffer.clear()
|
||
self.is_playing = True
|
||
self.last_playback_time = 0
|
||
print(f"🎵 强制开始播放音频,播放缓冲区大小: {len(self.playback_buffer)}")
|
||
|
||
else:
|
||
print(f"📥 输出进程收到未知类型数据: {type(audio_data)}")
|
||
|
||
except queue.Empty:
|
||
# 队列为空时的处理
|
||
if end_signal_received:
|
||
# 使用增强的播放完成检测
|
||
if self._check_enhanced_playback_completion():
|
||
print(f"📥 队列空时增强播放完成检测通过,处理结束信号")
|
||
self._finish_playback()
|
||
return
|
||
else:
|
||
print(f"📥 队列空时增强播放完成检测未通过,继续等待")
|
||
# 如果还有数据要播放,继续等待
|
||
tts_queue_size = self.tts_task_queue.qsize()
|
||
playback_queue_size = len(self.playback_buffer) + len(self.preload_buffer)
|
||
if playback_queue_size > 0:
|
||
print(f"📥 还有 {playback_queue_size} 个音频块待播放,等待播放完成")
|
||
time.sleep(0.2) # 增加等待时间
|
||
elif tts_queue_size == 0 and len(self.playback_buffer) == 0 and len(self.preload_buffer) > 0:
|
||
# 关键修复:播放缓冲区为空但预加载缓冲区还有数据,需要转移数据
|
||
print(f"📥 播放缓冲区为空但预加载缓冲区有 {len(self.preload_buffer)} 个数据块,转移数据到播放缓冲区")
|
||
transfer_count = min(len(self.preload_buffer), 3) # 一次转移最多3个块
|
||
for _ in range(transfer_count):
|
||
if self.preload_buffer:
|
||
self.playback_buffer.append(self.preload_buffer.pop(0))
|
||
print(f"📥 已转移 {transfer_count} 个数据块到播放缓冲区")
|
||
# 如果播放工作线程没有在播放,需要确保状态正确
|
||
if not self.currently_playing and len(self.playback_buffer) > 0:
|
||
print(f"📥 转移数据后,确保播放状态正确(播放缓冲区有数据但currently_playing=False)")
|
||
# 播放工作线程会自动检测并开始播放
|
||
time.sleep(0.2) # 增加等待时间
|
||
|
||
# 检查是否应该补充播放缓冲区的数据
|
||
if not self.is_playing and len(self.preload_buffer) >= self.min_buffer_size:
|
||
# 首次启动播放(最小缓冲区模式)
|
||
self.playback_buffer.extend(self.preload_buffer)
|
||
self.preload_buffer.clear()
|
||
self.is_playing = True
|
||
self.last_playback_time = 0 # 重置播放时间,避免立即触发冷却期
|
||
print(f"🎵 开始播放音频(最小缓冲区满足)")
|
||
print(f"🔍 已重置last_playback_time,避免立即触发冷却期")
|
||
elif self.is_playing and len(self.playback_buffer) < 2 and len(self.preload_buffer) > 0:
|
||
# 正在播放时补充数据,避免播放缓冲区耗尽
|
||
transfer_count = min(1, len(self.preload_buffer)) # 每次转移1个块
|
||
for _ in range(transfer_count):
|
||
if self.preload_buffer:
|
||
self.playback_buffer.append(self.preload_buffer.pop(0))
|
||
print(f"🔍 队列空时补充数据: 转移{transfer_count}个块,播放缓冲区={len(self.playback_buffer)}, 预加载={len(self.preload_buffer)}")
|
||
|
||
# 退出循环,避免过度占用CPU
|
||
if processed_count > 0:
|
||
break
|
||
else:
|
||
# 关键修复:即使没有处理数据,也要检查播放完成
|
||
if self.end_signal_received:
|
||
# 使用增强的播放完成检测
|
||
if self._check_enhanced_playback_completion():
|
||
print(f"📥 无数据处理时增强播放完成检测通过,处理结束信号")
|
||
self._finish_playback()
|
||
|
||
time.sleep(0.01)
|
||
|
||
except Exception as e:
|
||
self.logger.error(f"处理音频队列时出错: {e}")
|
||
print(f"❌ 输出进程处理队列时出错: {e}")
|
||
|
||
def _play_audio(self):
|
||
"""播放音频数据 - 简化版本,主要由播放工作线程处理"""
|
||
# 现在播放逻辑主要由专门的播放工作线程处理
|
||
# 这个方法只负责一些状态检查和协调工作
|
||
|
||
# 检查是否应该将预加载缓冲区的数据移到播放缓冲区
|
||
if (not self.is_playing and
|
||
len(self.preload_buffer) >= self.min_buffer_size and
|
||
len(self.playback_buffer) == 0):
|
||
self.playback_buffer.extend(self.preload_buffer)
|
||
self.preload_buffer.clear()
|
||
self.is_playing = True
|
||
self.last_playback_time = time.time()
|
||
print(f"🎵 主线程检测:开始播放预加载音频,播放缓冲区大小: {len(self.playback_buffer)}")
|
||
|
||
# 如果缓冲区为空且之前在播放,检查是否应该发送完成事件
|
||
if (self.is_playing and
|
||
len(self.playback_buffer) == 0 and
|
||
len(self.preload_buffer) == 0 and
|
||
not self.currently_playing):
|
||
# 短暂等待,确保所有音频都播放完成
|
||
time.sleep(0.1)
|
||
if len(self.playback_buffer) == 0 and len(self.preload_buffer) == 0:
|
||
self.is_playing = False
|
||
print(f"🎵 播放缓冲区已清空,准备发送完成事件")
|
||
# 注意:不在这里直接发送完成事件,由_process_audio_queue中的结束信号处理
|
||
|
||
def _show_progress(self):
|
||
"""显示播放进度"""
|
||
if (self.config['show_progress'] and
|
||
self.total_chunks_played > 0 and
|
||
self.total_chunks_played % self.config['progress_interval'] == 0):
|
||
|
||
progress = f"🔊 播放进度: {self.total_chunks_played} 块 | {self.total_audio_size / 1024:.1f} KB"
|
||
print(f"\r{progress}", end='', flush=True)
|
||
|
||
def _finish_playback(self):
|
||
"""完成播放 - 借鉴 recorder.py 的优雅完成机制"""
|
||
# 防止重复发送完成事件
|
||
if self.completion_sent:
|
||
print("📡 输出进程:完成事件已发送过,跳过重复发送")
|
||
return
|
||
|
||
print("📡 输出进程:开始执行播放完成逻辑")
|
||
|
||
self.is_playing = False
|
||
self.playback_buffer.clear()
|
||
self.preload_buffer.clear()
|
||
self.last_playback_time = 0
|
||
|
||
# 重置所有增强的状态标志
|
||
self.llm_generation_complete = False
|
||
self.tts_generation_complete = False
|
||
self.all_audio_received = False
|
||
self.pre_buffer_empty = False
|
||
self.playback_buffer_empty = False
|
||
self.no_active_playback = False
|
||
self.end_signal_received = False
|
||
self.playback_completed = False
|
||
|
||
print("📡 输出进程:已重置所有播放完成状态标志")
|
||
|
||
# 通知主进程播放完成
|
||
if self.event_queue:
|
||
try:
|
||
# 发送播放完成事件到主进程
|
||
completion_event = ProcessEvent(
|
||
event_type='playback_complete',
|
||
metadata={
|
||
'completion_time': time.time()
|
||
}
|
||
)
|
||
self.event_queue.put(completion_event)
|
||
print("📡 输出进程:已发送播放完成事件到主进程")
|
||
self.completion_sent = True
|
||
except Exception as e:
|
||
print(f"❌ 输出进程:发送播放完成事件失败: {e}")
|
||
self.logger.error(f"发送播放完成事件失败: {e}")
|
||
else:
|
||
print("⚠️ 输出进程:未设置事件队列,无法通知主进程播放完成")
|
||
|
||
# 额外等待确保音频设备完全停止
|
||
time.sleep(0.5)
|
||
print("📡 输出进程:播放完成逻辑执行完毕")
|
||
|
||
def _check_enhanced_playback_completion(self):
|
||
"""增强的播放完成检测 - 考虑LLM、TTS和缓冲区状态"""
|
||
if not self.end_signal_received:
|
||
return False
|
||
|
||
# 更新状态变量
|
||
self.pre_buffer_empty = (len(self.preload_buffer) == 0)
|
||
self.playback_buffer_empty = (len(self.playback_buffer) == 0)
|
||
self.no_active_playback = (not self.currently_playing) # 同步更新状态变量
|
||
|
||
tts_queue_size = self.tts_task_queue.qsize()
|
||
|
||
print(f"🔍 增强播放完成检查:")
|
||
print(f" - LLM生成完成: {self.llm_generation_complete}")
|
||
print(f" - TTS生成完成: {self.tts_generation_complete}")
|
||
print(f" - 所有音频已接收: {self.all_audio_received}")
|
||
print(f" - 预缓冲区为空: {self.pre_buffer_empty}")
|
||
print(f" - 播放缓冲区为空: {self.playback_buffer_empty}")
|
||
print(f" - 无活跃播放: {self.no_active_playback} (currently_playing={self.currently_playing})")
|
||
print(f" - TTS队列大小: {tts_queue_size}")
|
||
|
||
# 添加时间维度检查
|
||
if self.last_audio_chunk_time > 0:
|
||
time_since_last_chunk = time.time() - self.last_audio_chunk_time
|
||
print(f" - 最后播放时间: {time_since_last_chunk:.2f}秒前")
|
||
else:
|
||
time_since_last_chunk = 0
|
||
print(f" - 最后播放时间: 尚未开始播放")
|
||
|
||
# 检查是否所有条件都满足 - 使用更新的状态变量
|
||
all_conditions_met = (
|
||
self.llm_generation_complete and
|
||
self.tts_generation_complete and
|
||
self.all_audio_received and
|
||
self.pre_buffer_empty and
|
||
self.playback_buffer_empty and
|
||
self.no_active_playback and # 使用状态变量
|
||
tts_queue_size == 0
|
||
)
|
||
|
||
if all_conditions_met:
|
||
# 额外时间检查:确保音频真正播放完成
|
||
if self.last_audio_chunk_time > 0 and time_since_last_chunk > 0.3: # 至少0.3秒没有新音频播放
|
||
print(f"✅ 所有播放完成条件已满足,且{time_since_last_chunk:.2f}秒无新音频,可以结束播放")
|
||
return True
|
||
elif self.last_audio_chunk_time == 0:
|
||
# 如果从未开始播放,但有音频数据,说明播放可能有问题
|
||
if len(self.playback_buffer) == 0 and len(self.preload_buffer) == 0:
|
||
print(f"⚠️ 从未开始播放且无音频数据,可能播放失败,强制结束")
|
||
return True
|
||
else:
|
||
print(f"⏳ 从未开始播放但还有音频数据,等待播放开始...")
|
||
return False
|
||
else:
|
||
print(f"⏳ 所有条件满足但等待音频完全播放(最后播放于{time_since_last_chunk:.2f}秒前)...")
|
||
return False
|
||
|
||
# 如果LLM和TTS都完成了,但还有音频数据,等待播放完成
|
||
if (self.llm_generation_complete and
|
||
self.tts_generation_complete and
|
||
self.all_audio_received and
|
||
tts_queue_size == 0):
|
||
|
||
if self.pre_buffer_empty and self.playback_buffer_empty:
|
||
if self.no_active_playback: # 使用状态变量
|
||
# 额外检查:确保最后播放的音频已经完成播放
|
||
if self.last_audio_chunk_time > 0:
|
||
time_since_last_chunk = time.time() - self.last_audio_chunk_time
|
||
if time_since_last_chunk > 0.8: # 增加到0.8秒确保音频完全播放
|
||
print(f"✅ LLM和TTS完成,所有缓冲区已清空,播放器空闲,最后播放于{time_since_last_chunk:.1f}秒前")
|
||
return True
|
||
else:
|
||
print(f"⏳ 等待最后音频播放完成(最后播放于{time_since_last_chunk:.1f}秒前,需要0.8秒)...")
|
||
return False
|
||
else:
|
||
# 从未开始播放的情况
|
||
print(f"⚠️ LLM和TTS完成,缓冲区清空,但从未开始播放,可能播放失败")
|
||
return True
|
||
else:
|
||
print(f"⏳ 等待最后的音频播放完成(currently_playing={self.currently_playing})...")
|
||
time.sleep(0.3)
|
||
# 重新更新状态
|
||
self.no_active_playback = (not self.currently_playing)
|
||
if self.no_active_playback:
|
||
if self.last_audio_chunk_time > 0:
|
||
time_since_last_chunk = time.time() - self.last_audio_chunk_time
|
||
if time_since_last_chunk > 0.5:
|
||
print(f"✅ 最后的音频播放完成(最后播放于{time_since_last_chunk:.1f}秒前)")
|
||
return True
|
||
else:
|
||
print(f"⏳ 仍在等待音频完全播放完成(最后播放于{time_since_last_chunk:.1f}秒前)...")
|
||
return False
|
||
else:
|
||
print(f"⚠️ 播放器空闲但从未开始播放,可能播放失败")
|
||
return True
|
||
else:
|
||
print(f"⏳ 播放器仍在活跃状态,继续等待...")
|
||
return False
|
||
else:
|
||
print(f"⏳ 等待缓冲区数据播放完成 - 预缓冲: {len(self.preload_buffer)}, 播放缓冲: {len(self.playback_buffer)}")
|
||
return False
|
||
|
||
# 如果LLM还未完成,但其他条件满足,等待LLM完成
|
||
if not self.llm_generation_complete:
|
||
print(f"⏳ 等待LLM生成完成...")
|
||
return False
|
||
|
||
# 如果TTS还未完成,但LLM已完成,等待TTS完成
|
||
if not self.tts_generation_complete:
|
||
print(f"⏳ 等待TTS生成完成...")
|
||
return False
|
||
|
||
# 如果音频还未完全接收,等待接收完成
|
||
if not self.all_audio_received:
|
||
print(f"⏳ 等待所有音频接收完成...")
|
||
return False
|
||
|
||
return False
|
||
|
||
def _check_playback_completion(self):
|
||
"""检查播放完成状态 - 独立的播放完成检测方法"""
|
||
if not self.end_signal_received:
|
||
return
|
||
|
||
tts_queue_size = self.tts_task_queue.qsize()
|
||
playback_queue_size = len(self.playback_buffer) + len(self.preload_buffer)
|
||
|
||
print(f"🔍 播放完成检查: TTS队列={tts_queue_size}, 播放缓冲={len(self.playback_buffer)}, 预加载={len(self.preload_buffer)}, 正在播放={self.currently_playing}, 播放完成标志={self.playback_completed}")
|
||
|
||
# 检查条件1: 播放完成标志被设置
|
||
if self.playback_completed:
|
||
print(f"✅ 检测到播放完成标志,触发播放完成")
|
||
print(f"📥 调用 _finish_playback() 前,completion_sent={self.completion_sent}")
|
||
self._finish_playback()
|
||
print(f"📥 调用 _finish_playback() 后,completion_sent={self.completion_sent}")
|
||
# 重要:重置播放完成标志,防止重复触发
|
||
self.playback_completed = False
|
||
print(f"📥 已重置播放完成标志,防止重复触发")
|
||
return
|
||
|
||
# 检查条件2: 所有队列为空且没有在播放
|
||
if tts_queue_size == 0 and playback_queue_size == 0 and not self.currently_playing:
|
||
print(f"✅ 所有队列已清空且播放器空闲,触发播放完成")
|
||
print(f"📥 调用 _finish_playback() 前,completion_sent={self.completion_sent}")
|
||
self._finish_playback()
|
||
print(f"📥 调用 _finish_playback() 后,completion_sent={self.completion_sent}")
|
||
# 重要:重置结束信号标志,防止重复触发
|
||
self.end_signal_received = False
|
||
print(f"📥 已重置结束信号标志,防止重复触发")
|
||
return
|
||
|
||
# 检查条件3: 所有队列为空但播放器还在播放(最后一个音频块)
|
||
if tts_queue_size == 0 and playback_queue_size == 0 and self.currently_playing:
|
||
print(f"⏳ 等待最后一个音频块播放完成...")
|
||
time.sleep(0.3)
|
||
if not self.currently_playing:
|
||
print(f"✅ 最后一个音频块播放完成,触发播放完成")
|
||
print(f"📥 调用 _finish_playback() 前,completion_sent={self.completion_sent}")
|
||
self._finish_playback()
|
||
print(f"📥 调用 _finish_playback() 后,completion_sent={self.completion_sent}")
|
||
# 重要:重置结束信号标志,防止重复触发
|
||
self.end_signal_received = False
|
||
print(f"📥 已重置结束信号标志,防止重复触发")
|
||
return
|
||
else:
|
||
print(f"⚠️ 最后一个音频块播放超时")
|
||
# 即使超时也要重置标志,防止重复检测
|
||
self.end_signal_received = False
|
||
print(f"📥 已重置结束信号标志(超时情况)")
|
||
|
||
def _cleanup(self):
|
||
"""清理资源"""
|
||
print("🔊 开始清理输出进程资源...")
|
||
|
||
# 停止播放工作线程
|
||
self.playback_worker_running = False
|
||
if self.playback_worker_thread:
|
||
print("🔊 等待播放工作线程退出...")
|
||
self.playback_worker_thread.join(timeout=3.0)
|
||
|
||
# 停止 TTS 工作线程
|
||
self.tts_worker_running = False
|
||
if self.tts_worker_thread:
|
||
print("🔊 等待TTS工作线程退出...")
|
||
self.tts_task_queue.put(None) # 发送结束信号
|
||
self.tts_worker_thread.join(timeout=2.0)
|
||
|
||
# 清理主进程的输出流(如果存在)
|
||
if self.output_stream:
|
||
try:
|
||
self.output_stream.stop_stream()
|
||
self.output_stream.close()
|
||
print("🔊 主进程输出流已关闭")
|
||
except Exception as e:
|
||
print(f"⚠️ 关闭主进程输出流时出错: {e}")
|
||
|
||
# 清理PyAudio实例
|
||
if self.audio:
|
||
try:
|
||
self.audio.terminate()
|
||
print("🔊 PyAudio实例已终止")
|
||
except Exception as e:
|
||
print(f"⚠️ 终止PyAudio实例时出错: {e}")
|
||
|
||
print("🔊 输出进程资源清理完成")
|
||
|
||
def _start_tts_worker(self):
|
||
"""启动TTS工作线程"""
|
||
self.tts_worker_thread = threading.Thread(target=self._tts_worker, daemon=True)
|
||
self.tts_worker_thread.start()
|
||
self.logger.info("TTS工作线程已启动")
|
||
|
||
def _tts_worker(self):
|
||
"""TTS工作线程 - 处理TTS任务队列"""
|
||
while self.tts_worker_running:
|
||
try:
|
||
# 从队列获取任务
|
||
task = self.tts_task_queue.get(timeout=1.0)
|
||
if task is None: # 结束信号
|
||
break
|
||
|
||
task_type, content = task
|
||
if task_type == "tts_sentence":
|
||
# 生成音频数据并发送到统一播放队列
|
||
self._generate_tts_audio(content)
|
||
|
||
except queue.Empty:
|
||
continue
|
||
except Exception as e:
|
||
self.logger.error(f"TTS工作线程错误: {e}")
|
||
time.sleep(0.1)
|
||
|
||
def _add_tts_task(self, content):
|
||
"""添加TTS任务到队列"""
|
||
print(f"🔊 OutputProcess添加TTS任务到队列: '{content[:30]}...' (队列大小: {self.tts_task_queue.qsize()})")
|
||
try:
|
||
self.tts_task_queue.put_nowait(("tts_sentence", content))
|
||
print(f"✅ OutputProcess TTS任务添加成功,队列大小: {self.tts_task_queue.qsize()}")
|
||
return True
|
||
except queue.Full:
|
||
print(f"❌ OutputProcess TTS任务队列已满,丢弃任务")
|
||
self.logger.warning("TTS任务队列已满,丢弃任务")
|
||
return False
|
||
|
||
def _generate_tts_audio(self, text):
|
||
"""生成TTS音频数据并发送到统一播放队列 - 借鉴 recorder.py 的流式处理"""
|
||
try:
|
||
print(f"🔊 TTS开始生成音频,文本长度: {len(text)} 文本内容: {text[:50]}...")
|
||
self.logger.info(f"生成TTS音频: {text[:50]}...")
|
||
|
||
# 清空所有缓冲区,确保新的音频不被旧数据干扰
|
||
self.playback_buffer.clear()
|
||
self.preload_buffer.clear()
|
||
self.is_playing = False
|
||
self.completion_sent = False # 重置完成标记
|
||
|
||
# 构建请求头
|
||
headers = {
|
||
"X-Api-App-Id": self.tts_app_id,
|
||
"X-Api-Access-Key": self.tts_access_key,
|
||
"X-Api-Resource-Id": self.tts_resource_id,
|
||
"X-Api-App-Key": self.tts_app_key,
|
||
"Content-Type": "application/json",
|
||
"Connection": "keep-alive"
|
||
}
|
||
|
||
# 构建请求参数
|
||
payload = {
|
||
"user": {
|
||
"uid": "output_process_tts"
|
||
},
|
||
"req_params": {
|
||
"text": text,
|
||
"speaker": self.tts_speaker,
|
||
"audio_params": {
|
||
"format": "pcm",
|
||
"sample_rate": 16000,
|
||
"enable_timestamp": True
|
||
},
|
||
"additions": "{\"explicit_language\":\"zh\",\"disable_markdown_filter\":true, \"enable_timestamp\":true}\"}"
|
||
}
|
||
}
|
||
|
||
# 发送请求
|
||
session = requests.Session()
|
||
try:
|
||
response = session.post(self.tts_url, headers=headers, json=payload, stream=True)
|
||
|
||
if response.status_code != 200:
|
||
self.logger.error(f"TTS请求失败: {response.status_code}")
|
||
return False
|
||
|
||
# 处理流式响应 - 借鉴 recorder.py 的优化策略
|
||
total_audio_size = 0
|
||
chunk_count = 0
|
||
success_count = 0
|
||
|
||
self.logger.info("开始接收TTS音频流...")
|
||
|
||
for chunk in response.iter_lines(decode_unicode=True):
|
||
if not chunk:
|
||
continue
|
||
|
||
try:
|
||
data = json.loads(chunk)
|
||
|
||
if data.get("code", 0) == 0 and "data" in data and data["data"]:
|
||
chunk_audio = base64.b64decode(data["data"])
|
||
audio_size = len(chunk_audio)
|
||
total_audio_size += audio_size
|
||
chunk_count += 1
|
||
|
||
# 借鉴 recorder.py: 使用预加载缓冲区机制
|
||
try:
|
||
self.preload_buffer.append(chunk_audio)
|
||
success_count += 1
|
||
|
||
# 检查是否应该开始播放(借鉴 recorder.py 的预加载策略)
|
||
if (not self.is_playing and
|
||
len(self.preload_buffer) >= self.preload_size):
|
||
# 将预加载的数据移到播放缓冲区
|
||
self.playback_buffer.extend(self.preload_buffer)
|
||
self.preload_buffer.clear()
|
||
self.is_playing = True
|
||
self.last_playback_time = time.time()
|
||
self.logger.info("开始播放TTS音频(预加载完成)")
|
||
|
||
# 减少进度显示频率
|
||
if chunk_count % 20 == 0: # 减少显示频率
|
||
progress = (f"生成音频: {chunk_count} 块 | 成功: {success_count} | "
|
||
f"{total_audio_size / 1024:.1f} KB | "
|
||
f"预加载: {len(self.preload_buffer)} | "
|
||
f"播放缓冲: {len(self.playback_buffer)}")
|
||
self.logger.info(progress)
|
||
|
||
except Exception as e:
|
||
self.logger.error(f"添加音频到预加载缓冲区失败: {e}")
|
||
continue
|
||
|
||
continue
|
||
|
||
if data.get("code", 0) == 20000000:
|
||
break
|
||
|
||
if data.get("code", 0) > 0:
|
||
self.logger.error(f"TTS错误响应: {data}")
|
||
break
|
||
|
||
except json.JSONDecodeError:
|
||
continue
|
||
|
||
# 处理剩余的预加载数据
|
||
print(f"🔊 TTS生成结束,检查剩余预加载数据: preload_buffer={len(self.preload_buffer)} 块")
|
||
if self.preload_buffer:
|
||
print(f"🔊 将剩余的 {len(self.preload_buffer)} 个音频块转移到播放缓冲区")
|
||
self.playback_buffer.extend(self.preload_buffer)
|
||
self.preload_buffer.clear()
|
||
if not self.is_playing:
|
||
self.is_playing = True
|
||
self.last_playback_time = time.time()
|
||
self.logger.info("开始播放TTS音频(处理剩余预加载)")
|
||
else:
|
||
print(f"⚠️ TTS生成完成但预加载缓冲区为空!")
|
||
|
||
success_rate = (success_count / chunk_count * 100) if chunk_count > 0 else 0
|
||
print(f"🔊 TTS音频生成完成统计: chunk_count={chunk_count}, success_count={success_count}, success_rate={success_rate:.1f}%, total_size={total_audio_size / 1024:.1f} KB")
|
||
self.logger.info(f"TTS音频生成完成: {chunk_count} 块, 成功率 {success_rate:.1f}% | 总大小: {total_audio_size / 1024:.1f} KB")
|
||
|
||
# 通知自己TTS生成已完成
|
||
self.tts_generation_complete = True
|
||
print(f"🎵 OutputProcess TTS生成已完成")
|
||
|
||
# 注意:不在这里直接调用等待播放完成,让统一的增强播放完成检测机制处理
|
||
# 这样可以避免在TTS还在生成后续音频时就误判播放完成
|
||
self.logger.info("TTS生成完成,等待统一播放完成检测机制处理...")
|
||
|
||
return success_count > 0
|
||
|
||
finally:
|
||
response.close()
|
||
session.close()
|
||
|
||
except Exception as e:
|
||
self.logger.error(f"TTS音频生成失败: {e}")
|
||
return False
|
||
|
||
# ========== 智能句子缓冲系统 - 从 recorder.py 借鉴 ==========
|
||
|
||
def _should_trigger_tts(self, sentence):
|
||
"""智能判断是否应该触发TTS - 借鉴 recorder.py 的策略"""
|
||
current_time = time.time()
|
||
|
||
# 检查缓冲区大小
|
||
if len(self.tts_buffer) >= self.tts_buffer_max_size:
|
||
return True
|
||
|
||
# 检查时间窗口
|
||
time_since_last = current_time - self.tts_last_trigger_time
|
||
if time_since_last >= self.tts_accumulation_time and len(self.tts_buffer) >= self.tts_buffer_min_size:
|
||
return True
|
||
|
||
# 检查是否为完整句子(使用严格的检测)
|
||
if self._is_complete_sentence(sentence):
|
||
return True
|
||
|
||
# 检查句子特征 - 长句子优先(50字符以上)
|
||
if len(sentence) > 50: # 超过50字符的句子立即触发
|
||
return True
|
||
|
||
# 中等长度句子(30-50字符)如果有结束标点也触发
|
||
if len(sentence) > 30:
|
||
end_punctuations = ['。', '!', '?', '.', '!', '?']
|
||
if any(sentence.strip().endswith(p) for p in end_punctuations):
|
||
return True
|
||
|
||
# 短句子只在缓冲区较多或时间窗口到期时触发
|
||
return False
|
||
|
||
def _process_tts_buffer(self):
|
||
"""处理TTS缓冲区 - 发送累积的句子到TTS"""
|
||
print(f"🔊 处理TTS缓冲区,当前缓冲区内容: {self.tts_buffer}")
|
||
if not self.tts_buffer:
|
||
print(f"🔊 TTS缓冲区为空,跳过处理")
|
||
return
|
||
|
||
# 合并缓冲区的句子
|
||
combined_text = ''.join(self.tts_buffer)
|
||
print(f"🔊 合并后的文本: '{combined_text}' (长度: {len(combined_text)})")
|
||
|
||
# 添加到TTS任务队列
|
||
print(f"🔊 尝试添加TTS任务到队列")
|
||
if self._add_tts_task(combined_text):
|
||
print(f"🎵 触发TTS: {combined_text[:50]}...")
|
||
self.tts_last_trigger_time = time.time()
|
||
else:
|
||
print(f"❌ 添加TTS任务失败")
|
||
|
||
# 清空缓冲区
|
||
self.tts_buffer.clear()
|
||
print(f"🔊 TTS缓冲区已清空")
|
||
|
||
def _add_sentence_to_buffer(self, sentence):
|
||
"""添加句子到智能缓冲区 - 核心方法"""
|
||
print(f"🔊 添加句子到TTS缓冲区: '{sentence}' (缓冲区大小: {len(self.tts_buffer)} -> {len(self.tts_buffer)+1})")
|
||
if not sentence.strip():
|
||
print(f"🔊 句子为空,不添加到缓冲区")
|
||
return
|
||
|
||
self.tts_buffer.append(sentence)
|
||
print(f"🔊 已添加到TTS缓冲区,当前缓冲区: {self.tts_buffer}")
|
||
|
||
# 检查是否应该触发TTS
|
||
should_trigger = self._should_trigger_tts(sentence)
|
||
print(f"🔊 是否应该触发TTS: {should_trigger}")
|
||
if should_trigger:
|
||
print(f"🔊 触发TTS缓冲区处理")
|
||
self._process_tts_buffer()
|
||
|
||
def _flush_tts_buffer(self):
|
||
"""强制刷新TTS缓冲区 - 确保所有内容都被处理"""
|
||
if self.tts_buffer:
|
||
self._process_tts_buffer()
|
||
|
||
def _is_complete_sentence(self, text):
|
||
"""检测是否为完整句子 - 从 recorder.py 完全移植"""
|
||
import re
|
||
|
||
if not text or len(text.strip()) == 0:
|
||
return False
|
||
|
||
# 增加最小长度要求 - 至少10个字符才考虑作为完整句子
|
||
if len(text.strip()) < 10:
|
||
return False
|
||
|
||
# 句子结束标点符号
|
||
sentence_endings = r'[。!?.!?]'
|
||
|
||
# 检查是否以句子结束符结尾
|
||
if re.search(sentence_endings + r'\s*$', text):
|
||
# 对于以结束符结尾的句子,要求至少15个字符
|
||
if len(text.strip()) >= 15:
|
||
return True
|
||
|
||
# 检查是否包含句子结束符(可能在句子中间)
|
||
if re.search(sentence_endings, text):
|
||
# 如果后面有其他标点或字符,可能不是完整句子
|
||
remaining_text = re.split(sentence_endings, text, 1)[-1]
|
||
if len(remaining_text.strip()) > 0:
|
||
return False
|
||
# 对于包含结束符的句子,要求至少20个字符
|
||
if len(text.strip()) >= 20:
|
||
return True
|
||
|
||
# 对于较长的文本(超过50字符),即使没有结束符也可以考虑
|
||
if len(text.strip()) >= 50:
|
||
return True
|
||
|
||
# 对于中等长度的文本,如果包含常见完整句式模式
|
||
if len(text.strip()) >= 20:
|
||
common_patterns = [
|
||
r'^[是的有没有来去在把被让叫请使].*[的得了吗呢吧啊呀]',
|
||
r'^(你好|谢谢|再见|是的|不是|好的|没问题)',
|
||
r'^[\u4e00-\u9fff]{4,8}[的得了]$' # 4-8个中文字+的/了/得
|
||
]
|
||
|
||
for pattern in common_patterns:
|
||
if re.match(pattern, text):
|
||
return True
|
||
|
||
return False
|
||
|
||
def _filter_parentheses_content(self, text):
|
||
"""过滤文本中的括号内容(包括中文和英文括号)- 从 recorder.py 移植"""
|
||
import re
|
||
|
||
# 移除中文括号内容:(内容)
|
||
filtered_text = re.sub(r'([^)]*)', '', text)
|
||
# 移除英文括号内容:(content)
|
||
filtered_text = re.sub(r'\([^)]*\)', '', filtered_text)
|
||
# 移除方括号内容:【内容】
|
||
filtered_text = re.sub(r'【[^】]*】', '', filtered_text)
|
||
# 移除方括号内容:[content]
|
||
filtered_text = re.sub(r'\[[^\]]*\]', '', filtered_text)
|
||
# 移除书名号内容:「内容」
|
||
filtered_text = re.sub(r'「[^」]*」', '', filtered_text)
|
||
|
||
# 清理多余空格
|
||
filtered_text = re.sub(r'\s+', ' ', filtered_text).strip()
|
||
|
||
return filtered_text
|
||
|
||
def process_streaming_text(self, text):
|
||
"""处理流式文本 - 新增的公共接口,用于与LLM流式输出集成"""
|
||
print(f"🔊 收到流式文本: '{text}' (长度: {len(text)})")
|
||
if not text.strip():
|
||
print(f"🔊 流式文本为空,跳过处理")
|
||
return
|
||
|
||
# 过滤括号内容
|
||
filtered_text = self._filter_parentheses_content(text.strip())
|
||
print(f"🔊 过滤后的文本: '{filtered_text}' (长度: {len(filtered_text)})")
|
||
|
||
if filtered_text:
|
||
# 使用智能句子缓冲系统
|
||
print(f"🔊 添加文本到智能缓冲区")
|
||
self._add_sentence_to_buffer(filtered_text)
|
||
else:
|
||
print(f"🔊 过滤后文本为空,不添加到缓冲区")
|
||
|
||
def process_complete_text(self, text):
|
||
"""处理完整文本 - 强制刷新缓冲区"""
|
||
if not text.strip():
|
||
return
|
||
|
||
# 过滤括号内容
|
||
filtered_text = self._filter_parentheses_content(text.strip())
|
||
|
||
if filtered_text:
|
||
# 直接添加到缓冲区并强制处理
|
||
self.tts_buffer.append(filtered_text)
|
||
self._process_tts_buffer()
|
||
|
||
# ========== 原有方法保持不变 ==========
|
||
|
||
def process_tts_request(self, text):
|
||
"""处理TTS请求的公共接口 - 兼容原有接口"""
|
||
# 使用新的智能缓冲系统
|
||
self.process_complete_text(text)
|
||
return True
|
||
|
||
if __name__ == "__main__":
|
||
# 测试代码
|
||
print("音频进程模块测试")
|
||
print("这个模块应该在多进程环境中运行") |