回声待处理

This commit is contained in:
朱潮 2025-09-21 03:00:11 +08:00
parent aed69e9c54
commit 9523176d60
9 changed files with 2087 additions and 499 deletions

View File

@ -0,0 +1,127 @@
# Audio Processes 改进总结
## 问题背景
- 原始问题:TTS 音频只播放 3 个字符就停止,随后出现 ALSA underrun 错误
- 根本原因:音频缓冲区管理不当,播放策略过于保守
## 改进内容
### 1. 音频播放优化 (_play_audio 方法)
- **改进前**:保守的播放策略,需要缓冲区有足够数据才开始播放
- **改进后**:
- 借鉴 recorder.py 的播放策略:只要有数据就播放
- 添加错误恢复机制,自动检测和恢复 ALSA underrun
- 优化缓冲区管理,减少延迟
### 2. TTS 工作线程模式
- **参考**: recorder.py 的 TTS 工作线程实现
- **实现功能**
- 独立的 TTS 工作线程处理音频生成
- 任务队列管理,避免阻塞主线程
- 统一的 TTS 请求接口 `process_tts_request()`
- 支持流式音频处理
### 3. 统一的音频播放队列
- **InputProcess 和 OutputProcess 都支持**
- TTS 工作线程
- 音频生成和播放队列
- 统一的错误处理和日志记录
### 4. 关键改进点
#### 音频播放策略
```python
# 改进前:保守策略
if len(self.playback_buffer) > 2: # 需要缓冲区有足够数据
# 开始播放
# 改进后:积极策略 + 错误恢复
audio_chunk = self.playback_buffer.pop(0)
if audio_chunk and len(audio_chunk) > 0:
try:
self.output_stream.write(audio_chunk)
# 统计信息
except Exception as e:
# ALSA underrun 错误恢复
if "underrun" in str(e).lower():
# 自动恢复音频流
```
#### TTS 工作线程
```python
def _tts_worker(self):
"""TTS工作线程 - 处理TTS任务队列"""
while self.tts_worker_running:
try:
task = self.tts_task_queue.get(timeout=1.0)
if task is None:
break
task_type, content = task
if task_type == "tts_sentence":
self._generate_tts_audio(content)
self.tts_task_queue.task_done()
except queue.Empty:
continue
except Exception as e:
self.logger.error(f"TTS工作线程错误: {e}")
```
#### 错误恢复机制
```python
# ALSA underrun 检测和恢复
if "underrun" in str(e).lower() or "alsa" in str(e).lower():
self.logger.info("检测到ALSA underrun尝试恢复音频流")
try:
if self.output_stream:
self.output_stream.stop_stream()
time.sleep(0.1)
self.output_stream.start_stream()
self.logger.info("音频流已恢复")
except Exception as recovery_e:
self.logger.error(f"恢复音频流失败: {recovery_e}")
self.playback_buffer.clear()
```
### 5. 性能优化
- 减少日志输出频率,提高性能
- 优化队列处理策略,使用适当的超时设置
- 动态调整休眠时间根据播放状态优化CPU使用
### 6. 测试和验证
- 创建了测试脚本 `test_audio_processes.py`
- 验证了语法正确性
- 可以测试 TTS 功能的完整性
## 使用方法
### 在控制系统中使用
```python
from audio_processes import InputProcess, OutputProcess
# 创建输入和输出进程
input_process = InputProcess(command_queue, event_queue)
output_process = OutputProcess(audio_queue)
# 处理TTS请求
output_process.process_tts_request("你好,这是测试语音")
```
### 独立测试
```bash
python test_audio_processes.py
```
## 预期效果
- 解决 ALSA underrun 错误
- 提高音频播放的流畅性
- 减少 TTS 处理的延迟
- 提供更稳定的音频处理能力
## 注意事项
1. 确保系统安装了必要的依赖:`requests`, `pyaudio`
2. 检查音频设备是否正常工作
3. 网络连接正常用于TTS服务
4. 适当调整音频参数以适应不同环境

287
asr_diagnostic.py Normal file
View File

@ -0,0 +1,287 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
语音识别诊断工具
用于测试和诊断语音识别功能的具体问题
"""
import asyncio
import json
import gzip
import uuid
import numpy as np
import wave
import os
from typing import Optional
class ASRDiagnostic:
    """Diagnostic tool for the streaming speech-recognition (ASR) service.

    Exercises the ByteDance openspeech WebSocket ASR endpoint with three
    kinds of input — silence, random noise, and local WAV recordings — so
    that connectivity, authentication, and response parsing can each be
    checked in isolation.
    """

    def __init__(self):
        # Endpoint and credentials for the ASR service.
        # NOTE(review): credentials are hard-coded; move them to config or
        # environment variables before sharing this file. The cluster name
        # "volcano_tts" looks like a TTS cluster — confirm it is the right
        # one for ASR.
        self.api_config = {
            'asr': {
                'appid': "8718217928",
                'cluster': "volcano_tts",
                'token': "ynJMX-5ix1FsJvswC9KTNlGUdubcchqc",
                'ws_url': "wss://openspeech.bytedance.com/api/v2/asr"
            }
        }

    def generate_asr_header(self, message_type=1, message_type_specific_flags=0):
        """Build the 4-byte binary header that precedes every ASR message.

        Nibble layout:
        [version | header size][message type | flags][serialization | compression][reserved]

        Args:
            message_type: protocol message type (1 = full client request,
                0b0010 = audio-only request).
            message_type_specific_flags: per-type flags (0b0010 marks the
                final audio chunk).

        Returns:
            bytearray: the assembled 4-byte header.
        """
        PROTOCOL_VERSION = 0b0001
        DEFAULT_HEADER_SIZE = 0b0001
        JSON = 0b0001  # serialization method nibble (JSON)
        GZIP = 0b0001  # compression method nibble (gzip)
        header = bytearray()
        header.append((PROTOCOL_VERSION << 4) | DEFAULT_HEADER_SIZE)
        header.append((message_type << 4) | message_type_specific_flags)
        header.append((JSON << 4) | GZIP)
        header.append(0x00)  # reserved
        return header

    def parse_asr_response(self, res):
        """Parse a raw binary ASR server frame into a dict (best effort).

        Simplified parser: assumes the payload starts at byte 8 and is plain
        (uncompressed) JSON. NOTE(review): the request header advertises GZIP
        compression, but this parser never gzip-decompresses the payload — a
        gzipped response will fail the ``startswith(b'{')`` check and yield
        an empty dict. Confirm against the protocol documentation.

        Args:
            res: raw bytes of one server frame.

        Returns:
            dict: decoded JSON payload, or {} when parsing fails.
        """
        print(f"🔍 解析响应,原始大小: {len(res)} 字节")
        if len(res) < 8:
            print(f"❌ 响应太短,无法解析")
            return {}
        try:
            message_type = res[1] >> 4
            payload_size = int.from_bytes(res[4:8], "big", signed=False)
            payload_msg = res[8:8+payload_size]
            print(f"📋 消息类型: {message_type}, 载荷大小: {payload_size}")
            if message_type == 0b1001:  # SERVER_FULL_RESPONSE
                try:
                    if payload_msg.startswith(b'{'):
                        result = json.loads(payload_msg.decode('utf-8'))
                        print(f"✅ 成功解析JSON响应")
                        return result
                    else:
                        print(f"❌ 响应不是JSON格式")
                except Exception as e:
                    print(f"❌ JSON解析失败: {e}")
        except Exception as e:
            print(f"❌ 响应解析异常: {e}")
        return {}

    async def test_asr_with_audio_file(self, audio_file_path: str):
        """Run ASR on a local WAV file (down-mixing stereo to mono first).

        Args:
            audio_file_path: path to a WAV file on disk.

        Returns:
            Recognized text, or None on failure / missing file.
        """
        print(f"🎵 测试ASR - 音频文件: {audio_file_path}")
        if not os.path.exists(audio_file_path):
            print(f"❌ 音频文件不存在: {audio_file_path}")
            return
        try:
            # Read the WAV container and extract the raw PCM frames.
            with wave.open(audio_file_path, 'rb') as wf:
                channels = wf.getnchannels()
                width = wf.getsampwidth()
                rate = wf.getframerate()
                frames = wf.readframes(wf.getnframes())
            print(f"📊 音频信息: 采样率={rate}Hz, 声道={channels}, 位深={width*8}bits")
            print(f"📊 音频大小: {len(frames)} 字节")
            # Down-mix interleaved multi-channel audio to mono by averaging.
            if channels > 1:
                audio_array = np.frombuffer(frames, dtype=np.int16)
                audio_array = audio_array.reshape(-1, channels)
                audio_array = np.mean(audio_array, axis=1).astype(np.int16)
                frames = audio_array.tobytes()
                print(f"🔄 已转换为单声道")
            return await self._test_asr_connection(frames)
        except Exception as e:
            print(f"❌ 音频文件处理失败: {e}")
            return None

    async def test_asr_with_silence(self):
        """Feed 3 seconds of digital silence (all-zero PCM) to the service."""
        print(f"🔇 测试ASR - 静音音频")
        # 3 s of silence at 16 kHz / 16-bit / mono: 2 bytes per sample, zeros.
        duration = 3  # seconds
        sample_rate = 16000
        silence_data = bytes(duration * sample_rate * 2)  # 2 bytes per sample
        return await self._test_asr_connection(silence_data)

    async def test_asr_with_noise(self):
        """Feed 3 seconds of full-scale uniform random noise to the service."""
        print(f"📢 测试ASR - 噪音音频")
        # 3 s of uniform random int16 noise at 16 kHz.
        duration = 3  # seconds
        sample_rate = 16000
        noise_data = np.random.randint(-32768, 32767, duration * sample_rate, dtype=np.int16)
        noise_data = noise_data.tobytes()
        return await self._test_asr_connection(noise_data)

    async def _test_asr_connection(self, audio_data: bytes):
        """Open a WebSocket to the ASR service and stream *audio_data*.

        Sends the full client request first, then the audio in gzip-compressed
        chunks, and finally tries to extract recognized text from the last
        server response.

        Args:
            audio_data: raw PCM bytes (assumed 16 kHz / 16-bit / mono —
                matches the 'audio' request parameters below).

        Returns:
            Recognized text, or None on failure.
        """
        try:
            import websockets  # imported lazily: only this method needs it

            # Build the JSON request payload.
            reqid = str(uuid.uuid4())
            request_params = {
                'app': {
                    'appid': self.api_config['asr']['appid'],
                    'cluster': self.api_config['asr']['cluster'],
                    'token': self.api_config['asr']['token'],
                },
                'user': {
                    'uid': 'asr_diagnostic'
                },
                'request': {
                    'reqid': reqid,
                    'nbest': 1,
                    'workflow': 'audio_in,resample,partition,vad,fe,decode,itn,nlu_punctuate',
                    'show_language': False,
                    'show_utterances': False,
                    'result_type': 'full',
                    "sequence": 1
                },
                'audio': {
                    # NOTE(review): format is declared 'wav' but callers pass
                    # raw PCM — confirm whether 'pcm' is the correct value.
                    'format': 'wav',
                    'rate': 16000,
                    'language': 'zh-CN',
                    'bits': 16,
                    'channel': 1,
                    'codec': 'raw'
                }
            }
            print(f"📋 ASR请求参数:")
            print(f" - AppID: {request_params['app']['appid']}")
            print(f" - Cluster: {request_params['app']['cluster']}")
            print(f" - Token: {request_params['app']['token'][:20]}...")
            print(f" - RequestID: {reqid}")

            # Serialize + gzip the JSON payload, then prepend header + length.
            payload_bytes = str.encode(json.dumps(request_params))
            payload_bytes = gzip.compress(payload_bytes)
            full_client_request = bytearray(self.generate_asr_header())
            full_client_request.extend((len(payload_bytes)).to_bytes(4, 'big'))
            full_client_request.extend(payload_bytes)

            # NOTE(review): 'Bearer; <token>' (with a semicolon) is unusual,
            # but it matches what the rest of this project sends — confirm
            # against the service docs.
            additional_headers = {'Authorization': 'Bearer; {}'.format(self.api_config['asr']['token'])}

            print(f"📡 连接WebSocket...")
            async with websockets.connect(
                self.api_config['asr']['ws_url'],
                additional_headers=additional_headers,
                max_size=1000000000
            ) as ws:
                print(f"✅ WebSocket连接成功")
                # Send the configuration request and read the first response.
                print(f"📤 发送ASR配置请求...")
                await ws.send(full_client_request)
                res = await ws.recv()
                result = self.parse_asr_response(res)
                print(f"📥 配置响应: {result}")

                # Stream the audio. chunk_size is the byte count of a
                # 15-second mono / 16-bit / 16 kHz chunk
                # (1 ch * 2 B/sample * 16000 Hz * 15000 ms / 1000).
                chunk_size = int(1 * 2 * 16000 * 15000 / 1000)
                total_chunks = 0
                for offset in range(0, len(audio_data), chunk_size):
                    chunk = audio_data[offset:offset + chunk_size]
                    last = (offset + chunk_size) >= len(audio_data)
                    payload_bytes = gzip.compress(chunk)
                    audio_only_request = bytearray(
                        self.generate_asr_header(
                            message_type=0b0010,  # audio-only request
                            message_type_specific_flags=0b0010 if last else 0  # flag final chunk
                        )
                    )
                    audio_only_request.extend((len(payload_bytes)).to_bytes(4, 'big'))
                    audio_only_request.extend(payload_bytes)
                    await ws.send(audio_only_request)
                    res = await ws.recv()
                    result = self.parse_asr_response(res)
                    total_chunks += 1
                    if last:
                        print(f"📨 发送最后一块音频数据 (总计{total_chunks}块)")

                # Only the response to the LAST chunk is inspected here.
                # NOTE(review): parse_asr_response returns the decoded JSON
                # itself and never sets a 'payload_msg' key, so this branch
                # can never find results — it appears copied from a different
                # parser. Verify which shape the parser actually returns.
                print(f"🎯 等待最终识别结果...")
                if 'payload_msg' in result and 'result' in result['payload_msg']:
                    results = result['payload_msg']['result']
                    print(f"📝 ASR返回结果数量: {len(results)}")
                    if results:
                        text = results[0].get('text', '识别失败')
                        print(f"✅ 识别结果: {text}")
                        return text
                    else:
                        print(f"❌ ASR结果为空")
                else:
                    print(f"❌ ASR响应格式异常: {result.keys()}")
                    print(f"📋 完整响应: {result}")
                return None
        except Exception as e:
            print(f"❌ ASR连接异常: {e}")
            import traceback
            print(f"❌ 详细错误:\n{traceback.format_exc()}")
            return None

    async def run_diagnostic(self):
        """Run the whole suite: silence, noise, then any local recordings."""
        print("🔧 ASR诊断工具")
        print("=" * 50)
        # 1. Silence should produce an empty / no-speech result.
        print("\n1⃣ 测试静音识别...")
        await self.test_asr_with_silence()
        # 2. Noise should also produce no meaningful transcript.
        print("\n2⃣ 测试噪音识别...")
        await self.test_asr_with_noise()
        # 3. Any recording_*.wav files in the working directory (max 3).
        recording_files = [f for f in os.listdir('.') if f.startswith('recording_') and f.endswith('.wav')]
        if recording_files:
            print(f"\n3⃣ 测试录音文件...")
            for file in recording_files[:3]:  # test at most 3 files
                await self.test_asr_with_audio_file(file)
        else:
            print(f"\n3⃣ 跳过录音文件测试 (无录音文件)")
        print(f"\n✅ 诊断完成")
def main():
    """Entry point: run the full ASR diagnostic suite and report interrupts."""
    tool = ASRDiagnostic()
    try:
        asyncio.run(tool.run_diagnostic())
    except KeyboardInterrupt:
        print(f"\n🛑 诊断被用户中断")
    except Exception as e:
        print(f"❌ 诊断工具异常: {e}")


if __name__ == "__main__":
    main()

File diff suppressed because it is too large Load Diff

View File

@ -38,6 +38,7 @@ class ControlSystem:
self.input_command_queue = mp.Queue(maxsize=100) # 主进程 → 输入进程
self.input_event_queue = mp.Queue(maxsize=100) # 输入进程 → 主进程
self.output_audio_queue = mp.Queue(maxsize=1000) # 主进程 → 输出进程
self.output_event_queue = mp.Queue(maxsize=100) # 输出进程 → 主进程
# 进程
self.input_process = None
@ -214,7 +215,8 @@ class ControlSystem:
self.output_process = mp.Process(
target=OutputProcess(
self.output_audio_queue,
output_config
output_config,
self.output_event_queue # 传递事件队列
).run
)
@ -286,13 +288,8 @@ class ControlSystem:
def _handle_playing_state(self):
"""处理播放状态"""
# 检查播放是否完成
if self.output_audio_queue.qsize() == 0 and not self.playback_complete:
# 等待一小段时间确保播放完成
time.sleep(0.5)
if self.output_audio_queue.qsize() == 0:
self.playback_complete = True
self.stats['total_conversations'] += 1
# 现在主要由输出进程的播放完成事件驱动
pass
def _check_events(self):
"""检查进程事件"""
@ -307,6 +304,18 @@ class ControlSystem:
except queue.Empty:
pass
# 检查输出进程事件
try:
while True:
event = self.output_event_queue.get_nowait()
if event.event_type == 'playback_complete':
print("📡 主控制:收到播放完成事件")
self._handle_playback_complete(event)
except queue.Empty:
pass
def _handle_recording_complete(self, event: ProcessEvent):
"""处理录音完成事件"""
@ -327,6 +336,21 @@ class ControlSystem:
print(f"🎯 状态RECORDING → PROCESSING (时长: {event.metadata['duration']:.2f}s)")
def _handle_playback_complete(self, event: ProcessEvent):
"""处理播放完成事件"""
# 标记播放完成
self.playback_complete = True
# 更新统计
self.stats['total_conversations'] += 1
# 切换到空闲状态
self.state = RecordingState.IDLE
print(f"🎯 状态PLAYING → IDLE")
# 重新启用输入进程录音功能
self.input_command_queue.put(ControlCommand('enable_recording'))
def _process_audio_pipeline(self):
"""处理音频流水线STT + LLM + TTS"""
try:
@ -390,9 +414,17 @@ class ControlSystem:
def _speech_to_text(self, audio_data: bytes) -> Optional[str]:
"""语音转文字"""
try:
return asyncio.run(self._recognize_audio_async(audio_data))
print(f"🔍 开始语音识别,音频大小: {len(audio_data)} 字节")
result = asyncio.run(self._recognize_audio_async(audio_data))
if result:
print(f"✅ 语音识别成功: {result}")
else:
print(f"❌ 语音识别返回空结果")
return result
except Exception as e:
print(f"❌ 语音识别异常: {e}")
import traceback
print(f"❌ 详细错误信息:\n{traceback.format_exc()}")
return None
async def _recognize_audio_async(self, audio_data: bytes) -> Optional[str]:
@ -401,7 +433,57 @@ class ControlSystem:
return "语音识别功能已禁用"
try:
# 验证音频数据
print(f"🎵 音频数据验证:")
print(f" - 大小: {len(audio_data)} 字节")
print(f" - 是否为空: {len(audio_data) == 0}")
if len(audio_data) == 0:
print("❌ 音频数据为空")
return None
# 检查是否有WAV头部
has_wav_header = audio_data.startswith(b'RIFF')
print(f" - 有WAV头部: {has_wav_header}")
if has_wav_header:
# 解析WAV头部
print(f" - WAV格式可能需要提取PCM数据")
riff_size = int.from_bytes(audio_data[4:8], 'little')
wave_fmt = audio_data[8:12]
if wave_fmt == b'WAVE':
print(f" - WAVE格式正确")
# 查找fmt块
fmt_pos = audio_data.find(b'fmt ')
if fmt_pos > 0:
fmt_size = int.from_bytes(audio_data[fmt_pos+4:fmt_pos+8], 'little')
audio_format = int.from_bytes(audio_data[fmt_pos+8:fmt_pos+10], 'little')
channels = int.from_bytes(audio_data[fmt_pos+10:fmt_pos+12], 'little')
sample_rate = int.from_bytes(audio_data[fmt_pos+12:fmt_pos+16], 'little')
print(f" - 音频格式: {audio_format}")
print(f" - 声道数: {channels}")
print(f" - 采样率: {sample_rate}")
else:
print(f" - 纯PCM数据")
# 检查音频数据格式假设是16位PCM
if len(audio_data) % 2 != 0:
print(f"⚠️ 音频数据长度不是2的倍数: {len(audio_data)}")
# 计算音频时长
sample_rate = self.config['audio']['sample_rate']
channels = self.config['audio']['channels']
bytes_per_second = sample_rate * channels * 2 # 16位 = 2字节
duration = len(audio_data) / bytes_per_second
print(f" - 配置采样率: {sample_rate} Hz")
print(f" - 配置声道数: {channels}")
print(f" - 估算时长: {duration:.2f}")
if duration < 0.5:
print(f"⚠️ 音频时长过短: {duration:.2f}")
import websockets
print(f"🔗 连接WebSocket ASR服务: {self.api_config['asr']['ws_url']}")
# 生成ASR头部
def generate_asr_header(message_type=1, message_type_specific_flags=0):
@ -417,25 +499,56 @@ class ControlSystem:
header.append(0x00) # reserved
return header
# 解析ASR响应
# 解析ASR响应 - 基于recorder.py的工作实现
def parse_asr_response(res):
# 简化的响应解析
if len(res) < 8:
return {}
"""解析ASR响应"""
PROTOCOL_VERSION = res[0] >> 4
header_size = res[0] & 0x0f
message_type = res[1] >> 4
payload_size = int.from_bytes(res[4:8], "big", signed=False)
payload_msg = res[8:8+payload_size]
message_type_specific_flags = res[1] & 0x0f
serialization_method = res[2] >> 4
message_compression = res[2] & 0x0f
reserved = res[3]
header_extensions = res[4:header_size * 4]
payload = res[header_size * 4:]
result = {}
payload_msg = None
payload_size = 0
print(f"🔍 响应头信息: message_type={message_type}, compression={message_compression}, serialization={serialization_method}")
if message_type == 0b1001: # SERVER_FULL_RESPONSE
try:
if payload_msg.startswith(b'{'):
result = json.loads(payload_msg.decode('utf-8'))
return result
except:
pass
payload_size = int.from_bytes(payload[:4], "big", signed=True)
payload_msg = payload[4:]
print(f"📦 Full响应: payload_size={payload_size}")
elif message_type == 0b1011: # SERVER_ACK
seq = int.from_bytes(payload[:4], "big", signed=True)
result['seq'] = seq
if len(payload) >= 8:
payload_size = int.from_bytes(payload[4:8], "big", signed=False)
payload_msg = payload[8:]
print(f"📦 ACK响应: seq={seq}, payload_size={payload_size}")
elif message_type == 0b1111: # SERVER_ERROR_RESPONSE
code = int.from_bytes(payload[:4], "big", signed=False)
result['code'] = code
payload_size = int.from_bytes(payload[4:8], "big", signed=False)
payload_msg = payload[8:]
print(f"❌ 错误响应: code={code}")
return {}
if payload_msg is None:
return result
if message_compression == 0b0001: # GZIP
payload_msg = gzip.decompress(payload_msg)
print(f"📦 解压后大小: {len(payload_msg)} 字节")
if serialization_method == 0b0001: # JSON
payload_msg = json.loads(str(payload_msg, "utf-8"))
print(f"📋 解析后的JSON: {json.dumps(payload_msg, indent=2, ensure_ascii=False)}")
result['payload_msg'] = payload_msg
result['payload_size'] = payload_size
return result
# 构建请求参数
reqid = str(uuid.uuid4())
@ -458,7 +571,7 @@ class ControlSystem:
"sequence": 1
},
'audio': {
'format': 'wav',
'format': 'pcm',
'rate': self.config['audio']['sample_rate'],
'language': 'zh-CN',
'bits': 16,
@ -468,6 +581,14 @@ class ControlSystem:
}
# 构建请求
print(f"📋 ASR请求参数:")
print(f" - audio.format: {request_params['audio']['format']}")
print(f" - audio.rate: {request_params['audio']['rate']}")
print(f" - audio.channel: {request_params['audio']['channel']}")
print(f" - audio.bits: {request_params['audio']['bits']}")
print(f" - audio.codec: {request_params['audio']['codec']}")
print(f" - request.workflow: {request_params['request']['workflow']}")
payload_bytes = str.encode(json.dumps(request_params))
payload_bytes = gzip.compress(payload_bytes)
full_client_request = bytearray(generate_asr_header())
@ -478,43 +599,136 @@ class ControlSystem:
additional_headers = {'Authorization': 'Bearer; {}'.format(self.api_config['asr']['token'])}
# 连接WebSocket
print(f"📡 尝试连接WebSocket...")
print(f"🔗 WebSocket URL: {self.api_config['asr']['ws_url']}")
print(f"📋 Headers: {additional_headers}")
async with websockets.connect(
self.api_config['asr']['ws_url'],
additional_headers=additional_headers,
max_size=1000000000
max_size=1000000000,
ping_interval=20,
ping_timeout=60
) as ws:
print(f"✅ WebSocket连接成功")
# 发送请求
print(f"📤 发送ASR请求...")
print(f"📦 请求大小: {len(full_client_request)} 字节")
await ws.send(full_client_request)
res = await ws.recv()
print(f"📥 收到ASR响应大小: {len(res)} 字节")
result = parse_asr_response(res)
print(f"🔍 解析ASR响应: {result}")
# 发送音频数据
# 发送音频数据 - 基于recorder.py实现
chunk_size = int(self.config['audio']['channels'] * 2 *
self.config['audio']['sample_rate'] * 15000 / 1000)
print(f"🎵 开始发送音频数据:")
print(f" - 总大小: {len(audio_data)} 字节")
print(f" - 分块大小: {chunk_size} 字节")
print(f" - 预计分块数: {(len(audio_data) + chunk_size - 1) // chunk_size}")
total_chunks = (len(audio_data) + chunk_size - 1) // chunk_size
chunks_sent = 0
for offset in range(0, len(audio_data), chunk_size):
chunks_sent += 1
chunk = audio_data[offset:offset + chunk_size]
last = (offset + chunk_size) >= len(audio_data)
payload_bytes = gzip.compress(chunk)
audio_only_request = bytearray(
generate_asr_header(
message_type=0b0010,
message_type_specific_flags=0b0010 if last else 0
)
)
audio_only_request.extend((len(payload_bytes)).to_bytes(4, 'big'))
audio_only_request.extend(payload_bytes)
print(f"📦 发送第 {chunks_sent}/{total_chunks} 块:")
print(f" - 当前块大小: {len(chunk)} 字节")
print(f" - 偏移量: {offset}-{offset + len(chunk)}")
print(f" - 是否最后一块: {last}")
await ws.send(audio_only_request)
res = await ws.recv()
result = parse_asr_response(res)
try:
payload_bytes = gzip.compress(chunk)
audio_only_request = bytearray(
generate_asr_header(
message_type=0b0010,
message_type_specific_flags=0b0010 if last else 0
)
)
audio_only_request.extend((len(payload_bytes)).to_bytes(4, 'big'))
audio_only_request.extend(payload_bytes)
print(f" - 压缩后大小: {len(payload_bytes)} 字节")
print(f" - 总请求数据大小: {len(audio_only_request)} 字节")
await ws.send(audio_only_request)
print(f" ✅ 第 {chunks_sent} 块发送成功")
# 等待服务器响应
try:
res = await asyncio.wait_for(ws.recv(), timeout=30.0)
print(f" 📥 收到第 {chunks_sent} 块响应,大小: {len(res)} 字节")
result = parse_asr_response(res)
print(f" 🔍 第 {chunks_sent} 块响应解析: {result}")
except asyncio.TimeoutError:
print(f" ⏰ 第 {chunks_sent} 块响应超时")
raise Exception("音频块响应超时")
# 检查每个响应是否有错误
if 'code' in result:
print(f" 🔍 第 {chunks_sent} 块响应码: {result['code']}")
if result['code'] != 1000:
print(f" ❌ 第 {chunks_sent} 块数据发送失败: {result}")
return None
if 'payload_msg' in result and result['payload_msg'].get('code') != 1000:
print(f" ❌ 第 {chunks_sent} 块数据发送失败: {result['payload_msg']}")
return None
except Exception as chunk_error:
print(f" ❌ 第 {chunks_sent} 块发送异常: {chunk_error}")
raise chunk_error
if last:
print(f"📨 发送最后一块音频数据完成")
print(f"🎯 所有音频数据发送完成,共发送 {chunks_sent}")
# 获取最终结果
if 'payload_msg' in result and 'result' in result['payload_msg']:
results = result['payload_msg']['result']
if results:
return results[0].get('text', '识别失败')
# 检查最后一个响应中是否包含识别结果
print(f"🎯 检查最终识别结果...")
print(f"📋 最后一个响应: {result}")
if 'payload_msg' in result:
payload_msg = result['payload_msg']
print(f"📋 最终Payload结构: {list(payload_msg.keys()) if isinstance(payload_msg, dict) else type(payload_msg)}")
print(f"📋 最终Payload内容: {payload_msg}")
if isinstance(payload_msg, dict):
# 检查响应码
if 'code' in payload_msg:
code = payload_msg['code']
print(f"🔢 最终响应码: {code}")
if code == 1000:
print(f"✅ ASR识别成功")
else:
print(f"❌ ASR服务返回错误: {payload_msg.get('message', '未知错误')}")
return None
# 查找结果 - 与recorder.py保持一致
if 'result' in payload_msg:
results = payload_msg['result']
print(f"📝 找到结果字段 'result': {results}")
if isinstance(results, list) and results:
text = results[0].get('text', '识别失败')
print(f"✅ 提取识别文本: {text}")
return text
elif isinstance(results, str):
print(f"✅ 提取识别文本: {results}")
return results
else:
print(f"❌ 未找到result字段可用字段: {list(payload_msg.keys())}")
print(f"完整payload: {json.dumps(payload_msg, indent=2, ensure_ascii=False)}")
else:
print(f"❌ Payload不是字典类型: {type(payload_msg)}")
else:
print(f"❌ 响应中没有payload_msg字段")
print(f"可用字段: {list(result.keys())}")
if 'code' in result:
print(f"错误码: {result['code']}")
return None
@ -580,9 +794,12 @@ class ControlSystem:
try:
print("🎵 开始文本转语音")
print(f"📝 待转换文本: {text}")
# 发送元数据
self.output_audio_queue.put(f"METADATA:{text[:30]}...")
metadata_msg = f"METADATA:{text[:30]}..."
print(f"📦 发送元数据: {metadata_msg}")
self.output_audio_queue.put(metadata_msg)
# 构建请求头
headers = {
@ -614,6 +831,7 @@ class ControlSystem:
# 发送请求
session = requests.Session()
try:
print(f"🌐 发送TTS请求到: {self.api_config['tts']['url']}")
response = session.post(
self.api_config['tts']['url'],
headers=headers,
@ -625,16 +843,22 @@ class ControlSystem:
print(f"❌ TTS请求失败: {response.status_code}")
return False
print(f"✅ TTS请求成功开始接收音频流")
# 处理流式响应
total_audio_size = 0
chunk_count = 0
queue_size_before = self.output_audio_queue.qsize()
for chunk in response.iter_lines(decode_unicode=True):
if not chunk:
continue
print(f"🔍 原始TTS响应块 {chunk_count + 1}: {chunk[:100]}...")
try:
data = json.loads(chunk)
print(f"🔍 解析后的TTS块 {chunk_count + 1}: {data}")
if data.get("code", 0) == 0 and "data" in data and data["data"]:
chunk_audio = base64.b64decode(data["data"])
@ -642,24 +866,42 @@ class ControlSystem:
total_audio_size += audio_size
chunk_count += 1
# 检查队列状态
current_queue_size = self.output_audio_queue.qsize()
print(f"📦 发送音频块 {chunk_count}: {audio_size} 字节, 队列大小: {current_queue_size}")
# 发送到输出进程
self.output_audio_queue.put(chunk_audio)
# 检查是否发送成功
new_queue_size = self.output_audio_queue.qsize()
if new_queue_size == current_queue_size + 1:
print(f"✅ 音频块 {chunk_count} 发送成功")
else:
print(f"⚠️ 音频块 {chunk_count} 发送后队列大小异常: {current_queue_size} -> {new_queue_size}")
# 显示进度
if chunk_count % 10 == 0:
if chunk_count % 5 == 0: # 更频繁显示进度
progress = f"📥 TTS生成: {chunk_count} 块 | {total_audio_size / 1024:.1f} KB"
print(f"\r{progress}", end='', flush=True)
if data.get("code", 0) == 20000000:
elif data.get("code", 0) == 20000000:
print(f"🏁 收到TTS结束信号")
break
elif data.get("code", 0) > 0:
print(f"❌ TTS错误响应: {data}")
except json.JSONDecodeError:
except json.JSONDecodeError as e:
print(f"❌ JSON解析错误: {e}")
print(f"原始数据: {chunk}")
continue
print(f"\n✅ TTS音频生成完成: {chunk_count} 块, {total_audio_size / 1024:.1f} KB")
print(f"📊 队列大小变化: {queue_size_before} -> {self.output_audio_queue.qsize()}")
# 发送结束信号
self.output_audio_queue.put(None)
# 不再在这里发送结束信号,让输出进程自然播放完所有音频
print(f"📦 TTS音频数据已全部发送等待输出进程播放完成")
print(f"📊 音频队列当前大小: {self.output_audio_queue.qsize()}")
return chunk_count > 0
@ -669,6 +911,8 @@ class ControlSystem:
except Exception as e:
print(f"❌ 文本转语音失败: {e}")
import traceback
print(f"❌ 详细错误: {traceback.format_exc()}")
return False
def _display_status(self):

377
enhanced_voice_detector.py Normal file
View File

@ -0,0 +1,377 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
高级语音检测器
结合能量+ZCR双重检测的自适应语音检测算法
针对16000Hz采样率优化
"""
import numpy as np
import time
from collections import deque
from typing import Dict, Any, Optional
import pyaudio
class EnhancedVoiceDetector:
    """Adaptive voice-activity detector using RMS energy + zero-crossing rate.

    Collects ambient statistics during a calibration phase, then derives
    adaptive thresholds (median/std and IQR based) for frame-level detection.
    A small state machine requires several consecutive voice frames before
    reporting speech and several consecutive silent frames before resetting.
    Tuned for 16 kHz, 16-bit mono PCM input.
    """

    def __init__(self, sample_rate=16000, chunk_size=1024):
        """Initialize detector state.

        Args:
            sample_rate: input sample rate in Hz (used by the ZCR estimate).
            chunk_size: expected frame size in samples (informational only).
        """
        self.sample_rate = sample_rate
        self.chunk_size = chunk_size

        # Sliding windows of recent per-frame features.
        self.energy_window = deque(maxlen=100)
        self.zcr_window = deque(maxlen=100)

        # Rolling statistics computed over the windows above.
        self.energy_stats = {
            'mean': 0, 'std': 0, 'min': float('inf'), 'max': 0,
            'median': 0, 'q75': 0, 'q25': 0
        }
        self.zcr_stats = {
            'mean': 0, 'std': 0, 'min': float('inf'), 'max': 0,
            'median': 0, 'q75': 0, 'q25': 0
        }

        # Calibration: gather this many frames before real detection starts.
        self.calibration_mode = True
        self.calibration_samples = 0
        self.required_calibration = 100  # frames needed to calibrate

        # Adaptive-threshold tuning, biased towards sensitivity.
        self.energy_multiplier = 1.0           # energy threshold multiplier (lowered)
        self.zcr_std_multiplier = 1.0          # ZCR std-dev multiplier (lowered)
        self.min_energy_threshold = 80         # floor for the energy threshold (lowered)
        self.consecutive_voice_threshold = 2   # voice frames needed to confirm (lowered)
        self.consecutive_silence_threshold = 15  # silent frames needed to reset (raised)

        # Cross-frame state.
        self.consecutive_voice_count = 0
        self.consecutive_silence_count = 0
        self.last_voice_time = 0

        # Debug / bookkeeping.
        self.debug_mode = True
        self.voice_count = 0         # number of distinct voice onsets observed
        self.total_samples = 0       # total frames processed
        self._last_voice_state = False  # detection result of the previous frame

    def calculate_energy(self, audio_data: bytes) -> float:
        """Return the RMS energy of one frame of 16-bit PCM audio."""
        if len(audio_data) == 0:
            return 0
        audio_array = np.frombuffer(audio_data, dtype=np.int16)
        # Cast to float before squaring to avoid int16 overflow.
        rms = np.sqrt(np.mean(audio_array.astype(float) ** 2))
        return rms

    def calculate_zcr(self, audio_data: bytes) -> float:
        """Return the zero-crossing rate of one frame, scaled to crossings/second."""
        if len(audio_data) == 0:
            return 0
        audio_array = np.frombuffer(audio_data, dtype=np.int16)
        zero_crossings = np.sum(np.diff(np.sign(audio_array)) != 0)
        zcr = zero_crossings / len(audio_array) * self.sample_rate
        return zcr

    def update_statistics(self, energy: float, zcr: float):
        """Push one frame's features and refresh the rolling statistics."""
        self.energy_window.append(energy)
        self.zcr_window.append(zcr)

        # Wait for a minimally meaningful sample before computing statistics.
        if len(self.energy_window) >= 20:
            energy_array = np.array(self.energy_window)
            zcr_array = np.array(self.zcr_window)

            self.energy_stats['mean'] = np.mean(energy_array)
            self.energy_stats['std'] = np.std(energy_array)
            self.energy_stats['min'] = np.min(energy_array)
            self.energy_stats['max'] = np.max(energy_array)
            self.energy_stats['median'] = np.median(energy_array)
            self.energy_stats['q25'] = np.percentile(energy_array, 25)
            self.energy_stats['q75'] = np.percentile(energy_array, 75)

            self.zcr_stats['mean'] = np.mean(zcr_array)
            self.zcr_stats['std'] = np.std(zcr_array)
            self.zcr_stats['min'] = np.min(zcr_array)
            self.zcr_stats['max'] = np.max(zcr_array)
            self.zcr_stats['median'] = np.median(zcr_array)
            self.zcr_stats['q25'] = np.percentile(zcr_array, 25)
            self.zcr_stats['q75'] = np.percentile(zcr_array, 75)

    def get_adaptive_thresholds(self) -> Dict[str, float]:
        """Return the current energy/ZCR thresholds.

        Uses sensitive fixed defaults until enough history has accumulated,
        then blends a median+std threshold with an IQR-based robust threshold.
        """
        if len(self.energy_window) < 30:
            # Not enough history yet: fall back to sensitive fixed values.
            return {
                'energy_threshold': 120,
                'zcr_min': 2000,
                'zcr_max': 13000
            }

        # Median + std based threshold.
        base_energy_threshold = (self.energy_stats['median'] +
                                 self.energy_multiplier * self.energy_stats['std'])

        # IQR-based threshold is robust against outliers in the window.
        q75 = self.energy_stats['q75']
        q25 = self.energy_stats['q25']
        iqr = q75 - q25  # interquartile range
        iqr_threshold = q75 + 0.5 * iqr

        # Take the lower (more sensitive) of the two, but never below the floor.
        energy_threshold = max(self.min_energy_threshold,
                               min(base_energy_threshold * 0.7, iqr_threshold))

        # ZCR band centered on the median, clamped to a plausible speech range.
        zcr_center = self.zcr_stats['median']
        zcr_spread = self.zcr_std_multiplier * self.zcr_stats['std']
        zcr_min = max(1500, min(3000, zcr_center - zcr_spread))
        zcr_max = min(14000, max(6000, zcr_center + zcr_spread * 2.0))

        # Guarantee a minimum band width.
        if zcr_max - zcr_min < 2000:
            zcr_max = zcr_min + 2000

        return {
            'energy_threshold': energy_threshold,
            'zcr_min': zcr_min,
            'zcr_max': zcr_max
        }

    def is_voice_basic(self, energy: float, zcr: float) -> bool:
        """Single-frame test: energy above threshold AND ZCR inside the band."""
        thresholds = self.get_adaptive_thresholds()
        energy_ok = energy > thresholds['energy_threshold']
        zcr_ok = thresholds['zcr_min'] < zcr < thresholds['zcr_max']
        return energy_ok and zcr_ok

    def is_voice_advanced(self, audio_data: bytes) -> Dict[str, Any]:
        """Full detection for one frame, with calibration and state tracking.

        Args:
            audio_data: one frame of 16-bit PCM bytes.

        Returns:
            dict with at least 'is_voice', 'energy', 'zcr', 'confidence' and
            'calibrating'; during calibration also 'calibration_progress',
            afterwards the active thresholds and consecutive-frame counters.
        """
        energy = self.calculate_energy(audio_data)
        zcr = self.calculate_zcr(audio_data)
        self.update_statistics(energy, zcr)
        self.total_samples += 1

        # Calibration phase: only collect statistics, never report voice.
        if self.calibration_mode:
            self.calibration_samples += 1
            if self.calibration_samples >= self.required_calibration:
                self.calibration_mode = False
                if self.debug_mode:
                    print(f"\n🎯 校准完成!")
                    print(f" 能量统计: {self.energy_stats['median']:.0f}±{self.energy_stats['std']:.0f}")
                    print(f" ZCR统计: {self.zcr_stats['median']:.0f}±{self.zcr_stats['std']:.0f}")
            return {
                'is_voice': False,
                'energy': energy,
                'zcr': zcr,
                'calibrating': True,
                'calibration_progress': self.calibration_samples / self.required_calibration,
                'confidence': 0.0
            }

        is_voice_frame = self.is_voice_basic(energy, zcr)

        # State machine: require consecutive voice frames to confirm speech,
        # and consecutive silent frames before clearing the voice run.
        if is_voice_frame:
            self.consecutive_voice_count += 1
            self.consecutive_silence_count = 0
            self.last_voice_time = time.time()
        else:
            self.consecutive_silence_count += 1
            if self.consecutive_silence_count >= self.consecutive_silence_threshold:
                self.consecutive_voice_count = 0

        final_voice_detected = self.consecutive_voice_count >= self.consecutive_voice_threshold

        # Count a new voice segment only on the rising edge (silence -> voice).
        # Bug fix: the previous condition
        #   `final and not hasattr(self, '_last_voice_state') or not self._last_voice_state`
        # parsed as `(final and ...) or (not last_state)` and therefore bumped
        # the counter on every frame that followed a non-voice frame.
        if final_voice_detected and not self._last_voice_state:
            self.voice_count += 1
        self._last_voice_state = final_voice_detected

        # Confidence: mean of a capped energy ratio and a binary ZCR-in-band score.
        thresholds = self.get_adaptive_thresholds()
        energy_confidence = min(1.0, energy / thresholds['energy_threshold'])
        zcr_confidence = 1.0 if thresholds['zcr_min'] < zcr < thresholds['zcr_max'] else 0.0
        confidence = (energy_confidence + zcr_confidence) / 2

        return {
            'is_voice': final_voice_detected,
            'energy': energy,
            'zcr': zcr,
            'confidence': confidence,
            'energy_threshold': thresholds['energy_threshold'],
            'zcr_min': thresholds['zcr_min'],
            'zcr_max': thresholds['zcr_max'],
            'consecutive_voice_count': self.consecutive_voice_count,
            'consecutive_silence_count': self.consecutive_silence_count,
            'calibrating': False,
            'voice_detection_rate': self.voice_count / self.total_samples if self.total_samples > 0 else 0
        }

    def get_debug_info(self) -> str:
        """Return a one-line human-readable summary of the detector state."""
        if self.calibration_mode:
            return f"校准中: {self.calibration_samples}/{self.required_calibration}"
        thresholds = self.get_adaptive_thresholds()
        # Guard against division by zero when called before any frame arrived.
        rate = (self.voice_count / self.total_samples * 100) if self.total_samples > 0 else 0.0
        return (f"能量阈值: {thresholds['energy_threshold']:.0f} | "
                f"ZCR范围: {thresholds['zcr_min']:.0f}-{thresholds['zcr_max']:.0f} | "
                f"检测率: {self.voice_count}/{self.total_samples} ({rate:.1f}%)")

    def reset(self):
        """Clear all history and return to calibration mode."""
        self.energy_window.clear()
        self.zcr_window.clear()
        self.calibration_mode = True
        self.calibration_samples = 0
        self.consecutive_voice_count = 0
        self.consecutive_silence_count = 0
        self.voice_count = 0
        self.total_samples = 0
        self._last_voice_state = False  # keep edge counting consistent after reset
class VoiceDetectorTester:
    """Interactive microphone test harness for EnhancedVoiceDetector."""

    def __init__(self):
        # Fresh detector with default parameters (16 kHz, 1024-sample frames).
        self.detector = EnhancedVoiceDetector()

    def run_test(self, duration=30):
        """Capture live microphone audio and display detection results.

        Reads 1024-sample frames from the default input device for
        ``duration`` seconds, feeds each frame to the detector, tracks
        contiguous voice segments, prints a live status line and a final
        summary. NOTE(review): the banner always says "30秒" regardless of
        the ``duration`` argument.

        Args:
            duration: test length in seconds.
        """
        print("🎙️ 增强版语音检测器测试")
        print("=" * 50)
        print("📊 检测算法: 能量+ZCR双重检测")
        print("📈 采样率: 16000Hz")
        print("🔄 自适应阈值: 启用")
        print("⏱️ 测试时长: 30秒")
        print("💡 请说话测试检测效果...")
        print("🛑 按 Ctrl+C 提前结束")
        print("=" * 50)
        try:
            # Open the default input device: 16 kHz, 16-bit, mono.
            audio = pyaudio.PyAudio()
            stream = audio.open(
                format=pyaudio.paInt16,
                channels=1,
                rate=16000,
                input=True,
                frames_per_buffer=1024
            )
            start_time = time.time()
            voice_segments = []      # completed segments: start/end times + duration
            current_segment = None   # segment currently open, if any
            while time.time() - start_time < duration:
                # Read one frame; don't raise on buffer overflow.
                data = stream.read(1024, exception_on_overflow=False)
                result = self.detector.is_voice_advanced(data)
                # Open a segment on the first voice frame; close it on the
                # first non-voice frame after a segment was open.
                if result['is_voice']:
                    if current_segment is None:
                        current_segment = {
                            'start_time': time.time(),
                            'start_sample': self.detector.total_samples
                        }
                else:
                    if current_segment is not None:
                        current_segment['end_time'] = time.time()
                        current_segment['end_sample'] = self.detector.total_samples
                        current_segment['duration'] = current_segment['end_time'] - current_segment['start_time']
                        voice_segments.append(current_segment)
                        current_segment = None
                # Build the single-line status display (overwritten with \r).
                if result['calibrating']:
                    progress = result['calibration_progress'] * 100
                    status = f"\r🔧 校准中: {progress:.0f}% | 能量: {result['energy']:.0f} | ZCR: {result['zcr']:.0f}"
                else:
                    status_icon = "🎤" if result['is_voice'] else "🔇"
                    # ANSI colors: green while voice is detected, grey otherwise.
                    status_color = "\033[92m" if result['is_voice'] else "\033[90m"
                    reset_color = "\033[0m"
                    status = (f"{status_color}{status_icon} "
                              f"能量: {result['energy']:.0f}/{result['energy_threshold']:.0f} | "
                              f"ZCR: {result['zcr']:.0f} ({result['zcr_min']:.0f}-{result['zcr_max']:.0f}) | "
                              f"置信度: {result['confidence']:.2f} | "
                              f"连续: {result['consecutive_voice_count']}/{result['consecutive_silence_count']}{reset_color}")
                print(f"\r{status}", end='', flush=True)
                time.sleep(0.01)
            # Close any segment still open when the loop ends.
            if current_segment is not None:
                current_segment['end_time'] = time.time()
                current_segment['duration'] = current_segment['end_time'] - current_segment['start_time']
                voice_segments.append(current_segment)
            # Final summary.
            print(f"\n\n📊 测试结果统计:")
            print(f" 总检测时长: {duration}秒")
            print(f" 检测到语音段: {len(voice_segments)}个")
            print(f" 总语音时长: {sum(s['duration'] for s in voice_segments):.1f}秒")
            print(f" 语音占比: {sum(s['duration'] for s in voice_segments)/duration*100:.1f}%")
            # NOTE(review): this "average confidence" feeds the 4-byte literal
            # b'test' to the detector ten times — it is a synthetic value, not
            # the session average. Looks like placeholder code; verify intent.
            print(f" 平均置信度: {np.mean([r['confidence'] for r in [self.detector.is_voice_advanced(b'test') for _ in range(10)]]):.2f}")
            if voice_segments:
                print(f" 平均语音段时长: {np.mean([s['duration'] for s in voice_segments]):.1f}秒")
                print(f" 最长语音段: {max(s['duration'] for s in voice_segments):.1f}秒")
            print(f"\n🎯 检测器状态:")
            print(f" {self.detector.get_debug_info()}")
        except KeyboardInterrupt:
            print(f"\n\n🛑 测试被用户中断")
        except Exception as e:
            print(f"\n\n❌ 测试出错: {e}")
        finally:
            # Best-effort cleanup; stream/audio may not exist if open() failed.
            try:
                if 'stream' in locals():
                    stream.stop_stream()
                    stream.close()
                if 'audio' in locals():
                    audio.terminate()
            except:
                pass
def main():
    """Entry point: build a detector tester and run the interactive session."""
    VoiceDetectorTester().run_test()


if __name__ == "__main__":
    main()

98
process_logger.py Normal file
View File

@ -0,0 +1,98 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
日志配置模块
为多进程录音系统提供日志记录功能
"""
import logging
import os
import sys
from datetime import datetime
from typing import Optional
def setup_process_logger(process_name: str, log_dir: str = "logs") -> logging.Logger:
    """Create and configure a per-process logger.

    A timestamped log file is created under *log_dir*; DEBUG and above are
    written to the file, while INFO and above are echoed to stdout.

    Args:
        process_name: Process name, used in the logger name and log file name.
        log_dir: Directory where log files are written (created if missing).

    Returns:
        The fully configured ``logging.Logger`` instance.
    """
    # Make sure the log directory exists before opening the file handler.
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    # Timestamped file name keeps one file per process start.
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    log_path = os.path.join(log_dir, f"{process_name}_{stamp}.log")

    logger = logging.getLogger(f"{process_name}_logger")
    logger.setLevel(logging.DEBUG)
    # Drop any handlers left over from a previous call with the same name.
    logger.handlers.clear()

    # File handler records everything.
    to_file = logging.FileHandler(log_path, encoding='utf-8')
    to_file.setLevel(logging.DEBUG)
    to_file.setFormatter(logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'
    ))

    # Console handler only shows INFO and above.
    to_console = logging.StreamHandler(sys.stdout)
    to_console.setLevel(logging.INFO)
    to_console.setFormatter(logging.Formatter(
        '%(asctime)s - %(levelname)s - %(message)s',
        datefmt='%H:%M:%S'
    ))

    for handler in (to_file, to_console):
        logger.addHandler(handler)

    logger.info(f"日志系统初始化完成 - 进程: {process_name}")
    logger.info(f"日志文件: {log_path}")
    return logger
class ProcessLogger:
    """Thin logger wrapper that prefixes every message with the process name."""

    def __init__(self, process_name: str, log_dir: str = "logs"):
        self.process_name = process_name
        self.logger = setup_process_logger(process_name, log_dir)

    def _emit(self, sink, message: str):
        # Every level shares the same "[process_name] message" formatting.
        sink(f"[{self.process_name}] {message}")

    def debug(self, message: str):
        """Log *message* at DEBUG level."""
        self._emit(self.logger.debug, message)

    def info(self, message: str):
        """Log *message* at INFO level."""
        self._emit(self.logger.info, message)

    def warning(self, message: str):
        """Log *message* at WARNING level."""
        self._emit(self.logger.warning, message)

    def error(self, message: str):
        """Log *message* at ERROR level."""
        self._emit(self.logger.error, message)

    def critical(self, message: str):
        """Log *message* at CRITICAL level."""
        self._emit(self.logger.critical, message)

View File

@ -1,123 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
快速测试脚本
用于验证多进程录音系统的基础功能
"""
import time
import multiprocessing as mp
from audio_processes import InputProcess, OutputProcess
def test_audio_processes():
    """Instantiate the audio process classes and print the test configuration."""
    print("🧪 测试音频进程类...")
    # Queues wiring the input/output processes together.
    command_queue, event_queue, audio_queue = mp.Queue(), mp.Queue(), mp.Queue()
    # Process configuration tuned for a quick smoke test.
    config = {
        'zcr_min': 3000,
        'zcr_max': 10000,
        'min_recording_time': 3.0,
        'max_recording_time': 10.0,  # shortened to keep the test fast
        'silence_threshold': 3.0,
        'pre_record_duration': 2.0,
        'voice_activation_threshold': 5,  # lowered so triggering is easy
        'calibration_samples': 50,  # fewer samples -> shorter calibration
        'adaptive_threshold': True
    }
    # Construction alone is the test: both classes must accept this wiring.
    producer = InputProcess(command_queue, event_queue, config)
    consumer = OutputProcess(audio_queue)
    print("✅ 音频进程类创建成功")
    print("📋 测试配置:")
    print(f" ZCR范围: {config['zcr_min']} - {config['zcr_max']}")
    print(f" 校准样本数: {config['calibration_samples']}")
    print(f" 语音激活阈值: {config['voice_activation_threshold']}")
    return True
def test_dependencies():
    """Check that all required third-party libraries are importable.

    Prints one status line per dependency and a summary line.

    Returns:
        True when every dependency imports cleanly, False otherwise.
    """
    # Local import keeps the module's top-level imports untouched.
    import importlib

    print("🔍 检查依赖库...")
    # Single data-driven loop replaces four copy-pasted try/except blocks;
    # order matches the original output order.
    required = ('numpy', 'pyaudio', 'requests', 'websockets')
    missing = []
    for name in required:
        try:
            importlib.import_module(name)
            print(f"✅ {name}")
        except ImportError:
            print(f"❌ {name}")
            missing.append(name)
    if missing:
        print(f"❌ 缺少依赖: {', '.join(missing)}")
        return False
    print("✅ 所有依赖都已安装")
    return True
def main():
    """主测试函数"""
    print("🚀 多进程录音系统快速测试")
    print("=" * 50)
    # Run each check in order; stop at the first failure.
    checks = (
        (test_dependencies, "❌ 依赖检查失败"),
        (test_audio_processes, "❌ 音频进程测试失败"),
    )
    for check, failure_message in checks:
        if not check():
            print(failure_message)
            return False
        print()
    print("✅ 所有测试通过!")
    print("💡 现在可以运行主程序:")
    print(" python multiprocess_recorder.py")
    return True


if __name__ == "__main__":
    main()

113
start_with_logging.py Normal file
View File

@ -0,0 +1,113 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
启动脚本示例
演示如何使用带日志记录的多进程录音系统
"""
import os
import sys
import argparse
from datetime import datetime
def ensure_logs_directory(log_dir: str = "logs") -> str:
    """Make sure the log directory exists and return its path.

    Args:
        log_dir: Directory to create if missing (default: "logs", matching
            the previous hard-coded behavior).

    Returns:
        The directory path, so callers can use it directly.
    """
    if not os.path.exists(log_dir):
        # exist_ok guards against the race where another process creates the
        # directory between the exists() check and this call.
        os.makedirs(log_dir, exist_ok=True)
        print(f"✅ 创建日志目录: {log_dir}")
    return log_dir
def cleanup_old_logs(log_dir="logs", max_files=10):
    """Delete the oldest ``.log`` files so at most *max_files* remain."""
    if not os.path.exists(log_dir):
        return
    # Collect (path, mtime) pairs for every log file in the directory.
    entries = [
        (os.path.join(log_dir, name), os.path.getmtime(os.path.join(log_dir, name)))
        for name in os.listdir(log_dir)
        if name.endswith('.log')
    ]
    # Oldest first; only the surplus beyond max_files is removed.
    entries.sort(key=lambda pair: pair[1])
    surplus = max(0, len(entries) - max_files)
    for path, _mtime in entries[:surplus]:
        try:
            os.remove(path)
            print(f"🗑️ 删除旧日志文件: {path}")
        except Exception as e:
            # Stop on the first failure, just like the original loop.
            print(f"⚠️ 删除日志文件失败 {path}: {e}")
            break
def main():
    """Entry point: parse CLI options, prepare logging, launch the recorder."""
    parser = argparse.ArgumentParser(
        description='带日志记录的多进程录音系统启动器',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
使用示例:
  python start_with_logging.py                  # 使用默认设置
  python start_with_logging.py --clean-logs     # 清理旧日志
  python start_with_logging.py --log-dir my_logs  # 指定日志目录
        """
    )
    parser.add_argument('--character', '-c', type=str, default='libai',
                        help='选择角色 (默认: libai)')
    parser.add_argument('--log-dir', type=str, default='logs',
                        help='日志目录路径 (默认: logs)')
    parser.add_argument('--clean-logs', action='store_true',
                        help='清理旧日志文件')
    parser.add_argument('--max-log-files', type=int, default=10,
                        help='保留的最大日志文件数量 (默认: 10)')
    parser.add_argument('--config', type=str,
                        help='配置文件路径')
    parser.add_argument('--verbose', '-v', action='store_true',
                        help='详细输出')
    opts = parser.parse_args()

    print("🚀 带日志记录的多进程录音系统")
    print("=" * 60)

    # Prepare the log directory and optionally trim old log files.
    log_dir = ensure_logs_directory()
    if opts.clean_logs:
        cleanup_old_logs(log_dir, opts.max_log_files)

    print(f"📁 日志目录: {log_dir}")
    print(f"🎭 角色: {opts.character}")
    print("=" * 60)

    try:
        # Rebuild sys.argv so the recorder's own argument parser sees our
        # forwarded options.
        forwarded = ['multiprocess_recorder.py']
        if opts.character:
            forwarded += ['-c', opts.character]
        if opts.config:
            forwarded += ['--config', opts.config]
        if opts.verbose:
            forwarded.append('--verbose')
        sys.argv = forwarded
        import multiprocess_recorder
        multiprocess_recorder.main()
    except KeyboardInterrupt:
        print("\n👋 用户中断")
    except Exception as e:
        print(f"❌ 启动失败: {e}")
        if opts.verbose:
            import traceback
            traceback.print_exc()


if __name__ == "__main__":
    main()

View File

@ -1,194 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
语音检测测试脚本
用于测试和调试ZCR语音检测功能
"""
import numpy as np
import time
import pyaudio
from audio_processes import InputProcess
import multiprocessing as mp
import queue
class VoiceDetectionTester:
    """Interactive tester for ZCR (zero-crossing-rate) based voice detection.

    Runs two phases on the default microphone: a 10-second ambient-noise
    calibration that derives suggested ZCR thresholds, followed by a
    20-second live detection test using those thresholds.
    """
    def __init__(self):
        # Capture settings: 16-bit mono PCM at 16 kHz, 1024 frames per read.
        self.FORMAT = pyaudio.paInt16
        self.CHANNELS = 1
        self.RATE = 16000
        self.CHUNK_SIZE = 1024
        # Test parameters
        self.test_duration = 30  # planned total length in seconds (10s calibration + 20s detection)
        self.zcr_history = []  # reserved for collected ZCR values
        self.voice_count = 0  # number of voice segments detected so far
        # Audio device handles, created lazily in setup_audio().
        self.audio = None
        self.stream = None
    def setup_audio(self) -> bool:
        """Open the default input device; return True on success."""
        try:
            self.audio = pyaudio.PyAudio()
            self.stream = self.audio.open(
                format=self.FORMAT,
                channels=self.CHANNELS,
                rate=self.RATE,
                input=True,
                frames_per_buffer=self.CHUNK_SIZE
            )
            print("✅ 音频设备初始化成功")
            return True
        except Exception as e:
            print(f"❌ 音频设备初始化失败: {e}")
            return False
    def calculate_zcr(self, audio_data: bytes) -> float:
        """Return the zero-crossing rate of one PCM chunk, scaled to crossings/second.

        Args:
            audio_data: Raw little-endian 16-bit PCM bytes.
        """
        if len(audio_data) == 0:
            return 0
        audio_array = np.frombuffer(audio_data, dtype=np.int16)
        # Count sign changes between adjacent samples, then scale by the
        # sample rate to express the result per second.
        zero_crossings = np.sum(np.diff(np.sign(audio_array)) != 0)
        zcr = zero_crossings / len(audio_array) * self.RATE
        return zcr
    def test_detection(self):
        """Run calibration then the live voice-detection test, printing progress."""
        print("🎙️ 开始语音检测测试")
        print("=" * 50)
        # Phase 1: sample ambient noise for 10 seconds to characterize silence.
        print("🔍 第一阶段:环境噪音校准 (10秒)")
        print("请保持安静,不要说话...")
        calibration_samples = []
        start_time = time.time()
        try:
            while time.time() - start_time < 10:
                data = self.stream.read(self.CHUNK_SIZE, exception_on_overflow=False)
                if len(data) > 0:
                    zcr = self.calculate_zcr(data)
                    calibration_samples.append(zcr)
                # Progress indicator (carriage return keeps it on one line).
                progress = (time.time() - start_time) / 10 * 100
                print(f"\r校准进度: {progress:.1f}%", end='', flush=True)
                time.sleep(0.01)
            print("\n✅ 环境校准完成")
            # Phase 2 only runs when calibration produced samples, because the
            # suggested thresholds are derived from their statistics.
            if calibration_samples:
                avg_zcr = np.mean(calibration_samples)
                std_zcr = np.std(calibration_samples)
                min_zcr = min(calibration_samples)
                max_zcr = max(calibration_samples)
                print(f"📊 环境噪音统计:")
                print(f" 平均ZCR: {avg_zcr:.0f}")
                print(f" 标准差: {std_zcr:.0f}")
                print(f" 最小值: {min_zcr:.0f}")
                print(f" 最大值: {max_zcr:.0f}")
                # Suggested detection band: mean + 2..6 standard deviations,
                # clamped to the [2400, 12000] range.
                suggested_min = max(2400, avg_zcr + 2 * std_zcr)
                suggested_max = min(12000, avg_zcr + 6 * std_zcr)
                print(f"\n🎯 建议的语音检测阈值:")
                print(f" 最小阈值: {suggested_min:.0f}")
                print(f" 最大阈值: {suggested_max:.0f}")
                # Phase 2: live detection for 20 seconds using the suggested band.
                print(f"\n🎙️ 第二阶段:语音检测测试 (20秒)")
                print("现在请说话,测试语音检测...")
                voice_threshold = suggested_min
                silence_threshold = suggested_max
                consecutive_voice = 0
                voice_detected = False
                test_start = time.time()
                while time.time() - test_start < 20:
                    data = self.stream.read(self.CHUNK_SIZE, exception_on_overflow=False)
                    if len(data) > 0:
                        zcr = self.calculate_zcr(data)
                        # A chunk counts as voice when its ZCR falls inside the band.
                        is_voice = voice_threshold < zcr < silence_threshold
                        if is_voice:
                            consecutive_voice += 1
                            # Require 5 consecutive voice chunks before announcing
                            # a new segment, to debounce spurious spikes.
                            if consecutive_voice >= 5 and not voice_detected:
                                voice_detected = True
                                self.voice_count += 1
                                print(f"\n🎤 检测到语音 #{self.voice_count}! ZCR: {zcr:.0f}")
                        else:
                            consecutive_voice = 0
                            if voice_detected:
                                voice_detected = False
                                # last_voice_time was set on the previous voice
                                # chunk, so this is the time since voice stopped.
                                print(f" 语音结束,持续时间: {time.time() - last_voice_time:.1f}秒")
                        if voice_detected:
                            last_voice_time = time.time()
                        # Real-time status line with the current ZCR and counters.
                        status = "🎤" if voice_detected else "🔇"
                        print(f"\r{status} ZCR: {zcr:.0f} | 阈值: {voice_threshold:.0f}-{silence_threshold:.0f} | "
                              f"连续语音: {consecutive_voice}/5", end='', flush=True)
                    time.sleep(0.01)
                print(f"\n\n✅ 测试完成!共检测到 {self.voice_count} 次语音")
        except KeyboardInterrupt:
            print("\n🛑 测试被用户中断")
        except Exception as e:
            print(f"\n❌ 测试过程中出错: {e}")
    def cleanup(self):
        """Release the audio stream and PyAudio instance, ignoring errors."""
        if self.stream:
            try:
                self.stream.stop_stream()
                self.stream.close()
            except:
                pass
        if self.audio:
            try:
                self.audio.terminate()
            except:
                pass
    def run_test(self):
        """Run the full test: device setup, detection phases, then cleanup."""
        print("🚀 语音检测测试工具")
        print("=" * 60)
        if not self.setup_audio():
            print("❌ 无法初始化音频设备,测试终止")
            return
        try:
            self.test_detection()
        finally:
            # Always release the device, even on interrupt or error.
            self.cleanup()
        print("\n👋 测试结束")
def main():
    """Entry point: build a detection tester and run the full test."""
    VoiceDetectionTester().run_test()


if __name__ == "__main__":
    main()