This commit is contained in:
朱潮 2025-09-20 14:58:49 +08:00
parent 9108fd4582
commit 43879961a2

840
voice_chat.py Normal file
View File

@ -0,0 +1,840 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
语音交互聊天系统 - 集成豆包AI
基于能量检测的录音 + 豆包语音识别 + TTS回复
"""
import sys
import os
import time
import threading
import asyncio
import subprocess
import wave
import struct
import json
import gzip
import uuid
from typing import Dict, Any, Optional
import pyaudio
import numpy as np
import websockets
# Doubao binary protocol constants (nibble values packed into the 4-byte header)
PROTOCOL_VERSION = 0b0001           # protocol version nibble
CLIENT_FULL_REQUEST = 0b0001        # client -> server: full (serialized) request
CLIENT_AUDIO_ONLY_REQUEST = 0b0010  # client -> server: raw audio frame
SERVER_FULL_RESPONSE = 0b1001       # server -> client: full response (events)
SERVER_ACK = 0b1011                 # server -> client: ack, may carry audio
SERVER_ERROR_RESPONSE = 0b1111      # server -> client: error frame
NO_SEQUENCE = 0b0000                # flags: no sequence number attached
MSG_WITH_EVENT = 0b0100             # flags: payload starts with a 4-byte event id
NO_SERIALIZATION = 0b0000           # serialization nibble: raw bytes
JSON = 0b0001                       # serialization nibble: JSON payload
GZIP = 0b0001                       # compression nibble: gzip (same value as JSON, different nibble)
class DoubaoClient:
    """Doubao realtime speech-dialog client (ASR + chat + TTS over one WebSocket).

    Implements the openspeech binary framing: a 4-byte header, a 4-byte
    big-endian event id, an optional length-prefixed session id, then a
    length-prefixed (usually gzip-compressed) payload.
    """

    def __init__(self):
        self.base_url = "wss://openspeech.bytedance.com/api/v3/realtime/dialogue"
        # NOTE(review): credentials are hard-coded in source; they should be
        # loaded from environment/config and rotated — anyone with read access
        # to this file can use them.
        self.app_id = "8718217928"
        self.access_key = "ynJMX-5ix1FsJvswC9KTNlGUdubcchqc"
        self.app_key = "PlgvMymc7f3tQnJ6"
        self.resource_id = "volc.speech.dialog"
        self.session_id = str(uuid.uuid4())  # one logical dialog session per client
        self.ws = None       # websockets connection, set by connect()
        self.log_id = ""     # server-side trace id taken from the handshake response

    def get_headers(self) -> Dict[str, str]:
        """Build the HTTP headers used for the WebSocket handshake."""
        return {
            "X-Api-App-ID": self.app_id,
            "X-Api-Access-Key": self.access_key,
            "X-Api-Resource-Id": self.resource_id,
            "X-Api-App-Key": self.app_key,
            "X-Api-Connect-Id": str(uuid.uuid4()),
        }

    def generate_header(self, message_type=CLIENT_FULL_REQUEST,
                        message_type_specific_flags=MSG_WITH_EVENT,
                        serial_method=JSON, compression_type=GZIP) -> bytes:
        """Build the fixed 4-byte protocol header.

        Byte 0: version (high nibble) | header size in 4-byte units (low nibble).
        Byte 1: message type | type-specific flags.
        Byte 2: serialization method | compression type.
        Byte 3: reserved (zero).
        """
        header = bytearray()
        header.append((PROTOCOL_VERSION << 4) | 1)  # version + header_size (1 => 4 bytes)
        header.append((message_type << 4) | message_type_specific_flags)
        header.append((serial_method << 4) | compression_type)
        header.append(0x00)  # reserved
        return bytes(header)

    async def connect(self) -> None:
        """Open the WebSocket and perform the StartConnection/StartSession handshake."""
        print(f"🔗 连接豆包服务器...")
        try:
            self.ws = await websockets.connect(
                self.base_url,
                additional_headers=self.get_headers(),
                ping_interval=None  # liveness is handled by our own heartbeat, not WS pings
            )
            # The trace-id attribute name differs across websockets versions.
            if hasattr(self.ws, 'response_headers'):
                self.log_id = self.ws.response_headers.get("X-Tt-Logid")
            elif hasattr(self.ws, 'headers'):
                self.log_id = self.ws.headers.get("X-Tt-Logid")
            print(f"✅ 连接成功, log_id: {self.log_id}")
            # Handshake: StartConnection first, then StartSession.
            await self._send_start_connection()
            await self._send_start_session()
        except Exception as e:
            print(f"❌ 连接失败: {e}")
            raise

    def parse_response(self, response):
        """Decode one binary server frame into a dict.

        Returns None for frames shorter than the 4-byte header.  The dict
        always carries protocol/header/type/flags/payload keys; 'event',
        'session_id', 'data_size', 'data' and 'json_data' are added only
        when the payload is long enough to contain them.
        """
        if len(response) < 4:
            return None
        protocol_version = response[0] >> 4
        header_size = response[0] & 0x0f
        message_type = response[1] >> 4
        flags = response[1] & 0x0f
        payload_start = header_size * 4  # header size is counted in 4-byte units
        payload = response[payload_start:]
        result = {
            'protocol_version': protocol_version,
            'header_size': header_size,
            'message_type': message_type,
            'flags': flags,
            'payload': payload,
            'payload_size': len(payload)
        }
        # Payload layout: event(4) | session_id_len(4) | session_id |
        # data_size(4) | data.
        if len(payload) >= 4:
            result['event'] = int.from_bytes(payload[:4], 'big')
            if len(payload) >= 8:
                session_id_len = int.from_bytes(payload[4:8], 'big')
                if len(payload) >= 8 + session_id_len:
                    result['session_id'] = payload[8:8+session_id_len].decode()
                    if len(payload) >= 12 + session_id_len:
                        data_size = int.from_bytes(payload[8+session_id_len:12+session_id_len], 'big')
                        result['data_size'] = data_size
                        result['data'] = payload[12+session_id_len:12+session_id_len+data_size]
                        # Best-effort: also expose the data as JSON when it parses.
                        try:
                            result['json_data'] = json.loads(result['data'].decode('utf-8'))
                        except:  # noqa: E722 — non-JSON data (audio) is expected here
                            pass
        return result

    async def _send_start_connection(self) -> None:
        """Send the StartConnection request (event 1) and consume the ack."""
        request = bytearray(self.generate_header())
        request.extend(int(1).to_bytes(4, 'big'))  # event id 1 = StartConnection
        payload_bytes = b"{}"  # empty JSON config
        payload_bytes = gzip.compress(payload_bytes)
        request.extend(len(payload_bytes).to_bytes(4, 'big'))
        request.extend(payload_bytes)
        await self.ws.send(request)
        response = await self.ws.recv()  # ack is read but not inspected

    async def _send_start_session(self) -> None:
        """Send the StartSession request (event 100) with ASR/TTS/dialog config."""
        session_config = {
            "asr": {"extra": {"end_smooth_window_ms": 1500}},
            "tts": {
                "speaker": "zh_female_vv_jupiter_bigtts",
                "audio_config": {"channel": 1, "format": "pcm", "sample_rate": 24000}
            },
            "dialog": {
                "bot_name": "豆包",
                "system_role": "你使用活泼灵动的女声,性格开朗,热爱生活。",
                "speaking_style": "你的说话风格简洁明了,语速适中,语调自然。",
                "location": {"city": "北京"},
                "extra": {
                    "strict_audit": False,
                    "audit_response": "支持客户自定义安全审核回复话术。",
                    "recv_timeout": 30,
                    "input_mod": "audio",
                },
            },
        }
        request = bytearray(self.generate_header())
        request.extend(int(100).to_bytes(4, 'big'))  # event id 100 = StartSession
        request.extend(len(self.session_id).to_bytes(4, 'big'))
        request.extend(self.session_id.encode())
        payload_bytes = json.dumps(session_config).encode()
        payload_bytes = gzip.compress(payload_bytes)
        request.extend(len(payload_bytes).to_bytes(4, 'big'))
        request.extend(payload_bytes)
        await self.ws.send(request)
        response = await self.ws.recv()  # ack is read but not inspected
        await asyncio.sleep(1.0)  # give the server a moment to set the session up

    async def process_audio(self, audio_data: bytes) -> tuple[str, bytes]:
        """Send one utterance and collect (recognized text, TTS reply audio).

        `audio_data` is raw 16-bit PCM.  The collected TTS stream is
        converted from 32-bit float samples to 16-bit PCM before returning.
        Returns ("", b"") on failure.
        """
        try:
            # Send the audio as a raw (non-serialized) gzip payload, event 200 —
            # same framing as doubao_simple.py.
            task_request = bytearray(
                self.generate_header(message_type=CLIENT_AUDIO_ONLY_REQUEST,
                                     serial_method=NO_SERIALIZATION))
            task_request.extend(int(200).to_bytes(4, 'big'))
            task_request.extend(len(self.session_id).to_bytes(4, 'big'))
            task_request.extend(self.session_id.encode())
            payload_bytes = gzip.compress(audio_data)
            task_request.extend(len(payload_bytes).to_bytes(4, 'big'))
            task_request.extend(payload_bytes)
            await self.ws.send(task_request)
            print("📤 音频数据已发送")
            recognized_text = ""
            tts_audio = b""
            response_count = 0
            # Collect server frames until TTS finishes or limits are hit.
            audio_chunks = []
            max_responses = 30
            while response_count < max_responses:
                try:
                    response = await asyncio.wait_for(self.ws.recv(), timeout=30.0)
                    response_count += 1
                    parsed = self.parse_response(response)
                    if not parsed:
                        continue
                    print(f"📥 响应 {response_count}: message_type={parsed['message_type']}, event={parsed.get('event', 'N/A')}, size={parsed['payload_size']}")
                    # Dispatch on frame type.
                    if parsed['message_type'] == 11:  # SERVER_ACK — may carry audio
                        if 'data' in parsed and parsed['data_size'] > 0:
                            audio_chunks.append(parsed['data'])
                            print(f"收集到音频块: {parsed['data_size']} 字节")
                    elif parsed['message_type'] == 9:  # SERVER_FULL_RESPONSE
                        event = parsed.get('event', 0)
                        if event == 450:  # ASR started
                            print("🎤 ASR处理开始")
                        elif event == 451:  # ASR result
                            if 'json_data' in parsed and 'results' in parsed['json_data']:
                                text = parsed['json_data']['results'][0].get('text', '')
                                recognized_text = text
                                print(f"🧠 识别结果: {text}")
                        elif event == 459:  # ASR finished
                            print("✅ ASR处理结束")
                        elif event == 350:  # TTS started
                            print("🎵 TTS生成开始")
                        elif event == 359:  # TTS finished — stop collecting
                            print("✅ TTS生成结束")
                            break
                        elif event == 550:  # TTS audio data
                            if 'data' in parsed and parsed['data_size'] > 0:
                                # If the data parses as JSON it is metadata;
                                # otherwise treat it as audio bytes.
                                try:
                                    json.loads(parsed['data'].decode('utf-8'))
                                    print("收到TTS音频元数据")
                                except:  # noqa: E722 — binary audio won't decode as JSON
                                    audio_chunks.append(parsed['data'])
                                    print(f"收集到TTS音频块: {parsed['data_size']} 字节")
                except asyncio.TimeoutError:
                    print(f"⏰ 等待响应 {response_count + 1} 超时")
                    break
                except websockets.exceptions.ConnectionClosed:
                    print("🔌 连接已关闭")
                    break
            print(f"共收到 {response_count} 个响应,收集到 {len(audio_chunks)} 个音频块")
            # Concatenate the collected audio chunks.
            if audio_chunks:
                tts_audio = b''.join(audio_chunks)
                print(f"合并后的音频数据: {len(tts_audio)} 字节")
            # Convert the TTS stream (assumed float32 PCM) to int16 PCM.
            if tts_audio:
                # Some responses arrive gzip-compressed; try decompressing first.
                try:
                    decompressed = gzip.decompress(tts_audio)
                    print(f"解压缩后音频数据: {len(decompressed)} 字节")
                    audio_to_write = decompressed
                except:  # noqa: E722 — not gzip, use the raw bytes
                    print("音频数据不是GZIP压缩格式直接使用原始数据")
                    audio_to_write = tts_audio
                # Float32 samples are 4 bytes each; truncate any ragged tail.
                if len(audio_to_write) % 4 != 0:
                    print(f"警告:音频数据长度 {len(audio_to_write)} 不是4的倍数截断到最近的倍数")
                    audio_to_write = audio_to_write[:len(audio_to_write) // 4 * 4]
                # Sample-by-sample float32 -> int16 conversion.
                float_count = len(audio_to_write) // 4
                int16_data = bytearray(float_count * 2)
                for i in range(float_count):
                    # Read one little-endian float32 sample.
                    float_value = struct.unpack('<f', audio_to_write[i*4:i*4+4])[0]
                    # Clamp to [-1.0, 1.0] before scaling.
                    float_value = max(-1.0, min(1.0, float_value))
                    # Scale into the int16 range.
                    int16_value = int(float_value * 32767)
                    # Write back as little-endian int16.
                    int16_data[i*2:i*2+2] = struct.pack('<h', int16_value)
                tts_audio = bytes(int16_data)
                print(f"✅ 音频转换完成: {len(tts_audio)} 字节")
            return recognized_text, tts_audio
        except Exception as e:
            print(f"❌ 处理失败: {e}")
            import traceback
            traceback.print_exc()
            return "", b""

    async def send_silence_data(self, duration_ms=100) -> None:
        """Send a short burst of silence (event 200) as a keep-alive."""
        try:
            # Build duration_ms of 16 kHz 16-bit silence (all-zero bytes).
            samples = int(16000 * duration_ms / 1000)
            silence_data = bytes(samples * 2)
            # Frame it exactly like a normal audio chunk.
            task_request = bytearray(
                self.generate_header(message_type=CLIENT_AUDIO_ONLY_REQUEST,
                                     serial_method=NO_SERIALIZATION))
            task_request.extend(int(200).to_bytes(4, 'big'))
            task_request.extend(len(self.session_id).to_bytes(4, 'big'))
            task_request.extend(self.session_id.encode())
            payload_bytes = gzip.compress(silence_data)
            task_request.extend(len(payload_bytes).to_bytes(4, 'big'))
            task_request.extend(payload_bytes)
            await self.ws.send(task_request)
            print("💓 发送心跳数据保持连接")
            # Drain one response without inspecting its contents.
            try:
                response = await asyncio.wait_for(self.ws.recv(), timeout=5.0)
            except asyncio.TimeoutError:
                print("⚠️ 心跳响应超时")
            except websockets.exceptions.ConnectionClosed:
                print("❌ 心跳时连接已关闭")
                raise  # re-raised, then swallowed/printed by the outer handler below
        except Exception as e:
            print(f"❌ 发送心跳数据失败: {e}")

    async def close(self) -> None:
        """Close the WebSocket, ignoring errors from an already-dead socket."""
        if self.ws:
            try:
                await self.ws.close()
            except:  # noqa: E722 — best-effort shutdown
                pass
            print("🔌 连接已关闭")
class VoiceChatRecorder:
    """Energy/ZCR-gated voice recorder with optional Doubao AI round trips.

    Listens on the microphone, starts recording when zero-crossing-rate
    based voice activity is detected, stops on silence, then either plays
    the recording back (plain mode) or sends it to Doubao for ASR + a TTS
    reply (AI mode).
    """

    def __init__(self, enable_ai_chat=True):
        # Audio capture parameters (16 kHz mono 16-bit PCM).
        self.FORMAT = pyaudio.paInt16
        self.CHANNELS = 1
        self.RATE = 16000
        self.CHUNK_SIZE = 1024
        # Energy/silence detection parameters.
        self.energy_threshold = 500        # RMS threshold (tracked, not gating)
        self.silence_threshold = 2.0       # seconds of silence that end a take
        self.min_recording_time = 1.0      # seconds before silence may stop a take
        self.max_recording_time = 20.0     # hard cap on a single take, seconds
        # Runtime state.
        self.audio = None
        self.stream = None
        self.running = False
        self.recording = False
        self.recorded_frames = []
        self.recording_start_time = None
        self.last_sound_time = None
        self.energy_history = []
        self.zcr_history = []
        # AI chat plumbing.
        self.enable_ai_chat = enable_ai_chat
        self.doubao_client = None
        self.is_processing_ai = False
        self.heartbeat_thread = None
        self.last_heartbeat_time = time.time()
        self.heartbeat_interval = 10.0  # send a keep-alive every 10 seconds
        # Rolling pre-record buffer (~2 s) so speech onsets are not clipped.
        self.pre_record_buffer = []
        self.pre_record_max_frames = int(2.0 * self.RATE / self.CHUNK_SIZE)
        # Playback state.
        self.is_playing = False
        # ZCR-based silence counting.
        self.consecutive_low_zcr_count = 0
        self.low_zcr_threshold_count = 15
        self.voice_activity_history = []
        self._setup_audio()

    def _setup_audio(self):
        """Open PyAudio and an input stream with the configured parameters."""
        try:
            self.audio = pyaudio.PyAudio()
            self.stream = self.audio.open(
                format=self.FORMAT,
                channels=self.CHANNELS,
                rate=self.RATE,
                input=True,
                frames_per_buffer=self.CHUNK_SIZE
            )
            print("✅ 音频设备初始化成功")
        except Exception as e:
            print(f"❌ 音频设备初始化失败: {e}")

    def generate_silence_audio(self, duration_ms=100):
        """Return duration_ms of 16-bit PCM silence at the capture rate."""
        samples = int(self.RATE * duration_ms / 1000)
        silence_data = bytes(samples * 2)  # 2 bytes per 16-bit sample, all zeros
        return silence_data

    def calculate_energy(self, audio_data):
        """Return the RMS energy of one chunk; track ambient history while idle."""
        if len(audio_data) == 0:
            return 0
        audio_array = np.frombuffer(audio_data, dtype=np.int16)
        # NOTE(review): squaring int16 values can wrap (|x| > 181 overflows
        # int16); consider astype(np.float64) before ** 2 — confirm intended.
        rms = np.sqrt(np.mean(audio_array ** 2))
        # Only accumulate ambient-energy history while not recording.
        if not self.recording:
            self.energy_history.append(rms)
            if len(self.energy_history) > 50:
                self.energy_history.pop(0)
        return rms

    def calculate_zero_crossing_rate(self, audio_data):
        """Return the zero-crossing rate (crossings per second) of one chunk."""
        if len(audio_data) == 0:
            return 0
        audio_array = np.frombuffer(audio_data, dtype=np.int16)
        zero_crossings = np.sum(np.diff(np.sign(audio_array)) != 0)
        zcr = zero_crossings / len(audio_array) * self.RATE
        self.zcr_history.append(zcr)
        if len(self.zcr_history) > 30:
            self.zcr_history.pop(0)
        return zcr

    def is_voice_active(self, energy, zcr):
        """Voice-activity decision based purely on a ZCR band.

        `energy` is currently unused; the 2400-12000 crossings/s band is
        the speech range used at the 16 kHz capture rate.
        """
        zcr_condition = 2400 < zcr < 12000
        return zcr_condition

    def save_recording(self, audio_data, filename=None):
        """Write raw PCM bytes to a WAV file; return (success, filename)."""
        if filename is None:
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            filename = f"recording_{timestamp}.wav"
        try:
            with wave.open(filename, 'wb') as wf:
                wf.setnchannels(self.CHANNELS)
                wf.setsampwidth(self.audio.get_sample_size(self.FORMAT))
                wf.setframerate(self.RATE)
                wf.writeframes(audio_data)
            # NOTE(review): "(unknown)" below looks like a mangled placeholder —
            # probably meant to interpolate {filename}; confirm against the
            # original commit before changing.
            print(f"✅ 录音已保存: (unknown)")
            return True, filename
        except Exception as e:
            print(f"❌ 保存录音失败: {e}")
            return False, None

    def play_audio(self, filename):
        """Play a WAV file via `aplay`, pausing capture while it plays."""
        try:
            # Abort any in-progress recording.
            if self.recording:
                self.recording = False
                self.recorded_frames = []
            # Release the input stream so the device is free during playback.
            if self.stream:
                self.stream.stop_stream()
                self.stream.close()
                self.stream = None
            self.is_playing = True
            time.sleep(0.2)
            # Delegate playback to the system player.
            # NOTE(review): "(unknown)" looks like a mangled placeholder —
            # probably meant to interpolate {filename}; confirm.
            print(f"🔊 播放: (unknown)")
            subprocess.run(['aplay', filename], check=True)
            print("✅ 播放完成")
        except Exception as e:
            print(f"❌ 播放失败: {e}")
        finally:
            self.is_playing = False
            time.sleep(0.2)
            self._setup_audio()  # reopen the input stream for capture

    def update_pre_record_buffer(self, audio_data):
        """Append a chunk to the rolling pre-record buffer (bounded length)."""
        self.pre_record_buffer.append(audio_data)
        if len(self.pre_record_buffer) > self.pre_record_max_frames:
            self.pre_record_buffer.pop(0)

    def start_recording(self):
        """Begin a take, seeding it with the buffered pre-roll audio."""
        print("🎙️ 检测到声音,开始录音...")
        self.recording = True
        self.recorded_frames = []
        self.recorded_frames.extend(self.pre_record_buffer)  # keep the speech onset
        self.pre_record_buffer = []
        self.recording_start_time = time.time()
        self.last_sound_time = time.time()
        self.consecutive_low_zcr_count = 0

    def stop_recording(self):
        """Finish a take and route it to AI processing or local playback."""
        if len(self.recorded_frames) > 0:
            audio_data = b''.join(self.recorded_frames)
            duration = len(audio_data) / (self.RATE * 2)  # bytes -> seconds (16-bit mono)
            print(f"📝 录音完成,时长: {duration:.2f}")
            if self.enable_ai_chat:
                # AI chat mode: hand the take to the Doubao pipeline.
                self.process_with_ai(audio_data)
            else:
                # Plain recorder mode: save then play back.
                success, filename = self.save_recording(audio_data)
                if success and filename:
                    print("=" * 50)
                    print("🔊 播放刚才录制的音频...")
                    self.play_audio(filename)
                    print("=" * 50)
        self.recording = False
        self.recorded_frames = []
        self.recording_start_time = None
        self.last_sound_time = None

    def process_with_ai(self, audio_data):
        """Kick off AI processing on a worker thread (one job at a time)."""
        if self.is_processing_ai:
            print("⏳ AI正在处理中请稍候...")
            return
        self.is_processing_ai = True
        # Run the whole AI round trip off the capture thread.
        ai_thread = threading.Thread(target=self._ai_processing_thread, args=(audio_data,))
        ai_thread.daemon = True
        ai_thread.start()

    def _heartbeat_thread(self):
        """Periodically send silence so the Doubao connection stays alive."""
        while self.running and self.doubao_client and self.doubao_client.ws:
            current_time = time.time()
            if current_time - self.last_heartbeat_time >= self.heartbeat_interval:
                try:
                    # Each beat runs on its own short-lived event loop.
                    loop = asyncio.new_event_loop()
                    asyncio.set_event_loop(loop)
                    try:
                        loop.run_until_complete(self.doubao_client.send_silence_data())
                        self.last_heartbeat_time = current_time
                    except Exception as e:
                        print(f"❌ 心跳失败: {e}")
                        # A failed beat likely means the connection is gone.
                        break
                    finally:
                        loop.close()
                except Exception as e:
                    print(f"❌ 心跳线程异常: {e}")
                    break
            # Re-check roughly once a second.
            time.sleep(1.0)
        print("📡 心跳线程结束")

    def _ai_processing_thread(self, audio_data):
        """Worker thread: connect to Doubao, run ASR + TTS, play the reply."""
        try:
            print("🤖 开始AI处理...")
            print("🧠 正在进行语音识别...")
            # This thread owns its own event loop for the async client.
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            try:
                # Fresh connection per utterance.
                self.doubao_client = DoubaoClient()
                loop.run_until_complete(self.doubao_client.connect())
                # Start keep-alives while we wait on the server.
                self.last_heartbeat_time = time.time()
                self.heartbeat_thread = threading.Thread(target=self._heartbeat_thread)
                self.heartbeat_thread.daemon = True
                self.heartbeat_thread.start()
                print("💓 心跳线程已启动")
                # ASR + TTS round trip.
                recognized_text, tts_audio = loop.run_until_complete(
                    self.doubao_client.process_audio(audio_data)
                )
                if recognized_text:
                    print(f"🗣️ 你说: {recognized_text}")
                if tts_audio:
                    # Persist the reply as a 24 kHz mono 16-bit WAV, then play it.
                    tts_filename = "ai_response.wav"
                    with wave.open(tts_filename, 'wb') as wav_file:
                        wav_file.setnchannels(1)
                        wav_file.setsampwidth(2)
                        wav_file.setframerate(24000)
                        wav_file.writeframes(tts_audio)
                    print("🎵 AI回复生成完成")
                    print("=" * 50)
                    print("🔊 播放AI回复...")
                    self.play_audio(tts_filename)
                    print("=" * 50)
                else:
                    print("❌ 未收到AI回复")
                # Linger briefly so the heartbeat keeps the session warm.
                print("⏳ 等待5秒后关闭连接...")
                time.sleep(5)
            except Exception as e:
                print(f"❌ AI处理失败: {e}")
            finally:
                # Drop our handle; the daemon heartbeat thread winds down on its own.
                if self.heartbeat_thread and self.heartbeat_thread.is_alive():
                    print("🛑 停止心跳线程")
                    self.heartbeat_thread = None
                # Tear down the connection and the loop.
                if self.doubao_client:
                    loop.run_until_complete(self.doubao_client.close())
                loop.close()
        except Exception as e:
            print(f"❌ AI处理线程失败: {e}")
        finally:
            self.is_processing_ai = False

    def run(self):
        """Main capture loop: listen, record, and dispatch finished takes."""
        if not self.stream:
            print("❌ 音频设备未初始化")
            return
        self.running = True
        if self.enable_ai_chat:
            print("🤖 语音聊天AI助手")
            print("=" * 50)
            print("🎯 功能特点:")
            print("- 🎙️ 智能语音检测")
            print("- 🧠 豆包AI语音识别")
            print("- 🗣️ AI智能回复")
            print("- 🔊 TTS语音播放")
            print("- 🔄 实时对话")
            print("=" * 50)
            print("📖 使用说明:")
            print("- 说话自动录音")
            print("- 静音2秒结束录音")
            print("- AI自动识别并回复")
            print("- 按 Ctrl+C 退出")
            print("=" * 50)
        else:
            print("🎙️ 智能录音系统")
            print("=" * 50)
            print("📖 使用说明:")
            print("- 说话自动录音")
            print("- 静音2秒结束录音")
            print("- 录音完成后自动播放")
            print("- 按 Ctrl+C 退出")
            print("=" * 50)
        try:
            while self.running:
                # Skip capture while playback or AI processing is busy.
                if self.is_playing or self.is_processing_ai:
                    status = "🤖 AI处理中..."
                    print(f"\r{status}", end='', flush=True)
                    time.sleep(0.1)
                    continue
                # Pull one chunk from the microphone.
                data = self.stream.read(self.CHUNK_SIZE, exception_on_overflow=False)
                if len(data) == 0:
                    continue
                # Per-chunk features.
                energy = self.calculate_energy(data)
                zcr = self.calculate_zero_crossing_rate(data)
                if self.recording:
                    # Recording mode: accumulate audio and watch for silence.
                    self.recorded_frames.append(data)
                    recording_duration = time.time() - self.recording_start_time
                    # Voice-activity bookkeeping.
                    if self.is_voice_active(energy, zcr):
                        self.last_sound_time = time.time()
                        self.consecutive_low_zcr_count = 0
                    else:
                        self.consecutive_low_zcr_count += 1
                    # Decide whether the take should end.
                    should_stop = False
                    # ZCR-based silence: N consecutive non-voice chunks.
                    if self.consecutive_low_zcr_count >= self.low_zcr_threshold_count:
                        should_stop = True
                    # Wall-clock silence fallback.
                    if not should_stop and time.time() - self.last_sound_time > self.silence_threshold:
                        should_stop = True
                    # Stop only once the minimum take length has elapsed.
                    if should_stop and recording_duration >= self.min_recording_time:
                        print(f"\n🔇 检测到静音,结束录音")
                        self.stop_recording()
                    # Hard cap on take length.
                    if recording_duration > self.max_recording_time:
                        print(f"\n⏰ 达到最大录音时间")
                        self.stop_recording()
                    # Status line while recording.
                    is_voice = self.is_voice_active(energy, zcr)
                    zcr_count = f"{self.consecutive_low_zcr_count}/{self.low_zcr_threshold_count}"
                    status = f"录音中... {recording_duration:.1f}s | ZCR: {zcr:.0f} | 语音: {is_voice} | 静音计数: {zcr_count}"
                    print(f"\r{status}", end='', flush=True)
                else:
                    # Listening mode: keep the rolling pre-roll buffer warm.
                    self.update_pre_record_buffer(data)
                    if self.is_voice_active(energy, zcr):
                        # Voice detected: begin a take.
                        self.start_recording()
                    else:
                        # Status line while idle.
                        is_voice = self.is_voice_active(energy, zcr)
                        buffer_usage = len(self.pre_record_buffer) / self.pre_record_max_frames * 100
                        status = f"监听中... ZCR: {zcr:.0f} | 语音: {is_voice} | 缓冲: {buffer_usage:.0f}%"
                        print(f"\r{status}", end='', flush=True)
                time.sleep(0.01)
        except KeyboardInterrupt:
            print("\n👋 退出")
        except Exception as e:
            print(f"❌ 错误: {e}")
        finally:
            self.stop()

    def stop(self):
        """Shut down capture, threads, the audio device, and any AI connection."""
        self.running = False
        # Drop our handle; the daemon heartbeat thread exits via `running`.
        if self.heartbeat_thread and self.heartbeat_thread.is_alive():
            print("🛑 停止心跳线程")
            self.heartbeat_thread = None
        if self.recording:
            self.stop_recording()
        if self.stream:
            self.stream.stop_stream()
            self.stream.close()
        if self.audio:
            self.audio.terminate()
        # Best-effort close of the Doubao connection on a throwaway loop.
        if self.doubao_client and self.doubao_client.ws:
            try:
                loop = asyncio.new_event_loop()
                asyncio.set_event_loop(loop)
                loop.run_until_complete(self.doubao_client.close())
                loop.close()
            except:  # noqa: E722 — shutting down anyway
                pass
def main():
    """Entry point: parse CLI flags, build the recorder, and run it."""
    import argparse

    arg_parser = argparse.ArgumentParser(description='语音聊天AI助手')
    arg_parser.add_argument('--no-ai', action='store_true', help='禁用AI功能仅录音')
    cli_args = arg_parser.parse_args()

    ai_enabled = not cli_args.no_ai
    # Banner reflects whether the Doubao pipeline is active.
    print("🚀 语音聊天AI助手" if ai_enabled else "🚀 智能录音系统")
    print("=" * 50)

    # Build the voice chat system and hand control to its capture loop.
    chat_system = VoiceChatRecorder(enable_ai_chat=ai_enabled)
    print("✅ 系统初始化成功")
    print("=" * 50)
    chat_system.run()


if __name__ == "__main__":
    main()