朱潮 2025-09-18 21:21:34 +08:00
parent 711df77d38
commit d4ff3fd774
5 changed files with 306 additions and 965 deletions


@@ -1,101 +0,0 @@
#!/usr/bin/env python3
"""
Simple audio test script for diagnosing audio problems on a Raspberry Pi.
"""
import pyaudio
import time
import os
def test_audio():
"""测试音频设备"""
print("=== 音频设备测试 ===")
pa = pyaudio.PyAudio()
# List all devices
print("\nAvailable audio devices:")
for i in range(pa.get_device_count()):
info = pa.get_device_info_by_index(i)
print(f" 设备 {i}: {info['name']}")
print(f" 输入通道: {info['maxInputChannels']}")
print(f" 输出通道: {info['maxOutputChannels']}")
print(f" 默认采样率: {info['defaultSampleRate']}")
print()
# Find the default input device
default_input = pa.get_default_input_device_info()
print(f"Default input device: {default_input['name']} (index: {default_input['index']})")
# Find the default output device
default_output = pa.get_default_output_device_info()
print(f"Default output device: {default_output['name']} (index: {default_output['index']})")
pa.terminate()
def test_recording():
"""测试录音功能"""
print("\n=== 录音测试 ===")
pa = pyaudio.PyAudio()
try:
# Recording parameters
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000 # lowered sample rate (device default is 44100)
CHUNK = 1024
print(f"Trying to open the audio stream at {RATE} Hz")
# Open the audio stream
stream = pa.open(
format=FORMAT,
channels=CHANNELS,
rate=RATE,
input=True,
frames_per_buffer=CHUNK
)
print("开始录音5秒...")
frames = []
# 录音5秒
for i in range(0, int(RATE / CHUNK * 5)):
data = stream.read(CHUNK)
frames.append(data)
if i % 10 == 0:
print(f"录音中... {i * CHUNK / RATE:.1f}")
print("录音完成")
# 停止流
stream.stop_stream()
stream.close()
# Play back the recording
print("Playing back the recording...")
stream = pa.open(
format=FORMAT,
channels=CHANNELS,
rate=RATE,
output=True
)
for frame in frames:
stream.write(frame)
stream.stop_stream()
stream.close()
print("播放完成")
except Exception as e:
print(f"录音测试失败: {e}")
finally:
pa.terminate()
if __name__ == "__main__":
test_audio()
test_recording()

119 test_audio_playback.py Normal file

@@ -0,0 +1,119 @@
#!/usr/bin/env python3
"""
Audio playback test script
Tests audio playback on a Raspberry Pi.
"""
import subprocess
import time
import sys
import os
def test_audio_playback():
"""测试音频播放功能"""
print("=== 音频播放测试 ===")
# 检查音频设备
print("\n1. 检查音频设备...")
try:
result = subprocess.run(['aplay', '-l'], capture_output=True, text=True)
if result.returncode == 0:
print("音频设备列表:")
print(result.stdout)
else:
print("错误: 无法获取音频设备列表")
return False
except FileNotFoundError:
print("错误: aplay 命令未找到,请安装 alsa-utils")
return False
# Play a system test tone
print("\n2. Playing a test tone...")
try:
# Use speaker-test to generate a sine test tone
result = subprocess.run(['speaker-test', '-t', 'sine', '-f', '440', '-l', '1'],
capture_output=True, text=True, timeout=5)
if result.returncode == 0:
print("✓ 系统提示音播放成功")
else:
print("✗ 系统提示音播放失败")
return False
except (subprocess.TimeoutExpired, FileNotFoundError):
print("提示: speaker-test 测试跳过,尝试直接播放音频文件")
# 创建测试音频文件并播放
print("\n3. 创建并播放测试音频文件...")
test_audio_file = "/tmp/test_audio.wav"
# 使用sox生成测试音频如果可用
if os.path.exists("/usr/bin/sox"):
try:
subprocess.run(['sox', '-n', '-r', '44100', '-c', '2', test_audio_file,
'synth', '3', 'sine', '440'], check=True)
print("✓ 测试音频文件创建成功")
except (subprocess.CalledProcessError, FileNotFoundError):
print("无法创建测试音频文件,跳过文件播放测试")
return True
else:
print("sox 未安装,跳过文件播放测试")
return True
# Play the test audio file
try:
result = subprocess.run(['aplay', test_audio_file], capture_output=True, text=True)
if result.returncode == 0:
print("✓ 音频文件播放成功")
return True
else:
print("✗ 音频文件播放失败")
print(f"错误信息: {result.stderr}")
return False
except FileNotFoundError:
print("错误: aplay 命令未找到")
return False
finally:
# Clean up the test file
if os.path.exists(test_audio_file):
os.remove(test_audio_file)
def check_volume():
"""检查并设置音量"""
print("\n4. 检查音量设置...")
try:
result = subprocess.run(['amixer', 'sget', 'Master'], capture_output=True, text=True)
if result.returncode == 0:
print("当前音量设置:")
print(result.stdout)
# Set the volume to 80%
subprocess.run(['amixer', 'sset', 'Master', '80%'], check=True)
print("✓ Volume set to 80%")
return True
else:
print("无法获取音量信息")
return False
except (subprocess.CalledProcessError, FileNotFoundError):
print("amixer 命令未找到或执行失败")
return False
if __name__ == "__main__":
print("树莓派音频播放功能测试")
print("=" * 40)
success = True
# Check volume
if not check_volume():
success = False
# Test audio playback
if not test_audio_playback():
success = False
print("\n" + "=" * 40)
if success:
print("✓ 所有音频播放测试通过")
sys.exit(0)
else:
print("✗ 部分音频播放测试失败")
sys.exit(1)

187 test_audio_recording.py Normal file

@@ -0,0 +1,187 @@
#!/usr/bin/env python3
"""
Audio recording test script
Tests audio recording on a Raspberry Pi.
"""
import subprocess
import time
import sys
import os
import signal
def test_audio_recording():
"""测试音频录音功能"""
print("=== 音频录音测试 ===")
# 检查录音设备
print("\n1. 检查录音设备...")
try:
result = subprocess.run(['arecord', '-l'], capture_output=True, text=True)
if result.returncode == 0:
print("录音设备列表:")
print(result.stdout)
else:
print("错误: 无法获取录音设备列表")
return False
except FileNotFoundError:
print("错误: arecord 命令未找到,请安装 alsa-utils")
return False
# Record test audio
print("\n2. Recording 5 seconds of test audio...")
test_record_file = "/tmp/test_record.wav"
try:
print("请对着麦克风说话5秒录音开始...")
# 录制5秒音频
result = subprocess.run(['arecord', '-d', '5', '-f', 'cd', test_record_file],
capture_output=True, text=True)
if result.returncode == 0:
print("✓ 音频录制成功")
# 检查文件是否存在且大小合理
if os.path.exists(test_record_file):
file_size = os.path.getsize(test_record_file)
print(f"录制文件大小: {file_size} 字节")
if file_size > 1000: # 至少1KB
print("✓ 录音文件大小正常")
return True
else:
print("✗ 录音文件太小,可能录音失败")
return False
else:
print("✗ 录音文件未创建")
return False
else:
print("✗ 音频录制失败")
print(f"错误信息: {result.stderr}")
return False
except FileNotFoundError:
print("错误: arecord 命令未找到")
return False
except KeyboardInterrupt:
print("\n录音被用户中断")
return False
def test_audio_playback_verification():
"""播放录制的音频进行验证"""
print("\n3. 播放录制的音频进行验证...")
test_record_file = "/tmp/test_record.wav"
if not os.path.exists(test_record_file):
print("错误: 找不到录制的音频文件")
return False
try:
print("播放录制的音频...")
result = subprocess.run(['aplay', test_record_file], capture_output=True, text=True)
if result.returncode == 0:
print("✓ 录音播放成功")
return True
else:
print("✗ 录音播放失败")
print(f"错误信息: {result.stderr}")
return False
except FileNotFoundError:
print("错误: aplay 命令未找到")
return False
def test_microphone_levels():
"""测试麦克风音量级别"""
print("\n4. 测试麦克风音量级别...")
try:
# Read the current capture level
result = subprocess.run(['amixer', 'sget', 'Capture'], capture_output=True, text=True)
if result.returncode == 0:
print("当前麦克风音量:")
print(result.stdout)
# Set the capture level to 80%
subprocess.run(['amixer', 'sset', 'Capture', '80%'], check=True)
print("✓ Microphone level set to 80%")
return True
else:
print("无法获取麦克风音量信息")
return False
except (subprocess.CalledProcessError, FileNotFoundError):
print("amixer 命令未找到或执行失败")
return False
def test_realtime_monitoring():
"""实时音频监控测试"""
print("\n5. 实时音频监控测试3秒...")
try:
print("开始实时监控,请对着麦克风说话...")
# 使用parecord进行实时监控如果可用
cmd = ['parecord', '--monitor', '--latency-msec', '100', '--duration', '3', '/dev/null']
result = subprocess.run(cmd, capture_output=True, text=True, timeout=5)
if result.returncode == 0:
print("✓ 实时监控测试成功")
return True
else:
print("提示: 实时监控测试跳过需要pulseaudio")
return True
except (subprocess.TimeoutExpired, FileNotFoundError, subprocess.CalledProcessError):
print("提示: 实时监控测试跳过")
return True
def cleanup():
"""清理测试文件"""
test_files = ["/tmp/test_record.wav"]
for file_path in test_files:
if os.path.exists(file_path):
try:
os.remove(file_path)
print(f"✓ 已清理测试文件: {file_path}")
except OSError:
print(f"警告: 无法清理测试文件: {file_path}")
if __name__ == "__main__":
print("树莓派音频录音功能测试")
print("=" * 40)
success = True
# Check microphone levels
if not test_microphone_levels():
success = False
# Test audio recording
if not test_audio_recording():
success = False
# Play back the recording
if os.path.exists("/tmp/test_record.wav"):
if not test_audio_playback_verification():
success = False
# Real-time monitoring test
if not test_realtime_monitoring():
success = False
print("\n" + "=" * 40)
if success:
print("✓ 所有音频录音测试通过")
else:
print("✗ 部分音频录音测试失败")
# Clean up test files
cleanup()
sys.exit(0 if success else 1)


@@ -1,483 +0,0 @@
#!/usr/bin/env python3
"""
Voice Assistant: Real-Time Voice Chat
This app runs on a Raspberry Pi (or Linux desktop) and creates a low-latency, full-duplex voice interaction
with an AI character. It uses local speech recognition
(Vosk), local text-to-speech synthesis (Piper), and a locally hosted large language model via Ollama.
Key Features:
- Wake-free, continuous voice recognition with real-time transcription
- LLM-driven responses streamed from a selected local model (e.g., LLaMA, Qwen, Gemma)
- Audio response synthesis with a gruff custom voice using ONNX-based Piper models
- Optional noise mixing and filtering via SoX
- System volume control via ALSA
- Modular and responsive design suitable for low-latency, character-driven agents
Ideal for embedded voice AI demos, cosplay companions, or standalone AI characters.
Copyright: M15.ai
License: MIT
"""
import io
import json
import os
import queue
import re
import subprocess
import threading
import time
import wave
import numpy as np
import pyaudio
import requests
import soxr
from pydub import AudioSegment
from vosk import KaldiRecognizer, Model
# ------------------- TIMING UTILITY -------------------
class Timer:
def __init__(self, label):
self.label = label
self.enabled = True
def __enter__(self):
self.start = time.time()
return self
def __exit__(self, exc_type, exc_val, exc_tb):
if self.enabled:
elapsed_ms = (time.time() - self.start) * 1000
print(f"[Timing] {self.label}: {elapsed_ms:.0f} ms")
def disable(self):
self.enabled = False
# ------------------- FUNCTIONS -------------------
def get_input_device_index(preferred_name="default"):
pa = pyaudio.PyAudio()
index = None
for i in range(pa.get_device_count()):
info = pa.get_device_info_by_index(i)
if preferred_name.lower() in info['name'].lower() and info['maxInputChannels'] > 0:
print(f"[Debug] Selected input device {i}: {info['name']}")
print(f"[Debug] Device sample rate: {info['defaultSampleRate']} Hz")
index = i
break
pa.terminate()
if index is None:
print("[Warning] Preferred mic not found. Using default.")
return None
return index
def get_output_device_index(preferred_name="default"):
pa = pyaudio.PyAudio()
index = None
for i in range(pa.get_device_count()):
info = pa.get_device_info_by_index(i)
if preferred_name.lower() in info['name'].lower() and info['maxOutputChannels'] > 0:
print(f"[Debug] Selected output device {i}: {info['name']}")
index = i
break
pa.terminate()
if index is None:
print("[Warning] Preferred output device not found. Using default.")
return None
return index
def parse_card_number(device_str):
"""
Extract ALSA card number from string like 'plughw:3,0'
"""
try:
return int(device_str.split(":")[1].split(",")[0])
except Exception as e:
print(f"[Warning] Could not parse card number from {device_str}: {e}")
return 0 # fallback
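# Illustration (assumed ALSA device naming): parse_card_number('plughw:3,0') -> 3;
# unparsable strings such as 'default' fall back to card 0 via the except branch.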
def list_input_devices():
pa = pyaudio.PyAudio()
print("[Debug] Available input devices:")
for i in range(pa.get_device_count()):
info = pa.get_device_info_by_index(i)
if info['maxInputChannels'] > 0:
print(f" {i}: {info['name']} ({int(info['defaultSampleRate'])} Hz, {info['maxInputChannels']}ch)")
pa.terminate()
def resample_audio(data, orig_rate=48000, target_rate=16000):
# Convert byte string to numpy array
audio_np = np.frombuffer(data, dtype=np.int16)
# Resample using soxr
resampled_np = soxr.resample(audio_np, orig_rate, target_rate)
# Convert back to bytes
return resampled_np.astype(np.int16).tobytes()
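# In this app the capture callback feeds 48 kHz mic frames through this helper
# so that Vosk always receives 16 kHz audio, regardless of the device rate.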
def set_output_volume(volume_level, card_id=0):
"""
Set output volume using ALSA 'Speaker' control on specified card.
volume_level: 1-10 (user scale)
card_id: ALSA card number (from aplay -l)
"""
percent = max(1, min(volume_level, 10)) * 10 # map to 10-100%
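# e.g. volume_level=9 -> 90%; out-of-range values are clamped to 1-10 first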
try:
subprocess.run(
['amixer', '-c', str(card_id), 'sset', 'Speaker', str(percent) + '%'],
check=True,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL
)
print(f"[Debug] Volume set to {percent}% on card {card_id}")
except Exception as e:
print(f"[Warning] Volume control failed on card {card_id}: {e}")
# ------------------- PATHS -------------------
CONFIG_PATH = os.path.expanduser("va_config.json")
BASE_DIR = os.path.dirname(__file__)
MODEL_PATH = os.path.join(BASE_DIR, 'vosk-model')
CHAT_URL = 'https://open.bigmodel.cn/api/paas/v4/chat/completions'
AUTH_TOKEN = '0c9cbaca9d2bbf864990f1e1decdf340.dXRMsZCHTUbPQ0rm' # Replace with your actual token
# ------------------- CONFIG FILE LOADING -------------------
DEFAULT_CONFIG = {
"volume": 9,
"mic_name": "default",
"audio_output_device": "default",
"model_name": "glm-4.5",
"voice": "en_US-kathleen-low.onnx",
"enable_audio_processing": False,
"history_length": 4,
"system_prompt": "You are a helpful assistant."
}
def load_config():
# Load config from system file or fall back to defaults
if os.path.isfile(CONFIG_PATH):
try:
with open(CONFIG_PATH, 'r') as f:
user_config = json.load(f)
return {**DEFAULT_CONFIG, **user_config} # merge with defaults
except Exception as e:
print(f"[Warning] Failed to load system config: {e}")
print("[Debug] Using default config.")
return DEFAULT_CONFIG
config = load_config()
# Apply loaded config values
VOLUME = config["volume"]
MIC_NAME = config["mic_name"]
AUDIO_OUTPUT_DEVICE = config["audio_output_device"]
AUDIO_OUTPUT_DEVICE_INDEX = get_output_device_index(config["audio_output_device"])
OUTPUT_CARD = parse_card_number(AUDIO_OUTPUT_DEVICE) if AUDIO_OUTPUT_DEVICE else 0
MODEL_NAME = config["model_name"]
VOICE_MODEL = os.path.join("voices", config["voice"])
ENABLE_AUDIO_PROCESSING = config["enable_audio_processing"]
HISTORY_LENGTH = config["history_length"]
# Set system volume
set_output_volume(VOLUME, OUTPUT_CARD)
# Setup messages with system prompt
messages = [{"role": "system", "content": config["system_prompt"]}]
list_input_devices()
RATE = 48000
CHUNK = 1024
CHANNELS = 1
mic_enabled = True
DEVICE_INDEX = get_input_device_index()
# SOUND EFFECTS
NOISE_LEVEL = '0.04'
BANDPASS_HIGHPASS = '300'
BANDPASS_LOWPASS = '800'
# ------------------- VOICE MODEL -------------------
VOICE_MODELS_DIR = os.path.join(BASE_DIR, 'voices')
if not os.path.isdir(VOICE_MODELS_DIR):
os.makedirs(VOICE_MODELS_DIR)
VOICE_MODEL = os.path.join(VOICE_MODELS_DIR, config["voice"])
print('[Debug] Available Piper voices:')
for f in os.listdir(VOICE_MODELS_DIR):
if f.endswith('.onnx'):
print(' ', f)
print(f'[Debug] Using VOICE_MODEL: {VOICE_MODEL}')
print(f"[Debug] Config loaded: model={MODEL_NAME}, voice={config['voice']}, vol={VOLUME}, mic={MIC_NAME}")
# ------------------- CONVERSATION STATE -------------------
audio_queue = queue.Queue()
# Audio callback from the Shure mic
def audio_callback(in_data, frame_count, time_info, status):
global mic_enabled
if not mic_enabled:
return (None, pyaudio.paContinue)
resampled_data = resample_audio(in_data, orig_rate=48000, target_rate=16000)
audio_queue.put(resampled_data)
return (None, pyaudio.paContinue)
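# Note: an input-only stream returns (None, paContinue); while mic_enabled is
# False (during TTS playback) incoming frames are simply dropped, so the
# assistant does not transcribe its own voice.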
# ------------------- STREAM SETUP -------------------
def start_stream():
pa = pyaudio.PyAudio()
stream = pa.open(
rate=RATE,
format=pyaudio.paInt16,
channels=CHANNELS,
input=True,
input_device_index=DEVICE_INDEX,
frames_per_buffer=CHUNK,
stream_callback=audio_callback
)
stream.start_stream()
print(f'[Debug] Stream @ {RATE}Hz')
return pa, stream
# ------------------- QUERY GLM API -------------------
def query_glm():
headers = {
'Authorization': 'Bearer ' + AUTH_TOKEN,
'Content-Type': 'application/json'
}
payload = {
"model": "glm-4.5",
"messages": [messages[0]] + messages[-HISTORY_LENGTH:], # force system prompt at top
"temperature": 0.6,
"max_tokens": 1024,
"stream": False
}
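# e.g. with the default HISTORY_LENGTH of 4, each request carries the system
# prompt plus the four most recent messages, keeping the payload bounded.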
with Timer("Inference"): # measure inference latency
try:
resp = requests.post(CHAT_URL, json=payload, headers=headers)
resp.raise_for_status() # Raise exception for HTTP errors
except requests.exceptions.RequestException as e:
print(f"[Error] GLM API request failed: {e}")
return ''
data = resp.json()
# Extract assistant message
reply = ''
if 'choices' in data and len(data['choices']) > 0:
choice = data['choices'][0]
if 'message' in choice and 'content' in choice['message']:
reply = choice['message']['content'].strip()
return reply
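# The extraction above assumes the usual OpenAI-style completion shape:
#   {"choices": [{"message": {"content": "..."}}], ...}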
# ------------------- TTS & DEGRADATION -------------------
import tempfile
def play_response(text):
# Mute the mic during playback to avoid feedback loop
global mic_enabled
mic_enabled = False # 🔇 mute mic
# clean the response
clean = re.sub(r"[\*]+", '', text) # remove asterisks
clean = re.sub(r"\(.*?\)", '', clean) # remove (stage directions)
clean = re.sub(r"<.*?>", '', clean) # remove HTML-style tags
clean = clean.replace('\n', ' ').strip() # normalize newlines
clean = re.sub(r'\s+', ' ', clean) # collapse whitespace
clean = re.sub(r'[\U0001F300-\U0001FAFF\u2600-\u26FF\u2700-\u27BF]+', '', clean) # remove emojis
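# Illustrative example (hypothetical input):
#   "*sighs* (leans closer) Hello <b>there</b>!\n" -> "sighs Hello there!"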
piper_path = os.path.join(BASE_DIR, 'bin', 'piper', 'piper')
# 1. Generate Piper raw PCM
with Timer("Piper inference"):
try:
piper_proc = subprocess.Popen(
[piper_path, '--model', VOICE_MODEL, '--output_raw'],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.DEVNULL
)
tts_pcm, _ = piper_proc.communicate(input=clean.encode())
except Exception as e:
print(f"[Error] Piper TTS failed: {e}")
return
if ENABLE_AUDIO_PROCESSING:
# SoX timing consolidation
sox_start = time.time()
# 2. Convert raw PCM to WAV
pcm_to_wav = subprocess.Popen(
['sox', '-t', 'raw', '-r', '16000', '-c', str(CHANNELS), '-b', '16',
'-e', 'signed-integer', '-', '-t', 'wav', '-'],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.DEVNULL
)
tts_wav_16k, _ = pcm_to_wav.communicate(input=tts_pcm)
# 3. Estimate duration
duration_sec = len(tts_pcm) / (16000 * 2) # 16-bit mono PCM at Piper's 16 kHz output rate
# 4. Generate white noise WAV bytes
noise_bytes = subprocess.check_output([
'sox', '-n',
'-r', '16000',
'-c', str(CHANNELS),
'-b', '16',
'-e', 'signed-integer',
'-t', 'wav', '-',
'synth', str(duration_sec),
'whitenoise', 'vol', NOISE_LEVEL
], stderr=subprocess.DEVNULL)
# 5. Write both to temp files & mix
with tempfile.NamedTemporaryFile(suffix='.wav') as tts_file, tempfile.NamedTemporaryFile(suffix='.wav') as noise_file:
tts_file.write(tts_wav_16k)
noise_file.write(noise_bytes)
tts_file.flush()
noise_file.flush()
mixer = subprocess.Popen(
['sox', '-m', tts_file.name, noise_file.name, '-t', 'wav', '-'],
stdout=subprocess.PIPE,
stderr=subprocess.DEVNULL
)
mixed_bytes, _ = mixer.communicate()
# 6. Apply filter
filter_proc = subprocess.Popen(
#['sox', '-t', 'wav', '-', '-t', 'wav', '-', 'highpass', BANDPASS_HIGHPASS, 'lowpass', BANDPASS_LOWPASS],
['sox', '-t', 'wav', '-', '-r', '48000', '-t', 'wav', '-',
'highpass', BANDPASS_HIGHPASS, 'lowpass', BANDPASS_LOWPASS],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.DEVNULL
)
final_bytes, _ = filter_proc.communicate(input=mixed_bytes)
sox_elapsed = (time.time() - sox_start) * 1000
print(f"[Timing] SoX (total): {int(sox_elapsed)} ms")
else:
# No FX: just convert raw PCM to WAV
pcm_to_wav = subprocess.Popen(
['sox', '-t', 'raw', '-r', '16000', '-c', str(CHANNELS), '-b', '16',
'-e', 'signed-integer', '-', '-t', 'wav', '-'],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.DEVNULL
)
tts_wav_16k, _ = pcm_to_wav.communicate(input=tts_pcm)
resample_proc = subprocess.Popen(
['sox', '-t', 'wav', '-', '-r', '48000', '-t', 'wav', '-'],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.DEVNULL
)
final_bytes, _ = resample_proc.communicate(input=tts_wav_16k)
# 7. Playback
with Timer("Playback"):
try:
wf = wave.open(io.BytesIO(final_bytes), 'rb')
pa = pyaudio.PyAudio()
stream = pa.open(
format=pa.get_format_from_width(wf.getsampwidth()),
channels=wf.getnchannels(),
rate=wf.getframerate(),
output=True,
output_device_index=AUDIO_OUTPUT_DEVICE_INDEX
)
data = wf.readframes(CHUNK)
while data:
stream.write(data)
data = wf.readframes(CHUNK)
stream.stop_stream()
stream.close()
pa.terminate()
wf.close()
except wave.Error as e:
print(f"[Error] Could not open final WAV: {e}")
finally:
mic_enabled = True # 🔊 unmute mic
time.sleep(0.3) # optional: small cooldown
# ------------------- PROCESSING LOOP -------------------
def processing_loop():
try:
model = Model(MODEL_PATH)
except Exception as e:
print(f"[Error] Failed to load Vosk model: {e}")
print(f"[Info] Model path: {MODEL_PATH}")
return
rec = KaldiRecognizer(model, 16000) # mic audio is resampled to 16 kHz before recognition
MAX_DEBUG_LEN = 200 # optional: limit length of debug output
LOW_EFFORT_UTTERANCES = {"huh", "uh", "um", "erm", "hmm", "he's", "but"}
while True:
data = audio_queue.get()
if rec.AcceptWaveform(data):
start = time.time()
r = json.loads(rec.Result())
elapsed_ms = int((time.time() - start) * 1000)
user = r.get('text', '').strip()
if user:
print(f"[Timing] STT parse: {elapsed_ms} ms")
print("User:", user)
if user.lower().strip(".,!? ") in LOW_EFFORT_UTTERANCES:
print("[Debug] Ignored low-effort utterance.")
rec = KaldiRecognizer(model, 16000)
continue # Skip LLM response + TTS for accidental noise
messages.append({"role": "user", "content": user})
# Generate assistant response
resp_text = query_glm()
if resp_text:
# Clean debug print (remove newlines and carriage returns)
clean_debug_text = resp_text.replace('\n', ' ').replace('\r', ' ')
if len(clean_debug_text) > MAX_DEBUG_LEN:
clean_debug_text = clean_debug_text[:MAX_DEBUG_LEN] + '...'
print('Assistant:', clean_debug_text)
messages.append({"role": "assistant", "content": clean_debug_text})
# TTS generation + playback
play_response(resp_text)
else:
print('[Debug] Empty response, skipping TTS.')
# Reset recognizer after each full interaction
rec = KaldiRecognizer(model, 16000)
# ------------------- MAIN -------------------
if __name__ == '__main__':
pa, stream = start_stream()
t = threading.Thread(target=processing_loop, daemon=True)
t.start()
try:
while stream.is_active():
time.sleep(0.1)
except KeyboardInterrupt:
stream.stop_stream(); stream.close(); pa.terminate()


@@ -1,381 +0,0 @@
#!/usr/bin/env python3
"""
Voice Assistant: Real-Time Voice Chat (fixed version)
Fixes the audio device problems on the Raspberry Pi.
"""
import io
import json
import os
import queue
import re
import subprocess
import threading
import time
import wave
import numpy as np
import pyaudio
import requests
import soxr
from pydub import AudioSegment
from vosk import KaldiRecognizer, Model
# ------------------- TIMING UTILITY -------------------
class Timer:
def __init__(self, label):
self.label = label
self.enabled = True
def __enter__(self):
self.start = time.time()
return self
def __exit__(self, exc_type, exc_val, exc_tb):
if self.enabled:
elapsed_ms = (time.time() - self.start) * 1000
print(f"[Timing] {self.label}: {elapsed_ms:.0f} ms")
def disable(self):
self.enabled = False
# ------------------- FUNCTIONS -------------------
def get_input_device_index(preferred_name=None):
pa = pyaudio.PyAudio()
try:
# Try the default device first
if preferred_name is None:
default_input = pa.get_default_input_device_info()
print(f"[Debug] Using default input device: {default_input['name']}")
return default_input['index']
# If a name was given, try to match it
for i in range(pa.get_device_count()):
info = pa.get_device_info_by_index(i)
if info['maxInputChannels'] > 0 and preferred_name.lower() in info['name'].lower():
print(f"[Debug] Selected input device {i}: {info['name']}")
print(f"[Debug] Device sample rate: {info['defaultSampleRate']} Hz")
return i
# If no match was found, use the default device
default_input = pa.get_default_input_device_info()
print(f"[Warning] Preferred mic not found. Using default: {default_input['name']}")
return default_input['index']
finally:
pa.terminate()
def get_output_device_index(preferred_name=None):
pa = pyaudio.PyAudio()
try:
# Try the default device first
if preferred_name is None:
default_output = pa.get_default_output_device_info()
print(f"[Debug] Using default output device: {default_output['name']}")
return default_output['index']
# If a name was given, try to match it
for i in range(pa.get_device_count()):
info = pa.get_device_info_by_index(i)
if info['maxOutputChannels'] > 0 and preferred_name.lower() in info['name'].lower():
print(f"[Debug] Selected output device {i}: {info['name']}")
return i
# If no match was found, use the default device
default_output = pa.get_default_output_device_info()
print(f"[Warning] Preferred output device not found. Using default: {default_output['name']}")
return default_output['index']
finally:
pa.terminate()
def list_input_devices():
pa = pyaudio.PyAudio()
try:
print("[Debug] Available input devices:")
for i in range(pa.get_device_count()):
info = pa.get_device_info_by_index(i)
if info['maxInputChannels'] > 0:
print(f" {i}: {info['name']} ({int(info['defaultSampleRate'])} Hz, {info['maxInputChannels']}ch)")
finally:
pa.terminate()
def resample_audio(data, orig_rate=44100, target_rate=16000):
# Convert byte string to numpy array
audio_np = np.frombuffer(data, dtype=np.int16)
# Resample using soxr
resampled_np = soxr.resample(audio_np, orig_rate, target_rate)
# Convert back to bytes
return resampled_np.astype(np.int16).tobytes()
# ------------------- PATHS -------------------
CONFIG_PATH = os.path.expanduser("va_config.json")
BASE_DIR = os.path.dirname(__file__)
MODEL_PATH = os.path.join(BASE_DIR, 'vosk-model')
CHAT_URL = 'https://open.bigmodel.cn/api/paas/v4/chat/completions'
AUTH_TOKEN = '0c9cbaca9d2bbf864990f1e1decdf340.dXRMsZCHTUbPQ0rm'
# ------------------- CONFIG FILE LOADING -------------------
DEFAULT_CONFIG = {
"volume": 8,
"mic_name": None,
"audio_output_device": None,
"model_name": "glm-4.5",
"voice": "en_US-kathleen-low.onnx",
"enable_audio_processing": False,
"history_length": 4,
"system_prompt": "You are a helpful assistant."
}
def load_config():
if os.path.isfile(CONFIG_PATH):
try:
with open(CONFIG_PATH, 'r') as f:
user_config = json.load(f)
return {**DEFAULT_CONFIG, **user_config}
except Exception as e:
print(f"[Warning] Failed to load system config: {e}")
print("[Debug] Using default config.")
return DEFAULT_CONFIG
config = load_config()
# Apply loaded config values
VOLUME = config["volume"]
MIC_NAME = config["mic_name"]
AUDIO_OUTPUT_DEVICE = config["audio_output_device"]
AUDIO_OUTPUT_DEVICE_INDEX = get_output_device_index(config["audio_output_device"])
MODEL_NAME = config["model_name"]
VOICE_MODEL = os.path.join("voices", config["voice"])
ENABLE_AUDIO_PROCESSING = config["enable_audio_processing"]
HISTORY_LENGTH = config["history_length"]
# Setup messages with system prompt
messages = [{"role": "system", "content": config["system_prompt"]}]
list_input_devices()
DEVICE_INDEX = get_input_device_index(config["mic_name"])
# Get the sample rate from the device
pa = pyaudio.PyAudio()
device_info = pa.get_device_info_by_index(DEVICE_INDEX)
INPUT_RATE = int(device_info['defaultSampleRate'])
OUTPUT_RATE = int(device_info['defaultSampleRate'])
pa.terminate()
CHUNK = 1024
CHANNELS = 1
mic_enabled = True
print(f"[Debug] Using sample rate: {INPUT_RATE} Hz")
print(f"[Debug] Config loaded: model={MODEL_NAME}, voice={config['voice']}, vol={VOLUME}")
# ------------------- CONVERSATION STATE -------------------
audio_queue = queue.Queue()
# Audio callback
def audio_callback(in_data, frame_count, time_info, status):
global mic_enabled
if not mic_enabled:
return (None, pyaudio.paContinue)
resampled_data = resample_audio(in_data, orig_rate=INPUT_RATE, target_rate=16000)
audio_queue.put(resampled_data)
return (None, pyaudio.paContinue)
# ------------------- STREAM SETUP -------------------
def start_stream():
pa = pyaudio.PyAudio()
stream = pa.open(
rate=INPUT_RATE, # use the device's default sample rate
format=pyaudio.paInt16,
channels=CHANNELS,
input=True,
input_device_index=DEVICE_INDEX,
frames_per_buffer=CHUNK,
stream_callback=audio_callback
)
stream.start_stream()
print(f'[Debug] Stream @ {INPUT_RATE}Hz')
return pa, stream
# ------------------- QUERY GLM API -------------------
def query_glm():
headers = {
'Authorization': 'Bearer ' + AUTH_TOKEN,
'Content-Type': 'application/json'
}
payload = {
"model": "glm-4.5",
"messages": [messages[0]] + messages[-HISTORY_LENGTH:],
"temperature": 0.6,
"max_tokens": 1024,
"stream": False
}
with Timer("Inference"):
try:
resp = requests.post(CHAT_URL, json=payload, headers=headers)
resp.raise_for_status()
except requests.exceptions.RequestException as e:
print(f"[Error] GLM API request failed: {e}")
return ''
data = resp.json()
reply = ''
if 'choices' in data and len(data['choices']) > 0:
choice = data['choices'][0]
if 'message' in choice and 'content' in choice['message']:
reply = choice['message']['content'].strip()
return reply
# ------------------- TTS & DEGRADATION -------------------
def play_response(text):
global mic_enabled
mic_enabled = False
# clean the response
clean = re.sub(r"[\*]+", '', text)
clean = re.sub(r"\(.*?\)", '', clean)
clean = re.sub(r"<.*?>", '', clean)
clean = clean.replace('\n', ' ').strip()
clean = re.sub(r'\s+', ' ', clean)
clean = re.sub(r'[\U0001F300-\U0001FAFF\u2600-\u26FF\u2700-\u27BF]+', '', clean)
piper_path = os.path.join(BASE_DIR, 'bin', 'piper', 'piper')
if not os.path.exists(piper_path):
print(f"[Error] Piper executable not found at {piper_path}")
mic_enabled = True
return
try:
# Generate Piper raw PCM
with Timer("Piper inference"):
piper_proc = subprocess.Popen(
[piper_path, '--model', VOICE_MODEL, '--output_raw'],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.DEVNULL
)
tts_pcm, _ = piper_proc.communicate(input=clean.encode())
# Convert raw PCM to WAV for playback
wav_io = io.BytesIO()
with wave.open(wav_io, 'wb') as wf:
wf.setnchannels(1)
wf.setsampwidth(2)
wf.setframerate(16000)
wf.writeframes(tts_pcm)
wav_io.seek(0)
wf = wave.open(wav_io, 'rb')
# Playback
with Timer("Playback"):
pa = pyaudio.PyAudio()
stream = pa.open(
format=pa.get_format_from_width(wf.getsampwidth()),
channels=wf.getnchannels(),
rate=wf.getframerate(),
output=True,
output_device_index=AUDIO_OUTPUT_DEVICE_INDEX
)
data = wf.readframes(CHUNK)
while data:
stream.write(data)
data = wf.readframes(CHUNK)
stream.stop_stream()
stream.close()
pa.terminate()
wf.close()
except Exception as e:
print(f"[Error] TTS playback failed: {e}")
finally:
mic_enabled = True
time.sleep(0.3)
# ------------------- PROCESSING LOOP -------------------
def processing_loop():
try:
model = Model(MODEL_PATH)
print("[Debug] Vosk model loaded successfully")
except Exception as e:
print(f"[Error] Failed to load Vosk model: {e}")
print(f"[Info] Model path: {MODEL_PATH}")
return
rec = KaldiRecognizer(model, 16000)
MAX_DEBUG_LEN = 200
LOW_EFFORT_UTTERANCES = {"huh", "uh", "um", "erm", "hmm", "he's", "but"}
while True:
try:
data = audio_queue.get()
if rec.AcceptWaveform(data):
start = time.time()
r = json.loads(rec.Result())
elapsed_ms = int((time.time() - start) * 1000)
user = r.get('text', '').strip()
if user:
print(f"[Timing] STT parse: {elapsed_ms} ms")
print("User:", user)
if user.lower().strip(".,!? ") in LOW_EFFORT_UTTERANCES:
print("[Debug] Ignored low-effort utterance.")
rec = KaldiRecognizer(model, 16000)
continue
messages.append({"role": "user", "content": user})
resp_text = query_glm()
if resp_text:
clean_debug_text = resp_text.replace('\n', ' ').replace('\r', ' ')
if len(clean_debug_text) > MAX_DEBUG_LEN:
clean_debug_text = clean_debug_text[:MAX_DEBUG_LEN] + '...'
print('Assistant:', clean_debug_text)
messages.append({"role": "assistant", "content": clean_debug_text})
play_response(resp_text)
else:
print('[Debug] Empty response, skipping TTS.')
rec = KaldiRecognizer(model, 16000)
except Exception as e:
print(f"[Error] Processing loop error: {e}")
time.sleep(1)
# ------------------- MAIN -------------------
if __name__ == '__main__':
try:
pa, stream = start_stream()
t = threading.Thread(target=processing_loop, daemon=True)
t.start()
print("[Debug] Voice assistant started. Press Ctrl+C to exit.")
while stream.is_active():
time.sleep(0.1)
except KeyboardInterrupt:
print("[Debug] Shutting down...")
stream.stop_stream()
stream.close()
pa.terminate()
except Exception as e:
print(f"[Error] Main loop error: {e}")
stream.stop_stream()
stream.close()
pa.terminate()