#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Simplified wake-word + recording test.

Focused on avoiding audio conflicts: a single PyAudio input stream is opened
once and shared by both the wake-word listener and the recorder.
"""

import json
import math
import os
import sys
import threading  # noqa: F401  (kept: part of the original module namespace)
import time
from typing import List, Optional, Tuple

# Make modules that live next to this script importable.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

try:
    import pyaudio
    PYAUDIO_AVAILABLE = True
except ImportError:
    # Guarded like Vosk below so a missing dependency gives a readable
    # message instead of a traceback at import time.
    pyaudio = None
    PYAUDIO_AVAILABLE = False
    print("⚠️ PyAudio 未安装,请运行: pip install pyaudio")

try:
    from vosk import Model, KaldiRecognizer
    VOSK_AVAILABLE = True
except ImportError:
    VOSK_AVAILABLE = False
    print("⚠️ Vosk 未安装,请运行: pip install vosk")


class SimpleWakeAndRecord:
    """Listen for a wake word, then record until speech stops.

    The main loop (`start`) reads fixed-size chunks from one input stream.
    While idle, chunks are batched and fed to a Vosk recognizer to look for a
    wake word. Once one is heard, chunks are additionally stored as the
    recording and fed to a second recognizer whose (partial) results are used
    as the "somebody is still talking" signal. The recording is saved as a WAV
    file and played back when it ends.
    """

    def __init__(self, model_path="model", wake_words=None):
        """Configure parameters, then try to load the model and open audio.

        Args:
            model_path: Directory containing the Vosk model.
            wake_words: Phrases that trigger recording. Defaults to
                ``["你好", "助手"]`` when ``None``.

        Neither a missing model nor a missing audio device raises here; the
        failure is printed and `model` / `stream` are left as ``None``.
        """
        self.model_path = model_path
        # A fresh list per instance: a list literal as the default argument
        # would be shared (and mutable) across every instance.
        self.wake_words = (
            list(wake_words) if wake_words is not None else ["你好", "助手"]
        )
        self.model = None
        self.recognizer = None
        self.audio = None
        self.stream = None
        self.running = False

        # Audio parameters, tuned for a Raspberry Pi 3B.
        self.FORMAT = pyaudio.paInt16 if PYAUDIO_AVAILABLE else None
        self.CHANNELS = 1
        self.RATE = 8000  # 8 kHz instead of 16 kHz halves the data to process
        self.CHUNK_SIZE = 2048  # larger chunks mean fewer loop iterations

        # Recording state.
        self.recording = False
        self.recorded_frames = []
        self.last_text_time = None  # last time any text was recognised
        self.recording_start_time = None
        self.recording_recognizer = None  # recognizer used only while recording

        # Batching of chunks before they are handed to the recognizer.
        self.audio_buffer = []  # pending chunks, oldest first
        self.buffer_size = 10  # maximum number of pending chunks
        self.last_process_time = time.time()
        self.process_interval = 0.5  # minimum seconds between batches
        self.batch_process_size = 5  # chunks per batch

        # Performance monitoring.
        self.process_count = 0
        self.avg_process_time = 0
        self.last_monitor_time = time.time()
        self.monitor_interval = 5.0  # seconds between monitor lines

        # Thresholds (seconds).
        self.text_silence_threshold = 3.0  # stop after this long without text
        self.min_recording_time = 2.0  # never stop on silence before this
        self.max_recording_time = 30.0  # hard upper bound
        self.initial_speech_timeout = 5.0  # stop if nothing was ever recognised

        self._setup_model()
        self._setup_audio()

    def _setup_model(self):
        """Load the Vosk model and create the wake-word recognizer."""
        if not VOSK_AVAILABLE:
            return

        try:
            if not os.path.exists(self.model_path):
                print(f"模型路径不存在: {self.model_path}")
                return

            self.model = Model(self.model_path)
            self.recognizer = KaldiRecognizer(self.model, self.RATE)
            self.recognizer.SetWords(True)
            print("✅ Vosk 模型加载成功")
        except Exception as e:
            print(f"模型初始化失败: {e}")

    def _setup_audio(self):
        """Create the PyAudio instance and open the shared input stream."""
        if not PYAUDIO_AVAILABLE:
            # `start` reports the missing stream to the user.
            return

        try:
            if self.audio is None:
                self.audio = pyaudio.PyAudio()

            if self.stream is None:
                self.stream = self.audio.open(
                    format=self.FORMAT,
                    channels=self.CHANNELS,
                    rate=self.RATE,
                    input=True,
                    frames_per_buffer=self.CHUNK_SIZE,
                )
            print("✅ 音频设备初始化成功")
        except Exception as e:
            print(f"音频设备初始化失败: {e}")

    def _calculate_energy(self, audio_data):
        """Return the RMS amplitude of 16-bit PCM ``audio_data`` (0 if empty)."""
        if len(audio_data) == 0:
            return 0

        try:
            import numpy as np
        except ImportError:
            # Pure-stdlib fallback so the level meter does not require numpy.
            from array import array
            samples = array("h")
            samples.frombytes(audio_data)
            return math.sqrt(sum(s * s for s in samples) / len(samples))

        # Square in float64: squaring int16 values wraps around for anything
        # louder than ~181 and produced garbage (even NaN) RMS values.
        samples = np.frombuffer(audio_data, dtype=np.int16).astype(np.float64)
        return float(np.sqrt(np.mean(samples ** 2)))

    def _check_wake_word(self, text) -> Tuple[bool, Optional[str]]:
        """Return ``(True, word)`` for the first wake word found in ``text``.

        Matching is a case-insensitive substring test. Returns
        ``(False, None)`` when nothing matches or either input is empty.
        """
        if not text or not self.wake_words:
            return False, None

        text_lower = text.lower()
        for wake_word in self.wake_words:
            if wake_word.lower() in text_lower:
                return True, wake_word
        return False, None

    def _should_process_audio(self):
        """Return True when a full batch is buffered and the interval elapsed."""
        elapsed = time.time() - self.last_process_time
        return (elapsed >= self.process_interval
                and len(self.audio_buffer) >= self.batch_process_size)

    def _buffer_chunk(self, data):
        """Append ``data`` to the pending buffer, dropping the oldest overflow."""
        self.audio_buffer.append(data)
        overflow = len(self.audio_buffer) - self.buffer_size
        if overflow > 0:
            del self.audio_buffer[:overflow]

    def _process_audio_batch(self):
        """Remove one batch from the buffer and return it as a single bytes.

        Returns ``None`` when fewer than ``batch_process_size`` chunks are
        buffered. Also updates the running statistics and may print a monitor
        line. Note that the timed section covers only the slicing/joining done
        here, not the recognition performed by the caller.
        """
        if len(self.audio_buffer) < self.batch_process_size:
            return None

        start_time = time.time()

        batch_data = self.audio_buffer[:self.batch_process_size]
        self.audio_buffer = self.audio_buffer[self.batch_process_size:]
        combined_data = b''.join(batch_data)

        self.last_process_time = time.time()

        # Incremental running mean of the per-batch time.
        process_time = time.time() - start_time
        self.process_count += 1
        self.avg_process_time = (
            self.avg_process_time * (self.process_count - 1) + process_time
        ) / self.process_count

        self._monitor_performance()

        return combined_data

    def _monitor_performance(self):
        """Print batch statistics at most once per ``monitor_interval``."""
        current_time = time.time()
        if current_time - self.last_monitor_time >= self.monitor_interval:
            buffer_usage = len(self.audio_buffer) / self.buffer_size * 100
            print(f"\n📊 性能监控 | 处理次数: {self.process_count} | 平均处理时间: {self.avg_process_time:.3f}s | 缓冲区使用: {buffer_usage:.1f}%")
            self.last_monitor_time = current_time

    def _save_recording(self, audio_data) -> Tuple[bool, Optional[str]]:
        """Write ``audio_data`` to ``recording_<timestamp>.wav``.

        Returns ``(True, filename)`` on success, ``(False, None)`` on failure
        (the error is printed, not raised).
        """
        timestamp = time.strftime("%Y%m%d_%H%M%S")
        filename = f"recording_{timestamp}.wav"

        try:
            import wave
            with wave.open(filename, 'wb') as wf:
                wf.setnchannels(self.CHANNELS)
                wf.setsampwidth(self.audio.get_sample_size(self.FORMAT))
                wf.setframerate(self.RATE)
                wf.writeframes(audio_data)

            print(f"✅ 录音已保存: {filename}")
            return True, filename
        except Exception as e:
            print(f"保存录音失败: {e}")
            return False, None

    def _play_audio(self, filename):
        """Play a WAV file through PyAudio, falling back to a system player."""
        try:
            import wave

            with wave.open(filename, 'rb') as wf:
                playback_stream = self.audio.open(
                    format=self.audio.get_format_from_width(wf.getsampwidth()),
                    channels=wf.getnchannels(),
                    rate=wf.getframerate(),
                    output=True,
                )
                try:
                    print(f"🔊 开始播放: {filename}")
                    # Stream chunk by chunk so the whole file is never held
                    # in memory; readframes() returns b"" at end of file.
                    for chunk in iter(lambda: wf.readframes(1024), b""):
                        playback_stream.write(chunk)
                finally:
                    # Always release the output device, even if write() fails.
                    playback_stream.stop_stream()
                    playback_stream.close()

            print("✅ 播放完成")

        except Exception as e:
            print(f"❌ 播放失败: {e}")
            # PyAudio playback failed; try the platform's own player instead.
            self._play_with_system_player(filename)

    @staticmethod
    def _system_player_command(filename, system=None) -> List[str]:
        """Return the argv used to play ``filename`` on ``system``.

        ``system`` is a ``platform.system()`` value; it is detected when
        omitted.
        """
        if system is None:
            import platform
            system = platform.system()

        if system == 'Darwin':  # macOS
            return ['afplay', filename]
        if system == 'Windows':
            # `start` is a cmd.exe builtin, not an executable, so it has to be
            # run through cmd. The empty string is start's window-title slot.
            return ['cmd', '/c', 'start', '', '/min', filename]
        return ['aplay', filename]  # Linux

    def _play_with_system_player(self, filename):
        """Play ``filename`` with the OS command-line player (best effort)."""
        try:
            import subprocess

            cmd = self._system_player_command(filename)
            print(f"🔊 使用系统播放器: {' '.join(cmd)}")
            subprocess.run(cmd, check=True)
            print("✅ 播放完成")

        except Exception as e:
            print(f"❌ 系统播放器也失败: {e}")
            print(f"💡 文件已保存,请手动播放: {filename}")

    def _start_recording(self):
        """Switch to recording mode with a fresh recognizer and empty state."""
        print("🎙️ 开始录音,请说话...")
        self.recording = True
        self.recorded_frames = []
        self.last_text_time = None
        self.recording_start_time = time.time()

        # A dedicated recognizer keeps recording text separate from whatever
        # the wake-word recognizer has accumulated.
        if self.model:
            self.recording_recognizer = KaldiRecognizer(self.model, self.RATE)
            self.recording_recognizer.SetWords(True)

    def _stop_recording(self):
        """Leave recording mode, then save and play back what was captured."""
        frames = self.recorded_frames

        # Reset state *before* the slow save/playback: if playback is
        # interrupted, stop() must not find recording still set and save and
        # play the same frames a second time.
        self.recording = False
        self.recorded_frames = []
        self.last_text_time = None
        self.recording_start_time = None
        self.recording_recognizer = None
        # Leftover chunks belong to the recording; do not feed them to the
        # wake-word recognizer afterwards.
        self.audio_buffer = []

        if not frames:
            return

        audio_data = b''.join(frames)
        sample_width = (
            self.audio.get_sample_size(self.FORMAT) if self.audio else 2
        )
        duration = len(audio_data) / (self.RATE * self.CHANNELS * sample_width)
        print(f"📝 录音完成,时长: {duration:.2f}秒")

        success, filename = self._save_recording(audio_data)

        if success and filename:
            print("=" * 50)
            print("🔊 播放刚才录制的音频...")
            self._play_audio(filename)
            print("=" * 50)

    def _recording_stop_reason(self, recording_duration, now) -> Optional[str]:
        """Return the message explaining why recording should end, or None.

        Args:
            recording_duration: Seconds since recording started.
            now: Current ``time.time()`` value, compared to ``last_text_time``.
        """
        if self.last_text_time is not None:
            silence = now - self.last_text_time
            if (silence > self.text_silence_threshold
                    and recording_duration >= self.min_recording_time):
                return f"\n\n{self.text_silence_threshold:g}秒没有识别到文字,结束录音"
        elif recording_duration > self.initial_speech_timeout:
            # Nothing has been recognised at all since recording started.
            return f"\n\n{self.initial_speech_timeout:g}秒没有识别到文字,结束录音"

        if recording_duration > self.max_recording_time:
            return f"\n\n达到最大录音时间 {self.max_recording_time}s"
        return None

    def _handle_recording_chunk(self, data):
        """Store one chunk of the recording and decide whether to stop."""
        self.recorded_frames.append(data)
        recording_duration = time.time() - self.recording_start_time

        # Recognition runs on batches, not on every chunk.
        self._buffer_chunk(data)
        if self.recording_recognizer and self._should_process_audio():
            combined_data = self._process_audio_batch()
            if combined_data:
                if self.recording_recognizer.AcceptWaveform(combined_data):
                    result = json.loads(self.recording_recognizer.Result())
                    text = result.get('text', '').strip()
                    if text:
                        self.last_text_time = time.time()
                        print(f"\n📝 识别: {text}")
                else:
                    partial_result = json.loads(
                        self.recording_recognizer.PartialResult())
                    partial_text = partial_result.get('partial', '').strip()
                    if partial_text:
                        # A partial hypothesis also counts as "still talking".
                        self.last_text_time = time.time()
                        status = f"录音中... {recording_duration:.1f}s | {partial_text}"
                        print(f"\r{status}", end='', flush=True)

        reason = self._recording_stop_reason(recording_duration, time.time())
        if reason:
            print(reason)
            self._stop_recording()
            # Return so no stale "waiting" status is printed after stopping.
            return

        if self.last_text_time is None:
            status = f"等待语音输入... {recording_duration:.1f}s"
            print(f"\r{status}", end='', flush=True)

    def _handle_wake_chunk(self, data):
        """Feed one chunk to wake-word detection; start recording on a hit."""
        self._buffer_chunk(data)

        got_final_result = False
        if self._should_process_audio():
            combined_data = self._process_audio_batch()
            if combined_data and self.recognizer.AcceptWaveform(combined_data):
                got_final_result = True
                result = json.loads(self.recognizer.Result())
                text = result.get('text', '').strip()

                if text:
                    print(f"识别: {text}")

                    is_wake_word, detected_word = self._check_wake_word(text)
                    if is_wake_word:
                        print(f"🎯 检测到唤醒词: {detected_word}")
                        self._start_recording()

        if not got_final_result:
            # No final result this round: show a live level meter instead.
            energy = self._calculate_energy(data)
            if energy > 50:  # skip near-silent chunks
                partial_result = json.loads(self.recognizer.PartialResult())
                partial_text = partial_result.get('partial', '')
                if partial_text:
                    status = f"监听中... 能量: {energy:.0f} | {partial_text}"
                else:
                    status = f"监听中... 能量: {energy:.0f}"
                print(status, end='\r')

    def start(self):
        """Run the listen/record loop until `stop` is called or Ctrl+C."""
        if not self.stream:
            print("❌ 音频设备未初始化")
            return

        self.running = True
        print("🎤 开始监听...")
        print(f"唤醒词: {', '.join(self.wake_words)}")

        try:
            while self.running:
                data = self.stream.read(self.CHUNK_SIZE,
                                        exception_on_overflow=False)

                if len(data) == 0:
                    continue

                if self.recording:
                    self._handle_recording_chunk(data)
                elif self.model and self.recognizer:
                    self._handle_wake_chunk(data)

                time.sleep(0.05)  # small pause to reduce CPU usage

        except KeyboardInterrupt:
            print("\n👋 退出")
        except Exception as e:
            print(f"错误: {e}")
        finally:
            self.stop()

    def stop(self):
        """Stop the loop, flush any active recording and release audio."""
        self.running = False
        if self.recording:
            self._stop_recording()

        try:
            if self.stream:
                self.stream.stop_stream()
                self.stream.close()
        finally:
            # Release PortAudio even if closing the stream raised.
            self.stream = None
            if self.audio:
                self.audio.terminate()
                self.audio = None


def main():
    """Check for the model directory, build the system and start listening."""
    print("🚀 简化唤醒+录音测试")
    print("=" * 50)

    model_dir = "model"
    if not os.path.exists(model_dir):
        print("⚠️ 未找到模型目录")
        print("请下载 Vosk 模型到 model 目录")
        return

    system = SimpleWakeAndRecord(
        model_path=model_dir,
        wake_words=["你好", "助手", "小爱"]
    )

    if not system.model:
        print("❌ 模型加载失败")
        return

    print("✅ 系统初始化成功")
    print("📖 使用说明:")
    print("1. 说唤醒词开始录音")
    print("2. 基于语音识别判断,3秒没有识别到文字就结束")
    print("3. 最少录音2秒,最多30秒")
    print("4. 录音时实时显示识别结果")
    print("5. 录音文件自动保存")
    print("6. 录音完成后自动播放刚才录制的内容")
    print("7. 按 Ctrl+C 退出")
    print("🚀 性能优化已启用:")
    print(" - 采样率: 8kHz (降低50%数据量)")
    print(" - 批处理: 5个音频块/次")
    print(" - 处理间隔: 0.5秒")
    print(" - 缓冲区: 10个音频块")
    print(" - 性能监控: 每5秒显示")
    print("=" * 50)

    system.start()


if __name__ == "__main__":
    main()