#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Minimal energy-detection audio recorder.

Tuned for the Raspberry Pi 3B: recording is triggered purely by the RMS
energy of the microphone signal, with no Vosk speech-recognition
dependency.
"""

import sys
import os
import time
import threading
import subprocess
import wave

import numpy as np

try:
    import pyaudio
except ImportError:
    # Keep the module importable without PyAudio; the failure is reported
    # through the normal "audio device initialisation failed" path instead
    # of crashing at import time.
    pyaudio = None


class EnergyBasedRecorder:
    """Voice-activated recorder driven by an RMS energy threshold.

    While listening, each captured chunk's RMS energy is compared against
    ``energy_threshold``. A chunk above the threshold starts a recording;
    the recording ends after ``silence_threshold`` seconds without such a
    chunk (once ``min_recording_time`` has elapsed) or when
    ``max_recording_time`` is exceeded. The finished clip is written to a
    WAV file and played back.

    Args:
        energy_threshold: RMS level above which a chunk counts as sound.
        silence_threshold: Seconds of continuous silence that end a
            recording.
        min_recording_time: Minimum recording length in seconds before a
            silence timeout may end it.
        max_recording_time: Hard upper bound on recording length, seconds.
    """

    def __init__(self, energy_threshold=500, silence_threshold=1.5,
                 min_recording_time=2.0, max_recording_time=30.0):
        # Audio parameters.
        self.FORMAT = pyaudio.paInt16 if pyaudio is not None else None
        self.CHANNELS = 1
        self.RATE = 8000          # 8 kHz sample rate
        self.CHUNK_SIZE = 1024    # frames per read
        self.SAMPLE_WIDTH = 2     # bytes per 16-bit sample (matches paInt16)

        # Energy-detection parameters.
        self.energy_threshold = energy_threshold
        self.silence_threshold = silence_threshold
        self.min_recording_time = min_recording_time
        self.max_recording_time = max_recording_time

        # Runtime state.
        self.audio = None
        self.stream = None
        self.running = False
        self.recording = False
        self.recorded_frames = []
        self.recording_start_time = None
        self.last_sound_time = None
        self.energy_history = []       # most recent RMS values
        self.max_energy_history = 50   # cap on len(energy_history)

        # Performance monitoring.
        self.frame_count = 0
        self.start_time = time.time()

        self._setup_audio()

    def _setup_audio(self):
        """Open the PyAudio input stream; report (not raise) on failure."""
        try:
            if pyaudio is None:
                raise RuntimeError("PyAudio is not installed")
            self.audio = pyaudio.PyAudio()
            self.stream = self.audio.open(
                format=self.FORMAT,
                channels=self.CHANNELS,
                rate=self.RATE,
                input=True,
                frames_per_buffer=self.CHUNK_SIZE,
            )
            print("✅ 音频设备初始化成功")
        except Exception as e:
            # Best-effort by design: run() checks self.stream afterwards.
            print(f"❌ 音频设备初始化失败: {e}")

    def calculate_energy(self, audio_data):
        """Return the RMS energy of a chunk of 16-bit PCM bytes.

        The value is also appended to ``energy_history``, which is trimmed
        to at most ``max_energy_history`` entries.

        Args:
            audio_data: Raw little-endian int16 sample bytes.

        Returns:
            The RMS amplitude as a float, or 0.0 when the chunk contains
            no complete sample (nothing is recorded in the history then).
        """
        # A trailing odd byte cannot form a sample; ignore it.
        sample_count = len(audio_data) // 2
        if sample_count == 0:
            return 0.0

        # Widen to float64 BEFORE squaring: int16 ** 2 wraps around for any
        # |sample| > 181, which corrupts the RMS and can produce NaN.
        samples = np.frombuffer(
            audio_data, dtype=np.int16, count=sample_count
        ).astype(np.float64)
        rms = float(np.sqrt(np.mean(np.square(samples))))

        self.energy_history.append(rms)
        excess = len(self.energy_history) - self.max_energy_history
        if excess > 0:
            del self.energy_history[:excess]
        return rms

    def get_average_energy(self):
        """Return the mean of ``energy_history`` (0 when it is empty)."""
        if not self.energy_history:
            return 0
        return float(np.mean(self.energy_history))

    def is_voice_active(self, energy):
        """Return True when ``energy`` exceeds ``energy_threshold``."""
        return energy > self.energy_threshold

    def save_recording(self, audio_data, filename=None):
        """Write raw PCM bytes to a WAV file.

        Args:
            audio_data: Raw int16 PCM bytes to store.
            filename: Target path; defaults to a timestamped
                ``recording_YYYYmmdd_HHMMSS.wav`` in the working directory.

        Returns:
            ``(True, filename)`` on success, ``(False, None)`` on failure.
        """
        if filename is None:
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            filename = f"recording_{timestamp}.wav"

        try:
            with wave.open(filename, 'wb') as wf:
                wf.setnchannels(self.CHANNELS)
                # Use the known int16 width so saving does not depend on a
                # live PyAudio instance.
                wf.setsampwidth(self.SAMPLE_WIDTH)
                wf.setframerate(self.RATE)
                wf.writeframes(audio_data)
            print(f"✅ 录音已保存: {filename}")
            return True, filename
        except Exception as e:
            print(f"❌ 保存录音失败: {e}")
            return False, None

    def play_audio(self, filename):
        """Play a WAV file through PyAudio, falling back to ``aplay``.

        Any failure (unreadable file, no output device, PyAudio missing)
        is reported and then retried with the system player.
        """
        try:
            if self.audio is None:
                raise RuntimeError("PyAudio is not available")

            with wave.open(filename, 'rb') as wf:
                playback_stream = self.audio.open(
                    format=self.audio.get_format_from_width(wf.getsampwidth()),
                    channels=wf.getnchannels(),
                    rate=wf.getframerate(),
                    output=True,
                )
                try:
                    print(f"🔊 开始播放: {filename}")
                    # Stream chunk by chunk instead of buffering the whole
                    # file; readframes() returns b'' at end of file.
                    for chunk in iter(lambda: wf.readframes(1024), b''):
                        playback_stream.write(chunk)
                    playback_stream.stop_stream()
                finally:
                    # Always release the output device, even if write fails.
                    playback_stream.close()
            print("✅ 播放完成")
        except Exception as e:
            print(f"❌ 播放失败: {e}")
            self.play_with_system_player(filename)

    def play_with_system_player(self, filename):
        """Play ``filename`` with the ``aplay`` command (Linux/ALSA)."""
        try:
            cmd = ['aplay', filename]
            print(f"🔊 使用系统播放器: {' '.join(cmd)}")
            subprocess.run(cmd, check=True)
            print("✅ 播放完成")
        except Exception as e:
            print(f"❌ 系统播放器也失败: {e}")

    def start_recording(self):
        """Switch to recording mode and reset per-recording state."""
        print("🎙️ 检测到声音,开始录音...")
        now = time.time()
        self.recording = True
        self.recorded_frames = []
        self.recording_start_time = now
        self.last_sound_time = now
        self.energy_history = []  # restart the energy statistics

    def stop_recording(self):
        """Finish the current recording: save it, play it, reset state."""
        if self.recorded_frames:
            audio_data = b''.join(self.recorded_frames)
            bytes_per_second = self.RATE * self.SAMPLE_WIDTH * self.CHANNELS
            duration = len(audio_data) / bytes_per_second
            print(f"📝 录音完成,时长: {duration:.2f}秒")

            success, filename = self.save_recording(audio_data)

            if success and filename:
                print("=" * 50)
                print("🔊 播放刚才录制的音频...")
                # NOTE(review): the input stream stays open during
                # playback, so the microphone may pick up the replay;
                # confirm on target hardware whether it should be paused.
                self.play_audio(filename)
                print("=" * 50)

        self.recording = False
        self.recorded_frames = []
        self.recording_start_time = None
        self.last_sound_time = None
        self.energy_history = []

    def monitor_performance(self):
        """Count a processed chunk; print throughput every 1000 chunks."""
        self.frame_count += 1
        if self.frame_count % 1000 != 0:
            return
        elapsed = time.time() - self.start_time
        fps = self.frame_count / elapsed if elapsed > 0 else 0.0
        avg_energy = self.get_average_energy()
        print(f"📊 性能: {fps:.1f} FPS | 平均能量: {avg_energy:.1f} | 阈值: {self.energy_threshold}")

    def auto_adjust_threshold(self):
        """Nudge ``energy_threshold`` toward the recent noise level.

        Needs at least 20 history entries. The target is
        ``max(300, median + 2 * std)`` of the history, and the threshold
        moves 10% of the way toward it per call.
        """
        if len(self.energy_history) < 20:
            return
        median_energy = float(np.median(self.energy_history))
        std_energy = float(np.std(self.energy_history))
        new_threshold = max(300, median_energy + 2 * std_energy)
        # Exponential smoothing keeps one noisy window from causing a jump.
        self.energy_threshold = 0.9 * self.energy_threshold + 0.1 * new_threshold

    def _process_recording_chunk(self, data, energy):
        """Handle one chunk in recording mode; stop on silence or max time."""
        self.recorded_frames.append(data)

        now = time.time()
        recording_duration = now - self.recording_start_time
        if self.is_voice_active(energy):
            self.last_sound_time = now
        silence_duration = now - self.last_sound_time

        # Return right after stopping: stop_recording() resets the
        # timestamps to None, so the status line below must not run.
        if (silence_duration > self.silence_threshold
                and recording_duration >= self.min_recording_time):
            print(f"\n🔇 检测到持续静音 {self.silence_threshold}秒,结束录音")
            self.stop_recording()
            return

        if recording_duration > self.max_recording_time:
            print(f"\n⏰ 达到最大录音时间 {self.max_recording_time}秒")
            self.stop_recording()
            return

        status = f"录音中... {recording_duration:.1f}s | 能量: {energy:.0f} | 静音: {silence_duration:.1f}s"
        print(f"\r{status}", end='', flush=True)

    def _process_listening_chunk(self, data, energy):
        """Handle one chunk in listening mode; start recording on sound."""
        if self.is_voice_active(energy):
            self.start_recording()
            # Keep the triggering chunk so the onset is not clipped.
            self.recorded_frames.append(data)
            return

        avg_energy = self.get_average_energy()
        status = f"监听中... 能量: {energy:.0f} | 平均: {avg_energy:.0f} | 阈值: {self.energy_threshold}"
        print(f"\r{status}", end='', flush=True)

        self.auto_adjust_threshold()

    def run(self):
        """Run the listen/record loop until Ctrl+C, an error, or stop().

        Returns immediately if the input stream was never opened. The
        stream and PyAudio instance are released via ``stop()`` on exit.
        """
        if not self.stream:
            print("❌ 音频设备未初始化")
            return

        self.running = True
        # Measure throughput from the start of the loop, not construction.
        self.frame_count = 0
        self.start_time = time.time()

        print("🎤 开始监听...")
        print(f"能量阈值: {self.energy_threshold}")
        print(f"静音阈值: {self.silence_threshold}秒")
        print("📖 使用说明:")
        print("- 检测到声音自动开始录音")
        # Show the configured limits rather than the hard-coded defaults.
        print(f"- 持续静音{self.silence_threshold:g}秒自动结束录音")
        print(f"- 最少录音{self.min_recording_time:g}秒,最多{self.max_recording_time:g}秒")
        print("- 录音完成后自动播放")
        print("- 按 Ctrl+C 退出")
        print("=" * 50)

        try:
            while self.running:
                data = self.stream.read(self.CHUNK_SIZE, exception_on_overflow=False)
                if len(data) == 0:
                    continue

                energy = self.calculate_energy(data)
                self.monitor_performance()

                if self.recording:
                    self._process_recording_chunk(data, energy)
                else:
                    self._process_listening_chunk(data, energy)

                # Short pause retained from the original to reduce CPU use.
                time.sleep(0.01)

        except KeyboardInterrupt:
            print("\n👋 退出")
        except Exception as e:
            print(f"❌ 错误: {e}")
        finally:
            self.stop()

    def stop(self):
        """Stop the loop and release audio resources (safe to call twice)."""
        self.running = False

        # Finish any in-flight recording while PyAudio is still alive.
        if self.recording:
            self.stop_recording()

        # Detach the handles first so a repeated call becomes a no-op.
        stream, self.stream = self.stream, None
        audio, self.audio = self.audio, None
        try:
            if stream is not None:
                stream.stop_stream()
                stream.close()
        finally:
            if audio is not None:
                audio.terminate()


def main():
    """Create a recorder with the default settings and run it."""
    print("🚀 基于能量检测的极简录音系统")
    print("=" * 50)

    recorder = EnergyBasedRecorder(
        energy_threshold=500,      # RMS energy threshold
        silence_threshold=1.5,     # seconds of silence that end a recording
        min_recording_time=2.0,    # minimum recording length, seconds
        max_recording_time=30.0,   # maximum recording length, seconds
    )

    if not recorder.stream:
        # _setup_audio() already printed the reason; do not claim success.
        recorder.stop()
        return

    print("✅ 系统初始化成功")
    print("🎯 优化特点:")
    print(" - 完全移除Vosk识别依赖")
    print(" - 基于能量检测,极低CPU占用")
    print(" - 自动调整能量阈值")
    print(" - 实时性能监控")
    print(" - 预期延迟: <0.1秒")
    print("=" * 50)

    recorder.run()


if __name__ == "__main__":
    main()