diff --git a/energy_based_recorder.py b/energy_based_recorder.py index 46f5eef..d5358ef 100644 --- a/energy_based_recorder.py +++ b/energy_based_recorder.py @@ -81,13 +81,24 @@ class EnergyBasedRecorder: # 计算RMS能量 rms = np.sqrt(np.mean(audio_array ** 2)) - # 更新能量历史 - self.energy_history.append(rms) - if len(self.energy_history) > self.max_energy_history: - self.energy_history.pop(0) + # 更新能量历史(只在非录音状态下更新,避免语音影响背景噪音计算) + if not self.recording: + self.energy_history.append(rms) + if len(self.energy_history) > self.max_energy_history: + self.energy_history.pop(0) return rms + def calculate_peak_energy(self, audio_data): + """计算峰值能量(辅助判断)""" + if len(audio_data) == 0: + return 0 + + audio_array = np.frombuffer(audio_data, dtype=np.int16) + peak_energy = np.max(np.abs(audio_array)) + + return peak_energy + def calculate_zero_crossing_rate(self, audio_data): """计算零交叉率(辅助判断语音)""" if len(audio_data) == 0: @@ -110,21 +121,21 @@ class EnergyBasedRecorder: # 使用最近10个样本的中位数作为背景噪音 background_energy = np.median(self.energy_history[-10:]) - # 动态阈值:背景噪音 + 25%(比原来的500更敏感) - dynamic_threshold = max(50, background_energy * 1.25) + # 动态阈值:背景噪音 + 50%(提高敏感性) + dynamic_threshold = max(50, background_energy * 1.5) # 能量条件 energy_condition = energy > dynamic_threshold - # 零交叉率条件:语音通常在500-5000 Hz之间 - # 对于8kHz采样率,ZCR通常在500-2000之间 - zcr_condition = 500 < zcr < 3000 + # 零交叉率条件:语音通常在1000-5000 Hz之间 + # 对于8kHz采样率,ZCR通常在1000-4000之间 + zcr_condition = 1000 < zcr < 4000 # 同时满足能量和ZCR条件才认为是语音 return energy_condition and zcr_condition else: # 初始阶段使用固定阈值 - return energy > 80 # 更低的初始阈值 + return energy > 60 and zcr > 1000 # 更严格的初始条件 def get_average_energy(self): """获取平均能量水平""" @@ -332,6 +343,7 @@ class EnergyBasedRecorder: # 计算能量和零交叉率 energy = self.calculate_energy(data) zcr = self.calculate_zero_crossing_rate(data) + peak_energy = self.calculate_peak_energy(data) # 性能监控 self.monitor_performance() @@ -359,9 +371,10 @@ class EnergyBasedRecorder: print(f"\n⏰ 达到最大录音时间 {self.max_recording_time}秒") self.stop_recording() - # 显示录音状态(包含预录音信息) + # 显示录音状态(包含调试信息) bg_energy = np.median(self.energy_history[-10:]) if len(self.energy_history) >= 10 else 0 - status = f"录音中... {recording_duration:.1f}s | 能量: {energy:.0f} | ZCR: {zcr:.0f} | 背景: {bg_energy:.0f}" + is_voice = self.is_voice_active_advanced(energy, zcr) + status = f"录音中... {recording_duration:.1f}s | RMS: {energy:.0f} | 峰值: {peak_energy:.0f} | ZCR: {zcr:.0f} | 语音: {is_voice}" print(f"\r{status}", end='', flush=True) else: @@ -373,11 +386,12 @@ class EnergyBasedRecorder: # 检测到声音,开始录音 self.start_recording() else: - # 显示监听状态(包含缓冲区信息) + # 显示监听状态(包含调试信息) avg_energy = self.get_average_energy() bg_energy = np.median(self.energy_history[-10:]) if len(self.energy_history) >= 10 else 0 buffer_usage = len(self.pre_record_buffer) / self.pre_record_max_frames * 100 - status = f"监听中... 能量: {energy:.0f} | ZCR: {zcr:.0f} | 背景: {bg_energy:.0f} | 缓冲: {buffer_usage:.0f}%" + is_voice = self.is_voice_active_advanced(energy, zcr) + status = f"监听中... RMS: {energy:.0f} | 峰值: {peak_energy:.0f} | ZCR: {zcr:.0f} | 背景: {bg_energy:.0f} | 语音: {is_voice} | 缓冲: {buffer_usage:.0f}%" print(f"\r{status}", end='', flush=True) # 减少CPU使用