From 12c79a5a53fdd72608a3bcc9cf856bb7de4c77db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E6=BD=AE?= Date: Sat, 20 Sep 2025 11:39:56 +0800 Subject: [PATCH] =?UTF-8?q?=E5=8D=87=E7=BA=A7=E5=A3=B0=E9=9F=B3=E6=A3=80?= =?UTF-8?q?=E6=B5=8B=E7=AE=97=E6=B3=95=EF=BC=9A=E5=8A=A8=E6=80=81=E9=98=88?= =?UTF-8?q?=E5=80=BC+=E9=9B=B6=E4=BA=A4=E5=8F=89=E7=8E=87?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 实现动态阈值调整(背景噪音+25%) - 添加零交叉率检测区分语音和噪音 - 优化灵敏度,适应50-70的能量范围 - 实时显示ZCR和背景能量值 - 大幅提高语音检测准确性 --- energy_based_recorder.py | 60 ++++++++++++++++++++++++++++++++++------ 1 file changed, 51 insertions(+), 9 deletions(-) diff --git a/energy_based_recorder.py b/energy_based_recorder.py index cc6509e..f8e4477 100644 --- a/energy_based_recorder.py +++ b/energy_based_recorder.py @@ -80,6 +80,44 @@ class EnergyBasedRecorder: return rms + def calculate_zero_crossing_rate(self, audio_data): + """计算零交叉率(辅助判断语音)""" + if len(audio_data) == 0: + return 0 + + audio_array = np.frombuffer(audio_data, dtype=np.int16) + + # 计算零交叉次数 + zero_crossings = np.sum(np.diff(np.sign(audio_array)) != 0) + + # 归一化到采样率 + zcr = zero_crossings / len(audio_array) * self.RATE + + return zcr + + def is_voice_active_advanced(self, energy, zcr): + """高级语音活动检测""" + # 动态阈值:基于背景噪音 + if len(self.energy_history) >= 10: + # 使用最近10个样本的中位数作为背景噪音 + background_energy = np.median(self.energy_history[-10:]) + + # 动态阈值:背景噪音 + 25%(比原来的500更敏感) + dynamic_threshold = max(50, background_energy * 1.25) + + # 能量条件 + energy_condition = energy > dynamic_threshold + + # 零交叉率条件:语音通常在500-5000 Hz之间 + # 对于8kHz采样率,ZCR通常在500-2000之间 + zcr_condition = 500 < zcr < 3000 + + # 同时满足能量和ZCR条件才认为是语音 + return energy_condition and zcr_condition + else: + # 初始阶段使用固定阈值 + return energy > 80 # 更低的初始阈值 + def get_average_energy(self): """获取平均能量水平""" if not self.energy_history: @@ -233,6 +271,10 @@ class EnergyBasedRecorder: print("- 最少录音2秒,最多30秒") print("- 录音完成后自动播放") print("- 按 Ctrl+C 退出") + print("🎯 新增功能:") + print("- 动态阈值调整(基于背景噪音)") + print("- 零交叉率检测(区分语音和噪音)") + print("- 实时显示ZCR和背景能量") print("=" * 50) try: @@ -243,8 +285,9 @@ class EnergyBasedRecorder: if len(data) == 0: continue - # 计算能量 + # 计算能量和零交叉率 energy = self.calculate_energy(data) + zcr = self.calculate_zero_crossing_rate(data) # 性能监控 self.monitor_performance() @@ -255,7 +298,7 @@ class EnergyBasedRecorder: recording_duration = time.time() - self.recording_start_time # 更新最后声音时间 - if self.is_voice_active(energy): + if self.is_voice_active_advanced(energy, zcr): self.last_sound_time = time.time() # 检查是否应该结束录音 @@ -273,22 +316,21 @@ class EnergyBasedRecorder: self.stop_recording() # 显示录音状态 - status = f"录音中... {recording_duration:.1f}s | 能量: {energy:.0f} | 静音: {current_time - self.last_sound_time:.1f}s" + bg_energy = np.median(self.energy_history[-10:]) if len(self.energy_history) >= 10 else 0 + status = f"录音中... {recording_duration:.1f}s | 能量: {energy:.0f} | ZCR: {zcr:.0f} | 背景: {bg_energy:.0f}" print(f"\r{status}", end='', flush=True) else: - # 监听模式 - if self.is_voice_active(energy): + # 监听模式 - 使用高级检测 + if self.is_voice_active_advanced(energy, zcr): # 检测到声音,开始录音 self.start_recording() else: # 显示监听状态 avg_energy = self.get_average_energy() - status = f"监听中... 能量: {energy:.0f} | 平均: {avg_energy:.0f} | 阈值: {self.energy_threshold}" + bg_energy = np.median(self.energy_history[-10:]) if len(self.energy_history) >= 10 else 0 + status = f"监听中... 能量: {energy:.0f} | ZCR: {zcr:.0f} | 背景: {bg_energy:.0f}" print(f"\r{status}", end='', flush=True) - - # 自动调整阈值 - self.auto_adjust_threshold() # 减少CPU使用 time.sleep(0.01)