升级声音检测算法:动态阈值+零交叉率
- 实现动态阈值调整(背景噪音+25%)
- 添加零交叉率检测区分语音和噪音
- 优化灵敏度,适应50-70的能量范围
- 实时显示ZCR和背景能量值
- 大幅提高语音检测准确性
This commit is contained in:
parent
b526328fe6
commit
12c79a5a53
@ -80,6 +80,44 @@ class EnergyBasedRecorder:
|
|||||||
|
|
||||||
return rms
|
return rms
|
||||||
|
|
||||||
|
def calculate_zero_crossing_rate(self, audio_data):
    """Estimate the zero-crossing rate (ZCR) of one audio chunk.

    The ZCR is used as a secondary cue for telling speech from noise.

    Args:
        audio_data: Bytes-like buffer that is decoded as 16-bit integer
            samples via ``np.frombuffer(..., dtype=np.int16)``.

    Returns:
        ``0`` for an empty buffer; otherwise the number of polarity changes
        divided by the number of samples and multiplied by ``self.RATE``
        (presumably the sample rate, giving crossings per second).
    """
    if len(audio_data) == 0:
        return 0

    audio_array = np.frombuffer(audio_data, dtype=np.int16)

    # Exact-zero samples carry no polarity. Comparing np.sign() of direct
    # neighbours would count a pass through a zero sample twice (+ -> 0 and
    # 0 -> -) and would count a signal that merely touches zero as crossing
    # it, so zero samples are dropped before looking for polarity changes.
    polarity = np.sign(audio_array)
    polarity = polarity[polarity != 0]
    zero_crossings = np.count_nonzero(polarity[1:] != polarity[:-1])

    # Scale crossings-per-sample by the rate stored on the recorder.
    zcr = zero_crossings / len(audio_array) * self.RATE

    return zcr
|
||||||
|
|
||||||
|
def is_voice_active_advanced(self, energy, zcr):
    """Decide whether the current frame counts as voice activity.

    While ``self.energy_history`` holds fewer than 10 entries, only a fixed
    energy threshold is applied and ``zcr`` is ignored. After that, the
    median of the 10 most recent history entries is taken as the background
    level, and a frame is voice only if its energy exceeds the dynamic
    threshold AND its zero-crossing rate lies inside the accepted band.

    Args:
        energy: Energy value of the current frame.
        zcr: Zero-crossing rate of the current frame.

    Returns:
        A plain Python ``bool``. Comparisons against NumPy scalars yield
        ``np.bool_``, so the result is normalised to keep the return type
        the same on every path.
    """
    history_window = 10      # number of recent samples used for the background
    warmup_threshold = 80    # fixed energy threshold before history is filled
    threshold_floor = 50     # the dynamic threshold never drops below this
    background_gain = 1.25   # background level + 25%
    zcr_low, zcr_high = 500, 3000  # accepted ZCR band, both ends exclusive

    if len(self.energy_history) < history_window:
        # Start-up phase: not enough samples for a background estimate yet.
        return bool(energy > warmup_threshold)

    # The median of the most recent samples serves as the background level.
    background_energy = np.median(self.energy_history[-history_window:])
    dynamic_threshold = max(threshold_floor, background_energy * background_gain)

    energy_condition = energy > dynamic_threshold
    zcr_condition = zcr_low < zcr < zcr_high

    # Both the energy and the ZCR condition must hold for a voice decision.
    return bool(energy_condition and zcr_condition)
|
||||||
|
|
||||||
def get_average_energy(self):
|
def get_average_energy(self):
|
||||||
"""获取平均能量水平"""
|
"""获取平均能量水平"""
|
||||||
if not self.energy_history:
|
if not self.energy_history:
|
||||||
@ -233,6 +271,10 @@ class EnergyBasedRecorder:
|
|||||||
print("- 最少录音2秒,最多30秒")
|
print("- 最少录音2秒,最多30秒")
|
||||||
print("- 录音完成后自动播放")
|
print("- 录音完成后自动播放")
|
||||||
print("- 按 Ctrl+C 退出")
|
print("- 按 Ctrl+C 退出")
|
||||||
|
print("🎯 新增功能:")
|
||||||
|
print("- 动态阈值调整(基于背景噪音)")
|
||||||
|
print("- 零交叉率检测(区分语音和噪音)")
|
||||||
|
print("- 实时显示ZCR和背景能量")
|
||||||
print("=" * 50)
|
print("=" * 50)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -243,8 +285,9 @@ class EnergyBasedRecorder:
|
|||||||
if len(data) == 0:
|
if len(data) == 0:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# 计算能量
|
# 计算能量和零交叉率
|
||||||
energy = self.calculate_energy(data)
|
energy = self.calculate_energy(data)
|
||||||
|
zcr = self.calculate_zero_crossing_rate(data)
|
||||||
|
|
||||||
# 性能监控
|
# 性能监控
|
||||||
self.monitor_performance()
|
self.monitor_performance()
|
||||||
@ -255,7 +298,7 @@ class EnergyBasedRecorder:
|
|||||||
recording_duration = time.time() - self.recording_start_time
|
recording_duration = time.time() - self.recording_start_time
|
||||||
|
|
||||||
# 更新最后声音时间
|
# 更新最后声音时间
|
||||||
if self.is_voice_active(energy):
|
if self.is_voice_active_advanced(energy, zcr):
|
||||||
self.last_sound_time = time.time()
|
self.last_sound_time = time.time()
|
||||||
|
|
||||||
# 检查是否应该结束录音
|
# 检查是否应该结束录音
|
||||||
@ -273,23 +316,22 @@ class EnergyBasedRecorder:
|
|||||||
self.stop_recording()
|
self.stop_recording()
|
||||||
|
|
||||||
# 显示录音状态
|
# 显示录音状态
|
||||||
status = f"录音中... {recording_duration:.1f}s | 能量: {energy:.0f} | 静音: {current_time - self.last_sound_time:.1f}s"
|
bg_energy = np.median(self.energy_history[-10:]) if len(self.energy_history) >= 10 else 0
|
||||||
|
status = f"录音中... {recording_duration:.1f}s | 能量: {energy:.0f} | ZCR: {zcr:.0f} | 背景: {bg_energy:.0f}"
|
||||||
print(f"\r{status}", end='', flush=True)
|
print(f"\r{status}", end='', flush=True)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# 监听模式
|
# 监听模式 - 使用高级检测
|
||||||
if self.is_voice_active(energy):
|
if self.is_voice_active_advanced(energy, zcr):
|
||||||
# 检测到声音,开始录音
|
# 检测到声音,开始录音
|
||||||
self.start_recording()
|
self.start_recording()
|
||||||
else:
|
else:
|
||||||
# 显示监听状态
|
# 显示监听状态
|
||||||
avg_energy = self.get_average_energy()
|
avg_energy = self.get_average_energy()
|
||||||
status = f"监听中... 能量: {energy:.0f} | 平均: {avg_energy:.0f} | 阈值: {self.energy_threshold}"
|
bg_energy = np.median(self.energy_history[-10:]) if len(self.energy_history) >= 10 else 0
|
||||||
|
status = f"监听中... 能量: {energy:.0f} | ZCR: {zcr:.0f} | 背景: {bg_energy:.0f}"
|
||||||
print(f"\r{status}", end='', flush=True)
|
print(f"\r{status}", end='', flush=True)
|
||||||
|
|
||||||
# 自动调整阈值
|
|
||||||
self.auto_adjust_threshold()
|
|
||||||
|
|
||||||
# 减少CPU使用
|
# 减少CPU使用
|
||||||
time.sleep(0.01)
|
time.sleep(0.01)
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user