修复语音检测算法:解决背景噪音和能量计算问题
- 修复能量历史更新:只在非录音状态下更新背景噪音
- 提高ZCR阈值:1000-4000范围更适合语音检测
- 提高动态阈值:由背景噪音+25%改为+50%,降低对背景噪音的敏感度
- 添加峰值能量计算和调试信息显示
- 解决语音影响背景噪音计算的问题
This commit is contained in:
parent
c01e6ad1f6
commit
48b99384b7
@ -81,13 +81,24 @@ class EnergyBasedRecorder:
|
|||||||
# 计算RMS能量
|
# 计算RMS能量
|
||||||
rms = np.sqrt(np.mean(audio_array ** 2))
|
rms = np.sqrt(np.mean(audio_array ** 2))
|
||||||
|
|
||||||
# 更新能量历史
|
# 更新能量历史(只在非录音状态下更新,避免语音影响背景噪音计算)
|
||||||
self.energy_history.append(rms)
|
if not self.recording:
|
||||||
if len(self.energy_history) > self.max_energy_history:
|
self.energy_history.append(rms)
|
||||||
self.energy_history.pop(0)
|
if len(self.energy_history) > self.max_energy_history:
|
||||||
|
self.energy_history.pop(0)
|
||||||
|
|
||||||
return rms
|
return rms
|
||||||
|
|
||||||
|
def calculate_peak_energy(self, audio_data):
|
||||||
|
"""计算峰值能量(辅助判断)"""
|
||||||
|
if len(audio_data) == 0:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
audio_array = np.frombuffer(audio_data, dtype=np.int16)
|
||||||
|
peak_energy = np.max(np.abs(audio_array))
|
||||||
|
|
||||||
|
return peak_energy
|
||||||
|
|
||||||
def calculate_zero_crossing_rate(self, audio_data):
|
def calculate_zero_crossing_rate(self, audio_data):
|
||||||
"""计算零交叉率(辅助判断语音)"""
|
"""计算零交叉率(辅助判断语音)"""
|
||||||
if len(audio_data) == 0:
|
if len(audio_data) == 0:
|
||||||
@ -110,21 +121,21 @@ class EnergyBasedRecorder:
|
|||||||
# 使用最近10个样本的中位数作为背景噪音
|
# 使用最近10个样本的中位数作为背景噪音
|
||||||
background_energy = np.median(self.energy_history[-10:])
|
background_energy = np.median(self.energy_history[-10:])
|
||||||
|
|
||||||
# 动态阈值:背景噪音 + 25%(比原来的500更敏感)
|
# 动态阈值:背景噪音 + 50%(阈值高于原来的 +25%,更不易被背景噪音触发)
|
||||||
dynamic_threshold = max(50, background_energy * 1.25)
|
dynamic_threshold = max(50, background_energy * 1.5)
|
||||||
|
|
||||||
# 能量条件
|
# 能量条件
|
||||||
energy_condition = energy > dynamic_threshold
|
energy_condition = energy > dynamic_threshold
|
||||||
|
|
||||||
# 零交叉率条件:语音通常在500-5000 Hz之间
|
# 零交叉率条件:语音通常在1000-5000 Hz之间
|
||||||
# 对于8kHz采样率,ZCR通常在500-2000之间
|
# 对于8kHz采样率,ZCR通常在1000-4000之间
|
||||||
zcr_condition = 500 < zcr < 3000
|
zcr_condition = 1000 < zcr < 4000
|
||||||
|
|
||||||
# 同时满足能量和ZCR条件才认为是语音
|
# 同时满足能量和ZCR条件才认为是语音
|
||||||
return energy_condition and zcr_condition
|
return energy_condition and zcr_condition
|
||||||
else:
|
else:
|
||||||
# 初始阶段使用固定阈值
|
# 初始阶段使用固定阈值
|
||||||
return energy > 80 # 更低的初始阈值
|
return energy > 60 and zcr > 1000 # 更严格的初始条件
|
||||||
|
|
||||||
def get_average_energy(self):
|
def get_average_energy(self):
|
||||||
"""获取平均能量水平"""
|
"""获取平均能量水平"""
|
||||||
@ -332,6 +343,7 @@ class EnergyBasedRecorder:
|
|||||||
# 计算能量和零交叉率
|
# 计算能量和零交叉率
|
||||||
energy = self.calculate_energy(data)
|
energy = self.calculate_energy(data)
|
||||||
zcr = self.calculate_zero_crossing_rate(data)
|
zcr = self.calculate_zero_crossing_rate(data)
|
||||||
|
peak_energy = self.calculate_peak_energy(data)
|
||||||
|
|
||||||
# 性能监控
|
# 性能监控
|
||||||
self.monitor_performance()
|
self.monitor_performance()
|
||||||
@ -359,9 +371,10 @@ class EnergyBasedRecorder:
|
|||||||
print(f"\n⏰ 达到最大录音时间 {self.max_recording_time}秒")
|
print(f"\n⏰ 达到最大录音时间 {self.max_recording_time}秒")
|
||||||
self.stop_recording()
|
self.stop_recording()
|
||||||
|
|
||||||
# 显示录音状态(包含预录音信息)
|
# 显示录音状态(包含调试信息)
|
||||||
bg_energy = np.median(self.energy_history[-10:]) if len(self.energy_history) >= 10 else 0
|
bg_energy = np.median(self.energy_history[-10:]) if len(self.energy_history) >= 10 else 0
|
||||||
status = f"录音中... {recording_duration:.1f}s | 能量: {energy:.0f} | ZCR: {zcr:.0f} | 背景: {bg_energy:.0f}"
|
is_voice = self.is_voice_active_advanced(energy, zcr)
|
||||||
|
status = f"录音中... {recording_duration:.1f}s | RMS: {energy:.0f} | 峰值: {peak_energy:.0f} | ZCR: {zcr:.0f} | 语音: {is_voice}"
|
||||||
print(f"\r{status}", end='', flush=True)
|
print(f"\r{status}", end='', flush=True)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
@ -373,11 +386,12 @@ class EnergyBasedRecorder:
|
|||||||
# 检测到声音,开始录音
|
# 检测到声音,开始录音
|
||||||
self.start_recording()
|
self.start_recording()
|
||||||
else:
|
else:
|
||||||
# 显示监听状态(包含缓冲区信息)
|
# 显示监听状态(包含调试信息)
|
||||||
avg_energy = self.get_average_energy()
|
avg_energy = self.get_average_energy()
|
||||||
bg_energy = np.median(self.energy_history[-10:]) if len(self.energy_history) >= 10 else 0
|
bg_energy = np.median(self.energy_history[-10:]) if len(self.energy_history) >= 10 else 0
|
||||||
buffer_usage = len(self.pre_record_buffer) / self.pre_record_max_frames * 100
|
buffer_usage = len(self.pre_record_buffer) / self.pre_record_max_frames * 100
|
||||||
status = f"监听中... 能量: {energy:.0f} | ZCR: {zcr:.0f} | 背景: {bg_energy:.0f} | 缓冲: {buffer_usage:.0f}%"
|
is_voice = self.is_voice_active_advanced(energy, zcr)
|
||||||
|
status = f"监听中... RMS: {energy:.0f} | 峰值: {peak_energy:.0f} | ZCR: {zcr:.0f} | 背景: {bg_energy:.0f} | 语音: {is_voice} | 缓冲: {buffer_usage:.0f}%"
|
||||||
print(f"\r{status}", end='', flush=True)
|
print(f"\r{status}", end='', flush=True)
|
||||||
|
|
||||||
# 减少CPU使用
|
# 减少CPU使用
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user