添加预录音功能:解决录音开头丢失问题

- 实现2秒预录音环形缓冲区
- 检测到声音时自动包含前2秒音频
- 实时显示缓冲区使用状态
- 完美解决录音开头丢失问题
- 显示预录音时长信息
This commit is contained in:
朱潮 2025-09-20 11:44:34 +08:00
parent 12c79a5a53
commit 939a1721d6

View File

@ -29,6 +29,7 @@ class EnergyBasedRecorder:
self.silence_threshold = silence_threshold # 静音阈值,低于此值持续多久认为结束 self.silence_threshold = silence_threshold # 静音阈值,低于此值持续多久认为结束
self.min_recording_time = min_recording_time # 最小录音时间 self.min_recording_time = min_recording_time # 最小录音时间
self.max_recording_time = max_recording_time # 最大录音时间 self.max_recording_time = max_recording_time # 最大录音时间
self.pre_record_duration = 2.0 # 预录音时长(秒)
# 状态变量 # 状态变量
self.audio = None self.audio = None
@ -41,6 +42,10 @@ class EnergyBasedRecorder:
self.energy_history = [] # 能量历史 self.energy_history = [] # 能量历史
self.max_energy_history = 50 # 最大能量历史记录 self.max_energy_history = 50 # 最大能量历史记录
# 预录音缓冲区
self.pre_record_buffer = [] # 预录音缓冲区
self.pre_record_max_frames = int(self.pre_record_duration * self.RATE / self.CHUNK_SIZE) # 最大预录音帧数
# 性能监控 # 性能监控
self.frame_count = 0 self.frame_count = 0
self.start_time = time.time() self.start_time = time.time()
@ -201,11 +206,26 @@ class EnergyBasedRecorder:
except Exception as e: except Exception as e:
print(f"❌ 系统播放器也失败: {e}") print(f"❌ 系统播放器也失败: {e}")
def update_pre_record_buffer(self, audio_data):
    """Append one audio chunk to the pre-record buffer and enforce its cap.

    The buffer keeps only the most recent ``self.pre_record_max_frames``
    chunks; older chunks are discarded from the front.

    Args:
        audio_data: The chunk to append (stored as-is, not inspected here).
    """
    self.pre_record_buffer.append(audio_data)
    # Drop *all* surplus chunks, not just one: if the cap was lowered (or the
    # buffer was filled by other code) a single pop per call would leave the
    # buffer permanently oversized. The slice delete mutates the list in
    # place, so any other reference to the buffer stays valid.
    excess = len(self.pre_record_buffer) - self.pre_record_max_frames
    if excess > 0:
        del self.pre_record_buffer[:excess]
def start_recording(self): def start_recording(self):
"""开始录音""" """开始录音"""
print("🎙️ 检测到声音,开始录音...") print("🎙️ 检测到声音,开始录音...")
self.recording = True self.recording = True
self.recorded_frames = [] self.recorded_frames = []
# 将预录音缓冲区的内容添加到录音中
self.recorded_frames.extend(self.pre_record_buffer)
# 清空预录音缓冲区
self.pre_record_buffer = []
self.recording_start_time = time.time() self.recording_start_time = time.time()
self.last_sound_time = time.time() self.last_sound_time = time.time()
self.energy_history = [] # 重置能量历史 self.energy_history = [] # 重置能量历史
@ -215,7 +235,12 @@ class EnergyBasedRecorder:
if len(self.recorded_frames) > 0: if len(self.recorded_frames) > 0:
audio_data = b''.join(self.recorded_frames) audio_data = b''.join(self.recorded_frames)
duration = len(audio_data) / (self.RATE * 2) # 16位音频每样本2字节 duration = len(audio_data) / (self.RATE * 2) # 16位音频每样本2字节
print(f"📝 录音完成,时长: {duration:.2f}")
# 计算实际录音时长和预录音时长
actual_duration = duration
pre_record_duration = min(duration, self.pre_record_duration)
print(f"📝 录音完成,时长: {actual_duration:.2f}秒 (包含预录音 {pre_record_duration:.1f}秒)")
# 保存录音 # 保存录音
success, filename = self.save_recording(audio_data) success, filename = self.save_recording(audio_data)
@ -275,6 +300,8 @@ class EnergyBasedRecorder:
print("- 动态阈值调整(基于背景噪音)") print("- 动态阈值调整(基于背景噪音)")
print("- 零交叉率检测(区分语音和噪音)") print("- 零交叉率检测(区分语音和噪音)")
print("- 实时显示ZCR和背景能量") print("- 实时显示ZCR和背景能量")
print("- 预录音功能包含声音开始前2秒")
print("- 环形缓冲区防止丢失开头音频")
print("=" * 50) print("=" * 50)
try: try:
@ -315,21 +342,26 @@ class EnergyBasedRecorder:
print(f"\n⏰ 达到最大录音时间 {self.max_recording_time}") print(f"\n⏰ 达到最大录音时间 {self.max_recording_time}")
self.stop_recording() self.stop_recording()
# 显示录音状态 # 显示录音状态(包含预录音信息)
pre_duration = len(self.pre_record_buffer) * self.CHUNK_SIZE / self.RATE
bg_energy = np.median(self.energy_history[-10:]) if len(self.energy_history) >= 10 else 0 bg_energy = np.median(self.energy_history[-10:]) if len(self.energy_history) >= 10 else 0
status = f"录音中... {recording_duration:.1f}s | 能量: {energy:.0f} | ZCR: {zcr:.0f} | 背景: {bg_energy:.0f}" status = f"录音中... {recording_duration:.1f}s | 能量: {energy:.0f} | ZCR: {zcr:.0f} | 背景: {bg_energy:.0f}"
print(f"\r{status}", end='', flush=True) print(f"\r{status}", end='', flush=True)
else: else:
# 监听模式 - 使用高级检测 # 监听模式 - 更新预录音缓冲区
self.update_pre_record_buffer(data)
# 使用高级检测
if self.is_voice_active_advanced(energy, zcr): if self.is_voice_active_advanced(energy, zcr):
# 检测到声音,开始录音 # 检测到声音,开始录音
self.start_recording() self.start_recording()
else: else:
# 显示监听状态 # 显示监听状态(包含缓冲区信息)
avg_energy = self.get_average_energy() avg_energy = self.get_average_energy()
bg_energy = np.median(self.energy_history[-10:]) if len(self.energy_history) >= 10 else 0 bg_energy = np.median(self.energy_history[-10:]) if len(self.energy_history) >= 10 else 0
status = f"监听中... 能量: {energy:.0f} | ZCR: {zcr:.0f} | 背景: {bg_energy:.0f}" buffer_usage = len(self.pre_record_buffer) / self.pre_record_max_frames * 100
status = f"监听中... 能量: {energy:.0f} | ZCR: {zcr:.0f} | 背景: {bg_energy:.0f} | 缓冲: {buffer_usage:.0f}%"
print(f"\r{status}", end='', flush=True) print(f"\r{status}", end='', flush=True)
# 减少CPU使用 # 减少CPU使用