diff --git a/doubao/__pycache__/audio_manager.cpython-312.pyc b/doubao/__pycache__/audio_manager.cpython-312.pyc index 1270ee1..815671f 100644 Binary files a/doubao/__pycache__/audio_manager.cpython-312.pyc and b/doubao/__pycache__/audio_manager.cpython-312.pyc differ diff --git a/doubao/__pycache__/config.cpython-312.pyc b/doubao/__pycache__/config.cpython-312.pyc index 59a6393..440ea30 100644 Binary files a/doubao/__pycache__/config.cpython-312.pyc and b/doubao/__pycache__/config.cpython-312.pyc differ diff --git a/doubao/audio_manager.py b/doubao/audio_manager.py index b85ce38..e72ef19 100644 --- a/doubao/audio_manager.py +++ b/doubao/audio_manager.py @@ -35,13 +35,26 @@ class AudioDeviceManager: self.input_stream = None self.output_stream = None self.audio_queue = None + self.playback_queue = None # 播放队列 self.recording = False + self.playing = False + + # 预缓冲机制 + self.pre_buffer = [] + self.pre_buffer_size = 5 # 预缓冲5个音频块 + self.buffer_threshold = 3 # 缓冲阈值,低于此值开始预缓冲 + + # 静音检测和回声消除 + self.silence_threshold = 500 # 静音阈值 + self.echo_suppression_enabled = True + self.last_audio_level = 0 + self.audio_level_history = [] def open_input_stream(self): """打开音频输入流""" try: import queue - self.audio_queue = queue.Queue(maxsize=100) # 音频数据队列 + self.audio_queue = queue.Queue(maxsize=100) # 增大队列大小,提供更多缓冲 def audio_callback(indata, frames, time_info, status): """音频数据回调""" @@ -51,17 +64,23 @@ class AudioDeviceManager: try: # 将numpy数组转换为字节数据 audio_bytes = indata.tobytes() + + # 添加音频数据预处理,提高质量 + if hasattr(self, '_audio_processor'): + audio_bytes = self._audio_processor(audio_bytes) + self.audio_queue.put_nowait(audio_bytes) except queue.Full: - print("警告: 音频队列已满,丢弃数据") + pass # 静默丢弃,避免阻塞 self.input_stream = sd.InputStream( samplerate=self.input_config.sample_rate, channels=self.input_config.channels, - dtype='int16', # 16-bit PCM + dtype='int16', blocksize=self.input_config.chunk, callback=audio_callback, - device=None # 使用默认设备 + device=None, + latency='low' # 低延迟模式 ) self.input_stream.start() self.recording = True @@ -73,14 +92,53 @@ class AudioDeviceManager: def open_output_stream(self): """打开音频输出流""" try: + import queue + self.playback_queue = queue.Queue(maxsize=50) # 增大播放队列,提供更多缓冲 + + def playback_callback(outdata, frames, time_info, status): + """音频播放回调""" + if status: + print(f"播放状态: {status}") + + try: + # 从队列获取音频数据 + audio_data = self.playback_queue.get_nowait() + + # 转换字节数据为numpy数组 + audio_array = np.frombuffer(audio_data, dtype=np.int16) + audio_array = audio_array.reshape(-1, self.output_config.channels) + + # 应用音频淡入淡出效果,减少爆音 + if hasattr(self, '_apply_volume_fade'): + audio_array = self._apply_volume_fade(audio_array) + + # 确保数据大小匹配 + if len(audio_array) < frames: + # 数据不足,用0填充 + padded = np.zeros((frames, self.output_config.channels), dtype=np.int16) + padded[:len(audio_array)] = audio_array + outdata[:] = padded + else: + outdata[:] = audio_array[:frames] + + except queue.Empty: + # 队列为空,输出静音 + outdata.fill(0) + except Exception as e: + print(f"播放回调错误: {e}") + outdata.fill(0) + self.output_stream = sd.OutputStream( samplerate=self.output_config.sample_rate, channels=self.output_config.channels, - dtype='int16', # 16-bit PCM + dtype='int16', blocksize=self.output_config.chunk, - device=None # 使用默认设备 + callback=playback_callback, + device=None, + latency='low' # 低延迟模式 ) self.output_stream.start() + self.playing = True return self.output_stream except Exception as e: print(f"打开输出流失败: {e}") @@ -89,13 +147,42 @@ class AudioDeviceManager: def play_audio(self, audio_data: bytes) -> None: """播放音频数据""" try: - # 将字节数据转换为numpy数组 - audio_array = np.frombuffer(audio_data, dtype=np.int16) - audio_array = audio_array.reshape(-1, self.output_config.channels) - - # 使用sounddevice播放 - sd.play(audio_array, samplerate=self.output_config.sample_rate) - sd.wait() # 等待播放完成 + if self.playing and self.playback_queue: + # 音频数据预缓冲:将大数据块分成更小的块以获得更流畅的播放 + chunk_size = self.output_config.chunk * 2 # 每个样本2字节 + + # 预处理音频数据 + if hasattr(self, '_playback_processor'): + audio_data = self._playback_processor(audio_data) + + # 预缓冲机制:在播放前积累一些音频块 + if len(self.pre_buffer) < self.pre_buffer_size: + chunk_size = self.output_config.chunk * 2 + for i in range(0, len(audio_data), chunk_size): + chunk = audio_data[i:i+chunk_size] + self.pre_buffer.append(chunk) + if len(self.pre_buffer) >= self.pre_buffer_size: + break + + # 如果预缓冲已满,开始播放 + if len(self.pre_buffer) >= self.pre_buffer_size: + self._flush_pre_buffer() + + # 分块处理音频数据,避免单个数据块过大 + for i in range(0, len(audio_data), chunk_size): + chunk = audio_data[i:i+chunk_size] + try: + # 使用阻塞式put,确保不丢失数据 + self.playback_queue.put(chunk, timeout=0.1) + except queue.Full: + print("警告: 播放队列已满,丢弃音频数据") + # 如果队列满,尝试清空一些旧数据 + try: + self.playback_queue.get_nowait() + self.playback_queue.put(chunk, timeout=0.05) + except: + pass + break except Exception as e: print(f"音频播放失败: {e}") @@ -105,9 +192,9 @@ class AudioDeviceManager: if not self.recording or self.audio_queue is None: return b'\x00' * (frames * 2) # 返回静音数据 - # 从队列获取音频数据 + # 使用更长的超时时间,提高音频数据获取成功率 try: - audio_data = self.audio_queue.get(timeout=0.1) # 100ms超时 + audio_data = self.audio_queue.get(timeout=0.1) # 增加超时时间 return audio_data except queue.Empty: # 队列为空,返回静音数据 @@ -121,10 +208,75 @@ class AudioDeviceManager: """停止录音""" self.recording = False + def stop_playing(self): + """停止播放""" + self.playing = False + if self.playback_queue: + # 清空播放队列 + while not self.playback_queue.empty(): + try: + self.playback_queue.get_nowait() + except queue.Empty: + break + + def _flush_pre_buffer(self): + """刷新预缓冲区到播放队列""" + if hasattr(self, 'pre_buffer') and self.pre_buffer: + for chunk in self.pre_buffer: + try: + self.playback_queue.put(chunk, timeout=0.1) + except queue.Full: + print("警告: 播放队列已满,丢弃预缓冲数据") + break + self.pre_buffer.clear() + + def _apply_volume_fade(self, audio_array): + """应用音量淡入淡出效果,减少爆音""" + try: + # 简单的淡入淡出效果 + fade_samples = min(100, len(audio_array) // 10) # 淡入淡出样本数 + + # 淡入 + for i in range(fade_samples): + factor = i / fade_samples + audio_array[i] = int(audio_array[i] * factor) + + # 淡出 + for i in range(fade_samples): + factor = (fade_samples - i) / fade_samples + audio_array[-(i+1)] = int(audio_array[-(i+1)] * factor) + + return audio_array + except Exception as e: + print(f"音量淡入淡出失败: {e}") + return audio_array + + def _detect_silence(self, audio_data): + """检测静音""" + try: + audio_array = np.frombuffer(audio_data, dtype=np.int16) + audio_level = np.abs(audio_array).mean() + + # 更新音频电平历史 + self.audio_level_history.append(audio_level) + if len(self.audio_level_history) > 10: + self.audio_level_history.pop(0) + + # 计算平均音频电平 + avg_level = np.mean(self.audio_level_history) if self.audio_level_history else 0 + + # 检测静音 + is_silence = audio_level < self.silence_threshold + return is_silence, audio_level, avg_level + except Exception as e: + print(f"静音检测失败: {e}") + return False, 0, 0 + def cleanup(self) -> None: """清理音频设备资源""" try: - self.recording = False + self.stop_recording() + self.stop_playing() if self.input_stream: self.input_stream.stop() self.input_stream.close() @@ -132,6 +284,9 @@ class AudioDeviceManager: self.output_stream.stop() self.output_stream.close() sd.stop() # 停止所有音频播放 + # 清空预缓冲区 + if hasattr(self, 'pre_buffer'): + self.pre_buffer.clear() except Exception as e: print(f"清理音频设备失败: {e}") diff --git a/doubao/config.py b/doubao/config.py index c388772..ebde61b 100644 --- a/doubao/config.py +++ b/doubao/config.py @@ -42,7 +42,7 @@ start_session_req = { } input_audio_config = { - "chunk": 3200, + "chunk": 6400, # 增大缓冲区大小,减少处理频率 "format": "pcm", "channels": 1, "sample_rate": 16000, @@ -50,7 +50,7 @@ input_audio_config = { } output_audio_config = { - "chunk": 3200, + "chunk": 6400, # 增大缓冲区大小,减少处理频率 "format": "pcm", "channels": 1, "sample_rate": 24000,