fix audio

This commit is contained in:
朱潮 2025-09-19 20:58:35 +08:00
parent 3958d2ff81
commit 7eff24a175
4 changed files with 173 additions and 18 deletions

View File

@ -35,13 +35,26 @@ class AudioDeviceManager:
self.input_stream = None
self.output_stream = None
self.audio_queue = None
self.playback_queue = None # 播放队列
self.recording = False
self.playing = False
# 预缓冲机制
self.pre_buffer = []
self.pre_buffer_size = 5 # 预缓冲5个音频块
self.buffer_threshold = 3 # 缓冲阈值,低于此值开始预缓冲
# 静音检测和回声消除
self.silence_threshold = 500 # 静音阈值
self.echo_suppression_enabled = True
self.last_audio_level = 0
self.audio_level_history = []
def open_input_stream(self):
"""打开音频输入流"""
try:
import queue
self.audio_queue = queue.Queue(maxsize=100) # 音频数据队列
self.audio_queue = queue.Queue(maxsize=100) # 增大队列大小,提供更多缓冲
def audio_callback(indata, frames, time_info, status):
"""音频数据回调"""
@ -51,17 +64,23 @@ class AudioDeviceManager:
try:
# 将numpy数组转换为字节数据
audio_bytes = indata.tobytes()
# 添加音频数据预处理,提高质量
if hasattr(self, '_audio_processor'):
audio_bytes = self._audio_processor(audio_bytes)
self.audio_queue.put_nowait(audio_bytes)
except queue.Full:
print("警告: 音频队列已满,丢弃数据")
pass # 静默丢弃,避免阻塞
self.input_stream = sd.InputStream(
samplerate=self.input_config.sample_rate,
channels=self.input_config.channels,
dtype='int16', # 16-bit PCM
dtype='int16',
blocksize=self.input_config.chunk,
callback=audio_callback,
device=None # 使用默认设备
device=None,
latency='low' # 低延迟模式
)
self.input_stream.start()
self.recording = True
@ -73,14 +92,53 @@ class AudioDeviceManager:
def open_output_stream(self):
"""打开音频输出流"""
try:
import queue
self.playback_queue = queue.Queue(maxsize=50) # 增大播放队列,提供更多缓冲
def playback_callback(outdata, frames, time_info, status):
"""音频播放回调"""
if status:
print(f"播放状态: {status}")
try:
# 从队列获取音频数据
audio_data = self.playback_queue.get_nowait()
# 转换字节数据为numpy数组
audio_array = np.frombuffer(audio_data, dtype=np.int16)
audio_array = audio_array.reshape(-1, self.output_config.channels)
# 应用音频淡入淡出效果,减少爆音
if hasattr(self, '_apply_volume_fade'):
audio_array = self._apply_volume_fade(audio_array)
# 确保数据大小匹配
if len(audio_array) < frames:
# 数据不足用0填充
padded = np.zeros((frames, self.output_config.channels), dtype=np.int16)
padded[:len(audio_array)] = audio_array
outdata[:] = padded
else:
outdata[:] = audio_array[:frames]
except queue.Empty:
# 队列为空,输出静音
outdata.fill(0)
except Exception as e:
print(f"播放回调错误: {e}")
outdata.fill(0)
self.output_stream = sd.OutputStream(
samplerate=self.output_config.sample_rate,
channels=self.output_config.channels,
dtype='int16', # 16-bit PCM
dtype='int16',
blocksize=self.output_config.chunk,
device=None # 使用默认设备
callback=playback_callback,
device=None,
latency='low' # 低延迟模式
)
self.output_stream.start()
self.playing = True
return self.output_stream
except Exception as e:
print(f"打开输出流失败: {e}")
@ -89,13 +147,42 @@ class AudioDeviceManager:
def play_audio(self, audio_data: bytes) -> None:
"""播放音频数据"""
try:
# 将字节数据转换为numpy数组
audio_array = np.frombuffer(audio_data, dtype=np.int16)
audio_array = audio_array.reshape(-1, self.output_config.channels)
if self.playing and self.playback_queue:
# 音频数据预缓冲:将大数据块分成更小的块以获得更流畅的播放
chunk_size = self.output_config.chunk * 2 # 每个样本2字节
# 使用sounddevice播放
sd.play(audio_array, samplerate=self.output_config.sample_rate)
sd.wait() # 等待播放完成
# 预处理音频数据
if hasattr(self, '_playback_processor'):
audio_data = self._playback_processor(audio_data)
# 预缓冲机制:在播放前积累一些音频块
if len(self.pre_buffer) < self.pre_buffer_size:
chunk_size = self.output_config.chunk * 2
for i in range(0, len(audio_data), chunk_size):
chunk = audio_data[i:i+chunk_size]
self.pre_buffer.append(chunk)
if len(self.pre_buffer) >= self.pre_buffer_size:
break
# 如果预缓冲已满,开始播放
if len(self.pre_buffer) >= self.pre_buffer_size:
self._flush_pre_buffer()
# 分块处理音频数据,避免单个数据块过大
for i in range(0, len(audio_data), chunk_size):
chunk = audio_data[i:i+chunk_size]
try:
# 使用阻塞式put确保不丢失数据
self.playback_queue.put(chunk, timeout=0.1)
except queue.Full:
print("警告: 播放队列已满,丢弃音频数据")
# 如果队列满,尝试清空一些旧数据
try:
self.playback_queue.get_nowait()
self.playback_queue.put(chunk, timeout=0.05)
except:
pass
break
except Exception as e:
print(f"音频播放失败: {e}")
@ -105,9 +192,9 @@ class AudioDeviceManager:
if not self.recording or self.audio_queue is None:
return b'\x00' * (frames * 2) # 返回静音数据
# 从队列获取音频数据
# 使用更长的超时时间,提高音频数据获取成功率
try:
audio_data = self.audio_queue.get(timeout=0.1) # 100ms超时
audio_data = self.audio_queue.get(timeout=0.1) # 增加超时时间
return audio_data
except queue.Empty:
# 队列为空,返回静音数据
@ -121,10 +208,75 @@ class AudioDeviceManager:
"""停止录音"""
self.recording = False
def stop_playing(self):
    """Stop playback and discard all audio that has not been played yet.

    Clears the ``playing`` flag, drains every chunk still waiting in the
    playback queue and also drops chunks staged in the pre-buffer, so
    that leftovers from the stopped session are not flushed in front of
    the next one.

    Returns:
        None.
    """
    self.playing = False

    if self.playback_queue:
        # Drain the queue without blocking.
        while not self.playback_queue.empty():
            try:
                self.playback_queue.get_nowait()
            except queue.Empty:
                # Another consumer emptied the queue between the
                # empty() check and get_nowait(); nothing left to drop.
                break

    # Staged chunks are pending playback too; keeping them would replay
    # stale audio once the pre-buffer is flushed again.
    staged = getattr(self, 'pre_buffer', None)
    if staged:
        staged.clear()
def _flush_pre_buffer(self):
    """Move the staged pre-buffer chunks into the playback queue.

    Chunks are enqueued in order, waiting up to 0.1 s for space on each
    put. If the queue stays full, a warning is printed and the remaining
    chunks are dropped. The pre-buffer is emptied in either case. Does
    nothing when the pre-buffer is missing or empty.
    """
    staged = getattr(self, 'pre_buffer', None)
    if not staged:
        return

    for piece in staged:
        try:
            self.playback_queue.put(piece, timeout=0.1)
        except queue.Full:
            print("警告: 播放队列已满,丢弃预缓冲数据")
            break

    # Whatever could not be enqueued is discarded along with the rest.
    staged.clear()
def _apply_volume_fade(self, audio_array):
    """Apply a short linear fade-in and fade-out to reduce clicks.

    The fade length is ``min(100, len(audio_array) // 10)`` frames at
    each end. The head is scaled from 0 upwards and the tail mirrors it
    down to 0 on the last frame; scaled values are truncated toward zero
    when cast back to the array's dtype.

    Args:
        audio_array: NumPy array of samples, frames along the first
            axis (1-D, or 2-D as ``(frames, channels)``).

    Returns:
        The faded array. A writable input is modified in place and
        returned; a read-only input (such as the result of
        ``np.frombuffer`` on ``bytes``) is copied first and the copy is
        returned. Inputs shorter than 10 frames are returned untouched.
        On any error the failure is printed and the array is returned
        as-is.
    """
    try:
        fade_samples = min(100, len(audio_array) // 10)
        if fade_samples == 0:
            return audio_array

        # Arrays built from immutable bytes are read-only; writing into
        # them raises, so work on a private copy in that case.
        if not audio_array.flags.writeable:
            audio_array = audio_array.copy()

        # Gains 0, 1/n, ..., (n-1)/n, shaped to broadcast over channels.
        ramp = np.arange(fade_samples, dtype=np.float64) / fade_samples
        if audio_array.ndim > 1:
            ramp = ramp.reshape((-1,) + (1,) * (audio_array.ndim - 1))

        head = audio_array[:fade_samples] * ramp
        # Reversed ramp: the gain decreases toward the end of the chunk.
        tail = audio_array[-fade_samples:] * ramp[::-1]

        audio_array[:fade_samples] = head.astype(audio_array.dtype)
        audio_array[-fade_samples:] = tail.astype(audio_array.dtype)
        return audio_array
    except Exception as e:
        # Best effort: never let a fade failure break playback.
        print(f"音量淡入淡出失败: {e}")
        return audio_array
def _detect_silence(self, audio_data):
    """Classify a chunk of 16-bit PCM audio as silent or not.

    The chunk level is the mean absolute sample value. It is appended to
    ``self.audio_level_history`` (trimmed to the 10 most recent entries)
    and compared against ``self.silence_threshold``.

    Args:
        audio_data: Bytes-like buffer interpreted as int16 samples.

    Returns:
        Tuple ``(is_silence, audio_level, avg_level)``: whether the
        chunk level is below the threshold, the chunk level, and the
        mean of the level history. An empty buffer has level 0.0.
        If the buffer cannot be decoded (for example an odd byte count)
        the error is printed and ``(False, 0, 0)`` is returned.
    """
    try:
        samples = np.frombuffer(audio_data, dtype=np.int16)

        if samples.size:
            # Widen before abs(): abs(-32768) wraps around in int16 and
            # would come out negative.
            audio_level = float(np.abs(samples.astype(np.int32)).mean())
        else:
            # The mean of nothing is NaN, which would poison the history.
            audio_level = 0.0

        # Update the rolling level history.
        self.audio_level_history.append(audio_level)
        if len(self.audio_level_history) > 10:
            self.audio_level_history.pop(0)

        avg_level = float(np.mean(self.audio_level_history))
        is_silence = audio_level < self.silence_threshold

        return is_silence, audio_level, avg_level
    except Exception as e:
        print(f"静音检测失败: {e}")
        return False, 0, 0
def cleanup(self) -> None:
"""清理音频设备资源"""
try:
self.recording = False
self.stop_recording()
self.stop_playing()
if self.input_stream:
self.input_stream.stop()
self.input_stream.close()
@ -132,6 +284,9 @@ class AudioDeviceManager:
self.output_stream.stop()
self.output_stream.close()
sd.stop() # 停止所有音频播放
# 清空预缓冲区
if hasattr(self, 'pre_buffer'):
self.pre_buffer.clear()
except Exception as e:
print(f"清理音频设备失败: {e}")

View File

@ -42,7 +42,7 @@ start_session_req = {
}
input_audio_config = {
"chunk": 3200,
"chunk": 6400, # 增大缓冲区大小,减少处理频率
"format": "pcm",
"channels": 1,
"sample_rate": 16000,
@ -50,7 +50,7 @@ input_audio_config = {
}
output_audio_config = {
"chunk": 3200,
"chunk": 6400, # 增大缓冲区大小,减少处理频率
"format": "pcm",
"channels": 1,
"sample_rate": 24000,