fix audio
This commit is contained in:
parent
3958d2ff81
commit
7eff24a175
Binary file not shown.
Binary file not shown.
@ -35,13 +35,26 @@ class AudioDeviceManager:
|
||||
self.input_stream = None
|
||||
self.output_stream = None
|
||||
self.audio_queue = None
|
||||
self.playback_queue = None # 播放队列
|
||||
self.recording = False
|
||||
self.playing = False
|
||||
|
||||
# 预缓冲机制
|
||||
self.pre_buffer = []
|
||||
self.pre_buffer_size = 5 # 预缓冲5个音频块
|
||||
self.buffer_threshold = 3 # 缓冲阈值,低于此值开始预缓冲
|
||||
|
||||
# 静音检测和回声消除
|
||||
self.silence_threshold = 500 # 静音阈值
|
||||
self.echo_suppression_enabled = True
|
||||
self.last_audio_level = 0
|
||||
self.audio_level_history = []
|
||||
|
||||
def open_input_stream(self):
|
||||
"""打开音频输入流"""
|
||||
try:
|
||||
import queue
|
||||
self.audio_queue = queue.Queue(maxsize=100) # 音频数据队列
|
||||
self.audio_queue = queue.Queue(maxsize=100) # 增大队列大小,提供更多缓冲
|
||||
|
||||
def audio_callback(indata, frames, time_info, status):
|
||||
"""音频数据回调"""
|
||||
@ -51,17 +64,23 @@ class AudioDeviceManager:
|
||||
try:
|
||||
# 将numpy数组转换为字节数据
|
||||
audio_bytes = indata.tobytes()
|
||||
|
||||
# 添加音频数据预处理,提高质量
|
||||
if hasattr(self, '_audio_processor'):
|
||||
audio_bytes = self._audio_processor(audio_bytes)
|
||||
|
||||
self.audio_queue.put_nowait(audio_bytes)
|
||||
except queue.Full:
|
||||
print("警告: 音频队列已满,丢弃数据")
|
||||
pass # 静默丢弃,避免阻塞
|
||||
|
||||
self.input_stream = sd.InputStream(
|
||||
samplerate=self.input_config.sample_rate,
|
||||
channels=self.input_config.channels,
|
||||
dtype='int16', # 16-bit PCM
|
||||
dtype='int16',
|
||||
blocksize=self.input_config.chunk,
|
||||
callback=audio_callback,
|
||||
device=None # 使用默认设备
|
||||
device=None,
|
||||
latency='low' # 低延迟模式
|
||||
)
|
||||
self.input_stream.start()
|
||||
self.recording = True
|
||||
@ -73,14 +92,53 @@ class AudioDeviceManager:
|
||||
def open_output_stream(self):
|
||||
"""打开音频输出流"""
|
||||
try:
|
||||
import queue
|
||||
self.playback_queue = queue.Queue(maxsize=50) # 增大播放队列,提供更多缓冲
|
||||
|
||||
def playback_callback(outdata, frames, time_info, status):
|
||||
"""音频播放回调"""
|
||||
if status:
|
||||
print(f"播放状态: {status}")
|
||||
|
||||
try:
|
||||
# 从队列获取音频数据
|
||||
audio_data = self.playback_queue.get_nowait()
|
||||
|
||||
# 转换字节数据为numpy数组
|
||||
audio_array = np.frombuffer(audio_data, dtype=np.int16)
|
||||
audio_array = audio_array.reshape(-1, self.output_config.channels)
|
||||
|
||||
# 应用音频淡入淡出效果,减少爆音
|
||||
if hasattr(self, '_apply_volume_fade'):
|
||||
audio_array = self._apply_volume_fade(audio_array)
|
||||
|
||||
# 确保数据大小匹配
|
||||
if len(audio_array) < frames:
|
||||
# 数据不足,用0填充
|
||||
padded = np.zeros((frames, self.output_config.channels), dtype=np.int16)
|
||||
padded[:len(audio_array)] = audio_array
|
||||
outdata[:] = padded
|
||||
else:
|
||||
outdata[:] = audio_array[:frames]
|
||||
|
||||
except queue.Empty:
|
||||
# 队列为空,输出静音
|
||||
outdata.fill(0)
|
||||
except Exception as e:
|
||||
print(f"播放回调错误: {e}")
|
||||
outdata.fill(0)
|
||||
|
||||
self.output_stream = sd.OutputStream(
|
||||
samplerate=self.output_config.sample_rate,
|
||||
channels=self.output_config.channels,
|
||||
dtype='int16', # 16-bit PCM
|
||||
dtype='int16',
|
||||
blocksize=self.output_config.chunk,
|
||||
device=None # 使用默认设备
|
||||
callback=playback_callback,
|
||||
device=None,
|
||||
latency='low' # 低延迟模式
|
||||
)
|
||||
self.output_stream.start()
|
||||
self.playing = True
|
||||
return self.output_stream
|
||||
except Exception as e:
|
||||
print(f"打开输出流失败: {e}")
|
||||
@ -89,13 +147,42 @@ class AudioDeviceManager:
|
||||
def play_audio(self, audio_data: bytes) -> None:
|
||||
"""播放音频数据"""
|
||||
try:
|
||||
# 将字节数据转换为numpy数组
|
||||
audio_array = np.frombuffer(audio_data, dtype=np.int16)
|
||||
audio_array = audio_array.reshape(-1, self.output_config.channels)
|
||||
|
||||
# 使用sounddevice播放
|
||||
sd.play(audio_array, samplerate=self.output_config.sample_rate)
|
||||
sd.wait() # 等待播放完成
|
||||
if self.playing and self.playback_queue:
|
||||
# 音频数据预缓冲:将大数据块分成更小的块以获得更流畅的播放
|
||||
chunk_size = self.output_config.chunk * 2 # 每个样本2字节
|
||||
|
||||
# 预处理音频数据
|
||||
if hasattr(self, '_playback_processor'):
|
||||
audio_data = self._playback_processor(audio_data)
|
||||
|
||||
# 预缓冲机制:在播放前积累一些音频块
|
||||
if len(self.pre_buffer) < self.pre_buffer_size:
|
||||
chunk_size = self.output_config.chunk * 2
|
||||
for i in range(0, len(audio_data), chunk_size):
|
||||
chunk = audio_data[i:i+chunk_size]
|
||||
self.pre_buffer.append(chunk)
|
||||
if len(self.pre_buffer) >= self.pre_buffer_size:
|
||||
break
|
||||
|
||||
# 如果预缓冲已满,开始播放
|
||||
if len(self.pre_buffer) >= self.pre_buffer_size:
|
||||
self._flush_pre_buffer()
|
||||
|
||||
# 分块处理音频数据,避免单个数据块过大
|
||||
for i in range(0, len(audio_data), chunk_size):
|
||||
chunk = audio_data[i:i+chunk_size]
|
||||
try:
|
||||
# 使用阻塞式put,确保不丢失数据
|
||||
self.playback_queue.put(chunk, timeout=0.1)
|
||||
except queue.Full:
|
||||
print("警告: 播放队列已满,丢弃音频数据")
|
||||
# 如果队列满,尝试清空一些旧数据
|
||||
try:
|
||||
self.playback_queue.get_nowait()
|
||||
self.playback_queue.put(chunk, timeout=0.05)
|
||||
except:
|
||||
pass
|
||||
break
|
||||
except Exception as e:
|
||||
print(f"音频播放失败: {e}")
|
||||
|
||||
@ -105,9 +192,9 @@ class AudioDeviceManager:
|
||||
if not self.recording or self.audio_queue is None:
|
||||
return b'\x00' * (frames * 2) # 返回静音数据
|
||||
|
||||
# 从队列获取音频数据
|
||||
# 使用更长的超时时间,提高音频数据获取成功率
|
||||
try:
|
||||
audio_data = self.audio_queue.get(timeout=0.1) # 100ms超时
|
||||
audio_data = self.audio_queue.get(timeout=0.1) # 增加超时时间
|
||||
return audio_data
|
||||
except queue.Empty:
|
||||
# 队列为空,返回静音数据
|
||||
@ -121,10 +208,75 @@ class AudioDeviceManager:
|
||||
"""停止录音"""
|
||||
self.recording = False
|
||||
|
||||
def stop_playing(self):
    """Stop playback and discard all audio that has not been played yet.

    Clears the ``playing`` flag, drains every chunk still waiting in
    ``self.playback_queue`` and empties ``self.pre_buffer`` so that chunks
    accumulated for the stopped stream are not flushed into a later one.
    Safe to call when no output stream was ever opened
    (``playback_queue`` is ``None``).
    """
    # Imported locally, the same way the stream-opening methods import it,
    # so this method does not depend on a module-level ``queue`` name.
    import queue

    self.playing = False

    pending = self.playback_queue
    if pending is not None:
        # Drain until Empty instead of polling empty(): the playback
        # callback may consume items concurrently between the two calls.
        while True:
            try:
                pending.get_nowait()
            except queue.Empty:
                break

    # Drop chunks that were pre-buffered but never queued.
    pre_buffer = getattr(self, 'pre_buffer', None)
    if pre_buffer:
        pre_buffer.clear()
|
||||
|
||||
def _flush_pre_buffer(self):
|
||||
"""刷新预缓冲区到播放队列"""
|
||||
if hasattr(self, 'pre_buffer') and self.pre_buffer:
|
||||
for chunk in self.pre_buffer:
|
||||
try:
|
||||
self.playback_queue.put(chunk, timeout=0.1)
|
||||
except queue.Full:
|
||||
print("警告: 播放队列已满,丢弃预缓冲数据")
|
||||
break
|
||||
self.pre_buffer.clear()
|
||||
|
||||
def _apply_volume_fade(self, audio_array):
|
||||
"""应用音量淡入淡出效果,减少爆音"""
|
||||
try:
|
||||
# 简单的淡入淡出效果
|
||||
fade_samples = min(100, len(audio_array) // 10) # 淡入淡出样本数
|
||||
|
||||
# 淡入
|
||||
for i in range(fade_samples):
|
||||
factor = i / fade_samples
|
||||
audio_array[i] = int(audio_array[i] * factor)
|
||||
|
||||
# 淡出
|
||||
for i in range(fade_samples):
|
||||
factor = (fade_samples - i) / fade_samples
|
||||
audio_array[-(i+1)] = int(audio_array[-(i+1)] * factor)
|
||||
|
||||
return audio_array
|
||||
except Exception as e:
|
||||
print(f"音量淡入淡出失败: {e}")
|
||||
return audio_array
|
||||
|
||||
def _detect_silence(self, audio_data):
|
||||
"""检测静音"""
|
||||
try:
|
||||
audio_array = np.frombuffer(audio_data, dtype=np.int16)
|
||||
audio_level = np.abs(audio_array).mean()
|
||||
|
||||
# 更新音频电平历史
|
||||
self.audio_level_history.append(audio_level)
|
||||
if len(self.audio_level_history) > 10:
|
||||
self.audio_level_history.pop(0)
|
||||
|
||||
# 计算平均音频电平
|
||||
avg_level = np.mean(self.audio_level_history) if self.audio_level_history else 0
|
||||
|
||||
# 检测静音
|
||||
is_silence = audio_level < self.silence_threshold
|
||||
return is_silence, audio_level, avg_level
|
||||
except Exception as e:
|
||||
print(f"静音检测失败: {e}")
|
||||
return False, 0, 0
|
||||
|
||||
def cleanup(self) -> None:
|
||||
"""清理音频设备资源"""
|
||||
try:
|
||||
self.recording = False
|
||||
self.stop_recording()
|
||||
self.stop_playing()
|
||||
if self.input_stream:
|
||||
self.input_stream.stop()
|
||||
self.input_stream.close()
|
||||
@ -132,6 +284,9 @@ class AudioDeviceManager:
|
||||
self.output_stream.stop()
|
||||
self.output_stream.close()
|
||||
sd.stop() # 停止所有音频播放
|
||||
# 清空预缓冲区
|
||||
if hasattr(self, 'pre_buffer'):
|
||||
self.pre_buffer.clear()
|
||||
except Exception as e:
|
||||
print(f"清理音频设备失败: {e}")
|
||||
|
||||
|
||||
@ -42,7 +42,7 @@ start_session_req = {
|
||||
}
|
||||
|
||||
input_audio_config = {
|
||||
"chunk": 3200,
|
||||
"chunk": 6400, # 增大缓冲区大小,减少处理频率
|
||||
"format": "pcm",
|
||||
"channels": 1,
|
||||
"sample_rate": 16000,
|
||||
@ -50,7 +50,7 @@ input_audio_config = {
|
||||
}
|
||||
|
||||
output_audio_config = {
|
||||
"chunk": 3200,
|
||||
"chunk": 6400, # 增大缓冲区大小,减少处理频率
|
||||
"format": "pcm",
|
||||
"channels": 1,
|
||||
"sample_rate": 24000,
|
||||
|
||||
Loading…
Reference in New Issue
Block a user