fix audio
This commit is contained in:
parent
e4503e2d1a
commit
d5f2957984
Binary file not shown.
@ -11,7 +11,8 @@ from dataclasses import dataclass
|
|||||||
from typing import Any, Dict, Optional
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
import config
|
import config
|
||||||
import pyaudio
|
import sounddevice as sd
|
||||||
|
import numpy as np
|
||||||
from realtime_dialog_client import RealtimeDialogClient
|
from realtime_dialog_client import RealtimeDialogClient
|
||||||
|
|
||||||
|
|
||||||
@ -31,40 +32,66 @@ class AudioDeviceManager:
|
|||||||
def __init__(self, input_config: AudioConfig, output_config: AudioConfig):
    """Store the capture and playback configurations; no stream is opened here.

    Args:
        input_config: Settings read by ``open_input_stream``.
        output_config: Settings read by ``open_output_stream`` and
            ``play_audio``.
    """
    self.input_config = input_config
    self.output_config = output_config
    # Streams are created by open_input_stream()/open_output_stream().
    # None means "not open"; cleanup() skips streams that are unset.
    self.input_stream: Optional[sd.InputStream] = None
    self.output_stream: Optional[sd.OutputStream] = None
|
|
||||||
|
def open_input_stream(self) -> "Optional[sd.InputStream]":
    """Open and start a 16-bit PCM capture stream on the default device.

    Sample rate, channel count and block size are taken from
    ``self.input_config``.

    Returns:
        The started stream (also stored on ``self.input_stream``), or
        ``None`` when the stream could not be created or started. On
        failure ``self.input_stream`` is left untouched.
    """
    stream = None
    try:
        stream = sd.InputStream(
            samplerate=self.input_config.sample_rate,
            channels=self.input_config.channels,
            dtype='int16',  # 16-bit PCM
            blocksize=self.input_config.chunk,
            device=None,  # use the default device
        )
        stream.start()
    except Exception as e:
        print(f"打开输入流失败: {e}")
        # The stream may have been created but failed to start; close it
        # rather than leaving a half-open stream behind.
        if stream is not None:
            try:
                stream.close()
            except Exception:
                # Best effort: the original failure was already reported.
                pass
        return None

    # Publish the stream only once it is known to be running.
    self.input_stream = stream
    return stream
|
||||||
|
|
||||||
def open_output_stream(self) -> "Optional[sd.OutputStream]":
    """Open and start a 16-bit PCM playback stream on the default device.

    Sample rate, channel count and block size are taken from
    ``self.output_config``.

    Returns:
        The started stream (also stored on ``self.output_stream``), or
        ``None`` when the stream could not be created or started. On
        failure ``self.output_stream`` is left untouched.
    """
    stream = None
    try:
        stream = sd.OutputStream(
            samplerate=self.output_config.sample_rate,
            channels=self.output_config.channels,
            dtype='int16',  # 16-bit PCM
            blocksize=self.output_config.chunk,
            device=None,  # use the default device
        )
        stream.start()
    except Exception as e:
        print(f"打开输出流失败: {e}")
        # The stream may have been created but failed to start; close it
        # rather than leaving a half-open stream behind.
        if stream is not None:
            try:
                stream.close()
            except Exception:
                # Best effort: the original failure was already reported.
                pass
        return None

    # Publish the stream only once it is known to be running.
    self.output_stream = stream
    return stream
|
||||||
|
|
||||||
|
def play_audio(self, audio_data: bytes) -> None:
    """Play one chunk of raw 16-bit PCM audio.

    The bytes are interpreted as interleaved int16 samples with
    ``self.output_config.channels`` channels. When ``self.output_stream``
    is open the frames are written to it; otherwise playback falls back to
    ``sd.play`` followed by ``sd.wait``. Errors are reported with ``print``
    and never propagate to the caller.

    Args:
        audio_data: Raw PCM bytes. Empty input is a no-op; trailing bytes
            that do not form a complete frame are ignored.
    """
    try:
        channels = self.output_config.channels
        frame_bytes = 2 * channels  # int16 -> two bytes per sample
        usable = len(audio_data) - len(audio_data) % frame_bytes
        if usable == 0:
            return

        # Convert only the whole frames so an odd-sized chunk does not make
        # frombuffer/reshape reject the entire chunk.
        frames = np.frombuffer(
            audio_data, dtype=np.int16, count=usable // 2
        ).reshape(-1, channels)

        stream = getattr(self, "output_stream", None)
        if stream is not None and not getattr(stream, "closed", False):
            # Reuse the stream opened by open_output_stream() instead of
            # starting a separate playback for every chunk.
            stream.write(frames)
            return

        # No usable stream: play directly and wait until playback finishes.
        sd.play(frames, samplerate=self.output_config.sample_rate)
        sd.wait()
    except Exception as e:
        print(f"音频播放失败: {e}")
|
||||||
|
|
||||||
def cleanup(self) -> None:
    """Stop and close both streams, then stop any ``sd.play`` playback.

    Every step is attempted independently, so a failure while releasing
    one stream does not prevent the other from being released. The stream
    attributes are reset to ``None`` so a later call does not touch an
    already closed stream. Errors are reported with ``print``.
    """
    for attr in ("input_stream", "output_stream"):
        stream = getattr(self, attr, None)
        if not stream:
            continue
        # Drop the reference first so a failure below cannot leave a stale
        # stream behind.
        setattr(self, attr, None)
        try:
            try:
                stream.stop()
            finally:
                # Close even when stop() raised.
                stream.close()
        except Exception as e:
            print(f"清理音频设备失败: {e}")

    try:
        sd.stop()  # stop playback started through sd.play
    except Exception as e:
        print(f"清理音频设备失败: {e}")
|
||||||
|
|
||||||
|
|
||||||
class DialogSession:
|
class DialogSession:
|
||||||
@ -118,8 +145,12 @@ class DialogSession:
|
|||||||
)
|
)
|
||||||
# 初始化音频队列和输出流
|
# 初始化音频队列和输出流
|
||||||
print(f"输出音频配置: {config.output_audio_config}")
|
print(f"输出音频配置: {config.output_audio_config}")
|
||||||
self.output_stream = self.audio_device.open_output_stream()
|
output_stream = self.audio_device.open_output_stream()
|
||||||
print("音频输出流已打开")
|
if output_stream:
|
||||||
|
print("音频输出流已打开")
|
||||||
|
self.output_stream = output_stream
|
||||||
|
else:
|
||||||
|
print("警告:音频输出流打开失败,将使用直接播放模式")
|
||||||
# 启动播放线程
|
# 启动播放线程
|
||||||
self.is_recording = True
|
self.is_recording = True
|
||||||
self.is_playing = True
|
self.is_playing = True
|
||||||
@ -155,11 +186,15 @@ class DialogSession:
|
|||||||
if was_not_playing:
|
if was_not_playing:
|
||||||
print("播放开始前,额外发送静音数据清理管道")
|
print("播放开始前,额外发送静音数据清理管道")
|
||||||
for _ in range(3):
|
for _ in range(3):
|
||||||
self.output_stream.write(b'\x00' * len(audio_data))
|
# 播放静音数据
|
||||||
|
self.audio_device.play_audio(b'\x00' * len(audio_data))
|
||||||
time.sleep(0.1)
|
time.sleep(0.1)
|
||||||
|
|
||||||
# 播放音频数据
|
# 播放音频数据
|
||||||
self.output_stream.write(audio_data)
|
try:
|
||||||
|
self.audio_device.play_audio(audio_data)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"音频播放错误: {e}")
|
||||||
|
|
||||||
except queue.Empty:
|
except queue.Empty:
|
||||||
# 队列为空,检查是否超时
|
# 队列为空,检查是否超时
|
||||||
|
|||||||
BIN
doubao/input.pcm
BIN
doubao/input.pcm
Binary file not shown.
Binary file not shown.
143
doubao/test_sounddevice.py
Normal file
143
doubao/test_sounddevice.py
Normal file
@ -0,0 +1,143 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
测试sounddevice音频播放功能
|
||||||
|
用于验证新的音频实现是否正常工作
|
||||||
|
"""
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import sounddevice as sd
|
||||||
|
import time
|
||||||
|
|
||||||
|
def test_sounddevice():
    """Run a manual smoke test of sounddevice playback.

    Steps, in order: list the audio devices, play a generated 440 Hz tone
    from an int16 array, play the same tone after a bytes round trip, and
    play it as two-channel audio. Each step blocks until playback is done.

    Returns:
        True when every step completed without raising, False as soon as
        one step fails.
    """
    print("=== SoundDevice音频播放测试 ===")

    # 1. Enumerate the audio devices.
    print("\n1. 检查音频设备...")
    try:
        devices = sd.query_devices()
        print(f"找到 {len(devices)} 个音频设备:")
        for i, dev in enumerate(devices):
            print(f" [{i}] {dev['name']} (输入: {dev['max_input_channels']}, 输出: {dev['max_output_channels']})")

        # sd.default.device is an (input, output) pair; report the output
        # entry only, as the label says.
        default_output = sd.default.device[1]
        print(f"默认输出设备: {default_output}")
    except Exception as e:
        print(f"音频设备检查失败: {e}")
        return False

    # 2. Generate a tone and play it.
    print("\n2. 测试生成和播放音频...")
    try:
        # One second of a 440 Hz sine wave.
        sample_rate = 24000
        duration = 1.0
        frequency = 440

        t = np.linspace(0, duration, int(sample_rate * duration), False)
        audio_data = np.sin(2 * np.pi * frequency * t) * 0.3  # 30% volume

        # Scale to 16-bit integers.
        audio_data_int16 = (audio_data * 32767).astype(np.int16)

        print(f"生成音频数据: 采样率={sample_rate}Hz, 时长={duration}秒, 频率={frequency}Hz")
        print(f"音频数据形状: {audio_data_int16.shape}, 数据类型: {audio_data_int16.dtype}")

        print("开始播放测试音频...")
        sd.play(audio_data_int16, sample_rate)
        sd.wait()  # block until playback has finished
        print("✓ 音频播放成功")
    except Exception as e:
        print(f"音频播放失败: {e}")
        return False

    # 3. Play the same tone after a bytes round trip.
    print("\n3. 测试直接播放字节数据...")
    try:
        byte_data = audio_data_int16.tobytes()
        print(f"字节数据长度: {len(byte_data)} 字节")

        # Rebuild the sample array from the raw bytes.
        audio_array = np.frombuffer(byte_data, dtype=np.int16)

        print("开始播放字节数据...")
        sd.play(audio_array, sample_rate)
        sd.wait()
        print("✓ 字节数据播放成功")
    except Exception as e:
        print(f"字节数据播放失败: {e}")
        return False

    # 4. Play the tone as two identical channels.
    print("\n4. 测试立体声播放...")
    try:
        stereo_data = np.column_stack([audio_data_int16, audio_data_int16])
        print(f"立体声数据形状: {stereo_data.shape}")

        print("开始播放立体声音频...")
        sd.play(stereo_data, sample_rate)
        sd.wait()
        print("✓ 立体声播放成功")
    except Exception as e:
        print(f"立体声播放失败: {e}")
        return False

    return True
|
||||||
|
|
||||||
|
def test_numpy_conversion():
    """Check a bytes -> int16 array -> bytes round trip on sample PCM data.

    The sample bytes are converted to an int16 array, reshaped to a single
    column, and converted back; the result must equal the input bytes.

    Returns:
        True when the conversion succeeds and reproduces the input,
        False otherwise.
    """
    print("\n5. 测试数据类型转换...")

    # Sample bytes standing in for 16-bit PCM data returned by the service.
    test_data = b'\x00\x00\x7f\x7f\x80\x00\xff\xff'

    try:
        # Bytes to numpy array.
        audio_array = np.frombuffer(test_data, dtype=np.int16)
        print(f"原始字节数据: {test_data}")
        print(f"转换后numpy数组: {audio_array}")
        print(f"数组形状: {audio_array.shape}, 数据类型: {audio_array.dtype}")

        # Reshape to a single (mono) column.
        audio_reshaped = audio_array.reshape(-1, 1)
        print(f"重塑后形状: {audio_reshaped.shape}")

        # Back to bytes, going through the reshaped array so the reshape is
        # covered by the check below.
        byte_data = audio_reshaped.tobytes()
        print(f"转回字节数据: {byte_data}")

        # Without this comparison the test would pass even if the round
        # trip altered the data.
        if byte_data != test_data:
            raise ValueError("round-trip bytes differ from the input")

        print("✓ 数据类型转换测试成功")
        return True

    except Exception as e:
        print(f"数据类型转换失败: {e}")
        return False
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: run both checks, print a summary, and report the
    # outcome through the process exit status.
    print("SoundDevice音频播放功能测试")
    print("=" * 50)

    # Run both checks unconditionally so one failure does not hide the other.
    playback_ok = test_sounddevice()
    conversion_ok = test_numpy_conversion()
    success = bool(playback_ok and conversion_ok)

    print("\n" + "=" * 50)
    if success:
        print("✓ 所有SoundDevice测试通过")
        print("树莓派应该可以正常播放音频了!")
    else:
        print("✗ 部分测试失败,需要进一步调试")

    # Exit non-zero on failure so calling scripts can detect it.
    raise SystemExit(0 if success else 1)
|
||||||
Loading…
Reference in New Issue
Block a user