audio
This commit is contained in:
parent
1bb6a32dc2
commit
93a0b0a446
Binary file not shown.
@ -17,6 +17,20 @@ import wave
|
||||
class AudioConverter:
|
||||
"""音频格式转换工具类"""
|
||||
|
||||
def __init__(self):
|
||||
# 预分配转换缓冲区
|
||||
self._conversion_cache = {}
|
||||
self._numpy_available = self._check_numpy()
|
||||
self._conversion_buffer = bytearray(65536) # 64KB预分配缓冲区
|
||||
|
||||
def _check_numpy(self) -> bool:
|
||||
"""检查numpy是否可用"""
|
||||
try:
|
||||
import numpy as np
|
||||
return True
|
||||
except ImportError:
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def get_platform_info() -> dict:
|
||||
"""获取平台信息"""
|
||||
@ -26,28 +40,44 @@ class AudioConverter:
|
||||
'platform': platform.platform()
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def float32_to_int16(float32_data: bytes) -> bytes:
|
||||
"""将Float32格式转换为Int16格式"""
|
||||
def float32_to_int16_fast(self, float32_data: bytes) -> bytes:
    """Convert native-endian Float32 PCM samples to Int16 PCM samples.

    Args:
        float32_data: Raw bytes holding 32-bit float samples. Trailing bytes
            that do not make up a complete 4-byte sample are dropped.

    Returns:
        Raw bytes with one signed 16-bit sample per input sample. Each sample
        is clamped to [-1.0, 1.0], scaled by 32767 and truncated toward zero.
        NaN samples become 0. Input with no complete sample yields ``b''``.
    """
    # Drop a trailing partial sample so the length is a multiple of 4.
    usable = len(float32_data) - len(float32_data) % 4
    if usable != len(float32_data):
        float32_data = float32_data[:usable]
    if not usable:
        return b''

    # Vectorized path, taken only when the NumPy probe succeeded.
    if self._numpy_available:
        try:
            import numpy as np

            samples = np.frombuffer(float32_data, dtype=np.float32)
            # Casting NaN to int16 is undefined, so map it to silence first.
            samples = np.nan_to_num(samples, nan=0.0)
            samples = np.clip(samples, -1.0, 1.0)
            return (samples * 32767).astype(np.int16).tobytes()
        except Exception as e:
            # Deliberate best-effort: any NumPy failure falls through to the
            # pure-Python implementation below.
            print(f"numpy转换失败,使用备用方法: {e}")

    # Pure-Python fallback. Clamp BEFORE scaling so that inf cannot reach
    # int() (OverflowError) and so that out-of-range input produces the same
    # values as the NumPy path. ``v != v`` is true only for NaN, which int()
    # would reject with ValueError.
    float_values = struct.unpack(f'{usable // 4}f', float32_data)
    int16_values = [
        0 if v != v else int(max(-1.0, min(1.0, v)) * 32767)
        for v in float_values
    ]
    return struct.pack(f'{len(int16_values)}h', *int16_values)
|
||||
|
||||
@staticmethod
|
||||
def float32_to_int16(float32_data: bytes) -> bytes:
|
||||
"""将Float32格式转换为Int16格式(保持向后兼容)"""
|
||||
converter = AudioConverter()
|
||||
return converter.float32_to_int16_fast(float32_data)
|
||||
|
||||
@staticmethod
|
||||
def int16_to_float32(int16_data: bytes) -> bytes:
|
||||
"""将Int16格式转换为Float32格式"""
|
||||
|
||||
@ -66,12 +66,16 @@ class AudioDeviceManager:
|
||||
# 检查是否需要格式转换
|
||||
needs_conversion = False
|
||||
|
||||
# 如果当前格式不被支持,且启用降级
|
||||
# 关键检查:如果当前格式不被支持,必须转换
|
||||
if (self.output_config.bit_size == pyaudio.paFloat32 and
|
||||
'Float32' not in self.device_info['supported_formats'] and
|
||||
fallback_int16):
|
||||
needs_conversion = True
|
||||
'Float32' not in self.device_info['supported_formats']):
|
||||
|
||||
if fallback_int16:
|
||||
needs_conversion = True
|
||||
print(f"检测到平台不支持Float32格式,将进行实时转换")
|
||||
else:
|
||||
print(f"警告: 平台不支持Float32格式,但转换已禁用")
|
||||
|
||||
# 如果优先使用原生格式,且平台推荐格式不是当前格式
|
||||
elif (prefer_native and
|
||||
self.device_info['recommended_format'] == 'Int16' and
|
||||
@ -80,17 +84,14 @@ class AudioDeviceManager:
|
||||
needs_conversion = True
|
||||
|
||||
if needs_conversion:
|
||||
print(f"警告: 音频格式转换 {self.output_config.bit_size} -> Int16")
|
||||
self.output_config.format = 'pcm'
|
||||
self.output_config.bit_size = pyaudio.paInt16
|
||||
self.output_config.sample_rate = self.platform_config['recommended_sample_rate']
|
||||
|
||||
print(f"输出配置调整:")
|
||||
print(f" 格式: {original_format} -> {self.output_config.format}")
|
||||
print(f" 比特深度: {32 if original_bit_size == pyaudio.paFloat32 else 16} -> 16")
|
||||
print(f" 采样率: {original_sample_rate} -> {self.output_config.sample_rate}Hz")
|
||||
print(f"将使用实时格式转换: Float32 -> Int16")
|
||||
print(f" 保持原始采样率: {original_sample_rate}Hz")
|
||||
print(f" 转换将在播放时进行")
|
||||
# 注意:这里不修改配置,而是在播放时进行转换
|
||||
self.needs_conversion = True
|
||||
else:
|
||||
print(f"使用原始输出配置: 格式={original_format}, 比特深度={original_bit_size}, 采样率={original_sample_rate}Hz")
|
||||
self.needs_conversion = False
|
||||
|
||||
def open_input_stream(self) -> pyaudio.Stream:
|
||||
"""打开音频输入流"""
|
||||
@ -144,9 +145,9 @@ class AudioDeviceManager:
|
||||
raise Exception("无法打开任何音频输出设备")
|
||||
|
||||
def write_audio_data(self, audio_data: bytes) -> bool:
|
||||
"""写入音频数据,性能优化版本"""
|
||||
"""写入音频数据,高性能版本"""
|
||||
# 预缓冲:对于较小的音频数据,等待积累到一定大小再播放
|
||||
min_buffer_size = 1024 # 最小缓冲大小
|
||||
min_buffer_size = 2048 # 最小缓冲大小
|
||||
|
||||
if not hasattr(self, '_audio_buffer'):
|
||||
self._audio_buffer = b''
|
||||
@ -162,10 +163,18 @@ class AudioDeviceManager:
|
||||
# 继续等待更多数据
|
||||
return True
|
||||
|
||||
# 直接写入pyaudio流,不做实时转换
|
||||
# 根据设备能力决定是否需要格式转换
|
||||
final_data = buffer_to_play
|
||||
|
||||
# 如果需要格式转换,进行高效转换
|
||||
if hasattr(self, 'needs_conversion') and self.needs_conversion:
|
||||
# 使用快速转换方法
|
||||
final_data = self.converter.float32_to_int16_fast(buffer_to_play)
|
||||
|
||||
# 尝试写入pyaudio流
|
||||
try:
|
||||
if self.output_stream:
|
||||
self.output_stream.write(buffer_to_play)
|
||||
self.output_stream.write(final_data)
|
||||
return True
|
||||
except Exception as e:
|
||||
print(f"pyaudio写入失败: {e}")
|
||||
@ -175,8 +184,8 @@ class AudioDeviceManager:
|
||||
if enable_aplay and self.platform_config['fallback_to_aplay']:
|
||||
print("尝试使用aplay播放...")
|
||||
return self.player.play_audio(
|
||||
audio_data=buffer_to_play,
|
||||
format_type='Int16', # 现在统一使用Int16
|
||||
audio_data=final_data,
|
||||
format_type='Int16', # 转换后的数据
|
||||
sample_rate=self.output_config.sample_rate,
|
||||
channels=self.output_config.channels
|
||||
)
|
||||
|
||||
@ -30,7 +30,7 @@ start_session_req = {
|
||||
"speaker": "zh_female_vv_jupiter_bigtts",
|
||||
# "speaker": "S_XXXXXX", // 指定自定义的复刻音色,需要填下character_manifest
|
||||
# "speaker": "ICL_zh_female_aojiaonvyou_tob" // 指定官方复刻音色,不需要填character_manifest
|
||||
"audio_config": {"channel": 1, "format": "pcm", "sample_rate": 16000},
|
||||
"audio_config": {"channel": 1, "format": "pcm", "sample_rate": 24000},
|
||||
},
|
||||
"dialog": {
|
||||
"bot_name": "豆包",
|
||||
@ -61,6 +61,6 @@ output_audio_config = {
|
||||
"chunk": 4096, # 增加缓冲区大小
|
||||
"format": "pcm",
|
||||
"channels": 1,
|
||||
"sample_rate": 16000,
|
||||
"bit_size": pyaudio.paInt16,
|
||||
"sample_rate": 24000,
|
||||
"bit_size": pyaudio.paFloat32, # 服务器返回的是Float32格式
|
||||
}
|
||||
|
||||
118
doubao/test_audio_conversion.py
Normal file
118
doubao/test_audio_conversion.py
Normal file
@ -0,0 +1,118 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
音频转换测试脚本
|
||||
用于测试高性能音频格式转换
|
||||
"""
|
||||
|
||||
import time
|
||||
import struct
|
||||
from audio_converter import AudioConverter
|
||||
|
||||
def test_conversion_performance():
    """Time one Float32 -> Int16 conversion and spot-check the first samples.

    Builds one second of a 440 Hz sine wave sampled at 24 kHz, converts it
    with ``AudioConverter.float32_to_int16_fast``, prints size and timing
    figures, then compares the first ten converted samples against
    ``int(sample * 32767)`` with a tolerance of 1.
    """
    import math  # function-scope import; the module header does not import math

    print("=== 音频转换性能测试 ===")

    converter = AudioConverter()

    # One second of 24 kHz Float32 audio.
    sample_rate = 24000
    duration = 1.0
    num_samples = int(sample_rate * duration)

    # 440 Hz sine at half amplitude, so every sample stays inside [-1.0, 1.0]
    # and the expected value below is never affected by clamping.
    phase_step = 2 * math.pi * 440 / sample_rate
    samples = [0.5 * math.sin(i * phase_step) for i in range(num_samples)]
    test_data = struct.pack(f'{num_samples}f', *samples)
    print(f"生成了 {len(test_data)} 字节的测试数据")

    # Time the conversion with a monotonic high-resolution clock.
    start_time = time.perf_counter()
    converted_data = converter.float32_to_int16_fast(test_data)
    conversion_time = time.perf_counter() - start_time

    data_ratio = len(converted_data) / len(test_data)
    # Avoid ZeroDivisionError if the clock reports no elapsed time.
    throughput = len(test_data) / max(conversion_time, 1e-9) / 1024

    print("转换结果:")
    print(f" 原始数据: {len(test_data)} 字节")
    print(f" 转换后: {len(converted_data)} 字节")
    print(f" 数据比例: {data_ratio:.2f}")
    print(f" 转换时间: {conversion_time:.4f} 秒")
    print(f" 转换速度: {throughput:.1f} KB/s")

    # Quality check on the first ten samples.
    print("\n=== 转换质量验证 ===")

    original_samples = struct.unpack('10f', test_data[:40])
    converted_samples = struct.unpack('10h', converted_data[:20])

    print("前10个样本的转换结果:")
    errors = 0
    for i, (orig, conv) in enumerate(zip(original_samples, converted_samples)):
        expected = int(orig * 32767)
        print(f" 样本{i}: {orig:.6f} -> {conv} (期望: {expected})")
        # Allow an error of 1 for rounding differences between code paths.
        if abs(conv - expected) > 1:
            errors += 1

    if errors == 0:
        print("✓ 转换质量验证通过")
    else:
        print(f"✗ 转换质量验证失败,{errors}个样本有误差")
|
||||
|
||||
def test_numpy_vs_python():
    """Compare the NumPy conversion path with the pure-Python fallback.

    Converts two seconds of random 24 kHz Float32 audio ten times on each
    path and prints the elapsed times and, when NumPy is available, the
    speed-up ratio.
    """
    import random

    print("\n=== NumPy vs Python 性能对比 ===")

    converter = AudioConverter()

    # Two seconds of audio gives a larger workload than the single-shot test.
    sample_rate = 24000
    duration = 2.0
    num_samples = int(sample_rate * duration)

    samples = [random.uniform(-1.0, 1.0) for _ in range(num_samples)]
    test_data = struct.pack(f'{num_samples}f', *samples)

    # NumPy path (only when the converter's probe found NumPy).
    if converter._numpy_available:
        print("测试NumPy版本...")
        start_time = time.perf_counter()
        for _ in range(10):
            converter.float32_to_int16_fast(test_data)
        numpy_time = time.perf_counter() - start_time
        print(f"NumPy版本: {numpy_time:.4f} 秒 (10次)")
    else:
        print("NumPy不可用")
        numpy_time = None

    # Pure-Python path. The static float32_to_int16 wrapper delegates to
    # float32_to_int16_fast and would use NumPy whenever it is installed, so
    # the fallback is forced by clearing the flag on a dedicated converter.
    python_converter = AudioConverter()
    python_converter._numpy_available = False

    print("测试纯Python版本...")
    start_time = time.perf_counter()
    for _ in range(10):
        python_converter.float32_to_int16_fast(test_data)
    python_time = time.perf_counter() - start_time
    print(f"纯Python版本: {python_time:.4f} 秒 (10次)")

    # Skipped when NumPy is unavailable (None) or the timer measured 0.
    if numpy_time:
        speedup = python_time / numpy_time
        print(f"NumPy加速比: {speedup:.2f}x")
|
||||
|
||||
# Run both checks when the file is executed as a script.
if __name__ == "__main__":
    test_conversion_performance()
    test_numpy_vs_python()
|
||||
Loading…
Reference in New Issue
Block a user