audio
This commit is contained in:
parent
1bb6a32dc2
commit
93a0b0a446
Binary file not shown.
@ -17,6 +17,20 @@ import wave
|
|||||||
class AudioConverter:
|
class AudioConverter:
|
||||||
"""音频格式转换工具类"""
|
"""音频格式转换工具类"""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
# 预分配转换缓冲区
|
||||||
|
self._conversion_cache = {}
|
||||||
|
self._numpy_available = self._check_numpy()
|
||||||
|
self._conversion_buffer = bytearray(65536) # 64KB预分配缓冲区
|
||||||
|
|
||||||
|
def _check_numpy(self) -> bool:
|
||||||
|
"""检查numpy是否可用"""
|
||||||
|
try:
|
||||||
|
import numpy as np
|
||||||
|
return True
|
||||||
|
except ImportError:
|
||||||
|
return False
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_platform_info() -> dict:
|
def get_platform_info() -> dict:
|
||||||
"""获取平台信息"""
|
"""获取平台信息"""
|
||||||
@ -26,28 +40,44 @@ class AudioConverter:
|
|||||||
'platform': platform.platform()
|
'platform': platform.platform()
|
||||||
}
|
}
|
||||||
|
|
||||||
@staticmethod
|
def float32_to_int16_fast(self, float32_data: bytes) -> bytes:
    """Convert native-endian Float32 PCM bytes to Int16 PCM bytes.

    Each sample is clamped to [-1.0, 1.0], scaled by 32767 and truncated
    toward zero. NaN samples are mapped to 0 (silence) and infinities to
    full scale, so malformed input can never raise from the conversion.
    Trailing bytes that do not form a complete 4-byte sample are dropped.

    Args:
        float32_data: Raw bytes holding packed 32-bit floats.

    Returns:
        Packed 16-bit signed integers, two bytes per input sample, or
        ``b''`` when the input holds no complete sample.
    """
    # Drop a trailing partial sample so the length is a multiple of 4.
    usable = len(float32_data) - len(float32_data) % 4
    if usable != len(float32_data):
        float32_data = float32_data[:usable]

    num_samples = usable // 4
    if num_samples == 0:
        return b''

    # Vectorized path, used when numpy was detected at construction time.
    if self._numpy_available:
        try:
            import numpy as np
            samples = np.frombuffer(float32_data, dtype=np.float32)
            # Casting NaN to int16 is undefined, so zero it before scaling.
            samples = np.nan_to_num(samples, nan=0.0)
            samples = np.clip(samples, -1.0, 1.0)
            return (samples * 32767).astype(np.int16).tobytes()
        except Exception as e:
            # Best effort: report the failure and fall through to pure Python.
            print(f"numpy转换失败,使用备用方法: {e}")

    # Pure-Python fallback. Clamp in the float domain *before* int():
    # int(nan) raises ValueError and int(inf) raises OverflowError.
    float_values = struct.unpack(f'{num_samples}f', float32_data)
    int16_values = [
        0 if val != val else int(max(-1.0, min(1.0, val)) * 32767)
        for val in float_values
    ]
    return struct.pack(f'{num_samples}h', *int16_values)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def float32_to_int16(float32_data: bytes) -> bytes:
|
||||||
|
"""将Float32格式转换为Int16格式(保持向后兼容)"""
|
||||||
|
converter = AudioConverter()
|
||||||
|
return converter.float32_to_int16_fast(float32_data)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def int16_to_float32(int16_data: bytes) -> bytes:
|
def int16_to_float32(int16_data: bytes) -> bytes:
|
||||||
"""将Int16格式转换为Float32格式"""
|
"""将Int16格式转换为Float32格式"""
|
||||||
|
|||||||
@ -66,12 +66,16 @@ class AudioDeviceManager:
|
|||||||
# 检查是否需要格式转换
|
# 检查是否需要格式转换
|
||||||
needs_conversion = False
|
needs_conversion = False
|
||||||
|
|
||||||
# 如果当前格式不被支持,且启用降级
|
# 关键检查:如果当前格式不被支持,必须转换
|
||||||
if (self.output_config.bit_size == pyaudio.paFloat32 and
|
if (self.output_config.bit_size == pyaudio.paFloat32 and
|
||||||
'Float32' not in self.device_info['supported_formats'] and
|
'Float32' not in self.device_info['supported_formats']):
|
||||||
fallback_int16):
|
|
||||||
needs_conversion = True
|
|
||||||
|
|
||||||
|
if fallback_int16:
|
||||||
|
needs_conversion = True
|
||||||
|
print(f"检测到平台不支持Float32格式,将进行实时转换")
|
||||||
|
else:
|
||||||
|
print(f"警告: 平台不支持Float32格式,但转换已禁用")
|
||||||
|
|
||||||
# 如果优先使用原生格式,且平台推荐格式不是当前格式
|
# 如果优先使用原生格式,且平台推荐格式不是当前格式
|
||||||
elif (prefer_native and
|
elif (prefer_native and
|
||||||
self.device_info['recommended_format'] == 'Int16' and
|
self.device_info['recommended_format'] == 'Int16' and
|
||||||
@ -80,17 +84,14 @@ class AudioDeviceManager:
|
|||||||
needs_conversion = True
|
needs_conversion = True
|
||||||
|
|
||||||
if needs_conversion:
|
if needs_conversion:
|
||||||
print(f"警告: 音频格式转换 {self.output_config.bit_size} -> Int16")
|
print(f"将使用实时格式转换: Float32 -> Int16")
|
||||||
self.output_config.format = 'pcm'
|
print(f" 保持原始采样率: {original_sample_rate}Hz")
|
||||||
self.output_config.bit_size = pyaudio.paInt16
|
print(f" 转换将在播放时进行")
|
||||||
self.output_config.sample_rate = self.platform_config['recommended_sample_rate']
|
# 注意:这里不修改配置,而是在播放时进行转换
|
||||||
|
self.needs_conversion = True
|
||||||
print(f"输出配置调整:")
|
|
||||||
print(f" 格式: {original_format} -> {self.output_config.format}")
|
|
||||||
print(f" 比特深度: {32 if original_bit_size == pyaudio.paFloat32 else 16} -> 16")
|
|
||||||
print(f" 采样率: {original_sample_rate} -> {self.output_config.sample_rate}Hz")
|
|
||||||
else:
|
else:
|
||||||
print(f"使用原始输出配置: 格式={original_format}, 比特深度={original_bit_size}, 采样率={original_sample_rate}Hz")
|
print(f"使用原始输出配置: 格式={original_format}, 比特深度={original_bit_size}, 采样率={original_sample_rate}Hz")
|
||||||
|
self.needs_conversion = False
|
||||||
|
|
||||||
def open_input_stream(self) -> pyaudio.Stream:
|
def open_input_stream(self) -> pyaudio.Stream:
|
||||||
"""打开音频输入流"""
|
"""打开音频输入流"""
|
||||||
@ -144,9 +145,9 @@ class AudioDeviceManager:
|
|||||||
raise Exception("无法打开任何音频输出设备")
|
raise Exception("无法打开任何音频输出设备")
|
||||||
|
|
||||||
def write_audio_data(self, audio_data: bytes) -> bool:
|
def write_audio_data(self, audio_data: bytes) -> bool:
|
||||||
"""写入音频数据,性能优化版本"""
|
"""写入音频数据,高性能版本"""
|
||||||
# 预缓冲:对于较小的音频数据,等待积累到一定大小再播放
|
# 预缓冲:对于较小的音频数据,等待积累到一定大小再播放
|
||||||
min_buffer_size = 1024 # 最小缓冲大小
|
min_buffer_size = 2048 # 最小缓冲大小
|
||||||
|
|
||||||
if not hasattr(self, '_audio_buffer'):
|
if not hasattr(self, '_audio_buffer'):
|
||||||
self._audio_buffer = b''
|
self._audio_buffer = b''
|
||||||
@ -162,10 +163,18 @@ class AudioDeviceManager:
|
|||||||
# 继续等待更多数据
|
# 继续等待更多数据
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# 直接写入pyaudio流,不做实时转换
|
# 根据设备能力决定是否需要格式转换
|
||||||
|
final_data = buffer_to_play
|
||||||
|
|
||||||
|
# 如果需要格式转换,进行高效转换
|
||||||
|
if hasattr(self, 'needs_conversion') and self.needs_conversion:
|
||||||
|
# 使用快速转换方法
|
||||||
|
final_data = self.converter.float32_to_int16_fast(buffer_to_play)
|
||||||
|
|
||||||
|
# 尝试写入pyaudio流
|
||||||
try:
|
try:
|
||||||
if self.output_stream:
|
if self.output_stream:
|
||||||
self.output_stream.write(buffer_to_play)
|
self.output_stream.write(final_data)
|
||||||
return True
|
return True
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"pyaudio写入失败: {e}")
|
print(f"pyaudio写入失败: {e}")
|
||||||
@ -175,8 +184,8 @@ class AudioDeviceManager:
|
|||||||
if enable_aplay and self.platform_config['fallback_to_aplay']:
|
if enable_aplay and self.platform_config['fallback_to_aplay']:
|
||||||
print("尝试使用aplay播放...")
|
print("尝试使用aplay播放...")
|
||||||
return self.player.play_audio(
|
return self.player.play_audio(
|
||||||
audio_data=buffer_to_play,
|
audio_data=final_data,
|
||||||
format_type='Int16', # 现在统一使用Int16
|
format_type='Int16', # 转换后的数据
|
||||||
sample_rate=self.output_config.sample_rate,
|
sample_rate=self.output_config.sample_rate,
|
||||||
channels=self.output_config.channels
|
channels=self.output_config.channels
|
||||||
)
|
)
|
||||||
|
|||||||
@ -30,7 +30,7 @@ start_session_req = {
|
|||||||
"speaker": "zh_female_vv_jupiter_bigtts",
|
"speaker": "zh_female_vv_jupiter_bigtts",
|
||||||
# "speaker": "S_XXXXXX", // 指定自定义的复刻音色,需要填下character_manifest
|
# "speaker": "S_XXXXXX", // 指定自定义的复刻音色,需要填下character_manifest
|
||||||
# "speaker": "ICL_zh_female_aojiaonvyou_tob" // 指定官方复刻音色,不需要填character_manifest
|
# "speaker": "ICL_zh_female_aojiaonvyou_tob" // 指定官方复刻音色,不需要填character_manifest
|
||||||
"audio_config": {"channel": 1, "format": "pcm", "sample_rate": 16000},
|
"audio_config": {"channel": 1, "format": "pcm", "sample_rate": 24000},
|
||||||
},
|
},
|
||||||
"dialog": {
|
"dialog": {
|
||||||
"bot_name": "豆包",
|
"bot_name": "豆包",
|
||||||
@ -61,6 +61,6 @@ output_audio_config = {
|
|||||||
"chunk": 4096, # 增加缓冲区大小
|
"chunk": 4096, # 增加缓冲区大小
|
||||||
"format": "pcm",
|
"format": "pcm",
|
||||||
"channels": 1,
|
"channels": 1,
|
||||||
"sample_rate": 16000,
|
"sample_rate": 24000,
|
||||||
"bit_size": pyaudio.paInt16,
|
"bit_size": pyaudio.paFloat32, # 服务器返回的是Float32格式
|
||||||
}
|
}
|
||||||
|
|||||||
118
doubao/test_audio_conversion.py
Normal file
118
doubao/test_audio_conversion.py
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
音频转换测试脚本
|
||||||
|
用于测试高性能音频格式转换
|
||||||
|
"""
|
||||||
|
|
||||||
|
import time
|
||||||
|
import struct
|
||||||
|
from audio_converter import AudioConverter
|
||||||
|
|
||||||
|
def test_conversion_performance():
    """Time one Float32 -> Int16 conversion and spot-check the first ten samples.

    Builds one second of a 440 Hz sine wave at 24 kHz, converts it with
    ``AudioConverter.float32_to_int16_fast``, prints size and timing
    figures, then compares the first ten converted samples with the
    expected clamp-and-scale result (tolerance of 1).
    """
    import math

    print("=== 音频转换性能测试 ===")

    converter = AudioConverter()

    # One second of 24 kHz Float32 audio.
    sample_rate = 24000
    duration = 1.0
    num_samples = int(sample_rate * duration)

    # 440 Hz sine at half amplitude, so every sample stays within [-0.5, 0.5].
    samples = [
        0.5 * math.sin(2 * math.pi * 440 * i / sample_rate)
        for i in range(num_samples)
    ]
    test_data = struct.pack(f'{num_samples}f', *samples)
    print(f"生成了 {len(test_data)} 字节的测试数据")

    # perf_counter is monotonic and high resolution, unlike time.time().
    start_time = time.perf_counter()
    converted_data = converter.float32_to_int16_fast(test_data)
    conversion_time = time.perf_counter() - start_time

    data_ratio = len(converted_data) / len(test_data)
    # Guard the throughput division against a zero-length interval.
    throughput_kb = len(test_data) / max(conversion_time, 1e-9) / 1024

    print(f"转换结果:")
    print(f" 原始数据: {len(test_data)} 字节")
    print(f" 转换后: {len(converted_data)} 字节")
    print(f" 数据比例: {data_ratio:.2f}")
    print(f" 转换时间: {conversion_time:.4f} 秒")
    print(f" 转换速度: {throughput_kb:.1f} KB/s")

    print("\n=== 转换质量验证 ===")

    # Spot-check the first ten samples.
    original_samples = struct.unpack('10f', test_data[:40])
    converted_samples = struct.unpack('10h', converted_data[:20])

    print("前10个样本的转换结果:")
    errors = 0
    for i, (orig, conv) in enumerate(zip(original_samples, converted_samples)):
        # Mirror the converter: clamp to [-1, 1] first, then scale and truncate.
        expected = int(max(-1.0, min(1.0, orig)) * 32767)
        print(f" 样本{i}: {orig:.6f} -> {conv} (期望: {expected})")
        if abs(conv - expected) > 1:  # allow an off-by-one rounding difference
            errors += 1

    if errors == 0:
        print("✓ 转换质量验证通过")
    else:
        print(f"✗ 转换质量验证失败,{errors}个样本有误差")
|
||||||
|
|
||||||
|
def test_numpy_vs_python():
    """Compare timings of the numpy path and the pure-Python fallback.

    Converts two seconds of random 24 kHz Float32 audio ten times per
    path and prints the elapsed time for each, plus the speed-up ratio
    when the numpy path was measured.
    """
    import random

    print("\n=== NumPy vs Python 性能对比 ===")

    converter = AudioConverter()

    # Two seconds of 24 kHz audio.
    sample_rate = 24000
    duration = 2.0
    num_samples = int(sample_rate * duration)

    samples = [random.uniform(-1.0, 1.0) for _ in range(num_samples)]
    test_data = struct.pack(f'{num_samples}f', *samples)

    # numpy path
    if converter._numpy_available:
        print("测试NumPy版本...")
        start_time = time.perf_counter()
        for _ in range(10):
            converter.float32_to_int16_fast(test_data)
        numpy_time = time.perf_counter() - start_time
        print(f"NumPy版本: {numpy_time:.4f} 秒 (10次)")
    else:
        print("NumPy不可用")
        numpy_time = None

    # Pure-Python path. The static float32_to_int16 wrapper delegates to
    # float32_to_int16_fast and would take the numpy branch again, so the
    # fallback is forced on a dedicated converter instead.
    python_converter = AudioConverter()
    python_converter._numpy_available = False

    print("测试纯Python版本...")
    start_time = time.perf_counter()
    for _ in range(10):
        python_converter.float32_to_int16_fast(test_data)
    python_time = time.perf_counter() - start_time
    print(f"纯Python版本: {python_time:.4f} 秒 (10次)")

    # Skipped when numpy was not measured or the interval was zero.
    if numpy_time:
        speedup = python_time / numpy_time
        print(f"NumPy加速比: {speedup:.2f}x")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Run the timing check first, then the numpy-vs-Python comparison.
    for run_check in (test_conversion_performance, test_numpy_vs_python):
        run_check()
|
||||||
Loading…
Reference in New Issue
Block a user