#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Multi-process audio handling module.

Defines the classes that run inside the input (recording) process and the
output (playback) process.
"""

import json
import multiprocessing as mp
import os
import queue
import threading
import time
import wave
from dataclasses import dataclass
from enum import Enum
from typing import Any, Dict, List, Optional

import numpy as np
import pyaudio


class RecordingState(Enum):
    """Enumeration of recording/playback states.

    Each member's value is the lowercase string form of its name.
    """

    IDLE = "idle"
    RECORDING = "recording"
    PROCESSING = "processing"
    PLAYING = "playing"


@dataclass
class AudioSegment:
    """Container for one recorded audio segment."""

    # Raw audio payload.
    audio_data: bytes
    # Segment start timestamp.
    # NOTE(review): presumably seconds as returned by time.time() - confirm with producers.
    start_time: float
    # Segment end timestamp (same clock as start_time).
    end_time: float
    # Length of the segment; presumably in seconds - confirm with producers.
    duration: float
    # Optional free-form details about the segment; defaults to None (not an
    # empty dict), so consumers must handle the None case.
    metadata: Optional[Dict[str, Any]] = None


@dataclass
class ControlCommand:
    """Control message carrying a command name and optional parameters."""

    # Name of the command, e.g. 'enable_recording', 'disable_recording'
    # or 'shutdown'.
    command: str
    # Optional command arguments; defaults to None (not an empty dict).
    parameters: Optional[Dict[str, Any]] = None


@dataclass
class ProcessEvent:
    """Event message carrying a type tag, optional payload and metadata."""

    # Kind of event, e.g. 'recording_complete'.
    event_type: str
    # Optional binary payload (raw audio bytes for 'recording_complete').
    data: Optional[bytes] = None
    # Optional details about the event; defaults to None (not an empty dict).
    metadata: Optional[Dict[str, Any]] = None


class InputProcess:
    """Input-side worker: reads microphone audio and detects speech.

    :meth:`run` polls ``command_queue`` for command objects (anything exposing
    a ``command`` attribute, e.g. ``ControlCommand``) and, while recording is
    enabled, reads fixed-size chunks from a PyAudio input stream.  A
    zero-crossing-rate (ZCR) check decides when a recording starts and stops;
    each finished recording is written to a WAV file and published on
    ``event_queue`` as a ``ProcessEvent`` of type ``'recording_complete'``.
    """

    def __init__(self, command_queue: mp.Queue, event_queue: mp.Queue,
                 config: Optional[Dict[str, Any]] = None):
        """Store the queues, resolve the configuration and reset all state.

        Args:
            command_queue: Queue read with ``get_nowait()`` for control commands.
            event_queue: Queue on which finished recordings are published.
            config: Optional overrides for the keys returned by
                :meth:`_get_default_config`; missing keys keep their defaults.
        """
        self.command_queue = command_queue  # main process -> input process
        self.event_queue = event_queue      # input process -> main process

        # Overlay caller values on the defaults so a partial config cannot
        # trigger a KeyError later in the processing loop.
        self.config = {**self._get_default_config(), **(config or {})}

        # Audio capture parameters
        self.FORMAT = pyaudio.paInt16
        self.CHANNELS = 1
        self.RATE = 16000
        self.CHUNK_SIZE = 1024

        # State control
        self.recording_enabled = True   # whether recording is allowed at all
        self.is_recording = False       # whether a recording is in progress
        self.recording_buffer = []      # chunks of the current recording
        self.pre_record_buffer = []     # rolling window of the latest chunks
        self.voice_detected = False
        self.silence_start_time = None
        self.recording_start_time = None

        # ZCR detection bookkeeping
        self.zcr_history = []
        self.max_zcr_history = 50
        self.consecutive_silence_count = 0
        # Originally annotated as "about 3 seconds"; note that 30 chunks of
        # 1024 samples at 16 kHz is roughly 1.9 s.  Not read in this class.
        self.silence_threshold_count = 30
        # Number of consecutive non-voice chunks that ends a recording.
        self.low_zcr_threshold_count = 20
        self.consecutive_low_zcr_count = 0
        self.voice_activity_history = []
        self.max_voice_history = 30

        # Pre-record window, taken from the configuration and converted to a
        # number of chunks.
        self.pre_record_duration = float(self.config['pre_record_duration'])
        self.pre_record_max_frames = int(
            self.pre_record_duration * self.RATE / self.CHUNK_SIZE
        )

        # PyAudio handles are created in _setup_audio(), i.e. when run() starts.
        self.audio = None
        self.input_stream = None

        # Main-loop flag; cleared by the 'shutdown' command.
        self.running = True

    def _get_default_config(self) -> Dict[str, Any]:
        """Return the default configuration dictionary."""
        return {
            'zcr_min': 2400,             # lower ZCR bound for voice (16 kHz)
            'zcr_max': 12000,            # upper ZCR bound for voice (16 kHz)
            'min_recording_time': 2.0,   # seconds before silence may stop a recording
            'max_recording_time': 30.0,  # hard cap on recording length, seconds
            'silence_threshold': 3.0,
            'pre_record_duration': 2.0,  # seconds of audio kept before voice onset
        }

    def run(self):
        """Main loop of the input process.

        Opens the audio device, then alternates between command handling and
        audio processing until ``running`` is cleared.  Resources are always
        released on exit.

        Raises:
            Exception: whatever :meth:`_setup_audio` raises when the device
                cannot be opened.
        """
        print("🎙️ 输入进程启动")
        self._setup_audio()

        try:
            while self.running:
                # 1. Handle pending commands from the main process.
                self._check_commands()
                if not self.running:
                    # Shutdown requested: skip another blocking stream read.
                    break

                # 2. Process one audio chunk if recording is allowed.
                if self.recording_enabled:
                    self._process_audio()

                # 3. Short sleep to limit CPU usage.
                time.sleep(0.01)

        except KeyboardInterrupt:
            print("🎙️ 输入进程收到中断信号")
        except Exception as e:
            print(f"❌ 输入进程错误: {e}")
        finally:
            self._cleanup()
            print("🎙️ 输入进程退出")

    def _setup_audio(self):
        """Create the PyAudio instance and open the input stream.

        Raises:
            Exception: re-raised from PyAudio after any partially created
                handles have been released.
        """
        try:
            self.audio = pyaudio.PyAudio()
            self.input_stream = self.audio.open(
                format=self.FORMAT,
                channels=self.CHANNELS,
                rate=self.RATE,
                input=True,
                frames_per_buffer=self.CHUNK_SIZE,
            )
            print("🎙️ 输入进程:音频设备初始化成功")
        except Exception as e:
            print(f"❌ 输入进程音频设备初始化失败: {e}")
            # run() calls this outside its try/finally, so release here to
            # avoid leaking the PyAudio instance when open() fails.
            self._cleanup()
            raise

    def _check_commands(self):
        """Drain the command queue and apply every pending command.

        Returns early on 'shutdown', leaving later commands unread.
        """
        try:
            while True:
                command = self.command_queue.get_nowait()
                name = getattr(command, 'command', None)

                if name == 'enable_recording':
                    self.recording_enabled = True
                    print("🎙️ 输入进程:录音功能已启用")

                elif name == 'disable_recording':
                    self.recording_enabled = False
                    # Flush an in-progress recording right away so its data
                    # is still delivered.
                    if self.is_recording:
                        self._stop_recording()
                    print("🎙️ 输入进程:录音功能已禁用")

                elif name == 'shutdown':
                    print("🎙️ 输入进程:收到关闭命令")
                    self.running = False
                    return

                else:
                    # Ignore unknown or malformed messages instead of letting
                    # them terminate the process.
                    print(f"⚠️ 输入进程:忽略未知命令 {command!r}")

        except queue.Empty:
            pass

    def _process_audio(self):
        """Read one chunk from the stream and advance the recording state.

        In listening mode a voiced chunk starts a recording.  In recording
        mode the chunk is appended and the recording stops either after
        enough consecutive non-voice chunks (once the minimum duration has
        elapsed) or when the maximum duration is reached.  Errors are
        reported and swallowed so the main loop keeps running.
        """
        try:
            data = self.input_stream.read(self.CHUNK_SIZE, exception_on_overflow=False)
            if not data:
                return

            # Keep the rolling pre-record window up to date.
            self._update_pre_record_buffer(data)

            zcr = self._calculate_zcr(data)
            is_voice = self._is_voice_active(zcr)

            if self.is_recording:
                self.recording_buffer.append(data)

                # Silence tracking
                if is_voice:
                    self.silence_start_time = None
                    self.consecutive_silence_count = 0
                    self.consecutive_low_zcr_count = 0
                else:
                    self.consecutive_silence_count += 1
                    self.consecutive_low_zcr_count += 1
                    if self.silence_start_time is None:
                        self.silence_start_time = time.time()

                recording_duration = time.time() - self.recording_start_time
                should_stop = False

                # Stop on sustained silence, but never before the minimum
                # recording time.
                if (self.consecutive_low_zcr_count >= self.low_zcr_threshold_count
                        and recording_duration >= self.config['min_recording_time']):
                    should_stop = True
                    print("🎙️ 输入进程:ZCR静音检测触发停止录音")

                # Stop unconditionally at the maximum recording time.
                if recording_duration >= self.config['max_recording_time']:
                    should_stop = True
                    print("🎙️ 输入进程:达到最大录音时间")

                if should_stop:
                    self._stop_recording()

            elif is_voice:
                # Listening mode: voice detected, start recording.
                self._start_recording()

            else:
                # Listening mode: show a one-line status.  max(..., 1) guards
                # against a zero-length pre-record window.
                buffer_usage = (len(self.pre_record_buffer)
                                / max(self.pre_record_max_frames, 1) * 100)
                print(f"\r🎙️ 监听中... ZCR: {zcr:.0f} | 语音: {is_voice} | 缓冲: {buffer_usage:.0f}%", end='', flush=True)

        except Exception as e:
            print(f"🎙️ 输入进程音频处理错误: {e}")

    def _update_pre_record_buffer(self, audio_data: bytes):
        """Append a chunk to the pre-record window, dropping the oldest one
        when the window exceeds ``pre_record_max_frames`` chunks."""
        self.pre_record_buffer.append(audio_data)

        if len(self.pre_record_buffer) > self.pre_record_max_frames:
            self.pre_record_buffer.pop(0)

    def _start_recording(self):
        """Begin a recording, seeded with the pre-record window.

        Does nothing when recording is disabled.
        """
        if not self.recording_enabled:
            return

        self.is_recording = True
        self.recording_start_time = time.time()
        self.silence_start_time = None
        self.consecutive_silence_count = 0
        self.consecutive_low_zcr_count = 0

        # The pre-record window becomes the start of the recording.
        self.recording_buffer = list(self.pre_record_buffer)
        self.pre_record_buffer.clear()

        print(f"🎙️ 输入进程:开始录音(包含预录音 {self.config['pre_record_duration']}秒)")

    def _stop_recording(self):
        """Finish the current recording and publish it.

        The joined audio is saved to a WAV file and sent on ``event_queue`` as
        a ``'recording_complete'`` event.  Does nothing when no recording is
        in progress.  Both buffers are emptied afterwards.
        """
        if not self.is_recording:
            return

        self.is_recording = False

        if self.recording_buffer:
            audio_data = b''.join(self.recording_buffer)
            # paInt16 means 2 bytes per sample per channel.
            bytes_per_second = self.RATE * self.CHANNELS * 2
            duration = len(audio_data) / bytes_per_second

            # Saving is best-effort; filename is None when it fails.
            filename = self._save_recording(audio_data)

            self.event_queue.put(ProcessEvent(
                event_type='recording_complete',
                data=audio_data,
                metadata={
                    'duration': duration,
                    'start_time': self.recording_start_time,
                    'filename': filename,
                },
            ))

            print(f"📝 输入进程:录音完成,时长 {duration:.2f} 秒")

        self.recording_buffer = []
        self.pre_record_buffer = []

    def _save_recording(self, audio_data: bytes) -> Optional[str]:
        """Write ``audio_data`` to ``recording_<timestamp>.wav`` in the
        current working directory.

        Returns:
            The file name, or ``None`` when saving failed.
        """
        try:
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            filename = f"recording_{timestamp}.wav"

            with wave.open(filename, 'wb') as wf:
                wf.setnchannels(self.CHANNELS)
                wf.setsampwidth(self.audio.get_sample_size(self.FORMAT))
                wf.setframerate(self.RATE)
                wf.writeframes(audio_data)

            print(f"💾 输入进程:录音已保存到 {filename}")
            return filename

        except Exception as e:
            print(f"❌ 输入进程保存录音失败: {e}")
            return None

    def _calculate_zcr(self, audio_data: bytes) -> float:
        """Compute the zero-crossing rate of a chunk, scaled by ``RATE``.

        The chunk is interpreted as int16 samples.  The value is the number
        of sign changes between adjacent samples divided by the sample count
        and multiplied by the sample rate.  Non-empty results are appended to
        ``zcr_history`` (bounded by ``max_zcr_history``).

        Returns:
            The scaled ZCR, or ``0.0`` for empty input.
        """
        if len(audio_data) == 0:
            return 0.0

        samples = np.frombuffer(audio_data, dtype=np.int16)

        # Count positions where the sign differs from the previous sample.
        zero_crossings = np.sum(np.diff(np.sign(samples)) != 0)

        zcr = float(zero_crossings / len(samples) * self.RATE)

        self.zcr_history.append(zcr)
        if len(self.zcr_history) > self.max_zcr_history:
            self.zcr_history.pop(0)

        return zcr

    def _is_voice_active(self, zcr: float) -> bool:
        """Return True when ``zcr`` lies strictly between the configured
        ``zcr_min`` and ``zcr_max`` (2400 and 12000 by default)."""
        return bool(self.config['zcr_min'] < zcr < self.config['zcr_max'])

    def _cleanup(self):
        """Release the input stream and the PyAudio instance.

        Best-effort: failures are reported but never raised, and both
        handles are reset to ``None`` so the method is safe to call twice.
        """
        if self.input_stream:
            try:
                self.input_stream.stop_stream()
                self.input_stream.close()
            except Exception as e:
                print(f"⚠️ 输入进程关闭音频流失败: {e}")
            self.input_stream = None

        if self.audio:
            try:
                self.audio.terminate()
            except Exception as e:
                print(f"⚠️ 输入进程释放音频设备失败: {e}")
            self.audio = None


class OutputProcess:
    """Output-side worker: plays audio chunks received from a queue.

    :meth:`run` repeatedly drains ``audio_queue`` into a local playback
    buffer and writes one buffered chunk per loop iteration to a PyAudio
    output stream.  Queue items are interpreted as follows:

    * ``None`` - end-of-stream marker; buffered audio is played out and the
      statistics are reset.
    * ``str`` starting with ``"METADATA:"`` - printed, not played.
    * anything else - treated as an audio chunk.
    """

    def __init__(self, audio_queue: mp.Queue, config: Optional[Dict[str, Any]] = None):
        """Store the queue, resolve the configuration and reset all state.

        Args:
            audio_queue: Queue read with ``get_nowait()`` for audio chunks.
            config: Optional overrides for the keys returned by
                :meth:`_get_default_config`; missing keys keep their defaults.
        """
        self.audio_queue = audio_queue  # main process -> output process
        # Overlay caller values on the defaults so a partial config cannot
        # trigger a KeyError in _show_progress().
        self.config = {**self._get_default_config(), **(config or {})}

        # Playback parameters
        self.FORMAT = pyaudio.paInt16
        self.CHANNELS = 1
        self.RATE = 16000
        self.CHUNK_SIZE = 512

        # Playback state
        self.is_playing = False
        self.playback_buffer = []
        self.total_chunks_played = 0
        self.total_audio_size = 0
        # Chunk count at the last progress line, to avoid printing the same
        # line on every idle loop iteration.
        self._last_progress_chunks = 0

        # PyAudio handles are created in _setup_audio(), i.e. when run() starts.
        self.audio = None
        self.output_stream = None

        # Main-loop flag.
        self.running = True

    def _get_default_config(self) -> Dict[str, Any]:
        """Return the default configuration dictionary."""
        return {
            'buffer_size': 1000,
            'show_progress': True,     # print a progress line during playback
            'progress_interval': 100,  # chunks between two progress lines
        }

    def run(self):
        """Main loop of the output process.

        Opens the audio device, then loops over queue handling, playback and
        progress display until ``running`` is cleared.  Resources are always
        released on exit.

        Raises:
            Exception: whatever :meth:`_setup_audio` raises when the device
                cannot be opened.
        """
        print("🔊 输出进程启动")
        self._setup_audio()

        try:
            while self.running:
                self._process_audio_queue()
                self._play_audio()
                self._show_progress()

                # Very short sleep so playback stays smooth.
                time.sleep(0.001)

        except KeyboardInterrupt:
            print("🔊 输出进程收到中断信号")
        except Exception as e:
            print(f"❌ 输出进程错误: {e}")
        finally:
            self._cleanup()
            print("🔊 输出进程退出")

    def _setup_audio(self):
        """Create the PyAudio instance and open the output stream.

        Raises:
            Exception: re-raised from PyAudio after any partially created
                handles have been released.
        """
        try:
            self.audio = pyaudio.PyAudio()
            self.output_stream = self.audio.open(
                format=self.FORMAT,
                channels=self.CHANNELS,
                rate=self.RATE,
                output=True,
                frames_per_buffer=self.CHUNK_SIZE,
            )
            print("🔊 输出进程:音频设备初始化成功")
        except Exception as e:
            print(f"❌ 输出进程音频设备初始化失败: {e}")
            # run() calls this outside its try/finally, so release here to
            # avoid leaking the PyAudio instance when open() fails.
            self._cleanup()
            raise

    def _process_audio_queue(self):
        """Move everything currently in the queue into the playback buffer.

        Stops at the first ``None`` (end marker) after finishing playback;
        items queued behind it are left for the next call.
        """
        prefix = "METADATA:"
        try:
            while True:
                item = self.audio_queue.get_nowait()

                if item is None:
                    # End-of-stream marker.
                    self._finish_playback()
                    break

                if isinstance(item, str) and item.startswith(prefix):
                    metadata = item[len(prefix):]
                    print(f"📝 输出进程:播放元数据 {metadata}")
                    continue

                self.playback_buffer.append(item)
                if not self.is_playing:
                    self.is_playing = True
                    print("🔊 输出进程:开始播放音频")

        except queue.Empty:
            pass

    def _play_audio(self):
        """Write the oldest buffered chunk to the output stream.

        Empty chunks are skipped.  On a write error the whole buffer is
        discarded and the error is reported.
        """
        if self.playback_buffer and self.output_stream:
            try:
                audio_chunk = self.playback_buffer.pop(0)
                if audio_chunk and len(audio_chunk) > 0:
                    self.output_stream.write(audio_chunk)
                    self.total_chunks_played += 1
                    self.total_audio_size += len(audio_chunk)

            except Exception as e:
                print(f"❌ 输出进程播放错误: {e}")
                self.playback_buffer.clear()

    def _show_progress(self):
        """Print a progress line every ``progress_interval`` played chunks.

        Each chunk count is reported at most once, so an idle loop does not
        reprint the same line; a non-positive interval disables the output.
        """
        if not self.config['show_progress']:
            return

        interval = self.config['progress_interval']
        played = self.total_chunks_played
        if (interval > 0
                and played > 0
                and played % interval == 0
                and played != getattr(self, '_last_progress_chunks', 0)):
            self._last_progress_chunks = played
            progress = f"🔊 播放进度: {self.total_chunks_played} 块 | {self.total_audio_size / 1024:.1f} KB"
            print(f"\r{progress}", end='', flush=True)

    def _finish_playback(self):
        """Handle the end-of-stream marker.

        Plays out the chunks still waiting in the buffer, prints the totals
        and resets the statistics.
        """
        # The queue is drained far faster than chunks are written to the
        # device, so the end marker is typically seen while audio is still
        # buffered.  Play it out instead of discarding it.  _play_audio()
        # empties the buffer on error, so this loop always terminates.
        while self.playback_buffer and self.output_stream:
            self._play_audio()

        self.is_playing = False
        self.playback_buffer.clear()

        if self.total_chunks_played > 0:
            print(f"\n✅ 输出进程:播放完成,总计 {self.total_chunks_played} 块, {self.total_audio_size / 1024:.1f} KB")

        # Reset the statistics for the next stream.
        self.total_chunks_played = 0
        self.total_audio_size = 0
        self._last_progress_chunks = 0

        # The main process is not notified here; this could be done through
        # shared memory or another queue.  For now the main process is left
        # to infer completion from the queue size.

    def _cleanup(self):
        """Release the output stream and the PyAudio instance.

        Best-effort: failures are reported but never raised, and both
        handles are reset to ``None`` so the method is safe to call twice.
        """
        if self.output_stream:
            try:
                self.output_stream.stop_stream()
                self.output_stream.close()
            except Exception as e:
                print(f"⚠️ 输出进程关闭音频流失败: {e}")
            self.output_stream = None

        if self.audio:
            try:
                self.audio.terminate()
            except Exception as e:
                print(f"⚠️ 输出进程释放音频设备失败: {e}")
            self.audio = None


if __name__ == "__main__":
    # Running the module directly only prints a notice: the classes above
    # are meant to be driven from a multi-process host program.
    print("音频进程模块测试")
    print("这个模块应该在多进程环境中运行")