#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Multi-process audio handling.

Defines the two worker classes meant to run in their own processes:

* ``InputProcess``  - captures microphone audio, detects voice activity with
  a zero-crossing-rate (ZCR) heuristic and ships finished recordings to the
  main process through a queue.
* ``OutputProcess`` - receives audio chunks from the main process through a
  queue and plays them back.
"""

import multiprocessing as mp
import queue
import time
import threading
import json
import wave
import os
from collections import deque
from dataclasses import dataclass
from enum import Enum
from typing import Optional, List, Dict, Any

import numpy as np

try:
    import pyaudio
except ImportError:
    # Deliberately deferred: the detection/buffering logic stays importable on
    # hosts without PortAudio. Constructing a worker raises ImportError.
    pyaudio = None


def _require_pyaudio():
    """Return the ``pyaudio`` module, raising ImportError if it is missing."""
    if pyaudio is None:
        raise ImportError(
            "pyaudio is required to use the audio processes but is not installed"
        )
    return pyaudio


class RecordingState(Enum):
    """Recording state enumeration."""

    IDLE = "idle"
    RECORDING = "recording"
    PROCESSING = "processing"
    PLAYING = "playing"


@dataclass
class AudioSegment:
    """A recorded piece of audio together with its timing information."""

    audio_data: bytes
    start_time: float
    end_time: float
    duration: float
    metadata: Optional[Dict[str, Any]] = None


@dataclass
class ControlCommand:
    """Control command sent from the main process to a worker."""

    command: str
    parameters: Optional[Dict[str, Any]] = None


@dataclass
class ProcessEvent:
    """Event sent from a worker back to the main process."""

    event_type: str
    data: Optional[bytes] = None
    metadata: Optional[Dict[str, Any]] = None


class InputProcess:
    """Input worker: records audio and performs voice-activity detection."""

    # Bytes per sample for the 16-bit PCM format used by this class.
    SAMPLE_WIDTH = 2

    def __init__(self, command_queue: mp.Queue, event_queue: mp.Queue,
                 config: Optional[Dict[str, Any]] = None):
        """Initialise state; no audio device is opened until ``run()``.

        Args:
            command_queue: Queue carrying ``ControlCommand`` objects from the
                main process to this worker.
            event_queue: Queue carrying ``ProcessEvent`` objects from this
                worker to the main process.
            config: Optional overrides for the values returned by
                ``_get_default_config``; missing keys fall back to defaults.

        Raises:
            ImportError: If ``pyaudio`` is not installed.
        """
        self.command_queue = command_queue  # main process -> input process
        self.event_queue = event_queue      # input process -> main process

        # Merge over the defaults so a partial config cannot cause KeyError.
        self.config = {**self._get_default_config(), **(config or {})}

        # Audio parameters
        self.FORMAT = _require_pyaudio().paInt16
        self.CHANNELS = 1
        self.RATE = 16000
        self.CHUNK_SIZE = 1024

        # State control
        self.recording_enabled = True   # whether recording is allowed
        self.is_recording = False       # whether a recording is in progress
        self.recording_buffer = []      # chunks of the current recording
        self.voice_detected = False
        self.silence_start_time = None
        self.recording_start_time = None

        # ZCR detection parameters
        self.max_zcr_history = 50
        self.zcr_history = deque(maxlen=self.max_zcr_history)
        self.consecutive_silence_count = 0
        # Tracked for information only; the stop decision in _process_audio
        # uses consecutive_low_zcr_count / low_zcr_threshold_count.
        self.silence_threshold_count = 30
        self.low_zcr_threshold_count = 20   # consecutive non-voice chunks that end a recording
        self.consecutive_low_zcr_count = 0
        self.voice_activity_history = []
        self.max_voice_history = 30

        # Pre-roll: audio kept from before voice was detected, taken from the
        # config so the buffer length matches what the log message reports.
        self.pre_record_duration = self.config['pre_record_duration']
        self.pre_record_max_frames = max(
            0, int(self.pre_record_duration * self.RATE / self.CHUNK_SIZE)
        )
        self.pre_record_buffer = deque(maxlen=self.pre_record_max_frames)

        # PyAudio handles, created in _setup_audio
        self.audio = None
        self.input_stream = None

        # Main-loop flag
        self.running = True

    def _get_default_config(self) -> Dict[str, Any]:
        """Return the default configuration."""
        return {
            'zcr_min': 2400,             # lower ZCR bound for voice at 16 kHz
            'zcr_max': 12000,            # upper ZCR bound for voice at 16 kHz
            'min_recording_time': 2.0,   # seconds before silence may stop a recording
            'max_recording_time': 30.0,  # hard cap on recording length, seconds
            'silence_threshold': 3.0,
            'pre_record_duration': 2.0,  # seconds of pre-roll to keep
            'save_recordings': True,     # write each recording to a WAV file
        }

    def run(self):
        """Run the input loop until a shutdown command or interrupt arrives.

        Raises:
            Exception: Whatever ``_setup_audio`` raises when the input device
                cannot be opened; resources are released before re-raising.
        """
        print("🎙️ 输入进程启动")
        try:
            self._setup_audio()
        except Exception:
            # A failed open() can leave a live PyAudio instance behind.
            self._cleanup()
            raise

        try:
            while self.running:
                # 1. Handle commands from the main process
                self._check_commands()

                # 2. Process audio when recording is allowed
                if self.recording_enabled:
                    self._process_audio()

                # 3. Brief sleep to reduce CPU usage
                time.sleep(0.01)
        except KeyboardInterrupt:
            print("🎙️ 输入进程收到中断信号")
        except Exception as e:
            print(f"❌ 输入进程错误: {e}")
        finally:
            self._cleanup()
            print("🎙️ 输入进程退出")

    def _setup_audio(self):
        """Open the audio input device; re-raises on failure."""
        try:
            self.audio = pyaudio.PyAudio()
            self.input_stream = self.audio.open(
                format=self.FORMAT,
                channels=self.CHANNELS,
                rate=self.RATE,
                input=True,
                frames_per_buffer=self.CHUNK_SIZE
            )
            print("🎙️ 输入进程:音频设备初始化成功")
        except Exception as e:
            print(f"❌ 输入进程音频设备初始化失败: {e}")
            raise

    def _check_commands(self):
        """Drain and apply all pending control commands without blocking."""
        try:
            while True:
                command = self.command_queue.get_nowait()

                if command.command == 'enable_recording':
                    self.recording_enabled = True
                    print("🎙️ 输入进程:录音功能已启用")
                elif command.command == 'disable_recording':
                    self.recording_enabled = False
                    # Stop an in-progress recording at once and send its data
                    if self.is_recording:
                        self._stop_recording()
                    print("🎙️ 输入进程:录音功能已禁用")
                elif command.command == 'shutdown':
                    print("🎙️ 输入进程:收到关闭命令")
                    self.running = False
                    return
        except queue.Empty:
            pass

    def _process_audio(self):
        """Read one chunk, update detection state, start/stop recording.

        Errors are reported and swallowed so one bad chunk does not end the
        main loop.
        """
        try:
            data = self.input_stream.read(self.CHUNK_SIZE, exception_on_overflow=False)
            if not data:
                return

            # Keep the pre-roll buffer current
            self._update_pre_record_buffer(data)

            # ZCR-based voice detection
            zcr = self._calculate_zcr(data)
            is_voice = self._is_voice_active(zcr)

            if self.is_recording:
                self.recording_buffer.append(data)

                # Silence tracking
                if is_voice:
                    self.silence_start_time = None
                    self.consecutive_silence_count = 0
                    self.consecutive_low_zcr_count = 0
                else:
                    self.consecutive_silence_count += 1
                    self.consecutive_low_zcr_count += 1
                    if self.silence_start_time is None:
                        self.silence_start_time = time.time()

                # Decide whether the recording should end
                recording_duration = time.time() - self.recording_start_time
                should_stop = False

                # Enough consecutive non-voice chunks after the minimum time
                if (self.consecutive_low_zcr_count >= self.low_zcr_threshold_count
                        and recording_duration >= self.config['min_recording_time']):
                    should_stop = True
                    print("🎙️ 输入进程:ZCR静音检测触发停止录音")

                # Hard cap on recording length
                if recording_duration >= self.config['max_recording_time']:
                    should_stop = True
                    print("🎙️ 输入进程:达到最大录音时间")

                if should_stop:
                    self._stop_recording()
            elif is_voice:
                # Listening mode: voice detected, start recording
                self._start_recording()
            else:
                # Listening mode: show status; guard against zero-frame pre-roll
                if self.pre_record_max_frames:
                    buffer_usage = len(self.pre_record_buffer) / self.pre_record_max_frames * 100
                else:
                    buffer_usage = 0.0
                print(f"\r🎙️ 监听中... ZCR: {zcr:.0f} | 语音: {is_voice} | 缓冲: {buffer_usage:.0f}%",
                      end='', flush=True)
        except Exception as e:
            print(f"🎙️ 输入进程音频处理错误: {e}")

    def _update_pre_record_buffer(self, audio_data: bytes):
        """Append a chunk to the pre-roll buffer.

        The buffer is a ``deque`` with ``maxlen``, so the oldest chunk is
        evicted automatically once ``pre_record_max_frames`` is reached.
        """
        self.pre_record_buffer.append(audio_data)

    def _start_recording(self):
        """Begin a recording seeded with the pre-roll; no-op when disabled."""
        if not self.recording_enabled:
            return

        self.is_recording = True
        self.recording_start_time = time.time()
        self.silence_start_time = None
        self.consecutive_silence_count = 0
        self.consecutive_low_zcr_count = 0

        # The pre-roll already contains the chunk that triggered detection
        self.recording_buffer = list(self.pre_record_buffer)
        self.pre_record_buffer.clear()

        print(f"🎙️ 输入进程:开始录音(包含预录音 {self.config['pre_record_duration']}秒)")

    def _stop_recording(self):
        """End the recording and emit a ``recording_complete`` event.

        No-op when no recording is in progress. Both buffers are emptied
        afterwards.
        """
        if not self.is_recording:
            return

        self.is_recording = False

        if self.recording_buffer:
            audio_data = b''.join(self.recording_buffer)
            bytes_per_second = self.RATE * self.CHANNELS * self.SAMPLE_WIDTH
            duration = len(audio_data) / bytes_per_second

            # Saving to disk is optional (enabled by default)
            filename = None
            if self.config.get('save_recordings', True):
                filename = self._save_recording(audio_data)

            # Hand the recording to the main process
            self.event_queue.put(ProcessEvent(
                event_type='recording_complete',
                data=audio_data,
                metadata={
                    'duration': duration,
                    'start_time': self.recording_start_time,
                    'filename': filename
                }
            ))

            print(f"📝 输入进程:录音完成,时长 {duration:.2f} 秒")

        # clear() rather than rebinding keeps the pre-roll deque's maxlen
        self.recording_buffer = []
        self.pre_record_buffer.clear()

    def _save_recording(self, audio_data: bytes) -> Optional[str]:
        """Write the recording to a timestamped WAV file.

        Returns:
            The file name written, or ``None`` if saving failed.
        """
        try:
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            filename = f"recording_{timestamp}.wav"

            with wave.open(filename, 'wb') as wf:
                wf.setnchannels(self.CHANNELS)
                wf.setsampwidth(self.audio.get_sample_size(self.FORMAT))
                wf.setframerate(self.RATE)
                wf.writeframes(audio_data)

            print(f"💾 输入进程:录音已保存到 {filename}")
            return filename
        except Exception as e:
            print(f"❌ 输入进程保存录音失败: {e}")
            return None

    def _calculate_zcr(self, audio_data: bytes) -> float:
        """Return the zero-crossing rate of a chunk, scaled to crossings/second.

        The data is interpreted as 16-bit samples. Every non-empty result is
        appended to ``zcr_history``. Empty input returns 0.0 and records nothing.
        """
        if not audio_data:
            return 0.0

        audio_array = np.frombuffer(audio_data, dtype=np.int16)

        # Count adjacent samples whose sign differs
        zero_crossings = np.sum(np.diff(np.sign(audio_array)) != 0)

        # Normalise by chunk length and scale by the sample rate
        zcr = float(zero_crossings / len(audio_array) * self.RATE)

        # History is bounded by the deque's maxlen
        self.zcr_history.append(zcr)

        return zcr

    def _is_voice_active(self, zcr: float) -> bool:
        """Return True when ``zcr`` lies strictly between the configured bounds."""
        return self.config['zcr_min'] < zcr < self.config['zcr_max']

    def _cleanup(self):
        """Release the stream and PyAudio instance (best effort, never raises)."""
        if self.input_stream:
            try:
                self.input_stream.stop_stream()
                self.input_stream.close()
            except Exception as e:
                print(f"❌ 输入进程关闭音频流失败: {e}")
            self.input_stream = None

        if self.audio:
            try:
                self.audio.terminate()
            except Exception as e:
                print(f"❌ 输入进程释放音频设备失败: {e}")
            self.audio = None


class OutputProcess:
    """Output worker: plays back audio received from the main process."""

    def __init__(self, audio_queue: mp.Queue, config: Optional[Dict[str, Any]] = None):
        """Initialise state; no audio device is opened until ``run()``.

        Args:
            audio_queue: Queue from the main process. Items are audio chunks,
                ``"METADATA:..."`` strings, or ``None`` as end-of-stream marker.
            config: Optional overrides for the values returned by
                ``_get_default_config``; missing keys fall back to defaults.

        Raises:
            ImportError: If ``pyaudio`` is not installed.
        """
        self.audio_queue = audio_queue  # main process -> output process
        # Merge over the defaults so a partial config cannot cause KeyError.
        self.config = {**self._get_default_config(), **(config or {})}

        # Playback parameters
        self.FORMAT = _require_pyaudio().paInt16
        self.CHANNELS = 1
        self.RATE = 16000
        self.CHUNK_SIZE = 512

        # Playback state
        self.is_playing = False
        self.playback_buffer = deque()
        self.total_chunks_played = 0
        self.total_audio_size = 0
        self._last_progress_report = 0  # chunk count at the last progress line

        # PyAudio handles, created in _setup_audio
        self.audio = None
        self.output_stream = None

        # Main-loop flag
        self.running = True

    def _get_default_config(self) -> Dict[str, Any]:
        """Return the default configuration."""
        return {
            'buffer_size': 1000,
            'show_progress': True,
            'progress_interval': 100   # print progress every N chunks
        }

    def run(self):
        """Run the playback loop until interrupted.

        Raises:
            Exception: Whatever ``_setup_audio`` raises when the output device
                cannot be opened; resources are released before re-raising.
        """
        print("🔊 输出进程启动")
        try:
            self._setup_audio()
        except Exception:
            # A failed open() can leave a live PyAudio instance behind.
            self._cleanup()
            raise

        try:
            while self.running:
                # Pull newly arrived items from the queue
                self._process_audio_queue()

                # Play one buffered chunk
                self._play_audio()

                # Report progress
                self._show_progress()

                time.sleep(0.001)  # very short sleep to keep playback smooth
        except KeyboardInterrupt:
            print("🔊 输出进程收到中断信号")
        except Exception as e:
            print(f"❌ 输出进程错误: {e}")
        finally:
            self._cleanup()
            print("🔊 输出进程退出")

    def _setup_audio(self):
        """Open the audio output device; re-raises on failure."""
        try:
            self.audio = pyaudio.PyAudio()
            self.output_stream = self.audio.open(
                format=self.FORMAT,
                channels=self.CHANNELS,
                rate=self.RATE,
                output=True,
                frames_per_buffer=self.CHUNK_SIZE
            )
            print("🔊 输出进程:音频设备初始化成功")
        except Exception as e:
            print(f"❌ 输出进程音频设备初始化失败: {e}")
            raise

    def _process_audio_queue(self):
        """Move pending queue items into the playback buffer without blocking.

        On the ``None`` end marker, everything already buffered is played
        before playback is finalised, so audio queued just ahead of the
        marker is not lost.
        """
        prefix = "METADATA:"
        try:
            while True:
                audio_data = self.audio_queue.get_nowait()

                if audio_data is None:
                    # End-of-stream: play what is buffered, then finalise
                    self._flush_playback_buffer()
                    self._finish_playback()
                    break

                if isinstance(audio_data, str) and audio_data.startswith(prefix):
                    metadata = audio_data[len(prefix):]
                    print(f"📝 输出进程:播放元数据 {metadata}")
                    continue

                # Audio data goes into the playback buffer
                self.playback_buffer.append(audio_data)

                if not self.is_playing:
                    self.is_playing = True
                    print("🔊 输出进程:开始播放音频")
        except queue.Empty:
            pass

    def _flush_playback_buffer(self):
        """Play every buffered chunk; stops early if there is no stream."""
        while self.playback_buffer and self.output_stream:
            self._play_audio()
            self._show_progress()

    def _play_audio(self):
        """Play one buffered chunk; on a write error the buffer is discarded."""
        if not (self.playback_buffer and self.output_stream):
            return

        try:
            audio_chunk = self.playback_buffer.popleft()
            if audio_chunk:
                self.output_stream.write(audio_chunk)
                self.total_chunks_played += 1
                self.total_audio_size += len(audio_chunk)
        except Exception as e:
            print(f"❌ 输出进程播放错误: {e}")
            self.playback_buffer.clear()

    def _show_progress(self):
        """Print a progress line once each time the chunk count hits the interval."""
        if not self.config['show_progress']:
            return

        interval = self.config['progress_interval']
        played = self.total_chunks_played
        if interval <= 0 or played == 0 or played % interval != 0:
            return
        if played == self._last_progress_report:
            # Already reported; avoids reprinting on every idle loop pass
            return

        self._last_progress_report = played
        progress = f"🔊 播放进度: {self.total_chunks_played} 块 | {self.total_audio_size / 1024:.1f} KB"
        print(f"\r{progress}", end='', flush=True)

    def _finish_playback(self):
        """Mark playback finished, print a summary and reset the counters."""
        self.is_playing = False
        self.playback_buffer.clear()

        if self.total_chunks_played > 0:
            print(f"\n✅ 输出进程:播放完成,总计 {self.total_chunks_played} 块, {self.total_audio_size / 1024:.1f} KB")

        # Reset statistics
        self.total_chunks_played = 0
        self.total_audio_size = 0
        self._last_progress_report = 0

        # The main process is not notified of completion here; that would
        # need shared memory or another queue. For now it infers completion
        # from the queue size.

    def _cleanup(self):
        """Release the stream and PyAudio instance (best effort, never raises)."""
        if self.output_stream:
            try:
                self.output_stream.stop_stream()
                self.output_stream.close()
            except Exception as e:
                print(f"❌ 输出进程关闭音频流失败: {e}")
            self.output_stream = None

        if self.audio:
            try:
                self.audio.terminate()
            except Exception as e:
                print(f"❌ 输出进程释放音频设备失败: {e}")
            self.audio = None


if __name__ == "__main__":
    # Smoke message only; the classes are meant to run in worker processes
    print("音频进程模块测试")
    print("这个模块应该在多进程环境中运行")