Local-Voice/audio_processes.py
2025-09-20 23:29:47 +08:00

527 lines
18 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
多进程音频处理模块
定义输入进程和输出进程的类
"""
import multiprocessing as mp
import queue
import time
import threading
import numpy as np
import pyaudio
from enum import Enum
from dataclasses import dataclass
from typing import Optional, List, Dict, Any
import json
import wave
import os
class RecordingState(Enum):
    """Enumeration of the states an audio pipeline can be in."""

    # Nothing is being captured or played.
    IDLE = "idle"
    # Microphone audio is being captured.
    RECORDING = "recording"
    # Captured audio is being handled.
    PROCESSING = "processing"
    # Audio is being played back.
    PLAYING = "playing"
@dataclass
class AudioSegment:
    """Container for one captured piece of audio plus its timing data."""

    # Raw audio payload.
    audio_data: bytes
    # When the segment started (presumably epoch seconds - confirm with producer).
    start_time: float
    # When the segment ended (same clock as start_time).
    end_time: float
    # Length of the segment (presumably seconds - confirm with producer).
    duration: float
    # Optional free-form details; None when the producer supplies nothing.
    metadata: Optional[Dict[str, Any]] = None
@dataclass
class ControlCommand:
    """A control message sent to a worker process."""

    # Command name, e.g. 'enable_recording', 'disable_recording' or 'shutdown'.
    command: str
    # Optional arguments for the command; None when there are none.
    parameters: Optional[Dict[str, Any]] = None
@dataclass
class ProcessEvent:
    """An event reported by a worker process."""

    # Event name, e.g. 'recording_complete'.
    event_type: str
    # Optional binary payload (such as recorded audio bytes).
    data: Optional[bytes] = None
    # Optional details describing the event; None when absent.
    metadata: Optional[Dict[str, Any]] = None
class InputProcess:
    """Input worker: records microphone audio and detects speech.

    The worker polls ``command_queue`` for ``ControlCommand``-like objects
    (``enable_recording``, ``disable_recording``, ``shutdown``) and reports
    finished recordings on ``event_queue`` as ``recording_complete`` events.
    Speech is detected with a zero-crossing-rate (ZCR) band check whose
    limits come from the ``zcr_min`` / ``zcr_max`` configuration keys.
    """

    def __init__(self, command_queue: mp.Queue, event_queue: mp.Queue,
                 config: Optional[Dict[str, Any]] = None):
        """Store the queues, resolve the configuration and reset all state.

        Args:
            command_queue: Queue carrying commands from the main process.
            event_queue: Queue carrying events back to the main process.
            config: Optional overrides; missing keys fall back to the values
                returned by ``_get_default_config``.
        """
        self.command_queue = command_queue  # main process -> input process
        self.event_queue = event_queue      # input process -> main process

        # Overlay the caller's values on the defaults so that a partial
        # config cannot trigger a KeyError on a later lookup.
        self.config = {**self._get_default_config(), **(config or {})}

        # Audio capture parameters.
        self.FORMAT = pyaudio.paInt16
        self.CHANNELS = 1
        self.RATE = 16000
        self.CHUNK_SIZE = 1024

        # Recording state.
        self.recording_enabled = True   # whether recording may start
        self.is_recording = False       # whether a recording is in progress
        self.recording_buffer = []      # chunks of the current recording
        self.pre_record_buffer = []     # rolling window kept while listening
        self.voice_detected = False
        self.silence_start_time = None
        self.recording_start_time = None

        # ZCR detection bookkeeping.
        self.zcr_history = []
        self.max_zcr_history = 50
        self.consecutive_silence_count = 0
        # 30 chunks is about 1.9 s at 1024 frames / 16 kHz. This threshold is
        # not consulted by the stop logic in this class.
        self.silence_threshold_count = 30
        # Consecutive non-voice chunks (about 1.3 s) that end a recording.
        self.low_zcr_threshold_count = 20
        self.consecutive_low_zcr_count = 0
        self.voice_activity_history = []
        self.max_voice_history = 30

        # Pre-record window, taken from the configuration so that the buffer
        # size and the logged duration agree.
        self.pre_record_duration = self.config['pre_record_duration']
        # At least one frame, so the usage percentage never divides by zero.
        self.pre_record_max_frames = max(
            1, int(self.pre_record_duration * self.RATE / self.CHUNK_SIZE)
        )

        # PyAudio handles, created in _setup_audio.
        self.audio = None
        self.input_stream = None

        # Main-loop flag, cleared by the 'shutdown' command.
        self.running = True

    def _get_default_config(self) -> Dict[str, Any]:
        """Return the default configuration dictionary."""
        return {
            'zcr_min': 2400,             # lower ZCR bound treated as speech
            'zcr_max': 12000,            # upper ZCR bound treated as speech
            'min_recording_time': 2.0,   # seconds before silence may stop a recording
            'max_recording_time': 30.0,  # hard cap on a recording, in seconds
            'silence_threshold': 3.0,
            'pre_record_duration': 2.0,  # seconds of audio kept before speech starts
        }

    def run(self):
        """Run the main loop until shutdown, an interrupt or an error.

        Raises:
            Exception: whatever ``_setup_audio`` raises; resources acquired
                so far are released before the exception propagates.
        """
        print("🎙️ 输入进程启动")
        try:
            self._setup_audio()
        except Exception:
            # A failed stream open may leave a live PyAudio instance behind.
            self._cleanup()
            raise
        try:
            while self.running:
                # 1. Apply pending commands from the main process.
                self._check_commands()
                # 2. Capture and analyse one chunk when recording is allowed.
                if self.recording_enabled:
                    self._process_audio()
                # 3. Brief sleep to keep CPU usage down.
                time.sleep(0.01)
        except KeyboardInterrupt:
            print("🎙️ 输入进程收到中断信号")
        except Exception as e:
            print(f"❌ 输入进程错误: {e}")
        finally:
            self._cleanup()
            print("🎙️ 输入进程退出")

    def _setup_audio(self):
        """Open the PyAudio input stream.

        Raises:
            Exception: re-raised after logging when the device cannot be opened.
        """
        try:
            self.audio = pyaudio.PyAudio()
            self.input_stream = self.audio.open(
                format=self.FORMAT,
                channels=self.CHANNELS,
                rate=self.RATE,
                input=True,
                frames_per_buffer=self.CHUNK_SIZE
            )
            print("🎙️ 输入进程:音频设备初始化成功")
        except Exception as e:
            print(f"❌ 输入进程音频设备初始化失败: {e}")
            raise

    def _check_commands(self):
        """Drain the command queue and apply every command found."""
        try:
            while True:
                command = self.command_queue.get_nowait()
                if command.command == 'enable_recording':
                    self.recording_enabled = True
                    print("🎙️ 输入进程:录音功能已启用")
                elif command.command == 'disable_recording':
                    self.recording_enabled = False
                    # Flush an in-progress recording so its audio is not lost.
                    if self.is_recording:
                        self._stop_recording()
                    print("🎙️ 输入进程:录音功能已禁用")
                elif command.command == 'shutdown':
                    print("🎙️ 输入进程:收到关闭命令")
                    self.running = False
                    return
        except queue.Empty:
            pass

    def _process_audio(self):
        """Read one chunk and advance the listening/recording state machine."""
        try:
            data = self.input_stream.read(self.CHUNK_SIZE, exception_on_overflow=False)
            if len(data) == 0:
                return

            # The rolling pre-record window is fed in both modes.
            self._update_pre_record_buffer(data)

            zcr = self._calculate_zcr(data)
            is_voice = self._is_voice_active(zcr)

            if self.is_recording:
                self.recording_buffer.append(data)

                # Track how long the input has been non-voice.
                if is_voice:
                    self.silence_start_time = None
                    self.consecutive_silence_count = 0
                    self.consecutive_low_zcr_count = 0
                else:
                    self.consecutive_silence_count += 1
                    self.consecutive_low_zcr_count += 1
                    if self.silence_start_time is None:
                        self.silence_start_time = time.time()

                recording_duration = time.time() - self.recording_start_time
                should_stop = False

                # Stop on sustained non-voice input, but only after the
                # minimum recording time has elapsed.
                if (self.consecutive_low_zcr_count >= self.low_zcr_threshold_count and
                        recording_duration >= self.config['min_recording_time']):
                    should_stop = True
                    print("🎙️ 输入进程ZCR静音检测触发停止录音")

                # Stop unconditionally at the maximum recording time.
                if recording_duration >= self.config['max_recording_time']:
                    should_stop = True
                    print("🎙️ 输入进程：达到最大录音时间")

                if should_stop:
                    self._stop_recording()
            else:
                if is_voice:
                    # Speech detected while listening: begin a recording.
                    self._start_recording()
                else:
                    # Show a single-line listening status.
                    buffer_usage = len(self.pre_record_buffer) / self.pre_record_max_frames * 100
                    print(f"\r🎙️ 监听中... ZCR: {zcr:.0f} | 语音: {is_voice} | 缓冲: {buffer_usage:.0f}%", end='', flush=True)
        except Exception as e:
            print(f"🎙️ 输入进程音频处理错误: {e}")

    def _update_pre_record_buffer(self, audio_data: bytes):
        """Append a chunk to the pre-record window and trim it to size."""
        self.pre_record_buffer.append(audio_data)
        overflow = len(self.pre_record_buffer) - self.pre_record_max_frames
        if overflow > 0:
            del self.pre_record_buffer[:overflow]

    def _start_recording(self):
        """Begin a recording, seeded with the pre-record window."""
        if not self.recording_enabled:
            return

        self.is_recording = True
        self.recording_buffer = []
        self.recording_start_time = time.time()
        self.silence_start_time = None
        self.consecutive_silence_count = 0
        self.consecutive_low_zcr_count = 0

        # Prepend the buffered audio so the start of the utterance is kept.
        self.recording_buffer.extend(self.pre_record_buffer)
        self.pre_record_buffer.clear()

        print(f"🎙️ 输入进程：开始录音（包含预录音 {self.config['pre_record_duration']}秒）")

    def _stop_recording(self):
        """End the current recording, save it and publish it as an event."""
        if not self.is_recording:
            return

        self.is_recording = False

        if self.recording_buffer:
            audio_data = b''.join(self.recording_buffer)
            # paInt16 stores two bytes per sample per channel.
            duration = len(audio_data) / (self.RATE * self.CHANNELS * 2)

            # Write the recording to a WAV file; None when saving fails.
            filename = self._save_recording(audio_data)

            # Hand the recording over to the main process.
            self.event_queue.put(ProcessEvent(
                event_type='recording_complete',
                data=audio_data,
                metadata={
                    'duration': duration,
                    'start_time': self.recording_start_time,
                    'filename': filename
                }
            ))

            print(f"📝 输入进程:录音完成,时长 {duration:.2f}")

        # Reset both buffers for the next utterance.
        self.recording_buffer = []
        self.pre_record_buffer = []

    def _save_recording(self, audio_data: bytes) -> Optional[str]:
        """Write the audio to a timestamped WAV file in the working directory.

        Returns:
            The file name, or None when saving failed.
        """
        try:
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            filename = f"recording_{timestamp}.wav"

            with wave.open(filename, 'wb') as wf:
                wf.setnchannels(self.CHANNELS)
                wf.setsampwidth(self.audio.get_sample_size(self.FORMAT))
                wf.setframerate(self.RATE)
                wf.writeframes(audio_data)

            print(f"💾 输入进程:录音已保存到 {filename}")
            return filename
        except Exception as e:
            print(f"❌ 输入进程保存录音失败: {e}")
            return None

    def _calculate_zcr(self, audio_data: bytes) -> float:
        """Return the zero-crossing rate of a chunk, scaled by the sample rate.

        The bytes are interpreted as 16-bit samples. A trailing odd byte is
        ignored, and a chunk with no complete sample yields 0.0 without
        touching the ZCR history.
        """
        usable = len(audio_data) - (len(audio_data) % 2)
        if usable == 0:
            return 0.0

        samples = np.frombuffer(audio_data[:usable], dtype=np.int16)

        # Count the positions where the sign changes between neighbours.
        zero_crossings = int(np.sum(np.diff(np.sign(samples)) != 0))

        # Express the rate as crossings per second.
        zcr = zero_crossings / len(samples) * self.RATE

        # Keep a bounded history of recent values.
        self.zcr_history.append(zcr)
        if len(self.zcr_history) > self.max_zcr_history:
            self.zcr_history.pop(0)

        return zcr

    def _is_voice_active(self, zcr: float) -> bool:
        """Return True when ``zcr`` lies strictly inside the configured band.

        The band limits are the ``zcr_min`` / ``zcr_max`` configuration
        values, defaulting to 2400 and 12000.
        """
        lower = self.config.get('zcr_min', 2400)
        upper = self.config.get('zcr_max', 12000)
        return bool(lower < zcr < upper)

    def _cleanup(self):
        """Release the stream and the PyAudio instance (best effort)."""
        if self.input_stream:
            try:
                self.input_stream.stop_stream()
                self.input_stream.close()
            except Exception:
                # Best effort: a failing close must not block shutdown.
                pass
            self.input_stream = None

        if self.audio:
            try:
                self.audio.terminate()
            except Exception:
                # Best effort: see above.
                pass
            self.audio = None
class OutputProcess:
    """Output worker: plays audio chunks received from the main process.

    Items on ``audio_queue`` are either raw audio chunks, strings starting
    with ``"METADATA:"`` (logged, not played), or ``None`` which marks the
    end of the current stream.
    """

    def __init__(self, audio_queue: mp.Queue, config: Optional[Dict[str, Any]] = None):
        """Store the queue, resolve the configuration and reset all state.

        Args:
            audio_queue: Queue carrying audio from the main process.
            config: Optional overrides; missing keys fall back to the values
                returned by ``_get_default_config``.
        """
        self.audio_queue = audio_queue  # main process -> output process
        # Overlay the caller's values on the defaults so that a partial
        # config cannot trigger a KeyError on a later lookup.
        self.config = {**self._get_default_config(), **(config or {})}

        # Playback parameters.
        self.FORMAT = pyaudio.paInt16
        self.CHANNELS = 1
        self.RATE = 16000
        self.CHUNK_SIZE = 512

        # Playback state and statistics.
        self.is_playing = False
        self.playback_buffer = []
        self.total_chunks_played = 0
        self.total_audio_size = 0
        # Set when the end marker arrives while chunks are still buffered;
        # playback finishes once the buffer has drained.
        self._end_of_stream = False
        # Chunk count at the last progress line, to avoid printing it twice.
        self._last_progress_report = 0

        # PyAudio handles, created in _setup_audio.
        self.audio = None
        self.output_stream = None

        # Main-loop flag.
        self.running = True

    def _get_default_config(self) -> Dict[str, Any]:
        """Return the default configuration dictionary."""
        return {
            'buffer_size': 1000,
            'show_progress': True,     # print a progress line while playing
            'progress_interval': 100,  # chunks between progress lines
        }

    def run(self):
        """Run the main loop until an interrupt or an error.

        Raises:
            Exception: whatever ``_setup_audio`` raises; resources acquired
                so far are released before the exception propagates.
        """
        print("🔊 输出进程启动")
        try:
            self._setup_audio()
        except Exception:
            # A failed stream open may leave a live PyAudio instance behind.
            self._cleanup()
            raise
        try:
            while self.running:
                # Pull newly arrived items from the queue.
                self._process_audio_queue()
                # Play one buffered chunk.
                self._play_audio()
                # Report progress.
                self._show_progress()
                # Very short sleep so playback stays smooth.
                time.sleep(0.001)
        except KeyboardInterrupt:
            print("🔊 输出进程收到中断信号")
        except Exception as e:
            print(f"❌ 输出进程错误: {e}")
        finally:
            self._cleanup()
            print("🔊 输出进程退出")

    def _setup_audio(self):
        """Open the PyAudio output stream.

        Raises:
            Exception: re-raised after logging when the device cannot be opened.
        """
        try:
            self.audio = pyaudio.PyAudio()
            self.output_stream = self.audio.open(
                format=self.FORMAT,
                channels=self.CHANNELS,
                rate=self.RATE,
                output=True,
                frames_per_buffer=self.CHUNK_SIZE
            )
            print("🔊 输出进程:音频设备初始化成功")
        except Exception as e:
            print(f"❌ 输出进程音频设备初始化失败: {e}")
            raise

    def _process_audio_queue(self):
        """Move queued items into the playback buffer.

        Stops at the end marker (``None``). If chunks are still buffered at
        that point, the finish is deferred until they have been played, and
        no further items are dequeued in the meantime.
        """
        if self._end_of_stream:
            # The current stream is still draining; leave later items queued.
            return
        try:
            while True:
                audio_data = self.audio_queue.get_nowait()

                if audio_data is None:
                    # End marker: finish now only if nothing is left to play.
                    if self.playback_buffer:
                        self._end_of_stream = True
                    else:
                        self._finish_playback()
                    break

                if isinstance(audio_data, str) and audio_data.startswith("METADATA:"):
                    # Metadata is logged without the prefix and not played.
                    metadata = audio_data[len("METADATA:"):]
                    print(f"📝 输出进程:播放元数据 {metadata}")
                    continue

                self.playback_buffer.append(audio_data)

                if not self.is_playing:
                    self.is_playing = True
                    print("🔊 输出进程:开始播放音频")
        except queue.Empty:
            pass

    def _play_audio(self):
        """Play the next buffered chunk and finish a drained stream."""
        if self.playback_buffer and self.output_stream:
            try:
                audio_chunk = self.playback_buffer.pop(0)
                if audio_chunk and len(audio_chunk) > 0:
                    self.output_stream.write(audio_chunk)
                    self.total_chunks_played += 1
                    self.total_audio_size += len(audio_chunk)
            except Exception as e:
                print(f"❌ 输出进程播放错误: {e}")
                self.playback_buffer.clear()

        # Complete a deferred finish once every buffered chunk is gone.
        if self._end_of_stream and not self.playback_buffer:
            self._finish_playback()

    def _show_progress(self):
        """Print a progress line once per ``progress_interval`` chunks."""
        played = self.total_chunks_played
        interval = self.config['progress_interval']
        if (self.config['show_progress'] and
                played > 0 and
                interval > 0 and
                # Skip a count that was already reported, so an idle loop
                # does not reprint the same line.
                played != self._last_progress_report and
                played % interval == 0):
            progress = f"🔊 播放进度: {self.total_chunks_played} 块 | {self.total_audio_size / 1024:.1f} KB"
            print(f"\r{progress}", end='', flush=True)
            self._last_progress_report = played

    def _finish_playback(self):
        """Mark playback finished, print a summary and reset the statistics."""
        self.is_playing = False
        self._end_of_stream = False
        self.playback_buffer.clear()

        if self.total_chunks_played > 0:
            print(f"\n✅ 输出进程:播放完成,总计 {self.total_chunks_played} 块, {self.total_audio_size / 1024:.1f} KB")

        # Reset the statistics for the next stream.
        self.total_chunks_played = 0
        self.total_audio_size = 0
        self._last_progress_report = 0

        # No completion notice is sent to the main process from here;
        # a shared flag or a second queue could be used for that.

    def _cleanup(self):
        """Release the stream and the PyAudio instance (best effort)."""
        if self.output_stream:
            try:
                self.output_stream.stop_stream()
                self.output_stream.close()
            except Exception:
                # Best effort: a failing close must not block shutdown.
                pass
            self.output_stream = None

        if self.audio:
            try:
                self.audio.terminate()
            except Exception:
                # Best effort: see above.
                pass
            self.audio = None
if __name__ == "__main__":
    # Running the module directly only prints a short notice; the worker
    # classes are meant to be driven from a multi-process host program.
    for notice in ("音频进程模块测试", "这个模块应该在多进程环境中运行"):
        print(notice)