# Local-Voice/audio_processes.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
多进程音频处理模块
定义输入进程和输出进程的类
"""

import multiprocessing as mp
import queue
import time
import threading
import numpy as np
import pyaudio
from enum import Enum
from dataclasses import dataclass
from typing import Optional, List, Dict, Any
import json
import wave
import os

class RecordingState(Enum):
    """Enumeration of recording states.

    Each member's value is the lowercase string form of its name.
    """
    IDLE = "idle"
    RECORDING = "recording"
    PROCESSING = "processing"
    PLAYING = "playing"

@dataclass
class AudioSegment:
    """Data structure describing one segment of audio."""
    # Raw audio payload.
    audio_data: bytes
    # Timestamp at which the segment starts (presumably time.time() seconds
    # -- TODO confirm against the producer).
    start_time: float
    # Timestamp at which the segment ends (same clock as start_time).
    end_time: float
    # Length of the segment (presumably seconds -- TODO confirm).
    duration: float
    # Optional free-form extra information; None when nothing is attached.
    metadata: Optional[Dict[str, Any]] = None

@dataclass
class ControlCommand:
    """Data structure for a control command."""
    # Name of the command to execute.
    command: str
    # Optional arguments for the command; None when it takes none.
    parameters: Optional[Dict[str, Any]] = None

@dataclass
class ProcessEvent:
    """Data structure for an event emitted by a process."""
    # Identifier of the kind of event.
    event_type: str
    # Optional binary payload carried by the event.
    data: Optional[bytes] = None
    # Optional descriptive details about the event; None when absent.
    metadata: Optional[Dict[str, Any]] = None

class InputProcess:
    """输入进程 - 专门负责录音和语音检测"""

    def __init__(self, command_queue: mp.Queue, event_queue: mp.Queue, config: Dict[str, Any] = None):
        """Initialise recorder state; no audio device is opened here.

        Args:
            command_queue: Queue on which the main process sends control
                commands to this process.
            event_queue: Queue on which this process reports events back to
                the main process.
            config: Optional overrides for the values returned by
                ``_get_default_config()``. Keys that are not supplied keep
                their default value.
        """
        self.command_queue = command_queue    # main process -> input process
        self.event_queue = event_queue        # input process -> main process

        # Layer the caller's overrides on top of the defaults so that a
        # partial config cannot cause a KeyError when a key is read later.
        self.config = {**self._get_default_config(), **(config or {})}

        # Audio capture parameters
        self.FORMAT = pyaudio.paInt16
        self.CHANNELS = 1
        self.RATE = 16000
        self.CHUNK_SIZE = 1024

        # State flags and buffers
        self.recording_enabled = True        # whether recording is allowed
        self.is_recording = False            # whether a recording is in progress
        self.recording_buffer = []           # chunks of the current recording
        self.pre_record_buffer = []          # rolling pre-roll chunks
        self.voice_detected = False
        self.silence_start_time = None
        self.recording_start_time = None

        # ZCR (zero-crossing rate) detection parameters
        self.zcr_history = []
        self.max_zcr_history = 50
        self.consecutive_silence_count = 0
        # Counted in chunks: 30 chunks of 1024 samples at 16 kHz is roughly
        # 1.9 s of audio.
        self.silence_threshold_count = 30
        self.low_zcr_threshold_count = 20     # consecutive low-ZCR chunks that end a recording
        self.consecutive_low_zcr_count = 0    # running count of low-ZCR chunks
        self.voice_activity_history = []      # voice activity history
        self.max_voice_history = 30           # maximum number of history entries

        # Pre-roll length comes from the config instead of a second
        # hard-coded copy of the value.
        self.pre_record_duration = self.config['pre_record_duration']
        # Keep at least one chunk so the chunk that triggers a recording is
        # never evicted before it can be copied into the recording.
        self.pre_record_max_frames = max(
            1, int(self.pre_record_duration * self.RATE / self.CHUNK_SIZE)
        )

        # PyAudio handles, created later by _setup_audio()
        self.audio = None
        self.input_stream = None

        # Main-loop flag
        self.running = True

    def _get_default_config(self) -> Dict[str, Any]:
        """Return a fresh dictionary holding the default settings.

        Keys:
            zcr_min / zcr_max: zero-crossing-rate bounds tuned for a 16 kHz
                sample rate.
            min_recording_time / max_recording_time: shortest and longest
                recording length, in seconds.
            silence_threshold: silence duration setting.
            pre_record_duration: length of the pre-roll, in seconds.
        """
        defaults: Dict[str, Any] = dict(
            zcr_min=2400,
            zcr_max=12000,
            min_recording_time=2.0,
            max_recording_time=30.0,
            silence_threshold=3.0,
            pre_record_duration=2.0,
        )
        return defaults

    def run(self):
        """Run the input process main loop until ``self.running`` is cleared.

        Each iteration polls the command queue and, while recording is
        enabled, processes one chunk of audio. ``_cleanup()`` always runs on
        the way out, including when opening the audio device fails; a setup
        failure is still propagated to the caller.
        """
        print("🎙️ 输入进程启动")

        try:
            # Setup sits inside the try/finally so that a partially
            # initialised audio backend is released if it raises.
            self._setup_audio()

            try:
                while self.running:
                    # 1. Apply pending commands from the main process
                    self._check_commands()
                    if not self.running:
                        # A shutdown was requested: do not block on another
                        # audio read before exiting.
                        break

                    # 2. Capture and analyse audio while recording is allowed
                    if self.recording_enabled:
                        self._process_audio()

                    # 3. Short sleep to keep CPU usage down
                    time.sleep(0.01)

            except KeyboardInterrupt:
                print("🎙️ 输入进程收到中断信号")
            except Exception as e:
                print(f"❌ 输入进程错误: {e}")
        finally:
            self._cleanup()
            print("🎙️ 输入进程退出")

    def _setup_audio(self):
        """Create the PyAudio instance and open the capture stream.

        Raises:
            Exception: whatever PyAudio raised. Before re-raising, any
                PyAudio instance that was already created is terminated so
                a failed ``open()`` does not leak the audio backend.
        """
        try:
            self.audio = pyaudio.PyAudio()
            self.input_stream = self.audio.open(
                format=self.FORMAT,
                channels=self.CHANNELS,
                rate=self.RATE,
                input=True,
                frames_per_buffer=self.CHUNK_SIZE
            )
            print("🎙️ 输入进程：音频设备初始化成功")
        except Exception as e:
            print(f"❌ 输入进程音频设备初始化失败: {e}")
            # Roll back the half-finished setup so nothing is left open.
            self.input_stream = None
            if self.audio is not None:
                try:
                    self.audio.terminate()
                except Exception:
                    # Best effort only: the original error is the one to report.
                    pass
                self.audio = None
            raise

    def _check_commands(self):
        """Drain the command queue and apply every pending command.

        Recognised commands are ``enable_recording``, ``disable_recording``
        (which also finishes a recording in progress) and ``shutdown``
        (which clears ``self.running`` and stops draining immediately).
        Any other command name is ignored.
        """
        while True:
            try:
                pending = self.command_queue.get_nowait()
            except queue.Empty:
                # Nothing left to handle.
                return

            action = pending.command

            if action == 'shutdown':
                print("🎙️ 输入进程：收到关闭命令")
                self.running = False
                return

            if action == 'enable_recording':
                self.recording_enabled = True
                print("🎙️ 输入进程：录音功能已启用")
                continue

            if action == 'disable_recording':
                self.recording_enabled = False
                # A recording in progress is stopped right away so its data
                # is still delivered.
                if self.is_recording:
                    self._stop_recording()
                print("🎙️ 输入进程：录音功能已禁用")

    def _process_audio(self):
        """Read one chunk from the input stream and advance the recorder.

        While listening, a chunk classified as voice starts a recording and
        any other chunk only refreshes the status line. While recording, the
        chunk is appended and the recording is stopped once enough
        consecutive non-voice chunks were seen (after the minimum recording
        time) or the maximum recording time is reached. Every exception is
        reported on stdout and swallowed.
        """
        try:
            chunk = self.input_stream.read(self.CHUNK_SIZE, exception_on_overflow=False)
            if len(chunk) == 0:
                return

            # The pre-roll buffer is refreshed for every chunk.
            self._update_pre_record_buffer(chunk)

            # Classify the chunk by its zero-crossing rate.
            zcr = self._calculate_zcr(chunk)
            is_voice = self._is_voice_active(zcr)

            if not self.is_recording:
                # Listening mode
                if is_voice:
                    self._start_recording()
                    return
                fill_percent = len(self.pre_record_buffer) / self.pre_record_max_frames * 100
                print(f"\r🎙️ 监听中... ZCR: {zcr:.0f} | 语音: {is_voice} | 缓冲: {fill_percent:.0f}%", end='', flush=True)
                return

            # Recording mode
            self.recording_buffer.append(chunk)

            if is_voice:
                # Voice resets all silence tracking.
                self.consecutive_low_zcr_count = 0
                self.consecutive_silence_count = 0
                self.silence_start_time = None
            else:
                self.consecutive_silence_count += 1
                self.consecutive_low_zcr_count += 1
                if self.silence_start_time is None:
                    self.silence_start_time = time.time()

            elapsed = time.time() - self.recording_start_time

            # Stop reason 1: a long enough run of non-voice chunks, but only
            # after the minimum recording time.
            silence_timeout = (
                self.consecutive_low_zcr_count >= self.low_zcr_threshold_count
                and elapsed >= self.config['min_recording_time']
            )
            if silence_timeout:
                print("🎙️ 输入进程：ZCR静音检测触发停止录音")

            # Stop reason 2: the recording reached its maximum length.
            too_long = elapsed >= self.config['max_recording_time']
            if too_long:
                print("🎙️ 输入进程：达到最大录音时间")

            if silence_timeout or too_long:
                self._stop_recording()

        except Exception as err:
            print(f"🎙️ 输入进程音频处理错误: {err}")

    def _update_pre_record_buffer(self, audio_data: bytes):
        """Append a chunk to the pre-roll buffer, evicting the oldest entry
        when the buffer grows past ``pre_record_max_frames``."""
        backlog = self.pre_record_buffer
        backlog.append(audio_data)

        # At most one entry is dropped per call, which is enough to keep the
        # size bounded because only one entry is added per call.
        if len(backlog) > self.pre_record_max_frames:
            del backlog[0]

    def _start_recording(self):
        """Begin a new recording seeded with the buffered pre-roll.

        Does nothing while recording is disabled. Otherwise the pre-roll
        chunks become the start of the recording buffer, the pre-roll buffer
        is emptied in place and the silence counters are reset.
        """
        if not self.recording_enabled:
            return

        # Move the pre-roll into a fresh recording buffer.
        self.recording_buffer = list(self.pre_record_buffer)
        self.pre_record_buffer.clear()

        # Reset timing and silence tracking for the new recording.
        self.recording_start_time = time.time()
        self.silence_start_time = None
        self.consecutive_silence_count = self.consecutive_low_zcr_count = 0
        self.is_recording = True

        print(f"🎙️ 输入进程：开始录音（包含预录音 {self.config['pre_record_duration']}秒）")

    def _stop_recording(self):
        """Finish the current recording and hand it to the main process.

        Does nothing when no recording is in progress. Otherwise the buffered
        chunks are joined, written to a WAV file through
        ``_save_recording()`` and published on ``event_queue`` as a
        ``recording_complete`` event. The event metadata carries the
        duration, start/end timestamps, file name (None if saving failed)
        and the stream format. Both buffers are emptied afterwards.
        """
        if not self.is_recording:
            return

        self.is_recording = False

        if self.recording_buffer:
            audio_data = b''.join(self.recording_buffer)

            # paInt16 samples are 2 bytes wide and each frame carries one
            # sample per channel.
            bytes_per_second = self.RATE * self.CHANNELS * 2
            duration = len(audio_data) / bytes_per_second

            # Persist the recording; returns None when saving failed.
            filename = self._save_recording(audio_data)

            # The stream format travels with the event so the receiver does
            # not have to guess how to decode the raw bytes.
            self.event_queue.put(ProcessEvent(
                event_type='recording_complete',
                data=audio_data,
                metadata={
                    'duration': duration,
                    'start_time': self.recording_start_time,
                    'filename': filename,
                    'end_time': time.time(),
                    'sample_rate': self.RATE,
                    'channels': self.CHANNELS
                }
            ))

            print(f"📝 输入进程：录音完成，时长 {duration:.2f} 秒")

        # Start the next listening phase with empty buffers.
        self.recording_buffer = []
        self.pre_record_buffer = []

    def _save_recording(self, audio_data: bytes) -> str:
        """Write ``audio_data`` to a timestamped WAV file in the working directory.

        The file is named ``recording_<YYYYmmdd_HHMMSS>.wav`` and uses the
        instance's channel count, sample rate and the sample width PyAudio
        reports for ``self.FORMAT``.

        Returns:
            The file name on success, or None when anything went wrong (the
            error is printed, not raised).
        """
        try:
            stamp = time.strftime("%Y%m%d_%H%M%S")
            filename = "recording_" + stamp + ".wav"

            with wave.open(filename, 'wb') as wav_out:
                wav_out.setnchannels(self.CHANNELS)
                sample_width = self.audio.get_sample_size(self.FORMAT)
                wav_out.setsampwidth(sample_width)
                wav_out.setframerate(self.RATE)
                wav_out.writeframes(audio_data)

            print(f"💾 输入进程：录音已保存到 {filename}")
            return filename

        except Exception as err:
            print(f"❌ 输入进程保存录音失败: {err}")
            return None

    def _calculate_zcr(self, audio_data: bytes) -> float:
        """Compute the zero-crossing rate of a chunk of 16-bit samples.

        The bytes are interpreted as int16 samples. The number of positions
        where the sign of neighbouring samples differs is divided by the
        sample count and scaled by the sample rate. The result is also
        appended to ``zcr_history``, which is capped at ``max_zcr_history``
        entries. Empty input returns 0 and leaves the history untouched.
        """
        if len(audio_data) == 0:
            return 0

        samples = np.frombuffer(audio_data, dtype=np.int16)

        # Compare each sample's sign with that of its successor.
        signs = np.sign(samples)
        crossings = np.sum(signs[1:] != signs[:-1])

        # Scale the per-sample rate to the sample rate.
        rate = crossings / len(samples) * self.RATE

        history = self.zcr_history
        history.append(rate)
        if len(history) > self.max_zcr_history:
            del history[0]

        return rate

    def _is_voice_active(self, zcr: float) -> bool:
        """Decide from the zero-crossing rate whether a chunk contains voice.

        A chunk counts as voice when ``zcr`` lies strictly between the
        configured ``zcr_min`` and ``zcr_max``. When a key is missing from
        the config, the previous fixed bounds (2400 and 12000) are used, so
        behaviour with the default configuration is unchanged.

        Args:
            zcr: Zero-crossing rate of the chunk.

        Returns:
            True when the rate is inside the open interval, else False.
        """
        # Read the bounds from the config so the zcr_min / zcr_max settings
        # actually take effect.
        lower = self.config.get('zcr_min', 2400)
        upper = self.config.get('zcr_max', 12000)
        return lower < zcr < upper

    def _cleanup(self):
        """Release the capture stream and the PyAudio instance.

        Every release step is attempted even if an earlier one fails, and
        failures are reported instead of silently discarded. The handles are
        set to None, so calling this method again is a no-op.
        """
        stream, self.input_stream = self.input_stream, None
        if stream is not None:
            # stop and close are tried independently: a failing stop must not
            # prevent the stream from being closed.
            for step in ('stop_stream', 'close'):
                try:
                    getattr(stream, step)()
                except Exception as e:
                    print(f"⚠️ 输入进程：释放音频流失败 ({step}): {e}")

        audio, self.audio = self.audio, None
        if audio is not None:
            try:
                audio.terminate()
            except Exception as e:
                print(f"⚠️ 输入进程：释放音频设备失败: {e}")

class OutputProcess:
    """输出进程 - 专门负责音频播放"""

    def __init__(self, audio_queue: mp.Queue, config: Dict[str, Any] = None):
        """Initialise playback state; no audio device is opened here.

        Args:
            audio_queue: Queue on which the main process sends audio data to
                this process.
            config: Optional overrides for the values returned by
                ``_get_default_config()``. Keys that are not supplied keep
                their default value.
        """
        self.audio_queue = audio_queue          # main process -> output process

        # Layer the caller's overrides on top of the defaults so that a
        # partial config cannot cause a KeyError when a key is read later.
        self.config = {**self._get_default_config(), **(config or {})}

        # Audio playback parameters
        self.FORMAT = pyaudio.paInt16
        self.CHANNELS = 1
        self.RATE = 16000
        self.CHUNK_SIZE = 512

        # Playback state and statistics
        self.is_playing = False
        self.playback_buffer = []
        self.total_chunks_played = 0
        self.total_audio_size = 0

        # PyAudio handles, created later by _setup_audio()
        self.audio = None
        self.output_stream = None

        # Main-loop flag
        self.running = True

    def _get_default_config(self) -> Dict[str, Any]:
        """Return a fresh dictionary holding the default playback settings.

        Keys:
            buffer_size: buffer size setting.
            show_progress: whether playback progress is printed.
            progress_interval: number of played chunks between progress
                reports.
        """
        defaults: Dict[str, Any] = dict(
            buffer_size=1000,
            show_progress=True,
            progress_interval=100,
        )
        return defaults

    def run(self):
        """Run the output process main loop until ``self.running`` is cleared.

        Each iteration drains the audio queue, plays one buffered chunk and
        updates the progress display. ``_cleanup()`` always runs on the way
        out, including when opening the audio device fails; a setup failure
        is still propagated to the caller.
        """
        print("🔊 输出进程启动")

        try:
            # Setup sits inside the try/finally so that a partially
            # initialised audio backend is released if it raises.
            self._setup_audio()

            try:
                while self.running:
                    # Pull newly arrived data from the main process
                    self._process_audio_queue()

                    # Play buffered audio
                    self._play_audio()

                    # Report playback progress
                    self._show_progress()

                    time.sleep(0.001)  # very short sleep to keep playback smooth

            except KeyboardInterrupt:
                print("🔊 输出进程收到中断信号")
            except Exception as e:
                print(f"❌ 输出进程错误: {e}")
        finally:
            self._cleanup()
            print("🔊 输出进程退出")

    def _setup_audio(self):
        """Create the PyAudio instance and open the playback stream.

        Raises:
            Exception: whatever PyAudio raised. Before re-raising, any
                PyAudio instance that was already created is terminated so
                a failed ``open()`` does not leak the audio backend.
        """
        try:
            self.audio = pyaudio.PyAudio()
            self.output_stream = self.audio.open(
                format=self.FORMAT,
                channels=self.CHANNELS,
                rate=self.RATE,
                output=True,
                frames_per_buffer=self.CHUNK_SIZE
            )
            print("🔊 输出进程：音频设备初始化成功")
        except Exception as e:
            print(f"❌ 输出进程音频设备初始化失败: {e}")
            # Roll back the half-finished setup so nothing is left open.
            self.output_stream = None
            if self.audio is not None:
                try:
                    self.audio.terminate()
                except Exception:
                    # Best effort only: the original error is the one to report.
                    pass
                self.audio = None
            raise

    def _process_audio_queue(self):
        """Drain the audio queue into the playback buffer.

        Three kinds of items are understood:

        * ``None`` marks the end of the current audio. Everything still
          waiting in the playback buffer is played first, then
          ``_finish_playback()`` runs and draining stops for this call.
        * A string starting with ``"METADATA:"`` is printed and skipped.
        * Anything else is treated as audio data and buffered.
        """
        try:
            while True:
                audio_data = self.audio_queue.get_nowait()

                if audio_data is None:
                    # Chunks are usually drained much faster than they are
                    # played, so the buffer can still hold audio when the
                    # end marker arrives. Play it out before finishing;
                    # otherwise _finish_playback() would discard it.
                    # _play_audio() removes a chunk (or clears the buffer on
                    # error) on every call, so this loop terminates.
                    while self.playback_buffer and self.output_stream:
                        self._play_audio()
                    self._finish_playback()
                    break

                if isinstance(audio_data, str) and audio_data.startswith("METADATA:"):
                    # Metadata message: strip the "METADATA:" prefix and log it.
                    metadata = audio_data[9:]
                    print(f"📝 输出进程：播放元数据 {metadata}")
                    continue

                # Audio data goes into the playback buffer.
                self.playback_buffer.append(audio_data)
                if not self.is_playing:
                    self.is_playing = True
                    print("🔊 输出进程：开始播放音频")

        except queue.Empty:
            # Queue fully drained for now.
            pass

    def _play_audio(self):
        """Play the oldest buffered chunk, if there is one.

        Does nothing without buffered data or without an output stream.
        Empty chunks are removed from the buffer but not written. When
        writing fails, the error is printed and the whole buffer is dropped.
        """
        if not (self.playback_buffer and self.output_stream):
            return

        try:
            # Take the oldest chunk off the front of the buffer.
            chunk = self.playback_buffer.pop(0)
            if not chunk or not len(chunk) > 0:
                return

            self.output_stream.write(chunk)
            self.total_chunks_played += 1
            self.total_audio_size += len(chunk)

        except Exception as err:
            print(f"❌ 输出进程播放错误: {err}")
            self.playback_buffer.clear()

    def _show_progress(self):
        """Print a playback progress line every ``progress_interval`` chunks.

        Each milestone is reported only once. Without that guard the same
        line would be printed (and flushed) on every main-loop pass for as
        long as the chunk counter rests on a multiple of the interval, for
        example while waiting for more audio. A non-positive interval
        disables reporting instead of raising ZeroDivisionError.
        """
        played = self.total_chunks_played
        if played == 0:
            # The counter is at zero (nothing played yet, or statistics were
            # reset): forget the last reported milestone.
            self._last_progress_report = 0
            return

        if not self.config['show_progress']:
            return

        interval = self.config['progress_interval']
        if interval <= 0 or played % interval != 0:
            return

        # getattr: the marker attribute is created lazily by this method.
        if played == getattr(self, '_last_progress_report', 0):
            return
        self._last_progress_report = played

        progress = f"🔊 播放进度: {played} 块 | {self.total_audio_size / 1024:.1f} KB"
        print(f"\r{progress}", end='', flush=True)

    def _finish_playback(self):
        """完成播放"""
        self.is_playing = False
        self.playback_buffer.clear()

        if self.total_chunks_played > 0:
            print(f"\n✅ 输出进程：播放完成，总计 {self.total_chunks_played} 块, {self.total_audio_size / 1024:.1f} KB")

        # 重置统计
        self.total_chunks_played = 0
        self.total_audio_size = 0

        # 通知主进程播放完成
        # 这里可以通过共享内存或另一个队列来实现
        # 暂时简化处理，由主进程通过队列大小判断

    def _cleanup(self):
        """Release the playback stream and the PyAudio instance.

        Every release step is attempted even if an earlier one fails, and
        failures are reported instead of silently discarded. The handles are
        set to None, so calling this method again is a no-op.
        """
        stream, self.output_stream = self.output_stream, None
        if stream is not None:
            # stop and close are tried independently: a failing stop must not
            # prevent the stream from being closed.
            for step in ('stop_stream', 'close'):
                try:
                    getattr(stream, step)()
                except Exception as e:
                    print(f"⚠️ 输出进程：释放音频流失败 ({step}): {e}")

        audio, self.audio = self.audio, None
        if audio is not None:
            try:
                audio.terminate()
            except Exception as e:
                print(f"⚠️ 输出进程：释放音频设备失败: {e}")

if __name__ == "__main__":
    # Running the module directly only prints a short notice; the process
    # classes are meant to be driven from a multi-process host program.
    for notice in ("音频进程模块测试", "这个模块应该在多进程环境中运行"):
        print(notice)