# -*- coding: utf-8 -*-
"""
Audio/video processing adapter.

Reuses MaxKB's model system while keeping this module independent.
"""
import os
import json
import tempfile
from typing import Dict, List, Optional, Any
from concurrent.futures import ThreadPoolExecutor


class MediaAdapter:
    """
    Audio/video processing adapter.

    Resolves the STT/LLM model instances through MaxKB's model provider and
    delegates the actual work to a video or audio processor chosen from the
    file name's extension.
    """

    # Lower-case extensions (without the dot) that are routed to the video
    # processor; every other file name is handled as audio.
    _VIDEO_EXTENSIONS = frozenset(
        {'mp4', 'avi', 'mov', 'mkv', 'webm', 'flv', 'wmv'}
    )

    def __init__(self, logger=None):
        """
        Create the adapter.

        Args:
            logger: Logger to use. When falsy, a default one is built by
                ``_get_default_logger``.
        """
        self.logger = logger or self._get_default_logger()
        # Imported lazily so that importing this module does not require the
        # sibling ``config`` module to be loaded first.
        from .config import MediaConfig
        self.config = MediaConfig()

    def _get_default_logger(self):
        """
        Build the default logger.

        Tries to wrap MaxKB's ``maxkb_logger`` in a ``MediaLogger``; if that
        fails for any ordinary reason, falls back to wrapping a standard
        ``logging`` logger named ``'MediaAdapter'``.
        """
        try:
            from common.utils.logger import maxkb_logger
            from .logger import MediaLogger
            return MediaLogger(maxkb_logger)
        except Exception:
            # Deliberate best-effort fallback. ``Exception`` (rather than a
            # bare ``except``) lets KeyboardInterrupt/SystemExit propagate.
            import logging
            from .logger import MediaLogger
            return MediaLogger(logging.getLogger('MediaAdapter'))

    def process_media(self,
                      file_content: bytes,
                      file_name: str,
                      stt_model_id: Optional[str] = None,
                      llm_model_id: Optional[str] = None,
                      workspace_id: Optional[str] = None,
                      options: Optional[Dict[str, Any]] = None) -> Dict:
        """
        Process an audio or video file.

        Args:
            file_content: Raw bytes of the file.
            file_name: File name; its extension selects the processor.
            stt_model_id: ID of the STT model (as registered in MaxKB).
            llm_model_id: ID of the LLM model (optional, used for text
                enhancement).
            workspace_id: Workspace ID; a model is only looked up when both
                its ID and the workspace ID are given.
            options: Extra options forwarded unchanged to the processor.
                Documented keys:
                - language: language (zh/en/auto)
                - segment_duration: segment length in seconds
                - enable_punctuation: whether to add punctuation
                - enable_summary: whether to generate a summary

        Returns:
            Whatever the selected processor returns. The expected shape is:
            {
                'status': 'success',
                'media_type': 'audio/video',
                'duration': 120.5,
                'segments': [
                    {
                        'index': 0,
                        'start_time': 0,
                        'end_time': 60,
                        'text': 'transcribed text',
                        'enhanced_text': 'enhanced text',
                        'summary': 'segment summary'
                    }
                ],
                'full_text': 'full text',
                'metadata': {
                    'stt_model': 'model_name',
                    'language': 'zh',
                    'processing_time': 10.5
                }
            }

        Raises:
            Exception: Any error raised while detecting the media type,
                building the processor or processing the file is logged and
                re-raised. Failures to load the STT/LLM model are only
                logged; processing continues with that model set to ``None``.
        """
        options = options or {}

        self.logger.info(f"开始处理媒体文件: {file_name}")
        self.logger.info("接收到的参数:")
        self.logger.info(f" - stt_model_id: {stt_model_id}")
        self.logger.info(f" - workspace_id: {workspace_id}")
        self.logger.info(f" - llm_model_id: {llm_model_id}")

        try:
            # Decide between the video and the audio pipeline.
            media_type = self._detect_media_type(file_name)

            # Resolve the STT model instance.
            stt_model = None
            if stt_model_id and workspace_id:
                try:
                    from models_provider.tools import get_model_instance_by_model_workspace_id
                    stt_model = get_model_instance_by_model_workspace_id(
                        stt_model_id, workspace_id
                    )
                    self.logger.info(f"成功获取STT模型实例: {stt_model}")
                except Exception as e:
                    # Best effort: the processor receives stt_model=None.
                    self.logger.error(f"获取STT模型失败: {e}")
            else:
                self.logger.warning(
                    f"STT模型未配置 - stt_model_id: {stt_model_id}, workspace_id: {workspace_id}"
                )

            # Resolve the (optional) LLM model instance.
            llm_model = None
            if llm_model_id and workspace_id:
                try:
                    from models_provider.tools import get_model_instance_by_model_workspace_id
                    llm_model = get_model_instance_by_model_workspace_id(
                        llm_model_id, workspace_id
                    )
                    self.logger.info(f"使用LLM模型: {llm_model_id}")
                except Exception as e:
                    # Best effort: the processor receives llm_model=None.
                    self.logger.warning(f"获取LLM模型失败: {e}")

            # Pick the processor; imported lazily so only the needed one is
            # loaded.
            if media_type == 'video':
                from .processors.video_processor import VideoProcessor
                processor = VideoProcessor(self.config, self.logger)
            else:
                from .processors.audio_processor import AudioProcessor
                processor = AudioProcessor(self.config, self.logger)

            result = processor.process(
                file_content=file_content,
                file_name=file_name,
                stt_model=stt_model,
                llm_model=llm_model,
                options=options
            )

            self.logger.info(f"媒体文件处理成功: {file_name}")
            return result

        except Exception as e:
            self.logger.error(f"处理媒体文件失败: {e}")
            raise

    def _detect_media_type(self, file_name: str) -> str:
        """
        Classify a file as ``'video'`` or ``'audio'`` from its extension.

        The extension is the text after the last ``.`` of the lower-cased
        name. A name without any dot has no extension and is treated as
        audio, even if the name itself equals a video extension.

        Args:
            file_name: File name to inspect.

        Returns:
            ``'video'`` if the extension is a known video extension,
            otherwise ``'audio'``.
        """
        # rpartition leaves ``dot`` empty when the name contains no '.', which
        # tells a real extension apart from a bare name such as "mp4".
        _, dot, file_ext = file_name.lower().rpartition('.')
        if dot and file_ext in self._VIDEO_EXTENSIONS:
            return 'video'
        return 'audio'