# -*- coding: utf-8 -*-
"""
Audio/video processing adapter.

Reuses MaxKB's model system while keeping this module independent.
"""
import os
import json
import tempfile
from typing import Dict, List, Optional, Any
from concurrent.futures import ThreadPoolExecutor


class MediaAdapter:
    """Audio/video processing adapter.

    Reuses MaxKB's model system (models are looked up by model id and
    workspace id) while keeping the media module itself independent.
    """

    # Lower-case extensions (without the dot) that are routed to the video
    # processor; every other file name is handled by the audio processor.
    _VIDEO_EXTENSIONS = frozenset(
        {'mp4', 'avi', 'mov', 'mkv', 'webm', 'flv', 'wmv'}
    )

    def __init__(self, logger=None):
        """Create the adapter.

        Args:
            logger: Logger to use. When omitted (or falsy) a default logger
                is built by ``_get_default_logger``.
        """
        self.logger = logger or self._get_default_logger()
        # Imported lazily so that importing this module does not pull in
        # the package configuration.
        from .config import MediaConfig
        self.config = MediaConfig()

    def _get_default_logger(self):
        """Return a ``MediaLogger`` wrapping the MaxKB logger.

        Falls back to a standard ``logging`` logger named ``MediaAdapter``
        when the MaxKB logger cannot be imported or wrapped.
        """
        from .logger import MediaLogger
        try:
            from common.utils.logger import maxkb_logger
            return MediaLogger(maxkb_logger)
        except Exception:
            # Best-effort fallback. ``Exception`` (rather than a bare
            # ``except``) lets KeyboardInterrupt/SystemExit propagate.
            import logging
            return MediaLogger(logging.getLogger('MediaAdapter'))

    @staticmethod
    def _resolve_model(model_id, workspace_id):
        """Look up a model instance through MaxKB's model provider.

        The import is local so that a missing ``models_provider`` package
        surfaces as an exception the caller can log and recover from.
        """
        from models_provider.tools import get_model_instance_by_model_workspace_id
        return get_model_instance_by_model_workspace_id(model_id, workspace_id)

    def process_media(self,
                      file_content: bytes,
                      file_name: str,
                      stt_model_id: Optional[str] = None,
                      llm_model_id: Optional[str] = None,
                      workspace_id: Optional[str] = None,
                      options: Optional[Dict[str, Any]] = None) -> Dict:
        """Process an audio or video file.

        Model lookup failures are logged and do not abort processing: the
        processor is then called with ``None`` for the affected model.

        Args:
            file_content: Raw file content.
            file_name: File name; its extension selects the video or the
                audio processor.
            stt_model_id: ID of the STT model (as registered in MaxKB).
            llm_model_id: ID of the LLM model used for text enhancement
                (optional).
            workspace_id: Workspace ID; required for any model lookup.
            options: Additional options passed through to the processor:
                - language: language (zh/en/auto)
                - segment_duration: segment length in seconds
                - enable_punctuation: whether to add punctuation
                - enable_summary: whether to generate summaries

        Returns:
            The processor's result, unchanged. Expected shape::

                {
                    'status': 'success',
                    'media_type': 'audio/video',
                    'duration': 120.5,
                    'segments': [
                        {
                            'index': 0,
                            'start_time': 0,
                            'end_time': 60,
                            'text': 'transcribed text',
                            'enhanced_text': 'enhanced text',
                            'summary': 'segment summary'
                        }
                    ],
                    'full_text': 'full text',
                    'metadata': {
                        'stt_model': 'model_name',
                        'language': 'zh',
                        'processing_time': 10.5
                    }
                }

        Raises:
            Exception: Any error raised while selecting or running the
                processor is logged and re-raised unchanged.
        """
        options = options or {}
        self.logger.info(f"开始处理媒体文件: {file_name}")
        self.logger.info("接收到的参数:")
        self.logger.info(f" - stt_model_id: {stt_model_id}")
        self.logger.info(f" - workspace_id: {workspace_id}")
        self.logger.info(f" - llm_model_id: {llm_model_id}")
        self.logger.info(f" - options: {options}")
        self.logger.info(f" - enable_summary in options: {options.get('enable_summary')}")

        try:
            # Decide between the audio and the video pipeline.
            media_type = self._detect_media_type(file_name)

            # STT model: a lookup failure is logged as an error, but
            # processing continues with stt_model left as None.
            stt_model = None
            if stt_model_id and workspace_id:
                try:
                    stt_model = self._resolve_model(stt_model_id, workspace_id)
                    self.logger.info(f"成功获取STT模型实例: {stt_model}")
                except Exception as e:
                    self.logger.error(f"获取STT模型失败: {str(e)}")
            else:
                self.logger.warning(f"STT模型未配置 - stt_model_id: {stt_model_id}, workspace_id: {workspace_id}")

            # LLM model (optional): a lookup failure is only a warning.
            llm_model = None
            if llm_model_id and workspace_id:
                try:
                    llm_model = self._resolve_model(llm_model_id, workspace_id)
                    self.logger.info(f"使用LLM模型: {llm_model_id}")
                except Exception as e:
                    self.logger.warning(f"获取LLM模型失败: {str(e)}")

            # Processors are imported lazily so only the one actually
            # needed is loaded.
            if media_type == 'video':
                from .processors.video_processor import VideoProcessor
                processor = VideoProcessor(self.config, self.logger)
            else:
                from .processors.audio_processor import AudioProcessor
                processor = AudioProcessor(self.config, self.logger)

            result = processor.process(
                file_content=file_content,
                file_name=file_name,
                stt_model=stt_model,
                llm_model=llm_model,
                options=options
            )

            self.logger.info(f"媒体文件处理成功: {file_name}")
            return result

        except Exception as e:
            self.logger.error(f"处理媒体文件失败: {str(e)}")
            raise

    def _detect_media_type(self, file_name: str) -> str:
        """Classify a file as ``'video'`` or ``'audio'`` by its extension.

        The extension is the text after the last dot, compared
        case-insensitively. Names without any dot have no extension and
        are treated as audio, so a bare name such as ``mp4`` is not
        mistaken for a video file.
        """
        _, dot, extension = file_name.rpartition('.')
        if dot and extension.lower() in self._VIDEO_EXTENSIONS:
            return 'video'
        return 'audio'