maxkb/apps/common/handle/impl/media/media_adapter/adapter.py
2025-08-31 11:16:33 +08:00

145 lines
5.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
"""
音视频处理适配器
复用MaxKB的模型系统保持模块独立性
"""
import os
import json
import tempfile
from typing import Dict, List, Optional, Any
from concurrent.futures import ThreadPoolExecutor
class MediaAdapter:
    """Adapter that routes an audio/video file to the matching processor.

    Model instances (STT and, optionally, LLM) are looked up through
    MaxKB's model provider, so this module carries no model-loading logic
    of its own.
    """

    # Lower-case extensions (without the dot) that are handled as video.
    # Anything else is handled as audio.
    _VIDEO_EXTENSIONS = frozenset(
        {'mp4', 'avi', 'mov', 'mkv', 'webm', 'flv', 'wmv'}
    )

    def __init__(self, logger=None):
        """Create the adapter.

        Args:
            logger: Object exposing ``info`` / ``warning`` / ``error``.
                When omitted (or falsy) a default ``MediaLogger`` is built.
        """
        self.logger = logger or self._get_default_logger()
        # Imported lazily, as in the original, so importing this module does
        # not pull in the config module until an adapter is actually created.
        from .config import MediaConfig
        self.config = MediaConfig()

    def _get_default_logger(self):
        """Return a ``MediaLogger`` around MaxKB's logger, else a stdlib one."""
        from .logger import MediaLogger
        try:
            from common.utils.logger import maxkb_logger
            return MediaLogger(maxkb_logger)
        except Exception:
            # Best-effort fallback when the MaxKB logger is unavailable.
            # ``Exception`` (not a bare ``except``) so that SystemExit and
            # KeyboardInterrupt are never swallowed here.
            import logging
            return MediaLogger(logging.getLogger('MediaAdapter'))

    @staticmethod
    def _load_model(model_id, workspace_id):
        """Fetch a model instance from MaxKB's model provider.

        Any exception raised by the provider propagates to the caller, which
        decides how to log it.
        """
        from models_provider.tools import get_model_instance_by_model_workspace_id
        return get_model_instance_by_model_workspace_id(model_id, workspace_id)

    def _create_processor(self, media_type: str):
        """Instantiate the video processor for 'video', else the audio one."""
        if media_type == 'video':
            from .processors.video_processor import VideoProcessor
            return VideoProcessor(self.config, self.logger)
        from .processors.audio_processor import AudioProcessor
        return AudioProcessor(self.config, self.logger)

    def process_media(self,
                      file_content: bytes,
                      file_name: str,
                      stt_model_id: Optional[str] = None,
                      llm_model_id: Optional[str] = None,
                      workspace_id: Optional[str] = None,
                      options: Optional[Dict[str, Any]] = None) -> Dict:
        """Process an audio or video file.

        The media type is chosen from the file extension, the STT/LLM model
        instances are resolved (failures are logged and the model is passed
        on as ``None``), and the work is delegated to the matching processor.

        Args:
            file_content: Raw bytes of the file.
            file_name: File name; its extension selects audio vs. video.
            stt_model_id: ID of the STT model in the MaxKB system.
            llm_model_id: ID of the LLM model used for text enhancement
                (optional).
            workspace_id: Workspace ID; required for any model lookup.
            options: Extra options forwarded to the processor, e.g.
                - language: zh / en / auto
                - segment_duration: segment length in seconds
                - enable_punctuation: whether to add punctuation
                - enable_summary: whether to generate summaries

        Returns:
            Whatever the processor returns. The original documentation
            describes it as::

                {
                    'status': 'success',
                    'media_type': 'audio/video',
                    'duration': 120.5,
                    'segments': [
                        {
                            'index': 0,
                            'start_time': 0,
                            'end_time': 60,
                            'text': 'transcribed text',
                            'enhanced_text': 'enhanced text',
                            'summary': 'segment summary'
                        }
                    ],
                    'full_text': 'complete text',
                    'metadata': {
                        'stt_model': 'model_name',
                        'language': 'zh',
                        'processing_time': 10.5
                    }
                }

        Raises:
            Exception: Any error raised while selecting or running the
                processor is logged and re-raised unchanged.
        """
        options = options or {}
        self.logger.info(f"开始处理媒体文件: {file_name}")
        self.logger.info("接收到的参数:")
        self.logger.info(f" - stt_model_id: {stt_model_id}")
        self.logger.info(f" - workspace_id: {workspace_id}")
        self.logger.info(f" - llm_model_id: {llm_model_id}")
        self.logger.info(f" - options: {options}")
        self.logger.info(f" - enable_summary in options: {options.get('enable_summary')}")
        try:
            # Decide between the audio and the video pipeline.
            media_type = self._detect_media_type(file_name)

            # Resolve the STT model. A lookup failure is logged and the
            # processor receives ``None``.
            stt_model = None
            if stt_model_id and workspace_id:
                try:
                    stt_model = self._load_model(stt_model_id, workspace_id)
                    self.logger.info(f"成功获取STT模型实例: {stt_model}")
                except Exception as e:
                    self.logger.error(f"获取STT模型失败: {str(e)}")
            else:
                self.logger.warning(f"STT模型未配置 - stt_model_id: {stt_model_id}, workspace_id: {workspace_id}")

            # Resolve the optional LLM model; a failure here is only a warning.
            llm_model = None
            if llm_model_id and workspace_id:
                try:
                    llm_model = self._load_model(llm_model_id, workspace_id)
                    self.logger.info(f"使用LLM模型: {llm_model_id}")
                except Exception as e:
                    self.logger.warning(f"获取LLM模型失败: {str(e)}")

            # Hand the file to the matching processor.
            processor = self._create_processor(media_type)
            result = processor.process(
                file_content=file_content,
                file_name=file_name,
                stt_model=stt_model,
                llm_model=llm_model,
                options=options
            )
            self.logger.info(f"媒体文件处理成功: {file_name}")
            return result
        except Exception as e:
            self.logger.error(f"处理媒体文件失败: {str(e)}")
            raise

    def _detect_media_type(self, file_name: str) -> str:
        """Return 'video' for a known video extension, otherwise 'audio'.

        The match is case-insensitive and uses the text after the last dot.
        A name without any dot has no extension and is treated as audio, so
        a file literally named ``mp4`` is not mistaken for a video.
        """
        _, dot, extension = file_name.lower().rpartition('.')
        if dot and extension in self._VIDEO_EXTENSIONS:
            return 'video'
        return 'audio'