157 lines
5.9 KiB
Python
157 lines
5.9 KiB
Python
# -*- coding: utf-8 -*-
|
|
"""
|
|
音视频处理器 - MaxKB集成层
|
|
"""
|
|
from typing import List
|
|
from common.handle.base_split_handle import BaseSplitHandle
|
|
from common.utils.logger import maxkb_logger
|
|
|
|
class MediaSplitHandle(BaseSplitHandle):
    """Split handler for audio and video files (MaxKB integration layer).

    The actual media processing is delegated to a ``MediaAdapter`` that is
    imported and created on the first ``handle`` call. This class only checks
    file extensions and converts the adapter's ``segments`` into paragraph
    dicts (``content`` / ``title`` / ``metadata`` and optionally ``images``).
    """

    # Single source of truth for the extensions used by ``support`` and
    # ``get_content``. Kept as tuples so they can be passed to str.endswith.
    AUDIO_EXTS = ('.mp3', '.wav', '.m4a', '.flac', '.aac', '.ogg', '.wma')
    VIDEO_EXTS = ('.mp4', '.avi', '.mov', '.mkv', '.webm', '.flv', '.wmv')

    def __init__(self):
        super().__init__()
        # Built lazily in handle(); None means "not created yet".
        self.adapter = None

    def support(self, file, get_buffer, **kwargs):
        """Return True if the file name ends with a known audio/video extension.

        The comparison is case-insensitive. ``get_buffer`` and ``kwargs`` are
        accepted for interface compatibility and are not used.
        """
        # str.endswith accepts a tuple of suffixes, so no explicit loop is needed.
        return file.name.lower().endswith(self.AUDIO_EXTS + self.VIDEO_EXTS)

    def handle(self, file, pattern_list: List, with_filter: bool, limit: int,
               get_buffer, save_image, **kwargs):
        """Process an audio/video file and return its paragraphs.

        Args:
            file: Object exposing a ``name`` attribute.
            pattern_list: Not used by this handler.
            with_filter: Not used by this handler.
            limit: When greater than 0, only the first ``limit`` paragraphs
                are returned.
            get_buffer: Callable invoked as ``get_buffer(file)`` to obtain the
                file content handed to the adapter.
            save_image: Not used by this handler.
            **kwargs: Read keys are ``stt_model_id``, ``llm_model_id``,
                ``workspace_id`` (all default to None) and the options
                ``language`` ('auto'), ``segment_duration`` (300),
                ``enable_punctuation`` (True), ``enable_summary`` (False),
                ``extract_keyframes`` (False).

        Returns:
            A dict with ``name``, ``content`` (list of paragraph dicts) and
            ``metadata``. If the adapter call or the conversion raises, the
            error is logged and a dict holding a single error paragraph plus
            ``metadata['error']`` is returned instead of raising.

        Note:
            Adapter creation and ``get_buffer(file)`` run outside the
            try/except, so exceptions raised there propagate to the caller.
        """
        maxkb_logger.info(f"MediaSplitHandle.handle called with file: {file.name}")
        maxkb_logger.info(f"kwargs received: {kwargs}")

        # Initialise the adapter on first use.
        if not self.adapter:
            from .media_adapter import MediaAdapter
            from .media_adapter.logger import MediaLogger
            logger_wrapper = MediaLogger(maxkb_logger)
            self.adapter = MediaAdapter(logger=logger_wrapper)

        # Fetch the file content.
        buffer = get_buffer(file)

        # Model and workspace identifiers supplied by the caller.
        stt_model_id = kwargs.get('stt_model_id')
        llm_model_id = kwargs.get('llm_model_id')
        workspace_id = kwargs.get('workspace_id')

        maxkb_logger.info(f"Extracted from kwargs - stt_model_id: {stt_model_id}, llm_model_id: {llm_model_id}, workspace_id: {workspace_id}")

        # Processing options forwarded to the adapter.
        options = {
            'language': kwargs.get('language', 'auto'),
            'segment_duration': kwargs.get('segment_duration', 300),
            'enable_punctuation': kwargs.get('enable_punctuation', True),
            'enable_summary': kwargs.get('enable_summary', False),
            'extract_keyframes': kwargs.get('extract_keyframes', False),
        }

        try:
            result = self.adapter.process_media(
                file_content=buffer,
                file_name=file.name,
                stt_model_id=stt_model_id,
                llm_model_id=llm_model_id,
                workspace_id=workspace_id,
                options=options
            )

            # ``or []`` also covers keys that are present but set to None.
            keyframes = result.get('keyframes') or []
            paragraphs = [
                self._build_paragraph(segment, position, keyframes)
                for position, segment in enumerate(result.get('segments') or [])
            ]

            # NOTE(review): ``limit`` is applied as a maximum paragraph count;
            # confirm this matches what the caller means by ``limit``.
            if limit is not None and limit > 0:
                paragraphs = paragraphs[:limit]

            # Copy so the adapter's own metadata dict is not mutated.
            metadata = dict(result.get('metadata') or {})
            metadata['media_processing_status'] = 'success'

            return {
                'name': file.name,
                'content': paragraphs,
                'metadata': metadata
            }

        except Exception as e:
            maxkb_logger.error(f"处理音视频文件失败: {str(e)}")
            # Report the failure as a paragraph instead of raising.
            return {
                'name': file.name,
                'content': [{
                    'content': f'处理失败: {str(e)}',
                    'title': '错误'
                }],
                'metadata': {'error': str(e)}
            }

    def _build_paragraph(self, segment, position, keyframes):
        """Convert one adapter segment into a paragraph dict.

        Args:
            segment: Dict that may contain ``enhanced_text``, ``text``,
                ``start_time``, ``end_time``, ``summary`` and ``index``.
            position: Zero-based position of the segment in the segment list;
                used for the title and keyframe lookup when the segment has
                no ``index``.
            keyframes: Sequence of keyframes (possibly empty).
        """
        raw_index = segment.get('index')
        # Fall back to list order so a missing index neither crashes the
        # keyframe lookup nor gives every paragraph the same title.
        index = position if raw_index is None else raw_index

        # Prefer the enhanced text, but fall back to the raw text when the
        # enhanced value is missing, None or empty.
        text = segment.get('enhanced_text') or segment.get('text') or ''

        start_time = segment.get('start_time')
        end_time = segment.get('end_time')

        # Prefix the time range; _format_time tolerates a missing end time.
        if start_time is not None:
            time_info = f"[{self._format_time(start_time)} - {self._format_time(end_time)}]"
            text = f"{time_info}\n{text}"

        # Append the summary when there is one.
        summary = segment.get('summary')
        if summary:
            text = f"{text}\n【摘要】{summary}"

        paragraph = {
            'content': text,
            'title': f"段落 {index + 1}",
            'metadata': {
                'start_time': start_time,
                'end_time': end_time,
                # Reported exactly as provided by the adapter (may be None).
                'index': raw_index
            }
        }

        # Attach the keyframe that shares this segment's index, if any.
        if 0 <= index < len(keyframes):
            paragraph['images'] = [keyframes[index]]

        return paragraph

    def get_content(self, file, save_image):
        """Return a placeholder preview text for the file.

        No media is decoded here: the text only states whether the file is
        treated as video (name ends with one of ``VIDEO_EXTS``) or audio
        (anything else). ``save_image`` is not used. Any exception is turned
        into a failure message string.
        """
        try:
            file_name = file.name
            # Same extension list as support(), so both methods always agree.
            if file_name.lower().endswith(self.VIDEO_EXTS):
                return f"[视频文件: {file_name}]\n\n该文件需要进行音频提取和语音识别处理。"
            return f"[音频文件: {file_name}]\n\n该文件需要进行语音识别处理。"
        except Exception as e:
            return f"读取文件失败: {str(e)}"

    def _format_time(self, seconds: float) -> str:
        """Format a duration in seconds as ``MM:SS`` or ``HH:MM:SS``.

        ``None`` yields ``"00:00"``. Fractions of a second are dropped and
        negative values are treated as 0. Hours are only shown when non-zero.
        """
        if seconds is None:
            return "00:00"

        # Clamp so a negative offset cannot render as a bogus time like "59:59".
        total_seconds = max(0, int(seconds))
        hours, remainder = divmod(total_seconds, 3600)
        minutes, secs = divmod(remainder, 60)

        if hours > 0:
            return f"{hours:02d}:{minutes:02d}:{secs:02d}"
        return f"{minutes:02d}:{secs:02d}"