modify model_id
Some checks are pending
sync2gitee / repo-sync (push) Waiting to run
Typos Check / Spell Check with Typos (push) Waiting to run

This commit is contained in:
朱潮 2025-08-26 13:48:07 +08:00
parent edc80888cc
commit 623dda5bb7
6 changed files with 197 additions and 20 deletions

View File

@ -41,6 +41,9 @@ class MinerUImageProcessor:
self.logger = logger
self.image_optimizer = None
self.platform_adapter = None # Will be set by parser if available
# Log the config being used
if hasattr(config, 'llm_model_id') and hasattr(config, 'vision_model_id'):
self.logger.info(f"ImageProcessor initialized with config: LLM={getattr(config, 'llm_model_id', 'N/A')}, Vision={getattr(config, 'vision_model_id', 'N/A')}")
async def initialize(self):
"""Initialize image optimizer"""

View File

@ -175,7 +175,7 @@ class MinerUExtractor(BaseMinerUExtractor):
# 导入并创建MaxKB特定的配置传递模型ID
from .config_maxkb import MaxKBMinerUConfig
config = MaxKBMinerUConfig(llm_model_id=llm_model_id, vision_model_id=vision_model_id)
config = MaxKBMinerUConfig.create(llm_model_id=llm_model_id, vision_model_id=vision_model_id)
# 调用基类初始化传递适配器、配置和MaxKB特有参数
super().__init__(

View File

@ -12,35 +12,36 @@ from ..config_base import MinerUConfig
class MaxKBMinerUConfig(MinerUConfig):
"""MaxKB-specific configuration for MinerU"""
def __init__(self, llm_model_id: str = None, vision_model_id: str = None):
"""Initialize with MaxKB-specific settings"""
# Store the model IDs before calling parent init
self._init_llm_model_id = llm_model_id
self._init_vision_model_id = vision_model_id
# Call parent initialization
super().__init__()
@classmethod
def create(cls, llm_model_id: str = None, vision_model_id: str = None):
    """Build a config instance, then apply any explicitly supplied model IDs.

    Args:
        llm_model_id: LLM model ID to set on the new instance; ignored when falsy.
        vision_model_id: Vision model ID to set on the new instance; ignored when falsy.

    Returns:
        The newly constructed instance of ``cls``.
    """
    config = cls()

    # Only truthy arguments replace whatever the no-argument construction left in place.
    overrides = (
        ("llm_model_id", llm_model_id),
        ("vision_model_id", vision_model_id),
    )
    for attr_name, supplied in overrides:
        if supplied:
            setattr(config, attr_name, supplied)

    # Report the IDs the instance ends up with.
    from .logger import get_module_logger
    get_module_logger('config_maxkb').info(
        f"MaxKBMinerUConfig.create() set LLM={config.llm_model_id}, Vision={config.vision_model_id}"
    )
    return config
def __post_init__(self):
"""Initialize with MaxKB-specific settings"""
# Call parent initialization first
super().__post_init__()
# MaxKB specific settings - use provided IDs first, then environment, then defaults
# 优先使用传入的模型ID其次是环境变量最后是默认值
if hasattr(self, '_init_llm_model_id') and self._init_llm_model_id:
self.llm_model_id = self._init_llm_model_id
else:
# MaxKB specific settings from environment or defaults
# 如果环境变量中设置了具体的UUID使用UUID否则使用默认值或自动检测
self.llm_model_id = os.getenv('MAXKB_LLM_MODEL_ID', self._get_default_llm_model_id())
if hasattr(self, '_init_vision_model_id') and self._init_vision_model_id:
self.vision_model_id = self._init_vision_model_id
else:
self.vision_model_id = os.getenv('MAXKB_VISION_MODEL_ID', self._get_default_vision_model_id())
# Log the configured model IDs
from .logger import get_module_logger
logger = get_module_logger('config_maxkb')
logger.info(f"MaxKBMinerUConfig initialized with LLM={self.llm_model_id}, Vision={self.vision_model_id}")
logger.info(f"MaxKBMinerUConfig __post_init__ with LLM={self.llm_model_id}, Vision={self.vision_model_id}")
# MaxKB API settings
self.maxkb_api_key = os.getenv('MAXKB_API_KEY')
@ -137,8 +138,10 @@ class MaxKBMinerUConfig(MinerUConfig):
# Determine which model to use
if use_llm:
model_id = self.llm_model_id
logger.info(f"MaxKB: Using LLM model: {model_id} (self.llm_model_id={self.llm_model_id})")
else:
model_id = self.vision_model_id
logger.info(f"MaxKB: Using Vision model: {model_id} (self.vision_model_id={self.vision_model_id})")
logger.info(f"MaxKB: Calling model {model_id} with {len(messages)} messages, use_llm={use_llm}, model_type={model_type}")

View File

@ -55,6 +55,9 @@ class ParallelProcessorPool:
# Use provided config or create default
if config is None:
config = MinerUConfig()
# Log the config being used
if hasattr(config, 'llm_model_id') and hasattr(config, 'vision_model_id'):
self.logger.info(f"Using config with LLM={getattr(config, 'llm_model_id', 'N/A')}, Vision={getattr(config, 'vision_model_id', 'N/A')}")
processor = ParallelMinerUProcessor(config, learn_type, platform_adapter)
self._processors[learn_type] = processor

67
test_config_simple.py Normal file
View File

@ -0,0 +1,67 @@
#!/usr/bin/env python3
"""
简单测试配置逻辑
"""
# 模拟配置类的行为
class TestConfig:
    """Minimal stand-in config holding an LLM model ID and a vision model ID.

    Both IDs start as ``None``; use :meth:`create` to build an instance with
    specific IDs applied.
    """

    # The ``Test`` prefix makes pytest try to collect this helper as a test class
    # (and warn, because it defines ``__init__``); opt out of collection explicitly.
    __test__ = False

    def __init__(self):
        self.llm_model_id = None
        self.vision_model_id = None

    @classmethod
    def create(cls, llm_model_id=None, vision_model_id=None):
        """Build an instance and apply the given model IDs.

        Args:
            llm_model_id: LLM model ID to set; a falsy value leaves ``None`` in place.
            vision_model_id: Vision model ID to set; a falsy value leaves ``None`` in place.

        Returns:
            The new instance. The resulting IDs are also printed to stdout.
        """
        instance = cls()
        if llm_model_id:
            instance.llm_model_id = llm_model_id
        if vision_model_id:
            instance.vision_model_id = vision_model_id
        print(f"Config created with LLM={instance.llm_model_id}, Vision={instance.vision_model_id}")
        return instance
def test_model_selection():
    """Check that the ``use_llm`` flag selects the matching model ID from the config.

    Both cases (``use_llm=False`` and ``use_llm=True``) are run and reported on
    stdout first; afterwards the function asserts that every case selected the
    expected ID, so a wrong selection fails the run instead of only printing a
    cross mark.

    Raises:
        AssertionError: If any case selected a model ID other than the expected one.
    """
    TEST_LLM_ID = "0198e029-bfeb-7d43-a6ee-c88662697d3c"
    TEST_VISION_ID = "0198e02c-9f2e-7520-a27b-6376ad42d520"

    # Build the config under test
    config = TestConfig.create(
        llm_model_id=TEST_LLM_ID,
        vision_model_id=TEST_VISION_ID,
    )

    # (header, use_llm flag, expected ID, label used in the success message)
    cases = (
        ("\nTest 1: use_llm=False (should use vision model)", False, TEST_VISION_ID, "vision"),
        ("\nTest 2: use_llm=True (should use LLM model)", True, TEST_LLM_ID, "LLM"),
    )

    failures = []
    for header, use_llm, expected_id, label in cases:
        print(header)
        if use_llm:
            model_id = config.llm_model_id
            print(f" Using LLM model: {model_id}")
        else:
            model_id = config.vision_model_id
            print(f" Using Vision model: {model_id}")

        if model_id == expected_id:
            print(f" ✅ Correct! Using {label} model ID: {expected_id}")
        else:
            print(f" ❌ Wrong! Using: {model_id}, Expected: {expected_id}")
            failures.append((use_llm, model_id, expected_id))

    # Fail only after both cases were reported, so the full picture is visible.
    assert not failures, f"model selection mismatches (use_llm, got, expected): {failures}"
if __name__ == "__main__":
    # Frame the report between horizontal rules.
    separator = "=" * 60
    print(separator)
    print("Testing Model Selection Logic")
    print(separator)
    test_model_selection()
    print(separator)

101
test_model_config.py Normal file
View File

@ -0,0 +1,101 @@
#!/usr/bin/env python3
"""
测试模型ID配置是否正确传递
"""
import os
import sys
from pathlib import Path
# Make this script's directory importable, plus its ``apps`` subdirectory when present.
project_root = Path(__file__).parent
apps_path = project_root / 'apps'

sys.path.insert(0, str(project_root))
if apps_path.exists():
    # Inserted second, so ``apps`` ends up ahead of the project root on sys.path.
    sys.path.insert(0, str(apps_path))

# Model IDs standing in for the ones a caller would pass
TEST_LLM_ID = "0198e029-bfeb-7d43-a6ee-c88662697d3c"
TEST_VISION_ID = "0198e02c-9f2e-7520-a27b-6376ad42d520"
def test_config_creation():
    """Create ``MaxKBMinerUConfig`` two ways and verify the factory applies the test IDs.

    Prints the model IDs of a directly constructed config, then those of a config
    built with ``MaxKBMinerUConfig.create()`` using ``TEST_LLM_ID`` and
    ``TEST_VISION_ID``. Both verification results are printed before any
    assertion fires, so a mismatch fails the run instead of only printing a
    cross mark.

    Returns:
        The config built through the factory method.

    Raises:
        AssertionError: If the factory-built config does not carry the test IDs.
    """
    print("=" * 60)
    print("Testing MaxKBMinerUConfig creation")
    print("=" * 60)

    from apps.common.handle.impl.mineru.maxkb_adapter.config_maxkb import MaxKBMinerUConfig

    # Approach 1: direct construction (defaults or environment variables)
    print("\n1. Default creation:")
    config1 = MaxKBMinerUConfig()
    print(f" LLM ID: {config1.llm_model_id}")
    print(f" Vision ID: {config1.vision_model_id}")

    # Approach 2: factory method
    print("\n2. Factory method creation:")
    config2 = MaxKBMinerUConfig.create(
        llm_model_id=TEST_LLM_ID,
        vision_model_id=TEST_VISION_ID,
    )
    print(f" LLM ID: {config2.llm_model_id}")
    print(f" Vision ID: {config2.vision_model_id}")

    # Verification: report both results first, then fail on any mismatch
    print("\n3. Verification:")
    llm_ok = config2.llm_model_id == TEST_LLM_ID
    if llm_ok:
        print(" ✅ LLM ID correctly set")
    else:
        print(f" ❌ LLM ID mismatch: expected {TEST_LLM_ID}, got {config2.llm_model_id}")

    vision_ok = config2.vision_model_id == TEST_VISION_ID
    if vision_ok:
        print(" ✅ Vision ID correctly set")
    else:
        print(f" ❌ Vision ID mismatch: expected {TEST_VISION_ID}, got {config2.vision_model_id}")

    assert llm_ok, f"LLM ID mismatch: expected {TEST_LLM_ID}, got {config2.llm_model_id}"
    assert vision_ok, f"Vision ID mismatch: expected {TEST_VISION_ID}, got {config2.vision_model_id}"
    return config2
def test_model_selection():
    """Check that the ``use_llm`` flag selects the matching model ID from the config.

    Builds a config through ``MaxKBMinerUConfig.create()`` with ``TEST_LLM_ID``
    and ``TEST_VISION_ID``, replays the if/else selection for ``use_llm=True``
    and ``use_llm=False``, prints each outcome, and finally asserts both matched.

    Raises:
        AssertionError: If either case selected a model ID other than the expected one.
    """
    # Imported here because no module-level import of this class exists; the only
    # other import is local to test_config_creation() and is not visible in this scope.
    from apps.common.handle.impl.mineru.maxkb_adapter.config_maxkb import MaxKBMinerUConfig

    print("\n" + "=" * 60)
    print("Testing model selection logic")
    print("=" * 60)

    config = MaxKBMinerUConfig.create(
        llm_model_id=TEST_LLM_ID,
        vision_model_id=TEST_VISION_ID,
    )

    # Replays the selection logic from call_litellm
    print("\n1. When use_llm=True:")
    use_llm = True
    if use_llm:
        model_id = config.llm_model_id
    else:
        model_id = config.vision_model_id
    llm_match = model_id == TEST_LLM_ID
    print(f" Selected model ID: {model_id}")
    print(f" Expected: {TEST_LLM_ID}")
    print(f" Match: {llm_match}")

    print("\n2. When use_llm=False:")
    use_llm = False
    if use_llm:
        model_id = config.llm_model_id
    else:
        model_id = config.vision_model_id
    vision_match = model_id == TEST_VISION_ID
    print(f" Selected model ID: {model_id}")
    print(f" Expected: {TEST_VISION_ID}")
    print(f" Match: {vision_match}")

    # Fail only after both cases were reported.
    assert llm_match, f"use_llm=True did not select the LLM model ID {TEST_LLM_ID}"
    assert vision_match, f"use_llm=False did not select the vision model ID {TEST_VISION_ID}"
if __name__ == "__main__":
    # Script entry point: show the IDs in use, then run both checks in order.
    rule = "=" * 60
    print("Testing Model Configuration")
    print(rule)
    print(f"Test LLM ID: {TEST_LLM_ID}")
    print(f"Test Vision ID: {TEST_VISION_ID}")

    config = test_config_creation()
    test_model_selection()

    print("\n" + rule)
    print("Test completed!")
    print(rule)