docker platform adapter
This commit is contained in:
parent
4a3147db56
commit
36da5e1bf3
@ -703,6 +703,9 @@ class MinerUAPIClient:
|
||||
|
||||
# Step 1: Upload file to accessible URL
|
||||
file_url = await self._upload_file_to_accessible_url(pdf_path, src_fileid)
|
||||
self.logger.info(f"mineru-api: uploaded file URL: {file_url}")
|
||||
if not file_url.startswith(('http://', 'https://')):
|
||||
self.logger.warning(f"mineru-api: URL may not be valid for Cloud API: {file_url}")
|
||||
# Step 2: Create task for full document
|
||||
task_id = await self._create_mineru_task_full_document(file_url, src_fileid)
|
||||
|
||||
|
||||
@ -111,7 +111,7 @@ class MaxKBAdapter(PlatformAdapter):
|
||||
result_url = f"/storage/{relative_path}"
|
||||
|
||||
logger.info(f"MaxKB: Copied file {file_path} -> {dest_path}")
|
||||
logger.debug(f"MaxKB: Returning URL: {result_url}")
|
||||
logger.info(f"MaxKB: Returning URL: {result_url}")
|
||||
|
||||
return result_url
|
||||
|
||||
|
||||
@ -178,7 +178,7 @@ class ParallelMinerUProcessor:
|
||||
document_batch_info = {} # {src_fileid: {'batch_size': int, 'total_pages': int}}
|
||||
|
||||
# Initialize API client
|
||||
async with MinerUAPIClient(self.config) as api_client:
|
||||
async with MinerUAPIClient(self.config, self.platform_adapter) as api_client:
|
||||
while not self.shutdown_event.is_set():
|
||||
try:
|
||||
# Get task from queue (timeout to check shutdown)
|
||||
|
||||
121
test_url_fix.py
Normal file
121
test_url_fix.py
Normal file
@ -0,0 +1,121 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test the URL fix - verify that platform_adapter is passed through correctly
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
|
||||
# Make the project root (and its 'apps' directory, when present) importable.
project_root = Path(__file__).parent
apps_path = project_root / 'apps'
sys.path.insert(0, str(project_root))
if apps_path.exists():
    # Inserted after the root so it ends up ahead of it on sys.path.
    sys.path.insert(0, str(apps_path))

# Environment used by the tests below; set before any project import happens.
os.environ.update({
    'MAXKB_BASE_URL': 'http://xbase.aitravelmaster.com',
    'MINERU_API_TYPE': 'cloud',  # Force cloud mode for testing
})
|
||||
|
||||
async def test_url_generation():
    """Upload a throwaway PDF through MaxKBAdapter and report on the URL.

    Prints whether the value returned by ``MaxKBAdapter.upload_file`` starts
    with ``http://`` or ``https://`` and whether it starts with the
    ``MAXKB_BASE_URL`` environment variable. Results are only printed;
    nothing is asserted or returned.
    """
    # Import after setting environment
    from apps.common.handle.impl.mineru.maxkb_adapter.adapter import MaxKBAdapter

    # Create adapter
    adapter = MaxKBAdapter()

    # Create a test file. delete=False keeps it on disk after the handle is
    # closed so its path can be handed to the adapter; removed in `finally`.
    import tempfile
    with tempfile.NamedTemporaryFile(mode='w', suffix='.pdf', delete=False) as f:
        f.write('test')
        test_file = f.name

    try:
        # Test upload_file
        print("Testing MaxKBAdapter.upload_file()...")
        url = await adapter.upload_file(test_file, ['test_knowledge_id'])

        print(f"\n✅ Generated URL: {url}")

        # Verify URL format
        if url.startswith(('http://', 'https://')):
            print("✅ URL is properly formatted for Cloud API")
        else:
            print(f"❌ URL is not valid for Cloud API: {url}")

        # Check if MAXKB_BASE_URL is used
        base_url = os.environ.get('MAXKB_BASE_URL', '')
        if base_url and url.startswith(base_url):
            print(f"✅ URL correctly uses MAXKB_BASE_URL: {base_url}")
        else:
            print("❌ URL does not use MAXKB_BASE_URL")

    finally:
        # Clean up; the file may already be gone, which is fine.
        try:
            os.unlink(test_file)
        except FileNotFoundError:
            pass
|
||||
|
||||
async def test_api_client_with_adapter():
    """Check that MinerUAPIClient keeps the platform adapter it is given.

    Builds a ``MinerUAPIClient`` from a ``MaxKBMinerUConfig`` and a
    ``MaxKBAdapter``, prints whether ``platform_adapter`` is set on the
    client, then calls ``_upload_file_to_accessible_url`` with a throwaway
    PDF and prints whether the result starts with ``http://`` or
    ``https://``. Results are only printed; nothing is asserted or returned.
    """
    from apps.common.handle.impl.mineru.api_client import MinerUAPIClient
    from apps.common.handle.impl.mineru.maxkb_adapter.adapter import MaxKBAdapter
    from apps.common.handle.impl.mineru.maxkb_adapter.config_maxkb import MaxKBMinerUConfig

    print("\nTesting MinerUAPIClient with platform_adapter...")

    # Create components
    adapter = MaxKBAdapter()
    config = MaxKBMinerUConfig()

    # Create API client with adapter
    api_client = MinerUAPIClient(config, adapter)

    # Check if adapter is set
    if api_client.platform_adapter is not None:
        print("✅ platform_adapter is correctly set in MinerUAPIClient")
    else:
        print("❌ platform_adapter is None in MinerUAPIClient")

    # Test _upload_file_to_accessible_url. delete=False keeps the file on
    # disk after the handle is closed; it is removed in `finally`.
    import tempfile
    with tempfile.NamedTemporaryFile(mode='w', suffix='.pdf', delete=False) as f:
        f.write('test')
        test_file = f.name

    try:
        # Test upload through API client
        async with api_client:
            url = await api_client._upload_file_to_accessible_url(test_file, 'test_src_id')
            print(f"✅ URL from _upload_file_to_accessible_url: {url}")

            if url.startswith(('http://', 'https://')):
                print("✅ API client generates valid URL for Cloud API")
            else:
                print(f"❌ API client generates invalid URL: {url}")

    finally:
        # Clean up; the file may already be gone, which is fine.
        try:
            os.unlink(test_file)
        except FileNotFoundError:
            pass
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: print a banner, the relevant environment, then run
    # each async test in its own event loop.
    banner = "=" * 60

    print(banner)
    print("Testing MinerU Cloud API URL Fix")
    print(banner)

    # Show the environment the tests will run against
    print("\nEnvironment:")
    for var_name in ('MAXKB_BASE_URL', 'MINERU_API_TYPE'):
        print(f"{var_name}: {os.environ.get(var_name, 'NOT SET')}")

    # Run the tests one after another
    for test_coro in (test_url_generation, test_api_client_with_adapter):
        asyncio.run(test_coro())

    print("\n" + banner)
    print("Test completed!")
    print(banner)
|
||||
94
test_url_simple.py
Normal file
94
test_url_simple.py
Normal file
@ -0,0 +1,94 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Simple test of the URL generation logic
|
||||
"""
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
import shutil
|
||||
import uuid
|
||||
|
||||
# Set the environment variable
|
||||
os.environ['MAXKB_BASE_URL'] = 'http://xbase.aitravelmaster.com'
|
||||
|
||||
def test_url_generation():
    """Simulate the upload_file logic from adapter.py and return the URL.

    Copies a throwaway PDF into ``<storage root>/mineru/images/`` under a
    random hex name, then builds the URL from the path relative to the
    storage root. When the ``MAXKB_BASE_URL`` environment variable is
    non-empty the URL is ``<base>/storage/<relative path>``; otherwise it is
    the relative ``/storage/<relative path>``. Progress is printed.

    The storage root is a private temporary directory rather than a fixed
    ``/tmp/storage``, so cleanup never removes a directory this function did
    not create. The URL is unaffected because it depends only on the path
    relative to the storage root.

    Returns:
        The generated URL string.
    """
    # Create the test file
    with tempfile.NamedTemporaryFile(mode='w', suffix='.pdf', delete=False) as f:
        f.write('test')
        file_path = f.name

    storage_path = None
    try:
        # Simulate the upload_file logic with a scratch storage root
        storage_path = tempfile.mkdtemp(prefix='mineru_storage_')

        # Create the storage directory
        sub_dir = 'mineru'
        storage_dir = os.path.join(storage_path, sub_dir, 'images')
        os.makedirs(storage_dir, exist_ok=True)

        # Generate the file name
        file_ext = os.path.splitext(file_path)[1]
        file_name = f"{uuid.uuid4().hex}{file_ext}"
        dest_path = os.path.join(storage_dir, file_name)

        # Copy the file
        shutil.copy2(file_path, dest_path)

        # Generate the URL (this is the key part)
        relative_path = os.path.relpath(dest_path, storage_path)
        relative_path = relative_path.replace(os.path.sep, '/')

        # Check the environment variable
        base_url = os.getenv('MAXKB_BASE_URL', '')
        print(f"MAXKB_BASE_URL from env: '{base_url}'")
        print(f"Relative path: {relative_path}")

        if base_url:
            result_url = f"{base_url.rstrip('/')}/storage/{relative_path}"
            print(f"✅ Generated full URL: {result_url}")
        else:
            result_url = f"/storage/{relative_path}"
            print(f"⚠️ Generated relative URL: {result_url}")

        # Validate the URL format
        if result_url.startswith(('http://', 'https://')):
            print("✅ URL is valid for Cloud API")
        else:
            print("❌ URL is NOT valid for Cloud API (must start with http:// or https://)")

        return result_url

    finally:
        # Clean up the test file; it may already be gone, which is fine.
        try:
            os.unlink(file_path)
        except FileNotFoundError:
            pass
        # Clean up only the scratch storage directory created above.
        if storage_path is not None:
            shutil.rmtree(storage_path, ignore_errors=True)
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the URL generation twice, first with the
    # MAXKB_BASE_URL value already in the environment, then with it emptied,
    # and print both results side by side.
    heavy_rule = "=" * 60
    light_rule = "-" * 40

    print(heavy_rule)
    print("Testing URL Generation Logic")
    print(heavy_rule)
    print()

    # Test 1: MAXKB_BASE_URL is set
    print("Test 1: With MAXKB_BASE_URL set")
    print(light_rule)
    url1 = test_url_generation()

    print("\n" + heavy_rule)

    # Test 2: MAXKB_BASE_URL is empty
    print("\nTest 2: Without MAXKB_BASE_URL")
    print(light_rule)
    os.environ['MAXKB_BASE_URL'] = ''
    url2 = test_url_generation()

    print("\n" + heavy_rule)
    print("Summary:")
    print(f"With MAXKB_BASE_URL: {url1}")
    print(f"Without MAXKB_BASE_URL: {url2}")
    print(heavy_rule)
|
||||
Loading…
Reference in New Issue
Block a user