docker platform adapter
This commit is contained in:
parent
4a3147db56
commit
36da5e1bf3
@ -703,6 +703,9 @@ class MinerUAPIClient:
|
|||||||
|
|
||||||
# Step 1: Upload file to accessible URL
|
# Step 1: Upload file to accessible URL
|
||||||
file_url = await self._upload_file_to_accessible_url(pdf_path, src_fileid)
|
file_url = await self._upload_file_to_accessible_url(pdf_path, src_fileid)
|
||||||
|
self.logger.info(f"mineru-api: uploaded file URL: {file_url}")
|
||||||
|
if not file_url.startswith(('http://', 'https://')):
|
||||||
|
self.logger.warning(f"mineru-api: URL may not be valid for Cloud API: {file_url}")
|
||||||
# Step 2: Create task for full document
|
# Step 2: Create task for full document
|
||||||
task_id = await self._create_mineru_task_full_document(file_url, src_fileid)
|
task_id = await self._create_mineru_task_full_document(file_url, src_fileid)
|
||||||
|
|
||||||
|
|||||||
@ -111,7 +111,7 @@ class MaxKBAdapter(PlatformAdapter):
|
|||||||
result_url = f"/storage/{relative_path}"
|
result_url = f"/storage/{relative_path}"
|
||||||
|
|
||||||
logger.info(f"MaxKB: Copied file {file_path} -> {dest_path}")
|
logger.info(f"MaxKB: Copied file {file_path} -> {dest_path}")
|
||||||
logger.debug(f"MaxKB: Returning URL: {result_url}")
|
logger.info(f"MaxKB: Returning URL: {result_url}")
|
||||||
|
|
||||||
return result_url
|
return result_url
|
||||||
|
|
||||||
|
|||||||
@ -178,7 +178,7 @@ class ParallelMinerUProcessor:
|
|||||||
document_batch_info = {} # {src_fileid: {'batch_size': int, 'total_pages': int}}
|
document_batch_info = {} # {src_fileid: {'batch_size': int, 'total_pages': int}}
|
||||||
|
|
||||||
# Initialize API client
|
# Initialize API client
|
||||||
async with MinerUAPIClient(self.config) as api_client:
|
async with MinerUAPIClient(self.config, self.platform_adapter) as api_client:
|
||||||
while not self.shutdown_event.is_set():
|
while not self.shutdown_event.is_set():
|
||||||
try:
|
try:
|
||||||
# Get task from queue (timeout to check shutdown)
|
# Get task from queue (timeout to check shutdown)
|
||||||
|
|||||||
121
test_url_fix.py
Normal file
121
test_url_fix.py
Normal file
@ -0,0 +1,121 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
测试URL修复 - 验证platform_adapter是否正确传递
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import asyncio
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Add paths
|
||||||
|
project_root = Path(__file__).parent
|
||||||
|
sys.path.insert(0, str(project_root))
|
||||||
|
apps_path = project_root / 'apps'
|
||||||
|
if apps_path.exists():
|
||||||
|
sys.path.insert(0, str(apps_path))
|
||||||
|
|
||||||
|
# Set environment variables for testing
|
||||||
|
os.environ['MAXKB_BASE_URL'] = 'http://xbase.aitravelmaster.com'
|
||||||
|
os.environ['MINERU_API_TYPE'] = 'cloud' # Force cloud mode for testing
|
||||||
|
|
||||||
|
async def test_url_generation():
    """Upload a scratch file through ``MaxKBAdapter`` and report on the URL.

    A small temporary file with a ``.pdf`` suffix is passed to
    ``MaxKBAdapter.upload_file``. The returned URL is then checked for an
    ``http://``/``https://`` scheme and for the ``MAXKB_BASE_URL`` prefix.
    The outcome of each check is only printed; nothing is asserted and
    nothing is returned. The temporary file is removed afterwards.
    """
    import tempfile

    # Imported inside the function so it happens after the module-level
    # environment variables have been set.
    from apps.common.handle.impl.mineru.maxkb_adapter.adapter import MaxKBAdapter

    platform = MaxKBAdapter()

    # Scratch input: a text file that merely carries a .pdf suffix.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.pdf', delete=False) as handle:
        handle.write('test')
    sample_path = handle.name

    try:
        print("Testing MaxKBAdapter.upload_file()...")
        url = await platform.upload_file(sample_path, ['test_knowledge_id'])

        print(f"\n✅ Generated URL: {url}")

        # Check 1: the URL carries an explicit http(s) scheme.
        has_scheme = url.startswith(('http://', 'https://'))
        if not has_scheme:
            print(f"❌ URL is not valid for Cloud API: {url}")
        else:
            print("✅ URL is properly formatted for Cloud API")

        # Check 2: the URL is rooted at the configured MAXKB_BASE_URL.
        base_url = os.environ.get('MAXKB_BASE_URL', '')
        rooted_at_base = bool(base_url) and url.startswith(base_url)
        if not rooted_at_base:
            print(f"❌ URL does not use MAXKB_BASE_URL")
        else:
            print(f"✅ URL correctly uses MAXKB_BASE_URL: {base_url}")
    finally:
        # Remove the scratch file whether or not the upload succeeded.
        if os.path.exists(sample_path):
            os.unlink(sample_path)
|
||||||
|
|
||||||
|
async def test_api_client_with_adapter():
    """Check that ``MinerUAPIClient`` keeps the adapter it is constructed with.

    Builds a ``MaxKBAdapter`` and a ``MaxKBMinerUConfig``, passes both to
    ``MinerUAPIClient``, and prints whether ``platform_adapter`` is set on
    the client. It then enters the client as an async context manager,
    calls ``_upload_file_to_accessible_url`` on a scratch file, and prints
    whether the returned URL starts with ``http://`` or ``https://``.
    Results are printed only; nothing is asserted or returned.
    """
    import tempfile

    from apps.common.handle.impl.mineru.api_client import MinerUAPIClient
    from apps.common.handle.impl.mineru.maxkb_adapter.adapter import MaxKBAdapter
    from apps.common.handle.impl.mineru.maxkb_adapter.config_maxkb import MaxKBMinerUConfig

    print("\nTesting MinerUAPIClient with platform_adapter...")

    # Build the collaborators (adapter first, then config) and wire them in.
    platform = MaxKBAdapter()
    settings = MaxKBMinerUConfig()
    client = MinerUAPIClient(settings, platform)

    # The client is expected to expose the adapter it was given.
    if client.platform_adapter is None:
        print("❌ platform_adapter is None in MinerUAPIClient")
    else:
        print("✅ platform_adapter is correctly set in MinerUAPIClient")

    # Scratch input: a text file that merely carries a .pdf suffix.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.pdf', delete=False) as handle:
        handle.write('test')
    sample_path = handle.name

    try:
        # Run the upload through the client's own helper.
        async with client:
            url = await client._upload_file_to_accessible_url(sample_path, 'test_src_id')
            print(f"✅ URL from _upload_file_to_accessible_url: {url}")

            has_scheme = url.startswith(('http://', 'https://'))
            if not has_scheme:
                print(f"❌ API client generates invalid URL: {url}")
            else:
                print("✅ API client generates valid URL for Cloud API")
    finally:
        # Remove the scratch file whether or not the upload succeeded.
        if os.path.exists(sample_path):
            os.unlink(sample_path)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: print a banner, echo the relevant environment,
    # run both checks in order (each in its own event loop), then sign off.
    banner = "=" * 60

    print(banner)
    print("Testing MinerU Cloud API URL Fix")
    print(banner)

    # Show the settings the checks will run against.
    print("\nEnvironment:")
    print(f"MAXKB_BASE_URL: {os.environ.get('MAXKB_BASE_URL', 'NOT SET')}")
    print(f"MINERU_API_TYPE: {os.environ.get('MINERU_API_TYPE', 'NOT SET')}")

    # Each coroutine gets a fresh asyncio.run() call, one after the other.
    for check in (test_url_generation, test_api_client_with_adapter):
        asyncio.run(check())

    print("\n" + banner)
    print("Test completed!")
    print(banner)
|
||||||
94
test_url_simple.py
Normal file
94
test_url_simple.py
Normal file
@ -0,0 +1,94 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
简单测试URL生成逻辑
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
import shutil
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
# 设置环境变量
|
||||||
|
os.environ['MAXKB_BASE_URL'] = 'http://xbase.aitravelmaster.com'
|
||||||
|
|
||||||
|
def test_url_generation():
    """Simulate the URL-building logic of ``upload_file`` in adapter.py.

    A small source file is copied into ``<storage>/mineru/images/`` under a
    random ``uuid4`` hex name. The URL is then built from the copy's path
    relative to the storage root: prefixed with ``MAXKB_BASE_URL`` (trailing
    slashes stripped) when that variable is non-empty, otherwise left as a
    relative ``/storage/...`` URL. Progress and the validity verdict are
    printed.

    All files live inside a private temporary directory that is removed
    automatically. A fixed shared location such as ``/tmp/storage`` is
    deliberately not used: deleting it during cleanup could destroy data
    this function never created, and concurrent runs would collide.

    Returns:
        str: The generated URL (absolute when ``MAXKB_BASE_URL`` is set,
        otherwise starting with ``/storage/``).
    """
    with tempfile.TemporaryDirectory() as workspace:
        # Create the source file to "upload".
        file_path = os.path.join(workspace, 'source.pdf')
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write('test')

        # Simulated storage root, private to this call.
        storage_path = os.path.join(workspace, 'storage')

        # Create the storage directory.
        sub_dir = 'mineru'
        storage_dir = os.path.join(storage_path, sub_dir, 'images')
        os.makedirs(storage_dir, exist_ok=True)

        # Generate a unique file name that keeps the original extension.
        file_ext = os.path.splitext(file_path)[1]
        file_name = f"{uuid.uuid4().hex}{file_ext}"
        dest_path = os.path.join(storage_dir, file_name)

        # Copy the file into storage.
        shutil.copy2(file_path, dest_path)

        # Build the URL (the key part): path relative to the storage root,
        # normalised to forward slashes.
        relative_path = os.path.relpath(dest_path, storage_path)
        relative_path = relative_path.replace(os.path.sep, '/')

        # Inspect the environment variable.
        base_url = os.getenv('MAXKB_BASE_URL', '')
        print(f"MAXKB_BASE_URL from env: '{base_url}'")
        print(f"Relative path: {relative_path}")

        if base_url:
            result_url = f"{base_url.rstrip('/')}/storage/{relative_path}"
            print(f"✅ Generated full URL: {result_url}")
        else:
            result_url = f"/storage/{relative_path}"
            print(f"⚠️ Generated relative URL: {result_url}")

        # Validate the URL format.
        if result_url.startswith(('http://', 'https://')):
            print("✅ URL is valid for Cloud API")
        else:
            print("❌ URL is NOT valid for Cloud API (must start with http:// or https://)")

        return result_url
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: run the URL simulation twice, first with
    # MAXKB_BASE_URL set and then with it emptied, and print a summary.
    heavy_rule = "=" * 60
    light_rule = "-" * 40

    print(heavy_rule)
    print("Testing URL Generation Logic")
    print(heavy_rule)
    print()

    # Test 1: MAXKB_BASE_URL is set.
    print("Test 1: With MAXKB_BASE_URL set")
    print(light_rule)
    url1 = test_url_generation()

    print("\n" + heavy_rule)

    # Test 2: MAXKB_BASE_URL is empty.
    print("\nTest 2: Without MAXKB_BASE_URL")
    print(light_rule)
    os.environ['MAXKB_BASE_URL'] = ''
    url2 = test_url_generation()

    print("\n" + heavy_rule)
    print("Summary:")
    print(f"With MAXKB_BASE_URL: {url1}")
    print(f"Without MAXKB_BASE_URL: {url2}")
    print(heavy_rule)
|
||||||
Loading…
Reference in New Issue
Block a user