add pptx convert
This commit is contained in:
parent
88a04fda5a
commit
86d86d9ff3
@ -131,15 +131,47 @@ class DocumentConverter:
|
||||
True if conversion successful, False otherwise
|
||||
"""
|
||||
try:
|
||||
cmd = [
|
||||
self.config.libreoffice_path,
|
||||
'--headless',
|
||||
'--convert-to', 'pdf',
|
||||
'--outdir', temp_dir,
|
||||
ppt_path
|
||||
]
|
||||
|
||||
self.logger.info(f"mineru-converter: executing LibreOffice conversion")
|
||||
# Try direct conversion first; fall back to xvfb if needed
|
||||
use_xvfb = os.getenv('USE_XVFB', 'auto').lower()
|
||||
|
||||
if use_xvfb == 'auto':
|
||||
# Try direct conversion first
|
||||
cmd = [
|
||||
self.config.libreoffice_path,
|
||||
'--headless',
|
||||
'--convert-to', 'pdf',
|
||||
'--outdir', temp_dir,
|
||||
ppt_path
|
||||
]
|
||||
use_xvfb_now = False
|
||||
fallback_to_xvfb = True
|
||||
elif use_xvfb == 'true':
|
||||
# Force the use of xvfb
|
||||
cmd = [
|
||||
'xvfb-run',
|
||||
'--auto-servernum',
|
||||
'--server-args=-screen 0, 1024x768x24',
|
||||
self.config.libreoffice_path,
|
||||
'--headless',
|
||||
'--convert-to', 'pdf',
|
||||
'--outdir', temp_dir,
|
||||
ppt_path
|
||||
]
|
||||
use_xvfb_now = True
|
||||
fallback_to_xvfb = False
|
||||
else:
|
||||
# No xvfb
|
||||
cmd = [
|
||||
self.config.libreoffice_path,
|
||||
'--headless',
|
||||
'--convert-to', 'pdf',
|
||||
'--outdir', temp_dir,
|
||||
ppt_path
|
||||
]
|
||||
use_xvfb_now = False
|
||||
fallback_to_xvfb = False
|
||||
|
||||
self.logger.info(f"mineru-converter: executing LibreOffice conversion (use_xvfb={use_xvfb}, fallback={fallback_to_xvfb})")
|
||||
|
||||
# Run with timeout
|
||||
process = await asyncio.create_subprocess_exec(
|
||||
@ -156,8 +188,47 @@ class DocumentConverter:
|
||||
|
||||
if process.returncode != 0:
|
||||
self.logger.warning(f"mineru-converter: LibreOffice returned non-zero: {stderr.decode()}")
|
||||
return False
|
||||
|
||||
# If direct conversion failed and we have xvfb fallback enabled
|
||||
if fallback_to_xvfb and not use_xvfb_now:
|
||||
self.logger.info(f"mineru-converter: direct conversion failed, retrying with xvfb")
|
||||
fallback_cmd = [
|
||||
'xvfb-run',
|
||||
'--auto-servernum',
|
||||
'--server-args=-screen 0, 1024x768x24',
|
||||
self.config.libreoffice_path,
|
||||
'--headless',
|
||||
'--convert-to', 'pdf',
|
||||
'--outdir', temp_dir,
|
||||
ppt_path
|
||||
]
|
||||
|
||||
# Try again with xvfb
|
||||
process_xvfb = await asyncio.create_subprocess_exec(
|
||||
*fallback_cmd,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE
|
||||
)
|
||||
|
||||
try:
|
||||
stdout_xvfb, stderr_xvfb = await asyncio.wait_for(
|
||||
process_xvfb.communicate(),
|
||||
timeout=self.config.conversion_timeout
|
||||
)
|
||||
|
||||
if process_xvfb.returncode == 0:
|
||||
self.logger.info(f"mineru-converter: LibreOffice conversion with xvfb completed successfully")
|
||||
return True
|
||||
else:
|
||||
self.logger.warning(f"mineru-converter: LibreOffice with xvfb also failed: {stderr_xvfb.decode()}")
|
||||
return False
|
||||
|
||||
except asyncio.TimeoutError:
|
||||
self.logger.warning(f"mineru-converter: LibreOffice with xvfb conversion timeout")
|
||||
process_xvfb.kill()
|
||||
return False
|
||||
else:
|
||||
return False
|
||||
|
||||
self.logger.info(f"mineru-converter: LibreOffice conversion completed successfully")
|
||||
return True
|
||||
|
||||
|
||||
@ -76,24 +76,18 @@ ENV MAXKB_VERSION="${DOCKER_IMAGE_TAG} (build at ${BUILD_AT}, commit: ${GITHUB_C
|
||||
PIP_TARGET=/opt/maxkb/python-packages
|
||||
|
||||
# Install poppler-utils for PDF processing (required by MinerU)
|
||||
# Install X11 libraries for LibreOffice (required for headless operation)
|
||||
# Install essential libraries for LibreOffice headless operation
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
poppler-utils \
|
||||
libxinerama1 \
|
||||
libxi6 \
|
||||
libxrender1 \
|
||||
libxtst6 \
|
||||
libxrandr2 \
|
||||
libxext6 \
|
||||
libxfixes3 \
|
||||
libxcursor1 \
|
||||
libxcomposite1 \
|
||||
libxdamage1 \
|
||||
libxss1 \
|
||||
libxt6 \
|
||||
libdbus-1-3 \
|
||||
libsm6 \
|
||||
libice6 && \
|
||||
libice6 \
|
||||
libxt6 \
|
||||
libglib2.0-0 \
|
||||
libcups2 \
|
||||
xvfb && \
|
||||
apt-get clean all && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
|
||||
@ -33,6 +33,10 @@ services:
|
||||
|
||||
# LibreOffice 路径配置
|
||||
LIBREOFFICE_PATH: "soffice"
|
||||
# LibreOffice headless mode 环境变量
|
||||
DISPLAY: ":99"
|
||||
# xvfb 模式: auto(自动检测是否需要), true(强制使用), false(不使用)
|
||||
USE_XVFB: "auto"
|
||||
|
||||
# MINERU_API_TYPE: "cloud"
|
||||
# MINERU_API_URL: "https://mineru.net"
|
||||
|
||||
Loading…
Reference in New Issue
Block a user