add pptx convert
Some checks failed
sync2gitee / repo-sync (push) Has been cancelled
Typos Check / Spell Check with Typos (push) Has been cancelled

This commit is contained in:
朱潮 2025-12-21 16:29:38 +08:00
parent 88a04fda5a
commit 86d86d9ff3
3 changed files with 93 additions and 24 deletions

View File

@ -131,6 +131,11 @@ class DocumentConverter:
True if conversion successful, False otherwise True if conversion successful, False otherwise
""" """
try: try:
# Pick the conversion strategy from USE_XVFB (default 'auto'): 'auto' tries direct conversion first and falls back to xvfb if it fails
use_xvfb = os.getenv('USE_XVFB', 'auto').lower()
if use_xvfb == 'auto':
# Try direct conversion first
cmd = [ cmd = [
self.config.libreoffice_path, self.config.libreoffice_path,
'--headless', '--headless',
@ -138,8 +143,35 @@ class DocumentConverter:
'--outdir', temp_dir, '--outdir', temp_dir,
ppt_path ppt_path
] ]
use_xvfb_now = False
fallback_to_xvfb = True
elif use_xvfb == 'true':
# Force use xvfb
cmd = [
'xvfb-run',
'--auto-servernum',
'--server-args=-screen 0, 1024x768x24',
self.config.libreoffice_path,
'--headless',
'--convert-to', 'pdf',
'--outdir', temp_dir,
ppt_path
]
use_xvfb_now = True
fallback_to_xvfb = False
else:
# No xvfb
cmd = [
self.config.libreoffice_path,
'--headless',
'--convert-to', 'pdf',
'--outdir', temp_dir,
ppt_path
]
use_xvfb_now = False
fallback_to_xvfb = False
self.logger.info(f"mineru-converter: executing LibreOffice conversion") self.logger.info(f"mineru-converter: executing LibreOffice conversion (use_xvfb={use_xvfb}, fallback={fallback_to_xvfb})")
# Run with timeout # Run with timeout
process = await asyncio.create_subprocess_exec( process = await asyncio.create_subprocess_exec(
@ -156,6 +188,45 @@ class DocumentConverter:
if process.returncode != 0: if process.returncode != 0:
self.logger.warning(f"mineru-converter: LibreOffice returned non-zero: {stderr.decode()}") self.logger.warning(f"mineru-converter: LibreOffice returned non-zero: {stderr.decode()}")
# If direct conversion failed and we have xvfb fallback enabled
if fallback_to_xvfb and not use_xvfb_now:
self.logger.info(f"mineru-converter: direct conversion failed, retrying with xvfb")
fallback_cmd = [
'xvfb-run',
'--auto-servernum',
'--server-args=-screen 0, 1024x768x24',
self.config.libreoffice_path,
'--headless',
'--convert-to', 'pdf',
'--outdir', temp_dir,
ppt_path
]
# Try again with xvfb
process_xvfb = await asyncio.create_subprocess_exec(
*fallback_cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE
)
try:
stdout_xvfb, stderr_xvfb = await asyncio.wait_for(
process_xvfb.communicate(),
timeout=self.config.conversion_timeout
)
if process_xvfb.returncode == 0:
self.logger.info(f"mineru-converter: LibreOffice conversion with xvfb completed successfully")
return True
else:
self.logger.warning(f"mineru-converter: LibreOffice with xvfb also failed: {stderr_xvfb.decode()}")
return False
except asyncio.TimeoutError:
self.logger.warning(f"mineru-converter: LibreOffice with xvfb conversion timeout")
process_xvfb.kill()
return False
else:
return False return False
self.logger.info(f"mineru-converter: LibreOffice conversion completed successfully") self.logger.info(f"mineru-converter: LibreOffice conversion completed successfully")

View File

@ -76,24 +76,18 @@ ENV MAXKB_VERSION="${DOCKER_IMAGE_TAG} (build at ${BUILD_AT}, commit: ${GITHUB_C
PIP_TARGET=/opt/maxkb/python-packages PIP_TARGET=/opt/maxkb/python-packages
# Install poppler-utils for PDF processing (required by MinerU) # Install poppler-utils for PDF processing (required by MinerU)
# Install X11 libraries for LibreOffice (required for headless operation) # Install essential libraries for LibreOffice headless operation
RUN apt-get update && \ RUN apt-get update && \
apt-get install -y --no-install-recommends \ apt-get install -y --no-install-recommends \
poppler-utils \ poppler-utils \
libxinerama1 \ libxinerama1 \
libxi6 \ libdbus-1-3 \
libxrender1 \
libxtst6 \
libxrandr2 \
libxext6 \
libxfixes3 \
libxcursor1 \
libxcomposite1 \
libxdamage1 \
libxss1 \
libxt6 \
libsm6 \ libsm6 \
libice6 && \ libice6 \
libxt6 \
libglib2.0-0 \
libcups2 \
xvfb && \
apt-get clean all && \ apt-get clean all && \
rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/*

View File

@ -33,6 +33,10 @@ services:
# LibreOffice 路径配置 # LibreOffice 路径配置
LIBREOFFICE_PATH: "soffice" LIBREOFFICE_PATH: "soffice"
# Environment variables for LibreOffice headless mode
DISPLAY: ":99"
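# xvfb mode: auto (try direct conversion first, retry with xvfb-run if it fails), true (always run under xvfb-run), any other value such as false (never use xvfb)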
USE_XVFB: "auto"
# MINERU_API_TYPE: "cloud" # MINERU_API_TYPE: "cloud"
# MINERU_API_URL: "https://mineru.net" # MINERU_API_URL: "https://mineru.net"