From 86d86d9ff3a8c97d6d88a69e7b8e764a51162392 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E6=BD=AE?= Date: Sun, 21 Dec 2025 16:29:38 +0800 Subject: [PATCH] add pptx convert --- apps/common/handle/impl/mineru/converter.py | 93 ++++++++++++++++++--- installer/Dockerfile | 20 ++--- installer/docker-compose.yml | 4 + 3 files changed, 93 insertions(+), 24 deletions(-) diff --git a/apps/common/handle/impl/mineru/converter.py b/apps/common/handle/impl/mineru/converter.py index 8e718ed2..20610f37 100644 --- a/apps/common/handle/impl/mineru/converter.py +++ b/apps/common/handle/impl/mineru/converter.py @@ -131,15 +131,47 @@ class DocumentConverter: True if conversion successful, False otherwise """ try: - cmd = [ - self.config.libreoffice_path, - '--headless', - '--convert-to', 'pdf', - '--outdir', temp_dir, - ppt_path - ] - - self.logger.info(f"mineru-converter: executing LibreOffice conversion") + # Try direct conversion first, fallback to xvfb if needed + use_xvfb = os.getenv('USE_XVFB', 'auto').lower() + + if use_xvfb == 'auto': + # Try direct conversion first + cmd = [ + self.config.libreoffice_path, + '--headless', + '--convert-to', 'pdf', + '--outdir', temp_dir, + ppt_path + ] + use_xvfb_now = False + fallback_to_xvfb = True + elif use_xvfb == 'true': + # Force use xvfb + cmd = [ + 'xvfb-run', + '--auto-servernum', + '--server-args=-screen 0, 1024x768x24', + self.config.libreoffice_path, + '--headless', + '--convert-to', 'pdf', + '--outdir', temp_dir, + ppt_path + ] + use_xvfb_now = True + fallback_to_xvfb = False + else: + # No xvfb + cmd = [ + self.config.libreoffice_path, + '--headless', + '--convert-to', 'pdf', + '--outdir', temp_dir, + ppt_path + ] + use_xvfb_now = False + fallback_to_xvfb = False + + self.logger.info(f"mineru-converter: executing LibreOffice conversion (use_xvfb={use_xvfb}, fallback={fallback_to_xvfb})") # Run with timeout process = await asyncio.create_subprocess_exec( @@ -156,8 +188,47 @@ class DocumentConverter: if process.returncode != 0: self.logger.warning(f"mineru-converter: LibreOffice returned non-zero: {stderr.decode()}") - return False - + # If direct conversion failed and we have xvfb fallback enabled + if fallback_to_xvfb and not use_xvfb_now: + self.logger.info(f"mineru-converter: direct conversion failed, retrying with xvfb") + fallback_cmd = [ + 'xvfb-run', + '--auto-servernum', + '--server-args=-screen 0, 1024x768x24', + self.config.libreoffice_path, + '--headless', + '--convert-to', 'pdf', + '--outdir', temp_dir, + ppt_path + ] + + # Try again with xvfb + process_xvfb = await asyncio.create_subprocess_exec( + *fallback_cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE + ) + + try: + stdout_xvfb, stderr_xvfb = await asyncio.wait_for( + process_xvfb.communicate(), + timeout=self.config.conversion_timeout + ) + + if process_xvfb.returncode == 0: + self.logger.info(f"mineru-converter: LibreOffice conversion with xvfb completed successfully") + return True + else: + self.logger.warning(f"mineru-converter: LibreOffice with xvfb also failed: {stderr_xvfb.decode()}") + return False + + except asyncio.TimeoutError: + self.logger.warning(f"mineru-converter: LibreOffice with xvfb conversion timeout") + process_xvfb.kill() + return False + else: + return False + self.logger.info(f"mineru-converter: LibreOffice conversion completed successfully") return True diff --git a/installer/Dockerfile b/installer/Dockerfile index 1ad1f421..bdc945e4 100644 --- a/installer/Dockerfile +++ b/installer/Dockerfile @@ -76,24 +76,18 @@ ENV MAXKB_VERSION="${DOCKER_IMAGE_TAG} (build at ${BUILD_AT}, commit: ${GITHUB_C PIP_TARGET=/opt/maxkb/python-packages # Install poppler-utils for PDF processing (required by MinerU) -# Install X11 libraries for LibreOffice (required for headless operation) +# Install essential libraries for LibreOffice headless operation RUN apt-get update && \ apt-get install -y --no-install-recommends \ poppler-utils \ libxinerama1 \ - libxi6 \ - libxrender1 \ - libxtst6 \ - libxrandr2 \ - libxext6 \ - libxfixes3 \ - libxcursor1 \ - libxcomposite1 \ - libxdamage1 \ - libxss1 \ - libxt6 \ + libdbus-1-3 \ libsm6 \ - libice6 && \ + libice6 \ + libxt6 \ + libglib2.0-0 \ + libcups2 \ + xvfb && \ apt-get clean all && \ rm -rf /var/lib/apt/lists/* diff --git a/installer/docker-compose.yml b/installer/docker-compose.yml index 05ced249..cba23de4 100644 --- a/installer/docker-compose.yml +++ b/installer/docker-compose.yml @@ -33,6 +33,10 @@ services: # LibreOffice 路径配置 LIBREOFFICE_PATH: "soffice" + # LibreOffice headless mode 环境变量 + DISPLAY: ":99" + # xvfb 模式: auto(自动检测是否需要), true(强制使用), false(不使用) + USE_XVFB: "auto" # MINERU_API_TYPE: "cloud" # MINERU_API_URL: "https://mineru.net"