refactor: improve error logging for image reading and enhance image handling logic

--bug=1057749 --user=刘瑞斌 【知识库】qa问答对文档中带图片,导入后图片未显示 https://www.tapd.cn/62980211/s/1720856
This commit is contained in:
CaptainB 2025-07-01 14:09:29 +08:00
parent 407fe83582
commit 089915f488

View File

@ -7,20 +7,18 @@
@desc: @desc:
""" """
import io import io
import logging import traceback
import uuid_utils.compat as uuid
from functools import reduce from functools import reduce
from io import BytesIO from io import BytesIO
from xml.etree.ElementTree import fromstring from xml.etree.ElementTree import fromstring
from zipfile import ZipFile from zipfile import ZipFile
import uuid_utils.compat as uuid
from PIL import Image as PILImage from PIL import Image as PILImage
from openpyxl.drawing.image import Image as openpyxl_Image from openpyxl.drawing.image import Image as openpyxl_Image
from openpyxl.packaging.relationship import get_rels_path, get_dependents from openpyxl.packaging.relationship import get_rels_path, get_dependents
from openpyxl.xml.constants import SHEET_DRAWING_NS, REL_NS, SHEET_MAIN_NS from openpyxl.xml.constants import SHEET_DRAWING_NS, REL_NS, SHEET_MAIN_NS
from common.handle.base_parse_qa_handle import get_title_row_index_dict, get_row_value
from common.utils.logger import maxkb_logger from common.utils.logger import maxkb_logger
from knowledge.models import File from knowledge.models import File
@ -76,7 +74,7 @@ def handle_images(deps, archive: ZipFile) -> []:
image_io = archive.read(dep.target) image_io = archive.read(dep.target)
image = openpyxl_Image(BytesIO(image_io)) image = openpyxl_Image(BytesIO(image_io))
except Exception as e: except Exception as e:
maxkb_logger.error(f"Error reading image {dep.target}: {e}") maxkb_logger.error(f"Error reading image {dep.target}: {e}, {traceback.format_exc()}")
continue continue
image.embed = dep.id # 文件rId image.embed = dep.id # 文件rId
image.target = dep.target # 文件地址 image.target = dep.target # 文件地址
@ -107,6 +105,9 @@ def xlsx_embed_cells_images(buffer) -> {}:
image_excel_id_list = [_xl for _xl in image_excel_id_list = [_xl for _xl in
reduce(lambda x, y: [*x, *y], [sheet for sheet_id, sheet in sheet_list.items()], []) if reduce(lambda x, y: [*x, *y], [sheet for sheet_id, sheet in sheet_list.items()], []) if
key in _xl] key in _xl]
# print(key, img)
if img is None:
continue
if len(image_excel_id_list) > 0: if len(image_excel_id_list) > 0:
image_excel_id = image_excel_id_list[-1] image_excel_id = image_excel_id_list[-1]
f = archive.open(img.target) f = archive.open(img.target)