chore: 文档内容无法提取的时候输出错误信息

This commit is contained in:
CaptainB 2024-11-22 17:54:06 +08:00 committed by 刘瑞斌
parent 98db08d263
commit 64e8f4dc9f
3 changed files with 46 additions and 37 deletions

View File

@@ -41,4 +41,4 @@ class CsvSplitHandle(BaseParseTableHandle):
return buffer.decode(detect(buffer)['encoding']) return buffer.decode(detect(buffer)['encoding'])
except BaseException as e: except BaseException as e:
max_kb.error(f'csv split handle error: {e}') max_kb.error(f'csv split handle error: {e}')
return [{'name': file.name, 'paragraphs': []}] return f'error: {e}'

View File

@@ -63,6 +63,7 @@ class XlsSplitHandle(BaseParseTableHandle):
def get_content(self, file): def get_content(self, file):
# 打开 .xls 文件 # 打开 .xls 文件
try:
workbook = xlrd.open_workbook(file_contents=file.read(), formatting_info=True) workbook = xlrd.open_workbook(file_contents=file.read(), formatting_info=True)
sheets = workbook.sheets() sheets = workbook.sheets()
md_tables = '' md_tables = ''
@@ -77,7 +78,11 @@ class XlsSplitHandle(BaseParseTableHandle):
md_table += '| ' + ' | '.join(['---'] * len(headers)) + ' |\n' md_table += '| ' + ' | '.join(['---'] * len(headers)) + ' |\n'
for row in data: for row in data:
# 将每个单元格中的内容替换换行符为 <br> 以保留原始格式 # 将每个单元格中的内容替换换行符为 <br> 以保留原始格式
md_table += '| ' + ' | '.join([str(cell).replace('\n', '<br>') if cell else '' for cell in row]) + ' |\n' md_table += '| ' + ' | '.join(
[str(cell).replace('\n', '<br>') if cell else '' for cell in row]) + ' |\n'
md_tables += md_table + '\n\n' md_tables += md_table + '\n\n'
return md_tables return md_tables
except Exception as e:
max_kb.error(f'excel split handle error: {e}')
return f'error: {e}'

View File

@@ -75,6 +75,7 @@ class XlsxSplitHandle(BaseParseTableHandle):
def get_content(self, file): def get_content(self, file):
try:
# 加载 Excel 文件 # 加载 Excel 文件
workbook = load_workbook(file) workbook = load_workbook(file)
md_tables = '' md_tables = ''
@@ -100,3 +101,6 @@ class XlsxSplitHandle(BaseParseTableHandle):
md_tables += md_table + '\n\n' md_tables += md_table + '\n\n'
return md_tables return md_tables
except Exception as e:
max_kb.error(f'excel split handle error: {e}')
return f'error: {e}'