chore: 文档内容无法提取的时候输出错误信息
This commit is contained in:
parent
98db08d263
commit
64e8f4dc9f
@ -41,4 +41,4 @@ class CsvSplitHandle(BaseParseTableHandle):
|
|||||||
return buffer.decode(detect(buffer)['encoding'])
|
return buffer.decode(detect(buffer)['encoding'])
|
||||||
except BaseException as e:
|
except BaseException as e:
|
||||||
max_kb.error(f'csv split handle error: {e}')
|
max_kb.error(f'csv split handle error: {e}')
|
||||||
return [{'name': file.name, 'paragraphs': []}]
|
return f'error: {e}'
|
||||||
@ -63,6 +63,7 @@ class XlsSplitHandle(BaseParseTableHandle):
|
|||||||
|
|
||||||
def get_content(self, file):
|
def get_content(self, file):
|
||||||
# 打开 .xls 文件
|
# 打开 .xls 文件
|
||||||
|
try:
|
||||||
workbook = xlrd.open_workbook(file_contents=file.read(), formatting_info=True)
|
workbook = xlrd.open_workbook(file_contents=file.read(), formatting_info=True)
|
||||||
sheets = workbook.sheets()
|
sheets = workbook.sheets()
|
||||||
md_tables = ''
|
md_tables = ''
|
||||||
@ -77,7 +78,11 @@ class XlsSplitHandle(BaseParseTableHandle):
|
|||||||
md_table += '| ' + ' | '.join(['---'] * len(headers)) + ' |\n'
|
md_table += '| ' + ' | '.join(['---'] * len(headers)) + ' |\n'
|
||||||
for row in data:
|
for row in data:
|
||||||
# 将每个单元格中的内容替换换行符为 <br> 以保留原始格式
|
# 将每个单元格中的内容替换换行符为 <br> 以保留原始格式
|
||||||
md_table += '| ' + ' | '.join([str(cell).replace('\n', '<br>') if cell else '' for cell in row]) + ' |\n'
|
md_table += '| ' + ' | '.join(
|
||||||
|
[str(cell).replace('\n', '<br>') if cell else '' for cell in row]) + ' |\n'
|
||||||
md_tables += md_table + '\n\n'
|
md_tables += md_table + '\n\n'
|
||||||
|
|
||||||
return md_tables
|
return md_tables
|
||||||
|
except Exception as e:
|
||||||
|
max_kb.error(f'excel split handle error: {e}')
|
||||||
|
return f'error: {e}'
|
||||||
|
|||||||
@ -75,6 +75,7 @@ class XlsxSplitHandle(BaseParseTableHandle):
|
|||||||
|
|
||||||
|
|
||||||
def get_content(self, file):
|
def get_content(self, file):
|
||||||
|
try:
|
||||||
# 加载 Excel 文件
|
# 加载 Excel 文件
|
||||||
workbook = load_workbook(file)
|
workbook = load_workbook(file)
|
||||||
md_tables = ''
|
md_tables = ''
|
||||||
@ -100,3 +101,6 @@ class XlsxSplitHandle(BaseParseTableHandle):
|
|||||||
|
|
||||||
md_tables += md_table + '\n\n'
|
md_tables += md_table + '\n\n'
|
||||||
return md_tables
|
return md_tables
|
||||||
|
except Exception as e:
|
||||||
|
max_kb.error(f'excel split handle error: {e}')
|
||||||
|
return f'error: {e}'
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user