fix: limit chapter title length to 256 characters in pdf_split_handle.py
--bug=1054363 --user=刘瑞斌 【知识库】导入PDF文档,分段标题长度超长时,没有自动截断 https://www.tapd.cn/57709429/s/1681044
This commit is contained in:
parent
675adeeb63
commit
560890f717
@@ -173,14 +173,15 @@ class PdfSplitHandle(BaseSplitHandle):
|
|||||||
|
|
||||||
# Null characters are not allowed.
|
# Null characters are not allowed.
|
||||||
chapter_text = chapter_text.replace('\0', '')
|
chapter_text = chapter_text.replace('\0', '')
|
||||||
|
# 限制标题长度
|
||||||
|
real_chapter_title = chapter_title[:256]
|
||||||
# 限制章节内容长度
|
# 限制章节内容长度
|
||||||
if 0 < limit < len(chapter_text):
|
if 0 < limit < len(chapter_text):
|
||||||
split_text = PdfSplitHandle.split_text(chapter_text, limit)
|
split_text = PdfSplitHandle.split_text(chapter_text, limit)
|
||||||
for text in split_text:
|
for text in split_text:
|
||||||
chapters.append({"title": chapter_title, "content": text})
|
chapters.append({"title": real_chapter_title, "content": text})
|
||||||
else:
|
else:
|
||||||
chapters.append({"title": chapter_title, "content": chapter_text if chapter_text else chapter_title})
|
chapters.append({"title": real_chapter_title, "content": chapter_text if chapter_text else real_chapter_title})
|
||||||
# 保存章节内容和章节标题
|
# 保存章节内容和章节标题
|
||||||
return chapters
|
return chapters
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user