fix: document names incorrectly included the folder name after importing CSV and docx files from an archive into the knowledge base
--bug=1052039 --user=王孝刚 【知识库】-压缩文件中包含csv、docx文件时,导入到知识库后,文档名称包含文件夹名称 https://www.tapd.cn/57709429/s/1651752
This commit is contained in:
parent
2c03b2859d
commit
b90995d3aa
@ -8,6 +8,7 @@
|
|||||||
"""
|
"""
|
||||||
import csv
|
import csv
|
||||||
import io
|
import io
|
||||||
|
import os
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
from charset_normalizer import detect
|
from charset_normalizer import detect
|
||||||
@ -28,7 +29,8 @@ class CsvSplitHandle(BaseSplitHandle):
|
|||||||
def handle(self, file, pattern_list: List, with_filter: bool, limit: int, get_buffer, save_image):
|
def handle(self, file, pattern_list: List, with_filter: bool, limit: int, get_buffer, save_image):
|
||||||
buffer = get_buffer(file)
|
buffer = get_buffer(file)
|
||||||
paragraphs = []
|
paragraphs = []
|
||||||
result = {'name': file.name, 'content': paragraphs}
|
file_name = os.path.basename(file.name)
|
||||||
|
result = {'name': file_name, 'content': paragraphs}
|
||||||
try:
|
try:
|
||||||
reader = csv.reader(io.TextIOWrapper(io.BytesIO(buffer), encoding=detect(buffer)['encoding']))
|
reader = csv.reader(io.TextIOWrapper(io.BytesIO(buffer), encoding=detect(buffer)['encoding']))
|
||||||
try:
|
try:
|
||||||
|
|||||||
@ -7,6 +7,7 @@
|
|||||||
@desc:
|
@desc:
|
||||||
"""
|
"""
|
||||||
import io
|
import io
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
import traceback
|
import traceback
|
||||||
import uuid
|
import uuid
|
||||||
@ -167,6 +168,7 @@ class DocSplitHandle(BaseSplitHandle):
|
|||||||
in elements])
|
in elements])
|
||||||
|
|
||||||
def handle(self, file, pattern_list: List, with_filter: bool, limit: int, get_buffer, save_image):
|
def handle(self, file, pattern_list: List, with_filter: bool, limit: int, get_buffer, save_image):
|
||||||
|
file_name = os.path.basename(file.name)
|
||||||
try:
|
try:
|
||||||
image_list = []
|
image_list = []
|
||||||
buffer = get_buffer(file)
|
buffer = get_buffer(file)
|
||||||
@ -180,9 +182,9 @@ class DocSplitHandle(BaseSplitHandle):
|
|||||||
split_model = SplitModel(default_pattern_list, with_filter=with_filter, limit=limit)
|
split_model = SplitModel(default_pattern_list, with_filter=with_filter, limit=limit)
|
||||||
except BaseException as e:
|
except BaseException as e:
|
||||||
traceback.print_exception(e)
|
traceback.print_exception(e)
|
||||||
return {'name': file.name,
|
return {'name': file_name,
|
||||||
'content': []}
|
'content': []}
|
||||||
return {'name': file.name,
|
return {'name': file_name,
|
||||||
'content': split_model.parse(content)
|
'content': split_model.parse(content)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user