fix: update image URL paths to use OSS endpoints
This commit is contained in:
parent
e5e993986c
commit
9a7281212d
@ -37,7 +37,7 @@ def handle_sheet(file_name, sheet, image_dict):
|
|||||||
content = str(content.value)
|
content = str(content.value)
|
||||||
image = image_dict.get(content, None)
|
image = image_dict.get(content, None)
|
||||||
if image is not None:
|
if image is not None:
|
||||||
content = f''
|
content = f''
|
||||||
paragraph_list.append({'title': title[0:255],
|
paragraph_list.append({'title': title[0:255],
|
||||||
'content': content[0:102400],
|
'content': content[0:102400],
|
||||||
'problem_list': problem_list})
|
'problem_list': problem_list})
|
||||||
|
|||||||
@ -90,20 +90,20 @@ def get_image_list(result_list: list, zip_files: List[str]):
|
|||||||
'/') else source_image_path)
|
'/') else source_image_path)
|
||||||
if not zip_files.__contains__(image_path):
|
if not zip_files.__contains__(image_path):
|
||||||
continue
|
continue
|
||||||
if image_path.startswith('api/file/') or image_path.startswith('api/image/'):
|
if image_path.startswith('oss/file/') or image_path.startswith('oss/image/'):
|
||||||
image_id = image_path.replace('api/file/', '').replace('api/image/', '')
|
image_id = image_path.replace('oss/file/', '')
|
||||||
if is_valid_uuid(image_id):
|
if is_valid_uuid(image_id):
|
||||||
image_file_list.append({'source_file': image_path,
|
image_file_list.append({'source_file': image_path,
|
||||||
'image_id': image_id})
|
'image_id': image_id})
|
||||||
else:
|
else:
|
||||||
image_file_list.append({'source_file': image_path,
|
image_file_list.append({'source_file': image_path,
|
||||||
'image_id': new_image_id})
|
'image_id': new_image_id})
|
||||||
content = content.replace(source_image_path, f'/api/image/{new_image_id}')
|
content = content.replace(source_image_path, f'/oss/file/{new_image_id}')
|
||||||
p['content'] = content
|
p['content'] = content
|
||||||
else:
|
else:
|
||||||
image_file_list.append({'source_file': image_path,
|
image_file_list.append({'source_file': image_path,
|
||||||
'image_id': new_image_id})
|
'image_id': new_image_id})
|
||||||
content = content.replace(source_image_path, f'/api/image/{new_image_id}')
|
content = content.replace(source_image_path, f'/oss/file/{new_image_id}')
|
||||||
p['content'] = content
|
p['content'] = content
|
||||||
|
|
||||||
return image_file_list
|
return image_file_list
|
||||||
|
|||||||
@ -43,7 +43,7 @@ class XlsxParseTableHandle(BaseParseTableHandle):
|
|||||||
|
|
||||||
image = image_dict.get(cell_value, None)
|
image = image_dict.get(cell_value, None)
|
||||||
if image is not None:
|
if image is not None:
|
||||||
cell_value = f''
|
cell_value = f''
|
||||||
|
|
||||||
# 使用标题作为键,单元格的值作为值存入字典
|
# 使用标题作为键,单元格的值作为值存入字典
|
||||||
row_data[headers[col_idx]] = cell_value
|
row_data[headers[col_idx]] = cell_value
|
||||||
@ -110,7 +110,6 @@ class XlsxParseTableHandle(BaseParseTableHandle):
|
|||||||
|
|
||||||
md_tables += md_table + '\n\n'
|
md_tables += md_table + '\n\n'
|
||||||
|
|
||||||
md_tables = md_tables.replace('/api/image/', '/oss/file/')
|
|
||||||
return md_tables
|
return md_tables
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
max_kb.error(f'excel split handle error: {e}')
|
max_kb.error(f'excel split handle error: {e}')
|
||||||
|
|||||||
@ -43,7 +43,7 @@ def image_to_mode(image, doc: Document, images_list, get_image_id):
|
|||||||
if len([i for i in images_list if i.id == image_uuid]) == 0:
|
if len([i for i in images_list if i.id == image_uuid]) == 0:
|
||||||
image = File(id=image_uuid, file_name=part.filename, meta={'debug': False, 'content': part.blob})
|
image = File(id=image_uuid, file_name=part.filename, meta={'debug': False, 'content': part.blob})
|
||||||
images_list.append(image)
|
images_list.append(image)
|
||||||
return f''
|
return f''
|
||||||
return None
|
return None
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@ -226,7 +226,6 @@ class DocSplitHandle(BaseSplitHandle):
|
|||||||
doc = Document(io.BytesIO(buffer))
|
doc = Document(io.BytesIO(buffer))
|
||||||
content = self.to_md(doc, image_list, get_image_id_func())
|
content = self.to_md(doc, image_list, get_image_id_func())
|
||||||
if len(image_list) > 0:
|
if len(image_list) > 0:
|
||||||
content = content.replace('/api/image/', '/oss/file/')
|
|
||||||
save_image(image_list)
|
save_image(image_list)
|
||||||
return content
|
return content
|
||||||
except BaseException as e:
|
except BaseException as e:
|
||||||
|
|||||||
@ -18,7 +18,7 @@ from common.handle.impl.common_handle import xlsx_embed_cells_images
|
|||||||
def post_cell(image_dict, cell_value):
|
def post_cell(image_dict, cell_value):
|
||||||
image = image_dict.get(cell_value, None)
|
image = image_dict.get(cell_value, None)
|
||||||
if image is not None:
|
if image is not None:
|
||||||
return f''
|
return f''
|
||||||
return cell_value.replace('\n', '<br>').replace('|', '|')
|
return cell_value.replace('\n', '<br>').replace('|', '|')
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -181,20 +181,6 @@ def write_image(zip_path: str, image_list: List[str]):
|
|||||||
os.makedirs(os.path.dirname(file_path))
|
os.makedirs(os.path.dirname(file_path))
|
||||||
with open(os.path.join(zip_path, file_path), 'wb') as f:
|
with open(os.path.join(zip_path, file_path), 'wb') as f:
|
||||||
f.write(file.get_bytes())
|
f.write(file.get_bytes())
|
||||||
# else:
|
|
||||||
# r = text.replace('(/api/image/', '').replace(')', '')
|
|
||||||
# r = r.strip().split(" ")[0]
|
|
||||||
# if not is_valid_uuid(r):
|
|
||||||
# break
|
|
||||||
# image_model = QuerySet(Image).filter(id=r).first()
|
|
||||||
# if image_model is None:
|
|
||||||
# break
|
|
||||||
# zip_inner_path = os.path.join('api', 'image', r)
|
|
||||||
# file_path = os.path.join(zip_path, zip_inner_path)
|
|
||||||
# if not os.path.exists(os.path.dirname(file_path)):
|
|
||||||
# os.makedirs(os.path.dirname(file_path))
|
|
||||||
# with open(file_path, 'wb') as f:
|
|
||||||
# f.write(image_model.image)
|
|
||||||
|
|
||||||
|
|
||||||
def update_document_char_length(document_id: str):
|
def update_document_char_length(document_id: str):
|
||||||
@ -223,7 +209,6 @@ def or_get(exists_problem_list, content, knowledge_id, document_id, paragraph_id
|
|||||||
return problem, document_id, paragraph_id
|
return problem, document_id, paragraph_id
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def get_knowledge_operation_object(knowledge_id: str):
|
def get_knowledge_operation_object(knowledge_id: str):
|
||||||
knowledge_model = QuerySet(model=Knowledge).filter(id=knowledge_id).first()
|
knowledge_model = QuerySet(model=Knowledge).filter(id=knowledge_id).first()
|
||||||
if knowledge_model is not None:
|
if knowledge_model is not None:
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user