feat: add export and export_zip endpoints for knowledge base and document with image handling

This commit is contained in:
CaptainB 2025-06-03 17:44:02 +08:00
parent cb4b1c927c
commit efd273b3bc
8 changed files with 450 additions and 11 deletions

View File

@ -319,3 +319,9 @@ def flat_map(array: List[List]):
for e in array: for e in array:
result += e result += e
return result return result
def parse_image(content: str):
    """
    Collect the Markdown image references in ``content`` that point at the
    internal ``/api/image/...`` or ``/api/file/...`` endpoints.

    :param content: Markdown text to scan; ``None`` or an empty string yields an empty list
    :return: list of the full matched ``![alt](/api/...)`` snippets, in order of appearance
    """
    if not content:
        # re.finditer raises TypeError on None; an empty text has no images anyway.
        return []
    # Raw string: the pattern is full of backslashes that are not valid Python
    # string escapes and would otherwise emit an invalid-escape warning.
    matches = re.finditer(r"!\[.*?\]\(\/api\/(image|file)\/.*?\)", content)
    return [match.group() for match in matches]

View File

@ -383,6 +383,7 @@ class BatchRefreshAPI(APIMixin):
def get_request(): def get_request():
return DocumentBatchRefreshSerializer return DocumentBatchRefreshSerializer
class BatchGenerateRelatedAPI(APIMixin): class BatchGenerateRelatedAPI(APIMixin):
@staticmethod @staticmethod
def get_parameters(): def get_parameters():
@ -406,3 +407,67 @@ class BatchGenerateRelatedAPI(APIMixin):
@staticmethod @staticmethod
def get_request(): def get_request():
return DocumentBatchGenerateRelatedSerializer return DocumentBatchGenerateRelatedSerializer
class TemplateExportAPI(APIMixin):
    """OpenAPI schema description for the import-template download endpoints."""

    @staticmethod
    def get_parameters():
        """Return the two path parameters plus the required ``type`` query parameter."""
        workspace_parameter = OpenApiParameter(
            name="workspace_id",
            description="工作空间id",
            type=OpenApiTypes.STR,
            location='path',
            required=True,
        )
        knowledge_parameter = OpenApiParameter(
            name="knowledge_id",
            description="知识库id",
            type=OpenApiTypes.STR,
            location='path',
            required=True,
        )
        type_parameter = OpenApiParameter(
            name="type",
            description="Export template type csv|excel",
            type=OpenApiTypes.STR,
            location='query',
            required=True,
        )
        return [workspace_parameter, knowledge_parameter, type_parameter]

    @staticmethod
    def get_response():
        """Return the serializer class used to describe the response in the schema."""
        return DefaultResultSerializer
class DocumentExportAPI(APIMixin):
    """OpenAPI schema description for the single-document export endpoints."""

    @staticmethod
    def get_parameters():
        """Return the three required path parameters: workspace, knowledge base and document."""
        workspace_parameter = OpenApiParameter(
            name="workspace_id",
            description="工作空间id",
            type=OpenApiTypes.STR,
            location='path',
            required=True,
        )
        knowledge_parameter = OpenApiParameter(
            name="knowledge_id",
            description="知识库id",
            type=OpenApiTypes.STR,
            location='path',
            required=True,
        )
        document_parameter = OpenApiParameter(
            name="document_id",
            description="文档id",
            type=OpenApiTypes.STR,
            location='path',
            required=True,
        )
        return [workspace_parameter, knowledge_parameter, document_parameter]

    @staticmethod
    def get_response():
        """Return the serializer class used to describe the response in the schema."""
        return DefaultResultSerializer

View File

@ -259,3 +259,27 @@ class GetModelAPI(SyncWebAPI):
@staticmethod @staticmethod
def get_response(): def get_response():
return DefaultResultSerializer return DefaultResultSerializer
class KnowledgeExportAPI(APIMixin):
    """OpenAPI schema description for the knowledge-base export endpoints."""

    @staticmethod
    def get_parameters():
        """Return the two required path parameters: workspace and knowledge base."""
        workspace_parameter = OpenApiParameter(
            name="workspace_id",
            description="工作空间id",
            type=OpenApiTypes.STR,
            location='path',
            required=True,
        )
        knowledge_parameter = OpenApiParameter(
            name="knowledge_id",
            description="知识库id",
            type=OpenApiTypes.STR,
            location='path',
            required=True,
        )
        return [workspace_parameter, knowledge_parameter]

    @staticmethod
    def get_response():
        """Return the serializer class used to describe the response in the schema."""
        return DefaultResultSerializer

View File

@ -1,18 +1,24 @@
import io
import logging import logging
import os import os
import re import re
import traceback import traceback
from functools import reduce from functools import reduce
from tempfile import TemporaryDirectory
from typing import Dict, List from typing import Dict, List
import openpyxl
import uuid_utils.compat as uuid import uuid_utils.compat as uuid
from celery_once import AlreadyQueued from celery_once import AlreadyQueued
from django.core import validators from django.core import validators
from django.db import transaction, models from django.db import transaction, models
from django.db.models import QuerySet, Model from django.db.models import QuerySet, Model
from django.db.models.functions import Substr, Reverse from django.db.models.functions import Substr, Reverse
from django.utils.translation import gettext_lazy as _ from django.http import HttpResponse
from django.utils.translation import gettext_lazy as _, gettext, get_language, to_locale
from openpyxl.cell.cell import ILLEGAL_CHARACTERS_RE
from rest_framework import serializers from rest_framework import serializers
from xlwt import Utils
from common.db.search import native_search, get_dynamics_model, native_page_search from common.db.search import native_search, get_dynamics_model, native_page_search
from common.event import ListenerManagement from common.event import ListenerManagement
@ -33,13 +39,13 @@ from common.handle.impl.text.text_split_handle import TextSplitHandle
from common.handle.impl.text.xls_split_handle import XlsSplitHandle from common.handle.impl.text.xls_split_handle import XlsSplitHandle
from common.handle.impl.text.xlsx_split_handle import XlsxSplitHandle from common.handle.impl.text.xlsx_split_handle import XlsxSplitHandle
from common.handle.impl.text.zip_split_handle import ZipSplitHandle from common.handle.impl.text.zip_split_handle import ZipSplitHandle
from common.utils.common import post, get_file_content, bulk_create_in_batches from common.utils.common import post, get_file_content, bulk_create_in_batches, parse_image
from common.utils.fork import Fork from common.utils.fork import Fork
from common.utils.split_model import get_split_model, flat_map from common.utils.split_model import get_split_model, flat_map
from knowledge.models import Knowledge, Paragraph, Problem, Document, KnowledgeType, ProblemParagraphMapping, State, \ from knowledge.models import Knowledge, Paragraph, Problem, Document, KnowledgeType, ProblemParagraphMapping, State, \
TaskType, File TaskType, File
from knowledge.serializers.common import ProblemParagraphManage, BatchSerializer, \ from knowledge.serializers.common import ProblemParagraphManage, BatchSerializer, \
get_embedding_model_id_by_knowledge_id, MetaSerializer get_embedding_model_id_by_knowledge_id, MetaSerializer, write_image, zip_dir
from knowledge.serializers.paragraph import ParagraphSerializers, ParagraphInstanceSerializer, \ from knowledge.serializers.paragraph import ParagraphSerializers, ParagraphInstanceSerializer, \
delete_problems_and_mappings delete_problems_and_mappings
from knowledge.task.embedding import embedding_by_document, delete_embedding_by_document_list, \ from knowledge.task.embedding import embedding_by_document, delete_embedding_by_document_list, \
@ -180,11 +186,66 @@ class BatchEditHitHandlingSerializer(serializers.Serializer):
class DocumentSerializers(serializers.Serializer): class DocumentSerializers(serializers.Serializer):
class Export(serializers.Serializer):
    """
    Serves the import templates stored under ``apps/knowledge/template``.

    ``type`` selects the file format (``csv`` or ``excel``); the template file
    is chosen by the locale of the currently active language.
    """
    type = serializers.CharField(required=True, validators=[
        # The alternation is grouped so both anchors apply to both alternatives;
        # "^csv|excel$" would also accept values such as "csv_x" or "x_excel".
        validators.RegexValidator(regex=re.compile("^(csv|excel)$"),
                                  message=_('The template type only supports excel|csv'),
                                  code=500)
    ], label=_('type'))

    @staticmethod
    def _template_response(prefix, extension, content_type, download_name):
        """Read ``<prefix>_<locale>.<extension>`` from the template directory and wrap it in a download response."""
        template_path = os.path.join(PROJECT_DIR, "apps", "knowledge", 'template',
                                     f'{prefix}_{to_locale(get_language())}.{extension}')
        # Context manager guarantees the handle is closed even if read() fails.
        with open(template_path, "rb") as file:
            content = file.read()
        return HttpResponse(content, status=200, headers={
            'Content-Type': content_type,
            'Content-Disposition': f'attachment; filename="{download_name}"'})

    def export(self, with_valid=True):
        """
        Return the QA import template as a file download.

        :param with_valid: validate the serializer input first (raises on invalid data)
        :return: ``HttpResponse`` with the template, or ``None`` for an unknown type
        """
        if with_valid:
            self.is_valid(raise_exception=True)
        template_type = self.data.get('type')
        if template_type == 'csv':
            return self._template_response('csv_template', 'csv', 'text/csv', 'csv_template.csv')
        if template_type == 'excel':
            return self._template_response('excel_template', 'xlsx', 'application/vnd.ms-excel',
                                           'excel_template.xlsx')
        return None

    def table_export(self, with_valid=True):
        """
        Return the table import template as a file download.

        :param with_valid: validate the serializer input first (raises on invalid data)
        :return: ``HttpResponse`` with the template, or ``None`` for an unknown type
        """
        if with_valid:
            self.is_valid(raise_exception=True)
        template_type = self.data.get('type')
        if template_type == 'csv':
            # Content type was previously misspelled as "text/cxv".
            return self._template_response('table_template', 'csv', 'text/csv', 'csv_template.csv')
        if template_type == 'excel':
            return self._template_response('table_template', 'xlsx', 'application/vnd.ms-excel',
                                           'excel_template.xlsx')
        return None
class Query(serializers.Serializer): class Query(serializers.Serializer):
# 知识库id # 知识库id
workspace_id = serializers.CharField(required=True, label=_('workspace id')) workspace_id = serializers.CharField(required=True, label=_('workspace id'))
knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id')) knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id'))
name = serializers.CharField(required=False, max_length=128, min_length=1, allow_null=True, allow_blank=True, label=_('document name')) name = serializers.CharField(required=False, max_length=128, min_length=1, allow_null=True, allow_blank=True,
label=_('document name'))
hit_handling_method = serializers.CharField(required=False, label=_('hit handling method')) hit_handling_method = serializers.CharField(required=False, label=_('hit handling method'))
is_active = serializers.BooleanField(required=False, label=_('document is active')) is_active = serializers.BooleanField(required=False, label=_('document is active'))
task_type = serializers.IntegerField(required=False, label=_('task type')) task_type = serializers.IntegerField(required=False, label=_('task type'))
@ -339,6 +400,53 @@ class DocumentSerializers(serializers.Serializer):
if not QuerySet(Document).filter(id=document_id).exists(): if not QuerySet(Document).filter(id=document_id).exists():
raise AppApiException(500, _('document id not exist')) raise AppApiException(500, _('document id not exist'))
def export(self, with_valid=True):
    """
    Export the document's paragraphs and their linked problems as an Excel download.

    :param with_valid: validate the serializer input first (raises on invalid data)
    :return: ``HttpResponse`` carrying the workbook as ``data.xlsx``
    """
    if with_valid:
        self.is_valid(raise_exception=True)
    document_id = self.data.get("document_id")
    sql_dir = os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql')
    document = QuerySet(Document).filter(id=document_id).first()
    paragraph_sql = get_file_content(os.path.join(sql_dir, 'list_paragraph_document_name.sql'))
    paragraph_list = native_search(QuerySet(Paragraph).filter(document_id=document_id), paragraph_sql)
    mapping_sql = get_file_content(os.path.join(sql_dir, 'list_problem_mapping.sql'))
    problem_mapping_list = native_search(
        QuerySet(ProblemParagraphMapping).filter(document_id=document_id),
        mapping_sql,
        with_table_name=True)
    # One sheet per document; the single document is passed so it gets a sheet
    # even when it has no paragraphs.
    data_dict, document_dict = self.merge_problem(paragraph_list, problem_mapping_list, [document])
    workbook = self.get_workbook(data_dict, document_dict)
    response = HttpResponse(content_type='application/vnd.ms-excel')
    response['Content-Disposition'] = 'attachment; filename="data.xlsx"'
    # The response object is file-like, so the workbook is written straight into it.
    workbook.save(response)
    return response
def export_zip(self, with_valid=True):
    """
    Export the document as a zip archive containing the workbook plus the
    images referenced from paragraph content.

    :param with_valid: validate the serializer input first (raises on invalid data)
    :return: ``HttpResponse`` carrying the archive as ``archive.zip``
    """
    if with_valid:
        self.is_valid(raise_exception=True)
    document_id = self.data.get("document_id")
    sql_dir = os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql')
    document = QuerySet(Document).filter(id=document_id).first()
    paragraph_sql = get_file_content(os.path.join(sql_dir, 'list_paragraph_document_name.sql'))
    paragraph_list = native_search(QuerySet(Paragraph).filter(document_id=document_id), paragraph_sql)
    mapping_sql = get_file_content(os.path.join(sql_dir, 'list_problem_mapping.sql'))
    problem_mapping_list = native_search(
        QuerySet(ProblemParagraphMapping).filter(document_id=document_id),
        mapping_sql,
        with_table_name=True)
    data_dict, document_dict = self.merge_problem(paragraph_list, problem_mapping_list, [document])
    # One list of image references per paragraph.
    image_groups = [parse_image(paragraph.get('content')) for paragraph in paragraph_list]
    workbook = DocumentSerializers.Operate.get_workbook(data_dict, document_dict)
    response = HttpResponse(content_type='application/zip')
    response['Content-Disposition'] = 'attachment; filename="archive.zip"'
    zip_buffer = io.BytesIO()
    with TemporaryDirectory() as tempdir:
        workbook.save(os.path.join(tempdir, 'knowledge.xlsx'))
        for image_group in image_groups:
            # presumably materialises the referenced images inside tempdir — confirm in write_image
            write_image(tempdir, image_group)
        zip_dir(tempdir, zip_buffer)
    response.write(zip_buffer.getvalue())
    return response
def one(self, with_valid=False): def one(self, with_valid=False):
if with_valid: if with_valid:
self.is_valid(raise_exception=True) self.is_valid(raise_exception=True)
@ -441,6 +549,78 @@ class DocumentSerializers(serializers.Serializer):
except AlreadyQueued as e: except AlreadyQueued as e:
raise AppApiException(500, _('The task is being executed, please do not send it repeatedly.')) raise AppApiException(500, _('The task is being executed, please do not send it repeatedly.'))
@staticmethod
def get_workbook(data_dict, document_dict):
    """
    Build an openpyxl workbook with one sheet per key of ``data_dict``.

    :param data_dict: maps a sheet id to its list of rows; an empty dict gets a
                      placeholder ``'sheet'`` entry added to it
    :param document_dict: maps a sheet id to the sheet title
    :return: the populated workbook (not yet saved anywhere)
    """
    # Create the workbook and drop the sheet openpyxl adds by default.
    workbook = openpyxl.Workbook()
    workbook.remove(workbook.active)
    if len(data_dict.keys()) == 0:
        data_dict['sheet'] = []
    header = [gettext('Section title (optional)'),
              gettext('Section content (required, question answer, no more than 4096 characters)'),
              gettext('Question (optional, one per line in the cell)')]
    for sheet_id in data_dict:
        # Add a worksheet for this document.
        worksheet = workbook.create_sheet(document_dict.get(sheet_id))
        rows = [header, *data_dict.get(sheet_id, [])]
        # Write the rows into the worksheet (cells are 1-indexed).
        for row_number, row in enumerate(rows, start=1):
            for column_number, value in enumerate(row, start=1):
                cell = worksheet.cell(row=row_number, column=column_number)
                if isinstance(value, str):
                    value = ILLEGAL_CHARACTERS_RE.sub('', value)
                    # Prefix values that start with a formula trigger character.
                    if value.startswith(('=', '+', '-', '@')):
                        value = '\ufeff' + value
                cell.value = value
    return workbook
@staticmethod
def merge_problem(paragraph_list: List[Dict], problem_mapping_list: List[Dict], document_list):
    """
    Group paragraphs (with their linked problems) into per-document row lists
    and assign every document a sheet name.

    :param paragraph_list: paragraph dicts with ``id``, ``document_id``, ``document_name``,
                           ``title`` and ``content`` keys
    :param problem_mapping_list: dicts with ``paragraph_id`` and ``content`` (the problem text)
    :param document_list: document objects (``id``, ``name``); documents without any
                          paragraph still get a sheet containing one empty row
    :return: ``(rows_by_document_id, sheet_name_by_document_id)``
    """
    # Index problems by paragraph once instead of rescanning the whole
    # mapping list for every paragraph.
    problems_by_paragraph = {}
    for problem_mapping in problem_mapping_list:
        problems_by_paragraph.setdefault(problem_mapping.get('paragraph_id'), []).append(
            problem_mapping.get('content'))
    result = {}
    document_dict = {}
    for paragraph in paragraph_list:
        document_id = paragraph.get('document_id')
        problem_list = problems_by_paragraph.get(paragraph.get('id'), [])
        document_name = DocumentSerializers.Operate.reset_document_name(paragraph.get('document_name'))
        document_dict.setdefault(document_name, set()).add(document_id)
        result.setdefault(document_id, []).append(
            [paragraph.get('title'), paragraph.get('content'), '\n'.join(problem_list)])
    for document in document_list:
        if document.id not in result:
            document_name = DocumentSerializers.Operate.reset_document_name(document.name)
            result[document.id] = [[]]
            document_dict.setdefault(document_name, set()).add(document.id)
    # Documents sharing a sheet name get a numeric suffix so titles stay unique.
    result_document_dict = {}
    for d_name in document_dict:
        for index, d_id in enumerate(document_dict.get(d_name)):
            result_document_dict[d_id] = d_name if index == 0 else d_name + str(index)
    return result, result_document_dict
@staticmethod
def reset_document_name(document_name):
    """
    Turn a document name into a usable worksheet title.

    The name is stripped and cut to 29 characters; ``None`` or a name rejected
    by ``Utils.valid_sheet_name`` falls back to ``"Sheet"``.

    :param document_name: original document name, may be ``None``
    :return: the sanitized sheet title
    """
    if document_name is None:
        return "Sheet"
    shortened = document_name.strip()[0:29]
    if not Utils.valid_sheet_name(shortened):
        return "Sheet"
    # Truncation can expose trailing whitespace again, so strip once more.
    return shortened.strip()
class Create(serializers.Serializer): class Create(serializers.Serializer):
workspace_id = serializers.UUIDField(required=True, label=_('workspace id')) workspace_id = serializers.UUIDField(required=True, label=_('workspace id'))
knowledge_id = serializers.UUIDField(required=True, label=_('document id')) knowledge_id = serializers.UUIDField(required=True, label=_('document id'))

View File

@ -1,9 +1,11 @@
import io
import logging import logging
import os import os
import re import re
import traceback import traceback
from functools import reduce from functools import reduce
from typing import Dict from tempfile import TemporaryDirectory
from typing import Dict, List
import uuid_utils.compat as uuid import uuid_utils.compat as uuid
from celery_once import AlreadyQueued from celery_once import AlreadyQueued
@ -11,6 +13,7 @@ from django.core import validators
from django.db import transaction, models from django.db import transaction, models
from django.db.models import QuerySet from django.db.models import QuerySet
from django.db.models.functions import Reverse, Substr from django.db.models.functions import Reverse, Substr
from django.http import HttpResponse
from django.utils.translation import gettext_lazy as _ from django.utils.translation import gettext_lazy as _
from rest_framework import serializers from rest_framework import serializers
@ -20,13 +23,13 @@ from common.db.search import native_search, get_dynamics_model, native_page_sear
from common.db.sql_execute import select_list from common.db.sql_execute import select_list
from common.event import ListenerManagement from common.event import ListenerManagement
from common.exception.app_exception import AppApiException from common.exception.app_exception import AppApiException
from common.utils.common import valid_license, post, get_file_content from common.utils.common import valid_license, post, get_file_content, parse_image
from common.utils.fork import Fork, ChildLink from common.utils.fork import Fork, ChildLink
from common.utils.split_model import get_split_model from common.utils.split_model import get_split_model
from knowledge.models import Knowledge, KnowledgeScope, KnowledgeType, Document, Paragraph, Problem, \ from knowledge.models import Knowledge, KnowledgeScope, KnowledgeType, Document, Paragraph, Problem, \
ProblemParagraphMapping, TaskType, State, SearchMode, KnowledgeFolder ProblemParagraphMapping, TaskType, State, SearchMode, KnowledgeFolder
from knowledge.serializers.common import ProblemParagraphManage, get_embedding_model_id_by_knowledge_id, MetaSerializer, \ from knowledge.serializers.common import ProblemParagraphManage, get_embedding_model_id_by_knowledge_id, MetaSerializer, \
GenerateRelatedSerializer, get_embedding_model_by_knowledge_id, list_paragraph GenerateRelatedSerializer, get_embedding_model_by_knowledge_id, list_paragraph, write_image, zip_dir
from knowledge.serializers.document import DocumentSerializers from knowledge.serializers.document import DocumentSerializers
from knowledge.task.embedding import embedding_by_knowledge, delete_embedding_by_knowledge from knowledge.task.embedding import embedding_by_knowledge, delete_embedding_by_knowledge
from knowledge.task.generate import generate_related_by_knowledge_id from knowledge.task.generate import generate_related_by_knowledge_id
@ -331,6 +334,77 @@ class KnowledgeSerializer(serializers.Serializer):
delete_embedding_by_knowledge(self.data.get('knowledge_id')) delete_embedding_by_knowledge(self.data.get('knowledge_id'))
return True return True
def export_excel(self, with_valid=True):
    """
    Export every document of the knowledge base into one Excel workbook
    (one sheet per document) and return it as a download.

    The knowledge base is identified by the ``knowledge_id`` field, the same
    key the other methods of this serializer read; the former ``id`` key is
    not what those methods use, so the filters would have received ``None``.

    :param with_valid: validate the serializer input first (raises on invalid data)
    :return: ``HttpResponse`` carrying the workbook as ``knowledge.xlsx``
    """
    if with_valid:
        self.is_valid(raise_exception=True)
    knowledge_id = self.data.get('knowledge_id')
    sql_dir = os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql')
    document_list = QuerySet(Document).filter(knowledge_id=knowledge_id)
    paragraph_list = native_search(
        QuerySet(Paragraph).filter(knowledge_id=knowledge_id),
        get_file_content(os.path.join(sql_dir, 'list_paragraph_document_name.sql')))
    problem_mapping_list = native_search(
        QuerySet(ProblemParagraphMapping).filter(knowledge_id=knowledge_id),
        get_file_content(os.path.join(sql_dir, 'list_problem_mapping.sql')),
        with_table_name=True)
    data_dict, document_dict = DocumentSerializers.Operate.merge_problem(paragraph_list, problem_mapping_list,
                                                                         document_list)
    workbook = DocumentSerializers.Operate.get_workbook(data_dict, document_dict)
    response = HttpResponse(content_type='application/vnd.ms-excel')
    response['Content-Disposition'] = 'attachment; filename="knowledge.xlsx"'
    # The response object is file-like, so the workbook is written straight into it.
    workbook.save(response)
    return response
def export_zip(self, with_valid=True):
    """
    Export the knowledge base as a zip archive containing the workbook plus
    the images referenced from paragraph content.

    The knowledge base is identified by the ``knowledge_id`` field, the same
    key the other methods of this serializer read; the former ``id`` key is
    not what those methods use, so the filters would have received ``None``.

    :param with_valid: validate the serializer input first (raises on invalid data)
    :return: ``HttpResponse`` carrying the archive as ``archive.zip``
    """
    if with_valid:
        self.is_valid(raise_exception=True)
    knowledge_id = self.data.get('knowledge_id')
    sql_dir = os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql')
    document_list = QuerySet(Document).filter(knowledge_id=knowledge_id)
    paragraph_list = native_search(
        QuerySet(Paragraph).filter(knowledge_id=knowledge_id),
        get_file_content(os.path.join(sql_dir, 'list_paragraph_document_name.sql')))
    problem_mapping_list = native_search(
        QuerySet(ProblemParagraphMapping).filter(knowledge_id=knowledge_id),
        get_file_content(os.path.join(sql_dir, 'list_problem_mapping.sql')),
        with_table_name=True)
    data_dict, document_dict = DocumentSerializers.Operate.merge_problem(paragraph_list, problem_mapping_list,
                                                                         document_list)
    # One list of image references per paragraph.
    res = [parse_image(paragraph.get('content')) for paragraph in paragraph_list]
    workbook = DocumentSerializers.Operate.get_workbook(data_dict, document_dict)
    response = HttpResponse(content_type='application/zip')
    response['Content-Disposition'] = 'attachment; filename="archive.zip"'
    zip_buffer = io.BytesIO()
    with TemporaryDirectory() as tempdir:
        knowledge_file = os.path.join(tempdir, 'knowledge.xlsx')
        workbook.save(knowledge_file)
        for r in res:
            # presumably materialises the referenced images inside tempdir — confirm in write_image
            write_image(tempdir, r)
        zip_dir(tempdir, zip_buffer)
    response.write(zip_buffer.getvalue())
    return response
@staticmethod
def merge_problem(paragraph_list: List[Dict], problem_mapping_list: List[Dict]):
result = {}
document_dict = {}
for paragraph in paragraph_list:
problem_list = [problem_mapping.get('content') for problem_mapping in problem_mapping_list if
problem_mapping.get('paragraph_id') == paragraph.get('id')]
document_sheet = result.get(paragraph.get('document_id'))
d = document_dict.get(paragraph.get('document_name'))
if d is None:
document_dict[paragraph.get('document_name')] = {paragraph.get('document_id')}
else:
d.add(paragraph.get('document_id'))
if document_sheet is None:
result[paragraph.get('document_id')] = [[paragraph.get('title'), paragraph.get('content'),
'\n'.join(problem_list)]]
else:
document_sheet.append([paragraph.get('title'), paragraph.get('content'), '\n'.join(problem_list)])
result_document_dict = {}
for d_name in document_dict:
for index, d_id in enumerate(document_dict.get(d_name)):
result_document_dict[d_id] = d_name if index == 0 else d_name + str(index)
return result, result_document_dict
class Create(serializers.Serializer): class Create(serializers.Serializer):
user_id = serializers.UUIDField(required=True, label=_('user id')) user_id = serializers.UUIDField(required=True, label=_('user id'))
workspace_id = serializers.CharField(required=True, label=_('workspace id')) workspace_id = serializers.CharField(required=True, label=_('workspace id'))

View File

@ -14,6 +14,8 @@ urlpatterns = [
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/generate_related', views.KnowledgeView.GenerateRelated.as_view()), path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/generate_related', views.KnowledgeView.GenerateRelated.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/embedding', views.KnowledgeView.Embedding.as_view()), path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/embedding', views.KnowledgeView.Embedding.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/hit_test', views.KnowledgeView.HitTest.as_view()), path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/hit_test', views.KnowledgeView.HitTest.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/export', views.KnowledgeView.Export.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/export_zip', views.KnowledgeView.ExportZip.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document', views.DocumentView.as_view()), path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document', views.DocumentView.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/split', views.DocumentView.Split.as_view()), path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/split', views.DocumentView.Split.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/split_pattern', views.DocumentView.SplitPattern.as_view()), path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/split_pattern', views.DocumentView.SplitPattern.as_view()),
@ -26,11 +28,15 @@ urlpatterns = [
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/qa', views.QaDocumentView.as_view()), path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/qa', views.QaDocumentView.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/table', views.TableDocumentView.as_view()), path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/table', views.TableDocumentView.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/batch_hit_handling', views.DocumentView.BatchEditHitHandling.as_view()), path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/batch_hit_handling', views.DocumentView.BatchEditHitHandling.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/template/export', views.Template.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/table_template/export', views.TableTemplate.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>', views.DocumentView.Operate.as_view()), path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>', views.DocumentView.Operate.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/sync', views.DocumentView.SyncWeb.as_view()), path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/sync', views.DocumentView.SyncWeb.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/refresh', views.DocumentView.Refresh.as_view()), path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/refresh', views.DocumentView.Refresh.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/cancel_task', views.DocumentView.CancelTask.as_view()), path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/cancel_task', views.DocumentView.CancelTask.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/batch_cancel_task', views.DocumentView.BatchCancelTask.as_view()), path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/batch_cancel_task', views.DocumentView.BatchCancelTask.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/export', views.DocumentView.Export.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/export_zip', views.DocumentView.ExportZip.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph', views.ParagraphView.as_view()), path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph', views.ParagraphView.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph/batch_delete', views.ParagraphView.BatchDelete.as_view()), path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph/batch_delete', views.ParagraphView.BatchDelete.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph/batch_generate_related', views.ParagraphView.BatchGenerateRelated.as_view()), path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph/batch_generate_related', views.ParagraphView.BatchGenerateRelated.as_view()),

View File

@ -11,7 +11,8 @@ from common.result import result
from knowledge.api.document import DocumentSplitAPI, DocumentBatchAPI, DocumentBatchCreateAPI, DocumentCreateAPI, \ from knowledge.api.document import DocumentSplitAPI, DocumentBatchAPI, DocumentBatchCreateAPI, DocumentCreateAPI, \
DocumentReadAPI, DocumentEditAPI, DocumentDeleteAPI, TableDocumentCreateAPI, QaDocumentCreateAPI, \ DocumentReadAPI, DocumentEditAPI, DocumentDeleteAPI, TableDocumentCreateAPI, QaDocumentCreateAPI, \
WebDocumentCreateAPI, CancelTaskAPI, BatchCancelTaskAPI, SyncWebAPI, RefreshAPI, BatchEditHitHandlingAPI, \ WebDocumentCreateAPI, CancelTaskAPI, BatchCancelTaskAPI, SyncWebAPI, RefreshAPI, BatchEditHitHandlingAPI, \
DocumentTreeReadAPI, DocumentSplitPatternAPI, BatchRefreshAPI, BatchGenerateRelatedAPI DocumentTreeReadAPI, DocumentSplitPatternAPI, BatchRefreshAPI, BatchGenerateRelatedAPI, TemplateExportAPI, \
DocumentExportAPI
from knowledge.serializers.document import DocumentSerializers from knowledge.serializers.document import DocumentSerializers
@ -384,6 +385,34 @@ class DocumentView(APIView):
} }
).page(current_page, page_size)) ).page(current_page, page_size))
class Export(APIView):
    """Download a single document as an Excel workbook."""
    authentication_classes = [TokenAuth]

    @extend_schema(
        summary=_('Export document'),
        operation_id=_('Export document'),  # type: ignore
        parameters=DocumentExportAPI.get_parameters(),
        responses=DocumentExportAPI.get_response(),
        tags=[_('Knowledge Base/Documentation')]  # type: ignore
    )
    @has_permissions(PermissionConstants.KNOWLEDGE_DOCUMENT_EXPORT.get_workspace_permission())
    def get(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str):
        # The route captures workspace_id, knowledge_id and document_id as keyword
        # arguments; the handler must accept exactly those names or the call fails
        # with a TypeError before reaching the serializer.
        return DocumentSerializers.Operate(data={
            'workspace_id': workspace_id, 'knowledge_id': knowledge_id, 'document_id': document_id
        }).export()
class ExportZip(APIView):
    """Download a single document as a zip archive (workbook plus referenced images)."""
    authentication_classes = [TokenAuth]

    @extend_schema(
        summary=_('Export Zip document'),
        operation_id=_('Export Zip document'),  # type: ignore
        parameters=DocumentExportAPI.get_parameters(),
        responses=DocumentExportAPI.get_response(),
        tags=[_('Knowledge Base/Documentation')]  # type: ignore
    )
    @has_permissions(PermissionConstants.KNOWLEDGE_DOCUMENT_EXPORT.get_workspace_permission())
    def get(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str):
        # The route captures workspace_id, knowledge_id and document_id as keyword
        # arguments; the handler must accept exactly those names or the call fails
        # with a TypeError before reaching the serializer.
        return DocumentSerializers.Operate(data={
            'workspace_id': workspace_id, 'knowledge_id': knowledge_id, 'document_id': document_id
        }).export_zip()
class WebDocumentView(APIView): class WebDocumentView(APIView):
authentication_classes = [TokenAuth] authentication_classes = [TokenAuth]
@ -443,3 +472,30 @@ class TableDocumentView(APIView):
return result.success(DocumentSerializers.Create( return result.success(DocumentSerializers.Create(
data={'knowledge_id': knowledge_id, 'workspace_id': workspace_id} data={'knowledge_id': knowledge_id, 'workspace_id': workspace_id}
).save_table({'file_list': request.FILES.getlist('file')}, with_valid=True)) ).save_table({'file_list': request.FILES.getlist('file')}, with_valid=True))
class Template(APIView):
    """Download the QA import template in the format given by the ``type`` query parameter."""
    authentication_classes = [TokenAuth]

    @extend_schema(
        summary=_('Get QA template'),
        operation_id=_('Get QA template'),  # type: ignore
        parameters=TemplateExportAPI.get_parameters(),
        responses=TemplateExportAPI.get_response(),
        tags=[_('Knowledge Base/Documentation')]  # type: ignore
    )
    def get(self, request: Request, workspace_id: str, knowledge_id: str):
        # workspace_id / knowledge_id are captured by the route and passed as
        # keyword arguments, so they must be accepted even though the template
        # itself does not depend on them.
        return DocumentSerializers.Export(data={'type': request.query_params.get('type')}).export(with_valid=True)
class TableTemplate(APIView):
    """Download the table import template in the format given by the ``type`` query parameter."""
    authentication_classes = [TokenAuth]

    @extend_schema(
        summary=_('Get form template'),
        operation_id=_('Get form template'),  # type: ignore
        parameters=TemplateExportAPI.get_parameters(),
        responses=TemplateExportAPI.get_response(),
        tags=[_('Knowledge Base/Documentation')])  # type: ignore
    def get(self, request: Request, workspace_id: str, knowledge_id: str):
        # workspace_id / knowledge_id are captured by the route and passed as
        # keyword arguments, so they must be accepted even though the template
        # itself does not depend on them.
        return DocumentSerializers.Export(data={'type': request.query_params.get('type')}).table_export(
            with_valid=True)

View File

@ -9,7 +9,7 @@ from common.constants.permission_constants import PermissionConstants
from common.result import result from common.result import result
from knowledge.api.knowledge import KnowledgeBaseCreateAPI, KnowledgeWebCreateAPI, KnowledgeTreeReadAPI, \ from knowledge.api.knowledge import KnowledgeBaseCreateAPI, KnowledgeWebCreateAPI, KnowledgeTreeReadAPI, \
KnowledgeEditAPI, KnowledgeReadAPI, KnowledgePageAPI, SyncWebAPI, GenerateRelatedAPI, HitTestAPI, EmbeddingAPI, \ KnowledgeEditAPI, KnowledgeReadAPI, KnowledgePageAPI, SyncWebAPI, GenerateRelatedAPI, HitTestAPI, EmbeddingAPI, \
GetModelAPI GetModelAPI, KnowledgeExportAPI
from knowledge.serializers.knowledge import KnowledgeSerializer from knowledge.serializers.knowledge import KnowledgeSerializer
from models_provider.serializers.model_serializer import ModelSerializer from models_provider.serializers.model_serializer import ModelSerializer
@ -182,6 +182,34 @@ class KnowledgeView(APIView):
data={'knowledge_id': knowledge_id, 'workspace_id': workspace_id, 'user_id': request.user.id} data={'knowledge_id': knowledge_id, 'workspace_id': workspace_id, 'user_id': request.user.id}
).embedding()) ).embedding())
class Export(APIView):
    """Download a whole knowledge base as an Excel workbook."""
    authentication_classes = [TokenAuth]

    @extend_schema(
        summary=_('Export knowledge base'),
        operation_id=_('Export knowledge base'),  # type: ignore
        parameters=KnowledgeExportAPI.get_parameters(),
        responses=KnowledgeExportAPI.get_response(),
        tags=[_('Knowledge Base')]  # type: ignore
    )
    @has_permissions(PermissionConstants.KNOWLEDGE_EXPORT.get_workspace_permission())
    def get(self, request: Request, workspace_id: str, knowledge_id: str):
        # The route captures both workspace_id and knowledge_id as keyword
        # arguments, so the handler has to accept both. The payload uses the
        # same keys as the other knowledge views in this module.
        return KnowledgeSerializer.Operate(
            data={'knowledge_id': knowledge_id, 'workspace_id': workspace_id, 'user_id': request.user.id}
        ).export_excel()
class ExportZip(APIView):
    """Download a whole knowledge base as a zip archive (workbook plus referenced images)."""
    authentication_classes = [TokenAuth]

    @extend_schema(
        summary=_('Export knowledge base containing images'),
        operation_id=_('Export knowledge base containing images'),  # type: ignore
        parameters=KnowledgeExportAPI.get_parameters(),
        responses=KnowledgeExportAPI.get_response(),
        tags=[_('Knowledge Base')]  # type: ignore
    )
    @has_permissions(PermissionConstants.KNOWLEDGE_EXPORT.get_workspace_permission())
    def get(self, request: Request, workspace_id: str, knowledge_id: str):
        # The route captures both workspace_id and knowledge_id as keyword
        # arguments, so the handler has to accept both. The payload uses the
        # same keys as the other knowledge views in this module.
        return KnowledgeSerializer.Operate(
            data={'knowledge_id': knowledge_id, 'workspace_id': workspace_id, 'user_id': request.user.id}
        ).export_zip()
class GenerateRelated(APIView): class GenerateRelated(APIView):
authentication_classes = [TokenAuth] authentication_classes = [TokenAuth]