feat: add export and export_zip endpoints for knowledge base and document with image handling
This commit is contained in:
parent
cb4b1c927c
commit
efd273b3bc
@ -319,3 +319,9 @@ def flat_map(array: List[List]):
|
|||||||
for e in array:
|
for e in array:
|
||||||
result += e
|
result += e
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
def parse_image(content: str):
    """Collect the Markdown image references in ``content`` that point at internal uploads.

    Only images whose URL starts with ``/api/image/`` or ``/api/file/`` are matched. Each
    returned item is the complete ``![alt](/api/...)`` fragment, in order of appearance.

    :param content: Markdown text to scan; ``None`` or an empty string yields an empty list.
    :return: list of matched Markdown image fragments (strings).
    """
    if not content:
        # re.finditer raises TypeError on None; treat "no text" as "no images".
        return []
    # Raw string literal: the previous non-raw pattern relied on invalid escape
    # sequences ("\[", "\/"). "/" needs no escaping in a Python regex.
    matches = re.finditer(r"!\[.*?\]\(/api/(image|file)/.*?\)", content)
    return [match.group() for match in matches]
|
||||||
|
|
||||||
|
|||||||
@ -383,6 +383,7 @@ class BatchRefreshAPI(APIMixin):
|
|||||||
def get_request():
|
def get_request():
|
||||||
return DocumentBatchRefreshSerializer
|
return DocumentBatchRefreshSerializer
|
||||||
|
|
||||||
|
|
||||||
class BatchGenerateRelatedAPI(APIMixin):
|
class BatchGenerateRelatedAPI(APIMixin):
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_parameters():
|
def get_parameters():
|
||||||
@ -406,3 +407,67 @@ class BatchGenerateRelatedAPI(APIMixin):
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def get_request():
|
def get_request():
|
||||||
return DocumentBatchGenerateRelatedSerializer
|
return DocumentBatchGenerateRelatedSerializer
|
||||||
|
|
||||||
|
|
||||||
|
class TemplateExportAPI(APIMixin):
    """OpenAPI schema description for the template export endpoint."""

    @staticmethod
    def get_parameters():
        """Return the two required path parameters followed by the required ``type`` query parameter."""
        path_parameters = [
            OpenApiParameter(
                name=name,
                description=description,
                type=OpenApiTypes.STR,
                location='path',
                required=True,
            )
            for name, description in (
                ("workspace_id", "工作空间id"),
                ("knowledge_id", "知识库id"),
            )
        ]
        type_parameter = OpenApiParameter(
            name="type",
            description="Export template type csv|excel",
            type=OpenApiTypes.STR,
            location='query',
            required=True,
        )
        return [*path_parameters, type_parameter]

    @staticmethod
    def get_response():
        """Return the serializer class used to document the response."""
        return DefaultResultSerializer
|
||||||
|
|
||||||
|
|
||||||
|
class DocumentExportAPI(APIMixin):
    """OpenAPI schema description for the single-document export endpoint."""

    @staticmethod
    def get_parameters():
        """Return the three required path parameters: workspace, knowledge base and document id."""
        path_fields = (
            ("workspace_id", "工作空间id"),
            ("knowledge_id", "知识库id"),
            ("document_id", "文档id"),
        )
        return [
            OpenApiParameter(
                name=name,
                description=description,
                type=OpenApiTypes.STR,
                location='path',
                required=True,
            )
            for name, description in path_fields
        ]

    @staticmethod
    def get_response():
        """Return the serializer class used to document the response."""
        return DefaultResultSerializer
|
||||||
|
|||||||
@ -259,3 +259,27 @@ class GetModelAPI(SyncWebAPI):
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def get_response():
|
def get_response():
|
||||||
return DefaultResultSerializer
|
return DefaultResultSerializer
|
||||||
|
|
||||||
|
class KnowledgeExportAPI(APIMixin):
    """OpenAPI schema description for the knowledge base export endpoint."""

    @staticmethod
    def get_parameters():
        """Return the two required path parameters: workspace id and knowledge base id."""
        parameters = []
        for name, description in (("workspace_id", "工作空间id"), ("knowledge_id", "知识库id")):
            parameters.append(
                OpenApiParameter(
                    name=name,
                    description=description,
                    type=OpenApiTypes.STR,
                    location='path',
                    required=True,
                )
            )
        return parameters

    @staticmethod
    def get_response():
        """Return the serializer class used to document the response."""
        return DefaultResultSerializer
|
||||||
@ -1,18 +1,24 @@
|
|||||||
|
import io
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import traceback
|
import traceback
|
||||||
from functools import reduce
|
from functools import reduce
|
||||||
|
from tempfile import TemporaryDirectory
|
||||||
from typing import Dict, List
|
from typing import Dict, List
|
||||||
|
|
||||||
|
import openpyxl
|
||||||
import uuid_utils.compat as uuid
|
import uuid_utils.compat as uuid
|
||||||
from celery_once import AlreadyQueued
|
from celery_once import AlreadyQueued
|
||||||
from django.core import validators
|
from django.core import validators
|
||||||
from django.db import transaction, models
|
from django.db import transaction, models
|
||||||
from django.db.models import QuerySet, Model
|
from django.db.models import QuerySet, Model
|
||||||
from django.db.models.functions import Substr, Reverse
|
from django.db.models.functions import Substr, Reverse
|
||||||
from django.utils.translation import gettext_lazy as _
|
from django.http import HttpResponse
|
||||||
|
from django.utils.translation import gettext_lazy as _, gettext, get_language, to_locale
|
||||||
|
from openpyxl.cell.cell import ILLEGAL_CHARACTERS_RE
|
||||||
from rest_framework import serializers
|
from rest_framework import serializers
|
||||||
|
from xlwt import Utils
|
||||||
|
|
||||||
from common.db.search import native_search, get_dynamics_model, native_page_search
|
from common.db.search import native_search, get_dynamics_model, native_page_search
|
||||||
from common.event import ListenerManagement
|
from common.event import ListenerManagement
|
||||||
@ -33,13 +39,13 @@ from common.handle.impl.text.text_split_handle import TextSplitHandle
|
|||||||
from common.handle.impl.text.xls_split_handle import XlsSplitHandle
|
from common.handle.impl.text.xls_split_handle import XlsSplitHandle
|
||||||
from common.handle.impl.text.xlsx_split_handle import XlsxSplitHandle
|
from common.handle.impl.text.xlsx_split_handle import XlsxSplitHandle
|
||||||
from common.handle.impl.text.zip_split_handle import ZipSplitHandle
|
from common.handle.impl.text.zip_split_handle import ZipSplitHandle
|
||||||
from common.utils.common import post, get_file_content, bulk_create_in_batches
|
from common.utils.common import post, get_file_content, bulk_create_in_batches, parse_image
|
||||||
from common.utils.fork import Fork
|
from common.utils.fork import Fork
|
||||||
from common.utils.split_model import get_split_model, flat_map
|
from common.utils.split_model import get_split_model, flat_map
|
||||||
from knowledge.models import Knowledge, Paragraph, Problem, Document, KnowledgeType, ProblemParagraphMapping, State, \
|
from knowledge.models import Knowledge, Paragraph, Problem, Document, KnowledgeType, ProblemParagraphMapping, State, \
|
||||||
TaskType, File
|
TaskType, File
|
||||||
from knowledge.serializers.common import ProblemParagraphManage, BatchSerializer, \
|
from knowledge.serializers.common import ProblemParagraphManage, BatchSerializer, \
|
||||||
get_embedding_model_id_by_knowledge_id, MetaSerializer
|
get_embedding_model_id_by_knowledge_id, MetaSerializer, write_image, zip_dir
|
||||||
from knowledge.serializers.paragraph import ParagraphSerializers, ParagraphInstanceSerializer, \
|
from knowledge.serializers.paragraph import ParagraphSerializers, ParagraphInstanceSerializer, \
|
||||||
delete_problems_and_mappings
|
delete_problems_and_mappings
|
||||||
from knowledge.task.embedding import embedding_by_document, delete_embedding_by_document_list, \
|
from knowledge.task.embedding import embedding_by_document, delete_embedding_by_document_list, \
|
||||||
@ -180,11 +186,66 @@ class BatchEditHitHandlingSerializer(serializers.Serializer):
|
|||||||
|
|
||||||
|
|
||||||
class DocumentSerializers(serializers.Serializer):
|
class DocumentSerializers(serializers.Serializer):
|
||||||
|
class Export(serializers.Serializer):
    """Serve the bundled, locale-specific import templates as file downloads.

    ``type`` selects the format and must be exactly ``csv`` or ``excel``.
    """

    # The alternation is grouped so both anchors apply to both options; the ungrouped
    # "^csv|excel$" also accepted values such as "csvfoo" or "fooexcel".
    type = serializers.CharField(required=True, validators=[
        validators.RegexValidator(regex=re.compile("^(csv|excel)$"),
                                  message=_('The template type only supports excel|csv'),
                                  code=500)
    ], label=_('type'))

    @staticmethod
    def _template_response(template_name, content_type, download_name):
        """Read ``template_name`` from the knowledge template directory and return it as an attachment.

        :param template_name: file name inside ``apps/knowledge/template``.
        :param content_type: value of the ``Content-Type`` response header.
        :param download_name: file name advertised in ``Content-Disposition``.
        :raises OSError: when the template file cannot be opened or read.
        """
        template_path = os.path.join(PROJECT_DIR, "apps", "knowledge", 'template', template_name)
        # The context manager closes the handle even when read() raises.
        with open(template_path, "rb") as template_file:
            content = template_file.read()
        return HttpResponse(content, status=200, headers={
            'Content-Type': content_type,
            'Content-Disposition': f'attachment; filename="{download_name}"'})

    def export(self, with_valid=True):
        """Return the question/answer import template for the active language.

        :param with_valid: validate the serializer input first (raising on failure).
        :return: an ``HttpResponse`` carrying the template, or ``None`` for an unknown type.
        """
        if with_valid:
            self.is_valid(raise_exception=True)
        language = get_language()
        template_type = self.data.get('type')
        if template_type == 'csv':
            return self._template_response(f'csv_template_{to_locale(language)}.csv',
                                           'text/csv', 'csv_template.csv')
        if template_type == 'excel':
            return self._template_response(f'excel_template_{to_locale(language)}.xlsx',
                                           'application/vnd.ms-excel', 'excel_template.xlsx')
        return None

    def table_export(self, with_valid=True):
        """Return the table import template for the active language.

        :param with_valid: validate the serializer input first (raising on failure).
        :return: an ``HttpResponse`` carrying the template, or ``None`` for an unknown type.
        """
        if with_valid:
            self.is_valid(raise_exception=True)
        language = get_language()
        template_type = self.data.get('type')
        if template_type == 'csv':
            # Content type was previously misspelled as 'text/cxv'.
            return self._template_response(f'table_template_{to_locale(language)}.csv',
                                           'text/csv', 'csv_template.csv')
        if template_type == 'excel':
            return self._template_response(f'table_template_{to_locale(language)}.xlsx',
                                           'application/vnd.ms-excel', 'excel_template.xlsx')
        return None
|
||||||
|
|
||||||
|
|
||||||
class Query(serializers.Serializer):
|
class Query(serializers.Serializer):
|
||||||
# 知识库id
|
# 知识库id
|
||||||
workspace_id = serializers.CharField(required=True, label=_('workspace id'))
|
workspace_id = serializers.CharField(required=True, label=_('workspace id'))
|
||||||
knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id'))
|
knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id'))
|
||||||
name = serializers.CharField(required=False, max_length=128, min_length=1, allow_null=True, allow_blank=True, label=_('document name'))
|
name = serializers.CharField(required=False, max_length=128, min_length=1, allow_null=True, allow_blank=True,
|
||||||
|
label=_('document name'))
|
||||||
hit_handling_method = serializers.CharField(required=False, label=_('hit handling method'))
|
hit_handling_method = serializers.CharField(required=False, label=_('hit handling method'))
|
||||||
is_active = serializers.BooleanField(required=False, label=_('document is active'))
|
is_active = serializers.BooleanField(required=False, label=_('document is active'))
|
||||||
task_type = serializers.IntegerField(required=False, label=_('task type'))
|
task_type = serializers.IntegerField(required=False, label=_('task type'))
|
||||||
@ -339,6 +400,53 @@ class DocumentSerializers(serializers.Serializer):
|
|||||||
if not QuerySet(Document).filter(id=document_id).exists():
|
if not QuerySet(Document).filter(id=document_id).exists():
|
||||||
raise AppApiException(500, _('document id not exist'))
|
raise AppApiException(500, _('document id not exist'))
|
||||||
|
|
||||||
|
def export(self, with_valid=True):
    """Export one document (paragraphs plus their linked problems) as an xlsx download.

    :param with_valid: validate the serializer input first (raising on failure).
    :return: an ``HttpResponse`` whose body is the saved workbook, served as ``data.xlsx``.
    """
    if with_valid:
        self.is_valid(raise_exception=True)
    document_id = self.data.get("document_id")
    sql_dir = os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql')

    document = QuerySet(Document).filter(id=document_id).first()
    paragraph_list = native_search(
        QuerySet(Paragraph).filter(document_id=document_id),
        get_file_content(os.path.join(sql_dir, 'list_paragraph_document_name.sql')))
    problem_mapping_list = native_search(
        QuerySet(ProblemParagraphMapping).filter(document_id=document_id),
        get_file_content(os.path.join(sql_dir, 'list_problem_mapping.sql')),
        with_table_name=True)

    data_dict, document_dict = self.merge_problem(paragraph_list, problem_mapping_list, [document])
    workbook = self.get_workbook(data_dict, document_dict)

    # The workbook is written straight into the response body.
    response = HttpResponse(content_type='application/vnd.ms-excel')
    response['Content-Disposition'] = 'attachment; filename="data.xlsx"'
    workbook.save(response)
    return response
|
||||||
|
|
||||||
|
def export_zip(self, with_valid=True):
    """Export one document as a zip archive containing the workbook and its referenced images.

    The workbook is saved as ``knowledge.xlsx`` in a temporary directory, the image
    references found in each paragraph are handed to ``write_image`` for that same
    directory, and the directory is then zipped into the response.

    :param with_valid: validate the serializer input first (raising on failure).
    :return: an ``HttpResponse`` of type ``application/zip`` served as ``archive.zip``.
    """
    if with_valid:
        self.is_valid(raise_exception=True)
    document_id = self.data.get("document_id")
    sql_dir = os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql')

    document = QuerySet(Document).filter(id=document_id).first()
    paragraph_list = native_search(
        QuerySet(Paragraph).filter(document_id=document_id),
        get_file_content(os.path.join(sql_dir, 'list_paragraph_document_name.sql')))
    problem_mapping_list = native_search(
        QuerySet(ProblemParagraphMapping).filter(document_id=document_id),
        get_file_content(os.path.join(sql_dir, 'list_problem_mapping.sql')),
        with_table_name=True)
    data_dict, document_dict = self.merge_problem(paragraph_list, problem_mapping_list, [document])

    # One list of image references per paragraph.
    images_per_paragraph = []
    for paragraph in paragraph_list:
        images_per_paragraph.append(parse_image(paragraph.get('content')))

    workbook = DocumentSerializers.Operate.get_workbook(data_dict, document_dict)
    response = HttpResponse(content_type='application/zip')
    response['Content-Disposition'] = 'attachment; filename="archive.zip"'

    archive = io.BytesIO()
    with TemporaryDirectory() as tempdir:
        workbook.save(os.path.join(tempdir, 'knowledge.xlsx'))
        for image_list in images_per_paragraph:
            write_image(tempdir, image_list)
        zip_dir(tempdir, archive)
    response.write(archive.getvalue())
    return response
|
||||||
|
|
||||||
def one(self, with_valid=False):
|
def one(self, with_valid=False):
|
||||||
if with_valid:
|
if with_valid:
|
||||||
self.is_valid(raise_exception=True)
|
self.is_valid(raise_exception=True)
|
||||||
@ -441,6 +549,78 @@ class DocumentSerializers(serializers.Serializer):
|
|||||||
except AlreadyQueued as e:
|
except AlreadyQueued as e:
|
||||||
raise AppApiException(500, _('The task is being executed, please do not send it repeatedly.'))
|
raise AppApiException(500, _('The task is being executed, please do not send it repeatedly.'))
|
||||||
|
|
||||||
|
@staticmethod
def get_workbook(data_dict, document_dict):
    """Build an openpyxl workbook with one sheet per entry of ``data_dict``.

    Every sheet starts with a translated three-column header row (title, content,
    questions) followed by the rows stored under that sheet id.

    :param data_dict: mapping of sheet id -> list of rows (each row a list of cell values).
        It is not modified.
    :param document_dict: mapping of sheet id -> sheet title; a missing id passes ``None``
        to ``create_sheet``.
    :return: the populated ``openpyxl.Workbook`` (not yet saved).
    """
    workbook = openpyxl.Workbook()
    # Drop the sheet openpyxl creates automatically so only our own sheets remain.
    workbook.remove(workbook.active)
    # With no data, still emit one empty sheet. Use a local fallback instead of inserting
    # a 'sheet' key into the caller's dict as the previous implementation did.
    sheets = data_dict if data_dict else {'sheet': []}
    header = [gettext('Section title (optional)'),
              gettext('Section content (required, question answer, no more than 4096 characters)'),
              gettext('Question (optional, one per line in the cell)')]
    for sheet_id, rows in sheets.items():
        worksheet = workbook.create_sheet(document_dict.get(sheet_id))
        # Worksheet rows and columns are addressed starting from 1.
        for row_idx, row in enumerate([header, *rows], start=1):
            for col_idx, col in enumerate(row, start=1):
                cell = worksheet.cell(row=row_idx, column=col_idx)
                if isinstance(col, str):
                    # Strip the characters openpyxl lists as illegal in cell values.
                    col = ILLEGAL_CHARACTERS_RE.sub('', col)
                    # Text starting with = + - @ gets a U+FEFF prefix, presumably so that
                    # spreadsheet applications do not evaluate it as a formula.
                    if col.startswith(('=', '+', '-', '@')):
                        col = '\ufeff' + col
                cell.value = col
    return workbook
|
||||||
|
|
||||||
|
@staticmethod
def merge_problem(paragraph_list: List[Dict], problem_mapping_list: List[Dict], document_list):
    """Group paragraphs into per-document sheet rows and assign each document a sheet name.

    :param paragraph_list: dicts read via the keys ``id``, ``document_id``, ``document_name``,
        ``title`` and ``content``.
    :param problem_mapping_list: dicts read via the keys ``paragraph_id`` and ``content``.
    :param document_list: objects exposing ``id`` and ``name``; a document that has no
        paragraph still receives a sheet holding a single empty row.
    :return: ``(rows_by_document_id, sheet_name_by_document_id)``. Each row is
        ``[title, content, problems joined by newline]``. Documents whose sanitized names
        collide get a numeric suffix (``name``, ``name1``, ...).
    """
    # Index the problems by paragraph once rather than rescanning the whole mapping list
    # for every paragraph; insertion order keeps each paragraph's problems in input order.
    problems_by_paragraph = {}
    for problem_mapping in problem_mapping_list:
        problems_by_paragraph.setdefault(problem_mapping.get('paragraph_id'), []).append(
            problem_mapping.get('content'))

    result = {}
    document_dict = {}
    for paragraph in paragraph_list:
        document_id = paragraph.get('document_id')
        problem_list = problems_by_paragraph.get(paragraph.get('id'), [])
        document_name = DocumentSerializers.Operate.reset_document_name(paragraph.get('document_name'))
        # Sanitized sheet name -> ids of all documents sharing it.
        document_dict.setdefault(document_name, set()).add(document_id)
        result.setdefault(document_id, []).append(
            [paragraph.get('title'), paragraph.get('content'), '\n'.join(problem_list)])

    for document in document_list:
        if document.id not in result:
            document_name = DocumentSerializers.Operate.reset_document_name(document.name)
            result[document.id] = [[]]
            document_dict.setdefault(document_name, set()).add(document.id)

    result_document_dict = {}
    for d_name, d_ids in document_dict.items():
        # The first document keeps the plain name; the others get their index appended.
        for index, d_id in enumerate(d_ids):
            result_document_dict[d_id] = d_name if index == 0 else d_name + str(index)
    return result, result_document_dict
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def reset_document_name(document_name):
|
||||||
|
if document_name is not None:
|
||||||
|
document_name = document_name.strip()[0:29]
|
||||||
|
if document_name is None or not Utils.valid_sheet_name(document_name):
|
||||||
|
return "Sheet"
|
||||||
|
return document_name.strip()
|
||||||
|
|
||||||
class Create(serializers.Serializer):
|
class Create(serializers.Serializer):
|
||||||
workspace_id = serializers.UUIDField(required=True, label=_('workspace id'))
|
workspace_id = serializers.UUIDField(required=True, label=_('workspace id'))
|
||||||
knowledge_id = serializers.UUIDField(required=True, label=_('document id'))
|
knowledge_id = serializers.UUIDField(required=True, label=_('document id'))
|
||||||
|
|||||||
@ -1,9 +1,11 @@
|
|||||||
|
import io
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import traceback
|
import traceback
|
||||||
from functools import reduce
|
from functools import reduce
|
||||||
from typing import Dict
|
from tempfile import TemporaryDirectory
|
||||||
|
from typing import Dict, List
|
||||||
|
|
||||||
import uuid_utils.compat as uuid
|
import uuid_utils.compat as uuid
|
||||||
from celery_once import AlreadyQueued
|
from celery_once import AlreadyQueued
|
||||||
@ -11,6 +13,7 @@ from django.core import validators
|
|||||||
from django.db import transaction, models
|
from django.db import transaction, models
|
||||||
from django.db.models import QuerySet
|
from django.db.models import QuerySet
|
||||||
from django.db.models.functions import Reverse, Substr
|
from django.db.models.functions import Reverse, Substr
|
||||||
|
from django.http import HttpResponse
|
||||||
from django.utils.translation import gettext_lazy as _
|
from django.utils.translation import gettext_lazy as _
|
||||||
from rest_framework import serializers
|
from rest_framework import serializers
|
||||||
|
|
||||||
@ -20,13 +23,13 @@ from common.db.search import native_search, get_dynamics_model, native_page_sear
|
|||||||
from common.db.sql_execute import select_list
|
from common.db.sql_execute import select_list
|
||||||
from common.event import ListenerManagement
|
from common.event import ListenerManagement
|
||||||
from common.exception.app_exception import AppApiException
|
from common.exception.app_exception import AppApiException
|
||||||
from common.utils.common import valid_license, post, get_file_content
|
from common.utils.common import valid_license, post, get_file_content, parse_image
|
||||||
from common.utils.fork import Fork, ChildLink
|
from common.utils.fork import Fork, ChildLink
|
||||||
from common.utils.split_model import get_split_model
|
from common.utils.split_model import get_split_model
|
||||||
from knowledge.models import Knowledge, KnowledgeScope, KnowledgeType, Document, Paragraph, Problem, \
|
from knowledge.models import Knowledge, KnowledgeScope, KnowledgeType, Document, Paragraph, Problem, \
|
||||||
ProblemParagraphMapping, TaskType, State, SearchMode, KnowledgeFolder
|
ProblemParagraphMapping, TaskType, State, SearchMode, KnowledgeFolder
|
||||||
from knowledge.serializers.common import ProblemParagraphManage, get_embedding_model_id_by_knowledge_id, MetaSerializer, \
|
from knowledge.serializers.common import ProblemParagraphManage, get_embedding_model_id_by_knowledge_id, MetaSerializer, \
|
||||||
GenerateRelatedSerializer, get_embedding_model_by_knowledge_id, list_paragraph
|
GenerateRelatedSerializer, get_embedding_model_by_knowledge_id, list_paragraph, write_image, zip_dir
|
||||||
from knowledge.serializers.document import DocumentSerializers
|
from knowledge.serializers.document import DocumentSerializers
|
||||||
from knowledge.task.embedding import embedding_by_knowledge, delete_embedding_by_knowledge
|
from knowledge.task.embedding import embedding_by_knowledge, delete_embedding_by_knowledge
|
||||||
from knowledge.task.generate import generate_related_by_knowledge_id
|
from knowledge.task.generate import generate_related_by_knowledge_id
|
||||||
@ -331,6 +334,77 @@ class KnowledgeSerializer(serializers.Serializer):
|
|||||||
delete_embedding_by_knowledge(self.data.get('knowledge_id'))
|
delete_embedding_by_knowledge(self.data.get('knowledge_id'))
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
def export_excel(self, with_valid=True):
    """Export every document of the knowledge base into one xlsx download.

    :param with_valid: validate the serializer input first (raising on failure).
    :return: an ``HttpResponse`` whose body is the saved workbook, served as ``knowledge.xlsx``.
    """
    if with_valid:
        self.is_valid(raise_exception=True)
    # NOTE(review): this method originally read only the 'id' key, while the neighbouring
    # code in this serializer reads 'knowledge_id'. 'id' is still preferred when present;
    # otherwise fall back to 'knowledge_id' so the filters below are not run with None.
    # Confirm against the serializer's declared fields.
    knowledge_id = self.data.get('id') or self.data.get('knowledge_id')
    sql_dir = os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql')

    document_list = QuerySet(Document).filter(knowledge_id=knowledge_id)
    paragraph_list = native_search(
        QuerySet(Paragraph).filter(knowledge_id=knowledge_id),
        get_file_content(os.path.join(sql_dir, 'list_paragraph_document_name.sql')))
    problem_mapping_list = native_search(
        QuerySet(ProblemParagraphMapping).filter(knowledge_id=knowledge_id),
        get_file_content(os.path.join(sql_dir, 'list_problem_mapping.sql')),
        with_table_name=True)
    data_dict, document_dict = DocumentSerializers.Operate.merge_problem(paragraph_list, problem_mapping_list,
                                                                         document_list)
    workbook = DocumentSerializers.Operate.get_workbook(data_dict, document_dict)
    # The workbook is written straight into the response body.
    response = HttpResponse(content_type='application/vnd.ms-excel')
    response['Content-Disposition'] = 'attachment; filename="knowledge.xlsx"'
    workbook.save(response)
    return response
|
||||||
|
|
||||||
|
def export_zip(self, with_valid=True):
    """Export the knowledge base as a zip archive with the workbook and its referenced images.

    The workbook is saved as ``knowledge.xlsx`` in a temporary directory, the image
    references found in each paragraph are handed to ``write_image`` for that same
    directory, and the directory is then zipped into the response.

    :param with_valid: validate the serializer input first (raising on failure).
    :return: an ``HttpResponse`` of type ``application/zip`` served as ``archive.zip``.
    """
    if with_valid:
        self.is_valid(raise_exception=True)
    # NOTE(review): this method originally read only the 'id' key, while the neighbouring
    # code in this serializer reads 'knowledge_id'. 'id' is still preferred when present;
    # otherwise fall back to 'knowledge_id' so the filters below are not run with None.
    # Confirm against the serializer's declared fields.
    knowledge_id = self.data.get('id') or self.data.get('knowledge_id')
    sql_dir = os.path.join(PROJECT_DIR, "apps", "knowledge", 'sql')

    document_list = QuerySet(Document).filter(knowledge_id=knowledge_id)
    paragraph_list = native_search(
        QuerySet(Paragraph).filter(knowledge_id=knowledge_id),
        get_file_content(os.path.join(sql_dir, 'list_paragraph_document_name.sql')))
    problem_mapping_list = native_search(
        QuerySet(ProblemParagraphMapping).filter(knowledge_id=knowledge_id),
        get_file_content(os.path.join(sql_dir, 'list_problem_mapping.sql')),
        with_table_name=True)
    data_dict, document_dict = DocumentSerializers.Operate.merge_problem(paragraph_list, problem_mapping_list,
                                                                         document_list)
    # One list of image references per paragraph.
    res = [parse_image(paragraph.get('content')) for paragraph in paragraph_list]

    workbook = DocumentSerializers.Operate.get_workbook(data_dict, document_dict)
    response = HttpResponse(content_type='application/zip')
    response['Content-Disposition'] = 'attachment; filename="archive.zip"'
    zip_buffer = io.BytesIO()
    with TemporaryDirectory() as tempdir:
        workbook.save(os.path.join(tempdir, 'knowledge.xlsx'))
        for image_list in res:
            write_image(tempdir, image_list)
        zip_dir(tempdir, zip_buffer)
    response.write(zip_buffer.getvalue())
    return response
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def merge_problem(paragraph_list: List[Dict], problem_mapping_list: List[Dict]):
|
||||||
|
result = {}
|
||||||
|
document_dict = {}
|
||||||
|
|
||||||
|
for paragraph in paragraph_list:
|
||||||
|
problem_list = [problem_mapping.get('content') for problem_mapping in problem_mapping_list if
|
||||||
|
problem_mapping.get('paragraph_id') == paragraph.get('id')]
|
||||||
|
document_sheet = result.get(paragraph.get('document_id'))
|
||||||
|
d = document_dict.get(paragraph.get('document_name'))
|
||||||
|
if d is None:
|
||||||
|
document_dict[paragraph.get('document_name')] = {paragraph.get('document_id')}
|
||||||
|
else:
|
||||||
|
d.add(paragraph.get('document_id'))
|
||||||
|
|
||||||
|
if document_sheet is None:
|
||||||
|
result[paragraph.get('document_id')] = [[paragraph.get('title'), paragraph.get('content'),
|
||||||
|
'\n'.join(problem_list)]]
|
||||||
|
else:
|
||||||
|
document_sheet.append([paragraph.get('title'), paragraph.get('content'), '\n'.join(problem_list)])
|
||||||
|
result_document_dict = {}
|
||||||
|
for d_name in document_dict:
|
||||||
|
for index, d_id in enumerate(document_dict.get(d_name)):
|
||||||
|
result_document_dict[d_id] = d_name if index == 0 else d_name + str(index)
|
||||||
|
return result, result_document_dict
|
||||||
|
|
||||||
class Create(serializers.Serializer):
|
class Create(serializers.Serializer):
|
||||||
user_id = serializers.UUIDField(required=True, label=_('user id'))
|
user_id = serializers.UUIDField(required=True, label=_('user id'))
|
||||||
workspace_id = serializers.CharField(required=True, label=_('workspace id'))
|
workspace_id = serializers.CharField(required=True, label=_('workspace id'))
|
||||||
|
|||||||
@ -14,6 +14,8 @@ urlpatterns = [
|
|||||||
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/generate_related', views.KnowledgeView.GenerateRelated.as_view()),
|
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/generate_related', views.KnowledgeView.GenerateRelated.as_view()),
|
||||||
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/embedding', views.KnowledgeView.Embedding.as_view()),
|
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/embedding', views.KnowledgeView.Embedding.as_view()),
|
||||||
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/hit_test', views.KnowledgeView.HitTest.as_view()),
|
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/hit_test', views.KnowledgeView.HitTest.as_view()),
|
||||||
|
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/export', views.KnowledgeView.Export.as_view()),
|
||||||
|
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/export_zip', views.KnowledgeView.ExportZip.as_view()),
|
||||||
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document', views.DocumentView.as_view()),
|
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document', views.DocumentView.as_view()),
|
||||||
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/split', views.DocumentView.Split.as_view()),
|
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/split', views.DocumentView.Split.as_view()),
|
||||||
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/split_pattern', views.DocumentView.SplitPattern.as_view()),
|
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/split_pattern', views.DocumentView.SplitPattern.as_view()),
|
||||||
@ -26,11 +28,15 @@ urlpatterns = [
|
|||||||
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/qa', views.QaDocumentView.as_view()),
|
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/qa', views.QaDocumentView.as_view()),
|
||||||
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/table', views.TableDocumentView.as_view()),
|
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/table', views.TableDocumentView.as_view()),
|
||||||
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/batch_hit_handling', views.DocumentView.BatchEditHitHandling.as_view()),
|
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/batch_hit_handling', views.DocumentView.BatchEditHitHandling.as_view()),
|
||||||
|
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/template/export', views.Template.as_view()),
|
||||||
|
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/table_template/export', views.TableTemplate.as_view()),
|
||||||
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>', views.DocumentView.Operate.as_view()),
|
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>', views.DocumentView.Operate.as_view()),
|
||||||
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/sync', views.DocumentView.SyncWeb.as_view()),
|
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/sync', views.DocumentView.SyncWeb.as_view()),
|
||||||
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/refresh', views.DocumentView.Refresh.as_view()),
|
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/refresh', views.DocumentView.Refresh.as_view()),
|
||||||
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/cancel_task', views.DocumentView.CancelTask.as_view()),
|
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/cancel_task', views.DocumentView.CancelTask.as_view()),
|
||||||
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/batch_cancel_task', views.DocumentView.BatchCancelTask.as_view()),
|
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/batch_cancel_task', views.DocumentView.BatchCancelTask.as_view()),
|
||||||
|
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/export', views.DocumentView.Export.as_view()),
|
||||||
|
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/export_zip', views.DocumentView.ExportZip.as_view()),
|
||||||
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph', views.ParagraphView.as_view()),
|
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph', views.ParagraphView.as_view()),
|
||||||
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph/batch_delete', views.ParagraphView.BatchDelete.as_view()),
|
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph/batch_delete', views.ParagraphView.BatchDelete.as_view()),
|
||||||
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph/batch_generate_related', views.ParagraphView.BatchGenerateRelated.as_view()),
|
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph/batch_generate_related', views.ParagraphView.BatchGenerateRelated.as_view()),
|
||||||
|
|||||||
@ -11,7 +11,8 @@ from common.result import result
|
|||||||
from knowledge.api.document import DocumentSplitAPI, DocumentBatchAPI, DocumentBatchCreateAPI, DocumentCreateAPI, \
|
from knowledge.api.document import DocumentSplitAPI, DocumentBatchAPI, DocumentBatchCreateAPI, DocumentCreateAPI, \
|
||||||
DocumentReadAPI, DocumentEditAPI, DocumentDeleteAPI, TableDocumentCreateAPI, QaDocumentCreateAPI, \
|
DocumentReadAPI, DocumentEditAPI, DocumentDeleteAPI, TableDocumentCreateAPI, QaDocumentCreateAPI, \
|
||||||
WebDocumentCreateAPI, CancelTaskAPI, BatchCancelTaskAPI, SyncWebAPI, RefreshAPI, BatchEditHitHandlingAPI, \
|
WebDocumentCreateAPI, CancelTaskAPI, BatchCancelTaskAPI, SyncWebAPI, RefreshAPI, BatchEditHitHandlingAPI, \
|
||||||
DocumentTreeReadAPI, DocumentSplitPatternAPI, BatchRefreshAPI, BatchGenerateRelatedAPI
|
DocumentTreeReadAPI, DocumentSplitPatternAPI, BatchRefreshAPI, BatchGenerateRelatedAPI, TemplateExportAPI, \
|
||||||
|
DocumentExportAPI
|
||||||
from knowledge.serializers.document import DocumentSerializers
|
from knowledge.serializers.document import DocumentSerializers
|
||||||
|
|
||||||
|
|
||||||
@ -384,6 +385,34 @@ class DocumentView(APIView):
|
|||||||
}
|
}
|
||||||
).page(current_page, page_size))
|
).page(current_page, page_size))
|
||||||
|
|
||||||
|
class Export(APIView):
    """Export a single document of a knowledge base.

    Routed from
    ``workspace/<workspace_id>/knowledge/<knowledge_id>/document/<document_id>/export``.
    """
    authentication_classes = [TokenAuth]

    @extend_schema(
        summary=_('Export document'),
        operation_id=_('Export document'),  # type: ignore
        parameters=DocumentExportAPI.get_parameters(),
        responses=DocumentExportAPI.get_response(),
        tags=[_('Knowledge Base/Documentation')]  # type: ignore
    )
    @has_permissions(PermissionConstants.KNOWLEDGE_DOCUMENT_EXPORT.get_workspace_permission())
    def get(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str):
        """Return the export response produced by ``DocumentSerializers.Operate.export``.

        The parameter names must match the captured groups of the URL route:
        Django passes them as keyword arguments, so the previous
        ``dataset_id`` parameter made every request fail with ``TypeError``.

        :param request: the incoming DRF request
        :param workspace_id: workspace id captured from the URL path
        :param knowledge_id: knowledge base id captured from the URL path
        :param document_id: document id captured from the URL path
        """
        # NOTE(review): keys follow the knowledge_id/workspace_id convention the
        # other serializer calls in this module use — confirm against the
        # fields declared on DocumentSerializers.Operate.
        return DocumentSerializers.Operate(data={
            'workspace_id': workspace_id,
            'knowledge_id': knowledge_id,
            'document_id': document_id,
        }).export()
|
||||||
|
|
||||||
|
class ExportZip(APIView):
    """Export a single document of a knowledge base as a zip archive.

    Routed from
    ``workspace/<workspace_id>/knowledge/<knowledge_id>/document/<document_id>/export_zip``.
    """
    authentication_classes = [TokenAuth]

    @extend_schema(
        summary=_('Export Zip document'),
        operation_id=_('Export Zip document'),  # type: ignore
        parameters=DocumentExportAPI.get_parameters(),
        responses=DocumentExportAPI.get_response(),
        tags=[_('Knowledge Base/Documentation')]  # type: ignore
    )
    @has_permissions(PermissionConstants.KNOWLEDGE_DOCUMENT_EXPORT.get_workspace_permission())
    def get(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str):
        """Return the response produced by ``DocumentSerializers.Operate.export_zip``.

        The parameter names must match the captured groups of the URL route:
        Django passes them as keyword arguments, so the previous
        ``dataset_id`` parameter made every request fail with ``TypeError``.

        :param request: the incoming DRF request
        :param workspace_id: workspace id captured from the URL path
        :param knowledge_id: knowledge base id captured from the URL path
        :param document_id: document id captured from the URL path
        """
        # NOTE(review): keys follow the knowledge_id/workspace_id convention the
        # other serializer calls in this module use — confirm against the
        # fields declared on DocumentSerializers.Operate.
        return DocumentSerializers.Operate(data={
            'workspace_id': workspace_id,
            'knowledge_id': knowledge_id,
            'document_id': document_id,
        }).export_zip()
|
||||||
|
|
||||||
|
|
||||||
class WebDocumentView(APIView):
|
class WebDocumentView(APIView):
|
||||||
authentication_classes = [TokenAuth]
|
authentication_classes = [TokenAuth]
|
||||||
@ -443,3 +472,30 @@ class TableDocumentView(APIView):
|
|||||||
return result.success(DocumentSerializers.Create(
|
return result.success(DocumentSerializers.Create(
|
||||||
data={'knowledge_id': knowledge_id, 'workspace_id': workspace_id}
|
data={'knowledge_id': knowledge_id, 'workspace_id': workspace_id}
|
||||||
).save_table({'file_list': request.FILES.getlist('file')}, with_valid=True))
|
).save_table({'file_list': request.FILES.getlist('file')}, with_valid=True))
|
||||||
|
|
||||||
|
|
||||||
|
class Template(APIView):
    """Serve the QA import template.

    The ``type`` query parameter selects the template format and is forwarded
    unchanged to ``DocumentSerializers.Export``.
    """
    authentication_classes = [TokenAuth]

    @extend_schema(
        summary=_('Get QA template'),
        operation_id=_('Get QA template'),  # type: ignore
        parameters=TemplateExportAPI.get_parameters(),
        responses=TemplateExportAPI.get_response(),
        tags=[_('Knowledge Base/Documentation')],  # type: ignore
    )
    def get(self, request: Request):
        """Validate the requested template type and return the exported template."""
        template_type = request.query_params.get('type')
        exporter = DocumentSerializers.Export(data={'type': template_type})
        return exporter.export(with_valid=True)
|
||||||
|
|
||||||
|
|
||||||
|
class TableTemplate(APIView):
    """Serve the form (table) import template.

    The ``type`` query parameter selects the template format and is forwarded
    unchanged to ``DocumentSerializers.Export``.
    """
    authentication_classes = [TokenAuth]

    @extend_schema(
        summary=_('Get form template'),
        operation_id=_('Get form template'),  # type: ignore
        parameters=TemplateExportAPI.get_parameters(),
        responses=TemplateExportAPI.get_response(),
        tags=[_('Knowledge Base/Documentation')],  # type: ignore
    )
    def get(self, request: Request):
        """Validate the requested template type and return the exported table template."""
        template_type = request.query_params.get('type')
        exporter = DocumentSerializers.Export(data={'type': template_type})
        return exporter.table_export(with_valid=True)
|
||||||
|
|||||||
@ -9,7 +9,7 @@ from common.constants.permission_constants import PermissionConstants
|
|||||||
from common.result import result
|
from common.result import result
|
||||||
from knowledge.api.knowledge import KnowledgeBaseCreateAPI, KnowledgeWebCreateAPI, KnowledgeTreeReadAPI, \
|
from knowledge.api.knowledge import KnowledgeBaseCreateAPI, KnowledgeWebCreateAPI, KnowledgeTreeReadAPI, \
|
||||||
KnowledgeEditAPI, KnowledgeReadAPI, KnowledgePageAPI, SyncWebAPI, GenerateRelatedAPI, HitTestAPI, EmbeddingAPI, \
|
KnowledgeEditAPI, KnowledgeReadAPI, KnowledgePageAPI, SyncWebAPI, GenerateRelatedAPI, HitTestAPI, EmbeddingAPI, \
|
||||||
GetModelAPI
|
GetModelAPI, KnowledgeExportAPI
|
||||||
from knowledge.serializers.knowledge import KnowledgeSerializer
|
from knowledge.serializers.knowledge import KnowledgeSerializer
|
||||||
from models_provider.serializers.model_serializer import ModelSerializer
|
from models_provider.serializers.model_serializer import ModelSerializer
|
||||||
|
|
||||||
@ -182,6 +182,34 @@ class KnowledgeView(APIView):
|
|||||||
data={'knowledge_id': knowledge_id, 'workspace_id': workspace_id, 'user_id': request.user.id}
|
data={'knowledge_id': knowledge_id, 'workspace_id': workspace_id, 'user_id': request.user.id}
|
||||||
).embedding())
|
).embedding())
|
||||||
|
|
||||||
|
class Export(APIView):
    """Export a knowledge base through ``KnowledgeSerializer.Operate.export_excel``."""
    authentication_classes = [TokenAuth]

    @extend_schema(
        summary=_('Export knowledge base'),
        operation_id=_('Export knowledge base'),  # type: ignore
        parameters=KnowledgeExportAPI.get_parameters(),
        responses=KnowledgeExportAPI.get_response(),
        tags=[_('Knowledge Base')],  # type: ignore
    )
    @has_permissions(PermissionConstants.KNOWLEDGE_EXPORT.get_workspace_permission())
    def get(self, request: Request, knowledge_id: str):
        """Return the export response for the given knowledge base.

        :param request: the incoming DRF request; its user id is forwarded
        :param knowledge_id: id of the knowledge base to export
        """
        # NOTE(review): other handlers in this module also receive workspace_id
        # and pass 'knowledge_id'/'workspace_id' keys — confirm the route for
        # this view captures only knowledge_id and that Operate expects 'id'.
        payload = {'id': knowledge_id, 'user_id': request.user.id}
        operate = KnowledgeSerializer.Operate(data=payload)
        return operate.export_excel()
|
||||||
|
|
||||||
|
class ExportZip(APIView):
    """Export a knowledge base, images included, through ``KnowledgeSerializer.Operate.export_zip``."""
    authentication_classes = [TokenAuth]

    @extend_schema(
        summary=_('Export knowledge base containing images'),
        operation_id=_('Export knowledge base containing images'),  # type: ignore
        parameters=KnowledgeExportAPI.get_parameters(),
        responses=KnowledgeExportAPI.get_response(),
        tags=[_('Knowledge Base')],  # type: ignore
    )
    @has_permissions(PermissionConstants.KNOWLEDGE_EXPORT.get_workspace_permission())
    def get(self, request: Request, knowledge_id: str):
        """Return the zip export response for the given knowledge base.

        :param request: the incoming DRF request; its user id is forwarded
        :param knowledge_id: id of the knowledge base to export
        """
        # NOTE(review): other handlers in this module also receive workspace_id
        # and pass 'knowledge_id'/'workspace_id' keys — confirm the route for
        # this view captures only knowledge_id and that Operate expects 'id'.
        payload = {'id': knowledge_id, 'user_id': request.user.id}
        operate = KnowledgeSerializer.Operate(data=payload)
        return operate.export_zip()
|
||||||
|
|
||||||
class GenerateRelated(APIView):
|
class GenerateRelated(APIView):
|
||||||
authentication_classes = [TokenAuth]
|
authentication_classes = [TokenAuth]
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user