feat: Generate related questions for a knowledge base (#2760)

This commit is contained in:
shaohuzhang1 2025-04-01 12:46:30 +08:00 committed by GitHub
parent 06867d33cb
commit 4b9cecd4d1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 139 additions and 10 deletions

View File

@ -222,3 +222,26 @@ def get_embedding_model_id_by_dataset_id_list(dataset_id_list: List):
if len(dataset_list) == 0: if len(dataset_list) == 0:
raise Exception(_('Knowledge base setting error, please reset the knowledge base')) raise Exception(_('Knowledge base setting error, please reset the knowledge base'))
return str(dataset_list[0].embedding_mode_id) return str(dataset_list[0].embedding_mode_id)
class GenerateRelatedSerializer(ApiMixin, serializers.Serializer):
    """Validate the request body of a "generate related" request.

    Fields:
        model_id: UUID of the model to use (required).
        prompt: Prompt text (required).
        state_list: Optional list of state values supplied by the caller.
    """
    model_id = serializers.UUIDField(required=True, error_messages=ErrMessage.uuid(_('Model id')))
    # The prompt is free text, so it takes the char message set rather than the uuid one.
    prompt = serializers.CharField(required=True, error_messages=ErrMessage.char(_('Prompt word')))
    # Label is translated like the other fields (the same msgid is used in the schema title below).
    state_list = serializers.ListField(required=False, child=serializers.CharField(required=True),
                                       error_messages=ErrMessage.list(_('state list')))

    @staticmethod
    def get_request_body_api():
        """Return the OpenAPI schema describing the request body validated by this serializer."""
        return openapi.Schema(
            type=openapi.TYPE_OBJECT,
            # Mirror the serializer: model_id and prompt are mandatory, state_list is optional.
            required=['model_id', 'prompt'],
            properties={
                'model_id': openapi.Schema(type=openapi.TYPE_STRING,
                                           title=_('Model id'),
                                           description=_('Model id')),
                'prompt': openapi.Schema(type=openapi.TYPE_STRING, title=_('Prompt word'),
                                         description=_("Prompt word")),
                'state_list': openapi.Schema(type=openapi.TYPE_ARRAY,
                                             items=openapi.Schema(type=openapi.TYPE_STRING),
                                             title=_('state list'))
            }
        )

View File

@ -23,6 +23,7 @@ from django.contrib.postgres.fields import ArrayField
from django.core import validators from django.core import validators
from django.db import transaction, models from django.db import transaction, models
from django.db.models import QuerySet from django.db.models import QuerySet
from django.db.models.functions import Reverse, Substr
from django.http import HttpResponse from django.http import HttpResponse
from drf_yasg import openapi from drf_yasg import openapi
from rest_framework import serializers from rest_framework import serializers
@ -42,9 +43,10 @@ from common.util.split_model import get_split_model
from dataset.models.data_set import DataSet, Document, Paragraph, Problem, Type, ProblemParagraphMapping, TaskType, \ from dataset.models.data_set import DataSet, Document, Paragraph, Problem, Type, ProblemParagraphMapping, TaskType, \
State, File, Image State, File, Image
from dataset.serializers.common_serializers import list_paragraph, MetaSerializer, ProblemParagraphManage, \ from dataset.serializers.common_serializers import list_paragraph, MetaSerializer, ProblemParagraphManage, \
get_embedding_model_by_dataset_id, get_embedding_model_id_by_dataset_id, write_image, zip_dir get_embedding_model_by_dataset_id, get_embedding_model_id_by_dataset_id, write_image, zip_dir, \
GenerateRelatedSerializer
from dataset.serializers.document_serializers import DocumentSerializers, DocumentInstanceSerializer from dataset.serializers.document_serializers import DocumentSerializers, DocumentInstanceSerializer
from dataset.task import sync_web_dataset, sync_replace_web_dataset from dataset.task import sync_web_dataset, sync_replace_web_dataset, generate_related_by_dataset_id
from embedding.models import SearchMode from embedding.models import SearchMode
from embedding.task import embedding_by_dataset, delete_embedding_by_dataset from embedding.task import embedding_by_dataset, delete_embedding_by_dataset
from setting.models import AuthOperate, Model from setting.models import AuthOperate, Model
@ -814,6 +816,31 @@ class DataSetSerializers(serializers.ModelSerializer):
except AlreadyQueued as e: except AlreadyQueued as e:
raise AppApiException(500, _('Failed to send the vectorization task, please try again later!')) raise AppApiException(500, _('Failed to send the vectorization task, please try again later!'))
def generate_related(self, instance: Dict, with_valid=True):
    """Mark a dataset's documents/paragraphs as pending and queue question generation.

    :param instance: request body; reads ``model_id``, ``prompt`` and the optional ``state_list``
    :param with_valid: when true, validate both this serializer and the request body first
    :raises AppApiException: (500) when the dataset-level task is already queued
    """
    if with_valid:
        self.is_valid(raise_exception=True)
        GenerateRelatedSerializer(data=instance).is_valid(raise_exception=True)
    dataset_id = self.data.get('id')
    model_id = instance.get("model_id")
    prompt = instance.get("prompt")
    state_list = instance.get('state_list')
    # Every document of the dataset is flagged as pending for the generate-problem task.
    ListenerManagement.update_status(QuerySet(Document).filter(dataset_id=dataset_id),
                                     TaskType.GENERATE_PROBLEM,
                                     State.PENDING)
    if state_list is None:
        # state_list is optional in the request body. Passing None to an ``__in`` lookup
        # raises inside the ORM, so without a filter all paragraphs of the dataset are selected.
        # NOTE(review): assumes the downstream task also treats None as "no state filter" - confirm.
        paragraph_query_set = QuerySet(Paragraph).filter(dataset_id=dataset_id)
    else:
        # The status string is reversed and one character is taken at the position given by
        # TaskType.GENERATE_PROBLEM.value; paragraphs whose character is in state_list are kept.
        paragraph_query_set = QuerySet(Paragraph).annotate(
            reversed_status=Reverse('status'),
            task_type_status=Substr('reversed_status', TaskType.GENERATE_PROBLEM.value,
                                    1),
        ).filter(task_type_status__in=state_list, dataset_id=dataset_id)
    ListenerManagement.update_status(paragraph_query_set.values('id'),
                                     TaskType.GENERATE_PROBLEM,
                                     State.PENDING)
    ListenerManagement.get_aggregation_document_status_by_dataset_id(dataset_id)()
    try:
        generate_related_by_dataset_id.delay(dataset_id, model_id, prompt, state_list)
    except AlreadyQueued as e:
        # NOTE(review): the message text mentions vectorization although this queues question
        # generation; it is kept unchanged because it is an existing translated msgid.
        raise AppApiException(500, _('Failed to send the vectorization task, please try again later!')) from e
def list_application(self, with_valid=True): def list_application(self, with_valid=True):
if with_valid: if with_valid:
self.is_valid(raise_exception=True) self.is_valid(raise_exception=True)

View File

@ -64,6 +64,17 @@ def get_is_the_task_interrupted(document_id):
return is_the_task_interrupted return is_the_task_interrupted
@celery_app.task(base=QueueOnce, once={'keys': ['dataset_id']},
                 name='celery:generate_related_by_dataset')
def generate_related_by_dataset_id(dataset_id, model_id, prompt, state_list=None):
    """Queue one ``generate_related_by_document_id`` task per document of a dataset.

    :param dataset_id: id of the dataset whose documents are processed
    :param model_id: forwarded unchanged to the per-document task
    :param prompt: forwarded unchanged to the per-document task
    :param state_list: forwarded unchanged to the per-document task
    """
    # Function-scope import so the block does not depend on a module-level logger.
    import logging

    logger = logging.getLogger(__name__)
    document_list = QuerySet(Document).filter(dataset_id=dataset_id)
    for document in document_list:
        try:
            generate_related_by_document_id.delay(document.id, model_id, prompt, state_list)
        except Exception:
            # Best effort: one document that cannot be queued must not stop the others,
            # but the failure is recorded instead of being silently discarded.
            logger.warning('Failed to queue generate_related task for document %s of dataset %s',
                           document.id, dataset_id, exc_info=True)
@celery_app.task(base=QueueOnce, once={'keys': ['document_id']}, @celery_app.task(base=QueueOnce, once={'keys': ['document_id']},
name='celery:generate_related_by_document') name='celery:generate_related_by_document')
def generate_related_by_document_id(document_id, model_id, prompt, state_list=None): def generate_related_by_document_id(document_id, model_id, prompt, state_list=None):

View File

@ -11,6 +11,8 @@ urlpatterns = [
path('dataset/<str:dataset_id>/export', views.Dataset.Export.as_view(), name="export"), path('dataset/<str:dataset_id>/export', views.Dataset.Export.as_view(), name="export"),
path('dataset/<str:dataset_id>/export_zip', views.Dataset.ExportZip.as_view(), name="export_zip"), path('dataset/<str:dataset_id>/export_zip', views.Dataset.ExportZip.as_view(), name="export_zip"),
path('dataset/<str:dataset_id>/re_embedding', views.Dataset.Embedding.as_view(), name="dataset_key"), path('dataset/<str:dataset_id>/re_embedding', views.Dataset.Embedding.as_view(), name="dataset_key"),
path('dataset/<str:dataset_id>/generate_related', views.Dataset.GenerateRelated.as_view(),
name="dataset_generate_related"),
path('dataset/<str:dataset_id>/application', views.Dataset.Application.as_view()), path('dataset/<str:dataset_id>/application', views.Dataset.Application.as_view()),
path('dataset/<int:current_page>/<int:page_size>', views.Dataset.Page.as_view(), name="dataset"), path('dataset/<int:current_page>/<int:page_size>', views.Dataset.Page.as_view(), name="dataset"),
path('dataset/<str:dataset_id>/sync_web', views.Dataset.SyncWeb.as_view()), path('dataset/<str:dataset_id>/sync_web', views.Dataset.SyncWeb.as_view()),

View File

@ -21,6 +21,7 @@ from common.log.log import log
from common.response import result from common.response import result
from common.response.result import get_page_request_params, get_page_api_response, get_api_response from common.response.result import get_page_request_params, get_page_api_response, get_api_response
from common.swagger_api.common_api import CommonApi from common.swagger_api.common_api import CommonApi
from dataset.serializers.common_serializers import GenerateRelatedSerializer
from dataset.serializers.dataset_serializers import DataSetSerializers from dataset.serializers.dataset_serializers import DataSetSerializers
from dataset.views.common import get_dataset_operation_object from dataset.views.common import get_dataset_operation_object
from setting.serializers.provider_serializers import ModelSerializer from setting.serializers.provider_serializers import ModelSerializer
@ -173,6 +174,23 @@ class Dataset(APIView):
return result.success( return result.success(
DataSetSerializers.Operate(data={'id': dataset_id, 'user_id': request.user.id}).re_embedding()) DataSetSerializers.Operate(data={'id': dataset_id, 'user_id': request.user.id}).re_embedding())
class GenerateRelated(APIView):
    # PUT endpoint that hands the request body to DataSetSerializers.Operate.generate_related
    # for the dataset named in the URL.
    # NOTE(review): no permission decorator is visible on this handler - confirm that access
    # control for the dataset is enforced elsewhere.
    authentication_classes = [TokenAuth]

    @action(methods=['PUT'], detail=False)
    @swagger_auto_schema(operation_summary=_('Generate related'), operation_id=_('Generate related'),
                         manual_parameters=DataSetSerializers.Operate.get_request_params_api(),
                         request_body=GenerateRelatedSerializer.get_request_body_api(),
                         tags=[_('Knowledge Base')]
                         )
    # NOTE(review): the log menu is 'document' although this is a dataset endpoint - confirm intended.
    @log(menu='document', operate="Generate related documents",
         get_operation_object=lambda r, keywords: get_dataset_operation_object(keywords.get('dataset_id'))
         )
    def put(self, request: Request, dataset_id: str):
        # Build the operate serializer from the path id and the current user, then delegate.
        operate_data = {'id': dataset_id, 'user_id': request.user.id}
        operate = DataSetSerializers.Operate(data=operate_data)
        return result.success(operate.generate_related(request.data))
class Export(APIView): class Export(APIView):
authentication_classes = [TokenAuth] authentication_classes = [TokenAuth]

View File

@ -7487,4 +7487,7 @@ msgid "Field: {name} Type: {_type} Value: {value} Unsupported types"
msgstr "" msgstr ""
msgid "Field: {name} No value set" msgid "Field: {name} No value set"
msgstr ""
msgid "Generate related"
msgstr "" msgstr ""

View File

@ -7650,4 +7650,7 @@ msgid "Field: {name} Type: {_type} Value: {value} Unsupported types"
msgstr "字段: {name} 类型: {_type} 值: {value} 不支持的类型" msgstr "字段: {name} 类型: {_type} 值: {value} 不支持的类型"
msgid "Field: {name} No value set" msgid "Field: {name} No value set"
msgstr "字段: {name} 未设置值" msgstr "字段: {name} 未设置值"
msgid "Generate related"
msgstr "生成问题"

View File

@ -7660,4 +7660,7 @@ msgid "Field: {name} Type: {_type} Value: {value} Unsupported types"
msgstr "欄位: {name} 類型: {_type} 值: {value} 不支持的類型" msgstr "欄位: {name} 類型: {_type} 值: {value} 不支持的類型"
msgid "Field: {name} No value set" msgid "Field: {name} No value set"
msgstr "欄位: {name} 未設定值" msgstr "欄位: {name} 未設定值"
msgid "Generate related"
msgstr "生成問題"

View File

@ -277,6 +277,20 @@ const importLarkDocument: (
) => Promise<Result<Array<any>>> = (dataset_id, data, loading) => { ) => Promise<Result<Array<any>>> = (dataset_id, data, loading) => {
return post(`${prefix}/lark/${dataset_id}/import`, data, null, loading) return post(`${prefix}/lark/${dataset_id}/import`, data, null, loading)
} }
/**
 * Send a PUT request to the `generate_related` endpoint of a dataset.
 * @param dataset_id id of the dataset
 * @param data request body forwarded as-is
 * @param loading optional loading flag passed through to the request helper
 * @returns the promise returned by the request helper
 */
const generateRelated: (
  dataset_id: string,
  data: any,
  loading?: Ref<boolean>
) => Promise<Result<Array<any>>> = (dataset_id, data, loading) => {
  const url = `${prefix}/${dataset_id}/generate_related`
  return put(url, data, null, loading)
}
export default { export default {
getDataset, getDataset,
@ -297,5 +311,6 @@ export default {
postLarkDataset, postLarkDataset,
getLarkDocumentList, getLarkDocumentList,
importLarkDocument, importLarkDocument,
putLarkDataset putLarkDataset,
generateRelated
} }

View File

@ -51,7 +51,7 @@
/> />
</el-form-item> </el-form-item>
<el-form-item <el-form-item
v-if="apiType === 'document'" v-if="['document', 'dataset'].includes(apiType)"
:label="$t('components.selectParagraph.title')" :label="$t('components.selectParagraph.title')"
prop="state" prop="state"
> >
@ -107,6 +107,7 @@ const stateMap = {
error: ['0', '1', '3', '4', '5', 'n'] error: ['0', '1', '3', '4', '5', 'n']
} }
const FormRef = ref() const FormRef = ref()
const datasetId = ref<string>()
const userId = user.userInfo?.id as string const userId = user.userInfo?.id as string
const form = ref(prompt.get(userId)) const form = ref(prompt.get(userId))
const rules = reactive({ const rules = reactive({
@ -133,7 +134,8 @@ watch(dialogVisible, (bool) => {
} }
}) })
const open = (ids: string[], type: string) => { const open = (ids: string[], type: string, _datasetId?: string) => {
datasetId.value = _datasetId
getModel() getModel()
idList.value = ids idList.value = ids
apiType.value = type apiType.value = type
@ -169,6 +171,15 @@ const submitHandle = async (formEl: FormInstance) => {
emit('refresh') emit('refresh')
dialogVisible.value = false dialogVisible.value = false
}) })
} else if (apiType.value === 'dataset') {
const data = {
...form.value,
state_list: stateMap[state.value]
}
datasetApi.generateRelated(id ? id : datasetId.value, data, loading).then(() => {
MsgSuccess(t('views.document.generateQuestion.successMessage'))
dialogVisible.value = false
})
} }
} }
}) })
@ -177,7 +188,7 @@ const submitHandle = async (formEl: FormInstance) => {
function getModel() { function getModel() {
loading.value = true loading.value = true
datasetApi datasetApi
.getDatasetModel(id) .getDatasetModel(id ? id : datasetId.value)
.then((res: any) => { .then((res: any) => {
modelOptions.value = groupBy(res?.data, 'provider') modelOptions.value = groupBy(res?.data, 'provider')
loading.value = false loading.value = false

View File

@ -127,6 +127,7 @@
v-if="item.type === '1'" v-if="item.type === '1'"
>{{ $t('views.dataset.setting.sync') }}</el-dropdown-item >{{ $t('views.dataset.setting.sync') }}</el-dropdown-item
> >
<el-dropdown-item @click="reEmbeddingDataset(item)"> <el-dropdown-item @click="reEmbeddingDataset(item)">
<AppIcon <AppIcon
iconName="app-document-refresh" iconName="app-document-refresh"
@ -134,6 +135,11 @@
></AppIcon> ></AppIcon>
{{ $t('views.dataset.setting.vectorization') }}</el-dropdown-item {{ $t('views.dataset.setting.vectorization') }}</el-dropdown-item
> >
<el-dropdown-item
icon="Connection"
@click.stop="openGenerateDialog(item)"
>{{ $t('views.document.generateQuestion.title') }}</el-dropdown-item
>
<el-dropdown-item <el-dropdown-item
icon="Setting" icon="Setting"
@click.stop="router.push({ path: `/dataset/${item.id}/setting` })" @click.stop="router.push({ path: `/dataset/${item.id}/setting` })"
@ -165,10 +171,11 @@
</div> </div>
<SyncWebDialog ref="SyncWebDialogRef" @refresh="refresh" /> <SyncWebDialog ref="SyncWebDialogRef" @refresh="refresh" />
<CreateDatasetDialog ref="CreateDatasetDialogRef" /> <CreateDatasetDialog ref="CreateDatasetDialogRef" />
<GenerateRelatedDialog ref="GenerateRelatedDialogRef" />
</div> </div>
</template> </template>
<script setup lang="ts"> <script setup lang="ts">
import { ref, onMounted, reactive, computed } from 'vue' import { ref, onMounted, reactive } from 'vue'
import SyncWebDialog from '@/views/dataset/component/SyncWebDialog.vue' import SyncWebDialog from '@/views/dataset/component/SyncWebDialog.vue'
import CreateDatasetDialog from './component/CreateDatasetDialog.vue' import CreateDatasetDialog from './component/CreateDatasetDialog.vue'
import datasetApi from '@/api/dataset' import datasetApi from '@/api/dataset'
@ -179,7 +186,7 @@ import { ValidType, ValidCount } from '@/enums/common'
import { t } from '@/locales' import { t } from '@/locales'
import useStore from '@/stores' import useStore from '@/stores'
import applicationApi from '@/api/application' import applicationApi from '@/api/application'
import GenerateRelatedDialog from '@/components/generate-related-dialog/index.vue'
const { user, common } = useStore() const { user, common } = useStore()
const router = useRouter() const router = useRouter()
@ -192,6 +199,12 @@ const paginationConfig = reactive({
page_size: 30, page_size: 30,
total: 0 total: 0
}) })
// Template ref of the dialog used to generate related questions.
const GenerateRelatedDialogRef = ref<InstanceType<typeof GenerateRelatedDialog>>()

/**
 * Open the dialog in 'dataset' mode for the given row.
 * Does nothing while the dialog ref is not set.
 */
function openGenerateDialog(row: any) {
  GenerateRelatedDialogRef.value?.open([], 'dataset', row.id)
}
const searchValue = ref('') const searchValue = ref('')