feat: 知识库增加重新向量化功能

This commit is contained in:
shaohuzhang1 2024-05-24 11:27:59 +08:00 committed by GitHub
parent 9ac9c9b64a
commit a3af104ef0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 169 additions and 53 deletions

View File

@ -123,6 +123,8 @@ class ListenerManagement:
:return: None :return: None
""" """
max_kb.info(f"开始--->向量化文档:{document_id}") max_kb.info(f"开始--->向量化文档:{document_id}")
QuerySet(Document).filter(id=document_id).update(**{'status': Status.embedding})
QuerySet(Paragraph).filter(document_id=document_id).update(**{'status': Status.embedding})
status = Status.success status = Status.success
try: try:
data_list = native_search( data_list = native_search(

View File

@ -680,6 +680,11 @@ class DataSetSerializers(serializers.ModelSerializer):
ListenerManagement.delete_embedding_by_dataset_signal.send(self.data.get('id')) ListenerManagement.delete_embedding_by_dataset_signal.send(self.data.get('id'))
return True return True
def re_embedding(self, with_valid=True):
    """
    Send this dataset's id on ``ListenerManagement.embedding_by_dataset_signal``.

    :param with_valid: when truthy, ``is_valid(raise_exception=True)`` is run
                       before the signal is sent
    :return: None
    """
    if with_valid:
        self.is_valid(raise_exception=True)
    dataset_id = self.data.get('id')
    ListenerManagement.embedding_by_dataset_signal.send(dataset_id)
def list_application(self, with_valid=True): def list_application(self, with_valid=True):
if with_valid: if with_valid:
self.is_valid(raise_exception=True) self.is_valid(raise_exception=True)

View File

@ -448,18 +448,7 @@ class DocumentSerializers(ApiMixin, serializers.Serializer):
if with_valid: if with_valid:
self.is_valid(raise_exception=True) self.is_valid(raise_exception=True)
document_id = self.data.get("document_id") document_id = self.data.get("document_id")
document = QuerySet(Document).filter(id=document_id).first() ListenerManagement.embedding_by_document_signal.send(document_id)
if document.type == Type.web:
# 异步同步
work_thread_pool.submit(lambda x: DocumentSerializers.Sync(data={'document_id': document_id}).sync(),
{})
else:
if document.status != Status.embedding.value:
document.status = Status.embedding
document.save()
ListenerManagement.embedding_by_document_signal.send(document_id)
return True
@transaction.atomic @transaction.atomic
def delete(self): def delete(self):

View File

@ -8,6 +8,7 @@ urlpatterns = [
path('dataset/web', views.Dataset.CreateWebDataset.as_view()), path('dataset/web', views.Dataset.CreateWebDataset.as_view()),
path('dataset/qa', views.Dataset.CreateQADataset.as_view()), path('dataset/qa', views.Dataset.CreateQADataset.as_view()),
path('dataset/<str:dataset_id>', views.Dataset.Operate.as_view(), name="dataset_key"), path('dataset/<str:dataset_id>', views.Dataset.Operate.as_view(), name="dataset_key"),
path('dataset/<str:dataset_id>/re_embedding', views.Dataset.Embedding.as_view(), name="dataset_key"),
path('dataset/<str:dataset_id>/application', views.Dataset.Application.as_view()), path('dataset/<str:dataset_id>/application', views.Dataset.Application.as_view()),
path('dataset/<int:current_page>/<int:page_size>', views.Dataset.Page.as_view(), name="dataset"), path('dataset/<int:current_page>/<int:page_size>', views.Dataset.Page.as_view(), name="dataset"),
path('dataset/<str:dataset_id>/sync_web', views.Dataset.SyncWeb.as_view()), path('dataset/<str:dataset_id>/sync_web', views.Dataset.SyncWeb.as_view()),
@ -26,6 +27,7 @@ urlpatterns = [
path('dataset/document/split_pattern', views.Document.SplitPattern.as_view(), path('dataset/document/split_pattern', views.Document.SplitPattern.as_view(),
name="document_operate"), name="document_operate"),
path('dataset/<str:dataset_id>/document/migrate/<str:target_dataset_id>', views.Document.Migrate.as_view()), path('dataset/<str:dataset_id>/document/migrate/<str:target_dataset_id>', views.Document.Migrate.as_view()),
path('dataset/<str:dataset_id>/document/<str:document_id>/sync', views.Document.SyncWeb.as_view()),
path('dataset/<str:dataset_id>/document/<str:document_id>/refresh', views.Document.Refresh.as_view()), path('dataset/<str:dataset_id>/document/<str:document_id>/refresh', views.Document.Refresh.as_view()),
path('dataset/<str:dataset_id>/document/<str:document_id>/paragraph', views.Paragraph.as_view()), path('dataset/<str:dataset_id>/document/<str:document_id>/paragraph', views.Paragraph.as_view()),
path( path(

View File

@ -137,6 +137,21 @@ class Dataset(APIView):
'search_mode': request.query_params.get('search_mode')}).hit_test( 'search_mode': request.query_params.get('search_mode')}).hit_test(
)) ))
class Embedding(APIView):
    # PUT endpoint that triggers DataSetSerializers.Operate.re_embedding() for one dataset.
    # Plain comments are used instead of docstrings so that no docstring text can be
    # picked up as an API description by the schema tooling.
    authentication_classes = [TokenAuth]

    # `methods` expects a list of HTTP method names (as the other views in this module
    # pass it); a bare "PUT" string would be treated as a sequence of single characters.
    @action(methods=['PUT'], detail=False)
    @swagger_auto_schema(operation_summary="重新向量化", operation_id="重新向量化",
                         manual_parameters=DataSetSerializers.Operate.get_request_params_api(),
                         responses=result.get_default_response(),
                         tags=["知识库"]
                         )
    @has_permissions(lambda r, keywords: Permission(group=Group.DATASET, operate=Operate.MANAGE,
                                                    dynamic_tag=keywords.get('dataset_id')))
    def put(self, request: Request, dataset_id: str):
        # The serializer receives the dataset id from the URL and the id of the
        # requesting user; its re_embedding() result is wrapped in the standard response.
        return result.success(
            DataSetSerializers.Operate(data={'id': dataset_id, 'user_id': request.user.id}).re_embedding())
class Operate(APIView): class Operate(APIView):
authentication_classes = [TokenAuth] authentication_classes = [TokenAuth]

View File

@ -168,6 +168,24 @@ class Document(APIView):
def delete(self, request: Request, dataset_id: str): def delete(self, request: Request, dataset_id: str):
return result.success(DocumentSerializers.Batch(data={'dataset_id': dataset_id}).batch_delete(request.data)) return result.success(DocumentSerializers.Batch(data={'dataset_id': dataset_id}).batch_delete(request.data))
class SyncWeb(APIView):
    # PUT endpoint that runs DocumentSerializers.Sync(...).sync() for a single document.
    # Plain comments are used instead of docstrings so that no docstring text can be
    # picked up as an API description by the schema tooling.
    authentication_classes = [TokenAuth]

    @action(methods=['PUT'], detail=False)
    @swagger_auto_schema(operation_summary="同步web站点类型",
                         operation_id="同步web站点类型",
                         manual_parameters=DocumentSerializers.Operate.get_request_params_api(),
                         responses=result.get_default_response(),
                         tags=["知识库/文档"]
                         )
    @has_permissions(
        lambda r, keywords: Permission(group=Group.DATASET, operate=Operate.MANAGE,
                                       dynamic_tag=keywords.get('dataset_id')))
    def put(self, request: Request, dataset_id: str, document_id: str):
        # Both ids come from the URL path and are handed to the serializer as its data.
        sync_data = {'document_id': document_id, 'dataset_id': dataset_id}
        return result.success(DocumentSerializers.Sync(data=sync_data).sync())
class Refresh(APIView): class Refresh(APIView):
authentication_classes = [TokenAuth] authentication_classes = [TokenAuth]

View File

@ -176,6 +176,17 @@ const putSyncWebDataset: (
return put(`${prefix}/${dataset_id}/sync_web`, undefined, { sync_type }, loading) return put(`${prefix}/${dataset_id}/sync_web`, undefined, { sync_type }, loading)
} }
/**
 * Re-embed (re-vectorize) a dataset.
 * Sends a PUT to `${prefix}/${dataset_id}/re_embedding`, passing `undefined`
 * for the second and third arguments of `put`.
 * @param dataset_id id of the dataset
 * @param loading optional ref forwarded to `put` as its last argument
 */
const putReEmbeddingDataset: (
  dataset_id: string,
  loading?: Ref<boolean>
) => Promise<Result<any>> = (dataset_id, loading) =>
  put(`${prefix}/${dataset_id}/re_embedding`, undefined, undefined, loading)
export default { export default {
getDataset, getDataset,
getAllDataset, getAllDataset,
@ -186,6 +197,9 @@ export default {
listUsableApplication, listUsableApplication,
getDatasetHitTest, getDatasetHitTest,
postWebDataset, postWebDataset,
putSyncWebDataset,
putReEmbeddingDataset,
postQADataset, postQADataset,
putSyncWebDataset putSyncWebDataset
} }

View File

@ -137,12 +137,8 @@ const getDocumentDetail: (dataset_id: string, document_id: string) => Promise<Re
/** /**
* *
* @param * @param
* dataset_id, document_id, * dataset_id, document_id,
* {
"name": "string",
"is_active": true
}
*/ */
const putDocumentRefresh: ( const putDocumentRefresh: (
dataset_id: string, dataset_id: string,
@ -157,6 +153,19 @@ const putDocumentRefresh: (
) )
} }
/**
 * Sync a web-site type document.
 * Sends a PUT to `${prefix}/${dataset_id}/document/${document_id}/sync`, passing
 * `undefined` for the second and third arguments of `put`.
 * @param dataset_id id of the dataset that owns the document
 * @param document_id id of the document
 * @param loading optional ref forwarded to `put` as its last argument
 */
const putDocumentSync: (
  dataset_id: string,
  document_id: string,
  loading?: Ref<boolean>
) => Promise<Result<any>> = (dataset_id, document_id, loading) =>
  put(`${prefix}/${dataset_id}/document/${document_id}/sync`, undefined, undefined, loading)
/** /**
* *
* @param dataset_id, * @param dataset_id,
@ -258,6 +267,7 @@ export default {
getDocumentDetail, getDocumentDetail,
listSplitPattern, listSplitPattern,
putDocumentRefresh, putDocumentRefresh,
putDocumentSync,
delMulSyncDocument, delMulSyncDocument,
postWebDocument, postWebDocument,
putMigrateMulDocument, putMigrateMulDocument,

View File

@ -873,5 +873,42 @@ export const iconMap: any = {
) )
]) ])
} }
},
'app-document-refresh': {
iconReader: () => {
return h('i', [
h(
'svg',
{
style: { height: '100%', width: '100%' },
viewBox: '0 0 1024 1024',
version: '1.1',
xmlns: 'http://www.w3.org/2000/svg'
},
[
h('path', {
d: 'M494.592 165.12l-320 208a32 32 0 0 0-14.592 26.88v224a32 32 0 0 0 14.592 26.88l320 208a32 32 0 0 0 34.88 0l320-208a32 32 0 0 0 14.528-26.88v-224a32 32 0 0 0-14.528-26.88l-320-208a32 32 0 0 0-34.88 0zM224 417.408L512 230.144l288 187.2V606.72L512 793.856 224 606.656V417.28z',
fill: 'currentColor'
}),
h('path', {
d: 'M512 592a32 32 0 0 0-32 32V832a32 32 0 0 0 64 0V624a32 32 0 0 0-32-32z',
fill: 'currentColor'
}),
h('path', {
d: 'M165.76 381.632a32 32 0 0 0 7.872 44.608l320 224a32 32 0 0 0 36.736 0l320-224a32 32 0 0 0-36.736-52.48L512 584.96l-301.632-211.2a32 32 0 0 0-44.608 7.872z',
fill: 'currentColor'
}),
h('path', {
d: 'M493.632 373.76a32 32 0 0 1 36.736 0l320 224a32 32 0 0 1-36.736 52.48L512 439.04l-301.632 211.2a32 32 0 1 1-36.736-52.48l320-224z',
fill: 'currentColor'
}),
h('path', {
d: 'M512 160a32 32 0 0 0-32 32v208a32 32 0 0 0 64 0V192a32 32 0 0 0-32-32z',
fill: 'currentColor'
})
]
)
])
}
} }
} }

View File

@ -1,5 +1,9 @@
<template> <template>
<MdEditor noIconfont v-bind="$attrs" /> <MdEditor noIconfont v-bind="$attrs">
<template #defFooters>
<slot name="defFooters"> </slot>
</template>
</MdEditor>
</template> </template>
<script setup lang="ts"> <script setup lang="ts">

View File

@ -75,6 +75,13 @@
v-if="item.type === '1'" v-if="item.type === '1'"
>同步</el-dropdown-item >同步</el-dropdown-item
> >
<el-dropdown-item @click="reEmbeddingDataset(item)">
<AppIcon
iconName="app-document-refresh"
style="font-size: 16px"
></AppIcon>
重新向量化</el-dropdown-item
>
<el-dropdown-item <el-dropdown-item
icon="Setting" icon="Setting"
@click.stop="router.push({ path: `/dataset/${item.id}/setting` })" @click.stop="router.push({ path: `/dataset/${item.id}/setting` })"
@ -118,10 +125,14 @@ const paginationConfig = reactive({
const searchValue = ref('') const searchValue = ref('')
function refresh(row: any) { function refresh() {
MsgSuccess('同步任务发送成功') MsgSuccess('同步任务发送成功')
} }
// Click handler for the dataset card's "重新向量化" menu item: calls the
// re-embedding API for the clicked dataset. The response is intentionally unused.
function reEmbeddingDataset(row: any) {
  const datasetId = row.id
  datasetApi.putReEmbeddingDataset(datasetId).then(() => {})
}
function syncDataset(row: any) { function syncDataset(row: any) {
SyncWebDialogRef.value.open(row.id) SyncWebDialogRef.value.open(row.id)
} }

View File

@ -146,7 +146,7 @@
<span class="mr-4"> <span class="mr-4">
<el-tooltip effect="dark" content="重新向量化" placement="top"> <el-tooltip effect="dark" content="重新向量化" placement="top">
<el-button type="primary" text @click.stop="refreshDocument(row)"> <el-button type="primary" text @click.stop="refreshDocument(row)">
<el-icon><RefreshRight /></el-icon> <AppIcon iconName="app-document-refresh" style="font-size: 16px"></AppIcon>
</el-button> </el-button>
</el-tooltip> </el-tooltip>
</span> </span>
@ -174,13 +174,8 @@
</span> </span>
</div> </div>
<div v-if="datasetDetail.type === '1'"> <div v-if="datasetDetail.type === '1'">
<el-tooltip <el-tooltip effect="dark" content="同步" placement="top">
effect="dark" <el-button type="primary" text @click.stop="syncDocument(row)">
content="同步"
placement="top"
v-if="datasetDetail.type === '1'"
>
<el-button type="primary" text @click.stop="refreshDocument(row)">
<el-icon><Refresh /></el-icon> <el-icon><Refresh /></el-icon>
</el-button> </el-button>
</el-tooltip> </el-tooltip>
@ -191,6 +186,13 @@
</el-button> </el-button>
<template #dropdown> <template #dropdown>
<el-dropdown-menu> <el-dropdown-menu>
<el-dropdown-item @click="refreshDocument(row)">
<AppIcon
iconName="app-document-refresh"
style="font-size: 16px"
></AppIcon>
重新向量化</el-dropdown-item
>
<el-dropdown-item icon="Setting" @click="settingDoc(row)" <el-dropdown-item icon="Setting" @click="settingDoc(row)"
>设置</el-dropdown-item >设置</el-dropdown-item
> >
@ -340,33 +342,33 @@ const closeInterval = () => {
clearInterval(interval) clearInterval(interval)
} }
} }
function refreshDocument(row: any) {
if (row.type === '1') { function syncDocument(row: any) {
if (row.meta?.source_url) { if (row.meta?.source_url) {
MsgConfirm(`确认同步文档?`, `同步将删除已有数据重新获取新数据,请谨慎操作。`, { MsgConfirm(`确认同步文档?`, `同步将删除已有数据重新获取新数据,请谨慎操作。`, {
confirmButtonText: '同步', confirmButtonText: '同步',
confirmButtonClass: 'danger' confirmButtonClass: 'danger'
})
.then(() => {
documentApi.putDocumentRefresh(row.dataset_id, row.id).then(() => {
getList()
})
})
.catch(() => {})
} else {
MsgConfirm(`提示`, `无法同步,请先去设置文档 URL地址`, {
confirmButtonText: '确认',
type: 'warning'
})
.then(() => {})
.catch(() => {})
}
} else {
documentApi.putDocumentRefresh(row.dataset_id, row.id).then(() => {
getList()
}) })
.then(() => {
documentApi.putDocumentSync(row.dataset_id, row.id).then(() => {
getList()
})
})
.catch(() => {})
} else {
MsgConfirm(`提示`, `无法同步,请先去设置文档 URL地址`, {
confirmButtonText: '确认',
type: 'warning'
})
.then(() => {})
.catch(() => {})
} }
} }
// Calls the document refresh API for the given row, then reloads the
// document list once the request has succeeded.
function refreshDocument(row: any) {
  const request = documentApi.putDocumentRefresh(row.dataset_id, row.id)
  request.then(() => {
    getList()
  })
}
function rowClickHandle(row: any, column: any) { function rowClickHandle(row: any, column: any) {
if (column && column.type === 'selection') { if (column && column.type === 'selection') {

View File

@ -21,7 +21,12 @@
:toolbars="toolbars" :toolbars="toolbars"
style="height: 300px" style="height: 300px"
@onUploadImg="onUploadImg" @onUploadImg="onUploadImg"
/> :footers="footers"
>
<template #defFooters>
<span style="margin-left: -6px;">/ 4096</span>
</template>
</MarkdownEditor>
<MdPreview <MdPreview
v-else v-else
ref="editorRef" ref="editorRef"
@ -76,6 +81,8 @@ const toolbars = [
'htmlPreview' 'htmlPreview'
] as any[] ] as any[]
const footers = ['markdownTotal', 0, '=', 1, 'scrollSwitch']
const editorRef = ref() const editorRef = ref()
const form = ref<any>({ const form = ref<any>({