feat: 文档添加排队中状态 (#886)
This commit is contained in:
parent
29b005e405
commit
3c7142ed7c
@ -8,10 +8,14 @@
|
|||||||
"""
|
"""
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
|
||||||
|
from django.core.cache.backends.locmem import LocMemCache
|
||||||
|
|
||||||
work_thread_pool = ThreadPoolExecutor(5)
|
work_thread_pool = ThreadPoolExecutor(5)
|
||||||
|
|
||||||
embedding_thread_pool = ThreadPoolExecutor(3)
|
embedding_thread_pool = ThreadPoolExecutor(3)
|
||||||
|
|
||||||
|
memory_cache = LocMemCache('task', {"OPTIONS": {"MAX_ENTRIES": 1000}})
|
||||||
|
|
||||||
|
|
||||||
def poxy(poxy_function):
|
def poxy(poxy_function):
|
||||||
def inner(args, **keywords):
|
def inner(args, **keywords):
|
||||||
@ -20,8 +24,25 @@ def poxy(poxy_function):
|
|||||||
return inner
|
return inner
|
||||||
|
|
||||||
|
|
||||||
|
def get_cache_key(poxy_function, args):
|
||||||
|
return poxy_function.__name__ + str(args)
|
||||||
|
|
||||||
|
|
||||||
|
def get_cache_poxy_function(poxy_function, cache_key):
|
||||||
|
def fun(args, **keywords):
|
||||||
|
poxy_function(args, **keywords)
|
||||||
|
memory_cache.delete(cache_key)
|
||||||
|
|
||||||
|
return fun
|
||||||
|
|
||||||
|
|
||||||
def embedding_poxy(poxy_function):
|
def embedding_poxy(poxy_function):
|
||||||
def inner(args, **keywords):
|
def inner(args, **keywords):
|
||||||
embedding_thread_pool.submit(poxy_function, args, **keywords)
|
key = get_cache_key(poxy_function, args)
|
||||||
|
if memory_cache.has_key(key):
|
||||||
|
return
|
||||||
|
memory_cache.add(key, None)
|
||||||
|
f = get_cache_poxy_function(poxy_function, key)
|
||||||
|
embedding_thread_pool.submit(f, args, **keywords)
|
||||||
|
|
||||||
return inner
|
return inner
|
||||||
|
|||||||
@ -189,7 +189,6 @@ class ListenerManagement:
|
|||||||
un_lock('embedding' + str(document_id))
|
un_lock('embedding' + str(document_id))
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@embedding_poxy
|
|
||||||
def embedding_by_dataset(dataset_id, embedding_model: Embeddings):
|
def embedding_by_dataset(dataset_id, embedding_model: Embeddings):
|
||||||
"""
|
"""
|
||||||
向量化知识库
|
向量化知识库
|
||||||
|
|||||||
@ -0,0 +1,23 @@
|
|||||||
|
# Generated by Django 4.2.14 on 2024-07-29 15:37
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
('dataset', '0007_alter_paragraph_content'),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='document',
|
||||||
|
name='status',
|
||||||
|
field=models.CharField(choices=[('0', '导入中'), ('1', '已完成'), ('2', '导入失败'), ('3', '排队中')], default='3', max_length=1, verbose_name='状态'),
|
||||||
|
),
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name='paragraph',
|
||||||
|
name='status',
|
||||||
|
field=models.CharField(choices=[('0', '导入中'), ('1', '已完成'), ('2', '导入失败'), ('3', '排队中')], default='0', max_length=1, verbose_name='状态'),
|
||||||
|
),
|
||||||
|
]
|
||||||
@ -22,6 +22,7 @@ class Status(models.TextChoices):
|
|||||||
embedding = 0, '导入中'
|
embedding = 0, '导入中'
|
||||||
success = 1, '已完成'
|
success = 1, '已完成'
|
||||||
error = 2, '导入失败'
|
error = 2, '导入失败'
|
||||||
|
queue_up = 3, '排队中'
|
||||||
|
|
||||||
|
|
||||||
class Type(models.TextChoices):
|
class Type(models.TextChoices):
|
||||||
@ -66,7 +67,7 @@ class Document(AppModelMixin):
|
|||||||
name = models.CharField(max_length=150, verbose_name="文档名称")
|
name = models.CharField(max_length=150, verbose_name="文档名称")
|
||||||
char_length = models.IntegerField(verbose_name="文档字符数 冗余字段")
|
char_length = models.IntegerField(verbose_name="文档字符数 冗余字段")
|
||||||
status = models.CharField(verbose_name='状态', max_length=1, choices=Status.choices,
|
status = models.CharField(verbose_name='状态', max_length=1, choices=Status.choices,
|
||||||
default=Status.embedding)
|
default=Status.queue_up)
|
||||||
is_active = models.BooleanField(default=True)
|
is_active = models.BooleanField(default=True)
|
||||||
|
|
||||||
type = models.CharField(verbose_name='类型', max_length=1, choices=Type.choices,
|
type = models.CharField(verbose_name='类型', max_length=1, choices=Type.choices,
|
||||||
|
|||||||
@ -35,7 +35,7 @@ from common.util.field_message import ErrMessage
|
|||||||
from common.util.file_util import get_file_content
|
from common.util.file_util import get_file_content
|
||||||
from common.util.fork import ChildLink, Fork
|
from common.util.fork import ChildLink, Fork
|
||||||
from common.util.split_model import get_split_model
|
from common.util.split_model import get_split_model
|
||||||
from dataset.models.data_set import DataSet, Document, Paragraph, Problem, Type, ProblemParagraphMapping
|
from dataset.models.data_set import DataSet, Document, Paragraph, Problem, Type, ProblemParagraphMapping, Status
|
||||||
from dataset.serializers.common_serializers import list_paragraph, MetaSerializer, ProblemParagraphManage, \
|
from dataset.serializers.common_serializers import list_paragraph, MetaSerializer, ProblemParagraphManage, \
|
||||||
get_embedding_model_by_dataset_id
|
get_embedding_model_by_dataset_id
|
||||||
from dataset.serializers.document_serializers import DocumentSerializers, DocumentInstanceSerializer
|
from dataset.serializers.document_serializers import DocumentSerializers, DocumentInstanceSerializer
|
||||||
@ -746,6 +746,8 @@ class DataSetSerializers(serializers.ModelSerializer):
|
|||||||
if with_valid:
|
if with_valid:
|
||||||
self.is_valid(raise_exception=True)
|
self.is_valid(raise_exception=True)
|
||||||
model = get_embedding_model_by_dataset_id(self.data.get('id'))
|
model = get_embedding_model_by_dataset_id(self.data.get('id'))
|
||||||
|
QuerySet(Document).filter(dataset_id=self.data.get('id')).update(**{'status': Status.queue_up})
|
||||||
|
QuerySet(Paragraph).filter(dataset_id=self.data.get('id')).update(**{'status': Status.queue_up})
|
||||||
ListenerManagement.embedding_by_dataset_signal.send(self.data.get('id'), embedding_model=model)
|
ListenerManagement.embedding_by_dataset_signal.send(self.data.get('id'), embedding_model=model)
|
||||||
|
|
||||||
def list_application(self, with_valid=True):
|
def list_application(self, with_valid=True):
|
||||||
|
|||||||
@ -539,6 +539,8 @@ class DocumentSerializers(ApiMixin, serializers.Serializer):
|
|||||||
self.is_valid(raise_exception=True)
|
self.is_valid(raise_exception=True)
|
||||||
document_id = self.data.get("document_id")
|
document_id = self.data.get("document_id")
|
||||||
model = get_embedding_model_by_dataset_id(dataset_id=self.data.get('dataset_id'))
|
model = get_embedding_model_by_dataset_id(dataset_id=self.data.get('dataset_id'))
|
||||||
|
QuerySet(Document).filter(id=document_id).update(**{'status': Status.queue_up})
|
||||||
|
QuerySet(Paragraph).filter(document_id=document_id).update(**{'status': Status.queue_up})
|
||||||
ListenerManagement.embedding_by_document_signal.send(document_id, embedding_model=model)
|
ListenerManagement.embedding_by_document_signal.send(document_id, embedding_model=model)
|
||||||
|
|
||||||
@transaction.atomic
|
@transaction.atomic
|
||||||
|
|||||||
@ -82,7 +82,10 @@
|
|||||||
<el-icon class="danger"><CircleCloseFilled /></el-icon> 失败
|
<el-icon class="danger"><CircleCloseFilled /></el-icon> 失败
|
||||||
</el-text>
|
</el-text>
|
||||||
<el-text v-else-if="row.status === '0'">
|
<el-text v-else-if="row.status === '0'">
|
||||||
<el-icon class="is-loading primary"><Loading /></el-icon> 导入中
|
<el-icon class="is-loading primary"><Loading /></el-icon> 索引中
|
||||||
|
</el-text>
|
||||||
|
<el-text v-else-if="row.status === '3'">
|
||||||
|
<el-icon class="is-loading primary"><Loading /></el-icon>排队中
|
||||||
</el-text>
|
</el-text>
|
||||||
</template>
|
</template>
|
||||||
</el-table-column>
|
</el-table-column>
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user