fix: update paragraph ordering and adjust position during bulk creation

This commit is contained in:
CaptainB 2025-06-17 12:09:12 +08:00
parent 00e486c3fe
commit 119f678224
9 changed files with 195 additions and 18 deletions

View File

@ -311,3 +311,49 @@ class ParagraphMigrateAPI(APIMixin):
@staticmethod @staticmethod
def get_request(): def get_request():
return BatchSerializer return BatchSerializer
class ParagraphAdjustOrderAPI(APIMixin):
    """OpenAPI schema pieces for the paragraph "adjust position" endpoint."""

    @staticmethod
    def get_parameters():
        """Return the endpoint's parameters: three path ids and two query values, all required."""
        # One row per parameter, in declaration order:
        # (name, description, type, location).
        specs = (
            ("workspace_id", "工作空间id", OpenApiTypes.STR, 'path'),
            ("knowledge_id", "知识库id", OpenApiTypes.STR, 'path'),
            ("document_id", "文档id", OpenApiTypes.STR, 'path'),
            ("paragraph_id", "段落id", OpenApiTypes.STR, 'query'),
            ("new_position", "新的顺序", OpenApiTypes.INT, 'query'),
        )
        return [
            OpenApiParameter(
                name=param_name,
                description=param_description,
                type=param_type,
                location=param_location,
                required=True,
            )
            for param_name, param_description, param_type, param_location in specs
        ]

    @staticmethod
    def get_response():
        """Return the serializer class used to describe the response."""
        return DefaultResultSerializer

View File

@ -0,0 +1,29 @@
# Generated by Django 5.2.3 on 2025-06-17 03:11
import knowledge.models.knowledge
from django.db import migrations, models
class Migration(migrations.Migration):
    # Schema migration for the `knowledge` app: adds the `position` column to
    # `paragraph` and re-declares the `status` column on `document` and
    # `paragraph`.

    # Must run after migration 0004 of the same app.
    dependencies = [
        ('knowledge', '0004_alter_document_status_alter_paragraph_status_and_more'),
    ]
    operations = [
        # New indexed integer column holding the paragraph order; defaults to 0.
        migrations.AddField(
            model_name='paragraph',
            name='position',
            field=models.IntegerField(db_index=True, default=0, verbose_name='段落顺序'),
        ),
        # NOTE(review): the default below is the unbound `Status.__str__` as
        # written by makemigrations - confirm this is the intended callable.
        migrations.AlterField(
            model_name='document',
            name='status',
            field=models.CharField(default=knowledge.models.knowledge.Status.__str__, max_length=20, verbose_name='状态'),
        ),
        # Same re-declaration of `status`, applied to the paragraph table.
        migrations.AlterField(
            model_name='paragraph',
            name='status',
            field=models.CharField(default=knowledge.models.knowledge.Status.__str__, max_length=20, verbose_name='状态'),
        ),
    ]

View File

@ -174,6 +174,7 @@ class Paragraph(AppModelMixin):
status_meta = models.JSONField(verbose_name="状态数据", default=default_status_meta) status_meta = models.JSONField(verbose_name="状态数据", default=default_status_meta)
hit_num = models.IntegerField(verbose_name="命中次数", default=0) hit_num = models.IntegerField(verbose_name="命中次数", default=0)
is_active = models.BooleanField(default=True) is_active = models.BooleanField(default=True)
position = models.IntegerField(verbose_name="段落顺序", default=0, db_index=True)
class Meta: class Meta:
db_table = "paragraph" db_table = "paragraph"

View File

@ -13,6 +13,7 @@ from celery_once import AlreadyQueued
from django.core import validators from django.core import validators
from django.db import transaction, models from django.db import transaction, models
from django.db.models import QuerySet from django.db.models import QuerySet
from django.db.models.aggregates import Max
from django.db.models.functions import Substr, Reverse from django.db.models.functions import Substr, Reverse
from django.http import HttpResponse from django.http import HttpResponse
from django.utils.translation import gettext_lazy as _, gettext, get_language, to_locale from django.utils.translation import gettext_lazy as _, gettext, get_language, to_locale
@ -417,6 +418,7 @@ class DocumentSerializers(serializers.Serializer):
if first.type != KnowledgeType.WEB: if first.type != KnowledgeType.WEB:
raise AppApiException(500, _('Synchronization is only supported for web site types')) raise AppApiException(500, _('Synchronization is only supported for web site types'))
@transaction.atomic
def sync(self, with_valid=True, with_embedding=True): def sync(self, with_valid=True, with_embedding=True):
if with_valid: if with_valid:
self.is_valid(raise_exception=True) self.is_valid(raise_exception=True)
@ -454,7 +456,13 @@ class DocumentSerializers(serializers.Serializer):
problem_model_list, problem_paragraph_mapping_list = ProblemParagraphManage( problem_model_list, problem_paragraph_mapping_list = ProblemParagraphManage(
problem_paragraph_object_list, document.knowledge_id).to_problem_model_list() problem_paragraph_object_list, document.knowledge_id).to_problem_model_list()
# 批量插入段落 # 批量插入段落
QuerySet(Paragraph).bulk_create(paragraph_model_list) if len(paragraph_model_list) > 0 else None if len(paragraph_model_list) > 0:
max_position = Paragraph.objects.filter(document_id=document_id).aggregate(
max_position=Max('position')
)['max_position'] or 0
for i, paragraph in enumerate(paragraph_model_list):
paragraph.position = max_position + i + 1
QuerySet(Paragraph).bulk_create(paragraph_model_list)
# 批量插入问题 # 批量插入问题
QuerySet(Problem).bulk_create(problem_model_list) if len(problem_model_list) > 0 else None QuerySet(Problem).bulk_create(problem_model_list) if len(problem_model_list) > 0 else None
# 插入关联问题 # 插入关联问题
@ -757,7 +765,13 @@ class DocumentSerializers(serializers.Serializer):
# 插入文档 # 插入文档
document_model.save() document_model.save()
# 批量插入段落 # 批量插入段落
QuerySet(Paragraph).bulk_create(paragraph_model_list) if len(paragraph_model_list) > 0 else None if len(paragraph_model_list) > 0:
max_position = Paragraph.objects.filter(document_id=document_model.id).aggregate(
max_position=Max('position')
)['max_position'] or 0
for i, paragraph in enumerate(paragraph_model_list):
paragraph.position = max_position + i + 1
QuerySet(Paragraph).bulk_create(paragraph_model_list)
# 批量插入问题 # 批量插入问题
QuerySet(Problem).bulk_create(problem_model_list) if len(problem_model_list) > 0 else None QuerySet(Problem).bulk_create(problem_model_list) if len(problem_model_list) > 0 else None
# 批量插入关联问题 # 批量插入关联问题
@ -1031,7 +1045,15 @@ class DocumentSerializers(serializers.Serializer):
# 插入文档 # 插入文档
QuerySet(Document).bulk_create(document_model_list) if len(document_model_list) > 0 else None QuerySet(Document).bulk_create(document_model_list) if len(document_model_list) > 0 else None
# 批量插入段落 # 批量插入段落
bulk_create_in_batches(Paragraph, paragraph_model_list, batch_size=1000) if len(paragraph_model_list) > 0:
for document in document_model_list:
max_position = Paragraph.objects.filter(document_id=document.id).aggregate(
max_position=Max('position')
)['max_position'] or 0
sub_list = [p for p in paragraph_model_list if p.document_id == document.id]
for i, paragraph in enumerate(sub_list):
paragraph.position = max_position + i + 1
QuerySet(Paragraph).bulk_create(sub_list if len(sub_list) > 0 else [])
# 批量插入问题 # 批量插入问题
bulk_create_in_batches(Problem, problem_model_list, batch_size=1000) bulk_create_in_batches(Problem, problem_model_list, batch_size=1000)
# 批量插入关联问题 # 批量插入关联问题

View File

@ -5,7 +5,8 @@ from typing import Dict
import uuid_utils.compat as uuid import uuid_utils.compat as uuid
from celery_once import AlreadyQueued from celery_once import AlreadyQueued
from django.db import transaction from django.db import transaction
from django.db.models import QuerySet, Count from django.db.models import QuerySet, Count, F
from django.db.models.aggregates import Max
from django.utils.translation import gettext_lazy as _ from django.utils.translation import gettext_lazy as _
from rest_framework import serializers from rest_framework import serializers
@ -28,7 +29,7 @@ from knowledge.task.generate import generate_related_by_paragraph_id_list
class ParagraphSerializer(serializers.ModelSerializer): class ParagraphSerializer(serializers.ModelSerializer):
class Meta: class Meta:
model = Paragraph model = Paragraph
fields = ['id', 'content', 'is_active', 'document_id', 'title', 'create_time', 'update_time'] fields = ['id', 'content', 'is_active', 'document_id', 'title', 'create_time', 'update_time', 'position']
class ParagraphInstanceSerializer(serializers.Serializer): class ParagraphInstanceSerializer(serializers.Serializer):
@ -244,6 +245,7 @@ class ParagraphSerializers(serializers.Serializer):
knowledge_id=self.data.get('knowledge_id')).exists(): knowledge_id=self.data.get('knowledge_id')).exists():
raise AppApiException(500, _('The document id is incorrect')) raise AppApiException(500, _('The document id is incorrect'))
@transaction.atomic
def save(self, instance: Dict, with_valid=True, with_embedding=True): def save(self, instance: Dict, with_valid=True, with_embedding=True):
if with_valid: if with_valid:
ParagraphSerializers(data=instance).is_valid(raise_exception=True) ParagraphSerializers(data=instance).is_valid(raise_exception=True)
@ -257,7 +259,18 @@ class ParagraphSerializers(serializers.Serializer):
ProblemParagraphManage(problem_paragraph_object_list, knowledge_id) ProblemParagraphManage(problem_paragraph_object_list, knowledge_id)
.to_problem_model_list()) .to_problem_model_list())
# 插入段落 # 插入段落
paragraph_problem_model.get('paragraph').save() max_position = Paragraph.objects.filter(document_id=document_id).aggregate(
max_position=Max('position')
)['max_position'] or 0
paragraph.position = max_position + 1
paragraph.save()
# 调整位置
ParagraphSerializers.AdjustPosition(data={
'paragraph_id': str(paragraph.id),
'knowledge_id': knowledge_id,
'document_id': document_id,
'workspace_id': self.data.get('workspace_id')
}).adjust_position(position=instance.get('position', max_position + 1))
# 插入問題 # 插入問題
QuerySet(Problem).bulk_create(problem_model_list) if len(problem_model_list) > 0 else None QuerySet(Problem).bulk_create(problem_model_list) if len(problem_model_list) > 0 else None
# 插入问题关联关系 # 插入问题关联关系
@ -319,7 +332,7 @@ class ParagraphSerializers(serializers.Serializer):
**{'title__icontains': self.data.get('title')}) **{'title__icontains': self.data.get('title')})
if 'content' in self.data: if 'content' in self.data:
query_set = query_set.filter(**{'content__icontains': self.data.get('content')}) query_set = query_set.filter(**{'content__icontains': self.data.get('content')})
query_set = query_set.order_by('create_time', 'id') query_set = query_set.order_by('position', 'create_time')
return query_set return query_set
def list(self): def list(self):
@ -541,6 +554,42 @@ class ParagraphSerializers(serializers.Serializer):
return problem, True return problem, True
return None return None
class AdjustPosition(serializers.Serializer):
    """Validate the ids of one paragraph and move it to a new position in its document."""
    workspace_id = serializers.CharField(required=True, label=_('workspace id'))
    knowledge_id = serializers.UUIDField(required=True, label=_('knowledge id'))
    document_id = serializers.UUIDField(required=True, label=_('document id'))
    paragraph_id = serializers.UUIDField(required=True, label=_('paragraph id'))

    @transaction.atomic
    def adjust_position(self, new_position):
        """
        Move the paragraph to ``new_position`` and renumber the paragraphs in between.

        Only paragraphs of the same document are shifted. Positions are kept
        per document, so an unscoped update would also renumber paragraphs of
        unrelated documents that happen to sit in the same position range.

        :param new_position: target position; any value accepted by ``int()``
        :raises serializers.ValidationError: if the serializer data is invalid
            or ``new_position`` cannot be converted to an integer
        :raises AppApiException: if the paragraph does not exist in the document
        """
        self.is_valid(raise_exception=True)
        try:
            new_position = int(new_position)
        except (TypeError, ValueError) as exc:
            raise serializers.ValidationError(_('new_position must be an integer')) from exc

        # Every query below is restricted to the paragraphs of this document.
        siblings = Paragraph.objects.filter(document_id=self.data.get('document_id'))

        # Look the paragraph up inside its document so a mismatched
        # document/paragraph pair is rejected instead of silently reordered.
        paragraph = siblings.filter(id=self.data.get('paragraph_id')).first()
        if paragraph is None:
            raise AppApiException(500, _('Paragraph id does not exist'))
        old_position = paragraph.position

        if old_position < new_position:
            # Moving down: pull the paragraphs in (old, new] up by one slot.
            siblings.filter(
                position__gt=old_position, position__lte=new_position
            ).update(position=F('position') - 1)
        elif old_position > new_position:
            # Moving up: push the paragraphs in [new, old) down by one slot.
            siblings.filter(
                position__lt=old_position, position__gte=new_position
            ).update(position=F('position') + 1)

        # Finally place the paragraph itself on the requested slot.
        paragraph.position = new_position
        paragraph.save()
def delete_problems_and_mappings(paragraph_ids): def delete_problems_and_mappings(paragraph_ids):
problem_paragraph_mappings = ProblemParagraphMapping.objects.filter(paragraph_id__in=paragraph_ids) problem_paragraph_mappings = ProblemParagraphMapping.objects.filter(paragraph_id__in=paragraph_ids)

View File

@ -75,6 +75,9 @@ urlpatterns = [
views.ParagraphView.Association.as_view()), views.ParagraphView.Association.as_view()),
path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph/unassociation', path('workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph/unassociation',
views.ParagraphView.UnAssociation.as_view()), views.ParagraphView.UnAssociation.as_view()),
path(
'workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph/adjust_position',
views.ParagraphView.AdjustPosition.as_view()),
path( path(
'workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph/<str:paragraph_id>', 'workspace/<str:workspace_id>/knowledge/<str:knowledge_id>/document/<str:document_id>/paragraph/<str:paragraph_id>',
views.ParagraphView.Operate.as_view()), views.ParagraphView.Operate.as_view()),

View File

@ -11,7 +11,7 @@ from common.result import result
from common.utils.common import query_params_to_single_dict from common.utils.common import query_params_to_single_dict
from knowledge.api.paragraph import ParagraphReadAPI, ParagraphCreateAPI, ParagraphBatchDeleteAPI, ParagraphEditAPI, \ from knowledge.api.paragraph import ParagraphReadAPI, ParagraphCreateAPI, ParagraphBatchDeleteAPI, ParagraphEditAPI, \
ParagraphGetAPI, ProblemCreateAPI, UnAssociationAPI, AssociationAPI, ParagraphPageAPI, \ ParagraphGetAPI, ProblemCreateAPI, UnAssociationAPI, AssociationAPI, ParagraphPageAPI, \
ParagraphBatchGenerateRelatedAPI, ParagraphMigrateAPI ParagraphBatchGenerateRelatedAPI, ParagraphMigrateAPI, ParagraphAdjustOrderAPI
from knowledge.serializers.common import get_knowledge_operation_object from knowledge.serializers.common import get_knowledge_operation_object
from knowledge.serializers.paragraph import ParagraphSerializers from knowledge.serializers.paragraph import ParagraphSerializers
from knowledge.views import get_knowledge_document_operation_object, get_document_operation_object from knowledge.views import get_knowledge_document_operation_object, get_document_operation_object
@ -400,3 +400,30 @@ class ParagraphView(APIView):
) )
d.is_valid(raise_exception=True) d.is_valid(raise_exception=True)
return result.success(d.page(current_page, page_size)) return result.success(d.page(current_page, page_size))
class AdjustPosition(APIView):
    # PUT endpoint that moves one paragraph of a document to a new position.
    authentication_classes = [TokenAuth]

    @extend_schema(
        methods=['PUT'],
        summary=_('Adjust paragraph position'),
        description=_('Adjust paragraph position'),
        operation_id=_('Adjust paragraph position'),  # type: ignore
        parameters=ParagraphAdjustOrderAPI.get_parameters(),
        request=ParagraphAdjustOrderAPI.get_request(),
        responses=ParagraphAdjustOrderAPI.get_response(),
        tags=[_('Knowledge Base/Documentation/Paragraph')]  # type: ignore
    )
    @has_permissions(
        PermissionConstants.KNOWLEDGE_DOCUMENT_EDIT.get_workspace_knowledge_permission(),
        RoleConstants.WORKSPACE_MANAGE.get_workspace_role()
    )
    def put(self, request: Request, workspace_id: str, knowledge_id: str, document_id: str):
        # The path arguments identify the document; the paragraph id and the
        # target position both arrive as query parameters.
        query = request.query_params
        serializer = ParagraphSerializers.AdjustPosition(data={
            'workspace_id': workspace_id,
            'knowledge_id': knowledge_id,
            'document_id': document_id,
            'paragraph_id': query.get('paragraph_id'),
        })
        outcome = serializer.adjust_position(query.get('new_position'))
        return result.success(outcome)

View File

@ -17,7 +17,7 @@ export default defineConfig(({ mode }) => {
const prefix = process.env.VITE_DYNAMIC_PREFIX || ENV.VITE_BASE_PATH const prefix = process.env.VITE_DYNAMIC_PREFIX || ENV.VITE_BASE_PATH
const proxyConf: Record<string, string | ProxyOptions> = {} const proxyConf: Record<string, string | ProxyOptions> = {}
proxyConf['/api'] = { proxyConf['/api'] = {
target: 'http://43.166.1.146:8080', target: 'http://127.0.0.1:8080',
changeOrigin: true, changeOrigin: true,
rewrite: (path: string) => path.replace(ENV.VITE_BASE_PATH, '/'), rewrite: (path: string) => path.replace(ENV.VITE_BASE_PATH, '/'),
} }