fix: Delete conversation logs by conversation record, not by session.

--bug=1051378 --user=王孝刚 【应用】对话日志删除时应该按对话记录删除,不能按会话删除 https://www.tapd.cn/57709429/s/1645443
This commit is contained in:
wxg0103 2025-01-10 16:08:45 +08:00 committed by wxg
parent 6e281f6242
commit 9c8d7fc269

View File

@ -4,16 +4,14 @@ import logging
import datetime import datetime
from django.db import transaction from django.db import transaction
from django.db.models.fields.json import KeyTextTransform
from django.utils import timezone from django.utils import timezone
from apscheduler.schedulers.background import BackgroundScheduler from apscheduler.schedulers.background import BackgroundScheduler
from django_apscheduler.jobstores import DjangoJobStore from django_apscheduler.jobstores import DjangoJobStore
from application.models import Application, Chat from application.models import Application, Chat, ChatRecord
from django.db.models import Q from django.db.models import Q, Max
from common.lock.impl.file_lock import FileLock from common.lock.impl.file_lock import FileLock
from dataset.models import File from dataset.models import File
from django.db.models.functions import Cast from django.db import connection
from django.db import models
scheduler = BackgroundScheduler() scheduler = BackgroundScheduler()
scheduler.add_jobstore(DjangoJobStore(), "default") scheduler.add_jobstore(DjangoJobStore(), "default")
@ -32,19 +30,38 @@ def clean_chat_log_job():
query_conditions = Q() query_conditions = Q()
for app_id, cutoff_date in cutoff_dates.items(): for app_id, cutoff_date in cutoff_dates.items():
query_conditions |= Q(application_id=app_id, create_time__lt=cutoff_date) query_conditions |= Q(chat__application_id=app_id, create_time__lt=cutoff_date)
batch_size = 500 batch_size = 500
while True: while True:
with transaction.atomic(): with transaction.atomic():
logs_to_delete = Chat.objects.filter(query_conditions).values_list('id', flat=True)[:batch_size] chat_records = ChatRecord.objects.filter(query_conditions).select_related('chat').only('id', 'chat_id',
count = logs_to_delete.count() 'create_time')[
logs_to_delete_str = [str(uuid) for uuid in logs_to_delete] :batch_size]
if count == 0: if not chat_records:
break break
deleted_count, _ = Chat.objects.filter(id__in=logs_to_delete).delete() chat_record_ids = [record.id for record in chat_records]
# 删除对应的文件 chat_ids = {record.chat_id for record in chat_records}
File.objects.filter(meta__chat_id__in=logs_to_delete_str).delete()
# 计算每个 chat_id 的最大 create_time
max_create_times = ChatRecord.objects.filter(id__in=chat_record_ids).values('chat_id').annotate(
max_create_time=Max('create_time'))
# 收集需要删除的文件
files_to_delete = []
for record in chat_records:
max_create_time = next(
(item['max_create_time'] for item in max_create_times if item['chat_id'] == record.chat_id), None)
if max_create_time:
files_to_delete.extend(
File.objects.filter(meta__chat_id=str(record.chat_id), create_time__lt=max_create_time)
)
# 删除 ChatRecord
deleted_count = ChatRecord.objects.filter(id__in=chat_record_ids).delete()[0]
# 删除没有关联 ChatRecord 的 Chat
Chat.objects.filter(chatrecord__isnull=True, id__in=chat_ids).delete()
File.objects.filter(loid__in=[file.loid for file in files_to_delete]).delete()
if deleted_count < batch_size: if deleted_count < batch_size:
break break