refactor: File model (#3050)
This commit is contained in:
parent
442dd209a5
commit
bcf6ccee20
@ -23,6 +23,7 @@ from pydub import AudioSegment
|
|||||||
|
|
||||||
from ..exception.app_exception import AppApiException
|
from ..exception.app_exception import AppApiException
|
||||||
from ..models.db_model_manage import DBModelManage
|
from ..models.db_model_manage import DBModelManage
|
||||||
|
import hashlib
|
||||||
|
|
||||||
|
|
||||||
def password_encrypt(row_password):
|
def password_encrypt(row_password):
|
||||||
@ -124,6 +125,7 @@ def get_file_content(path):
|
|||||||
content = file.read()
|
content = file.read()
|
||||||
return content
|
return content
|
||||||
|
|
||||||
|
|
||||||
def sub_array(array: List, item_num=10):
|
def sub_array(array: List, item_num=10):
|
||||||
result = []
|
result = []
|
||||||
temp = []
|
temp = []
|
||||||
@ -270,3 +272,8 @@ def bulk_create_in_batches(model, data, batch_size=1000):
|
|||||||
batch = data[i:i + batch_size]
|
batch = data[i:i + batch_size]
|
||||||
model.objects.bulk_create(batch)
|
model.objects.bulk_create(batch)
|
||||||
|
|
||||||
|
|
||||||
|
def get_sha256_hash(_bytes):
    """Return the SHA-256 digest of ``_bytes`` as a lowercase hex string.

    :param _bytes: bytes-like object to hash
    :return: 64-character hexadecimal digest
    """
    # Passing the data to the constructor is equivalent to creating an empty
    # hash object and calling update() once.
    return hashlib.sha256(_bytes).hexdigest()
|
||||||
|
|||||||
@ -0,0 +1,37 @@
|
|||||||
|
# Generated by Django 5.2 on 2025-05-07 03:40
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Schema migration for the ``file`` model.

    Removes the ``workspace_id`` column and adds ``file_size``,
    ``sha256_hash``, ``source_id`` and ``source_type``.
    """

    dependencies = [
        ('knowledge', '0004_knowledge_file_size_limit_alter_document_status_and_more'),
    ]

    operations = [
        # The file row is no longer tied to a workspace.
        migrations.RemoveField(
            model_name='file',
            name='workspace_id',
        ),
        # Existing rows receive file_size = 0.
        migrations.AddField(
            model_name='file',
            name='file_size',
            field=models.IntegerField(default=0, verbose_name='文件大小'),
        ),
        # Existing rows receive an empty hash.
        # NOTE(review): the CharFields below are declared without max_length;
        # presumably this relies on the database backend allowing unbounded
        # varchar columns -- confirm against the deployed backend.
        migrations.AddField(
            model_name='file',
            name='sha256_hash',
            field=models.CharField(default='', verbose_name='文件sha256_hash标识'),
        ),
        # Existing rows default to the temporary source id.
        migrations.AddField(
            model_name='file',
            name='source_id',
            field=models.CharField(default='TEMPORARY_100_MINUTE', verbose_name='资源id'),
        ),
        # NOTE(review): the stored value 'TEMPORARY_100_MINUTE' is paired with
        # the label 'Temporary 120 Minute'; the mismatch is present in the
        # model enum as well, so the two stay consistent with each other.
        migrations.AddField(
            model_name='file',
            name='source_type',
            field=models.CharField(choices=[('KNOWLEDGE', 'Knowledge'), ('APPLICATION', 'Application'), ('TEMPORARY_30_MINUTE', 'Temporary 30 Minute'), ('TEMPORARY_100_MINUTE', 'Temporary 120 Minute'), ('TEMPORARY_1_DAY', 'Temporary 1 Day')], default='TEMPORARY_100_MINUTE', verbose_name='资源类型'),
        )
    ]
|
||||||
@ -3,6 +3,7 @@ from enum import Enum
|
|||||||
import uuid_utils.compat as uuid
|
import uuid_utils.compat as uuid
|
||||||
from django.contrib.postgres.search import SearchVectorField
|
from django.contrib.postgres.search import SearchVectorField
|
||||||
from django.db import models
|
from django.db import models
|
||||||
|
from django.db.models import QuerySet
|
||||||
from django.db.models.signals import pre_delete
|
from django.db.models.signals import pre_delete
|
||||||
from django.dispatch import receiver
|
from django.dispatch import receiver
|
||||||
from mptt.fields import TreeForeignKey
|
from mptt.fields import TreeForeignKey
|
||||||
@ -10,6 +11,7 @@ from mptt.models import MPTTModel
|
|||||||
|
|
||||||
from common.db.sql_execute import select_one
|
from common.db.sql_execute import select_one
|
||||||
from common.mixins.app_model_mixin import AppModelMixin
|
from common.mixins.app_model_mixin import AppModelMixin
|
||||||
|
from common.utils.common import get_sha256_hash
|
||||||
from models_provider.models import Model
|
from models_provider.models import Model
|
||||||
from users.models import User
|
from users.models import User
|
||||||
|
|
||||||
@ -221,6 +223,19 @@ class SearchMode(models.TextChoices):
|
|||||||
blend = 'blend'
|
blend = 'blend'
|
||||||
|
|
||||||
|
|
||||||
|
class FileSourceType(models.TextChoices):
    """Kinds of owner a stored file can belong to.

    The member comments below are translated from the original author
    comments; the cleanup logic they describe is not part of this class.
    """
    # Knowledge base: deleted together with the knowledge base; source_id is the knowledge base id
    KNOWLEDGE = "KNOWLEDGE"
    # Application: deleted together with the application; source_id is the application id
    APPLICATION = "APPLICATION"
    # Temporary, 30 minutes: data is cleaned up after 30 minutes; source_id is TEMPORARY_30_MINUTE
    TEMPORARY_30_MINUTE = "TEMPORARY_30_MINUTE"
    # Temporary, 120 minutes: data is cleaned up after 120 minutes; source_id is TEMPORARY_100_MINUTE
    # NOTE(review): the member name says 120 but the stored value says 100.
    # The value is already used as a column default, so it must not be
    # changed without a data migration.
    TEMPORARY_120_MINUTE = "TEMPORARY_100_MINUTE"
    # Temporary, 1 day: data is cleaned up after 1 day; source_id is TEMPORARY_1_DAY
    TEMPORARY_1_DAY = "TEMPORARY_1_DAY"
|
||||||
|
|
||||||
|
|
||||||
class VectorField(models.Field):
|
class VectorField(models.Field):
|
||||||
def db_type(self, connection):
|
def db_type(self, connection):
|
||||||
return 'vector'
|
return 'vector'
|
||||||
@ -246,7 +261,11 @@ class Embedding(models.Model):
|
|||||||
class File(AppModelMixin):
|
class File(AppModelMixin):
|
||||||
id = models.UUIDField(primary_key=True, max_length=128, default=uuid.uuid7, editable=False, verbose_name="主键id")
|
id = models.UUIDField(primary_key=True, max_length=128, default=uuid.uuid7, editable=False, verbose_name="主键id")
|
||||||
file_name = models.CharField(max_length=256, verbose_name="文件名称", default="")
|
file_name = models.CharField(max_length=256, verbose_name="文件名称", default="")
|
||||||
workspace_id = models.CharField(max_length=64, verbose_name="工作空间id", default="default", db_index=True)
|
file_size = models.IntegerField(verbose_name="文件大小", default=0)
|
||||||
|
sha256_hash = models.CharField(verbose_name="文件sha256_hash标识", default="")
|
||||||
|
source_type = models.CharField(verbose_name="资源类型", choices=FileSourceType,
|
||||||
|
default=FileSourceType.TEMPORARY_120_MINUTE.value)
|
||||||
|
source_id = models.CharField(verbose_name="资源id", default=FileSourceType.TEMPORARY_120_MINUTE.value)
|
||||||
loid = models.IntegerField(verbose_name="loid")
|
loid = models.IntegerField(verbose_name="loid")
|
||||||
meta = models.JSONField(verbose_name="文件关联数据", default=dict)
|
meta = models.JSONField(verbose_name="文件关联数据", default=dict)
|
||||||
|
|
||||||
@ -254,8 +273,13 @@ class File(AppModelMixin):
|
|||||||
db_table = "file"
|
db_table = "file"
|
||||||
|
|
||||||
def save(self, bytea=None, force_insert=False, force_update=False, using=None, update_fields=None):
    """Persist the file row, storing its content as a PostgreSQL large object.

    The content is identified by its SHA-256 digest. If another ``File`` row
    already references content with the same digest, that row's large object
    is reused instead of writing a second copy.

    :param bytea: raw file content (bytes-like); required
    :param force_insert: forwarded to ``Model.save``
    :param force_update: forwarded to ``Model.save``
    :param using: forwarded to ``Model.save``
    :param update_fields: forwarded to ``Model.save``
    :raises TypeError: if ``bytea`` is ``None``
    """
    if bytea is None:
        # Hashing None would raise an opaque TypeError from hashlib; keep the
        # exception type but say what is actually wrong.
        raise TypeError("File.save() requires the file content via the 'bytea' argument")

    sha256_hash = get_sha256_hash(bytea)
    # Record the digest on the row. Without this every row keeps the default
    # empty hash, the lookup below can never match, and each save writes a
    # new large object.
    self.sha256_hash = sha256_hash

    existing = QuerySet(File).filter(sha256_hash=sha256_hash).first()
    if existing is not None:
        # Same content is already stored: share its large object.
        self.loid = existing.loid
    else:
        result = select_one("SELECT lo_from_bytea(%s, %s::bytea) as loid", [0, bytea])
        self.loid = result['loid']

    # Forward the standard save() options instead of silently dropping them.
    super().save(force_insert=force_insert, force_update=force_update, using=using,
                 update_fields=update_fields)
|
||||||
|
|
||||||
def get_bytes(self):
|
def get_bytes(self):
|
||||||
@ -265,4 +289,6 @@ class File(AppModelMixin):
|
|||||||
|
|
||||||
@receiver(pre_delete, sender=File)
def on_delete_file(sender, instance, **kwargs):
    """Unlink the large object of a ``File`` that is about to be deleted.

    The large object is kept while any other ``File`` row still references
    the same ``loid``.
    """
    # NOTE(review): the check runs before the row is removed, so if several
    # rows sharing one loid are deleted together, each may still see the
    # others and the large object would be left behind -- confirm.
    still_referenced = QuerySet(File).filter(loid=instance.loid).exclude(id=instance.id).exists()
    if still_referenced:
        return
    select_one(f'SELECT lo_unlink({instance.loid})', [])
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user