modify model_id
Some checks are pending
sync2gitee / repo-sync (push) Waiting to run
Typos Check / Spell Check with Typos (push) Waiting to run

This commit is contained in:
朱潮 2025-08-27 00:38:41 +08:00
parent 541369d343
commit 4930ef71f7
2 changed files with 14 additions and 8 deletions

View File

@@ -121,8 +121,4 @@ https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
Community Impact Guidelines were inspired by [Mozilla's code of conduct
enforcement ladder](https://github.com/mozilla/diversity).
[homepage]: https://www.contributor-covenant.org
For answers to common questions about this code of conduct, see the FAQ at
https://www.contributor-covenant.org/faq. Translations are available at
https://www.contributor-covenant.org/translations.
[

View File

@@ -1180,6 +1180,7 @@ class DocumentSerializers(serializers.Serializer):
# 插入文档
for document in instance_list:
maxkb_logger.info(f"Processing document: {document.get('name')}, paragraphs count: {len(document.get('paragraphs', []))}")
document_paragraph_dict_model = DocumentSerializers.Create.get_document_paragraph_model(
knowledge_id,
document
@@ -1188,7 +1189,9 @@ class DocumentSerializers(serializers.Serializer):
document_instance = document_paragraph_dict_model.get('document')
self.link_file(document.get('source_file_id'), document_instance.id)
document_model_list.append(document_instance)
for paragraph in document_paragraph_dict_model.get('paragraph_model_list'):
para_list = document_paragraph_dict_model.get('paragraph_model_list')
maxkb_logger.info(f"Created {len(para_list)} paragraph models for document: {document_instance.id}")
for paragraph in para_list:
paragraph_model_list.append(paragraph)
for problem_paragraph_object in document_paragraph_dict_model.get('problem_paragraph_object_list'):
problem_paragraph_object_list.append(problem_paragraph_object)
@@ -1259,14 +1262,21 @@ class DocumentSerializers(serializers.Serializer):
# 批量插入段落(只为非高级学习文档)
if len(paragraph_model_list) > 0:
maxkb_logger.info(f"Total paragraphs to insert: {len(paragraph_model_list)}")
for document in document_model_list:
max_position = Paragraph.objects.filter(document_id=document.id).aggregate(
max_position=Max('position')
)['max_position'] or 0
sub_list = [p for p in paragraph_model_list if p.document_id == document.id]
# 修复比较逻辑:确保类型一致的比较
sub_list = [p for p in paragraph_model_list if str(p.document_id) == str(document.id)]
maxkb_logger.info(f"Document {document.id} will have {len(sub_list)} paragraphs")
for i, paragraph in enumerate(sub_list):
paragraph.position = max_position + i + 1
QuerySet(Paragraph).bulk_create(sub_list if len(sub_list) > 0 else [])
if len(sub_list) > 0:
QuerySet(Paragraph).bulk_create(sub_list)
maxkb_logger.info(f"Successfully created {len(sub_list)} paragraphs for document {document.id}")
else:
maxkb_logger.warning(f"No paragraphs to create for document {document.id}")
# 批量插入问题
bulk_create_in_batches(Problem, problem_model_list, batch_size=1000)
# 批量插入关联问题