refactor: improve logging and status updates in embedding methods
This commit is contained in:
parent
c363003aed
commit
6e16c74a5e
@ -6,15 +6,16 @@
|
|||||||
@date:2023/10/20 14:01
|
@date:2023/10/20 14:01
|
||||||
@desc:
|
@desc:
|
||||||
"""
|
"""
|
||||||
|
import datetime
|
||||||
import os
|
import os
|
||||||
import threading
|
import threading
|
||||||
import datetime
|
|
||||||
import traceback
|
import traceback
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
import django.db.models
|
import django.db.models
|
||||||
from django.db.models import QuerySet
|
from django.db.models import QuerySet
|
||||||
from django.db.models.functions import Substr, Reverse
|
from django.db.models.functions import Substr, Reverse
|
||||||
|
from django.utils.translation import gettext_lazy as _
|
||||||
from langchain_core.embeddings import Embeddings
|
from langchain_core.embeddings import Embeddings
|
||||||
|
|
||||||
from common.config.embedding_config import VectorStore
|
from common.config.embedding_config import VectorStore
|
||||||
@ -23,10 +24,9 @@ from common.utils.common import get_file_content
|
|||||||
from common.utils.lock import RedisLock
|
from common.utils.lock import RedisLock
|
||||||
from common.utils.logger import maxkb_logger
|
from common.utils.logger import maxkb_logger
|
||||||
from common.utils.page_utils import page_desc
|
from common.utils.page_utils import page_desc
|
||||||
from knowledge.models import Paragraph, Status, Document, ProblemParagraphMapping, TaskType, State,SourceType, SearchMode
|
from knowledge.models import Paragraph, Status, Document, ProblemParagraphMapping, TaskType, State, SourceType, \
|
||||||
|
SearchMode
|
||||||
from maxkb.conf import (PROJECT_DIR)
|
from maxkb.conf import (PROJECT_DIR)
|
||||||
from django.utils.translation import gettext_lazy as _
|
|
||||||
|
|
||||||
|
|
||||||
lock = threading.Lock()
|
lock = threading.Lock()
|
||||||
|
|
||||||
@ -91,8 +91,9 @@ class ListenerManagement:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def embedding_by_paragraph_data_list(data_list, paragraph_id_list, embedding_model: Embeddings):
|
def embedding_by_paragraph_data_list(data_list, paragraph_id_list, embedding_model: Embeddings):
|
||||||
maxkb_logger.info(_('Start--->Embedding paragraph: {paragraph_id_list}').format(paragraph_id_list=paragraph_id_list))
|
maxkb_logger.info(_('Start--->Embedding paragraph: {paragraph_id_list}').format(
|
||||||
status = State.SUCCESS
|
paragraph_id_list=paragraph_id_list)
|
||||||
|
)
|
||||||
try:
|
try:
|
||||||
# 删除段落
|
# 删除段落
|
||||||
VectorStore.get_embedding_vector().delete_by_paragraph_ids(paragraph_id_list)
|
VectorStore.get_embedding_vector().delete_by_paragraph_ids(paragraph_id_list)
|
||||||
@ -102,14 +103,20 @@ class ListenerManagement:
|
|||||||
|
|
||||||
# 批量向量化
|
# 批量向量化
|
||||||
VectorStore.get_embedding_vector().batch_save(data_list, embedding_model, is_save_function)
|
VectorStore.get_embedding_vector().batch_save(data_list, embedding_model, is_save_function)
|
||||||
|
ListenerManagement.update_status(
|
||||||
|
QuerySet(Paragraph).filter(id__in=paragraph_id_list), TaskType.EMBEDDING, State.SUCCESS
|
||||||
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
maxkb_logger.error(_('Vectorized paragraph: {paragraph_id_list} error {error} {traceback}').format(
|
maxkb_logger.error(_('Vectorized paragraph: {paragraph_id_list} error {error} {traceback}').format(
|
||||||
paragraph_id_list=paragraph_id_list, error=str(e), traceback=traceback.format_exc()))
|
paragraph_id_list=paragraph_id_list, error=str(e), traceback=traceback.format_exc())
|
||||||
status = State.FAILURE
|
)
|
||||||
|
ListenerManagement.update_status(
|
||||||
|
QuerySet(Paragraph).filter(id__in=paragraph_id_list), TaskType.EMBEDDING, State.FAILURE
|
||||||
|
)
|
||||||
finally:
|
finally:
|
||||||
QuerySet(Paragraph).filter(id__in=paragraph_id_list).update(**{'status': status})
|
maxkb_logger.info(_('End--->Embedding paragraph: {paragraph_id_list}').format(
|
||||||
maxkb_logger.info(
|
paragraph_id_list=paragraph_id_list)
|
||||||
_('End--->Embedding paragraph: {paragraph_id_list}').format(paragraph_id_list=paragraph_id_list))
|
)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def embedding_by_paragraph(paragraph_id, embedding_model: Embeddings):
|
def embedding_by_paragraph(paragraph_id, embedding_model: Embeddings):
|
||||||
@ -271,7 +278,6 @@ class ListenerManagement:
|
|||||||
ListenerManagement.update_status(QuerySet(Document).filter(id=document_id), TaskType.EMBEDDING,
|
ListenerManagement.update_status(QuerySet(Document).filter(id=document_id), TaskType.EMBEDDING,
|
||||||
State.STARTED)
|
State.STARTED)
|
||||||
|
|
||||||
|
|
||||||
# 根据段落进行向量化处理
|
# 根据段落进行向量化处理
|
||||||
page_desc(QuerySet(Paragraph)
|
page_desc(QuerySet(Paragraph)
|
||||||
.annotate(
|
.annotate(
|
||||||
@ -381,5 +387,6 @@ class ListenerManagement:
|
|||||||
similarity: float,
|
similarity: float,
|
||||||
search_mode: SearchMode,
|
search_mode: SearchMode,
|
||||||
embedding: Embeddings):
|
embedding: Embeddings):
|
||||||
return VectorStore.get_embedding_vector().hit_test(query_text, knowledge_id, exclude_document_id_list, top_number,
|
return VectorStore.get_embedding_vector().hit_test(query_text, knowledge_id, exclude_document_id_list,
|
||||||
|
top_number,
|
||||||
similarity, search_mode, embedding)
|
similarity, search_mode, embedding)
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user