feat: log printing, embed script

shaohuzhang1 2023-12-21 12:16:39 +08:00
parent 303251f6cb
commit b6f7537c2b
7 changed files with 75 additions and 44 deletions

View File

@@ -13,3 +13,4 @@ from .listener_chat_message import *
 def run():
     listener_manage.ListenerManagement().run()
     listener_chat_message.ListenerChatMessage().run()
+    QuerySet(Document).filter(status=Status.embedding).update(**{'status': Status.error})
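
A minimal standalone sketch of the startup-reset pattern this hunk adds, using the model and status names shown in the diff (the surrounding Django app setup is assumed):

    from django.db.models import QuerySet

    from dataset.models import Document, Status  # models as referenced in this diff

    def reset_interrupted_documents():
        # Any document still marked "embedding" at startup was interrupted by a
        # crash or restart; flag it as "error" so it can be re-queued instead of
        # hanging forever. update() issues a single SQL UPDATE.
        QuerySet(Document).filter(status=Status.embedding).update(**{'status': Status.error})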

View File

@@ -6,7 +6,9 @@
 @date:2023/10/20 14:01
 @desc:
 """
+import logging
 import os
+import traceback

 import django.db.models
 from blinker import signal
@@ -20,6 +22,9 @@ from dataset.models import Paragraph, Status, Document
 from embedding.models import SourceType
 from smartdoc.conf import PROJECT_DIR

+max_kb_error = logging.getLogger("max_kb_error")
+max_kb = logging.getLogger("max_kb")
+

 class ListenerManagement:
     embedding_by_problem_signal = signal("embedding_by_problem")
@@ -46,6 +51,7 @@ class ListenerManagement:
         :param paragraph_id: paragraph id
         :return: None
         """
+        max_kb.info(f"start ---> embedding paragraph: {paragraph_id}")
         status = Status.success
         try:
             data_list = native_search(
@@ -59,8 +65,11 @@
             # batch embedding
             VectorStore.get_embedding_vector().batch_save(data_list)
         except Exception as e:
+            max_kb_error.error(f'embedding paragraph {paragraph_id} failed: {str(e)}{traceback.format_exc()}')
             status = Status.error
-        QuerySet(Paragraph).filter(id=paragraph_id).update(**{'status': status})
+        finally:
+            QuerySet(Paragraph).filter(id=paragraph_id).update(**{'status': status})
+            max_kb.info(f'end ---> embedding paragraph: {paragraph_id}')

     @staticmethod
     @poxy
@@ -70,6 +79,7 @@
         :param document_id: document id
         :return: None
         """
+        max_kb.info(f"start ---> embedding document: {document_id}")
         status = Status.success
         try:
             data_list = native_search(
@@ -83,10 +93,13 @@
             # batch embedding
             VectorStore.get_embedding_vector().batch_save(data_list)
         except Exception as e:
+            max_kb_error.error(f'embedding document {document_id} failed: {str(e)}{traceback.format_exc()}')
             status = Status.error
-        # update status
-        QuerySet(Document).filter(id=document_id).update(**{'status': status})
-        QuerySet(Paragraph).filter(document_id=document_id).update(**{'status': status})
+        finally:
+            # update status
+            QuerySet(Document).filter(id=document_id).update(**{'status': status})
+            QuerySet(Paragraph).filter(document_id=document_id).update(**{'status': status})
+            max_kb.info(f"end ---> embedding document: {document_id}")

     @staticmethod
     @poxy
@@ -96,9 +109,15 @@
         :param dataset_id: knowledge base id
         :return: None
         """
-        document_list = QuerySet(Document).filter(dataset_id=dataset_id)
-        for document in document_list:
-            ListenerManagement.embedding_by_document(document.id)
+        max_kb.info(f"embedding dataset: {dataset_id}")
+        try:
+            document_list = QuerySet(Document).filter(dataset_id=dataset_id)
+            for document in document_list:
+                ListenerManagement.embedding_by_document(document.id)
+        except Exception as e:
+            max_kb_error.error(f'embedding dataset {dataset_id} failed: {str(e)}{traceback.format_exc()}')
+        finally:
+            max_kb.info(f"end ---> embedding dataset: {dataset_id}")

     @staticmethod
     def delete_embedding_by_document(document_id):
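
Taken together, these hunks give every embedding entry point the same shape; a condensed sketch of that pattern (the do_embed/set_status callables are illustrative stand-ins for the ORM and vector-store calls above, not repo APIs):

    import logging
    import traceback

    max_kb = logging.getLogger("max_kb")
    max_kb_error = logging.getLogger("max_kb_error")

    def embed_with_logging(paragraph_id, do_embed, set_status):
        max_kb.info(f"start ---> embedding paragraph: {paragraph_id}")
        status = "success"
        try:
            do_embed(paragraph_id)
        except Exception as e:
            # the error logger records the message plus the full traceback
            max_kb_error.error(f"embedding paragraph {paragraph_id} failed: {str(e)}{traceback.format_exc()}")
            status = "error"
        finally:
            # the finally block guarantees the status is persisted and the
            # end-of-run line is logged whether embedding succeeded or failed
            set_status(paragraph_id, status)
            max_kb.info(f"end ---> embedding paragraph: {paragraph_id}")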

View File

@@ -6,15 +6,16 @@
 @date:2023/9/5 19:29
 @desc:
 """
-import django.core.exceptions
-from psycopg2 import IntegrityError
+import logging
+import traceback
+
 from rest_framework.exceptions import ValidationError, ErrorDetail, APIException
 from rest_framework.views import exception_handler

 from common.exception.app_exception import AppApiException
 from common.response import result
-import traceback


 def to_result(key, args, parent_key=None):
     """
     Convert validation-exception args into unified result data
@@ -59,7 +60,6 @@ def handle_exception(exc, context):
     exception_class = exc.__class__
     # first call DRF's default exception handler to get a standard error response
     response = exception_handler(exc, context)
-    traceback.print_exc()
     # supplement custom exception handling here
     if issubclass(exception_class, ValidationError):
         return validation_error_to_result(exc)
@@ -68,5 +68,6 @@ def handle_exception(exc, context):
     if issubclass(exception_class, APIException):
         return result.error(exc.detail)
     if response is None:
+        logging.getLogger("max_kb_error").error(f'{str(exc)}:{traceback.format_exc()}')
         return result.error(str(exc))
     return response
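
A self-contained sketch of the fallback branch added here, assuming only DRF's exception_handler and the "max_kb_error" logger configured later in this commit (result_error stands in for common.response.result.error):

    import logging
    import traceback

    from rest_framework.views import exception_handler

    def handle_exception_sketch(exc, context, result_error):
        # let DRF try its standard handling first
        response = exception_handler(exc, context)
        if response is None:
            # nothing DRF recognizes: log the traceback instead of printing
            # it to stdout, then return a generic error payload
            logging.getLogger("max_kb_error").error(f'{str(exc)}:{traceback.format_exc()}')
            return result_error(str(exc))
        return response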

View File

@@ -11,7 +11,7 @@ from functools import reduce
 from typing import Dict, List


-def sub_array(array: List, item_num=30):
+def sub_array(array: List, item_num=10):
     result = []
     temp = []
     for item in array:
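
From the context lines, sub_array chunks a list into pieces of at most item_num elements; the body below is a reconstruction consistent with this diff (the tail-flush step is assumed):

    from typing import List

    def sub_array(array: List, item_num=10):
        # collect items into temp and flush every item_num elements, so e.g.
        # 25 items with item_num=10 yield chunks of 10, 10 and 5
        result = []
        temp = []
        for item in array:
            temp.append(item)
            if len(temp) == item_num:
                result.append(temp)
                temp = []
        if len(temp) > 0:
            result.append(temp)
        return result

Lowering the default from 30 to 10 means each downstream _batch_save call receives a smaller embedding batch.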

View File

@@ -6,6 +6,7 @@
 @date:2023/10/18 19:16
 @desc:
 """
+import threading
 from abc import ABC, abstractmethod
 from typing import List, Dict
@@ -15,6 +16,8 @@ from common.config.embedding_config import EmbeddingModel
 from common.util.common import sub_array
 from embedding.models import SourceType

+lock = threading.Lock()
+

 class BaseVectorStore(ABC):
     vector_exists = False
@@ -65,25 +68,37 @@
         :param trample_num: number of downvotes
         :return: bool
         """
-        if embedding is None:
-            embedding = EmbeddingModel.get_embedding_model()
-        self.save_pre_handler()
-        self._save(text, source_type, dataset_id, document_id, paragraph_id, source_id, is_active, star_num,
-                   trample_num, embedding)
+        # acquire the lock
+        lock.acquire()
+        try:
+            if embedding is None:
+                embedding = EmbeddingModel.get_embedding_model()
+            self.save_pre_handler()
+            self._save(text, source_type, dataset_id, document_id, paragraph_id, source_id, is_active, star_num,
+                       trample_num, embedding)
+        finally:
+            # release the lock
+            lock.release()

     def batch_save(self, data_list: List[Dict], embedding=None):
-        """
-        Batch insert
-        :param data_list: data list
-        :param embedding: embedding handler
-        :return: bool
-        """
-        if embedding is None:
-            embedding = EmbeddingModel.get_embedding_model()
-        self.save_pre_handler()
-        result = sub_array(data_list)
-        for child_array in result:
-            self._batch_save(child_array, embedding)
+        # acquire the lock
+        lock.acquire()
+        try:
+            """
+            Batch insert
+            :param data_list: data list
+            :param embedding: embedding handler
+            :return: bool
+            """
+            if embedding is None:
+                embedding = EmbeddingModel.get_embedding_model()
+            self.save_pre_handler()
+            result = sub_array(data_list)
+            for child_array in result:
+                self._batch_save(child_array, embedding)
+        finally:
+            # release the lock
+            lock.release()
         return True

     @abstractmethod
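
The explicit lock.acquire()/finally: lock.release() pairs above are equivalent to a with-block; a minimal sketch of the serialization this buys (class and chunking details here are illustrative, not the repo's full API):

    import threading
    from typing import Dict, List

    lock = threading.Lock()  # module-level: shared by every store instance

    class VectorStoreSketch:
        def batch_save(self, data_list: List[Dict], embedding=None):
            # `with lock:` acquires on entry and releases on any exit
            # (including exceptions), so only one thread embeds and saves
            # at a time
            with lock:
                for chunk in [data_list[i:i + 10] for i in range(0, len(data_list), 10)]:
                    self._batch_save(chunk, embedding)
            return True

        def _batch_save(self, chunk, embedding):
            ...  # placeholder for the concrete store's insert

Because the lock lives at module level, concurrent listeners serialize on it rather than hitting the embedding model simultaneously.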

View File

@@ -5,11 +5,9 @@ import os
 from ..const import PROJECT_DIR, CONFIG

 LOG_DIR = os.path.join(PROJECT_DIR, 'data', 'logs')
-QA_BOT_LOG_FILE = os.path.join(LOG_DIR, 'smart_doc.log')
+MAX_KB_LOG_FILE = os.path.join(LOG_DIR, 'max_kb.log')
 DRF_EXCEPTION_LOG_FILE = os.path.join(LOG_DIR, 'drf_exception.log')
 UNEXPECTED_EXCEPTION_LOG_FILE = os.path.join(LOG_DIR, 'unexpected_exception.log')
-ANSIBLE_LOG_FILE = os.path.join(LOG_DIR, 'ansible.log')
-GUNICORN_LOG_FILE = os.path.join(LOG_DIR, 'gunicorn.log')

 LOG_LEVEL = "DEBUG"

 LOGGING = {
@@ -54,16 +52,7 @@
             'maxBytes': 1024 * 1024 * 100,
             'backupCount': 7,
             'formatter': 'main',
-            'filename': QA_BOT_LOG_FILE,
-        },
-        'ansible_logs': {
-            'encoding': 'utf8',
-            'level': 'DEBUG',
-            'class': 'logging.handlers.RotatingFileHandler',
-            'formatter': 'main',
-            'maxBytes': 1024 * 1024 * 100,
-            'backupCount': 7,
-            'filename': ANSIBLE_LOG_FILE,
+            'filename': MAX_KB_LOG_FILE,
         },
         'drf_exception': {
             'encoding': 'utf8',
@@ -115,9 +104,15 @@
             'level': LOG_LEVEL,
             'propagate': False,
         },
-        'smartdoc': {
+        'max_kb_error': {
+            'handlers': ['console', 'unexpected_exception'],
+            'level': LOG_LEVEL,
+            'propagate': False,
+        },
+        'max_kb': {
             'handlers': ['console', 'file'],
             'level': LOG_LEVEL,
+            'propagate': False,
         },
     }
 }
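
With this configuration, code elsewhere in the commit obtains the loggers by name; a minimal usage sketch:

    import logging

    # names must match the keys under LOGGING['loggers'] above
    max_kb = logging.getLogger("max_kb")              # -> console + rotating max_kb.log
    max_kb_error = logging.getLogger("max_kb_error")  # -> console + unexpected_exception.log

    max_kb.info("routine progress message")
    max_kb_error.error("something went wrong")

    # 'propagate': False keeps these records from also flowing to the root
    # logger, so each message is emitted once per configured handler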

View File

@@ -86,4 +86,4 @@
             document.body.append(chat_button);
         } else console.error('invalid parameter')
     }
-document.body.onload = embedChatbot
+window.onload = embedChatbot
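
Hooking window.onload instead of document.body.onload avoids depending on document.body existing when the embed script is evaluated (it is null while a script runs in the head), while still firing once the page has fully loaded.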