feat: 日志打印,嵌入脚本
This commit is contained in:
parent
303251f6cb
commit
b6f7537c2b
@ -13,3 +13,4 @@ from .listener_chat_message import *
|
|||||||
def run():
|
def run():
|
||||||
listener_manage.ListenerManagement().run()
|
listener_manage.ListenerManagement().run()
|
||||||
listener_chat_message.ListenerChatMessage().run()
|
listener_chat_message.ListenerChatMessage().run()
|
||||||
|
QuerySet(Document).filter(status=Status.embedding).update(**{'status': Status.error})
|
||||||
|
|||||||
@ -6,7 +6,9 @@
|
|||||||
@date:2023/10/20 14:01
|
@date:2023/10/20 14:01
|
||||||
@desc:
|
@desc:
|
||||||
"""
|
"""
|
||||||
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import traceback
|
||||||
|
|
||||||
import django.db.models
|
import django.db.models
|
||||||
from blinker import signal
|
from blinker import signal
|
||||||
@ -20,6 +22,9 @@ from dataset.models import Paragraph, Status, Document
|
|||||||
from embedding.models import SourceType
|
from embedding.models import SourceType
|
||||||
from smartdoc.conf import PROJECT_DIR
|
from smartdoc.conf import PROJECT_DIR
|
||||||
|
|
||||||
|
max_kb_error = logging.getLogger("max_kb_error")
|
||||||
|
max_kb = logging.getLogger("max_kb")
|
||||||
|
|
||||||
|
|
||||||
class ListenerManagement:
|
class ListenerManagement:
|
||||||
embedding_by_problem_signal = signal("embedding_by_problem")
|
embedding_by_problem_signal = signal("embedding_by_problem")
|
||||||
@ -46,6 +51,7 @@ class ListenerManagement:
|
|||||||
:param paragraph_id: 段落id
|
:param paragraph_id: 段落id
|
||||||
:return: None
|
:return: None
|
||||||
"""
|
"""
|
||||||
|
max_kb.info(f"开始--->向量化段落:{paragraph_id}")
|
||||||
status = Status.success
|
status = Status.success
|
||||||
try:
|
try:
|
||||||
data_list = native_search(
|
data_list = native_search(
|
||||||
@ -59,8 +65,11 @@ class ListenerManagement:
|
|||||||
# 批量向量化
|
# 批量向量化
|
||||||
VectorStore.get_embedding_vector().batch_save(data_list)
|
VectorStore.get_embedding_vector().batch_save(data_list)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
max_kb_error.error(f'向量化段落:{paragraph_id}出现错误{str(e)}{traceback.format_exc()}')
|
||||||
status = Status.error
|
status = Status.error
|
||||||
QuerySet(Paragraph).filter(id=paragraph_id).update(**{'status': status})
|
finally:
|
||||||
|
QuerySet(Paragraph).filter(id=paragraph_id).update(**{'status': status})
|
||||||
|
max_kb.info(f'结束--->向量化段落:{paragraph_id}')
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@poxy
|
@poxy
|
||||||
@ -70,6 +79,7 @@ class ListenerManagement:
|
|||||||
:param document_id: 文档id
|
:param document_id: 文档id
|
||||||
:return: None
|
:return: None
|
||||||
"""
|
"""
|
||||||
|
max_kb.info(f"开始--->向量化文档:{document_id}")
|
||||||
status = Status.success
|
status = Status.success
|
||||||
try:
|
try:
|
||||||
data_list = native_search(
|
data_list = native_search(
|
||||||
@ -83,10 +93,13 @@ class ListenerManagement:
|
|||||||
# 批量向量化
|
# 批量向量化
|
||||||
VectorStore.get_embedding_vector().batch_save(data_list)
|
VectorStore.get_embedding_vector().batch_save(data_list)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
max_kb_error.error(f'向量化文档:{document_id}出现错误{str(e)}{traceback.format_exc()}')
|
||||||
status = Status.error
|
status = Status.error
|
||||||
# 修改状态
|
finally:
|
||||||
QuerySet(Document).filter(id=document_id).update(**{'status': status})
|
# 修改状态
|
||||||
QuerySet(Paragraph).filter(document_id=document_id).update(**{'status': status})
|
QuerySet(Document).filter(id=document_id).update(**{'status': status})
|
||||||
|
QuerySet(Paragraph).filter(document_id=document_id).update(**{'status': status})
|
||||||
|
max_kb.info(f"结束--->向量化文档:{document_id}")
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@poxy
|
@poxy
|
||||||
@ -96,9 +109,15 @@ class ListenerManagement:
|
|||||||
:param dataset_id: 知识库id
|
:param dataset_id: 知识库id
|
||||||
:return: None
|
:return: None
|
||||||
"""
|
"""
|
||||||
document_list = QuerySet(Document).filter(dataset_id=dataset_id)
|
max_kb.info(f"向量化数据集{dataset_id}")
|
||||||
for document in document_list:
|
try:
|
||||||
ListenerManagement.embedding_by_document(document.id)
|
document_list = QuerySet(Document).filter(dataset_id=dataset_id)
|
||||||
|
for document in document_list:
|
||||||
|
ListenerManagement.embedding_by_document(document.id)
|
||||||
|
except Exception as e:
|
||||||
|
max_kb_error.error(f'向量化数据集:{dataset_id}出现错误{str(e)}{traceback.format_exc()}')
|
||||||
|
finally:
|
||||||
|
max_kb.info(f"结束--->向量化数据集:{dataset_id}")
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def delete_embedding_by_document(document_id):
|
def delete_embedding_by_document(document_id):
|
||||||
|
|||||||
@ -6,15 +6,16 @@
|
|||||||
@date:2023/9/5 19:29
|
@date:2023/9/5 19:29
|
||||||
@desc:
|
@desc:
|
||||||
"""
|
"""
|
||||||
import django.core.exceptions
|
import logging
|
||||||
from psycopg2 import IntegrityError
|
import traceback
|
||||||
|
|
||||||
from rest_framework.exceptions import ValidationError, ErrorDetail, APIException
|
from rest_framework.exceptions import ValidationError, ErrorDetail, APIException
|
||||||
from rest_framework.views import exception_handler
|
from rest_framework.views import exception_handler
|
||||||
|
|
||||||
from common.exception.app_exception import AppApiException
|
from common.exception.app_exception import AppApiException
|
||||||
from common.response import result
|
from common.response import result
|
||||||
|
|
||||||
import traceback
|
|
||||||
def to_result(key, args, parent_key=None):
|
def to_result(key, args, parent_key=None):
|
||||||
"""
|
"""
|
||||||
将校验异常 args转换为统一数据
|
将校验异常 args转换为统一数据
|
||||||
@ -59,7 +60,6 @@ def handle_exception(exc, context):
|
|||||||
exception_class = exc.__class__
|
exception_class = exc.__class__
|
||||||
# 先调用REST framework默认的异常处理方法获得标准错误响应对象
|
# 先调用REST framework默认的异常处理方法获得标准错误响应对象
|
||||||
response = exception_handler(exc, context)
|
response = exception_handler(exc, context)
|
||||||
traceback.print_exc()
|
|
||||||
# 在此处补充自定义的异常处理
|
# 在此处补充自定义的异常处理
|
||||||
if issubclass(exception_class, ValidationError):
|
if issubclass(exception_class, ValidationError):
|
||||||
return validation_error_to_result(exc)
|
return validation_error_to_result(exc)
|
||||||
@ -68,5 +68,6 @@ def handle_exception(exc, context):
|
|||||||
if issubclass(exception_class, APIException):
|
if issubclass(exception_class, APIException):
|
||||||
return result.error(exc.detail)
|
return result.error(exc.detail)
|
||||||
if response is None:
|
if response is None:
|
||||||
|
logging.getLogger("max_kb_error").error(f'{str(exc)}:{traceback.format_exc()}')
|
||||||
return result.error(str(exc))
|
return result.error(str(exc))
|
||||||
return response
|
return response
|
||||||
|
|||||||
@ -11,7 +11,7 @@ from functools import reduce
|
|||||||
from typing import Dict, List
|
from typing import Dict, List
|
||||||
|
|
||||||
|
|
||||||
def sub_array(array: List, item_num=30):
|
def sub_array(array: List, item_num=10):
|
||||||
result = []
|
result = []
|
||||||
temp = []
|
temp = []
|
||||||
for item in array:
|
for item in array:
|
||||||
|
|||||||
@ -6,6 +6,7 @@
|
|||||||
@date:2023/10/18 19:16
|
@date:2023/10/18 19:16
|
||||||
@desc:
|
@desc:
|
||||||
"""
|
"""
|
||||||
|
import threading
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from typing import List, Dict
|
from typing import List, Dict
|
||||||
|
|
||||||
@ -15,6 +16,8 @@ from common.config.embedding_config import EmbeddingModel
|
|||||||
from common.util.common import sub_array
|
from common.util.common import sub_array
|
||||||
from embedding.models import SourceType
|
from embedding.models import SourceType
|
||||||
|
|
||||||
|
lock = threading.Lock()
|
||||||
|
|
||||||
|
|
||||||
class BaseVectorStore(ABC):
|
class BaseVectorStore(ABC):
|
||||||
vector_exists = False
|
vector_exists = False
|
||||||
@ -65,25 +68,37 @@ class BaseVectorStore(ABC):
|
|||||||
:param trample_num 点踩数量
|
:param trample_num 点踩数量
|
||||||
:return: bool
|
:return: bool
|
||||||
"""
|
"""
|
||||||
if embedding is None:
|
# 获取锁
|
||||||
embedding = EmbeddingModel.get_embedding_model()
|
lock.acquire()
|
||||||
self.save_pre_handler()
|
try:
|
||||||
self._save(text, source_type, dataset_id, document_id, paragraph_id, source_id, is_active, star_num,
|
if embedding is None:
|
||||||
trample_num, embedding)
|
embedding = EmbeddingModel.get_embedding_model()
|
||||||
|
self.save_pre_handler()
|
||||||
|
self._save(text, source_type, dataset_id, document_id, paragraph_id, source_id, is_active, star_num,
|
||||||
|
trample_num, embedding)
|
||||||
|
finally:
|
||||||
|
# 释放锁
|
||||||
|
lock.release()
|
||||||
|
|
||||||
def batch_save(self, data_list: List[Dict], embedding=None):
|
def batch_save(self, data_list: List[Dict], embedding=None):
|
||||||
"""
|
# 获取锁
|
||||||
批量插入
|
lock.acquire()
|
||||||
:param data_list: 数据列表
|
try:
|
||||||
:param embedding: 向量化处理器
|
"""
|
||||||
:return: bool
|
批量插入
|
||||||
"""
|
:param data_list: 数据列表
|
||||||
if embedding is None:
|
:param embedding: 向量化处理器
|
||||||
embedding = EmbeddingModel.get_embedding_model()
|
:return: bool
|
||||||
self.save_pre_handler()
|
"""
|
||||||
result = sub_array(data_list)
|
if embedding is None:
|
||||||
for child_array in result:
|
embedding = EmbeddingModel.get_embedding_model()
|
||||||
self._batch_save(child_array, embedding)
|
self.save_pre_handler()
|
||||||
|
result = sub_array(data_list)
|
||||||
|
for child_array in result:
|
||||||
|
self._batch_save(child_array, embedding)
|
||||||
|
finally:
|
||||||
|
# 释放锁
|
||||||
|
lock.release()
|
||||||
return True
|
return True
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
|
|||||||
@ -5,11 +5,9 @@ import os
|
|||||||
from ..const import PROJECT_DIR, CONFIG
|
from ..const import PROJECT_DIR, CONFIG
|
||||||
|
|
||||||
LOG_DIR = os.path.join(PROJECT_DIR, 'data', 'logs')
|
LOG_DIR = os.path.join(PROJECT_DIR, 'data', 'logs')
|
||||||
QA_BOT_LOG_FILE = os.path.join(LOG_DIR, 'smart_doc.log')
|
MAX_KB_LOG_FILE = os.path.join(LOG_DIR, 'max_kb.log')
|
||||||
DRF_EXCEPTION_LOG_FILE = os.path.join(LOG_DIR, 'drf_exception.log')
|
DRF_EXCEPTION_LOG_FILE = os.path.join(LOG_DIR, 'drf_exception.log')
|
||||||
UNEXPECTED_EXCEPTION_LOG_FILE = os.path.join(LOG_DIR, 'unexpected_exception.log')
|
UNEXPECTED_EXCEPTION_LOG_FILE = os.path.join(LOG_DIR, 'unexpected_exception.log')
|
||||||
ANSIBLE_LOG_FILE = os.path.join(LOG_DIR, 'ansible.log')
|
|
||||||
GUNICORN_LOG_FILE = os.path.join(LOG_DIR, 'gunicorn.log')
|
|
||||||
LOG_LEVEL = "DEBUG"
|
LOG_LEVEL = "DEBUG"
|
||||||
|
|
||||||
LOGGING = {
|
LOGGING = {
|
||||||
@ -54,16 +52,7 @@ LOGGING = {
|
|||||||
'maxBytes': 1024 * 1024 * 100,
|
'maxBytes': 1024 * 1024 * 100,
|
||||||
'backupCount': 7,
|
'backupCount': 7,
|
||||||
'formatter': 'main',
|
'formatter': 'main',
|
||||||
'filename': QA_BOT_LOG_FILE,
|
'filename': MAX_KB_LOG_FILE,
|
||||||
},
|
|
||||||
'ansible_logs': {
|
|
||||||
'encoding': 'utf8',
|
|
||||||
'level': 'DEBUG',
|
|
||||||
'class': 'logging.handlers.RotatingFileHandler',
|
|
||||||
'formatter': 'main',
|
|
||||||
'maxBytes': 1024 * 1024 * 100,
|
|
||||||
'backupCount': 7,
|
|
||||||
'filename': ANSIBLE_LOG_FILE,
|
|
||||||
},
|
},
|
||||||
'drf_exception': {
|
'drf_exception': {
|
||||||
'encoding': 'utf8',
|
'encoding': 'utf8',
|
||||||
@ -115,9 +104,15 @@ LOGGING = {
|
|||||||
'level': LOG_LEVEL,
|
'level': LOG_LEVEL,
|
||||||
'propagate': False,
|
'propagate': False,
|
||||||
},
|
},
|
||||||
'smartdoc': {
|
'max_kb_error': {
|
||||||
|
'handlers': ['console', 'unexpected_exception'],
|
||||||
|
'level': LOG_LEVEL,
|
||||||
|
'propagate': False,
|
||||||
|
},
|
||||||
|
'max_kb': {
|
||||||
'handlers': ['console', 'file'],
|
'handlers': ['console', 'file'],
|
||||||
'level': LOG_LEVEL,
|
'level': LOG_LEVEL,
|
||||||
|
'propagate': False,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -86,4 +86,4 @@
|
|||||||
document.body.append(chat_button);
|
document.body.append(chat_button);
|
||||||
} else console.error('invalid parameter')
|
} else console.error('invalid parameter')
|
||||||
}
|
}
|
||||||
document.body.onload = embedChatbot
|
window.onload = embedChatbot
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user