feat: Dialogue displays knowledge sources (#3501)

This commit is contained in:
shaohuzhang1 2025-07-07 21:42:17 +08:00 committed by GitHub
parent bfdbc74611
commit b52c972ac0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 135 additions and 39 deletions

View File

@ -18,8 +18,9 @@ from knowledge.models import Paragraph
class ParagraphPipelineModel: class ParagraphPipelineModel:
def __init__(self, _id: str, document_id: str, knowledge_id: str, content: str, title: str, status: str, def __init__(self, _id: str, document_id: str, knowledge_id: str, content: str, title: str, status: str,
is_active: bool, comprehensive_score: float, similarity: float, dataset_name: str, document_name: str, is_active: bool, comprehensive_score: float, similarity: float, knowledge_name: str,
hit_handling_method: str, directly_return_similarity: float, meta: dict = None): document_name: str,
hit_handling_method: str, directly_return_similarity: float, knowledge_type, meta: dict = None):
self.id = _id self.id = _id
self.document_id = document_id self.document_id = document_id
self.knowledge_id = knowledge_id self.knowledge_id = knowledge_id
@ -29,11 +30,12 @@ class ParagraphPipelineModel:
self.is_active = is_active self.is_active = is_active
self.comprehensive_score = comprehensive_score self.comprehensive_score = comprehensive_score
self.similarity = similarity self.similarity = similarity
self.dataset_name = dataset_name self.knowledge_name = knowledge_name
self.document_name = document_name self.document_name = document_name
self.hit_handling_method = hit_handling_method self.hit_handling_method = hit_handling_method
self.directly_return_similarity = directly_return_similarity self.directly_return_similarity = directly_return_similarity
self.meta = meta self.meta = meta
self.knowledge_type = knowledge_type
def to_dict(self): def to_dict(self):
return { return {
@ -46,8 +48,9 @@ class ParagraphPipelineModel:
'is_active': self.is_active, 'is_active': self.is_active,
'comprehensive_score': self.comprehensive_score, 'comprehensive_score': self.comprehensive_score,
'similarity': self.similarity, 'similarity': self.similarity,
'dataset_name': self.dataset_name, 'knowledge_name': self.knowledge_name,
'document_name': self.document_name, 'document_name': self.document_name,
'knowledge_type': self.knowledge_type,
'meta': self.meta, 'meta': self.meta,
} }
@ -57,7 +60,8 @@ class ParagraphPipelineModel:
self.paragraph = {} self.paragraph = {}
self.comprehensive_score = None self.comprehensive_score = None
self.document_name = None self.document_name = None
self.dataset_name = None self.knowledge_name = None
self.knowledge_type = None
self.hit_handling_method = None self.hit_handling_method = None
self.directly_return_similarity = 0.9 self.directly_return_similarity = 0.9
self.meta = {} self.meta = {}
@ -76,8 +80,12 @@ class ParagraphPipelineModel:
self.paragraph = paragraph self.paragraph = paragraph
return self return self
def add_dataset_name(self, dataset_name): def add_knowledge_name(self, knowledge_name):
self.dataset_name = dataset_name self.knowledge_name = knowledge_name
return self
def add_knowledge_type(self, knowledge_type):
self.knowledge_type = knowledge_type
return self return self
def add_document_name(self, document_name): def add_document_name(self, document_name):
@ -110,8 +118,9 @@ class ParagraphPipelineModel:
self.paragraph.get('content'), self.paragraph.get('title'), self.paragraph.get('content'), self.paragraph.get('title'),
self.paragraph.get('status'), self.paragraph.get('status'),
self.paragraph.get('is_active'), self.paragraph.get('is_active'),
self.comprehensive_score, self.similarity, self.dataset_name, self.comprehensive_score, self.similarity, self.knowledge_name,
self.document_name, self.hit_handling_method, self.directly_return_similarity, self.document_name, self.hit_handling_method, self.directly_return_similarity,
self.knowledge_type,
self.meta) self.meta)

View File

@ -18,7 +18,7 @@ from django.utils.translation import gettext as _
from langchain.chat_models.base import BaseChatModel from langchain.chat_models.base import BaseChatModel
from langchain.schema import BaseMessage from langchain.schema import BaseMessage
from langchain.schema.messages import HumanMessage, AIMessage from langchain.schema.messages import HumanMessage, AIMessage
from langchain_core.messages import AIMessageChunk from langchain_core.messages import AIMessageChunk, SystemMessage
from rest_framework import status from rest_framework import status
from application.chat_pipeline.I_base_chat_pipeline import ParagraphPipelineModel from application.chat_pipeline.I_base_chat_pipeline import ParagraphPipelineModel
@ -196,7 +196,8 @@ class BaseChatStep(IChatStep):
@staticmethod @staticmethod
def reset_message_list(message_list: List[BaseMessage], answer_text): def reset_message_list(message_list: List[BaseMessage], answer_text):
result = [{'role': 'user' if isinstance(message, HumanMessage) else 'ai', 'content': message.content} for result = [{'role': 'user' if isinstance(message, HumanMessage) else (
'system' if isinstance(message, SystemMessage) else 'ai'), 'content': message.content} for
message message
in in
message_list] message_list]

View File

@ -79,7 +79,8 @@ class BaseSearchDatasetStep(ISearchDatasetStep):
.add_paragraph(paragraph) .add_paragraph(paragraph)
.add_similarity(find_embedding.get('similarity')) .add_similarity(find_embedding.get('similarity'))
.add_comprehensive_score(find_embedding.get('comprehensive_score')) .add_comprehensive_score(find_embedding.get('comprehensive_score'))
.add_dataset_name(paragraph.get('dataset_name')) .add_knowledge_name(paragraph.get('knowledge_name'))
.add_knowledge_type(paragraph.get('knowledge_type'))
.add_document_name(paragraph.get('document_name')) .add_document_name(paragraph.get('document_name'))
.add_hit_handling_method(paragraph.get('hit_handling_method')) .add_hit_handling_method(paragraph.get('hit_handling_method'))
.add_directly_return_similarity(paragraph.get('directly_return_similarity')) .add_directly_return_similarity(paragraph.get('directly_return_similarity'))

View File

@ -32,6 +32,8 @@ class RerankerStepNodeSerializer(serializers.Serializer):
question_reference_address = serializers.ListField(required=True) question_reference_address = serializers.ListField(required=True)
reranker_model_id = serializers.UUIDField(required=True) reranker_model_id = serializers.UUIDField(required=True)
reranker_reference_list = serializers.ListField(required=True, child=serializers.ListField(required=True)) reranker_reference_list = serializers.ListField(required=True, child=serializers.ListField(required=True))
show_knowledge = serializers.BooleanField(required=True,
label=_("The results are displayed in the knowledge sources"))
def is_valid(self, *, raise_exception=False): def is_valid(self, *, raise_exception=False):
super().is_valid(raise_exception=True) super().is_valid(raise_exception=True)
@ -55,6 +57,6 @@ class IRerankerNode(INode):
reranker_list=reranker_list) reranker_list=reranker_list)
def execute(self, question, reranker_setting, reranker_list, reranker_model_id, def execute(self, question, reranker_setting, reranker_list, reranker_model_id,show_knowledge,
**kwargs) -> NodeResult: **kwargs) -> NodeResult:
pass pass

View File

@ -24,11 +24,9 @@ def merge_reranker_list(reranker_list, result=None):
elif isinstance(document, dict): elif isinstance(document, dict):
content = document.get('title', '') + document.get('content', '') content = document.get('title', '') + document.get('content', '')
title = document.get("title") title = document.get("title")
dataset_name = document.get("dataset_name")
document_name = document.get('document_name')
result.append( result.append(
Document(page_content=str(document) if len(content) == 0 else content, Document(page_content=str(document) if len(content) == 0 else content,
metadata={'title': title, 'dataset_name': dataset_name, 'document_name': document_name})) metadata={'title': title, **document}))
else: else:
result.append(Document(page_content=str(document), metadata={})) result.append(Document(page_content=str(document), metadata={}))
return result return result
@ -71,8 +69,9 @@ class BaseRerankerNode(IRerankerNode):
self.context['result_list'] = details.get('result_list') self.context['result_list'] = details.get('result_list')
self.context['result'] = details.get('result') self.context['result'] = details.get('result')
def execute(self, question, reranker_setting, reranker_list, reranker_model_id, def execute(self, question, reranker_setting, reranker_list, reranker_model_id, show_knowledge,
**kwargs) -> NodeResult: **kwargs) -> NodeResult:
self.context['show_knowledge'] = show_knowledge
documents = merge_reranker_list(reranker_list) documents = merge_reranker_list(reranker_list)
top_n = reranker_setting.get('top_n', 3) top_n = reranker_setting.get('top_n', 3)
self.context['document_list'] = [{'page_content': document.page_content, 'metadata': document.metadata} for self.context['document_list'] = [{'page_content': document.page_content, 'metadata': document.metadata} for
@ -93,6 +92,7 @@ class BaseRerankerNode(IRerankerNode):
def get_details(self, index: int, **kwargs): def get_details(self, index: int, **kwargs):
return { return {
'show_knowledge': self.context.get('show_knowledge'),
'name': self.node.properties.get('stepName'), 'name': self.node.properties.get('stepName'),
"index": index, "index": index,
'document_list': self.context.get('document_list'), 'document_list': self.context.get('document_list'),

View File

@ -41,6 +41,9 @@ class SearchDatasetStepNodeSerializer(serializers.Serializer):
question_reference_address = serializers.ListField(required=True) question_reference_address = serializers.ListField(required=True)
show_knowledge = serializers.BooleanField(required=True,
label=_("The results are displayed in the knowledge sources"))
def is_valid(self, *, raise_exception=False): def is_valid(self, *, raise_exception=False):
super().is_valid(raise_exception=True) super().is_valid(raise_exception=True)
@ -73,7 +76,7 @@ class ISearchKnowledgeStepNode(INode):
return self.execute(**self.node_params_serializer.data, question=str(question), return self.execute(**self.node_params_serializer.data, question=str(question),
exclude_paragraph_id_list=exclude_paragraph_id_list) exclude_paragraph_id_list=exclude_paragraph_id_list)
def execute(self, dataset_id_list, dataset_setting, question, def execute(self, dataset_id_list, dataset_setting, question, show_knowledge,
exclude_paragraph_id_list=None, exclude_paragraph_id_list=None,
**kwargs) -> NodeResult: **kwargs) -> NodeResult:
pass pass

View File

@ -62,10 +62,11 @@ class BaseSearchKnowledgeNode(ISearchKnowledgeStepNode):
result])[0:dataset_setting.get('max_paragraph_char_number', 5000)] result])[0:dataset_setting.get('max_paragraph_char_number', 5000)]
self.context['directly_return'] = directly_return self.context['directly_return'] = directly_return
def execute(self, knowledge_id_list, knowledge_setting, question, def execute(self, knowledge_id_list, knowledge_setting, question, show_knowledge,
exclude_paragraph_id_list=None, exclude_paragraph_id_list=None,
**kwargs) -> NodeResult: **kwargs) -> NodeResult:
self.context['question'] = question self.context['question'] = question
self.context['show_knowledge'] = show_knowledge
get_knowledge_list_of_authorized = DatabaseModelManage.get_model('get_knowledge_list_of_authorized') get_knowledge_list_of_authorized = DatabaseModelManage.get_model('get_knowledge_list_of_authorized')
chat_user_type = self.workflow_manage.get_body().get('chat_user_type') chat_user_type = self.workflow_manage.get_body().get('chat_user_type')
if get_knowledge_list_of_authorized is not None and RoleConstants.CHAT_USER.value.name == chat_user_type: if get_knowledge_list_of_authorized is not None and RoleConstants.CHAT_USER.value.name == chat_user_type:
@ -145,6 +146,7 @@ class BaseSearchKnowledgeNode(ISearchKnowledgeStepNode):
def get_details(self, index: int, **kwargs): def get_details(self, index: int, **kwargs):
return { return {
'name': self.node.properties.get('stepName'), 'name': self.node.properties.get('stepName'),
'show_knowledge': self.context.get('show_knowledge'),
'question': self.context.get('question'), 'question': self.context.get('question'),
"index": index, "index": index,
'run_time': self.context.get('run_time'), 'run_time': self.context.get('run_time'),

View File

@ -75,7 +75,15 @@ class ChatRecordOperateSerializer(serializers.Serializer):
chat_record = self.get_chat_record() chat_record = self.get_chat_record()
if chat_record is None: if chat_record is None:
raise AppApiException(500, gettext("Conversation does not exist")) raise AppApiException(500, gettext("Conversation does not exist"))
return ApplicationChatRecordQuerySerializers.reset_chat_record(chat_record) application_access_token = QuerySet(ApplicationAccessToken).filter(
application_id=self.data.get('application_id')).first()
show_source = False
show_exec = False
if application_access_token is not None:
show_exec = application_access_token.show_exec
show_source = application_access_token.show_source
return ApplicationChatRecordQuerySerializers.reset_chat_record(
chat_record, show_source, show_exec)
class ApplicationChatRecordQuerySerializers(serializers.Serializer): class ApplicationChatRecordQuerySerializers(serializers.Serializer):
@ -103,21 +111,34 @@ class ApplicationChatRecordQuerySerializers(serializers.Serializer):
QuerySet(ChatRecord).filter(chat_id=self.data.get('chat_id')).order_by(order_by)] QuerySet(ChatRecord).filter(chat_id=self.data.get('chat_id')).order_by(order_by)]
@staticmethod @staticmethod
def reset_chat_record(chat_record): def reset_chat_record(chat_record, show_source, show_exec):
knowledge_list = [] knowledge_list = []
paragraph_list = [] paragraph_list = []
if 'search_step' in chat_record.details and chat_record.details.get('search_step').get( if 'search_step' in chat_record.details and chat_record.details.get('search_step').get(
'paragraph_list') is not None: 'paragraph_list') is not None:
paragraph_list = chat_record.details.get('search_step').get( paragraph_list = chat_record.details.get('search_step').get(
'paragraph_list') 'paragraph_list')
knowledge_list = [{'id': dataset_id, 'name': name} for dataset_id, name in reduce(lambda x, y: {**x, **y},
for item in chat_record.details.values():
if item.get('type') == 'search-knowledge-node' and item.get('show_knowledge', False):
paragraph_list = paragraph_list + item.get(
'paragraph_list')
if item.get('type') == 'reranker-node' and item.get('show_knowledge', False):
paragraph_list = paragraph_list + [rl.get('metadata') for rl in item.get('result_list') if
'document_id' in rl.get('metadata') and 'knowledge_id' in rl.get(
'metadata')]
paragraph_list = list({p.get('id'): p for p in paragraph_list}.values())
knowledge_list = knowledge_list + [{'id': knowledge_id, **knowledge} for knowledge_id, knowledge in
reduce(lambda x, y: {**x, **y},
[{row.get( [{row.get(
'knowledge_id'): row.get( 'knowledge_id'): {'knowledge_name': row.get(
"knowledge_name")} for "knowledge_name"),
'knowledge_type': row.get('knowledge_type')}} for
row in row in
paragraph_list], paragraph_list],
{}).items()] {}).items()]
if len(chat_record.improve_paragraph_id_list) > 0: if len(chat_record.improve_paragraph_id_list) > 0:
paragraph_model_list = QuerySet(Paragraph).filter(id__in=chat_record.improve_paragraph_id_list) paragraph_model_list = QuerySet(Paragraph).filter(id__in=chat_record.improve_paragraph_id_list)
if len(paragraph_model_list) < len(chat_record.improve_paragraph_id_list): if len(paragraph_model_list) < len(chat_record.improve_paragraph_id_list):
@ -126,14 +147,15 @@ class ApplicationChatRecordQuerySerializers(serializers.Serializer):
filter(lambda p_id: paragraph_model_id_list.__contains__(p_id), filter(lambda p_id: paragraph_model_id_list.__contains__(p_id),
chat_record.improve_paragraph_id_list)) chat_record.improve_paragraph_id_list))
chat_record.save() chat_record.save()
show_source_dict = {'knowledge_list': knowledge_list,
'paragraph_list': paragraph_list, }
show_exec_dict = {'execution_details': [chat_record.details[key] for key in chat_record.details]}
return { return {
**ChatRecordSerializerModel(chat_record).data, **ChatRecordSerializerModel(chat_record).data,
'padding_problem_text': chat_record.details.get('problem_padding').get( 'padding_problem_text': chat_record.details.get('problem_padding').get(
'padding_problem_text') if 'problem_padding' in chat_record.details else None, 'padding_problem_text') if 'problem_padding' in chat_record.details else None,
'knowledge_list': knowledge_list, **(show_source_dict if show_source else {}),
'paragraph_list': paragraph_list, **(show_exec_dict if show_exec else {})
'execution_details': [chat_record.details[key] for key in chat_record.details]
} }
def page(self, current_page: int, page_size: int, with_valid=True): def page(self, current_page: int, page_size: int, with_valid=True):
@ -141,9 +163,17 @@ class ApplicationChatRecordQuerySerializers(serializers.Serializer):
self.is_valid(raise_exception=True) self.is_valid(raise_exception=True)
order_by = '-create_time' if self.data.get('order_asc') is None or self.data.get( order_by = '-create_time' if self.data.get('order_asc') is None or self.data.get(
'order_asc') else 'create_time' 'order_asc') else 'create_time'
application_access_token = QuerySet(ApplicationAccessToken).filter(
application_id=self.data.get('application_id')).first()
show_source = False
show_exec = False
if application_access_token is not None:
show_exec = application_access_token.show_exec
show_source = application_access_token.show_source
page = page_search(current_page, page_size, page = page_search(current_page, page_size,
QuerySet(ChatRecord).filter(chat_id=self.data.get('chat_id')).order_by(order_by), QuerySet(ChatRecord).filter(chat_id=self.data.get('chat_id')).order_by(order_by),
post_records_handler=lambda chat_record: self.reset_chat_record(chat_record)) post_records_handler=lambda chat_record: self.reset_chat_record(chat_record, show_source,
show_exec))
return page return page

View File

@ -14,10 +14,12 @@ from django.db.models import QuerySet
from django.utils.translation import gettext_lazy as _ from django.utils.translation import gettext_lazy as _
from application.chat_pipeline.step.chat_step.i_chat_step import PostResponseHandler from application.chat_pipeline.step.chat_step.i_chat_step import PostResponseHandler
from application.models import Application, ChatRecord, Chat, ApplicationVersion, ChatUserType from application.models import Application, ChatRecord, Chat, ApplicationVersion, ChatUserType, ApplicationTypeChoices, \
ApplicationKnowledgeMapping
from common.constants.cache_version import Cache_Version from common.constants.cache_version import Cache_Version
from common.database_model_manage.database_model_manage import DatabaseModelManage from common.database_model_manage.database_model_manage import DatabaseModelManage
from common.exception.app_exception import ChatException from common.exception.app_exception import ChatException
from knowledge.models import Document
from models_provider.models import Model from models_provider.models import Model
from models_provider.tools import get_model_credential from models_provider.tools import get_model_credential
@ -72,6 +74,19 @@ class ChatInfo:
'-create_time')[0:1].first() '-create_time')[0:1].first()
if not application: if not application:
raise ChatException(500, _("The application has not been published. Please use it after publishing.")) raise ChatException(500, _("The application has not been published. Please use it after publishing."))
if application.type == ApplicationTypeChoices.SIMPLE.value:
# IDs of the knowledge bases (datasets) mapped to this application
knowledge_id_list = [str(row.knowledge_id) for row in
QuerySet(ApplicationKnowledgeMapping).filter(
application_id=self.application_id)]
# Documents to exclude: inactive documents in those knowledge bases
exclude_document_id_list = [str(document.id) for document in
QuerySet(Document).filter(
knowledge_id__in=knowledge_id_list,
is_active=False)]
self.knowledge_id_list = knowledge_id_list
self.exclude_document_id_list = exclude_document_id_list
self.application = application self.application = application
return application return application

View File

@ -1,6 +1,7 @@
SELECT SELECT
paragraph.*, paragraph.*,
knowledge."name" AS "knowledge_name", knowledge."name" AS "knowledge_name",
knowledge."type" AS "knowledge_type",
"document"."name" AS "document_name", "document"."name" AS "document_name",
"document"."meta" AS "meta", "document"."meta" AS "meta",
"document"."hit_handling_method" AS "hit_handling_method", "document"."hit_handling_method" AS "hit_handling_method",

View File

@ -15,7 +15,8 @@ from rest_framework import serializers
from application.models import VoteChoices, ChatRecord, Chat from application.models import VoteChoices, ChatRecord, Chat
from application.serializers.application_chat import ChatCountSerializer from application.serializers.application_chat import ChatCountSerializer
from application.serializers.application_chat_record import ChatRecordSerializerModel from application.serializers.application_chat_record import ChatRecordSerializerModel, \
ApplicationChatRecordQuerySerializers
from common.db.search import page_search from common.db.search import page_search
from common.exception.app_exception import AppApiException from common.exception.app_exception import AppApiException
from common.utils.lock import try_lock, un_lock from common.utils.lock import try_lock, un_lock
@ -86,7 +87,8 @@ class HistoricalConversationSerializer(serializers.Serializer):
def get_queryset(self): def get_queryset(self):
chat_user_id = self.data.get('chat_user_id') chat_user_id = self.data.get('chat_user_id')
application_id = self.data.get("application_id") application_id = self.data.get("application_id")
return QuerySet(Chat).filter(application_id=application_id, chat_user_id=chat_user_id, is_deleted=False) return QuerySet(Chat).filter(application_id=application_id, chat_user_id=chat_user_id,
is_deleted=False).order_by('-update_time')
def list(self): def list(self):
self.is_valid(raise_exception=True) self.is_valid(raise_exception=True)
@ -157,4 +159,6 @@ class HistoricalConversationRecordSerializer(serializers.Serializer):
def page(self, current_page, page_size): def page(self, current_page, page_size):
self.is_valid(raise_exception=True) self.is_valid(raise_exception=True)
return page_search(current_page, page_size, self.get_queryset(), lambda r: ChatRecordSerializerModel(r).data) return ApplicationChatRecordQuerySerializers(
data={'application_id': self.data.get('application_id'), 'chat_id': self.data.get('chat_id')}).page(
current_page, page_size)

View File

@ -133,6 +133,10 @@ export default {
result: '检索结果', result: '检索结果',
directly_return: '满足直接回答的分段内容', directly_return: '满足直接回答的分段内容',
searchParam: '检索参数', searchParam: '检索参数',
showKnowledge: {
label: '结果显示在知识来源中',
requiredMessage: '请设置参数',
},
searchQuestion: { searchQuestion: {
label: '检索问题', label: '检索问题',
placeholder: '请选择检索问题', placeholder: '请选择检索问题',

View File

@ -166,6 +166,14 @@
:model-type="'RERANKER'" :model-type="'RERANKER'"
></ModelSelect> ></ModelSelect>
</el-form-item> </el-form-item>
<el-form-item
:label="$t('views.applicationWorkflow.nodes.searchKnowledgeNode.showKnowledge.label')"
prop="show_knowledge"
required
@click.prevent
>
<el-switch size="small" v-model="form_data.show_knowledge" />
</el-form-item>
</el-form> </el-form>
</el-card> </el-card>
<ParamSettingDialog ref="ParamSettingDialogRef" @refresh="refreshParam" /> <ParamSettingDialog ref="ParamSettingDialogRef" @refresh="refreshParam" />
@ -198,6 +206,7 @@ const form = {
similarity: 0, similarity: 0,
max_paragraph_char_number: 5000, max_paragraph_char_number: 5000,
}, },
show_knowledge: false,
} }
const modelOptions = ref<any>(null) const modelOptions = ref<any>(null)

View File

@ -106,6 +106,20 @@
v-model="form_data.question_reference_address" v-model="form_data.question_reference_address"
/> />
</el-form-item> </el-form-item>
<el-form-item
:label="$t('views.applicationWorkflow.nodes.searchKnowledgeNode.showKnowledge.label')"
prop="show_knowledge"
:rules="{
message: $t(
'views.applicationWorkflow.nodes.searchKnowledgeNode.showKnowledge.requiredMessage',
),
trigger: 'blur',
required: true,
}"
@click.prevent
>
<el-switch size="small" v-model="form_data.show_knowledge" />
</el-form-item>
</el-form> </el-form>
</el-card> </el-card>
<ParamSettingDialog ref="ParamSettingDialogRef" @refresh="refreshParam" /> <ParamSettingDialog ref="ParamSettingDialogRef" @refresh="refreshParam" />
@ -142,6 +156,7 @@ const form = {
search_mode: 'embedding', search_mode: 'embedding',
}, },
question_reference_address: [], question_reference_address: [],
show_knowledge: false,
} }
const form_data = computed({ const form_data = computed({