preamble
parent 3973174c83
commit 95577c07a8

prompt/preamble_prompt.md | 41 (new file)
@@ -0,0 +1,41 @@
+You are an AI agent that is expected to generate a preamble message for the customer.
+
+The actual message will be sent later by a smarter agent. Your job is only to generate the right preamble in order to save time.
+
+These are the preamble messages you can choose from. You must ONLY choose one of these: ###
+
+{preamble_choices_text}
+
+###
+
+Basically, the preamble is something very short that continues the interaction naturally, without committing to any later action or response.
+We leave that later response to another agent. Make sure you understand this.
+
+Instructions:
+- Note that some of the choices are more generic, and some are more specific to a particular scenario.
+- If you're unsure what to choose --> prefer to go with a more generic, bland choice. This should be 80% of cases.
+  Examples of generic choices: "Hey there!", "Just a moment.", "Hello.", "Got it."
+- If you see clear value in saying something more specific and nuanced --> then go with a more specific choice. This should be 20% or less of cases.
+  Examples of specific choices: "Let me check that for you.", "Sorry to hear that.", "Thanks for your patience."
+
+Chat History:
+{chat_history}
+
+User's Last Message:
+{last_message}
+
+OUTPUT FORMAT:
+You must now choose the preamble message. Produce a JSON object with a single key, "preamble", holding the chosen message as a string, EXACTLY as it is given above (pay attention to subtleties like punctuation and copy your choice EXACTLY). The JSON content must be wrapped in "```json" and "```":
+```json
+{
+    "preamble": "Just a moment."
+}
+```
+
+You will now be given the current state of the interaction for which you must generate the next preamble message.
+{language}
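Since the template insists the choice be copied back character-for-character, a consumer of this output would typically verify the model's pick against the allowed list before using it. A minimal sketch under that assumption (the helper name and sample values are illustrative, not part of the commit):

```python
import json

def parse_and_validate_preamble(raw_json: str, allowed_choices: list[str]) -> str:
    """Return the chosen preamble only if it exactly matches one of the allowed choices."""
    choice = json.loads(raw_json).get("preamble", "")
    return choice if choice in allowed_choices else ""

allowed = ["Just a moment.", "Got it.", "Let me check that for you."]
print(parse_and_validate_preamble('{"preamble": "Just a moment."}', allowed))  # -> Just a moment.
print(parse_and_validate_preamble('{"preamble": "just a moment"}', allowed))   # -> "" (not an exact match)
```

The remaining hunks modify the Python router module that drives this template (the module's path is not shown in this capture).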
@@ -16,7 +16,7 @@ from utils.api_models import ChatRequestV2
 from utils.fastapi_utils import (
     process_messages, extract_block_from_system_prompt, format_messages_to_chat_history,
     create_project_directory, extract_api_key_from_auth, generate_v2_auth_token, fetch_bot_config,
-    call_guideline_llm, _get_optimal_batch_size, process_guideline_batch, get_content_from_messages
+    call_guideline_llm, _get_optimal_batch_size, process_guideline_batch, get_content_from_messages, call_preamble_llm, get_preamble_text, get_language_text
 )

 router = APIRouter()
@@ -56,7 +56,7 @@ def append_user_last_message(messages: list, content: str) -> bool:
     return messages


-async def generate_stream_response(agent, messages, thought_list, tool_response: bool, model: str):
+async def generate_stream_response(agent, messages, pre_message_list, tool_response: bool, model: str):
     """Generate a streaming response."""
     accumulated_content = ""
@@ -64,8 +64,8 @@ async def generate_stream_response(agent, messages, thought_list, tool_response:
     chunk_id = 0
     try:

-        if len(thought_list)>0:
-            accumulated_content = get_content_from_messages(thought_list, tool_response=tool_response)
+        if len(pre_message_list)>0:
+            accumulated_content = get_content_from_messages(pre_message_list, tool_response=tool_response)
             chunk_data = {
                 "id": f"chatcmpl-thought",
                 "object": "chat.completion.chunk",
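The chunk_data dict is cut off by the hunk boundary, but the visible fields follow the OpenAI-compatible chat.completion.chunk layout. Here is a sketch of what the initial pre-message chunk plausibly looks like; only "id" and "object" are taken from the diff, the remaining fields are assumptions:

```python
import json

# Illustrative shape of the first streamed chunk carrying the preamble / pre-message text.
# Only "id" and "object" appear in the hunk above; the other fields are assumed to follow the
# usual OpenAI-compatible chunk layout.
chunk_data = {
    "id": "chatcmpl-thought",
    "object": "chat.completion.chunk",
    "model": "example-model",  # stand-in for the `model` argument of generate_stream_response
    "choices": [{
        "index": 0,
        "delta": {"role": "assistant", "content": "Just a moment."},  # accumulated_content
        "finish_reason": None,
    }],
}
print(f"data: {json.dumps(chunk_data, ensure_ascii=False)}\n\n")  # typical SSE framing
```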
@@ -167,6 +167,13 @@ async def create_agent_and_generate_response(
     """Common logic for creating the agent and generating the response."""
     if generate_cfg is None:
         generate_cfg = {}
+    pre_message_list = []
+    query_text = get_user_last_message_content(messages)
+    chat_history = format_messages_to_chat_history(messages)
+    preamble_text = await call_preamble_llm(chat_history, query_text, get_preamble_text(language), language, model_name, api_key, model_server)
+
+    if preamble_text != '':
+        pre_message_list.append({"role": "assistant","content": preamble_text})

     # 1. Extract guideline and terms content from system_prompt
     system_prompt, guidelines_list, terms_list = extract_block_from_system_prompt(system_prompt)
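The arguments wired up here line up with the placeholders in prompt/preamble_prompt.md. A sketch of the substitution that call_preamble_llm (added later in this commit) performs with them, using illustrative stand-in values:

```python
# Hypothetical stand-alone rendering of the template with the arguments passed above.
from utils.fastapi_utils import get_preamble_text, get_language_text  # helpers added later in this commit

with open('./prompt/preamble_prompt.md', 'r', encoding='utf-8') as f:
    preamble_template = f.read()

chat_history = "user: my order is late\nassistant: let me check"  # illustrative values
query_text = "my order is late"
language = "en"

rendered_prompt = (
    preamble_template
    .replace('{preamble_choices_text}', get_preamble_text(language))
    .replace('{chat_history}', chat_history)
    .replace('{last_message}', query_text)
    .replace('{language}', get_language_text(language))
)
```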
@@ -175,8 +182,6 @@
     terms_analysis = ""
     if terms_list:
         logger.info(f"terms_list: {terms_list}")
-        # Extract the user's query text from messages for similarity retrieval
-        query_text = get_user_last_message_content(messages)
         # Process terms using embeddings
         try:
             from embedding.embedding import process_terms_with_embedding
@@ -231,9 +236,6 @@

     logger.info(f"Processing {guidelines_count} guidelines in {len(batches)} batches with {batch_count} concurrent batches")

-    # Prepare chat_history
-    chat_history = format_messages_to_chat_history(messages)
-
     # Run all tasks concurrently: guideline batch processing + agent creation
     tasks = []

@@ -313,32 +315,22 @@
         user_identifier=user_identifier
     )

-    if language:
-        # Append the reply language to the end of the last message
-        language_map = {
-            'zh': '请用中文回复',
-            'en': 'Please reply in English',
-            'ja': '日本語で回答してください',
-            'jp': '日本語で回答してください'
-        }
-        language_instruction = language_map.get(language.lower(), '')
-        if language_instruction:
-            messages = append_user_last_message(messages, f"\n\nlanguage:{language_instruction}")
+    messages = append_user_last_message(messages, f"\n\nlanguage:{get_language_text(language)}")

-    thought_list = []
     if guideline_analysis != '':
-        thought_list = [{"role": "assistant","reasoning_content": guideline_analysis}]
+        pre_message_list.append({"role": "assistant","reasoning_content": guideline_analysis})
     # Decide between a streaming and a non-streaming response based on the stream parameter
     if stream:
         return StreamingResponse(
-            generate_stream_response(agent, messages, thought_list, tool_response, model_name),
+            generate_stream_response(agent, messages, pre_message_list, tool_response, model_name),
             media_type="text/event-stream",
             headers={"Cache-Control": "no-cache", "Connection": "keep-alive"}
         )
     else:
         # Non-streaming response
         agent_responses = agent.run_nonstream(messages)
-        final_responses = thought_list+agent_responses
+        final_responses = pre_message_list+agent_responses
         if final_responses and len(final_responses) > 0:
             # Use get_content_from_messages to process the responses, honoring the tool_response parameter
             content = get_content_from_messages(final_responses, tool_response=tool_response)
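After this hunk, pre_message_list can hold up to two seed messages: the preamble (plain assistant content) and the guideline analysis (reasoning_content). A sketch of the shape handed to get_content_from_messages in the non-streaming branch, with illustrative values:

```python
# Illustrative contents of pre_message_list just before it is prepended to the agent output.
pre_message_list = [
    {"role": "assistant", "content": "Just a moment."},                             # from call_preamble_llm
    {"role": "assistant", "reasoning_content": "Guideline 3 applies because ..."},  # from guideline analysis
]

# Non-streaming path: the seed messages are simply concatenated ahead of the agent responses.
agent_responses = [{"role": "assistant", "content": "Here is what I found about your order ..."}]
final_responses = pre_message_list + agent_responses
```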
@@ -371,7 +371,7 @@ async def fetch_bot_config(bot_id: str) -> Dict[str, Any]:
     )


-def _sync_call_guideline_llm(llm_config, messages) -> str:
+def _sync_call_llm(llm_config, messages) -> str:
     """Helper that calls the LLM synchronously; runs in the thread pool."""
     llm_instance = TextChatAtOAI(llm_config)
     try:
@@ -397,6 +397,132 @@ def _sync_call_guideline_llm(llm_config, messages) -> str:
         logger.error(f"Error calling guideline LLM: {e}")
         return ""

+
+def get_language_text(language: str):
+    if language == "jp":
+        language = "ja"
+    language_map = {
+        'zh': '请用中文回复',
+        'en': 'Please reply in English',
+        'ja': '日本語で回答してください',
+    }
+    return language_map.get(language.lower(), '')
+
+
+def get_preamble_text(language: str):
+    if language == "jp":
+        language = "ja"
+    preamble_choices_map = {
+        'zh': [
+            "好的,让我来帮您看看。",
+            "明白了,请稍等。",
+            "好的,我理解了。",
+            "没问题,我来处理。",
+            "收到,正在为您查询。",
+            "了解,让我想想。",
+            "好的,我来帮您解答。",
+            "明白了,稍等片刻。",
+            "好的,正在处理中。",
+            "了解了,让我为您分析。"
+        ],
+        'en': [
+            "Just a moment.",
+            "Got it.",
+            "Let me check that for you.",
+            "Sorry to hear that.",
+            "Thanks for your patience.",
+            "I understand.",
+            "Let me help you with that.",
+            "Please wait a moment.",
+            "I'll look into that for you.",
+            "Gotcha, let me see.",
+            "Understood, one moment please.",
+            "I'll help you with this.",
+            "Let me figure that out.",
+            "Thanks for waiting.",
+            "I'll check on that."
+        ],
+        'ja': [
+            "少々お待ちください。",
+            "承知いたしました。",
+            "わかりました。",
+            "確認いたします。",
+            "少々お時間をください。",
+            "了解しました。",
+            "調べてみますね。",
+            "お待たせしました。",
+            "対応いたします。",
+            "わかりましたね。",
+            "承知しました。",
+            "確認させてください。",
+            "少々お待ちいただけますか。",
+            "お調べいたします。",
+            "対応いたしますね。"
+        ]
+    }
+    return "\n".join(preamble_choices_map.get(language.lower(), []))
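A quick sanity check of how the two helpers above behave; the 'jp' → 'ja' normalization and the newline-joined choice list come straight from the hunk, while the import path assumes the functions live in utils.fastapi_utils, as the import hunk earlier suggests:

```python
from utils.fastapi_utils import get_language_text, get_preamble_text

assert get_language_text("jp") == "日本語で回答してください"  # 'jp' is normalized to 'ja'
assert get_language_text("fr") == ""                            # unknown codes fall back to ''

choices = get_preamble_text("en").split("\n")
assert "Just a moment." in choices                              # choices come back newline-joined
```

The same hunk continues below with call_preamble_llm, which renders the template and parses the model's reply.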
+
+
+async def call_preamble_llm(chat_history: str, last_message: str, preamble_choices_text: str, language: str, model_name: str, api_key: str, model_server: str) -> str:
+    """Call the LLM to pick a preamble message.
+
+    Args:
+        chat_history: formatted chat history
+        last_message: the user's last message
+        preamble_choices_text: newline-joined preamble choices
+        language: reply language code
+        model_name: model name
+        api_key: API key
+        model_server: model server address
+
+    Returns:
+        str: the chosen preamble, or "" on failure
+    """
+    # Read the preamble prompt template
+    try:
+        with open('./prompt/preamble_prompt.md', 'r', encoding='utf-8') as f:
+            preamble_template = f.read()
+    except Exception as e:
+        logger.error(f"Error reading preamble prompt template: {e}")
+        return ""
+
+    # Fill in the template placeholders
+    system_prompt = preamble_template.replace('{preamble_choices_text}', preamble_choices_text).replace('{chat_history}', chat_history).replace('{last_message}', last_message).replace('{language}', get_language_text(language))
+    # Configure the LLM
+    llm_config = {
+        'model': model_name,
+        'api_key': api_key,
+        'model_server': model_server,  # use the model_server passed in by the caller
+    }
+
+    # Call the model
+    messages = [{'role': 'user', 'content': system_prompt}]
+
+    try:
+        # Limit concurrent API calls with the semaphore
+        async with api_semaphore:
+            # Run the synchronous HTTP call in the thread pool to avoid blocking the event loop
+            loop = asyncio.get_event_loop()
+            response = await loop.run_in_executor(thread_pool, _sync_call_llm, llm_config, messages)
+
+        # Extract the content wrapped in ```json ... ``` from the response
+        json_pattern = r'```json\s*\n(.*?)\n```'
+        json_matches = re.findall(json_pattern, response, re.DOTALL)
+
+        if json_matches:
+            try:
+                # Parse the first JSON object found
+                json_data = json.loads(json_matches[0])
+                logger.info("Successfully processed preamble")
+                return json_data["preamble"]  # return the parsed preamble
+            except json.JSONDecodeError as e:
+                logger.error(f"Error parsing JSON from preamble analysis: {e}")
+                return ""
+        else:
+            logger.warning("No JSON format found in preamble analysis")
+            return ""
+
+    except Exception as e:
+        logger.error(f"Error calling preamble LLM: {e}")
+        return ""
+
+
 async def call_guideline_llm(chat_history: str, guidelines_text: str, terms:str, model_name: str, api_key: str, model_server: str) -> str:
     """Call the LLM for guideline analysis
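For reference, a minimal sketch of the round trip call_preamble_llm expects: the regex is the one used in the function above, while the raw model reply is an illustrative example of the format the prompt template demands:

```python
import json
import re

# An illustrative raw model reply in the fenced JSON format required by the template.
raw_response = 'Sure.\n```json\n{\n    "preamble": "Just a moment."\n}\n```'

# Same extraction as in call_preamble_llm above.
json_pattern = r'```json\s*\n(.*?)\n```'
matches = re.findall(json_pattern, raw_response, re.DOTALL)
preamble = json.loads(matches[0])["preamble"] if matches else ""
print(preamble)  # -> Just a moment.
```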
@@ -437,7 +563,7 @@ async def call_guideline_llm(chat_history: str, guidelines_text: str, terms:str,
         async with api_semaphore:
             # Run the synchronous HTTP call in the thread pool to avoid blocking the event loop
             loop = asyncio.get_event_loop()
-            response = await loop.run_in_executor(thread_pool, _sync_call_guideline_llm, llm_config, messages)
+            response = await loop.run_in_executor(thread_pool, _sync_call_llm, llm_config, messages)
         return response

     except Exception as e: