preamble

parent 3973174c83
commit 95577c07a8

prompt/preamble_prompt.md (new file, 41 lines)
@@ -0,0 +1,41 @@
You are an AI agent that is expected to generate a preamble message for the customer.

The actual message will be sent later by a smarter agent. Your job is only to generate the right preamble in order to save time.


These are the preamble messages you can choose from. You must ONLY choose one of these:
###
{preamble_choices_text}
###

The preamble is something very short that continues the interaction naturally, without committing to any later action or response.
We leave that later response to another agent. Make sure you understand this.

Instructions:
- Note that some of the choices are more generic, and some are more specific to a particular scenario.
- If you're unsure what to choose, prefer a more generic, bland choice. This should cover about 80% of cases.
  Examples of generic choices: "Hey there!", "Just a moment.", "Hello.", "Got it."
- If you see clear value in saying something more specific and nuanced, go with a more specific choice. This should be 20% of cases or less.
  Examples of specific choices: "Let me check that for you.", "Sorry to hear that.", "Thanks for your patience."

Chat History:
{chat_history}

User's Last Message:
{last_message}

OUTPUT FORMAT:
You must now choose the preamble message. Produce a JSON object with a single key, "preamble", holding the preamble message as a string,
EXACTLY as it is given above (pay attention to subtleties like punctuation and copy your choice EXACTLY). The JSON content must be wrapped in "```json" and "```":

```json
{
    "preamble": "Just a moment."
}
```

You will now be given the current state of the interaction to which you must generate the next preamble message.
{language}
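Note: the template's contract is strict; the model must echo one offered line byte-for-byte, punctuation included. A minimal validation sketch a caller could use to reject drifted replies (`is_valid_preamble` is a hypothetical helper, not part of this commit):

```python
# Editor's sketch: `choices` is the newline-joined text the template receives
# as {preamble_choices_text}; `reply` is the parsed "preamble" value.
def is_valid_preamble(reply: str, choices: str) -> bool:
    # Exact, punctuation-sensitive match against one of the offered lines.
    return reply in choices.splitlines()
```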
@@ -16,7 +16,7 @@ from utils.api_models import ChatRequestV2
 from utils.fastapi_utils import (
     process_messages, extract_block_from_system_prompt, format_messages_to_chat_history,
     create_project_directory, extract_api_key_from_auth, generate_v2_auth_token, fetch_bot_config,
-    call_guideline_llm, _get_optimal_batch_size, process_guideline_batch, get_content_from_messages
+    call_guideline_llm, _get_optimal_batch_size, process_guideline_batch, get_content_from_messages, call_preamble_llm, get_preamble_text, get_language_text
 )
 
 router = APIRouter()
@@ -56,7 +56,7 @@ def append_user_last_message(messages: list, content: str) -> bool:
     return messages
 
 
-async def generate_stream_response(agent, messages, thought_list, tool_response: bool, model: str):
+async def generate_stream_response(agent, messages, pre_message_list, tool_response: bool, model: str):
     """Generate a streaming response."""
     accumulated_content = ""
@@ -64,8 +64,8 @@ async def generate_stream_response(agent, messages, thought_list, tool_response:
     chunk_id = 0
     try:
 
-        if len(thought_list)>0:
-            accumulated_content = get_content_from_messages(thought_list, tool_response=tool_response)
+        if len(pre_message_list)>0:
+            accumulated_content = get_content_from_messages(pre_message_list, tool_response=tool_response)
         chunk_data = {
             "id": f"chatcmpl-thought",
             "object": "chat.completion.chunk",
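Only the first two keys of `chunk_data` are visible in this hunk. Assuming the rest follows the OpenAI-compatible `chat.completion.chunk` shape (the `created`, `model`, and `choices`/`delta` fields below are assumptions, not taken from this diff), the first streamed event carrying the preamble would look roughly like this:

```python
import json
import time

# Hypothetical reconstruction of the first SSE event emitted for pre_message_list.
chunk_data = {
    "id": "chatcmpl-thought",
    "object": "chat.completion.chunk",
    "created": int(time.time()),
    "model": "some-model",                        # placeholder model name
    "choices": [{
        "index": 0,
        "delta": {"content": "Just a moment."},   # the accumulated preamble content
        "finish_reason": None,
    }],
}
# Framed as the text/event-stream media type expects.
sse_line = f"data: {json.dumps(chunk_data, ensure_ascii=False)}\n\n"
```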
@@ -167,6 +167,13 @@ async def create_agent_and_generate_response(
     """Common logic for creating the agent and generating the response."""
     if generate_cfg is None:
         generate_cfg = {}
+    pre_message_list = []
+    query_text = get_user_last_message_content(messages)
+    chat_history = format_messages_to_chat_history(messages)
+    preamble_text = await call_preamble_llm(chat_history, query_text, get_preamble_text(language), language, model_name, api_key, model_server)
+
+    if preamble_text != '':
+        pre_message_list.append({"role": "assistant","content": preamble_text})
+
     # 1. Extract guideline and terms content from system_prompt
     system_prompt, guidelines_list, terms_list = extract_block_from_system_prompt(system_prompt)
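`get_user_last_message_content` is imported from `utils.fastapi_utils` and is not shown in this diff. A minimal stand-in with the behavior the call sites imply (return the content of the most recent user turn) might look like this; it is a hypothetical sketch, not the real implementation:

```python
# Hypothetical stand-in for the helper used above.
def get_user_last_message_content(messages: list) -> str:
    # Walk backwards and return the content of the most recent user message.
    for message in reversed(messages):
        if message.get("role") == "user":
            return message.get("content") or ""
    return ""
```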
@@ -175,8 +182,6 @@ async def create_agent_and_generate_response(
     terms_analysis = ""
     if terms_list:
         logger.info(f"terms_list: {terms_list}")
-        # Extract the user's query text from messages for similarity retrieval
-        query_text = get_user_last_message_content(messages)
         # Process terms using embeddings
         try:
             from embedding.embedding import process_terms_with_embedding
@@ -231,9 +236,6 @@ async def create_agent_and_generate_response(
 
     logger.info(f"Processing {guidelines_count} guidelines in {len(batches)} batches with {batch_count} concurrent batches")
 
-    # Prepare chat_history
-    chat_history = format_messages_to_chat_history(messages)
-
     # Run all tasks concurrently: guideline batch processing + agent creation
     tasks = []
 
@@ -313,32 +315,22 @@ async def create_agent_and_generate_response(
         user_identifier=user_identifier
     )
 
     if language:
-        # Append the reply language to the end of the last message
-        language_map = {
-            'zh': '请用中文回复',
-            'en': 'Please reply in English',
-            'ja': '日本語で回答してください',
-            'jp': '日本語で回答してください'
-        }
-        language_instruction = language_map.get(language.lower(), '')
-        if language_instruction:
-            messages = append_user_last_message(messages, f"\n\nlanguage:{language_instruction}")
+        messages = append_user_last_message(messages, f"\n\nlanguage:{get_language_text(language)}")
 
 
-    thought_list = []
     if guideline_analysis != '':
-        thought_list = [{"role": "assistant","reasoning_content": guideline_analysis}]
+        pre_message_list.append({"role": "assistant","reasoning_content": guideline_analysis})
     # Decide between a streaming and a non-streaming response based on the stream parameter
     if stream:
         return StreamingResponse(
-            generate_stream_response(agent, messages, thought_list, tool_response, model_name),
+            generate_stream_response(agent, messages, pre_message_list, tool_response, model_name),
             media_type="text/event-stream",
             headers={"Cache-Control": "no-cache", "Connection": "keep-alive"}
         )
     else:
         # Non-streaming response
         agent_responses = agent.run_nonstream(messages)
-        final_responses = thought_list+agent_responses
+        final_responses = pre_message_list+agent_responses
         if final_responses and len(final_responses) > 0:
             # Use get_content_from_messages to process the responses; supports the tool_response parameter
             content = get_content_from_messages(final_responses, tool_response=tool_response)
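After this change, both the preamble and the guideline reasoning ride in `pre_message_list` and are simply prepended to the agent output in the non-streaming path. A small illustration of the resulting message order (the values are made up):

```python
# Illustrative only: the two shapes pre_message_list can hold after this hunk.
pre_message_list = [
    {"role": "assistant", "content": "Just a moment."},                # preamble
    {"role": "assistant", "reasoning_content": "guideline analysis"},  # reasoning
]
agent_responses = [{"role": "assistant", "content": "Here is your answer."}]

# Non-streaming path: preamble and reasoning come first, agent output last.
final_responses = pre_message_list + agent_responses
```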
@@ -371,7 +371,7 @@ async def fetch_bot_config(bot_id: str) -> Dict[str, Any]:
     )
 
 
-def _sync_call_guideline_llm(llm_config, messages) -> str:
+def _sync_call_llm(llm_config, messages) -> str:
     """Helper that calls the LLM synchronously; executed in the thread pool."""
     llm_instance = TextChatAtOAI(llm_config)
     try:
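The rename matters because the same synchronous helper now backs both the guideline and the preamble calls. For reference, the pattern both call sites use to keep the event loop unblocked; a generic sketch, not lifted from this file (the pool size and the toy function are assumptions):

```python
import asyncio
from concurrent.futures import ThreadPoolExecutor

thread_pool = ThreadPoolExecutor(max_workers=4)  # assumed pool size

def blocking_call(x: int) -> int:
    return x * 2  # stand-in for the synchronous HTTP call

async def main() -> None:
    loop = asyncio.get_event_loop()
    # Positional args are forwarded to the function, mirroring
    # loop.run_in_executor(thread_pool, _sync_call_llm, llm_config, messages).
    result = await loop.run_in_executor(thread_pool, blocking_call, 21)
    print(result)  # 42

asyncio.run(main())
```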
@@ -397,6 +397,132 @@ def _sync_call_guideline_llm(llm_config, messages) -> str:
         logger.error(f"Error calling guideline LLM: {e}")
         return ""
 
+def get_language_text(language: str):
+    if language == "jp":
+        language = "ja"
+    language_map = {
+        'zh': '请用中文回复',
+        'en': 'Please reply in English',
+        'ja': '日本語で回答してください',
+    }
+    return language_map.get(language.lower(), '')
+
+def get_preamble_text(language: str):
+    if language == "jp":
+        language = "ja"
+    preamble_choices_map = {
+        'zh': [
+            "好的,让我来帮您看看。",
+            "明白了,请稍等。",
+            "好的,我理解了。",
+            "没问题,我来处理。",
+            "收到,正在为您查询。",
+            "了解,让我想想。",
+            "好的,我来帮您解答。",
+            "明白了,稍等片刻。",
+            "好的,正在处理中。",
+            "了解了,让我为您分析。"
+        ],
+        'en': [
+            "Just a moment.",
+            "Got it.",
+            "Let me check that for you.",
+            "Sorry to hear that.",
+            "Thanks for your patience.",
+            "I understand.",
+            "Let me help you with that.",
+            "Please wait a moment.",
+            "I'll look into that for you.",
+            "Gotcha, let me see.",
+            "Understood, one moment please.",
+            "I'll help you with this.",
+            "Let me figure that out.",
+            "Thanks for waiting.",
+            "I'll check on that."
+        ],
+        'ja': [
+            "少々お待ちください。",
+            "承知いたしました。",
+            "わかりました。",
+            "確認いたします。",
+            "少々お時間をください。",
+            "了解しました。",
+            "調べてみますね。",
+            "お待たせしました。",
+            "対応いたします。",
+            "わかりましたね。",
+            "承知しました。",
+            "確認させてください。",
+            "少々お待ちいただけますか。",
+            "お調べいたします。",
+            "対応いたしますね。"
+        ]
+    }
+    return "\n".join(preamble_choices_map.get(language.lower(), []))
+
+
+async def call_preamble_llm(chat_history: str, last_message: str, preamble_choices_text: str, language: str, model_name: str, api_key: str, model_server: str) -> str:
+    """Call the LLM to choose the preamble message.
+
+    Args:
+        chat_history: chat history
+        last_message: the user's last message
+        preamble_choices_text: preamble choices, one per line
+        language: reply language code
+        model_name: model name
+        api_key: API key
+        model_server: model server address
+
+    Returns:
+        str: the chosen preamble, or "" on failure
+    """
+    # Read the preamble prompt template
+    try:
+        with open('./prompt/preamble_prompt.md', 'r', encoding='utf-8') as f:
+            preamble_template = f.read()
+    except Exception as e:
+        logger.error(f"Error reading preamble prompt template: {e}")
+        return ""
+
+    # Fill in the template placeholders
+    system_prompt = preamble_template.replace('{preamble_choices_text}', preamble_choices_text).replace('{chat_history}', chat_history).replace('{last_message}', last_message).replace('{language}', get_language_text(language))
+    # Configure the LLM
+    llm_config = {
+        'model': model_name,
+        'api_key': api_key,
+        'model_server': model_server,  # use the model_server argument passed in
+    }
+
+    # Call the model
+    messages = [{'role': 'user', 'content': system_prompt}]
+
+    try:
+        # Use the semaphore to limit concurrent API calls
+        async with api_semaphore:
+            # Run the synchronous HTTP call in the thread pool so the event loop is not blocked
+            loop = asyncio.get_event_loop()
+            response = await loop.run_in_executor(thread_pool, _sync_call_llm, llm_config, messages)
+
+        # Extract the content wrapped in ```json and ``` from the response
+        json_pattern = r'```json\s*\n(.*?)\n```'
+        json_matches = re.findall(json_pattern, response, re.DOTALL)
+
+        if json_matches:
+            try:
+                # Parse the first JSON object found
+                json_data = json.loads(json_matches[0])
+                logger.info("Successfully processed preamble")
+                return json_data["preamble"]  # return the parsed preamble
+            except json.JSONDecodeError as e:
+                logger.error(f"Error parsing JSON from preamble analysis: {e}")
+                return ""
+        else:
+            logger.warning("No JSON format found in preamble analysis")
+            return ""
+
+    except Exception as e:
+        logger.error(f"Error calling preamble LLM: {e}")
+        return ""
 
 
 async def call_guideline_llm(chat_history: str, guidelines_text: str, terms:str, model_name: str, api_key: str, model_server: str) -> str:
     """Call the LLM to run guideline analysis
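A quick usage sketch of the two new helpers, plus the same fenced-JSON extraction `call_preamble_llm` performs, run on a fabricated model reply (the `response` string is made up for illustration):

```python
import json
import re

# "jp" is normalized to "ja" by both helpers; unknown codes fall back to ''/[].
assert get_language_text("jp") == "日本語で回答してください"
assert get_preamble_text("fr") == ""            # unknown language -> empty choices
choices = get_preamble_text("en").splitlines()  # 15 English choices, one per line

# The extraction used above, applied to a fabricated model reply.
response = 'Sure.\n```json\n{"preamble": "Just a moment."}\n```'
matches = re.findall(r'```json\s*\n(.*?)\n```', response, re.DOTALL)
preamble = json.loads(matches[0])["preamble"] if matches else ""
assert preamble == "Just a moment."
```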
@@ -437,7 +563,7 @@ async def call_guideline_llm(chat_history: str, guidelines_text: str, terms:str,
         async with api_semaphore:
             # Run the synchronous HTTP call in the thread pool so the event loop is not blocked
             loop = asyncio.get_event_loop()
-            response = await loop.run_in_executor(thread_pool, _sync_call_guideline_llm, llm_config, messages)
+            response = await loop.run_in_executor(thread_pool, _sync_call_llm, llm_config, messages)
             return response
 
     except Exception as e: