优化后的逻辑:
1. 收集所有ASSISTANT消息的索引位置
2. 根据消息在ASSISTANT序列中的位置判断是否为最近10条(注意:代码中的 cutoff_point 按 total_assistant_messages - 5 计算,实际只有最近5条会被视为"最近"消息,与此处描述的10条不一致,需确认以哪个为准)
3. 对不同位置的消息应用不同的数据保留策略:
   - 非最近10条: 只保留 [ANSWER],[TOOL_CALL] 和 [TOOL_RESPONSE] 完全跳过
   - 最近10条: 保留 [TOOL_CALL]、[TOOL_RESPONSE]、[ANSWER];其中 [TOOL_RESPONSE] 超过500字时只保留前、中、后各250字,并标注省略的字数
This commit is contained in:
parent
6c1393e96a
commit
c1a06aae35
@ -830,15 +830,23 @@ def process_messages(messages: List[Message], language: Optional[str] = None) ->
|
|||||||
"""处理消息列表,包括[TOOL_CALL]|[TOOL_RESPONSE]|[ANSWER]分割和语言指令添加"""
|
"""处理消息列表,包括[TOOL_CALL]|[TOOL_RESPONSE]|[ANSWER]分割和语言指令添加"""
|
||||||
processed_messages = []
|
processed_messages = []
|
||||||
|
|
||||||
|
# 收集所有ASSISTANT消息的索引
|
||||||
|
assistant_indices = [i for i, msg in enumerate(messages) if msg.role == "assistant"]
|
||||||
|
total_assistant_messages = len(assistant_indices)
|
||||||
|
cutoff_point = max(0, total_assistant_messages - 5)
|
||||||
# 处理每条消息
|
# 处理每条消息
|
||||||
for msg in messages:
|
for i, msg in enumerate(messages):
|
||||||
if msg.role == "assistant":
|
if msg.role == "assistant":
|
||||||
|
# 确定当前ASSISTANT消息在所有ASSISTANT消息中的位置(从0开始)
|
||||||
|
assistant_position = assistant_indices.index(i)
|
||||||
|
|
||||||
# 使用正则表达式按照 [TOOL_CALL]|[TOOL_RESPONSE]|[ANSWER] 进行切割
|
# 使用正则表达式按照 [TOOL_CALL]|[TOOL_RESPONSE]|[ANSWER] 进行切割
|
||||||
parts = re.split(r'\[(TOOL_CALL|TOOL_RESPONSE|ANSWER)\]', msg.content)
|
parts = re.split(r'\[(TOOL_CALL|TOOL_RESPONSE|ANSWER)\]', msg.content)
|
||||||
|
|
||||||
# 重新组装内容,过滤掉过长的 [TOOL_RESPONSE] 后的内容
|
# 重新组装内容,根据消息位置决定处理方式
|
||||||
filtered_content = ""
|
filtered_content = ""
|
||||||
current_tag = None
|
current_tag = None
|
||||||
|
is_recent_message = assistant_position >= cutoff_point # 最近10条消息
|
||||||
|
|
||||||
for i in range(0, len(parts)):
|
for i in range(0, len(parts)):
|
||||||
if i % 2 == 0: # 文本内容
|
if i % 2 == 0: # 文本内容
|
||||||
@ -847,26 +855,32 @@ def process_messages(messages: List[Message], language: Optional[str] = None) ->
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
if current_tag == "TOOL_RESPONSE":
|
if current_tag == "TOOL_RESPONSE":
|
||||||
# 统计 [TOOL_RESPONSE] 后面的文字长度,超过500字就截取
|
if is_recent_message:
|
||||||
if len(text) <= 500:
|
# 最近10条ASSISTANT消息:保留完整TOOL_RESPONSE信息(使用简略模式)
|
||||||
filtered_content += f"[TOOL_RESPONSE]\n{text}\n"
|
if len(text) <= 500:
|
||||||
else:
|
filtered_content += f"[TOOL_RESPONSE]\n{text}\n"
|
||||||
# 截取前中后3段内容,每段250字
|
else:
|
||||||
first_part = text[:250]
|
# 截取前中后3段内容,每段250字
|
||||||
middle_start = len(text) // 2 - 125
|
first_part = text[:250]
|
||||||
middle_part = text[middle_start:middle_start + 250]
|
middle_start = len(text) // 2 - 125
|
||||||
last_part = text[-250:]
|
middle_part = text[middle_start:middle_start + 250]
|
||||||
|
last_part = text[-250:]
|
||||||
|
|
||||||
# 计算省略的字数
|
# 计算省略的字数
|
||||||
omitted_count = len(text) - 750
|
omitted_count = len(text) - 750
|
||||||
omitted_text = f"...此处省略{omitted_count}字..."
|
omitted_text = f"...此处省略{omitted_count}字..."
|
||||||
|
|
||||||
# 拼接内容
|
# 拼接内容
|
||||||
truncated_text = f"{first_part}\n{omitted_text}\n{middle_part}\n{omitted_text}\n{last_part}"
|
truncated_text = f"{first_part}\n{omitted_text}\n{middle_part}\n{omitted_text}\n{last_part}"
|
||||||
filtered_content += f"[TOOL_RESPONSE]\n{truncated_text}\n"
|
filtered_content += f"[TOOL_RESPONSE]\n{truncated_text}\n"
|
||||||
|
# 10条以上的消息:不保留TOOL_RESPONSE数据(完全跳过)
|
||||||
elif current_tag == "TOOL_CALL":
|
elif current_tag == "TOOL_CALL":
|
||||||
filtered_content += f"[TOOL_CALL]\n{text}\n"
|
if is_recent_message:
|
||||||
|
# 最近10条ASSISTANT消息:保留TOOL_CALL信息
|
||||||
|
filtered_content += f"[TOOL_CALL]\n{text}\n"
|
||||||
|
# 10条以上的消息:不保留TOOL_CALL数据(完全跳过)
|
||||||
elif current_tag == "ANSWER":
|
elif current_tag == "ANSWER":
|
||||||
|
# 所有ASSISTANT消息都保留ANSWER数据
|
||||||
filtered_content += f"[ANSWER]\n{text}\n"
|
filtered_content += f"[ANSWER]\n{text}\n"
|
||||||
else:
|
else:
|
||||||
# 第一个标签之前的内容
|
# 第一个标签之前的内容
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user