This commit is contained in:
朱潮 2026-04-20 22:42:53 +08:00
commit 8e35d860a5
22 changed files with 881 additions and 237 deletions

View File

@ -2,7 +2,7 @@
> 负责范围:技能包管理服务 - 核心实现 > 负责范围:技能包管理服务 - 核心实现
> 最后更新:2026-04-18 > 最后更新:2026-04-20
## 当前状态 ## 当前状态
@ -21,8 +21,8 @@ Skill 系统支持两种来源:官方 skills (`./skills/`) 和用户 skills (`
## 最近重要事项 ## 最近重要事项
- 2026-04-16: 为 `auto-daily-summary`、`competitor-news-intel` 新增 Python CLI 脚本 MVP,统一采用 `argparse + JSON stdout` 模式 - 2026-04-20: 为 `rag-retrieve` 新增 `retrieval-policy-forbidden-self-knowledge.md`,禁止知识问答场景使用模型自身知识补全答案,要求严格基于检索证据作答
- 2026-04-16: 新增 6 个纯 `SKILL.md` 型业务 skill:`market-academic-insight`、`financial-report-generator`、`contract-document-generator`、`sales-decision-report`、`auto-daily-summary`、`competitor-news-intel` - 2026-04-19: 环境变量 `SKILLS_SUBDIR` 重命名为 `PROJECT_NAME`,用于选择 `skills/{PROJECT_NAME}` 和 `skills/autoload/{PROJECT_NAME}` 目录
- 2026-04-19: `create_robot_project` 的 autoload 去重和 stale 清理补强,autoload 目录也纳入 managed 清理,避免 `rag-retrieve-only` 场景下旧的 `rag-retrieve` 残留 - 2026-04-19: `create_robot_project` 的 autoload 去重和 stale 清理补强,autoload 目录也纳入 managed 清理,避免 `rag-retrieve-only` 场景下旧的 `rag-retrieve` 残留
- 2026-04-18: `create_robot_project` 改为自动加载 `skills/autoload/{SKILLS_SUBDIR}` 下所有 skill,并跳过已显式传入的同名 skill - 2026-04-18: `create_robot_project` 改为自动加载 `skills/autoload/{SKILLS_SUBDIR}` 下所有 skill,并跳过已显式传入的同名 skill
- 2026-04-18: `/api/v1/skill/list` 的官方库改为同时读取 `skills/common` 和 `skills/{SKILLS_SUBDIR}`,并按目录顺序去重 - 2026-04-18: `/api/v1/skill/list` 的官方库改为同时读取 `skills/common` 和 `skills/{SKILLS_SUBDIR}`,并按目录顺序去重

View File

@ -0,0 +1,6 @@
# 2026-Q2 Skill Changelog
### 2026-04-20
- **新增**: `skills/autoload/onprem/rag-retrieve/hooks/retrieval-policy-forbidden-self-knowledge.md`
- **说明**: 基于现有 `retrieval-policy.md` 衍生出更严格的检索策略,明确禁止在知识问答场景中使用模型自身知识补全答案,要求回答只能来自检索证据
- **作者**: Claude

View File

@ -24,6 +24,7 @@ class AgentConfig:
mcp_settings: Optional[List[Dict]] = field(default_factory=list) mcp_settings: Optional[List[Dict]] = field(default_factory=list)
generate_cfg: Optional[Dict] = None generate_cfg: Optional[Dict] = None
enable_thinking: bool = False enable_thinking: bool = False
enable_self_knowledge: bool = False
# 上下文参数 # 上下文参数
project_dir: Optional[str] = None project_dir: Optional[str] = None
@ -64,6 +65,7 @@ class AgentConfig:
'mcp_settings': self.mcp_settings, 'mcp_settings': self.mcp_settings,
'generate_cfg': self.generate_cfg, 'generate_cfg': self.generate_cfg,
'enable_thinking': self.enable_thinking, 'enable_thinking': self.enable_thinking,
'enable_self_knowledge': self.enable_self_knowledge,
'project_dir': self.project_dir, 'project_dir': self.project_dir,
'user_identifier': self.user_identifier, 'user_identifier': self.user_identifier,
'session_id': self.session_id, 'session_id': self.session_id,
@ -122,6 +124,7 @@ class AgentConfig:
user_identifier=request.user_identifier, user_identifier=request.user_identifier,
session_id=request.session_id, session_id=request.session_id,
enable_thinking=request.enable_thinking, enable_thinking=request.enable_thinking,
enable_self_knowledge=request.enable_self_knowledge,
project_dir=project_dir, project_dir=project_dir,
stream=request.stream, stream=request.stream,
tool_response=request.tool_response, tool_response=request.tool_response,
@ -179,6 +182,7 @@ class AgentConfig:
enable_thinking = bot_config.get("enable_thinking", False) enable_thinking = bot_config.get("enable_thinking", False)
enable_memori = bot_config.get("enable_memory", False) enable_memori = bot_config.get("enable_memory", False)
enable_self_knowledge = bot_config.get("enable_self_knowledge", False)
config = cls( config = cls(
bot_id=request.bot_id, bot_id=request.bot_id,
@ -191,6 +195,7 @@ class AgentConfig:
user_identifier=request.user_identifier, user_identifier=request.user_identifier,
session_id=request.session_id, session_id=request.session_id,
enable_thinking=enable_thinking, enable_thinking=enable_thinking,
enable_self_knowledge=enable_self_knowledge,
project_dir=project_dir, project_dir=project_dir,
stream=request.stream, stream=request.stream,
tool_response=request.tool_response, tool_response=request.tool_response,
@ -323,6 +328,7 @@ class AgentConfig:
'language': self.language, 'language': self.language,
'generate_cfg': self.generate_cfg, 'generate_cfg': self.generate_cfg,
'enable_thinking': self.enable_thinking, 'enable_thinking': self.enable_thinking,
'enable_self_knowledge': self.enable_self_knowledge,
'user_identifier': self.user_identifier, 'user_identifier': self.user_identifier,
'session_id': self.session_id, 'session_id': self.session_id,
'dataset_ids': self.dataset_ids, # 添加dataset_ids到缓存键生成 'dataset_ids': self.dataset_ids, # 添加dataset_ids到缓存键生成

View File

@ -18,7 +18,7 @@ from langchain.agents.middleware import SummarizationMiddleware as LangchainSumm
from .summarization_middleware import SummarizationMiddleware from .summarization_middleware import SummarizationMiddleware
from langchain_mcp_adapters.client import MultiServerMCPClient from langchain_mcp_adapters.client import MultiServerMCPClient
from sympy.printing.cxx import none from sympy.printing.cxx import none
from utils.fastapi_utils import detect_provider from utils.fastapi_utils import detect_provider, sanitize_model_kwargs
from .guideline_middleware import GuidelineMiddleware from .guideline_middleware import GuidelineMiddleware
from .tool_output_length_middleware import ToolOutputLengthMiddleware from .tool_output_length_middleware import ToolOutputLengthMiddleware
from .tool_use_cleanup_middleware import ToolUseCleanupMiddleware from .tool_use_cleanup_middleware import ToolUseCleanupMiddleware
@ -200,47 +200,22 @@ async def init_agent(config: AgentConfig):
# 检测或使用指定的提供商 # 检测或使用指定的提供商
model_provider, base_url = detect_provider(config.model_name, config.model_server) model_provider, base_url = detect_provider(config.model_name, config.model_server)
# 构建模型参数 model_kwargs, dropped_params, default_temperature_applied = sanitize_model_kwargs(
model_kwargs = { model_name=config.model_name,
"model": config.model_name, model_provider=model_provider,
"model_provider": model_provider, base_url=base_url,
"temperature": 0.8, api_key=config.api_key,
"base_url": base_url, generate_cfg=config.generate_cfg,
"api_key": config.api_key source="init_agent"
} )
if config.generate_cfg: if dropped_params:
# 内部使用的参数,不应传给任何 LLM logger.info(
internal_params = { "init_agent dropped_params=%s model=%s provider=%s default_temperature_applied=%s",
'tool_output_max_length', dropped_params,
'tool_output_truncation_strategy', config.model_name,
'tool_output_filters', model_provider,
'tool_output_exclude', default_temperature_applied
'preserve_code_blocks', )
'preserve_json',
}
# Anthropic 不支持的 OpenAI 特有参数
openai_only_params = {
'n', # 生成多少个响应
'presence_penalty',
'frequency_penalty',
'logprobs',
'top_logprobs',
'logit_bias',
'seed',
'suffix',
'best_of',
'echo',
'user',
}
# 根据提供商决定需要过滤的参数
params_to_filter = internal_params.copy()
if model_provider == 'anthropic':
params_to_filter.update(openai_only_params)
filtered_cfg = {k: v for k, v in config.generate_cfg.items() if k not in params_to_filter}
model_kwargs.update(filtered_cfg)
llm_instance = init_chat_model(**model_kwargs) llm_instance = init_chat_model(**model_kwargs)
# 创建新的 agent不再缓存 # 创建新的 agent不再缓存
@ -332,6 +307,7 @@ async def init_agent(config: AgentConfig):
"ASSISTANT_ID": str(config.bot_id), "ASSISTANT_ID": str(config.bot_id),
"USER_IDENTIFIER": str(config.user_identifier) if config.user_identifier else None, "USER_IDENTIFIER": str(config.user_identifier) if config.user_identifier else None,
"TRACE_ID": str(config.trace_id) if config.trace_id else None, "TRACE_ID": str(config.trace_id) if config.trace_id else None,
"ENABLE_SELF_KNOWLEDGE": str(config.enable_self_knowledge).lower(),
**(config.shell_env or {}), **(config.shell_env or {}),
}.items() if v is not None }.items() if v is not None
} }

View File

@ -214,6 +214,7 @@ async def _execute_command(skill_path: str, command: str, hook_type: str, config
env['ASSISTANT_ID'] = str(getattr(config, 'bot_id', '')) env['ASSISTANT_ID'] = str(getattr(config, 'bot_id', ''))
env['USER_IDENTIFIER'] = str(getattr(config, 'user_identifier', '')) env['USER_IDENTIFIER'] = str(getattr(config, 'user_identifier', ''))
env['TRACE_ID'] = str(getattr(config, 'trace_id', '')) env['TRACE_ID'] = str(getattr(config, 'trace_id', ''))
env['ENABLE_SELF_KNOWLEDGE'] = str(getattr(config, 'enable_self_knowledge', False)).lower()
env['SESSION_ID'] = str(getattr(config, 'session_id', '')) env['SESSION_ID'] = str(getattr(config, 'session_id', ''))
env['LANGUAGE'] = str(getattr(config, 'language', '')) env['LANGUAGE'] = str(getattr(config, 'language', ''))
env['HOOK_TYPE'] = hook_type env['HOOK_TYPE'] = hook_type

View File

@ -512,8 +512,9 @@ async def chat_completions(request: ChatRequest, authorization: Optional[str] =
project_dir = create_project_directory(request.dataset_ids, bot_id, request.skills) project_dir = create_project_directory(request.dataset_ids, bot_id, request.skills)
# 收集额外参数作为 generate_cfg # 收集额外参数作为 generate_cfg
exclude_fields = {'messages', 'model', 'model_server', 'dataset_ids', 'language', 'tool_response', 'system_prompt', 'mcp_settings' ,'stream', 'robot_type', 'bot_id', 'user_identifier', 'session_id', 'enable_thinking', 'skills', 'enable_memory', 'n', 'shell_env', 'max_tokens'} exclude_fields = {'messages', 'model', 'model_server', 'dataset_ids', 'language', 'tool_response', 'system_prompt', 'mcp_settings' ,'stream', 'robot_type', 'bot_id', 'user_identifier', 'session_id', 'enable_thinking', 'skills', 'enable_memory', 'enable_self_knowledge', 'n', 'shell_env', 'max_tokens'}
generate_cfg = {k: v for k, v in request.model_dump().items() if k not in exclude_fields} generate_cfg = {k: v for k, v in request.model_dump().items() if k not in exclude_fields}
logger.info("chat_completions generate_cfg_keys=%s model=%s", list(generate_cfg.keys()), request.model)
# 处理消息 # 处理消息
messages = process_messages(request.messages, request.language) messages = process_messages(request.messages, request.language)
# 创建 AgentConfig 对象 # 创建 AgentConfig 对象
@ -562,7 +563,7 @@ async def chat_warmup_v1(request: ChatRequest, authorization: Optional[str] = He
project_dir = create_project_directory(request.dataset_ids, bot_id, request.skills) project_dir = create_project_directory(request.dataset_ids, bot_id, request.skills)
# 收集额外参数作为 generate_cfg # 收集额外参数作为 generate_cfg
exclude_fields = {'messages', 'model', 'model_server', 'dataset_ids', 'language', 'tool_response', 'system_prompt', 'mcp_settings' ,'stream', 'robot_type', 'bot_id', 'user_identifier', 'session_id', 'enable_thinking', 'skills', 'enable_memory', 'n', 'shell_env'} exclude_fields = {'messages', 'model', 'model_server', 'dataset_ids', 'language', 'tool_response', 'system_prompt', 'mcp_settings' ,'stream', 'robot_type', 'bot_id', 'user_identifier', 'session_id', 'enable_thinking', 'skills', 'enable_memory', 'enable_self_knowledge', 'n', 'shell_env'}
generate_cfg = {k: v for k, v in request.model_dump().items() if k not in exclude_fields} generate_cfg = {k: v for k, v in request.model_dump().items() if k not in exclude_fields}
# 创建一个空的消息列表用于预热实际消息不会在warmup中处理 # 创建一个空的消息列表用于预热实际消息不会在warmup中处理
@ -665,9 +666,9 @@ async def chat_warmup_v2(request: ChatRequestV2, authorization: Optional[str] =
# 处理消息 # 处理消息
messages = process_messages(empty_messages, request.language or "ja") messages = process_messages(empty_messages, request.language or "ja")
# 收集额外参数作为 generate_cfg exclude_fields = {'messages', 'dataset_ids', 'language', 'tool_response', 'system_prompt', 'mcp_settings', 'stream', 'robot_type', 'bot_id', 'user_identifier', 'session_id', 'enable_thinking', 'skills', 'enable_memory', 'enable_self_knowledge', 'n', 'model', 'model_server', 'api_key', 'shell_env', 'max_tokens'}
exclude_fields = {'messages', 'stream', 'tool_response', 'bot_id', 'language', 'user_identifier', 'session_id', 'n', 'model', 'model_server', 'api_key', 'shell_env', 'max_tokens'}
generate_cfg = {k: v for k, v in request.model_dump().items() if k not in exclude_fields} generate_cfg = {k: v for k, v in request.model_dump().items() if k not in exclude_fields}
logger.info("chat_warmup_v2 generate_cfg_keys=%s requested_model=%s", list(generate_cfg.keys()), request.model)
# 从请求中提取 model/model_server/api_key优先级高于 bot_config排除 "whatever" 和空值) # 从请求中提取 model/model_server/api_key优先级高于 bot_config排除 "whatever" 和空值)
req_data = request.model_dump() req_data = request.model_dump()
req_model = req_data.get("model") or "" req_model = req_data.get("model") or ""
@ -773,8 +774,9 @@ async def chat_completions_v2(request: ChatRequestV2, authorization: Optional[st
# 处理消息 # 处理消息
messages = process_messages(request.messages, request.language) messages = process_messages(request.messages, request.language)
# 收集额外参数作为 generate_cfg # 收集额外参数作为 generate_cfg
exclude_fields = {'messages', 'dataset_ids', 'language', 'tool_response', 'system_prompt', 'mcp_settings', 'stream', 'robot_type', 'bot_id', 'user_identifier', 'session_id', 'enable_thinking', 'skills', 'enable_memory', 'n', 'model', 'model_server', 'api_key', 'shell_env', 'max_tokens'} exclude_fields = {'messages', 'dataset_ids', 'language', 'tool_response', 'system_prompt', 'mcp_settings', 'stream', 'robot_type', 'bot_id', 'user_identifier', 'session_id', 'enable_thinking', 'skills', 'enable_memory', 'enable_self_knowledge', 'n', 'model', 'model_server', 'api_key', 'shell_env', 'max_tokens'}
generate_cfg = {k: v for k, v in request.model_dump().items() if k not in exclude_fields} generate_cfg = {k: v for k, v in request.model_dump().items() if k not in exclude_fields}
logger.info("chat_completions_v2 generate_cfg_keys=%s requested_model=%s", list(generate_cfg.keys()), request.model)
# 从请求中提取 model/model_server/api_key优先级高于 bot_config排除 "whatever" 和空值) # 从请求中提取 model/model_server/api_key优先级高于 bot_config排除 "whatever" 和空值)
req_data = request.model_dump() req_data = request.model_dump()
req_model = req_data.get("model") or "" req_model = req_data.get("model") or ""

View File

@ -1,55 +0,0 @@
# Retrieval Policy
### 1. Retrieval Order and Tool Selection
- Follow this section for source choice, tool choice, query rewrite, `top_k`, fallback, result handling, and citations.
- Use this default retrieval order and execute it sequentially: skill-enabled knowledge retrieval tools > `rag_retrieve` / `table_rag_retrieve`.
- Do NOT answer from model knowledge first.
- Do NOT bypass the retrieval flow and inspect local filesystem documents on your own.
- Do NOT use local filesystem retrieval as a fallback knowledge source.
- Local filesystem documents are not a recommended retrieval source here because file formats are inconsistent and have not been normalized or parsed for reliable knowledge lookup.
- Knowledge must be retrieved through the supported knowledge tools only: skill-enabled retrieval scripts, `table_rag_retrieve`, and `rag_retrieve`.
- When a suitable skill-enabled knowledge retrieval tool is available, use it first.
- If no suitable skill-enabled retrieval tool is available, or if its result is insufficient, continue with `rag_retrieve` or `table_rag_retrieve`.
- Use `table_rag_retrieve` first for values, prices, quantities, inventory, specifications, rankings, comparisons, summaries, extraction, lists, tables, name lookup, historical coverage, mixed questions, and unclear cases.
- Use `rag_retrieve` first only for clearly pure concept, definition, workflow, policy, or explanation questions without structured data needs.
- After each retrieval step, evaluate sufficiency before moving to the next source. Do NOT run these retrieval sources in parallel.
### 2. Query Preparation
- Do NOT pass the raw user question unless it already works well for retrieval.
- Rewrite for recall: extract entity, time scope, attributes, and intent.
- Add useful variants: synonyms, aliases, abbreviations, related titles, historical names, and category terms.
- Expand list-style, extraction, overview, historical, roster, timeline, and archive queries more aggressively.
- Preserve meaning. Do NOT introduce unrelated topics.
### 3. Retrieval Breadth (`top_k`)
- Apply `top_k` only to `rag_retrieve`. Use the smallest sufficient value, then expand only if coverage is insufficient.
- Use `30` for simple fact lookup.
- Use `50` for moderate synthesis, comparison, summarization, or disambiguation.
- Use `100` for broad recall, such as comprehensive analysis, scattered knowledge, multiple entities or periods, or list / catalog / timeline / roster / overview requests.
- Raise `top_k` when keyword branches are many or results are too few, repetitive, incomplete, sparse, or too narrow.
- Use this expansion order: `30 -> 50 -> 100`. If unsure, use `100`.
### 4. Result Evaluation
- Treat results as insufficient if they are empty, start with `Error:`, say `no excel files found`, are off-topic, miss the core entity or scope, or provide no usable evidence.
- Also treat results as insufficient when they cover only part of the request, or when full-list, historical, comparison, or mixed data + explanation requests return only partial or truncated coverage.
### 5. Fallback and Sequential Retry
- If the first retrieval result is insufficient, call the next supported retrieval source in the default order before replying.
- `table_rag_retrieve` now performs an internal fallback to `rag_retrieve` when it returns `no excel files found`, but this does NOT change the higher-level retrieval order.
- If `table_rag_retrieve` is insufficient or empty, continue with `rag_retrieve`.
- If `rag_retrieve` is insufficient or empty, continue with `table_rag_retrieve`.
- Say no relevant information was found only after all applicable skill-enabled retrieval tools, `rag_retrieve`, and `table_rag_retrieve` have been tried and still do not provide enough evidence.
- Do NOT reply that no relevant information was found before the supported knowledge retrieval flow has been exhausted.
### 6. Table RAG Result Handling
- Follow all `[INSTRUCTION]` and `[EXTRA_INSTRUCTION]` content in `table_rag_retrieve` results.
- If results are truncated, explicitly tell the user total matches (`N+M`), displayed count (`N`), and omitted count (`M`).
- Cite data sources using filenames from `file_ref_table`.
### 7. Citation Requirements for Retrieved Knowledge
- When using knowledge from `rag_retrieve` or `table_rag_retrieve`, you MUST generate `<CITATION ... />` tags.
- Follow the citation format returned by each tool.
- Place citations immediately after the paragraph or bullet list that uses the knowledge.
- Do NOT collect citations at the end.
- Use 1-2 citations per paragraph or bullet list when possible.
- If learned knowledge is used, include at least 1 `<CITATION ... />`.

View File

@ -3,16 +3,24 @@
PreMemoryPrompt Hook - 用户上下文加载器示例 PreMemoryPrompt Hook - 用户上下文加载器示例
在记忆提取提示词FACT_RETRIEVAL_PROMPT加载时执行 在记忆提取提示词FACT_RETRIEVAL_PROMPT加载时执行
读取同目录下的 memory_prompt.md 作为自定义记忆提取提示词模板 根据环境变量决定是否启用禁止使用模型自身知识的 retrieval policy
""" """
import os
import sys import sys
from pathlib import Path from pathlib import Path
def main(): def main():
prompt_file = Path(__file__).parent / "retrieval-policy.md" enable_self_knowledge = (
if prompt_file.exists(): os.getenv("ENABLE_SELF_KNOWLEDGE", "false").lower() == "true"
print(prompt_file.read_text(encoding="utf-8")) )
policy_name = (
"retrieval-policy.md"
if enable_self_knowledge
else "retrieval-policy-forbidden-self-knowledge.md"
)
prompt_file = Path(__file__).parent / policy_name
print(prompt_file.read_text(encoding="utf-8"))
return 0 return 0

View File

@ -0,0 +1,129 @@
# Retrieval Policy (Forbidden Self-Knowledge)
## 0. Task Classification
Classify the request before acting:
- **Knowledge retrieval** (facts, summaries, comparisons, prices, lists, timelines, extraction, etc.): follow this policy strictly.
- **Codebase engineering** (modify/debug/inspect code): normal tools (Glob, Read, Grep, Bash) are allowed.
- **Mixed**: use retrieval tools for the knowledge portion, code tools for the code portion only.
- **Uncertain**: default to knowledge retrieval.
## 1. Critical Enforcement
For knowledge retrieval tasks, **this policy overrides generic codebase exploration behavior**.
- **Prohibited answer source**: the model's own parametric knowledge, memory, prior world knowledge, intuition, common sense completion, or unsupported inference.
- **Prohibited tools**: `Glob`, `Read`, `LS`, Bash (`ls`, `find`, `cat`, `head`, `tail`, `grep`, etc.) — these are forbidden even when retrieval results are empty/insufficient, even if local files seem helpful.
- **Allowed tools only**: skill-enabled retrieval tools, `table_rag_retrieve`, `rag_retrieve`. No other source for factual answering.
- Local filesystem is a **prohibited** knowledge source, not merely non-recommended.
- Exception: the user explicitly asks to read a specific local file, and reading that file is the task itself.
- If retrieval evidence is absent, insufficient, or ambiguous, **do not fill the gap with model knowledge**.
## 2. Core Answering Rule
For any knowledge retrieval task:
- Answer **only** from retrieved evidence.
- Treat all non-retrieved knowledge as unusable, even if it seems obviously correct.
- Do NOT answer from memory first.
- Do NOT "helpfully complete" missing facts.
- Do NOT convert weak hints into confident statements.
- If evidence does not support a claim, omit the claim.
## 3. Retrieval Order and Tool Selection
Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe filesystem first.
1. **Skill-enabled retrieval tools** (use first when available)
2. **`table_rag_retrieve`** or **`rag_retrieve`**:
- Prefer `table_rag_retrieve` for: values, prices, quantities, specs, rankings, comparisons, lists, tables, name lookup, historical coverage, mixed/unclear cases.
- Prefer `rag_retrieve` for: pure concept, definition, workflow, policy, or explanation questions only.
- After each step, evaluate sufficiency before proceeding.
- Retrieval must happen **before** any factual answer generation.
## 4. Query Preparation
- Do NOT pass raw user question unless it already works well for retrieval.
- Rewrite for recall: extract entity, time scope, attributes, intent. Add synonyms, aliases, abbreviations, historical names, category terms.
- Expand list/extraction/overview/timeline queries more aggressively. Preserve meaning.
## 5. Retrieval Breadth (`top_k`)
- Apply `top_k` only to `rag_retrieve`. Use smallest sufficient value, expand if insufficient.
- `30` for simple fact lookup → `50` for moderate synthesis/comparison → `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
- Expansion order: `30 → 50 → 100`. If unsure, use `100`.
## 6. Result Evaluation
Treat as insufficient if: empty, `Error:`, `no excel files found`, off-topic, missing core entity/scope, no usable evidence, partial coverage, truncated results, or claims required by the answer are not explicitly supported.
## 7. Fallback and Sequential Retry
On insufficient results, follow this sequence:
1. Rewrite query, retry same tool (once)
2. Switch to next retrieval source in default order
3. For `rag_retrieve`, expand `top_k`: `30 → 50 → 100`
4. `table_rag_retrieve` insufficient → try `rag_retrieve`; `rag_retrieve` insufficient → try `table_rag_retrieve`
- `table_rag_retrieve` internally falls back to `rag_retrieve` on `no excel files found`, but this does NOT change the higher-level order.
- Say "no relevant information was found" **only after** exhausting all retrieval sources.
- Do NOT switch to local filesystem inspection at any point.
- Do NOT switch to model self-knowledge at any point.
## 8. Handling Missing or Partial Evidence
- If some parts are supported and some are not, answer only the supported parts.
- Clearly mark unsupported parts as unavailable rather than guessing.
- Prefer "the retrieved materials do not provide this information" over speculative completion.
- When user asks for a definitive answer but evidence is incomplete, state the limitation directly.
## 9. Table RAG Result Handling
- Follow all `[INSTRUCTION]` and `[EXTRA_INSTRUCTION]` in results.
- If truncated: tell user total (`N+M`), displayed (`N`), omitted (`M`).
- Cite sources using filenames from `file_ref_table`.
## 10. Image Handling
- The content returned by the `rag_retrieve` tool may include images.
- Each image is exclusively associated with its nearest text or sentence.
- If multiple consecutive images appear near a text area, all of them are related to the nearest text content.
- Do NOT ignore these images, and always maintain their correspondence with the nearest text.
- Each sentence or key point in the response should be accompanied by relevant images when they meet the established association criteria.
- Avoid placing all images at the end of the response.
## 11. Citation Requirements
- MUST generate `<CITATION ... />` tags when using retrieval results.
- Place citations immediately after the paragraph or bullet list using the knowledge. Do NOT collect at end.
- 1-2 citations per paragraph/bullet. At least 1 citation when using retrieved knowledge.
- Do NOT cite claims that were not supported by retrieval.
## 12. Self-Knowledge Prohibition
This section applies whenever self-knowledge is disabled or forbidden for the current task.
- Retrieval remains the only usable source for factual answering.
- If retrieval is sufficient, answer from retrieval only.
- If retrieval is partially sufficient, answer only the supported parts.
- The model must not supplement missing parts with general knowledge, conceptual explanation, common background, intuition, or likely completion.
- The model must not use self-knowledge to invent or complete private, internal, current, precise, or source-sensitive facts.
- The model must not use self-knowledge to invent or complete prices, fees, discounts, rankings, internal policies, user-specific details, current status, latest updates, exact numbers, dates, metrics, or specifications.
- Retrieved facts must include citations.
- Unsupported parts must be stated as unavailable rather than guessed.
- If a paragraph would mix retrieved facts and unsupported completion, remove the unsupported completion.
- If evidence is incomplete, state the limitation explicitly.
## 13. Pre-Reply Self-Check
Before replying to a knowledge retrieval task, verify:
- Used only whitelisted retrieval tools — no local filesystem inspection?
- Did retrieval happen before any factual answer drafting?
- Did every factual claim come from retrieved evidence rather than model knowledge?
- Exhausted retrieval flow before concluding "not found"?
- Citations placed immediately after each relevant paragraph?
- If any unsupported part remained, was it removed or explicitly marked unavailable?
If any answer is "no", correct the process first.

View File

@ -79,11 +79,29 @@ On insufficient results, follow this sequence:
- Place citations immediately after the paragraph or bullet list using the knowledge. Do NOT collect at end. - Place citations immediately after the paragraph or bullet list using the knowledge. Do NOT collect at end.
- 1-2 citations per paragraph/bullet. At least 1 citation when using retrieved knowledge. - 1-2 citations per paragraph/bullet. At least 1 citation when using retrieved knowledge.
## 10. Pre-Reply Self-Check ## 11. Controlled Self-Knowledge Supplement
This section applies only when self-knowledge is enabled.
- Retrieval remains the primary source.
- If retrieval is sufficient, answer from retrieval only.
- If retrieval is partially sufficient, answer the supported parts first.
- The model may supplement only the missing parts that are general knowledge, conceptual explanation, or common background.
- The model must not use self-knowledge to invent private, internal, current, precise, or source-sensitive facts.
- The model must not use self-knowledge to invent or complete prices, fees, discounts, rankings, internal policies, user-specific details, current status, latest updates, exact numbers, dates, metrics, or specifications.
- Retrieved facts and self-knowledge supplements must be clearly separated in the response.
- Retrieved facts must include citations.
- Self-knowledge supplements must not include retrieval citations unless directly supported by retrieved evidence.
- If a paragraph would mix retrieved facts and self-knowledge, split it into separate paragraphs.
- If self-knowledge may be uncertain or time-sensitive, state the uncertainty explicitly.
## 12. Pre-Reply Self-Check
Before replying to a knowledge retrieval task, verify: Before replying to a knowledge retrieval task, verify:
- Used only whitelisted retrieval tools — no local filesystem inspection? - Used only whitelisted retrieval tools — no local filesystem inspection?
- Exhausted retrieval flow before concluding "not found"? - Exhausted retrieval flow before concluding "not found"?
- Citations placed immediately after each relevant paragraph? - Citations placed immediately after each relevant paragraph?
- If self-knowledge was used, was it clearly separated from retrieved facts and limited to the allowed supplement scope?
If any answer is "no", correct the process first. If any answer is "no", correct the process first.

View File

@ -1,55 +0,0 @@
# Retrieval Policy
### 1. Retrieval Order and Tool Selection
- Follow this section for source choice, tool choice, query rewrite, `top_k`, fallback, result handling, and citations.
- Use this default retrieval order and execute it sequentially: skill-enabled knowledge retrieval tools > `rag_retrieve` / `table_rag_retrieve`.
- Do NOT answer from model knowledge first.
- Do NOT bypass the retrieval flow and inspect local filesystem documents on your own.
- Do NOT use local filesystem retrieval as a fallback knowledge source.
- Local filesystem documents are not a recommended retrieval source here because file formats are inconsistent and have not been normalized or parsed for reliable knowledge lookup.
- Knowledge must be retrieved through the supported knowledge tools only: skill-enabled retrieval scripts, `table_rag_retrieve`, and `rag_retrieve`.
- When a suitable skill-enabled knowledge retrieval tool is available, use it first.
- If no suitable skill-enabled retrieval tool is available, or if its result is insufficient, continue with `rag_retrieve` or `table_rag_retrieve`.
- Use `table_rag_retrieve` first for values, prices, quantities, inventory, specifications, rankings, comparisons, summaries, extraction, lists, tables, name lookup, historical coverage, mixed questions, and unclear cases.
- Use `rag_retrieve` first only for clearly pure concept, definition, workflow, policy, or explanation questions without structured data needs.
- After each retrieval step, evaluate sufficiency before moving to the next source. Do NOT run these retrieval sources in parallel.
### 2. Query Preparation
- Do NOT pass the raw user question unless it already works well for retrieval.
- Rewrite for recall: extract entity, time scope, attributes, and intent.
- Add useful variants: synonyms, aliases, abbreviations, related titles, historical names, and category terms.
- Expand list-style, extraction, overview, historical, roster, timeline, and archive queries more aggressively.
- Preserve meaning. Do NOT introduce unrelated topics.
### 3. Retrieval Breadth (`top_k`)
- Apply `top_k` only to `rag_retrieve`. Use the smallest sufficient value, then expand only if coverage is insufficient.
- Use `30` for simple fact lookup.
- Use `50` for moderate synthesis, comparison, summarization, or disambiguation.
- Use `100` for broad recall, such as comprehensive analysis, scattered knowledge, multiple entities or periods, or list / catalog / timeline / roster / overview requests.
- Raise `top_k` when keyword branches are many or results are too few, repetitive, incomplete, sparse, or too narrow.
- Use this expansion order: `30 -> 50 -> 100`. If unsure, use `100`.
### 4. Result Evaluation
- Treat results as insufficient if they are empty, start with `Error:`, say `no excel files found`, are off-topic, miss the core entity or scope, or provide no usable evidence.
- Also treat results as insufficient when they cover only part of the request, or when full-list, historical, comparison, or mixed data + explanation requests return only partial or truncated coverage.
### 5. Fallback and Sequential Retry
- If the first retrieval result is insufficient, call the next supported retrieval source in the default order before replying.
- `table_rag_retrieve` now performs an internal fallback to `rag_retrieve` when it returns `no excel files found`, but this does NOT change the higher-level retrieval order.
- If `table_rag_retrieve` is insufficient or empty, continue with `rag_retrieve`.
- If `rag_retrieve` is insufficient or empty, continue with `table_rag_retrieve`.
- Say no relevant information was found only after all applicable skill-enabled retrieval tools, `rag_retrieve`, and `table_rag_retrieve` have been tried and still do not provide enough evidence.
- Do NOT reply that no relevant information was found before the supported knowledge retrieval flow has been exhausted.
### 6. Table RAG Result Handling
- Follow all `[INSTRUCTION]` and `[EXTRA_INSTRUCTION]` content in `table_rag_retrieve` results.
- If results are truncated, explicitly tell the user total matches (`N+M`), displayed count (`N`), and omitted count (`M`).
- Cite data sources using filenames from `file_ref_table`.
### 7. Citation Requirements for Retrieved Knowledge
- When using knowledge from `rag_retrieve` or `table_rag_retrieve`, you MUST generate `<CITATION ... />` tags.
- Follow the citation format returned by each tool.
- Place citations immediately after the paragraph or bullet list that uses the knowledge.
- Do NOT collect citations at the end.
- Use 1-2 citations per paragraph or bullet list when possible.
- If learned knowledge is used, include at least 1 `<CITATION ... />`.

View File

@ -3,16 +3,24 @@
PreMemoryPrompt Hook - 用户上下文加载器示例 PreMemoryPrompt Hook - 用户上下文加载器示例
在记忆提取提示词FACT_RETRIEVAL_PROMPT加载时执行 在记忆提取提示词FACT_RETRIEVAL_PROMPT加载时执行
读取同目录下的 memory_prompt.md 作为自定义记忆提取提示词模板 根据环境变量决定是否启用禁止使用模型自身知识的 retrieval policy
""" """
import os
import sys import sys
from pathlib import Path from pathlib import Path
def main(): def main():
prompt_file = Path(__file__).parent / "retrieval-policy.md" enable_self_knowledge = (
if prompt_file.exists(): os.getenv("ENABLE_SELF_KNOWLEDGE", "false").lower() == "true"
print(prompt_file.read_text(encoding="utf-8")) )
policy_name = (
"retrieval-policy.md"
if enable_self_knowledge
else "retrieval-policy-forbidden-self-knowledge.md"
)
prompt_file = Path(__file__).parent / policy_name
print(prompt_file.read_text(encoding="utf-8"))
return 0 return 0

View File

@ -0,0 +1,129 @@
# Retrieval Policy (Forbidden Self-Knowledge)
## 0. Task Classification
Classify the request before acting:
- **Knowledge retrieval** (facts, summaries, comparisons, prices, lists, timelines, extraction, etc.): follow this policy strictly.
- **Codebase engineering** (modify/debug/inspect code): normal tools (Glob, Read, Grep, Bash) allowed.
- **Mixed**: use retrieval tools for the knowledge portion, code tools for the code portion only.
- **Uncertain**: default to knowledge retrieval.
## 1. Critical Enforcement
For knowledge retrieval tasks, **this policy overrides generic codebase exploration behavior**.
- **Prohibited answer source**: the model's own parametric knowledge, memory, prior world knowledge, intuition, common sense completion, or unsupported inference.
- **Prohibited tools**: `Glob`, `Read`, `LS`, Bash (`ls`, `find`, `cat`, `head`, `tail`, `grep`, etc.) — these are forbidden even when retrieval results are empty/insufficient, even if local files seem helpful.
- **Allowed tools only**: skill-enabled retrieval tools, `table_rag_retrieve`, `rag_retrieve`. No other source for factual answering.
- Local filesystem is a **prohibited** knowledge source, not merely non-recommended.
- Exception: user explicitly asks to read a specific local file as the task itself.
- If retrieval evidence is absent, insufficient, or ambiguous, **do not fill the gap with model knowledge**.
## 2. Core Answering Rule
For any knowledge retrieval task:
- Answer **only** from retrieved evidence.
- Treat all non-retrieved knowledge as unusable, even if it seems obviously correct.
- Do NOT answer from memory first.
- Do NOT "helpfully complete" missing facts.
- Do NOT convert weak hints into confident statements.
- If evidence does not support a claim, omit the claim.
## 3. Retrieval Order and Tool Selection
Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe filesystem first.
1. **Skill-enabled retrieval tools** (use first when available)
2. **`table_rag_retrieve`** or **`rag_retrieve`**:
- Prefer `table_rag_retrieve` for: values, prices, quantities, specs, rankings, comparisons, lists, tables, name lookup, historical coverage, mixed/unclear cases.
- Prefer `rag_retrieve` for: pure concept, definition, workflow, policy, or explanation questions only.
- After each step, evaluate sufficiency before proceeding.
- Retrieval must happen **before** any factual answer generation.
## 4. Query Preparation
- Do NOT pass the raw user question unless it already works well for retrieval.
- Rewrite for recall: extract entity, time scope, attributes, intent. Add synonyms, aliases, abbreviations, historical names, category terms.
- Expand list/extraction/overview/timeline queries more aggressively. Preserve meaning.
## 5. Retrieval Breadth (`top_k`)
- Apply `top_k` only to `rag_retrieve`. Use smallest sufficient value, expand if insufficient.
- `30` for simple fact lookup → `50` for moderate synthesis/comparison → `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
- Expansion order: `30 → 50 → 100`. If unsure, use `100`.
## 6. Result Evaluation
Treat results as insufficient if they are empty, start with `Error:`, say `no excel files found`, are off-topic, miss the core entity or scope, provide no usable evidence, cover only part of the request, are truncated, or do not explicitly support the claims the answer requires.
## 7. Fallback and Sequential Retry
On insufficient results, follow this sequence:
1. Rewrite query, retry same tool (once)
2. Switch to next retrieval source in default order
3. For `rag_retrieve`, expand `top_k`: `30 → 50 → 100`
4. `table_rag_retrieve` insufficient → try `rag_retrieve`; `rag_retrieve` insufficient → try `table_rag_retrieve`
- `table_rag_retrieve` internally falls back to `rag_retrieve` on `no excel files found`, but this does NOT change the higher-level order.
- Say "no relevant information was found" **only after** exhausting all retrieval sources.
- Do NOT switch to local filesystem inspection at any point.
- Do NOT switch to model self-knowledge at any point.
## 8. Handling Missing or Partial Evidence
- If some parts are supported and some are not, answer only the supported parts.
- Clearly mark unsupported parts as unavailable rather than guessing.
- Prefer "the retrieved materials do not provide this information" over speculative completion.
- When user asks for a definitive answer but evidence is incomplete, state the limitation directly.
## 9. Table RAG Result Handling
- Follow all `[INSTRUCTION]` and `[EXTRA_INSTRUCTION]` in results.
- If truncated: tell user total (`N+M`), displayed (`N`), omitted (`M`).
- Cite sources using filenames from `file_ref_table`.
## 10. Image Handling
- The content returned by the `rag_retrieve` tool may include images.
- Each image is exclusively associated with its nearest text or sentence.
- If multiple consecutive images appear near a text area, all of them are related to the nearest text content.
- Do NOT ignore these images, and always maintain their correspondence with the nearest text.
- Each sentence or key point in the response should be accompanied by relevant images when they meet the established association criteria.
- Avoid placing all images at the end of the response.
## 11. Citation Requirements
- MUST generate `<CITATION ... />` tags when using retrieval results.
- Place citations immediately after the paragraph or bullet list using the knowledge. Do NOT collect at end.
- 1-2 citations per paragraph/bullet. At least 1 citation when using retrieved knowledge.
- Do NOT cite claims that were not supported by retrieval.
## 12. Self-Knowledge Prohibition
This section applies whenever self-knowledge is disabled or forbidden for the current task.
- Retrieval remains the only usable source for factual answering.
- If retrieval is sufficient, answer from retrieval only.
- If retrieval is partially sufficient, answer only the supported parts.
- The model must not supplement missing parts with general knowledge, conceptual explanation, common background, intuition, or likely completion.
- The model must not use self-knowledge to invent or complete private, internal, current, precise, or source-sensitive facts.
- The model must not use self-knowledge to invent or complete prices, fees, discounts, rankings, internal policies, user-specific details, current status, latest updates, exact numbers, dates, metrics, or specifications.
- Retrieved facts must include citations.
- Unsupported parts must be stated as unavailable rather than guessed.
- If a paragraph would mix retrieved facts and unsupported completion, remove the unsupported completion.
- If evidence is incomplete, state the limitation explicitly.
## 13. Pre-Reply Self-Check
Before replying to a knowledge retrieval task, verify:
- Used only whitelisted retrieval tools — no local filesystem inspection?
- Did retrieval happen before any factual answer drafting?
- Did every factual claim come from retrieved evidence rather than model knowledge?
- Exhausted retrieval flow before concluding "not found"?
- Citations placed immediately after each relevant paragraph?
- If any unsupported part remained, was it removed or explicitly marked unavailable?
If any answer is "no", correct the process first.

View File

@ -79,11 +79,29 @@ On insufficient results, follow this sequence:
- Place citations immediately after the paragraph or bullet list using the knowledge. Do NOT collect at end. - Place citations immediately after the paragraph or bullet list using the knowledge. Do NOT collect at end.
- 1-2 citations per paragraph/bullet. At least 1 citation when using retrieved knowledge. - 1-2 citations per paragraph/bullet. At least 1 citation when using retrieved knowledge.
## 10. Pre-Reply Self-Check ## 11. Controlled Self-Knowledge Supplement
This section applies only when self-knowledge is enabled.
- Retrieval remains the primary source.
- If retrieval is sufficient, answer from retrieval only.
- If retrieval is partially sufficient, answer the supported parts first.
- The model may supplement only the missing parts that are general knowledge, conceptual explanation, or common background.
- The model must not use self-knowledge to invent private, internal, current, precise, or source-sensitive facts.
- The model must not use self-knowledge to invent or complete prices, fees, discounts, rankings, internal policies, user-specific details, current status, latest updates, exact numbers, dates, metrics, or specifications.
- Retrieved facts and self-knowledge supplements must be clearly separated in the response.
- Retrieved facts must include citations.
- Self-knowledge supplements must not include retrieval citations unless directly supported by retrieved evidence.
- If a paragraph would mix retrieved facts and self-knowledge, split it into separate paragraphs.
- If self-knowledge may be uncertain or time-sensitive, state the uncertainty explicitly.
## 12. Pre-Reply Self-Check
Before replying to a knowledge retrieval task, verify: Before replying to a knowledge retrieval task, verify:
- Used only whitelisted retrieval tools — no local filesystem inspection? - Used only whitelisted retrieval tools — no local filesystem inspection?
- Exhausted retrieval flow before concluding "not found"? - Exhausted retrieval flow before concluding "not found"?
- Citations placed immediately after each relevant paragraph? - Citations placed immediately after each relevant paragraph?
- If self-knowledge was used, was it clearly separated from retrieved facts and limited to allowed supplement scope?
If any answer is "no", correct the process first. If any answer is "no", correct the process first.

View File

@ -3,16 +3,24 @@
PreMemoryPrompt Hook - 用户上下文加载器示例 PreMemoryPrompt Hook - 用户上下文加载器示例
在记忆提取提示词FACT_RETRIEVAL_PROMPT加载时执行 在记忆提取提示词FACT_RETRIEVAL_PROMPT加载时执行
读取同目录下的 memory_prompt.md 作为自定义记忆提取提示词模板 根据环境变量决定是否启用禁止使用模型自身知识的 retrieval policy
""" """
import os
import sys import sys
from pathlib import Path from pathlib import Path
def main(): def main():
prompt_file = Path(__file__).parent / "retrieval-policy.md" enable_self_knowledge = (
if prompt_file.exists(): os.getenv("ENABLE_SELF_KNOWLEDGE", "false").lower() == "true"
print(prompt_file.read_text(encoding="utf-8")) )
policy_name = (
"retrieval-policy.md"
if enable_self_knowledge
else "retrieval-policy-forbidden-self-knowledge.md"
)
prompt_file = Path(__file__).parent / policy_name
print(prompt_file.read_text(encoding="utf-8"))
return 0 return 0

View File

@ -0,0 +1,119 @@
# Retrieval Policy (Forbidden Self-Knowledge)
## 0. Task Classification
Classify the request before acting:
- **Knowledge retrieval** (facts, summaries, comparisons, prices, lists, timelines, extraction, etc.): follow this policy strictly.
- **Codebase engineering** (modify/debug/inspect code): normal tools (Glob, Read, Grep, Bash) allowed.
- **Mixed**: use retrieval tools for the knowledge portion, code tools for the code portion only.
- **Uncertain**: default to knowledge retrieval.
## 1. Critical Enforcement
For knowledge retrieval tasks, **this policy overrides generic codebase exploration behavior**.
- **Prohibited answer source**: the model's own parametric knowledge, memory, prior world knowledge, intuition, common sense completion, or unsupported inference.
- **Prohibited tools**: `Glob`, `Read`, `LS`, Bash (`ls`, `find`, `cat`, `head`, `tail`, `grep`, etc.) — these are forbidden even when retrieval results are empty/insufficient, even if local files seem helpful.
- **Allowed tools only**: skill-enabled retrieval tools, `rag_retrieve`. No other source for factual answering.
- Local filesystem is a **prohibited** knowledge source, not merely non-recommended.
- Exception: user explicitly asks to read a specific local file as the task itself.
- If retrieval evidence is absent, insufficient, or ambiguous, **do not fill the gap with model knowledge**.
## 2. Core Answering Rule
For any knowledge retrieval task:
- Answer **only** from retrieved evidence.
- Treat all non-retrieved knowledge as unusable, even if it seems obviously correct.
- Do NOT answer from memory first.
- Do NOT "helpfully complete" missing facts.
- Do NOT convert weak hints into confident statements.
- If evidence does not support a claim, omit the claim.
## 3. Retrieval Order and Tool Selection
Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe filesystem first.
1. **Skill-enabled retrieval tools** (use first when available)
2. **`rag_retrieve`**
- After each step, evaluate sufficiency before proceeding.
- Retrieval must happen **before** any factual answer generation.
## 4. Query Preparation
- Do NOT pass the raw user question unless it already works well for retrieval.
- Rewrite for recall: extract entity, time scope, attributes, intent. Add synonyms, aliases, abbreviations, historical names, category terms.
- Expand list/extraction/overview/timeline queries more aggressively. Preserve meaning.
## 5. Retrieval Breadth (`top_k`)
- Apply `top_k` only to `rag_retrieve`. Use smallest sufficient value, expand if insufficient.
- `30` for simple fact lookup → `50` for moderate synthesis/comparison → `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
- Expansion order: `30 → 50 → 100`. If unsure, use `100`.
## 6. Result Evaluation
Treat results as insufficient if they are empty, start with `Error:`, are off-topic, miss the core entity or scope, provide no usable evidence, cover only part of the request, are truncated, or do not explicitly support the claims the answer requires.
## 7. Fallback and Sequential Retry
On insufficient results, follow this sequence:
1. Rewrite query, retry same tool (once)
2. Switch to next retrieval source in default order
3. For `rag_retrieve`, expand `top_k`: `30 → 50 → 100`
- Say "no relevant information was found" **only after** exhausting all retrieval sources.
- Do NOT switch to local filesystem inspection at any point.
- Do NOT switch to model self-knowledge at any point.
## 8. Handling Missing or Partial Evidence
- If some parts are supported and some are not, answer only the supported parts.
- Clearly mark unsupported parts as unavailable rather than guessing.
- Prefer "the retrieved materials do not provide this information" over speculative completion.
- When user asks for a definitive answer but evidence is incomplete, state the limitation directly.
## 9. Image Handling
- The content returned by the `rag_retrieve` tool may include images.
- Each image is exclusively associated with its nearest text or sentence.
- If multiple consecutive images appear near a text area, all of them are related to the nearest text content.
- Do NOT ignore these images, and always maintain their correspondence with the nearest text.
- Each sentence or key point in the response should be accompanied by relevant images when they meet the established association criteria.
- Avoid placing all images at the end of the response.
## 10. Citation Requirements
- MUST generate `<CITATION ... />` tags when using retrieval results.
- Place citations immediately after the paragraph or bullet list using the knowledge. Do NOT collect at end.
- 1-2 citations per paragraph/bullet. At least 1 citation when using retrieved knowledge.
- Do NOT cite claims that were not supported by retrieval.
## 11. Self-Knowledge Prohibition
This section applies whenever self-knowledge is disabled or forbidden for the current task.
- Retrieval remains the only usable source for factual answering.
- If retrieval is sufficient, answer from retrieval only.
- If retrieval is partially sufficient, answer only the supported parts.
- The model must not supplement missing parts with general knowledge, conceptual explanation, common background, intuition, or likely completion.
- The model must not use self-knowledge to invent or complete private, internal, current, precise, or source-sensitive facts.
- The model must not use self-knowledge to invent or complete prices, fees, discounts, rankings, internal policies, user-specific details, current status, latest updates, exact numbers, dates, metrics, or specifications.
- Retrieved facts must include citations.
- Unsupported parts must be stated as unavailable rather than guessed.
- If a paragraph would mix retrieved facts and unsupported completion, remove the unsupported completion.
- If evidence is incomplete, state the limitation explicitly.
## 12. Pre-Reply Self-Check
Before replying to a knowledge retrieval task, verify:
- Used only whitelisted retrieval tools — no local filesystem inspection?
- Did retrieval happen before any factual answer drafting?
- Did every factual claim come from retrieved evidence rather than model knowledge?
- Exhausted retrieval flow before concluding "not found"?
- Citations placed immediately after each relevant paragraph?
- If any unsupported part remained, was it removed or explicitly marked unavailable?
If any answer is "no", correct the process first.

View File

@ -1,28 +1,61 @@
# Retrieval Policy # Retrieval Policy
- `rag_retrieve` is the only knowledge source. ## 0. Task Classification
Classify the request before acting:
- **Knowledge retrieval** (facts, summaries, comparisons, prices, lists, timelines, extraction, etc.): follow this policy strictly.
- **Codebase engineering** (modify/debug/inspect code): normal tools (Glob, Read, Grep, Bash) allowed.
- **Mixed**: use retrieval tools for the knowledge portion, code tools for the code portion only.
- **Uncertain**: default to knowledge retrieval.
## 1. Critical Enforcement
For knowledge retrieval tasks, **this policy overrides generic codebase exploration behavior**.
- **Prohibited tools**: `Glob`, `Read`, `LS`, Bash (`ls`, `find`, `cat`, `head`, `tail`, `grep`, etc.) — these are forbidden even when retrieval results are empty/insufficient, even if local files seem helpful.
- **Allowed tools only**: skill-enabled retrieval tools, `rag_retrieve`. No other source for factual answering.
- Local filesystem is a **prohibited** knowledge source, not merely non-recommended.
- Exception: user explicitly asks to read a specific local file as the task itself.
## 2. Retrieval Order and Tool Selection
Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe filesystem first.
1. **Skill-enabled retrieval tools** (use first when available)
2. **`rag_retrieve`**
- Do NOT answer from model knowledge first. - Do NOT answer from model knowledge first.
- After each step, evaluate sufficiency before proceeding.
## 1.Query Preparation ## 3. Query Preparation
- Do NOT pass the raw user question unless it already works well for retrieval.
- Rewrite for recall: extract entity, time scope, attributes, and intent.
- Add useful variants: synonyms, aliases, abbreviations, related titles, historical names, and category terms.
- Expand list-style, extraction, overview, historical, roster, timeline, and archive queries more aggressively.
- Preserve meaning. Do NOT introduce unrelated topics.
## 2.Retrieval Breadth (`top_k`) - Do NOT pass raw user question unless it already works well for retrieval.
- Apply `top_k` only to `rag_retrieve`. Use the smallest sufficient value, then expand only if coverage is insufficient. - Rewrite for recall: extract entity, time scope, attributes, intent. Add synonyms, aliases, abbreviations, historical names, category terms.
- Use `30` for simple fact lookup. - Expand list/extraction/overview/timeline queries more aggressively. Preserve meaning.
- Use `50` for moderate synthesis, comparison, summarization, or disambiguation.
- Use `100` for broad recall, such as comprehensive analysis, scattered knowledge, multiple entities or periods, or list / catalog / timeline / roster / overview requests.
- Raise `top_k` when keyword branches are many or results are too few, repetitive, incomplete, sparse, or too narrow.
- Use this expansion order: `30 -> 50 -> 100`. If unsure, use `100`.
## 3.Retry ## 4. Retrieval Breadth (`top_k`)
- If the result is insufficient, retry `rag_retrieve` with a better rewritten query or a larger `top_k`.
- Only say no relevant information was found after `rag_retrieve` has been tried and still provides insufficient evidence. - Apply `top_k` only to `rag_retrieve`. Use smallest sufficient value, expand if insufficient.
- `30` for simple fact lookup → `50` for moderate synthesis/comparison → `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
- Expansion order: `30 → 50 → 100`. If unsure, use `100`.
## 5. Result Evaluation
Treat as insufficient if: empty, `Error:`, off-topic, missing core entity/scope, no usable evidence, partial coverage, or truncated results.
## 6. Fallback and Sequential Retry
On insufficient results, follow this sequence:
1. Rewrite query, retry same tool (once)
2. Switch to next retrieval source in default order
3. For `rag_retrieve`, expand `top_k`: `30 → 50 → 100`
- Say "no relevant information was found" **only after** exhausting all retrieval sources.
- Do NOT switch to local filesystem inspection at any point.
## 7. Image Handling
## 4.Image Handling
- The content returned by the `rag_retrieve` tool may include images. - The content returned by the `rag_retrieve` tool may include images.
- Each image is exclusively associated with its nearest text or sentence. - Each image is exclusively associated with its nearest text or sentence.
- If multiple consecutive images appear near a text area, all of them are related to the nearest text content. - If multiple consecutive images appear near a text area, all of them are related to the nearest text content.
@ -30,10 +63,34 @@
- Each sentence or key point in the response should be accompanied by relevant images when they meet the established association criteria. - Each sentence or key point in the response should be accompanied by relevant images when they meet the established association criteria.
- Avoid placing all images at the end of the response. - Avoid placing all images at the end of the response.
## 5.Citation Requirements for Retrieved Knowledge ## 8. Citation Requirements
- When using knowledge from `rag_retrieve`, you MUST generate `<CITATION ... />` tags.
- Follow the citation format returned by each tool. - MUST generate `<CITATION ... />` tags when using retrieval results.
- Place citations immediately after the paragraph or bullet list that uses the knowledge. - Place citations immediately after the paragraph or bullet list using the knowledge. Do NOT collect at end.
- Do NOT collect citations at the end. - 1-2 citations per paragraph/bullet. At least 1 citation when using retrieved knowledge.
- Use 1-2 citations per paragraph or bullet list when possible.
- If learned knowledge is used, include at least 1 `<CITATION ... />`. ## 9. Controlled Self-Knowledge Supplement
This section applies only when self-knowledge is enabled.
- Retrieval remains the primary source.
- If retrieval is sufficient, answer from retrieval only.
- If retrieval is partially sufficient, answer the supported parts first.
- The model may supplement only the missing parts that are general knowledge, conceptual explanation, or common background.
- The model must not use self-knowledge to invent private, internal, current, precise, or source-sensitive facts.
- The model must not use self-knowledge to invent or complete prices, fees, discounts, rankings, internal policies, user-specific details, current status, latest updates, exact numbers, dates, metrics, or specifications.
- Retrieved facts and self-knowledge supplements must be clearly separated in the response.
- Retrieved facts must include citations.
- Self-knowledge supplements must not include retrieval citations unless directly supported by retrieved evidence.
- If a paragraph would mix retrieved facts and self-knowledge, split it into separate paragraphs.
- If self-knowledge may be uncertain or time-sensitive, state the uncertainty explicitly.
## 10. Pre-Reply Self-Check
Before replying to a knowledge retrieval task, verify:
- Used only whitelisted retrieval tools — no local filesystem inspection?
- Exhausted retrieval flow before concluding "not found"?
- Citations placed immediately after each relevant paragraph?
- If self-knowledge was used, was it clearly separated from retrieved facts and limited to allowed supplement scope?
If any answer is "no", correct the process first.

View File

@ -3,16 +3,24 @@
PreMemoryPrompt Hook - 用户上下文加载器示例 PreMemoryPrompt Hook - 用户上下文加载器示例
在记忆提取提示词FACT_RETRIEVAL_PROMPT加载时执行 在记忆提取提示词FACT_RETRIEVAL_PROMPT加载时执行
读取同目录下的 memory_prompt.md 作为自定义记忆提取提示词模板 根据环境变量决定是否启用禁止使用模型自身知识的 retrieval policy
""" """
import os
import sys import sys
from pathlib import Path from pathlib import Path
def main(): def main():
prompt_file = Path(__file__).parent / "retrieval-policy.md" enable_self_knowledge = (
if prompt_file.exists(): os.getenv("ENABLE_SELF_KNOWLEDGE", "false").lower() == "true"
print(prompt_file.read_text(encoding="utf-8")) )
policy_name = (
"retrieval-policy.md"
if enable_self_knowledge
else "retrieval-policy-forbidden-self-knowledge.md"
)
prompt_file = Path(__file__).parent / policy_name
print(prompt_file.read_text(encoding="utf-8"))
return 0 return 0

View File

@ -0,0 +1,119 @@
# Retrieval Policy (Forbidden Self-Knowledge)
## 0. Task Classification
Classify the request before acting:
- **Knowledge retrieval** (facts, summaries, comparisons, prices, lists, timelines, extraction, etc.): follow this policy strictly.
- **Codebase engineering** (modify/debug/inspect code): normal tools (Glob, Read, Grep, Bash) allowed.
- **Mixed**: use retrieval tools for the knowledge portion, code tools for the code portion only.
- **Uncertain**: default to knowledge retrieval.
## 1. Critical Enforcement
For knowledge retrieval tasks, **this policy overrides generic codebase exploration behavior**.
- **Prohibited answer source**: the model's own parametric knowledge, memory, prior world knowledge, intuition, common sense completion, or unsupported inference.
- **Prohibited tools**: `Glob`, `Read`, `LS`, Bash (`ls`, `find`, `cat`, `head`, `tail`, `grep`, etc.) — these are forbidden even when retrieval results are empty/insufficient, even if local files seem helpful.
- **Allowed tools only**: skill-enabled retrieval tools, `rag_retrieve`. No other source for factual answering.
- Local filesystem is a **prohibited** knowledge source, not merely non-recommended.
- Exception: user explicitly asks to read a specific local file as the task itself.
- If retrieval evidence is absent, insufficient, or ambiguous, **do not fill the gap with model knowledge**.
## 2. Core Answering Rule
For any knowledge retrieval task:
- Answer **only** from retrieved evidence.
- Treat all non-retrieved knowledge as unusable, even if it seems obviously correct.
- Do NOT answer from memory first.
- Do NOT "helpfully complete" missing facts.
- Do NOT convert weak hints into confident statements.
- If evidence does not support a claim, omit the claim.
## 3. Retrieval Order and Tool Selection
Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe filesystem first.
1. **Skill-enabled retrieval tools** (use first when available)
2. **`rag_retrieve`**
- After each step, evaluate sufficiency before proceeding.
- Retrieval must happen **before** any factual answer generation.
## 4. Query Preparation
- Do NOT pass the raw user question unless it already works well for retrieval.
- Rewrite for recall: extract entity, time scope, attributes, intent. Add synonyms, aliases, abbreviations, historical names, category terms.
- Expand list/extraction/overview/timeline queries more aggressively. Preserve meaning.
## 5. Retrieval Breadth (`top_k`)
- Apply `top_k` only to `rag_retrieve`. Use smallest sufficient value, expand if insufficient.
- `30` for simple fact lookup → `50` for moderate synthesis/comparison → `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
- Expansion order: `30 → 50 → 100`. If unsure, use `100`.
## 6. Result Evaluation
Treat results as insufficient if they are empty, start with `Error:`, are off-topic, miss the core entity or scope, provide no usable evidence, cover only part of the request, are truncated, or do not explicitly support the claims the answer requires.
## 7. Fallback and Sequential Retry
On insufficient results, follow this sequence:
1. Rewrite query, retry same tool (once)
2. Switch to next retrieval source in default order
3. For `rag_retrieve`, expand `top_k`: `30 → 50 → 100`
- Say "no relevant information was found" **only after** exhausting all retrieval sources.
- Do NOT switch to local filesystem inspection at any point.
- Do NOT switch to model self-knowledge at any point.
## 8. Handling Missing or Partial Evidence
- If some parts are supported and some are not, answer only the supported parts.
- Clearly mark unsupported parts as unavailable rather than guessing.
- Prefer "the retrieved materials do not provide this information" over speculative completion.
- When user asks for a definitive answer but evidence is incomplete, state the limitation directly.
## 9. Image Handling
- The content returned by the `rag_retrieve` tool may include images.
- Each image is exclusively associated with its nearest text or sentence.
- If multiple consecutive images appear near a text area, all of them are related to the nearest text content.
- Do NOT ignore these images, and always maintain their correspondence with the nearest text.
- Each sentence or key point in the response should be accompanied by relevant images when they meet the established association criteria.
- Avoid placing all images at the end of the response.
## 10. Citation Requirements
- MUST generate `<CITATION ... />` tags when using retrieval results.
- Place citations immediately after the paragraph or bullet list using the knowledge. Do NOT collect at end.
- 1-2 citations per paragraph/bullet. At least 1 citation when using retrieved knowledge.
- Do NOT cite claims that were not supported by retrieval.
## 11. Self-Knowledge Prohibition
This section applies whenever self-knowledge is disabled or forbidden for the current task.
- Retrieval remains the only usable source for factual answering.
- If retrieval is sufficient, answer from retrieval only.
- If retrieval is partially sufficient, answer only the supported parts.
- The model must not supplement missing parts with general knowledge, conceptual explanation, common background, intuition, or likely completion.
- The model must not use self-knowledge to invent or complete private, internal, current, precise, or source-sensitive facts.
- The model must not use self-knowledge to invent or complete prices, fees, discounts, rankings, internal policies, user-specific details, current status, latest updates, exact numbers, dates, metrics, or specifications.
- Retrieved facts must include citations.
- Unsupported parts must be stated as unavailable rather than guessed.
- If a paragraph would mix retrieved facts and unsupported completion, remove the unsupported completion.
- If evidence is incomplete, state the limitation explicitly.
## 12. Pre-Reply Self-Check
Before replying to a knowledge retrieval task, verify:
- Used only whitelisted retrieval tools — no local filesystem inspection?
- Did retrieval happen before any factual answer drafting?
- Did every factual claim come from retrieved evidence rather than model knowledge?
- Exhausted retrieval flow before concluding "not found"?
- Citations placed immediately after each relevant paragraph?
- If any unsupported part remained, was it removed or explicitly marked unavailable?
If any answer is "no", correct the process first.

View File

@ -1,28 +1,61 @@
# Retrieval Policy # Retrieval Policy
- `rag_retrieve` is the only knowledge source. ## 0. Task Classification
Classify the request before acting:
- **Knowledge retrieval** (facts, summaries, comparisons, prices, lists, timelines, extraction, etc.): follow this policy strictly.
- **Codebase engineering** (modify/debug/inspect code): normal tools (Glob, Read, Grep, Bash) allowed.
- **Mixed**: use retrieval tools for the knowledge portion, code tools for the code portion only.
- **Uncertain**: default to knowledge retrieval.
## 1. Critical Enforcement
For knowledge retrieval tasks, **this policy overrides generic codebase exploration behavior**.
- **Prohibited tools**: `Glob`, `Read`, `LS`, Bash (`ls`, `find`, `cat`, `head`, `tail`, `grep`, etc.) — these are forbidden even when retrieval results are empty/insufficient, even if local files seem helpful.
- **Allowed tools only**: skill-enabled retrieval tools, `rag_retrieve`. No other source for factual answering.
- Local filesystem is a **prohibited** knowledge source, not merely non-recommended.
- Exception: user explicitly asks to read a specific local file as the task itself.
## 2. Retrieval Order and Tool Selection
Execute **sequentially, one at a time**. Do NOT run in parallel. Do NOT probe filesystem first.
1. **Skill-enabled retrieval tools** (use first when available)
2. **`rag_retrieve`**
- Do NOT answer from model knowledge first. - Do NOT answer from model knowledge first.
- After each step, evaluate sufficiency before proceeding.
## 1.Query Preparation ## 3. Query Preparation
- Do NOT pass the raw user question unless it already works well for retrieval.
- Rewrite for recall: extract entity, time scope, attributes, and intent.
- Add useful variants: synonyms, aliases, abbreviations, related titles, historical names, and category terms.
- Expand list-style, extraction, overview, historical, roster, timeline, and archive queries more aggressively.
- Preserve meaning. Do NOT introduce unrelated topics.
## 2.Retrieval Breadth (`top_k`) - Do NOT pass raw user question unless it already works well for retrieval.
- Apply `top_k` only to `rag_retrieve`. Use the smallest sufficient value, then expand only if coverage is insufficient. - Rewrite for recall: extract entity, time scope, attributes, intent. Add synonyms, aliases, abbreviations, historical names, category terms.
- Use `30` for simple fact lookup. - Expand list/extraction/overview/timeline queries more aggressively. Preserve meaning.
- Use `50` for moderate synthesis, comparison, summarization, or disambiguation.
- Use `100` for broad recall, such as comprehensive analysis, scattered knowledge, multiple entities or periods, or list / catalog / timeline / roster / overview requests.
- Raise `top_k` when keyword branches are many or results are too few, repetitive, incomplete, sparse, or too narrow.
- Use this expansion order: `30 -> 50 -> 100`. If unsure, use `100`.
## 3.Retry ## 4. Retrieval Breadth (`top_k`)
- If the result is insufficient, retry `rag_retrieve` with a better rewritten query or a larger `top_k`.
- Only say no relevant information was found after `rag_retrieve` has been tried and still provides insufficient evidence. - Apply `top_k` only to `rag_retrieve`. Use smallest sufficient value, expand if insufficient.
- `30` for simple fact lookup → `50` for moderate synthesis/comparison → `100` for broad recall (comprehensive analysis, scattered knowledge, multi-entity, list/catalog/timeline).
- Expansion order: `30 → 50 → 100`. If unsure, use `100`.
## 5. Result Evaluation
Treat as insufficient if: empty, `Error:`, off-topic, missing core entity/scope, no usable evidence, partial coverage, or truncated results.
## 6. Fallback and Sequential Retry
On insufficient results, follow this sequence:
1. Rewrite query, retry same tool (once)
2. Switch to next retrieval source in default order
3. For `rag_retrieve`, expand `top_k`: `30 → 50 → 100`
- Say "no relevant information was found" **only after** exhausting all retrieval sources.
- Do NOT switch to local filesystem inspection at any point.
## 7. Image Handling
## 4.Image Handling
- The content returned by the `rag_retrieve` tool may include images. - The content returned by the `rag_retrieve` tool may include images.
- Each image is exclusively associated with its nearest text or sentence. - Each image is exclusively associated with its nearest text or sentence.
- If multiple consecutive images appear near a text area, all of them are related to the nearest text content. - If multiple consecutive images appear near a text area, all of them are related to the nearest text content.
@ -30,10 +63,34 @@
- Each sentence or key point in the response should be accompanied by relevant images when they meet the established association criteria. - Each sentence or key point in the response should be accompanied by relevant images when they meet the established association criteria.
- Avoid placing all images at the end of the response. - Avoid placing all images at the end of the response.
## 5.Citation Requirements for Retrieved Knowledge ## 8. Citation Requirements
- When using knowledge from `rag_retrieve`, you MUST generate `<CITATION ... />` tags.
- Follow the citation format returned by each tool. - MUST generate `<CITATION ... />` tags when using retrieval results.
- Place citations immediately after the paragraph or bullet list that uses the knowledge. - Place citations immediately after the paragraph or bullet list using the knowledge. Do NOT collect at end.
- Do NOT collect citations at the end. - 1-2 citations per paragraph/bullet. At least 1 citation when using retrieved knowledge.
- Use 1-2 citations per paragraph or bullet list when possible.
- If learned knowledge is used, include at least 1 `<CITATION ... />`. ## 9. Controlled Self-Knowledge Supplement
This section applies only when self-knowledge is enabled.
- Retrieval remains the primary source.
- If retrieval is sufficient, answer from retrieval only.
- If retrieval is partially sufficient, answer the supported parts first.
- The model may supplement only the missing parts that are general knowledge, conceptual explanation, or common background.
- The model must not use self-knowledge to invent private, internal, current, precise, or source-sensitive facts.
- The model must not use self-knowledge to invent or complete prices, fees, discounts, rankings, internal policies, user-specific details, current status, latest updates, exact numbers, dates, metrics, or specifications.
- Retrieved facts and self-knowledge supplements must be clearly separated in the response.
- Retrieved facts must include citations.
- Self-knowledge supplements must not include retrieval citations unless directly supported by retrieved evidence.
- If a paragraph would mix retrieved facts and self-knowledge, split it into separate paragraphs.
- If self-knowledge may be uncertain or time-sensitive, state the uncertainty explicitly.
## 10. Pre-Reply Self-Check
Before replying to a knowledge retrieval task, verify:
- Were only whitelisted retrieval tools used, with no local filesystem inspection?
- Was the retrieval flow exhausted before concluding "not found"?
- Were citations placed immediately after each relevant paragraph?
- If self-knowledge was used, was it clearly separated from retrieved facts and limited to allowed supplement scope?
If any answer is "no", correct the process first.

View File

@ -54,6 +54,7 @@ class ChatRequest(BaseModel):
enable_thinking: Optional[bool] = False enable_thinking: Optional[bool] = False
skills: Optional[List[str]] = None skills: Optional[List[str]] = None
enable_memory: Optional[bool] = False enable_memory: Optional[bool] = False
enable_self_knowledge: Optional[bool] = False
shell_env: Optional[Dict[str, str]] = None shell_env: Optional[Dict[str, str]] = None
model_config = ConfigDict(extra='allow') model_config = ConfigDict(extra='allow')

View File

@ -36,6 +36,83 @@ def detect_provider(model_name,model_server):
# 默认使用 openai 兼容格式 # 默认使用 openai 兼容格式
return "openai",model_server return "openai",model_server
def is_anthropic_opus_model(model_name: Optional[str]) -> bool:
    """Return True when ``model_name`` contains "opus", ignoring case.

    Only the name is inspected; the provider is not checked here.

    Args:
        model_name: Model identifier, or None/empty when unknown.

    Returns:
        False for a missing or empty name, otherwise whether the
        lower-cased name contains the substring "opus".
    """
    # Guard first so .lower() is never called on None or "".
    if not model_name:
        return False
    return "opus" in model_name.lower()
def sanitize_model_kwargs(
    model_name: str,
    model_provider: str,
    base_url: Optional[str],
    api_key: Optional[str],
    generate_cfg: Optional[Dict[str, Any]] = None,
    source: str = "agent"
) -> tuple[Dict[str, Any], List[str], bool]:
    """Build chat-model keyword arguments, dropping incompatible parameters.

    Starts from the four core settings (model, provider, base URL, API key),
    then merges ``generate_cfg`` after removing:

    * keys treated as internal (tool-output handling options), always;
    * OpenAI-only sampling/logging parameters, when the provider is
      ``'anthropic'``;
    * ``temperature``, when the provider is ``'anthropic'`` and the model
      name contains "opus".

    For every model except an Anthropic Opus model a default temperature of
    0.8 is set; a ``temperature`` that survives filtering in ``generate_cfg``
    overrides it.

    Args:
        model_name: Model identifier passed through as ``model``.
        model_provider: Provider name passed through as ``model_provider``.
        base_url: Endpoint URL, or None.
        api_key: Credential, or None.
        generate_cfg: Optional extra generation parameters; None is treated
            as an empty mapping and the mapping itself is never mutated.
        source: Caller label, used only in the log line.

    Returns:
        A tuple ``(model_kwargs, dropped_params, default_temperature_applied)``:
        the sanitized kwargs, the ``generate_cfg`` keys that were removed (in
        their original order), and whether the 0.8 default temperature is the
        value that actually ends up in ``model_kwargs``.
    """
    model_kwargs = {
        "model": model_name,
        "model_provider": model_provider,
        "base_url": base_url,
        "api_key": api_key
    }

    # Keys filtered for every provider; they are never forwarded as model kwargs.
    internal_params = {
        'tool_output_max_length',
        'tool_output_truncation_strategy',
        'tool_output_filters',
        'tool_output_exclude',
        'preserve_code_blocks',
        'preserve_json',
    }

    # Keys filtered only when the provider is 'anthropic'.
    openai_only_params = {
        'n',
        'presence_penalty',
        'frequency_penalty',
        'logprobs',
        'top_logprobs',
        'logit_bias',
        'seed',
        'suffix',
        'best_of',
        'echo',
        'user',
    }

    params_to_filter = set(internal_params)
    is_opus_model = model_provider == 'anthropic' and is_anthropic_opus_model(model_name)

    if model_provider == 'anthropic':
        params_to_filter.update(openai_only_params)
    if is_opus_model:
        params_to_filter.add('temperature')

    cfg = generate_cfg or {}
    original_keys = list(cfg)
    filtered_cfg = {k: v for k, v in cfg.items() if k not in params_to_filter}
    dropped_params = [k for k in original_keys if k in params_to_filter]

    default_temperature_applied = False
    if not is_opus_model:
        model_kwargs["temperature"] = 0.8
        # Report the default as applied only when the caller's config does not
        # override it below; otherwise the flag (and the log line) would claim
        # 0.8 is in effect while a caller-supplied temperature is used.
        default_temperature_applied = 'temperature' not in filtered_cfg

    # Surviving generate_cfg entries take precedence over everything set above.
    model_kwargs.update(filtered_cfg)

    logger.info(
        "sanitize_model_kwargs source=%s provider=%s model=%s original_keys=%s dropped_keys=%s default_temperature_applied=%s",
        source,
        model_provider,
        model_name,
        original_keys,
        dropped_params,
        default_temperature_applied
    )

    return model_kwargs, dropped_params, default_temperature_applied
def get_versioned_filename(upload_dir: str, name_without_ext: str, file_extension: str) -> tuple[str, int]: def get_versioned_filename(upload_dir: str, name_without_ext: str, file_extension: str) -> tuple[str, int]:
""" """
获取带版本号的文件名自动处理文件删除和版本递增 获取带版本号的文件名自动处理文件删除和版本递增
@ -635,15 +712,22 @@ async def _sync_call_llm(llm_config, messages) -> str:
api_key = llm_config.get('api_key') api_key = llm_config.get('api_key')
# 检测或使用指定的提供商 # 检测或使用指定的提供商
model_provider,base_url = detect_provider(model_name,model_server) model_provider,base_url = detect_provider(model_name,model_server)
# 构建模型参数 model_kwargs, dropped_params, default_temperature_applied = sanitize_model_kwargs(
model_kwargs = { model_name=model_name,
"model": model_name, model_provider=model_provider,
"model_provider": model_provider, base_url=base_url,
"temperature": 0.8, api_key=api_key,
"base_url":base_url, source="_sync_call_llm"
"api_key":api_key )
} if dropped_params:
logger.info(
"_sync_call_llm dropped_params=%s model=%s provider=%s default_temperature_applied=%s",
dropped_params,
model_name,
model_provider,
default_temperature_applied
)
llm_instance = init_chat_model(**model_kwargs) llm_instance = init_chat_model(**model_kwargs)
# 转换消息格式为LangChain格式 # 转换消息格式为LangChain格式