diff --git a/.circleci/config.yml b/.circleci/config.yml index b0a1532..0d541a6 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -108,7 +108,7 @@ workflows: filters: branches: only: - - master + - dev - deploy: name: deploy-for-test docker-tag: '' @@ -120,7 +120,7 @@ workflows: filters: branches: only: - - master + - dev requires: - build-for-test - build-and-push: @@ -183,7 +183,7 @@ workflows: filters: branches: only: - - onprem + - onprem-release - docker-hub-build-push: name: docker-hub-build-push-arm repo: gptbasesparticle/catalog-agent @@ -192,4 +192,31 @@ workflows: filters: branches: only: - - onprem + - onprem-release + # 为 onprem-dev 环境部署 + - build-and-push: + name: build-for-onprem-dev + context: + - ecr-new + path: . + dockerfile: Dockerfile + repo: catalog-agent + docker-tag: '' + filters: + branches: + only: + - onprem-dev + - deploy: + name: deploy-for-onprem-dev + docker-tag: '' + path: '/home/ubuntu/cluster-for-B/onprem-dev/catalog-agent/deploy.yaml' + deploy-name: catalog-agent + deploy-namespace: onprem-dev + context: + - ecr-new + filters: + branches: + only: + - onprem-dev + requires: + - build-for-onprem-dev diff --git a/docs/file_manage_apis.md b/docs/file_manage_apis.md new file mode 100644 index 0000000..6d9f612 --- /dev/null +++ b/docs/file_manage_apis.md @@ -0,0 +1,435 @@ +# 文件管理 API 接口文档 + +**Base URL:** `/api/v1/file-manager` + +--- + +## 1. 
列出目录内容 + +**GET** `/api/v1/file-manager/list` + +| 参数 | 类型 | 位置 | 必填 | 说明 | +|------|------|------|------|------| +| path | string | query | 否 | 相对路径,空字符串返回根目录(projects/prompt) | +| recursive | bool | query | 否 | 是否递归列出子目录,默认 false | + +```bash +curl -X GET "/api/v1/file-manager/list?path=projects&recursive=false" +``` + +响应示例: +```json +{ + "success": true, + "path": "projects", + "items": [ + { + "name": "report.pdf", + "path": "projects/report.pdf", + "type": "file", + "size": 102400, + "modified": 1700000000.0, + "created": 1699000000.0 + } + ], + "total": 1 +} +``` + +--- + +## 2. 上传文件 + +**POST** `/api/v1/file-manager/upload` + +请求方式:表单数据上传(multipart/form-data) + +| 参数 | 类型 | 位置 | 必填 | 说明 | +|------|------|------|------|------| +| file | file | form | 是 | 上传的文件 | +| path | string | form | 否 | 目标目录路径,默认 `projects` | + +```bash +curl -X POST "/api/v1/file-manager/upload" \ + -F "file=@document.pdf" \ + -F "path=projects/dataset" +``` + +响应示例: +```json +{ + "success": true, + "message": "文件上传成功", + "filename": "document.pdf", + "original_filename": "document.pdf", + "path": "projects/dataset/document.pdf", + "size": 102400 +} +``` + +> 注:文件名中的空格会自动替换为下划线。 + +--- + +## 3. 下载文件 + +**GET** `/api/v1/file-manager/download/{file_path}` + +| 参数 | 类型 | 位置 | 必填 | 说明 | +|------|------|------|------|------| +| file_path | string | path | 是 | 文件相对路径 | + +```bash +curl -X GET "/api/v1/file-manager/download/projects/report.pdf" -o report.pdf +``` + +响应:文件二进制流,`Content-Type` 根据文件类型自动推断。 + +--- + +## 4. 删除文件或目录 + +**DELETE** `/api/v1/file-manager/delete` + +| 参数 | 类型 | 位置 | 必填 | 说明 | +|------|------|------|------|------| +| path | string | query | 是 | 要删除的路径 | + +```bash +curl -X DELETE "/api/v1/file-manager/delete?path=projects/report.pdf" +``` + +响应示例: +```json +{ + "success": true, + "message": "文件删除成功", + "path": "projects/report.pdf" +} +``` + +--- + +## 5. 
创建文件夹 + +**POST** `/api/v1/file-manager/create-folder` + +请求方式:JSON + +| 参数 | 类型 | 位置 | 必填 | 说明 | +|------|------|------|------|------| +| path | string | body | 否 | 父目录路径,默认 `projects` | +| name | string | body | 是 | 文件夹名称 | + +```bash +curl -X POST "/api/v1/file-manager/create-folder" \ + -H "Content-Type: application/json" \ + -d '{"path": "projects", "name": "new-folder"}' +``` + +响应示例: +```json +{ + "success": true, + "message": "文件夹创建成功", + "path": "projects/new-folder" +} +``` + +--- + +## 6. 重命名文件或文件夹 + +**POST** `/api/v1/file-manager/rename` + +| 参数 | 类型 | 位置 | 必填 | 说明 | +|------|------|------|------|------| +| old_path | string | query | 是 | 原路径 | +| new_name | string | query | 是 | 新名称 | + +```bash +curl -X POST "/api/v1/file-manager/rename?old_path=projects/old-name.pdf&new_name=new-name.pdf" +``` + +响应示例: +```json +{ + "success": true, + "message": "重命名成功", + "old_path": "projects/old-name.pdf", + "new_path": "projects/new-name.pdf" +} +``` + +--- + +## 7. 移动文件或文件夹 + +**POST** `/api/v1/file-manager/move` + +| 参数 | 类型 | 位置 | 必填 | 说明 | +|------|------|------|------|------| +| source_path | string | query | 是 | 源路径 | +| target_path | string | query | 是 | 目标路径 | + +```bash +curl -X POST "/api/v1/file-manager/move?source_path=projects/a.pdf&target_path=projects/sub/a.pdf" +``` + +响应示例: +```json +{ + "success": true, + "message": "移动成功", + "source_path": "projects/a.pdf", + "target_path": "projects/sub/a.pdf" +} +``` + +--- + +## 8. 复制文件或文件夹 + +**POST** `/api/v1/file-manager/copy` + +| 参数 | 类型 | 位置 | 必填 | 说明 | +|------|------|------|------|------| +| source_path | string | query | 是 | 源路径 | +| target_path | string | query | 是 | 目标路径 | + +```bash +curl -X POST "/api/v1/file-manager/copy?source_path=projects/a.pdf&target_path=projects/a-copy.pdf" +``` + +响应示例: +```json +{ + "success": true, + "message": "复制成功", + "source_path": "projects/a.pdf", + "target_path": "projects/a-copy.pdf" +} +``` + +--- + +## 9. 
搜索文件 + +**GET** `/api/v1/file-manager/search` + +| 参数 | 类型 | 位置 | 必填 | 说明 | +|------|------|------|------|------| +| query | string | query | 是 | 搜索关键词(匹配文件名) | +| path | string | query | 否 | 搜索路径,为空则搜索所有支持目录 | +| file_type | string | query | 否 | 文件类型过滤(如 `.pdf`) | + +```bash +curl -X GET "/api/v1/file-manager/search?query=report&path=projects&file_type=.pdf" +``` + +响应示例: +```json +{ + "success": true, + "query": "report", + "path": "projects", + "results": [ + { + "name": "report.pdf", + "path": "projects/report.pdf", + "type": "file" + } + ], + "total": 1 +} +``` + +--- + +## 10. 读取文件内容 + +**GET** `/api/v1/file-manager/read` + +| 参数 | 类型 | 位置 | 必填 | 说明 | +|------|------|------|------|------| +| path | string | query | 是 | 文件相对路径 | + +```bash +curl -X GET "/api/v1/file-manager/read?path=projects/config.json" +``` + +响应示例: +```json +{ + "success": true, + "content": "{\"key\": \"value\"}", + "path": "projects/config.json", + "size": 16, + "modified": 1700000000.0, + "encoding": "utf-8", + "mime_type": "application/json" +} +``` + +> 限制:最大 10MB。自动尝试 utf-8 / gbk / gb2312 / latin-1 编码。 + +--- + +## 11. 保存文件内容 + +**POST** `/api/v1/file-manager/save` + +请求方式:JSON + +| 参数 | 类型 | 位置 | 必填 | 说明 | +|------|------|------|------|------| +| path | string | body | 是 | 文件相对路径 | +| content | string | body | 是 | 文件内容 | + +```bash +curl -X POST "/api/v1/file-manager/save" \ + -H "Content-Type: application/json" \ + -d '{"path": "projects/config.json", "content": "{\"key\": \"new-value\"}"}' +``` + +响应示例: +```json +{ + "success": true, + "message": "文件保存成功", + "path": "projects/config.json", + "size": 20, + "modified": 1700000000.0, + "encoding": "utf-8" +} +``` + +> 限制:最大 5MB。保存前自动创建备份,写入失败时自动回滚。 + +--- + +## 12. 
获取文件/文件夹详细信息 + +**GET** `/api/v1/file-manager/info/{file_path}` + +| 参数 | 类型 | 位置 | 必填 | 说明 | +|------|------|------|------|------| +| file_path | string | path | 是 | 文件或文件夹路径 | + +```bash +curl -X GET "/api/v1/file-manager/info/projects/report.pdf" +``` + +响应示例: +```json +{ + "success": true, + "info": { + "name": "report.pdf", + "path": "projects/report.pdf", + "type": "file", + "size": 102400, + "modified": 1700000000.0, + "created": 1699000000.0, + "permissions": "644", + "mime_type": "application/pdf", + "preview": "文件前1000字符预览..." + } +} +``` + +> 小于 1MB 的文本文件会包含 `preview` 字段。 + +--- + +## 13. 下载文件夹为 ZIP + +**POST** `/api/v1/file-manager/download-folder-zip` + +请求方式:JSON + +| 参数 | 类型 | 位置 | 必填 | 说明 | +|------|------|------|------|------| +| path | string | body | 是 | 文件夹路径 | + +```bash +curl -X POST "/api/v1/file-manager/download-folder-zip" \ + -H "Content-Type: application/json" \ + -d '{"path": "projects/dataset"}' -o dataset.zip +``` + +响应:ZIP 文件二进制流(`application/zip`)。 + +> 限制:最大 500MB,最多 10000 个文件。 + +--- + +## 14. 批量下载为 ZIP + +**POST** `/api/v1/file-manager/download-multiple-zip` + +请求方式:JSON + +| 参数 | 类型 | 位置 | 必填 | 说明 | +|------|------|------|------|------| +| paths | string[] | body | 是 | 文件/文件夹路径数组 | +| filename | string | body | 否 | ZIP 文件名,默认 `batch_download.zip` | + +```bash +curl -X POST "/api/v1/file-manager/download-multiple-zip" \ + -H "Content-Type: application/json" \ + -d '{"paths": ["projects/a.pdf", "projects/sub"], "filename": "export.zip"}' \ + -o export.zip +``` + +响应:ZIP 文件二进制流(`application/zip`)。 + +> 限制:最大 500MB,最多 10000 个文件。 + +--- + +## 15. 
批量操作 + +**POST** `/api/v1/file-manager/batch-operation` + +请求方式:JSON(操作数组) + +支持操作类型:`delete` / `move` / `copy` + +```bash +curl -X POST "/api/v1/file-manager/batch-operation" \ + -H "Content-Type: application/json" \ + -d '[ + {"type": "delete", "path": "projects/old.pdf"}, + {"type": "move", "source_path": "projects/a.pdf", "target_path": "projects/archive/a.pdf"}, + {"type": "copy", "source_path": "projects/b.pdf", "target_path": "projects/backup/b.pdf"} + ]' +``` + +响应示例: +```json +{ + "success": true, + "results": [ + {"type": "delete", "success": true, "message": "删除成功"}, + {"type": "move", "success": true, "message": "移动成功"}, + {"type": "copy", "success": true, "message": "复制成功"} + ], + "total": 3, + "successful": 3 +} +``` + +--- + +## 通用说明 + +- **路径约束**:所有路径必须以 `projects` 或 `prompt` 开头,否则返回 400 +- **安全机制**:路径经过规范化处理,防止目录遍历攻击 +- **隐藏文件**:以 `.` 开头的文件/目录在列出和搜索时自动跳过 +- **错误响应格式**: +```json +{ + "detail": "错误描述信息" +} +``` diff --git a/prompt/FACT_RETRIEVAL_PROMPT.md b/prompt/FACT_RETRIEVAL_PROMPT.md index da7d8b2..0064f06 100644 --- a/prompt/FACT_RETRIEVAL_PROMPT.md +++ b/prompt/FACT_RETRIEVAL_PROMPT.md @@ -148,6 +148,21 @@ Output: {{"facts" : []}} Input: DR1の照明状態を教えて Output: {{"facts" : []}} +Input: 私は林檎好きです +Output: {{"facts" : ["林檎が好き"]}} + +Input: コーヒー飲みたい、毎朝 +Output: {{"facts" : ["毎朝コーヒーを飲みたい"]}} + +Input: 昨日映画見た、すごくよかった +Output: {{"facts" : ["昨日映画を見た", "映画がすごくよかった"]}} + +Input: 我喜欢吃苹果 +Output: {{"facts" : ["喜欢吃苹果"]}} + +Input: 나는 사과를 좋아해 +Output: {{"facts" : ["사과를 좋아함"]}} + Return the facts and preferences in a json format as shown above. Remember the following: @@ -159,12 +174,15 @@ Remember the following: - If you do not find anything relevant in the below conversation, you can return an empty list corresponding to the "facts" key. - Create the facts based on the user and assistant messages only. Do not pick anything from the system messages. - Make sure to return the response in the format mentioned in the examples. 
The response should be in json with a key as "facts" and corresponding value will be a list of strings. + - **CRITICAL - Do NOT memorize actions or operations**: Do not extract facts about queries the user asked you to perform, devices the user asked you to operate, or any one-time transient actions. Only memorize information ABOUT the user (preferences, relationships, personal details, plans), not actions the user asked the assistant to DO. Ask yourself: "Is this a fact about WHO the user IS, or what the user asked me to DO?" Only remember the former. + - **CRITICAL for Semantic Completeness**: - Each extracted fact MUST preserve the complete semantic meaning. Never truncate or drop key parts of the meaning. - For colloquial or grammatically informal expressions (common in spoken Japanese, Chinese, Korean, etc.), understand the full intended meaning and record it in a clear, semantically complete form. - In Japanese, spoken language often omits particles (e.g., が, を, に). When extracting facts, include the necessary particles to make the meaning unambiguous. For example: "私は林檎好きです" should be understood as "林檎が好き" (likes apples), not literally "私は林檎好き". - When the user expresses a preference or opinion in casual speech, record the core preference/opinion clearly. Remove the subject pronoun (私は/I) since facts are about the user by default, but keep all other semantic components intact. + - **CRITICAL for People/Relationship Tracking**: - Write people-related facts in plain, natural language. Do NOT use structured formats like "Contact:", "referred as", or "DEFAULT when user says". 
- Good examples: "Michael Johnson is a colleague, also called Mike", "田中さんは友達", "滨田太郎は「滨田」とも呼ばれている" diff --git a/prompt/system_prompt.md b/prompt/system_prompt.md index 632af9e..d740313 100644 --- a/prompt/system_prompt.md +++ b/prompt/system_prompt.md @@ -14,7 +14,19 @@ The filesystem backend is currently operating in: `{agent_dir_path}` - Never use relative paths in bash commands - always construct full absolute paths - Use the working directory from to construct absolute paths -**2. Skill Script Path Conversion** +**2. Skills vs Tools - CRITICAL DISTINCTION** + +**Skills are NOT tools.** Do NOT attempt to call a skill as a tool_call/function_call. + +- **Tools** (e.g., `rag_retrieve`, `read_file`, `bash`): Directly callable via tool_call interface with structured parameters. +- **Skills** (e.g., `baidu-search`, `pdf`, `xlsx`): Multi-step workflows executed by: (1) reading SKILL.md, (2) extracting the command, (3) running it via the `bash` tool. + +❌ WRONG: Generating a tool_call with `{"name": "baidu-search", "arguments": {...}}` +✅ CORRECT: Using `read_file` to read SKILL.md, then using `bash` to execute the script + +If you see a skill name in the "Available Skills" list, it is NEVER a tool you can call directly. + +**3. Skill Script Path Conversion** When executing scripts from SKILL.md files, you MUST convert relative paths to absolute paths: @@ -29,14 +41,14 @@ When executing scripts from SKILL.md files, you MUST convert relative paths to a └── scriptB.py # Actual script B file ``` -**3. Workspace Directory Structure** +**4. Workspace Directory Structure** - **`{agent_dir_path}/skills/`** - Skill packages with embedded scripts - **`{agent_dir_path}/dataset/`** - Store file datasets and document data - **`{agent_dir_path}/executable_code/`** - Place generated executable scripts here (not skill scripts) - **`{agent_dir_path}/download/`** - Store downloaded files and content -**4. Executable Code Organization** +**5. 
Executable Code Organization** When creating scripts in `executable_code/`, follow these organization rules: @@ -80,3 +92,15 @@ Trace Id: {trace_id} - Even when the user writes in a different language, you MUST still reply in [{language}]. - Do NOT mix languages. Do NOT fall back to English or any other language under any circumstances. - Technical terms, code identifiers, file paths, and tool names may remain in their original form, but all surrounding text MUST be in [{language}]. + +**Citation Requirement (RAG Only)**: When answering questions based on `rag_retrieve` tool results, you MUST add XML citation tags for factual claims derived from the knowledge base. + +**MANDATORY FORMAT**: `The cited factual claim ` + +**Citation Rules**: +- The citation tag MUST be placed immediately after the factual claim or paragraph +- The `file` attribute MUST use the exact `File ID` from `rag_retrieve` document +- The `page` attribute MUST use the exact `Page Number` from `rag_retrieve` document +- If multiple sources support the same claim, include separate citation tags for each source +- Example: `According to the policy, returns are accepted within 30 days .` +- This requirement ONLY applies when using `rag_retrieve` results to answer questions. diff --git a/prompt/system_prompt_deep_agent.md b/prompt/system_prompt_deep_agent.md index 336a265..ecca407 100644 --- a/prompt/system_prompt_deep_agent.md +++ b/prompt/system_prompt_deep_agent.md @@ -83,6 +83,18 @@ When using the write_todos tool: The todo list is a planning tool - use it judiciously to avoid overwhelming the user with excessive task tracking. +### Skills vs Tools - CRITICAL DISTINCTION + +**Skills are NOT tools.** Do NOT attempt to call a skill as a tool_call/function_call. + +- **Tools** (e.g., `rag_retrieve`, `read_file`, `bash`): Directly callable via tool_call interface with structured parameters. 
+- **Skills** (e.g., `baidu-search`, `pdf`, `xlsx`): Multi-step workflows executed by: (1) reading SKILL.md, (2) extracting the command, (3) running it via the `bash` tool. + +❌ WRONG: Generating a tool_call with `{"name": "baidu-search", "arguments": {...}}` +✅ CORRECT: Using `read_file` to read SKILL.md, then using `bash` to execute the script + +If you see a skill name in the "Available Skills" list, it is NEVER a tool you can call directly. + ### Skill Execution Workflow **CRITICAL**: When you need to use a skill, follow this exact workflow: diff --git a/routes/memory.py b/routes/memory.py index a0b6a95..c29ae53 100644 --- a/routes/memory.py +++ b/routes/memory.py @@ -1,13 +1,13 @@ """ Memory 管理 API 路由 -提供记忆查看和删除功能 +提供记忆查看、添加和删除功能 """ import logging -from typing import Optional, List, Dict, Any +from typing import Literal, Optional, List, Dict, Any from fastapi import APIRouter, HTTPException, Header, Query from fastapi.responses import JSONResponse -from pydantic import BaseModel +from pydantic import BaseModel, Field logger = logging.getLogger('app') @@ -33,6 +33,26 @@ class DeleteAllResponse(BaseModel): deleted_count: int +class ConversationMessage(BaseModel): + """A single conversation message from either the user or the assistant.""" + role: Literal["user", "assistant"] + content: str = Field(..., min_length=1) + + +class AddMemoryRequest(BaseModel): + """Request body for adding memories from a conversation.""" + bot_id: str = Field(..., min_length=1) + user_id: str = Field(..., min_length=1) + messages: List[ConversationMessage] = Field(..., max_length=200) + + +class AddMemoryResponse(BaseModel): + """Response returned by the add-memory endpoint.""" + success: bool + pairs_processed: int + pairs_failed: int = 0 + + async def get_user_identifier_from_request( authorization: Optional[str], user_id: Optional[str] = None @@ -63,6 +83,92 @@ async def get_user_identifier_from_request( ) +@router.post("/memory", response_model=AddMemoryResponse) +async def add_memory_from_conversation(data: AddMemoryRequest): + """ + Extract and save memories from conversation messages. + + Pairs user and assistant messages, then extracts key facts via Mem0 and stores them. + Intended for scenarios that bypass the Agent middleware, such as realtime voice conversations. + 
This endpoint is intended for internal services (e.g. felo-mygpt) and is not meant to be exposed to external users. + NOTE(review): no authentication check is visible in this handler - confirm access is restricted upstream. + + Pairing rules: consecutive user messages are joined with newlines into one user turn; a user turn that is not directly followed by an assistant message is skipped, as is any assistant message without a preceding user turn. + Each pair is stored via manager.add_memory; a failing pair is logged and counted in pairs_failed without aborting the remaining pairs. + + Returns: AddMemoryResponse whose success flag is True only when no pair failed. + Raises: HTTPException 503 when MEM0_ENABLED is falsy, 500 on any other unexpected error. + """ + try: + from agent.mem0_manager import get_mem0_manager + from utils.settings import MEM0_ENABLED + + if not MEM0_ENABLED: + raise HTTPException( + status_code=503, + detail="Memory feature is not enabled" + ) + + if not data.messages: + return AddMemoryResponse(success=True, pairs_processed=0) + + manager = get_mem0_manager() + + # Pair the messages into user-assistant pairs, then call add_memory for each pair + pairs_processed = 0 + pairs_failed = 0 + i = 0 + while i < len(data.messages): + msg = data.messages[i] + if msg.role == 'user': + # Collect consecutive user messages + user_contents = [msg.content] + j = i + 1 + while j < len(data.messages) and data.messages[j].role == 'user': + user_contents.append(data.messages[j].content) + j += 1 + + user_text = '\n'.join(user_contents) + + # Check whether a matching assistant reply follows + assistant_text = "" + if j < len(data.messages) and data.messages[j].role == 'assistant': + assistant_text = data.messages[j].content or "" + j += 1 + + if user_text and assistant_text: + conversation_text = f"User: {user_text}\nAssistant: {assistant_text}" + try: + await manager.add_memory( + text=conversation_text, + user_id=data.user_id, + agent_id=data.bot_id, + metadata={"type": "realtime_conversation"}, + ) + pairs_processed += 1 + except Exception as pair_error: + pairs_failed += 1 + logger.error( + f"Failed to add memory for pair: {pair_error}" + ) + + i = j + else: + i += 1 + + logger.info( + f"Added {pairs_processed} memory pairs (failed={pairs_failed}) " + f"for user={data.user_id}, bot={data.bot_id}" + ) + return AddMemoryResponse( + success=pairs_failed == 0, + pairs_processed=pairs_processed, + pairs_failed=pairs_failed, + ) + + except HTTPException: + raise + except Exception as e: + logger.error(f"Failed to add memory from conversation: {e}") + raise HTTPException( + status_code=500, + detail="Failed to add memory from conversation" + ) + + @router.get("/memory", response_model=MemoryListResponse) async def get_memories( bot_id: str = Query(..., 
description="Bot ID (对应 agent_id)"),