From deb78a76253a3a3855965858443e92b9f0b2da8a Mon Sep 17 00:00:00 2001 From: autobee-sparticle Date: Tue, 17 Mar 2026 10:37:49 +0900 Subject: [PATCH 1/2] fix: improve memory extraction for colloquial/informal speech (#16) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore: add .worktrees/ to .gitignore Co-Authored-By: Claude Opus 4.6 * feat(CI): 添加 onprem-dev 环境的构建和部署配置 在 CircleCI 配置中新增 onprem-dev 环境的 build-and-push 和 deploy 任务,部署到 cluster-for-B 的 onprem-dev 命名空间 Co-Authored-By: Claude Opus 4.6 (1M context) * fix: improve memory extraction for colloquial/informal speech Add semantic completeness rules and multilingual few-shot examples to FACT_RETRIEVAL_PROMPT to prevent truncated or semantically incorrect memory extraction. Specifically addresses Japanese casual speech where particles (が, を, に) are often omitted. Closes sparticleinc/mygpt-frontend#2125 Co-Authored-By: Claude Opus 4.6 --------- Co-authored-by: zhuchao Co-authored-by: Claude Opus 4.6 Co-authored-by: shuirong --- .circleci/config.yml | 27 +++++++++++++++++++++++++++ prompt/FACT_RETRIEVAL_PROMPT.md | 20 ++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index b0a1532..d0b7173 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -193,3 +193,30 @@ workflows: branches: only: - onprem + # 为 onprem-dev 环境部署 + - build-and-push: + name: build-for-onprem-dev + context: + - ecr-new + path: . + dockerfile: Dockerfile + repo: catalog-agent + docker-tag: '' + filters: + branches: + only: + - onprem + - deploy: + name: deploy-for-onprem-dev + docker-tag: '' + path: '/home/ubuntu/cluster-for-B/onprem-dev/catalog-agent/deploy.yaml' + deploy-name: catalog-agent + deploy-namespace: onprem-dev + context: + - ecr-new + filters: + branches: + only: + - onprem + requires: + - build-for-onprem-dev diff --git a/prompt/FACT_RETRIEVAL_PROMPT.md b/prompt/FACT_RETRIEVAL_PROMPT.md index 27777e6..1c2f93e 100644 --- a/prompt/FACT_RETRIEVAL_PROMPT.md +++ b/prompt/FACT_RETRIEVAL_PROMPT.md @@ -83,6 +83,21 @@ Output: {{"facts" : ["Mike Smith helped with bug fix", "Contact: Mike Smith (col Input: Mike is coming to the meeting tomorrow. Output: {{"facts" : ["Mike Smith is coming to the meeting tomorrow", "Contact: Mike Smith (colleague, also referred as Mike) - DEFAULT when user says 'Mike'"]}} +Input: 私は林檎好きです +Output: {{"facts" : ["林檎が好き"]}} + +Input: コーヒー飲みたい、毎朝 +Output: {{"facts" : ["毎朝コーヒーを飲みたい"]}} + +Input: 昨日映画見た、すごくよかった +Output: {{"facts" : ["昨日映画を見た", "映画がすごくよかった"]}} + +Input: 我喜欢吃苹果 +Output: {{"facts" : ["喜欢吃苹果"]}} + +Input: 나는 사과를 좋아해 +Output: {{"facts" : ["사과를 좋아함"]}} + Return the facts and preferences in a json format as shown above. Remember the following: @@ -93,6 +108,11 @@ Remember the following: - If you do not find anything relevant in the below conversation, you can return an empty list corresponding to the "facts" key. - Create the facts based on the user and assistant messages only. Do not pick anything from the system messages. - Make sure to return the response in the format mentioned in the examples. The response should be in json with a key as "facts" and corresponding value will be a list of strings. +- **CRITICAL for Semantic Completeness**: + - Each extracted fact MUST preserve the complete semantic meaning. Never truncate or drop key parts of the meaning. + - For colloquial or grammatically informal expressions (common in spoken Japanese, Chinese, Korean, etc.), understand the full intended meaning and record it in a clear, semantically complete form. + - In Japanese, spoken language often omits particles (e.g., が, を, に). When extracting facts, include the necessary particles to make the meaning unambiguous. For example: "私は林檎好きです" should be understood as "林檎が好き" (likes apples), not literally "私は林檎好き". + - When the user expresses a preference or opinion in casual speech, record the core preference/opinion clearly. Remove the subject pronoun (私は/I) since facts are about the user by default, but keep all other semantic components intact. - **CRITICAL for Contact/Relationship Tracking**: - ALWAYS use the "Contact: [name] (relationship/context)" format when recording people - When you see a short name that matches a known full name, record as "Contact: [Full Name] (relationship, also referred as [Short Name])" From a161e43421bcd6e66e725525a02300bd94153569 Mon Sep 17 00:00:00 2001 From: autobee-sparticle Date: Tue, 17 Mar 2026 11:14:02 +0900 Subject: [PATCH 2/2] feat: add POST /api/v1/memory endpoint for realtime conversation memory (#17) * feat: add POST /api/v1/memory endpoint for realtime conversation memory Add memory extraction API that accepts conversation messages and stores them via Mem0. This enables realtime voice sessions to save memories through the same pipeline as chat conversations. Fixes: sparticleinc/mygpt-frontend#2126 Co-Authored-By: Claude Opus 4.6 * fix: address code review findings for memory API - Use Literal["user","assistant"] for role field validation - Add Field constraints (min_length, max_length=200) - Track and report pairs_failed in response - Hide internal exception details from HTTP response - Remove unused authorization parameter (internal API) Co-Authored-By: Claude Opus 4.6 --------- Co-authored-by: zhuchao Co-authored-by: Claude Opus 4.6 --- routes/memory.py | 112 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 109 insertions(+), 3 deletions(-) diff --git a/routes/memory.py b/routes/memory.py index a0b6a95..c29ae53 100644 --- a/routes/memory.py +++ b/routes/memory.py @@ -1,13 +1,13 @@ """ Memory 管理 API 路由 -提供记忆查看和删除功能 +提供记忆查看、添加和删除功能 """ import logging -from typing import Optional, List, Dict, Any +from typing import Literal, Optional, List, Dict, Any from fastapi import APIRouter, HTTPException, Header, Query from fastapi.responses import JSONResponse -from pydantic import BaseModel +from pydantic import BaseModel, Field logger = logging.getLogger('app') @@ -33,6 +33,26 @@ class DeleteAllResponse(BaseModel): deleted_count: int +class ConversationMessage(BaseModel): + """对话消息""" + role: Literal["user", "assistant"] + content: str = Field(..., min_length=1) + + +class AddMemoryRequest(BaseModel): + """添加记忆的请求体""" + bot_id: str = Field(..., min_length=1) + user_id: str = Field(..., min_length=1) + messages: List[ConversationMessage] = Field(..., max_length=200) + + +class AddMemoryResponse(BaseModel): + """添加记忆的响应""" + success: bool + pairs_processed: int + pairs_failed: int = 0 + + async def get_user_identifier_from_request( authorization: Optional[str], user_id: Optional[str] = None @@ -63,6 +83,92 @@ async def get_user_identifier_from_request( ) +@router.post("/memory", response_model=AddMemoryResponse) +async def add_memory_from_conversation(data: AddMemoryRequest): + """ + 从对话消息中提取并保存记忆 + + 将用户和助手的对话配对,通过 Mem0 提取关键事实并存储。 + 用于 realtime 语音对话等不经过 Agent 中间件的场景。 + 此端点供内部服务调用(如 felo-mygpt),不暴露给外部用户。 + """ + try: + from agent.mem0_manager import get_mem0_manager + from utils.settings import MEM0_ENABLED + + if not MEM0_ENABLED: + raise HTTPException( + status_code=503, + detail="Memory feature is not enabled" + ) + + if not data.messages: + return AddMemoryResponse(success=True, pairs_processed=0) + + manager = get_mem0_manager() + + # 将消息配对为 user-assistant 对,然后调用 add_memory + pairs_processed = 0 + pairs_failed = 0 + i = 0 + while i < len(data.messages): + msg = data.messages[i] + if msg.role == 'user': + # 收集连续的 user 消息 + user_contents = [msg.content] + j = i + 1 + while j < len(data.messages) and data.messages[j].role == 'user': + user_contents.append(data.messages[j].content) + j += 1 + + user_text = '\n'.join(user_contents) + + # 检查是否有对应的 assistant 回复 + assistant_text = "" + if j < len(data.messages) and data.messages[j].role == 'assistant': + assistant_text = data.messages[j].content or "" + j += 1 + + if user_text and assistant_text: + conversation_text = f"User: {user_text}\nAssistant: {assistant_text}" + try: + await manager.add_memory( + text=conversation_text, + user_id=data.user_id, + agent_id=data.bot_id, + metadata={"type": "realtime_conversation"}, + ) + pairs_processed += 1 + except Exception as pair_error: + pairs_failed += 1 + logger.error( + f"Failed to add memory for pair: {pair_error}" + ) + + i = j + else: + i += 1 + + logger.info( + f"Added {pairs_processed} memory pairs (failed={pairs_failed}) " + f"for user={data.user_id}, bot={data.bot_id}" + ) + return AddMemoryResponse( + success=pairs_failed == 0, + pairs_processed=pairs_processed, + pairs_failed=pairs_failed, + ) + + except HTTPException: + raise + except Exception as e: + logger.error(f"Failed to add memory from conversation: {e}") + raise HTTPException( + status_code=500, + detail="Failed to add memory from conversation" + ) + + @router.get("/memory", response_model=MemoryListResponse) async def get_memories( bot_id: str = Query(..., description="Bot ID (对应 agent_id)"),