diff --git a/.circleci/config.yml b/.circleci/config.yml
index b0a1532..d0b7173 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -193,3 +193,30 @@ workflows:
             branches:
               only:
                 - onprem
+      # Build and deploy for the onprem-dev environment
+      - build-and-push:
+          name: build-for-onprem-dev
+          context:
+            - ecr-new
+          path: .
+          dockerfile: Dockerfile
+          repo: catalog-agent
+          docker-tag: ''
+          filters:
+            branches:
+              only:
+                - onprem
+      - deploy:
+          name: deploy-for-onprem-dev
+          docker-tag: ''
+          path: '/home/ubuntu/cluster-for-B/onprem-dev/catalog-agent/deploy.yaml'
+          deploy-name: catalog-agent
+          deploy-namespace: onprem-dev
+          context:
+            - ecr-new
+          filters:
+            branches:
+              only:
+                - onprem
+          requires:
+            - build-for-onprem-dev
diff --git a/prompt/FACT_RETRIEVAL_PROMPT.md b/prompt/FACT_RETRIEVAL_PROMPT.md
index 27777e6..1c2f93e 100644
--- a/prompt/FACT_RETRIEVAL_PROMPT.md
+++ b/prompt/FACT_RETRIEVAL_PROMPT.md
@@ -83,6 +83,21 @@ Output: {{"facts" : ["Mike Smith helped with bug fix", "Contact: Mike Smith (col
 Input: Mike is coming to the meeting tomorrow.
 Output: {{"facts" : ["Mike Smith is coming to the meeting tomorrow", "Contact: Mike Smith (colleague, also referred as Mike) - DEFAULT when user says 'Mike'"]}}
 
+Input: 私は林檎好きです
+Output: {{"facts" : ["林檎が好き"]}}
+
+Input: コーヒー飲みたい、毎朝
+Output: {{"facts" : ["毎朝コーヒーを飲みたい"]}}
+
+Input: 昨日映画見た、すごくよかった
+Output: {{"facts" : ["昨日映画を見た", "映画がすごくよかった"]}}
+
+Input: 我喜欢吃苹果
+Output: {{"facts" : ["喜欢吃苹果"]}}
+
+Input: 나는 사과를 좋아해
+Output: {{"facts" : ["사과를 좋아함"]}}
+
 Return the facts and preferences in a json format as shown above.
 
 Remember the following:
@@ -93,6 +108,11 @@ Remember the following:
 - If you do not find anything relevant in the below conversation, you can return an empty list corresponding to the "facts" key.
 - Create the facts based on the user and assistant messages only. Do not pick anything from the system messages.
 - Make sure to return the response in the format mentioned in the examples. The response should be in json with a key as "facts" and corresponding value will be a list of strings.
+- **CRITICAL for Semantic Completeness**:
+  - Each extracted fact MUST preserve the complete semantic meaning. Never truncate or drop key parts of the meaning.
+  - For colloquial or grammatically informal expressions (common in spoken Japanese, Chinese, Korean, etc.), understand the full intended meaning and record it in a clear, semantically complete form.
+  - In Japanese, spoken language often omits particles (e.g., が, を, に). When extracting facts, include the necessary particles to make the meaning unambiguous. For example: "私は林檎好きです" should be understood as "林檎が好き" (likes apples), not literally "私は林檎好き".
+  - When the user expresses a preference or opinion in casual speech, record the core preference/opinion clearly. Remove the first-person subject (e.g., 私は / 我 / 나는 / "I") since facts are about the user by default, but keep all other semantic components intact.
 - **CRITICAL for Contact/Relationship Tracking**:
   - ALWAYS use the "Contact: [name] (relationship/context)" format when recording people
   - When you see a short name that matches a known full name, record as "Contact: [Full Name] (relationship, also referred as [Short Name])"
diff --git a/routes/memory.py b/routes/memory.py
index a0b6a95..c29ae53 100644
--- a/routes/memory.py
+++ b/routes/memory.py
@@ -1,13 +1,13 @@
 """
 Memory 管理 API 路由
-提供记忆查看和删除功能
+提供记忆查看、添加和删除功能
 """
 
 import logging
-from typing import Optional, List, Dict, Any
+from typing import Literal, Optional, List, Dict, Any
 from fastapi import APIRouter, HTTPException, Header, Query
 from fastapi.responses import JSONResponse
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 
 logger = logging.getLogger('app')
 
@@ -33,6 +33,26 @@ class DeleteAllResponse(BaseModel):
     deleted_count: int
 
 
+class ConversationMessage(BaseModel):
+    """A single conversation message."""
+    role: Literal["user", "assistant"]  # only these two role values pass validation
+    content: str = Field(..., min_length=1)  # required; empty strings are rejected
+
+
+class AddMemoryRequest(BaseModel):
+    """Request body for adding memories."""
+    bot_id: str = Field(..., min_length=1)  # forwarded to add_memory as agent_id
+    user_id: str = Field(..., min_length=1)  # forwarded to add_memory as user_id
+    messages: List[ConversationMessage] = Field(..., max_length=200)  # presumably caps the list at 200 items (pydantic v2) - TODO confirm
+
+
+class AddMemoryResponse(BaseModel):
+    """Response for the add-memory endpoint."""
+    success: bool  # the handler sets this to pairs_failed == 0
+    pairs_processed: int  # pairs for which add_memory returned without raising
+    pairs_failed: int = 0  # pairs for which add_memory raised
+
+
 async def get_user_identifier_from_request(
     authorization: Optional[str],
     user_id: Optional[str] = None
@@ -63,6 +83,92 @@ async def get_user_identifier_from_request(
     )
 
 
+@router.post("/memory", response_model=AddMemoryResponse)
+async def add_memory_from_conversation(data: AddMemoryRequest):
+    """
+    Extract and save memories from conversation messages.
+
+    Pairs user and assistant messages, then extracts and stores key facts via Mem0.
+    Intended for scenarios that bypass the Agent middleware, such as realtime voice conversations.
+    This endpoint is for internal service callers (e.g. felo-mygpt) and is not exposed to external users.
+    """
+    try:
+        from agent.mem0_manager import get_mem0_manager
+        from utils.settings import MEM0_ENABLED
+
+        if not MEM0_ENABLED:
+            raise HTTPException(
+                status_code=503,
+                detail="Memory feature is not enabled"
+            )
+
+        if not data.messages:
+            return AddMemoryResponse(success=True, pairs_processed=0)  # empty list: nothing to store
+
+        manager = get_mem0_manager()
+
+        # Pair messages into user-assistant pairs, then call add_memory for each pair
+        pairs_processed = 0
+        pairs_failed = 0
+        i = 0
+        while i < len(data.messages):
+            msg = data.messages[i]
+            if msg.role == 'user':
+                # Collect the run of consecutive user messages starting at i
+                user_contents = [msg.content]
+                j = i + 1
+                while j < len(data.messages) and data.messages[j].role == 'user':
+                    user_contents.append(data.messages[j].content)
+                    j += 1
+
+                user_text = '\n'.join(user_contents)
+
+                # Check whether an assistant reply directly follows the user run
+                assistant_text = ""
+                if j < len(data.messages) and data.messages[j].role == 'assistant':
+                    assistant_text = data.messages[j].content or ""
+                    j += 1
+
+                if user_text and assistant_text:  # a user run without an assistant reply is not stored
+                    conversation_text = f"User: {user_text}\nAssistant: {assistant_text}"
+                    try:
+                        await manager.add_memory(
+                            text=conversation_text,
+                            user_id=data.user_id,
+                            agent_id=data.bot_id,
+                            metadata={"type": "realtime_conversation"},
+                        )
+                        pairs_processed += 1
+                    except Exception as pair_error:  # count the failure and continue with the remaining pairs
+                        pairs_failed += 1
+                        logger.error(
+                            f"Failed to add memory for pair: {pair_error}"
+                        )
+
+                i = j  # resume after the consumed user run (and its reply, if any)
+            else:
+                i += 1  # assistant message not consumed as a reply: skip it
+
+        logger.info(
+            f"Added {pairs_processed} memory pairs (failed={pairs_failed}) "
+            f"for user={data.user_id}, bot={data.bot_id}"
+        )
+        return AddMemoryResponse(
+            success=pairs_failed == 0,  # True only when no attempted pair failed
+            pairs_processed=pairs_processed,
+            pairs_failed=pairs_failed,
+        )
+
+    except HTTPException:  # re-raise as-is so the 503 above is not rewrapped as a 500
+        raise
+    except Exception as e:
+        logger.error(f"Failed to add memory from conversation: {e}")
+        raise HTTPException(
+            status_code=500,
+            detail="Failed to add memory from conversation"
+        )
+
+
 @router.get("/memory", response_model=MemoryListResponse)
 async def get_memories(
     bot_id: str = Query(..., description="Bot ID (对应 agent_id)"),