日志优化 (Logging optimization)
This commit is contained in:
parent 116f1cb471
commit 3973174c83
README.md (17)
@@ -668,3 +668,20 @@ qwen-agent/
```

现在您可以开始使用 Catalog Agent 进行智能数据检索了!🚀

## 提示词工程

指南

```guideline
1) Condition: 用户意图为“执行设备控制”。 Action: 如果用户在当前交互中已回复肯定确认(例如,同意、确认、好的等),则立即调用 Iot Control-dxcore_update_device_status 工具执行设备操作。否则(用户尚未确认),则向用户发送确认询问:“即将为您[操作内容][设备名称][具体参数],是否确认?”并等待用户回复。
3) Condition: 当用户意图为查询设备状态或信息时。 Action: 调用 Iot Control 设备相关的工具进行查询(不调用 rag_retrieve-rag_retrieve)。
4) Condition: 当用户意图为非设备相关的问题(如提问、遇到难处、咨询事实性问题、价格等)时。 Action: 优先调用 rag_retrieve-rag_retrieve 查询知识库;如果无结果,则调用 WebSearch-web_search;最后综合两个工具的内容进行回复。
```

术语表

```terms
1) Name: term_name1, Description: desc, Synonyms: syn1, syn2
2) Name: term_name2, Description: desc, Synonyms: syn1, syn2
```

@@ -7,7 +7,9 @@ import asyncio
import os
import json
from typing import Dict, Tuple, Optional, Any
from utils.logger import logger
import logging

logger = logging.getLogger('app')


class ConfigFileCache:
@@ -97,4 +99,4 @@ class ConfigFileCache:

# 全局缓存实例
config_cache = ConfigFileCache()
config_cache = ConfigFileCache()

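The hunk above sets the pattern repeated throughout this commit: the per-module or project-specific logger import is replaced by `import logging` plus a shared `logger = logging.getLogger('app')`, and later hunks swap `print()` calls for `logger.info/warning/error/debug`. The actual handler wiring lives in `utils.log_util.logger.init_with_fastapi` (not shown in this diff); the following is only a minimal sketch, under that assumption, of what a central configuration for the shared `'app'` logger might look like:

```python
import logging
import sys


def configure_app_logger(level: int = logging.INFO) -> logging.Logger:
    """Hypothetical central setup for the shared 'app' logger.

    Once configured here, every module can simply call
    logging.getLogger('app') and reuse the same handler/formatter.
    """
    logger = logging.getLogger('app')
    logger.setLevel(level)
    if not logger.handlers:  # avoid stacking duplicate handlers on re-import
        handler = logging.StreamHandler(sys.stdout)
        handler.setFormatter(logging.Formatter(
            "%(asctime)s %(levelname)s [%(name)s] %(message)s"))
        logger.addHandler(handler)
    return logger


# Per-module usage, matching the pattern the commit introduces:
logger = logging.getLogger('app')
logger.info("系统优化初始化完成")  # replaces the old print(...)
```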
@@ -24,7 +24,9 @@ from typing import Dict, Optional, Union

from dotenv import load_dotenv

from qwen_agent.log import logger
import logging

logger = logging.getLogger('app')
from qwen_agent.tools.base import BaseTool

@@ -21,7 +21,9 @@ import asyncio
from typing import Dict, List, Optional

from qwen_agent.agents import Assistant
from qwen_agent.log import logger
import logging

logger = logging.getLogger('app')

from agent.modified_assistant import init_modified_agent_service_with_files, update_agent_llm
from agent.prompt_loader import load_system_prompt_async, load_mcp_settings_async

@@ -24,9 +24,11 @@ from qwen_agent.llm.schema import ASSISTANT, FUNCTION, Message
from qwen_agent.llm.oai import TextChatAtOAI
from qwen_agent.tools import BaseTool
from agent.custom_mcp_manager import CustomMCPManager
import logging

logger = logging.getLogger('app')
# 设置工具日志记录器
tool_logger = logging.getLogger('tool_logger')
tool_logger = logging.getLogger('app')

class ModifiedAssistant(Assistant):
    """

@@ -7,6 +7,9 @@ import json
import asyncio
from typing import List, Dict, Optional, Any
from datetime import datetime, timezone, timedelta
import logging

logger = logging.getLogger('app')


def safe_replace(text: str, placeholder: str, value: Any) -> str:

@ -142,9 +145,9 @@ async def load_system_prompt_async(project_dir: str, language: str = None, syste
|
||||
default_prompt_file = os.path.join("prompt", f"system_prompt_{robot_type}.md")
|
||||
system_prompt_default = await config_cache.get_text_file(default_prompt_file)
|
||||
if system_prompt_default:
|
||||
print(f"Using cached default system prompt for {robot_type} from prompt folder")
|
||||
logger.info(f"Using cached default system prompt for {robot_type} from prompt folder")
|
||||
except Exception as e:
|
||||
print(f"Failed to load default system prompt for {robot_type}: {str(e)}")
|
||||
logger.error(f"Failed to load default system prompt for {robot_type}: {str(e)}")
|
||||
system_prompt_default = None
|
||||
|
||||
readme = ""
|
||||
@ -244,11 +247,11 @@ async def load_mcp_settings_async(project_dir: str, mcp_settings: list=None, bot
|
||||
default_mcp_file = os.path.join("mcp", f"mcp_settings_{robot_type}.json")
|
||||
default_mcp_settings = await config_cache.get_json_file(default_mcp_file) or []
|
||||
if default_mcp_settings:
|
||||
print(f"Using cached default mcp_settings_{robot_type} from mcp folder")
|
||||
logger.info(f"Using cached default mcp_settings_{robot_type} from mcp folder")
|
||||
else:
|
||||
print(f"No default mcp_settings_{robot_type} found, using empty default settings")
|
||||
logger.warning(f"No default mcp_settings_{robot_type} found, using empty default settings")
|
||||
except Exception as e:
|
||||
print(f"Failed to load default mcp_settings_{robot_type}: {str(e)}")
|
||||
logger.error(f"Failed to load default mcp_settings_{robot_type}: {str(e)}")
|
||||
default_mcp_settings = []
|
||||
|
||||
# 遍历mcpServers工具,给每个工具增加env参数
|
||||
@ -269,7 +272,7 @@ async def load_mcp_settings_async(project_dir: str, mcp_settings: list=None, bot
|
||||
input_mcp_settings = mcp_settings
|
||||
elif mcp_settings:
|
||||
input_mcp_settings = [mcp_settings]
|
||||
print(f"Warning: mcp_settings is not a list, converting to list format")
|
||||
logger.warning(f"Warning: mcp_settings is not a list, converting to list format")
|
||||
|
||||
# 3. 合并默认设置和传入设置
|
||||
merged_settings = []
|
||||
@ -286,7 +289,7 @@ async def load_mcp_settings_async(project_dir: str, mcp_settings: list=None, bot
|
||||
# 合并mcpServers对象,传入的设置覆盖默认设置中相同的key
|
||||
default_mcp_servers.update(input_mcp_servers)
|
||||
merged_settings[0]['mcpServers'] = default_mcp_servers
|
||||
print(f"Merged mcpServers: default + {len(input_mcp_servers)} input servers")
|
||||
logger.info(f"Merged mcpServers: default + {len(input_mcp_servers)} input servers")
|
||||
|
||||
# 如果没有默认设置但有传入设置,直接使用传入设置
|
||||
elif input_mcp_settings:
|
||||
@ -296,7 +299,7 @@ async def load_mcp_settings_async(project_dir: str, mcp_settings: list=None, bot
|
||||
if not merged_settings:
|
||||
merged_settings = []
|
||||
elif not isinstance(merged_settings, list):
|
||||
print(f"Warning: merged_settings is not a list, converting to list format")
|
||||
logger.warning(f"Warning: merged_settings is not a list, converting to list format")
|
||||
merged_settings = [merged_settings] if merged_settings else []
|
||||
|
||||
# 计算 dataset_dir 用于替换 MCP 配置中的占位符
|
||||
|
||||
@ -24,7 +24,9 @@ import threading
|
||||
from collections import defaultdict
|
||||
|
||||
from qwen_agent.agents import Assistant
|
||||
from qwen_agent.log import logger
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger('app')
|
||||
|
||||
from agent.modified_assistant import init_modified_agent_service_with_files, update_agent_llm
|
||||
from agent.prompt_loader import load_system_prompt_async, load_mcp_settings_async
|
||||
|
||||
@ -7,6 +7,10 @@ import requests
|
||||
import asyncio
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
|
||||
# Configure logger
|
||||
logger = logging.getLogger('app')
|
||||
|
||||
def encode_texts_via_api(texts, batch_size=32):
|
||||
"""通过 API 接口编码文本"""
|
||||
@ -35,18 +39,18 @@ def encode_texts_via_api(texts, batch_size=32):
|
||||
|
||||
if result_data.get("success"):
|
||||
embeddings_list = result_data.get("embeddings", [])
|
||||
print(f"API编码成功,处理了 {len(texts)} 个文本,embedding维度: {len(embeddings_list[0]) if embeddings_list else 0}")
|
||||
logger.info(f"API编码成功,处理了 {len(texts)} 个文本,embedding维度: {len(embeddings_list[0]) if embeddings_list else 0}")
|
||||
return np.array(embeddings_list)
|
||||
else:
|
||||
error_msg = result_data.get('error', '未知错误')
|
||||
print(f"API编码失败: {error_msg}")
|
||||
logger.error(f"API编码失败: {error_msg}")
|
||||
raise Exception(f"API编码失败: {error_msg}")
|
||||
else:
|
||||
print(f"API请求失败: {response.status_code} - {response.text}")
|
||||
logger.error(f"API请求失败: {response.status_code} - {response.text}")
|
||||
raise Exception(f"API请求失败: {response.status_code}")
|
||||
|
||||
except Exception as e:
|
||||
print(f"API编码异常: {e}")
|
||||
logger.error(f"API编码异常: {e}")
|
||||
raise
|
||||
|
||||
def clean_text(text):
|
||||
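The hunks above show `encode_texts_via_api` logging the outcome of a POST to an internal embedding service that answers with `{"success": ..., "embeddings": [...], "error": ...}`. As a rough sketch only — the endpoint URL, timeout, and batching details are assumptions, since the real configuration is not part of this diff:

```python
import logging

import numpy as np
import requests

logger = logging.getLogger('app')


def encode_texts_via_api_sketch(texts, batch_size=32,
                                api_url="http://localhost:8001/api/encode"):  # placeholder URL, not the project's real endpoint
    """Illustrative API-based encoder; the response field names follow the diff above."""
    embeddings = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        response = requests.post(api_url, json={"texts": batch}, timeout=60)
        if response.status_code != 200:
            logger.error(f"API请求失败: {response.status_code} - {response.text}")
            raise Exception(f"API请求失败: {response.status_code}")
        result_data = response.json()
        if not result_data.get("success"):
            error_msg = result_data.get("error", "未知错误")
            logger.error(f"API编码失败: {error_msg}")
            raise Exception(f"API编码失败: {error_msg}")
        embeddings.extend(result_data.get("embeddings", []))
    logger.info(f"API编码成功,处理了 {len(texts)} 个文本")
    return np.array(embeddings)
```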
@ -144,10 +148,10 @@ def embed_document(input_file='document.txt', output_file='embedding.pkl',
|
||||
if is_meaningful_line(cleaned_text):
|
||||
chunks.append(cleaned_text)
|
||||
|
||||
print(f"使用按行分块策略")
|
||||
print(f"原始行数: {original_count}")
|
||||
print(f"清理后有效句子数: {len(chunks)}")
|
||||
print(f"过滤比例: {((original_count - len(chunks)) / original_count * 100):.1f}%")
|
||||
logger.info(f"使用按行分块策略")
|
||||
logger.info(f"原始行数: {original_count}")
|
||||
logger.info(f"清理后有效句子数: {len(chunks)}")
|
||||
logger.info(f"过滤比例: {((original_count - len(chunks)) / original_count * 100):.1f}%")
|
||||
|
||||
elif chunking_strategy == 'paragraph':
|
||||
# 新的段落级分块策略
|
||||
@ -166,13 +170,13 @@ def embed_document(input_file='document.txt', output_file='embedding.pkl',
|
||||
# 使用段落分块
|
||||
chunks = paragraph_chunking(cleaned_content, **params)
|
||||
|
||||
print(f"使用段落级分块策略")
|
||||
print(f"文档总长度: {len(content)} 字符")
|
||||
print(f"分块数量: {len(chunks)}")
|
||||
logger.info(f"使用段落级分块策略")
|
||||
logger.info(f"文档总长度: {len(content)} 字符")
|
||||
logger.info(f"分块数量: {len(chunks)}")
|
||||
if chunks:
|
||||
print(f"平均chunk大小: {sum(len(chunk) for chunk in chunks) / len(chunks):.1f} 字符")
|
||||
print(f"最大chunk大小: {max(len(chunk) for chunk in chunks)} 字符")
|
||||
print(f"最小chunk大小: {min(len(chunk) for chunk in chunks)} 字符")
|
||||
logger.debug(f"平均chunk大小: {sum(len(chunk) for chunk in chunks) / len(chunks):.1f} 字符")
|
||||
logger.debug(f"最大chunk大小: {max(len(chunk) for chunk in chunks)} 字符")
|
||||
logger.debug(f"最小chunk大小: {min(len(chunk) for chunk in chunks)} 字符")
|
||||
|
||||
elif chunking_strategy == 'smart':
|
||||
# 智能分块策略,自动检测文档格式
|
||||
@ -186,25 +190,25 @@ def embed_document(input_file='document.txt', output_file='embedding.pkl',
|
||||
# 使用智能分块
|
||||
chunks = smart_chunking(content, **params)
|
||||
|
||||
print(f"使用智能分块策略")
|
||||
print(f"文档总长度: {len(content)} 字符")
|
||||
print(f"分块数量: {len(chunks)}")
|
||||
logger.info(f"使用智能分块策略")
|
||||
logger.info(f"文档总长度: {len(content)} 字符")
|
||||
logger.info(f"分块数量: {len(chunks)}")
|
||||
if chunks:
|
||||
print(f"平均chunk大小: {sum(len(chunk) for chunk in chunks) / len(chunks):.1f} 字符")
|
||||
print(f"最大chunk大小: {max(len(chunk) for chunk in chunks)} 字符")
|
||||
print(f"最小chunk大小: {min(len(chunk) for chunk in chunks)} 字符")
|
||||
logger.debug(f"平均chunk大小: {sum(len(chunk) for chunk in chunks) / len(chunks):.1f} 字符")
|
||||
logger.debug(f"最大chunk大小: {max(len(chunk) for chunk in chunks)} 字符")
|
||||
logger.debug(f"最小chunk大小: {min(len(chunk) for chunk in chunks)} 字符")
|
||||
|
||||
else:
|
||||
raise ValueError(f"不支持的分块策略: {chunking_strategy}")
|
||||
|
||||
if not chunks:
|
||||
print("警告:没有找到有效的内容块!")
|
||||
logger.warning("警告:没有找到有效的内容块!")
|
||||
return None
|
||||
|
||||
print(f"正在处理 {len(chunks)} 个内容块...")
|
||||
logger.info(f"正在处理 {len(chunks)} 个内容块...")
|
||||
|
||||
# 使用API接口进行编码
|
||||
print("使用API接口进行编码...")
|
||||
logger.info("使用API接口进行编码...")
|
||||
chunk_embeddings = encode_texts_via_api(chunks, batch_size=32)
|
||||
|
||||
embedding_data = {
|
||||
@ -218,14 +222,14 @@ def embed_document(input_file='document.txt', output_file='embedding.pkl',
|
||||
with open(output_file, 'wb') as f:
|
||||
pickle.dump(embedding_data, f)
|
||||
|
||||
print(f"已保存嵌入向量到 {output_file}")
|
||||
logger.info(f"已保存嵌入向量到 {output_file}")
|
||||
return embedding_data
|
||||
|
||||
except FileNotFoundError:
|
||||
print(f"错误:找不到文件 {input_file}")
|
||||
logger.error(f"错误:找不到文件 {input_file}")
|
||||
return None
|
||||
except Exception as e:
|
||||
print(f"处理文档时出错:{e}")
|
||||
logger.error(f"处理文档时出错:{e}")
|
||||
return None
|
||||
|
||||
|
||||
@ -611,21 +615,21 @@ def split_document_by_pages(input_file='document.txt', output_file='pagination.t
|
||||
if page_content:
|
||||
pages.append(page_content)
|
||||
|
||||
print(f"总共分割出 {len(pages)} 页")
|
||||
logger.info(f"总共分割出 {len(pages)} 页")
|
||||
|
||||
# 写入序列化文件
|
||||
with open(output_file, 'w', encoding='utf-8') as f:
|
||||
for i, page_content in enumerate(pages, 1):
|
||||
f.write(f"{page_content}\n")
|
||||
|
||||
print(f"已将页面内容序列化到 {output_file}")
|
||||
logger.info(f"已将页面内容序列化到 {output_file}")
|
||||
return pages
|
||||
|
||||
except FileNotFoundError:
|
||||
print(f"错误:找不到文件 {input_file}")
|
||||
logger.error(f"错误:找不到文件 {input_file}")
|
||||
return []
|
||||
except Exception as e:
|
||||
print(f"分割文档时出错:{e}")
|
||||
logger.error(f"分割文档时出错:{e}")
|
||||
return []
|
||||
|
||||
def test_chunking_strategies():
|
||||
@ -645,54 +649,54 @@ def test_chunking_strategies():
|
||||
第五段:这是最后一个段落。它用来测试分块策略的完整性和准确性。
|
||||
"""
|
||||
|
||||
print("=" * 60)
|
||||
print("分块策略测试")
|
||||
print("=" * 60)
|
||||
logger.debug("=" * 60)
|
||||
logger.debug("分块策略测试")
|
||||
logger.debug("=" * 60)
|
||||
|
||||
# 测试1: 段落级分块(小chunk)
|
||||
print("\n1. 段落级分块 - 小chunk (max_size=200):")
|
||||
logger.debug("\n1. 段落级分块 - 小chunk (max_size=200):")
|
||||
chunks_small = paragraph_chunking(test_text, max_chunk_size=200, overlap=50)
|
||||
for i, chunk in enumerate(chunks_small):
|
||||
print(f"Chunk {i+1} (长度: {len(chunk)}): {chunk[:50]}...")
|
||||
logger.debug(f"Chunk {i+1} (长度: {len(chunk)}): {chunk[:50]}...")
|
||||
|
||||
# 测试2: 段落级分块(大chunk)
|
||||
print("\n2. 段落级分块 - 大chunk (max_size=500):")
|
||||
logger.debug("\n2. 段落级分块 - 大chunk (max_size=500):")
|
||||
chunks_large = paragraph_chunking(test_text, max_chunk_size=500, overlap=100)
|
||||
for i, chunk in enumerate(chunks_large):
|
||||
print(f"Chunk {i+1} (长度: {len(chunk)}): {chunk[:50]}...")
|
||||
logger.debug(f"Chunk {i+1} (长度: {len(chunk)}): {chunk[:50]}...")
|
||||
|
||||
# 测试3: 段落级分块(无重叠)
|
||||
print("\n3. 段落级分块 - 无重叠:")
|
||||
logger.debug("\n3. 段落级分块 - 无重叠:")
|
||||
chunks_no_overlap = paragraph_chunking(test_text, max_chunk_size=300, overlap=0)
|
||||
for i, chunk in enumerate(chunks_no_overlap):
|
||||
print(f"Chunk {i+1} (长度: {len(chunk)}): {chunk[:50]}...")
|
||||
logger.debug(f"Chunk {i+1} (长度: {len(chunk)}): {chunk[:50]}...")
|
||||
|
||||
print(f"\n测试总结:")
|
||||
print(f"- 小chunk策略: {len(chunks_small)} 个chunks")
|
||||
print(f"- 大chunk策略: {len(chunks_large)} 个chunks")
|
||||
print(f"- 无重叠策略: {len(chunks_no_overlap)} 个chunks")
|
||||
logger.debug(f"\n测试总结:")
|
||||
logger.debug(f"- 小chunk策略: {len(chunks_small)} 个chunks")
|
||||
logger.debug(f"- 大chunk策略: {len(chunks_large)} 个chunks")
|
||||
logger.debug(f"- 无重叠策略: {len(chunks_no_overlap)} 个chunks")
|
||||
|
||||
|
||||
def demo_usage():
|
||||
"""
|
||||
演示如何使用新的分块功能
|
||||
"""
|
||||
print("=" * 60)
|
||||
print("使用示例")
|
||||
print("=" * 60)
|
||||
logger.debug("=" * 60)
|
||||
logger.debug("使用示例")
|
||||
logger.debug("=" * 60)
|
||||
|
||||
print("\n1. 使用传统的按行分块:")
|
||||
print("embed_document('document.txt', 'line_embedding.pkl', chunking_strategy='line')")
|
||||
logger.debug("\n1. 使用传统的按行分块:")
|
||||
logger.debug("embed_document('document.txt', 'line_embedding.pkl', chunking_strategy='line')")
|
||||
|
||||
print("\n2. 使用段落级分块(默认参数):")
|
||||
print("embed_document('document.txt', 'paragraph_embedding.pkl', chunking_strategy='paragraph')")
|
||||
logger.debug("\n2. 使用段落级分块(默认参数):")
|
||||
logger.debug("embed_document('document.txt', 'paragraph_embedding.pkl', chunking_strategy='paragraph')")
|
||||
|
||||
print("\n3. 使用自定义参数的段落级分块:")
|
||||
print("embed_document('document.txt', 'custom_embedding.pkl',")
|
||||
print(" chunking_strategy='paragraph',")
|
||||
print(" max_chunk_size=1500,")
|
||||
print(" overlap=200,")
|
||||
print(" min_chunk_size=300)")
|
||||
logger.debug("\n3. 使用自定义参数的段落级分块:")
|
||||
logger.debug("embed_document('document.txt', 'custom_embedding.pkl',")
|
||||
logger.debug(" chunking_strategy='paragraph',")
|
||||
logger.debug(" max_chunk_size=1500,")
|
||||
logger.debug(" overlap=200,")
|
||||
logger.debug(" min_chunk_size=300)")
|
||||
|
||||
|
||||
|
||||
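The hunks above reference three chunking strategies (`line`, `paragraph`, `smart`) and the parameters `max_chunk_size`, `overlap`, and `min_chunk_size`, but the project's `paragraph_chunking` implementation itself is not part of this diff. A simplified sketch of what overlap-based paragraph chunking typically looks like, offered as an assumption rather than the actual code:

```python
def paragraph_chunking_sketch(text: str, max_chunk_size: int = 1000,
                              overlap: int = 100, min_chunk_size: int = 200):
    """Greedily pack paragraphs into chunks up to max_chunk_size characters,
    carrying the tail of each chunk into the next one as overlap."""
    paragraphs = [p.strip() for p in text.split("\n\n") if p.strip()]
    chunks, current = [], ""
    for para in paragraphs:
        if current and len(current) + len(para) + 1 > max_chunk_size:
            chunks.append(current)
            # keep the last `overlap` characters so context continues across chunks
            current = current[-overlap:] if overlap else ""
        current = (current + "\n" + para).strip() if current else para
    if current:
        if chunks and len(current) < min_chunk_size:
            chunks[-1] += "\n" + current  # fold a tiny trailing chunk into the previous one
        else:
            chunks.append(current)
    return chunks


# 用法示例(参数与上面 demo_usage 中展示的一致):
# chunks = paragraph_chunking_sketch(content, max_chunk_size=1500, overlap=200, min_chunk_size=300)
```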
@ -737,10 +741,10 @@ def cache_terms_embeddings(bot_id: str, terms_list: List[Dict[str, Any]]) -> Dic
|
||||
# 验证缓存数据是否匹配当前的terms
|
||||
current_hash = _generate_terms_hash(terms_list)
|
||||
if cached_data.get('hash') == current_hash:
|
||||
print(f"Using cached terms embeddings for {cache_key}")
|
||||
logger.info(f"Using cached terms embeddings for {cache_key}")
|
||||
return cached_data
|
||||
except Exception as e:
|
||||
print(f"Error loading cache: {e}")
|
||||
logger.error(f"Error loading cache: {e}")
|
||||
|
||||
# 准备要编码的文本
|
||||
term_texts = []
|
||||
@ -793,11 +797,11 @@ def cache_terms_embeddings(bot_id: str, terms_list: List[Dict[str, Any]]) -> Dic
|
||||
with open(cache_file, 'wb') as f:
|
||||
pickle.dump(cache_data, f)
|
||||
|
||||
print(f"Cached {len(term_texts)} terms embeddings to {cache_file}")
|
||||
logger.info(f"Cached {len(term_texts)} terms embeddings to {cache_file}")
|
||||
return cache_data
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error generating terms embeddings: {e}")
|
||||
logger.error(f"Error generating terms embeddings: {e}")
|
||||
return {}
|
||||
|
||||
|
||||
@ -826,14 +830,14 @@ def search_similar_terms(query_text: str, cached_terms_data: Dict[str, Any]) ->
|
||||
term_info = cached_terms_data['term_info']
|
||||
|
||||
# 添加调试信息
|
||||
print(f"DEBUG: Query text: '{query_text}'")
|
||||
print(f"DEBUG: Query vector shape: {query_vector.shape}, norm: {np.linalg.norm(query_vector)}")
|
||||
logger.debug(f"DEBUG: Query text: '{query_text}'")
|
||||
logger.debug(f"DEBUG: Query vector shape: {query_vector.shape}, norm: {np.linalg.norm(query_vector)}")
|
||||
|
||||
# 计算cos相似度
|
||||
similarities = _cosine_similarity(query_vector, term_embeddings)
|
||||
|
||||
print(f"DEBUG: Similarities: {similarities}")
|
||||
print(f"DEBUG: Max similarity: {np.max(similarities):.3f}, Mean similarity: {np.mean(similarities):.3f}")
|
||||
logger.debug(f"DEBUG: Similarities: {similarities}")
|
||||
logger.debug(f"DEBUG: Max similarity: {np.max(similarities):.3f}, Mean similarity: {np.mean(similarities):.3f}")
|
||||
|
||||
# 获取所有terms的相似度
|
||||
matches = []
|
||||
@ -852,7 +856,7 @@ def search_similar_terms(query_text: str, cached_terms_data: Dict[str, Any]) ->
|
||||
return matches[:5]
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error in similarity search: {e}")
|
||||
logger.error(f"Error in similarity search: {e}")
|
||||
return []
|
||||
|
||||
|
||||
|
||||
@ -18,8 +18,9 @@ import psutil
|
||||
import numpy as np
|
||||
|
||||
from sentence_transformers import SentenceTransformer
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger = logging.getLogger('app')
|
||||
|
||||
|
||||
class GlobalModelManager:
|
||||
|
||||
@ -10,7 +10,11 @@ from fastapi import FastAPI
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from routes.file_manager import router as file_manager_router
|
||||
from utils.logger import logger
|
||||
import logging
|
||||
|
||||
|
||||
from utils.log_util.logger import init_with_fastapi
|
||||
|
||||
|
||||
# Import route modules
|
||||
from routes import chat, files, projects, system
|
||||
@ -20,6 +24,10 @@ from routes.system import agent_manager, connection_pool, file_cache
|
||||
|
||||
app = FastAPI(title="Database Assistant API", version="1.0.0")
|
||||
|
||||
init_with_fastapi(app)
|
||||
|
||||
logger = logging.getLogger('app')
|
||||
|
||||
# 挂载public文件夹为静态文件服务
|
||||
app.mount("/public", StaticFiles(directory="public"), name="static")
|
||||
|
||||
@ -47,7 +55,7 @@ app.include_router(file_manager_router)
|
||||
|
||||
if __name__ == "__main__":
|
||||
# 启动 FastAPI 应用
|
||||
print("Starting FastAPI server...")
|
||||
print("File Manager API available at: http://localhost:8001/api/v1/files")
|
||||
print("Web Interface available at: http://localhost:8001/public/file-manager.html")
|
||||
logger.info("Starting FastAPI server...")
|
||||
logger.info("File Manager API available at: http://localhost:8001/api/v1/files")
|
||||
logger.info("Web Interface available at: http://localhost:8001/public/file-manager.html")
|
||||
uvicorn.run(app, host="0.0.0.0", port=8001)
|
||||
|
||||
@ -5,7 +5,9 @@ from typing import Union, Optional
|
||||
from fastapi import APIRouter, HTTPException, Header
|
||||
from fastapi.responses import StreamingResponse
|
||||
from pydantic import BaseModel
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger('app')
|
||||
from utils import (
|
||||
Message, ChatRequest, ChatResponse
|
||||
)
|
||||
@ -79,6 +81,7 @@ async def generate_stream_response(agent, messages, thought_list, tool_response:
|
||||
}
|
||||
yield f"data: {json.dumps(chunk_data, ensure_ascii=False)}\n\n"
|
||||
|
||||
logger.info(f"开始生成Agent流式响应, model: {model}")
|
||||
for response in agent.run(messages=messages):
|
||||
previous_content = accumulated_content
|
||||
accumulated_content = get_content_from_messages(response, tool_response=tool_response)
|
||||
@ -92,6 +95,8 @@ async def generate_stream_response(agent, messages, thought_list, tool_response:
|
||||
|
||||
# 只有当有新内容时才发送chunk
|
||||
if new_content:
|
||||
if chunk_id == 0:
|
||||
logger.info(f"Agent首个Token已生成, 开始流式输出")
|
||||
chunk_id += 1
|
||||
# 构造OpenAI格式的流式响应
|
||||
chunk_data = {
|
||||
@ -126,10 +131,11 @@ async def generate_stream_response(agent, messages, thought_list, tool_response:
|
||||
|
||||
# 发送结束标记
|
||||
yield "data: [DONE]\n\n"
|
||||
logger.info(f"Agent流式响应完成, 总共生成 {chunk_id} 个chunks")
|
||||
except Exception as e:
|
||||
import traceback
|
||||
error_details = traceback.format_exc()
|
||||
from utils.logger import logger
|
||||
|
||||
logger.error(f"Error in generate_stream_response: {str(e)}")
|
||||
logger.error(f"Full traceback: {error_details}")
|
||||
|
||||
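`generate_stream_response` above accumulates the agent's output, diffs it against what was already sent, and yields only the new suffix as OpenAI-style SSE chunks before a final `data: [DONE]` marker. A stripped-down sketch of that delta loop; the chunk schema is abbreviated, and `extract_text` stands in for the project's `get_content_from_messages`:

```python
import json
import logging

logger = logging.getLogger('app')


async def stream_deltas_sketch(agent, messages, extract_text, model="qwen"):
    """Yield only newly generated text as SSE chunks, then a [DONE] marker."""
    accumulated_content = ""
    chunk_id = 0
    for response in agent.run(messages=messages):
        previous_content = accumulated_content
        accumulated_content = extract_text(response)
        new_content = accumulated_content[len(previous_content):]
        if not new_content:
            continue  # only send a chunk when there is new content
        if chunk_id == 0:
            logger.info("Agent首个Token已生成, 开始流式输出")
        chunk_id += 1
        chunk_data = {
            "id": f"chatcmpl-{chunk_id}",
            "object": "chat.completion.chunk",
            "model": model,
            "choices": [{"index": 0, "delta": {"content": new_content}}],
        }
        yield f"data: {json.dumps(chunk_data, ensure_ascii=False)}\n\n"
    yield "data: [DONE]\n\n"
    logger.info(f"Agent流式响应完成, 总共生成 {chunk_id} 个chunks")
```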
@ -168,7 +174,7 @@ async def create_agent_and_generate_response(
|
||||
# 2. 如果有terms内容,先进行embedding(embedding需要缓存起来,这个可以tmp文件缓存,以{bot_id}_terms作为key)embedding实现情参考 @embedding/embedding.py 文件,可以在里面实现。拿到embedding后,可以进行相似性检索,检索方式先使用cos相似度,找到阈值相似性>0.7的匹配项,重新整理为terms_analysis,格式:1) Name: term_name1, Description: desc, Synonyms: syn1, syn2。
|
||||
terms_analysis = ""
|
||||
if terms_list:
|
||||
print(f"terms_list: {terms_list}")
|
||||
logger.info(f"terms_list: {terms_list}")
|
||||
# 从messages中提取用户的查询文本用于相似性检索
|
||||
query_text = get_user_last_message_content(messages)
|
||||
# 使用embedding进行terms处理
|
||||
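The comment above spells out the terms workflow: embeddings are cached per bot under a `{bot_id}_terms` key, the user query is matched against them by cosine similarity with a 0.7 threshold, and the matches are rendered as `1) Name: ..., Description: ..., Synonyms: ...`. A minimal sketch of that matching step — the helper names are illustrative, not the project's actual functions:

```python
import numpy as np


def cosine_similarities(query_vector: np.ndarray, term_embeddings: np.ndarray) -> np.ndarray:
    """Cosine similarity between one query vector and a matrix of term vectors."""
    query_norm = query_vector / (np.linalg.norm(query_vector) + 1e-12)
    term_norms = term_embeddings / (np.linalg.norm(term_embeddings, axis=1, keepdims=True) + 1e-12)
    return term_norms @ query_norm


def format_matching_terms(query_vector, term_embeddings, term_info, threshold=0.7):
    """Keep terms above the similarity threshold and render them in the README's terms format."""
    sims = cosine_similarities(query_vector, term_embeddings)
    lines = []
    for idx in np.argsort(sims)[::-1]:  # highest similarity first
        if sims[idx] < threshold:
            break
        info = term_info[idx]  # e.g. {"name": ..., "description": ..., "synonyms": [...]}
        lines.append(f"{len(lines) + 1}) Name: {info['name']}, "
                     f"Description: {info['description']}, "
                     f"Synonyms: {', '.join(info['synonyms'])}")
    return "\n".join(lines)
```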
@ -178,9 +184,9 @@ async def create_agent_and_generate_response(
|
||||
if terms_analysis:
|
||||
# 将terms分析结果也添加到消息中
|
||||
system_prompt = system_prompt.replace("{terms}", terms_analysis)
|
||||
print(f"Generated terms analysis: {terms_analysis[:200]}...") # 只打印前200个字符
|
||||
logger.info(f"Generated terms analysis: {terms_analysis[:200]}...") # 只打印前200个字符
|
||||
except Exception as e:
|
||||
print(f"Error processing terms with embedding: {e}")
|
||||
logger.error(f"Error processing terms with embedding: {e}")
|
||||
terms_analysis = ""
|
||||
else:
|
||||
# 当terms_list为空时,删除对应的pkl缓存文件
|
||||
@ -189,14 +195,14 @@ async def create_agent_and_generate_response(
|
||||
cache_file = f"projects/cache/{bot_id}_terms.pkl"
|
||||
if os.path.exists(cache_file):
|
||||
os.remove(cache_file)
|
||||
print(f"Removed empty terms cache file: {cache_file}")
|
||||
logger.info(f"Removed empty terms cache file: {cache_file}")
|
||||
except Exception as e:
|
||||
print(f"Error removing terms cache file: {e}")
|
||||
logger.error(f"Error removing terms cache file: {e}")
|
||||
|
||||
# 3. 如果有guideline内容,进行并发处理
|
||||
guideline_analysis = ""
|
||||
if guidelines_list:
|
||||
print(f"guidelines_list: {guidelines_list}")
|
||||
logger.info(f"guidelines_list: {guidelines_list}")
|
||||
guidelines_count = len(guidelines_list)
|
||||
|
||||
if guidelines_count > 0:
|
||||
@ -223,7 +229,7 @@ async def create_agent_and_generate_response(
|
||||
batches[-2].extend(batches[-1])
|
||||
batches.pop()
|
||||
|
||||
print(f"Processing {guidelines_count} guidelines in {len(batches)} batches with {batch_count} concurrent batches")
|
||||
logger.info(f"Processing {guidelines_count} guidelines in {len(batches)} batches with {batch_count} concurrent batches")
|
||||
|
||||
# 准备chat_history
|
||||
chat_history = format_messages_to_chat_history(messages)
|
||||
@ -265,13 +271,13 @@ async def create_agent_and_generate_response(
|
||||
# 处理结果:最后一个结果是agent,前面的是guideline批次结果
|
||||
agent = all_results[-1] # agent创建的结果
|
||||
batch_results = all_results[:-1] # guideline批次的结果
|
||||
print(f"batch_results:{batch_results}")
|
||||
logger.info(f"batch_results:{batch_results}")
|
||||
|
||||
# 合并guideline分析结果,使用JSON格式的checks数组
|
||||
all_checks = []
|
||||
for i, result in enumerate(batch_results):
|
||||
if isinstance(result, Exception):
|
||||
print(f"Guideline batch {i} failed: {result}")
|
||||
logger.error(f"Guideline batch {i} failed: {result}")
|
||||
continue
|
||||
if result and isinstance(result, dict) and 'checks' in result:
|
||||
# 如果是JSON对象且包含checks数组,只保留applies为true的checks
|
||||
@ -279,13 +285,13 @@ async def create_agent_and_generate_response(
|
||||
all_checks.extend(applicable_checks)
|
||||
elif result and isinstance(result, str) and result.strip():
|
||||
# 如果是普通文本,保留原有逻辑
|
||||
print(f"Non-JSON result from batch {i}: {result}")
|
||||
logger.info(f"Non-JSON result from batch {i}: {result}")
|
||||
|
||||
if all_checks:
|
||||
# 将checks数组格式化为JSON字符串
|
||||
guideline_analysis = "\n".join([item["rationale"] for item in all_checks])
|
||||
# guideline_analysis = json.dumps({"checks": all_checks}, ensure_ascii=False)
|
||||
print(f"Merged guideline analysis result: {guideline_analysis}")
|
||||
logger.info(f"Merged guideline analysis result: {guideline_analysis}")
|
||||
|
||||
# 将分析结果添加到最后一个消息的内容中
|
||||
if guideline_analysis:
|
||||
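The hunks above split the guidelines into batches, evaluate them concurrently alongside agent creation, and keep only checks whose `applies` flag is true, joining their `rationale` fields. A condensed sketch of that orchestration with `asyncio.gather`; the per-batch evaluator and agent factory are stand-ins, not the project's real functions:

```python
import asyncio
import logging

logger = logging.getLogger('app')


async def run_guideline_batches_sketch(guidelines, batch_count, evaluate_batch, create_agent):
    """evaluate_batch(batch) -> {"checks": [...]} and create_agent() -> agent are stand-in coroutines."""
    batch_size = max(1, len(guidelines) // batch_count)
    batches = [guidelines[i:i + batch_size] for i in range(0, len(guidelines), batch_size)]
    if len(batches) > batch_count:
        batches[-2].extend(batches[-1])  # fold the remainder into the last full batch
        batches.pop()
    logger.info(f"Processing {len(guidelines)} guidelines in {len(batches)} batches")

    # Run every guideline batch and the agent creation concurrently; the agent is the last result.
    all_results = await asyncio.gather(*[evaluate_batch(b) for b in batches],
                                       create_agent(), return_exceptions=True)
    agent, batch_results = all_results[-1], all_results[:-1]

    all_checks = []
    for i, result in enumerate(batch_results):
        if isinstance(result, Exception):
            logger.error(f"Guideline batch {i} failed: {result}")
            continue
        if isinstance(result, dict) and "checks" in result:
            all_checks.extend(c for c in result["checks"] if c.get("applies"))
    guideline_analysis = "\n".join(c["rationale"] for c in all_checks)
    return agent, guideline_analysis
```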
@ -430,8 +436,8 @@ async def chat_completions(request: ChatRequest, authorization: Optional[str] =
|
||||
except Exception as e:
|
||||
import traceback
|
||||
error_details = traceback.format_exc()
|
||||
print(f"Error in chat_completions: {str(e)}")
|
||||
print(f"Full traceback: {error_details}")
|
||||
logger.error(f"Error in chat_completions: {str(e)}")
|
||||
logger.error(f"Full traceback: {error_details}")
|
||||
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
|
||||
|
||||
|
||||
@ -526,6 +532,6 @@ async def chat_completions_v2(request: ChatRequestV2, authorization: Optional[st
|
||||
except Exception as e:
|
||||
import traceback
|
||||
error_details = traceback.format_exc()
|
||||
print(f"Error in chat_completions_v2: {str(e)}")
|
||||
print(f"Full traceback: {error_details}")
|
||||
logger.error(f"Error in chat_completions_v2: {str(e)}")
|
||||
logger.error(f"Full traceback: {error_details}")
|
||||
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
|
||||
|
||||
@ -13,6 +13,9 @@ from fastapi.responses import FileResponse, StreamingResponse
|
||||
import mimetypes
|
||||
import json
|
||||
import zipfile
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger('app')
|
||||
import tempfile
|
||||
import io
|
||||
import math
|
||||
@ -485,7 +488,7 @@ async def download_folder_as_zip(request: Dict[str, str]):
|
||||
zipf.write(file_path, arcname)
|
||||
except (OSError, IOError) as e:
|
||||
# 跳过无法读取的文件,但记录警告
|
||||
print(f"Warning: Skipping file {file_path}: {e}")
|
||||
logger.warning(f"Warning: Skipping file {file_path}: {e}")
|
||||
continue
|
||||
|
||||
zip_buffer.seek(0)
|
||||
@ -586,7 +589,7 @@ async def download_multiple_items_as_zip(request: Dict[str, Any]):
|
||||
try:
|
||||
zipf.write(target_path, target_path.name)
|
||||
except (OSError, IOError) as e:
|
||||
print(f"Warning: Skipping file {target_path}: {e}")
|
||||
logger.warning(f"Warning: Skipping file {target_path}: {e}")
|
||||
continue
|
||||
elif target_path.is_dir():
|
||||
# 文件夹
|
||||
@ -597,7 +600,7 @@ async def download_multiple_items_as_zip(request: Dict[str, Any]):
|
||||
arcname = f"{target_path.name}/{file_path.relative_to(target_path)}"
|
||||
zipf.write(file_path, arcname)
|
||||
except (OSError, IOError) as e:
|
||||
print(f"Warning: Skipping file {file_path}: {e}")
|
||||
logger.warning(f"Warning: Skipping file {file_path}: {e}")
|
||||
continue
|
||||
|
||||
zip_buffer.seek(0)
|
||||
|
||||
@ -5,6 +5,9 @@ from datetime import datetime
|
||||
from typing import Optional, List
|
||||
from fastapi import APIRouter, HTTPException, Header, UploadFile, File, Form
|
||||
from pydantic import BaseModel
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger('app')
|
||||
|
||||
from utils import (
|
||||
DatasetRequest, QueueTaskRequest, IncrementalTaskRequest, QueueTaskResponse,
|
||||
@ -83,7 +86,7 @@ async def process_files_async_endpoint(request: QueueTaskRequest, authorization:
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
print(f"Error submitting async file processing task: {str(e)}")
|
||||
logger.error(f"Error submitting async file processing task: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
|
||||
|
||||
|
||||
@ -146,7 +149,7 @@ async def process_files_incremental_endpoint(request: IncrementalTaskRequest, au
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
print(f"Error submitting incremental file processing task: {str(e)}")
|
||||
logger.error(f"Error submitting incremental file processing task: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
|
||||
|
||||
|
||||
@ -260,7 +263,7 @@ async def cleanup_project_async_endpoint(dataset_id: str, remove_all: bool = Fal
|
||||
"action": "remove_all" if remove_all else "cleanup_logs"
|
||||
}
|
||||
except Exception as e:
|
||||
print(f"Error submitting cleanup task: {str(e)}")
|
||||
logger.error(f"Error submitting cleanup task: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"提交清理任务失败: {str(e)}")
|
||||
|
||||
|
||||
@ -281,8 +284,8 @@ async def upload_file(file: UploadFile = File(...), folder: Optional[str] = Form
|
||||
"""
|
||||
try:
|
||||
# 调试信息
|
||||
print(f"Received folder parameter: {folder}")
|
||||
print(f"File received: {file.filename if file else 'None'}")
|
||||
logger.info(f"Received folder parameter: {folder}")
|
||||
logger.info(f"File received: {file.filename if file else 'None'}")
|
||||
|
||||
# 确定上传文件夹
|
||||
if folder:
|
||||
@ -345,7 +348,7 @@ async def upload_file(file: UploadFile = File(...), folder: Optional[str] = Form
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
print(f"Error uploading file: {str(e)}")
|
||||
logger.error(f"Error uploading file: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"文件上传失败: {str(e)}")
|
||||
|
||||
|
||||
@ -377,7 +380,7 @@ async def get_task_status(task_id: str):
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error getting task status: {str(e)}")
|
||||
logger.error(f"Error getting task status: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"获取任务状态失败: {str(e)}")
|
||||
|
||||
|
||||
@ -399,7 +402,7 @@ async def delete_task(task_id: str):
|
||||
"task_id": task_id
|
||||
}
|
||||
except Exception as e:
|
||||
print(f"Error deleting task: {str(e)}")
|
||||
logger.error(f"Error deleting task: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"删除任务记录失败: {str(e)}")
|
||||
|
||||
|
||||
@ -428,7 +431,7 @@ async def list_tasks(status: Optional[str] = None, dataset_id: Optional[str] = N
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error listing tasks: {str(e)}")
|
||||
logger.error(f"Error listing tasks: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"获取任务列表失败: {str(e)}")
|
||||
|
||||
|
||||
@ -445,7 +448,7 @@ async def get_task_statistics():
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error getting statistics: {str(e)}")
|
||||
logger.error(f"Error getting statistics: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"获取统计信息失败: {str(e)}")
|
||||
|
||||
|
||||
@ -463,5 +466,5 @@ async def cleanup_tasks(older_than_days: int = 7):
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error cleaning up tasks: {str(e)}")
|
||||
logger.error(f"Error cleaning up tasks: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"清理任务记录失败: {str(e)}")
|
||||
@ -2,6 +2,9 @@ import os
|
||||
import json
|
||||
from typing import Optional
|
||||
from fastapi import APIRouter, HTTPException
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger('app')
|
||||
|
||||
from task_queue.task_status import task_status_store
|
||||
|
||||
@ -45,7 +48,7 @@ async def list_all_projects():
|
||||
"updated_at": os.path.getmtime(item_path)
|
||||
})
|
||||
except Exception as e:
|
||||
print(f"Error reading robot project {item}: {str(e)}")
|
||||
logger.error(f"Error reading robot project {item}: {str(e)}")
|
||||
robot_projects.append({
|
||||
"id": item,
|
||||
"name": item,
|
||||
@ -95,7 +98,7 @@ async def list_all_projects():
|
||||
"updated_at": os.path.getmtime(item_path)
|
||||
})
|
||||
except Exception as e:
|
||||
print(f"Error reading dataset {item}: {str(e)}")
|
||||
logger.error(f"Error reading dataset {item}: {str(e)}")
|
||||
datasets.append({
|
||||
"id": item,
|
||||
"name": f"数据集 - {item[:8]}...",
|
||||
@ -118,7 +121,7 @@ async def list_all_projects():
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error listing projects: {str(e)}")
|
||||
logger.error(f"Error listing projects: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"获取项目列表失败: {str(e)}")
|
||||
|
||||
|
||||
@ -134,7 +137,7 @@ async def list_robot_projects():
|
||||
"projects": response["robot_projects"]
|
||||
}
|
||||
except Exception as e:
|
||||
print(f"Error listing robot projects: {str(e)}")
|
||||
logger.error(f"Error listing robot projects: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"获取机器人项目列表失败: {str(e)}")
|
||||
|
||||
|
||||
@ -150,7 +153,7 @@ async def list_datasets():
|
||||
"projects": response["datasets"]
|
||||
}
|
||||
except Exception as e:
|
||||
print(f"Error listing datasets: {str(e)}")
|
||||
logger.error(f"Error listing datasets: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"获取数据集列表失败: {str(e)}")
|
||||
|
||||
|
||||
@ -169,5 +172,5 @@ async def get_project_tasks(dataset_id: str):
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error getting project tasks: {str(e)}")
|
||||
logger.error(f"Error getting project tasks: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"获取项目任务失败: {str(e)}")
|
||||
@ -19,6 +19,9 @@ except ImportError:
|
||||
from utils.fastapi_utils import get_content_from_messages
|
||||
from embedding import get_model_manager
|
||||
from pydantic import BaseModel
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger('app')
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
@ -38,7 +41,7 @@ class EncodeResponse(BaseModel):
|
||||
|
||||
|
||||
# 系统优化设置初始化
|
||||
print("正在初始化系统优化...")
|
||||
logger.info("正在初始化系统优化...")
|
||||
system_optimizer = setup_system_optimizations()
|
||||
|
||||
# 全局助手管理器配置(使用优化后的配置)
|
||||
@ -66,10 +69,10 @@ file_cache = init_global_file_cache(
|
||||
ttl=int(os.getenv("FILE_CACHE_TTL", "300"))
|
||||
)
|
||||
|
||||
print("系统优化初始化完成")
|
||||
print(f"- 分片Agent管理器: {shard_count} 个分片,最多缓存 {max_cached_agents} 个agent")
|
||||
print(f"- 连接池: 每主机100连接,总计500连接")
|
||||
print(f"- 文件缓存: 1000个文件,TTL 300秒")
|
||||
logger.info("系统优化初始化完成")
|
||||
logger.info(f"- 分片Agent管理器: {shard_count} 个分片,最多缓存 {max_cached_agents} 个agent")
|
||||
logger.info(f"- 连接池: 每主机100连接,总计500连接")
|
||||
logger.info(f"- 文件缓存: 1000个文件,TTL 300秒")
|
||||
|
||||
|
||||
@router.get("/api/health")
|
||||
@ -128,7 +131,7 @@ async def get_performance_stats():
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error getting performance stats: {str(e)}")
|
||||
logger.error(f"Error getting performance stats: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"获取性能统计失败: {str(e)}")
|
||||
|
||||
|
||||
@ -146,7 +149,7 @@ async def optimize_system(profile: str = "balanced"):
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error applying optimization profile: {str(e)}")
|
||||
logger.error(f"Error applying optimization profile: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"应用优化配置失败: {str(e)}")
|
||||
|
||||
|
||||
@ -175,7 +178,7 @@ async def clear_system_cache(cache_type: Optional[str] = None):
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error clearing cache: {str(e)}")
|
||||
logger.error(f"Error clearing cache: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"清理缓存失败: {str(e)}")
|
||||
|
||||
|
||||
@ -197,7 +200,7 @@ async def get_system_config():
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error getting system config: {str(e)}")
|
||||
logger.error(f"Error getting system config: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"获取系统配置失败: {str(e)}")
|
||||
|
||||
|
||||
@ -260,7 +263,7 @@ async def encode_texts(request: EncodeRequest):
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
from utils.logger import logger
|
||||
|
||||
logger.error(f"文本编码 API 错误: {e}")
|
||||
return EncodeResponse(
|
||||
success=False,
|
||||
@ -269,4 +272,4 @@ async def encode_texts(request: EncodeRequest):
|
||||
processing_time=0.0,
|
||||
total_texts=len(request.texts) if request else 0,
|
||||
error=f"编码失败: {str(e)}"
|
||||
)
|
||||
)
|
||||
|
||||
@ -4,9 +4,13 @@ Queue configuration using SqliteHuey for asynchronous file processing.
|
||||
"""
|
||||
|
||||
import os
|
||||
import logging
|
||||
from huey import SqliteHuey
|
||||
from datetime import timedelta
|
||||
|
||||
# 配置日志
|
||||
logger = logging.getLogger('app')
|
||||
|
||||
# 确保projects/queue_data目录存在
|
||||
queue_data_dir = os.path.join(os.path.dirname(__file__), '..', 'projects', 'queue_data')
|
||||
os.makedirs(queue_data_dir, exist_ok=True)
|
||||
@ -24,4 +28,4 @@ huey.store_errors = True # 存储错误信息
|
||||
huey.max_retries = 3 # 最大重试次数
|
||||
huey.retry_delay = timedelta(seconds=60) # 重试延迟
|
||||
|
||||
print(f"SqliteHuey队列已初始化,数据库路径: {os.path.join(queue_data_dir, 'huey.db')}")
|
||||
logger.info(f"SqliteHuey队列已初始化,数据库路径: {os.path.join(queue_data_dir, 'huey.db')}")
|
||||
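The config.py hunk above initializes a SqliteHuey queue under `projects/queue_data` and now logs the database path through the shared logger instead of printing it. A minimal sketch of that kind of setup; the queue name is an assumption, while the path and the retry attributes mirror what the diff shows:

```python
import logging
import os
from datetime import timedelta

from huey import SqliteHuey

logger = logging.getLogger('app')

# Keep the SQLite-backed queue next to the project data (path mirrors the diff above).
queue_data_dir = os.path.join(os.path.dirname(__file__), '..', 'projects', 'queue_data')
os.makedirs(queue_data_dir, exist_ok=True)

huey = SqliteHuey(name='file_processing',  # queue name is illustrative
                  filename=os.path.join(queue_data_dir, 'huey.db'))
huey.store_errors = True                  # 存储错误信息
huey.max_retries = 3                      # 最大重试次数
huey.retry_delay = timedelta(seconds=60)  # 重试延迟

logger.info(f"SqliteHuey队列已初始化,数据库路径: {os.path.join(queue_data_dir, 'huey.db')}")
```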
@ -6,11 +6,15 @@ Queue manager for handling file processing queues.
|
||||
import os
|
||||
import json
|
||||
import time
|
||||
import logging
|
||||
from typing import Dict, List, Optional, Any
|
||||
from huey import Huey
|
||||
from huey.api import Task
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
# 配置日志
|
||||
logger = logging.getLogger('app')
|
||||
|
||||
from .config import huey
|
||||
from .tasks import process_file_async, process_multiple_files_async, process_zip_file_async, cleanup_processed_files
|
||||
|
||||
@ -20,7 +24,7 @@ class QueueManager:
|
||||
|
||||
def __init__(self):
|
||||
self.huey = huey
|
||||
print(f"队列管理器已初始化,使用数据库: {os.path.join(os.path.dirname(__file__), '..', 'projects', 'queue_data', 'huey.db')}")
|
||||
logger.info(f"队列管理器已初始化,使用数据库: {os.path.join(os.path.dirname(__file__), '..', 'projects', 'queue_data', 'huey.db')}")
|
||||
|
||||
def enqueue_file(
|
||||
self,
|
||||
@ -49,7 +53,7 @@ class QueueManager:
|
||||
else:
|
||||
task = process_file_async(project_id, file_path, original_filename)
|
||||
|
||||
print(f"文件已加入队列: {file_path}, 任务ID: {task.id}")
|
||||
logger.info(f"文件已加入队列: {file_path}, 任务ID: {task.id}")
|
||||
return task.id
|
||||
|
||||
def enqueue_multiple_files(
|
||||
@ -79,7 +83,7 @@ class QueueManager:
|
||||
else:
|
||||
task = process_multiple_files_async(project_id, file_paths, original_filenames)
|
||||
|
||||
print(f"批量文件已加入队列: {len(file_paths)} 个文件, 任务ID: {task.id}")
|
||||
logger.info(f"批量文件已加入队列: {len(file_paths)} 个文件, 任务ID: {task.id}")
|
||||
return [task.id]
|
||||
|
||||
def enqueue_zip_file(
|
||||
@ -109,7 +113,7 @@ class QueueManager:
|
||||
else:
|
||||
task = process_zip_file_async(project_id, zip_path, extract_to)
|
||||
|
||||
print(f"zip文件已加入队列: {zip_path}, 任务ID: {task.id}")
|
||||
logger.info(f"zip文件已加入队列: {zip_path}, 任务ID: {task.id}")
|
||||
return task.id
|
||||
|
||||
def get_task_status(self, task_id: str) -> Dict[str, Any]:
|
||||
@ -201,7 +205,7 @@ class QueueManager:
|
||||
stats["pending_tasks"] = len(pending_tasks)
|
||||
stats["total_tasks"] += len(pending_tasks)
|
||||
except Exception as e:
|
||||
print(f"获取pending任务失败: {e}")
|
||||
logger.error(f"获取pending任务失败: {e}")
|
||||
|
||||
# 尝试获取定时任务数量
|
||||
try:
|
||||
@ -209,7 +213,7 @@ class QueueManager:
|
||||
stats["scheduled_tasks"] = len(scheduled_tasks)
|
||||
stats["total_tasks"] += len(scheduled_tasks)
|
||||
except Exception as e:
|
||||
print(f"获取scheduled任务失败: {e}")
|
||||
logger.error(f"获取scheduled任务失败: {e}")
|
||||
|
||||
return stats
|
||||
|
||||
@ -237,7 +241,7 @@ class QueueManager:
|
||||
else:
|
||||
return False
|
||||
except Exception as e:
|
||||
print(f"取消任务失败: {str(e)}")
|
||||
logger.error(f"取消任务失败: {str(e)}")
|
||||
return False
|
||||
|
||||
def cleanup_old_tasks(self, older_than_days: int = 7) -> Dict[str, Any]:
|
||||
@ -292,7 +296,7 @@ class QueueManager:
|
||||
else:
|
||||
task = cleanup_processed_files(project_id, older_than_days)
|
||||
|
||||
print(f"清理任务已加入队列: 项目 {project_id}, 任务ID: {task.id}")
|
||||
logger.info(f"清理任务已加入队列: 项目 {project_id}, 任务ID: {task.id}")
|
||||
return task.id
|
||||
|
||||
def list_pending_tasks(self, limit: int = 50) -> List[Dict[str, Any]]:
|
||||
@ -318,7 +322,7 @@ class QueueManager:
|
||||
"status": "pending",
|
||||
})
|
||||
except Exception as e:
|
||||
print(f"获取pending任务失败: {e}")
|
||||
logger.error(f"获取pending任务失败: {e}")
|
||||
|
||||
# 获取scheduled任务
|
||||
try:
|
||||
@ -330,12 +334,12 @@ class QueueManager:
|
||||
"status": "scheduled",
|
||||
})
|
||||
except Exception as e:
|
||||
print(f"获取scheduled任务失败: {e}")
|
||||
logger.error(f"获取scheduled任务失败: {e}")
|
||||
|
||||
return pending_tasks
|
||||
|
||||
except Exception as e:
|
||||
print(f"获取待处理任务失败: {str(e)}")
|
||||
logger.error(f"获取待处理任务失败: {str(e)}")
|
||||
return []
|
||||
|
||||
def get_task_result(self, task_id: str) -> Optional[Any]:
|
||||
@ -354,7 +358,7 @@ class QueueManager:
|
||||
return task.get()
|
||||
return None
|
||||
except Exception as e:
|
||||
print(f"获取任务结果失败: {str(e)}")
|
||||
logger.error(f"获取任务结果失败: {str(e)}")
|
||||
return None
|
||||
|
||||
|
||||
|
||||
@ -9,10 +9,14 @@ import time
|
||||
import signal
|
||||
import argparse
|
||||
import multiprocessing
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
import threading
|
||||
|
||||
# 配置日志
|
||||
logger = logging.getLogger('app')
|
||||
|
||||
# 添加项目根目录到Python路径
|
||||
project_root = Path(__file__).parent.parent
|
||||
sys.path.insert(0, str(project_root))
|
||||
@ -50,7 +54,7 @@ class OptimizedQueueConsumer:
|
||||
|
||||
def _signal_handler(self, signum, frame):
|
||||
"""信号处理器,用于优雅关闭"""
|
||||
print(f"\n收到信号 {signum},正在关闭队列消费者...")
|
||||
logger.info(f"\n收到信号 {signum},正在关闭队列消费者...")
|
||||
self.running = False
|
||||
if self.consumer:
|
||||
self.consumer.stop()
|
||||
@ -80,9 +84,9 @@ class OptimizedQueueConsumer:
|
||||
if hasattr(huey, 'always_eager'):
|
||||
huey.always_eager = False
|
||||
|
||||
print(f"队列消费者优化设置完成:")
|
||||
print(f"- 工作类型: {self.worker_type}")
|
||||
print(f"- 工作线程数: {self.workers}")
|
||||
logger.info(f"队列消费者优化设置完成:")
|
||||
logger.info(f"- 工作类型: {self.worker_type}")
|
||||
logger.info(f"- 工作线程数: {self.workers}")
|
||||
|
||||
def monitor_performance(self):
|
||||
"""性能监控线程"""
|
||||
@ -93,27 +97,26 @@ class OptimizedQueueConsumer:
|
||||
elapsed = time.time() - self.start_time
|
||||
rate = self.performance_stats['tasks_processed'] / max(1, elapsed)
|
||||
|
||||
print(f"\n[性能统计]")
|
||||
print(f"- 运行时间: {elapsed:.1f}秒")
|
||||
print(f"- 已处理任务: {self.performance_stats['tasks_processed']}")
|
||||
print(f"- 失败任务: {self.performance_stats['tasks_failed']}")
|
||||
print(f"- 平均处理速率: {rate:.2f} 任务/秒")
|
||||
logger.info(f"\n[性能统计]")
|
||||
logger.info(f"- 运行时间: {elapsed:.1f}秒")
|
||||
logger.info(f"- 已处理任务: {self.performance_stats['tasks_processed']}")
|
||||
logger.info(f"- 失败任务: {self.performance_stats['tasks_failed']}")
|
||||
logger.info(f"- 平均处理速率: {rate:.2f} 任务/秒")
|
||||
|
||||
if self.performance_stats['avg_processing_time'] > 0:
|
||||
print(f"- 平均处理时间: {self.performance_stats['avg_processing_time']:.2f}秒")
|
||||
logger.info(f"- 平均处理时间: {self.performance_stats['avg_processing_time']:.2f}秒")
|
||||
|
||||
def start(self):
|
||||
"""启动队列消费者"""
|
||||
print("=" * 60)
|
||||
print("优化的队列消费者启动")
|
||||
print("=" * 60)
|
||||
logger.info("=" * 60)
|
||||
logger.info("优化的队列消费者启动")
|
||||
logger.info("=" * 60)
|
||||
|
||||
# 设置优化
|
||||
self.setup_optimizations()
|
||||
|
||||
print(f"数据库: {os.path.join(os.path.dirname(__file__), '..', 'projects', 'queue_data', 'huey.db')}")
|
||||
print("按 Ctrl+C 停止消费者")
|
||||
print()
|
||||
logger.info(f"数据库: {os.path.join(os.path.dirname(__file__), '..', 'projects', 'queue_data', 'huey.db')}")
|
||||
logger.info("按 Ctrl+C 停止消费者")
|
||||
|
||||
self.running = True
|
||||
self.start_time = time.time()
|
||||
@ -134,15 +137,15 @@ class OptimizedQueueConsumer:
|
||||
periodic=True, # 启用定期任务
|
||||
)
|
||||
|
||||
print("队列消费者已启动,等待任务...")
|
||||
logger.info("队列消费者已启动,等待任务...")
|
||||
|
||||
# 启动消费者
|
||||
self.consumer.run()
|
||||
|
||||
except KeyboardInterrupt:
|
||||
print("\n收到键盘中断信号")
|
||||
logger.info("\n收到键盘中断信号")
|
||||
except Exception as e:
|
||||
print(f"队列消费者运行错误: {e}")
|
||||
logger.error(f"队列消费者运行错误: {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
finally:
|
||||
@ -150,27 +153,27 @@ class OptimizedQueueConsumer:
|
||||
|
||||
def shutdown(self):
|
||||
"""关闭队列消费者"""
|
||||
print("\n正在关闭队列消费者...")
|
||||
logger.info("\n正在关闭队列消费者...")
|
||||
self.running = False
|
||||
|
||||
if self.consumer:
|
||||
try:
|
||||
self.consumer.stop()
|
||||
print("队列消费者已停止")
|
||||
logger.info("队列消费者已停止")
|
||||
except Exception as e:
|
||||
print(f"停止队列消费者时出错: {e}")
|
||||
logger.error(f"停止队列消费者时出错: {e}")
|
||||
|
||||
# 输出最终统计
|
||||
if self.start_time:
|
||||
elapsed = time.time() - self.start_time
|
||||
print(f"\n[最终统计]")
|
||||
print(f"- 总运行时间: {elapsed:.1f}秒")
|
||||
print(f"- 总处理任务: {self.performance_stats['tasks_processed']}")
|
||||
print(f"- 总失败任务: {self.performance_stats['tasks_failed']}")
|
||||
logger.info(f"\n[最终统计]")
|
||||
logger.info(f"- 总运行时间: {elapsed:.1f}秒")
|
||||
logger.info(f"- 总处理任务: {self.performance_stats['tasks_processed']}")
|
||||
logger.info(f"- 总失败任务: {self.performance_stats['tasks_failed']}")
|
||||
|
||||
if self.performance_stats['tasks_processed'] > 0:
|
||||
rate = self.performance_stats['tasks_processed'] / elapsed
|
||||
print(f"- 平均处理速率: {rate:.2f} 任务/秒")
|
||||
logger.info(f"- 平均处理速率: {rate:.2f} 任务/秒")
|
||||
|
||||
|
||||
def calculate_optimal_workers():
|
||||
@ -191,25 +194,25 @@ def check_queue_status():
|
||||
try:
|
||||
stats = queue_manager.get_queue_stats()
|
||||
|
||||
print("\n[队列状态]")
|
||||
logger.info("\n[队列状态]")
|
||||
if isinstance(stats, dict):
|
||||
if 'total_tasks' in stats:
|
||||
print(f"- 总任务数: {stats['total_tasks']}")
|
||||
logger.info(f"- 总任务数: {stats['total_tasks']}")
|
||||
if 'pending_tasks' in stats:
|
||||
print(f"- 待处理任务: {stats['pending_tasks']}")
|
||||
logger.info(f"- 待处理任务: {stats['pending_tasks']}")
|
||||
if 'scheduled_tasks' in stats:
|
||||
print(f"- 定时任务: {stats['scheduled_tasks']}")
|
||||
logger.info(f"- 定时任务: {stats['scheduled_tasks']}")
|
||||
|
||||
# 检查数据库文件
|
||||
db_path = os.path.join(os.path.dirname(__file__), '..', 'projects', 'queue_data', 'huey.db')
|
||||
if os.path.exists(db_path):
|
||||
size = os.path.getsize(db_path)
|
||||
print(f"- 数据库大小: {size} 字节")
|
||||
logger.info(f"- 数据库大小: {size} 字节")
|
||||
else:
|
||||
print("- 数据库文件: 不存在")
|
||||
logger.info("- 数据库文件: 不存在")
|
||||
|
||||
except Exception as e:
|
||||
print(f"获取队列状态失败: {e}")
|
||||
logger.error(f"获取队列状态失败: {e}")
|
||||
|
||||
|
||||
def main():
|
||||
@ -248,11 +251,11 @@ def main():
|
||||
os.environ['PYTHONOPTIMIZE'] = '1'
|
||||
if args.workers > 2:
|
||||
args.workers = 2
|
||||
print(f"低内存模式: 调整工作线程数为 {args.workers}")
|
||||
logger.info(f"低内存模式: 调整工作线程数为 {args.workers}")
|
||||
elif args.profile == "high_performance":
|
||||
if args.workers < 4:
|
||||
args.workers = 4
|
||||
print(f"高性能模式: 调整工作线程数为 {args.workers}")
|
||||
logger.info(f"高性能模式: 调整工作线程数为 {args.workers}")
|
||||
|
||||
# 检查队列状态
|
||||
if args.check_status:
|
||||
@ -263,12 +266,12 @@ def main():
|
||||
try:
|
||||
import psutil
|
||||
memory = psutil.virtual_memory()
|
||||
print(f"[系统信息]")
|
||||
print(f"- CPU核心数: {multiprocessing.cpu_count()}")
|
||||
print(f"- 可用内存: {memory.available / (1024**3):.1f}GB")
|
||||
print(f"- 内存使用率: {memory.percent:.1f}%")
|
||||
logger.info(f"[系统信息]")
|
||||
logger.info(f"- CPU核心数: {multiprocessing.cpu_count()}")
|
||||
logger.info(f"- 可用内存: {memory.available / (1024**3):.1f}GB")
|
||||
logger.info(f"- 内存使用率: {memory.percent:.1f}%")
|
||||
except ImportError:
|
||||
print("[提示] 安装 psutil 可显示系统信息: pip install psutil")
|
||||
logger.info("[提示] 安装 psutil 可显示系统信息: pip install psutil")
|
||||
|
||||
# 创建并启动队列消费者
|
||||
consumer = OptimizedQueueConsumer(
|
||||
|
||||
@ -7,10 +7,14 @@ import os
|
||||
import json
|
||||
import time
|
||||
import shutil
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Any
|
||||
from huey import crontab
|
||||
|
||||
# 配置日志
|
||||
logger = logging.getLogger('app')
|
||||
|
||||
from .config import huey
|
||||
from utils.file_utils import (
|
||||
extract_zip_file,
|
||||
@ -42,7 +46,7 @@ def process_file_async(
|
||||
处理结果字典
|
||||
"""
|
||||
try:
|
||||
print(f"开始处理文件: {file_path}")
|
||||
logger.info(f"开始处理文件: {file_path}")
|
||||
|
||||
# 确保项目目录存在
|
||||
project_dir = os.path.join("projects", project_id)
|
||||
@ -55,7 +59,7 @@ def process_file_async(
|
||||
# 检查文件是否已处理
|
||||
processed_log = load_processed_files_log(project_id)
|
||||
if file_hash in processed_log:
|
||||
print(f"文件已处理,跳过: {file_path}")
|
||||
logger.info(f"文件已处理,跳过: {file_path}")
|
||||
return {
|
||||
"status": "skipped",
|
||||
"message": "文件已处理",
|
||||
@ -84,12 +88,12 @@ def process_file_async(
|
||||
result["file_hash"] = file_hash
|
||||
result["project_id"] = project_id
|
||||
|
||||
print(f"文件处理完成: {file_path}, 状态: {result['status']}")
|
||||
logger.info(f"文件处理完成: {file_path}, 状态: {result['status']}")
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"处理文件时发生错误: {str(e)}"
|
||||
print(error_msg)
|
||||
logger.error(error_msg)
|
||||
return {
|
||||
"status": "error",
|
||||
"message": error_msg,
|
||||
@ -116,7 +120,7 @@ def process_multiple_files_async(
|
||||
处理结果列表
|
||||
"""
|
||||
try:
|
||||
print(f"开始批量处理 {len(file_paths)} 个文件")
|
||||
logger.info(f"开始批量处理 {len(file_paths)} 个文件")
|
||||
|
||||
results = []
|
||||
for i, file_path in enumerate(file_paths):
|
||||
@ -126,12 +130,12 @@ def process_multiple_files_async(
|
||||
result = process_file_async(project_id, file_path, original_filename)
|
||||
results.append(result)
|
||||
|
||||
print(f"批量文件处理任务已提交,共 {len(results)} 个文件")
|
||||
logger.info(f"批量文件处理任务已提交,共 {len(results)} 个文件")
|
||||
return results
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"批量处理文件时发生错误: {str(e)}"
|
||||
print(error_msg)
|
||||
logger.error(error_msg)
|
||||
return [{
|
||||
"status": "error",
|
||||
"message": error_msg,
|
||||
@ -157,7 +161,7 @@ def process_zip_file_async(
|
||||
处理结果字典
|
||||
"""
|
||||
try:
|
||||
print(f"开始处理zip文件: {zip_path}")
|
||||
logger.info(f"开始处理zip文件: {zip_path}")
|
||||
|
||||
# 设置解压目录
|
||||
if extract_to is None:
|
||||
@ -191,7 +195,7 @@ def process_zip_file_async(
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"处理zip文件时发生错误: {str(e)}"
|
||||
print(error_msg)
|
||||
logger.error(error_msg)
|
||||
return {
|
||||
"status": "error",
|
||||
"message": error_msg,
|
||||
@ -216,7 +220,7 @@ def cleanup_processed_files(
|
||||
清理结果字典
|
||||
"""
|
||||
try:
|
||||
print(f"开始清理项目 {project_id} 中 {older_than_days} 天前的文件")
|
||||
logger.info(f"开始清理项目 {project_id} 中 {older_than_days} 天前的文件")
|
||||
|
||||
project_dir = os.path.join("projects", project_id)
|
||||
if not os.path.exists(project_dir):
|
||||
@ -240,9 +244,9 @@ def cleanup_processed_files(
|
||||
try:
|
||||
os.remove(file_path)
|
||||
cleaned_files.append(file_path)
|
||||
print(f"已删除旧文件: {file_path}")
|
||||
logger.info(f"已删除旧文件: {file_path}")
|
||||
except Exception as e:
|
||||
print(f"删除文件失败 {file_path}: {str(e)}")
|
||||
logger.error(f"删除文件失败 {file_path}: {str(e)}")
|
||||
|
||||
# 清理空目录
|
||||
for root, dirs, files in os.walk(project_dir, topdown=False):
|
||||
@ -251,9 +255,9 @@ def cleanup_processed_files(
|
||||
try:
|
||||
if not os.listdir(dir_path):
|
||||
os.rmdir(dir_path)
|
||||
print(f"已删除空目录: {dir_path}")
|
||||
logger.info(f"已删除空目录: {dir_path}")
|
||||
except Exception as e:
|
||||
print(f"删除目录失败 {dir_path}: {str(e)}")
|
||||
logger.error(f"删除目录失败 {dir_path}: {str(e)}")
|
||||
|
||||
return {
|
||||
"status": "success",
|
||||
@ -265,7 +269,7 @@ def cleanup_processed_files(
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"清理文件时发生错误: {str(e)}"
|
||||
print(error_msg)
|
||||
logger.error(error_msg)
|
||||
return {
|
||||
"status": "error",
|
||||
"message": error_msg,
|
||||
@ -351,6 +355,6 @@ def _process_single_file(
|
||||
@huey.periodic_task(crontab(hour=2, minute=0))
|
||||
def daily_cleanup():
|
||||
"""每日清理任务"""
|
||||
print("执行每日清理任务")
|
||||
logger.info("执行每日清理任务")
|
||||
# 这里可以添加清理逻辑
|
||||
return {"status": "completed", "message": "每日清理任务完成"}
|
||||
|
||||
@ -2,7 +2,7 @@ import asyncio
|
||||
from typing import List, Optional
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger = logging.getLogger('app')
|
||||
|
||||
|
||||
class AgentPool:
|
||||
@ -175,4 +175,4 @@ async def release_agent_to_pool(agent):
|
||||
if _global_agent_pool is None:
|
||||
raise RuntimeError("全局助手实例池未初始化")
|
||||
|
||||
await _global_agent_pool.release_agent(agent)
|
||||
await _global_agent_pool.release_agent(agent)
|
||||
|
||||
@ -8,6 +8,7 @@ import json
|
||||
import asyncio
|
||||
import aiofiles
|
||||
import aiofiles.os
|
||||
import logging
|
||||
from typing import Dict, List, Optional, Any
|
||||
from pathlib import Path
|
||||
import weakref
|
||||
@ -15,6 +16,9 @@ import threading
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
# Configure logger
|
||||
logger = logging.getLogger('app')
|
||||
|
||||
|
||||
class AsyncFileCache:
|
||||
"""异步文件缓存管理器"""
|
||||
@ -70,7 +74,7 @@ class AsyncFileCache:
|
||||
return content
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error reading file {abs_path}: {e}")
|
||||
logger.error(f"Error reading file {abs_path}: {e}")
|
||||
return ""
|
||||
|
||||
def _read_text_file(self, file_path: str, encoding: str) -> str:
|
||||
@ -90,7 +94,7 @@ class AsyncFileCache:
|
||||
try:
|
||||
return json.loads(content)
|
||||
except json.JSONDecodeError as e:
|
||||
print(f"Error parsing JSON from {file_path}: {e}")
|
||||
logger.error(f"Error parsing JSON from {file_path}: {e}")
|
||||
return {}
|
||||
|
||||
async def write_file(self, file_path: str, content: str, encoding: str = 'utf-8'):
|
||||
@ -244,7 +248,7 @@ class ParallelFileReader:
|
||||
content = await task
|
||||
results[file_path] = content
|
||||
except Exception as e:
|
||||
print(f"Error reading {file_path}: {e}")
|
||||
logger.error(f"Error reading {file_path}: {e}")
|
||||
results[file_path] = ""
|
||||
|
||||
return results
|
||||
@ -269,7 +273,7 @@ class ParallelFileReader:
|
||||
try:
|
||||
results[file_path] = json.loads(content)
|
||||
except json.JSONDecodeError as e:
|
||||
print(f"Error parsing JSON from {file_path}: {e}")
|
||||
logger.error(f"Error parsing JSON from {file_path}: {e}")
|
||||
results[file_path] = {}
|
||||
else:
|
||||
results[file_path] = {}
|
||||
|
||||
@ -5,15 +5,19 @@ Data merging functions for combining processed file results.
|
||||
|
||||
import os
|
||||
import pickle
|
||||
import logging
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
import json
|
||||
|
||||
# Configure logger
|
||||
logger = logging.getLogger('app')
|
||||
|
||||
# Try to import numpy, but handle if missing
|
||||
try:
|
||||
import numpy as np
|
||||
NUMPY_SUPPORT = True
|
||||
except ImportError:
|
||||
print("NumPy not available, some embedding features may be limited")
|
||||
logger.warning("NumPy not available, some embedding features may be limited")
|
||||
NUMPY_SUPPORT = False
|
||||
|
||||
|
||||
@ -66,7 +70,7 @@ def merge_documents_by_group(unique_id: str, group_name: str) -> Dict:
|
||||
result["source_files"].append(filename_stem)
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error reading document file {document_path}: {str(e)}")
|
||||
logger.error(f"Error reading document file {document_path}: {str(e)}")
|
||||
continue
|
||||
|
||||
if merged_content:
|
||||
@ -83,7 +87,7 @@ def merge_documents_by_group(unique_id: str, group_name: str) -> Dict:
|
||||
|
||||
except Exception as e:
|
||||
result["error"] = f"Document merging failed: {str(e)}"
|
||||
print(f"Error merging documents for group {group_name}: {str(e)}")
|
||||
logger.error(f"Error merging documents for group {group_name}: {str(e)}")
|
||||
|
||||
return result
|
||||
|
||||
@ -136,7 +140,7 @@ def merge_paginations_by_group(unique_id: str, group_name: str) -> Dict:
|
||||
result["source_files"].append(filename_stem)
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error reading pagination file {pagination_path}: {str(e)}")
|
||||
logger.error(f"Error reading pagination file {pagination_path}: {str(e)}")
|
||||
continue
|
||||
|
||||
if merged_lines:
|
||||
@ -153,7 +157,7 @@ def merge_paginations_by_group(unique_id: str, group_name: str) -> Dict:
|
||||
|
||||
except Exception as e:
|
||||
result["error"] = f"Pagination merging failed: {str(e)}"
|
||||
print(f"Error merging paginations for group {group_name}: {str(e)}")
|
||||
logger.error(f"Error merging paginations for group {group_name}: {str(e)}")
|
||||
|
||||
return result
|
||||
|
||||
@ -236,7 +240,7 @@ def merge_embeddings_by_group(unique_id: str, group_name: str) -> Dict:
|
||||
result["source_files"].append(filename_stem)
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error loading embedding file {embedding_path}: {str(e)}")
|
||||
logger.error(f"Error loading embedding file {embedding_path}: {str(e)}")
|
||||
continue
|
||||
|
||||
if all_chunks and all_embeddings:
|
||||
@ -259,7 +263,7 @@ def merge_embeddings_by_group(unique_id: str, group_name: str) -> Dict:
|
||||
np_embeddings.append(emb)
|
||||
else:
|
||||
# 如果无法转换,跳过这个文件
|
||||
print(f"Warning: Cannot convert embedding to numpy from file {filename_stem}")
|
||||
logger.warning(f"Warning: Cannot convert embedding to numpy from file {filename_stem}")
|
||||
continue
|
||||
|
||||
if np_embeddings:
|
||||
@ -283,7 +287,7 @@ def merge_embeddings_by_group(unique_id: str, group_name: str) -> Dict:
|
||||
elif isinstance(emb, np.ndarray):
|
||||
np_embeddings.append(emb)
|
||||
else:
|
||||
print(f"Warning: Cannot convert embedding to numpy from file {filename_stem}")
|
||||
logger.warning(f"Warning: Cannot convert embedding to numpy from file {filename_stem}")
|
||||
continue
|
||||
|
||||
if np_embeddings:
|
||||
@ -297,7 +301,7 @@ def merge_embeddings_by_group(unique_id: str, group_name: str) -> Dict:
|
||||
return result
|
||||
except Exception as e:
|
||||
result["error"] = f"Failed to merge embedding tensors: {str(e)}"
|
||||
print(f"Error merging embedding tensors: {str(e)}")
|
||||
logger.error(f"Error merging embedding tensors: {str(e)}")
|
||||
return result
|
||||
|
||||
# Create merged embedding data structure
|
||||
@ -327,7 +331,7 @@ def merge_embeddings_by_group(unique_id: str, group_name: str) -> Dict:
|
||||
|
||||
except Exception as e:
|
||||
result["error"] = f"Embedding merging failed: {str(e)}"
|
||||
print(f"Error merging embeddings for group {group_name}: {str(e)}")
|
||||
logger.error(f"Error merging embeddings for group {group_name}: {str(e)}")
|
||||
|
||||
return result
|
||||
|
||||
@ -425,11 +429,11 @@ def cleanup_dataset_group(unique_id: str, group_name: str) -> bool:
|
||||
if os.path.exists(dataset_group_dir):
|
||||
import shutil
|
||||
shutil.rmtree(dataset_group_dir)
|
||||
print(f"Cleaned up dataset group: {group_name}")
|
||||
logger.info(f"Cleaned up dataset group: {group_name}")
|
||||
return True
|
||||
else:
|
||||
return True # Nothing to clean up
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error cleaning up dataset group {group_name}: {str(e)}")
|
||||
logger.error(f"Error cleaning up dataset group {group_name}: {str(e)}")
|
||||
return False
|
||||
|
||||
@ -6,8 +6,12 @@ New implementation with per-file processing and group merging.
|
||||
|
||||
import os
|
||||
import json
|
||||
import logging
|
||||
from typing import Dict, List
|
||||
|
||||
# Configure logger
|
||||
logger = logging.getLogger('app')
|
||||
|
||||
# Import new modules
|
||||
from utils.file_manager import (
|
||||
ensure_directories, sync_files_to_group, cleanup_orphaned_files,
|
||||
@ -37,13 +41,13 @@ async def download_dataset_files(unique_id: str, files: Dict[str, List[str]], in
|
||||
if not files:
|
||||
return {}
|
||||
|
||||
print(f"Starting {'incremental' if incremental_mode else 'full'} file processing for project: {unique_id}")
|
||||
logger.info(f"Starting {'incremental' if incremental_mode else 'full'} file processing for project: {unique_id}")
|
||||
|
||||
# Ensure project directories exist
|
||||
ensure_directories(unique_id)
|
||||
|
||||
# Step 1: Sync files to group directories
|
||||
print("Step 1: Syncing files to group directories...")
|
||||
logger.info("Step 1: Syncing files to group directories...")
|
||||
synced_files, failed_files = sync_files_to_group(unique_id, files, incremental_mode)
|
||||
|
||||
# Step 2: Detect changes and cleanup orphaned files (only in non-incremental mode)
|
||||
@ -52,14 +56,14 @@ async def download_dataset_files(unique_id: str, files: Dict[str, List[str]], in
|
||||
|
||||
# Only cleanup orphaned files in non-incremental mode or when files are explicitly removed
|
||||
if not incremental_mode and any(changes["removed"].values()):
|
||||
print("Step 2: Cleaning up orphaned files...")
|
||||
logger.info("Step 2: Cleaning up orphaned files...")
|
||||
removed_files = cleanup_orphaned_files(unique_id, changes)
|
||||
print(f"Removed orphaned files: {removed_files}")
|
||||
logger.info(f"Removed orphaned files: {removed_files}")
|
||||
elif incremental_mode:
|
||||
print("Step 2: Skipping cleanup in incremental mode to preserve existing files")
|
||||
logger.info("Step 2: Skipping cleanup in incremental mode to preserve existing files")
|
||||
|
||||
# Step 3: Process individual files
|
||||
print("Step 3: Processing individual files...")
|
||||
logger.info("Step 3: Processing individual files...")
|
||||
processed_files_by_group = {}
|
||||
processing_results = {}
|
||||
|
||||
@ -75,12 +79,12 @@ async def download_dataset_files(unique_id: str, files: Dict[str, List[str]], in
|
||||
|
||||
# Skip if file doesn't exist (might be remote file that failed to download)
|
||||
if not os.path.exists(local_path) and not file_path.startswith(('http://', 'https://')):
|
||||
print(f"Skipping non-existent file: {filename}")
|
||||
logger.warning(f"Skipping non-existent file: {filename}")
|
||||
continue
|
||||
|
||||
# Check if already processed
|
||||
if check_file_already_processed(unique_id, group_name, filename):
|
||||
print(f"Skipping already processed file: {filename}")
|
||||
logger.info(f"Skipping already processed file: {filename}")
|
||||
processed_files_by_group[group_name].append(filename)
|
||||
processing_results[group_name].append({
|
||||
"filename": filename,
|
||||
@ -89,18 +93,18 @@ async def download_dataset_files(unique_id: str, files: Dict[str, List[str]], in
|
||||
continue
|
||||
|
||||
# Process the file
|
||||
print(f"Processing file: {filename} (group: {group_name})")
|
||||
logger.info(f"Processing file: {filename} (group: {group_name})")
|
||||
result = await process_single_file(unique_id, group_name, filename, file_path, local_path)
|
||||
processing_results[group_name].append(result)
|
||||
|
||||
if result["success"]:
|
||||
processed_files_by_group[group_name].append(filename)
|
||||
print(f" Successfully processed {filename}")
|
||||
logger.info(f" Successfully processed {filename}")
|
||||
else:
|
||||
print(f" Failed to process {filename}: {result['error']}")
|
||||
logger.error(f" Failed to process {filename}: {result['error']}")
|
||||
|
||||
# Step 4: Merge results by group
|
||||
print("Step 4: Merging results by group...")
|
||||
logger.info("Step 4: Merging results by group...")
|
||||
merge_results = {}
|
||||
|
||||
for group_name in processed_files_by_group.keys():
|
||||
@ -108,20 +112,20 @@ async def download_dataset_files(unique_id: str, files: Dict[str, List[str]], in
|
||||
group_files = get_group_files_list(unique_id, group_name)
|
||||
|
||||
if group_files:
|
||||
print(f"Merging group: {group_name} with {len(group_files)} files")
|
||||
logger.info(f"Merging group: {group_name} with {len(group_files)} files")
|
||||
merge_result = merge_all_data_by_group(unique_id, group_name)
|
||||
merge_results[group_name] = merge_result
|
||||
|
||||
if merge_result["success"]:
|
||||
print(f" Successfully merged group {group_name}")
|
||||
logger.info(f" Successfully merged group {group_name}")
|
||||
else:
|
||||
print(f" Failed to merge group {group_name}: {merge_result['errors']}")
|
||||
logger.error(f" Failed to merge group {group_name}: {merge_result['errors']}")
|
||||
|
||||
# Step 5: Save processing log
|
||||
print("Step 5: Saving processing log...")
|
||||
logger.info("Step 5: Saving processing log...")
|
||||
await save_processing_log(unique_id, files, synced_files, processing_results, merge_results)
|
||||
|
||||
print(f"File processing completed for project: {unique_id}")
|
||||
logger.info(f"File processing completed for project: {unique_id}")
|
||||
return processed_files_by_group
|
||||
|
||||
|
||||
@ -156,9 +160,9 @@ async def save_processing_log(
|
||||
try:
|
||||
with open(log_file_path, 'w', encoding='utf-8') as f:
|
||||
json.dump(log_data, f, ensure_ascii=False, indent=2)
|
||||
print(f"Processing log saved to: {log_file_path}")
|
||||
logger.info(f"Processing log saved to: {log_file_path}")
|
||||
except Exception as e:
|
||||
print(f"Error saving processing log: {str(e)}")
|
||||
logger.error(f"Error saving processing log: {str(e)}")
|
||||
|
||||
|
||||
def generate_dataset_structure(unique_id: str) -> str:
|
||||
|
||||
@ -5,8 +5,12 @@ Excel and CSV file processor for converting data to document.txt and pagination.
|
||||
|
||||
import os
|
||||
import pandas as pd
|
||||
import logging
|
||||
from typing import List, Dict, Any, Tuple
|
||||
|
||||
# 配置日志
|
||||
logger = logging.getLogger('app')
|
||||
|
||||
|
||||
def read_excel_sheets(file_path: str) -> Dict[str, List[Dict[str, Any]]]:
|
||||
"""
|
||||
@ -48,16 +52,16 @@ def read_excel_sheets(file_path: str) -> Dict[str, List[Dict[str, Any]]]:
|
||||
sheet_data.append(row_dict)
|
||||
|
||||
sheets_data[sheet_name] = sheet_data
|
||||
print(f"读取 Excel sheet '{sheet_name}': {len(sheet_data)} 行数据")
|
||||
logger.info(f"读取 Excel sheet '{sheet_name}': {len(sheet_data)} 行数据")
|
||||
|
||||
except Exception as e:
|
||||
print(f"读取 Excel sheet '{sheet_name}' 失败: {str(e)}")
|
||||
logger.error(f"读取 Excel sheet '{sheet_name}' 失败: {str(e)}")
|
||||
continue
|
||||
|
||||
return sheets_data
|
||||
|
||||
except Exception as e:
|
||||
print(f"读取 Excel 文件失败: {str(e)}")
|
||||
logger.error(f"读取 Excel 文件失败: {str(e)}")
|
||||
return {}
|
||||
|
||||
|
||||
@ -105,11 +109,11 @@ def read_csv_file(file_path: str, encoding: str = 'utf-8') -> List[Dict[str, Any
|
||||
if any(v.strip() for v in row_dict.values()):
|
||||
csv_data.append(row_dict)
|
||||
|
||||
print(f"读取 CSV 文件: {len(csv_data)} 行数据")
|
||||
logger.info(f"读取 CSV 文件: {len(csv_data)} 行数据")
|
||||
return csv_data
|
||||
|
||||
except Exception as e:
|
||||
print(f"读取 CSV 文件失败: {str(e)}")
|
||||
logger.error(f"读取 CSV 文件失败: {str(e)}")
|
||||
return []
|
||||
|
||||
|
||||
|
||||
@ -9,7 +9,9 @@ import aiohttp
|
||||
from qwen_agent.llm.schema import ASSISTANT, FUNCTION
|
||||
from qwen_agent.llm.oai import TextChatAtOAI
|
||||
from fastapi import HTTPException
|
||||
from utils.logger import logger
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger('app')
|
||||
|
||||
# 创建全局线程池执行器,用于执行同步的HTTP调用
|
||||
thread_pool = ThreadPoolExecutor(max_workers=10)
|
||||
@ -61,9 +63,9 @@ def get_versioned_filename(upload_dir: str, name_without_ext: str, file_extensio
|
||||
file_to_delete = os.path.join(upload_dir, filename)
|
||||
try:
|
||||
os.remove(file_to_delete)
|
||||
print(f"已删除文件: {file_to_delete}")
|
||||
logger.info(f"已删除文件: {file_to_delete}")
|
||||
except OSError as e:
|
||||
print(f"删除文件失败 {file_to_delete}: {e}")
|
||||
logger.error(f"删除文件失败 {file_to_delete}: {e}")
|
||||
|
||||
# 确定下一个版本号
|
||||
if existing_versions:
|
||||
@ -301,7 +303,7 @@ def create_project_directory(dataset_ids: Optional[List[str]], bot_id: str, robo
|
||||
from utils.multi_project_manager import create_robot_project
|
||||
return create_robot_project(dataset_ids, bot_id)
|
||||
except Exception as e:
|
||||
print(f"Error creating project directory: {e}")
|
||||
logger.error(f"Error creating project directory: {e}")
|
||||
return None
|
||||
|
||||
|
||||
@ -392,7 +394,7 @@ def _sync_call_guideline_llm(llm_config, messages) -> str:
|
||||
return str(response) if response else ""
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error calling guideline LLM: {e}")
|
||||
logger.error(f"Error calling guideline LLM: {e}")
|
||||
return ""
|
||||
|
||||
|
||||
@ -414,7 +416,7 @@ async def call_guideline_llm(chat_history: str, guidelines_text: str, terms:str,
|
||||
with open('./prompt/guideline_prompt.md', 'r', encoding='utf-8') as f:
|
||||
guideline_template = f.read()
|
||||
except Exception as e:
|
||||
print(f"Error reading guideline prompt template: {e}")
|
||||
logger.error(f"Error reading guideline prompt template: {e}")
|
||||
return ""
|
||||
|
||||
# 替换模板中的占位符
|
||||
@ -439,7 +441,7 @@ async def call_guideline_llm(chat_history: str, guidelines_text: str, terms:str,
|
||||
return response
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error calling guideline LLM: {e}")
|
||||
logger.error(f"Error calling guideline LLM: {e}")
|
||||
return ""
|
||||
|
||||
|
||||
@@ -466,28 +468,43 @@ async def process_guideline_batch(
model_server: str
) -> str:
"""处理单个guideline批次"""
try:
# 调用LLM分析这批guidelines
batch_guidelines_text = "\n".join(guidelines_batch)
batch_analysis = await call_guideline_llm(chat_history, batch_guidelines_text, terms, model_name, api_key, model_server)
max_retries = 3

# 从响应中提取 ```json 和 ``` 包裹的内容
json_pattern = r'```json\s*\n(.*?)\n```'
json_matches = re.findall(json_pattern, batch_analysis, re.DOTALL)

if json_matches:
try:
# 解析第一个找到的JSON对象
json_data = json.loads(json_matches[0])
return json_data # 返回解析后的JSON对象
except json.JSONDecodeError as e:
print(f"Error parsing JSON from guideline analysis: {e}")
return batch_analysis # 如果JSON解析失败,返回原始文本
else:
return batch_analysis # 如果没有找到JSON格式,返回原始文本
except Exception as e:
print(f"Error processing guideline batch: {e}")
return ""
for attempt in range(max_retries):
try:
# 调用LLM分析这批guidelines
batch_guidelines_text = "\n".join(guidelines_batch)
logger.info(f"Start processed guideline batch on attempt {attempt + 1}")
batch_analysis = await call_guideline_llm(chat_history, batch_guidelines_text, terms, model_name, api_key, model_server)

# 从响应中提取 ```json 和 ``` 包裹的内容
json_pattern = r'```json\s*\n(.*?)\n```'
json_matches = re.findall(json_pattern, batch_analysis, re.DOTALL)

if json_matches:
try:
# 解析第一个找到的JSON对象
json_data = json.loads(json_matches[0])
logger.info(f"Successfully processed guideline batch on attempt {attempt + 1}")
return json_data # 返回解析后的JSON对象
except json.JSONDecodeError as e:
logger.error(f"Error parsing JSON from guideline analysis on attempt {attempt + 1}: {e}")
if attempt == max_retries - 1:
return batch_analysis # 最后一次尝试失败,返回原始文本
continue
else:
logger.warning(f"No JSON format found in guideline analysis on attempt {attempt + 1}")
if attempt == max_retries - 1:
return batch_analysis # 最后一次尝试失败,返回原始文本
continue

except Exception as e:
logger.error(f"Error processing guideline batch on attempt {attempt + 1}: {e}")
if attempt == max_retries - 1:
return "" # 最后一次尝试失败,返回空字符串

# 这里不应该到达,但为了完整性
return ""


def extract_block_from_system_prompt(system_prompt: Optional[str]) -> tuple[str, List[Dict[str, Any]], List[Dict[str, Any]]]:
|
||||
@ -519,7 +536,7 @@ def extract_block_from_system_prompt(system_prompt: Optional[str]) -> tuple[str,
|
||||
guidelines_list.extend(guidelines)
|
||||
blocks_to_remove.append(match.group(0))
|
||||
except Exception as e:
|
||||
print(f"Error parsing guidelines: {e}")
|
||||
logger.error(f"Error parsing guidelines: {e}")
|
||||
|
||||
elif block_type == 'terms':
|
||||
try:
|
||||
@ -527,7 +544,7 @@ def extract_block_from_system_prompt(system_prompt: Optional[str]) -> tuple[str,
|
||||
terms_list.extend(terms)
|
||||
blocks_to_remove.append(match.group(0))
|
||||
except Exception as e:
|
||||
print(f"Error parsing terms: {e}")
|
||||
logger.error(f"Error parsing terms: {e}")
|
||||
|
||||
# 从system_prompt中移除这些已解析的块
|
||||
cleaned_prompt = system_prompt
|
||||
|
||||
@ -8,6 +8,9 @@ import json
|
||||
import shutil
|
||||
from typing import Dict, List, Set, Tuple
|
||||
from pathlib import Path
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger('app')
|
||||
|
||||
|
||||
def get_existing_files(unique_id: str) -> Dict[str, Set[str]]:
|
||||
@ -204,11 +207,11 @@ def cleanup_orphaned_files(unique_id: str, changes: Dict) -> Dict[str, List[str]
|
||||
removed_files[group_name].append(f"processed: {Path(filename).stem}")
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error cleaning up {filename}: {str(e)}")
|
||||
logger.error(f"Error cleaning up {filename}: {str(e)}")
|
||||
|
||||
# Handle completely removed groups
|
||||
for group_name in changes.get("removed_groups", set()):
|
||||
print(f"Cleaning up completely removed group: {group_name}")
|
||||
logger.info(f"Cleaning up completely removed group: {group_name}")
|
||||
removed_files[group_name] = []
|
||||
|
||||
try:
|
||||
@ -230,10 +233,10 @@ def cleanup_orphaned_files(unique_id: str, changes: Dict) -> Dict[str, List[str]
|
||||
shutil.rmtree(dataset_group_dir)
|
||||
removed_files[group_name].append("dataset group directory")
|
||||
|
||||
print(f"Completely removed group '{group_name}' and all its data")
|
||||
logger.info(f"Completely removed group '{group_name}' and all its data")
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error cleaning up group {group_name}: {str(e)}")
|
||||
logger.error(f"Error cleaning up group {group_name}: {str(e)}")
|
||||
|
||||
return removed_files
|
||||
|
||||
|
||||
@ -10,9 +10,13 @@ import aiohttp
|
||||
import shutil
|
||||
import zipfile
|
||||
import tempfile
|
||||
import logging
|
||||
from typing import Dict, List, Optional
|
||||
from pathlib import Path
|
||||
|
||||
# 配置日志
|
||||
logger = logging.getLogger('app')
|
||||
|
||||
|
||||
async def download_file(url: str, destination_path: str) -> bool:
|
||||
"""Download file from URL asynchronously"""
|
||||
@ -25,10 +29,10 @@ async def download_file(url: str, destination_path: str) -> bool:
|
||||
await f.write(chunk)
|
||||
return True
|
||||
else:
|
||||
print(f"Failed to download {url}, status code: {response.status}")
|
||||
logger.error(f"Failed to download {url}, status code: {response.status}")
|
||||
return False
|
||||
except Exception as e:
|
||||
print(f"Error downloading {url}: {str(e)}")
|
||||
logger.error(f"Error downloading {url}: {str(e)}")
|
||||
return False
|
||||
|
||||
|
||||
@ -45,11 +49,11 @@ def remove_file_or_directory(path: str):
|
||||
os.remove(path)
|
||||
elif os.path.isdir(path):
|
||||
shutil.rmtree(path)
|
||||
print(f"Removed: {path}")
|
||||
logger.info(f"Removed: {path}")
|
||||
else:
|
||||
print(f"Path does not exist: {path}")
|
||||
logger.warning(f"Path does not exist: {path}")
|
||||
except Exception as e:
|
||||
print(f"Error removing {path}: {str(e)}")
|
||||
logger.error(f"Error removing {path}: {str(e)}")
|
||||
|
||||
|
||||
def extract_zip_file(zip_path: str, extract_dir: str) -> List[str]:
|
||||
@ -65,11 +69,11 @@ def extract_zip_file(zip_path: str, extract_dir: str) -> List[str]:
|
||||
if file.lower().endswith(('.txt', '.md', '.xlsx', '.xls', '.csv')):
|
||||
extracted_files.append(os.path.join(root, file))
|
||||
|
||||
print(f"Extracted {len(extracted_files)} txt/md/xlsx/csv files from {zip_path}")
|
||||
logger.info(f"Extracted {len(extracted_files)} txt/md/xlsx/csv files from {zip_path}")
|
||||
return extracted_files
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error extracting zip file {zip_path}: {str(e)}")
|
||||
logger.error(f"Error extracting zip file {zip_path}: {str(e)}")
|
||||
return []
|
||||
|
||||
|
||||
@ -110,7 +114,7 @@ def load_processed_files_log(unique_id: str) -> Dict[str, Dict]:
|
||||
with open(log_file, 'r', encoding='utf-8') as f:
|
||||
return json.load(f)
|
||||
except Exception as e:
|
||||
print(f"Error loading processed files log: {e}")
|
||||
logger.error(f"Error loading processed files log: {e}")
|
||||
return {}
|
||||
|
||||
|
||||
@ -123,7 +127,7 @@ def save_processed_files_log(unique_id: str, processed_log: Dict[str, Dict]):
|
||||
with open(log_file, 'w', encoding='utf-8') as f:
|
||||
json.dump(processed_log, f, ensure_ascii=False, indent=2)
|
||||
except Exception as e:
|
||||
print(f"Error saving processed files log: {e}")
|
||||
logger.error(f"Error saving processed files log: {e}")
|
||||
|
||||
|
||||
def get_processing_log(unique_id: str) -> Dict:
|
||||
@ -135,7 +139,7 @@ def get_processing_log(unique_id: str) -> Dict:
|
||||
with open(log_file, 'r', encoding='utf-8') as f:
|
||||
return json.load(f)
|
||||
except Exception as e:
|
||||
print(f"Error loading processing log: {e}")
|
||||
logger.error(f"Error loading processing log: {e}")
|
||||
return {}
|
||||
|
||||
|
||||
@ -148,7 +152,7 @@ def save_project_status(unique_id: str, status: Dict):
|
||||
with open(status_file, 'w', encoding='utf-8') as f:
|
||||
json.dump(status, f, ensure_ascii=False, indent=2)
|
||||
except Exception as e:
|
||||
print(f"Error saving project status: {e}")
|
||||
logger.error(f"Error saving project status: {e}")
|
||||
|
||||
|
||||
def load_project_status(unique_id: str) -> Dict:
|
||||
@ -160,7 +164,7 @@ def load_project_status(unique_id: str) -> Dict:
|
||||
with open(status_file, 'r', encoding='utf-8') as f:
|
||||
return json.load(f)
|
||||
except Exception as e:
|
||||
print(f"Error loading project status: {e}")
|
||||
logger.error(f"Error loading project status: {e}")
|
||||
return {}
|
||||
|
||||
|
||||
@ -212,7 +216,7 @@ def update_file_processing_status(unique_id: str, group_name: str, filename: str
|
||||
json.dump(file_status, f, ensure_ascii=False, indent=2)
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error updating file processing status: {e}")
|
||||
logger.error(f"Error updating file processing status: {e}")
|
||||
|
||||
|
||||
def get_file_processing_status(unique_id: str, group_name: str = None, filename: str = None) -> Dict:
|
||||
@ -240,7 +244,7 @@ def get_file_processing_status(unique_id: str, group_name: str = None, filename:
|
||||
return file_status
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error getting file processing status: {e}")
|
||||
logger.error(f"Error getting file processing status: {e}")
|
||||
return {}
|
||||
|
||||
|
||||
@ -254,7 +258,7 @@ def calculate_directory_size(directory_path: str) -> int:
|
||||
if os.path.exists(file_path):
|
||||
total_size += os.path.getsize(file_path)
|
||||
except Exception as e:
|
||||
print(f"Error calculating directory size: {e}")
|
||||
logger.error(f"Error calculating directory size: {e}")
|
||||
|
||||
return total_size
|
||||
|
||||
|
||||
utils/log_util/context.py (new file, 32 lines)
@@ -0,0 +1,32 @@
# author: askfiy

import contextvars


class GlobalContext:
    def __init__(self):
        super().__setattr__('_contextvar', contextvars.ContextVar(f"{__class__.__name__}_g"))

    def __getattr__(self, k: str):
        ctx = self._contextvar.get()
        return ctx[k]

    def __setattr__(self, k: str, v):
        try:
            ctx = self._contextvar.get()
        except LookupError:
            ctx = {}

        ctx.update({k: v})
        self._contextvar.set(ctx)

    def get_context(self):
        return self._contextvar.get()

    def update_context(self, ctx):
        self._contextvar.set(ctx)


g = GlobalContext()

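For orientation, a minimal usage sketch of the `g` object defined in `utils/log_util/context.py` above (the sketch itself is not part of the commit; only the import path comes from this diff): values assigned to `g` are stored in the current `contextvars` context rather than on the instance, so each request can carry its own `trace_id`.

```python
# Hypothetical standalone sketch, assuming the utils.log_util package
# introduced by this commit is importable.
import contextvars

from utils.log_util.context import g

g.trace_id = "demo-trace-id"           # stored via ContextVar, not on the instance
print(g.trace_id)                      # -> demo-trace-id

# A copied context (e.g. an asyncio task) still sees the value; a separate
# request handled by the middleware further below gets its own trace_id.
ctx = contextvars.copy_context()
print(ctx.run(lambda: g.trace_id))     # -> demo-trace-id
```
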
utils/log_util/decorator.py (new file, 32 lines)
@@ -0,0 +1,32 @@
import time
import logging
from functools import wraps
from functools import update_wrapper

from .context import g


app_logger = logging.getLogger('app')


def safe_network(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        while True:
            try:
                return func(*args, **kwargs)
            except Exception as exc:
                app_logger.error(exc)
                time.sleep(1)

    return wrapper


def copy_threading_context(func):
    main_threading_g_ctx = g.get_context()

    def wrapper(*args, **kwargs):
        g.update_context(main_threading_g_ctx)
        return func(*args, **kwargs)

    return update_wrapper(wrapper, func)
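A short, hedged sketch of how `copy_threading_context` above can be used (not from the commit): the decorator snapshots `g`'s context at wrap time, so a function handed to a worker thread still sees the caller's `trace_id`.

```python
# Sketch only; assumes the utils.log_util package from this commit.
import threading

from utils.log_util.context import g
from utils.log_util.decorator import copy_threading_context

g.trace_id = "demo-trace-id"

@copy_threading_context              # captures g's context at decoration time
def worker():
    print(g.trace_id)                # -> demo-trace-id, even in another thread

t = threading.Thread(target=worker)
t.start()
t.join()
```
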
utils/log_util/logger.py (new file, 93 lines)
@@ -0,0 +1,93 @@
import logging
import uuid
import json
from datetime import datetime

from fastapi import Request, FastAPI
from starlette.responses import JSONResponse
from starlette.requests import HTTPConnection
from typing import Callable, Awaitable

from .context import g


def add_request_routes(app: FastAPI):
    @app.middleware("http")
    async def before_request(request: Request, call_next: Callable[[Request], Awaitable[JSONResponse]]):

        # 先从header中获取X-Trace-Id,如果没有则生成新的
        trace_id = request.headers.get('X-Trace-Id')
        if not trace_id:
            trace_id = "generate_" + str(uuid.uuid4())

        # user_id = "未知的 user_id"

        g.trace_id = trace_id
        # g.user_id = user_id
        response = await call_next(request)

        response.headers['X-Trace-Id'] = g.trace_id
        return response


class Formatter(logging.Formatter):
    def formatTime(self, record, datefmt=None):
        # 将时间戳转换为datetime对象
        dt = datetime.fromtimestamp(record.created)
        # 格式化时间戳到毫秒
        if datefmt:
            s = dt.strftime(datefmt)
            # 去除微秒的后三位,保留毫秒部分
            s = s[:-3]
        else:
            # 去除微秒的后三位,保留毫秒部分
            s = dt.strftime("%H:%M:%S.%f")[:-3]
        return s

    def format(self, record):
        # 处理 trace_id
        if not hasattr(record, "trace_id"):
            record.trace_id = getattr(g, "trace_id")
        # 处理 user_id
        # if not hasattr(record, "user_id"):
        #     record.user_id = getattr(g, "user_id")

        # 格式化时间戳
        record.timestamp = self.formatTime(record, self.datefmt)

        return super().format(record)


# 这里注册session 上下文追踪一次就可以了
def init_logger_once(name, level):
    logger = logging.getLogger(name)
    logger.setLevel(level=level)
    formatter = Formatter("%(timestamp)s | %(levelname)-5s | %(trace_id)s | %(name)s:%(funcName)s:%(lineno)s - %(message)s", datefmt='%Y-%m-%d %H:%M:%S.%f')
    handler = logging.StreamHandler()
    handler.setFormatter(formatter)
    logger.addHandler(handler)


def init_with_fastapi(app, level=logging.INFO):
    init_logger_once("app", level)
    add_request_routes(app)

def info(message, *args, **kwargs):
    app_logger = logging.getLogger('app')
    app_logger.info(message, *args, **kwargs)

def debug(message, *args, **kwargs):
    app_logger = logging.getLogger('app')
    app_logger.debug(message, *args, **kwargs)

def warning(message, *args, **kwargs):
    app_logger = logging.getLogger('app')
    app_logger.warning(message, *args, **kwargs)

def error(message, *args, **kwargs):
    app_logger = logging.getLogger('app')
    app_logger.error(message, *args, **kwargs)

def critical(message, *args, **kwargs):
    app_logger = logging.getLogger('app')
    app_logger.critical(message, *args, **kwargs)
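Taken together, the three files above give the pattern used throughout the rest of this commit: call `init_with_fastapi(app)` once at startup, then fetch the shared `'app'` logger in any module. A hedged wiring sketch follows; the entrypoint shape and the `/ping` route are illustrative assumptions, only `init_with_fastapi` and the `'app'` logger name come from this diff.

```python
# Illustrative entrypoint; only init_with_fastapi and the 'app' logger name
# are taken from this commit, the rest is an assumed example.
import logging

from fastapi import FastAPI

from utils.log_util.logger import init_with_fastapi

app = FastAPI()
init_with_fastapi(app, level=logging.INFO)   # install Formatter + X-Trace-Id middleware

logger = logging.getLogger('app')            # same line the other modules now use

@app.get("/ping")
async def ping():
    logger.info("ping received")             # record is formatted with the request trace_id
    return {"status": "ok"}
```
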
@ -6,10 +6,14 @@
|
||||
import os
|
||||
import shutil
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Optional
|
||||
from datetime import datetime
|
||||
|
||||
# Configure logger
|
||||
logger = logging.getLogger('app')
|
||||
|
||||
from utils.file_utils import get_document_preview
|
||||
|
||||
|
||||
@ -177,11 +181,11 @@ def copy_dataset_folder(source_project_id: str, target_dataset_dir: Path, folder
|
||||
shutil.copytree(source_folder, target_folder)
|
||||
result["success"] = True
|
||||
|
||||
print(f" Copied: {source_folder} -> {target_folder}")
|
||||
logger.info(f" Copied: {source_folder} -> {target_folder}")
|
||||
|
||||
except Exception as e:
|
||||
result["error"] = str(e)
|
||||
print(f" Error copying {folder_name}: {str(e)}")
|
||||
logger.error(f" Error copying {folder_name}: {str(e)}")
|
||||
|
||||
return result
|
||||
|
||||
@ -237,7 +241,7 @@ def generate_robot_readme(robot_id: str, dataset_ids: List[str], copy_results: L
|
||||
if item_path.is_dir():
|
||||
doc_dirs.append(item)
|
||||
except Exception as e:
|
||||
print(f"Error listing dataset directories: {str(e)}")
|
||||
logger.error(f"Error listing dataset directories: {str(e)}")
|
||||
|
||||
if not doc_dirs:
|
||||
readme_content += "No document directories found.\n"
|
||||
@ -292,7 +296,7 @@ def generate_robot_readme(robot_id: str, dataset_ids: List[str], copy_results: L
|
||||
with open(readme_path, 'w', encoding='utf-8') as f:
|
||||
f.write(readme_content)
|
||||
|
||||
print(f"Generated README: {readme_path}")
|
||||
logger.info(f"Generated README: {readme_path}")
|
||||
return str(readme_path)
|
||||
|
||||
|
||||
@ -314,14 +318,14 @@ def should_rebuild_robot_project(dataset_ids: List[str], bot_id: str) -> bool:
|
||||
|
||||
# 如果机器人项目不存在,需要创建
|
||||
if not robot_dir.exists():
|
||||
print(f"Robot project does not exist, need to create: {bot_id}")
|
||||
return True
|
||||
logger.info(f"Robot project does not exist, need to create: {bot_id}")
|
||||
return True
|
||||
|
||||
# 检查机器人项目的配置信息
|
||||
config_file = robot_dir / "robot_config.json"
|
||||
if not config_file.exists():
|
||||
print(f"Robot config file not found, need to rebuild: {bot_id}")
|
||||
return True
|
||||
logger.info(f"Robot config file not found, need to rebuild: {bot_id}")
|
||||
return True
|
||||
|
||||
# 读取配置信息
|
||||
try:
|
||||
@ -329,8 +333,8 @@ def should_rebuild_robot_project(dataset_ids: List[str], bot_id: str) -> bool:
|
||||
config = json.load(f)
|
||||
cached_dataset_ids = set(config.get("dataset_ids", []))
|
||||
except Exception as e:
|
||||
print(f"Error reading robot config: {e}, need to rebuild")
|
||||
return True
|
||||
logger.error(f"Error reading robot config: {e}, need to rebuild")
|
||||
return True
|
||||
|
||||
# 检查dataset_ids是否有变化
|
||||
current_dataset_ids = set(dataset_ids)
|
||||
@ -338,14 +342,14 @@ def should_rebuild_robot_project(dataset_ids: List[str], bot_id: str) -> bool:
|
||||
# 如果有新增的dataset_id
|
||||
new_ids = current_dataset_ids - cached_dataset_ids
|
||||
if new_ids:
|
||||
print(f"Found new dataset_ids: {new_ids}, need to rebuild")
|
||||
return True
|
||||
logger.info(f"Found new dataset_ids: {new_ids}, need to rebuild")
|
||||
return True
|
||||
|
||||
# 如果有删除的dataset_id
|
||||
removed_ids = cached_dataset_ids - current_dataset_ids
|
||||
if removed_ids:
|
||||
print(f"Removed dataset_ids: {removed_ids}, need to rebuild")
|
||||
return True
|
||||
logger.info(f"Removed dataset_ids: {removed_ids}, need to rebuild")
|
||||
return True
|
||||
|
||||
# 获取机器人项目的最后修改时间
|
||||
robot_mod_time = robot_dir.stat().st_mtime
|
||||
@ -355,17 +359,17 @@ def should_rebuild_robot_project(dataset_ids: List[str], bot_id: str) -> bool:
|
||||
log_file = Path("projects") / "data" / source_project_id / "processing_log.json"
|
||||
|
||||
if not log_file.exists():
|
||||
print(f"Processing log file not found for project {source_project_id}, will rebuild")
|
||||
return True
|
||||
logger.info(f"Processing log file not found for project {source_project_id}, will rebuild")
|
||||
return True
|
||||
|
||||
log_mod_time = log_file.stat().st_mtime
|
||||
|
||||
# 如果任何一个processing_log.json文件比机器人项目新,需要重建
|
||||
if log_mod_time > robot_mod_time:
|
||||
print(f"Processing log updated for project {source_project_id}, need to rebuild")
|
||||
return True
|
||||
logger.info(f"Processing log updated for project {source_project_id}, need to rebuild")
|
||||
return True
|
||||
|
||||
print(f"Robot project {bot_id} is up to date, no rebuild needed")
|
||||
logger.info(f"Robot project {bot_id} is up to date, no rebuild needed")
|
||||
return False
|
||||
|
||||
|
||||
@ -381,12 +385,12 @@ def create_robot_project(dataset_ids: List[str], bot_id: str, force_rebuild: boo
|
||||
Returns:
|
||||
str: 机器人项目目录路径
|
||||
"""
|
||||
print(f"Creating robot project: {bot_id} from sources: {dataset_ids}")
|
||||
logger.info(f"Creating robot project: {bot_id} from sources: {dataset_ids}")
|
||||
|
||||
# 检查是否需要重建
|
||||
if not force_rebuild and not should_rebuild_robot_project(dataset_ids, bot_id):
|
||||
robot_dir = Path("projects") / "robot" / bot_id
|
||||
print(f"Using existing robot project: {robot_dir}")
|
||||
logger.info(f"Using existing robot project: {robot_dir}")
|
||||
return str(robot_dir)
|
||||
|
||||
# 创建机器人目录结构
|
||||
@ -395,8 +399,8 @@ def create_robot_project(dataset_ids: List[str], bot_id: str, force_rebuild: boo
|
||||
|
||||
# 清理已存在的目录(如果需要)
|
||||
if robot_dir.exists():
|
||||
print(f"Robot directory already exists, cleaning up: {robot_dir}")
|
||||
shutil.rmtree(robot_dir)
|
||||
logger.info(f"Robot directory already exists, cleaning up: {robot_dir}")
|
||||
shutil.rmtree(robot_dir)
|
||||
|
||||
robot_dir.mkdir(parents=True, exist_ok=True)
|
||||
dataset_dir.mkdir(parents=True, exist_ok=True)
|
||||
@ -405,19 +409,19 @@ def create_robot_project(dataset_ids: List[str], bot_id: str, force_rebuild: boo
|
||||
|
||||
# 遍历每个源项目
|
||||
for source_project_id in dataset_ids:
|
||||
print(f"\nProcessing source project: {source_project_id}")
|
||||
logger.info(f"\nProcessing source project: {source_project_id}")
|
||||
|
||||
source_dataset_dir = Path("projects") / "data" / source_project_id / "dataset"
|
||||
|
||||
if not source_dataset_dir.exists():
|
||||
print(f" Warning: Dataset directory not found for project {source_project_id}")
|
||||
logger.warning(f" Warning: Dataset directory not found for project {source_project_id}")
|
||||
continue
|
||||
|
||||
# 获取所有子文件夹
|
||||
folders = [f for f in source_dataset_dir.iterdir() if f.is_dir()]
|
||||
|
||||
if not folders:
|
||||
print(f" Warning: No folders found in dataset directory for project {source_project_id}")
|
||||
logger.warning(f" Warning: No folders found in dataset directory for project {source_project_id}")
|
||||
continue
|
||||
|
||||
# 复制每个文件夹
|
||||
@ -442,12 +446,12 @@ def create_robot_project(dataset_ids: List[str], bot_id: str, force_rebuild: boo
|
||||
|
||||
# 统计信息
|
||||
successful_copies = sum(1 for r in copy_results if r["success"])
|
||||
print(f"\nRobot project creation completed:")
|
||||
print(f" Robot directory: {robot_dir}")
|
||||
print(f" Total folders processed: {len(copy_results)}")
|
||||
print(f" Successful copies: {successful_copies}")
|
||||
print(f" Config saved: {config_file}")
|
||||
print(f" README generated: {readme_path}")
|
||||
logger.info(f"\nRobot project creation completed:")
|
||||
logger.info(f" Robot directory: {robot_dir}")
|
||||
logger.info(f" Total folders processed: {len(copy_results)}")
|
||||
logger.info(f" Successful copies: {successful_copies}")
|
||||
logger.info(f" Config saved: {config_file}")
|
||||
logger.info(f" README generated: {readme_path}")
|
||||
|
||||
return str(robot_dir)
|
||||
|
||||
@ -513,14 +517,14 @@ def cleanup_robot_project(bot_id: str) -> bool:
|
||||
|
||||
if robot_dir.exists():
|
||||
shutil.rmtree(robot_dir)
|
||||
print(f"Cleaned up robot project: {bot_id}")
|
||||
logger.info(f"Cleaned up robot project: {bot_id}")
|
||||
return True
|
||||
else:
|
||||
print(f"Robot project does not exist: {bot_id}")
|
||||
logger.info(f"Robot project does not exist: {bot_id}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error cleaning up robot project {bot_id}: {str(e)}")
|
||||
logger.error(f"Error cleaning up robot project {bot_id}: {str(e)}")
|
||||
return False
|
||||
|
||||
|
||||
@ -530,7 +534,7 @@ if __name__ == "__main__":
|
||||
test_bot_id = "test-robot-001"
|
||||
|
||||
robot_dir = create_robot_project(test_dataset_ids, test_bot_id)
|
||||
print(f"Created robot project at: {robot_dir}")
|
||||
logger.info(f"Created robot project at: {robot_dir}")
|
||||
|
||||
info = get_robot_project_info(test_bot_id)
|
||||
print(f"Robot project info: {json.dumps(info, indent=2, ensure_ascii=False)}")
|
||||
logger.info(f"Robot project info: {json.dumps(info, indent=2, ensure_ascii=False)}")
|
||||
|
||||
@ -1,8 +1,12 @@
|
||||
#!/usr/bin/env python3
|
||||
import os
|
||||
import shutil
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
# 配置日志
|
||||
logger = logging.getLogger('app')
|
||||
|
||||
def is_file_already_processed(target_file: Path, pagination_file: Path, embeddings_file: Path) -> bool:
|
||||
"""Check if a file has already been processed (document.txt, pagination.txt, and embeddings exist)"""
|
||||
if not target_file.exists():
|
||||
@ -22,22 +26,22 @@ def organize_single_project_files(unique_id: str, skip_processed=True):
|
||||
project_dir = Path("projects") / "data" / unique_id
|
||||
|
||||
if not project_dir.exists():
|
||||
print(f"Project directory not found: {project_dir}")
|
||||
logger.error(f"Project directory not found: {project_dir}")
|
||||
return
|
||||
|
||||
print(f"Organizing files for project: {unique_id} (skip_processed={skip_processed})")
|
||||
logger.info(f"Organizing files for project: {unique_id} (skip_processed={skip_processed})")
|
||||
|
||||
files_dir = project_dir / "files"
|
||||
dataset_dir = project_dir / "dataset"
|
||||
|
||||
# Check if files directory exists and has files
|
||||
if not files_dir.exists():
|
||||
print(f" No files directory found, skipping...")
|
||||
logger.info(f" No files directory found, skipping...")
|
||||
return
|
||||
|
||||
files = list(files_dir.glob("*"))
|
||||
if not files:
|
||||
print(f" Files directory is empty, skipping...")
|
||||
logger.info(f" Files directory is empty, skipping...")
|
||||
return
|
||||
|
||||
# Create dataset directory if it doesn't exist
|
||||
@ -55,10 +59,10 @@ def organize_single_project_files(unique_id: str, skip_processed=True):
|
||||
|
||||
# Check if file is already processed
|
||||
if skip_processed and is_file_already_processed(target_file, pagination_file, embeddings_file):
|
||||
print(f" Skipping already processed file: {file_path.name}")
|
||||
logger.info(f" Skipping already processed file: {file_path.name}")
|
||||
continue
|
||||
|
||||
print(f" Copying {file_path.name} -> {target_file.relative_to(project_dir)}")
|
||||
logger.info(f" Copying {file_path.name} -> {target_file.relative_to(project_dir)}")
|
||||
|
||||
# Create target directory
|
||||
target_dir.mkdir(exist_ok=True)
|
||||
@ -140,12 +144,12 @@ def organize_dataset_files():
|
||||
|
||||
# Check if files directory exists and has files
|
||||
if not files_dir.exists():
|
||||
print(f" No files directory found, skipping...")
|
||||
logger.info(f" No files directory found, skipping...")
|
||||
continue
|
||||
|
||||
files = list(files_dir.glob("*"))
|
||||
if not files:
|
||||
print(f" Files directory is empty, skipping...")
|
||||
logger.info(f" Files directory is empty, skipping...")
|
||||
continue
|
||||
|
||||
# Create dataset directory if it doesn't exist
|
||||
@ -159,7 +163,7 @@ def organize_dataset_files():
|
||||
target_dir = dataset_dir / file_name_without_ext
|
||||
target_file = target_dir / "document.txt"
|
||||
|
||||
print(f" Copying {file_path.name} -> {target_file.relative_to(project_dir)}")
|
||||
logger.info(f" Copying {file_path.name} -> {target_file.relative_to(project_dir)}")
|
||||
|
||||
# Create target directory
|
||||
target_dir.mkdir(exist_ok=True)
|
||||
|
||||
@ -5,9 +5,13 @@ Project management functions for handling projects, README generation, and statu
|
||||
|
||||
import os
|
||||
import json
|
||||
import logging
|
||||
from typing import Dict, List, Optional
|
||||
from pathlib import Path
|
||||
|
||||
# Configure logger
|
||||
logger = logging.getLogger('app')
|
||||
|
||||
from utils.file_utils import get_document_preview, load_processed_files_log
|
||||
|
||||
|
||||
@ -145,7 +149,7 @@ This project contains processed documents and their associated embeddings for se
|
||||
if os.path.isdir(item_path):
|
||||
doc_dirs.append(item)
|
||||
except Exception as e:
|
||||
print(f"Error listing dataset directories: {str(e)}")
|
||||
logger.error(f"Error listing dataset directories: {str(e)}")
|
||||
|
||||
if not doc_dirs:
|
||||
readme_content += "No document directories found.\n"
|
||||
@ -198,10 +202,10 @@ def save_project_readme(unique_id: str):
|
||||
os.makedirs(os.path.dirname(readme_path), exist_ok=True)
|
||||
with open(readme_path, 'w', encoding='utf-8') as f:
|
||||
f.write(readme_content)
|
||||
print(f"Generated README.md for project {unique_id}")
|
||||
logger.info(f"Generated README.md for project {unique_id}")
|
||||
return readme_path
|
||||
except Exception as e:
|
||||
print(f"Error generating README for project {unique_id}: {str(e)}")
|
||||
logger.error(f"Error generating README for project {unique_id}: {str(e)}")
|
||||
return None
|
||||
|
||||
|
||||
@ -264,13 +268,13 @@ def remove_project(unique_id: str) -> bool:
|
||||
if os.path.exists(project_dir):
|
||||
import shutil
|
||||
shutil.rmtree(project_dir)
|
||||
print(f"Removed project directory: {project_dir}")
|
||||
logger.info(f"Removed project directory: {project_dir}")
|
||||
return True
|
||||
else:
|
||||
print(f"Project directory not found: {project_dir}")
|
||||
logger.warning(f"Project directory not found: {project_dir}")
|
||||
return False
|
||||
except Exception as e:
|
||||
print(f"Error removing project {unique_id}: {str(e)}")
|
||||
logger.error(f"Error removing project {unique_id}: {str(e)}")
|
||||
return False
|
||||
|
||||
|
||||
@ -284,7 +288,7 @@ def list_projects() -> List[str]:
|
||||
return [item for item in os.listdir(projects_dir)
|
||||
if os.path.isdir(os.path.join(projects_dir, item))]
|
||||
except Exception as e:
|
||||
print(f"Error listing projects: {str(e)}")
|
||||
logger.error(f"Error listing projects: {str(e)}")
|
||||
return []
|
||||
|
||||
|
||||
|
||||
@ -6,9 +6,13 @@ Single file processing functions for handling individual files.
|
||||
import os
|
||||
import tempfile
|
||||
import zipfile
|
||||
import logging
|
||||
from typing import Dict, List, Tuple, Optional
|
||||
from pathlib import Path
|
||||
|
||||
# Configure logger
|
||||
logger = logging.getLogger('app')
|
||||
|
||||
from utils.file_utils import download_file
|
||||
|
||||
# Try to import excel/csv processor, but handle if dependencies are missing
|
||||
@ -18,7 +22,7 @@ try:
|
||||
)
|
||||
EXCEL_CSV_SUPPORT = True
|
||||
except ImportError as e:
|
||||
print(f"Excel/CSV processing not available: {e}")
|
||||
logger.warning(f"Excel/CSV processing not available: {e}")
|
||||
EXCEL_CSV_SUPPORT = False
|
||||
|
||||
# Fallback functions
|
||||
@ -71,7 +75,7 @@ async def process_single_file(
|
||||
# Download file if it's remote and not yet downloaded
|
||||
if original_path.startswith(('http://', 'https://')):
|
||||
if not os.path.exists(local_path):
|
||||
print(f"Downloading {original_path} -> {local_path}")
|
||||
logger.info(f"Downloading {original_path} -> {local_path}")
|
||||
success = await download_file(original_path, local_path)
|
||||
if not success:
|
||||
result["error"] = "Failed to download file"
|
||||
@ -112,11 +116,11 @@ async def process_single_file(
|
||||
|
||||
except Exception as e:
|
||||
result["error"] = f"Embedding generation failed: {str(e)}"
|
||||
print(f"Failed to generate embeddings for {filename}: {str(e)}")
|
||||
logger.error(f"Failed to generate embeddings for {filename}: {str(e)}")
|
||||
|
||||
except Exception as e:
|
||||
result["error"] = f"File processing failed: {str(e)}"
|
||||
print(f"Error processing file {filename}: {str(e)}")
|
||||
logger.error(f"Error processing file {filename}: {str(e)}")
|
||||
|
||||
return result
|
||||
|
||||
@ -167,14 +171,14 @@ async def extract_from_zip(zip_path: str, filename: str) -> Tuple[str, List[str]
|
||||
pagination_lines.extend(file_pagination)
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error processing extracted file {file}: {str(e)}")
|
||||
logger.error(f"Error processing extracted file {file}: {str(e)}")
|
||||
|
||||
# Clean up temporary directory
|
||||
import shutil
|
||||
shutil.rmtree(temp_dir)
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error extracting zip file {filename}: {str(e)}")
|
||||
logger.error(f"Error extracting zip file {filename}: {str(e)}")
|
||||
return "", []
|
||||
|
||||
return '\n\n'.join(content_parts), pagination_lines
|
||||
@ -192,7 +196,7 @@ async def extract_from_excel(file_path: str, filename: str) -> Tuple[str, List[s
|
||||
return "", []
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error processing Excel file {filename}: {str(e)}")
|
||||
logger.error(f"Error processing Excel file {filename}: {str(e)}")
|
||||
return "", []
|
||||
|
||||
|
||||
@ -208,7 +212,7 @@ async def extract_from_csv(file_path: str, filename: str) -> Tuple[str, List[str
|
||||
return "", []
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error processing CSV file {filename}: {str(e)}")
|
||||
logger.error(f"Error processing CSV file {filename}: {str(e)}")
|
||||
return "", []
|
||||
|
||||
|
||||
@ -224,7 +228,7 @@ async def extract_from_text(file_path: str, filename: str) -> Tuple[str, List[st
|
||||
return "", []
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error reading text file {filename}: {str(e)}")
|
||||
logger.error(f"Error reading text file {filename}: {str(e)}")
|
||||
return "", []
|
||||
|
||||
|
||||
@ -248,7 +252,7 @@ def generate_pagination_from_text(document_path: str, pagination_path: str) -> L
|
||||
return pagination_lines
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error generating pagination from text: {str(e)}")
|
||||
logger.error(f"Error generating pagination from text: {str(e)}")
|
||||
return []
|
||||
|
||||
|
||||
@ -273,7 +277,7 @@ async def generate_embeddings_for_file(document_path: str, embedding_path: str)
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error generating embeddings: {str(e)}")
|
||||
logger.error(f"Error generating embeddings: {str(e)}")
|
||||
return None
|
||||
|
||||
|
||||
|
||||
@ -9,9 +9,13 @@ import multiprocessing
|
||||
import threading
|
||||
import time
|
||||
import resource
|
||||
import logging
|
||||
from typing import Dict, Any, Optional
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
# 配置日志
|
||||
logger = logging.getLogger('app')
|
||||
|
||||
|
||||
class SystemOptimizer:
|
||||
"""系统优化器
|
||||
@ -37,9 +41,9 @@ class SystemOptimizer:
|
||||
new_limit = min(65536, hard) # 增加到65536或硬件限制
|
||||
resource.setrlimit(resource.RLIMIT_NOFILE, (new_limit, hard))
|
||||
self.original_settings['RLIMIT_NOFILE'] = (soft, hard)
|
||||
print(f"文件描述符限制从 {soft} 增加到 {new_limit}")
|
||||
logger.info(f"文件描述符限制从 {soft} 增加到 {new_limit}")
|
||||
except (ValueError, OSError) as e:
|
||||
print(f"无法设置文件描述符限制: {e}")
|
||||
logger.error(f"无法设置文件描述符限制: {e}")
|
||||
|
||||
# 2. 优化线程栈大小
|
||||
try:
|
||||
@ -47,9 +51,9 @@ class SystemOptimizer:
|
||||
new_stack = min(8 * 1024 * 1024, hard) # 8MB栈大小
|
||||
resource.setrlimit(resource.RLIMIT_STACK, (new_stack, hard))
|
||||
self.original_settings['RLIMIT_STACK'] = (soft, hard)
|
||||
print(f"线程栈大小设置为 {new_stack // (1024*1024)}MB")
|
||||
logger.info(f"线程栈大小设置为 {new_stack // (1024*1024)}MB")
|
||||
except (ValueError, OSError) as e:
|
||||
print(f"无法设置线程栈大小: {e}")
|
||||
logger.error(f"无法设置线程栈大小: {e}")
|
||||
|
||||
# 3. 环境变量优化
|
||||
env_vars = {
|
||||
@ -85,10 +89,10 @@ class SystemOptimizer:
|
||||
for key, value in env_vars.items():
|
||||
if key not in os.environ:
|
||||
os.environ[key] = value
|
||||
print(f"设置环境变量: {key}={value}")
|
||||
logger.info(f"设置环境变量: {key}={value}")
|
||||
|
||||
self.optimized = True
|
||||
print("系统优化完成")
|
||||
logger.info("系统优化完成")
|
||||
|
||||
def _backup_original_settings(self):
|
||||
"""备份原始设置"""
|
||||
@ -115,20 +119,20 @@ class SystemOptimizer:
|
||||
if not self.original_settings:
|
||||
return
|
||||
|
||||
print("恢复原始系统设置...")
|
||||
logger.info("恢复原始系统设置...")
|
||||
|
||||
# 恢复资源限制
|
||||
if 'RLIMIT_NOFILE' in self.original_settings:
|
||||
try:
|
||||
resource.setrlimit(resource.RLIMIT_NOFILE, self.original_settings['RLIMIT_NOFILE'])
|
||||
print(f"恢复文件描述符限制")
|
||||
logger.info(f"恢复文件描述符限制")
|
||||
except:
|
||||
pass
|
||||
|
||||
if 'RLIMIT_STACK' in self.original_settings:
|
||||
try:
|
||||
resource.setrlimit(resource.RLIMIT_STACK, self.original_settings['RLIMIT_STACK'])
|
||||
print(f"恢复线程栈大小")
|
||||
logger.info(f"恢复线程栈大小")
|
||||
except:
|
||||
pass
|
||||
|
||||
@ -137,13 +141,13 @@ class SystemOptimizer:
|
||||
if key.startswith('TOKENIZERS_') or key in ['PYTHONUNBUFFERED', 'PYTHONDONTWRITEBYTECODE']:
|
||||
if key in os.environ:
|
||||
del os.environ[key]
|
||||
print(f"移除环境变量: {key}")
|
||||
logger.info(f"移除环境变量: {key}")
|
||||
elif key in ['OMP_NUM_THREADS'] and value is not None:
|
||||
os.environ[key] = value
|
||||
print(f"恢复环境变量: {key}={value}")
|
||||
logger.info(f"恢复环境变量: {key}={value}")
|
||||
|
||||
self.optimized = False
|
||||
print("系统设置已恢复")
|
||||
logger.info("系统设置已恢复")
|
||||
|
||||
|
||||
class AsyncioOptimizer:
|
||||
@ -156,9 +160,9 @@ class AsyncioOptimizer:
|
||||
# 尝试使用uvloop(如果可用)
|
||||
import uvloop
|
||||
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
|
||||
print("使用uvloop事件循环策略")
|
||||
logger.info("使用uvloop事件循环策略")
|
||||
except ImportError:
|
||||
print("使用默认事件循环策略")
|
||||
logger.info("使用默认事件循环策略")
|
||||
|
||||
# 设置线程池大小
|
||||
cpu_count = multiprocessing.cpu_count()
|
||||
@ -166,7 +170,7 @@ class AsyncioOptimizer:
|
||||
|
||||
# 注意:不能在这里设置默认线程池执行器,因为还没有运行事件循环
|
||||
# 这个设置会在应用启动时进行
|
||||
print(f"建议线程池大小: {thread_pool_size}")
|
||||
logger.info(f"建议线程池大小: {thread_pool_size}")
|
||||
|
||||
@staticmethod
|
||||
def optimize_gunicorn_settings() -> Dict[str, Any]:
|
||||
|
||||
@ -9,10 +9,14 @@ import hashlib
|
||||
import zipfile
|
||||
import requests
|
||||
import tempfile
|
||||
import logging
|
||||
from typing import List, Optional
|
||||
from urllib.parse import urlparse
|
||||
from pathlib import Path
|
||||
|
||||
# 配置日志
|
||||
logger = logging.getLogger('app')
|
||||
|
||||
|
||||
class ZipProjectHandler:
|
||||
"""ZIP项目处理器"""
|
||||
@ -56,7 +60,7 @@ class ZipProjectHandler:
|
||||
|
||||
return True
|
||||
except Exception as e:
|
||||
print(f"下载文件失败: {e}")
|
||||
logger.error(f"下载文件失败: {e}")
|
||||
return False
|
||||
|
||||
def _copy_local_file(self, local_path: str, target_path: str) -> bool:
|
||||
@ -66,7 +70,7 @@ class ZipProjectHandler:
|
||||
shutil.copy2(local_path, target_path)
|
||||
return True
|
||||
except Exception as e:
|
||||
print(f"复制本地文件失败: {e}")
|
||||
logger.error(f"复制本地文件失败: {e}")
|
||||
return False
|
||||
|
||||
def _extract_zip(self, zip_path: str, extract_to: str) -> bool:
|
||||
@ -76,7 +80,7 @@ class ZipProjectHandler:
|
||||
zip_ref.extractall(extract_to)
|
||||
return True
|
||||
except Exception as e:
|
||||
print(f"解压ZIP文件失败: {e}")
|
||||
logger.error(f"解压ZIP文件失败: {e}")
|
||||
return False
|
||||
|
||||
def get_project_from_zip(self, zip_url: str, unique_id: Optional[str] = None) -> Optional[str]:
|
||||
@ -91,7 +95,7 @@ class ZipProjectHandler:
|
||||
Optional[str]: 成功时返回项目目录路径,失败时返回None
|
||||
"""
|
||||
if not self._is_valid_url_or_path(zip_url):
|
||||
print(f"无效的URL或路径: {zip_url}")
|
||||
logger.error(f"无效的URL或路径: {zip_url}")
|
||||
return None
|
||||
|
||||
# 使用unique_id作为目录名,如果没有则使用url_hash
|
||||
@ -104,7 +108,7 @@ class ZipProjectHandler:
|
||||
cached_project_dir = self.projects_dir / project_dir_name
|
||||
|
||||
if cached_project_dir.exists() and not unique_id:
|
||||
print(f"使用缓存的项目目录: {cached_project_dir}")
|
||||
logger.info(f"使用缓存的项目目录: {cached_project_dir}")
|
||||
return str(cached_project_dir)
|
||||
|
||||
# 下载或复制ZIP文件
|
||||
@ -125,24 +129,24 @@ class ZipProjectHandler:
|
||||
is_url = False
|
||||
|
||||
if is_url:
|
||||
print(f"下载ZIP文件: {zip_url}")
|
||||
logger.info(f"下载ZIP文件: {zip_url}")
|
||||
if not self._download_file(zip_url, str(zip_path)):
|
||||
return None
|
||||
else:
|
||||
print(f"复制本地ZIP文件: {zip_url}")
|
||||
logger.info(f"复制本地ZIP文件: {zip_url}")
|
||||
# 解析相对路径
|
||||
local_path = Path(zip_url).resolve()
|
||||
if not self._copy_local_file(str(local_path), str(zip_path)):
|
||||
return None
|
||||
else:
|
||||
print(f"使用缓存的ZIP文件: {zip_path}")
|
||||
logger.info(f"使用缓存的ZIP文件: {zip_path}")
|
||||
|
||||
# 解压到项目目录
|
||||
print(f"解压ZIP文件到: {cached_project_dir}")
|
||||
logger.info(f"解压ZIP文件到: {cached_project_dir}")
|
||||
if not self._extract_zip(str(zip_path), str(cached_project_dir)):
|
||||
return None
|
||||
|
||||
print(f"项目准备完成: {cached_project_dir}")
|
||||
logger.info(f"项目准备完成: {cached_project_dir}")
|
||||
return str(cached_project_dir)
|
||||
|
||||
def collect_document_files(self, project_dir: str) -> List[str]:
|
||||
@ -159,7 +163,7 @@ class ZipProjectHandler:
|
||||
project_path = Path(project_dir)
|
||||
|
||||
if not project_path.exists():
|
||||
print(f"项目目录不存在: {project_dir}")
|
||||
logger.error(f"项目目录不存在: {project_dir}")
|
||||
return document_files
|
||||
|
||||
# 递归搜索所有 document.txt 文件
|
||||
@ -167,11 +171,11 @@ class ZipProjectHandler:
|
||||
if file_path.is_file():
|
||||
document_files.append(str(file_path))
|
||||
|
||||
print(f"在项目目录 {project_dir} 中找到 {len(document_files)} 个 document.txt 文件")
|
||||
logger.info(f"在项目目录 {project_dir} 中找到 {len(document_files)} 个 document.txt 文件")
|
||||
for file_path in document_files[:5]: # 只打印前5个文件路径作为示例
|
||||
print(f" - {file_path}")
|
||||
logger.info(f" - {file_path}")
|
||||
if len(document_files) > 5:
|
||||
print(f" ... 还有 {len(document_files) - 5} 个文件")
|
||||
logger.info(f" ... 还有 {len(document_files) - 5} 个文件")
|
||||
|
||||
return document_files
|
||||
|
||||
@ -182,9 +186,9 @@ class ZipProjectHandler:
|
||||
if self.cache_dir.exists():
|
||||
shutil.rmtree(self.cache_dir)
|
||||
self.cache_dir.mkdir(exist_ok=True)
|
||||
print("缓存清理完成")
|
||||
logger.info("缓存清理完成")
|
||||
except Exception as e:
|
||||
print(f"清理缓存失败: {e}")
|
||||
logger.error(f"清理缓存失败: {e}")
|
||||
|
||||
|
||||
# 全局ZIP项目处理器实例
|
||||
|
||||