feat(skills): add security hardening to skill upload endpoint

- Add ZipSlip path traversal protection (validate all file paths) - Add file size limits (50MB upload, 500MB extracted) - Add zip bomb protection (max 100:1 compression ratio, 1000 entries) - Add async I/O using aiofiles to avoid blocking event loop - Add bot_id validation to prevent path traversal attacks - Add proper error cleanup on upload failures Security improvements: - P1-001: ZipSlip path traversal防护 - P1-004: File size limits (50MB) - P1-005: Zip bomb防护 (compression ratio check) - P1-008: Async I/O improvements 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2026-01-07 20:25:37 +08:00 · 2026-01-07 20:25:37 +08:00 · b8e57b2f51
commit b8e57b2f51
parent b3303ef8e6
1 changed files with 182 additions and 41 deletions
--- a/routes/skill_manager.py
+++ b/routes/skill_manager.py
@ -3,10 +3,12 @@ import re
 import shutil
 import zipfile
 import logging
 import asyncio
 from typing import List, Optional
 from fastapi import APIRouter, HTTPException, Query, UploadFile, File, Form
 from pydantic import BaseModel
 from utils.settings import SKILLS_DIR
 import aiofiles
 logger = logging.getLogger('app')
@ -24,6 +26,129 @@ class SkillListResponse(BaseModel):
    total: int
 # ============ 安全常量 ============
 MAX_FILE_SIZE = 50 * 1024 * 1024  # 50MB 最大上传文件大小
 MAX_UNCOMPRESSED_SIZE = 500 * 1024 * 1024  # 500MB 解压后最大大小
 MAX_COMPRESSION_RATIO = 100  # 最大压缩比例 100:1
 MAX_ZIP_ENTRIES = 1000  # zip 文件中最多文件数量
 def validate_bot_id(bot_id: str) -> str:
    """验证 bot_id 格式，防止路径遍历攻击"""
    if not bot_id:
        raise HTTPException(status_code=400, detail="bot_id 不能为空")
    # 检查路径遍历字符
    if '..' in bot_id or '/' in bot_id or '\\' in bot_id:
        raise HTTPException(status_code=400, detail="bot_id 包含非法字符")
    # 验证 UUID 格式（可选，根据实际需求）
    uuid_pattern = r'^[a-fA-F0-9-]{36}$'
    if not re.match(uuid_pattern, bot_id):
        logger.warning(f"bot_id 格式可能无效: {bot_id}")
    return bot_id
 async def validate_upload_file_size(file: UploadFile) -> int:
    """验证上传文件大小，返回实际文件大小"""
    file_size = 0
    chunk_size = 8192
    # 保存当前位置以便后续重置
    await file.seek(0)
    while chunk := await file.read(chunk_size):
        file_size += len(chunk)
        if file_size > MAX_FILE_SIZE:
            await file.seek(0)  # 重置文件指针
            raise HTTPException(
                status_code=413,
                detail=f"文件过大，最大允许 {MAX_FILE_SIZE // (1024*1024)}MB"
            )
    await file.seek(0)  # 重置文件指针供后续使用
    return file_size
 async def safe_extract_zip(zip_path: str, extract_dir: str) -> None:
    """安全地解压 zip 文件，防止 ZipSlip 和 zip 炸弹攻击
    Args:
        zip_path: zip 文件路径
        extract_dir: 解压目标目录
    Raises:
        HTTPException: 如果检测到恶意文件
    """
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            # 检查文件数量
            file_list = zip_ref.infolist()
            if len(file_list) > MAX_ZIP_ENTRIES:
                raise zipfile.BadZipFile(f"zip 文件包含过多文件: {len(file_list)}")
            # 检查压缩比例和总大小
            compressed_size = sum(z.file_size for z in file_list)
            uncompressed_size = sum(z.compress_size for z in file_list)
            if uncompressed_size > MAX_UNCOMPRESSED_SIZE:
                raise zipfile.BadZipFile(
                    f"解压后大小 {uncompressed_size // (1024*1024)}MB 超过限制 "
                    f"{MAX_UNCOMPRESSED_SIZE // (1024*1024)}MB"
                )
            # 检查压缩比例（防止 zip 炸弹）
            if compressed_size > 0:
                ratio = uncompressed_size / compressed_size
                if ratio > MAX_COMPRESSION_RATIO:
                    raise zipfile.BadZipFile(
                        f"压缩比例 {ratio:.1f}:1 超过限制 {MAX_COMPRESSION_RATIO}:1，"
                        f"可能是 zip 炸弹攻击"
                    )
            # 规范化目标目录路径
            extract_dir_real = os.path.realpath(extract_dir)
            # 安全地解压每个文件
            for zip_info in file_list:
                # 检查路径遍历攻击
                if '..' in zip_info.filename or zip_info.filename.startswith('/'):
                    raise zipfile.BadZipFile(
                        f"检测到路径遍历攻击: {zip_info.filename}"
                    )
                # 构建完整的目标路径
                target_path = os.path.realpath(os.path.join(extract_dir, zip_info.filename))
                # 确保目标路径在解压目录内
                if not target_path.startswith(extract_dir_real + os.sep):
                    if target_path != extract_dir_real:  # 允许目录本身
                        raise zipfile.BadZipFile(
                            f"文件将被解压到目标目录之外: {zip_info.filename}"
                        )
                # 检查符号链接
                if zip_info.is_symlink():
                    raise zipfile.BadZipFile(
                        f"不允许符号链接: {zip_info.filename}"
                    )
                # 解压文件（使用线程池避免阻塞）
                await asyncio.to_thread(zip_ref.extract, zip_info, extract_dir)
    except zipfile.BadZipFile as e:
        raise HTTPException(status_code=400, detail=f"无效的 zip 文件: {str(e)}")
 async def save_upload_file_async(file: UploadFile, destination: str) -> None:
    """异步保存上传文件到目标路径"""
    async with aiofiles.open(destination, 'wb') as f:
        chunk_size = 8192
        while chunk := await file.read(chunk_size):
            await f.write(chunk)
 def parse_skill_frontmatter(skill_md_path: str) -> Optional[dict]:
    """Parse the YAML frontmatter from SKILL.md file
@ -190,6 +315,12 @@ async def upload_skill(file: UploadFile = File(...), bot_id: Optional[str] = For
    """
    Skill文件上传API接口，上传zip文件到 ./projects/uploads/ 目录下并自动解压
    安全改进:
        - P1-001: ZipSlip 路径遍历防护 - 检查每个文件的解压路径
        - P1-004: 文件大小限制 - 最大 50MB
        - P1-005: Zip 炸弹防护 - 检查压缩比例（最大 100:1）和解压后大小（最大 500MB）
        - P1-008: 异步 I/O - 使用 aiofiles 和 asyncio.to_thread
    Args:
        file: 上传的zip文件
        bot_id: Bot ID，用于创建用户专属的skills目录
@ -200,19 +331,23 @@ async def upload_skill(file: UploadFile = File(...), bot_id: Optional[str] = For
    Notes:
        - 仅支持.zip格式的skill文件
        - 上传后会自动解压到 projects/uploads/{bot_id}/skills/{skill_name}/ 目录
        - 文件大小限制: 50MB
        - 解压后大小限制: 500MB
    """
-    try:
+    file_path = None  # 初始化以便在异常处理中使用
        logger.info(f"Skill upload - bot_id parameter: {bot_id}")
        logger.info(f"File received: {file.filename if file else 'None'}")
-        # 验证bot_id
+    try:
        # 验证 bot_id (P1-006 路径遍历防护)
        if not bot_id:
-            raise HTTPException(status_code=400, detail="bot_id is required")
+            raise HTTPException(status_code=400, detail="bot_id 不能为空")
        bot_id = validate_bot_id(bot_id)
        # 验证文件名
        if not file.filename:
            raise HTTPException(status_code=400, detail="文件名不能为空")
        logger.info(f"Skill upload - bot_id: {bot_id}, filename: {file.filename}")
        # 验证是否为zip文件
        original_filename = file.filename
        name_without_ext, file_extension = os.path.splitext(original_filename)
@ -220,53 +355,59 @@ async def upload_skill(file: UploadFile = File(...), bot_id: Optional[str] = For
        if file_extension.lower() != '.zip':
            raise HTTPException(status_code=400, detail="仅支持上传.zip格式的skill文件")
        # P1-004: 验证文件大小（异步读取，不阻塞事件循环）
        file_size = await validate_upload_file_size(file)
        logger.info(f"File size: {file_size // 1024}KB")
        folder_name = name_without_ext
        # 创建上传目录
        upload_dir = os.path.join("projects", "uploads", bot_id, "skill_zip")
        extract_target = os.path.join("projects", "uploads", bot_id, "skills", folder_name)
        os.makedirs(extract_target, exist_ok=True)
        os.makedirs(upload_dir, exist_ok=True)
-        try:
+        # 使用线程池避免阻塞
-            # 保存zip文件（使用原始文件名）
+        await asyncio.to_thread(os.makedirs, extract_target, exist_ok=True)
-            file_path = os.path.join(upload_dir, original_filename)
+        await asyncio.to_thread(os.makedirs, upload_dir, exist_ok=True)
            with open(file_path, "wb") as buffer:
                shutil.copyfileobj(file.file, buffer)
-            logger.info(f"Saved zip file: {file_path}")
+        # 保存zip文件路径
        file_path = os.path.join(upload_dir, original_filename)
-            # 解压zip文件
+        # P1-008: 异步保存文件（使用 aiofiles，不阻塞事件循环）
-            with zipfile.ZipFile(file_path, 'r') as zip_ref:
+        await save_upload_file_async(file, file_path)
-                zip_ref.extractall(extract_target)
+        logger.info(f"Saved zip file: {file_path}")
-            logger.info(f"Extracted to: {extract_target}")
+        # P1-001, P1-005: 安全解压（防止 ZipSlip 和 zip 炸弹）
        await safe_extract_zip(file_path, extract_target)
        logger.info(f"Extracted to: {extract_target}")
-            return {
+        return {
-                "success": True,
+            "success": True,
-                "message": f"Skill文件上传并解压成功",
+            "message": f"Skill文件上传并解压成功",
-                "file_path": file_path,
+            "file_path": file_path,
-                "extract_path": extract_target,
+            "extract_path": extract_target,
-                "original_filename": original_filename,
+            "original_filename": original_filename,
-                "skill_name": folder_name
+            "skill_name": folder_name
-            }
+        }
        except zipfile.BadZipFile:
            # 解压失败，删除已保存的zip文件
            if os.path.exists(file_path):
                os.remove(file_path)
            raise HTTPException(status_code=400, detail="上传的文件不是有效的zip文件")
        except Exception as e:
            # 解压失败，删除已保存的zip文件
            if os.path.exists(file_path):
                os.remove(file_path)
            logger.error(f"解压失败: {str(e)}")
            raise HTTPException(status_code=500, detail=f"解压失败: {str(e)}")
    except HTTPException:
        # 清理已上传的文件
        if file_path and os.path.exists(file_path):
            try:
                await asyncio.to_thread(os.remove, file_path)
                logger.info(f"Cleaned up file: {file_path}")
            except Exception as cleanup_error:
                logger.error(f"Failed to cleanup file: {cleanup_error}")
        raise
    except Exception as e:
-        import traceback
+        # 清理已上传的文件
-        error_details = traceback.format_exc()
+        if file_path and os.path.exists(file_path):
            try:
                await asyncio.to_thread(os.remove, file_path)
                logger.info(f"Cleaned up file: {file_path}")
            except Exception as cleanup_error:
                logger.error(f"Failed to cleanup file: {cleanup_error}")
        logger.error(f"Error uploading skill file: {str(e)}")
-        logger.error(f"Full traceback: {error_details}")
+        # 不暴露详细错误信息给客户端（安全考虑）
-        raise HTTPException(status_code=500, detail=f"Skill文件上传失败: {str(e)}")
+        raise HTTPException(status_code=500, detail="Skill文件上传失败")