feat(skills): add security hardening to skill upload endpoint

- Add ZipSlip path traversal protection (validate all file paths)
- Add file size limits (50MB upload, 500MB extracted)
- Add zip bomb protection (max 100:1 compression ratio, 1000 entries)
- Add async I/O using aiofiles to avoid blocking event loop
- Add bot_id validation to prevent path traversal attacks
- Add proper error cleanup on upload failures

Security improvements:
- P1-001: ZipSlip path traversal防护
- P1-004: File size limits (50MB)
- P1-005: Zip bomb防护 (compression ratio check)
- P1-008: Async I/O improvements

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
朱潮 2026-01-07 20:25:37 +08:00
parent b3303ef8e6
commit b8e57b2f51

View File

@ -3,10 +3,12 @@ import re
import shutil import shutil
import zipfile import zipfile
import logging import logging
import asyncio
from typing import List, Optional from typing import List, Optional
from fastapi import APIRouter, HTTPException, Query, UploadFile, File, Form from fastapi import APIRouter, HTTPException, Query, UploadFile, File, Form
from pydantic import BaseModel from pydantic import BaseModel
from utils.settings import SKILLS_DIR from utils.settings import SKILLS_DIR
import aiofiles
logger = logging.getLogger('app') logger = logging.getLogger('app')
@ -24,6 +26,129 @@ class SkillListResponse(BaseModel):
total: int total: int
# ============ 安全常量 ============
MAX_FILE_SIZE = 50 * 1024 * 1024 # 50MB 最大上传文件大小
MAX_UNCOMPRESSED_SIZE = 500 * 1024 * 1024 # 500MB 解压后最大大小
MAX_COMPRESSION_RATIO = 100 # 最大压缩比例 100:1
MAX_ZIP_ENTRIES = 1000 # zip 文件中最多文件数量
def validate_bot_id(bot_id: str) -> str:
"""验证 bot_id 格式,防止路径遍历攻击"""
if not bot_id:
raise HTTPException(status_code=400, detail="bot_id 不能为空")
# 检查路径遍历字符
if '..' in bot_id or '/' in bot_id or '\\' in bot_id:
raise HTTPException(status_code=400, detail="bot_id 包含非法字符")
# 验证 UUID 格式(可选,根据实际需求)
uuid_pattern = r'^[a-fA-F0-9-]{36}$'
if not re.match(uuid_pattern, bot_id):
logger.warning(f"bot_id 格式可能无效: {bot_id}")
return bot_id
async def validate_upload_file_size(file: UploadFile) -> int:
"""验证上传文件大小,返回实际文件大小"""
file_size = 0
chunk_size = 8192
# 保存当前位置以便后续重置
await file.seek(0)
while chunk := await file.read(chunk_size):
file_size += len(chunk)
if file_size > MAX_FILE_SIZE:
await file.seek(0) # 重置文件指针
raise HTTPException(
status_code=413,
detail=f"文件过大,最大允许 {MAX_FILE_SIZE // (1024*1024)}MB"
)
await file.seek(0) # 重置文件指针供后续使用
return file_size
async def safe_extract_zip(zip_path: str, extract_dir: str) -> None:
"""安全地解压 zip 文件,防止 ZipSlip 和 zip 炸弹攻击
Args:
zip_path: zip 文件路径
extract_dir: 解压目标目录
Raises:
HTTPException: 如果检测到恶意文件
"""
try:
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
# 检查文件数量
file_list = zip_ref.infolist()
if len(file_list) > MAX_ZIP_ENTRIES:
raise zipfile.BadZipFile(f"zip 文件包含过多文件: {len(file_list)}")
# 检查压缩比例和总大小
compressed_size = sum(z.file_size for z in file_list)
uncompressed_size = sum(z.compress_size for z in file_list)
if uncompressed_size > MAX_UNCOMPRESSED_SIZE:
raise zipfile.BadZipFile(
f"解压后大小 {uncompressed_size // (1024*1024)}MB 超过限制 "
f"{MAX_UNCOMPRESSED_SIZE // (1024*1024)}MB"
)
# 检查压缩比例(防止 zip 炸弹)
if compressed_size > 0:
ratio = uncompressed_size / compressed_size
if ratio > MAX_COMPRESSION_RATIO:
raise zipfile.BadZipFile(
f"压缩比例 {ratio:.1f}:1 超过限制 {MAX_COMPRESSION_RATIO}:1"
f"可能是 zip 炸弹攻击"
)
# 规范化目标目录路径
extract_dir_real = os.path.realpath(extract_dir)
# 安全地解压每个文件
for zip_info in file_list:
# 检查路径遍历攻击
if '..' in zip_info.filename or zip_info.filename.startswith('/'):
raise zipfile.BadZipFile(
f"检测到路径遍历攻击: {zip_info.filename}"
)
# 构建完整的目标路径
target_path = os.path.realpath(os.path.join(extract_dir, zip_info.filename))
# 确保目标路径在解压目录内
if not target_path.startswith(extract_dir_real + os.sep):
if target_path != extract_dir_real: # 允许目录本身
raise zipfile.BadZipFile(
f"文件将被解压到目标目录之外: {zip_info.filename}"
)
# 检查符号链接
if zip_info.is_symlink():
raise zipfile.BadZipFile(
f"不允许符号链接: {zip_info.filename}"
)
# 解压文件(使用线程池避免阻塞)
await asyncio.to_thread(zip_ref.extract, zip_info, extract_dir)
except zipfile.BadZipFile as e:
raise HTTPException(status_code=400, detail=f"无效的 zip 文件: {str(e)}")
async def save_upload_file_async(file: UploadFile, destination: str) -> None:
"""异步保存上传文件到目标路径"""
async with aiofiles.open(destination, 'wb') as f:
chunk_size = 8192
while chunk := await file.read(chunk_size):
await f.write(chunk)
def parse_skill_frontmatter(skill_md_path: str) -> Optional[dict]: def parse_skill_frontmatter(skill_md_path: str) -> Optional[dict]:
"""Parse the YAML frontmatter from SKILL.md file """Parse the YAML frontmatter from SKILL.md file
@ -190,6 +315,12 @@ async def upload_skill(file: UploadFile = File(...), bot_id: Optional[str] = For
""" """
Skill文件上传API接口上传zip文件到 ./projects/uploads/ 目录下并自动解压 Skill文件上传API接口上传zip文件到 ./projects/uploads/ 目录下并自动解压
安全改进:
- P1-001: ZipSlip 路径遍历防护 - 检查每个文件的解压路径
- P1-004: 文件大小限制 - 最大 50MB
- P1-005: Zip 炸弹防护 - 检查压缩比例最大 100:1和解压后大小最大 500MB
- P1-008: 异步 I/O - 使用 aiofiles asyncio.to_thread
Args: Args:
file: 上传的zip文件 file: 上传的zip文件
bot_id: Bot ID用于创建用户专属的skills目录 bot_id: Bot ID用于创建用户专属的skills目录
@ -200,19 +331,23 @@ async def upload_skill(file: UploadFile = File(...), bot_id: Optional[str] = For
Notes: Notes:
- 仅支持.zip格式的skill文件 - 仅支持.zip格式的skill文件
- 上传后会自动解压到 projects/uploads/{bot_id}/skills/{skill_name}/ 目录 - 上传后会自动解压到 projects/uploads/{bot_id}/skills/{skill_name}/ 目录
- 文件大小限制: 50MB
- 解压后大小限制: 500MB
""" """
try: file_path = None # 初始化以便在异常处理中使用
logger.info(f"Skill upload - bot_id parameter: {bot_id}")
logger.info(f"File received: {file.filename if file else 'None'}")
# 验证bot_id try:
# 验证 bot_id (P1-006 路径遍历防护)
if not bot_id: if not bot_id:
raise HTTPException(status_code=400, detail="bot_id is required") raise HTTPException(status_code=400, detail="bot_id 不能为空")
bot_id = validate_bot_id(bot_id)
# 验证文件名 # 验证文件名
if not file.filename: if not file.filename:
raise HTTPException(status_code=400, detail="文件名不能为空") raise HTTPException(status_code=400, detail="文件名不能为空")
logger.info(f"Skill upload - bot_id: {bot_id}, filename: {file.filename}")
# 验证是否为zip文件 # 验证是否为zip文件
original_filename = file.filename original_filename = file.filename
name_without_ext, file_extension = os.path.splitext(original_filename) name_without_ext, file_extension = os.path.splitext(original_filename)
@ -220,53 +355,59 @@ async def upload_skill(file: UploadFile = File(...), bot_id: Optional[str] = For
if file_extension.lower() != '.zip': if file_extension.lower() != '.zip':
raise HTTPException(status_code=400, detail="仅支持上传.zip格式的skill文件") raise HTTPException(status_code=400, detail="仅支持上传.zip格式的skill文件")
# P1-004: 验证文件大小(异步读取,不阻塞事件循环)
file_size = await validate_upload_file_size(file)
logger.info(f"File size: {file_size // 1024}KB")
folder_name = name_without_ext folder_name = name_without_ext
# 创建上传目录 # 创建上传目录
upload_dir = os.path.join("projects", "uploads", bot_id, "skill_zip") upload_dir = os.path.join("projects", "uploads", bot_id, "skill_zip")
extract_target = os.path.join("projects", "uploads", bot_id, "skills", folder_name) extract_target = os.path.join("projects", "uploads", bot_id, "skills", folder_name)
os.makedirs(extract_target, exist_ok=True)
os.makedirs(upload_dir, exist_ok=True)
try: # 使用线程池避免阻塞
# 保存zip文件使用原始文件名 await asyncio.to_thread(os.makedirs, extract_target, exist_ok=True)
file_path = os.path.join(upload_dir, original_filename) await asyncio.to_thread(os.makedirs, upload_dir, exist_ok=True)
with open(file_path, "wb") as buffer:
shutil.copyfileobj(file.file, buffer)
logger.info(f"Saved zip file: {file_path}") # 保存zip文件路径
file_path = os.path.join(upload_dir, original_filename)
# 解压zip文件 # P1-008: 异步保存文件(使用 aiofiles不阻塞事件循环
with zipfile.ZipFile(file_path, 'r') as zip_ref: await save_upload_file_async(file, file_path)
zip_ref.extractall(extract_target) logger.info(f"Saved zip file: {file_path}")
logger.info(f"Extracted to: {extract_target}") # P1-001, P1-005: 安全解压(防止 ZipSlip 和 zip 炸弹)
await safe_extract_zip(file_path, extract_target)
logger.info(f"Extracted to: {extract_target}")
return { return {
"success": True, "success": True,
"message": f"Skill文件上传并解压成功", "message": f"Skill文件上传并解压成功",
"file_path": file_path, "file_path": file_path,
"extract_path": extract_target, "extract_path": extract_target,
"original_filename": original_filename, "original_filename": original_filename,
"skill_name": folder_name "skill_name": folder_name
} }
except zipfile.BadZipFile:
# 解压失败删除已保存的zip文件
if os.path.exists(file_path):
os.remove(file_path)
raise HTTPException(status_code=400, detail="上传的文件不是有效的zip文件")
except Exception as e:
# 解压失败删除已保存的zip文件
if os.path.exists(file_path):
os.remove(file_path)
logger.error(f"解压失败: {str(e)}")
raise HTTPException(status_code=500, detail=f"解压失败: {str(e)}")
except HTTPException: except HTTPException:
# 清理已上传的文件
if file_path and os.path.exists(file_path):
try:
await asyncio.to_thread(os.remove, file_path)
logger.info(f"Cleaned up file: {file_path}")
except Exception as cleanup_error:
logger.error(f"Failed to cleanup file: {cleanup_error}")
raise raise
except Exception as e: except Exception as e:
import traceback # 清理已上传的文件
error_details = traceback.format_exc() if file_path and os.path.exists(file_path):
try:
await asyncio.to_thread(os.remove, file_path)
logger.info(f"Cleaned up file: {file_path}")
except Exception as cleanup_error:
logger.error(f"Failed to cleanup file: {cleanup_error}")
logger.error(f"Error uploading skill file: {str(e)}") logger.error(f"Error uploading skill file: {str(e)}")
logger.error(f"Full traceback: {error_details}") # 不暴露详细错误信息给客户端(安全考虑)
raise HTTPException(status_code=500, detail=f"Skill文件上传失败: {str(e)}") raise HTTPException(status_code=500, detail="Skill文件上传失败")