- Add ZipSlip path traversal protection (validate all file paths)
- Add file size limits (50MB upload, 500MB extracted)
- Add zip bomb protection (max 100:1 compression ratio, 1000 entries)
- Add async I/O using aiofiles to avoid blocking the event loop
- Add bot_id validation to prevent path traversal attacks
- Add proper error cleanup on upload failures

Security improvements:
- P1-001: ZipSlip path traversal protection
- P1-004: File size limits (50MB)
- P1-005: Zip bomb protection (compression ratio check)
- P1-008: Async I/O improvements

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
414 lines
14 KiB
Python
414 lines
14 KiB
Python
import os
|
||
import re
|
||
import shutil
|
||
import zipfile
|
||
import logging
|
||
import asyncio
|
||
from typing import List, Optional
|
||
from fastapi import APIRouter, HTTPException, Query, UploadFile, File, Form
|
||
from pydantic import BaseModel
|
||
from utils.settings import SKILLS_DIR
|
||
import aiofiles
|
||
|
||
# Module-level logger bound to the shared 'app' logger name.
logger = logging.getLogger('app')
|
||
|
||
# Router on which the skill endpoints defined in this module are registered.
router = APIRouter()
|
||
|
||
|
||
class SkillItem(BaseModel):
    """One skill entry as returned by the skill listing endpoint."""

    # Skill name read from the SKILL.md frontmatter.
    name: str
    # Skill description read from the SKILL.md frontmatter.
    description: str
    # True for skills uploaded for a bot, False for official skills.
    user_skill: bool = False
|
||
|
||
|
||
class SkillListResponse(BaseModel):
    """Response body of the skill listing endpoint."""

    # Skills visible to the bot: user-uploaded skills first, then official ones.
    skills: List[SkillItem]
    # Number of entries in ``skills``.
    total: int
|
||
|
||
|
||
# ============ Security limits ============
MAX_FILE_SIZE = 50 * 1024 ** 2  # largest accepted upload: 50MB
MAX_UNCOMPRESSED_SIZE = 500 * 1024 ** 2  # largest total size after extraction: 500MB
MAX_COMPRESSION_RATIO = 100  # highest accepted compression ratio (100:1)
MAX_ZIP_ENTRIES = 1000  # largest number of entries allowed in one zip
|
||
|
||
|
||
def validate_bot_id(bot_id: str) -> str:
    """Validate a bot_id before it is used as a filesystem path component.

    Args:
        bot_id: Identifier supplied by the client.

    Returns:
        The unchanged bot_id when it is safe to use.

    Raises:
        HTTPException: 400 when bot_id is empty or contains characters that
            could change the resulting filesystem path.
    """
    if not bot_id:
        raise HTTPException(status_code=400, detail="bot_id 不能为空")

    # Separators and '..' allow escaping the per-bot directory.  A bare '.'
    # collapses onto the parent directory, and NUL/control characters either
    # make os calls raise ValueError (surfacing as a 500) or allow forged log
    # lines, so all of them are rejected up front.
    has_traversal = '..' in bot_id or '/' in bot_id or '\\' in bot_id
    has_control = any(ord(ch) < 32 or ord(ch) == 127 for ch in bot_id)
    if has_traversal or bot_id == '.' or has_control:
        raise HTTPException(status_code=400, detail="bot_id 包含非法字符")

    # The UUID shape is advisory only: non-matching ids are logged, not
    # rejected.  fullmatch is used so the whole string must fit the pattern.
    if not re.fullmatch(r'[a-fA-F0-9-]{36}', bot_id):
        logger.warning(f"bot_id 格式可能无效: {bot_id}")

    return bot_id
|
||
|
||
|
||
async def validate_upload_file_size(file: UploadFile) -> int:
    """Measure an upload by reading it through and enforce MAX_FILE_SIZE.

    Args:
        file: The uploaded file; it is rewound to offset 0 before returning
            or raising.

    Returns:
        The total number of bytes read from the upload.

    Raises:
        HTTPException: 413 as soon as the running total exceeds MAX_FILE_SIZE.
    """
    read_block = 8192
    total_bytes = 0

    # Start from the beginning so the whole upload is counted.
    await file.seek(0)

    while True:
        piece = await file.read(read_block)
        if not piece:
            break
        total_bytes += len(piece)
        if total_bytes <= MAX_FILE_SIZE:
            continue
        await file.seek(0)  # rewind before reporting the failure
        raise HTTPException(
            status_code=413,
            detail=f"文件过大,最大允许 {MAX_FILE_SIZE // (1024*1024)}MB"
        )

    await file.seek(0)  # rewind so later readers see the full content
    return total_bytes
|
||
|
||
|
||
async def safe_extract_zip(zip_path: str, extract_dir: str) -> None:
    """Extract a zip archive while guarding against ZipSlip and zip bombs.

    Every entry is validated before anything is written, so a rejected
    archive leaves no partially extracted files behind.

    Args:
        zip_path: Path of the zip file to extract.
        extract_dir: Directory the entries are extracted into.

    Raises:
        HTTPException: 400 when the archive is malformed, exceeds the entry,
            size or compression-ratio limits, or contains an entry that is a
            symlink or would land outside ``extract_dir``.
    """
    import stat  # local import: only needed for the symlink check below

    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            # Limit the number of entries.
            file_list = zip_ref.infolist()
            if len(file_list) > MAX_ZIP_ENTRIES:
                raise zipfile.BadZipFile(f"zip 文件包含过多文件: {len(file_list)}")

            # ZipInfo.file_size is the UNCOMPRESSED size and compress_size is
            # the size stored in the archive; the two must not be mixed up or
            # the bomb check is inverted.
            uncompressed_size = sum(z.file_size for z in file_list)
            compressed_size = sum(z.compress_size for z in file_list)

            if uncompressed_size > MAX_UNCOMPRESSED_SIZE:
                raise zipfile.BadZipFile(
                    f"解压后大小 {uncompressed_size // (1024*1024)}MB 超过限制 "
                    f"{MAX_UNCOMPRESSED_SIZE // (1024*1024)}MB"
                )

            # Compression-ratio check (zip bomb protection).  Data that claims
            # to expand from zero stored bytes is treated as an infinite ratio.
            if uncompressed_size > 0:
                if compressed_size > 0:
                    ratio = uncompressed_size / compressed_size
                else:
                    ratio = float('inf')
                if ratio > MAX_COMPRESSION_RATIO:
                    raise zipfile.BadZipFile(
                        f"压缩比例 {ratio:.1f}:1 超过限制 {MAX_COMPRESSION_RATIO}:1,"
                        f"可能是 zip 炸弹攻击"
                    )

            # Canonical form of the target directory for containment checks.
            extract_dir_real = os.path.realpath(extract_dir)

            # Pass 1: validate every entry before touching the filesystem.
            for zip_info in file_list:
                entry_name = zip_info.filename

                # Reject absolute paths and any '..' path component.  Checking
                # components (not substrings) keeps names like "notes..txt".
                components = entry_name.replace('\\', '/').split('/')
                if '..' in components or entry_name.startswith(('/', '\\')):
                    raise zipfile.BadZipFile(
                        f"检测到路径遍历攻击: {zip_info.filename}"
                    )

                # The resolved destination must be the extraction directory
                # itself or lie below it.
                target_path = os.path.realpath(os.path.join(extract_dir, entry_name))
                inside = (
                    target_path == extract_dir_real
                    or target_path.startswith(extract_dir_real + os.sep)
                )
                if not inside:
                    raise zipfile.BadZipFile(
                        f"文件将被解压到目标目录之外: {zip_info.filename}"
                    )

                # Symlink entries carry the Unix file mode in the high 16 bits
                # of external_attr; this avoids calling is_symlink(), which is
                # not part of the documented ZipInfo API.
                if stat.S_ISLNK(zip_info.external_attr >> 16):
                    raise zipfile.BadZipFile(
                        f"不允许符号链接: {zip_info.filename}"
                    )

            # Pass 2: extract in a worker thread so the event loop stays free.
            for zip_info in file_list:
                await asyncio.to_thread(zip_ref.extract, zip_info, extract_dir)

    except zipfile.BadZipFile as e:
        raise HTTPException(status_code=400, detail=f"无效的 zip 文件: {str(e)}")
|
||
|
||
|
||
async def save_upload_file_async(file: UploadFile, destination: str) -> None:
    """Copy the upload to ``destination`` in 8 KiB chunks using aiofiles.

    Args:
        file: The uploaded file, read from its current position to the end.
        destination: Path of the file to create or overwrite.
    """
    block_size = 8192
    async with aiofiles.open(destination, 'wb') as out:
        while True:
            data = await file.read(block_size)
            if not data:
                break
            await out.write(data)
|
||
|
||
|
||
def parse_skill_frontmatter(skill_md_path: str) -> Optional[dict]:
    """Parse the YAML-style frontmatter at the top of a SKILL.md file.

    Only flat ``key: value`` lines are understood.  Values wrapped in a
    matching pair of single or double quotes have the quotes removed, and
    ``#`` comment lines are ignored.

    Args:
        skill_md_path: Path to the SKILL.md file.

    Returns:
        A dict with 'name' and 'description' when both are present,
        otherwise None (also on any read or parse error).
    """
    try:
        # utf-8-sig drops a leading BOM, which would otherwise prevent the
        # '^---' anchor below from matching.
        with open(skill_md_path, 'r', encoding='utf-8-sig') as f:
            content = f.read()

        # Frontmatter is the text between the first two '---' delimiters.
        frontmatter_match = re.match(r'^---\s*\n(.*?)\n---', content, re.DOTALL)
        if not frontmatter_match:
            logger.warning(f"No frontmatter found in {skill_md_path}")
            return None

        metadata = {}
        for line in frontmatter_match.group(1).split('\n'):
            line = line.strip()
            if not line or line.startswith('#') or ':' not in line:
                continue
            # Split on the first colon only so values may contain colons.
            key, value = line.split(':', 1)
            value = value.strip()
            # Quoted scalars: the quotes are syntax, not part of the value.
            if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
                value = value[1:-1]
            metadata[key.strip()] = value

        if 'name' in metadata and 'description' in metadata:
            return {
                'name': metadata['name'],
                'description': metadata['description']
            }

        logger.warning(f"Missing name or description in {skill_md_path}")
        return None

    except Exception as e:
        # Best effort: an unreadable skill is skipped rather than failing
        # the whole listing.
        logger.error(f"Error parsing {skill_md_path}: {e}")
        return None
|
||
|
||
|
||
def get_official_skills(base_dir: str) -> List[SkillItem]:
    """Collect the official skills found under the configured skills directory.

    Args:
        base_dir: Project base directory; used to resolve SKILLS_DIR when it
            is a relative path.

    Returns:
        One SkillItem (user_skill=False) per sub-directory whose SKILL.md
        yields both a name and a description.
    """
    # An absolute SKILLS_DIR is used as-is; a relative one hangs off base_dir.
    if os.path.isabs(SKILLS_DIR):
        official_skills_dir = SKILLS_DIR
    else:
        official_skills_dir = os.path.join(base_dir, SKILLS_DIR)

    found = []
    if not os.path.exists(official_skills_dir):
        logger.warning(f"Official skills directory not found: {official_skills_dir}")
        return found

    for entry in os.listdir(official_skills_dir):
        candidate = os.path.join(official_skills_dir, entry)
        if not os.path.isdir(candidate):
            continue

        manifest = os.path.join(candidate, 'SKILL.md')
        if not os.path.exists(manifest):
            continue

        info = parse_skill_frontmatter(manifest)
        if not info:
            continue

        found.append(SkillItem(
            name=info['name'],
            description=info['description'],
            user_skill=False
        ))
        logger.debug(f"Found official skill: {info['name']}")

    return found
|
||
|
||
|
||
def get_user_skills(base_dir: str, bot_id: str) -> List[SkillItem]:
    """Collect the skills uploaded for one bot.

    Args:
        base_dir: Project base directory.
        bot_id: Bot whose ``projects/uploads/<bot_id>/skills`` directory is
            scanned.

    Returns:
        One SkillItem (user_skill=True) per sub-directory whose SKILL.md
        yields both a name and a description.
    """
    user_skills_dir = os.path.join(base_dir, 'projects', 'uploads', bot_id, 'skills')

    found = []
    if not os.path.exists(user_skills_dir):
        logger.info(f"No user skills directory found for bot {bot_id}: {user_skills_dir}")
        return found

    for entry in os.listdir(user_skills_dir):
        candidate = os.path.join(user_skills_dir, entry)
        if not os.path.isdir(candidate):
            continue

        manifest = os.path.join(candidate, 'SKILL.md')
        if not os.path.exists(manifest):
            continue

        info = parse_skill_frontmatter(manifest)
        if not info:
            continue

        found.append(SkillItem(
            name=info['name'],
            description=info['description'],
            user_skill=True
        ))
        logger.debug(f"Found user skill: {info['name']}")

    return found
|
||
|
||
|
||
@router.get("/api/v1/skill/list", response_model=SkillListResponse)
async def list_skills(
    bot_id: str = Query(..., description="Bot ID to fetch user skills for")
):
    """
    Get list of all available skills (official + user uploaded)

    Args:
        bot_id: Bot ID to fetch user uploaded skills for

    Returns:
        SkillListResponse containing all skills

    Raises:
        HTTPException: 400 when bot_id fails validation, 500 on any
            unexpected error while collecting skills.

    Notes:
        - Official skills are read from the /skills directory
        - User skills are read from /projects/uploads/{bot_id}/skills directory
        - User skills are marked with user_skill: true
    """
    try:
        # bot_id becomes a path component in get_user_skills, so it gets the
        # same validation as the upload endpoint.
        bot_id = validate_bot_id(bot_id)

        # Get the project base directory
        base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

        # The directory scans are blocking file I/O; run them in a worker
        # thread so the event loop is not stalled.
        official_skills = await asyncio.to_thread(get_official_skills, base_dir)
        user_skills = await asyncio.to_thread(get_user_skills, base_dir, bot_id)

        # Combine both lists (user skills first)
        all_skills = user_skills + official_skills

        logger.info(f"Found {len(official_skills)} official skills and {len(user_skills)} user skills for bot {bot_id}")

        return SkillListResponse(
            skills=all_skills,
            total=len(all_skills)
        )

    except HTTPException:
        # Deliberate client errors (e.g. the 400 from validation) must not be
        # rewritten into a 500 by the generic handler below.
        raise

    except Exception as e:
        # logger.exception records the message together with the traceback.
        logger.exception(f"Error in list_skills: {str(e)}")
        # NOTE(review): the detail echoes the exception text to the client,
        # unlike upload_skill which hides it; kept for compatibility.
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
|
||
|
||
|
||
async def _cleanup_failed_upload(file_path: Optional[str], extract_dir: Optional[str]) -> None:
    """Best-effort removal of the zip file and extraction directory of a failed upload."""
    if file_path and os.path.exists(file_path):
        try:
            await asyncio.to_thread(os.remove, file_path)
            logger.info(f"Cleaned up file: {file_path}")
        except Exception as cleanup_error:
            logger.error(f"Failed to cleanup file: {cleanup_error}")

    if extract_dir and os.path.isdir(extract_dir):
        try:
            await asyncio.to_thread(shutil.rmtree, extract_dir)
            logger.info(f"Cleaned up directory: {extract_dir}")
        except Exception as cleanup_error:
            logger.error(f"Failed to cleanup directory: {cleanup_error}")


@router.post("/api/v1/skill/upload")
async def upload_skill(file: UploadFile = File(...), bot_id: Optional[str] = Form(None)):
    """
    Skill upload endpoint: stores a zip under ./projects/uploads/ and extracts it.

    Security measures:
        - P1-001: ZipSlip protection - every extraction path is checked
        - P1-004: Upload size limit - at most 50MB
        - P1-005: Zip bomb protection - compression ratio (max 100:1) and
          extracted size (max 500MB) are checked
        - P1-008: Async I/O - aiofiles and asyncio.to_thread
        - The client-supplied filename is reduced to its final path
          component before it is used to build any path

    Args:
        file: The uploaded zip file
        bot_id: Bot ID, used to build the bot's own skills directory

    Returns:
        dict: success flag, message, stored zip path, extraction path,
        original filename and skill name

    Raises:
        HTTPException: 400 for a missing/invalid bot_id or filename, a
            non-zip file, or a rejected archive; 413 for an oversized
            upload; 500 for any unexpected failure.

    Notes:
        - Only .zip skill files are accepted
        - The archive is extracted to projects/uploads/{bot_id}/skills/{skill_name}/
        - Paths are relative to the process working directory
    """
    file_path = None  # set once the zip destination is known, for cleanup
    created_extract_dir = None  # set only when this call created the directory

    try:
        # Validate bot_id (P1-006 path traversal protection)
        if not bot_id:
            raise HTTPException(status_code=400, detail="bot_id 不能为空")
        bot_id = validate_bot_id(bot_id)

        # Validate the filename
        if not file.filename:
            raise HTTPException(status_code=400, detail="文件名不能为空")

        logger.info(f"Skill upload - bot_id: {bot_id}, filename: {file.filename}")

        # The filename is client-controlled: keep only its last component so
        # values like "../../x.zip" cannot steer the zip or the extraction
        # directory outside the bot's upload area.
        original_filename = os.path.basename(file.filename.replace('\\', '/'))
        name_without_ext, file_extension = os.path.splitext(original_filename)

        if file_extension.lower() != '.zip':
            raise HTTPException(status_code=400, detail="仅支持上传.zip格式的skill文件")

        folder_name = name_without_ext
        if not folder_name or folder_name in ('.', '..') or '\x00' in original_filename:
            raise HTTPException(status_code=400, detail="文件名包含非法字符")

        # P1-004: enforce the upload size limit
        file_size = await validate_upload_file_size(file)
        logger.info(f"File size: {file_size // 1024}KB")

        upload_dir = os.path.join("projects", "uploads", bot_id, "skill_zip")
        extract_target = os.path.join("projects", "uploads", bot_id, "skills", folder_name)

        # Remember whether this request creates the extraction directory, so
        # failure cleanup never deletes a previously installed skill.
        already_present = await asyncio.to_thread(os.path.exists, extract_target)
        await asyncio.to_thread(os.makedirs, extract_target, exist_ok=True)
        if not already_present:
            created_extract_dir = extract_target
        await asyncio.to_thread(os.makedirs, upload_dir, exist_ok=True)

        file_path = os.path.join(upload_dir, original_filename)

        # P1-008: store the zip with aiofiles
        await save_upload_file_async(file, file_path)
        logger.info(f"Saved zip file: {file_path}")

        # P1-001, P1-005: guarded extraction (ZipSlip and zip bomb checks)
        await safe_extract_zip(file_path, extract_target)
        logger.info(f"Extracted to: {extract_target}")

        return {
            "success": True,
            "message": "Skill文件上传并解压成功",
            "file_path": file_path,
            "extract_path": extract_target,
            "original_filename": original_filename,
            "skill_name": folder_name
        }

    except HTTPException:
        # Expected client errors: clean up, then propagate unchanged.
        await _cleanup_failed_upload(file_path, created_extract_dir)
        raise

    except Exception as e:
        await _cleanup_failed_upload(file_path, created_extract_dir)
        logger.error(f"Error uploading skill file: {str(e)}")
        # Do not expose internal error details to the client.
        raise HTTPException(status_code=500, detail="Skill文件上传失败")
|