187 lines
6.2 KiB
Python
187 lines
6.2 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
Performance test script - verifies the effect of the RAG file preloading optimization.
|
||
"""
|
||
|
||
import time
|
||
import asyncio
|
||
from typing import List
|
||
|
||
from file_loaded_agent_manager import get_global_agent_manager
|
||
from zip_project_handler import zip_handler
|
||
from gbase_agent import init_agent_service_with_files
|
||
|
||
|
||
async def test_file_loading_performance():
    """Time a first agent creation against a repeated request for the same agent.

    Calls ``get_or_create_agent`` on the global agent manager twice with
    identical arguments, times each call, and prints both timings together
    with the manager's cache statistics.

    Returns:
        dict: ``first_create_time`` and ``reuse_time`` in seconds,
        ``speedup`` (first / reuse, or 0 when the reuse interval measured
        as zero), ``is_same_instance`` (whether both calls returned the
        very same object) and ``cached_instances`` (taken from the cache
        statistics).
    """
    print("=== RAG 文件预加载性能测试 ===")

    # Test data
    test_zip_url = "https://example.com/test.zip"  # replace with a real test URL
    test_files = [
        "./projects/7f2fdcb1bad17323/all_hp_product_spec_book2506/document.txt"
    ]

    manager = get_global_agent_manager()

    print(f"测试文件数量: {len(test_files)}")

    # First request. perf_counter() is used instead of time.time() because it
    # is monotonic and high-resolution, which matters for the very short
    # interval expected on the second (reuse) call.
    print("\n1. 测试首次创建助手实例(包含文件预加载)...")
    start_time = time.perf_counter()

    agent1 = await manager.get_or_create_agent(
        zip_url=test_zip_url,
        files=test_files,
        model_name="qwen3-next",
    )

    first_create_time = time.perf_counter() - start_time
    print(f" 首次创建耗时: {first_create_time:.2f} 秒")

    # Second request with the same arguments.
    print("\n2. 测试复用助手实例(无文件预加载)...")
    start_time = time.perf_counter()

    agent2 = await manager.get_or_create_agent(
        zip_url=test_zip_url,
        files=test_files,
        model_name="qwen3-next",
    )

    reuse_time = time.perf_counter() - start_time
    print(f" 复用实例耗时: {reuse_time:.2f} 秒")

    # Check that both requests yielded the same object.
    is_same_instance = agent1 is agent2
    print(f" 是否为同一实例: {is_same_instance}")

    # Speedup ratio; bound on every path so the return value never depends
    # on a conditionally-defined name.
    speedup = first_create_time / reuse_time if reuse_time > 0 else 0
    if reuse_time > 0:
        print(f" 性能提升倍数: {speedup:.1f}x")

    # Cache statistics
    stats = manager.get_cache_stats()
    print("\n3. 缓存统计:")
    print(f" 缓存的实例数: {stats['total_cached_agents']}")
    print(f" 最大缓存数: {stats['max_cached_agents']}")

    if stats['agents']:
        for cache_key, info in stats['agents'].items():
            print(f" 实例 {cache_key}:")
            print(f" 文件数: {info['file_count']}")
            print(f" 创建时间: {info['created_at']:.2f}")
            print(f" 最后访问: {info['last_accessed']:.2f}")
            print(f" 空闲时间: {info['idle_seconds']} 秒")

    return {
        'first_create_time': first_create_time,
        'reuse_time': reuse_time,
        'speedup': speedup,
        'is_same_instance': is_same_instance,
        'cached_instances': stats['total_cached_agents'],
    }
|
||
|
||
|
||
def test_file_collection():
    """Collect document files under the handler's projects directory.

    Prints how many files were found and lists at most the first five.

    Returns:
        int: number of files found, or 0 when the projects directory does
        not exist.
    """
    print("\n=== 文件收集功能测试 ===")

    # Guard: nothing to collect when the projects directory is missing.
    if not zip_handler.projects_dir.exists():
        print(f"项目目录 {zip_handler.projects_dir} 不存在")
        return 0

    files = zip_handler.collect_document_files(str(zip_handler.projects_dir))
    print(f"在 {zip_handler.projects_dir} 中找到 {len(files)} 个 document.txt 文件")

    # Show only the first five entries, numbered from 1.
    for position, path in enumerate(files[:5], start=1):
        print(f" {position}. {path}")

    return len(files)
|
||
|
||
|
||
async def test_comparison_with_old_method():
    """Compare building an agent directly with requesting one from the manager.

    Runs three direct ``init_agent_service_with_files`` calls, then three
    ``get_or_create_agent`` calls on the global manager, and prints the total
    time of each series plus the ratio and the time saved.

    Returns:
        None. Returns early (after printing a notice) when any of the test
        files is missing on disk.
    """
    # Imported locally so this function does not depend on `os` having been
    # bound at module level by some other code path; without it the
    # existence check below raises NameError when the module is imported
    # rather than executed as a script.
    import os

    print("\n=== 传统方法 vs 优化方法对比 ===")

    test_files = [
        "./projects/7f2fdcb1bad17323/all_hp_product_spec_book2506/document.txt"
    ]

    if not all(os.path.exists(f) for f in test_files):
        print("测试文件不存在,跳过对比测试")
        return

    # Traditional approach: build a new instance on every iteration.
    print("1. 传统方法 - 每次创建新实例并重新加载文件...")
    start_time = time.perf_counter()

    for i in range(3):
        init_agent_service_with_files(files=test_files)
        print(f" 创建实例 {i+1} 完成")

    traditional_time = time.perf_counter() - start_time
    print(f" 传统方法总耗时: {traditional_time:.2f} 秒")

    # Optimized approach: go through the agent manager.
    print("\n2. 优化方法 - 复用缓存的实例...")
    start_time = time.perf_counter()

    manager = get_global_agent_manager()
    test_url = "test://comparison"

    for i in range(3):
        # A distinct URL is used per iteration to avoid the cache.
        # NOTE(review): with distinct URLs each call presumably creates a
        # new instance, so this series may not measure cache reuse despite
        # the heading printed above - confirm the intent.
        await manager.get_or_create_agent(
            zip_url=f"{test_url}_{i}",
            files=test_files,
            model_name="qwen3-next",
        )
        print(f" 获取实例 {i+1} 完成")

    optimized_time = time.perf_counter() - start_time
    print(f" 优化方法总耗时: {optimized_time:.2f} 秒")

    # Report the improvement; both divisions are guarded against a zero
    # denominator.
    if optimized_time > 0:
        speedup = traditional_time / optimized_time
        saved = traditional_time - optimized_time
        saved_pct = saved / traditional_time * 100 if traditional_time > 0 else 0.0
        print(f"\n 性能提升: {speedup:.1f}x")
        print(f" 时间节省: {saved:.2f} 秒 ({saved_pct:.1f}%)")
|
||
|
||
|
||
async def main():
    """Run the file-collection check, then both timing tests, and print a summary.

    The timing tests only run when the collection check reports at least one
    file. Any exception raised along the way is printed with its traceback
    instead of being propagated.
    """
    separator = "=" * 50

    print("开始 RAG 文件预加载优化测试...")
    print(separator)

    try:
        # File collection check
        file_count = test_file_collection()

        # Without any files there is nothing to time.
        if file_count <= 0:
            print("未找到测试文件,跳过性能测试")
            return

        # Timing test
        performance_results = await test_file_loading_performance()

        # Comparison test
        await test_comparison_with_old_method()

        # Summary
        print(f"\n{separator}")
        print("测试总结:")
        print(f"✓ 文件收集功能正常,找到 {file_count} 个文件")
        print("✓ 助手实例缓存正常工作")
        print(f"✓ 性能提升 {performance_results['speedup']:.1f}x")
        print(f"✓ 实例复用功能正常: {performance_results['is_same_instance']}")
        print("\n所有测试通过!RAG 文件预加载优化成功。")

    except Exception as exc:
        print(f"\n测试过程中出现错误: {exc}")
        import traceback
        traceback.print_exc()
|
||
|
||
|
||
# Script entry point: only runs when the file is executed directly.
if __name__ == "__main__":
    # Binds `os` as a module-level name for the duration of the script run.
    import os

    # Drive the async test suite on a fresh event loop.
    asyncio.run(main())