205 lines
8.0 KiB
Python
205 lines
8.0 KiB
Python
"""Daytona sandbox 双向文件同步工具。"""
|
||
|
||
import io
|
||
import logging
|
||
import subprocess
|
||
import tarfile
|
||
import time
|
||
from pathlib import Path
|
||
|
||
from utils.settings import DAYTONA_API_KEY, DAYTONA_SERVER_URL, DAYTONA_ENABLED
|
||
|
||
# Shared module-level logger; the sync and sandbox functions below log to the logger named 'app'.
logger = logging.getLogger('app')
|
||
|
||
|
||
def _list_local_changed_files(workspace_path: Path) -> tuple[bool, list[str]]:
    """Report whether a first (full) sync is needed and which local files changed.

    The presence of ``<workspace_path>/.last_sync`` decides the mode. When the
    marker is missing, no scan is performed and a full sync is requested.
    Otherwise the external ``find`` command lists regular files whose
    modification time is newer than the marker's.

    Args:
        workspace_path: Local workspace directory to scan.

    Returns:
        ``(True, [])`` when the marker file does not exist; otherwise
        ``(False, paths)`` where ``paths`` are the entries printed by ``find``
        (each prefixed with ``workspace_path`` as passed in), excluding files
        named ``.last_sync`` or ``.DS_Store``.

    Raises:
        subprocess.TimeoutExpired: If ``find`` runs longer than 30 seconds.
    """
    marker_local = workspace_path / ".last_sync"
    if not marker_local.exists():
        return True, []

    result = subprocess.run(
        [
            "find", str(workspace_path), "-newer", str(marker_local), "-type", "f",
            "-not", "-name", ".last_sync", "-not", "-name", ".DS_Store",
            # NUL-terminate each path: newline-delimited output cannot represent
            # filenames that themselves contain newlines.
            "-print0",
        ],
        capture_output=True,
        text=True,
        timeout=30,
    )
    # Split on NUL without stripping, so whitespace that is part of a filename
    # is preserved; the trailing terminator yields one empty entry to drop.
    changed_files = [f for f in result.stdout.split('\0') if f]
    return False, changed_files
|
||
|
||
|
||
def init_daytona_sandbox(bot_id: str, local_workspace_root: str):
    """Initialize a Daytona sandbox for a bot, falling back to local mode on failure.

    When Daytona is enabled and configured, a sandbox named ``bot-<bot_id>`` is
    looked up. If the lookup succeeds, the sandbox is reused as-is when its
    state is ``"Started"`` or ``"Creating"``, and started otherwise. If the
    lookup or start raises, a volume named ``bot-<bot_id>`` is fetched (created
    if missing), polled until its state contains ``READY``, and a new sandbox
    is created with that volume mounted at ``/workspace``. The local workspace
    is then pushed to the sandbox via ``sync_workspace_to_sandbox``.

    Args:
        bot_id: Identifier used to derive the sandbox and volume names.
        local_workspace_root: Local workspace directory; returned unchanged as
            the workspace root when running in local mode.

    Returns:
        A ``(sandbox, sandbox_type, workspace_root)`` tuple:
        ``(DaytonaSandbox, "daytona", "/workspace")`` on success, or
        ``(None, None, local_workspace_root)`` when Daytona is disabled, not
        fully configured, or any step of the setup raised.
    """
    sandbox = None
    sandbox_type = None
    workspace_root = local_workspace_root

    if not (DAYTONA_ENABLED and DAYTONA_API_KEY and DAYTONA_SERVER_URL):
        return sandbox, sandbox_type, workspace_root

    try:
        # Imported inside the try so that a missing Daytona package is handled
        # by the local-mode fallback below instead of breaking module import.
        from daytona import Daytona, DaytonaConfig, VolumeMount, CreateSandboxFromSnapshotParams
        from langchain_daytona import DaytonaSandbox

        # Monotonic clock: elapsed-time logs must not jump with wall-clock changes.
        start_time = time.monotonic()
        daytona_config = DaytonaConfig(
            api_key=DAYTONA_API_KEY,
            api_url=DAYTONA_SERVER_URL,
        )
        daytona_client = Daytona(daytona_config)

        sandbox_name = f"bot-{bot_id}"
        sandbox_instance = None
        created_new_sandbox = False
        try:
            existing = daytona_client.get(sandbox_name)
            if existing.state in ("Started", "Creating"):
                sandbox_instance = existing
                logger.info(f"Reusing existing sandbox: {sandbox_instance.id} (state={existing.state})")
            else:
                existing.start()
                sandbox_instance = existing
                logger.info(f"Restarted existing sandbox: {sandbox_instance.id}")
        except Exception as lookup_err:
            # Any failure to fetch or start the existing sandbox leads to
            # creating a fresh one; record the reason so it is not lost.
            logger.info(f"Sandbox {sandbox_name} unavailable ({lookup_err!r}), creating a new one")

            volume_name = f"bot-{bot_id}"
            volume = daytona_client.volume.get(volume_name, create=True)

            # Poll once per second, up to 30 times, until the volume reports READY.
            for _ in range(30):
                volume = daytona_client.volume.get(volume_name)
                if "READY" in str(volume.state).upper():
                    break
                time.sleep(1)
            else:
                raise RuntimeError(f"Volume {volume_name} not ready after 30s, state: {volume.state}")

            sandbox_params = CreateSandboxFromSnapshotParams(
                name=sandbox_name,
                volumes=[VolumeMount(volume_id=volume.id, mount_path="/workspace")],
                env_vars={"BASH_ENV": "/home/daytona/.bash_env"},
            )
            sandbox_instance = daytona_client.create(sandbox_params)
            created_new_sandbox = True
            logger.info(f"Created new sandbox: {sandbox_instance.id}, volume: {volume.id}")

        logger.info(f"daytona get/start done, elapsed: {time.monotonic() - start_time:.3f}s")

        sandbox = DaytonaSandbox(sandbox=sandbox_instance)
        sandbox_type = "daytona"
        workspace_root = "/workspace"

        sync_workspace_to_sandbox(sandbox, local_workspace_root)
        logger.info(f"daytona sync done, elapsed: {time.monotonic() - start_time:.3f}s")

        if created_new_sandbox:
            # BASH_ENV points at this file (see env_vars above); create it only
            # if it does not exist yet, with a `cd /workspace` line.
            sandbox.execute("test -f /home/daytona/.bash_env || echo 'cd /workspace' > /home/daytona/.bash_env")
            logger.info(f"daytona bash_env done, elapsed: {time.monotonic() - start_time:.3f}s")
    except Exception as e:
        # Top-level boundary: never propagate, degrade to local mode instead.
        # exc_info keeps the traceback so the root cause can be diagnosed.
        logger.error(f"Failed to create Daytona sandbox: {e}, falling back to local mode", exc_info=True)
        sandbox = None
        sandbox_type = None
        workspace_root = local_workspace_root

    return sandbox, sandbox_type, workspace_root
|
||
|
||
|
||
def sync_workspace_to_sandbox(sandbox, workspace_root: str) -> None:
    """Incrementally sync the local workspace into the Daytona sandbox.

    Driven by the ``.last_sync`` marker file:

    - No local marker: if the sandbox already has ``/workspace/.last_sync``,
      only the local marker is recreated; otherwise every workspace entry is
      uploaded (full sync).
    - Local marker present: only files newer than the marker are uploaded.

    Files are packed into an in-memory ``tar.gz``, uploaded to ``/tmp`` in the
    sandbox and extracted under ``/workspace``. After an upload, both the
    sandbox marker and the local marker are refreshed.

    Args:
        sandbox: DaytonaSandbox instance.
        workspace_root: Path of the local workspace directory.
    """
    workspace_path = Path(workspace_root)
    if not workspace_path.exists() or not any(workspace_path.iterdir()):
        return

    is_first_sync, changed_files = _list_local_changed_files(workspace_path)
    if not is_first_sync and not changed_files:
        logger.info("No local file changes to sync")
        return

    if is_first_sync:
        check = sandbox.execute("test -f /workspace/.last_sync && echo yes || echo no")
        if "yes" in check.output:
            logger.info("Local marker missing but sandbox already synced, refreshing local marker")
            (workspace_path / ".last_sync").touch()
            return

        logger.info("First sync: uploading all workspace files...")
        buf = io.BytesIO()
        with tarfile.open(fileobj=buf, mode='w:gz') as tar:
            for item in workspace_path.iterdir():
                # The filter is applied to the entry itself and, recursively, to
                # everything below it, so nested .DS_Store files are skipped
                # too, matching what the incremental scan excludes.
                tar.add(str(item), arcname=item.name, filter=_exclude_sync_metadata)
        # Read only after the tar is closed so the gzip stream is complete.
        sandbox._sandbox.fs.upload_file(buf.getvalue(), "/tmp/workspace.tar.gz")
        sandbox.execute("cd /workspace && tar -xzf /tmp/workspace.tar.gz && rm /tmp/workspace.tar.gz")
        sandbox.execute("echo 'cd /workspace' > /home/daytona/.bash_env")
        logger.info("Full sync complete")
    else:
        logger.info(f"Incremental sync: {len(changed_files)} changed files")
        buf = io.BytesIO()
        with tarfile.open(fileobj=buf, mode='w:gz') as tar:
            for fpath in changed_files:
                # Archive paths relative to the workspace so they extract
                # directly under /workspace in the sandbox.
                arcname = str(Path(fpath).relative_to(workspace_path))
                tar.add(fpath, arcname=arcname)
        sandbox._sandbox.fs.upload_file(buf.getvalue(), "/tmp/workspace_inc.tar.gz")
        sandbox.execute("cd /workspace && tar -xzf /tmp/workspace_inc.tar.gz && rm /tmp/workspace_inc.tar.gz")
        logger.info(f"Incremental sync complete: {len(changed_files)} files")

    # Refresh both markers so the next run only picks up later changes.
    sandbox.execute("date +%Y%m%d%H%M.%S > /workspace/.last_sync")
    (workspace_path / ".last_sync").touch()


def _exclude_sync_metadata(member):
    """Tar add-filter: drop ``.DS_Store`` and ``.last_sync`` entries at any depth."""
    if Path(member.name).name in ('.DS_Store', '.last_sync'):
        return None
    return member
|
||
|
||
|
||
def sync_sandbox_to_local(sandbox, workspace_root: str) -> None:
    """Copy files changed inside the sandbox back to the local workspace.

    Intended to run after the agent finishes. Files under ``/workspace`` that
    are newer than ``/workspace/.last_sync`` are packed into a ``tar.gz``
    inside the sandbox, downloaded, and extracted into ``workspace_root``.
    Afterwards both the sandbox marker and the local marker are refreshed.
    Nothing is downloaded when the sandbox has no marker or no newer files.

    Args:
        sandbox: DaytonaSandbox instance.
        workspace_root: Path of the local workspace directory (created if
            missing).
    """
    # Function-scope import keeps this block self-contained.
    import shlex

    workspace_path = Path(workspace_root)
    workspace_path.mkdir(parents=True, exist_ok=True)

    check = sandbox.execute("test -f /workspace/.last_sync && echo yes || echo no")
    # Test for the positive answer, as the forward sync does: a substring test
    # for "no" would also match unrelated shell noise in the output.
    if "yes" not in check.output:
        logger.info("No .last_sync in sandbox, skipping reverse sync")
        return

    result = sandbox.execute(
        "find /workspace -newer /workspace/.last_sync -type f "
        "-not -name '.last_sync' -not -name '.DS_Store' "
        "-not -path '/workspace/.daytona*' 2>/dev/null"
    )
    changed_files = [f for f in result.output.strip().split('\n') if f and f != '/workspace']
    if not changed_files:
        logger.info("No sandbox file changes to sync back")
        return

    logger.info(f"Reverse sync: {len(changed_files)} changed files from sandbox")
    rel_files = [f.removeprefix("/workspace/") for f in changed_files]
    # Filenames come from inside the sandbox and may contain quotes or other
    # shell metacharacters, so quote each one properly. `--` stops tar from
    # treating a name that starts with '-' as an option.
    file_list = " ".join(shlex.quote(f) for f in rel_files)
    sandbox.execute(f"cd /workspace && tar -czf /tmp/sync_back.tar.gz -- {file_list}")

    tar_data = sandbox._sandbox.fs.download_file("/tmp/sync_back.tar.gz")
    sandbox.execute("rm -f /tmp/sync_back.tar.gz")

    buf = io.BytesIO(tar_data)
    with tarfile.open(fileobj=buf, mode='r:gz') as tar:
        # The archive is produced inside the sandbox; where this Python
        # supports extraction filters, refuse members that would escape
        # workspace_path (absolute paths, "..", links pointing outside).
        if hasattr(tarfile, "data_filter"):
            tar.extractall(path=str(workspace_path), filter="data")
        else:
            tar.extractall(path=str(workspace_path))

    # Refresh both markers so the files just downloaded are not re-synced.
    sandbox.execute("date +%Y%m%d%H%M.%S > /workspace/.last_sync")
    (workspace_path / ".last_sync").touch()
    logger.info(f"Reverse sync complete: {len(changed_files)} files downloaded")
|