From 3c0fa498b5b35fc02aad41b7591c8971d356c016 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E6=BD=AE?= Date: Wed, 6 May 2026 19:37:00 +0800 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20fix(sync):=20sync=20dataset=20sy?= =?UTF-8?q?mlinks=20to=20Daytona=20sandbox?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The incremental sync used `find -type f` which misses symlinks (type l), so dataset symlinks were never detected and synced to the sandbox. Additionally, `tar.add()` without `dereference=True` would store broken symlinks pointing to host-only paths. - _list_local_changed_files: match both regular files and symlinks - _tar_workspace_entries: dereference symlinks to pack actual content - Unify dataset path to `datasets/` (plural) in prompts and SKILL.md Co-Authored-By: Claude Opus 4.6 (1M context) --- prompt/system_prompt_deep_agent.md | 4 ++-- skills/onprem/kfs-answer/SKILL.md | 2 +- utils/daytona_sync.py | 7 +++---- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/prompt/system_prompt_deep_agent.md b/prompt/system_prompt_deep_agent.md index 01a2575..73e6b11 100644 --- a/prompt/system_prompt_deep_agent.md +++ b/prompt/system_prompt_deep_agent.md @@ -58,13 +58,13 @@ When executing scripts from SKILL.md files, you MUST convert relative paths to a **3. Workspace Directory Structure** - **`{agent_dir_path}/skills/`** - Skill packages with embedded scripts -- **`{agent_dir_path}/dataset/`** - Store file datasets and document data +- **`{agent_dir_path}/datasets/`** - Store file datasets and document data - **`{agent_dir_path}/executable_code/`** - Place generated executable scripts here (not skill scripts) - **`{agent_dir_path}/download/`** - Store downloaded files and content **Path Examples:** - Skill script: `{agent_dir_path}/skills/rag-retrieve/scripts/rag_retrieve.py` -- Dataset file: `{agent_dir_path}/dataset/document.txt` +- Dataset file: `{agent_dir_path}/datasets/document.txt` - Generated script: `{agent_dir_path}/scripts/process_data.py` - Downloaded file: `{agent_dir_path}/download/report.pdf` diff --git a/skills/onprem/kfs-answer/SKILL.md b/skills/onprem/kfs-answer/SKILL.md index ae50d5e..39ab748 100644 --- a/skills/onprem/kfs-answer/SKILL.md +++ b/skills/onprem/kfs-answer/SKILL.md @@ -14,7 +14,7 @@ Answer ALL questions about the datasets knowledge base using this skill's script Scripts are in `{SKILL_DIR}/scripts/`. -Datasets are auto-discovered by scripts from `./dataset/` (catalog-agent) or `./datasets/` (gbase-agent-service) subdirectories — agent does NOT need to know or pass dataset IDs. +Datasets are auto-discovered by scripts from `./datasets/` subdirectories — agent does NOT need to know or pass dataset IDs. ## Scripts diff --git a/utils/daytona_sync.py b/utils/daytona_sync.py index 152b3a4..fee8629 100644 --- a/utils/daytona_sync.py +++ b/utils/daytona_sync.py @@ -43,8 +43,7 @@ def _list_local_changed_files(workspace_path: Path) -> tuple[bool, list[str]]: str(workspace_path), "-newer", str(marker_local), - "-type", - "f", + "(", "-type", "f", "-o", "-type", "l", ")", "-not", "-name", LOCAL_MARKER_NAME, @@ -65,9 +64,9 @@ def _tar_workspace_entries(workspace_path: Path, entries: list[Path]) -> bytes: with tarfile.open(fileobj=buf, mode="w:gz") as tar: for entry in entries: if entry.is_absolute(): - tar.add(str(entry), arcname=entry.relative_to(workspace_path).as_posix()) + tar.add(str(entry), arcname=entry.relative_to(workspace_path).as_posix(), dereference=True) else: - tar.add(str(workspace_path / entry), arcname=entry.as_posix()) + tar.add(str(workspace_path / entry), arcname=entry.as_posix(), dereference=True) buf.seek(0) return buf.read()