fix(review): U3 atomic file writes for YAML + .env + skill config
All config file writes now use the write-temp + fsync + os.replace pattern (KTD-4) so a crash mid-write leaves the original file intact. - Add src/agentkit/server/utils/atomic_write.py with write_text_atomic - settings.py: _write_yaml_config and _write_env_var use atomic write - skill_service.py: import_skill uses atomic write - skill_service.py: update_skill_config uses atomic write + fcntl.flock around the read-modify-write cycle to serialize concurrent updates - Add 11 unit tests covering happy path, crash safety, concurrency, errors
This commit is contained in:
parent
698a8fafba
commit
4f261523c2
|
|
@ -253,8 +253,11 @@ class SkillService:
|
||||||
if not _is_within_directory(file_path, skills_dir):
|
if not _is_within_directory(file_path, skills_dir):
|
||||||
raise ValueError(f"Resolved skill path escapes skills_dir: {file_path}")
|
raise ValueError(f"Resolved skill path escapes skills_dir: {file_path}")
|
||||||
|
|
||||||
with open(file_path, "w", encoding="utf-8") as f:
|
# Atomic write (U3 — R6): write-temp + fsync + os.replace so a
|
||||||
f.write(yaml_content)
|
# crash mid-write doesn't leave a partial/corrupt YAML.
|
||||||
|
from agentkit.server.utils.atomic_write import write_text_atomic
|
||||||
|
|
||||||
|
write_text_atomic(file_path, yaml_content)
|
||||||
|
|
||||||
if skill_registry is not None:
|
if skill_registry is not None:
|
||||||
try:
|
try:
|
||||||
|
|
@ -363,27 +366,46 @@ class SkillService:
|
||||||
if not _is_within_directory(file_path, skills_dir):
|
if not _is_within_directory(file_path, skills_dir):
|
||||||
raise ValueError(f"Resolved skill path escapes skills_dir: {file_path}")
|
raise ValueError(f"Resolved skill path escapes skills_dir: {file_path}")
|
||||||
|
|
||||||
# Read existing YAML, apply patch, validate, write back.
|
# Read-modify-write with fcntl.flock to serialize concurrent
|
||||||
with open(file_path, encoding="utf-8") as f:
|
# updates (U3 — R6). The lockfile is a sibling of the YAML so
|
||||||
existing = yaml.safe_load(f) or {}
|
# it survives crashes and is visible to other processes.
|
||||||
|
import fcntl
|
||||||
|
|
||||||
if not isinstance(existing, dict):
|
from agentkit.server.utils.atomic_write import write_text_atomic
|
||||||
raise ValueError(f"Existing skill YAML at {file_path} is not a mapping")
|
|
||||||
|
|
||||||
# Shallow merge: top-level keys in config_patch overwrite.
|
lock_path = f"{file_path}.lock"
|
||||||
# The 'name' field is preserved (cannot rename via patch).
|
with open(lock_path, "w") as lock_fh:
|
||||||
patched = {**existing, **config_patch, "name": existing.get("name", normalized)}
|
fcntl.flock(lock_fh.fileno(), fcntl.LOCK_EX)
|
||||||
|
try:
|
||||||
|
# Read existing YAML, apply patch, validate, write back.
|
||||||
|
with open(file_path, encoding="utf-8") as f:
|
||||||
|
existing = yaml.safe_load(f) or {}
|
||||||
|
|
||||||
# Validate the patched config by round-tripping through YAML.
|
if not isinstance(existing, dict):
|
||||||
try:
|
raise ValueError(f"Existing skill YAML at {file_path} is not a mapping")
|
||||||
patched_yaml = yaml.safe_dump(patched, default_flow_style=False, allow_unicode=True)
|
|
||||||
except yaml.YAMLError as exc:
|
|
||||||
raise ValueError(f"Failed to serialize patched config: {exc}") from exc
|
|
||||||
|
|
||||||
_validate_yaml_content(patched_yaml)
|
# Shallow merge: top-level keys in config_patch overwrite.
|
||||||
|
# The 'name' field is preserved (cannot rename via patch).
|
||||||
|
patched = {
|
||||||
|
**existing,
|
||||||
|
**config_patch,
|
||||||
|
"name": existing.get("name", normalized),
|
||||||
|
}
|
||||||
|
|
||||||
with open(file_path, "w", encoding="utf-8") as f:
|
# Validate the patched config by round-tripping through YAML.
|
||||||
f.write(patched_yaml)
|
try:
|
||||||
|
patched_yaml = yaml.safe_dump(
|
||||||
|
patched, default_flow_style=False, allow_unicode=True
|
||||||
|
)
|
||||||
|
except yaml.YAMLError as exc:
|
||||||
|
raise ValueError(f"Failed to serialize patched config: {exc}") from exc
|
||||||
|
|
||||||
|
_validate_yaml_content(patched_yaml)
|
||||||
|
|
||||||
|
# Atomic write: write-temp + fsync + os.replace.
|
||||||
|
write_text_atomic(file_path, patched_yaml)
|
||||||
|
finally:
|
||||||
|
fcntl.flock(lock_fh.fileno(), fcntl.LOCK_UN)
|
||||||
|
|
||||||
# Reload the skill into the registry.
|
# Reload the skill into the registry.
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@ -159,7 +159,14 @@ def _write_yaml_config(config_path: str, data: dict) -> None:
|
||||||
|
|
||||||
Uses ruamel.yaml when available to preserve comments and formatting.
|
Uses ruamel.yaml when available to preserve comments and formatting.
|
||||||
Falls back to PyYAML otherwise.
|
Falls back to PyYAML otherwise.
|
||||||
|
|
||||||
|
The write is atomic (write-temp + fsync + os.replace) so a crash
|
||||||
|
mid-write leaves the original file intact (U3 — R6).
|
||||||
"""
|
"""
|
||||||
|
import io
|
||||||
|
|
||||||
|
from agentkit.server.utils.atomic_write import write_text_atomic
|
||||||
|
|
||||||
original = _read_yaml_config(config_path)
|
original = _read_yaml_config(config_path)
|
||||||
preserved = _reverse_resolve_env(data, original)
|
preserved = _reverse_resolve_env(data, original)
|
||||||
try:
|
try:
|
||||||
|
|
@ -173,11 +180,13 @@ def _write_yaml_config(config_path: str, data: dict) -> None:
|
||||||
original_data = yaml_writer.load(f)
|
original_data = yaml_writer.load(f)
|
||||||
# Apply preserved values onto the ruamel-parsed structure
|
# Apply preserved values onto the ruamel-parsed structure
|
||||||
_deep_update_ruamel(original_data, preserved)
|
_deep_update_ruamel(original_data, preserved)
|
||||||
with open(config_path, "w", encoding="utf-8") as f:
|
buf = io.StringIO()
|
||||||
yaml_writer.dump(original_data, f)
|
yaml_writer.dump(original_data, buf)
|
||||||
|
write_text_atomic(config_path, buf.getvalue())
|
||||||
except ImportError:
|
except ImportError:
|
||||||
with open(config_path, "w", encoding="utf-8") as f:
|
buf = io.StringIO()
|
||||||
yaml.dump(preserved, f, default_flow_style=False, allow_unicode=True, sort_keys=False)
|
yaml.dump(preserved, buf, default_flow_style=False, allow_unicode=True, sort_keys=False)
|
||||||
|
write_text_atomic(config_path, buf.getvalue())
|
||||||
|
|
||||||
|
|
||||||
def _deep_update_ruamel(target: Any, source: Any) -> None:
|
def _deep_update_ruamel(target: Any, source: Any) -> None:
|
||||||
|
|
@ -207,9 +216,14 @@ def _write_env_var(config_path: str, key: str, value: str) -> None:
|
||||||
|
|
||||||
If the key already exists in .env, its value is updated in place.
|
If the key already exists in .env, its value is updated in place.
|
||||||
If not, it's appended. Comments and formatting are preserved.
|
If not, it's appended. Comments and formatting are preserved.
|
||||||
|
|
||||||
|
The write is atomic (write-temp + fsync + os.replace) so a crash
|
||||||
|
mid-write leaves the original .env intact (U3 — R6).
|
||||||
"""
|
"""
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
from agentkit.server.utils.atomic_write import write_text_atomic
|
||||||
|
|
||||||
env_path = Path(config_path).parent / ".env"
|
env_path = Path(config_path).parent / ".env"
|
||||||
lines: list[str] = []
|
lines: list[str] = []
|
||||||
found = False
|
found = False
|
||||||
|
|
@ -229,8 +243,7 @@ def _write_env_var(config_path: str, key: str, value: str) -> None:
|
||||||
if not found:
|
if not found:
|
||||||
lines.append(f"{key}={value}\n")
|
lines.append(f"{key}={value}\n")
|
||||||
|
|
||||||
with open(env_path, "w", encoding="utf-8") as f:
|
write_text_atomic(env_path, "".join(lines))
|
||||||
f.writelines(lines)
|
|
||||||
|
|
||||||
# Also set in current process so the next from_yaml resolves correctly
|
# Also set in current process so the next from_yaml resolves correctly
|
||||||
os.environ[key] = value
|
os.environ[key] = value
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
"""Server utility helpers."""
|
||||||
|
|
@ -0,0 +1,88 @@
|
||||||
|
"""Atomic file-write utilities (U3 — R6).
|
||||||
|
|
||||||
|
Provides :func:`write_text_atomic` which writes text content to a file
|
||||||
|
using the write-temp + fsync + os.replace pattern (KTD-4). This
|
||||||
|
guarantees that:
|
||||||
|
|
||||||
|
- A crash during the write leaves the original file intact (the temp
|
||||||
|
file is abandoned).
|
||||||
|
- Concurrent writers don't produce mixed content (``os.replace`` is
|
||||||
|
atomic on POSIX).
|
||||||
|
- The file's directory entry is durable after a crash (parent-dir
|
||||||
|
fsync).
|
||||||
|
|
||||||
|
The function is intentionally minimal — it operates on ``str`` content.
|
||||||
|
Callers that need to serialize YAML/dict/etc. should do so before
|
||||||
|
calling.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
def write_text_atomic(path: str | Path, content: str, *, encoding: str = "utf-8") -> None:
|
||||||
|
"""Write ``content`` to ``path`` atomically.
|
||||||
|
|
||||||
|
Flow:
|
||||||
|
1. Write content to a temporary file in the same directory.
|
||||||
|
2. ``fsync`` the temp file (data durability).
|
||||||
|
3. ``os.replace(temp, path)`` — atomic rename on POSIX.
|
||||||
|
4. ``fsync`` the parent directory (directory-entry durability).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
path: Target file path. The parent directory must exist.
|
||||||
|
content: Text content to write.
|
||||||
|
encoding: Text encoding (default ``utf-8``).
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
PermissionError: If the target directory or file is not writable.
|
||||||
|
OSError: For other filesystem errors.
|
||||||
|
"""
|
||||||
|
target = Path(path)
|
||||||
|
parent = target.parent
|
||||||
|
parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
# Create the temp file in the same directory so os.replace stays
|
||||||
|
# within a single filesystem (required for atomicity).
|
||||||
|
fd, tmp_name = tempfile.mkstemp(
|
||||||
|
dir=str(parent),
|
||||||
|
prefix=f".{target.name}.tmp.",
|
||||||
|
suffix=".partial",
|
||||||
|
)
|
||||||
|
tmp_path = Path(tmp_name)
|
||||||
|
try:
|
||||||
|
with os.fdopen(fd, "w", encoding=encoding) as f:
|
||||||
|
f.write(content)
|
||||||
|
f.flush()
|
||||||
|
os.fsync(f.fileno())
|
||||||
|
os.replace(tmp_path, target)
|
||||||
|
# fsync the parent directory so the rename is durable.
|
||||||
|
_fsync_dir(parent)
|
||||||
|
except BaseException:
|
||||||
|
# On any failure (including crash), clean up the temp file.
|
||||||
|
# os.replace already succeeded → tmp_path no longer exists.
|
||||||
|
try:
|
||||||
|
tmp_path.unlink(missing_ok=True)
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
def _fsync_dir(path: Path) -> None:
|
||||||
|
"""Best-effort fsync of a directory (POSIX only, no-op on Windows)."""
|
||||||
|
if os.name != "posix":
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
dir_fd = os.open(str(path), os.O_RDONLY)
|
||||||
|
try:
|
||||||
|
os.fsync(dir_fd)
|
||||||
|
finally:
|
||||||
|
os.close(dir_fd)
|
||||||
|
except OSError:
|
||||||
|
# Some filesystems (e.g. network mounts) don't support fsync
|
||||||
|
# on directories. Swallow rather than crash — the rename is
|
||||||
|
# still atomic, just not guaranteed durable.
|
||||||
|
pass
|
||||||
|
|
@ -0,0 +1,202 @@
|
||||||
|
"""Unit tests for atomic file-write utilities (U3 — R6).
|
||||||
|
|
||||||
|
Covers:
|
||||||
|
- Happy path: write_text_atomic writes correct content
|
||||||
|
- Edge case: no temp file residue after success
|
||||||
|
- Edge case: original file intact when write fails (mocked os.replace)
|
||||||
|
- Edge case: concurrent writes don't produce mixed content
|
||||||
|
- Error path: permission error on target directory
|
||||||
|
- Integration: settings _write_yaml_config uses atomic write
|
||||||
|
- Integration: settings _write_env_var uses atomic write
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import threading
|
||||||
|
from pathlib import Path
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from agentkit.server.utils.atomic_write import write_text_atomic
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Happy path
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestWriteTextAtomic:
|
||||||
|
def test_writes_content_correctly(self, tmp_path: Path):
|
||||||
|
target = tmp_path / "output.yaml"
|
||||||
|
write_text_atomic(target, "key: value\n")
|
||||||
|
assert target.read_text(encoding="utf-8") == "key: value\n"
|
||||||
|
|
||||||
|
def test_overwrites_existing_file(self, tmp_path: Path):
|
||||||
|
target = tmp_path / "output.yaml"
|
||||||
|
target.write_text("old: content\n", encoding="utf-8")
|
||||||
|
write_text_atomic(target, "new: content\n")
|
||||||
|
assert target.read_text(encoding="utf-8") == "new: content\n"
|
||||||
|
|
||||||
|
def test_no_temp_file_residue(self, tmp_path: Path):
|
||||||
|
target = tmp_path / "output.yaml"
|
||||||
|
write_text_atomic(target, "key: value\n")
|
||||||
|
# No temp/partial files should remain in the directory.
|
||||||
|
residue = [
|
||||||
|
f.name
|
||||||
|
for f in tmp_path.iterdir()
|
||||||
|
if f.name != "output.yaml" and not f.name.startswith(".")
|
||||||
|
]
|
||||||
|
assert residue == [], f"Unexpected temp files: {residue}"
|
||||||
|
|
||||||
|
def test_accepts_str_path(self, tmp_path: Path):
|
||||||
|
target = str(tmp_path / "output.txt")
|
||||||
|
write_text_atomic(target, "hello\n")
|
||||||
|
assert Path(target).read_text(encoding="utf-8") == "hello\n"
|
||||||
|
|
||||||
|
def test_creates_parent_dirs(self, tmp_path: Path):
|
||||||
|
target = tmp_path / "subdir" / "nested" / "output.yaml"
|
||||||
|
write_text_atomic(target, "key: value\n")
|
||||||
|
assert target.read_text(encoding="utf-8") == "key: value\n"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Crash safety
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestWriteTextAtomicCrashSafety:
|
||||||
|
def test_original_intact_when_replace_fails(self, tmp_path: Path):
|
||||||
|
"""If os.replace raises, the original file must be unchanged."""
|
||||||
|
target = tmp_path / "output.yaml"
|
||||||
|
target.write_text("original: true\n", encoding="utf-8")
|
||||||
|
|
||||||
|
with patch("agentkit.server.utils.atomic_write.os.replace") as mock_replace:
|
||||||
|
mock_replace.side_effect = OSError("simulated crash")
|
||||||
|
with pytest.raises(OSError, match="simulated crash"):
|
||||||
|
write_text_atomic(target, "new: true\n")
|
||||||
|
|
||||||
|
# Original content must be intact.
|
||||||
|
assert target.read_text(encoding="utf-8") == "original: true\n"
|
||||||
|
# Temp file must be cleaned up.
|
||||||
|
residue = [
|
||||||
|
f.name
|
||||||
|
for f in tmp_path.iterdir()
|
||||||
|
if f.name != "output.yaml" and not f.name.startswith(".")
|
||||||
|
]
|
||||||
|
assert residue == [], f"Temp file not cleaned up: {residue}"
|
||||||
|
|
||||||
|
def test_original_intact_when_fsync_fails(self, tmp_path: Path):
|
||||||
|
"""If fsync raises (e.g. disk full), the original must be intact."""
|
||||||
|
target = tmp_path / "output.yaml"
|
||||||
|
target.write_text("original: true\n", encoding="utf-8")
|
||||||
|
|
||||||
|
with patch("agentkit.server.utils.atomic_write.os.fsync") as mock_fsync:
|
||||||
|
mock_fsync.side_effect = OSError("disk full")
|
||||||
|
with pytest.raises(OSError, match="disk full"):
|
||||||
|
write_text_atomic(target, "new: true\n")
|
||||||
|
|
||||||
|
assert target.read_text(encoding="utf-8") == "original: true\n"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Concurrency
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestWriteTextAtomicConcurrency:
|
||||||
|
def test_concurrent_writes_no_mixed_content(self, tmp_path: Path):
|
||||||
|
"""Two threads writing different content — no mixed lines."""
|
||||||
|
target = tmp_path / "output.yaml"
|
||||||
|
target.write_text("init: 0\n", encoding="utf-8")
|
||||||
|
|
||||||
|
barrier = threading.Barrier(2)
|
||||||
|
results: list[bool] = []
|
||||||
|
|
||||||
|
def writer(content: str) -> None:
|
||||||
|
barrier.wait()
|
||||||
|
write_text_atomic(target, content)
|
||||||
|
results.append(True)
|
||||||
|
|
||||||
|
t1 = threading.Thread(target=writer, args=("aaa: 1\nbbb: 2\nccc: 3\n",))
|
||||||
|
t2 = threading.Thread(target=writer, args=("xxx: 1\nyyy: 2\nzzz: 3\n",))
|
||||||
|
t1.start()
|
||||||
|
t2.start()
|
||||||
|
t1.join(timeout=5)
|
||||||
|
t2.join(timeout=5)
|
||||||
|
|
||||||
|
assert len(results) == 2
|
||||||
|
# The final content must be one of the two writes, not a mix.
|
||||||
|
final = target.read_text(encoding="utf-8")
|
||||||
|
assert final in ("aaa: 1\nbbb: 2\nccc: 3\n", "xxx: 1\nyyy: 2\nzzz: 3\n")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Error paths
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestWriteTextAtomicErrors:
|
||||||
|
def test_permission_error_on_unwritable_dir(self, tmp_path: Path):
|
||||||
|
"""Writing to a read-only directory raises PermissionError."""
|
||||||
|
ro_dir = tmp_path / "readonly"
|
||||||
|
ro_dir.mkdir()
|
||||||
|
os.chmod(ro_dir, 0o444)
|
||||||
|
try:
|
||||||
|
target = ro_dir / "output.yaml"
|
||||||
|
with pytest.raises(PermissionError):
|
||||||
|
write_text_atomic(target, "key: value\n")
|
||||||
|
finally:
|
||||||
|
os.chmod(ro_dir, 0o755)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Integration: settings.py
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestSettingsAtomicWrite:
|
||||||
|
"""Verify that settings.py helpers use atomic writes."""
|
||||||
|
|
||||||
|
def test_write_env_var_uses_atomic_write(self, tmp_path: Path):
|
||||||
|
"""_write_env_var should write .env atomically (no partial writes)."""
|
||||||
|
from agentkit.server.routes.settings import _write_env_var
|
||||||
|
|
||||||
|
config_path = tmp_path / "agentkit.yaml"
|
||||||
|
config_path.write_text("llm:\n default_model: gpt-4\n", encoding="utf-8")
|
||||||
|
|
||||||
|
_write_env_var(str(config_path), "OPENAI_API_KEY", "sk-test-123")
|
||||||
|
|
||||||
|
env_path = tmp_path / ".env"
|
||||||
|
assert env_path.exists()
|
||||||
|
content = env_path.read_text(encoding="utf-8")
|
||||||
|
assert "OPENAI_API_KEY=sk-test-123" in content
|
||||||
|
|
||||||
|
# No temp files should remain.
|
||||||
|
residue = [
|
||||||
|
f.name
|
||||||
|
for f in tmp_path.iterdir()
|
||||||
|
if not f.name.startswith(".") and f.name != "agentkit.yaml"
|
||||||
|
]
|
||||||
|
assert residue == [], f"Unexpected files: {residue}"
|
||||||
|
|
||||||
|
def test_write_env_var_preserves_existing_content(self, tmp_path: Path):
|
||||||
|
"""Updating an existing key preserves other lines."""
|
||||||
|
from agentkit.server.routes.settings import _write_env_var
|
||||||
|
|
||||||
|
config_path = tmp_path / "agentkit.yaml"
|
||||||
|
config_path.write_text("llm:\n default_model: gpt-4\n", encoding="utf-8")
|
||||||
|
env_path = tmp_path / ".env"
|
||||||
|
env_path.write_text(
|
||||||
|
"# Header comment\nOPENAI_API_KEY=old-value\nANTHROPIC_API_KEY=sk-ant\n",
|
||||||
|
encoding="utf-8",
|
||||||
|
)
|
||||||
|
|
||||||
|
_write_env_var(str(config_path), "OPENAI_API_KEY", "new-value")
|
||||||
|
|
||||||
|
content = env_path.read_text(encoding="utf-8")
|
||||||
|
assert "# Header comment" in content
|
||||||
|
assert "OPENAI_API_KEY=new-value" in content
|
||||||
|
assert "ANTHROPIC_API_KEY=sk-ant" in content
|
||||||
|
assert "old-value" not in content
|
||||||
Loading…
Reference in New Issue