# Matches one ``name: <value>`` line.  Only *horizontal* whitespace is allowed
# around the value: ``\s`` also matches newlines, which would let an empty
# ``name:`` silently capture the following line as its value.
_SKILL_NAME_RE = re.compile(r"^[ \t]*name:[ \t]*(.*?)[ \t\r]*$", re.MULTILINE)


def _front_matter_scope(text: str) -> str:
    """Return the text in which a ``name:`` line should be searched.

    When *text* opens with a ``---`` line and a closing ``---`` line follows,
    only the lines between the two delimiters are returned, so a ``name:``
    line in the Markdown body is never mistaken for the skill name.  When the
    delimiters are missing or unterminated the whole text is returned, which
    keeps delimiter-less files working.
    """
    lines = text.splitlines()
    if not lines or lines[0].strip() != "---":
        return text
    for index, line in enumerate(lines[1:], start=1):
        if line.strip() == "---":
            return "\n".join(lines[1:index])
    return text


def _parse_skill_name_from_frontmatter(content: bytes) -> str | None:
    """Return the ``name:`` value from a SKILL.md YAML front-matter block.

    The first ``name:`` line decides the result.  Front-matter delimiters
    (``---``) are not required, but when present the search is limited to the
    delimited block.  A value wrapped in matching single or double quotes is
    unquoted.

    Returns ``None`` when no ``name:`` line is found or its value is empty.
    """
    # Undecodable bytes are replaced rather than raised on; a leading BOM is
    # dropped so it cannot hide the opening ``---`` delimiter.
    text = content.decode("utf-8", errors="replace").lstrip("\ufeff")
    match = _SKILL_NAME_RE.search(_front_matter_scope(text))
    if match is None:
        return None
    value = match.group(1)
    if len(value) >= 2 and value[0] == value[-1] and value[0] in "'\"":
        value = value[1:-1].strip()
    return value or None


def _read_file_entry_bytes(content: object) -> bytes:
    """Read raw bytes from a ``FileContent`` value without disturbing it.

    * bytes-like objects are returned as ``bytes``;
    * ``os.PathLike`` values are read from disk;
    * seekable IO streams are read from their *current* position and then
      restored to that same position, so downstream consumers see exactly
      what they would have seen had the stream never been inspected;
    * non-seekable streams are left untouched (reading them would consume the
      data that still has to be uploaded) and yield ``b""``;
    * anything else yields ``b""``.

    Raises:
        OSError: if a path cannot be read or a stream operation fails.
        ValueError: if the stream is closed.
    """
    if isinstance(content, (bytes, bytearray, memoryview)):
        return bytes(content)
    if isinstance(content, os.PathLike):
        return Path(content).read_bytes()
    if isinstance(content, io.IOBase):
        if not content.seekable():
            return b""
        start = content.tell()
        try:
            data = content.read()
        finally:
            content.seek(start)
        if data is None:  # non-blocking raw stream with nothing available
            return b""
        return data if isinstance(data, bytes) else data.encode("utf-8")
    return b""


def _resolve_skill_name_from_files(entries: Sequence[FileTypes]) -> str | None:
    """Derive the skill name from the ``SKILL.md`` entry in *entries*, if any.

    Among all entries whose basename is ``SKILL.md`` (case-insensitive) the
    one with the fewest path components is used, so a nested
    ``examples/SKILL.md`` listed first cannot shadow the root-level one.
    A prefixed path yields its first component; a bare ``SKILL.md`` is read
    and its ``name:`` front-matter value is returned.
    """
    manifests = [
        entry
        for entry in entries
        if isinstance(entry, tuple)
        and entry
        and isinstance(entry[0], str)
        and entry[0].rsplit("/", 1)[-1].upper() == "SKILL.MD"
    ]
    if not manifests:
        return None

    # ``min`` returns the first minimal element, so list order breaks ties.
    manifest = min(manifests, key=lambda entry: entry[0].count("/"))
    path = manifest[0]
    if "/" in path:
        # Already prefixed — the caller is using the correct layout.
        return path.split("/", 1)[0] or None

    try:
        return _parse_skill_name_from_frontmatter(
            _read_file_entry_bytes(manifest[1] if len(manifest) > 1 else b"")
        )
    except (OSError, ValueError):
        # Best effort: an unreadable SKILL.md just means the name is unknown
        # here; the upload itself will surface the underlying problem.
        return None


def normalize_skill_upload_paths(
    files: Sequence[FileTypes],
    *,
    display_title: str | None = None,
) -> list[FileTypes]:
    """Ensure every upload path is prefixed with the skill-name directory.

    ``beta.skills.create`` requires every file path to be under a top-level
    directory whose name matches the ``name:`` field in ``SKILL.md`` — for
    example ``my-skill/SKILL.md`` and ``my-skill/scripts/run.py``.  Callers
    who pass bare names (``"SKILL.md"``) hit a cryptic 400 error.  This
    function adds the prefix automatically.

    The skill name is taken from, in order:

    1. The first path component of ``SKILL.md`` when that entry is already
       prefixed (the caller already has the right layout).
    2. The ``name:`` front-matter field of a bare ``SKILL.md`` entry.
    3. *display_title* normalised to ``lowercase-with-hyphens`` as a fallback.

    Args:
        files: Upload entries.  Only tuple entries whose first item is a
            ``str`` filename are candidates for rewriting; everything else is
            passed through untouched.
        display_title: Optional human-readable title used as a last-resort
            source for the directory name.

    Returns:
        A new list with the (possibly rewritten) entries.  If the name cannot
        be determined the entries are returned unchanged — the API will
        surface a descriptive error.
    """
    entries = list(files)

    # Phase 1: resolve the canonical skill name.
    skill_name = _resolve_skill_name_from_files(entries)
    if not skill_name and display_title:
        skill_name = re.sub(r"[^a-z0-9-]+", "-", display_title.lower().strip()).strip("-")
    if not skill_name:
        return entries

    # Phase 2: prefix any path that is not already under ``{skill_name}/``.
    prefix = f"{skill_name}/"
    result: list[FileTypes] = []
    for entry in entries:
        if isinstance(entry, tuple) and entry and isinstance(entry[0], str) and not entry[0].startswith(prefix):
            entry = (f"{prefix}{entry[0]}",) + entry[1:]  # type: ignore[assignment]
        result.append(entry)
    return result
create( timeout: Override the client-level default timeout for this request, in seconds """ + if is_given(files) and files is not None: + files = normalize_skill_upload_paths( + list(files), + display_title=display_title if is_given(display_title) and display_title is not None else None, + ) extra_headers = { **strip_not_given( { diff --git a/tests/lib/tools/test_skills.py b/tests/lib/tools/test_skills.py index ad3f2e0d2..b10f9f4d9 100644 --- a/tests/lib/tools/test_skills.py +++ b/tests/lib/tools/test_skills.py @@ -1,9 +1,14 @@ -"""Tests for skill-archive extraction (:mod:`anthropic.lib.tools._skills`). +"""Tests for skill-archive extraction and upload-path normalisation +(:mod:`anthropic.lib.tools._skills`). Skill bundles are packaged wrapped in a single directory named after the skill (e.g. ``pdf/SKILL.md``). The extractor must strip that wrapper so files land at ``/SKILL.md``, not the doubled ``/pdf/SKILL.md``. It must also still refuse zip-slip / tar-slip members. + +The upload side has a symmetric requirement: ``beta.skills.create`` rejects +paths that are not prefixed with the skill name. ``normalize_skill_upload_paths`` +rewrites bare paths automatically so callers don't need to know this constraint. """ from __future__ import annotations @@ -21,9 +26,17 @@ # archive records for that member. ArchiveModeMaker = Callable[[Path, "dict[str, tuple[bytes, int]]"], None] +import io + import pytest -from anthropic.lib.tools._skills import _strip_top, _archive_top_dir, _extract_skill_archive +from anthropic.lib.tools._skills import ( + _strip_top, + _archive_top_dir, + _extract_skill_archive, + normalize_skill_upload_paths, + _parse_skill_name_from_frontmatter, +) def _make_zip(path: Path, entries: dict[str, bytes]) -> None: @@ -194,3 +207,91 @@ def test_extract_drops_setuid_setgid_sticky(make: ArchiveModeMaker, tmp_path: Pa # A non-executable member with setuid set must also drop the bit. 
# ---------------------------------------------------------------------------
# normalize_skill_upload_paths
# ---------------------------------------------------------------------------

# Minimal SKILL.md: a delimited front-matter block carrying ``name:`` followed
# by a short Markdown body.
_SKILL_MD_BYTES = b"""\
---
name: my-skill
description: A test skill.
---

Body text here.
"""


def test_parse_skill_name_from_frontmatter_found() -> None:
    """The ``name:`` value is extracted from a well-formed front-matter block."""
    parsed = _parse_skill_name_from_frontmatter(_SKILL_MD_BYTES)
    assert parsed == "my-skill"


def test_parse_skill_name_from_frontmatter_missing() -> None:
    """Content without any ``name:`` line yields ``None``."""
    parsed = _parse_skill_name_from_frontmatter(b"no front matter here")
    assert parsed is None


def test_normalize_bare_paths_prefixed_from_skill_md() -> None:
    """Bare paths gain the prefix named by SKILL.md's ``name:`` field."""
    skill_md = ("SKILL.md", _SKILL_MD_BYTES, "text/markdown")
    script = ("scripts/run.py", b"print(1)", "text/x-python")
    result = normalize_skill_upload_paths([skill_md, script])  # type: ignore[arg-type]
    first, second = result[0], result[1]
    assert first[0] == "my-skill/SKILL.md"
    assert second[0] == "my-skill/scripts/run.py"


def test_normalize_already_prefixed_paths_unchanged() -> None:
    """Paths that already live under the skill directory are left alone."""
    skill_md = ("my-skill/SKILL.md", _SKILL_MD_BYTES, "text/markdown")
    script = ("my-skill/scripts/run.py", b"print(1)", "text/x-python")
    result = normalize_skill_upload_paths([skill_md, script])  # type: ignore[arg-type]
    first, second = result[0], result[1]
    assert first[0] == "my-skill/SKILL.md"
    assert second[0] == "my-skill/scripts/run.py"


def test_normalize_idempotent() -> None:
    """Normalising an already-normalised list changes nothing."""
    uploads = [("SKILL.md", _SKILL_MD_BYTES, "text/markdown")]
    once = normalize_skill_upload_paths(uploads)  # type: ignore[arg-type]
    twice = normalize_skill_upload_paths(once)
    assert once == twice


def test_normalize_fallback_to_display_title() -> None:
    """Without a ``name:`` field the slugified display title is the prefix."""
    no_name_md = b"---\ndescription: No name field.\n---\n"
    uploads = [("SKILL.md", no_name_md, "text/markdown")]
    result = normalize_skill_upload_paths(uploads, display_title="My Skill")  # type: ignore[arg-type]
    assert result[0][0] == "my-skill/SKILL.md"


def test_normalize_display_title_special_chars() -> None:
    """Characters outside ``[a-z0-9-]`` in the title collapse to hyphens."""
    no_name_md = b"---\ndescription: x\n---\n"
    uploads = [("SKILL.md", no_name_md, "text/markdown")]
    result = normalize_skill_upload_paths(uploads, display_title="My Skill v2!")  # type: ignore[arg-type]
    assert result[0][0] == "my-skill-v2/SKILL.md"


def test_normalize_io_stream_skill_md() -> None:
    """A stream-backed SKILL.md is parsed and still fully readable afterwards."""
    stream = io.BytesIO(_SKILL_MD_BYTES)
    uploads = [("SKILL.md", stream, "text/markdown")]
    result = normalize_skill_upload_paths(uploads)  # type: ignore[arg-type]
    assert result[0][0] == "my-skill/SKILL.md"
    # Stream must be rewound so the SDK can still send the bytes.
    remaining = stream.read()
    assert remaining == _SKILL_MD_BYTES


def test_normalize_no_skill_md_no_display_title_unchanged() -> None:
    """With no SKILL.md and no title there is no name, so paths are kept."""
    uploads = [("config.json", b"{}", "application/json")]
    result = normalize_skill_upload_paths(uploads)  # type: ignore[arg-type]
    assert result[0][0] == "config.json"


def test_normalize_two_tuple_entries() -> None:
    """``(filename, content)`` pairs without a MIME type are handled too."""
    skill_md = ("SKILL.md", _SKILL_MD_BYTES)
    readme = ("README.md", b"# readme")
    result = normalize_skill_upload_paths([skill_md, readme])  # type: ignore[arg-type]
    first, second = result[0], result[1]
    assert first[0] == "my-skill/SKILL.md"
    assert second[0] == "my-skill/README.md"