Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 105 additions & 2 deletions src/anthropic/lib/tools/_skills.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,126 @@

from __future__ import annotations

import io
import os
import re
import shutil
import logging
import tarfile
import zipfile
import tempfile
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Sequence
from pathlib import Path, PurePosixPath
from functools import partial

import anyio
from anyio.to_thread import run_sync

if TYPE_CHECKING:
from ..._types import FileTypes
from ..._client import AsyncAnthropic

__all__ = ["download_session_skills"]
__all__ = ["download_session_skills", "normalize_skill_upload_paths"]

# Matches ``name: <value>`` inside SKILL.md YAML front-matter.
_SKILL_NAME_RE = re.compile(r"^\s*name:\s*(.+?)\s*$", re.MULTILINE)


def _parse_skill_name_from_frontmatter(content: bytes) -> str | None:
"""Return the ``name:`` value from a SKILL.md YAML front-matter block.

Looks for the first ``name: <value>`` line in the file; front-matter
delimiters (``---``) are not required. Returns ``None`` when no ``name:``
line is found.
"""
text = content.decode("utf-8", errors="replace")
m = _SKILL_NAME_RE.search(text)
return m.group(1) if m else None


def _read_file_entry_bytes(content: object) -> bytes:
"""Read raw bytes from a ``FileContent`` value.

If *content* is an IO stream the stream is seeked back to position 0
after reading so downstream consumers still see the full content.
"""
if isinstance(content, bytes):
return content
if isinstance(content, os.PathLike):
return Path(content).read_bytes()
if isinstance(content, io.IOBase):
data = content.read()
if hasattr(content, "seek"):
content.seek(0)
return data if isinstance(data, bytes) else data.encode("utf-8")
return b""


def _resolve_skill_name(files: Sequence[FileTypes], display_title: str | None) -> str | None:
    """Work out the top-level skill directory name, or ``None`` if unknown."""
    # Every tuple entry whose basename is SKILL.md (case-insensitive).
    manifests = [
        entry
        for entry in files
        if isinstance(entry, tuple)
        and entry
        and isinstance(entry[0], str)
        and entry[0].rsplit("/", 1)[-1].upper() == "SKILL.MD"
    ]

    skill_name: str | None = None
    if manifests:
        # The skill's own manifest is the shallowest SKILL.md. Picking the
        # first one in list order would let a nested file such as
        # ``examples/demo/SKILL.md`` hijack the name. ``min`` keeps the
        # earliest entry on ties.
        manifest = min(manifests, key=lambda entry: entry[0].count("/"))
        filename = manifest[0]
        if "/" in filename:
            # Already prefixed — the caller is using the correct layout.
            skill_name = filename.split("/", 1)[0]
        else:
            # Bare "SKILL.md" — read the content and parse ``name:``.
            try:
                skill_name = _parse_skill_name_from_frontmatter(
                    _read_file_entry_bytes(manifest[1] if len(manifest) > 1 else b"")
                )
            except Exception:
                # Best effort: an unreadable SKILL.md must not block the
                # upload, but leave a trace for anyone debugging the fallback.
                logging.getLogger(__name__).debug(
                    "Could not read skill name from %r; falling back to display_title",
                    filename,
                    exc_info=True,
                )

    if not skill_name and display_title:
        # Collapse every run of non-alphanumerics (including existing hyphens)
        # into one hyphen so "My Skill - v2" becomes "my-skill-v2".
        skill_name = re.sub(r"[^a-z0-9]+", "-", display_title.lower()).strip("-")

    return skill_name or None


def normalize_skill_upload_paths(
    files: Sequence[FileTypes],
    *,
    display_title: str | None = None,
) -> list[FileTypes]:
    """Ensure every upload path is prefixed with the skill-name directory.

    ``beta.skills.create`` requires every file path to be under a top-level
    directory whose name matches the ``name:`` field in ``SKILL.md`` — for
    example ``my-skill/SKILL.md`` and ``my-skill/scripts/run.py``. Callers
    who pass bare names (``"SKILL.md"``) hit a cryptic 400 error. This
    function adds the prefix automatically, making the upload layout symmetric
    with the download extraction in :func:`_archive_top_dir`.

    The skill name is taken from the shallowest ``SKILL.md`` entry (the first
    one on ties):

    1. If it is a bare ``SKILL.md``, the ``name:`` front-matter field in its
       content.
    2. If it is already prefixed (``my-skill/SKILL.md``), its first path
       component.
    3. Otherwise, or when neither yields a name, *display_title* normalised
       to ``lowercase-with-hyphens``.

    Args:
        files: Upload entries. Only tuple entries whose first element is a
            ``str`` filename are inspected or rewritten; anything else is
            passed through untouched.
        display_title: Optional human-readable title used as the fallback
            source of the skill name.

    Returns:
        A new list in which every string filename not already under
        ``{skill_name}/`` has been prefixed with it. If the name cannot be
        determined, a copy of the original entries is returned unchanged —
        the API will surface a descriptive error.
    """
    skill_name = _resolve_skill_name(files, display_title)
    if not skill_name:
        return list(files)

    prefix = f"{skill_name}/"
    result: list[FileTypes] = []
    for entry in files:
        needs_prefix = (
            isinstance(entry, tuple)
            and entry
            and isinstance(entry[0], str)
            and not entry[0].startswith(prefix)
        )
        if needs_prefix:
            # Rebuild the tuple so content, content type and headers carry over.
            entry = (f"{prefix}{entry[0]}",) + entry[1:]  # type: ignore[assignment]
        result.append(entry)
    return result


# Skill dirs hold downloaded, possibly third-party content — keep them
# owner-only rather than inheriting whatever the process umask happens to be.
Expand Down
11 changes: 11 additions & 0 deletions src/anthropic/resources/beta/skills/skills.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from ....pagination import SyncPageCursor, AsyncPageCursor
from ....types.beta import skill_list_params, skill_create_params
from ...._base_client import AsyncPaginator, make_request_options
from ....lib.tools._skills import normalize_skill_upload_paths
from ....types.anthropic_beta_param import AnthropicBetaParam
from ....types.beta.skill_list_response import SkillListResponse
from ....types.beta.skill_create_response import SkillCreateResponse
Expand Down Expand Up @@ -105,6 +106,11 @@ def create(

timeout: Override the client-level default timeout for this request, in seconds
"""
if is_given(files) and files is not None:
files = normalize_skill_upload_paths(
list(files),
display_title=display_title if is_given(display_title) and display_title is not None else None,
)
extra_headers = {
**strip_not_given(
{
Expand Down Expand Up @@ -377,6 +383,11 @@ async def create(

timeout: Override the client-level default timeout for this request, in seconds
"""
if is_given(files) and files is not None:
files = normalize_skill_upload_paths(
list(files),
display_title=display_title if is_given(display_title) and display_title is not None else None,
)
extra_headers = {
**strip_not_given(
{
Expand Down
105 changes: 103 additions & 2 deletions tests/lib/tools/test_skills.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
"""Tests for skill-archive extraction (:mod:`anthropic.lib.tools._skills`).
"""Tests for skill-archive extraction and upload-path normalisation
(:mod:`anthropic.lib.tools._skills`).

Skill bundles are packaged wrapped in a single directory named after the skill
(e.g. ``pdf/SKILL.md``). The extractor must strip that wrapper so files land at
``<dest>/SKILL.md``, not the doubled ``<dest>/pdf/SKILL.md``. It must also still
refuse zip-slip / tar-slip members.

The upload side has a symmetric requirement: ``beta.skills.create`` rejects
paths that are not prefixed with the skill name. ``normalize_skill_upload_paths``
rewrites bare paths automatically so callers don't need to know this constraint.
"""

from __future__ import annotations
Expand All @@ -21,9 +26,17 @@
# archive records for that member.
ArchiveModeMaker = Callable[[Path, "dict[str, tuple[bytes, int]]"], None]

import io

import pytest

from anthropic.lib.tools._skills import _strip_top, _archive_top_dir, _extract_skill_archive
from anthropic.lib.tools._skills import (
_strip_top,
_archive_top_dir,
_extract_skill_archive,
normalize_skill_upload_paths,
_parse_skill_name_from_frontmatter,
)


def _make_zip(path: Path, entries: dict[str, bytes]) -> None:
Expand Down Expand Up @@ -194,3 +207,91 @@ def test_extract_drops_setuid_setgid_sticky(make: ArchiveModeMaker, tmp_path: Pa
# A non-executable member with setuid set must also drop the bit.
assert doc & 0o7000 == 0
assert doc == 0o644


# ---------------------------------------------------------------------------
# normalize_skill_upload_paths
# ---------------------------------------------------------------------------

# Minimal SKILL.md fixture: front-matter carrying ``name: my-skill`` and a
# description, followed by a short body.
_SKILL_MD_BYTES = b"""\
---
name: my-skill
description: A test skill.
---

Body text here.
"""


def test_parse_skill_name_from_frontmatter_found() -> None:
    """The fixture's ``name:`` line is parsed out as ``my-skill``."""
    parsed = _parse_skill_name_from_frontmatter(_SKILL_MD_BYTES)
    assert parsed == "my-skill"


def test_parse_skill_name_from_frontmatter_missing() -> None:
    """Content without any ``name:`` line parses to ``None``."""
    parsed = _parse_skill_name_from_frontmatter(b"no front matter here")
    assert parsed is None


def test_normalize_bare_paths_prefixed_from_skill_md() -> None:
    """Bare paths gain the prefix taken from the SKILL.md ``name:`` field."""
    manifest = ("SKILL.md", _SKILL_MD_BYTES, "text/markdown")
    script = ("scripts/run.py", b"print(1)", "text/x-python")
    normalized = normalize_skill_upload_paths([manifest, script])  # type: ignore[arg-type]
    manifest_path = normalized[0][0]
    script_path = normalized[1][0]
    assert manifest_path == "my-skill/SKILL.md"
    assert script_path == "my-skill/scripts/run.py"


def test_normalize_already_prefixed_paths_unchanged() -> None:
    """Paths already under the skill directory come back with the same names."""
    manifest = ("my-skill/SKILL.md", _SKILL_MD_BYTES, "text/markdown")
    script = ("my-skill/scripts/run.py", b"print(1)", "text/x-python")
    normalized = normalize_skill_upload_paths([manifest, script])  # type: ignore[arg-type]
    manifest_path = normalized[0][0]
    script_path = normalized[1][0]
    assert manifest_path == "my-skill/SKILL.md"
    assert script_path == "my-skill/scripts/run.py"


def test_normalize_idempotent() -> None:
    """Normalising an already-normalised list yields an equal list."""
    original = [("SKILL.md", _SKILL_MD_BYTES, "text/markdown")]
    first_pass = normalize_skill_upload_paths(original)  # type: ignore[arg-type]
    second_pass = normalize_skill_upload_paths(first_pass)
    assert first_pass == second_pass


def test_normalize_fallback_to_display_title() -> None:
    """Without a ``name:`` field the prefix is derived from ``display_title``."""
    nameless_manifest = b"---\ndescription: No name field.\n---\n"
    uploads = [("SKILL.md", nameless_manifest, "text/markdown")]
    normalized = normalize_skill_upload_paths(uploads, display_title="My Skill")  # type: ignore[arg-type]
    manifest_path = normalized[0][0]
    assert manifest_path == "my-skill/SKILL.md"


def test_normalize_display_title_special_chars() -> None:
    """Spaces and punctuation in ``display_title`` are reduced to hyphens."""
    nameless_manifest = b"---\ndescription: x\n---\n"
    uploads = [("SKILL.md", nameless_manifest, "text/markdown")]
    normalized = normalize_skill_upload_paths(uploads, display_title="My Skill v2!")  # type: ignore[arg-type]
    manifest_path = normalized[0][0]
    assert manifest_path == "my-skill-v2/SKILL.md"


def test_normalize_io_stream_skill_md() -> None:
    """A SKILL.md supplied as a stream is parsed and left fully readable."""
    manifest_stream = io.BytesIO(_SKILL_MD_BYTES)
    uploads = [("SKILL.md", manifest_stream, "text/markdown")]
    normalized = normalize_skill_upload_paths(uploads)  # type: ignore[arg-type]
    manifest_path = normalized[0][0]
    assert manifest_path == "my-skill/SKILL.md"
    # The SDK still has to send these bytes, so the stream must not be left drained.
    remaining = manifest_stream.read()
    assert remaining == _SKILL_MD_BYTES


def test_normalize_no_skill_md_no_display_title_unchanged() -> None:
    """With no SKILL.md and no title, the filename is left as supplied."""
    uploads = [("config.json", b"{}", "application/json")]
    normalized = normalize_skill_upload_paths(uploads)  # type: ignore[arg-type]
    only_path = normalized[0][0]
    assert only_path == "config.json"


def test_normalize_two_tuple_entries() -> None:
    """``(filename, content)`` pairs without a content type are prefixed too."""
    manifest = ("SKILL.md", _SKILL_MD_BYTES)
    readme = ("README.md", b"# readme")
    normalized = normalize_skill_upload_paths([manifest, readme])  # type: ignore[arg-type]
    manifest_path = normalized[0][0]
    readme_path = normalized[1][0]
    assert manifest_path == "my-skill/SKILL.md"
    assert readme_path == "my-skill/README.md"