Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 0 additions & 11 deletions docs/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,11 @@ Config stored in `~/.mini-agent/config.toml`.
|---|---|---|
| `model_id` | `claude-sonnet-4-6` | Any model ID from `/v1/models` |
| `reasoning_effort` | `high` | `disabled`, `adaptive`, `low`, `medium`, `high`, `xhigh`, `max` |
| `provider` | *(none)* | Any provider ID from [models.dev](https://models.dev/) |
| `cache_control` | `false` | `true`, `false` |

## Example

```toml
provider = "openrouter"
model_id = "gemini-3.5-flash"
reasoning_effort = "high"
cache_control = true
Expand All @@ -22,13 +20,4 @@ cache_control = true

- `/model` in interactive mode saves to config.toml permanently.
- The file is read on startup. Created automatically when you first run `/model`.
- `provider` is optional. If specified, `mini-agent` searches for model metadata under this provider inside [models.dev](https://models.dev/) first. If not found or omitted, it falls back to prefix matching, then searches all providers.

| Default Provider | Model ID Prefixes |
|---|---|
| `anthropic` | `claude-` |
| `deepseek` | `deepseek-` |
| `google` | `gemini-` |
| `openai` | `gpt-`, `o3`, `o4`, `text-`, `chatgpt-` |

- `cache_control` — When set to `true`, sends an [ephemeral cache control](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching) directive on every message, enabling prompt caching where the API supports it. Defaults to `false`. Typically only useful with Anthropic Claude models.
65 changes: 16 additions & 49 deletions src/mini_agent/cli/models.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import json
import time
import urllib.request
from urllib.parse import urlparse

from ..config import (
CONFIG_DIR,
DEFAULT_PROVIDERS,
REASONING_EFFORT_LEVELS,
client,
config,
Expand All @@ -13,25 +13,6 @@
from .display.picker import select_from_list


def _get_provider_hint(model_id: str) -> str | None:
    """Return the first default provider whose ID prefixes match *model_id*.

    Providers are tried in the iteration order of ``DEFAULT_PROVIDERS``.
    Returns ``None`` when no configured prefix matches.
    """
    matching_providers = (
        name
        for name, prefixes in DEFAULT_PROVIDERS.items()
        # str.startswith accepts a tuple, so all prefixes are tested in one call.
        if model_id.startswith(tuple(prefixes))
    )
    return next(matching_providers, None)


def _get_limit_for_provider(
cache: dict[str, dict], provider_id: str, model_id: str, key: str
) -> int | None:
provider = cache.get(provider_id)
if not provider:
return None
model = (provider.get("models") or {}).get(model_id)
if not model:
return None
return (model.get("limit") or {}).get(key)


class _ModelInfo:
def __init__(self) -> None:
self._cache_path = CONFIG_DIR / "models.json"
Expand All @@ -48,41 +29,27 @@ def _load_cache(self) -> dict[str, dict]:
return self._cache

def get_best_limit(self, model_id: str, key: str) -> int | None:
"""Return the maximum value for *key* (e.g. 'context' or 'output') across all providers for a given model."""
"""Return the value for *key* (e.g. 'context' or 'output') for a given model."""
cache = self._load_cache()

# 1. Try with user-specified provider
user_provider = config.get_provider()
if user_provider:
best = _get_limit_for_provider(cache, user_provider, model_id, key)
if best is not None:
return best

# 2. Fall back to prefix matching using DEFAULT_PROVIDERS
provider_hint = _get_provider_hint(model_id)
if provider_hint and provider_hint != user_provider:
best = _get_limit_for_provider(cache, provider_hint, model_id, key)
if best is not None:
return best

# 3. Fall back to searching across ALL providers in the catalog
best = None
for provider_id, provider in cache.items():
if provider_id in (user_provider, provider_hint):
continue
model = (provider.get("models") or {}).get(model_id)
if model is None:
continue
value = (model.get("limit") or {}).get(key)
if value is not None and (best is None or value > best):
best = value
return best
model = cache.get(model_id)
if model is None:
for full_key, data in cache.items():
if full_key.split("/")[-1] == model_id:
model = data
break
Comment on lines +35 to +39

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Short-name fallback can silently return data for the wrong provider

The loop returns the first entry whose full_key.split("/")[-1] equals model_id, and the order depends on JSON parse order. If two providers list a model with the same short name (e.g., provider-a/deepseek-v4-flash and provider-b/deepseek-v4-flash), the context/output limits from whichever entry appears first in models.json are returned, regardless of which provider is actually serving the model. Consider logging a warning or preferring a well-known provider when a collision is detected.

Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time!

if model is None:
return None
return (model.get("limit") or {}).get(key)

def refresh_cache(self) -> None:
"""Fetch the latest model data from the remote API and update the local cache."""
if self._cache_path.exists():
age = time.time() - self._cache_path.stat().st_mtime
if age < 3600:
return
Comment on lines +46 to +49

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

If the cache file exists but is empty or contains invalid JSON, _load_cache() will return an empty dictionary, but refresh_cache() will still skip the refresh if the file was modified less than an hour ago. To make this more robust, we should only skip the refresh if the cache was successfully loaded with data.

        if self._cache_path.exists() and self._load_cache():
            age = time.time() - self._cache_path.stat().st_mtime
            if age < 3600:
                return

Comment on lines +46 to +49

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 TTL skips refresh even for corrupt cache files

The age check gates the entire fetch unconditionally on file existence and age. If the cache file is corrupted (invalid JSON), _load_cache() catches the JSONDecodeError and silently returns {}, but refresh_cache() will not attempt to re-download within the 1-hour window because the file's mtime is recent. A user would see stale empty limits for up to an hour with no feedback.

try:
req = urllib.request.Request(
"https://models.dev/api.json",
"https://models.dev/models.json",
headers={"User-Agent": "Mozilla/5.0"},
)
with urllib.request.urlopen(req, timeout=5) as resp:
Expand Down
17 changes: 0 additions & 17 deletions src/mini_agent/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,6 @@
"xhigh",
"max",
]
DEFAULT_PROVIDERS = {
"anthropic": ["claude-"],
"deepseek": ["deepseek-"],
"google": ["gemini-"],
"openai": ["gpt-", "o3", "o4", "text-", "chatgpt-"],
}
CONFIG_DIR = DEFAULT_CONFIG_DIR
SESSION_DIR = CONFIG_DIR / "sessions"
CONFIG_FILE = CONFIG_DIR / "config.toml"
Expand All @@ -51,24 +45,13 @@ def __init__(self) -> None:
self._session_model_override: str | None = None
self._reasoning_effort: str | None = None
self._session_reasoning_effort_override: str | None = None
self._provider: str | None = None
self._provider_loaded: bool = False
self._cache_control: bool | None = None

def _load_config(self) -> dict[str, object]:
    """Parse ``CONFIG_FILE`` as TOML and return its top-level table.

    Returns:
        The parsed configuration, or an empty dict when the file does not
        exist.

    Raises:
        tomllib.TOMLDecodeError: If the file exists but is not valid TOML.
    """
    # TOML documents are UTF-8 by specification, so decode explicitly rather
    # than relying on the platform's locale encoding. Reading directly and
    # catching FileNotFoundError also avoids the exists()/read race.
    try:
        text = CONFIG_FILE.read_text(encoding="utf-8")
    except FileNotFoundError:
        return {}
    return tomllib.loads(text)

def get_provider(self) -> str | None:
if not self._provider_loaded:
cfg = self._load_config()
val = cfg.get("provider")
if isinstance(val, str):
self._provider = val
self._provider_loaded = True
return self._provider

def set_session_model(self, model_id: str) -> None:
    """Store *model_id* as the session-level model override on this instance."""
    self._session_model_override = model_id

Expand Down
Loading