Commit cedfeee — Merge pull request #467 from lbedner/gif-generator-changes
Docs and Media Update
2 parents 44e409c + 9d255b2

14 files changed: 302 additions & 56 deletions

Makefile

Lines changed: 4 additions & 2 deletions

@@ -137,18 +137,20 @@ help: ## Show this help message
 
 gif: ## Convert MP4 to high-quality GIF (usage: make gif INPUT=recording.mp4)
 ifndef INPUT
-	@echo "Usage: make gif INPUT=path/to/video.mp4 [OUTPUT=output.gif] [FPS=15] [WIDTH=1200]"
+	@echo "Usage: make gif INPUT=path/to/video.mp4 [OUTPUT=output.gif] [FPS=15] [WIDTH=1200] [START=0] [END=10]"
 	@echo ""
 	@echo "Options:"
 	@echo "  INPUT  - Required. Path to input MP4 file"
 	@echo "  OUTPUT - Optional. Output GIF path (default: same name as input with .gif)"
 	@echo "  FPS    - Optional. Frames per second (default: 15, max 30)"
 	@echo "  WIDTH  - Optional. Output width in pixels (default: 1200)"
+	@echo "  START  - Optional. Start time in seconds (default: beginning)"
+	@echo "  END    - Optional. End time in seconds (default: end of video)"
 	@exit 1
 endif
 	@echo "🎬 Converting $(INPUT) to GIF..."
 	@mkdir -p .gif-frames
-	@ffmpeg -i "$(INPUT)" -vf "fps=$(or $(FPS),15),scale=$(or $(WIDTH),1200):-1:flags=lanczos" -y .gif-frames/frame_%04d.png
+	@ffmpeg $(if $(START),-ss $(START)) -i "$(INPUT)" $(if $(END),-to $(END)) -vf "fps=$(or $(FPS),15),scale=$(or $(WIDTH),1200):-1:flags=lanczos" -y .gif-frames/frame_%04d.png
 	@gifski -o "$(or $(OUTPUT),$(basename $(INPUT)).gif)" --fps $(or $(FPS),15) --quality 90 .gif-frames/*.png
 	@rm -rf .gif-frames
 	@echo "✅ Created: $(or $(OUTPUT),$(basename $(INPUT)).gif)"

README.md

Lines changed: 47 additions & 26 deletions

@@ -20,7 +20,7 @@ No time for health checks, proper testing, or clean architecture. Just enough ti
 
 ![Aegis Stack Quick Start Demo](docs/images/aegis-demo.gif)
 
-Aegis Stack is a CLI that scaffolds modular Python applications — start with an API, add Auth, Scheduler, Workers, or AI services when you need them.
+Aegis Stack is a system for creating and evolving modular Python applications over time, built on tools you already know.
 
 ## Prerequisites
 
@@ -45,6 +45,42 @@ cd my-api && uv sync && cp .env.example .env && make serve
 
 **Installation alternatives:** See the [Installation Guide](https://lbedner.github.io/aegis-stack/installation/) for `uv tool install`, `pip install`, and development setup.
 
+## Overseer - Built-In System Visibility
+
+![Overseer](docs/images/overseer-demo.gif)
+
+**[Overseer](https://lbedner.github.io/aegis-stack/overseer/)** is the built-in system dashboard that ships with every Aegis Stack project.
+
+It provides a live view of what your application is doing at runtime - across core components (Backend, Database, Workers, Scheduler) and services (Auth, AI, Comms) - through a web UI.
+
+Overseer goes beyond simple health checks. You can inspect worker queues, scheduled jobs, database state, and AI usage, all in one place, without wiring up external tools.
+
+No Datadog. No New Relic. No vendor lock-in.
+
+Just a clear view of your system, included from day one.
+
+## CLI - First-Class System Interface
+
+![CLI Demo](docs/images/cli-demo.gif)
+
+The Aegis CLI is a first-class interface to your running system.
+
+It goes beyond simple health checks, exposing rich, component-specific commands for inspecting and understanding your application from the terminal.
+
+Query worker queues, scheduler activity, database state, AI usage, and service configuration, all without leaving the CLI.
+
+The same system intelligence that powers Overseer and Illiana is available here, optimized for terminal workflows.
+
+## Illiana - Optional System Operator
+
+![Illiana Demo](docs/images/illiana-demo.gif)
+
+When the AI service is enabled, Aegis exposes an additional interface: **Illiana**.
+
+Illiana is a conversational interface that answers questions about your running system using live telemetry and optional RAG over your codebase.
+
+She is not required to use Aegis Stack, and nothing in the system depends on her being present. When enabled, she becomes another way, alongside the CLI and Overseer, to understand what your application is doing and why.
+
 ## Your Stack Grows With You
 
 **Your choices aren't permanent.** Start with what you need today, add components when requirements change, remove what you outgrow.
@@ -75,36 +111,21 @@ aegis update
 
 Most starters lock you in at `init`. Aegis Stack doesn't. See **[Evolving Your Stack](https://lbedner.github.io/aegis-stack/evolving-your-stack/)** for the complete guide.
 
-## See It In Action
-
-### Overseer - Built-In Health Monitoring
-
-![Overseer](docs/images/overseer-demo.gif)
-
-**[Overseer](https://lbedner.github.io/aegis-stack/overseer/)** is the read-only health monitoring dashboard built into every Aegis Stack project. It provides real-time visibility into all your components (Backend, Database, Worker, Scheduler) and services (Auth, AI, Comms) through a web UI and CLI commands.
-
-No Datadog. No New Relic. No vendor lock-in. Just centralized monitoring you own from day one.
-
-### CLI Health Monitoring
-
-![CLI Health Check](docs/images/cli_health_check.png)
-
-Rich terminal output showing detailed component status, health metrics, and system diagnostics.
-
 ## Available Components & Services
 
 **Components** (infrastructure)
 
-- **Core** - API + Frontend (always included)
-- **Database** - ORM with health monitoring
-- **Scheduler** - Background tasks, cron jobs
-- **Worker** - Async task queues
+- **Core** → FastAPI + Pydantic V2 + Uvicorn
+- **Database** → Postgres / SQLite
+- **Cache/Queue** → Redis
+- **Scheduler** → APScheduler
+- **Worker** → Arq / Taskiq
 
 **Services** (business logic)
 
-- **Auth** - User authentication & JWT
-- **AI** - Multi-provider AI chat
-- **Comms** - Email, SMS, voice calls
+- **Auth** → JWT authentication
+- **AI** → PydanticAI / LangChain
+- **Comms** → Resend + Twilio
 
 [Components Docs →](https://lbedner.github.io/aegis-stack/components/) | [Services Docs →](https://lbedner.github.io/aegis-stack/services/)
 
@@ -121,6 +142,6 @@ Rich terminal output showing detailed component status, health metrics, and syst
 
 No reinventing the wheel. Just the tools you already know, pre-configured and ready to compose.
 
-Aegis Stack respects your expertise. We maintain existing standards - FastAPI for APIs, SQLModel for databases, arq for workers. No custom abstractions or proprietary patterns to learn. Pick your components, get a production-ready foundation, and build your way.
+Aegis Stack respects your expertise. No custom abstractions or proprietary patterns to learn. Pick your components, get a production-ready foundation, and build your way.
 
-The tool gets out of your way so you can get started.
+Aegis gets out of your way so you can get started.

aegis/templates/copier-aegis-project/{{ project_slug }}/.env.example.jinja

Lines changed: 2 additions & 2 deletions

@@ -142,8 +142,8 @@ RAG_EMBEDDING_MODEL=BAAI/bge-small-en-v1.5 # Free, local embedding model
 # Chunking settings
 RAG_CHUNK_SIZE=2000
 RAG_CHUNK_OVERLAP=400
-RAG_DEFAULT_TOP_K=5
-RAG_CHAT_TOP_K=10
+RAG_DEFAULT_TOP_K=15
+RAG_CHAT_TOP_K=15
 {% endif %}
 
 {% if include_comms %}

aegis/templates/copier-aegis-project/{{ project_slug }}/app/cli/ai.py.jinja

Lines changed: 37 additions & 9 deletions

@@ -35,6 +35,19 @@ from ..services.ai.providers import ProviderNotInstalledError
 # Initialize logging at module load
 setup_logging()
 
+# Provider display name aliases
+PROVIDER_DISPLAY_NAMES: dict[str, str] = {
+    "public": "LLM7.io",
+    "unknown": "LLM7.io",
+}
+
+
+def get_provider_display_name(provider: AIProvider | str) -> str:
+    """Get display name for a provider, with aliases for branding."""
+    value = provider.value if isinstance(provider, AIProvider) else provider
+    return PROVIDER_DISPLAY_NAMES.get(value, value)
+
+
 app = typer.Typer(help="AI service management and chat commands")
 console = Console()
 
@@ -58,7 +71,7 @@ def status() -> None:
         + typer.style(status_text, fg=status_color)
     )
     typer.echo(
-        typer.style("Provider: ", fg=typer.colors.CYAN) + ai_config.provider.value
+        typer.style("Provider: ", fg=typer.colors.CYAN) + get_provider_display_name(ai_config.provider)
     )
     typer.echo(typer.style("Model: ", fg=typer.colors.CYAN) + str(ai_config.model))
     typer.echo(
@@ -168,7 +181,7 @@ def providers() -> None:
         free_tier = "Yes" if provider in free_providers else "No"
 
         table.add_row(
-            provider.value,
+            get_provider_display_name(provider),
             installed_display,
             api_key_display,
             status,
@@ -532,6 +545,7 @@ def chat(
         {% if ai_rag %}
         use_rag=rag_enabled,
         rag_collection=rag_collection,
+        rag_top_k=top_k,
         show_sources=show_sources,
         {% endif %}
     )
@@ -618,7 +632,7 @@ def history(
     typer.echo(f"Conversation: {conversation_id}")
     if conversation.title:
         typer.echo(f"Title: {conversation.title}")
-    typer.echo(f"Provider: {conversation.provider.value}")
+    typer.echo(f"Provider: {get_provider_display_name(conversation.provider)}")
     typer.echo(f"Messages: {conversation.get_message_count()}")
     typer.echo("")
 
@@ -701,7 +715,8 @@ def usage(
     class RecentActivity(BaseModel):
         timestamp: str
         model: str
-        tokens: int
+        input_tokens: int
+        output_tokens: int
         cost: float
         success: bool
         action: str
@@ -732,9 +747,15 @@ def usage(
             "groq": "yellow",
             "mistral": "cyan",
             "cohere": "bright_red",
+            "public": "bright_cyan",
+            "llm7.io": "bright_cyan",
         }
         return colors.get(vendor.lower(), fallback)
 
+    def get_vendor_display_name(vendor: str) -> str:
+        """Get display name for a vendor, with aliases for branding."""
+        return PROVIDER_DISPLAY_NAMES.get(vendor.lower(), vendor)
+
     # Display functions
     def display_summary_panel(stats: UsageStatsResponse) -> None:
         success_color = get_success_color(stats.success_rate)
@@ -791,9 +812,10 @@ def usage(
         table.add_column("Share", justify="right")
         for model in stats.models:
             vendor_color = get_vendor_color(model.vendor, model.vendor_color)
+            vendor_display = get_vendor_display_name(model.vendor)
             table.add_row(
                 model.model_title,
-                f"[{vendor_color}]{model.vendor}[/{vendor_color}]",
+                f"[{vendor_color}]{vendor_display}[/{vendor_color}]",
                 format_number(model.requests),
                 format_number(model.tokens),
                 format_cost(model.cost),
@@ -820,11 +842,12 @@ def usage(
         except ValueError:
             time_str = activity.timestamp[:8]
         status = "[green]OK[/green]" if activity.success else "[red]FAIL[/red]"
+        total_tokens = activity.input_tokens + activity.output_tokens
         table.add_row(
             time_str,
             activity.model,
             activity.action,
-            format_number(activity.tokens),
+            format_number(total_tokens),
             format_cost(activity.cost),
             status,
         )
@@ -1007,6 +1030,7 @@ async def _interactive_chat_session(
     {% if ai_rag %}
     use_rag: bool = False,
     rag_collection: str | None = None,
+    rag_top_k: int | None = None,
     show_sources: bool = False,
     {% endif %}
 ) -> None:
@@ -1027,8 +1051,10 @@ async def _interactive_chat_session(
         await asyncio.sleep(0.15)
         console.print(" [green]OK[/green]")
 
-        console.print(f" [dim]>[/dim] Connecting to [cyan]{ai_config.provider.value}[/cyan]...", end="")
-        await asyncio.sleep(0.2)
+        console.print(f" [dim]>[/dim] Connecting to [cyan]{get_provider_display_name(ai_config.provider)}[/cyan]...", end="")
+        # Warm up the agent (lazy imports, model initialization)
+        from app.services.ai.providers import get_agent
+        _ = get_agent(ai_config, settings)
         console.print(" [green]OK[/green]")
 
         console.print(f" [dim]>[/dim] Model: [cyan]{ai_config.model}[/cyan]")
@@ -1090,7 +1116,7 @@ async def _interactive_chat_session(
 
     # Initialize status line state for prompt_toolkit toolbar
     session_state = ChatSessionState(
-        provider=ai_config.provider.value,
+        provider=get_provider_display_name(ai_config.provider),
         model=ai_config.model,
         {% if ai_rag %}
         rag_enabled=use_rag,
@@ -1241,6 +1267,7 @@ async def _interactive_chat_session(
         {% if ai_rag %}
         use_rag=use_rag,
         rag_collection=rag_collection,
+        rag_top_k=rag_top_k,
         show_sources=show_sources,
         {% endif %}
         session_state=session_state,
@@ -1274,6 +1301,7 @@ async def _interactive_chat_session(
         {% if ai_rag %}
         use_rag=use_rag,
         rag_collection=rag_collection,
+        rag_top_k=rag_top_k,
         {% endif %}
     )
     finally:
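The provider rebranding in this diff is a plain dict lookup with fallback. Stripped of the Typer/Rich context (and of the `AIProvider` enum branch, simplified here to strings only), it reduces to:

```python
# Display-name aliasing as added in the diff: raw provider identifiers
# ("public", "unknown") are rebranded as "LLM7.io" at display time,
# while any other identifier falls through unchanged.
PROVIDER_DISPLAY_NAMES: dict[str, str] = {
    "public": "LLM7.io",
    "unknown": "LLM7.io",
}


def get_provider_display_name(provider: str) -> str:
    """Return the branded display name, falling back to the raw value."""
    return PROVIDER_DISPLAY_NAMES.get(provider, provider)


print(get_provider_display_name("public"))   # LLM7.io
print(get_provider_display_name("openai"))   # openai
```

Centralizing the mapping in one module-level dict lets `status`, `providers`, `history`, and the usage table all agree on the branding without touching stored provider values.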

aegis/templates/copier-aegis-project/{{ project_slug }}/app/core/config.py.jinja

Lines changed: 2 additions & 2 deletions

@@ -205,8 +205,8 @@ class Settings(BaseSettings):
     RAG_MODEL_CACHE_DIR: str | None = None  # None = system default, "./data/models" for local dev
     RAG_CHUNK_SIZE: int = 2000
     RAG_CHUNK_OVERLAP: int = 400
-    RAG_DEFAULT_TOP_K: int = 5
-    RAG_CHAT_TOP_K: int = 10  # Number of chunks for AI chat context
+    RAG_DEFAULT_TOP_K: int = 15
+    RAG_CHAT_TOP_K: int = 15  # Number of chunks for AI chat context
     {% endif %}
 
     {% if include_redis or database_engine == "postgres" %}

aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/ai/fixtures/llm_fixtures.py

Lines changed: 13 additions & 13 deletions

@@ -3,10 +3,10 @@
 Contains seed data for LLM vendors, models, deployments, and pricing (Dec 2024 rates).
 
 Architecture:
-- LLMVendor: API providers (OpenAI, Anthropic, public/LLM7.io, etc.)
+- LLMVendor: API providers (OpenAI, Anthropic, LLM7.io, etc.)
 - LargeLanguageModel: Unique models (gpt-4o-mini exists ONCE, owned by OpenAI)
-- LLMDeployment: Which vendors offer which models (public deploys gpt-4o-mini via proxy)
-- LLMPrice: Per vendor-model pricing (OpenAI charges $0.15, public charges $0.00)
+- LLMDeployment: Which vendors offer which models (LLM7.io deploys gpt-4o-mini via proxy)
+- LLMPrice: Per vendor-model pricing (OpenAI charges $0.15, LLM7.io charges $0.00)
 """
 
 from datetime import UTC, datetime
@@ -69,9 +69,9 @@
         "auth_method": "api-key",
     },
     {
-        "name": "public",
-        "description": "Public free endpoints via LLM7.io (no API key required)",
-        "color": "#6B7280",
+        "name": "LLM7.io",
+        "description": "Free public endpoints via LLM7.io (no API key required)",
+        "color": "#00D4AA",
         "api_base": "https://api.llm7.io/v1",
         "auth_method": "none",
     },
@@ -294,8 +294,8 @@
             "family": "command",
         },
     ],
-    # Public vendor's "auto" model - represents dynamic model selection via LLM7.io
-    "public": [
+    # LLM7.io's "auto" model - represents dynamic model selection
+    "LLM7.io": [
         {
             "model_id": "auto",
             "title": "Auto (LLM7.io)",
@@ -418,8 +418,8 @@
         {"model_id": "command-r", "speed": 75, "intelligence": 75, "reasoning": 72},
        {"model_id": "command-light", "speed": 90, "intelligence": 55, "reasoning": 50},
     ],
-    # Public deploys models via LLM7.io proxy (free but slower, no streaming)
-    "public": [
+    # LLM7.io deploys models via proxy (free but slower, no streaming)
+    "LLM7.io": [
         {"model_id": "gpt-4o-mini", "speed": 40, "intelligence": 75, "reasoning": 70},
         {"model_id": "auto", "speed": 40, "intelligence": 75, "reasoning": 70},
     ],
@@ -457,9 +457,9 @@
     ("cohere", "command-r-plus"): {"input": 2.50, "output": 10.00},
     ("cohere", "command-r"): {"input": 0.15, "output": 0.60},
     ("cohere", "command-light"): {"input": 0.03, "output": 0.06},
-    # Public pricing (free via LLM7.io)
-    ("public", "gpt-4o-mini"): {"input": 0.00, "output": 0.00},
-    ("public", "auto"): {"input": 0.00, "output": 0.00},
+    # LLM7.io pricing (free)
+    ("LLM7.io", "gpt-4o-mini"): {"input": 0.00, "output": 0.00},
+    ("LLM7.io", "auto"): {"input": 0.00, "output": 0.00},
 }

aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/rag/config.py

Lines changed: 1 addition & 1 deletion

@@ -54,7 +54,7 @@ class RAGServiceConfig(BaseModel):
         description="Overlap between chunks in characters",
     )
     default_top_k: int = Field(
-        default=5,
+        default=15,
         gt=0,
         le=50,
         description="Default number of search results",
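The new default of 15 sits well inside the unchanged `gt=0, le=50` bounds that pydantic's `Field` enforces. A stdlib sketch of that constraint (the function name is illustrative, not part of the project):

```python
# Stdlib equivalent of Field(default=15, gt=0, le=50) on default_top_k:
# the value must be strictly positive and at most 50.
def validate_top_k(value: int = 15) -> int:
    if not (0 < value <= 50):
        raise ValueError(f"default_top_k must be in (0, 50], got {value}")
    return value

print(validate_top_k())  # the new default, 15
```

Raising the default from 5 to 15 matches the `RAG_DEFAULT_TOP_K=15` / `RAG_CHAT_TOP_K=15` changes in the `.env.example.jinja` and `config.py.jinja` templates, so more chunks are retrieved for chat context out of the box.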
