Commit e09a1c2

Merge pull request #472 from lbedner/ollama-part-1

Ollama Part 1

2 parents: cedfeee + d0d0d6b

51 files changed: 4387 additions & 141 deletions


aegis/cli/interactive.py

Lines changed: 70 additions & 0 deletions
@@ -17,6 +17,7 @@
     AnswerKeys,
     ComponentNames,
     Messages,
+    OllamaMode,
     StorageBackends,
 )
 from ..core.components import COMPONENTS, CORE_COMPONENTS, ComponentSpec, ComponentType
@@ -37,6 +38,9 @@
 # Global variable to store skip LLM sync selection for template generation
 _skip_llm_sync_selection: dict[str, bool] = {}

+# Global variable to store Ollama mode selection for template generation
+_ollama_mode_selection: dict[str, str] = {}
+
 # Global variable to store database engine selection for template generation
 _database_engine_selection: str | None = None

@@ -437,6 +441,37 @@ def clear_skip_llm_sync_selection() -> None:
     _skip_llm_sync_selection.clear()


+def get_ollama_mode_selection(service_name: str = "ai") -> str:
+    """
+    Get Ollama mode selection from interactive session.
+
+    Args:
+        service_name: Name of the AI service (defaults to "ai")
+
+    Returns:
+        Selected Ollama mode (host, docker, or none)
+    """
+    return _ollama_mode_selection.get(service_name, OllamaMode.NONE)
+
+
+def set_ollama_mode_selection(service_name: str, mode: str) -> None:
+    """
+    Set Ollama mode selection.
+
+    Args:
+        service_name: Name of the AI service (defaults to "ai")
+        mode: Ollama mode (host, docker, or none)
+    """
+    global _ollama_mode_selection
+    _ollama_mode_selection[service_name] = mode
+
+
+def clear_ollama_mode_selection() -> None:
+    """Clear stored Ollama mode selection (useful for testing)."""
+    global _ollama_mode_selection
+    _ollama_mode_selection.clear()
+
+
 def set_ai_service_config(
     service_name: str = "ai",
     framework: str | None = None,
@@ -455,13 +490,17 @@ def set_ai_service_config(
         providers: List of AI providers
     """
     global _ai_framework_selection, _ai_backend_selection, _ai_provider_selection
+    global _ollama_mode_selection

     if framework is not None:
         _ai_framework_selection[service_name] = framework
     if backend is not None:
         _ai_backend_selection[service_name] = backend
     if providers is not None:
         _ai_provider_selection[service_name] = providers
+        # Auto-set ollama_mode to "host" when ollama is a provider (non-interactive default)
+        if AIProviders.OLLAMA in providers:
+            _ollama_mode_selection[service_name] = OllamaMode.HOST


 def clear_all_ai_selections() -> None:
@@ -471,6 +510,7 @@ def clear_all_ai_selections() -> None:
     clear_ai_backend_selection()
     clear_ai_rag_selection()
     clear_skip_llm_sync_selection()
+    clear_ollama_mode_selection()
     clear_database_engine_selection()


@@ -575,6 +615,36 @@ def interactive_ai_service_config(
     # Store provider selection in global context for template generation
     _ai_provider_selection[service_name] = providers

+    # Ollama deployment mode selection (only if Ollama was selected)
+    if AIProviders.OLLAMA in providers:
+        typer.echo("\nOllama Deployment Mode:")
+        typer.echo(" How do you want to run Ollama?")
+        typer.echo(
+            " 1. Host - Connect to Ollama running on your machine (Mac/Windows)"
+        )
+        typer.echo(" 2. Docker - Run Ollama in a Docker container (Linux/Deploy)")
+
+        use_host = typer.confirm(
+            " Connect to host Ollama? (recommended for Mac/Windows)",
+            default=True,
+        )
+        ollama_mode = OllamaMode.HOST if use_host else OllamaMode.DOCKER
+        _ollama_mode_selection[service_name] = ollama_mode
+
+        if ollama_mode == OllamaMode.HOST:
+            typer.secho(
+                " Ollama will connect to host.docker.internal:11434", fg="green"
+            )
+            typer.echo(" Make sure Ollama is running: ollama serve")
+        else:
+            typer.secho(
+                " Ollama service will be added to docker-compose.yml", fg="green"
+            )
+            typer.echo(" Note: First startup may take time to download models")
+    else:
+        # No Ollama selected - set mode to none
+        _ollama_mode_selection[service_name] = OllamaMode.NONE
+
     # RAG selection with Python 3.14 compatibility check
     typer.echo("\nRAG (Retrieval-Augmented Generation):")
     if sys.version_info >= (3, 14):

aegis/constants.py

Lines changed: 18 additions & 1 deletion
@@ -57,9 +57,10 @@ class AIProviders:
     GROQ = "groq"
     MISTRAL = "mistral"
     COHERE = "cohere"
+    OLLAMA = "ollama"

     # All valid providers (used for validation)
-    ALL = {PUBLIC, OPENAI, ANTHROPIC, GOOGLE, GROQ, MISTRAL, COHERE}
+    ALL = {PUBLIC, OPENAI, ANTHROPIC, GOOGLE, GROQ, MISTRAL, COHERE, OLLAMA}

     # Default providers for bracket syntax (non-interactive)
     DEFAULT = [PUBLIC]
@@ -75,9 +76,24 @@ class AIProviders:
         (GROQ, "Groq", "Fast inference", "Free tier", True),
         (MISTRAL, "Mistral", "Open models", "Mostly paid", False),
         (COHERE, "Cohere", "Enterprise focus", "Limited free", False),
+        (OLLAMA, "Ollama", "Local inference", "Free (local)", True),
     ]


+class OllamaMode:
+    """Ollama deployment mode options."""
+
+    HOST = "host"  # Connect to Ollama running on host machine
+    DOCKER = "docker"  # Run Ollama in Docker container
+    NONE = "none"  # No Ollama (using cloud provider)
+
+    ALL = [HOST, DOCKER, NONE]
+
+    # Default URLs for each mode
+    HOST_URL = "http://host.docker.internal:11434"  # For Mac/Windows Docker
+    DOCKER_URL = "http://ollama:11434"  # For Docker service
+
+
 class AnswerKeys:
     """Keys in Copier .copier-answers.yml configuration."""

@@ -108,6 +124,7 @@ class AnswerKeys:
     AI_BACKEND = "ai_backend"
     AI_WITH_PERSISTENCE = "ai_with_persistence"
     AI_RAG = "ai_rag"
+    OLLAMA_MODE = "ollama_mode"
     PROJECT_SLUG = "project_slug"
     SRC_PATH = "_src_path"
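
For consumers of these constants, resolving the right default base URL is a dictionary lookup. A hypothetical helper (not part of this commit) built only on the values defined above:

# Hypothetical helper: map an Ollama deployment mode to its default URL.
from aegis.constants import OllamaMode


def default_ollama_url(mode: str) -> str | None:
    """Return the default base URL for a mode, or None for 'none'."""
    if mode not in OllamaMode.ALL:
        raise ValueError(f"Unknown Ollama mode: {mode!r}")
    return {
        OllamaMode.HOST: OllamaMode.HOST_URL,      # Mac/Windows Docker Desktop
        OllamaMode.DOCKER: OllamaMode.DOCKER_URL,  # docker-compose service
        OllamaMode.NONE: None,                     # cloud provider only
    }[mode]


assert default_ollama_url("host") == "http://host.docker.internal:11434"
assert default_ollama_url("docker") == "http://ollama:11434"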

aegis/core/copier_manager.py

Lines changed: 3 additions & 0 deletions
@@ -129,6 +129,9 @@ def generate_with_copier(
         )
         == "yes",
         AnswerKeys.AI_RAG: cookiecutter_context.get(AnswerKeys.AI_RAG, "no") == "yes",
+        AnswerKeys.OLLAMA_MODE: cookiecutter_context.get(
+            AnswerKeys.OLLAMA_MODE, "none"
+        ),
     }

     # Detect dev vs production mode for template sourcing
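
The normalization pattern is worth noting: yes/no Copier answers become booleans, while ollama_mode passes through as a plain string with "none" as the fallback. A standalone sketch, with literal keys standing in for the AnswerKeys constants:

# Sketch of the answer-normalization pattern above (keys are literal
# stand-ins for AnswerKeys; ollama_mode is intentionally absent here).
cookiecutter_context = {"ai_rag": "yes"}

answers = {
    "ai_rag": cookiecutter_context.get("ai_rag", "no") == "yes",
    "ollama_mode": cookiecutter_context.get("ollama_mode", "none"),
}
assert answers == {"ai_rag": True, "ollama_mode": "none"}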

aegis/core/post_gen_tasks.py

Lines changed: 17 additions & 8 deletions
@@ -112,7 +112,6 @@ def get_component_file_mapping() -> dict[str, list[str]]:
             # Note: alembic is now shared between auth and AI services
             # Frontend dashboard files
             "app/components/frontend/dashboard/cards/auth_card.py",
-            "app/components/frontend/dashboard/cards/services_card.py",
             "app/components/frontend/dashboard/modals/auth_modal.py",
         ],
         AnswerKeys.SERVICE_AI: [
@@ -410,24 +409,26 @@ def is_enabled(key: str) -> bool:
     ai_backend = context.get(AnswerKeys.AI_BACKEND, StorageBackends.MEMORY)
     if ai_backend == StorageBackends.MEMORY:
         remove_file(project_path, "app/models/conversation.py")
-        # Remove LLM tracking models and ETL (only needed with persistence)
-        remove_dir(project_path, "app/services/ai/models")
+        # Remove LLM tracking models (only needed with persistence)
+        # Keep app/services/ai/models/__init__.py - contains core types (AIProvider, ProviderConfig)
+        remove_dir(project_path, "app/services/ai/models/llm")
         remove_dir(project_path, "app/services/ai/etl")
         remove_dir(project_path, "app/services/ai/fixtures")
-        # Remove persistence-related contexts
+        # Remove persistence-related contexts (keep usage_context.py - no DB deps)
         remove_file(project_path, "app/services/ai/llm_catalog_context.py")
         remove_file(project_path, "app/services/ai/llm_service.py")
         remove_file(project_path, "app/services/ai/provider_management.py")
-        remove_file(project_path, "app/services/ai/usage_context.py")
         # Remove persistence-related tests
         remove_dir(project_path, "tests/services/ai/etl")
         remove_file(project_path, "tests/services/ai/test_usage_tracking.py")
         remove_file(project_path, "tests/services/ai/test_llm_catalog_context.py")
         remove_file(project_path, "tests/services/ai/test_llm_service.py")
         remove_file(project_path, "tests/services/ai/test_provider_management.py")
-        # Remove LLM CLI (catalog management needs database)
+        # Remove LLM CLI and API (catalog management needs database)
         remove_file(project_path, "app/cli/llm.py")
         remove_file(project_path, "tests/cli/test_llm_cli.py")
+        remove_dir(project_path, "app/components/backend/api/llm")
+        remove_file(project_path, "tests/api/test_llm_endpoints.py")
         # Remove analytics UI (needs database for usage tracking)
         remove_file(
             project_path, "app/components/frontend/dashboard/modals/ai_analytics_tab.py"
@@ -469,10 +470,18 @@ def is_enabled(key: str) -> bool:
             project_path, "app/components/frontend/dashboard/cards/auth_card.py"
         )
         remove_file(
-            project_path, "app/components/frontend/dashboard/cards/services_card.py"
+            project_path, "app/components/frontend/dashboard/modals/auth_modal.py"
         )
+
+    # Remove services_card.py only if NO services are enabled
+    # ServicesCard shows all services (auth, AI, comms), so keep if ANY service is enabled
+    if (
+        not is_enabled(AnswerKeys.AUTH)
+        and not is_enabled(AnswerKeys.AI)
+        and not is_enabled(AnswerKeys.COMMS)
+    ):
         remove_file(
-            project_path, "app/components/frontend/dashboard/modals/auth_modal.py"
+            project_path, "app/components/frontend/dashboard/cards/services_card.py"
         )

     # Remove Alembic directory only if NO service needs migrations
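
The retention rule reads as a single predicate: services_card.py is removed only when every service it can display is disabled. A standalone sketch (context keys are illustrative stand-ins for the AnswerKeys constants):

# Sketch of the services_card.py retention rule introduced above.
def should_remove_services_card(context: dict[str, bool]) -> bool:
    """True only when auth, AI, and comms are all disabled."""
    return not (
        context.get("service_auth", False)
        or context.get("service_ai", False)
        or context.get("service_comms", False)
    )


# Keep the card as soon as any one service is enabled.
assert should_remove_services_card({})
assert not should_remove_services_card({"service_ai": True})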

aegis/core/template_generator.py

Lines changed: 23 additions & 0 deletions
@@ -14,6 +14,7 @@
     AIFrameworks,
     AnswerKeys,
     ComponentNames,
+    OllamaMode,
     StorageBackends,
     WorkerBackends,
 )
@@ -213,6 +214,8 @@ def get_template_context(self) -> dict[str, Any]:
             AnswerKeys.AI_PROVIDERS: self._get_ai_providers_string(),
             # AI RAG (Retrieval-Augmented Generation) selection
             AnswerKeys.AI_RAG: "yes" if self.ai_rag else "no",
+            # Ollama deployment mode (host, docker, or none)
+            AnswerKeys.OLLAMA_MODE: self._get_ollama_mode(),
             # Dependency lists for templates
             "selected_components": selected_only,  # Original selection for context
             "docker_services": self._get_docker_services(),
@@ -352,6 +355,26 @@ def _get_ai_framework(self) -> str:

         return get_ai_framework_selection("ai")

+    def _get_ollama_mode(self) -> str:
+        """
+        Get Ollama deployment mode selection (host, docker, or none).
+
+        Returns:
+            Ollama mode string
+        """
+        # Check if AI service is selected (handle bracket syntax)
+        has_ai = any(
+            extract_base_service_name(s) == AnswerKeys.SERVICE_AI
+            for s in self.selected_services
+        )
+        if not has_ai:
+            return OllamaMode.NONE  # Default when AI not selected
+
+        # Import here to avoid circular imports
+        from ..cli.interactive import get_ollama_mode_selection
+
+        return get_ollama_mode_selection("ai")
+
     def _get_ai_framework_deps(self) -> list[str]:
         """
         Get AI framework-specific dependencies based on framework and provider selection.
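
A standalone sketch of the _get_ollama_mode() decision flow above; extract_base_service_name is simplified here to strip bracket syntax ("ai[langchain]" -> "ai"), and the literal "ai" stands in for AnswerKeys.SERVICE_AI:

# Sketch: Ollama mode is "none" unless an AI service is selected,
# in which case the stored (interactive or auto-set) mode wins.
def extract_base_service_name(service: str) -> str:
    return service.split("[", 1)[0]


def resolve_ollama_mode(selected_services: list[str], stored_mode: str) -> str:
    has_ai = any(
        extract_base_service_name(s) == "ai" for s in selected_services
    )
    return stored_mode if has_ai else "none"


assert resolve_ollama_mode(["auth"], "host") == "none"
assert resolve_ollama_mode(["ai[langchain]"], "host") == "host"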

aegis/templates/copier-aegis-project/{{ project_slug }}/.env.example.jinja

Lines changed: 16 additions & 0 deletions
@@ -114,6 +114,21 @@ AI_CONVERSATION_TIMEOUT_HOURS=24 # Auto-cleanup old conversations
 # MISTRAL_API_KEY=...
 # COHERE_API_KEY=...

+{% if ollama_mode != "none" %}
+# Ollama Configuration (Local LLM Inference)
+{% if ollama_mode == "host" %}
+# Connecting to Ollama on host machine (Mac/Windows Docker Desktop)
+OLLAMA_BASE_URL=http://host.docker.internal:11434
+# For local CLI commands when not running in Docker:
+OLLAMA_BASE_URL_LOCAL=http://localhost:11434
+{% elif ollama_mode == "docker" %}
+# Connecting to Ollama Docker container
+OLLAMA_BASE_URL=http://ollama:11434
+# For local CLI commands when not running in Docker:
+OLLAMA_BASE_URL_LOCAL=http://localhost:11434
+{% endif %}
+{% endif %}
+
 # Provider-Specific Notes:
 # - Public: Works immediately without API keys (uses free endpoints, actual model varies)
 # - Groq: Excellent free tier with fast inference (recommended for testing)
@@ -122,6 +137,7 @@ AI_CONVERSATION_TIMEOUT_HOURS=24 # Auto-cleanup old conversations
 # - Anthropic: Requires paid account, very good for reasoning
 # - Mistral: Good open source models, some free options
 # - Cohere: Enterprise focused, limited free tier
+# - Ollama: Free local inference, requires Ollama installed (https://ollama.com)
 {% endif %}

 {% if ai_rag %}
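
To see what this template emits, a sketch that renders an abbreviated copy of the Ollama block with jinja2 (assumes the jinja2 package; the snippet is trimmed from the diff above):

# Sketch: rendering a trimmed copy of the Ollama .env block.
from jinja2 import Template

snippet = """{% if ollama_mode != "none" %}
# Ollama Configuration (Local LLM Inference)
{% if ollama_mode == "host" %}
OLLAMA_BASE_URL=http://host.docker.internal:11434
{% elif ollama_mode == "docker" %}
OLLAMA_BASE_URL=http://ollama:11434
{% endif %}
{% endif %}"""

rendered = Template(snippet).render(ollama_mode="host")
assert "OLLAMA_BASE_URL=http://host.docker.internal:11434" in rendered
assert "http://ollama:11434" not in rendered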

aegis/templates/copier-aegis-project/{{ project_slug }}/app/cli/ai.py.jinja

Lines changed: 13 additions & 8 deletions
@@ -152,8 +152,10 @@ def providers() -> None:
         is_current = provider == ai_config.provider

         # Determine API key status
-        if provider == AIProvider.PUBLIC:
-            has_api_key = True  # PUBLIC doesn't need one
+        # LOCAL providers (PUBLIC, OLLAMA) don't require API keys
+        local_providers = {AIProvider.PUBLIC, AIProvider.OLLAMA}
+        if provider in local_providers:
+            has_api_key = True  # Local providers don't need API keys
             api_key_display = "[dim]N/A[/dim]"
         else:
             env_var = f"{provider.value.upper()}_API_KEY"
@@ -164,14 +166,18 @@ def providers() -> None:
         if is_current:
             if not is_installed:
                 status = "[bold red]Current (Not installed)[/bold red]"
-            elif not has_api_key and provider != AIProvider.PUBLIC:
+            elif not has_api_key and provider not in local_providers:
                 status = "[bold yellow]Current (Need API key)[/bold yellow]"
+            elif provider == AIProvider.OLLAMA:
+                status = "[bold green]Current (Local)[/bold green]"
             else:
                 status = "[bold green]Current[/bold green]"
         elif is_available:
-            status = "Ready"
+            status = "Ready" if provider not in local_providers else "Local"
         elif is_installed and not has_api_key:
             status = "[yellow]Need API key[/yellow]"
+        elif is_installed and provider == AIProvider.OLLAMA:
+            status = "[cyan]Local[/cyan]"
         elif not is_installed:
             status = "[red]Not installed[/red]"
         else:
@@ -1146,12 +1152,11 @@ async def _interactive_chat_session(
     {% endif %}
     )

-    {% if ai_backend != "memory" %}
-    # Pre-load model cache for tab completion
+    # Pre-load model cache for tab completion (includes Ollama models)
     await command_handler.load_model_cache()
+    {% if ai_rag %}
     # Pre-load collection cache for /rag tab completion
     await command_handler.load_collection_cache()
-
     {% endif %}
     # Create completer for slash command autocomplete
     chat_completer = ChatCompleter(command_handler)
@@ -1422,7 +1427,7 @@ async def _stream_chat_response(
     try:
         processed_content = set()

-        async with asyncio.timeout(30.0):  # 30 second timeout
+        async with asyncio.timeout(settings.AI_TIMEOUT_SECONDS):
             async for chunk in ai_service.stream_chat(
                 message=message,
                 conversation_id=conversation_id,
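
A simplified sketch of the resulting status ladder, with plain strings standing in for the AIProvider enum and the Rich markup dropped (the final "Unavailable" fallback is illustrative):

# Sketch of the provider status ladder after this change.
LOCAL_PROVIDERS = {"public", "ollama"}  # no API key required


def provider_status(
    provider: str,
    is_current: bool,
    is_installed: bool,
    has_api_key: bool,
    is_available: bool,
) -> str:
    if is_current:
        if not is_installed:
            return "Current (Not installed)"
        if not has_api_key and provider not in LOCAL_PROVIDERS:
            return "Current (Need API key)"
        if provider == "ollama":
            return "Current (Local)"
        return "Current"
    if is_available:
        return "Ready" if provider not in LOCAL_PROVIDERS else "Local"
    if is_installed and not has_api_key:
        return "Need API key"
    if is_installed and provider == "ollama":
        return "Local"
    if not is_installed:
        return "Not installed"
    return "Unavailable"


assert provider_status("ollama", True, True, True, True) == "Current (Local)"
assert provider_status("openai", False, True, False, False) == "Need API key"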
