diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index d4b57540..1b7d1755 100644 --- a/.github/workflows/security.yml +++ b/.github/workflows/security.yml @@ -52,9 +52,11 @@ jobs: - name: Run security audit run: | - # Ignoring pip 25.2 vulnerabilities (fixed in pip 26.0, not yet released) - # GHSA-4xh5-x5gv-qwph: symlink traversal vulnerability - # GHSA-6vgw-5pg2-w6jp: additional pip vulnerability + # Ignoring pip 25.2 vulnerabilities (uv manages pip, not user-facing) # Risk: Low - only affects installation of malicious packages from untrusted sources # Mitigation: All packages installed from trusted PyPI with uv.lock verification - uv run pip-audit --ignore-vuln GHSA-4xh5-x5gv-qwph --ignore-vuln GHSA-6vgw-5pg2-w6jp \ No newline at end of file + uv run pip-audit \ + --ignore-vuln GHSA-4xh5-x5gv-qwph \ + --ignore-vuln GHSA-6vgw-5pg2-w6jp \ + --ignore-vuln ECHO-ffe1-1d3c-d9bc \ + --ignore-vuln ECHO-7db2-03aa-5591 \ No newline at end of file diff --git a/README.md b/README.md index 655d1889..22d44dc8 100644 --- a/README.md +++ b/README.md @@ -132,6 +132,7 @@ Most starters lock you in at `init`. Aegis Stack doesn't. See **[Evolving Your S - **Auth** → JWT authentication - **AI** → PydanticAI / LangChain - **Comms** → Resend + Twilio +- **Insights** → Adoption metrics (GitHub, PyPI, Plausible) *(experimental)* [Components Docs →](https://lbedner.github.io/aegis-stack/components/) | [Services Docs →](https://lbedner.github.io/aegis-stack/services/) diff --git a/aegis/cli/callbacks.py b/aegis/cli/callbacks.py index cbd969b5..ead4c4a4 100644 --- a/aegis/cli/callbacks.py +++ b/aegis/cli/callbacks.py @@ -21,6 +21,10 @@ restore_engine_info, ) from ..core.dependency_resolver import DependencyResolver +from ..core.insights_service_parser import ( + is_insights_service_with_options, + parse_insights_service_config, +) from ..core.service_resolver import ServiceResolver from ..core.services import SERVICES from ..i18n import t @@ -212,6 +216,18 @@ def validate_and_resolve_services( typer.secho(f"Invalid auth service syntax: {e}", fg="red", err=True) raise typer.Exit(1) + # Parse Insights service bracket syntax + for service in selected_services: + if is_insights_service_with_options(service): + try: + insights_config = parse_insights_service_config(service) + typer.echo( + f"Insights service: sources={','.join(insights_config.sources)}" + ) + except ValueError as e: + typer.secho(f"Invalid insights service syntax: {e}", fg="red", err=True) + raise typer.Exit(1) + # Resolve services to components resolved_components, service_added = ServiceResolver.resolve_service_dependencies( selected_services diff --git a/aegis/commands/add_service.py b/aegis/commands/add_service.py index d408ae31..800a95c0 100644 --- a/aegis/commands/add_service.py +++ b/aegis/commands/add_service.py @@ -422,6 +422,24 @@ def add_service_command( if isinstance(framework, str): service_data[AnswerKeys.AI_FRAMEWORK] = framework + # For insights service, pass source flags + if base_service == AnswerKeys.SERVICE_INSIGHTS: + from ..core.insights_service_parser import ( + DEFAULT_SOURCES, + is_insights_service_with_options, + parse_insights_service_config, + ) + + if is_insights_service_with_options(service): + insights_config = parse_insights_service_config(service) + sources = insights_config.sources + else: + sources = DEFAULT_SOURCES + service_data[AnswerKeys.INSIGHTS_GITHUB] = "github" in sources + service_data[AnswerKeys.INSIGHTS_PYPI] = "pypi" in sources + service_data[AnswerKeys.INSIGHTS_PLAUSIBLE] = "plausible" in sources + service_data[AnswerKeys.INSIGHTS_REDDIT] = "reddit" in sources + # Add the service (services are added like components) # Use base_service for file lookup, not the full variant name result = updater.add_component(base_service, service_data) diff --git a/aegis/constants.py b/aegis/constants.py index b54e0de3..2f52819c 100644 --- a/aegis/constants.py +++ b/aegis/constants.py @@ -125,11 +125,19 @@ class AnswerKeys: AUTH = "include_auth" AI = "include_ai" COMMS = "include_comms" + INSIGHTS = "include_insights" # Service names (used for selection/lookup) SERVICE_AUTH = "auth" SERVICE_AI = "ai" SERVICE_COMMS = "comms" + SERVICE_INSIGHTS = "insights" + + # Insights source flags + INSIGHTS_GITHUB = "insights_github" + INSIGHTS_PYPI = "insights_pypi" + INSIGHTS_PLAUSIBLE = "insights_plausible" + INSIGHTS_REDDIT = "insights_reddit" # Configuration values SCHEDULER_BACKEND = "scheduler_backend" diff --git a/aegis/core/copier_manager.py b/aegis/core/copier_manager.py index 070ff6a2..f90704be 100644 --- a/aegis/core/copier_manager.py +++ b/aegis/core/copier_manager.py @@ -138,6 +138,21 @@ def generate_with_copier( AnswerKeys.AI_RAG: template_context.get(AnswerKeys.AI_RAG, "no") == "yes", AnswerKeys.AI_VOICE: template_context.get(AnswerKeys.AI_VOICE, "no") == "yes", AnswerKeys.OLLAMA_MODE: template_context.get(AnswerKeys.OLLAMA_MODE, "none"), + AnswerKeys.INSIGHTS: template_context.get(AnswerKeys.INSIGHTS, "no") == "yes", + AnswerKeys.INSIGHTS_GITHUB: template_context.get( + AnswerKeys.INSIGHTS_GITHUB, "no" + ) + == "yes", + AnswerKeys.INSIGHTS_PYPI: template_context.get(AnswerKeys.INSIGHTS_PYPI, "no") + == "yes", + AnswerKeys.INSIGHTS_PLAUSIBLE: template_context.get( + AnswerKeys.INSIGHTS_PLAUSIBLE, "no" + ) + == "yes", + AnswerKeys.INSIGHTS_REDDIT: template_context.get( + AnswerKeys.INSIGHTS_REDDIT, "no" + ) + == "yes", } # Detect dev vs production mode for template sourcing @@ -228,6 +243,7 @@ def generate_with_copier( # This ensures consistent behavior with Cookiecutter include_auth = copier_data.get(AnswerKeys.AUTH, False) include_ai = copier_data.get(AnswerKeys.AI, False) + include_insights = copier_data.get(AnswerKeys.INSIGHTS, False) ai_backend = copier_data.get(AnswerKeys.AI_BACKEND, StorageBackends.MEMORY) database_engine = copier_data.get( AnswerKeys.DATABASE_ENGINE, StorageBackends.SQLITE @@ -240,8 +256,11 @@ def generate_with_copier( # Type narrowing: ai_backend should always be a string, but narrow from Any ai_backend_str: str = str(ai_backend) if ai_backend else StorageBackends.MEMORY + is_insights_included: bool = include_insights is True ai_needs_migrations = is_ai_included and ai_backend_str != StorageBackends.MEMORY - needs_migration_files = is_auth_included or ai_needs_migrations + needs_migration_files = ( + is_auth_included or ai_needs_migrations or is_insights_included + ) # Only run migrations automatically for SQLite (file-based, no server needed) # PostgreSQL requires a running server, so skip auto-migration is_sqlite = database_engine == StorageBackends.SQLITE diff --git a/aegis/core/insights_service_parser.py b/aegis/core/insights_service_parser.py new file mode 100644 index 00000000..dbb3ac90 --- /dev/null +++ b/aegis/core/insights_service_parser.py @@ -0,0 +1,105 @@ +""" +Insights service bracket syntax parser. + +Parses insights[sources...] syntax where values are data source names: +- github: GitHub Traffic API + Stargazers API +- pypi: PyPI/pepy.tech download stats +- plausible: Plausible docs analytics +- reddit: Reddit post tracking + +Order doesn't matter. Defaults: github, pypi +""" + +from dataclasses import dataclass, field + +# Valid source names +SOURCES = {"github", "pypi", "plausible", "reddit"} + +# Default sources when no brackets specified +DEFAULT_SOURCES = ["github", "pypi"] + + +@dataclass +class InsightsServiceConfig: + """Parsed insights service configuration.""" + + sources: list[str] = field(default_factory=lambda: DEFAULT_SOURCES.copy()) + + +def parse_insights_service_config(service_string: str) -> InsightsServiceConfig: + """ + Parse insights[...] service string into config. + + Args: + service_string: Service specification like "insights", "insights[github]", + or "insights[github,pypi,plausible,reddit]" + + Returns: + InsightsServiceConfig with selected sources + + Raises: + ValueError: If service string is invalid or has unknown values + """ + service_string = service_string.strip() + + if not service_string.startswith("insights"): + raise ValueError( + f"Expected 'insights' service, got '{service_string}'. " + "This parser only handles insights[...] syntax." + ) + + # Plain "insights" with no brackets + if service_string == "insights": + return InsightsServiceConfig() + + if "[" not in service_string: + raise ValueError( + f"Invalid service string '{service_string}'. " + "Expected 'insights' or 'insights[sources]' format." + ) + + if not service_string.endswith("]"): + raise ValueError( + f"Malformed brackets in '{service_string}'. Expected closing ']'." + ) + + bracket_start = service_string.index("[") + bracket_content = service_string[bracket_start + 1 : -1].strip() + + # Empty brackets = defaults + if not bracket_content: + return InsightsServiceConfig() + + # Split by comma and validate + values = [v.strip().lower() for v in bracket_content.split(",") if v.strip()] + + # Check for duplicates + seen: set[str] = set() + for value in values: + if value in seen: + raise ValueError(f"Duplicate source '{value}' in insights[...] syntax.") + seen.add(value) + + if value not in SOURCES: + raise ValueError( + f"Unknown source '{value}' in insights[...] syntax. " + f"Valid sources: {', '.join(sorted(SOURCES))}." + ) + + return InsightsServiceConfig(sources=values) + + +def is_insights_service_with_options(service_string: str) -> bool: + """ + Check if a service string is an insights service with bracket options. + + Returns True ONLY when explicit bracket syntax is used (insights[...]). + Plain "insights" without brackets returns False. + + Args: + service_string: Service specification string + + Returns: + True if this is an insights[...] format string with explicit options + """ + return service_string.strip().startswith("insights[") diff --git a/aegis/core/post_gen_tasks.py b/aegis/core/post_gen_tasks.py index 4a93e499..cc4297aa 100644 --- a/aegis/core/post_gen_tasks.py +++ b/aegis/core/post_gen_tasks.py @@ -185,6 +185,17 @@ def get_component_file_mapping() -> dict[str, list[str]]: "tests/api/test_voice_endpoints.py", "app/components/frontend/dashboard/modals/voice_settings_tab.py", ], + AnswerKeys.SERVICE_INSIGHTS: [ + "app/components/backend/api/insights", + "app/services/insights", + "app/cli/insights.py", + "tests/services/test_insights_service.py", + "tests/services/test_insights_collectors.py", + "tests/api/test_insights_endpoints.py", + # Frontend dashboard files + "app/components/frontend/dashboard/cards/insights_card.py", + "app/components/frontend/dashboard/modals/insights_modal.py", + ], } @@ -559,6 +570,21 @@ def _rename_backend_files(suffix: str) -> set[str]: project_path, "app/components/frontend/dashboard/modals/comms_modal.py" ) + # Remove insights service if not selected + if not is_enabled(AnswerKeys.INSIGHTS): + remove_dir(project_path, "app/components/backend/api/insights") + remove_dir(project_path, "app/services/insights") + remove_file(project_path, "app/cli/insights.py") + remove_file(project_path, "tests/services/test_insights_service.py") + remove_file(project_path, "tests/services/test_insights_collectors.py") + remove_file(project_path, "tests/api/test_insights_endpoints.py") + remove_file( + project_path, "app/components/frontend/dashboard/cards/insights_card.py" + ) + remove_file( + project_path, "app/components/frontend/dashboard/modals/insights_modal.py" + ) + # Remove auth service dashboard files if not selected if not is_enabled(AnswerKeys.AUTH): remove_file( @@ -569,23 +595,25 @@ def _rename_backend_files(suffix: str) -> set[str]: ) # Remove services_card.py only if NO services are enabled - # ServicesCard shows all services (auth, AI, comms), so keep if ANY service is enabled + # ServicesCard shows all services, so keep if ANY service is enabled if ( not is_enabled(AnswerKeys.AUTH) and not is_enabled(AnswerKeys.AI) and not is_enabled(AnswerKeys.COMMS) + and not is_enabled(AnswerKeys.INSIGHTS) ): remove_file( project_path, "app/components/frontend/dashboard/cards/services_card.py" ) # Remove Alembic directory only if NO service needs migrations - # Alembic is needed when: auth is enabled OR (AI is enabled AND backend is NOT memory) + # Alembic is needed when: auth, insights, or (AI with non-memory backend) include_auth = is_enabled(AnswerKeys.AUTH) include_ai = is_enabled(AnswerKeys.AI) + include_insights = is_enabled(AnswerKeys.INSIGHTS) ai_backend = context.get(AnswerKeys.AI_BACKEND, StorageBackends.MEMORY) ai_needs_migrations = include_ai and ai_backend != StorageBackends.MEMORY - needs_migrations = include_auth or ai_needs_migrations + needs_migrations = include_auth or ai_needs_migrations or include_insights if not needs_migrations: remove_dir(project_path, "alembic") @@ -632,6 +660,7 @@ def _render_jinja_template(src: Path, dst: Path, project_path: Path) -> None: "include_auth": True, "include_ai": True, "include_comms": True, + "include_insights": True, # Component flags - check what exists in project "include_scheduler": (project_path / "app/components/scheduler").exists(), "include_worker": (project_path / "app/components/worker").exists(), diff --git a/aegis/core/service_resolver.py b/aegis/core/service_resolver.py index 086ab930..bdc9a1f6 100644 --- a/aegis/core/service_resolver.py +++ b/aegis/core/service_resolver.py @@ -11,6 +11,10 @@ from .auth_service_parser import is_auth_service_with_options, parse_auth_service_config from .component_utils import extract_base_component_name, extract_base_service_name from .dependency_resolver import DependencyResolver +from .insights_service_parser import ( + is_insights_service_with_options, + parse_insights_service_config, +) from .services import SERVICES, get_service_dependencies @@ -126,6 +130,16 @@ def validate_services(services: list[str]) -> list[str]: except ValueError as e: errors.append(f"Invalid AI service syntax: {e}") + # Validate insights service bracket syntax if provided + if ( + base_service == AnswerKeys.SERVICE_INSIGHTS + and is_insights_service_with_options(service) + ): + try: + parse_insights_service_config(service) + except ValueError as e: + errors.append(f"Invalid insights service syntax: {e}") + spec = SERVICES[base_service] # Check service conflicts diff --git a/aegis/core/services.py b/aegis/core/services.py index ad435c09..076371c0 100644 --- a/aegis/core/services.py +++ b/aegis/core/services.py @@ -8,6 +8,7 @@ from dataclasses import dataclass, field from enum import Enum +from ..constants import ComponentNames from ..i18n import t @@ -51,7 +52,7 @@ class ServiceSpec: name="auth", type=ServiceType.AUTH, description="User authentication and authorization with JWT tokens", - required_components=["backend", "database"], + required_components=[ComponentNames.BACKEND, ComponentNames.DATABASE], pyproject_deps=[ "python-jose[cryptography]==3.3.0", "passlib[bcrypt]==1.7.4", @@ -69,7 +70,7 @@ class ServiceSpec: name="ai", type=ServiceType.AI, description="AI chatbot service with multi-framework support", - required_components=["backend"], + required_components=[ComponentNames.BACKEND], pyproject_deps=[ "{AI_FRAMEWORK_DEPS}", # Dynamic framework + provider deps ], @@ -83,7 +84,7 @@ class ServiceSpec: name="comms", type=ServiceType.NOTIFICATION, description="Communications service with email (Resend), SMS and voice (Twilio)", - required_components=["backend"], + required_components=[ComponentNames.BACKEND], pyproject_deps=[ "resend>=2.4.0", # Email provider "twilio>=9.3.7", # SMS/Voice provider @@ -95,6 +96,25 @@ class ServiceSpec: "app/components/backend/api/comms/", ], ), + "insights": ServiceSpec( + name="insights", + type=ServiceType.ANALYTICS, + description="Adoption metrics and analytics with automated data collection", + required_components=[ + ComponentNames.BACKEND, + ComponentNames.DATABASE, + ComponentNames.SCHEDULER, + ], + recommended_components=[ComponentNames.WORKER], + pyproject_deps=[ + "httpx>=0.27.0", # HTTP client for API collectors + ], + template_files=[ + "app/services/insights/", + "app/cli/insights.py", + "app/components/backend/api/insights/", + ], + ), } diff --git a/aegis/core/template_generator.py b/aegis/core/template_generator.py index 9b70d871..6a1ffc18 100644 --- a/aegis/core/template_generator.py +++ b/aegis/core/template_generator.py @@ -27,11 +27,16 @@ extract_engine_info, ) from .components import COMPONENTS, CORE_COMPONENTS +from .insights_service_parser import ( + is_insights_service_with_options, + parse_insights_service_config, +) from .services import SERVICES # Service names for bracket syntax detection SERVICE_AI = "ai" SERVICE_AUTH = "auth" +SERVICE_INSIGHTS = "insights" class TemplateGenerator: @@ -128,6 +133,17 @@ def __init__( self._user_specified_auth_level = True break + # Extract insights sources from insights[sources] format in services + from .insights_service_parser import DEFAULT_SOURCES + + self.insights_sources: list[str] = DEFAULT_SOURCES.copy() + for service in self.selected_services: + if extract_base_service_name(service) == SERVICE_INSIGHTS: + if is_insights_service_with_options(service): + insights_config = parse_insights_service_config(service) + self.insights_sources = insights_config.sources + break + # Auto-detect: if AI service selected AND database available AND no explicit backend, # use SQLite for persistence (analytics, conversation history, LLM tracking) if not user_specified_ai_backend: @@ -232,6 +248,25 @@ def get_template_context(self) -> dict[str, Any]: for s in self.selected_services ) else "no", + AnswerKeys.INSIGHTS: "yes" + if any( + extract_base_service_name(s) == AnswerKeys.SERVICE_INSIGHTS + for s in self.selected_services + ) + else "no", + # Insights source flags + AnswerKeys.INSIGHTS_GITHUB: "yes" + if "github" in self.insights_sources + else "no", + AnswerKeys.INSIGHTS_PYPI: "yes" + if "pypi" in self.insights_sources + else "no", + AnswerKeys.INSIGHTS_PLAUSIBLE: "yes" + if "plausible" in self.insights_sources + else "no", + AnswerKeys.INSIGHTS_REDDIT: "yes" + if "reddit" in self.insights_sources + else "no", # AI backend selection for conversation persistence AnswerKeys.AI_BACKEND: self.ai_backend, # AI persistence flag for backwards compatibility with template conditionals diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/.env.example.jinja b/aegis/templates/copier-aegis-project/{{ project_slug }}/.env.example.jinja index 86b0445b..d9e48eee 100644 --- a/aegis/templates/copier-aegis-project/{{ project_slug }}/.env.example.jinja +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/.env.example.jinja @@ -216,6 +216,34 @@ RAG_CHAT_TOP_K=15 # TWILIO_MESSAGING_SERVICE_SID=MGxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx # Required for SMS {% endif %} +{% if include_insights %} +# ============================================================================= +# INSIGHTS SERVICE CONFIGURATION +# ============================================================================= + +{% if insights_github %} +# GitHub API (required for traffic + stargazer collection) +# Create a PAT at https://github.com/settings/tokens with 'repo' scope +# INSIGHT_GITHUB_TOKEN=ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +INSIGHT_GITHUB_OWNER=your-username +INSIGHT_GITHUB_REPO=your-repo +INSIGHT_COLLECTION_GITHUB_HOURS=6 +{% endif %} + +{% if insights_pypi %} +# PyPI package tracking +INSIGHT_PYPI_PACKAGE=your-package-name +INSIGHT_COLLECTION_PYPI_HOURS=24 +{% endif %} + +{% if insights_plausible %} +# Plausible Analytics (requires API key from your Plausible dashboard) +# INSIGHT_PLAUSIBLE_API_KEY=your-api-key +INSIGHT_PLAUSIBLE_SITES=your-site.example.com +INSIGHT_COLLECTION_PLAUSIBLE_HOURS=24 +{% endif %} +{% endif %} + {%- if include_worker %} # ============================================================================= # WORKER SETTINGS diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/README.md.jinja b/aegis/templates/copier-aegis-project/{{ project_slug }}/README.md.jinja index 89541c77..ae05cd85 100644 --- a/aegis/templates/copier-aegis-project/{{ project_slug }}/README.md.jinja +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/README.md.jinja @@ -18,6 +18,23 @@ This Aegis Stack project includes the following components: - **Cache**: Redis-based async caching {%- endif %} +{% if include_auth or include_ai or include_comms or include_insights %} +## Services + +{%- if include_auth %} +- **Auth**: JWT authentication with user management +{%- endif %} +{%- if include_ai %} +- **AI**: Multi-provider AI integration with PydanticAI +{%- endif %} +{%- if include_comms %} +- **Comms**: Email, SMS, and voice communications +{%- endif %} +{%- if include_insights %} +- **Insights**: Adoption metrics tracking (GitHub, PyPI, Plausible, Reddit) +{%- endif %} +{% endif %} + ## Getting Started ### Prerequisites diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/alembic/env.py.jinja b/aegis/templates/copier-aegis-project/{{ project_slug }}/alembic/env.py.jinja index 60f66fa5..14bac17c 100644 --- a/aegis/templates/copier-aegis-project/{{ project_slug }}/alembic/env.py.jinja +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/alembic/env.py.jinja @@ -26,6 +26,15 @@ from app.models.user import User # noqa: E402,F401 {% if include_ai and ai_backend != "memory" %} from app.models.conversation import Conversation, ConversationMessage # noqa: E402,F401 {% endif %} +{% if include_insights %} +from app.services.insights.models import ( # noqa: E402,F401 + InsightSource, + InsightMetricType, + InsightMetric, + InsightRecord, + InsightEvent, +) +{% endif %} # this is the Alembic Config object, which provides # access to the values within the .ini file in use. diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/cli/insights.py.jinja b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/cli/insights.py.jinja new file mode 100644 index 00000000..7254cc71 --- /dev/null +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/cli/insights.py.jinja @@ -0,0 +1,321 @@ +""" +Insights service CLI commands. + +Command-line interface for viewing adoption metrics, triggering collection, +managing Reddit posts, and logging events. +""" + +import asyncio + +from rich.console import Console +from rich.table import Table +import typer + +from app.i18n import lazy_t, t + +app = typer.Typer(help=lazy_t("insights.help")) +console = Console() + +# Sub-groups +reddit_app = typer.Typer(help=lazy_t("insights.help_reddit")) +app.add_typer(reddit_app, name="reddit") + + +@app.command(help=lazy_t("insights.help_status")) +def status() -> None: + asyncio.run(_status()) + + +async def _status() -> None: + from app.core.db import get_async_session + from app.services.insights.insight_service import InsightService + + async with get_async_session() as session: + service = InsightService(session) + summary = await service.get_status_summary() + + typer.secho(t("insights.status_title"), fg=typer.colors.BLUE, bold=True) + typer.echo() + + source_table = Table(title=t("insights.sources_title"), width=70) + source_table.add_column(t("insights.col_source"), style="cyan", width=20) + source_table.add_column(t("insights.col_enabled"), width=8) + source_table.add_column(t("insights.col_last_collected"), width=20) + source_table.add_column(t("insights.col_metrics"), width=10, justify="right") + + for src in summary["sources"]: + enabled = f"[green]{t('insights.yes')}[/]" if src["enabled"] else f"[red]{t('insights.no')}[/]" + last = str(src["last_collected"])[:19] if src["last_collected"] else f"[dim]{t('insights.never')}[/]" + source_table.add_row( + src["display_name"], enabled, last, str(src["metric_count"]) + ) + + console.print(source_table) + + if summary["records"]: + typer.echo() + record_table = Table(title=t("insights.records_title"), width=70) + record_table.add_column(t("insights.col_metric"), style="cyan", width=20) + record_table.add_column(t("insights.col_value"), width=12, justify="right") + record_table.add_column(t("insights.col_date"), width=12) + record_table.add_column(t("insights.col_previous"), width=12, justify="right") + + for rec in summary["records"]: + prev = str(rec["previous_value"]) if rec["previous_value"] else "—" + record_table.add_row( + str(rec["metric_type_id"]), + f"{rec['value']:.0f}", + str(rec["date_achieved"])[:10], + prev, + ) + + console.print(record_table) + + typer.echo() + typer.secho( + t("insights.total_metrics", count=summary["total_metrics"]), + fg=typer.colors.GREEN, + bold=True, + ) + + +@app.command(help=lazy_t("insights.help_collect")) +def collect( + source: str | None = typer.Argument( + None, help=lazy_t("insights.arg_source") + ), + lookback_days: int = typer.Option( + 1, "--lookback-days", "-d", help="Days to fetch (for backfill). Default 1 = today only.", + ), +) -> None: + asyncio.run(_collect(source, lookback_days)) + + +async def _collect(source: str | None, lookback_days: int = 1) -> None: + from app.core.db import get_async_session + from app.services.insights.collector_service import CollectorService + + kwargs = {} + if lookback_days > 1: + kwargs["lookback_days"] = lookback_days + + async with get_async_session() as session: + service = CollectorService(session) + + if source: + typer.echo(t("insights.collecting_from", source=source)) + result = await service.collect_source(source, **kwargs) + _print_collection_result(result) + else: + typer.echo(t("insights.collecting_all")) + results = await service.collect_all() + for _key, result in results.items(): + _print_collection_result(result) + + +def _print_collection_result(result: CollectionResult) -> None: # noqa: F821 + if result.success: + typer.secho( + t("insights.collect_success", + source=result.source_key, + written=result.rows_written, + skipped=result.rows_skipped), + fg=typer.colors.GREEN, + ) + if result.records_broken: + typer.secho( + t("insights.collect_records", records=", ".join(result.records_broken)), + fg=typer.colors.YELLOW, + bold=True, + ) + else: + typer.secho( + t("insights.collect_failed", source=result.source_key, error=result.error), + fg=typer.colors.RED, + ) + + +@app.command(help=lazy_t("insights.help_stars")) +def stars( + limit: int = typer.Option(20, "--limit", "-n", help=lazy_t("insights.opt_limit")), +) -> None: + asyncio.run(_stars(limit)) + + +async def _stars(limit: int) -> None: + from app.core.db import get_async_session + from app.services.insights.insight_service import InsightService + + async with get_async_session() as session: + service = InsightService(session) + star_rows = await service.get_stars(limit=limit, offset=0) + + if not star_rows: + typer.echo(t("insights.no_stars")) + return + + table = Table(title=t("insights.stars_title"), width=90) + table.add_column(t("insights.col_star_num"), style="bold", width=5, justify="right") + table.add_column(t("insights.col_username"), style="cyan", width=20) + table.add_column(t("insights.col_name"), width=20) + table.add_column(t("insights.col_location"), width=20) + table.add_column(t("insights.col_company"), width=15) + + for star in star_rows: + meta = star.metadata_ or {} + table.add_row( + f"{star.value:.0f}", + meta.get("username", "—"), + meta.get("name") or "—", + meta.get("location") or "—", + meta.get("company") or "—", + ) + + console.print(table) + + +@app.command(help=lazy_t("insights.help_records")) +def records() -> None: + asyncio.run(_records()) + + +async def _records() -> None: + from app.core.db import get_async_session + from app.services.insights.insight_service import InsightService + + async with get_async_session() as session: + service = InsightService(session) + record_list = await service.get_records() + + if not record_list: + typer.echo(t("insights.no_records")) + return + + table = Table(title=t("insights.all_time_records_title"), width=70) + table.add_column(t("insights.col_metric"), style="cyan", width=20) + table.add_column(t("insights.col_value"), width=12, justify="right") + table.add_column(t("insights.col_date"), width=12) + table.add_column(t("insights.col_previous"), width=12, justify="right") + table.add_column(t("insights.col_prev_date"), width=12) + + for rec in record_list: + prev_val = f"{rec.previous_value:.0f}" if rec.previous_value else "—" + prev_date = str(rec.previous_date)[:10] if rec.previous_date else "—" + table.add_row( + str(rec.metric_type_id), + f"{rec.value:.0f}", + str(rec.date_achieved)[:10], + prev_val, + prev_date, + ) + + console.print(table) + + +@app.command(help=lazy_t("insights.help_sources")) +def sources() -> None: + asyncio.run(_sources()) + + +async def _sources() -> None: + from app.core.db import get_async_session + from app.services.insights.insight_service import InsightService + + async with get_async_session() as session: + service = InsightService(session) + source_list = await service.get_sources() + + table = Table(title=t("insights.insight_sources_title"), width=70) + table.add_column(t("insights.col_key"), style="cyan", width=18) + table.add_column(t("insights.col_display_name"), width=18) + table.add_column(t("insights.col_interval"), width=10, justify="right") + table.add_column(t("insights.col_auth"), width=6) + table.add_column(t("insights.col_enabled"), width=8) + + for src in source_list: + interval = f"{src.collection_interval_hours}h" if src.collection_interval_hours else t("insights.manual") + auth = f"[yellow]{t('insights.yes')}[/]" if src.requires_auth else t("insights.no") + enabled = f"[green]{t('insights.yes')}[/]" if src.enabled else f"[red]{t('insights.no')}[/]" + table.add_row(src.key, src.display_name, interval, auth, enabled) + + console.print(table) + + +@reddit_app.command("add", help=lazy_t("insights.help_reddit_add")) +def reddit_add( + url: str = typer.Argument(..., help=lazy_t("insights.arg_url")), +) -> None: + asyncio.run(_reddit_add(url)) + + +async def _reddit_add(url: str) -> None: + from app.core.db import get_async_session + from app.services.insights.collectors.reddit import RedditCollector + + async with get_async_session() as session: + collector = RedditCollector(session) + result = await collector.add_post(url) + + if result.success: + typer.secho( + t("insights.reddit_added", count=result.rows_written), + fg=typer.colors.GREEN, + ) + else: + typer.secho( + t("insights.reddit_failed", error=result.error), + fg=typer.colors.RED, + ) + + +@app.command("event", help=lazy_t("insights.help_event")) +def log_event( + event_type: str = typer.Argument(..., help=lazy_t("insights.arg_event_type")), + description: str = typer.Argument(..., help=lazy_t("insights.arg_description")), + date: str | None = typer.Option(None, "--date", help="Event date (YYYY-MM-DD). Defaults to today."), + category: str | None = typer.Option(None, "--category", help="Milestone category (e.g., daily_clones, pypi_daily)."), +) -> None: + asyncio.run(_log_event(event_type, description, date, category)) + + +async def _log_event( + event_type: str, + description: str, + date_str: str | None = None, + category: str | None = None, +) -> None: + from datetime import date as date_type + from app.core.db import get_async_session + from app.services.insights.insight_service import InsightService + + event_date = None + if date_str: + try: + event_date = date_type.fromisoformat(date_str) + except ValueError: + typer.secho(f"Invalid date format: {date_str}. Use YYYY-MM-DD.", fg=typer.colors.RED) + raise typer.Exit(1) + + metadata = {} + if category: + metadata["category"] = category + + async with get_async_session() as session: + service = InsightService(session) + await service.add_event( + event_type=event_type, + description=description, + event_date=event_date, + metadata=metadata if metadata else None, + ) + await session.commit() + + date_display = date_str or "today" + typer.secho( + t("insights.event_logged", event_type=event_type, description=description) + f" ({date_display})", + fg=typer.colors.GREEN, + ) + + +if __name__ == "__main__": + app() diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/cli/main.py.jinja b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/cli/main.py.jinja index 4960ff26..364b5635 100644 --- a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/cli/main.py.jinja +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/cli/main.py.jinja @@ -94,6 +94,14 @@ except ImportError: # Comms service not available, skip comms commands pass +# Conditionally register insights command if insights service is available +try: + insights_module = importlib.import_module("app.cli.insights") + app.add_typer(insights_module.app, name="insights") +except ImportError: + # Insights service not available, skip insights commands + pass + # Conditionally register rag command if ai_rag is enabled try: rag_module = importlib.import_module("app.cli.rag") diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/backend/api/deps.py.jinja b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/backend/api/deps.py.jinja index 25ca5b6b..32d77861 100644 --- a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/backend/api/deps.py.jinja +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/backend/api/deps.py.jinja @@ -17,6 +17,10 @@ from app.services.auth.invite_service import InviteService from app.services.auth.membership_service import MembershipService from app.services.auth.org_service import OrgService {% endif %} +{% if include_insights %} +from app.services.insights.collector_service import CollectorService +from app.services.insights.insight_service import InsightService +{% endif %} def get_db() -> Generator[Session]: @@ -102,6 +106,24 @@ async def get_invite_service( {% endif %} +{% if include_insights %} + + +async def get_insight_service( + db: AsyncSession = Depends(get_async_db), +) -> InsightService: + """Provide an InsightService instance.""" + return InsightService(db) + + +async def get_collector_service( + db: AsyncSession = Depends(get_async_db), +) -> CollectorService: + """Provide a CollectorService instance.""" + return CollectorService(db) +{% endif %} + + def get_audit() -> AuditEmitter: """Provide the audit emitter singleton.""" return audit_emitter diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/backend/startup/component_health.py.jinja b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/backend/startup/component_health.py.jinja index f0502e5a..dcb78eeb 100644 --- a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/backend/startup/component_health.py.jinja +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/backend/startup/component_health.py.jinja @@ -548,6 +548,13 @@ async def startup_hook() -> None: logger.info("Comms service health check registered") {%- endif %} + {%- if include_insights %} + # Register insights service health check + from app.services.insights.health import check_insight_health + register_service_health_check("insights", check_insight_health) + logger.info("Insights service health check registered") + {%- endif %} + # Future services will be registered here: # Example: # from app.services.payment.health import check_payment_service_health diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/backend/startup/database_init.py.jinja b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/backend/startup/database_init.py.jinja index dd92b06a..9b99b949 100644 --- a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/backend/startup/database_init.py.jinja +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/backend/startup/database_init.py.jinja @@ -334,6 +334,19 @@ async def startup_database_init() -> None: logger.warning(f"Database verification failed: {e}") # Don't fail startup - let the app run and show clear errors +{% if include_insights %} + # Seed insight lookup tables (idempotent) + try: + from app.core.db import db_session as get_db_session + from app.services.insights.seed import seed_insight_tables + + with get_db_session() as session: + seed_insight_tables(session) + logger.info("Insight seed data verified") + except Exception as e: + logger.warning(f"Insight seed failed: {e}") +{% endif %} + {% if include_ai and database_engine == "postgres" %} # Sync LLM catalog (postgres only - SQLite syncs during generation) # Uses sync Session because sync_llm_catalog does sync DB ops (async is for HTTP calls) diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/activity_feed.py b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/activity_feed.py index 181a534f..37e64c0d 100644 --- a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/activity_feed.py +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/activity_feed.py @@ -56,6 +56,8 @@ def format_relative_time(timestamp: datetime) -> str: hours = int(seconds / 3600) return f"{hours} hour{'s' if hours != 1 else ''} ago" else: + if timestamp.hour == 0 and timestamp.minute == 0: + return timestamp.strftime("%b %d") return timestamp.strftime("%b %d %H:%M") diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/cards/__init__.py.jinja b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/cards/__init__.py.jinja index 11538257..1c77cde1 100644 --- a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/cards/__init__.py.jinja +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/cards/__init__.py.jinja @@ -14,6 +14,9 @@ from .database_card import DatabaseCard {% if include_ingress %} from .ingress_card import IngressCard {% endif %} +{% if include_insights %} +from .insights_card import InsightsCard +{% endif %} {% if include_observability %} from .observability_card import ObservabilityCard {% endif %} @@ -54,6 +57,9 @@ __all__ = [ {% if include_ingress %} "IngressCard", {% endif %} +{% if include_insights %} + "InsightsCard", +{% endif %} {% if include_observability %} "ObservabilityCard", {% endif %} diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/cards/card_utils.py.jinja b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/cards/card_utils.py.jinja index eeb5baf4..a2b8ba9f 100644 --- a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/cards/card_utils.py.jinja +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/cards/card_utils.py.jinja @@ -471,6 +471,9 @@ def create_modal_for_component( {%- if include_ingress %} IngressDetailDialog, {%- endif %} + {%- if include_insights %} + InsightsDetailDialog, + {%- endif %} {%- if include_observability %} ObservabilityDetailDialog, {%- endif %} @@ -506,6 +509,9 @@ def create_modal_for_component( {%- if include_ingress %} "ingress": IngressDetailDialog, {%- endif %} + {%- if include_insights %} + "service_insights": InsightsDetailDialog, + {%- endif %} {%- if include_observability %} "observability": ObservabilityDetailDialog, {%- endif %} @@ -538,6 +544,18 @@ def _open_modal( ) -> None: """Open a component detail modal, using cache for subsequent opens.""" modal_cache: dict[str, ft.Container] = page.data.setdefault("_modal_cache", {}) +{% if include_insights %} + + # Data-heavy modals: always recreate so they load fresh data + from app.services.insights.constants import INSIGHT_COMPONENT_NAME + + no_cache = {f"service_{INSIGHT_COMPONENT_NAME}"} + if component_name in no_cache: + old = modal_cache.pop(component_name, None) + if old and old in page.overlay: + page.overlay.remove(old) +{% endif %} + popup = modal_cache.get(component_name) if popup is None: popup = create_modal_for_component(component_name, component_data, page) diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/cards/insights_card.py b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/cards/insights_card.py new file mode 100644 index 00000000..70517918 --- /dev/null +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/cards/insights_card.py @@ -0,0 +1,92 @@ +""" +Insights Service Card + +Dashboard card for adoption metrics and analytics monitoring. +Matches the Server/Scheduler/Database card style. +""" + +import flet as ft +from app.services.system.models import ComponentStatus +from app.services.system.ui import get_component_subtitle + +from .card_container import CardContainer +from .card_utils import ( + create_header_row, + create_metric_container, + get_status_colors, +) + + +class InsightsCard: + """Insights service card showing adoption metrics status.""" + + def __init__(self, component_data: ComponentStatus) -> None: + self.component_data = component_data + self.metadata = component_data.metadata or {} + + def _create_metrics_section(self) -> ft.Container: + """Create the metrics section with a clean grid layout.""" + total_metrics = self.metadata.get("total_metrics", 0) + enabled_sources = self.metadata.get("enabled_sources", 0) + stale_sources = self.metadata.get("stale_sources", []) + stale_display = str(len(stale_sources)) if stale_sources else "0" + + return ft.Container( + content=ft.Column( + [ + # Row 1: Total Metrics (full width) + ft.Row( + [ + create_metric_container( + "Total Metrics", f"{total_metrics:,}" + ) + ], + expand=True, + ), + ft.Container(height=12), + # Row 2: Active Sources and Stale + ft.Row( + [ + create_metric_container( + "Active Sources", str(enabled_sources) + ), + create_metric_container("Stale", stale_display), + ], + expand=True, + ), + ], + spacing=0, + ), + expand=True, + ) + + def _create_card_content(self) -> ft.Container: + """Create the full card content with header and metrics.""" + subtitle = get_component_subtitle("service_insights", self.metadata) + + return ft.Container( + content=ft.Column( + [ + create_header_row( + "Insights", + subtitle, + self.component_data, + ), + self._create_metrics_section(), + ], + spacing=0, + ), + padding=ft.padding.all(16), + expand=True, + ) + + def build(self) -> ft.Container: + """Build and return the complete insights card.""" + _, _, border_color = get_status_colors(self.component_data) + + return CardContainer( + content=self._create_card_content(), + border_color=border_color, + component_data=self.component_data, + component_name="service_insights", + ) diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/diagram/diagram_node.py b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/diagram/diagram_node.py index 4f2fe1d5..7d388ce0 100644 --- a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/diagram/diagram_node.py +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/diagram/diagram_node.py @@ -115,7 +115,7 @@ def _get_subtitle( return get_ai_engine_display(metadata) elif component_name == "service_auth": return "JWT Authentication" - elif component_name in ("ingress", "worker"): + elif component_name in ("ingress", "worker", "service_insights"): return get_component_subtitle(component_name, metadata) # Fall back to static label diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/modals/__init__.py.jinja b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/modals/__init__.py.jinja index df8a4cf1..f92e1131 100644 --- a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/modals/__init__.py.jinja +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/modals/__init__.py.jinja @@ -22,6 +22,9 @@ from .frontend_modal import FrontendDetailDialog {%- if include_ingress %} from .ingress_modal import IngressDetailDialog {%- endif %} +{%- if include_insights %} +from .insights_modal import InsightsDetailDialog +{%- endif %} {%- if include_observability %} from .observability_modal import ObservabilityDetailDialog {%- endif %} @@ -56,6 +59,9 @@ __all__ = [ {%- if include_ingress %} "IngressDetailDialog", {%- endif %} + {%- if include_insights %} + "InsightsDetailDialog", + {%- endif %} {%- if include_observability %} "ObservabilityDetailDialog", {%- endif %} diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/modals/insights_modal.py b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/modals/insights_modal.py new file mode 100644 index 00000000..14a9a4ee --- /dev/null +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/modals/insights_modal.py @@ -0,0 +1,3263 @@ +""" +Insights Service Detail Modal + +Tabbed modal showing adoption metrics across all data sources. +All tabs pull real data from the database via _load_db(). +""" + +from __future__ import annotations + +from typing import Any + +import flet as ft +from app.components.frontend.controls import ( + BodyText, + H3Text, + LabelText, + SecondaryText, +) +from app.components.frontend.theme import AegisTheme as Theme +from app.services.system.models import ComponentStatus +from app.services.system.ui import get_component_subtitle, get_component_title + +from ..cards.card_utils import get_status_detail +from .base_detail_popup import BaseDetailPopup +from .modal_sections import MetricCard, MilestoneCard, PieChartCard + +# Event type → chip border/highlight color +EVENT_TYPE_COLORS: dict[str, str] = { + "release": "#22C55E", + "fork": "#A855F7", + "star": "#F59E0B", + "reddit_post": "#FF5722", + "localization": "#3B82F6", + "feature": "#06B6D4", + "milestone_github": "#EC4899", + "milestone_pypi": "#EC4899", + "anomaly_github": "#EF4444", + "external": "#9CA3AF", +} + +# Shared date range options for all tabs +RANGE_OPTIONS = [ + ("7d", 7), + ("14d", 14), + ("1m", 30), + ("3m", 90), + ("6m", 180), + ("1y", 365), + ("All", 9999), +] + +# Event types relevant to each tab +GITHUB_EVENT_TYPES = { + "release", + "fork", + "star", + "feature", + "milestone_github", + "anomaly_github", + "localization", + "external", +} +PYPI_EVENT_TYPES = { + "release", + "reddit_post", + "star", + "feature", + "milestone_pypi", + "localization", + "external", +} +DOCS_EVENT_TYPES = { + "release", + "reddit_post", + "star", + "feature", + "localization", + "external", +} + +# Milestone category config (for Overview trophy cards) +CATEGORY_CONFIG: dict[str, dict[str, str]] = { + "daily_clones": {"label": "GitHub 1-Day Clones", "color": "#2563eb"}, + "daily_unique": {"label": "GitHub 1-Day Unique", "color": "#A855F7"}, + "daily_views": {"label": "GitHub 1-Day Views", "color": "#22C55E"}, + "daily_visitors": {"label": "GitHub 1-Day Visitors", "color": "#F59E0B"}, + "14d_clones": {"label": "GitHub 14-Day Clones", "color": "#06B6D4"}, + "14d_unique": {"label": "GitHub 14-Day Unique", "color": "#EC4899"}, + "14d_visitors": {"label": "GitHub 14-Day Visitors", "color": "#F97316"}, + "pypi_daily": {"label": "PyPI Best Single Day", "color": "#EF4444"}, + "plausible_daily_visitors": {"label": "Docs 1-Day Visitors", "color": "#6366F1"}, + "plausible_daily_pageviews": {"label": "Docs 1-Day Pageviews", "color": "#22C55E"}, +} + +# Event type to status mapping (for activity feed dot colors) +EVENT_STATUS_MAP: dict[str, str] = { + "release": "success", + "star": "warning", + "reddit_post": "info", + "milestone_github": "warning", + "milestone_pypi": "warning", + "feature": "info", + "anomaly_github": "error", + "external": "info", +} + + +def _pct(current: float, previous: float) -> float | None: + """Compute period-over-period percentage change.""" + if previous > 0: + return (current - previous) / previous * 100 + return 100.0 if current > 0 else None + + +def _make_line_chart( + data_series: list, + max_y: float, + daily: list[dict], + step: int, + min_y: float = 0, + height: int = 350, +) -> ft.LineChart: + """Build a standard line chart with shared tooltip/grid/border config.""" + return ft.LineChart( + data_series=data_series, + left_axis=ft.ChartAxis(labels_size=50, labels_interval=step), + bottom_axis=ft.ChartAxis( + labels_size=50, + labels=[ + ft.ChartAxisLabel( + value=i, + label=ft.Text( + d["date"][-5:], size=9, color=ft.Colors.ON_SURFACE_VARIANT + ), + ) + for i, d in enumerate(daily) + if i % max(1, len(daily) // 8) == 0 or i == len(daily) - 1 + ], + ), + horizontal_grid_lines=ft.ChartGridLines( + interval=step, + color=ft.Colors.with_opacity(0.08, ft.Colors.ON_SURFACE), + width=1, + ), + tooltip_bgcolor=Theme.Colors.SURFACE_1, + tooltip_rounded_radius=8, + tooltip_padding=10, + tooltip_max_content_width=200, + tooltip_tooltip_border_side=ft.BorderSide(1, ft.Colors.OUTLINE_VARIANT), + tooltip_fit_inside_vertically=True, + tooltip_fit_inside_horizontally=True, + tooltip_show_on_top_of_chart_box_area=True, + point_line_start=0, + point_line_end=float("inf"), + border=ft.border.all(1, ft.Colors.OUTLINE_VARIANT), + interactive=True, + min_y=min_y, + max_y=max_y, + min_x=0, + max_x=len(daily) - 1, + height=height, + expand=True, + ) + + +def _make_legend(items: list[tuple[str, str]]) -> ft.Row: + """Build chart legend. items = [(color, label), ...]""" + return ft.Row( + [ + ft.Row( + [ + ft.Container(width=10, height=10, bgcolor=color, border_radius=5), + SecondaryText(label, size=Theme.Typography.BODY_SMALL), + ], + spacing=4, + ) + for color, label in items + ], + spacing=16, + alignment=ft.MainAxisAlignment.CENTER, + ) + + +# --------------------------------------------------------------------------- +# Base class for interactive insight tabs +# --------------------------------------------------------------------------- + + +class InsightsTab(ft.Container): + """Base class for insight tabs with date range chips, events toggle, and rebuild pattern.""" + + _default_days: int = 7 # Override in subclass + + def __init__(self) -> None: + super().__init__() + + self._days = self._default_days + self._data = self._load_data(self._days) + self._show_events = False + self._highlighted_dates: set[str] = set() + self._content_column = ft.Column(spacing=8, scroll=ft.ScrollMode.AUTO) + + self._events_toggle = ft.Switch( + label="Show events", + value=False, + on_change=self._on_events_toggle, + label_style=ft.TextStyle(size=12, color=ft.Colors.ON_SURFACE_VARIANT), + ) + + self._range_chips = ft.Row( + [ + ft.Container( + content=ft.Text( + label, + size=11, + weight=ft.FontWeight.W_600 + if days == self._days + else ft.FontWeight.W_400, + color=ft.Colors.ON_SURFACE + if days == self._days + else ft.Colors.ON_SURFACE_VARIANT, + ), + bgcolor=Theme.Colors.SURFACE_2 if days == self._days else None, + border=ft.border.all(1, ft.Colors.ON_SURFACE) + if days == self._days + else ft.border.all(1, ft.Colors.OUTLINE_VARIANT), + border_radius=12, + padding=ft.padding.symmetric(horizontal=10, vertical=4), + on_click=lambda e, d=days: self._on_range_change(d), + ink=True, + ) + for label, days in RANGE_OPTIONS + ], + spacing=6, + ) + + self._build_content() + + self.content = self._content_column + self.padding = ft.padding.only( + left=Theme.Spacing.MD, + top=Theme.Spacing.MD, + bottom=Theme.Spacing.MD, + right=Theme.Spacing.LG + 8, + ) + self.expand = True + + def _on_events_toggle(self, e: ft.ControlEvent) -> None: + self._show_events = e.control.value + self._highlighted_dates = set() + self._build_content() + self._content_column.update() + + def _on_event_click(self, dates: set[str]) -> None: + if self._highlighted_dates == dates: + self._highlighted_dates = set() + else: + self._highlighted_dates = dates + self._build_content() + self._content_column.update() + + def _on_range_change(self, days: int) -> None: + self._days = days + self._data = self._load_data(days) + + for i, (_label, d) in enumerate(RANGE_OPTIONS): + chip = self._range_chips.controls[i] + is_active = d == days + chip.bgcolor = Theme.Colors.SURFACE_2 if is_active else None + chip.border = ( + ft.border.all(1, ft.Colors.ON_SURFACE) + if is_active + else ft.border.all(1, ft.Colors.OUTLINE_VARIANT) + ) + chip.content.weight = ( + ft.FontWeight.W_600 if is_active else ft.FontWeight.W_400 + ) + chip.content.color = ( + ft.Colors.ON_SURFACE if is_active else ft.Colors.ON_SURFACE_VARIANT + ) + + self._build_content() + self._content_column.update() + + def _build_content(self) -> None: + """Override in subclass to build tab-specific content.""" + raise NotImplementedError + + @staticmethod + def _load_data(days: int = 14) -> dict[str, Any]: + """Override in subclass to load tab-specific data.""" + raise NotImplementedError + + def _make_filter_bar( + self, last_updated: str = "", extra_controls: list[ft.Control] | None = None + ) -> ft.Row: + """Build the standard filter bar with range chips, last updated, and events toggle.""" + right_items: list[ft.Control] = [] + if last_updated: + right_items.append( + SecondaryText( + f"Last updated: {last_updated}", size=Theme.Typography.BODY_SMALL + ) + ) + right_items.append(self._events_toggle) + if extra_controls: + right_items.extend(extra_controls) + return ft.Row( + [self._range_chips, ft.Row(right_items, spacing=Theme.Spacing.MD)], + alignment=ft.MainAxisAlignment.SPACE_BETWEEN, + ) + + def _render_event_chips( + self, + all_events: list[tuple[str, str, str]], + valid_dates: set[str] | None = None, + exclude_types: set[str] | None = None, + ) -> ft.Control | None: + """Render grouped event chips. Returns Row control or None.""" + if not self._show_events: + return None + + if exclude_types: + all_events = [ + (d, lbl, t) for d, lbl, t in all_events if t not in exclude_types + ] + if valid_dates is not None: + all_events = [(d, lbl, t) for d, lbl, t in all_events if d in valid_dates] + + grouped = _group_events(all_events, self._days) + if not grouped: + return None + + highlighted = self._highlighted_dates + return ft.Row( + [ + ft.Container( + content=ft.Text( + f"{label} {date[-5:]}", + size=Theme.Typography.BODY_SMALL, + weight=ft.FontWeight.W_600, + selectable=False, + ), + bgcolor=EVENT_TYPE_COLORS.get(etype, Theme.Colors.SURFACE_2) + if dates_set & highlighted + else Theme.Colors.SURFACE_2, + border=ft.border.all( + 2 if dates_set & highlighted else 1, + EVENT_TYPE_COLORS.get(etype, ft.Colors.OUTLINE_VARIANT), + ), + border_radius=10, + padding=ft.padding.symmetric(horizontal=8, vertical=3), + on_click=lambda e, ds=dates_set: self._on_event_click(ds), + ink=True, + ) + for date, label, etype, dates_set in grouped + ], + spacing=6, + wrap=True, + ) + + +# --------------------------------------------------------------------------- +# Shared DB loader +# --------------------------------------------------------------------------- + + +def _load_db() -> dict[str, Any]: + """Load all insight data from the database in one session. + + Returns a dict with keys consumed by every tab: + traffic_daily - list[dict] with keys date, clones, unique_cloners, views, unique_visitors + referrers - list[dict] with keys domain, views, uniques + popular_paths - list[dict] with keys path, views, uniques + stars_total - int + stars_recent - list[dict] latest 10 new_star events (username, location, company, date) + star_countries - dict[str, int] country -> count + sources - list[dict] enabled source statuses + pypi_total - int + """ + from app.services.insights.query_service import InsightQueryService + + with InsightQueryService() as qs: + cutoff_14d, _ = qs.compute_cutoffs(14) + + # -- github_traffic --------------------------------------------------- + + clones_rows = qs.get_daily("clones", cutoff_14d) + unique_rows = qs.get_daily("unique_cloners", cutoff_14d) + views_rows = qs.get_daily("views", cutoff_14d) + visitors_rows = qs.get_daily("unique_visitors", cutoff_14d) + + unique_map = {str(r.date)[:10]: int(r.value) for r in unique_rows} + views_map = {str(r.date)[:10]: int(r.value) for r in views_rows} + visitors_map = {str(r.date)[:10]: int(r.value) for r in visitors_rows} + + traffic_daily: list[dict[str, Any]] = [] + for r in clones_rows: + day = str(r.date)[:10] + traffic_daily.append( + { + "date": day, + "clones": int(r.value), + "unique_cloners": unique_map.get(day, 0), + "views": views_map.get(day, 0), + "unique_visitors": visitors_map.get(day, 0), + } + ) + + # Referrers from latest referrers metric + referrers_row = qs.get_latest("referrers") + referrers: list[dict[str, Any]] = [] + if referrers_row and referrers_row.metadata_: + meta = referrers_row.metadata_ + if isinstance(meta, dict) and not meta.get("referrers"): + for domain, counts in meta.items(): + if isinstance(counts, dict): + referrers.append( + { + "domain": domain, + "views": counts.get("views", 0), + "uniques": counts.get("uniques", 0), + } + ) + else: + for ref in meta.get("referrers", []): + referrers.append( + { + "domain": ref.get("referrer", ref.get("domain", "unknown")), + "views": ref.get("count", ref.get("views", 0)), + "uniques": ref.get("uniques", 0), + } + ) + referrers.sort(key=lambda x: -x["views"]) + + # Popular paths from latest popular_paths metric + paths_row = qs.get_latest("popular_paths") + popular_paths: list[dict[str, Any]] = [] + if paths_row and paths_row.metadata_: + for p in paths_row.metadata_.get("popular_paths", []): + popular_paths.append( + { + "path": p.get("path", "unknown"), + "views": p.get("count", p.get("views", 0)), + "uniques": p.get("uniques", 0), + } + ) + + # -- github_stars ----------------------------------------------------- + + star_events = qs.get_all_events("new_star") + stars_total = len(star_events) + + stars_recent: list[dict[str, Any]] = [] + star_countries: dict[str, int] = {} + for ev in star_events: + meta = ev.metadata_ or {} + country = meta.get("location", "Unknown") + if country and country != "Unknown": + parts = [p.strip() for p in country.split(",")] + country_key = parts[-1] if parts else "Unknown" + else: + country_key = "Unknown" + star_countries[country_key] = star_countries.get(country_key, 0) + 1 + + if len(stars_recent) < 10: + stars_recent.append( + { + "username": meta.get("username", "unknown"), + "location": meta.get("location", ""), + "company": meta.get("company", ""), + "date": str(ev.date)[:10], + } + ) + + star_countries = dict(sorted(star_countries.items(), key=lambda x: -x[1])) + + # -- sources ---------------------------------------------------------- + + sources = [ + { + "key": s.key, + "display_name": s.display_name, + "enabled": s.enabled, + } + for s in qs.get_sources() + ] + + # -- pypi_total ------------------------------------------------------- + + pypi_total_row = qs.get_latest("downloads_total") + pypi_total = int(pypi_total_row.value) if pypi_total_row else 0 + + return { + "traffic_daily": traffic_daily, + "referrers": referrers, + "popular_paths": popular_paths, + "stars_total": stars_total, + "stars_recent": stars_recent, + "star_countries": star_countries, + "sources": sources, + "pypi_total": pypi_total, + } + + +# --------------------------------------------------------------------------- +# Tab 1: Overview +# --------------------------------------------------------------------------- + + +class OverviewTab(ft.Container): + """Overview: key metrics, milestones, recent events, source status.""" + + def __init__(self, metadata: dict[str, Any], db: dict[str, Any]) -> None: + super().__init__() + + daily = db["traffic_daily"] + + # Compute rolling 14d totals + total_clones = sum(d["clones"] for d in daily) + total_unique = sum(d["unique_cloners"] for d in daily) + total_views = sum(d["views"] for d in daily) + + # Compute previous 14d for change arrows + load milestones/events + from datetime import datetime, timedelta + + from app.services.insights.query_service import InsightQueryService + + stars_total = db["stars_total"] + + with InsightQueryService() as qs: + now = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) + d14 = now - timedelta(days=14) + d28 = now - timedelta(days=28) + + prev_clones = qs.sum_range("clones", d28, d14) + prev_unique = qs.sum_range("unique_cloners", d28, d14) + prev_views = qs.sum_range("views", d28, d14) + + pypi_14d = qs.sum_range("downloads_daily", d14, now + timedelta(days=1)) + pypi_prev14d = qs.sum_range("downloads_daily", d28, d14) + + recent_stars = len(qs.get_events("new_star", d14)) + prev_star_count = len(qs.get_events_in_range("new_star", d28, d14)) + + # Milestones - highest value per category (ATH per record type) + best_per_category: dict[str, dict[str, Any]] = {} + for ev in qs.get_milestone_events(): + meta = ev.metadata_ if isinstance(ev.metadata_, dict) else {} + category = meta.get("category", ev.description) + + hero_str = ( + _extract_max_number(ev.description) + if ev.event_type != "feature" + else "" + ) + value = int(hero_str.replace(",", "")) if hero_str else 0 + + existing = best_per_category.get(category) + if existing is None or value > existing.get("_value", 0): + best_per_category[category] = { + "date": str(ev.date)[:10], + "description": ev.description, + "type": ev.event_type, + "metadata": meta, + "_value": value, + } + + milestones = sorted( + best_per_category.values(), key=lambda m: m["date"], reverse=True + ) + + # Recent events of all types + recent_events: list[dict[str, Any]] = [] + for ev in qs.get_insight_events(): + meta = ev.metadata_ if isinstance(ev.metadata_, dict) else {} + recent_events.append( + { + "date": str(ev.date)[:10], + "description": ev.description, + "type": ev.event_type, + "metadata": meta, + } + ) + + # Also add releases from metric rows + for r in qs.get_release_metrics(): + meta = r.metadata_ or {} + tag = meta.get("tag", "") + if tag: + recent_events.append( + { + "date": str(r.date)[:10], + "description": tag, + "type": "release", + "metadata": meta, + } + ) + + # Enrich reddit posts with upvote/comment data from post_stats + reddit_stats: dict[str, dict] = {} + for r in qs.get_all_metrics("post_stats"): + meta = r.metadata_ or {} + pid = meta.get("post_id", "") + if pid: + reddit_stats[pid] = { + "upvotes": int(r.value), + "comments": meta.get("comments", 0), + "subreddit": meta.get("subreddit", ""), + } + + # Sort by date desc, take 15 + recent_events.sort(key=lambda x: x["date"], reverse=True) + recent_events = recent_events[:15] + + # Top-level metrics with change arrows + previous values + metrics_row = ft.Row( + [ + MetricCard( + "Stars", + str(stars_total), + "#FFD700", + change_pct=_pct(recent_stars, prev_star_count), + prev_value=f"+{recent_stars} last 14d", + ), + MetricCard( + "PyPI Downloads", + f"{db['pypi_total']:,}", + "#FF69B4", + change_pct=_pct(pypi_14d, pypi_prev14d), + prev_value=f"14d: {pypi_14d:,} (prev: {pypi_prev14d:,})", + ), + MetricCard( + "14d Clones", + f"{total_clones:,}", + Theme.Colors.PRIMARY, + change_pct=_pct(total_clones, prev_clones), + prev_value=f"{prev_clones:,}", + ), + MetricCard( + "14d Unique", + f"{total_unique:,}", + Theme.Colors.INFO, + change_pct=_pct(total_unique, prev_unique), + prev_value=f"{prev_unique:,}", + ), + MetricCard( + "14d Views", + f"{total_views:,}", + Theme.Colors.SUCCESS, + change_pct=_pct(total_views, prev_views), + prev_value=f"{prev_views:,}" if prev_views else None, + ), + ], + spacing=Theme.Spacing.MD, + ) + + # Recent activity (left) — reuse ExpandableActivityRow + from datetime import datetime as _dt + + from app.components.frontend.controls.data_table import ( + DataTableColumn, + DataTableRow, + ) + from app.services.system.activity import ActivityEvent + + from ..activity_feed import ExpandableActivityRow + + _row_col = [DataTableColumn("Activity")] + + activity_items: list[ft.Control] = [] + for ev in recent_events: + status = EVENT_STATUS_MAP.get(ev["type"], "info") + try: + ts = _dt.strptime(ev["date"], "%Y-%m-%d") + except (ValueError, TypeError): + ts = _dt.now() + + # Build details from metadata + meta = ev.get("metadata", {}) + details = None + reddit_url = None + if ev["type"] == "reddit_post": + pid = meta.get("post_id", "") + stats = reddit_stats.get(pid, {}) + parts = [] + sub = meta.get("subreddit") or stats.get("subreddit", "") + if sub: + parts.append(f"r/{sub}") + if stats.get("upvotes"): + parts.append(f"{stats['upvotes']} upvotes") + if stats.get("comments"): + parts.append(f"{stats['comments']} comments") + details = " \u2022 ".join(parts) if parts else None + reddit_url = meta.get("url", "") + elif ev["type"] == "star": + usernames = meta.get("usernames", []) + if usernames: + details = ", ".join(usernames[:10]) + if len(usernames) > 10: + details += f" +{len(usernames) - 10} more" + release_url = None + if ev["type"] == "release": + tag = meta.get("tag", ev["description"]) + release_url = ( + f"https://github.com/lbedner/aegis-stack/releases/tag/{tag}" + ) + details = tag + elif ev["type"] in ("milestone_github", "milestone_pypi"): + cat = meta.get("category", "") + if cat: + details = cat.replace("_", " ").title() + + # For stars, show just the number in the title, name in details + message = ev["description"][:80] + if ev["type"] == "star" and " \u2014 " in message: + message = message.split(" \u2014 ")[0] # "⭐ #99 — ncthuc" → "⭐ #99" + + event_obj = ActivityEvent( + component="insights", + event_type=ev["type"], + message=message, + status=status, + timestamp=ts, + details=details or (reddit_url if reddit_url else None), + ) + row = ExpandableActivityRow(event_obj) + # Hide the status dot — not needed in insights feed + row.content.controls[0].controls[0].visible = False + + # For reddit posts, replace details with stats + clickable link + if reddit_url and details: + row._details_container.content = ft.Column( + [ + SecondaryText(details), + ft.Container( + content=ft.Text( + reddit_url, + size=Theme.Typography.BODY_SMALL, + style=ft.TextStyle( + color=Theme.Colors.INFO, + decoration=ft.TextDecoration.UNDERLINE, + ), + selectable=False, + ), + on_click=lambda e, u=reddit_url: e.page.launch_url(u), + ink=True, + ), + ], + spacing=4, + ) + elif release_url: + row._details_container.content = ft.Container( + content=ft.Text( + release_url, + size=Theme.Typography.BODY_SMALL, + style=ft.TextStyle( + color=Theme.Colors.INFO, + decoration=ft.TextDecoration.UNDERLINE, + ), + selectable=False, + ), + on_click=lambda e, u=release_url: e.page.launch_url(u), + ink=True, + ) + + activity_items.append( + DataTableRow(columns=_row_col, row_data=[row], padding=4) + ) + + # Parse milestone data into trophy cards + milestone_cards: list[ft.Control] = [] + for m in milestones: + meta = m.get("metadata", {}) + category = meta.get("category", "") + config = CATEGORY_CONFIG.get(category, {}) + label = config.get("label", m["description"]) + accent = config.get("color", "#9CA3AF") + + # Extract hero number — only for milestone types, not features + hero = ( + _extract_max_number(m["description"]) if m["type"] != "feature" else "" + ) + + milestone_cards.append( + MilestoneCard( + label=label, + value=hero or "\u2014", + date=_pretty_date(m["date"]), + accent_color=accent, + ) + ) + + # Arrange milestones in a 2x2 grid + milestone_grid: list[ft.Control] = [ + H3Text("Key Milestones"), + ft.Divider(height=1, color=ft.Colors.OUTLINE_VARIANT), + ] + row_items: list[ft.Control] = [] + for card in milestone_cards: + row_items.append(card) + if len(row_items) == 2: + milestone_grid.append(ft.Row(row_items, spacing=Theme.Spacing.MD)) + row_items = [] + if row_items: + milestone_grid.append(ft.Row(row_items, spacing=Theme.Spacing.MD)) + + side_by_side = ft.Row( + [ + ft.Column( + [ + H3Text("Recent Activity"), + ft.Divider(height=1, color=ft.Colors.OUTLINE_VARIANT), + *activity_items, + ], + spacing=6, + expand=2, + ), + ft.Column( + milestone_grid, + spacing=6, + expand=1, + ), + ], + spacing=Theme.Spacing.LG, + vertical_alignment=ft.CrossAxisAlignment.START, + ) + + # Intelligence Report — collapsible analysis + self.content = ft.Column( + [ + metrics_row, + ft.Container(height=4), + side_by_side, + ], + spacing=8, + scroll=ft.ScrollMode.AUTO, + ) + self.padding = Theme.Spacing.MD + self.expand = True + + +# --------------------------------------------------------------------------- +# Tab 2: GitHub +# --------------------------------------------------------------------------- + + +class GitHubTrafficTab(InsightsTab): + """GitHub traffic, events, and activity with date range and event annotations.""" + + _default_days = 7 + + # -- build content -------------------------------------------------------- + + def _build_content(self) -> None: # noqa: C901 + """Build or rebuild all content based on state.""" + data = self._data + daily = data["daily"] + + last_date = daily[-1]["date"] if daily else "" + content: list[ft.Control] = [ + self._make_filter_bar(last_updated=last_date), + ft.Container(height=8), + ] + + if not daily: + content.append(SecondaryText("No GitHub traffic data collected yet.")) + self._content_column.controls = content + return + + # Range-level aggregates + total_clones = sum(d["clones"] for d in daily) + total_unique = sum(d["unique_cloners"] for d in daily) + total_views = sum(d["views"] for d in daily) + total_visitors = sum(d["unique_visitors"] for d in daily) + clone_ratio = total_clones / total_unique if total_unique > 0 else 0 + num_days = len(daily) + range_label = next( + (label for label, days in RANGE_OPTIONS if days == self._days), + f"{self._days}d", + ) + + # Period-over-period change + + prev_c = data.get("prev_clones", 0) + prev_u = data.get("prev_unique", 0) + prev_v = data.get("prev_views", 0) + prev_vis = data.get("prev_visitors", 0) + + # Metric cards — all on one row, always visible + forks = data.get("forks", []) + releases = data.get("releases", {}) + star_daily = data.get("star_events_daily", []) + avg_stars = ( + sum(d["stars"] for d in star_daily) / len(star_daily) if star_daily else 0 + ) + + content.append( + ft.Row( + [ + MetricCard( + "Clones", + f"{total_clones:,}", + Theme.Colors.PRIMARY, + change_pct=_pct(total_clones, prev_c), + ), + MetricCard( + "Unique", + f"{total_unique:,}", + Theme.Colors.INFO, + change_pct=_pct(total_unique, prev_u), + ), + MetricCard( + "Views", + f"{total_views:,}", + Theme.Colors.SUCCESS, + change_pct=_pct(total_views, prev_v), + ), + MetricCard( + "Visitors", + f"{total_visitors:,}", + Theme.Colors.WARNING, + change_pct=_pct(total_visitors, prev_vis), + ), + MetricCard("Clone Ratio", f"{clone_ratio:.1f}:1", "#E91E63"), + MetricCard("Forks", str(len(forks)), "#A855F7"), + MetricCard("Releases", str(len(releases)), "#22C55E"), + MetricCard("Avg Stars/Day", f"{avg_stars:.1f}", "#F59E0B"), + ], + spacing=Theme.Spacing.MD, + ) + ) + + # Date range text + date_range = ( + f"{_pretty_date(daily[0]['date'])} \u2014 {_pretty_date(daily[-1]['date'])}" + ) + content.append(SecondaryText(date_range, size=Theme.Typography.BODY_SMALL)) + + # Event chips + first_date = daily[0]["date"] + last_date = daily[-1]["date"] + window_events = [ + (date, label, etype) + for date, label, etype in data.get("all_events", []) + if first_date <= date <= last_date + ] + chips = self._render_event_chips(window_events) + if chips: + content.append(chips) + + content.append(ft.Container(height=4)) + + # -- Clones + Unique chart with event annotations --------------------- + + releases_map = data.get("releases", {}) if self._show_events else {} + highlighted = self._highlighted_dates + + max_clone = max(d["clones"] for d in daily) if daily else 1 + clone_step = _smart_step(max_clone) + clone_max_y = int((max_clone // clone_step + 1) * clone_step) + + clone_points: list[ft.LineChartDataPoint] = [] + unique_points: list[ft.LineChartDataPoint] = [] + release_anno_points: list[ft.LineChartDataPoint] = [] + + for i, d in enumerate(daily): + is_hl = d["date"] in highlighted + hl_point = ( + ft.ChartCirclePoint( + radius=7, color="#FF5722", stroke_width=2, stroke_color="#FFFFFF" + ) + if is_hl + else None + ) + + clone_points.append( + ft.LineChartDataPoint( + i, + d["clones"], + tooltip=f"Clones: {d['clones']:,}", + point=hl_point, + ) + ) + unique_points.append( + ft.LineChartDataPoint( + i, + d["unique_cloners"], + tooltip=f"Unique: {d['unique_cloners']:,}", + ) + ) + + rel = releases_map.get(d["date"]) + if rel: + release_anno_points.append( + ft.LineChartDataPoint(i, 0, tooltip=rel, show_tooltip=True) + ) + else: + release_anno_points.append( + ft.LineChartDataPoint(i, 0, show_tooltip=False) + ) + + clone_series = [ + ft.LineChartData( + data_points=clone_points, + stroke_width=2, + color="#2563eb", + curved=True, + point=ft.ChartCirclePoint( + radius=3, color=ft.Colors.ON_SURFACE, stroke_width=0 + ), + stroke_cap_round=True, + ), + ft.LineChartData( + data_points=unique_points, + stroke_width=2, + color="#7c3aed", + curved=True, + point=ft.ChartCirclePoint( + radius=3, color=ft.Colors.ON_SURFACE, stroke_width=0 + ), + stroke_cap_round=True, + ), + ] + if any(p.show_tooltip for p in release_anno_points): + clone_series.append( + ft.LineChartData( + data_points=release_anno_points, + stroke_width=0, + color="#9CA3AF", + ) + ) + + clone_chart = _make_line_chart(clone_series, clone_max_y, daily, clone_step) + content.append( + ft.Container(content=clone_chart, margin=ft.margin.only(right=20)) + ) + content.append( + _make_legend([("#2563eb", "Clones"), ("#7c3aed", "Unique Cloners")]) + ) + + content.append(ft.Container(height=12)) + + # -- Views + Visitors chart ------------------------------------------- + + max_view = max(d["views"] for d in daily) if daily else 1 + view_step = _smart_step(max_view) + view_max_y = int((max_view // view_step + 1) * view_step) + + view_points: list[ft.LineChartDataPoint] = [] + visitor_points: list[ft.LineChartDataPoint] = [] + release_anno2: list[ft.LineChartDataPoint] = [] + + for i, d in enumerate(daily): + is_hl = d["date"] in highlighted + hl_point = ( + ft.ChartCirclePoint( + radius=7, color="#FF5722", stroke_width=2, stroke_color="#FFFFFF" + ) + if is_hl + else None + ) + + view_points.append( + ft.LineChartDataPoint( + i, + d["views"], + tooltip=f"Views: {d['views']:,}", + point=hl_point, + ) + ) + visitor_points.append( + ft.LineChartDataPoint( + i, + d["unique_visitors"], + tooltip=f"Visitors: {d['unique_visitors']:,}", + ) + ) + + rel = releases_map.get(d["date"]) + if rel: + release_anno2.append( + ft.LineChartDataPoint(i, 0, tooltip=rel, show_tooltip=True) + ) + else: + release_anno2.append(ft.LineChartDataPoint(i, 0, show_tooltip=False)) + + view_series = [ + ft.LineChartData( + data_points=view_points, + stroke_width=2, + color="#22C55E", + curved=True, + point=ft.ChartCirclePoint( + radius=3, color=ft.Colors.ON_SURFACE, stroke_width=0 + ), + stroke_cap_round=True, + ), + ft.LineChartData( + data_points=visitor_points, + stroke_width=2, + color="#F59E0B", + curved=True, + point=ft.ChartCirclePoint( + radius=3, color=ft.Colors.ON_SURFACE, stroke_width=0 + ), + stroke_cap_round=True, + ), + ] + if any(p.show_tooltip for p in release_anno2): + view_series.append( + ft.LineChartData( + data_points=release_anno2, + stroke_width=0, + color="#9CA3AF", + ) + ) + + views_chart = _make_line_chart(view_series, view_max_y, daily, view_step) + content.append( + ft.Container(content=views_chart, margin=ft.margin.only(right=20)) + ) + content.append(_make_legend([("#22C55E", "Views"), ("#F59E0B", "Visitors")])) + + # Interpretation + content.append( + ft.Container( + content=SecondaryText( + f"{range_label} clone ratio of {clone_ratio:.1f}:1 across {total_clones:,} clones " + f"from {total_unique:,} unique cloners. " + f"Traffic data covers {num_days} days.", + size=Theme.Typography.BODY_SMALL, + ), + padding=ft.padding.symmetric(horizontal=4, vertical=8), + ) + ) + + # -- Activity Summary stacked bar chart ------------------------------- + + activity = data.get("activity_summary", []) + if activity: + content.append(ft.Container(height=12)) + content.append(H3Text("Activity Summary")) + + # Group into 5 categories + act_categories = [ + ("Code", "#3B82F6", ["push", "creates", "deletes"]), + ("Issues", "#F59E0B", ["issues", "issue_comments"]), + ("PRs", "#A855F7", ["pull_requests", "pull_request_reviews"]), + ("Community", "#22C55E", ["forks", "stars"]), + ("Releases", "#EC4899", ["releases"]), + ] + + bar_groups: list[ft.BarChartGroup] = [] + act_max = 0 + + bar_width = max(8, 400 // max(len(activity), 1)) + + for i, day in enumerate(activity): + stack_items: list[ft.BarChartRodStackItem] = [] + running_y = 0.0 + for _cat_name, color, fields in act_categories: + val = sum(day.get(f, 0) for f in fields) + if val > 0: + stack_items.append( + ft.BarChartRodStackItem( + from_y=running_y, + to_y=running_y + val, + color=color, + ) + ) + running_y += val + act_max = max(act_max, running_y) + bar_groups.append( + ft.BarChartGroup( + x=i, + bar_rods=[ + ft.BarChartRod( + to_y=running_y, + width=bar_width, + rod_stack_items=stack_items, + color=ft.Colors.TRANSPARENT, + border_radius=2, + ), + ], + ) + ) + + act_step = _smart_step(act_max) if act_max > 0 else 5 + act_max_y = int((act_max // act_step + 1) * act_step) if act_max > 0 else 10 + + activity_chart = ft.BarChart( + bar_groups=bar_groups, + left_axis=ft.ChartAxis(labels_size=50, labels_interval=act_step), + bottom_axis=ft.ChartAxis( + labels_size=50, + labels=[ + ft.ChartAxisLabel( + value=i, + label=ft.Text( + day["date"][-5:], + size=9, + color=ft.Colors.ON_SURFACE_VARIANT, + ), + ) + for i, day in enumerate(activity) + if i % 3 == 0 or i == len(activity) - 1 + ], + ), + horizontal_grid_lines=ft.ChartGridLines( + interval=act_step, + color=ft.Colors.with_opacity(0.08, ft.Colors.ON_SURFACE), + width=1, + ), + border=ft.border.all(1, ft.Colors.OUTLINE_VARIANT), + interactive=False, + max_y=act_max_y, + height=300, + expand=True, + ) + + content.append( + ft.Container(content=activity_chart, margin=ft.margin.only(right=20)) + ) + content.append( + ft.Row( + [ + ft.Row( + [ + ft.Container( + width=10, height=10, bgcolor=color, border_radius=5 + ), + SecondaryText(name, size=Theme.Typography.BODY_SMALL), + ], + spacing=4, + ) + for name, color, _fields in act_categories + ], + spacing=16, + alignment=ft.MainAxisAlignment.CENTER, + ) + ) + + # -- Referrers -------------------------------------------------------- + + referrers = data.get("referrers", []) + content.append(ft.Container(height=8)) + content.append(H3Text("Referrers")) + content.append(ft.Divider(height=1, color=ft.Colors.OUTLINE_VARIANT)) + + if referrers: + for ref in referrers: + domain = ref["domain"] + # Build URL — search engines get their URL, others get https:// + url = ( + f"https://{domain}" + if "." in domain + else f"https://www.google.com/search?q={domain}" + ) + content.append( + ft.Row( + [ + ft.Container( + content=ft.Text( + domain, + size=Theme.Typography.BODY_SMALL, + style=ft.TextStyle( + color=Theme.Colors.INFO, + decoration=ft.TextDecoration.UNDERLINE, + ), + selectable=False, + ), + width=200, + on_click=lambda e, u=url: e.page.launch_url(u), + ink=True, + ), + SecondaryText( + f"{ref['views']} views", + size=Theme.Typography.BODY_SMALL, + ), + SecondaryText( + f"{ref['uniques']} unique", + size=Theme.Typography.BODY_SMALL, + ), + ], + spacing=8, + ) + ) + else: + content.append( + SecondaryText( + "No referrer data available.", size=Theme.Typography.BODY_SMALL + ) + ) + + # -- Popular Paths ---------------------------------------------------- + + paths = data.get("popular_paths", []) + if paths: + content.append(ft.Container(height=8)) + content.append(H3Text("Popular Paths")) + content.append(ft.Divider(height=1, color=ft.Colors.OUTLINE_VARIANT)) + for p in paths: + path_url = f"https://github.com{p['path']}" + content.append( + ft.Row( + [ + ft.Container( + content=ft.Text( + p["path"], + size=Theme.Typography.BODY_SMALL, + style=ft.TextStyle( + color=Theme.Colors.INFO, + decoration=ft.TextDecoration.UNDERLINE, + ), + selectable=False, + ), + expand=True, + on_click=lambda e, u=path_url: e.page.launch_url(u), + ink=True, + ), + SecondaryText( + f"{p['views']} views", size=Theme.Typography.BODY_SMALL + ), + SecondaryText( + f"{p['uniques']} unique", + size=Theme.Typography.BODY_SMALL, + ), + ], + spacing=8, + ) + ) + + self._content_column.controls = content + + # -- data loader ---------------------------------------------------------- + + @staticmethod + def _load_data(days: int = 14) -> dict[str, Any]: + """Load GitHub data from database with date cutoff.""" + from app.services.insights.query_service import InsightQueryService + + with InsightQueryService() as qs: + cutoff, prev_cutoff = qs.compute_cutoffs(days) + + # Traffic daily + clones_rows = qs.get_daily("clones", cutoff) + unique_rows = qs.get_daily("unique_cloners", cutoff) + views_rows = qs.get_daily("views", cutoff) + visitors_rows = qs.get_daily("unique_visitors", cutoff) + + unique_map = {str(r.date)[:10]: int(r.value) for r in unique_rows} + views_map = {str(r.date)[:10]: int(r.value) for r in views_rows} + visitors_map = {str(r.date)[:10]: int(r.value) for r in visitors_rows} + + daily: list[dict[str, Any]] = [] + for r in clones_rows: + day = str(r.date)[:10] + daily.append( + { + "date": day, + "clones": int(r.value), + "unique_cloners": unique_map.get(day, 0), + "views": views_map.get(day, 0), + "unique_visitors": visitors_map.get(day, 0), + } + ) + + # Referrers (latest snapshot) + referrers_row = qs.get_latest("referrers") + referrers: list[dict[str, Any]] = [] + if referrers_row and referrers_row.metadata_: + meta = referrers_row.metadata_ + if isinstance(meta, dict) and not meta.get("referrers"): + for domain, counts in meta.items(): + if isinstance(counts, dict): + referrers.append( + { + "domain": domain, + "views": counts.get("views", 0), + "uniques": counts.get("uniques", 0), + } + ) + else: + for ref in meta.get("referrers", []): + referrers.append( + { + "domain": ref.get( + "referrer", ref.get("domain", "unknown") + ), + "views": ref.get("count", ref.get("views", 0)), + "uniques": ref.get("uniques", 0), + } + ) + referrers.sort(key=lambda x: -x["views"]) + + # Popular paths (latest snapshot) + paths_row = qs.get_latest("popular_paths") + popular_paths: list[dict[str, Any]] = [] + if paths_row and paths_row.metadata_: + for p in paths_row.metadata_.get( + "paths", paths_row.metadata_.get("popular_paths", []) + ): + popular_paths.append( + { + "path": p.get("path", "unknown"), + "title": p.get("title", ""), + "views": p.get("count", p.get("views", 0)), + "uniques": p.get("uniques", 0), + } + ) + + # Fork events + fork_rows = qs.get_events("forks", cutoff) + forks: list[dict[str, str]] = [] + for r in fork_rows: + meta = r.metadata_ or {} + forks.append( + {"actor": meta.get("actor", "unknown"), "date": str(r.date)[:10]} + ) + + # Star events daily + star_rows = qs.get_daily("star_events", cutoff) + star_events_daily: list[dict[str, Any]] = [] + for r in star_rows: + star_events_daily.append( + {"date": str(r.date)[:10], "stars": int(r.value)} + ) + + # Activity summary daily + activity_rows = qs.get_daily("activity_summary", cutoff) + activity_summary: list[dict[str, Any]] = [] + for r in activity_rows: + meta = r.metadata_ or {} + entry: dict[str, Any] = {"date": str(r.date)[:10]} + for field in ( + "push", + "issues", + "pull_requests", + "pull_request_reviews", + "issue_comments", + "forks", + "stars", + "releases", + "creates", + "deletes", + ): + entry[field] = meta.get(field, 0) + activity_summary.append(entry) + + # Build all_events list for chips + all_events: list[tuple[str, str, str]] = [] + + # Release events from metrics + release_rows = qs.get_events("releases", cutoff) + releases: dict[str, str] = {} + for r in release_rows: + meta = r.metadata_ or {} + tag = meta.get("tag", "") + day = str(r.date)[:10] + if tag: + all_events.append((day, tag, "release")) + if day in releases: + releases[day] += f"\n{tag}" + else: + releases[day] = tag + + for f in forks: + all_events.append((f["date"], f"Fork: {f['actor']}", "fork")) + + # InsightEvent rows filtered to GitHub-relevant types + for ev in qs.get_insight_events( + cutoff=cutoff, type_filter=GITHUB_EVENT_TYPES + ): + day = str(ev.date)[:10] + all_events.append((day, ev.description[:60], ev.event_type)) + if day in releases: + releases[day] += f"\n{ev.description[:60]}" + else: + releases[day] = ev.description[:60] + + all_events.sort(key=lambda x: x[0]) + + return { + "daily": daily, + "referrers": referrers, + "popular_paths": popular_paths, + "forks": forks, + "releases": releases, + "all_events": all_events, + "activity_summary": activity_summary, + "star_events_daily": star_events_daily, + "prev_clones": qs.sum_range("clones", prev_cutoff, cutoff), + "prev_unique": qs.sum_range("unique_cloners", prev_cutoff, cutoff), + "prev_views": qs.sum_range("views", prev_cutoff, cutoff), + "prev_visitors": qs.sum_range("unique_visitors", prev_cutoff, cutoff), + } + + +# --------------------------------------------------------------------------- +# Tab 3: Stars +# --------------------------------------------------------------------------- + + +class StarsTab(InsightsTab): + """Stars: cumulative chart, recent list, event chips — with date range.""" + + _default_days = 7 + + def _build_content(self) -> None: + """Build or rebuild all content based on state.""" + data = self._data + star_history = data["star_history"] + total_stars = data["total_stars"] + range_stars = data["range_stars"] + + last_date = star_history[-1]["date"] if star_history else "" + content: list[ft.Control] = [ + self._make_filter_bar(last_updated=last_date), + ft.Container(height=8), + ] + + # Metric cards + num_days = len(star_history) if star_history else 1 + avg_per_day = range_stars / num_days if num_days else 0 + + content.append( + ft.Row( + [ + MetricCard("Total Stars", str(total_stars), "#FFD700"), + MetricCard("In Range", str(range_stars), Theme.Colors.INFO), + MetricCard("Avg / Day", f"{avg_per_day:.1f}", Theme.Colors.SUCCESS), + ], + spacing=Theme.Spacing.MD, + ) + ) + + if not star_history: + content.append(SecondaryText("No star events in this range.")) + self._content_column.controls = content + return + + # Date range text + date_range = f"{_pretty_date(star_history[0]['date'])} \u2014 {_pretty_date(star_history[-1]['date'])}" + content.append(SecondaryText(date_range, size=Theme.Typography.BODY_SMALL)) + + # Event chips — only on dates with stars, exclude star type + star_dates = {d["date"] for d in star_history} + chips = self._render_event_chips( + data.get("all_events", []), + valid_dates=star_dates, + exclude_types={"star"}, + ) + if chips: + content.append(chips) + + # Star History cumulative chart + max_stars = star_history[-1]["stars"] + min_stars = star_history[0]["stars"] + padding = max(1, (max_stars - min_stars) // 4) + star_min_y = max(0, min_stars - padding) + star_range = max_stars - star_min_y + star_step = _smart_step(star_range) + star_max_y = int((max_stars // star_step + 1) * star_step) + highlighted = self._highlighted_dates + # Filter releases: exclude star events, only dates that have chart points + releases_map = data.get("releases", {}) if self._show_events else {} + non_star_releases: dict[str, str] = {} + for day, label in releases_map.items(): + if day not in star_dates: + continue + lines = [ln for ln in label.split("\n") if not ln.startswith("\u2b50")] + if lines: + non_star_releases[day] = "\n".join(lines) + + history_points: list[ft.LineChartDataPoint] = [] + release_anno: list[ft.LineChartDataPoint] = [] + + for i, d in enumerate(star_history): + is_hl = d["date"] in highlighted + hl_point = ( + ft.ChartCirclePoint( + radius=7, color="#FF5722", stroke_width=2, stroke_color="#FFFFFF" + ) + if is_hl + else None + ) + count = d.get("count", 1) + names = d.get("usernames", []) + if count == 1: + tip = f"#{d['stars']} — {names[0] if names else ''}\n{_pretty_date(d['date'])}" + else: + first_num = d["stars"] - count + 1 + tip = f"#{first_num}-#{d['stars']} ({count} stars)\n{_pretty_date(d['date'])}" + history_points.append( + ft.LineChartDataPoint( + i, + d["stars"], + tooltip=tip, + point=hl_point, + ) + ) + + rel = non_star_releases.get(d["date"]) + if rel: + release_anno.append( + ft.LineChartDataPoint(i, 0, tooltip=rel, show_tooltip=True) + ) + else: + release_anno.append(ft.LineChartDataPoint(i, 0, show_tooltip=False)) + + chart_series = [ + ft.LineChartData( + data_points=history_points, + stroke_width=3, + color="#FFD700", + curved=True, + below_line_bgcolor=ft.Colors.with_opacity(0.15, "#FFD700"), + point=ft.ChartCirclePoint(radius=3, color="#FFD700", stroke_width=0), + stroke_cap_round=True, + ), + ] + if any(p.show_tooltip for p in release_anno): + chart_series.append( + ft.LineChartData( + data_points=release_anno, + stroke_width=0, + color="#9CA3AF", + ) + ) + + history_chart = _make_line_chart( + chart_series, star_max_y, star_history, star_step, min_y=star_min_y + ) + content.append( + ft.Container(content=history_chart, margin=ft.margin.only(right=20)) + ) + content.append(_make_legend([("#FFD700", "Cumulative Stars")])) + + self._content_column.controls = content + + @staticmethod + def _load_data(days: int = 9999) -> dict[str, Any]: + """Load star data from database with date cutoff.""" + from app.services.insights.query_service import InsightQueryService + + with InsightQueryService() as qs: + cutoff, _ = qs.compute_cutoffs(days) + + # All stars (for total count) + all_rows = qs.get_all_events("new_star") + total_stars = len(all_rows) + + if not all_rows: + return { + "star_history": [], + "stars_recent": [], + "total_stars": 0, + "range_stars": 0, + "all_events": [], + "releases": {}, + } + + # Stars in range + range_rows = [r for r in all_rows if r.date >= cutoff] + range_stars = len(range_rows) + + # Cumulative history - only days with stars, within range + by_date: dict[str, dict[str, Any]] = {} + for r in range_rows: + day = str(r.date)[:10] + meta = r.metadata_ if isinstance(r.metadata_, dict) else {} + if day not in by_date: + by_date[day] = {"max_num": 0, "count": 0, "usernames": []} + by_date[day]["max_num"] = max(by_date[day]["max_num"], int(r.value)) + by_date[day]["count"] += 1 + by_date[day]["usernames"].append(meta.get("username", "unknown")) + star_history = [ + { + "date": d, + "stars": info["max_num"], + "count": info["count"], + "usernames": info["usernames"], + } + for d, info in sorted(by_date.items()) + ] + + # Recent stars (last 20 in range) + stars_recent: list[dict[str, Any]] = [] + for r in reversed(range_rows[-20:]): + meta = r.metadata_ if isinstance(r.metadata_, dict) else {} + stars_recent.append( + { + "number": int(r.value), + "username": meta.get("username", "unknown"), + "location": meta.get("location", ""), + "company": meta.get("company", ""), + "date": str(r.date)[:10], + } + ) + + # Events for chips + chart annotations + all_events: list[tuple[str, str, str]] = [] + for r in qs.get_release_metrics(): + tag = (r.metadata_ or {}).get("tag", "") + if tag: + all_events.append((str(r.date)[:10], tag, "release")) + for ev in qs.get_insight_events( + cutoff=cutoff, type_filter=GITHUB_EVENT_TYPES + ): + all_events.append( + (str(ev.date)[:10], ev.description[:60], ev.event_type) + ) + + release_map: dict[str, str] = {} + for date, label, _ in all_events: + if date in release_map: + release_map[date] += f"\n{label}" + else: + release_map[date] = label + + all_events.sort(key=lambda x: x[0]) + + return { + "star_history": star_history, + "stars_recent": stars_recent, + "total_stars": total_stars, + "range_stars": range_stars, + "all_events": all_events, + "releases": release_map, + } + + +# --------------------------------------------------------------------------- +# Tab 4: PyPI (unchanged -- already uses real data) +# --------------------------------------------------------------------------- + + +class PyPITab(InsightsTab): + """PyPI: real data from database with CI/mirror toggle and date range filter.""" + + _default_days = 7 + + def __init__(self) -> None: + self._include_ci = False + self._toggle = ft.Switch( + label="Include CI/Mirror downloads", + value=False, + on_change=self._on_toggle, + label_style=ft.TextStyle(size=12, color=ft.Colors.ON_SURFACE_VARIANT), + ) + super().__init__() + + def _on_toggle(self, e: ft.ControlEvent) -> None: + self._include_ci = e.control.value + self._build_content() + self._content_column.update() + + def _build_content(self) -> None: + """Build or rebuild all content based on toggle state and date range.""" + data = self._data + include_ci = self._include_ci + daily = data["daily"] + + # Compute averages from daily data + bot_pct = data["bot_percent"] + num_days = len(daily) if daily else 1 + range_label = next( + (label for label, days in RANGE_OPTIONS if days == self._days), + f"{self._days}d", + ) + + range_all = sum(d["total"] for d in daily) if daily else 0 + range_human = sum(d["human"] for d in daily) if daily else 0 + + if include_ci: + total_display = f"{range_all:,}" + avg_day = range_all // num_days if num_days else 0 + else: + total_display = f"{range_human:,}" + avg_day = range_human // num_days if num_days else 0 + + avg_week = avg_day * 7 + avg_month = avg_day * 30 + + last_date = daily[-1]["date"] if daily else "" + content: list[ft.Control] = [ + self._make_filter_bar( + last_updated=last_date, extra_controls=[self._toggle] + ), + ft.Container(height=8), + ] + + # Period-over-period change + prev_total = data.get("prev_total", 0) + prev_human = data.get("prev_human", 0) + prev_val = prev_total if include_ci else prev_human + cur_val = range_all if include_ci else range_human + + # Metric cards + metrics_row = ft.Row( + [ + MetricCard( + "Total Downloads", + total_display, + "#FF69B4", + change_pct=_pct(cur_val, prev_val), + ), + MetricCard("Avg / Day", f"{avg_day:,}", Theme.Colors.INFO), + MetricCard("Avg / Week", f"{avg_week:,}", Theme.Colors.SUCCESS), + MetricCard("Avg / Month", f"{avg_month:,}", Theme.Colors.PRIMARY), + MetricCard( + "Bot %", + f"{bot_pct:.0f}%", + Theme.Colors.WARNING if bot_pct > 50 else Theme.Colors.INFO, + ), + ], + spacing=Theme.Spacing.MD, + ) + content.append(metrics_row) + + # Date range + events in window + releases = data.get("releases", {}) + if daily: + date_range = f"{_pretty_date(daily[0]['date'])} \u2014 {_pretty_date(daily[-1]['date'])}" + content.append(ft.Container(height=8)) + content.append(SecondaryText(date_range, size=Theme.Typography.BODY_SMALL)) + + # Event chips + first_date = daily[0]["date"] if daily else "" + last_date = daily[-1]["date"] if daily else "" + window_events = [ + (date, label, etype) + for date, label, etype in data.get("all_events", []) + if first_date <= date <= last_date + ] + chips = self._render_event_chips(window_events) + if chips: + content.append(chips) + + # Chart 1: Downloads — toggle controls which lines show + if daily: + if include_ci: + max_val = max(d["total"] for d in daily) + else: + max_val = ( + max(d["human"] for d in daily) + if any(d["human"] for d in daily) + else 1 + ) + + # Smart rounding: small values round to nearest 5, medium to 25, large to 100 + if max_val <= 20: + step = 5 + elif max_val <= 100: + step = 10 + elif max_val <= 500: + step = 50 + else: + step = 100 + rounded_max = int((max_val // step + 1) * step) + + releases = data.get("releases", {}) if self._show_events else {} + + chart1_series = [] + if include_ci: + # Stacked: total (pink filled) on top, human (green filled) below + total_points = [] + human_points_ci = [] + release_points_ci = [] + highlighted = self._highlighted_dates + for i, d in enumerate(daily): + is_hl = d["date"] in highlighted + point_style = ( + ft.ChartCirclePoint( + radius=7, + color="#FF5722", + stroke_width=2, + stroke_color="#FFFFFF", + ) + if is_hl + else None + ) + total_points.append( + ft.LineChartDataPoint( + i, + d["total"], + tooltip=f"Total: {d['total']:,} Bot: {d['total'] - d['human']:,}", + point=point_style, + ) + ) + human_points_ci.append( + ft.LineChartDataPoint( + i, d["human"], tooltip=f"Human: {d['human']:,}" + ) + ) + + rel = releases.get(d["date"]) + if rel: + release_points_ci.append( + ft.LineChartDataPoint( + i, 0, tooltip=f"{rel}", show_tooltip=True + ) + ) + else: + release_points_ci.append( + ft.LineChartDataPoint(i, 0, show_tooltip=False) + ) + + chart1_series.append( + ft.LineChartData( + data_points=total_points, + stroke_width=1, + color="#FF69B4", + below_line_bgcolor=ft.Colors.with_opacity(0.5, "#FF69B4"), + ) + ) + chart1_series.append( + ft.LineChartData( + data_points=human_points_ci, + stroke_width=2, + color="#22C55E", + below_line_bgcolor=ft.Colors.with_opacity(0.6, "#22C55E"), + point=ft.ChartCirclePoint( + radius=3, color=ft.Colors.ON_SURFACE, stroke_width=0 + ), + ) + ) + chart1_series.append( + ft.LineChartData( + data_points=release_points_ci, + stroke_width=0, + color="#9CA3AF", + ) + ) + else: + # Just human as filled area + human_points = [] + release_points = [] + highlighted = self._highlighted_dates + for i, d in enumerate(daily): + tip = f"{d['human']:,} downloads" + is_hl = d["date"] in highlighted + point_style = ( + ft.ChartCirclePoint( + radius=7, + color="#FF5722", + stroke_width=2, + stroke_color="#FFFFFF", + ) + if is_hl + else ft.ChartCirclePoint( + radius=3, color=ft.Colors.ON_SURFACE, stroke_width=0 + ) + ) + human_points.append( + ft.LineChartDataPoint( + i, d["human"], tooltip=tip, point=point_style + ) + ) + + rel = releases.get(d["date"]) + if rel: + release_points.append( + ft.LineChartDataPoint( + i, 0, tooltip=f"{rel}", show_tooltip=True + ) + ) + else: + release_points.append( + ft.LineChartDataPoint(i, 0, show_tooltip=False) + ) + + chart1_series.append( + ft.LineChartData( + data_points=human_points, + stroke_width=2, + color="#22C55E", + below_line_bgcolor=ft.Colors.with_opacity(0.4, "#22C55E"), + point=ft.ChartCirclePoint( + radius=3, color=ft.Colors.ON_SURFACE, stroke_width=0 + ), + ) + ) + # Release annotation series — invisible line, tooltip in secondary color + chart1_series.append( + ft.LineChartData( + data_points=release_points, + stroke_width=0, + color="#9CA3AF", + ) + ) + + chart1 = _make_line_chart(chart1_series, rounded_max, daily, step) + + if include_ci: + legend1 = _make_legend( + [ + (ft.Colors.with_opacity(0.5, "#FF69B4"), "Bot / Mirror"), + (ft.Colors.with_opacity(0.6, "#22C55E"), "Human"), + ] + ) + else: + legend1 = _make_legend( + [(ft.Colors.with_opacity(0.4, "#22C55E"), "Human Downloads")] + ) + + chart1_wrapped = ft.Container( + content=chart1, + margin=ft.margin.only(right=20), + ) + content.extend([ft.Container(height=8), chart1_wrapped, legend1]) + + # Bar chart: downloads by version + versions = data["versions"] + if versions: + # Sort by version number (semantic sort) + def _version_sort_key(ver: str) -> tuple: + parts = ( + ver.replace("rc", ".") + .replace("a", ".") + .replace("b", ".") + .split(".") + ) + return tuple(int(p) if p.isdigit() else 0 for p in parts) + + all_sorted = sorted(versions.keys(), key=_version_sort_key) + + # Filter out versions with 0 downloads for the current mode + sorted_versions = [] + for ver in all_sorted: + info = versions[ver] + if isinstance(info, dict): + t, h = info.get("total", 0), info.get("human", 0) + else: + t, h = info, 0 + val = t if include_ci else h + if val > 0: + sorted_versions.append(ver) + + bar_groups = [] + bar_max = 0 + for i, ver in enumerate(sorted_versions): + info = versions[ver] + if isinstance(info, dict): + t, h = info.get("total", 0), info.get("human", 0) + else: + t, h = info, 0 + + val = t if include_ci else h + bar_max = max(bar_max, val) + + bar_groups.append( + ft.BarChartGroup( + x=i, + bar_rods=[ + ft.BarChartRod( + from_y=0, + to_y=val, + width=max(8, 400 // len(sorted_versions)), + color="#FF69B4" if include_ci else "#22C55E", + border_radius=ft.border_radius.only( + top_left=3, top_right=3 + ), + tooltip=f"{ver}: {val:,}", + ) + ], + ) + ) + + bar_rounded_max = int(bar_max * 1.15) + 1 if bar_max > 0 else 10 + + # Show every Nth label to avoid overlap + label_step = max(1, len(sorted_versions) // 12) + + version_bar = ft.BarChart( + bar_groups=bar_groups, + left_axis=ft.ChartAxis( + labels_size=50, labels_interval=max(1, bar_rounded_max // 4) + ), + bottom_axis=ft.ChartAxis( + labels_size=50, + labels=[ + ft.ChartAxisLabel( + value=i, + label=ft.Text( + ver, size=8, color=ft.Colors.ON_SURFACE_VARIANT + ), + ) + for i, ver in enumerate(sorted_versions) + if i % label_step == 0 or i == len(sorted_versions) - 1 + ], + ), + horizontal_grid_lines=ft.ChartGridLines( + interval=bar_rounded_max // 4 or 1, + color=ft.Colors.with_opacity(0.08, ft.Colors.ON_SURFACE), + width=1, + ), + tooltip_bgcolor=Theme.Colors.SURFACE_1, + tooltip_rounded_radius=8, + tooltip_padding=10, + tooltip_tooltip_border_side=ft.BorderSide(1, ft.Colors.OUTLINE_VARIANT), + border=ft.border.all(1, ft.Colors.OUTLINE_VARIANT), + max_y=bar_rounded_max, + height=250, + expand=True, + ) + + bar_wrapped = ft.Container( + content=version_bar, margin=ft.margin.only(right=20) + ) + bar_legend = ft.Row( + [ + ft.Row( + [ + ft.Container( + width=10, + height=10, + bgcolor="#FF69B4" if include_ci else "#22C55E", + border_radius=5, + ), + SecondaryText( + f"Downloads by Version ({'incl. CI' if include_ci else 'human only'})", + size=Theme.Typography.BODY_SMALL, + ), + ], + spacing=4, + ) + ], + alignment=ft.MainAxisAlignment.CENTER, + ) + + content.extend([ft.Container(height=12), bar_wrapped, bar_legend]) + + # Three pie charts in one row + pie_charts: list[ft.Control] = [] + + installers = data["installers"] + if installers: + total_inst = sum(installers.values()) + pie_charts.append( + PieChartCard( + title=f"By Installer ({range_label})", + sections=[ + {"value": count, "label": f"{name} ({count / total_inst:.0%})"} + for name, count in list(installers.items())[:8] + ], + ) + ) + + countries = data["countries"] + if countries: + total_c = sum(countries.values()) + pie_charts.append( + PieChartCard( + title=f"By Country ({range_label})", + sections=[ + {"value": count, "label": f"{code} ({count / total_c:.0%})"} + for code, count in list(countries.items())[:10] + ], + ) + ) + + dist_types = data.get("types", {}) + if dist_types: + total_t = sum(dist_types.values()) + pie_charts.append( + PieChartCard( + title=f"Dist Type ({range_label})", + sections=[ + {"value": count, "label": f"{name} ({count / total_t:.0%})"} + for name, count in dist_types.items() + ], + ) + ) + + if pie_charts: + content.extend( + [ + ft.Container(height=8), + ft.Row(pie_charts, spacing=Theme.Spacing.MD), + ] + ) + + # Version table + versions = data["versions"] + if versions: + from app.components.frontend.controls.data_table import ( + DataTable, + DataTableColumn, + ) + + version_columns = [ + DataTableColumn(header="Version", width=100, style="primary"), + DataTableColumn(header="Total", width=80, alignment="right"), + DataTableColumn(header="Human", width=80, alignment="right"), + DataTableColumn(header="Bot", width=80, alignment="right"), + DataTableColumn(header="Bot %", width=70, alignment="right"), + ] + + version_rows_data = [] + for ver, info in list(versions.items())[:10]: + if isinstance(info, dict): + t, h = info.get("total", 0), info.get("human", 0) + else: + t, h = info, 0 + b = t - h + pct = f"{(b / t * 100):.0f}%" if t > 0 else "\u2014" + pct_color = "#EF4444" if t > 0 and b / t > 0.8 else "#22C55E" + version_rows_data.append( + [ + ver, + f"{t:,}", + ft.Text(f"{h:,}", color="#22C55E", size=12), + ft.Text(f"{b:,}", color="#EF4444", size=12), + ft.Text( + pct, color=pct_color, size=12, weight=ft.FontWeight.W_600 + ), + ] + ) + + # Totals row + total_t = sum( + info.get("total", 0) if isinstance(info, dict) else info + for info in versions.values() + ) + total_h = sum( + info.get("human", 0) if isinstance(info, dict) else 0 + for info in versions.values() + ) + total_b = total_t - total_h + total_pct = f"{(total_b / total_t * 100):.0f}%" if total_t > 0 else "\u2014" + + version_rows_data.append( + [ + ft.Text("TOTAL", size=12, weight=ft.FontWeight.W_700), + ft.Text(f"{total_t:,}", size=12, weight=ft.FontWeight.W_700), + ft.Text( + f"{total_h:,}", + size=12, + weight=ft.FontWeight.W_700, + color="#22C55E", + ), + ft.Text( + f"{total_b:,}", + size=12, + weight=ft.FontWeight.W_700, + color="#EF4444", + ), + ft.Text( + total_pct, + size=12, + weight=ft.FontWeight.W_700, + color="#EF4444" + if total_t > 0 and total_b / total_t > 0.8 + else "#22C55E", + ), + ] + ) + + version_table = DataTable( + columns=version_columns, + rows=version_rows_data, + ) + + # Daily downloads table (sorted by highest day) + daily_columns = [ + DataTableColumn(header="Date", width=80, style="primary"), + DataTableColumn(header="Total", width=70, alignment="right"), + DataTableColumn(header="Human", width=70, alignment="right"), + DataTableColumn(header="Bot", width=70, alignment="right"), + ] + + sorted_days = sorted(daily, key=lambda d: d["date"], reverse=True) + daily_rows_data = [] + for d in sorted_days: + bot = d["total"] - d["human"] + daily_rows_data.append( + [ + d["date"][-5:], + f"{d['total']:,}", + ft.Text(f"{d['human']:,}", color="#22C55E", size=12), + ft.Text(f"{bot:,}", color="#EF4444", size=12), + ] + ) + + daily_table = DataTable( + columns=daily_columns, + rows=daily_rows_data, + scroll_height=400, + ) + + content.extend( + [ + ft.Container(height=12), + ft.Row( + [ + ft.Column( + [ + H3Text(f"Downloads by Version ({range_label})"), + version_table, + ], + expand=True, + ), + ft.Column( + [ + H3Text(f"Daily Downloads ({range_label})"), + daily_table, + ], + expand=True, + ), + ], + spacing=Theme.Spacing.LG, + vertical_alignment=ft.CrossAxisAlignment.START, + ), + ] + ) + + self._content_column.controls = content + + @staticmethod + def _load_data(days: int = 14) -> dict: + """Load PyPI data from database (sync).""" + from app.services.insights.query_service import InsightQueryService + + with InsightQueryService() as qs: + cutoff, prev_cutoff = qs.compute_cutoffs(days) + + # Total + total_row = qs.get_latest("downloads_total") + total = int(total_row.value) if total_row else 0 + + # Daily total + human + daily_rows = qs.get_daily("downloads_daily", cutoff) + human_rows = qs.get_daily("downloads_daily_human", cutoff) + human_map = {str(r.date)[:10]: int(r.value) for r in human_rows} + + daily = [] + for r in daily_rows: + day = str(r.date)[:10] + t = int(r.value) + h = human_map.get(day, 0) + daily.append({"date": day, "total": t, "human": h}) + + today_total = daily[-1]["total"] if daily else 0 + today_human = daily[-1]["human"] if daily else 0 + + # Bot % computed over entire selected range + range_total = sum(d["total"] for d in daily) + range_human = sum(d["human"] for d in daily) + bot_pct = ( + ((range_total - range_human) / range_total * 100) + if range_total > 0 + else 0 + ) + + # Latest installer breakdown (aggregate from all days) + all_installers: dict[str, int] = {} + for r in qs.get_daily("downloads_by_installer", cutoff): + meta = r.metadata_ or {} + for name, count in meta.get("installers", {}).items(): + all_installers[name] = all_installers.get(name, 0) + count + installers = dict(sorted(all_installers.items(), key=lambda x: -x[1])) + + # Latest country breakdown (aggregate) + all_countries: dict[str, int] = {} + for r in qs.get_daily("downloads_by_country", cutoff): + meta = r.metadata_ or {} + for code, count in meta.get("countries", {}).items(): + all_countries[code] = all_countries.get(code, 0) + count + countries = dict(sorted(all_countries.items(), key=lambda x: -x[1])) + + # Per-day per-version data + version_daily_rows = qs.get_daily("downloads_by_version", cutoff) + version_daily: dict[str, dict[str, int]] = {} + for r in version_daily_rows: + day = str(r.date)[:10] + meta = r.metadata_ or {} + day_versions = meta.get("versions", {}) + version_daily[day] = {} + for ver, info in day_versions.items(): + if isinstance(info, dict): + version_daily[day][ver] = info.get("total", 0) + else: + version_daily[day][ver] = info + + # Version breakdown with real human/bot + versions: dict[str, dict[str, int]] = {} + for r in version_daily_rows: + meta = r.metadata_ or {} + for ver, info in meta.get("versions", {}).items(): + if ver not in versions: + versions[ver] = {"total": 0, "human": 0} + if isinstance(info, dict): + versions[ver]["total"] += info.get("total", 0) + versions[ver]["human"] += info.get("human", 0) + else: + versions[ver]["total"] += info + versions = dict(sorted(versions.items(), key=lambda x: -x[1]["total"])) + + # Distribution type breakdown + all_types: dict[str, int] = {} + for r in qs.get_daily("downloads_by_type", cutoff): + meta = r.metadata_ or {} + for t, count in meta.get("types", {}).items(): + all_types[t] = all_types.get(t, 0) + count + dist_types = dict(sorted(all_types.items(), key=lambda x: -x[1])) + + # Events for chart annotations + all_events: list[tuple[str, str, str]] = [] + for r in qs.get_release_metrics(): + tag = (r.metadata_ or {}).get("tag", "") + if tag: + all_events.append((str(r.date)[:10], tag, "release")) + for ev in qs.get_insight_events(type_filter=PYPI_EVENT_TYPES): + all_events.append( + (str(ev.date)[:10], ev.description[:60], ev.event_type) + ) + + release_map: dict[str, str] = {} + for date, label, _ in all_events: + if date in release_map: + release_map[date] += f"\n{label}" + else: + release_map[date] = label + + return { + "total": total, + "today_total": today_total, + "today_human": today_human, + "bot_percent": bot_pct, + "prev_total": qs.sum_range("downloads_daily", prev_cutoff, cutoff), + "prev_human": qs.sum_range( + "downloads_daily_human", prev_cutoff, cutoff + ), + "daily": daily, + "version_daily": version_daily, + "installers": installers, + "countries": countries, + "versions": versions, + "types": dist_types, + "releases": release_map, + "all_events": all_events, + } + + +# --------------------------------------------------------------------------- +# Tab 5: Docs (Plausible) +# --------------------------------------------------------------------------- + + +class DocsTab(InsightsTab): + """Docs analytics from Plausible with date range and event annotations.""" + + _default_days = 7 + + def _build_content(self) -> None: + """Build or rebuild all content.""" + data = self._data + daily = data["daily"] + + last_collected = data.get("last_collected", "") + content: list[ft.Control] = [ + self._make_filter_bar(last_updated=last_collected), + ft.Container(height=8), + ] + + if not daily: + content.append( + SecondaryText( + "No Plausible data collected yet. Run: my-app insights collect plausible" + ) + ) + self._content_column.controls = content + return + + # Aggregates over range + total_visitors = sum(d["visitors"] for d in daily) + total_pageviews = sum(d["pageviews"] for d in daily) + num_days = len(daily) + avg_bounce = sum(d["bounce_rate"] for d in daily) / num_days if num_days else 0 + avg_duration = ( + sum(d["avg_duration"] for d in daily) / num_days if num_days else 0 + ) + views_per_visit = total_pageviews / total_visitors if total_visitors else 0 + duration_min = int(avg_duration // 60) + duration_sec = int(avg_duration % 60) + + # Period-over-period change + prev_v = data.get("prev_visitors", 0) + prev_pv = data.get("prev_pageviews", 0) + prev_b = data.get("prev_bounce", 0) + prev_d = data.get("prev_duration", 0) + + # Metric cards with change arrows + content.append( + ft.Row( + [ + MetricCard( + "Visitors", + f"{total_visitors:,}", + Theme.Colors.PRIMARY, + change_pct=_pct(total_visitors, prev_v), + ), + MetricCard( + "Pageviews", + f"{total_pageviews:,}", + Theme.Colors.INFO, + change_pct=_pct(total_pageviews, prev_pv), + ), + MetricCard( + "Views/Visit", f"{views_per_visit:.1f}", Theme.Colors.SUCCESS + ), + MetricCard( + "Bounce Rate", + f"{avg_bounce:.0f}%", + Theme.Colors.WARNING if avg_bounce > 50 else Theme.Colors.INFO, + change_pct=_pct(avg_bounce, prev_b), + invert=True, + ), + MetricCard( + "Avg Duration", + f"{duration_min}m {duration_sec}s", + "#A855F7", + change_pct=_pct(avg_duration, prev_d), + ), + ], + spacing=Theme.Spacing.MD, + ) + ) + + # Date range + date_range = ( + f"{_pretty_date(daily[0]['date'])} \u2014 {_pretty_date(daily[-1]['date'])}" + ) + content.append(SecondaryText(date_range, size=Theme.Typography.BODY_SMALL)) + + # Event chips — only on days with visitor activity + active_dates = {d["date"] for d in daily} + chips = self._render_event_chips( + data.get("all_events", []), valid_dates=active_dates + ) + if chips: + content.append(chips) + + content.append(ft.Container(height=4)) + + # Visitors + Pageviews chart + highlighted = self._highlighted_dates + releases_map = { + day: label + for day, label in ( + data.get("releases", {}) if self._show_events else {} + ).items() + if day in active_dates + } + + max_val = max( + max(d["pageviews"] for d in daily), max(d["visitors"] for d in daily) + ) + step = _smart_step(max_val) + max_y = int((max_val // step + 1) * step) + + visitor_points: list[ft.LineChartDataPoint] = [] + pageview_points: list[ft.LineChartDataPoint] = [] + release_anno: list[ft.LineChartDataPoint] = [] + + for i, d in enumerate(daily): + is_hl = d["date"] in highlighted + hl_point = ( + ft.ChartCirclePoint( + radius=7, color="#FF5722", stroke_width=2, stroke_color="#FFFFFF" + ) + if is_hl + else None + ) + + visitor_points.append( + ft.LineChartDataPoint( + i, + d["visitors"], + tooltip=f"Visitors: {d['visitors']}", + point=hl_point, + ) + ) + pageview_points.append( + ft.LineChartDataPoint( + i, + d["pageviews"], + tooltip=f"Pageviews: {d['pageviews']}", + ) + ) + + rel = releases_map.get(d["date"]) + if rel: + release_anno.append( + ft.LineChartDataPoint(i, 0, tooltip=rel, show_tooltip=True) + ) + else: + release_anno.append(ft.LineChartDataPoint(i, 0, show_tooltip=False)) + + chart_series = [ + ft.LineChartData( + data_points=visitor_points, + stroke_width=2, + color="#6366F1", + curved=True, + below_line_bgcolor=ft.Colors.with_opacity(0.15, "#6366F1"), + point=ft.ChartCirclePoint( + radius=3, color=ft.Colors.ON_SURFACE, stroke_width=0 + ), + stroke_cap_round=True, + ), + ft.LineChartData( + data_points=pageview_points, + stroke_width=2, + color="#22C55E", + curved=True, + point=ft.ChartCirclePoint( + radius=3, color=ft.Colors.ON_SURFACE, stroke_width=0 + ), + stroke_cap_round=True, + ), + ] + if any(p.show_tooltip for p in release_anno): + chart_series.append( + ft.LineChartData( + data_points=release_anno, + stroke_width=0, + color="#9CA3AF", + ) + ) + + chart = _make_line_chart(chart_series, max_y, daily, step) + content.append(ft.Container(content=chart, margin=ft.margin.only(right=20))) + content.append( + _make_legend([("#6366F1", "Visitors"), ("#22C55E", "Pageviews")]) + ) + + # Country breakdown — horizontal bar chart + countries = data.get("countries", []) + if countries: + content.append(ft.Container(height=12)) + content.append(H3Text("Countries")) + + max_visitors = countries[0]["visitors"] if countries else 1 + country_bar_groups = [] + country_labels = [] + for i, c in enumerate(countries): + country_bar_groups.append( + ft.BarChartGroup( + x=i, + bar_rods=[ + ft.BarChartRod( + from_y=0, + to_y=c["visitors"], + width=max(12, 300 // max(len(countries), 1)), + color="#6366F1", + border_radius=ft.border_radius.only( + top_left=3, top_right=3 + ), + tooltip=f"{c['country']}: {c['visitors']}", + ) + ], + ) + ) + country_labels.append(c["country"]) + + country_step = _smart_step(max_visitors) + country_max_y = int((max_visitors // country_step + 1) * country_step) + + country_chart = ft.BarChart( + bar_groups=country_bar_groups, + left_axis=ft.ChartAxis(labels_size=50, labels_interval=country_step), + bottom_axis=ft.ChartAxis( + labels_size=50, + labels=[ + ft.ChartAxisLabel( + value=i, + label=ft.Text( + lbl, size=9, color=ft.Colors.ON_SURFACE_VARIANT + ), + ) + for i, lbl in enumerate(country_labels) + ], + ), + horizontal_grid_lines=ft.ChartGridLines( + interval=country_step, + color=ft.Colors.with_opacity(0.08, ft.Colors.ON_SURFACE), + width=1, + ), + tooltip_bgcolor=Theme.Colors.SURFACE_1, + tooltip_rounded_radius=8, + tooltip_padding=10, + tooltip_tooltip_border_side=ft.BorderSide(1, ft.Colors.OUTLINE_VARIANT), + border=ft.border.all(1, ft.Colors.OUTLINE_VARIANT), + max_y=country_max_y, + height=250, + expand=True, + ) + + content.append( + ft.Container(content=country_chart, margin=ft.margin.only(right=20)) + ) + + # Top Pages table + top_pages = data.get("top_pages", []) + if top_pages: + content.append(ft.Container(height=12)) + content.append(H3Text("Top Pages")) + content.append(ft.Divider(height=1, color=ft.Colors.OUTLINE_VARIANT)) + for p in top_pages: + page_url = f"https://lbedner.github.io{p['url']}" + duration = p.get("time_s") or 0 + d_min = int(duration // 60) + d_sec = int(duration % 60) + content.append( + ft.Row( + [ + ft.Container( + content=ft.Text( + p["url"], + size=Theme.Typography.BODY_SMALL, + style=ft.TextStyle( + color=Theme.Colors.INFO, + decoration=ft.TextDecoration.UNDERLINE, + ), + selectable=False, + ), + expand=True, + on_click=lambda e, u=page_url: e.page.launch_url(u), + ink=True, + ), + SecondaryText( + f"{p['visitors']} visitors", + size=Theme.Typography.BODY_SMALL, + ), + SecondaryText( + f"{d_min}m {d_sec}s", size=Theme.Typography.BODY_SMALL + ), + ], + spacing=8, + ) + ) + + self._content_column.controls = content + + @staticmethod + def _load_data(days: int = 30) -> dict[str, Any]: + """Load Plausible data from database.""" + from app.services.insights.query_service import InsightQueryService + + with InsightQueryService() as qs: + cutoff, prev_cutoff = qs.compute_cutoffs(days) + + # Daily metrics - current period + visitors_rows = qs.get_daily("visitors", cutoff) + pageviews_rows = qs.get_daily("pageviews", cutoff) + duration_rows = qs.get_daily("avg_duration", cutoff) + bounce_rows = qs.get_daily("bounce_rate", cutoff) + + # Previous period totals for comparison + prev_visitors = qs.sum_range("visitors", prev_cutoff, cutoff) + prev_pageviews = qs.sum_range("pageviews", prev_cutoff, cutoff) + prev_dur_rows = qs.get_daily_range("avg_duration", prev_cutoff, cutoff) + prev_duration = ( + sum(float(r.value) for r in prev_dur_rows) / len(prev_dur_rows) + if prev_dur_rows + else 0 + ) + prev_bounce_rows = qs.get_daily_range("bounce_rate", prev_cutoff, cutoff) + prev_bounce = ( + sum(float(r.value) for r in prev_bounce_rows) / len(prev_bounce_rows) + if prev_bounce_rows + else 0 + ) + + pv_map = {str(r.date)[:10]: int(r.value) for r in pageviews_rows} + dur_map = {str(r.date)[:10]: float(r.value) for r in duration_rows} + bounce_map = {str(r.date)[:10]: float(r.value) for r in bounce_rows} + + daily: list[dict[str, Any]] = [] + last_collected = "" + for r in visitors_rows: + day = str(r.date)[:10] + last_collected = day + daily.append( + { + "date": day, + "visitors": int(r.value), + "pageviews": pv_map.get(day, 0), + "avg_duration": dur_map.get(day, 0), + "bounce_rate": bounce_map.get(day, 0), + } + ) + + # Top pages - aggregate per-day snapshots across selected range + all_pages: dict[str, dict[str, Any]] = {} + for r in qs.get_daily("top_pages", cutoff): + meta = r.metadata_ if isinstance(r.metadata_, dict) else {} + for p in meta.get("pages", []): + url = p.get("url", "") + if url not in all_pages: + all_pages[url] = {"url": url, "visitors": 0, "time_s": 0} + all_pages[url]["visitors"] += p.get("visitors", 0) + all_pages[url]["time_s"] += p.get("time_s") or 0 + top_pages = sorted(all_pages.values(), key=lambda x: -x["visitors"])[:20] + + # Countries - aggregate per-day snapshots across selected range + all_countries: dict[str, int] = {} + for r in qs.get_daily("top_countries", cutoff): + meta = r.metadata_ if isinstance(r.metadata_, dict) else {} + for c in meta.get("countries", []): + code = c.get("country", "") + all_countries[code] = all_countries.get(code, 0) + c.get( + "visitors", 0 + ) + countries = [ + {"country": code, "visitors": count} + for code, count in sorted(all_countries.items(), key=lambda x: -x[1]) + ][:20] + + # Events + all_events: list[tuple[str, str, str]] = [] + for r in qs.get_release_metrics(): + tag = (r.metadata_ or {}).get("tag", "") + if tag: + all_events.append((str(r.date)[:10], tag, "release")) + for ev in qs.get_insight_events( + cutoff=cutoff, type_filter=DOCS_EVENT_TYPES + ): + all_events.append( + (str(ev.date)[:10], ev.description[:60], ev.event_type) + ) + + release_map: dict[str, str] = {} + for date, label, _ in all_events: + if date in release_map: + release_map[date] += f"\n{label}" + else: + release_map[date] = label + + all_events.sort(key=lambda x: x[0]) + + return { + "daily": daily, + "top_pages": top_pages, + "countries": countries, + "all_events": all_events, + "releases": release_map, + "prev_visitors": prev_visitors, + "prev_pageviews": prev_pageviews, + "prev_bounce": prev_bounce, + "prev_duration": prev_duration, + "last_collected": last_collected, + } + + +# --------------------------------------------------------------------------- +# Tab 6: Reddit +# --------------------------------------------------------------------------- + + +class RedditTab(ft.Container): + """Reddit: tracked post stats from database.""" + + def __init__(self) -> None: + super().__init__() + + posts = self._load_posts() + + if not posts: + self.content = ft.Column( + [ + SecondaryText( + "No Reddit posts tracked. Use: my-app insights reddit add " + ) + ], + scroll=ft.ScrollMode.AUTO, + ) + self.padding = Theme.Spacing.MD + self.expand = True + return + + last_date = posts[0]["date"] if posts else "" + content: list[ft.Control] = [ + SecondaryText( + f"Last updated: {last_date}" if last_date else "", + size=Theme.Typography.BODY_SMALL, + ), + ft.Container(height=4), + ] + + for post in posts: + meta = post.get("metadata", {}) + subreddit = meta.get("subreddit", "") + title = meta.get("title", "") + comments = meta.get("comments", 0) + upvote_ratio = meta.get("upvote_ratio", 0) + url = meta.get("url", "") + upvotes = post.get("upvotes", 0) + date = post.get("date", "") + + # Post card — compact layout + post_card = ft.Container( + content=ft.Column( + [ + # Row 1: subreddit + date + stats + ft.Row( + [ + ft.Container( + content=LabelText( + f"r/{subreddit}", color=Theme.Colors.BADGE_TEXT + ), + bgcolor="#FF5722", + padding=ft.padding.symmetric( + horizontal=6, vertical=2 + ), + border_radius=4, + ), + SecondaryText( + _pretty_date(date), size=Theme.Typography.BODY_SMALL + ), + ft.Container(expand=True), + ft.Text( + f"{upvotes:,}", + size=13, + weight=ft.FontWeight.W_700, + color="#FF5722", + ), + SecondaryText("upvotes", size=Theme.Typography.CAPTION), + ft.Container(width=8), + ft.Text( + str(comments), + size=13, + weight=ft.FontWeight.W_700, + color=Theme.Colors.INFO, + ), + SecondaryText( + "comments", size=Theme.Typography.CAPTION + ), + ft.Container(width=8), + ft.Text( + f"{upvote_ratio:.0%}", + size=13, + weight=ft.FontWeight.W_700, + color=Theme.Colors.SUCCESS + if upvote_ratio and upvote_ratio > 0.9 + else Theme.Colors.WARNING, + ), + ], + spacing=4, + vertical_alignment=ft.CrossAxisAlignment.CENTER, + ), + # Row 2: title as hyperlink + ft.Text( + spans=[ + ft.TextSpan( + title, + style=ft.TextStyle( + size=13, + decoration=ft.TextDecoration.UNDERLINE, + color=Theme.Colors.PRIMARY, + ), + url=url, + ), + ], + ) + if url + else BodyText(title), + ], + spacing=6, + ), + padding=ft.padding.all(10), + border=ft.border.all(1, ft.Colors.OUTLINE_VARIANT), + border_radius=8, + ) + + content.append(post_card) + + self.content = ft.Column( + content, + spacing=12, + scroll=ft.ScrollMode.AUTO, + ) + self.padding = Theme.Spacing.MD + self.expand = True + + @staticmethod + def _load_posts() -> list[dict]: + """Load Reddit posts from database (sync).""" + from app.services.insights.query_service import InsightQueryService + + with InsightQueryService() as qs: + rows = qs.get_all_metrics("post_stats") + if not rows: + return [] + + # Get original post dates from events + event_dates: dict[str, str] = {} + for ev in qs.get_insight_events(type_filter={"reddit_post"}): + pid = (ev.metadata_ or {}).get("post_id", "") + if pid and pid not in event_dates: + event_dates[pid] = str(ev.date)[:10] + + # Group by post_id, take latest snapshot per post + seen: set[str] = set() + posts = [] + for r in rows: + meta = r.metadata_ or {} + post_id = meta.get("post_id", "") + if post_id in seen: + continue + seen.add(post_id) + original_date = event_dates.get(post_id, str(r.date)[:10]) + posts.append( + { + "upvotes": int(r.value), + "date": original_date, + "metadata": meta, + } + ) + + return posts + + +# --------------------------------------------------------------------------- +# Modal +# --------------------------------------------------------------------------- + + +def _group_events( + events: list[tuple[str, str, str]], + days: int, +) -> list[tuple[str, str, str, set[str]]]: + """Group same-type events by time bucket for cleaner display. + + Returns list of (display_date, label, type, dates_set). + At small ranges (<=30d), no grouping — each event gets its own chip. + """ + import re + from datetime import datetime as dt + + # Always return with dates_set for consistent interface + if days <= 30 or not events: + return [(date, label, etype, {date}) for date, label, etype in events] + + # Determine bucket size + if days <= 90: + + def bucket_key(date_str: str) -> str: + d = dt.strptime(date_str, "%Y-%m-%d") + # ISO week: YYYY-WNN + return f"{d.isocalendar()[0]}-W{d.isocalendar()[1]:02d}" + else: + + def bucket_key(date_str: str) -> str: + return date_str[:7] # YYYY-MM + + # Group by (bucket, type) + buckets: dict[tuple[str, str], list[tuple[str, str]]] = {} + for date, label, etype in events: + key = (bucket_key(date), etype) + buckets.setdefault(key, []).append((date, label)) + + result: list[tuple[str, str, str, set[str]]] = [] + for (_, etype), items in buckets.items(): + dates = {d for d, _ in items} + first_date = min(dates) + + if len(items) == 1: + result.append((items[0][0], items[0][1], etype, dates)) + continue + + if etype == "release": + tags = [lbl for _, lbl in sorted(items)] + label = f"{tags[0]}\u2013{tags[-1]}" if len(tags) > 1 else tags[0] + elif etype == "star": + # Extract star numbers from labels like "⭐ #80-#85 (6 stars)" or "⭐ #99 — user" + nums: list[int] = [] + for _, lbl in items: + for m in re.findall(r"#(\d+)", lbl): + nums.append(int(m)) + if nums: + label = f"\u2b50 #{min(nums)}-#{max(nums)} ({len(items)} events)" + else: + label = f"\u2b50 ({len(items)} stars)" + elif etype == "reddit_post": + # Keep individual reddit posts — don't group + for date, lbl in items: + result.append((date, lbl, etype, {date})) + continue + else: + label = f"{etype} ({len(items)})" + + result.append((first_date, label, etype, dates)) + + result.sort(key=lambda x: x[0]) + return result + + +def _extract_max_number(text: str) -> str: + """Extract the largest number from a text string (e.g., '5,292 clones' -> '5,292').""" + import re + + numbers = re.findall(r"\d[\d,]*", text) + if not numbers: + return "" + return max(numbers, key=lambda n: int(n.replace(",", ""))) + + +def _smart_step(max_val: float) -> int: + """Pick a nice y-axis interval based on magnitude.""" + if max_val <= 20: + return 5 + if max_val <= 100: + return 10 + if max_val <= 500: + return 50 + return 100 + + +def _pretty_date(date_str: str) -> str: + """Format '2026-04-03' as 'April 3rd, 2026'.""" + from datetime import datetime as dt + + try: + d = dt.strptime(date_str, "%Y-%m-%d") + except (ValueError, TypeError): + return date_str + + day = d.day + if 11 <= day <= 13: + suffix = "th" + else: + suffix = {1: "st", 2: "nd", 3: "rd"}.get(day % 10, "th") + return d.strftime(f"%B {day}{suffix}, %Y") + + +class SettingsTab(ft.Container): + """Settings: data sources, collection status, metric counts.""" + + def __init__(self, metadata: dict[str, Any], db: dict[str, Any]) -> None: + super().__init__() + + total_metrics = metadata.get("total_metrics", 0) + enabled_sources = metadata.get("enabled_sources", 0) + stale_sources = metadata.get("stale_sources", []) + sources_meta = metadata.get("sources", {}) + + content: list[ft.Control] = [ + ft.Row( + [ + MetricCard( + "Total Metrics", f"{total_metrics:,}", Theme.Colors.PRIMARY + ), + MetricCard( + "Active Sources", str(enabled_sources), Theme.Colors.SUCCESS + ), + ], + spacing=Theme.Spacing.MD, + ), + ft.Container(height=8), + H3Text("Data Sources"), + ft.Divider(height=1, color=ft.Colors.OUTLINE_VARIANT), + ] + + for src in db["sources"]: + is_stale = src["key"] in stale_sources + if src["enabled"]: + status_text = "Stale" if is_stale else "Active" + status_color = "#F59E0B" if is_stale else "#22C55E" + else: + status_text = "Disabled" + status_color = ft.Colors.ON_SURFACE_VARIANT + + # Last collected time from health metadata + src_meta = sources_meta.get(src["key"], {}) + last_collected = src_meta.get("last_collected", "") + if last_collected: + last_collected = last_collected[:16].replace("T", " ") + + content.append( + ft.Row( + [ + ft.Container( + content=BodyText( + src["display_name"], size=Theme.Typography.BODY_SMALL + ), + width=140, + ), + ft.Container( + content=LabelText( + status_text, color=Theme.Colors.BADGE_TEXT + ), + bgcolor=status_color, + padding=ft.padding.symmetric(horizontal=6, vertical=2), + border_radius=4, + ), + SecondaryText( + f"Last: {last_collected}" if last_collected else "", + size=Theme.Typography.BODY_SMALL, + ), + ], + spacing=8, + ) + ) + + self.content = ft.Column( + content, + spacing=8, + scroll=ft.ScrollMode.AUTO, + ) + self.padding = Theme.Spacing.MD + self.expand = True + + +class InsightsDetailDialog(BaseDetailPopup): + """Insights service detail modal with tabbed interface.""" + + def __init__(self, component_data: ComponentStatus, page: ft.Page) -> None: + metadata: dict[str, Any] = component_data.metadata or {} + + # Single DB load shared by Overview, GitHub, and Stars tabs + db = _load_db() + + tabs_list = [ + ft.Tab(text="Overview", content=OverviewTab(metadata, db)), + ft.Tab(text="GitHub", content=GitHubTrafficTab()), + ft.Tab(text="Stars", content=StarsTab()), + ft.Tab(text="PyPI", content=PyPITab()), + ft.Tab(text="Docs", content=DocsTab()), + ft.Tab(text="Reddit", content=RedditTab()), + ft.Tab(text="Settings", content=SettingsTab(metadata, db)), + ] + + tabs = ft.Tabs( + selected_index=0, + animation_duration=200, + tabs=tabs_list, + expand=True, + label_color=ft.Colors.ON_SURFACE, + unselected_label_color=ft.Colors.ON_SURFACE_VARIANT, + indicator_color=ft.Colors.ON_SURFACE_VARIANT, + ) + + super().__init__( + page=page, + component_data=component_data, + title_text=get_component_title("service_insights"), + subtitle_text=get_component_subtitle("service_insights", metadata), + sections=[ + ft.Container( + content=tabs, + padding=ft.padding.symmetric(horizontal=60), + expand=True, + ) + ], + scrollable=False, + status_detail=get_status_detail(component_data), + width=1500, + height=850, + ) diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/modals/modal_sections.py b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/modals/modal_sections.py index bdb29ff9..93338842 100644 --- a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/modals/modal_sections.py +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/modals/modal_sections.py @@ -28,6 +28,37 @@ from app.components.frontend.theme import DarkColorPalette +def format_duration_ms(duration_ms: int | float | str | None) -> str: + """Format milliseconds to human-readable duration (e.g., '1.2s', '3m 45s').""" + if not duration_ms: + return "\u2014" + try: + ms = float(duration_ms) + if ms < 1000: + return f"{ms:.0f}ms" + s = ms / 1000 + if s < 60: + return f"{s:.1f}s" + m = int(s // 60) + s = s % 60 + return f"{m}m {s:.0f}s" + except (ValueError, TypeError): + return "\u2014" + + +def format_timestamp(iso_str: str | None) -> str: + """Format ISO timestamp for display (HH:MM:SS).""" + if not iso_str: + return "\u2014" + try: + from datetime import datetime + + dt = datetime.fromisoformat(iso_str) + return dt.strftime("%H:%M:%S") + except (ValueError, TypeError): + return "\u2014" + + class InfoCard(ft.Container): """Info card displaying a label and value with consistent card styling.""" @@ -88,6 +119,9 @@ def __init__( value: str, color: str, icon: str | None = None, + change_pct: float | None = None, + invert: bool = False, + prev_value: str | None = None, ) -> None: """ Initialize metric card. @@ -97,6 +131,9 @@ def __init__( value: Metric value to display color: Color for the value text icon: Optional icon name (e.g., ft.Icons.TOKEN) + change_pct: Optional period-over-period change percentage + invert: If True, down is good (green) and up is bad (red) — e.g., bounce rate + prev_value: Optional previous period value to display (e.g., "prev: 3,080") """ super().__init__() @@ -118,8 +155,47 @@ def __init__( weight=ft.FontWeight.W_600, ) + # Value row: number + optional change arrow inline + value_items: list[ft.Control] = [self.value_text] + if change_pct is not None: + # When invert=True, up is bad (red) and down is good (green) + if change_pct > 0: + arrow_icon = ft.Icons.NORTH_EAST + arrow_color = Theme.Colors.ERROR if invert else Theme.Colors.SUCCESS + elif change_pct < 0: + arrow_icon = ft.Icons.SOUTH_EAST + arrow_color = Theme.Colors.SUCCESS if invert else Theme.Colors.ERROR + else: + arrow_icon = ft.Icons.EAST + arrow_color = ft.Colors.ON_SURFACE_VARIANT + value_items.append( + ft.Row( + [ + ft.Icon(arrow_icon, size=14, color=arrow_color), + ft.Text( + f"{abs(change_pct):.0f}%", + size=14, + color=arrow_color, + weight=ft.FontWeight.W_600, + ), + ], + spacing=2, + vertical_alignment=ft.CrossAxisAlignment.CENTER, + ) + ) + + value_row = ft.Row( + value_items, spacing=6, vertical_alignment=ft.CrossAxisAlignment.END + ) + + column_items = [header_row, value_row] + if prev_value is not None: + column_items.append( + SecondaryText(f"prev: {prev_value}", size=Theme.Typography.BODY_SMALL) + ) + self.content = ft.Column( - [header_row, self.value_text], + column_items, spacing=Theme.Spacing.XS, ) self.padding = Theme.Spacing.MD @@ -135,6 +211,44 @@ def set_value(self, value: str, color: str | None = None) -> None: self.value_text.color = color +class MilestoneCard(ft.Container): + """Trophy-style card for key milestones with hero number.""" + + def __init__( + self, + label: str, + value: str, + date: str, + accent_color: str = "#9CA3AF", + ) -> None: + super().__init__() + + items: list[ft.Control] = [SecondaryText(label)] + if value and value != "\u2014": + items.append( + ft.Text( + value, + size=28, + weight=ft.FontWeight.W_700, + color=accent_color, + ) + ) + items.append(SecondaryText(date, size=Theme.Typography.BODY_SMALL)) + + self.content = ft.Column( + items, + spacing=2, + horizontal_alignment=ft.CrossAxisAlignment.CENTER, + alignment=ft.MainAxisAlignment.CENTER, + ) + self.padding = Theme.Spacing.MD + self.bgcolor = ft.Colors.SURFACE_CONTAINER_HIGHEST + self.border_radius = Theme.Components.CARD_RADIUS + self.border = ft.border.all(0.5, ft.Colors.OUTLINE) + self.height = 130 + self.expand = True + + class SectionHeader(ft.Row): """Section header with icon and title.""" diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/modals/scheduler_modal.py b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/modals/scheduler_modal.py index 61881cf5..890a6c2b 100644 --- a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/modals/scheduler_modal.py +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/modals/scheduler_modal.py @@ -28,7 +28,7 @@ class MiniMetricCard(ft.Container): - """Compact metric card for use in expanded row content.""" + """Compact metric card for use in expanded row contents.""" def __init__(self, label: str, value: str, color: str) -> None: super().__init__() @@ -129,16 +129,6 @@ def _get_mock_recent_executions(job_id: str) -> list[dict]: ] -def _format_duration(ms: int) -> str: - """Format milliseconds to human readable duration.""" - if ms < 1000: - return f"{ms}ms" - elif ms < 60000: - return f"{ms / 1000:.1f}s" - else: - return f"{ms / 60000:.1f}m" - - def _build_job_expanded_content(task: dict) -> ft.Control: """Build expanded content for a scheduled job. @@ -212,7 +202,7 @@ def _build_job_expanded_content(task: dict) -> ft.Control: # MiniMetricCard("Total Runs", str(stats["total_runs"]), Theme.Colors.INFO), # MiniMetricCard( # "Avg Duration", - # _format_duration(stats["avg_duration_ms"]), + # format_duration_ms(stats["avg_duration_ms"]), # ft.Colors.PURPLE_200, # ), # MiniMetricCard( @@ -371,8 +361,16 @@ def _build_job_row(task: dict) -> ExpandableRow: schedule_display = format_schedule_human_readable(schedule) # Status icon and text - status_icon = "🟢" if status == "active" else "🟠" - status_text = "Active" if status == "active" else "Paused" + is_past_due = "Past due" in next_run_display + if status != "active": + status_icon = "🟠" + status_text = "Paused" + elif is_past_due: + status_icon = "🟡" + status_text = "Active" + else: + status_icon = "🟢" + status_text = "Active" cells = [ TableNameText(f"{status_icon} {job_name}"), diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/modals/task_history_section.py b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/modals/task_history_section.py index fdea7ac0..6aec6a44 100644 --- a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/modals/task_history_section.py +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/modals/task_history_section.py @@ -24,6 +24,8 @@ from app.core.config import settings from app.core.log import logger +from .modal_sections import format_duration_ms, format_timestamp + # Column widths COL_WIDTH_STATUS_ICON = 30 COL_WIDTH_QUEUE = 90 @@ -55,37 +57,6 @@ _PILL_SELECTED_OPACITY = 0.15 -def _format_duration(duration_ms: str | None) -> str: - """Format duration in milliseconds to a human-readable string.""" - if not duration_ms: - return "—" - try: - ms = float(duration_ms) - if ms < 1000: - return f"{ms:.0f}ms" - s = ms / 1000 - if s < 60: - return f"{s:.1f}s" - m = int(s // 60) - s = s % 60 - return f"{m}m {s:.0f}s" - except (ValueError, TypeError): - return "—" - - -def _format_timestamp(iso_str: str | None) -> str: - """Format ISO timestamp for display.""" - if not iso_str: - return "—" - try: - from datetime import datetime - - dt = datetime.fromisoformat(iso_str) - return dt.strftime("%H:%M:%S") - except (ValueError, TypeError): - return "—" - - def _build_task_row(task: dict[str, str]) -> ExpandableRow: """Build a table row for a single task record.""" status = task.get("status", "unknown") @@ -111,11 +82,11 @@ def _build_task_row(task: dict[str, str]) -> ExpandableRow: ), BodyText(task.get("queue", "—"), text_align=ft.TextAlign.CENTER), BodyText( - _format_duration(task.get("duration_ms")), + format_duration_ms(task.get("duration_ms")), text_align=ft.TextAlign.CENTER, ), SecondaryText( - _format_timestamp(task.get("enqueued_at")), + format_timestamp(task.get("enqueued_at")), text_align=ft.TextAlign.CENTER, ), SecondaryText( @@ -179,12 +150,12 @@ def _build_task_row(task: dict[str, str]) -> ExpandableRow: SecondaryText(f"Job ID: {task.get('job_id', '—')}", size=11), SecondaryText("|", size=11), SecondaryText( - f"Started: {_format_timestamp(task.get('started_at'))}", + f"Started: {format_timestamp(task.get('started_at'))}", size=11, ), SecondaryText("|", size=11), SecondaryText( - f"Finished: {_format_timestamp(task.get('finished_at'))}", + f"Finished: {format_timestamp(task.get('finished_at'))}", size=11, ), ], diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/status_overview.py.jinja b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/status_overview.py.jinja index e6c95c78..49c6503a 100644 --- a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/status_overview.py.jinja +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/dashboard/status_overview.py.jinja @@ -114,6 +114,11 @@ def get_component_display_info( elif component_name == "service_comms": return ("Comms Service", "Resend + Twilio") +{% if include_insights %} + + elif component_name == "service_insights": + return ("Insights", get_component_subtitle("service_insights", metadata)) +{% endif %} {% if include_ingress %} elif component_name == "ingress": diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/main.py.jinja b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/main.py.jinja index abcf0a1e..1fab0f3b 100644 --- a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/main.py.jinja +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/frontend/main.py.jinja @@ -13,6 +13,9 @@ import httpx from app.core.log import logger from app.services.system.models import ComponentStatus, ComponentStatusType +{% if include_insights %} +from app.services.insights.constants import INSIGHT_COMPONENT_NAME +{% endif %} from .dashboard.activity_feed import ActivityFeed from .dashboard.cards import ( {% if include_ai %} @@ -30,6 +33,9 @@ from .dashboard.cards import ( {% if include_ingress %} IngressCard, {% endif %} +{% if include_insights %} + InsightsCard, +{% endif %} {% if include_observability %} ObservabilityCard, {% endif %} @@ -782,6 +788,10 @@ def create_frontend_app() -> Callable[[ft.Page], Awaitable[None]]: elif component_name == "service_comms": return CommsCard(component_data).build() {% endif %} +{% if include_insights %} + elif component_name == f"{SERVICE_PREFIX}{INSIGHT_COMPONENT_NAME}": + return InsightsCard(component_data).build() +{% endif %} {% if include_auth or include_ai or include_comms %} elif component_name.startswith("service_"): # For other services, use generic ServicesCard for now diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/scheduler/main.py.jinja b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/scheduler/main.py.jinja index 9db59392..0ecd2dcd 100644 --- a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/scheduler/main.py.jinja +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/components/scheduler/main.py.jinja @@ -21,6 +21,17 @@ from app.services.system.backup import backup_database_job {% if include_ai and ai_backend != "memory" %} from app.services.ai.jobs import sync_llm_catalog_job {% endif %} +{% if include_insights %} +{% if insights_github %} +from app.services.insights.jobs import collect_github_events_job, collect_github_stars_job, collect_github_traffic_job +{% endif %} +{% if insights_pypi %} +from app.services.insights.jobs import collect_pypi_job +{% endif %} +{% if insights_plausible %} +from app.services.insights.jobs import collect_plausible_job +{% endif %} +{% endif %} from app.core.config import settings from app.core.log import logger @@ -209,6 +220,194 @@ def create_scheduler() -> AsyncIOScheduler: f"{existing_job.trigger}" ) {% endif %} +{% endif %} + +{% if include_insights %} +{% if insights_github %} + # Insight: GitHub traffic collection + job_id = "insight_github_traffic" +{% if scheduler_backend != "memory" %} + job_exists = _job_exists_in_database(job_id) +{% else %} + existing_job = scheduler.get_job(job_id) + job_exists = existing_job is not None +{% endif %} + + if not job_exists or force_update: + if job_exists and force_update: + logger.info(f"Force updating job '{job_id}' from code configuration") + else: + logger.info(f"Adding new job '{job_id}'") + + scheduler.add_job( + collect_github_traffic_job, + trigger="interval", + hours=settings.INSIGHT_COLLECTION_GITHUB_HOURS, + id=job_id, + name="Insight: GitHub Traffic", + max_instances=1, + coalesce=True, + replace_existing=True, + ) + else: +{% if scheduler_backend != "memory" %} + msg = f"Job '{job_id}' exists in database, preserving current configuration" + logger.info(msg) +{% else %} + logger.info( + f"Job '{job_id}' exists, preserving current configuration: " + f"{existing_job.trigger}" + ) +{% endif %} + + # Insight: GitHub stars collection (daily) + job_id = "insight_github_stars" +{% if scheduler_backend != "memory" %} + job_exists = _job_exists_in_database(job_id) +{% else %} + existing_job = scheduler.get_job(job_id) + job_exists = existing_job is not None +{% endif %} + + if not job_exists or force_update: + if job_exists and force_update: + logger.info(f"Force updating job '{job_id}' from code configuration") + else: + logger.info(f"Adding new job '{job_id}'") + + scheduler.add_job( + collect_github_stars_job, + trigger="interval", + hours=24, + id=job_id, + name="Insight: GitHub Stars", + max_instances=1, + coalesce=True, + replace_existing=True, + ) + else: +{% if scheduler_backend != "memory" %} + msg = f"Job '{job_id}' exists in database, preserving current configuration" + logger.info(msg) +{% else %} + logger.info( + f"Job '{job_id}' exists, preserving current configuration: " + f"{existing_job.trigger}" + ) +{% endif %} + + # Insight: GitHub events collection (daily via ClickHouse) + job_id = "insight_github_events" +{% if scheduler_backend != "memory" %} + job_exists = _job_exists_in_database(job_id) +{% else %} + existing_job = scheduler.get_job(job_id) + job_exists = existing_job is not None +{% endif %} + + if not job_exists or force_update: + if job_exists and force_update: + logger.info(f"Force updating job '{job_id}' from code configuration") + else: + logger.info(f"Adding new job '{job_id}'") + + scheduler.add_job( + collect_github_events_job, + trigger="interval", + hours=24, + id=job_id, + name="Insight: GitHub Events", + max_instances=1, + coalesce=True, + replace_existing=True, + ) + else: +{% if scheduler_backend != "memory" %} + msg = f"Job '{job_id}' exists in database, preserving current configuration" + logger.info(msg) +{% else %} + logger.info( + f"Job '{job_id}' exists, preserving current configuration: " + f"{existing_job.trigger}" + ) +{% endif %} +{% endif %} + +{% if insights_pypi %} + # Insight: PyPI downloads collection + job_id = "insight_pypi" +{% if scheduler_backend != "memory" %} + job_exists = _job_exists_in_database(job_id) +{% else %} + existing_job = scheduler.get_job(job_id) + job_exists = existing_job is not None +{% endif %} + + if not job_exists or force_update: + if job_exists and force_update: + logger.info(f"Force updating job '{job_id}' from code configuration") + else: + logger.info(f"Adding new job '{job_id}'") + + scheduler.add_job( + collect_pypi_job, + trigger="interval", + hours=settings.INSIGHT_COLLECTION_PYPI_HOURS, + id=job_id, + name="Insight: PyPI Downloads", + max_instances=1, + coalesce=True, + replace_existing=True, + ) + else: +{% if scheduler_backend != "memory" %} + msg = f"Job '{job_id}' exists in database, preserving current configuration" + logger.info(msg) +{% else %} + logger.info( + f"Job '{job_id}' exists, preserving current configuration: " + f"{existing_job.trigger}" + ) +{% endif %} +{% endif %} + +{% if insights_plausible %} + # Insight: Plausible analytics collection + job_id = "insight_plausible" +{% if scheduler_backend != "memory" %} + job_exists = _job_exists_in_database(job_id) +{% else %} + existing_job = scheduler.get_job(job_id) + job_exists = existing_job is not None +{% endif %} + + if not job_exists or force_update: + if job_exists and force_update: + logger.info(f"Force updating job '{job_id}' from code configuration") + else: + logger.info(f"Adding new job '{job_id}'") + + scheduler.add_job( + collect_plausible_job, + trigger="interval", + hours=settings.INSIGHT_COLLECTION_PLAUSIBLE_HOURS, + id=job_id, + name="Insight: Plausible Analytics", + max_instances=1, + coalesce=True, + replace_existing=True, + ) + else: +{% if scheduler_backend != "memory" %} + msg = f"Job '{job_id}' exists in database, preserving current configuration" + logger.info(msg) +{% else %} + logger.info( + f"Job '{job_id}' exists, preserving current configuration: " + f"{existing_job.trigger}" + ) +{% endif %} +{% endif %} {% endif %} # Add your own scheduled jobs here by importing service functions diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/core/config.py.jinja b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/core/config.py.jinja index f254ca5c..afe7b752 100644 --- a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/core/config.py.jinja +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/core/config.py.jinja @@ -240,6 +240,25 @@ class Settings(BaseSettings): TWILIO_MESSAGING_SERVICE_SID: str | None = None # Required for toll-free SMS {% endif %} +{% if include_insights %} + # Insights Service Configuration +{% if insights_github %} + INSIGHT_GITHUB_TOKEN: str | None = None # PAT with 'repo' scope + INSIGHT_GITHUB_OWNER: str = "" + INSIGHT_GITHUB_REPO: str = "" + INSIGHT_COLLECTION_GITHUB_HOURS: int = 6 +{% endif %} +{% if insights_pypi %} + INSIGHT_PYPI_PACKAGE: str = "" + INSIGHT_COLLECTION_PYPI_HOURS: int = 24 +{% endif %} +{% if insights_plausible %} + INSIGHT_PLAUSIBLE_API_KEY: str | None = None + INSIGHT_PLAUSIBLE_SITES: str = "" # Comma-separated site IDs + INSIGHT_COLLECTION_PLAUSIBLE_HOURS: int = 24 +{% endif %} +{% endif %} + {% if include_observability %} # Observability (Logfire) # Set LOGFIRE_TOKEN to enable sending traces to Logfire cloud diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/i18n/locales/en.py b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/i18n/locales/en.py index 4bf3a649..54078539 100644 --- a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/i18n/locales/en.py +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/i18n/locales/en.py @@ -932,4 +932,58 @@ "loadtest.rec.low_throughput": "Low throughput detected. Consider reducing task complexity or increasing worker concurrency.", "loadtest.rec.high_failure": "High failure rate ({rate}%). Check worker logs for error patterns.", "loadtest.rec.long_execution": "Long execution time for relatively few tasks suggests queue saturation. Consider testing with smaller batches or different queues.", + # ── Insights ───────────────────────────────────────────────────── + "insights.help": "Adoption metrics and analytics", + "insights.help_reddit": "Reddit post tracking", + "insights.help_status": "Show insights summary — rolling 14d, records, sources", + "insights.help_collect": "Trigger data collection — all sources or a specific one", + "insights.help_stars": "List stargazers with profile data", + "insights.help_records": "Show all-time records", + "insights.help_sources": "List all data sources with status", + "insights.help_reddit_add": "Add a Reddit post to track", + "insights.help_event": "Log a contextual event (release, reddit post, etc.)", + "insights.arg_source": "Source to collect (e.g., github_traffic). Omit for all.", + "insights.arg_url": "Reddit post URL to track", + "insights.arg_event_type": "Event type (release, reddit_post, localization, external)", + "insights.arg_description": "Event description", + "insights.opt_limit": "Number of stars to show", + "insights.status_title": "Insights Status", + "insights.sources_title": "Sources", + "insights.col_source": "Source", + "insights.col_enabled": "Enabled", + "insights.col_last_collected": "Last Collected", + "insights.col_metrics": "Metrics", + "insights.col_star_num": "#", + "insights.col_username": "Username", + "insights.col_name": "Name", + "insights.col_location": "Location", + "insights.col_company": "Company", + "insights.col_metric": "Metric", + "insights.col_value": "Value", + "insights.col_date": "Date", + "insights.col_previous": "Previous", + "insights.col_prev_date": "Prev Date", + "insights.col_key": "Key", + "insights.col_display_name": "Display Name", + "insights.col_interval": "Interval", + "insights.col_auth": "Auth", + "insights.yes": "Yes", + "insights.no": "No", + "insights.never": "Never", + "insights.manual": "Manual", + "insights.records_title": "Records", + "insights.all_time_records_title": "All-Time Records", + "insights.stars_title": "Stars", + "insights.insight_sources_title": "Insight Sources", + "insights.total_metrics": "Total metrics: {count}", + "insights.no_stars": "No stars collected yet.", + "insights.no_records": "No records yet.", + "insights.collecting_from": "Collecting from {source}...", + "insights.collecting_all": "Collecting from all enabled sources...", + "insights.collect_success": " {source}: {written} written, {skipped} skipped", + "insights.collect_records": " Records broken: {records}", + "insights.collect_failed": " {source}: FAILED — {error}", + "insights.reddit_added": "Reddit post added ({count} rows)", + "insights.reddit_failed": "Failed: {error}", + "insights.event_logged": "Event logged: [{event_type}] {description}", } diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/__init__.py b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collector_service.py.jinja b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collector_service.py.jinja new file mode 100644 index 00000000..65837d1a --- /dev/null +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collector_service.py.jinja @@ -0,0 +1,304 @@ +""" +Collector service — orchestrates data collection across all enabled sources. +""" + +import logging +from typing import Any + +from sqlmodel import select +from sqlmodel.ext.asyncio.session import AsyncSession + +from .collectors.base import BaseCollector, CollectionResult +{% if insights_github %} +from .collectors.github_events import GitHubEventsCollector +from .collectors.github_stars import GitHubStarsCollector +from .collectors.github_traffic import GitHubTrafficCollector +{% endif %} +{% if insights_pypi %} +from .collectors.pypi import PyPICollector +{% endif %} +{% if insights_plausible %} +from .collectors.plausible import PlausibleCollector +{% endif %} +{% if insights_reddit %} +from .collectors.reddit import RedditCollector +{% endif %} +from .constants import SourceKeys +from .models import InsightSource + +logger = logging.getLogger(__name__) + +# Registry of source_key → collector class +COLLECTOR_REGISTRY: dict[str, type[BaseCollector]] = { +{% if insights_github %} + SourceKeys.GITHUB_TRAFFIC: GitHubTrafficCollector, + SourceKeys.GITHUB_STARS: GitHubStarsCollector, + SourceKeys.GITHUB_EVENTS: GitHubEventsCollector, +{% endif %} +{% if insights_pypi %} + SourceKeys.PYPI: PyPICollector, +{% endif %} +{% if insights_plausible %} + SourceKeys.PLAUSIBLE: PlausibleCollector, +{% endif %} +{% if insights_reddit %} + SourceKeys.REDDIT: RedditCollector, +{% endif %} +} + + +class CollectorService: + """Orchestrates data collection across all enabled sources.""" + + def __init__(self, db: AsyncSession) -> None: + self.db = db + + async def collect_all(self) -> dict[str, CollectionResult]: + """Run all enabled collectors. Returns results keyed by source key.""" + results: dict[str, CollectionResult] = {} + + # Get enabled sources that have a registered collector + stmt = select(InsightSource).where(InsightSource.enabled == True) # noqa: E712 + result = await self.db.exec(stmt) + enabled_sources = result.all() + + for source in enabled_sources: + if source.key in COLLECTOR_REGISTRY: + results[source.key] = await self._run_collector(source.key) + else: + logger.debug("No collector registered for source '%s'", source.key) + + return results + + async def collect_source(self, source_key: str, **kwargs: Any) -> CollectionResult: + """Run a specific collector by source key.""" + if source_key not in COLLECTOR_REGISTRY: + return CollectionResult( + source_key=source_key, + success=False, + error=f"No collector registered for source '{source_key}'", + ) + + # Check if source is enabled + stmt = select(InsightSource).where(InsightSource.key == source_key) + result = await self.db.exec(stmt) + source = result.first() + + if source is None: + return CollectionResult( + source_key=source_key, + success=False, + error=f"Source '{source_key}' not found in database", + ) + + if not source.enabled: + return CollectionResult( + source_key=source_key, + success=False, + error=f"Source '{source_key}' is disabled", + ) + + return await self._run_collector(source_key, **kwargs) + + async def _run_collector(self, source_key: str, **kwargs: Any) -> CollectionResult: + """Instantiate and run a collector.""" + collector_cls = COLLECTOR_REGISTRY[source_key] + collector = collector_cls(self.db) + + logger.info("Running collector for '%s'", source_key) + + try: + result = await collector.collect(**kwargs) + if result.success: + # Update last_collected_at on the source + from datetime import datetime + stmt = select(InsightSource).where(InsightSource.key == source_key) + source = (await self.db.exec(stmt)).first() + if source: + source.last_collected_at = datetime.now() + self.db.add(source) + await self.db.commit() + + # Check for new records + new_records = await self._check_records(source_key) + result.records_broken = new_records + + logger.info( + "Collector '%s' completed: %d written, %d skipped", + source_key, + result.rows_written, + result.rows_skipped, + ) + else: + logger.warning( + "Collector '%s' failed: %s", + source_key, + result.error, + ) + return result + except Exception as e: + error_msg = f"Collector '{source_key}' raised exception: {e}" + logger.error(error_msg, exc_info=True) + return CollectionResult( + source_key=source_key, + success=False, + error=error_msg, + ) + + async def _check_records(self, source_key: str) -> list[str]: + """Check if any metrics set new all-time records after collection.""" + from datetime import datetime, timedelta + import re + + from .models import InsightEvent, InsightMetric, InsightMetricType + + broken: list[str] = [] + + RECORD_CHECKS: dict[str, list[dict[str, str]]] = { +{% if insights_github %} + SourceKeys.GITHUB_TRAFFIC: [ + {"key": "clones", "category": "daily_clones", "label": "GitHub 1-Day Clones", "event_type": "milestone_github"}, + {"key": "unique_cloners", "category": "daily_unique", "label": "GitHub 1-Day Unique", "event_type": "milestone_github"}, + {"key": "views", "category": "daily_views", "label": "GitHub 1-Day Views", "event_type": "milestone_github"}, + {"key": "unique_visitors", "category": "daily_visitors", "label": "GitHub 1-Day Visitors", "event_type": "milestone_github"}, + ], +{% endif %} +{% if insights_pypi %} + SourceKeys.PYPI: [ + {"key": "downloads_daily", "category": "pypi_daily", "label": "PyPI Best Single Day", "event_type": "milestone_pypi"}, + ], +{% endif %} +{% if insights_plausible %} + SourceKeys.PLAUSIBLE: [ + {"key": "visitors", "category": "plausible_daily_visitors", "label": "Docs 1-Day Visitors", "event_type": "milestone_pypi"}, + {"key": "pageviews", "category": "plausible_daily_pageviews", "label": "Docs 1-Day Pageviews", "event_type": "milestone_pypi"}, + ], +{% endif %} + } + + checks = RECORD_CHECKS.get(source_key, []) + if not checks: + return broken + + now = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) + + for check in checks: + mt = (await self.db.exec( + select(InsightMetricType).where(InsightMetricType.key == check["key"]) + )).first() + if not mt: + continue + + result = await self.db.exec( + select(InsightMetric.date, InsightMetric.value) + .where( + InsightMetric.metric_type_id == mt.id, + InsightMetric.period == "daily", + ) + .order_by(InsightMetric.value.desc()) + .limit(1) + ) + top_row = result.first() + if not top_row: + continue + + record_date = str(top_row[0])[:10] + record_value = int(top_row[1]) + + if record_value == 0: + continue + + existing = (await self.db.exec( + select(InsightEvent) + .where(InsightEvent.event_type == check["event_type"]) + )).all() + + current_record = 0 + for ev in existing: + meta = ev.metadata_ if isinstance(ev.metadata_, dict) else {} + if meta.get("category") == check["category"]: + numbers = re.findall(r"\d[\d,]*", ev.description) + for n in numbers: + val = int(n.replace(",", "")) + current_record = max(current_record, val) + + if record_value > current_record: + desc = f"{record_value:,} ({check['label']})" + event = InsightEvent( + date=datetime.strptime(record_date, "%Y-%m-%d"), + event_type=check["event_type"], + description=desc, + metadata_={"category": check["category"]}, + ) + self.db.add(event) + await self.db.commit() + broken.append(f"{check['label']}: {record_value:,} (was {current_record:,})") + logger.info( + "New record: %s = %s on %s (prev: %s)", + check["label"], record_value, record_date, current_record, + ) + +{% if insights_github %} + # 14-day rolling records for GitHub traffic + if source_key == SourceKeys.GITHUB_TRAFFIC: + d14 = now - timedelta(days=14) + rolling_checks = [ + {"key": "clones", "category": "14d_clones", "label": "GitHub 14-Day Clones", "event_type": "milestone_github"}, + {"key": "unique_cloners", "category": "14d_unique", "label": "GitHub 14-Day Unique", "event_type": "milestone_github"}, + ] + for check in rolling_checks: + mt = (await self.db.exec( + select(InsightMetricType).where(InsightMetricType.key == check["key"]) + )).first() + if not mt: + continue + + rows = (await self.db.exec( + select(InsightMetric).where( + InsightMetric.metric_type_id == mt.id, + InsightMetric.period == "daily", + InsightMetric.date >= d14, + ) + )).all() + rolling_total = sum(int(r.value) for r in rows) + + if rolling_total == 0: + continue + + existing = (await self.db.exec( + select(InsightEvent).where( + InsightEvent.event_type == check["event_type"], + ) + )).all() + + current_record = 0 + for ev in existing: + meta = ev.metadata_ if isinstance(ev.metadata_, dict) else {} + if meta.get("category") == check["category"]: + numbers = re.findall(r"\d[\d,]*", ev.description) + for n in numbers: + val = int(n.replace(",", "")) + current_record = max(current_record, val) + + if rolling_total > current_record: + desc = f"{rolling_total:,} ({check['label']})" + event = InsightEvent( + date=now, + event_type=check["event_type"], + description=desc, + metadata_={"category": check["category"]}, + ) + self.db.add(event) + await self.db.commit() + broken.append(f"{check['label']}: {rolling_total:,} (was {current_record:,})") + logger.info( + "New 14-day record: %s = %s (prev: %s)", + check["label"], rolling_total, current_record, + ) +{% endif %} + + return broken + + def get_registered_sources(self) -> list[str]: + """Get list of source keys that have registered collectors.""" + return list(COLLECTOR_REGISTRY.keys()) diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/__init__.py b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/base.py b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/base.py new file mode 100644 index 00000000..8ff4d438 --- /dev/null +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/base.py @@ -0,0 +1,223 @@ +""" +Base collector for insight data sources. + +All collectors inherit from BaseCollector and implement: +- source_key: which insight_source this collector writes to +- collect(): run the collection, return a CollectionResult +""" + +import logging +from abc import ABC, abstractmethod +from datetime import UTC, datetime +from typing import Any + +import httpx +from pydantic import BaseModel, Field +from sqlmodel import select +from sqlmodel.ext.asyncio.session import AsyncSession + +from ..models import InsightMetric, InsightMetricType, InsightSource + +logger = logging.getLogger(__name__) + + +class CollectionResult(BaseModel): + """Result of a collection run.""" + + source_key: str + success: bool + rows_written: int = Field(default=0, ge=0) + rows_skipped: int = Field(default=0, ge=0) + records_broken: list[str] = Field(default_factory=list) + error: str | None = None + collected_at: datetime = Field( + default_factory=lambda: datetime.now(UTC).replace(tzinfo=None) + ) + + +# -- Shared helpers ----------------------------------------------------------- + +CLICKHOUSE_URL = "https://sql-clickhouse.clickhouse.com" +CLICKHOUSE_PARAMS = {"user": "play", "default_format": "JSONCompact"} + + +def today() -> datetime: + """Get today as a midnight datetime (no timezone).""" + return datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) + + +def parse_date(date_str: str) -> datetime: + """Parse YYYY-MM-DD date string to datetime.""" + return datetime.strptime(date_str, "%Y-%m-%d") + + +def parse_github_date(timestamp: str) -> datetime: + """Parse GitHub API timestamp (ISO 8601) to date-only datetime.""" + dt = datetime.fromisoformat(timestamp.replace("Z", "+00:00")) + return dt.replace(hour=0, minute=0, second=0, microsecond=0, tzinfo=None) + + +async def clickhouse_query(client: httpx.AsyncClient, sql: str) -> list[list]: + """Execute a ClickHouse SQL query and return rows.""" + resp = await client.post( + CLICKHOUSE_URL, + params=CLICKHOUSE_PARAMS, + content=sql.strip(), + ) + resp.raise_for_status() + return resp.json().get("data", []) + + +class BaseCollector(ABC): + """Base class for all insight data collectors.""" + + def __init__(self, db: AsyncSession) -> None: + self.db = db + + @property + @abstractmethod + def source_key(self) -> str: + """The insight_source.key this collector writes to.""" + ... + + @abstractmethod + async def collect(self) -> CollectionResult: + """Run collection. Returns what was collected.""" + ... + + # -- Result helpers ------------------------------------------------------- + + def _validate_config(self, **required: Any) -> CollectionResult | None: + """Check required config values. Returns error result if any are falsy.""" + missing = [k for k, v in required.items() if not v] + if missing: + return self._error(f"Missing {', '.join(missing)}") + return None + + def _error( + self, + msg: str, + rows_written: int = 0, + rows_skipped: int = 0, + ) -> CollectionResult: + """Build a failure CollectionResult and log the error.""" + logger.error(msg) + return CollectionResult( + source_key=self.source_key, + success=False, + rows_written=rows_written, + rows_skipped=rows_skipped, + error=msg, + ) + + def _success( + self, + rows_written: int, + rows_skipped: int = 0, + ) -> CollectionResult: + """Commit, log, and return a success CollectionResult.""" + logger.info( + "%s collected: %d written, %d skipped", + self.source_key, + rows_written, + rows_skipped, + ) + return CollectionResult( + source_key=self.source_key, + success=True, + rows_written=rows_written, + rows_skipped=rows_skipped, + ) + + async def get_source(self) -> InsightSource: + """Look up this collector's source row.""" + result = await self.db.exec( + select(InsightSource).where(InsightSource.key == self.source_key) + ) + source = result.first() + if source is None: + raise RuntimeError( + f"InsightSource '{self.source_key}' not found. " + "Run seed_insight_tables() first." + ) + return source + + async def get_metric_type(self, key: str) -> InsightMetricType: + """Look up a metric type by key within this source.""" + source = await self.get_source() + result = await self.db.exec( + select(InsightMetricType).where( + InsightMetricType.source_id == source.id, + InsightMetricType.key == key, + ) + ) + metric_type = result.first() + if metric_type is None: + raise RuntimeError( + f"InsightMetricType '{key}' not found for source '{self.source_key}'. " + "Run seed_insight_tables() first." + ) + return metric_type + + async def upsert_metric( + self, + metric_type: InsightMetricType, + date: datetime, + value: float, + period: str, + metadata: dict[str, Any] | None = None, + ) -> tuple[InsightMetric, bool]: + """ + Insert or update a metric row. + + Deduplicates on (metric_type_id, date, period) for non-event periods. + Event period rows (e.g., new stars) are always inserted. + + Returns: + Tuple of (metric, was_created). was_created=False means updated existing. + """ + if metric_type.id is None: + raise RuntimeError("MetricType has no id — was it persisted?") + + # Event rows are always new (multiple stars per day, etc.) + from ..constants import Periods + + if period == Periods.EVENT: + metric = InsightMetric( + date=date, + metric_type_id=metric_type.id, + value=value, + period=period, + metadata_=metadata or {}, + ) + self.db.add(metric) + return metric, True + + # For other periods, check for existing row + result = await self.db.exec( + select(InsightMetric).where( + InsightMetric.metric_type_id == metric_type.id, + InsightMetric.date == date, + InsightMetric.period == period, + ) + ) + existing = result.first() + + if existing is not None: + # Update existing row + existing.value = value + if metadata is not None: + existing.metadata_ = metadata + self.db.add(existing) + return existing, False + + # Create new row + metric = InsightMetric( + date=date, + metric_type_id=metric_type.id, + value=value, + period=period, + metadata_=metadata or {}, + ) + self.db.add(metric) + return metric, True diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/github_events.py.jinja b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/github_events.py.jinja new file mode 100644 index 00000000..2351fc9a --- /dev/null +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/github_events.py.jinja @@ -0,0 +1,194 @@ +""" +GitHub Events collector via ClickHouse public SQL endpoint. + +Collects forks, releases, star events, and activity summaries +from the public GitHub events dataset. No authentication required. +""" + +import httpx +from sqlmodel import select +from sqlmodel.ext.asyncio.session import AsyncSession + +from app.core.config import settings + +from ..constants import MetricKeys, Periods, SourceKeys +from ..models import InsightMetric +from ..schemas import ActivitySummaryMetadata, ForkEventMetadata, ReleaseEventMetadata2 +from .base import BaseCollector, CollectionResult, clickhouse_query, parse_date + +# Event type mapping from ClickHouse enum to ActivitySummaryMetadata fields +EVENT_TYPE_MAP = { + "PushEvent": "push", + "IssuesEvent": "issues", + "PullRequestEvent": "pull_requests", + "PullRequestReviewEvent": "pull_request_reviews", + "IssueCommentEvent": "issue_comments", + "ForkEvent": "forks", + "WatchEvent": "stars", + "ReleaseEvent": "releases", + "CreateEvent": "creates", + "DeleteEvent": "deletes", +} + + +class GitHubEventsCollector(BaseCollector): + """Collects GitHub event data from ClickHouse public dataset.""" + + def __init__(self, db: AsyncSession) -> None: + super().__init__(db) + + @property + def source_key(self) -> str: + return SourceKeys.GITHUB_EVENTS + + async def collect(self) -> CollectionResult: + """Collect forks, releases, star events, and activity summary.""" + owner = settings.INSIGHT_GITHUB_OWNER + repo = settings.INSIGHT_GITHUB_REPO + + if err := self._validate_config(INSIGHT_GITHUB_OWNER=owner, INSIGHT_GITHUB_REPO=repo): + return err + + repo_name = f"{owner}/{repo}" + rows_written = 0 + rows_skipped = 0 + + try: + forks_type = await self.get_metric_type(MetricKeys.FORKS) + releases_type = await self.get_metric_type(MetricKeys.RELEASES) + stars_type = await self.get_metric_type(MetricKeys.STAR_EVENTS) + activity_type = await self.get_metric_type(MetricKeys.ACTIVITY_SUMMARY) + + async with httpx.AsyncClient(timeout=30.0) as client: + # Forks — individual events + fork_data = await clickhouse_query(client, f""" + SELECT actor_login, toDate(created_at) as day + FROM github.github_events + WHERE repo_name = '{repo_name}' + AND event_type = 'ForkEvent' + AND created_at >= today() - 90 + ORDER BY day DESC + """) + + # Get existing fork actors to skip duplicates + existing_forks = await self.db.exec( + select(InsightMetric.metadata_).where( + InsightMetric.metric_type_id == forks_type.id, + InsightMetric.period == Periods.EVENT, + ) + ) + existing_actors = { + m.get("actor", "") for m in existing_forks.all() if isinstance(m, dict) + } + + for row in fork_data: + actor = row[0] + if actor in existing_actors: + rows_skipped += 1 + continue + + date = parse_date(row[1]) + metadata = ForkEventMetadata(actor=actor, date=row[1]) + await self.upsert_metric( + metric_type=forks_type, date=date, + value=1.0, period=Periods.EVENT, + metadata=metadata.model_dump(), + ) + rows_written += 1 + + # Releases — individual events + release_data = await clickhouse_query(client, f""" + SELECT actor_login, release_tag_name, release_name, + toDate(created_at) as day + FROM github.github_events + WHERE repo_name = '{repo_name}' + AND event_type = 'ReleaseEvent' + AND created_at >= today() - 90 + ORDER BY day DESC + """) + + existing_releases = await self.db.exec( + select(InsightMetric.metadata_).where( + InsightMetric.metric_type_id == releases_type.id, + InsightMetric.period == Periods.EVENT, + ) + ) + existing_tags = { + m.get("tag", "") for m in existing_releases.all() if isinstance(m, dict) + } + + for row in release_data: + tag = row[1] + if tag in existing_tags: + rows_skipped += 1 + continue + + date = parse_date(row[3]) + metadata = ReleaseEventMetadata2( + tag=tag, name=row[2] or None, actor=row[0], + ) + await self.upsert_metric( + metric_type=releases_type, date=date, + value=1.0, period=Periods.EVENT, + metadata=metadata.model_dump(), + ) + rows_written += 1 + + # Daily star count (14 days) + star_data = await clickhouse_query(client, f""" + SELECT toDate(created_at) as day, count() as stars + FROM github.github_events + WHERE repo_name = '{repo_name}' + AND event_type = 'WatchEvent' + AND created_at >= today() - 14 + GROUP BY day ORDER BY day + """) + + for row in star_data: + date = parse_date(row[0]) + _, created = await self.upsert_metric( + metric_type=stars_type, date=date, + value=float(row[1]), period=Periods.DAILY, + ) + rows_written += 1 if created else 0 + rows_skipped += 0 if created else 1 + + # Activity summary (14 days, one row per day) + activity_data = await clickhouse_query(client, f""" + SELECT toDate(created_at) as day, event_type, count() as cnt + FROM github.github_events + WHERE repo_name = '{repo_name}' + AND created_at >= today() - 14 + GROUP BY day, event_type + ORDER BY day + """) + + # Group by day + daily_activity: dict[str, dict[str, int]] = {} + for row in activity_data: + day = row[0] + event_type = row[1] + count = int(row[2]) + if day not in daily_activity: + daily_activity[day] = {} + field = EVENT_TYPE_MAP.get(event_type) + if field: + daily_activity[day][field] = count + + for day_str, events in daily_activity.items(): + date = parse_date(day_str) + total_events = sum(events.values()) + metadata = ActivitySummaryMetadata(**events) + _, created = await self.upsert_metric( + metric_type=activity_type, date=date, + value=float(total_events), period=Periods.DAILY, + metadata=metadata.model_dump(), + ) + rows_written += 1 if created else 0 + rows_skipped += 0 if created else 1 + + await self.db.commit() + return self._success(rows_written, rows_skipped) + + except Exception as e: + return self._error(f"GitHub events collection failed: {e}", rows_written, rows_skipped) diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/github_stars.py b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/github_stars.py new file mode 100644 index 00000000..ab495f74 --- /dev/null +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/github_stars.py @@ -0,0 +1,216 @@ +""" +GitHub Stargazers API collector. + +Collects star events with user profiles stored as JSONB metadata. +Only fetches profiles for new stars not already in the database. +""" + +from datetime import datetime + +import httpx +from app.core.config import settings +from sqlmodel import select +from sqlmodel.ext.asyncio.session import AsyncSession + +from ..constants import MetricKeys, Periods, SourceKeys +from ..models import InsightMetric +from ..schemas import StarProfileMetadata +from .base import BaseCollector, CollectionResult, parse_github_date, today + +GITHUB_API = "https://api.github.com" + + +class GitHubStarsCollector(BaseCollector): + """Collects stargazer data from the GitHub REST API.""" + + def __init__(self, db: AsyncSession) -> None: + super().__init__(db) + + @property + def source_key(self) -> str: + return SourceKeys.GITHUB_STARS + + async def collect(self) -> CollectionResult: + """Collect new stars with user profiles.""" + token = settings.INSIGHT_GITHUB_TOKEN + owner = settings.INSIGHT_GITHUB_OWNER + repo = settings.INSIGHT_GITHUB_REPO + + if err := self._validate_config( + INSIGHT_GITHUB_TOKEN=token, + INSIGHT_GITHUB_OWNER=owner, + INSIGHT_GITHUB_REPO=repo, + ): + return err + + headers = { + "Authorization": f"Bearer {token}", + "Accept": "application/vnd.github.v3.star+json", + "X-GitHub-Api-Version": "2022-11-28", + } + base_url = f"{GITHUB_API}/repos/{owner}/{repo}" + + rows_written = 0 + rows_skipped = 0 + + try: + star_type = await self.get_metric_type(MetricKeys.NEW_STAR) + + # Get existing star numbers to skip + existing_result = await self.db.exec( + select(InsightMetric.value).where( + InsightMetric.metric_type_id == star_type.id, + InsightMetric.period == Periods.EVENT, + ) + ) + existing_star_numbers = {int(v) for v in existing_result.all()} + + async with httpx.AsyncClient(headers=headers, timeout=30.0) as client: + # Paginate through all stargazers + page = 1 + star_number = 0 + + while True: + resp = await client.get( + f"{base_url}/stargazers", + params={"per_page": 100, "page": page}, + ) + resp.raise_for_status() + stargazers = resp.json() + + if not stargazers: + break + + for stargazer in stargazers: + star_number += 1 + + if star_number in existing_star_numbers: + rows_skipped += 1 + continue + + # Fetch user profile + user_data = stargazer.get("user", {}) + starred_at = stargazer.get("starred_at", "") + + profile = await self._fetch_profile(client, user_data) + + date = parse_github_date(starred_at) if starred_at else today() + await self.upsert_metric( + metric_type=star_type, + date=date, + value=float(star_number), + period=Periods.EVENT, + metadata=profile.model_dump(), + ) + rows_written += 1 + + if len(stargazers) < 100: + break + page += 1 + + # Create InsightEvent entries grouped by day + from ..models import InsightEvent + + # Get all star metrics to group by date + all_stars = await self.db.exec( + select(InsightMetric) + .where( + InsightMetric.metric_type_id == star_type.id, + InsightMetric.period == Periods.EVENT, + ) + .order_by(InsightMetric.date.asc()) + ) + stars_by_date: dict[str, list[dict]] = {} + for s in all_stars.all(): + day = str(s.date)[:10] + meta = s.metadata_ if isinstance(s.metadata_, dict) else {} + stars_by_date.setdefault(day, []).append( + { + "number": int(s.value), + "username": meta.get("username", "unknown"), + } + ) + + # Get existing star events to avoid duplicates + existing_star_events = await self.db.exec( + select(InsightEvent).where(InsightEvent.event_type == "star") + ) + existing_star_dates = { + str(ev.date)[:10] for ev in existing_star_events.all() + } + + for day, stars in stars_by_date.items(): + if day in existing_star_dates: + continue + stars.sort(key=lambda x: x["number"]) + usernames = [s["username"] for s in stars] + numbers = [s["number"] for s in stars] + if len(stars) == 1: + desc = f"\u2b50 #{numbers[0]} — {usernames[0]}" + else: + desc = f"\u2b50 #{numbers[0]}-#{numbers[-1]} ({len(stars)} stars)" + event = InsightEvent( + date=datetime.strptime(day, "%Y-%m-%d"), + event_type="star", + description=desc, + metadata_={"usernames": usernames, "numbers": numbers}, + ) + self.db.add(event) + + await self.db.commit() + return self._success(rows_written, rows_skipped) + + except httpx.HTTPStatusError as e: + return self._error( + f"GitHub API error: {e.response.status_code} {e.response.text[:200]}", + rows_written, + rows_skipped, + ) + except Exception as e: + return self._error( + f"GitHub stars collection failed: {e}", + rows_written, + rows_skipped, + ) + + async def _fetch_profile( + self, client: httpx.AsyncClient, user_data: dict + ) -> StarProfileMetadata: + """Fetch full user profile from GitHub API.""" + username = user_data.get("login", "unknown") + + try: + resp = await client.get(f"{GITHUB_API}/users/{username}") + resp.raise_for_status() + profile_data = resp.json() + except Exception: + # If profile fetch fails, use what we have from stargazer response + profile_data = user_data + + created_at = profile_data.get("created_at", "") + account_age = None + if created_at: + try: + created_dt = datetime.fromisoformat(created_at.replace("Z", "+00:00")) + account_age = ( + datetime.now() - created_dt.replace(tzinfo=None) + ).days / 365.25 + except (ValueError, TypeError): + pass + + return StarProfileMetadata( + username=username, + name=profile_data.get("name"), + location=profile_data.get("location"), + company=profile_data.get("company"), + bio=profile_data.get("bio"), + email=profile_data.get("email"), + blog=profile_data.get("blog"), + followers=profile_data.get("followers", 0), + following=profile_data.get("following", 0), + public_repos=profile_data.get("public_repos", 0), + stars_given=profile_data.get("starred_repos_count", 0), + account_created=created_at or None, + account_age_years=round(account_age, 1) if account_age else None, + github_pro=profile_data.get("plan", {}).get("name", "") == "pro", + ) diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/github_stars.py.jinja b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/github_stars.py.jinja new file mode 100644 index 00000000..f83b31a5 --- /dev/null +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/github_stars.py.jinja @@ -0,0 +1,242 @@ +""" +GitHub Stargazers API collector. + +Collects star events with user profiles stored as JSONB metadata. +Only fetches profiles for new stars not already in the database. +""" + +from datetime import datetime +import logging + +import httpx +from sqlmodel import select +from sqlmodel.ext.asyncio.session import AsyncSession + +from app.core.config import settings + +from ..constants import MetricKeys, Periods, SourceKeys +from ..models import InsightMetric +from ..schemas import StarProfileMetadata +from .base import BaseCollector, CollectionResult + +logger = logging.getLogger(__name__) + +GITHUB_API = "https://api.github.com" + + +class GitHubStarsCollector(BaseCollector): + """Collects stargazer data from the GitHub REST API.""" + + def __init__(self, db: AsyncSession) -> None: + super().__init__(db) + + @property + def source_key(self) -> str: + return SourceKeys.GITHUB_STARS + + async def collect(self) -> CollectionResult: + """Collect new stars with user profiles.""" + token = settings.INSIGHT_GITHUB_TOKEN + owner = settings.INSIGHT_GITHUB_OWNER + repo = settings.INSIGHT_GITHUB_REPO + + if not token or not owner or not repo: + return CollectionResult( + source_key=self.source_key, + success=False, + error="Missing INSIGHT_GITHUB_TOKEN, INSIGHT_GITHUB_OWNER, or INSIGHT_GITHUB_REPO", + ) + + headers = { + "Authorization": f"Bearer {token}", + "Accept": "application/vnd.github.v3.star+json", + "X-GitHub-Api-Version": "2022-11-28", + } + base_url = f"{GITHUB_API}/repos/{owner}/{repo}" + + rows_written = 0 + rows_skipped = 0 + + try: + star_type = await self.get_metric_type(MetricKeys.NEW_STAR) + + # Get existing star numbers to skip + existing_result = await self.db.exec( + select(InsightMetric.value).where( + InsightMetric.metric_type_id == star_type.id, + InsightMetric.period == Periods.EVENT, + ) + ) + existing_star_numbers = {int(v) for v in existing_result.all()} + + async with httpx.AsyncClient(headers=headers, timeout=30.0) as client: + # Paginate through all stargazers + page = 1 + star_number = 0 + + while True: + resp = await client.get( + f"{base_url}/stargazers", + params={"per_page": 100, "page": page}, + ) + resp.raise_for_status() + stargazers = resp.json() + + if not stargazers: + break + + for stargazer in stargazers: + star_number += 1 + + if star_number in existing_star_numbers: + rows_skipped += 1 + continue + + # Fetch user profile + user_data = stargazer.get("user", {}) + starred_at = stargazer.get("starred_at", "") + + profile = await self._fetch_profile(client, user_data) + + date = _parse_star_date(starred_at) + await self.upsert_metric( + metric_type=star_type, + date=date, + value=float(star_number), + period=Periods.EVENT, + metadata=profile.model_dump(), + ) + rows_written += 1 + + if len(stargazers) < 100: + break + page += 1 + + # Create InsightEvent entries grouped by day + from ..models import InsightEvent + + # Get all star metrics to group by date + all_stars = await self.db.exec( + select(InsightMetric).where( + InsightMetric.metric_type_id == star_type.id, + InsightMetric.period == Periods.EVENT, + ).order_by(InsightMetric.date.asc()) + ) + stars_by_date: dict[str, list[dict]] = {} + for s in all_stars.all(): + day = str(s.date)[:10] + meta = s.metadata_ if isinstance(s.metadata_, dict) else {} + stars_by_date.setdefault(day, []).append({ + "number": int(s.value), + "username": meta.get("username", "unknown"), + }) + + # Get existing star events to avoid duplicates + existing_star_events = await self.db.exec( + select(InsightEvent).where(InsightEvent.event_type == "star") + ) + existing_star_dates = { + str(ev.date)[:10] for ev in existing_star_events.all() + } + + for day, stars in stars_by_date.items(): + if day in existing_star_dates: + continue + stars.sort(key=lambda x: x["number"]) + usernames = [s["username"] for s in stars] + numbers = [s["number"] for s in stars] + if len(stars) == 1: + desc = f"\u2b50 #{numbers[0]} — {usernames[0]}" + else: + desc = f"\u2b50 #{numbers[0]}-#{numbers[-1]} ({len(stars)} stars)" + event = InsightEvent( + date=datetime.strptime(day, "%Y-%m-%d"), + event_type="star", + description=desc, + metadata_={"usernames": usernames, "numbers": numbers}, + ) + self.db.add(event) + + await self.db.commit() + + logger.info( + "GitHub stars collected: %d new, %d existing", + rows_written, + rows_skipped, + ) + + return CollectionResult( + source_key=self.source_key, + success=True, + rows_written=rows_written, + rows_skipped=rows_skipped, + ) + + except httpx.HTTPStatusError as e: + error_msg = f"GitHub API error: {e.response.status_code} {e.response.text[:200]}" + logger.error(error_msg) + return CollectionResult( + source_key=self.source_key, + success=False, + rows_written=rows_written, + rows_skipped=rows_skipped, + error=error_msg, + ) + except Exception as e: + error_msg = f"GitHub stars collection failed: {e}" + logger.error(error_msg) + return CollectionResult( + source_key=self.source_key, + success=False, + rows_written=rows_written, + rows_skipped=rows_skipped, + error=error_msg, + ) + + async def _fetch_profile( + self, client: httpx.AsyncClient, user_data: dict + ) -> StarProfileMetadata: + """Fetch full user profile from GitHub API.""" + username = user_data.get("login", "unknown") + + try: + resp = await client.get(f"{GITHUB_API}/users/{username}") + resp.raise_for_status() + profile_data = resp.json() + except Exception: + # If profile fetch fails, use what we have from stargazer response + profile_data = user_data + + created_at = profile_data.get("created_at", "") + account_age = None + if created_at: + try: + created_dt = datetime.fromisoformat(created_at.replace("Z", "+00:00")) + account_age = (datetime.now() - created_dt.replace(tzinfo=None)).days / 365.25 + except (ValueError, TypeError): + pass + + return StarProfileMetadata( + username=username, + name=profile_data.get("name"), + location=profile_data.get("location"), + company=profile_data.get("company"), + bio=profile_data.get("bio"), + email=profile_data.get("email"), + blog=profile_data.get("blog"), + followers=profile_data.get("followers", 0), + following=profile_data.get("following", 0), + public_repos=profile_data.get("public_repos", 0), + stars_given=profile_data.get("starred_repos_count", 0), + account_created=created_at or None, + account_age_years=round(account_age, 1) if account_age else None, + github_pro=profile_data.get("plan", {}).get("name", "") == "pro", + ) + + +def _parse_star_date(timestamp: str) -> datetime: + """Parse starred_at timestamp to date-only datetime.""" + if not timestamp: + return datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) + dt = datetime.fromisoformat(timestamp.replace("Z", "+00:00")) + return dt.replace(hour=0, minute=0, second=0, microsecond=0, tzinfo=None) diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/github_traffic.py b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/github_traffic.py new file mode 100644 index 00000000..5a9a0899 --- /dev/null +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/github_traffic.py @@ -0,0 +1,210 @@ +""" +GitHub Traffic API collector. + +Collects clones, views, referrers, and popular paths from the GitHub Traffic API. +GitHub retains only 14 days of data — this collector persists it before expiry. +""" + +from typing import Any + +import httpx +from app.core.config import settings +from sqlmodel.ext.asyncio.session import AsyncSession + +from ..constants import MetricKeys, Periods, SourceKeys +from ..schemas import PopularPathEntry, PopularPathsMetadata, ReferrerEntry +from .base import BaseCollector, CollectionResult, parse_github_date, today + +GITHUB_API = "https://api.github.com" + + +class GitHubTrafficCollector(BaseCollector): + """Collects traffic data from the GitHub REST API.""" + + def __init__(self, db: AsyncSession) -> None: + super().__init__(db) + + @property + def source_key(self) -> str: + return SourceKeys.GITHUB_TRAFFIC + + async def collect(self) -> CollectionResult: + """Collect clones, views, referrers, and popular paths.""" + token = settings.INSIGHT_GITHUB_TOKEN + owner = settings.INSIGHT_GITHUB_OWNER + repo = settings.INSIGHT_GITHUB_REPO + + if err := self._validate_config( + INSIGHT_GITHUB_TOKEN=token, + INSIGHT_GITHUB_OWNER=owner, + INSIGHT_GITHUB_REPO=repo, + ): + return err + + headers = { + "Authorization": f"Bearer {token}", + "Accept": "application/vnd.github+json", + "X-GitHub-Api-Version": "2022-11-28", + } + base_url = f"{GITHUB_API}/repos/{owner}/{repo}" + + rows_written = 0 + rows_skipped = 0 + + try: + async with httpx.AsyncClient(headers=headers, timeout=30.0) as client: + clones_resp = await client.get(f"{base_url}/traffic/clones") + views_resp = await client.get(f"{base_url}/traffic/views") + referrers_resp = await client.get( + f"{base_url}/traffic/popular/referrers" + ) + paths_resp = await client.get(f"{base_url}/traffic/popular/paths") + + for resp in [clones_resp, views_resp, referrers_resp, paths_resp]: + resp.raise_for_status() + + written, skipped = await self._process_clones(clones_resp.json()) + rows_written += written + rows_skipped += skipped + + written, skipped = await self._process_views(views_resp.json()) + rows_written += written + rows_skipped += skipped + + written, skipped = await self._process_referrers(referrers_resp.json()) + rows_written += written + rows_skipped += skipped + + written, skipped = await self._process_popular_paths(paths_resp.json()) + rows_written += written + rows_skipped += skipped + + await self.db.commit() + return self._success(rows_written, rows_skipped) + + except httpx.HTTPStatusError as e: + return self._error( + f"GitHub API error: {e.response.status_code} {e.response.text[:200]}", + rows_written, + rows_skipped, + ) + except Exception as e: + return self._error( + f"GitHub traffic collection failed: {e}", + rows_written, + rows_skipped, + ) + + async def _process_clones(self, data: dict[str, Any]) -> tuple[int, int]: + """Process clones response — one row per day for clones + unique_cloners.""" + clones_type = await self.get_metric_type(MetricKeys.CLONES) + unique_type = await self.get_metric_type(MetricKeys.UNIQUE_CLONERS) + + written = 0 + skipped = 0 + + for entry in data.get("clones", []): + date = parse_github_date(entry["timestamp"]) + + _, created = await self.upsert_metric( + metric_type=clones_type, + date=date, + value=float(entry["count"]), + period=Periods.DAILY, + ) + written += 1 if created else 0 + skipped += 0 if created else 1 + + _, created = await self.upsert_metric( + metric_type=unique_type, + date=date, + value=float(entry["uniques"]), + period=Periods.DAILY, + ) + written += 1 if created else 0 + skipped += 0 if created else 1 + + return written, skipped + + async def _process_views(self, data: dict[str, Any]) -> tuple[int, int]: + """Process views response — one row per day for views + unique_visitors.""" + views_type = await self.get_metric_type(MetricKeys.VIEWS) + visitors_type = await self.get_metric_type(MetricKeys.UNIQUE_VISITORS) + + written = 0 + skipped = 0 + + for entry in data.get("views", []): + date = parse_github_date(entry["timestamp"]) + + _, created = await self.upsert_metric( + metric_type=views_type, + date=date, + value=float(entry["count"]), + period=Periods.DAILY, + ) + written += 1 if created else 0 + skipped += 0 if created else 1 + + _, created = await self.upsert_metric( + metric_type=visitors_type, + date=date, + value=float(entry["uniques"]), + period=Periods.DAILY, + ) + written += 1 if created else 0 + skipped += 0 if created else 1 + + return written, skipped + + async def _process_referrers(self, data: list[dict[str, Any]]) -> tuple[int, int]: + """Process referrers — single snapshot row with typed referrer entries.""" + referrers_type = await self.get_metric_type(MetricKeys.REFERRERS) + + referrer_map: dict[str, dict[str, int]] = {} + for entry in data: + validated = ReferrerEntry( + views=entry["count"], + uniques=entry["uniques"], + ) + referrer_map[entry["referrer"]] = validated.model_dump() + + _today = today() + _, created = await self.upsert_metric( + metric_type=referrers_type, + date=_today, + value=float(len(data)), + period=Periods.DAILY, + metadata=referrer_map, + ) + + return (1, 0) if created else (0, 1) + + async def _process_popular_paths( + self, data: list[dict[str, Any]] + ) -> tuple[int, int]: + """Process popular paths — single snapshot row with typed path entries.""" + paths_type = await self.get_metric_type(MetricKeys.POPULAR_PATHS) + + paths = PopularPathsMetadata( + paths=[ + PopularPathEntry( + path=entry["path"], + title=entry["title"], + views=entry["count"], + uniques=entry["uniques"], + ) + for entry in data + ] + ) + + _today = today() + _, created = await self.upsert_metric( + metric_type=paths_type, + date=_today, + value=float(len(data)), + period=Periods.DAILY, + metadata=paths.model_dump(), + ) + + return (1, 0) if created else (0, 1) diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/github_traffic.py.jinja b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/github_traffic.py.jinja new file mode 100644 index 00000000..36a650a4 --- /dev/null +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/github_traffic.py.jinja @@ -0,0 +1,250 @@ +""" +GitHub Traffic API collector. + +Collects clones, views, referrers, and popular paths from the GitHub Traffic API. +GitHub retains only 14 days of data — this collector persists it before expiry. +""" + +import logging +from datetime import datetime +from typing import Any + +import httpx +from sqlmodel.ext.asyncio.session import AsyncSession + +from app.core.config import settings + +from ..constants import MetricKeys, Periods, SourceKeys +from ..schemas import PopularPathEntry, PopularPathsMetadata, ReferrerEntry +from .base import BaseCollector, CollectionResult + +logger = logging.getLogger(__name__) + +GITHUB_API = "https://api.github.com" + + +class GitHubTrafficCollector(BaseCollector): + """Collects traffic data from the GitHub REST API.""" + + def __init__(self, db: AsyncSession) -> None: + super().__init__(db) + + @property + def source_key(self) -> str: + return SourceKeys.GITHUB_TRAFFIC + + async def collect(self) -> CollectionResult: + """Collect clones, views, referrers, and popular paths.""" + token = settings.INSIGHT_GITHUB_TOKEN + owner = settings.INSIGHT_GITHUB_OWNER + repo = settings.INSIGHT_GITHUB_REPO + + if not token or not owner or not repo: + return CollectionResult( + source_key=self.source_key, + success=False, + error="Missing INSIGHT_GITHUB_TOKEN, INSIGHT_GITHUB_OWNER, or INSIGHT_GITHUB_REPO", + ) + + headers = { + "Authorization": f"Bearer {token}", + "Accept": "application/vnd.github+json", + "X-GitHub-Api-Version": "2022-11-28", + } + base_url = f"{GITHUB_API}/repos/{owner}/{repo}" + + rows_written = 0 + rows_skipped = 0 + + try: + async with httpx.AsyncClient(headers=headers, timeout=30.0) as client: + clones_resp = await client.get(f"{base_url}/traffic/clones") + views_resp = await client.get(f"{base_url}/traffic/views") + referrers_resp = await client.get(f"{base_url}/traffic/popular/referrers") + paths_resp = await client.get(f"{base_url}/traffic/popular/paths") + + for resp in [clones_resp, views_resp, referrers_resp, paths_resp]: + resp.raise_for_status() + + written, skipped = await self._process_clones(clones_resp.json()) + rows_written += written + rows_skipped += skipped + + written, skipped = await self._process_views(views_resp.json()) + rows_written += written + rows_skipped += skipped + + written, skipped = await self._process_referrers(referrers_resp.json()) + rows_written += written + rows_skipped += skipped + + written, skipped = await self._process_popular_paths(paths_resp.json()) + rows_written += written + rows_skipped += skipped + + await self.db.commit() + + logger.info( + "GitHub traffic collected: %d written, %d skipped", + rows_written, + rows_skipped, + ) + + return CollectionResult( + source_key=self.source_key, + success=True, + rows_written=rows_written, + rows_skipped=rows_skipped, + ) + + except httpx.HTTPStatusError as e: + error_msg = f"GitHub API error: {e.response.status_code} {e.response.text[:200]}" + logger.error(error_msg) + return CollectionResult( + source_key=self.source_key, + success=False, + rows_written=rows_written, + rows_skipped=rows_skipped, + error=error_msg, + ) + except Exception as e: + error_msg = f"GitHub traffic collection failed: {e}" + logger.error(error_msg) + return CollectionResult( + source_key=self.source_key, + success=False, + rows_written=rows_written, + rows_skipped=rows_skipped, + error=error_msg, + ) + + async def _process_clones( + self, data: dict[str, Any] + ) -> tuple[int, int]: + """Process clones response — one row per day for clones + unique_cloners.""" + clones_type = await self.get_metric_type(MetricKeys.CLONES) + unique_type = await self.get_metric_type(MetricKeys.UNIQUE_CLONERS) + + written = 0 + skipped = 0 + + for entry in data.get("clones", []): + date = _parse_github_date(entry["timestamp"]) + + _, created = await self.upsert_metric( + metric_type=clones_type, + date=date, + value=float(entry["count"]), + period=Periods.DAILY, + ) + written += 1 if created else 0 + skipped += 0 if created else 1 + + _, created = await self.upsert_metric( + metric_type=unique_type, + date=date, + value=float(entry["uniques"]), + period=Periods.DAILY, + ) + written += 1 if created else 0 + skipped += 0 if created else 1 + + return written, skipped + + async def _process_views( + self, data: dict[str, Any] + ) -> tuple[int, int]: + """Process views response — one row per day for views + unique_visitors.""" + views_type = await self.get_metric_type(MetricKeys.VIEWS) + visitors_type = await self.get_metric_type(MetricKeys.UNIQUE_VISITORS) + + written = 0 + skipped = 0 + + for entry in data.get("views", []): + date = _parse_github_date(entry["timestamp"]) + + _, created = await self.upsert_metric( + metric_type=views_type, + date=date, + value=float(entry["count"]), + period=Periods.DAILY, + ) + written += 1 if created else 0 + skipped += 0 if created else 1 + + _, created = await self.upsert_metric( + metric_type=visitors_type, + date=date, + value=float(entry["uniques"]), + period=Periods.DAILY, + ) + written += 1 if created else 0 + skipped += 0 if created else 1 + + return written, skipped + + async def _process_referrers( + self, data: list[dict[str, Any]] + ) -> tuple[int, int]: + """Process referrers — single snapshot row with typed referrer entries.""" + referrers_type = await self.get_metric_type(MetricKeys.REFERRERS) + + referrer_map: dict[str, dict[str, int]] = {} + for entry in data: + validated = ReferrerEntry( + views=entry["count"], + uniques=entry["uniques"], + ) + referrer_map[entry["referrer"]] = validated.model_dump() + + today = _today() + _, created = await self.upsert_metric( + metric_type=referrers_type, + date=today, + value=float(len(data)), + period=Periods.DAILY, + metadata=referrer_map, + ) + + return (1, 0) if created else (0, 1) + + async def _process_popular_paths( + self, data: list[dict[str, Any]] + ) -> tuple[int, int]: + """Process popular paths — single snapshot row with typed path entries.""" + paths_type = await self.get_metric_type(MetricKeys.POPULAR_PATHS) + + paths = PopularPathsMetadata( + paths=[ + PopularPathEntry( + path=entry["path"], + title=entry["title"], + views=entry["count"], + uniques=entry["uniques"], + ) + for entry in data + ] + ) + + today = _today() + _, created = await self.upsert_metric( + metric_type=paths_type, + date=today, + value=float(len(data)), + period=Periods.DAILY, + metadata=paths.model_dump(), + ) + + return (1, 0) if created else (0, 1) + + +def _parse_github_date(timestamp: str) -> datetime: + """Parse GitHub API timestamp (ISO 8601) to date-only datetime.""" + dt = datetime.fromisoformat(timestamp.replace("Z", "+00:00")) + return dt.replace(hour=0, minute=0, second=0, microsecond=0, tzinfo=None) + + +def _today() -> datetime: + """Get today as a midnight datetime (no timezone).""" + return datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/plausible.py b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/plausible.py new file mode 100644 index 00000000..b290c8f5 --- /dev/null +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/plausible.py @@ -0,0 +1,211 @@ +""" +Plausible Analytics collector. + +Collects docs visitor metrics, page engagement, and bounce rates. +Supports multiple sites via comma-separated INSIGHT_PLAUSIBLE_SITES. +Uses /stats/timeseries for daily breakdowns (supports backfill). +Per-day country + page breakdowns stored for range-aware display. +""" + +from datetime import datetime, timedelta + +import httpx +from app.core.config import settings +from sqlmodel.ext.asyncio.session import AsyncSession + +from ..constants import MetricKeys, Periods, SourceKeys +from ..schemas import ( + PlausibleCountryEntry, + PlausiblePageEntry, + PlausibleSiteMetadata, + PlausibleTopCountriesMetadata, + PlausibleTopPagesMetadata, +) +from .base import BaseCollector, CollectionResult, today + +PLAUSIBLE_API = "https://plausible.io/api/v1" + + +class PlausibleCollector(BaseCollector): + """Collects analytics from the Plausible API.""" + + def __init__(self, db: AsyncSession) -> None: + super().__init__(db) + + @property + def source_key(self) -> str: + return SourceKeys.PLAUSIBLE + + async def collect(self, lookback_days: int = 1) -> CollectionResult: + """Collect visitor metrics and page engagement for all configured sites. + + Args: + lookback_days: Number of days to fetch. 1 = today only (default). + Higher values for backfill (e.g., 365). + """ + api_key = settings.INSIGHT_PLAUSIBLE_API_KEY + sites_str = settings.INSIGHT_PLAUSIBLE_SITES + + if err := self._validate_config( + INSIGHT_PLAUSIBLE_API_KEY=api_key, + INSIGHT_PLAUSIBLE_SITES=sites_str, + ): + return err + + sites = [s.strip() for s in sites_str.split(",") if s.strip()] + headers = {"Authorization": f"Bearer {api_key}"} + + rows_written = 0 + rows_skipped = 0 + + try: + visitors_type = await self.get_metric_type(MetricKeys.VISITORS) + pageviews_type = await self.get_metric_type(MetricKeys.PAGEVIEWS) + duration_type = await self.get_metric_type(MetricKeys.AVG_DURATION) + bounce_type = await self.get_metric_type(MetricKeys.BOUNCE_RATE) + pages_type = await self.get_metric_type(MetricKeys.TOP_PAGES) + countries_type = await self.get_metric_type(MetricKeys.TOP_COUNTRIES) + + _today = today() + start_date = _today - timedelta(days=lookback_days - 1) + date_range = ( + f"{start_date.strftime('%Y-%m-%d')},{_today.strftime('%Y-%m-%d')}" + ) + + async with httpx.AsyncClient(headers=headers, timeout=30.0) as client: + for site in sites: + site_meta = PlausibleSiteMetadata(site=site).model_dump() + + # Fetch daily timeseries for the date range + resp = await client.get( + f"{PLAUSIBLE_API}/stats/timeseries", + params={ + "site_id": site, + "period": "custom", + "date": date_range, + "metrics": "visitors,pageviews,visit_duration,bounce_rate", + }, + ) + resp.raise_for_status() + timeseries = resp.json().get("results", []) + + # Collect days that had visitors (for per-day breakdowns) + active_days: list[str] = [] + + for day_data in timeseries: + day_str = day_data.get("date", "") + if not day_str: + continue + day_dt = datetime.strptime(day_str, "%Y-%m-%d") + + visitors = day_data.get("visitors") or 0 + if visitors > 0: + active_days.append(day_str) + + for mt, key in [ + (visitors_type, "visitors"), + (pageviews_type, "pageviews"), + (duration_type, "visit_duration"), + (bounce_type, "bounce_rate"), + ]: + value = day_data.get(key) or 0 + _, created = await self.upsert_metric( + metric_type=mt, + date=day_dt, + value=float(value), + period=Periods.DAILY, + metadata=site_meta, + ) + rows_written += 1 if created else 0 + rows_skipped += 0 if created else 1 + + # Per-day country + page breakdowns for active days + for day_str in active_days: + day_dt = datetime.strptime(day_str, "%Y-%m-%d") + + # Pages + pages_resp = await client.get( + f"{PLAUSIBLE_API}/stats/breakdown", + params={ + "site_id": site, + "period": "day", + "date": day_str, + "property": "event:page", + "metrics": "visitors,visit_duration", + "limit": 20, + }, + ) + pages_resp.raise_for_status() + page_results = pages_resp.json().get("results", []) + + pages_metadata = PlausibleTopPagesMetadata( + site=site, + pages=[ + PlausiblePageEntry( + url=p.get("page", ""), + visitors=p.get("visitors", 0), + time_s=p.get("visit_duration"), + ) + for p in page_results + ], + ) + _, created = await self.upsert_metric( + metric_type=pages_type, + date=day_dt, + value=float(len(page_results)), + period=Periods.DAILY, + metadata=pages_metadata.model_dump(), + ) + rows_written += 1 if created else 0 + rows_skipped += 0 if created else 1 + + # Countries + countries_resp = await client.get( + f"{PLAUSIBLE_API}/stats/breakdown", + params={ + "site_id": site, + "period": "day", + "date": day_str, + "property": "visit:country", + "metrics": "visitors", + "limit": 20, + }, + ) + countries_resp.raise_for_status() + country_results = countries_resp.json().get("results", []) + + countries_metadata = PlausibleTopCountriesMetadata( + site=site, + countries=[ + PlausibleCountryEntry( + country=c.get("country", ""), + visitors=c.get("visitors", 0), + ) + for c in country_results + ], + ) + _, created = await self.upsert_metric( + metric_type=countries_type, + date=day_dt, + value=float(len(country_results)), + period=Periods.DAILY, + metadata=countries_metadata.model_dump(), + ) + rows_written += 1 if created else 0 + rows_skipped += 0 if created else 1 + + await self.db.commit() + return self._success(rows_written, rows_skipped) + + except httpx.HTTPStatusError as e: + return self._error( + f"Plausible API error: {e.response.status_code}", + rows_written, + rows_skipped, + ) + except Exception as e: + return self._error( + f"Plausible collection failed: {e}", + rows_written, + rows_skipped, + ) diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/plausible.py.jinja b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/plausible.py.jinja new file mode 100644 index 00000000..c9c95090 --- /dev/null +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/plausible.py.jinja @@ -0,0 +1,242 @@ +""" +Plausible Analytics collector. + +Collects docs visitor metrics, page engagement, and bounce rates. +Supports multiple sites via comma-separated INSIGHT_PLAUSIBLE_SITES. +Uses /stats/timeseries for daily breakdowns (supports backfill). +Per-day country + page breakdowns stored for range-aware display. +""" + +from datetime import datetime, timedelta +import logging + +import httpx +from sqlmodel.ext.asyncio.session import AsyncSession + +from app.core.config import settings + +from ..constants import MetricKeys, Periods, SourceKeys +from ..schemas import ( + PlausibleCountryEntry, + PlausiblePageEntry, + PlausibleSiteMetadata, + PlausibleTopCountriesMetadata, + PlausibleTopPagesMetadata, +) +from .base import BaseCollector, CollectionResult + +logger = logging.getLogger(__name__) + +PLAUSIBLE_API = "https://plausible.io/api/v1" + + +class PlausibleCollector(BaseCollector): + """Collects analytics from the Plausible API.""" + + def __init__(self, db: AsyncSession) -> None: + super().__init__(db) + + @property + def source_key(self) -> str: + return SourceKeys.PLAUSIBLE + + async def collect(self, lookback_days: int = 1) -> CollectionResult: + """Collect visitor metrics and page engagement for all configured sites. + + Args: + lookback_days: Number of days to fetch. 1 = today only (default). + Higher values for backfill (e.g., 365). + """ + api_key = settings.INSIGHT_PLAUSIBLE_API_KEY + sites_str = settings.INSIGHT_PLAUSIBLE_SITES + + if not api_key or not sites_str: + return CollectionResult( + source_key=self.source_key, + success=False, + error="Missing INSIGHT_PLAUSIBLE_API_KEY or INSIGHT_PLAUSIBLE_SITES", + ) + + sites = [s.strip() for s in sites_str.split(",") if s.strip()] + headers = {"Authorization": f"Bearer {api_key}"} + + rows_written = 0 + rows_skipped = 0 + + try: + visitors_type = await self.get_metric_type(MetricKeys.VISITORS) + pageviews_type = await self.get_metric_type(MetricKeys.PAGEVIEWS) + duration_type = await self.get_metric_type(MetricKeys.AVG_DURATION) + bounce_type = await self.get_metric_type(MetricKeys.BOUNCE_RATE) + pages_type = await self.get_metric_type(MetricKeys.TOP_PAGES) + countries_type = await self.get_metric_type(MetricKeys.TOP_COUNTRIES) + + today = _today() + start_date = today - timedelta(days=lookback_days - 1) + date_range = f"{start_date.strftime('%Y-%m-%d')},{today.strftime('%Y-%m-%d')}" + + async with httpx.AsyncClient(headers=headers, timeout=30.0) as client: + for site in sites: + site_meta = PlausibleSiteMetadata(site=site).model_dump() + + # Fetch daily timeseries for the date range + resp = await client.get( + f"{PLAUSIBLE_API}/stats/timeseries", + params={ + "site_id": site, + "period": "custom", + "date": date_range, + "metrics": "visitors,pageviews,visit_duration,bounce_rate", + }, + ) + resp.raise_for_status() + timeseries = resp.json().get("results", []) + + # Collect days that had visitors (for per-day breakdowns) + active_days: list[str] = [] + + for day_data in timeseries: + day_str = day_data.get("date", "") + if not day_str: + continue + day_dt = datetime.strptime(day_str, "%Y-%m-%d") + + visitors = day_data.get("visitors") or 0 + if visitors > 0: + active_days.append(day_str) + + for mt, key in [ + (visitors_type, "visitors"), + (pageviews_type, "pageviews"), + (duration_type, "visit_duration"), + (bounce_type, "bounce_rate"), + ]: + value = day_data.get(key) or 0 + _, created = await self.upsert_metric( + metric_type=mt, + date=day_dt, + value=float(value), + period=Periods.DAILY, + metadata=site_meta, + ) + rows_written += 1 if created else 0 + rows_skipped += 0 if created else 1 + + # Per-day country + page breakdowns for active days + for day_str in active_days: + day_dt = datetime.strptime(day_str, "%Y-%m-%d") + + # Pages + pages_resp = await client.get( + f"{PLAUSIBLE_API}/stats/breakdown", + params={ + "site_id": site, + "period": "day", + "date": day_str, + "property": "event:page", + "metrics": "visitors,visit_duration", + "limit": 20, + }, + ) + pages_resp.raise_for_status() + page_results = pages_resp.json().get("results", []) + + pages_metadata = PlausibleTopPagesMetadata( + site=site, + pages=[ + PlausiblePageEntry( + url=p.get("page", ""), + visitors=p.get("visitors", 0), + time_s=p.get("visit_duration"), + ) + for p in page_results + ], + ) + _, created = await self.upsert_metric( + metric_type=pages_type, + date=day_dt, + value=float(len(page_results)), + period=Periods.DAILY, + metadata=pages_metadata.model_dump(), + ) + rows_written += 1 if created else 0 + rows_skipped += 0 if created else 1 + + # Countries + countries_resp = await client.get( + f"{PLAUSIBLE_API}/stats/breakdown", + params={ + "site_id": site, + "period": "day", + "date": day_str, + "property": "visit:country", + "metrics": "visitors", + "limit": 20, + }, + ) + countries_resp.raise_for_status() + country_results = countries_resp.json().get("results", []) + + countries_metadata = PlausibleTopCountriesMetadata( + site=site, + countries=[ + PlausibleCountryEntry( + country=c.get("country", ""), + visitors=c.get("visitors", 0), + ) + for c in country_results + ], + ) + _, created = await self.upsert_metric( + metric_type=countries_type, + date=day_dt, + value=float(len(country_results)), + period=Periods.DAILY, + metadata=countries_metadata.model_dump(), + ) + rows_written += 1 if created else 0 + rows_skipped += 0 if created else 1 + + await self.db.commit() + + logger.info( + "Plausible collected for %d sites (%d days, %d active): %d written, %d skipped", + len(sites), + lookback_days, + len(active_days), + rows_written, + rows_skipped, + ) + + return CollectionResult( + source_key=self.source_key, + success=True, + rows_written=rows_written, + rows_skipped=rows_skipped, + ) + + except httpx.HTTPStatusError as e: + error_msg = f"Plausible API error: {e.response.status_code}" + logger.error(error_msg) + return CollectionResult( + source_key=self.source_key, + success=False, + rows_written=rows_written, + rows_skipped=rows_skipped, + error=error_msg, + ) + except Exception as e: + error_msg = f"Plausible collection failed: {e}" + logger.error(error_msg) + return CollectionResult( + source_key=self.source_key, + success=False, + rows_written=rows_written, + rows_skipped=rows_skipped, + error=error_msg, + ) + + +def _today() -> datetime: + """Get today as a midnight datetime (no timezone).""" + return datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/pypi.py b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/pypi.py new file mode 100644 index 00000000..84d269ba --- /dev/null +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/pypi.py @@ -0,0 +1,272 @@ +""" +PyPI downloads collector via ClickHouse public SQL endpoint. + +Collects download stats with full dimensional breakdowns: +country, installer, version, distribution type, and human vs bot. +No authentication required. +""" + +import httpx +from app.core.config import settings +from sqlmodel.ext.asyncio.session import AsyncSession + +from ..constants import MetricKeys, Periods, SourceKeys +from ..schemas import ( + PyPICountryBreakdown, + PyPIDownloadMetadata, + PyPIInstallerBreakdown, + PyPITypeBreakdown, + PyPIVersionDetail, +) +from .base import BaseCollector, CollectionResult, clickhouse_query, parse_date, today + +# Pre-aggregated table with all dimensions +FULL_TABLE = "pypi.pypi_downloads_per_day_by_version_by_installer_by_type_by_country" +DAILY_TABLE = "pypi.pypi_downloads_per_day" + +# Known bot/mirror/scanner installers — NOT real humans +# Real humans use: pip, uv. Everything else is automated. +BOT_INSTALLERS = { + "bandersnatch", # PyPI mirror tool + "z3c.pypimirror", # PyPI mirror tool + "Nexus", # Sonatype artifact proxy + "devpi", # PyPI cache/proxy + "pep381client", # PyPI mirror client + "requests", # Scripts/automation + "OS", # OS-level package managers + "Artifactory", # JFrog artifact proxy + "Browser", # Automated security scanners downloading every version + "", # Empty user-agent = automated +} + + +class PyPICollector(BaseCollector): + """Collects PyPI download stats from ClickHouse public dataset.""" + + def __init__(self, db: AsyncSession) -> None: + super().__init__(db) + + @property + def source_key(self) -> str: + return SourceKeys.PYPI + + async def collect(self, lookback_days: int = 14) -> CollectionResult: + """Collect daily totals + per-day dimensional breakdowns.""" + package = settings.INSIGHT_PYPI_PACKAGE + + if err := self._validate_config(INSIGHT_PYPI_PACKAGE=package): + return err + + rows_written = 0 + rows_skipped = 0 + + try: + daily_type = await self.get_metric_type(MetricKeys.DOWNLOADS_DAILY) + daily_human_type = await self.get_metric_type( + MetricKeys.DOWNLOADS_DAILY_HUMAN + ) + total_type = await self.get_metric_type(MetricKeys.DOWNLOADS_TOTAL) + country_type = await self.get_metric_type(MetricKeys.DOWNLOADS_BY_COUNTRY) + installer_type = await self.get_metric_type( + MetricKeys.DOWNLOADS_BY_INSTALLER + ) + version_type = await self.get_metric_type(MetricKeys.DOWNLOADS_BY_VERSION) + dist_type = await self.get_metric_type(MetricKeys.DOWNLOADS_BY_TYPE) + + async with httpx.AsyncClient(timeout=30.0) as client: + # All-time cumulative total + total_data = await clickhouse_query( + client, + f""" + SELECT sum(count) FROM {DAILY_TABLE} + WHERE project = '{package}' + """, + ) + total = int(total_data[0][0]) if total_data else 0 + + _, created = await self.upsert_metric( + metric_type=total_type, + date=today(), + value=float(total), + period=Periods.CUMULATIVE, + ) + rows_written += 1 if created else 0 + rows_skipped += 0 if created else 1 + + # Per-day data with installer for human/bot split + daily_installer_data = await clickhouse_query( + client, + f""" + SELECT date, installer, sum(count) as downloads + FROM {FULL_TABLE} + WHERE project = '{package}' AND date >= today() - {lookback_days} + GROUP BY date, installer + ORDER BY date + """, + ) + + # Aggregate per day: total and human-only + daily_totals: dict[str, int] = {} + daily_humans: dict[str, int] = {} + daily_installers: dict[str, dict[str, int]] = {} + + for row in daily_installer_data: + day = row[0] + installer = row[1] or "(unknown)" + count = int(row[2]) + + daily_totals[day] = daily_totals.get(day, 0) + count + + if row[1] not in BOT_INSTALLERS: + daily_humans[day] = daily_humans.get(day, 0) + count + + if day not in daily_installers: + daily_installers[day] = {} + daily_installers[day][installer] = count + + # Write daily total + human rows + for day, total_count in daily_totals.items(): + date = parse_date(day) + + _, created = await self.upsert_metric( + metric_type=daily_type, + date=date, + value=float(total_count), + period=Periods.DAILY, + ) + rows_written += 1 if created else 0 + rows_skipped += 0 if created else 1 + + human_count = daily_humans.get(day, 0) + _, created = await self.upsert_metric( + metric_type=daily_human_type, + date=date, + value=float(human_count), + period=Periods.DAILY, + ) + rows_written += 1 if created else 0 + rows_skipped += 0 if created else 1 + + # Write per-day installer breakdown + for day, installers in daily_installers.items(): + date = parse_date(day) + metadata = PyPIInstallerBreakdown(installers=installers) + _, created = await self.upsert_metric( + metric_type=installer_type, + date=date, + value=float(len(installers)), + period=Periods.DAILY, + metadata=metadata.model_dump(), + ) + rows_written += 1 if created else 0 + rows_skipped += 0 if created else 1 + + # Per-day country breakdown + country_data = await clickhouse_query( + client, + f""" + SELECT date, country_code, sum(count) as downloads + FROM {FULL_TABLE} + WHERE project = '{package}' AND date >= today() - {lookback_days} + GROUP BY date, country_code + ORDER BY date + """, + ) + + daily_countries: dict[str, dict[str, int]] = {} + for row in country_data: + day = row[0] + country = row[1] or "XX" + if day not in daily_countries: + daily_countries[day] = {} + daily_countries[day][country] = int(row[2]) + + for day, countries in daily_countries.items(): + date = parse_date(day) + metadata = PyPICountryBreakdown(countries=countries) + _, created = await self.upsert_metric( + metric_type=country_type, + date=date, + value=float(len(countries)), + period=Periods.DAILY, + metadata=metadata.model_dump(), + ) + rows_written += 1 if created else 0 + rows_skipped += 0 if created else 1 + + # Per-day version breakdown with human/bot split + version_data = await clickhouse_query( + client, + f""" + SELECT date, version, + sum(count) as total, + sumIf(count, installer NOT IN ('bandersnatch','z3c.pypimirror','Nexus','devpi','pep381client','requests','OS','Artifactory','Browser','')) as human + FROM {FULL_TABLE} + WHERE project = '{package}' AND date >= today() - {lookback_days} + GROUP BY date, version + ORDER BY date + """, + ) + + daily_versions: dict[str, dict[str, PyPIVersionDetail]] = {} + for row in version_data: + day = row[0] + ver = row[1] + if day not in daily_versions: + daily_versions[day] = {} + daily_versions[day][ver] = PyPIVersionDetail( + total=int(row[2]), human=int(row[3]) + ) + + for day, versions in daily_versions.items(): + date = parse_date(day) + metadata = PyPIDownloadMetadata(versions=versions) + _, created = await self.upsert_metric( + metric_type=version_type, + date=date, + value=float(len(versions)), + period=Periods.DAILY, + metadata=metadata.model_dump(), + ) + rows_written += 1 if created else 0 + rows_skipped += 0 if created else 1 + + # Per-day distribution type breakdown + type_data = await clickhouse_query( + client, + f""" + SELECT date, type, sum(count) as downloads + FROM {FULL_TABLE} + WHERE project = '{package}' AND date >= today() - {lookback_days} + GROUP BY date, type + ORDER BY date + """, + ) + + daily_types: dict[str, dict[str, int]] = {} + for row in type_data: + day = row[0] + if day not in daily_types: + daily_types[day] = {} + daily_types[day][row[1]] = int(row[2]) + + for day, types in daily_types.items(): + date = parse_date(day) + metadata = PyPITypeBreakdown(types=types) + _, created = await self.upsert_metric( + metric_type=dist_type, + date=date, + value=float(len(types)), + period=Periods.DAILY, + metadata=metadata.model_dump(), + ) + rows_written += 1 if created else 0 + rows_skipped += 0 if created else 1 + + await self.db.commit() + return self._success(rows_written, rows_skipped) + + except Exception as e: + return self._error( + f"PyPI collection failed: {e}", rows_written, rows_skipped + ) diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/pypi.py.jinja b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/pypi.py.jinja new file mode 100644 index 00000000..e170a6a1 --- /dev/null +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/pypi.py.jinja @@ -0,0 +1,284 @@ +""" +PyPI downloads collector via ClickHouse public SQL endpoint. + +Collects download stats with full dimensional breakdowns: +country, installer, version, distribution type, and human vs bot. +No authentication required. +""" + +import logging +from datetime import datetime +from typing import Any + +import httpx +from sqlmodel.ext.asyncio.session import AsyncSession + +from app.core.config import settings + +from ..constants import MetricKeys, Periods, SourceKeys +from ..schemas import ( + PyPICountryBreakdown, + PyPIDownloadMetadata, + PyPIInstallerBreakdown, + PyPITypeBreakdown, +) +from .base import BaseCollector, CollectionResult + +logger = logging.getLogger(__name__) + +CLICKHOUSE_URL = "https://sql-clickhouse.clickhouse.com" +CLICKHOUSE_PARAMS = {"user": "play", "default_format": "JSONCompact"} + +# Pre-aggregated table with all dimensions +FULL_TABLE = "pypi.pypi_downloads_per_day_by_version_by_installer_by_type_by_country" +DAILY_TABLE = "pypi.pypi_downloads_per_day" + +# Known bot/mirror installers — these are NOT real humans installing your package +BOT_INSTALLERS = { + "bandersnatch", + "z3c.pypimirror", + "Nexus", + "devpi", + "pep381client", + "requests", + "OS", + "Artifactory", + "", # empty user-agent = automated +} + + +class PyPICollector(BaseCollector): + """Collects PyPI download stats from ClickHouse public dataset.""" + + def __init__(self, db: AsyncSession) -> None: + super().__init__(db) + + @property + def source_key(self) -> str: + return SourceKeys.PYPI + + async def collect(self) -> CollectionResult: + """Collect daily totals + per-day dimensional breakdowns for last 14 days.""" + package = settings.INSIGHT_PYPI_PACKAGE + + if not package: + return CollectionResult( + source_key=self.source_key, + success=False, + error="Missing INSIGHT_PYPI_PACKAGE", + ) + + rows_written = 0 + rows_skipped = 0 + + try: + daily_type = await self.get_metric_type(MetricKeys.DOWNLOADS_DAILY) + daily_human_type = await self.get_metric_type(MetricKeys.DOWNLOADS_DAILY_HUMAN) + total_type = await self.get_metric_type(MetricKeys.DOWNLOADS_TOTAL) + country_type = await self.get_metric_type(MetricKeys.DOWNLOADS_BY_COUNTRY) + installer_type = await self.get_metric_type(MetricKeys.DOWNLOADS_BY_INSTALLER) + version_type = await self.get_metric_type(MetricKeys.DOWNLOADS_BY_VERSION) + dist_type = await self.get_metric_type(MetricKeys.DOWNLOADS_BY_TYPE) + + async with httpx.AsyncClient(timeout=30.0) as client: + # All-time cumulative total + total_data = await self._query(client, f""" + SELECT sum(count) FROM {DAILY_TABLE} + WHERE project = '{package}' + """) + total = int(total_data[0][0]) if total_data else 0 + today = _today() + + _, created = await self.upsert_metric( + metric_type=total_type, date=today, + value=float(total), period=Periods.CUMULATIVE, + ) + rows_written += 1 if created else 0 + rows_skipped += 0 if created else 1 + + # Per-day data with installer for human/bot split + daily_installer_data = await self._query(client, f""" + SELECT date, installer, sum(count) as downloads + FROM {FULL_TABLE} + WHERE project = '{package}' AND date >= today() - 14 + GROUP BY date, installer + ORDER BY date + """) + + # Aggregate per day: total and human-only + daily_totals: dict[str, int] = {} + daily_humans: dict[str, int] = {} + daily_installers: dict[str, dict[str, int]] = {} + + for row in daily_installer_data: + day = row[0] + installer = row[1] or "(unknown)" + count = int(row[2]) + + daily_totals[day] = daily_totals.get(day, 0) + count + + if row[1] not in BOT_INSTALLERS: + daily_humans[day] = daily_humans.get(day, 0) + count + + if day not in daily_installers: + daily_installers[day] = {} + daily_installers[day][installer] = count + + # Write daily total + human rows + for day, total_count in daily_totals.items(): + date = _parse_date(day) + + _, created = await self.upsert_metric( + metric_type=daily_type, date=date, + value=float(total_count), period=Periods.DAILY, + ) + rows_written += 1 if created else 0 + rows_skipped += 0 if created else 1 + + human_count = daily_humans.get(day, 0) + _, created = await self.upsert_metric( + metric_type=daily_human_type, date=date, + value=float(human_count), period=Periods.DAILY, + ) + rows_written += 1 if created else 0 + rows_skipped += 0 if created else 1 + + # Write per-day installer breakdown + for day, installers in daily_installers.items(): + date = _parse_date(day) + metadata = PyPIInstallerBreakdown(installers=installers) + _, created = await self.upsert_metric( + metric_type=installer_type, date=date, + value=float(len(installers)), period=Periods.DAILY, + metadata=metadata.model_dump(), + ) + rows_written += 1 if created else 0 + rows_skipped += 0 if created else 1 + + # Per-day country breakdown + country_data = await self._query(client, f""" + SELECT date, country_code, sum(count) as downloads + FROM {FULL_TABLE} + WHERE project = '{package}' AND date >= today() - 14 + GROUP BY date, country_code + ORDER BY date + """) + + daily_countries: dict[str, dict[str, int]] = {} + for row in country_data: + day = row[0] + country = row[1] or "XX" + if day not in daily_countries: + daily_countries[day] = {} + daily_countries[day][country] = int(row[2]) + + for day, countries in daily_countries.items(): + date = _parse_date(day) + metadata = PyPICountryBreakdown(countries=countries) + _, created = await self.upsert_metric( + metric_type=country_type, date=date, + value=float(len(countries)), period=Periods.DAILY, + metadata=metadata.model_dump(), + ) + rows_written += 1 if created else 0 + rows_skipped += 0 if created else 1 + + # Per-day version breakdown + version_data = await self._query(client, f""" + SELECT date, version, sum(count) as downloads + FROM {FULL_TABLE} + WHERE project = '{package}' AND date >= today() - 14 + GROUP BY date, version + ORDER BY date + """) + + daily_versions: dict[str, dict[str, int]] = {} + for row in version_data: + day = row[0] + if day not in daily_versions: + daily_versions[day] = {} + daily_versions[day][row[1]] = int(row[2]) + + for day, versions in daily_versions.items(): + date = _parse_date(day) + metadata = PyPIDownloadMetadata(versions=versions) + _, created = await self.upsert_metric( + metric_type=version_type, date=date, + value=float(len(versions)), period=Periods.DAILY, + metadata=metadata.model_dump(), + ) + rows_written += 1 if created else 0 + rows_skipped += 0 if created else 1 + + # Per-day distribution type breakdown + type_data = await self._query(client, f""" + SELECT date, type, sum(count) as downloads + FROM {FULL_TABLE} + WHERE project = '{package}' AND date >= today() - 14 + GROUP BY date, type + ORDER BY date + """) + + daily_types: dict[str, dict[str, int]] = {} + for row in type_data: + day = row[0] + if day not in daily_types: + daily_types[day] = {} + daily_types[day][row[1]] = int(row[2]) + + for day, types in daily_types.items(): + date = _parse_date(day) + metadata = PyPITypeBreakdown(types=types) + _, created = await self.upsert_metric( + metric_type=dist_type, date=date, + value=float(len(types)), period=Periods.DAILY, + metadata=metadata.model_dump(), + ) + rows_written += 1 if created else 0 + rows_skipped += 0 if created else 1 + + await self.db.commit() + + logger.info( + "PyPI collected via ClickHouse: %d written, %d skipped, total=%d", + rows_written, rows_skipped, total, + ) + + return CollectionResult( + source_key=self.source_key, + success=True, + rows_written=rows_written, + rows_skipped=rows_skipped, + ) + + except Exception as e: + error_msg = f"PyPI collection failed: {e}" + logger.error(error_msg, exc_info=True) + return CollectionResult( + source_key=self.source_key, + success=False, + rows_written=rows_written, + rows_skipped=rows_skipped, + error=error_msg, + ) + + async def _query(self, client: httpx.AsyncClient, sql: str) -> list[list]: + """Execute a ClickHouse SQL query and return rows.""" + resp = await client.post( + CLICKHOUSE_URL, + params=CLICKHOUSE_PARAMS, + content=sql.strip(), + ) + resp.raise_for_status() + data = resp.json() + return data.get("data", []) + + +def _parse_date(date_str: str) -> datetime: + """Parse YYYY-MM-DD date string to datetime.""" + return datetime.strptime(date_str, "%Y-%m-%d") + + +def _today() -> datetime: + """Get today as a midnight datetime.""" + return datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/reddit.py b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/reddit.py new file mode 100644 index 00000000..38c5374c --- /dev/null +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/reddit.py @@ -0,0 +1,124 @@ +""" +Reddit post tracker. + +Lightweight collector — primarily manual entry via CLI/API. +Stores post stats as snapshot rows with metadata. +""" + +from datetime import datetime + +import httpx +from sqlmodel.ext.asyncio.session import AsyncSession + +from ..constants import MetricKeys, Periods, SourceKeys +from ..schemas import RedditPostMetadata +from .base import BaseCollector, CollectionResult, today + + +class RedditCollector(BaseCollector): + """Tracks Reddit post stats.""" + + def __init__(self, db: AsyncSession) -> None: + super().__init__(db) + + @property + def source_key(self) -> str: + return SourceKeys.REDDIT + + async def collect(self) -> CollectionResult: + """Refresh stats for all tracked posts. Not scheduled — called on demand.""" + return CollectionResult( + source_key=self.source_key, + success=True, + error="Reddit collection is on-demand only. Use add_post() or refresh_post().", + ) + + async def add_post(self, url: str) -> CollectionResult: + """ + Add a Reddit post to track by fetching its current stats. + + Args: + url: Reddit post URL (e.g., https://reddit.com/r/python/comments/abc123/...) + """ + rows_written = 0 + + try: + post_type = await self.get_metric_type(MetricKeys.POST_STATS) + + # Fetch post data via Reddit JSON API (append .json to URL) + json_url = url.rstrip("/") + ".json" + + async with httpx.AsyncClient( + timeout=15.0, + headers={"User-Agent": "aegis-insights/1.0"}, + follow_redirects=True, + ) as client: + resp = await client.get(json_url) + resp.raise_for_status() + data = resp.json() + + # Reddit returns an array of listings + post_data = data[0]["data"]["children"][0]["data"] + + post_id = post_data.get("id", "") + metadata = RedditPostMetadata( + post_id=post_id, + subreddit=post_data.get("subreddit", ""), + title=post_data.get("title", ""), + comments=post_data.get("num_comments", 0), + views=post_data.get("view_count"), + upvote_ratio=post_data.get("upvote_ratio"), + url=url, + ) + + _today = today() + upvotes = post_data.get("ups", 0) + + await self.upsert_metric( + metric_type=post_type, + date=_today, + value=float(upvotes), + period=Periods.EVENT, + metadata=metadata.model_dump(), + ) + rows_written += 1 + + # Also create an event for the timeline + from sqlmodel import select + + from ..models import InsightEvent + + existing_events = await self.db.exec( + select(InsightEvent).where(InsightEvent.event_type == "reddit_post") + ) + already_tracked = any( + (ev.metadata_ or {}).get("post_id") == post_id + for ev in existing_events.all() + ) + if not already_tracked: + subreddit = post_data.get("subreddit", "") + title = post_data.get("title", "") + created = post_data.get("created_utc", 0) + post_date = ( + datetime.fromtimestamp(created).replace(tzinfo=None) + if created + else _today + ) + + event = InsightEvent( + date=post_date, + event_type="reddit_post", + description=f"r/{subreddit} — {title[:80]}", + metadata_={"post_id": post_id, "subreddit": subreddit, "url": url}, + ) + self.db.add(event) + + await self.db.commit() + return self._success(rows_written) + + except httpx.HTTPStatusError as e: + return self._error(f"Reddit API error: {e.response.status_code}") + except (KeyError, IndexError) as e: + return self._error(f"Failed to parse Reddit response: {e}") + except Exception as e: + return self._error(f"Reddit post tracking failed: {e}") diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/reddit.py.jinja b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/reddit.py.jinja new file mode 100644 index 00000000..125bb2ad --- /dev/null +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/collectors/reddit.py.jinja @@ -0,0 +1,158 @@ +""" +Reddit post tracker. + +Lightweight collector — primarily manual entry via CLI/API. +Stores post stats as snapshot rows with metadata. +""" + +from datetime import datetime +import logging + +import httpx +from sqlmodel.ext.asyncio.session import AsyncSession + +from ..constants import MetricKeys, Periods, SourceKeys +from ..schemas import RedditPostMetadata +from .base import BaseCollector, CollectionResult + +logger = logging.getLogger(__name__) + + +class RedditCollector(BaseCollector): + """Tracks Reddit post stats.""" + + def __init__(self, db: AsyncSession) -> None: + super().__init__(db) + + @property + def source_key(self) -> str: + return SourceKeys.REDDIT + + async def collect(self) -> CollectionResult: + """Refresh stats for all tracked posts. Not scheduled — called on demand.""" + return CollectionResult( + source_key=self.source_key, + success=True, + error="Reddit collection is on-demand only. Use add_post() or refresh_post().", + ) + + async def add_post(self, url: str) -> CollectionResult: + """ + Add a Reddit post to track by fetching its current stats. + + Args: + url: Reddit post URL (e.g., https://reddit.com/r/python/comments/abc123/...) + """ + rows_written = 0 + + try: + post_type = await self.get_metric_type(MetricKeys.POST_STATS) + + # Fetch post data via Reddit JSON API (append .json to URL) + json_url = url.rstrip("/") + ".json" + + async with httpx.AsyncClient( + timeout=15.0, + headers={"User-Agent": "aegis-insights/1.0"}, + follow_redirects=True, + ) as client: + resp = await client.get(json_url) + resp.raise_for_status() + data = resp.json() + + # Reddit returns an array of listings + post_data = data[0]["data"]["children"][0]["data"] + + post_id = post_data.get("id", "") + metadata = RedditPostMetadata( + post_id=post_id, + subreddit=post_data.get("subreddit", ""), + title=post_data.get("title", ""), + comments=post_data.get("num_comments", 0), + views=post_data.get("view_count"), + upvote_ratio=post_data.get("upvote_ratio"), + url=url, + ) + + today = _today() + upvotes = post_data.get("ups", 0) + + await self.upsert_metric( + metric_type=post_type, + date=today, + value=float(upvotes), + period=Periods.EVENT, + metadata=metadata.model_dump(), + ) + rows_written += 1 + + # Also create an event for the timeline + from ..models import InsightEvent + from sqlmodel import select + + # Only create event if one doesn't exist for this post + existing_events = await self.db.exec( + select(InsightEvent).where(InsightEvent.event_type == "reddit_post") + ) + already_tracked = any( + (ev.metadata_ or {}).get("post_id") == post_id + for ev in existing_events.all() + ) + if not already_tracked: + subreddit = post_data.get("subreddit", "") + title = post_data.get("title", "") + from datetime import datetime, UTC + created = post_data.get("created_utc", 0) + post_date = datetime.fromtimestamp(created).replace(tzinfo=None) if created else today + + event = InsightEvent( + date=post_date, + event_type="reddit_post", + description=f"r/{subreddit} — {title[:80]}", + metadata_={"post_id": post_id, "subreddit": subreddit, "url": url}, + ) + self.db.add(event) + + await self.db.commit() + + logger.info( + "Reddit post added: %s (%d upvotes)", + post_id, + upvotes, + ) + + return CollectionResult( + source_key=self.source_key, + success=True, + rows_written=rows_written, + ) + + except httpx.HTTPStatusError as e: + error_msg = f"Reddit API error: {e.response.status_code}" + logger.error(error_msg) + return CollectionResult( + source_key=self.source_key, + success=False, + error=error_msg, + ) + except (KeyError, IndexError) as e: + error_msg = f"Failed to parse Reddit response: {e}" + logger.error(error_msg) + return CollectionResult( + source_key=self.source_key, + success=False, + error=error_msg, + ) + except Exception as e: + error_msg = f"Reddit post tracking failed: {e}" + logger.error(error_msg) + return CollectionResult( + source_key=self.source_key, + success=False, + error=error_msg, + ) + + +def _today() -> datetime: + """Get today as a midnight datetime (no timezone).""" + return datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/constants.py b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/constants.py new file mode 100644 index 00000000..07584ba6 --- /dev/null +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/constants.py @@ -0,0 +1,82 @@ +""" +Constants for the insights service. + +Single source of truth for source keys, metric type keys, and period values. +""" + + +class SourceKeys: + """Insight source identifiers. Must match seed data in insight_source table.""" + + GITHUB_TRAFFIC = "github_traffic" + GITHUB_STARS = "github_stars" + GITHUB_EVENTS = "github_events" + PYPI = "pypi" + PLAUSIBLE = "plausible" + REDDIT = "reddit" + + ALL = [GITHUB_TRAFFIC, GITHUB_STARS, GITHUB_EVENTS, PYPI, PLAUSIBLE, REDDIT] + + +class MetricKeys: + """Metric type identifiers. Must match seed data in insight_metric_type table.""" + + # github_traffic + CLONES = "clones" + UNIQUE_CLONERS = "unique_cloners" + VIEWS = "views" + UNIQUE_VISITORS = "unique_visitors" + REFERRERS = "referrers" + POPULAR_PATHS = "popular_paths" + + # github_stars + NEW_STAR = "new_star" + + # pypi + DOWNLOADS_TOTAL = "downloads_total" + DOWNLOADS_DAILY = "downloads_daily" + DOWNLOADS_DAILY_HUMAN = "downloads_daily_human" + DOWNLOADS_BY_COUNTRY = "downloads_by_country" + DOWNLOADS_BY_INSTALLER = "downloads_by_installer" + DOWNLOADS_BY_VERSION = "downloads_by_version" + DOWNLOADS_BY_TYPE = "downloads_by_type" + + # github_events + FORKS = "forks" + RELEASES = "releases" + STAR_EVENTS = "star_events" + ACTIVITY_SUMMARY = "activity_summary" + + # plausible + VISITORS = "visitors" + PAGEVIEWS = "pageviews" + AVG_DURATION = "avg_duration" + BOUNCE_RATE = "bounce_rate" + TOP_PAGES = "top_pages" + TOP_COUNTRIES = "top_countries" + + # reddit + POST_STATS = "post_stats" + + +class Periods: + """Time period classifications for metric rows.""" + + DAILY = "daily" + CUMULATIVE = "cumulative" + SNAPSHOT = "snapshot" + EVENT = "event" + + +class Units: + """Metric type units. Used in seed data and display formatting.""" + + COUNT = "count" + SECONDS = "seconds" + PERCENTAGE = "percentage" + RATIO = "ratio" + JSON = "json" + + +# Component name for health check registration +INSIGHT_COMPONENT_NAME = "insights" diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/health.py.jinja b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/health.py.jinja new file mode 100644 index 00000000..a62a69d1 --- /dev/null +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/health.py.jinja @@ -0,0 +1,89 @@ +""" +Health check for the insights service. +""" + +from datetime import datetime, timedelta +import logging + +from sqlalchemy import func +from sqlmodel import select + +from app.core.db import get_async_session +from app.services.system.models import ComponentStatus, ComponentStatusType + +from .constants import INSIGHT_COMPONENT_NAME +from .models import InsightMetric, InsightSource + +logger = logging.getLogger(__name__) + + +async def check_insight_health() -> ComponentStatus: + """Check insight service health — last collection times, stale sources.""" + try: + async with get_async_session() as session: + # Get all enabled sources + stmt = select(InsightSource).where(InsightSource.enabled == True) # noqa: E712 + result = await session.exec(stmt) + sources = result.all() + + # Total metric rows + count_stmt = select(func.count()).select_from(InsightMetric) + count_result = await session.exec(count_stmt) + total_metrics = count_result.one() + + # Check staleness per source + now = datetime.now() + stale_sources: list[str] = [] + source_details: dict[str, dict] = {} + + for source in sources: + last_collected = source.last_collected_at + + is_stale = False + if ( + source.collection_interval_hours is not None + and last_collected is not None + ): + max_age = timedelta(hours=source.collection_interval_hours * 3) + if now - last_collected > max_age: + is_stale = True + stale_sources.append(source.key) + elif source.collection_interval_hours is not None and last_collected is None: + stale_sources.append(source.key) + + source_details[source.key] = { + "enabled": source.enabled, + "last_collected": last_collected.isoformat() if last_collected else None, + "stale": is_stale, + } + + if total_metrics == 0: + status = ComponentStatusType.WARNING + message = "No data collected yet" + elif stale_sources: + status = ComponentStatusType.WARNING + message = f"Stale sources: {', '.join(stale_sources)}" + else: + status = ComponentStatusType.HEALTHY + message = f"{total_metrics} metrics across {len(sources)} sources" + + return ComponentStatus( + name=INSIGHT_COMPONENT_NAME, + status=status, + message=message, + metadata={ + "total_metrics": total_metrics, + "enabled_sources": len(sources), + "stale_sources": stale_sources, + "sources": source_details, + }, + ) + + except Exception as e: + logger.error("Insight health check failed: %s", e) + return ComponentStatus( + name=INSIGHT_COMPONENT_NAME, + status=ComponentStatusType.UNHEALTHY, + message=f"Health check error: {e}", + metadata={"error": str(e)}, + ) diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/insight_service.py b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/insight_service.py new file mode 100644 index 00000000..7d16fbb0 --- /dev/null +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/insight_service.py @@ -0,0 +1,337 @@ +""" +Insight service — query layer for all insight data. + +Provides methods to read metrics, manage records, and generate summaries. +""" + +import logging +from datetime import date, datetime, timedelta +from typing import Any + +from sqlalchemy import func +from sqlmodel import select +from sqlmodel.ext.asyncio.session import AsyncSession + +from .constants import MetricKeys, Periods +from .models import ( + InsightEvent, + InsightMetric, + InsightMetricType, + InsightRecord, + InsightSource, +) + +logger = logging.getLogger(__name__) + + +class InsightService: + """Query layer for all insight data.""" + + def __init__(self, db: AsyncSession) -> None: + self.db = db + + # ------------------------------------------------------------------- + # Metrics + # ------------------------------------------------------------------- + + async def get_metrics( + self, + source_key: str | None = None, + metric_keys: list[str] | None = None, + from_date: date | None = None, + to_date: date | None = None, + ) -> list[InsightMetric]: + """Query metrics with optional filters.""" + stmt = select(InsightMetric).join(InsightMetricType) + + if source_key is not None: + stmt = stmt.join(InsightSource).where(InsightSource.key == source_key) + + if metric_keys is not None: + stmt = stmt.where(InsightMetricType.key.in_(metric_keys)) # type: ignore[union-attr] + + if from_date is not None: + stmt = stmt.where( + InsightMetric.date >= datetime.combine(from_date, datetime.min.time()) + ) + + if to_date is not None: + stmt = stmt.where( + InsightMetric.date <= datetime.combine(to_date, datetime.max.time()) + ) + + stmt = stmt.order_by(InsightMetric.date.desc()) # type: ignore[union-attr] + + result = await self.db.exec(stmt) + return list(result.all()) + + async def get_rolling_14d(self, metric_type_id: int) -> float: + """Compute rolling 14-day sum for a metric type.""" + cutoff = datetime.now().replace( + hour=0, minute=0, second=0, microsecond=0 + ) - timedelta(days=14) + + stmt = select(func.coalesce(func.sum(InsightMetric.value), 0.0)).where( + InsightMetric.metric_type_id == metric_type_id, + InsightMetric.date >= cutoff, + InsightMetric.period == Periods.DAILY, + ) + + result = await self.db.exec(stmt) + return float(result.one()) + + # ------------------------------------------------------------------- + # Records + # ------------------------------------------------------------------- + + async def get_records(self) -> list[InsightRecord]: + """Get all current records with metric type info.""" + stmt = select(InsightRecord).order_by(InsightRecord.date_achieved.desc()) # type: ignore[union-attr] + result = await self.db.exec(stmt) + return list(result.all()) + + async def check_and_update_records( + self, + metric_type_id: int, + value: float, + achieved_date: datetime, + context: str | None = None, + ) -> bool: + """ + Compare value against stored record. Update if exceeded. + + Returns True if a new record was set. + """ + stmt = select(InsightRecord).where( + InsightRecord.metric_type_id == metric_type_id + ) + result = await self.db.exec(stmt) + record = result.first() + + if record is None: + # First record for this metric + self.db.add( + InsightRecord( + metric_type_id=metric_type_id, + value=value, + date_achieved=achieved_date, + context=context, + ) + ) + return True + + if value > record.value: + # New record — shift current to previous + record.previous_value = record.value + record.previous_date = record.date_achieved + record.value = value + record.date_achieved = achieved_date + record.context = context + record.updated_at = datetime.now().replace(tzinfo=None) + self.db.add(record) + return True + + return False + + # ------------------------------------------------------------------- + # Stars + # ------------------------------------------------------------------- + + async def get_stars(self, limit: int = 50, offset: int = 0) -> list[InsightMetric]: + """Get star event rows with profiles, newest first.""" + star_type = await self._get_metric_type_by_key(MetricKeys.NEW_STAR) + if star_type is None: + return [] + + stmt = ( + select(InsightMetric) + .where( + InsightMetric.metric_type_id == star_type.id, + InsightMetric.period == Periods.EVENT, + ) + .order_by(InsightMetric.value.desc()) # type: ignore[union-attr] + .offset(offset) + .limit(limit) + ) + + result = await self.db.exec(stmt) + return list(result.all()) + + # ------------------------------------------------------------------- + # Sources & Types + # ------------------------------------------------------------------- + + async def get_sources(self) -> list[InsightSource]: + """Get all sources.""" + result = await self.db.exec(select(InsightSource).order_by(InsightSource.id)) + return list(result.all()) + + async def get_metric_types( + self, source_id: int | None = None + ) -> list[InsightMetricType]: + """Get metric types, optionally filtered by source.""" + stmt = select(InsightMetricType) + if source_id is not None: + stmt = stmt.where(InsightMetricType.source_id == source_id) + stmt = stmt.order_by(InsightMetricType.id) + + result = await self.db.exec(stmt) + return list(result.all()) + + # ------------------------------------------------------------------- + # Referrers + # ------------------------------------------------------------------- + + async def get_referrers( + self, + from_date: date | None = None, + to_date: date | None = None, + ) -> list[InsightMetric]: + """Get referrer rows with JSONB breakdown.""" + referrer_type = await self._get_metric_type_by_key(MetricKeys.REFERRERS) + if referrer_type is None: + return [] + + stmt = select(InsightMetric).where( + InsightMetric.metric_type_id == referrer_type.id + ) + + if from_date is not None: + stmt = stmt.where( + InsightMetric.date >= datetime.combine(from_date, datetime.min.time()) + ) + if to_date is not None: + stmt = stmt.where( + InsightMetric.date <= datetime.combine(to_date, datetime.max.time()) + ) + + stmt = stmt.order_by(InsightMetric.date.desc()) # type: ignore[union-attr] + + result = await self.db.exec(stmt) + return list(result.all()) + + # ------------------------------------------------------------------- + # Reddit + # ------------------------------------------------------------------- + + async def get_reddit_posts(self) -> list[InsightMetric]: + """Get latest snapshot per tracked Reddit post.""" + post_type = await self._get_metric_type_by_key(MetricKeys.POST_STATS) + if post_type is None: + return [] + + stmt = ( + select(InsightMetric) + .where(InsightMetric.metric_type_id == post_type.id) + .order_by(InsightMetric.date.desc()) # type: ignore[union-attr] + ) + + result = await self.db.exec(stmt) + return list(result.all()) + + # ------------------------------------------------------------------- + # Events + # ------------------------------------------------------------------- + + async def get_events( + self, + from_date: date | None = None, + to_date: date | None = None, + ) -> list[InsightEvent]: + """Get events, optionally filtered by date range.""" + stmt = select(InsightEvent) + + if from_date is not None: + stmt = stmt.where( + InsightEvent.date >= datetime.combine(from_date, datetime.min.time()) + ) + if to_date is not None: + stmt = stmt.where( + InsightEvent.date <= datetime.combine(to_date, datetime.max.time()) + ) + + stmt = stmt.order_by(InsightEvent.date.desc()) # type: ignore[union-attr] + + result = await self.db.exec(stmt) + return list(result.all()) + + async def add_event( + self, + event_type: str, + description: str, + event_date: date | None = None, + metadata: dict[str, Any] | None = None, + ) -> InsightEvent: + """Create a new contextual event.""" + event = InsightEvent( + date=datetime.combine(event_date or date.today(), datetime.min.time()), + event_type=event_type, + description=description, + metadata_=metadata or {}, + ) + self.db.add(event) + await self.db.flush() + return event + + # ------------------------------------------------------------------- + # Summary + # ------------------------------------------------------------------- + + async def get_status_summary(self) -> dict[str, Any]: + """Build a status summary across all sources.""" + sources = await self.get_sources() + records = await self.get_records() + + # Get latest metric date per source + source_status: list[dict[str, Any]] = [] + for source in sources: + metric_types = await self.get_metric_types(source.id) + type_ids = [mt.id for mt in metric_types if mt.id is not None] + + last_collected = None + if type_ids: + stmt = select(func.max(InsightMetric.created_at)).where( + InsightMetric.metric_type_id.in_(type_ids) # type: ignore[union-attr] + ) + result = await self.db.exec(stmt) + last_collected = result.first() + + source_status.append( + { + "key": source.key, + "display_name": source.display_name, + "enabled": source.enabled, + "last_collected": last_collected, + "metric_count": len(metric_types), + } + ) + + # Total metric rows + stmt = select(func.count()).select_from(InsightMetric) + result = await self.db.exec(stmt) + total_metrics = result.one() + + return { + "sources": source_status, + "records": [ + { + "metric_type_id": r.metric_type_id, + "value": r.value, + "date_achieved": r.date_achieved, + "previous_value": r.previous_value, + } + for r in records + ], + "total_metrics": total_metrics, + } + + # ------------------------------------------------------------------- + # Internal helpers + # ------------------------------------------------------------------- + + async def _get_metric_type_by_key(self, key: str) -> InsightMetricType | None: + """Look up a metric type by key.""" + result = await self.db.exec( + select(InsightMetricType).where(InsightMetricType.key == key) + ) + return result.first() diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/jobs.py.jinja b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/jobs.py.jinja new file mode 100644 index 00000000..401b92d5 --- /dev/null +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/jobs.py.jinja @@ -0,0 +1,120 @@ +""" +Scheduler jobs for insight data collection. + +Each enabled source gets a scheduled job that runs its collector +at the configured interval. +""" + +import logging + +from app.core.db import get_async_session + +from .collector_service import CollectorService +from .constants import SourceKeys + +logger = logging.getLogger(__name__) + +{% if insights_github %} + + +async def collect_github_traffic_job() -> None: + """Scheduled job: collect GitHub traffic data.""" + logger.info("Running scheduled GitHub traffic collection") + async with get_async_session() as session: + service = CollectorService(session) + result = await service.collect_source(SourceKeys.GITHUB_TRAFFIC) + if result.success: + logger.info( + "GitHub traffic: %d written, %d skipped", + result.rows_written, + result.rows_skipped, + ) + else: + logger.error("GitHub traffic collection failed: %s", result.error) + + +async def collect_github_stars_job() -> None: + """Scheduled job: collect GitHub stargazer data.""" + logger.info("Running scheduled GitHub stars collection") + async with get_async_session() as session: + service = CollectorService(session) + result = await service.collect_source(SourceKeys.GITHUB_STARS) + if result.success: + logger.info( + "GitHub stars: %d written, %d skipped", + result.rows_written, + result.rows_skipped, + ) + else: + logger.error("GitHub stars collection failed: %s", result.error) + + +async def collect_github_events_job() -> None: + """Scheduled job: collect GitHub events from ClickHouse.""" + logger.info("Running scheduled GitHub events collection") + async with get_async_session() as session: + service = CollectorService(session) + result = await service.collect_source(SourceKeys.GITHUB_EVENTS) + if result.success: + logger.info( + "GitHub events: %d written, %d skipped", + result.rows_written, + result.rows_skipped, + ) + else: + logger.error("GitHub events collection failed: %s", result.error) +{% endif %} +{% if insights_pypi %} + + +async def collect_pypi_job() -> None: + """Scheduled job: collect PyPI download stats.""" + logger.info("Running scheduled PyPI collection") + async with get_async_session() as session: + service = CollectorService(session) + result = await service.collect_source(SourceKeys.PYPI) + if result.success: + logger.info( + "PyPI: %d written, %d skipped", + result.rows_written, + result.rows_skipped, + ) + else: + logger.error("PyPI collection failed: %s", result.error) +{% endif %} +{% if insights_plausible %} + + +async def collect_plausible_job() -> None: + """Scheduled job: collect Plausible analytics.""" + logger.info("Running scheduled Plausible collection") + async with get_async_session() as session: + service = CollectorService(session) + result = await service.collect_source(SourceKeys.PLAUSIBLE) + if result.success: + logger.info( + "Plausible: %d written, %d skipped", + result.rows_written, + result.rows_skipped, + ) + else: + logger.error("Plausible collection failed: %s", result.error) +{% endif %} + + +async def collect_all_job() -> None: + """Scheduled job: run all enabled collectors.""" + logger.info("Running scheduled collection for all enabled sources") + async with get_async_session() as session: + service = CollectorService(session) + results = await service.collect_all() + for source_key, result in results.items(): + if result.success: + logger.info( + "%s: %d written, %d skipped", + source_key, + result.rows_written, + result.rows_skipped, + ) + else: + logger.error("%s failed: %s", source_key, result.error) diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/models.py b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/models.py new file mode 100644 index 00000000..19aa53c4 --- /dev/null +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/models.py @@ -0,0 +1,167 @@ +""" +Insight service database models. + +Five tables: +- InsightSource: Lookup table for data sources (GitHub, PyPI, Plausible, Reddit) +- InsightMetricType: Lookup table for metric types, FK to source +- InsightMetric: Time-series data, FK to metric type, JSONB metadata +- InsightRecord: All-time records/milestones per metric type +- InsightEvent: Contextual markers (releases, posts, external events) +""" + +from datetime import UTC, datetime +from typing import Any + +from sqlalchemy import JSON, Column, Index, String, UniqueConstraint +from sqlmodel import Field, Relationship, SQLModel + +# --------------------------------------------------------------------------- +# Lookup: InsightSource +# --------------------------------------------------------------------------- + + +class InsightSource(SQLModel, table=True): + """Data source for insight collection (e.g., GitHub, PyPI, Plausible).""" + + __tablename__ = "insight_source" + + id: int | None = Field(default=None, primary_key=True) + key: str = Field(unique=True, index=True, max_length=64) + display_name: str = Field(max_length=128) + collection_interval_hours: int | None = Field(default=None) + requires_auth: bool = Field(default=False) + enabled: bool = Field(default=True) + last_collected_at: datetime | None = Field(default=None) + metadata_: dict[str, Any] = Field( + default_factory=dict, sa_column=Column("metadata", JSON) + ) + created_at: datetime = Field( + default_factory=lambda: datetime.now(UTC).replace(tzinfo=None) + ) + + # Relationships + metric_types: list["InsightMetricType"] = Relationship(back_populates="source") + + +# --------------------------------------------------------------------------- +# Lookup: InsightMetricType +# --------------------------------------------------------------------------- + + +class InsightMetricType(SQLModel, table=True): + """Type of metric collected by a source (e.g., clones, unique_cloners).""" + + __tablename__ = "insight_metric_type" + __table_args__ = ( + UniqueConstraint("source_id", "key", name="uq_metric_type_source_key"), + ) + + id: int | None = Field(default=None, primary_key=True) + source_id: int = Field(foreign_key="insight_source.id", index=True) + key: str = Field(index=True, max_length=64) + display_name: str = Field(max_length=128) + unit: str = Field(max_length=32) # count, seconds, percentage, ratio, json + metadata_: dict[str, Any] = Field( + default_factory=dict, sa_column=Column("metadata", JSON) + ) + created_at: datetime = Field( + default_factory=lambda: datetime.now(UTC).replace(tzinfo=None) + ) + + # Relationships + source: InsightSource = Relationship(back_populates="metric_types") + metrics: list["InsightMetric"] = Relationship(back_populates="metric_type") + records: list["InsightRecord"] = Relationship(back_populates="metric_type") + + +# --------------------------------------------------------------------------- +# Time-series: InsightMetric +# --------------------------------------------------------------------------- + + +class InsightMetric(SQLModel, table=True): + """Single metric data point. The core time-series table.""" + + __tablename__ = "insight_metric" + __table_args__ = ( + # THE primary query pattern: "get clones for last 14 days" + Index("ix_insight_metric_type_date", "metric_type_id", "date"), + # Secondary: "all metrics for this date" + Index("ix_insight_metric_date", "date"), + ) + + id: int | None = Field(default=None, primary_key=True) + date: datetime = Field(index=False) # Covered by compound indexes above + metric_type_id: int = Field(foreign_key="insight_metric_type.id", index=True) + value: float = Field(default=0.0) + period: str = Field(max_length=32) # daily, cumulative, snapshot, event + metadata_: dict[str, Any] = Field( + default_factory=dict, sa_column=Column("metadata", JSON) + ) + created_at: datetime = Field( + default_factory=lambda: datetime.now(UTC).replace(tzinfo=None) + ) + + # Relationships + metric_type: InsightMetricType = Relationship(back_populates="metrics") + + +# --------------------------------------------------------------------------- +# Records: InsightRecord +# --------------------------------------------------------------------------- + + +class InsightRecord(SQLModel, table=True): + """All-time record for a metric type. Updated in place when broken.""" + + __tablename__ = "insight_record" + + id: int | None = Field(default=None, primary_key=True) + metric_type_id: int = Field( + foreign_key="insight_metric_type.id", unique=True, index=True + ) + value: float = Field(default=0.0) + date_achieved: datetime + previous_value: float | None = Field(default=None) + previous_date: datetime | None = Field(default=None) + context: str | None = Field(default=None, max_length=512) + metadata_: dict[str, Any] = Field( + default_factory=dict, sa_column=Column("metadata", JSON) + ) + updated_at: datetime = Field( + default_factory=lambda: datetime.now(UTC).replace(tzinfo=None) + ) + created_at: datetime = Field( + default_factory=lambda: datetime.now(UTC).replace(tzinfo=None) + ) + + # Relationships + metric_type: InsightMetricType = Relationship(back_populates="records") + + +# --------------------------------------------------------------------------- +# Events: InsightEvent +# --------------------------------------------------------------------------- + + +class InsightEvent(SQLModel, table=True): + """Contextual marker explaining why a metric changed.""" + + __tablename__ = "insight_event" + __table_args__ = ( + Index("ix_insight_event_date", "date"), + Index("ix_insight_event_type_date", "event_type", "date"), + ) + + id: int | None = Field(default=None, primary_key=True) + date: datetime + event_type: str = Field( + max_length=64, sa_column=Column("event_type", String(64), nullable=False) + ) # reddit_post, release, localization, external + description: str = Field(max_length=1024) + metadata_: dict[str, Any] = Field( + default_factory=dict, sa_column=Column("metadata", JSON) + ) + created_at: datetime = Field( + default_factory=lambda: datetime.now(UTC).replace(tzinfo=None) + ) diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/query_service.py b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/query_service.py new file mode 100644 index 00000000..2607b6f5 --- /dev/null +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/query_service.py @@ -0,0 +1,239 @@ +""" +Sync query layer for insight metrics. + +Centralizes all DB queries used by the Overseer dashboard tabs. +This will become the API endpoint layer when we migrate from direct DB access. +""" + +from datetime import datetime, timedelta +from typing import Any + +from app.core.db import SessionLocal +from sqlmodel import Session, select + +from .constants import Periods +from .models import InsightEvent, InsightMetric, InsightMetricType, InsightSource + + +class InsightQueryService: + """Sync query service for insight metrics. One session per service lifetime.""" + + def __init__(self, session: Session | None = None) -> None: + self.session = session or SessionLocal() + self._owns_session = session is None + self._type_cache: dict[str, InsightMetricType | None] = {} + + def close(self) -> None: + if self._owns_session: + self.session.close() + + def __enter__(self) -> "InsightQueryService": + return self + + def __exit__(self, *args: Any) -> None: + self.close() + + # -- metric type lookup (cached) ------------------------------------------ + + def _get_type(self, key: str) -> InsightMetricType | None: + if key not in self._type_cache: + self._type_cache[key] = self.session.exec( + select(InsightMetricType).where(InsightMetricType.key == key) + ).first() + return self._type_cache[key] + + # -- core queries --------------------------------------------------------- + + def get_daily(self, key: str, cutoff: datetime) -> list[InsightMetric]: + """Fetch daily metrics for a key from cutoff date onward.""" + mt = self._get_type(key) + if not mt: + return [] + return list( + self.session.exec( + select(InsightMetric) + .where( + InsightMetric.metric_type_id == mt.id, + InsightMetric.period == Periods.DAILY, + InsightMetric.date >= cutoff, + ) + .order_by(InsightMetric.date.asc()) + ).all() + ) + + def get_daily_range( + self, key: str, start: datetime, end: datetime + ) -> list[InsightMetric]: + """Fetch daily metrics between start (inclusive) and end (exclusive).""" + mt = self._get_type(key) + if not mt: + return [] + return list( + self.session.exec( + select(InsightMetric).where( + InsightMetric.metric_type_id == mt.id, + InsightMetric.period == Periods.DAILY, + InsightMetric.date >= start, + InsightMetric.date < end, + ) + ).all() + ) + + def get_latest(self, key: str) -> InsightMetric | None: + """Fetch the most recent metric row for a key.""" + mt = self._get_type(key) + if not mt: + return None + return self.session.exec( + select(InsightMetric) + .where(InsightMetric.metric_type_id == mt.id) + .order_by(InsightMetric.date.desc()) + .limit(1) + ).first() + + def get_events(self, key: str, cutoff: datetime) -> list[InsightMetric]: + """Fetch event-period metrics for a key from cutoff onward.""" + mt = self._get_type(key) + if not mt: + return [] + return list( + self.session.exec( + select(InsightMetric) + .where( + InsightMetric.metric_type_id == mt.id, + InsightMetric.period == Periods.EVENT, + InsightMetric.date >= cutoff, + ) + .order_by(InsightMetric.date.desc()) + ).all() + ) + + def get_all_events(self, key: str) -> list[InsightMetric]: + """Fetch all event-period metrics for a key (no date filter).""" + mt = self._get_type(key) + if not mt: + return [] + return list( + self.session.exec( + select(InsightMetric) + .where( + InsightMetric.metric_type_id == mt.id, + InsightMetric.period == Periods.EVENT, + ) + .order_by(InsightMetric.date.asc()) + ).all() + ) + + def get_events_in_range( + self, key: str, start: datetime, end: datetime + ) -> list[InsightMetric]: + """Fetch event-period metrics between start (inclusive) and end (exclusive).""" + mt = self._get_type(key) + if not mt: + return [] + return list( + self.session.exec( + select(InsightMetric).where( + InsightMetric.metric_type_id == mt.id, + InsightMetric.period == Periods.EVENT, + InsightMetric.date >= start, + InsightMetric.date < end, + ) + ).all() + ) + + def get_all_metrics(self, key: str) -> list[InsightMetric]: + """Fetch all metrics for a key (any period, ordered by date desc).""" + mt = self._get_type(key) + if not mt: + return [] + return list( + self.session.exec( + select(InsightMetric) + .where(InsightMetric.metric_type_id == mt.id) + .order_by(InsightMetric.date.desc()) + ).all() + ) + + def sum_range(self, key: str, start: datetime, end: datetime) -> int: + """Sum daily metric values between start and end.""" + rows = self.get_daily_range(key, start, end) + return sum(int(r.value) for r in rows) + + def sum_daily(self, key: str, cutoff: datetime) -> int: + """Sum all daily metric values from cutoff onward.""" + rows = self.get_daily(key, cutoff) + return sum(int(r.value) for r in rows) + + # -- insight events ------------------------------------------------------- + + def get_insight_events( + self, + cutoff: datetime | None = None, + type_filter: set[str] | None = None, + ) -> list[InsightEvent]: + """Fetch InsightEvent rows with optional date and type filters.""" + q = select(InsightEvent).order_by(InsightEvent.date.asc()) + events = list(self.session.exec(q).all()) + + if cutoff: + cutoff_str = str(cutoff.date()) + events = [ev for ev in events if str(ev.date)[:10] >= cutoff_str] + + if type_filter: + events = [ev for ev in events if ev.event_type in type_filter] + + return events + + def get_recent_insight_events(self, limit: int = 15) -> list[InsightEvent]: + """Fetch most recent InsightEvent rows.""" + return list( + self.session.exec( + select(InsightEvent).order_by(InsightEvent.date.desc()).limit(limit) + ).all() + ) + + def get_milestone_events(self) -> list[InsightEvent]: + """Fetch all milestone and feature events.""" + return list( + self.session.exec( + select(InsightEvent).where( + InsightEvent.event_type.in_( + ["milestone_github", "milestone_pypi", "feature"] + ) + ) + ).all() + ) + + def get_release_metrics(self) -> list[InsightMetric]: + """Fetch release metric rows.""" + mt = self._get_type("releases") + if not mt: + return [] + q = select(InsightMetric).where(InsightMetric.metric_type_id == mt.id) + return list(self.session.exec(q).all()) + + # -- sources -------------------------------------------------------------- + + def get_sources(self) -> list[InsightSource]: + """Fetch all insight sources.""" + return list(self.session.exec(select(InsightSource)).all()) + + # -- convenience: date helpers -------------------------------------------- + + @staticmethod + def compute_cutoffs(days: int) -> tuple[datetime, datetime]: + """Compute current and previous period cutoff dates. + + Returns (cutoff, prev_cutoff) where: + - cutoff = now - days + - prev_cutoff = cutoff - days + """ + now = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) + if days >= 9999: + cutoff = datetime(2000, 1, 1) + prev_cutoff = datetime(2000, 1, 1) + else: + cutoff = now - timedelta(days=days) + prev_cutoff = cutoff - timedelta(days=days) + return cutoff, prev_cutoff diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/schemas.py b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/schemas.py new file mode 100644 index 00000000..101e1ea2 --- /dev/null +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/schemas.py @@ -0,0 +1,233 @@ +""" +Pydantic models for insight metadata shapes. + +These define the structure of JSONB metadata stored in insight_metric rows. +Used for validation on write and typed access on read. + +Usage: + # Writing + profile = StarProfileMetadata(username="ncthuc", ...) + await upsert_metric(..., metadata=profile.model_dump()) + + # Reading + profile = StarProfileMetadata.model_validate(metric.metadata_) +""" + +from pydantic import BaseModel, Field + +# --------------------------------------------------------------------------- +# GitHub Traffic metadata +# --------------------------------------------------------------------------- + + +class ReferrerEntry(BaseModel): + """Single referrer with view/unique counts.""" + + views: int = Field(ge=0) + uniques: int = Field(ge=0) + + +class PopularPathEntry(BaseModel): + """Single popular path/page from GitHub traffic.""" + + path: str + title: str + views: int = Field(ge=0) + uniques: int = Field(ge=0) + + +class PopularPathsMetadata(BaseModel): + """Metadata for the popular_paths metric type.""" + + paths: list[PopularPathEntry] = Field(default_factory=list) + + +# Note: Referrer metadata is stored as dict[str, ReferrerEntry] directly, +# where keys are referrer domains. Use ReferrerEntry.model_validate() per entry. + + +# --------------------------------------------------------------------------- +# GitHub Stars metadata +# --------------------------------------------------------------------------- + + +class StarProfileMetadata(BaseModel): + """GitHub user profile stored as metadata on a new_star event row.""" + + username: str + name: str | None = None + location: str | None = None + company: str | None = None + bio: str | None = None + email: str | None = None + blog: str | None = None + followers: int = Field(default=0, ge=0) + following: int = Field(default=0, ge=0) + public_repos: int = Field(default=0, ge=0) + stars_given: int = Field(default=0, ge=0) + account_created: str | None = None # ISO 8601 + account_age_years: float | None = None + github_pro: bool = False + top_repo: str | None = None + top_repo_stars: int | None = None + + +# --------------------------------------------------------------------------- +# PyPI metadata +# --------------------------------------------------------------------------- + + +class PyPIVersionDetail(BaseModel): + """Per-version download breakdown.""" + + total: int = 0 + human: int = 0 + + +class PyPIDownloadMetadata(BaseModel): + """Version breakdown metadata for daily PyPI downloads.""" + + versions: dict[str, PyPIVersionDetail] = Field(default_factory=dict) + + +class PyPICountryBreakdown(BaseModel): + """Country breakdown for PyPI downloads.""" + + countries: dict[str, int] = Field(default_factory=dict) # {"US": 1186, "CN": 206} + + +class PyPIInstallerBreakdown(BaseModel): + """Installer breakdown for PyPI downloads.""" + + installers: dict[str, int] = Field( + default_factory=dict + ) # {"bandersnatch": 1092, "pip": 39} + + +class PyPITypeBreakdown(BaseModel): + """Distribution type breakdown for PyPI downloads.""" + + types: dict[str, int] = Field( + default_factory=dict + ) # {"sdist": 1376, "bdist_wheel": 1032} + + +# --------------------------------------------------------------------------- +# Plausible metadata +# --------------------------------------------------------------------------- + + +class PlausibleSiteMetadata(BaseModel): + """Site identifier on Plausible aggregate metric rows.""" + + site: str + + +class PlausiblePageEntry(BaseModel): + """Single page from Plausible analytics.""" + + url: str + visitors: int = Field(default=0, ge=0) + time_s: float | None = None # Time on page in seconds + scroll: float | None = None # Scroll depth percentage + + +class PlausibleTopPagesMetadata(BaseModel): + """Metadata for the top_pages metric type.""" + + site: str + pages: list[PlausiblePageEntry] = Field(default_factory=list) + + +class PlausibleCountryEntry(BaseModel): + """Single country from Plausible analytics.""" + + country: str + visitors: int = Field(default=0, ge=0) + + +class PlausibleTopCountriesMetadata(BaseModel): + """Metadata for the top_countries metric type.""" + + site: str + countries: list[PlausibleCountryEntry] = Field(default_factory=list) + + +# --------------------------------------------------------------------------- +# Reddit metadata +# --------------------------------------------------------------------------- + + +class RedditPostMetadata(BaseModel): + """Reddit post stats stored as metadata on a post_stats snapshot row.""" + + post_id: str + subreddit: str | None = None + title: str | None = None + comments: int = Field(default=0, ge=0) + views: int | None = None + shares: int | None = None + upvote_ratio: float | None = None + url: str | None = None + + +# --------------------------------------------------------------------------- +# Event metadata +# --------------------------------------------------------------------------- + + +class ReleaseEventMetadata(BaseModel): + """Metadata for release-type events.""" + + version: str + url: str | None = None + + +class RedditPostEventMetadata(BaseModel): + """Metadata for reddit_post-type events.""" + + subreddit: str + url: str | None = None + post_id: str | None = None + + +class ExternalEventMetadata(BaseModel): + """Metadata for external events (industry news, etc.).""" + + url: str | None = None + source: str | None = None + + +# --------------------------------------------------------------------------- +# GitHub Events metadata (from ClickHouse) +# --------------------------------------------------------------------------- + + +class ForkEventMetadata(BaseModel): + """Metadata for a fork event from ClickHouse GitHub data.""" + + actor: str + date: str # ISO date + + +class ReleaseEventMetadata2(BaseModel): + """Metadata for a release event from ClickHouse GitHub data.""" + + tag: str + name: str | None = None + actor: str + + +class ActivitySummaryMetadata(BaseModel): + """Daily event type counts from ClickHouse GitHub data.""" + + push: int = 0 + issues: int = 0 + pull_requests: int = 0 + pull_request_reviews: int = 0 + issue_comments: int = 0 + forks: int = 0 + stars: int = 0 + releases: int = 0 + creates: int = 0 + deletes: int = 0 diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/seed.py.jinja b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/seed.py.jinja new file mode 100644 index 00000000..9669accc --- /dev/null +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/insights/seed.py.jinja @@ -0,0 +1,154 @@ +""" +Seed data for insight service tables. + +Populates insight_source and insight_metric_type with default entries. +Idempotent — safe to call multiple times. +""" + +from sqlmodel import Session, select + +from .constants import MetricKeys, SourceKeys, Units +from .models import InsightMetricType, InsightSource + + +# --------------------------------------------------------------------------- +# Source definitions +# --------------------------------------------------------------------------- + +DEFAULT_SOURCES: list[dict] = [ + { + "key": SourceKeys.GITHUB_TRAFFIC, + "display_name": "GitHub Traffic", + "collection_interval_hours": 6, + "requires_auth": True, + "enabled": {% if insights_github %}True{% else %}False{% endif %}, + }, + { + "key": SourceKeys.GITHUB_STARS, + "display_name": "GitHub Stars", + "collection_interval_hours": 24, + "requires_auth": True, + "enabled": {% if insights_github %}True{% else %}False{% endif %}, + }, + { + "key": SourceKeys.PYPI, + "display_name": "PyPI", + "collection_interval_hours": 24, + "requires_auth": False, + "enabled": {% if insights_pypi %}True{% else %}False{% endif %}, + }, + { + "key": SourceKeys.PLAUSIBLE, + "display_name": "Plausible", + "collection_interval_hours": 24, + "requires_auth": True, + "enabled": {% if insights_plausible %}True{% else %}False{% endif %}, + }, + { + "key": SourceKeys.GITHUB_EVENTS, + "display_name": "GitHub Events", + "collection_interval_hours": 24, + "requires_auth": False, + "enabled": {% if insights_github %}True{% else %}False{% endif %}, + }, + { + "key": SourceKeys.REDDIT, + "display_name": "Reddit", + "collection_interval_hours": None, + "requires_auth": False, + "enabled": {% if insights_reddit %}True{% else %}False{% endif %}, + }, +] + + +# --------------------------------------------------------------------------- +# Metric type definitions (keyed by source key) +# --------------------------------------------------------------------------- + +DEFAULT_METRIC_TYPES: dict[str, list[dict]] = { + SourceKeys.GITHUB_TRAFFIC: [ + {"key": MetricKeys.CLONES, "display_name": "Clones", "unit": Units.COUNT}, + {"key": MetricKeys.UNIQUE_CLONERS, "display_name": "Unique Cloners", "unit": Units.COUNT}, + {"key": MetricKeys.VIEWS, "display_name": "Views", "unit": Units.COUNT}, + {"key": MetricKeys.UNIQUE_VISITORS, "display_name": "Unique Visitors", "unit": Units.COUNT}, + {"key": MetricKeys.REFERRERS, "display_name": "Referrers", "unit": Units.JSON}, + {"key": MetricKeys.POPULAR_PATHS, "display_name": "Popular Paths", "unit": Units.JSON}, + ], + SourceKeys.GITHUB_STARS: [ + {"key": MetricKeys.NEW_STAR, "display_name": "New Star", "unit": Units.COUNT}, + ], + SourceKeys.PYPI: [ + {"key": MetricKeys.DOWNLOADS_TOTAL, "display_name": "Total Downloads", "unit": Units.COUNT}, + {"key": MetricKeys.DOWNLOADS_DAILY, "display_name": "Daily Downloads", "unit": Units.COUNT}, + {"key": MetricKeys.DOWNLOADS_DAILY_HUMAN, "display_name": "Daily Downloads (Human)", "unit": Units.COUNT}, + {"key": MetricKeys.DOWNLOADS_BY_COUNTRY, "display_name": "Downloads by Country", "unit": Units.JSON}, + {"key": MetricKeys.DOWNLOADS_BY_INSTALLER, "display_name": "Downloads by Installer", "unit": Units.JSON}, + {"key": MetricKeys.DOWNLOADS_BY_VERSION, "display_name": "Downloads by Version", "unit": Units.JSON}, + {"key": MetricKeys.DOWNLOADS_BY_TYPE, "display_name": "Downloads by Type", "unit": Units.JSON}, + ], + SourceKeys.GITHUB_EVENTS: [ + {"key": MetricKeys.FORKS, "display_name": "Forks", "unit": Units.COUNT}, + {"key": MetricKeys.RELEASES, "display_name": "Releases", "unit": Units.COUNT}, + {"key": MetricKeys.STAR_EVENTS, "display_name": "Star Events", "unit": Units.COUNT}, + {"key": MetricKeys.ACTIVITY_SUMMARY, "display_name": "Activity Summary", "unit": Units.JSON}, + ], + SourceKeys.PLAUSIBLE: [ + {"key": MetricKeys.VISITORS, "display_name": "Visitors", "unit": Units.COUNT}, + {"key": MetricKeys.PAGEVIEWS, "display_name": "Pageviews", "unit": Units.COUNT}, + {"key": MetricKeys.AVG_DURATION, "display_name": "Avg Duration", "unit": Units.SECONDS}, + {"key": MetricKeys.BOUNCE_RATE, "display_name": "Bounce Rate", "unit": Units.PERCENTAGE}, + {"key": MetricKeys.TOP_PAGES, "display_name": "Top Pages", "unit": Units.JSON}, + ], + SourceKeys.REDDIT: [ + {"key": MetricKeys.POST_STATS, "display_name": "Post Stats", "unit": Units.JSON}, + ], +} + + +def seed_insight_tables(session: Session) -> None: + """ + Seed insight_source and insight_metric_type tables. + + Idempotent: skips rows that already exist (matched by key). + """ + # Seed sources + for source_data in DEFAULT_SOURCES: + existing = session.exec( + select(InsightSource).where(InsightSource.key == source_data["key"]) + ).first() + if existing is None: + session.add(InsightSource(**source_data)) + + session.flush() # Ensure source IDs are available for metric types + + # Build source key → id lookup + sources = session.exec(select(InsightSource)).all() + source_map: dict[str, int] = {} + for source in sources: + if source.id is not None: + source_map[source.key] = source.id + + # Seed metric types + for source_key, metric_types in DEFAULT_METRIC_TYPES.items(): + source_id = source_map.get(source_key) + if source_id is None: + continue + + for mt_data in metric_types: + existing = session.exec( + select(InsightMetricType).where( + InsightMetricType.source_id == source_id, + InsightMetricType.key == mt_data["key"], + ) + ).first() + if existing is None: + session.add( + InsightMetricType( + source_id=source_id, + key=mt_data["key"], + display_name=mt_data["display_name"], + unit=mt_data["unit"], + ) + ) + + session.commit() diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/system/ui.py.jinja b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/system/ui.py.jinja index 3cce21c0..46e1a0e2 100644 --- a/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/system/ui.py.jinja +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/app/services/system/ui.py.jinja @@ -70,6 +70,9 @@ def get_component_title(component_name: str) -> str: {%- if include_comms %} "service_comms": "Communications", {%- endif %} + {%- if include_insights %} + "service_insights": "Insights", + {%- endif %} {%- if include_ingress %} "ingress": "Ingress", {%- endif %} @@ -119,9 +122,38 @@ def get_component_subtitle( Uses the base label from ``get_component_label`` and appends the version from health-check metadata when available. """ +{% if include_insights %} + # Insights: build subtitle from enabled sources + if component_name == "service_insights" and metadata: + sources = metadata.get("sources", {}) + if isinstance(sources, dict): + enabled = [ + _format_source_name(key) + for key, detail in sources.items() + if isinstance(detail, dict) and detail.get("enabled") + ] + if enabled: + return ", ".join(enabled) + return "Adoption Analytics" +{% endif %} + label = get_component_label(component_name) if metadata: version = metadata.get("version", "") if version and version != "unknown": return f"{label} {version}" return label + + +{% if include_insights %} +def _format_source_name(key: str) -> str: + """Format source keys for display: github_traffic → GitHub, pypi → PyPI.""" + display_map = { + "github_traffic": "GitHub", + "github_stars": "Stars", + "pypi": "PyPI", + "plausible": "Plausible", + "reddit": "Reddit", + } + return display_map.get(key, key.replace("_", " ").title()) +{% endif %} diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/pyproject.toml.jinja b/aegis/templates/copier-aegis-project/{{ project_slug }}/pyproject.toml.jinja index 4a3932a6..61143444 100644 --- a/aegis/templates/copier-aegis-project/{{ project_slug }}/pyproject.toml.jinja +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/pyproject.toml.jinja @@ -121,8 +121,8 @@ dependencies = [ [project.optional-dependencies] dev = [ - "pytest==8.4.1", - "pytest-asyncio==1.1.0", + "pytest>=9.0.3", + "pytest-asyncio>=1.1.0", "ruff==0.12.7", "ty", "types-psutil==7.0.0.20250801", diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/tests/services/test_collector_github_events.py b/aegis/templates/copier-aegis-project/{{ project_slug }}/tests/services/test_collector_github_events.py new file mode 100644 index 00000000..f29db1b8 --- /dev/null +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/tests/services/test_collector_github_events.py @@ -0,0 +1,252 @@ +""" +Tests for GitHubEventsCollector -- ClickHouse-based event collection. +""" + +from datetime import datetime +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from app.services.insights.collectors.github_events import GitHubEventsCollector +from app.services.insights.constants import MetricKeys, Periods, SourceKeys +from app.services.insights.models import InsightMetric, InsightMetricType, InsightSource +from sqlmodel import select +from sqlmodel.ext.asyncio.session import AsyncSession + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +async def _seed_github_events( + session: AsyncSession, +) -> tuple[InsightSource, dict[str, InsightMetricType]]: + """Seed github_events source with all metric types.""" + source = InsightSource( + key=SourceKeys.GITHUB_EVENTS, + display_name="GitHub Events", + collection_interval_hours=6, + enabled=True, + ) + session.add(source) + await session.flush() + + metric_types: dict[str, InsightMetricType] = {} + for key in [ + MetricKeys.FORKS, + MetricKeys.RELEASES, + MetricKeys.STAR_EVENTS, + MetricKeys.ACTIVITY_SUMMARY, + ]: + mt = InsightMetricType( + source_id=source.id, # type: ignore[arg-type] + key=key, + display_name=key.replace("_", " ").title(), + unit="json" if key == MetricKeys.ACTIVITY_SUMMARY else "count", + ) + session.add(mt) + await session.flush() + metric_types[key] = mt + + return source, metric_types + + +# --------------------------------------------------------------------------- +# Tests: GitHubEventsCollector +# --------------------------------------------------------------------------- + + +class TestGitHubEventsCollectorSuccess: + """Test successful collection.""" + + @pytest.mark.asyncio + async def test_collect_success(self, async_db_session: AsyncSession) -> None: + """Happy path: collect forks, releases, stars, activity summary.""" + await _seed_github_events(async_db_session) + + # Mock httpx.AsyncClient + mock_client = AsyncMock() + + async def mock_post(url: str, **kwargs): + """Mock ClickHouse API responses.""" + content = kwargs.get("content", "") + + if "ForkEvent" in content: + # Forks: actor, day + return MagicMock( + json=lambda: { + "data": [ + ["user1", "2026-04-11"], + ["user2", "2026-04-10"], + ] + }, + raise_for_status=lambda: None, + ) + elif "ReleaseEvent" in content: + # Releases: actor, tag, name, day + return MagicMock( + json=lambda: { + "data": [ + ["bot", "v1.0.0", "Version 1.0.0", "2026-04-10"], + ] + }, + raise_for_status=lambda: None, + ) + elif "WatchEvent" in content: + # Stars: day, count + return MagicMock( + json=lambda: { + "data": [ + ["2026-04-11", 5], + ["2026-04-10", 3], + ] + }, + raise_for_status=lambda: None, + ) + else: # Activity summary + # Activity: day, event_type, count + return MagicMock( + json=lambda: { + "data": [ + ["2026-04-11", "PushEvent", 10], + ["2026-04-11", "PullRequestEvent", 2], + ["2026-04-10", "IssuesEvent", 1], + ] + }, + raise_for_status=lambda: None, + ) + + mock_client.post = mock_post + + with patch( + "app.services.insights.collectors.github_events.httpx.AsyncClient" + ) as mock_async_client: + mock_async_client.return_value.__aenter__.return_value = mock_client + + with patch( + "app.services.insights.collectors.github_events.settings" + ) as mock_settings: + mock_settings.INSIGHT_GITHUB_OWNER = "lbedner" + mock_settings.INSIGHT_GITHUB_REPO = "aegis-stack" + + collector = GitHubEventsCollector(async_db_session) + result = await collector.collect() + + assert result.success is True + assert result.source_key == SourceKeys.GITHUB_EVENTS + assert result.rows_written > 0 + + # Verify metrics were written + metrics = await async_db_session.exec(select(InsightMetric)) + assert len(metrics.all()) > 0 + + @pytest.mark.asyncio + async def test_collect_missing_config(self, async_db_session: AsyncSession) -> None: + """Missing INSIGHT_GITHUB_OWNER or INSIGHT_GITHUB_REPO returns error.""" + await _seed_github_events(async_db_session) + + with patch( + "app.services.insights.collectors.github_events.settings" + ) as mock_settings: + mock_settings.INSIGHT_GITHUB_OWNER = None + mock_settings.INSIGHT_GITHUB_REPO = "aegis-stack" + + collector = GitHubEventsCollector(async_db_session) + result = await collector.collect() + + assert result.success is False + assert "Missing INSIGHT_GITHUB_OWNER" in result.error + + @pytest.mark.asyncio + async def test_collect_api_error(self, async_db_session: AsyncSession) -> None: + """HTTP error from ClickHouse is handled gracefully.""" + await _seed_github_events(async_db_session) + + mock_client = AsyncMock() + mock_client.post = AsyncMock( + side_effect=Exception("ClickHouse connection failed") + ) + + with patch( + "app.services.insights.collectors.github_events.httpx.AsyncClient" + ) as mock_async_client: + mock_async_client.return_value.__aenter__.return_value = mock_client + + with patch( + "app.services.insights.collectors.github_events.settings" + ) as mock_settings: + mock_settings.INSIGHT_GITHUB_OWNER = "lbedner" + mock_settings.INSIGHT_GITHUB_REPO = "aegis-stack" + + collector = GitHubEventsCollector(async_db_session) + result = await collector.collect() + + assert result.success is False + assert "GitHub events collection failed" in result.error + + @pytest.mark.asyncio + async def test_deduplication_forks(self, async_db_session: AsyncSession) -> None: + """Second collect doesn't duplicate fork rows.""" + source, metric_types = await _seed_github_events(async_db_session) + forks_type = metric_types[MetricKeys.FORKS] + + # Pre-populate an existing fork + existing_fork = InsightMetric( + date=datetime(2026, 4, 11), + metric_type_id=forks_type.id, # type: ignore[arg-type] + value=1.0, + period=Periods.EVENT, + metadata_={"actor": "user1", "date": "2026-04-11"}, + ) + async_db_session.add(existing_fork) + await async_db_session.commit() + + # Now collect with user1 already in DB + mock_client = AsyncMock() + + async def mock_post(url: str, **kwargs): + content = kwargs.get("content", "") + if "ForkEvent" in content: + return MagicMock( + json=lambda: { + "data": [["user1", "2026-04-11"], ["user2", "2026-04-10"]] + }, + raise_for_status=lambda: None, + ) + elif "ReleaseEvent" in content or "WatchEvent" in content: + return MagicMock( + json=lambda: {"data": []}, + raise_for_status=lambda: None, + ) + else: + return MagicMock( + json=lambda: {"data": []}, + raise_for_status=lambda: None, + ) + + mock_client.post = mock_post + + with patch( + "app.services.insights.collectors.github_events.httpx.AsyncClient" + ) as mock_async_client: + mock_async_client.return_value.__aenter__.return_value = mock_client + + with patch( + "app.services.insights.collectors.github_events.settings" + ) as mock_settings: + mock_settings.INSIGHT_GITHUB_OWNER = "lbedner" + mock_settings.INSIGHT_GITHUB_REPO = "aegis-stack" + + collector = GitHubEventsCollector(async_db_session) + result = await collector.collect() + + assert result.success is True + # user1 should be skipped (already in DB), user2 should be new + assert result.rows_skipped == 1 + assert result.rows_written >= 1 + + # Verify only 2 fork rows exist (original + new user2) + fork_metrics = await async_db_session.exec( + select(InsightMetric).where(InsightMetric.metric_type_id == forks_type.id) + ) + fork_count = len(fork_metrics.all()) + assert fork_count == 2 diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/tests/services/test_collector_github_stars.py b/aegis/templates/copier-aegis-project/{{ project_slug }}/tests/services/test_collector_github_stars.py new file mode 100644 index 00000000..34a7c437 --- /dev/null +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/tests/services/test_collector_github_stars.py @@ -0,0 +1,270 @@ +""" +Tests for GitHubStarsCollector -- GitHub REST API stargazers collection. +""" + +from datetime import datetime +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from app.services.insights.collectors.github_stars import GitHubStarsCollector +from app.services.insights.constants import MetricKeys, Periods, SourceKeys +from app.services.insights.models import InsightMetric, InsightMetricType, InsightSource +from sqlmodel import select +from sqlmodel.ext.asyncio.session import AsyncSession + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +async def _seed_github_stars( + session: AsyncSession, +) -> tuple[InsightSource, dict[str, InsightMetricType]]: + """Seed github_stars source with all metric types.""" + source = InsightSource( + key=SourceKeys.GITHUB_STARS, + display_name="GitHub Stars", + collection_interval_hours=24, + enabled=True, + ) + session.add(source) + await session.flush() + + metric_types: dict[str, InsightMetricType] = {} + mt = InsightMetricType( + source_id=source.id, # type: ignore[arg-type] + key=MetricKeys.NEW_STAR, + display_name="New Star", + unit="count", + ) + session.add(mt) + await session.flush() + metric_types[MetricKeys.NEW_STAR] = mt + + return source, metric_types + + +# --------------------------------------------------------------------------- +# Tests: GitHubStarsCollector +# --------------------------------------------------------------------------- + + +class TestGitHubStarsCollectorSuccess: + """Test successful collection.""" + + @pytest.mark.asyncio + async def test_collect_success(self, async_db_session: AsyncSession) -> None: + """Happy path: collect stargazers with profiles.""" + await _seed_github_stars(async_db_session) + + # Mock httpx.AsyncClient + mock_client = AsyncMock() + + # Stargazer data for pagination + stargazers_page_1 = [ + { + "user": {"login": "user1"}, + "starred_at": "2026-04-11T10:30:00Z", + }, + { + "user": {"login": "user2"}, + "starred_at": "2026-04-10T15:00:00Z", + }, + ] + + # User profile responses + async def mock_get(url: str, **kwargs): + if "stargazers" in url: + # Return paginated stargazers + page = kwargs.get("params", {}).get("page", 1) + if page == 1: + return MagicMock( + json=lambda: stargazers_page_1, + raise_for_status=lambda: None, + ) + else: + return MagicMock( + json=lambda: [], + raise_for_status=lambda: None, + ) + else: # User profile endpoint + username = url.split("/")[-1] + return MagicMock( + json=lambda: { + "login": username, + "name": f"User {username}", + "location": "Test City", + "followers": 10, + "following": 5, + "public_repos": 20, + "starred_repos_count": 100, + "created_at": "2010-01-01T00:00:00Z", + }, + raise_for_status=lambda: None, + ) + + mock_client.get = mock_get + + with patch( + "app.services.insights.collectors.github_stars.httpx.AsyncClient" + ) as mock_async_client: + mock_async_client.return_value.__aenter__.return_value = mock_client + + with patch( + "app.services.insights.collectors.github_stars.settings" + ) as mock_settings: + mock_settings.INSIGHT_GITHUB_TOKEN = "token123" + mock_settings.INSIGHT_GITHUB_OWNER = "lbedner" + mock_settings.INSIGHT_GITHUB_REPO = "aegis-stack" + + collector = GitHubStarsCollector(async_db_session) + result = await collector.collect() + + assert result.success is True + assert result.source_key == SourceKeys.GITHUB_STARS + assert result.rows_written > 0 + + # Verify metrics were written + metrics = await async_db_session.exec(select(InsightMetric)) + metric_list = metrics.all() + assert len(metric_list) > 0 + # Check metadata has profile info + for metric in metric_list: + if metric.metadata_: + assert "username" in metric.metadata_ + + @pytest.mark.asyncio + async def test_collect_missing_config(self, async_db_session: AsyncSession) -> None: + """Missing token/owner/repo returns error.""" + await _seed_github_stars(async_db_session) + + with patch( + "app.services.insights.collectors.github_stars.settings" + ) as mock_settings: + mock_settings.INSIGHT_GITHUB_TOKEN = None + mock_settings.INSIGHT_GITHUB_OWNER = "lbedner" + mock_settings.INSIGHT_GITHUB_REPO = "aegis-stack" + + collector = GitHubStarsCollector(async_db_session) + result = await collector.collect() + + assert result.success is False + assert "Missing" in result.error + + @pytest.mark.asyncio + async def test_collect_api_error(self, async_db_session: AsyncSession) -> None: + """HTTP error from GitHub API is handled gracefully.""" + import httpx + + await _seed_github_stars(async_db_session) + + mock_response = MagicMock(status_code=403, text="Forbidden") + error = httpx.HTTPStatusError( + "403", request=MagicMock(), response=mock_response + ) + + mock_client = AsyncMock() + mock_client.get = AsyncMock(side_effect=error) + + with patch( + "app.services.insights.collectors.github_stars.httpx.AsyncClient" + ) as mock_async_client: + mock_async_client.return_value.__aenter__.return_value = mock_client + + with patch( + "app.services.insights.collectors.github_stars.settings" + ) as mock_settings: + mock_settings.INSIGHT_GITHUB_TOKEN = "token123" + mock_settings.INSIGHT_GITHUB_OWNER = "lbedner" + mock_settings.INSIGHT_GITHUB_REPO = "aegis-stack" + + collector = GitHubStarsCollector(async_db_session) + result = await collector.collect() + + assert result.success is False + assert "GitHub API error" in result.error + + @pytest.mark.asyncio + async def test_deduplication(self, async_db_session: AsyncSession) -> None: + """Second collect doesn't duplicate existing stars.""" + source, metric_types = await _seed_github_stars(async_db_session) + star_type = metric_types[MetricKeys.NEW_STAR] + + # Pre-populate star #1 + existing_star = InsightMetric( + date=datetime(2026, 4, 11), + metric_type_id=star_type.id, # type: ignore[arg-type] + value=1.0, # star number 1 + period=Periods.EVENT, + metadata_={ + "username": "user1", + "followers": 10, + "account_age_years": 15.0, + }, + ) + async_db_session.add(existing_star) + await async_db_session.commit() + + # Collect again with star #1 already in DB + stargazers_page_1 = [ + { + "user": {"login": "user1"}, + "starred_at": "2026-04-11T10:30:00Z", + }, + { + "user": {"login": "user2"}, + "starred_at": "2026-04-10T15:00:00Z", + }, + ] + + async def mock_get(url: str, **kwargs): + if "stargazers" in url: + page = kwargs.get("params", {}).get("page", 1) + if page == 1: + return MagicMock( + json=lambda: stargazers_page_1, + raise_for_status=lambda: None, + ) + else: + return MagicMock( + json=lambda: [], + raise_for_status=lambda: None, + ) + else: + return MagicMock( + json=lambda: { + "login": "user1", + "followers": 10, + "created_at": "2010-01-01T00:00:00Z", + }, + raise_for_status=lambda: None, + ) + + mock_client = AsyncMock() + mock_client.get = mock_get + + with patch( + "app.services.insights.collectors.github_stars.httpx.AsyncClient" + ) as mock_async_client: + mock_async_client.return_value.__aenter__.return_value = mock_client + + with patch( + "app.services.insights.collectors.github_stars.settings" + ) as mock_settings: + mock_settings.INSIGHT_GITHUB_TOKEN = "token123" + mock_settings.INSIGHT_GITHUB_OWNER = "lbedner" + mock_settings.INSIGHT_GITHUB_REPO = "aegis-stack" + + collector = GitHubStarsCollector(async_db_session) + result = await collector.collect() + + assert result.success is True + # Star #1 skipped (already in DB), star #2 new + assert result.rows_skipped == 1 + assert result.rows_written >= 1 + + # Verify only 2 star rows (original + new) + star_metrics = await async_db_session.exec( + select(InsightMetric).where(InsightMetric.metric_type_id == star_type.id) + ) + assert len(star_metrics.all()) == 2 diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/tests/services/test_collector_github_traffic.py b/aegis/templates/copier-aegis-project/{{ project_slug }}/tests/services/test_collector_github_traffic.py new file mode 100644 index 00000000..6dc0e4f5 --- /dev/null +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/tests/services/test_collector_github_traffic.py @@ -0,0 +1,529 @@ +""" +Tests for GitHubTrafficCollector. + +Tests the GitHub API integration, response processing, deduplication, +and error handling for the GitHub Traffic insight collector. +""" + +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from app.services.insights.collectors.github_traffic import GitHubTrafficCollector +from app.services.insights.constants import MetricKeys, SourceKeys +from app.services.insights.models import InsightMetric, InsightMetricType +from sqlmodel import select +from sqlmodel.ext.asyncio.session import AsyncSession + +from .test_insights_collectors import _seed_github_traffic + +# --------------------------------------------------------------------------- +# Response Fixtures +# --------------------------------------------------------------------------- + + +CLONES_RESPONSE = { + "count": 50, + "uniques": 10, + "clones": [ + {"timestamp": "2026-04-10T00:00:00Z", "count": 30, "uniques": 6}, + {"timestamp": "2026-04-11T00:00:00Z", "count": 20, "uniques": 4}, + ], +} + +VIEWS_RESPONSE = { + "count": 100, + "uniques": 25, + "views": [ + {"timestamp": "2026-04-10T00:00:00Z", "count": 60, "uniques": 15}, + {"timestamp": "2026-04-11T00:00:00Z", "count": 40, "uniques": 10}, + ], +} + +REFERRERS_RESPONSE = [ + {"referrer": "Google", "count": 50, "uniques": 10}, + {"referrer": "github.com", "count": 30, "uniques": 8}, +] + +PATHS_RESPONSE = [ + { + "path": "/lbedner/aegis-stack", + "title": "aegis-stack", + "count": 80, + "uniques": 20, + }, + { + "path": "/lbedner/aegis-stack/issues", + "title": "Issues", + "count": 15, + "uniques": 5, + }, +] + + +# --------------------------------------------------------------------------- +# Helper: Mock HTTP Responses +# --------------------------------------------------------------------------- + + +def _create_mock_response(json_data: dict | list) -> MagicMock: + """Create a mock httpx response with json() and raise_for_status().""" + mock = MagicMock() + mock.json.return_value = json_data + mock.raise_for_status.return_value = None + return mock + + +# --------------------------------------------------------------------------- +# Tests: Successful Collection +# --------------------------------------------------------------------------- + + +class TestGitHubTrafficCollectorSuccess: + """Tests for successful GitHub API data collection.""" + + @pytest.mark.asyncio + async def test_collect_success(self, async_db_session: AsyncSession) -> None: + """Successful collection processes all 4 API endpoints and stores data.""" + await _seed_github_traffic(async_db_session) + + collector = GitHubTrafficCollector(async_db_session) + + # Mock settings + with patch( + "app.services.insights.collectors.github_traffic.settings" + ) as mock_settings: + mock_settings.INSIGHT_GITHUB_TOKEN = "test-token" + mock_settings.INSIGHT_GITHUB_OWNER = "lbedner" + mock_settings.INSIGHT_GITHUB_REPO = "aegis-stack" + + # Mock httpx.AsyncClient + mock_client = AsyncMock() + mock_client.get.side_effect = [ + _create_mock_response(CLONES_RESPONSE), + _create_mock_response(VIEWS_RESPONSE), + _create_mock_response(REFERRERS_RESPONSE), + _create_mock_response(PATHS_RESPONSE), + ] + mock_client.__aenter__.return_value = mock_client + mock_client.__aexit__.return_value = None + + with patch("httpx.AsyncClient", return_value=mock_client): + result = await collector.collect() + + assert result.success is True + assert result.source_key == SourceKeys.GITHUB_TRAFFIC + # 2 clones entries * 2 metrics (clones + unique_cloners) = 4 + # 2 views entries * 2 metrics (views + unique_visitors) = 4 + # 1 referrers row + 1 paths row = 2 + # Total = 10 + assert result.rows_written == 10 + assert result.rows_skipped == 0 + assert result.error is None + + # Verify metrics were written to database + clones_metric = await async_db_session.exec( + select(InsightMetric) + .join(InsightMetricType) + .where(InsightMetricType.key == MetricKeys.CLONES) + ) + assert len(clones_metric.all()) == 2 # One for each day + + +# --------------------------------------------------------------------------- +# Tests: Missing Configuration +# --------------------------------------------------------------------------- + + +class TestGitHubTrafficCollectorMissingConfig: + """Tests for missing or incomplete settings.""" + + @pytest.mark.asyncio + async def test_collect_missing_token(self, async_db_session: AsyncSession) -> None: + """Collection fails when INSIGHT_GITHUB_TOKEN is missing.""" + await _seed_github_traffic(async_db_session) + + collector = GitHubTrafficCollector(async_db_session) + + with patch( + "app.services.insights.collectors.github_traffic.settings" + ) as mock_settings: + mock_settings.INSIGHT_GITHUB_TOKEN = "" + mock_settings.INSIGHT_GITHUB_OWNER = "lbedner" + mock_settings.INSIGHT_GITHUB_REPO = "aegis-stack" + + result = await collector.collect() + + assert result.success is False + assert "Missing" in result.error + assert result.rows_written == 0 + assert result.rows_skipped == 0 + + @pytest.mark.asyncio + async def test_collect_missing_owner(self, async_db_session: AsyncSession) -> None: + """Collection fails when INSIGHT_GITHUB_OWNER is missing.""" + await _seed_github_traffic(async_db_session) + + collector = GitHubTrafficCollector(async_db_session) + + with patch( + "app.services.insights.collectors.github_traffic.settings" + ) as mock_settings: + mock_settings.INSIGHT_GITHUB_TOKEN = "test-token" + mock_settings.INSIGHT_GITHUB_OWNER = "" + mock_settings.INSIGHT_GITHUB_REPO = "aegis-stack" + + result = await collector.collect() + + assert result.success is False + assert "Missing" in result.error + + @pytest.mark.asyncio + async def test_collect_missing_repo(self, async_db_session: AsyncSession) -> None: + """Collection fails when INSIGHT_GITHUB_REPO is missing.""" + await _seed_github_traffic(async_db_session) + + collector = GitHubTrafficCollector(async_db_session) + + with patch( + "app.services.insights.collectors.github_traffic.settings" + ) as mock_settings: + mock_settings.INSIGHT_GITHUB_TOKEN = "test-token" + mock_settings.INSIGHT_GITHUB_OWNER = "lbedner" + mock_settings.INSIGHT_GITHUB_REPO = "" + + result = await collector.collect() + + assert result.success is False + assert "Missing" in result.error + + +# --------------------------------------------------------------------------- +# Tests: Deduplication (Idempotency) +# --------------------------------------------------------------------------- + + +class TestGitHubTrafficCollectorDeduplication: + """Tests for deduplication behavior on repeated collections.""" + + @pytest.mark.asyncio + async def test_collect_deduplication(self, async_db_session: AsyncSession) -> None: + """Running collect twice with same data skips duplicates on second run.""" + await _seed_github_traffic(async_db_session) + + collector = GitHubTrafficCollector(async_db_session) + + with patch( + "app.services.insights.collectors.github_traffic.settings" + ) as mock_settings: + mock_settings.INSIGHT_GITHUB_TOKEN = "test-token" + mock_settings.INSIGHT_GITHUB_OWNER = "lbedner" + mock_settings.INSIGHT_GITHUB_REPO = "aegis-stack" + + # First collection + mock_client = AsyncMock() + mock_client.get.side_effect = [ + _create_mock_response(CLONES_RESPONSE), + _create_mock_response(VIEWS_RESPONSE), + _create_mock_response(REFERRERS_RESPONSE), + _create_mock_response(PATHS_RESPONSE), + ] + mock_client.__aenter__.return_value = mock_client + mock_client.__aexit__.return_value = None + + with patch("httpx.AsyncClient", return_value=mock_client): + result1 = await collector.collect() + + assert result1.success is True + assert result1.rows_written == 10 + assert result1.rows_skipped == 0 + + # Second collection with same data + mock_client2 = AsyncMock() + mock_client2.get.side_effect = [ + _create_mock_response(CLONES_RESPONSE), + _create_mock_response(VIEWS_RESPONSE), + _create_mock_response(REFERRERS_RESPONSE), + _create_mock_response(PATHS_RESPONSE), + ] + mock_client2.__aenter__.return_value = mock_client2 + mock_client2.__aexit__.return_value = None + + with patch("httpx.AsyncClient", return_value=mock_client2): + result2 = await collector.collect() + + assert result2.success is True + assert result2.rows_written == 0 # All rows already exist + assert result2.rows_skipped == 10 + + +# --------------------------------------------------------------------------- +# Tests: API Errors +# --------------------------------------------------------------------------- + + +class TestGitHubTrafficCollectorAPIErrors: + """Tests for HTTP errors and malformed responses.""" + + @pytest.mark.asyncio + async def test_collect_api_error_403(self, async_db_session: AsyncSession) -> None: + """Collection fails gracefully on HTTP 403 error.""" + await _seed_github_traffic(async_db_session) + + collector = GitHubTrafficCollector(async_db_session) + + with patch( + "app.services.insights.collectors.github_traffic.settings" + ) as mock_settings: + mock_settings.INSIGHT_GITHUB_TOKEN = "invalid-token" + mock_settings.INSIGHT_GITHUB_OWNER = "lbedner" + mock_settings.INSIGHT_GITHUB_REPO = "aegis-stack" + + # Mock a 403 response + import httpx + + mock_response = MagicMock(spec=httpx.Response) + mock_response.status_code = 403 + mock_response.text = "API rate limit exceeded" + mock_response.raise_for_status.side_effect = httpx.HTTPStatusError( + "403 Forbidden", + request=MagicMock(), + response=mock_response, + ) + + mock_client = AsyncMock() + mock_client.get.return_value = mock_response + mock_client.__aenter__.return_value = mock_client + mock_client.__aexit__.return_value = None + + with patch("httpx.AsyncClient", return_value=mock_client): + result = await collector.collect() + + assert result.success is False + assert "403" in result.error + assert result.rows_written == 0 + + @pytest.mark.asyncio + async def test_collect_api_error_500(self, async_db_session: AsyncSession) -> None: + """Collection fails gracefully on HTTP 500 error.""" + await _seed_github_traffic(async_db_session) + + collector = GitHubTrafficCollector(async_db_session) + + with patch( + "app.services.insights.collectors.github_traffic.settings" + ) as mock_settings: + mock_settings.INSIGHT_GITHUB_TOKEN = "test-token" + mock_settings.INSIGHT_GITHUB_OWNER = "lbedner" + mock_settings.INSIGHT_GITHUB_REPO = "aegis-stack" + + import httpx + + mock_response = MagicMock(spec=httpx.Response) + mock_response.status_code = 500 + mock_response.text = "Internal Server Error" + mock_response.raise_for_status.side_effect = httpx.HTTPStatusError( + "500 Internal Server Error", + request=MagicMock(), + response=mock_response, + ) + + mock_client = AsyncMock() + mock_client.get.return_value = mock_response + mock_client.__aenter__.return_value = mock_client + mock_client.__aexit__.return_value = None + + with patch("httpx.AsyncClient", return_value=mock_client): + result = await collector.collect() + + assert result.success is False + assert "500" in result.error + + @pytest.mark.asyncio + async def test_collect_generic_exception( + self, async_db_session: AsyncSession + ) -> None: + """Collection fails gracefully on unexpected exceptions.""" + await _seed_github_traffic(async_db_session) + + collector = GitHubTrafficCollector(async_db_session) + + with patch( + "app.services.insights.collectors.github_traffic.settings" + ) as mock_settings: + mock_settings.INSIGHT_GITHUB_TOKEN = "test-token" + mock_settings.INSIGHT_GITHUB_OWNER = "lbedner" + mock_settings.INSIGHT_GITHUB_REPO = "aegis-stack" + + mock_client = AsyncMock() + mock_client.get.side_effect = Exception("Network timeout") + mock_client.__aenter__.return_value = mock_client + mock_client.__aexit__.return_value = None + + with patch("httpx.AsyncClient", return_value=mock_client): + result = await collector.collect() + + assert result.success is False + assert "Network timeout" in result.error + + +# --------------------------------------------------------------------------- +# Tests: Data Processing +# --------------------------------------------------------------------------- + + +class TestGitHubTrafficCollectorProcessing: + """Tests for individual data processing methods.""" + + @pytest.mark.asyncio + async def test_process_clones(self, async_db_session: AsyncSession) -> None: + """_process_clones correctly parses timestamps and creates metrics.""" + _, metric_types = await _seed_github_traffic(async_db_session) + + collector = GitHubTrafficCollector(async_db_session) + written, skipped = await collector._process_clones(CLONES_RESPONSE) + + # 2 clone entries * 2 metrics (clones + unique_cloners) = 4 + assert written == 4 + assert skipped == 0 + + @pytest.mark.asyncio + async def test_process_views(self, async_db_session: AsyncSession) -> None: + """_process_views correctly parses timestamps and creates metrics.""" + _, metric_types = await _seed_github_traffic(async_db_session) + + collector = GitHubTrafficCollector(async_db_session) + written, skipped = await collector._process_views(VIEWS_RESPONSE) + + # 2 view entries * 2 metrics (views + unique_visitors) = 4 + assert written == 4 + assert skipped == 0 + + @pytest.mark.asyncio + async def test_process_referrers(self, async_db_session: AsyncSession) -> None: + """_process_referrers creates single snapshot row with metadata.""" + _, metric_types = await _seed_github_traffic(async_db_session) + + collector = GitHubTrafficCollector(async_db_session) + written, skipped = await collector._process_referrers(REFERRERS_RESPONSE) + + # Single snapshot row for all referrers + assert written == 1 + assert skipped == 0 + + # Verify metadata was stored + result = await async_db_session.exec( + select(InsightMetric) + .join(InsightMetricType) + .where(InsightMetricType.key == MetricKeys.REFERRERS) + ) + metrics = result.all() + assert len(metrics) == 1 + assert metrics[0].metadata_ is not None + assert "Google" in metrics[0].metadata_ + + @pytest.mark.asyncio + async def test_process_popular_paths(self, async_db_session: AsyncSession) -> None: + """_process_popular_paths creates single snapshot row with path metadata.""" + _, metric_types = await _seed_github_traffic(async_db_session) + + collector = GitHubTrafficCollector(async_db_session) + written, skipped = await collector._process_popular_paths(PATHS_RESPONSE) + + # Single snapshot row for all paths + assert written == 1 + assert skipped == 0 + + # Verify metadata structure + result = await async_db_session.exec( + select(InsightMetric) + .join(InsightMetricType) + .where(InsightMetricType.key == MetricKeys.POPULAR_PATHS) + ) + metrics = result.all() + assert len(metrics) == 1 + assert metrics[0].metadata_ is not None + assert "paths" in metrics[0].metadata_ + + +# --------------------------------------------------------------------------- +# Tests: HTTP Headers and Authorization +# --------------------------------------------------------------------------- + + +class TestGitHubTrafficCollectorHeaders: + """Tests for correct HTTP headers and authentication.""" + + @pytest.mark.asyncio + async def test_collect_sets_correct_headers( + self, async_db_session: AsyncSession + ) -> None: + """Collection sets correct GitHub API headers.""" + await _seed_github_traffic(async_db_session) + + collector = GitHubTrafficCollector(async_db_session) + + with patch( + "app.services.insights.collectors.github_traffic.settings" + ) as mock_settings: + mock_settings.INSIGHT_GITHUB_TOKEN = "test-token-123" + mock_settings.INSIGHT_GITHUB_OWNER = "lbedner" + mock_settings.INSIGHT_GITHUB_REPO = "aegis-stack" + + mock_client = AsyncMock() + mock_client.get.side_effect = [ + _create_mock_response(CLONES_RESPONSE), + _create_mock_response(VIEWS_RESPONSE), + _create_mock_response(REFERRERS_RESPONSE), + _create_mock_response(PATHS_RESPONSE), + ] + mock_client.__aenter__.return_value = mock_client + mock_client.__aexit__.return_value = None + + with patch("httpx.AsyncClient") as mock_client_class: + mock_client_class.return_value = mock_client + await collector.collect() + + # Verify AsyncClient was called with correct headers + call_kwargs = mock_client_class.call_args[1] + headers = call_kwargs.get("headers", {}) + assert "Authorization" in headers + assert "Bearer test-token-123" in headers["Authorization"] + assert "application/vnd.github+json" in headers["Accept"] + assert "2022-11-28" in headers["X-GitHub-Api-Version"] + + @pytest.mark.asyncio + async def test_collect_uses_correct_urls( + self, async_db_session: AsyncSession + ) -> None: + """Collection requests correct GitHub API endpoints.""" + await _seed_github_traffic(async_db_session) + + collector = GitHubTrafficCollector(async_db_session) + + with patch( + "app.services.insights.collectors.github_traffic.settings" + ) as mock_settings: + mock_settings.INSIGHT_GITHUB_TOKEN = "test-token" + mock_settings.INSIGHT_GITHUB_OWNER = "lbedner" + mock_settings.INSIGHT_GITHUB_REPO = "aegis-stack" + + mock_client = AsyncMock() + mock_client.get.side_effect = [ + _create_mock_response(CLONES_RESPONSE), + _create_mock_response(VIEWS_RESPONSE), + _create_mock_response(REFERRERS_RESPONSE), + _create_mock_response(PATHS_RESPONSE), + ] + mock_client.__aenter__.return_value = mock_client + mock_client.__aexit__.return_value = None + + with patch("httpx.AsyncClient", return_value=mock_client): + await collector.collect() + + # Verify all 4 endpoints were called + assert mock_client.get.call_count == 4 + calls = [call[0][0] for call in mock_client.get.call_args_list] + assert any("traffic/clones" in call for call in calls) + assert any("traffic/views" in call for call in calls) + assert any("popular/referrers" in call for call in calls) + assert any("popular/paths" in call for call in calls) diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/tests/services/test_collector_plausible.py b/aegis/templates/copier-aegis-project/{{ project_slug }}/tests/services/test_collector_plausible.py new file mode 100644 index 00000000..6b292333 --- /dev/null +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/tests/services/test_collector_plausible.py @@ -0,0 +1,308 @@ +""" +Tests for PlausibleCollector -- Plausible Analytics collection. +""" + +from datetime import datetime +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from app.services.insights.collectors.plausible import PlausibleCollector +from app.services.insights.constants import MetricKeys, Periods, SourceKeys +from app.services.insights.models import InsightMetric, InsightMetricType, InsightSource +from sqlmodel import select +from sqlmodel.ext.asyncio.session import AsyncSession + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +async def _seed_plausible( + session: AsyncSession, +) -> tuple[InsightSource, dict[str, InsightMetricType]]: + """Seed plausible source with all metric types.""" + source = InsightSource( + key=SourceKeys.PLAUSIBLE, + display_name="Plausible Analytics", + collection_interval_hours=24, + enabled=True, + ) + session.add(source) + await session.flush() + + metric_types: dict[str, InsightMetricType] = {} + for key in [ + MetricKeys.VISITORS, + MetricKeys.PAGEVIEWS, + MetricKeys.AVG_DURATION, + MetricKeys.BOUNCE_RATE, + MetricKeys.TOP_PAGES, + MetricKeys.TOP_COUNTRIES, + ]: + mt = InsightMetricType( + source_id=source.id, # type: ignore[arg-type] + key=key, + display_name=key.replace("_", " ").title(), + unit="json" if "top_" in key else "count", + ) + session.add(mt) + await session.flush() + metric_types[key] = mt + + return source, metric_types + + +# --------------------------------------------------------------------------- +# Tests: PlausibleCollector +# --------------------------------------------------------------------------- + + +class TestPlausibleCollectorSuccess: + """Test successful collection.""" + + @pytest.mark.asyncio + async def test_collect_success(self, async_db_session: AsyncSession) -> None: + """Happy path: collect visitor metrics and page engagement.""" + await _seed_plausible(async_db_session) + + mock_client = AsyncMock() + + async def mock_get(url: str, **kwargs): + """Mock Plausible API responses.""" + if "timeseries" in url: + # Daily timeseries metrics + return MagicMock( + json=lambda: { + "results": [ + { + "date": "2026-04-11", + "visitors": 100, + "pageviews": 250, + "visit_duration": 45.5, + "bounce_rate": 35.0, + }, + { + "date": "2026-04-10", + "visitors": 80, + "pageviews": 200, + "visit_duration": 42.0, + "bounce_rate": 40.0, + }, + ] + }, + raise_for_status=lambda: None, + ) + elif "breakdown" in url: + params = kwargs.get("params", {}) + if "event:page" in params.get("property", ""): + # Top pages + return MagicMock( + json=lambda: { + "results": [ + { + "page": "/docs", + "visitors": 60, + "visit_duration": 120, + }, + {"page": "/", "visitors": 40, "visit_duration": 30}, + ] + }, + raise_for_status=lambda: None, + ) + else: + # Top countries + return MagicMock( + json=lambda: { + "results": [ + {"country": "United States", "visitors": 70}, + {"country": "Canada", "visitors": 30}, + ] + }, + raise_for_status=lambda: None, + ) + + mock_client.get = mock_get + + with patch( + "app.services.insights.collectors.plausible.httpx.AsyncClient" + ) as mock_async_client: + mock_async_client.return_value.__aenter__.return_value = mock_client + + with patch( + "app.services.insights.collectors.plausible.settings" + ) as mock_settings: + mock_settings.INSIGHT_PLAUSIBLE_API_KEY = "apikey123" + mock_settings.INSIGHT_PLAUSIBLE_SITES = "docs.example.com" + + collector = PlausibleCollector(async_db_session) + result = await collector.collect(lookback_days=1) + + assert result.success is True + assert result.source_key == SourceKeys.PLAUSIBLE + assert result.rows_written > 0 + + # Verify metrics were written + metrics = await async_db_session.exec(select(InsightMetric)) + metric_list = metrics.all() + assert len(metric_list) > 0 + + @pytest.mark.asyncio + async def test_collect_missing_config(self, async_db_session: AsyncSession) -> None: + """Missing API key or sites returns error.""" + await _seed_plausible(async_db_session) + + with patch( + "app.services.insights.collectors.plausible.settings" + ) as mock_settings: + mock_settings.INSIGHT_PLAUSIBLE_API_KEY = None + mock_settings.INSIGHT_PLAUSIBLE_SITES = "docs.example.com" + + collector = PlausibleCollector(async_db_session) + result = await collector.collect() + + assert result.success is False + assert "Missing" in result.error + + @pytest.mark.asyncio + async def test_collect_api_error(self, async_db_session: AsyncSession) -> None: + """HTTP error from Plausible is handled gracefully.""" + import httpx + + await _seed_plausible(async_db_session) + + mock_response = MagicMock(status_code=401) + error = httpx.HTTPStatusError( + "401", request=MagicMock(), response=mock_response + ) + + mock_client = AsyncMock() + mock_client.get = AsyncMock(side_effect=error) + + with patch( + "app.services.insights.collectors.plausible.httpx.AsyncClient" + ) as mock_async_client: + mock_async_client.return_value.__aenter__.return_value = mock_client + + with patch( + "app.services.insights.collectors.plausible.settings" + ) as mock_settings: + mock_settings.INSIGHT_PLAUSIBLE_API_KEY = "apikey123" + mock_settings.INSIGHT_PLAUSIBLE_SITES = "docs.example.com" + + collector = PlausibleCollector(async_db_session) + result = await collector.collect() + + assert result.success is False + assert "Plausible API error" in result.error + + @pytest.mark.asyncio + async def test_deduplication(self, async_db_session: AsyncSession) -> None: + """Second collect doesn't duplicate existing daily rows.""" + source, metric_types = await _seed_plausible(async_db_session) + visitors_type = metric_types[MetricKeys.VISITORS] + + # Pre-populate a visitors row + existing_visitors = InsightMetric( + date=datetime(2026, 4, 11), + metric_type_id=visitors_type.id, # type: ignore[arg-type] + value=100.0, + period=Periods.DAILY, + metadata_={"site": "docs.example.com"}, + ) + async_db_session.add(existing_visitors) + await async_db_session.commit() + + mock_client = AsyncMock() + + async def mock_get(url: str, **kwargs): + if "timeseries" in url: + return MagicMock( + json=lambda: { + "results": [ + { + "date": "2026-04-11", + "visitors": 100, + "pageviews": 250, + "visit_duration": 45.5, + "bounce_rate": 35.0, + }, + ] + }, + raise_for_status=lambda: None, + ) + elif "breakdown" in url: + return MagicMock( + json=lambda: {"results": []}, + raise_for_status=lambda: None, + ) + + mock_client.get = mock_get + + with patch( + "app.services.insights.collectors.plausible.httpx.AsyncClient" + ) as mock_async_client: + mock_async_client.return_value.__aenter__.return_value = mock_client + + with patch( + "app.services.insights.collectors.plausible.settings" + ) as mock_settings: + mock_settings.INSIGHT_PLAUSIBLE_API_KEY = "apikey123" + mock_settings.INSIGHT_PLAUSIBLE_SITES = "docs.example.com" + + collector = PlausibleCollector(async_db_session) + result = await collector.collect(lookback_days=1) + + assert result.success is True + # The visitors row for 2026-04-11 should be skipped (already exists) + assert result.rows_skipped > 0 + + @pytest.mark.asyncio + async def test_lookback_days(self, async_db_session: AsyncSession) -> None: + """Lookback parameter affects query date range.""" + await _seed_plausible(async_db_session) + + captured_requests: list[dict] = [] + + mock_client = AsyncMock() + + async def mock_get(url: str, **kwargs): + captured_requests.append(kwargs) + if "timeseries" in url: + return MagicMock( + json=lambda: {"results": []}, + raise_for_status=lambda: None, + ) + else: + return MagicMock( + json=lambda: {"results": []}, + raise_for_status=lambda: None, + ) + + mock_client.get = mock_get + + with patch( + "app.services.insights.collectors.plausible.httpx.AsyncClient" + ) as mock_async_client: + mock_async_client.return_value.__aenter__.return_value = mock_client + + with patch( + "app.services.insights.collectors.plausible.settings" + ) as mock_settings: + mock_settings.INSIGHT_PLAUSIBLE_API_KEY = "apikey123" + mock_settings.INSIGHT_PLAUSIBLE_SITES = "docs.example.com" + + collector = PlausibleCollector(async_db_session) + await collector.collect(lookback_days=30) + + # Check that timeseries request has date range parameter + timeseries_requests = [ + r + for r in captured_requests + if r.get("params", {}).get("period") == "custom" + ] + assert len(timeseries_requests) > 0 + # The date parameter should have a range + date_param = timeseries_requests[0].get("params", {}).get("date", "") + assert "," in date_param, ( + f"Expected date range like 'YYYY-MM-DD,YYYY-MM-DD', got {date_param}" + ) diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/tests/services/test_collector_pypi.py b/aegis/templates/copier-aegis-project/{{ project_slug }}/tests/services/test_collector_pypi.py new file mode 100644 index 00000000..671517eb --- /dev/null +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/tests/services/test_collector_pypi.py @@ -0,0 +1,290 @@ +""" +Tests for PyPICollector -- ClickHouse PyPI downloads collection. +""" + +from datetime import datetime +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from app.services.insights.collectors.pypi import PyPICollector +from app.services.insights.constants import MetricKeys, Periods, SourceKeys +from app.services.insights.models import InsightMetric, InsightMetricType, InsightSource +from sqlmodel import select +from sqlmodel.ext.asyncio.session import AsyncSession + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +async def _seed_pypi( + session: AsyncSession, +) -> tuple[InsightSource, dict[str, InsightMetricType]]: + """Seed pypi source with all metric types.""" + source = InsightSource( + key=SourceKeys.PYPI, + display_name="PyPI Downloads", + collection_interval_hours=24, + enabled=True, + ) + session.add(source) + await session.flush() + + metric_types: dict[str, InsightMetricType] = {} + for key in [ + MetricKeys.DOWNLOADS_DAILY, + MetricKeys.DOWNLOADS_DAILY_HUMAN, + MetricKeys.DOWNLOADS_TOTAL, + MetricKeys.DOWNLOADS_BY_COUNTRY, + MetricKeys.DOWNLOADS_BY_INSTALLER, + MetricKeys.DOWNLOADS_BY_VERSION, + MetricKeys.DOWNLOADS_BY_TYPE, + ]: + mt = InsightMetricType( + source_id=source.id, # type: ignore[arg-type] + key=key, + display_name=key.replace("_", " ").title(), + unit="json" if "breakdown" in key or "by_" in key else "count", + ) + session.add(mt) + await session.flush() + metric_types[key] = mt + + return source, metric_types + + +# --------------------------------------------------------------------------- +# Tests: PyPICollector +# --------------------------------------------------------------------------- + + +class TestPyPICollectorSuccess: + """Test successful collection.""" + + @pytest.mark.asyncio + async def test_collect_success(self, async_db_session: AsyncSession) -> None: + """Happy path: collect PyPI downloads with dimensional breakdowns.""" + await _seed_pypi(async_db_session) + + mock_client = AsyncMock() + + async def mock_post(url: str, **kwargs): + """Mock ClickHouse API responses.""" + content = kwargs.get("content", "") + + if "date, version" in content and "sumIf" in content: + # Version breakdown with human/bot split + return MagicMock( + json=lambda: { + "data": [ + ["2026-04-11", "1.0.0", 400, 350], + ["2026-04-11", "0.9.0", 100, 50], + ["2026-04-10", "1.0.0", 350, 300], + ] + }, + raise_for_status=lambda: None, + ) + elif "date, type" in content: + # Type breakdown + return MagicMock( + json=lambda: { + "data": [ + ["2026-04-11", "bdist_wheel", 350], + ["2026-04-11", "sdist", 50], + ["2026-04-10", "bdist_wheel", 320], + ] + }, + raise_for_status=lambda: None, + ) + elif "date, installer" in content: + # Daily installer breakdown + return MagicMock( + json=lambda: { + "data": [ + ["2026-04-11", "pip", 500], + ["2026-04-11", "uv", 200], + ["2026-04-10", "pip", 450], + ] + }, + raise_for_status=lambda: None, + ) + elif "date, country_code" in content: + # Country breakdown + return MagicMock( + json=lambda: { + "data": [ + ["2026-04-11", "US", 300], + ["2026-04-11", "CN", 100], + ["2026-04-10", "US", 250], + ] + }, + raise_for_status=lambda: None, + ) + else: # Total sum(count) + return MagicMock( + json=lambda: {"data": [[1000000]]}, + raise_for_status=lambda: None, + ) + + mock_client.post = mock_post + + with patch( + "app.services.insights.collectors.pypi.httpx.AsyncClient" + ) as mock_async_client: + mock_async_client.return_value.__aenter__.return_value = mock_client + + with patch( + "app.services.insights.collectors.pypi.settings" + ) as mock_settings: + mock_settings.INSIGHT_PYPI_PACKAGE = "aegis-stack" + + collector = PyPICollector(async_db_session) + result = await collector.collect(lookback_days=14) + + assert result.success is True + assert result.source_key == SourceKeys.PYPI + assert result.rows_written > 0 + + # Verify metrics were written + metrics = await async_db_session.exec(select(InsightMetric)) + metric_list = metrics.all() + assert len(metric_list) > 0 + + @pytest.mark.asyncio + async def test_collect_missing_config(self, async_db_session: AsyncSession) -> None: + """Missing INSIGHT_PYPI_PACKAGE returns error.""" + await _seed_pypi(async_db_session) + + with patch("app.services.insights.collectors.pypi.settings") as mock_settings: + mock_settings.INSIGHT_PYPI_PACKAGE = None + + collector = PyPICollector(async_db_session) + result = await collector.collect() + + assert result.success is False + assert "Missing INSIGHT_PYPI_PACKAGE" in result.error + + @pytest.mark.asyncio + async def test_collect_api_error(self, async_db_session: AsyncSession) -> None: + """HTTP error from ClickHouse is handled gracefully.""" + await _seed_pypi(async_db_session) + + mock_client = AsyncMock() + mock_client.post = AsyncMock(side_effect=Exception("Connection timeout")) + + with patch( + "app.services.insights.collectors.pypi.httpx.AsyncClient" + ) as mock_async_client: + mock_async_client.return_value.__aenter__.return_value = mock_client + + with patch( + "app.services.insights.collectors.pypi.settings" + ) as mock_settings: + mock_settings.INSIGHT_PYPI_PACKAGE = "aegis-stack" + + collector = PyPICollector(async_db_session) + result = await collector.collect() + + assert result.success is False + assert "PyPI collection failed" in result.error + + @pytest.mark.asyncio + async def test_deduplication(self, async_db_session: AsyncSession) -> None: + """Second collect doesn't duplicate daily rows.""" + source, metric_types = await _seed_pypi(async_db_session) + daily_type = metric_types[MetricKeys.DOWNLOADS_DAILY] + + # Pre-populate a daily row + existing_daily = InsightMetric( + date=datetime(2026, 4, 11), + metric_type_id=daily_type.id, # type: ignore[arg-type] + value=700.0, + period=Periods.DAILY, + ) + async_db_session.add(existing_daily) + await async_db_session.commit() + + mock_client = AsyncMock() + + async def mock_post(url: str, **kwargs): + content = kwargs.get("content", "") + if "date, version" in content and "sumIf" in content: + return MagicMock( + json=lambda: { + "data": [ + ["2026-04-11", "1.0.0", 400, 350], + ] + }, + raise_for_status=lambda: None, + ) + elif "date, installer" in content: + return MagicMock( + json=lambda: { + "data": [ + ["2026-04-11", "pip", 500], + ["2026-04-11", "uv", 200], + ] + }, + raise_for_status=lambda: None, + ) + else: + return MagicMock( + json=lambda: {"data": []}, + raise_for_status=lambda: None, + ) + + mock_client.post = mock_post + + with patch( + "app.services.insights.collectors.pypi.httpx.AsyncClient" + ) as mock_async_client: + mock_async_client.return_value.__aenter__.return_value = mock_client + + with patch( + "app.services.insights.collectors.pypi.settings" + ) as mock_settings: + mock_settings.INSIGHT_PYPI_PACKAGE = "aegis-stack" + + collector = PyPICollector(async_db_session) + result = await collector.collect(lookback_days=14) + + assert result.success is True + # The 2026-04-11 daily row should be skipped (already exists) + assert result.rows_skipped > 0 + + @pytest.mark.asyncio + async def test_lookback_days(self, async_db_session: AsyncSession) -> None: + """Lookback parameter affects query date range.""" + await _seed_pypi(async_db_session) + + captured_queries: list[str] = [] + + mock_client = AsyncMock() + + async def mock_post(url: str, **kwargs): + content = kwargs.get("content", "") + captured_queries.append(content) + return MagicMock( + json=lambda: {"data": []}, + raise_for_status=lambda: None, + ) + + mock_client.post = mock_post + + with patch( + "app.services.insights.collectors.pypi.httpx.AsyncClient" + ) as mock_async_client: + mock_async_client.return_value.__aenter__.return_value = mock_client + + with patch( + "app.services.insights.collectors.pypi.settings" + ) as mock_settings: + mock_settings.INSIGHT_PYPI_PACKAGE = "aegis-stack" + + collector = PyPICollector(async_db_session) + await collector.collect(lookback_days=30) + + # Check that one of the queries contains "today() - 30" + assert any("today() - 30" in q for q in captured_queries), ( + f"Expected 'today() - 30' in queries, got: {captured_queries}" + ) diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/tests/services/test_collector_reddit.py b/aegis/templates/copier-aegis-project/{{ project_slug }}/tests/services/test_collector_reddit.py new file mode 100644 index 00000000..b41d8401 --- /dev/null +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/tests/services/test_collector_reddit.py @@ -0,0 +1,284 @@ +""" +Tests for RedditCollector -- Reddit post tracking. +""" + +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from app.services.insights.collectors.reddit import RedditCollector +from app.services.insights.constants import MetricKeys, SourceKeys +from app.services.insights.models import ( + InsightEvent, + InsightMetric, + InsightMetricType, + InsightSource, +) +from sqlmodel import select +from sqlmodel.ext.asyncio.session import AsyncSession + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +async def _seed_reddit( + session: AsyncSession, +) -> tuple[InsightSource, dict[str, InsightMetricType]]: + """Seed reddit source with metric types.""" + source = InsightSource( + key=SourceKeys.REDDIT, + display_name="Reddit", + collection_interval_hours=None, # On-demand only + enabled=True, + ) + session.add(source) + await session.flush() + + metric_types: dict[str, InsightMetricType] = {} + mt = InsightMetricType( + source_id=source.id, # type: ignore[arg-type] + key=MetricKeys.POST_STATS, + display_name="Post Stats", + unit="json", + ) + session.add(mt) + await session.flush() + metric_types[MetricKeys.POST_STATS] = mt + + return source, metric_types + + +# --------------------------------------------------------------------------- +# Tests: RedditCollector +# --------------------------------------------------------------------------- + + +class TestRedditCollectorAddPost: + """Test add_post() method.""" + + @pytest.mark.asyncio + async def test_add_post_success(self, async_db_session: AsyncSession) -> None: + """Happy path: add a Reddit post with stats.""" + await _seed_reddit(async_db_session) + + mock_client = AsyncMock() + + async def mock_get(url: str, **kwargs): + """Mock Reddit JSON API response.""" + # Reddit returns array of listings + return MagicMock( + json=lambda: [ + { + "data": { + "children": [ + { + "data": { + "id": "abc123", + "subreddit": "Python", + "title": "Announcing Aegis Stack", + "ups": 150, + "num_comments": 25, + "view_count": 5000, + "upvote_ratio": 0.95, + "created_utc": 1712900000, + "url": "https://reddit.com/r/Python/comments/abc123/...", + } + } + ] + } + } + ], + raise_for_status=lambda: None, + ) + + mock_client.get = mock_get + + with patch( + "app.services.insights.collectors.reddit.httpx.AsyncClient" + ) as mock_async_client: + mock_async_client.return_value.__aenter__.return_value = mock_client + + collector = RedditCollector(async_db_session) + result = await collector.add_post( + "https://reddit.com/r/Python/comments/abc123/announcing_aegis_stack" + ) + + assert result.success is True + assert result.source_key == SourceKeys.REDDIT + assert result.rows_written == 1 + + # Verify metric was written + metrics = await async_db_session.exec(select(InsightMetric)) + metric_list = metrics.all() + assert len(metric_list) == 1 + assert metric_list[0].value == 150.0 # upvotes + assert metric_list[0].metadata_.get("post_id") == "abc123" + + # Verify event was created + events = await async_db_session.exec(select(InsightEvent)) + event_list = events.all() + assert len(event_list) == 1 + assert "Python" in event_list[0].description + assert event_list[0].metadata_.get("post_id") == "abc123" + + @pytest.mark.asyncio + async def test_add_post_api_error(self, async_db_session: AsyncSession) -> None: + """HTTP error from Reddit is handled gracefully.""" + import httpx + + await _seed_reddit(async_db_session) + + mock_response = MagicMock(status_code=404) + error = httpx.HTTPStatusError( + "404", request=MagicMock(), response=mock_response + ) + + mock_client = AsyncMock() + mock_client.get = AsyncMock(side_effect=error) + + with patch( + "app.services.insights.collectors.reddit.httpx.AsyncClient" + ) as mock_async_client: + mock_async_client.return_value.__aenter__.return_value = mock_client + + collector = RedditCollector(async_db_session) + result = await collector.add_post( + "https://reddit.com/r/Python/comments/notfound/..." + ) + + assert result.success is False + assert "Reddit API error" in result.error + + @pytest.mark.asyncio + async def test_add_post_parse_error(self, async_db_session: AsyncSession) -> None: + """Malformed Reddit response is handled gracefully.""" + await _seed_reddit(async_db_session) + + mock_client = AsyncMock() + + async def mock_get(url: str, **kwargs): + # Missing nested structure + return MagicMock( + json=lambda: [{"data": {}}], # Missing children + raise_for_status=lambda: None, + ) + + mock_client.get = mock_get + + with patch( + "app.services.insights.collectors.reddit.httpx.AsyncClient" + ) as mock_async_client: + mock_async_client.return_value.__aenter__.return_value = mock_client + + collector = RedditCollector(async_db_session) + result = await collector.add_post( + "https://reddit.com/r/Python/comments/abc123/..." + ) + + assert result.success is False + assert "Failed to parse Reddit response" in result.error + + @pytest.mark.asyncio + async def test_add_post_deduplication(self, async_db_session: AsyncSession) -> None: + """Second add_post for same post doesn't duplicate event.""" + await _seed_reddit(async_db_session) + + # Pre-add the post + mock_client = AsyncMock() + + async def mock_get(url: str, **kwargs): + return MagicMock( + json=lambda: [ + { + "data": { + "children": [ + { + "data": { + "id": "abc123", + "subreddit": "Python", + "title": "Announcing Aegis Stack", + "ups": 150, + "num_comments": 25, + "view_count": 5000, + "upvote_ratio": 0.95, + "created_utc": 1712900000, + "url": "https://reddit.com/r/Python/comments/abc123/...", + } + } + ] + } + } + ], + raise_for_status=lambda: None, + ) + + mock_client.get = mock_get + + with patch( + "app.services.insights.collectors.reddit.httpx.AsyncClient" + ) as mock_async_client: + mock_async_client.return_value.__aenter__.return_value = mock_client + + collector = RedditCollector(async_db_session) + # First add + result1 = await collector.add_post( + "https://reddit.com/r/Python/comments/abc123/announcing_aegis_stack" + ) + assert result1.success is True + + # Second add with updated stats + async def mock_get_updated(url: str, **kwargs): + return MagicMock( + json=lambda: [ + { + "data": { + "children": [ + { + "data": { + "id": "abc123", + "subreddit": "Python", + "title": "Announcing Aegis Stack", + "ups": 200, # Updated + "num_comments": 35, + "view_count": 7000, + "upvote_ratio": 0.96, + "created_utc": 1712900000, + "url": "https://reddit.com/r/Python/comments/abc123/...", + } + } + ] + } + } + ], + raise_for_status=lambda: None, + ) + + mock_client.get = mock_get_updated + result2 = await collector.add_post( + "https://reddit.com/r/Python/comments/abc123/announcing_aegis_stack" + ) + assert result2.success is True + + # Verify only 1 event exists (deduped), but 2 metrics + # (upsert_metric always creates events with period=EVENT) + events = await async_db_session.exec( + select(InsightEvent).where(InsightEvent.event_type == "reddit_post") + ) + event_list = events.all() + # Should have exactly 1 event due to deduplication in add_post + assert len(event_list) == 1 + assert event_list[0].metadata_.get("post_id") == "abc123" + + @pytest.mark.asyncio + async def test_collect_returns_on_demand_message( + self, async_db_session: AsyncSession + ) -> None: + """collect() returns on-demand message.""" + await _seed_reddit(async_db_session) + + collector = RedditCollector(async_db_session) + result = await collector.collect() + + assert result.success is True + assert "on-demand" in result.error + assert result.source_key == SourceKeys.REDDIT diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/tests/services/test_collector_service.py b/aegis/templates/copier-aegis-project/{{ project_slug }}/tests/services/test_collector_service.py new file mode 100644 index 00000000..5feca138 --- /dev/null +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/tests/services/test_collector_service.py @@ -0,0 +1,279 @@ +""" +Tests for CollectorService orchestration layer. +""" + +from datetime import datetime +from unittest.mock import AsyncMock, patch + +import pytest +from app.services.insights.collector_service import CollectorService +from app.services.insights.collectors.base import CollectionResult +from app.services.insights.constants import MetricKeys, Periods, SourceKeys +from app.services.insights.models import ( + InsightMetric, + InsightMetricType, + InsightSource, +) +from sqlmodel.ext.asyncio.session import AsyncSession + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +async def _seed_source( + session: AsyncSession, + key: str = SourceKeys.GITHUB_TRAFFIC, + enabled: bool = True, +) -> InsightSource: + source = InsightSource( + key=key, + display_name="Test Source", + collection_interval_hours=6, + enabled=enabled, + ) + session.add(source) + await session.flush() + return source + + +async def _seed_metric_type( + session: AsyncSession, + source: InsightSource, + key: str = MetricKeys.CLONES, +) -> InsightMetricType: + mt = InsightMetricType( + source_id=source.id, # type: ignore[arg-type] + key=key, + display_name=key.replace("_", " ").title(), + unit="count", + ) + session.add(mt) + await session.flush() + return mt + + +# --------------------------------------------------------------------------- +# Tests: collect_source +# --------------------------------------------------------------------------- + + +class TestCollectSource: + @pytest.mark.asyncio + async def test_collect_unknown_source(self, async_db_session: AsyncSession) -> None: + """Unknown source key returns error CollectionResult.""" + service = CollectorService(async_db_session) + result = await service.collect_source("nonexistent_source") + + assert result.success is False + assert "No collector registered" in (result.error or "") + + @pytest.mark.asyncio + async def test_collect_source_not_in_db( + self, async_db_session: AsyncSession + ) -> None: + """Source key registered but not seeded in DB returns error.""" + service = CollectorService(async_db_session) + result = await service.collect_source(SourceKeys.GITHUB_TRAFFIC) + + assert result.success is False + assert "not found in database" in (result.error or "") + + @pytest.mark.asyncio + async def test_collect_disabled_source( + self, async_db_session: AsyncSession + ) -> None: + """Disabled source returns error.""" + await _seed_source(async_db_session, SourceKeys.GITHUB_TRAFFIC, enabled=False) + + service = CollectorService(async_db_session) + result = await service.collect_source(SourceKeys.GITHUB_TRAFFIC) + + assert result.success is False + assert "disabled" in (result.error or "") + + @pytest.mark.asyncio + @patch("app.services.insights.collectors.github_traffic.settings") + async def test_collect_updates_last_collected_at( + self, mock_settings: AsyncMock, async_db_session: AsyncSession + ) -> None: + """Successful collection updates source.last_collected_at.""" + mock_settings.INSIGHT_GITHUB_TOKEN = "" + mock_settings.INSIGHT_GITHUB_OWNER = "" + mock_settings.INSIGHT_GITHUB_REPO = "" + + source = await _seed_source(async_db_session, SourceKeys.GITHUB_TRAFFIC) + assert source.last_collected_at is None + + # The collector will fail due to missing config, but that's a success=False path + # so last_collected_at won't be updated. We need a success path. + # Patch the collector's collect method directly for a clean test. + with patch( + "app.services.insights.collector_service.COLLECTOR_REGISTRY", + {SourceKeys.GITHUB_TRAFFIC: _make_mock_collector_cls(success=True)}, + ): + service = CollectorService(async_db_session) + result = await service.collect_source(SourceKeys.GITHUB_TRAFFIC) + + assert result.success is True + + # Refresh the source from DB + await async_db_session.refresh(source) + assert source.last_collected_at is not None + + +# --------------------------------------------------------------------------- +# Tests: collect_all +# --------------------------------------------------------------------------- + + +class TestCollectAll: + @pytest.mark.asyncio + async def test_collect_all_runs_enabled_sources( + self, async_db_session: AsyncSession + ) -> None: + """collect_all runs collectors for all enabled sources.""" + await _seed_source(async_db_session, SourceKeys.GITHUB_TRAFFIC, enabled=True) + await _seed_source(async_db_session, SourceKeys.PYPI, enabled=True) + await _seed_source(async_db_session, SourceKeys.REDDIT, enabled=False) + + mock_cls = _make_mock_collector_cls(success=True) + registry = { + SourceKeys.GITHUB_TRAFFIC: mock_cls, + SourceKeys.PYPI: mock_cls, + SourceKeys.REDDIT: mock_cls, + } + + with patch( + "app.services.insights.collector_service.COLLECTOR_REGISTRY", registry + ): + service = CollectorService(async_db_session) + results = await service.collect_all() + + # Only enabled sources should have results + assert SourceKeys.GITHUB_TRAFFIC in results + assert SourceKeys.PYPI in results + assert SourceKeys.REDDIT not in results + + @pytest.mark.asyncio + async def test_collect_all_empty_db(self, async_db_session: AsyncSession) -> None: + """collect_all with no sources returns empty dict.""" + service = CollectorService(async_db_session) + results = await service.collect_all() + assert results == {} + + +# --------------------------------------------------------------------------- +# Tests: _check_records +# --------------------------------------------------------------------------- + + +class TestCheckRecords: + @pytest.mark.asyncio + async def test_detects_new_record(self, async_db_session: AsyncSession) -> None: + """_check_records creates a milestone event when ATH is detected.""" + source = await _seed_source(async_db_session) + mt = await _seed_metric_type(async_db_session, source, MetricKeys.CLONES) + + # Seed a daily metric that should trigger a record + metric = InsightMetric( + date=datetime(2026, 4, 10), + metric_type_id=mt.id, # type: ignore[arg-type] + value=999.0, + period=Periods.DAILY, + ) + async_db_session.add(metric) + await async_db_session.flush() + + service = CollectorService(async_db_session) + broken = await service._check_records(SourceKeys.GITHUB_TRAFFIC) + + assert len(broken) > 0 + assert "999" in broken[0] + + @pytest.mark.asyncio + async def test_no_record_when_lower(self, async_db_session: AsyncSession) -> None: + """_check_records does not create event when value <= existing record.""" + from app.services.insights.models import InsightEvent + + source = await _seed_source(async_db_session) + mt = await _seed_metric_type(async_db_session, source, MetricKeys.CLONES) + + # Seed an existing milestone + event = InsightEvent( + date=datetime(2026, 4, 1), + event_type="milestone_github", + description="1,000 (GitHub 1-Day Clones)", + metadata_={"category": "daily_clones"}, + ) + async_db_session.add(event) + await async_db_session.flush() + + # Seed a daily metric lower than existing record + metric = InsightMetric( + date=datetime(2026, 4, 10), + metric_type_id=mt.id, # type: ignore[arg-type] + value=500.0, + period=Periods.DAILY, + ) + async_db_session.add(metric) + await async_db_session.flush() + + service = CollectorService(async_db_session) + broken = await service._check_records(SourceKeys.GITHUB_TRAFFIC) + + # Should not detect record for clones (500 < 1000) + clone_records = [b for b in broken if "1-Day Clones" in b] + assert len(clone_records) == 0 + + @pytest.mark.asyncio + async def test_no_records_for_untracked_source( + self, async_db_session: AsyncSession + ) -> None: + """Sources without record checks return empty list.""" + service = CollectorService(async_db_session) + broken = await service._check_records(SourceKeys.REDDIT) + assert broken == [] + + +# --------------------------------------------------------------------------- +# Tests: get_registered_sources +# --------------------------------------------------------------------------- + + +class TestGetRegisteredSources: + def test_returns_all_registered(self) -> None: + service = CollectorService(AsyncMock()) + sources = service.get_registered_sources() + + assert SourceKeys.GITHUB_TRAFFIC in sources + assert SourceKeys.PYPI in sources + assert SourceKeys.PLAUSIBLE in sources + assert SourceKeys.REDDIT in sources + assert len(sources) == 6 + + +# --------------------------------------------------------------------------- +# Mock helpers +# --------------------------------------------------------------------------- + + +def _make_mock_collector_cls(success: bool = True) -> type: + """Create a mock collector class that returns a fixed result.""" + + class MockCollector: + def __init__(self, db: AsyncSession) -> None: + self.db = db + + @property + def source_key(self) -> str: + return "mock" + + async def collect(self, **kwargs: object) -> CollectionResult: + return CollectionResult( + source_key=self.source_key, + success=success, + rows_written=5 if success else 0, + ) + + return MockCollector diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/tests/services/test_insight_service.py b/aegis/templates/copier-aegis-project/{{ project_slug }}/tests/services/test_insight_service.py new file mode 100644 index 00000000..f51aece3 --- /dev/null +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/tests/services/test_insight_service.py @@ -0,0 +1,330 @@ +""" +Tests for InsightService query layer and record detection. +""" + +from datetime import datetime, timedelta + +import pytest +from app.services.insights.constants import MetricKeys, Periods, SourceKeys +from app.services.insights.insight_service import InsightService +from app.services.insights.models import ( + InsightMetric, + InsightMetricType, + InsightSource, +) +from sqlmodel.ext.asyncio.session import AsyncSession + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +async def _seed_source( + session: AsyncSession, key: str = SourceKeys.GITHUB_TRAFFIC +) -> InsightSource: + """Create a source row for testing.""" + source = InsightSource( + key=key, display_name="Test Source", collection_interval_hours=6, enabled=True + ) + session.add(source) + await session.flush() + return source + + +async def _seed_metric_type( + session: AsyncSession, + source: InsightSource, + key: str = MetricKeys.CLONES, + unit: str = "count", +) -> InsightMetricType: + """Create a metric type row for testing.""" + mt = InsightMetricType( + source_id=source.id, # type: ignore[arg-type] + key=key, + display_name=key.replace("_", " ").title(), + unit=unit, + ) + session.add(mt) + await session.flush() + return mt + + +async def _seed_metric( + session: AsyncSession, + metric_type: InsightMetricType, + date: datetime, + value: float, + period: str = Periods.DAILY, +) -> InsightMetric: + """Create a metric row for testing.""" + metric = InsightMetric( + date=date, + metric_type_id=metric_type.id, # type: ignore[arg-type] + value=value, + period=period, + ) + session.add(metric) + await session.flush() + return metric + + +# --------------------------------------------------------------------------- +# Tests: Record Detection +# --------------------------------------------------------------------------- + + +class TestRecordDetection: + """Test InsightService.check_and_update_records.""" + + @pytest.mark.asyncio + async def test_creates_first_record(self, async_db_session: AsyncSession) -> None: + """First value for a metric creates a new record.""" + source = await _seed_source(async_db_session) + mt = await _seed_metric_type(async_db_session, source) + service = InsightService(async_db_session) + + broken = await service.check_and_update_records( + metric_type_id=mt.id, # type: ignore[arg-type] + value=100.0, + achieved_date=datetime(2026, 3, 20), + ) + + assert broken is True + + records = await service.get_records() + assert len(records) == 1 + assert records[0].value == 100.0 + assert records[0].previous_value is None + + @pytest.mark.asyncio + async def test_updates_when_exceeded(self, async_db_session: AsyncSession) -> None: + """Higher value shifts current to previous and sets new record.""" + source = await _seed_source(async_db_session) + mt = await _seed_metric_type(async_db_session, source) + service = InsightService(async_db_session) + + await service.check_and_update_records( + metric_type_id=mt.id, # type: ignore[arg-type] + value=100.0, + achieved_date=datetime(2026, 3, 20), + ) + + broken = await service.check_and_update_records( + metric_type_id=mt.id, # type: ignore[arg-type] + value=200.0, + achieved_date=datetime(2026, 3, 21), + ) + + assert broken is True + + records = await service.get_records() + assert len(records) == 1 + assert records[0].value == 200.0 + assert records[0].previous_value == 100.0 + assert records[0].previous_date == datetime(2026, 3, 20) + + @pytest.mark.asyncio + async def test_no_update_when_lower(self, async_db_session: AsyncSession) -> None: + """Lower value does not update the record.""" + source = await _seed_source(async_db_session) + mt = await _seed_metric_type(async_db_session, source) + service = InsightService(async_db_session) + + await service.check_and_update_records( + metric_type_id=mt.id, # type: ignore[arg-type] + value=100.0, + achieved_date=datetime(2026, 3, 20), + ) + + broken = await service.check_and_update_records( + metric_type_id=mt.id, # type: ignore[arg-type] + value=50.0, + achieved_date=datetime(2026, 3, 21), + ) + + assert broken is False + + records = await service.get_records() + assert records[0].value == 100.0 + + @pytest.mark.asyncio + async def test_equal_value_no_update(self, async_db_session: AsyncSession) -> None: + """Equal value does not break the record.""" + source = await _seed_source(async_db_session) + mt = await _seed_metric_type(async_db_session, source) + service = InsightService(async_db_session) + + await service.check_and_update_records( + metric_type_id=mt.id, # type: ignore[arg-type] + value=100.0, + achieved_date=datetime(2026, 3, 20), + ) + + broken = await service.check_and_update_records( + metric_type_id=mt.id, # type: ignore[arg-type] + value=100.0, + achieved_date=datetime(2026, 3, 21), + ) + + assert broken is False + + +# --------------------------------------------------------------------------- +# Tests: Rolling 14-Day Window +# --------------------------------------------------------------------------- + + +class TestRolling14d: + """Test InsightService.get_rolling_14d.""" + + @pytest.mark.asyncio + async def test_sums_last_14_days(self, async_db_session: AsyncSession) -> None: + """Sums daily values within the 14-day window.""" + source = await _seed_source(async_db_session) + mt = await _seed_metric_type(async_db_session, source) + + now = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) + for i in range(7): + await _seed_metric(async_db_session, mt, now - timedelta(days=i), 10.0) + + service = InsightService(async_db_session) + total = await service.get_rolling_14d(mt.id) # type: ignore[arg-type] + + assert total == 70.0 + + @pytest.mark.asyncio + async def test_excludes_old_data(self, async_db_session: AsyncSession) -> None: + """Data older than 14 days is not included.""" + source = await _seed_source(async_db_session) + mt = await _seed_metric_type(async_db_session, source) + + now = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) + # One row within window, one outside + await _seed_metric(async_db_session, mt, now - timedelta(days=1), 50.0) + await _seed_metric(async_db_session, mt, now - timedelta(days=20), 999.0) + + service = InsightService(async_db_session) + total = await service.get_rolling_14d(mt.id) # type: ignore[arg-type] + + assert total == 50.0 + + @pytest.mark.asyncio + async def test_empty_returns_zero(self, async_db_session: AsyncSession) -> None: + """No data returns 0.0.""" + source = await _seed_source(async_db_session) + mt = await _seed_metric_type(async_db_session, source) + + service = InsightService(async_db_session) + total = await service.get_rolling_14d(mt.id) # type: ignore[arg-type] + + assert total == 0.0 + + +# --------------------------------------------------------------------------- +# Tests: Events +# --------------------------------------------------------------------------- + + +class TestEvents: + """Test InsightService event management.""" + + @pytest.mark.asyncio + async def test_add_event(self, async_db_session: AsyncSession) -> None: + """Can create a contextual event.""" + service = InsightService(async_db_session) + + event = await service.add_event( + event_type="release", + description="Shipped v0.6.8 Auth/RBAC", + metadata={"version": "0.6.8"}, + ) + + assert event.id is not None + assert event.event_type == "release" + assert event.description == "Shipped v0.6.8 Auth/RBAC" + + @pytest.mark.asyncio + async def test_get_events(self, async_db_session: AsyncSession) -> None: + """Can retrieve events.""" + service = InsightService(async_db_session) + + await service.add_event("release", "v1") + await service.add_event("reddit_post", "NWN post") + + events = await service.get_events() + assert len(events) == 2 + + +# --------------------------------------------------------------------------- +# Tests: Sources and Metric Types +# --------------------------------------------------------------------------- + + +class TestSourcesAndTypes: + """Test source and metric type queries.""" + + @pytest.mark.asyncio + async def test_get_sources(self, async_db_session: AsyncSession) -> None: + """Returns all sources.""" + await _seed_source(async_db_session, SourceKeys.GITHUB_TRAFFIC) + await _seed_source(async_db_session, SourceKeys.PYPI) + + service = InsightService(async_db_session) + sources = await service.get_sources() + + assert len(sources) == 2 + + @pytest.mark.asyncio + async def test_get_metric_types_filtered( + self, async_db_session: AsyncSession + ) -> None: + """Returns metric types filtered by source.""" + source = await _seed_source(async_db_session) + await _seed_metric_type(async_db_session, source, MetricKeys.CLONES) + await _seed_metric_type(async_db_session, source, MetricKeys.VIEWS) + + other_source = await _seed_source(async_db_session, SourceKeys.PYPI) + await _seed_metric_type( + async_db_session, other_source, MetricKeys.DOWNLOADS_TOTAL + ) + + service = InsightService(async_db_session) + types = await service.get_metric_types(source.id) + + assert len(types) == 2 + keys = {t.key for t in types} + assert MetricKeys.CLONES in keys + assert MetricKeys.VIEWS in keys + + +# --------------------------------------------------------------------------- +# Tests: Status Summary +# --------------------------------------------------------------------------- + + +class TestStatusSummary: + """Test InsightService.get_status_summary.""" + + @pytest.mark.asyncio + async def test_summary_with_data(self, async_db_session: AsyncSession) -> None: + """Summary includes sources, records, and total metrics.""" + source = await _seed_source(async_db_session) + mt = await _seed_metric_type(async_db_session, source) + await _seed_metric(async_db_session, mt, datetime(2026, 3, 31), 345.0) + + service = InsightService(async_db_session) + summary = await service.get_status_summary() + + assert summary["total_metrics"] == 1 + assert len(summary["sources"]) == 1 + assert summary["sources"][0]["key"] == SourceKeys.GITHUB_TRAFFIC + + @pytest.mark.asyncio + async def test_summary_empty_db(self, async_db_session: AsyncSession) -> None: + """Summary works with no data.""" + service = InsightService(async_db_session) + summary = await service.get_status_summary() + + assert summary["total_metrics"] == 0 + assert summary["sources"] == [] + assert summary["records"] == [] diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/tests/services/test_insights_collectors.py b/aegis/templates/copier-aegis-project/{{ project_slug }}/tests/services/test_insights_collectors.py new file mode 100644 index 00000000..c2f7649b --- /dev/null +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/tests/services/test_insights_collectors.py @@ -0,0 +1,329 @@ +""" +Tests for insight collectors — BaseCollector helpers and GitHubTrafficCollector. +""" + +from datetime import datetime + +import pytest +from app.services.insights.collectors.base import CollectionResult +from app.services.insights.constants import MetricKeys, Periods, SourceKeys +from app.services.insights.models import ( + InsightMetric, + InsightMetricType, + InsightSource, +) +from app.services.insights.schemas import ReferrerEntry +from sqlmodel import select +from sqlmodel.ext.asyncio.session import AsyncSession + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +async def _seed_github_traffic( + session: AsyncSession, +) -> tuple[InsightSource, dict[str, InsightMetricType]]: + """Seed github_traffic source with all metric types.""" + source = InsightSource( + key=SourceKeys.GITHUB_TRAFFIC, + display_name="GitHub Traffic", + collection_interval_hours=6, + enabled=True, + ) + session.add(source) + await session.flush() + + metric_types: dict[str, InsightMetricType] = {} + for key in [ + MetricKeys.CLONES, + MetricKeys.UNIQUE_CLONERS, + MetricKeys.VIEWS, + MetricKeys.UNIQUE_VISITORS, + MetricKeys.REFERRERS, + MetricKeys.POPULAR_PATHS, + ]: + mt = InsightMetricType( + source_id=source.id, # type: ignore[arg-type] + key=key, + display_name=key.replace("_", " ").title(), + unit="count" + if key not in (MetricKeys.REFERRERS, MetricKeys.POPULAR_PATHS) + else "json", + ) + session.add(mt) + await session.flush() + metric_types[key] = mt + + return source, metric_types + + +# --------------------------------------------------------------------------- +# Tests: CollectionResult (Pydantic validation) +# --------------------------------------------------------------------------- + + +class TestCollectionResult: + """Test CollectionResult Pydantic model.""" + + def test_default_values(self) -> None: + """Default values are correct.""" + result = CollectionResult(source_key="test", success=True) + assert result.rows_written == 0 + assert result.rows_skipped == 0 + assert result.records_broken == [] + assert result.error is None + + def test_validation_rejects_negative_counts(self) -> None: + """Negative row counts are rejected.""" + from pydantic import ValidationError + + with pytest.raises(ValidationError): + CollectionResult(source_key="test", success=True, rows_written=-1) + + def test_serialization(self) -> None: + """Can serialize to dict for API responses.""" + result = CollectionResult( + source_key="github_traffic", + success=True, + rows_written=10, + rows_skipped=4, + ) + data = result.model_dump() + assert data["source_key"] == "github_traffic" + assert data["rows_written"] == 10 + + +# --------------------------------------------------------------------------- +# Tests: BaseCollector.upsert_metric +# --------------------------------------------------------------------------- + + +class TestUpsertMetric: + """Test BaseCollector.upsert_metric deduplication logic.""" + + @pytest.mark.asyncio + async def test_creates_new_row(self, async_db_session: AsyncSession) -> None: + """First upsert creates a new metric row.""" + _, metric_types = await _seed_github_traffic(async_db_session) + mt = metric_types[MetricKeys.CLONES] + + # Need a concrete collector to test base methods + from app.services.insights.collectors.github_traffic import ( + GitHubTrafficCollector, + ) + + collector = GitHubTrafficCollector(async_db_session) + metric, created = await collector.upsert_metric( + metric_type=mt, + date=datetime(2026, 3, 31), + value=345.0, + period=Periods.DAILY, + ) + + assert created is True + assert metric.value == 345.0 + + @pytest.mark.asyncio + async def test_updates_existing_row(self, async_db_session: AsyncSession) -> None: + """Second upsert for same (type, date, period) updates instead of creating.""" + _, metric_types = await _seed_github_traffic(async_db_session) + mt = metric_types[MetricKeys.CLONES] + + from app.services.insights.collectors.github_traffic import ( + GitHubTrafficCollector, + ) + + collector = GitHubTrafficCollector(async_db_session) + + # First insert + _, created1 = await collector.upsert_metric( + metric_type=mt, + date=datetime(2026, 3, 31), + value=345.0, + period=Periods.DAILY, + ) + assert created1 is True + + # Second insert — same type + date + period + metric, created2 = await collector.upsert_metric( + metric_type=mt, + date=datetime(2026, 3, 31), + value=400.0, + period=Periods.DAILY, + ) + assert created2 is False + assert metric.value == 400.0 + + # Verify only one row exists + result = await async_db_session.exec( + select(InsightMetric).where(InsightMetric.metric_type_id == mt.id) + ) + assert len(result.all()) == 1 + + @pytest.mark.asyncio + async def test_event_period_always_creates( + self, async_db_session: AsyncSession + ) -> None: + """Event period rows are always new (no deduplication).""" + _, metric_types = await _seed_github_traffic(async_db_session) + mt = metric_types[MetricKeys.CLONES] + + from app.services.insights.collectors.github_traffic import ( + GitHubTrafficCollector, + ) + + collector = GitHubTrafficCollector(async_db_session) + + _, created1 = await collector.upsert_metric( + metric_type=mt, + date=datetime(2026, 3, 31), + value=99.0, + period=Periods.EVENT, + metadata={"username": "star1"}, + ) + _, created2 = await collector.upsert_metric( + metric_type=mt, + date=datetime(2026, 3, 31), + value=100.0, + period=Periods.EVENT, + metadata={"username": "star2"}, + ) + + assert created1 is True + assert created2 is True + + result = await async_db_session.exec( + select(InsightMetric).where( + InsightMetric.metric_type_id == mt.id, + InsightMetric.period == Periods.EVENT, + ) + ) + assert len(result.all()) == 2 + + +# --------------------------------------------------------------------------- +# Tests: BaseCollector helpers +# --------------------------------------------------------------------------- + + +class TestBaseCollectorHelpers: + """Test get_source and get_metric_type.""" + + @pytest.mark.asyncio + async def test_get_source(self, async_db_session: AsyncSession) -> None: + """get_source returns the correct source row.""" + await _seed_github_traffic(async_db_session) + + from app.services.insights.collectors.github_traffic import ( + GitHubTrafficCollector, + ) + + collector = GitHubTrafficCollector(async_db_session) + source = await collector.get_source() + + assert source.key == SourceKeys.GITHUB_TRAFFIC + + @pytest.mark.asyncio + async def test_get_source_missing_raises( + self, async_db_session: AsyncSession + ) -> None: + """get_source raises RuntimeError when source not seeded.""" + from app.services.insights.collectors.github_traffic import ( + GitHubTrafficCollector, + ) + + collector = GitHubTrafficCollector(async_db_session) + + with pytest.raises(RuntimeError, match="not found"): + await collector.get_source() + + @pytest.mark.asyncio + async def test_get_metric_type(self, async_db_session: AsyncSession) -> None: + """get_metric_type returns the correct type row.""" + await _seed_github_traffic(async_db_session) + + from app.services.insights.collectors.github_traffic import ( + GitHubTrafficCollector, + ) + + collector = GitHubTrafficCollector(async_db_session) + mt = await collector.get_metric_type(MetricKeys.CLONES) + + assert mt.key == MetricKeys.CLONES + assert mt.unit == "count" + + @pytest.mark.asyncio + async def test_get_metric_type_missing_raises( + self, async_db_session: AsyncSession + ) -> None: + """get_metric_type raises RuntimeError for unknown key.""" + await _seed_github_traffic(async_db_session) + + from app.services.insights.collectors.github_traffic import ( + GitHubTrafficCollector, + ) + + collector = GitHubTrafficCollector(async_db_session) + + with pytest.raises(RuntimeError, match="not found"): + await collector.get_metric_type("nonexistent_metric") + + +# --------------------------------------------------------------------------- +# Tests: Pydantic Schemas +# --------------------------------------------------------------------------- + + +class TestSchemas: + """Test that Pydantic metadata schemas validate correctly.""" + + def test_referrer_entry(self) -> None: + """ReferrerEntry validates and serializes.""" + entry = ReferrerEntry(views=54, uniques=9) + assert entry.views == 54 + data = entry.model_dump() + assert data == {"views": 54, "uniques": 9} + + def test_referrer_entry_rejects_negative(self) -> None: + """ReferrerEntry rejects negative values.""" + from pydantic import ValidationError + + with pytest.raises(ValidationError): + ReferrerEntry(views=-1, uniques=0) + + def test_star_profile_metadata(self) -> None: + """StarProfileMetadata handles full and partial profiles.""" + from app.services.insights.schemas import StarProfileMetadata + + # Full profile + full = StarProfileMetadata( + username="ncthuc", + name="Thuc Nguyen", + location="Hanoi, Vietnam", + company="teko.vn", + followers=6, + stars_given=50, + account_age_years=15.0, + ) + assert full.username == "ncthuc" + + # Minimal profile + minimal = StarProfileMetadata(username="anonymous") + assert minimal.followers == 0 + assert minimal.location is None + + def test_reddit_post_metadata(self) -> None: + """RedditPostMetadata validates post stats.""" + from app.services.insights.schemas import RedditPostMetadata + + post = RedditPostMetadata( + post_id="nwn_vault_revival", + subreddit="neverwinternights", + comments=20, + views=17000, + upvote_ratio=0.996, + ) + assert post.post_id == "nwn_vault_revival" + data = post.model_dump() + assert data["views"] == 17000 diff --git a/aegis/templates/copier-aegis-project/{{ project_slug }}/tests/services/test_query_service.py b/aegis/templates/copier-aegis-project/{{ project_slug }}/tests/services/test_query_service.py new file mode 100644 index 00000000..9abf4f14 --- /dev/null +++ b/aegis/templates/copier-aegis-project/{{ project_slug }}/tests/services/test_query_service.py @@ -0,0 +1,526 @@ +""" +Tests for InsightQueryService sync query layer. +""" + +from datetime import datetime, timedelta + +from app.services.insights.constants import MetricKeys, Periods, SourceKeys +from app.services.insights.models import ( + InsightEvent, + InsightMetric, + InsightMetricType, + InsightSource, +) +from app.services.insights.query_service import InsightQueryService +from sqlmodel import Session + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _seed_source( + session: Session, key: str = SourceKeys.GITHUB_TRAFFIC +) -> InsightSource: + source = InsightSource( + key=key, display_name="Test Source", collection_interval_hours=6, enabled=True + ) + session.add(source) + session.flush() + return source + + +def _seed_metric_type( + session: Session, + source: InsightSource, + key: str = MetricKeys.CLONES, + unit: str = "count", +) -> InsightMetricType: + mt = InsightMetricType( + source_id=source.id, # type: ignore[arg-type] + key=key, + display_name=key.replace("_", " ").title(), + unit=unit, + ) + session.add(mt) + session.flush() + return mt + + +def _seed_metric( + session: Session, + metric_type: InsightMetricType, + date: datetime, + value: float, + period: str = Periods.DAILY, + metadata: dict | None = None, +) -> InsightMetric: + metric = InsightMetric( + date=date, + metric_type_id=metric_type.id, # type: ignore[arg-type] + value=value, + period=period, + ) + if metadata: + metric.metadata_ = metadata + session.add(metric) + session.flush() + return metric + + +def _seed_event( + session: Session, + event_type: str, + description: str, + date: datetime | None = None, + metadata: dict | None = None, +) -> InsightEvent: + event = InsightEvent( + date=date or datetime.now(), + event_type=event_type, + description=description, + ) + if metadata: + event.metadata_ = metadata + session.add(event) + session.flush() + return event + + +# --------------------------------------------------------------------------- +# Tests: get_daily +# --------------------------------------------------------------------------- + + +class TestGetDaily: + def test_returns_rows_after_cutoff(self, db_session: Session) -> None: + source = _seed_source(db_session) + mt = _seed_metric_type(db_session, source) + + now = datetime(2026, 4, 10) + for i in range(5): + _seed_metric(db_session, mt, now - timedelta(days=i), float(10 + i)) + + qs = InsightQueryService(session=db_session) + cutoff = now - timedelta(days=2) + rows = qs.get_daily(MetricKeys.CLONES, cutoff) + + assert len(rows) == 3 + assert all(r.date >= cutoff for r in rows) + + def test_returns_empty_for_unknown_key(self, db_session: Session) -> None: + qs = InsightQueryService(session=db_session) + rows = qs.get_daily("nonexistent_key", datetime(2020, 1, 1)) + assert rows == [] + + def test_ordering_is_ascending(self, db_session: Session) -> None: + source = _seed_source(db_session) + mt = _seed_metric_type(db_session, source) + + dates = [datetime(2026, 4, d) for d in [3, 1, 5, 2, 4]] + for d in dates: + _seed_metric(db_session, mt, d, 10.0) + + qs = InsightQueryService(session=db_session) + rows = qs.get_daily(MetricKeys.CLONES, datetime(2026, 4, 1)) + + result_dates = [r.date for r in rows] + assert result_dates == sorted(result_dates) + + +# --------------------------------------------------------------------------- +# Tests: get_daily_range +# --------------------------------------------------------------------------- + + +class TestGetDailyRange: + def test_includes_start_excludes_end(self, db_session: Session) -> None: + source = _seed_source(db_session) + mt = _seed_metric_type(db_session, source) + + for day in range(1, 6): + _seed_metric(db_session, mt, datetime(2026, 4, day), float(day)) + + qs = InsightQueryService(session=db_session) + rows = qs.get_daily_range( + MetricKeys.CLONES, datetime(2026, 4, 2), datetime(2026, 4, 4) + ) + + dates = {r.date for r in rows} + assert datetime(2026, 4, 2) in dates + assert datetime(2026, 4, 3) in dates + assert datetime(2026, 4, 4) not in dates + + def test_empty_range(self, db_session: Session) -> None: + source = _seed_source(db_session) + mt = _seed_metric_type(db_session, source) + _seed_metric(db_session, mt, datetime(2026, 4, 1), 10.0) + + qs = InsightQueryService(session=db_session) + rows = qs.get_daily_range( + MetricKeys.CLONES, datetime(2026, 5, 1), datetime(2026, 5, 10) + ) + assert rows == [] + + +# --------------------------------------------------------------------------- +# Tests: get_latest +# --------------------------------------------------------------------------- + + +class TestGetLatest: + def test_returns_most_recent(self, db_session: Session) -> None: + source = _seed_source(db_session) + mt = _seed_metric_type(db_session, source) + + _seed_metric(db_session, mt, datetime(2026, 4, 1), 10.0) + _seed_metric(db_session, mt, datetime(2026, 4, 5), 50.0) + _seed_metric(db_session, mt, datetime(2026, 4, 3), 30.0) + + qs = InsightQueryService(session=db_session) + latest = qs.get_latest(MetricKeys.CLONES) + + assert latest is not None + assert latest.value == 50.0 + assert latest.date == datetime(2026, 4, 5) + + def test_returns_none_for_unknown(self, db_session: Session) -> None: + qs = InsightQueryService(session=db_session) + assert qs.get_latest("nonexistent") is None + + +# --------------------------------------------------------------------------- +# Tests: get_events / get_all_events / get_events_in_range +# --------------------------------------------------------------------------- + + +class TestGetEvents: + def test_returns_event_period_rows(self, db_session: Session) -> None: + source = _seed_source(db_session) + mt = _seed_metric_type(db_session, source) + + _seed_metric(db_session, mt, datetime(2026, 4, 1), 10.0, Periods.DAILY) + _seed_metric(db_session, mt, datetime(2026, 4, 1), 1.0, Periods.EVENT) + _seed_metric(db_session, mt, datetime(2026, 4, 2), 2.0, Periods.EVENT) + + qs = InsightQueryService(session=db_session) + events = qs.get_events(MetricKeys.CLONES, datetime(2026, 4, 1)) + + assert len(events) == 2 + assert all(e.period == Periods.EVENT for e in events) + + def test_respects_cutoff(self, db_session: Session) -> None: + source = _seed_source(db_session) + mt = _seed_metric_type(db_session, source) + + _seed_metric(db_session, mt, datetime(2026, 3, 1), 1.0, Periods.EVENT) + _seed_metric(db_session, mt, datetime(2026, 4, 5), 2.0, Periods.EVENT) + + qs = InsightQueryService(session=db_session) + events = qs.get_events(MetricKeys.CLONES, datetime(2026, 4, 1)) + + assert len(events) == 1 + assert events[0].date == datetime(2026, 4, 5) + + +class TestGetAllEvents: + def test_returns_all_regardless_of_date(self, db_session: Session) -> None: + source = _seed_source(db_session) + mt = _seed_metric_type(db_session, source) + + _seed_metric(db_session, mt, datetime(2020, 1, 1), 1.0, Periods.EVENT) + _seed_metric(db_session, mt, datetime(2026, 4, 1), 2.0, Periods.EVENT) + + qs = InsightQueryService(session=db_session) + events = qs.get_all_events(MetricKeys.CLONES) + + assert len(events) == 2 + + def test_ascending_order(self, db_session: Session) -> None: + source = _seed_source(db_session) + mt = _seed_metric_type(db_session, source) + + _seed_metric(db_session, mt, datetime(2026, 4, 5), 2.0, Periods.EVENT) + _seed_metric(db_session, mt, datetime(2026, 4, 1), 1.0, Periods.EVENT) + + qs = InsightQueryService(session=db_session) + events = qs.get_all_events(MetricKeys.CLONES) + + assert events[0].date < events[1].date + + +class TestGetEventsInRange: + def test_includes_start_excludes_end(self, db_session: Session) -> None: + source = _seed_source(db_session) + mt = _seed_metric_type(db_session, source) + + _seed_metric(db_session, mt, datetime(2026, 4, 1), 1.0, Periods.EVENT) + _seed_metric(db_session, mt, datetime(2026, 4, 3), 2.0, Periods.EVENT) + _seed_metric(db_session, mt, datetime(2026, 4, 5), 3.0, Periods.EVENT) + + qs = InsightQueryService(session=db_session) + events = qs.get_events_in_range( + MetricKeys.CLONES, datetime(2026, 4, 1), datetime(2026, 4, 5) + ) + + assert len(events) == 2 + dates = {e.date for e in events} + assert datetime(2026, 4, 1) in dates + assert datetime(2026, 4, 3) in dates + assert datetime(2026, 4, 5) not in dates + + +# --------------------------------------------------------------------------- +# Tests: get_all_metrics +# --------------------------------------------------------------------------- + + +class TestGetAllMetrics: + def test_returns_all_periods(self, db_session: Session) -> None: + source = _seed_source(db_session) + mt = _seed_metric_type(db_session, source) + + _seed_metric(db_session, mt, datetime(2026, 4, 1), 10.0, Periods.DAILY) + _seed_metric(db_session, mt, datetime(2026, 4, 1), 1.0, Periods.EVENT) + + qs = InsightQueryService(session=db_session) + metrics = qs.get_all_metrics(MetricKeys.CLONES) + + assert len(metrics) == 2 + periods = {m.period for m in metrics} + assert Periods.DAILY in periods + assert Periods.EVENT in periods + + def test_descending_order(self, db_session: Session) -> None: + source = _seed_source(db_session) + mt = _seed_metric_type(db_session, source) + + _seed_metric(db_session, mt, datetime(2026, 4, 1), 1.0) + _seed_metric(db_session, mt, datetime(2026, 4, 5), 5.0) + + qs = InsightQueryService(session=db_session) + metrics = qs.get_all_metrics(MetricKeys.CLONES) + + assert metrics[0].date > metrics[1].date + + +# --------------------------------------------------------------------------- +# Tests: sum_range / sum_daily +# --------------------------------------------------------------------------- + + +class TestSumRange: + def test_sums_values_in_range(self, db_session: Session) -> None: + source = _seed_source(db_session) + mt = _seed_metric_type(db_session, source) + + _seed_metric(db_session, mt, datetime(2026, 4, 1), 10.0) + _seed_metric(db_session, mt, datetime(2026, 4, 2), 20.0) + _seed_metric(db_session, mt, datetime(2026, 4, 3), 30.0) + _seed_metric(db_session, mt, datetime(2026, 4, 4), 40.0) + + qs = InsightQueryService(session=db_session) + total = qs.sum_range( + MetricKeys.CLONES, datetime(2026, 4, 2), datetime(2026, 4, 4) + ) + + assert total == 50 # 20 + 30 + + def test_zero_for_empty_range(self, db_session: Session) -> None: + qs = InsightQueryService(session=db_session) + total = qs.sum_range("nonexistent", datetime(2026, 1, 1), datetime(2026, 1, 2)) + assert total == 0 + + +class TestSumDaily: + def test_sums_from_cutoff(self, db_session: Session) -> None: + source = _seed_source(db_session) + mt = _seed_metric_type(db_session, source) + + _seed_metric(db_session, mt, datetime(2026, 3, 1), 100.0) # before cutoff + _seed_metric(db_session, mt, datetime(2026, 4, 1), 10.0) + _seed_metric(db_session, mt, datetime(2026, 4, 2), 20.0) + _seed_metric(db_session, mt, datetime(2026, 4, 3), 30.0) + + qs = InsightQueryService(session=db_session) + total = qs.sum_daily(MetricKeys.CLONES, datetime(2026, 4, 1)) + + assert total == 60 # 10 + 20 + 30 (excludes 100) + + +# --------------------------------------------------------------------------- +# Tests: InsightEvent queries +# --------------------------------------------------------------------------- + + +class TestInsightEvents: + def test_returns_all_when_no_filters(self, db_session: Session) -> None: + _seed_event(db_session, "release", "v1.0", datetime(2026, 4, 1)) + _seed_event(db_session, "star", "Star #50", datetime(2026, 4, 2)) + _seed_event(db_session, "reddit_post", "NWN post", datetime(2026, 4, 3)) + + qs = InsightQueryService(session=db_session) + events = qs.get_insight_events() + + assert len(events) == 3 + + def test_filters_by_type(self, db_session: Session) -> None: + _seed_event(db_session, "release", "v1.0", datetime(2026, 4, 1)) + _seed_event(db_session, "star", "Star #50", datetime(2026, 4, 2)) + _seed_event(db_session, "reddit_post", "NWN post", datetime(2026, 4, 3)) + + qs = InsightQueryService(session=db_session) + events = qs.get_insight_events(type_filter={"release", "star"}) + + assert len(events) == 2 + types = {e.event_type for e in events} + assert types == {"release", "star"} + + def test_filters_by_cutoff(self, db_session: Session) -> None: + _seed_event(db_session, "release", "old", datetime(2026, 3, 1)) + _seed_event(db_session, "release", "new", datetime(2026, 4, 5)) + + qs = InsightQueryService(session=db_session) + events = qs.get_insight_events(cutoff=datetime(2026, 4, 1)) + + assert len(events) == 1 + assert events[0].description == "new" + + def test_combined_filters(self, db_session: Session) -> None: + _seed_event(db_session, "release", "old release", datetime(2026, 3, 1)) + _seed_event(db_session, "release", "new release", datetime(2026, 4, 5)) + _seed_event(db_session, "star", "new star", datetime(2026, 4, 5)) + + qs = InsightQueryService(session=db_session) + events = qs.get_insight_events( + cutoff=datetime(2026, 4, 1), type_filter={"release"} + ) + + assert len(events) == 1 + assert events[0].description == "new release" + + +class TestGetRecentInsightEvents: + def test_returns_limited_results(self, db_session: Session) -> None: + for i in range(20): + _seed_event( + db_session, + "star", + f"Star #{i}", + datetime(2026, 4, 1) + timedelta(hours=i), + ) + + qs = InsightQueryService(session=db_session) + events = qs.get_recent_insight_events(limit=5) + + assert len(events) == 5 + + +class TestGetMilestoneEvents: + def test_returns_milestones_and_features(self, db_session: Session) -> None: + _seed_event( + db_session, "milestone_github", "New ATH: 100 clones", datetime(2026, 4, 1) + ) + _seed_event( + db_session, "milestone_pypi", "New ATH: 50 downloads", datetime(2026, 4, 2) + ) + _seed_event(db_session, "feature", "Added Mandarin CLI", datetime(2026, 4, 3)) + _seed_event(db_session, "release", "v0.6.9", datetime(2026, 4, 4)) + _seed_event(db_session, "star", "Star #100", datetime(2026, 4, 5)) + + qs = InsightQueryService(session=db_session) + milestones = qs.get_milestone_events() + + assert len(milestones) == 3 + types = {m.event_type for m in milestones} + assert types == {"milestone_github", "milestone_pypi", "feature"} + + +# --------------------------------------------------------------------------- +# Tests: get_release_metrics / get_sources +# --------------------------------------------------------------------------- + + +class TestGetReleaseMetrics: + def test_returns_release_rows(self, db_session: Session) -> None: + source = _seed_source(db_session) + releases_mt = _seed_metric_type(db_session, source, "releases") + other_mt = _seed_metric_type(db_session, source, MetricKeys.CLONES) + + _seed_metric( + db_session, + releases_mt, + datetime(2026, 4, 1), + 1.0, + metadata={"tag": "v0.6.9"}, + ) + _seed_metric(db_session, other_mt, datetime(2026, 4, 1), 100.0) + + qs = InsightQueryService(session=db_session) + releases = qs.get_release_metrics() + + assert len(releases) == 1 + assert releases[0].metadata_.get("tag") == "v0.6.9" + + +class TestGetSources: + def test_returns_all_sources(self, db_session: Session) -> None: + _seed_source(db_session, SourceKeys.GITHUB_TRAFFIC) + _seed_source(db_session, SourceKeys.PYPI) + + qs = InsightQueryService(session=db_session) + sources = qs.get_sources() + + assert len(sources) == 2 + keys = {s.key for s in sources} + assert SourceKeys.GITHUB_TRAFFIC in keys + assert SourceKeys.PYPI in keys + + +# --------------------------------------------------------------------------- +# Tests: compute_cutoffs +# --------------------------------------------------------------------------- + + +class TestComputeCutoffs: + def test_normal_days(self) -> None: + cutoff, prev_cutoff = InsightQueryService.compute_cutoffs(14) + + now = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) + expected_cutoff = now - timedelta(days=14) + expected_prev = expected_cutoff - timedelta(days=14) + + assert cutoff == expected_cutoff + assert prev_cutoff == expected_prev + + def test_all_time(self) -> None: + cutoff, prev_cutoff = InsightQueryService.compute_cutoffs(9999) + + assert cutoff == datetime(2000, 1, 1) + assert prev_cutoff == datetime(2000, 1, 1) + + +# --------------------------------------------------------------------------- +# Tests: Type caching +# --------------------------------------------------------------------------- + + +class TestTypeCaching: + def test_caches_type_lookups(self, db_session: Session) -> None: + source = _seed_source(db_session) + _seed_metric_type(db_session, source) + + qs = InsightQueryService(session=db_session) + + # First call populates cache + result1 = qs._get_type(MetricKeys.CLONES) + assert result1 is not None + + # Second call returns cached value + result2 = qs._get_type(MetricKeys.CLONES) + assert result2 is result1 # Same object (from cache, not re-queried) + + # Cache stores None for missing keys too + result3 = qs._get_type("nonexistent") + assert result3 is None + assert "nonexistent" in qs._type_cache diff --git a/copier.yml b/copier.yml index 718b632a..3fb8ff73 100644 --- a/copier.yml +++ b/copier.yml @@ -246,6 +246,35 @@ include_comms: help: "Include communications service (email/SMS)?" default: false +include_insights: + type: bool + help: "Include insights service (adoption metrics and analytics)?" + default: false + +insights_github: + type: bool + help: "Include GitHub Traffic + Stargazers data collection?" + default: true + when: "{{ include_insights }}" + +insights_pypi: + type: bool + help: "Include PyPI download stats collection?" + default: true + when: "{{ include_insights }}" + +insights_plausible: + type: bool + help: "Include Plausible docs analytics collection?" + default: false + when: "{{ include_insights }}" + +insights_reddit: + type: bool + help: "Include Reddit post tracking?" + default: false + when: "{{ include_insights }}" + # Internal computed values (using Jinja2) _has_additional_components: "{{ include_scheduler or include_redis or include_worker or include_database or include_cache or include_ingress or include_observability }}" _include_migrations: "{{ include_auth }}" diff --git a/docs/components/index.md b/docs/components/index.md index 961a297b..067c83fa 100644 --- a/docs/components/index.md +++ b/docs/components/index.md @@ -4,7 +4,7 @@ Components are the **infrastructure building blocks** of your Aegis Stack applic !!! info "Components vs Services" **Components** = Infrastructure capabilities (database, workers, scheduling) - **Services** = Business functionality (auth, payments, AI integrations) + **Services** = Business functionality (auth, AI, comms, insights) See **[Services Overview](../services/index.md)** for business-level features. diff --git a/docs/images/insights_clones.png b/docs/images/insights_clones.png new file mode 100644 index 00000000..cac445e0 Binary files /dev/null and b/docs/images/insights_clones.png differ diff --git a/docs/images/insights_dashboard.png b/docs/images/insights_dashboard.png new file mode 100644 index 00000000..a01f6571 Binary files /dev/null and b/docs/images/insights_dashboard.png differ diff --git a/docs/images/insights_docs.png b/docs/images/insights_docs.png new file mode 100644 index 00000000..d59686c2 Binary files /dev/null and b/docs/images/insights_docs.png differ diff --git a/docs/images/insights_downloads.png b/docs/images/insights_downloads.png new file mode 100644 index 00000000..61de4442 Binary files /dev/null and b/docs/images/insights_downloads.png differ diff --git a/docs/images/insights_stars.png b/docs/images/insights_stars.png new file mode 100644 index 00000000..7870fb27 Binary files /dev/null and b/docs/images/insights_stars.png differ diff --git a/docs/overseer/index.md b/docs/overseer/index.md index 48f1e341..e24fb0f8 100644 --- a/docs/overseer/index.md +++ b/docs/overseer/index.md @@ -23,14 +23,14 @@ You work with Datadog until management decides to migrate to New Relic. Or you'r The dashboard displays: - **Component Cards**: Backend, Database, Worker, Scheduler health -- **Service Cards**: Auth, AI, Comms health (when included) +- **Service Cards**: Auth, AI, Comms, Insights health (when included) - **Header**: Overall health summary and theme toggle - **Auto-refresh**: Polls health endpoint every 30 seconds ## Current Capabilities - Component health monitoring (Backend, Database, Worker, Scheduler) -- Service health monitoring (Auth, AI, Comms) +- Service health monitoring (Auth, AI, Comms, Insights) - System metrics (CPU, memory, disk usage) - Status hierarchy (Healthy, Warning, Unhealthy, Info) - Web dashboard with auto-refresh (30-second polling) diff --git a/docs/services/index.md b/docs/services/index.md index 6d364b25..3fc5139e 100644 --- a/docs/services/index.md +++ b/docs/services/index.md @@ -3,7 +3,7 @@ Services are **business-level functionality** that your application provides to users. While Components handle infrastructure concerns (databases, workers, scheduling), Services implement specific business capabilities like authentication, payments, or AI integrations. !!! info "Services vs Components" - **Services** = What your app does (auth, payments, AI) + **Services** = What your app does (auth, AI, insights) **Components** = How your app works (database, workers, API) ## Service Architecture @@ -14,6 +14,7 @@ graph TB Auth[🔐 Auth Service
JWT + User Management
Registration, Login, Profiles] AI[🤖 AI Service
PydanticAI Integration
Multi-Provider Chat] Comms[📧 Comms Service
Email, SMS, Voice
Resend + Twilio] + Insights[📊 Insights Service
Adoption Metrics
GitHub, PyPI, Plausible] end subgraph "Components Layer (Infrastructure)" @@ -29,9 +30,12 @@ graph TB Auth --> Database AI --> Backend Comms --> Backend + Insights --> Backend + Insights --> Database style Auth fill:#e8f5e8,stroke:#2e7d32,stroke-width:3px style AI fill:#e8f5e8,stroke:#2e7d32,stroke-width:3px + style Insights fill:#fff3e0,stroke:#f57c00,stroke-width:2px,stroke-dasharray: 5 5 style Comms fill:#e8f5e8,stroke:#2e7d32,stroke-width:3px style Backend fill:#e1f5fe,stroke:#1976d2,stroke-width:2px style Database fill:#fff3e0,stroke:#f57c00,stroke-width:2px @@ -119,8 +123,9 @@ graph LR | Service | Status | Description | Required Components | |---------|--------|-------------|-------------------| | **auth** | ✅ Available | User authentication and authorization with JWT tokens | backend, database | -| **ai** | 🧪 Experimental | Multi-provider AI chat with PydanticAI (OpenAI, Anthropic, Google, Groq, etc.) | backend | -| **comms** | 🧪 Experimental | Email (Resend), SMS, and voice calls (Twilio) | backend | +| **ai** | ✅ Available | Multi-provider AI chat with PydanticAI (OpenAI, Anthropic, Google, Groq, etc.) | backend | +| **comms** | ✅ Available | Email (Resend), SMS, and voice calls (Twilio) | backend | +| **insights** | 🧪 Experimental | Adoption metrics tracking (GitHub, PyPI, Plausible, Reddit) | backend, database, scheduler | ## Service Categories @@ -142,6 +147,10 @@ graph TB Push[push
🚧 Future: Push Notifications] end + subgraph "📊 Analytics Services" + InsightsService[insights
GitHub, PyPI, Plausible, Reddit] + end + style AuthJWT fill:#e8f5e8,stroke:#2e7d32,stroke-width:3px style AuthOAuth fill:#f0f0f0,stroke:#757575,stroke-dasharray: 5 5 style AuthSAML fill:#f0f0f0,stroke:#757575,stroke-dasharray: 5 5 @@ -149,6 +158,7 @@ graph TB style AILangChain fill:#f0f0f0,stroke:#757575,stroke-dasharray: 5 5 style CommsService fill:#e8f5e8,stroke:#2e7d32,stroke-width:3px style Push fill:#f0f0f0,stroke:#757575,stroke-dasharray: 5 5 + style InsightsService fill:#fff3e0,stroke:#f57c00,stroke-width:2px,stroke-dasharray: 5 5 ``` ## Service Development Patterns @@ -254,5 +264,6 @@ Services automatically appear in the health dashboard alongside components, prov - **[Authentication Service](auth/index.md)** - Complete JWT auth implementation - **[AI Service](ai/index.md)** - Multi-provider AI chat with PydanticAI - **[Communications Service](comms/index.md)** - Email, SMS, and voice via Resend/Twilio +- **[Insights Service](insights/index.md)** - Adoption metrics tracking (GitHub, PyPI, Plausible, Reddit) *(experimental)* - **[CLI Reference](../cli-reference.md)** - Service command reference - **[Components Overview](../components/index.md)** - Infrastructure layer \ No newline at end of file diff --git a/docs/services/insights/cli.md b/docs/services/insights/cli.md new file mode 100644 index 00000000..e05b5cf2 --- /dev/null +++ b/docs/services/insights/cli.md @@ -0,0 +1,151 @@ +# CLI Commands + +The Insights CLI provides commands for data collection, status monitoring, and manual event management. + +## collect + +Run data collection for one or all enabled sources. + +```bash +# Collect all enabled sources +my-app insights collect + +# Collect a specific source +my-app insights collect github_traffic +my-app insights collect pypi +my-app insights collect plausible + +# Backfill historical data (PyPI and Plausible support this) +my-app insights collect pypi --lookback-days 365 +my-app insights collect plausible --lookback-days 365 +``` + +### Options + +| Option | Short | Default | Description | +|--------|-------|---------|-------------| +| `--lookback-days` | `-d` | 1 | Number of days to fetch. Higher values for backfill. | + +### Output + +``` +Collecting from all enabled sources... + github_traffic: 6 written, 52 skipped + github_stars: 0 written, 99 skipped + pypi: 7 written, 78 skipped + Records broken: PyPI Best Single Day: 850 (was 334) + plausible: 4 written, 0 skipped + reddit: 0 written, 0 skipped + github_events: 2 written, 24 skipped +``` + +When a new all-time record is detected, it's reported in the output and automatically created as a milestone event. + +## status + +Display current collection status across all sources. + +```bash +my-app insights status +``` + +### Output + +``` +Insights Status + + Sources ++-----------------------+----------+----------------------+-----------+ +| Source | Enabled | Last Collected | Metrics | ++-----------------------+----------+----------------------+-----------+ +| GitHub Traffic | Yes | 2026-04-11 12:02:58 | 6 | +| GitHub Stars | Yes | 2026-04-11 09:05:30 | 1 | +| PyPI | Yes | 2026-04-11 12:02:59 | 7 | +| Plausible | Yes | 2026-04-11 12:05:32 | 6 | +| Reddit | Yes | 2026-04-11 09:05:32 | 1 | +| GitHub Events | Yes | 2026-04-11 09:05:34 | 4 | ++-----------------------+----------+----------------------+-----------+ +``` + +## stars + +Display top stargazers with profile metadata. + +```bash +# Show latest 10 stars +my-app insights stars + +# Show more +my-app insights stars -n 20 +``` + +### Options + +| Option | Short | Default | Description | +|--------|-------|---------|-------------| +| `-n` / `--limit` | `-n` | 10 | Number of stars to display | + +## records + +Display all-time records for each metric type. + +```bash +my-app insights records +``` + +## sources + +List all configured insight sources and their collection intervals. + +```bash +my-app insights sources +``` + +## reddit add + +Add a Reddit post for tracking. + +```bash +my-app insights reddit add https://reddit.com/r/FastAPI/comments/abc123/your-post +``` + +The command fetches the post's current stats (upvotes, comments, subreddit, title) and creates both a metric row and a timeline event. + +## event + +Log a manual event for the timeline. + +```bash +# Basic event (today's date) +my-app insights event feature "Added Japanese localization" + +# Backdated event +my-app insights event external "Featured in Python Weekly" --date 2026-03-15 + +# Milestone with category (for record tracking on Overview) +my-app insights event milestone_github "900 clones" --date 2026-04-15 --category daily_clones + +# Feature launch +my-app insights event feature "Traefik + Deploy shipped (v0.6.0)" --date 2026-02-09 +``` + +### Options + +| Option | Description | +|--------|-------------| +| `--date` | Event date in YYYY-MM-DD format. Defaults to today. | +| `--category` | Milestone category for record deduplication (e.g., `daily_clones`, `pypi_daily`). Used by the Overview milestones grid to show only the latest record per category. | + +### Event types + +| Type | Color | Description | +|------|-------|-------------| +| `release` | Green | Version releases (auto-detected) | +| `star` | Amber | Star events (auto-detected) | +| `reddit_post` | Orange | Reddit posts (via `reddit add`) | +| `feature` | Cyan | Feature launches | +| `milestone_github` | Pink | GitHub metric records (auto-detected) | +| `milestone_pypi` | Pink | PyPI metric records (auto-detected) | +| `anomaly_github` | Red | Data anomalies | +| `localization` | Blue | Localization events | +| `external` | Gray | External events | diff --git a/docs/services/insights/configuration.md b/docs/services/insights/configuration.md new file mode 100644 index 00000000..0d3de6e1 --- /dev/null +++ b/docs/services/insights/configuration.md @@ -0,0 +1,106 @@ +# Configuration + +All Insights configuration is done through environment variables in your `.env` file. + +## GitHub Configuration + +```bash +# Required for GitHub Traffic and Stars collectors +INSIGHT_GITHUB_TOKEN=ghp_your_personal_access_token +INSIGHT_GITHUB_OWNER=your-username +INSIGHT_GITHUB_REPO=your-repo + +# Collection interval (hours) +INSIGHT_COLLECTION_GITHUB_HOURS=6 +``` + +### Token requirements + +The GitHub token needs `repo` scope for traffic data access. Stars data requires the `read:user` scope for profile fetching. + +## PyPI Configuration + +```bash +# Required for PyPI collector +INSIGHT_PYPI_PACKAGE=your-package-name + +# Collection interval (hours) +INSIGHT_COLLECTION_PYPI_HOURS=24 +``` + +No API key needed - PyPI data is queried from the public ClickHouse endpoint. + +## Plausible Configuration + +```bash +# Required for Plausible collector +INSIGHT_PLAUSIBLE_API_KEY=your_plausible_api_key +INSIGHT_PLAUSIBLE_SITES=your-site.com + +# Collection interval (hours) +INSIGHT_COLLECTION_PLAUSIBLE_HOURS=1 +``` + +### Multiple sites + +Comma-separate multiple site IDs: + +```bash +INSIGHT_PLAUSIBLE_SITES=docs.example.com,blog.example.com +``` + +### API key + +Generate at Plausible dashboard > Settings > API Keys. Requires read access. + +## Reddit Configuration + +No configuration needed. Reddit posts are tracked on-demand via the CLI: + +```bash +my-app insights reddit add https://reddit.com/r/subreddit/comments/id/title +``` + +## Collection Intervals + +Each source has a configurable collection interval. The scheduler runs collections automatically. + +| Source | Default | Env Variable | Notes | +|--------|---------|-------------|-------| +| GitHub Traffic | 6h | `INSIGHT_COLLECTION_GITHUB_HOURS` | Must run within 14 days or data is lost | +| GitHub Stars | 24h | Fixed | Stars don't change frequently | +| GitHub Events | 24h | Fixed | ClickHouse data updates daily | +| PyPI | 24h | `INSIGHT_COLLECTION_PYPI_HOURS` | ClickHouse has ~2 day lag | +| Plausible | 24h | `INSIGHT_COLLECTION_PLAUSIBLE_HOURS` | Lower intervals (1h) useful for near-real-time data | +| Reddit | On-demand | N/A | Manual via CLI | + +### Staleness detection + +Sources are considered stale after 3x their configured interval. A stale source triggers a warning badge on the Insights card in Overseer. + +## Scheduler Setup + +For automated collection, ensure the scheduler component is included and the jobs are registered: + +```bash +# Force job registration on restart +SCHEDULER_FORCE_UPDATE=true +``` + +After the first restart with `SCHEDULER_FORCE_UPDATE=true`, set it back to `false`. The jobs persist in the scheduler database. + +## Database + +Insights requires the database component. All data is stored in SQLite (default) or PostgreSQL. + +### Tables created + +| Table | Purpose | +|-------|---------| +| `insight_source` | Source registry (GitHub, PyPI, etc.) | +| `insight_metric_type` | Metric type definitions | +| `insight_metric` | Time-series data with JSONB metadata | +| `insight_record` | All-time records (reserved for future use) | +| `insight_event` | Timeline events (releases, stars, milestones) | + +Tables are created automatically via the database init hook. Seed data (sources + metric types) is populated on first startup. diff --git a/docs/services/insights/dashboard.md b/docs/services/insights/dashboard.md new file mode 100644 index 00000000..87528d34 --- /dev/null +++ b/docs/services/insights/dashboard.md @@ -0,0 +1,90 @@ +# Overseer Dashboard + +The Insights service integrates with the Overseer dashboard as an interactive modal with 7 tabs. + +## Overview Tab + +![Insights Overview](../../images/insights_dashboard.png) + +The landing tab shows a high-level summary with metric cards, a recent activity feed, and a key milestones grid. + +Metric cards show period-over-period change arrows comparing the current 14-day window against the previous 14 days. The milestones grid shows the all-time record for each tracked category - only the highest value per category is displayed. + +The activity feed uses the same expandable row pattern as the main Overseer activity panel. Expanding a release shows a link to the GitHub release page. Expanding a Reddit post shows upvotes, comments, and a link to the post. + +## GitHub Tab + +Interactive GitHub Traffic with full clone/view history going back as far as data has been collected. + +![GitHub Clones](../../images/insights_clones.png) + +### Features + +- Date range selection (7d, 14d, 1m, 3m, 6m, 1y, All) +- Clones + Unique Cloners line chart with release annotation tooltips +- Views + Unique Visitors line chart +- Activity Summary stacked bar chart (Code, Issues, PRs, Community, Releases) +- Clickable referrers and popular paths (links to GitHub) +- Event chips with date highlighting - click a chip to highlight the data point on the chart + +## Stars Tab + +Cumulative star history chart showing growth over time. + +![Star History](../../images/insights_stars.png) + +- One data point per day that had star activity (no gap filling) +- Tooltips show star numbers and usernames +- Dynamic Y-axis scaling at zoomed ranges +- Event chips filtered to only show events on dates with star activity + +## PyPI Tab + +Download analytics with human vs bot separation. + +![PyPI Downloads](../../images/insights_downloads.png) + +### Features + +- **CI/Mirror toggle** - Switch between total downloads (including bots) and human-only (pip + uv) +- Stacked area chart showing bot vs human split when CI toggle is on +- Version breakdown bar chart +- Side-by-side tables: Downloads by Version and Daily Downloads (scrollable) +- Period-over-period arrows on the Total Downloads card + +## Docs Tab (Plausible) + +Documentation site analytics from Plausible. + +![Docs Analytics](../../images/insights_docs.png) + +### Features + +- Visitors + Pageviews dual-series line chart +- Country breakdown bar chart (range-aware - updates with date selection) +- Top Pages table with clickable links to your documentation site +- Bounce rate with inverted arrow (green when it decreases) + +## Reddit Tab + +Tracked Reddit post performance. Each post shows subreddit, title, upvotes, comments, upvote ratio, and a clickable link. + +Posts are added on-demand via the CLI: + +```bash +my-app insights reddit add https://reddit.com/r/FastAPI/comments/abc123/your-post +``` + +## Settings Tab + +Data source status and configuration. Shows each source with Active/Stale/Disabled status and last collection timestamp. + +## Shared Controls + +All interactive tabs share a common base: + +- **Date range chips** - 7d, 14d, 1m, 3m, 6m, 1y, All +- **Events toggle** - Show/hide event annotation chips +- **Event grouping** - At wider ranges, same-type events are grouped (weekly at 3m, monthly at 6m+) +- **Date highlighting** - Click an event chip to highlight all chart points in that date range +- **Last updated** - Shows the most recent data point date diff --git a/docs/services/insights/data-sources.md b/docs/services/insights/data-sources.md new file mode 100644 index 00000000..3078258b --- /dev/null +++ b/docs/services/insights/data-sources.md @@ -0,0 +1,199 @@ +# Data Sources + +Insights collects from 6 data sources across 4 external APIs. Each source has a dedicated collector that handles authentication, rate limiting, and data normalization. + +## GitHub Traffic + +**Source key:** `github_traffic` +**API:** GitHub REST API v3 +**Requires:** `INSIGHT_GITHUB_TOKEN` (personal access token with `repo` scope) +**Default interval:** Every 6 hours + +Collects the 14-day rolling traffic data from GitHub's Traffic API. + +### Metrics collected + +| Metric | Period | Description | +|--------|--------|-------------| +| clones | Daily | Total git clones per day | +| unique_cloners | Daily | Unique cloners per day | +| views | Daily | Repository page views per day | +| unique_visitors | Daily | Unique visitors per day | +| referrers | Snapshot | Top referral sources (Google, Reddit, etc.) | +| popular_paths | Snapshot | Most visited repo pages | + +### Important notes + +- GitHub only retains 14 days of traffic data. The collector preserves it permanently. +- **Run the collector at least once every 14 days** or you'll lose data that rolls off the window. +- Daily unique cloners overcount across multi-day ranges (a user who clones Monday and Tuesday counts as 1 in GitHub's 14-day window but 2 in summed daily counts). +- Clones are not deduplicated - every `git clone` counts, even from the same person. + +## GitHub Stars + +**Source key:** `github_stars` +**API:** GitHub REST API v3 (Stargazers endpoint) +**Requires:** `INSIGHT_GITHUB_TOKEN` +**Default interval:** Every 24 hours + +Fetches all stargazers with their full GitHub profiles. + +### Metrics collected + +| Metric | Period | Description | +|--------|--------|-------------| +| new_star | Event | One row per star with user profile metadata | + +### Star profile metadata + +Each star event stores: username, name, location, company, bio, email, blog, followers, following, public repos, stars given, account age, GitHub Pro status. + +### Star events + +The collector automatically creates `InsightEvent` entries grouped by day: + +- Single star days: `#99 - ncthuc` +- Multi-star days: `#80-#85 (6 stars)` + +## GitHub Events (ClickHouse) + +**Source key:** `github_events` +**API:** ClickHouse public SQL endpoint (`sql-clickhouse.clickhouse.com`) +**Requires:** No authentication +**Default interval:** Every 24 hours + +Queries the public GitHub events dataset for repository-specific activity. + +### Metrics collected + +| Metric | Period | Description | +|--------|--------|-------------| +| forks | Event | Individual fork events with actor name | +| releases | Event | Release events with tag, name, actor | +| star_events | Daily | Daily star count from ClickHouse (separate from API stars) | +| activity_summary | Daily | Breakdown by event type (push, issues, PRs, etc.) | + +### Activity summary fields + +`push`, `issues`, `pull_requests`, `pull_request_reviews`, `issue_comments`, `forks`, `stars`, `releases`, `creates`, `deletes` + +## PyPI Downloads + +**Source key:** `pypi` +**API:** ClickHouse public SQL endpoint (PyPI dataset) +**Requires:** `INSIGHT_PYPI_PACKAGE` (package name) +**Default interval:** Every 24 hours + +Queries PyPI download data from the public ClickHouse mirror of BigQuery's `pypi.file_downloads` table. + +### Metrics collected + +| Metric | Period | Description | +|--------|--------|-------------| +| downloads_total | Cumulative | All-time total downloads | +| downloads_daily | Daily | Total downloads per day | +| downloads_daily_human | Daily | Human-only downloads (pip + uv) | +| downloads_by_version | Daily | Per-version breakdown with human/bot split | +| downloads_by_country | Daily | Country breakdown | +| downloads_by_installer | Daily | Installer breakdown (pip, uv, bandersnatch, etc.) | +| downloads_by_type | Daily | Distribution type (sdist, bdist_wheel) | + +### Human vs Bot Classification + +Downloads are classified by installer: + +| Installer | Classification | Why | +|-----------|---------------|-----| +| pip | Human | Direct user install | +| uv | Human | Direct user install | +| bandersnatch | Bot | PyPI mirror sync | +| Browser | Bot | Security scanners (uniform download pattern) | +| requests | Bot | Scripts and automation | +| (empty) | Bot | No user agent = automated | +| Nexus | Bot | Sonatype corporate proxy | +| devpi | Bot | PyPI cache server | +| OS | Bot | OS package manager | + +Typically ~97% of downloads are bots. The human count (pip + uv only) is the real adoption signal. + +!!! example "Musings: On Download Classification (April 12th, 2026)" + Honestly, I'm still trying to sort out these downloads. Hell, it's the reason why I built this service. I am currently taking a pretty conservative stance on what is and isn't a bot. I know the real number is most likely larger, I just need more information before I can put a new category in here, something like "behind mirror but human triggered", or something like that. The version distribution chart helps though. Newer versions getting pulled way more than old ones tells me there's real demand behind some of this "bot" traffic. More to come. + +### Backfill support + +```bash +my-app insights collect pypi --lookback-days 365 +``` + +ClickHouse retains PyPI data for the full history. Backfill once and daily collection maintains it. + +## Plausible Analytics + +**Source key:** `plausible` +**API:** Plausible API v1 +**Requires:** `INSIGHT_PLAUSIBLE_API_KEY`, `INSIGHT_PLAUSIBLE_SITES` +**Default interval:** Every 1 hour + +Collects documentation site visitor metrics. + +### Metrics collected + +| Metric | Period | Description | +|--------|--------|-------------| +| visitors | Daily | Unique visitors per day | +| pageviews | Daily | Total page views per day | +| avg_duration | Daily | Average visit duration (seconds) | +| bounce_rate | Daily | Bounce rate percentage | +| top_pages | Daily | Per-page visitor and duration breakdown | +| top_countries | Daily | Per-country visitor breakdown | + +### Backfill support + +```bash +my-app insights collect plausible --lookback-days 365 +``` + +Per-day country and page breakdowns are stored for each active day, enabling range-aware filtering in the dashboard. + +## Reddit Posts + +**Source key:** `reddit` +**API:** Reddit JSON API (append `.json` to any post URL) +**Requires:** No authentication +**Collection:** On-demand only (not scheduled) + +Tracks Reddit post performance over time. + +### Metrics collected + +| Metric | Period | Description | +|--------|--------|-------------| +| post_stats | Event | Upvotes, comments, upvote ratio per post | + +### Adding a post + +```bash +my-app insights reddit add https://reddit.com/r/FastAPI/comments/abc123/your-post +``` + +The collector fetches current stats and creates an `InsightEvent` for the timeline. + +## Data Storage + +All metrics use a single generic table (`insight_metric`) with JSONB metadata. This means: + +- **No migrations** when data shapes change +- **Flexible metadata** per metric type +- **Consistent querying** across all sources +- **One upsert pattern** for all collectors + +``` +insight_metric: + id, date, metric_type_id, value, period, metadata, created_at + +insight_metric_type: + id, key, display_name, unit, source_id + +insight_source: + id, key, display_name, collection_interval_hours, enabled, last_collected_at +``` diff --git a/docs/services/insights/examples.md b/docs/services/insights/examples.md new file mode 100644 index 00000000..a5c73ac1 --- /dev/null +++ b/docs/services/insights/examples.md @@ -0,0 +1,105 @@ +# Examples + +Real-world patterns for using the Insights service effectively. + +## Initial Setup and Backfill + +After creating a project with Insights, backfill historical data before starting scheduled collection: + +```bash +# 1. Configure .env with API keys +# 2. Run initial collection for all sources +my-app insights collect + +# 3. Backfill PyPI (goes back to package creation) +my-app insights collect pypi --lookback-days 365 + +# 4. Backfill Plausible (goes back to site creation) +my-app insights collect plausible --lookback-days 365 + +# 5. GitHub Traffic only has 14 days - collect immediately, then keep the scheduler running +# 6. GitHub Stars and Events backfill automatically on first collection +``` + +## Understanding the Bot vs Human Split + +PyPI download numbers are dominated by automated traffic. Here's how to interpret them: + +``` +Total PyPI Downloads: 16,485 +Human Downloads (pip + uv): 462 +Bot/Mirror Traffic: 97% +``` + +The 462 is your real adoption number. The 16,485 is useful for public comparison (everyone's numbers are equally inflated) but not for internal decision-making. + +### What the installers mean + +- **pip + uv** - Real humans installing your package +- **bandersnatch** - PyPI mirror operators syncing everything. Some corporate mirrors sync on-demand (triggered by real users), so not all bandersnatch traffic is noise +- **Browser** - Security scanners downloading source to audit. Uniform download pattern across all versions confirms automated behavior +- **requests** - Scripts and CI/CD pipelines +- **(empty)** - No user agent, fully automated + +### The version chart signal + +If bots were blindly mirroring, every version would have equal downloads. An upward slope toward newer versions indicates demand-driven traffic - real users (or their corporate proxies) pulling the latest release. + +## Tracking Reddit Post Impact + +```bash +# Add a post after publishing +my-app insights reddit add https://reddit.com/r/FastAPI/comments/abc123/my-post + +# Run collection to see impact on other metrics +my-app insights collect github_traffic +my-app insights collect pypi +``` + +The Reddit post appears as an event chip on all tabs, letting you correlate the post timing with traffic spikes. + +## Event Correlation + +The event system lets you see what drove metric changes. Events show up as: + +- **Chips above charts** - Clickable, highlight the data point on that date +- **Chart annotation tooltips** - Hover over a data point to see events on that day +- **Activity feed on Overview** - Chronological feed with expandable details + +### Manual events for context + +```bash +# Log a feature launch +my-app insights event feature "Added Mandarin CLI localization" + +# Log an external mention +my-app insights event external "Featured in Python Weekly #523" + +# Log an anomaly +my-app insights event anomaly_github "CI/CD spike - 44:1 clone ratio" +``` + +## Record Tracking + +Records are detected automatically after each collection. When a new all-time high is set for any tracked metric, the system: + +1. Creates an `InsightEvent` with the record value and category +2. Reports it in the CLI output +3. Shows it in the Overview milestones grid + +### What's tracked + +- GitHub 1-Day Clones, Unique, Views, Visitors +- GitHub 14-Day Clones, Unique +- PyPI Best Single Day +- Plausible 1-Day Visitors, Pageviews + +### Milestone cards on Overview + +The Overview tab shows the latest record per category as trophy-style cards with the hero number prominently displayed. Only the all-time high per category shows - superseded records are kept in the database but don't display. + +## Geographic Analysis + +Stars carry location metadata (self-reported GitHub profiles). Plausible provides country-level visitor data. Together they reveal organic geographic spread without any marketing effort. + +The Plausible country data is range-aware - switching date ranges in the Docs tab updates both the chart and the country breakdown. diff --git a/docs/services/insights/index.md b/docs/services/insights/index.md new file mode 100644 index 00000000..1e1b1798 --- /dev/null +++ b/docs/services/insights/index.md @@ -0,0 +1,166 @@ +# Insights Service + +The **Insights Service** automates tracking of your project's adoption metrics across GitHub, PyPI, Plausible Analytics, and Reddit. It collects, stores, and visualizes the data that matters for understanding how your project is growing. + +!!! warning "Experimental Service" + Insights is currently experimental. The data model, collectors, and dashboard are functional but the API surface may change in future releases. + +!!! info "Quick Start" + ```bash + aegis init my-app --services "insights[github,pypi]" --components database,scheduler + cd my-app + uv sync && source .venv/bin/activate + ``` + + Configure your `.env` with API keys, then collect: + + ```bash + my-app insights collect + ``` + +## Why Track Adoption Metrics? + +If you ship open source software, the numbers you see on GitHub and PyPI are misleading without context. GitHub Traffic expires after 14 days. PyPI download counts are 97% bots. Stars don't tell you who's actually using your tool. + +Insights solves this by: + +- **Preserving data** that would otherwise expire (GitHub's 14-day rolling window) +- **Separating signal from noise** (human downloads vs bot mirrors on PyPI) +- **Correlating events** across sources (did that Reddit post drive stars? did the release drive clones?) +- **Tracking records automatically** (new all-time highs are detected and logged) +- **Visualizing everything** in the Overseer dashboard with interactive charts + +## What You Get + +- **6 data source collectors** - GitHub Traffic, Stars, Events, PyPI Downloads, Plausible Analytics, Reddit Posts +- **Human vs bot classification** - 97% of PyPI downloads are automated mirrors. Insights separates signal from noise. +- **Event correlation** - Releases, stars, Reddit posts, and milestones annotated on every chart +- **Automatic record detection** - New all-time highs are logged as milestone events +- **Interactive Overseer dashboard** - 7 tabs with date range filtering and period-over-period comparison +- **CLI and scheduler** - Collect on-demand or automate at configurable intervals + +![Insights Dashboard](../../images/insights_dashboard.png) + +## Architecture + +```mermaid +graph TB + subgraph "Data Sources" + GH_API[GitHub Traffic API
Clones, Views, Referrers] + GH_STARS[GitHub Stargazers API
Star profiles] + CH[ClickHouse Public SQL
PyPI downloads, GitHub events] + PL[Plausible API
Doc site visitors] + RD[Reddit JSON API
Post stats] + end + + subgraph "Insights Service" + Collectors[Collectors
One per source] + DB[(insight_metric
insight_event
insight_source)] + Records[Record Detector
Auto-milestone creation] + end + + subgraph "Presentation" + CLI[CLI Commands
collect, status, stars] + Dashboard[Overseer Dashboard
7 interactive tabs] + end + + GH_API --> Collectors + GH_STARS --> Collectors + CH --> Collectors + PL --> Collectors + RD --> Collectors + Collectors --> DB + Collectors --> Records + Records --> DB + DB --> CLI + DB --> Dashboard +``` + +## Data Flow + +```mermaid +sequenceDiagram + participant S as Scheduler + participant C as Collector + participant API as External API + participant DB as Database + participant R as Record Detector + participant UI as Overseer + + S->>C: Trigger collection (hourly/daily) + C->>API: Fetch data + API-->>C: Raw metrics + C->>DB: Upsert daily rows + C->>R: Check for new records + R->>DB: Compare against milestones + R-->>DB: Create event if ATH broken + UI->>DB: Query on modal open + DB-->>UI: Metrics + events +``` + +## Bracket Syntax + +Insights uses bracket syntax to select which data sources to enable: + +```bash +# GitHub + PyPI (default) +aegis init my-app --services insights + +# All sources +aegis init my-app --services "insights[github,pypi,plausible,reddit]" + +# Just GitHub +aegis init my-app --services "insights[github]" +``` + +Available sources: `github`, `pypi`, `plausible`, `reddit` + +## Quick Start + +1. **Create a project with Insights** + ```bash + aegis init my-app --services "insights[github,pypi,plausible]" --components database,scheduler + cd my-app + ``` + +2. **Configure API keys** in `.env` + ```bash + INSIGHT_GITHUB_TOKEN=ghp_your_token + INSIGHT_GITHUB_OWNER=your-username + INSIGHT_GITHUB_REPO=your-repo + INSIGHT_PYPI_PACKAGE=your-package + INSIGHT_PLAUSIBLE_API_KEY=your_key + INSIGHT_PLAUSIBLE_SITES=your-site.com + ``` + +3. **Run initial collection** + ```bash + my-app insights collect + ``` + +4. **Backfill historical data** (PyPI goes back 365 days, Plausible too) + ```bash + my-app insights collect pypi --lookback-days 365 + my-app insights collect plausible --lookback-days 365 + ``` + +5. **Open the Overseer dashboard** and click the Insights card to see your data. + +## Required Components + +| Component | Required | Why | +|-----------|----------|-----| +| Database | Yes | Stores all metrics, events, and source configuration | +| Scheduler | Yes | Automates collection at configured intervals | +| Backend | Yes | API endpoints (coming soon) | +| Frontend | Recommended | Overseer dashboard visualization | + +## Next Steps + +| Topic | Description | +|-------|-------------| +| [Data Sources](data-sources.md) | What each source collects and how | +| [CLI Commands](cli.md) | Command reference for collection and management | +| [Dashboard](dashboard.md) | Overseer modal tabs and interactive features | +| [Configuration](configuration.md) | Environment variables and collection intervals | +| [Examples](examples.md) | Real-world patterns and analysis workflows | diff --git a/mkdocs.yml b/mkdocs.yml index 574291ae..c7b1859f 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -82,6 +82,13 @@ nav: - Provider Setup: services/comms/setup.md - API Reference: services/comms/api.md - CLI Commands: services/comms/cli.md + - Insights (Experimental): + - Getting Started: services/insights/index.md + - Data Sources: services/insights/data-sources.md + - CLI Commands: services/insights/cli.md + - Dashboard: services/insights/dashboard.md + - Configuration: services/insights/configuration.md + - Examples: services/insights/examples.md - Overseer: - Overview: overseer/index.md - The Story: overseer/story.md diff --git a/pyproject.toml b/pyproject.toml index 94d5579a..5873ff35 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,7 @@ dependencies = [ "typer>=0.9.0", "copier>=9.11.2", "rich>=13.0.0", - "pillow>=12.1.1", + "pillow>=12.2.0", "packaging>=24.0", "filelock>=3.20.1", "urllib3>=2.6.3", @@ -56,7 +56,7 @@ Issues = "https://github.com/lbedner/aegis-stack/issues" [project.optional-dependencies] dev = [ - "pytest>=8.0.0", + "pytest>=9.0.3", "pytest-asyncio>=0.23.0", "ruff>=0.1.0", "ty>=0.0.8", # Type checker diff --git a/tests/core/test_insights_service_parser.py b/tests/core/test_insights_service_parser.py new file mode 100644 index 00000000..e8f4c923 --- /dev/null +++ b/tests/core/test_insights_service_parser.py @@ -0,0 +1,145 @@ +""" +Tests for insights service bracket syntax parser. + +Tests parsing of insights[sources...] syntax for data source selection. +""" + +import pytest + +from aegis.core.insights_service_parser import ( + DEFAULT_SOURCES, + InsightsServiceConfig, + is_insights_service_with_options, + parse_insights_service_config, +) + + +class TestParseInsightsServiceConfig: + """Test insights service bracket parsing.""" + + def test_plain_insights_returns_defaults(self) -> None: + """Plain 'insights' returns default sources (github, pypi).""" + config = parse_insights_service_config("insights") + assert config.sources == ["github", "pypi"] + + def test_empty_brackets_returns_defaults(self) -> None: + """Empty brackets return default sources.""" + config = parse_insights_service_config("insights[]") + assert config.sources == DEFAULT_SOURCES + + def test_single_source(self) -> None: + """Single source in brackets.""" + config = parse_insights_service_config("insights[github]") + assert config.sources == ["github"] + + def test_two_sources(self) -> None: + """Two sources in brackets.""" + config = parse_insights_service_config("insights[github,pypi]") + assert config.sources == ["github", "pypi"] + + def test_all_sources(self) -> None: + """All four sources in brackets.""" + config = parse_insights_service_config("insights[github,pypi,plausible,reddit]") + assert config.sources == ["github", "pypi", "plausible", "reddit"] + + def test_sources_with_spaces(self) -> None: + """Spaces around values are stripped.""" + config = parse_insights_service_config("insights[ github , plausible ]") + assert config.sources == ["github", "plausible"] + + def test_case_insensitive(self) -> None: + """Source names are lowercased.""" + config = parse_insights_service_config("insights[GitHub,PyPI]") + assert config.sources == ["github", "pypi"] + + def test_single_plausible(self) -> None: + """Just plausible source.""" + config = parse_insights_service_config("insights[plausible]") + assert config.sources == ["plausible"] + + def test_single_reddit(self) -> None: + """Just reddit source.""" + config = parse_insights_service_config("insights[reddit]") + assert config.sources == ["reddit"] + + def test_unknown_source_raises(self) -> None: + """Unknown source name raises ValueError.""" + with pytest.raises(ValueError, match="Unknown source 'stripe'"): + parse_insights_service_config("insights[github,stripe]") + + def test_duplicate_source_raises(self) -> None: + """Duplicate source raises ValueError.""" + with pytest.raises(ValueError, match="Duplicate source 'github'"): + parse_insights_service_config("insights[github,github]") + + def test_wrong_service_name_raises(self) -> None: + """Non-insights service string raises ValueError.""" + with pytest.raises(ValueError, match="Expected 'insights' service"): + parse_insights_service_config("auth[basic]") + + def test_malformed_brackets_raises(self) -> None: + """Missing closing bracket raises ValueError.""" + with pytest.raises(ValueError, match="Malformed brackets"): + parse_insights_service_config("insights[github") + + def test_no_brackets_with_suffix_raises(self) -> None: + """Invalid format without brackets raises ValueError.""" + with pytest.raises(ValueError, match="Expected 'insights'"): + parse_insights_service_config("insights_extra") + + def test_whitespace_stripped(self) -> None: + """Leading/trailing whitespace is stripped.""" + config = parse_insights_service_config(" insights[github] ") + assert config.sources == ["github"] + + def test_defaults_are_independent(self) -> None: + """Default sources are a fresh copy each time.""" + config1 = parse_insights_service_config("insights") + config2 = parse_insights_service_config("insights") + config1.sources.append("plausible") + assert config2.sources == ["github", "pypi"] + + +class TestIsInsightsServiceWithOptions: + """Test bracket detection.""" + + def test_plain_insights_returns_false(self) -> None: + """Plain 'insights' has no options.""" + assert is_insights_service_with_options("insights") is False + + def test_bracket_syntax_returns_true(self) -> None: + """Bracket syntax is detected.""" + assert is_insights_service_with_options("insights[github]") is True + + def test_all_sources_returns_true(self) -> None: + """Full bracket syntax is detected.""" + assert ( + is_insights_service_with_options("insights[github,pypi,plausible,reddit]") + is True + ) + + def test_empty_brackets_returns_true(self) -> None: + """Empty brackets are still bracket syntax.""" + assert is_insights_service_with_options("insights[]") is True + + def test_whitespace_stripped(self) -> None: + """Whitespace is handled.""" + assert is_insights_service_with_options(" insights[github] ") is True + + def test_non_insights_returns_false(self) -> None: + """Other services return False.""" + assert is_insights_service_with_options("auth[basic]") is False + + +class TestInsightsServiceConfig: + """Test InsightsServiceConfig dataclass.""" + + def test_default_factory(self) -> None: + """Default config has default sources.""" + config = InsightsServiceConfig() + assert config.sources == DEFAULT_SOURCES + + def test_custom_sources(self) -> None: + """Custom sources are preserved.""" + config = InsightsServiceConfig(sources=["plausible", "reddit"]) + assert config.sources == ["plausible", "reddit"] diff --git a/tests/core/test_service_resolver.py b/tests/core/test_service_resolver.py index 213d89d4..f1d3c59e 100644 --- a/tests/core/test_service_resolver.py +++ b/tests/core/test_service_resolver.py @@ -431,3 +431,64 @@ def test_service_resolver_edge_cases(self): assert ServiceResolver.validate_services([]) == [] assert ServiceResolver.get_service_component_summary([]) == {} assert ServiceResolver.recommend_components_for_services([]) == [] + + +class TestInsightsServiceResolver: + """Test insights service resolution and bracket validation.""" + + def test_resolve_insights_dependencies(self): + """Insights requires backend, database, and scheduler.""" + services = ["insights"] + + resolved, auto_added = ServiceResolver.resolve_service_dependencies(services) + + assert "backend" in resolved + assert "database" in resolved + assert "scheduler" in resolved + + def test_resolve_insights_recommends_worker(self): + """Insights recommends worker component.""" + recommendations = ServiceResolver.recommend_components_for_services( + ["insights"] + ) + assert "worker" in recommendations + + def test_validate_insights_plain(self): + """Plain 'insights' validates successfully.""" + errors = ServiceResolver.validate_services(["insights"]) + assert errors == [] + + def test_validate_insights_with_brackets(self): + """Bracket syntax validates successfully.""" + errors = ServiceResolver.validate_services( + ["insights[github,pypi,plausible,reddit]"] + ) + assert errors == [] + + def test_validate_insights_invalid_source(self): + """Invalid source in brackets fails validation.""" + errors = ServiceResolver.validate_services(["insights[github,invalid]"]) + assert len(errors) == 1 + assert "Invalid insights" in errors[0] + + def test_insights_full_resolution(self): + """Full resolution flow for insights service.""" + services = ["insights"] + + resolved, _ = ServiceResolver.resolve_service_dependencies(services) + + errors = ServiceResolver.validate_service_component_compatibility( + services, resolved + ) + assert errors == [] + + def test_insights_with_auth_merge(self): + """Insights + auth share backend and database — no duplication.""" + services = ["insights", "auth"] + + resolved, _ = ServiceResolver.resolve_service_dependencies(services) + + # Both need backend/database, should appear once + assert resolved.count("backend") == 1 + assert resolved.count("database") == 1 + assert "scheduler" in resolved diff --git a/tests/core/test_services.py b/tests/core/test_services.py index 4607c4b5..cd779a5d 100644 --- a/tests/core/test_services.py +++ b/tests/core/test_services.py @@ -114,6 +114,31 @@ def test_services_registry_not_empty(self): """Test that the SERVICES registry contains services.""" assert len(SERVICES) > 0 assert "auth" in SERVICES + assert "insights" in SERVICES + + def test_insights_service_specification(self): + """Test the insights service specification is properly defined.""" + spec = SERVICES["insights"] + + assert spec.name == "insights" + assert spec.type == ServiceType.ANALYTICS + assert spec.description + assert "backend" in spec.required_components + assert "database" in spec.required_components + assert "scheduler" in spec.required_components + assert "worker" in spec.recommended_components + assert len(spec.pyproject_deps) > 0 + assert len(spec.template_files) > 0 + + def test_insights_uses_component_constants(self): + """Test that insights spec uses ComponentNames, not magic strings.""" + from aegis.constants import ComponentNames + + spec = SERVICES["insights"] + assert ComponentNames.BACKEND in spec.required_components + assert ComponentNames.DATABASE in spec.required_components + assert ComponentNames.SCHEDULER in spec.required_components + assert ComponentNames.WORKER in spec.recommended_components def test_auth_service_specification(self): """Test the auth service specification is properly defined.""" diff --git a/tests/core/test_template_generator.py b/tests/core/test_template_generator.py index c0d8eb37..c2856a09 100644 --- a/tests/core/test_template_generator.py +++ b/tests/core/test_template_generator.py @@ -480,3 +480,81 @@ def test_context_auth_org_implies_rbac(self) -> None: assert context["auth_level"] == AuthLevels.ORG assert context["include_auth_org"] == "yes" assert context["include_auth_rbac"] == "yes" + + +class TestTemplateGeneratorInsightsService: + """Test insights service handling in template context.""" + + def test_insights_sets_include_flag(self) -> None: + """Insights service sets include_insights to 'yes'.""" + gen = TemplateGenerator( + project_name="test", + selected_components=[], + selected_services=["insights"], + ) + context = gen.get_template_context() + assert context["include_insights"] == "yes" + + def test_no_insights_sets_flag_no(self) -> None: + """Without insights, include_insights is 'no'.""" + gen = TemplateGenerator( + project_name="test", + selected_components=[], + selected_services=[], + ) + context = gen.get_template_context() + assert context["include_insights"] == "no" + + def test_insights_default_sources(self) -> None: + """Plain 'insights' defaults to github + pypi sources.""" + gen = TemplateGenerator( + project_name="test", + selected_components=[], + selected_services=["insights"], + ) + context = gen.get_template_context() + assert context["insights_github"] == "yes" + assert context["insights_pypi"] == "yes" + assert context["insights_plausible"] == "no" + assert context["insights_reddit"] == "no" + + def test_insights_bracket_all_sources(self) -> None: + """Bracket syntax enables specified sources.""" + gen = TemplateGenerator( + project_name="test", + selected_components=[], + selected_services=["insights[github,pypi,plausible,reddit]"], + ) + context = gen.get_template_context() + assert context["include_insights"] == "yes" + assert context["insights_github"] == "yes" + assert context["insights_pypi"] == "yes" + assert context["insights_plausible"] == "yes" + assert context["insights_reddit"] == "yes" + + def test_insights_bracket_single_source(self) -> None: + """Bracket with single source only enables that one.""" + gen = TemplateGenerator( + project_name="test", + selected_components=[], + selected_services=["insights[plausible]"], + ) + context = gen.get_template_context() + assert context["include_insights"] == "yes" + assert context["insights_github"] == "no" + assert context["insights_pypi"] == "no" + assert context["insights_plausible"] == "yes" + assert context["insights_reddit"] == "no" + + def test_insights_with_other_services(self) -> None: + """Insights alongside other services — all flags correct.""" + gen = TemplateGenerator( + project_name="test", + selected_components=["database"], + selected_services=["insights", "auth"], + ) + context = gen.get_template_context() + assert context["include_insights"] == "yes" + assert context["include_auth"] == "yes" + assert context["insights_github"] == "yes" + assert context["insights_pypi"] == "yes" diff --git a/uv.lock b/uv.lock index 9d1ddef2..c3267f24 100644 --- a/uv.lock +++ b/uv.lock @@ -52,12 +52,12 @@ requires-dist = [ { name = "mkdocstrings", extras = ["python"], marker = "extra == 'docs'", specifier = ">=0.24.0" }, { name = "opencc-python-reimplemented", marker = "extra == 'dev'", specifier = ">=0.1.7" }, { name = "packaging", specifier = ">=24.0" }, - { name = "pillow", specifier = ">=12.1.1" }, + { name = "pillow", specifier = ">=12.2.0" }, { name = "pip-audit", marker = "extra == 'dev'", specifier = ">=2.6.0" }, { name = "pre-commit", marker = "extra == 'dev'", specifier = ">=3.7.0" }, { name = "pygments", marker = "extra == 'docs'", specifier = ">=2.20.0" }, { name = "pymdown-extensions", marker = "extra == 'docs'", specifier = ">=10.21.0" }, - { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0.0" }, + { name = "pytest", marker = "extra == 'dev'", specifier = ">=9.0.3" }, { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.23.0" }, { name = "rich", specifier = ">=13.0.0" }, { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.1.0" }, @@ -845,89 +845,89 @@ wheels = [ [[package]] name = "pillow" -version = "12.1.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/1f/42/5c74462b4fd957fcd7b13b04fb3205ff8349236ea74c7c375766d6c82288/pillow-12.1.1.tar.gz", hash = "sha256:9ad8fa5937ab05218e2b6a4cff30295ad35afd2f83ac592e68c0d871bb0fdbc4", size = 46980264, upload-time = "2026-02-11T04:23:07.146Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/2b/46/5da1ec4a5171ee7bf1a0efa064aba70ba3d6e0788ce3f5acd1375d23c8c0/pillow-12.1.1-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:e879bb6cd5c73848ef3b2b48b8af9ff08c5b71ecda8048b7dd22d8a33f60be32", size = 5304084, upload-time = "2026-02-11T04:20:27.501Z" }, - { url = "https://files.pythonhosted.org/packages/78/93/a29e9bc02d1cf557a834da780ceccd54e02421627200696fcf805ebdc3fb/pillow-12.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:365b10bb9417dd4498c0e3b128018c4a624dc11c7b97d8cc54effe3b096f4c38", size = 4657866, upload-time = "2026-02-11T04:20:29.827Z" }, - { url = "https://files.pythonhosted.org/packages/13/84/583a4558d492a179d31e4aae32eadce94b9acf49c0337c4ce0b70e0a01f2/pillow-12.1.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d4ce8e329c93845720cd2014659ca67eac35f6433fd3050393d85f3ecef0dad5", size = 6232148, upload-time = "2026-02-11T04:20:31.329Z" }, - { url = "https://files.pythonhosted.org/packages/d5/e2/53c43334bbbb2d3b938978532fbda8e62bb6e0b23a26ce8592f36bcc4987/pillow-12.1.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fc354a04072b765eccf2204f588a7a532c9511e8b9c7f900e1b64e3e33487090", size = 8038007, upload-time = "2026-02-11T04:20:34.225Z" }, - { url = "https://files.pythonhosted.org/packages/b8/a6/3d0e79c8a9d58150dd98e199d7c1c56861027f3829a3a60b3c2784190180/pillow-12.1.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7e7976bf1910a8116b523b9f9f58bf410f3e8aa330cd9a2bb2953f9266ab49af", size = 6345418, upload-time = "2026-02-11T04:20:35.858Z" }, - { url = "https://files.pythonhosted.org/packages/a2/c8/46dfeac5825e600579157eea177be43e2f7ff4a99da9d0d0a49533509ac5/pillow-12.1.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:597bd9c8419bc7c6af5604e55847789b69123bbe25d65cc6ad3012b4f3c98d8b", size = 7034590, upload-time = "2026-02-11T04:20:37.91Z" }, - { url = "https://files.pythonhosted.org/packages/af/bf/e6f65d3db8a8bbfeaf9e13cc0417813f6319863a73de934f14b2229ada18/pillow-12.1.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2c1fc0f2ca5f96a3c8407e41cca26a16e46b21060fe6d5b099d2cb01412222f5", size = 6458655, upload-time = "2026-02-11T04:20:39.496Z" }, - { url = "https://files.pythonhosted.org/packages/f9/c2/66091f3f34a25894ca129362e510b956ef26f8fb67a0e6417bc5744e56f1/pillow-12.1.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:578510d88c6229d735855e1f278aa305270438d36a05031dfaae5067cc8eb04d", size = 7159286, upload-time = "2026-02-11T04:20:41.139Z" }, - { url = "https://files.pythonhosted.org/packages/7b/5a/24bc8eb526a22f957d0cec6243146744966d40857e3d8deb68f7902ca6c1/pillow-12.1.1-cp311-cp311-win32.whl", hash = "sha256:7311c0a0dcadb89b36b7025dfd8326ecfa36964e29913074d47382706e516a7c", size = 6328663, upload-time = "2026-02-11T04:20:43.184Z" }, - { url = "https://files.pythonhosted.org/packages/31/03/bef822e4f2d8f9d7448c133d0a18185d3cce3e70472774fffefe8b0ed562/pillow-12.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:fbfa2a7c10cc2623f412753cddf391c7f971c52ca40a3f65dc5039b2939e8563", size = 7031448, upload-time = "2026-02-11T04:20:44.696Z" }, - { url = "https://files.pythonhosted.org/packages/49/70/f76296f53610bd17b2e7d31728b8b7825e3ac3b5b3688b51f52eab7c0818/pillow-12.1.1-cp311-cp311-win_arm64.whl", hash = "sha256:b81b5e3511211631b3f672a595e3221252c90af017e399056d0faabb9538aa80", size = 2453651, upload-time = "2026-02-11T04:20:46.243Z" }, - { url = "https://files.pythonhosted.org/packages/07/d3/8df65da0d4df36b094351dce696f2989bec731d4f10e743b1c5f4da4d3bf/pillow-12.1.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ab323b787d6e18b3d91a72fc99b1a2c28651e4358749842b8f8dfacd28ef2052", size = 5262803, upload-time = "2026-02-11T04:20:47.653Z" }, - { url = "https://files.pythonhosted.org/packages/d6/71/5026395b290ff404b836e636f51d7297e6c83beceaa87c592718747e670f/pillow-12.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:adebb5bee0f0af4909c30db0d890c773d1a92ffe83da908e2e9e720f8edf3984", size = 4657601, upload-time = "2026-02-11T04:20:49.328Z" }, - { url = "https://files.pythonhosted.org/packages/b1/2e/1001613d941c67442f745aff0f7cc66dd8df9a9c084eb497e6a543ee6f7e/pillow-12.1.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bb66b7cc26f50977108790e2456b7921e773f23db5630261102233eb355a3b79", size = 6234995, upload-time = "2026-02-11T04:20:51.032Z" }, - { url = "https://files.pythonhosted.org/packages/07/26/246ab11455b2549b9233dbd44d358d033a2f780fa9007b61a913c5b2d24e/pillow-12.1.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:aee2810642b2898bb187ced9b349e95d2a7272930796e022efaf12e99dccd293", size = 8045012, upload-time = "2026-02-11T04:20:52.882Z" }, - { url = "https://files.pythonhosted.org/packages/b2/8b/07587069c27be7535ac1fe33874e32de118fbd34e2a73b7f83436a88368c/pillow-12.1.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a0b1cd6232e2b618adcc54d9882e4e662a089d5768cd188f7c245b4c8c44a397", size = 6349638, upload-time = "2026-02-11T04:20:54.444Z" }, - { url = "https://files.pythonhosted.org/packages/ff/79/6df7b2ee763d619cda2fb4fea498e5f79d984dae304d45a8999b80d6cf5c/pillow-12.1.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7aac39bcf8d4770d089588a2e1dd111cbaa42df5a94be3114222057d68336bd0", size = 7041540, upload-time = "2026-02-11T04:20:55.97Z" }, - { url = "https://files.pythonhosted.org/packages/2c/5e/2ba19e7e7236d7529f4d873bdaf317a318896bac289abebd4bb00ef247f0/pillow-12.1.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ab174cd7d29a62dd139c44bf74b698039328f45cb03b4596c43473a46656b2f3", size = 6462613, upload-time = "2026-02-11T04:20:57.542Z" }, - { url = "https://files.pythonhosted.org/packages/03/03/31216ec124bb5c3dacd74ce8efff4cc7f52643653bad4825f8f08c697743/pillow-12.1.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:339ffdcb7cbeaa08221cd401d517d4b1fe7a9ed5d400e4a8039719238620ca35", size = 7166745, upload-time = "2026-02-11T04:20:59.196Z" }, - { url = "https://files.pythonhosted.org/packages/1f/e7/7c4552d80052337eb28653b617eafdef39adfb137c49dd7e831b8dc13bc5/pillow-12.1.1-cp312-cp312-win32.whl", hash = "sha256:5d1f9575a12bed9e9eedd9a4972834b08c97a352bd17955ccdebfeca5913fa0a", size = 6328823, upload-time = "2026-02-11T04:21:01.385Z" }, - { url = "https://files.pythonhosted.org/packages/3d/17/688626d192d7261bbbf98846fc98995726bddc2c945344b65bec3a29d731/pillow-12.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:21329ec8c96c6e979cd0dfd29406c40c1d52521a90544463057d2aaa937d66a6", size = 7033367, upload-time = "2026-02-11T04:21:03.536Z" }, - { url = "https://files.pythonhosted.org/packages/ed/fe/a0ef1f73f939b0eca03ee2c108d0043a87468664770612602c63266a43c4/pillow-12.1.1-cp312-cp312-win_arm64.whl", hash = "sha256:af9a332e572978f0218686636610555ae3defd1633597be015ed50289a03c523", size = 2453811, upload-time = "2026-02-11T04:21:05.116Z" }, - { url = "https://files.pythonhosted.org/packages/d5/11/6db24d4bd7685583caeae54b7009584e38da3c3d4488ed4cd25b439de486/pillow-12.1.1-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:d242e8ac078781f1de88bf823d70c1a9b3c7950a44cdf4b7c012e22ccbcd8e4e", size = 4062689, upload-time = "2026-02-11T04:21:06.804Z" }, - { url = "https://files.pythonhosted.org/packages/33/c0/ce6d3b1fe190f0021203e0d9b5b99e57843e345f15f9ef22fcd43842fd21/pillow-12.1.1-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:02f84dfad02693676692746df05b89cf25597560db2857363a208e393429f5e9", size = 4138535, upload-time = "2026-02-11T04:21:08.452Z" }, - { url = "https://files.pythonhosted.org/packages/a0/c6/d5eb6a4fb32a3f9c21a8c7613ec706534ea1cf9f4b3663e99f0d83f6fca8/pillow-12.1.1-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:e65498daf4b583091ccbb2556c7000abf0f3349fcd57ef7adc9a84a394ed29f6", size = 3601364, upload-time = "2026-02-11T04:21:10.194Z" }, - { url = "https://files.pythonhosted.org/packages/14/a1/16c4b823838ba4c9c52c0e6bbda903a3fe5a1bdbf1b8eb4fff7156f3e318/pillow-12.1.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:6c6db3b84c87d48d0088943bf33440e0c42370b99b1c2a7989216f7b42eede60", size = 5262561, upload-time = "2026-02-11T04:21:11.742Z" }, - { url = "https://files.pythonhosted.org/packages/bb/ad/ad9dc98ff24f485008aa5cdedaf1a219876f6f6c42a4626c08bc4e80b120/pillow-12.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8b7e5304e34942bf62e15184219a7b5ad4ff7f3bb5cca4d984f37df1a0e1aee2", size = 4657460, upload-time = "2026-02-11T04:21:13.786Z" }, - { url = "https://files.pythonhosted.org/packages/9e/1b/f1a4ea9a895b5732152789326202a82464d5254759fbacae4deea3069334/pillow-12.1.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:18e5bddd742a44b7e6b1e773ab5db102bd7a94c32555ba656e76d319d19c3850", size = 6232698, upload-time = "2026-02-11T04:21:15.949Z" }, - { url = "https://files.pythonhosted.org/packages/95/f4/86f51b8745070daf21fd2e5b1fe0eb35d4db9ca26e6d58366562fb56a743/pillow-12.1.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fc44ef1f3de4f45b50ccf9136999d71abb99dca7706bc75d222ed350b9fd2289", size = 8041706, upload-time = "2026-02-11T04:21:17.723Z" }, - { url = "https://files.pythonhosted.org/packages/29/9b/d6ecd956bb1266dd1045e995cce9b8d77759e740953a1c9aad9502a0461e/pillow-12.1.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5a8eb7ed8d4198bccbd07058416eeec51686b498e784eda166395a23eb99138e", size = 6346621, upload-time = "2026-02-11T04:21:19.547Z" }, - { url = "https://files.pythonhosted.org/packages/71/24/538bff45bde96535d7d998c6fed1a751c75ac7c53c37c90dc2601b243893/pillow-12.1.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:47b94983da0c642de92ced1702c5b6c292a84bd3a8e1d1702ff923f183594717", size = 7038069, upload-time = "2026-02-11T04:21:21.378Z" }, - { url = "https://files.pythonhosted.org/packages/94/0e/58cb1a6bc48f746bc4cb3adb8cabff73e2742c92b3bf7a220b7cf69b9177/pillow-12.1.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:518a48c2aab7ce596d3bf79d0e275661b846e86e4d0e7dec34712c30fe07f02a", size = 6460040, upload-time = "2026-02-11T04:21:23.148Z" }, - { url = "https://files.pythonhosted.org/packages/6c/57/9045cb3ff11eeb6c1adce3b2d60d7d299d7b273a2e6c8381a524abfdc474/pillow-12.1.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a550ae29b95c6dc13cf69e2c9dc5747f814c54eeb2e32d683e5e93af56caa029", size = 7164523, upload-time = "2026-02-11T04:21:25.01Z" }, - { url = "https://files.pythonhosted.org/packages/73/f2/9be9cb99f2175f0d4dbadd6616ce1bf068ee54a28277ea1bf1fbf729c250/pillow-12.1.1-cp313-cp313-win32.whl", hash = "sha256:a003d7422449f6d1e3a34e3dd4110c22148336918ddbfc6a32581cd54b2e0b2b", size = 6332552, upload-time = "2026-02-11T04:21:27.238Z" }, - { url = "https://files.pythonhosted.org/packages/3f/eb/b0834ad8b583d7d9d42b80becff092082a1c3c156bb582590fcc973f1c7c/pillow-12.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:344cf1e3dab3be4b1fa08e449323d98a2a3f819ad20f4b22e77a0ede31f0faa1", size = 7040108, upload-time = "2026-02-11T04:21:29.462Z" }, - { url = "https://files.pythonhosted.org/packages/d5/7d/fc09634e2aabdd0feabaff4a32f4a7d97789223e7c2042fd805ea4b4d2c2/pillow-12.1.1-cp313-cp313-win_arm64.whl", hash = "sha256:5c0dd1636633e7e6a0afe7bf6a51a14992b7f8e60de5789018ebbdfae55b040a", size = 2453712, upload-time = "2026-02-11T04:21:31.072Z" }, - { url = "https://files.pythonhosted.org/packages/19/2a/b9d62794fc8a0dd14c1943df68347badbd5511103e0d04c035ffe5cf2255/pillow-12.1.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0330d233c1a0ead844fc097a7d16c0abff4c12e856c0b325f231820fee1f39da", size = 5264880, upload-time = "2026-02-11T04:21:32.865Z" }, - { url = "https://files.pythonhosted.org/packages/26/9d/e03d857d1347fa5ed9247e123fcd2a97b6220e15e9cb73ca0a8d91702c6e/pillow-12.1.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5dae5f21afb91322f2ff791895ddd8889e5e947ff59f71b46041c8ce6db790bc", size = 4660616, upload-time = "2026-02-11T04:21:34.97Z" }, - { url = "https://files.pythonhosted.org/packages/f7/ec/8a6d22afd02570d30954e043f09c32772bfe143ba9285e2fdb11284952cd/pillow-12.1.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2e0c664be47252947d870ac0d327fea7e63985a08794758aa8af5b6cb6ec0c9c", size = 6269008, upload-time = "2026-02-11T04:21:36.623Z" }, - { url = "https://files.pythonhosted.org/packages/3d/1d/6d875422c9f28a4a361f495a5f68d9de4a66941dc2c619103ca335fa6446/pillow-12.1.1-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:691ab2ac363b8217f7d31b3497108fb1f50faab2f75dfb03284ec2f217e87bf8", size = 8073226, upload-time = "2026-02-11T04:21:38.585Z" }, - { url = "https://files.pythonhosted.org/packages/a1/cd/134b0b6ee5eda6dc09e25e24b40fdafe11a520bc725c1d0bbaa5e00bf95b/pillow-12.1.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e9e8064fb1cc019296958595f6db671fba95209e3ceb0c4734c9baf97de04b20", size = 6380136, upload-time = "2026-02-11T04:21:40.562Z" }, - { url = "https://files.pythonhosted.org/packages/7a/a9/7628f013f18f001c1b98d8fffe3452f306a70dc6aba7d931019e0492f45e/pillow-12.1.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:472a8d7ded663e6162dafdf20015c486a7009483ca671cece7a9279b512fcb13", size = 7067129, upload-time = "2026-02-11T04:21:42.521Z" }, - { url = "https://files.pythonhosted.org/packages/1e/f8/66ab30a2193b277785601e82ee2d49f68ea575d9637e5e234faaa98efa4c/pillow-12.1.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:89b54027a766529136a06cfebeecb3a04900397a3590fd252160b888479517bf", size = 6491807, upload-time = "2026-02-11T04:21:44.22Z" }, - { url = "https://files.pythonhosted.org/packages/da/0b/a877a6627dc8318fdb84e357c5e1a758c0941ab1ddffdafd231983788579/pillow-12.1.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:86172b0831b82ce4f7877f280055892b31179e1576aa00d0df3bb1bbf8c3e524", size = 7190954, upload-time = "2026-02-11T04:21:46.114Z" }, - { url = "https://files.pythonhosted.org/packages/83/43/6f732ff85743cf746b1361b91665d9f5155e1483817f693f8d57ea93147f/pillow-12.1.1-cp313-cp313t-win32.whl", hash = "sha256:44ce27545b6efcf0fdbdceb31c9a5bdea9333e664cda58a7e674bb74608b3986", size = 6336441, upload-time = "2026-02-11T04:21:48.22Z" }, - { url = "https://files.pythonhosted.org/packages/3b/44/e865ef3986611bb75bfabdf94a590016ea327833f434558801122979cd0e/pillow-12.1.1-cp313-cp313t-win_amd64.whl", hash = "sha256:a285e3eb7a5a45a2ff504e31f4a8d1b12ef62e84e5411c6804a42197c1cf586c", size = 7045383, upload-time = "2026-02-11T04:21:50.015Z" }, - { url = "https://files.pythonhosted.org/packages/a8/c6/f4fb24268d0c6908b9f04143697ea18b0379490cb74ba9e8d41b898bd005/pillow-12.1.1-cp313-cp313t-win_arm64.whl", hash = "sha256:cc7d296b5ea4d29e6570dabeaed58d31c3fea35a633a69679fb03d7664f43fb3", size = 2456104, upload-time = "2026-02-11T04:21:51.633Z" }, - { url = "https://files.pythonhosted.org/packages/03/d0/bebb3ffbf31c5a8e97241476c4cf8b9828954693ce6744b4a2326af3e16b/pillow-12.1.1-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:417423db963cb4be8bac3fc1204fe61610f6abeed1580a7a2cbb2fbda20f12af", size = 4062652, upload-time = "2026-02-11T04:21:53.19Z" }, - { url = "https://files.pythonhosted.org/packages/2d/c0/0e16fb0addda4851445c28f8350d8c512f09de27bbb0d6d0bbf8b6709605/pillow-12.1.1-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:b957b71c6b2387610f556a7eb0828afbe40b4a98036fc0d2acfa5a44a0c2036f", size = 4138823, upload-time = "2026-02-11T04:22:03.088Z" }, - { url = "https://files.pythonhosted.org/packages/6b/fb/6170ec655d6f6bb6630a013dd7cf7bc218423d7b5fa9071bf63dc32175ae/pillow-12.1.1-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:097690ba1f2efdeb165a20469d59d8bb03c55fb6621eb2041a060ae8ea3e9642", size = 3601143, upload-time = "2026-02-11T04:22:04.909Z" }, - { url = "https://files.pythonhosted.org/packages/59/04/dc5c3f297510ba9a6837cbb318b87dd2b8f73eb41a43cc63767f65cb599c/pillow-12.1.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:2815a87ab27848db0321fb78c7f0b2c8649dee134b7f2b80c6a45c6831d75ccd", size = 5266254, upload-time = "2026-02-11T04:22:07.656Z" }, - { url = "https://files.pythonhosted.org/packages/05/30/5db1236b0d6313f03ebf97f5e17cda9ca060f524b2fcc875149a8360b21c/pillow-12.1.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:f7ed2c6543bad5a7d5530eb9e78c53132f93dfa44a28492db88b41cdab885202", size = 4657499, upload-time = "2026-02-11T04:22:09.613Z" }, - { url = "https://files.pythonhosted.org/packages/6f/18/008d2ca0eb612e81968e8be0bbae5051efba24d52debf930126d7eaacbba/pillow-12.1.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:652a2c9ccfb556235b2b501a3a7cf3742148cd22e04b5625c5fe057ea3e3191f", size = 6232137, upload-time = "2026-02-11T04:22:11.434Z" }, - { url = "https://files.pythonhosted.org/packages/70/f1/f14d5b8eeb4b2cd62b9f9f847eb6605f103df89ef619ac68f92f748614ea/pillow-12.1.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d6e4571eedf43af33d0fc233a382a76e849badbccdf1ac438841308652a08e1f", size = 8042721, upload-time = "2026-02-11T04:22:13.321Z" }, - { url = "https://files.pythonhosted.org/packages/5a/d6/17824509146e4babbdabf04d8171491fa9d776f7061ff6e727522df9bd03/pillow-12.1.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b574c51cf7d5d62e9be37ba446224b59a2da26dc4c1bb2ecbe936a4fb1a7cb7f", size = 6347798, upload-time = "2026-02-11T04:22:15.449Z" }, - { url = "https://files.pythonhosted.org/packages/d1/ee/c85a38a9ab92037a75615aba572c85ea51e605265036e00c5b67dfafbfe2/pillow-12.1.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a37691702ed687799de29a518d63d4682d9016932db66d4e90c345831b02fb4e", size = 7039315, upload-time = "2026-02-11T04:22:17.24Z" }, - { url = "https://files.pythonhosted.org/packages/ec/f3/bc8ccc6e08a148290d7523bde4d9a0d6c981db34631390dc6e6ec34cacf6/pillow-12.1.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f95c00d5d6700b2b890479664a06e754974848afaae5e21beb4d83c106923fd0", size = 6462360, upload-time = "2026-02-11T04:22:19.111Z" }, - { url = "https://files.pythonhosted.org/packages/f6/ab/69a42656adb1d0665ab051eec58a41f169ad295cf81ad45406963105408f/pillow-12.1.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:559b38da23606e68681337ad74622c4dbba02254fc9cb4488a305dd5975c7eeb", size = 7165438, upload-time = "2026-02-11T04:22:21.041Z" }, - { url = "https://files.pythonhosted.org/packages/02/46/81f7aa8941873f0f01d4b55cc543b0a3d03ec2ee30d617a0448bf6bd6dec/pillow-12.1.1-cp314-cp314-win32.whl", hash = "sha256:03edcc34d688572014ff223c125a3f77fb08091e4607e7745002fc214070b35f", size = 6431503, upload-time = "2026-02-11T04:22:22.833Z" }, - { url = "https://files.pythonhosted.org/packages/40/72/4c245f7d1044b67affc7f134a09ea619d4895333d35322b775b928180044/pillow-12.1.1-cp314-cp314-win_amd64.whl", hash = "sha256:50480dcd74fa63b8e78235957d302d98d98d82ccbfac4c7e12108ba9ecbdba15", size = 7176748, upload-time = "2026-02-11T04:22:24.64Z" }, - { url = "https://files.pythonhosted.org/packages/e4/ad/8a87bdbe038c5c698736e3348af5c2194ffb872ea52f11894c95f9305435/pillow-12.1.1-cp314-cp314-win_arm64.whl", hash = "sha256:5cb1785d97b0c3d1d1a16bc1d710c4a0049daefc4935f3a8f31f827f4d3d2e7f", size = 2544314, upload-time = "2026-02-11T04:22:26.685Z" }, - { url = "https://files.pythonhosted.org/packages/6c/9d/efd18493f9de13b87ede7c47e69184b9e859e4427225ea962e32e56a49bc/pillow-12.1.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:1f90cff8aa76835cba5769f0b3121a22bd4eb9e6884cfe338216e557a9a548b8", size = 5268612, upload-time = "2026-02-11T04:22:29.884Z" }, - { url = "https://files.pythonhosted.org/packages/f8/f1/4f42eb2b388eb2ffc660dcb7f7b556c1015c53ebd5f7f754965ef997585b/pillow-12.1.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1f1be78ce9466a7ee64bfda57bdba0f7cc499d9794d518b854816c41bf0aa4e9", size = 4660567, upload-time = "2026-02-11T04:22:31.799Z" }, - { url = "https://files.pythonhosted.org/packages/01/54/df6ef130fa43e4b82e32624a7b821a2be1c5653a5fdad8469687a7db4e00/pillow-12.1.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:42fc1f4677106188ad9a55562bbade416f8b55456f522430fadab3cef7cd4e60", size = 6269951, upload-time = "2026-02-11T04:22:33.921Z" }, - { url = "https://files.pythonhosted.org/packages/a9/48/618752d06cc44bb4aae8ce0cd4e6426871929ed7b46215638088270d9b34/pillow-12.1.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:98edb152429ab62a1818039744d8fbb3ccab98a7c29fc3d5fcef158f3f1f68b7", size = 8074769, upload-time = "2026-02-11T04:22:35.877Z" }, - { url = "https://files.pythonhosted.org/packages/c3/bd/f1d71eb39a72fa088d938655afba3e00b38018d052752f435838961127d8/pillow-12.1.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d470ab1178551dd17fdba0fef463359c41aaa613cdcd7ff8373f54be629f9f8f", size = 6381358, upload-time = "2026-02-11T04:22:37.698Z" }, - { url = "https://files.pythonhosted.org/packages/64/ef/c784e20b96674ed36a5af839305f55616f8b4f8aa8eeccf8531a6e312243/pillow-12.1.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6408a7b064595afcab0a49393a413732a35788f2a5092fdc6266952ed67de586", size = 7068558, upload-time = "2026-02-11T04:22:39.597Z" }, - { url = "https://files.pythonhosted.org/packages/73/cb/8059688b74422ae61278202c4e1ad992e8a2e7375227be0a21c6b87ca8d5/pillow-12.1.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5d8c41325b382c07799a3682c1c258469ea2ff97103c53717b7893862d0c98ce", size = 6493028, upload-time = "2026-02-11T04:22:42.73Z" }, - { url = "https://files.pythonhosted.org/packages/c6/da/e3c008ed7d2dd1f905b15949325934510b9d1931e5df999bb15972756818/pillow-12.1.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:c7697918b5be27424e9ce568193efd13d925c4481dd364e43f5dff72d33e10f8", size = 7191940, upload-time = "2026-02-11T04:22:44.543Z" }, - { url = "https://files.pythonhosted.org/packages/01/4a/9202e8d11714c1fc5951f2e1ef362f2d7fbc595e1f6717971d5dd750e969/pillow-12.1.1-cp314-cp314t-win32.whl", hash = "sha256:d2912fd8114fc5545aa3a4b5576512f64c55a03f3ebcca4c10194d593d43ea36", size = 6438736, upload-time = "2026-02-11T04:22:46.347Z" }, - { url = "https://files.pythonhosted.org/packages/f3/ca/cbce2327eb9885476b3957b2e82eb12c866a8b16ad77392864ad601022ce/pillow-12.1.1-cp314-cp314t-win_amd64.whl", hash = "sha256:4ceb838d4bd9dab43e06c363cab2eebf63846d6a4aeaea283bbdfd8f1a8ed58b", size = 7182894, upload-time = "2026-02-11T04:22:48.114Z" }, - { url = "https://files.pythonhosted.org/packages/ec/d2/de599c95ba0a973b94410477f8bf0b6f0b5e67360eb89bcb1ad365258beb/pillow-12.1.1-cp314-cp314t-win_arm64.whl", hash = "sha256:7b03048319bfc6170e93bd60728a1af51d3dd7704935feb228c4d4faab35d334", size = 2546446, upload-time = "2026-02-11T04:22:50.342Z" }, - { url = "https://files.pythonhosted.org/packages/56/11/5d43209aa4cb58e0cc80127956ff1796a68b928e6324bbf06ef4db34367b/pillow-12.1.1-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:600fd103672b925fe62ed08e0d874ea34d692474df6f4bf7ebe148b30f89f39f", size = 5228606, upload-time = "2026-02-11T04:22:52.106Z" }, - { url = "https://files.pythonhosted.org/packages/5f/d5/3b005b4e4fda6698b371fa6c21b097d4707585d7db99e98d9b0b87ac612a/pillow-12.1.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:665e1b916b043cef294bc54d47bf02d87e13f769bc4bc5fa225a24b3a6c5aca9", size = 4622321, upload-time = "2026-02-11T04:22:53.827Z" }, - { url = "https://files.pythonhosted.org/packages/df/36/ed3ea2d594356fd8037e5a01f6156c74bc8d92dbb0fa60746cc96cabb6e8/pillow-12.1.1-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:495c302af3aad1ca67420ddd5c7bd480c8867ad173528767d906428057a11f0e", size = 5247579, upload-time = "2026-02-11T04:22:56.094Z" }, - { url = "https://files.pythonhosted.org/packages/54/9a/9cc3e029683cf6d20ae5085da0dafc63148e3252c2f13328e553aaa13cfb/pillow-12.1.1-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8fd420ef0c52c88b5a035a0886f367748c72147b2b8f384c9d12656678dfdfa9", size = 6989094, upload-time = "2026-02-11T04:22:58.288Z" }, - { url = "https://files.pythonhosted.org/packages/00/98/fc53ab36da80b88df0967896b6c4b4cd948a0dc5aa40a754266aa3ae48b3/pillow-12.1.1-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f975aa7ef9684ce7e2c18a3aa8f8e2106ce1e46b94ab713d156b2898811651d3", size = 5313850, upload-time = "2026-02-11T04:23:00.554Z" }, - { url = "https://files.pythonhosted.org/packages/30/02/00fa585abfd9fe9d73e5f6e554dc36cc2b842898cbfc46d70353dae227f8/pillow-12.1.1-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8089c852a56c2966cf18835db62d9b34fef7ba74c726ad943928d494fa7f4735", size = 5963343, upload-time = "2026-02-11T04:23:02.934Z" }, - { url = "https://files.pythonhosted.org/packages/f2/26/c56ce33ca856e358d27fda9676c055395abddb82c35ac0f593877ed4562e/pillow-12.1.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:cb9bb857b2d057c6dfc72ac5f3b44836924ba15721882ef103cecb40d002d80e", size = 7029880, upload-time = "2026-02-11T04:23:04.783Z" }, +version = "12.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8c/21/c2bcdd5906101a30244eaffc1b6e6ce71a31bd0742a01eb89e660ebfac2d/pillow-12.2.0.tar.gz", hash = "sha256:a830b1a40919539d07806aa58e1b114df53ddd43213d9c8b75847eee6c0182b5", size = 46987819, upload-time = "2026-04-01T14:46:17.687Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/68/e1/748f5663efe6edcfc4e74b2b93edfb9b8b99b67f21a854c3ae416500a2d9/pillow-12.2.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:8be29e59487a79f173507c30ddf57e733a357f67881430449bb32614075a40ab", size = 5354347, upload-time = "2026-04-01T14:42:44.255Z" }, + { url = "https://files.pythonhosted.org/packages/47/a1/d5ff69e747374c33a3b53b9f98cca7889fce1fd03d79cdc4e1bccc6c5a87/pillow-12.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:71cde9a1e1551df7d34a25462fc60325e8a11a82cc2e2f54578e5e9a1e153d65", size = 4695873, upload-time = "2026-04-01T14:42:46.452Z" }, + { url = "https://files.pythonhosted.org/packages/df/21/e3fbdf54408a973c7f7f89a23b2cb97a7ef30c61ab4142af31eee6aebc88/pillow-12.2.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f490f9368b6fc026f021db16d7ec2fbf7d89e2edb42e8ec09d2c60505f5729c7", size = 6280168, upload-time = "2026-04-01T14:42:49.228Z" }, + { url = "https://files.pythonhosted.org/packages/d3/f1/00b7278c7dd52b17ad4329153748f87b6756ec195ff786c2bdf12518337d/pillow-12.2.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8bd7903a5f2a4545f6fd5935c90058b89d30045568985a71c79f5fd6edf9b91e", size = 8088188, upload-time = "2026-04-01T14:42:51.735Z" }, + { url = "https://files.pythonhosted.org/packages/ad/cf/220a5994ef1b10e70e85748b75649d77d506499352be135a4989c957b701/pillow-12.2.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3997232e10d2920a68d25191392e3a4487d8183039e1c74c2297f00ed1c50705", size = 6394401, upload-time = "2026-04-01T14:42:54.343Z" }, + { url = "https://files.pythonhosted.org/packages/e9/bd/e51a61b1054f09437acfbc2ff9106c30d1eb76bc1453d428399946781253/pillow-12.2.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e74473c875d78b8e9d5da2a70f7099549f9eb37ded4e2f6a463e60125bccd176", size = 7079655, upload-time = "2026-04-01T14:42:56.954Z" }, + { url = "https://files.pythonhosted.org/packages/6b/3d/45132c57d5fb4b5744567c3817026480ac7fc3ce5d4c47902bc0e7f6f853/pillow-12.2.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:56a3f9c60a13133a98ecff6197af34d7824de9b7b38c3654861a725c970c197b", size = 6503105, upload-time = "2026-04-01T14:42:59.847Z" }, + { url = "https://files.pythonhosted.org/packages/7d/2e/9df2fc1e82097b1df3dce58dc43286aa01068e918c07574711fcc53e6fb4/pillow-12.2.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:90e6f81de50ad6b534cab6e5aef77ff6e37722b2f5d908686f4a5c9eba17a909", size = 7203402, upload-time = "2026-04-01T14:43:02.664Z" }, + { url = "https://files.pythonhosted.org/packages/bd/2e/2941e42858ebb67e50ae741473de81c2984e6eff7b397017623c676e2e8d/pillow-12.2.0-cp311-cp311-win32.whl", hash = "sha256:8c984051042858021a54926eb597d6ee3012393ce9c181814115df4c60b9a808", size = 6378149, upload-time = "2026-04-01T14:43:05.274Z" }, + { url = "https://files.pythonhosted.org/packages/69/42/836b6f3cd7f3e5fa10a1f1a5420447c17966044c8fbf589cc0452d5502db/pillow-12.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:6e6b2a0c538fc200b38ff9eb6628228b77908c319a005815f2dde585a0664b60", size = 7082626, upload-time = "2026-04-01T14:43:08.557Z" }, + { url = "https://files.pythonhosted.org/packages/c2/88/549194b5d6f1f494b485e493edc6693c0a16f4ada488e5bd974ed1f42fad/pillow-12.2.0-cp311-cp311-win_arm64.whl", hash = "sha256:9a8a34cc89c67a65ea7437ce257cea81a9dad65b29805f3ecee8c8fe8ff25ffe", size = 2463531, upload-time = "2026-04-01T14:43:10.743Z" }, + { url = "https://files.pythonhosted.org/packages/58/be/7482c8a5ebebbc6470b3eb791812fff7d5e0216c2be3827b30b8bb6603ed/pillow-12.2.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2d192a155bbcec180f8564f693e6fd9bccff5a7af9b32e2e4bf8c9c69dbad6b5", size = 5308279, upload-time = "2026-04-01T14:43:13.246Z" }, + { url = "https://files.pythonhosted.org/packages/d8/95/0a351b9289c2b5cbde0bacd4a83ebc44023e835490a727b2a3bd60ddc0f4/pillow-12.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f3f40b3c5a968281fd507d519e444c35f0ff171237f4fdde090dd60699458421", size = 4695490, upload-time = "2026-04-01T14:43:15.584Z" }, + { url = "https://files.pythonhosted.org/packages/de/af/4e8e6869cbed569d43c416fad3dc4ecb944cb5d9492defaed89ddd6fe871/pillow-12.2.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:03e7e372d5240cc23e9f07deca4d775c0817bffc641b01e9c3af208dbd300987", size = 6284462, upload-time = "2026-04-01T14:43:18.268Z" }, + { url = "https://files.pythonhosted.org/packages/e9/9e/c05e19657fd57841e476be1ab46c4d501bffbadbafdc31a6d665f8b737b6/pillow-12.2.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b86024e52a1b269467a802258c25521e6d742349d760728092e1bc2d135b4d76", size = 8094744, upload-time = "2026-04-01T14:43:20.716Z" }, + { url = "https://files.pythonhosted.org/packages/2b/54/1789c455ed10176066b6e7e6da1b01e50e36f94ba584dc68d9eebfe9156d/pillow-12.2.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7371b48c4fa448d20d2714c9a1f775a81155050d383333e0a6c15b1123dda005", size = 6398371, upload-time = "2026-04-01T14:43:23.443Z" }, + { url = "https://files.pythonhosted.org/packages/43/e3/fdc657359e919462369869f1c9f0e973f353f9a9ee295a39b1fea8ee1a77/pillow-12.2.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:62f5409336adb0663b7caa0da5c7d9e7bdbaae9ce761d34669420c2a801b2780", size = 7087215, upload-time = "2026-04-01T14:43:26.758Z" }, + { url = "https://files.pythonhosted.org/packages/8b/f8/2f6825e441d5b1959d2ca5adec984210f1ec086435b0ed5f52c19b3b8a6e/pillow-12.2.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:01afa7cf67f74f09523699b4e88c73fb55c13346d212a59a2db1f86b0a63e8c5", size = 6509783, upload-time = "2026-04-01T14:43:29.56Z" }, + { url = "https://files.pythonhosted.org/packages/67/f9/029a27095ad20f854f9dba026b3ea6428548316e057e6fc3545409e86651/pillow-12.2.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fc3d34d4a8fbec3e88a79b92e5465e0f9b842b628675850d860b8bd300b159f5", size = 7212112, upload-time = "2026-04-01T14:43:32.091Z" }, + { url = "https://files.pythonhosted.org/packages/be/42/025cfe05d1be22dbfdb4f264fe9de1ccda83f66e4fc3aac94748e784af04/pillow-12.2.0-cp312-cp312-win32.whl", hash = "sha256:58f62cc0f00fd29e64b29f4fd923ffdb3859c9f9e6105bfc37ba1d08994e8940", size = 6378489, upload-time = "2026-04-01T14:43:34.601Z" }, + { url = "https://files.pythonhosted.org/packages/5d/7b/25a221d2c761c6a8ae21bfa3874988ff2583e19cf8a27bf2fee358df7942/pillow-12.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:7f84204dee22a783350679a0333981df803dac21a0190d706a50475e361c93f5", size = 7084129, upload-time = "2026-04-01T14:43:37.213Z" }, + { url = "https://files.pythonhosted.org/packages/10/e1/542a474affab20fd4a0f1836cb234e8493519da6b76899e30bcc5d990b8b/pillow-12.2.0-cp312-cp312-win_arm64.whl", hash = "sha256:af73337013e0b3b46f175e79492d96845b16126ddf79c438d7ea7ff27783a414", size = 2463612, upload-time = "2026-04-01T14:43:39.421Z" }, + { url = "https://files.pythonhosted.org/packages/4a/01/53d10cf0dbad820a8db274d259a37ba50b88b24768ddccec07355382d5ad/pillow-12.2.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:8297651f5b5679c19968abefd6bb84d95fe30ef712eb1b2d9b2d31ca61267f4c", size = 4100837, upload-time = "2026-04-01T14:43:41.506Z" }, + { url = "https://files.pythonhosted.org/packages/0f/98/f3a6657ecb698c937f6c76ee564882945f29b79bad496abcba0e84659ec5/pillow-12.2.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:50d8520da2a6ce0af445fa6d648c4273c3eeefbc32d7ce049f22e8b5c3daecc2", size = 4176528, upload-time = "2026-04-01T14:43:43.773Z" }, + { url = "https://files.pythonhosted.org/packages/69/bc/8986948f05e3ea490b8442ea1c1d4d990b24a7e43d8a51b2c7d8b1dced36/pillow-12.2.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:766cef22385fa1091258ad7e6216792b156dc16d8d3fa607e7545b2b72061f1c", size = 3640401, upload-time = "2026-04-01T14:43:45.87Z" }, + { url = "https://files.pythonhosted.org/packages/34/46/6c717baadcd62bc8ed51d238d521ab651eaa74838291bda1f86fe1f864c9/pillow-12.2.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5d2fd0fa6b5d9d1de415060363433f28da8b1526c1c129020435e186794b3795", size = 5308094, upload-time = "2026-04-01T14:43:48.438Z" }, + { url = "https://files.pythonhosted.org/packages/71/43/905a14a8b17fdb1ccb58d282454490662d2cb89a6bfec26af6d3520da5ec/pillow-12.2.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:56b25336f502b6ed02e889f4ece894a72612fe885889a6e8c4c80239ff6e5f5f", size = 4695402, upload-time = "2026-04-01T14:43:51.292Z" }, + { url = "https://files.pythonhosted.org/packages/73/dd/42107efcb777b16fa0393317eac58f5b5cf30e8392e266e76e51cff28c3d/pillow-12.2.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f1c943e96e85df3d3478f7b691f229887e143f81fedab9b20205349ab04d73ed", size = 6280005, upload-time = "2026-04-01T14:43:54.242Z" }, + { url = "https://files.pythonhosted.org/packages/a8/68/b93e09e5e8549019e61acf49f65b1a8530765a7f812c77a7461bca7e4494/pillow-12.2.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:03f6fab9219220f041c74aeaa2939ff0062bd5c364ba9ce037197f4c6d498cd9", size = 8090669, upload-time = "2026-04-01T14:43:57.335Z" }, + { url = "https://files.pythonhosted.org/packages/4b/6e/3ccb54ce8ec4ddd1accd2d89004308b7b0b21c4ac3d20fa70af4760a4330/pillow-12.2.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5cdfebd752ec52bf5bb4e35d9c64b40826bc5b40a13df7c3cda20a2c03a0f5ed", size = 6395194, upload-time = "2026-04-01T14:43:59.864Z" }, + { url = "https://files.pythonhosted.org/packages/67/ee/21d4e8536afd1a328f01b359b4d3997b291ffd35a237c877b331c1c3b71c/pillow-12.2.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:eedf4b74eda2b5a4b2b2fb4c006d6295df3bf29e459e198c90ea48e130dc75c3", size = 7082423, upload-time = "2026-04-01T14:44:02.74Z" }, + { url = "https://files.pythonhosted.org/packages/78/5f/e9f86ab0146464e8c133fe85df987ed9e77e08b29d8d35f9f9f4d6f917ba/pillow-12.2.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:00a2865911330191c0b818c59103b58a5e697cae67042366970a6b6f1b20b7f9", size = 6505667, upload-time = "2026-04-01T14:44:05.381Z" }, + { url = "https://files.pythonhosted.org/packages/ed/1e/409007f56a2fdce61584fd3acbc2bbc259857d555196cedcadc68c015c82/pillow-12.2.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1e1757442ed87f4912397c6d35a0db6a7b52592156014706f17658ff58bbf795", size = 7208580, upload-time = "2026-04-01T14:44:08.39Z" }, + { url = "https://files.pythonhosted.org/packages/23/c4/7349421080b12fb35414607b8871e9534546c128a11965fd4a7002ccfbee/pillow-12.2.0-cp313-cp313-win32.whl", hash = "sha256:144748b3af2d1b358d41286056d0003f47cb339b8c43a9ea42f5fea4d8c66b6e", size = 6375896, upload-time = "2026-04-01T14:44:11.197Z" }, + { url = "https://files.pythonhosted.org/packages/3f/82/8a3739a5e470b3c6cbb1d21d315800d8e16bff503d1f16b03a4ec3212786/pillow-12.2.0-cp313-cp313-win_amd64.whl", hash = "sha256:390ede346628ccc626e5730107cde16c42d3836b89662a115a921f28440e6a3b", size = 7081266, upload-time = "2026-04-01T14:44:13.947Z" }, + { url = "https://files.pythonhosted.org/packages/c3/25/f968f618a062574294592f668218f8af564830ccebdd1fa6200f598e65c5/pillow-12.2.0-cp313-cp313-win_arm64.whl", hash = "sha256:8023abc91fba39036dbce14a7d6535632f99c0b857807cbbbf21ecc9f4717f06", size = 2463508, upload-time = "2026-04-01T14:44:16.312Z" }, + { url = "https://files.pythonhosted.org/packages/4d/a4/b342930964e3cb4dce5038ae34b0eab4653334995336cd486c5a8c25a00c/pillow-12.2.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:042db20a421b9bafecc4b84a8b6e444686bd9d836c7fd24542db3e7df7baad9b", size = 5309927, upload-time = "2026-04-01T14:44:18.89Z" }, + { url = "https://files.pythonhosted.org/packages/9f/de/23198e0a65a9cf06123f5435a5d95cea62a635697f8f03d134d3f3a96151/pillow-12.2.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:dd025009355c926a84a612fecf58bb315a3f6814b17ead51a8e48d3823d9087f", size = 4698624, upload-time = "2026-04-01T14:44:21.115Z" }, + { url = "https://files.pythonhosted.org/packages/01/a6/1265e977f17d93ea37aa28aa81bad4fa597933879fac2520d24e021c8da3/pillow-12.2.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:88ddbc66737e277852913bd1e07c150cc7bb124539f94c4e2df5344494e0a612", size = 6321252, upload-time = "2026-04-01T14:44:23.663Z" }, + { url = "https://files.pythonhosted.org/packages/3c/83/5982eb4a285967baa70340320be9f88e57665a387e3a53a7f0db8231a0cd/pillow-12.2.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d362d1878f00c142b7e1a16e6e5e780f02be8195123f164edf7eddd911eefe7c", size = 8126550, upload-time = "2026-04-01T14:44:26.772Z" }, + { url = "https://files.pythonhosted.org/packages/4e/48/6ffc514adce69f6050d0753b1a18fd920fce8cac87620d5a31231b04bfc5/pillow-12.2.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2c727a6d53cb0018aadd8018c2b938376af27914a68a492f59dfcaca650d5eea", size = 6433114, upload-time = "2026-04-01T14:44:29.615Z" }, + { url = "https://files.pythonhosted.org/packages/36/a3/f9a77144231fb8d40ee27107b4463e205fa4677e2ca2548e14da5cf18dce/pillow-12.2.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:efd8c21c98c5cc60653bcb311bef2ce0401642b7ce9d09e03a7da87c878289d4", size = 7115667, upload-time = "2026-04-01T14:44:32.773Z" }, + { url = "https://files.pythonhosted.org/packages/c1/fc/ac4ee3041e7d5a565e1c4fd72a113f03b6394cc72ab7089d27608f8aaccb/pillow-12.2.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9f08483a632889536b8139663db60f6724bfcb443c96f1b18855860d7d5c0fd4", size = 6538966, upload-time = "2026-04-01T14:44:35.252Z" }, + { url = "https://files.pythonhosted.org/packages/c0/a8/27fb307055087f3668f6d0a8ccb636e7431d56ed0750e07a60547b1e083e/pillow-12.2.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dac8d77255a37e81a2efcbd1fc05f1c15ee82200e6c240d7e127e25e365c39ea", size = 7238241, upload-time = "2026-04-01T14:44:37.875Z" }, + { url = "https://files.pythonhosted.org/packages/ad/4b/926ab182c07fccae9fcb120043464e1ff1564775ec8864f21a0ebce6ac25/pillow-12.2.0-cp313-cp313t-win32.whl", hash = "sha256:ee3120ae9dff32f121610bb08e4313be87e03efeadfc6c0d18f89127e24d0c24", size = 6379592, upload-time = "2026-04-01T14:44:40.336Z" }, + { url = "https://files.pythonhosted.org/packages/c2/c4/f9e476451a098181b30050cc4c9a3556b64c02cf6497ea421ac047e89e4b/pillow-12.2.0-cp313-cp313t-win_amd64.whl", hash = "sha256:325ca0528c6788d2a6c3d40e3568639398137346c3d6e66bb61db96b96511c98", size = 7085542, upload-time = "2026-04-01T14:44:43.251Z" }, + { url = "https://files.pythonhosted.org/packages/00/a4/285f12aeacbe2d6dc36c407dfbbe9e96d4a80b0fb710a337f6d2ad978c75/pillow-12.2.0-cp313-cp313t-win_arm64.whl", hash = "sha256:2e5a76d03a6c6dcef67edabda7a52494afa4035021a79c8558e14af25313d453", size = 2465765, upload-time = "2026-04-01T14:44:45.996Z" }, + { url = "https://files.pythonhosted.org/packages/bf/98/4595daa2365416a86cb0d495248a393dfc84e96d62ad080c8546256cb9c0/pillow-12.2.0-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:3adc9215e8be0448ed6e814966ecf3d9952f0ea40eb14e89a102b87f450660d8", size = 4100848, upload-time = "2026-04-01T14:44:48.48Z" }, + { url = "https://files.pythonhosted.org/packages/0b/79/40184d464cf89f6663e18dfcf7ca21aae2491fff1a16127681bf1fa9b8cf/pillow-12.2.0-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:6a9adfc6d24b10f89588096364cc726174118c62130c817c2837c60cf08a392b", size = 4176515, upload-time = "2026-04-01T14:44:51.353Z" }, + { url = "https://files.pythonhosted.org/packages/b0/63/703f86fd4c422a9cf722833670f4f71418fb116b2853ff7da722ea43f184/pillow-12.2.0-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:6a6e67ea2e6feda684ed370f9a1c52e7a243631c025ba42149a2cc5934dec295", size = 3640159, upload-time = "2026-04-01T14:44:53.588Z" }, + { url = "https://files.pythonhosted.org/packages/71/e0/fb22f797187d0be2270f83500aab851536101b254bfa1eae10795709d283/pillow-12.2.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:2bb4a8d594eacdfc59d9e5ad972aa8afdd48d584ffd5f13a937a664c3e7db0ed", size = 5312185, upload-time = "2026-04-01T14:44:56.039Z" }, + { url = "https://files.pythonhosted.org/packages/ba/8c/1a9e46228571de18f8e28f16fabdfc20212a5d019f3e3303452b3f0a580d/pillow-12.2.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:80b2da48193b2f33ed0c32c38140f9d3186583ce7d516526d462645fd98660ae", size = 4695386, upload-time = "2026-04-01T14:44:58.663Z" }, + { url = "https://files.pythonhosted.org/packages/70/62/98f6b7f0c88b9addd0e87c217ded307b36be024d4ff8869a812b241d1345/pillow-12.2.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:22db17c68434de69d8ecfc2fe821569195c0c373b25cccb9cbdacf2c6e53c601", size = 6280384, upload-time = "2026-04-01T14:45:01.5Z" }, + { url = "https://files.pythonhosted.org/packages/5e/03/688747d2e91cfbe0e64f316cd2e8005698f76ada3130d0194664174fa5de/pillow-12.2.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7b14cc0106cd9aecda615dd6903840a058b4700fcb817687d0ee4fc8b6e389be", size = 8091599, upload-time = "2026-04-01T14:45:04.5Z" }, + { url = "https://files.pythonhosted.org/packages/f6/35/577e22b936fcdd66537329b33af0b4ccfefaeabd8aec04b266528cddb33c/pillow-12.2.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8cbeb542b2ebc6fcdacabf8aca8c1a97c9b3ad3927d46b8723f9d4f033288a0f", size = 6396021, upload-time = "2026-04-01T14:45:07.117Z" }, + { url = "https://files.pythonhosted.org/packages/11/8d/d2532ad2a603ca2b93ad9f5135732124e57811d0168155852f37fbce2458/pillow-12.2.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4bfd07bc812fbd20395212969e41931001fd59eb55a60658b0e5710872e95286", size = 7083360, upload-time = "2026-04-01T14:45:09.763Z" }, + { url = "https://files.pythonhosted.org/packages/5e/26/d325f9f56c7e039034897e7380e9cc202b1e368bfd04d4cbe6a441f02885/pillow-12.2.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9aba9a17b623ef750a4d11b742cbafffeb48a869821252b30ee21b5e91392c50", size = 6507628, upload-time = "2026-04-01T14:45:12.378Z" }, + { url = "https://files.pythonhosted.org/packages/5f/f7/769d5632ffb0988f1c5e7660b3e731e30f7f8ec4318e94d0a5d674eb65a4/pillow-12.2.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:deede7c263feb25dba4e82ea23058a235dcc2fe1f6021025dc71f2b618e26104", size = 7209321, upload-time = "2026-04-01T14:45:15.122Z" }, + { url = "https://files.pythonhosted.org/packages/6a/7a/c253e3c645cd47f1aceea6a8bacdba9991bf45bb7dfe927f7c893e89c93c/pillow-12.2.0-cp314-cp314-win32.whl", hash = "sha256:632ff19b2778e43162304d50da0181ce24ac5bb8180122cbe1bf4673428328c7", size = 6479723, upload-time = "2026-04-01T14:45:17.797Z" }, + { url = "https://files.pythonhosted.org/packages/cd/8b/601e6566b957ca50e28725cb6c355c59c2c8609751efbecd980db44e0349/pillow-12.2.0-cp314-cp314-win_amd64.whl", hash = "sha256:4e6c62e9d237e9b65fac06857d511e90d8461a32adcc1b9065ea0c0fa3a28150", size = 7217400, upload-time = "2026-04-01T14:45:20.529Z" }, + { url = "https://files.pythonhosted.org/packages/d6/94/220e46c73065c3e2951bb91c11a1fb636c8c9ad427ac3ce7d7f3359b9b2f/pillow-12.2.0-cp314-cp314-win_arm64.whl", hash = "sha256:b1c1fbd8a5a1af3412a0810d060a78b5136ec0836c8a4ef9aa11807f2a22f4e1", size = 2554835, upload-time = "2026-04-01T14:45:23.162Z" }, + { url = "https://files.pythonhosted.org/packages/b6/ab/1b426a3974cb0e7da5c29ccff4807871d48110933a57207b5a676cccc155/pillow-12.2.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:57850958fe9c751670e49b2cecf6294acc99e562531f4bd317fa5ddee2068463", size = 5314225, upload-time = "2026-04-01T14:45:25.637Z" }, + { url = "https://files.pythonhosted.org/packages/19/1e/dce46f371be2438eecfee2a1960ee2a243bbe5e961890146d2dee1ff0f12/pillow-12.2.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:d5d38f1411c0ed9f97bcb49b7bd59b6b7c314e0e27420e34d99d844b9ce3b6f3", size = 4698541, upload-time = "2026-04-01T14:45:28.355Z" }, + { url = "https://files.pythonhosted.org/packages/55/c3/7fbecf70adb3a0c33b77a300dc52e424dc22ad8cdc06557a2e49523b703d/pillow-12.2.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5c0a9f29ca8e79f09de89293f82fc9b0270bb4af1d58bc98f540cc4aedf03166", size = 6322251, upload-time = "2026-04-01T14:45:30.924Z" }, + { url = "https://files.pythonhosted.org/packages/1c/3c/7fbc17cfb7e4fe0ef1642e0abc17fc6c94c9f7a16be41498e12e2ba60408/pillow-12.2.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1610dd6c61621ae1cf811bef44d77e149ce3f7b95afe66a4512f8c59f25d9ebe", size = 8127807, upload-time = "2026-04-01T14:45:33.908Z" }, + { url = "https://files.pythonhosted.org/packages/ff/c3/a8ae14d6defd2e448493ff512fae903b1e9bd40b72efb6ec55ce0048c8ce/pillow-12.2.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0a34329707af4f73cf1782a36cd2289c0368880654a2c11f027bcee9052d35dd", size = 6433935, upload-time = "2026-04-01T14:45:36.623Z" }, + { url = "https://files.pythonhosted.org/packages/6e/32/2880fb3a074847ac159d8f902cb43278a61e85f681661e7419e6596803ed/pillow-12.2.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8e9c4f5b3c546fa3458a29ab22646c1c6c787ea8f5ef51300e5a60300736905e", size = 7116720, upload-time = "2026-04-01T14:45:39.258Z" }, + { url = "https://files.pythonhosted.org/packages/46/87/495cc9c30e0129501643f24d320076f4cc54f718341df18cc70ec94c44e1/pillow-12.2.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:fb043ee2f06b41473269765c2feae53fc2e2fbf96e5e22ca94fb5ad677856f06", size = 6540498, upload-time = "2026-04-01T14:45:41.879Z" }, + { url = "https://files.pythonhosted.org/packages/18/53/773f5edca692009d883a72211b60fdaf8871cbef075eaa9d577f0a2f989e/pillow-12.2.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:f278f034eb75b4e8a13a54a876cc4a5ab39173d2cdd93a638e1b467fc545ac43", size = 7239413, upload-time = "2026-04-01T14:45:44.705Z" }, + { url = "https://files.pythonhosted.org/packages/c9/e4/4b64a97d71b2a83158134abbb2f5bd3f8a2ea691361282f010998f339ec7/pillow-12.2.0-cp314-cp314t-win32.whl", hash = "sha256:6bb77b2dcb06b20f9f4b4a8454caa581cd4dd0643a08bacf821216a16d9c8354", size = 6482084, upload-time = "2026-04-01T14:45:47.568Z" }, + { url = "https://files.pythonhosted.org/packages/ba/13/306d275efd3a3453f72114b7431c877d10b1154014c1ebbedd067770d629/pillow-12.2.0-cp314-cp314t-win_amd64.whl", hash = "sha256:6562ace0d3fb5f20ed7290f1f929cae41b25ae29528f2af1722966a0a02e2aa1", size = 7225152, upload-time = "2026-04-01T14:45:50.032Z" }, + { url = "https://files.pythonhosted.org/packages/ff/6e/cf826fae916b8658848d7b9f38d88da6396895c676e8086fc0988073aaf8/pillow-12.2.0-cp314-cp314t-win_arm64.whl", hash = "sha256:aa88ccfe4e32d362816319ed727a004423aab09c5cea43c01a4b435643fa34eb", size = 2556579, upload-time = "2026-04-01T14:45:52.529Z" }, + { url = "https://files.pythonhosted.org/packages/4e/b7/2437044fb910f499610356d1352e3423753c98e34f915252aafecc64889f/pillow-12.2.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0538bd5e05efec03ae613fd89c4ce0368ecd2ba239cc25b9f9be7ed426b0af1f", size = 5273969, upload-time = "2026-04-01T14:45:55.538Z" }, + { url = "https://files.pythonhosted.org/packages/f6/f4/8316e31de11b780f4ac08ef3654a75555e624a98db1056ecb2122d008d5a/pillow-12.2.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:394167b21da716608eac917c60aa9b969421b5dcbbe02ae7f013e7b85811c69d", size = 4659674, upload-time = "2026-04-01T14:45:58.093Z" }, + { url = "https://files.pythonhosted.org/packages/d4/37/664fca7201f8bb2aa1d20e2c3d5564a62e6ae5111741966c8319ca802361/pillow-12.2.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5d04bfa02cc2d23b497d1e90a0f927070043f6cbf303e738300532379a4b4e0f", size = 5288479, upload-time = "2026-04-01T14:46:01.141Z" }, + { url = "https://files.pythonhosted.org/packages/49/62/5b0ed78fce87346be7a5cfcfaaad91f6a1f98c26f86bdbafa2066c647ef6/pillow-12.2.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0c838a5125cee37e68edec915651521191cef1e6aa336b855f495766e77a366e", size = 7032230, upload-time = "2026-04-01T14:46:03.874Z" }, + { url = "https://files.pythonhosted.org/packages/c3/28/ec0fc38107fc32536908034e990c47914c57cd7c5a3ece4d8d8f7ffd7e27/pillow-12.2.0-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4a6c9fa44005fa37a91ebfc95d081e8079757d2e904b27103f4f5fa6f0bf78c0", size = 5355404, upload-time = "2026-04-01T14:46:06.33Z" }, + { url = "https://files.pythonhosted.org/packages/5e/8b/51b0eddcfa2180d60e41f06bd6d0a62202b20b59c68f5a132e615b75aecf/pillow-12.2.0-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:25373b66e0dd5905ed63fa3cae13c82fbddf3079f2c8bf15c6fb6a35586324c1", size = 6002215, upload-time = "2026-04-01T14:46:08.83Z" }, + { url = "https://files.pythonhosted.org/packages/bc/60/5382c03e1970de634027cee8e1b7d39776b778b81812aaf45b694dfe9e28/pillow-12.2.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:bfa9c230d2fe991bed5318a5f119bd6780cda2915cca595393649fc118ab895e", size = 7080946, upload-time = "2026-04-01T14:46:11.734Z" }, ] [[package]] @@ -1167,7 +1167,7 @@ wheels = [ [[package]] name = "pytest" -version = "8.4.1" +version = "9.0.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, @@ -1176,21 +1176,22 @@ dependencies = [ { name = "pluggy" }, { name = "pygments" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/08/ba/45911d754e8eba3d5a841a5ce61a65a685ff1798421ac054f85aa8747dfb/pytest-8.4.1.tar.gz", hash = "sha256:7c67fd69174877359ed9371ec3af8a3d2b04741818c51e5e99cc1742251fa93c", size = 1517714, upload-time = "2025-06-18T05:48:06.109Z" } +sdist = { url = "https://files.pythonhosted.org/packages/7d/0d/549bd94f1a0a402dc8cf64563a117c0f3765662e2e668477624baeec44d5/pytest-9.0.3.tar.gz", hash = "sha256:b86ada508af81d19edeb213c681b1d48246c1a91d304c6c81a427674c17eb91c", size = 1572165, upload-time = "2026-04-07T17:16:18.027Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/29/16/c8a903f4c4dffe7a12843191437d7cd8e32751d5de349d45d3fe69544e87/pytest-8.4.1-py3-none-any.whl", hash = "sha256:539c70ba6fcead8e78eebbf1115e8b589e7565830d7d006a8723f19ac8a0afb7", size = 365474, upload-time = "2025-06-18T05:48:03.955Z" }, + { url = "https://files.pythonhosted.org/packages/d4/24/a372aaf5c9b7208e7112038812994107bc65a84cd00e0354a88c2c77a617/pytest-9.0.3-py3-none-any.whl", hash = "sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9", size = 375249, upload-time = "2026-04-07T17:16:16.13Z" }, ] [[package]] name = "pytest-asyncio" -version = "1.1.0" +version = "1.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pytest" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/4e/51/f8794af39eeb870e87a8c8068642fc07bce0c854d6865d7dd0f2a9d338c2/pytest_asyncio-1.1.0.tar.gz", hash = "sha256:796aa822981e01b68c12e4827b8697108f7205020f24b5793b3c41555dab68ea", size = 46652, upload-time = "2025-07-16T04:29:26.393Z" } +sdist = { url = "https://files.pythonhosted.org/packages/90/2c/8af215c0f776415f3590cac4f9086ccefd6fd463befeae41cd4d3f193e5a/pytest_asyncio-1.3.0.tar.gz", hash = "sha256:d7f52f36d231b80ee124cd216ffb19369aa168fc10095013c6b014a34d3ee9e5", size = 50087, upload-time = "2025-11-10T16:07:47.256Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c7/9d/bf86eddabf8c6c9cb1ea9a869d6873b46f105a5d292d3a6f7071f5b07935/pytest_asyncio-1.1.0-py3-none-any.whl", hash = "sha256:5fe2d69607b0bd75c656d1211f969cadba035030156745ee09e7d71740e58ecf", size = 15157, upload-time = "2025-07-16T04:29:24.929Z" }, + { url = "https://files.pythonhosted.org/packages/e5/35/f8b19922b6a25bc0880171a2f1a003eaeb93657475193ab516fd87cac9da/pytest_asyncio-1.3.0-py3-none-any.whl", hash = "sha256:611e26147c7f77640e6d0a92a38ed17c3e9848063698d5c93d5aa7aa11cebff5", size = 15075, upload-time = "2025-11-10T16:07:45.537Z" }, ] [[package]]