diff --git a/.codex/PIPELINE.md b/.codex/PIPELINE.md index a9e76d86..c7d56a31 100644 --- a/.codex/PIPELINE.md +++ b/.codex/PIPELINE.md @@ -58,6 +58,25 @@ Codex must obey: * VMLab moved to v0.3.5 and is blocked until the v0.3.0 Extensions Foundation is complete. +## Current maintainer priority order + +For further ECLI work, Codex must sequence planning and implementation as: + +1. Stabilize all Extensions and the Extensions Layer. +2. Finish TextMate rendering, TextMate performance, and multiline comment + handling. +3. Implement F4 Diagnostics / Linter Panel. +4. Implement F7 AI Code Assistant as an Extensions Layer feature. +5. Implement the plugin layer for new themes, linters, and AI extensions. +6. Implement F11 Terminal-console without duplicating PySH. +7. Implement full mouse support. +8. Implement F8 System Doctor / lab engineer diagnostic tools. +9. Implement F9 Git improvements. + +This order does not override Stage gates, audit gates, the single-writer screen +invariant, Extensions Layer security constraints, or release/publish safety +rules. 
+ ## Recommended Codex execution mode For Stage 1 inventory and diagnostics: diff --git a/.github/workflows/macos-dmg.yml b/.github/workflows/macos-dmg.yml index b1f34fc8..2bdd449d 100644 --- a/.github/workflows/macos-dmg.yml +++ b/.github/workflows/macos-dmg.yml @@ -46,6 +46,9 @@ jobs: if: runner.arch == 'ARM64' run: softwareupdate --install-rosetta --agree-to-license + - name: Install native TextMate dependencies + run: brew install oniguruma pkg-config + - name: Build DMG run: | make package-macos diff --git a/.github/workflows/macos-validate.yml b/.github/workflows/macos-validate.yml index 1ade8d8e..40705c67 100644 --- a/.github/workflows/macos-validate.yml +++ b/.github/workflows/macos-validate.yml @@ -51,6 +51,9 @@ jobs: if: runner.arch == 'ARM64' run: softwareupdate --install-rosetta --agree-to-license + - name: Install native TextMate dependencies + run: brew install oniguruma pkg-config + - name: Install build dependencies run: python -m pip install --upgrade pip build twine pyinstaller diff --git a/AGENTS.md b/AGENTS.md index 78ae7740..f5d8f776 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -691,3 +691,26 @@ All agents (Codex, Claude Code, Cursor, and any other automation) must obey: and all active platform packaging contracts must remain green. * VMLab is out of scope: it moved to v0.3.5 and is blocked until the v0.3.0 Extensions Foundation is complete. + +## 14. Current maintainer priority order + +For further ECLI work, treat extension stabilization as the first priority: + +1. Stabilize all Extensions and the Extensions Layer before adjacent feature + expansion. +2. Finish TextMate rendering, TextMate performance, and multiline comment + handling. +3. Implement F4 Diagnostics / Linter Panel. +4. Implement F7 AI Code Assistant as an Extensions Layer feature. +5. Implement the plugin layer for new themes, linters, and AI extensions. +6. Implement F11 Terminal-console without duplicating PySH. 
Command execution + must remain routed through explicit ECLI services, PySH, or CommandPlan + surfaces; do not migrate PySH source into ECLI. +7. Implement full mouse support. +8. Implement F8 System Doctor / lab engineer diagnostic tools. +9. Implement F9 Git improvements. + +This priority order is a sequencing rule for planning and implementation. It +does not waive Stage 1/Stage 2 gates, P0 audit requirements, the single-writer +screen invariant, the Extensions Layer security contract, or release/publish +safety rules. diff --git a/CODEX.md b/CODEX.md index 54a5ae1d..9b445a83 100644 --- a/CODEX.md +++ b/CODEX.md @@ -223,6 +223,30 @@ Codex must obey: * VMLab is out of scope: it moved to v0.3.5 and is blocked until the v0.3.0 Extensions Foundation is complete. +## Current maintainer priority order + +For further ECLI work, Codex must treat extension stabilization as the first +priority: + +1. Stabilize all Extensions and the Extensions Layer before adjacent feature + expansion. +2. Finish TextMate rendering, TextMate performance, and multiline comment + handling. +3. Implement F4 Diagnostics / Linter Panel. +4. Implement F7 AI Code Assistant as an Extensions Layer feature. +5. Implement the plugin layer for new themes, linters, and AI extensions. +6. Implement F11 Terminal-console without duplicating PySH. Command execution + must remain routed through explicit ECLI services, PySH, or CommandPlan + surfaces; do not migrate PySH source into ECLI. +7. Implement full mouse support. +8. Implement F8 System Doctor / lab engineer diagnostic tools. +9. Implement F9 Git improvements. + +This priority order guides planning and implementation only. It does not waive +Stage 1/Stage 2 gates, P0 audit requirements, the single-writer screen +invariant, the Extensions Layer security contract, or release/publish safety +rules. 
+ ## Expected Codex final response For non-trivial work, finish with: diff --git a/README.md b/README.md index c01a10f5..7544f5ee 100755 --- a/README.md +++ b/README.md @@ -673,6 +673,13 @@ Found a bug? Please help us by opening an issue on GitHub: * **Terminal**: Supports 256 colors and UTF-8 +* **TextMate syntax highlighting** (default): installed automatically via the + `python-textmate` dependency, which pulls `onigurumacffi` (the **Oniguruma** + regex engine). Binary wheels cover Linux, macOS, and Windows. On source-build + platforms (e.g. FreeBSD) install the Oniguruma development headers + (`devel/oniguruma`). If unavailable, ECLI falls back to the built-in legacy + highlighter automatically (`[extensions].syntax_engine = "legacy"`). + ### Supported Platforms * Ubuntu 20.04 LTS and newer diff --git a/audit-report.md b/audit-report.md index 3ae5d0ee..e01e440c 100644 --- a/audit-report.md +++ b/audit-report.md @@ -595,3 +595,54 @@ find .github/workflows -maxdepth 1 -type f -print | sort nl -ba .github/workflows/*.yml | sed -n '1,260p' rg -n "config|ConfigService|syntax_highlighting|History|undo|redo|runtime_import|pyinstaller|artifact|freebsd|coderabbit|CodeRabbit" tests .github docs/quality docs/release pyproject.toml ``` + +## Issue #102 addendum — multiline TextMate protection + +Implementation: + +- ECLI keeps TextMate scopes as the primary token source for extension-backed + rendering. +- `src/ecli/extensions/ecli_integration/syntax_service.py` now applies a + deterministic protected-range pass for known stateless multiline gaps: + Python strings/docstrings, JavaScript/TypeScript block/doc/line comments and + strings, HTML comments, and CSS block comments/strings. +- Protected comment/string ranges are cached by buffer revision and mapped onto + viewport lines before `theme_bridge.tokens_to_spans()` flattens TextMate + output, so protected comment/string style wins over leaked keyword, number, + operator, tag, selector, property, or value categories. 
+ +Real tests: + +- `tests/extensions/test_textmate_multiline_protection.py` adds direct + protected-range tests, TextMate-span rendering tests, and editor-facing + rendering tests for Python, JavaScript, TypeScript, HTML, and CSS multiline + fixtures. +- Existing performance coverage remains in + `tests/extensions/test_textmate_render_performance.py` and + `tests/extensions/test_textmate_scroll_regression.py` using real repository + files including `Makefile`, `logs/freebsd-0.2.2-fail.log`, + `logs/pr-46-body.md`, and `scripts/build_pyinstaller_linux.py`. + +Log/artifact analysis: + +- The known real artifacts for large-file acceptance remain the repository + `Makefile`, `logs/freebsd-0.2.2-fail.log`, and `logs/pr-46-body.md`. +- Synthetic fixtures are intentionally used only for exact adversarial + multiline comment/string bodies that are not guaranteed to exist in repository + files. + +Documentation: + +- `docs/architecture/extensions-layer.md` documents TextMate-primary rendering, + the bounded language-aware protection layer, and the large-file/multiline + acceptance tests. +- `docs/release/release-checklist.md` includes large-file scroll, multiline + rendering, no-SQL fallback, and TextMate dependency/fallback checks. + +Audit conclusion: + +- Imported upstream extension assets remain untouched. +- F11 PySH Console Panel behavior, future F4 linter work, and VMLab/QEMU/QMP + scope remain untouched. +- The issue #102 rendering gap is constrained to the ECLI-owned adapter layer, + with focused regression and performance evidence. diff --git a/config.toml b/config.toml index 587ca5d6..635b87b3 100755 --- a/config.toml +++ b/config.toml @@ -10,15 +10,63 @@ # # Licensed under the GNU General Public License version 2 only. # See the LICENSE file in the project root for full license text. - -keybindings = {} +# +# This is the user-facing ECLI configuration. 
Internal declarative defaults +# (comment delimiters, file-type format tables) live in code +# (src/ecli/utils/utils.py:DEFAULT_CONFIG) and are intentionally not duplicated +# here. # --- Colour theme ----------------------------------------------------------- -# Pick one of the eight built-in themes by number. Colours (editor surface, -# syntax highlighting, status bar, diagnostics) all come from the selected theme. -# 1 = Light Classic 2 = Light Soft 3 = Light High Contrast 4 = Light Solar -# 5 = Dark Classic 6 = Dark Soft 7 = Dark High Contrast 8 = Dark Neon -theme = 8 +# Pick one of the built-in or extension-backed themes by number. Colours for +# the editor surface, syntax highlighting, status bar, diagnostics, and file +# panels come from the selected theme. +# +# Numbering policy: +# 100-199 = light themes +# 200-299 = dark themes +# 300-399 = high-contrast themes +# 1-8 = deprecated aliases for old pre-extension-theme configs only +# 800-899 = reserved for future custom/imported special themes +# +# Light themes: +# 101 = GitHub Light Default +# 102 = GitHub Light +# 103 = GitHub Light Colorblind (Beta) +# 104 = Visual Studio Light +# 105 = Visual Studio 2017 Light - C++ +# 106 = Light Modern +# 107 = Light+ +# 108 = Quiet Light +# 109 = Solarized Light +# 110 = JetBrains Rider New UI Light +# +# Dark themes: +# 201 = GitHub Dark Default +# 202 = GitHub Dark +# 203 = GitHub Dark Dimmed +# 204 = Visual Studio Dark +# 205 = Visual Studio 2017 Dark - C++ +# 206 = Dark Modern +# 207 = Dark+ +# 208 = Monokai +# 209 = Monokai Dimmed +# 210 = Tomorrow Night Blue +# 211 = Abyss +# 212 = Atom One Dark +# 213 = Kimbie Dark +# 214 = Solarized Dark +# 215 = Red +# +# High contrast themes: +# 301 = Dark High Contrast +# 302 = GitHub Dark High Contrast +# 303 = GitHub Light High Contrast +# 304 = Light High Contrast +# +# Built-in compatibility themes are preserved in the 18x/28x/38x ranges. +# Missing theme numbers are not faked. 
Entering a missing or invalid theme +# number keeps the current theme unchanged and shows an ECLI warning. +theme = 207 [logging] @@ -32,18 +80,17 @@ separate_error_log = false default_provider = "openai" +# Coding-optimized default model per provider. Override with your preferred id. [ai.models] openai = "gpt-5-codex" gemini = "gemini-2.5-pro" -mistral = "magistral-medium-1.2" -claude = "claude-4-opus" -grok = "grok-4-fast" -huggingface = "meta-llama/Meta-Llama-3.1-405B-Instruct" - - -# NOTE: Editor colours are now defined by the built-in `theme` selector above. -# The former free-form [colors] table has been removed; per-colour overrides are -# no longer read by the editor. +mistral = "codestral-latest" +claude = "claude-sonnet-4-6" +grok = "grok-code-fast-1" +huggingface = "Qwen/Qwen2.5-Coder-32B-Instruct" +deepseek = "deepseek-chat" +qwen = "qwen3-coder-plus" +kimi = "kimi-k2-0905-preview" [fonts] @@ -59,26 +106,35 @@ use_spaces = true word_wrap = false auto_indent = true auto_brackets = true -# Toggle Pygments/custom syntax highlighting on or off. +# Global visible syntax-highlighting switch. When false, every line renders as +# plain default-coloured text regardless of the [extensions].syntax_engine below. syntax_highlighting = true # Opt-in mouse support (click to focus/move, wheel to scroll, click to select # list rows). Off by default so native terminal text selection still works. mouse = false -# --- Extensions Layer (data-only) ------------------------------------------- -# Deterministic switches for the imported VS Code/TextMate extension metadata -# adapters under src/ecli/extensions/ecli_integration/. These enable DATA-ONLY -# behavior only; they never start a VS Code extension host, Node/TypeScript or -# Copilot runtime, activationEvents, or package.json scripts. -# syntax_engine = "legacy" keeps the current regex highlighter authoritative -# until issue #102 ships a tested extension-backed syntax renderer. 
+# --- Extensions Layer ------------------------------------------------------- +# Switches for the imported VS Code / TextMate extension adapters under +# src/ecli/extensions/ecli_integration/. ECLI reads grammar/language metadata +# and tokenizes with the imported TextMate grammars only: it NEVER starts a +# VS Code extension host, a Node/TypeScript runtime, a Copilot runtime, +# activationEvents, or package.json scripts. +# +# syntax_engine selects the highlighter: +# "extension" = TextMate tokenization from the imported .tmLanguage.json +# grammars (default). Files whose grammar the tokenizer cannot +# handle — or when the tokenizer is not installed — +# fall back automatically to the legacy highlighter. +# "legacy" = the built-in Pygments/regex highlighter for every file. +# The [editor].syntax_highlighting switch above still turns visible +# highlighting on or off for both engines. [extensions] enabled = true metadata_registry = true grammar_catalog = true language_detection = true -syntax_engine = "legacy" +syntax_engine = "extension" [settings] @@ -89,7 +145,7 @@ show_git_info = true [linter] enabled = true auto_install = true -exclude = [".git", "__pycache__", ".venv"] +exclude = [".git", "__pycache__", ".venv"] [file_icons] @@ -149,1138 +205,3 @@ font = "🖋️" binary = "⚙️" document = "📄" default = "❓" - - -[comments.python] -line_prefix = "# " -docstring_delim = '"""' - - -[comments.ruby] -line_prefix = "# " -block_delims = ["=begin", "=end"] - - -[comments.perl] -line_prefix = "# " -block_delims = ["=pod", "=cut"] - - -[comments.lua] -line_prefix = "-- " -block_delims = ["--[[", "]]"] - - -[comments.javascript] -line_prefix = "// " -block_delims = ["/*", "*/"] - - -[comments.typescript] -line_prefix = "// " -block_delims = ["/*", "*/"] - - -[comments.php] -line_prefix = "// " -block_delims = ["/*", "*/"] - - -[comments.html] -block_delims = [""] - - -[comments.xml] -block_delims = [""] - - -[comments.css] -block_delims = ["/*", "*/"] - - -[comments.scss] 
-line_prefix = "// " -block_delims = ["/*", "*/"] - - -[comments.c] -line_prefix = "// " -block_delims = ["/*", "*/"] - - -[comments.cpp] -line_prefix = "// " -block_delims = ["/*", "*/"] - - -[comments.csharp] -line_prefix = "// " -block_delims = ["/*", "*/"] - - -[comments.java] -line_prefix = "// " -block_delims = ["/*", "*/"] - - -[comments.go] -line_prefix = "// " -block_delims = ["/*", "*/"] - - -[comments.rust] -line_prefix = "// " -block_delims = ["/*", "*/"] - - -[comments.swift] -line_prefix = "// " -block_delims = ["/*", "*/"] - - -[comments.kotlin] -line_prefix = "// " -block_delims = ["/*", "*/"] - - -[comments.scala] -line_prefix = "// " -block_delims = ["/*", "*/"] - - -[comments.dart] -line_prefix = "// " -block_delims = ["/*", "*/"] - - -[comments.haskell] -line_prefix = "-- " -block_delims = ["{-", "-}"] - - -[comments.elixir] -line_prefix = "# " -docstring_delim = '"""' - - -[comments.erlang] -line_prefix = "% " - - -[comments.clojure] -line_prefix = ";; " - - -[comments.fsharp] -line_prefix = "// " -block_delims = ["(*", "*)"] - - -[comments.ocaml] -block_delims = ["(*", "*)"] - - -[comments.shell] -line_prefix = "# " - - -[comments.powershell] -line_prefix = "# " -block_delims = ["<#", "#>"] - - -[comments.dockerfile] -line_prefix = "# " - - -[comments.makefile] -line_prefix = "# " - - -[comments.vim] -line_prefix = '" ' - - -[comments.assembly] -line_prefix = "; " - - -[comments.sql] -line_prefix = "-- " -block_delims = ["/*", "*/"] - - -[comments.yaml] -line_prefix = "# " - - -[comments.toml] -line_prefix = "# " - - -[comments.ini] -line_prefix = "; " - - -[comments.latex] -line_prefix = "% " - - -[comments.r] -line_prefix = "# " - - -[comments.matlab] -line_prefix = "% " -block_delims = ["%{", "%}"] - - -[comments.nim] -line_prefix = "# " -block_delims = ["#[", "]#"] - - -[comments.crystal] -line_prefix = "# " - - -[comments.zig] -line_prefix = "// " - - -[comments.bat] -line_prefix = "REM " - - -[[syntax_highlighting.python.patterns]] 
-pattern = "\\b(and|as|assert|async|await|break|class|continue|def|del|elif|else|except|exec|finally|for|from|global|if|import|in|is|lambda|nonlocal|not|or|pass|print|raise|return|try|while|with|yield)\\b" -color = "keyword" - - -[[syntax_highlighting.python.patterns]] -pattern = "@\\w+(?:\\([^)]*?\\))?" -color = "decorator" - - -[[syntax_highlighting.python.patterns]] -pattern = "(?s)(f|r|rf|fr)?('''(\\\\.|[^'])*?'''|\"\"\"(\\\\.|[^\"])*?\"\"\")" -color = "string" - - -[[syntax_highlighting.python.patterns]] -pattern = "(?s)(b|rb|br)?('''(\\\\.|[^'])*?'''|\"\"\"(\\\\.|[^\"])*?\"\"\")" -color = "string" - - -[[syntax_highlighting.python.patterns]] -pattern = "(f|r|rf|fr|b|br|rb)?(['\"])(?:\\\\\\2|.)*?\\2" -color = "string" - - -[[syntax_highlighting.python.patterns]] -pattern = "\\b(?:\\d+\\.\\d+|\\.\\d+|\\d+)(?:e[+-]?\\d+)?j?\\b" -color = "literal" - - -[[syntax_highlighting.python.patterns]] -pattern = "\\b0[bB][01_]+\\b" -color = "literal" - - -[[syntax_highlighting.python.patterns]] -pattern = "\\b0[oO][0-7_]+\\b" -color = "literal" - - -[[syntax_highlighting.python.patterns]] -pattern = "\\b0[xX][0-9a-fA-F_]+\\b" -color = "literal" - - -[[syntax_highlighting.python.patterns]] -pattern = "#.*$" -color = "comment" - - -[[syntax_highlighting.python.patterns]] -pattern = '""".*?"""' -color = "comment" - - -[[syntax_highlighting.python.patterns]] -pattern = "'''(.*?)'''" -color = "comment" - - -[[syntax_highlighting.python.patterns]] -pattern = 
"\\b(ArithmeticError|AssertionError|AttributeError|BaseException|BlockingIOError|BrokenPipeError|BufferError|BytesWarning|ChildProcessError|ConnectionAbortedError|ConnectionError|ConnectionRefusedError|ConnectionResetError|DeprecationWarning|EOFError|Ellipsis|EncodingWarning|EnvironmentError|Exception|FileExistsError|FileNotFoundError|FloatingPointError|FutureWarning|GeneratorExit|IOError|ImportError|ImportWarning|IndentationError|IndexError|InterruptedError|IsADirectoryError|KeyError|KeyboardInterrupt|LookupError|MemoryError|ModuleNotFoundError|NameError|NotADirectoryError|NotImplemented|NotImplementedError|OSError|OverflowError|PendingDeprecationWarning|PermissionError|ProcessLookupError|RecursionError|ReferenceError|ResourceWarning|RuntimeError|RuntimeWarning|StopAsyncIteration|StopIteration|SyntaxError|SyntaxWarning|SystemError|SystemExit|TabError|TimeoutError|TypeError|UnboundLocalError|UnicodeDecodeError|UnicodeEncodeError|UnicodeError|UnicodeTranslateError|UnicodeWarning|UserWarning|ValueError|Warning|ZeroDivisionError|__import__|abs|all|any|ascii|bin|bool|breakpoint|bytearray|bytes|callable|chr|classmethod|compile|complex|copyright|credits|delattr|dict|dir|divmod|enumerate|eval|exec|exit|filter|float|format|frozenset|getattr|globals|hasattr|hash|help|hex|id|input|int|isinstance|issubclass|iter|len|license|list|locals|map|max|memoryview|min|next|object|oct|open|ord|pow|print|property|range|repr|reversed|round|set|setattr|slice|sorted|staticmethod|str|sum|super|tuple|type|vars|zip)\\b" -color = "builtins" - - -[[syntax_highlighting.python.patterns]] -pattern = "\\b(List|Dict|Tuple|Set|Optional|Union|Any|Callable|TypeVar|Generic|Iterable|Iterator|Sequence|Mapping|MutableMapping|Awaitable|Coroutine|AsyncIterable|NamedTuple|TypedDict|Final|Literal|Annotated|TypeGuard|Self|Protocol|dataclass|field|classmethod|staticmethod)\\b" -color = "type" - - -[[syntax_highlighting.python.patterns]] -pattern = "r[\"'].*?[\"']" -color = "regexp" - - 
-[[syntax_highlighting.python.patterns]] -pattern = "\\b(True|False|None|Ellipsis|NotImplemented)\\b" -color = "literal" - - -[[syntax_highlighting.python.patterns]] -pattern = "__(?:init|new|str|repr|enter|exit|getattr|setattr|delattr|getitem|setitem|delitem|iter|next|call|len|contains|add|sub|mul|truediv|floordiv|mod|pow|lshift|rshift|and|or|xor|invert|eq|ne|lt|le|gt|ge|bool|bytes|format|hash|dir|sizeof|getstate|setstate|reduce|reduce_ex|subclasshook|del|doc|name|qualname|module|defaults|kwdefaults|annotations|dict|weakref|slots|class|self|cls)__(?=\\()" -color = "magic" - - -[[syntax_highlighting.python.patterns]] -pattern = "\\bimport\\s+\\w+(?:\\.\\w+)*\\b" -color = "import" - - -[[syntax_highlighting.python.patterns]] -pattern = "\\bfrom\\s+\\w+(?:\\.\\w+)*\\s+import\\b" -color = "import" - - -[[syntax_highlighting.toml.patterns]] -pattern = "^\\s*\\[\\[.*?\\]\\]" -color = "keyword" - - -[[syntax_highlighting.toml.patterns]] -pattern = "^\\s*\\[.*?\\]" -color = "keyword" - - -[[syntax_highlighting.toml.patterns]] -pattern = "^\\s*([\\w-]+)\\s*=\\s*" -color = "property" - - -[[syntax_highlighting.toml.patterns]] -pattern = '"(?:\\"|.)*?"' -color = "string" - - -[[syntax_highlighting.toml.patterns]] -pattern = "'(?:\\\\'|.)*?'" -color = "string" - - -[[syntax_highlighting.toml.patterns]] -pattern = "\\b(true|false)\\b" -color = "literal" - - -[[syntax_highlighting.toml.patterns]] -pattern = "\\b(\\d+\\.?\\d*|0x[0-9a-fA-F]+)\\b" -color = "literal" - - -[[syntax_highlighting.toml.patterns]] -pattern = "#.*" -color = "comment" - - -[[syntax_highlighting.toml.patterns]] -pattern = "\\b(\\w+)\\s*=\\s*(\\[.*?\\])" -color = "type" - - -[[syntax_highlighting.toml.patterns]] -pattern = "\\b(\\w+)\\s*=\\s*\\{.*?\\}" -color = "type" - - -[[syntax_highlighting.javascript.patterns]] -pattern = "//.*$" -color = "comment" - - -[[syntax_highlighting.javascript.patterns]] -pattern = "/\\*[\\s\\S]*?\\*/" -color = "comment" - - -[[syntax_highlighting.javascript.patterns]] 
-pattern = "\\b(let|const|var|function|return|if|else|for|while|do|switch|case|break|continue|try|catch|finally|new|delete|typeof|instanceof|this|class|extends|super|import|export|from|as|async|await|yield)\\b" -color = "keyword" - - -[[syntax_highlighting.javascript.patterns]] -pattern = "`[^`]*`" -color = "string" - - -[[syntax_highlighting.javascript.patterns]] -pattern = '"[^"]*"' -color = "string" - - -[[syntax_highlighting.javascript.patterns]] -pattern = "\\b(\\d+(\\.\\d+)?|true|false|null|undefined|NaN|Infinity)\\b" -color = "literal" - - -[[syntax_highlighting.javascript.patterns]] -pattern = "console\\.log" -color = "keyword" - - -[[syntax_highlighting.javascript.patterns]] -pattern = "\\$\\{[^}]*\\}" -color = "literal" - - -[[syntax_highlighting.css.patterns]] -pattern = "[\\w#.-]+(?=\\s*\\{)" -color = "selector" - - -[[syntax_highlighting.css.patterns]] -pattern = "([\\w-]+)(?=:)" -color = "property" - - -[[syntax_highlighting.css.patterns]] -pattern = ":\\s*(.*?);" -color = "string" - - -[[syntax_highlighting.css.patterns]] -pattern = "!important" -color = "literal" - - -[[syntax_highlighting.css.patterns]] -pattern = "/\\*.*?\\*/" -color = "comment" - - -[[syntax_highlighting.html.patterns]] -pattern = "<\\/?[\\w-]+" -color = "tag" - - -[[syntax_highlighting.html.patterns]] -pattern = "\\b([\\w-]+)=" -color = "attribute" - - -[[syntax_highlighting.html.patterns]] -pattern = '"(?:\\"|.)*?"' -color = "string" - - -[[syntax_highlighting.html.patterns]] -pattern = "" -color = "comment" - - -[[syntax_highlighting.json.patterns]] -pattern = '"(\w+)"\s*:' -color = "keyword" - - -[[syntax_highlighting.json.patterns]] -pattern = ':\s*(".*?"|[\d.]+|true|false|null)' -color = "string" - - -[[syntax_highlighting.json.patterns]] -pattern = "[\\[\\]{}]" -color = "punctuation" - - -[[syntax_highlighting.yaml.patterns]] -pattern = "^\\\\s*[\\\\w-]+(?::\\\\s*)?" 
-color = "keyword" - - -[[syntax_highlighting.yaml.patterns]] -pattern = ":\\s*(\".*?\"|'.*?')" -color = "string" - - -[[syntax_highlighting.yaml.patterns]] -pattern = "^\\s*-\\s+" -color = "punctuation" - - -[[syntax_highlighting.yaml.patterns]] -pattern = "\\b(true|false|yes|no|on|off|\\d+\\.?\\d*)\\b" -color = "literal" - - -[[syntax_highlighting.yaml.patterns]] -pattern = "#.*" -color = "comment" - - -[[syntax_highlighting.markdown.patterns]] -pattern = "^#{1,6}\\s" -color = "keyword" - - -[[syntax_highlighting.markdown.patterns]] -pattern = "\\*{1,2}.*?\\*{1,2}" -color = "emphasis" - - -[[syntax_highlighting.markdown.patterns]] -pattern = "`.*?`" -color = "string" - - -[[syntax_highlighting.markdown.patterns]] -pattern = "\\[.*?\\]\\(.*?\\)" -color = "literal" - - -[[syntax_highlighting.markdown.patterns]] -pattern = "^>\\s.*" -color = "comment" - - -[[syntax_highlighting.xml.patterns]] -pattern = "<\\/?[\\w-]+" -color = "tag" - - -[[syntax_highlighting.xml.patterns]] -pattern = "\\b([\\w-]+)=" -color = "attribute" - - -[[syntax_highlighting.xml.patterns]] -pattern = '"(?:\\"|.)*?"' -color = "string" - - -[[syntax_highlighting.xml.patterns]] -pattern = "" -color = "comment" - - -[[syntax_highlighting.shell.patterns]] -pattern = "#.*" -color = "comment" - - -[[syntax_highlighting.shell.patterns]] -pattern = "\\$\\{.*?\\}" -color = "literal" - - -[[syntax_highlighting.shell.patterns]] -pattern = "\\$\\w+" -color = "literal" - - -[[syntax_highlighting.shell.patterns]] -pattern = '".*?"' -color = "string" - - -[[syntax_highlighting.shell.patterns]] -pattern = "'.*?'" -color = "string" - - -[[syntax_highlighting.shell.patterns]] -pattern = "\\b(if|then|else|elif|fi|for|do|done|while|case|esac|echo|exit|export|source|alias)\\b" -color = "keyword" - - -[[syntax_highlighting.dart.patterns]] -pattern = "//.*$" -color = "comment" - - -[[syntax_highlighting.dart.patterns]] -pattern = "/\\*[\\s\\S]*?\\*/" -color = "comment" - - -[[syntax_highlighting.dart.patterns]] 
-pattern = "\\b(abstract|as|assert|async|await|break|case|catch|class|const|continue|default|deferred|do|dynamic|else|enum|export|extends|external|factory|final|finally|for|get|if|implements|import|in|is|library|new|null|operator|part|rethrow|return|set|static|super|switch|sync|this|throw|try|typedef|var|void|while|with|yield)\\b" -color = "keyword" - - -[[syntax_highlighting.dart.patterns]] -pattern = '"(?:\\"|.)*?"' -color = "string" - - -[[syntax_highlighting.dart.patterns]] -pattern = "'(?:\\\\'|.)*?'" -color = "string" - - -[[syntax_highlighting.dart.patterns]] -pattern = "\\b(\\d+(\\.\\d+)?|true|false)\\b" -color = "literal" - - -[[syntax_highlighting.go.patterns]] -pattern = "//.*$" -color = "comment" - - -[[syntax_highlighting.go.patterns]] -pattern = "/\\*[\\s\\S]*?\\*/" -color = "comment" - - -[[syntax_highlighting.go.patterns]] -pattern = "\\b(break|case|chan|const|continue|default|defer|else|fallthrough|for|func|go|goto|if|import|interface|map|package|range|return|select|struct|switch|type|var)\\b" -color = "keyword" - - -[[syntax_highlighting.go.patterns]] -pattern = '"(?:\\"|.)*?"' -color = "string" - - -[[syntax_highlighting.go.patterns]] -pattern = "'(?:\\\\'|.)*?'" -color = "string" - - -[[syntax_highlighting.go.patterns]] -pattern = "\\b(\\d+(\\.\\d+)?|true|false|nil)\\b" -color = "literal" - - -[[syntax_highlighting.c.patterns]] -pattern = "//.*$" -color = "comment" - - -[[syntax_highlighting.c.patterns]] -pattern = "/\\*[\\s\\S]*?\\*/" -color = "comment" - - -[[syntax_highlighting.c.patterns]] -pattern = "\\b(auto|break|case|char|const|continue|default|do|double|else|enum|extern|float|for|goto|if|int|long|register|return|short|signed|sizeof|static|struct|switch|typedef|union|unsigned|void|volatile|while|namespace|using|template|class|private|protected|public|virtual|friend|inline|new|delete|try|catch|throw|operator|explicit|export|bool|true|false|nullptr)\\b" -color = "keyword" - - -[[syntax_highlighting.c.patterns]] -pattern = '"(?:\\"|.)*?"' 
-color = "string" - - -[[syntax_highlighting.c.patterns]] -pattern = "'(?:\\\\'|.)*?'" -color = "string" - - -[[syntax_highlighting.c.patterns]] -pattern = "\\b(\\d+(\\.\\d+)?|NULL)\\b" -color = "literal" - - -[[syntax_highlighting.c.patterns]] -pattern = '#include\\s*[<"].*?[>"]' -color = "decorator" - - -[[syntax_highlighting.c.patterns]] -pattern = "#\\w+" -color = "decorator" - - -[[syntax_highlighting.cpp.patterns]] -pattern = "//.*$" -color = "comment" - - -[[syntax_highlighting.cpp.patterns]] -pattern = "/\\*[\\s\\S]*?\\*/" -color = "comment" - - -[[syntax_highlighting.cpp.patterns]] -pattern = "\\b(auto|break|case|char|const|continue|default|do|double|else|enum|extern|float|for|goto|if|int|long|register|return|short|signed|sizeof|static|struct|switch|typedef|union|unsigned|void|volatile|while|namespace|using|template|class|private|protected|public|virtual|friend|inline|new|delete|try|catch|throw|operator|explicit|export|bool|true|false|nullptr)\\b" -color = "keyword" - - -[[syntax_highlighting.cpp.patterns]] -pattern = '"(?:\\"|.)*?"' -color = "string" - - -[[syntax_highlighting.cpp.patterns]] -pattern = "'(?:\\\\'|.)*?'" -color = "string" - - -[[syntax_highlighting.cpp.patterns]] -pattern = "\\b(\\d+(\\.\\d+)?|NULL)\\b" -color = "literal" - - -[[syntax_highlighting.cpp.patterns]] -pattern = '#include\\s*[<"].*?[>"]' -color = "decorator" - - -[[syntax_highlighting.cpp.patterns]] -pattern = "#\\w+" -color = "decorator" - - -[[syntax_highlighting.java.patterns]] -pattern = "//.*$" -color = "comment" - - -[[syntax_highlighting.java.patterns]] -pattern = "/\\*[\\s\\S]*?\\*/" -color = "comment" - - -[[syntax_highlighting.java.patterns]] -pattern = 
"\\b(abstract|assert|boolean|break|byte|case|catch|char|class|const|continue|default|do|double|else|enum|extends|final|finally|float|for|goto|if|implements|import|instanceof|int|interface|long|native|new|package|private|protected|public|return|short|static|strictfp|super|switch|synchronized|this|throw|throws|transient|try|void|volatile|while|true|false|null)\\b" -color = "keyword" - - -[[syntax_highlighting.java.patterns]] -pattern = '"(?:\\"|.)*?"' -color = "string" - - -[[syntax_highlighting.java.patterns]] -pattern = "'(?:\\\\'|.)*?'" -color = "string" - - -[[syntax_highlighting.java.patterns]] -pattern = "\\b(\\d+(\\.\\d+)?)\\b" -color = "literal" - - -[[syntax_highlighting.java.patterns]] -pattern = "@\\w+" -color = "decorator" - - -[[syntax_highlighting.julia.patterns]] -pattern = "#.*$" -color = "comment" - - -[[syntax_highlighting.julia.patterns]] -pattern = "#=.*?=#" -color = "comment" - - -[[syntax_highlighting.julia.patterns]] -pattern = "\\b(abstract|baremodule|begin|break|catch|const|continue|do|else|elseif|end|export|false|finally|for|function|global|if|import|in|let|local|macro|module|quote|return|struct|true|try|using|while)\\b" -color = "keyword" - - -[[syntax_highlighting.julia.patterns]] -pattern = '"(?:\\"|.)*?"' -color = "string" - - -[[syntax_highlighting.julia.patterns]] -pattern = "'(?:\\\\'|.)*?'" -color = "string" - - -[[syntax_highlighting.julia.patterns]] -pattern = "\\b(\\d+(\\.\\d+)?|true|false|nothing|missing|Inf|NaN)\\b" -color = "literal" - - -[[syntax_highlighting.rust.patterns]] -pattern = "//.*$" -color = "comment" - - -[[syntax_highlighting.rust.patterns]] -pattern = "/\\*[\\s\\S]*?\\*/" -color = "comment" - - -[[syntax_highlighting.rust.patterns]] -pattern = 
"\\b(abstract|alignof|as|async|await|be|box|break|const|continue|crate|do|dyn|else|enum|extern|false|final|fn|for|if|impl|in|let|loop|match|mod|move|mut|offsetof|override|priv|pub|pure|ref|return|sizeof|static|self|Self|struct|super|trait|true|type|typeof|unsafe|unsized|use|virtual|where|while|yield)\\b" -color = "keyword" - - -[[syntax_highlighting.rust.patterns]] -pattern = '"(?:\\"|.)*?"' -color = "string" - - -[[syntax_highlighting.rust.patterns]] -pattern = "'(?:\\\\'|.)*?'" -color = "string" - - -[[syntax_highlighting.rust.patterns]] -pattern = "\\b(\\d+(\\.\\d+)?|true|false|Some|None)\\b" -color = "literal" - - -[[syntax_highlighting.csharp.patterns]] -pattern = "//.*$" -color = "comment" - - -[[syntax_highlighting.csharp.patterns]] -pattern = "/\\*[\\s\\S]*?\\*/" -color = "comment" - - -[[syntax_highlighting.csharp.patterns]] -pattern = "\\b(abstract|as|base|bool|break|byte|case|catch|char|checked|class|const|continue|decimal|default|delegate|do|double|else|enum|event|explicit|extern|false|finally|fixed|float|for|foreach|goto|if|implicit|in|int|interface|internal|is|lock|long|namespace|new|null|object|operator|out|override|params|private|protected|public|readonly|ref|return|sbyte|sealed|short|sizeof|stackalloc|static|string|struct|switch|this|throw|true|try|typeof|uint|ulong|unchecked|unsafe|ushort|using|virtual|void|volatile|while|add|alias|ascending|async|await|by|descending|dynamic|equals|from|get|global|group|into|join|let|on|orderby|partial|remove|select|set|value|var|where|yield)\\b" -color = "keyword" - - -[[syntax_highlighting.csharp.patterns]] -pattern = '"(?:\\"|.)*?"' -color = "string" - - -[[syntax_highlighting.csharp.patterns]] -pattern = "'(?:\\\\'|.)*?'" -color = "string" - - -[[syntax_highlighting.csharp.patterns]] -pattern = "\\b(\\d+(\\.\\d+)?|true|false|null)\\b" -color = "literal" - - -[[syntax_highlighting.csharp.patterns]] -pattern = "@\\w+" -color = "decorator" - - -[[syntax_highlighting.dockerfile.patterns]] -pattern = "#.*$" -color 
= "comment" - - -[[syntax_highlighting.dockerfile.patterns]] -pattern = "^(FROM|MAINTAINER|RUN|CMD|LABEL|EXPOSE|ENV|ADD|COPY|ENTRYPOINT|VOLUME|USER|WORKDIR|ARG|ONBUILD|STOPSIGNAL|HEALTHCHECK|SHELL)\\b" -color = "keyword" - - -[[syntax_highlighting.dockerfile.patterns]] -pattern = '"(?:\\"|.)*?"' -color = "string" - - -[[syntax_highlighting.dockerfile.patterns]] -pattern = "'(?:\\\\'|.)*?'" -color = "string" - - -[[syntax_highlighting.terraform.patterns]] -pattern = "#.*$" -color = "comment" - - -[[syntax_highlighting.terraform.patterns]] -pattern = "/\\*[\\s\\S]*?\\*/" -color = "comment" - - -[[syntax_highlighting.terraform.patterns]] -pattern = "\\b(resource|data|provider|variable|output|module|locals|terraform)\\b" -color = "keyword" - - -[[syntax_highlighting.terraform.patterns]] -pattern = '"(?:\\"|.)*?"' -color = "string" - - -[[syntax_highlighting.terraform.patterns]] -pattern = "'(?:\\\\'|.)*?'" -color = "string" - - -[[syntax_highlighting.terraform.patterns]] -pattern = "\\b(true|false|null)\\b" -color = "literal" - - -[[syntax_highlighting.terraform.patterns]] -pattern = "\\$\\{.*?\\}" -color = "literal" - - -[[syntax_highlighting.jenkins.patterns]] -pattern = "//.*$" -color = "comment" - - -[[syntax_highlighting.jenkins.patterns]] -pattern = "/\\*[\\s\\S]*?\\*/" -color = "comment" - - -[[syntax_highlighting.jenkins.patterns]] -pattern = "\\b(pipeline|agent|stages|steps|sh|node|stage|environment|parameters|when|parallel|script|post|always|success|failure|unstable|changed|echo|dir|withCredentials|withEnv)\\b" -color = "keyword" - - -[[syntax_highlighting.jenkins.patterns]] -pattern = '"(?:\\"|.)*?"' -color = "string" - - -[[syntax_highlighting.jenkins.patterns]] -pattern = "'(?:\\\\'|.)*?'" -color = "string" - - -[[syntax_highlighting.jenkins.patterns]] -pattern = "\\b(true|false|null)\\b" -color = "literal" - - -[[syntax_highlighting.puppet.patterns]] -pattern = "#.*$" -color = "comment" - - -[[syntax_highlighting.puppet.patterns]] -pattern = 
"\\b(class|define|node|include|require|contain|inherits|if|elsif|else|case|when|default|and|or|in|true|false|undef)\\b" -color = "keyword" - - -[[syntax_highlighting.puppet.patterns]] -pattern = '"(?:\\"|.)*?"' -color = "string" - - -[[syntax_highlighting.puppet.patterns]] -pattern = "'(?:\\\\'|.)*?'" -color = "string" - - -[[syntax_highlighting.puppet.patterns]] -pattern = "\\$[a-zA-Z0-9_:]+" -color = "literal" - - -[[syntax_highlighting.puppet.patterns]] -pattern = "=>" -color = "punctuation" - - -[[syntax_highlighting.saltstack.patterns]] -pattern = "#.*$" -color = "comment" - - -[[syntax_highlighting.saltstack.patterns]] -pattern = "^\\s*[\\w\\.-]+:" -color = "keyword" - - -[[syntax_highlighting.saltstack.patterns]] -pattern = '"(?:\\"|.)*?"' -color = "string" - - -[[syntax_highlighting.saltstack.patterns]] -pattern = "'(?:\\\\'|.)*?'" -color = "string" - - -[[syntax_highlighting.saltstack.patterns]] -pattern = "\\b(true|false|null|True|False|None)\\b" -color = "literal" - - -[[syntax_highlighting.saltstack.patterns]] -pattern = "\\$\\{.*?\\}" -color = "literal" - - -[[syntax_highlighting.saltstack.patterns]] -pattern = "\\{\\{.*?\\}\\}" -color = "literal" - - -[[syntax_highlighting.git.patterns]] -pattern = "#.*$" -color = "comment" - - -[[syntax_highlighting.git.patterns]] -pattern = "\\[(.*?)\\]" -color = "keyword" - - -[[syntax_highlighting.git.patterns]] -pattern = "^\\s*[a-zA-Z0-9]+\\s*=" -color = "property" - - -[[syntax_highlighting.git.patterns]] -pattern = "=\\s*(.*?)$" -color = "string" - - -[[syntax_highlighting.notebook.patterns]] -pattern = "\\b(def|class|for|while|if|else|elif|try|except|finally|with|import|from|as|return|yield|break|continue|pass|raise|assert|del|global|nonlocal|True|False|None)\\b" -color = "keyword" - - -[[syntax_highlighting.notebook.patterns]] -pattern = "#.*$" -color = "comment" - - -[[syntax_highlighting.notebook.patterns]] -pattern = '"(?:\\"|.)*?"' -color = "string" - - -[[syntax_highlighting.notebook.patterns]] -pattern 
= "'(?:\\\\'|.)*?'" -color = "string" - - -[[syntax_highlighting.notebook.patterns]] -pattern = "\\b(\\d+(\\.\\d+)?|True|False|None)\\b" -color = "literal" - - -[supported_formats] -docs = [ - "readme", - "docs", - "todo", - "changelog", - "license", - "contributing", - "code_of_conduct", -] -python = ["py", "pyw", "pyc", "pyd"] -toml = ["toml", "tml", "nix"] -javascript = ["js", "mjs", "cjs", "jsx"] -code = ["jl", "dart"] -css = ["css"] -nix = ["nix"] -html = ["html", "htm", "xhtml"] -json = ["json", "jsonc", "geojson", "webmanifest"] -yaml = ["yaml", "yml"] -xml = ["xml", "xsd", "xsl", "xslt", "plist", "rss", "atom", "csproj", "svg"] -markdown = ["md", "markdown", "mdown", "mkd"] -text = ["txt", "log", "rst", "srt", "sub", "me"] -shell = [ - "sh", - "bash", - "zsh", - "fish", - "ksh", - "csh", - "tcsh", - "dash", - "ash", - "cmd", - "command", - "tool", - "bat", -] -dart = ["dart"] -go = ["go"] -c = ["c", "h"] -cpp = ["cpp", "cxx", "cc", "hpp", "hxx", "hh", "inl", "tpp"] -java = ["java", "jar", "class"] -julia = ["jl"] -rust = ["rs", "rlib"] -csharp = ["cs"] -dockerfile = ["Dockerfile", "dockerfile"] -terraform = ["tf", "tfvars"] -jenkins = ["Jenkinsfile", "jenkinsfile", "groovy"] -puppet = ["pp"] -saltstack = ["sls"] -git = [ - "gitignore", - "gitattributes", - "gitmodules", - "gitkeep", - "gitconfig", - "config", -] -notebook = ["ipynb"] -typescript = ["ts", "tsx", "mts", "cts"] -php = ["php", "php3", "php4", "php5", "phtml"] -ruby = ["rb", "erb", "rake", "rbw", "gemspec"] -scala = ["scala", "sc"] -r = ["r", "R", "rds", "rda"] -swift = ["swift"] -diff = ["diff", "patch"] -makefile = ["Makefile", "makefile", "mk", "mak"] -ini = ["ini", "cfg", "conf", "properties", "editorconfig"] -csv = ["csv", "tsv"] -sql = ["sql"] -graphql = ["graphql", "gql"] -kotlin = ["kt", "kts"] -lua = ["lua"] -perl = ["pl", "pm", "t", "pod"] -powershell = ["ps1", "psm1", "psd1"] -fortran = ["f", "F", "f90", "F90", "for"] -image = [ - "jpg", - "jpeg", - "png", - "gif", - "bmp", - "ico", - 
"webp", - "tiff", - "tif", - "heic", - "heif", -] -audio = ["mp3", "wav", "ogg", "flac", "aac", "m4a", "wma"] -video = ["mp4", "mkv", "avi", "mov", "webm", "flv", "wmv"] -archive = [ - "zip", - "tar", - "gz", - "tgz", - "bz2", - "rar", - "7z", - "xz", - "iso", - "deb", - "rpm", - "pkg", -] -font = ["ttf", "otf", "woff", "woff2", "eot"] -binary = ["exe", "dll", "so", "o", "bin", "app", "com", "msi", "dmg"] -document = [ - "doc", - "docx", - "odt", - "rtf", - "pdf", - "ppt", - "pptx", - "odp", - "xls", - "xlsx", - "ods", - "epub", - "mobi", -] diff --git a/docs/INSTALL.md b/docs/INSTALL.md index d4e77ea5..2e1fcf27 100644 --- a/docs/INSTALL.md +++ b/docs/INSTALL.md @@ -58,6 +58,21 @@ Native `.deb`, `.rpm`, `.dmg`, and `.exe` installers may provide launcher integr ## Platform Dependencies +### TextMate syntax highlighting + +ECLI's default extension-backed syntax engine uses `python-textmate`, which +pulls `onigurumacffi` for the Oniguruma regular-expression engine. Standard +`pip`, `pipx`, and release artifact installs should receive these Python +dependencies automatically. + +On source-build platforms where `onigurumacffi` cannot use a binary wheel, +install the Oniguruma development package before building. Typical package names +are `libonig-dev` or `oniguruma` on Debian/Ubuntu-style systems, +`oniguruma` on Arch and Nix, and `devel/oniguruma` on FreeBSD. + +If the tokenizer or native dependency is unavailable at runtime, ECLI must start +and fall back to the legacy highlighter rather than crashing. + ### SUSE / openSUSE Runtime dependencies for the openSUSE/SUSE RPM path: diff --git a/docs/architecture/extensions-layer.md b/docs/architecture/extensions-layer.md index 95daaf71..cb0a87f8 100644 --- a/docs/architecture/extensions-layer.md +++ b/docs/architecture/extensions-layer.md @@ -105,8 +105,11 @@ are listed here so the boundary is fixed before implementation begins. (from `package.json` language contributions) to a language id. 
- **syntax service** — apply the resolved grammar to editor text to produce scope spans for rendering. -- **scope-to-style / theme bridge** — map TextMate scopes to ECLI theme styles, - producing readable, deterministic colors. +- **scope-to-style / theme bridge** — load contributed color themes from + `contributes.themes` and referenced theme JSON, then map TextMate scopes to + ECLI theme styles from that data. Imported themes are the source of truth; + missing themes must be diagnosed, not synthesized from hand-written color + dictionaries. - **diagnostics / linter integration path** — feed external linter/diagnostic output into ECLI's existing diagnostics normalization model (`docs/extensions/diagnostics-model.md`). The Extensions Layer supplies @@ -135,6 +138,55 @@ The Extensions Layer reads these file kinds and **only** these as data: Any other file kind is out of scope for the Extensions Layer until this contract is amended. +## Theme Numbering Contract + +Theme numbers are a stable ECLI configuration contract. The active theme is +selected by the root `theme = N` setting in `config.toml`; imported theme JSON +and TextMate `tokenColors` remain the source of truth for extension-backed +professional themes. + +Canonical ranges: + +- `1`-`8`: deprecated aliases for old pre-extension-theme configs only. These + values are migration inputs, not selectable professional themes. +- `100`-`199`: light themes. +- `200`-`299`: dark themes. +- `300`-`399`: high-contrast themes. +- `800`-`899`: reserved for a future custom/imported special-theme feature. + They must not be silently assigned before that feature exists. + +Canonical professional theme ids: + +- Light: `101` GitHub Light Default, `102` GitHub Light, `103` GitHub Light + Colorblind (Beta), `104` Visual Studio Light, `105` Visual Studio 2017 Light + - C++, `106` Light Modern, `107` Light+, `108` Quiet Light, `109` Solarized + Light, `110` JetBrains Rider New UI Light. 
+- Dark: `201` GitHub Dark Default, `202` GitHub Dark, `203` GitHub Dark + Dimmed, `204` Visual Studio Dark, `205` Visual Studio 2017 Dark - C++, + `206` Dark Modern, `207` Dark+, `208` Monokai, `209` Monokai Dimmed, `210` + Tomorrow Night Blue, `211` Abyss, `212` Atom One Dark, `213` Kimbie Dark, + `214` Solarized Dark, `215` Red. +- High contrast: `301` Dark High Contrast, `302` GitHub Dark High Contrast, + `303` GitHub Light High Contrast, `304` Light High Contrast. + +Built-in compatibility themes preserve their existing colour values and use +reserved compatibility ids: `181` PySH Light, `182` PySH Classic, `183` ECLI +Legacy Light, `281` PySH Dark, `282` PySH Classic Dark, `283` ECLI Legacy Dark, +`381` ECLI High Contrast Light, and `382` ECLI High Contrast Dark. + +Missing professional themes are not synthesized or mapped to unrelated themes. +If a configured number is invalid or the imported theme is absent, ECLI keeps +the current valid theme when one exists, emits a visible warning, and does not +crash startup. Startup without a current theme uses the default `207` Dark+ with +a warning. + +Config migrations must write +`~/.config/ecli/config.toml.pre-extension-theme-numbering.bak` before modifying +the user config. Old pre-extension ids `1`-`8` migrate to the matching +compatibility ids above. Transitional ids from the previous in-progress +implementation migrate as follows: `1`-`10` -> `101`-`110`, `11`-`25` -> +`201`-`215`, and `26`-`29` -> `301`-`304`. + ## Explicitly forbidden runtime behavior The Extensions Layer is data + adapters only. The following are forbidden: @@ -240,6 +292,107 @@ new matrix entry. #102. Covered by `tests/extensions/test_textmate_grammar_catalog.py`, `tests/extensions/test_extension_language_detection.py`, and `tests/extensions/test_extension_layer_config.py`. +- **Status (#102):** #101 delivered the grammar catalog and language detection; + #102 now delivers **real TextMate tokenization and visible editor rendering**. 
+ The ECLI-owned modules under `src/ecli/extensions/ecli_integration/` are: + `syntax_service.py` (engine selection + per-line `LineHighlighter`), + `textmate_tokenizer.py` (loads the imported `.tmLanguage.json` grammars and + tokenizes each line into genuine TextMate scope stacks via the optional + `python-textmate` engine, which uses Oniguruma), `theme_registry.py` + (loads `contributes.themes`, follows local theme `include` chains, parses theme + JSON/JSONC, and resolves TextMate `tokenColors`), and `theme_bridge.py` + (deterministic scope → ECLI style-category mapping with specificity, flattening + overlapping scopes into per-line spans). The editor consumes those spans in + `Ecli.apply_syntax_highlighting_with_pygments`, mapping each style category onto + the active theme's curses colour and drawing them through the existing + `DrawScreen` path — so highlighting is **visibly** TextMate-driven and differs + by language. + - **Engine selection** is config-driven via `[extensions].syntax_engine`. + `"extension"` is the **default**; `"legacy"` forces the built-in + Pygments/regex highlighter. `editor.syntax_highlighting = false` disables + visible highlighting for both engines. + - **Legacy remains an always-available fallback.** Unknown files, grammars the + engine cannot tokenize (the imported **Markdown** and some **C** constructs + recurse), an uninstalled tokenizer, or any tokenizer error fall back to the + legacy highlighter automatically — per file and per line — so rendering is + never broken. Representative languages with real TextMate scopes today: + Python, Markdown, JSON, Dockerfile, Makefile, YAML/YML, TypeScript, + JavaScript, C/C++, Java, Rust, HTML, Perl, PHP, Lua, C#, BAT, logs, and + gitignore. TOML, assembler, Ada/SPARK, and Fortran are detected + deterministically but report missing imported grammar assets and use safe + fallback highlighting until those assets are added.
+ - **No runtime.** It reads grammar JSON only: no VS Code extension host, no + Node/TypeScript or Copilot runtime, no `activationEvents`, no `package.json` + scripts, no background workers; all engine stdout/stderr is suppressed so it + cannot corrupt curses. The imported upstream tree is unchanged. + - **Theme source of truth.** Professional theme ids are backed by real imported + theme contributions and theme JSON using the numbering contract above: + Visual Studio Light, Light Modern, Light+, Quiet Light, Solarized Light, + Visual Studio Dark, Dark Modern, Dark+, Monokai, Monokai Dimmed, Tomorrow + Night Blue, Abyss, Kimbie Dark, Solarized Dark, Red, Dark High Contrast, and + Light High Contrast are loaded from the asset tree when present. Target names + that are not present in the imported assets, including GitHub Light/Dark + variants, VS2017, Atom One Dark, JetBrains Rider Light, and Tokyo variants, + are reported as missing and are not replaced by fabricated palettes. The + legacy PySH/ECLI palettes are retained only as explicit compatibility ids in + the 18x/28x/38x ranges. + - **Multiline protection layer.** TextMate scopes remain the primary token + source. Because the current `python-textmate` adapter is line-oriented and + cannot carry every grammar's rule stack across viewport lines, ECLI applies + a bounded, language-aware protection pass over TextMate output for known + multiline regions: Python triple-quoted strings/docstrings and inline + strings; JavaScript/TypeScript `/* ... */`, `/** ... */`, `//` comments and + quoted/template strings; HTML `<!-- ... -->` comments; and CSS `/* ... */` + comments and quoted strings. The protection pass does not tokenize normal + code and does not replace grammar tokenization. It only gives known + comment/string regions priority over nested keyword, number, operator, tag, + selector, or property scopes that leak from stateless per-line tokenization.
+ Protected ranges are cached by buffer revision and mapped onto the current + viewport, so scrolling reuses the existing viewport-first/per-line cache + architecture instead of reparsing the file on every repaint. + - **Acceptance coverage.** Large-file scroll responsiveness is locked by real + repository-file tests over `Makefile`, `logs/freebsd-0.2.2-fail.log`, + `logs/pr-46-body.md`, and `scripts/build_pyinstaller_linux.py`. Multiline + rendering correctness is locked by synthetic fixtures for Python, + JavaScript, TypeScript, HTML, and CSS because those exact adversarial comment + bodies are not reliably present in repository files. + - **Limitations / staged work.** Tokenization is still per line; the + protection layer is a deterministic guard for known multiline + comment/string regions, not a general TextMate state-stack implementation. + Broader cross-line state for every TextMate grammar remains future + stabilization work. **Linter diagnostics integration remains #104**, and + **snippets + language-configuration metadata remain #105**. + - Covered by `tests/extensions/test_textmate_tokenization.py`, + `tests/extensions/test_extension_syntax_service.py`, + `tests/extensions/test_editor_syntax_adapter.py`, + `tests/extensions/test_editor_syntax_rendering.py`, + `tests/extensions/test_textmate_multiline_protection.py`, + `tests/extensions/test_textmate_render_performance.py`, and + `tests/extensions/test_textmate_scroll_regression.py`. +- **Dependencies (#102, release-contract).** TextMate rendering requires the + optional tokenizer dependency, declared in `pyproject.toml`: + - `python-textmate` (pure-Python TextMate grammar interpreter), which pulls + - `onigurumacffi` (CFFI bindings to the **Oniguruma** regex engine). On Linux, + macOS, and Windows, `onigurumacffi` ships binary wheels (no system library + needed).
Where only an sdist is available (notably **FreeBSD**, or Nix builds + from source), the **Oniguruma** development headers/library must be present + (`devel/oniguruma` on FreeBSD, `libonig-dev` on Debian/Ubuntu, `oniguruma` on Arch and + in nixpkgs). Packaging surfaces must declare/install this where they build + from source; see `docs/install/*`, `docs/release/packaging-flows.md`, and + `docs/release/release-checklist.md`. + - **Graceful degradation:** if the tokenizer or Oniguruma is unavailable at + runtime, ECLI logs a deterministic diagnostic + (`ECLI syntax engine=… textmate_tokenizer_available=False active_renderer=legacy`) + and renders with the legacy highlighter. A missing tokenizer never crashes + startup. +- **Config surface (#102).** `config.toml` is reduced to user-facing settings + only; the internal `[comments.*]`, `[[syntax_highlighting.*]]`, and + `[supported_formats]` tables moved into code (`DEFAULT_CONFIG`). The default is + `[extensions].syntax_engine = "extension"`. A one-time, backed-up migration + (`migrate_obsolete_config_tables`) strips those obsolete tables from an existing + user `~/.config/ecli/config.toml` and flips a transitional + `syntax_engine = "legacy"` to `"extension"`, so upgraded users actually get + TextMate rendering instead of being pinned to a stale legacy config. ## Sequencing @@ -254,7 +407,7 @@ on the previous one and must not be skipped or merged. | #100 | **Manifest registry** (`package.json` contribution parsing). | | #101 | **TextMate grammar catalog** and **language detection**. | | #102 | **Syntax service** wired to editor rendering. | -| #103 | **Theme bridge** (scope-to-style). | +| #103 | **Theme registry / bridge** (`contributes.themes`, theme JSON, tokenColors, scope-to-style). | | #104 | **Linter diagnostics path**. | | #105 | **Snippets** and **language-configuration** metadata.
| diff --git a/docs/config/config-precedence.md b/docs/config/config-precedence.md index 5f2aebbe..15c0f990 100644 --- a/docs/config/config-precedence.md +++ b/docs/config/config-precedence.md @@ -73,6 +73,28 @@ flowchart TD - Intended target state: provide optional migration tooling or diagnostics when template introduces new recommended keys. - Validation required: verify template-diff diagnostics path in implementation. +## Theme Number Migration + +`theme = N` is a versioned configuration contract: + +- `1`-`8` are deprecated migration aliases for old pre-extension-theme configs. +- `100`-`199` are light themes. +- `200`-`299` are dark themes. +- `300`-`399` are high-contrast themes. +- `800`-`899` are reserved for future custom/imported special themes. + +When ECLI detects stale theme numbering in an existing user config, it writes +`~/.config/ecli/config.toml.pre-extension-theme-numbering.bak` before modifying +the file and emits a startup warning. Old aliases `1`-`8` migrate to preserved +built-in compatibility ids in the `18x`/`28x`/`38x` ranges. Transitional ids +from the previous in-progress implementation migrate as `1`-`10` -> `101`-`110`, +`11`-`25` -> `201`-`215`, and `26`-`29` -> `301`-`304`. + +Invalid or missing theme numbers are not treated as implicit defaults during a +live session. ECLI keeps the current valid theme when available, reports the +problem, and falls back to startup default `207` only when no current theme +exists. + ## Parse-Failure Behavior by Source - Embedded defaults failure: release-blocking defect. diff --git a/docs/config/config-schema.md b/docs/config/config-schema.md index 20043d54..7c41b9aa 100644 --- a/docs/config/config-schema.md +++ b/docs/config/config-schema.md @@ -31,6 +31,7 @@ flowchart LR ```text root + theme logging file_level console_level @@ -50,12 +51,34 @@ root settings auto_save_interval show_git_info - colors - comments. 
- supported_formats file_icons ``` +Internal comment delimiters, syntax fallback data, supported-format tables, and +keybinding defaults live in code. They are not public template sections in +`config.toml`. + +## Theme Numbering Policy + +The root `theme = N` value uses the stable Extensions Layer numbering contract: + +- `1`-`8`: deprecated aliases for old pre-extension-theme configs only. +- `100`-`199`: light themes. +- `200`-`299`: dark themes. +- `300`-`399`: high-contrast themes. +- `800`-`899`: reserved for future custom/imported special themes. + +The repository default is `theme = 207` (`Dark+`). Existing user configs with +old pre-extension aliases `1`-`8` are migrated to preserved built-in +compatibility themes in the `18x`/`28x`/`38x` ranges. Transitional ids from the +previous in-progress implementation migrate as `1`-`10` -> `101`-`110`, +`11`-`25` -> `201`-`215`, and `26`-`29` -> `301`-`304`. + +Before modifying a user config, migration writes +`~/.config/ecli/config.toml.pre-extension-theme-numbering.bak`. Missing or +invalid theme numbers must keep the current valid theme when one exists, emit a +visible ECLI warning, and never map to an unrelated theme. 
+ ## Current-State vs Target-State - **Observed current state**: @@ -76,6 +99,7 @@ root | `logging.file_level` | string(enum log level) | No | `DEBUG` | defaults/user | must be valid log level token | normalize case | warn + fallback | | `logging.console_level` | string(enum) | No | `WARNING` | defaults/user | valid token | normalize case | warn + fallback | | `logging.log_to_console` | bool | No | `true` | defaults/user | boolean coercion disallowed unless explicit | none | warn + fallback | +| `theme` | int | No | `207` | defaults/user/env | must resolve to an imported or compatibility theme id | see theme numbering policy | warn + keep current/default | | `ai.default_provider` | string | No | implementation default | defaults/user | must map to known provider set | legacy alias mapping allowed | warn + fallback | | `ai.models.` | string | Provider-dependent | none | defaults/user | non-empty model id | provider alias normalization | warn; runtime feature degraded | | `editor.tab_size` | int | No | `4` | defaults/user | integer > 0 | clamp policy allowed | warn + fallback | @@ -90,8 +114,7 @@ root | `editor` | No | invalid types fail strict schema check | fallback per key | | `settings` | No | invalid numeric constraints fail strict check | fallback with warning | | `ai` | No (core editor) | malformed provider/model keys fail strict check | AI degraded, editor continues | -| `supported_formats` | No | unknown structure fails strict check | generic format fallback | -| `comments` | No | invalid per-language map fails strict check | comment feature degrade | +| `file_icons` | No | unknown structure fails strict check | generic icon fallback | ## Unknown-Key Handling Policy diff --git a/docs/contributor/build-from-source.md b/docs/contributor/build-from-source.md index 94c60ec3..22096773 100644 --- a/docs/contributor/build-from-source.md +++ b/docs/contributor/build-from-source.md @@ -37,6 +37,24 @@ flowchart LR ## Build Dependencies +### TextMate syntax engine + 
+The default syntax engine is extension-backed and depends on `python-textmate`; +that dependency pulls `onigurumacffi`, which uses the Oniguruma regular-expression +engine. Binary wheels cover common Linux, macOS, and Windows builds, but source +builds need the Oniguruma development headers/library available before dependency +sync. + +Use the platform package where required: + +- Debian/Ubuntu-style systems: `libonig-dev` or `oniguruma`. +- Arch and Nix: `oniguruma`. +- FreeBSD: `devel/oniguruma`. + +Packaging and smoke validation must accept only two outcomes: TextMate rendering +is available, or ECLI logs the deterministic fallback diagnostic and starts with +the legacy highlighter. A missing tokenizer must not crash startup. + ### SUSE / openSUSE Install the local RPM/package build toolchain: diff --git a/docs/release/build-matrix.md b/docs/release/build-matrix.md index f6a1b99b..2a8ed6e4 100644 --- a/docs/release/build-matrix.md +++ b/docs/release/build-matrix.md @@ -57,7 +57,9 @@ one per canonical matrix entry. Release publication is blocked unless the exact - Windows installer path requires Python 3.11+, Git, PowerShell 7, and NSIS (`makensis`). Visual Studio Build Tools are required only when native dependencies or build tooling need local compilation. -- macOS DMG path relies on `hdiutil` and Python tooling. +- macOS DMG path relies on `hdiutil`, Python tooling, Homebrew `oniguruma`, and + `pkg-config`; the build script validates native Oniguruma headers/libs before + pip can source-build `onigurumacffi`. ## GitHub Actions Workflow Contract Map diff --git a/docs/release/packaging-flows.md b/docs/release/packaging-flows.md index f6c63a98..9a72a2ed 100644 --- a/docs/release/packaging-flows.md +++ b/docs/release/packaging-flows.md @@ -174,6 +174,15 @@ Governance rule: ## macOS - DMG flow: `scripts/build_and_package_macos.py` +- Native TextMate dependency: install Homebrew `oniguruma` and `pkg-config` + before any `pip install` of ECLI or `.[dev]`. 
The macOS workflows do this + explicitly, and `scripts/build_and_package_macos.py` fails early if + `oniguruma.h` plus the Oniguruma library/pkg-config metadata are not visible. +- The macOS build script passes deterministic native build environment to pip: + `CPPFLAGS`/`CFLAGS` include the Oniguruma include directory, `LDFLAGS` include + the library directory, and `PKG_CONFIG_PATH` includes the discovered + `pkgconfig` directory. Common Intel (`/usr/local`) and Apple Silicon + (`/opt/homebrew`) Homebrew prefixes are supported. ## Windows @@ -181,3 +190,46 @@ Governance rule: - build prerequisites: Python 3.11+, Git, PowerShell 7, NSIS for installer builds, and Visual Studio Build Tools only when native compilation is required. - NSIS script: `packaging/windows/nsis/ecli.nsi` + +## TextMate syntax engine dependency (Oniguruma) + +ECLI's default syntax engine (`[extensions].syntax_engine = "extension"`) tokenizes +with the imported TextMate grammars via the `python-textmate` dependency, which +pulls `onigurumacffi` (CFFI bindings to the **Oniguruma** regex library). + +- **Wheel/sdist, Linux/Windows PyInstaller, AppImage, Docker helpers:** + `onigurumacffi` ships binary wheels for manylinux/musllinux and Windows in + the common case, so no system library is normally required. PyInstaller/AppImage + bundles must include `python-textmate` and `onigurumacffi`; verify the app + starts and, if the tokenizer is absent, falls back to the legacy highlighter + without crashing. +- **macOS PyInstaller/DMG:** CI and the build script install/require Homebrew + `oniguruma` and `pkg-config` before pip installs the project, because + `onigurumacffi` may fall back to a source build and require native headers. +- **Source builds (FreeBSD ports/pkg, Nix from source, musl edge cases):** the + **Oniguruma** development headers/library must be available at build time + (`devel/oniguruma` on FreeBSD, `oniguruma`/`libonig-dev` on Debian/Ubuntu, + `oniguruma` on Arch and in nixpkgs). 
Declare/install it in the corresponding + packaging flow, or document the explicit fallback policy (legacy highlighter). +- **Runtime guarantee:** a missing tokenizer never crashes ECLI; it logs a + deterministic diagnostic and renders with the legacy highlighter. + +## Theme numbering and config migration contract + +Release artifacts must preserve the canonical theme-numbering policy documented +in `docs/architecture/extensions-layer.md` and the shipped `config.toml`: + +- `1`-`8` are deprecated migration aliases only. +- `100`-`199` are light themes. +- `200`-`299` are dark themes. +- `300`-`399` are high-contrast themes. +- `800`-`899` are reserved for future custom/imported special themes. + +The default shipped theme is `207` (`Dark+`). Packaging must not rewrite +`config.toml` or silently substitute missing theme numbers. User-config +migration must write +`~/.config/ecli/config.toml.pre-extension-theme-numbering.bak` before changing +an existing config. Old pre-extension aliases `1`-`8` migrate to the preserved +compatibility ids in the `18x`/`28x`/`38x` ranges; transitional ids from the +previous in-progress implementation migrate as `1`-`10` -> `101`-`110`, +`11`-`25` -> `201`-`215`, and `26`-`29` -> `301`-`304`. diff --git a/docs/release/release-checklist.md b/docs/release/release-checklist.md index f6e66cb0..98f1bc4f 100644 --- a/docs/release/release-checklist.md +++ b/docs/release/release-checklist.md @@ -92,3 +92,44 @@ ecli__docker_deb_helper_evidence.tar.gz ecli__docker_rpm_helper_evidence.tar.gz ecli__workflow_contract_evidence.tar.gz ``` + +## TextMate engine dependency check + +- [ ] `pyproject.toml` declares `python-textmate` (pulls `onigurumacffi`). +- [ ] Wheel/PyInstaller/AppImage/DMG/Windows artifacts include + `python-textmate` + `onigurumacffi`, or start and fall back to the legacy + highlighter without crashing when absent. 
+- [ ] macOS workflows install Homebrew `oniguruma` and `pkg-config` before + package installation, and `scripts/build_and_package_macos.py` exports + `CPPFLAGS`, `CFLAGS`, `LDFLAGS`, and `PKG_CONFIG_PATH` for pip subprocesses. +- [ ] Source-build platforms (FreeBSD ports/pkg, Nix from source) provide the + **Oniguruma** dev headers/library, or document the legacy-fallback policy. +- [ ] Startup log shows `textmate_tokenizer_available=True` on a reference build. +- [ ] Real large-file scroll smoke passes on `Makefile` and + `logs/freebsd-0.2.2-fail.log` without repaint freezes. +- [ ] Multiline comment/string rendering checks pass for Python triple strings, + JavaScript block/doc comments, TypeScript block/doc comments, HTML + comments, and CSS block comments. +- [ ] Words, numbers, operators, tags, selectors, properties, and values inside + protected multiline comments/strings render as comment/string, while code + after the protected region still highlights as code. +- [ ] `.log` files and `.gitignore` are not detected as SQL/Transact-SQL. +- [ ] TextMate dependency/fallback checks pass when `python-textmate` or + Oniguruma is unavailable. + +## Theme numbering migration check + +- [ ] Shipped `config.toml` defaults to `theme = 207` (`Dark+`). +- [ ] Theme numbering policy is present in `config.toml`, + `docs/architecture/extensions-layer.md`, and config docs: + `1`-`8` deprecated aliases, `100`-`199` light, `200`-`299` dark, + `300`-`399` high contrast, `800`-`899` reserved. +- [ ] Old pre-extension `theme = 1`-`8` configs migrate to the matching + compatibility ids in the `18x`/`28x`/`38x` ranges. +- [ ] Transitional previous-implementation ids migrate as `1`-`10` -> `101`-`110`, + `11`-`25` -> `201`-`215`, and `26`-`29` -> `301`-`304`. +- [ ] Migration writes + `~/.config/ecli/config.toml.pre-extension-theme-numbering.bak` and emits a + visible ECLI warning. 
+- [ ] Missing/invalid theme numbers are not mapped to unrelated themes; ECLI + keeps the current valid theme when available. diff --git a/pyproject.toml b/pyproject.toml index a839addc..5e3db9bc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,6 +67,7 @@ dependencies = [ "toml>=0.10.2", "tato>=0.2.3", "windows-curses>=2.4.0; sys_platform == 'win32'", + "python-textmate>=0.2.0", ] [project.optional-dependencies] diff --git a/scripts/build_and_package_macos.py b/scripts/build_and_package_macos.py index af0f2251..94d38821 100644 --- a/scripts/build_and_package_macos.py +++ b/scripts/build_and_package_macos.py @@ -43,6 +43,7 @@ import subprocess import sys import tomllib +from collections.abc import Mapping from pathlib import Path from packaging_common import require_tool @@ -56,6 +57,14 @@ APP_NAME = "ECLI" MACOS_ARCH = "universal2" +ONIGURUMA_HEADER = "oniguruma.h" +ONIGURUMA_PKG_CONFIG_NAME = "oniguruma" +ONIGURUMA_COMMON_PREFIXES = ( + "/opt/homebrew/opt/oniguruma", + "/usr/local/opt/oniguruma", + "/opt/homebrew", + "/usr/local", +) def python_bin() -> str: @@ -69,6 +78,7 @@ def read_version(root: Path) -> str: def build_arch(root: Path, spec: Path, arch_name: str, arch_flag: str) -> Path | None: """Build a per-arch PyInstaller binary; return its path or None on failure.""" + native_env = macos_native_dependency_env(os.environ) venv_dir = root / "build" / f"macos_venv_{arch_name}" build_dir = root / "build" / f"macos_{arch_name}" dist_dir = root / "dist" / f"macos_{arch_name}" @@ -92,6 +102,7 @@ def build_arch(root: Path, spec: Path, arch_name: str, arch_flag: str) -> Path | "setuptools", ], cwd=root, + env=native_env, check=True, ) subprocess.run( @@ -107,6 +118,7 @@ def build_arch(root: Path, spec: Path, arch_name: str, arch_flag: str) -> Path | ".[dev]", ], cwd=root, + env=native_env, check=True, ) @@ -130,7 +142,7 @@ def build_arch(root: Path, spec: Path, arch_name: str, arch_flag: str) -> Path | ], cwd=root, env={ - **os.environ, + **native_env, 
"ECLI_REPO_ROOT": str(root), "ECLI_PYINSTALLER_ONEDIR": "0", "ECLI_BUILD_MACOS_APP": "0", @@ -168,15 +180,137 @@ def info_plist(version: str, icon_entry: str) -> str: def check_packaging_prerequisites() -> int: - for tool in ("arch", "lipo", "codesign", "hdiutil", "shasum"): + for tool in ("arch", "lipo", "codesign", "hdiutil", "shasum", "pkg-config"): if not require_tool(tool): return EXIT_MISSING_TOOL if shutil.which(python_bin()) is None: print(f"ERR Missing Python interpreter: {python_bin()}", file=sys.stderr) return EXIT_MISSING_TOOL + if not check_oniguruma_prerequisites(): + return EXIT_MISSING_TOOL return EXIT_OK +def _capture_stdout(command: list[str]) -> str | None: + """Return stripped stdout for a command, or ``None`` on failure.""" + try: + result = subprocess.run( + command, + capture_output=True, + text=True, + check=True, + ) + except (OSError, subprocess.CalledProcessError): + return None + output = result.stdout.strip() + return output or None + + +def _unique_existing_paths(paths_in_order: list[Path]) -> list[Path]: + """Return existing paths without duplicates, preserving order.""" + result: list[Path] = [] + seen: set[Path] = set() + for path in paths_in_order: + resolved = path.resolve() + if resolved.exists() and resolved not in seen: + seen.add(resolved) + result.append(resolved) + return result + + +def oniguruma_prefixes() -> list[Path]: + """Return deterministic candidate prefixes for macOS Oniguruma.""" + candidates: list[Path] = [] + env_prefix = os.environ.get("ECLI_ONIGURUMA_PREFIX") + if env_prefix: + candidates.append(Path(env_prefix)) + brew_prefix = _capture_stdout(["brew", "--prefix", "oniguruma"]) + if brew_prefix: + candidates.append(Path(brew_prefix)) + candidates.extend(Path(prefix) for prefix in ONIGURUMA_COMMON_PREFIXES) + return _unique_existing_paths(candidates) + + +def _oniguruma_include_dirs(prefixes: list[Path]) -> list[Path]: + return _unique_existing_paths([prefix / "include" for prefix in prefixes]) + + +def 
_oniguruma_lib_dirs(prefixes: list[Path]) -> list[Path]: + return _unique_existing_paths([prefix / "lib" for prefix in prefixes]) + + +def _oniguruma_pkg_config_dirs(prefixes: list[Path]) -> list[Path]: + return _unique_existing_paths( + [ + *(prefix / "lib" / "pkgconfig" for prefix in prefixes), + *(prefix / "share" / "pkgconfig" for prefix in prefixes), + ] + ) + + +def _prepend_flags(existing: str | None, flags: list[str]) -> str: + parts = [*flags, *(existing or "").split()] + return " ".join(dict.fromkeys(part for part in parts if part)) + + +def _prepend_path_list(existing: str | None, paths_in_order: list[Path]) -> str: + parts = [str(path) for path in paths_in_order] + if existing: + parts.extend(existing.split(os.pathsep)) + return os.pathsep.join(dict.fromkeys(part for part in parts if part)) + + +def macos_native_dependency_env(base_env: Mapping[str, str]) -> dict[str, str]: + """Return env with deterministic Oniguruma build flags for pip subprocesses.""" + env = dict(base_env) + prefixes = oniguruma_prefixes() + include_dirs = _oniguruma_include_dirs(prefixes) + lib_dirs = _oniguruma_lib_dirs(prefixes) + pkg_config_dirs = _oniguruma_pkg_config_dirs(prefixes) + include_flags = [f"-I{path}" for path in include_dirs] + lib_flags = [f"-L{path}" for path in lib_dirs] + env["CPPFLAGS"] = _prepend_flags(env.get("CPPFLAGS"), include_flags) + env["CFLAGS"] = _prepend_flags(env.get("CFLAGS"), include_flags) + env["LDFLAGS"] = _prepend_flags(env.get("LDFLAGS"), lib_flags) + env["PKG_CONFIG_PATH"] = _prepend_path_list( + env.get("PKG_CONFIG_PATH"), pkg_config_dirs + ) + return env + + +def check_oniguruma_prerequisites() -> bool: + """Verify native Oniguruma headers/libs are visible before pip installs.""" + prefixes = oniguruma_prefixes() + include_dirs = _oniguruma_include_dirs(prefixes) + lib_dirs = _oniguruma_lib_dirs(prefixes) + header_found = any( + (include_dir / ONIGURUMA_HEADER).is_file() for include_dir in include_dirs + ) + pkg_config_found = ( + 
subprocess.run( + ["pkg-config", "--exists", ONIGURUMA_PKG_CONFIG_NAME], + check=False, + ).returncode + == 0 + ) + lib_found = any( + (lib_dir / candidate).exists() + for lib_dir in lib_dirs + for candidate in ("libonig.dylib", "libonig.a") + ) + if header_found and (pkg_config_found or lib_found): + print("OK Oniguruma native dependency is available for TextMate builds.") + return True + searched = ", ".join(str(prefix) for prefix in prefixes) or "" + print( + "ERR Missing Oniguruma native dependency for python-textmate/onigurumacffi. " + "Install Homebrew packages 'oniguruma' and 'pkg-config' before macOS " + f"packaging. Searched prefixes: {searched}", + file=sys.stderr, + ) + return False + + def verify_python_arches(root: Path) -> None: for arch_flag, label in (("-x86_64", "python-x86_64"), ("-arm64", "python-arm64")): subprocess.run( diff --git a/src/ecli/core/Ecli.py b/src/ecli/core/Ecli.py index 103b883e..81e05d40 100755 --- a/src/ecli/core/Ecli.py +++ b/src/ecli/core/Ecli.py @@ -500,6 +500,25 @@ def __init__( self.show_line_numbers, ) + # --- Buffer modification tracking --- + @property + def modified(self) -> bool: + """Whether the buffer has unsaved changes.""" + return getattr(self, "_modified", False) + + @modified.setter + def modified(self, value: bool) -> None: + """Set the modified flag and bump the monotonic buffer-edit revision. + + Every assignment brackets a potential buffer edit (all edit paths set + ``modified = True``), so bumping the revision here gives viewport + highlighting a single, central invalidation signal: scroll and cursor + movement never touch ``modified``, so caches stay warm while scrolling, + and recompute exactly once per edit (#102). 
+ """ + self._modified = bool(value) + self._buffer_edit_revision = getattr(self, "_buffer_edit_revision", 0) + 1 + # --- State Initialization --- def _initialize_state(self) -> None: """Initializes all editor state attributes to their default values.""" @@ -508,7 +527,11 @@ def _initialize_state(self) -> None: self.cursor_y: int = 0 self.scroll_top: int = 0 self.scroll_left: int = 0 - self.modified: bool = False + # Backing field + monotonic buffer-edit revision (see the ``modified`` + # property). The revision lets viewport highlighting caches recompute + # exactly once per edit instead of once per scroll frame (#102). + self._modified: bool = False + self._buffer_edit_revision: int = 0 self.encoding: str = "UTF-8" self.filename: Optional[str] = None self._file_loaded_from_disk: bool = False @@ -532,6 +555,13 @@ def _initialize_state(self) -> None: self.current_language: Optional[str] = None self._lexer: Optional[TextLexer] = None self.custom_syntax_patterns: list[tuple[re.Pattern, str]] = [] + # #102: extension-backed syntax metadata + TextMate line highlighter for + # the current file, populated by detect_language(). When the extension + # engine is selected and a usable TextMate grammar is available, the + # highlighter renders real scoped spans; otherwise these stay None and the + # legacy highlighter renders. Both degrade safely to None on any error. + self.extension_syntax: Any = None + self._extension_highlighter: Any = None self._state_lock: threading.RLock = threading.RLock() self._shell_cmd_q: queue.Queue[str] = queue.Queue() self._git_q: queue.Queue[tuple[str, str, str]] = queue.Queue() @@ -650,15 +680,15 @@ def _setup_environment(self) -> None: # ---------------- Screen wiring ---------------- def attach_screen(self, stdscr: curses.window) -> None: """Attach curses stdscr after construction. - Keeps compatibility with wrapper(). + Keeps compatibility with wrapper(). 
""" self.stdscr = stdscr # ---------------- CLI file preloading ---------------- def preload_cli_document(self, path: Path) -> None: """Preload (open or create) a buffer named after 'path', - even if it does not exist on disk. - This ensures that Save will default to that path. + even if it does not exist on disk. + This ensures that Save will default to that path. """ intended = str(path.resolve()) self._cli_intended_path = intended @@ -686,10 +716,19 @@ def preload_cli_document(self, path: Path) -> None: self._set_current_path(intended) return except Exception: - logger.debug("open_file(%s) failed, will try creating an empty buffer", intended, exc_info=True) + logger.debug( + "open_file(%s) failed, will try creating an empty buffer", + intended, + exc_info=True, + ) # 3) Create an empty in-memory buffer with given name if API exists. - for meth_name in ("create_empty_buffer_with_name", "new_buffer_named", "new_file_with_name", "new_file"): + for meth_name in ( + "create_empty_buffer_with_name", + "new_buffer_named", + "new_file_with_name", + "new_file", + ): if hasattr(self, meth_name): try: meth = getattr(self, meth_name) @@ -700,7 +739,9 @@ def preload_cli_document(self, path: Path) -> None: self._set_current_path(intended) return except Exception: - logger.debug("%s failed, continue fallback", meth_name, exc_info=True) + logger.debug( + "%s failed, continue fallback", meth_name, exc_info=True + ) # 4) Last resort: create on disk then open (guarantees correct default name). 
try: @@ -709,11 +750,15 @@ def preload_cli_document(self, path: Path) -> None: self.open_file(intended) self._set_current_path(intended) except Exception: - logger.warning("Could not preload CLI document %s; starting unnamed buffer.", intended, exc_info=True) + logger.warning( + "Could not preload CLI document %s; starting unnamed buffer.", + intended, + exc_info=True, + ) def open_or_create(self, path: str | Path) -> None: """Open the file if it exists; otherwise create - an empty buffer with that path. + an empty buffer with that path. """ p = str(Path(path).expanduser().resolve()) if os.path.exists(p): @@ -721,7 +766,12 @@ def open_or_create(self, path: str | Path) -> None: self._set_current_path(p) return # try to create a named empty buffer via your APIs - for meth_name in ("create_empty_buffer_with_name", "new_buffer_named", "new_file_with_name", "new_file"): + for meth_name in ( + "create_empty_buffer_with_name", + "new_buffer_named", + "new_file_with_name", + "new_file", + ): if hasattr(self, meth_name): try: meth = getattr(self, meth_name) @@ -742,14 +792,12 @@ def open_or_create(self, path: str | Path) -> None: # --------------- internal helpers --------------- def _set_current_path(self, abs_path: str) -> None: """Set common filename attributes - so Save/Write use the intended path by default. + so Save/Write use the intended path by default. """ self.current_file_path = abs_path self.file_path = abs_path self.filename = abs_path - - # --- Clipboard Availability Check --- def close(self) -> None: """Gracefully shuts down the editor and releases all associated resources. 
@@ -1196,8 +1244,7 @@ def get_selected_text(self) -> str: return "" if not all( - isinstance(value, int) - for value in (start_row, start_col, end_row, end_col) + isinstance(value, int) for value in (start_row, start_col, end_row, end_col) ): log_record_to_file_handlers( logging.WARNING, @@ -1220,9 +1267,7 @@ def get_selected_text(self) -> str: # Delegate to the central, buffer-coordinate-only extractor. It returns # file content for the (row, col) span only — never line numbers, the # gutter, borders, or any other rendered UI chrome. - return selection_to_text( - self.text, (start_row, start_col), (end_row, end_col) - ) + return selection_to_text(self.text, (start_row, start_col), (end_row, end_col)) # --- Copy selected text --- def copy(self) -> bool: @@ -1517,7 +1562,9 @@ def _syntax_highlighting_enabled(self) -> bool: missing or non-boolean value falls back to enabled so highlighting never silently breaks on a malformed config. """ - editor_config = self.config.get("editor", {}) if isinstance(self.config, dict) else {} + editor_config = ( + self.config.get("editor", {}) if isinstance(self.config, dict) else {} + ) value = editor_config.get("syntax_highlighting", True) return bool(value) if isinstance(value, bool) else True @@ -1525,7 +1572,7 @@ def _syntax_highlighting_enabled(self) -> bool: def apply_syntax_highlighting_with_pygments( self, lines: list[str], - _line_indices: list[int], + line_indices: list[int], ) -> list[list[tuple[str, int]]]: """Returns a colorized representation of the requested lines. @@ -1553,6 +1600,13 @@ def apply_syntax_highlighting_with_pygments( if self._lexer is None: self.detect_language() + # #102: when a TextMate line highlighter is active (extension engine + + # usable grammar), render real scoped spans; fall back to legacy per line + # if the tokenizer cannot handle a given line. 
+ extension_result = self._apply_extension_highlighting(lines, line_indices) + if extension_result is not None: + return extension_result + highlighted: list[list[tuple[str, int]]] = [] lexer_id = id(self._lexer) if self._lexer else 0 @@ -1722,10 +1776,155 @@ def detect_language(self) -> None: # Clear tokenization cache if the lexer or patterns have changed self._clear_cache_if_changed(old_lexer_id, old_custom_patterns) + # #102: also resolve read-only extension-backed syntax metadata for the + # current file. This never alters the legacy highlighting above. + self._resolve_extension_syntax_metadata() + + def _resolve_extension_syntax_metadata(self) -> None: + """Resolve extension-backed syntax metadata + TextMate highlighter. + + Narrow, safe adapter over the #102 syntax-service boundary. It records + language/grammar metadata on ``self.extension_syntax`` and, when the + extension engine is selected and a usable TextMate grammar exists, a + per-line ``self._extension_highlighter`` for real scoped rendering. It + never executes extension code, parses activation events, invokes Node, or + starts background workers. Any failure degrades silently to ``None`` so the + legacy highlighter always remains a safe fallback. + """ + self.extension_syntax = None + self._extension_highlighter = None + try: + from ecli.extensions.ecli_integration import ( + TEXTMATE_AVAILABLE, + ExtensionLayerConfig, + build_syntax_service, + ) + + layer_config = ExtensionLayerConfig.from_config(self.config) + if not layer_config.enabled: + return + # The catalog/detector are scanned once and cached process-wide, so + # this is cheap after the first file and never blocks rendering. 
+ service = build_syntax_service(layer_config) + self.extension_syntax = service.resolve(self.filename) + self._extension_highlighter = service.build_line_highlighter(self.filename) + extension_language = getattr(self.extension_syntax, "language_id", None) + legacy_language = (getattr(self, "current_language", None) or "").lower() + if extension_language and ( + isinstance(getattr(self, "_lexer", None), TextLexer) + or legacy_language in {"text only", "transact-sql", "tsql"} + or extension_language in {"log", "ignore"} + ): + self.current_language = extension_language + if extension_language in {"log", "ignore"} or legacy_language in { + "transact-sql", + "tsql", + }: + self._lexer = TextLexer() + self.custom_syntax_patterns = [] + + # Deterministic one-time diagnostic so the active engine is visible in + # the log (and so a missing tokenizer is reported, not silent). + if not getattr(self, "_syntax_engine_logged", False): + logging.info( + "ECLI syntax engine=%s textmate_tokenizer_available=%s " + "active_renderer=%s", + layer_config.syntax_engine, + TEXTMATE_AVAILABLE, + "textmate" if self._extension_highlighter else "legacy", + ) + self._syntax_engine_logged = True + except Exception: + logging.debug( + "Extension syntax metadata resolution skipped.", exc_info=True + ) + self.extension_syntax = None + self._extension_highlighter = None + + def _apply_extension_highlighting( + self, lines: list[str], line_indices: list[int] + ) -> Optional[list[list[tuple[str, int]]]]: + """Render *lines* via the TextMate highlighter, or None to use legacy. + + Returns ``None`` when no extension highlighter is active so the caller + keeps the legacy path. For each line where the tokenizer cannot produce + spans, that single line falls back to the legacy tokenizer, so the + rendered result is always complete and never crashes on tabs, Unicode, + empty, or very long lines. 
+ """ + highlighter = getattr(self, "_extension_highlighter", None) + if highlighter is None: + return None + + default_color = self.colors.get("default", curses.A_NORMAL) + lexer_id = id(self._lexer) if self._lexer else 0 + has_custom_rules = bool(getattr(self, "custom_syntax_patterns", [])) + + rendered: list[list[tuple[str, int]]] = [] + highlighted_lines = self._extension_highlighted_lines( + highlighter, lines, line_indices + ) + + for line, spans in zip(lines, highlighted_lines, strict=False): + # Fall back to legacy when the extension engine produced nothing + # usable for a non-empty line. Some imported grammars (e.g. the YAML + # block grammar) yield no/only-default tokens under the per-line + # stateless engine; legacy/Pygments keeps such files readable instead + # of rendering them as flat default text. + if self._extension_spans_need_legacy(line, spans): + rendered.append( + self._get_tokenized_line(line, lexer_id, has_custom_rules) + ) + continue + mapped = self._map_extension_spans(spans, default_color) + rendered.append(mapped if mapped else [(line, default_color)]) + return rendered + + def _extension_highlighted_lines( + self, highlighter: Any, lines: list[str], line_indices: list[int] + ) -> list[Any]: + """Return TextMate spans for visible lines, or ``None`` per failed line.""" + if hasattr(highlighter, "highlight_lines"): + try: + return highlighter.highlight_lines( + lines, + line_indices=line_indices, + full_text=self.text, + text_revision=getattr(self, "_buffer_edit_revision", 0), + ) + except Exception: + return [None for _line in lines] + highlighted_lines: list[Any] = [] + for line in lines: + try: + highlighted_lines.append(highlighter.highlight(line)) + except Exception: + highlighted_lines.append(None) + return highlighted_lines + + @staticmethod + def _extension_spans_need_legacy(line: str, spans: Any) -> bool: + """Return whether a line must use legacy highlighting.""" + return spans is None or ( + line and all(category == "default" 
for _text, category in spans) + ) + + def _map_extension_spans( + self, spans: list[tuple[str, str]], default_color: int + ) -> list[tuple[str, int]]: + """Map TextMate style categories to active curses colour attributes.""" + return [ + (text, self.colors.get(category, default_color)) for text, category in spans + ] + def _determine_lexer(self) -> TextLexer: """Determines the appropriate Pygments lexer based on filename and content. Follows a priority: filename > content > fallback to TextLexer. """ + if self._is_plain_operational_file(self.filename): + logging.debug("Pygments: Operational log/ignore file uses TextLexer.") + return TextLexer() + # Detect by filename if self.filename and self.filename != "noname": try: @@ -1740,6 +1939,15 @@ def _determine_lexer(self) -> TextLexer: if content_sample.strip(): try: lexer = guess_lexer(content_sample, stripall=False) + if self._is_non_sql_filename(self.filename) and lexer.name.lower() in { + "sql", + "transact-sql", + "tsql", + }: + logging.debug( + "Pygments: Suppressed SQL content guess for non-SQL file." 
+ ) + return TextLexer() logging.debug(f"Pygments: Guessed '{lexer.name}' by content.") return lexer except Exception: @@ -1749,6 +1957,19 @@ def _determine_lexer(self) -> TextLexer: logging.debug("Pygments: Falling back to TextLexer.") return TextLexer() + @staticmethod + def _is_plain_operational_file(filename: str | None) -> bool: + if not filename: + return False + base = os.path.basename(filename).lower() + return base == ".gitignore" or base.endswith(".log") + + @staticmethod + def _is_non_sql_filename(filename: str | None) -> bool: + if not filename: + return False + return not os.path.basename(filename).lower().endswith(".sql") + def _load_custom_syntax_patterns(self) -> list[tuple[re.Pattern, str]]: """Loads and compiles custom regex syntax patterns from the config for the current language.""" if not self.current_language or not self._lexer: @@ -5420,7 +5641,9 @@ def _write_file(self, target_filename: str) -> None: content_to_write += newline try: - encoded = content_to_write.encode(self.encoding or "utf-8", errors="replace") + encoded = content_to_write.encode( + self.encoding or "utf-8", errors="replace" + ) with self.safe_open(target_filename, "wb") as f: cast(BinaryIO, f).write(encoded) @@ -7080,7 +7303,8 @@ def get_screen_coords_for_highlight( ): return None screen_y_coord = ( - text_row_idx - self.scroll_top + text_row_idx + - self.scroll_top + getattr(self, "_content_area_y_offset", 0) ) try: @@ -7348,15 +7572,26 @@ def init_colors(self) -> None: curses.use_default_colors() # Resolve the active built-in theme. Colours come exclusively from the - # selected palette (config key ``theme = 1..8``); the legacy free-form + # selected palette (config key ``theme = N``); the legacy free-form # ``[colors]`` table is intentionally ignored so a single, deterministic # source drives both the UI chrome and syntax highlighting. 
- palette: ThemePalette = resolve_theme(self.config) + palette: ThemePalette = resolve_theme( + self.config, getattr(self, "active_theme", None) + ) self.active_theme = palette + if isinstance(self.config, dict): + for warning in self.config.get("_migration_warnings", ()): + logging.warning("%s", warning) + if hasattr(self, "status_message"): + self.status_message = warning + for diagnostic in getattr(palette, "diagnostics", ()): + logging.warning("%s", diagnostic) + if hasattr(self, "status_message"): + self.status_message = diagnostic if isinstance(self.config, dict) and self.config.get("colors"): logging.info( "Ignoring deprecated [colors] config section; using built-in " - "theme '%s' (theme = %d). Set 'theme = 1..8' to change palette.", + "theme '%s' (theme = %d). Set a valid numeric theme id to change palette.", palette.name, palette.theme_id, ) @@ -7875,6 +8110,9 @@ def select_ai_provider_and_ask(self) -> bool: # noqa: python:S3516 "4": "claude", "5": "grok", "6": "huggingface", + "7": "deepseek", + "8": "qwen", + "9": "kimi", "d": default_provider, } menu_str = " ".join([f"{k}:{v}" for k, v in menu_items.items() if k != "d"]) @@ -8048,7 +8286,7 @@ def run(self) -> None: try: curses.setupterm() smcup = curses.tigetstr("smcup") # enter alternate screen - smkx = curses.tigetstr("smkx") # application cursor keys + smkx = curses.tigetstr("smkx") # application cursor keys if smcup: curses.putp(smcup.decode("ascii", "ignore")) if smkx: @@ -8093,11 +8331,15 @@ def run(self) -> None: redraw_needed: bool = self._process_events_and_input() self._render_screen(redraw_needed) except KeyboardInterrupt: - logger.info("KeyboardInterrupt received, initiating graceful shutdown.") + logger.info( + "KeyboardInterrupt received, initiating graceful shutdown." 
+ ) self.exit_editor() break except Exception as e: - logger.critical("Unhandled exception in main loop: %s", e, exc_info=True) + logger.critical( + "Unhandled exception in main loop: %s", e, exc_info=True + ) self.exit_editor() break # ------------------------------------------------------------------------- @@ -8133,7 +8375,7 @@ def run(self) -> None: # --- EXIT terminal application modes (normal cursor keys + leave alt screen) --- try: - rmkx = curses.tigetstr("rmkx") + rmkx = curses.tigetstr("rmkx") rmcup = curses.tigetstr("rmcup") if rmkx: curses.putp(rmkx.decode("ascii", "ignore")) diff --git a/src/ecli/extensions/ecli_integration/__init__.py b/src/ecli/extensions/ecli_integration/__init__.py index 11453ba7..40be8ab5 100644 --- a/src/ecli/extensions/ecli_integration/__init__.py +++ b/src/ecli/extensions/ecli_integration/__init__.py @@ -21,11 +21,17 @@ snippets, configuration). * #101 — TextMate grammar catalog, language detection, and the typed ``[extensions]`` configuration surface. +* #102 — extension-backed syntax-service boundary that resolves language/grammar + metadata for editor rendering while keeping the legacy highlighter + authoritative. +* #103 — extension-backed theme registry from ``contributes.themes`` and VS Code + theme JSON, including tokenColor scope resolution. It is data-only: it never starts a VS Code extension host, never activates a Node/TypeScript or Copilot runtime, never runs ``activationEvents`` or -``package.json`` scripts, never tokenizes or renders syntax, and never executes -any command. +``package.json`` scripts, and never executes any command. Tokenization is limited +to reading imported TextMate grammar JSON through the optional Python tokenizer +and degrades to the legacy highlighter when unavailable. 
""" from __future__ import annotations @@ -48,6 +54,7 @@ LanguageContribution, RegistryDiagnostic, SnippetContribution, + ThemeContribution, parse_manifest, ) from .paths import extensions_root @@ -56,26 +63,70 @@ build_registry, discover_manifest_directories, ) +from .syntax_service import ( + EXTENSION_TOKENIZATION_AVAILABLE, + SYNTAX_ENGINE_EXTENSION, + SYNTAX_ENGINE_LEGACY, + LineHighlighter, + SyntaxResolution, + SyntaxService, + build_syntax_service, +) +from .textmate_tokenizer import TEXTMATE_AVAILABLE, TextMateTokenizer, load_tokenizer +from .theme_bridge import scope_to_category, tokens_to_spans +from .theme_registry import ( + TARGET_THEME_NAMES, + TARGET_THEME_NUMBERS, + THEME_NUMBERING_POLICY, + ExtensionTheme, + TextMateResolvedStyle, + TextMateTokenColor, + ThemeRegistry, + build_theme_registry, + cached_theme_registry, +) __all__ = [ + "EXTENSION_TOKENIZATION_AVAILABLE", + "SYNTAX_ENGINE_EXTENSION", + "SYNTAX_ENGINE_LEGACY", + "TEXTMATE_AVAILABLE", "ConfigurationContribution", "ExtensionLayerConfig", "ExtensionManifest", "ExtensionRegistry", + "ExtensionTheme", "GrammarCatalog", "GrammarContribution", "LanguageContribution", "LanguageDetectionResult", "LanguageDetector", + "LineHighlighter", "RegistryDiagnostic", "SnippetContribution", "SyntaxEngine", + "SyntaxResolution", + "SyntaxService", "TextMateGrammar", + "TextMateResolvedStyle", + "TextMateTokenColor", + "TextMateTokenizer", + "ThemeContribution", + "ThemeRegistry", + "TARGET_THEME_NAMES", + "TARGET_THEME_NUMBERS", + "THEME_NUMBERING_POLICY", "build_grammar_catalog", "build_language_detector", "build_registry", + "build_syntax_service", + "build_theme_registry", + "cached_theme_registry", "discover_manifest_directories", "extensions_root", + "load_tokenizer", "parse_manifest", + "scope_to_category", + "tokens_to_spans", ] diff --git a/src/ecli/extensions/ecli_integration/config.py b/src/ecli/extensions/ecli_integration/config.py index 9d32d9b9..dcf9a70f 100644 --- 
a/src/ecli/extensions/ecli_integration/config.py +++ b/src/ecli/extensions/ecli_integration/config.py @@ -21,8 +21,15 @@ The Extensions Layer is data-only. There is **no** configuration value that can enable a VS Code extension host, Node/TypeScript activation, ``activationEvents``, ``package.json`` scripts, or a Copilot runtime. Any such key found in the table -is ignored with a diagnostic, and ``syntax_engine`` stays ``"legacy"`` until -issue #102 ships a tested extension-backed renderer. +is ignored with a diagnostic. + +``syntax_engine`` accepts ``"legacy"`` (default, authoritative) and ``"extension"`` +(selects the #102 extension-backed syntax-service boundary). Selecting +``"extension"`` routes rendering through the TextMate syntax service when the +optional tokenizer and selected grammar are usable. It never enables a runtime; +missing tokenizer or grammar failures fall back to the legacy highlighter (see +``syntax_service.py``). Unknown values fall back to ``"legacy"`` with a +diagnostic. """ from __future__ import annotations @@ -57,7 +64,7 @@ class SyntaxEngine(StrEnum): - """Selectable syntax engine. Only ``LEGACY`` is active until issue #102.""" + """Selectable syntax engine.""" LEGACY = "legacy" EXTENSION = "extension" @@ -174,17 +181,13 @@ def _read_syntax_engine( if "syntax_engine" not in section: return SyntaxEngine.LEGACY.value value = section["syntax_engine"] + # "extension" is a valid selection as of #102: it routes through the + # extension-backed syntax-service boundary. It never enables a runtime, and + # rendering falls back to legacy when tokenization is unavailable. 
if value == SyntaxEngine.LEGACY.value: return SyntaxEngine.LEGACY.value if value == SyntaxEngine.EXTENSION.value: - diagnostics.append( - RegistryDiagnostic( - "warning", - source, - "extension syntax engine is not available until #102; using 'legacy'", - ) - ) - return SyntaxEngine.LEGACY.value + return SyntaxEngine.EXTENSION.value diagnostics.append( RegistryDiagnostic( "warning", source, f"unknown syntax_engine {value!r}; using 'legacy'" diff --git a/src/ecli/extensions/ecli_integration/grammar_catalog.py b/src/ecli/extensions/ecli_integration/grammar_catalog.py index 9061fb5e..a8cda7f1 100644 --- a/src/ecli/extensions/ecli_integration/grammar_catalog.py +++ b/src/ecli/extensions/ecli_integration/grammar_catalog.py @@ -19,7 +19,7 @@ files resolve to existing locations under ``src/ecli/extensions/``. It does **not** parse TextMate grammar internals, tokenize text, or render -syntax. Those belong to the syntax service (#102), which does not exist yet. +syntax. Those belong to the syntax service (#102). """ from __future__ import annotations diff --git a/src/ecli/extensions/ecli_integration/language_detection.py b/src/ecli/extensions/ecli_integration/language_detection.py index 612193be..9ef6c5be 100644 --- a/src/ecli/extensions/ecli_integration/language_detection.py +++ b/src/ecli/extensions/ecli_integration/language_detection.py @@ -39,6 +39,22 @@ MATCH_FILENAME_PATTERN = "filename_pattern" MATCH_EXTENSION = "extension" +_REQUIRED_FALLBACK_EXTENSIONS: tuple[tuple[str, str], ...] 
= ( + (".toml", "toml"), + (".asm", "asm"), + (".s", "asm"), + (".adb", "ada"), + (".ads", "ada"), + (".ada", "ada"), + (".spark", "ada"), + (".f", "fortran"), + (".for", "fortran"), + (".f90", "fortran"), + (".f95", "fortran"), + (".f03", "fortran"), + (".f08", "fortran"), +) + @dataclass(frozen=True) class LanguageDetectionResult: @@ -79,6 +95,18 @@ def detect(self, name: str) -> LanguageDetectionResult: if by_filename is not None: return by_filename + # VS Code metadata represents some leading-dot files (notably + # ``.gitignore``) as extension tokens. For those names, exact basename + # matching must still win before filename patterns and ordinary suffix + # detection, otherwise a dotfile has no suffix and falls through to + # content/legacy guessing. + base_lower = base.lower() + by_dotfile_extension = _match_tier( + self.extensions, lambda token: token == base_lower, MATCH_FILENAME + ) + if by_dotfile_extension is not None: + return by_dotfile_extension + by_pattern = _match_tier( self.filename_patterns, lambda token: _pattern_matches(token, base, name), @@ -93,9 +121,16 @@ def detect(self, name: str) -> LanguageDetectionResult: by_extension = _match_tier( self.extensions, lambda token: token == suffix, MATCH_EXTENSION ) + if by_extension is not None: + return by_extension + by_required_fallback = _match_tier( + _REQUIRED_FALLBACK_EXTENSIONS, + lambda token: token == suffix, + MATCH_EXTENSION, + ) return ( - by_extension - if by_extension is not None + by_required_fallback + if by_required_fallback is not None else LanguageDetectionResult.no_match() ) @@ -105,7 +140,14 @@ def detect_by_extension(self, extension: str) -> LanguageDetectionResult: result = _match_tier( self.extensions, lambda token: token == suffix, MATCH_EXTENSION ) - return result if result is not None else LanguageDetectionResult.no_match() + if result is not None: + return result + fallback = _match_tier( + _REQUIRED_FALLBACK_EXTENSIONS, + lambda token: token == suffix, + MATCH_EXTENSION, + 
) + return fallback if fallback is not None else LanguageDetectionResult.no_match() def _suffix(base: str) -> str | None: diff --git a/src/ecli/extensions/ecli_integration/manifest.py b/src/ecli/extensions/ecli_integration/manifest.py index 0304c393..1b97e231 100644 --- a/src/ecli/extensions/ecli_integration/manifest.py +++ b/src/ecli/extensions/ecli_integration/manifest.py @@ -79,6 +79,17 @@ class SnippetContribution: path_repo_relative: str | None = None +@dataclass(frozen=True) +class ThemeContribution: + """A single ``contributes.themes[]`` entry (metadata only).""" + + theme_id: str | None + label: str | None + ui_theme: str | None + path: str | None + path_repo_relative: str | None = None + + @dataclass(frozen=True) class ConfigurationContribution: """A ``contributes.configuration`` block, reduced to declarative metadata. @@ -106,6 +117,7 @@ class ExtensionManifest: languages: tuple[LanguageContribution, ...] = field(default_factory=tuple) grammars: tuple[GrammarContribution, ...] = field(default_factory=tuple) snippets: tuple[SnippetContribution, ...] = field(default_factory=tuple) + themes: tuple[ThemeContribution, ...] = field(default_factory=tuple) configuration: tuple[ConfigurationContribution, ...] 
= field(default_factory=tuple) @@ -233,6 +245,26 @@ def _parse_snippets( return tuple(result) +def _parse_themes(raw: object, context: _ParseContext) -> tuple[ThemeContribution, ...]: + if not isinstance(raw, list): + return () + result: list[ThemeContribution] = [] + for entry in raw: + if not isinstance(entry, dict): + continue + theme_path = _as_str(entry.get("path")) + result.append( + ThemeContribution( + theme_id=_as_str(entry.get("id")), + label=_as_str(entry.get("label")), + ui_theme=_as_str(entry.get("uiTheme")), + path=theme_path, + path_repo_relative=context.resolve_target(theme_path, "theme"), + ) + ) + return tuple(result) + + def _parse_configuration(raw: object) -> tuple[ConfigurationContribution, ...]: blocks = raw if isinstance(raw, list) else [raw] result: list[ConfigurationContribution] = [] @@ -302,6 +334,7 @@ def parse_manifest( languages=_parse_languages(contributes.get("languages"), context), grammars=_parse_grammars(contributes.get("grammars"), context), snippets=_parse_snippets(contributes.get("snippets"), context), + themes=_parse_themes(contributes.get("themes"), context), configuration=_parse_configuration(contributes.get("configuration")), ) return manifest, context.diagnostics diff --git a/src/ecli/extensions/ecli_integration/registry.py b/src/ecli/extensions/ecli_integration/registry.py index 0154ca2a..244e36ef 100644 --- a/src/ecli/extensions/ecli_integration/registry.py +++ b/src/ecli/extensions/ecli_integration/registry.py @@ -33,6 +33,7 @@ LanguageContribution, RegistryDiagnostic, SnippetContribution, + ThemeContribution, parse_manifest, ) @@ -122,6 +123,18 @@ def find_snippets_by_language( if snippet.language_id == language_id ) + def list_themes(self) -> tuple[ThemeContribution, ...]: + """Return all contributed colour themes in deterministic manifest order.""" + return tuple(theme for manifest in self.manifests for theme in manifest.themes) + + def find_theme_by_id(self, theme_id: str) -> ThemeContribution | None: + """Return 
the first theme contribution with ``id == theme_id``.""" + for manifest in self.manifests: + for theme in manifest.themes: + if theme.theme_id == theme_id: + return theme + return None + def discover_manifest_directories(root: Path) -> tuple[Path, ...]: """Return direct-child directories of ``root`` that contain a ``package.json``. diff --git a/src/ecli/extensions/ecli_integration/syntax_service.py b/src/ecli/extensions/ecli_integration/syntax_service.py new file mode 100644 index 00000000..744406ba --- /dev/null +++ b/src/ecli/extensions/ecli_integration/syntax_service.py @@ -0,0 +1,793 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Project: Ecli +# File: src/ecli/extensions/ecli_integration/syntax_service.py +# Website: https://www.ecli.io +# Repository: https://github.com/SSobol77/ecli +# PyPI: https://pypi.org/project/ecli-editor/0.0.1/ +# +# Copyright (c) 2026 Siergej Sobolewski +# +# Licensed under the GNU General Public License version 2 only. +# See the LICENSE file in the project root for full license text. + +"""Extension-backed syntax service boundary (#102). + +This deterministic, data-only service bridges the #101 grammar catalog and +language detection to the editor. Given a file name and the ``[extensions]`` +configuration, it resolves the language id, TextMate scope name, and grammar +path, selects the configured syntax engine, and reports whether rendering must +fall back to the legacy highlighter. + +It is a deterministic adapter over data-only extension assets. When the optional +``python-textmate`` tokenizer is importable and a grammar can be loaded, the +service returns a line highlighter that produces TextMate scope-derived spans. +Missing tokenizer, missing grammars, and tokenizer failures always fall back to +the legacy regex/Pygments highlighter. The service never executes extension code, +never parses ``activationEvents`` as runtime instructions, never invokes +Node/npm, and starts no background workers. 
+""" + +from __future__ import annotations + +import io +import token +import tokenize +from collections import OrderedDict +from collections.abc import Callable +from dataclasses import dataclass, field +from functools import lru_cache +from pathlib import Path + +from . import paths +from .config import ExtensionLayerConfig, SyntaxEngine +from .grammar_catalog import GrammarCatalog, TextMateGrammar, build_grammar_catalog +from .language_detection import ( + LanguageDetectionResult, + LanguageDetector, + build_language_detector, +) +from .manifest import RegistryDiagnostic +from .registry import build_registry +from .textmate_tokenizer import ( + TEXTMATE_AVAILABLE, + TextMateTokenizer, + is_grammar_quarantined, + load_tokenizer, +) +from .theme_bridge import tokens_to_spans + + +SYNTAX_ENGINE_LEGACY = SyntaxEngine.LEGACY.value +SYNTAX_ENGINE_EXTENSION = SyntaxEngine.EXTENSION.value + +# Real TextMate tokenization is available when the optional ``python-textmate`` +# engine (Oniguruma-backed) is importable. When it is missing, the extension +# engine degrades to the legacy highlighter. Individual grammars the engine +# cannot compile (for example the imported Markdown/C grammars) also fall back +# per file via ``build_line_highlighter`` returning ``None``. +EXTENSION_TOKENIZATION_AVAILABLE = TEXTMATE_AVAILABLE + +_SERVICE_SOURCE = "syntax_service" + +_REQUIRED_MISSING_GRAMMAR_ASSETS: dict[str, str] = { + "toml": "src/ecli/extensions/toml/syntaxes/toml.tmLanguage.json", + "asm": "src/ecli/extensions/asm/syntaxes/asm.tmLanguage.json", + "ada": "src/ecli/extensions/ada/syntaxes/ada.tmLanguage.json", + "fortran": "src/ecli/extensions/fortran/syntaxes/fortran.tmLanguage.json", +} + +# A line span carries the rendered text and its ECLI style category name. +LineSpan = tuple[str, str] + +# Upper bound on the per-line span cache so scrolling a very large file can never +# grow it without limit. 
It comfortably covers many viewports of scrollback; once +# exceeded the least-recently-used line is evicted. +_SPAN_CACHE_MAX = 8192 + +# Protected-range map type: per buffer-line index -> immutable ranges. +_RangeMap = dict[int, tuple[tuple[int, int, str], ...]] +_CarriedState = tuple[str, str | None] +_ScanResult = tuple[int, _CarriedState | None, bool] + + +@dataclass(frozen=True) +class SyntaxResolution: + """Immutable, deterministic result of resolving syntax data for a file.""" + + filename: str | None + language_id: str | None + scope_name: str | None + grammar_path: str | None + syntax_engine: str + used_extension_metadata: bool + fallback_to_legacy: bool + matched_by: str | None = None + is_ambiguous: bool = False + diagnostics: tuple[RegistryDiagnostic, ...] = () + + @property + def has_grammar(self) -> bool: + """Return ``True`` if an extension grammar scope was resolved.""" + return self.scope_name is not None + + +@dataclass +class LineHighlighter: + """Viewport-first, bounded-cache TextMate highlighter for one file's grammar. + + ``highlight`` returns ``(text, category)`` spans that tile the line exactly, + where ``category`` is an ECLI style/colour name. It returns ``None`` only when + the tokenizer fails (or times out) for that line, so the editor can fall back + to legacy. + + Two deterministic caches keep scrolling cheap: + + * a bounded LRU of per-line spans keyed by line content (so identical lines — + and repeated viewports — never re-tokenize, and the cache cannot grow + without limit); a cached ``None`` is a negative cache so a slow/failed line + is paid at most once; + * a single protected-range map keyed by the editor's buffer revision, so + deterministic multiline guards are computed once per edit and reused across + every scroll frame instead of re-scanning comments/strings per frame. 
+ """ + + tokenizer: TextMateTokenizer + scope_name: str | None = None + _cache: OrderedDict[ + tuple[str, tuple[tuple[int, int, str], ...]], tuple[LineSpan, ...] | None + ] = field(default_factory=OrderedDict) + _ranges_cache: tuple[object, _RangeMap] | None = field(default=None) + + def highlight(self, line: str) -> list[LineSpan] | None: + """Return cached ``(text, category)`` spans for ``line`` or ``None``.""" + protected = tuple( + _protected_ranges_for_scope(self.scope_name, [line]).get(0, ()) + ) + return self._highlight_line(line, protected) + + def highlight_lines( + self, + lines: list[str], + line_indices: list[int] | None = None, + full_text: list[str] | None = None, + text_revision: object | None = None, + ) -> list[list[LineSpan] | None]: + """Highlight a viewport of ``lines``, preserving deterministic string guards. + + Only the supplied ``lines`` are tokenized (viewport-first). Multiline + comment/string guards need buffer context; they are computed from + ``full_text`` but cached against ``text_revision`` so they run once per + edit, never per scroll. When ``text_revision`` is ``None`` the guard is + recomputed (used by tests/single calls); the editor always supplies a + revision. 
+ """ + protected_by_line: _RangeMap = {} + if self._has_protected_ranges: + source = full_text if full_text is not None else lines + all_ranges = self._protected_ranges(source, text_revision) + if full_text is not None and line_indices is not None: + protected_by_line = { + offset: tuple(all_ranges.get(line_index, ())) + for offset, line_index in enumerate(line_indices) + } + else: + protected_by_line = { + line_index: tuple(ranges) + for line_index, ranges in all_ranges.items() + if line_index < len(lines) + } + return [ + self._highlight_line(line, protected_by_line.get(index, ())) + for index, line in enumerate(lines) + ] + + @property + def _has_protected_ranges(self) -> bool: + return _scope_supports_protection(self.scope_name) + + def _protected_ranges( + self, source: list[str], revision: object | None + ) -> _RangeMap: + """Return protected ranges, reusing them across frames per revision.""" + if ( + revision is not None + and self._ranges_cache is not None + and self._ranges_cache[0] == revision + ): + return self._ranges_cache[1] + ranges = _protected_ranges_for_scope(self.scope_name, source) + if revision is not None: + self._ranges_cache = (revision, ranges) + return ranges + + def _highlight_line( + self, line: str, protected: tuple[tuple[int, int, str], ...] + ) -> list[LineSpan] | None: + cache_key = (line, protected) + cache = self._cache + if cache_key in cache: + # A present key may map to ``None`` (negative cache for a slow/failed + # line); ``in`` distinguishes that from an absent key. + cache.move_to_end(cache_key) + spans = cache[cache_key] + return list(spans) if spans is not None else None + tokens = self.tokenizer.tokenize_line(line) + result: tuple[LineSpan, ...] 
| None + if tokens is None: + result = None + else: + result = tuple(tokens_to_spans(line, tokens, protected_ranges=protected)) + cache[cache_key] = result + cache.move_to_end(cache_key) + if len(cache) > _SPAN_CACHE_MAX: + cache.popitem(last=False) + return list(result) if result is not None else None + + +def _string_span_for_row( + row: int, + start: tuple[int, int], + end: tuple[int, int], + line: str, +) -> tuple[int, int]: + """Return the protected ``(start, end)`` columns of a STRING token on ``row``.""" + start_row, start_col = start + end_row, end_col = end + if row == start_row - 1 and row == end_row - 1: + return start_col, end_col + if row == start_row - 1: + # First row of a multi-line string. When everything before the opening + # quotes is whitespace (a docstring's indentation), protect the indent too + # so the whole docstring line renders as string. For ``x = """…`` the + # ``x = `` prefix is real code and is left unprotected. + protected_start = 0 if line[:start_col].strip() == "" else start_col + return protected_start, len(line) + if row == end_row - 1: + return 0, end_col + return 0, len(line) + + +def _python_string_ranges( + lines: list[str], +) -> dict[int, tuple[tuple[int, int, str], ...]]: + """Return per-line string/docstring ranges using Python's tokenizer.""" + if not lines: + return {} + ranges: dict[int, list[tuple[int, int, str]]] = {} + try: + stream = io.StringIO("\n".join(lines) + "\n") + for tok in tokenize.generate_tokens(stream.readline): + if tok.type != token.STRING: + continue + start_row, _ = tok.start + end_row, _ = tok.end + for row in range(start_row - 1, end_row): + if not 0 <= row < len(lines): + continue + start, end = _string_span_for_row(row, tok.start, tok.end, lines[row]) + ranges.setdefault(row, []).append((start, end, "string")) + except (tokenize.TokenError, IndentationError, SyntaxError): + return {} + return {line: tuple(spans) for line, spans in ranges.items()} + + +def _scope_supports_protection(scope_name: 
str | None) -> bool: + """Return whether ``scope_name`` has an ECLI deterministic guard.""" + return _protected_range_handler(scope_name) is not None + + +_HTML_SCOPES = frozenset( + { + "text.html.basic", + "text.html.derivative", + "text.html.markdown", + } +) + + +def _protected_ranges_for_scope( + scope_name: str | None, lines: list[str] +) -> dict[int, tuple[tuple[int, int, str], ...]]: + """Return language-aware protected comment/string ranges for ``lines``. + + TextMate tokenization remains the primary token source. These guards only + repaint known multiline comments/strings over the TextMate output when the + current line-oriented engine cannot preserve cross-line state. + """ + handler = _protected_range_handler(scope_name) + return handler(lines) if handler is not None else {} + + +def _protected_range_handler( + scope_name: str | None, +) -> Callable[[list[str]], dict[int, tuple[tuple[int, int, str], ...]]] | None: + """Return the deterministic protected-range scanner for ``scope_name``.""" + if scope_name is None: + return None + if scope_name in _HTML_SCOPES: + return _html_comment_ranges + return _PROTECTED_RANGE_HANDLERS.get(scope_name) + + +def _python_protected_ranges( + lines: list[str], +) -> dict[int, tuple[tuple[int, int, str], ...]]: + """Dispatch to the current Python string scanner.""" + return _python_string_ranges(lines) + + +def _javascript_protected_ranges( + lines: list[str], +) -> dict[int, tuple[tuple[int, int, str], ...]]: + """Dispatch to the current JavaScript/TypeScript scanner.""" + return _javascript_like_protected_ranges(lines) + + +def _css_protected_ranges_dispatch( + lines: list[str], +) -> dict[int, tuple[tuple[int, int, str], ...]]: + """Dispatch to the current CSS scanner.""" + return _css_protected_ranges(lines) + + +def _append_range( + ranges: dict[int, list[tuple[int, int, str]]], + row: int, + start: int, + end: int, + category: str, +) -> None: + """Append a non-empty protected range clamped to a line.""" + if end 
> start: + ranges.setdefault(row, []).append((start, end, category)) + + +def _is_escaped(line: str, index: int) -> bool: + """Return whether ``line[index]`` is escaped by an odd number of backslashes.""" + backslashes = 0 + cursor = index - 1 + while cursor >= 0 and line[cursor] == "\\": + backslashes += 1 + cursor -= 1 + return bool(backslashes % 2) + + +def _scan_quoted_string(line: str, start: int, quote: str) -> tuple[int, bool]: + """Return ``(end, closed)`` for a JS/CSS quoted string starting at ``start``.""" + index = start + 1 + while index < len(line): + if line[index] == quote and not _is_escaped(line, index): + return index + 1, True + index += 1 + return len(line), False + + +@dataclass(frozen=True) +class _CLikeRules: + block_comments: bool + line_comments: bool + quoted_strings: bool + template_strings: bool + + +def _resume_c_like_state( + line: str, + row: int, + state: _CarriedState | None, + ranges: dict[int, list[tuple[int, int, str]]], +) -> _ScanResult: + """Resume an open multiline C-like comment/string at the start of a line.""" + if state is None: + return 0, None, False + state_kind, delimiter = state + if state_kind == "block_comment": + close = line.find("*/") + if close == -1: + _append_range(ranges, row, 0, len(line), "comment") + return 0, state, True + end = close + 2 + _append_range(ranges, row, 0, end, "comment") + return end, None, False + if state_kind == "string" and delimiter is not None: + end, closed = _scan_quoted_string(line, 0, delimiter) + _append_range(ranges, row, 0, end, "string") + return (end, None, False) if closed else (0, state, True) + return 0, None, False + + +def _scan_block_comment_start( + line: str, + row: int, + index: int, + ranges: dict[int, list[tuple[int, int, str]]], +) -> tuple[int, _CarriedState | None]: + """Protect a C-like block comment that starts at ``index``.""" + close = line.find("*/", index + 2) + if close == -1: + _append_range(ranges, row, index, len(line), "comment") + return len(line), 
("block_comment", None) + end = close + 2 + _append_range(ranges, row, index, end, "comment") + return end, None + + +def _scan_string_start( + line: str, + row: int, + index: int, + quote: str, + ranges: dict[int, list[tuple[int, int, str]]], +) -> tuple[int, _CarriedState | None]: + """Protect a quoted/template string that starts at ``index``.""" + end, closed = _scan_quoted_string(line, index, quote) + _append_range(ranges, row, index, end, "string") + return (end, None) if closed else (len(line), ("string", quote)) + + +def _scan_line_comment_start( + line: str, + row: int, + index: int, + ranges: dict[int, list[tuple[int, int, str]]], +) -> int: + """Protect a C-like line comment that starts at ``index``.""" + _append_range(ranges, row, index, len(line), "comment") + return len(line) + + +def _scan_c_like_comment_token( + line: str, + row: int, + index: int, + rules: _CLikeRules, + ranges: dict[int, list[tuple[int, int, str]]], +) -> _ScanResult | None: + """Scan a C-like comment opener at ``index`` when rules permit it.""" + two = line[index : index + 2] + if rules.block_comments and two == "/*": + next_index, state = _scan_block_comment_start(line, row, index, ranges) + return next_index, state, state is not None + if rules.line_comments and two == "//": + return _scan_line_comment_start(line, row, index, ranges), None, True + return None + + +def _c_like_string_delimiter(line: str, index: int, rules: _CLikeRules) -> str | None: + """Return the string delimiter starting at ``index`` under ``rules``.""" + char = line[index] + if rules.quoted_strings and char in {"'", '"'}: + return char + if rules.template_strings and char == "`": + return char + return None + + +def _scan_c_like_string_token( + line: str, + row: int, + index: int, + rules: _CLikeRules, + ranges: dict[int, list[tuple[int, int, str]]], +) -> _ScanResult | None: + """Scan a C-like string opener at ``index`` when rules permit it.""" + delimiter = _c_like_string_delimiter(line, index, rules) + if 
delimiter is None: + return None + next_index, state = _scan_string_start(line, row, index, delimiter, ranges) + return next_index, state, state is not None + + +def _scan_c_like_token( + line: str, + row: int, + index: int, + rules: _CLikeRules, + ranges: dict[int, list[tuple[int, int, str]]], +) -> _ScanResult: + """Scan one C-like token opener or advance by one character.""" + for scanner in (_scan_c_like_comment_token, _scan_c_like_string_token): + result = scanner(line, row, index, rules, ranges) + if result is not None: + return result + return index + 1, None, False + + +def _scan_c_like_line( + line: str, + row: int, + start: int, + rules: _CLikeRules, + ranges: dict[int, list[tuple[int, int, str]]], +) -> _CarriedState | None: + """Scan one normal-state C-like line and return any carried state.""" + index = start + while index < len(line): + index, state, stop_line = _scan_c_like_token(line, row, index, rules, ranges) + if stop_line: + return state + return None + + +def _c_like_protected_ranges( + lines: list[str], + *, + block_comments: bool, + line_comments: bool, + quoted_strings: bool, + template_strings: bool, +) -> dict[int, tuple[tuple[int, int, str], ...]]: + """Return protected ranges for C-like comments and quoted strings.""" + rules = _CLikeRules( + block_comments=block_comments, + line_comments=line_comments, + quoted_strings=quoted_strings, + template_strings=template_strings, + ) + ranges: dict[int, list[tuple[int, int, str]]] = {} + state: tuple[str, str | None] | None = None + for row, line in enumerate(lines): + index, state, skip_line = _resume_c_like_state(line, row, state, ranges) + if not skip_line: + state = _scan_c_like_line(line, row, index, rules, ranges) + return {line: tuple(spans) for line, spans in ranges.items()} + + +def _javascript_like_protected_ranges( + lines: list[str], +) -> dict[int, tuple[tuple[int, int, str], ...]]: + """Return JS/TS protected comments and strings.""" + return _c_like_protected_ranges( + lines, + 
block_comments=True, + line_comments=True, + quoted_strings=True, + template_strings=True, + ) + + +def _css_protected_ranges( + lines: list[str], +) -> dict[int, tuple[tuple[int, int, str], ...]]: + """Return CSS protected block comments and quoted strings.""" + return _c_like_protected_ranges( + lines, + block_comments=True, + line_comments=False, + quoted_strings=True, + template_strings=False, + ) + + +def _html_comment_ranges( + lines: list[str], +) -> dict[int, tuple[tuple[int, int, str], ...]]: + """Return protected ranges for HTML ``<!-- ... -->`` comments.""" + ranges: dict[int, list[tuple[int, int, str]]] = {} + in_comment = False + for row, line in enumerate(lines): + index = 0 + if in_comment: + close = line.find("-->") + if close == -1: + _append_range(ranges, row, 0, len(line), "comment") + continue + end = close + 3 + _append_range(ranges, row, 0, end, "comment") + index = end + in_comment = False + + while index < len(line): + start = line.find("<!--", index) + if start == -1: + break + close = line.find("-->", start + 4) + if close == -1: + _append_range(ranges, row, start, len(line), "comment") + in_comment = True + break + end = close + 3 + _append_range(ranges, row, start, end, "comment") + index = end + return {line: tuple(spans) for line, spans in ranges.items()} + + +_PROTECTED_RANGE_HANDLERS: dict[ + str, Callable[[list[str]], dict[int, tuple[tuple[int, int, str], ...]]] +] = { + "source.python": _python_protected_ranges, + "source.js": _javascript_protected_ranges, + "source.ts": _javascript_protected_ranges, + "source.css": _css_protected_ranges_dispatch, +} + + +@dataclass(frozen=True) +class SyntaxService: + """Deterministic bridge from #101 metadata to editor rendering decisions.""" + + config: ExtensionLayerConfig + catalog: GrammarCatalog + detector: LanguageDetector + root: Path = field(default_factory=paths.extensions_root) + + def build_line_highlighter(self, filename: str | None) -> LineHighlighter | None: + """Build a per-line TextMate highlighter for ``filename``, or ``None``. 
+ + Returns ``None`` (caller renders with the legacy highlighter) when the + extension engine is not selected/enabled, the engine is unavailable, no + grammar resolves for the file, or the grammar cannot be compiled by the + tokenizer (for example the imported Markdown/C grammars). + """ + if ( + not self.config.enabled + or self.config.syntax_engine != SYNTAX_ENGINE_EXTENSION + or not TEXTMATE_AVAILABLE + or not filename + ): + return None + resolution = self.resolve(filename) + if resolution.grammar_path is None or resolution.scope_name is None: + return None + grammar_file = self._absolute_grammar_path(resolution.grammar_path) + if grammar_file is None: + return None + # A grammar disabled at runtime after repeated real-line timeouts (for + # example the imported ``make`` grammar) stays on the legacy path for the + # rest of the session, so re-opening such a file never re-incurs hitches. + if is_grammar_quarantined(str(grammar_file)): + return None + tokenizer = load_tokenizer(grammar_file) + if tokenizer is None: + return None + return LineHighlighter(tokenizer=tokenizer, scope_name=resolution.scope_name) + + def _absolute_grammar_path(self, repo_relative: str) -> Path | None: + prefix = f"{paths.REPO_RELATIVE_PREFIX}/" + if not repo_relative.startswith(prefix): + return None + candidate = (self.root / repo_relative[len(prefix) :]).resolve() + return candidate if candidate.is_file() else None + + def resolve(self, filename: str | None) -> SyntaxResolution: + """Resolve language/grammar metadata + the rendering decision for a file.""" + engine = self.config.syntax_engine + diagnostics: list[RegistryDiagnostic] = list(self.config.diagnostics) + + if not self.config.enabled or not filename: + return _legacy_resolution(filename, engine, diagnostics) + + detection = ( + self.detector.detect(filename) + if self.config.language_detection + else LanguageDetectionResult.no_match() + ) + if detection.language_id is None: + return _legacy_resolution(filename, engine, 
diagnostics) + + scope_name, grammar_path = self._resolve_grammar(detection.language_id) + if ( + scope_name is None + and detection.language_id in _REQUIRED_MISSING_GRAMMAR_ASSETS + ): + diagnostics.append( + RegistryDiagnostic( + "warning", + _SERVICE_SOURCE, + "required language grammar missing from imported extension tree: " + f"{detection.language_id}; expected " + f"{_REQUIRED_MISSING_GRAMMAR_ASSETS[detection.language_id]}", + ) + ) + + render_with_extension = ( + engine == SYNTAX_ENGINE_EXTENSION + and EXTENSION_TOKENIZATION_AVAILABLE + and scope_name is not None + ) + if engine == SYNTAX_ENGINE_EXTENSION and not EXTENSION_TOKENIZATION_AVAILABLE: + diagnostics.append( + RegistryDiagnostic( + "info", + _SERVICE_SOURCE, + "TextMate tokenizer (python-textmate) is unavailable; " + "rendering uses the legacy highlighter", + ) + ) + + return SyntaxResolution( + filename=filename, + language_id=detection.language_id, + scope_name=scope_name, + grammar_path=grammar_path, + syntax_engine=engine, + used_extension_metadata=True, + fallback_to_legacy=not render_with_extension, + matched_by=detection.matched_by, + is_ambiguous=detection.is_ambiguous, + diagnostics=tuple(diagnostics), + ) + + def _resolve_grammar(self, language_id: str) -> tuple[str | None, str | None]: + if not self.config.grammar_catalog: + return None, None + grammar = _primary_grammar(self.catalog, language_id) + if grammar is None: + return None, None + return grammar.scope_name, grammar.path_repo_relative + + +def _primary_grammar( + catalog: GrammarCatalog, language_id: str +) -> TextMateGrammar | None: + """Return the primary grammar for a language, deterministically. + + A language may have several grammars (e.g. embedded/injection grammars). The + conventional root scope ``source.<language_id>`` is preferred when present; + otherwise the first grammar in deterministic registry order is used. 
+ """ + grammars = catalog.grammars_for_language(language_id) + if not grammars: + return None + canonical = f"source.{language_id}" + for grammar in grammars: + if grammar.scope_name == canonical: + return grammar + return grammars[0] + + +def _legacy_resolution( + filename: str | None, engine: str, diagnostics: list[RegistryDiagnostic] +) -> SyntaxResolution: + return SyntaxResolution( + filename=filename, + language_id=None, + scope_name=None, + grammar_path=None, + syntax_engine=engine, + used_extension_metadata=False, + fallback_to_legacy=True, + diagnostics=tuple(diagnostics), + ) + + +@lru_cache(maxsize=1) +def _cached_real_parts() -> tuple[GrammarCatalog, LanguageDetector]: + """Scan the imported tree once and reuse the catalog + detector process-wide.""" + registry = build_registry() + catalog = build_grammar_catalog(registry=registry) + detector = build_language_detector(registry=registry) + return catalog, detector + + +def build_syntax_service( + config: ExtensionLayerConfig | object | None = None, + *, + catalog: GrammarCatalog | None = None, + detector: LanguageDetector | None = None, + root: Path | None = None, +) -> SyntaxService: + """Build a :class:`SyntaxService`. + + ``config`` may be an :class:`ExtensionLayerConfig` or a raw config mapping + (its ``[extensions]`` table is parsed). With no ``root`` the catalog and + detector for the imported tree are built once and cached process-wide, so the + tree is scanned only once. Pass ``root=`` (or explicit ``catalog``/ + ``detector``) to exercise fixtures without touching the real cache. 
+ """ + layer_config = ( + config + if isinstance(config, ExtensionLayerConfig) + else ExtensionLayerConfig.from_config(config) # type: ignore[arg-type] + ) + if root is not None: + catalog = catalog or build_grammar_catalog(root=root) + detector = detector or build_language_detector(root=root) + elif catalog is None or detector is None: + real_catalog, real_detector = _cached_real_parts() + catalog = catalog or real_catalog + detector = detector or real_detector + resolved_root = root if root is not None else paths.extensions_root() + return SyntaxService( + config=layer_config, catalog=catalog, detector=detector, root=resolved_root + ) diff --git a/src/ecli/extensions/ecli_integration/textmate_tokenizer.py b/src/ecli/extensions/ecli_integration/textmate_tokenizer.py new file mode 100644 index 00000000..27b11520 --- /dev/null +++ b/src/ecli/extensions/ecli_integration/textmate_tokenizer.py @@ -0,0 +1,291 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Project: Ecli +# File: src/ecli/extensions/ecli_integration/textmate_tokenizer.py +# Website: https://www.ecli.io +# Repository: https://github.com/SSobol77/ecli +# PyPI: https://pypi.org/project/ecli-editor/0.0.1/ +# +# Copyright (c) 2026 Siergej Sobolewski +# +# Licensed under the GNU General Public License version 2 only. +# See the LICENSE file in the project root for full license text. + +"""Real, time-bounded TextMate tokenization over imported grammars (#102). + +This wraps the third-party ``python-textmate`` engine (which uses Oniguruma via +``onigurumacffi``) to tokenize a single line of text into ``(scope, start, end)`` +records using the **actual** imported TextMate grammars under +``src/ecli/extensions/``. It produces genuine TextMate scope names — not a +regex approximation. + +Hard safety contract for the UI hot path (issue #102 freeze): + +* **Bounded tokenization.** Every ``parse`` call runs under a deterministic + wall-clock budget. 
Some imported grammars (notably ``make``) drive this + per-line engine into catastrophic Oniguruma backtracking that never returns + (for example on ``ifeq ($(VAR),x)``). A plain ``try/except`` cannot catch a + non-terminating native loop, so we additionally arm a wall-clock alarm and + treat budget overruns as a failure that degrades that line to the legacy + highlighter. The caller negatively caches the result, so each distinct slow + line is paid at most once — never repeatedly per frame. +* **Adaptive grammar quarantine.** Quarantine is driven by *real* per-line + timeouts, never by synthetic probes: a synthetic adversarial line (such as a + make ``ifeq``) is not valid input for, say, C, so probing would wrongly disable + perfectly good grammars. Instead, once a grammar exceeds the budget on enough + distinct real lines in a session it is disabled wholesale so scrolling a file + it cannot handle never accumulates repeated hitches. +* **Bounded diagnostics.** Slow/failed/quarantined grammars are reported at most + once per key, never per line and never per frame. + +It executes no extension code, runs no ``activationEvents`` or ``package.json`` +scripts, starts no Node runtime, and suppresses all engine stdout/stderr so it +can never corrupt the curses UI. +""" + +from __future__ import annotations + +import contextlib +import io +import json +import logging +import os +import signal +import threading +from collections.abc import Callable, Iterator +from functools import lru_cache +from pathlib import Path +from typing import TypeVar + + +logger = logging.getLogger(__name__) + +# Token record: (scope_name, start_column, end_column) for a single line. 
+TextMateToken = tuple[str, int, int] + +_T = TypeVar("_T") + + +def _budget_seconds(env_name: str, default_ms: int) -> float: + """Read a millisecond budget from the environment, clamped to a sane range.""" + raw = os.environ.get(env_name) + if raw is None: + return default_ms / 1000.0 + try: + value_ms = int(raw) + except ValueError: + return default_ms / 1000.0 + # Clamp to [1ms, 30s] so a misconfigured value can never disable the bound + # or stall the UI for an unbounded time. + value_ms = max(1, min(value_ms, 30_000)) + return value_ms / 1000.0 + + +# Per-line budget on the render hot path. Must comfortably exceed the cost of a +# legitimately slow-but-valid line (real Python ``def`` lines measured ~80ms via +# the engine) while still bounding pathological backtracking (seconds). +_LINE_BUDGET_SECONDS = _budget_seconds("ECLI_TM_LINE_BUDGET_MS", 250) + +# After a grammar exceeds the budget on this many *distinct* real lines in a +# session, it is quarantined (disabled wholesale) so scrolling a file it cannot +# handle never accumulates more than a bounded number of one-time hitches. A +# well-behaved grammar with a single odd line never reaches the threshold and +# keeps highlighting everything else. +_GRAMMAR_QUARANTINE_THRESHOLD = max( + 1, int(os.environ.get("ECLI_TM_QUARANTINE_THRESHOLD", "8") or "8") +) + +_HAS_WALL_CLOCK_ALARM = hasattr(signal, "setitimer") and hasattr(signal, "SIGALRM") + +# Bounded one-time diagnostics. We never log per line or per frame. +_WARNED_KEYS: set[str] = set() +_MAX_WARNINGS = 64 + +# Runtime adaptive quarantine state (keyed by grammar id == grammar path). +_GRAMMAR_TIMEOUT_LINES: dict[str, set[str]] = {} +_QUARANTINED_GRAMMARS: set[str] = set() + + +def is_grammar_quarantined(grammar_id: str) -> bool: + """Return ``True`` if ``grammar_id`` was disabled after repeated timeouts.""" + return grammar_id in _QUARANTINED_GRAMMARS + + +def reset_quarantine_state() -> None: + """Clear adaptive quarantine state. 
Intended for tests only.""" + _GRAMMAR_TIMEOUT_LINES.clear() + _QUARANTINED_GRAMMARS.clear() + _WARNED_KEYS.clear() + + +def _warn_once(key: str, message: str, *args: object) -> None: + """Emit a single bounded diagnostic per ``key`` (never per line/frame).""" + if key in _WARNED_KEYS or len(_WARNED_KEYS) >= _MAX_WARNINGS: + return + _WARNED_KEYS.add(key) + logger.warning(message, *args) + + +class _TokenizeBudgetExceededError(Exception): + """Raised when a tokenization call exceeds its wall-clock budget.""" + + +def _load_engine() -> object | None: + """Import the optional ``python-textmate`` engine, or ``None`` if absent.""" + try: + import textmate # noqa: PLC0415 + except Exception: # pragma: no cover - exercised only without the optional dep + return None + engine: object = textmate + return engine + + +_ENGINE = _load_engine() +TEXTMATE_AVAILABLE = _ENGINE is not None + + +@contextlib.contextmanager +def _silenced() -> Iterator[None]: + """Suppress stdout/stderr so the engine can never corrupt the curses UI.""" + sink = io.StringIO() + with contextlib.redirect_stdout(sink), contextlib.redirect_stderr(sink): + yield + + +def _can_arm_alarm() -> bool: + """Return ``True`` if a SIGALRM wall-clock budget can be armed right now. + + ``signal.setitimer``/SIGALRM only work on the main thread of the process. The + curses render loop runs on the main thread, so this is the common case; off + the main thread (or on platforms without ``setitimer``) we skip the alarm and + rely on the load-time grammar quarantine plus negative caching. + """ + return ( + _HAS_WALL_CLOCK_ALARM and threading.current_thread() is threading.main_thread() + ) + + +def _call_with_budget(fn: Callable[[], _T], seconds: float) -> _T: + """Run ``fn`` under a wall-clock budget, raising on overrun. + + Uses a SIGALRM interval timer (proven to interrupt the Oniguruma engine) and + restores any previously installed handler. 
When no alarm can be armed the + call runs unbounded (best effort) and relies on the grammar quarantine. + """ + if not _can_arm_alarm(): + return fn() + + def _on_alarm(_signum: int, _frame: object) -> None: + raise _TokenizeBudgetExceededError() + + previous = signal.signal(signal.SIGALRM, _on_alarm) + try: + signal.setitimer(signal.ITIMER_REAL, seconds) + return fn() + finally: + signal.setitimer(signal.ITIMER_REAL, 0) + signal.signal(signal.SIGALRM, previous) + + +class TextMateTokenizer: + """Line-oriented, time-bounded TextMate tokenizer for one grammar. + + The underlying engine tokenizes each line independently (stateless), which + fits ECLI's per-line render path and is cached by line content. Multi-line + constructs are therefore scoped per line, which is an accepted limitation of + this engine until a stateful tokenizer is available. + """ + + def __init__(self, grammar: object, grammar_id: str = "") -> None: + """Wrap a compiled engine grammar object for per-line tokenization.""" + self._grammar = grammar + self._grammar_id = grammar_id + + def tokenize_line(self, line: str) -> list[TextMateToken] | None: + """Return ``(scope, start, end)`` records for ``line``, or ``None``. + + Returns ``None`` (caller falls back to legacy for this line) when the + grammar is quarantined, the engine raises, **or** tokenization exceeds the + per-line wall-clock budget. A non-terminating native loop is not an + exception, so the budget is the only thing that can stop it; the caller + negatively caches the ``None`` so the budget is paid at most once per + distinct line. + """ + if self._grammar_id in _QUARANTINED_GRAMMARS: + return None + try: + with _silenced(): + raw = _call_with_budget( + lambda: self._grammar.parse(line), # type: ignore[attr-defined] + _LINE_BUDGET_SECONDS, + ) + except _TokenizeBudgetExceededError: + self._record_timeout(line) + return None + except Exception: + # Includes RecursionError on grammars the engine cannot handle. 
+ return None + tokens: list[TextMateToken] = [] + for entry in raw: + try: + scope, (start, end) = entry + except (ValueError, TypeError): + continue + if ( + isinstance(scope, str) + and isinstance(start, int) + and isinstance(end, int) + ): + tokens.append((scope, start, end)) + return tokens + + def _record_timeout(self, line: str) -> None: + """Record a real per-line timeout and quarantine the grammar if persistent.""" + grammar_id = self._grammar_id or "" + if grammar_id in _QUARANTINED_GRAMMARS: + return + seen = _GRAMMAR_TIMEOUT_LINES.setdefault(grammar_id, set()) + seen.add(line) + if len(seen) >= _GRAMMAR_QUARANTINE_THRESHOLD: + _QUARANTINED_GRAMMARS.add(grammar_id) + _warn_once( + f"quarantine:{grammar_id}", + "TextMate grammar %s quarantined after %d slow lines (catastrophic " + "backtracking in the per-line engine); using the legacy highlighter", + grammar_id, + len(seen), + ) + else: + _warn_once( + f"line-budget:{grammar_id}", + "TextMate tokenization exceeded %.0fms budget for grammar %s; " + "falling back to legacy highlighting for slow lines", + _LINE_BUDGET_SECONDS * 1000, + grammar_id, + ) + + +@lru_cache(maxsize=64) +def load_tokenizer(grammar_path: Path) -> TextMateTokenizer | None: + """Build a :class:`TextMateTokenizer` for a ``.tmLanguage.json`` file. + + Returns ``None`` when the engine is unavailable or the grammar cannot be + loaded/compiled (so the caller falls back to the legacy highlighter). Results + are cached per grammar path, so the scan/compile cost is paid once per grammar + process-wide and never on the render hot path. Runtime quarantine (after + repeated real-line timeouts) is enforced in :meth:`TextMateTokenizer.tokenize_line` + and re-checked by the caller via :func:`is_grammar_quarantined`. 
+ """ + if _ENGINE is None: + return None + grammar_id = str(grammar_path) + try: + grammar_dict = json.loads(Path(grammar_path).read_text(encoding="utf-8")) + with _silenced(): + repository = _ENGINE.TextMateGrammarRepository([grammar_dict]) # type: ignore[attr-defined] + grammar = _ENGINE.TextMateGrammar(grammar_dict, repository) # type: ignore[attr-defined] + except Exception as error: + # Includes RecursionError on grammars the engine cannot compile. + logger.debug("TextMate grammar load failed for %s: %s", grammar_path, error) + return None + return TextMateTokenizer(grammar, grammar_id=grammar_id) diff --git a/src/ecli/extensions/ecli_integration/theme_bridge.py b/src/ecli/extensions/ecli_integration/theme_bridge.py new file mode 100644 index 00000000..e26856e9 --- /dev/null +++ b/src/ecli/extensions/ecli_integration/theme_bridge.py @@ -0,0 +1,283 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Project: Ecli +# File: src/ecli/extensions/ecli_integration/theme_bridge.py +# Website: https://www.ecli.io +# Repository: https://github.com/SSobol77/ecli +# PyPI: https://pypi.org/project/ecli-editor/0.0.1/ +# +# Copyright (c) 2026 Siergej Sobolewski +# +# Licensed under the GNU General Public License version 2 only. +# See the LICENSE file in the project root for full license text. + +"""Deterministic TextMate scope -> ECLI style-category bridge (#102). + +This maps TextMate scope names (for example ``keyword.control.flow.python`` or +``string.quoted.double.json``) onto a small, stable set of ECLI style categories +that correspond to existing ECLI/curses colour names (``keyword``, ``string``, +``comment``, ``number`` …). It also flattens overlapping/nested TextMate tokens +into non-overlapping per-line spans, resolving overlaps by scope specificity so +the most specific scope wins. + +It is data-only and deterministic: no theme files are executed, no VS Code theme +is activated, and the same input always yields the same spans. 
+""" + +from __future__ import annotations + +from collections.abc import Iterable +from dataclasses import dataclass + + +# The default category renders as plain text (the editor's default colour). +DEFAULT_CATEGORY = "default" + +# ECLI style categories produced by this bridge. Each name matches an existing +# ECLI colour key (see Ecli._SYNTAX_COLOR_STRUCTURE), so the editor can map a +# category straight onto a curses attribute. +CATEGORIES: frozenset[str] = frozenset( + { + "keyword", + "string", + "comment", + "number", + "constant", + "type", + "function", + "variable", + "tag", + "attribute", + "builtin", + "operator", + "decorator", + "error", + "punctuation", + DEFAULT_CATEGORY, + } +) + +# Ordered, most-specific-first scope-prefix -> category rules. The first rule +# whose prefix equals the scope or is a dotted prefix of it wins. ``None`` means +# "render as default" (used for structural ``meta``/``source`` scopes and generic +# punctuation, so they do not over-colour the line). +_SCOPE_RULES: tuple[tuple[str, str | None], ...] 
= ( + ("comment", "comment"), + ("punctuation.definition.comment", "comment"), + ("punctuation.definition.string", "string"), + ("string.regexp", "string"), + ("string", "string"), + ("constant.numeric", "number"), + ("constant.character.escape", "string"), + ("constant.language", "constant"), + ("constant.other.color", "constant"), + ("constant", "constant"), + ("keyword.operator", "operator"), + ("keyword.control", "keyword"), + ("keyword", "keyword"), + ("storage.type", "type"), + ("storage.modifier", "keyword"), + ("storage", "keyword"), + ("support.function", "function"), + ("support.class", "type"), + ("support.type", "type"), + ("support.constant", "constant"), + ("support.variable", "variable"), + ("support", "builtin"), + ("entity.name.function", "function"), + ("entity.name.type", "type"), + ("entity.name.class", "type"), + ("entity.name.tag", "tag"), + ("entity.name.section", "function"), + ("entity.other.attribute-name", "attribute"), + ("entity.other.inherited-class", "type"), + ("entity.name", "function"), + ("variable.parameter", "variable"), + ("variable.language", "keyword"), + ("variable.function", "function"), + ("variable", "variable"), + ("markup.heading", "function"), + ("markup.bold", "type"), + ("markup.italic", "type"), + ("markup.raw.block", "string"), + ("markup.inline.raw", "string"), + ("markup.fenced_code", "string"), + ("markup.underline.link", "function"), + ("markup.quote", "comment"), + ("markup.list", "operator"), + ("invalid.deprecated", "error"), + ("invalid", "error"), + ("keyword.other.unit", "number"), + ("punctuation.section.embedded", "operator"), + ("meta", None), + ("source", None), + ("text", None), + ("punctuation", "punctuation"), +) + +# Protected ranges (e.g. the Python string/docstring guard) are authoritative: +# inside a string nothing else — not even ``invalid``/``error`` scopes the engine +# emits for keywords-in-strings — may override the string category. 
This priority +# sits above every entry in ``_CATEGORY_PRIORITY`` so a protected range always wins. +_PROTECTED_PRIORITY = -1 + +_CATEGORY_PRIORITY: dict[str, int] = { + "error": 0, + "comment": 1, + "string": 2, + "keyword": 4, + "constant": 5, + "number": 5, + "function": 6, + "builtin": 6, + "decorator": 6, + "type": 7, + "tag": 7, + "attribute": 8, + "variable": 8, + "operator": 9, + "punctuation": 9, + DEFAULT_CATEGORY: 99, +} + + +def _category_for_single_scope(scope: str) -> tuple[str | None, int]: + best_category: str | None = None + best_length = -1 + for prefix, category in _SCOPE_RULES: + matches = scope == prefix or scope.startswith(prefix + ".") + if matches and len(prefix) > best_length: + best_category = category + best_length = len(prefix) + return best_category, best_length + + +def scope_to_category(scope: str) -> str | None: + """Return the ECLI style category for a TextMate scope, or ``None``. + + ``None`` means the scope should render as default text. A scope value may be a + space-separated stack (for example ``meta.definition.variable.ts + variable.other.constant.ts``); each sub-scope is evaluated and the most + specific (longest matching dotted prefix) decision wins. Within a single + scope, ``constant.numeric.integer`` maps to ``number`` rather than the generic + ``constant`` rule. 
+ """ + best_category: str | None = None + best_priority = _CATEGORY_PRIORITY[DEFAULT_CATEGORY] + best_length = -1 + for sub_scope in scope.split(): + category, length = _category_for_single_scope(sub_scope) + if category is None: + continue + priority = _CATEGORY_PRIORITY.get( + category, _CATEGORY_PRIORITY[DEFAULT_CATEGORY] + ) + if priority < best_priority or ( + priority == best_priority and length > best_length + ): + best_category = category + best_priority = priority + best_length = length + return best_category + + +def _specificity(scope: str) -> int: + return scope.count(".") + + +@dataclass +class _PaintBuffers: + categories: list[str] + priorities: list[int] + specificities: list[int] + + +def _paint_range( + buffers: _PaintBuffers, + start: int, + end: int, + category: str, + strength: tuple[int, int], +) -> None: + priority, specificity = strength + length = len(buffers.categories) + clamped_start = max(0, start) + clamped_end = min(length, end) + for index in range(clamped_start, clamped_end): + if priority < buffers.priorities[index] or ( + priority == buffers.priorities[index] + and specificity >= buffers.specificities[index] + ): + buffers.categories[index] = category + buffers.priorities[index] = priority + buffers.specificities[index] = specificity + + +def tokens_to_spans( + line: str, + tokens: Iterable[tuple[str, int, int]], + protected_ranges: Iterable[tuple[int, int, str]] = (), +) -> list[tuple[str, str]]: + """Flatten overlapping TextMate tokens into ``(text, category)`` spans. + + Every character starts as :data:`DEFAULT_CATEGORY`. Tokens are painted from + broadest to narrowest (ties broken so the more specific scope paints last), so + the most specific visible scope wins per character. The returned spans tile + the whole line exactly (including untokenized gaps), so the editor can render + them without any out-of-bounds slicing. 
+ """ + length = len(line) + if length == 0: + return [] + + categories = [DEFAULT_CATEGORY] * length + priorities = [_CATEGORY_PRIORITY[DEFAULT_CATEGORY]] * length + specificities = [-1] * length + buffers = _PaintBuffers(categories, priorities, specificities) + # Broadest first; among equal width, less specific first -> specific wins. + ordered = sorted( + tokens, + key=lambda token: (-(token[2] - token[1]), _specificity(token[0])), + ) + for scope, start, end in ordered: + category = scope_to_category(scope) + if category is None or category == DEFAULT_CATEGORY: + continue + priority = _CATEGORY_PRIORITY.get( + category, _CATEGORY_PRIORITY[DEFAULT_CATEGORY] + ) + specificity = _specificity(scope) + _paint_range( + buffers, + start, + end, + category, + (priority, specificity), + ) + + for start, end, protected_category in protected_ranges: + resolved_category = ( + protected_category if protected_category in CATEGORIES else DEFAULT_CATEGORY + ) + if resolved_category == DEFAULT_CATEGORY: + continue + _paint_range( + buffers, + start, + end, + resolved_category, + (_PROTECTED_PRIORITY, 10_000), + ) + + spans: list[tuple[str, str]] = [] + current_text: list[str] = [line[0]] + current_category = categories[0] + for index in range(1, length): + if categories[index] == current_category: + current_text.append(line[index]) + else: + spans.append(("".join(current_text), current_category)) + current_text = [line[index]] + current_category = categories[index] + spans.append(("".join(current_text), current_category)) + return spans diff --git a/src/ecli/extensions/ecli_integration/theme_registry.py b/src/ecli/extensions/ecli_integration/theme_registry.py new file mode 100644 index 00000000..719ab394 --- /dev/null +++ b/src/ecli/extensions/ecli_integration/theme_registry.py @@ -0,0 +1,669 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Project: Ecli +# File: src/ecli/extensions/ecli_integration/theme_registry.py +# Website: https://www.ecli.io +# Repository: 
https://github.com/SSobol77/ecli +# PyPI: https://pypi.org/project/ecli-editor/0.0.1/ +# +# Copyright (c) 2026 Siergej Sobolewski +# +# Licensed under the GNU General Public License version 2 only. +# See the LICENSE file in the project root for full license text. + +"""Extension-backed VS Code/TextMate theme registry. + +This module is ECLI-owned adapter code over the imported extension asset tree. +It discovers ``contributes.themes`` metadata, reads referenced JSON/JSONC theme +files, resolves local ``include`` chains, and exposes deterministic theme and +TextMate token-colour data. It never executes extension code, activation events, +``package.json`` scripts, Node, npm, or any command. +""" + +from __future__ import annotations + +import json +import re +from collections.abc import Mapping, Sequence +from dataclasses import dataclass, field +from functools import lru_cache +from pathlib import Path + +from . import paths +from .manifest import RegistryDiagnostic, ThemeContribution +from .registry import ExtensionRegistry, build_registry + + +_SOURCE = "theme_registry" + +_TARGET_THEME_NUMBERS: tuple[tuple[int, str], ...] 
= ( + (101, "GitHub Light Default"), + (102, "GitHub Light"), + (103, "GitHub Light Colorblind (Beta)"), + (104, "Visual Studio Light"), + (105, "Visual Studio 2017 Light - C++"), + (106, "Light Modern"), + (107, "Light+"), + (108, "Quiet Light"), + (109, "Solarized Light"), + (110, "JetBrains Rider New UI Light"), + (201, "GitHub Dark Default"), + (202, "GitHub Dark"), + (203, "GitHub Dark Dimmed"), + (204, "Visual Studio Dark"), + (205, "Visual Studio 2017 Dark - C++"), + (206, "Dark Modern"), + (207, "Dark+"), + (208, "Monokai"), + (209, "Monokai Dimmed"), + (210, "Tomorrow Night Blue"), + (211, "Abyss"), + (212, "Atom One Dark"), + (213, "Kimbie Dark"), + (214, "Solarized Dark"), + (215, "Red"), + (301, "Dark High Contrast"), + (302, "GitHub Dark High Contrast"), + (303, "GitHub Light High Contrast"), + (304, "Light High Contrast"), +) + +TARGET_THEME_NAMES: tuple[str, ...] = tuple( + name for _number, name in _TARGET_THEME_NUMBERS +) +TARGET_THEME_NUMBERS: Mapping[int, str] = dict(_TARGET_THEME_NUMBERS) +THEME_NUMBERING_POLICY: Mapping[str, str] = { + "deprecated_aliases": "1-8", + "light": "100-199", + "dark": "200-299", + "high_contrast": "300-399", + "reserved_custom_imported": "800-899", +} + + +@dataclass(frozen=True) +class TextMateTokenColor: + """A normalized ``tokenColors`` rule from a VS Code theme file.""" + + scope_selectors: tuple[str, ...] + foreground: str | None + font_style: tuple[str, ...] = () + name: str | None = None + rule_index: int = 0 + + +@dataclass(frozen=True) +class TextMateResolvedStyle: + """Resolved TextMate style for one scope stack.""" + + foreground: str | None + font_style: tuple[str, ...] 
= () + matched_selector: str | None = None + specificity: int = -1 + rule_index: int = -1 + + +@dataclass(frozen=True) +class ExtensionTheme: + """Loaded, data-only VS Code colour theme.""" + + number: int | None + name: str + source_id: str | None + theme_type: str + ui_theme: str | None + path_repo_relative: str + editor_colors: tuple[tuple[str, str], ...] = () + token_colors: tuple[TextMateTokenColor, ...] = () + semantic_token_colors: tuple[tuple[str, object], ...] = () + source_manifest: str | None = None + + @property + def colors(self) -> dict[str, str]: + """Return editor/UI colour keys as a plain dict.""" + return dict(self.editor_colors) + + def resolve_token_style( + self, scope_stack: str | Sequence[str] + ) -> TextMateResolvedStyle: + """Resolve the best token colour for a TextMate scope stack. + + The matching is intentionally VS Code/TextMate-like, not a full selector + engine: compound selectors must match scope prefixes in stack order, more + specific selectors beat generic selectors, and later rules break ties. + This covers the common theme selectors used by the imported VS Code theme + assets without executing any extension code. 
+ """ + scopes = ( + tuple(part for part in scope_stack.split() if part) + if isinstance(scope_stack, str) + else tuple(scope_stack) + ) + best = TextMateResolvedStyle(foreground=_default_token_foreground(self)) + for rule in self.token_colors: + if rule.foreground is None: + continue + for selector in rule.scope_selectors: + specificity = _selector_specificity(selector, scopes) + if specificity < 0: + continue + if (specificity, rule.rule_index) >= ( + best.specificity, + best.rule_index, + ): + best = TextMateResolvedStyle( + foreground=rule.foreground, + font_style=rule.font_style, + matched_selector=selector, + specificity=specificity, + rule_index=rule.rule_index, + ) + return best + + +@dataclass(frozen=True) +class ThemeRegistry: + """Deterministic registry of extension-backed colour themes.""" + + themes: tuple[ExtensionTheme, ...] = field(default_factory=tuple) + diagnostics: tuple[RegistryDiagnostic, ...] = field(default_factory=tuple) + + def list_available_extension_themes(self) -> tuple[ExtensionTheme, ...]: + """Return loaded extension themes in deterministic numeric/name order.""" + return self.themes + + def get_theme(self, number: int) -> ExtensionTheme | None: + """Return an extension theme by numeric id, or ``None``.""" + for theme in self.themes: + if theme.number == number: + return theme + return None + + def get_theme_by_name(self, name: str) -> ExtensionTheme | None: + """Return an extension theme by exact display name, or ``None``.""" + for theme in self.themes: + if theme.name == name: + return theme + return None + + def list_diagnostics(self) -> tuple[RegistryDiagnostic, ...]: + """Return deterministic diagnostics for missing/invalid themes.""" + return self.diagnostics + + def missing_target_names(self) -> tuple[str, ...]: + """Return target professional themes absent from ``src/ecli/extensions``.""" + present = {theme.name for theme in self.themes} + return tuple(name for name in TARGET_THEME_NAMES if name not in present) + + +def 
_diagnostic( + level: str, message: str, manifest: str = _SOURCE +) -> RegistryDiagnostic: + return RegistryDiagnostic(level, manifest, message) + + +def _append_string_char(out: list[str], char: str, escaped: bool) -> tuple[bool, bool]: + """Append one JSON string character and return ``(in_string, escaped)``.""" + out.append(char) + if escaped: + return True, False + if char == "\\": + return True, True + if char == '"': + return False, False + return True, False + + +def _skip_line_comment(text: str, index: int) -> int: + """Return the index after a ``//`` comment body.""" + index += 2 + while index < len(text) and text[index] not in "\r\n": + index += 1 + return index + + +def _skip_block_comment(text: str, index: int, out: list[str]) -> int: + """Replace a ``/* ... */`` comment body with whitespace/newlines.""" + index += 2 + while index + 1 < len(text) and not (text[index] == "*" and text[index + 1] == "/"): + out.append("\n" if text[index] in "\r\n" else " ") + index += 1 + return index + 2 if index + 1 < len(text) else index + + +def _strip_json_comments(text: str) -> str: + out: list[str] = [] + in_string = False + escaped = False + index = 0 + while index < len(text): + char = text[index] + nxt = text[index + 1] if index + 1 < len(text) else "" + if in_string: + in_string, escaped = _append_string_char(out, char, escaped) + index += 1 + elif char == '"': + in_string = True + out.append(char) + index += 1 + elif char == "/" and nxt == "/": + index = _skip_line_comment(text, index) + elif char == "/" and nxt == "*": + index = _skip_block_comment(text, index, out) + else: + out.append(char) + index += 1 + return "".join(out) + + +def _next_nonspace_index(text: str, index: int) -> int: + """Return the first non-space index at or after ``index``.""" + while index < len(text) and text[index].isspace(): + index += 1 + return index + + +def _is_trailing_comma(text: str, index: int) -> bool: + """Return whether ``text[index]`` is followed by a closing JSON 
bracket.""" + lookahead = _next_nonspace_index(text, index + 1) + return lookahead < len(text) and text[lookahead] in "}]" + + +def _strip_trailing_commas(text: str) -> str: + out: list[str] = [] + in_string = False + escaped = False + index = 0 + while index < len(text): + char = text[index] + if in_string: + in_string, escaped = _append_string_char(out, char, escaped) + elif char == '"': + in_string = True + out.append(char) + elif char != "," or not _is_trailing_comma(text, index): + out.append(char) + index += 1 + return "".join(out) + + +def _load_jsonc( + path: Path, diagnostics: list[RegistryDiagnostic], source: str +) -> dict[str, object] | None: + try: + raw = path.read_text(encoding="utf-8") + except OSError as error: + diagnostics.append( + _diagnostic("error", f"cannot read theme file: {error}", source) + ) + return None + try: + data = json.loads(_strip_trailing_commas(_strip_json_comments(raw))) + except json.JSONDecodeError as error: + diagnostics.append(_diagnostic("error", f"invalid theme JSON: {error}", source)) + return None + if not isinstance(data, dict): + diagnostics.append( + _diagnostic("error", "theme file is not a JSON object", source) + ) + return None + return data + + +def _dict_field(document: Mapping[str, object], key: str) -> dict[object, object]: + value = document.get(key) + return dict(value) if isinstance(value, dict) else {} + + +def _list_field(document: Mapping[str, object], key: str) -> list[object]: + value = document.get(key) + return list(value) if isinstance(value, list) else [] + + +def _merge_theme_documents( + base: dict[str, object], override: dict[str, object] +) -> dict[str, object]: + merged = dict(base) + merged["colors"] = { + **_dict_field(base, "colors"), + **_dict_field(override, "colors"), + } + merged["semanticTokenColors"] = { + **_dict_field(base, "semanticTokenColors"), + **_dict_field(override, "semanticTokenColors"), + } + merged["tokenColors"] = [ + *_list_field(base, "tokenColors"), + 
*_list_field(override, "tokenColors"), + ] + for key, value in override.items(): + if key not in {"colors", "semanticTokenColors", "tokenColors", "include"}: + merged[key] = value + return merged + + +def _load_theme_document( + theme_file: Path, + root: Path, + diagnostics: list[RegistryDiagnostic], + stack: tuple[Path, ...] = (), +) -> dict[str, object] | None: + resolved = theme_file.resolve() + source = paths.to_repo_relative(resolved, root) + if resolved in stack: + diagnostics.append( + _diagnostic("error", f"theme include cycle at {source}", source) + ) + return None + data = _load_jsonc(resolved, diagnostics, source) + if data is None: + return None + include = data.get("include") + if not isinstance(include, str): + return data + included = (resolved.parent / include).resolve() + if not paths.is_within_root(included, root): + diagnostics.append( + _diagnostic( + "error", f"theme include escapes extension tree: {include!r}", source + ) + ) + return data + if not included.is_file(): + diagnostics.append( + _diagnostic("warning", f"theme include missing: {include!r}", source) + ) + return data + base = _load_theme_document(included, root, diagnostics, (*stack, resolved)) + return _merge_theme_documents(base, data) if base is not None else data + + +def _repo_relative_to_path(repo_relative: str | None, root: Path) -> Path | None: + if repo_relative is None: + return None + prefix = f"{paths.REPO_RELATIVE_PREFIX}/" + if not repo_relative.startswith(prefix): + return None + candidate = (root / repo_relative[len(prefix) :]).resolve() + return candidate if paths.is_within_root(candidate, root) else None + + +def _normalize_hex(value: object) -> str | None: + if not isinstance(value, str): + return None + text = value.strip() + if not text.startswith("#"): + return None + body = text[1:] + if len(body) in {3, 4}: + body = "".join(ch * 2 for ch in body[:3]) + elif len(body) in {6, 8}: + body = body[:6] + else: + return None + if re.fullmatch(r"[0-9a-fA-F]{6}", body) 
is None: + return None + return f"#{body.upper()}" + + +def _theme_type(ui_theme: str | None, data: Mapping[str, object]) -> str: + declared = data.get("type") + if isinstance(declared, str): + normalized = declared.lower().replace("_", "-") + if normalized in {"light", "dark", "high-contrast", "custom"}: + return normalized + ui = (ui_theme or "").lower() + if ui in {"hc-black", "hc-light"}: + return "high-contrast" + if "dark" in ui or ui == "hc-black": + return "dark" + if ui == "vs" or "light" in ui: + return "light" + return "custom" + + +def _theme_name(contribution: ThemeContribution, data: Mapping[str, object]) -> str: + raw_name = data.get("name") + if isinstance(raw_name, str) and raw_name.strip(): + return raw_name.strip() + if contribution.theme_id: + return contribution.theme_id + if contribution.label and not contribution.label.startswith("%"): + return contribution.label + return contribution.path or "Unnamed Theme" + + +def _normalize_scope_selectors(raw_scope: object) -> tuple[str, ...]: + raw_items: list[str] = [] + if isinstance(raw_scope, str): + raw_items = raw_scope.split(",") + elif isinstance(raw_scope, list): + for item in raw_scope: + if isinstance(item, str): + raw_items.extend(item.split(",")) + return tuple(selector.strip() for selector in raw_items if selector.strip()) + + +def _normalize_font_style(raw_style: object) -> tuple[str, ...]: + if not isinstance(raw_style, str): + return () + if raw_style.strip().lower() in {"", "none"}: + return () + accepted = {"bold", "italic", "underline", "strikethrough"} + return tuple(part for part in raw_style.strip().lower().split() if part in accepted) + + +def _parse_token_colors(raw: object) -> tuple[TextMateTokenColor, ...]: + if not isinstance(raw, list): + return () + result: list[TextMateTokenColor] = [] + for index, entry in enumerate(raw): + if not isinstance(entry, dict): + continue + settings = entry.get("settings") + if not isinstance(settings, dict): + continue + selectors = 
_normalize_scope_selectors(entry.get("scope")) + foreground = _normalize_hex(settings.get("foreground")) + font_style = _normalize_font_style(settings.get("fontStyle")) + name = entry.get("name") if isinstance(entry.get("name"), str) else None + result.append( + TextMateTokenColor( + scope_selectors=selectors, + foreground=foreground, + font_style=font_style, + name=name, + rule_index=index, + ) + ) + return tuple(result) + + +def _normalize_color_mapping(raw: object) -> tuple[tuple[str, str], ...]: + if not isinstance(raw, dict): + return () + pairs = [] + for key, value in raw.items(): + normalized = _normalize_hex(value) + if isinstance(key, str) and normalized is not None: + pairs.append((key, normalized)) + return tuple(sorted(pairs)) + + +def _semantic_pairs(raw: object) -> tuple[tuple[str, object], ...]: + if not isinstance(raw, dict): + return () + return tuple( + sorted((key, value) for key, value in raw.items() if isinstance(key, str)) + ) + + +def _load_extension_theme( + contribution: ThemeContribution, + manifest_name: str | None, + root: Path, + diagnostics: list[RegistryDiagnostic], +) -> ExtensionTheme | None: + theme_file = _repo_relative_to_path(contribution.path_repo_relative, root) + if theme_file is None or not theme_file.is_file(): + diagnostics.append( + _diagnostic( + "warning", + f"theme target file missing: {contribution.path_repo_relative}", + manifest_name or _SOURCE, + ) + ) + return None + data = _load_theme_document(theme_file, root, diagnostics) + if data is None: + return None + return ExtensionTheme( + number=None, + name=_theme_name(contribution, data), + source_id=contribution.theme_id, + theme_type=_theme_type(contribution.ui_theme, data), + ui_theme=contribution.ui_theme, + path_repo_relative=contribution.path_repo_relative + or paths.to_repo_relative(theme_file, root), + editor_colors=_normalize_color_mapping(data.get("colors")), + token_colors=_parse_token_colors(data.get("tokenColors")), + 
semantic_token_colors=_semantic_pairs(data.get("semanticTokenColors")), + source_manifest=manifest_name, + ) + + +def _assign_numbers(themes: Sequence[ExtensionTheme]) -> tuple[ExtensionTheme, ...]: + by_name: dict[str, ExtensionTheme] = {} + for theme in themes: + by_name.setdefault(theme.name, theme) + if theme.source_id: + by_name.setdefault(theme.source_id, theme) + numbered: dict[int, ExtensionTheme] = {} + used_paths: set[str] = set() + for number, name in _TARGET_THEME_NUMBERS: + candidate = by_name.get(name) + if candidate is not None: + numbered[number] = _with_number(candidate, number, name) + used_paths.add(candidate.path_repo_relative) + + unnumbered: list[ExtensionTheme] = [] + for theme in sorted(themes, key=lambda item: (item.name, item.path_repo_relative)): + if theme.path_repo_relative in used_paths: + continue + unnumbered.append(theme) + return (*tuple(numbered[key] for key in sorted(numbered)), *tuple(unnumbered)) + + +def _with_number( + theme: ExtensionTheme, number: int, display_name: str | None = None +) -> ExtensionTheme: + return ExtensionTheme( + number=number, + name=display_name or theme.name, + source_id=theme.source_id, + theme_type=theme.theme_type, + ui_theme=theme.ui_theme, + path_repo_relative=theme.path_repo_relative, + editor_colors=theme.editor_colors, + token_colors=theme.token_colors, + semantic_token_colors=theme.semantic_token_colors, + source_manifest=theme.source_manifest, + ) + + +def _scope_prefix_matches(scope: str, selector: str) -> bool: + return scope == selector or scope.startswith(selector + ".") + + +def _selector_specificity(selector: str, scopes: Sequence[str]) -> int: + selector = selector.strip() + if not selector or selector.startswith("-"): + return -1 + parts = tuple( + part for part in selector.split() if part and not part.startswith("-") + ) + if not parts: + return -1 + search_start = 0 + score = 0 + for part in parts: + matched_index = -1 + for index in range(search_start, len(scopes)): + if 
_scope_prefix_matches(scopes[index], part): + matched_index = index + break + if matched_index < 0: + return -1 + search_start = matched_index + 1 + score += part.count(".") * 10 + len(part) + return score + len(parts) * 100 + + +def _default_token_foreground(theme: ExtensionTheme) -> str | None: + colors = theme.colors + if "editor.foreground" in colors: + return colors["editor.foreground"] + for rule in theme.token_colors: + if not rule.scope_selectors and rule.foreground: + return rule.foreground + return None + + +def _missing_target_diagnostics( + themes: Sequence[ExtensionTheme], +) -> tuple[RegistryDiagnostic, ...]: + present = {theme.name for theme in themes if theme.number is not None} + present.update( + theme.source_id + for theme in themes + if theme.number is not None and theme.source_id + ) + return tuple( + _diagnostic("info", f"target theme missing from imported tree: {name}") + for name in TARGET_THEME_NAMES + if name not in present + ) + + +def _unnumbered_theme_diagnostics( + themes: Sequence[ExtensionTheme], +) -> tuple[RegistryDiagnostic, ...]: + return tuple( + _diagnostic( + "info", + "imported theme has no canonical number and is left unassigned " + f"until the reserved 800-899 custom/imported theme feature exists: " + f"{theme.name} ({theme.path_repo_relative})", + ) + for theme in themes + if theme.number is None + ) + + +def build_theme_registry( + registry: ExtensionRegistry | None = None, root: Path | None = None +) -> ThemeRegistry: + """Build an extension-backed theme registry from ``contributes.themes``.""" + base = (root or paths.extensions_root()).resolve() + source_registry = registry or build_registry(base) + diagnostics: list[RegistryDiagnostic] = list(source_registry.list_diagnostics()) + loaded: list[ExtensionTheme] = [] + + for manifest in source_registry.list_manifests(): + for contribution in manifest.themes: + theme = _load_extension_theme( + contribution, manifest.directory_name, base, diagnostics + ) + if theme is not 
None: + loaded.append(theme) + + numbered = _assign_numbers(loaded) + diagnostics.extend(_missing_target_diagnostics(numbered)) + diagnostics.extend(_unnumbered_theme_diagnostics(numbered)) + return ThemeRegistry(themes=numbered, diagnostics=tuple(diagnostics)) + + +@lru_cache(maxsize=1) +def cached_theme_registry() -> ThemeRegistry: + """Return the cached registry for the real imported extension tree.""" + return build_theme_registry() diff --git a/src/ecli/integrations/AI.py b/src/ecli/integrations/AI.py index db0d0c3f..26122a94 100755 --- a/src/ecli/integrations/AI.py +++ b/src/ecli/integrations/AI.py @@ -22,18 +22,28 @@ - Centralized error handling for each provider. - A factory method `get_ai_client()` for dynamic client instantiation. -Supported providers: OpenAI, Gemini, Mistral, Hugging Face, Claude, Grok. +Supported providers: OpenAI, Gemini, Mistral, Hugging Face, Claude, Grok, +DeepSeek, Qwen (DashScope), Kimi (Moonshot). """ import asyncio import json import os +import re from typing import Any, Optional, cast import aiohttp from ecli.utils.logging_config import logger +_SENSITIVE_LOG_PATTERNS: tuple[re.Pattern[str], ...] = ( + re.compile( + r"(?i)(api[_-]?key|authorization|bearer|token|secret|password)" + r"\s*[:=]\s*[^,\s}\]]+" + ), +) +_MAX_PROVIDER_ERROR_LOG_CHARS = 500 + class AiConfigurationError(ValueError): """Expected AI provider configuration error suitable for user-facing UI.""" @@ -75,6 +85,16 @@ def ai_configuration_panel_message( return "\n".join(lines) +def _safe_provider_error_excerpt(response_text: str) -> str: + """Return a bounded, redacted provider error excerpt for logs.""" + excerpt = response_text[:_MAX_PROVIDER_ERROR_LOG_CHARS] + for pattern in _SENSITIVE_LOG_PATTERNS: + excerpt = pattern.sub(r"\1=", excerpt) + if len(response_text) > _MAX_PROVIDER_ERROR_LOG_CHARS: + excerpt += "..." 
+ return excerpt + + # Get logger to ensure messages match the general logging system # ==================== BaseAiClient Class ==================== class BaseAiClient: @@ -851,6 +871,136 @@ async def ask_async( return f"Error: Unexpected Grok error: {e}" +# ============== OpenAI-compatible providers (DeepSeek, Qwen, Kimi) ========== + + +class OpenAICompatibleClient(BaseAiClient): + """Base client for providers exposing an OpenAI-style chat completions API. + + DeepSeek, Alibaba Qwen (DashScope compatible mode), and Moonshot Kimi all + speak the OpenAI ``/chat/completions`` protocol, so they share one + implementation. Subclasses set ``API_URL``, ``PROVIDER`` and ``KEY_ENV``. + """ + + API_URL = "" + PROVIDER = "OpenAI-compatible" + KEY_ENV = "" + + def _handle_compatible_error(self, status_code: int, response_text: str) -> str: + """Return a user-friendly message for a non-200 response.""" + response_lower = response_text.lower() + if status_code == 401: + return ( + f"Error: Invalid {self.PROVIDER} API key. Please check {self.KEY_ENV}." + ) + if status_code == 403: + return ( + f"Error: Access to {self.PROVIDER} API forbidden (permissions or " + "credits). Please check your account." + ) + if status_code == 429: + return ( + f"Error: {self.PROVIDER} rate limit exceeded. Please try again later." + ) + if status_code == 400 and "model" in response_lower: + return f"Error: Unsupported {self.PROVIDER} model: {self.model}" + if status_code == 500: + return ( + f"Error: {self.PROVIDER} internal server error. Please try again later." + ) + return f"{self.PROVIDER} Error {status_code}: {response_text[:200]}..." + + async def ask_async( + self, prompt: str, system_msg: str = "You are a helpful assistant." 
+ ) -> str: + """Send a chat completion request to an OpenAI-compatible endpoint.""" + headers = { + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json", + } + body = { + "model": self.model, + "messages": [ + {"role": "system", "content": system_msg}, + {"role": "user", "content": prompt}, + ], + "max_tokens": 2048, + # Lower temperature favours deterministic, correct code generation. + "temperature": 0.3, + } + + logger.debug("Sending request to %s API...", self.PROVIDER) + try: + session = self._get_session() + timeout = aiohttp.ClientTimeout(total=90) + async with session.post( + self.API_URL, headers=headers, json=body, timeout=timeout + ) as response: + logger.info( + "Received response from %s with status: %s", + self.PROVIDER, + response.status, + ) + if response.status != 200: + response_text = await response.text() + logger.error( + "%s API Error %s: %s", + self.PROVIDER, + response.status, + _safe_provider_error_excerpt(response_text), + ) + return self._handle_compatible_error(response.status, response_text) + + data = await response.json() + choices = data.get("choices", []) + if not choices: + if "error" in data: + message = data["error"].get("message", "Unknown error") + return f"{self.PROVIDER} API Error: {message}" + return f"Error: Empty response from {self.PROVIDER}." + content = choices[0].get("message", {}).get("content", "Empty response") + return str(content).strip() + + except TimeoutError: + logger.error("Request to %s API timed out.", self.PROVIDER) + return f"Error: {self.PROVIDER} request timeout. Please try again later." 
+ except aiohttp.ClientError as e: + logger.error("Network error to %s: %s", self.PROVIDER, e, exc_info=True) + return f"Error: Network error connecting to {self.PROVIDER}: {e}" + except Exception as e: + logger.error( + "An unexpected error occurred in %s client: %s", + self.PROVIDER, + e, + exc_info=True, + ) + return f"Error: Unexpected {self.PROVIDER} error: {e}" + + +class DeepSeekClient(OpenAICompatibleClient): + """Client for DeepSeek (OpenAI-compatible).""" + + API_URL = "https://api.deepseek.com/v1/chat/completions" + PROVIDER = "DeepSeek" + KEY_ENV = "DEEPSEEK_API_KEY" + + +class QwenClient(OpenAICompatibleClient): + """Client for Alibaba Qwen via DashScope OpenAI-compatible mode.""" + + API_URL = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/chat/completions" + PROVIDER = "Qwen" + KEY_ENV = "DASHSCOPE_API_KEY" + + +class KimiClient(OpenAICompatibleClient): + """Client for Moonshot AI (Kimi), OpenAI-compatible.""" + + API_URL = "https://api.moonshot.ai/v1/chat/completions" + PROVIDER = "Kimi" + KEY_ENV = "MOONSHOT_API_KEY" + + def get_ai_client(provider: str, config: dict[str, Any]) -> BaseAiClient: """Factory function for creating the appropriate AI client. @@ -861,7 +1011,7 @@ def get_ai_client(provider: str, config: dict[str, Any]) -> BaseAiClient: Args: provider (str): The AI service provider name (case-insensitive). Supported values: "openai", "gemini", "mistral", "claude", - "huggingface", "grok". + "huggingface", "grok", "deepseek", "qwen", "kimi". config (Dict[str, Any]): Configuration dictionary containing API keys and model specifications. 
Expected structure: { @@ -926,6 +1076,12 @@ def get_ai_client(provider: str, config: dict[str, Any]) -> BaseAiClient: return HuggingFaceClient(model=model, api_key=api_key) if provider == "grok": return GrokClient(model=model, api_key=api_key) + if provider == "deepseek": + return DeepSeekClient(model=model, api_key=api_key) + if provider == "qwen": + return QwenClient(model=model, api_key=api_key) + if provider == "kimi": + return KimiClient(model=model, api_key=api_key) raise ValueError(f"Unknown AI provider: {provider}") @@ -934,4 +1090,8 @@ def _api_key_env_var(provider: str) -> str: return "HUGGINGFACE_API_KEY" if provider == "grok": return "XAI_API_KEY" - return f"{provider.upper()}_API_KEY" + if provider == "qwen": + return "DASHSCOPE_API_KEY" + if provider == "kimi": + return "MOONSHOT_API_KEY" + return f"{provider.upper()}_API_KEY" # deepseek -> DEEPSEEK_API_KEY, ... diff --git a/src/ecli/services/config_service.py b/src/ecli/services/config_service.py index f4de6c57..1622a749 100644 --- a/src/ecli/services/config_service.py +++ b/src/ecli/services/config_service.py @@ -208,13 +208,16 @@ def _default_config() -> dict[str, Any]: "providers": { "openai": {"model": "gpt-5-codex", "api_key": ""}, "gemini": {"model": "gemini-2.5-pro", "api_key": ""}, - "mistral": {"model": "magistral-medium-1.2", "api_key": ""}, - "claude": {"model": "claude-4-opus", "api_key": ""}, - "grok": {"model": "grok-4-fast", "api_key": ""}, + "mistral": {"model": "codestral-latest", "api_key": ""}, + "claude": {"model": "claude-sonnet-4-6", "api_key": ""}, + "grok": {"model": "grok-code-fast-1", "api_key": ""}, "huggingface": { - "model": "meta-llama/Meta-Llama-3.1-405B-Instruct", + "model": "Qwen/Qwen2.5-Coder-32B-Instruct", "api_key": "", }, + "deepseek": {"model": "deepseek-chat", "api_key": ""}, + "qwen": {"model": "qwen3-coder-plus", "api_key": ""}, + "kimi": {"model": "kimi-k2-0905-preview", "api_key": ""}, }, }, "git": {"enabled": True}, diff --git a/src/ecli/utils/themes.py 
b/src/ecli/utils/themes.py index bc274bb4..98a43eb7 100644 --- a/src/ecli/utils/themes.py +++ b/src/ecli/utils/themes.py @@ -11,20 +11,20 @@ # Licensed under the GNU General Public License version 2 only. # See the LICENSE file in the project root for full license text. -"""Fixed, built-in colour themes for the editor. +"""Editor colour theme resolution. The editor no longer reads a free-form ``[colors]`` table from ``config.toml``. -Instead the user selects one of eight immutable, hand-tuned palettes with a -single integer key:: +Instead the user selects a deterministic numeric theme. Professional editor +themes are loaded from the imported VS Code extension tree via +``contributes.themes`` and theme JSON. The original eight ECLI palettes remain +available as immutable compatibility themes under reserved compatibility ids. - theme = 1 # in config.toml - -* ``1``-``4`` are light themes. -* ``5``-``8`` are dark themes. + theme = 207 # Dark+ from the imported VS Code theme assets ``resolve_theme()`` validates the configured value and always returns a concrete -``ThemePalette`` (falling back deterministically to the default theme), so the -rendering layer never has to deal with missing or malformed colour data. +``ThemePalette``. Missing target themes are not faked: invalid numbers preserve +the current palette when one is provided, or use the startup default with a +warning when no current palette exists. 
""" from __future__ import annotations @@ -36,25 +36,33 @@ from enum import IntEnum from typing import Any, cast +from ecli.extensions.ecli_integration.theme_registry import cached_theme_registry + logger = logging.getLogger("ecli") class ThemeId(IntEnum): - """Stable integer identifiers for the eight built-in themes.""" + """Stable integer identifiers for built-in compatibility themes.""" + + PYSH_LIGHT = 181 + PYSH_CLASSIC = 182 + ECLI_LEGACY_LIGHT = 183 + PYSH_DARK = 281 + PYSH_CLASSIC_DARK = 282 + ECLI_LEGACY_DARK = 283 + ECLI_HIGH_CONTRAST_LIGHT = 381 + ECLI_HIGH_CONTRAST_DARK = 382 + - LIGHT_CLASSIC = 1 - LIGHT_SOFT = 2 - LIGHT_HIGH_CONTRAST = 3 - LIGHT_SOLAR = 4 - DARK_CLASSIC = 5 - DARK_SOFT = 6 - DARK_HIGH_CONTRAST = 7 - DARK_NEON = 8 +#: Professional default. Dark+ is present in the imported VS Code theme assets. +DEFAULT_THEME_ID: int = 207 +#: Always-available fallback if the extension theme registry cannot be loaded. +COMPATIBILITY_FALLBACK_THEME_ID: int = int(ThemeId.PYSH_DARK) -#: Theme used whenever the configured value is missing, malformed or out of range. -DEFAULT_THEME_ID: int = int(ThemeId.DARK_CLASSIC) +STATUS_BAR_BACKGROUND_KEY = "statusBar.background" +STATUS_BAR_FOREGROUND_KEY = "statusBar.foreground" @dataclass(frozen=True) @@ -109,6 +117,7 @@ class ThemePalette: panel_title: str = "" info: str = "" success: str = "" + diagnostics: tuple[str, ...] 
= () def __post_init__(self) -> None: """Fill any unspecified chrome role from coherent palette fallbacks.""" @@ -206,8 +215,8 @@ def chrome_color_pairs(self) -> dict[str, tuple[str, str]]: _THEMES: dict[int, ThemePalette] = { - int(ThemeId.LIGHT_CLASSIC): ThemePalette( - theme_id=int(ThemeId.LIGHT_CLASSIC), + 1: ThemePalette( + theme_id=1, name="Light Classic", is_dark=False, background="#FFFFFF", @@ -234,8 +243,8 @@ def chrome_color_pairs(self) -> dict[str, tuple[str, str]]: error="#CF222E", warning="#9A6700", ), - int(ThemeId.LIGHT_SOFT): ThemePalette( - theme_id=int(ThemeId.LIGHT_SOFT), + 2: ThemePalette( + theme_id=2, name="Light Soft", is_dark=False, background="#FBF1C7", @@ -262,8 +271,8 @@ def chrome_color_pairs(self) -> dict[str, tuple[str, str]]: error="#9D0006", warning="#B57614", ), - int(ThemeId.LIGHT_HIGH_CONTRAST): ThemePalette( - theme_id=int(ThemeId.LIGHT_HIGH_CONTRAST), + 3: ThemePalette( + theme_id=3, name="Light High Contrast", is_dark=False, background="#FFFFFF", @@ -290,8 +299,8 @@ def chrome_color_pairs(self) -> dict[str, tuple[str, str]]: error="#B30000", warning="#8B4500", ), - int(ThemeId.LIGHT_SOLAR): ThemePalette( - theme_id=int(ThemeId.LIGHT_SOLAR), + 4: ThemePalette( + theme_id=4, name="Light Solar", is_dark=False, background="#FDF6E3", @@ -318,8 +327,8 @@ def chrome_color_pairs(self) -> dict[str, tuple[str, str]]: error="#DC322F", warning="#B58900", ), - int(ThemeId.DARK_CLASSIC): ThemePalette( - theme_id=int(ThemeId.DARK_CLASSIC), + 5: ThemePalette( + theme_id=5, name="Dark Classic", is_dark=True, background="#0D1117", @@ -346,8 +355,8 @@ def chrome_color_pairs(self) -> dict[str, tuple[str, str]]: error="#F85149", warning="#D29922", ), - int(ThemeId.DARK_SOFT): ThemePalette( - theme_id=int(ThemeId.DARK_SOFT), + 6: ThemePalette( + theme_id=6, name="Dark Soft", is_dark=True, background="#282828", @@ -374,8 +383,8 @@ def chrome_color_pairs(self) -> dict[str, tuple[str, str]]: error="#FB4934", warning="#FABD2F", ), - 
int(ThemeId.DARK_HIGH_CONTRAST): ThemePalette( - theme_id=int(ThemeId.DARK_HIGH_CONTRAST), + 7: ThemePalette( + theme_id=7, name="Dark High Contrast", is_dark=True, background="#000000", @@ -402,8 +411,8 @@ def chrome_color_pairs(self) -> dict[str, tuple[str, str]]: error="#FF5555", warning="#F1FA8C", ), - int(ThemeId.DARK_NEON): ThemePalette( - theme_id=int(ThemeId.DARK_NEON), + 8: ThemePalette( + theme_id=8, name="Dark Neon", is_dark=True, background="#0A0E14", @@ -463,30 +472,14 @@ def _chrome( # Distinctive, professional chrome bars per theme (id -> chrome overrides). _CHROME_OVERRIDES: dict[int, dict[str, str]] = { - int(ThemeId.LIGHT_CLASSIC): _chrome( - ("#EAEEF2", "#1F2328"), "#D0D7DE", "#0969DA", "#0969DA", "#1A7F37" - ), - int(ThemeId.LIGHT_SOFT): _chrome( - ("#EBDBB2", "#3C3836"), "#D5C4A1", "#B57614", "#076678", "#79740E" - ), - int(ThemeId.LIGHT_HIGH_CONTRAST): _chrome( - ("#000000", "#FFFFFF"), "#000000", "#B30000", "#00008B", "#006400" - ), - int(ThemeId.LIGHT_SOLAR): _chrome( - ("#EEE8D5", "#586E75"), "#93A1A1", "#268BD2", "#268BD2", "#859900" - ), - int(ThemeId.DARK_CLASSIC): _chrome( - ("#161B22", "#C9D1D9"), "#30363D", "#58A6FF", "#58A6FF", "#3FB950" - ), - int(ThemeId.DARK_SOFT): _chrome( - ("#3C3836", "#EBDBB2"), "#504945", "#83A598", "#83A598", "#B8BB26" - ), - int(ThemeId.DARK_HIGH_CONTRAST): _chrome( - ("#1A1A1A", "#FFFFFF"), "#5A5A5A", "#8BE9FD", "#8BE9FD", "#50FA7B" - ), - int(ThemeId.DARK_NEON): _chrome( - ("#131721", "#B3B1AD"), "#1F2430", "#59C2FF", "#59C2FF", "#C2D94C" - ), + 1: _chrome(("#EAEEF2", "#1F2328"), "#D0D7DE", "#0969DA", "#0969DA", "#1A7F37"), + 2: _chrome(("#EBDBB2", "#3C3836"), "#D5C4A1", "#B57614", "#076678", "#79740E"), + 3: _chrome(("#000000", "#FFFFFF"), "#000000", "#B30000", "#00008B", "#006400"), + 4: _chrome(("#EEE8D5", "#586E75"), "#93A1A1", "#268BD2", "#268BD2", "#859900"), + 5: _chrome(("#161B22", "#C9D1D9"), "#30363D", "#58A6FF", "#58A6FF", "#3FB950"), + 6: _chrome(("#3C3836", "#EBDBB2"), "#504945", "#83A598", 
"#83A598", "#B8BB26"), + 7: _chrome(("#1A1A1A", "#FFFFFF"), "#5A5A5A", "#8BE9FD", "#8BE9FD", "#50FA7B"), + 8: _chrome(("#131721", "#B3B1AD"), "#1F2430", "#59C2FF", "#59C2FF", "#C2D94C"), } # Apply the chrome overrides, keeping the palettes immutable. @@ -495,10 +488,215 @@ def _chrome( for tid, palette in _THEMES.items() } +_COMPATIBILITY_THEME_IDS: dict[int, tuple[int, str]] = { + 1: (int(ThemeId.PYSH_LIGHT), "PySH Light"), + 2: (int(ThemeId.PYSH_CLASSIC), "PySH Classic"), + 4: (int(ThemeId.ECLI_LEGACY_LIGHT), "ECLI Legacy Light"), + 5: (int(ThemeId.PYSH_DARK), "PySH Dark"), + 6: (int(ThemeId.PYSH_CLASSIC_DARK), "PySH Classic Dark"), + 8: (int(ThemeId.ECLI_LEGACY_DARK), "ECLI Legacy Dark"), + 3: (int(ThemeId.ECLI_HIGH_CONTRAST_LIGHT), "ECLI High Contrast Light"), + 7: (int(ThemeId.ECLI_HIGH_CONTRAST_DARK), "ECLI High Contrast Dark"), +} + +_LEGACY_THEME_ID_TO_COMPATIBILITY_ID = { + old_id: new_id for old_id, (new_id, _name) in _COMPATIBILITY_THEME_IDS.items() +} + +_THEMES = { + new_id: replace(_THEMES[old_id], theme_id=new_id, name=name) + for old_id, (new_id, name) in _COMPATIBILITY_THEME_IDS.items() +} + + +def _theme_exists(theme_id: int) -> bool: + if theme_id in _THEMES: + return True + try: + return cached_theme_registry().get_theme(theme_id) is not None + except Exception: + return False + + +def _first_color( + colors: Mapping[str, str], names: tuple[str, ...], default: str +) -> str: + for name in names: + value = colors.get(name) + if value: + return value + return default + + +def _resolved_token_color(theme: Any, scope_stack: str, default: str) -> str: + try: + value = theme.resolve_token_style(scope_stack).foreground + except Exception: + value = None + return value or default + + +def _resolved_first_token_color( + theme: Any, scope_stacks: tuple[str, ...], default: str +) -> str: + for scope_stack in scope_stacks: + color = _resolved_token_color(theme, scope_stack, "") + if color: + return color + return default + + +def 
_extension_theme_to_palette(theme: Any) -> ThemePalette: + colors = theme.colors + is_dark = theme.theme_type == "dark" or theme.ui_theme == "hc-black" + default_bg = "#1E1E1E" if is_dark else "#FFFFFF" + default_fg = "#D4D4D4" if is_dark else "#000000" + background = _first_color(colors, ("editor.background",), default_bg) + foreground = _first_color(colors, ("editor.foreground",), default_fg) + current_line = _first_color( + colors, + ("editor.lineHighlightBackground", "editorLineNumber.activeBackground"), + background, + ) + selection = _first_color( + colors, + ("editor.selectionBackground", "selection.background"), + "#264F78" if is_dark else "#ADD6FF", + ) + return ThemePalette( + theme_id=int(theme.number), + name=theme.name, + is_dark=is_dark, + background=background, + foreground=foreground, + cursor=_first_color(colors, ("editorCursor.foreground",), foreground), + selection=selection, + status=_first_color(colors, (STATUS_BAR_FOREGROUND_KEY,), foreground), + line_number=_first_color(colors, ("editorLineNumber.foreground",), foreground), + current_line=current_line, + comment=_resolved_first_token_color( + theme, + ("source comment", "text.html.markdown markup.quote"), + foreground, + ), + keyword=_resolved_first_token_color( + theme, + ("source keyword.control", "source keyword"), + foreground, + ), + string=_resolved_first_token_color( + theme, + ( + "source string.quoted", + "text.html.markdown markup.inline.raw", + "text.html.markdown markup.fenced_code", + ), + foreground, + ), + number=_resolved_first_token_color( + theme, + ("source constant.numeric", "source constant.numeric.integer"), + foreground, + ), + function=_resolved_first_token_color( + theme, + ( + "text.html.markdown markup.heading", + "text.html.markdown entity.name.section", + "source entity.name.function", + ), + foreground, + ), + klass=_resolved_token_color(theme, "source entity.name.class", foreground), + constant=_resolved_token_color(theme, "source constant.language", 
foreground), + type_=_resolved_first_token_color( + theme, + ("source entity.name.type", "text.html.markdown markup.bold"), + foreground, + ), + operator=_resolved_first_token_color( + theme, + ("source keyword.operator", "text.html.markdown markup.list"), + foreground, + ), + decorator=_resolved_token_color( + theme, + "source meta.function.decorator entity.name.function.decorator", + foreground, + ), + variable=_resolved_token_color(theme, "source variable.other", foreground), + tag=_resolved_token_color(theme, "text.html entity.name.tag", foreground), + attribute=_resolved_token_color( + theme, "text.html entity.other.attribute-name", foreground + ), + builtin=_resolved_token_color(theme, "source support.function", foreground), + error=_resolved_token_color(theme, "source invalid", "#F44747"), + warning=_first_color( + colors, + ("editorWarning.foreground", "problemsWarningIcon.foreground"), + "#CCA700", + ), + dim=_first_color(colors, ("descriptionForeground",), foreground), + header_bg=_first_color( + colors, + ("titleBar.activeBackground", STATUS_BAR_BACKGROUND_KEY), + current_line, + ), + header_fg=_first_color( + colors, + ("titleBar.activeForeground", STATUS_BAR_FOREGROUND_KEY), + foreground, + ), + status_bg=_first_color(colors, (STATUS_BAR_BACKGROUND_KEY,), current_line), + status_fg=_first_color(colors, (STATUS_BAR_FOREGROUND_KEY,), foreground), + footer_bg=_first_color(colors, (STATUS_BAR_BACKGROUND_KEY,), current_line), + footer_fg=_first_color(colors, (STATUS_BAR_FOREGROUND_KEY,), foreground), + border=_first_color(colors, ("panel.border", "contrastBorder"), foreground), + panel_title=_first_color( + colors, + ("panelTitle.activeForeground", "list.highlightForeground"), + _resolved_token_color(theme, "source entity.name.function", foreground), + ), + info=_first_color( + colors, + ("editorInfo.foreground", "problemsInfoIcon.foreground"), + _resolved_token_color(theme, "source constant.numeric", foreground), + ), + success=_resolved_token_color(theme, 
"source string", foreground), + ) + + +def _extension_theme(theme_id: int) -> ThemePalette | None: + try: + theme = cached_theme_registry().get_theme(theme_id) + except Exception: + return None + return _extension_theme_to_palette(theme) if theme is not None else None + + +def find_theme(theme_id: int) -> ThemePalette | None: + """Return an extension or compatibility palette, or ``None`` if missing.""" + candidate = int(theme_id) + if candidate in _THEMES: + return _THEMES[candidate] + return _extension_theme(candidate) + def get_theme(theme_id: int) -> ThemePalette: - """Return the palette for ``theme_id`` or the default palette if unknown.""" - return _THEMES.get(int(theme_id), _THEMES[DEFAULT_THEME_ID]) + """Return an extension or compatibility palette for ``theme_id``. + + Missing ids are a contract violation for direct callers. Config resolution + uses :func:`resolve_theme`, which reports diagnostics and preserves the + current palette when possible. + """ + palette = find_theme(theme_id) + if palette is None: + raise KeyError(f"theme id is not available: {theme_id}") + return palette + + +def _startup_default_theme() -> ThemePalette: + return find_theme(DEFAULT_THEME_ID) or _THEMES[COMPATIBILITY_FALLBACK_THEME_ID] #: Environment variable that overrides the configured theme (highest precedence). 
@@ -508,19 +706,18 @@ def get_theme(theme_id: int) -> ThemePalette: def _legacy_theme_id(table: Mapping[str, Any]) -> int | None: """Resolve a theme id from a legacy ``[theme]`` table (id wins, then name).""" table_id = table.get("id") - if ( - isinstance(table_id, int) - and not isinstance(table_id, bool) - and table_id in _THEMES - ): - return table_id + if isinstance(table_id, int) and not isinstance(table_id, bool): + if table_id in _LEGACY_THEME_ID_TO_COMPATIBILITY_ID: + return _LEGACY_THEME_ID_TO_COMPATIBILITY_ID[table_id] + if table_id in _THEMES: + return table_id name = table.get("name") if isinstance(name, str): lowered = name.strip().lower() if "dark" in lowered: - return int(ThemeId.DARK_CLASSIC) + return int(ThemeId.PYSH_DARK) if "light" in lowered: - return int(ThemeId.LIGHT_CLASSIC) + return int(ThemeId.PYSH_LIGHT) return None @@ -530,32 +727,44 @@ def _coerce_theme_id(raw: Any) -> int | None: Returns ``None`` (rather than the default) when the value cannot be interpreted, so callers can try the next source in the precedence chain. - * ``int`` / integer-like ``str`` (``"3"``) -> that id when 1-8. - * legacy ``[theme]`` table (``Mapping``) -> ``id`` (1-8) or ``name`` - (``"dark"`` -> 5, ``"light"`` -> 1) for backward compatibility. + * ``int`` / integer-like ``str`` (``"207"``) -> that id when present in the + extension-backed or compatibility registry. + * legacy ``[theme]`` table (``Mapping``) -> old ``id`` (1-8) mapped to + compatibility ids, or ``name`` (``"dark"`` -> 281, + ``"light"`` -> 181) for backward compatibility. * ``bool`` / out-of-range / unparseable -> ``None``. 
""" if raw is None or isinstance(raw, bool): return None if isinstance(raw, int): - return raw if raw in _THEMES else None + return raw if _theme_exists(raw) else None if isinstance(raw, str): try: candidate = int(raw.strip()) except ValueError: return None - return candidate if candidate in _THEMES else None + return candidate if _theme_exists(candidate) else None if isinstance(raw, Mapping): return _legacy_theme_id(raw) return None -def resolve_theme(config: Mapping[str, Any] | None) -> ThemePalette: +def _with_theme_diagnostic(palette: ThemePalette, message: str) -> ThemePalette: + updated = replace(palette, diagnostics=(*palette.diagnostics, message)) + return cast(ThemePalette, updated) + + +def resolve_theme( + config: Mapping[str, Any] | None, + current_theme: ThemePalette | None = None, +) -> ThemePalette: """Resolve the active palette from env + application config. Precedence (highest first): ``ECLI_THEME`` env var, the root-level ``theme`` key in the effective config, then the deterministic default. Never raises; - invalid values are logged and the next source is tried. + invalid values are logged and the next source is tried. If no explicit source + is valid, the current palette is preserved when supplied; otherwise the + startup default professional theme is returned with a diagnostic. 
""" sources: list[tuple[str, Any]] = [] env_value = os.environ.get(THEME_ENV_VAR) @@ -570,11 +779,20 @@ def resolve_theme(config: Mapping[str, Any] | None) -> ThemePalette: theme_id = _coerce_theme_id(raw) if theme_id is not None: return get_theme(theme_id) - logger.warning( - "Ignoring invalid theme from %s (%r); falling back to default %d.", - label, - raw, - DEFAULT_THEME_ID, + logger.warning("Ignoring invalid theme from %s (%r).", label, raw) + + invalid_sources = [(label, raw) for label, raw in sources if raw is not None] + if invalid_sources: + label, raw = invalid_sources[-1] + if current_theme is not None: + return _with_theme_diagnostic( + current_theme, + f"Invalid theme from {label}: {raw!r}; keeping current theme " + f"{current_theme.theme_id} ({current_theme.name})", + ) + palette = _startup_default_theme() + return _with_theme_diagnostic( + palette, + f"Invalid theme from {label}: {raw!r}; using theme {palette.theme_id} ({palette.name})", ) - - return get_theme(DEFAULT_THEME_ID) + return _startup_default_theme() diff --git a/src/ecli/utils/utils.py b/src/ecli/utils/utils.py index 68c6dc87..ef525fba 100755 --- a/src/ecli/utils/utils.py +++ b/src/ecli/utils/utils.py @@ -41,10 +41,14 @@ import shutil import subprocess import sys +import tomllib from pathlib import Path from typing import Any, Optional -import toml +try: + import toml +except ModuleNotFoundError: # pragma: no cover - exercised in minimal envs + toml = None # type: ignore[assignment] logger = logging.getLogger("ecli") @@ -61,98 +65,297 @@ MISTRAL_API_KEY= CLAUDE_API_KEY= HUGGINGFACE_API_KEY= +# DeepSeek: https://platform.deepseek.com +DEEPSEEK_API_KEY= +# Qwen (Alibaba DashScope, international): https://dashscope-intl.aliyuncs.com +DASHSCOPE_API_KEY= +# Kimi (Moonshot AI): https://platform.moonshot.ai +MOONSHOT_API_KEY= """ # This dictionary is a direct, hardcoded representation of `default_config.toml`. # It serves as the ultimate fallback, ensuring the application can ALWAYS start. 
DEFAULT_CONFIG: dict[str, Any] = { - # Built-in colour theme (1-4 light, 5-8 dark). See ecli.utils.themes. - "theme": 5, + # Professional extension-backed colour theme. 207 = Dark+ when the imported + # VS Code theme-defaults assets are present; built-in compatibility themes + # are available in reserved 18x/28x/38x ranges. See ecli.utils.themes. + "theme": 207, "colors": {"error": "red", "status": "bright_white", "green": "green"}, + # Mirrors the [editor] table in config.toml (the global syntax_highlighting + # switch lives here). default_new_filename is an internal-only default. "editor": { - "use_system_clipboard": True, "default_new_filename": "new_file.py", - "tab_size": 4, "use_spaces": True, "syntax_highlighting": True, + "use_system_clipboard": True, + "show_line_numbers": True, + "tab_size": 4, + "use_spaces": True, + "word_wrap": False, + "auto_indent": True, + "auto_brackets": True, + "syntax_highlighting": True, # Opt-in mouse support (off by default to preserve native text selection). "mouse": False, + "default_new_filename": "new_file.py", }, - # Extensions Layer (data-only) switches. These mirror the [extensions] table - # in config.toml and gate ONLY the deterministic metadata adapters under + "logging": { + "file_level": "DEBUG", + "console_level": "WARNING", + "log_to_console": False, + "separate_error_log": False, + }, + "linter": { + "enabled": True, + "auto_install": True, + "exclude": [".git", "**pycache**", ".venv"], + }, + # Extensions Layer switches. These mirror the [extensions] table in + # config.toml and gate ONLY the imported extension adapters under # src/ecli/extensions/ecli_integration/. They never enable an extension - # runtime; syntax_engine = "legacy" preserves the regex highlighter until the - # #102 extension-backed syntax service replaces it. See - # docs/architecture/extensions-layer.md. + # runtime. 
syntax_engine = "extension" uses TextMate tokenization from the + # imported grammars and falls back to the legacy highlighter when a grammar + # or the optional tokenizer is unavailable; "legacy" forces the built-in + # highlighter. See docs/architecture/extensions-layer.md. "extensions": { - "enabled": True, "metadata_registry": True, "grammar_catalog": True, - "language_detection": True, "syntax_engine": "legacy", + "enabled": True, + "metadata_registry": True, + "grammar_catalog": True, + "language_detection": True, + "syntax_engine": "extension", }, - "fonts": {"font_family": "monospace", "font_size": 12}, + "fonts": {"font_family": "monospace", "font_size": 16}, "keybindings": { - "delete": "del", "paste": "ctrl+v", "copy": "ctrl+c", "cut": "ctrl+x", - "undo": "ctrl+z", "redo": "ctrl+y", "lint": "f4", "new_file": "f2", - "open_file": "ctrl+o", "save_file": "ctrl+s", "save_as": "f5", - "select_all": "ctrl+a", "quit": "ctrl+q", "goto_line": "ctrl+g", - "git_menu": "f9", "cancel_operation": "esc", "find": "ctrl+f", - "find_next": "f3", "search_and_replace": "f6", "help": "f1", + "delete": "del", + "paste": "ctrl+v", + "copy": "ctrl+c", + "cut": "ctrl+x", + "undo": "ctrl+z", + "redo": "ctrl+y", + "lint": "f4", + "new_file": "f2", + "open_file": "ctrl+o", + "save_file": "ctrl+s", + "save_as": "f5", + "select_all": "ctrl+a", + "quit": "ctrl+q", + "goto_line": "ctrl+g", + "git_menu": "f9", + "cancel_operation": "esc", + "find": "ctrl+f", + "find_next": "f3", + "search_and_replace": "f6", + "help": "f1", "extend_selection_left": ["shift+left", "alt-h"], "extend_selection_right": ["shift+right", "alt-l"], "extend_selection_up": ["shift+up", "alt-k"], "extend_selection_down": ["shift+down", "alt-j"], - "handle_up": ["up"], "handle_down": ["down"], "handle_left": ["left"], + "handle_up": ["up"], + "handle_down": ["down"], + "handle_left": ["left"], "handle_right": ["right"], }, "ai": {"default_provider": "gemini"}, "ai.keys": { - "openai": "", "gemini": "", "mistral": 
"", "claude": "", "grok": "", "huggingface": "" + "openai": "", + "gemini": "", + "mistral": "", + "claude": "", + "grok": "", + "huggingface": "", + "deepseek": "", + "qwen": "", + "kimi": "", }, + # Coding-optimized default model per provider. "ai.models": { "openai": "gpt-5-codex", "gemini": "gemini-2.5-pro", - "mistral": "magistral-medium-1.2", - "claude": "claude-4-opus", - "grok": "grok-4-fast", - "huggingface": "meta-llama/Meta-Llama-3.1-405B-Instruct", + "mistral": "codestral-latest", + "claude": "claude-sonnet-4-6", + "grok": "grok-code-fast-1", + "huggingface": "Qwen/Qwen2.5-Coder-32B-Instruct", + "deepseek": "deepseek-chat", + "qwen": "qwen3-coder-plus", + "kimi": "kimi-k2-0905-preview", }, "git": {"enabled": True}, "settings": {"auto_save_interval": 5, "show_git_info": True}, "file_icons": { - "docs": "📘", "python": "🐍", "toml": "❄️", "javascript": "📜", "typescript": "📑", - "php": "🐘", "ruby": "♦️", "css": "🎨", "html": "🌐", "json": "📊", "yaml": "⚙️", - "xml": "📰", "markdown": "📗", "text": "📝", "shell": "💫", "dart": "🎯", "go": "🐹", - "c": "🇨", "cpp": "🇨➕", "java": "☕", "julia": "🧮", "rust": "🦀", "csharp": "♯", - "scala": "💎", "r": "📉", "swift": "🐦", "dockerfile": "🐳", "terraform": "🛠️", - "jenkins": "🧑‍✈️", "puppet": "🎎", "saltstack": "🧂", "git": "🔖", "notebook": "📒", - "diff": "↔️", "makefile": "🛠️", "ini": "🔩", "csv": "🗂️", "sql": "💾", - "graphql": "📈", "kotlin": "📱", "lua": "🌙", "perl": "🐪", "powershell": "💻", - "nix": "❄️", "image": "🖼️", "audio": "🎵", "video": "🎞️", "archive": "📦", - "font": "🖋️", "binary": "⚙️", "document": "📄", "folder": "📁", "folder_open": "📂", + "docs": "📘", + "text": "📝", + "code": "💻", + "python": "🐍", + "toml": "❄️", + "javascript": "📜", + "typescript": "📑", + "php": "🐘", + "ruby": "♦️", + "css": "🎨", + "html": "🌐", + "json": "📊", + "yaml": "⚙️", + "xml": "📰", + "markdown": "📗", + "plaintext": "📄", + "shell": "💫", + "dart": "🎯", + "go": "🐹", + "c": "🇨", + "cpp": "🇨➕", + "java": "☕", + "julia": "🧮", + "rust": "🦀", + "csharp": 
"♯", + "scala": "💎", + "r": "📉", + "swift": "🐦", + "dockerfile": "🐳", + "terraform": "🛠️", + "jenkins": "🧑‍✈️", + "puppet": "🎎", + "saltstack": "🧂", + "git": "🔖", + "notebook": "📒", + "diff": "↔️", + "makefile": "🛠️", + "ini": "⚙️", + "csv": "🔩", + "sql": "💾", + "graphql": "📈", + "kotlin": "📱", + "lua": "🌙", + "perl": "🐪", + "powershell": "💻", + "nix": "❄️", + "image": "🖼️", + "audio": "🎵", + "video": "🎞️", + "archive": "📦", + "font": "🖋️", + "binary": "⚙️", + "document": "📄", + "folder": "📁", + "folder_open": "📂", "default": "❓", }, "supported_formats": { - "docs": ["readme", "docs", "todo", "changelog", "license", "contributing", "code_of_conduct"], - "python": ["py", "pyw", "pyc", "pyd"], "toml": ["toml", "tml"], - "javascript": ["js", "mjs", "cjs", "jsx"], "typescript": ["ts", "tsx", "mts", "cts"], - "php": ["php", "php3", "php4", "php5", "phtml"], "ruby": ["rb", "erb", "rake", "rbw", "gemspec"], - "css": ["css"], "html": ["html", "htm", "xhtml"], "json": ["json", "jsonc", "geojson", "webmanifest"], - "yaml": ["yaml", "yml"], "xml": ["xml", "xsd", "xsl", "xslt", "plist", "rss", "atom", "csproj", "svg"], - "markdown": ["md", "markdown", "mdown", "mkd"], "text": ["txt", "log", "rst", "srt", "sub", "me"], - "shell": ["sh", "bash", "zsh", "fish", "ksh", "csh", "tcsh", "dash", "ash", "cmd", "command", "tool", "bat"], - "dart": ["dart"], "go": ["go"], "c": ["c", "h"], "cpp": ["cpp", "cxx", "cc", "hpp", "hxx", "hh", "inl", "tpp"], - "java": ["java", "jar", "class"], "julia": ["jl"], "rust": ["rs", "rlib"], "csharp": ["cs"], - "scala": ["scala", "sc"], "r": ["r", "R", "rds", "rda"], "swift": ["swift"], - "dockerfile": ["Dockerfile", "dockerfile"], "terraform": ["tf", "tfvars"], - "jenkins": ["Jenkinsfile", "jenkinsfile", "groovy"], "puppet": ["pp"], "saltstack": ["sls"], - "git": [".gitignore", ".gitattributes", ".gitmodules", ".gitkeep", "gitconfig", "config"], - "notebook": ["ipynb"], "diff": ["diff", "patch"], "makefile": ["Makefile", "makefile", "mk", "mak"], - 
"ini": ["ini", "cfg", "conf", "properties", "editorconfig"], "csv": ["csv", "tsv"], "sql": ["sql"], - "graphql": ["graphql", "gql"], "kotlin": ["kt", "kts"], "lua": ["lua"], "perl": ["pl", "pm", "t", "pod"], - "powershell": ["ps1", "psm1", "psd1"], "nix": ["nix"], - "image": ["jpg", "jpeg", "png", "gif", "bmp", "ico", "webp", "tiff", "tif", "heic", "heif"], + "docs": [ + "readme", + "docs", + "todo", + "changelog", + "license", + "contributing", + "code_of_conduct", + ], + "python": ["py", "pyw", "pyc", "pyd"], + "toml": ["toml", "tml"], + "javascript": ["js", "mjs", "cjs", "jsx"], + "typescript": ["ts", "tsx", "mts", "cts"], + "php": ["php", "php3", "php4", "php5", "phtml"], + "ruby": ["rb", "erb", "rake", "rbw", "gemspec"], + "css": ["css"], + "html": ["html", "htm", "xhtml"], + "json": ["json", "jsonc", "geojson", "webmanifest"], + "yaml": ["yaml", "yml"], + "xml": ["xml", "xsd", "xsl", "xslt", "plist", "rss", "atom", "csproj", "svg"], + "markdown": ["md", "markdown", "mdown", "mkd"], + "text": ["txt", "log", "rst", "srt", "sub", "me"], + "shell": [ + "sh", + "bash", + "zsh", + "fish", + "ksh", + "csh", + "tcsh", + "dash", + "ash", + "cmd", + "command", + "tool", + "bat", + ], + "dart": ["dart"], + "go": ["go"], + "c": ["c", "h"], + "cpp": ["cpp", "cxx", "cc", "hpp", "hxx", "hh", "inl", "tpp"], + "java": ["java", "jar", "class"], + "julia": ["jl"], + "rust": ["rs", "rlib"], + "csharp": ["cs"], + "scala": ["scala", "sc"], + "r": ["r", "R", "rds", "rda"], + "swift": ["swift"], + "dockerfile": ["Dockerfile", "dockerfile"], + "terraform": ["tf", "tfvars"], + "jenkins": ["Jenkinsfile", "jenkinsfile", "groovy"], + "puppet": ["pp"], + "saltstack": ["sls"], + "git": [ + ".gitignore", + ".gitattributes", + ".gitmodules", + ".gitkeep", + "gitconfig", + "config", + ], + "notebook": ["ipynb"], + "diff": ["diff", "patch"], + "makefile": ["Makefile", "makefile", "mk", "mak"], + "ini": ["ini", "cfg", "conf", "properties", "editorconfig"], + "csv": ["csv", "tsv"], + "sql": 
["sql"], + "graphql": ["graphql", "gql"], + "kotlin": ["kt", "kts"], + "lua": ["lua"], + "perl": ["pl", "pm", "t", "pod"], + "powershell": ["ps1", "psm1", "psd1"], + "nix": ["nix"], + "image": [ + "jpg", + "jpeg", + "png", + "gif", + "bmp", + "ico", + "webp", + "tiff", + "tif", + "heic", + "heif", + ], "audio": ["mp3", "wav", "ogg", "flac", "aac", "m4a", "wma"], "video": ["mp4", "mkv", "avi", "mov", "webm", "flv", "wmv"], - "archive": ["zip", "tar", "gz", "tgz", "bz2", "rar", "7z", "xz", "iso", "deb", "rpm", "pkg"], + "archive": [ + "zip", + "tar", + "gz", + "tgz", + "bz2", + "rar", + "7z", + "xz", + "iso", + "deb", + "rpm", + "pkg", + ], "font": ["ttf", "otf", "woff", "woff2", "eot"], "binary": ["exe", "dll", "so", "o", "bin", "app", "com", "msi", "dmg"], - "document": ["doc", "docx", "odt", "rtf", "pdf", "ppt", "pptx", "odp", "xls", "xlsx", "ods", "epub", "mobi"], + "document": [ + "doc", + "docx", + "odt", + "rtf", + "pdf", + "ppt", + "pptx", + "odp", + "xls", + "xlsx", + "ods", + "epub", + "mobi", + ], }, "comments": { "python": {"line_prefix": "# ", "docstring_delim": '"""'}, @@ -162,9 +365,12 @@ "javascript": {"line_prefix": "// ", "block_delims": ["/*", "*/"]}, "typescript": {"line_prefix": "// ", "block_delims": ["/*", "*/"]}, "php": {"line_prefix": "// ", "block_delims": ["/*", "*/"]}, - "html": {"block_delims": ["<!--", "-->"]}, "xml": {"block_delims": ["<!--", "-->"]}, - "css": {"block_delims": ["/*", "*/"]}, "scss": {"line_prefix": "// ", "block_delims": ["/*", "*/"]}, - "graphql": {"line_prefix": "# "}, "c": {"line_prefix": "// ", "block_delims": ["/*", "*/"]}, + "html": {"block_delims": ["<!--", "-->"]}, + "xml": {"block_delims": ["<!--", "-->"]}, + "css": {"block_delims": ["/*", "*/"]}, + "scss": {"line_prefix": "// ", "block_delims": ["/*", "*/"]}, + "graphql": {"line_prefix": "# "}, + "c": {"line_prefix": "// ", "block_delims": ["/*", "*/"]}, "cpp": {"line_prefix": "// ", "block_delims": ["/*", "*/"]}, "csharp": {"line_prefix": "// ", "block_delims": ["/*", "*/"]}, "java": {"line_prefix": 
"// ", "block_delims": ["/*", "*/"]}, @@ -175,31 +381,132 @@ "scala": {"line_prefix": "// ", "block_delims": ["/*", "*/"]}, "dart": {"line_prefix": "// ", "block_delims": ["/*", "*/"]}, "haskell": {"line_prefix": "-- ", "block_delims": ["{-", "-}"]}, - "elixir": {"line_prefix": "# ", "docstring_delim": '"""'}, "erlang": {"line_prefix": "% "}, - "clojure": {"line_prefix": ";; "}, "fsharp": {"line_prefix": "// ", "block_delims": ["(*", "*)"]}, - "ocaml": {"block_delims": ["(*", "*)"]}, "shell": {"line_prefix": "# "}, + "elixir": {"line_prefix": "# ", "docstring_delim": '"""'}, + "erlang": {"line_prefix": "% "}, + "clojure": {"line_prefix": ";; "}, + "fsharp": {"line_prefix": "// ", "block_delims": ["(*", "*)"]}, + "ocaml": {"block_delims": ["(*", "*)"]}, + "shell": {"line_prefix": "# "}, "powershell": {"line_prefix": "# ", "block_delims": ["<#", "#>"]}, - "dockerfile": {"line_prefix": "# "}, "makefile": {"line_prefix": "# "}, + "dockerfile": {"line_prefix": "# "}, + "makefile": {"line_prefix": "# "}, "terraform": {"line_prefix": "# ", "block_delims": ["/*", "*/"]}, "jenkins": {"line_prefix": "// ", "block_delims": ["/*", "*/"]}, - "puppet": {"line_prefix": "# "}, "saltstack": {"line_prefix": "# "}, - "nix": {"line_prefix": "# ", "block_delims": ["/*", "*/"]}, "vim": {"line_prefix": '" '}, - "assembly": {"line_prefix": "; "}, "sql": {"line_prefix": "-- ", "block_delims": ["/*", "*/"]}, - "yaml": {"line_prefix": "# "}, "toml": {"line_prefix": "# "}, "ini": {"line_prefix": "; "}, - "markdown": {"block_delims": ["<!--", "-->"]}, "latex": {"line_prefix": "% "}, - "r": {"line_prefix": "# "}, "julia": {"line_prefix": "# ", "block_delims": ["#=", "=#"]}, + "puppet": {"line_prefix": "# "}, + "saltstack": {"line_prefix": "# "}, + "nix": {"line_prefix": "# ", "block_delims": ["/*", "*/"]}, + "vim": {"line_prefix": '" '}, + "assembly": {"line_prefix": "; "}, + "sql": {"line_prefix": "-- ", "block_delims": ["/*", "*/"]}, + "yaml": {"line_prefix": "# "}, + "toml": {"line_prefix": "# "}, + 
"ini": {"line_prefix": "; "}, + "markdown": {"block_delims": ["<!--", "-->"]}, + "latex": {"line_prefix": "% "}, + "r": {"line_prefix": "# "}, + "julia": {"line_prefix": "# ", "block_delims": ["#=", "=#"]}, "matlab": {"line_prefix": "% ", "block_delims": ["%{", "%}"]}, "nim": {"line_prefix": "# ", "block_delims": ["#[", "]#"]}, - "crystal": {"line_prefix": "# "}, "zig": {"line_prefix": "// "}, "bat": {"line_prefix": "REM "}, + "crystal": {"line_prefix": "# "}, + "zig": {"line_prefix": "// "}, + "bat": {"line_prefix": "REM "}, }, } +_ECLI_CONFIG_DIR_RELATIVE = Path(".config") / "ecli" +CONFIG_FILENAME = "config.toml" +_TEXTMATE_BACKUP_FILENAME = "config.toml.pre-textmate.bak" +_THEME_NUMBERING_BACKUP_FILENAME = "config.toml.pre-extension-theme-numbering.bak" +_CONFIG_MIGRATION_WARNINGS: list[str] = [] + +_OLD_THEME_ID_TO_COMPATIBILITY_ID = { + 1: 181, + 2: 182, + 3: 381, + 4: 183, + 5: 281, + 6: 282, + 7: 382, + 8: 283, +} + +_PREVIOUS_COMPATIBILITY_ID_TO_CANONICAL_ID = { + 101: 181, + 102: 281, + 103: 182, + 104: 282, + 105: 183, + 106: 283, + 107: 381, + 108: 382, +} + +_TRANSITIONAL_THEME_ID_TO_CANONICAL_ID = { + **{old: 100 + old for old in range(1, 11)}, + **{old: 190 + old for old in range(11, 26)}, + **{old: 275 + old for old in range(26, 30)}, +} + # --- Helper Functions --- + +def _trusted_user_config_dir() -> Path: + """Return the trusted ECLI user config directory.""" + return Path.home() / _ECLI_CONFIG_DIR_RELATIVE + + +def _trusted_user_config_path() -> Path: + """Return the trusted ECLI user config file path.""" + trusted_config_dir = _trusted_user_config_dir() + return trusted_config_dir / CONFIG_FILENAME + + +def _is_relative_to(path: Path, parent: Path) -> bool: + """Return whether ``path`` is contained in ``parent`` after resolution.""" + try: + path.relative_to(parent) + except ValueError: + return False + return True + + +def _trusted_config_path_for_migration(user_config_path: Path) -> Path | None: + """Return the trusted config path only when 
``user_config_path`` resolves to it.""" + trusted_config_dir = _trusted_user_config_dir() + trusted_config_path = trusted_config_dir / CONFIG_FILENAME + resolved_trusted_dir = trusted_config_dir.resolve() + resolved_trusted_config = trusted_config_path.resolve() + resolved_user_config = user_config_path.resolve() + if resolved_user_config != resolved_trusted_config or not _is_relative_to( + resolved_trusted_config, resolved_trusted_dir + ): + logger.warning( + "Skipped ECLI config migration outside trusted config path: %s", + user_config_path, + ) + return None + return trusted_config_path + + +def _safe_config_backup_path(backup_name: str) -> Path | None: + """Return a fixed-name backup path inside the trusted ECLI config directory. + + Backups are constructed only from the trusted config directory and a fixed + backup filename. Caller-supplied path objects must not reach this function. + """ + trusted_dir = _trusted_user_config_dir() + resolved_trusted_dir = trusted_dir.resolve() + resolved_config_path = (trusted_dir / CONFIG_FILENAME).resolve() + if not _is_relative_to(resolved_config_path, resolved_trusted_dir): + logger.warning("Skipped ECLI config backup outside trusted config directory.") + return None + return trusted_dir / backup_name + + def get_project_root() -> Path: """Determines the project's root directory for finding template files.""" - if getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'): + if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"): return Path(sys._MEIPASS) else: return Path(__file__).resolve().parents[3] @@ -208,15 +515,15 @@ def get_project_root() -> Path: def ensure_user_config_exists() -> None: """Checks for user config files in `~/.config/ecli` and creates them if missing.""" try: - config_dir = Path.home() / ".config" / "ecli" - user_config_path = config_dir / "config.toml" + config_dir = _trusted_user_config_dir() + user_config_path = _trusted_user_config_path() user_env_path = config_dir / ".env" 
config_dir.mkdir(parents=True, exist_ok=True) if not user_config_path.exists(): project_root = get_project_root() - source_config_path = project_root / "config.toml" + source_config_path = project_root / CONFIG_FILENAME if source_config_path.exists(): shutil.copy(source_config_path, user_config_path) logger.info(f"Created user config template at: {user_config_path}") @@ -226,73 +533,159 @@ def ensure_user_config_exists() -> None: logger.info(f"Created user .env template at: {user_env_path}") migrate_legacy_theme_config(user_config_path) + migrate_obsolete_config_tables(user_config_path) except Exception as e: - logger.critical(f"Could not create user configuration files: {e}", exc_info=True) + logger.critical( + f"Could not create user configuration files: {e}", exc_info=True + ) -def migrate_legacy_theme_config(user_config_path: Path) -> bool: - """Upgrade a legacy ``[theme]``-table config to a root ``theme = N`` key. - - Old configs used ``[theme]``/``[theme.ui]``/``[colors]`` tables that the - editor no longer reads, leaving users unable to switch themes by editing the - file. This is a one-time, backed-up, conservative migration: the dead tables - are commented out (never deleted) and a single editable ``theme = N`` line is - inserted, derived from the legacy ``name``/``id``. No-op when the config - already has a root ``theme`` key, has no legacy ``[theme]`` table, or cannot - be read. Returns True when a migration was written. +def migrate_obsolete_config_tables(user_config_path: Path) -> bool: + """Refresh a pre-#102 user config to the current production format. + + Older user configs carried large internal data tables (``[comments.*]``, + ``[[syntax_highlighting.*.patterns]]``, ``[supported_formats]``) and the + transitional ``syntax_engine = "legacy"`` default. 
Those tables now live in + code (``DEFAULT_CONFIG``) and the default engine is TextMate, so a user who + upgraded would otherwise be pinned to the old legacy highlighter and never + see TextMate rendering. + + This one-time, backed-up migration strips those obsolete tables (text-based, + preserving all other settings and comments) and flips a transitional + ``syntax_engine = "legacy"`` to ``"extension"``. It is a no-op when none of + those markers are present. Returns True when a migration was written. """ + trusted_config_path = _trusted_config_path_for_migration(user_config_path) + if trusted_config_path is None: + return False + try: - text = user_config_path.read_text(encoding="utf-8") + text = trusted_config_path.read_text(encoding="utf-8") except Exception: return False - if re.search(r"(?m)^\s*theme\s*=\s*\d", text): - return False # already has a root theme = N - if not re.search(r"(?m)^\s*\[theme\]", text): - return False # no legacy table to migrate - - theme_id = _derive_legacy_theme_id(text) + obsolete_header = re.compile( + r"^\s*(\[\[syntax_highlighting\.|\[comments\.|\[supported_formats\])" + ) + table_header = re.compile(r"^\s*\[") + has_obsolete = any(obsolete_header.match(line) for line in text.splitlines()) + has_legacy_engine = re.search(r'(?m)^\s*syntax_engine\s*=\s*"legacy"', text) + if not has_obsolete and not has_legacy_engine: + return False - lines = text.splitlines(keepends=True) out: list[str] = [] - commenting = False - for line in lines: - stripped = line.lstrip() - if re.match(r"\[(colors|theme(\.ui)?)\]", stripped): - commenting = True - elif stripped.startswith("["): - commenting = False - if commenting and stripped and not stripped.startswith("#"): - out.append("# " + line) - else: - out.append(line) - migrated = "".join(out) + skipping = False + for line in text.splitlines(keepends=True): + if obsolete_header.match(line): + skipping = True + continue + if skipping and table_header.match(line): + skipping = False # a non-obsolete 
table begins; keep it + if skipping: + continue + out.append(line) - insertion = ( - "# Active colour theme (1-4 light, 5-8 dark). Edit this number to switch.\n" - f"theme = {theme_id}\n\n" + migrated = "".join(out) + migrated = re.sub( + r'(?m)^(\s*syntax_engine\s*=\s*)"legacy"', r'\1"extension"', migrated ) - first_table = re.search(r"(?m)^\s*\[", migrated) - if first_table: - migrated = migrated[: first_table.start()] + insertion + migrated[first_table.start() :] - else: - migrated = insertion + migrated + if migrated == text: + return False try: - backup = user_config_path.with_name(user_config_path.name + ".bak") + backup = _safe_config_backup_path(_TEXTMATE_BACKUP_FILENAME) + if backup is None: + return False if not backup.exists(): backup.write_text(text, encoding="utf-8") - user_config_path.write_text(migrated, encoding="utf-8") + trusted_config_path.write_text(migrated, encoding="utf-8") logger.warning( - "Migrated legacy [theme] config to 'theme = %d' at %s (backup: %s).", - theme_id, - user_config_path, + "Migrated obsolete config tables and enabled the TextMate engine at " + "%s (backup: %s).", + trusted_config_path, backup, ) return True except Exception: - logger.exception("Legacy theme config migration failed; left config unchanged.") + logger.exception("Obsolete-config migration failed; left config unchanged.") + return False + + +def migrate_legacy_theme_config(user_config_path: Path) -> bool: + """Upgrade pre-extension and transitional theme numbering in user config. 
+ + Migrations are conservative and backed up before writing: + + * old ``[theme]`` table configs are converted to a root ``theme = N`` line; + * old pre-extension root ids ``1``-``8`` map to the preserved legacy + compatibility palettes; + * the previous in-progress professional ids ``1``-``29`` map to the new + 100/200/300 ranges when old extension-theme comments identify that shape; + * previous compatibility ids ``101``-``108`` map to the reserved + 18x/28x/38x compatibility ids when the current numbering marker is absent. + """ + trusted_config_path = _trusted_config_path_for_migration(user_config_path) + if trusted_config_path is None: + return False + + try: + text = trusted_config_path.read_text(encoding="utf-8") + except Exception: + return False + + root_match = _root_theme_match(text) + has_current_marker = _has_current_theme_numbering_marker(text) + migrated = text + reason = "" + theme_id: int | None = None + + if root_match is not None: + existing_id = int(root_match.group("id")) + theme_id = _migrated_root_theme_id(existing_id, text, has_current_marker) + if theme_id is None: + return False + migrated = ( + text[: root_match.start("id")] + + str(theme_id) + + text[root_match.end("id") :] + ) + reason = f"theme id {existing_id} -> {theme_id}" + elif re.search(r"(?m)^\s*\[theme\]", text): + theme_id = _derive_legacy_theme_id(text) + migrated = _comment_legacy_theme_tables(text) + insertion = ( + "# Active colour theme. 
See the theme-numbering policy in config.toml.\n" + f"theme = {theme_id}\n\n" + ) + first_table = re.search(r"(?m)^\s*\[", migrated) + if first_table: + migrated = ( + migrated[: first_table.start()] + + insertion + + migrated[first_table.start() :] + ) + else: + migrated = insertion + migrated + reason = f"legacy [theme] table -> {theme_id}" + else: + return False + + try: + backup = _safe_config_backup_path(_THEME_NUMBERING_BACKUP_FILENAME) + if backup is None: + return False + if not backup.exists(): + backup.write_text(text, encoding="utf-8") + trusted_config_path.write_text(migrated, encoding="utf-8") + message = ( + f"Migrated ECLI theme numbering in user config ({reason}); backup: {backup}" + ) + _CONFIG_MIGRATION_WARNINGS.append(message) + logger.warning("%s", message) + return True + except Exception: + logger.exception("Theme-numbering migration failed; left config unchanged.") return False @@ -301,39 +694,111 @@ def _derive_legacy_theme_id(text: str) -> int: id_match = re.search(r"(?ms)^\s*\[theme\].*?^\s*id\s*=\s*(\d+)", text) if id_match: candidate = int(id_match.group(1)) - if 1 <= candidate <= 8: + if candidate in _OLD_THEME_ID_TO_COMPATIBILITY_ID: + return _OLD_THEME_ID_TO_COMPATIBILITY_ID[candidate] + if candidate in _PREVIOUS_COMPATIBILITY_ID_TO_CANONICAL_ID: + return _PREVIOUS_COMPATIBILITY_ID_TO_CANONICAL_ID[candidate] + if candidate in _TRANSITIONAL_THEME_ID_TO_CANONICAL_ID: + return _TRANSITIONAL_THEME_ID_TO_CANONICAL_ID[candidate] + if candidate in _previous_or_current_compatibility_ids(): return candidate name_match = re.search( r"(?ms)^\s*\[theme\].*?^\s*name\s*=\s*[\"']([^\"']+)[\"']", text ) if name_match and "light" in name_match.group(1).lower(): - return 1 - return 5 + return 181 + return 281 + + +def _root_theme_match(text: str) -> re.Match[str] | None: + """Return the root-level ``theme = N`` match before the first TOML table.""" + prefix_end = len(text) + table = re.search(r"(?m)^\s*\[", text) + if table is not None: + prefix_end = 
table.start() + return re.search( + r"(?m)^(?P<prefix>\s*theme\s*=\s*[\"']?)(?P<id>\d+)(?P<suffix>[\"']?)\s*$", + text[:prefix_end], + ) +def _has_current_theme_numbering_marker(text: str) -> bool: + return "100-199 = light themes" in text and "800-899" in text + + +def _looks_like_transitional_extension_theme_config(text: str) -> bool: + markers = ( + "professional ids 1-29", + "Available in the imported VS Code theme tree", + "Professional themes are discovered", + "Built-in compatibility themes:", + ) + return any(marker in text for marker in markers) + + +def _migrated_root_theme_id( + theme_id: int, text: str, has_current_marker: bool +) -> int | None: + if has_current_marker: + return None + if 1 <= theme_id <= 8: + if _looks_like_transitional_extension_theme_config(text): + return _TRANSITIONAL_THEME_ID_TO_CANONICAL_ID[theme_id] + return _OLD_THEME_ID_TO_COMPATIBILITY_ID[theme_id] + if 9 <= theme_id <= 29: + return _TRANSITIONAL_THEME_ID_TO_CANONICAL_ID[theme_id] + if theme_id in _PREVIOUS_COMPATIBILITY_ID_TO_CANONICAL_ID: + return _PREVIOUS_COMPATIBILITY_ID_TO_CANONICAL_ID[theme_id] + return None + + +def _comment_legacy_theme_tables(text: str) -> str: + lines = text.splitlines(keepends=True) + out: list[str] = [] + commenting = False + for line in lines: + stripped = line.lstrip() + if re.match(r"\[(colors|theme(\.ui)?)\]", stripped): + commenting = True + elif stripped.startswith("["): + commenting = False + if commenting and stripped and not stripped.startswith("#"): + out.append("# " + line) + else: + out.append(line) + return "".join(out) + + +def _previous_or_current_compatibility_ids() -> set[int]: + return { + *_PREVIOUS_COMPATIBILITY_ID_TO_CANONICAL_ID, + *_OLD_THEME_ID_TO_COMPATIBILITY_ID.values(), + } def load_config() -> dict[str, Any]: """ Loads and merges configurations, ensuring the application can always run. 
""" + _CONFIG_MIGRATION_WARNINGS.clear() final_config = deep_merge({}, DEFAULT_CONFIG) logger.debug("Loaded embedded default configuration.") ensure_user_config_exists() - user_config_path = Path.home() / ".config" / "ecli" / "config.toml" + user_config_path = _trusted_user_config_dir() / CONFIG_FILENAME loaded_from = "(built-in defaults only)" if user_config_path.is_file(): try: - user_config = toml.load(user_config_path) + user_config = _load_toml_file(user_config_path) final_config = deep_merge(final_config, user_config) loaded_from = str(user_config_path) logger.info("Loaded user config from %s", user_config_path) if isinstance(user_config.get("theme"), dict): logger.warning( - "User config %s uses the legacy [theme] table. Set a root-level " - "'theme = 1..8' (or [theme] id = N), or run with ECLI_THEME=N.", + "User config %s uses the legacy [theme] table. Set a " + "valid root-level 'theme = N' (or [theme] id = N), " + "or run with ECLI_THEME=N.", user_config_path, ) except Exception as e: @@ -347,9 +812,17 @@ def load_config() -> dict[str, Any]: # Record the effective config path so the runtime/UI can report which file # was actually loaded (root-cause aid for "my config changes do nothing"). final_config["_loaded_config_path"] = loaded_from + if _CONFIG_MIGRATION_WARNINGS: + final_config["_migration_warnings"] = tuple(_CONFIG_MIGRATION_WARNINGS) return final_config +def _load_toml_file(path: Path) -> dict[str, Any]: + if toml is not None: + return toml.load(str(path)) + return tomllib.loads(path.read_text(encoding="utf-8")) + + def get_file_icon(filename: Optional[str], config: dict[str, Any]) -> str: """ Returns an icon string for a given filename based on the configuration. 
@@ -411,7 +884,9 @@ def safe_run(cmd: list[str], **kwargs: Any) -> subprocess.CompletedProcess: """ timeout = kwargs.pop("timeout", None) if timeout is not None and not isinstance(timeout, int | float): - logger.warning("Invalid timeout type %s; running without timeout.", type(timeout).__name__) + logger.warning( + "Invalid timeout type %s; running without timeout.", type(timeout).__name__ + ) timeout = None elif timeout is not None and timeout <= 0: logger.warning("Invalid timeout value %s; using default 30s.", timeout) @@ -419,17 +894,27 @@ def safe_run(cmd: list[str], **kwargs: Any) -> subprocess.CompletedProcess: try: return subprocess.run( - cmd, capture_output=True, text=True, check=False, - encoding="utf-8", errors="replace", timeout=timeout, **kwargs, + cmd, + capture_output=True, + text=True, + check=False, + encoding="utf-8", + errors="replace", + timeout=timeout, + **kwargs, ) except FileNotFoundError as e: logger.error(f"Command not found: {cmd[0]!r}", exc_info=True) return subprocess.CompletedProcess(cmd, 127, stdout="", stderr=str(e)) except subprocess.TimeoutExpired as e: logger.warning(f"Command timed out after {timeout}s: {' '.join(cmd)}") - return subprocess.CompletedProcess(cmd, -15, stdout=e.stdout or "", stderr=e.stderr or "") + return subprocess.CompletedProcess( + cmd, -15, stdout=e.stdout or "", stderr=e.stderr or "" + ) except Exception as e: - logger.exception(f"An unexpected error occurred while running command: {' '.join(cmd)}") + logger.exception( + f"An unexpected error occurred while running command: {' '.join(cmd)}" + ) return subprocess.CompletedProcess(cmd, -1, stdout="", stderr=str(e)) diff --git a/tests/core/test_config_loading.py b/tests/core/test_config_loading.py index 3d351788..6e1bbb69 100644 --- a/tests/core/test_config_loading.py +++ b/tests/core/test_config_loading.py @@ -15,12 +15,22 @@ from __future__ import annotations +import tomllib from pathlib import Path import pytest from ecli.utils.themes import THEME_ENV_VAR, 
resolve_theme -from ecli.utils.utils import load_config +from ecli.utils.utils import ( + CONFIG_FILENAME, + DEFAULT_CONFIG, + load_config, + migrate_legacy_theme_config, + migrate_obsolete_config_tables, +) + + +REPO_CONFIG = Path(__file__).resolve().parents[2] / "config.toml" @pytest.fixture @@ -34,23 +44,25 @@ def isolated_home(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path: def test_user_config_theme_overrides_default(isolated_home: Path) -> None: - (isolated_home / "config.toml").write_text("theme = 4\n", encoding="utf-8") + (isolated_home / "config.toml").write_text("theme = 207\n", encoding="utf-8") config = load_config() - assert config["theme"] == 4 # user value beats DEFAULT_CONFIG (5) - assert resolve_theme(config).theme_id == 4 + assert config["theme"] == 207 # user value beats DEFAULT_CONFIG + assert resolve_theme(config).theme_id == 207 assert config["_loaded_config_path"] == str(isolated_home / "config.toml") -def test_effective_config_theme_1_resolves_theme_1(isolated_home: Path) -> None: - (isolated_home / "config.toml").write_text("theme = 1\n", encoding="utf-8") - assert resolve_theme(load_config()).theme_id == 1 +def test_effective_config_theme_207_resolves_dark_plus(isolated_home: Path) -> None: + (isolated_home / "config.toml").write_text("theme = 207\n", encoding="utf-8") + assert resolve_theme(load_config()).theme_id == 207 -def test_effective_config_theme_8_resolves_theme_8(isolated_home: Path) -> None: - (isolated_home / "config.toml").write_text("theme = 8\n", encoding="utf-8") - assert resolve_theme(load_config()).theme_id == 8 +def test_effective_config_theme_181_resolves_compatibility_theme( + isolated_home: Path, +) -> None: + (isolated_home / "config.toml").write_text("theme = 181\n", encoding="utf-8") + assert resolve_theme(load_config()).theme_id == 181 def test_legacy_theme_table_config_is_migrated_to_root_theme( @@ -62,54 +74,211 @@ def test_legacy_theme_table_config_is_migrated_to_root_theme( encoding="utf-8", ) config 
= load_config() - # load_config migrates the file: 'theme' is now a root int, resolving to dark. - assert config["theme"] == 5 - assert resolve_theme(config).theme_id == 5 + # load_config migrates the file: 'theme' is now a root compatibility id. + assert config["theme"] == 281 + assert resolve_theme(config).theme_id == 281 # The on-disk file gained an editable root 'theme = N' and kept other sections. text = cfg.read_text(encoding="utf-8") - assert "theme = 5" in text + assert "theme = 281" in text assert "[editor]" in text - assert cfg.with_name("config.toml.bak").exists() + assert cfg.with_name("config.toml.pre-extension-theme-numbering.bak").exists() + assert config["_migration_warnings"] # The user can now switch by editing that single line. - cfg.write_text(text.replace("theme = 5", "theme = 2"), encoding="utf-8") - assert resolve_theme(load_config()).theme_id == 2 + cfg.write_text(text.replace("theme = 281", "theme = 181"), encoding="utf-8") + assert resolve_theme(load_config()).theme_id == 181 + + +def test_config_filename_constant_names_user_config() -> None: + assert CONFIG_FILENAME == "config.toml" def test_light_legacy_theme_migrates_to_theme_one(isolated_home: Path) -> None: (isolated_home / "config.toml").write_text( '[theme]\nname = "light"\n', encoding="utf-8" ) - assert resolve_theme(load_config()).theme_id == 1 + assert resolve_theme(load_config()).theme_id == 181 def test_migration_is_noop_for_root_theme_config(isolated_home: Path) -> None: cfg = isolated_home / "config.toml" - cfg.write_text("theme = 7\n[editor]\ntab_size = 4\n", encoding="utf-8") + cfg.write_text("theme = 208\n[editor]\ntab_size = 4\n", encoding="utf-8") load_config() # No legacy table -> no migration, no backup. 
- assert not cfg.with_name("config.toml.bak").exists() - assert "theme = 7" in cfg.read_text(encoding="utf-8") + assert not cfg.with_name("config.toml.pre-extension-theme-numbering.bak").exists() + assert "theme = 208" in cfg.read_text(encoding="utf-8") + + +def test_theme_migration_refuses_backup_outside_trusted_config_dir( + isolated_home: Path, tmp_path: Path +) -> None: + outside = tmp_path / "attacker-controlled-name.toml" + original = '[theme]\nname = "dark"\n' + outside.write_text(original, encoding="utf-8") + + assert migrate_legacy_theme_config(outside) is False + assert outside.read_text(encoding="utf-8") == original + assert not ( + tmp_path / "attacker-controlled-name.toml.pre-extension-theme-numbering.bak" + ).exists() + assert not ( + isolated_home / "config.toml.pre-extension-theme-numbering.bak" + ).exists() + + +def test_theme_migration_writes_fixed_backup_inside_trusted_config_dir( + isolated_home: Path, +) -> None: + cfg = isolated_home / CONFIG_FILENAME + original = '[theme]\nname = "dark"\n' + cfg.write_text(original, encoding="utf-8") + + assert migrate_legacy_theme_config(cfg) is True + + backup = isolated_home / "config.toml.pre-extension-theme-numbering.bak" + assert backup.exists() + assert backup.read_text(encoding="utf-8") == original + + +def test_obsolete_config_migration_refuses_external_config_path( + isolated_home: Path, tmp_path: Path +) -> None: + outside = tmp_path / "attacker-controlled-name.toml" + original = ( + '[extensions]\nsyntax_engine = "legacy"\n' + '[comments.python]\nline_prefix = "# "\n' + ) + outside.write_text(original, encoding="utf-8") + + assert migrate_obsolete_config_tables(outside) is False + assert outside.read_text(encoding="utf-8") == original + assert not (tmp_path / "attacker-controlled-name.toml.pre-textmate.bak").exists() + assert not (isolated_home / "config.toml.pre-textmate.bak").exists() def test_env_overrides_user_config_via_load(isolated_home: Path, monkeypatch) -> None: - (isolated_home / 
"config.toml").write_text("theme = 2\n", encoding="utf-8") - monkeypatch.setenv(THEME_ENV_VAR, "6") - assert resolve_theme(load_config()).theme_id == 6 + (isolated_home / "config.toml").write_text("theme = 181\n", encoding="utf-8") + monkeypatch.setenv(THEME_ENV_VAR, "208") + assert resolve_theme(load_config()).theme_id == 208 def test_loaded_config_path_recorded_when_present(isolated_home: Path) -> None: - (isolated_home / "config.toml").write_text("theme = 3\n", encoding="utf-8") + (isolated_home / "config.toml").write_text("theme = 207\n", encoding="utf-8") config = load_config() assert "_loaded_config_path" in config assert config["_loaded_config_path"].endswith("config.toml") +def test_obsolete_tables_are_migrated_out_of_user_config(isolated_home: Path) -> None: + # A stale pre-#102 user config (old data tables + transitional legacy engine) + # must be migrated on load so the upgraded user gets the TextMate engine. + cfg = isolated_home / "config.toml" + cfg.write_text( + "theme = 17\n" + '[extensions]\nsyntax_engine = "legacy"\n' + '[comments.python]\nline_prefix = "# "\n' + '[[syntax_highlighting.python.patterns]]\npattern = "x"\ncolor = "keyword"\n' + "[settings]\nauto_save_interval = 9\n", + encoding="utf-8", + ) + config = load_config() + text = cfg.read_text(encoding="utf-8") + + assert "[comments.python]" not in text + assert "[[syntax_highlighting" not in text + assert 'syntax_engine = "extension"' in text # transitional default flipped + assert "auto_save_interval = 9" in text # unrelated settings preserved + assert cfg.with_name("config.toml.pre-textmate.bak").exists() + assert config["extensions"]["syntax_engine"] == "extension" + assert config["settings"]["auto_save_interval"] == 9 + + +def test_pre_extension_root_theme_alias_migrates_to_legacy_compatibility( + isolated_home: Path, +) -> None: + cfg = isolated_home / "config.toml" + cfg.write_text("theme = 5\n[editor]\ntab_size = 4\n", encoding="utf-8") + config = load_config() + assert 
config["theme"] == 281 + assert resolve_theme(config).theme_id == 281 + assert "theme = 281" in cfg.read_text(encoding="utf-8") + assert cfg.with_name("config.toml.pre-extension-theme-numbering.bak").exists() + assert config["_migration_warnings"] + + +def test_transitional_theme_23_migrates_to_kimbie_dark( + isolated_home: Path, +) -> None: + cfg = isolated_home / "config.toml" + cfg.write_text( + "# Professional themes are discovered from extensions.\n" + "theme = 23\n[editor]\ntab_size = 4\n", + encoding="utf-8", + ) + config = load_config() + assert config["theme"] == 213 + assert resolve_theme(config).name == "Kimbie Dark" + assert "theme = 213" in cfg.read_text(encoding="utf-8") + assert cfg.with_name("config.toml.pre-extension-theme-numbering.bak").exists() + + +def test_previous_compatibility_theme_102_migrates_to_pysh_dark( + isolated_home: Path, +) -> None: + cfg = isolated_home / "config.toml" + cfg.write_text("theme = 102\n[editor]\ntab_size = 4\n", encoding="utf-8") + config = load_config() + assert config["theme"] == 281 + assert resolve_theme(config).name == "PySH Dark" + + def test_extensions_layer_switches_default_through_loader(isolated_home: Path) -> None: - # The data-only Extensions Layer switches (#101) are exposed by the existing - # config loader via DEFAULT_CONFIG, with syntax rendering kept on "legacy". + # The Extensions Layer switches are exposed by the existing config loader via + # DEFAULT_CONFIG. As of #102 the default syntax engine is "extension" + # (TextMate tokenization, with automatic legacy fallback). 
extensions = load_config()["extensions"] assert extensions["enabled"] is True assert extensions["metadata_registry"] is True assert extensions["grammar_catalog"] is True assert extensions["language_detection"] is True - assert extensions["syntax_engine"] == "legacy" + assert extensions["syntax_engine"] == "extension" + + +def test_repository_config_is_small_user_facing_template() -> None: + text = REPO_CONFIG.read_text(encoding="utf-8") + parsed = tomllib.loads(text) + assert "keybindings" not in parsed + assert "comments" not in parsed + assert "syntax_highlighting" not in parsed + assert "supported_formats" not in parsed + assert "theme" in parsed + for section in ( + "logging", + "ai", + "fonts", + "editor", + "extensions", + "settings", + "linter", + "file_icons", + ): + assert section in parsed + assert "models" in parsed["ai"] + assert parsed["theme"] == 207 + + +def test_default_config_matches_user_facing_template_keys() -> None: + parsed = tomllib.loads(REPO_CONFIG.read_text(encoding="utf-8")) + assert DEFAULT_CONFIG["theme"] == parsed["theme"] + for key in ("logging", "fonts", "extensions", "settings", "linter", "file_icons"): + assert DEFAULT_CONFIG[key] == parsed[key] + for key, value in parsed["editor"].items(): + assert DEFAULT_CONFIG["editor"][key] == value + + +def test_theme_numbering_comment_is_public_contract() -> None: + text = REPO_CONFIG.read_text(encoding="utf-8") + assert "100-199 = light themes" in text + assert "200-299 = dark themes" in text + assert "300-399 = high-contrast themes" in text + assert "1-8 = deprecated aliases" in text + assert "800-899 = reserved" in text diff --git a/tests/core/test_theme_system.py b/tests/core/test_theme_system.py index 53b8c925..88407f47 100644 --- a/tests/core/test_theme_system.py +++ b/tests/core/test_theme_system.py @@ -11,7 +11,7 @@ # Licensed under the GNU General Public License version 2 only. # See the LICENSE file in the project root for full license text. 
-"""Tests for the fixed built-in theme system and syntax-highlighting toggle.""" +"""Tests for extension-backed themes and built-in compatibility palettes.""" from __future__ import annotations @@ -26,6 +26,9 @@ ) +COMPATIBILITY_THEME_IDS = [int(member) for member in ThemeId] + + @pytest.fixture(autouse=True) def _clear_theme_env(monkeypatch: pytest.MonkeyPatch) -> None: """Ensure ECLI_THEME never leaks into config-based theme tests.""" @@ -61,24 +64,25 @@ def _clear_theme_env(monkeypatch: pytest.MonkeyPatch) -> None: } -def test_exactly_eight_themes_exist() -> None: - ids = [int(member) for member in ThemeId] - assert ids == [1, 2, 3, 4, 5, 6, 7, 8] +def test_exactly_eight_compatibility_themes_exist() -> None: + assert COMPATIBILITY_THEME_IDS == [181, 182, 183, 281, 282, 283, 381, 382] def test_four_light_and_four_dark_themes() -> None: - light = [tid for tid in range(1, 9) if not get_theme(tid).is_dark] - dark = [tid for tid in range(1, 9) if get_theme(tid).is_dark] - assert light == [1, 2, 3, 4] - assert dark == [5, 6, 7, 8] + light = [tid for tid in COMPATIBILITY_THEME_IDS if not get_theme(tid).is_dark] + dark = [tid for tid in COMPATIBILITY_THEME_IDS if get_theme(tid).is_dark] + high_contrast = [tid for tid in COMPATIBILITY_THEME_IDS if 300 <= tid <= 399] + assert light == [181, 182, 183, 381] + assert dark == [281, 282, 283, 382] + assert high_contrast == [381, 382] def test_get_theme_returns_matching_id() -> None: - for tid in range(1, 9): + for tid in [207, 208, 213, *COMPATIBILITY_THEME_IDS]: assert get_theme(tid).theme_id == tid -@pytest.mark.parametrize("tid", list(range(1, 9))) +@pytest.mark.parametrize("tid", [207, 208, 213, *COMPATIBILITY_THEME_IDS]) def test_every_palette_defines_all_renderer_colors(tid: int) -> None: color_map = get_theme(tid).syntax_color_hex() assert RENDERER_COLOR_KEYS <= set(color_map) @@ -90,7 +94,7 @@ def test_every_palette_defines_all_renderer_colors(tid: int) -> None: ) -@pytest.mark.parametrize("tid", list(range(1, 9))) 
+@pytest.mark.parametrize("tid", [207, 208, 213, *COMPATIBILITY_THEME_IDS]) def test_palette_defines_required_surfaces(tid: int) -> None: palette = get_theme(tid) for field in ( @@ -122,7 +126,7 @@ def test_palette_defines_required_surfaces(tid: int) -> None: ) -@pytest.mark.parametrize("tid", list(range(1, 9))) +@pytest.mark.parametrize("tid", [207, 208, 213, *COMPATIBILITY_THEME_IDS]) def test_every_theme_defines_all_chrome_roles(tid: int) -> None: palette = get_theme(tid) for role in CHROME_ROLES: @@ -134,7 +138,7 @@ def test_every_theme_defines_all_chrome_roles(tid: int) -> None: ) -@pytest.mark.parametrize("tid", list(range(1, 9))) +@pytest.mark.parametrize("tid", [207, 208, 213, *COMPATIBILITY_THEME_IDS]) def test_chrome_color_pairs_are_complete_fg_bg_hex(tid: int) -> None: pairs = get_theme(tid).chrome_color_pairs() expected = { @@ -159,8 +163,11 @@ def test_chrome_color_pairs_are_complete_fg_bg_hex(tid: int) -> None: def test_resolve_theme_reads_config_value() -> None: - assert resolve_theme({"theme": 1}).name == "Light Classic" - assert resolve_theme({"theme": 8}).name == "Dark Neon" + assert resolve_theme({"theme": 207}).name == "Dark+" + assert resolve_theme({"theme": 208}).name == "Monokai" + assert resolve_theme({"theme": 213}).name == "Kimbie Dark" + assert resolve_theme({"theme": 181}).name == "PySH Light" + assert resolve_theme({"theme": 382}).name == "ECLI High Contrast Dark" def test_resolve_theme_missing_uses_default() -> None: @@ -172,10 +179,14 @@ def test_resolve_theme_out_of_range_falls_back() -> None: assert resolve_theme({"theme": 0}).theme_id == DEFAULT_THEME_ID assert resolve_theme({"theme": 99}).theme_id == DEFAULT_THEME_ID assert resolve_theme({"theme": -3}).theme_id == DEFAULT_THEME_ID + assert resolve_theme({"theme": 1}).theme_id == DEFAULT_THEME_ID + assert resolve_theme({"theme": 800}).theme_id == DEFAULT_THEME_ID def test_resolve_theme_integer_like_string_is_accepted() -> None: - assert resolve_theme({"theme": "3"}).theme_id == 3 + 
assert resolve_theme({"theme": "207"}).theme_id == 207 + assert resolve_theme({"theme": "213"}).theme_id == 213 + assert resolve_theme({"theme": "181"}).theme_id == 181 def test_resolve_theme_non_integer_string_falls_back() -> None: @@ -190,34 +201,32 @@ def test_resolve_theme_boolean_is_rejected() -> None: def test_env_var_overrides_config_theme(monkeypatch: pytest.MonkeyPatch) -> None: - monkeypatch.setenv(THEME_ENV_VAR, "7") - assert resolve_theme({"theme": 2}).theme_id == 7 + monkeypatch.setenv(THEME_ENV_VAR, "208") + assert resolve_theme({"theme": 181}).theme_id == 208 def test_invalid_env_var_falls_back_to_config(monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setenv(THEME_ENV_VAR, "not-a-theme") - assert resolve_theme({"theme": 2}).theme_id == 2 + assert resolve_theme({"theme": 181}).theme_id == 181 def test_blank_env_var_is_ignored(monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setenv(THEME_ENV_VAR, " ") - assert resolve_theme({"theme": 4}).theme_id == 4 + assert resolve_theme({"theme": 207}).theme_id == 207 def test_legacy_theme_table_name_maps_to_default_dark_or_light() -> None: # Stale configs ship a [theme] table; map its name without forcing theme 5. - assert resolve_theme({"theme": {"name": "dark"}}).theme_id == int( - ThemeId.DARK_CLASSIC - ) + assert resolve_theme({"theme": {"name": "dark"}}).theme_id == int(ThemeId.PYSH_DARK) assert resolve_theme({"theme": {"name": "light"}}).theme_id == int( - ThemeId.LIGHT_CLASSIC + ThemeId.PYSH_LIGHT ) def test_legacy_theme_table_id_is_honoured() -> None: - assert resolve_theme({"theme": {"id": 3}}).theme_id == 3 + assert resolve_theme({"theme": {"id": 3}}).theme_id == 381 # id wins over name. 
- assert resolve_theme({"theme": {"id": 8, "name": "light"}}).theme_id == 8 + assert resolve_theme({"theme": {"id": 8, "name": "light"}}).theme_id == 283 def test_legacy_theme_table_without_id_or_name_uses_default() -> None: @@ -229,5 +238,26 @@ def test_legacy_theme_table_without_id_or_name_uses_default() -> None: def test_legacy_colors_table_does_not_override_theme() -> None: # A [colors] table must not change the resolved built-in theme. - cfg = {"theme": 6, "colors": {"keyword": "red", "background": "#000000"}} - assert resolve_theme(cfg).theme_id == 6 + cfg = {"theme": 208, "colors": {"keyword": "red", "background": "#000000"}} + assert resolve_theme(cfg).theme_id == 208 + + +def test_invalid_theme_preserves_current_theme() -> None: + current = get_theme(213) + resolved = resolve_theme({"theme": 999}, current_theme=current) + assert resolved.theme_id == 213 + assert "keeping current theme" in resolved.diagnostics[-1] + + +def test_missing_professional_theme_preserves_current_theme() -> None: + current = get_theme(207) + resolved = resolve_theme({"theme": 101}, current_theme=current) + assert resolved.theme_id == 207 + assert "keeping current theme" in resolved.diagnostics[-1] + + +def test_get_theme_rejects_missing_numbers() -> None: + with pytest.raises(KeyError): + get_theme(101) + with pytest.raises(KeyError): + get_theme(800) diff --git a/tests/docs/test_textmate_dependency_contract.py b/tests/docs/test_textmate_dependency_contract.py new file mode 100644 index 00000000..e1fc08ed --- /dev/null +++ b/tests/docs/test_textmate_dependency_contract.py @@ -0,0 +1,72 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Project: Ecli +# File: tests/docs/test_textmate_dependency_contract.py +# Website: https://www.ecli.io +# Repository: https://github.com/SSobol77/ecli +# PyPI: https://pypi.org/project/ecli-editor/0.0.1/ +# +# Copyright (c) 2026 Siergej Sobolewski +# +# Licensed under the GNU General Public License version 2 only. 
+# See the LICENSE file in the project root for full license text. + +"""Dependency + documentation contract for the TextMate engine (#102). + +Real TextMate tokenization depends on ``python-textmate`` (which pulls +``onigurumacffi`` / the native **Oniguruma** library). These gates ensure the +dependency is actually declared and that install/build/packaging docs tell users +about the native dependency and the safe runtime fallback — so the project never +silently ships "TextMate support" that cannot build or run. +""" + +from __future__ import annotations + +import tomllib +from pathlib import Path + +import pytest + + +REPO_ROOT = Path(__file__).resolve().parents[2] + + +def _read(relative: str) -> str: + path = REPO_ROOT / relative + if not path.is_file(): + pytest.fail(f"required doc missing: {relative}") + return path.read_text(encoding="utf-8") + + +def test_pyproject_declares_python_textmate() -> None: + data = tomllib.loads((REPO_ROOT / "pyproject.toml").read_text(encoding="utf-8")) + dependencies = data.get("project", {}).get("dependencies", []) + assert any(dep.lower().startswith("python-textmate") for dep in dependencies), ( + dependencies + ) + + +def test_install_docs_describe_oniguruma_and_fallback() -> None: + text = _read("docs/INSTALL.md").lower() + assert "onigurumacffi" in text + assert "oniguruma" in text + # Source-build native dependency is documented... + assert "source" in text and ("libonig" in text or "oniguruma development" in text) + # ...and so is the safe runtime fallback when it is unavailable. 
+ assert "fallback" in text or "fall back" in text + + +def test_build_from_source_docs_mention_native_dependency() -> None: + text = _read("docs/contributor/build-from-source.md").lower() + assert "onigurumacffi" in text + assert "oniguruma" in text + assert ( + "libonig" in text or "development header" in text or "devel/oniguruma" in text + ) + + +def test_packaging_release_docs_mention_oniguruma_source_build() -> None: + text = _read("docs/release/packaging-flows.md").lower() + assert "oniguruma" in text + assert "source build" in text or "source builds" in text + assert "python-textmate" in text or "onigurumacffi" in text diff --git a/tests/extensions/test_editor_syntax_adapter.py b/tests/extensions/test_editor_syntax_adapter.py new file mode 100644 index 00000000..7f54fef5 --- /dev/null +++ b/tests/extensions/test_editor_syntax_adapter.py @@ -0,0 +1,93 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Project: Ecli +# File: tests/extensions/test_editor_syntax_adapter.py +# Website: https://www.ecli.io +# Repository: https://github.com/SSobol77/ecli +# PyPI: https://pypi.org/project/ecli-editor/0.0.1/ +# +# Copyright (c) 2026 Siergej Sobolewski +# +# Licensed under the GNU General Public License version 2 only. +# See the LICENSE file in the project root for full license text. + +"""Tests for the narrow, read-only editor syntax adapter (#102). + +The editor exposes extension-backed syntax metadata via +``Ecli._resolve_extension_syntax_metadata`` without changing the legacy +highlighter. These tests build a bare editor (the established +``Ecli.__new__`` pattern) and verify the adapter records metadata, fails safe, +and never disturbs the authoritative legacy highlighting path. 
+""" + +from __future__ import annotations + +from typing import Any + +from pygments.lexers import PythonLexer + +from ecli.core.Ecli import Ecli + + +def _make_editor(config: dict[str, Any], filename: str | None) -> Ecli: + editor = Ecli.__new__(Ecli) + editor.config = config + editor.filename = filename + editor.extension_syntax = None + return editor + + +def test_adapter_records_metadata_for_known_file() -> None: + editor = _make_editor({}, "example.py") + editor._resolve_extension_syntax_metadata() + + resolution = editor.extension_syntax + assert resolution is not None + assert resolution.language_id == "python" + assert resolution.scope_name == "source.python" + assert resolution.used_extension_metadata is True + # Legacy rendering stays authoritative. + assert resolution.fallback_to_legacy is True + + +def test_adapter_falls_back_for_unknown_file() -> None: + editor = _make_editor({}, "mystery.zzz") + editor._resolve_extension_syntax_metadata() + + resolution = editor.extension_syntax + assert resolution is not None + assert resolution.language_id is None + assert resolution.fallback_to_legacy is True + + +def test_adapter_disabled_extensions_layer_clears_metadata() -> None: + editor = _make_editor({"extensions": {"enabled": False}}, "example.py") + editor._resolve_extension_syntax_metadata() + assert editor.extension_syntax is None + + +def test_adapter_never_raises_on_bad_config() -> None: + editor = _make_editor({"extensions": "not-a-table"}, "example.py") + # Must not raise; degrades to a safe resolution or None. 
+ editor._resolve_extension_syntax_metadata() + + +def test_adapter_does_not_disturb_legacy_highlighting() -> None: + editor = _make_editor({}, "example.py") + editor.text = ["def main():", " return 42"] + editor.colors = {"default": 0, "keyword": 7, "number": 3, "comment": 1} + editor.is_256_color_terminal = True + editor._lexer = PythonLexer() + editor.current_language = "python" + editor.custom_syntax_patterns = [] + + editor._resolve_extension_syntax_metadata() + code = ["def main():", " x = 42 # answer"] + result = editor.apply_syntax_highlighting_with_pygments(code, [0, 1]) + + # Legacy highlighting is unaffected: content round-trips and tokens are split. + assert ["".join(tok for tok, _ in line) for line in result] == code + assert len(result[0]) > 1 + # Metadata is exposed alongside the unchanged legacy path. + assert editor.extension_syntax is not None + assert editor.extension_syntax.fallback_to_legacy is True diff --git a/tests/extensions/test_editor_syntax_rendering.py b/tests/extensions/test_editor_syntax_rendering.py new file mode 100644 index 00000000..0f8c8ea0 --- /dev/null +++ b/tests/extensions/test_editor_syntax_rendering.py @@ -0,0 +1,252 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Project: Ecli +# File: tests/extensions/test_editor_syntax_rendering.py +# Website: https://www.ecli.io +# Repository: https://github.com/SSobol77/ecli +# PyPI: https://pypi.org/project/ecli-editor/0.0.1/ +# +# Copyright (c) 2026 Siergej Sobolewski +# +# Licensed under the GNU General Public License version 2 only. +# See the LICENSE file in the project root for full license text. + +"""Editor-facing rendering proofs for #102. + +These tests exercise the *real* legacy highlighting data path +(``detect_language`` -> ``apply_syntax_highlighting_with_pygments`` -> +``_get_tokenized_line``) on a bare editor, and prove: + +* the legacy Pygments highlighter still produces **visible** (multi-colour) + spans and differentiates file types — i.e. 
it was not broken by #102; +* the #102 extension metadata is exposed for the active file but never changes, + disables, or bypasses the legacy tokens; +* selecting ``syntax_engine = "extension"`` still renders **identically** through + the legacy path (no tokenizer yet), with ``fallback_to_legacy = True``; +* the ``[editor].syntax_highlighting`` toggle still works. + +The colours are plain ints standing in for the distinct curses colour pairs that +``init_colors`` builds from the active theme, so the data path can be verified +without a live terminal. +""" + +from __future__ import annotations + +import copy + +from ecli.core.Ecli import Ecli +from ecli.utils.utils import DEFAULT_CONFIG + + +# Distinct stand-in colour attributes for the semantic roles the highlighter +# maps Pygments tokens onto (see Ecli._get_tokenized_line). +COLORS: dict[str, int] = { + "default": 0, + "keyword": 1, + "string": 2, + "comment": 3, + "number": 4, + "function": 5, + "class": 6, + "type": 7, + "decorator": 8, + "operator": 9, + "builtin": 10, + "tag": 11, + "attribute": 12, + "error": 13, +} + +PYTHON_CODE = ["import os", "def main():", " x = 42 # answer", " return 'hi'"] +MARKDOWN_DOC = ["# Title", "Some **bold** text and `code`.", "> a quote"] +YAML_DOC = ["version: '3'", "services:", " app: # comment"] + + +def _make_editor(filename: str, text: list[str], engine: str = "legacy") -> Ecli: + editor = Ecli.__new__(Ecli) + config = copy.deepcopy(DEFAULT_CONFIG) + config.setdefault("extensions", {})["syntax_engine"] = engine + editor.config = config + editor.filename = filename + editor.text = text + editor.colors = COLORS + editor.is_256_color_terminal = True + editor._lexer = None + editor.current_language = None + editor.custom_syntax_patterns = [] + editor.extension_syntax = None + editor._extension_highlighter = None + return editor + + +def _distinct_colors(rendered: list[list[tuple[str, int]]]) -> set[int]: + return {attr for line in rendered for _text, attr in line} + + +# 
--------------------------------------------------------------------------- # +# Legacy highlighting is visibly active and differentiates file types. +# --------------------------------------------------------------------------- # + + +def test_legacy_highlighting_produces_visible_spans() -> None: + editor = _make_editor("example.py", PYTHON_CODE) + editor.detect_language() + rendered = editor.apply_syntax_highlighting_with_pygments( + PYTHON_CODE, list(range(len(PYTHON_CODE))) + ) + + assert editor.current_language == "python" + # Content round-trips and more than one colour is emitted (visible highlight). + assert ["".join(t for t, _ in line) for line in rendered] == PYTHON_CODE + assert len(_distinct_colors(rendered)) > 1 + + +def test_legacy_highlighting_differentiates_file_types() -> None: + py = _make_editor("example.py", PYTHON_CODE) + py.detect_language() + md = _make_editor("README.md", MARKDOWN_DOC) + md.detect_language() + + assert py.current_language == "python" + assert md.current_language == "markdown" + py_rendered = py.apply_syntax_highlighting_with_pygments( + PYTHON_CODE, list(range(len(PYTHON_CODE))) + ) + md_rendered = md.apply_syntax_highlighting_with_pygments( + MARKDOWN_DOC, list(range(len(MARKDOWN_DOC))) + ) + assert len(_distinct_colors(py_rendered)) > 1 + assert len(_distinct_colors(md_rendered)) > 1 + + +def test_yaml_highlighting_is_visible_for_required_filenames() -> None: + for filename in (".coderabbit.yaml", "docker-compose.yml", "config.yaml"): + editor = _make_editor(filename, YAML_DOC, engine="extension") + editor.detect_language() + rendered = editor.apply_syntax_highlighting_with_pygments( + YAML_DOC, list(range(len(YAML_DOC))) + ) + assert editor.extension_syntax is not None + assert editor.extension_syntax.language_id in {"yaml", "dockercompose"} + assert ["".join(text for text, _attr in line) for line in rendered] == YAML_DOC + assert len(_distinct_colors(rendered)) > 1 + + +def test_gitignore_status_language_is_never_sql() 
-> None: + editor = _make_editor(".gitignore", ["*.pyc", "build/"], engine="extension") + editor.detect_language() + assert editor.extension_syntax is not None + assert editor.extension_syntax.language_id == "ignore" + assert editor.current_language == "ignore" + assert "sql" not in editor.current_language.lower() + + +def test_log_files_are_plain_log_not_sql() -> None: + for filename in ( + "freebsd-0.2.2-fail.log", + "editor.log", + "qemu.raw.log", + ): + editor = _make_editor(filename, ["ERROR SELECT -> 123"], engine="extension") + editor.detect_language() + assert editor.extension_syntax is not None + assert editor.extension_syntax.language_id == "log" + assert editor.current_language == "log" + assert "sql" not in editor.current_language.lower() + + +def test_syntax_toggle_still_disables_highlighting() -> None: + editor = _make_editor("example.py", PYTHON_CODE) + editor.config["editor"]["syntax_highlighting"] = False + editor.detect_language() + rendered = editor.apply_syntax_highlighting_with_pygments( + PYTHON_CODE, list(range(len(PYTHON_CODE))) + ) + # Disabled => exactly one default segment per line. + assert all(len(line) == 1 for line in rendered) + assert _distinct_colors(rendered) == {COLORS["default"]} + + +# --------------------------------------------------------------------------- # +# #102 metadata is exposed but never affects legacy rendering. 
+# --------------------------------------------------------------------------- # + + +def test_editor_receives_extension_metadata_for_active_file() -> None: + editor = _make_editor("example.py", PYTHON_CODE) + editor.detect_language() + + resolution = editor.extension_syntax + assert resolution is not None + assert resolution.language_id == "python" + assert resolution.scope_name == "source.python" + assert resolution.grammar_path.startswith("src/ecli/extensions/") + + +def test_extension_metadata_does_not_change_legacy_tokens() -> None: + editor = _make_editor("example.py", PYTHON_CODE) + editor.detect_language() + indices = list(range(len(PYTHON_CODE))) + baseline = editor.apply_syntax_highlighting_with_pygments(PYTHON_CODE, indices) + + # The rendering path must not read extension metadata at all: mutating it + # (including to nonsense) cannot change the produced tokens. + editor.extension_syntax = None + after_none = editor.apply_syntax_highlighting_with_pygments(PYTHON_CODE, indices) + editor.extension_syntax = "garbage" # type: ignore[assignment] + after_garbage = editor.apply_syntax_highlighting_with_pygments(PYTHON_CODE, indices) + + assert baseline == after_none == after_garbage + assert len(_distinct_colors(baseline)) > 1 + + +def test_extension_engine_renders_textmate_spans() -> None: + indices = list(range(len(PYTHON_CODE))) + extension = _make_editor("example.py", PYTHON_CODE, engine="extension") + extension.detect_language() + + # The TextMate line highlighter is engaged and metadata reports no fallback. + assert extension._extension_highlighter is not None + assert extension.extension_syntax.syntax_engine == "extension" + assert extension.extension_syntax.fallback_to_legacy is False + + rendered = extension.apply_syntax_highlighting_with_pygments(PYTHON_CODE, indices) + # The TextMate path produces visible, multi-colour spans aligned to the text. 
+ assert ["".join(t for t, _ in line) for line in rendered] == PYTHON_CODE + assert len(_distinct_colors(rendered)) > 1 + + +def test_legacy_engine_does_not_engage_textmate() -> None: + indices = list(range(len(PYTHON_CODE))) + legacy = _make_editor("example.py", PYTHON_CODE, engine="legacy") + legacy.detect_language() + + # Legacy stays on the Pygments path; the TextMate highlighter is not engaged. + assert legacy._extension_highlighter is None + rendered = legacy.apply_syntax_highlighting_with_pygments(PYTHON_CODE, indices) + assert ["".join(t for t, _ in line) for line in rendered] == PYTHON_CODE + assert len(_distinct_colors(rendered)) > 1 + + +def test_textmate_spans_reach_renderer_as_distinct_attributes() -> None: + # Rendering-level proof: the editor converts TextMate scope categories into + # the curses colour attributes the draw layer consumes, and keyword, string, + # and comment end up as THREE different attributes (not all default). + code = ["def f(): s = 'hi' # c"] + editor = _make_editor("example.py", code, engine="extension") + editor.detect_language() + assert editor._extension_highlighter is not None + + rendered = editor.apply_syntax_highlighting_with_pygments(code, [0]) + line = rendered[0] + # Build a quick token-text -> attr map to read specific tokens' colours. + attr_by_text = dict(line) + keyword_attr = attr_by_text.get("def") + default_attr = COLORS["default"] + + assert keyword_attr is not None and keyword_attr != default_attr + # The string and comment characters carry non-default, mutually distinct attrs. 
+ string_attr = next(attr for text, attr in line if "hi" in text) + comment_attr = next(attr for text, attr in line if "#" in text) + assert len({keyword_attr, string_attr, comment_attr}) == 3 + assert default_attr not in {keyword_attr, string_attr, comment_attr} diff --git a/tests/extensions/test_extension_language_detection.py b/tests/extensions/test_extension_language_detection.py index 0d3295ba..0e159a9b 100644 --- a/tests/extensions/test_extension_language_detection.py +++ b/tests/extensions/test_extension_language_detection.py @@ -36,14 +36,55 @@ REPRESENTATIVE_EXTENSIONS = ( ("main.py", "python"), + ("pyproject.toml", "toml"), ("data.json", "json"), ("app.js", "javascript"), + ("app.jsx", "javascriptreact"), ("app.ts", "typescript"), + ("app.tsx", "typescriptreact"), ("README.md", "markdown"), + ("doc.markdown", "markdown"), + (".coderabbit.yaml", "yaml"), + ("config.yaml", "yaml"), + ("Dockerfile", "dockerfile"), + ("build.dockerfile", "dockerfile"), + ("Makefile", "makefile"), + ("makefile", "makefile"), + ("rules.mk", "makefile"), + ("boot.asm", "asm"), + ("boot.s", "asm"), + ("boot.S", "asm"), ("core.c", "c"), ("core.cpp", "cpp"), ("core.h", "cpp"), - ("run.bat", "bat"), + ("core.cxx", "cpp"), + ("core.cc", "cpp"), + ("core.hpp", "cpp"), + ("core.hxx", "cpp"), + ("Main.java", "java"), + ("lib.rs", "rust"), + ("index.html", "html"), + ("index.htm", "html"), + ("main.adb", "ada"), + ("main.ads", "ada"), + ("main.ada", "ada"), + ("main.spark", "ada"), + ("solver.f", "fortran"), + ("solver.for", "fortran"), + ("solver.f90", "fortran"), + ("solver.f95", "fortran"), + ("solver.f03", "fortran"), + ("solver.f08", "fortran"), + ("script.pl", "perl"), + ("Module.pm", "perl"), + ("test.t", "perl"), + ("index.php", "php"), + ("index.phtml", "php"), + ("init.lua", "lua"), + ("Program.cs", "csharp"), + ("freebsd-0.2.2-fail.log", "log"), + ("editor.log", "log"), + ("qemu.raw.log", "log"), ) @@ -71,7 +112,7 @@ def test_representative_extension_detection( result = 
detector.detect(file_name) assert result.matched assert result.language_id == expected - assert result.matched_by == "extension" + assert result.matched_by in {"filename", "filename_pattern", "extension"} def test_exact_filename_detection(detector: LanguageDetector) -> None: @@ -95,6 +136,30 @@ def test_exact_filename_beats_extension(detector: LanguageDetector) -> None: assert result.matched_by == "filename" +def test_gitignore_is_exact_filename_not_sql(detector: LanguageDetector) -> None: + result = detector.detect(".gitignore") + assert result.language_id == "ignore" + assert result.matched_by == "filename" + assert "sql" not in result.candidates + + +@pytest.mark.parametrize( + "file_name", ("freebsd-0.2.2-fail.log", "editor.log", "qemu.raw.log") +) +def test_log_files_are_never_sql(detector: LanguageDetector, file_name: str) -> None: + result = detector.detect(file_name) + assert result.language_id == "log" + assert "sql" not in result.candidates + + +def test_yaml_detection_from_required_filenames(detector: LanguageDetector) -> None: + assert detector.detect(".coderabbit.yaml").language_id == "yaml" + assert detector.detect("config.yaml").language_id == "yaml" + compose = detector.detect("docker-compose.yml") + assert compose.language_id == "dockercompose" + assert compose.matched_by == "filename_pattern" + + def test_extension_detection_is_case_insensitive(detector: LanguageDetector) -> None: assert detector.detect("Main.PY").language_id == "python" assert detector.detect("Main.py").language_id == "python" diff --git a/tests/extensions/test_extension_layer_config.py b/tests/extensions/test_extension_layer_config.py index 17aa6817..9152c7c5 100644 --- a/tests/extensions/test_extension_layer_config.py +++ b/tests/extensions/test_extension_layer_config.py @@ -37,7 +37,7 @@ "metadata_registry": True, "grammar_catalog": True, "language_detection": True, - "syntax_engine": "legacy", + "syntax_engine": "extension", } @@ -65,7 +65,7 @@ def 
test_from_config_parses_default_config() -> None: assert config.metadata_registry assert config.grammar_catalog assert config.language_detection - assert config.syntax_engine == "legacy" + assert config.syntax_engine == "extension" assert config.diagnostics == () @@ -99,10 +99,14 @@ def test_syntax_engine_legacy_is_preserved() -> None: assert config.diagnostics == () -def test_syntax_engine_extension_falls_back_to_legacy() -> None: +def test_syntax_engine_extension_is_accepted() -> None: + # As of #102, "extension" is a valid selection of the extension-backed + # syntax-service boundary. It does not enable any runtime; rendering falls + # back to legacy whenever the tokenizer or a grammar is unavailable. config = ExtensionLayerConfig.from_section({"syntax_engine": "extension"}) - assert config.syntax_engine == "legacy" - assert any("not available until #102" in d.message for d in config.diagnostics) + assert config.syntax_engine == "extension" + assert config.uses_legacy_syntax is False + assert config.diagnostics == () def test_unknown_syntax_engine_falls_back_to_legacy() -> None: @@ -140,14 +144,34 @@ def test_runtime_execution_keys_are_ignored_with_diagnostic() -> None: # --------------------------------------------------------------------------- # -# Legacy regex highlighter is not removed in #101. +# Legacy highlighter remains available; obsolete tables moved out of config.toml. # --------------------------------------------------------------------------- # -def test_legacy_regex_highlighter_is_preserved() -> None: +def test_global_highlighting_toggle_is_preserved() -> None: config = _config_toml() - # The legacy regex highlighter config and toggle remain in place. - assert "syntax_highlighting" in config - assert "python" in config["syntax_highlighting"] + # The global visible-highlighting switch stays in both config.toml and + # DEFAULT_CONFIG and applies to both engines. 
assert config["editor"]["syntax_highlighting"] is True assert DEFAULT_CONFIG["editor"]["syntax_highlighting"] is True + + +def test_obsolete_tables_removed_from_config_toml() -> None: + config = _config_toml() + # The old internal data tables no longer ship in the user-facing config. + assert "syntax_highlighting" not in config + assert "comments" not in config + assert "supported_formats" not in config + # They remain available as internal code defaults so legacy still works. + assert "comments" in DEFAULT_CONFIG + assert "supported_formats" in DEFAULT_CONFIG + + +def test_config_toml_and_default_config_agree_on_user_sections() -> None: + config = _config_toml() + # Every key the user-facing config.toml ships must have the same value in the + # in-code DEFAULT_CONFIG fallback (DEFAULT_CONFIG may carry extra internal + # keys, e.g. editor.default_new_filename). + for section in ("editor", "fonts", "linter", "logging", "extensions", "settings"): + for key, value in config[section].items(): + assert DEFAULT_CONFIG[section].get(key) == value, f"{section}.{key}" diff --git a/tests/extensions/test_extension_manifest_registry.py b/tests/extensions/test_extension_manifest_registry.py index 0a2a2c63..a6d60da1 100644 --- a/tests/extensions/test_extension_manifest_registry.py +++ b/tests/extensions/test_extension_manifest_registry.py @@ -32,6 +32,7 @@ from ecli.extensions.ecli_integration import ( ExtensionManifest, ExtensionRegistry, + ThemeContribution, build_registry, manifest as manifest_module, paths as paths_module, @@ -152,6 +153,17 @@ def test_snippet_lookup_where_present(registry: ExtensionRegistry) -> None: assert registry.find_snippets_by_language("cpp"), "expected cpp snippets" +def test_theme_contributions_are_metadata_only(registry: ExtensionRegistry) -> None: + themes = registry.list_themes() + assert themes, "expected imported theme contributions" + assert all(isinstance(theme, ThemeContribution) for theme in themes) + monokai = 
registry.find_theme_by_id("Monokai") + assert monokai is not None + assert monokai.path_repo_relative == ( + "src/ecli/extensions/theme-monokai/themes/monokai-color-theme.json" + ) + + def test_configuration_contributions_are_metadata_only( registry: ExtensionRegistry, ) -> None: @@ -307,6 +319,7 @@ def test_resolved_paths_stay_under_extensions_root( records = ( [g.path_repo_relative for g in manifest.grammars] + [s.path_repo_relative for s in manifest.snippets] + + [theme.path_repo_relative for theme in manifest.themes] + [language.configuration_repo_path for language in manifest.languages] ) for repo_path in records: diff --git a/tests/extensions/test_extension_syntax_service.py b/tests/extensions/test_extension_syntax_service.py new file mode 100644 index 00000000..19d0a657 --- /dev/null +++ b/tests/extensions/test_extension_syntax_service.py @@ -0,0 +1,292 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Project: Ecli +# File: tests/extensions/test_extension_syntax_service.py +# Website: https://www.ecli.io +# Repository: https://github.com/SSobol77/ecli +# PyPI: https://pypi.org/project/ecli-editor/0.0.1/ +# +# Copyright (c) 2026 Siergej Sobolewski +# +# Licensed under the GNU General Public License version 2 only. +# See the LICENSE file in the project root for full license text. + +"""Contract tests for the extension-backed syntax-service boundary (#102). + +The service resolves language/grammar metadata from the #101 catalog and +detection layers and reports the rendering decision. The legacy engine always +keeps the legacy highlighter authoritative; the extension engine stops falling +back once the tokenizer is available and a grammar is resolved. These +tests cover construction, config-driven engine selection, representative +metadata resolution, unknown/invalid fallback, deterministic diagnostics, and +the absence of any runtime-execution capability. 
+""" + +from __future__ import annotations + +import inspect +import json +from collections.abc import Mapping +from pathlib import Path + +import pytest + +from ecli.extensions.ecli_integration import ( + EXTENSION_TOKENIZATION_AVAILABLE, + ExtensionLayerConfig, + SyntaxResolution, + SyntaxService, + build_syntax_service, + syntax_service as syntax_service_module, +) + + +# (file name, expected language id, expected TextMate scope) from real metadata. +REPRESENTATIVE_FILES = ( + ("example.py", "python", "source.python"), + ("pyproject.toml", "toml", None), + ("package.json", "json", "source.json"), + ("tsconfig.json", "jsonc", "source.json.comments"), + ("main.ts", "typescript", "source.ts"), + ("app.tsx", "typescriptreact", "source.tsx"), + ("app.js", "javascript", "source.js"), + ("app.jsx", "javascriptreact", "source.js.jsx"), + ("README.md", "markdown", "text.html.markdown"), + (".gitignore", "ignore", "source.ignore"), + ("editor.log", "log", "text.log"), + (".coderabbit.yaml", "yaml", "source.yaml"), + ("docker-compose.yml", "dockercompose", "source.yaml"), + ("config.yaml", "yaml", "source.yaml"), + ("Dockerfile", "dockerfile", "source.dockerfile"), + ("build.dockerfile", "dockerfile", "source.dockerfile"), + ("Makefile", "makefile", "source.makefile"), + ("rules.mk", "makefile", "source.makefile"), + ("boot.asm", "asm", None), + ("boot.s", "asm", None), + ("main.c", "c", "source.c"), + ("main.cpp", "cpp", "source.cpp"), + ("main.h", "cpp", "source.cpp"), + ("Main.java", "java", "source.java"), + ("lib.rs", "rust", "source.rust"), + ("index.html", "html", "text.html.derivative"), + ("main.adb", "ada", None), + ("solver.f90", "fortran", None), + ("script.pl", "perl", "source.perl"), + ("index.php", "php", "source.php"), + ("init.lua", "lua", "source.lua"), + ("Program.cs", "csharp", "source.cs"), + ("script.bat", "bat", "source.batchfile"), +) + + +@pytest.fixture(scope="module") +def legacy_service() -> SyntaxService: + return 
build_syntax_service(ExtensionLayerConfig()) + + +@pytest.fixture(scope="module") +def extension_service() -> SyntaxService: + return build_syntax_service( + ExtensionLayerConfig.from_section({"syntax_engine": "extension"}) + ) + + +# --------------------------------------------------------------------------- # +# Construction + engine selection. +# --------------------------------------------------------------------------- # + + +def test_service_constructs_from_default_config(legacy_service: SyntaxService) -> None: + assert isinstance(legacy_service, SyntaxService) + assert legacy_service.config.syntax_engine == "legacy" + + +def test_service_constructs_from_raw_mapping() -> None: + service = build_syntax_service({"extensions": {"syntax_engine": "extension"}}) + assert service.config.syntax_engine == "extension" + + +def test_engine_selection_is_config_driven() -> None: + assert build_syntax_service(ExtensionLayerConfig()).config.syntax_engine == "legacy" + extension = build_syntax_service( + ExtensionLayerConfig.from_section({"syntax_engine": "extension"}) + ) + assert extension.config.syntax_engine == "extension" + + +# --------------------------------------------------------------------------- # +# Representative metadata resolution. 
+# --------------------------------------------------------------------------- # + + +@pytest.mark.parametrize(("file_name", "language_id", "scope"), REPRESENTATIVE_FILES) +def test_representative_resolution( + legacy_service: SyntaxService, + file_name: str, + language_id: str, + scope: str | None, +) -> None: + resolution = legacy_service.resolve(file_name) + assert resolution.language_id == language_id + assert resolution.scope_name == scope + assert resolution.used_extension_metadata is True + if scope is None: + assert resolution.grammar_path is None + assert resolution.fallback_to_legacy is True + assert any( + "required language grammar missing" in d.message + for d in resolution.diagnostics + ) + else: + assert resolution.grammar_path is not None + assert resolution.grammar_path.startswith("src/ecli/extensions/") + + +def test_grammar_path_points_into_extensions_tree( + legacy_service: SyntaxService, +) -> None: + resolution = legacy_service.resolve("example.py") + assert resolution.grammar_path == ( + "src/ecli/extensions/python/syntaxes/MagicPython.tmLanguage.json" + ) + + +# --------------------------------------------------------------------------- # +# Legacy engine stays authoritative; extension engine tokenizes when able. +# --------------------------------------------------------------------------- # + + +def test_textmate_tokenizer_is_available() -> None: + # The optional python-textmate engine is a project dependency as of #102. 
+ if not EXTENSION_TOKENIZATION_AVAILABLE: + pytest.skip("python-textmate tokenizer is not installed") + assert EXTENSION_TOKENIZATION_AVAILABLE is True + + +def test_legacy_engine_always_falls_back_to_legacy( + legacy_service: SyntaxService, +) -> None: + for file_name, _lang, _scope in REPRESENTATIVE_FILES: + assert legacy_service.resolve(file_name).fallback_to_legacy is True + + +def test_extension_engine_does_not_fall_back_when_tokenizer_available( + extension_service: SyntaxService, +) -> None: + if not EXTENSION_TOKENIZATION_AVAILABLE: + pytest.skip("python-textmate tokenizer is not installed") + # With the tokenizer available and a grammar resolved, the extension engine + # renders via TextMate (no legacy fallback at the metadata level). + resolution = extension_service.resolve("example.py") + assert resolution.syntax_engine == "extension" + assert resolution.used_extension_metadata is True + assert resolution.fallback_to_legacy is False + + +# --------------------------------------------------------------------------- # +# Fallback behavior. 
+# --------------------------------------------------------------------------- # + + +def test_unknown_file_falls_back_safely(legacy_service: SyntaxService) -> None: + resolution = legacy_service.resolve("unknown.zzz") + assert resolution.language_id is None + assert resolution.scope_name is None + assert resolution.grammar_path is None + assert resolution.used_extension_metadata is False + assert resolution.fallback_to_legacy is True + + +def test_missing_filename_falls_back_safely(legacy_service: SyntaxService) -> None: + resolution = legacy_service.resolve(None) + assert not resolution.used_extension_metadata + assert resolution.fallback_to_legacy is True + + +def test_disabled_extensions_layer_falls_back(legacy_service: SyntaxService) -> None: + service = build_syntax_service(ExtensionLayerConfig(enabled=False)) + resolution = service.resolve("example.py") + assert resolution.used_extension_metadata is False + assert resolution.fallback_to_legacy is True + + +def test_invalid_config_falls_back_safely() -> None: + config = ExtensionLayerConfig.from_section({"syntax_engine": "tree-sitter"}) + assert config.syntax_engine == "legacy" + service = build_syntax_service(config) + resolution = service.resolve("example.py") + assert resolution.syntax_engine == "legacy" + assert resolution.fallback_to_legacy is True + # The config-level diagnostic is carried through deterministically. + assert any("unknown syntax_engine" in d.message for d in resolution.diagnostics) + + +# --------------------------------------------------------------------------- # +# Determinism + no runtime execution. 
+# --------------------------------------------------------------------------- # + + +def test_resolution_is_deterministic() -> None: + service = build_syntax_service(ExtensionLayerConfig()) + assert service.resolve("main.cpp") == service.resolve("main.cpp") + + +def test_resolution_object_is_frozen(legacy_service: SyntaxService) -> None: + resolution = legacy_service.resolve("example.py") + assert isinstance(resolution, SyntaxResolution) + with pytest.raises((AttributeError, TypeError)): + resolution.language_id = "hacked" # type: ignore[misc] + + +def test_service_has_no_runtime_execution_primitives() -> None: + # Scan only for genuine code-execution primitives; prose in the module + # docstring legitimately describes what the service does *not* do. + source = inspect.getsource(syntax_service_module) + for token in ( + "subprocess", + "os.system", + "os.popen", + "pty.", + "eval(", + "exec(", + "__import__(", + ): + assert token not in source, f"syntax_service must not reference {token!r}" + + +# --------------------------------------------------------------------------- # +# Fixture isolation (no dependency on the real cache). 
+# --------------------------------------------------------------------------- # + + +def _make_extension(root: Path, name: str, manifest: Mapping[str, object]) -> None: + directory = root / name + directory.mkdir(parents=True) + (directory / "package.json").write_text(json.dumps(manifest), encoding="utf-8") + + +def test_service_resolves_against_fixture_root(tmp_path: Path) -> None: + _make_extension( + tmp_path, + "fixture-lang", + { + "name": "fixture-lang", + "contributes": { + "languages": [{"id": "fixturelang", "extensions": [".fxt"]}], + "grammars": [ + { + "language": "fixturelang", + "scopeName": "source.fixturelang", + "path": "./g.json", + } + ], + }, + }, + ) + (tmp_path / "fixture-lang" / "g.json").write_text("{}", encoding="utf-8") + + service = build_syntax_service(ExtensionLayerConfig(), root=tmp_path) + resolution = service.resolve("demo.fxt") + assert resolution.language_id == "fixturelang" + assert resolution.scope_name == "source.fixturelang" + assert resolution.fallback_to_legacy is True diff --git a/tests/extensions/test_extension_theme_registry.py b/tests/extensions/test_extension_theme_registry.py new file mode 100644 index 00000000..129be6c1 --- /dev/null +++ b/tests/extensions/test_extension_theme_registry.py @@ -0,0 +1,190 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Project: Ecli +# File: tests/extensions/test_extension_theme_registry.py +# Website: https://www.ecli.io +# Repository: https://github.com/SSobol77/ecli +# PyPI: https://pypi.org/project/ecli-editor/0.0.1/ +# +# Copyright (c) 2026 Siergej Sobolewski +# +# Licensed under the GNU General Public License version 2 only. +# See the LICENSE file in the project root for full license text. 
+ +"""Contract tests for the extension-backed theme registry.""" + +from __future__ import annotations + +import inspect + +from ecli.extensions.ecli_integration import ( + TARGET_THEME_NUMBERS, + ThemeRegistry, + build_theme_registry, + theme_registry as theme_registry_module, +) +from ecli.utils.themes import get_theme, resolve_theme + + +def test_theme_registry_loads_real_contributed_themes() -> None: + registry = build_theme_registry() + assert isinstance(registry, ThemeRegistry) + names = {theme.name for theme in registry.list_available_extension_themes()} + assert {"Dark+", "Light+", "Monokai", "Quiet Light"} <= names + + +def test_professional_theme_numbers_are_source_of_truth() -> None: + registry = build_theme_registry() + expected = { + 104: "Visual Studio Light", + 106: "Light Modern", + 107: "Light+", + 108: "Quiet Light", + 109: "Solarized Light", + 204: "Visual Studio Dark", + 206: "Dark Modern", + 207: "Dark+", + 208: "Monokai", + 209: "Monokai Dimmed", + 210: "Tomorrow Night Blue", + 211: "Abyss", + 213: "Kimbie Dark", + 214: "Solarized Dark", + 215: "Red", + 301: "Dark High Contrast", + 304: "Light High Contrast", + } + for theme_id, name in expected.items(): + theme = registry.get_theme(theme_id) + assert theme is not None, (theme_id, name) + assert theme.name == name + assert theme.token_colors + + +def test_missing_target_themes_are_diagnosed_not_faked() -> None: + registry = build_theme_registry() + missing = registry.missing_target_names() + assert "GitHub Dark" in missing + assert "Atom One Dark" in missing + assert registry.get_theme(202) is None + assert registry.get_theme(212) is None + assert any("GitHub Dark" in d.message for d in registry.list_diagnostics()) + + +def test_theme_jsonc_and_includes_are_resolved() -> None: + dark_plus = build_theme_registry().get_theme(207) + assert dark_plus is not None + # dark_plus.json includes dark_vs.json; the merged result therefore has both + # base editor colours and Dark+-specific token rules. 
+ assert dark_plus.colors["editor.background"] == "#1E1E1E" + assert dark_plus.resolve_token_style("source entity.name.function").foreground + + +def test_token_color_matching_prefers_specific_scope() -> None: + dark_plus = build_theme_registry().get_theme(207) + assert dark_plus is not None + generic = dark_plus.resolve_token_style("source keyword") + specific = dark_plus.resolve_token_style("source keyword.control") + assert generic.foreground is not None + assert specific.foreground is not None + assert specific.specificity >= generic.specificity + + +def test_utils_theme_resolution_uses_extension_theme_colours() -> None: + palette = resolve_theme({"theme": 207}) + assert palette.theme_id == 207 + assert palette.name == "Dark+" + assert palette.background == "#1E1E1E" + # Comes from the imported theme's tokenColors, not a hand-maintained ECLI + # bright palette. + assert palette.keyword == "#C586C0" + assert palette.comment == "#6A9955" + + +def test_kimbie_dark_uses_imported_theme_json() -> None: + registry = build_theme_registry() + theme = registry.get_theme(213) + assert theme is not None + assert theme.name == "Kimbie Dark" + assert theme.path_repo_relative.endswith( + "theme-kimbie-dark/themes/kimbie-dark-color-theme.json" + ) + palette = resolve_theme({"theme": 213}) + assert palette.name == "Kimbie Dark" + assert palette.background == theme.colors["editor.background"] + assert palette.string != palette.keyword + + +def test_markdown_roles_do_not_collapse_under_kimbie_dark() -> None: + palette = resolve_theme({"theme": 213}) + markdown_roles = { + palette.function, + palette.type_, + palette.string, + palette.comment, + palette.operator, + } + assert len(markdown_roles) >= 4 + assert palette.comment not in {palette.keyword, palette.string} + + +def test_light_modern_readability_roles_are_distinct() -> None: + palette = resolve_theme({"theme": 106}) + assert palette.name == "Light Modern" + assert palette.background == "#FFFFFF" + assert palette.foreground 
!= palette.background + assert palette.comment != palette.foreground + assert palette.keyword != palette.string + + +def test_builtin_compatibility_themes_remain_available() -> None: + assert get_theme(181).name == "PySH Light" + assert get_theme(281).name == "PySH Dark" + assert get_theme(382).name == "ECLI High Contrast Dark" + + +def test_invalid_theme_number_reports_diagnostic() -> None: + palette = resolve_theme({"theme": 202}) + assert palette.theme_id == 207 + assert palette.diagnostics + assert "Invalid theme" in palette.diagnostics[-1] + + +def test_numbering_policy_ranges_are_enforced() -> None: + registry = build_theme_registry() + for number, name in TARGET_THEME_NUMBERS.items(): + theme = registry.get_theme(number) + if theme is None: + continue + assert theme.name == name + if 100 <= number <= 199: + assert theme.theme_type == "light" + elif 200 <= number <= 299: + assert theme.theme_type == "dark" + elif 300 <= number <= 399: + assert theme.theme_type == "high-contrast" + else: # pragma: no cover - protects future contract edits + raise AssertionError(f"unexpected theme range: {number}") + + +def test_reserved_custom_theme_range_is_not_silently_assigned() -> None: + registry = build_theme_registry() + assert all( + theme.number is None or not 800 <= theme.number <= 899 + for theme in registry.themes + ) + assert resolve_theme({"theme": 800}).theme_id == 207 + + +def test_theme_registry_has_no_execution_primitives() -> None: + source = inspect.getsource(theme_registry_module) + for token in ( + "subprocess", + "os.system", + "os.popen", + "pty.", + "eval(", + "exec(", + "__import__(", + ): + assert token not in source, f"theme_registry must not reference {token!r}" diff --git a/tests/extensions/test_no_fake_textmate_support.py b/tests/extensions/test_no_fake_textmate_support.py new file mode 100644 index 00000000..d0d18691 --- /dev/null +++ b/tests/extensions/test_no_fake_textmate_support.py @@ -0,0 +1,207 @@ +# SPDX-License-Identifier: GPL-2.0-only 
+# +# Project: Ecli +# File: tests/extensions/test_no_fake_textmate_support.py +# Website: https://www.ecli.io +# Repository: https://github.com/SSobol77/ecli +# PyPI: https://pypi.org/project/ecli-editor/0.0.1/ +# +# Copyright (c) 2026 Siergej Sobolewski +# +# Licensed under the GNU General Public License version 2 only. +# See the LICENSE file in the project root for full license text. + +"""Acceptance gates: no fake TextMate support, no SQL fallback (#102). + +These prove behaviour the previous metadata-only tests could not: + +* **No fake support.** When a required language grammar is missing from the + imported tree, ECLI must report the missing grammar and fall back to legacy — + it must never claim a TextMate scope/grammar it does not have. +* **No SQL fallback.** Real logs, dotfiles, and plain text — even when their + *content* looks like SQL — must never be detected as SQL/Transact-SQL. Only a + genuinely SQL-named file may be SQL. +* **Safe fallback when the tokenizer is missing.** With ``python-textmate`` + unavailable, the service degrades to legacy and says so via a diagnostic. +""" + +from __future__ import annotations + +import copy + +import pytest + +from ecli.core.Ecli import Ecli +from ecli.extensions.ecli_integration import build_syntax_service, syntax_service as svc +from ecli.extensions.ecli_integration.config import ExtensionLayerConfig +from ecli.utils.utils import DEFAULT_CONFIG + + +_STYLE_COLORS = { + name: index + for index, name in enumerate( + [ + "default", + "keyword", + "string", + "comment", + "number", + "constant", + "type", + "function", + "variable", + "tag", + "attribute", + "builtin", + "operator", + "decorator", + "error", + "punctuation", + "class", + ] + ) +} + +# Languages the project requires but whose grammar is *not* in the imported tree. 
+REQUIRED_MISSING = ("pyproject.toml", "boot.asm", "main.ada", "solver.f90") + + +def _make_editor( + filename: str | None, text: list[str], engine: str = "extension" +) -> Ecli: + editor = Ecli.__new__(Ecli) + config = copy.deepcopy(DEFAULT_CONFIG) + extensions = config.setdefault("extensions", {}) + extensions["syntax_engine"] = engine + extensions["enabled"] = True + editor.config = config + editor.filename = filename + editor.text = text + editor.colors = _STYLE_COLORS + editor.is_256_color_terminal = True + editor._lexer = None + editor.current_language = None + editor.custom_syntax_patterns = [] + editor.extension_syntax = None + editor._extension_highlighter = None + editor._modified = False + editor._buffer_edit_revision = 0 + editor.detect_language() + return editor + + +def _service() -> svc.SyntaxService: + return build_syntax_service( + ExtensionLayerConfig.from_section({"syntax_engine": "extension"}) + ) + + +# --------------------------------------------------------------------------- # +# No fake support: missing grammar => report + safe fallback, never a claim. +# --------------------------------------------------------------------------- # + + +@pytest.mark.parametrize("filename", REQUIRED_MISSING) +def test_missing_grammar_is_reported_and_not_faked(filename: str) -> None: + resolution = _service().resolve(filename) + # The language is recognised by name... + assert resolution.language_id is not None + # ...but ECLI must NOT pretend to have a TextMate grammar/scope for it. + assert resolution.scope_name is None + assert resolution.grammar_path is None + assert resolution.has_grammar is False + assert resolution.fallback_to_legacy is True + # A diagnostic must explicitly report the missing required grammar. 
+ messages = [d.message.lower() for d in resolution.diagnostics] + assert any( + "grammar missing" in m and resolution.language_id in m for m in messages + ), messages + + +@pytest.mark.parametrize("filename", REQUIRED_MISSING) +def test_missing_grammar_yields_no_highlighter(filename: str) -> None: + assert _service().build_line_highlighter(filename) is None + + +def test_editor_renders_missing_grammar_file_via_legacy() -> None: + # A .toml file (grammar missing) must still render — through legacy, not as a + # fake TextMate claim and not as flat default text. + text = ['name = "ecli"', "[tool.x]", "value = 42 # c"] + editor = _make_editor("pyproject.toml", text, engine="extension") + assert editor._extension_highlighter is None + rendered = editor.apply_syntax_highlighting_with_pygments( + text, list(range(len(text))) + ) + assert ["".join(t for t, _ in line) for line in rendered] == text + + +# --------------------------------------------------------------------------- # +# No SQL fallback for text / log / dotfiles, even with SQL-looking content. +# --------------------------------------------------------------------------- # + +_SQL_LOOKING = "SELECT * FROM users WHERE id = 1; DROP TABLE x;" + +_NON_SQL_CASES = [ + ("server.log", [f"2026-06-21 ERROR {_SQL_LOOKING}"]), + ("freebsd-0.2.2-fail.log", [_SQL_LOOKING, "make: stopped"]), + (".gitignore", ["*.pyc", "build/", "SELECT/"]), + (".env", [f"QUERY={_SQL_LOOKING}"]), + ("notes.txt", [_SQL_LOOKING]), + ("README", [_SQL_LOOKING]), + ("Makefile", ["all:", f"\techo '{_SQL_LOOKING}'"]), +] + + +@pytest.mark.parametrize("filename,text", _NON_SQL_CASES) +def test_non_sql_files_are_never_sql(filename: str, text: list[str]) -> None: + editor = _make_editor(filename, text, engine="extension") + language = (editor.current_language or "").lower() + assert "sql" not in language, (filename, language) + # The extension resolution must not claim a SQL language id either. 
+ ext_lang = (getattr(editor.extension_syntax, "language_id", "") or "").lower() + assert "sql" not in ext_lang, (filename, ext_lang) + + +def test_genuine_sql_file_is_allowed_to_be_sql() -> None: + # Control: a truly SQL-named file *is* permitted to detect as SQL, proving the + # guard suppresses content-guessing, not real SQL files. + editor = _make_editor("schema.sql", ["SELECT 1;"], engine="extension") + assert "sql" in (editor.current_language or "").lower() + + +def test_unnamed_buffer_does_not_suppress_sql_content_guess() -> None: + assert Ecli._is_non_sql_filename(None) is False + + +# --------------------------------------------------------------------------- # +# Safe fallback when the tokenizer (python-textmate) is unavailable. +# --------------------------------------------------------------------------- # + + +def test_missing_tokenizer_falls_back_to_legacy( + monkeypatch: pytest.MonkeyPatch, +) -> None: + # Simulate python-textmate/onigurumacffi being absent. + monkeypatch.setattr(svc, "TEXTMATE_AVAILABLE", False) + monkeypatch.setattr(svc, "EXTENSION_TOKENIZATION_AVAILABLE", False) + service = _service() + + assert service.build_line_highlighter("example.py") is None + resolution = service.resolve("example.py") + assert resolution.fallback_to_legacy is True + messages = [d.message.lower() for d in resolution.diagnostics] + assert any("tokenizer" in m and "unavailable" in m for m in messages), messages + + +def test_missing_tokenizer_editor_still_highlights_via_legacy( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(svc, "TEXTMATE_AVAILABLE", False) + monkeypatch.setattr(svc, "EXTENSION_TOKENIZATION_AVAILABLE", False) + code = ["def main():", " return 42 # answer"] + editor = _make_editor("example.py", code, engine="extension") + assert editor._extension_highlighter is None + rendered = editor.apply_syntax_highlighting_with_pygments(code, [0, 1]) + # Legacy still highlights (more than one colour) and round-trips. 
+ assert ["".join(t for t, _ in line) for line in rendered] == code + assert len({attr for line in rendered for _t, attr in line}) > 1 diff --git a/tests/extensions/test_textmate_grammar_catalog.py b/tests/extensions/test_textmate_grammar_catalog.py index 82a3612b..43e44576 100644 --- a/tests/extensions/test_textmate_grammar_catalog.py +++ b/tests/extensions/test_textmate_grammar_catalog.py @@ -42,6 +42,9 @@ ("javascript", "source.js"), ("typescript", "source.ts"), ("markdown", "text.html.markdown"), + ("ignore", "source.ignore"), + ("yaml", "source.yaml"), + ("dockercompose", "source.yaml"), ("bat", "source.batchfile"), ("c", "source.c"), ("cpp", "source.cpp"), diff --git a/tests/extensions/test_textmate_multiline_protection.py b/tests/extensions/test_textmate_multiline_protection.py new file mode 100644 index 00000000..2b717f2b --- /dev/null +++ b/tests/extensions/test_textmate_multiline_protection.py @@ -0,0 +1,377 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Project: Ecli +# File: tests/extensions/test_textmate_multiline_protection.py +# Website: https://www.ecli.io +# Repository: https://github.com/SSobol77/ecli +# PyPI: https://pypi.org/project/ecli-editor/0.0.1/ +# +# Copyright (c) 2026 Siergej Sobolewski +# +# Licensed under the GNU General Public License version 2 only. +# See the LICENSE file in the project root for full license text. 
+ +"""Multiline TextMate protection regression tests for issue #102.""" + +from __future__ import annotations + +import copy + +import pytest + +from ecli.core.Ecli import Ecli +from ecli.extensions.ecli_integration import ( + EXTENSION_TOKENIZATION_AVAILABLE, + ExtensionLayerConfig, + SyntaxService, + build_syntax_service, +) +from ecli.extensions.ecli_integration.syntax_service import ( + _protected_ranges_for_scope, +) +from ecli.utils.utils import DEFAULT_CONFIG + + +pytestmark = pytest.mark.skipif( + not EXTENSION_TOKENIZATION_AVAILABLE, + reason="python-textmate tokenizer is not installed", +) + +STYLE_COLORS = { + name: index + for index, name in enumerate( + [ + "default", + "keyword", + "string", + "comment", + "number", + "constant", + "type", + "function", + "variable", + "tag", + "attribute", + "builtin", + "operator", + "decorator", + "error", + "punctuation", + "class", + ] + ) +} + + +@pytest.fixture(scope="module") +def service() -> SyntaxService: + return build_syntax_service( + ExtensionLayerConfig.from_section({"syntax_engine": "extension"}) + ) + + +def _make_editor(filename: str, text: list[str]) -> Ecli: + editor = Ecli.__new__(Ecli) + config = copy.deepcopy(DEFAULT_CONFIG) + extensions = config.setdefault("extensions", {}) + extensions["syntax_engine"] = "extension" + extensions["enabled"] = True + editor.config = config + editor.filename = filename + editor.text = text + editor.colors = STYLE_COLORS + editor.is_256_color_terminal = True + editor._lexer = None + editor.current_language = None + editor.custom_syntax_patterns = [] + editor.extension_syntax = None + editor._extension_highlighter = None + editor._modified = False + editor._buffer_edit_revision = 0 + editor.detect_language() + return editor + + +def _char_categories(spans: list[tuple[str, str]]) -> list[str]: + return [category for text, category in spans for _char in text] + + +def _attr_categories(rendered_line: list[tuple[str, int]]) -> list[int]: + return [attr for text, attr 
in rendered_line for _char in text] + + +def _assert_range_category( + spans: list[tuple[str, str]], line: str, needle: str, category: str +) -> None: + start = line.index(needle) + end = start + len(needle) + categories = _char_categories(spans) + assert set(categories[start:end]) == {category}, spans + + +def _assert_editor_range_attr( + rendered_line: list[tuple[str, int]], line: str, needle: str, attr: int +) -> None: + start = line.index(needle) + end = start + len(needle) + attrs = _attr_categories(rendered_line) + assert set(attrs[start:end]) == {attr}, rendered_line + + +def test_protected_range_detection_for_required_languages() -> None: + js_body = 'class Test { return 123; const x = "
"; }' + ts_body = "interface User { id: number; }" + html_body = '
return 123
' + css_body = "color: red; display: block; margin: 10px;" + py_body = "def class return import for while 123 == -> " + cases = [ + ( + "source.js", + ["/*", js_body, "*/"], + { + 0: ((0, 2, "comment"),), + 1: ((0, len(js_body), "comment"),), + 2: ((0, 2, "comment"),), + }, + ), + ( + "source.ts", + ["/*", ts_body, "*/"], + { + 0: ((0, 2, "comment"),), + 1: ((0, len(ts_body), "comment"),), + 2: ((0, 2, "comment"),), + }, + ), + ( + "text.html.derivative", + [""], + { + 0: ((0, 4, "comment"),), + 1: ((0, len(html_body), "comment"),), + 2: ((0, 3, "comment"),), + }, + ), + ( + "source.css", + ["/*", css_body, "*/"], + { + 0: ((0, 2, "comment"),), + 1: ((0, len(css_body), "comment"),), + 2: ((0, 2, "comment"),), + }, + ), + ( + "source.python", + ["'''", py_body, "'''"], + { + 0: ((0, 3, "string"),), + 1: ((0, len(py_body), "string"),), + 2: ((0, 3, "string"),), + }, + ), + ] + for scope_name, lines, expected in cases: + assert _protected_ranges_for_scope(scope_name, lines) == expected + + +def test_python_triple_single_and_double_strings_are_protected( + service: SyntaxService, +) -> None: + highlighter = service.build_line_highlighter("example.py") + assert highlighter is not None + lines = [ + "'''", + "def class return import for while 123 == -> ", + "'''", + '"""', + "def class return import for while 123 == -> ", + '"""', + "def real_function():", + " return 1", + ] + highlighted = highlighter.highlight_lines( + lines, line_indices=list(range(len(lines))), full_text=lines + ) + for index in (0, 1, 2, 3, 4, 5): + assert highlighted[index] is not None + assert {category for _text, category in highlighted[index]} == {"string"} + assert highlighted[6] is not None + assert any( + text == "def" and category == "keyword" for text, category in highlighted[6] + ) + + +def test_javascript_multiline_comments_win_over_code_scopes( + service: SyntaxService, +) -> None: + highlighter = service.build_line_highlighter("fixture.js") + assert highlighter is not None + lines = [ + 
"/*", + 'class Test { return 123; const x = "
"; }', + "*/", + "/**", + "function return import export let const 123 == =>", + "*/", + "const real = 123;", + ] + highlighted = highlighter.highlight_lines( + lines, line_indices=list(range(len(lines))), full_text=lines + ) + assert highlighted[1] is not None + _assert_range_category(highlighted[1], lines[1], "class Test", "comment") + _assert_range_category(highlighted[1], lines[1], "123", "comment") + _assert_range_category(highlighted[1], lines[1], "
", "comment") + assert highlighted[4] is not None + _assert_range_category(highlighted[4], lines[4], "function", "comment") + _assert_range_category(highlighted[4], lines[4], "=>", "comment") + assert highlighted[6] is not None + assert any( + text == "const" and category in {"keyword", "type"} + for text, category in highlighted[6] + ) + + +def test_typescript_multiline_comments_win_over_code_scopes( + service: SyntaxService, +) -> None: + highlighter = service.build_line_highlighter("fixture.ts") + assert highlighter is not None + lines = [ + "/*", + 'class Test { return 123; const x = "
"; }', + "interface User { id: number; }", + "*/", + "const real: number = 123;", + ] + highlighted = highlighter.highlight_lines( + lines, line_indices=list(range(len(lines))), full_text=lines + ) + assert highlighted[1] is not None + _assert_range_category(highlighted[1], lines[1], "return", "comment") + _assert_range_category(highlighted[1], lines[1], "123", "comment") + assert highlighted[2] is not None + _assert_range_category(highlighted[2], lines[2], "interface", "comment") + _assert_range_category(highlighted[2], lines[2], "number", "comment") + assert highlighted[4] is not None + assert any( + text == "const" and category in {"keyword", "type"} + for text, category in highlighted[4] + ) + + +def test_html_multiline_comments_win_over_tag_scopes(service: SyntaxService) -> None: + highlighter = service.build_line_highlighter("fixture.html") + assert highlighter is not None + lines = [ + "", + '
content
', + ] + highlighted = highlighter.highlight_lines( + lines, line_indices=list(range(len(lines))), full_text=lines + ) + assert highlighted[1] is not None + _assert_range_category(highlighted[1], lines[1], " None: + highlighter = service.build_line_highlighter("fixture.css") + assert highlighter is not None + lines = [ + "/*", + "color: red; display: block; margin: 10px;", + "*/", + "body { color: red; }", + ] + highlighted = highlighter.highlight_lines( + lines, line_indices=list(range(len(lines))), full_text=lines + ) + assert highlighted[1] is not None + _assert_range_category(highlighted[1], lines[1], "color", "comment") + _assert_range_category(highlighted[1], lines[1], "10px", "comment") + assert highlighted[3] is not None + assert any(category != "comment" for _text, category in highlighted[3]) + assert any(category != "default" for _text, category in highlighted[3]) + + +@pytest.mark.parametrize( + ("filename", "lines", "line_index", "needle", "expected_attr"), + [ + ( + "fixture.js", + [ + "/*", + 'class Test { return 123; const x = "
"; }', + "*/", + "const real = 1;", + ], + 1, + "return 123", + STYLE_COLORS["comment"], + ), + ( + "fixture.ts", + ["/*", "interface User { id: number; }", "*/", "const real: number = 1;"], + 1, + "interface User", + STYLE_COLORS["comment"], + ), + ( + "fixture.html", + ["", "

real

"], + 1, + " ", + "'''", + "return_value = 1", + ], + 1, + "return import", + STYLE_COLORS["string"], + ), + ], +) +def test_editor_rendering_applies_multiline_protection( + filename: str, + lines: list[str], + line_index: int, + needle: str, + expected_attr: int, +) -> None: + editor = _make_editor(filename, lines) + if editor._extension_highlighter is None: + pytest.skip(f"no extension highlighter for {filename}") + rendered = editor.apply_syntax_highlighting_with_pygments( + lines, list(range(len(lines))) + ) + _assert_editor_range_attr( + rendered[line_index], lines[line_index], needle, expected_attr + ) + assert ["".join(text for text, _attr in line) for line in rendered] == lines diff --git a/tests/extensions/test_textmate_render_performance.py b/tests/extensions/test_textmate_render_performance.py new file mode 100644 index 00000000..6f4ba9dd --- /dev/null +++ b/tests/extensions/test_textmate_render_performance.py @@ -0,0 +1,285 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Project: Ecli +# File: tests/extensions/test_textmate_render_performance.py +# Website: https://www.ecli.io +# Repository: https://github.com/SSobol77/ecli +# PyPI: https://pypi.org/project/ecli-editor/0.0.1/ +# +# Copyright (c) 2026 Siergej Sobolewski +# +# Licensed under the GNU General Public License version 2 only. +# See the LICENSE file in the project root for full license text. + +"""Viewport-first rendering performance proofs on **real** repository files (#102). + +These are not mock tests. 
They drive the editor's actual render data path +(``detect_language`` -> ``apply_syntax_highlighting_with_pygments`` -> +``_apply_extension_highlighting``) over real, large, dirty repository artifacts — +the ``Makefile`` (which previously froze ECLI at line ~42), the large FreeBSD +failure log, a real PR-body Markdown file, and a real packaging script — and +assert: + +* every viewport repaint is bounded (the #102 freeze is gone); +* scrolling never re-tokenizes the whole file (the per-frame string guard is + computed once per buffer revision, reused across scroll frames); +* repeated scroll over the same region is cheap (cached) and the per-line cache + cannot grow without bound; +* the grammar catalog, language detector, theme registry, and per-grammar + tokenizer are each built once and reused. +""" + +from __future__ import annotations + +import copy +import time +from pathlib import Path + +import pytest + +from ecli.core.Ecli import Ecli +from ecli.extensions.ecli_integration import textmate_tokenizer as tokenizer_module +from ecli.extensions.ecli_integration.config import ExtensionLayerConfig +from ecli.extensions.ecli_integration.syntax_service import ( + _SPAN_CACHE_MAX, + LineHighlighter, + build_syntax_service, +) +from ecli.utils.utils import DEFAULT_CONFIG + + +REPO_ROOT = Path(__file__).resolve().parents[2] + +# A repaint budget per viewport frame. The pre-fix bug was an unbounded freeze; +# this generous bound proves termination/responsiveness without being flaky on +# slow CI. The worst real one-time make repaint is ~250ms (the per-line budget). 
+FRAME_BUDGET_SECONDS = 2.0 + +_STYLE_COLORS = { + name: index + for index, name in enumerate( + [ + "default", + "keyword", + "string", + "comment", + "number", + "constant", + "type", + "function", + "variable", + "tag", + "attribute", + "builtin", + "operator", + "decorator", + "error", + "punctuation", + "class", + ] + ) +} + + +def _make_editor(filename: str, text: list[str], engine: str = "extension") -> Ecli: + editor = Ecli.__new__(Ecli) + config = copy.deepcopy(DEFAULT_CONFIG) + extensions = config.setdefault("extensions", {}) + extensions["syntax_engine"] = engine + extensions["enabled"] = True + editor.config = config + editor.filename = filename + editor.text = text + editor.colors = _STYLE_COLORS + editor.is_256_color_terminal = True + editor._lexer = None + editor.current_language = None + editor.custom_syntax_patterns = [] + editor.extension_syntax = None + editor._extension_highlighter = None + editor._modified = False + editor._buffer_edit_revision = 0 + editor.detect_language() + return editor + + +def _read_repo_lines(relative: str) -> list[str]: + path = REPO_ROOT / relative + if not path.is_file(): + pytest.skip(f"required real artifact missing: {relative}") + return path.read_text(encoding="utf-8", errors="replace").splitlines() + + +def _scroll_and_time( + editor: Ecli, text: list[str], viewport: int = 40, step: int = 13 +) -> tuple[float, list[float]]: + """Render sliding viewports across the whole file; return (worst, all frames).""" + frame_times: list[float] = [] + for top in range(0, max(1, len(text)), step): + lines = text[top : top + viewport] + indices = list(range(top, top + len(lines))) + start = time.perf_counter() + rendered = editor.apply_syntax_highlighting_with_pygments(lines, indices) + frame_times.append(time.perf_counter() - start) + # Every visible line round-trips exactly (no dropped/duplicated text). 
+ assert ["".join(t for t, _ in line) for line in rendered] == lines + return (max(frame_times) if frame_times else 0.0), frame_times + + +@pytest.fixture(autouse=True) +def _fresh_quarantine_state() -> None: + tokenizer_module.reset_quarantine_state() + + +# --------------------------------------------------------------------------- # +# Real large-file rendering is bounded (the freeze is gone). +# --------------------------------------------------------------------------- # + + +@pytest.mark.parametrize( + "relative", + [ + "Makefile", + "logs/freebsd-0.2.2-fail.log", + "logs/pr-46-body.md", + "scripts/build_pyinstaller_linux.py", + ], +) +def test_real_file_scroll_is_bounded(relative: str) -> None: + text = _read_repo_lines(relative) + editor = _make_editor(relative, text) + worst, frames = _scroll_and_time(editor, text) + assert worst < FRAME_BUDGET_SECONDS, ( + f"{relative}: slowest viewport repaint {worst * 1000:.0f}ms exceeded " + f"{FRAME_BUDGET_SECONDS * 1000:.0f}ms budget" + ) + assert frames, f"{relative}: no frames rendered" + + +def test_makefile_does_not_freeze_around_line_42() -> None: + # The exact reported symptom: scrolling the repo Makefile to the ifeq block + # near line 42 used to hang ECLI forever. Render that window directly. + text = _read_repo_lines("Makefile") + editor = _make_editor("Makefile", text) + window = text[30:80] + start = time.perf_counter() + rendered = editor.apply_syntax_highlighting_with_pygments( + window, list(range(30, 30 + len(window))) + ) + elapsed = time.perf_counter() - start + assert elapsed < FRAME_BUDGET_SECONDS, f"line-42 window took {elapsed:.2f}s" + assert ["".join(t for t, _ in line) for line in rendered] == window + + +# --------------------------------------------------------------------------- # +# No full-file retokenization during scroll. 
+# --------------------------------------------------------------------------- # + + +def test_no_full_file_retokenization_during_scroll( + monkeypatch: pytest.MonkeyPatch, +) -> None: + text = _read_repo_lines("scripts/build_pyinstaller_linux.py") + editor = _make_editor("scripts/build_pyinstaller_linux.py", text) + assert editor._extension_highlighter is not None + assert editor.current_language == "python" + + calls = {"n": 0} + import ecli.extensions.ecli_integration.syntax_service as svc + + real = svc._python_string_ranges + + def _counting(lines: list[str]): # type: ignore[no-untyped-def] + calls["n"] += 1 + return real(lines) + + monkeypatch.setattr(svc, "_python_string_ranges", _counting) + + # Many scroll frames, no edits => the whole-file Python string guard must be + # computed at most once (cached by buffer revision), never per frame. + for top in range(0, len(text), 7): + lines = text[top : top + 40] + editor.apply_syntax_highlighting_with_pygments( + lines, list(range(top, top + len(lines))) + ) + assert calls["n"] <= 1, f"string guard recomputed {calls['n']}x during scroll" + + # An edit bumps the revision and recomputes exactly once more. + editor.modified = True + top = 10 + editor.apply_syntax_highlighting_with_pygments( + text[top : top + 40], list(range(top, top + 40)) + ) + assert calls["n"] == 2 + + +# --------------------------------------------------------------------------- # +# Repeated scroll is cheap (cache) and the cache is bounded. 
+# --------------------------------------------------------------------------- # + + +def test_repeated_scroll_is_cached_and_fast() -> None: + text = _read_repo_lines("scripts/build_pyinstaller_linux.py") + editor = _make_editor("scripts/build_pyinstaller_linux.py", text) + window = text[:40] + indices = list(range(40)) + + first = time.perf_counter() + editor.apply_syntax_highlighting_with_pygments(window, indices) + first_elapsed = time.perf_counter() - first + + repeats = [] + for _ in range(5): + start = time.perf_counter() + editor.apply_syntax_highlighting_with_pygments(window, indices) + repeats.append(time.perf_counter() - start) + # Cached repaints of the same viewport are no slower than the first render. + assert max(repeats) <= first_elapsed + 0.05 + + +def test_span_cache_is_bounded() -> None: + service = build_syntax_service( + ExtensionLayerConfig.from_section({"syntax_engine": "extension"}) + ) + highlighter = service.build_line_highlighter("example.py") + assert isinstance(highlighter, LineHighlighter) + # Feed many more distinct lines than the cache can hold; it must evict. + for i in range(_SPAN_CACHE_MAX + 500): + highlighter.highlight(f"x{i} = {i}") + assert len(highlighter._cache) <= _SPAN_CACHE_MAX + + +# --------------------------------------------------------------------------- # +# Caches: build-once, reuse. 
+# --------------------------------------------------------------------------- # + + +def test_theme_registry_loaded_once() -> None: + from ecli.extensions.ecli_integration.theme_registry import cached_theme_registry + + cached_theme_registry.cache_clear() + first = cached_theme_registry() + second = cached_theme_registry() + assert first == second + assert cached_theme_registry.cache_info().hits >= 1 + + +def test_grammar_loaded_once_per_scope() -> None: + from ecli.extensions.ecli_integration.textmate_tokenizer import load_tokenizer + + grammar = ( + REPO_ROOT / "src/ecli/extensions/python/syntaxes/MagicPython.tmLanguage.json" + ).resolve() + first = load_tokenizer(grammar) + second = load_tokenizer(grammar) + assert first == second, "tokenizer must be cached per grammar path" + + +def test_catalog_and_detector_built_once() -> None: + from ecli.extensions.ecli_integration.syntax_service import _cached_real_parts + + _cached_real_parts.cache_clear() + first = _cached_real_parts() + second = _cached_real_parts() + assert first == second + assert _cached_real_parts.cache_info().hits >= 1 diff --git a/tests/extensions/test_textmate_render_regressions.py b/tests/extensions/test_textmate_render_regressions.py new file mode 100644 index 00000000..57d834ef --- /dev/null +++ b/tests/extensions/test_textmate_render_regressions.py @@ -0,0 +1,192 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Project: Ecli +# File: tests/extensions/test_textmate_render_regressions.py +# Website: https://www.ecli.io +# Repository: https://github.com/SSobol77/ecli +# PyPI: https://pypi.org/project/ecli-editor/0.0.1/ +# +# Copyright (c) 2026 Siergej Sobolewski +# +# Licensed under the GNU General Public License version 2 only. +# See the LICENSE file in the project root for full license text. + +"""Render-correctness regression locks for #102. 
+ +These pin the syntax-correctness behaviours that must survive the viewport-first +rendering work: log/gitignore are never mis-detected as SQL, YAML stays visible, +Python docstrings keep absolute string priority, invalid theme numbers keep the +current theme with a warning, old theme ids migrate, and the shipped +``config.toml`` stays clean. +""" + +from __future__ import annotations + +import copy +import logging +import tomllib +from pathlib import Path + +from ecli.core.Ecli import Ecli +from ecli.utils.themes import DEFAULT_THEME_ID, get_theme, resolve_theme +from ecli.utils.utils import DEFAULT_CONFIG + + +REPO_ROOT = Path(__file__).resolve().parents[2] + +_STYLE_COLORS = { + name: index + for index, name in enumerate( + [ + "default", + "keyword", + "string", + "comment", + "number", + "constant", + "type", + "function", + "variable", + "tag", + "attribute", + "builtin", + "operator", + "decorator", + "error", + "punctuation", + "class", + ] + ) +} + + +def _make_editor(filename: str, text: list[str], engine: str = "extension") -> Ecli: + editor = Ecli.__new__(Ecli) + config = copy.deepcopy(DEFAULT_CONFIG) + extensions = config.setdefault("extensions", {}) + extensions["syntax_engine"] = engine + extensions["enabled"] = True + editor.config = config + editor.filename = filename + editor.text = text + editor.colors = _STYLE_COLORS + editor.is_256_color_terminal = True + editor._lexer = None + editor.current_language = None + editor.custom_syntax_patterns = [] + editor.extension_syntax = None + editor._extension_highlighter = None + editor._modified = False + editor._buffer_edit_revision = 0 + editor.detect_language() + return editor + + +def _categories(rendered: list[list[tuple[str, int]]]) -> set[int]: + return {attr for line in rendered for _text, attr in line} + + +# --------------------------------------------------------------------------- # +# Language detection: .log / .gitignore are never Transact-SQL. 
+# --------------------------------------------------------------------------- # + + +def test_log_file_is_not_transact_sql() -> None: + for filename in ("freebsd-0.2.2-fail.log", "editor.log", "qemu.raw.log"): + editor = _make_editor(filename, ["ERROR SELECT * FROM x WHERE id = 1"]) + assert editor.current_language == "log" + assert "sql" not in (editor.current_language or "").lower() + + +def test_gitignore_is_not_transact_sql() -> None: + editor = _make_editor(".gitignore", ["*.pyc", "build/", "SELECT/"]) + assert editor.current_language == "ignore" + assert "sql" not in (editor.current_language or "").lower() + + +# --------------------------------------------------------------------------- # +# YAML stays visibly highlighted (legacy fallback when the engine yields nothing). +# --------------------------------------------------------------------------- # + + +def test_yaml_renders_with_visible_colours() -> None: + yaml_doc = ["version: '3'", "services:", " app: # comment"] + editor = _make_editor(".coderabbit.yaml", yaml_doc, engine="extension") + rendered = editor.apply_syntax_highlighting_with_pygments( + yaml_doc, list(range(len(yaml_doc))) + ) + assert ["".join(t for t, _ in line) for line in rendered] == yaml_doc + assert len(_categories(rendered)) > 1, "YAML must not render as flat default text" + + +# --------------------------------------------------------------------------- # +# Python multiline docstring keeps absolute string priority. 
+# --------------------------------------------------------------------------- # + + +def test_python_docstring_is_uniformly_string() -> None: + code = [ + "def f():", + ' """class def return 123 ==', + " import while -> + -", + ' """', + " return 1", + ] + editor = _make_editor("example.py", code, engine="extension") + if editor._extension_highlighter is None: + import pytest + + pytest.skip("python-textmate engine unavailable") + string_attr = _STYLE_COLORS["string"] + default_attr = _STYLE_COLORS["default"] + rendered = editor.apply_syntax_highlighting_with_pygments( + code, list(range(len(code))) + ) + # Lines fully inside the docstring carry only the string colour — no keyword, + # number, operator or error colour leaks through the guard. + for index in (1, 2, 3): + attrs = {attr for _text, attr in rendered[index]} + assert attrs <= {string_attr, default_attr}, (index, rendered[index]) + assert string_attr in attrs + # The real statement after the docstring still highlights its keyword. + assert _STYLE_COLORS["keyword"] in {attr for _text, attr in rendered[4]} + + +# --------------------------------------------------------------------------- # +# Theme: invalid number keeps current theme + warns; old ids migrate. +# --------------------------------------------------------------------------- # + + +def test_invalid_theme_number_keeps_current_and_warns( + caplog: logging.LogCaptureFixture, +) -> None: + current = get_theme(208) # Monokai + with caplog.at_level(logging.WARNING): + result = resolve_theme({"theme": 9999}, current_theme=current) + assert result.theme_id == current.theme_id + assert result.name == current.name + assert result.diagnostics, "a diagnostic must record the rejected theme" + assert any("invalid theme" in record.message.lower() for record in caplog.records) + + +def test_old_theme_id_is_migrated() -> None: + # Legacy pre-extension configs used a [theme] table with ids 1-8. 
+ assert resolve_theme({"theme": {"id": 3}}).theme_id == 381 + assert resolve_theme({"theme": {"name": "dark"}}).theme_id != DEFAULT_THEME_ID + + +# --------------------------------------------------------------------------- # +# Shipped config.toml is clean. +# --------------------------------------------------------------------------- # + + +def test_shipped_config_toml_is_clean() -> None: + config_path = REPO_ROOT / "config.toml" + raw = config_path.read_text(encoding="utf-8") + # The obsolete empty keybindings table was removed during cleanup. + assert "keybindings = {}" not in raw + parsed = tomllib.loads(raw) + # The configured theme is a real, resolvable theme number. + theme = parsed.get("theme") + assert isinstance(theme, int) and not isinstance(theme, bool) + assert resolve_theme({"theme": theme}).theme_id == theme diff --git a/tests/extensions/test_textmate_scroll_regression.py b/tests/extensions/test_textmate_scroll_regression.py new file mode 100644 index 00000000..6e12ef5e --- /dev/null +++ b/tests/extensions/test_textmate_scroll_regression.py @@ -0,0 +1,181 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Project: Ecli +# File: tests/extensions/test_textmate_scroll_regression.py +# Website: https://www.ecli.io +# Repository: https://github.com/SSobol77/ecli +# PyPI: https://pypi.org/project/ecli-editor/0.0.1/ +# +# Copyright (c) 2026 Siergej Sobolewski +# +# Licensed under the GNU General Public License version 2 only. +# See the LICENSE file in the project root for full license text. + +"""Scroll performance regression gates on real repository files (#102). 
+ +These simulate the real interaction that froze ECLI — repeated PageDown/PageUp +over large files — through the editor's actual render data path, and assert: + +* no viewport repaint hangs (bounded wall-clock per frame) on the ``Makefile`` + and the large ``logs/freebsd-0.2.2-fail.log``; +* repeated scrolling over the same regions never grows the per-line span cache + without bound; +* opening files and rendering never triggers a repeated full extension-tree + registry scan (the catalog/detector are scanned once and reused). +""" + +from __future__ import annotations + +import copy +import time +from pathlib import Path + +import pytest + +from ecli.core.Ecli import Ecli +from ecli.extensions.ecli_integration import ( + syntax_service as svc, + textmate_tokenizer as tokenizer_module, +) +from ecli.extensions.ecli_integration.syntax_service import _SPAN_CACHE_MAX +from ecli.utils.utils import DEFAULT_CONFIG + + +REPO_ROOT = Path(__file__).resolve().parents[2] +VIEWPORT = 40 +FRAME_BUDGET_SECONDS = 2.0 + +_STYLE_COLORS = { + name: index + for index, name in enumerate( + [ + "default", + "keyword", + "string", + "comment", + "number", + "constant", + "type", + "function", + "variable", + "tag", + "attribute", + "builtin", + "operator", + "decorator", + "error", + "punctuation", + "class", + ] + ) +} + + +def _make_editor(filename: str, text: list[str], engine: str = "extension") -> Ecli: + editor = Ecli.__new__(Ecli) + config = copy.deepcopy(DEFAULT_CONFIG) + extensions = config.setdefault("extensions", {}) + extensions["syntax_engine"] = engine + extensions["enabled"] = True + editor.config = config + editor.filename = filename + editor.text = text + editor.colors = _STYLE_COLORS + editor.is_256_color_terminal = True + editor._lexer = None + editor.current_language = None + editor.custom_syntax_patterns = [] + editor.extension_syntax = None + editor._extension_highlighter = None + editor._modified = False + editor._buffer_edit_revision = 0 + 
editor.detect_language() + return editor + + +def _read_lines(relative: str) -> list[str]: + path = REPO_ROOT / relative + if not path.is_file(): + pytest.skip(f"required real artifact missing: {relative}") + return path.read_text(encoding="utf-8", errors="replace").splitlines() + + +def _render_viewport(editor: Ecli, text: list[str], top: int) -> float: + top = max(0, min(top, max(0, len(text) - 1))) + lines = text[top : top + VIEWPORT] + indices = list(range(top, top + len(lines))) + start = time.perf_counter() + editor.apply_syntax_highlighting_with_pygments(lines, indices) + return time.perf_counter() - start + + +def _page_tops(line_count: int) -> list[int]: + """PageDown to the bottom, then PageUp back to the top.""" + downs = list(range(0, line_count, VIEWPORT)) + ups = list(reversed(downs)) + return downs + ups + + +@pytest.fixture(autouse=True) +def _fresh_quarantine_state() -> None: + tokenizer_module.reset_quarantine_state() + + +@pytest.mark.parametrize("relative", ["Makefile", "logs/freebsd-0.2.2-fail.log"]) +def test_pagedown_pageup_cycles_do_not_hang(relative: str) -> None: + text = _read_lines(relative) + editor = _make_editor(relative, text) + worst = 0.0 + for _cycle in range(3): # repeated PgDn/PgUp, the reported interaction + for top in _page_tops(len(text)): + worst = max(worst, _render_viewport(editor, text, top)) + assert worst < FRAME_BUDGET_SECONDS, ( + f"{relative}: slowest PgDn/PgUp repaint {worst * 1000:.0f}ms exceeded budget" + ) + + +def test_repeated_scroll_does_not_grow_cache_unbounded() -> None: + text = _read_lines("scripts/build_pyinstaller_linux.py") + editor = _make_editor("scripts/build_pyinstaller_linux.py", text) + assert editor._extension_highlighter is not None + highlighter = editor._extension_highlighter + + # First full PgDn/PgUp pass populates the cache. 
+ for top in _page_tops(len(text)): + _render_viewport(editor, text, top) + size_after_first = len(highlighter._cache) + + # Repeating the identical scroll must not grow the cache further (same lines). + for _ in range(5): + for top in _page_tops(len(text)): + _render_viewport(editor, text, top) + assert len(highlighter._cache) == size_after_first + assert len(highlighter._cache) <= _SPAN_CACHE_MAX + + +def test_scrolling_does_not_rescan_extension_registry( + monkeypatch: pytest.MonkeyPatch, +) -> None: + # The full extension-tree scan must happen at most once, regardless of how + # many files are opened or how much they are scrolled. + svc._cached_real_parts.cache_clear() + calls = {"n": 0} + real_build_registry = svc.build_registry + + def _counting(*args: object, **kwargs: object): # type: ignore[no-untyped-def] + calls["n"] += 1 + return real_build_registry(*args, **kwargs) + + monkeypatch.setattr(svc, "build_registry", _counting) + + for name, text in ( + ("a.py", ["import os", "def f():", " return 1"]), + ("b.py", ["x = 1"]), + ("Makefile", ["all:", "\techo hi"]), + ("c.py", ["y = 2"]), + ): + editor = _make_editor(name, text) + for top in _page_tops(len(text)): + _render_viewport(editor, text, top) + + assert calls["n"] <= 1, f"extension registry scanned {calls['n']}x (expected once)" diff --git a/tests/extensions/test_textmate_tokenization.py b/tests/extensions/test_textmate_tokenization.py new file mode 100644 index 00000000..7e90c51d --- /dev/null +++ b/tests/extensions/test_textmate_tokenization.py @@ -0,0 +1,284 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Project: Ecli +# File: tests/extensions/test_textmate_tokenization.py +# Website: https://www.ecli.io +# Repository: https://github.com/SSobol77/ecli +# PyPI: https://pypi.org/project/ecli-editor/0.0.1/ +# +# Copyright (c) 2026 Siergej Sobolewski +# +# Licensed under the GNU General Public License version 2 only. +# See the LICENSE file in the project root for full license text. 
+ +"""Real TextMate tokenization + scope-to-style tests (#102). + +These prove that ECLI tokenizes representative files with the **actual** imported +`.tmLanguage.json` grammars (producing genuine TextMate scopes), maps scopes to +ECLI style categories deterministically, and falls back safely for grammars the +engine cannot handle (Markdown), unknown files, and invalid grammars. They never +execute extension code. +""" + +from __future__ import annotations + +import inspect +import json +from pathlib import Path + +import pytest + +from ecli.extensions.ecli_integration import ( + EXTENSION_TOKENIZATION_AVAILABLE, + ExtensionLayerConfig, + SyntaxService, + build_syntax_service, + load_tokenizer, + scope_to_category, + textmate_tokenizer as tokenizer_module, + theme_bridge as theme_bridge_module, + tokens_to_spans, +) + + +pytestmark = pytest.mark.skipif( + not EXTENSION_TOKENIZATION_AVAILABLE, + reason="python-textmate tokenizer is not installed", +) + +EXTENSIONS_ROOT = Path("src/ecli/extensions") + + +@pytest.fixture(scope="module") +def service() -> SyntaxService: + return build_syntax_service( + ExtensionLayerConfig.from_section({"syntax_engine": "extension"}) + ) + + +def _categories(service: SyntaxService, filename: str, lines: list[str]) -> set[str]: + highlighter = service.build_line_highlighter(filename) + assert highlighter is not None, f"no TextMate highlighter for {filename}" + categories: set[str] = set() + for line in lines: + spans = highlighter.highlight(line) + if spans: + categories |= {category for _text, category in spans} + return categories + + +# --------------------------------------------------------------------------- # +# Real grammar loading + representative tokenization. 
+# --------------------------------------------------------------------------- # + + +def test_real_grammar_loads_from_extensions_tree() -> None: + grammar = EXTENSIONS_ROOT / "python/syntaxes/MagicPython.tmLanguage.json" + tokenizer = load_tokenizer(grammar.resolve()) + assert tokenizer is not None + tokens = tokenizer.tokenize_line("def f(x): return 42") + assert tokens, "tokenizer must produce TextMate tokens" + # Genuine TextMate scope names (not flat token categories). + assert any(scope.endswith(".python") for scope, _s, _e in tokens) + + +def test_python_scopes(service: SyntaxService) -> None: + categories = _categories( + service, + "example.py", + ["def greet(name):", " s = 'hello' # comment", " return 42"], + ) + assert {"keyword", "string", "comment", "function"} <= categories + + +def test_python_docstring_words_do_not_get_keyword_style( + service: SyntaxService, +) -> None: + highlighter = service.build_line_highlighter("example.py") + assert highlighter is not None + lines = [ + '"""class def return import for while 123 == -> + -"""', + "def real_function():", + ' """class def return 123 ==', + " import for while -> + -", + ' """', + " return 1", + ] + highlighted = highlighter.highlight_lines( + lines, line_indices=list(range(len(lines))), full_text=lines + ) + assert highlighted[0] == [ + ('"""class def return import for while 123 == -> + -"""', "string") + ] + for line_spans in highlighted[2:5]: + assert line_spans is not None + assert {category for _text, category in line_spans} == {"string"} + real_return = highlighted[5] + assert real_return is not None + assert any( + text == "return" and category == "keyword" for text, category in real_return + ) + + +def test_json_scopes(service: SyntaxService) -> None: + categories = _categories( + service, "package.json", [' "name": "ecli",', ' "n": 42'] + ) + assert {"string", "number"} <= categories + + +def test_typescript_scopes(service: SyntaxService) -> None: + categories = _categories( + service, 
"main.ts", ["const x = 'h'; // c", "let n = 42;"] + ) + assert {"string", "comment"} <= categories + + +def test_javascript_scopes(service: SyntaxService) -> None: + categories = _categories(service, "app.js", ["function f() { return 'h'; } // c"]) + assert {"string", "comment", "function"} <= categories + + +def test_cpp_scopes(service: SyntaxService) -> None: + categories = _categories(service, "main.cpp", ["int main() { return 0; } // c"]) + assert {"keyword", "type", "comment"} <= categories + + +def test_c_scopes(service: SyntaxService) -> None: + categories = _categories(service, "main.c", ["int x = 0; // c"]) + assert {"type", "comment"} <= categories + + +def test_bat_scopes(service: SyntaxService) -> None: + tokenizer = load_tokenizer( + (EXTENSIONS_ROOT / "bat/syntaxes/batchfile.tmLanguage.json").resolve() + ) + assert tokenizer is not None + tokens = tokenizer.tokenize_line("REM a comment") + assert any("batchfile" in scope for scope, _s, _e in tokens) + + +def test_yaml_is_visible_via_legacy_fallback(service: SyntaxService) -> None: + # The imported YAML block grammar yields no tokens under the per-line stateless + # engine (it needs multi-line state ECLI does not maintain), so the extension + # highlighter alone produces only default spans. The required guarantee — "YAML + # is visibly highlighted" — is met at the editor level: when the extension + # engine produces nothing usable, the editor falls back to the legacy/Pygments + # highlighter (see ``Ecli._apply_extension_highlighting``). The editor-level + # proof lives in ``tests/extensions/test_editor_syntax_rendering.py``; here we + # pin the engine-level reality so the fallback contract stays honest. 
+ for filename in (".coderabbit.yaml", "docker-compose.yml", "config.yaml"): + highlighter = service.build_line_highlighter(filename) + assert highlighter is not None, filename + spans = highlighter.highlight("name: ecli # comment") + assert spans is not None + categories = {category for _text, category in spans} + # The stateless engine cannot colour YAML, so the editor must fall back. + assert categories <= {"default", "punctuation"}, (filename, spans) + + +def test_gitignore_scopes_are_not_sql(service: SyntaxService) -> None: + resolution = service.resolve(".gitignore") + assert resolution.language_id == "ignore" + assert resolution.scope_name == "source.ignore" + assert resolution.language_id != "sql" + + +# --------------------------------------------------------------------------- # +# Safe fallback behaviour. +# --------------------------------------------------------------------------- # + + +def test_markdown_falls_back_safely(service: SyntaxService) -> None: + # The imported Markdown grammar makes the engine recurse; tokenization must + # degrade to None (caller renders via legacy) rather than crash. + highlighter = service.build_line_highlighter("README.md") + if highlighter is not None: + assert highlighter.highlight("# Heading") is None + + +def test_unknown_file_has_no_highlighter(service: SyntaxService) -> None: + assert service.build_line_highlighter("mystery.zzz") is None + + +def test_invalid_grammar_returns_none(tmp_path: Path) -> None: + bad = tmp_path / "bad.tmLanguage.json" + bad.write_text("{ not valid json", encoding="utf-8") + assert load_tokenizer(bad.resolve()) is None + + +def test_legacy_engine_has_no_highlighter() -> None: + legacy = build_syntax_service( + ExtensionLayerConfig.from_section({"syntax_engine": "legacy"}) + ) + assert legacy.build_line_highlighter("example.py") is None + + +# --------------------------------------------------------------------------- # +# Determinism + scope-to-style mapping. 
+# --------------------------------------------------------------------------- # + + +def test_tokenization_is_deterministic(service: SyntaxService) -> None: + first = service.build_line_highlighter("example.py") + second = service.build_line_highlighter("example.py") + assert first is not None and second is not None + line = "def f(x): return 'hi' # c" + assert first.highlight(line) == second.highlight(line) + + +def test_scope_to_category_specificity() -> None: + assert scope_to_category("keyword.control.flow.python") == "keyword" + assert scope_to_category("string.quoted.double.json") == "string" + assert scope_to_category("comment.line.number-sign.python") == "comment" + assert scope_to_category("constant.numeric.integer.python") == "number" + assert scope_to_category("entity.name.function.python") == "function" + # Structural scopes render as default text. + assert scope_to_category("meta.function.python") is None + assert scope_to_category("source.python") is None + + +def test_tokens_to_spans_tile_line_exactly() -> None: + line = "def f" + tokens = [("keyword.control.python", 0, 3), ("entity.name.function.python", 4, 5)] + spans = tokens_to_spans(line, tokens) + assert "".join(text for text, _category in spans) == line + categories = dict(spans) + assert categories["def"] == "keyword" + assert categories["f"] == "function" + + +def test_tokens_to_spans_handles_empty_line() -> None: + assert tokens_to_spans("", []) == [] + + +def test_distinct_scope_categories_map_to_distinct_styles( + service: SyntaxService, +) -> None: + # A rendering-level proof: different scope categories yield different style + # categories, so the editor maps them to different curses attributes. + highlighter = service.build_line_highlighter("example.py") + assert highlighter is not None + spans = highlighter.highlight("def f(): s = 'h' # c") + assert spans is not None + categories = {category for _text, category in spans} + # Keyword, string, and comment are all present and distinct. 
+ assert {"keyword", "string", "comment"} <= categories + + +# --------------------------------------------------------------------------- # +# No runtime execution. +# --------------------------------------------------------------------------- # + + +def test_no_runtime_execution_primitives() -> None: + for module in (tokenizer_module, theme_bridge_module): + source = inspect.getsource(module) + for token in ("subprocess", "os.system", "os.popen", "pty.", "eval(", "exec("): + assert token not in source, f"{module.__name__} must not use {token!r}" + + +def test_tokenizer_loads_only_json_data() -> None: + # The tokenizer reads grammar JSON; it never imports or executes grammar code. + source = inspect.getsource(tokenizer_module) + assert "json.loads" in source diff --git a/tests/extensions/test_textmate_tokenizer_budget.py b/tests/extensions/test_textmate_tokenizer_budget.py new file mode 100644 index 00000000..8386c466 --- /dev/null +++ b/tests/extensions/test_textmate_tokenizer_budget.py @@ -0,0 +1,158 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Project: Ecli +# File: tests/extensions/test_textmate_tokenizer_budget.py +# Website: https://www.ecli.io +# Repository: https://github.com/SSobol77/ecli +# PyPI: https://pypi.org/project/ecli-editor/0.0.1/ +# +# Copyright (c) 2026 Siergej Sobolewski +# +# Licensed under the GNU General Public License version 2 only. +# See the LICENSE file in the project root for full license text. + +"""Bounded-tokenization / freeze regression for the TextMate engine (#102). + +The imported ``make`` grammar drives the per-line Oniguruma engine into +catastrophic, non-terminating backtracking on lines such as +``ifeq ($(ARCH),x86_64)`` — the exact cause of the reported UI freeze. 
These +tests use the **real** grammars (no mocks) and a hard wall-clock bound to prove: + +* tokenizing any line of the real repository ``Makefile`` terminates within a + per-line budget (it used to run forever); +* the specific ``ifeq ($(...))`` line returns within budget (legacy fallback is + acceptable; hanging is not); +* a grammar that keeps timing out is quarantined after a bounded number of + distinct slow lines, after which the editor uses the legacy highlighter. +""" + +from __future__ import annotations + +import signal +import time +from pathlib import Path + +import pytest + +from ecli.extensions.ecli_integration import textmate_tokenizer as tok +from ecli.extensions.ecli_integration.config import ExtensionLayerConfig +from ecli.extensions.ecli_integration.syntax_service import build_syntax_service + + +pytestmark = pytest.mark.skipif( + not tok.TEXTMATE_AVAILABLE, + reason="python-textmate tokenizer is not installed", +) + +REPO_ROOT = Path(__file__).resolve().parents[2] +MAKE_GRAMMAR = REPO_ROOT / "src/ecli/extensions/make/syntaxes/make.tmLanguage.json" + +# A hard ceiling, well above the in-engine per-line budget but far below the +# unbounded freeze. If a single tokenize_line call ever exceeds this, the freeze +# has regressed. 
+HARD_LINE_CEILING_SECONDS = 2.0 + + +@pytest.fixture(autouse=True) +def _fresh_state() -> None: + tok.reset_quarantine_state() + + +def _alarm_guard(seconds: float): + """A SIGALRM watchdog that converts a true hang into a test failure.""" + + class _Guard: + def __enter__(self) -> None: + def _fire(_s: int, _f: object) -> None: + raise AssertionError( + f"tokenization did not terminate within {seconds}s (freeze)" + ) + + self._prev = signal.signal(signal.SIGALRM, _fire) + signal.setitimer(signal.ITIMER_REAL, seconds) + + def __exit__(self, *_exc: object) -> None: + signal.setitimer(signal.ITIMER_REAL, 0) + signal.signal(signal.SIGALRM, self._prev) + + return _Guard() + + +@pytest.fixture(scope="module") +def make_highlighter(): + service = build_syntax_service( + ExtensionLayerConfig.from_section({"syntax_engine": "extension"}) + ) + highlighter = service.build_line_highlighter("Makefile") + assert highlighter is not None, "expected a TextMate highlighter for Makefile" + return highlighter + + +def test_every_makefile_line_tokenizes_within_budget(make_highlighter) -> None: + lines = (REPO_ROOT / "Makefile").read_text(encoding="utf-8").splitlines() + assert len(lines) > 1000, "expected a large real Makefile" + for number, line in enumerate(lines, start=1): + start = time.perf_counter() + make_highlighter.tokenizer.tokenize_line(line) + elapsed = time.perf_counter() - start + assert elapsed < HARD_LINE_CEILING_SECONDS, ( + f"Makefile line {number} took {elapsed:.2f}s " + f"(>{HARD_LINE_CEILING_SECONDS}s): {line[:60]!r}" + ) + + +def test_ifeq_line_terminates(make_highlighter) -> None: + # The exact catastrophic line. It must return within budget; None (legacy + # fallback) is a fine result — a hang is not. + with _alarm_guard(HARD_LINE_CEILING_SECONDS): + result = make_highlighter.tokenizer.tokenize_line("ifeq ($(ARCH),x86_64)") + # Either bounded tokens or a safe None fallback; never a freeze. 
+ assert result is None or isinstance(result, list) + + +def test_grammar_quarantined_after_repeated_timeouts( + monkeypatch: pytest.MonkeyPatch, +) -> None: + # Lower the threshold so the two naturally-catastrophic make lines trip it. + monkeypatch.setattr(tok, "_GRAMMAR_QUARANTINE_THRESHOLD", 2) + service = build_syntax_service( + ExtensionLayerConfig.from_section({"syntax_engine": "extension"}) + ) + highlighter = service.build_line_highlighter("Makefile") + assert highlighter is not None + + grammar_id = highlighter.tokenizer._grammar_id + with _alarm_guard(HARD_LINE_CEILING_SECONDS * 3): + highlighter.tokenizer.tokenize_line("ifeq ($(ARCH),x86_64)") + highlighter.tokenizer.tokenize_line("ifeq ($(MACOS_ASSERT_MODE),native)") + + assert tok.is_grammar_quarantined(grammar_id) + # Once quarantined, a freshly built highlighter for the same file is None, + # so the editor renders the whole file with the legacy highlighter. + assert service.build_line_highlighter("Makefile") is None + + +def test_quarantine_bookkeeping_is_deterministic() -> None: + # Unit-level proof of the threshold logic, independent of engine timing. + tokenizer = tok.TextMateTokenizer(grammar=object(), grammar_id="grammar://x") + threshold = tok._GRAMMAR_QUARANTINE_THRESHOLD + for i in range(threshold - 1): + tokenizer._record_timeout(f"line-{i}") + assert not tok.is_grammar_quarantined("grammar://x") + tokenizer._record_timeout(f"line-{threshold}") + assert tok.is_grammar_quarantined("grammar://x") + # A quarantined grammar short-circuits to None without touching the engine. + assert tokenizer.tokenize_line("anything") is None + + +def test_repeated_timeout_on_same_line_counts_once() -> None: + tokenizer = tok.TextMateTokenizer(grammar=object(), grammar_id="grammar://y") + for _ in range(tok._GRAMMAR_QUARANTINE_THRESHOLD + 5): + tokenizer._record_timeout("the same slow line") + # Distinct lines drive quarantine; the same line repeated must not. 
+ assert not tok.is_grammar_quarantined("grammar://y") + + +def test_wall_clock_budget_available_on_main_thread() -> None: + # The render loop runs on the main thread, where SIGALRM can bound the engine. + assert tok._can_arm_alarm() is True diff --git a/tests/extensions/test_theme_bridge_priority.py b/tests/extensions/test_theme_bridge_priority.py new file mode 100644 index 00000000..ecf69ce3 --- /dev/null +++ b/tests/extensions/test_theme_bridge_priority.py @@ -0,0 +1,82 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Project: Ecli +# File: tests/extensions/test_theme_bridge_priority.py +# Website: https://www.ecli.io +# Repository: https://github.com/SSobol77/ecli +# PyPI: https://pypi.org/project/ecli-editor/0.0.1/ +# +# Copyright (c) 2026 Siergej Sobolewski +# +# Licensed under the GNU General Public License version 2 only. +# See the LICENSE file in the project root for full license text. + +"""Scope-priority tests for TextMate-to-renderer spans.""" + +from __future__ import annotations + +from ecli.extensions.ecli_integration.theme_bridge import ( + scope_to_category, + tokens_to_spans, +) + + +def test_string_scope_beats_nested_keyword_scope() -> None: + scope = "source.python string.quoted.docstring.python keyword.control.flow.python" + assert scope_to_category(scope) == "string" + assert tokens_to_spans("return", [(scope, 0, 6)]) == [("return", "string")] + + +def test_comment_scope_beats_nested_keyword_scope() -> None: + scope = ( + "source.python comment.line.number-sign.python keyword.control.import.python" + ) + assert scope_to_category(scope) == "comment" + assert tokens_to_spans("import", [(scope, 0, 6)]) == [("import", "comment")] + + +def test_invalid_scope_has_highest_priority() -> None: + scope = "source.python string.quoted.python invalid.illegal.python" + assert scope_to_category(scope) == "error" + assert tokens_to_spans("bad", [(scope, 0, 3)]) == [("bad", "error")] + + +def test_protected_string_range_blocks_keyword_overpaint() -> None: + 
spans = tokens_to_spans( + "class def return 123 == ->", + [ + ("keyword.control.python", 0, 16), + ("constant.numeric.python", 17, 20), + ("keyword.operator.python", 21, 26), + ], + protected_ranges=[(0, 16, "string")], + ) + assert spans == [ + ("class def return", "string"), + (" ", "default"), + ("123", "number"), + (" ", "default"), + ("== ->", "operator"), + ] + + +def test_protected_docstring_range_blocks_code_like_tokens() -> None: + spans = tokens_to_spans( + "class def return 123 == ->", + [ + ("keyword.control.python", 0, 16), + ("constant.numeric.python", 17, 20), + ("keyword.operator.python", 21, 26), + ], + protected_ranges=[(0, 26, "string")], + ) + assert spans == [("class def return 123 == ->", "string")] + + +def test_markdown_scopes_do_not_collapse_to_one_category() -> None: + assert scope_to_category("text.html.markdown markup.heading.markdown") == "function" + assert scope_to_category("text.html.markdown markup.bold.markdown") == "type" + assert ( + scope_to_category("text.html.markdown markup.inline.raw.markdown") == "string" + ) + assert scope_to_category("text.html.markdown markup.quote.markdown") == "comment" diff --git a/tests/extensions/test_theme_numbering_contract.py b/tests/extensions/test_theme_numbering_contract.py new file mode 100644 index 00000000..bbd8d141 --- /dev/null +++ b/tests/extensions/test_theme_numbering_contract.py @@ -0,0 +1,141 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Project: Ecli +# File: tests/extensions/test_theme_numbering_contract.py +# Website: https://www.ecli.io +# Repository: https://github.com/SSobol77/ecli +# PyPI: https://pypi.org/project/ecli-editor/0.0.1/ +# +# Copyright (c) 2026 Siergej Sobolewski +# +# Licensed under the GNU General Public License version 2 only. +# See the LICENSE file in the project root for full license text. + +"""Theme numbering policy + config.toml cleanliness contract (#102). 
+ +The policy: 1-8 are deprecated aliases only, 100-199 light, 200-299 dark, +300-399 high contrast, 800-899 reserved. These gates pin the policy against the +**real** theme registry built from imported extension metadata, and assert the +shipped, user-facing ``config.toml`` stays clean (no internal/declarative tables). +""" + +from __future__ import annotations + +import tomllib +from pathlib import Path + +from ecli.extensions.ecli_integration.theme_registry import ( + THEME_NUMBERING_POLICY, + cached_theme_registry, +) +from ecli.utils.themes import DEFAULT_THEME_ID, resolve_theme + + +REPO_ROOT = Path(__file__).resolve().parents[2] + + +def _band(number: int) -> str: + if 100 <= number < 200: + return "light" + if 200 <= number < 300: + return "dark" + if 300 <= number < 400: + return "high-contrast" + if 800 <= number < 900: + return "reserved" + if 1 <= number < 9: + return "deprecated" + return "out-of-policy" + + +# --------------------------------------------------------------------------- # +# Numbering policy is documented and enforced against real loaded themes. 
+# --------------------------------------------------------------------------- # + + +def test_numbering_policy_bands_are_documented() -> None: + assert THEME_NUMBERING_POLICY["deprecated_aliases"] == "1-8" + assert THEME_NUMBERING_POLICY["light"] == "100-199" + assert THEME_NUMBERING_POLICY["dark"] == "200-299" + assert THEME_NUMBERING_POLICY["high_contrast"] == "300-399" + assert THEME_NUMBERING_POLICY["reserved_custom_imported"] == "800-899" + + +def test_loaded_extension_theme_numbers_match_their_band() -> None: + registry = cached_theme_registry() + numbered = [t for t in registry.list_available_extension_themes() if t.number] + assert numbered, "expected real numbered themes from imported metadata" + for theme in numbered: + band = _band(theme.number) + assert band in {"light", "dark", "high-contrast"}, (theme.number, theme.name) + if band in {"light", "dark"}: + assert theme.theme_type == band, ( + theme.number, + theme.name, + theme.theme_type, + ) + else: + assert theme.theme_type == "high-contrast", (theme.number, theme.name) + + +def test_no_theme_is_assigned_in_reserved_band() -> None: + registry = cached_theme_registry() + reserved = [ + t + for t in registry.list_available_extension_themes() + if t.number and 800 <= t.number < 900 + ] + assert reserved == [], reserved + # And resolving a reserved-band number is not a real theme. + assert resolve_theme({"theme": 850}).theme_id == DEFAULT_THEME_ID + + +def test_resolved_theme_band_matches_darkness() -> None: + # Light band is light; dark band is dark (builtin compatibility ids are always + # present, so this is deterministic regardless of which files are imported). + assert resolve_theme({"theme": 181}).is_dark is False # PySH Light (light band) + assert resolve_theme({"theme": 281}).is_dark is True # PySH Dark (dark band) + + +def test_deprecated_1_8_are_aliases_only_not_primary_numbers() -> None: + # 1-8 as a *primary* theme number is not a real theme: it falls back. 
+ for primary in range(1, 9): + assert resolve_theme({"theme": primary}).theme_id == DEFAULT_THEME_ID + # But a legacy ``[theme]`` table id (the deprecated alias form) still migrates. + assert resolve_theme({"theme": {"id": 3}}).theme_id == 381 + assert resolve_theme({"theme": {"id": 8}}).theme_id == 283 + + +# --------------------------------------------------------------------------- # +# Shipped config.toml stays user-facing and clean. +# --------------------------------------------------------------------------- # + + +def _config_text_and_parsed() -> tuple[str, dict[str, object]]: + path = REPO_ROOT / "config.toml" + raw = path.read_text(encoding="utf-8") + return raw, tomllib.loads(raw) + + +def test_config_toml_has_no_internal_declarative_tables() -> None: + raw, parsed = _config_text_and_parsed() + # Internal/declarative defaults live in code, not the user config. + assert "comments" not in parsed, "[comments.*] must not be in user config" + assert "supported_formats" not in parsed, "[supported_formats] must not be present" + syntax = parsed.get("syntax_highlighting") + if isinstance(syntax, dict): + for language, table in syntax.items(): + assert not (isinstance(table, dict) and "patterns" in table), ( + f"[[syntax_highlighting.{language}.patterns]] must not ship in config" + ) + # The obsolete empty placeholder is gone. + assert "keybindings = {}" not in raw + + +def test_config_toml_theme_is_in_a_real_policy_band() -> None: + _raw, parsed = _config_text_and_parsed() + theme = parsed.get("theme") + assert isinstance(theme, int) and not isinstance(theme, bool) + assert _band(theme) in {"light", "dark", "high-contrast"}, theme + # It resolves to exactly that theme (a real, present theme). 
+ assert resolve_theme({"theme": theme}).theme_id == theme diff --git a/tests/packaging/test_build_and_package_macos_script.py b/tests/packaging/test_build_and_package_macos_script.py index 8ec661cc..c7cdd347 100644 --- a/tests/packaging/test_build_and_package_macos_script.py +++ b/tests/packaging/test_build_and_package_macos_script.py @@ -35,6 +35,8 @@ def test_constants(macos: ModuleType) -> None: assert macos.EXIT_DMG_MISSING == 2 assert macos.EXIT_SHA_MISSING == 3 assert macos.EXIT_MISSING_TOOL == 5 + assert macos.ONIGURUMA_HEADER == "oniguruma.h" + assert macos.ONIGURUMA_PKG_CONFIG_NAME == "oniguruma" def test_non_darwin_returns_error( @@ -57,3 +59,32 @@ def test_dmg_artifact_token(macos: ModuleType, repo_root: Path) -> None: ) assert expected.parent == repo_root / "releases" / version assert expected.name == f"ecli_{version}_macos_{macos.MACOS_ARCH}.dmg" + + +def test_oniguruma_env_adds_include_lib_and_pkg_config_paths( + macos: ModuleType, tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + prefix = tmp_path / "oniguruma" + (prefix / "include").mkdir(parents=True) + (prefix / "lib" / "pkgconfig").mkdir(parents=True) + (prefix / "lib" / "pkgconfig" / "oniguruma.pc").write_text("", encoding="utf-8") + (prefix / "include" / "oniguruma.h").write_text("", encoding="utf-8") + (prefix / "lib" / "libonig.dylib").write_text("", encoding="utf-8") + monkeypatch.setenv("ECLI_ONIGURUMA_PREFIX", str(prefix)) + monkeypatch.setattr(macos, "_capture_stdout", lambda _command: None) + + env = macos.macos_native_dependency_env({}) + + assert f"-I{prefix / 'include'}" in env["CPPFLAGS"] + assert f"-I{prefix / 'include'}" in env["CFLAGS"] + assert f"-L{prefix / 'lib'}" in env["LDFLAGS"] + assert str(prefix / "lib" / "pkgconfig") in env["PKG_CONFIG_PATH"] + + +def test_macos_workflows_install_oniguruma_before_build(repo_root: Path) -> None: + for relative in ( + ".github/workflows/macos-dmg.yml", + ".github/workflows/macos-validate.yml", + ): + text = (repo_root / 
relative).read_text(encoding="utf-8") + assert "brew install oniguruma pkg-config" in text diff --git a/tests/packaging/test_packaging_macos_dmg_contract.py b/tests/packaging/test_packaging_macos_dmg_contract.py index 21714855..a448f303 100644 --- a/tests/packaging/test_packaging_macos_dmg_contract.py +++ b/tests/packaging/test_packaging_macos_dmg_contract.py @@ -49,4 +49,16 @@ def test_macos_dmg_naming_and_validation_workflow( validate = read_repo_text(".github/workflows/macos-validate.yml") assert_tokens_present(macos_script, ["ecli__macos_universal2.dmg"]) + assert_tokens_present( + macos_script, + [ + "ONIGURUMA_HEADER", + "macos_native_dependency_env", + "check_oniguruma_prerequisites", + "CPPFLAGS", + "LDFLAGS", + "PKG_CONFIG_PATH", + ], + ) assert "macOS Contract Validate" in validate + assert "brew install oniguruma pkg-config" in validate diff --git a/tests/ui/test_design_system.py b/tests/ui/test_design_system.py index f7754379..43d79178 100644 --- a/tests/ui/test_design_system.py +++ b/tests/ui/test_design_system.py @@ -62,7 +62,7 @@ def test_all_required_roles_are_defined() -> None: def test_every_role_maps_to_a_color_key_present_in_chrome_or_syntax() -> None: # Every mapped colour key must be a key the renderer actually allocates, # i.e. a chrome pair name or a syntax/git colour name. 
- palette = get_theme(5) + palette = get_theme(207) available = set(palette.chrome_color_pairs()) | set(palette.syntax_color_hex()) for role, key in ROLE_COLOR_KEYS.items(): assert key in available, (role, key) diff --git a/tests/ui/test_professional_chrome.py b/tests/ui/test_professional_chrome.py index 14e40d7d..c229e741 100644 --- a/tests/ui/test_professional_chrome.py +++ b/tests/ui/test_professional_chrome.py @@ -121,7 +121,7 @@ def __init__(self) -> None: self.filename = "/home/user/projects/ecli/src/sample.py" self.modified = True self.is_lightweight = False - self.active_theme = get_theme(5) + self.active_theme = get_theme(281) self.cursor_y = 0 def get_string_width(self, text: str) -> int: @@ -157,7 +157,7 @@ def test_header_renders_app_file_and_theme_within_bounds() -> None: row0 = _row_text(win, 0) assert "ECLI" in row0 assert "sample.py" in row0 - assert "theme 5" in row0 or "Dark Classic" in row0 + assert "theme 281" in row0 or "PySH Dark" in row0 # Every write stays within the terminal width. assert all(0 <= x < 90 for (_y, x) in win.cells) # Buffer is untouched by chrome rendering. diff --git a/tests/ui/test_textmate_scroll_pty_smoke.py b/tests/ui/test_textmate_scroll_pty_smoke.py new file mode 100644 index 00000000..4cebdc59 --- /dev/null +++ b/tests/ui/test_textmate_scroll_pty_smoke.py @@ -0,0 +1,174 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Project: Ecli +# File: tests/ui/test_textmate_scroll_pty_smoke.py +# Website: https://www.ecli.io +# Repository: https://github.com/SSobol77/ecli +# PyPI: https://pypi.org/project/ecli-editor/0.0.1/ +# +# Copyright (c) 2026 Siergej Sobolewski +# +# Licensed under the GNU General Public License version 2 only. +# See the LICENSE file in the project root for full license text. + +"""End-to-end PTY scroll smoke test for the #102 freeze. 
+ +This launches the **real** ECLI curses application inside a pseudo-terminal, +opens the **real** repository ``Makefile`` with the extension TextMate engine +enabled, sends scroll keys that move the viewport past the ``ifeq`` block near +line 42 (the reported freeze point), then sends quit. The test **fails** if ECLI +does not respond to quit within a hard timeout — i.e. if it froze. Environments +where curses cannot start at all (no usable terminal) are skipped, never failed. + +Every interactive read/wait has a deadline; nothing here can hang indefinitely. +""" + +from __future__ import annotations + +import os +import select +import signal +import struct +import sys +import time +from pathlib import Path + +import pytest + + +REPO_ROOT = Path(__file__).resolve().parents[2] + +STARTUP_TIMEOUT = 20.0 # seconds to first render +QUIT_TIMEOUT = 15.0 # seconds to exit after Ctrl+Q (freeze budget) + +pytestmark = pytest.mark.slow + + +def _set_winsize(fd: int, rows: int, cols: int) -> None: + import fcntl + import termios + + fcntl.ioctl(fd, termios.TIOCSWINSZ, struct.pack("HHHH", rows, cols, 0, 0)) + + +def _drain(fd: int, seconds: float) -> bytes: + """Read whatever is available from ``fd`` for up to ``seconds`` (never blocks).""" + deadline = time.monotonic() + seconds + data = b"" + while time.monotonic() < deadline: + readable, _, _ = select.select([fd], [], [], 0.2) + if fd not in readable: + continue + try: + chunk = os.read(fd, 65536) + except OSError: + break + if not chunk: + break + data += chunk + return data + + +def _isolated_env(tmp_path: Path) -> tuple[dict[str, str], Path]: + """Build an isolated HOME + config forcing the extension TextMate engine.""" + home = tmp_path / "home" + config_dir = home / ".config" / "ecli" + config_dir.mkdir(parents=True) + (config_dir / "config.toml").write_text( + "theme = 207\n" + "[editor]\n" + "syntax_highlighting = true\n" + "[extensions]\n" + "enabled = true\n" + 'syntax_engine = "extension"\n', + encoding="utf-8", 
+ ) + makefile = tmp_path / "Makefile" + makefile.write_text( + (REPO_ROOT / "Makefile").read_text(encoding="utf-8"), encoding="utf-8" + ) + env = dict(os.environ) + env["HOME"] = str(home) + env["XDG_CONFIG_HOME"] = str(config_dir.parent) + env["TERM"] = "xterm-256color" + env.setdefault("LANG", "C.UTF-8") + env.pop("ECLI_THEME", None) + return env, makefile + + +def _scroll_past_line_42(master: int) -> None: + """Move the viewport well past the ifeq block, triggering repaints.""" + for _ in range(10): + os.write(master, b"\x1b[6~") # PageDown (mode-agnostic) + time.sleep(0.05) + for _ in range(20): + os.write(master, b"\x1bOB") # Down (application cursor keys) + os.write(master, b"\x1b[B") # Down (normal cursor keys) + time.sleep(0.02) + + +@pytest.mark.skipif( + not hasattr(os, "openpty"), reason="no pty support on this platform" +) +def test_scroll_makefile_in_pty_does_not_freeze(tmp_path: Path) -> None: + import pty + import subprocess + + if not (REPO_ROOT / "Makefile").is_file(): + pytest.skip("repository Makefile is required for this smoke test") + + env, makefile = _isolated_env(tmp_path) + master, slave = pty.openpty() + _set_winsize(slave, rows=40, cols=120) + try: + proc = subprocess.Popen( + [sys.executable, "-m", "ecli", str(makefile)], + stdin=slave, + stdout=slave, + stderr=slave, + env=env, + cwd=str(tmp_path), + close_fds=True, + start_new_session=True, + ) + except OSError as error: # pragma: no cover - environment dependent + os.close(master) + os.close(slave) + pytest.skip(f"cannot spawn ecli: {error}") + os.close(slave) + + try: + startup = _drain(master, STARTUP_TIMEOUT) + if proc.poll() is not None: + pytest.skip( + f"ecli exited during startup (rc={proc.returncode}); curses could " + "not initialise in this environment" + ) + assert startup, "ecli produced no initial render output" + + # Scroll past the ifeq block near line 42 (and far beyond). 
+ _scroll_past_line_42(master) + # Proof it kept rendering while scrolling (would be empty/stalled on freeze). + _drain(master, 5.0) + + # Quit (Ctrl+Q). The buffer is unmodified, so no save prompt blocks exit. + os.write(master, b"\x11") + + deadline = time.monotonic() + QUIT_TIMEOUT + while time.monotonic() < deadline and proc.poll() is None: + _drain(master, 0.25) + assert proc.poll() is not None, ( + "ECLI did not exit within " + f"{QUIT_TIMEOUT:.0f}s of quit while scrolling the Makefile — freeze" + ) + finally: + if proc.poll() is None: + try: + os.killpg(os.getpgid(proc.pid), signal.SIGKILL) + except OSError: + proc.kill() + try: + proc.wait(timeout=5) + except Exception: # pragma: no cover - cleanup best effort + pass + os.close(master)