From 2f3ac28ecccfe48cb5499045b7d362e21d495129 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gregor=20=C5=BDuni=C4=8D?= <36313686+gregpr07@users.noreply.github.com> Date: Thu, 18 Jun 2026 01:48:19 +0000 Subject: [PATCH 01/15] Add Python browser manager --- SKILL.md | 31 +- TEMP_BROWSER_MANAGER_CONTEXT_DO_NOT_MERGE.md | 138 +++++ install.md | 5 +- pyproject.toml | 1 + src/browser_harness/_ipc.py | 66 ++- src/browser_harness/admin.py | 73 ++- src/browser_harness/context.py | 173 +++++++ src/browser_harness/helpers.py | 11 +- src/browser_harness/manager_client.py | 222 ++++++++ src/browser_harness/manager_daemon.py | 514 +++++++++++++++++++ src/browser_harness/manager_helpers.py | 51 ++ src/browser_harness/run.py | 38 +- tests/unit/test_context.py | 75 +++ tests/unit/test_manager_daemon.py | 78 +++ tests/unit/test_manager_helpers.py | 81 +++ tests/unit/test_run_manager_mode.py | 61 +++ 16 files changed, 1566 insertions(+), 52 deletions(-) create mode 100644 TEMP_BROWSER_MANAGER_CONTEXT_DO_NOT_MERGE.md create mode 100644 src/browser_harness/context.py create mode 100644 src/browser_harness/manager_client.py create mode 100644 src/browser_harness/manager_daemon.py create mode 100644 src/browser_harness/manager_helpers.py create mode 100644 tests/unit/test_context.py create mode 100644 tests/unit/test_manager_daemon.py create mode 100644 tests/unit/test_manager_helpers.py create mode 100644 tests/unit/test_run_manager_mode.py diff --git a/SKILL.md b/SKILL.md index 13dfcda6..6ca96303 100644 --- a/SKILL.md +++ b/SKILL.md @@ -1,6 +1,6 @@ --- name: browser -description: Direct browser control via CDP. Use when the user wants to automate, scrape, test, or interact with web pages. Connects to the user's already-running Chrome. +description: Direct browser control via CDP. Use when the user wants to automate, scrape, test, or interact with web pages. Connects to Chrome, Chromium, or a manager-created cloud browser. 
--- # browser-harness @@ -33,7 +33,30 @@ browser-harness <<'PY' PY ``` -run.py calls ensure_daemon() before exec — you never start/stop manually unless you want to. +Legacy mode calls ensure_daemon() before exec. Manager mode starts when the script uses a `browser_*` lifecycle helper or `BH_MANAGER_MODE=1`. + +### Managed browsers + +Use this when you need an isolated browser, parallel sub-agents, a cloud browser, or a restart after the current browser gets blocked. + +```bash +browser-harness <<'PY' +print(browser_status()) +print(browser_new(backend="cloud", proxy_country="us")) +new_tab("https://example.com") +print(page_info()) +print(browser_close()) +PY +``` + +Lifecycle helpers: +- `browser_status()` — current binding state. +- `browser_new(backend="cloud"|"managed", profile="clean", proxy_country=None, reason=None)` — create and switch to a browser. +- `browser_list()` — browser ids visible to this run/agent. +- `browser_switch(browser_id)` — reuse an existing browser id. +- `browser_close(browser_id=None)` — close the active private browser, or release access to a shared one. + +After `browser_new(...)` or `browser_switch(...)`, all normal page helpers work unchanged. If you are a sub-agent, create your own browser unless the parent gives you a browser id to reuse with `browser_switch(id)`. ### Remote browsers @@ -95,11 +118,11 @@ If you start struggling with a specific mechanic while navigating, look in inter ## Design constraints - Coordinate clicks default. Input.dispatchMouseEvent goes through iframes/shadow/cross-origin at the compositor level. -- Connect to the user's running Chrome. Don't launch your own browser. +- Legacy mode connects to the user's running Chrome. Manager mode may create cloud or managed browsers via `browser_new`. - cdp-use is only for CDPClient.send_raw. Prefer raw CDP strings over typed wrappers. - run.py stays tiny. No argparse, subcommands, or extra control layer. - Core helpers stay short. 
Put task-specific helper additions in `agent-workspace/agent_helpers.py`; daemon/bootstrap and remote session admin live in the core package. -- Don't add a manager layer. No retries framework, session manager, daemon supervisor, config system, or logging framework. +- Don't add another manager layer. Use the built-in `browser_*` lifecycle helpers. ## Gotchas (field-tested) diff --git a/TEMP_BROWSER_MANAGER_CONTEXT_DO_NOT_MERGE.md b/TEMP_BROWSER_MANAGER_CONTEXT_DO_NOT_MERGE.md new file mode 100644 index 00000000..de8ea64e --- /dev/null +++ b/TEMP_BROWSER_MANAGER_CONTEXT_DO_NOT_MERGE.md @@ -0,0 +1,138 @@ +# Temporary Browser Manager Context + +Remove this file before merging the PR. It is session context for review and follow-up, not product documentation. + +## Why This Branch Exists + +The current browser-harness works unusually well because the LLM sees the actual Python helper surface and can directly control browser/page behavior with very little indirection. The goal of this branch is to preserve that property while adding a tiny lifecycle layer for cases the current harness handles poorly: + +- many parallel agents; +- subagents needing either their own browser or a reused parent browser; +- remote/cloud browser creation from inside the harness flow; +- isolated per-browser daemon/runtime/tmp/artifact directories; +- safer cleanup and switching between browser backends. + +The important constraint from the discussion was: do not turn the LLM into a browser manager with a complicated control plane. The LLM should see a small set of obvious helpers, then use the existing page helpers exactly as before. 
+ +## Final LLM-Facing Interface + +The intended surface is: + +```python +browser_status() +browser_new(backend="cloud"|"managed", profile="clean", proxy_country=None, reason=None) +browser_list() +browser_switch(browser_id) +browser_close(browser_id=None) +``` + +After `browser_new(...)` or `browser_switch(...)`, normal browser-harness helpers such as `new_tab`, `page_info`, `capture_screenshot`, `click_at_xy`, `js`, and `cdp` work unchanged. + +The model does not need to know about sockets, daemon names, runtime dirs, CDP URLs, Browser Use browser IDs, or process cleanup. Those are manager internals. + +## Why Python Instead Of Rust + +This was switched from the earlier Rust manager direction to Python because browser-harness is already a Python package and the simplest install path matters more than a theoretically cleaner standalone daemon. + +Python keeps the end-to-end flow simple: + +```bash +uv tool install -e . +browser-harness <<'PY' +print(browser_new(backend="cloud", proxy_country="us")) +new_tab("https://example.com") +print(page_info()) +print(browser_close()) +PY +``` + +No separate Rust build, no extra binary distribution problem, and no cross-language install story. The manager daemon is just another Python module/script in the package. + +## Architecture + +The manager owns browser leases. A lease includes: + +- `browser_id`; +- backend type: `cloud` or `managed`; +- per-browser harness daemon name; +- per-browser runtime/tmp/download/artifact/profile dirs; +- CDP endpoint info; +- owner agent and allowed agent ids; +- an active execution lock. + +The runtime path is: + +```text +LLM code + -> browser_* helper + -> manager_client over Unix socket + -> manager_daemon creates/switches/closes lease + -> per-browser browser_harness.daemon + -> existing page helpers talk to that daemon +``` + +The existing non-manager browser-harness path still works. 
+ +## Parallelism Reasoning + +The branch tries to handle the obvious 100-agent failure modes: + +- manager auto-start is single-flight via a file lock, so concurrent agents should not start competing managers; +- browser ids and daemon names are generated per lease; +- each lease gets isolated runtime/tmp/artifact/profile directories; +- manager registry state is persisted under the manager root; +- browser creation does not hold the global manager lock while slow cloud/local startup happens; +- execution locks are per client process, so two simultaneous `browser-harness` invocations from the same agent do not mutate the same browser at once; +- cross-run close/switch attempts are rejected. + +This is still not a full stress-test result. It is the first implementation pass with targeted unit coverage for the scary cases. + +## Subagent Model + +The harness cannot rely on controlling Codex subagent spawn parameters. The practical design is therefore prompt/interface based: + +- default subagent behavior: call `browser_new(...)` and get an isolated browser; +- reuse behavior: parent gives a `browser_id`, subagent calls `browser_switch(browser_id)`; +- if the browser is busy, the manager returns `busy`, and the safe action is to wait or call `browser_new(...)`. + +This keeps the LLM-visible protocol minimal and avoids requiring Codex runtime changes. + +## Local Browser Note + +The VM used for this work must not start local Chrome or Chromium. Local managed-browser code exists, but local startup was intentionally not smoke-tested here. + +Cloud/live lifecycle should be tested separately with a Browser Use API key in the environment. Do not commit keys or put them in docs. 
+ +## Verification Done In This Session + +Commands run: + +```bash +uv run --with pytest pytest -q tests/unit +uv run python -m compileall -q src/browser_harness +``` + +Result at the time this note was written: + +```text +101 passed +``` + +A no-browser protocol smoke was also run: + +- auto-start Python manager; +- `browser_status()` returned `no-active-browser`; +- `browser_list()` returned `[]`; +- test manager was killed afterward. + +No local Chrome/Chromium was started. + +## What To Review Before Merge + +- Decide whether manager mode should be enabled by AST-detecting lifecycle helper calls, env vars only, or both. +- Live-test `browser_new(backend="cloud")` and `browser_close()` with a real Browser Use key. +- Live-test `browser_new(backend="managed")` on a laptop, not the VM. +- Stress-test many parallel agents/processes using the same manager root. +- Decide whether stale lease cleanup needs a sweeper. +- Decide whether profile support should remain `profile="clean"` only for the first version. +- Remove this file before merging. diff --git a/install.md b/install.md index 021af5c7..375d63b6 100644 --- a/install.md +++ b/install.md @@ -48,6 +48,8 @@ This makes new Codex or Claude Code sessions in other folders load the runtime b ```text Chrome / Browser Use cloud -> CDP WS -> browser_harness.daemon -> IPC -> browser_harness.run + ^ +optional browser_harness.manager_daemon owns many isolated browser leases ``` - Protocol is one JSON line each way. @@ -58,6 +60,7 @@ Chrome / Browser Use cloud -> CDP WS -> browser_harness.daemon -> IPC -> browser - BU_CDP_WS overrides local Chrome discovery for remote browsers. - BU_CDP_URL overrides local Chrome discovery with a specific DevTools HTTP endpoint (used for Way 2). - BU_BROWSER_ID + BROWSER_USE_API_KEY lets the daemon stop a Browser Use cloud browser on shutdown. 
+- Manager mode auto-starts `browser-harness-manager` when `browser_status`, `browser_new`, `browser_list`, `browser_switch`, or `browser_close` is used. # Browser connection setup and troubleshooting @@ -67,7 +70,7 @@ This section is the source of truth for how browser-harness connects to a browse Browser-harness can connect to any Chrome or Chromium-based browser on your computer, or to a Browser Use cloud browser. -**Cloud browsers** are managed by the Browser Use cloud API. Start one in Python with `start_remote_daemon("work", ...)`. Authentication is via the `BROWSER_USE_API_KEY` environment variable; the harness handles the WebSocket URL itself. To carry your local Chrome cookies into a cloud browser, install `profile-use` once (`curl -fsSL https://browser-use.com/profile.sh | sh`), then call `uuid = sync_local_profile("MyChromeProfile")` followed by `start_remote_daemon("work", profileId=uuid)`. Cookies are the only thing synced — not localStorage, not extensions, not history. +**Cloud browsers** are managed by the Browser Use cloud API. In manager mode, start one with `browser_new(backend="cloud", proxy_country="us")`; for legacy named daemons use `start_remote_daemon("work", ...)`. Authentication is via the `BROWSER_USE_API_KEY` environment variable; the harness handles the WebSocket URL itself. To carry your local Chrome cookies into a cloud browser, install `profile-use` once (`curl -fsSL https://browser-use.com/profile.sh | sh`), then call `uuid = sync_local_profile("MyChromeProfile")` followed by `start_remote_daemon("work", profileId=uuid)`. Cookies are the only thing synced — not localStorage, not extensions, not history. **Local browsers** require remote debugging to be enabled. There are two ways, and they suit different use cases. 
diff --git a/pyproject.toml b/pyproject.toml index f812a6ab..02b7d0bf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ dependencies = [ [project.scripts] browser-harness = "browser_harness.run:main" +browser-harness-manager = "browser_harness.manager_daemon:main" [tool.setuptools] package-dir = {"" = "src"} diff --git a/src/browser_harness/_ipc.py b/src/browser_harness/_ipc.py index 2d265766..1f32daee 100644 --- a/src/browser_harness/_ipc.py +++ b/src/browser_harness/_ipc.py @@ -34,35 +34,47 @@ def _check(name): # path-traversal guard for BU_NAME return name -def _runtime_stem(name): # "bu" when BH_RUNTIME_DIR isolates us, else "bu-" +def _runtime_path(runtime_dir=None): + p = Path(runtime_dir) if runtime_dir else _RUNTIME + p.mkdir(parents=True, exist_ok=True) + return p + + +def _tmp_path(tmp_dir=None): + p = Path(tmp_dir) if tmp_dir else _TMP + p.mkdir(parents=True, exist_ok=True) + return p + + +def _runtime_stem(name, runtime_dir=None): # "bu" when runtime dir isolates us, else "bu-{name}" _check(name) - return "bu" if BH_RUNTIME_DIR else f"bu-{name}" + return "bu" if (runtime_dir or BH_RUNTIME_DIR) else f"bu-{name}" -def _tmp_stem(name): # "bu" when BH_TMP_DIR isolates us, else "bu-" +def _tmp_stem(name, tmp_dir=None): # "bu" when tmp dir isolates us, else "bu-{name}" _check(name) - return "bu" if BH_TMP_DIR else f"bu-{name}" + return "bu" if (tmp_dir or BH_TMP_DIR) else f"bu-{name}" -def log_path(name): return _TMP / f"{_tmp_stem(name)}.log" -def pid_path(name): return _RUNTIME / f"{_runtime_stem(name)}.pid" -def port_path(name): return _RUNTIME / f"{_runtime_stem(name)}.port" # Windows-only: holds {"port","token"} JSON -def _sock_path(name): return _RUNTIME / f"{_runtime_stem(name)}.sock" +def log_path(name, tmp_dir=None): return _tmp_path(tmp_dir) / f"{_tmp_stem(name, tmp_dir)}.log" +def pid_path(name, runtime_dir=None): return _runtime_path(runtime_dir) / f"{_runtime_stem(name, runtime_dir)}.pid" +def port_path(name, runtime_dir=None): return
_runtime_path(runtime_dir) / f"{_runtime_stem(name, runtime_dir)}.port" # Windows-only +def _sock_path(name, runtime_dir=None): return _runtime_path(runtime_dir) / f"{_runtime_stem(name, runtime_dir)}.sock" -def _read_port_file(name): +def _read_port_file(name, runtime_dir=None): """(port, token) from the Windows port file, or (None, None) on any failure.""" try: - d = json.loads(port_path(name).read_text()) + d = json.loads(port_path(name, runtime_dir).read_text()) return int(d["port"]), d["token"] except (FileNotFoundError, ValueError, KeyError, TypeError, OSError): return None, None -def sock_addr(name): # display-only, used in log lines - if not IS_WINDOWS: return str(_sock_path(name)) - port, _ = _read_port_file(name) - return f"127.0.0.1:{port}" if port else f"tcp:{_runtime_stem(name)}" +def sock_addr(name, runtime_dir=None): # display-only, used in log lines + if not IS_WINDOWS: return str(_sock_path(name, runtime_dir)) + port, _ = _read_port_file(name, runtime_dir) + return f"127.0.0.1:{port}" if port else f"tcp:{_runtime_stem(name, runtime_dir)}" def spawn_kwargs(): # subprocess.Popen flags so the daemon detaches from this terminal @@ -76,15 +88,15 @@ def spawn_kwargs(): # subprocess.Popen flags so the daemon detaches from this t return {"start_new_session": True} -def connect(name, timeout=1.0): +def connect(name, timeout=1.0, runtime_dir=None): """Blocking client. Returns (sock, token); token is None on POSIX, hex string on Windows. Callers sending JSON requests MUST include the token as req["token"] on Windows.""" if not IS_WINDOWS: # uv-Python on Windows lacks socket.AF_UNIX, so this branch must be gated. 
s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) - s.settimeout(timeout); s.connect(str(_sock_path(name))); return s, None - port, token = _read_port_file(name) - if port is None: raise FileNotFoundError(str(port_path(name))) + s.settimeout(timeout); s.connect(str(_sock_path(name, runtime_dir))); return s, None + port, token = _read_port_file(name, runtime_dir) + if port is None: raise FileNotFoundError(str(port_path(name, runtime_dir))) s = socket.create_connection(("127.0.0.1", port), timeout=timeout) s.settimeout(timeout); return s, token @@ -102,12 +114,15 @@ def request(c, token, req): return json.loads(data or b"{}") -def ping(name, timeout=1.0): +def ping(name, timeout=1.0, runtime_dir=None): """True iff a live daemon answers our ping. Defends against stale .port files + port reuse: a bare TCP connect can succeed against an unrelated process that grabbed the port after our daemon crashed; only our daemon answers {"pong":true}.""" try: - c, token = connect(name, timeout=timeout) + if runtime_dir is None: + c, token = connect(name, timeout=timeout) + else: + c, token = connect(name, timeout=timeout, runtime_dir=runtime_dir) except (FileNotFoundError, ConnectionRefusedError, TimeoutError, socket.timeout, OSError): return False try: @@ -123,14 +138,17 @@ def ping(name, timeout=1.0): except OSError: pass -def identify(name, timeout=1.0): +def identify(name, timeout=1.0, runtime_dir=None): """Return the live daemon's PID, or None if unreachable. 
Used by restart_daemon() to signal a process whose identity has been verified end-to-end (live IPC + self-reported PID), instead of trusting a pid file whose number may have been reused by an unrelated process.""" try: - c, token = connect(name, timeout=timeout) + if runtime_dir is None: + c, token = connect(name, timeout=timeout) + else: + c, token = connect(name, timeout=timeout, runtime_dir=runtime_dir) except (FileNotFoundError, ConnectionRefusedError, TimeoutError, socket.timeout, OSError): return None try: @@ -191,7 +209,7 @@ def expected_token(): return _server_token -def cleanup_endpoint(name): # best-effort; silent if already gone - p = _sock_path(name) if not IS_WINDOWS else port_path(name) +def cleanup_endpoint(name, runtime_dir=None): # best-effort; silent if already gone + p = _sock_path(name, runtime_dir) if not IS_WINDOWS else port_path(name, runtime_dir) try: p.unlink() except FileNotFoundError: pass diff --git a/src/browser_harness/admin.py b/src/browser_harness/admin.py index c72a8fb3..d418e920 100644 --- a/src/browser_harness/admin.py +++ b/src/browser_harness/admin.py @@ -9,6 +9,7 @@ from pathlib import Path from . import _ipc as ipc +from . 
import context def _process_start_time(pid): @@ -130,9 +131,35 @@ def _load_env_file(p): DOCTOR_TEXT_LIMIT = 140 -def _log_tail(name): +def _binding_parts(binding=None): + if binding is None: + return None, None, None + return binding.bu_name, binding.runtime_dir, binding.tmp_dir + + +def _ipc_pid_path(name, runtime_dir=None): + return ipc.pid_path(name) if runtime_dir is None else ipc.pid_path(name, runtime_dir=runtime_dir) + + +def _ipc_connect(name, timeout=1.0, runtime_dir=None): + return ipc.connect(name, timeout=timeout) if runtime_dir is None else ipc.connect(name, timeout=timeout, runtime_dir=runtime_dir) + + +def _ipc_ping(name, timeout=1.0, runtime_dir=None): + return ipc.ping(name, timeout=timeout) if runtime_dir is None else ipc.ping(name, timeout=timeout, runtime_dir=runtime_dir) + + +def _ipc_identify(name, timeout=1.0, runtime_dir=None): + return ipc.identify(name, timeout=timeout) if runtime_dir is None else ipc.identify(name, timeout=timeout, runtime_dir=runtime_dir) + + +def _ipc_cleanup_endpoint(name, runtime_dir=None): + return ipc.cleanup_endpoint(name) if runtime_dir is None else ipc.cleanup_endpoint(name, runtime_dir=runtime_dir) + + +def _log_tail(name, tmp_dir=None): try: - return ipc.log_path(name or NAME).read_text().strip().splitlines()[-1] + return ipc.log_path(name or NAME, tmp_dir=tmp_dir).read_text().strip().splitlines()[-1] except (FileNotFoundError, IndexError): return None @@ -161,10 +188,11 @@ def _is_local_chrome_mode(env=None): return not (env or {}).get("BU_CDP_WS") and not os.environ.get("BU_CDP_WS") -def daemon_alive(name=None): +def daemon_alive(name=None, binding=None): # Ping handshake (not a bare connect) so a stale .port file + port reuse # after a daemon crash doesn't make us mistake an unrelated listener for ours. 
- return ipc.ping(name or NAME, timeout=1.0) + b_name, runtime_dir, _tmp_dir = _binding_parts(binding) + return _ipc_ping(name or b_name or NAME, timeout=1.0, runtime_dir=runtime_dir) def _daemon_endpoint_names(): @@ -295,19 +323,23 @@ def run_doctor_fix_snap(): return 0 -def ensure_daemon(wait=60.0, name=None, env=None): +def ensure_daemon(wait=60.0, name=None, env=None, binding=None): """Idempotent. Self-heals stale daemon, cold Chrome, and missing Allow on chrome://inspect.""" - if daemon_alive(name): + b_name, runtime_dir, tmp_dir = _binding_parts(binding) + name = name or b_name + binding_env = binding.daemon_env() if binding else {} + env = {**binding_env, **(env or {})} + if daemon_alive(name, binding=binding): # Stale daemons accept connects AND reply to meta:* (pure Python) even when the # CDP WS to Chrome is dead — probe with a real CDP call and require "result". # Must go through ipc.connect so this works on Windows (TCP loopback) too; # raw AF_UNIX here would fail on every warm call and churn the daemon. 
try: - s, token = ipc.connect(name or NAME, timeout=3.0) + s, token = _ipc_connect(name or NAME, timeout=3.0, runtime_dir=runtime_dir) resp = ipc.request(s, token, {"method": "Target.getTargets", "params": {}}) if "result" in resp: return except Exception: pass - restart_daemon(name) + restart_daemon(name, binding=binding) import subprocess, sys local = _is_local_chrome_mode(env) @@ -319,16 +351,16 @@ def ensure_daemon(wait=60.0, name=None, env=None): ) deadline = time.time() + wait while time.time() < deadline: - if daemon_alive(name): return + if daemon_alive(name, binding=binding): return if p.poll() is not None: break time.sleep(0.2) - msg = _log_tail(name) or "" + msg = _log_tail(name, tmp_dir=tmp_dir) or "" if local and attempt == 0 and _needs_chrome_remote_debugging_prompt(msg): _open_chrome_inspect() print('browser-harness: at chrome://inspect/#remote-debugging, tick "Allow remote debugging for this browser instance" and click Allow on the popup that appears', file=sys.stderr) - restart_daemon(name) + restart_daemon(name, binding=binding) continue - raise RuntimeError(msg or f"daemon {name or NAME} didn't come up -- check {ipc.log_path(name or NAME)}") + raise RuntimeError(msg or f"daemon {name or NAME} didn't come up -- check {ipc.log_path(name or NAME, tmp_dir=tmp_dir)}") def stop_remote_daemon(name="remote"): @@ -345,7 +377,7 @@ def stop_remote_daemon(name="remote"): restart_daemon(name) -def restart_daemon(name=None): +def restart_daemon(name=None, binding=None): """Best-effort daemon shutdown + socket/pid cleanup. 
Name is historical: callers typically follow this with another @@ -359,8 +391,9 @@ def restart_daemon(name=None): """ import signal - name = name or NAME - pid_path = str(ipc.pid_path(name)) + b_name, runtime_dir, _tmp_dir = _binding_parts(binding) + name = name or b_name or NAME + pid_path = str(_ipc_pid_path(name, runtime_dir=runtime_dir)) # Two pieces of information are tracked separately: # - daemon_pid: the daemon's self-reported PID, or None. Only daemons @@ -370,8 +403,8 @@ def restart_daemon(name=None): # IPC path working across upgrades — without it, a still-running # pre-upgrade daemon would have its socket deleted out from under it # while the process stayed alive. - daemon_pid = ipc.identify(name, timeout=5.0) - daemon_alive = daemon_pid is not None or ipc.ping(name, timeout=1.0) + daemon_pid = _ipc_identify(name, timeout=5.0, runtime_dir=runtime_dir) + daemon_alive = daemon_pid is not None or _ipc_ping(name, timeout=1.0, runtime_dir=runtime_dir) # Snapshot the daemon's process start-time as a secondary identity check. # The IPC socket can disappear before the process exits (e.g. the shutdown # path tears down the socket and then waits on a slow remote `stop` PATCH), @@ -383,7 +416,7 @@ def restart_daemon(name=None): if daemon_alive: try: - c, token = ipc.connect(name, timeout=5.0) + c, token = _ipc_connect(name, timeout=5.0, runtime_dir=runtime_dir) ipc.request(c, token, {"meta": "shutdown"}) c.close() except Exception: @@ -405,7 +438,7 @@ def restart_daemon(name=None): # same process, just slow to exit (e.g. stuck in remote stop). # The IPC may already be gone; that's expected. # If neither holds, the PID may have been reused; skip SIGTERM. 
- verified_pid = ipc.identify(name, timeout=1.0) + verified_pid = _ipc_identify(name, timeout=1.0, runtime_dir=runtime_dir) same_process = verified_pid == daemon_pid or ( daemon_start is not None and _process_start_time(daemon_pid) == daemon_start @@ -416,7 +449,7 @@ def restart_daemon(name=None): except (ProcessLookupError, OSError, SystemError, OverflowError): pass - ipc.cleanup_endpoint(name) + _ipc_cleanup_endpoint(name, runtime_dir=runtime_dir) try: os.unlink(pid_path) except FileNotFoundError: diff --git a/src/browser_harness/context.py b/src/browser_harness/context.py new file mode 100644 index 00000000..782981b2 --- /dev/null +++ b/src/browser_harness/context.py @@ -0,0 +1,173 @@ +"""Runtime browser binding state for manager mode. + +Legacy browser-harness is environment-driven: BU_NAME, BH_RUNTIME_DIR, and +BH_TMP_DIR are read when modules import. Manager mode needs the active browser +binding to change inside one Python process, so helpers resolve this context at +call time. +""" +from __future__ import annotations + +from dataclasses import dataclass +import hashlib +import os +import tempfile +from pathlib import Path + + +@dataclass(frozen=True) +class AgentIdentity: + run_id: str | None + agent_id: str | None + parent_agent_id: str | None = None + + @property + def degraded(self) -> bool: + return not (self.run_id and self.agent_id) + + def payload(self) -> dict: + return { + "run_id": self.run_id, + "agent_id": self.agent_id, + "parent_agent_id": self.parent_agent_id, + "identity_degraded": self.degraded, + } + + +@dataclass(frozen=True) +class BrowserBinding: + browser_id: str | None + bu_name: str + runtime_dir: Path | None = None + tmp_dir: Path | None = None + download_dir: Path | None = None + artifact_dir: Path | None = None + cdp_url: str | None = None + cdp_ws: str | None = None + manager_mode: bool = False + + @classmethod + def from_manager(cls, data: dict) -> "BrowserBinding": + return cls( + browser_id=data.get("browser_id"), + 
bu_name=data["bu_name"], + runtime_dir=_path_or_none(data.get("runtime_dir")), + tmp_dir=_path_or_none(data.get("tmp_dir")), + download_dir=_path_or_none(data.get("download_dir")), + artifact_dir=_path_or_none(data.get("artifact_dir")), + cdp_url=data.get("cdp_url"), + cdp_ws=data.get("cdp_ws"), + manager_mode=True, + ) + + def daemon_env(self) -> dict[str, str]: + env = {"BU_NAME": self.bu_name} + if self.runtime_dir is not None: + env["BH_RUNTIME_DIR"] = str(self.runtime_dir) + if self.tmp_dir is not None: + env["BH_TMP_DIR"] = str(self.tmp_dir) + if self.cdp_url: + env["BU_CDP_URL"] = self.cdp_url + if self.cdp_ws: + env["BU_CDP_WS"] = self.cdp_ws + return env + + +def _path_or_none(value) -> Path | None: + return Path(value) if value else None + + +def manager_enabled() -> bool: + return os.environ.get("BH_MANAGER_MODE") == "1" or bool(os.environ.get("BH_MANAGER_SOCKET")) + + +def agent_identity() -> AgentIdentity: + run_id = ( + os.environ.get("BH_RUN_ID") + or os.environ.get("CODEX_THREAD_ID") + or os.environ.get("CODEX_SESSION_ID") + or _cwd_run_id() + ) + agent_id = ( + os.environ.get("BH_AGENT_ID") + or os.environ.get("CODEX_AGENT_ID") + or os.environ.get("CODEX_SUBAGENT_ID") + or "main" + ) + return AgentIdentity( + run_id=run_id, + agent_id=agent_id, + parent_agent_id=os.environ.get("BH_PARENT_AGENT_ID") or os.environ.get("CODEX_PARENT_AGENT_ID"), + ) + + +def _cwd_run_id() -> str: + raw = f"{os.environ.get('USER') or ''}:{Path.cwd()}" + return "cwd-" + hashlib.sha256(raw.encode()).hexdigest()[:16] + + +def default_binding_from_env() -> BrowserBinding: + tmp_dir = _path_or_none(os.environ.get("BH_TMP_DIR")) + runtime_dir = _path_or_none(os.environ.get("BH_RUNTIME_DIR")) or tmp_dir + return BrowserBinding( + browser_id=os.environ.get("BH_BROWSER_ID"), + bu_name=os.environ.get("BU_NAME", "default"), + runtime_dir=runtime_dir, + tmp_dir=tmp_dir, + cdp_url=os.environ.get("BU_CDP_URL") or None, + cdp_ws=os.environ.get("BU_CDP_WS") or None, + 
manager_mode=manager_enabled(), + ) + + +_active_binding: BrowserBinding | None = default_binding_from_env() + + +def get_active_binding() -> BrowserBinding | None: + return _active_binding + + +def activate_binding(binding: BrowserBinding) -> None: + global _active_binding + _active_binding = binding + for p in (binding.runtime_dir, binding.tmp_dir, binding.download_dir, binding.artifact_dir): + if p is not None: + p.mkdir(parents=True, exist_ok=True) + + +def clear_active_binding() -> None: + global _active_binding + _active_binding = None + + +def require_active_binding() -> BrowserBinding: + binding = get_active_binding() + if binding is None: + raise RuntimeError("no-active-browser: call browser_new(...) or browser_switch(browser_id) first") + return binding + + +def active_bu_name() -> str: + return require_active_binding().bu_name + + +def active_runtime_dir() -> Path | None: + binding = get_active_binding() + return binding.runtime_dir if binding else None + + +def active_tmp_dir() -> Path | None: + binding = get_active_binding() + return binding.tmp_dir if binding else None + + +def active_artifact_dir() -> Path: + binding = require_active_binding() + if binding.artifact_dir is not None: + binding.artifact_dir.mkdir(parents=True, exist_ok=True) + return binding.artifact_dir + if binding.tmp_dir is not None: + binding.tmp_dir.mkdir(parents=True, exist_ok=True) + return binding.tmp_dir + p = Path(tempfile.gettempdir()) + p.mkdir(parents=True, exist_ok=True) + return p diff --git a/src/browser_harness/helpers.py b/src/browser_harness/helpers.py index 2014887b..57b8cfb5 100644 --- a/src/browser_harness/helpers.py +++ b/src/browser_harness/helpers.py @@ -8,6 +8,7 @@ from urllib.parse import urlparse from . import _ipc as ipc +from . 
import context CORE_DIR = Path(__file__).resolve().parent @@ -40,7 +41,8 @@ def _load_env_file(p): def _send(req): - c, token = ipc.connect(NAME, timeout=5.0) + binding = context.require_active_binding() + c, token = ipc.connect(binding.bu_name, timeout=5.0, runtime_dir=binding.runtime_dir) try: r = ipc.request(c, token, req) finally: @@ -269,7 +271,12 @@ def scroll(x, y, dy=-300, dx=0): def capture_screenshot(path=None, full=False, max_dim=None): """Save a PNG of the current viewport. Set max_dim=1800 on a 2× display to keep the file under the 2000px-per-side limit some image-aware LLMs enforce.""" - path = path or str(ipc._TMP / "shot.png") + if path is None: + binding = context.get_active_binding() + if binding and binding.manager_mode: + path = str(context.active_artifact_dir() / "shot.png") + else: + path = str(ipc._TMP / "shot.png") r = cdp("Page.captureScreenshot", format="png", captureBeyondViewport=full) open(path, "wb").write(base64.b64decode(r["data"])) if max_dim: diff --git a/src/browser_harness/manager_client.py b/src/browser_harness/manager_client.py new file mode 100644 index 00000000..c61ef708 --- /dev/null +++ b/src/browser_harness/manager_client.py @@ -0,0 +1,222 @@ +"""Client for the browser-harness manager.""" +from __future__ import annotations + +from contextlib import contextmanager +import json +import os +from pathlib import Path +import secrets +import socket +import subprocess +import sys +import tempfile +import time +from dataclasses import dataclass + +from . 
import context + + +class ManagerError(RuntimeError): + def __init__(self, response): + self.response = response if isinstance(response, dict) else {"reason": str(response)} + reason = self.response.get("reason") or self.response.get("error") or self.response.get("state") or "manager error" + super().__init__(reason) + + +@dataclass +class ExecutionLock: + browser_id: str + lock_id: str + + +_active_lock: ExecutionLock | None = None +_manager_started = False +_CLIENT_ID = f"{os.getpid()}_{secrets.token_hex(4)}" + + +def default_manager_root() -> str: + return os.environ.get("BH_MANAGER_ROOT") or str(Path(tempfile.gettempdir()) / "bhm") + + +def default_manager_socket() -> str: + return os.environ.get("BH_MANAGER_SOCKET") or str(Path(default_manager_root()) / "manager.sock") + + +def manager_socket() -> str: + path = default_manager_socket() + os.environ.setdefault("BH_MANAGER_SOCKET", path) + os.environ.setdefault("BH_MANAGER_ROOT", default_manager_root()) + ensure_manager_running(path) + return path + + +def ensure_manager_running(path: str | None = None) -> None: + global _manager_started + path = path or default_manager_socket() + if _manager_socket_alive(path): + return + root = Path(os.environ.get("BH_MANAGER_ROOT") or default_manager_root()) + root.mkdir(parents=True, exist_ok=True) + with _start_lock(root): + if _manager_socket_alive(path): + return + log = open(root / "manager.log", "ab") + env = {**os.environ, "BH_MANAGER_SOCKET": path, "BH_MANAGER_ROOT": str(root)} + try: + subprocess.Popen( + [sys.executable, "-m", "browser_harness.manager_daemon", "--socket", path, "--root", str(root)], + stdin=subprocess.DEVNULL, + stdout=log, + stderr=log, + env=env, + start_new_session=True, + ) + finally: + log.close() + _manager_started = True + deadline = time.time() + float(os.environ.get("BH_MANAGER_START_TIMEOUT", "10")) + while time.time() < deadline: + if _manager_socket_alive(path): + return + time.sleep(0.05) + raise ManagerError({"state": 
"manager-unavailable", "reason": f"manager did not start at {path}"}) + + +@contextmanager +def _start_lock(root: Path): + lock_path = root / "manager.start.lock" + with open(lock_path, "a+b") as f: + if os.name == "nt": + import msvcrt + msvcrt.locking(f.fileno(), msvcrt.LK_LOCK, 1) + try: + yield + finally: + f.seek(0) + msvcrt.locking(f.fileno(), msvcrt.LK_UNLCK, 1) + else: + import fcntl + fcntl.flock(f.fileno(), fcntl.LOCK_EX) + try: + yield + finally: + fcntl.flock(f.fileno(), fcntl.LOCK_UN) + + +def _manager_socket_alive(path: str) -> bool: + try: + s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + s.settimeout(0.2) + s.connect(path) + s.close() + return True + except OSError: + return False + + +def request(op: str, **payload) -> dict: + req = {"op": op, **context.agent_identity().payload(), "client_id": _CLIENT_ID, **payload} + path = manager_socket() + s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + try: + s.settimeout(float(os.environ.get("BH_MANAGER_TIMEOUT", "30"))) + s.connect(path) + s.sendall((json.dumps(req) + "\n").encode()) + data = b"" + while not data.endswith(b"\n"): + chunk = s.recv(1 << 16) + if not chunk: + break + data += chunk + finally: + s.close() + resp = json.loads(data or b"{}") + if not isinstance(resp, dict): + raise ManagerError({"state": "bad-response", "reason": "manager returned non-object JSON"}) + if resp.get("ok") is False: + raise ManagerError(resp) + return resp + + +def public_state(resp: dict) -> dict: + return {k: v for k, v in resp.items() if k not in {"binding", "ok"}} + + +def binding_from_response(resp: dict) -> context.BrowserBinding: + binding = resp.get("binding") + if not isinstance(binding, dict): + raise ManagerError({"state": "bad-response", "reason": "manager response missing binding"}) + return context.BrowserBinding.from_manager(binding) + + +def status() -> dict: + try: + return public_state(request("status")) + except ManagerError as e: + if e.response.get("state") == "manager-unavailable": 
+ return {"ready": False, "state": "manager-unavailable", "reason": str(e), "safe_actions": []} + raise + + +def list_browsers() -> list[dict]: + resp = request("list") + browsers = resp.get("browsers", []) + if not isinstance(browsers, list): + raise ManagerError({"state": "bad-response", "reason": "manager list response missing browsers"}) + return browsers + + +def new_browser(backend="managed", *, profile="clean", proxy_country=None, reason=None) -> dict: + return request( + "new", + backend=backend, + profile=profile, + proxy_country=proxy_country, + reason=reason, + ) + + +def switch_browser(browser_id: str) -> dict: + return request("switch", browser_id=browser_id) + + +def close_browser(browser_id: str | None = None) -> dict: + return request("close", browser_id=browser_id) + + +def acquire_execution(browser_id: str) -> ExecutionLock: + resp = request("lock", browser_id=browser_id) + lock_id = resp.get("lock_id") + if not lock_id: + raise ManagerError({"state": "bad-response", "reason": "manager lock response missing lock_id"}) + return ExecutionLock(browser_id=browser_id, lock_id=lock_id) + + +def release_execution(lock: ExecutionLock) -> None: + try: + request("unlock", browser_id=lock.browser_id, lock_id=lock.lock_id) + except ManagerError: + # Process shutdown should not mask the task's real exception. 
+ pass + + +def release_active_execution_lock() -> None: + global _active_lock + lock = _active_lock + _active_lock = None + if lock is not None: + release_execution(lock) + + +def acquire_execution_for_binding(binding: context.BrowserBinding) -> None: + global _active_lock + if not binding.browser_id: + return + if _active_lock and _active_lock.browser_id == binding.browser_id: + return + release_active_execution_lock() + _active_lock = acquire_execution(binding.browser_id) + + +def active_lock() -> ExecutionLock | None: + return _active_lock diff --git a/src/browser_harness/manager_daemon.py b/src/browser_harness/manager_daemon.py new file mode 100644 index 00000000..e6e872bc --- /dev/null +++ b/src/browser_harness/manager_daemon.py @@ -0,0 +1,514 @@ +"""Browser lifecycle manager for browser-harness manager mode.""" +from __future__ import annotations + +from dataclasses import asdict, dataclass, field +import argparse +import json +import os +from pathlib import Path +import secrets +import shutil +import socket +import subprocess +import sys +import tempfile +import threading +import time +import urllib.request + +from . 
import admin, context + + +BU_API = "https://api.browser-use.com/api/v3" + + +@dataclass +class BrowserLease: + browser_id: str + run_id: str + owner_agent_id: str + backend: str + profile_kind: str + harness_daemon_name: str + runtime_dir: str + tmp_dir: str + download_dir: str + artifact_dir: str + profile_dir: str + cdp_url: str | None = None + cdp_ws: str | None = None + local_process_id: int | None = None + local_debug_port: int | None = None + cloud_browser_id: str | None = None + cloud_live_url: str | None = None + allowed_agents: list[str] = field(default_factory=list) + active_execution: dict | None = None + created_at_ms: int = field(default_factory=lambda: int(time.time() * 1000)) + last_used_at_ms: int = field(default_factory=lambda: int(time.time() * 1000)) + + @classmethod + def from_json(cls, data: dict) -> "BrowserLease": + return cls(**data) + + def binding(self) -> dict: + return { + "browser_id": self.browser_id, + "bu_name": self.harness_daemon_name, + "runtime_dir": self.runtime_dir, + "tmp_dir": self.tmp_dir, + "download_dir": self.download_dir, + "artifact_dir": self.artifact_dir, + "cdp_url": self.cdp_url, + "cdp_ws": self.cdp_ws, + } + + +class Manager: + def __init__(self, root: Path): + self.root = root + self.root.mkdir(parents=True, exist_ok=True) + self._lock = threading.RLock() + self.leases: dict[str, BrowserLease] = {} + self.active_by_agent: dict[str, str] = {} + self.next_seq = 0 + self._load() + + def _load(self): + try: + data = json.loads((self.root / "registry.json").read_text()) + except (FileNotFoundError, json.JSONDecodeError, OSError): + return + self.next_seq = int(data.get("next_seq") or 0) + self.active_by_agent = dict(data.get("active_by_agent") or {}) + for item in data.get("leases") or []: + lease = BrowserLease.from_json(item) + lease.active_execution = None + self.leases[lease.browser_id] = lease + + def _persist(self): + data = { + "next_seq": self.next_seq, + "active_by_agent": self.active_by_agent, + "leases": 
[asdict(v) for v in self.leases.values()], + } + tmp = self.root / "registry.json.tmp" + tmp.write_text(json.dumps(data, indent=2)) + os.replace(tmp, self.root / "registry.json") + + def handle(self, req: dict) -> dict: + op = req.get("op") + try: + if op == "status": + return self.status(req) + if op == "list": + return self.list(req) + if op == "new": + return self.new(req) + if op == "switch": + return self.switch(req) + if op == "close": + return self.close(req) + if op == "lock": + return self.lock(req) + if op == "unlock": + return self.unlock(req) + return error("unknown-op", f"unknown op {op!r}", []) + except Exception as e: + return error("manager-error", str(e), []) + + def status(self, req: dict) -> dict: + with self._lock: + key = agent_key(req) + browser_id = self.active_by_agent.get(key) + if not browser_id: + return {"ok": True, "ready": False, "state": "no-active-browser", "safe_actions": ["browser_new", "browser_switch"]} + lease = self.leases.get(browser_id) + if not lease: + return {"ok": True, "ready": False, "state": "stale-binding", "safe_actions": ["browser_new", "browser_switch"]} + return ready_public(lease) + + def list(self, req: dict) -> dict: + with self._lock: + run_id, agent_id = run_agent(req) + browsers = [] + for lease in self.leases.values(): + if lease.run_id != run_id: + continue + if lease.owner_agent_id != agent_id and agent_id not in lease.allowed_agents: + continue + browsers.append({ + "browser_id": lease.browser_id, + "backend": lease.backend, + "owned_by_this_agent": lease.owner_agent_id == agent_id, + "shared": len(lease.allowed_agents) > 1, + "state": "busy" if lease.active_execution else "ready", + }) + return {"ok": True, "browsers": browsers} + + def new(self, req: dict) -> dict: + run_id, agent_id = run_agent(req) + backend = req.get("backend") or "managed" + if backend not in {"managed", "cloud"}: + return error("unsupported-backend", f"unsupported backend {backend!r}", ["browser_new"]) + with self._lock: + lease = 
self._allocate_lease(run_id, agent_id, backend, req.get("profile") or "clean") + try: + if backend == "cloud": + start_cloud_backend(lease, req.get("proxy_country")) + else: + start_managed_backend(lease) + except Exception as e: + cleanup_backend(lease) + return error("browser-start-failed", str(e), ["browser_new"]) + with self._lock: + self.leases[lease.browser_id] = lease + self.active_by_agent[agent_key_parts(run_id, agent_id)] = lease.browser_id + self._persist() + return ready_response(lease) + + def switch(self, req: dict) -> dict: + with self._lock: + run_id, agent_id = run_agent(req) + browser_id = req.get("browser_id") + if not browser_id: + return error("bad-request", "browser_id is required", ["browser_list", "browser_new"]) + lease = self.leases.get(browser_id) + if not lease: + return error("not-found", "browser id not found", ["browser_list", "browser_new"]) + if lease.run_id != run_id: + return error("forbidden", "browser belongs to another run", ["browser_list", "browser_new"]) + if agent_id not in lease.allowed_agents: + lease.allowed_agents.append(agent_id) + lease.last_used_at_ms = int(time.time() * 1000) + self.active_by_agent[agent_key_parts(run_id, agent_id)] = browser_id + self._persist() + return ready_response(lease) + + def close(self, req: dict) -> dict: + cleanup = None + with self._lock: + run_id, agent_id = run_agent(req) + key = agent_key_parts(run_id, agent_id) + browser_id = req.get("browser_id") or self.active_by_agent.get(key) + if not browser_id: + return {"ok": True, "ready": False, "state": "no-active-browser"} + lease = self.leases.get(browser_id) + if not lease: + self.active_by_agent.pop(key, None) + self._persist() + return {"ok": True, "ready": False, "state": "stale-binding"} + if lease.run_id != run_id or agent_id not in lease.allowed_agents: + return error("forbidden", "browser belongs to another run or agent", ["browser_list"]) + if agent_id in lease.allowed_agents: + lease.allowed_agents.remove(agent_id) + if 
lease.owner_agent_id == agent_id or not lease.allowed_agents: + cleanup = lease + self.leases.pop(browser_id, None) + self.active_by_agent = {k: v for k, v in self.active_by_agent.items() if v != browser_id} + self._persist() + resp = {"ok": True, "ready": False, "state": "closed", "browser_id": browser_id} + else: + self.active_by_agent.pop(key, None) + self._persist() + resp = {"ok": True, "ready": False, "state": "released", "browser_id": browser_id} + if cleanup is not None: + cleanup_backend(cleanup) + return resp + + def lock(self, req: dict) -> dict: + with self._lock: + run_id, agent_id = run_agent(req) + client_id = req.get("client_id") or agent_id + browser_id = req.get("browser_id") + if not browser_id: + return error("bad-request", "browser_id is required", ["browser_new", "browser_switch"]) + lease = self.leases.get(browser_id) + if not lease: + return error("not-found", "browser id not found", ["browser_list", "browser_new"]) + if lease.run_id != run_id or agent_id not in lease.allowed_agents: + return error("forbidden", "browser belongs to another run or agent", ["browser_list", "browser_new"]) + active = lease.active_execution or {} + if active and active.get("client_id") != client_id: + return error("busy", "browser is currently active in another browser-harness process", ["wait", "browser_new"]) + if active and active.get("client_id") == client_id: + return {"ok": True, "state": "ready", "browser_id": browser_id, "lock_id": active["lock_id"]} + lock_id = f"lk_{int(time.time() * 1000):x}_{secrets.token_hex(4)}" + lease.active_execution = {"agent_id": agent_id, "client_id": client_id, "lock_id": lock_id} + self._persist() + return {"ok": True, "state": "ready", "browser_id": browser_id, "lock_id": lock_id} + + def unlock(self, req: dict) -> dict: + with self._lock: + _run_id, agent_id = run_agent(req) + client_id = req.get("client_id") or agent_id + browser_id = req.get("browser_id") + lease = self.leases.get(browser_id or "") + if not lease: + 
return {"ok": True, "state": "not-found"} + active = lease.active_execution or {} + if ( + active.get("agent_id") == agent_id + and active.get("client_id") == client_id + and active.get("lock_id") == req.get("lock_id") + ): + lease.active_execution = None + self._persist() + return {"ok": True, "state": "released", "browser_id": browser_id} + + def _allocate_lease(self, run_id: str, agent_id: str, backend: str, profile_kind: str) -> BrowserLease: + self.next_seq += 1 + short = f"{int(time.time() * 1000):x}{self.next_seq:x}" + browser_id = f"br_{short}" + bu_name = f"bh_{short[-16:]}" + base = self.root / "leases" / browser_id + runtime_dir = base / "r" + tmp_dir = base / "t" + download_dir = base / "downloads" + artifact_dir = base / "artifacts" + profile_dir = base / "profile" + for p in (runtime_dir, tmp_dir, download_dir, artifact_dir, profile_dir): + p.mkdir(parents=True, exist_ok=True) + return BrowserLease( + browser_id=browser_id, + run_id=run_id, + owner_agent_id=agent_id, + backend=backend, + profile_kind=profile_kind, + harness_daemon_name=bu_name, + runtime_dir=str(runtime_dir), + tmp_dir=str(tmp_dir), + download_dir=str(download_dir), + artifact_dir=str(artifact_dir), + profile_dir=str(profile_dir), + allowed_agents=[agent_id], + ) + + +def start_cloud_backend(lease: BrowserLease, proxy_country: str | None): + key = os.environ.get("BROWSER_USE_API_KEY") + if not key: + raise RuntimeError("BROWSER_USE_API_KEY is not set") + body = {} + if proxy_country: + body["proxyCountryCode"] = proxy_country + browser = _browser_use("/browsers", "POST", body) + lease.cloud_browser_id = browser["id"] + lease.cloud_live_url = browser.get("liveUrl") + lease.cdp_url = browser["cdpUrl"] + try: + start_harness_daemon(lease) + except BaseException: + stop_cloud_browser(lease.cloud_browser_id) + raise + + +def start_managed_backend(lease: BrowserLease): + browser = find_browser_binary() + if not browser: + raise RuntimeError("no Chrome/Chromium binary found; set 
BH_CHROME_PATH or CHROME_PATH") + port = allocate_port() + lease.cdp_url = f"http://127.0.0.1:{port}" + args = [ + browser, + f"--remote-debugging-port={port}", + f"--user-data-dir={lease.profile_dir}", + "--no-first-run", + "--no-default-browser-check", + "--disable-background-networking", + "--disable-dev-shm-usage", + "--disable-gpu", + "about:blank", + ] + if os.environ.get("BH_MANAGED_HEADLESS") == "1" or (not os.environ.get("DISPLAY") and not os.environ.get("WAYLAND_DISPLAY")): + args.insert(-1, "--headless=new") + if os.environ.get("BH_CHROME_NO_SANDBOX") == "1": + args.insert(-1, "--no-sandbox") + proc = subprocess.Popen(args, stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, start_new_session=True) + lease.local_process_id = proc.pid + lease.local_debug_port = port + wait_devtools(port) + start_harness_daemon(lease) + + +def start_harness_daemon(lease: BrowserLease): + binding = context.BrowserBinding( + browser_id=lease.browser_id, + bu_name=lease.harness_daemon_name, + runtime_dir=Path(lease.runtime_dir), + tmp_dir=Path(lease.tmp_dir), + download_dir=Path(lease.download_dir), + artifact_dir=Path(lease.artifact_dir), + cdp_url=lease.cdp_url, + cdp_ws=lease.cdp_ws, + manager_mode=True, + ) + admin.ensure_daemon(wait=60.0, binding=binding) + + +def cleanup_backend(lease: BrowserLease): + binding = context.BrowserBinding( + browser_id=lease.browser_id, + bu_name=lease.harness_daemon_name, + runtime_dir=Path(lease.runtime_dir), + tmp_dir=Path(lease.tmp_dir), + manager_mode=True, + ) + try: + admin.restart_daemon(binding=binding) + except Exception: + pass + if lease.backend == "cloud" and lease.cloud_browser_id: + stop_cloud_browser(lease.cloud_browser_id) + if lease.backend == "managed" and lease.local_process_id: + try: + os.killpg(lease.local_process_id, 15) + except Exception: + try: + os.kill(lease.local_process_id, 15) + except Exception: + pass + + +def _browser_use(path: str, method: str, body=None): + key = 
os.environ.get("BROWSER_USE_API_KEY") + if not key: + raise RuntimeError("BROWSER_USE_API_KEY is not set") + req = urllib.request.Request( + f"{BU_API}{path}", + method=method, + data=(json.dumps(body).encode() if body is not None else None), + headers={"X-Browser-Use-API-Key": key, "Content-Type": "application/json"}, + ) + with urllib.request.urlopen(req, timeout=60) as resp: + return json.loads(resp.read() or b"{}") + + +def stop_cloud_browser(browser_id: str | None): + if not browser_id: + return + try: + _browser_use(f"/browsers/{browser_id}", "PATCH", {"action": "stop"}) + except Exception: + pass + + +def find_browser_binary() -> str | None: + for key in ("BH_CHROME_PATH", "CHROME_PATH"): + value = os.environ.get(key) + if value: + return value + for name in ("google-chrome-stable", "google-chrome", "chromium", "chromium-browser"): + path = shutil.which(name) + if path: + return path + return None + + +def allocate_port() -> int: + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + try: + sock.bind(("127.0.0.1", 0)) + return sock.getsockname()[1] + finally: + sock.close() + + +def wait_devtools(port: int, timeout=20.0): + deadline = time.time() + timeout + last = None + while time.time() < deadline: + try: + with urllib.request.urlopen(f"http://127.0.0.1:{port}/json/version", timeout=2) as resp: + data = json.loads(resp.read() or b"{}") + if data.get("webSocketDebuggerUrl"): + return + except Exception as e: + last = e + time.sleep(0.2) + raise RuntimeError(f"Chrome DevTools did not become ready on port {port}: {last}") + + +def ready_public(lease: BrowserLease) -> dict: + return { + "ok": True, + "ready": True, + "state": "ready", + "browser_id": lease.browser_id, + "backend": lease.backend, + "shared": len(lease.allowed_agents) > 1, + } + + +def ready_response(lease: BrowserLease) -> dict: + return {**ready_public(lease), "binding": lease.binding()} + + +def error(state: str, reason: str, safe_actions: list[str]) -> dict: + return {"ok": False, 
"ready": False, "state": state, "reason": reason, "safe_actions": safe_actions} + + +def run_agent(req: dict) -> tuple[str, str]: + return sanitize(req.get("run_id") or "unknown-run"), sanitize(req.get("agent_id") or "unknown-agent") + + +def agent_key(req: dict) -> str: + return agent_key_parts(*run_agent(req)) + + +def agent_key_parts(run_id: str, agent_id: str) -> str: + return f"{run_id}/{agent_id}" + + +def sanitize(value: str) -> str: + out = "".join(c for c in str(value) if c.isalnum() or c in "_-")[:64] + return out or "unknown" + + +def serve(socket_path: Path, root: Path): + socket_path.parent.mkdir(parents=True, exist_ok=True) + try: + socket_path.unlink() + except FileNotFoundError: + pass + manager = Manager(root) + server = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + server.bind(str(socket_path)) + os.chmod(socket_path, 0o600) + server.listen(128) + print(f"browser-harness manager listening on {socket_path}", file=sys.stderr, flush=True) + try: + while True: + conn, _ = server.accept() + threading.Thread(target=handle_conn, args=(manager, conn), daemon=True).start() + finally: + server.close() + + +def handle_conn(manager: Manager, conn: socket.socket): + with conn: + try: + data = b"" + while not data.endswith(b"\n"): + chunk = conn.recv(1 << 16) + if not chunk: + break + data += chunk + if not data: + return + req = json.loads(data or b"{}") + resp = manager.handle(req) + except Exception as e: + resp = error("bad-request", str(e), []) + conn.sendall((json.dumps(resp, default=str) + "\n").encode()) + + +def main(argv=None): + parser = argparse.ArgumentParser() + parser.add_argument("--socket", default=os.environ.get("BH_MANAGER_SOCKET") or str(Path(tempfile.gettempdir()) / "bhm" / "manager.sock")) + parser.add_argument("--root", default=os.environ.get("BH_MANAGER_ROOT") or str(Path(tempfile.gettempdir()) / "bhm")) + args = parser.parse_args(argv) + serve(Path(args.socket), Path(args.root)) + + +if __name__ == "__main__": + main() diff --git 
a/src/browser_harness/manager_helpers.py b/src/browser_harness/manager_helpers.py new file mode 100644 index 00000000..7ab676ab --- /dev/null +++ b/src/browser_harness/manager_helpers.py @@ -0,0 +1,51 @@ +"""Model-visible browser lifecycle helpers.""" +from __future__ import annotations + +from . import context +from . import manager_client + + +def browser_status(): + """Return lifecycle state for the active browser binding.""" + return manager_client.status() + + +def browser_new(backend="managed", *, profile="clean", proxy_country=None, reason=None): + """Create a browser, switch this agent to it, and return concise state.""" + resp = manager_client.new_browser( + backend=backend, + profile=profile, + proxy_country=proxy_country, + reason=reason, + ) + binding = manager_client.binding_from_response(resp) + context.activate_binding(binding) + manager_client.acquire_execution_for_binding(binding) + return manager_client.public_state(resp) + + +def browser_switch(browser_id): + """Switch this agent/process to an existing allowed browser id.""" + resp = manager_client.switch_browser(browser_id) + binding = manager_client.binding_from_response(resp) + context.activate_binding(binding) + manager_client.acquire_execution_for_binding(binding) + return manager_client.public_state(resp) + + +def browser_list(): + """List concise browser ids visible to this run/agent.""" + return manager_client.list_browsers() + + +def browser_close(browser_id=None): + """Close private browsers or release this agent's access to shared browsers.""" + active = context.get_active_binding() + closing_active = browser_id is None or (active and active.browser_id == browser_id) + if closing_active: + manager_client.release_active_execution_lock() + resp = manager_client.close_browser(browser_id) + if closing_active: + context.clear_active_binding() + return manager_client.public_state(resp) + diff --git a/src/browser_harness/run.py b/src/browser_harness/run.py index 8ab1f0f1..fa2f894a 100644 --- 
a/src/browser_harness/run.py +++ b/src/browser_harness/run.py @@ -1,4 +1,4 @@ -import os, sys, urllib.request +import ast, os, sys, urllib.request # Windows default stdout encoding is cp1252, which can't encode the 🐴 marker # helpers prepend to tab titles (or anything else outside Latin-1). Force UTF-8 @@ -23,7 +23,9 @@ stop_remote_daemon, sync_local_profile, ) +from . import context, manager_client from .helpers import * +from .manager_helpers import * HELP = """Browser Harness @@ -52,6 +54,28 @@ PY """ +_MANAGER_HELPER_NAMES = ( + "browser_status", + "browser_new", + "browser_switch", + "browser_list", + "browser_close", +) + + +def _uses_manager_helpers(code: str) -> bool: + try: + tree = ast.parse(code) + except SyntaxError: + return False + for node in ast.walk(tree): + if not isinstance(node, ast.Call): + continue + func = node.func + if isinstance(func, ast.Name) and func.id in _MANAGER_HELPER_NAMES: + return True + return False + # Probe /json/version (not a bare TCP connect) so a non-Chrome process bound to # 9222/9223 doesn't masquerade as Chrome and skip the cloud bootstrap. Mirrors @@ -109,6 +133,18 @@ def main(): else: sys.exit(USAGE) print_update_banner() + if context.manager_enabled() or _uses_manager_helpers(code): + os.environ.setdefault("BH_MANAGER_MODE", "1") + if os.environ.get("BH_BROWSER_ID"): + browser_switch(os.environ["BH_BROWSER_ID"]) + else: + context.clear_active_binding() + try: + exec(code, globals()) + finally: + manager_client.release_active_execution_lock() + return + # Auto-bootstrap a cloud browser is opt-in via BU_AUTOSPAWN — BROWSER_USE_API_KEY alone # is not enough, since the key is commonly set for unrelated reasons (profile sync, # cloud API calls, parent agents managing their own session). 
An explicit BU_CDP_URL diff --git a/tests/unit/test_context.py b/tests/unit/test_context.py new file mode 100644 index 00000000..4ea371b0 --- /dev/null +++ b/tests/unit/test_context.py @@ -0,0 +1,75 @@ +from pathlib import Path + +from browser_harness import context, helpers + + +class _FakeConn: + def close(self): + pass + + +def test_send_uses_active_binding_runtime_dir(monkeypatch, tmp_path): + calls = [] + binding = context.BrowserBinding( + browser_id="br_test", + bu_name="bh_test", + runtime_dir=tmp_path / "r", + tmp_dir=tmp_path / "t", + manager_mode=True, + ) + old = context.get_active_binding() + context.activate_binding(binding) + try: + monkeypatch.setattr( + helpers.ipc, + "connect", + lambda name, timeout=1.0, runtime_dir=None: calls.append((name, runtime_dir)) or (_FakeConn(), None), + ) + monkeypatch.setattr(helpers.ipc, "request", lambda conn, token, req: {"ok": True}) + + assert helpers._send({"meta": "ping"}) == {"ok": True} + finally: + if old is not None: + context.activate_binding(old) + else: + context.clear_active_binding() + + assert calls == [("bh_test", tmp_path / "r")] + + +def test_capture_screenshot_defaults_to_binding_artifact_dir(monkeypatch, tmp_path, fake_png): + binding = context.BrowserBinding( + browser_id="br_test", + bu_name="bh_test", + runtime_dir=tmp_path / "r", + tmp_dir=tmp_path / "t", + artifact_dir=tmp_path / "artifacts", + manager_mode=True, + ) + old = context.get_active_binding() + context.activate_binding(binding) + try: + monkeypatch.setattr(helpers, "cdp", lambda method, **kwargs: {"data": fake_png(20, 10)}) + path = helpers.capture_screenshot() + finally: + if old is not None: + context.activate_binding(old) + else: + context.clear_active_binding() + + assert Path(path) == tmp_path / "artifacts" / "shot.png" + assert Path(path).exists() + + +def test_agent_identity_uses_codex_thread_fallback(monkeypatch): + monkeypatch.delenv("BH_RUN_ID", raising=False) + monkeypatch.delenv("BH_AGENT_ID", raising=False) + 
monkeypatch.setenv("CODEX_THREAD_ID", "thread-123") + monkeypatch.delenv("CODEX_AGENT_ID", raising=False) + monkeypatch.delenv("CODEX_SUBAGENT_ID", raising=False) + + ident = context.agent_identity() + + assert ident.run_id == "thread-123" + assert ident.agent_id == "main" + assert ident.degraded is False diff --git a/tests/unit/test_manager_daemon.py b/tests/unit/test_manager_daemon.py new file mode 100644 index 00000000..cc225ab0 --- /dev/null +++ b/tests/unit/test_manager_daemon.py @@ -0,0 +1,78 @@ +from browser_harness.manager_daemon import Manager + + +def _manager_with_lease(tmp_path): + manager = Manager(tmp_path) + lease = manager._allocate_lease("run-1", "agent-1", "cloud", "clean") + manager.leases[lease.browser_id] = lease + manager.active_by_agent["run-1/agent-1"] = lease.browser_id + return manager, lease + + +def test_lock_is_exclusive_across_client_processes(tmp_path): + manager, lease = _manager_with_lease(tmp_path) + + first = manager.handle({ + "op": "lock", + "run_id": "run-1", + "agent_id": "agent-1", + "client_id": "client-1", + "browser_id": lease.browser_id, + }) + second = manager.handle({ + "op": "lock", + "run_id": "run-1", + "agent_id": "agent-1", + "client_id": "client-2", + "browser_id": lease.browser_id, + }) + + assert first["ok"] is True + assert second["ok"] is False + assert second["state"] == "busy" + + +def test_unlock_requires_same_client_process(tmp_path): + manager, lease = _manager_with_lease(tmp_path) + first = manager.handle({ + "op": "lock", + "run_id": "run-1", + "agent_id": "agent-1", + "client_id": "client-1", + "browser_id": lease.browser_id, + }) + + wrong = manager.handle({ + "op": "unlock", + "run_id": "run-1", + "agent_id": "agent-1", + "client_id": "client-2", + "browser_id": lease.browser_id, + "lock_id": first["lock_id"], + }) + second = manager.handle({ + "op": "lock", + "run_id": "run-1", + "agent_id": "agent-1", + "client_id": "client-2", + "browser_id": lease.browser_id, + }) + + assert wrong["ok"] is True + 
assert second["ok"] is False + assert second["state"] == "busy" + + +def test_close_rejects_other_runs(tmp_path): + manager, lease = _manager_with_lease(tmp_path) + + resp = manager.handle({ + "op": "close", + "run_id": "other-run", + "agent_id": "agent-1", + "browser_id": lease.browser_id, + }) + + assert resp["ok"] is False + assert resp["state"] == "forbidden" + assert lease.browser_id in manager.leases diff --git a/tests/unit/test_manager_helpers.py b/tests/unit/test_manager_helpers.py new file mode 100644 index 00000000..5bef46b7 --- /dev/null +++ b/tests/unit/test_manager_helpers.py @@ -0,0 +1,81 @@ +from browser_harness import context, manager_helpers + + +def _manager_response(tmp_path): + return { + "ok": True, + "ready": True, + "state": "ready", + "browser_id": "br_123", + "backend": "managed", + "shared": False, + "binding": { + "browser_id": "br_123", + "bu_name": "bh_123", + "runtime_dir": str(tmp_path / "r"), + "tmp_dir": str(tmp_path / "t"), + "download_dir": str(tmp_path / "downloads"), + "artifact_dir": str(tmp_path / "artifacts"), + "cdp_url": "http://127.0.0.1:4567", + "cdp_ws": None, + }, + } + + +def test_browser_new_activates_binding_and_acquires_lock(monkeypatch, tmp_path): + acquired = [] + old = context.get_active_binding() + try: + monkeypatch.setattr(manager_helpers.manager_client, "new_browser", lambda *args, **kwargs: _manager_response(tmp_path)) + monkeypatch.setattr( + manager_helpers.manager_client, + "acquire_execution_for_binding", + lambda binding: acquired.append(binding.browser_id), + ) + + state = manager_helpers.browser_new(backend="managed", reason="test") + binding = context.get_active_binding() + finally: + if old is not None: + context.activate_binding(old) + else: + context.clear_active_binding() + + assert state["browser_id"] == "br_123" + assert "binding" not in state + assert binding is not None + assert binding.bu_name == "bh_123" + assert acquired == ["br_123"] + + +def 
test_browser_close_releases_lock_and_clears_active_binding(monkeypatch, tmp_path): + released = [] + closed = [] + old = context.get_active_binding() + context.activate_binding(context.BrowserBinding( + browser_id="br_123", + bu_name="bh_123", + runtime_dir=tmp_path / "r", + tmp_dir=tmp_path / "t", + manager_mode=True, + )) + try: + monkeypatch.setattr(manager_helpers.manager_client, "release_active_execution_lock", lambda: released.append(True)) + monkeypatch.setattr( + manager_helpers.manager_client, + "close_browser", + lambda browser_id=None: closed.append(browser_id) or {"ok": True, "state": "closed", "browser_id": "br_123"}, + ) + + state = manager_helpers.browser_close() + active = context.get_active_binding() + finally: + if old is not None: + context.activate_binding(old) + else: + context.clear_active_binding() + + assert state == {"state": "closed", "browser_id": "br_123"} + assert released == [True] + assert closed == [None] + assert active is None diff --git a/tests/unit/test_run_manager_mode.py b/tests/unit/test_run_manager_mode.py new file mode 100644 index 00000000..7d6a931b --- /dev/null +++ b/tests/unit/test_run_manager_mode.py @@ -0,0 +1,61 @@ +import os +import sys +from io import StringIO +from unittest.mock import patch + +from browser_harness import run + + +def test_manager_mode_skips_legacy_daemon_start(monkeypatch): + monkeypatch.setenv("BH_MANAGER_SOCKET", "/tmp/nonexistent-manager.sock") + stdout = StringIO() + fake_stdin = StringIO("print('manager mode ok')") + + with patch.object(sys, "argv", ["browser-harness"]), \ + patch("sys.stdin", fake_stdin), \ + patch("sys.stdout", stdout), \ + patch("browser_harness.run.print_update_banner"), \ + patch("browser_harness.run.ensure_daemon") as ensure_daemon: + run.main() + + ensure_daemon.assert_not_called() + assert stdout.getvalue().strip() == "manager mode ok" + + +def test_manager_helper_call_enables_manager_mode_without_env(monkeypatch): + monkeypatch.delenv("BH_MANAGER_SOCKET", 
raising=False) + monkeypatch.delenv("BH_MANAGER_MODE", raising=False) + stdout = StringIO() + fake_stdin = StringIO("print(browser_status())") + + with patch.object(sys, "argv", ["browser-harness"]), \ + patch("sys.stdin", fake_stdin), \ + patch("sys.stdout", stdout), \ + patch("browser_harness.run.print_update_banner"), \ + patch("browser_harness.run.ensure_daemon") as ensure_daemon, \ + patch("browser_harness.run.browser_status", lambda: "manager helper mode ok"), \ + patch("browser_harness.run.manager_client.release_active_execution_lock"): + run.main() + + ensure_daemon.assert_not_called() + assert stdout.getvalue().strip() == "manager helper mode ok" + assert "BH_MANAGER_MODE" in os.environ + + +def test_manager_mode_releases_execution_lock_on_exception(monkeypatch): + monkeypatch.setenv("BH_MANAGER_SOCKET", "/tmp/nonexistent-manager.sock") + fake_stdin = StringIO("raise RuntimeError('boom')") + released = [] + + with patch.object(sys, "argv", ["browser-harness"]), \ + patch("sys.stdin", fake_stdin), \ + patch("browser_harness.run.print_update_banner"), \ + patch("browser_harness.run.manager_client.release_active_execution_lock", lambda: released.append(True)): + try: + run.main() + except RuntimeError as e: + assert str(e) == "boom" + else: + raise AssertionError("expected RuntimeError") + + assert released == [True] From dffcca02ae9a05a65a7b66813e556fb5faa57de0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gregor=20=C5=BDuni=C4=8D?= <36313686+gregpr07@users.noreply.github.com> Date: Thu, 18 Jun 2026 06:11:25 +0000 Subject: [PATCH 02/15] Add Browser Use cloud auth login --- SKILL.md | 18 +- TEMP_BROWSER_MANAGER_CONTEXT_DO_NOT_MERGE.md | 4 + install.md | 34 +- src/browser_harness/auth.py | 523 +++++++++++++++++++ src/browser_harness/manager_daemon.py | 13 +- src/browser_harness/run.py | 8 +- tests/unit/test_auth.py | 115 ++++ tests/unit/test_manager_daemon.py | 48 ++ 8 files changed, 752 insertions(+), 11 deletions(-) create mode 100644 src/browser_harness/auth.py 
create mode 100644 tests/unit/test_auth.py diff --git a/SKILL.md b/SKILL.md index 6ca96303..9f749cb5 100644 --- a/SKILL.md +++ b/SKILL.md @@ -58,9 +58,23 @@ Lifecycle helpers: After `browser_new(...)` or `browser_switch(...)`, all normal page helpers work unchanged. If you are a sub-agent, create your own browser unless the parent gives you a browser id to reuse with `browser_switch(id)`. -### Remote browsers +If `browser_new(backend="cloud")` reports `cloud-auth-required`, run: -Use remote for parallel sub-agents (each gets its own isolated browser via a distinct BU_NAME) or on a headless server. BROWSER_USE_API_KEY must be set. start_remote_daemon, list_cloud_profiles, list_local_profiles, sync_local_profile are pre-imported. +```bash +browser-harness auth login +``` + +It prints/opens a Browser Use login URL and waits while the user logs in online. The API key is stored locally and is never printed. In SSH/headless environments use: + +```bash +browser-harness auth login --device-code +``` + +Then retry `browser_new(backend="cloud")`. + +### Legacy remote browsers + +Prefer `browser_new(backend="cloud")` for new work. The older named-daemon remote API is still available for advanced/manual cases. BROWSER_USE_API_KEY must be set or `browser-harness auth login` must have been completed. start_remote_daemon, list_cloud_profiles, list_local_profiles, sync_local_profile are pre-imported. ```bash browser-harness <<'PY' diff --git a/TEMP_BROWSER_MANAGER_CONTEXT_DO_NOT_MERGE.md b/TEMP_BROWSER_MANAGER_CONTEXT_DO_NOT_MERGE.md index de8ea64e..166e8b93 100644 --- a/TEMP_BROWSER_MANAGER_CONTEXT_DO_NOT_MERGE.md +++ b/TEMP_BROWSER_MANAGER_CONTEXT_DO_NOT_MERGE.md @@ -28,6 +28,8 @@ browser_close(browser_id=None) After `browser_new(...)` or `browser_switch(...)`, normal browser-harness helpers such as `new_tab`, `page_info`, `capture_screenshot`, `click_at_xy`, `js`, and `cdp` work unchanged. 
+For cloud browsers, missing auth should produce `cloud-auth-required`; the model should run `browser-harness auth login` and retry. The user logs in online and the API key is stored locally without being printed into chat. + The model does not need to know about sockets, daemon names, runtime dirs, CDP URLs, Browser Use browser IDs, or process cleanup. Those are manager internals. ## Why Python Instead Of Rust @@ -103,6 +105,8 @@ The VM used for this work must not start local Chrome or Chromium. Local managed Cloud/live lifecycle should be tested separately with a Browser Use API key in the environment. Do not commit keys or put them in docs. +OAuth auth was added after this note was first created. Cloud lifecycle can now also be tested after `browser-harness auth login`, which stores a local Browser Use API key outside the repo. + ## Verification Done In This Session Commands run: diff --git a/install.md b/install.md index 375d63b6..c254aa24 100644 --- a/install.md +++ b/install.md @@ -61,6 +61,38 @@ optional browser_harness.manager_daemon owns many isolated browser leases - BU_CDP_URL overrides local Chrome discovery with a specific DevTools HTTP endpoint (used for Way 2). - BU_BROWSER_ID + BROWSER_USE_API_KEY lets the daemon stop a Browser Use cloud browser on shutdown. - Manager mode auto-starts `browser-harness-manager` when `browser_status`, `browser_new`, `browser_list`, `browser_switch`, or `browser_close` is used. +- Cloud manager mode reads Browser Use auth from `BROWSER_USE_API_KEY` first, then the local `browser-harness auth login` store. + +## Browser Use Cloud auth + +For cloud browsers, prefer OAuth login over pasting API keys: + +```bash +browser-harness auth login +``` + +The command generates a PKCE login request, opens or prints a Browser Use login URL, waits for the local callback, exchanges the code for an API key, and stores it in a private local file. The key is never printed. 
+ +Headless/SSH fallback: + +```bash +browser-harness auth login --device-code +``` + +Other auth commands: + +```bash +browser-harness auth status +browser-harness auth logout +``` + +Key resolution order for cloud browser creation: + +```text +BROWSER_USE_API_KEY + -> stored browser-harness auth key + -> cloud-auth-required +``` # Browser connection setup and troubleshooting @@ -70,7 +102,7 @@ This section is the source of truth for how browser-harness connects to a browse Browser-harness can connect to any Chrome or Chromium-based browser on your computer, or to a Browser Use cloud browser. -**Cloud browsers** are managed by the Browser Use cloud API. In manager mode, start one with `browser_new(backend="cloud", proxy_country="us")`; for legacy named daemons use `start_remote_daemon("work", ...)`. Authentication is via the `BROWSER_USE_API_KEY` environment variable; the harness handles the WebSocket URL itself. To carry your local Chrome cookies into a cloud browser, install `profile-use` once (`curl -fsSL https://browser-use.com/profile.sh | sh`), then call `uuid = sync_local_profile("MyChromeProfile")` followed by `start_remote_daemon("work", profileId=uuid)`. Cookies are the only thing synced — not localStorage, not extensions, not history. +**Cloud browsers** are managed by the Browser Use cloud API. In manager mode, start one with `browser_new(backend="cloud", proxy_country="us")`; for legacy named daemons use `start_remote_daemon("work", ...)`. Authentication is via `BROWSER_USE_API_KEY` or `browser-harness auth login`; the harness handles the WebSocket URL itself. To carry your local Chrome cookies into a cloud browser, install `profile-use` once (`curl -fsSL https://browser-use.com/profile.sh | sh`), then call `uuid = sync_local_profile("MyChromeProfile")` followed by `start_remote_daemon("work", profileId=uuid)`. Cookies are the only thing synced — not localStorage, not extensions, not history. **Local browsers** require remote debugging to be enabled. 
There are two ways, and they suit different use cases. diff --git a/src/browser_harness/auth.py b/src/browser_harness/auth.py new file mode 100644 index 00000000..ade427e3 --- /dev/null +++ b/src/browser_harness/auth.py @@ -0,0 +1,523 @@ +"""Browser Use Cloud auth for browser-harness. + +The model-facing contract stays small: cloud browser startup either has a key +or tells the agent to run `browser-harness auth login`. OAuth details live here. +""" +from __future__ import annotations + +from dataclasses import dataclass, field +from http.server import BaseHTTPRequestHandler, HTTPServer +import argparse +import base64 +import hashlib +import json +import os +from pathlib import Path +import secrets +import stat +import sys +import time +import urllib.error +import urllib.parse +import urllib.request +import webbrowser + + +AUTH_BASE = "https://api.browser-use.com" +# Browser Use currently exposes this registered CLI OAuth client. Keep an env +# escape hatch so a dedicated browser-harness client can be used once issued. 
+DEFAULT_CLIENT_ID = "browser-use-terminal" +CALLBACK_PATH = "/browser-use-cloud/callback" +AUTH_TIMEOUT_SECONDS = 600 + + +class CloudAuthRequired(RuntimeError): + def __init__(self): + super().__init__("cloud-auth-required: run `browser-harness auth login`") + + +class AuthError(RuntimeError): + pass + + +@dataclass +class PendingCallback: + state: str + code: str | None = None + error: str | None = None + error_description: str | None = None + complete: bool = False + + +@dataclass +class BrowserAuthStart: + server: HTTPServer + callback: PendingCallback + redirect_uri: str + verifier: str + auth_url: str + expires_in: int | None + opened: bool = False + + +@dataclass +class DeviceAuthStart: + device_code: str + user_code: str + verification_uri: str + verification_uri_complete: str | None = None + expires_in: int | None = None + interval: int = 5 + opened: bool = False + + @property + def open_uri(self) -> str: + return self.verification_uri_complete or self.verification_uri + + +@dataclass +class AuthRecord: + api_key: str + api_key_id: str | None = None + project_id: str | None = None + expires_at: str | None = None + scopes: list[str] = field(default_factory=list) + source: str = "oauth" + + @classmethod + def from_token_response(cls, data: dict, *, source: str = "oauth") -> "AuthRecord": + api_key = data.get("api_key") + if not api_key: + raise AuthError("auth token response did not include an api_key") + scopes = data.get("scopes") or [] + if not isinstance(scopes, list): + scopes = [] + return cls( + api_key=api_key, + api_key_id=data.get("api_key_id"), + project_id=data.get("project_id"), + expires_at=data.get("expires_at"), + scopes=[str(s) for s in scopes], + source=source, + ) + + def to_storage(self) -> dict: + return { + "api_key": self.api_key, + "api_key_id": self.api_key_id, + "project_id": self.project_id, + "expires_at": self.expires_at, + "scopes": self.scopes, + "source": self.source, + } + + +def auth_base() -> str: + return 
(os.environ.get("BROWSER_USE_CLOUD_API_URL") or AUTH_BASE).rstrip("/") + + +def client_id() -> str: + return os.environ.get("BROWSER_HARNESS_OAUTH_CLIENT_ID") or DEFAULT_CLIENT_ID + + +def auth_path() -> Path: + override = os.environ.get("BH_AUTH_PATH") + if override: + return Path(override).expanduser() + config_home = os.environ.get("XDG_CONFIG_HOME") + base = Path(config_home).expanduser() if config_home else Path.home() / ".config" + return base / "browser-harness" / "auth.json" + + +def load_auth_file(path: Path | None = None) -> dict: + path = path or auth_path() + try: + return json.loads(path.read_text()) + except FileNotFoundError: + return {} + except json.JSONDecodeError as e: + raise AuthError(f"auth file is not valid JSON: {path}") from e + + +def save_auth_record(record: AuthRecord, path: Path | None = None) -> None: + path = path or auth_path() + path.parent.mkdir(parents=True, exist_ok=True) + _chmod_private(path.parent, directory=True) + existing = load_auth_file(path) + existing["browser_use"] = record.to_storage() + tmp = path.with_name(path.name + ".tmp") + _write_private_json(tmp, existing) + os.replace(tmp, path) + _chmod_private(path) + + +def clear_auth(path: Path | None = None) -> bool: + path = path or auth_path() + data = load_auth_file(path) + existed = bool(data.get("browser_use")) + data.pop("browser_use", None) + if data: + tmp = path.with_name(path.name + ".tmp") + _write_private_json(tmp, data) + os.replace(tmp, path) + _chmod_private(path) + else: + try: + path.unlink() + except FileNotFoundError: + pass + return existed + + +def stored_auth_record(path: Path | None = None) -> dict | None: + data = load_auth_file(path) + value = data.get("browser_use") + return value if isinstance(value, dict) else None + + +def get_browser_use_api_key() -> str: + env_key = os.environ.get("BROWSER_USE_API_KEY") + if env_key: + return env_key + stored = stored_auth_record() + key = stored.get("api_key") if stored else None + if key: + return 
str(key) + raise CloudAuthRequired() + + +def auth_status() -> dict: + if os.environ.get("BROWSER_USE_API_KEY"): + return {"status": "authenticated", "source": "env", "path": str(auth_path())} + stored = stored_auth_record() + if not stored or not stored.get("api_key"): + return {"status": "missing", "source": None, "path": str(auth_path())} + return { + "status": "authenticated", + "source": stored.get("source") or "stored", + "path": str(auth_path()), + "api_key_id": stored.get("api_key_id"), + "project_id": stored.get("project_id"), + "expires_at": stored.get("expires_at"), + "scopes": stored.get("scopes") or [], + } + + +def pkce_pair() -> tuple[str, str]: + verifier = secrets.token_urlsafe(48) + digest = hashlib.sha256(verifier.encode()).digest() + challenge = base64.urlsafe_b64encode(digest).decode().rstrip("=") + return verifier, challenge + + +def start_browser_auth(*, open_url=True, timeout=AUTH_TIMEOUT_SECONDS) -> BrowserAuthStart: + verifier, challenge = pkce_pair() + state = secrets.token_urlsafe(32) + callback = PendingCallback(state=state) + server = _callback_server(callback) + host, port = server.server_address + redirect_uri = f"http://{host}:{port}{CALLBACK_PATH}" + req = { + "client_id": client_id(), + "response_type": "code", + "redirect_uri": redirect_uri, + "code_challenge": challenge, + "code_challenge_method": "S256", + "state": state, + "device_name": os.environ.get("BH_DEVICE_NAME") or "browser-harness", + } + try: + data = _post_json(f"{auth_base()}/cloud/cli-auth/browser", req) + except BaseException: + server.server_close() + raise + auth_url = data.get("authorization_uri") or data.get("auth_url") + if not auth_url: + server.server_close() + raise AuthError("auth start response did not include authorization_uri") + expires_in = _int_or_none(data.get("expires_in")) + opened = False + if open_url: + try: + opened = bool(webbrowser.open(auth_url)) + except Exception: + opened = False + return BrowserAuthStart( + server=server, + 
callback=callback, + redirect_uri=redirect_uri, + verifier=verifier, + auth_url=auth_url, + expires_in=expires_in, + opened=opened, + ) + + +def complete_browser_auth(start: BrowserAuthStart, *, timeout=AUTH_TIMEOUT_SECONDS) -> AuthRecord: + deadline = time.time() + timeout + start.server.timeout = 0.5 + try: + while not start.callback.complete and time.time() < deadline: + start.server.handle_request() + finally: + start.server.server_close() + if not start.callback.complete: + raise AuthError("timed out waiting for browser auth callback") + if start.callback.error: + detail = f": {start.callback.error_description}" if start.callback.error_description else "" + raise AuthError(f"auth failed: {start.callback.error}{detail}") + if not start.callback.code: + raise AuthError("auth callback did not include a code") + token = _exchange_authorization_code(start.callback.code, start.redirect_uri, start.verifier) + record = AuthRecord.from_token_response(token) + save_auth_record(record) + return record + + +def browser_login(*, open_url=True, json_output=False, timeout=AUTH_TIMEOUT_SECONDS) -> AuthRecord: + start = start_browser_auth(open_url=open_url, timeout=timeout) + if json_output: + print(json.dumps({ + "status": "needs_user_auth", + "auth_url": start.auth_url, + "callback": start.redirect_uri, + "expires_in": start.expires_in, + "opened": start.opened, + }), flush=True) + else: + print("Open this URL to sign in to Browser Use Cloud:") + print(start.auth_url, flush=True) + if start.opened: + print("Waiting for login to complete...", flush=True) + else: + print("Waiting for login to complete after you open the URL...", flush=True) + record = complete_browser_auth(start, timeout=timeout) + if json_output: + print(json.dumps(_stored_output(record)), flush=True) + else: + print("Browser Use Cloud auth stored.") + return record + + +def start_device_auth(*, open_url=True) -> DeviceAuthStart: + data = _post_json( + f"{auth_base()}/cloud/cli-auth/device", + {"client_id": 
client_id(), "device_name": os.environ.get("BH_DEVICE_NAME") or "browser-harness"}, + ) + device_code = data.get("device_code") + user_code = data.get("user_code") + verification_uri = data.get("verification_uri") or data.get("verification_url") + if not device_code or not user_code or not verification_uri: + raise AuthError("device auth response missing device_code, user_code, or verification_uri") + opened = False + open_uri = data.get("verification_uri_complete") or verification_uri + if open_url: + try: + opened = bool(webbrowser.open(open_uri)) + except Exception: + opened = False + return DeviceAuthStart( + device_code=device_code, + user_code=user_code, + verification_uri=verification_uri, + verification_uri_complete=data.get("verification_uri_complete"), + expires_in=_int_or_none(data.get("expires_in")), + interval=max(1, _int_or_none(data.get("interval")) or 5), + opened=opened, + ) + + +def complete_device_auth(start: DeviceAuthStart, *, timeout: int | None = None) -> AuthRecord: + deadline = time.time() + (timeout or start.expires_in or AUTH_TIMEOUT_SECONDS) + interval = start.interval + while time.time() < deadline: + try: + token = _post_json(f"{auth_base()}/cloud/cli-auth/token", { + "grant_type": "urn:ietf:params:oauth:grant-type:device_code", + "device_code": start.device_code, + "client_id": client_id(), + }) + record = AuthRecord.from_token_response(token) + save_auth_record(record) + return record + except AuthError as e: + err = _auth_error_code(str(e)) + if err == "authorization_pending": + time.sleep(interval) + continue + if err == "slow_down": + interval += 5 + time.sleep(interval) + continue + raise + raise AuthError("timed out waiting for device auth") + + +def device_login(*, open_url=True, json_output=False) -> AuthRecord: + start = start_device_auth(open_url=open_url) + if json_output: + print(json.dumps({ + "status": "needs_user_auth", + "verification_uri": start.verification_uri, + "verification_uri_complete": 
start.verification_uri_complete, + "user_code": start.user_code, + "expires_in": start.expires_in, + "opened": start.opened, + }), flush=True) + else: + print("Open this URL to sign in to Browser Use Cloud:") + print(start.open_uri, flush=True) + print(f"Code: {start.user_code}", flush=True) + print("Waiting for login to complete...", flush=True) + record = complete_device_auth(start) + if json_output: + print(json.dumps(_stored_output(record)), flush=True) + else: + print("Browser Use Cloud auth stored.") + return record + + +def _exchange_authorization_code(code: str, redirect_uri: str, verifier: str) -> dict: + return _post_json(f"{auth_base()}/cloud/cli-auth/token", { + "grant_type": "authorization_code", + "code": code, + "redirect_uri": redirect_uri, + "code_verifier": verifier, + "client_id": client_id(), + }) + + +def _callback_server(callback: PendingCallback) -> HTTPServer: + class Handler(BaseHTTPRequestHandler): + def do_GET(self): # noqa: N802 - stdlib handler API + parsed = urllib.parse.urlparse(self.path) + if parsed.path != CALLBACK_PATH: + self.send_error(404) + return + qs = urllib.parse.parse_qs(parsed.query) + state = _one(qs, "state") + if state != callback.state: + callback.error = "invalid_state" + callback.error_description = "OAuth callback state did not match" + else: + callback.code = _one(qs, "code") + callback.error = _one(qs, "error") + callback.error_description = _one(qs, "error_description") + callback.complete = True + body = b"

Browser Use Cloud login complete

You can close this tab.

" + self.send_response(200) + self.send_header("Content-Type", "text/html; charset=utf-8") + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + + def log_message(self, fmt, *args): + return + + return HTTPServer(("127.0.0.1", 0), Handler) + + +def _post_json(url: str, payload: dict) -> dict: + req = urllib.request.Request( + url, + method="POST", + data=json.dumps(payload).encode(), + headers={"Content-Type": "application/json"}, + ) + try: + with urllib.request.urlopen(req, timeout=60) as resp: + return json.loads(resp.read() or b"{}") + except urllib.error.HTTPError as e: + body = e.read() or b"" + try: + data = json.loads(body or b"{}") + except json.JSONDecodeError: + data = {} + err = data.get("error") or data.get("state") or f"http_{e.code}" + desc = data.get("error_description") or data.get("reason") or data.get("message") + detail = f": {desc}" if desc else "" + raise AuthError(f"{err}{detail}") from e + + +def _write_private_json(path: Path, data: dict) -> None: + raw = (json.dumps(data, indent=2) + "\n").encode() + flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC + fd = os.open(path, flags, stat.S_IRUSR | stat.S_IWUSR) + try: + with os.fdopen(fd, "wb") as f: + f.write(raw) + except BaseException: + try: + os.close(fd) + except OSError: + pass + raise + + +def _chmod_private(path: Path, *, directory=False) -> None: + mode = stat.S_IRWXU if directory else stat.S_IRUSR | stat.S_IWUSR + try: + os.chmod(path, mode) + except OSError: + pass + + +def _one(qs: dict[str, list[str]], key: str) -> str | None: + values = qs.get(key) + return values[0] if values else None + + +def _int_or_none(value) -> int | None: + try: + return int(value) + except (TypeError, ValueError): + return None + + +def _auth_error_code(message: str) -> str: + return message.split(":", 1)[0] + + +def _stored_output(record: AuthRecord) -> dict: + return { + "status": "stored", + "api_key_id": record.api_key_id, + "project_id": record.project_id, + 
"expires_at": record.expires_at, + "scopes": record.scopes, + "path": str(auth_path()), + } + + +def run_auth_cli(argv: list[str]) -> int: + parser = argparse.ArgumentParser(prog="browser-harness auth") + sub = parser.add_subparsers(dest="command", required=True) + login = sub.add_parser("login") + login.add_argument("--device-code", action="store_true") + login.add_argument("--json", action="store_true") + login.add_argument("--no-open", action="store_true") + sub.add_parser("status") + sub.add_parser("logout") + args = parser.parse_args(argv) + + try: + if args.command == "login": + if args.device_code: + device_login(open_url=not args.no_open, json_output=args.json) + else: + browser_login(open_url=not args.no_open, json_output=args.json) + return 0 + if args.command == "status": + print(json.dumps(auth_status(), indent=2)) + return 0 + if args.command == "logout": + removed = clear_auth() + print(json.dumps({"status": "logged-out" if removed else "missing", "path": str(auth_path())}, indent=2)) + return 0 + except (AuthError, CloudAuthRequired) as e: + if getattr(args, "json", False): + print(json.dumps({"status": "error", "reason": str(e)}), file=sys.stderr) + else: + print(str(e), file=sys.stderr) + return 1 + return 2 diff --git a/src/browser_harness/manager_daemon.py b/src/browser_harness/manager_daemon.py index e6e872bc..066171c8 100644 --- a/src/browser_harness/manager_daemon.py +++ b/src/browser_harness/manager_daemon.py @@ -16,7 +16,7 @@ import time import urllib.request -from . import admin, context +from . 
import admin, auth, context BU_API = "https://api.browser-use.com/api/v3" @@ -157,6 +157,9 @@ def new(self, req: dict) -> dict: start_cloud_backend(lease, req.get("proxy_country")) else: start_managed_backend(lease) + except auth.CloudAuthRequired as e: + cleanup_backend(lease) + return error("cloud-auth-required", str(e), ["browser-harness auth login"]) except Exception as e: cleanup_backend(lease) return error("browser-start-failed", str(e), ["browser_new"]) @@ -285,9 +288,7 @@ def _allocate_lease(self, run_id: str, agent_id: str, backend: str, profile_kind def start_cloud_backend(lease: BrowserLease, proxy_country: str | None): - key = os.environ.get("BROWSER_USE_API_KEY") - if not key: - raise RuntimeError("BROWSER_USE_API_KEY is not set") + auth.get_browser_use_api_key() body = {} if proxy_country: body["proxyCountryCode"] = proxy_country @@ -370,9 +371,7 @@ def cleanup_backend(lease: BrowserLease): def _browser_use(path: str, method: str, body=None): - key = os.environ.get("BROWSER_USE_API_KEY") - if not key: - raise RuntimeError("BROWSER_USE_API_KEY is not set") + key = auth.get_browser_use_api_key() req = urllib.request.Request( f"{BU_API}{path}", method=method, diff --git a/src/browser_harness/run.py b/src/browser_harness/run.py index fa2f894a..ee77f9ca 100644 --- a/src/browser_harness/run.py +++ b/src/browser_harness/run.py @@ -23,7 +23,7 @@ stop_remote_daemon, sync_local_profile, ) -from . import context, manager_client +from . 
import auth, context, manager_client from .helpers import * from .manager_helpers import * @@ -44,6 +44,10 @@ browser-harness --doctor diagnose install, daemon, and browser state browser-harness doctor same as --doctor browser-harness doctor --fix-snap print how to fix Snap Chromium blocking CDP (Linux) + browser-harness auth login sign in to Browser Use Cloud for cloud browsers + browser-harness auth login --device-code sign in from SSH/headless environments + browser-harness auth status show Browser Use Cloud auth state + browser-harness auth logout remove stored Browser Use Cloud auth browser-harness --update [-y] pull the latest version (agents: pass -y) browser-harness --reload stop the daemon so next call picks up code changes """ @@ -116,6 +120,8 @@ def main(): print("usage: browser-harness doctor [--fix-snap]", file=sys.stderr) sys.exit(2) sys.exit(run_doctor()) + if args and args[0] == "auth": + sys.exit(auth.run_auth_cli(args[1:])) if args and args[0] == "--update": yes = any(a in {"-y", "--yes"} for a in args[1:]) sys.exit(run_update(yes=yes)) diff --git a/tests/unit/test_auth.py b/tests/unit/test_auth.py new file mode 100644 index 00000000..502f8107 --- /dev/null +++ b/tests/unit/test_auth.py @@ -0,0 +1,115 @@ +import json +import stat +import threading +import urllib.request + +from browser_harness import auth + + +def test_get_api_key_prefers_env_over_stored(monkeypatch, tmp_path): + monkeypatch.setenv("BH_AUTH_PATH", str(tmp_path / "auth.json")) + auth.save_auth_record(auth.AuthRecord(api_key="stored-key", source="oauth")) + monkeypatch.setenv("BROWSER_USE_API_KEY", "env-key") + + assert auth.get_browser_use_api_key() == "env-key" + + +def test_status_and_logout_for_stored_key(monkeypatch, tmp_path): + monkeypatch.delenv("BROWSER_USE_API_KEY", raising=False) + monkeypatch.setenv("BH_AUTH_PATH", str(tmp_path / "auth.json")) + auth.save_auth_record(auth.AuthRecord( + api_key="secret-key", + api_key_id="key-123", + project_id="project-123", + 
scopes=["browser"], + )) + + status = auth.auth_status() + mode = stat.S_IMODE((tmp_path / "auth.json").stat().st_mode) + removed = auth.clear_auth() + + assert status["status"] == "authenticated" + assert status["source"] == "oauth" + assert status["api_key_id"] == "key-123" + assert "api_key" not in status + assert mode == 0o600 + assert removed is True + assert auth.auth_status()["status"] == "missing" + + +def test_missing_key_raises_cloud_auth_required(monkeypatch, tmp_path): + monkeypatch.delenv("BROWSER_USE_API_KEY", raising=False) + monkeypatch.setenv("BH_AUTH_PATH", str(tmp_path / "missing.json")) + + try: + auth.get_browser_use_api_key() + except auth.CloudAuthRequired as e: + assert "browser-harness auth login" in str(e) + else: + raise AssertionError("expected CloudAuthRequired") + + +def test_browser_login_callback_exchanges_and_stores_key(monkeypatch, tmp_path): + monkeypatch.setenv("BH_AUTH_PATH", str(tmp_path / "auth.json")) + calls = [] + + def fake_post(url, payload): + calls.append((url, payload)) + if url.endswith("/cloud/cli-auth/browser"): + return {"authorization_uri": "https://login.example/auth", "expires_in": 600} + if url.endswith("/cloud/cli-auth/token"): + return { + "api_key": "oauth-key", + "api_key_id": "key-id", + "project_id": "project-id", + "scopes": ["browser"], + } + raise AssertionError(url) + + monkeypatch.setattr(auth, "_post_json", fake_post) + start = auth.start_browser_auth(open_url=False) + callback_url = f"{start.redirect_uri}?code=abc123&state={start.callback.state}" + t = threading.Thread(target=lambda: urllib.request.urlopen(callback_url, timeout=5).read()) + t.start() + record = auth.complete_browser_auth(start, timeout=5) + t.join(timeout=5) + + assert record.api_key == "oauth-key" + assert auth.get_browser_use_api_key() == "oauth-key" + assert calls[0][1]["client_id"] == "browser-use-terminal" + assert calls[0][1]["redirect_uri"] == start.redirect_uri + assert calls[0][1]["state"] == start.callback.state + assert 
calls[1][1]["code"] == "abc123" + assert calls[1][1]["code_verifier"] == start.verifier + assert json.loads((tmp_path / "auth.json").read_text())["browser_use"]["api_key_id"] == "key-id" + + +def test_device_login_polls_and_stores_key(monkeypatch, tmp_path): + monkeypatch.setenv("BH_AUTH_PATH", str(tmp_path / "auth.json")) + token_attempts = [] + + def fake_post(url, payload): + if url.endswith("/cloud/cli-auth/device"): + return { + "device_code": "device-123", + "user_code": "USER-123", + "verification_uri": "https://login.example/device", + "interval": 1, + "expires_in": 60, + } + if url.endswith("/cloud/cli-auth/token"): + token_attempts.append(payload) + if len(token_attempts) == 1: + raise auth.AuthError("authorization_pending") + return {"api_key": "device-key", "api_key_id": "device-key-id"} + raise AssertionError(url) + + monkeypatch.setattr(auth, "_post_json", fake_post) + monkeypatch.setattr(auth.time, "sleep", lambda _seconds: None) + + start = auth.start_device_auth(open_url=False) + record = auth.complete_device_auth(start, timeout=5) + + assert record.api_key == "device-key" + assert token_attempts[0]["grant_type"] == "urn:ietf:params:oauth:grant-type:device_code" + assert auth.get_browser_use_api_key() == "device-key" diff --git a/tests/unit/test_manager_daemon.py b/tests/unit/test_manager_daemon.py index cc225ab0..52f7cabd 100644 --- a/tests/unit/test_manager_daemon.py +++ b/tests/unit/test_manager_daemon.py @@ -1,4 +1,17 @@ from browser_harness.manager_daemon import Manager +from browser_harness import manager_daemon +from browser_harness import auth + + +class _FakeResponse: + def __enter__(self): + return self + + def __exit__(self, *_args): + return False + + def read(self): + return b'{"ok": true}' def _manager_with_lease(tmp_path): @@ -76,3 +89,38 @@ def test_close_rejects_other_runs(tmp_path): assert resp["ok"] is False assert resp["state"] == "forbidden" assert lease.browser_id in manager.leases + + +def 
test_cloud_new_reports_auth_required(monkeypatch, tmp_path): + manager = Manager(tmp_path) + monkeypatch.setattr( + "browser_harness.manager_daemon.auth.get_browser_use_api_key", + lambda: (_ for _ in ()).throw(auth.CloudAuthRequired()), + ) + + resp = manager.handle({ + "op": "new", + "run_id": "run-1", + "agent_id": "agent-1", + "backend": "cloud", + }) + + assert resp["ok"] is False + assert resp["state"] == "cloud-auth-required" + assert "browser-harness auth login" in resp["reason"] + + +def test_browser_use_api_uses_auth_resolution(monkeypatch): + captured = [] + monkeypatch.delenv("BROWSER_USE_API_KEY", raising=False) + monkeypatch.setattr(manager_daemon.auth, "get_browser_use_api_key", lambda: "stored-key") + monkeypatch.setattr( + manager_daemon.urllib.request, + "urlopen", + lambda req, timeout=60: captured.append(req) or _FakeResponse(), + ) + + assert manager_daemon._browser_use("/browsers", "POST", {}) == {"ok": True} + + assert captured + assert captured[0].get_header("X-browser-use-api-key") == "stored-key" From 73743edea22e719f5cd22ea127103c6e35cbc57f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gregor=20=C5=BDuni=C4=8D?= <36313686+gregpr07@users.noreply.github.com> Date: Wed, 17 Jun 2026 23:14:43 -0700 Subject: [PATCH 03/15] Add local profile targeting and browser cleanup --- SKILL.md | 8 +- install.md | 4 +- src/browser_harness/admin.py | 168 +++++-- src/browser_harness/daemon.py | 307 ++++++++++-- src/browser_harness/helpers.py | 65 ++- src/browser_harness/local_profiles.py | 615 +++++++++++++++++++++++++ src/browser_harness/manager_daemon.py | 52 ++- src/browser_harness/manager_helpers.py | 5 +- src/browser_harness/run.py | 57 ++- tests/unit/test_admin.py | 32 +- tests/unit/test_daemon.py | 225 +++++++++ tests/unit/test_helpers.py | 73 +++ tests/unit/test_local_profiles.py | 78 ++++ tests/unit/test_manager_daemon.py | 54 ++- tests/unit/test_manager_helpers.py | 37 ++ 15 files changed, 1690 insertions(+), 90 deletions(-) create mode 100644 
src/browser_harness/local_profiles.py create mode 100644 tests/unit/test_local_profiles.py diff --git a/SKILL.md b/SKILL.md index 9f749cb5..a2eb7bcd 100644 --- a/SKILL.md +++ b/SKILL.md @@ -24,6 +24,8 @@ PY - Invoke as browser-harness — it's on $PATH. No cd, no uv run. - Use the heredoc form for every multi-line command. It prevents shell quote mangling inside Python strings and JavaScript snippets. - First navigation is new_tab(url), not goto_url(url) — goto runs in the user's active tab and clobbers their work. +- Local Chrome requires an explicit selected profile. Use `list_local_profiles()` to see stable ids like `google-chrome:Default`, then `use_local_profile(profile_id)`. +- Plain helper calls use the selected local browser. If you need an isolated or remote browser, call `browser_new(...)` before `new_tab(...)`. ## Tool call shape @@ -35,6 +37,8 @@ PY Legacy mode calls ensure_daemon() before exec. Manager mode starts when the script uses a `browser_*` lifecycle helper or `BH_MANAGER_MODE=1`. +Local Chrome sessions snapshot the selected profile when the daemon starts. If a profile is changed later, existing named daemons keep their current profile until restarted. + ### Managed browsers Use this when you need an isolated browser, parallel sub-agents, a cloud browser, or a restart after the current browser gets blocked. @@ -51,7 +55,7 @@ PY Lifecycle helpers: - `browser_status()` — current binding state. -- `browser_new(backend="cloud"|"managed", profile="clean", proxy_country=None, reason=None)` — create and switch to a browser. +- `browser_new(backend="cloud"|"managed", profile="clean", proxy_country=None, reason=None)` — create and switch to a browser. Cloud responses include `live_url` when Browser Use returns one. - `browser_list()` — browser ids visible to this run/agent. - `browser_switch(browser_id)` — reuse an existing browser id. - `browser_close(browser_id=None)` — close the active private browser, or release access to a shared one. 
@@ -132,7 +136,7 @@ If you start struggling with a specific mechanic while navigating, look in inter ## Design constraints - Coordinate clicks default. Input.dispatchMouseEvent goes through iframes/shadow/cross-origin at the compositor level. -- Legacy mode connects to the user's running Chrome. Manager mode may create cloud or managed browsers via `browser_new`. +- Legacy mode connects to the user's selected local Chrome profile. Manager mode may create cloud or managed browsers via `browser_new`. - cdp-use is only for CDPClient.send_raw. Prefer raw CDP strings over typed wrappers. - run.py stays tiny. No argparse, subcommands, or extra control layer. - Core helpers stay short. Put task-specific helper additions in `agent-workspace/agent_helpers.py`; daemon/bootstrap and remote session admin live in the core package. diff --git a/install.md b/install.md index c254aa24..7718e143 100644 --- a/install.md +++ b/install.md @@ -106,6 +106,8 @@ Browser-harness can connect to any Chrome or Chromium-based browser on your comp **Local browsers** require remote debugging to be enabled. There are two ways, and they suit different use cases. +Local Way 1 also requires an explicit selected profile before the harness attaches. Run `list_local_profiles()` to get stable ids such as `google-chrome:Default`, then `use_local_profile("google-chrome:Default")`. The daemon snapshots that selected profile at startup and refuses to attach to an arbitrary available Chrome profile. + *Way 1: chrome://inspect/#remote-debugging checkbox — uses your real profile.* In your running Chrome, navigate to `chrome://inspect/#remote-debugging` and tick the "Allow remote debugging for this browser instance" checkbox. This setting is per-profile and sticky: tick it once and it persists across every future Chrome launch of that profile. Then run any `browser-harness` command. On Chrome 144 and later, the first attach by the harness triggers an in-browser "Allow remote debugging?" 
popup that you must click Allow on. The popup may reappear on later attaches under conditions that are not fully characterized.[^1] This path inherits your everyday Chrome's logins, extensions, history, and bookmarks, which makes it the right choice for an agent helping you with tasks in your real browser. *Way 2: command-line flag — uses an isolated profile, no popups ever.* Launch Chrome with `--remote-debugging-port=9222 --user-data-dir=`. Two precisions: @@ -133,7 +135,7 @@ If the user hasn't said which connection method to use, default to Way 1 if Chro PY ``` - If it prints page info, you're done. + If it prints page info, you're done. If it reports `needs-profile`, run `list_local_profiles()`, choose a stable profile id with the user, call `use_local_profile(profile_id)`, then retry. 2. Otherwise run `browser-harness --doctor`. The two lines that matter for connection are `chrome running` and `daemon alive`. diff --git a/src/browser_harness/admin.py b/src/browser_harness/admin.py index d418e920..4ffcb19a 100644 --- a/src/browser_harness/admin.py +++ b/src/browser_harness/admin.py @@ -10,6 +10,7 @@ from . import _ipc as ipc from . import context +from . 
import local_profiles def _process_start_time(pid): @@ -164,6 +165,51 @@ def _log_tail(name, tmp_dir=None): return None +class _DaemonStartLock: + def __init__(self, name, runtime_dir=None): + base = Path(runtime_dir) if runtime_dir else Path(tempfile.gettempdir()) + self.path = base / f"bu-{name or NAME}.start.lock" + self.file = None + + def __enter__(self): + self.path.parent.mkdir(parents=True, exist_ok=True) + self.file = self.path.open("a+") + if sys.platform == "win32": + try: + import msvcrt + self.file.seek(0) + self.file.write("\0") + self.file.flush() + msvcrt.locking(self.file.fileno(), msvcrt.LK_LOCK, 1) + except Exception: + pass + else: + try: + import fcntl + fcntl.flock(self.file.fileno(), fcntl.LOCK_EX) + except Exception: + pass + return self + + def __exit__(self, *_exc): + if not self.file: + return + if sys.platform == "win32": + try: + import msvcrt + self.file.seek(0) + msvcrt.locking(self.file.fileno(), msvcrt.LK_UNLCK, 1) + except Exception: + pass + else: + try: + import fcntl + fcntl.flock(self.file.fileno(), fcntl.LOCK_UN) + except Exception: + pass + self.file.close() + + def _needs_chrome_remote_debugging_prompt(msg): """True when Chrome needs the inspect-page permission/profile flow.""" lower = (msg or "").lower() @@ -171,6 +217,15 @@ def _needs_chrome_remote_debugging_prompt(msg): "devtoolsactiveport not found" in lower or "enable chrome://inspect" in lower or "not live yet" in lower + or "cdp-disabled" in lower + ) + + +def _needs_chrome_permission_popup(msg): + """True when Chrome is reachable but waiting on the per-session Allow popup.""" + lower = (msg or "").lower() + return ( + "permission-blocked" in lower or ( "ws handshake failed" in lower and ( @@ -185,7 +240,13 @@ def _needs_chrome_remote_debugging_prompt(msg): def _is_local_chrome_mode(env=None): """True when the daemon discovers a local Chrome instead of a remote CDP WS.""" - return not (env or {}).get("BU_CDP_WS") and not os.environ.get("BU_CDP_WS") + env = env or {} + 
return not ( + env.get("BU_CDP_WS") + or env.get("BU_CDP_URL") + or os.environ.get("BU_CDP_WS") + or os.environ.get("BU_CDP_URL") + ) def daemon_alive(name=None, binding=None): @@ -343,24 +404,48 @@ def ensure_daemon(wait=60.0, name=None, env=None, binding=None): import subprocess, sys local = _is_local_chrome_mode(env) - for attempt in (0, 1): - e = {**os.environ, **({"BU_NAME": name} if name else {}), **(env or {})} - p = subprocess.Popen( - [sys.executable, "-m", "browser_harness.daemon"], - env=e, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, **ipc.spawn_kwargs(), - ) - deadline = time.time() + wait - while time.time() < deadline: - if daemon_alive(name, binding=binding): return - if p.poll() is not None: break - time.sleep(0.2) - msg = _log_tail(name, tmp_dir=tmp_dir) or "" - if local and attempt == 0 and _needs_chrome_remote_debugging_prompt(msg): - _open_chrome_inspect() - print('browser-harness: at chrome://inspect/#remote-debugging, tick "Allow remote debugging for this browser instance" and click Allow on the popup that appears', file=sys.stderr) - restart_daemon(name, binding=binding) - continue - raise RuntimeError(msg or f"daemon {name or NAME} didn't come up -- check {ipc.log_path(name or NAME, tmp_dir=tmp_dir)}") + if local and not env.get("BH_SELECTED_LOCAL_PROFILE"): + selected = local_profiles.get_default_profile_id() + if selected: + env["BH_SELECTED_LOCAL_PROFILE"] = selected + with _DaemonStartLock(name or NAME, runtime_dir=runtime_dir): + if daemon_alive(name, binding=binding): + return + for attempt in (0, 1): + e = {**os.environ, **({"BU_NAME": name} if name else {}), **(env or {})} + log_file = open(ipc.log_path(name or NAME, tmp_dir=tmp_dir), "ab") + try: + p = subprocess.Popen( + [sys.executable, "-m", "browser_harness.daemon"], + env=e, stdout=log_file, stderr=log_file, **ipc.spawn_kwargs(), + ) + finally: + log_file.close() + deadline = time.time() + wait + while time.time() < deadline: + if daemon_alive(name, binding=binding): 
return + if p.poll() is not None: break + time.sleep(0.2) + msg = _log_tail(name, tmp_dir=tmp_dir) or "" + if local and attempt == 0 and _needs_chrome_permission_popup(msg): + _open_selected_profile(env.get("BH_SELECTED_LOCAL_PROFILE")) + print('browser-harness: Chrome is asking "Allow remote debugging?" in the selected profile. Click Allow, then retry browser work.', file=sys.stderr) + restart_daemon(name, binding=binding) + raise RuntimeError( + "permission-blocked: opened/focused the selected Chrome profile. " + "Wait for the user to click Allow in the Chrome permission popup before retrying." + ) + if local and attempt == 0 and _needs_chrome_remote_debugging_prompt(msg): + _open_chrome_inspect(env.get("BH_SELECTED_LOCAL_PROFILE")) + print('browser-harness: at chrome://inspect/#remote-debugging, tick "Allow remote debugging for this browser instance" and click Allow on the popup that appears', file=sys.stderr) + restart_daemon(name, binding=binding) + if "cdp-disabled" in msg.lower(): + raise RuntimeError( + "cdp-disabled: opened chrome://inspect/#remote-debugging in the selected profile. " + "Wait for the user to tick the checkbox and confirm before retrying." + ) + continue + raise RuntimeError(msg or f"daemon {name or NAME} didn't come up -- check {ipc.log_path(name or NAME, tmp_dir=tmp_dir)}") def stop_remote_daemon(name="remote"): @@ -581,13 +666,18 @@ def start_remote_daemon(name="remote", profileName=None, **create_kwargs): def list_local_profiles(): - """Detected local browser profiles on this machine. Shells out to `profile-use list --json`. - Returns [{BrowserName, BrowserPath, ProfileName, ProfilePath, DisplayName}, ...]. 
- Requires `profile-use` (see interaction-skills/profile-sync.md for install).""" - import json, shutil, subprocess - if not shutil.which("profile-use"): - raise RuntimeError("profile-use not installed -- curl -fsSL https://browser-use.com/profile.sh | sh") - return json.loads(subprocess.check_output(["profile-use", "list", "--json"], text=True)) + """Detected local Chromium-family profiles with stable profile ids.""" + return local_profiles.list_local_profiles_payload() + + +def use_local_profile(profile_id): + """Set the default local profile id for future local Chrome daemon sessions.""" + return local_profiles.set_default_profile_id(profile_id) + + +def open_local_profile(profile_id=None, marker=True): + """Open or focus a local profile. With marker=True, running Chrome gets a marker tab.""" + return local_profiles.open_local_profile(profile_id, allow_marker=marker) def sync_local_profile(profile_name, browser=None, cloud_profile_id=None, @@ -749,10 +839,17 @@ def _chrome_running(): return False -def _open_chrome_inspect(): +def _open_chrome_inspect(profile_id=None): """Open chrome://inspect/#remote-debugging so the user can tick the checkbox.""" import platform, subprocess, webbrowser url = "chrome://inspect/#remote-debugging" + profile_id = profile_id or local_profiles.get_default_profile_id() + if profile_id: + try: + local_profiles.open_local_profile(profile_id, allow_marker=False, url=url) + return + except Exception: + pass if platform.system() == "Darwin": try: subprocess.run([ @@ -769,6 +866,23 @@ def _open_chrome_inspect(): pass +def _open_selected_profile(profile_id=None): + """Focus the selected Chrome profile without routing through the checkbox page.""" + import platform, subprocess + profile_id = profile_id or local_profiles.get_default_profile_id() + if profile_id: + try: + local_profiles.open_local_profile(profile_id, allow_marker=False) + return + except Exception: + pass + if platform.system() == "Darwin": + try: + subprocess.run(["osascript", 
"-e", 'tell application "Google Chrome" to activate'], timeout=5, check=False) + except Exception: + pass + + def run_doctor(): """Read-only diagnostics. Exit 0 iff everything looks healthy.""" import platform, shutil, sys diff --git a/src/browser_harness/daemon.py b/src/browser_harness/daemon.py index 0f0f2555..ec6b614c 100644 --- a/src/browser_harness/daemon.py +++ b/src/browser_harness/daemon.py @@ -5,6 +5,7 @@ from pathlib import Path from . import _ipc as ipc +from . import local_profiles from cdp_use.client import CDPClient @@ -101,7 +102,11 @@ def _ws_from_devtools_active_port(http_url: str) -> str | None: return None -def get_ws_url(): +def _explicit_cdp_configured(): + return bool(os.environ.get("BU_CDP_WS") or os.environ.get("BU_CDP_URL")) + + +def get_ws_url(selected_profile: local_profiles.LocalBrowserProfile | None = None): if url := os.environ.get("BU_CDP_WS"): return url if url := os.environ.get("BU_CDP_URL"): @@ -123,21 +128,22 @@ def get_ws_url(): last_err = e time.sleep(1) raise RuntimeError(f"BU_CDP_URL={url} unreachable after 30s: {last_err} -- is the dedicated automation Chrome running?") - for base in PROFILES: - try: - active = (base / "DevToolsActivePort").read_text().splitlines() - except (FileNotFoundError, NotADirectoryError): - continue - port = active[0].strip() if active else "" - ws_path = active[1].strip() if len(active) > 1 else "" - if not port: - continue - # Resolve the live WS URL via /json/version instead of trusting the path stored - # alongside the port in DevToolsActivePort: if Chrome was previously launched - # with a different --user-data-dir on the same port, that file is left behind - # with a stale browser UUID and the WS upgrade returns 404. 
- deadline = time.time() + 30 - while time.time() < deadline: + bases = [selected_profile.user_data_dir] if selected_profile else PROFILES + deadline = time.time() + 30 + while time.time() < deadline: + for base in bases: + try: + active = (base / "DevToolsActivePort").read_text().splitlines() + except (FileNotFoundError, NotADirectoryError): + continue + port = active[0].strip() if active else "" + ws_path = active[1].strip() if len(active) > 1 else "" + if not port: + continue + # Resolve the live WS URL via /json/version instead of trusting the path stored + # alongside the port in DevToolsActivePort: if Chrome was previously launched + # with a different --user-data-dir on the same port, that file is left behind + # with a stale browser UUID and the WS upgrade returns 404. try: return json.loads(urllib.request.urlopen(f"http://127.0.0.1:{port}/json/version", timeout=1).read())["webSocketDebuggerUrl"] except urllib.error.HTTPError as e: @@ -145,16 +151,27 @@ def get_ws_url(): # the ws path Chrome wrote to DevToolsActivePort still works. if e.code == 404 and ws_path: return f"ws://127.0.0.1:{port}{ws_path}" - time.sleep(1) + if e.code == 403: + raise RuntimeError("permission-blocked: Chrome is reachable, but the per-session Allow remote debugging popup has not been accepted") except (OSError, KeyError, ValueError): - time.sleep(1) + pass + time.sleep(0.2) + if selected_profile: + disabled = local_profiles.local_debugging_disabled_statuses() + if disabled: + raise RuntimeError("cdp-disabled: Chrome is open, but remote debugging is turned off. 
Open chrome://inspect/#remote-debugging in the selected profile and wait for user confirmation.") + running = local_profiles.browser_process_running(selected_profile.browser_name, selected_profile.browser_path) + state = "stale-port" if running else "browser-not-running" raise RuntimeError( - f"Chrome's remote-debugging page is open, but DevTools is not live yet on 127.0.0.1:{port} — if Chrome opened a profile picker, choose your normal profile first, then tick the checkbox and click Allow if shown" + f"{state}: selected profile {selected_profile.id} is not exposing a reachable local CDP endpoint; open/focus the selected profile, run local setup if needed, then retry" ) for probe_port in (9222, 9223): try: with urllib.request.urlopen(f"http://127.0.0.1:{probe_port}/json/version", timeout=1) as r: return json.loads(r.read())["webSocketDebuggerUrl"] + except urllib.error.HTTPError as e: + if e.code == 403: + raise RuntimeError("permission-blocked: Chrome is reachable, but the per-session Allow remote debugging popup has not been accepted") except (OSError, KeyError, ValueError): continue raise RuntimeError(f"DevToolsActivePort not found in {[str(p) for p in PROFILES]} — enable chrome://inspect/#remote-debugging, or set BU_CDP_WS for a remote browser") @@ -176,7 +193,7 @@ def stop_remote(): def is_real_page(t): - return t["type"] == "page" and not t.get("url", "").startswith(INTERNAL) + return local_profiles.is_real_page_target(t) class Daemon: @@ -184,26 +201,194 @@ def __init__(self): self.cdp = None self.session = None self.target_id = None + self.selected_local_profile = None + self.preferred_target_marker = None + self.preferred_profile_id = None + self.active_local_profile_id = None + self.preferred_browser_context_id = None + self.owned_target_ids = set() self.events = deque(maxlen=BUF) self.dialog = None self.stop = None # asyncio.Event, set inside start() + def _prepare_selected_local_profile(self): + if _explicit_cdp_configured() or REMOTE_ID: + return None 
+ profile_id = local_profiles.get_default_profile_id() + if not profile_id: + profiles = local_profiles.list_local_profiles_payload() + raise RuntimeError( + "needs-profile: No default local Chrome profile is set. " + f"Choose one explicit profile first. profiles={json.dumps(profiles, default=str)}" + ) + profile = local_profiles.resolve_local_profile(profile_id) + if local_profiles.remote_debugging_user_enabled(profile.user_data_dir) is False: + raise RuntimeError( + "cdp-disabled: Chrome remote debugging is turned off for the selected profile. " + "Open chrome://inspect/#remote-debugging in that profile, tick the checkbox, wait for user confirmation, then retry." + ) + opened = local_profiles.open_local_profile(profile.id, allow_marker=True) + self.selected_local_profile = profile + self.preferred_profile_id = profile.id + self.preferred_target_marker = opened.get("target_marker") + log(f"selected local profile {profile.id}; targeting={opened.get('profile_targeting')}") + return profile + + async def _targets(self): + return (await self.cdp.send_raw("Target.getTargets"))["targetInfos"] + + async def _target_info(self, target_id): + return (await self.cdp.send_raw("Target.getTargetInfo", {"targetId": target_id}))["targetInfo"] + + async def _ensure_target_browser_context(self, target_id): + if not self.preferred_browser_context_id: + return + target = next((t for t in await self._targets() if t.get("targetId") == target_id), None) + if target is None: + raise RuntimeError("target-gone: target no longer exists") + actual = target.get("browserContextId") + if actual and actual != self.preferred_browser_context_id: + raise RuntimeError("wrong-profile: refusing to switch to a target from a different Chrome profile context") + + async def _reattach_current_target(self): + if not self.target_id: + return False + targets = await self._targets() + if not any(t.get("targetId") == self.target_id for t in targets): + raise RuntimeError("target-gone: Previous browser tab 
target is gone.") + await self._ensure_target_browser_context(self.target_id) + self.session = (await self.cdp.send_raw( + "Target.attachToTarget", {"targetId": self.target_id, "flatten": True} + ))["sessionId"] + await self._enable_default_domains(self.session) + return True + + async def _close_profile_marker_targets(self, browser_context_id=None, keep_target_id=None): + try: + targets = await self._targets() + except Exception: + return + for target in targets: + if not local_profiles.is_profile_marker_target(target): + continue + if browser_context_id and target.get("browserContextId") != browser_context_id: + continue + target_id = target.get("targetId") + if not target_id or target_id == keep_target_id: + continue + await _silent(self.cdp.send_raw("Target.closeTarget", {"targetId": target_id})) + + async def _close_remote_debugging_setup_targets(self): + try: + targets = await self._targets() + except Exception: + return + for target in targets: + if not local_profiles.is_remote_debugging_setup_target(target): + continue + target_id = target.get("targetId") + if target_id and target_id != self.target_id: + await _silent(self.cdp.send_raw("Target.closeTarget", {"targetId": target_id})) + + def _select_work_target(self, targets, browser_context_id=None, exclude_target_ids=None): + exclude_target_ids = set(exclude_target_ids or ()) + + def in_scope(target): + if target.get("targetId") in exclude_target_ids: + return False + if browser_context_id and target.get("browserContextId") != browser_context_id: + return False + return True + + scoped = [t for t in targets if in_scope(t)] + return ( + next((t for t in scoped if local_profiles.is_real_page_target(t)), None) + or next((t for t in scoped if local_profiles.is_reusable_placeholder_target(t)), None) + ) + async def attach_first_page(self): """Attach to a real page (or any page). Sets self.session. 
Returns attached target or None.""" - targets = (await self.cdp.send_raw("Target.getTargets"))["targetInfos"] - pages = [t for t in targets if is_real_page(t)] - if not pages: - # No real pages — create one instead of attaching to omnibox popup - tid = (await self.cdp.send_raw("Target.createTarget", {"url": "about:blank"}))["targetId"] + attached_profile_marker = False + attached_launched_profile = False + attached_browser_context_id = None + attached_profile_id = None + page = None + if self.preferred_target_marker: + deadline = time.time() + 8 + while time.time() < deadline: + page = next( + (t for t in await self._targets() if local_profiles.target_url_contains_marker(t, self.preferred_target_marker)), + None, + ) + if page: + break + await asyncio.sleep(0.15) + if not page: + raise RuntimeError("profile-target-missing: selected Chrome profile target did not appear; refusing to attach to an arbitrary existing profile") + attached_profile_marker = True + attached_profile_id = self.preferred_profile_id + attached_browser_context_id = page.get("browserContextId") + self.preferred_target_marker = None + self.preferred_profile_id = None + targets = await self._targets() + page = self._select_work_target( + targets, + attached_browser_context_id, + exclude_target_ids={page.get("targetId")}, + ) + else: + targets = await self._targets() + launched_profile_id = self.preferred_profile_id + if launched_profile_id: + page = self._select_work_target(targets) + attached_profile_id = launched_profile_id + attached_browser_context_id = page.get("browserContextId") if page else None + attached_launched_profile = True + self.preferred_profile_id = None + else: + page = self._select_work_target(targets) + if not page: + # No real pages - create one instead of attaching to omnibox popup. 
+ params = {"url": "about:blank"} + target_context_id = attached_browser_context_id or self.preferred_browser_context_id + if target_context_id: + params["browserContextId"] = target_context_id + tid = (await self.cdp.send_raw("Target.createTarget", params))["targetId"] + self.owned_target_ids.add(tid) log(f"no real pages found, created about:blank ({tid})") - pages = [{"targetId": tid, "url": "about:blank", "type": "page"}] + page = {"targetId": tid, "url": "about:blank", "type": "page"} + if target_context_id: + page["browserContextId"] = target_context_id + if attached_profile_id and not attached_browser_context_id: + try: + info = await self._target_info(tid) + attached_browser_context_id = info.get("browserContextId") + except Exception: + pass self.session = (await self.cdp.send_raw( - "Target.attachToTarget", {"targetId": pages[0]["targetId"], "flatten": True} + "Target.attachToTarget", {"targetId": page["targetId"], "flatten": True} ))["sessionId"] - self.target_id = pages[0]["targetId"] - log(f"attached {pages[0]['targetId']} ({pages[0].get('url','')[:80]}) session={self.session}") + self.target_id = page["targetId"] + if attached_profile_marker or attached_launched_profile: + self.active_local_profile_id = attached_profile_id + self.preferred_browser_context_id = attached_browser_context_id + elif not self.selected_local_profile: + self.active_local_profile_id = None + self.preferred_browser_context_id = None + log(f"attached {page['targetId']} ({page.get('url','')[:80]}) session={self.session}") await self._enable_default_domains(self.session) - return pages[0] + if attached_profile_marker: + await self._close_profile_marker_targets(attached_browser_context_id) + await self._close_remote_debugging_setup_targets() + return page + + async def close_owned_targets(self): + if not self.cdp: + return + target_ids = list(self.owned_target_ids) + self.owned_target_ids.clear() + for target_id in target_ids: + await _silent(self.cdp.send_raw("Target.closeTarget", 
{"targetId": target_id})) async def _enable_default_domains(self, session_id): """Enable Page/DOM/Runtime/Network on a CDP session. @@ -231,7 +416,8 @@ async def enable_one(d): async def start(self): self.stop = asyncio.Event() - url = get_ws_url() + selected_profile = self._prepare_selected_local_profile() + url = get_ws_url(selected_profile) log(f"connecting to {url}") self.cdp = CDPClient(url) try: @@ -283,15 +469,21 @@ async def handle(self, req): if not self.target_id: return {"error": "not_attached"} try: - info = (await self.cdp.send_raw("Target.getTargetInfo", {"targetId": self.target_id}))["targetInfo"] + info = await self._target_info(self.target_id) except Exception: - return {"error": "cdp_disconnected"} - return {"targetId": info.get("targetId"), "url": info.get("url", ""), "title": info.get("title", "")} + return {"error": "target-gone"} + return { + "targetId": info.get("targetId"), + "url": info.get("url", ""), + "title": info.get("title", ""), + "browserContextId": info.get("browserContextId"), + "local_profile_id": self.active_local_profile_id, + } if meta == "connection_status": if not self.target_id: return {"error": "not_attached"} try: - info = (await self.cdp.send_raw("Target.getTargetInfo", {"targetId": self.target_id}))["targetInfo"] + info = await self._target_info(self.target_id) except Exception: return {"error": "cdp_disconnected"} page = None @@ -300,12 +492,25 @@ async def handle(self, req): "targetId": info.get("targetId"), "title": info.get("title") or "(untitled)", "url": info.get("url") or "", + "browserContextId": info.get("browserContextId"), } - return {"target_id": self.target_id, "session_id": self.session, "page": page} + return { + "target_id": self.target_id, + "session_id": self.session, + "local_profile_id": self.active_local_profile_id, + "profile_context_id": self.preferred_browser_context_id, + "page": page, + } if meta == "set_session": + target_id = req.get("target_id") or self.target_id + if target_id: + try: + 
await self._ensure_target_browser_context(target_id) + except Exception as e: + return {"error": str(e)} old_session = self.session self.session = req.get("session_id") - self.target_id = req.get("target_id") or self.target_id + self.target_id = target_id # Run the old-session Network.disable (defense in depth — keeps # background-tab traffic out of the global event buffer; the # consumer-side filter in wait_for_network_idle is the actual @@ -342,17 +547,36 @@ async def disable_old(): method = req["method"] params = req.get("params") or {} + if self.preferred_browser_context_id: + try: + if method == "Target.createTarget": + requested = params.get("browserContextId") + if requested and requested != self.preferred_browser_context_id: + return {"error": "wrong-profile: refusing to create a target in a different Chrome profile context"} + params = {**params, "browserContextId": self.preferred_browser_context_id} + elif method == "Target.attachToTarget" and params.get("targetId"): + await self._ensure_target_browser_context(params["targetId"]) + except Exception as e: + return {"error": str(e)} # Browser-level Target.* calls must not use a session (stale or otherwise). # For everything else, explicit session in req wins; else default. 
sid = None if method.startswith("Target.") else (req.get("session_id") or self.session) try: - return {"result": await self.cdp.send_raw(method, params, session_id=sid)} + result = await self.cdp.send_raw(method, params, session_id=sid) + if method == "Target.createTarget" and isinstance(result, dict): + target_id = result.get("targetId") + if target_id: + self.owned_target_ids.add(target_id) + return {"result": result} except Exception as e: msg = str(e) if "Session with given id not found" in msg and sid == self.session and sid: - log(f"stale session {sid}, re-attaching") - if await self.attach_first_page(): - return {"result": await self.cdp.send_raw(method, params, session_id=self.session)} + log(f"stale session {sid}, re-attaching same target") + try: + if await self._reattach_current_target(): + return {"result": await self.cdp.send_raw(method, params, session_id=self.session)} + except Exception as reattach_error: + return {"error": str(reattach_error)} return {"error": msg} @@ -382,6 +606,7 @@ async def handler(reader, writer): await asyncio.wait({serve_task, stop_task}, return_when=asyncio.FIRST_COMPLETED) if serve_task.done(): await serve_task # surfaces a serve crash finally: + await d.close_owned_targets() for t in (serve_task, stop_task): t.cancel() try: await t diff --git a/src/browser_harness/helpers.py b/src/browser_harness/helpers.py index 57b8cfb5..e3661147 100644 --- a/src/browser_harness/helpers.py +++ b/src/browser_harness/helpers.py @@ -38,6 +38,7 @@ def _load_env_file(p): NAME = os.environ.get("BU_NAME", "default") SOCK = ipc.sock_addr(NAME) INTERNAL = ("chrome://", "chrome-untrusted://", "devtools://", "chrome-extension://", "about:") +PROFILE_MARKER = "browser-use-profile-target" def _send(req): @@ -258,8 +259,9 @@ def press_key(key, modifiers=0): so listeners checking e.keyCode / e.key all fire.""" vk, code, text = _KEYS.get(key, (ord(key[0]) if len(key) == 1 else 0, key, key if len(key) == 1 else "")) base = {"key": key, "code": code, 
"modifiers": modifiers, "windowsVirtualKeyCode": vk, "nativeVirtualKeyCode": vk} - cdp("Input.dispatchKeyEvent", type="keyDown", **base, **({"text": text} if text else {})) - if text and len(text) == 1: + printable_char = len(key) == 1 and bool(text) + cdp("Input.dispatchKeyEvent", type="keyDown", **base, **({} if printable_char or not text else {"text": text})) + if printable_char: cdp("Input.dispatchKeyEvent", type="char", text=text, **{k: v for k, v in base.items() if k != "text"}) cdp("Input.dispatchKeyEvent", type="keyUp", **base) @@ -289,18 +291,51 @@ def capture_screenshot(path=None, full=False, max_dim=None): # --- tabs --- -def list_tabs(include_chrome=True): +def _is_agent_startup_placeholder(title, url): + url = str(url or "") + return str(title or "").startswith("Starting agent ") and ( + url in ("", "about:blank") or url.startswith("about:blank#") + ) + + +def _current_target_browser_context_id(): + try: + return current_tab().get("browserContextId") + except Exception: + return None + + +def list_tabs(include_chrome=True, include_other_contexts=False): out = [] + current_context = None if include_other_contexts else _current_target_browser_context_id() for t in cdp("Target.getTargets")["targetInfos"]: if t["type"] != "page": continue + if current_context and t.get("browserContextId") != current_context: continue url = t.get("url", "") + if _is_agent_startup_placeholder(t.get("title", ""), url): continue + if not include_chrome and PROFILE_MARKER in url: continue if not include_chrome and url.startswith(INTERNAL): continue - out.append({"targetId": t["targetId"], "title": t.get("title", ""), "url": url}) + out.append({ + "targetId": t["targetId"], + "target_id": t["targetId"], + "title": t.get("title", ""), + "url": url, + "browserContextId": t.get("browserContextId"), + "browser_context_id": t.get("browserContextId"), + }) return out def current_tab(): r = _send({"meta": "current_tab"}) - return {"targetId": r["targetId"], "url": r["url"], "title": 
r["title"]} + return { + "targetId": r["targetId"], + "target_id": r["targetId"], + "url": r["url"], + "title": r["title"], + "browserContextId": r.get("browserContextId"), + "browser_context_id": r.get("browserContextId"), + "local_profile_id": r.get("local_profile_id"), + } def _mark_tab(): """Prepend horse emoji to tab title so the user can see which tab the agent controls.""" @@ -310,7 +345,7 @@ def _mark_tab(): def switch_tab(target): # Accept either a raw targetId string or the dict returned by current_tab() / list_tabs(), # so `switch_tab(current_tab())` works without a manual ["targetId"] dance. - target_id = target.get("targetId") if isinstance(target, dict) else target + target_id = (target.get("targetId") or target.get("target_id")) if isinstance(target, dict) else target # Unmark old tab. Horse emoji is a surrogate pair in JS UTF-16 strings (2 code units), # plus the trailing space = 3 code units, so slice(3) cleanly removes the prefix. try: cdp("Runtime.evaluate", expression="if(document.title.startsWith('\U0001F434 '))document.title=document.title.slice(3)") @@ -325,7 +360,21 @@ def new_tab(url="about:blank"): # Always create blank, then goto: passing url to createTarget races with # attach, so the brief about:blank is "complete" by the time the caller # polls and wait_for_load() returns before navigation actually starts. 
- tid = cdp("Target.createTarget", url="about:blank")["targetId"] + binding = context.get_active_binding() + if url != "about:blank" and binding and binding.manager_mode: + try: + cur = current_tab() + cur_url = cur.get("url") or "" + if cur_url in ("", "about:blank") or cur_url.startswith("about:blank#"): + goto_url(url) + return cur["targetId"] + except Exception: + pass + params = {"url": "about:blank"} + browser_context_id = _current_target_browser_context_id() + if browser_context_id: + params["browserContextId"] = browser_context_id + tid = cdp("Target.createTarget", **params)["targetId"] switch_tab(tid) if url != "about:blank": goto_url(url) @@ -334,7 +383,7 @@ def new_tab(url="about:blank"): def close_tab(target=None): """Close a tab. If `target` is omitted, closes the currently attached tab. Accepts a raw targetId string or a dict from list_tabs()/current_tab().""" - target_id = target.get("targetId") if isinstance(target, dict) else target + target_id = (target.get("targetId") or target.get("target_id")) if isinstance(target, dict) else target if target_id is None: target_id = current_tab()["targetId"] cdp("Target.closeTarget", targetId=target_id) diff --git a/src/browser_harness/local_profiles.py b/src/browser_harness/local_profiles.py new file mode 100644 index 00000000..97f58bd9 --- /dev/null +++ b/src/browser_harness/local_profiles.py @@ -0,0 +1,615 @@ +"""Native Chromium-family profile discovery and selected-profile state.""" +from __future__ import annotations + +from dataclasses import asdict, dataclass +import json +import os +from pathlib import Path +import socket +import subprocess +import sys +import time +import urllib.error +import urllib.request + + +MARKER_URL_PREFIX = "https://browser-use.com/browser-use-profile-target/" +INTERNAL_URL_PREFIXES = ( + "chrome://", + "chrome-untrusted://", + "devtools://", + "chrome-extension://", + "about:", +) + + +@dataclass(frozen=True) +class LocalBrowserInstall: + browser_name: str + browser_path: Path 
+ user_data_dir: Path + + def payload(self) -> dict: + return { + "browser_name": self.browser_name, + "browser_path": str(self.browser_path), + "user_data_dir": str(self.user_data_dir), + } + + +@dataclass(frozen=True) +class LocalBrowserProfile: + id: str + browser_name: str + browser_path: Path + user_data_dir: Path + profile_dir: str + profile_name: str + profile_path: Path + display_name: str + + def payload(self) -> dict: + data = asdict(self) + for key in ("browser_path", "user_data_dir", "profile_path"): + data[key] = str(data[key]) + return data + + +@dataclass(frozen=True) +class LocalCandidate: + id: str + browser_name: str + browser_path: str | None + profile_path: str + http_url: str | None + ws_url: str + source: str + connectable: bool + state: str + stale: bool + browser_running: bool | None + remote_debugging_enabled: bool | None + reason: str | None + next_step: str | None + + def payload(self) -> dict: + return asdict(self) + + +def config_dir() -> Path: + if raw := os.environ.get("BH_CONFIG_DIR"): + return Path(raw).expanduser() + if sys.platform == "darwin": + return Path.home() / "Library" / "Application Support" / "browser-harness" + if sys.platform == "win32": + base = os.environ.get("APPDATA") + return Path(base).expanduser() / "browser-harness" if base else Path.home() / "AppData" / "Roaming" / "browser-harness" + base = os.environ.get("XDG_CONFIG_HOME") + return Path(base).expanduser() / "browser-harness" if base else Path.home() / ".config" / "browser-harness" + + +def profile_config_path() -> Path: + return config_dir() / "profile.json" + + +def get_default_profile_id() -> str | None: + for key in ("BH_SELECTED_LOCAL_PROFILE", "BH_LOCAL_PROFILE"): + value = (os.environ.get(key) or "").strip() + if value: + return value + try: + data = json.loads(profile_config_path().read_text()) + except (FileNotFoundError, json.JSONDecodeError, OSError): + return None + value = str(data.get("default_local_profile_id") or "").strip() + return value or 
None + + +def set_default_profile_id(profile_id: str | None) -> dict: + path = profile_config_path() + path.parent.mkdir(parents=True, exist_ok=True) + if profile_id: + profile = resolve_local_profile(profile_id) + require_browser_binary(profile) + data = { + "default_local_profile_id": profile.id, + "default_local_profile_label": profile.display_name, + } + else: + data = { + "default_local_profile_id": None, + "default_local_profile_label": None, + } + tmp = path.with_suffix(".tmp") + tmp.write_text(json.dumps(data, indent=2)) + os.replace(tmp, path) + return data + + +def known_local_browser_installs() -> list[LocalBrowserInstall]: + home = Path.home() + program_files = Path(os.environ.get("ProgramFiles") or "C:/Program Files") + program_files_x86 = Path(os.environ.get("ProgramFiles(x86)") or "C:/Program Files (x86)") + local_app_data = Path(os.environ.get("LOCALAPPDATA") or home / "AppData" / "Local") + candidates = [ + ("Google Chrome", Path("/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"), home / "Library/Application Support/Google/Chrome"), + ("Chrome Canary", Path("/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary"), home / "Library/Application Support/Google/Chrome Canary"), + ("Brave", Path("/Applications/Brave Browser.app/Contents/MacOS/Brave Browser"), home / "Library/Application Support/BraveSoftware/Brave-Browser"), + ("Microsoft Edge", Path("/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge"), home / "Library/Application Support/Microsoft Edge"), + ("Chromium", Path("/Applications/Chromium.app/Contents/MacOS/Chromium"), home / "Library/Application Support/Chromium"), + ("Arc", Path("/Applications/Arc.app/Contents/MacOS/Arc"), home / "Library/Application Support/Arc/User Data"), + ("Dia", Path("/Applications/Dia.app/Contents/MacOS/Dia"), home / "Library/Application Support/Dia"), + ("Comet", Path("/Applications/Comet.app/Contents/MacOS/Comet"), home / "Library/Application Support/Comet"), + 
("Helium", Path("/Applications/Helium.app/Contents/MacOS/Helium"), home / "Library/Application Support/Helium"), + ("Sidekick", Path("/Applications/Sidekick.app/Contents/MacOS/Sidekick"), home / "Library/Application Support/Sidekick"), + ("Thorium", Path("/Applications/Thorium.app/Contents/MacOS/Thorium"), home / "Library/Application Support/Thorium"), + ("SigmaOS", Path("/Applications/SigmaOS.app/Contents/MacOS/SigmaOS"), home / "Library/Application Support/SigmaOS/User Data"), + ("Wavebox", Path("/Applications/Wavebox.app/Contents/MacOS/Wavebox"), home / "Library/Application Support/WaveboxApp"), + ("Ghost Browser", Path("/Applications/Ghost Browser.app/Contents/MacOS/Ghost Browser"), home / "Library/Application Support/Ghost Browser"), + ("Blisk", Path("/Applications/Blisk.app/Contents/MacOS/Blisk"), home / "Library/Application Support/Blisk"), + ("Opera", Path("/Applications/Opera.app/Contents/MacOS/Opera"), home / "Library/Application Support/com.operasoftware.Opera"), + ("Vivaldi", Path("/Applications/Vivaldi.app/Contents/MacOS/Vivaldi"), home / "Library/Application Support/Vivaldi"), + ("Yandex", Path("/Applications/Yandex.app/Contents/MacOS/Yandex"), home / "Library/Application Support/Yandex/YandexBrowser"), + ("Iridium", Path("/Applications/Iridium.app/Contents/MacOS/Iridium"), home / "Library/Application Support/Iridium"), + ("Google Chrome", Path("/usr/bin/google-chrome"), home / ".config/google-chrome"), + ("Google Chrome", Path("/usr/bin/google-chrome-stable"), home / ".config/google-chrome"), + ("Brave", Path("/usr/bin/brave-browser"), home / ".config/BraveSoftware/Brave-Browser"), + ("Brave", Path("/usr/bin/brave"), home / ".config/BraveSoftware/Brave-Browser"), + ("Brave", Path("/snap/bin/brave"), home / ".config/BraveSoftware/Brave-Browser"), + ("Microsoft Edge", Path("/usr/bin/microsoft-edge"), home / ".config/microsoft-edge"), + ("Microsoft Edge", Path("/usr/bin/microsoft-edge-stable"), home / ".config/microsoft-edge"), + ("Chromium", 
Path("/usr/bin/chromium"), home / ".config/chromium"), + ("Chromium", Path("/usr/bin/chromium-browser"), home / ".config/chromium"), + ("Chromium", Path("/snap/bin/chromium"), home / ".config/chromium"), + ("Opera", Path("/usr/bin/opera"), home / ".config/opera"), + ("Opera", Path("/snap/bin/opera"), home / ".config/opera"), + ("Vivaldi", Path("/usr/bin/vivaldi"), home / ".config/vivaldi"), + ("Vivaldi", Path("/usr/bin/vivaldi-stable"), home / ".config/vivaldi"), + ("Vivaldi", Path("/snap/bin/vivaldi"), home / ".config/vivaldi"), + ("Yandex", Path("/usr/bin/yandex-browser"), home / ".config/yandex-browser"), + ("Yandex", Path("/usr/bin/yandex-browser-stable"), home / ".config/yandex-browser"), + ("Iridium", Path("/usr/bin/iridium-browser"), home / ".config/iridium"), + ("Ungoogled Chromium", Path("/usr/bin/ungoogled-chromium"), home / ".config/chromium"), + ("Thorium", Path("/usr/bin/thorium-browser"), home / ".config/thorium"), + ("Sidekick", home / ".local/share/sidekick/sidekick", home / ".config/Sidekick"), + ("Wavebox", Path("/usr/bin/wavebox"), home / ".config/Wavebox"), + ("Google Chrome", program_files / "Google/Chrome/Application/chrome.exe", local_app_data / "Google/Chrome/User Data"), + ("Google Chrome", program_files_x86 / "Google/Chrome/Application/chrome.exe", local_app_data / "Google/Chrome/User Data"), + ("Google Chrome", local_app_data / "Google/Chrome/Application/chrome.exe", local_app_data / "Google/Chrome/User Data"), + ("Brave", program_files / "BraveSoftware/Brave-Browser/Application/brave.exe", local_app_data / "BraveSoftware/Brave-Browser/User Data"), + ("Brave", local_app_data / "BraveSoftware/Brave-Browser/Application/brave.exe", local_app_data / "BraveSoftware/Brave-Browser/User Data"), + ("Microsoft Edge", program_files / "Microsoft/Edge/Application/msedge.exe", local_app_data / "Microsoft/Edge/User Data"), + ("Microsoft Edge", program_files_x86 / "Microsoft/Edge/Application/msedge.exe", local_app_data / "Microsoft/Edge/User Data"), + 
("Chromium", local_app_data / "Chromium/Application/chrome.exe", local_app_data / "Chromium/User Data"), + ("Opera", local_app_data / "Programs/Opera/opera.exe", home / "AppData/Roaming/Opera Software/Opera Stable"), + ("Opera", program_files / "Opera/opera.exe", home / "AppData/Roaming/Opera Software/Opera Stable"), + ("Vivaldi", local_app_data / "Vivaldi/Application/vivaldi.exe", local_app_data / "Vivaldi/User Data"), + ("Vivaldi", program_files / "Vivaldi/Application/vivaldi.exe", local_app_data / "Vivaldi/User Data"), + ("Yandex", local_app_data / "Yandex/YandexBrowser/Application/browser.exe", local_app_data / "Yandex/YandexBrowser/User Data"), + ("Iridium", local_app_data / "Iridium/Application/iridium.exe", local_app_data / "Iridium/User Data"), + ("Sidekick", local_app_data / "Sidekick/Application/sidekick.exe", local_app_data / "Sidekick/User Data"), + ("Thorium", local_app_data / "Thorium/Application/thorium.exe", local_app_data / "Thorium/User Data"), + ("Wavebox", local_app_data / "WaveboxApp/Application/wavebox.exe", local_app_data / "WaveboxApp/User Data"), + ("Blisk", local_app_data / "Blisk/Application/blisk.exe", local_app_data / "Blisk/User Data"), + ] + installs: list[LocalBrowserInstall] = [] + seen: dict[tuple[str, Path], int] = {} + for browser_name, browser_path, user_data_dir in candidates: + if not browser_path.exists() and not user_data_dir.exists(): + continue + key = (browser_name, user_data_dir) + candidate = LocalBrowserInstall(browser_name, browser_path, user_data_dir) + if key in seen: + index = seen[key] + if not installs[index].browser_path.exists() and browser_path.exists(): + installs[index] = candidate + else: + seen[key] = len(installs) + installs.append(candidate) + return installs + + +def known_profile_roots() -> list[tuple[str, Path]]: + home = Path.home() + return [ + ("Google Chrome", home / "Library/Application Support/Google/Chrome"), + ("Chrome Canary", home / "Library/Application Support/Google/Chrome Canary"), + 
("Comet", home / "Library/Application Support/Comet"), + ("Arc", home / "Library/Application Support/Arc/User Data"), + ("Dia", home / "Library/Application Support/Dia/User Data"), + ("Microsoft Edge", home / "Library/Application Support/Microsoft Edge"), + ("Microsoft Edge Beta", home / "Library/Application Support/Microsoft Edge Beta"), + ("Microsoft Edge Dev", home / "Library/Application Support/Microsoft Edge Dev"), + ("Microsoft Edge Canary", home / "Library/Application Support/Microsoft Edge Canary"), + ("Brave", home / "Library/Application Support/BraveSoftware/Brave-Browser"), + ("Google Chrome", home / ".config/google-chrome"), + ("Chromium", home / ".config/chromium"), + ("Chromium", home / ".config/chromium-browser"), + ("Microsoft Edge", home / ".config/microsoft-edge"), + ("Microsoft Edge Beta", home / ".config/microsoft-edge-beta"), + ("Microsoft Edge Dev", home / ".config/microsoft-edge-dev"), + ("Chromium", home / ".var/app/org.chromium.Chromium/config/chromium"), + ("Google Chrome", home / ".var/app/com.google.Chrome/config/google-chrome"), + ("Brave", home / ".var/app/com.brave.Browser/config/BraveSoftware/Brave-Browser"), + ("Microsoft Edge", home / ".var/app/com.microsoft.Edge/config/microsoft-edge"), + ("Google Chrome", home / "AppData/Local/Google/Chrome/User Data"), + ("Chrome Canary", home / "AppData/Local/Google/Chrome SxS/User Data"), + ("Chromium", home / "AppData/Local/Chromium/User Data"), + ("Microsoft Edge", home / "AppData/Local/Microsoft/Edge/User Data"), + ("Microsoft Edge Beta", home / "AppData/Local/Microsoft/Edge Beta/User Data"), + ("Microsoft Edge Dev", home / "AppData/Local/Microsoft/Edge Dev/User Data"), + ("Microsoft Edge Canary", home / "AppData/Local/Microsoft/Edge SxS/User Data"), + ("Brave", home / "AppData/Local/BraveSoftware/Brave-Browser/User Data"), + ] + + +def detect_local_profiles() -> list[LocalBrowserProfile]: + profiles: list[LocalBrowserProfile] = [] + seen: set[tuple[Path, str]] = set() + for install in 
known_local_browser_installs(): + if not install.user_data_dir.exists(): + continue + names = load_profile_names_from_local_state(install.user_data_dir) + try: + entries = list(install.user_data_dir.iterdir()) + except OSError: + continue + for entry in entries: + if not entry.is_dir(): + continue + profile_dir = entry.name + if not is_valid_local_profile_dir(entry): + continue + key = (install.user_data_dir, profile_dir) + if key in seen: + continue + seen.add(key) + profile_name = names.get(profile_dir) or profile_dir + profiles.append(LocalBrowserProfile( + id=f"{browser_slug(install.browser_name)}:{profile_dir}", + browser_name=install.browser_name, + browser_path=install.browser_path, + user_data_dir=install.user_data_dir, + profile_dir=profile_dir, + profile_name=profile_name, + profile_path=entry, + display_name=f"{install.browser_name} - {profile_name}", + )) + profiles.sort(key=lambda p: (p.browser_name, profile_dir_sort_key(p.profile_dir), natural_key(p.profile_name))) + return profiles + + +def list_local_profiles_payload() -> dict: + default_profile_id = get_default_profile_id() + return { + "status": "ok", + "default_profile_id": default_profile_id, + "profiles": [p.payload() for p in detect_local_profiles()], + } + + +def resolve_local_profile(profile_ref: str | None = None) -> LocalBrowserProfile: + profile_ref = (profile_ref or get_default_profile_id() or "").strip() + if not profile_ref: + raise RuntimeError("no default local Chrome profile is set") + profiles = detect_local_profiles() + for profile in profiles: + if profile.id == profile_ref: + return profile + matches = [ + p for p in profiles + if p.profile_name == profile_ref or p.profile_dir == profile_ref or p.display_name == profile_ref + ] + if len(matches) == 1: + return matches[0] + if not matches: + raise RuntimeError(f"no local profile matched {profile_ref!r}; run list_local_profiles()") + raise RuntimeError(f"multiple local profiles matched {profile_ref!r}; pass the exact profile id") 
+ + +def require_browser_binary(profile: LocalBrowserProfile) -> None: + if not browser_binary_usable(profile.browser_path): + raise RuntimeError(f"browser binary not found or not executable for {profile.id}: {profile.browser_path}") + + +def browser_binary_usable(path: Path) -> bool: + try: + if not path.exists(): + return False + return True if sys.platform == "win32" else os.access(path, os.X_OK) + except OSError: + return False + + +def load_profile_names_from_local_state(user_data_dir: Path) -> dict[str, str]: + try: + value = json.loads((user_data_dir / "Local State").read_text()) + except (FileNotFoundError, json.JSONDecodeError, OSError): + return {} + info_cache = value.get("profile", {}).get("info_cache", {}) + if not isinstance(info_cache, dict): + return {} + out = {} + for profile_dir, info in info_cache.items(): + if isinstance(info, dict): + name = str(info.get("name") or "").strip() + if name: + out[profile_dir] = name + return out + + +def remote_debugging_user_enabled(user_data_dir: Path) -> bool | None: + try: + value = json.loads((user_data_dir / "Local State").read_text()) + except (FileNotFoundError, json.JSONDecodeError, OSError): + return None + user_enabled = value.get("devtools", {}).get("remote_debugging", {}).get("user-enabled") + return user_enabled if isinstance(user_enabled, bool) else None + + +def is_valid_local_profile_dir(path: Path) -> bool: + return any((path / relative).exists() for relative in ("Preferences", "Cookies", "History", "Network/Cookies")) + + +def browser_slug(name: str) -> str: + out = [] + last_dash = False + for ch in name.lower(): + if ch.isascii() and ch.isalnum(): + out.append(ch) + last_dash = False + elif not last_dash: + out.append("-") + last_dash = True + return "".join(out).strip("-") + + +def profile_dir_sort_key(profile_dir: str) -> tuple[int, list[tuple[int, object]]]: + return (0, []) if profile_dir == "Default" else (1, natural_key(profile_dir)) + + +def natural_key(value: str) -> list[tuple[int, 
object]]: + out: list[tuple[int, object]] = [] + buf = "" + is_digit = False + for ch in value: + digit = ch.isdigit() + if buf and digit != is_digit: + out.append((0, int(buf)) if is_digit else (1, buf)) + buf = "" + buf += ch + is_digit = digit + if buf: + out.append((0, int(buf)) if is_digit else (1, buf)) + return out + + +def browser_process_running(browser_name: str, browser_path: Path | None = None) -> bool | None: + try: + if sys.platform == "win32": + out = subprocess.check_output(["tasklist", "/FO", "CSV"], text=True, timeout=5, stderr=subprocess.DEVNULL) + exe = browser_path.name.lower() if browser_path else "" + return bool(exe and exe in out.lower()) + out = subprocess.check_output(["ps", "-axo", "pid=,comm=,args="], text=True, timeout=5, stderr=subprocess.DEVNULL) + if browser_path: + path = str(browser_path) + if path and path in out: + return True + return browser_name.lower() in out.lower() + except Exception: + return None + + +def local_candidates() -> list[LocalCandidate]: + roots: list[tuple[str, Path | None, Path]] = [ + (install.browser_name, install.browser_path, install.user_data_dir) + for install in known_local_browser_installs() + ] + seen_roots = {(name, root) for name, _path, root in roots} + for name, root in known_profile_roots(): + if (name, root) not in seen_roots: + seen_roots.add((name, root)) + roots.append((name, None, root)) + return local_candidates_from_roots(roots, [9222, 9223]) + + +def local_candidates_from_roots( + roots: list[tuple[str, Path | None, Path]], + probe_ports: list[int], +) -> list[LocalCandidate]: + candidates: list[LocalCandidate] = [] + seen_ws: set[str] = set() + for browser_name, browser_path, user_data_dir in roots: + active = user_data_dir / "DevToolsActivePort" + try: + lines = active.read_text().splitlines() + except (FileNotFoundError, NotADirectoryError, OSError): + continue + port = lines[0].strip() if lines else "" + ws_path = lines[1].strip() if len(lines) > 1 else "" + if not port or not 
ws_path: + continue + ws_url = f"ws://127.0.0.1:{port}{ws_path}" + if ws_url in seen_ws: + continue + seen_ws.add(ws_url) + connectable = tcp_port_open("127.0.0.1", int(port) if port.isdigit() else 0) + running = browser_process_running(browser_name, browser_path) + enabled = remote_debugging_user_enabled(user_data_dir) + if connectable: + state, reason, next_step = "reachable", None, "connect local browser" + else: + state, reason, next_step = local_disconnected_candidate_details(running, enabled) + candidates.append(LocalCandidate( + id=f"local-{len(candidates) + 1}", + browser_name=browser_name, + browser_path=str(browser_path) if browser_path else None, + profile_path=str(user_data_dir), + http_url=f"http://127.0.0.1:{port}", + ws_url=ws_url, + source=str(active), + connectable=connectable, + state=state, + stale=not connectable, + browser_running=running, + remote_debugging_enabled=enabled, + reason=reason, + next_step=next_step, + )) + for port in probe_ports: + http_url = f"http://127.0.0.1:{port}" + try: + ws_url = resolve_ws_from_http(http_url, timeout=0.5) + except Exception: + continue + if ws_url in seen_ws: + continue + seen_ws.add(ws_url) + candidates.append(LocalCandidate( + id=f"local-{len(candidates) + 1}", + browser_name=f"CDP port {port}", + browser_path=None, + profile_path="", + http_url=http_url, + ws_url=ws_url, + source="port-probe", + connectable=True, + state="reachable", + stale=False, + browser_running=None, + remote_debugging_enabled=None, + reason=None, + next_step="connect local browser", + )) + return candidates + + +def local_debugging_disabled_statuses() -> list[dict]: + out = [] + for install in known_local_browser_installs(): + running = browser_process_running(install.browser_name, install.browser_path) + enabled = remote_debugging_user_enabled(install.user_data_dir) + if running is True and enabled is False: + out.append({ + "browser_name": install.browser_name, + "browser_path": str(install.browser_path), + "user_data_dir": 
str(install.user_data_dir), + "browser_running": running, + "remote_debugging_enabled": enabled, + }) + return out + + +def local_disconnected_candidate_details( + browser_running_value: bool | None, + remote_debugging_enabled_value: bool | None, +) -> tuple[str, str, str]: + if browser_running_value is True and remote_debugging_enabled_value is False: + return ( + "cdp-disabled", + "Chrome is open, but remote debugging is turned off for this browser instance.", + "local setup", + ) + if browser_running_value is True: + return ( + "stale-port", + "DevToolsActivePort exists, but the recorded CDP port is not reachable. Chrome appears open, but it is not exposing that debug endpoint.", + "open selected profile, then reconnect", + ) + return ( + "stale-port", + "DevToolsActivePort exists, but the recorded CDP port is not reachable. Chrome was likely closed or the debug server stopped.", + "open selected profile, then reconnect", + ) + + +def resolve_ws_from_http(http_url: str, timeout: float = 15.0) -> str: + url = f"{http_url.rstrip('/')}/json/version" + with urllib.request.urlopen(url, timeout=timeout) as resp: + data = json.loads(resp.read() or b"{}") + ws = data.get("webSocketDebuggerUrl") + if not ws: + raise RuntimeError(f"{url} missing webSocketDebuggerUrl") + return ws + + +def tcp_port_open(host: str, port: int, timeout: float = 0.2) -> bool: + if not port: + return False + try: + with socket.create_connection((host, port), timeout=timeout): + return True + except OSError: + return False + + +def profile_marker_target_url(marker: str) -> str: + return f"{MARKER_URL_PREFIX}{marker}" + + +def target_url_contains_marker(target: dict, marker: str) -> bool: + return is_profile_marker_target(target) and marker in str(target.get("url") or "") + + +def is_profile_marker_target(target: dict) -> bool: + return target.get("type") == "page" and MARKER_URL_PREFIX in str(target.get("url") or "") + + +def is_remote_debugging_setup_target(target: dict) -> bool: + return 
target.get("type") == "page" and str(target.get("url") or "").startswith("chrome://inspect/#remote-debugging") + + +def is_internal_browser_url(url: str) -> bool: + return str(url or "").startswith(INTERNAL_URL_PREFIXES) + + +def is_real_page_target(target: dict) -> bool: + if target.get("type") != "page": + return False + if is_profile_marker_target(target): + return False + url = str(target.get("url") or "") + return bool(url.strip()) and not is_internal_browser_url(url) + + +def is_reusable_placeholder_target(target: dict) -> bool: + if target.get("type") != "page": + return False + if is_profile_marker_target(target) or is_remote_debugging_setup_target(target): + return False + url = str(target.get("url") or "") + return url in ("", "about:blank") or url.startswith("about:blank#") + + +def open_local_profile( + profile_ref: str | None = None, + allow_marker: bool = True, + url: str | None = None, +) -> dict: + profile = resolve_local_profile(profile_ref) + require_browser_binary(profile) + profile_directory_arg = f"--profile-directory={profile.profile_dir}" + running = browser_process_running(profile.browser_name, profile.browser_path) + needs_marker = allow_marker and running is not False + marker = str(int(time.time() * 1000)) if needs_marker else None + target_url = profile_marker_target_url(marker) if marker else None + args = [str(profile.browser_path)] + if sys.platform == "darwin": + args.append(f"--user-data-dir={profile.user_data_dir}") + args.append(profile_directory_arg) + if target_url: + args.append(target_url) + elif url: + args.append(url) + elif allow_marker: + args.append("--no-startup-window") + subprocess.Popen(args, stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + return { + "status": "ok", + "opened": True, + "profile": profile.payload(), + "profile_targeting": "marker" if marker else ("profile-launch" if allow_marker else "profile-focus"), + "target_marker": marker, + "target_url": target_url or url, + 
"next_step": "Give Chrome a moment to start, then retry browser work.", + } diff --git a/src/browser_harness/manager_daemon.py b/src/browser_harness/manager_daemon.py index 066171c8..52ec7ad0 100644 --- a/src/browser_harness/manager_daemon.py +++ b/src/browser_harness/manager_daemon.py @@ -20,6 +20,13 @@ BU_API = "https://api.browser-use.com/api/v3" +MAC_BROWSER_PATHS = ( + "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome", + "/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary", + "/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge", + "/Applications/Brave Browser.app/Contents/MacOS/Brave Browser", + "/Applications/Chromium.app/Contents/MacOS/Chromium", +) @dataclass @@ -142,6 +149,7 @@ def list(self, req: dict) -> dict: "owned_by_this_agent": lease.owner_agent_id == agent_id, "shared": len(lease.allowed_agents) > 1, "state": "busy" if lease.active_execution else "ready", + **({"live_url": lease.cloud_live_url} if lease.cloud_live_url else {}), }) return {"ok": True, "browsers": browsers} @@ -317,11 +325,12 @@ def start_managed_backend(lease: BrowserLease): "--no-default-browser-check", "--disable-background-networking", "--disable-dev-shm-usage", - "--disable-gpu", "about:blank", ] - if os.environ.get("BH_MANAGED_HEADLESS") == "1" or (not os.environ.get("DISPLAY") and not os.environ.get("WAYLAND_DISPLAY")): + headless = os.environ.get("BH_MANAGED_HEADLESS") == "1" or (not os.environ.get("DISPLAY") and not os.environ.get("WAYLAND_DISPLAY")) + if headless: args.insert(-1, "--headless=new") + args.insert(-1, "--disable-gpu") if os.environ.get("BH_CHROME_NO_SANDBOX") == "1": args.insert(-1, "--no-sandbox") proc = subprocess.Popen(args, stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, start_new_session=True) @@ -368,6 +377,19 @@ def cleanup_backend(lease: BrowserLease): os.kill(lease.local_process_id, 15) except Exception: pass + for _ in range(25): + try: + os.kill(lease.local_process_id, 
0) + except OSError: + return + time.sleep(0.2) + try: + os.killpg(lease.local_process_id, 9) + except Exception: + try: + os.kill(lease.local_process_id, 9) + except Exception: + pass def _browser_use(path: str, method: str, body=None): @@ -391,14 +413,35 @@ def stop_cloud_browser(browser_id: str | None): pass +def _browser_binary_usable(path: str) -> bool: + try: + if not os.path.isfile(path) or not os.access(path, os.X_OK): + return False + return subprocess.run( + [path, "--version"], + stdin=subprocess.DEVNULL, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + timeout=5, + ).returncode == 0 + except Exception: + return False + + def find_browser_binary() -> str | None: for key in ("BH_CHROME_PATH", "CHROME_PATH"): value = os.environ.get(key) if value: return value + candidates = [] for name in ("google-chrome-stable", "google-chrome", "chromium", "chromium-browser"): path = shutil.which(name) if path: + candidates.append(path) + if sys.platform == "darwin": + candidates.extend(MAC_BROWSER_PATHS) + for path in candidates: + if _browser_binary_usable(path): return path return None @@ -428,7 +471,7 @@ def wait_devtools(port: int, timeout=20.0): def ready_public(lease: BrowserLease) -> dict: - return { + state = { "ok": True, "ready": True, "state": "ready", @@ -436,6 +479,9 @@ def ready_public(lease: BrowserLease) -> dict: "backend": lease.backend, "shared": len(lease.allowed_agents) > 1, } + if lease.cloud_live_url: + state["live_url"] = lease.cloud_live_url + return state def ready_response(lease: BrowserLease) -> dict: diff --git a/src/browser_harness/manager_helpers.py b/src/browser_harness/manager_helpers.py index 7ab676ab..8c434b2c 100644 --- a/src/browser_harness/manager_helpers.py +++ b/src/browser_harness/manager_helpers.py @@ -19,8 +19,8 @@ def browser_new(backend="managed", *, profile="clean", proxy_country=None, reaso reason=reason, ) binding = manager_client.binding_from_response(resp) - context.activate_binding(binding) 
manager_client.acquire_execution_for_binding(binding) + context.activate_binding(binding) return manager_client.public_state(resp) @@ -28,8 +28,8 @@ def browser_switch(browser_id): """Switch this agent/process to an existing allowed browser id.""" resp = manager_client.switch_browser(browser_id) binding = manager_client.binding_from_response(resp) - context.activate_binding(binding) manager_client.acquire_execution_for_binding(binding) + context.activate_binding(binding) return manager_client.public_state(resp) @@ -48,4 +48,3 @@ def browser_close(browser_id=None): if closing_active: context.clear_active_binding() return manager_client.public_state(resp) - diff --git a/src/browser_harness/run.py b/src/browser_harness/run.py index ee77f9ca..881c5bbd 100644 --- a/src/browser_harness/run.py +++ b/src/browser_harness/run.py @@ -14,6 +14,7 @@ ensure_daemon, list_cloud_profiles, list_local_profiles, + open_local_profile, print_update_banner, restart_daemon, run_doctor, @@ -22,6 +23,7 @@ start_remote_daemon, stop_remote_daemon, sync_local_profile, + use_local_profile, ) from . import auth, context, manager_client from .helpers import * @@ -38,6 +40,7 @@ PY Helpers are pre-imported. The daemon auto-starts and connects to the running browser. +For local Chrome, first choose a stable profile id with list_local_profiles() and use_local_profile(id). 
Commands: browser-harness --version print the installed version @@ -66,6 +69,30 @@ "browser_close", ) +_NO_DAEMON_HELPER_NAMES = { + "list_local_profiles", + "use_local_profile", + "open_local_profile", + "list_cloud_profiles", + "sync_local_profile", + "start_remote_daemon", + "stop_remote_daemon", + "restart_daemon", +} + +_NO_DAEMON_WRAPPER_NAMES = { + "print", + "repr", + "str", + "bool", + "len", + "sorted", + "list", + "dict", + "tuple", + "set", +} + def _uses_manager_helpers(code: str) -> bool: try: @@ -81,6 +108,33 @@ def _uses_manager_helpers(code: str) -> bool: return False +def _can_run_without_daemon(code: str) -> bool: + try: + tree = ast.parse(code) + except SyntaxError: + return False + saw_no_daemon_helper = False + for node in ast.walk(tree): + if not isinstance(node, ast.Call): + continue + func = node.func + if isinstance(func, ast.Name): + if func.id in _NO_DAEMON_HELPER_NAMES: + saw_no_daemon_helper = True + continue + if func.id in _NO_DAEMON_WRAPPER_NAMES: + continue + return False + if isinstance(func, ast.Attribute): + # Allow simple formatting around passive helper output, e.g. + # json.dumps(list_local_profiles()). + if func.attr in {"dumps", "loads"}: + continue + return False + return False + return saw_no_daemon_helper + + # Probe /json/version (not a bare TCP connect) so a non-Chrome process bound to # 9222/9223 doesn't masquerade as Chrome and skip the cloud bootstrap. Mirrors # daemon.py's fallback probe. 
@@ -163,7 +217,8 @@ def main(): and os.environ.get("BU_AUTOSPAWN") ): start_remote_daemon(NAME) - ensure_daemon() + if not _can_run_without_daemon(code): + ensure_daemon() exec(code, globals()) diff --git a/tests/unit/test_admin.py b/tests/unit/test_admin.py index d5353e2c..0541caef 100644 --- a/tests/unit/test_admin.py +++ b/tests/unit/test_admin.py @@ -30,16 +30,42 @@ def test_local_chrome_mode_is_false_when_process_env_provides_remote_cdp(monkeyp assert not admin._is_local_chrome_mode() -def test_handshake_timeout_needs_chrome_remote_debugging_prompt(): +def test_local_chrome_mode_is_false_when_env_provides_explicit_cdp_url(): + assert not admin._is_local_chrome_mode({"BU_CDP_URL": "http://127.0.0.1:9333"}) + + +def test_list_local_profiles_uses_native_detector(monkeypatch): + monkeypatch.setattr( + admin.local_profiles, + "list_local_profiles_payload", + lambda: {"status": "ok", "profiles": [{"id": "google-chrome:Default"}]}, + ) + + assert admin.list_local_profiles() == { + "status": "ok", + "profiles": [{"id": "google-chrome:Default"}], + } + + +def test_handshake_timeout_is_chrome_permission_popup(): msg = "CDP WS handshake failed: timed out during opening handshake" - assert admin._needs_chrome_remote_debugging_prompt(msg) + assert not admin._needs_chrome_remote_debugging_prompt(msg) + assert admin._needs_chrome_permission_popup(msg) -def test_handshake_403_needs_chrome_remote_debugging_prompt(): +def test_handshake_403_is_chrome_permission_popup(): msg = "CDP WS handshake failed: server rejected WebSocket connection: HTTP 403" + assert not admin._needs_chrome_remote_debugging_prompt(msg) + assert admin._needs_chrome_permission_popup(msg) + + +def test_cdp_disabled_needs_chrome_remote_debugging_prompt_not_permission_popup(): + msg = "cdp-disabled: Chrome remote debugging is turned off for the selected profile" + assert admin._needs_chrome_remote_debugging_prompt(msg) + assert not admin._needs_chrome_permission_popup(msg) def 
test_stale_websocket_does_not_open_chrome_inspect(): diff --git a/tests/unit/test_daemon.py b/tests/unit/test_daemon.py index 90c5bc85..13416280 100644 --- a/tests/unit/test_daemon.py +++ b/tests/unit/test_daemon.py @@ -1,5 +1,7 @@ import asyncio +import pytest + from browser_harness import daemon @@ -274,6 +276,8 @@ async def send_raw(self, method, params=None, session_id=None): "targetId": "page-target-abc", "url": "https://example.com/", "title": "Example Domain", + "browserContextId": None, + "local_profile_id": None, } # The targetId must be passed through — that's the whole point of the fix. get_info_calls = [(p, s) for (m, p, s) in d.cdp.calls if m == "Target.getTargetInfo"] @@ -293,3 +297,224 @@ def test_current_tab_meta_returns_not_attached_when_no_target_id(): assert result == {"error": "not_attached"} # No CDP call should have been issued. assert d.cdp.calls == [] + + +def test_prepare_selected_local_profile_blocks_without_default(monkeypatch): + monkeypatch.delenv("BU_CDP_WS", raising=False) + monkeypatch.delenv("BU_CDP_URL", raising=False) + monkeypatch.setattr(daemon, "REMOTE_ID", None) + monkeypatch.setattr(daemon.local_profiles, "get_default_profile_id", lambda: None) + monkeypatch.setattr( + daemon.local_profiles, + "list_local_profiles_payload", + lambda: {"status": "ok", "profiles": [{"id": "google-chrome:Default"}]}, + ) + d = daemon.Daemon() + + with pytest.raises(RuntimeError, match="needs-profile"): + d._prepare_selected_local_profile() + + +def test_prepare_selected_local_profile_blocks_checkbox_off_without_opening_marker(tmp_path, monkeypatch): + profile = daemon.local_profiles.LocalBrowserProfile( + id="google-chrome:Default", + browser_name="Google Chrome", + browser_path=tmp_path / "chrome", + user_data_dir=tmp_path / "User Data", + profile_dir="Default", + profile_name="Default", + profile_path=tmp_path / "User Data" / "Default", + display_name="Google Chrome - Default", + ) + monkeypatch.delenv("BU_CDP_WS", raising=False) + 
monkeypatch.delenv("BU_CDP_URL", raising=False) + monkeypatch.setattr(daemon, "REMOTE_ID", None) + monkeypatch.setattr(daemon.local_profiles, "get_default_profile_id", lambda: profile.id) + monkeypatch.setattr(daemon.local_profiles, "resolve_local_profile", lambda _profile_id: profile) + monkeypatch.setattr(daemon.local_profiles, "remote_debugging_user_enabled", lambda _path: False) + monkeypatch.setattr( + daemon.local_profiles, + "open_local_profile", + lambda *args, **kwargs: (_ for _ in ()).throw(AssertionError("must not open marker")), + ) + d = daemon.Daemon() + + with pytest.raises(RuntimeError, match="cdp-disabled"): + d._prepare_selected_local_profile() + + +def test_target_create_is_scoped_to_selected_browser_context(): + d = _fresh_daemon() + d.preferred_browser_context_id = "ctx-selected" + + result = asyncio.run(d.handle({ + "method": "Target.createTarget", + "params": {"url": "about:blank"}, + })) + + assert result == {"result": {}} + assert d.cdp.calls == [ + ("Target.createTarget", {"url": "about:blank", "browserContextId": "ctx-selected"}, None) + ] + + +def test_target_create_rejects_different_browser_context(): + d = _fresh_daemon() + d.preferred_browser_context_id = "ctx-selected" + + result = asyncio.run(d.handle({ + "method": "Target.createTarget", + "params": {"url": "about:blank", "browserContextId": "ctx-other"}, + })) + + assert result == {"error": "wrong-profile: refusing to create a target in a different Chrome profile context"} + assert d.cdp.calls == [] + + +def test_set_session_rejects_target_from_different_browser_context(): + class _TargetsCDP(_FakeCDP): + async def send_raw(self, method, params=None, session_id=None): + self.calls.append((method, params, session_id)) + if method == "Target.getTargets": + return {"targetInfos": [ + {"targetId": "target-other", "type": "page", "browserContextId": "ctx-other"}, + ]} + return {} + + d = daemon.Daemon() + d.cdp = _TargetsCDP() + d.preferred_browser_context_id = "ctx-selected" + + result 
= asyncio.run(d.handle({ + "meta": "set_session", + "session_id": "session-other", + "target_id": "target-other", + })) + + assert result == {"error": "wrong-profile: refusing to switch to a target from a different Chrome profile context"} + assert d.session is None + assert d.target_id is None + + +def test_marker_attach_captures_profile_and_browser_context(): + class _MarkerCDP(_FakeCDP): + async def send_raw(self, method, params=None, session_id=None): + self.calls.append((method, params, session_id)) + if method == "Target.getTargets": + return {"targetInfos": [ + { + "targetId": "marker-target", + "type": "page", + "url": "https://browser-use.com/browser-use-profile-target/123", + "browserContextId": "ctx-selected", + }, + { + "targetId": "duplicate-marker", + "type": "page", + "url": "https://browser-use.com/browser-use-profile-target/123", + "browserContextId": "ctx-selected", + }, + { + "targetId": "work-target", + "type": "page", + "url": "https://example.com/", + "browserContextId": "ctx-selected", + }, + ]} + if method == "Target.attachToTarget": + return {"sessionId": f"session-{params['targetId']}"} + return {} + + d = daemon.Daemon() + d.cdp = _MarkerCDP() + d.preferred_target_marker = "123" + d.preferred_profile_id = "google-chrome:Default" + + page = asyncio.run(d.attach_first_page()) + + assert page["targetId"] == "work-target" + assert d.session == "session-work-target" + assert d.target_id == "work-target" + assert d.active_local_profile_id == "google-chrome:Default" + assert d.preferred_browser_context_id == "ctx-selected" + assert ("Target.closeTarget", {"targetId": "marker-target"}, None) in d.cdp.calls + assert ("Target.closeTarget", {"targetId": "duplicate-marker"}, None) in d.cdp.calls + + +def test_marker_attach_creates_blank_tab_in_selected_context_when_only_marker_exists(): + class _MarkerOnlyCDP(_FakeCDP): + async def send_raw(self, method, params=None, session_id=None): + self.calls.append((method, params, session_id)) + if method == 
"Target.getTargets": + return {"targetInfos": [ + { + "targetId": "marker-target", + "type": "page", + "url": "https://browser-use.com/browser-use-profile-target/123", + "browserContextId": "ctx-selected", + }, + ]} + if method == "Target.createTarget": + return {"targetId": "created-target"} + if method == "Target.attachToTarget": + return {"sessionId": "session-created"} + return {} + + d = daemon.Daemon() + d.cdp = _MarkerOnlyCDP() + d.preferred_target_marker = "123" + d.preferred_profile_id = "google-chrome:Default" + + page = asyncio.run(d.attach_first_page()) + + assert page["targetId"] == "created-target" + assert d.session == "session-created" + assert d.target_id == "created-target" + assert d.active_local_profile_id == "google-chrome:Default" + assert d.preferred_browser_context_id == "ctx-selected" + assert d.owned_target_ids == {"created-target"} + assert ("Target.createTarget", {"url": "about:blank", "browserContextId": "ctx-selected"}, None) in d.cdp.calls + assert ("Target.closeTarget", {"targetId": "marker-target"}, None) in d.cdp.calls + + +def test_target_create_tracks_owned_target_and_close_owned_targets_closes_it(): + class _CreateAndCloseCDP(_FakeCDP): + async def send_raw(self, method, params=None, session_id=None): + self.calls.append((method, params, session_id)) + if method == "Target.createTarget": + return {"targetId": "created-by-helper"} + return {} + + d = daemon.Daemon() + d.cdp = _CreateAndCloseCDP() + + result = asyncio.run(d.handle({ + "method": "Target.createTarget", + "params": {"url": "https://example.com/"}, + })) + + assert result == {"result": {"targetId": "created-by-helper"}} + assert d.owned_target_ids == {"created-by-helper"} + + asyncio.run(d.close_owned_targets()) + + assert d.owned_target_ids == set() + assert ("Target.closeTarget", {"targetId": "created-by-helper"}, None) in d.cdp.calls + + +def test_reattach_same_target_reports_target_gone_instead_of_switching(): + class _GoneCDP(_FakeCDP): + async def send_raw(self, 
method, params=None, session_id=None): + self.calls.append((method, params, session_id)) + if method == "Target.getTargets": + return {"targetInfos": []} + raise RuntimeError("Session with given id not found") + + d = daemon.Daemon() + d.cdp = _GoneCDP() + d.session = "session-old" + d.target_id = "target-old" + + result = asyncio.run(d.handle({"method": "Runtime.evaluate", "params": {"expression": "1"}})) + + assert result == {"error": "target-gone: Previous browser tab target is gone."} diff --git a/tests/unit/test_helpers.py b/tests/unit/test_helpers.py index 4a45ee07..8b43f371 100644 --- a/tests/unit/test_helpers.py +++ b/tests/unit/test_helpers.py @@ -219,6 +219,79 @@ def fake_js(expr, **kwargs): assert any("querySelector" in e and "offsetParent" not in e for e in js_exprs) +# --- tabs / profile contexts --- + +def test_list_tabs_filters_to_current_browser_context(): + def fake_send(req): + if req.get("meta") == "current_tab": + return { + "targetId": "selected-target", + "url": "https://selected.example", + "title": "Selected", + "browserContextId": "ctx-selected", + } + return {} + + def fake_cdp(method, **kwargs): + assert method == "Target.getTargets" + return {"targetInfos": [ + { + "targetId": "selected-target", + "type": "page", + "title": "Selected", + "url": "https://selected.example", + "browserContextId": "ctx-selected", + }, + { + "targetId": "other-target", + "type": "page", + "title": "Other", + "url": "https://other.example", + "browserContextId": "ctx-other", + }, + ]} + + with patch("browser_harness.helpers._send", side_effect=fake_send), \ + patch("browser_harness.helpers.cdp", side_effect=fake_cdp): + tabs = helpers.list_tabs() + all_tabs = helpers.list_tabs(include_other_contexts=True) + + assert [tab["targetId"] for tab in tabs] == ["selected-target"] + assert {tab["targetId"] for tab in all_tabs} == {"selected-target", "other-target"} + + +def test_new_tab_preserves_current_browser_context(): + calls = [] + + def fake_send(req): + if 
req.get("meta") == "current_tab": + return { + "targetId": "current-target", + "url": "about:blank", + "title": "", + "browserContextId": "ctx-selected", + } + if req.get("meta") == "set_session": + return {"session_id": req["session_id"]} + return {} + + def fake_cdp(method, **kwargs): + calls.append((method, kwargs)) + if method == "Target.createTarget": + return {"targetId": "new-target"} + if method == "Target.attachToTarget": + return {"sessionId": "session-new"} + return {} + + with patch("browser_harness.helpers._send", side_effect=fake_send), \ + patch("browser_harness.helpers.cdp", side_effect=fake_cdp): + target_id = helpers.new_tab() + + assert target_id == "new-target" + create_call = next(kwargs for method, kwargs in calls if method == "Target.createTarget") + assert create_call["browserContextId"] == "ctx-selected" + + # --- wait_for_network_idle --- def test_wait_for_network_idle_returns_true_when_no_events(): diff --git a/tests/unit/test_local_profiles.py b/tests/unit/test_local_profiles.py new file mode 100644 index 00000000..21aa07ff --- /dev/null +++ b/tests/unit/test_local_profiles.py @@ -0,0 +1,78 @@ +import stat + +import pytest + +from browser_harness import local_profiles + + +def _install(tmp_path, name="Google Chrome"): + tmp_path.mkdir(parents=True, exist_ok=True) + browser = tmp_path / "chrome" + browser.write_text("#!/bin/sh\n") + browser.chmod(browser.stat().st_mode | stat.S_IXUSR) + user_data = tmp_path / "User Data" + user_data.mkdir() + (user_data / "Local State").write_text( + '{"profile":{"info_cache":{"Default":{"name":"Greg"},"Profile 1":{"name":"Work"}}}}' + ) + for profile_dir in ("Default", "Profile 1"): + profile = user_data / profile_dir + profile.mkdir() + (profile / "Preferences").write_text("{}") + return local_profiles.LocalBrowserInstall(name, browser, user_data) + + +def test_local_profile_detection_reads_local_state_names_and_stable_ids(tmp_path, monkeypatch): + install = _install(tmp_path) + 
monkeypatch.setattr(local_profiles, "known_local_browser_installs", lambda: [install]) + + profiles = local_profiles.detect_local_profiles() + + assert [p.id for p in profiles] == ["google-chrome:Default", "google-chrome:Profile 1"] + assert profiles[0].profile_name == "Greg" + assert profiles[1].display_name == "Google Chrome - Work" + + +def test_local_profile_resolution_requires_exact_id_when_names_collide(tmp_path, monkeypatch): + chrome = _install(tmp_path / "chrome", "Google Chrome") + brave = _install(tmp_path / "brave", "Brave") + monkeypatch.setattr(local_profiles, "known_local_browser_installs", lambda: [chrome, brave]) + + with pytest.raises(RuntimeError, match="multiple local profiles matched"): + local_profiles.resolve_local_profile("Work") + + assert local_profiles.resolve_local_profile("brave:Profile 1").browser_name == "Brave" + + +def test_default_profile_file_roundtrip(tmp_path, monkeypatch): + install = _install(tmp_path) + monkeypatch.setenv("BH_CONFIG_DIR", str(tmp_path / "config")) + monkeypatch.delenv("BH_LOCAL_PROFILE", raising=False) + monkeypatch.delenv("BH_SELECTED_LOCAL_PROFILE", raising=False) + monkeypatch.setattr(local_profiles, "known_local_browser_installs", lambda: [install]) + + result = local_profiles.set_default_profile_id("google-chrome:Default") + + assert result["default_local_profile_id"] == "google-chrome:Default" + assert local_profiles.get_default_profile_id() == "google-chrome:Default" + + +def test_default_profile_rejects_missing_browser_binary(tmp_path, monkeypatch): + install = _install(tmp_path) + install.browser_path.unlink() + monkeypatch.setenv("BH_CONFIG_DIR", str(tmp_path / "config")) + monkeypatch.setattr(local_profiles, "known_local_browser_installs", lambda: [install]) + + with pytest.raises(RuntimeError, match="browser binary not found or not executable"): + local_profiles.set_default_profile_id("google-chrome:Default") + + +def test_env_selected_profile_overrides_default_file(tmp_path, monkeypatch): + 
install = _install(tmp_path) + monkeypatch.setenv("BH_CONFIG_DIR", str(tmp_path / "config")) + monkeypatch.setattr(local_profiles, "known_local_browser_installs", lambda: [install]) + local_profiles.set_default_profile_id("google-chrome:Default") + + monkeypatch.setenv("BH_SELECTED_LOCAL_PROFILE", "google-chrome:Profile 1") + + assert local_profiles.get_default_profile_id() == "google-chrome:Profile 1" diff --git a/tests/unit/test_manager_daemon.py b/tests/unit/test_manager_daemon.py index 52f7cabd..e2504f44 100644 --- a/tests/unit/test_manager_daemon.py +++ b/tests/unit/test_manager_daemon.py @@ -1,6 +1,6 @@ -from browser_harness.manager_daemon import Manager from browser_harness import manager_daemon from browser_harness import auth +from browser_harness.manager_daemon import Manager class _FakeResponse: @@ -91,6 +91,43 @@ def test_close_rejects_other_runs(tmp_path): assert lease.browser_id in manager.leases +def test_cloud_live_url_is_exposed_in_ready_state(tmp_path): + manager, lease = _manager_with_lease(tmp_path) + lease.cloud_live_url = "https://live.example/session" + + resp = manager.handle({ + "op": "status", + "run_id": "run-1", + "agent_id": "agent-1", + }) + + assert resp["ok"] is True + assert resp["live_url"] == "https://live.example/session" + + +def test_cloud_live_url_is_exposed_in_browser_list(tmp_path): + manager, lease = _manager_with_lease(tmp_path) + lease.cloud_live_url = "https://live.example/session" + + resp = manager.handle({ + "op": "list", + "run_id": "run-1", + "agent_id": "agent-1", + }) + + assert resp["ok"] is True + assert resp["browsers"] == [ + { + "browser_id": lease.browser_id, + "backend": "cloud", + "owned_by_this_agent": True, + "shared": False, + "state": "ready", + "live_url": "https://live.example/session", + } + ] + + def test_cloud_new_reports_auth_required(monkeypatch, tmp_path): manager = Manager(tmp_path) monkeypatch.setattr( @@ -124,3 +161,18 @@ def test_browser_use_api_uses_auth_resolution(monkeypatch): assert 
captured assert captured[0].get_header("X-browser-use-api-key") == "stored-key" + + +def test_find_browser_binary_skips_unusable_path_candidate_and_uses_mac_app(monkeypatch): + monkeypatch.delenv("BH_CHROME_PATH", raising=False) + monkeypatch.delenv("CHROME_PATH", raising=False) + monkeypatch.setattr(manager_daemon.sys, "platform", "darwin") + monkeypatch.setattr(manager_daemon.shutil, "which", lambda name: "/broken/chromium" if name == "chromium" else None) + monkeypatch.setattr(manager_daemon, "MAC_BROWSER_PATHS", ("/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",)) + monkeypatch.setattr( + manager_daemon, + "_browser_binary_usable", + lambda path: path == "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome", + ) + + assert manager_daemon.find_browser_binary() == "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" diff --git a/tests/unit/test_manager_helpers.py b/tests/unit/test_manager_helpers.py index 5bef46b7..073ef564 100644 --- a/tests/unit/test_manager_helpers.py +++ b/tests/unit/test_manager_helpers.py @@ -1,3 +1,5 @@ +import pytest + from browser_harness import context, manager_helpers @@ -48,6 +50,41 @@ def test_browser_new_activates_binding_and_acquires_lock(monkeypatch, tmp_path): assert acquired == ["br_123"] +def test_browser_switch_does_not_activate_binding_when_lock_fails(monkeypatch, tmp_path): + old = context.get_active_binding() + previous = context.BrowserBinding( + browser_id="br_old", + bu_name="bh_old", + runtime_dir=tmp_path / "old-r", + tmp_dir=tmp_path / "old-t", + manager_mode=True, + ) + context.activate_binding(previous) + try: + monkeypatch.setattr(manager_helpers.manager_client, "switch_browser", lambda browser_id: _manager_response(tmp_path)) + monkeypatch.setattr( + manager_helpers.manager_client, + "acquire_execution_for_binding", + lambda binding: (_ for _ in ()).throw( + manager_helpers.manager_client.ManagerError({ + "state": "busy", + "reason": "browser is currently active in another 
browser-harness process", + }) + ), + ) + + with pytest.raises(manager_helpers.manager_client.ManagerError, match="currently active"): + manager_helpers.browser_switch("br_123") + active = context.get_active_binding() + finally: + if old is not None: + context.activate_binding(old) + else: + context.clear_active_binding() + + assert active == previous + + def test_browser_close_releases_lock_and_clears_active_binding(monkeypatch, tmp_path): released = [] closed = [] From e573f5de68aeb9472963e192aa9d23352388cfba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gregor=20=C5=BDuni=C4=8D?= <36313686+gregpr07@users.noreply.github.com> Date: Thu, 18 Jun 2026 06:33:58 +0000 Subject: [PATCH 04/15] Add safe manual Browser Use key import --- SKILL.md | 8 +++++ TEMP_BROWSER_MANAGER_CONTEXT_DO_NOT_MERGE.md | 2 +- install.md | 8 +++++ src/browser_harness/auth.py | 34 ++++++++++++++++++-- tests/unit/test_auth.py | 30 +++++++++++++++++ 5 files changed, 79 insertions(+), 3 deletions(-) diff --git a/SKILL.md b/SKILL.md index a2eb7bcd..97b2f5b6 100644 --- a/SKILL.md +++ b/SKILL.md @@ -74,6 +74,14 @@ It prints/opens a Browser Use login URL and waits while the user logs in online. browser-harness auth login --device-code ``` +If the user directly provides an API key, store it through stdin instead of a command argument: + +```bash +browser-harness auth login --api-key-stdin +``` + +Never put API keys in command-line arguments. + Then retry `browser_new(backend="cloud")`. ### Legacy remote browsers diff --git a/TEMP_BROWSER_MANAGER_CONTEXT_DO_NOT_MERGE.md b/TEMP_BROWSER_MANAGER_CONTEXT_DO_NOT_MERGE.md index 166e8b93..ba7f3318 100644 --- a/TEMP_BROWSER_MANAGER_CONTEXT_DO_NOT_MERGE.md +++ b/TEMP_BROWSER_MANAGER_CONTEXT_DO_NOT_MERGE.md @@ -28,7 +28,7 @@ browser_close(browser_id=None) After `browser_new(...)` or `browser_switch(...)`, normal browser-harness helpers such as `new_tab`, `page_info`, `capture_screenshot`, `click_at_xy`, `js`, and `cdp` work unchanged. 
-For cloud browsers, missing auth should produce `cloud-auth-required`; the model should run `browser-harness auth login` and retry. The user logs in online and the API key is stored locally without being printed into chat. +For cloud browsers, missing auth should produce `cloud-auth-required`; the model should run `browser-harness auth login` and retry. The user logs in online and the API key is stored locally without being printed into chat. If a user directly provides an API key, the safe storage path is `browser-harness auth login --api-key-stdin`, never a command-line argument. The model does not need to know about sockets, daemon names, runtime dirs, CDP URLs, Browser Use browser IDs, or process cleanup. Those are manager internals. diff --git a/install.md b/install.md index 7718e143..abd92ea1 100644 --- a/install.md +++ b/install.md @@ -79,6 +79,14 @@ Headless/SSH fallback: browser-harness auth login --device-code ``` +If you already have a Browser Use API key, store it safely through stdin: + +```bash +browser-harness auth login --api-key-stdin +``` + +Do not pass API keys as command-line arguments; they can leak through shell history and process listings. 
+ Other auth commands: ```bash diff --git a/src/browser_harness/auth.py b/src/browser_harness/auth.py index ade427e3..4014a49f 100644 --- a/src/browser_harness/auth.py +++ b/src/browser_harness/auth.py @@ -9,6 +9,7 @@ from http.server import BaseHTTPRequestHandler, HTTPServer import argparse import base64 +import getpass import hashlib import json import os @@ -377,6 +378,17 @@ def device_login(*, open_url=True, json_output=False) -> AuthRecord: return record +def api_key_stdin_login(*, json_output=False, input_stream=None) -> AuthRecord: + key = _read_manual_api_key(input_stream) + record = AuthRecord(api_key=key, source="manual") + save_auth_record(record) + if json_output: + print(json.dumps(_stored_output(record)), flush=True) + else: + print("Browser Use Cloud API key stored.") + return record + + def _exchange_authorization_code(code: str, redirect_uri: str, verifier: str) -> dict: return _post_json(f"{auth_base()}/cloud/cli-auth/token", { "grant_type": "authorization_code", @@ -439,6 +451,20 @@ def _post_json(url: str, payload: dict) -> dict: raise AuthError(f"{err}{detail}") from e +def _read_manual_api_key(input_stream=None) -> str: + stream = input_stream or sys.stdin + if hasattr(stream, "isatty") and stream.isatty(): + key = getpass.getpass("Browser Use API key: ") + else: + key = stream.read() + key = (key or "").strip() + if not key: + raise AuthError("no API key provided on stdin") + if len(key) < 20: + raise AuthError("API key looks too short") + return key + + def _write_private_json(path: Path, data: dict) -> None: raw = (json.dumps(data, indent=2) + "\n").encode() flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC @@ -493,7 +519,9 @@ def run_auth_cli(argv: list[str]) -> int: parser = argparse.ArgumentParser(prog="browser-harness auth") sub = parser.add_subparsers(dest="command", required=True) login = sub.add_parser("login") - login.add_argument("--device-code", action="store_true") + login_mode = login.add_mutually_exclusive_group() + 
login_mode.add_argument("--device-code", action="store_true") + login_mode.add_argument("--api-key-stdin", action="store_true") login.add_argument("--json", action="store_true") login.add_argument("--no-open", action="store_true") sub.add_parser("status") @@ -502,7 +530,9 @@ def run_auth_cli(argv: list[str]) -> int: try: if args.command == "login": - if args.device_code: + if args.api_key_stdin: + api_key_stdin_login(json_output=args.json) + elif args.device_code: device_login(open_url=not args.no_open, json_output=args.json) else: browser_login(open_url=not args.no_open, json_output=args.json) diff --git a/tests/unit/test_auth.py b/tests/unit/test_auth.py index 502f8107..c47dd1ea 100644 --- a/tests/unit/test_auth.py +++ b/tests/unit/test_auth.py @@ -2,6 +2,7 @@ import stat import threading import urllib.request +from io import StringIO from browser_harness import auth @@ -49,6 +50,35 @@ def test_missing_key_raises_cloud_auth_required(monkeypatch, tmp_path): raise AssertionError("expected CloudAuthRequired") +def test_api_key_stdin_login_stores_manual_key_without_printing(monkeypatch, tmp_path, capsys): + monkeypatch.delenv("BROWSER_USE_API_KEY", raising=False) + monkeypatch.setenv("BH_AUTH_PATH", str(tmp_path / "auth.json")) + manual_key = "manual-key-1234567890abcdef" + + record = auth.api_key_stdin_login(input_stream=StringIO(manual_key + "\n")) + out = capsys.readouterr().out + + assert record.source == "manual" + assert auth.get_browser_use_api_key() == manual_key + assert manual_key not in out + assert "stored" in out.lower() + assert json.loads((tmp_path / "auth.json").read_text())["browser_use"]["source"] == "manual" + + +def test_api_key_stdin_login_rejects_missing_or_short_key(monkeypatch, tmp_path): + monkeypatch.setenv("BH_AUTH_PATH", str(tmp_path / "auth.json")) + + for raw in ["", "too-short"]: + try: + auth.api_key_stdin_login(input_stream=StringIO(raw)) + except auth.AuthError as e: + assert "API key" in str(e) or "api key" in str(e) + else: + raise 
AssertionError("expected AuthError") + + assert not (tmp_path / "auth.json").exists() + + def test_browser_login_callback_exchanges_and_stores_key(monkeypatch, tmp_path): monkeypatch.setenv("BH_AUTH_PATH", str(tmp_path / "auth.json")) calls = [] From 2aaca8ba27c79c1ebf013500d1f5e6c7f044b2b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gregor=20=C5=BDuni=C4=8D?= <36313686+gregpr07@users.noreply.github.com> Date: Thu, 18 Jun 2026 09:02:43 -0700 Subject: [PATCH 05/15] Simplify LLM browser interface --- README.md | 2 + SKILL.md | 151 +++++++------------------ install.md | 10 +- src/browser_harness/daemon.py | 7 +- src/browser_harness/local_profiles.py | 27 ++++- src/browser_harness/manager_helpers.py | 24 +++- src/browser_harness/run.py | 6 +- tests/unit/test_local_profiles.py | 23 ++++ tests/unit/test_manager_helpers.py | 68 +++++++++++ tests/unit/test_run_manager_mode.py | 32 ++++++ 10 files changed, 224 insertions(+), 126 deletions(-) diff --git a/README.md b/README.md index ab7f6a5d..6ae11056 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,8 @@ Stealth, sub-agents, or headless deployment.
- `agent-workspace/agent_helpers.py` — helper code the agent edits - `agent-workspace/domain-skills/` — reusable site-specific skills the agent edits +Plain `browser-harness` helper calls use the selected local browser profile. For isolated or cloud work, start with `browser_new("private")` or `browser_new("cloud")`; cloud responses include a `live_url` preview when available. + ## Contributing PRs and improvements welcome. The best way to help: **contribute a new domain skill** under [agent-workspace/domain-skills/](agent-workspace/domain-skills/) for a site or task you use often (LinkedIn outreach, ordering on Amazon, filing expenses, etc.). Each skill teaches the agent the selectors, flows, and edge cases it would otherwise have to rediscover. diff --git a/SKILL.md b/SKILL.md index 97b2f5b6..17dc98ac 100644 --- a/SKILL.md +++ b/SKILL.md @@ -1,17 +1,11 @@ --- name: browser -description: Direct browser control via CDP. Use when the user wants to automate, scrape, test, or interact with web pages. Connects to Chrome, Chromium, or a manager-created cloud browser. +description: Control a browser with Python helpers. Use for web automation, scraping, testing, or interacting with pages. --- # browser-harness -Direct browser control via CDP. For task-specific edits, use `agent-workspace/agent_helpers.py`. For setup, install, or connection problems, read install.md. - -Domain skills (community-contributed per-site playbooks under `agent-workspace/domain-skills/`) are off by default. Set `BH_DOMAIN_SKILLS=1` to enable them; see the bottom section. - -**If `BH_DOMAIN_SKILLS=1` and the task is site-specific, read every file in the matching `agent-workspace/domain-skills//` directory before inventing an approach.** - -## Usage +There is one active browser. Normal page helpers act on it: ```bash browser-harness <<'PY' @@ -21,60 +15,41 @@ print(page_info()) PY ``` -- Invoke as browser-harness — it's on $PATH. No cd, no uv run. 
-- Use the heredoc form for every multi-line command. It prevents shell quote mangling inside Python strings and JavaScript snippets. -- First navigation is new_tab(url), not goto_url(url) — goto runs in the user's active tab and clobbers their work. -- Local Chrome requires an explicit selected profile. Use `list_local_profiles()` to see stable ids like `google-chrome:Default`, then `use_local_profile(profile_id)`. -- Plain helper calls use the selected local browser. If you need an isolated or remote browser, call `browser_new(...)` before `new_tab(...)`. - -## Tool call shape - -```bash -browser-harness <<'PY' -# any python. helpers pre-imported. daemon auto-starts. -PY -``` - -Legacy mode calls ensure_daemon() before exec. Manager mode starts when the script uses a `browser_*` lifecycle helper or `BH_MANAGER_MODE=1`. +Use `browser_*` helpers only to choose, set up, or close the active browser. -Local Chrome sessions snapshot the selected profile when the daemon starts. If a profile is changed later, existing named daemons keep their current profile until restarted. +## Choose Browser -### Managed browsers +- User's logged-in local Chrome: use normal helpers. If setup asks for a profile, run `browser_profiles()`, ask the user which `id` to use, then run `browser_use_profile(id)` and retry. +- Isolated local browser: `browser_new("private")`. +- Browser Use cloud browser with live view: `browser_new("cloud")`. +- Subagent: use `browser_new("private")` unless the parent gave you a `browser_id`. +- Given a `browser_id`: `browser_switch(browser_id)`. +- Done with a private or cloud browser: `browser_close()`. -Use this when you need an isolated browser, parallel sub-agents, a cloud browser, or a restart after the current browser gets blocked. 
+## Browser Helpers -```bash -browser-harness <<'PY' -print(browser_status()) -print(browser_new(backend="cloud", proxy_country="us")) -new_tab("https://example.com") -print(page_info()) -print(browser_close()) -PY +```python +browser_status() +browser_profiles() +browser_use_profile(profile_id) +browser_new("private") +browser_new("cloud") +browser_list() +browser_switch(browser_id) +browser_close() ``` -Lifecycle helpers: -- `browser_status()` — current binding state. -- `browser_new(backend="cloud"|"managed", profile="clean", proxy_country=None, reason=None)` — create and switch to a browser. Cloud responses include `live_url` when Browser Use returns one. -- `browser_list()` — browser ids visible to this run/agent. -- `browser_switch(browser_id)` — reuse an existing browser id. -- `browser_close(browser_id=None)` — close the active private browser, or release access to a shared one. +`browser_profiles()` and `browser_use_profile(...)` are local setup calls. They do not start browser work. -After `browser_new(...)` or `browser_switch(...)`, all normal page helpers work unchanged. If you are a sub-agent, create your own browser unless the parent gives you a browser id to reuse with `browser_switch(id)`. +After `browser_new(...)` or `browser_switch(...)`, keep using the normal page helpers: `new_tab`, `page_info`, `capture_screenshot`, `click_at_xy`, `type_text`, `js`, and `cdp`. -If `browser_new(backend="cloud")` reports `cloud-auth-required`, run: +If `browser_new("cloud")` reports `cloud-auth-required`, run: ```bash browser-harness auth login ``` -It prints/opens a Browser Use login URL and waits while the user logs in online. The API key is stored locally and is never printed. 
In SSH/headless environments use: - -```bash -browser-harness auth login --device-code -``` - -If the user directly provides an API key, store it through stdin instead of a command argument: +If the user directly provides an API key, store it through stdin: ```bash browser-harness auth login --api-key-stdin @@ -82,34 +57,21 @@ browser-harness auth login --api-key-stdin Never put API keys in command-line arguments. -Then retry `browser_new(backend="cloud")`. +## Page Workflow -### Legacy remote browsers +- First navigation is `new_tab(url)`, not `goto_url(url)`. +- Screenshots are the default way to understand and verify visible state: `capture_screenshot()`. +- Click visible targets by screenshot coordinates: `click_at_xy(x, y)`. +- Use `js(...)` for DOM inspection or extraction when coordinates are the wrong tool. +- After navigation, call `wait_for_load()`. +- If the current tab is stale or internal, call `ensure_real_tab()`. +- If redirected to a login wall, stop and ask the user. Do not type credentials from screenshots. +- For anything helpers do not cover, use raw CDP: `cdp("Domain.method", params)`. -Prefer `browser_new(backend="cloud")` for new work. The older named-daemon remote API is still available for advanced/manual cases. BROWSER_USE_API_KEY must be set or `browser-harness auth login` must have been completed. start_remote_daemon, list_cloud_profiles, list_local_profiles, sync_local_profile are pre-imported. 
+## Interaction Skills -```bash -browser-harness <<'PY' -start_remote_daemon("work") # default — clean browser, no profile -# start_remote_daemon("work", profileName="my-work") # reuse a cloud profile (already logged in) -# start_remote_daemon("work", profileId="<uuid>") # same, but by UUID -# start_remote_daemon("work", proxyCountryCode="de", timeout=120) # DE proxy, 2-hour timeout -# start_remote_daemon("work", proxyCountryCode=None) # disable the Browser Use proxy -PY - -BU_NAME=work browser-harness <<'PY' -new_tab("https://example.com") -print(page_info()) -PY -``` - -start_remote_daemon prints liveUrl and auto-opens it in the local browser (if a GUI is detected) so the user can watch along. Headless servers print only — share the URL with the user. The daemon PATCHes the cloud browser to stop on shutdown, which persists profile state. Running remote daemons bill until timeout. - -Profiles (cookies-only login state) live in interaction-skills/profile-sync.md — covers list_cloud_profiles(), the chat-driven "which profile?" pattern, and sync_local_profile() for uploading a local Chrome profile. +If you get stuck on a browser mechanic, check `interaction-skills/` for focused guidance: -## Interaction skills - -If you start struggling with a specific mechanic while navigating, look in interaction-skills/ for helpers. They cover reusable UI mechanics like dialogs, tabs, dropdowns, iframes, and uploads. The available interaction skills are: - connection.md - cookies.md - cross-origin-iframes.md @@ -128,45 +90,8 @@ If you start struggling with a specific mechanic while navigating, look in inter - uploads.md - viewport.md -## What actually works - -- Screenshots first: use capture_screenshot() to understand the current page quickly, find visible targets, and decide whether you need a click, a selector, or more navigation. -- Clicking: capture_screenshot() → read the pixel off the image → click_at_xy(x, y) → capture_screenshot() to verify.
Suppress the Playwright-habit reflex of "locate first, then click" — no getBoundingClientRect, no selector hunt. Drop to DOM only when the target has no visible geometry (hidden input, 0×0 node). Hit-testing happens in Chrome's browser process, so clicks go through iframes / shadow DOM / cross-origin without extra work. -- Bulk HTTP: http_get(url) + ThreadPoolExecutor. No browser for static pages (249 Netflix pages in 2.8s). -- After goto: wait_for_load(). -- Wrong/stale tab: ensure_real_tab(). Use it when the current tab is stale or internal; the daemon also auto-recovers from stale sessions on the next call. -- Verification: print(page_info()) is the simplest "is this alive?" check, but screenshots are the default way to verify whether a visible action actually worked. -- DOM reads: use js(...) for inspection and extraction when the screenshot shows that coordinates are the wrong tool. -- Iframe sites (Azure blades, Salesforce): click_at_xy(x, y) passes through; only drop to iframe DOM work when coordinate clicks are the wrong tool. -- Auth wall: redirected to login → stop and ask the user. Don't type credentials from screenshots. -- Raw CDP for anything helpers don't cover: cdp("Domain.method", params). - -## Design constraints - -- Coordinate clicks default. Input.dispatchMouseEvent goes through iframes/shadow/cross-origin at the compositor level. -- Legacy mode connects to the user's selected local Chrome profile. Manager mode may create cloud or managed browsers via `browser_new`. -- cdp-use is only for CDPClient.send_raw. Prefer raw CDP strings over typed wrappers. -- run.py stays tiny. No argparse, subcommands, or extra control layer. -- Core helpers stay short. Put task-specific helper additions in `agent-workspace/agent_helpers.py`; daemon/bootstrap and remote session admin live in the core package. -- Don't add another manager layer. Use the built-in `browser_*` lifecycle helpers. - -## Gotchas (field-tested) - -- Omnibox popups are fake page targets. 
Filter chrome://omnibox-popup... and other internals when you need a real tab. -- CDP target order != Chrome's visible tab-strip order. Use UI automation when the user means "the first/second tab I can see"; Target.activateTarget only shows a known target. -- Default daemon sessions can go stale. ensure_real_tab() re-attaches to a real page. -- Browser Use API is camelCase on the wire. cdpUrl, proxyCountryCode, etc. -- Remote cdpUrl is HTTPS, not ws. Resolve the websocket URL via /json/version. -- Stop cloud browsers with PATCH /browsers/{id} + {"action":"stop"}. -- After every meaningful action, re-screenshot before assuming it worked. Use the image to verify changed state, open menus, navigation, visible errors, and whether the page is in the state you expected. -- Use screenshots to drive exploration. They are often the fastest way to find the next click target, notice hidden blockers, and decide if a selector is even worth writing. -- Prefer compositor-level actions over framework hacks. Try screenshots, coordinate clicks, and raw key input before adding DOM-specific workarounds. -- If you need framework-specific DOM tricks, check interaction-skills/ first. That is where dropdown, dialog, iframe, shadow DOM, and form-specific guidance belongs. - -## Domain skills (opt-in) - -Only applies when `BH_DOMAIN_SKILLS=1`. Otherwise ignore — `agent-workspace/domain-skills/` is dormant and `goto_url` won't surface skill files. +## Domain Skills -When enabled, search `agent-workspace/domain-skills/<site>/` before inventing an approach. `goto_url` returns up to 10 skill filenames for the navigated host. +Domain skills are off by default. If `BH_DOMAIN_SKILLS=1` and the task is site-specific, read every file in `agent-workspace/domain-skills/<site>/` before inventing an approach. -If you learn anything non-obvious — a private API, stable selector, framework quirk, URL pattern, hidden wait, or site-specific trap — open a PR to `agent-workspace/domain-skills/<site>/`.
Capture the durable shape of the site (the map, not the diary). Don't write pixel coordinates (break on layout), task narration, or secrets — the directory is public. +When enabled, `goto_url(...)` returns up to 10 matching skill filenames for the current host. diff --git a/install.md b/install.md index abd92ea1..14ec3bd6 100644 --- a/install.md +++ b/install.md @@ -60,8 +60,8 @@ optional browser_harness.manager_daemon owns many isolated browser leases - BU_CDP_WS overrides local Chrome discovery for remote browsers. - BU_CDP_URL overrides local Chrome discovery with a specific DevTools HTTP endpoint (used for Way 2). - BU_BROWSER_ID + BROWSER_USE_API_KEY lets the daemon stop a Browser Use cloud browser on shutdown. -- Manager mode auto-starts `browser-harness-manager` when `browser_status`, `browser_new`, `browser_list`, `browser_switch`, or `browser_close` is used. -- Cloud manager mode reads Browser Use auth from `BROWSER_USE_API_KEY` first, then the local `browser-harness auth login` store. +- The browser manager auto-starts when `browser_status`, `browser_new`, `browser_list`, `browser_switch`, or `browser_close` is used. +- Cloud browser creation reads Browser Use auth from `BROWSER_USE_API_KEY` first, then the local `browser-harness auth login` store. ## Browser Use Cloud auth @@ -110,11 +110,11 @@ This section is the source of truth for how browser-harness connects to a browse Browser-harness can connect to any Chrome or Chromium-based browser on your computer, or to a Browser Use cloud browser. -**Cloud browsers** are managed by the Browser Use cloud API. In manager mode, start one with `browser_new(backend="cloud", proxy_country="us")`; for legacy named daemons use `start_remote_daemon("work", ...)`. Authentication is via `BROWSER_USE_API_KEY` or `browser-harness auth login`; the harness handles the WebSocket URL itself. 
To carry your local Chrome cookies into a cloud browser, install `profile-use` once (`curl -fsSL https://browser-use.com/profile.sh | sh`), then call `uuid = sync_local_profile("MyChromeProfile")` followed by `start_remote_daemon("work", profileId=uuid)`. Cookies are the only thing synced — not localStorage, not extensions, not history. +**Cloud browsers** are managed by the Browser Use cloud API. Start one with `browser_new("cloud", proxy_country="us")`. Authentication is via `BROWSER_USE_API_KEY` or `browser-harness auth login`; the harness handles the WebSocket URL itself. To carry your local Chrome cookies into a cloud browser, install `profile-use` once (`curl -fsSL https://browser-use.com/profile.sh | sh`), then call `uuid = sync_local_profile("MyChromeProfile")` followed by the advanced cloud-profile flow. Cookies are the only thing synced — not localStorage, not extensions, not history. **Local browsers** require remote debugging to be enabled. There are two ways, and they suit different use cases. -Local Way 1 also requires an explicit selected profile before the harness attaches. Run `list_local_profiles()` to get stable ids such as `google-chrome:Default`, then `use_local_profile("google-chrome:Default")`. The daemon snapshots that selected profile at startup and refuses to attach to an arbitrary available Chrome profile. +Local Way 1 also requires an explicit selected profile before the harness attaches. Run `browser_profiles()` to get stable ids such as `google-chrome:Default`, then `browser_use_profile("google-chrome:Default")`. The daemon snapshots that selected profile at startup and refuses to attach to an arbitrary available Chrome profile. *Way 1: chrome://inspect/#remote-debugging checkbox — uses your real profile.* In your running Chrome, navigate to `chrome://inspect/#remote-debugging` and tick the "Allow remote debugging for this browser instance" checkbox. 
This setting is per-profile and sticky: tick it once and it persists across every future Chrome launch of that profile. Then run any `browser-harness` command. On Chrome 144 and later, the first attach by the harness triggers an in-browser "Allow remote debugging?" popup that you must click Allow on. The popup may reappear on later attaches under conditions that are not fully characterized.[^1] This path inherits your everyday Chrome's logins, extensions, history, and bookmarks, which makes it the right choice for an agent helping you with tasks in your real browser. @@ -143,7 +143,7 @@ If the user hasn't said which connection method to use, default to Way 1 if Chro PY ``` - If it prints page info, you're done. If it reports `needs-profile`, run `list_local_profiles()`, choose a stable profile id with the user, call `use_local_profile(profile_id)`, then retry. + If it prints page info, you're done. If it reports `needs-profile`, run `browser_profiles()`, choose a stable profile id with the user, call `browser_use_profile(profile_id)`, then retry. 2. Otherwise run `browser-harness --doctor`. The two lines that matter for connection are `chrome running` and `daemon alive`. diff --git a/src/browser_harness/daemon.py b/src/browser_harness/daemon.py index ec6b614c..a4823373 100644 --- a/src/browser_harness/daemon.py +++ b/src/browser_harness/daemon.py @@ -216,10 +216,11 @@ def _prepare_selected_local_profile(self): return None profile_id = local_profiles.get_default_profile_id() if not profile_id: - profiles = local_profiles.list_local_profiles_payload() + profiles = local_profiles.list_browser_profiles_payload() raise RuntimeError( "needs-profile: No default local Chrome profile is set. " - f"Choose one explicit profile first. profiles={json.dumps(profiles, default=str)}" + "Run browser_profiles(), ask the user which profile id to use, then run browser_use_profile(id). 
" + f"profiles={json.dumps(profiles, default=str)}" ) profile = local_profiles.resolve_local_profile(profile_id) if local_profiles.remote_debugging_user_enabled(profile.user_data_dir) is False: @@ -427,7 +428,7 @@ async def start(self): raise RuntimeError( f"CDP WS handshake failed: {e} -- remote browser WebSocket connection failed. " "This can happen when network policy blocks the connection, the WS URL is wrong or expired, or the remote endpoint is down. " - "If you use Browser Use cloud, verify BROWSER_USE_API_KEY and get a fresh URL via start_remote_daemon()." + "If you use Browser Use cloud, verify auth and start a fresh cloud browser." ) raise RuntimeError(f"CDP WS handshake failed: {e} -- click Allow in Chrome if prompted, then retry") await self.attach_first_page() diff --git a/src/browser_harness/local_profiles.py b/src/browser_harness/local_profiles.py index 97f58bd9..8aa6b59e 100644 --- a/src/browser_harness/local_profiles.py +++ b/src/browser_harness/local_profiles.py @@ -288,6 +288,31 @@ def list_local_profiles_payload() -> dict: } +def list_browser_profiles_payload(verbose: bool = False) -> dict: + if verbose: + return list_local_profiles_payload() + selected = get_default_profile_id() + return { + "selected": selected, + "profiles": [ + { + "id": p.id, + "label": p.display_name, + "selected": p.id == selected, + } + for p in detect_local_profiles() + ], + } + + +def use_browser_profile(profile_id: str) -> dict: + data = set_default_profile_id(profile_id) + return { + "selected": data.get("default_local_profile_id"), + "label": data.get("default_local_profile_label"), + } + + def resolve_local_profile(profile_ref: str | None = None) -> LocalBrowserProfile: profile_ref = (profile_ref or get_default_profile_id() or "").strip() if not profile_ref: @@ -303,7 +328,7 @@ def resolve_local_profile(profile_ref: str | None = None) -> LocalBrowserProfile if len(matches) == 1: return matches[0] if not matches: - raise RuntimeError(f"no local profile matched 
{profile_ref!r}; run list_local_profiles()") + raise RuntimeError(f"no local profile matched {profile_ref!r}; run browser_profiles()") raise RuntimeError(f"multiple local profiles matched {profile_ref!r}; pass the exact profile id") diff --git a/src/browser_harness/manager_helpers.py b/src/browser_harness/manager_helpers.py index 8c434b2c..0e4bca02 100644 --- a/src/browser_harness/manager_helpers.py +++ b/src/browser_harness/manager_helpers.py @@ -2,6 +2,7 @@ from __future__ import annotations from . import context +from . import local_profiles from . import manager_client @@ -10,10 +11,29 @@ def browser_status(): return manager_client.status() -def browser_new(backend="managed", *, profile="clean", proxy_country=None, reason=None): +def browser_profiles(verbose=False): + """List local Chrome/Chromium profiles for browser_use_profile(...).""" + return local_profiles.list_browser_profiles_payload(verbose=verbose) + + +def browser_use_profile(profile_id): + """Select the local browser profile future normal helper calls should use.""" + return local_profiles.use_browser_profile(profile_id) + + +def _manager_backend(kind, backend=None): + value = backend if backend is not None else kind + if value in (None, "private", "managed"): + return "managed" + if value == "cloud": + return "cloud" + raise ValueError("browser_new kind must be 'private' or 'cloud'") + + +def browser_new(kind="private", *, backend=None, profile="clean", proxy_country=None, reason=None): """Create a browser, switch this agent to it, and return concise state.""" resp = manager_client.new_browser( - backend=backend, + backend=_manager_backend(kind, backend), profile=profile, proxy_country=proxy_country, reason=reason, diff --git a/src/browser_harness/run.py b/src/browser_harness/run.py index 881c5bbd..0872ee89 100644 --- a/src/browser_harness/run.py +++ b/src/browser_harness/run.py @@ -40,7 +40,7 @@ PY Helpers are pre-imported. The daemon auto-starts and connects to the running browser. 
-For local Chrome, first choose a stable profile id with list_local_profiles() and use_local_profile(id). +For local Chrome, first choose a stable profile id with browser_profiles() and browser_use_profile(id). Commands: browser-harness --version print the installed version @@ -70,6 +70,8 @@ ) _NO_DAEMON_HELPER_NAMES = { + "browser_profiles", + "browser_use_profile", "list_local_profiles", "use_local_profile", "open_local_profile", @@ -127,7 +129,7 @@ def _can_run_without_daemon(code: str) -> bool: return False if isinstance(func, ast.Attribute): # Allow simple formatting around passive helper output, e.g. - # json.dumps(list_local_profiles()). + # json.dumps(browser_profiles()). if func.attr in {"dumps", "loads"}: continue return False diff --git a/tests/unit/test_local_profiles.py b/tests/unit/test_local_profiles.py index 21aa07ff..6995f3de 100644 --- a/tests/unit/test_local_profiles.py +++ b/tests/unit/test_local_profiles.py @@ -57,6 +57,29 @@ def test_default_profile_file_roundtrip(tmp_path, monkeypatch): assert local_profiles.get_default_profile_id() == "google-chrome:Default" +def test_browser_profiles_payload_is_concise_by_default(tmp_path, monkeypatch): + install = _install(tmp_path) + monkeypatch.setenv("BH_CONFIG_DIR", str(tmp_path / "config")) + monkeypatch.setattr(local_profiles, "known_local_browser_installs", lambda: [install]) + local_profiles.set_default_profile_id("google-chrome:Default") + + assert local_profiles.list_browser_profiles_payload() == { + "selected": "google-chrome:Default", + "profiles": [ + { + "id": "google-chrome:Default", + "label": "Google Chrome - Greg", + "selected": True, + }, + { + "id": "google-chrome:Profile 1", + "label": "Google Chrome - Work", + "selected": False, + }, + ], + } + + def test_default_profile_rejects_missing_browser_binary(tmp_path, monkeypatch): install = _install(tmp_path) install.browser_path.unlink() diff --git a/tests/unit/test_manager_helpers.py b/tests/unit/test_manager_helpers.py index 
073ef564..8aced5c3 100644 --- a/tests/unit/test_manager_helpers.py +++ b/tests/unit/test_manager_helpers.py @@ -50,6 +50,74 @@ def test_browser_new_activates_binding_and_acquires_lock(monkeypatch, tmp_path): assert acquired == ["br_123"] +def test_browser_new_private_maps_to_managed_backend(monkeypatch, tmp_path): + calls = [] + old = context.get_active_binding() + try: + monkeypatch.setattr( + manager_helpers.manager_client, + "new_browser", + lambda *args, **kwargs: calls.append((args, kwargs)) or _manager_response(tmp_path), + ) + monkeypatch.setattr(manager_helpers.manager_client, "acquire_execution_for_binding", lambda binding: None) + + manager_helpers.browser_new("private", reason="test") + finally: + if old is not None: + context.activate_binding(old) + else: + context.clear_active_binding() + + assert calls[0][1]["backend"] == "managed" + + +def test_browser_new_cloud_maps_to_cloud_backend(monkeypatch, tmp_path): + calls = [] + old = context.get_active_binding() + try: + monkeypatch.setattr( + manager_helpers.manager_client, + "new_browser", + lambda *args, **kwargs: calls.append((args, kwargs)) or _manager_response(tmp_path), + ) + monkeypatch.setattr(manager_helpers.manager_client, "acquire_execution_for_binding", lambda binding: None) + + manager_helpers.browser_new("cloud") + finally: + if old is not None: + context.activate_binding(old) + else: + context.clear_active_binding() + + assert calls[0][1]["backend"] == "cloud" + + +def test_browser_profiles_returns_concise_payload(monkeypatch): + monkeypatch.setattr( + manager_helpers.local_profiles, + "list_browser_profiles_payload", + lambda verbose=False: {"selected": "google-chrome:Default", "profiles": []}, + ) + + assert manager_helpers.browser_profiles() == { + "selected": "google-chrome:Default", + "profiles": [], + } + + +def test_browser_use_profile_returns_selected_profile(monkeypatch): + monkeypatch.setattr( + manager_helpers.local_profiles, + "use_browser_profile", + lambda profile_id: 
{"selected": profile_id, "label": "Google Chrome - Default"}, + ) + + assert manager_helpers.browser_use_profile("google-chrome:Default") == { + "selected": "google-chrome:Default", + "label": "Google Chrome - Default", + } + + def test_browser_switch_does_not_activate_binding_when_lock_fails(monkeypatch, tmp_path): old = context.get_active_binding() previous = context.BrowserBinding( diff --git a/tests/unit/test_run_manager_mode.py b/tests/unit/test_run_manager_mode.py index 7d6a931b..29b0db16 100644 --- a/tests/unit/test_run_manager_mode.py +++ b/tests/unit/test_run_manager_mode.py @@ -42,6 +42,38 @@ def test_manager_helper_call_enables_manager_mode_without_env(monkeypatch): assert "BH_MANAGER_MODE" in os.environ +def test_browser_profiles_runs_without_daemon(monkeypatch): + stdout = StringIO() + fake_stdin = StringIO("print(browser_profiles())") + + with patch.object(sys, "argv", ["browser-harness"]), \ + patch("sys.stdin", fake_stdin), \ + patch("sys.stdout", stdout), \ + patch("browser_harness.run.print_update_banner"), \ + patch("browser_harness.run.ensure_daemon") as ensure_daemon, \ + patch("browser_harness.run.browser_profiles", lambda: {"profiles": []}): + run.main() + + ensure_daemon.assert_not_called() + assert stdout.getvalue().strip() == "{'profiles': []}" + + +def test_browser_use_profile_runs_without_daemon(monkeypatch): + stdout = StringIO() + fake_stdin = StringIO("print(browser_use_profile('google-chrome:Default'))") + + with patch.object(sys, "argv", ["browser-harness"]), \ + patch("sys.stdin", fake_stdin), \ + patch("sys.stdout", stdout), \ + patch("browser_harness.run.print_update_banner"), \ + patch("browser_harness.run.ensure_daemon") as ensure_daemon, \ + patch("browser_harness.run.browser_use_profile", lambda profile_id: {"selected": profile_id}): + run.main() + + ensure_daemon.assert_not_called() + assert stdout.getvalue().strip() == "{'selected': 'google-chrome:Default'}" + + def 
test_manager_mode_releases_execution_lock_on_exception(monkeypatch): monkeypatch.setenv("BH_MANAGER_SOCKET", "/tmp/nonexistent-manager.sock") fake_stdin = StringIO("raise RuntimeError('boom')") From 8281995350d83664e960ee221f3da39095b08d58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gregor=20=C5=BDuni=C4=8D?= <36313686+gregpr07@users.noreply.github.com> Date: Thu, 18 Jun 2026 11:44:33 -0700 Subject: [PATCH 06/15] Make managed browser selection explicit --- README.md | 2 +- SKILL.md | 46 ++++++++--- install.md | 17 ++--- interaction-skills/profile-sync.md | 29 +++---- skills/browser-harness/SKILL.md | 69 ++++++++++++----- src/browser_harness/context.py | 2 +- src/browser_harness/manager_client.py | 4 +- src/browser_harness/manager_daemon.py | 102 ++++++++++++++----------- src/browser_harness/manager_helpers.py | 28 ++++--- src/browser_harness/run.py | 10 ++- tests/unit/test_context.py | 14 ++++ tests/unit/test_manager_daemon.py | 86 +++++++++++++++++++-- tests/unit/test_manager_helpers.py | 74 +++++++++++++----- tests/unit/test_run_manager_mode.py | 21 +++++ 14 files changed, 358 insertions(+), 146 deletions(-) diff --git a/README.md b/README.md index 6ae11056..a8aaaf55 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ Stealth, sub-agents, or headless deployment.
- `agent-workspace/agent_helpers.py` — helper code the agent edits - `agent-workspace/domain-skills/` — reusable site-specific skills the agent edits -Plain `browser-harness` helper calls use the selected local browser profile. For isolated or cloud work, start with `browser_new("private")` or `browser_new("cloud")`; cloud responses include a `live_url` preview when available. +Plain `browser-harness` helper calls use the selected local browser profile. For isolated or cloud work, start with `browser_new("private")` or `browser_new("cloud")`, keep the returned short `id`, and call `browser(id)` before page helpers in each script; cloud responses include a `live_url` preview when available. ## Contributing diff --git a/SKILL.md b/SKILL.md index 17dc98ac..296f8b0a 100644 --- a/SKILL.md +++ b/SKILL.md @@ -5,43 +5,67 @@ description: Control a browser with Python helpers. Use for web automation, scra # browser-harness -There is one active browser. Normal page helpers act on it: +Managed browsers have short explicit ids. Create or receive an id, then select it inside each script. + +Create and use a private browser: ```bash browser-harness <<'PY' +b = browser_new("private") +browser(b["id"]) new_tab("https://docs.browser-use.com") wait_for_load() +print({"id": b["id"], "page": page_info()}) +PY +``` + +Use an existing managed browser: + +```bash +browser-harness <<'PY' +browser("abc123") print(page_info()) PY ``` -Use `browser_*` helpers only to choose, set up, or close the active browser. +`browser(id)` selects a browser for this script only. Do not rely on a current browser across separate shell commands. + +Inspect managed browsers: + +```bash +browser-harness <<'PY' +print(browser_list()) +print(browser_status("abc123")) +PY +``` + +`browser_list()` shows `state: "busy"` while a script is actively using that browser, including the current script. ## Choose Browser - User's logged-in local Chrome: use normal helpers. 
If setup asks for a profile, run `browser_profiles()`, ask the user which `id` to use, then run `browser_use_profile(id)` and retry. -- Isolated local browser: `browser_new("private")`. -- Browser Use cloud browser with live view: `browser_new("cloud")`. -- Subagent: use `browser_new("private")` unless the parent gave you a `browser_id`. -- Given a `browser_id`: `browser_switch(browser_id)`. -- Done with a private or cloud browser: `browser_close()`. +- Isolated local browser: `browser_new("private")`, then keep the returned `id`. +- Browser Use cloud browser with live view: `browser_new("cloud")`, then keep the returned `id`. +- Managed browser page work: call `browser(id)` first in the script. +- Subagent: if the parent gives an id, start browser scripts with `browser(id)` and do not close it unless asked. +- Done with a private or cloud browser: `browser_close(id)`. ## Browser Helpers ```python -browser_status() +browser_status(id) browser_profiles() browser_use_profile(profile_id) browser_new("private") browser_new("cloud") +browser(id) browser_list() -browser_switch(browser_id) -browser_close() +browser_close(id) ``` `browser_profiles()` and `browser_use_profile(...)` are local setup calls. They do not start browser work. -After `browser_new(...)` or `browser_switch(...)`, keep using the normal page helpers: `new_tab`, `page_info`, `capture_screenshot`, `click_at_xy`, `type_text`, `js`, and `cdp`. +Inside one Python script, `browser(id)` attaches the process to that browser so normal page helpers work: `new_tab`, `page_info`, `capture_screenshot`, `click_at_xy`, `type_text`, `js`, and `cdp`. 
If `browser_new("cloud")` reports `cloud-auth-required`, run: diff --git a/install.md b/install.md index 14ec3bd6..ca8fb5fd 100644 --- a/install.md +++ b/install.md @@ -52,15 +52,10 @@ Chrome / Browser Use cloud -> CDP WS -> browser_harness.daemon -> IPC -> browser optional browser_harness.manager_daemon owns many isolated browser leases ``` -- Protocol is one JSON line each way. -- Requests are {method, params, session_id} for CDP or {meta: ...} for daemon control. -- Responses are {result} / {error} / {events} / {session_id}. -- IPC: Unix socket at `/tmp/bu-.sock` on POSIX, TCP loopback + port file on Windows. -- BU_NAME namespaces the daemon's IPC, pid, and log files. -- BU_CDP_WS overrides local Chrome discovery for remote browsers. -- BU_CDP_URL overrides local Chrome discovery with a specific DevTools HTTP endpoint (used for Way 2). -- BU_BROWSER_ID + BROWSER_USE_API_KEY lets the daemon stop a Browser Use cloud browser on shutdown. -- The browser manager auto-starts when `browser_status`, `browser_new`, `browser_list`, `browser_switch`, or `browser_close` is used. +- The CLI talks to a local per-browser daemon over IPC. +- `BU_CDP_URL` points the normal local-browser daemon at a specific DevTools HTTP endpoint for Way 2. +- The browser manager auto-starts when `browser`, `browser_status`, `browser_new`, `browser_list`, or `browser_close` is used. +- Managed browser scripts select an explicit short id with `browser(id)`; agents should not set daemon namespace variables for normal use. - Cloud browser creation reads Browser Use auth from `BROWSER_USE_API_KEY` first, then the local `browser-harness auth login` store. ## Browser Use Cloud auth @@ -110,7 +105,7 @@ This section is the source of truth for how browser-harness connects to a browse Browser-harness can connect to any Chrome or Chromium-based browser on your computer, or to a Browser Use cloud browser. -**Cloud browsers** are managed by the Browser Use cloud API. 
Start one with `browser_new("cloud", proxy_country="us")`. Authentication is via `BROWSER_USE_API_KEY` or `browser-harness auth login`; the harness handles the WebSocket URL itself. To carry your local Chrome cookies into a cloud browser, install `profile-use` once (`curl -fsSL https://browser-use.com/profile.sh | sh`), then call `uuid = sync_local_profile("MyChromeProfile")` followed by the advanced cloud-profile flow. Cookies are the only thing synced — not localStorage, not extensions, not history. +**Cloud browsers** are managed by the Browser Use cloud API. Start one with `browser_new("cloud", proxy_country="us")`, keep the returned `id`, and call `browser(id)` before page helpers in each script. Authentication is via `BROWSER_USE_API_KEY` or `browser-harness auth login`; the harness handles the WebSocket URL itself. Cookie profile sync is advanced and opt-in; read `interaction-skills/profile-sync.md` only when the user explicitly asks to sync local cookies into Browser Use cloud profiles. **Local browsers** require remote debugging to be enabled. There are two ways, and they suit different use cases. @@ -143,7 +138,7 @@ If the user hasn't said which connection method to use, default to Way 1 if Chro PY ``` - If it prints page info, you're done. If it reports `needs-profile`, run `browser_profiles()`, choose a stable profile id with the user, call `browser_use_profile(profile_id)`, then retry. + If it prints page info, you're done. If it reports `needs-profile`, run `browser_profiles()`, choose a stable profile id with the user, call `browser_use_profile(profile_id)`, then retry. For private or cloud manager browsers, use `browser_new(...)` first, then select the returned id with `browser(id)`. 2. Otherwise run `browser-harness --doctor`. The two lines that matter for connection are `chrome running` and `daemon alive`. 
diff --git a/interaction-skills/profile-sync.md b/interaction-skills/profile-sync.md index c706ad48..cfbb0538 100644 --- a/interaction-skills/profile-sync.md +++ b/interaction-skills/profile-sync.md @@ -1,6 +1,8 @@ # Profile sync -Make a remote Browser Use browser start already logged in, by uploading cookies from a local Chrome profile. +Advanced only. Use this when the user explicitly asks to upload local Chrome cookies into Browser Use cloud profiles. For normal cloud browser work, use `browser_new("cloud")`, keep the returned `id`, and call `browser(id)` before page helpers. + +This file manages cloud cookie profiles. It does not replace the explicit browser id flow. ## One-time install @@ -16,8 +18,8 @@ Downloads `profile-use` (macOS / Linux, x64 / arm64). The Python helpers shell o list_cloud_profiles() # [{id, name, userId, cookieDomains, lastUsedAt}, ...] — every profile under this API key -list_local_profiles() -# [{BrowserName, ProfileName, DisplayName, ProfilePath, ...}, ...] — detected on this machine +browser_profiles(verbose=True) +# {"profiles": [{"id", "profile_name", "display_name", "profile_path", ...}, ...]} sync_local_profile(profile_name, browser=None, cloud_profile_id=None, # update an existing cloud profile instead of creating new @@ -25,11 +27,6 @@ sync_local_profile(profile_name, browser=None, exclude_domains=None) # drop these domains; applied before include # Shells out to `profile-use sync`. Returns the cloud profile UUID # (the existing one if cloud_profile_id was passed, else the newly-created one). - -start_remote_daemon("work", profileName="my-work") # name→id resolved client-side -start_remote_daemon("work", profileId="") # or pass UUID directly - -stop_remote_daemon("work") # shut the daemon and PATCH the cloud browser to stop — billing ends ``` `sync_local_profile` prints `♻️ Using existing cloud profile` when `cloud_profile_id` is accepted, or `📝 Creating remote profile...` → `✓ Profile created: ` when it creates a new one. 
Check that line if you want to confirm which path ran. @@ -46,19 +43,16 @@ for p in list_cloud_profiles(): → Agent: *"You have these cloud profiles ( domains each). Want to reuse one, sync a local profile, or start clean?"* ```python -# 2a. Reuse cloud → one call. -start_remote_daemon("work", profileName="browser-use.com") - -# 2b. Sync local first. Show the options: -for lp in list_local_profiles(): - print(lp["DisplayName"]) +# 2. Sync local first. Show the options: +for lp in browser_profiles(verbose=True)["profiles"]: + print(lp["id"], lp["display_name"]) ``` → Agent: *"Which local profile?"* → user picks → before syncing, inspect domain-level cookie counts with `profile-use inspect --profile ` (or `--verbose` for individual cookies) and report the summary; never dump 500 cookies into chat. ```python -# 3. Sync + use. Returns the cloud UUID. +# 3. Sync. Returns the cloud profile UUID. uuid = sync_local_profile("browser-use.com") -start_remote_daemon("work", profileId=uuid) +print({"cloud_profile_id": uuid}) # 3b. Refresh that same cloud profile later (idempotent — no duplicate profiles). sync_local_profile("browser-use.com", cloud_profile_id=uuid) @@ -73,13 +67,10 @@ sync_local_profile("browser-use.com", **Cookies only.** No localStorage, no IndexedDB, no extensions. Enough for session-cookie sites (Google, GitHub, Stripe, most SaaS); not for sites that store auth in localStorage. -Cookies mutated during a remote session only persist on a clean `PATCH /browsers/{id} {"action":"stop"}` — the daemon does this on shutdown when `BU_BROWSER_ID` + `BROWSER_USE_API_KEY` are set (default for remote daemons). Sessions that hit the timeout lose in-session state. - ## Cloud profile CRUD - UI: https://cloud.browser-use.com/settings?tab=profiles - API: `GET /profiles`, `GET/PATCH/DELETE /profiles/{id}` (paths are relative to `BU_API = "https://api.browser-use.com/api/v3"` in `admin.py`). Fields: `id`, `name`, `userId`, `lastUsedAt`, `cookieDomains[]`. 
`list_cloud_profiles()` wraps this. -- Name → UUID: `profileName=` on `start_remote_daemon` resolves client-side; no API change needed. - Need the UUID for an existing profile? `matches = [p["id"] for p in list_cloud_profiles() if p["name"] == ""]` — then verify `len(matches) == 1` before using it. Profile names are not unique; syncs create duplicates unless you pass `cloud_profile_id=`. - Lower-level raw calls: `from browser_harness.admin import _browser_use; _browser_use("/profiles/", "DELETE")`. Pass the path *without* the `/api/v3` prefix — it's already on `BU_API`. diff --git a/skills/browser-harness/SKILL.md b/skills/browser-harness/SKILL.md index 5d95d236..292ee2f1 100644 --- a/skills/browser-harness/SKILL.md +++ b/skills/browser-harness/SKILL.md @@ -5,7 +5,7 @@ description: Direct browser control via CDP — automate, scrape, test, or inter # browser-harness -Direct browser control via CDP. You drive the user's real browser with Python helpers run through the `browser-harness` command. +Direct browser control via CDP. You drive a selected browser with Python helpers run through the `browser-harness` command. ## Prerequisite (one-time — NOT part of the AI workflow) @@ -13,19 +13,69 @@ This skill is instructions only. It assumes the `browser-harness` command is alr ## Usage +Managed browsers have short explicit ids. Create or receive an id, then select it inside each script. + +Create and use a private browser: + ```bash browser-harness <<'PY' +b = browser_new("private") +browser(b["id"]) new_tab("https://docs.browser-use.com") wait_for_load() +print({"id": b["id"], "page": page_info()}) +PY +``` + +Use an existing managed browser: + +```bash +browser-harness <<'PY' +browser("abc123") print(page_info()) PY ``` +Inspect managed browsers: + +```bash +browser-harness <<'PY' +print(browser_list()) +print(browser_status("abc123")) +PY +``` + +- `browser(id)` selects a browser for this script only. Do not rely on a current browser across separate shell commands. 
+- `browser_list()` shows `state: "busy"` while a script is actively using that browser, including the current script. - Invoke as `browser-harness` — it's on `$PATH` after install. No `cd`, no `uv run`. - Use the heredoc form for every multi-line command. It prevents shell quote mangling inside Python strings and JavaScript snippets. - First navigation is `new_tab(url)`, not `goto_url(url)` — goto runs in the user's active tab and clobbers their work. - Helpers are pre-imported and the daemon auto-starts; you never start/stop it manually unless you want to. +## Choose Browser + +- User's logged-in local Chrome: use normal helpers. If setup asks for a profile, run `browser_profiles()`, ask the user which `id` to use, then run `browser_use_profile(id)` and retry. +- Isolated local browser: `browser_new("private")`, then keep the returned `id`. +- Browser Use cloud browser with live view: `browser_new("cloud")`, then keep the returned `id`. +- Managed browser page work: call `browser(id)` first in the script. +- Subagent: if the parent gives an id, start browser scripts with `browser(id)` and do not close it unless asked. +- Done with a private or cloud browser: `browser_close(id)`. + +## Browser Helpers + +```python +browser_status(id) +browser_profiles() +browser_use_profile(profile_id) +browser_new("private") +browser_new("cloud") +browser(id) +browser_list() +browser_close(id) +``` + +If `browser_new("cloud")` reports `cloud-auth-required`, run `browser-harness auth login`. + ## What actually works - **Screenshots first.** `capture_screenshot()` to understand the page, find visible targets, and decide whether you need a click, a selector, or more navigation. @@ -40,23 +90,6 @@ PY After every meaningful action, re-screenshot before assuming it worked. -## Remote / cloud browsers - -Use remote for parallel sub-agents (each gets an isolated browser via a distinct `BU_NAME`) or on a headless server. `BROWSER_USE_API_KEY` must be set. 
- -```bash -browser-harness <<'PY' -start_remote_daemon("work") # clean cloud browser; profileName=/profileId= to reuse a logged-in profile -PY - -BU_NAME=work browser-harness <<'PY' -new_tab("https://example.com") -print(page_info()) -PY -``` - -`start_remote_daemon` prints a `liveUrl` so the user can watch. Running remote daemons bill until timeout. - ## Interaction skills (progressive disclosure) If you struggle with a specific UI mechanic, read the matching file under `${CLAUDE_PLUGIN_ROOT}/interaction-skills/` before inventing an approach. Available: browser-wall, connection, cookies, cross-origin-iframes, dialogs, downloads, drag-and-drop, dropdowns, iframes, network-requests, print-as-pdf, profile-sync, screenshots, scrolling, shadow-dom, tabs, uploads, viewport. diff --git a/src/browser_harness/context.py b/src/browser_harness/context.py index 782981b2..a4df2a17 100644 --- a/src/browser_harness/context.py +++ b/src/browser_harness/context.py @@ -142,7 +142,7 @@ def clear_active_binding() -> None: def require_active_binding() -> BrowserBinding: binding = get_active_binding() if binding is None: - raise RuntimeError("no-active-browser: call browser_new(...) 
or browser_switch(browser_id) first") + raise RuntimeError('no-browser-selected: call browser("") before page helpers') return binding diff --git a/src/browser_harness/manager_client.py b/src/browser_harness/manager_client.py index c61ef708..d8b7fa30 100644 --- a/src/browser_harness/manager_client.py +++ b/src/browser_harness/manager_client.py @@ -149,9 +149,9 @@ def binding_from_response(resp: dict) -> context.BrowserBinding: return context.BrowserBinding.from_manager(binding) -def status() -> dict: +def status(browser_id: str | None = None) -> dict: try: - return public_state(request("status")) + return public_state(request("status", browser_id=browser_id)) except ManagerError as e: if e.response.get("state") == "manager-unavailable": return {"ready": False, "state": "manager-unavailable", "reason": str(e), "safe_actions": []} diff --git a/src/browser_harness/manager_daemon.py b/src/browser_harness/manager_daemon.py index 52ec7ad0..eafbe773 100644 --- a/src/browser_harness/manager_daemon.py +++ b/src/browser_harness/manager_daemon.py @@ -20,6 +20,7 @@ BU_API = "https://api.browser-use.com/api/v3" +RESERVED_BROWSER_IDS = {"auth", "doctor", "help", "reload", "update", "version"} MAC_BROWSER_PATHS = ( "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome", "/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary", @@ -125,13 +126,17 @@ def handle(self, req: dict) -> dict: def status(self, req: dict) -> dict: with self._lock: - key = agent_key(req) - browser_id = self.active_by_agent.get(key) + browser_id = req.get("browser_id") if not browser_id: - return {"ok": True, "ready": False, "state": "no-active-browser", "safe_actions": ["browser_new", "browser_switch"]} + return { + "ok": True, + "ready": False, + "state": "browser-id-required", + "safe_actions": ["browser_list", "browser_new"], + } lease = self.leases.get(browser_id) if not lease: - return {"ok": True, "ready": False, "state": "stale-binding", "safe_actions": ["browser_new", 
"browser_switch"]} + return {"ok": True, "ready": False, "state": "not-found", "safe_actions": ["browser_list", "browser_new"]} return ready_public(lease) def list(self, req: dict) -> dict: @@ -139,16 +144,14 @@ def list(self, req: dict) -> dict: run_id, agent_id = run_agent(req) browsers = [] for lease in self.leases.values(): - if lease.run_id != run_id: - continue - if lease.owner_agent_id != agent_id and agent_id not in lease.allowed_agents: - continue browsers.append({ - "browser_id": lease.browser_id, - "backend": lease.backend, - "owned_by_this_agent": lease.owner_agent_id == agent_id, + "id": lease.browser_id, + "backend": public_backend(lease), + "owner": lease.owner_agent_id, + "owned_by_this_agent": lease.run_id == run_id and lease.owner_agent_id == agent_id, "shared": len(lease.allowed_agents) > 1, "state": "busy" if lease.active_execution else "ready", + **({"cloud_browser_id": lease.cloud_browser_id} if lease.cloud_browser_id else {}), **({"live_url": lease.cloud_live_url} if lease.cloud_live_url else {}), }) return {"ok": True, "browsers": browsers} @@ -173,71 +176,59 @@ def new(self, req: dict) -> dict: return error("browser-start-failed", str(e), ["browser_new"]) with self._lock: self.leases[lease.browser_id] = lease - self.active_by_agent[agent_key_parts(run_id, agent_id)] = lease.browser_id self._persist() return ready_response(lease) def switch(self, req: dict) -> dict: with self._lock: - run_id, agent_id = run_agent(req) + _run_id, agent_id = run_agent(req) browser_id = req.get("browser_id") if not browser_id: return error("bad-request", "browser_id is required", ["browser_list", "browser_new"]) lease = self.leases.get(browser_id) if not lease: return error("not-found", "browser id not found", ["browser_list", "browser_new"]) - if lease.run_id != run_id: - return error("forbidden", "browser belongs to another run", ["browser_list", "browser_new"]) if agent_id not in lease.allowed_agents: lease.allowed_agents.append(agent_id) 
lease.last_used_at_ms = int(time.time() * 1000) - self.active_by_agent[agent_key_parts(run_id, agent_id)] = browser_id self._persist() return ready_response(lease) def close(self, req: dict) -> dict: cleanup = None with self._lock: - run_id, agent_id = run_agent(req) - key = agent_key_parts(run_id, agent_id) - browser_id = req.get("browser_id") or self.active_by_agent.get(key) + _run_id, agent_id = run_agent(req) + client_id = req.get("client_id") or agent_id + browser_id = req.get("browser_id") if not browser_id: - return {"ok": True, "ready": False, "state": "no-active-browser"} + return error("bad-request", "browser id is required; use browser_close(id)", ["browser_list"]) lease = self.leases.get(browser_id) if not lease: - self.active_by_agent.pop(key, None) - self._persist() - return {"ok": True, "ready": False, "state": "stale-binding"} - if lease.run_id != run_id or agent_id not in lease.allowed_agents: - return error("forbidden", "browser belongs to another run or agent", ["browser_list"]) - if agent_id in lease.allowed_agents: - lease.allowed_agents.remove(agent_id) - if lease.owner_agent_id == agent_id or not lease.allowed_agents: - cleanup = lease - self.leases.pop(browser_id, None) - self.active_by_agent = {k: v for k, v in self.active_by_agent.items() if v != browser_id} - self._persist() - resp = {"ok": True, "ready": False, "state": "closed", "browser_id": browser_id} - else: - self.active_by_agent.pop(key, None) - self._persist() - resp = {"ok": True, "ready": False, "state": "released", "browser_id": browser_id} + return {"ok": True, "ready": False, "state": "not-found", "id": browser_id} + active = lease.active_execution or {} + if active and active.get("client_id") != client_id: + return error("busy", "browser is currently active in another browser-harness process", ["wait"]) + cleanup = lease + self.leases.pop(browser_id, None) + self.active_by_agent = {k: v for k, v in self.active_by_agent.items() if v != browser_id} + self._persist() + resp = 
{"ok": True, "ready": False, "state": "closed", "id": browser_id} if cleanup is not None: cleanup_backend(cleanup) return resp def lock(self, req: dict) -> dict: with self._lock: - run_id, agent_id = run_agent(req) + _run_id, agent_id = run_agent(req) client_id = req.get("client_id") or agent_id browser_id = req.get("browser_id") if not browser_id: - return error("bad-request", "browser_id is required", ["browser_new", "browser_switch"]) + return error("bad-request", "browser id is required; call browser(id)", ["browser_new", "browser_list"]) lease = self.leases.get(browser_id) if not lease: return error("not-found", "browser id not found", ["browser_list", "browser_new"]) - if lease.run_id != run_id or agent_id not in lease.allowed_agents: - return error("forbidden", "browser belongs to another run or agent", ["browser_list", "browser_new"]) + if agent_id not in lease.allowed_agents: + lease.allowed_agents.append(agent_id) active = lease.active_execution or {} if active and active.get("client_id") != client_id: return error("busy", "browser is currently active in another browser-harness process", ["wait", "browser_new"]) @@ -268,8 +259,8 @@ def unlock(self, req: dict) -> dict: def _allocate_lease(self, run_id: str, agent_id: str, backend: str, profile_kind: str) -> BrowserLease: self.next_seq += 1 - short = f"{int(time.time() * 1000):x}{self.next_seq:x}" - browser_id = f"br_{short}" + browser_id = self._new_browser_id() + short = f"{int(time.time() * 1000):x}{self.next_seq:x}{browser_id}" bu_name = f"bh_{short[-16:]}" base = self.root / "leases" / browser_id runtime_dir = base / "r" @@ -294,6 +285,14 @@ def _allocate_lease(self, run_id: str, agent_id: str, backend: str, profile_kind allowed_agents=[agent_id], ) + def _new_browser_id(self) -> str: + alphabet = "abcdefghijklmnopqrstuvwxyz0123456789" + for _ in range(100): + browser_id = "".join(secrets.choice(alphabet) for _ in range(6)) + if browser_id not in RESERVED_BROWSER_IDS and browser_id not in self.leases: 
+ return browser_id + return secrets.token_hex(8) + def start_cloud_backend(lease: BrowserLease, proxy_country: str | None): auth.get_browser_use_api_key() @@ -304,6 +303,13 @@ def start_cloud_backend(lease: BrowserLease, proxy_country: str | None): lease.cloud_browser_id = browser["id"] lease.cloud_live_url = browser.get("liveUrl") lease.cdp_url = browser["cdpUrl"] + if not lease.cloud_live_url: + try: + browser = _browser_use(f"/browsers/{lease.cloud_browser_id}", "GET") + lease.cloud_live_url = browser.get("liveUrl") + lease.cdp_url = browser.get("cdpUrl") or lease.cdp_url + except Exception: + pass try: start_harness_daemon(lease) except BaseException: @@ -475,10 +481,12 @@ def ready_public(lease: BrowserLease) -> dict: "ok": True, "ready": True, "state": "ready", - "browser_id": lease.browser_id, - "backend": lease.backend, + "id": lease.browser_id, + "backend": public_backend(lease), "shared": len(lease.allowed_agents) > 1, } + if lease.cloud_browser_id: + state["cloud_browser_id"] = lease.cloud_browser_id if lease.cloud_live_url: state["live_url"] = lease.cloud_live_url return state @@ -488,6 +496,10 @@ def ready_response(lease: BrowserLease) -> dict: return {**ready_public(lease), "binding": lease.binding()} +def public_backend(lease: BrowserLease) -> str: + return "private" if lease.backend == "managed" else lease.backend + + def error(state: str, reason: str, safe_actions: list[str]) -> dict: return {"ok": False, "ready": False, "state": state, "reason": reason, "safe_actions": safe_actions} diff --git a/src/browser_harness/manager_helpers.py b/src/browser_harness/manager_helpers.py index 0e4bca02..e1aa1053 100644 --- a/src/browser_harness/manager_helpers.py +++ b/src/browser_harness/manager_helpers.py @@ -6,9 +6,9 @@ from . 
import manager_client -def browser_status(): - """Return lifecycle state for the active browser binding.""" - return manager_client.status() +def browser_status(browser_id=None): + """Return lifecycle state for a browser id, or manager guidance if omitted.""" + return manager_client.status(browser_id) def browser_profiles(verbose=False): @@ -31,21 +31,18 @@ def _manager_backend(kind, backend=None): def browser_new(kind="private", *, backend=None, profile="clean", proxy_country=None, reason=None): - """Create a browser, switch this agent to it, and return concise state.""" + """Create a managed browser and return its short id.""" resp = manager_client.new_browser( backend=_manager_backend(kind, backend), profile=profile, proxy_country=proxy_country, reason=reason, ) - binding = manager_client.binding_from_response(resp) - manager_client.acquire_execution_for_binding(binding) - context.activate_binding(binding) return manager_client.public_state(resp) -def browser_switch(browser_id): - """Switch this agent/process to an existing allowed browser id.""" +def browser(browser_id): + """Select a managed browser id for this Python script.""" resp = manager_client.switch_browser(browser_id) binding = manager_client.binding_from_response(resp) manager_client.acquire_execution_for_binding(binding) @@ -53,15 +50,22 @@ def browser_switch(browser_id): return manager_client.public_state(resp) +def browser_switch(browser_id): + """Compatibility alias for browser(id).""" + return browser(browser_id) + + def browser_list(): - """List concise browser ids visible to this run/agent.""" + """List concise browser ids known to the manager.""" return manager_client.list_browsers() def browser_close(browser_id=None): - """Close private browsers or release this agent's access to shared browsers.""" + """Close a browser by explicit id.""" + if not browser_id: + raise ValueError("browser_close(id) requires a browser id") active = context.get_active_binding() - closing_active = browser_id is 
None or (active and active.browser_id == browser_id) + closing_active = active and active.browser_id == browser_id if closing_active: manager_client.release_active_execution_lock() resp = manager_client.close_browser(browser_id) diff --git a/src/browser_harness/run.py b/src/browser_harness/run.py index 0872ee89..9df43eb2 100644 --- a/src/browser_harness/run.py +++ b/src/browser_harness/run.py @@ -35,12 +35,14 @@ Typical usage: browser-harness <<'PY' + browser("abc123") ensure_real_tab() print(page_info()) PY Helpers are pre-imported. The daemon auto-starts and connects to the running browser. -For local Chrome, first choose a stable profile id with browser_profiles() and browser_use_profile(id). +Create a browser with browser_new("private") or browser_new("cloud"), then select it with browser(id). +For local Chrome setup, first choose a stable profile id with browser_profiles() and browser_use_profile(id). Commands: browser-harness --version print the installed version @@ -57,11 +59,17 @@ USAGE = """Usage: browser-harness <<'PY' + browser("abc123") print(page_info()) PY + + browser-harness <<'PY' + print(browser_new("private")) + PY """ _MANAGER_HELPER_NAMES = ( + "browser", "browser_status", "browser_new", "browser_switch", diff --git a/tests/unit/test_context.py b/tests/unit/test_context.py index 4ea371b0..144324be 100644 --- a/tests/unit/test_context.py +++ b/tests/unit/test_context.py @@ -1,4 +1,5 @@ from pathlib import Path +import pytest from browser_harness import context, helpers @@ -73,3 +74,16 @@ def test_agent_identity_uses_codex_thread_fallback(monkeypatch): assert ident.run_id == "thread-123" assert ident.agent_id == "main" assert ident.degraded is False + + +def test_require_active_binding_explains_browser_selector(): + old = context.get_active_binding() + context.clear_active_binding() + try: + with pytest.raises(RuntimeError, match='call browser\\(""\\)'): + context.require_active_binding() + finally: + if old is not None: + 
context.activate_binding(old) + else: + context.clear_active_binding() diff --git a/tests/unit/test_manager_daemon.py b/tests/unit/test_manager_daemon.py index e2504f44..bfb73cc3 100644 --- a/tests/unit/test_manager_daemon.py +++ b/tests/unit/test_manager_daemon.py @@ -18,7 +18,6 @@ def _manager_with_lease(tmp_path): manager = Manager(tmp_path) lease = manager._allocate_lease("run-1", "agent-1", "cloud", "clean") manager.leases[lease.browser_id] = lease - manager.active_by_agent["run-1/agent-1"] = lease.browser_id return manager, lease @@ -76,21 +75,50 @@ def test_unlock_requires_same_client_process(tmp_path): assert second["state"] == "busy" -def test_close_rejects_other_runs(tmp_path): +def test_close_requires_explicit_id(tmp_path): manager, lease = _manager_with_lease(tmp_path) resp = manager.handle({ "op": "close", - "run_id": "other-run", + "run_id": "run-1", + "agent_id": "agent-1", + }) + + assert resp["ok"] is False + assert resp["state"] == "bad-request" + assert lease.browser_id in manager.leases + + +def test_close_rejects_browser_busy_in_another_client(tmp_path): + manager, lease = _manager_with_lease(tmp_path) + manager.handle({ + "op": "lock", + "run_id": "run-1", "agent_id": "agent-1", + "client_id": "client-1", + "browser_id": lease.browser_id, + }) + + resp = manager.handle({ + "op": "close", + "run_id": "run-1", + "agent_id": "agent-1", + "client_id": "client-2", "browser_id": lease.browser_id, }) assert resp["ok"] is False - assert resp["state"] == "forbidden" + assert resp["state"] == "busy" assert lease.browser_id in manager.leases +def test_short_browser_ids_have_no_prefix(tmp_path): + manager, lease = _manager_with_lease(tmp_path) + + assert len(lease.browser_id) == 6 + assert not lease.browser_id.startswith("br_") + + def test_cloud_live_url_is_exposed_in_ready_state(tmp_path): manager, lease = _manager_with_lease(tmp_path) lease.cloud_live_url = "https://live.example/session" @@ -99,14 +127,33 @@ def 
test_cloud_live_url_is_exposed_in_ready_state(tmp_path): "op": "status", "run_id": "run-1", "agent_id": "agent-1", + "browser_id": lease.browser_id, }) assert resp["ok"] is True + assert resp["id"] == lease.browser_id assert resp["live_url"] == "https://live.example/session" +def test_cloud_browser_id_is_exposed_in_ready_state(tmp_path): + manager, lease = _manager_with_lease(tmp_path) + lease.cloud_browser_id = "browser-123" + + resp = manager.handle({ + "op": "status", + "run_id": "run-1", + "agent_id": "agent-1", + "browser_id": lease.browser_id, + }) + + assert resp["ok"] is True + assert resp["id"] == lease.browser_id + assert resp["cloud_browser_id"] == "browser-123" + + def test_cloud_live_url_is_exposed_in_browser_list(tmp_path): manager, lease = _manager_with_lease(tmp_path) + lease.cloud_browser_id = "browser-123" lease.cloud_live_url = "https://live.example/session" resp = manager.handle({ @@ -118,16 +165,45 @@ def test_cloud_live_url_is_exposed_in_browser_list(tmp_path): assert resp["ok"] is True assert resp["browsers"] == [ { - "browser_id": lease.browser_id, + "id": lease.browser_id, "backend": "cloud", + "owner": "agent-1", "owned_by_this_agent": True, "shared": False, "state": "ready", + "cloud_browser_id": "browser-123", "live_url": "https://live.example/session", } ] +def test_start_cloud_backend_fetches_missing_live_url(monkeypatch, tmp_path): + lease = Manager(tmp_path)._allocate_lease("run-1", "agent-1", "cloud", "clean") + calls = [] + + def fake_browser_use(path, method, body=None): + calls.append((path, method, body)) + if (path, method) == ("/browsers", "POST"): + return {"id": "browser-123", "cdpUrl": "https://cdp.initial"} + if (path, method) == ("/browsers/browser-123", "GET"): + return {"id": "browser-123", "cdpUrl": "https://cdp.refreshed", "liveUrl": "https://live.example/session"} + raise AssertionError((path, method, body)) + + monkeypatch.setattr(manager_daemon.auth, "get_browser_use_api_key", lambda: "stored-key") + 
monkeypatch.setattr(manager_daemon, "_browser_use", fake_browser_use) + monkeypatch.setattr(manager_daemon, "start_harness_daemon", lambda lease: None) + + manager_daemon.start_cloud_backend(lease, proxy_country=None) + + assert calls == [ + ("/browsers", "POST", {}), + ("/browsers/browser-123", "GET", None), + ] + assert lease.cloud_browser_id == "browser-123" + assert lease.cloud_live_url == "https://live.example/session" + assert lease.cdp_url == "https://cdp.refreshed" + + def test_cloud_new_reports_auth_required(monkeypatch, tmp_path): manager = Manager(tmp_path) monkeypatch.setattr( diff --git a/tests/unit/test_manager_helpers.py b/tests/unit/test_manager_helpers.py index 8aced5c3..0e47c4da 100644 --- a/tests/unit/test_manager_helpers.py +++ b/tests/unit/test_manager_helpers.py @@ -8,11 +8,11 @@ def _manager_response(tmp_path): "ok": True, "ready": True, "state": "ready", - "browser_id": "br_123", - "backend": "managed", + "id": "abc123", + "backend": "private", "shared": False, "binding": { - "browser_id": "br_123", + "browser_id": "abc123", "bu_name": "bh_123", "runtime_dir": str(tmp_path / "r"), "tmp_dir": str(tmp_path / "t"), @@ -24,7 +24,7 @@ def _manager_response(tmp_path): } -def test_browser_new_activates_binding_and_acquires_lock(monkeypatch, tmp_path): +def test_browser_new_creates_without_activating_binding(monkeypatch, tmp_path): acquired = [] old = context.get_active_binding() try: @@ -43,11 +43,10 @@ def test_browser_new_activates_binding_and_acquires_lock(monkeypatch, tmp_path): else: context.clear_active_binding() - assert state["browser_id"] == "br_123" + assert state["id"] == "abc123" assert "binding" not in state - assert binding is not None - assert binding.bu_name == "bh_123" - assert acquired == ["br_123"] + assert binding == old + assert acquired == [] def test_browser_new_private_maps_to_managed_backend(monkeypatch, tmp_path): @@ -59,8 +58,6 @@ def test_browser_new_private_maps_to_managed_backend(monkeypatch, tmp_path): "new_browser", 
lambda *args, **kwargs: calls.append((args, kwargs)) or _manager_response(tmp_path), ) - monkeypatch.setattr(manager_helpers.manager_client, "acquire_execution_for_binding", lambda binding: None) - manager_helpers.browser_new("private", reason="test") finally: if old is not None: @@ -80,8 +77,6 @@ def test_browser_new_cloud_maps_to_cloud_backend(monkeypatch, tmp_path): "new_browser", lambda *args, **kwargs: calls.append((args, kwargs)) or _manager_response(tmp_path), ) - monkeypatch.setattr(manager_helpers.manager_client, "acquire_execution_for_binding", lambda binding: None) - manager_helpers.browser_new("cloud") finally: if old is not None: @@ -118,10 +113,44 @@ def test_browser_use_profile_returns_selected_profile(monkeypatch): } -def test_browser_switch_does_not_activate_binding_when_lock_fails(monkeypatch, tmp_path): +def test_browser_select_activates_binding_and_acquires_lock(monkeypatch, tmp_path): + acquired = [] + old = context.get_active_binding() + try: + monkeypatch.setattr(manager_helpers.manager_client, "switch_browser", lambda browser_id: _manager_response(tmp_path)) + monkeypatch.setattr( + manager_helpers.manager_client, + "acquire_execution_for_binding", + lambda binding: acquired.append(binding.browser_id), + ) + + state = manager_helpers.browser("abc123") + binding = context.get_active_binding() + finally: + if old is not None: + context.activate_binding(old) + else: + context.clear_active_binding() + + assert state["id"] == "abc123" + assert "binding" not in state + assert binding is not None + assert binding.bu_name == "bh_123" + assert acquired == ["abc123"] + + +def test_browser_switch_aliases_browser(monkeypatch): + calls = [] + monkeypatch.setattr(manager_helpers, "browser", lambda browser_id: calls.append(browser_id) or {"id": browser_id}) + + assert manager_helpers.browser_switch("abc123") == {"id": "abc123"} + assert calls == ["abc123"] + + +def test_browser_does_not_activate_binding_when_lock_fails(monkeypatch, tmp_path): old = 
context.get_active_binding() previous = context.BrowserBinding( - browser_id="br_old", + browser_id="old123", bu_name="bh_old", runtime_dir=tmp_path / "old-r", tmp_dir=tmp_path / "old-t", @@ -142,7 +171,7 @@ def test_browser_switch_does_not_activate_binding_when_lock_fails(monkeypatch, t ) with pytest.raises(manager_helpers.manager_client.ManagerError, match="currently active"): - manager_helpers.browser_switch("br_123") + manager_helpers.browser("abc123") active = context.get_active_binding() finally: if old is not None: @@ -158,7 +187,7 @@ def test_browser_close_releases_lock_and_clears_active_binding(monkeypatch, tmp_ closed = [] old = context.get_active_binding() context.activate_binding(context.BrowserBinding( - browser_id="br_123", + browser_id="abc123", bu_name="bh_123", runtime_dir=tmp_path / "r", tmp_dir=tmp_path / "t", @@ -169,10 +198,10 @@ def test_browser_close_releases_lock_and_clears_active_binding(monkeypatch, tmp_ monkeypatch.setattr( manager_helpers.manager_client, "close_browser", - lambda browser_id=None: closed.append(browser_id) or {"ok": True, "state": "closed", "browser_id": "br_123"}, + lambda browser_id=None: closed.append(browser_id) or {"ok": True, "state": "closed", "id": "abc123"}, ) - state = manager_helpers.browser_close() + state = manager_helpers.browser_close("abc123") active = context.get_active_binding() finally: if old is not None: @@ -180,7 +209,12 @@ def test_browser_close_releases_lock_and_clears_active_binding(monkeypatch, tmp_ else: context.clear_active_binding() - assert state == {"state": "closed", "browser_id": "br_123"} + assert state == {"state": "closed", "id": "abc123"} assert released == [True] - assert closed == [None] + assert closed == ["abc123"] assert active is None + + +def test_browser_close_requires_explicit_id(): + with pytest.raises(ValueError, match="browser_close\\(id\\)"): + manager_helpers.browser_close() diff --git a/tests/unit/test_run_manager_mode.py b/tests/unit/test_run_manager_mode.py index 
29b0db16..f4629bd4 100644 --- a/tests/unit/test_run_manager_mode.py +++ b/tests/unit/test_run_manager_mode.py @@ -42,6 +42,27 @@ def test_manager_helper_call_enables_manager_mode_without_env(monkeypatch): assert "BH_MANAGER_MODE" in os.environ +def test_browser_selector_call_enables_manager_mode(monkeypatch): + monkeypatch.delenv("BH_MANAGER_SOCKET", raising=False) + monkeypatch.delenv("BH_MANAGER_MODE", raising=False) + stdout = StringIO() + fake_stdin = StringIO("print(browser('abc123'))") + switched = [] + + with patch.object(sys, "argv", ["browser-harness"]), \ + patch("sys.stdin", fake_stdin), \ + patch("sys.stdout", stdout), \ + patch("browser_harness.run.print_update_banner"), \ + patch("browser_harness.run.ensure_daemon") as ensure_daemon, \ + patch("browser_harness.run.browser", lambda browser_id: switched.append(browser_id) or {"id": browser_id}), \ + patch("browser_harness.run.manager_client.release_active_execution_lock"): + run.main() + + ensure_daemon.assert_not_called() + assert switched == ["abc123"] + assert stdout.getvalue().strip() == "{'id': 'abc123'}" + + def test_browser_profiles_runs_without_daemon(monkeypatch): stdout = StringIO() fake_stdin = StringIO("print(browser_profiles())") From 2571d25ccbe8d004408e0f9492b6eb4d2df53a0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gregor=20=C5=BDuni=C4=8D?= <36313686+gregpr07@users.noreply.github.com> Date: Thu, 18 Jun 2026 11:48:26 -0700 Subject: [PATCH 07/15] Add repo-local browser-harness dev launcher --- .gitignore | 1 + README.md | 14 ++++++++++++++ browser-harness | 19 +++++++++++++++++++ 3 files changed, 34 insertions(+) create mode 100755 browser-harness diff --git a/.gitignore b/.gitignore index 59e4f318..ecc4cba6 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ __pycache__/ *.pyc *.log .env +.browser-harness-dev/ uv.lock *.egg-info/ .idea/ diff --git a/README.md b/README.md index a8aaaf55..1a1067c7 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,20 @@ Stealth, sub-agents, or headless 
deployment.
Plain `browser-harness` helper calls use the selected local browser profile. For isolated or cloud work, start with `browser_new("private")` or `browser_new("cloud")`, keep the returned short `id`, and call `browser(id)` before page helpers in each script; cloud responses include a `live_url` preview when available. +## Development + +From a checkout, use `./browser-harness` to run the current working tree without activating a virtualenv or depending on the globally installed command: + +```bash +./browser-harness <<'PY' +print(browser_new("private")) +PY +``` + +Normal agent-facing docs should keep using `browser-harness`; the `./browser-harness` launcher is only for local repo testing. + +The dev launcher uses a short checkout-specific manager path under `/tmp`, so it does not attach to a stale global manager or another task's default manager. + ## Contributing PRs and improvements welcome. The best way to help: **contribute a new domain skill** under [agent-workspace/domain-skills/](agent-workspace/domain-skills/) for a site or task you use often (LinkedIn outreach, ordering on Amazon, filing expenses, etc.). Each skill teaches the agent the selectors, flows, and edge cases it would otherwise have to rediscover. 
diff --git a/browser-harness b/browser-harness new file mode 100755 index 00000000..4dac490e --- /dev/null +++ b/browser-harness @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" +DEV_ID="$(printf '%s' "$ROOT" | cksum | awk '{print $1}')" + +export BH_MANAGER_ROOT="${BH_MANAGER_ROOT:-/tmp/bhm-dev-$DEV_ID}" +export BH_MANAGER_SOCKET="${BH_MANAGER_SOCKET:-$BH_MANAGER_ROOT/manager.sock}" + +if [ -x "$ROOT/.venv/bin/python" ]; then + exec "$ROOT/.venv/bin/python" -m browser_harness.run "$@" +fi + +if command -v uv >/dev/null 2>&1; then + exec uv --directory "$ROOT" run python -m browser_harness.run "$@" +fi + +export PYTHONPATH="$ROOT/src${PYTHONPATH:+:$PYTHONPATH}" +exec python3 -m browser_harness.run "$@" From 2e8613e16215f81975b250541c5d7ad38b74c9ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gregor=20=C5=BDuni=C4=8D?= <36313686+gregpr07@users.noreply.github.com> Date: Thu, 18 Jun 2026 16:01:02 -0700 Subject: [PATCH 08/15] Simplify shared browser manager semantics --- SKILL.md | 6 +- skills/browser-harness/SKILL.md | 110 +------------------------ src/browser_harness/context.py | 8 +- src/browser_harness/manager_client.py | 46 +---------- src/browser_harness/manager_daemon.py | 83 ++++++++----------- src/browser_harness/manager_helpers.py | 12 ++- src/browser_harness/run.py | 8 +- tests/unit/test_manager_daemon.py | 96 ++++++++++++++------- tests/unit/test_manager_helpers.py | 75 ++++++----------- tests/unit/test_run_manager_mode.py | 14 +--- tests/unit/test_skill_docs.py | 9 ++ 11 files changed, 159 insertions(+), 308 deletions(-) mode change 100644 => 120000 skills/browser-harness/SKILL.md create mode 100644 tests/unit/test_skill_docs.py diff --git a/SKILL.md b/SKILL.md index 296f8b0a..d6a33384 100644 --- a/SKILL.md +++ b/SKILL.md @@ -28,7 +28,7 @@ print(page_info()) PY ``` -`browser(id)` selects a browser for this script only. Do not rely on a current browser across separate shell commands. 
+`browser(id)` selects a browser for this script only. Do not rely on a current browser across separate shell commands. Sharing an id means sharing that browser's tabs, cookies, downloads, and session state. Inspect managed browsers: @@ -39,7 +39,7 @@ print(browser_status("abc123")) PY ``` -`browser_list()` shows `state: "busy"` while a script is actively using that browser, including the current script. +`browser_list()` shows known managed browser ids and their owners. ## Choose Browser @@ -49,6 +49,7 @@ PY - Managed browser page work: call `browser(id)` first in the script. - Subagent: if the parent gives an id, start browser scripts with `browser(id)` and do not close it unless asked. - Done with a private or cloud browser: `browser_close(id)`. +- Done with all browsers you created: `browser_close_owned()`. ## Browser Helpers @@ -61,6 +62,7 @@ browser_new("cloud") browser(id) browser_list() browser_close(id) +browser_close_owned() ``` `browser_profiles()` and `browser_use_profile(...)` are local setup calls. They do not start browser work. diff --git a/skills/browser-harness/SKILL.md b/skills/browser-harness/SKILL.md deleted file mode 100644 index 292ee2f1..00000000 --- a/skills/browser-harness/SKILL.md +++ /dev/null @@ -1,109 +0,0 @@ ---- -name: browser-harness -description: Direct browser control via CDP — automate, scrape, test, or interact with web pages by driving the user's already-running Chrome (or a Browser Use cloud browser). Use when the user wants to click, screenshot, fill forms, extract data, or navigate real web pages. Default to screenshots + coordinate clicks, not selector hunting. Requires the one-time `browser-harness` CLI install (see references/install.md). ---- - -# browser-harness - -Direct browser control via CDP. You drive a selected browser with Python helpers run through the `browser-harness` command. - -## Prerequisite (one-time — NOT part of the AI workflow) - -This skill is instructions only. 
It assumes the `browser-harness` command is already on `$PATH`. If `command -v browser-harness` fails, do the one-time install in [references/install.md](references/install.md) first, then continue. Installation and browser-connection setup are a prerequisite; once `browser-harness <<'PY' … PY` prints page info, never run install/connection steps again as part of normal work. - -## Usage - -Managed browsers have short explicit ids. Create or receive an id, then select it inside each script. - -Create and use a private browser: - -```bash -browser-harness <<'PY' -b = browser_new("private") -browser(b["id"]) -new_tab("https://docs.browser-use.com") -wait_for_load() -print({"id": b["id"], "page": page_info()}) -PY -``` - -Use an existing managed browser: - -```bash -browser-harness <<'PY' -browser("abc123") -print(page_info()) -PY -``` - -Inspect managed browsers: - -```bash -browser-harness <<'PY' -print(browser_list()) -print(browser_status("abc123")) -PY -``` - -- `browser(id)` selects a browser for this script only. Do not rely on a current browser across separate shell commands. -- `browser_list()` shows `state: "busy"` while a script is actively using that browser, including the current script. -- Invoke as `browser-harness` — it's on `$PATH` after install. No `cd`, no `uv run`. -- Use the heredoc form for every multi-line command. It prevents shell quote mangling inside Python strings and JavaScript snippets. -- First navigation is `new_tab(url)`, not `goto_url(url)` — goto runs in the user's active tab and clobbers their work. -- Helpers are pre-imported and the daemon auto-starts; you never start/stop it manually unless you want to. - -## Choose Browser - -- User's logged-in local Chrome: use normal helpers. If setup asks for a profile, run `browser_profiles()`, ask the user which `id` to use, then run `browser_use_profile(id)` and retry. -- Isolated local browser: `browser_new("private")`, then keep the returned `id`. 
-- Browser Use cloud browser with live view: `browser_new("cloud")`, then keep the returned `id`. -- Managed browser page work: call `browser(id)` first in the script. -- Subagent: if the parent gives an id, start browser scripts with `browser(id)` and do not close it unless asked. -- Done with a private or cloud browser: `browser_close(id)`. - -## Browser Helpers - -```python -browser_status(id) -browser_profiles() -browser_use_profile(profile_id) -browser_new("private") -browser_new("cloud") -browser(id) -browser_list() -browser_close(id) -``` - -If `browser_new("cloud")` reports `cloud-auth-required`, run `browser-harness auth login`. - -## What actually works - -- **Screenshots first.** `capture_screenshot()` to understand the page, find visible targets, and decide whether you need a click, a selector, or more navigation. -- **Clicking.** `capture_screenshot()` → read the pixel off the image → `click_at_xy(x, y)` → `capture_screenshot()` to verify. Suppress the Playwright-habit reflex of "locate first, then click" — no `getBoundingClientRect`, no selector hunt. Drop to DOM only when the target has no visible geometry. Hit-testing happens in Chrome's browser process, so clicks pass through iframes / shadow DOM / cross-origin without extra work. -- **Bulk HTTP.** `http_get(url)` + `ThreadPoolExecutor`. No browser needed for static pages. -- **After goto:** `wait_for_load()`. -- **Wrong/stale tab:** `ensure_real_tab()`. -- **Verification:** `print(page_info())` is the simplest "is this alive?" check; screenshots are the default way to verify whether a visible action worked. -- **DOM reads:** use `js(...)` for inspection/extraction when a screenshot shows coordinates are the wrong tool. -- **Auth wall:** redirected to login → stop and ask the user. Don't type credentials from screenshots. -- **Raw CDP** for anything helpers don't cover: `cdp("Domain.method", params)`. - -After every meaningful action, re-screenshot before assuming it worked. 
- ## Interaction skills (progressive disclosure) - -If you struggle with a specific UI mechanic, read the matching file under `${CLAUDE_PLUGIN_ROOT}/interaction-skills/` before inventing an approach. Available: browser-wall, connection, cookies, cross-origin-iframes, dialogs, downloads, drag-and-drop, dropdowns, iframes, network-requests, print-as-pdf, profile-sync, screenshots, scrolling, shadow-dom, tabs, uploads, viewport. - -## Task-specific edits - -For task-specific helper additions, edit `${CLAUDE_PLUGIN_ROOT}/agent-workspace/agent_helpers.py`. Keep core helpers short. - -## Domain skills (opt-in) - -Community per-site playbooks live in `${CLAUDE_PLUGIN_ROOT}/agent-workspace/domain-skills/<site>/` and are **off by default**. Set `BH_DOMAIN_SKILLS=1` to enable them; when enabled and the task is site-specific, read every file in the matching `<site>/` directory before inventing an approach. - -## Design constraints - -- Coordinate clicks default. `Input.dispatchMouseEvent` goes through iframes/shadow/cross-origin at the compositor level. -- Connect to the user's running Chrome. Don't launch your own browser. -- Prefer compositor-level actions (screenshots, coordinate clicks, raw key input) over framework/DOM hacks. Reach for `interaction-skills/` only when those are the wrong tool.
diff --git a/skills/browser-harness/SKILL.md b/skills/browser-harness/SKILL.md new file mode 120000 index 00000000..4215faef --- /dev/null +++ b/skills/browser-harness/SKILL.md @@ -0,0 +1 @@ +../../SKILL.md \ No newline at end of file diff --git a/src/browser_harness/context.py b/src/browser_harness/context.py index a4df2a17..79813ce9 100644 --- a/src/browser_harness/context.py +++ b/src/browser_harness/context.py @@ -18,7 +18,6 @@ class AgentIdentity: run_id: str | None agent_id: str | None - parent_agent_id: str | None = None @property def degraded(self) -> bool: @@ -28,7 +27,6 @@ def payload(self) -> dict: return { "run_id": self.run_id, "agent_id": self.agent_id, - "parent_agent_id": self.parent_agent_id, "identity_degraded": self.degraded, } @@ -93,11 +91,7 @@ def agent_identity() -> AgentIdentity: or os.environ.get("CODEX_SUBAGENT_ID") or "main" ) - return AgentIdentity( - run_id=run_id, - agent_id=agent_id, - parent_agent_id=os.environ.get("BH_PARENT_AGENT_ID") or os.environ.get("CODEX_PARENT_AGENT_ID"), - ) + return AgentIdentity(run_id=run_id, agent_id=agent_id) def _cwd_run_id() -> str: diff --git a/src/browser_harness/manager_client.py b/src/browser_harness/manager_client.py index d8b7fa30..026ecee3 100644 --- a/src/browser_harness/manager_client.py +++ b/src/browser_harness/manager_client.py @@ -11,7 +11,6 @@ import sys import tempfile import time -from dataclasses import dataclass from . 
import context @@ -23,13 +22,6 @@ def __init__(self, response): super().__init__(reason) -@dataclass -class ExecutionLock: - browser_id: str - lock_id: str - - -_active_lock: ExecutionLock | None = None _manager_started = False _CLIENT_ID = f"{os.getpid()}_{secrets.token_hex(4)}" @@ -184,39 +176,5 @@ def close_browser(browser_id: str | None = None) -> dict: return request("close", browser_id=browser_id) -def acquire_execution(browser_id: str) -> ExecutionLock: - resp = request("lock", browser_id=browser_id) - lock_id = resp.get("lock_id") - if not lock_id: - raise ManagerError({"state": "bad-response", "reason": "manager lock response missing lock_id"}) - return ExecutionLock(browser_id=browser_id, lock_id=lock_id) - - -def release_execution(lock: ExecutionLock) -> None: - try: - request("unlock", browser_id=lock.browser_id, lock_id=lock.lock_id) - except ManagerError: - # Process shutdown should not mask the task's real exception. - pass - - -def release_active_execution_lock() -> None: - global _active_lock - lock = _active_lock - _active_lock = None - if lock is not None: - release_execution(lock) - - -def acquire_execution_for_binding(binding: context.BrowserBinding) -> None: - global _active_lock - if not binding.browser_id: - return - if _active_lock and _active_lock.browser_id == binding.browser_id: - return - release_active_execution_lock() - _active_lock = acquire_execution(binding.browser_id) - - -def active_lock() -> ExecutionLock | None: - return _active_lock +def close_owned_browsers() -> dict: + return request("close_owned") diff --git a/src/browser_harness/manager_daemon.py b/src/browser_harness/manager_daemon.py index eafbe773..9a617262 100644 --- a/src/browser_harness/manager_daemon.py +++ b/src/browser_harness/manager_daemon.py @@ -49,14 +49,13 @@ class BrowserLease: local_debug_port: int | None = None cloud_browser_id: str | None = None cloud_live_url: str | None = None - allowed_agents: list[str] = field(default_factory=list) - active_execution: 
dict | None = None created_at_ms: int = field(default_factory=lambda: int(time.time() * 1000)) last_used_at_ms: int = field(default_factory=lambda: int(time.time() * 1000)) @classmethod def from_json(cls, data: dict) -> "BrowserLease": - return cls(**data) + fields = cls.__dataclass_fields__ + return cls(**{key: value for key, value in data.items() if key in fields}) def binding(self) -> dict: return { @@ -77,7 +76,6 @@ def __init__(self, root: Path): self.root.mkdir(parents=True, exist_ok=True) self._lock = threading.RLock() self.leases: dict[str, BrowserLease] = {} - self.active_by_agent: dict[str, str] = {} self.next_seq = 0 self._load() @@ -87,16 +85,13 @@ def _load(self): except (FileNotFoundError, json.JSONDecodeError, OSError): return self.next_seq = int(data.get("next_seq") or 0) - self.active_by_agent = dict(data.get("active_by_agent") or {}) for item in data.get("leases") or []: lease = BrowserLease.from_json(item) - lease.active_execution = None self.leases[lease.browser_id] = lease def _persist(self): data = { "next_seq": self.next_seq, - "active_by_agent": self.active_by_agent, "leases": [asdict(v) for v in self.leases.values()], } tmp = self.root / "registry.json.tmp" @@ -116,6 +111,8 @@ def handle(self, req: dict) -> dict: return self.switch(req) if op == "close": return self.close(req) + if op == "close_owned": + return self.close_owned(req) if op == "lock": return self.lock(req) if op == "unlock": @@ -149,8 +146,7 @@ def list(self, req: dict) -> dict: "backend": public_backend(lease), "owner": lease.owner_agent_id, "owned_by_this_agent": lease.run_id == run_id and lease.owner_agent_id == agent_id, - "shared": len(lease.allowed_agents) > 1, - "state": "busy" if lease.active_execution else "ready", + "state": "ready", **({"cloud_browser_id": lease.cloud_browser_id} if lease.cloud_browser_id else {}), **({"live_url": lease.cloud_live_url} if lease.cloud_live_url else {}), }) @@ -188,8 +184,6 @@ def switch(self, req: dict) -> dict: lease = 
self.leases.get(browser_id) if not lease: return error("not-found", "browser id not found", ["browser_list", "browser_new"]) - if agent_id not in lease.allowed_agents: - lease.allowed_agents.append(agent_id) lease.last_used_at_ms = int(time.time() * 1000) self._persist() return ready_response(lease) @@ -197,64 +191,63 @@ def switch(self, req: dict) -> dict: def close(self, req: dict) -> dict: cleanup = None with self._lock: - _run_id, agent_id = run_agent(req) - client_id = req.get("client_id") or agent_id browser_id = req.get("browser_id") if not browser_id: return error("bad-request", "browser id is required; use browser_close(id)", ["browser_list"]) lease = self.leases.get(browser_id) if not lease: return {"ok": True, "ready": False, "state": "not-found", "id": browser_id} - active = lease.active_execution or {} - if active and active.get("client_id") != client_id: - return error("busy", "browser is currently active in another browser-harness process", ["wait"]) cleanup = lease self.leases.pop(browser_id, None) - self.active_by_agent = {k: v for k, v in self.active_by_agent.items() if v != browser_id} self._persist() resp = {"ok": True, "ready": False, "state": "closed", "id": browser_id} if cleanup is not None: cleanup_backend(cleanup) return resp + def close_owned(self, req: dict) -> dict: + cleanup = [] + with self._lock: + run_id, agent_id = run_agent(req) + owned_ids = [ + browser_id + for browser_id, lease in self.leases.items() + if lease.run_id == run_id and lease.owner_agent_id == agent_id + ] + for browser_id in owned_ids: + cleanup.append(self.leases.pop(browser_id)) + self._persist() + for lease in cleanup: + cleanup_backend(lease) + return { + "ok": True, + "ready": False, + "state": "closed-owned", + "closed": [lease.browser_id for lease in cleanup], + } + def lock(self, req: dict) -> dict: + """Compatibility no-op for old manager clients. + + Browser ids are explicit shared handles now. 
Selecting the same browser + from multiple processes is allowed; callers that still ask for a lock + get a stable success response without exclusive ownership. + """ with self._lock: - _run_id, agent_id = run_agent(req) - client_id = req.get("client_id") or agent_id browser_id = req.get("browser_id") if not browser_id: return error("bad-request", "browser id is required; call browser(id)", ["browser_new", "browser_list"]) lease = self.leases.get(browser_id) if not lease: return error("not-found", "browser id not found", ["browser_list", "browser_new"]) - if agent_id not in lease.allowed_agents: - lease.allowed_agents.append(agent_id) - active = lease.active_execution or {} - if active and active.get("client_id") != client_id: - return error("busy", "browser is currently active in another browser-harness process", ["wait", "browser_new"]) - if active and active.get("client_id") == client_id: - return {"ok": True, "state": "ready", "browser_id": browser_id, "lock_id": active["lock_id"]} - lock_id = f"lk_{int(time.time() * 1000):x}_{secrets.token_hex(4)}" - lease.active_execution = {"agent_id": agent_id, "client_id": client_id, "lock_id": lock_id} - self._persist() - return {"ok": True, "state": "ready", "browser_id": browser_id, "lock_id": lock_id} + return {"ok": True, "state": "ready", "browser_id": browser_id, "lock_id": req.get("lock_id") or "shared"} def unlock(self, req: dict) -> dict: with self._lock: - _run_id, agent_id = run_agent(req) - client_id = req.get("client_id") or agent_id browser_id = req.get("browser_id") lease = self.leases.get(browser_id or "") if not lease: return {"ok": True, "state": "not-found"} - active = lease.active_execution or {} - if ( - active.get("agent_id") == agent_id - and active.get("client_id") == client_id - and active.get("lock_id") == req.get("lock_id") - ): - lease.active_execution = None - self._persist() return {"ok": True, "state": "released", "browser_id": browser_id} def _allocate_lease(self, run_id: str, agent_id: str, 
backend: str, profile_kind: str) -> BrowserLease: @@ -282,7 +275,6 @@ def _allocate_lease(self, run_id: str, agent_id: str, backend: str, profile_kind download_dir=str(download_dir), artifact_dir=str(artifact_dir), profile_dir=str(profile_dir), - allowed_agents=[agent_id], ) def _new_browser_id(self) -> str: @@ -483,7 +475,6 @@ def ready_public(lease: BrowserLease) -> dict: "state": "ready", "id": lease.browser_id, "backend": public_backend(lease), - "shared": len(lease.allowed_agents) > 1, } if lease.cloud_browser_id: state["cloud_browser_id"] = lease.cloud_browser_id @@ -508,14 +499,6 @@ def run_agent(req: dict) -> tuple[str, str]: return sanitize(req.get("run_id") or "unknown-run"), sanitize(req.get("agent_id") or "unknown-agent") -def agent_key(req: dict) -> str: - return agent_key_parts(*run_agent(req)) - - -def agent_key_parts(run_id: str, agent_id: str) -> str: - return f"{run_id}/{agent_id}" - - def sanitize(value: str) -> str: out = "".join(c for c in str(value) if c.isalnum() or c in "_-")[:64] return out or "unknown" diff --git a/src/browser_harness/manager_helpers.py b/src/browser_harness/manager_helpers.py index e1aa1053..c6505b05 100644 --- a/src/browser_harness/manager_helpers.py +++ b/src/browser_harness/manager_helpers.py @@ -45,7 +45,6 @@ def browser(browser_id): """Select a managed browser id for this Python script.""" resp = manager_client.switch_browser(browser_id) binding = manager_client.binding_from_response(resp) - manager_client.acquire_execution_for_binding(binding) context.activate_binding(binding) return manager_client.public_state(resp) @@ -66,9 +65,16 @@ def browser_close(browser_id=None): raise ValueError("browser_close(id) requires a browser id") active = context.get_active_binding() closing_active = active and active.browser_id == browser_id - if closing_active: - manager_client.release_active_execution_lock() resp = manager_client.close_browser(browser_id) if closing_active: context.clear_active_binding() return 
manager_client.public_state(resp) + + +def browser_close_owned(): + """Close managed browsers created by this agent identity.""" + active = context.get_active_binding() + resp = manager_client.close_owned_browsers() + if active and active.browser_id in set(resp.get("closed") or []): + context.clear_active_binding() + return manager_client.public_state(resp) diff --git a/src/browser_harness/run.py b/src/browser_harness/run.py index 9df43eb2..4c0a467b 100644 --- a/src/browser_harness/run.py +++ b/src/browser_harness/run.py @@ -25,7 +25,7 @@ sync_local_profile, use_local_profile, ) -from . import auth, context, manager_client +from . import auth, context from .helpers import * from .manager_helpers import * @@ -75,6 +75,7 @@ "browser_switch", "browser_list", "browser_close", + "browser_close_owned", ) _NO_DAEMON_HELPER_NAMES = { @@ -209,10 +210,7 @@ def main(): browser_switch(os.environ["BH_BROWSER_ID"]) else: context.clear_active_binding() - try: - exec(code, globals()) - finally: - manager_client.release_active_execution_lock() + exec(code, globals()) return # Auto-bootstrap a cloud browser is opt-in via BU_AUTOSPAWN — BROWSER_USE_API_KEY alone diff --git a/tests/unit/test_manager_daemon.py b/tests/unit/test_manager_daemon.py index bfb73cc3..80ce8c14 100644 --- a/tests/unit/test_manager_daemon.py +++ b/tests/unit/test_manager_daemon.py @@ -21,18 +21,18 @@ def _manager_with_lease(tmp_path): return manager, lease -def test_lock_is_exclusive_across_client_processes(tmp_path): +def test_switch_allows_multiple_clients_to_select_same_browser(tmp_path): manager, lease = _manager_with_lease(tmp_path) first = manager.handle({ - "op": "lock", + "op": "switch", "run_id": "run-1", "agent_id": "agent-1", "client_id": "client-1", "browser_id": lease.browser_id, }) second = manager.handle({ - "op": "lock", + "op": "switch", "run_id": "run-1", "agent_id": "agent-1", "client_id": "client-2", @@ -40,11 +40,12 @@ def test_lock_is_exclusive_across_client_processes(tmp_path): }) assert 
first["ok"] is True - assert second["ok"] is False - assert second["state"] == "busy" + assert second["ok"] is True + assert first["id"] == lease.browser_id + assert second["id"] == lease.browser_id -def test_unlock_requires_same_client_process(tmp_path): +def test_lock_endpoint_is_compatibility_noop(tmp_path): manager, lease = _manager_with_lease(tmp_path) first = manager.handle({ "op": "lock", @@ -54,14 +55,6 @@ def test_unlock_requires_same_client_process(tmp_path): "browser_id": lease.browser_id, }) - wrong = manager.handle({ - "op": "unlock", - "run_id": "run-1", - "agent_id": "agent-1", - "client_id": "client-2", - "browser_id": lease.browser_id, - "lock_id": first["lock_id"], - }) second = manager.handle({ "op": "lock", "run_id": "run-1", @@ -70,9 +63,10 @@ def test_unlock_requires_same_client_process(tmp_path): "browser_id": lease.browser_id, }) - assert wrong["ok"] is True - assert second["ok"] is False - assert second["state"] == "busy" + assert first["ok"] is True + assert second["ok"] is True + assert first["lock_id"] == "shared" + assert second["lock_id"] == "shared" def test_close_requires_explicit_id(tmp_path): @@ -89,27 +83,51 @@ def test_close_requires_explicit_id(tmp_path): assert lease.browser_id in manager.leases -def test_close_rejects_browser_busy_in_another_client(tmp_path): +def test_close_removes_exact_browser_id(monkeypatch, tmp_path): manager, lease = _manager_with_lease(tmp_path) - manager.handle({ - "op": "lock", + cleaned = [] + monkeypatch.setattr(manager_daemon, "cleanup_backend", lambda lease: cleaned.append(lease.browser_id)) + + resp = manager.handle({ + "op": "close", "run_id": "run-1", "agent_id": "agent-1", - "client_id": "client-1", + "client_id": "client-2", "browser_id": lease.browser_id, }) + assert resp["ok"] is True + assert resp["state"] == "closed" + assert lease.browser_id not in manager.leases + assert cleaned == [lease.browser_id] + + +def test_close_owned_closes_only_current_owner_browsers(monkeypatch, tmp_path): + 
manager = Manager(tmp_path) + cleaned = [] + monkeypatch.setattr(manager_daemon, "cleanup_backend", lambda lease: cleaned.append(lease.browser_id)) + owned = manager._allocate_lease("run-1", "agent-1", "cloud", "clean") + other_agent = manager._allocate_lease("run-1", "agent-2", "cloud", "clean") + other_run = manager._allocate_lease("run-2", "agent-1", "cloud", "clean") + manager.leases = { + owned.browser_id: owned, + other_agent.browser_id: other_agent, + other_run.browser_id: other_run, + } + resp = manager.handle({ - "op": "close", + "op": "close_owned", "run_id": "run-1", "agent_id": "agent-1", - "client_id": "client-2", - "browser_id": lease.browser_id, }) - assert resp["ok"] is False - assert resp["state"] == "busy" - assert lease.browser_id in manager.leases + assert resp["ok"] is True + assert resp["state"] == "closed-owned" + assert resp["closed"] == [owned.browser_id] + assert owned.browser_id not in manager.leases + assert other_agent.browser_id in manager.leases + assert other_run.browser_id in manager.leases + assert cleaned == [owned.browser_id] def test_short_browser_ids_have_no_prefix(tmp_path): @@ -119,6 +137,29 @@ def test_short_browser_ids_have_no_prefix(tmp_path): assert not lease.browser_id.startswith("br_") +def test_lease_load_ignores_removed_hierarchy_fields(tmp_path): + payload = { + "browser_id": "abc123", + "run_id": "run-1", + "owner_agent_id": "agent-1", + "backend": "cloud", + "profile_kind": "clean", + "harness_daemon_name": "bh_123", + "runtime_dir": str(tmp_path / "r"), + "tmp_dir": str(tmp_path / "t"), + "download_dir": str(tmp_path / "downloads"), + "artifact_dir": str(tmp_path / "artifacts"), + "profile_dir": str(tmp_path / "profile"), + "allowed_agents": ["agent-1", "agent-2"], + "active_execution": {"client_id": "old-client"}, + } + + lease = manager_daemon.BrowserLease.from_json(payload) + + assert lease.browser_id == "abc123" + assert lease.owner_agent_id == "agent-1" + + def 
test_cloud_live_url_is_exposed_in_ready_state(tmp_path): manager, lease = _manager_with_lease(tmp_path) lease.cloud_live_url = "https://live.example/session" @@ -169,7 +210,6 @@ def test_cloud_live_url_is_exposed_in_browser_list(tmp_path): "backend": "cloud", "owner": "agent-1", "owned_by_this_agent": True, - "shared": False, "state": "ready", "cloud_browser_id": "browser-123", "live_url": "https://live.example/session", diff --git a/tests/unit/test_manager_helpers.py b/tests/unit/test_manager_helpers.py index 0e47c4da..58b8c163 100644 --- a/tests/unit/test_manager_helpers.py +++ b/tests/unit/test_manager_helpers.py @@ -10,7 +10,6 @@ def _manager_response(tmp_path): "state": "ready", "id": "abc123", "backend": "private", - "shared": False, "binding": { "browser_id": "abc123", "bu_name": "bh_123", @@ -25,15 +24,9 @@ def _manager_response(tmp_path): def test_browser_new_creates_without_activating_binding(monkeypatch, tmp_path): - acquired = [] old = context.get_active_binding() try: monkeypatch.setattr(manager_helpers.manager_client, "new_browser", lambda *args, **kwargs: _manager_response(tmp_path)) - monkeypatch.setattr( - manager_helpers.manager_client, - "acquire_execution_for_binding", - lambda binding: acquired.append(binding.browser_id), - ) state = manager_helpers.browser_new(backend="managed", reason="test") binding = context.get_active_binding() @@ -46,7 +39,6 @@ def test_browser_new_creates_without_activating_binding(monkeypatch, tmp_path): assert state["id"] == "abc123" assert "binding" not in state assert binding == old - assert acquired == [] def test_browser_new_private_maps_to_managed_backend(monkeypatch, tmp_path): @@ -113,16 +105,10 @@ def test_browser_use_profile_returns_selected_profile(monkeypatch): } -def test_browser_select_activates_binding_and_acquires_lock(monkeypatch, tmp_path): - acquired = [] +def test_browser_select_activates_binding(monkeypatch, tmp_path): old = context.get_active_binding() try: 
monkeypatch.setattr(manager_helpers.manager_client, "switch_browser", lambda browser_id: _manager_response(tmp_path)) - monkeypatch.setattr( - manager_helpers.manager_client, - "acquire_execution_for_binding", - lambda binding: acquired.append(binding.browser_id), - ) state = manager_helpers.browser("abc123") binding = context.get_active_binding() @@ -136,7 +122,6 @@ def test_browser_select_activates_binding_and_acquires_lock(monkeypatch, tmp_pat assert "binding" not in state assert binding is not None assert binding.bu_name == "bh_123" - assert acquired == ["abc123"] def test_browser_switch_aliases_browser(monkeypatch): @@ -147,31 +132,24 @@ def test_browser_switch_aliases_browser(monkeypatch): assert calls == ["abc123"] -def test_browser_does_not_activate_binding_when_lock_fails(monkeypatch, tmp_path): +def test_browser_close_clears_active_binding(monkeypatch, tmp_path): + closed = [] old = context.get_active_binding() - previous = context.BrowserBinding( - browser_id="old123", - bu_name="bh_old", - runtime_dir=tmp_path / "old-r", - tmp_dir=tmp_path / "old-t", + context.activate_binding(context.BrowserBinding( + browser_id="abc123", + bu_name="bh_123", + runtime_dir=tmp_path / "r", + tmp_dir=tmp_path / "t", manager_mode=True, - ) - context.activate_binding(previous) + )) try: - monkeypatch.setattr(manager_helpers.manager_client, "switch_browser", lambda browser_id: _manager_response(tmp_path)) monkeypatch.setattr( manager_helpers.manager_client, - "acquire_execution_for_binding", - lambda binding: (_ for _ in ()).throw( - manager_helpers.manager_client.ManagerError({ - "state": "busy", - "reason": "browser is currently active in another browser-harness process", - }) - ), + "close_browser", + lambda browser_id=None: closed.append(browser_id) or {"ok": True, "state": "closed", "id": "abc123"}, ) - with pytest.raises(manager_helpers.manager_client.ManagerError, match="currently active"): - manager_helpers.browser("abc123") + state = 
manager_helpers.browser_close("abc123") active = context.get_active_binding() finally: if old is not None: @@ -179,12 +157,17 @@ def test_browser_does_not_activate_binding_when_lock_fails(monkeypatch, tmp_path else: context.clear_active_binding() - assert active == previous + assert state == {"state": "closed", "id": "abc123"} + assert closed == ["abc123"] + assert active is None -def test_browser_close_releases_lock_and_clears_active_binding(monkeypatch, tmp_path): - released = [] - closed = [] +def test_browser_close_requires_explicit_id(): + with pytest.raises(ValueError, match="browser_close\\(id\\)"): + manager_helpers.browser_close() + + +def test_browser_close_owned_clears_active_binding_when_active_id_closed(monkeypatch, tmp_path): old = context.get_active_binding() context.activate_binding(context.BrowserBinding( browser_id="abc123", @@ -194,14 +177,13 @@ def test_browser_close_releases_lock_and_clears_active_binding(monkeypatch, tmp_ manager_mode=True, )) try: - monkeypatch.setattr(manager_helpers.manager_client, "release_active_execution_lock", lambda: released.append(True)) monkeypatch.setattr( manager_helpers.manager_client, - "close_browser", - lambda browser_id=None: closed.append(browser_id) or {"ok": True, "state": "closed", "id": "abc123"}, + "close_owned_browsers", + lambda: {"ok": True, "state": "closed-owned", "closed": ["abc123", "def456"]}, ) - state = manager_helpers.browser_close("abc123") + state = manager_helpers.browser_close_owned() active = context.get_active_binding() finally: if old is not None: @@ -209,12 +191,5 @@ def test_browser_close_releases_lock_and_clears_active_binding(monkeypatch, tmp_ else: context.clear_active_binding() - assert state == {"state": "closed", "id": "abc123"} - assert released == [True] - assert closed == ["abc123"] + assert state == {"state": "closed-owned", "closed": ["abc123", "def456"]} assert active is None - - -def test_browser_close_requires_explicit_id(): - with pytest.raises(ValueError, 
match="browser_close\\(id\\)"): - manager_helpers.browser_close() diff --git a/tests/unit/test_run_manager_mode.py b/tests/unit/test_run_manager_mode.py index f4629bd4..3a45758e 100644 --- a/tests/unit/test_run_manager_mode.py +++ b/tests/unit/test_run_manager_mode.py @@ -33,8 +33,7 @@ def test_manager_helper_call_enables_manager_mode_without_env(monkeypatch): patch("sys.stdout", stdout), \ patch("browser_harness.run.print_update_banner"), \ patch("browser_harness.run.ensure_daemon") as ensure_daemon, \ - patch("browser_harness.run.browser_status", lambda: "manager helper mode ok"), \ - patch("browser_harness.run.manager_client.release_active_execution_lock"): + patch("browser_harness.run.browser_status", lambda: "manager helper mode ok"): run.main() ensure_daemon.assert_not_called() @@ -54,8 +53,7 @@ def test_browser_selector_call_enables_manager_mode(monkeypatch): patch("sys.stdout", stdout), \ patch("browser_harness.run.print_update_banner"), \ patch("browser_harness.run.ensure_daemon") as ensure_daemon, \ - patch("browser_harness.run.browser", lambda browser_id: switched.append(browser_id) or {"id": browser_id}), \ - patch("browser_harness.run.manager_client.release_active_execution_lock"): + patch("browser_harness.run.browser", lambda browser_id: switched.append(browser_id) or {"id": browser_id}): run.main() ensure_daemon.assert_not_called() @@ -95,20 +93,16 @@ def test_browser_use_profile_runs_without_daemon(monkeypatch): assert stdout.getvalue().strip() == "{'selected': 'google-chrome:Default'}" -def test_manager_mode_releases_execution_lock_on_exception(monkeypatch): +def test_manager_mode_exception_propagates(monkeypatch): monkeypatch.setenv("BH_MANAGER_SOCKET", "/tmp/nonexistent-manager.sock") fake_stdin = StringIO("raise RuntimeError('boom')") - released = [] with patch.object(sys, "argv", ["browser-harness"]), \ patch("sys.stdin", fake_stdin), \ - patch("browser_harness.run.print_update_banner"), \ - 
patch("browser_harness.run.manager_client.release_active_execution_lock", lambda: released.append(True)): + patch("browser_harness.run.print_update_banner"): try: run.main() except RuntimeError as e: assert str(e) == "boom" else: raise AssertionError("expected RuntimeError") - - assert released == [True] diff --git a/tests/unit/test_skill_docs.py b/tests/unit/test_skill_docs.py new file mode 100644 index 00000000..aa1e5e7d --- /dev/null +++ b/tests/unit/test_skill_docs.py @@ -0,0 +1,9 @@ +from pathlib import Path + + +def test_packaged_skill_points_to_root_skill(): + repo = Path(__file__).resolve().parents[2] + skill = repo / "skills" / "browser-harness" / "SKILL.md" + + assert skill.is_symlink() + assert skill.readlink() == Path("../../SKILL.md") From ded09cadd6ef482d1b80c85d21334d9f8905e0bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gregor=20=C5=BDuni=C4=8D?= <36313686+gregpr07@users.noreply.github.com> Date: Thu, 18 Jun 2026 16:58:27 -0700 Subject: [PATCH 09/15] Harden browser manager runtime --- SKILL.md | 1 + scripts/materialize_browser_harness_skill.py | 34 ++++ src/browser_harness/manager_client.py | 78 +++------- src/browser_harness/manager_daemon.py | 64 ++++++-- src/browser_harness/manager_runtime.py | 154 +++++++++++++++++++ tests/unit/test_manager_daemon.py | 31 ++++ tests/unit/test_manager_runtime.py | 69 +++++++++ tests/unit/test_skill_docs.py | 14 ++ 8 files changed, 375 insertions(+), 70 deletions(-) create mode 100644 scripts/materialize_browser_harness_skill.py create mode 100644 src/browser_harness/manager_runtime.py create mode 100644 tests/unit/test_manager_runtime.py diff --git a/SKILL.md b/SKILL.md index d6a33384..217f5e90 100644 --- a/SKILL.md +++ b/SKILL.md @@ -91,6 +91,7 @@ Never put API keys in command-line arguments. - Use `js(...)` for DOM inspection or extraction when coordinates are the wrong tool. - After navigation, call `wait_for_load()`. - If the current tab is stale or internal, call `ensure_real_tab()`. 
+- If a tab/session dies (`target-gone`, `browser session ended`), open a fresh tab; if status is not ready, create a new browser. - If redirected to a login wall, stop and ask the user. Do not type credentials from screenshots. - For anything helpers do not cover, use raw CDP: `cdp("Domain.method", params)`. diff --git a/scripts/materialize_browser_harness_skill.py b/scripts/materialize_browser_harness_skill.py new file mode 100644 index 00000000..2d018242 --- /dev/null +++ b/scripts/materialize_browser_harness_skill.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python3 +"""Copy the canonical browser-harness skill into a package tree. + +The repo keeps skills/browser-harness/SKILL.md as a symlink to avoid doc drift. +Some package builders and zip-based plugin installers do not preserve symlinks, +so packaging should call this script with an output directory and ship the +regular file it writes there. +""" +from __future__ import annotations + +import argparse +from pathlib import Path +import shutil + + +def materialize(output_dir: Path) -> Path: + repo = Path(__file__).resolve().parents[1] + source = repo / "SKILL.md" + target = output_dir / "skills" / "browser-harness" / "SKILL.md" + target.parent.mkdir(parents=True, exist_ok=True) + shutil.copyfile(source, target) + return target + + +def main(argv=None) -> int: + parser = argparse.ArgumentParser() + parser.add_argument("output_dir", help="package output directory") + args = parser.parse_args(argv) + print(materialize(Path(args.output_dir))) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/src/browser_harness/manager_client.py b/src/browser_harness/manager_client.py index 026ecee3..991756bc 100644 --- a/src/browser_harness/manager_client.py +++ b/src/browser_harness/manager_client.py @@ -1,18 +1,14 @@ """Client for the browser-harness manager.""" from __future__ import annotations -from contextlib import contextmanager -import json import os from pathlib import Path import secrets 
-import socket import subprocess import sys -import tempfile import time -from . import context +from . import context, manager_runtime class ManagerError(RuntimeError): @@ -27,11 +23,11 @@ def __init__(self, response): def default_manager_root() -> str: - return os.environ.get("BH_MANAGER_ROOT") or str(Path(tempfile.gettempdir()) / "bhm") + return str(manager_runtime.default_root()) def default_manager_socket() -> str: - return os.environ.get("BH_MANAGER_SOCKET") or str(Path(default_manager_root()) / "manager.sock") + return str(manager_runtime.default_endpoint(Path(default_manager_root()))) def manager_socket() -> str: @@ -45,14 +41,15 @@ def manager_socket() -> str: def ensure_manager_running(path: str | None = None) -> None: global _manager_started path = path or default_manager_socket() - if _manager_socket_alive(path): + endpoint = Path(path) + if _manager_socket_alive(endpoint): return root = Path(os.environ.get("BH_MANAGER_ROOT") or default_manager_root()) - root.mkdir(parents=True, exist_ok=True) - with _start_lock(root): - if _manager_socket_alive(path): + manager_runtime.ensure_private_dir(root) + with manager_runtime.start_lock(root): + if _manager_socket_alive(endpoint): return - log = open(root / "manager.log", "ab") + log = manager_runtime.open_private_append(root / "manager.log") env = {**os.environ, "BH_MANAGER_SOCKET": path, "BH_MANAGER_ROOT": str(root)} try: subprocess.Popen( @@ -61,68 +58,43 @@ def ensure_manager_running(path: str | None = None) -> None: stdout=log, stderr=log, env=env, - start_new_session=True, + **manager_runtime.spawn_kwargs(), ) finally: log.close() _manager_started = True deadline = time.time() + float(os.environ.get("BH_MANAGER_START_TIMEOUT", "10")) while time.time() < deadline: - if _manager_socket_alive(path): + if _manager_socket_alive(endpoint): return time.sleep(0.05) raise ManagerError({"state": "manager-unavailable", "reason": f"manager did not start at {path}"}) -@contextmanager -def _start_lock(root: Path): - 
lock_path = root / "manager.start.lock" - with open(lock_path, "a+b") as f: - if os.name == "nt": - import msvcrt - msvcrt.locking(f.fileno(), msvcrt.LK_LOCK, 1) - try: - yield - finally: - f.seek(0) - msvcrt.locking(f.fileno(), msvcrt.LK_UNLCK, 1) - else: - import fcntl - fcntl.flock(f.fileno(), fcntl.LOCK_EX) - try: - yield - finally: - fcntl.flock(f.fileno(), fcntl.LOCK_UN) - - -def _manager_socket_alive(path: str) -> bool: - try: - s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) - s.settimeout(0.2) - s.connect(path) - s.close() +def _manager_socket_alive(path: Path) -> bool: + if manager_runtime.ping(path, timeout=0.2): return True + if manager_runtime.IS_WINDOWS: + return False + try: + sock, _token = manager_runtime.connect(path, timeout=0.2) except OSError: return False + try: + sock.close() + except OSError: + pass + return True def request(op: str, **payload) -> dict: req = {"op": op, **context.agent_identity().payload(), "client_id": _CLIENT_ID, **payload} path = manager_socket() - s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + sock, token = manager_runtime.connect(Path(path), timeout=float(os.environ.get("BH_MANAGER_TIMEOUT", "30"))) try: - s.settimeout(float(os.environ.get("BH_MANAGER_TIMEOUT", "30"))) - s.connect(path) - s.sendall((json.dumps(req) + "\n").encode()) - data = b"" - while not data.endswith(b"\n"): - chunk = s.recv(1 << 16) - if not chunk: - break - data += chunk + resp = manager_runtime.send_request(sock, token, req) finally: - s.close() - resp = json.loads(data or b"{}") + sock.close() if not isinstance(resp, dict): raise ManagerError({"state": "bad-response", "reason": "manager returned non-object JSON"}) if resp.get("ok") is False: diff --git a/src/browser_harness/manager_daemon.py b/src/browser_harness/manager_daemon.py index 9a617262..8bf81fd2 100644 --- a/src/browser_harness/manager_daemon.py +++ b/src/browser_harness/manager_daemon.py @@ -11,12 +11,11 @@ import socket import subprocess import sys -import tempfile 
import threading import time import urllib.request -from . import admin, auth, context +from . import admin, auth, context, manager_runtime BU_API = "https://api.browser-use.com/api/v3" @@ -28,6 +27,7 @@ "/Applications/Brave Browser.app/Contents/MacOS/Brave Browser", "/Applications/Chromium.app/Contents/MacOS/Chromium", ) +_server_token: str | None = None @dataclass @@ -73,7 +73,7 @@ def binding(self) -> dict: class Manager: def __init__(self, root: Path): self.root = root - self.root.mkdir(parents=True, exist_ok=True) + manager_runtime.ensure_private_dir(self.root) self._lock = threading.RLock() self.leases: dict[str, BrowserLease] = {} self.next_seq = 0 @@ -94,9 +94,7 @@ def _persist(self): "next_seq": self.next_seq, "leases": [asdict(v) for v in self.leases.values()], } - tmp = self.root / "registry.json.tmp" - tmp.write_text(json.dumps(data, indent=2)) - os.replace(tmp, self.root / "registry.json") + manager_runtime.write_private_json(self.root / "registry.json", data) def handle(self, req: dict) -> dict: op = req.get("op") @@ -263,6 +261,8 @@ def _allocate_lease(self, run_id: str, agent_id: str, backend: str, profile_kind profile_dir = base / "profile" for p in (runtime_dir, tmp_dir, download_dir, artifact_dir, profile_dir): p.mkdir(parents=True, exist_ok=True) + if not manager_runtime.IS_WINDOWS: + os.chmod(p, 0o700) return BrowserLease( browser_id=browser_id, run_id=run_id, @@ -331,7 +331,13 @@ def start_managed_backend(lease: BrowserLease): args.insert(-1, "--disable-gpu") if os.environ.get("BH_CHROME_NO_SANDBOX") == "1": args.insert(-1, "--no-sandbox") - proc = subprocess.Popen(args, stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, start_new_session=True) + proc = subprocess.Popen( + args, + stdin=subprocess.DEVNULL, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + **manager_runtime.spawn_kwargs(), + ) lease.local_process_id = proc.pid lease.local_debug_port = port wait_devtools(port) @@ -505,15 +511,28 @@ def 
sanitize(value: str) -> str: def serve(socket_path: Path, root: Path): + global _server_token + manager_runtime.ensure_private_dir(root) socket_path.parent.mkdir(parents=True, exist_ok=True) - try: - socket_path.unlink() - except FileNotFoundError: - pass manager = Manager(root) - server = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) - server.bind(str(socket_path)) - os.chmod(socket_path, 0o600) + if manager_runtime.IS_WINDOWS: + server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server.bind(("127.0.0.1", 0)) + _server_token = manager_runtime.new_token() + manager_runtime.write_private_json(socket_path, {"port": server.getsockname()[1], "token": _server_token}) + else: + try: + socket_path.unlink() + except FileNotFoundError: + pass + server = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + old_umask = os.umask(0o077) + try: + server.bind(str(socket_path)) + finally: + os.umask(old_umask) + os.chmod(socket_path, 0o600) + _server_token = None server.listen(128) print(f"browser-harness manager listening on {socket_path}", file=sys.stderr, flush=True) try: @@ -522,6 +541,11 @@ def serve(socket_path: Path, root: Path): threading.Thread(target=handle_conn, args=(manager, conn), daemon=True).start() finally: server.close() + if manager_runtime.IS_WINDOWS: + try: + socket_path.unlink() + except FileNotFoundError: + pass def handle_conn(manager: Manager, conn: socket.socket): @@ -536,7 +560,12 @@ def handle_conn(manager: Manager, conn: socket.socket): if not data: return req = json.loads(data or b"{}") - resp = manager.handle(req) + if _server_token and req.get("token") != _server_token: + resp = error("forbidden", "invalid manager token", []) + elif req.get("meta") == "ping": + resp = {"pong": True, "pid": os.getpid()} + else: + resp = manager.handle(req) except Exception as e: resp = error("bad-request", str(e), []) conn.sendall((json.dumps(resp, default=str) + "\n").encode()) @@ -544,8 +573,9 @@ def handle_conn(manager: Manager, conn: socket.socket): 
def main(argv=None): parser = argparse.ArgumentParser() - parser.add_argument("--socket", default=os.environ.get("BH_MANAGER_SOCKET") or str(Path(tempfile.gettempdir()) / "bhm" / "manager.sock")) - parser.add_argument("--root", default=os.environ.get("BH_MANAGER_ROOT") or str(Path(tempfile.gettempdir()) / "bhm")) + root = manager_runtime.default_root() + parser.add_argument("--socket", default=str(manager_runtime.default_endpoint(root))) + parser.add_argument("--root", default=str(root)) args = parser.parse_args(argv) serve(Path(args.socket), Path(args.root)) diff --git a/src/browser_harness/manager_runtime.py b/src/browser_harness/manager_runtime.py new file mode 100644 index 00000000..a8c58d0c --- /dev/null +++ b/src/browser_harness/manager_runtime.py @@ -0,0 +1,154 @@ +"""Runtime directory and IPC helpers for browser manager mode.""" +from __future__ import annotations + +from contextlib import contextmanager +import json +import os +from pathlib import Path +import secrets +import socket +import subprocess +import sys +import tempfile + + +IS_WINDOWS = sys.platform == "win32" + + +def default_root() -> Path: + if os.environ.get("BH_MANAGER_ROOT"): + return Path(os.environ["BH_MANAGER_ROOT"]) + if IS_WINDOWS: + base = os.environ.get("LOCALAPPDATA") or tempfile.gettempdir() + return Path(base) / "browser-harness" / "manager" + if os.environ.get("XDG_RUNTIME_DIR"): + return Path(os.environ["XDG_RUNTIME_DIR"]) / "browser-harness-manager" + uid = os.getuid() if hasattr(os, "getuid") else os.environ.get("USER") or "user" + return Path("/tmp") / f"bhm-{uid}" + + +def default_endpoint(root: Path | None = None) -> Path: + if os.environ.get("BH_MANAGER_SOCKET"): + return Path(os.environ["BH_MANAGER_SOCKET"]) + root = root or default_root() + return root / ("manager.port.json" if IS_WINDOWS else "manager.sock") + + +def ensure_private_dir(path: Path) -> None: + path.mkdir(parents=True, exist_ok=True, mode=0o700) + if IS_WINDOWS: + return + st = path.stat() + uid = 
os.getuid() + if st.st_uid != uid: + raise PermissionError(f"{path} is owned by uid {st.st_uid}, expected {uid}") + if st.st_mode & 0o077: + os.chmod(path, 0o700) + st = path.stat() + if st.st_mode & 0o077: + raise PermissionError(f"{path} must not be accessible by group/other") + + +def write_private_json(path: Path, data: dict) -> None: + ensure_private_dir(path.parent) + tmp = path.with_name(path.name + ".tmp") + flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC + fd = os.open(tmp, flags, 0o600) + try: + with os.fdopen(fd, "w") as f: + json.dump(data, f, indent=2) + except Exception: + try: + os.close(fd) + except OSError: + pass + raise + os.replace(tmp, path) + if not IS_WINDOWS: + os.chmod(path, 0o600) + + +def open_private_append(path: Path): + ensure_private_dir(path.parent) + fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0o600) + return os.fdopen(fd, "ab") + + +@contextmanager +def start_lock(root: Path): + ensure_private_dir(root) + lock_path = root / "manager.start.lock" + fd = os.open(lock_path, os.O_RDWR | os.O_CREAT, 0o600) + with os.fdopen(fd, "a+b") as f: + if IS_WINDOWS: + import msvcrt + msvcrt.locking(f.fileno(), msvcrt.LK_LOCK, 1) + try: + yield + finally: + f.seek(0) + msvcrt.locking(f.fileno(), msvcrt.LK_UNLCK, 1) + else: + import fcntl + fcntl.flock(f.fileno(), fcntl.LOCK_EX) + try: + yield + finally: + fcntl.flock(f.fileno(), fcntl.LOCK_UN) + + +def spawn_kwargs() -> dict: + if IS_WINDOWS: + return { + "creationflags": subprocess.CREATE_NEW_PROCESS_GROUP | subprocess.CREATE_NO_WINDOW, + } + return {"start_new_session": True} + + +def connect(endpoint: Path, timeout: float = 1.0) -> tuple[socket.socket, str | None]: + if not IS_WINDOWS: + s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + s.settimeout(timeout) + s.connect(str(endpoint)) + return s, None + data = json.loads(endpoint.read_text()) + port = int(data["port"]) + token = str(data["token"]) + s = socket.create_connection(("127.0.0.1", port), timeout=timeout) + 
s.settimeout(timeout) + return s, token + + +def send_request(sock: socket.socket, token: str | None, req: dict) -> dict: + if token: + req = {**req, "token": token} + sock.sendall((json.dumps(req) + "\n").encode()) + data = b"" + while not data.endswith(b"\n"): + chunk = sock.recv(1 << 16) + if not chunk: + break + data += chunk + resp = json.loads(data or b"{}") + return resp if isinstance(resp, dict) else {"ok": False, "state": "bad-response"} + + +def ping(endpoint: Path, timeout: float = 0.2) -> bool: + try: + sock, token = connect(endpoint, timeout=timeout) + except (FileNotFoundError, ConnectionRefusedError, TimeoutError, socket.timeout, OSError, ValueError, KeyError, TypeError): + return False + try: + resp = send_request(sock, token, {"meta": "ping"}) + return resp.get("pong") is True + except (OSError, ValueError, AttributeError): + return False + finally: + try: + sock.close() + except OSError: + pass + + +def new_token() -> str: + return secrets.token_hex(32) diff --git a/tests/unit/test_manager_daemon.py b/tests/unit/test_manager_daemon.py index 80ce8c14..cfae8e79 100644 --- a/tests/unit/test_manager_daemon.py +++ b/tests/unit/test_manager_daemon.py @@ -1,3 +1,7 @@ +import json +import socket +import threading + from browser_harness import manager_daemon from browser_harness import auth from browser_harness.manager_daemon import Manager @@ -21,6 +25,33 @@ def _manager_with_lease(tmp_path): return manager, lease +def _send_to_handle_conn(manager, req): + left, right = socket.socketpair() + thread = threading.Thread(target=manager_daemon.handle_conn, args=(manager, right)) + thread.start() + try: + left.sendall((json.dumps(req) + "\n").encode()) + data = b"" + while not data.endswith(b"\n"): + data += left.recv(4096) + finally: + left.close() + thread.join(timeout=1) + return json.loads(data) + + +def test_handle_conn_requires_manager_token(monkeypatch, tmp_path): + manager = Manager(tmp_path) + monkeypatch.setattr(manager_daemon, "_server_token", 
"secret-token") + + denied = _send_to_handle_conn(manager, {"op": "list", "token": "wrong"}) + pong = _send_to_handle_conn(manager, {"meta": "ping", "token": "secret-token"}) + + assert denied["ok"] is False + assert denied["state"] == "forbidden" + assert pong["pong"] is True + + def test_switch_allows_multiple_clients_to_select_same_browser(tmp_path): manager, lease = _manager_with_lease(tmp_path) diff --git a/tests/unit/test_manager_runtime.py b/tests/unit/test_manager_runtime.py new file mode 100644 index 00000000..f0b58ea0 --- /dev/null +++ b/tests/unit/test_manager_runtime.py @@ -0,0 +1,69 @@ +import json +import socket +import stat + +import pytest + +from browser_harness import manager_runtime + + +def test_default_manager_root_is_user_private_tmp(monkeypatch): + monkeypatch.delenv("BH_MANAGER_ROOT", raising=False) + monkeypatch.delenv("XDG_RUNTIME_DIR", raising=False) + monkeypatch.setattr(manager_runtime, "IS_WINDOWS", False) + monkeypatch.setattr(manager_runtime.os, "getuid", lambda: 12345, raising=False) + + assert manager_runtime.default_root() == manager_runtime.Path("/tmp/bhm-12345") + + +def test_ensure_private_dir_tightens_permissions(tmp_path): + if manager_runtime.IS_WINDOWS: + pytest.skip("POSIX permissions only") + path = tmp_path / "manager" + path.mkdir(mode=0o755) + + manager_runtime.ensure_private_dir(path) + + mode = stat.S_IMODE(path.stat().st_mode) + assert mode == 0o700 + + +def test_write_private_json_uses_private_file_mode(tmp_path): + if manager_runtime.IS_WINDOWS: + pytest.skip("POSIX permissions only") + path = tmp_path / "manager" / "registry.json" + + manager_runtime.write_private_json(path, {"ok": True}) + + assert json.loads(path.read_text()) == {"ok": True} + assert stat.S_IMODE(path.parent.stat().st_mode) == 0o700 + assert stat.S_IMODE(path.stat().st_mode) == 0o600 + + +def test_windows_default_endpoint_is_token_file(monkeypatch, tmp_path): + monkeypatch.setattr(manager_runtime, "IS_WINDOWS", True) + 
monkeypatch.setenv("LOCALAPPDATA", str(tmp_path)) + monkeypatch.delenv("BH_MANAGER_ROOT", raising=False) + monkeypatch.delenv("BH_MANAGER_SOCKET", raising=False) + + root = manager_runtime.default_root() + + assert root == tmp_path / "browser-harness" / "manager" + assert manager_runtime.default_endpoint(root) == root / "manager.port.json" + + +def test_send_request_injects_windows_token(): + left, right = socket.socketpair() + try: + left.settimeout(1) + right.settimeout(1) + right.sendall(b'{"ok": true}\n') + + resp = manager_runtime.send_request(left, "secret-token", {"op": "list"}) + sent = right.recv(4096).decode() + finally: + left.close() + right.close() + + assert json.loads(sent) == {"op": "list", "token": "secret-token"} + assert resp == {"ok": True} diff --git a/tests/unit/test_skill_docs.py b/tests/unit/test_skill_docs.py index aa1e5e7d..b12871b6 100644 --- a/tests/unit/test_skill_docs.py +++ b/tests/unit/test_skill_docs.py @@ -1,4 +1,6 @@ from pathlib import Path +import subprocess +import sys def test_packaged_skill_points_to_root_skill(): @@ -7,3 +9,15 @@ def test_packaged_skill_points_to_root_skill(): assert skill.is_symlink() assert skill.readlink() == Path("../../SKILL.md") + + +def test_skill_materializer_writes_regular_file(tmp_path): + repo = Path(__file__).resolve().parents[2] + script = repo / "scripts" / "materialize_browser_harness_skill.py" + + subprocess.run([sys.executable, str(script), str(tmp_path)], check=True) + + materialized = tmp_path / "skills" / "browser-harness" / "SKILL.md" + assert materialized.is_file() + assert not materialized.is_symlink() + assert materialized.read_text() == (repo / "SKILL.md").read_text() From 07ea791529e6bdaaa0702525208cf426e308a830 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gregor=20=C5=BDuni=C4=8D?= <36313686+gregpr07@users.noreply.github.com> Date: Thu, 18 Jun 2026 18:41:22 -0700 Subject: [PATCH 10/15] Tighten harness setup diagnostics --- src/browser_harness/admin.py | 37 ++++++++++++++++++++ 
src/browser_harness/helpers.py | 63 ++++++++++------------------------ src/browser_harness/run.py | 29 +++++++++++++++- tests/integration/test_js.py | 53 +++++++++++++++++++++++----- tests/unit/test_admin.py | 26 ++++++++++++++ tests/unit/test_helpers.py | 13 +++++++ tests/unit/test_run.py | 56 ++++++++++++++++++++++++++++++ 7 files changed, 224 insertions(+), 53 deletions(-) diff --git a/src/browser_harness/admin.py b/src/browser_harness/admin.py index 4ffcb19a..184383c1 100644 --- a/src/browser_harness/admin.py +++ b/src/browser_harness/admin.py @@ -384,6 +384,36 @@ def run_doctor_fix_snap(): return 0 +def _package_source_path() -> Path: + return Path(__file__).resolve().parent + + +def _cwd_browser_harness_source_path(cwd: str | os.PathLike | None = None) -> Path | None: + try: + base = Path(cwd or os.getcwd()).resolve() + except OSError: + return None + for root in (base, *base.parents): + candidate = root / "src" / "browser_harness" + if candidate.is_dir(): + try: + return candidate.resolve() + except OSError: + return candidate + return None + + +def _doctor_source_mismatch() -> dict | None: + package_source = _package_source_path() + cwd_source = _cwd_browser_harness_source_path() + if cwd_source and cwd_source != package_source: + return { + "package_source": str(package_source), + "cwd_source": str(cwd_source), + } + return None + + def ensure_daemon(wait=60.0, name=None, env=None, binding=None): """Idempotent. 
Self-heals stale daemon, cold Chrome, and missing Allow on chrome://inspect.""" b_name, runtime_dir, tmp_dir = _binding_parts(binding) @@ -894,6 +924,8 @@ def run_doctor(): profile_use = shutil.which("profile-use") is not None api_key = bool(os.environ.get("BROWSER_USE_API_KEY")) latest = _latest_release_tag() + source_path = _package_source_path() + source_mismatch = _doctor_source_mismatch() # Only claim an update when we know the installed version — `cur or "(unknown)"` # for display would otherwise be parsed as (0,) and flag every latest as newer. newer = bool(cur and latest and _version_tuple(latest) > _version_tuple(cur)) @@ -908,10 +940,15 @@ def row(label, ok, detail=""): print(f" platform {platform.system()} {platform.release()}") print(f" python {sys.version.split()[0]}") print(f" version {cur_display} ({mode})") + print(f" source path {source_path}") if latest: print(f" latest release {latest}" + (" (update available)" if newer else "")) else: print(" latest release (could not reach github)") + if source_mismatch: + print("[source-mismatch]") + print(f"Current directory contains: {source_mismatch['cwd_source']}") + print(f"Imported browser-harness from: {source_mismatch['package_source']}") if platform.system() == "Linux": bname, bpath = _doctor_probe_chrome_binary_for_snap() if bname and bpath and _is_snap_browser(bpath): diff --git a/src/browser_harness/helpers.py b/src/browser_harness/helpers.py index e3661147..473f8809 100644 --- a/src/browser_harness/helpers.py +++ b/src/browser_harness/helpers.py @@ -120,42 +120,13 @@ def _runtime_evaluate(expression, session_id=None, await_promise=False): return _runtime_value(r, expression) -def _has_return_statement(expression): - i = 0 - n = len(expression) - state = "code" - quote = "" - while i < n: - ch = expression[i] - nxt = expression[i + 1] if i + 1 < n else "" - if state == "code": - if ch in ("'", '"', "`"): - state = "string"; quote = ch; i += 1; continue - if ch == "/" and nxt == "/": - state = 
"line_comment"; i += 2; continue - if ch == "/" and nxt == "*": - state = "block_comment"; i += 2; continue - if expression.startswith("return", i): - before = expression[i - 1] if i > 0 else "" - after = expression[i + 6] if i + 6 < n else "" - if not (before == "_" or before.isalnum()) and not (after == "_" or after.isalnum()): - return True - i += 1; continue - if state == "line_comment": - if ch == "\n": - state = "code" - i += 1; continue - if state == "block_comment": - if ch == "*" and nxt == "/": - state = "code"; i += 2; continue - i += 1; continue - if state == "string": - if ch == "\\": - i += 2; continue - if ch == quote: - state = "code"; quote = "" - i += 1; continue - return False +def _wrap_js_function(expression): + return f"(function(){{{expression}}})()" + + +def _is_illegal_return_error(exc): + return "Illegal return statement" in str(exc) + # --- navigation / page --- @@ -360,14 +331,13 @@ def new_tab(url="about:blank"): # Always create blank, then goto: passing url to createTarget races with # attach, so the brief about:blank is "complete" by the time the caller # polls and wait_for_load() returns before navigation actually starts. - binding = context.get_active_binding() - if url != "about:blank" and binding and binding.manager_mode: + if url != "about:blank": try: cur = current_tab() cur_url = cur.get("url") or "" if cur_url in ("", "about:blank") or cur_url.startswith("about:blank#"): goto_url(url) - return cur["targetId"] + return cur.get("targetId") or cur.get("target_id") except Exception: pass params = {"url": "about:blank"} @@ -491,13 +461,18 @@ def wait_for_network_idle(timeout=10.0, idle_ms=500): def js(expression, target_id=None): """Run JS in the attached tab (default) or inside an iframe target (via iframe_target()). - Expressions with top-level `return` are automatically wrapped in an IIFE, so both - `document.title` and `const x = 1; return x` are valid inputs. + Expressions are evaluated as-is first. 
If Chrome reports an illegal top-level + `return`, the snippet is retried inside a function wrapper, so both + `document.title` and `const x = 1; return x` work without mis-wrapping nested + functions that contain their own returns. """ sid = cdp("Target.attachToTarget", targetId=target_id, flatten=True)["sessionId"] if target_id else None - if _has_return_statement(expression) and not expression.strip().startswith("("): - expression = f"(function(){{{expression}}})()" - return _runtime_evaluate(expression, session_id=sid, await_promise=True) + try: + return _runtime_evaluate(expression, session_id=sid, await_promise=True) + except RuntimeError as e: + if _is_illegal_return_error(e): + return _runtime_evaluate(_wrap_js_function(expression), session_id=sid, await_promise=True) + raise _KC = {"Enter": 13, "Tab": 9, "Escape": 27, "Backspace": 8, " ": 32, "ArrowLeft": 37, "ArrowUp": 38, "ArrowRight": 39, "ArrowDown": 40} diff --git a/src/browser_harness/run.py b/src/browser_harness/run.py index 4c0a467b..8dc57cc7 100644 --- a/src/browser_harness/run.py +++ b/src/browser_harness/run.py @@ -1,4 +1,4 @@ -import ast, os, sys, urllib.request +import ast, json, os, sys, urllib.request # Windows default stdout encoding is cp1252, which can't encode the 🐴 marker # helpers prepend to tab titles (or anything else outside Latin-1). 
Force UTF-8 @@ -49,6 +49,9 @@ browser-harness --doctor diagnose install, daemon, and browser state browser-harness doctor same as --doctor browser-harness doctor --fix-snap print how to fix Snap Chromium blocking CDP (Linux) + browser-harness profiles list local Chrome/Chromium profiles without starting the daemon + browser-harness use-profile select a local profile without starting the daemon + browser-harness open-profile [id] open/focus a local profile without starting the daemon browser-harness auth login sign in to Browser Use Cloud for cloud browsers browser-harness auth login --device-code sign in from SSH/headless environments browser-harness auth status show Browser Use Cloud auth state @@ -167,6 +170,10 @@ def _explicit_cdp_configured(): return bool(os.environ.get("BU_CDP_URL") or os.environ.get("BU_CDP_WS")) +def _print_json(value): + print(json.dumps(value, indent=2, default=str)) + + def main(): args = sys.argv[1:] if args and args[0] in {"-h", "--help"}: @@ -185,6 +192,26 @@ def main(): print("usage: browser-harness doctor [--fix-snap]", file=sys.stderr) sys.exit(2) sys.exit(run_doctor()) + if args and args[0] == "profiles": + rest = args[1:] + verbose = rest == ["--verbose"] + if rest and not verbose: + print("usage: browser-harness profiles [--verbose]", file=sys.stderr) + sys.exit(2) + _print_json(browser_profiles(verbose=verbose)) + return + if args and args[0] == "use-profile": + if len(args) != 2: + print("usage: browser-harness use-profile ", file=sys.stderr) + sys.exit(2) + _print_json(browser_use_profile(args[1])) + return + if args and args[0] == "open-profile": + if len(args) > 2: + print("usage: browser-harness open-profile [profile-id]", file=sys.stderr) + sys.exit(2) + _print_json(open_local_profile(args[1] if len(args) == 2 else None, marker=False)) + return if args and args[0] == "auth": sys.exit(auth.run_auth_cli(args[1:])) if args and args[0] == "--update": diff --git a/tests/integration/test_js.py b/tests/integration/test_js.py 
index 86582e68..5a6e3c54 100644 --- a/tests/integration/test_js.py +++ b/tests/integration/test_js.py @@ -26,11 +26,37 @@ def test_simple_expression_passes_through(): assert _evaluated_expression(captured) == "document.title" -def test_return_statement_gets_wrapped(): - fake_cdp, captured = _capture_cdp() +def _illegal_return_response(): + return { + "result": { + "type": "object", + "subtype": "error", + "description": "SyntaxError: Illegal return statement", + }, + "exceptionDetails": { + "text": "Uncaught", + "lineNumber": 0, + "columnNumber": 13, + }, + } + + +def test_return_statement_retries_wrapped_after_illegal_return(): + captured = [] + + def fake_cdp(method, **kwargs): + captured.append((method, kwargs)) + if kwargs["expression"] == "const x = 1; return x": + return _illegal_return_response() + return {"result": {"value": 1}} + with patch("browser_harness.helpers.cdp", side_effect=fake_cdp): - helpers.js("const x = 1; return x") - assert _evaluated_expression(captured) == "(function(){const x = 1; return x})()" + assert helpers.js("const x = 1; return x") == 1 + + assert [kw["expression"] for m, kw in captured if m == "Runtime.evaluate"] == [ + "const x = 1; return x", + "(function(){const x = 1; return x})()", + ] def test_iife_with_internal_return_is_not_double_wrapped(): @@ -112,11 +138,22 @@ def test_return_word_inside_comment_does_not_trigger_wrapping(): @pytest.mark.parametrize("expr", ["return\t1", "return\n1"]) -def test_top_level_return_with_whitespace_gets_wrapped(expr): - fake_cdp, captured = _capture_cdp() +def test_top_level_return_with_whitespace_retries_wrapped(expr): + captured = [] + + def fake_cdp(method, **kwargs): + captured.append((method, kwargs)) + if kwargs["expression"] == expr: + return _illegal_return_response() + return {"result": {"value": 1}} + with patch("browser_harness.helpers.cdp", side_effect=fake_cdp): - helpers.js(expr) - assert _evaluated_expression(captured) == f"(function(){{{expr}}})()" + assert helpers.js(expr) 
== 1 + + assert [kw["expression"] for m, kw in captured if m == "Runtime.evaluate"] == [ + expr, + f"(function(){{{expr}}})()", + ] @pytest.mark.parametrize( diff --git a/tests/unit/test_admin.py b/tests/unit/test_admin.py index 0541caef..cdb2c161 100644 --- a/tests/unit/test_admin.py +++ b/tests/unit/test_admin.py @@ -1,4 +1,5 @@ import pytest +from pathlib import Path from browser_harness import admin @@ -254,6 +255,31 @@ def test_run_doctor_skips_snap_detect_on_non_linux(monkeypatch, capsys): assert "[snap-detect]" not in out +def test_run_doctor_prints_source_path_and_mismatch_warning(monkeypatch, capsys): + monkeypatch.setattr(admin, "_version", lambda: "0.1.0") + monkeypatch.setattr(admin, "_install_mode", lambda: "git") + monkeypatch.setattr(admin, "_chrome_running", lambda: True) + monkeypatch.setattr(admin, "daemon_alive", lambda: True) + monkeypatch.setattr(admin, "browser_connections", lambda: []) + monkeypatch.setattr(admin, "_latest_release_tag", lambda: "0.1.0") + monkeypatch.setattr(admin, "_package_source_path", lambda: Path("/installed/src/browser_harness")) + monkeypatch.setattr(admin, "_doctor_source_mismatch", lambda: { + "cwd_source": "/checkout/src/browser_harness", + "package_source": "/installed/src/browser_harness", + }) + monkeypatch.setattr("platform.system", lambda: "Darwin") + monkeypatch.setattr("shutil.which", lambda _cmd: None) + monkeypatch.delenv("BROWSER_USE_API_KEY", raising=False) + + assert admin.run_doctor() == 0 + + out = capsys.readouterr().out + assert "source path /installed/src/browser_harness" in out + assert "[source-mismatch]" in out + assert "Current directory contains: /checkout/src/browser_harness" in out + assert "Imported browser-harness from: /installed/src/browser_harness" in out + + def test_run_doctor_fix_snap_prints_steps(capsys): assert admin.run_doctor_fix_snap() == 0 out = capsys.readouterr().out diff --git a/tests/unit/test_helpers.py b/tests/unit/test_helpers.py index 8b43f371..307ab91c 100644 --- 
a/tests/unit/test_helpers.py +++ b/tests/unit/test_helpers.py @@ -221,6 +221,19 @@ def fake_js(expr, **kwargs): # --- tabs / profile contexts --- +def test_new_tab_reuses_attached_blank_tab_for_url(): + with patch("browser_harness.helpers.current_tab", return_value={"targetId": "blank-target", "url": "about:blank"}), \ + patch("browser_harness.helpers.goto_url") as goto_url, \ + patch("browser_harness.helpers.cdp") as cdp, \ + patch("browser_harness.helpers.switch_tab") as switch_tab: + result = helpers.new_tab("https://example.test/") + + assert result == "blank-target" + goto_url.assert_called_once_with("https://example.test/") + cdp.assert_not_called() + switch_tab.assert_not_called() + + def test_list_tabs_filters_to_current_browser_context(): def fake_send(req): if req.get("meta") == "current_tab": diff --git a/tests/unit/test_run.py b/tests/unit/test_run.py index 20783e03..656a0261 100644 --- a/tests/unit/test_run.py +++ b/tests/unit/test_run.py @@ -1,3 +1,4 @@ +import json import sys from io import StringIO from unittest.mock import patch @@ -238,3 +239,58 @@ def test_cli_doctor_rejects_unknown_flags(): assert ei.value.code == 2 assert "usage" in err.getvalue().lower() + +def test_profiles_cli_runs_without_daemon(): + stdout = StringIO() + + with patch.object(sys, "argv", ["browser-harness", "profiles"]), \ + patch("sys.stdout", stdout), \ + patch("browser_harness.run.ensure_daemon") as ensure_daemon, \ + patch("browser_harness.run.browser_profiles", return_value={"profiles": []}) as profiles: + run.main() + + ensure_daemon.assert_not_called() + profiles.assert_called_once_with(verbose=False) + assert json.loads(stdout.getvalue()) == {"profiles": []} + + +def test_profiles_cli_supports_verbose_without_daemon(): + stdout = StringIO() + + with patch.object(sys, "argv", ["browser-harness", "profiles", "--verbose"]), \ + patch("sys.stdout", stdout), \ + patch("browser_harness.run.ensure_daemon") as ensure_daemon, \ + patch("browser_harness.run.browser_profiles", 
return_value={"status": "ok"}) as profiles: + run.main() + + ensure_daemon.assert_not_called() + profiles.assert_called_once_with(verbose=True) + assert json.loads(stdout.getvalue()) == {"status": "ok"} + + +def test_use_profile_cli_runs_without_daemon(): + stdout = StringIO() + + with patch.object(sys, "argv", ["browser-harness", "use-profile", "google-chrome:Default"]), \ + patch("sys.stdout", stdout), \ + patch("browser_harness.run.ensure_daemon") as ensure_daemon, \ + patch("browser_harness.run.browser_use_profile", return_value={"selected": "google-chrome:Default"}) as use_profile: + run.main() + + ensure_daemon.assert_not_called() + use_profile.assert_called_once_with("google-chrome:Default") + assert json.loads(stdout.getvalue()) == {"selected": "google-chrome:Default"} + + +def test_open_profile_cli_runs_without_daemon_and_without_marker(): + stdout = StringIO() + + with patch.object(sys, "argv", ["browser-harness", "open-profile", "google-chrome:Default"]), \ + patch("sys.stdout", stdout), \ + patch("browser_harness.run.ensure_daemon") as ensure_daemon, \ + patch("browser_harness.run.open_local_profile", return_value={"opened": True}) as open_profile: + run.main() + + ensure_daemon.assert_not_called() + open_profile.assert_called_once_with("google-chrome:Default", marker=False) + assert json.loads(stdout.getvalue()) == {"opened": True} From cc2dbc2500e2b0253e2d94e61434b67e41f8f27f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gregor=20=C5=BDuni=C4=8D?= <36313686+gregpr07@users.noreply.github.com> Date: Thu, 18 Jun 2026 18:44:00 -0700 Subject: [PATCH 11/15] Clean up browser manager PR edge cases --- TEMP_BROWSER_MANAGER_CONTEXT_DO_NOT_MERGE.md | 142 ------------------- src/browser_harness/auth.py | 9 +- src/browser_harness/helpers.py | 3 +- tests/unit/test_auth.py | 27 ++++ tests/unit/test_helpers.py | 15 ++ 5 files changed, 51 insertions(+), 145 deletions(-) delete mode 100644 TEMP_BROWSER_MANAGER_CONTEXT_DO_NOT_MERGE.md diff --git 
a/TEMP_BROWSER_MANAGER_CONTEXT_DO_NOT_MERGE.md b/TEMP_BROWSER_MANAGER_CONTEXT_DO_NOT_MERGE.md deleted file mode 100644 index ba7f3318..00000000 --- a/TEMP_BROWSER_MANAGER_CONTEXT_DO_NOT_MERGE.md +++ /dev/null @@ -1,142 +0,0 @@ -# Temporary Browser Manager Context - -Remove this file before merging the PR. It is session context for review and follow-up, not product documentation. - -## Why This Branch Exists - -The current browser-harness works unusually well because the LLM sees the actual Python helper surface and can directly control browser/page behavior with very little indirection. The goal of this branch is to preserve that property while adding a tiny lifecycle layer for cases the current harness handles poorly: - -- many parallel agents; -- subagents needing either their own browser or a reused parent browser; -- remote/cloud browser creation from inside the harness flow; -- isolated per-browser daemon/runtime/tmp/artifact directories; -- safer cleanup and switching between browser backends. - -The important constraint from the discussion was: do not turn the LLM into a browser manager with a complicated control plane. The LLM should see a small set of obvious helpers, then use the existing page helpers exactly as before. - -## Final LLM-Facing Interface - -The intended surface is: - -```python -browser_status() -browser_new(backend="cloud"|"managed", profile="clean", proxy_country=None, reason=None) -browser_list() -browser_switch(browser_id) -browser_close(browser_id=None) -``` - -After `browser_new(...)` or `browser_switch(...)`, normal browser-harness helpers such as `new_tab`, `page_info`, `capture_screenshot`, `click_at_xy`, `js`, and `cdp` work unchanged. - -For cloud browsers, missing auth should produce `cloud-auth-required`; the model should run `browser-harness auth login` and retry. The user logs in online and the API key is stored locally without being printed into chat. 
If a user directly provides an API key, the safe storage path is `browser-harness auth login --api-key-stdin`, never a command-line argument. - -The model does not need to know about sockets, daemon names, runtime dirs, CDP URLs, Browser Use browser IDs, or process cleanup. Those are manager internals. - -## Why Python Instead Of Rust - -This was switched from the earlier Rust manager direction to Python because browser-harness is already a Python package and the simplest install path matters more than a theoretically cleaner standalone daemon. - -Python keeps the end-to-end flow simple: - -```bash -uv tool install -e . -browser-harness <<'PY' -print(browser_new(backend="cloud", proxy_country="us")) -new_tab("https://example.com") -print(page_info()) -print(browser_close()) -PY -``` - -No separate Rust build, no extra binary distribution problem, and no cross-language install story. The manager daemon is just another Python module/script in the package. - -## Architecture - -The manager owns browser leases. A lease includes: - -- `browser_id`; -- backend type: `cloud` or `managed`; -- per-browser harness daemon name; -- per-browser runtime/tmp/download/artifact/profile dirs; -- CDP endpoint info; -- owner agent and allowed agent ids; -- an active execution lock. - -The runtime path is: - -```text -LLM code - -> browser_* helper - -> manager_client over Unix socket - -> manager_daemon creates/switches/closes lease - -> per-browser browser_harness.daemon - -> existing page helpers talk to that daemon -``` - -The existing non-manager browser-harness path still works. 
- -## Parallelism Reasoning - -The branch tries to handle the obvious 100-agent failure modes: - -- manager auto-start is single-flight via a file lock, so concurrent agents should not start competing managers; -- browser ids and daemon names are generated per lease; -- each lease gets isolated runtime/tmp/artifact/profile directories; -- manager registry state is persisted under the manager root; -- browser creation does not hold the global manager lock while slow cloud/local startup happens; -- execution locks are per client process, so two simultaneous `browser-harness` invocations from the same agent do not mutate the same browser at once; -- cross-run close/switch attempts are rejected. - -This is still not a full stress-test result. It is the first implementation pass with targeted unit coverage for the scary cases. - -## Subagent Model - -The harness cannot rely on controlling Codex subagent spawn parameters. The practical design is therefore prompt/interface based: - -- default subagent behavior: call `browser_new(...)` and get an isolated browser; -- reuse behavior: parent gives a `browser_id`, subagent calls `browser_switch(browser_id)`; -- if the browser is busy, the manager returns `busy`, and the safe action is to wait or call `browser_new(...)`. - -This keeps the LLM-visible protocol minimal and avoids requiring Codex runtime changes. - -## Local Browser Note - -The VM used for this work must not start local Chrome or Chromium. Local managed-browser code exists, but local startup was intentionally not smoke-tested here. - -Cloud/live lifecycle should be tested separately with a Browser Use API key in the environment. Do not commit keys or put them in docs. - -OAuth auth was added after this note was first created. Cloud lifecycle can now also be tested after `browser-harness auth login`, which stores a local Browser Use API key outside the repo. 
- -## Verification Done In This Session - -Commands run: - -```bash -uv run --with pytest pytest -q tests/unit -uv run python -m compileall -q src/browser_harness -``` - -Result at the time this note was written: - -```text -101 passed -``` - -A no-browser protocol smoke was also run: - -- auto-start Python manager; -- `browser_status()` returned `no-active-browser`; -- `browser_list()` returned `[]`; -- test manager was killed afterward. - -No local Chrome/Chromium was started. - -## What To Review Before Merge - -- Decide whether manager mode should be enabled by AST-detecting lifecycle helper calls, env vars only, or both. -- Live-test `browser_new(backend="cloud")` and `browser_close()` with a real Browser Use key. -- Live-test `browser_new(backend="managed")` on a laptop, not the VM. -- Stress-test many parallel agents/processes using the same manager root. -- Decide whether stale lease cleanup needs a sweeper. -- Decide whether profile support should remain `profile="clean"` only for the first version. -- Remove this file before merging. 
diff --git a/src/browser_harness/auth.py b/src/browser_harness/auth.py index 4014a49f..620b3aa1 100644 --- a/src/browser_harness/auth.py +++ b/src/browser_harness/auth.py @@ -449,17 +449,22 @@ def _post_json(url: str, payload: dict) -> dict: desc = data.get("error_description") or data.get("reason") or data.get("message") detail = f": {desc}" if desc else "" raise AuthError(f"{err}{detail}") from e + except urllib.error.URLError as e: + raise AuthError(f"network error: {e.reason}") from e def _read_manual_api_key(input_stream=None) -> str: stream = input_stream or sys.stdin if hasattr(stream, "isatty") and stream.isatty(): - key = getpass.getpass("Browser Use API key: ") + try: + key = getpass.getpass("Browser Use API key: ") + except EOFError as e: + raise AuthError("no API key provided") from e else: key = stream.read() key = (key or "").strip() if not key: - raise AuthError("no API key provided on stdin") + raise AuthError("no API key provided") if len(key) < 20: raise AuthError("API key looks too short") return key diff --git a/src/browser_harness/helpers.py b/src/browser_harness/helpers.py index 473f8809..60673573 100644 --- a/src/browser_harness/helpers.py +++ b/src/browser_harness/helpers.py @@ -230,7 +230,8 @@ def press_key(key, modifiers=0): so listeners checking e.keyCode / e.key all fire.""" vk, code, text = _KEYS.get(key, (ord(key[0]) if len(key) == 1 else 0, key, key if len(key) == 1 else "")) base = {"key": key, "code": code, "modifiers": modifiers, "windowsVirtualKeyCode": vk, "nativeVirtualKeyCode": vk} - printable_char = len(key) == 1 and bool(text) + shortcut_modifiers = modifiers & (1 | 2 | 4) # Alt/Ctrl/Meta turn single keys into shortcuts. 
+ printable_char = len(key) == 1 and bool(text) and not shortcut_modifiers cdp("Input.dispatchKeyEvent", type="keyDown", **base, **({} if printable_char or not text else {"text": text})) if printable_char: cdp("Input.dispatchKeyEvent", type="char", text=text, **{k: v for k, v in base.items() if k != "text"}) diff --git a/tests/unit/test_auth.py b/tests/unit/test_auth.py index c47dd1ea..56a1ca61 100644 --- a/tests/unit/test_auth.py +++ b/tests/unit/test_auth.py @@ -1,9 +1,12 @@ import json import stat import threading +import urllib.error import urllib.request from io import StringIO +import pytest + from browser_harness import auth @@ -79,6 +82,30 @@ def test_api_key_stdin_login_rejects_missing_or_short_key(monkeypatch, tmp_path) assert not (tmp_path / "auth.json").exists() +def test_manual_api_key_tty_eof_becomes_auth_error(monkeypatch): + class TtyInput: + def isatty(self): + return True + + def fake_getpass(_prompt): + raise EOFError + + monkeypatch.setattr(auth.getpass, "getpass", fake_getpass) + + with pytest.raises(auth.AuthError, match="no API key provided"): + auth._read_manual_api_key(TtyInput()) + + +def test_post_json_network_error_becomes_auth_error(monkeypatch): + def fake_urlopen(_req, timeout): + raise urllib.error.URLError("offline") + + monkeypatch.setattr(auth.urllib.request, "urlopen", fake_urlopen) + + with pytest.raises(auth.AuthError, match="network error: offline"): + auth._post_json("https://api.example.test/auth", {"x": 1}) + + def test_browser_login_callback_exchanges_and_stores_key(monkeypatch, tmp_path): monkeypatch.setenv("BH_AUTH_PATH", str(tmp_path / "auth.json")) calls = [] diff --git a/tests/unit/test_helpers.py b/tests/unit/test_helpers.py index 307ab91c..da00ac28 100644 --- a/tests/unit/test_helpers.py +++ b/tests/unit/test_helpers.py @@ -165,6 +165,21 @@ def fake_js(expr, **kwargs): assert "Backspace" not in keys_seen +def test_press_key_modified_character_does_not_emit_char_event(): + key_events = [] + + def fake_cdp(method, 
**kwargs): + if method == "Input.dispatchKeyEvent": + key_events.append(kwargs) + return {} + + with patch("browser_harness.helpers.cdp", side_effect=fake_cdp): + helpers.press_key("a", modifiers=4) + + assert [e["type"] for e in key_events] == ["keyDown", "keyUp"] + assert not any(e.get("type") == "char" for e in key_events) + + # --- wait_for_element --- def test_wait_for_element_returns_true_when_found_immediately(): From 64af3371b3263007e8e96e554ad6264579c080f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gregor=20=C5=BDuni=C4=8D?= <36313686+gregpr07@users.noreply.github.com> Date: Thu, 18 Jun 2026 18:47:05 -0700 Subject: [PATCH 12/15] Reduce auth status output surface --- src/browser_harness/auth.py | 27 ++++++--------------------- tests/unit/test_auth.py | 16 ++++++++++++++-- 2 files changed, 20 insertions(+), 23 deletions(-) diff --git a/src/browser_harness/auth.py b/src/browser_harness/auth.py index 620b3aa1..f5e76457 100644 --- a/src/browser_harness/auth.py +++ b/src/browser_harness/auth.py @@ -193,15 +193,7 @@ def auth_status() -> dict: stored = stored_auth_record() if not stored or not stored.get("api_key"): return {"status": "missing", "source": None, "path": str(auth_path())} - return { - "status": "authenticated", - "source": stored.get("source") or "stored", - "path": str(auth_path()), - "api_key_id": stored.get("api_key_id"), - "project_id": stored.get("project_id"), - "expires_at": stored.get("expires_at"), - "scopes": stored.get("scopes") or [], - } + return {"status": "authenticated", "source": "stored", "path": str(auth_path())} def pkce_pair() -> tuple[str, str]: @@ -294,7 +286,7 @@ def browser_login(*, open_url=True, json_output=False, timeout=AUTH_TIMEOUT_SECO print("Waiting for login to complete after you open the URL...", flush=True) record = complete_browser_auth(start, timeout=timeout) if json_output: - print(json.dumps(_stored_output(record)), flush=True) + print(json.dumps(_stored_success_output()), flush=True) else: print("Browser Use 
Cloud auth stored.") return record @@ -372,7 +364,7 @@ def device_login(*, open_url=True, json_output=False) -> AuthRecord: print("Waiting for login to complete...", flush=True) record = complete_device_auth(start) if json_output: - print(json.dumps(_stored_output(record)), flush=True) + print(json.dumps(_stored_success_output()), flush=True) else: print("Browser Use Cloud auth stored.") return record @@ -383,7 +375,7 @@ def api_key_stdin_login(*, json_output=False, input_stream=None) -> AuthRecord: record = AuthRecord(api_key=key, source="manual") save_auth_record(record) if json_output: - print(json.dumps(_stored_output(record)), flush=True) + print(json.dumps(_stored_success_output()), flush=True) else: print("Browser Use Cloud API key stored.") return record @@ -509,15 +501,8 @@ def _auth_error_code(message: str) -> str: return message.split(":", 1)[0] -def _stored_output(record: AuthRecord) -> dict: - return { - "status": "stored", - "api_key_id": record.api_key_id, - "project_id": record.project_id, - "expires_at": record.expires_at, - "scopes": record.scopes, - "path": str(auth_path()), - } +def _stored_success_output() -> dict: + return {"status": "stored", "path": str(auth_path())} def run_auth_cli(argv: list[str]) -> int: diff --git a/tests/unit/test_auth.py b/tests/unit/test_auth.py index 56a1ca61..3dfb06c7 100644 --- a/tests/unit/test_auth.py +++ b/tests/unit/test_auth.py @@ -33,9 +33,9 @@ def test_status_and_logout_for_stored_key(monkeypatch, tmp_path): removed = auth.clear_auth() assert status["status"] == "authenticated" - assert status["source"] == "oauth" - assert status["api_key_id"] == "key-123" + assert status["source"] == "stored" assert "api_key" not in status + assert "api_key_id" not in status assert mode == 0o600 assert removed is True assert auth.auth_status()["status"] == "missing" @@ -68,6 +68,18 @@ def test_api_key_stdin_login_stores_manual_key_without_printing(monkeypatch, tmp assert json.loads((tmp_path / 
"auth.json").read_text())["browser_use"]["source"] == "manual" +def test_api_key_stdin_json_login_outputs_no_secret(monkeypatch, tmp_path, capsys): + monkeypatch.delenv("BROWSER_USE_API_KEY", raising=False) + monkeypatch.setenv("BH_AUTH_PATH", str(tmp_path / "auth.json")) + manual_key = "manual-key-1234567890abcdef" + + auth.api_key_stdin_login(json_output=True, input_stream=StringIO(manual_key + "\n")) + out = capsys.readouterr().out + + assert manual_key not in out + assert json.loads(out) == {"status": "stored", "path": str(tmp_path / "auth.json")} + + def test_api_key_stdin_login_rejects_missing_or_short_key(monkeypatch, tmp_path): monkeypatch.setenv("BH_AUTH_PATH", str(tmp_path / "auth.json")) From 134e7fea3279f5b5cbc961a282f0d4c7a07eded3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gregor=20=C5=BDuni=C4=8D?= <36313686+gregpr07@users.noreply.github.com> Date: Sat, 20 Jun 2026 17:14:16 -0700 Subject: [PATCH 13/15] Prepare browser-harness package releases --- .github/workflows/release.yml | 25 +++ .gitignore | 2 + README.md | 12 +- SKILL.md | 7 +- install.md | 57 ++++-- pyproject.toml | 26 ++- skills/browser-harness/references/install.md | 36 +++- src/browser_harness/SKILL.md | 1 + src/browser_harness/_ipc.py | 11 +- src/browser_harness/admin.py | 92 +++++++--- src/browser_harness/auth.py | 6 +- src/browser_harness/daemon.py | 3 +- src/browser_harness/helpers.py | 3 +- src/browser_harness/local_profiles.py | 27 +-- src/browser_harness/manager_client.py | 18 ++ src/browser_harness/manager_daemon.py | 61 +++++- src/browser_harness/manager_runtime.py | 11 +- src/browser_harness/paths.py | 43 +++++ src/browser_harness/run.py | 40 +++- src/browser_harness/telemetry.py | 184 +++++++++++++++++++ 20 files changed, 566 insertions(+), 99 deletions(-) create mode 100644 .github/workflows/release.yml create mode 120000 src/browser_harness/SKILL.md create mode 100644 src/browser_harness/paths.py create mode 100644 src/browser_harness/telemetry.py diff --git 
a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 00000000..a9fb4072 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,25 @@ +name: release + +on: + release: + types: [published] + +jobs: + publish: + name: publish to PyPI + runs-on: ubuntu-latest + environment: pypi + permissions: + contents: read + id-token: write + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + - name: Build distributions + run: | + python -m pip install --upgrade build + python -m build + - name: Publish distributions + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.gitignore b/.gitignore index ecc4cba6..bb388a36 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,8 @@ __pycache__/ *.log .env .browser-harness-dev/ +build/ +dist/ uv.lock *.egg-info/ .idea/ diff --git a/README.md b/README.md index 1a1067c7..fafaf556 100644 --- a/README.md +++ b/README.md @@ -23,9 +23,7 @@ One websocket to Chrome, nothing between. The agent writes what's missing during Paste into Claude Code or Codex: ```text -Set up https://github.com/browser-use/browser-harness for me. - -Read `install.md` and follow the steps to install browser-harness and connect it to my browser. +Install browser-harness with uv, register the skill from `browser-harness skill`, and connect it to my browser. ``` The agent will open `chrome://inspect/#remote-debugging`. Tick the checkbox so the agent can connect to your browser: @@ -51,8 +49,8 @@ Stealth, sub-agents, or headless deployment.
- `install.md` — first-time install and browser bootstrap - `SKILL.md` — day-to-day usage - `src/browser_harness/` — protected core package -- `agent-workspace/agent_helpers.py` — helper code the agent edits -- `agent-workspace/domain-skills/` — reusable site-specific skills the agent edits +- `${XDG_CONFIG_HOME:-~/.config}/browser-harness/agent-workspace/agent_helpers.py` — helper code the agent edits +- `${XDG_CONFIG_HOME:-~/.config}/browser-harness/agent-workspace/domain-skills/` — reusable site-specific skills the agent edits Plain `browser-harness` helper calls use the selected local browser profile. For isolated or cloud work, start with `browser_new("private")` or `browser_new("cloud")`, keep the returned short `id`, and call `browser(id)` before page helpers in each script; cloud responses include a `live_url` preview when available. @@ -75,7 +73,7 @@ The dev launcher uses a short checkout-specific manager path under `/tmp`, so it PRs and improvements welcome. The best way to help: **contribute a new domain skill** under [agent-workspace/domain-skills/](agent-workspace/domain-skills/) for a site or task you use often (LinkedIn outreach, ordering on Amazon, filing expenses, etc.). Each skill teaches the agent the selectors, flows, and edge cases it would otherwise have to rediscover. - **Skills are written by the harness, not by you.** Just run your task with the agent — when it figures something non-obvious out, it files the skill itself (see [SKILL.md](SKILL.md)). Please don't hand-author skill files; agent-generated ones reflect what actually works in the browser. -- Open a PR with the generated `agent-workspace/domain-skills//` folder — small and focused is great. +- Open a PR with the generated `domain-skills//` folder copied into this repo's `agent-workspace/domain-skills/` examples — small and focused is great. - Bug fixes, docs tweaks, and helper improvements are equally welcome. - Browse existing skills (`github/`, `linkedin/`, `amazon/`, ...) 
to see the shape. @@ -83,7 +81,7 @@ If you're not sure where to start, open an issue and we'll point you somewhere u ## Domain skills -Set `BH_DOMAIN_SKILLS=1` to enable [agent-workspace/domain-skills/](agent-workspace/domain-skills/) — community-contributed per-site playbooks `goto_url` surfaces by domain. Contribute via PR. +Set `BH_DOMAIN_SKILLS=1` to enable domain skills from the agent workspace. This repo's [agent-workspace/domain-skills/](agent-workspace/domain-skills/) directory contains examples to contribute via PR. --- diff --git a/SKILL.md b/SKILL.md index 217f5e90..f5681f77 100644 --- a/SKILL.md +++ b/SKILL.md @@ -1,6 +1,6 @@ --- -name: browser -description: Control a browser with Python helpers. Use for web automation, scraping, testing, or interacting with pages. +name: browser-harness +description: Always use browser-harness for any web interaction: automation, scraping, testing, or site/app work. --- # browser-harness @@ -87,6 +87,7 @@ Never put API keys in command-line arguments. - First navigation is `new_tab(url)`, not `goto_url(url)`. - Screenshots are the default way to understand and verify visible state: `capture_screenshot()`. +- If using `view_image`, call it after `capture_screenshot()` returns the PNG path; do not parallelize capture and viewing. - Click visible targets by screenshot coordinates: `click_at_xy(x, y)`. - Use `js(...)` for DOM inspection or extraction when coordinates are the wrong tool. - After navigation, call `wait_for_load()`. @@ -119,6 +120,6 @@ If you get stuck on a browser mechanic, check `interaction-skills/` for focused ## Domain Skills -Domain skills are off by default. If `BH_DOMAIN_SKILLS=1` and the task is site-specific, read every file in `agent-workspace/domain-skills//` before inventing an approach. +Domain skills are off by default. If `BH_DOMAIN_SKILLS=1` and the task is site-specific, read every file in `$BH_AGENT_WORKSPACE/domain-skills//` before inventing an approach. 
Default workspace: `~/.config/browser-harness/agent-workspace`. When enabled, `goto_url(...)` returns up to 10 matching skill filenames for the current host. diff --git a/install.md b/install.md index ca8fb5fd..560bc874 100644 --- a/install.md +++ b/install.md @@ -9,40 +9,71 @@ Use this file only for browser-harness install, browser connection setup, and co ## Recommended `browser-harness` setup -Clone the repo once into a durable location, then install it as an editable tool so `browser-harness` works from any directory: +Install the released package as a global uv tool: ```bash -git clone https://github.com/browser-use/browser-harness -cd browser-harness -uv tool install -e . +uv tool install browser-harness command -v browser-harness ``` -That keeps the command global while still pointing at the real repo checkout, so when the agent edits `agent-workspace/agent_helpers.py` the next `browser-harness` uses the new code immediately. Prefer a stable path like `~/Developer/browser-harness`, not `/tmp`. +For browser-harness development, clone the repo into a durable path and use `uv tool install -e .` from the checkout. Normal users and other agents should use the PyPI package. ## Make browser-harness global for the current agent -After the repo is installed, register this repo's `SKILL.md` with the agent you are using: +After the command is installed, register a skill named `browser-harness` with this trigger: -- **Codex**: add this file as a global skill at `$CODEX_HOME/skills/browser-harness/SKILL.md` (often `~/.codex/skills/browser-harness/SKILL.md`). A symlink to this repo's `SKILL.md` is fine. +```text +Always use browser-harness for any web interaction: automation, scraping, testing, or site/app work. +``` + +Use `browser-harness skill` to print the packaged skill text. + +Before registering it, remove stale user-installed browser skills that came from old browser-harness installs. Do not edit bundled/vendor plugin caches. 
+ +- **Codex**: add the printed skill as a global skill at `$CODEX_HOME/skills/browser-harness/SKILL.md` (often `~/.codex/skills/browser-harness/SKILL.md`). ```bash - mkdir -p "${CODEX_HOME:-$HOME/.codex}/skills/browser-harness" && ln -sf "$PWD/SKILL.md" "${CODEX_HOME:-$HOME/.codex}/skills/browser-harness/SKILL.md" + for root in "${CODEX_HOME:-$HOME/.codex}/skills" "$HOME/.agents/skills"; do + rm -rf "$root/browser-harness" "$root/browser" "$root/browser-use" + done + mkdir -p "${CODEX_HOME:-$HOME/.codex}/skills/browser-harness" + browser-harness skill > "${CODEX_HOME:-$HOME/.codex}/skills/browser-harness/SKILL.md" ``` -- **Claude Code**: add an import to `~/.claude/CLAUDE.md` that points at this repo's `SKILL.md`, for example `@~/Developer/browser-harness/SKILL.md`. +- **Claude Code**: paste or import the output of `browser-harness skill` into the global instruction file Claude Code uses for reusable skills. + +- **Other agents**: tell the agent to install/register a skill named `browser-harness`, use the trigger above verbatim, and use `browser-harness skill` as the skill body. -This makes new Codex or Claude Code sessions in other folders load the runtime browser harness instructions automatically. +This makes new agent sessions in other folders load the runtime browser harness instructions automatically. ## Keeping the harness current -- On each run, `browser-harness` prints `[browser-harness] update available: X -> Y` (once per day) when a newer GitHub release exists. -- When you see that banner, run `browser-harness --update -y` yourself — don't ask the user. It pulls the new code (`git pull --ff-only` for editable clones, `uv tool upgrade browser-harness` for PyPI installs) and stops the running daemon so the next call picks up the new code. With `-y` it won't prompt. +- On each run, `browser-harness` prints `[browser-harness] update available: X -> Y` (once per day) when a newer PyPI release exists. 
+- `browser-harness --doctor` also checks the latest PyPI version. +- When you see an update banner, decide whether to run `browser-harness --update -y`. It pulls the new code (`git pull --ff-only` for editable clones, `uv tool upgrade browser-harness` for PyPI installs) and stops the running daemon so the next call picks up the new code. With `-y` it won't prompt. - `--update` refuses to run on an editable clone with uncommitted changes. If that happens, tell the user and let them resolve the dirty worktree. +- PyPI installs require `uv` for updates; there is no pip fallback. ## Maintenance commands - browser-harness --doctor — show version, install mode, daemon and Chrome state, and whether an update is pending. +- browser-harness telemetry status — show opt-out telemetry state. +- browser-harness telemetry disable — opt out of anonymous usage telemetry. + +## Files on disk + +By default browser-harness keeps its state under `${XDG_CONFIG_HOME:-~/.config}/browser-harness`: + +```text +auth.json Browser Use Cloud auth +settings.json selected local Chrome profile and future preferences +telemetry.json anonymous install id + telemetry opt-out +agent-workspace/ agent-written helpers and domain skills +runtime/ sockets, pids, manager leases, managed browser profiles +tmp/ logs, screenshots, scratch files +``` + +Override the whole home with `BH_HOME` or `BROWSER_HARNESS_HOME`. Override specific dirs with `BH_CONFIG_DIR`, `BH_AGENT_WORKSPACE`, `BH_RUNTIME_DIR`, or `BH_TMP_DIR`. ## Architecture @@ -167,7 +198,7 @@ If the user hasn't said which connection method to use, default to Way 1 if Chro PY ``` - If that hangs, escalate: kill all Chrome and daemon processes, then reopen Chrome and retry. On macOS/Linux, also remove `/tmp/bu-default.sock` and `/tmp/bu-default.pid` if they linger. + If that hangs, escalate: kill all Chrome and daemon processes, then reopen Chrome and retry. 
On macOS/Linux, also remove lingering `bu-default.sock` and `bu-default.pid` files under `${XDG_CONFIG_HOME:-~/.config}/browser-harness/runtime`. 4. After any fix, retry step 1. diff --git a/pyproject.toml b/pyproject.toml index 02b7d0bf..f7a3c346 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,12 +1,26 @@ [build-system] -requires = ["setuptools>=69"] +requires = ["setuptools>=77"] build-backend = "setuptools.build_meta" [project] name = "browser-harness" -version = "0.1.0" +version = "0.1.1rc1" description = "The simplest, thinnest, and most powerful harness to control your real browser with your agent." +readme = "README.md" requires-python = ">=3.11" +license = "MIT" +license-files = ["LICENSE"] +keywords = ["agent", "automation", "browser", "cdp", "chrome", "scraping"] +classifiers = [ + "Development Status :: 3 - Alpha", + "Environment :: Console", + "Intended Audience :: Developers", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Internet :: WWW/HTTP :: Browsers", + "Topic :: Software Development :: Testing", +] dependencies = [ "cdp-use==1.4.5", "fetch-use==0.4.0", @@ -18,11 +32,19 @@ dependencies = [ browser-harness = "browser_harness.run:main" browser-harness-manager = "browser_harness.manager_daemon:main" +[project.urls] +Homepage = "https://github.com/browser-use/browser-harness" +Repository = "https://github.com/browser-use/browser-harness" +Issues = "https://github.com/browser-use/browser-harness/issues" + [tool.setuptools] package-dir = {"" = "src"} [tool.setuptools.packages.find] where = ["src"] +[tool.setuptools.package-data] +browser_harness = ["SKILL.md"] + [tool.pytest.ini_options] pythonpath = ["src"] diff --git a/skills/browser-harness/references/install.md b/skills/browser-harness/references/install.md index 355ca43d..34b59d03 100644 --- a/skills/browser-harness/references/install.md +++ b/skills/browser-harness/references/install.md @@ -5,13 
+5,37 @@ This is a **one-time prerequisite**, not part of the regular AI workflow. Do it ## Install the command ```bash -git clone https://github.com/browser-use/browser-harness -cd browser-harness -uv tool install -e . +uv tool install browser-harness command -v browser-harness # should print a path ``` -Editable (`-e`) keeps the command global while pointing at the real checkout, so edits to `agent-workspace/agent_helpers.py` take effect on the next call. Prefer a durable path (e.g. `~/Developer/browser-harness`), not `/tmp`. +For browser-harness development, clone the repo into a durable path and run `uv tool install -e .` from the checkout. + +## Register the skill + +Install/register a skill named `browser-harness` with this trigger: + +```text +Always use browser-harness for any web interaction: automation, scraping, testing, or site/app work. +``` + +Use this command for the skill body: + +```bash +browser-harness skill +``` + +For Codex: + +```bash +for root in "${CODEX_HOME:-$HOME/.codex}/skills" "$HOME/.agents/skills"; do + rm -rf "$root/browser-harness" "$root/browser" "$root/browser-use" +done +mkdir -p "${CODEX_HOME:-$HOME/.codex}/skills/browser-harness" +browser-harness skill > "${CODEX_HOME:-$HOME/.codex}/skills/browser-harness/SKILL.md" +``` + +Only remove stale user-installed browser skills, never bundled/vendor plugin caches. ## Connect to a browser @@ -32,4 +56,6 @@ The canonical, fully-detailed connection reference and troubleshooting live in t ## Keeping current -`browser-harness` prints an update banner when a newer release exists; run `browser-harness --update -y` to pull it. +`browser-harness` prints an update banner when a newer PyPI release exists; run `browser-harness --update -y` when you decide to upgrade. `browser-harness --doctor` also checks the latest version. Telemetry is anonymous and opt-out with `browser-harness telemetry disable`. 
+ +State lives under `${XDG_CONFIG_HOME:-~/.config}/browser-harness` by default: auth, selected profile, telemetry id, agent-workspace, runtime sockets, manager leases, logs, screenshots, and tmp files. Override with `BH_HOME` or `BROWSER_HARNESS_HOME`. diff --git a/src/browser_harness/SKILL.md b/src/browser_harness/SKILL.md new file mode 120000 index 00000000..4215faef --- /dev/null +++ b/src/browser_harness/SKILL.md @@ -0,0 +1 @@ +../../SKILL.md \ No newline at end of file diff --git a/src/browser_harness/_ipc.py b/src/browser_harness/_ipc.py index 1f32daee..40da3924 100644 --- a/src/browser_harness/_ipc.py +++ b/src/browser_harness/_ipc.py @@ -1,22 +1,23 @@ """Daemon IPC plumbing. AF_UNIX socket on POSIX, TCP loopback on Windows.""" -import asyncio, json, os, re, secrets, socket, subprocess, sys, tempfile +import asyncio, json, os, re, secrets, socket, subprocess, sys from pathlib import Path +from . import paths + IS_WINDOWS = sys.platform == "win32" # Two caller-supplied dirs: # BH_RUNTIME_DIR — sock/port/pid. AF_UNIX sun_path is 104 bytes on macOS, so # the runtime dir must be short. Caller is responsible for keeping it # within budget. Falls back to BH_TMP_DIR (legacy single-dir callers), -# then to /tmp on POSIX (gettempdir() returns long /var/folders/... on -# macOS — unsafe for AF_UNIX) or tempfile.gettempdir() on Windows (TCP). +# then to the browser-harness runtime dir. # BH_TMP_DIR — screenshots, debug overlays, daemon log. No path-length # sensitivity; caller can use a deep persistent path. # When the caller supplies a per-instance dir for either purpose, files use # bare "bu" stems; otherwise "bu-" disambiguates co-tenants. 
BH_TMP_DIR = os.environ.get("BH_TMP_DIR") BH_RUNTIME_DIR = os.environ.get("BH_RUNTIME_DIR") or BH_TMP_DIR -_TMP = Path(BH_TMP_DIR or (tempfile.gettempdir() if IS_WINDOWS else "/tmp")) -_RUNTIME = Path(BH_RUNTIME_DIR or (tempfile.gettempdir() if IS_WINDOWS else "/tmp")) +_TMP = paths.tmp_dir() +_RUNTIME = paths.ensure_private_dir(Path(BH_RUNTIME_DIR).expanduser().resolve()) if BH_RUNTIME_DIR else paths.runtime_dir() _TMP.mkdir(parents=True, exist_ok=True) _RUNTIME.mkdir(parents=True, exist_ok=True) _NAME_RE = re.compile(r"\A[A-Za-z0-9_-]{1,64}\Z") diff --git a/src/browser_harness/admin.py b/src/browser_harness/admin.py index 184383c1..42b17263 100644 --- a/src/browser_harness/admin.py +++ b/src/browser_harness/admin.py @@ -1,9 +1,9 @@ import json import os +import re import socket import subprocess import sys -import tempfile import time import urllib.request from pathlib import Path @@ -11,6 +11,8 @@ from . import _ipc as ipc from . import context from . import local_profiles +from . import paths +from . 
import telemetry def _process_start_time(pid): @@ -106,7 +108,7 @@ def _process_start_time(pid): def _load_env(): repo_root = Path(__file__).resolve().parents[2] - workspace = Path(os.environ.get("BH_AGENT_WORKSPACE", repo_root / "agent-workspace")).expanduser() + workspace = paths.workspace_dir() for p in (repo_root / ".env", workspace / ".env"): if not p.exists(): continue @@ -126,8 +128,8 @@ def _load_env_file(p): NAME = os.environ.get("BU_NAME", "default") BU_API = "https://api.browser-use.com/api/v3" -GH_RELEASES = "https://api.github.com/repos/browser-use/browser-harness/releases/latest" -VERSION_CACHE = Path(tempfile.gettempdir()) / "bu-version-cache.json" +PYPI_JSON = "https://pypi.org/pypi/browser-harness/json" +VERSION_CACHE = paths.config_dir() / "version-cache.json" VERSION_CACHE_TTL = 24 * 3600 DOCTOR_TEXT_LIMIT = 140 @@ -167,7 +169,7 @@ def _log_tail(name, tmp_dir=None): class _DaemonStartLock: def __init__(self, name, runtime_dir=None): - base = Path(runtime_dir) if runtime_dir else Path(tempfile.gettempdir()) + base = Path(runtime_dir) if runtime_dir else paths.runtime_dir() self.path = base / f"bu-{name or NAME}.start.lock" self.file = None @@ -794,20 +796,30 @@ def _cache_read(): def _cache_write(data): try: + VERSION_CACHE.parent.mkdir(parents=True, exist_ok=True) VERSION_CACHE.write_text(json.dumps(data)) + if sys.platform != "win32": + os.chmod(VERSION_CACHE, 0o600) except OSError: pass def _latest_release_tag(force=False): - """Return latest release tag from GitHub, or None. Cached for 24h to avoid hammering the API.""" + """Return latest browser-harness version on PyPI, or None. 
Cached for 24h.""" cache = _cache_read() now = time.time() if not force and cache.get("tag") and now - cache.get("fetched_at", 0) < VERSION_CACHE_TTL: return cache["tag"] try: - req = urllib.request.Request(GH_RELEASES, headers={"Accept": "application/vnd.github+json"}) - tag = json.loads(urllib.request.urlopen(req, timeout=5).read()).get("tag_name") or "" + req = urllib.request.Request( + PYPI_JSON, + headers={"Accept": "application/json", "User-Agent": "browser-harness"}, + ) + data = json.loads(urllib.request.urlopen(req, timeout=5).read()) + tag = data.get("info", {}).get("version") or "" + releases = data.get("releases") or {} + if releases: + tag = max(releases, key=_version_tuple) except Exception: return cache.get("tag") # fall back to last known tag = tag.lstrip("v") @@ -816,17 +828,16 @@ def _latest_release_tag(force=False): def _version_tuple(v): - """Best-effort semver parse. Non-numeric components sort as 0, so pre-releases may not rank perfectly.""" - parts = [] - for s in (v or "").split("."): - m = "" - for ch in s: - if ch.isdigit(): - m += ch - else: - break - parts.append(int(m) if m else 0) - return tuple(parts) + """Best-effort PEP 440-ish key where rc/beta/alpha sort below final.""" + m = re.match(r"^\s*v?(\d+(?:\.\d+)*)(?:(a|b|rc)(\d+))?", v or "", re.I) + if not m: + return (0, 0, 0, 3, 0) + nums = [int(p) for p in m.group(1).split(".")[:3]] + nums.extend([0] * (3 - len(nums))) + pre = (m.group(2) or "").lower() + pre_rank = {"a": 0, "b": 1, "rc": 2}.get(pre, 3) + pre_num = int(m.group(3) or 0) + return (*nums, pre_rank, pre_num) def check_for_update(): @@ -944,7 +955,7 @@ def row(label, ok, detail=""): if latest: print(f" latest release {latest}" + (" (update available)" if newer else "")) else: - print(" latest release (could not reach github)") + print(" latest release (could not reach PyPI)") if source_mismatch: print("[source-mismatch]") print(f"Current directory contains: {source_mismatch['cwd_source']}") @@ -970,7 +981,19 @@ def 
row(label, ok, detail=""): row("profile-use installed", profile_use, "" if profile_use else "optional: curl -fsSL https://browser-use.com/profile.sh | sh") row("BROWSER_USE_API_KEY set", api_key, "" if api_key else "optional: needed only for cloud browsers / profile sync") # Core health = chrome + daemon. Profile-use/api-key are optional. - return 0 if (chrome and daemon) else 1 + healthy = chrome and daemon + telemetry.capture("browser_harness.doctor", { + "install_mode": mode, + "chrome_running": chrome, + "daemon_alive": daemon, + "active_connections": len(connections), + "profile_use_installed": profile_use, + "cloud_auth_env": api_key, + "latest_known": bool(latest), + "update_available": newer, + "result": "ok" if healthy else "fail", + }) + return 0 if healthy else 1 def _prompt_yes(question, default_yes=True, yes=False): @@ -996,13 +1019,14 @@ def run_update(yes=False): # version. Otherwise `newer=False` just means "couldn't compare" — proceed. if cur and latest and not newer: print(f"browser-harness is up to date ({cur}).") + telemetry.capture("browser_harness.update", {"install_mode": _install_mode(), "result": "up-to-date"}) return 0 if cur and latest: print(f"updating browser-harness: {cur} -> {latest}") elif latest: print(f"installed version unknown; will try to update to {latest}.") else: - print("could not reach github; will try to update anyway.") + print("could not reach PyPI; will try to update anyway.") mode = _install_mode() if mode == "git": @@ -1010,23 +1034,30 @@ def run_update(yes=False): status = subprocess.run(["git", "-C", str(repo), "status", "--porcelain"], capture_output=True, text=True) if status.returncode != 0: print(f"git status failed: {status.stderr.strip()}", file=sys.stderr) + telemetry.capture("browser_harness.update", {"install_mode": mode, "result": "git-status-failed"}) return 1 if status.stdout.strip(): print(f"refusing to update: uncommitted changes in {repo}", file=sys.stderr) print("commit or stash them first, or run 
`git -C %s pull` yourself." % repo, file=sys.stderr) + telemetry.capture("browser_harness.update", {"install_mode": mode, "result": "dirty-git"}) return 1 r = subprocess.run(["git", "-C", str(repo), "pull", "--ff-only"]) if r.returncode != 0: + telemetry.capture("browser_harness.update", {"install_mode": mode, "result": "git-pull-failed"}) return r.returncode elif mode == "pypi": - tool_upgrade = subprocess.run(["uv", "tool", "upgrade", "browser-harness"]) + try: + tool_upgrade = subprocess.run(["uv", "tool", "upgrade", "browser-harness"]) + except FileNotFoundError: + print("uv is required to update PyPI installs: https://docs.astral.sh/uv/getting-started/installation/", file=sys.stderr) + telemetry.capture("browser_harness.update", {"install_mode": mode, "result": "uv-missing"}) + return 1 if tool_upgrade.returncode != 0: - # Fall back to pip in case this wasn't a `uv tool install`. - pip = subprocess.run([sys.executable, "-m", "pip", "install", "--upgrade", "browser-harness"]) - if pip.returncode != 0: - return pip.returncode + telemetry.capture("browser_harness.update", {"install_mode": mode, "result": "uv-upgrade-failed"}) + return tool_upgrade.returncode else: print("unknown install mode; can't auto-update.", file=sys.stderr) + telemetry.capture("browser_harness.update", {"install_mode": mode, "result": "unknown-install-mode"}) return 1 # Invalidate banner/tag cache so the new version doesn't keep nagging. @@ -1040,5 +1071,12 @@ def run_update(yes=False): print("daemon stopped; it will auto-restart on next `browser-harness` call.") else: print("daemon left running on old code. run `browser-harness` and it'll use the new code after the daemon recycles.") + try: + from . 
import manager_client + if manager_client.stop_manager_if_running(): + print("browser manager stopped; it will auto-restart on next manager call.") + except Exception: + pass print("update complete.") + telemetry.capture("browser_harness.update", {"install_mode": mode, "result": "updated"}) return 0 diff --git a/src/browser_harness/auth.py b/src/browser_harness/auth.py index f5e76457..4153ebd5 100644 --- a/src/browser_harness/auth.py +++ b/src/browser_harness/auth.py @@ -23,6 +23,8 @@ import urllib.request import webbrowser +from . import paths + AUTH_BASE = "https://api.browser-use.com" # Browser Use currently exposes this registered CLI OAuth client. Keep an env @@ -125,9 +127,7 @@ def auth_path() -> Path: override = os.environ.get("BH_AUTH_PATH") if override: return Path(override).expanduser() - config_home = os.environ.get("XDG_CONFIG_HOME") - base = Path(config_home).expanduser() if config_home else Path.home() / ".config" - return base / "browser-harness" / "auth.json" + return paths.config_dir() / "auth.json" def load_auth_file(path: Path | None = None) -> dict: diff --git a/src/browser_harness/daemon.py b/src/browser_harness/daemon.py index a4823373..3b1c71fd 100644 --- a/src/browser_harness/daemon.py +++ b/src/browser_harness/daemon.py @@ -6,12 +6,13 @@ from . import _ipc as ipc from . import local_profiles +from . import paths from cdp_use.client import CDPClient def _load_env(): repo_root = Path(__file__).resolve().parents[2] - workspace = Path(os.environ.get("BH_AGENT_WORKSPACE", repo_root / "agent-workspace")).expanduser() + workspace = paths.workspace_dir() for p in (repo_root / ".env", workspace / ".env"): if not p.exists(): continue diff --git a/src/browser_harness/helpers.py b/src/browser_harness/helpers.py index 60673573..75eb6b1d 100644 --- a/src/browser_harness/helpers.py +++ b/src/browser_harness/helpers.py @@ -9,11 +9,12 @@ from . import _ipc as ipc from . import context +from . 
import paths CORE_DIR = Path(__file__).resolve().parent REPO_ROOT = CORE_DIR.parent.parent -AGENT_WORKSPACE = Path(os.environ.get("BH_AGENT_WORKSPACE", REPO_ROOT / "agent-workspace")).expanduser() +AGENT_WORKSPACE = paths.workspace_dir() def _load_env(): diff --git a/src/browser_harness/local_profiles.py b/src/browser_harness/local_profiles.py index 8aa6b59e..4a6f5afe 100644 --- a/src/browser_harness/local_profiles.py +++ b/src/browser_harness/local_profiles.py @@ -12,6 +12,8 @@ import urllib.error import urllib.request +from . import paths + MARKER_URL_PREFIX = "https://browser-use.com/browser-use-profile-target/" INTERNAL_URL_PREFIXES = ( @@ -77,18 +79,14 @@ def payload(self) -> dict: def config_dir() -> Path: - if raw := os.environ.get("BH_CONFIG_DIR"): - return Path(raw).expanduser() - if sys.platform == "darwin": - return Path.home() / "Library" / "Application Support" / "browser-harness" - if sys.platform == "win32": - base = os.environ.get("APPDATA") - return Path(base).expanduser() / "browser-harness" if base else Path.home() / "AppData" / "Roaming" / "browser-harness" - base = os.environ.get("XDG_CONFIG_HOME") - return Path(base).expanduser() / "browser-harness" if base else Path.home() / ".config" / "browser-harness" + return paths.config_dir() def profile_config_path() -> Path: + return config_dir() / "settings.json" + + +def legacy_profile_config_path() -> Path: return config_dir() / "profile.json" @@ -97,10 +95,13 @@ def get_default_profile_id() -> str | None: value = (os.environ.get(key) or "").strip() if value: return value - try: - data = json.loads(profile_config_path().read_text()) - except (FileNotFoundError, json.JSONDecodeError, OSError): - return None + data = {} + for path in (profile_config_path(), legacy_profile_config_path()): + try: + data = json.loads(path.read_text()) + break + except (FileNotFoundError, json.JSONDecodeError, OSError): + continue value = str(data.get("default_local_profile_id") or "").strip() return value or None diff 
--git a/src/browser_harness/manager_client.py b/src/browser_harness/manager_client.py index 991756bc..f58b85c3 100644 --- a/src/browser_harness/manager_client.py +++ b/src/browser_harness/manager_client.py @@ -87,6 +87,24 @@ def _manager_socket_alive(path: Path) -> bool: return True +def stop_manager_if_running(path: str | None = None) -> bool: + endpoint = Path(path or default_manager_socket()) + try: + sock, token = manager_runtime.connect(endpoint, timeout=0.5) + except (FileNotFoundError, ConnectionRefusedError, TimeoutError, OSError, ValueError, KeyError, TypeError): + return False + try: + manager_runtime.send_request(sock, token, {"meta": "shutdown"}) + return True + except (OSError, ValueError, AttributeError): + return False + finally: + try: + sock.close() + except OSError: + pass + + def request(op: str, **payload) -> dict: req = {"op": op, **context.agent_identity().payload(), "client_id": _CLIENT_ID, **payload} path = manager_socket() diff --git a/src/browser_harness/manager_daemon.py b/src/browser_harness/manager_daemon.py index 8bf81fd2..eea58353 100644 --- a/src/browser_harness/manager_daemon.py +++ b/src/browser_harness/manager_daemon.py @@ -15,7 +15,7 @@ import time import urllib.request -from . import admin, auth, context, manager_runtime +from . 
import admin, auth, context, manager_runtime, telemetry BU_API = "https://api.browser-use.com/api/v3" @@ -164,13 +164,28 @@ def new(self, req: dict) -> dict: start_managed_backend(lease) except auth.CloudAuthRequired as e: cleanup_backend(lease) + telemetry.capture("browser_harness.browser_new", { + "backend": backend, + "profile_kind": lease.profile_kind, + "result": "cloud-auth-required", + }) return error("cloud-auth-required", str(e), ["browser-harness auth login"]) except Exception as e: cleanup_backend(lease) + telemetry.capture("browser_harness.browser_new", { + "backend": backend, + "profile_kind": lease.profile_kind, + "result": "start-failed", + }) return error("browser-start-failed", str(e), ["browser_new"]) with self._lock: self.leases[lease.browser_id] = lease self._persist() + telemetry.capture("browser_harness.browser_new", { + "backend": public_backend(lease), + "profile_kind": lease.profile_kind, + "result": "ready", + }) return ready_response(lease) def switch(self, req: dict) -> dict: @@ -181,9 +196,14 @@ def switch(self, req: dict) -> dict: return error("bad-request", "browser_id is required", ["browser_list", "browser_new"]) lease = self.leases.get(browser_id) if not lease: + telemetry.capture("browser_harness.browser_switch", {"result": "not-found"}) return error("not-found", "browser id not found", ["browser_list", "browser_new"]) lease.last_used_at_ms = int(time.time() * 1000) self._persist() + telemetry.capture("browser_harness.browser_switch", { + "backend": public_backend(lease), + "result": "ready", + }) return ready_response(lease) def close(self, req: dict) -> dict: @@ -194,6 +214,7 @@ def close(self, req: dict) -> dict: return error("bad-request", "browser id is required; use browser_close(id)", ["browser_list"]) lease = self.leases.get(browser_id) if not lease: + telemetry.capture("browser_harness.browser_close", {"result": "not-found"}) return {"ok": True, "ready": False, "state": "not-found", "id": browser_id} cleanup = lease 
self.leases.pop(browser_id, None) @@ -201,6 +222,10 @@ def close(self, req: dict) -> dict: resp = {"ok": True, "ready": False, "state": "closed", "id": browser_id} if cleanup is not None: cleanup_backend(cleanup) + telemetry.capture("browser_harness.browser_close", { + "backend": public_backend(cleanup), + "result": "closed", + }) return resp def close_owned(self, req: dict) -> dict: @@ -217,6 +242,10 @@ def close_owned(self, req: dict) -> dict: self._persist() for lease in cleanup: cleanup_backend(lease) + telemetry.capture("browser_harness.browser_close_owned", { + "closed_count": len(cleanup), + "result": "closed", + }) return { "ok": True, "ready": False, @@ -534,21 +563,29 @@ def serve(socket_path: Path, root: Path): os.chmod(socket_path, 0o600) _server_token = None server.listen(128) + server.settimeout(0.2) + stop = threading.Event() print(f"browser-harness manager listening on {socket_path}", file=sys.stderr, flush=True) try: - while True: - conn, _ = server.accept() - threading.Thread(target=handle_conn, args=(manager, conn), daemon=True).start() + while not stop.is_set(): + try: + conn, _ = server.accept() + except socket.timeout: + continue + except OSError: + if stop.is_set(): + break + raise + threading.Thread(target=handle_conn, args=(manager, conn, stop), daemon=True).start() finally: server.close() - if manager_runtime.IS_WINDOWS: - try: - socket_path.unlink() - except FileNotFoundError: - pass + try: + socket_path.unlink() + except FileNotFoundError: + pass -def handle_conn(manager: Manager, conn: socket.socket): +def handle_conn(manager: Manager, conn: socket.socket, stop: threading.Event | None = None): with conn: try: data = b"" @@ -564,6 +601,10 @@ def handle_conn(manager: Manager, conn: socket.socket): resp = error("forbidden", "invalid manager token", []) elif req.get("meta") == "ping": resp = {"pong": True, "pid": os.getpid()} + elif req.get("meta") == "shutdown": + resp = {"ok": True} + if stop: + stop.set() else: resp = manager.handle(req) 
except Exception as e: diff --git a/src/browser_harness/manager_runtime.py b/src/browser_harness/manager_runtime.py index a8c58d0c..afabb3ea 100644 --- a/src/browser_harness/manager_runtime.py +++ b/src/browser_harness/manager_runtime.py @@ -9,7 +9,8 @@ import socket import subprocess import sys -import tempfile + +from . import paths IS_WINDOWS = sys.platform == "win32" @@ -18,13 +19,7 @@ def default_root() -> Path: if os.environ.get("BH_MANAGER_ROOT"): return Path(os.environ["BH_MANAGER_ROOT"]) - if IS_WINDOWS: - base = os.environ.get("LOCALAPPDATA") or tempfile.gettempdir() - return Path(base) / "browser-harness" / "manager" - if os.environ.get("XDG_RUNTIME_DIR"): - return Path(os.environ["XDG_RUNTIME_DIR"]) / "browser-harness-manager" - uid = os.getuid() if hasattr(os, "getuid") else os.environ.get("USER") or "user" - return Path("/tmp") / f"bhm-{uid}" + return paths.runtime_dir() / "manager" def default_endpoint(root: Path | None = None) -> Path: diff --git a/src/browser_harness/paths.py b/src/browser_harness/paths.py new file mode 100644 index 00000000..f818c021 --- /dev/null +++ b/src/browser_harness/paths.py @@ -0,0 +1,43 @@ +"""browser-harness filesystem layout.""" +from __future__ import annotations + +import os +import sys +from pathlib import Path + + +def home_dir() -> Path: + raw = os.environ.get("BH_HOME") or os.environ.get("BROWSER_HARNESS_HOME") + if raw: + return Path(raw).expanduser().resolve() + base = os.environ.get("XDG_CONFIG_HOME") + if base: + return (Path(base).expanduser() / "browser-harness").resolve() + return (Path.home() / ".config" / "browser-harness").resolve() + + +def ensure_private_dir(path: Path) -> Path: + path.mkdir(parents=True, exist_ok=True) + if sys.platform != "win32": + os.chmod(path, 0o700) + return path + + +def config_dir() -> Path: + raw = os.environ.get("BH_CONFIG_DIR") + return ensure_private_dir(Path(raw).expanduser().resolve() if raw else home_dir()) + + +def runtime_dir() -> Path: + raw = 
os.environ.get("BH_RUNTIME_DIR") + return ensure_private_dir(Path(raw).expanduser().resolve() if raw else home_dir() / "runtime") + + +def tmp_dir() -> Path: + raw = os.environ.get("BH_TMP_DIR") + return ensure_private_dir(Path(raw).expanduser().resolve() if raw else home_dir() / "tmp") + + +def workspace_dir() -> Path: + raw = os.environ.get("BH_AGENT_WORKSPACE") + return ensure_private_dir(Path(raw).expanduser().resolve() if raw else home_dir() / "agent-workspace") diff --git a/src/browser_harness/run.py b/src/browser_harness/run.py index 8dc57cc7..8ad438dd 100644 --- a/src/browser_harness/run.py +++ b/src/browser_harness/run.py @@ -25,7 +25,7 @@ sync_local_profile, use_local_profile, ) -from . import auth, context +from . import auth, context, telemetry from .helpers import * from .manager_helpers import * @@ -56,6 +56,8 @@ browser-harness auth login --device-code sign in from SSH/headless environments browser-harness auth status show Browser Use Cloud auth state browser-harness auth logout remove stored Browser Use Cloud auth + browser-harness skill print the browser-harness skill text + browser-harness telemetry status show anonymous telemetry opt-out state browser-harness --update [-y] pull the latest version (agents: pass -y) browser-harness --reload stop the daemon so next call picks up code changes """ @@ -174,8 +176,36 @@ def _print_json(value): print(json.dumps(value, indent=2, default=str)) +def _print_skill(): + from importlib import resources + print(resources.files("browser_harness").joinpath("SKILL.md").read_text(), end="") + + +def _telemetry_command(args): + if not args: + return "script" + first = args[0] + if first in {"-h", "--help"}: + return "help" + if first == "--version": + return "version" + if first in {"--doctor", "doctor"}: + return "doctor" + if first == "--update": + return "update" + if first == "--reload": + return "reload" + if first == "--debug-clicks": + return "debug-clicks" + if first in {"profiles", "use-profile", 
"open-profile", "auth", "skill", "telemetry"}: + return first + return "usage" + + def main(): args = sys.argv[1:] + if not (args and args[0] == "telemetry"): + telemetry.capture("browser_harness.cli", {"command": _telemetry_command(args)}) if args and args[0] in {"-h", "--help"}: print(HELP) return @@ -214,6 +244,14 @@ def main(): return if args and args[0] == "auth": sys.exit(auth.run_auth_cli(args[1:])) + if args and args[0] == "skill": + if len(args) != 1: + print("usage: browser-harness skill", file=sys.stderr) + sys.exit(2) + _print_skill() + return + if args and args[0] == "telemetry": + sys.exit(telemetry.run_telemetry_cli(args[1:])) if args and args[0] == "--update": yes = any(a in {"-y", "--yes"} for a in args[1:]) sys.exit(run_update(yes=yes)) diff --git a/src/browser_harness/telemetry.py b/src/browser_harness/telemetry.py new file mode 100644 index 00000000..52a40aeb --- /dev/null +++ b/src/browser_harness/telemetry.py @@ -0,0 +1,184 @@ +"""Best-effort, opt-out telemetry for browser-harness. + +Only low-cardinality operational events are sent. Callers should pass categories, +states, and booleans, never URLs, selectors, page text, prompts, or credentials. +""" + +from __future__ import annotations + +import json +import os +import platform +import re +import urllib.request +import uuid +from importlib.metadata import PackageNotFoundError, version +from pathlib import Path + +from . 
import paths + + +POSTHOG_KEY = "phc_rCPCLPtaXB3EuBdiH7JLKtU2Wj5iPnuwdsbw58CnjYXc" +POSTHOG_HOST = "https://us.i.posthog.com" +DISABLE_ENVS = ("BH_TELEMETRY", "BROWSER_HARNESS_TELEMETRY") +FORBIDDEN_KEYS = ( + "api_key", + "content", + "cookie", + "email", + "href", + "key", + "message", + "password", + "path", + "prompt", + "query", + "secret", + "selector", + "text", + "title", + "token", + "url", + "uri", +) + + +def _config_dir() -> Path: + return paths.config_dir() + + +def _config_path() -> Path: + return _config_dir() / "telemetry.json" + + +def _load_config() -> dict: + try: + return json.loads(_config_path().read_text()) + except (FileNotFoundError, OSError, ValueError): + return {} + + +def _save_config(data: dict) -> None: + path = _config_path() + try: + path.parent.mkdir(parents=True, exist_ok=True) + if platform.system() != "Windows": + os.chmod(path.parent, 0o700) + path.write_text(json.dumps(data, indent=2, sort_keys=True) + "\n") + if platform.system() != "Windows": + os.chmod(path, 0o600) + except OSError: + pass + + +def _version() -> str: + try: + return version("browser-harness") + except PackageNotFoundError: + return "" + except Exception: + return "" + + +def _env_disabled() -> bool: + return any((os.environ.get(name) or "").lower() in {"0", "false", "no", "off"} for name in DISABLE_ENVS) + + +def _install_id(config: dict | None = None) -> str: + config = config if config is not None else _load_config() + raw = config.get("install_id") + if isinstance(raw, str) and re.fullmatch(r"[0-9a-f-]{32,36}", raw): + return raw + install_id = str(uuid.uuid4()) + _save_config({**config, "install_id": install_id}) + return install_id + + +def is_enabled() -> bool: + if _env_disabled(): + return False + return not bool(_load_config().get("disabled")) + + +def status() -> dict: + config = _load_config() + env_disabled = _env_disabled() + return { + "enabled": not env_disabled and not bool(config.get("disabled")), + "disabled_by_env": env_disabled, + 
"disabled_by_config": bool(config.get("disabled")), + "install_id": _install_id(config), + "config_path": str(_config_path()), + } + + +def set_enabled(enabled: bool) -> dict: + config = _load_config() + config["disabled"] = not enabled + _save_config(config) + return status() + + +def _safe_properties(properties: dict | None) -> dict: + out = {} + for key, value in (properties or {}).items(): + safe_key = re.sub(r"[^A-Za-z0-9_$.-]+", "_", str(key))[:80] + lowered = safe_key.lower() + if not safe_key or any(word in lowered for word in FORBIDDEN_KEYS): + continue + if isinstance(value, bool) or value is None: + out[safe_key] = value + elif isinstance(value, int | float): + out[safe_key] = value + else: + safe_value = str(value) + if "://" in safe_value: + safe_value = "[redacted]" + out[safe_key] = safe_value[:120] + return out + + +def capture(event: str, properties: dict | None = None) -> None: + if not is_enabled(): + return + try: + config = _load_config() + props = { + "browser_harness_version": _version() or "unknown", + "python_version": platform.python_version(), + "os": platform.system() or "unknown", + "machine": platform.machine() or "unknown", + "$process_person_profile": False, + **_safe_properties(properties), + } + payload = { + "api_key": POSTHOG_KEY, + "distinct_id": _install_id(config), + "event": event, + "properties": props, + } + data = json.dumps(payload).encode("utf-8") + host = os.environ.get("BH_POSTHOG_HOST", POSTHOG_HOST).rstrip("/") + req = urllib.request.Request( + f"{host}/i/v0/e/", + method="POST", + data=data, + headers={"Content-Type": "application/json", "User-Agent": "browser-harness"}, + ) + urllib.request.urlopen(req, timeout=float(os.environ.get("BH_TELEMETRY_TIMEOUT", "1"))).close() + except Exception: + return + + +def run_telemetry_cli(argv: list[str]) -> int: + if not argv or argv == ["status"]: + print(json.dumps(status(), indent=2)) + return 0 + if argv == ["disable"]: + print(json.dumps(set_enabled(False), indent=2)) + 
return 0 + if argv == ["enable"]: + print(json.dumps(set_enabled(True), indent=2)) + return 0 + print("usage: browser-harness telemetry [status|enable|disable]") + return 2 From 0cf772b90d950b588872b84c041936a4ba4fee29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gregor=20=C5=BDuni=C4=8D?= <36313686+gregpr07@users.noreply.github.com> Date: Sat, 20 Jun 2026 17:17:38 -0700 Subject: [PATCH 14/15] Shorten browser-harness install guide --- docs/browser-connection.md | 138 ++++++++++++ install.md | 212 ++++--------------- skills/browser-harness/references/install.md | 2 +- 3 files changed, 181 insertions(+), 171 deletions(-) create mode 100644 docs/browser-connection.md diff --git a/docs/browser-connection.md b/docs/browser-connection.md new file mode 100644 index 00000000..e2926e95 --- /dev/null +++ b/docs/browser-connection.md @@ -0,0 +1,138 @@ +# Browser Connection Reference + +Use this only when the quick path in `install.md` fails. + +Browser-harness can connect to a local Chrome/Chromium browser or to a Browser Use cloud browser. + +## Cloud Browsers + +Start one with: + +```python +b = browser_new("cloud") +browser(b["id"]) +``` + +Authentication uses `BROWSER_USE_API_KEY` first, then the local `browser-harness auth login` store. + +```bash +browser-harness auth login +browser-harness auth login --device-code +browser-harness auth login --api-key-stdin +browser-harness auth status +browser-harness auth logout +``` + +Never pass API keys as command-line arguments. + +## Local Way 1: Real Profile + +Use this when the agent should act in the user's everyday browser with real logins. + +1. Ask the user to open Chrome. +2. Run: + + ```bash + browser-harness <<'PY' + print(browser_profiles()) + PY + ``` + +3. Ask which stable `id` to use. +4. Save it: + + ```bash + browser-harness <<'PY' + browser_use_profile("PROFILE_ID_HERE") + PY + ``` + +5. In that Chrome profile, open `chrome://inspect/#remote-debugging`. +6. Tick "Allow remote debugging for this browser instance". +7. 
On Chrome 144+, click Allow when the per-attach popup appears. +8. Retry: + + ```bash + browser-harness <<'PY' + print(page_info()) + PY + ``` + +On macOS, an agent can open the inspect page: + +```bash +osascript -e 'tell application "Google Chrome" to activate' \ + -e 'tell application "Google Chrome" to open location "chrome://inspect/#remote-debugging"' +``` + +## Local Way 2: Isolated Profile + +Use this for unattended automation or when permission popups are unacceptable. + +```bash +"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" \ + --remote-debugging-port=9222 \ + --user-data-dir="$HOME/.config/browser-harness/isolated-chrome" \ + about:blank +export BU_CDP_URL=http://127.0.0.1:9222 +``` + +The `--user-data-dir` must not be Chrome's default profile directory. Chrome 136+ ignores `--remote-debugging-port` with the platform default profile path. + +Copying a real Chrome profile into a custom directory is not a reliable login-preserving path because cookies are encrypted against the original profile context. Use Way 1 for real logins. + +## Doctor Cases + +Run: + +```bash +browser-harness --doctor +``` + +Interpretation: + +- `chrome running` FAIL: no compatible browser process was found. Ask the user to open Chrome or use Way 2/cloud. +- `daemon alive` FAIL with Chrome running: remote debugging permission is missing or the permission popup needs Allow. +- `active browser connections` is `0`: daemon is up but not attached to a usable page; retry after opening a normal tab. +- source mismatch: the command is importing a different install than the checkout you are reading. +- update available: run `browser-harness --update -y` if you want the new version. + +Stale daemon reset: + +```bash +browser-harness <<'PY' +restart_daemon() +PY +``` + +If that hangs, kill Chrome and browser-harness daemon processes, reopen Chrome, and retry. 
On macOS/Linux, remove lingering `bu-default.sock` and `bu-default.pid` files under `${XDG_CONFIG_HOME:-~/.config}/browser-harness/runtime`. + +## Files + +Default state directory: + +```text +${XDG_CONFIG_HOME:-~/.config}/browser-harness +``` + +Important files: + +```text +auth.json Browser Use Cloud auth +settings.json selected local Chrome profile and future preferences +telemetry.json anonymous install id + telemetry opt-out +agent-workspace/ agent-written helpers and domain skills +runtime/ sockets, pids, manager leases, managed browser profiles +tmp/ logs, screenshots, scratch files +``` + +Overrides: + +```text +BH_HOME +BROWSER_HARNESS_HOME +BH_CONFIG_DIR +BH_AGENT_WORKSPACE +BH_RUNTIME_DIR +BH_TMP_DIR +``` diff --git a/install.md b/install.md index 560bc874..3346409a 100644 --- a/install.md +++ b/install.md @@ -1,207 +1,79 @@ --- name: browser-install -description: Install browser-harness into the current agent and connect it to a browser with minimal prompting. +description: Install browser-harness and connect it to a browser fast. --- -# `browser-harness` installation +# browser-harness install -Use this file only for browser-harness install, browser connection setup, and connection troubleshooting. For day-to-day browser work, read `SKILL.md`. Task-specific edits belong in `agent-workspace/agent_helpers.py` and `agent-workspace/domain-skills/`. +Use once. For browser work, read `SKILL.md`. 
-## Recommended `browser-harness` setup - -Install the released package as a global uv tool: +## Fast Path ```bash uv tool install browser-harness -command -v browser-harness +for root in "${CODEX_HOME:-$HOME/.codex}/skills" "$HOME/.agents/skills"; do + rm -rf "$root/browser-harness" "$root/browser" "$root/browser-use" +done +mkdir -p "${CODEX_HOME:-$HOME/.codex}/skills/browser-harness" +browser-harness skill > "${CODEX_HOME:-$HOME/.codex}/skills/browser-harness/SKILL.md" +browser-harness <<'PY' +print(page_info()) +PY ``` -For browser-harness development, clone the repo into a durable path and use `uv tool install -e .` from the checkout. Normal users and other agents should use the PyPI package. - -## Make browser-harness global for the current agent +If `page_info()` prints, stop. Setup is done. -After the command is installed, register a skill named `browser-harness` with this trigger: +For Claude Code or other agents: install `browser-harness`, register a skill named `browser-harness`, use `browser-harness skill` as the body, and use this trigger verbatim: ```text Always use browser-harness for any web interaction: automation, scraping, testing, or site/app work. ``` -Use `browser-harness skill` to print the packaged skill text. - -Before registering it, remove stale user-installed browser skills that came from old browser-harness installs. Do not edit bundled/vendor plugin caches. - -- **Codex**: add the printed skill as a global skill at `$CODEX_HOME/skills/browser-harness/SKILL.md` (often `~/.codex/skills/browser-harness/SKILL.md`). 
- - ```bash - for root in "${CODEX_HOME:-$HOME/.codex}/skills" "$HOME/.agents/skills"; do - rm -rf "$root/browser-harness" "$root/browser" "$root/browser-use" - done - mkdir -p "${CODEX_HOME:-$HOME/.codex}/skills/browser-harness" - browser-harness skill > "${CODEX_HOME:-$HOME/.codex}/skills/browser-harness/SKILL.md" - ``` - -- **Claude Code**: paste or import the output of `browser-harness skill` into the global instruction file Claude Code uses for reusable skills. - -- **Other agents**: tell the agent to install/register a skill named `browser-harness`, use the trigger above verbatim, and use `browser-harness skill` as the skill body. - -This makes new agent sessions in other folders load the runtime browser harness instructions automatically. - -## Keeping the harness current - -- On each run, `browser-harness` prints `[browser-harness] update available: X -> Y` (once per day) when a newer PyPI release exists. -- `browser-harness --doctor` also checks the latest PyPI version. -- When you see an update banner, decide whether to run `browser-harness --update -y`. It pulls the new code (`git pull --ff-only` for editable clones, `uv tool upgrade browser-harness` for PyPI installs) and stops the running daemon so the next call picks up the new code. With `-y` it won't prompt. -- `--update` refuses to run on an editable clone with uncommitted changes. If that happens, tell the user and let them resolve the dirty worktree. -- PyPI installs require `uv` for updates; there is no pip fallback. - -## Maintenance commands - -- browser-harness --doctor — show version, install mode, daemon and Chrome state, and whether an update is pending. -- browser-harness telemetry status — show opt-out telemetry state. -- browser-harness telemetry disable — opt out of anonymous usage telemetry. 
- -## Files on disk - -By default browser-harness keeps its state under `${XDG_CONFIG_HOME:-~/.config}/browser-harness`: - -```text -auth.json Browser Use Cloud auth -settings.json selected local Chrome profile and future preferences -telemetry.json anonymous install id + telemetry opt-out -agent-workspace/ agent-written helpers and domain skills -runtime/ sockets, pids, manager leases, managed browser profiles -tmp/ logs, screenshots, scratch files -``` - -Override the whole home with `BH_HOME` or `BROWSER_HARNESS_HOME`. Override specific dirs with `BH_CONFIG_DIR`, `BH_AGENT_WORKSPACE`, `BH_RUNTIME_DIR`, or `BH_TMP_DIR`. - -## Architecture - -```text -Chrome / Browser Use cloud -> CDP WS -> browser_harness.daemon -> IPC -> browser_harness.run - ^ -optional browser_harness.manager_daemon owns many isolated browser leases -``` - -- The CLI talks to a local per-browser daemon over IPC. -- `BU_CDP_URL` points the normal local-browser daemon at a specific DevTools HTTP endpoint for Way 2. -- The browser manager auto-starts when `browser`, `browser_status`, `browser_new`, `browser_list`, or `browser_close` is used. -- Managed browser scripts select an explicit short id with `browser(id)`; agents should not set daemon namespace variables for normal use. -- Cloud browser creation reads Browser Use auth from `BROWSER_USE_API_KEY` first, then the local `browser-harness auth login` store. +Only remove stale user-installed browser skills. Do not edit bundled/vendor plugin caches. -## Browser Use Cloud auth - -For cloud browsers, prefer OAuth login over pasting API keys: +## If It Says `needs-profile` ```bash -browser-harness auth login +browser-harness <<'PY' +print(browser_profiles()) +PY ``` -The command generates a PKCE login request, opens or prints a Browser Use login URL, waits for the local callback, exchanges the code for an API key, and stores it in a private local file. The key is never printed. 
- -Headless/SSH fallback: +Ask the user which stable `id` to use, then retry: ```bash -browser-harness auth login --device-code +browser-harness <<'PY' +browser_use_profile("PROFILE_ID_HERE") +print(page_info()) +PY ``` -If you already have a Browser Use API key, store it safely through stdin: +## If Chrome Blocks It -```bash -browser-harness auth login --api-key-stdin -``` +In the selected Chrome profile: -Do not pass API keys as command-line arguments; they can leak through shell history and process listings. +1. Open `chrome://inspect/#remote-debugging`. +2. Tick "Allow remote debugging for this browser instance". +3. Click Allow on the popup if it appears. +4. Retry `page_info()`. -Other auth commands: +## If Still Broken ```bash -browser-harness auth status -browser-harness auth logout +browser-harness --doctor ``` -Key resolution order for cloud browser creation: - -```text -BROWSER_USE_API_KEY - -> stored browser-harness auth key - -> cloud-auth-required -``` +Use the output: -# Browser connection setup and troubleshooting +- `chrome running` FAIL: ask the user to open Chrome, or use isolated/cloud browser. +- `daemon alive` FAIL: Chrome remote debugging permission is missing. +- update available: run `browser-harness --update -y` if you want it. -## Browser connection reference +For full details, read `docs/browser-connection.md`. -This section is the source of truth for how browser-harness connects to a browser. It is the canonical reference for every agent and user of this repo. Every statement here is intended to be verifiable against either an official Chrome source or this repo's own code, and is held to that standard deliberately. If anything below is incorrect, incomplete, or misleading, open an issue on the browser-harness repository immediately with clear evidence and explanation so it can be corrected. Do not silently work around an error in this document; the cost of one user being misled is much higher than the cost of one issue. 
+Useful: -Browser-harness can connect to any Chrome or Chromium-based browser on your computer, or to a Browser Use cloud browser. - -**Cloud browsers** are managed by the Browser Use cloud API. Start one with `browser_new("cloud", proxy_country="us")`, keep the returned `id`, and call `browser(id)` before page helpers in each script. Authentication is via `BROWSER_USE_API_KEY` or `browser-harness auth login`; the harness handles the WebSocket URL itself. Cookie profile sync is advanced and opt-in; read `interaction-skills/profile-sync.md` only when the user explicitly asks to sync local cookies into Browser Use cloud profiles. - -**Local browsers** require remote debugging to be enabled. There are two ways, and they suit different use cases. - -Local Way 1 also requires an explicit selected profile before the harness attaches. Run `browser_profiles()` to get stable ids such as `google-chrome:Default`, then `browser_use_profile("google-chrome:Default")`. The daemon snapshots that selected profile at startup and refuses to attach to an arbitrary available Chrome profile. - -*Way 1: chrome://inspect/#remote-debugging checkbox — uses your real profile.* In your running Chrome, navigate to `chrome://inspect/#remote-debugging` and tick the "Allow remote debugging for this browser instance" checkbox. This setting is per-profile and sticky: tick it once and it persists across every future Chrome launch of that profile. Then run any `browser-harness` command. On Chrome 144 and later, the first attach by the harness triggers an in-browser "Allow remote debugging?" popup that you must click Allow on. The popup may reappear on later attaches under conditions that are not fully characterized.[^1] This path inherits your everyday Chrome's logins, extensions, history, and bookmarks, which makes it the right choice for an agent helping you with tasks in your real browser. 
- -*Way 2: command-line flag — uses an isolated profile, no popups ever.* Launch Chrome with `--remote-debugging-port=9222 --user-data-dir=`. Two precisions: - -- The path must be a directory that is **not** Chrome's platform default (`%LOCALAPPDATA%\Google\Chrome\User Data` on Windows, `~/Library/Application Support/Google/Chrome` on macOS, `~/.config/google-chrome` on Linux). On Chrome 136 and later, the port flag is silently no-opped when the user-data-dir is the platform default, even if you pass it explicitly. An empty or new path gives a fresh clean profile that Chrome will persist there across future runs. -- This path does **not** let you reuse your everyday Chrome profile. Copying the default profile's files into a custom directory makes Chrome accept the flag, but cookies are encrypted under a key bound to the original directory and will not survive the copy — so you carry over bookmarks and extensions but lose every logged-in session. If you want your real logins, use Way 1. - -Tell the harness which port you launched on by setting `BU_CDP_URL=http://127.0.0.1:9222` before running `browser-harness`. - -For most tasks where the agent acts on your behalf in your normal browser, use Way 1. For automation that runs without you watching, or any case where popup interruptions are unacceptable, use Way 2 or a cloud browser. - -[^1]: The conditions that cause Chrome to re-show the "Allow remote debugging?" popup on a subsequent attach (time elapsed since previous Allow, daemon restart, browser restart, new CDP session, version-dependent options like "Allow for N hours") are not fully characterized. Way 2 sidesteps this entirely. - -## First time setup - -Try yourself before asking the user to do anything. Retry transient errors briefly. Only ask the user when a step genuinely needs them — ticking a checkbox, clicking Allow. - -If the user hasn't said which connection method to use, default to Way 1 if Chrome is already running, Way 2 if not. 
Cloud is only used when the user opts in. - -1. Try the harness: - - ```bash - browser-harness <<'PY' - print(page_info()) - PY - ``` - - If it prints page info, you're done. If it reports `needs-profile`, run `browser_profiles()`, choose a stable profile id with the user, call `browser_use_profile(profile_id)`, then retry. For private or cloud manager browsers, use `browser_new(...)` first, then select the returned id with `browser(id)`. - -2. Otherwise run `browser-harness --doctor`. The two lines that matter for connection are `chrome running` and `daemon alive`. - -3. Match the output to a case: - - - **chrome FAIL** → no Chrome process detected. - - **Way 1**: ask the user to open their target Chrome themselves. - - **Way 2**: launch Chrome yourself with `--remote-debugging-port=9222 --user-data-dir=`, then set `BU_CDP_URL=http://127.0.0.1:9222` for the harness (see the Browser connection reference). - - - **chrome ok, daemon FAIL** → Way 1 setup is incomplete. Tell the user to: - - navigate to `chrome://inspect/#remote-debugging` in their Chrome and tick "Allow remote debugging for this browser instance" if not yet ticked (one-time per profile) - - click Allow on the in-browser popup if it appears (every attach on Chrome 144+) - - On macOS, you can open the inspect page in their running Chrome yourself instead of asking them to navigate: - - ```bash - osascript -e 'tell application "Google Chrome" to activate' \ - -e 'tell application "Google Chrome" to open location "chrome://inspect/#remote-debugging"' - ``` - - - **chrome ok, daemon ok, but step 1 still failed** → stale daemon. Restart it: - - ```bash - browser-harness <<'PY' - restart_daemon() - PY - ``` - - If that hangs, escalate: kill all Chrome and daemon processes, then reopen Chrome and retry. On macOS/Linux, also remove lingering `bu-default.sock` and `bu-default.pid` files under `${XDG_CONFIG_HOME:-~/.config}/browser-harness/runtime`. - -4. After any fix, retry step 1. 
- -If Way 1 fails repeatedly or the user's task is unattended, move to Way 2 or a cloud browser per the Browser connection reference (these have no popups). - -If you are testing browser connection for the first time, run this demo: open `https://github.com/browser-use/browser-harness` in a new tab and activate it (`switch_tab`) so the user sees the harness has attached. Then ask what they want to do next. +```bash +browser-harness --update -y +browser-harness telemetry disable +``` diff --git a/skills/browser-harness/references/install.md b/skills/browser-harness/references/install.md index 34b59d03..6b6f9e5f 100644 --- a/skills/browser-harness/references/install.md +++ b/skills/browser-harness/references/install.md @@ -52,7 +52,7 @@ If that prints page info, you're done. If not, run `browser-harness --doctor` an - **Way 1 (real profile):** in your Chrome, open `chrome://inspect/#remote-debugging` and tick "Allow remote debugging for this browser instance" (sticky, per-profile). On Chrome 144+, click Allow on the first-attach popup. Inherits your logins/extensions — best when the agent acts in your everyday browser. - **Way 2 (isolated profile, no popups):** launch Chrome with `--remote-debugging-port=9222 --user-data-dir=`, then set `BU_CDP_URL=http://127.0.0.1:9222`. Best for unattended automation. -The canonical, fully-detailed connection reference and troubleshooting live in the repo root's `install.md`. Read it if the quick path above fails. +The full connection reference and troubleshooting live in `docs/browser-connection.md`. Read it if the quick path above fails. 
## Keeping current From 07e56f7644126958744f92b20cc1ff8f5faf2275 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gregor=20=C5=BDuni=C4=8D?= <36313686+gregpr07@users.noreply.github.com> Date: Sat, 20 Jun 2026 17:21:54 -0700 Subject: [PATCH 15/15] Remove branch test churn --- tests/integration/test_js.py | 53 +---- tests/unit/test_admin.py | 58 +---- tests/unit/test_auth.py | 184 ---------------- tests/unit/test_context.py | 89 -------- tests/unit/test_daemon.py | 225 ------------------- tests/unit/test_helpers.py | 101 --------- tests/unit/test_local_profiles.py | 101 --------- tests/unit/test_manager_daemon.py | 325 ---------------------------- tests/unit/test_manager_helpers.py | 195 ----------------- tests/unit/test_manager_runtime.py | 69 ------ tests/unit/test_run.py | 56 ----- tests/unit/test_run_manager_mode.py | 108 --------- tests/unit/test_skill_docs.py | 23 -- 13 files changed, 11 insertions(+), 1576 deletions(-) delete mode 100644 tests/unit/test_auth.py delete mode 100644 tests/unit/test_context.py delete mode 100644 tests/unit/test_local_profiles.py delete mode 100644 tests/unit/test_manager_daemon.py delete mode 100644 tests/unit/test_manager_helpers.py delete mode 100644 tests/unit/test_manager_runtime.py delete mode 100644 tests/unit/test_run_manager_mode.py delete mode 100644 tests/unit/test_skill_docs.py diff --git a/tests/integration/test_js.py b/tests/integration/test_js.py index 5a6e3c54..86582e68 100644 --- a/tests/integration/test_js.py +++ b/tests/integration/test_js.py @@ -26,37 +26,11 @@ def test_simple_expression_passes_through(): assert _evaluated_expression(captured) == "document.title" -def _illegal_return_response(): - return { - "result": { - "type": "object", - "subtype": "error", - "description": "SyntaxError: Illegal return statement", - }, - "exceptionDetails": { - "text": "Uncaught", - "lineNumber": 0, - "columnNumber": 13, - }, - } - - -def test_return_statement_retries_wrapped_after_illegal_return(): - captured = [] - - def 
fake_cdp(method, **kwargs): - captured.append((method, kwargs)) - if kwargs["expression"] == "const x = 1; return x": - return _illegal_return_response() - return {"result": {"value": 1}} - +def test_return_statement_gets_wrapped(): + fake_cdp, captured = _capture_cdp() with patch("browser_harness.helpers.cdp", side_effect=fake_cdp): - assert helpers.js("const x = 1; return x") == 1 - - assert [kw["expression"] for m, kw in captured if m == "Runtime.evaluate"] == [ - "const x = 1; return x", - "(function(){const x = 1; return x})()", - ] + helpers.js("const x = 1; return x") + assert _evaluated_expression(captured) == "(function(){const x = 1; return x})()" def test_iife_with_internal_return_is_not_double_wrapped(): @@ -138,22 +112,11 @@ def test_return_word_inside_comment_does_not_trigger_wrapping(): @pytest.mark.parametrize("expr", ["return\t1", "return\n1"]) -def test_top_level_return_with_whitespace_retries_wrapped(expr): - captured = [] - - def fake_cdp(method, **kwargs): - captured.append((method, kwargs)) - if kwargs["expression"] == expr: - return _illegal_return_response() - return {"result": {"value": 1}} - +def test_top_level_return_with_whitespace_gets_wrapped(expr): + fake_cdp, captured = _capture_cdp() with patch("browser_harness.helpers.cdp", side_effect=fake_cdp): - assert helpers.js(expr) == 1 - - assert [kw["expression"] for m, kw in captured if m == "Runtime.evaluate"] == [ - expr, - f"(function(){{{expr}}})()", - ] + helpers.js(expr) + assert _evaluated_expression(captured) == f"(function(){{{expr}}})()" @pytest.mark.parametrize( diff --git a/tests/unit/test_admin.py b/tests/unit/test_admin.py index cdb2c161..d5353e2c 100644 --- a/tests/unit/test_admin.py +++ b/tests/unit/test_admin.py @@ -1,5 +1,4 @@ import pytest -from pathlib import Path from browser_harness import admin @@ -31,42 +30,16 @@ def test_local_chrome_mode_is_false_when_process_env_provides_remote_cdp(monkeyp assert not admin._is_local_chrome_mode() -def 
test_local_chrome_mode_is_false_when_env_provides_explicit_cdp_url(): - assert not admin._is_local_chrome_mode({"BU_CDP_URL": "http://127.0.0.1:9333"}) - - -def test_list_local_profiles_uses_native_detector(monkeypatch): - monkeypatch.setattr( - admin.local_profiles, - "list_local_profiles_payload", - lambda: {"status": "ok", "profiles": [{"id": "google-chrome:Default"}]}, - ) - - assert admin.list_local_profiles() == { - "status": "ok", - "profiles": [{"id": "google-chrome:Default"}], - } - - -def test_handshake_timeout_is_chrome_permission_popup(): +def test_handshake_timeout_needs_chrome_remote_debugging_prompt(): msg = "CDP WS handshake failed: timed out during opening handshake" - assert not admin._needs_chrome_remote_debugging_prompt(msg) - assert admin._needs_chrome_permission_popup(msg) + assert admin._needs_chrome_remote_debugging_prompt(msg) -def test_handshake_403_is_chrome_permission_popup(): +def test_handshake_403_needs_chrome_remote_debugging_prompt(): msg = "CDP WS handshake failed: server rejected WebSocket connection: HTTP 403" - assert not admin._needs_chrome_remote_debugging_prompt(msg) - assert admin._needs_chrome_permission_popup(msg) - - -def test_cdp_disabled_needs_chrome_remote_debugging_prompt_not_permission_popup(): - msg = "cdp-disabled: Chrome remote debugging is turned off for the selected profile" - assert admin._needs_chrome_remote_debugging_prompt(msg) - assert not admin._needs_chrome_permission_popup(msg) def test_stale_websocket_does_not_open_chrome_inspect(): @@ -255,31 +228,6 @@ def test_run_doctor_skips_snap_detect_on_non_linux(monkeypatch, capsys): assert "[snap-detect]" not in out -def test_run_doctor_prints_source_path_and_mismatch_warning(monkeypatch, capsys): - monkeypatch.setattr(admin, "_version", lambda: "0.1.0") - monkeypatch.setattr(admin, "_install_mode", lambda: "git") - monkeypatch.setattr(admin, "_chrome_running", lambda: True) - monkeypatch.setattr(admin, "daemon_alive", lambda: True) - monkeypatch.setattr(admin, 
"browser_connections", lambda: []) - monkeypatch.setattr(admin, "_latest_release_tag", lambda: "0.1.0") - monkeypatch.setattr(admin, "_package_source_path", lambda: Path("/installed/src/browser_harness")) - monkeypatch.setattr(admin, "_doctor_source_mismatch", lambda: { - "cwd_source": "/checkout/src/browser_harness", - "package_source": "/installed/src/browser_harness", - }) - monkeypatch.setattr("platform.system", lambda: "Darwin") - monkeypatch.setattr("shutil.which", lambda _cmd: None) - monkeypatch.delenv("BROWSER_USE_API_KEY", raising=False) - - assert admin.run_doctor() == 0 - - out = capsys.readouterr().out - assert "source path /installed/src/browser_harness" in out - assert "[source-mismatch]" in out - assert "Current directory contains: /checkout/src/browser_harness" in out - assert "Imported browser-harness from: /installed/src/browser_harness" in out - - def test_run_doctor_fix_snap_prints_steps(capsys): assert admin.run_doctor_fix_snap() == 0 out = capsys.readouterr().out diff --git a/tests/unit/test_auth.py b/tests/unit/test_auth.py deleted file mode 100644 index 3dfb06c7..00000000 --- a/tests/unit/test_auth.py +++ /dev/null @@ -1,184 +0,0 @@ -import json -import stat -import threading -import urllib.error -import urllib.request -from io import StringIO - -import pytest - -from browser_harness import auth - - -def test_get_api_key_prefers_env_over_stored(monkeypatch, tmp_path): - monkeypatch.setenv("BH_AUTH_PATH", str(tmp_path / "auth.json")) - auth.save_auth_record(auth.AuthRecord(api_key="stored-key", source="oauth")) - monkeypatch.setenv("BROWSER_USE_API_KEY", "env-key") - - assert auth.get_browser_use_api_key() == "env-key" - - -def test_status_and_logout_for_stored_key(monkeypatch, tmp_path): - monkeypatch.delenv("BROWSER_USE_API_KEY", raising=False) - monkeypatch.setenv("BH_AUTH_PATH", str(tmp_path / "auth.json")) - auth.save_auth_record(auth.AuthRecord( - api_key="secret-key", - api_key_id="key-123", - project_id="project-123", - 
scopes=["browser"], - )) - - status = auth.auth_status() - mode = stat.S_IMODE((tmp_path / "auth.json").stat().st_mode) - removed = auth.clear_auth() - - assert status["status"] == "authenticated" - assert status["source"] == "stored" - assert "api_key" not in status - assert "api_key_id" not in status - assert mode == 0o600 - assert removed is True - assert auth.auth_status()["status"] == "missing" - - -def test_missing_key_raises_cloud_auth_required(monkeypatch, tmp_path): - monkeypatch.delenv("BROWSER_USE_API_KEY", raising=False) - monkeypatch.setenv("BH_AUTH_PATH", str(tmp_path / "missing.json")) - - try: - auth.get_browser_use_api_key() - except auth.CloudAuthRequired as e: - assert "browser-harness auth login" in str(e) - else: - raise AssertionError("expected CloudAuthRequired") - - -def test_api_key_stdin_login_stores_manual_key_without_printing(monkeypatch, tmp_path, capsys): - monkeypatch.delenv("BROWSER_USE_API_KEY", raising=False) - monkeypatch.setenv("BH_AUTH_PATH", str(tmp_path / "auth.json")) - manual_key = "manual-key-1234567890abcdef" - - record = auth.api_key_stdin_login(input_stream=StringIO(manual_key + "\n")) - out = capsys.readouterr().out - - assert record.source == "manual" - assert auth.get_browser_use_api_key() == manual_key - assert manual_key not in out - assert "stored" in out.lower() - assert json.loads((tmp_path / "auth.json").read_text())["browser_use"]["source"] == "manual" - - -def test_api_key_stdin_json_login_outputs_no_secret(monkeypatch, tmp_path, capsys): - monkeypatch.delenv("BROWSER_USE_API_KEY", raising=False) - monkeypatch.setenv("BH_AUTH_PATH", str(tmp_path / "auth.json")) - manual_key = "manual-key-1234567890abcdef" - - auth.api_key_stdin_login(json_output=True, input_stream=StringIO(manual_key + "\n")) - out = capsys.readouterr().out - - assert manual_key not in out - assert json.loads(out) == {"status": "stored", "path": str(tmp_path / "auth.json")} - - -def 
test_api_key_stdin_login_rejects_missing_or_short_key(monkeypatch, tmp_path): - monkeypatch.setenv("BH_AUTH_PATH", str(tmp_path / "auth.json")) - - for raw in ["", "too-short"]: - try: - auth.api_key_stdin_login(input_stream=StringIO(raw)) - except auth.AuthError as e: - assert "API key" in str(e) or "api key" in str(e) - else: - raise AssertionError("expected AuthError") - - assert not (tmp_path / "auth.json").exists() - - -def test_manual_api_key_tty_eof_becomes_auth_error(monkeypatch): - class TtyInput: - def isatty(self): - return True - - def fake_getpass(_prompt): - raise EOFError - - monkeypatch.setattr(auth.getpass, "getpass", fake_getpass) - - with pytest.raises(auth.AuthError, match="no API key provided"): - auth._read_manual_api_key(TtyInput()) - - -def test_post_json_network_error_becomes_auth_error(monkeypatch): - def fake_urlopen(_req, timeout): - raise urllib.error.URLError("offline") - - monkeypatch.setattr(auth.urllib.request, "urlopen", fake_urlopen) - - with pytest.raises(auth.AuthError, match="network error: offline"): - auth._post_json("https://api.example.test/auth", {"x": 1}) - - -def test_browser_login_callback_exchanges_and_stores_key(monkeypatch, tmp_path): - monkeypatch.setenv("BH_AUTH_PATH", str(tmp_path / "auth.json")) - calls = [] - - def fake_post(url, payload): - calls.append((url, payload)) - if url.endswith("/cloud/cli-auth/browser"): - return {"authorization_uri": "https://login.example/auth", "expires_in": 600} - if url.endswith("/cloud/cli-auth/token"): - return { - "api_key": "oauth-key", - "api_key_id": "key-id", - "project_id": "project-id", - "scopes": ["browser"], - } - raise AssertionError(url) - - monkeypatch.setattr(auth, "_post_json", fake_post) - start = auth.start_browser_auth(open_url=False) - callback_url = f"{start.redirect_uri}?code=abc123&state={start.callback.state}" - t = threading.Thread(target=lambda: urllib.request.urlopen(callback_url, timeout=5).read()) - t.start() - record = 
auth.complete_browser_auth(start, timeout=5) - t.join(timeout=5) - - assert record.api_key == "oauth-key" - assert auth.get_browser_use_api_key() == "oauth-key" - assert calls[0][1]["client_id"] == "browser-use-terminal" - assert calls[0][1]["redirect_uri"] == start.redirect_uri - assert calls[0][1]["state"] == start.callback.state - assert calls[1][1]["code"] == "abc123" - assert calls[1][1]["code_verifier"] == start.verifier - assert json.loads((tmp_path / "auth.json").read_text())["browser_use"]["api_key_id"] == "key-id" - - -def test_device_login_polls_and_stores_key(monkeypatch, tmp_path): - monkeypatch.setenv("BH_AUTH_PATH", str(tmp_path / "auth.json")) - token_attempts = [] - - def fake_post(url, payload): - if url.endswith("/cloud/cli-auth/device"): - return { - "device_code": "device-123", - "user_code": "USER-123", - "verification_uri": "https://login.example/device", - "interval": 1, - "expires_in": 60, - } - if url.endswith("/cloud/cli-auth/token"): - token_attempts.append(payload) - if len(token_attempts) == 1: - raise auth.AuthError("authorization_pending") - return {"api_key": "device-key", "api_key_id": "device-key-id"} - raise AssertionError(url) - - monkeypatch.setattr(auth, "_post_json", fake_post) - monkeypatch.setattr(auth.time, "sleep", lambda _seconds: None) - - start = auth.start_device_auth(open_url=False) - record = auth.complete_device_auth(start, timeout=5) - - assert record.api_key == "device-key" - assert token_attempts[0]["grant_type"] == "urn:ietf:params:oauth:grant-type:device_code" - assert auth.get_browser_use_api_key() == "device-key" diff --git a/tests/unit/test_context.py b/tests/unit/test_context.py deleted file mode 100644 index 144324be..00000000 --- a/tests/unit/test_context.py +++ /dev/null @@ -1,89 +0,0 @@ -from pathlib import Path -import pytest - -from browser_harness import context, helpers - - -class _FakeConn: - def close(self): - pass - - -def test_send_uses_active_binding_runtime_dir(monkeypatch, tmp_path): - calls 
= [] - binding = context.BrowserBinding( - browser_id="br_test", - bu_name="bh_test", - runtime_dir=tmp_path / "r", - tmp_dir=tmp_path / "t", - manager_mode=True, - ) - old = context.get_active_binding() - context.activate_binding(binding) - try: - monkeypatch.setattr( - helpers.ipc, - "connect", - lambda name, timeout=1.0, runtime_dir=None: calls.append((name, runtime_dir)) or (_FakeConn(), None), - ) - monkeypatch.setattr(helpers.ipc, "request", lambda conn, token, req: {"ok": True}) - - assert helpers._send({"meta": "ping"}) == {"ok": True} - finally: - if old is not None: - context.activate_binding(old) - else: - context.clear_active_binding() - - assert calls == [("bh_test", tmp_path / "r")] - - -def test_capture_screenshot_defaults_to_binding_artifact_dir(monkeypatch, tmp_path, fake_png): - binding = context.BrowserBinding( - browser_id="br_test", - bu_name="bh_test", - runtime_dir=tmp_path / "r", - tmp_dir=tmp_path / "t", - artifact_dir=tmp_path / "artifacts", - manager_mode=True, - ) - old = context.get_active_binding() - context.activate_binding(binding) - try: - monkeypatch.setattr(helpers, "cdp", lambda method, **kwargs: {"data": fake_png(20, 10)}) - path = helpers.capture_screenshot() - finally: - if old is not None: - context.activate_binding(old) - else: - context.clear_active_binding() - - assert Path(path) == tmp_path / "artifacts" / "shot.png" - assert Path(path).exists() - - -def test_agent_identity_uses_codex_thread_fallback(monkeypatch): - monkeypatch.delenv("BH_RUN_ID", raising=False) - monkeypatch.delenv("BH_AGENT_ID", raising=False) - monkeypatch.setenv("CODEX_THREAD_ID", "thread-123") - monkeypatch.delenv("CODEX_AGENT_ID", raising=False) - monkeypatch.delenv("CODEX_SUBAGENT_ID", raising=False) - - ident = context.agent_identity() - - assert ident.run_id == "thread-123" - assert ident.agent_id == "main" - assert ident.degraded is False - - -def test_require_active_binding_explains_browser_selector(): - old = context.get_active_binding() - 
context.clear_active_binding() - try: - with pytest.raises(RuntimeError, match='call browser\\(""\\)'): - context.require_active_binding() - finally: - if old is not None: - context.activate_binding(old) - else: - context.clear_active_binding() diff --git a/tests/unit/test_daemon.py b/tests/unit/test_daemon.py index 13416280..90c5bc85 100644 --- a/tests/unit/test_daemon.py +++ b/tests/unit/test_daemon.py @@ -1,7 +1,5 @@ import asyncio -import pytest - from browser_harness import daemon @@ -276,8 +274,6 @@ async def send_raw(self, method, params=None, session_id=None): "targetId": "page-target-abc", "url": "https://example.com/", "title": "Example Domain", - "browserContextId": None, - "local_profile_id": None, } # The targetId must be passed through — that's the whole point of the fix. get_info_calls = [(p, s) for (m, p, s) in d.cdp.calls if m == "Target.getTargetInfo"] @@ -297,224 +293,3 @@ def test_current_tab_meta_returns_not_attached_when_no_target_id(): assert result == {"error": "not_attached"} # No CDP call should have been issued. 
assert d.cdp.calls == [] - - -def test_prepare_selected_local_profile_blocks_without_default(monkeypatch): - monkeypatch.delenv("BU_CDP_WS", raising=False) - monkeypatch.delenv("BU_CDP_URL", raising=False) - monkeypatch.setattr(daemon, "REMOTE_ID", None) - monkeypatch.setattr(daemon.local_profiles, "get_default_profile_id", lambda: None) - monkeypatch.setattr( - daemon.local_profiles, - "list_local_profiles_payload", - lambda: {"status": "ok", "profiles": [{"id": "google-chrome:Default"}]}, - ) - d = daemon.Daemon() - - with pytest.raises(RuntimeError, match="needs-profile"): - d._prepare_selected_local_profile() - - -def test_prepare_selected_local_profile_blocks_checkbox_off_without_opening_marker(tmp_path, monkeypatch): - profile = daemon.local_profiles.LocalBrowserProfile( - id="google-chrome:Default", - browser_name="Google Chrome", - browser_path=tmp_path / "chrome", - user_data_dir=tmp_path / "User Data", - profile_dir="Default", - profile_name="Default", - profile_path=tmp_path / "User Data" / "Default", - display_name="Google Chrome - Default", - ) - monkeypatch.delenv("BU_CDP_WS", raising=False) - monkeypatch.delenv("BU_CDP_URL", raising=False) - monkeypatch.setattr(daemon, "REMOTE_ID", None) - monkeypatch.setattr(daemon.local_profiles, "get_default_profile_id", lambda: profile.id) - monkeypatch.setattr(daemon.local_profiles, "resolve_local_profile", lambda _profile_id: profile) - monkeypatch.setattr(daemon.local_profiles, "remote_debugging_user_enabled", lambda _path: False) - monkeypatch.setattr( - daemon.local_profiles, - "open_local_profile", - lambda *args, **kwargs: (_ for _ in ()).throw(AssertionError("must not open marker")), - ) - d = daemon.Daemon() - - with pytest.raises(RuntimeError, match="cdp-disabled"): - d._prepare_selected_local_profile() - - -def test_target_create_is_scoped_to_selected_browser_context(): - d = _fresh_daemon() - d.preferred_browser_context_id = "ctx-selected" - - result = asyncio.run(d.handle({ - "method": 
"Target.createTarget", - "params": {"url": "about:blank"}, - })) - - assert result == {"result": {}} - assert d.cdp.calls == [ - ("Target.createTarget", {"url": "about:blank", "browserContextId": "ctx-selected"}, None) - ] - - -def test_target_create_rejects_different_browser_context(): - d = _fresh_daemon() - d.preferred_browser_context_id = "ctx-selected" - - result = asyncio.run(d.handle({ - "method": "Target.createTarget", - "params": {"url": "about:blank", "browserContextId": "ctx-other"}, - })) - - assert result == {"error": "wrong-profile: refusing to create a target in a different Chrome profile context"} - assert d.cdp.calls == [] - - -def test_set_session_rejects_target_from_different_browser_context(): - class _TargetsCDP(_FakeCDP): - async def send_raw(self, method, params=None, session_id=None): - self.calls.append((method, params, session_id)) - if method == "Target.getTargets": - return {"targetInfos": [ - {"targetId": "target-other", "type": "page", "browserContextId": "ctx-other"}, - ]} - return {} - - d = daemon.Daemon() - d.cdp = _TargetsCDP() - d.preferred_browser_context_id = "ctx-selected" - - result = asyncio.run(d.handle({ - "meta": "set_session", - "session_id": "session-other", - "target_id": "target-other", - })) - - assert result == {"error": "wrong-profile: refusing to switch to a target from a different Chrome profile context"} - assert d.session is None - assert d.target_id is None - - -def test_marker_attach_captures_profile_and_browser_context(): - class _MarkerCDP(_FakeCDP): - async def send_raw(self, method, params=None, session_id=None): - self.calls.append((method, params, session_id)) - if method == "Target.getTargets": - return {"targetInfos": [ - { - "targetId": "marker-target", - "type": "page", - "url": "https://browser-use.com/browser-use-profile-target/123", - "browserContextId": "ctx-selected", - }, - { - "targetId": "duplicate-marker", - "type": "page", - "url": "https://browser-use.com/browser-use-profile-target/123", 
- "browserContextId": "ctx-selected", - }, - { - "targetId": "work-target", - "type": "page", - "url": "https://example.com/", - "browserContextId": "ctx-selected", - }, - ]} - if method == "Target.attachToTarget": - return {"sessionId": f"session-{params['targetId']}"} - return {} - - d = daemon.Daemon() - d.cdp = _MarkerCDP() - d.preferred_target_marker = "123" - d.preferred_profile_id = "google-chrome:Default" - - page = asyncio.run(d.attach_first_page()) - - assert page["targetId"] == "work-target" - assert d.session == "session-work-target" - assert d.target_id == "work-target" - assert d.active_local_profile_id == "google-chrome:Default" - assert d.preferred_browser_context_id == "ctx-selected" - assert ("Target.closeTarget", {"targetId": "marker-target"}, None) in d.cdp.calls - assert ("Target.closeTarget", {"targetId": "duplicate-marker"}, None) in d.cdp.calls - - -def test_marker_attach_creates_blank_tab_in_selected_context_when_only_marker_exists(): - class _MarkerOnlyCDP(_FakeCDP): - async def send_raw(self, method, params=None, session_id=None): - self.calls.append((method, params, session_id)) - if method == "Target.getTargets": - return {"targetInfos": [ - { - "targetId": "marker-target", - "type": "page", - "url": "https://browser-use.com/browser-use-profile-target/123", - "browserContextId": "ctx-selected", - }, - ]} - if method == "Target.createTarget": - return {"targetId": "created-target"} - if method == "Target.attachToTarget": - return {"sessionId": "session-created"} - return {} - - d = daemon.Daemon() - d.cdp = _MarkerOnlyCDP() - d.preferred_target_marker = "123" - d.preferred_profile_id = "google-chrome:Default" - - page = asyncio.run(d.attach_first_page()) - - assert page["targetId"] == "created-target" - assert d.session == "session-created" - assert d.target_id == "created-target" - assert d.active_local_profile_id == "google-chrome:Default" - assert d.preferred_browser_context_id == "ctx-selected" - assert d.owned_target_ids == 
{"created-target"} - assert ("Target.createTarget", {"url": "about:blank", "browserContextId": "ctx-selected"}, None) in d.cdp.calls - assert ("Target.closeTarget", {"targetId": "marker-target"}, None) in d.cdp.calls - - -def test_target_create_tracks_owned_target_and_close_owned_targets_closes_it(): - class _CreateAndCloseCDP(_FakeCDP): - async def send_raw(self, method, params=None, session_id=None): - self.calls.append((method, params, session_id)) - if method == "Target.createTarget": - return {"targetId": "created-by-helper"} - return {} - - d = daemon.Daemon() - d.cdp = _CreateAndCloseCDP() - - result = asyncio.run(d.handle({ - "method": "Target.createTarget", - "params": {"url": "https://example.com/"}, - })) - - assert result == {"result": {"targetId": "created-by-helper"}} - assert d.owned_target_ids == {"created-by-helper"} - - asyncio.run(d.close_owned_targets()) - - assert d.owned_target_ids == set() - assert ("Target.closeTarget", {"targetId": "created-by-helper"}, None) in d.cdp.calls - - -def test_reattach_same_target_reports_target_gone_instead_of_switching(): - class _GoneCDP(_FakeCDP): - async def send_raw(self, method, params=None, session_id=None): - self.calls.append((method, params, session_id)) - if method == "Target.getTargets": - return {"targetInfos": []} - raise RuntimeError("Session with given id not found") - - d = daemon.Daemon() - d.cdp = _GoneCDP() - d.session = "session-old" - d.target_id = "target-old" - - result = asyncio.run(d.handle({"method": "Runtime.evaluate", "params": {"expression": "1"}})) - - assert result == {"error": "target-gone: Previous browser tab target is gone."} diff --git a/tests/unit/test_helpers.py b/tests/unit/test_helpers.py index da00ac28..4a45ee07 100644 --- a/tests/unit/test_helpers.py +++ b/tests/unit/test_helpers.py @@ -165,21 +165,6 @@ def fake_js(expr, **kwargs): assert "Backspace" not in keys_seen -def test_press_key_modified_character_does_not_emit_char_event(): - key_events = [] - - def 
fake_cdp(method, **kwargs): - if method == "Input.dispatchKeyEvent": - key_events.append(kwargs) - return {} - - with patch("browser_harness.helpers.cdp", side_effect=fake_cdp): - helpers.press_key("a", modifiers=4) - - assert [e["type"] for e in key_events] == ["keyDown", "keyUp"] - assert not any(e.get("type") == "char" for e in key_events) - - # --- wait_for_element --- def test_wait_for_element_returns_true_when_found_immediately(): @@ -234,92 +219,6 @@ def fake_js(expr, **kwargs): assert any("querySelector" in e and "offsetParent" not in e for e in js_exprs) -# --- tabs / profile contexts --- - -def test_new_tab_reuses_attached_blank_tab_for_url(): - with patch("browser_harness.helpers.current_tab", return_value={"targetId": "blank-target", "url": "about:blank"}), \ - patch("browser_harness.helpers.goto_url") as goto_url, \ - patch("browser_harness.helpers.cdp") as cdp, \ - patch("browser_harness.helpers.switch_tab") as switch_tab: - result = helpers.new_tab("https://example.test/") - - assert result == "blank-target" - goto_url.assert_called_once_with("https://example.test/") - cdp.assert_not_called() - switch_tab.assert_not_called() - - -def test_list_tabs_filters_to_current_browser_context(): - def fake_send(req): - if req.get("meta") == "current_tab": - return { - "targetId": "selected-target", - "url": "https://selected.example", - "title": "Selected", - "browserContextId": "ctx-selected", - } - return {} - - def fake_cdp(method, **kwargs): - assert method == "Target.getTargets" - return {"targetInfos": [ - { - "targetId": "selected-target", - "type": "page", - "title": "Selected", - "url": "https://selected.example", - "browserContextId": "ctx-selected", - }, - { - "targetId": "other-target", - "type": "page", - "title": "Other", - "url": "https://other.example", - "browserContextId": "ctx-other", - }, - ]} - - with patch("browser_harness.helpers._send", side_effect=fake_send), \ - patch("browser_harness.helpers.cdp", side_effect=fake_cdp): - tabs = 
helpers.list_tabs() - all_tabs = helpers.list_tabs(include_other_contexts=True) - - assert [tab["targetId"] for tab in tabs] == ["selected-target"] - assert {tab["targetId"] for tab in all_tabs} == {"selected-target", "other-target"} - - -def test_new_tab_preserves_current_browser_context(): - calls = [] - - def fake_send(req): - if req.get("meta") == "current_tab": - return { - "targetId": "current-target", - "url": "about:blank", - "title": "", - "browserContextId": "ctx-selected", - } - if req.get("meta") == "set_session": - return {"session_id": req["session_id"]} - return {} - - def fake_cdp(method, **kwargs): - calls.append((method, kwargs)) - if method == "Target.createTarget": - return {"targetId": "new-target"} - if method == "Target.attachToTarget": - return {"sessionId": "session-new"} - return {} - - with patch("browser_harness.helpers._send", side_effect=fake_send), \ - patch("browser_harness.helpers.cdp", side_effect=fake_cdp): - target_id = helpers.new_tab() - - assert target_id == "new-target" - create_call = next(kwargs for method, kwargs in calls if method == "Target.createTarget") - assert create_call["browserContextId"] == "ctx-selected" - - # --- wait_for_network_idle --- def test_wait_for_network_idle_returns_true_when_no_events(): diff --git a/tests/unit/test_local_profiles.py b/tests/unit/test_local_profiles.py deleted file mode 100644 index 6995f3de..00000000 --- a/tests/unit/test_local_profiles.py +++ /dev/null @@ -1,101 +0,0 @@ -import stat - -import pytest - -from browser_harness import local_profiles - - -def _install(tmp_path, name="Google Chrome"): - tmp_path.mkdir(parents=True, exist_ok=True) - browser = tmp_path / "chrome" - browser.write_text("#!/bin/sh\n") - browser.chmod(browser.stat().st_mode | stat.S_IXUSR) - user_data = tmp_path / "User Data" - user_data.mkdir() - (user_data / "Local State").write_text( - '{"profile":{"info_cache":{"Default":{"name":"Greg"},"Profile 1":{"name":"Work"}}}}' - ) - for profile_dir in ("Default", 
"Profile 1"): - profile = user_data / profile_dir - profile.mkdir() - (profile / "Preferences").write_text("{}") - return local_profiles.LocalBrowserInstall(name, browser, user_data) - - -def test_local_profile_detection_reads_local_state_names_and_stable_ids(tmp_path, monkeypatch): - install = _install(tmp_path) - monkeypatch.setattr(local_profiles, "known_local_browser_installs", lambda: [install]) - - profiles = local_profiles.detect_local_profiles() - - assert [p.id for p in profiles] == ["google-chrome:Default", "google-chrome:Profile 1"] - assert profiles[0].profile_name == "Greg" - assert profiles[1].display_name == "Google Chrome - Work" - - -def test_local_profile_resolution_requires_exact_id_when_names_collide(tmp_path, monkeypatch): - chrome = _install(tmp_path / "chrome", "Google Chrome") - brave = _install(tmp_path / "brave", "Brave") - monkeypatch.setattr(local_profiles, "known_local_browser_installs", lambda: [chrome, brave]) - - with pytest.raises(RuntimeError, match="multiple local profiles matched"): - local_profiles.resolve_local_profile("Work") - - assert local_profiles.resolve_local_profile("brave:Profile 1").browser_name == "Brave" - - -def test_default_profile_file_roundtrip(tmp_path, monkeypatch): - install = _install(tmp_path) - monkeypatch.setenv("BH_CONFIG_DIR", str(tmp_path / "config")) - monkeypatch.delenv("BH_LOCAL_PROFILE", raising=False) - monkeypatch.delenv("BH_SELECTED_LOCAL_PROFILE", raising=False) - monkeypatch.setattr(local_profiles, "known_local_browser_installs", lambda: [install]) - - result = local_profiles.set_default_profile_id("google-chrome:Default") - - assert result["default_local_profile_id"] == "google-chrome:Default" - assert local_profiles.get_default_profile_id() == "google-chrome:Default" - - -def test_browser_profiles_payload_is_concise_by_default(tmp_path, monkeypatch): - install = _install(tmp_path) - monkeypatch.setenv("BH_CONFIG_DIR", str(tmp_path / "config")) - monkeypatch.setattr(local_profiles, 
"known_local_browser_installs", lambda: [install]) - local_profiles.set_default_profile_id("google-chrome:Default") - - assert local_profiles.list_browser_profiles_payload() == { - "selected": "google-chrome:Default", - "profiles": [ - { - "id": "google-chrome:Default", - "label": "Google Chrome - Greg", - "selected": True, - }, - { - "id": "google-chrome:Profile 1", - "label": "Google Chrome - Work", - "selected": False, - }, - ], - } - - -def test_default_profile_rejects_missing_browser_binary(tmp_path, monkeypatch): - install = _install(tmp_path) - install.browser_path.unlink() - monkeypatch.setenv("BH_CONFIG_DIR", str(tmp_path / "config")) - monkeypatch.setattr(local_profiles, "known_local_browser_installs", lambda: [install]) - - with pytest.raises(RuntimeError, match="browser binary not found or not executable"): - local_profiles.set_default_profile_id("google-chrome:Default") - - -def test_env_selected_profile_overrides_default_file(tmp_path, monkeypatch): - install = _install(tmp_path) - monkeypatch.setenv("BH_CONFIG_DIR", str(tmp_path / "config")) - monkeypatch.setattr(local_profiles, "known_local_browser_installs", lambda: [install]) - local_profiles.set_default_profile_id("google-chrome:Default") - - monkeypatch.setenv("BH_SELECTED_LOCAL_PROFILE", "google-chrome:Profile 1") - - assert local_profiles.get_default_profile_id() == "google-chrome:Profile 1" diff --git a/tests/unit/test_manager_daemon.py b/tests/unit/test_manager_daemon.py deleted file mode 100644 index cfae8e79..00000000 --- a/tests/unit/test_manager_daemon.py +++ /dev/null @@ -1,325 +0,0 @@ -import json -import socket -import threading - -from browser_harness import manager_daemon -from browser_harness import auth -from browser_harness.manager_daemon import Manager - - -class _FakeResponse: - def __enter__(self): - return self - - def __exit__(self, *_args): - return False - - def read(self): - return b'{"ok": true}' - - -def _manager_with_lease(tmp_path): - manager = Manager(tmp_path) - 
lease = manager._allocate_lease("run-1", "agent-1", "cloud", "clean") - manager.leases[lease.browser_id] = lease - return manager, lease - - -def _send_to_handle_conn(manager, req): - left, right = socket.socketpair() - thread = threading.Thread(target=manager_daemon.handle_conn, args=(manager, right)) - thread.start() - try: - left.sendall((json.dumps(req) + "\n").encode()) - data = b"" - while not data.endswith(b"\n"): - data += left.recv(4096) - finally: - left.close() - thread.join(timeout=1) - return json.loads(data) - - -def test_handle_conn_requires_manager_token(monkeypatch, tmp_path): - manager = Manager(tmp_path) - monkeypatch.setattr(manager_daemon, "_server_token", "secret-token") - - denied = _send_to_handle_conn(manager, {"op": "list", "token": "wrong"}) - pong = _send_to_handle_conn(manager, {"meta": "ping", "token": "secret-token"}) - - assert denied["ok"] is False - assert denied["state"] == "forbidden" - assert pong["pong"] is True - - -def test_switch_allows_multiple_clients_to_select_same_browser(tmp_path): - manager, lease = _manager_with_lease(tmp_path) - - first = manager.handle({ - "op": "switch", - "run_id": "run-1", - "agent_id": "agent-1", - "client_id": "client-1", - "browser_id": lease.browser_id, - }) - second = manager.handle({ - "op": "switch", - "run_id": "run-1", - "agent_id": "agent-1", - "client_id": "client-2", - "browser_id": lease.browser_id, - }) - - assert first["ok"] is True - assert second["ok"] is True - assert first["id"] == lease.browser_id - assert second["id"] == lease.browser_id - - -def test_lock_endpoint_is_compatibility_noop(tmp_path): - manager, lease = _manager_with_lease(tmp_path) - first = manager.handle({ - "op": "lock", - "run_id": "run-1", - "agent_id": "agent-1", - "client_id": "client-1", - "browser_id": lease.browser_id, - }) - - second = manager.handle({ - "op": "lock", - "run_id": "run-1", - "agent_id": "agent-1", - "client_id": "client-2", - "browser_id": lease.browser_id, - }) - - assert first["ok"] 
is True - assert second["ok"] is True - assert first["lock_id"] == "shared" - assert second["lock_id"] == "shared" - - -def test_close_requires_explicit_id(tmp_path): - manager, lease = _manager_with_lease(tmp_path) - - resp = manager.handle({ - "op": "close", - "run_id": "run-1", - "agent_id": "agent-1", - }) - - assert resp["ok"] is False - assert resp["state"] == "bad-request" - assert lease.browser_id in manager.leases - - -def test_close_removes_exact_browser_id(monkeypatch, tmp_path): - manager, lease = _manager_with_lease(tmp_path) - cleaned = [] - monkeypatch.setattr(manager_daemon, "cleanup_backend", lambda lease: cleaned.append(lease.browser_id)) - - resp = manager.handle({ - "op": "close", - "run_id": "run-1", - "agent_id": "agent-1", - "client_id": "client-2", - "browser_id": lease.browser_id, - }) - - assert resp["ok"] is True - assert resp["state"] == "closed" - assert lease.browser_id not in manager.leases - assert cleaned == [lease.browser_id] - - -def test_close_owned_closes_only_current_owner_browsers(monkeypatch, tmp_path): - manager = Manager(tmp_path) - cleaned = [] - monkeypatch.setattr(manager_daemon, "cleanup_backend", lambda lease: cleaned.append(lease.browser_id)) - owned = manager._allocate_lease("run-1", "agent-1", "cloud", "clean") - other_agent = manager._allocate_lease("run-1", "agent-2", "cloud", "clean") - other_run = manager._allocate_lease("run-2", "agent-1", "cloud", "clean") - manager.leases = { - owned.browser_id: owned, - other_agent.browser_id: other_agent, - other_run.browser_id: other_run, - } - - resp = manager.handle({ - "op": "close_owned", - "run_id": "run-1", - "agent_id": "agent-1", - }) - - assert resp["ok"] is True - assert resp["state"] == "closed-owned" - assert resp["closed"] == [owned.browser_id] - assert owned.browser_id not in manager.leases - assert other_agent.browser_id in manager.leases - assert other_run.browser_id in manager.leases - assert cleaned == [owned.browser_id] - - -def 
test_short_browser_ids_have_no_prefix(tmp_path): - manager, lease = _manager_with_lease(tmp_path) - - assert len(lease.browser_id) == 6 - assert not lease.browser_id.startswith("br_") - - -def test_lease_load_ignores_removed_hierarchy_fields(tmp_path): - payload = { - "browser_id": "abc123", - "run_id": "run-1", - "owner_agent_id": "agent-1", - "backend": "cloud", - "profile_kind": "clean", - "harness_daemon_name": "bh_123", - "runtime_dir": str(tmp_path / "r"), - "tmp_dir": str(tmp_path / "t"), - "download_dir": str(tmp_path / "downloads"), - "artifact_dir": str(tmp_path / "artifacts"), - "profile_dir": str(tmp_path / "profile"), - "allowed_agents": ["agent-1", "agent-2"], - "active_execution": {"client_id": "old-client"}, - } - - lease = manager_daemon.BrowserLease.from_json(payload) - - assert lease.browser_id == "abc123" - assert lease.owner_agent_id == "agent-1" - - -def test_cloud_live_url_is_exposed_in_ready_state(tmp_path): - manager, lease = _manager_with_lease(tmp_path) - lease.cloud_live_url = "https://live.example/session" - - resp = manager.handle({ - "op": "status", - "run_id": "run-1", - "agent_id": "agent-1", - "browser_id": lease.browser_id, - }) - - assert resp["ok"] is True - assert resp["id"] == lease.browser_id - assert resp["live_url"] == "https://live.example/session" - - -def test_cloud_browser_id_is_exposed_in_ready_state(tmp_path): - manager, lease = _manager_with_lease(tmp_path) - lease.cloud_browser_id = "browser-123" - - resp = manager.handle({ - "op": "status", - "run_id": "run-1", - "agent_id": "agent-1", - "browser_id": lease.browser_id, - }) - - assert resp["ok"] is True - assert resp["id"] == lease.browser_id - assert resp["cloud_browser_id"] == "browser-123" - - -def test_cloud_live_url_is_exposed_in_browser_list(tmp_path): - manager, lease = _manager_with_lease(tmp_path) - lease.cloud_browser_id = "browser-123" - lease.cloud_live_url = "https://live.example/session" - - resp = manager.handle({ - "op": "list", - "run_id": "run-1", 
- "agent_id": "agent-1", - }) - - assert resp["ok"] is True - assert resp["browsers"] == [ - { - "id": lease.browser_id, - "backend": "cloud", - "owner": "agent-1", - "owned_by_this_agent": True, - "state": "ready", - "cloud_browser_id": "browser-123", - "live_url": "https://live.example/session", - } - ] - - -def test_start_cloud_backend_fetches_missing_live_url(monkeypatch, tmp_path): - lease = Manager(tmp_path)._allocate_lease("run-1", "agent-1", "cloud", "clean") - calls = [] - - def fake_browser_use(path, method, body=None): - calls.append((path, method, body)) - if (path, method) == ("/browsers", "POST"): - return {"id": "browser-123", "cdpUrl": "https://cdp.initial"} - if (path, method) == ("/browsers/browser-123", "GET"): - return {"id": "browser-123", "cdpUrl": "https://cdp.refreshed", "liveUrl": "https://live.example/session"} - raise AssertionError((path, method, body)) - - monkeypatch.setattr(manager_daemon.auth, "get_browser_use_api_key", lambda: "stored-key") - monkeypatch.setattr(manager_daemon, "_browser_use", fake_browser_use) - monkeypatch.setattr(manager_daemon, "start_harness_daemon", lambda lease: None) - - manager_daemon.start_cloud_backend(lease, proxy_country=None) - - assert calls == [ - ("/browsers", "POST", {}), - ("/browsers/browser-123", "GET", None), - ] - assert lease.cloud_browser_id == "browser-123" - assert lease.cloud_live_url == "https://live.example/session" - assert lease.cdp_url == "https://cdp.refreshed" - - -def test_cloud_new_reports_auth_required(monkeypatch, tmp_path): - manager = Manager(tmp_path) - monkeypatch.setattr( - "browser_harness.manager_daemon.auth.get_browser_use_api_key", - lambda: (_ for _ in ()).throw(auth.CloudAuthRequired()), - ) - - resp = manager.handle({ - "op": "new", - "run_id": "run-1", - "agent_id": "agent-1", - "backend": "cloud", - }) - - assert resp["ok"] is False - assert resp["state"] == "cloud-auth-required" - assert "browser-harness auth login" in resp["reason"] - - -def 
test_browser_use_api_uses_auth_resolution(monkeypatch): - captured = [] - monkeypatch.delenv("BROWSER_USE_API_KEY", raising=False) - monkeypatch.setattr(manager_daemon.auth, "get_browser_use_api_key", lambda: "stored-key") - monkeypatch.setattr( - manager_daemon.urllib.request, - "urlopen", - lambda req, timeout=60: captured.append(req) or _FakeResponse(), - ) - - assert manager_daemon._browser_use("/browsers", "POST", {}) == {"ok": True} - - assert captured - assert captured[0].get_header("X-browser-use-api-key") == "stored-key" - - -def test_find_browser_binary_skips_unusable_path_candidate_and_uses_mac_app(monkeypatch): - monkeypatch.delenv("BH_CHROME_PATH", raising=False) - monkeypatch.delenv("CHROME_PATH", raising=False) - monkeypatch.setattr(manager_daemon.sys, "platform", "darwin") - monkeypatch.setattr(manager_daemon.shutil, "which", lambda name: "/broken/chromium" if name == "chromium" else None) - monkeypatch.setattr(manager_daemon, "MAC_BROWSER_PATHS", ("/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",)) - monkeypatch.setattr( - manager_daemon, - "_browser_binary_usable", - lambda path: path == "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome", - ) - - assert manager_daemon.find_browser_binary() == "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" diff --git a/tests/unit/test_manager_helpers.py b/tests/unit/test_manager_helpers.py deleted file mode 100644 index 58b8c163..00000000 --- a/tests/unit/test_manager_helpers.py +++ /dev/null @@ -1,195 +0,0 @@ -import pytest - -from browser_harness import context, manager_helpers - - -def _manager_response(tmp_path): - return { - "ok": True, - "ready": True, - "state": "ready", - "id": "abc123", - "backend": "private", - "binding": { - "browser_id": "abc123", - "bu_name": "bh_123", - "runtime_dir": str(tmp_path / "r"), - "tmp_dir": str(tmp_path / "t"), - "download_dir": str(tmp_path / "downloads"), - "artifact_dir": str(tmp_path / "artifacts"), - "cdp_url": 
"http://127.0.0.1:4567", - "cdp_ws": None, - }, - } - - -def test_browser_new_creates_without_activating_binding(monkeypatch, tmp_path): - old = context.get_active_binding() - try: - monkeypatch.setattr(manager_helpers.manager_client, "new_browser", lambda *args, **kwargs: _manager_response(tmp_path)) - - state = manager_helpers.browser_new(backend="managed", reason="test") - binding = context.get_active_binding() - finally: - if old is not None: - context.activate_binding(old) - else: - context.clear_active_binding() - - assert state["id"] == "abc123" - assert "binding" not in state - assert binding == old - - -def test_browser_new_private_maps_to_managed_backend(monkeypatch, tmp_path): - calls = [] - old = context.get_active_binding() - try: - monkeypatch.setattr( - manager_helpers.manager_client, - "new_browser", - lambda *args, **kwargs: calls.append((args, kwargs)) or _manager_response(tmp_path), - ) - manager_helpers.browser_new("private", reason="test") - finally: - if old is not None: - context.activate_binding(old) - else: - context.clear_active_binding() - - assert calls[0][1]["backend"] == "managed" - - -def test_browser_new_cloud_maps_to_cloud_backend(monkeypatch, tmp_path): - calls = [] - old = context.get_active_binding() - try: - monkeypatch.setattr( - manager_helpers.manager_client, - "new_browser", - lambda *args, **kwargs: calls.append((args, kwargs)) or _manager_response(tmp_path), - ) - manager_helpers.browser_new("cloud") - finally: - if old is not None: - context.activate_binding(old) - else: - context.clear_active_binding() - - assert calls[0][1]["backend"] == "cloud" - - -def test_browser_profiles_returns_concise_payload(monkeypatch): - monkeypatch.setattr( - manager_helpers.local_profiles, - "list_browser_profiles_payload", - lambda verbose=False: {"selected": "google-chrome:Default", "profiles": []}, - ) - - assert manager_helpers.browser_profiles() == { - "selected": "google-chrome:Default", - "profiles": [], - } - - -def 
test_browser_use_profile_returns_selected_profile(monkeypatch): - monkeypatch.setattr( - manager_helpers.local_profiles, - "use_browser_profile", - lambda profile_id: {"selected": profile_id, "label": "Google Chrome - Default"}, - ) - - assert manager_helpers.browser_use_profile("google-chrome:Default") == { - "selected": "google-chrome:Default", - "label": "Google Chrome - Default", - } - - -def test_browser_select_activates_binding(monkeypatch, tmp_path): - old = context.get_active_binding() - try: - monkeypatch.setattr(manager_helpers.manager_client, "switch_browser", lambda browser_id: _manager_response(tmp_path)) - - state = manager_helpers.browser("abc123") - binding = context.get_active_binding() - finally: - if old is not None: - context.activate_binding(old) - else: - context.clear_active_binding() - - assert state["id"] == "abc123" - assert "binding" not in state - assert binding is not None - assert binding.bu_name == "bh_123" - - -def test_browser_switch_aliases_browser(monkeypatch): - calls = [] - monkeypatch.setattr(manager_helpers, "browser", lambda browser_id: calls.append(browser_id) or {"id": browser_id}) - - assert manager_helpers.browser_switch("abc123") == {"id": "abc123"} - assert calls == ["abc123"] - - -def test_browser_close_clears_active_binding(monkeypatch, tmp_path): - closed = [] - old = context.get_active_binding() - context.activate_binding(context.BrowserBinding( - browser_id="abc123", - bu_name="bh_123", - runtime_dir=tmp_path / "r", - tmp_dir=tmp_path / "t", - manager_mode=True, - )) - try: - monkeypatch.setattr( - manager_helpers.manager_client, - "close_browser", - lambda browser_id=None: closed.append(browser_id) or {"ok": True, "state": "closed", "id": "abc123"}, - ) - - state = manager_helpers.browser_close("abc123") - active = context.get_active_binding() - finally: - if old is not None: - context.activate_binding(old) - else: - context.clear_active_binding() - - assert state == {"state": "closed", "id": "abc123"} - assert 
closed == ["abc123"] - assert active is None - - -def test_browser_close_requires_explicit_id(): - with pytest.raises(ValueError, match="browser_close\\(id\\)"): - manager_helpers.browser_close() - - -def test_browser_close_owned_clears_active_binding_when_active_id_closed(monkeypatch, tmp_path): - old = context.get_active_binding() - context.activate_binding(context.BrowserBinding( - browser_id="abc123", - bu_name="bh_123", - runtime_dir=tmp_path / "r", - tmp_dir=tmp_path / "t", - manager_mode=True, - )) - try: - monkeypatch.setattr( - manager_helpers.manager_client, - "close_owned_browsers", - lambda: {"ok": True, "state": "closed-owned", "closed": ["abc123", "def456"]}, - ) - - state = manager_helpers.browser_close_owned() - active = context.get_active_binding() - finally: - if old is not None: - context.activate_binding(old) - else: - context.clear_active_binding() - - assert state == {"state": "closed-owned", "closed": ["abc123", "def456"]} - assert active is None diff --git a/tests/unit/test_manager_runtime.py b/tests/unit/test_manager_runtime.py deleted file mode 100644 index f0b58ea0..00000000 --- a/tests/unit/test_manager_runtime.py +++ /dev/null @@ -1,69 +0,0 @@ -import json -import socket -import stat - -import pytest - -from browser_harness import manager_runtime - - -def test_default_manager_root_is_user_private_tmp(monkeypatch): - monkeypatch.delenv("BH_MANAGER_ROOT", raising=False) - monkeypatch.delenv("XDG_RUNTIME_DIR", raising=False) - monkeypatch.setattr(manager_runtime, "IS_WINDOWS", False) - monkeypatch.setattr(manager_runtime.os, "getuid", lambda: 12345, raising=False) - - assert manager_runtime.default_root() == manager_runtime.Path("/tmp/bhm-12345") - - -def test_ensure_private_dir_tightens_permissions(tmp_path): - if manager_runtime.IS_WINDOWS: - pytest.skip("POSIX permissions only") - path = tmp_path / "manager" - path.mkdir(mode=0o755) - - manager_runtime.ensure_private_dir(path) - - mode = stat.S_IMODE(path.stat().st_mode) - assert mode 
== 0o700 - - -def test_write_private_json_uses_private_file_mode(tmp_path): - if manager_runtime.IS_WINDOWS: - pytest.skip("POSIX permissions only") - path = tmp_path / "manager" / "registry.json" - - manager_runtime.write_private_json(path, {"ok": True}) - - assert json.loads(path.read_text()) == {"ok": True} - assert stat.S_IMODE(path.parent.stat().st_mode) == 0o700 - assert stat.S_IMODE(path.stat().st_mode) == 0o600 - - -def test_windows_default_endpoint_is_token_file(monkeypatch, tmp_path): - monkeypatch.setattr(manager_runtime, "IS_WINDOWS", True) - monkeypatch.setenv("LOCALAPPDATA", str(tmp_path)) - monkeypatch.delenv("BH_MANAGER_ROOT", raising=False) - monkeypatch.delenv("BH_MANAGER_SOCKET", raising=False) - - root = manager_runtime.default_root() - - assert root == tmp_path / "browser-harness" / "manager" - assert manager_runtime.default_endpoint(root) == root / "manager.port.json" - - -def test_send_request_injects_windows_token(): - left, right = socket.socketpair() - try: - left.settimeout(1) - right.settimeout(1) - right.sendall(b'{"ok": true}\n') - - resp = manager_runtime.send_request(left, "secret-token", {"op": "list"}) - sent = right.recv(4096).decode() - finally: - left.close() - right.close() - - assert json.loads(sent) == {"op": "list", "token": "secret-token"} - assert resp == {"ok": True} diff --git a/tests/unit/test_run.py b/tests/unit/test_run.py index 656a0261..20783e03 100644 --- a/tests/unit/test_run.py +++ b/tests/unit/test_run.py @@ -1,4 +1,3 @@ -import json import sys from io import StringIO from unittest.mock import patch @@ -239,58 +238,3 @@ def test_cli_doctor_rejects_unknown_flags(): assert ei.value.code == 2 assert "usage" in err.getvalue().lower() - -def test_profiles_cli_runs_without_daemon(): - stdout = StringIO() - - with patch.object(sys, "argv", ["browser-harness", "profiles"]), \ - patch("sys.stdout", stdout), \ - patch("browser_harness.run.ensure_daemon") as ensure_daemon, \ - patch("browser_harness.run.browser_profiles", 
return_value={"profiles": []}) as profiles: - run.main() - - ensure_daemon.assert_not_called() - profiles.assert_called_once_with(verbose=False) - assert json.loads(stdout.getvalue()) == {"profiles": []} - - -def test_profiles_cli_supports_verbose_without_daemon(): - stdout = StringIO() - - with patch.object(sys, "argv", ["browser-harness", "profiles", "--verbose"]), \ - patch("sys.stdout", stdout), \ - patch("browser_harness.run.ensure_daemon") as ensure_daemon, \ - patch("browser_harness.run.browser_profiles", return_value={"status": "ok"}) as profiles: - run.main() - - ensure_daemon.assert_not_called() - profiles.assert_called_once_with(verbose=True) - assert json.loads(stdout.getvalue()) == {"status": "ok"} - - -def test_use_profile_cli_runs_without_daemon(): - stdout = StringIO() - - with patch.object(sys, "argv", ["browser-harness", "use-profile", "google-chrome:Default"]), \ - patch("sys.stdout", stdout), \ - patch("browser_harness.run.ensure_daemon") as ensure_daemon, \ - patch("browser_harness.run.browser_use_profile", return_value={"selected": "google-chrome:Default"}) as use_profile: - run.main() - - ensure_daemon.assert_not_called() - use_profile.assert_called_once_with("google-chrome:Default") - assert json.loads(stdout.getvalue()) == {"selected": "google-chrome:Default"} - - -def test_open_profile_cli_runs_without_daemon_and_without_marker(): - stdout = StringIO() - - with patch.object(sys, "argv", ["browser-harness", "open-profile", "google-chrome:Default"]), \ - patch("sys.stdout", stdout), \ - patch("browser_harness.run.ensure_daemon") as ensure_daemon, \ - patch("browser_harness.run.open_local_profile", return_value={"opened": True}) as open_profile: - run.main() - - ensure_daemon.assert_not_called() - open_profile.assert_called_once_with("google-chrome:Default", marker=False) - assert json.loads(stdout.getvalue()) == {"opened": True} diff --git a/tests/unit/test_run_manager_mode.py b/tests/unit/test_run_manager_mode.py deleted file mode 100644 
index 3a45758e..00000000 --- a/tests/unit/test_run_manager_mode.py +++ /dev/null @@ -1,108 +0,0 @@ -import os -import sys -from io import StringIO -from unittest.mock import patch - -from browser_harness import run - - -def test_manager_mode_skips_legacy_daemon_start(monkeypatch): - monkeypatch.setenv("BH_MANAGER_SOCKET", "/tmp/nonexistent-manager.sock") - stdout = StringIO() - fake_stdin = StringIO("print('manager mode ok')") - - with patch.object(sys, "argv", ["browser-harness"]), \ - patch("sys.stdin", fake_stdin), \ - patch("sys.stdout", stdout), \ - patch("browser_harness.run.print_update_banner"), \ - patch("browser_harness.run.ensure_daemon") as ensure_daemon: - run.main() - - ensure_daemon.assert_not_called() - assert stdout.getvalue().strip() == "manager mode ok" - - -def test_manager_helper_call_enables_manager_mode_without_env(monkeypatch): - monkeypatch.delenv("BH_MANAGER_SOCKET", raising=False) - monkeypatch.delenv("BH_MANAGER_MODE", raising=False) - stdout = StringIO() - fake_stdin = StringIO("print(browser_status())") - - with patch.object(sys, "argv", ["browser-harness"]), \ - patch("sys.stdin", fake_stdin), \ - patch("sys.stdout", stdout), \ - patch("browser_harness.run.print_update_banner"), \ - patch("browser_harness.run.ensure_daemon") as ensure_daemon, \ - patch("browser_harness.run.browser_status", lambda: "manager helper mode ok"): - run.main() - - ensure_daemon.assert_not_called() - assert stdout.getvalue().strip() == "manager helper mode ok" - assert "BH_MANAGER_MODE" in os.environ - - -def test_browser_selector_call_enables_manager_mode(monkeypatch): - monkeypatch.delenv("BH_MANAGER_SOCKET", raising=False) - monkeypatch.delenv("BH_MANAGER_MODE", raising=False) - stdout = StringIO() - fake_stdin = StringIO("print(browser('abc123'))") - switched = [] - - with patch.object(sys, "argv", ["browser-harness"]), \ - patch("sys.stdin", fake_stdin), \ - patch("sys.stdout", stdout), \ - patch("browser_harness.run.print_update_banner"), \ - 
patch("browser_harness.run.ensure_daemon") as ensure_daemon, \ - patch("browser_harness.run.browser", lambda browser_id: switched.append(browser_id) or {"id": browser_id}): - run.main() - - ensure_daemon.assert_not_called() - assert switched == ["abc123"] - assert stdout.getvalue().strip() == "{'id': 'abc123'}" - - -def test_browser_profiles_runs_without_daemon(monkeypatch): - stdout = StringIO() - fake_stdin = StringIO("print(browser_profiles())") - - with patch.object(sys, "argv", ["browser-harness"]), \ - patch("sys.stdin", fake_stdin), \ - patch("sys.stdout", stdout), \ - patch("browser_harness.run.print_update_banner"), \ - patch("browser_harness.run.ensure_daemon") as ensure_daemon, \ - patch("browser_harness.run.browser_profiles", lambda: {"profiles": []}): - run.main() - - ensure_daemon.assert_not_called() - assert stdout.getvalue().strip() == "{'profiles': []}" - - -def test_browser_use_profile_runs_without_daemon(monkeypatch): - stdout = StringIO() - fake_stdin = StringIO("print(browser_use_profile('google-chrome:Default'))") - - with patch.object(sys, "argv", ["browser-harness"]), \ - patch("sys.stdin", fake_stdin), \ - patch("sys.stdout", stdout), \ - patch("browser_harness.run.print_update_banner"), \ - patch("browser_harness.run.ensure_daemon") as ensure_daemon, \ - patch("browser_harness.run.browser_use_profile", lambda profile_id: {"selected": profile_id}): - run.main() - - ensure_daemon.assert_not_called() - assert stdout.getvalue().strip() == "{'selected': 'google-chrome:Default'}" - - -def test_manager_mode_exception_propagates(monkeypatch): - monkeypatch.setenv("BH_MANAGER_SOCKET", "/tmp/nonexistent-manager.sock") - fake_stdin = StringIO("raise RuntimeError('boom')") - - with patch.object(sys, "argv", ["browser-harness"]), \ - patch("sys.stdin", fake_stdin), \ - patch("browser_harness.run.print_update_banner"): - try: - run.main() - except RuntimeError as e: - assert str(e) == "boom" - else: - raise AssertionError("expected RuntimeError") diff 
--git a/tests/unit/test_skill_docs.py b/tests/unit/test_skill_docs.py deleted file mode 100644 index b12871b6..00000000 --- a/tests/unit/test_skill_docs.py +++ /dev/null @@ -1,23 +0,0 @@ -from pathlib import Path -import subprocess -import sys - - -def test_packaged_skill_points_to_root_skill(): - repo = Path(__file__).resolve().parents[2] - skill = repo / "skills" / "browser-harness" / "SKILL.md" - - assert skill.is_symlink() - assert skill.readlink() == Path("../../SKILL.md") - - -def test_skill_materializer_writes_regular_file(tmp_path): - repo = Path(__file__).resolve().parents[2] - script = repo / "scripts" / "materialize_browser_harness_skill.py" - - subprocess.run([sys.executable, str(script), str(tmp_path)], check=True) - - materialized = tmp_path / "skills" / "browser-harness" / "SKILL.md" - assert materialized.is_file() - assert not materialized.is_symlink() - assert materialized.read_text() == (repo / "SKILL.md").read_text()