diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 00000000..a9fb4072 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,25 @@ +name: release + +on: + release: + types: [published] + +jobs: + publish: + name: publish to PyPI + runs-on: ubuntu-latest + environment: pypi + permissions: + contents: read + id-token: write + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + - name: Build distributions + run: | + python -m pip install --upgrade build + python -m build + - name: Publish distributions + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.gitignore b/.gitignore index 59e4f318..bb388a36 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,9 @@ __pycache__/ *.pyc *.log .env +.browser-harness-dev/ +build/ +dist/ uv.lock *.egg-info/ .idea/ diff --git a/README.md b/README.md index ab7f6a5d..fafaf556 100644 --- a/README.md +++ b/README.md @@ -23,9 +23,7 @@ One websocket to Chrome, nothing between. The agent writes what's missing during Paste into Claude Code or Codex: ```text -Set up https://github.com/browser-use/browser-harness for me. - -Read `install.md` and follow the steps to install browser-harness and connect it to my browser. +Install browser-harness with uv, register the skill from `browser-harness skill`, and connect it to my browser. ``` The agent will open `chrome://inspect/#remote-debugging`. Tick the checkbox so the agent can connect to your browser: @@ -51,15 +49,31 @@ Stealth, sub-agents, or headless deployment.
- `install.md` — first-time install and browser bootstrap - `SKILL.md` — day-to-day usage - `src/browser_harness/` — protected core package -- `agent-workspace/agent_helpers.py` — helper code the agent edits -- `agent-workspace/domain-skills/` — reusable site-specific skills the agent edits +- `${XDG_CONFIG_HOME:-~/.config}/browser-harness/agent-workspace/agent_helpers.py` — helper code the agent edits +- `${XDG_CONFIG_HOME:-~/.config}/browser-harness/agent-workspace/domain-skills/` — reusable site-specific skills the agent edits + +Plain `browser-harness` helper calls use the selected local browser profile. For isolated or cloud work, start with `browser_new("private")` or `browser_new("cloud")`, keep the returned short `id`, and call `browser(id)` before page helpers in each script; cloud responses include a `live_url` preview when available. + +## Development + +From a checkout, use `./browser-harness` to run the current working tree without activating a virtualenv or depending on the globally installed command: + +```bash +./browser-harness <<'PY' +print(browser_new("private")) +PY +``` + +Normal agent-facing docs should keep using `browser-harness`; the `./browser-harness` launcher is only for local repo testing. + +The dev launcher uses a short checkout-specific manager path under `/tmp`, so it does not attach to a stale global manager or another task's default manager. ## Contributing PRs and improvements welcome. The best way to help: **contribute a new domain skill** under [agent-workspace/domain-skills/](agent-workspace/domain-skills/) for a site or task you use often (LinkedIn outreach, ordering on Amazon, filing expenses, etc.). Each skill teaches the agent the selectors, flows, and edge cases it would otherwise have to rediscover. - **Skills are written by the harness, not by you.** Just run your task with the agent — when it figures something non-obvious out, it files the skill itself (see [SKILL.md](SKILL.md)). 
Please don't hand-author skill files; agent-generated ones reflect what actually works in the browser. -- Open a PR with the generated `agent-workspace/domain-skills/<site>/` folder — small and focused is great. +- Open a PR with the generated `domain-skills/<site>/` folder copied into this repo's `agent-workspace/domain-skills/` examples — small and focused is great. - Bug fixes, docs tweaks, and helper improvements are equally welcome. - Browse existing skills (`github/`, `linkedin/`, `amazon/`, ...) to see the shape. @@ -67,7 +81,7 @@ If you're not sure where to start, open an issue and we'll point you somewhere u ## Domain skills -Set `BH_DOMAIN_SKILLS=1` to enable [agent-workspace/domain-skills/](agent-workspace/domain-skills/) — community-contributed per-site playbooks `goto_url` surfaces by domain. Contribute via PR. +Set `BH_DOMAIN_SKILLS=1` to enable domain skills from the agent workspace. This repo's [agent-workspace/domain-skills/](agent-workspace/domain-skills/) directory contains examples to contribute via PR. --- diff --git a/SKILL.md b/SKILL.md index 13dfcda6..f5681f77 100644 --- a/SKILL.md +++ b/SKILL.md @@ -1,66 +1,105 @@ --- -name: browser -description: Direct browser control via CDP. Use when the user wants to automate, scrape, test, or interact with web pages. Connects to the user's already-running Chrome. +name: browser-harness +description: "Always use browser-harness for any web interaction: automation, scraping, testing, or site/app work." --- # browser-harness -Direct browser control via CDP. For task-specific edits, use `agent-workspace/agent_helpers.py`. For setup, install, or connection problems, read install.md. +Managed browsers have short explicit ids. Create or receive an id, then select it inside each script. -Domain skills (community-contributed per-site playbooks under `agent-workspace/domain-skills/`) are off by default. Set `BH_DOMAIN_SKILLS=1` to enable them; see the bottom section. 
- -**If `BH_DOMAIN_SKILLS=1` and the task is site-specific, read every file in the matching `agent-workspace/domain-skills//` directory before inventing an approach.** - -## Usage +Create and use a private browser: ```bash browser-harness <<'PY' +b = browser_new("private") +browser(b["id"]) new_tab("https://docs.browser-use.com") wait_for_load() +print({"id": b["id"], "page": page_info()}) +PY +``` + +Use an existing managed browser: + +```bash +browser-harness <<'PY' +browser("abc123") print(page_info()) PY ``` -- Invoke as browser-harness — it's on $PATH. No cd, no uv run. -- Use the heredoc form for every multi-line command. It prevents shell quote mangling inside Python strings and JavaScript snippets. -- First navigation is new_tab(url), not goto_url(url) — goto runs in the user's active tab and clobbers their work. +`browser(id)` selects a browser for this script only. Do not rely on a current browser across separate shell commands. Sharing an id means sharing that browser's tabs, cookies, downloads, and session state. -## Tool call shape +Inspect managed browsers: ```bash browser-harness <<'PY' -# any python. helpers pre-imported. daemon auto-starts. +print(browser_list()) +print(browser_status("abc123")) PY ``` -run.py calls ensure_daemon() before exec — you never start/stop manually unless you want to. +`browser_list()` shows known managed browser ids and their owners. + +## Choose Browser + +- User's logged-in local Chrome: use normal helpers. If setup asks for a profile, run `browser_profiles()`, ask the user which `id` to use, then run `browser_use_profile(id)` and retry. +- Isolated local browser: `browser_new("private")`, then keep the returned `id`. +- Browser Use cloud browser with live view: `browser_new("cloud")`, then keep the returned `id`. +- Managed browser page work: call `browser(id)` first in the script. +- Subagent: if the parent gives an id, start browser scripts with `browser(id)` and do not close it unless asked. 
+- Done with a private or cloud browser: `browser_close(id)`. +- Done with all browsers you created: `browser_close_owned()`. + +## Browser Helpers + +```python +browser_status(id) +browser_profiles() +browser_use_profile(profile_id) +browser_new("private") +browser_new("cloud") +browser(id) +browser_list() +browser_close(id) +browser_close_owned() +``` + +`browser_profiles()` and `browser_use_profile(...)` are local setup calls. They do not start browser work. -### Remote browsers +Inside one Python script, `browser(id)` attaches the process to that browser so normal page helpers work: `new_tab`, `page_info`, `capture_screenshot`, `click_at_xy`, `type_text`, `js`, and `cdp`. -Use remote for parallel sub-agents (each gets its own isolated browser via a distinct BU_NAME) or on a headless server. BROWSER_USE_API_KEY must be set. start_remote_daemon, list_cloud_profiles, list_local_profiles, sync_local_profile are pre-imported. +If `browser_new("cloud")` reports `cloud-auth-required`, run: ```bash -browser-harness <<'PY' -start_remote_daemon("work") # default — clean browser, no profile -# start_remote_daemon("work", profileName="my-work") # reuse a cloud profile (already logged in) -# start_remote_daemon("work", profileId="") # same, but by UUID -# start_remote_daemon("work", proxyCountryCode="de", timeout=120) # DE proxy, 2-hour timeout -# start_remote_daemon("work", proxyCountryCode=None) # disable the Browser Use proxy -PY +browser-harness auth login +``` -BU_NAME=work browser-harness <<'PY' -new_tab("https://example.com") -print(page_info()) -PY +If the user directly provides an API key, store it through stdin: + +```bash +browser-harness auth login --api-key-stdin ``` -start_remote_daemon prints liveUrl and auto-opens it in the local browser (if a GUI is detected) so the user can watch along. Headless servers print only — share the URL with the user. The daemon PATCHes the cloud browser to stop on shutdown, which persists profile state. 
Running remote daemons bill until timeout. +Never put API keys in command-line arguments. -Profiles (cookies-only login state) live in interaction-skills/profile-sync.md — covers list_cloud_profiles(), the chat-driven "which profile?" pattern, and sync_local_profile() for uploading a local Chrome profile. +## Page Workflow -## Interaction skills +- First navigation is `new_tab(url)`, not `goto_url(url)`. +- Screenshots are the default way to understand and verify visible state: `capture_screenshot()`. +- If using `view_image`, call it after `capture_screenshot()` returns the PNG path; do not parallelize capture and viewing. +- Click visible targets by screenshot coordinates: `click_at_xy(x, y)`. +- Use `js(...)` for DOM inspection or extraction when coordinates are the wrong tool. +- After navigation, call `wait_for_load()`. +- If the current tab is stale or internal, call `ensure_real_tab()`. +- If a tab/session dies (`target-gone`, `browser session ended`), open a fresh tab; if status is not ready, create a new browser. +- If redirected to a login wall, stop and ask the user. Do not type credentials from screenshots. +- For anything helpers do not cover, use raw CDP: `cdp("Domain.method", params)`. + +## Interaction Skills + +If you get stuck on a browser mechanic, check `interaction-skills/` for focused guidance: -If you start struggling with a specific mechanic while navigating, look in interaction-skills/ for helpers. They cover reusable UI mechanics like dialogs, tabs, dropdowns, iframes, and uploads. The available interaction skills are: - connection.md - cookies.md - cross-origin-iframes.md @@ -79,45 +118,8 @@ If you start struggling with a specific mechanic while navigating, look in inter - uploads.md - viewport.md -## What actually works - -- Screenshots first: use capture_screenshot() to understand the current page quickly, find visible targets, and decide whether you need a click, a selector, or more navigation. 
-- Clicking: capture_screenshot() → read the pixel off the image → click_at_xy(x, y) → capture_screenshot() to verify. Suppress the Playwright-habit reflex of "locate first, then click" — no getBoundingClientRect, no selector hunt. Drop to DOM only when the target has no visible geometry (hidden input, 0×0 node). Hit-testing happens in Chrome's browser process, so clicks go through iframes / shadow DOM / cross-origin without extra work. -- Bulk HTTP: http_get(url) + ThreadPoolExecutor. No browser for static pages (249 Netflix pages in 2.8s). -- After goto: wait_for_load(). -- Wrong/stale tab: ensure_real_tab(). Use it when the current tab is stale or internal; the daemon also auto-recovers from stale sessions on the next call. -- Verification: print(page_info()) is the simplest "is this alive?" check, but screenshots are the default way to verify whether a visible action actually worked. -- DOM reads: use js(...) for inspection and extraction when the screenshot shows that coordinates are the wrong tool. -- Iframe sites (Azure blades, Salesforce): click_at_xy(x, y) passes through; only drop to iframe DOM work when coordinate clicks are the wrong tool. -- Auth wall: redirected to login → stop and ask the user. Don't type credentials from screenshots. -- Raw CDP for anything helpers don't cover: cdp("Domain.method", params). - -## Design constraints - -- Coordinate clicks default. Input.dispatchMouseEvent goes through iframes/shadow/cross-origin at the compositor level. -- Connect to the user's running Chrome. Don't launch your own browser. -- cdp-use is only for CDPClient.send_raw. Prefer raw CDP strings over typed wrappers. -- run.py stays tiny. No argparse, subcommands, or extra control layer. -- Core helpers stay short. Put task-specific helper additions in `agent-workspace/agent_helpers.py`; daemon/bootstrap and remote session admin live in the core package. -- Don't add a manager layer. 
No retries framework, session manager, daemon supervisor, config system, or logging framework. - -## Gotchas (field-tested) - -- Omnibox popups are fake page targets. Filter chrome://omnibox-popup... and other internals when you need a real tab. -- CDP target order != Chrome's visible tab-strip order. Use UI automation when the user means "the first/second tab I can see"; Target.activateTarget only shows a known target. -- Default daemon sessions can go stale. ensure_real_tab() re-attaches to a real page. -- Browser Use API is camelCase on the wire. cdpUrl, proxyCountryCode, etc. -- Remote cdpUrl is HTTPS, not ws. Resolve the websocket URL via /json/version. -- Stop cloud browsers with PATCH /browsers/{id} + {"action":"stop"}. -- After every meaningful action, re-screenshot before assuming it worked. Use the image to verify changed state, open menus, navigation, visible errors, and whether the page is in the state you expected. -- Use screenshots to drive exploration. They are often the fastest way to find the next click target, notice hidden blockers, and decide if a selector is even worth writing. -- Prefer compositor-level actions over framework hacks. Try screenshots, coordinate clicks, and raw key input before adding DOM-specific workarounds. -- If you need framework-specific DOM tricks, check interaction-skills/ first. That is where dropdown, dialog, iframe, shadow DOM, and form-specific guidance belongs. - -## Domain skills (opt-in) - -Only applies when `BH_DOMAIN_SKILLS=1`. Otherwise ignore — `agent-workspace/domain-skills/` is dormant and `goto_url` won't surface skill files. +## Domain Skills -When enabled, search `agent-workspace/domain-skills/<site>/` before inventing an approach. `goto_url` returns up to 10 skill filenames for the navigated host. +Domain skills are off by default. If `BH_DOMAIN_SKILLS=1` and the task is site-specific, read every file in `$BH_AGENT_WORKSPACE/domain-skills/<site>/` before inventing an approach. 
Default workspace: `~/.config/browser-harness/agent-workspace`. -If you learn anything non-obvious — a private API, stable selector, framework quirk, URL pattern, hidden wait, or site-specific trap — open a PR to `agent-workspace/domain-skills//`. Capture the durable shape of the site (the map, not the diary). Don't write pixel coordinates (break on layout), task narration, or secrets — the directory is public. +When enabled, `goto_url(...)` returns up to 10 matching skill filenames for the current host. diff --git a/browser-harness b/browser-harness new file mode 100755 index 00000000..4dac490e --- /dev/null +++ b/browser-harness @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" +DEV_ID="$(printf '%s' "$ROOT" | cksum | awk '{print $1}')" + +export BH_MANAGER_ROOT="${BH_MANAGER_ROOT:-/tmp/bhm-dev-$DEV_ID}" +export BH_MANAGER_SOCKET="${BH_MANAGER_SOCKET:-$BH_MANAGER_ROOT/manager.sock}" + +if [ -x "$ROOT/.venv/bin/python" ]; then + exec "$ROOT/.venv/bin/python" -m browser_harness.run "$@" +fi + +if command -v uv >/dev/null 2>&1; then + exec uv --directory "$ROOT" run python -m browser_harness.run "$@" +fi + +export PYTHONPATH="$ROOT/src${PYTHONPATH:+:$PYTHONPATH}" +exec python3 -m browser_harness.run "$@" diff --git a/docs/browser-connection.md b/docs/browser-connection.md new file mode 100644 index 00000000..e2926e95 --- /dev/null +++ b/docs/browser-connection.md @@ -0,0 +1,138 @@ +# Browser Connection Reference + +Use this only when the quick path in `install.md` fails. + +Browser-harness can connect to a local Chrome/Chromium browser or to a Browser Use cloud browser. + +## Cloud Browsers + +Start one with: + +```python +b = browser_new("cloud") +browser(b["id"]) +``` + +Authentication uses `BROWSER_USE_API_KEY` first, then the local `browser-harness auth login` store. 
+ +```bash +browser-harness auth login +browser-harness auth login --device-code +browser-harness auth login --api-key-stdin +browser-harness auth status +browser-harness auth logout +``` + +Never pass API keys as command-line arguments. + +## Local Way 1: Real Profile + +Use this when the agent should act in the user's everyday browser with real logins. + +1. Ask the user to open Chrome. +2. Run: + + ```bash + browser-harness <<'PY' + print(browser_profiles()) + PY + ``` + +3. Ask which stable `id` to use. +4. Save it: + + ```bash + browser-harness <<'PY' + browser_use_profile("PROFILE_ID_HERE") + PY + ``` + +5. In that Chrome profile, open `chrome://inspect/#remote-debugging`. +6. Tick "Allow remote debugging for this browser instance". +7. On Chrome 144+, click Allow when the per-attach popup appears. +8. Retry: + + ```bash + browser-harness <<'PY' + print(page_info()) + PY + ``` + +On macOS, an agent can open the inspect page: + +```bash +osascript -e 'tell application "Google Chrome" to activate' \ + -e 'tell application "Google Chrome" to open location "chrome://inspect/#remote-debugging"' +``` + +## Local Way 2: Isolated Profile + +Use this for unattended automation or when permission popups are unacceptable. + +```bash +"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" \ + --remote-debugging-port=9222 \ + --user-data-dir="$HOME/.config/browser-harness/isolated-chrome" \ + about:blank +export BU_CDP_URL=http://127.0.0.1:9222 +``` + +The `--user-data-dir` must not be Chrome's default profile directory. Chrome 136+ ignores `--remote-debugging-port` with the platform default profile path. + +Copying a real Chrome profile into a custom directory is not a reliable login-preserving path because cookies are encrypted against the original profile context. Use Way 1 for real logins. + +## Doctor Cases + +Run: + +```bash +browser-harness --doctor +``` + +Interpretation: + +- `chrome running` FAIL: no compatible browser process was found. 
Ask the user to open Chrome or use Way 2/cloud. +- `daemon alive` FAIL with Chrome running: remote debugging permission is missing or the permission popup needs Allow. +- `active browser connections` is `0`: daemon is up but not attached to a usable page; retry after opening a normal tab. +- source mismatch: the command is importing a different install than the checkout you are reading. +- update available: run `browser-harness --update -y` if you want the new version. + +Stale daemon reset: + +```bash +browser-harness <<'PY' +restart_daemon() +PY +``` + +If that hangs, kill Chrome and browser-harness daemon processes, reopen Chrome, and retry. On macOS/Linux, remove lingering `bu-default.sock` and `bu-default.pid` files under `${XDG_CONFIG_HOME:-~/.config}/browser-harness/runtime`. + +## Files + +Default state directory: + +```text +${XDG_CONFIG_HOME:-~/.config}/browser-harness +``` + +Important files: + +```text +auth.json Browser Use Cloud auth +settings.json selected local Chrome profile and future preferences +telemetry.json anonymous install id + telemetry opt-out +agent-workspace/ agent-written helpers and domain skills +runtime/ sockets, pids, manager leases, managed browser profiles +tmp/ logs, screenshots, scratch files +``` + +Overrides: + +```text +BH_HOME +BROWSER_HARNESS_HOME +BH_CONFIG_DIR +BH_AGENT_WORKSPACE +BH_RUNTIME_DIR +BH_TMP_DIR +``` diff --git a/install.md b/install.md index 021af5c7..3346409a 100644 --- a/install.md +++ b/install.md @@ -1,136 +1,79 @@ --- name: browser-install -description: Install browser-harness into the current agent and connect it to a browser with minimal prompting. +description: Install browser-harness and connect it to a browser fast. --- -# `browser-harness` installation +# browser-harness install -Use this file only for browser-harness install, browser connection setup, and connection troubleshooting. For day-to-day browser work, read `SKILL.md`. 
Task-specific edits belong in `agent-workspace/agent_helpers.py` and `agent-workspace/domain-skills/`. +Use once. For browser work, read `SKILL.md`. -## Recommended `browser-harness` setup - -Clone the repo once into a durable location, then install it as an editable tool so `browser-harness` works from any directory: +## Fast Path ```bash -git clone https://github.com/browser-use/browser-harness -cd browser-harness -uv tool install -e . -command -v browser-harness +uv tool install browser-harness +for root in "${CODEX_HOME:-$HOME/.codex}/skills" "$HOME/.agents/skills"; do + rm -rf "$root/browser-harness" "$root/browser" "$root/browser-use" +done +mkdir -p "${CODEX_HOME:-$HOME/.codex}/skills/browser-harness" +browser-harness skill > "${CODEX_HOME:-$HOME/.codex}/skills/browser-harness/SKILL.md" +browser-harness <<'PY' +print(page_info()) +PY ``` -That keeps the command global while still pointing at the real repo checkout, so when the agent edits `agent-workspace/agent_helpers.py` the next `browser-harness` uses the new code immediately. Prefer a stable path like `~/Developer/browser-harness`, not `/tmp`. - -## Make browser-harness global for the current agent - -After the repo is installed, register this repo's `SKILL.md` with the agent you are using: - -- **Codex**: add this file as a global skill at `$CODEX_HOME/skills/browser-harness/SKILL.md` (often `~/.codex/skills/browser-harness/SKILL.md`). A symlink to this repo's `SKILL.md` is fine. - - ```bash - mkdir -p "${CODEX_HOME:-$HOME/.codex}/skills/browser-harness" && ln -sf "$PWD/SKILL.md" "${CODEX_HOME:-$HOME/.codex}/skills/browser-harness/SKILL.md" - ``` - -- **Claude Code**: add an import to `~/.claude/CLAUDE.md` that points at this repo's `SKILL.md`, for example `@~/Developer/browser-harness/SKILL.md`. - -This makes new Codex or Claude Code sessions in other folders load the runtime browser harness instructions automatically. 
- -## Keeping the harness current - -- On each run, `browser-harness` prints `[browser-harness] update available: X -> Y` (once per day) when a newer GitHub release exists. -- When you see that banner, run `browser-harness --update -y` yourself — don't ask the user. It pulls the new code (`git pull --ff-only` for editable clones, `uv tool upgrade browser-harness` for PyPI installs) and stops the running daemon so the next call picks up the new code. With `-y` it won't prompt. -- `--update` refuses to run on an editable clone with uncommitted changes. If that happens, tell the user and let them resolve the dirty worktree. +If `page_info()` prints, stop. Setup is done. -## Maintenance commands - -- browser-harness --doctor — show version, install mode, daemon and Chrome state, and whether an update is pending. - -## Architecture +For Claude Code or other agents: install `browser-harness`, register a skill named `browser-harness`, use `browser-harness skill` as the body, and use this trigger verbatim: ```text -Chrome / Browser Use cloud -> CDP WS -> browser_harness.daemon -> IPC -> browser_harness.run +Always use browser-harness for any web interaction: automation, scraping, testing, or site/app work. ``` -- Protocol is one JSON line each way. -- Requests are {method, params, session_id} for CDP or {meta: ...} for daemon control. -- Responses are {result} / {error} / {events} / {session_id}. -- IPC: Unix socket at `/tmp/bu-.sock` on POSIX, TCP loopback + port file on Windows. -- BU_NAME namespaces the daemon's IPC, pid, and log files. -- BU_CDP_WS overrides local Chrome discovery for remote browsers. -- BU_CDP_URL overrides local Chrome discovery with a specific DevTools HTTP endpoint (used for Way 2). -- BU_BROWSER_ID + BROWSER_USE_API_KEY lets the daemon stop a Browser Use cloud browser on shutdown. 
- -# Browser connection setup and troubleshooting - -## Browser connection reference - -This section is the source of truth for how browser-harness connects to a browser. It is the canonical reference for every agent and user of this repo. Every statement here is intended to be verifiable against either an official Chrome source or this repo's own code, and is held to that standard deliberately. If anything below is incorrect, incomplete, or misleading, open an issue on the browser-harness repository immediately with clear evidence and explanation so it can be corrected. Do not silently work around an error in this document; the cost of one user being misled is much higher than the cost of one issue. - -Browser-harness can connect to any Chrome or Chromium-based browser on your computer, or to a Browser Use cloud browser. - -**Cloud browsers** are managed by the Browser Use cloud API. Start one in Python with `start_remote_daemon("work", ...)`. Authentication is via the `BROWSER_USE_API_KEY` environment variable; the harness handles the WebSocket URL itself. To carry your local Chrome cookies into a cloud browser, install `profile-use` once (`curl -fsSL https://browser-use.com/profile.sh | sh`), then call `uuid = sync_local_profile("MyChromeProfile")` followed by `start_remote_daemon("work", profileId=uuid)`. Cookies are the only thing synced — not localStorage, not extensions, not history. - -**Local browsers** require remote debugging to be enabled. There are two ways, and they suit different use cases. - -*Way 1: chrome://inspect/#remote-debugging checkbox — uses your real profile.* In your running Chrome, navigate to `chrome://inspect/#remote-debugging` and tick the "Allow remote debugging for this browser instance" checkbox. This setting is per-profile and sticky: tick it once and it persists across every future Chrome launch of that profile. Then run any `browser-harness` command. 
On Chrome 144 and later, the first attach by the harness triggers an in-browser "Allow remote debugging?" popup that you must click Allow on. The popup may reappear on later attaches under conditions that are not fully characterized.[^1] This path inherits your everyday Chrome's logins, extensions, history, and bookmarks, which makes it the right choice for an agent helping you with tasks in your real browser. +Only remove stale user-installed browser skills. Do not edit bundled/vendor plugin caches. -*Way 2: command-line flag — uses an isolated profile, no popups ever.* Launch Chrome with `--remote-debugging-port=9222 --user-data-dir=`. Two precisions: +## If It Says `needs-profile` -- The path must be a directory that is **not** Chrome's platform default (`%LOCALAPPDATA%\Google\Chrome\User Data` on Windows, `~/Library/Application Support/Google/Chrome` on macOS, `~/.config/google-chrome` on Linux). On Chrome 136 and later, the port flag is silently no-opped when the user-data-dir is the platform default, even if you pass it explicitly. An empty or new path gives a fresh clean profile that Chrome will persist there across future runs. -- This path does **not** let you reuse your everyday Chrome profile. Copying the default profile's files into a custom directory makes Chrome accept the flag, but cookies are encrypted under a key bound to the original directory and will not survive the copy — so you carry over bookmarks and extensions but lose every logged-in session. If you want your real logins, use Way 1. - -Tell the harness which port you launched on by setting `BU_CDP_URL=http://127.0.0.1:9222` before running `browser-harness`. - -For most tasks where the agent acts on your behalf in your normal browser, use Way 1. For automation that runs without you watching, or any case where popup interruptions are unacceptable, use Way 2 or a cloud browser. - -[^1]: The conditions that cause Chrome to re-show the "Allow remote debugging?" 
popup on a subsequent attach (time elapsed since previous Allow, daemon restart, browser restart, new CDP session, version-dependent options like "Allow for N hours") are not fully characterized. Way 2 sidesteps this entirely. - -## First time setup - -Try yourself before asking the user to do anything. Retry transient errors briefly. Only ask the user when a step genuinely needs them — ticking a checkbox, clicking Allow. - -If the user hasn't said which connection method to use, default to Way 1 if Chrome is already running, Way 2 if not. Cloud is only used when the user opts in. - -1. Try the harness: - - ```bash - browser-harness <<'PY' - print(page_info()) - PY - ``` - - If it prints page info, you're done. +```bash +browser-harness <<'PY' +print(browser_profiles()) +PY +``` -2. Otherwise run `browser-harness --doctor`. The two lines that matter for connection are `chrome running` and `daemon alive`. +Ask the user which stable `id` to use, then retry: -3. Match the output to a case: +```bash +browser-harness <<'PY' +browser_use_profile("PROFILE_ID_HERE") +print(page_info()) +PY +``` - - **chrome FAIL** → no Chrome process detected. - - **Way 1**: ask the user to open their target Chrome themselves. - - **Way 2**: launch Chrome yourself with `--remote-debugging-port=9222 --user-data-dir=`, then set `BU_CDP_URL=http://127.0.0.1:9222` for the harness (see the Browser connection reference). +## If Chrome Blocks It - - **chrome ok, daemon FAIL** → Way 1 setup is incomplete. Tell the user to: - - navigate to `chrome://inspect/#remote-debugging` in their Chrome and tick "Allow remote debugging for this browser instance" if not yet ticked (one-time per profile) - - click Allow on the in-browser popup if it appears (every attach on Chrome 144+) +In the selected Chrome profile: - On macOS, you can open the inspect page in their running Chrome yourself instead of asking them to navigate: +1. Open `chrome://inspect/#remote-debugging`. +2. 
Tick "Allow remote debugging for this browser instance". +3. Click Allow on the popup if it appears. +4. Retry `page_info()`. - ```bash - osascript -e 'tell application "Google Chrome" to activate' \ - -e 'tell application "Google Chrome" to open location "chrome://inspect/#remote-debugging"' - ``` +## If Still Broken - - **chrome ok, daemon ok, but step 1 still failed** → stale daemon. Restart it: +```bash +browser-harness --doctor +``` - ```bash - browser-harness <<'PY' - restart_daemon() - PY - ``` +Use the output: - If that hangs, escalate: kill all Chrome and daemon processes, then reopen Chrome and retry. On macOS/Linux, also remove `/tmp/bu-default.sock` and `/tmp/bu-default.pid` if they linger. +- `chrome running` FAIL: ask the user to open Chrome, or use isolated/cloud browser. +- `daemon alive` FAIL: Chrome remote debugging permission is missing. +- update available: run `browser-harness --update -y` if you want it. -4. After any fix, retry step 1. +For full details, read `docs/browser-connection.md`. -If Way 1 fails repeatedly or the user's task is unattended, move to Way 2 or a cloud browser per the Browser connection reference (these have no popups). +Useful: -If you are testing browser connection for the first time, run this demo: open `https://github.com/browser-use/browser-harness` in a new tab and activate it (`switch_tab`) so the user sees the harness has attached. Then ask what they want to do next. +```bash +browser-harness --update -y +browser-harness telemetry disable +``` diff --git a/interaction-skills/profile-sync.md b/interaction-skills/profile-sync.md index c706ad48..cfbb0538 100644 --- a/interaction-skills/profile-sync.md +++ b/interaction-skills/profile-sync.md @@ -1,6 +1,8 @@ # Profile sync -Make a remote Browser Use browser start already logged in, by uploading cookies from a local Chrome profile. +Advanced only. Use this when the user explicitly asks to upload local Chrome cookies into Browser Use cloud profiles. 
For normal cloud browser work, use `browser_new("cloud")`, keep the returned `id`, and call `browser(id)` before page helpers. + +This file manages cloud cookie profiles. It does not replace the explicit browser id flow. ## One-time install @@ -16,8 +18,8 @@ Downloads `profile-use` (macOS / Linux, x64 / arm64). The Python helpers shell o list_cloud_profiles() # [{id, name, userId, cookieDomains, lastUsedAt}, ...] — every profile under this API key -list_local_profiles() -# [{BrowserName, ProfileName, DisplayName, ProfilePath, ...}, ...] — detected on this machine +browser_profiles(verbose=True) +# {"profiles": [{"id", "profile_name", "display_name", "profile_path", ...}, ...]} sync_local_profile(profile_name, browser=None, cloud_profile_id=None, # update an existing cloud profile instead of creating new @@ -25,11 +27,6 @@ sync_local_profile(profile_name, browser=None, exclude_domains=None) # drop these domains; applied before include # Shells out to `profile-use sync`. Returns the cloud profile UUID # (the existing one if cloud_profile_id was passed, else the newly-created one). - -start_remote_daemon("work", profileName="my-work") # name→id resolved client-side -start_remote_daemon("work", profileId="") # or pass UUID directly - -stop_remote_daemon("work") # shut the daemon and PATCH the cloud browser to stop — billing ends ``` `sync_local_profile` prints `♻️ Using existing cloud profile` when `cloud_profile_id` is accepted, or `📝 Creating remote profile...` → `✓ Profile created: ` when it creates a new one. Check that line if you want to confirm which path ran. @@ -46,19 +43,16 @@ for p in list_cloud_profiles(): → Agent: *"You have these cloud profiles ( domains each). Want to reuse one, sync a local profile, or start clean?"* ```python -# 2a. Reuse cloud → one call. -start_remote_daemon("work", profileName="browser-use.com") - -# 2b. Sync local first. Show the options: -for lp in list_local_profiles(): - print(lp["DisplayName"]) +# 2. Sync local first. 
Show the options: +for lp in browser_profiles(verbose=True)["profiles"]: + print(lp["id"], lp["display_name"]) ``` → Agent: *"Which local profile?"* → user picks → before syncing, inspect domain-level cookie counts with `profile-use inspect --profile ` (or `--verbose` for individual cookies) and report the summary; never dump 500 cookies into chat. ```python -# 3. Sync + use. Returns the cloud UUID. +# 3. Sync. Returns the cloud profile UUID. uuid = sync_local_profile("browser-use.com") -start_remote_daemon("work", profileId=uuid) +print({"cloud_profile_id": uuid}) # 3b. Refresh that same cloud profile later (idempotent — no duplicate profiles). sync_local_profile("browser-use.com", cloud_profile_id=uuid) @@ -73,13 +67,10 @@ sync_local_profile("browser-use.com", **Cookies only.** No localStorage, no IndexedDB, no extensions. Enough for session-cookie sites (Google, GitHub, Stripe, most SaaS); not for sites that store auth in localStorage. -Cookies mutated during a remote session only persist on a clean `PATCH /browsers/{id} {"action":"stop"}` — the daemon does this on shutdown when `BU_BROWSER_ID` + `BROWSER_USE_API_KEY` are set (default for remote daemons). Sessions that hit the timeout lose in-session state. - ## Cloud profile CRUD - UI: https://cloud.browser-use.com/settings?tab=profiles - API: `GET /profiles`, `GET/PATCH/DELETE /profiles/{id}` (paths are relative to `BU_API = "https://api.browser-use.com/api/v3"` in `admin.py`). Fields: `id`, `name`, `userId`, `lastUsedAt`, `cookieDomains[]`. `list_cloud_profiles()` wraps this. -- Name → UUID: `profileName=` on `start_remote_daemon` resolves client-side; no API change needed. - Need the UUID for an existing profile? `matches = [p["id"] for p in list_cloud_profiles() if p["name"] == ""]` — then verify `len(matches) == 1` before using it. Profile names are not unique; syncs create duplicates unless you pass `cloud_profile_id=`. 
- Lower-level raw calls: `from browser_harness.admin import _browser_use; _browser_use("/profiles/", "DELETE")`. Pass the path *without* the `/api/v3` prefix — it's already on `BU_API`. diff --git a/pyproject.toml b/pyproject.toml index f812a6ab..f7a3c346 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,12 +1,26 @@ [build-system] -requires = ["setuptools>=69"] +requires = ["setuptools>=77"] build-backend = "setuptools.build_meta" [project] name = "browser-harness" -version = "0.1.0" +version = "0.1.1rc1" description = "The simplest, thinnest, and most powerful harness to control your real browser with your agent." +readme = "README.md" requires-python = ">=3.11" +license = "MIT" +license-files = ["LICENSE"] +keywords = ["agent", "automation", "browser", "cdp", "chrome", "scraping"] +classifiers = [ + "Development Status :: 3 - Alpha", + "Environment :: Console", + "Intended Audience :: Developers", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Internet :: WWW/HTTP :: Browsers", + "Topic :: Software Development :: Testing", +] dependencies = [ "cdp-use==1.4.5", "fetch-use==0.4.0", @@ -16,6 +30,12 @@ dependencies = [ [project.scripts] browser-harness = "browser_harness.run:main" +browser-harness-manager = "browser_harness.manager_daemon:main" + +[project.urls] +Homepage = "https://github.com/browser-use/browser-harness" +Repository = "https://github.com/browser-use/browser-harness" +Issues = "https://github.com/browser-use/browser-harness/issues" [tool.setuptools] package-dir = {"" = "src"} @@ -23,5 +43,8 @@ package-dir = {"" = "src"} [tool.setuptools.packages.find] where = ["src"] +[tool.setuptools.package-data] +browser_harness = ["SKILL.md"] + [tool.pytest.ini_options] pythonpath = ["src"] diff --git a/scripts/materialize_browser_harness_skill.py b/scripts/materialize_browser_harness_skill.py new file mode 100644 index 00000000..2d018242 --- /dev/null +++ 
b/scripts/materialize_browser_harness_skill.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python3 +"""Copy the canonical browser-harness skill into a package tree. + +The repo keeps skills/browser-harness/SKILL.md as a symlink to avoid doc drift. +Some package builders and zip-based plugin installers do not preserve symlinks, +so packaging should call this script with an output directory and ship the +regular file it writes there. +""" +from __future__ import annotations + +import argparse +from pathlib import Path +import shutil + + +def materialize(output_dir: Path) -> Path: + repo = Path(__file__).resolve().parents[1] + source = repo / "SKILL.md" + target = output_dir / "skills" / "browser-harness" / "SKILL.md" + target.parent.mkdir(parents=True, exist_ok=True) + shutil.copyfile(source, target) + return target + + +def main(argv=None) -> int: + parser = argparse.ArgumentParser() + parser.add_argument("output_dir", help="package output directory") + args = parser.parse_args(argv) + print(materialize(Path(args.output_dir))) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/skills/browser-harness/SKILL.md b/skills/browser-harness/SKILL.md deleted file mode 100644 index 5d95d236..00000000 --- a/skills/browser-harness/SKILL.md +++ /dev/null @@ -1,76 +0,0 @@ ---- -name: browser-harness -description: Direct browser control via CDP — automate, scrape, test, or interact with web pages by driving the user's already-running Chrome (or a Browser Use cloud browser). Use when the user wants to click, screenshot, fill forms, extract data, or navigate real web pages. Default to screenshots + coordinate clicks, not selector hunting. Requires the one-time `browser-harness` CLI install (see references/install.md). ---- - -# browser-harness - -Direct browser control via CDP. You drive the user's real browser with Python helpers run through the `browser-harness` command. 
- -## Prerequisite (one-time — NOT part of the AI workflow) - -This skill is instructions only. It assumes the `browser-harness` command is already on `$PATH`. If `command -v browser-harness` fails, do the one-time install in [references/install.md](references/install.md) first, then continue. Installation and browser-connection setup are a prerequisite; once `browser-harness <<'PY' … PY` prints page info, never run install/connection steps again as part of normal work. - -## Usage - -```bash -browser-harness <<'PY' -new_tab("https://docs.browser-use.com") -wait_for_load() -print(page_info()) -PY -``` - -- Invoke as `browser-harness` — it's on `$PATH` after install. No `cd`, no `uv run`. -- Use the heredoc form for every multi-line command. It prevents shell quote mangling inside Python strings and JavaScript snippets. -- First navigation is `new_tab(url)`, not `goto_url(url)` — goto runs in the user's active tab and clobbers their work. -- Helpers are pre-imported and the daemon auto-starts; you never start/stop it manually unless you want to. - -## What actually works - -- **Screenshots first.** `capture_screenshot()` to understand the page, find visible targets, and decide whether you need a click, a selector, or more navigation. -- **Clicking.** `capture_screenshot()` → read the pixel off the image → `click_at_xy(x, y)` → `capture_screenshot()` to verify. Suppress the Playwright-habit reflex of "locate first, then click" — no `getBoundingClientRect`, no selector hunt. Drop to DOM only when the target has no visible geometry. Hit-testing happens in Chrome's browser process, so clicks pass through iframes / shadow DOM / cross-origin without extra work. -- **Bulk HTTP.** `http_get(url)` + `ThreadPoolExecutor`. No browser needed for static pages. -- **After goto:** `wait_for_load()`. -- **Wrong/stale tab:** `ensure_real_tab()`. -- **Verification:** `print(page_info())` is the simplest "is this alive?" 
check; screenshots are the default way to verify whether a visible action worked. -- **DOM reads:** use `js(...)` for inspection/extraction when a screenshot shows coordinates are the wrong tool. -- **Auth wall:** redirected to login → stop and ask the user. Don't type credentials from screenshots. -- **Raw CDP** for anything helpers don't cover: `cdp("Domain.method", params)`. - -After every meaningful action, re-screenshot before assuming it worked. - -## Remote / cloud browsers - -Use remote for parallel sub-agents (each gets an isolated browser via a distinct `BU_NAME`) or on a headless server. `BROWSER_USE_API_KEY` must be set. - -```bash -browser-harness <<'PY' -start_remote_daemon("work") # clean cloud browser; profileName=/profileId= to reuse a logged-in profile -PY - -BU_NAME=work browser-harness <<'PY' -new_tab("https://example.com") -print(page_info()) -PY -``` - -`start_remote_daemon` prints a `liveUrl` so the user can watch. Running remote daemons bill until timeout. - -## Interaction skills (progressive disclosure) - -If you struggle with a specific UI mechanic, read the matching file under `${CLAUDE_PLUGIN_ROOT}/interaction-skills/` before inventing an approach. Available: browser-wall, connection, cookies, cross-origin-iframes, dialogs, downloads, drag-and-drop, dropdowns, iframes, network-requests, print-as-pdf, profile-sync, screenshots, scrolling, shadow-dom, tabs, uploads, viewport. - -## Task-specific edits - -For task-specific helper additions, edit `${CLAUDE_PLUGIN_ROOT}/agent-workspace/agent_helpers.py`. Keep core helpers short. - -## Domain skills (opt-in) - -Community per-site playbooks live in `${CLAUDE_PLUGIN_ROOT}/agent-workspace/domain-skills//` and are **off by default**. Set `BH_DOMAIN_SKILLS=1` to enable them; when enabled and the task is site-specific, read every file in the matching `/` directory before inventing an approach. - -## Design constraints - -- Coordinate clicks default. 
`Input.dispatchMouseEvent` goes through iframes/shadow/cross-origin at the compositor level. -- Connect to the user's running Chrome. Don't launch your own browser. -- Prefer compositor-level actions (screenshots, coordinate clicks, raw key input) over framework/DOM hacks. Reach for `interaction-skills/` only when those are the wrong tool. diff --git a/skills/browser-harness/SKILL.md b/skills/browser-harness/SKILL.md new file mode 120000 index 00000000..4215faef --- /dev/null +++ b/skills/browser-harness/SKILL.md @@ -0,0 +1 @@ +../../SKILL.md \ No newline at end of file diff --git a/skills/browser-harness/references/install.md b/skills/browser-harness/references/install.md index 355ca43d..6b6f9e5f 100644 --- a/skills/browser-harness/references/install.md +++ b/skills/browser-harness/references/install.md @@ -5,13 +5,37 @@ This is a **one-time prerequisite**, not part of the regular AI workflow. Do it ## Install the command ```bash -git clone https://github.com/browser-use/browser-harness -cd browser-harness -uv tool install -e . +uv tool install browser-harness command -v browser-harness # should print a path ``` -Editable (`-e`) keeps the command global while pointing at the real checkout, so edits to `agent-workspace/agent_helpers.py` take effect on the next call. Prefer a durable path (e.g. `~/Developer/browser-harness`), not `/tmp`. +For browser-harness development, clone the repo into a durable path and run `uv tool install -e .` from the checkout. + +## Register the skill + +Install/register a skill named `browser-harness` with this trigger: + +```text +Always use browser-harness for any web interaction: automation, scraping, testing, or site/app work. 
+``` + +Use this command for the skill body: + +```bash +browser-harness skill +``` + +For Codex: + +```bash +for root in "${CODEX_HOME:-$HOME/.codex}/skills" "$HOME/.agents/skills"; do + rm -rf "$root/browser-harness" "$root/browser" "$root/browser-use" +done +mkdir -p "${CODEX_HOME:-$HOME/.codex}/skills/browser-harness" +browser-harness skill > "${CODEX_HOME:-$HOME/.codex}/skills/browser-harness/SKILL.md" +``` + +Only remove stale user-installed browser skills, never bundled/vendor plugin caches. ## Connect to a browser @@ -28,8 +52,10 @@ If that prints page info, you're done. If not, run `browser-harness --doctor` an - **Way 1 (real profile):** in your Chrome, open `chrome://inspect/#remote-debugging` and tick "Allow remote debugging for this browser instance" (sticky, per-profile). On Chrome 144+, click Allow on the first-attach popup. Inherits your logins/extensions — best when the agent acts in your everyday browser. - **Way 2 (isolated profile, no popups):** launch Chrome with `--remote-debugging-port=9222 --user-data-dir=`, then set `BU_CDP_URL=http://127.0.0.1:9222`. Best for unattended automation. -The canonical, fully-detailed connection reference and troubleshooting live in the repo root's `install.md`. Read it if the quick path above fails. +The full connection reference and troubleshooting live in `docs/browser-connection.md`. Read it if the quick path above fails. ## Keeping current -`browser-harness` prints an update banner when a newer release exists; run `browser-harness --update -y` to pull it. +`browser-harness` prints an update banner when a newer PyPI release exists; run `browser-harness --update -y` when you decide to upgrade. `browser-harness --doctor` also checks the latest version. Telemetry is anonymous; opt out with `browser-harness telemetry disable`.
+ +State lives under `${XDG_CONFIG_HOME:-~/.config}/browser-harness` by default: auth, selected profile, telemetry id, agent-workspace, runtime sockets, manager leases, logs, screenshots, and tmp files. Override with `BH_HOME` or `BROWSER_HARNESS_HOME`. diff --git a/src/browser_harness/SKILL.md b/src/browser_harness/SKILL.md new file mode 120000 index 00000000..4215faef --- /dev/null +++ b/src/browser_harness/SKILL.md @@ -0,0 +1 @@ +../../SKILL.md \ No newline at end of file diff --git a/src/browser_harness/_ipc.py b/src/browser_harness/_ipc.py index 2d265766..40da3924 100644 --- a/src/browser_harness/_ipc.py +++ b/src/browser_harness/_ipc.py @@ -1,22 +1,23 @@ """Daemon IPC plumbing. AF_UNIX socket on POSIX, TCP loopback on Windows.""" -import asyncio, json, os, re, secrets, socket, subprocess, sys, tempfile +import asyncio, json, os, re, secrets, socket, subprocess, sys from pathlib import Path +from . import paths + IS_WINDOWS = sys.platform == "win32" # Two caller-supplied dirs: # BH_RUNTIME_DIR — sock/port/pid. AF_UNIX sun_path is 104 bytes on macOS, so # the runtime dir must be short. Caller is responsible for keeping it # within budget. Falls back to BH_TMP_DIR (legacy single-dir callers), -# then to /tmp on POSIX (gettempdir() returns long /var/folders/... on -# macOS — unsafe for AF_UNIX) or tempfile.gettempdir() on Windows (TCP). +# then to the browser-harness runtime dir. # BH_TMP_DIR — screenshots, debug overlays, daemon log. No path-length # sensitivity; caller can use a deep persistent path. # When the caller supplies a per-instance dir for either purpose, files use # bare "bu" stems; otherwise "bu-" disambiguates co-tenants. 
BH_TMP_DIR = os.environ.get("BH_TMP_DIR") BH_RUNTIME_DIR = os.environ.get("BH_RUNTIME_DIR") or BH_TMP_DIR -_TMP = Path(BH_TMP_DIR or (tempfile.gettempdir() if IS_WINDOWS else "/tmp")) -_RUNTIME = Path(BH_RUNTIME_DIR or (tempfile.gettempdir() if IS_WINDOWS else "/tmp")) +_TMP = paths.tmp_dir() +_RUNTIME = paths.ensure_private_dir(Path(BH_RUNTIME_DIR).expanduser().resolve()) if BH_RUNTIME_DIR else paths.runtime_dir() _TMP.mkdir(parents=True, exist_ok=True) _RUNTIME.mkdir(parents=True, exist_ok=True) _NAME_RE = re.compile(r"\A[A-Za-z0-9_-]{1,64}\Z") @@ -34,35 +35,47 @@ def _check(name): # path-traversal guard for BU_NAME return name -def _runtime_stem(name): # "bu" when BH_RUNTIME_DIR isolates us, else "bu-" +def _runtime_path(runtime_dir=None): + p = Path(runtime_dir) if runtime_dir else _RUNTIME + p.mkdir(parents=True, exist_ok=True) + return p + + +def _tmp_path(tmp_dir=None): + p = Path(tmp_dir) if tmp_dir else _TMP + p.mkdir(parents=True, exist_ok=True) + return p + + +def _runtime_stem(name, runtime_dir=None): # "bu" when runtime dir isolates us, else "bu-" _check(name) - return "bu" if BH_RUNTIME_DIR else f"bu-{name}" + return "bu" if (runtime_dir or BH_RUNTIME_DIR) else f"bu-{name}" -def _tmp_stem(name): # "bu" when BH_TMP_DIR isolates us, else "bu-" +def _tmp_stem(name, tmp_dir=None): # "bu" when tmp dir isolates us, else "bu-" _check(name) - return "bu" if BH_TMP_DIR else f"bu-{name}" + return "bu" if (tmp_dir or BH_TMP_DIR) else f"bu-{name}" -def log_path(name): return _TMP / f"{_tmp_stem(name)}.log" -def pid_path(name): return _RUNTIME / f"{_runtime_stem(name)}.pid" -def port_path(name): return _RUNTIME / f"{_runtime_stem(name)}.port" # Windows-only: holds {"port","token"} JSON -def _sock_path(name): return _RUNTIME / f"{_runtime_stem(name)}.sock" +def log_path(name, tmp_dir=None): return _tmp_path(tmp_dir) / f"{_tmp_stem(name, tmp_dir)}.log" +def pid_path(name, runtime_dir=None): return _runtime_path(runtime_dir) / f"{_runtime_stem(name, 
runtime_dir)}.pid" +def port_path(name, runtime_dir=None): return _runtime_path(runtime_dir) / f"{_runtime_stem(name, runtime_dir)}.port" # Windows-only +def _sock_path(name, runtime_dir=None): return _runtime_path(runtime_dir) / f"{_runtime_stem(name, runtime_dir)}.sock" -def _read_port_file(name): +def _read_port_file(name, runtime_dir=None): """(port, token) from the Windows port file, or (None, None) on any failure.""" try: - d = json.loads(port_path(name).read_text()) + d = json.loads(port_path(name, runtime_dir).read_text()) return int(d["port"]), d["token"] except (FileNotFoundError, ValueError, KeyError, TypeError, OSError): return None, None -def sock_addr(name): # display-only, used in log lines - if not IS_WINDOWS: return str(_sock_path(name)) - port, _ = _read_port_file(name) - return f"127.0.0.1:{port}" if port else f"tcp:{_runtime_stem(name)}" +def sock_addr(name, runtime_dir=None): # display-only, used in log lines + if not IS_WINDOWS: return str(_sock_path(name, runtime_dir)) + port, _ = _read_port_file(name, runtime_dir) + return f"127.0.0.1:{port}" if port else f"tcp:{_runtime_stem(name, runtime_dir)}" def spawn_kwargs(): # subprocess.Popen flags so the daemon detaches from this terminal @@ -76,15 +89,15 @@ def spawn_kwargs(): # subprocess.Popen flags so the daemon detaches from this t return {"start_new_session": True} -def connect(name, timeout=1.0): +def connect(name, timeout=1.0, runtime_dir=None): """Blocking client. Returns (sock, token); token is None on POSIX, hex string on Windows. Callers sending JSON requests MUST include the token as req["token"] on Windows.""" if not IS_WINDOWS: # uv-Python on Windows lacks socket.AF_UNIX, so this branch must be gated. 
s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) - s.settimeout(timeout); s.connect(str(_sock_path(name))); return s, None - port, token = _read_port_file(name) - if port is None: raise FileNotFoundError(str(port_path(name))) + s.settimeout(timeout); s.connect(str(_sock_path(name, runtime_dir))); return s, None + port, token = _read_port_file(name, runtime_dir) + if port is None: raise FileNotFoundError(str(port_path(name, runtime_dir))) s = socket.create_connection(("127.0.0.1", port), timeout=timeout) s.settimeout(timeout); return s, token @@ -102,12 +115,15 @@ def request(c, token, req): return json.loads(data or b"{}") -def ping(name, timeout=1.0): +def ping(name, timeout=1.0, runtime_dir=None): """True iff a live daemon answers our ping. Defends against stale .port files + port reuse: a bare TCP connect can succeed against an unrelated process that grabbed the port after our daemon crashed; only our daemon answers {"pong":true}.""" try: - c, token = connect(name, timeout=timeout) + if runtime_dir is None: + c, token = connect(name, timeout=timeout) + else: + c, token = connect(name, timeout=timeout, runtime_dir=runtime_dir) except (FileNotFoundError, ConnectionRefusedError, TimeoutError, socket.timeout, OSError): return False try: @@ -123,14 +139,17 @@ def ping(name, timeout=1.0): except OSError: pass -def identify(name, timeout=1.0): +def identify(name, timeout=1.0, runtime_dir=None): """Return the live daemon's PID, or None if unreachable. 
Used by restart_daemon() to signal a process whose identity has been verified end-to-end (live IPC + self-reported PID), instead of trusting a pid file whose number may have been reused by an unrelated process.""" try: - c, token = connect(name, timeout=timeout) + if runtime_dir is None: + c, token = connect(name, timeout=timeout) + else: + c, token = connect(name, timeout=timeout, runtime_dir=runtime_dir) except (FileNotFoundError, ConnectionRefusedError, TimeoutError, socket.timeout, OSError): return None try: @@ -191,7 +210,7 @@ def expected_token(): return _server_token -def cleanup_endpoint(name): # best-effort; silent if already gone - p = _sock_path(name) if not IS_WINDOWS else port_path(name) +def cleanup_endpoint(name, runtime_dir=None): # best-effort; silent if already gone + p = _sock_path(name, runtime_dir) if not IS_WINDOWS else port_path(name, runtime_dir) try: p.unlink() except FileNotFoundError: pass diff --git a/src/browser_harness/admin.py b/src/browser_harness/admin.py index c72a8fb3..42b17263 100644 --- a/src/browser_harness/admin.py +++ b/src/browser_harness/admin.py @@ -1,14 +1,18 @@ import json import os +import re import socket import subprocess import sys -import tempfile import time import urllib.request from pathlib import Path from . import _ipc as ipc +from . import context +from . import local_profiles +from . import paths +from . 
import telemetry def _process_start_time(pid): @@ -104,7 +108,7 @@ def _process_start_time(pid): def _load_env(): repo_root = Path(__file__).resolve().parents[2] - workspace = Path(os.environ.get("BH_AGENT_WORKSPACE", repo_root / "agent-workspace")).expanduser() + workspace = paths.workspace_dir() for p in (repo_root / ".env", workspace / ".env"): if not p.exists(): continue @@ -124,19 +128,90 @@ def _load_env_file(p): NAME = os.environ.get("BU_NAME", "default") BU_API = "https://api.browser-use.com/api/v3" -GH_RELEASES = "https://api.github.com/repos/browser-use/browser-harness/releases/latest" -VERSION_CACHE = Path(tempfile.gettempdir()) / "bu-version-cache.json" +PYPI_JSON = "https://pypi.org/pypi/browser-harness/json" +VERSION_CACHE = paths.config_dir() / "version-cache.json" VERSION_CACHE_TTL = 24 * 3600 DOCTOR_TEXT_LIMIT = 140 -def _log_tail(name): +def _binding_parts(binding=None): + if binding is None: + return None, None, None + return binding.bu_name, binding.runtime_dir, binding.tmp_dir + + +def _ipc_pid_path(name, runtime_dir=None): + return ipc.pid_path(name) if runtime_dir is None else ipc.pid_path(name, runtime_dir=runtime_dir) + + +def _ipc_connect(name, timeout=1.0, runtime_dir=None): + return ipc.connect(name, timeout=timeout) if runtime_dir is None else ipc.connect(name, timeout=timeout, runtime_dir=runtime_dir) + + +def _ipc_ping(name, timeout=1.0, runtime_dir=None): + return ipc.ping(name, timeout=timeout) if runtime_dir is None else ipc.ping(name, timeout=timeout, runtime_dir=runtime_dir) + + +def _ipc_identify(name, timeout=1.0, runtime_dir=None): + return ipc.identify(name, timeout=timeout) if runtime_dir is None else ipc.identify(name, timeout=timeout, runtime_dir=runtime_dir) + + +def _ipc_cleanup_endpoint(name, runtime_dir=None): + return ipc.cleanup_endpoint(name) if runtime_dir is None else ipc.cleanup_endpoint(name, runtime_dir=runtime_dir) + + +def _log_tail(name, tmp_dir=None): try: - return ipc.log_path(name or 
NAME).read_text().strip().splitlines()[-1] + return ipc.log_path(name or NAME, tmp_dir=tmp_dir).read_text().strip().splitlines()[-1] except (FileNotFoundError, IndexError): return None +class _DaemonStartLock: + def __init__(self, name, runtime_dir=None): + base = Path(runtime_dir) if runtime_dir else paths.runtime_dir() + self.path = base / f"bu-{name or NAME}.start.lock" + self.file = None + + def __enter__(self): + self.path.parent.mkdir(parents=True, exist_ok=True) + self.file = self.path.open("a+") + if sys.platform == "win32": + try: + import msvcrt + self.file.seek(0) + self.file.write("\0") + self.file.flush() + msvcrt.locking(self.file.fileno(), msvcrt.LK_LOCK, 1) + except Exception: + pass + else: + try: + import fcntl + fcntl.flock(self.file.fileno(), fcntl.LOCK_EX) + except Exception: + pass + return self + + def __exit__(self, *_exc): + if not self.file: + return + if sys.platform == "win32": + try: + import msvcrt + self.file.seek(0) + msvcrt.locking(self.file.fileno(), msvcrt.LK_UNLCK, 1) + except Exception: + pass + else: + try: + import fcntl + fcntl.flock(self.file.fileno(), fcntl.LOCK_UN) + except Exception: + pass + self.file.close() + + def _needs_chrome_remote_debugging_prompt(msg): """True when Chrome needs the inspect-page permission/profile flow.""" lower = (msg or "").lower() @@ -144,6 +219,15 @@ def _needs_chrome_remote_debugging_prompt(msg): "devtoolsactiveport not found" in lower or "enable chrome://inspect" in lower or "not live yet" in lower + or "cdp-disabled" in lower + ) + + +def _needs_chrome_permission_popup(msg): + """True when Chrome is reachable but waiting on the per-session Allow popup.""" + lower = (msg or "").lower() + return ( + "permission-blocked" in lower or ( "ws handshake failed" in lower and ( @@ -158,13 +242,20 @@ def _needs_chrome_remote_debugging_prompt(msg): def _is_local_chrome_mode(env=None): """True when the daemon discovers a local Chrome instead of a remote CDP WS.""" - return not (env or 
{}).get("BU_CDP_WS") and not os.environ.get("BU_CDP_WS") + env = env or {} + return not ( + env.get("BU_CDP_WS") + or env.get("BU_CDP_URL") + or os.environ.get("BU_CDP_WS") + or os.environ.get("BU_CDP_URL") + ) -def daemon_alive(name=None): +def daemon_alive(name=None, binding=None): # Ping handshake (not a bare connect) so a stale .port file + port reuse # after a daemon crash doesn't make us mistake an unrelated listener for ours. - return ipc.ping(name or NAME, timeout=1.0) + b_name, runtime_dir, _tmp_dir = _binding_parts(binding) + return _ipc_ping(name or b_name or NAME, timeout=1.0, runtime_dir=runtime_dir) def _daemon_endpoint_names(): @@ -295,40 +386,98 @@ def run_doctor_fix_snap(): return 0 -def ensure_daemon(wait=60.0, name=None, env=None): +def _package_source_path() -> Path: + return Path(__file__).resolve().parent + + +def _cwd_browser_harness_source_path(cwd: str | os.PathLike | None = None) -> Path | None: + try: + base = Path(cwd or os.getcwd()).resolve() + except OSError: + return None + for root in (base, *base.parents): + candidate = root / "src" / "browser_harness" + if candidate.is_dir(): + try: + return candidate.resolve() + except OSError: + return candidate + return None + + +def _doctor_source_mismatch() -> dict | None: + package_source = _package_source_path() + cwd_source = _cwd_browser_harness_source_path() + if cwd_source and cwd_source != package_source: + return { + "package_source": str(package_source), + "cwd_source": str(cwd_source), + } + return None + + +def ensure_daemon(wait=60.0, name=None, env=None, binding=None): """Idempotent. 
Self-heals stale daemon, cold Chrome, and missing Allow on chrome://inspect.""" - if daemon_alive(name): + b_name, runtime_dir, tmp_dir = _binding_parts(binding) + name = name or b_name + binding_env = binding.daemon_env() if binding else {} + env = {**binding_env, **(env or {})} + if daemon_alive(name, binding=binding): # Stale daemons accept connects AND reply to meta:* (pure Python) even when the # CDP WS to Chrome is dead — probe with a real CDP call and require "result". # Must go through ipc.connect so this works on Windows (TCP loopback) too; # raw AF_UNIX here would fail on every warm call and churn the daemon. try: - s, token = ipc.connect(name or NAME, timeout=3.0) + s, token = _ipc_connect(name or NAME, timeout=3.0, runtime_dir=runtime_dir) resp = ipc.request(s, token, {"method": "Target.getTargets", "params": {}}) if "result" in resp: return except Exception: pass - restart_daemon(name) + restart_daemon(name, binding=binding) import subprocess, sys local = _is_local_chrome_mode(env) - for attempt in (0, 1): - e = {**os.environ, **({"BU_NAME": name} if name else {}), **(env or {})} - p = subprocess.Popen( - [sys.executable, "-m", "browser_harness.daemon"], - env=e, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, **ipc.spawn_kwargs(), - ) - deadline = time.time() + wait - while time.time() < deadline: - if daemon_alive(name): return - if p.poll() is not None: break - time.sleep(0.2) - msg = _log_tail(name) or "" - if local and attempt == 0 and _needs_chrome_remote_debugging_prompt(msg): - _open_chrome_inspect() - print('browser-harness: at chrome://inspect/#remote-debugging, tick "Allow remote debugging for this browser instance" and click Allow on the popup that appears', file=sys.stderr) - restart_daemon(name) - continue - raise RuntimeError(msg or f"daemon {name or NAME} didn't come up -- check {ipc.log_path(name or NAME)}") + if local and not env.get("BH_SELECTED_LOCAL_PROFILE"): + selected = local_profiles.get_default_profile_id() + if 
selected: + env["BH_SELECTED_LOCAL_PROFILE"] = selected + with _DaemonStartLock(name or NAME, runtime_dir=runtime_dir): + if daemon_alive(name, binding=binding): + return + for attempt in (0, 1): + e = {**os.environ, **({"BU_NAME": name} if name else {}), **(env or {})} + log_file = open(ipc.log_path(name or NAME, tmp_dir=tmp_dir), "ab") + try: + p = subprocess.Popen( + [sys.executable, "-m", "browser_harness.daemon"], + env=e, stdout=log_file, stderr=log_file, **ipc.spawn_kwargs(), + ) + finally: + log_file.close() + deadline = time.time() + wait + while time.time() < deadline: + if daemon_alive(name, binding=binding): return + if p.poll() is not None: break + time.sleep(0.2) + msg = _log_tail(name, tmp_dir=tmp_dir) or "" + if local and attempt == 0 and _needs_chrome_permission_popup(msg): + _open_selected_profile(env.get("BH_SELECTED_LOCAL_PROFILE")) + print('browser-harness: Chrome is asking "Allow remote debugging?" in the selected profile. Click Allow, then retry browser work.', file=sys.stderr) + restart_daemon(name, binding=binding) + raise RuntimeError( + "permission-blocked: opened/focused the selected Chrome profile. " + "Wait for the user to click Allow in the Chrome permission popup before retrying." + ) + if local and attempt == 0 and _needs_chrome_remote_debugging_prompt(msg): + _open_chrome_inspect(env.get("BH_SELECTED_LOCAL_PROFILE")) + print('browser-harness: at chrome://inspect/#remote-debugging, tick "Allow remote debugging for this browser instance" and click Allow on the popup that appears', file=sys.stderr) + restart_daemon(name, binding=binding) + if "cdp-disabled" in msg.lower(): + raise RuntimeError( + "cdp-disabled: opened chrome://inspect/#remote-debugging in the selected profile. " + "Wait for the user to tick the checkbox and confirm before retrying." 
+ ) + continue + raise RuntimeError(msg or f"daemon {name or NAME} didn't come up -- check {ipc.log_path(name or NAME, tmp_dir=tmp_dir)}") def stop_remote_daemon(name="remote"): @@ -345,7 +494,7 @@ def stop_remote_daemon(name="remote"): restart_daemon(name) -def restart_daemon(name=None): +def restart_daemon(name=None, binding=None): """Best-effort daemon shutdown + socket/pid cleanup. Name is historical: callers typically follow this with another @@ -359,8 +508,9 @@ def restart_daemon(name=None): """ import signal - name = name or NAME - pid_path = str(ipc.pid_path(name)) + b_name, runtime_dir, _tmp_dir = _binding_parts(binding) + name = name or b_name or NAME + pid_path = str(_ipc_pid_path(name, runtime_dir=runtime_dir)) # Two pieces of information are tracked separately: # - daemon_pid: the daemon's self-reported PID, or None. Only daemons @@ -370,8 +520,8 @@ def restart_daemon(name=None): # IPC path working across upgrades — without it, a still-running # pre-upgrade daemon would have its socket deleted out from under it # while the process stayed alive. - daemon_pid = ipc.identify(name, timeout=5.0) - daemon_alive = daemon_pid is not None or ipc.ping(name, timeout=1.0) + daemon_pid = _ipc_identify(name, timeout=5.0, runtime_dir=runtime_dir) + daemon_alive = daemon_pid is not None or _ipc_ping(name, timeout=1.0, runtime_dir=runtime_dir) # Snapshot the daemon's process start-time as a secondary identity check. # The IPC socket can disappear before the process exits (e.g. the shutdown # path tears down the socket and then waits on a slow remote `stop` PATCH), @@ -383,7 +533,7 @@ def restart_daemon(name=None): if daemon_alive: try: - c, token = ipc.connect(name, timeout=5.0) + c, token = _ipc_connect(name, timeout=5.0, runtime_dir=runtime_dir) ipc.request(c, token, {"meta": "shutdown"}) c.close() except Exception: @@ -405,7 +555,7 @@ def restart_daemon(name=None): # same process, just slow to exit (e.g. stuck in remote stop). 
# The IPC may already be gone; that's expected. # If neither holds, the PID may have been reused; skip SIGTERM. - verified_pid = ipc.identify(name, timeout=1.0) + verified_pid = _ipc_identify(name, timeout=1.0, runtime_dir=runtime_dir) same_process = verified_pid == daemon_pid or ( daemon_start is not None and _process_start_time(daemon_pid) == daemon_start @@ -416,7 +566,7 @@ def restart_daemon(name=None): except (ProcessLookupError, OSError, SystemError, OverflowError): pass - ipc.cleanup_endpoint(name) + _ipc_cleanup_endpoint(name, runtime_dir=runtime_dir) try: os.unlink(pid_path) except FileNotFoundError: @@ -548,13 +698,18 @@ def start_remote_daemon(name="remote", profileName=None, **create_kwargs): def list_local_profiles(): - """Detected local browser profiles on this machine. Shells out to `profile-use list --json`. - Returns [{BrowserName, BrowserPath, ProfileName, ProfilePath, DisplayName}, ...]. - Requires `profile-use` (see interaction-skills/profile-sync.md for install).""" - import json, shutil, subprocess - if not shutil.which("profile-use"): - raise RuntimeError("profile-use not installed -- curl -fsSL https://browser-use.com/profile.sh | sh") - return json.loads(subprocess.check_output(["profile-use", "list", "--json"], text=True)) + """Detected local Chromium-family profiles with stable profile ids.""" + return local_profiles.list_local_profiles_payload() + + +def use_local_profile(profile_id): + """Set the default local profile id for future local Chrome daemon sessions.""" + return local_profiles.set_default_profile_id(profile_id) + + +def open_local_profile(profile_id=None, marker=True): + """Open or focus a local profile. 
With marker=True, running Chrome gets a marker tab.""" + return local_profiles.open_local_profile(profile_id, allow_marker=marker) def sync_local_profile(profile_name, browser=None, cloud_profile_id=None, @@ -641,20 +796,30 @@ def _cache_read(): def _cache_write(data): try: + VERSION_CACHE.parent.mkdir(parents=True, exist_ok=True) VERSION_CACHE.write_text(json.dumps(data)) + if sys.platform != "win32": + os.chmod(VERSION_CACHE, 0o600) except OSError: pass def _latest_release_tag(force=False): - """Return latest release tag from GitHub, or None. Cached for 24h to avoid hammering the API.""" + """Return latest browser-harness version on PyPI, or None. Cached for 24h.""" cache = _cache_read() now = time.time() if not force and cache.get("tag") and now - cache.get("fetched_at", 0) < VERSION_CACHE_TTL: return cache["tag"] try: - req = urllib.request.Request(GH_RELEASES, headers={"Accept": "application/vnd.github+json"}) - tag = json.loads(urllib.request.urlopen(req, timeout=5).read()).get("tag_name") or "" + req = urllib.request.Request( + PYPI_JSON, + headers={"Accept": "application/json", "User-Agent": "browser-harness"}, + ) + data = json.loads(urllib.request.urlopen(req, timeout=5).read()) + tag = data.get("info", {}).get("version") or "" + releases = data.get("releases") or {} + if releases: + tag = max(releases, key=_version_tuple) except Exception: return cache.get("tag") # fall back to last known tag = tag.lstrip("v") @@ -663,17 +828,16 @@ def _latest_release_tag(force=False): def _version_tuple(v): - """Best-effort semver parse. 
Non-numeric components sort as 0, so pre-releases may not rank perfectly.""" - parts = [] - for s in (v or "").split("."): - m = "" - for ch in s: - if ch.isdigit(): - m += ch - else: - break - parts.append(int(m) if m else 0) - return tuple(parts) + """Best-effort PEP 440-ish key where rc/beta/alpha sort below final.""" + m = re.match(r"^\s*v?(\d+(?:\.\d+)*)(?:(a|b|rc)(\d+))?", v or "", re.I) + if not m: + return (0, 0, 0, 3, 0) + nums = [int(p) for p in m.group(1).split(".")[:3]] + nums.extend([0] * (3 - len(nums))) + pre = (m.group(2) or "").lower() + pre_rank = {"a": 0, "b": 1, "rc": 2}.get(pre, 3) + pre_num = int(m.group(3) or 0) + return (*nums, pre_rank, pre_num) def check_for_update(): @@ -716,10 +880,17 @@ def _chrome_running(): return False -def _open_chrome_inspect(): +def _open_chrome_inspect(profile_id=None): """Open chrome://inspect/#remote-debugging so the user can tick the checkbox.""" import platform, subprocess, webbrowser url = "chrome://inspect/#remote-debugging" + profile_id = profile_id or local_profiles.get_default_profile_id() + if profile_id: + try: + local_profiles.open_local_profile(profile_id, allow_marker=False, url=url) + return + except Exception: + pass if platform.system() == "Darwin": try: subprocess.run([ @@ -736,6 +907,23 @@ def _open_chrome_inspect(): pass +def _open_selected_profile(profile_id=None): + """Focus the selected Chrome profile without routing through the checkbox page.""" + import platform, subprocess + profile_id = profile_id or local_profiles.get_default_profile_id() + if profile_id: + try: + local_profiles.open_local_profile(profile_id, allow_marker=False) + return + except Exception: + pass + if platform.system() == "Darwin": + try: + subprocess.run(["osascript", "-e", 'tell application "Google Chrome" to activate'], timeout=5, check=False) + except Exception: + pass + + def run_doctor(): """Read-only diagnostics. 
Exit 0 iff everything looks healthy.""" import platform, shutil, sys @@ -747,6 +935,8 @@ def run_doctor(): profile_use = shutil.which("profile-use") is not None api_key = bool(os.environ.get("BROWSER_USE_API_KEY")) latest = _latest_release_tag() + source_path = _package_source_path() + source_mismatch = _doctor_source_mismatch() # Only claim an update when we know the installed version — `cur or "(unknown)"` # for display would otherwise be parsed as (0,) and flag every latest as newer. newer = bool(cur and latest and _version_tuple(latest) > _version_tuple(cur)) @@ -761,10 +951,15 @@ def row(label, ok, detail=""): print(f" platform {platform.system()} {platform.release()}") print(f" python {sys.version.split()[0]}") print(f" version {cur_display} ({mode})") + print(f" source path {source_path}") if latest: print(f" latest release {latest}" + (" (update available)" if newer else "")) else: - print(" latest release (could not reach github)") + print(" latest release (could not reach PyPI)") + if source_mismatch: + print("[source-mismatch]") + print(f"Current directory contains: {source_mismatch['cwd_source']}") + print(f"Imported browser-harness from: {source_mismatch['package_source']}") if platform.system() == "Linux": bname, bpath = _doctor_probe_chrome_binary_for_snap() if bname and bpath and _is_snap_browser(bpath): @@ -786,7 +981,19 @@ def row(label, ok, detail=""): row("profile-use installed", profile_use, "" if profile_use else "optional: curl -fsSL https://browser-use.com/profile.sh | sh") row("BROWSER_USE_API_KEY set", api_key, "" if api_key else "optional: needed only for cloud browsers / profile sync") # Core health = chrome + daemon. Profile-use/api-key are optional. 
- return 0 if (chrome and daemon) else 1 + healthy = chrome and daemon + telemetry.capture("browser_harness.doctor", { + "install_mode": mode, + "chrome_running": chrome, + "daemon_alive": daemon, + "active_connections": len(connections), + "profile_use_installed": profile_use, + "cloud_auth_env": api_key, + "latest_known": bool(latest), + "update_available": newer, + "result": "ok" if healthy else "fail", + }) + return 0 if healthy else 1 def _prompt_yes(question, default_yes=True, yes=False): @@ -812,13 +1019,14 @@ def run_update(yes=False): # version. Otherwise `newer=False` just means "couldn't compare" — proceed. if cur and latest and not newer: print(f"browser-harness is up to date ({cur}).") + telemetry.capture("browser_harness.update", {"install_mode": _install_mode(), "result": "up-to-date"}) return 0 if cur and latest: print(f"updating browser-harness: {cur} -> {latest}") elif latest: print(f"installed version unknown; will try to update to {latest}.") else: - print("could not reach github; will try to update anyway.") + print("could not reach PyPI; will try to update anyway.") mode = _install_mode() if mode == "git": @@ -826,23 +1034,30 @@ def run_update(yes=False): status = subprocess.run(["git", "-C", str(repo), "status", "--porcelain"], capture_output=True, text=True) if status.returncode != 0: print(f"git status failed: {status.stderr.strip()}", file=sys.stderr) + telemetry.capture("browser_harness.update", {"install_mode": mode, "result": "git-status-failed"}) return 1 if status.stdout.strip(): print(f"refusing to update: uncommitted changes in {repo}", file=sys.stderr) print("commit or stash them first, or run `git -C %s pull` yourself." 
% repo, file=sys.stderr) + telemetry.capture("browser_harness.update", {"install_mode": mode, "result": "dirty-git"}) return 1 r = subprocess.run(["git", "-C", str(repo), "pull", "--ff-only"]) if r.returncode != 0: + telemetry.capture("browser_harness.update", {"install_mode": mode, "result": "git-pull-failed"}) return r.returncode elif mode == "pypi": - tool_upgrade = subprocess.run(["uv", "tool", "upgrade", "browser-harness"]) + try: + tool_upgrade = subprocess.run(["uv", "tool", "upgrade", "browser-harness"]) + except FileNotFoundError: + print("uv is required to update PyPI installs: https://docs.astral.sh/uv/getting-started/installation/", file=sys.stderr) + telemetry.capture("browser_harness.update", {"install_mode": mode, "result": "uv-missing"}) + return 1 if tool_upgrade.returncode != 0: - # Fall back to pip in case this wasn't a `uv tool install`. - pip = subprocess.run([sys.executable, "-m", "pip", "install", "--upgrade", "browser-harness"]) - if pip.returncode != 0: - return pip.returncode + telemetry.capture("browser_harness.update", {"install_mode": mode, "result": "uv-upgrade-failed"}) + return tool_upgrade.returncode else: print("unknown install mode; can't auto-update.", file=sys.stderr) + telemetry.capture("browser_harness.update", {"install_mode": mode, "result": "unknown-install-mode"}) return 1 # Invalidate banner/tag cache so the new version doesn't keep nagging. @@ -856,5 +1071,12 @@ def run_update(yes=False): print("daemon stopped; it will auto-restart on next `browser-harness` call.") else: print("daemon left running on old code. run `browser-harness` and it'll use the new code after the daemon recycles.") + try: + from . 
import manager_client + if manager_client.stop_manager_if_running(): + print("browser manager stopped; it will auto-restart on next manager call.") + except Exception: + pass print("update complete.") + telemetry.capture("browser_harness.update", {"install_mode": mode, "result": "updated"}) return 0 diff --git a/src/browser_harness/auth.py b/src/browser_harness/auth.py new file mode 100644 index 00000000..4153ebd5 --- /dev/null +++ b/src/browser_harness/auth.py @@ -0,0 +1,543 @@ +"""Browser Use Cloud auth for browser-harness. + +The model-facing contract stays small: cloud browser startup either has a key +or tells the agent to run `browser-harness auth login`. OAuth details live here. +""" +from __future__ import annotations + +from dataclasses import dataclass, field +from http.server import BaseHTTPRequestHandler, HTTPServer +import argparse +import base64 +import getpass +import hashlib +import json +import os +from pathlib import Path +import secrets +import stat +import sys +import time +import urllib.error +import urllib.parse +import urllib.request +import webbrowser + +from . import paths + + +AUTH_BASE = "https://api.browser-use.com" +# Browser Use currently exposes this registered CLI OAuth client. Keep an env +# escape hatch so a dedicated browser-harness client can be used once issued. 
+DEFAULT_CLIENT_ID = "browser-use-terminal" +CALLBACK_PATH = "/browser-use-cloud/callback" +AUTH_TIMEOUT_SECONDS = 600 + + +class CloudAuthRequired(RuntimeError): + def __init__(self): + super().__init__("cloud-auth-required: run `browser-harness auth login`") + + +class AuthError(RuntimeError): + pass + + +@dataclass +class PendingCallback: + state: str + code: str | None = None + error: str | None = None + error_description: str | None = None + complete: bool = False + + +@dataclass +class BrowserAuthStart: + server: HTTPServer + callback: PendingCallback + redirect_uri: str + verifier: str + auth_url: str + expires_in: int | None + opened: bool = False + + +@dataclass +class DeviceAuthStart: + device_code: str + user_code: str + verification_uri: str + verification_uri_complete: str | None = None + expires_in: int | None = None + interval: int = 5 + opened: bool = False + + @property + def open_uri(self) -> str: + return self.verification_uri_complete or self.verification_uri + + +@dataclass +class AuthRecord: + api_key: str + api_key_id: str | None = None + project_id: str | None = None + expires_at: str | None = None + scopes: list[str] = field(default_factory=list) + source: str = "oauth" + + @classmethod + def from_token_response(cls, data: dict, *, source: str = "oauth") -> "AuthRecord": + api_key = data.get("api_key") + if not api_key: + raise AuthError("auth token response did not include an api_key") + scopes = data.get("scopes") or [] + if not isinstance(scopes, list): + scopes = [] + return cls( + api_key=api_key, + api_key_id=data.get("api_key_id"), + project_id=data.get("project_id"), + expires_at=data.get("expires_at"), + scopes=[str(s) for s in scopes], + source=source, + ) + + def to_storage(self) -> dict: + return { + "api_key": self.api_key, + "api_key_id": self.api_key_id, + "project_id": self.project_id, + "expires_at": self.expires_at, + "scopes": self.scopes, + "source": self.source, + } + + +def auth_base() -> str: + return 
(os.environ.get("BROWSER_USE_CLOUD_API_URL") or AUTH_BASE).rstrip("/") + + +def client_id() -> str: + return os.environ.get("BROWSER_HARNESS_OAUTH_CLIENT_ID") or DEFAULT_CLIENT_ID + + +def auth_path() -> Path: + override = os.environ.get("BH_AUTH_PATH") + if override: + return Path(override).expanduser() + return paths.config_dir() / "auth.json" + + +def load_auth_file(path: Path | None = None) -> dict: + path = path or auth_path() + try: + return json.loads(path.read_text()) + except FileNotFoundError: + return {} + except json.JSONDecodeError as e: + raise AuthError(f"auth file is not valid JSON: {path}") from e + + +def save_auth_record(record: AuthRecord, path: Path | None = None) -> None: + path = path or auth_path() + path.parent.mkdir(parents=True, exist_ok=True) + _chmod_private(path.parent, directory=True) + existing = load_auth_file(path) + existing["browser_use"] = record.to_storage() + tmp = path.with_name(path.name + ".tmp") + _write_private_json(tmp, existing) + os.replace(tmp, path) + _chmod_private(path) + + +def clear_auth(path: Path | None = None) -> bool: + path = path or auth_path() + data = load_auth_file(path) + existed = bool(data.get("browser_use")) + data.pop("browser_use", None) + if data: + tmp = path.with_name(path.name + ".tmp") + _write_private_json(tmp, data) + os.replace(tmp, path) + _chmod_private(path) + else: + try: + path.unlink() + except FileNotFoundError: + pass + return existed + + +def stored_auth_record(path: Path | None = None) -> dict | None: + data = load_auth_file(path) + value = data.get("browser_use") + return value if isinstance(value, dict) else None + + +def get_browser_use_api_key() -> str: + env_key = os.environ.get("BROWSER_USE_API_KEY") + if env_key: + return env_key + stored = stored_auth_record() + key = stored.get("api_key") if stored else None + if key: + return str(key) + raise CloudAuthRequired() + + +def auth_status() -> dict: + if os.environ.get("BROWSER_USE_API_KEY"): + return {"status": 
"authenticated", "source": "env", "path": str(auth_path())} + stored = stored_auth_record() + if not stored or not stored.get("api_key"): + return {"status": "missing", "source": None, "path": str(auth_path())} + return {"status": "authenticated", "source": "stored", "path": str(auth_path())} + + +def pkce_pair() -> tuple[str, str]: + verifier = secrets.token_urlsafe(48) + digest = hashlib.sha256(verifier.encode()).digest() + challenge = base64.urlsafe_b64encode(digest).decode().rstrip("=") + return verifier, challenge + + +def start_browser_auth(*, open_url=True, timeout=AUTH_TIMEOUT_SECONDS) -> BrowserAuthStart: + verifier, challenge = pkce_pair() + state = secrets.token_urlsafe(32) + callback = PendingCallback(state=state) + server = _callback_server(callback) + host, port = server.server_address + redirect_uri = f"http://{host}:{port}{CALLBACK_PATH}" + req = { + "client_id": client_id(), + "response_type": "code", + "redirect_uri": redirect_uri, + "code_challenge": challenge, + "code_challenge_method": "S256", + "state": state, + "device_name": os.environ.get("BH_DEVICE_NAME") or "browser-harness", + } + try: + data = _post_json(f"{auth_base()}/cloud/cli-auth/browser", req) + except BaseException: + server.server_close() + raise + auth_url = data.get("authorization_uri") or data.get("auth_url") + if not auth_url: + server.server_close() + raise AuthError("auth start response did not include authorization_uri") + expires_in = _int_or_none(data.get("expires_in")) + opened = False + if open_url: + try: + opened = bool(webbrowser.open(auth_url)) + except Exception: + opened = False + return BrowserAuthStart( + server=server, + callback=callback, + redirect_uri=redirect_uri, + verifier=verifier, + auth_url=auth_url, + expires_in=expires_in, + opened=opened, + ) + + +def complete_browser_auth(start: BrowserAuthStart, *, timeout=AUTH_TIMEOUT_SECONDS) -> AuthRecord: + deadline = time.time() + timeout + start.server.timeout = 0.5 + try: + while not 
start.callback.complete and time.time() < deadline: + start.server.handle_request() + finally: + start.server.server_close() + if not start.callback.complete: + raise AuthError("timed out waiting for browser auth callback") + if start.callback.error: + detail = f": {start.callback.error_description}" if start.callback.error_description else "" + raise AuthError(f"auth failed: {start.callback.error}{detail}") + if not start.callback.code: + raise AuthError("auth callback did not include a code") + token = _exchange_authorization_code(start.callback.code, start.redirect_uri, start.verifier) + record = AuthRecord.from_token_response(token) + save_auth_record(record) + return record + + +def browser_login(*, open_url=True, json_output=False, timeout=AUTH_TIMEOUT_SECONDS) -> AuthRecord: + start = start_browser_auth(open_url=open_url, timeout=timeout) + if json_output: + print(json.dumps({ + "status": "needs_user_auth", + "auth_url": start.auth_url, + "callback": start.redirect_uri, + "expires_in": start.expires_in, + "opened": start.opened, + }), flush=True) + else: + print("Open this URL to sign in to Browser Use Cloud:") + print(start.auth_url, flush=True) + if start.opened: + print("Waiting for login to complete...", flush=True) + else: + print("Waiting for login to complete after you open the URL...", flush=True) + record = complete_browser_auth(start, timeout=timeout) + if json_output: + print(json.dumps(_stored_success_output()), flush=True) + else: + print("Browser Use Cloud auth stored.") + return record + + +def start_device_auth(*, open_url=True) -> DeviceAuthStart: + data = _post_json( + f"{auth_base()}/cloud/cli-auth/device", + {"client_id": client_id(), "device_name": os.environ.get("BH_DEVICE_NAME") or "browser-harness"}, + ) + device_code = data.get("device_code") + user_code = data.get("user_code") + verification_uri = data.get("verification_uri") or data.get("verification_url") + if not device_code or not user_code or not verification_uri: + raise 
AuthError("device auth response missing device_code, user_code, or verification_uri") + opened = False + open_uri = data.get("verification_uri_complete") or verification_uri + if open_url: + try: + opened = bool(webbrowser.open(open_uri)) + except Exception: + opened = False + return DeviceAuthStart( + device_code=device_code, + user_code=user_code, + verification_uri=verification_uri, + verification_uri_complete=data.get("verification_uri_complete"), + expires_in=_int_or_none(data.get("expires_in")), + interval=max(1, _int_or_none(data.get("interval")) or 5), + opened=opened, + ) + + +def complete_device_auth(start: DeviceAuthStart, *, timeout: int | None = None) -> AuthRecord: + deadline = time.time() + (timeout or start.expires_in or AUTH_TIMEOUT_SECONDS) + interval = start.interval + while time.time() < deadline: + try: + token = _post_json(f"{auth_base()}/cloud/cli-auth/token", { + "grant_type": "urn:ietf:params:oauth:grant-type:device_code", + "device_code": start.device_code, + "client_id": client_id(), + }) + record = AuthRecord.from_token_response(token) + save_auth_record(record) + return record + except AuthError as e: + err = _auth_error_code(str(e)) + if err == "authorization_pending": + time.sleep(interval) + continue + if err == "slow_down": + interval += 5 + time.sleep(interval) + continue + raise + raise AuthError("timed out waiting for device auth") + + +def device_login(*, open_url=True, json_output=False) -> AuthRecord: + start = start_device_auth(open_url=open_url) + if json_output: + print(json.dumps({ + "status": "needs_user_auth", + "verification_uri": start.verification_uri, + "verification_uri_complete": start.verification_uri_complete, + "user_code": start.user_code, + "expires_in": start.expires_in, + "opened": start.opened, + }), flush=True) + else: + print("Open this URL to sign in to Browser Use Cloud:") + print(start.open_uri, flush=True) + print(f"Code: {start.user_code}", flush=True) + print("Waiting for login to complete...", 
flush=True) + record = complete_device_auth(start) + if json_output: + print(json.dumps(_stored_success_output()), flush=True) + else: + print("Browser Use Cloud auth stored.") + return record + + +def api_key_stdin_login(*, json_output=False, input_stream=None) -> AuthRecord: + key = _read_manual_api_key(input_stream) + record = AuthRecord(api_key=key, source="manual") + save_auth_record(record) + if json_output: + print(json.dumps(_stored_success_output()), flush=True) + else: + print("Browser Use Cloud API key stored.") + return record + + +def _exchange_authorization_code(code: str, redirect_uri: str, verifier: str) -> dict: + return _post_json(f"{auth_base()}/cloud/cli-auth/token", { + "grant_type": "authorization_code", + "code": code, + "redirect_uri": redirect_uri, + "code_verifier": verifier, + "client_id": client_id(), + }) + + +def _callback_server(callback: PendingCallback) -> HTTPServer: + class Handler(BaseHTTPRequestHandler): + def do_GET(self): # noqa: N802 - stdlib handler API + parsed = urllib.parse.urlparse(self.path) + if parsed.path != CALLBACK_PATH: + self.send_error(404) + return + qs = urllib.parse.parse_qs(parsed.query) + state = _one(qs, "state") + if state != callback.state: + callback.error = "invalid_state" + callback.error_description = "OAuth callback state did not match" + else: + callback.code = _one(qs, "code") + callback.error = _one(qs, "error") + callback.error_description = _one(qs, "error_description") + callback.complete = True + body = b"

Browser Use Cloud login complete

You can close this tab.

" + self.send_response(200) + self.send_header("Content-Type", "text/html; charset=utf-8") + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + + def log_message(self, fmt, *args): + return + + return HTTPServer(("127.0.0.1", 0), Handler) + + +def _post_json(url: str, payload: dict) -> dict: + req = urllib.request.Request( + url, + method="POST", + data=json.dumps(payload).encode(), + headers={"Content-Type": "application/json"}, + ) + try: + with urllib.request.urlopen(req, timeout=60) as resp: + return json.loads(resp.read() or b"{}") + except urllib.error.HTTPError as e: + body = e.read() or b"" + try: + data = json.loads(body or b"{}") + except json.JSONDecodeError: + data = {} + err = data.get("error") or data.get("state") or f"http_{e.code}" + desc = data.get("error_description") or data.get("reason") or data.get("message") + detail = f": {desc}" if desc else "" + raise AuthError(f"{err}{detail}") from e + except urllib.error.URLError as e: + raise AuthError(f"network error: {e.reason}") from e + + +def _read_manual_api_key(input_stream=None) -> str: + stream = input_stream or sys.stdin + if hasattr(stream, "isatty") and stream.isatty(): + try: + key = getpass.getpass("Browser Use API key: ") + except EOFError as e: + raise AuthError("no API key provided") from e + else: + key = stream.read() + key = (key or "").strip() + if not key: + raise AuthError("no API key provided") + if len(key) < 20: + raise AuthError("API key looks too short") + return key + + +def _write_private_json(path: Path, data: dict) -> None: + raw = (json.dumps(data, indent=2) + "\n").encode() + flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC + fd = os.open(path, flags, stat.S_IRUSR | stat.S_IWUSR) + try: + with os.fdopen(fd, "wb") as f: + f.write(raw) + except BaseException: + try: + os.close(fd) + except OSError: + pass + raise + + +def _chmod_private(path: Path, *, directory=False) -> None: + mode = stat.S_IRWXU if directory else 
stat.S_IRUSR | stat.S_IWUSR + try: + os.chmod(path, mode) + except OSError: + pass + + +def _one(qs: dict[str, list[str]], key: str) -> str | None: + values = qs.get(key) + return values[0] if values else None + + +def _int_or_none(value) -> int | None: + try: + return int(value) + except (TypeError, ValueError): + return None + + +def _auth_error_code(message: str) -> str: + return message.split(":", 1)[0] + + +def _stored_success_output() -> dict: + return {"status": "stored", "path": str(auth_path())} + + +def run_auth_cli(argv: list[str]) -> int: + parser = argparse.ArgumentParser(prog="browser-harness auth") + sub = parser.add_subparsers(dest="command", required=True) + login = sub.add_parser("login") + login_mode = login.add_mutually_exclusive_group() + login_mode.add_argument("--device-code", action="store_true") + login_mode.add_argument("--api-key-stdin", action="store_true") + login.add_argument("--json", action="store_true") + login.add_argument("--no-open", action="store_true") + sub.add_parser("status") + sub.add_parser("logout") + args = parser.parse_args(argv) + + try: + if args.command == "login": + if args.api_key_stdin: + api_key_stdin_login(json_output=args.json) + elif args.device_code: + device_login(open_url=not args.no_open, json_output=args.json) + else: + browser_login(open_url=not args.no_open, json_output=args.json) + return 0 + if args.command == "status": + print(json.dumps(auth_status(), indent=2)) + return 0 + if args.command == "logout": + removed = clear_auth() + print(json.dumps({"status": "logged-out" if removed else "missing", "path": str(auth_path())}, indent=2)) + return 0 + except (AuthError, CloudAuthRequired) as e: + if getattr(args, "json", False): + print(json.dumps({"status": "error", "reason": str(e)}), file=sys.stderr) + else: + print(str(e), file=sys.stderr) + return 1 + return 2 diff --git a/src/browser_harness/context.py b/src/browser_harness/context.py new file mode 100644 index 00000000..79813ce9 --- /dev/null +++ 
b/src/browser_harness/context.py @@ -0,0 +1,167 @@ +"""Runtime browser binding state for manager mode. + +Legacy browser-harness is environment-driven: BU_NAME, BH_RUNTIME_DIR, and +BH_TMP_DIR are read when modules import. Manager mode needs the active browser +binding to change inside one Python process, so helpers resolve this context at +call time. +""" +from __future__ import annotations + +from dataclasses import dataclass +import hashlib +import os +import tempfile +from pathlib import Path + + +@dataclass(frozen=True) +class AgentIdentity: + run_id: str | None + agent_id: str | None + + @property + def degraded(self) -> bool: + return not (self.run_id and self.agent_id) + + def payload(self) -> dict: + return { + "run_id": self.run_id, + "agent_id": self.agent_id, + "identity_degraded": self.degraded, + } + + +@dataclass(frozen=True) +class BrowserBinding: + browser_id: str | None + bu_name: str + runtime_dir: Path | None = None + tmp_dir: Path | None = None + download_dir: Path | None = None + artifact_dir: Path | None = None + cdp_url: str | None = None + cdp_ws: str | None = None + manager_mode: bool = False + + @classmethod + def from_manager(cls, data: dict) -> "BrowserBinding": + return cls( + browser_id=data.get("browser_id"), + bu_name=data["bu_name"], + runtime_dir=_path_or_none(data.get("runtime_dir")), + tmp_dir=_path_or_none(data.get("tmp_dir")), + download_dir=_path_or_none(data.get("download_dir")), + artifact_dir=_path_or_none(data.get("artifact_dir")), + cdp_url=data.get("cdp_url"), + cdp_ws=data.get("cdp_ws"), + manager_mode=True, + ) + + def daemon_env(self) -> dict[str, str]: + env = {"BU_NAME": self.bu_name} + if self.runtime_dir is not None: + env["BH_RUNTIME_DIR"] = str(self.runtime_dir) + if self.tmp_dir is not None: + env["BH_TMP_DIR"] = str(self.tmp_dir) + if self.cdp_url: + env["BU_CDP_URL"] = self.cdp_url + if self.cdp_ws: + env["BU_CDP_WS"] = self.cdp_ws + return env + + +def _path_or_none(value) -> Path | None: + return 
Path(value) if value else None + + +def manager_enabled() -> bool: + return os.environ.get("BH_MANAGER_MODE") == "1" or bool(os.environ.get("BH_MANAGER_SOCKET")) + + +def agent_identity() -> AgentIdentity: + run_id = ( + os.environ.get("BH_RUN_ID") + or os.environ.get("CODEX_THREAD_ID") + or os.environ.get("CODEX_SESSION_ID") + or _cwd_run_id() + ) + agent_id = ( + os.environ.get("BH_AGENT_ID") + or os.environ.get("CODEX_AGENT_ID") + or os.environ.get("CODEX_SUBAGENT_ID") + or "main" + ) + return AgentIdentity(run_id=run_id, agent_id=agent_id) + + +def _cwd_run_id() -> str: + raw = f"{os.environ.get('USER') or ''}:{Path.cwd()}" + return "cwd-" + hashlib.sha256(raw.encode()).hexdigest()[:16] + + +def default_binding_from_env() -> BrowserBinding: + tmp_dir = _path_or_none(os.environ.get("BH_TMP_DIR")) + runtime_dir = _path_or_none(os.environ.get("BH_RUNTIME_DIR")) or tmp_dir + return BrowserBinding( + browser_id=os.environ.get("BH_BROWSER_ID"), + bu_name=os.environ.get("BU_NAME", "default"), + runtime_dir=runtime_dir, + tmp_dir=tmp_dir, + cdp_url=os.environ.get("BU_CDP_URL") or None, + cdp_ws=os.environ.get("BU_CDP_WS") or None, + manager_mode=manager_enabled(), + ) + + +_active_binding: BrowserBinding | None = default_binding_from_env() + + +def get_active_binding() -> BrowserBinding | None: + return _active_binding + + +def activate_binding(binding: BrowserBinding) -> None: + global _active_binding + _active_binding = binding + for p in (binding.runtime_dir, binding.tmp_dir, binding.download_dir, binding.artifact_dir): + if p is not None: + p.mkdir(parents=True, exist_ok=True) + + +def clear_active_binding() -> None: + global _active_binding + _active_binding = None + + +def require_active_binding() -> BrowserBinding: + binding = get_active_binding() + if binding is None: + raise RuntimeError('no-browser-selected: call browser("") before page helpers') + return binding + + +def active_bu_name() -> str: + return require_active_binding().bu_name + + +def 
active_runtime_dir() -> Path | None: + binding = get_active_binding() + return binding.runtime_dir if binding else None + + +def active_tmp_dir() -> Path | None: + binding = get_active_binding() + return binding.tmp_dir if binding else None + + +def active_artifact_dir() -> Path: + binding = require_active_binding() + if binding.artifact_dir is not None: + binding.artifact_dir.mkdir(parents=True, exist_ok=True) + return binding.artifact_dir + if binding.tmp_dir is not None: + binding.tmp_dir.mkdir(parents=True, exist_ok=True) + return binding.tmp_dir + p = Path(tempfile.gettempdir()) + p.mkdir(parents=True, exist_ok=True) + return p diff --git a/src/browser_harness/daemon.py b/src/browser_harness/daemon.py index 0f0f2555..3b1c71fd 100644 --- a/src/browser_harness/daemon.py +++ b/src/browser_harness/daemon.py @@ -5,12 +5,14 @@ from pathlib import Path from . import _ipc as ipc +from . import local_profiles +from . import paths from cdp_use.client import CDPClient def _load_env(): repo_root = Path(__file__).resolve().parents[2] - workspace = Path(os.environ.get("BH_AGENT_WORKSPACE", repo_root / "agent-workspace")).expanduser() + workspace = paths.workspace_dir() for p in (repo_root / ".env", workspace / ".env"): if not p.exists(): continue @@ -101,7 +103,11 @@ def _ws_from_devtools_active_port(http_url: str) -> str | None: return None -def get_ws_url(): +def _explicit_cdp_configured(): + return bool(os.environ.get("BU_CDP_WS") or os.environ.get("BU_CDP_URL")) + + +def get_ws_url(selected_profile: local_profiles.LocalBrowserProfile | None = None): if url := os.environ.get("BU_CDP_WS"): return url if url := os.environ.get("BU_CDP_URL"): @@ -123,21 +129,22 @@ def get_ws_url(): last_err = e time.sleep(1) raise RuntimeError(f"BU_CDP_URL={url} unreachable after 30s: {last_err} -- is the dedicated automation Chrome running?") - for base in PROFILES: - try: - active = (base / "DevToolsActivePort").read_text().splitlines() - except (FileNotFoundError, NotADirectoryError): - 
continue - port = active[0].strip() if active else "" - ws_path = active[1].strip() if len(active) > 1 else "" - if not port: - continue - # Resolve the live WS URL via /json/version instead of trusting the path stored - # alongside the port in DevToolsActivePort: if Chrome was previously launched - # with a different --user-data-dir on the same port, that file is left behind - # with a stale browser UUID and the WS upgrade returns 404. - deadline = time.time() + 30 - while time.time() < deadline: + bases = [selected_profile.user_data_dir] if selected_profile else PROFILES + deadline = time.time() + 30 + while time.time() < deadline: + for base in bases: + try: + active = (base / "DevToolsActivePort").read_text().splitlines() + except (FileNotFoundError, NotADirectoryError): + continue + port = active[0].strip() if active else "" + ws_path = active[1].strip() if len(active) > 1 else "" + if not port: + continue + # Resolve the live WS URL via /json/version instead of trusting the path stored + # alongside the port in DevToolsActivePort: if Chrome was previously launched + # with a different --user-data-dir on the same port, that file is left behind + # with a stale browser UUID and the WS upgrade returns 404. try: return json.loads(urllib.request.urlopen(f"http://127.0.0.1:{port}/json/version", timeout=1).read())["webSocketDebuggerUrl"] except urllib.error.HTTPError as e: @@ -145,16 +152,27 @@ def get_ws_url(): # the ws path Chrome wrote to DevToolsActivePort still works. 
if e.code == 404 and ws_path: return f"ws://127.0.0.1:{port}{ws_path}" - time.sleep(1) + if e.code == 403: + raise RuntimeError("permission-blocked: Chrome is reachable, but the per-session Allow remote debugging popup has not been accepted") except (OSError, KeyError, ValueError): - time.sleep(1) + pass + time.sleep(0.2) + if selected_profile: + disabled = local_profiles.local_debugging_disabled_statuses() + if disabled: + raise RuntimeError("cdp-disabled: Chrome is open, but remote debugging is turned off. Open chrome://inspect/#remote-debugging in the selected profile and wait for user confirmation.") + running = local_profiles.browser_process_running(selected_profile.browser_name, selected_profile.browser_path) + state = "stale-port" if running else "browser-not-running" raise RuntimeError( - f"Chrome's remote-debugging page is open, but DevTools is not live yet on 127.0.0.1:{port} — if Chrome opened a profile picker, choose your normal profile first, then tick the checkbox and click Allow if shown" + f"{state}: selected profile {selected_profile.id} is not exposing a reachable local CDP endpoint; open/focus the selected profile, run local setup if needed, then retry" ) for probe_port in (9222, 9223): try: with urllib.request.urlopen(f"http://127.0.0.1:{probe_port}/json/version", timeout=1) as r: return json.loads(r.read())["webSocketDebuggerUrl"] + except urllib.error.HTTPError as e: + if e.code == 403: + raise RuntimeError("permission-blocked: Chrome is reachable, but the per-session Allow remote debugging popup has not been accepted") except (OSError, KeyError, ValueError): continue raise RuntimeError(f"DevToolsActivePort not found in {[str(p) for p in PROFILES]} — enable chrome://inspect/#remote-debugging, or set BU_CDP_WS for a remote browser") @@ -176,7 +194,7 @@ def stop_remote(): def is_real_page(t): - return t["type"] == "page" and not t.get("url", "").startswith(INTERNAL) + return local_profiles.is_real_page_target(t) class Daemon: @@ -184,26 +202,195 
@@ def __init__(self): self.cdp = None self.session = None self.target_id = None + self.selected_local_profile = None + self.preferred_target_marker = None + self.preferred_profile_id = None + self.active_local_profile_id = None + self.preferred_browser_context_id = None + self.owned_target_ids = set() self.events = deque(maxlen=BUF) self.dialog = None self.stop = None # asyncio.Event, set inside start() + def _prepare_selected_local_profile(self): + if _explicit_cdp_configured() or REMOTE_ID: + return None + profile_id = local_profiles.get_default_profile_id() + if not profile_id: + profiles = local_profiles.list_browser_profiles_payload() + raise RuntimeError( + "needs-profile: No default local Chrome profile is set. " + "Run browser_profiles(), ask the user which profile id to use, then run browser_use_profile(id). " + f"profiles={json.dumps(profiles, default=str)}" + ) + profile = local_profiles.resolve_local_profile(profile_id) + if local_profiles.remote_debugging_user_enabled(profile.user_data_dir) is False: + raise RuntimeError( + "cdp-disabled: Chrome remote debugging is turned off for the selected profile. " + "Open chrome://inspect/#remote-debugging in that profile, tick the checkbox, wait for user confirmation, then retry." 
+ ) + opened = local_profiles.open_local_profile(profile.id, allow_marker=True) + self.selected_local_profile = profile + self.preferred_profile_id = profile.id + self.preferred_target_marker = opened.get("target_marker") + log(f"selected local profile {profile.id}; targeting={opened.get('profile_targeting')}") + return profile + + async def _targets(self): + return (await self.cdp.send_raw("Target.getTargets"))["targetInfos"] + + async def _target_info(self, target_id): + return (await self.cdp.send_raw("Target.getTargetInfo", {"targetId": target_id}))["targetInfo"] + + async def _ensure_target_browser_context(self, target_id): + if not self.preferred_browser_context_id: + return + target = next((t for t in await self._targets() if t.get("targetId") == target_id), None) + if target is None: + raise RuntimeError("target-gone: target no longer exists") + actual = target.get("browserContextId") + if actual and actual != self.preferred_browser_context_id: + raise RuntimeError("wrong-profile: refusing to switch to a target from a different Chrome profile context") + + async def _reattach_current_target(self): + if not self.target_id: + return False + targets = await self._targets() + if not any(t.get("targetId") == self.target_id for t in targets): + raise RuntimeError("target-gone: Previous browser tab target is gone.") + await self._ensure_target_browser_context(self.target_id) + self.session = (await self.cdp.send_raw( + "Target.attachToTarget", {"targetId": self.target_id, "flatten": True} + ))["sessionId"] + await self._enable_default_domains(self.session) + return True + + async def _close_profile_marker_targets(self, browser_context_id=None, keep_target_id=None): + try: + targets = await self._targets() + except Exception: + return + for target in targets: + if not local_profiles.is_profile_marker_target(target): + continue + if browser_context_id and target.get("browserContextId") != browser_context_id: + continue + target_id = target.get("targetId") + if not 
target_id or target_id == keep_target_id: + continue + await _silent(self.cdp.send_raw("Target.closeTarget", {"targetId": target_id})) + + async def _close_remote_debugging_setup_targets(self): + try: + targets = await self._targets() + except Exception: + return + for target in targets: + if not local_profiles.is_remote_debugging_setup_target(target): + continue + target_id = target.get("targetId") + if target_id and target_id != self.target_id: + await _silent(self.cdp.send_raw("Target.closeTarget", {"targetId": target_id})) + + def _select_work_target(self, targets, browser_context_id=None, exclude_target_ids=None): + exclude_target_ids = set(exclude_target_ids or ()) + + def in_scope(target): + if target.get("targetId") in exclude_target_ids: + return False + if browser_context_id and target.get("browserContextId") != browser_context_id: + return False + return True + + scoped = [t for t in targets if in_scope(t)] + return ( + next((t for t in scoped if local_profiles.is_real_page_target(t)), None) + or next((t for t in scoped if local_profiles.is_reusable_placeholder_target(t)), None) + ) + async def attach_first_page(self): """Attach to a real page (or any page). Sets self.session. 
Returns attached target or None.""" - targets = (await self.cdp.send_raw("Target.getTargets"))["targetInfos"] - pages = [t for t in targets if is_real_page(t)] - if not pages: - # No real pages — create one instead of attaching to omnibox popup - tid = (await self.cdp.send_raw("Target.createTarget", {"url": "about:blank"}))["targetId"] + attached_profile_marker = False + attached_launched_profile = False + attached_browser_context_id = None + attached_profile_id = None + page = None + if self.preferred_target_marker: + deadline = time.time() + 8 + while time.time() < deadline: + page = next( + (t for t in await self._targets() if local_profiles.target_url_contains_marker(t, self.preferred_target_marker)), + None, + ) + if page: + break + await asyncio.sleep(0.15) + if not page: + raise RuntimeError("profile-target-missing: selected Chrome profile target did not appear; refusing to attach to an arbitrary existing profile") + attached_profile_marker = True + attached_profile_id = self.preferred_profile_id + attached_browser_context_id = page.get("browserContextId") + self.preferred_target_marker = None + self.preferred_profile_id = None + targets = await self._targets() + page = self._select_work_target( + targets, + attached_browser_context_id, + exclude_target_ids={page.get("targetId")}, + ) + else: + targets = await self._targets() + launched_profile_id = self.preferred_profile_id + if launched_profile_id: + page = self._select_work_target(targets) + attached_profile_id = launched_profile_id + attached_browser_context_id = page.get("browserContextId") if page else None + attached_launched_profile = True + self.preferred_profile_id = None + else: + page = self._select_work_target(targets) + if not page: + # No real pages - create one instead of attaching to omnibox popup. 
+ params = {"url": "about:blank"} + target_context_id = attached_browser_context_id or self.preferred_browser_context_id + if target_context_id: + params["browserContextId"] = target_context_id + tid = (await self.cdp.send_raw("Target.createTarget", params))["targetId"] + self.owned_target_ids.add(tid) log(f"no real pages found, created about:blank ({tid})") - pages = [{"targetId": tid, "url": "about:blank", "type": "page"}] + page = {"targetId": tid, "url": "about:blank", "type": "page"} + if target_context_id: + page["browserContextId"] = target_context_id + if attached_profile_id and not attached_browser_context_id: + try: + info = await self._target_info(tid) + attached_browser_context_id = info.get("browserContextId") + except Exception: + pass self.session = (await self.cdp.send_raw( - "Target.attachToTarget", {"targetId": pages[0]["targetId"], "flatten": True} + "Target.attachToTarget", {"targetId": page["targetId"], "flatten": True} ))["sessionId"] - self.target_id = pages[0]["targetId"] - log(f"attached {pages[0]['targetId']} ({pages[0].get('url','')[:80]}) session={self.session}") + self.target_id = page["targetId"] + if attached_profile_marker or attached_launched_profile: + self.active_local_profile_id = attached_profile_id + self.preferred_browser_context_id = attached_browser_context_id + elif not self.selected_local_profile: + self.active_local_profile_id = None + self.preferred_browser_context_id = None + log(f"attached {page['targetId']} ({page.get('url','')[:80]}) session={self.session}") await self._enable_default_domains(self.session) - return pages[0] + if attached_profile_marker: + await self._close_profile_marker_targets(attached_browser_context_id) + await self._close_remote_debugging_setup_targets() + return page + + async def close_owned_targets(self): + if not self.cdp: + return + target_ids = list(self.owned_target_ids) + self.owned_target_ids.clear() + for target_id in target_ids: + await _silent(self.cdp.send_raw("Target.closeTarget", 
{"targetId": target_id})) async def _enable_default_domains(self, session_id): """Enable Page/DOM/Runtime/Network on a CDP session. @@ -231,7 +418,8 @@ async def enable_one(d): async def start(self): self.stop = asyncio.Event() - url = get_ws_url() + selected_profile = self._prepare_selected_local_profile() + url = get_ws_url(selected_profile) log(f"connecting to {url}") self.cdp = CDPClient(url) try: @@ -241,7 +429,7 @@ async def start(self): raise RuntimeError( f"CDP WS handshake failed: {e} -- remote browser WebSocket connection failed. " "This can happen when network policy blocks the connection, the WS URL is wrong or expired, or the remote endpoint is down. " - "If you use Browser Use cloud, verify BROWSER_USE_API_KEY and get a fresh URL via start_remote_daemon()." + "If you use Browser Use cloud, verify auth and start a fresh cloud browser." ) raise RuntimeError(f"CDP WS handshake failed: {e} -- click Allow in Chrome if prompted, then retry") await self.attach_first_page() @@ -283,15 +471,21 @@ async def handle(self, req): if not self.target_id: return {"error": "not_attached"} try: - info = (await self.cdp.send_raw("Target.getTargetInfo", {"targetId": self.target_id}))["targetInfo"] + info = await self._target_info(self.target_id) except Exception: - return {"error": "cdp_disconnected"} - return {"targetId": info.get("targetId"), "url": info.get("url", ""), "title": info.get("title", "")} + return {"error": "target-gone"} + return { + "targetId": info.get("targetId"), + "url": info.get("url", ""), + "title": info.get("title", ""), + "browserContextId": info.get("browserContextId"), + "local_profile_id": self.active_local_profile_id, + } if meta == "connection_status": if not self.target_id: return {"error": "not_attached"} try: - info = (await self.cdp.send_raw("Target.getTargetInfo", {"targetId": self.target_id}))["targetInfo"] + info = await self._target_info(self.target_id) except Exception: return {"error": "cdp_disconnected"} page = None @@ -300,12 
+494,25 @@ async def handle(self, req): "targetId": info.get("targetId"), "title": info.get("title") or "(untitled)", "url": info.get("url") or "", + "browserContextId": info.get("browserContextId"), } - return {"target_id": self.target_id, "session_id": self.session, "page": page} + return { + "target_id": self.target_id, + "session_id": self.session, + "local_profile_id": self.active_local_profile_id, + "profile_context_id": self.preferred_browser_context_id, + "page": page, + } if meta == "set_session": + target_id = req.get("target_id") or self.target_id + if target_id: + try: + await self._ensure_target_browser_context(target_id) + except Exception as e: + return {"error": str(e)} old_session = self.session self.session = req.get("session_id") - self.target_id = req.get("target_id") or self.target_id + self.target_id = target_id # Run the old-session Network.disable (defense in depth — keeps # background-tab traffic out of the global event buffer; the # consumer-side filter in wait_for_network_idle is the actual @@ -342,17 +549,36 @@ async def disable_old(): method = req["method"] params = req.get("params") or {} + if self.preferred_browser_context_id: + try: + if method == "Target.createTarget": + requested = params.get("browserContextId") + if requested and requested != self.preferred_browser_context_id: + return {"error": "wrong-profile: refusing to create a target in a different Chrome profile context"} + params = {**params, "browserContextId": self.preferred_browser_context_id} + elif method == "Target.attachToTarget" and params.get("targetId"): + await self._ensure_target_browser_context(params["targetId"]) + except Exception as e: + return {"error": str(e)} # Browser-level Target.* calls must not use a session (stale or otherwise). # For everything else, explicit session in req wins; else default. 
sid = None if method.startswith("Target.") else (req.get("session_id") or self.session) try: - return {"result": await self.cdp.send_raw(method, params, session_id=sid)} + result = await self.cdp.send_raw(method, params, session_id=sid) + if method == "Target.createTarget" and isinstance(result, dict): + target_id = result.get("targetId") + if target_id: + self.owned_target_ids.add(target_id) + return {"result": result} except Exception as e: msg = str(e) if "Session with given id not found" in msg and sid == self.session and sid: - log(f"stale session {sid}, re-attaching") - if await self.attach_first_page(): - return {"result": await self.cdp.send_raw(method, params, session_id=self.session)} + log(f"stale session {sid}, re-attaching same target") + try: + if await self._reattach_current_target(): + return {"result": await self.cdp.send_raw(method, params, session_id=self.session)} + except Exception as reattach_error: + return {"error": str(reattach_error)} return {"error": msg} @@ -382,6 +608,7 @@ async def handler(reader, writer): await asyncio.wait({serve_task, stop_task}, return_when=asyncio.FIRST_COMPLETED) if serve_task.done(): await serve_task # surfaces a serve crash finally: + await d.close_owned_targets() for t in (serve_task, stop_task): t.cancel() try: await t diff --git a/src/browser_harness/helpers.py b/src/browser_harness/helpers.py index 2014887b..75eb6b1d 100644 --- a/src/browser_harness/helpers.py +++ b/src/browser_harness/helpers.py @@ -8,11 +8,13 @@ from urllib.parse import urlparse from . import _ipc as ipc +from . import context +from . 
import paths CORE_DIR = Path(__file__).resolve().parent REPO_ROOT = CORE_DIR.parent.parent -AGENT_WORKSPACE = Path(os.environ.get("BH_AGENT_WORKSPACE", REPO_ROOT / "agent-workspace")).expanduser() +AGENT_WORKSPACE = paths.workspace_dir() def _load_env(): @@ -37,10 +39,12 @@ def _load_env_file(p): NAME = os.environ.get("BU_NAME", "default") SOCK = ipc.sock_addr(NAME) INTERNAL = ("chrome://", "chrome-untrusted://", "devtools://", "chrome-extension://", "about:") +PROFILE_MARKER = "browser-use-profile-target" def _send(req): - c, token = ipc.connect(NAME, timeout=5.0) + binding = context.require_active_binding() + c, token = ipc.connect(binding.bu_name, timeout=5.0, runtime_dir=binding.runtime_dir) try: r = ipc.request(c, token, req) finally: @@ -117,42 +121,13 @@ def _runtime_evaluate(expression, session_id=None, await_promise=False): return _runtime_value(r, expression) -def _has_return_statement(expression): - i = 0 - n = len(expression) - state = "code" - quote = "" - while i < n: - ch = expression[i] - nxt = expression[i + 1] if i + 1 < n else "" - if state == "code": - if ch in ("'", '"', "`"): - state = "string"; quote = ch; i += 1; continue - if ch == "/" and nxt == "/": - state = "line_comment"; i += 2; continue - if ch == "/" and nxt == "*": - state = "block_comment"; i += 2; continue - if expression.startswith("return", i): - before = expression[i - 1] if i > 0 else "" - after = expression[i + 6] if i + 6 < n else "" - if not (before == "_" or before.isalnum()) and not (after == "_" or after.isalnum()): - return True - i += 1; continue - if state == "line_comment": - if ch == "\n": - state = "code" - i += 1; continue - if state == "block_comment": - if ch == "*" and nxt == "/": - state = "code"; i += 2; continue - i += 1; continue - if state == "string": - if ch == "\\": - i += 2; continue - if ch == quote: - state = "code"; quote = "" - i += 1; continue - return False +def _wrap_js_function(expression): + return f"(function(){{{expression}}})()" + + +def 
_is_illegal_return_error(exc): + return "Illegal return statement" in str(exc) + # --- navigation / page --- @@ -256,8 +231,10 @@ def press_key(key, modifiers=0): so listeners checking e.keyCode / e.key all fire.""" vk, code, text = _KEYS.get(key, (ord(key[0]) if len(key) == 1 else 0, key, key if len(key) == 1 else "")) base = {"key": key, "code": code, "modifiers": modifiers, "windowsVirtualKeyCode": vk, "nativeVirtualKeyCode": vk} - cdp("Input.dispatchKeyEvent", type="keyDown", **base, **({"text": text} if text else {})) - if text and len(text) == 1: + shortcut_modifiers = modifiers & (1 | 2 | 4) # Alt/Ctrl/Meta turn single keys into shortcuts. + printable_char = len(key) == 1 and bool(text) and not shortcut_modifiers + cdp("Input.dispatchKeyEvent", type="keyDown", **base, **({} if printable_char or not text else {"text": text})) + if printable_char: cdp("Input.dispatchKeyEvent", type="char", text=text, **{k: v for k, v in base.items() if k != "text"}) cdp("Input.dispatchKeyEvent", type="keyUp", **base) @@ -269,7 +246,12 @@ def scroll(x, y, dy=-300, dx=0): def capture_screenshot(path=None, full=False, max_dim=None): """Save a PNG of the current viewport. 
Set max_dim=1800 on a 2× display to keep the file under the 2000px-per-side limit some image-aware LLMs enforce.""" - path = path or str(ipc._TMP / "shot.png") + if path is None: + binding = context.get_active_binding() + if binding and binding.manager_mode: + path = str(context.active_artifact_dir() / "shot.png") + else: + path = str(ipc._TMP / "shot.png") r = cdp("Page.captureScreenshot", format="png", captureBeyondViewport=full) open(path, "wb").write(base64.b64decode(r["data"])) if max_dim: @@ -282,18 +264,51 @@ def capture_screenshot(path=None, full=False, max_dim=None): # --- tabs --- -def list_tabs(include_chrome=True): +def _is_agent_startup_placeholder(title, url): + url = str(url or "") + return str(title or "").startswith("Starting agent ") and ( + url in ("", "about:blank") or url.startswith("about:blank#") + ) + + +def _current_target_browser_context_id(): + try: + return current_tab().get("browserContextId") + except Exception: + return None + + +def list_tabs(include_chrome=True, include_other_contexts=False): out = [] + current_context = None if include_other_contexts else _current_target_browser_context_id() for t in cdp("Target.getTargets")["targetInfos"]: if t["type"] != "page": continue + if current_context and t.get("browserContextId") != current_context: continue url = t.get("url", "") + if _is_agent_startup_placeholder(t.get("title", ""), url): continue + if not include_chrome and PROFILE_MARKER in url: continue if not include_chrome and url.startswith(INTERNAL): continue - out.append({"targetId": t["targetId"], "title": t.get("title", ""), "url": url}) + out.append({ + "targetId": t["targetId"], + "target_id": t["targetId"], + "title": t.get("title", ""), + "url": url, + "browserContextId": t.get("browserContextId"), + "browser_context_id": t.get("browserContextId"), + }) return out def current_tab(): r = _send({"meta": "current_tab"}) - return {"targetId": r["targetId"], "url": r["url"], "title": r["title"]} + return { + "targetId": 
r["targetId"], + "target_id": r["targetId"], + "url": r["url"], + "title": r["title"], + "browserContextId": r.get("browserContextId"), + "browser_context_id": r.get("browserContextId"), + "local_profile_id": r.get("local_profile_id"), + } def _mark_tab(): """Prepend horse emoji to tab title so the user can see which tab the agent controls.""" @@ -303,7 +318,7 @@ def _mark_tab(): def switch_tab(target): # Accept either a raw targetId string or the dict returned by current_tab() / list_tabs(), # so `switch_tab(current_tab())` works without a manual ["targetId"] dance. - target_id = target.get("targetId") if isinstance(target, dict) else target + target_id = (target.get("targetId") or target.get("target_id")) if isinstance(target, dict) else target # Unmark old tab. Horse emoji is a surrogate pair in JS UTF-16 strings (2 code units), # plus the trailing space = 3 code units, so slice(3) cleanly removes the prefix. try: cdp("Runtime.evaluate", expression="if(document.title.startsWith('\U0001F434 '))document.title=document.title.slice(3)") @@ -318,7 +333,20 @@ def new_tab(url="about:blank"): # Always create blank, then goto: passing url to createTarget races with # attach, so the brief about:blank is "complete" by the time the caller # polls and wait_for_load() returns before navigation actually starts. 
- tid = cdp("Target.createTarget", url="about:blank")["targetId"] + if url != "about:blank": + try: + cur = current_tab() + cur_url = cur.get("url") or "" + if cur_url in ("", "about:blank") or cur_url.startswith("about:blank#"): + goto_url(url) + return cur.get("targetId") or cur.get("target_id") + except Exception: + pass + params = {"url": "about:blank"} + browser_context_id = _current_target_browser_context_id() + if browser_context_id: + params["browserContextId"] = browser_context_id + tid = cdp("Target.createTarget", **params)["targetId"] switch_tab(tid) if url != "about:blank": goto_url(url) @@ -327,7 +355,7 @@ def new_tab(url="about:blank"): def close_tab(target=None): """Close a tab. If `target` is omitted, closes the currently attached tab. Accepts a raw targetId string or a dict from list_tabs()/current_tab().""" - target_id = target.get("targetId") if isinstance(target, dict) else target + target_id = (target.get("targetId") or target.get("target_id")) if isinstance(target, dict) else target if target_id is None: target_id = current_tab()["targetId"] cdp("Target.closeTarget", targetId=target_id) @@ -435,13 +463,18 @@ def wait_for_network_idle(timeout=10.0, idle_ms=500): def js(expression, target_id=None): """Run JS in the attached tab (default) or inside an iframe target (via iframe_target()). - Expressions with top-level `return` are automatically wrapped in an IIFE, so both - `document.title` and `const x = 1; return x` are valid inputs. + Expressions are evaluated as-is first. If Chrome reports an illegal top-level + `return`, the snippet is retried inside a function wrapper, so both + `document.title` and `const x = 1; return x` work without mis-wrapping nested + functions that contain their own returns. 
""" sid = cdp("Target.attachToTarget", targetId=target_id, flatten=True)["sessionId"] if target_id else None - if _has_return_statement(expression) and not expression.strip().startswith("("): - expression = f"(function(){{{expression}}})()" - return _runtime_evaluate(expression, session_id=sid, await_promise=True) + try: + return _runtime_evaluate(expression, session_id=sid, await_promise=True) + except RuntimeError as e: + if _is_illegal_return_error(e): + return _runtime_evaluate(_wrap_js_function(expression), session_id=sid, await_promise=True) + raise _KC = {"Enter": 13, "Tab": 9, "Escape": 27, "Backspace": 8, " ": 32, "ArrowLeft": 37, "ArrowUp": 38, "ArrowRight": 39, "ArrowDown": 40} diff --git a/src/browser_harness/local_profiles.py b/src/browser_harness/local_profiles.py new file mode 100644 index 00000000..4a6f5afe --- /dev/null +++ b/src/browser_harness/local_profiles.py @@ -0,0 +1,641 @@ +"""Native Chromium-family profile discovery and selected-profile state.""" +from __future__ import annotations + +from dataclasses import asdict, dataclass +import json +import os +from pathlib import Path +import socket +import subprocess +import sys +import time +import urllib.error +import urllib.request + +from . 
MARKER_URL_PREFIX = "https://browser-use.com/browser-use-profile-target/"
INTERNAL_URL_PREFIXES = (
    "chrome://",
    "chrome-untrusted://",
    "devtools://",
    "chrome-extension://",
    "about:",
)


@dataclass(frozen=True)
class LocalBrowserInstall:
    """A browser binary paired with the user-data directory it uses."""

    browser_name: str
    browser_path: Path
    user_data_dir: Path

    def payload(self) -> dict:
        """Return a JSON-serialisable dict with paths rendered as strings."""
        return {
            "browser_name": self.browser_name,
            "browser_path": str(self.browser_path),
            "user_data_dir": str(self.user_data_dir),
        }


@dataclass(frozen=True)
class LocalBrowserProfile:
    """One profile directory inside a browser's user-data directory."""

    id: str
    browser_name: str
    browser_path: Path
    user_data_dir: Path
    profile_dir: str
    profile_name: str
    profile_path: Path
    display_name: str

    def payload(self) -> dict:
        """Return a JSON-serialisable dict with every Path field rendered as a string."""
        data = asdict(self)
        for key in ("browser_path", "user_data_dir", "profile_path"):
            data[key] = str(data[key])
        return data


@dataclass(frozen=True)
class LocalCandidate:
    """A discovered local CDP endpoint together with its diagnosis fields."""

    id: str
    browser_name: str
    browser_path: str | None
    profile_path: str
    http_url: str | None
    ws_url: str
    source: str
    connectable: bool
    state: str
    stale: bool
    browser_running: bool | None
    remote_debugging_enabled: bool | None
    reason: str | None
    next_step: str | None

    def payload(self) -> dict:
        """Return the candidate as a plain dict (all fields are already JSON-friendly)."""
        return asdict(self)


def config_dir() -> Path:
    """Return the browser-harness configuration directory (delegates to `paths`)."""
    return paths.config_dir()


def profile_config_path() -> Path:
    """Return the path of the current settings file that stores the selected profile."""
    return config_dir() / "settings.json"


def legacy_profile_config_path() -> Path:
    """Return the path of the older `profile.json` file, still read as a fallback."""
    return config_dir() / "profile.json"


def get_default_profile_id() -> str | None:
    """Return the selected local profile id, or None when nothing is selected.

    Lookup order: the `BH_SELECTED_LOCAL_PROFILE` and `BH_LOCAL_PROFILE`
    environment variables, then `settings.json`, then the legacy
    `profile.json`. The first file that parses to a JSON object wins, even
    if it stores no id, so an explicitly cleared selection is not resurrected
    from the legacy file.
    """
    for key in ("BH_SELECTED_LOCAL_PROFILE", "BH_LOCAL_PROFILE"):
        value = (os.environ.get(key) or "").strip()
        if value:
            return value
    for path in (profile_config_path(), legacy_profile_config_path()):
        try:
            data = json.loads(path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # OSError: missing/unreadable file. ValueError: invalid JSON or undecodable bytes.
            continue
        if not isinstance(data, dict):
            # Valid JSON of the wrong shape (list, string, ...) is treated like a corrupt file.
            continue
        value = str(data.get("default_local_profile_id") or "").strip()
        return value or None
    return None
def set_default_profile_id(profile_id: str | None) -> dict:
    """Persist (or clear) the selected local profile and return the stored record.

    Args:
        profile_id: Profile reference to select. A falsy value clears the
            selection.

    Returns:
        The dict written to `settings.json`, with the keys
        `default_local_profile_id` and `default_local_profile_label`.

    Raises:
        RuntimeError: propagated from profile resolution / binary validation
            when `profile_id` does not name a usable profile.
    """
    # Validate first so a bad profile id leaves the filesystem untouched.
    if profile_id:
        profile = resolve_local_profile(profile_id)
        require_browser_binary(profile)
        data = {
            "default_local_profile_id": profile.id,
            "default_local_profile_label": profile.display_name,
        }
    else:
        data = {
            "default_local_profile_id": None,
            "default_local_profile_label": None,
        }
    path = profile_config_path()
    path.parent.mkdir(parents=True, exist_ok=True)
    # Per-process temp name: two concurrent writers must not share one temp file.
    tmp = path.with_name(f"{path.name}.{os.getpid()}.tmp")
    try:
        tmp.write_text(json.dumps(data, indent=2), encoding="utf-8")
        os.replace(tmp, path)  # atomic swap; readers never see a half-written file
    except BaseException:
        # Do not leave a stray temp file behind; cleanup is best-effort.
        try:
            tmp.unlink()
        except OSError:
            pass
        raise
    return data
Path("/Applications/Helium.app/Contents/MacOS/Helium"), home / "Library/Application Support/Helium"), + ("Sidekick", Path("/Applications/Sidekick.app/Contents/MacOS/Sidekick"), home / "Library/Application Support/Sidekick"), + ("Thorium", Path("/Applications/Thorium.app/Contents/MacOS/Thorium"), home / "Library/Application Support/Thorium"), + ("SigmaOS", Path("/Applications/SigmaOS.app/Contents/MacOS/SigmaOS"), home / "Library/Application Support/SigmaOS/User Data"), + ("Wavebox", Path("/Applications/Wavebox.app/Contents/MacOS/Wavebox"), home / "Library/Application Support/WaveboxApp"), + ("Ghost Browser", Path("/Applications/Ghost Browser.app/Contents/MacOS/Ghost Browser"), home / "Library/Application Support/Ghost Browser"), + ("Blisk", Path("/Applications/Blisk.app/Contents/MacOS/Blisk"), home / "Library/Application Support/Blisk"), + ("Opera", Path("/Applications/Opera.app/Contents/MacOS/Opera"), home / "Library/Application Support/com.operasoftware.Opera"), + ("Vivaldi", Path("/Applications/Vivaldi.app/Contents/MacOS/Vivaldi"), home / "Library/Application Support/Vivaldi"), + ("Yandex", Path("/Applications/Yandex.app/Contents/MacOS/Yandex"), home / "Library/Application Support/Yandex/YandexBrowser"), + ("Iridium", Path("/Applications/Iridium.app/Contents/MacOS/Iridium"), home / "Library/Application Support/Iridium"), + ("Google Chrome", Path("/usr/bin/google-chrome"), home / ".config/google-chrome"), + ("Google Chrome", Path("/usr/bin/google-chrome-stable"), home / ".config/google-chrome"), + ("Brave", Path("/usr/bin/brave-browser"), home / ".config/BraveSoftware/Brave-Browser"), + ("Brave", Path("/usr/bin/brave"), home / ".config/BraveSoftware/Brave-Browser"), + ("Brave", Path("/snap/bin/brave"), home / ".config/BraveSoftware/Brave-Browser"), + ("Microsoft Edge", Path("/usr/bin/microsoft-edge"), home / ".config/microsoft-edge"), + ("Microsoft Edge", Path("/usr/bin/microsoft-edge-stable"), home / ".config/microsoft-edge"), + ("Chromium", 
Path("/usr/bin/chromium"), home / ".config/chromium"), + ("Chromium", Path("/usr/bin/chromium-browser"), home / ".config/chromium"), + ("Chromium", Path("/snap/bin/chromium"), home / ".config/chromium"), + ("Opera", Path("/usr/bin/opera"), home / ".config/opera"), + ("Opera", Path("/snap/bin/opera"), home / ".config/opera"), + ("Vivaldi", Path("/usr/bin/vivaldi"), home / ".config/vivaldi"), + ("Vivaldi", Path("/usr/bin/vivaldi-stable"), home / ".config/vivaldi"), + ("Vivaldi", Path("/snap/bin/vivaldi"), home / ".config/vivaldi"), + ("Yandex", Path("/usr/bin/yandex-browser"), home / ".config/yandex-browser"), + ("Yandex", Path("/usr/bin/yandex-browser-stable"), home / ".config/yandex-browser"), + ("Iridium", Path("/usr/bin/iridium-browser"), home / ".config/iridium"), + ("Ungoogled Chromium", Path("/usr/bin/ungoogled-chromium"), home / ".config/chromium"), + ("Thorium", Path("/usr/bin/thorium-browser"), home / ".config/thorium"), + ("Sidekick", home / ".local/share/sidekick/sidekick", home / ".config/Sidekick"), + ("Wavebox", Path("/usr/bin/wavebox"), home / ".config/Wavebox"), + ("Google Chrome", program_files / "Google/Chrome/Application/chrome.exe", local_app_data / "Google/Chrome/User Data"), + ("Google Chrome", program_files_x86 / "Google/Chrome/Application/chrome.exe", local_app_data / "Google/Chrome/User Data"), + ("Google Chrome", local_app_data / "Google/Chrome/Application/chrome.exe", local_app_data / "Google/Chrome/User Data"), + ("Brave", program_files / "BraveSoftware/Brave-Browser/Application/brave.exe", local_app_data / "BraveSoftware/Brave-Browser/User Data"), + ("Brave", local_app_data / "BraveSoftware/Brave-Browser/Application/brave.exe", local_app_data / "BraveSoftware/Brave-Browser/User Data"), + ("Microsoft Edge", program_files / "Microsoft/Edge/Application/msedge.exe", local_app_data / "Microsoft/Edge/User Data"), + ("Microsoft Edge", program_files_x86 / "Microsoft/Edge/Application/msedge.exe", local_app_data / "Microsoft/Edge/User Data"), + 
("Chromium", local_app_data / "Chromium/Application/chrome.exe", local_app_data / "Chromium/User Data"), + ("Opera", local_app_data / "Programs/Opera/opera.exe", home / "AppData/Roaming/Opera Software/Opera Stable"), + ("Opera", program_files / "Opera/opera.exe", home / "AppData/Roaming/Opera Software/Opera Stable"), + ("Vivaldi", local_app_data / "Vivaldi/Application/vivaldi.exe", local_app_data / "Vivaldi/User Data"), + ("Vivaldi", program_files / "Vivaldi/Application/vivaldi.exe", local_app_data / "Vivaldi/User Data"), + ("Yandex", local_app_data / "Yandex/YandexBrowser/Application/browser.exe", local_app_data / "Yandex/YandexBrowser/User Data"), + ("Iridium", local_app_data / "Iridium/Application/iridium.exe", local_app_data / "Iridium/User Data"), + ("Sidekick", local_app_data / "Sidekick/Application/sidekick.exe", local_app_data / "Sidekick/User Data"), + ("Thorium", local_app_data / "Thorium/Application/thorium.exe", local_app_data / "Thorium/User Data"), + ("Wavebox", local_app_data / "WaveboxApp/Application/wavebox.exe", local_app_data / "WaveboxApp/User Data"), + ("Blisk", local_app_data / "Blisk/Application/blisk.exe", local_app_data / "Blisk/User Data"), + ] + installs: list[LocalBrowserInstall] = [] + seen: dict[tuple[str, Path], int] = {} + for browser_name, browser_path, user_data_dir in candidates: + if not browser_path.exists() and not user_data_dir.exists(): + continue + key = (browser_name, user_data_dir) + candidate = LocalBrowserInstall(browser_name, browser_path, user_data_dir) + if key in seen: + index = seen[key] + if not installs[index].browser_path.exists() and browser_path.exists(): + installs[index] = candidate + else: + seen[key] = len(installs) + installs.append(candidate) + return installs + + +def known_profile_roots() -> list[tuple[str, Path]]: + home = Path.home() + return [ + ("Google Chrome", home / "Library/Application Support/Google/Chrome"), + ("Chrome Canary", home / "Library/Application Support/Google/Chrome Canary"), + 
("Comet", home / "Library/Application Support/Comet"), + ("Arc", home / "Library/Application Support/Arc/User Data"), + ("Dia", home / "Library/Application Support/Dia/User Data"), + ("Microsoft Edge", home / "Library/Application Support/Microsoft Edge"), + ("Microsoft Edge Beta", home / "Library/Application Support/Microsoft Edge Beta"), + ("Microsoft Edge Dev", home / "Library/Application Support/Microsoft Edge Dev"), + ("Microsoft Edge Canary", home / "Library/Application Support/Microsoft Edge Canary"), + ("Brave", home / "Library/Application Support/BraveSoftware/Brave-Browser"), + ("Google Chrome", home / ".config/google-chrome"), + ("Chromium", home / ".config/chromium"), + ("Chromium", home / ".config/chromium-browser"), + ("Microsoft Edge", home / ".config/microsoft-edge"), + ("Microsoft Edge Beta", home / ".config/microsoft-edge-beta"), + ("Microsoft Edge Dev", home / ".config/microsoft-edge-dev"), + ("Chromium", home / ".var/app/org.chromium.Chromium/config/chromium"), + ("Google Chrome", home / ".var/app/com.google.Chrome/config/google-chrome"), + ("Brave", home / ".var/app/com.brave.Browser/config/BraveSoftware/Brave-Browser"), + ("Microsoft Edge", home / ".var/app/com.microsoft.Edge/config/microsoft-edge"), + ("Google Chrome", home / "AppData/Local/Google/Chrome/User Data"), + ("Chrome Canary", home / "AppData/Local/Google/Chrome SxS/User Data"), + ("Chromium", home / "AppData/Local/Chromium/User Data"), + ("Microsoft Edge", home / "AppData/Local/Microsoft/Edge/User Data"), + ("Microsoft Edge Beta", home / "AppData/Local/Microsoft/Edge Beta/User Data"), + ("Microsoft Edge Dev", home / "AppData/Local/Microsoft/Edge Dev/User Data"), + ("Microsoft Edge Canary", home / "AppData/Local/Microsoft/Edge SxS/User Data"), + ("Brave", home / "AppData/Local/BraveSoftware/Brave-Browser/User Data"), + ] + + +def detect_local_profiles() -> list[LocalBrowserProfile]: + profiles: list[LocalBrowserProfile] = [] + seen: set[tuple[Path, str]] = set() + for install in 
def list_local_profiles_payload() -> dict:
    """Return the verbose profile listing: status, default id, and full profile payloads."""
    selected_id = get_default_profile_id()
    serialized = [profile.payload() for profile in detect_local_profiles()]
    return {
        "status": "ok",
        "default_profile_id": selected_id,
        "profiles": serialized,
    }


def list_browser_profiles_payload(verbose: bool = False) -> dict:
    """Return the profile listing, compact by default.

    With `verbose=True` this is exactly `list_local_profiles_payload()`.
    Otherwise each profile is reduced to its id, display label, and whether
    it is the currently selected one.
    """
    if verbose:
        return list_local_profiles_payload()
    selected = get_default_profile_id()
    entries = []
    for profile in detect_local_profiles():
        entries.append({
            "id": profile.id,
            "label": profile.display_name,
            "selected": profile.id == selected,
        })
    return {"selected": selected, "profiles": entries}


def use_browser_profile(profile_id: str) -> dict:
    """Select `profile_id` as the default and return its id and label."""
    saved = set_default_profile_id(profile_id)
    key_map = (
        ("selected", "default_local_profile_id"),
        ("label", "default_local_profile_label"),
    )
    return {public: saved.get(stored) for public, stored in key_map}
def require_browser_binary(profile: LocalBrowserProfile) -> None:
    """Raise RuntimeError unless the profile's browser binary is usable."""
    if not browser_binary_usable(profile.browser_path):
        raise RuntimeError(f"browser binary not found or not executable for {profile.id}: {profile.browser_path}")


def browser_binary_usable(path: Path) -> bool:
    """Return True when `path` is an existing file that can be launched.

    On Windows existence is enough; elsewhere the file must also carry
    execute permission for the current user. A directory is never usable.
    """
    try:
        if not path.is_file():
            return False
        return True if sys.platform == "win32" else os.access(path, os.X_OK)
    except OSError:
        return False


def _read_local_state(user_data_dir: Path) -> dict | None:
    """Parse `<user_data_dir>/Local State`; return None if missing, unreadable, or not a JSON object."""
    try:
        # Read explicitly as UTF-8 so profile names do not depend on the platform default encoding.
        value = json.loads((user_data_dir / "Local State").read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both invalid JSON and undecodable bytes.
        return None
    return value if isinstance(value, dict) else None


def load_profile_names_from_local_state(user_data_dir: Path) -> dict[str, str]:
    """Map profile directory names to their display names from `Local State`.

    Reads `profile.info_cache` and keeps only entries with a non-blank
    `name`. Any missing or malformed level yields an empty dict.
    """
    state = _read_local_state(user_data_dir)
    if state is None:
        return {}
    profile_section = state.get("profile")
    info_cache = profile_section.get("info_cache") if isinstance(profile_section, dict) else None
    if not isinstance(info_cache, dict):
        return {}
    names: dict[str, str] = {}
    for profile_dir, info in info_cache.items():
        if not isinstance(info, dict):
            continue
        name = str(info.get("name") or "").strip()
        if name:
            names[profile_dir] = name
    return names


def remote_debugging_user_enabled(user_data_dir: Path) -> bool | None:
    """Return the `devtools.remote_debugging.user-enabled` flag from `Local State`.

    Returns None when the file is missing or malformed, or when the flag is
    absent or not a boolean.
    """
    node = _read_local_state(user_data_dir)
    for key in ("devtools", "remote_debugging"):
        if not isinstance(node, dict):
            return None
        node = node.get(key)
    if not isinstance(node, dict):
        return None
    flag = node.get("user-enabled")
    return flag if isinstance(flag, bool) else None
def browser_slug(name: str) -> str:
    """Return a lowercase ASCII slug: runs of non-alphanumerics become one dash, edges trimmed."""
    out = []
    last_dash = False
    for ch in name.lower():
        if ch.isascii() and ch.isalnum():
            out.append(ch)
            last_dash = False
        elif not last_dash:
            out.append("-")
            last_dash = True
    return "".join(out).strip("-")


def profile_dir_sort_key(profile_dir: str) -> tuple[int, list[tuple[int, object]]]:
    """Sort key that puts "Default" first and orders everything else naturally."""
    return (0, []) if profile_dir == "Default" else (1, natural_key(profile_dir))


def natural_key(value: str) -> list[tuple[int, object]]:
    """Split `value` into digit and text runs for natural ("Profile 2" < "Profile 10") ordering.

    Each run becomes `(0, int)` for digits or `(1, str)` for text; the
    leading tag keeps ints and strings from ever being compared directly.
    """
    out: list[tuple[int, object]] = []
    buf = ""
    is_number = False
    for ch in value:
        # isdecimal(), not isdigit(): isdigit() accepts characters such as "²"
        # that int() cannot parse, which made sorting raise ValueError.
        numeric = ch.isdecimal()
        if buf and numeric != is_number:
            out.append((0, int(buf)) if is_number else (1, buf))
            buf = ""
        buf += ch
        is_number = numeric
    if buf:
        out.append((0, int(buf)) if is_number else (1, buf))
    return out


def browser_process_running(browser_name: str, browser_path: Path | None = None) -> bool | None:
    """Best-effort check whether the browser appears in the process list.

    On Windows the executable file name is searched in `tasklist` output. On
    other platforms the full binary path is searched in `ps` output first,
    then the browser name case-insensitively.

    Returns:
        True/False when the process list could be read, None when it could
        not (any failure is deliberately swallowed; this is a heuristic).
    """
    try:
        if sys.platform == "win32":
            out = subprocess.check_output(["tasklist", "/FO", "CSV"], text=True, timeout=5, stderr=subprocess.DEVNULL)
            exe = browser_path.name.lower() if browser_path else ""
            return bool(exe and exe in out.lower())
        out = subprocess.check_output(["ps", "-axo", "pid=,comm=,args="], text=True, timeout=5, stderr=subprocess.DEVNULL)
        if browser_path:
            path = str(browser_path)
            if path and path in out:
                return True
        # Substring match on the name: can over-match unrelated processes, so treat as a hint only.
        return browser_name.lower() in out.lower()
    except Exception:
        return None
9223]) + + +def local_candidates_from_roots( + roots: list[tuple[str, Path | None, Path]], + probe_ports: list[int], +) -> list[LocalCandidate]: + candidates: list[LocalCandidate] = [] + seen_ws: set[str] = set() + for browser_name, browser_path, user_data_dir in roots: + active = user_data_dir / "DevToolsActivePort" + try: + lines = active.read_text().splitlines() + except (FileNotFoundError, NotADirectoryError, OSError): + continue + port = lines[0].strip() if lines else "" + ws_path = lines[1].strip() if len(lines) > 1 else "" + if not port or not ws_path: + continue + ws_url = f"ws://127.0.0.1:{port}{ws_path}" + if ws_url in seen_ws: + continue + seen_ws.add(ws_url) + connectable = tcp_port_open("127.0.0.1", int(port) if port.isdigit() else 0) + running = browser_process_running(browser_name, browser_path) + enabled = remote_debugging_user_enabled(user_data_dir) + if connectable: + state, reason, next_step = "reachable", None, "connect local browser" + else: + state, reason, next_step = local_disconnected_candidate_details(running, enabled) + candidates.append(LocalCandidate( + id=f"local-{len(candidates) + 1}", + browser_name=browser_name, + browser_path=str(browser_path) if browser_path else None, + profile_path=str(user_data_dir), + http_url=f"http://127.0.0.1:{port}", + ws_url=ws_url, + source=str(active), + connectable=connectable, + state=state, + stale=not connectable, + browser_running=running, + remote_debugging_enabled=enabled, + reason=reason, + next_step=next_step, + )) + for port in probe_ports: + http_url = f"http://127.0.0.1:{port}" + try: + ws_url = resolve_ws_from_http(http_url, timeout=0.5) + except Exception: + continue + if ws_url in seen_ws: + continue + seen_ws.add(ws_url) + candidates.append(LocalCandidate( + id=f"local-{len(candidates) + 1}", + browser_name=f"CDP port {port}", + browser_path=None, + profile_path="", + http_url=http_url, + ws_url=ws_url, + source="port-probe", + connectable=True, + state="reachable", + stale=False, + 
def local_debugging_disabled_statuses() -> list[dict]:
    """List installed browsers that are running with remote debugging switched off.

    Returns:
        One dict per matching install with the browser name, binary path,
        user-data directory, and the two observed flags.
    """
    out = []
    for install in known_local_browser_installs():
        # Check the cheap Local State flag first; only spawn a process listing
        # for installs that could still match.
        enabled = remote_debugging_user_enabled(install.user_data_dir)
        if enabled is not False:
            continue
        running = browser_process_running(install.browser_name, install.browser_path)
        if running is not True:
            continue
        out.append({
            "browser_name": install.browser_name,
            "browser_path": str(install.browser_path),
            "user_data_dir": str(install.user_data_dir),
            "browser_running": running,
            "remote_debugging_enabled": enabled,
        })
    return out
def resolve_ws_from_http(http_url: str, timeout: float = 15.0) -> str:
    """Fetch `<http_url>/json/version` and return its `webSocketDebuggerUrl`.

    Raises:
        RuntimeError: when the response is not a JSON object or lacks the URL.
        OSError / ValueError: propagated from the HTTP request or JSON parsing.
    """
    url = f"{http_url.rstrip('/')}/json/version"
    with urllib.request.urlopen(url, timeout=timeout) as resp:
        data = json.loads(resp.read() or b"{}")
    # A non-object body (list, string, ...) has no .get(); report it like a missing URL.
    ws = data.get("webSocketDebuggerUrl") if isinstance(data, dict) else None
    if not ws:
        raise RuntimeError(f"{url} missing webSocketDebuggerUrl")
    return ws


def tcp_port_open(host: str, port: int, timeout: float = 0.2) -> bool:
    """Return True when a TCP connection to `host:port` succeeds within `timeout` seconds.

    Ports outside 1..65535 are rejected up front instead of being handed to
    the socket layer.
    """
    if not 0 < port <= 65535:
        return False
    try:
        with socket.create_connection((host, port), timeout=timeout):
            return True
    except OSError:
        return False


def profile_marker_target_url(marker: str) -> str:
    """Return the marker URL used to identify a tab opened for a given profile launch."""
    return f"{MARKER_URL_PREFIX}{marker}"


def target_url_contains_marker(target: dict, marker: str) -> bool:
    """Return True when `target` is a marker page whose URL contains `marker`."""
    return is_profile_marker_target(target) and marker in str(target.get("url") or "")


def is_profile_marker_target(target: dict) -> bool:
    """Return True when `target` is a page whose URL contains the marker prefix."""
    # NOTE(review): this is a substring test, not startswith(); a page that merely embeds
    # the prefix in its query string would match too. Confirm whether that is intended.
    return target.get("type") == "page" and MARKER_URL_PREFIX in str(target.get("url") or "")


def is_remote_debugging_setup_target(target: dict) -> bool:
    """Return True when `target` is the chrome://inspect remote-debugging setup page."""
    return target.get("type") == "page" and str(target.get("url") or "").startswith("chrome://inspect/#remote-debugging")


def is_internal_browser_url(url: str) -> bool:
    """Return True when `url` starts with one of the browser-internal schemes."""
    return str(url or "").startswith(INTERNAL_URL_PREFIXES)


def is_real_page_target(target: dict) -> bool:
    """Return True for a page target with a non-blank, non-internal, non-marker URL."""
    if target.get("type") != "page":
        return False
    if is_profile_marker_target(target):
        return False
    url = str(target.get("url") or "")
    return bool(url.strip()) and not is_internal_browser_url(url)


def is_reusable_placeholder_target(target: dict) -> bool:
    """Return True for a blank page target (empty URL or about:blank) that can be reused."""
    if target.get("type") != "page":
        return False
    if is_profile_marker_target(target) or is_remote_debugging_setup_target(target):
        return False
    url = str(target.get("url") or "")
    return url in ("", "about:blank") or url.startswith("about:blank#")
None = None, + allow_marker: bool = True, + url: str | None = None, +) -> dict: + profile = resolve_local_profile(profile_ref) + require_browser_binary(profile) + profile_directory_arg = f"--profile-directory={profile.profile_dir}" + running = browser_process_running(profile.browser_name, profile.browser_path) + needs_marker = allow_marker and running is not False + marker = str(int(time.time() * 1000)) if needs_marker else None + target_url = profile_marker_target_url(marker) if marker else None + args = [str(profile.browser_path)] + if sys.platform == "darwin": + args.append(f"--user-data-dir={profile.user_data_dir}") + args.append(profile_directory_arg) + if target_url: + args.append(target_url) + elif url: + args.append(url) + elif allow_marker: + args.append("--no-startup-window") + subprocess.Popen(args, stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + return { + "status": "ok", + "opened": True, + "profile": profile.payload(), + "profile_targeting": "marker" if marker else ("profile-launch" if allow_marker else "profile-focus"), + "target_marker": marker, + "target_url": target_url or url, + "next_step": "Give Chrome a moment to start, then retry browser work.", + } diff --git a/src/browser_harness/manager_client.py b/src/browser_harness/manager_client.py new file mode 100644 index 00000000..f58b85c3 --- /dev/null +++ b/src/browser_harness/manager_client.py @@ -0,0 +1,170 @@ +"""Client for the browser-harness manager.""" +from __future__ import annotations + +import os +from pathlib import Path +import secrets +import subprocess +import sys +import time + +from . 
class ManagerError(RuntimeError):
    """Error reported by (or on behalf of) the browser-harness manager.

    `response` always holds a dict: the manager's response, or
    `{"reason": str(value)}` when constructed from a non-dict. The exception
    message is the first truthy value of `reason`, `error`, `state`, falling
    back to "manager error".
    """

    def __init__(self, response):
        self.response = response if isinstance(response, dict) else {"reason": str(response)}
        reason = self.response.get("reason") or self.response.get("error") or self.response.get("state") or "manager error"
        super().__init__(reason)


# Set once this process has spawned a manager daemon.
_manager_started = False
# Identifies this client process in requests sent to the manager.
_CLIENT_ID = f"{os.getpid()}_{secrets.token_hex(4)}"


def default_manager_root() -> str:
    """Return the manager's default root directory as a string."""
    return str(manager_runtime.default_root())


def default_manager_socket() -> str:
    """Return the default manager endpoint derived from the default root."""
    return str(manager_runtime.default_endpoint(Path(default_manager_root())))


def manager_socket() -> str:
    """Return the manager endpoint, starting the manager first if needed.

    Also publishes the endpoint and root through `BH_MANAGER_SOCKET` and
    `BH_MANAGER_ROOT` unless those variables are already set.
    """
    path = default_manager_socket()
    os.environ.setdefault("BH_MANAGER_SOCKET", path)
    os.environ.setdefault("BH_MANAGER_ROOT", default_manager_root())
    ensure_manager_running(path)
    return path


def ensure_manager_running(path: str | None = None) -> None:
    """Make sure a manager daemon is answering on `path`, spawning one if not.

    Args:
        path: Manager endpoint; defaults to `default_manager_socket()`.

    Raises:
        ManagerError: with state "manager-unavailable" when the daemon does
            not answer within `BH_MANAGER_START_TIMEOUT` seconds (default 10).
    """
    global _manager_started
    path = path or default_manager_socket()
    endpoint = Path(path)
    if _manager_socket_alive(endpoint):
        return
    root = Path(os.environ.get("BH_MANAGER_ROOT") or default_manager_root())
    manager_runtime.ensure_private_dir(root)
    with manager_runtime.start_lock(root):
        # Re-check under the lock: another process may have started the manager meanwhile.
        if _manager_socket_alive(endpoint):
            return
        log = manager_runtime.open_private_append(root / "manager.log")
        env = {**os.environ, "BH_MANAGER_SOCKET": path, "BH_MANAGER_ROOT": str(root)}
        try:
            subprocess.Popen(
                [sys.executable, "-m", "browser_harness.manager_daemon", "--socket", path, "--root", str(root)],
                stdin=subprocess.DEVNULL,
                stdout=log,
                stderr=log,
                env=env,
                **manager_runtime.spawn_kwargs(),
            )
        finally:
            # The child inherited its own handle; the parent's copy is no longer needed.
            log.close()
        _manager_started = True
        # Wait while still holding the start lock so a concurrent caller cannot
        # spawn a second daemon before this one begins answering.
        # Monotonic clock: wall-clock adjustments must not shorten or extend the wait.
        deadline = time.monotonic() + float(os.environ.get("BH_MANAGER_START_TIMEOUT", "10"))
        while time.monotonic() < deadline:
            if _manager_socket_alive(endpoint):
                return
            time.sleep(0.05)
    raise ManagerError({"state": "manager-unavailable", "reason": f"manager did not start at {path}"})
def _manager_socket_alive(path: Path) -> bool:
    """Return True when a manager answers a ping or, off Windows, accepts a connection."""
    if manager_runtime.ping(path, timeout=0.2):
        return True
    if manager_runtime.IS_WINDOWS:
        return False
    try:
        probe, _unused_token = manager_runtime.connect(path, timeout=0.2)
    except OSError:
        return False
    # The connection itself was the proof of life; a failing close does not change that.
    try:
        probe.close()
    except OSError:
        pass
    return True


def stop_manager_if_running(path: str | None = None) -> bool:
    """Ask a running manager to shut down.

    Returns:
        True when the shutdown request was sent, False when no manager could
        be reached or the request failed.
    """
    endpoint = Path(path or default_manager_socket())
    try:
        conn, token = manager_runtime.connect(endpoint, timeout=0.5)
    except (OSError, ValueError, KeyError, TypeError):
        # OSError also covers FileNotFoundError, ConnectionRefusedError and TimeoutError.
        return False
    try:
        manager_runtime.send_request(conn, token, {"meta": "shutdown"})
    except (OSError, ValueError, AttributeError):
        return False
    else:
        return True
    finally:
        try:
            conn.close()
        except OSError:
            pass


def request(op: str, **payload) -> dict:
    """Send one operation to the manager and return its response dict.

    The request carries the agent identity, this process's client id, and any
    extra keyword payload.

    Raises:
        ManagerError: when the response is not a JSON object or has
            `ok` set to False.
    """
    message = {"op": op}
    message.update(context.agent_identity().payload())
    message["client_id"] = _CLIENT_ID
    message.update(payload)
    endpoint = manager_socket()
    timeout = float(os.environ.get("BH_MANAGER_TIMEOUT", "30"))
    conn, token = manager_runtime.connect(Path(endpoint), timeout=timeout)
    try:
        reply = manager_runtime.send_request(conn, token, message)
    finally:
        conn.close()
    if not isinstance(reply, dict):
        raise ManagerError({"state": "bad-response", "reason": "manager returned non-object JSON"})
    if reply.get("ok") is False:
        raise ManagerError(reply)
    return reply


def public_state(resp: dict) -> dict:
    """Return `resp` without the internal `binding` and `ok` keys."""
    hidden = {"binding", "ok"}
    visible = {}
    for key, value in resp.items():
        if key not in hidden:
            visible[key] = value
    return visible


def binding_from_response(resp: dict) -> context.BrowserBinding:
    """Build a BrowserBinding from the `binding` object of a manager response.

    Raises:
        ManagerError: when the response has no dict-valued `binding`.
    """
    raw = resp.get("binding")
    if isinstance(raw, dict):
        return context.BrowserBinding.from_manager(raw)
    raise ManagerError({"state": "bad-response", "reason": "manager response missing binding"})


def status(browser_id: str | None = None) -> dict:
    """Return the public status for `browser_id`.

    When the manager cannot be started, a not-ready status dict is returned
    instead of raising; every other ManagerError propagates.
    """
    try:
        reply = request("status", browser_id=browser_id)
    except ManagerError as e:
        if e.response.get("state") != "manager-unavailable":
            raise
        return {"ready": False, "state": "manager-unavailable", "reason": str(e), "safe_actions": []}
    return public_state(reply)


def list_browsers() -> list[dict]:
    """Return the manager's list of browsers.

    Raises:
        ManagerError: when the `browsers` field is not a list.
    """
    found = request("list").get("browsers", [])
    if isinstance(found, list):
        return found
    raise ManagerError({"state": "bad-response", "reason": "manager list response missing browsers"})


def new_browser(backend="managed", *, profile="clean", proxy_country=None, reason=None) -> dict:
    """Ask the manager to create a browser and return its response."""
    options = {
        "backend": backend,
        "profile": profile,
        "proxy_country": proxy_country,
        "reason": reason,
    }
    return request("new", **options)


def switch_browser(browser_id: str) -> dict:
    """Ask the manager to switch to `browser_id`."""
    return request("switch", browser_id=browser_id)


def close_browser(browser_id: str | None = None) -> dict:
    """Ask the manager to close `browser_id` (None is passed through unchanged)."""
    return request("close", browser_id=browser_id)


def close_owned_browsers() -> dict:
    """Ask the manager to close the browsers owned by this agent."""
    return request("close_owned")
class Manager:
    """Registry of browser leases, persisted to ``<root>/registry.json``.

    Leases are kept in ``self.leases`` keyed by browser id. Every mutation of
    the registry happens under ``self._lock`` and is followed by
    ``_persist()``. Backend start-up and teardown run outside the lock.
    """

    # Operation names accepted by handle(); each one is served by the method
    # of the same name.
    _OPS = frozenset({"status", "list", "new", "switch", "close", "close_owned", "lock", "unlock"})

    def __init__(self, root: Path):
        self.root = root
        manager_runtime.ensure_private_dir(self.root)
        self._lock = threading.RLock()
        self.leases: dict[str, BrowserLease] = {}
        self.next_seq = 0
        self._load()

    def _load(self):
        """Restore ``next_seq`` and leases from the registry file, if usable.

        A missing, unreadable, undecodable, or structurally invalid registry
        is ignored so the manager can still start. Individual lease entries
        that cannot be turned into a ``BrowserLease`` are skipped.
        """
        try:
            data = json.loads((self.root / "registry.json").read_text())
        except (OSError, ValueError):
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            return
        if not isinstance(data, dict):
            return
        try:
            self.next_seq = int(data.get("next_seq") or 0)
        except (TypeError, ValueError):
            self.next_seq = 0
        entries = data.get("leases")
        if not isinstance(entries, list):
            return
        for item in entries:
            if not isinstance(item, dict):
                continue
            try:
                lease = BrowserLease.from_json(item)
                self.leases[lease.browser_id] = lease
            except TypeError:
                # Missing required fields or an unhashable browser id.
                continue

    def _persist(self):
        """Write ``next_seq`` and all current leases to the registry file."""
        data = {
            "next_seq": self.next_seq,
            "leases": [asdict(v) for v in self.leases.values()],
        }
        manager_runtime.write_private_json(self.root / "registry.json", data)

    def handle(self, req: dict) -> dict:
        """Dispatch a request to the method named by ``req["op"]``.

        Unknown ops produce an ``unknown-op`` error response. Any exception
        raised by the handler is converted into a ``manager-error`` response.
        """
        op = req.get("op")
        try:
            if isinstance(op, str) and op in self._OPS:
                return getattr(self, op)(req)
            return error("unknown-op", f"unknown op {op!r}", [])
        except Exception as e:
            return error("manager-error", str(e), [])

    def status(self, req: dict) -> dict:
        """Report whether ``req["browser_id"]`` names a known lease."""
        with self._lock:
            browser_id = req.get("browser_id")
            if not browser_id:
                return {
                    "ok": True,
                    "ready": False,
                    "state": "browser-id-required",
                    "safe_actions": ["browser_list", "browser_new"],
                }
            lease = self.leases.get(browser_id)
            if not lease:
                return {"ok": True, "ready": False, "state": "not-found", "safe_actions": ["browser_list", "browser_new"]}
            return ready_public(lease)

    def list(self, req: dict) -> dict:
        """List every lease, flagging those owned by the requesting agent."""
        with self._lock:
            run_id, agent_id = run_agent(req)
            browsers = []
            for lease in self.leases.values():
                browsers.append({
                    "id": lease.browser_id,
                    "backend": public_backend(lease),
                    "owner": lease.owner_agent_id,
                    "owned_by_this_agent": lease.run_id == run_id and lease.owner_agent_id == agent_id,
                    "state": "ready",
                    **({"cloud_browser_id": lease.cloud_browser_id} if lease.cloud_browser_id else {}),
                    **({"live_url": lease.cloud_live_url} if lease.cloud_live_url else {}),
                })
            return {"ok": True, "browsers": browsers}

    def new(self, req: dict) -> dict:
        """Allocate a lease, start its backend, and register it on success.

        The lease is only added to ``self.leases`` (and persisted) after the
        backend started. On failure the backend is cleaned up and an error
        response is returned.
        """
        run_id, agent_id = run_agent(req)
        backend = req.get("backend") or "managed"
        if backend not in {"managed", "cloud"}:
            return error("unsupported-backend", f"unsupported backend {backend!r}", ["browser_new"])
        with self._lock:
            lease = self._allocate_lease(run_id, agent_id, backend, req.get("profile") or "clean")
        # Backend start-up runs without holding the lock.
        try:
            if backend == "cloud":
                start_cloud_backend(lease, req.get("proxy_country"))
            else:
                start_managed_backend(lease)
        except auth.CloudAuthRequired as e:
            cleanup_backend(lease)
            telemetry.capture("browser_harness.browser_new", {
                "backend": backend,
                "profile_kind": lease.profile_kind,
                "result": "cloud-auth-required",
            })
            return error("cloud-auth-required", str(e), ["browser-harness auth login"])
        except Exception as e:
            cleanup_backend(lease)
            telemetry.capture("browser_harness.browser_new", {
                "backend": backend,
                "profile_kind": lease.profile_kind,
                "result": "start-failed",
            })
            return error("browser-start-failed", str(e), ["browser_new"])
        with self._lock:
            self.leases[lease.browser_id] = lease
            self._persist()
        telemetry.capture("browser_harness.browser_new", {
            "backend": public_backend(lease),
            "profile_kind": lease.profile_kind,
            "result": "ready",
        })
        return ready_response(lease)

    def switch(self, req: dict) -> dict:
        """Return the binding for an existing lease and bump its last-used time."""
        with self._lock:
            browser_id = req.get("browser_id")
            if not browser_id:
                return error("bad-request", "browser_id is required", ["browser_list", "browser_new"])
            lease = self.leases.get(browser_id)
            if not lease:
                telemetry.capture("browser_harness.browser_switch", {"result": "not-found"})
                return error("not-found", "browser id not found", ["browser_list", "browser_new"])
            lease.last_used_at_ms = int(time.time() * 1000)
            self._persist()
            telemetry.capture("browser_harness.browser_switch", {
                "backend": public_backend(lease),
                "result": "ready",
            })
            return ready_response(lease)

    def close(self, req: dict) -> dict:
        """Remove one lease from the registry, then tear down its backend."""
        with self._lock:
            browser_id = req.get("browser_id")
            if not browser_id:
                return error("bad-request", "browser id is required; use browser_close(id)", ["browser_list"])
            lease = self.leases.get(browser_id)
            if not lease:
                telemetry.capture("browser_harness.browser_close", {"result": "not-found"})
                return {"ok": True, "ready": False, "state": "not-found", "id": browser_id}
            self.leases.pop(browser_id, None)
            self._persist()
            resp = {"ok": True, "ready": False, "state": "closed", "id": browser_id}
        # Teardown runs after the lock is released.
        cleanup_backend(lease)
        telemetry.capture("browser_harness.browser_close", {
            "backend": public_backend(lease),
            "result": "closed",
        })
        return resp

    def close_owned(self, req: dict) -> dict:
        """Close every lease whose run id and owner match the requester."""
        cleanup = []
        with self._lock:
            run_id, agent_id = run_agent(req)
            owned_ids = [
                browser_id
                for browser_id, lease in self.leases.items()
                if lease.run_id == run_id and lease.owner_agent_id == agent_id
            ]
            for browser_id in owned_ids:
                cleanup.append(self.leases.pop(browser_id))
            self._persist()
        for lease in cleanup:
            cleanup_backend(lease)
        telemetry.capture("browser_harness.browser_close_owned", {
            "closed_count": len(cleanup),
            "result": "closed",
        })
        return {
            "ok": True,
            "ready": False,
            "state": "closed-owned",
            "closed": [lease.browser_id for lease in cleanup],
        }

    def lock(self, req: dict) -> dict:
        """Compatibility no-op for old manager clients.

        Browser ids are explicit shared handles now. Selecting the same browser
        from multiple processes is allowed; callers that still ask for a lock
        get a stable success response without exclusive ownership.
        """
        with self._lock:
            browser_id = req.get("browser_id")
            if not browser_id:
                return error("bad-request", "browser id is required; call browser(id)", ["browser_new", "browser_list"])
            lease = self.leases.get(browser_id)
            if not lease:
                return error("not-found", "browser id not found", ["browser_list", "browser_new"])
            return {"ok": True, "state": "ready", "browser_id": browser_id, "lock_id": req.get("lock_id") or "shared"}

    def unlock(self, req: dict) -> dict:
        """Counterpart of ``lock``; reports ``released`` for a known lease."""
        with self._lock:
            browser_id = req.get("browser_id")
            lease = self.leases.get(browser_id or "")
            if not lease:
                return {"ok": True, "state": "not-found"}
            return {"ok": True, "state": "released", "browser_id": browser_id}

    def _allocate_lease(self, run_id: str, agent_id: str, backend: str, profile_kind: str) -> "BrowserLease":
        """Create the per-lease directories and return an unregistered lease.

        Increments ``next_seq``. The caller is responsible for adding the
        lease to ``self.leases`` and persisting.
        """
        self.next_seq += 1
        browser_id = self._new_browser_id()
        short = f"{int(time.time() * 1000):x}{self.next_seq:x}{browser_id}"
        bu_name = f"bh_{short[-16:]}"
        base = self.root / "leases" / browser_id
        runtime_dir = base / "r"
        tmp_dir = base / "t"
        download_dir = base / "downloads"
        artifact_dir = base / "artifacts"
        profile_dir = base / "profile"
        for p in (runtime_dir, tmp_dir, download_dir, artifact_dir, profile_dir):
            p.mkdir(parents=True, exist_ok=True)
            if not manager_runtime.IS_WINDOWS:
                os.chmod(p, 0o700)
        return BrowserLease(
            browser_id=browser_id,
            run_id=run_id,
            owner_agent_id=agent_id,
            backend=backend,
            profile_kind=profile_kind,
            harness_daemon_name=bu_name,
            runtime_dir=str(runtime_dir),
            tmp_dir=str(tmp_dir),
            download_dir=str(download_dir),
            artifact_dir=str(artifact_dir),
            profile_dir=str(profile_dir),
        )

    def _new_browser_id(self) -> str:
        """Pick a random 6-character id that is neither reserved nor in use.

        Falls back to a 16-hex-digit token if 100 attempts all collide.
        """
        alphabet = "abcdefghijklmnopqrstuvwxyz0123456789"
        for _ in range(100):
            browser_id = "".join(secrets.choice(alphabet) for _ in range(6))
            if browser_id not in RESERVED_BROWSER_IDS and browser_id not in self.leases:
                return browser_id
        return secrets.token_hex(8)
def start_managed_backend(lease: BrowserLease):
    """Launch a local browser for ``lease`` and attach the harness daemon.

    The browser is started with a remote-debugging port and the lease's
    profile directory. Headless mode is used when ``BH_MANAGED_HEADLESS`` is
    ``"1"`` or when neither ``DISPLAY`` nor ``WAYLAND_DISPLAY`` is set.
    ``--no-sandbox`` is added when ``BH_CHROME_NO_SANDBOX`` is ``"1"``.

    The lease's ``cdp_url``, ``local_process_id`` and ``local_debug_port``
    are filled in along the way.

    Raises:
        RuntimeError: if no browser binary can be found.
    """
    binary = find_browser_binary()
    if not binary:
        raise RuntimeError("no Chrome/Chromium binary found; set BH_CHROME_PATH or CHROME_PATH")
    debug_port = allocate_port()
    lease.cdp_url = f"http://127.0.0.1:{debug_port}"

    env = os.environ
    no_display = not env.get("DISPLAY") and not env.get("WAYLAND_DISPLAY")
    command = [
        binary,
        f"--remote-debugging-port={debug_port}",
        f"--user-data-dir={lease.profile_dir}",
        "--no-first-run",
        "--no-default-browser-check",
        "--disable-background-networking",
        "--disable-dev-shm-usage",
    ]
    if env.get("BH_MANAGED_HEADLESS") == "1" or no_display:
        command += ["--headless=new", "--disable-gpu"]
    if env.get("BH_CHROME_NO_SANDBOX") == "1":
        command.append("--no-sandbox")
    # The start page stays the final argument, after every optional flag.
    command.append("about:blank")

    chrome = subprocess.Popen(
        command,
        stdin=subprocess.DEVNULL,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        **manager_runtime.spawn_kwargs(),
    )
    lease.local_process_id = chrome.pid
    lease.local_debug_port = debug_port
    wait_devtools(debug_port)
    start_harness_daemon(lease)
def _signal_browser(pid: int, sig: int) -> None:
    """Send ``sig`` to the process group ``pid``, else to the single process."""
    try:
        os.killpg(pid, sig)
    except Exception:
        try:
            os.kill(pid, sig)
        except Exception:
            pass


def _browser_exited(pid: int) -> bool:
    """Return True once ``pid`` is gone, reaping it when it is our child."""
    wnohang = getattr(os, "WNOHANG", None)
    if wnohang is not None:
        try:
            reaped, _status = os.waitpid(pid, wnohang)
        except OSError:
            # Not a child of this process (ChildProcessError) or already
            # reaped; fall through to the existence probe below.
            pass
        else:
            if reaped == pid:
                return True
    try:
        os.kill(pid, 0)
    except OSError:
        return True
    return False


def cleanup_backend(lease: BrowserLease):
    """Tear down everything started for ``lease``; never raises on failure.

    Steps, in order:
      1. Ask ``admin.restart_daemon`` to stop the lease's harness daemon
         (errors ignored).
      2. For a cloud lease, stop the cloud browser.
      3. For a managed lease, send signal 15 to the browser, poll up to 25
         times at 0.2 s intervals for it to exit, then send signal 9.

    The poll reaps the browser when it is a child of this process. Without
    that, an exited child remains a zombie, ``os.kill(pid, 0)`` keeps
    succeeding, and every close would wait the full poll window.
    """
    binding = context.BrowserBinding(
        browser_id=lease.browser_id,
        bu_name=lease.harness_daemon_name,
        runtime_dir=Path(lease.runtime_dir),
        tmp_dir=Path(lease.tmp_dir),
        manager_mode=True,
    )
    try:
        admin.restart_daemon(binding=binding)
    except Exception:
        pass
    if lease.backend == "cloud" and lease.cloud_browser_id:
        stop_cloud_browser(lease.cloud_browser_id)
    if lease.backend == "managed" and lease.local_process_id:
        pid = lease.local_process_id
        _signal_browser(pid, 15)  # SIGTERM
        for _ in range(25):
            if _browser_exited(pid):
                return
            time.sleep(0.2)
        _signal_browser(pid, 9)  # SIGKILL
def wait_devtools(port: int, timeout=20.0):
    """Block until the DevTools HTTP endpoint on ``port`` reports ready.

    Polls ``http://127.0.0.1:<port>/json/version`` every 0.2 s until the
    response contains a ``webSocketDebuggerUrl``.

    Args:
        port: Local remote-debugging port to probe.
        timeout: Overall time budget in seconds.

    Raises:
        RuntimeError: if the endpoint is not ready before the deadline. The
            message includes the last error seen, or ``None`` if there was
            none.
    """
    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = time.monotonic() + timeout
    last = None
    while time.monotonic() < deadline:
        try:
            with urllib.request.urlopen(f"http://127.0.0.1:{port}/json/version", timeout=2) as resp:
                data = json.loads(resp.read() or b"{}")
            if data.get("webSocketDebuggerUrl"):
                return
        except Exception as e:
            last = e
        time.sleep(0.2)
    raise RuntimeError(f"Chrome DevTools did not become ready on port {port}: {last}")
def sanitize(value: str) -> str:
    """Reduce ``value`` to at most 64 alphanumeric, ``_`` or ``-`` characters.

    The value is converted with ``str()`` first. Disallowed characters are
    removed before truncation. If nothing remains, ``"unknown"`` is returned.
    """
    kept = [ch for ch in str(value) if ch.isalnum() or ch == "_" or ch == "-"]
    cleaned = "".join(kept[:64])
    return cleaned if cleaned else "unknown"
req.get("meta") == "shutdown": + resp = {"ok": True} + if stop: + stop.set() + else: + resp = manager.handle(req) + except Exception as e: + resp = error("bad-request", str(e), []) + conn.sendall((json.dumps(resp, default=str) + "\n").encode()) + + +def main(argv=None): + parser = argparse.ArgumentParser() + root = manager_runtime.default_root() + parser.add_argument("--socket", default=str(manager_runtime.default_endpoint(root))) + parser.add_argument("--root", default=str(root)) + args = parser.parse_args(argv) + serve(Path(args.socket), Path(args.root)) + + +if __name__ == "__main__": + main() diff --git a/src/browser_harness/manager_helpers.py b/src/browser_harness/manager_helpers.py new file mode 100644 index 00000000..c6505b05 --- /dev/null +++ b/src/browser_harness/manager_helpers.py @@ -0,0 +1,80 @@ +"""Model-visible browser lifecycle helpers.""" +from __future__ import annotations + +from . import context +from . import local_profiles +from . import manager_client + + +def browser_status(browser_id=None): + """Return lifecycle state for a browser id, or manager guidance if omitted.""" + return manager_client.status(browser_id) + + +def browser_profiles(verbose=False): + """List local Chrome/Chromium profiles for browser_use_profile(...).""" + return local_profiles.list_browser_profiles_payload(verbose=verbose) + + +def browser_use_profile(profile_id): + """Select the local browser profile future normal helper calls should use.""" + return local_profiles.use_browser_profile(profile_id) + + +def _manager_backend(kind, backend=None): + value = backend if backend is not None else kind + if value in (None, "private", "managed"): + return "managed" + if value == "cloud": + return "cloud" + raise ValueError("browser_new kind must be 'private' or 'cloud'") + + +def browser_new(kind="private", *, backend=None, profile="clean", proxy_country=None, reason=None): + """Create a managed browser and return its short id.""" + resp = manager_client.new_browser( + 
def browser_close(browser_id=None):
    """Close the browser with the given id and return the public response.

    Whether the browser is the currently active binding is determined before
    the close request is sent. If it was, the active binding is cleared once
    the manager has answered.

    Raises:
        ValueError: if ``browser_id`` is missing or empty.
    """
    if not browser_id:
        raise ValueError("browser_close(id) requires a browser id")
    current = context.get_active_binding()
    was_active = bool(current) and current.browser_id == browser_id
    result = manager_client.close_browser(browser_id)
    if was_active:
        context.clear_active_binding()
    return manager_client.public_state(result)
def write_private_json(path: Path, data: dict) -> None:
    """Atomically write ``data`` as indented JSON to ``path`` with mode 0600.

    The JSON is written to ``<name>.tmp`` next to ``path`` and then moved
    into place with ``os.replace``. If writing or replacing fails, the
    temporary file is removed and the exception is re-raised.

    Args:
        path: Destination file; its parent is created as a private directory.
        data: JSON-serialisable mapping.
    """
    ensure_private_dir(path.parent)
    tmp = path.with_name(path.name + ".tmp")
    fd = os.open(tmp, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    try:
        # The file object owns the descriptor and closes it exactly once.
        # Closing the raw fd again could hit an unrelated descriptor that
        # reused the number in another thread.
        with os.fdopen(fd, "w") as stream:
            json.dump(data, stream, indent=2)
        os.replace(tmp, path)
    except BaseException:
        try:
            os.unlink(tmp)
        except OSError:
            pass
        raise
    if not IS_WINDOWS:
        os.chmod(path, 0o600)
fcntl.flock(f.fileno(), fcntl.LOCK_UN) + + +def spawn_kwargs() -> dict: + if IS_WINDOWS: + return { + "creationflags": subprocess.CREATE_NEW_PROCESS_GROUP | subprocess.CREATE_NO_WINDOW, + } + return {"start_new_session": True} + + +def connect(endpoint: Path, timeout: float = 1.0) -> tuple[socket.socket, str | None]: + if not IS_WINDOWS: + s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + s.settimeout(timeout) + s.connect(str(endpoint)) + return s, None + data = json.loads(endpoint.read_text()) + port = int(data["port"]) + token = str(data["token"]) + s = socket.create_connection(("127.0.0.1", port), timeout=timeout) + s.settimeout(timeout) + return s, token + + +def send_request(sock: socket.socket, token: str | None, req: dict) -> dict: + if token: + req = {**req, "token": token} + sock.sendall((json.dumps(req) + "\n").encode()) + data = b"" + while not data.endswith(b"\n"): + chunk = sock.recv(1 << 16) + if not chunk: + break + data += chunk + resp = json.loads(data or b"{}") + return resp if isinstance(resp, dict) else {"ok": False, "state": "bad-response"} + + +def ping(endpoint: Path, timeout: float = 0.2) -> bool: + try: + sock, token = connect(endpoint, timeout=timeout) + except (FileNotFoundError, ConnectionRefusedError, TimeoutError, socket.timeout, OSError, ValueError, KeyError, TypeError): + return False + try: + resp = send_request(sock, token, {"meta": "ping"}) + return resp.get("pong") is True + except (OSError, ValueError, AttributeError): + return False + finally: + try: + sock.close() + except OSError: + pass + + +def new_token() -> str: + return secrets.token_hex(32) diff --git a/src/browser_harness/paths.py b/src/browser_harness/paths.py new file mode 100644 index 00000000..f818c021 --- /dev/null +++ b/src/browser_harness/paths.py @@ -0,0 +1,43 @@ +"""browser-harness filesystem layout.""" +from __future__ import annotations + +import os +import sys +from pathlib import Path + + +def home_dir() -> Path: + raw = os.environ.get("BH_HOME") or 
def ensure_private_dir(path: Path) -> Path:
    """Create ``path`` (and missing parents) and return it.

    On every platform except Windows the directory's mode is set to 0700,
    whether or not it already existed.
    """
    on_windows = sys.platform == "win32"
    path.mkdir(parents=True, exist_ok=True)
    if not on_windows:
        path.chmod(0o700)
    return path
import auth, context, telemetry from .helpers import * +from .manager_helpers import * HELP = """Browser Harness @@ -31,27 +35,121 @@ Typical usage: browser-harness <<'PY' + browser("abc123") ensure_real_tab() print(page_info()) PY Helpers are pre-imported. The daemon auto-starts and connects to the running browser. +Create a browser with browser_new("private") or browser_new("cloud"), then select it with browser(id). +For local Chrome setup, first choose a stable profile id with browser_profiles() and browser_use_profile(id). Commands: browser-harness --version print the installed version browser-harness --doctor diagnose install, daemon, and browser state browser-harness doctor same as --doctor browser-harness doctor --fix-snap print how to fix Snap Chromium blocking CDP (Linux) + browser-harness profiles list local Chrome/Chromium profiles without starting the daemon + browser-harness use-profile select a local profile without starting the daemon + browser-harness open-profile [id] open/focus a local profile without starting the daemon + browser-harness auth login sign in to Browser Use Cloud for cloud browsers + browser-harness auth login --device-code sign in from SSH/headless environments + browser-harness auth status show Browser Use Cloud auth state + browser-harness auth logout remove stored Browser Use Cloud auth + browser-harness skill print the browser-harness skill text + browser-harness telemetry status show anonymous telemetry opt-out state browser-harness --update [-y] pull the latest version (agents: pass -y) browser-harness --reload stop the daemon so next call picks up code changes """ USAGE = """Usage: browser-harness <<'PY' + browser("abc123") print(page_info()) PY + + browser-harness <<'PY' + print(browser_new("private")) + PY """ +_MANAGER_HELPER_NAMES = ( + "browser", + "browser_status", + "browser_new", + "browser_switch", + "browser_list", + "browser_close", + "browser_close_owned", +) + +_NO_DAEMON_HELPER_NAMES = { + "browser_profiles", + 
def _can_run_without_daemon(code: str) -> bool:
    """Decide whether ``code`` only makes calls that need no daemon.

    Every call expression in the parsed code must be one of:
      * a bare name listed in ``_NO_DAEMON_HELPER_NAMES``,
      * a bare name listed in ``_NO_DAEMON_WRAPPER_NAMES``,
      * an attribute call named ``dumps`` or ``loads`` (simple formatting
        such as ``json.dumps(browser_profiles())``).

    Returns True only if all calls qualify and at least one of them is a
    no-daemon helper. Code that fails to parse returns False.
    """
    try:
        parsed = ast.parse(code)
    except SyntaxError:
        return False

    callees = (node.func for node in ast.walk(parsed) if isinstance(node, ast.Call))
    found_helper = False
    for callee in callees:
        if isinstance(callee, ast.Name):
            if callee.id in _NO_DAEMON_HELPER_NAMES:
                found_helper = True
            elif callee.id not in _NO_DAEMON_WRAPPER_NAMES:
                return False
        elif not (isinstance(callee, ast.Attribute) and callee.attr in {"dumps", "loads"}):
            return False
    return found_helper
Mirrors @@ -74,8 +172,40 @@ def _explicit_cdp_configured(): return bool(os.environ.get("BU_CDP_URL") or os.environ.get("BU_CDP_WS")) +def _print_json(value): + print(json.dumps(value, indent=2, default=str)) + + +def _print_skill(): + from importlib import resources + print(resources.files("browser_harness").joinpath("SKILL.md").read_text(), end="") + + +def _telemetry_command(args): + if not args: + return "script" + first = args[0] + if first in {"-h", "--help"}: + return "help" + if first == "--version": + return "version" + if first in {"--doctor", "doctor"}: + return "doctor" + if first == "--update": + return "update" + if first == "--reload": + return "reload" + if first == "--debug-clicks": + return "debug-clicks" + if first in {"profiles", "use-profile", "open-profile", "auth", "skill", "telemetry"}: + return first + return "usage" + + def main(): args = sys.argv[1:] + if not (args and args[0] == "telemetry"): + telemetry.capture("browser_harness.cli", {"command": _telemetry_command(args)}) if args and args[0] in {"-h", "--help"}: print(HELP) return @@ -92,6 +222,36 @@ def main(): print("usage: browser-harness doctor [--fix-snap]", file=sys.stderr) sys.exit(2) sys.exit(run_doctor()) + if args and args[0] == "profiles": + rest = args[1:] + verbose = rest == ["--verbose"] + if rest and not verbose: + print("usage: browser-harness profiles [--verbose]", file=sys.stderr) + sys.exit(2) + _print_json(browser_profiles(verbose=verbose)) + return + if args and args[0] == "use-profile": + if len(args) != 2: + print("usage: browser-harness use-profile ", file=sys.stderr) + sys.exit(2) + _print_json(browser_use_profile(args[1])) + return + if args and args[0] == "open-profile": + if len(args) > 2: + print("usage: browser-harness open-profile [profile-id]", file=sys.stderr) + sys.exit(2) + _print_json(open_local_profile(args[1] if len(args) == 2 else None, marker=False)) + return + if args and args[0] == "auth": + sys.exit(auth.run_auth_cli(args[1:])) + if args and 
args[0] == "skill": + if len(args) != 1: + print("usage: browser-harness skill", file=sys.stderr) + sys.exit(2) + _print_skill() + return + if args and args[0] == "telemetry": + sys.exit(telemetry.run_telemetry_cli(args[1:])) if args and args[0] == "--update": yes = any(a in {"-y", "--yes"} for a in args[1:]) sys.exit(run_update(yes=yes)) @@ -109,6 +269,15 @@ def main(): else: sys.exit(USAGE) print_update_banner() + if context.manager_enabled() or _uses_manager_helpers(code): + os.environ.setdefault("BH_MANAGER_MODE", "1") + if os.environ.get("BH_BROWSER_ID"): + browser_switch(os.environ["BH_BROWSER_ID"]) + else: + context.clear_active_binding() + exec(code, globals()) + return + # Auto-bootstrap a cloud browser is opt-in via BU_AUTOSPAWN — BROWSER_USE_API_KEY alone # is not enough, since the key is commonly set for unrelated reasons (profile sync, # cloud API calls, parent agents managing their own session). An explicit BU_CDP_URL @@ -121,7 +290,8 @@ def main(): and os.environ.get("BU_AUTOSPAWN") ): start_remote_daemon(NAME) - ensure_daemon() + if not _can_run_without_daemon(code): + ensure_daemon() exec(code, globals()) diff --git a/src/browser_harness/telemetry.py b/src/browser_harness/telemetry.py new file mode 100644 index 00000000..52a40aeb --- /dev/null +++ b/src/browser_harness/telemetry.py @@ -0,0 +1,184 @@ +"""Best-effort, opt-out telemetry for browser-harness. + +Only low-cardinality operational events are sent. Callers should pass categories, +states, and booleans, never URLs, selectors, page text, prompts, or credentials. +""" + +from __future__ import annotations + +import json +import os +import platform +import re +import urllib.request +import uuid +from importlib.metadata import PackageNotFoundError, version +from pathlib import Path + +from . 
import paths + + +POSTHOG_KEY = "phc_rCPCLPtaXB3EuBdiH7JLKtU2Wj5iPnuwdsbw58CnjYXc" +POSTHOG_HOST = "https://us.i.posthog.com" +DISABLE_ENVS = ("BH_TELEMETRY", "BROWSER_HARNESS_TELEMETRY") +FORBIDDEN_KEYS = ( + "api_key", + "content", + "cookie", + "email", + "href", + "key", + "message", + "password", + "path", + "prompt", + "query", + "secret", + "selector", + "text", + "title", + "token", + "url", + "uri", +) + + +def _config_dir() -> Path: + return paths.config_dir() + + +def _config_path() -> Path: + return _config_dir() / "telemetry.json" + + +def _load_config() -> dict: + try: + return json.loads(_config_path().read_text()) + except (FileNotFoundError, OSError, ValueError): + return {} + + +def _save_config(data: dict) -> None: + path = _config_path() + try: + path.parent.mkdir(parents=True, exist_ok=True) + if platform.system() != "Windows": + os.chmod(path.parent, 0o700) + path.write_text(json.dumps(data, indent=2, sort_keys=True) + "\n") + if platform.system() != "Windows": + os.chmod(path, 0o600) + except OSError: + pass + + +def _version() -> str: + try: + return version("browser-harness") + except PackageNotFoundError: + return "" + except Exception: + return "" + + +def _env_disabled() -> bool: + return any((os.environ.get(name) or "").lower() in {"0", "false", "no", "off"} for name in DISABLE_ENVS) + + +def _install_id(config: dict | None = None) -> str: + config = config if config is not None else _load_config() + raw = config.get("install_id") + if isinstance(raw, str) and re.fullmatch(r"[0-9a-f-]{32,36}", raw): + return raw + install_id = str(uuid.uuid4()) + _save_config({**config, "install_id": install_id}) + return install_id + + +def is_enabled() -> bool: + if _env_disabled(): + return False + return not bool(_load_config().get("disabled")) + + +def status() -> dict: + config = _load_config() + env_disabled = _env_disabled() + return { + "enabled": not env_disabled and not bool(config.get("disabled")), + "disabled_by_env": env_disabled, + 
"disabled_by_config": bool(config.get("disabled")), + "install_id": _install_id(config), + "config_path": str(_config_path()), + } + + +def set_enabled(enabled: bool) -> dict: + config = _load_config() + config["disabled"] = not enabled + _save_config(config) + return status() + + +def _safe_properties(properties: dict | None) -> dict: + out = {} + for key, value in (properties or {}).items(): + safe_key = re.sub(r"[^A-Za-z0-9_$.-]+", "_", str(key))[:80] + lowered = safe_key.lower() + if not safe_key or any(word in lowered for word in FORBIDDEN_KEYS): + continue + if isinstance(value, bool) or value is None: + out[safe_key] = value + elif isinstance(value, int | float): + out[safe_key] = value + else: + safe_value = str(value) + if "://" in safe_value: + safe_value = "[redacted]" + out[safe_key] = safe_value[:120] + return out + + +def capture(event: str, properties: dict | None = None) -> None: + if not is_enabled(): + return + try: + config = _load_config() + props = { + "browser_harness_version": _version() or "unknown", + "python_version": platform.python_version(), + "os": platform.system() or "unknown", + "machine": platform.machine() or "unknown", + "$process_person_profile": False, + **_safe_properties(properties), + } + payload = { + "api_key": POSTHOG_KEY, + "distinct_id": _install_id(config), + "event": event, + "properties": props, + } + data = json.dumps(payload).encode("utf-8") + host = os.environ.get("BH_POSTHOG_HOST", POSTHOG_HOST).rstrip("/") + req = urllib.request.Request( + f"{host}/i/v0/e/", + method="POST", + data=data, + headers={"Content-Type": "application/json", "User-Agent": "browser-harness"}, + ) + urllib.request.urlopen(req, timeout=float(os.environ.get("BH_TELEMETRY_TIMEOUT", "1"))).close() + except Exception: + return + + +def run_telemetry_cli(argv: list[str]) -> int: + if not argv or argv == ["status"]: + print(json.dumps(status(), indent=2)) + return 0 + if argv == ["disable"]: + print(json.dumps(set_enabled(False), indent=2)) + 
return 0 + if argv == ["enable"]: + print(json.dumps(set_enabled(True), indent=2)) + return 0 + print("usage: browser-harness telemetry [status|enable|disable]") + return 2