From 29655f1b54ce58ca4d36c06205d520e9140a4d4a Mon Sep 17 00:00:00 2001 From: Jovenkemp <128696682+Jovenkemp@users.noreply.github.com> Date: Fri, 12 Jun 2026 12:17:49 +1000 Subject: [PATCH] Decode piped stdin as UTF-8 and strip any BOM Windows PowerShell 5.1 commonly prepends a UTF-8 BOM when piping text to a native command (its UTF8 $OutputEncoding emits a preamble, and files written by PS 5.1 carry BOMs that survive re-piping). sys.stdin.read() leaves that BOM in the source -- decoded as U+FEFF, or as mojibake under the locale code page -- so exec() fails with a SyntaxError on the first line of every piped script. Read raw bytes and decode with utf-8-sig instead: it strips one leading UTF-8 BOM when present and is byte-for-byte identical to utf-8 otherwise, so BOM-less input from bash/cmd decodes exactly as before wherever stdin was already being read as UTF-8 (and always for pure-ASCII input). Newline handling is unchanged too: compile() normalizes \r\n itself, including inside string literals. Side benefit: non-ASCII source now decodes as UTF-8 on Windows rather than the locale code page. UTF-16 stdin stays unsupported, matching CPython, which rejects UTF-16 source files outright; it now fails loudly at decode time instead of producing mojibake. Co-Authored-By: Claude Fable 5 --- src/browser_harness/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/browser_harness/run.py b/src/browser_harness/run.py index 8ab1f0f1..623b3e84 100644 --- a/src/browser_harness/run.py +++ b/src/browser_harness/run.py @@ -103,7 +103,7 @@ def main(): os.environ["BH_DEBUG_CLICKS"] = "1" args = args[1:] if not args and not sys.stdin.isatty(): - code = sys.stdin.read() + code = sys.stdin.buffer.read().decode('utf-8-sig') # utf-8-sig strips BOM if present (PowerShell 5.1 compat) if not code.strip(): sys.exit(USAGE) else: