From 4cb5090de835314e1953eafc4a4928934ac81171 Mon Sep 17 00:00:00 2001 From: Kyle Cannon Date: Tue, 2 Jun 2026 13:37:50 -0700 Subject: [PATCH 1/2] perf(pg-protocol): encode length-prefixed strings in a single pass Add Writer.addInt32PrefixedString, which writes a value's Int32 byte-length prefix immediately followed by its UTF-8 bytes, computing Buffer.byteLength once. The previous `addInt32(Buffer.byteLength(s)).addString(s)` pairing scanned each string three times. Used for Bind parameter values and the SASL initial response. Wire output is byte-identical; addInt32/addString are unchanged for other callers. Benchmark (packages/pg-protocol/bench/write-bench.js, alternating-sampled vs base): bind(2 small) +16% bind(10 mixed) +23% bind(unicode) +14% full insert seq +9% Adds a multi-byte unicode bind unit test asserting the Int32 prefix equals the UTF-8 byte length, not the char length. Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/pg-protocol/src/buffer-writer.ts | 20 +++++++++++++++++ .../src/outbound-serializer.test.ts | 22 +++++++++++++++++++ packages/pg-protocol/src/serializer.ts | 6 ++--- 3 files changed, 45 insertions(+), 3 deletions(-) diff --git a/packages/pg-protocol/src/buffer-writer.ts b/packages/pg-protocol/src/buffer-writer.ts index cebb0d9ed..d206f322a 100644 --- a/packages/pg-protocol/src/buffer-writer.ts +++ b/packages/pg-protocol/src/buffer-writer.ts @@ -58,6 +58,26 @@ export class Writer { return this } + // Write an Int32 byte-length prefix immediately followed by the string's UTF-8 + // bytes. Postgres' Bind wire format prefixes every parameter with its length, + // and doing it in one method computes Buffer.byteLength ONCE — the previous + // `addInt32(Buffer.byteLength(s)).addString(s)` pairing scanned the string + // three times (byteLength for the prefix, byteLength again inside addString, + // then the encode), which is costly for large text parameters. + public addInt32PrefixedString(string: string): Writer { + const len = Buffer.byteLength(string) + this.ensure(4 + len) + const buffer = this.buffer + let offset = this.offset + buffer[offset++] = (len >>> 24) & 0xff + buffer[offset++] = (len >>> 16) & 0xff + buffer[offset++] = (len >>> 8) & 0xff + buffer[offset++] = (len >>> 0) & 0xff + buffer.write(string, offset, 'utf-8') + this.offset = offset + len + return this + } + public add(otherBuffer: Buffer): Writer { this.ensure(otherBuffer.length) otherBuffer.copy(this.buffer, this.offset) diff --git a/packages/pg-protocol/src/outbound-serializer.test.ts b/packages/pg-protocol/src/outbound-serializer.test.ts index 0d3e387e4..7af46eba0 100644 --- a/packages/pg-protocol/src/outbound-serializer.test.ts +++ b/packages/pg-protocol/src/outbound-serializer.test.ts @@ -129,6 +129,28 @@ describe('serializer', () => { .join(true, 'B') assert.deepEqual(actual, expectedBuffer) }) + + it('encodes a multi-byte string param with its UTF-8 byte length, not char length', function () { + // Guards the single-pass addInt32PrefixedString write path: the Int32 + // length prefix must be the UTF-8 byte count, not String.length. 'héllo中🎉' + // is 7 code units / 8 chars but 13 UTF-8 bytes. + const value = 'héllo中🎉' + const bytes = Buffer.from(value, 'utf8') + assert.notEqual(bytes.length, value.length) // sanity: the divergence we're testing + const actual = serialize.bind({ values: [value] }) + const expectedBuffer = new BufferList() + .addCString('') // portal + .addCString('') // statement + .addInt16(1) // param format code count + .addInt16(0) // format code for the one value (text) + .addInt16(1) // value count + .addInt32(bytes.length) // 13 — the UTF-8 byte length, NOT value.length (8) + .add(bytes) + .addInt16(1) // result format code count + .addInt16(0) // result format (text) + .join(true, 'B') + assert.deepEqual(actual, expectedBuffer) + }) }) it('with custom valueMapper', function () { diff --git a/packages/pg-protocol/src/serializer.ts b/packages/pg-protocol/src/serializer.ts index bb0441f56..137daad79 100644 --- a/packages/pg-protocol/src/serializer.ts +++ b/packages/pg-protocol/src/serializer.ts @@ -48,7 +48,7 @@ const password = (password: string): Buffer => { const sendSASLInitialResponseMessage = function (mechanism: string, initialResponse: string): Buffer { // 0x70 = 'p' - writer.addCString(mechanism).addInt32(Buffer.byteLength(initialResponse)).addString(initialResponse) + writer.addCString(mechanism).addInt32PrefixedString(initialResponse) return writer.flush(code.startup) } @@ -135,8 +135,8 @@ const writeValues = function (values: any[], valueMapper?: ValueMapper): void { } else { // add the param type (string) to the writer writer.addInt16(ParamType.STRING) - paramWriter.addInt32(Buffer.byteLength(mappedVal)) - paramWriter.addString(mappedVal) + // length prefix + UTF-8 bytes in one pass (Buffer.byteLength computed once) + paramWriter.addInt32PrefixedString(mappedVal) } } } From f6864e86af3e157437dfbb867b8e917bdc4b7f99 Mon Sep 17 00:00:00 2001 From: Kyle Cannon Date: Wed, 3 Jun 2026 08:45:00 -0700 Subject: [PATCH 2/2] Update packages/pg-protocol/src/outbound-serializer.test.ts Co-authored-by: Charmander <~@charmander.me> --- packages/pg-protocol/src/outbound-serializer.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/pg-protocol/src/outbound-serializer.test.ts b/packages/pg-protocol/src/outbound-serializer.test.ts index 7af46eba0..856ead7b9 100644 --- a/packages/pg-protocol/src/outbound-serializer.test.ts +++ b/packages/pg-protocol/src/outbound-serializer.test.ts @@ -133,7 +133,7 @@ describe('serializer', () => { it('encodes a multi-byte string param with its UTF-8 byte length, not char length', function () { // Guards the single-pass addInt32PrefixedString write path: the Int32 // length prefix must be the UTF-8 byte count, not String.length. 'héllo中🎉' - // is 7 code units / 8 chars but 13 UTF-8 bytes. + // is 7 code points / 8 UTF-16 code units but 13 UTF-8 bytes. const value = 'héllo中🎉' const bytes = Buffer.from(value, 'utf8') assert.notEqual(bytes.length, value.length) // sanity: the divergence we're testing