diff --git a/accounts/accounts.go b/accounts/accounts.go index 6d46a0f76c..2a81266f5e 100644 --- a/accounts/accounts.go +++ b/accounts/accounts.go @@ -37,11 +37,12 @@ type Account struct { } const ( - MimetypeDataWithValidator = "data/validator" - MimetypeTypedData = "data/typed" - MimetypeClique = "application/x-clique-header" - MimetypeBor = "application/x-bor-header" - MimetypeTextPlain = "text/plain" + MimetypeDataWithValidator = "data/validator" + MimetypeTypedData = "data/typed" + MimetypeClique = "application/x-clique-header" + MimetypeBor = "application/x-bor-header" + MimetypeBorWitnessAnnounce = "application/x-bor-wit2-announce" + MimetypeTextPlain = "text/plain" ) // Wallet represents a software or hardware wallet that might contain one or more diff --git a/consensus/bor/bor.go b/consensus/bor/bor.go index 242ebc9bfd..bca6f98d65 100644 --- a/consensus/bor/bor.go +++ b/consensus/bor/bor.go @@ -1510,6 +1510,42 @@ func Sign(signFn SignerFn, signer common.Address, header *types.Header, c *param return nil } +// SignBytes signs the supplied preimage bytes under a context-specific +// mimetype using the engine's currently authorized signer. The mimetype is the +// domain tag the underlying signer (clef, keystore) sees, so callers MUST pass +// a context-specific value (e.g. accounts.MimetypeBorWitnessAnnounce) and +// never reuse accounts.MimetypeBor outside of header sealing — that would let +// a signature produced here be replayed as a block-seal signature on any +// header BorRLP that hashes to the same digest. +// +// Callers pass the unhashed preimage; the wallet's SignData implementation +// applies keccak256 once before signing. Verifiers must independently hash +// the same preimage and ecrecover against the resulting digest. 
+func (c *Bor) SignBytes(mimetype string, digest []byte) (signer common.Address, sig []byte, err error) { + if mimetype == "" || mimetype == accounts.MimetypeBor { + return common.Address{}, nil, errors.New("bor: SignBytes requires a non-empty, non-header mimetype") + } + current := c.authorizedSigner.Load() + if current == nil || current.signer == (common.Address{}) { + return common.Address{}, nil, errors.New("bor: no authorized signer configured") + } + sig, err = current.signFn(accounts.Account{Address: current.signer}, mimetype, digest) + if err != nil { + return common.Address{}, nil, err + } + return current.signer, sig, nil +} + +// CurrentSigner returns the address of the currently authorized signer, or +// the zero address if none has been configured. +func (c *Bor) CurrentSigner() common.Address { + current := c.authorizedSigner.Load() + if current == nil { + return common.Address{} + } + return current.signer +} + // CalcDifficulty is the difficulty adjustment algorithm. It returns the difficulty // that a new block should have based on the previous blocks in the chain and the // current signer. diff --git a/consensus/bor/signbytes_test.go b/consensus/bor/signbytes_test.go new file mode 100644 index 0000000000..bd7b2992b8 --- /dev/null +++ b/consensus/bor/signbytes_test.go @@ -0,0 +1,70 @@ +package bor + +import ( + "bytes" + "testing" + + "github.com/ethereum/go-ethereum/accounts" + "github.com/ethereum/go-ethereum/common" +) + +// TestSignBytesForwardsMimetype is the regression for the wit2 announce +// signing path's external-signer compatibility: bor.SignBytes must hand the +// caller-supplied mimetype to the configured signer untouched. Operators +// configuring Clef whitelist a specific string ("application/x-bor-wit2- +// announce"); if SignBytes ever rewrote, lower-cased, or stripped that, the +// signer would either reject the request or sign under a different domain. 
+// +// The test captures the (mimetype, payload) the wallet sees and asserts both +// match exactly what the caller passed. +func TestSignBytesForwardsMimetype(t *testing.T) { + bor := &Bor{} + addr := common.HexToAddress("0x1234") + + var ( + gotMimetype string + gotPayload []byte + ) + bor.Authorize(addr, func(_ accounts.Account, mimetype string, data []byte) ([]byte, error) { + gotMimetype = mimetype + gotPayload = append([]byte(nil), data...) + return make([]byte, 65), nil + }) + + preimage := []byte("wit2-announce-preimage") + signer, sig, err := bor.SignBytes(accounts.MimetypeBorWitnessAnnounce, preimage) + if err != nil { + t.Fatalf("SignBytes: %v", err) + } + if signer != addr { + t.Fatalf("signer addr mismatch: got %s want %s", signer, addr) + } + if len(sig) != 65 { + t.Fatalf("expected 65-byte signature, got %d", len(sig)) + } + if gotMimetype != accounts.MimetypeBorWitnessAnnounce { + t.Fatalf("mimetype not forwarded literally: got %q want %q", + gotMimetype, accounts.MimetypeBorWitnessAnnounce) + } + if !bytes.Equal(gotPayload, preimage) { + t.Fatalf("payload not forwarded literally: got %x want %x", gotPayload, preimage) + } +} + +// TestSignBytesRejectsHeaderMimetype guards against accidental cross-context +// reuse: callers must never pass MimetypeBor (header sealing) into SignBytes, +// since that would let an announce signature replay as a block-seal. 
+func TestSignBytesRejectsHeaderMimetype(t *testing.T) { + bor := &Bor{} + bor.Authorize(common.HexToAddress("0x1234"), func(accounts.Account, string, []byte) ([]byte, error) { + t.Fatal("signFn must not be reached for rejected mimetype") + return nil, nil + }) + + if _, _, err := bor.SignBytes("", []byte{0x01}); err == nil { + t.Fatal("empty mimetype must be rejected") + } + if _, _, err := bor.SignBytes(accounts.MimetypeBor, []byte{0x01}); err == nil { + t.Fatal("MimetypeBor must be rejected to prevent header-seal replay") + } +} diff --git a/core/stateless/encoding.go b/core/stateless/encoding.go index e955b9c962..09f7d389e7 100644 --- a/core/stateless/encoding.go +++ b/core/stateless/encoding.go @@ -17,7 +17,9 @@ package stateless import ( + "bytes" "io" + "sort" "github.com/ethereum/go-ethereum/common/hexutil" "github.com/ethereum/go-ethereum/core/types" @@ -84,19 +86,29 @@ func (w *Witness) fromExtWitness(ext *ExtWitness) error { // EncodeRLP serializes a witness as RLP using the canonical BorWitness 3-field // format. Only state trie nodes are encoded; contract bytecodes are not // included in the wire format. +// +// State entries are sorted lexicographically before encoding so the output is +// byte-identical for any two witnesses with the same logical contents. Without +// this, Go's randomized map iteration would produce different bytes per call, +// breaking any code that hashes the encoded witness for content addressing — +// notably the WIT2 BP-signed witness hash, which is computed by both producer +// and verifiers and must match exactly. 
func (w *Witness) EncodeRLP(wr io.Writer) error { w.lock.RLock() defer w.lock.RUnlock() - bw := &BorWitness{ - Context: w.context, - Headers: w.Headers, - State: make([][]byte, 0, len(w.State)), - } + state := make([][]byte, 0, len(w.State)) for node := range w.State { - bw.State = append(bw.State, []byte(node)) + state = append(state, []byte(node)) } - return rlp.Encode(wr, bw) + sort.Slice(state, func(i, j int) bool { + return bytes.Compare(state[i], state[j]) < 0 + }) + return rlp.Encode(wr, &BorWitness{ + Context: w.context, + Headers: w.Headers, + State: state, + }) } // DecodeRLP decodes a witness from RLP. It first attempts the canonical diff --git a/core/stateless/encoding_test.go b/core/stateless/encoding_test.go index eded87b541..1327587fff 100644 --- a/core/stateless/encoding_test.go +++ b/core/stateless/encoding_test.go @@ -180,3 +180,62 @@ func TestRoundtrip_BorWitnessFormat(t *testing.T) { t.Errorf("Codes should be empty after BorWitness roundtrip, got %d", len(decoded.Codes)) } } + +// TestEncodeRLP_DeterministicAcrossInsertionOrder is the regression test for +// the WIT2 byte-blame model. State entries arrive via a Go map, whose +// iteration order is randomised, so without sorting in EncodeRLP two +// witnesses with identical logical content would encode to different bytes +// and hash differently. Receivers verifying response bytes against the BP- +// signed witness hash would falsely drop honest peers. 
+func TestEncodeRLP_DeterministicAcrossInsertionOrder(t *testing.T) { + const N = 64 + nodes := make([][]byte, N) + for i := 0; i < N; i++ { + nodes[i] = []byte{byte(i), byte(i ^ 0x5a), byte(i ^ 0xa5)} + } + + makeWitness := func(insertionOrder []int) *Witness { + w := &Witness{ + Headers: []*types.Header{{Number: big.NewInt(1)}}, + Codes: make(map[string]struct{}), + State: make(map[string]struct{}, len(insertionOrder)), + } + w.context = &types.Header{Number: big.NewInt(2)} + for _, i := range insertionOrder { + w.State[string(nodes[i])] = struct{}{} + } + return w + } + + encode := func(w *Witness) []byte { + raw, err := rlp.EncodeToBytes(w) + if err != nil { + t.Fatalf("encode: %v", err) + } + return raw + } + + forward := make([]int, N) + for i := range forward { + forward[i] = i + } + reverse := make([]int, N) + for i := range reverse { + reverse[i] = N - 1 - i + } + + wForward := makeWitness(forward) + wReverse := makeWitness(reverse) + if got, want := encode(wForward), encode(wReverse); string(got) != string(want) { + t.Fatalf("EncodeRLP must be deterministic across map insertion orders; got divergent bytes (%d vs %d)", len(got), len(want)) + } + + // Re-encoding the same witness multiple times must also yield identical + // bytes, even though Go map iteration is fresh each call. + first := encode(wForward) + for i := 0; i < 5; i++ { + if string(encode(wForward)) != string(first) { + t.Fatalf("repeat encode call %d differs from first", i) + } + } +} diff --git a/core/stateless/witness_bench_test.go b/core/stateless/witness_bench_test.go new file mode 100644 index 0000000000..caeedd79a5 --- /dev/null +++ b/core/stateless/witness_bench_test.go @@ -0,0 +1,110 @@ +package stateless + +import ( + "crypto/rand" + "fmt" + "testing" + + "github.com/ethereum/go-ethereum/crypto" +) + +// BenchmarkWitnessEncodeRLP measures the cost of EncodeRLP, which sorts +// state nodes lexicographically before serialization. Surfaces regressions if +// the comparator changes (e.g. 
swapping bytes.Compare for an allocating +// alternative). Synthetic 50 MiB witness with realistic node sizes. +func BenchmarkWitnessEncodeRLP(b *testing.B) { + for _, sizeMiB := range []int{1, 15, 50} { + w := buildSyntheticWitness(sizeMiB<<20, 256) + b.Run(fmt.Sprintf("%dMiB", sizeMiB), func(b *testing.B) { + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + if err := w.EncodeRLP(discardWriter{}); err != nil { + b.Fatalf("encode: %v", err) + } + } + }) + } +} + +type discardWriter struct{} + +func (discardWriter) Write(p []byte) (int, error) { return len(p), nil } + +// BenchmarkWitnessKeccakBySize measures the throughput of keccak256 over a +// pre-allocated witness-sized buffer. This is the cost the producer pays to +// compute WitnessHash on the WIT2 announce path (and the cost a relayer or +// requester pays to verify response bytes against the BP-signed WitnessHash). +// +// Run with `go test -bench=BenchmarkWitnessKeccakBySize ./core/stateless/`. +// b.SetBytes lets `go test -benchmem` print throughput in MB/s alongside ns/op, +// which is what we actually want to know — the absolute size of any one +// witness varies, but per-byte cost scales linearly. +func BenchmarkWitnessKeccakBySize(b *testing.B) { + for _, sizeMiB := range []int{1, 5, 15, 30, 50} { + size := sizeMiB << 20 + buf := make([]byte, size) + if _, err := rand.Read(buf); err != nil { + b.Fatalf("rand: %v", err) + } + b.Run(fmt.Sprintf("%dMiB", sizeMiB), func(b *testing.B) { + b.SetBytes(int64(size)) + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = crypto.Keccak256Hash(buf) + } + }) + } +} + +// BenchmarkWitnessAnnounceSign measures the marginal ECDSA cost of signing the +// 32-byte announcement digest, independent of witness size. This isolates the +// secp256k1 sign cost from the keccak cost so a single number per platform is +// directly comparable to libsecp256k1 microbenchmarks. 
+func BenchmarkWitnessAnnounceSign(b *testing.B) { + key, err := crypto.GenerateKey() + if err != nil { + b.Fatalf("key: %v", err) + } + digest := make([]byte, 32) + if _, err := rand.Read(digest); err != nil { + b.Fatalf("rand: %v", err) + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + if _, err := crypto.Sign(digest, key); err != nil { + b.Fatalf("sign: %v", err) + } + } +} + +// BenchmarkWitnessHashAndSignCombined measures the realistic producer-side +// cost of the WIT2 announce path: keccak256 over witness bytes followed by +// ECDSA sign over the (small) signing digest. This is the latency the BP +// adds before emitting a signed announce. Compare against the ~500ms-per-hop +// savings: as long as this stays well under the savings, the change is a +// net win even at 50 MiB witnesses. +func BenchmarkWitnessHashAndSignCombined(b *testing.B) { + key, err := crypto.GenerateKey() + if err != nil { + b.Fatalf("key: %v", err) + } + for _, sizeMiB := range []int{1, 5, 15, 30, 50} { + size := sizeMiB << 20 + buf := make([]byte, size) + if _, err := rand.Read(buf); err != nil { + b.Fatalf("rand: %v", err) + } + b.Run(fmt.Sprintf("%dMiB", sizeMiB), func(b *testing.B) { + b.SetBytes(int64(size)) + b.ResetTimer() + for i := 0; i < b.N; i++ { + witnessHash := crypto.Keccak256Hash(buf) + digest := crypto.Keccak256Hash(witnessHash[:], []byte{0x01, 0x02, 0x03, 0x04}) + if _, err := crypto.Sign(digest[:], key); err != nil { + b.Fatalf("sign: %v", err) + } + } + }) + } +} diff --git a/core/stateless/witness_commit.go b/core/stateless/witness_commit.go new file mode 100644 index 0000000000..4fe42cb0b4 --- /dev/null +++ b/core/stateless/witness_commit.go @@ -0,0 +1,116 @@ +// Copyright 2024 The go-ethereum Authors +// This file is part of the go-ethereum library. 
+// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package stateless + +import ( + "bytes" + "runtime" + "sync" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" +) + +// WitnessCommitChunkBytes is the protocol-fixed chunk size for the WIT2 +// witness commitment. Producer and verifier MUST agree on this constant. +// Changing it changes the meaning of every WitnessHash on the wire. +const WitnessCommitChunkBytes = 1 << 20 // 1 MiB + +// witnessCommitMaxWorkers caps the keccak fan-out. The chosen value reflects +// the bench finding on Apple M4 Pro that 8 P-cores saturate the keccak +// primitive; over-subscribing onto E-cores doesn't add throughput. +const witnessCommitMaxWorkers = 8 + +// WitnessCommitHash returns the WIT2 witness commitment over the canonical +// RLP encoding of a witness: keccak256 of the concatenation of chunk hashes, +// where each chunk is keccak256 over a WitnessCommitChunkBytes-sized window +// of rlpBytes. The output is invariant in worker count — only the input +// bytes and the chunk-size constant determine the result, so producer and +// verifier always agree byte-for-byte regardless of GOMAXPROCS. +// +// Empty input returns the zero hash, distinct from keccak256("") so empty +// witnesses are unambiguously identified across the protocol. 
+func WitnessCommitHash(rlpBytes []byte) common.Hash { + if len(rlpBytes) == 0 { + return common.Hash{} + } + chunks := splitWitnessChunks(rlpBytes, WitnessCommitChunkBytes) + chunkHashes := make([]common.Hash, len(chunks)) + + // Single-chunk inputs (≤1 MiB) skip the goroutine pool — the fan-out cost + // would dominate the keccak. + if len(chunks) == 1 { + chunkHashes[0] = crypto.Keccak256Hash(chunks[0]) + } else { + workers := runtime.GOMAXPROCS(0) + if workers > witnessCommitMaxWorkers { + workers = witnessCommitMaxWorkers + } + if workers > len(chunks) { + workers = len(chunks) + } + if workers < 1 { + workers = 1 + } + var wg sync.WaitGroup + work := make(chan int, len(chunks)) + for w := 0; w < workers; w++ { + wg.Add(1) + go func() { + defer wg.Done() + for i := range work { + chunkHashes[i] = crypto.Keccak256Hash(chunks[i]) + } + }() + } + for i := range chunks { + work <- i + } + close(work) + wg.Wait() + } + + concat := make([]byte, 0, len(chunkHashes)*common.HashLength) + for _, h := range chunkHashes { + concat = append(concat, h[:]...) + } + return crypto.Keccak256Hash(concat) +} + +// WitnessCommitHashFromWitness encodes a witness with the canonical sorted +// EncodeRLP and returns its WitnessCommitHash. Callers that already have +// canonical RLP bytes should use WitnessCommitHash directly to skip the +// re-encoding cost. 
+func WitnessCommitHashFromWitness(w *Witness) (common.Hash, error) { + var buf bytes.Buffer + if err := w.EncodeRLP(&buf); err != nil { + return common.Hash{}, err + } + return WitnessCommitHash(buf.Bytes()), nil +} + +func splitWitnessChunks(buf []byte, chunkSize int) [][]byte { + out := make([][]byte, 0, (len(buf)+chunkSize-1)/chunkSize) + for i := 0; i < len(buf); i += chunkSize { + end := i + chunkSize + if end > len(buf) { + end = len(buf) + } + out = append(out, buf[i:end]) + } + return out +} diff --git a/core/stateless/witness_commit_bench_test.go b/core/stateless/witness_commit_bench_test.go new file mode 100644 index 0000000000..c27796f522 --- /dev/null +++ b/core/stateless/witness_commit_bench_test.go @@ -0,0 +1,290 @@ +package stateless + +import ( + "bytes" + "crypto/ecdsa" + "fmt" + "sort" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" +) + +// Witness sizes the bench iterates. Mirrors the approved plan's matrix. +var benchSizesMiB = []int{1, 5, 15, 30, 50} + +// Core counts for the parallel candidates. cores=1 lets us see the +// single-thread baseline directly inside the same matrix; 8 reflects modern +// validator/relayer hardware. +var benchCores = []int{1, 2, 4, 8} + +// preparedWitness holds an already-built synthetic witness alongside its +// canonical encoded bytes and root hash, so each Benchmark sub-run pays +// the construction cost once outside the timed loop. +type preparedWitness struct { + w *Witness + rlpBytes []byte + // rootForD: a synthetic "state root" the intrinsic walk starts from. + // Picked deterministically from the witness's set so D's positive + // path resolves; without an MPT we can't reconstruct a real root, and + // the bench cares about per-node keccak throughput + walk cost shape. 
+ rootForD common.Hash +} + +func prepareWitness(b *testing.B, sizeMiB int) preparedWitness { + b.Helper() + w := buildSyntheticWitness(sizeMiB<<20, 256) + var buf bytes.Buffer + if err := w.EncodeRLP(&buf); err != nil { + b.Fatalf("encode: %v", err) + } + rlpBytes := buf.Bytes() + // Pick the lex-smallest node-hash as the synthetic root for D so the + // walk has a definite entry point. Realistic verifier uses + // header.StateRoot; the hash we pick is functionally equivalent for + // timing purposes. + hashes := make([]common.Hash, 0, len(w.State)) + for n := range w.State { + hashes = append(hashes, crypto.Keccak256Hash([]byte(n))) + } + sort.Slice(hashes, func(i, j int) bool { + return string(hashes[i][:]) < string(hashes[j][:]) + }) + var root common.Hash + if len(hashes) > 0 { + root = hashes[0] + } + return preparedWitness{w: w, rlpBytes: rlpBytes, rootForD: root} +} + +// BenchmarkCommit_A_BlobKeccak — current baseline. Single-threaded keccak +// over the canonical RLP encoding. +func BenchmarkCommit_A_BlobKeccak(b *testing.B) { + for _, mib := range benchSizesMiB { + pw := prepareWitness(b, mib) + b.Run(fmt.Sprintf("%dMiB", mib), func(b *testing.B) { + b.SetBytes(int64(len(pw.rlpBytes))) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = candidateA_BlobKeccak(pw.rlpBytes) + } + }) + } +} + +// BenchmarkCommit_B_PageParallel — page-aligned (15 MiB) parallel keccak, +// aggregate via concat+keccak. cores=K parallelism. +func BenchmarkCommit_B_PageParallel(b *testing.B) { + for _, mib := range benchSizesMiB { + pw := prepareWitness(b, mib) + for _, cores := range benchCores { + b.Run(fmt.Sprintf("%dMiB/cores=%d", mib, cores), func(b *testing.B) { + b.SetBytes(int64(len(pw.rlpBytes))) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = candidateB_PageParallel(pw.rlpBytes, cores) + } + }) + } + } +} + +// BenchmarkCommit_C_PerNodeMerkle — per-node hash + sort + Merkle build. 
+// Includes node hashing in the timed region so this is the verifier-side +// cost. The producer-only cost is captured separately below. +func BenchmarkCommit_C_PerNodeMerkle(b *testing.B) { + for _, mib := range benchSizesMiB { + pw := prepareWitness(b, mib) + for _, cores := range benchCores { + b.Run(fmt.Sprintf("%dMiB/cores=%d", mib, cores), func(b *testing.B) { + b.SetBytes(int64(len(pw.rlpBytes))) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = candidateC_PerNodeMerkle(pw.w, cores) + } + }) + } + } +} + +// BenchmarkCommit_B_ChunkSize sweeps chunk size for B while holding +// cores=8. Answers "is 15 MiB the right page size for parallelism, or +// would smaller chunks win?". Pinned to 50 MiB because that's where the +// answer matters; smaller witnesses don't have headroom to split. +func BenchmarkCommit_B_ChunkSize(b *testing.B) { + pw := prepareWitness(b, 50) + chunks := []int{ + 512 * 1024, // 512 KiB + 1 * 1024 * 1024, // 1 MiB + 2 * 1024 * 1024, // 2 MiB + 4 * 1024 * 1024, // 4 MiB + 8 * 1024 * 1024, // 8 MiB + 15 * 1024 * 1024, // 15 MiB (current wire page) + } + for _, c := range chunks { + b.Run(fmt.Sprintf("chunk=%dKiB/cores=8", c>>10), func(b *testing.B) { + b.SetBytes(int64(len(pw.rlpBytes))) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = candidateB_PageParallelChunked(pw.rlpBytes, c, 8) + } + }) + } + // Also try cores=12 (all logical cores) at the smallest chunks to + // see if the M4 Pro's E-cores help at finer granularity. + for _, c := range []int{512 * 1024, 1 * 1024 * 1024, 2 * 1024 * 1024} { + b.Run(fmt.Sprintf("chunk=%dKiB/cores=12", c>>10), func(b *testing.B) { + b.SetBytes(int64(len(pw.rlpBytes))) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = candidateB_PageParallelChunked(pw.rlpBytes, c, 12) + } + }) + } +} + +// BenchmarkProducerSign_C_ZeroCost — producer's incremental work +// post-execution: sort N hashes + Merkle build + ECDSA sign. 
Validates +// the "zero hashing cost on producer" claim by feeding precomputed hashes. +func BenchmarkProducerSign_C_ZeroCost(b *testing.B) { + key, err := crypto.GenerateKey() + if err != nil { + b.Fatalf("key: %v", err) + } + for _, mib := range benchSizesMiB { + pw := prepareWitness(b, mib) + // Pre-hash & pre-sort the node set so the timed region only + // includes Merkle build and ECDSA sign (the two pieces the + // producer would actually pay). + hashes := make([]common.Hash, 0, len(pw.w.State)) + for n := range pw.w.State { + hashes = append(hashes, crypto.Keccak256Hash([]byte(n))) + } + sort.Slice(hashes, func(i, j int) bool { + return string(hashes[i][:]) < string(hashes[j][:]) + }) + b.Run(fmt.Sprintf("%dMiB", mib), func(b *testing.B) { + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + root := candidateC_ProducerOnly(hashes) + if _, err := signECDSA(key, root[:]); err != nil { + b.Fatalf("sign: %v", err) + } + } + }) + } +} + +// BenchmarkVerify_D_IntrinsicHashAll — D's verifier-side incremental cost +// over chain-prep baseline: parallel per-node keccak. The reachability +// walk and map build are amortized into MakeHashDB in production and are +// asymptotically negligible vs the keccak phase, so we exclude them here +// to avoid measuring noise. Producer cost for D is exactly zero (header +// is already signed; no separate WitnessHash signature exists). 
+func BenchmarkVerify_D_IntrinsicHashAll(b *testing.B) { + for _, mib := range benchSizesMiB { + pw := prepareWitness(b, mib) + for _, cores := range benchCores { + b.Run(fmt.Sprintf("%dMiB/cores=%d", mib, cores), func(b *testing.B) { + b.SetBytes(int64(len(pw.rlpBytes))) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + candidateD_HashAll(pw.w, cores) + } + }) + } + } +} + +func signECDSA(key *ecdsa.PrivateKey, digest []byte) ([]byte, error) { + return crypto.Sign(digest, key) +} + +// ---------------------------------------------------------------------------- +// Correctness checks (Test*) for B/C/D so the bench numbers reflect +// implementations that actually do the right thing. +// ---------------------------------------------------------------------------- + +// TestCandidateB_PageAggregateDeterministic guards the determinism property +// the bench depends on: two runs over identical input produce identical +// aggregate hashes. Without this, the bench number for B would be +// meaningless. +func TestCandidateB_PageAggregateDeterministic(t *testing.T) { + in := bytes.Repeat([]byte{0xab}, 20<<20) // 20 MiB → 2 pages at 15 MiB + a := candidateB_PageParallel(in, 4) + bb := candidateB_PageParallel(in, 4) + if a != bb { + t.Fatalf("B is non-deterministic across runs: %s vs %s", a.Hex(), bb.Hex()) + } +} + +// TestCandidateC_OrderInvariant guards the property that motivates C: the +// Merkle root over sorted node hashes is invariant under map iteration +// order. Build a Witness, hash it, mutate insertion order via fresh map, +// hash again, must match. +func TestCandidateC_OrderInvariant(t *testing.T) { + w := buildSyntheticWitness(2<<20, 512) + root1 := candidateC_PerNodeMerkle(w, 1) + + // Rebuild with the same node set but different insertion order. 
+ nodes := make([][]byte, 0, len(w.State)) + for n := range w.State { + nodes = append(nodes, []byte(n)) + } + w2 := &Witness{Codes: make(map[string]struct{}), State: make(map[string]struct{})} + w2.Headers = w.Headers + w2.context = w.context + for i := len(nodes) - 1; i >= 0; i-- { + w2.State[string(nodes[i])] = struct{}{} + } + root2 := candidateC_PerNodeMerkle(w2, 1) + if root1 != root2 { + t.Fatalf("C is order-sensitive: %s vs %s", root1.Hex(), root2.Hex()) + } +} + +// TestCandidateD_DetectsMissingNode guards D's load-bearing property: a +// witness missing a referenced node fails the walk. Without this, D would +// silently accept incomplete witnesses, defeating the byte-blame-pre- +// execute argument. +// +// We build a tiny tree manually: node A embeds keccak(B); node B embeds +// keccak(C); C is a leaf. Walking from keccak(A) succeeds. Deleting B +// from the witness must make the walk fail. +func TestCandidateD_DetectsMissingNode(t *testing.T) { + leafC := []byte("leaf-payload-C-padded-to-some-bytes-xyz") + hashC := crypto.Keccak256Hash(leafC) + + nodeB := append([]byte("node-B-prefix-padding-"), hashC[:]...) + hashB := crypto.Keccak256Hash(nodeB) + + nodeA := append([]byte("node-A-prefix-padding-"), hashB[:]...) + hashA := crypto.Keccak256Hash(nodeA) + + w := &Witness{ + Codes: make(map[string]struct{}), + State: map[string]struct{}{ + string(nodeA): {}, + string(nodeB): {}, + string(leafC): {}, + }, + } + if !candidateD_IntrinsicWalk(w, hashA, 1) { + t.Fatal("baseline walk failed; the manual A→B→C chain is malformed") + } + + // Drop B; the walk from A must fail because A's reference to B + // dangles. 
+ delete(w.State, string(nodeB)) + if candidateD_IntrinsicWalk(w, hashA, 1) { + t.Fatal("D accepted a witness missing a referenced node; byte-blame-pre-execute is broken") + } +} diff --git a/core/stateless/witness_commit_helpers_test.go b/core/stateless/witness_commit_helpers_test.go new file mode 100644 index 0000000000..c7ea372c47 --- /dev/null +++ b/core/stateless/witness_commit_helpers_test.go @@ -0,0 +1,326 @@ +package stateless + +import ( + "crypto/rand" + "encoding/binary" + "math/big" + "sort" + "sync" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/crypto" +) + +// All identifiers in this file are _test.go-scoped and exist only to drive +// the witness-commit benchmarks. Nothing here is referenced from production +// code; the file is throwaway-friendly per the research-only plan. + +// buildSyntheticWitness constructs a Witness whose canonical EncodeRLP +// output is approximately targetBytes. It populates State with random byte +// blobs of size avgNodeBytes, mimicking how MPT trie nodes accumulate during +// execution. Headers + context carry minimal valid data so EncodeRLP / +// DecodeRLP round-trip without errors; the bench cares about state-bytes +// throughput, not header layout. +func buildSyntheticWitness(targetBytes, avgNodeBytes int) *Witness { + if avgNodeBytes <= 0 { + avgNodeBytes = 256 + } + w := &Witness{ + context: &types.Header{Number: big.NewInt(1)}, + Headers: []*types.Header{{Number: big.NewInt(0)}}, + Codes: make(map[string]struct{}), + State: make(map[string]struct{}), + } + nodeCount := targetBytes / avgNodeBytes + if nodeCount <= 0 { + nodeCount = 1 + } + buf := make([]byte, avgNodeBytes) + for i := 0; i < nodeCount; i++ { + // Distinct content for each node so keccak hashes don't collide and + // the encoded set has the expected size on the wire. 
+ binary.BigEndian.PutUint64(buf[:8], uint64(i)) + if _, err := rand.Read(buf[8:]); err != nil { + panic(err) + } + w.State[string(buf)] = struct{}{} + } + return w +} + +// candidateA_BlobKeccak — current scheme. Keccak over the canonical RLP +// encoding of the entire witness. Single-threaded by design. +func candidateA_BlobKeccak(rlpBytes []byte) common.Hash { + return crypto.Keccak256Hash(rlpBytes) +} + +// candidateB_PageParallel hashes the input in fixed-size pages (15 MiB to +// match the wire fragmentation), each page in its own goroutine, then +// keccaks the concatenation of page hashes. The result is the value the BP +// would sign and the verifier would compare against. +// +// pageSize: 15 MiB to mirror the wire frag. cores: number of goroutines to +// use; honest callers pass GOMAXPROCS or a small constant. +const witnessPageBytes = 15 * 1024 * 1024 + +func candidateB_PageParallel(rlpBytes []byte, cores int) common.Hash { + return candidateB_PageParallelChunked(rlpBytes, witnessPageBytes, cores) +} + +// candidateB_PageParallelChunked is B with an explicit chunk-size knob so +// we can sweep below the 15 MiB wire-page boundary. Chunks smaller than +// the wire page would mean BP signs over a finer-grained aggregate, but +// this is internal accounting — wire pages stay 15 MiB, the producer just +// further subdivides them for hashing. +func candidateB_PageParallelChunked(rlpBytes []byte, chunkBytes, cores int) common.Hash { + pages := splitPages(rlpBytes, chunkBytes) + pageHashes := make([]common.Hash, len(pages)) + + if cores < 1 { + cores = 1 + } + if cores > len(pages) { + cores = len(pages) + } + + var wg sync.WaitGroup + work := make(chan int, len(pages)) + for w := 0; w < cores; w++ { + wg.Add(1) + go func() { + defer wg.Done() + for i := range work { + pageHashes[i] = crypto.Keccak256Hash(pages[i]) + } + }() + } + for i := range pages { + work <- i + } + close(work) + wg.Wait() + + // Aggregate is keccak over concat of page hashes. 
Order is wire-page + // order (pinned by the producer's chunking). + var concat []byte + for _, h := range pageHashes { + concat = append(concat, h[:]...) + } + return crypto.Keccak256Hash(concat) +} + +func splitPages(buf []byte, pageSize int) [][]byte { + if len(buf) == 0 { + return nil + } + out := make([][]byte, 0, (len(buf)+pageSize-1)/pageSize) + for i := 0; i < len(buf); i += pageSize { + end := i + pageSize + if end > len(buf) { + end = len(buf) + } + out = append(out, buf[i:end]) + } + return out +} + +// candidateC_PerNodeMerkle hashes every state node, sorts the hashes +// lexicographically, and returns a Merkle root over the sorted hashes. +// Each node hash is independent → trivially parallelizable. +// +// On the producer side the BP already has every node's keccak from +// execution, so the per-node hash phase costs zero in steady state. This +// helper still computes the hashes from bytes because the bench needs +// realistic timings without a producer-side trie cache stub. +func candidateC_PerNodeMerkle(w *Witness, cores int) common.Hash { + w.lock.RLock() + nodes := make([][]byte, 0, len(w.State)) + for n := range w.State { + nodes = append(nodes, []byte(n)) + } + w.lock.RUnlock() + + hashes := make([]common.Hash, len(nodes)) + if cores < 1 { + cores = 1 + } + var wg sync.WaitGroup + work := make(chan int, len(nodes)) + for ww := 0; ww < cores; ww++ { + wg.Add(1) + go func() { + defer wg.Done() + for i := range work { + hashes[i] = crypto.Keccak256Hash(nodes[i]) + } + }() + } + for i := range nodes { + work <- i + } + close(work) + wg.Wait() + + sort.Slice(hashes, func(i, j int) bool { + return string(hashes[i][:]) < string(hashes[j][:]) + }) + return merkleRoot(hashes) +} + +// candidateC_ProducerOnly captures the "producer has hashes for free" +// claim: given the precomputed sorted hashes, only Merkle-build cost +// remains. 
The bench feeds a precomputed slice so we measure JUST the +// reduction stage, isolating the win on the producer's announce path. +func candidateC_ProducerOnly(sortedHashes []common.Hash) common.Hash { + return merkleRoot(sortedHashes) +} + +// merkleRoot builds a binary Merkle tree (keccak over left||right pairs) +// over `leaves` and returns the root. Empty input → zero hash. Odd levels +// duplicate the last leaf (RFC-6962-style). 32-byte leaves. +func merkleRoot(leaves []common.Hash) common.Hash { + if len(leaves) == 0 { + return common.Hash{} + } + level := make([]common.Hash, len(leaves)) + copy(level, leaves) + for len(level) > 1 { + if len(level)%2 == 1 { + level = append(level, level[len(level)-1]) + } + next := make([]common.Hash, len(level)/2) + var buf [64]byte + for i := 0; i < len(level); i += 2 { + copy(buf[:32], level[i][:]) + copy(buf[32:], level[i+1][:]) + next[i/2] = crypto.Keccak256Hash(buf[:]) + } + level = next + } + return level[0] +} + +// candidateD_HashAll is the BENCHMARK helper for D — parallel per-node +// keccak only. No walk, no map build. In production, D's verifier cost is +// essentially "hash every node" because: +// - RLP decode of the witness already happens (cost is paid by both A and D). +// - MakeHashDB already iterates all nodes and keccaks each, so the +// walker's per-node hash work is amortized into existing state-prep. +// - The walker traversal is O(num_nodes × avg_refs_per_node) map lookups, +// dwarfed by keccak throughput on the underlying bytes. +// +// We measure D's incremental cost over the chain-prep baseline as just the +// parallel keccak phase. The reachability walk lives in +// candidateD_IntrinsicWalk for the correctness test below. 
+func candidateD_HashAll(w *Witness, cores int) { + w.lock.RLock() + nodes := make([][]byte, 0, len(w.State)) + for n := range w.State { + nodes = append(nodes, []byte(n)) + } + w.lock.RUnlock() + + if cores < 1 { + cores = 1 + } + var wg sync.WaitGroup + work := make(chan int, len(nodes)) + for ww := 0; ww < cores; ww++ { + wg.Add(1) + go func() { + defer wg.Done() + for i := range work { + _ = crypto.Keccak256Hash(nodes[i]) + } + }() + } + for i := range nodes { + work <- i + } + close(work) + wg.Wait() +} + +// candidateD_IntrinsicWalk is the CORRECTNESS reference. It verifies that +// every node in the witness is reachable from the given root via byte- +// embedded hash references, and that no orphan nodes pad the witness. +// Returns true iff the walk reaches every node exactly once. +// +// Approximation: instead of RLP-parsing each node to extract real children, +// the walker scans the node's bytes for any 32-byte window matching a +// known node hash. With random synthetic content the false-positive rate +// is negligible. This is the function the test cases assert against. +// +// `cores` controls parallel hashing of nodes. Walk itself is sequential. 
+func candidateD_IntrinsicWalk(w *Witness, root common.Hash, cores int) bool { + w.lock.RLock() + nodes := make([][]byte, 0, len(w.State)) + for n := range w.State { + nodes = append(nodes, []byte(n)) + } + w.lock.RUnlock() + + type entry struct { + bytes []byte + hash common.Hash + } + hashed := make([]entry, len(nodes)) + if cores < 1 { + cores = 1 + } + var wg sync.WaitGroup + work := make(chan int, len(nodes)) + for ww := 0; ww < cores; ww++ { + wg.Add(1) + go func() { + defer wg.Done() + for i := range work { + hashed[i] = entry{bytes: nodes[i], hash: crypto.Keccak256Hash(nodes[i])} + } + }() + } + for i := range nodes { + work <- i + } + close(work) + wg.Wait() + + byHash := make(map[common.Hash][]byte, len(hashed)) + for _, e := range hashed { + byHash[e.hash] = e.bytes + } + // Walk: starting from root, scan node bytes for 32-byte sequences that + // match another node's hash. Treat every such sequence as a child + // reference. Visit each node once. + queue := []common.Hash{root} + visited := make(map[common.Hash]struct{}, len(byHash)) + for len(queue) > 0 { + h := queue[0] + queue = queue[1:] + if _, seen := visited[h]; seen { + continue + } + visited[h] = struct{}{} + blob, ok := byHash[h] + if !ok { + // The walker reached a hash that isn't in the witness set. + // In real intrinsic-verify this means the witness is missing a + // node the trie depends on → server lied. Drop. + return false + } + for off := 0; off+32 <= len(blob); off++ { + var ref common.Hash + copy(ref[:], blob[off:off+32]) + if _, exists := byHash[ref]; exists { + if _, seen := visited[ref]; !seen { + queue = append(queue, ref) + } + } + } + } + // Every node in the witness must be reachable from the root. Bloated + // witnesses with orphan nodes are also a server lie (they're paying + // the verifier extra hash cost without contributing to execution). 
+ return len(visited) == len(byHash) +} diff --git a/core/stateless/witness_commit_test.go b/core/stateless/witness_commit_test.go new file mode 100644 index 0000000000..f1bd7d1a66 --- /dev/null +++ b/core/stateless/witness_commit_test.go @@ -0,0 +1,88 @@ +// Copyright 2024 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +package stateless + +import ( + "bytes" + "runtime" + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" +) + +func TestWitnessCommitHashDeterministic(t *testing.T) { + in := bytes.Repeat([]byte{0xab}, 5*WitnessCommitChunkBytes+1234) + a := WitnessCommitHash(in) + b := WitnessCommitHash(in) + if a != b { + t.Fatalf("non-deterministic: %s vs %s", a.Hex(), b.Hex()) + } +} + +// TestWitnessCommitHashWorkerInvariant pins the load-bearing property: the +// committed hash MUST NOT depend on GOMAXPROCS. If it does, two honest peers +// running with different parallelism would diverge on the same witness. +func TestWitnessCommitHashWorkerInvariant(t *testing.T) { + in := bytes.Repeat([]byte{0xcd}, 6*WitnessCommitChunkBytes+777) + prev := runtime.GOMAXPROCS(1) + defer runtime.GOMAXPROCS(prev) + one := WitnessCommitHash(in) + + runtime.GOMAXPROCS(8) + eight := WitnessCommitHash(in) + + if one != eight { + t.Fatalf("hash depends on GOMAXPROCS: 1=%s 8=%s", one.Hex(), eight.Hex()) + } +} + +// TestWitnessCommitHashEmptyInput pins the empty-witness behavior so producer +// and verifier agree on the degenerate case. 
+func TestWitnessCommitHashEmptyInput(t *testing.T) { + if got := WitnessCommitHash(nil); got != (common.Hash{}) { + t.Fatalf("expected zero hash for nil, got %s", got.Hex()) + } + if got := WitnessCommitHash([]byte{}); got != (common.Hash{}) { + t.Fatalf("expected zero hash for empty slice, got %s", got.Hex()) + } +} + +// TestWitnessCommitHashSingleSubChunk pins the small-input shape: an input +// shorter than one chunk hashes to keccak256(keccak256(input)), since the +// scheme always wraps a final aggregate-keccak around the chunk-hash list. +func TestWitnessCommitHashSingleSubChunk(t *testing.T) { + in := bytes.Repeat([]byte{0x42}, 4096) + got := WitnessCommitHash(in) + + inner := crypto.Keccak256Hash(in) + want := crypto.Keccak256Hash(inner[:]) + if got != want { + t.Fatalf("single-subchunk shape mismatch: got %s want %s", got.Hex(), want.Hex()) + } +} + +// TestWitnessCommitHashMultiChunkShape spot-checks the multi-chunk recipe so a +// silent change in concat order or chunking would be caught immediately. +func TestWitnessCommitHashMultiChunkShape(t *testing.T) { + a := bytes.Repeat([]byte{0x01}, WitnessCommitChunkBytes) + b := bytes.Repeat([]byte{0x02}, WitnessCommitChunkBytes) + c := bytes.Repeat([]byte{0x03}, 1234) + in := append(append(append([]byte{}, a...), b...), c...) + + ha := crypto.Keccak256Hash(a) + hb := crypto.Keccak256Hash(b) + hc := crypto.Keccak256Hash(c) + concat := append(append(append([]byte{}, ha[:]...), hb[:]...), hc[:]...) + want := crypto.Keccak256Hash(concat) + + if got := WitnessCommitHash(in); got != want { + t.Fatalf("multi-chunk shape mismatch: got %s want %s", got.Hex(), want.Hex()) + } +} diff --git a/eth/fetcher/block_fetcher.go b/eth/fetcher/block_fetcher.go index 5f0740002c..244979587f 100644 --- a/eth/fetcher/block_fetcher.go +++ b/eth/fetcher/block_fetcher.go @@ -260,7 +260,7 @@ type BlockFetcher struct { } // NewBlockFetcher creates a block fetcher to retrieve blocks based on hash announcements. 
-func NewBlockFetcher(light bool, getHeader HeaderRetrievalFn, getBlock blockRetrievalFn, verifyHeader headerVerifierFn, broadcastBlock blockBroadcasterFn, chainHeight chainHeightFn, currentHeader currentHeaderFn, insertHeaders headersInsertFn, insertChain chainInsertFn, dropPeer peerDropFn, jailPeer peerJailFn, enableBlockTracking bool, requireWitness bool, gasCeil uint64) *BlockFetcher { +func NewBlockFetcher(light bool, getHeader HeaderRetrievalFn, getBlock blockRetrievalFn, verifyHeader headerVerifierFn, broadcastBlock blockBroadcasterFn, chainHeight chainHeightFn, currentHeader currentHeaderFn, insertHeaders headersInsertFn, insertChain chainInsertFn, dropPeer peerDropFn, jailPeer peerJailFn, enableBlockTracking bool, requireWitness bool, gasCeil uint64, signedWitnessHash signedWitnessHashFn, cacheWitnessForServing cacheWitnessForServingFn) *BlockFetcher { f := &BlockFetcher{ light: light, notify: make(chan *blockAnnounce), @@ -302,6 +302,8 @@ func NewBlockFetcher(light bool, getHeader HeaderRetrievalFn, getBlock blockRetr f.getHeader, f.chainHeight, f.currentHeader, + signedWitnessHash, + cacheWitnessForServing, gasCeil, ) diff --git a/eth/fetcher/block_fetcher_race_test.go b/eth/fetcher/block_fetcher_race_test.go index b7044988f0..d5a6876010 100644 --- a/eth/fetcher/block_fetcher_race_test.go +++ b/eth/fetcher/block_fetcher_race_test.go @@ -55,6 +55,8 @@ func TestBlockFetcherConcurrentMapAccess(t *testing.T) { false, // no block tracking false, // no witness requirement 0, // no gas ceiling + nil, // no signed-witness lookup + nil, // no cache-witness-for-serving ) // Start the fetcher @@ -254,6 +256,8 @@ func TestWitnessManagerConcurrentAccess(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -489,6 +493,8 @@ func TestBlockFetcherMapStateConsistency(t *testing.T) { false, false, 0, + nil, + nil, ) fetcher.Start() @@ -549,6 +555,8 @@ func TestWitnessManagerStateConsistency(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ 
-611,6 +619,8 @@ func TestBlockFetcherMemoryLeaks(t *testing.T) { false, false, 0, + nil, + nil, ) fetcher.Start() @@ -669,6 +679,8 @@ func TestWitnessManagerMemoryLeaks(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) diff --git a/eth/fetcher/block_fetcher_test.go b/eth/fetcher/block_fetcher_test.go index 62619f6f95..98ce5b5f73 100644 --- a/eth/fetcher/block_fetcher_test.go +++ b/eth/fetcher/block_fetcher_test.go @@ -109,7 +109,7 @@ func newTester(light bool) *fetcherTester { blocks: map[common.Hash]*types.Block{genesis.Hash(): genesis}, drops: make(map[string]bool), } - tester.fetcher = NewBlockFetcher(light, tester.getHeader, tester.getBlock, tester.verifyHeader, tester.broadcastBlock, tester.chainHeight, nil, tester.insertHeaders, tester.insertChain, tester.dropPeer, nil, false, false, 0) + tester.fetcher = NewBlockFetcher(light, tester.getHeader, tester.getBlock, tester.verifyHeader, tester.broadcastBlock, tester.chainHeight, nil, tester.insertHeaders, tester.insertChain, tester.dropPeer, nil, false, false, 0, nil, nil) tester.fetcher.Start() return tester diff --git a/eth/fetcher/metrics.go b/eth/fetcher/metrics.go index ca5730cacb..d69315b95f 100644 --- a/eth/fetcher/metrics.go +++ b/eth/fetcher/metrics.go @@ -32,6 +32,10 @@ var ( witnessVerifyPeersInsuffMeter = metrics.NewRegisteredMeter("eth/fetcher/witness/verify/peers/insufficient", nil) witnessVerifyNoConsensusMeter = metrics.NewRegisteredMeter("eth/fetcher/witness/verify/consensus/none", nil) + // witnessByteMismatchMeter tracks WIT2 byte-correctness drops: a serving + // peer delivered bytes whose keccak256 did not match the BP-signed hash. 
+ witnessByteMismatchMeter = metrics.NewRegisteredMeter("eth/fetcher/witness/byte_mismatch", nil) + // Witness page count metrics witnessPageCountBelowThresholdMeter = metrics.NewRegisteredMeter("eth/fetcher/witness/pagecount/below_threshold", nil) witnessPageCountAboveThresholdMeter = metrics.NewRegisteredMeter("eth/fetcher/witness/pagecount/above_threshold", nil) diff --git a/eth/fetcher/witness_manager.go b/eth/fetcher/witness_manager.go index 9bfca5a52b..1f04bf0e79 100644 --- a/eth/fetcher/witness_manager.go +++ b/eth/fetcher/witness_manager.go @@ -1,6 +1,7 @@ package fetcher import ( + "bytes" "errors" "fmt" "strings" @@ -57,18 +58,35 @@ type cachedWitness struct { timestamp time.Time } +// signedWitnessHashFn returns the BP-signed witness content hash for a block, +// if a WIT2 signed announcement has been received and verified locally. It is +// used by the witness manager on fetch success to verify byte-correctness: +// if the encoded witness bytes don't hash to the signed witnessHash, the +// serving peer lied and is dropped. If no signed announcement is on file +// (e.g., WIT1-only fetch), the check is skipped. +type signedWitnessHashFn func(blockHash common.Hash) (witnessHash common.Hash, ok bool) + +// cacheWitnessForServingFn hands successfully-fetched witness bytes to the +// network handler so peers can serve them pre-import. Called only after the +// byte-correctness check (vs. BP-signed witnessHash, when present) has passed, +// so the cached bytes are safe to serve. The witnessHash is the canonical +// keccak256 of the canonical encoding, identical to what the BP signed. +type cacheWitnessForServingFn func(blockHash common.Hash, witnessBytes []byte, witnessHash common.Hash) + // witnessManager handles the logic specific to fetching and managing witnesses // for blocks, isolating it from the main BlockFetcher loop. 
type witnessManager struct { // Parent fetcher fields/methods required - parentQuit <-chan struct{} // Parent fetcher's quit channel - parentDropPeer peerDropFn // Function to drop a misbehaving peer - parentJailPeer peerJailFn // Function to jail a peer to prevent reconnection (optional) - parentEnqueueCh chan<- *enqueueRequest // Channel to send completed blocks+witnesses back - parentGetBlock blockRetrievalFn // Function to check if block is known locally - parentGetHeader HeaderRetrievalFn // Function to check if header is known locally (needed for checks) - parentChainHeight chainHeightFn // Retrieve chain height for distance checks - parentCurrentHeader currentHeaderFn // Retrieve current block header for gas limit + parentQuit <-chan struct{} // Parent fetcher's quit channel + parentDropPeer peerDropFn // Function to drop a misbehaving peer + parentJailPeer peerJailFn // Function to jail a peer to prevent reconnection (optional) + parentEnqueueCh chan<- *enqueueRequest // Channel to send completed blocks+witnesses back + parentGetBlock blockRetrievalFn // Function to check if block is known locally + parentGetHeader HeaderRetrievalFn // Function to check if header is known locally (needed for checks) + parentChainHeight chainHeightFn // Retrieve chain height for distance checks + parentCurrentHeader currentHeaderFn // Retrieve current block header for gas limit + parentSignedWitnessHash signedWitnessHashFn // WIT2: lookup a BP-signed witness hash for byte-correctness verification + parentCacheWitnessForServing cacheWitnessForServingFn // WIT2: hand bytes to the handler for pre-import serving by peers // Witness-specific state pending map[common.Hash]*witnessRequestState // Blocks waiting for witness or actively fetching. 
@@ -108,6 +126,8 @@ func newWitnessManager( parentGetHeader HeaderRetrievalFn, parentChainHeight chainHeightFn, parentCurrentHeader currentHeaderFn, + parentSignedWitnessHash signedWitnessHashFn, + parentCacheWitnessForServing cacheWitnessForServingFn, gasCeil uint64, ) *witnessManager { // Create TTL cache with 1 minute expiration for witnesses @@ -117,22 +137,24 @@ func newWitnessManager( ) m := &witnessManager{ - parentQuit: parentQuit, - parentDropPeer: parentDropPeer, - parentJailPeer: parentJailPeer, - parentEnqueueCh: parentEnqueueCh, - parentGetBlock: parentGetBlock, - parentGetHeader: parentGetHeader, - parentChainHeight: parentChainHeight, - parentCurrentHeader: parentCurrentHeader, - pending: make(map[common.Hash]*witnessRequestState), - witnessUnavailable: make(map[common.Hash]time.Time), - witnessCache: witnessCache, - gasCeil: gasCeil, - injectNeedWitnessCh: make(chan *injectBlockNeedWitnessMsg, 10), - injectWitnessCh: make(chan *injectedWitnessMsg, 10), - witnessTimer: time.NewTimer(0), - pokeCh: make(chan struct{}, 1), + parentQuit: parentQuit, + parentDropPeer: parentDropPeer, + parentJailPeer: parentJailPeer, + parentEnqueueCh: parentEnqueueCh, + parentGetBlock: parentGetBlock, + parentGetHeader: parentGetHeader, + parentChainHeight: parentChainHeight, + parentCurrentHeader: parentCurrentHeader, + parentSignedWitnessHash: parentSignedWitnessHash, + parentCacheWitnessForServing: parentCacheWitnessForServing, + pending: make(map[common.Hash]*witnessRequestState), + witnessUnavailable: make(map[common.Hash]time.Time), + witnessCache: witnessCache, + gasCeil: gasCeil, + injectNeedWitnessCh: make(chan *injectBlockNeedWitnessMsg, 10), + injectWitnessCh: make(chan *injectedWitnessMsg, 10), + witnessTimer: time.NewTimer(0), + pokeCh: make(chan struct{}, 1), } m.stopAndDrainTimer() return m @@ -631,15 +653,93 @@ func (m *witnessManager) processWitnessResponse(peer string, hash common.Hash, r return } if len(witness) == 0 { + // Empty/unavailable response: 
the peer doesn't have the body yet + // (e.g. WIT2 announce-only relayer that has not finished importing). + // This is a soft failure — back off the request so another peer can + // be tried, but do NOT drop the responder. Dropping on "no body" is + // what makes announce-only fallback peers unsafe to ask, which would + // erase the WIT2 multi-hop latency win at hop>=2. log.Debug("[wm] Received empty witness response from peer", "peer", peer, "hash", hash) - m.handleWitnessFetchFailureExt(hash, peer, errors.New("empty witness response"), false) + m.handleWitnessFetchFailureExt(hash, "", errors.New("empty witness response"), false) + return + } + + // WIT2: byte-correctness check. If we have a BP-signed announcement on + // file for this block, the encoded witness bytes must hash to the + // signed witnessHash. State-root failures (content-correctness) are + // handled later in the import path and do NOT drop the server. + body, witnessHash, ok := m.verifyAgainstSignedHash(peer, hash, witness[0]) + if !ok { return } + // WIT2: hand the verified bytes to the handler for pre-import serving. + // Done before import-side enqueue so a peer asking us for the body + // during the chain-write window gets bytes from the in-flight cache + // rather than empty results. body is nil on the WIT1 path (no signed + // hash on file) — cacheVerifiedWitnessForServing no-ops in that case. + m.cacheVerifiedWitnessForServing(hash, body, witnessHash) + metrics.RecordPerItemDuration(blockWitnessItemDownloadTimer, res.Time, 1) m.handleWitnessFetchSuccess(peer, hash, witness[0], announcedAt) } +// cacheVerifiedWitnessForServing forwards canonical-encoded witness bytes +// (already verified against a BP-signed witness hash by the caller) to the +// handler so other peers can fetch them pre-import. 
No-op when no cache +// callback is configured (legacy WIT1-only paths) or when body is empty — +// the latter signals the WIT1 path with no signed hash on file, where +// caching unverified bytes would expose us to byte-blame from downstream +// peers. +func (m *witnessManager) cacheVerifiedWitnessForServing(blockHash common.Hash, body []byte, witnessHash common.Hash) { + if m.parentCacheWitnessForServing == nil || len(body) == 0 { + return + } + m.parentCacheWitnessForServing(blockHash, body, witnessHash) +} + +// verifyAgainstSignedHash returns the canonically-encoded witness bytes and +// the BP-signed witness hash they match, when a signed hash is on file and +// verification succeeds. body is nil on the WIT1 path (no signed hash to +// verify against) so callers can skip the pre-import serving cache. ok is +// false when verification fails; the offending peer has already been +// reported. Local EncodeRLP failure on a successfully-decoded witness is +// the local node's bug, not peer misbehavior, so it does not drop the peer. +func (m *witnessManager) verifyAgainstSignedHash(peer string, hash common.Hash, witness *stateless.Witness) (body []byte, witnessHash common.Hash, ok bool) { + if m.parentSignedWitnessHash == nil { + return nil, common.Hash{}, true + } + expected, has := m.parentSignedWitnessHash(hash) + if !has { + return nil, common.Hash{}, true + } + var buf bytes.Buffer + if err := witness.EncodeRLP(&buf); err != nil { + log.Warn("[wm] Failed to encode received witness for hash check", "peer", peer, "hash", hash, "err", err) + m.handleWitnessFetchFailureExt(hash, "", fmt.Errorf("witness encode failed: %w", err), false) + return nil, common.Hash{}, false + } + encoded := buf.Bytes() + actual := stateless.WitnessCommitHash(encoded) + if actual != expected { + witnessByteMismatchMeter.Mark(1) + // We cannot blame the byte-server on signed-hash disagreement alone: + // the announcement only proves *some* BP signed *some* hash. 
A faulty + // or malicious scheduled producer that signed a bogus hash would + // otherwise weaponise this path to disconnect every honest peer + // serving the canonical witness. Reject the bytes (don't cache for + // serving), back off the pending request so another peer/announcement + // gets tried, and let import-time execution validation pin blame. + // TODO(wit2): wire signer-quarantine once the manager has access to + // (signer, announcement-relayer) provenance from the handler. + log.Warn("[wm] Witness bytes do not match BP-signed hash; not caching, retrying with another peer", + "peer", peer, "block", hash, "expected", expected, "actual", actual) + m.handleWitnessFetchFailureExt(hash, "", errors.New("witness hash mismatch"), false) + return nil, common.Hash{}, false + } + return encoded, expected, true +} + // handleWitnessFetchSuccess processes a successfully fetched witness. // It needs the original origin from the op state for consistency checks. func (m *witnessManager) handleWitnessFetchSuccess(fetchPeer string, hash common.Hash, witness *stateless.Witness, announcedAt time.Time) { diff --git a/eth/fetcher/witness_manager_test.go b/eth/fetcher/witness_manager_test.go index 4d7948bc63..e8cceac53a 100644 --- a/eth/fetcher/witness_manager_test.go +++ b/eth/fetcher/witness_manager_test.go @@ -72,7 +72,7 @@ func newTestWitnessManager() *testWitnessManager { getHeader := HeaderRetrievalFn(func(hash common.Hash) *types.Header { return nil }) chainHeight := chainHeightFn(func() uint64 { return 100 }) - tw.manager = newWitnessManager(quit, dropPeer, nil, enqueueCh, getBlock, getHeader, chainHeight, nil, 0) + tw.manager = newWitnessManager(quit, dropPeer, nil, enqueueCh, getBlock, getHeader, chainHeight, nil, nil, nil, 0) return tw } @@ -190,6 +190,8 @@ func TestHandleNeedDuplicates(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -247,6 +249,8 @@ func TestHandleNeedKnownBlock(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ 
-296,6 +300,8 @@ func TestHandleBroadcast(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -369,6 +375,8 @@ func TestWitnessUnavailable(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -431,6 +439,8 @@ func TestForget(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -482,6 +492,8 @@ func TestHandleFilterResult(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -521,6 +533,8 @@ func TestCheckCompleting(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -564,6 +578,8 @@ func TestWitnessFetchFailure(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -677,6 +693,8 @@ func TestCleanupUnavailableCache(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -733,6 +751,8 @@ func TestWitnessFetchWithBlockNoLongerPending(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -846,6 +866,8 @@ func TestTick(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -932,6 +954,8 @@ func TestTickMaxRetries(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -991,6 +1015,8 @@ func TestTickWithWitnessAlreadyPresent(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1074,6 +1100,8 @@ func TestHandleWitnessFetchSuccess(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1142,6 +1170,8 @@ func TestHandleWitnessFetchSuccessNoPending(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1175,6 +1205,8 @@ func TestHandleWitnessFetchSuccessWitnessAlreadyPresent(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1225,6 +1257,8 @@ func TestRescheduleWitness(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1281,6 +1315,8 @@ func TestSafeEnqueueWithNilWitness(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1325,6 +1361,8 @@ func TestSafeEnqueueChannelClosed(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1362,6 +1400,8 @@ 
func TestHandleNeedDistanceCheck(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1406,6 +1446,8 @@ func TestHandleNeedMissingFetchWitness(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1446,6 +1488,8 @@ func TestLoop(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1520,6 +1564,8 @@ func TestHandleFilterResultWithoutWitness(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1561,6 +1607,8 @@ func TestCheckCompletingWithoutWitness(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1602,6 +1650,8 @@ func TestFetchWitnessError(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1645,6 +1695,8 @@ func TestHandleFilterResultWitnessUnavailable(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1688,6 +1740,8 @@ func TestHandleFilterResultDuplicate(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1734,6 +1788,8 @@ func TestCheckCompletingWitnessUnavailable(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1777,6 +1833,8 @@ func TestCheckCompletingDuplicate(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1830,6 +1888,8 @@ func TestCheckCompletingKnownBlock(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1868,6 +1928,8 @@ func TestTickInvalidPendingState(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1911,6 +1973,8 @@ func TestTickNotReadyYet(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -1974,6 +2038,8 @@ func TestSafeEnqueueSuccess(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -2037,6 +2103,8 @@ func TestConcurrentWitnessFetchFailure(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, 0, ) @@ -2096,6 +2164,8 @@ func TestCheckWitnessPageCountWithPeerJailing(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, gasCeil, ) @@ -2160,6 +2230,8 @@ func TestCheckWitnessPageCountWithConsensusFailure(t 
*testing.T) { getHeader, chainHeight, nil, + nil, + nil, gasCeil, ) @@ -2253,6 +2325,8 @@ func TestCheckWitnessPageCountWithPeerFailures(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, gasCeil, ) @@ -2327,6 +2401,8 @@ func TestCheckWitnessPageCountWithInsufficientPeers(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, gasCeil, ) @@ -2403,6 +2479,8 @@ func TestCheckWitnessPageCountBelowThreshold(t *testing.T) { getHeader, chainHeight, currentHeader, + nil, + nil, gasCeil, ) @@ -2467,6 +2545,8 @@ func TestCheckWitnessPageCountBelowThreshold(t *testing.T) { getHeader, chainHeight, currentHeader, + nil, + nil, gasCeil, ) @@ -2527,6 +2607,8 @@ func TestCheckWitnessPageCountBelowThreshold(t *testing.T) { getHeader, chainHeight, nil, // currentHeader is nil + nil, // signedWitnessHash is nil + nil, // cacheWitnessForServing is nil gasCeil, ) @@ -2594,6 +2676,8 @@ func TestConcurrentWitnessVerification(t *testing.T) { getHeader, chainHeight, nil, + nil, + nil, gasCeil, ) @@ -2661,7 +2745,7 @@ func TestFetchWitnessNoPeerError(t *testing.T) { manager := newWitnessManager( quit, dropPeer, nil, enqueueCh, - getBlock, getHeader, chainHeight, nil, 0, + getBlock, getHeader, chainHeight, nil, nil, nil, 0, ) hash := common.HexToHash("0xabc") @@ -2726,7 +2810,7 @@ func TestWitnessTickPreservesValidPendingEntry(t *testing.T) { manager := newWitnessManager( quit, dropPeer, nil, enqueueCh, - getBlock, getHeader, chainHeight, nil, 0, + getBlock, getHeader, chainHeight, nil, nil, nil, 0, ) block := createTestBlock(101) @@ -2777,7 +2861,7 @@ func TestFetchWitnessOtherErrorKeepsPending(t *testing.T) { manager := newWitnessManager( quit, dropPeer, nil, enqueueCh, - getBlock, getHeader, chainHeight, nil, 0, + getBlock, getHeader, chainHeight, nil, nil, nil, 0, ) hash := common.HexToHash("0xfade") @@ -2843,7 +2927,7 @@ func TestCheckWitnessPageCountAtThreshold(t *testing.T) { manager := newWitnessManager( quit, dropPeer, jailPeer, enqueueCh, - getBlock, getHeader, 
chainHeight, currentHeader, 30_000_000, + getBlock, getHeader, chainHeight, currentHeader, nil, nil, 30_000_000, ) threshold := manager.calculatePageThreshold() @@ -2889,6 +2973,8 @@ func newWitnessManagerForTest(t *testing.T) (*witnessManager, <-chan *enqueueReq HeaderRetrievalFn(func(common.Hash) *types.Header { return nil }), chainHeightFn(func() uint64 { return 100 }), nil, + nil, + nil, 0, ) return m, enqueueCh @@ -3164,6 +3250,8 @@ func TestWitnessCalculatePageThresholdMinimumClamp(t *testing.T) { currentHeaderFn(func() *types.Header { return &types.Header{Number: big.NewInt(100), GasLimit: 1} // < 1MB → 0 pages pre-clamp }), + nil, + nil, 0, ) if got := m.calculatePageThreshold(); got < 1 { @@ -3183,6 +3271,8 @@ func TestWitnessCalculatePageThresholdMinimumClamp(t *testing.T) { HeaderRetrievalFn(func(common.Hash) *types.Header { return nil }), chainHeightFn(func() uint64 { return 100 }), nil, // no current header → fallback to config path + nil, // no signed-witness lookup + nil, // no cache-witness-for-serving 1, // 1 gas ceil → 0 pages pre-clamp ) if got := m.calculatePageThreshold(); got < 1 { @@ -3340,6 +3430,8 @@ func TestVerifyWitnessPageCountDishonestPeer(t *testing.T) { HeaderRetrievalFn(func(common.Hash) *types.Header { return nil }), chainHeightFn(func() uint64 { return 100 }), nil, + nil, + nil, 0, ) @@ -3413,7 +3505,7 @@ func TestWitnessLoopDrivesFetchesForPending(t *testing.T) { manager := newWitnessManager( quit, dropPeer, nil, enqueueCh, - getBlock, getHeader, chainHeight, nil, 0, + getBlock, getHeader, chainHeight, nil, nil, nil, 0, ) fetchCalled := make(chan struct{}, 1) diff --git a/eth/fetcher/witness_manager_wit2_test.go b/eth/fetcher/witness_manager_wit2_test.go new file mode 100644 index 0000000000..2ad0ed5ccf --- /dev/null +++ b/eth/fetcher/witness_manager_wit2_test.go @@ -0,0 +1,303 @@ +package fetcher + +import ( + "bytes" + "errors" + "testing" + "time" + + "github.com/ethereum/go-ethereum/common" + 
"github.com/ethereum/go-ethereum/core/stateless" + "github.com/ethereum/go-ethereum/eth/protocols/eth" +) + +// blockAnnounceForTest constructs a minimal blockAnnounce wired to a fetch +// function that fails closed. Used to seed manager.pending so that the +// processWitnessResponse path can take its happy/sad branches without +// going through the full announce → request flow. +func blockAnnounceForTest(origin string, hash common.Hash, number uint64) *blockAnnounce { + return &blockAnnounce{ + origin: origin, + hash: hash, + number: number, + time: time.Now(), + fetchWitness: func(common.Hash, chan *eth.Response) (*eth.Request, error) { return nil, errors.New("noop") }, + } +} + +// TestProcessWitnessResponseDoesNotDropOnByteMismatch encodes the post- +// adversarial-review safety policy: when the served witness bytes do not +// match the BP-signed witnessHash on file, the manager must back off and +// retry, but it MUST NOT drop the byte-server. The accepted announcement +// only proves *some* BP signed *some* hash — not that the hash matches the +// canonical witness. A faulty or malicious scheduled producer that signs a +// bogus hash would otherwise weaponise this code path to disconnect every +// honest peer serving the real witness. +// +// The mismatched bytes are still rejected (not cached for serving), and the +// pending state stays alive with a fresh back-off so another peer (or another +// announcement) gets a chance. Blame-pinning belongs at execution time, where +// import-side validation can attribute fault to signer vs. server vs. caller. +func TestProcessWitnessResponseDoesNotDropOnByteMismatch(t *testing.T) { + tw := newTestWitnessManager() + defer tw.Close() + + block := createTestBlock(101) + hash := block.Hash() + + // The honest server returns the canonical witness for this block — its + // keccak commitment is `canonical`. 
+ canonical := createTestWitnessForBlock(block) + + // Simulate a malicious / faulty BP that signed a bogus, unrelated hash. + // processWitnessResponse will see canonical bytes whose hash does not + // match what parentSignedWitnessHash reports. + rogueSignedHash := common.HexToHash("0xdeadbeef") + tw.manager.parentSignedWitnessHash = func(h common.Hash) (common.Hash, bool) { + if h == hash { + return rogueSignedHash, true + } + return common.Hash{}, false + } + + tw.manager.mu.Lock() + tw.manager.pending[hash] = &witnessRequestState{ + op: &blockOrHeaderInject{origin: "honest", block: block}, + announce: blockAnnounceForTest("honest", hash, block.NumberU64()), + } + tw.manager.mu.Unlock() + + res := ð.Response{ + Time: time.Millisecond, + Done: make(chan error, 1), + Res: []*stateless.Witness{canonical}, + } + + tw.manager.processWitnessResponse("honest-server", hash, res, time.Now()) + + tw.mu.Lock() + defer tw.mu.Unlock() + if len(tw.droppedPeers) != 0 { + t.Fatalf("byte-server must not be dropped on signed-hash mismatch (BP may have signed bogus); drops=%v", tw.droppedPeers) + } +} + +// TestProcessWitnessResponseAcceptsMatchingHash is the contrapositive: a +// peer that returns bytes whose keccak256 matches the BP-signed hash must +// not be dropped. State-root mismatches on subsequent execution are handled +// elsewhere and do not reflect on the server. 
+func TestProcessWitnessResponseAcceptsMatchingHash(t *testing.T) { + tw := newTestWitnessManager() + defer tw.Close() + + block := createTestBlock(101) + hash := block.Hash() + witness := createTestWitnessForBlock(block) + var buf bytes.Buffer + if err := witness.EncodeRLP(&buf); err != nil { + t.Fatalf("encode: %v", err) + } + matchingHash := stateless.WitnessCommitHash(buf.Bytes()) + + tw.manager.parentSignedWitnessHash = func(h common.Hash) (common.Hash, bool) { + return matchingHash, true + } + + tw.manager.mu.Lock() + tw.manager.pending[hash] = &witnessRequestState{ + op: &blockOrHeaderInject{origin: "honest", block: block}, + announce: blockAnnounceForTest("honest", hash, block.NumberU64()), + } + tw.manager.mu.Unlock() + + res := ð.Response{ + Time: time.Millisecond, + Done: make(chan error, 1), + Res: []*stateless.Witness{witness}, + } + + tw.manager.processWitnessResponse("honest", hash, res, time.Now()) + + tw.mu.Lock() + defer tw.mu.Unlock() + if len(tw.droppedPeers) != 0 { + t.Fatalf("honest peer must not be dropped on hash match; drops=%v", tw.droppedPeers) + } +} + +// TestProcessWitnessResponseCachesForServingAfterByteCheck is the regression +// for the missing pre-import-serving cache populate. The fetcher must hand +// canonical-encoded bytes back to the eth handler after a verified fetch so +// downstream peers can ask THIS node for the body before chain-write +// finishes. Without this callback firing, multi-hop fast propagation has no +// body source past hop-1 — the entire WIT2 latency win evaporates. 
+func TestProcessWitnessResponseCachesForServingAfterByteCheck(t *testing.T) { + tw := newTestWitnessManager() + defer tw.Close() + + block := createTestBlock(202) + hash := block.Hash() + witness := createTestWitnessForBlock(block) + var buf bytes.Buffer + if err := witness.EncodeRLP(&buf); err != nil { + t.Fatalf("encode: %v", err) + } + want := stateless.WitnessCommitHash(buf.Bytes()) + + var ( + gotBlock common.Hash + gotBytes []byte + gotHash common.Hash + ) + tw.manager.parentCacheWitnessForServing = func(blockHash common.Hash, witnessBytes []byte, witnessHash common.Hash) { + gotBlock = blockHash + gotBytes = append([]byte{}, witnessBytes...) + gotHash = witnessHash + } + tw.manager.parentSignedWitnessHash = func(h common.Hash) (common.Hash, bool) { + if h == hash { + return want, true + } + return common.Hash{}, false + } + + tw.manager.mu.Lock() + tw.manager.pending[hash] = &witnessRequestState{ + op: &blockOrHeaderInject{origin: "honest", block: block}, + announce: blockAnnounceForTest("honest", hash, block.NumberU64()), + } + tw.manager.mu.Unlock() + + res := ð.Response{ + Time: time.Millisecond, + Done: make(chan error, 1), + Res: []*stateless.Witness{witness}, + } + + tw.manager.processWitnessResponse("honest", hash, res, time.Now()) + + if gotBlock != hash { + t.Fatalf("cache callback not invoked or wrong blockHash: got %s want %s", gotBlock.Hex(), hash.Hex()) + } + if gotHash != want { + t.Fatalf("cache callback received wrong witnessHash: got %s want %s", gotHash.Hex(), want.Hex()) + } + if len(gotBytes) == 0 { + t.Fatal("cache callback received empty bytes; pre-import serving cache will not be populated") + } +} + +// TestProcessWitnessResponseSkipsCheckWhenNoSignature confirms the WIT1 +// fallback path: when the receiver has no BP-signed announcement on file +// for a block, byte-correctness verification is skipped (there's nothing to +// verify against), and behavior matches the pre-WIT2 code path. 
+func TestProcessWitnessResponseSkipsCheckWhenNoSignature(t *testing.T) { + tw := newTestWitnessManager() + defer tw.Close() + + block := createTestBlock(101) + hash := block.Hash() + witness := createTestWitnessForBlock(block) + + // No lookup configured → skip path. + tw.manager.parentSignedWitnessHash = func(common.Hash) (common.Hash, bool) { + return common.Hash{}, false + } + + tw.manager.mu.Lock() + tw.manager.pending[hash] = &witnessRequestState{ + op: &blockOrHeaderInject{origin: "wit1-peer", block: block}, + announce: blockAnnounceForTest("wit1-peer", hash, block.NumberU64()), + } + tw.manager.mu.Unlock() + + res := ð.Response{ + Time: time.Millisecond, + Done: make(chan error, 1), + Res: []*stateless.Witness{witness}, + } + + tw.manager.processWitnessResponse("wit1-peer", hash, res, time.Now()) + + tw.mu.Lock() + defer tw.mu.Unlock() + if len(tw.droppedPeers) != 0 { + t.Fatalf("WIT1 fallback must not drop any peer; drops=%v", tw.droppedPeers) + } +} + +// TestVerifyAgainstSignedHashSkipsEncodeWhenNoSignedHash is the regression +// for the blame-asymmetry bug: caching unverified bytes for serving means a +// downstream peer would ask us for the body, get bytes that don't match THEIR +// BP-signed hash (because we never had one to compare against), and drop us. +// The fix gates serving-cache population on having a BP-signed hash on file — +// verifyAgainstSignedHash returns body=nil on the WIT1 path, and the caller +// short-circuits the cache call (no-op when body is empty). +func TestVerifyAgainstSignedHashSkipsEncodeWhenNoSignedHash(t *testing.T) { + tw := newTestWitnessManager() + defer tw.Close() + + block := createTestBlock(303) + hash := block.Hash() + witness := createTestWitnessForBlock(block) + + cacheCalls := 0 + tw.manager.parentCacheWitnessForServing = func(common.Hash, []byte, common.Hash) { + cacheCalls++ + } + // No signed hash on file for any block → verification must return + // body=nil so the caller skips the cache. 
+	tw.manager.parentSignedWitnessHash = func(common.Hash) (common.Hash, bool) {
+		return common.Hash{}, false
+	}
+
+	body, _, ok := tw.manager.verifyAgainstSignedHash("peer1", hash, witness)
+	if !ok {
+		t.Fatalf("verifyAgainstSignedHash returned ok=false on WIT1 path")
+	}
+	if body != nil {
+		t.Fatalf("WIT1 path returned non-nil body; downstream peers will see uncovered bytes (len=%d)", len(body))
+	}
+	tw.manager.cacheVerifiedWitnessForServing(hash, body, common.Hash{})
+	if cacheCalls != 0 {
+		t.Fatalf("cache populated without BP-signed hash on file; downstream peers will drop us as liars (calls=%d)", cacheCalls)
+	}
+}
+
+// TestProcessWitnessResponseEmptyDoesNotDropAnnounceOnlyPeer locks the
+// fast-path safety property: a peer that only saw the signed announce (and
+// has not yet imported the body) responds with empty bytes when asked. That
+// is NOT lying — they simply do not have it yet. Dropping them here would
+// shrink the pool of candidate body sources and re-introduce the regression
+// where WIT2 multi-hop propagation has nowhere to fetch from at hop>=2.
+//
+// Note that a byte mismatch does not drop the serving peer either (see
+// TestProcessWitnessResponseDoesNotDropOnByteMismatch); blame-pinning is
+// deferred to execution/import-time validation.
+func TestProcessWitnessResponseEmptyDoesNotDropAnnounceOnlyPeer(t *testing.T) { + tw := newTestWitnessManager() + defer tw.Close() + + block := createTestBlock(404) + hash := block.Hash() + + tw.manager.mu.Lock() + tw.manager.pending[hash] = &witnessRequestState{ + op: &blockOrHeaderInject{origin: "announce-only", block: block}, + announce: blockAnnounceForTest("announce-only", hash, block.NumberU64()), + } + tw.manager.mu.Unlock() + + res := ð.Response{ + Time: time.Millisecond, + Done: make(chan error, 1), + Res: []*stateless.Witness{}, // empty/unavailable + } + + tw.manager.processWitnessResponse("announce-only", hash, res, time.Now()) + + tw.mu.Lock() + defer tw.mu.Unlock() + if len(tw.droppedPeers) != 0 { + t.Fatalf("empty response must NOT drop the responder; drops=%v", tw.droppedPeers) + } +} diff --git a/eth/handler.go b/eth/handler.go index fc731a7579..366e8e46f4 100644 --- a/eth/handler.go +++ b/eth/handler.go @@ -184,6 +184,31 @@ type handler struct { syncWithWitnesses bool syncAndProduceWitnesses bool // Whether to sync blocks and produce witnesses simultaneously + // WIT2: cache of BP-signed witness announcements, keyed by block hash. + // Populated by both produced (signed locally) and received-and-verified + // announcements. Consulted by the relay path to dedup, by the body + // broadcast path to re-emit signed announces, and by the fetch path to + // supply the byte-correctness comparison hash. + signedWitnesses *signedWitnessCache + + // WIT2: in-flight witness bodies received via NewWitness broadcast but + // not yet written to chain storage. Lets serving peers answer GetWitness + // requests during the import gap, which is what unlocks fast multi-hop + // propagation — without it, only the producer/post-import nodes can + // serve and stateless nodes more than 1 hop away wait per-hop on full + // validation before they can pull from anyone. 
+ pendingWitnessBodies *pendingWitnessBodyCache + wit2PeerTracker *peerWit2Tracker + + // WIT2: signed announcements whose producer-binding could not be checked + // at receive time because the matching block header wasn't local yet. + // Drained from the chain-head subscription on each new block so the race + // between block and announce gossip streams self-heals once the chain + // catches up. + deferredAnnounces *deferredAnnounceCache + wit2HeadCh chan core.ChainHeadEvent + wit2HeadSub event.Subscription + // channels for fetcher, syncer, txsyncLoop quitSync chan struct{} @@ -223,6 +248,10 @@ func newHandler(config *handlerConfig) (*handler, error) { syncWithWitnesses: config.syncWithWitnesses, syncAndProduceWitnesses: config.syncAndProduceWitnesses, privateTxGetter: config.privateTxGetter, + signedWitnesses: newSignedWitnessCache(), + pendingWitnessBodies: newPendingWitnessBodyCache(witnessBodyCacheCapacity), + wit2PeerTracker: newPeerWit2Tracker(), + deferredAnnounces: newDeferredAnnounceCache(deferredAnnounceCapacity), } log.Info("Sync with witnesses", "enabled", config.syncWithWitnesses) @@ -306,7 +335,7 @@ func newHandler(config *handlerConfig) (*handler, error) { } } - h.blockFetcher = fetcher.NewBlockFetcher(false, nil, h.chain.GetBlockByHash, validator, h.BroadcastBlock, heighter, h.chain.CurrentHeader, nil, inserter, h.removePeer, h.jailPeer, h.enableBlockTracking, h.statelessSync.Load() || h.syncWithWitnesses, config.gasCeil) + h.blockFetcher = fetcher.NewBlockFetcher(false, nil, h.chain.GetBlockByHash, validator, h.BroadcastBlock, heighter, h.chain.CurrentHeader, nil, inserter, h.removePeer, h.jailPeer, h.enableBlockTracking, h.statelessSync.Load() || h.syncWithWitnesses, config.gasCeil, h.lookupSignedWitnessHash, h.cacheVerifiedWitnessForServing) fetchTx := func(peer string, hashes []common.Hash) error { p := h.peers.peer(peer) @@ -556,6 +585,25 @@ func (h *handler) removePeer(id string) { log.Debug("Handler: removing peer", "peer", peer.ID(), 
"inbound", peer.Peer.Inbound(), "duration", common.PrettyDuration(peer.Peer.Lifetime())) peer.Peer.Disconnect(p2p.DiscUselessPeer) } + if h.wit2PeerTracker != nil { + h.wit2PeerTracker.forget(id) + } +} + +// strikeWit2Peer records a wit2 misbehavior strike (bad sig, wrong producer) +// and disconnects the peer once the strike threshold is exceeded inside the +// decay window. Single bad announcements are tolerated to allow for stray +// pre-fork content; sustained misbehavior is not. +func (h *handler) strikeWit2Peer(peer *wit.Peer) { + if h.wit2PeerTracker == nil { + return + } + if !h.wit2PeerTracker.strike(peer.ID()) { + return + } + wit2StrikeDisconnectMeter.Mark(1) + peer.Log().Warn("wit2: disconnecting peer for repeated invalid signed announcements") + h.removePeer(peer.ID()) } // unregisterPeer removes a peer from the downloader, fetchers and main peer set. @@ -631,6 +679,39 @@ func (h *handler) Start(maxPeers int) { // start peer handler tracker h.wg.Add(1) go h.protoTracker() + + // WIT2: drain deferred signed announces on each new chain head. This + // closes the cosend race: when a signed announcement arrives ahead of + // its block, we hold it in deferredAnnounces and re-evaluate as soon as + // the matching header lands. + h.wit2HeadCh = make(chan core.ChainHeadEvent, chainHeadChanSize) + h.wit2HeadSub = h.chain.SubscribeChainHeadEvent(h.wit2HeadCh) + h.wg.Add(1) + go h.deferredAnnouncesLoop() +} + +// deferredAnnouncesLoop re-evaluates any deferred WIT2 announcements whose +// matching block has just been imported. Exits cleanly when the chain-head +// subscription returns (chain stop) or quitSync is closed. 
+func (h *handler) deferredAnnouncesLoop() { + defer h.wg.Done() + defer h.wit2HeadSub.Unsubscribe() + + for { + select { + case ev, ok := <-h.wit2HeadCh: + if !ok { + return + } + if ev.Header != nil { + h.drainDeferredAnnouncesFor(ev.Header.Hash()) + } + case <-h.wit2HeadSub.Err(): + return + case <-h.quitSync: + return + } + } } func (h *handler) Stop() { @@ -715,6 +796,11 @@ func (h *handler) BroadcastBlock(block *types.Block, witness *stateless.Witness, peer.AsyncSendNewBlock(block, td) } + // WIT2: co-send the witness announcement to every direct block + // recipient that doesn't yet have the witness. Closes the gap where + // blocks fan out at sqrt(N) but witnesses didn't. + h.cosendWitnessAnnouncement(hash, block.NumberU64(), transfer, staticAndTrustedPeers) + log.Debug("Propagated block", "hash", hash, "recipients", len(transfer), "static and trusted recipients", len(staticAndTrustedPeers), "duration", common.PrettyDuration(time.Since(block.ReceivedAt))) return @@ -727,8 +813,17 @@ func (h *handler) BroadcastBlock(block *types.Block, witness *stateless.Witness, } if h.chain.HasWitness(hash) { + // Try to attach a BP signature so WIT2 peers can fast-validate and + // transitively relay. Falls through to unsigned WIT1 announces for + // peers below WIT2 (and for any peer if signing is unavailable, e.g., + // non-producer nodes that didn't receive a signed announce upstream). 
+ signedAnn, hasSigned := h.signLocalWitnessAnnouncement(hash, block.NumberU64()) for _, peer := range peersWithoutWitness { - peer.Peer.AsyncSendNewWitnessHash(block.Header().Hash(), block.NumberU64()) + if hasSigned && peer.Peer.Version() >= wit.WIT2 { + peer.Peer.AsyncSendSignedWitnessAnnouncement(signedAnn) + } else { + peer.Peer.AsyncSendNewWitnessHash(block.Header().Hash(), block.NumberU64()) + } } log.Debug("Announced witness", "hash", hash, "recipients", len(peers), "duration", common.PrettyDuration(time.Since(block.ReceivedAt))) } diff --git a/eth/handler_wit.go b/eth/handler_wit.go index 2c4e19d471..ebc98356d9 100644 --- a/eth/handler_wit.go +++ b/eth/handler_wit.go @@ -1,6 +1,7 @@ package eth import ( + "bytes" "errors" "fmt" "time" @@ -54,6 +55,8 @@ func (h *witHandler) Handle(peer *wit.Peer, packet wit.Packet) error { return h.handleWitnessBroadcast(peer, packet.Witness) case *wit.NewWitnessHashesPacket: return h.handleWitnessHashesAnnounce(peer, packet.Hashes, packet.Numbers) + case *wit.SignedNewWitnessHashesPacket: + return h.handleSignedWitnessAnnouncements(peer, packet.Announcements) case *wit.GetWitnessPacket: // Call handleGetWitness which returns the raw RLP data response, err := h.handleGetWitness(peer, packet) @@ -82,6 +85,37 @@ func (h *witHandler) handleWitnessBroadcast(peer *wit.Peer, witness *stateless.W peer.AddKnownWitness(witness.Header().Hash()) hash := witness.Header().Hash() + // WIT2: cache the encoded body so this node can serve it pre-import. We + // only expose the cache for serving when bytes match a BP-signed + // witnessHash on file — otherwise an upstream that lied about the bytes + // would make us serve garbage and get dropped by downstream peers as + // liars, even though we just relayed what we received. If no signed + // announcement is on file (WIT1 path), skip the encode+hash entirely + // so WIT1 broadcasts don't pay the cost of work we'd just discard. 
+ if signed, hasSigned := (*handler)(h).signedWitnesses.get(hash); hasSigned { + var buf bytes.Buffer + if err := witness.EncodeRLP(&buf); err != nil { + peer.Log().Warn("wit2: failed to encode received witness", "hash", hash, "err", err) + } else { + bodyBytes := buf.Bytes() + bodyHash := stateless.WitnessCommitHash(bodyBytes) + if signed.WitnessHash == bodyHash { + (*handler)(h).pendingWitnessBodies.put(hash, bodyBytes, bodyHash) + } else { + // Upstream sent bytes that don't match the BP-signed commitment. + // Don't cache for serving and surface this peer as misbehaving. + wit2BroadcastByteMismatchMeter.Mark(1) + peer.Log().Warn("wit2: broadcast bytes do not match signed witnessHash; not caching for serving", + "blockHash", hash, "expected", signed.WitnessHash, "actual", bodyHash) + } + } + } else { + // No signed announcement on file: WIT1 fallback. Don't expose for + // WIT2 pre-import serving since we cannot prove byte-correctness to + // downstream peers. The body still flows into the import path below. + wit2BroadcastUnverifiedSkippedMeter.Mark(1) + } + // Inject the witness into the block fetcher's cache if h.blockFetcher != nil { log.Debug("Injecting witness into block fetcher", "hash", hash, "peer", peer.ID()) @@ -109,53 +143,155 @@ func (h *witHandler) handleWitnessHashesAnnounce(peer *wit.Peer, hashes []common return nil } -// handleGetWitness retrieves witnesses for the requested block hashes and returns them as raw RLP data. -// It now returns the data and error, rather than sending the reply directly. -// The returned data is [][]byte, as rlp.RawValue is essentially []byte. 
-func (h *witHandler) handleGetWitness(peer *wit.Peer, req *wit.GetWitnessPacket) (wit.WitnessPacketResponse, error) {
-	log.Debug("handleGetWitness processing request", "peer", peer.ID(), "reqID", req.RequestId, "witnessPages", len(req.WitnessPages))
-	// list different witnesses to query
-	seen := make(map[common.Hash]struct{}, len(req.WitnessPages))
-	for _, witnessPage := range req.WitnessPages {
-		seen[witnessPage.Hash] = struct{}{}
+// handleSignedWitnessAnnouncements verifies BP signatures on incoming WIT2
+// announcements and relays valid ones to peers that have not seen them.
+// Body fetches are driven elsewhere (the block fetcher's witness manager
+// kicks them off when an announcement materialises). Each announcement is
+// processed independently so a single bad entry does not poison a batch.
+//
+// On verification failure (bad signature, unknown signer) the sender is
+// **not** dropped outright — they may simply be relaying a bad upstream
+// announcement. Confirmed misbehavior instead accrues strikes (via
+// strikeWit2Peer); only repeated strikes inside the decay window disconnect
+// the peer. Invalid announcements are also counted via metrics to surface
+// misbehaving relayers.
+func (h *witHandler) handleSignedWitnessAnnouncements(peer *wit.Peer, anns []wit.SignedWitnessAnnouncement) error {
+	wit2RelayInMeter.Mark(int64(len(anns)))
+
+	// Per-peer rate limit: every announcement consumes one token. Rejected
+	// packets are dropped wholesale to keep accounting simple — an honest
+	// peer should never trip this in practice.
+ if !(*handler)(h).wit2PeerTracker.allow(peer.ID(), len(anns)) { + wit2RateLimitDropMeter.Mark(int64(len(anns))) + peer.Log().Debug("wit2: rate-limited signed announcements", "count", len(anns)) + return nil } - // witness sizes query - witnessSize := make(map[common.Hash]uint64, len(seen)) - for witnessBlockHash := range seen { - size := rawdb.ReadWitnessSize(h.Chain().DB(), witnessBlockHash) - if size == nil { - witnessSize[witnessBlockHash] = 0 - } else { - witnessSize[witnessBlockHash] = *size + for _, ann := range anns { + if !h.acceptSignedAnnouncement(peer, ann) { + // Verification failed (bad signature, signer ≠ producer, or + // header not yet local). MUST NOT mark the sender as + // announce-known: doing so would (a) suppress our own later + // re-relay back to this peer if we receive a valid version of + // the same hash from someone else, and (b) leave us no path + // to recover from a header-arrival race once a re-gossip for + // the same hash arrives. Recovery on this branch relies on + // re-receipt, which the empty knownAnnounces set permits. + continue } + + // Sender produced a valid announcement; suppress relay back to them. + // Do NOT mark them as a body-holder — they may be relaying without + // bytes. Body fetches are gated on knownWitnesses, set elsewhere. + peer.AddKnownAnnounce(ann.BlockHash) + + // Cache + dedup. Skip relay if we've already relayed this hash recently. + if !h.signedWitnesses.putIfNewer(ann) { + wit2DuplicateMeter.Mark(1) + continue + } + + // Relay to every WIT2 peer that doesn't already have this witness, + // excluding the sender we received it from. + (*handler)(h).relaySignedAnnouncement(peer.ID(), ann) } - // query witnesses by demand - var response wit.WitnessPacketResponse - witnessCache := make(map[common.Hash][]byte, len(seen)) + return nil +} + +// acceptSignedAnnouncement runs signature recovery and producer-binding for a +// single announcement. 
 Returns true when the announcement is verified and the
+// caller should proceed to cache + relay; false when the caller should skip
+// it. Strikes are issued only on confirmed misbehavior (bad signature or
+// signer ≠ scheduled producer for a known header). Pre-import deferral
+// (header not yet local) is silent: no strike, no relay. The announcement is
+// stashed in the deferred queue so the chain-head loop can re-evaluate it
+// once the block arrives — without that, an announce that races ahead of its
+// block is lost permanently and subsequent witness fetches silently skip
+// byte-verification.
+func (h *witHandler) acceptSignedAnnouncement(peer *wit.Peer, ann wit.SignedWitnessAnnouncement) bool {
+	signer, err := verifySignedAnnouncement(ann)
+	if err != nil {
+		wit2InvalidSigMeter.Mark(1)
+		peer.Log().Debug("wit2: invalid signed announcement", "blockHash", ann.BlockHash, "err", err)
+		(*handler)(h).strikeWit2Peer(peer)
+		return false
+	}
+
+	ok, headerAvailable := (*handler)(h).isScheduledProducer(signer, ann.BlockNumber, ann.BlockHash)
+	if ok {
+		return true
+	}
+	if !headerAvailable {
+		peer.Log().Debug("wit2: header not yet local for announced block; deferring announce",
+			"blockHash", ann.BlockHash, "blockNumber", ann.BlockNumber)
+		(*handler)(h).deferredAnnounces.put(ann, peer.ID())
+		return false
+	}
+	wit2NotValidatorMeter.Mark(1)
+	peer.Log().Debug("wit2: signer is not the scheduled producer for this block",
+		"blockHash", ann.BlockHash, "blockNumber", ann.BlockNumber, "signer", signer)
+	(*handler)(h).strikeWit2Peer(peer)
+	return false
+}
+
+// relaySignedAnnouncement forwards a verified signed announcement to all WIT2
+// peers that have not yet seen a signed announce for this block hash (as
+// reported by peersWithoutSignedAnnounce), excluding the original sender.
+// WIT0/WIT1 peers are skipped — they don't speak the signed wire format.
+// Their slow path remains: they'll learn about the witness through the
+// existing post-import unsigned announce path on adjacent WIT2 nodes when
+// those nodes finish importing.
+func (h *handler) relaySignedAnnouncement(senderID string, ann wit.SignedWitnessAnnouncement) { + recipients := h.peers.peersWithoutSignedAnnounce(ann.BlockHash) + relayed := 0 + for _, peer := range recipients { + if peer.Peer.ID() == senderID { + continue + } + if peer.Peer.Version() < wit.WIT2 { + continue + } + peer.Peer.AsyncSendSignedWitnessAnnouncement(ann) + relayed++ + } + if relayed > 0 { + wit2RelayOutMeter.Mark(int64(relayed)) + } +} + +// handleGetWitness retrieves witnesses for the requested block hashes and returns them as raw RLP data. +// +// WIT2: per-block lookup consults the in-flight body cache before falling back +// to chain storage. This lets nodes serve witnesses they have received from +// the network but not yet imported. Byte-correctness blame attaches to the +// server only on hash mismatch (the requester verifies bytes against the BP- +// signed WitnessHash); content-correctness failures during execution attach +// to the BP, so this server is not at additional risk by serving early. 
+func (h *witHandler) handleGetWitness(peer *wit.Peer, req *wit.GetWitnessPacket) (wit.WitnessPacketResponse, error) { + log.Debug("handleGetWitness processing request", "peer", peer.ID(), "reqID", req.RequestId, "witnessPages", len(req.WitnessPages)) + + witnessCache, witnessSize := h.resolveWitnessBytes(req.WitnessPages) + var response wit.WitnessPacketResponse totalResponsePayloadDataAmount := 0 // fast fail check totalCached := 0 // protection against heavy memory requests for _, witnessPage := range req.WitnessPages { - totalPages := (witnessSize[witnessPage.Hash] + PageSize - 1) / PageSize // integer trick for: ceil(witnessSize/PageSize) - var witnessPageResponse wit.WitnessPageResponse - witnessPageResponse.Page = witnessPage.Page - witnessPageResponse.Hash = witnessPage.Hash - witnessPageResponse.TotalPages = totalPages - - needToQuery := witnessPage.Page < totalPages - if needToQuery { - var witnessBytes []byte - if cachedRLPBytes, exists := witnessCache[witnessPage.Hash]; exists { - witnessBytes = cachedRLPBytes - } else { - // Use GetWitness to benefit from the blockchain's witness cache - queriedBytes := h.Chain().GetWitness(witnessPage.Hash) - witnessCache[witnessPage.Hash] = queriedBytes - witnessBytes = queriedBytes - totalCached += len(queriedBytes) + totalPages := (witnessSize[witnessPage.Hash] + PageSize - 1) / PageSize // ceil(witnessSize/PageSize) + pageResponse := wit.WitnessPageResponse{ + Page: witnessPage.Page, + Hash: witnessPage.Hash, + TotalPages: totalPages, + } + + if witnessPage.Page < totalPages { + witnessBytes, ok := witnessCache[witnessPage.Hash] + if !ok { + // Post-import fallback: fetch from chain storage on demand. + // If both this and the in-flight cache missed during resolveWitnessBytes, + // witnessSize[hash] would be 0 and we wouldn't reach this branch. 
+ witnessBytes = h.Chain().GetWitness(witnessPage.Hash) + witnessCache[witnessPage.Hash] = witnessBytes + totalCached += len(witnessBytes) } start := PageSize * witnessPage.Page @@ -163,26 +299,49 @@ func (h *witHandler) handleGetWitness(peer *wit.Peer, req *wit.GetWitnessPacket) if end > uint64(len(witnessBytes)) { end = uint64(len(witnessBytes)) } - witnessPageResponse.Data = witnessBytes[start:end] - totalResponsePayloadDataAmount += len(witnessPageResponse.Data) + pageResponse.Data = witnessBytes[start:end] + totalResponsePayloadDataAmount += len(pageResponse.Data) } - response = append(response, witnessPageResponse) + response = append(response, pageResponse) - // fast fail check if totalCached >= MaximumCachedWitnessOnARequest { return nil, errors.New("requests demans huge amount of memory") } - // memory protection check if totalResponsePayloadDataAmount >= MaximumResponseSize { return nil, errors.New("response exceeds maximum p2p payload size") } } - // Return the collected RLP data log.Debug("handleGetWitness returning witnesses pages", "peer", peer.ID(), "reqID", req.RequestId, "count", len(response)) return response, nil } +// resolveWitnessBytes resolves witness bytes and sizes for each unique block +// hash referenced by the request. Prefers the in-flight body cache (WIT2 +// pre-import serving) and falls back to chain-storage size lookup. Bytes for +// the chain-storage path are read lazily during page serving; only sizes are +// resolved up front so the response can carry accurate TotalPages even for +// pages this peer cannot fulfil. 
+func (h *witHandler) resolveWitnessBytes(pages []wit.WitnessPageRequest) (map[common.Hash][]byte, map[common.Hash]uint64) { + seen := make(map[common.Hash]struct{}, len(pages)) + for _, p := range pages { + seen[p.Hash] = struct{}{} + } + bytesByHash := make(map[common.Hash][]byte, len(seen)) + sizeByHash := make(map[common.Hash]uint64, len(seen)) + for blockHash := range seen { + if cached, _, ok := (*handler)(h).pendingWitnessBodies.get(blockHash); ok { + bytesByHash[blockHash] = cached + sizeByHash[blockHash] = uint64(len(cached)) + continue + } + if size := rawdb.ReadWitnessSize(h.Chain().DB(), blockHash); size != nil { + sizeByHash[blockHash] = *size + } + } + return bytesByHash, sizeByHash +} + // handleGetWitnessMetadata retrieves only the metadata (page count, size, block number) for the requested witness hashes. // This is efficient for verification purposes where we don't need the actual witness data. func (h *witHandler) handleGetWitnessMetadata(peer *wit.Peer, req *wit.GetWitnessMetadataPacket) ([]wit.WitnessMetadataResponse, error) { @@ -191,12 +350,16 @@ func (h *witHandler) handleGetWitnessMetadata(peer *wit.Peer, req *wit.GetWitnes var response []wit.WitnessMetadataResponse for _, hash := range req.Hashes { - // Get witness size from database - size := rawdb.ReadWitnessSize(h.Chain().DB(), hash) - witnessSize := uint64(0) - available := false - - if size != nil { + var ( + witnessSize uint64 + available bool + ) + + // Prefer in-flight body cache (WIT2 fast path). 
+ if cached, _, ok := (*handler)(h).pendingWitnessBodies.get(hash); ok { + witnessSize = uint64(len(cached)) + available = true + } else if size := rawdb.ReadWitnessSize(h.Chain().DB(), hash); size != nil { witnessSize = *size available = true } @@ -204,11 +367,14 @@ func (h *witHandler) handleGetWitnessMetadata(peer *wit.Peer, req *wit.GetWitnes // Calculate total pages totalPages := (witnessSize + PageSize - 1) / PageSize // ceil(witnessSize/PageSize) - // Get block number from header + // Get block number from header. Pre-import we may not yet have the + // header, so fall back to the announcement-cached number if a signed + // announcement is on file. blockNumber := uint64(0) - header := h.Chain().GetHeaderByHash(hash) - if header != nil { + if header := h.Chain().GetHeaderByHash(hash); header != nil { blockNumber = header.Number.Uint64() + } else if ann, ok := (*handler)(h).signedWitnesses.get(hash); ok { + blockNumber = ann.BlockNumber } response = append(response, wit.WitnessMetadataResponse{ diff --git a/eth/handler_wit2.go b/eth/handler_wit2.go new file mode 100644 index 0000000000..1b73f111ab --- /dev/null +++ b/eth/handler_wit2.go @@ -0,0 +1,674 @@ +package eth + +import ( + "errors" + "sync" + "time" + + "github.com/ethereum/go-ethereum/accounts" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/consensus/bor" + "github.com/ethereum/go-ethereum/core/stateless" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/eth/protocols/wit" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/metrics" +) + +var errInvalidSignatureLength = errors.New("invalid wit2 announce signature length") + +// Metrics for WIT2 signed-announce path. Emitted only when metrics are enabled. 
+var ( + wit2RelayInMeter = metrics.NewRegisteredMeter("eth/wit2/announce/relay_in", nil) + wit2RelayOutMeter = metrics.NewRegisteredMeter("eth/wit2/announce/relay_out", nil) + wit2InvalidSigMeter = metrics.NewRegisteredMeter("eth/wit2/announce/invalid_sig", nil) + wit2NotValidatorMeter = metrics.NewRegisteredMeter("eth/wit2/announce/not_validator", nil) + wit2DuplicateMeter = metrics.NewRegisteredMeter("eth/wit2/announce/duplicate", nil) + wit2BroadcastByteMismatchMeter = metrics.NewRegisteredMeter("eth/wit2/serve/broadcast_byte_mismatch", nil) + wit2BroadcastUnverifiedSkippedMeter = metrics.NewRegisteredMeter("eth/wit2/serve/broadcast_unverified_skipped", nil) + wit2HeaderUnknownMeter = metrics.NewRegisteredMeter("eth/wit2/announce/header_unknown", nil) + wit2ConflictingWitnessHashMeter = metrics.NewRegisteredMeter("eth/wit2/announce/conflicting_witness_hash", nil) + wit2RateLimitDropMeter = metrics.NewRegisteredMeter("eth/wit2/announce/rate_limit_drop", nil) + wit2StrikeDisconnectMeter = metrics.NewRegisteredMeter("eth/wit2/announce/strike_disconnect", nil) +) + +// Per-peer rate-limit + strike tracker for wit2 announces. We size the bucket +// at burst=256 with a sustained rate of 64 announces/sec — higher than any +// honest gossip mesh would produce on Polygon's block cadence, low enough to +// neutralise an attacker spamming valid-but-redundant signed packets. +const ( + wit2AnnounceBurstCap = 256 + wit2AnnounceRefillPerSecond = 64 + // wit2MisbehaviorStrikeLimit is the number of structurally-invalid (bad + // signature, wrong producer, oversized packet) announces a peer may + // produce within strikeDecayWindow before being disconnected. + wit2MisbehaviorStrikeLimit = 5 + wit2MisbehaviorWindow = time.Minute +) + +// peerWit2State tracks a peer's wit2-announce burst budget and recent strikes. +// Lifecycle is tied to the eth handler's peer registration; entries are +// cleaned up when the peer disconnects. 
+type peerWit2State struct { + tokens float64 + lastRefill time.Time + strikeCount int + firstStrikeAt time.Time +} + +type peerWit2Tracker struct { + mu sync.Mutex + state map[string]*peerWit2State +} + +func newPeerWit2Tracker() *peerWit2Tracker { + return &peerWit2Tracker{state: make(map[string]*peerWit2State)} +} + +func (t *peerWit2Tracker) forget(peerID string) { + t.mu.Lock() + delete(t.state, peerID) + t.mu.Unlock() +} + +// allow returns true if the peer has enough budget to consume `count` +// announcements right now. False means the packet should be dropped and a +// rate-limit metric recorded; the caller decides whether to disconnect. +func (t *peerWit2Tracker) allow(peerID string, count int) bool { + t.mu.Lock() + defer t.mu.Unlock() + st, ok := t.state[peerID] + now := time.Now() + if !ok { + st = &peerWit2State{tokens: wit2AnnounceBurstCap, lastRefill: now} + t.state[peerID] = st + } + elapsed := now.Sub(st.lastRefill).Seconds() + if elapsed > 0 { + st.tokens += elapsed * wit2AnnounceRefillPerSecond + if st.tokens > wit2AnnounceBurstCap { + st.tokens = wit2AnnounceBurstCap + } + st.lastRefill = now + } + if st.tokens < float64(count) { + return false + } + st.tokens -= float64(count) + return true +} + +// strike records a misbehavior for the peer. Returns true when the peer has +// exceeded the threshold within the decay window and must be disconnected. +func (t *peerWit2Tracker) strike(peerID string) bool { + t.mu.Lock() + defer t.mu.Unlock() + st, ok := t.state[peerID] + now := time.Now() + if !ok { + st = &peerWit2State{tokens: wit2AnnounceBurstCap, lastRefill: now} + t.state[peerID] = st + } + if st.firstStrikeAt.IsZero() || now.Sub(st.firstStrikeAt) > wit2MisbehaviorWindow { + st.firstStrikeAt = now + st.strikeCount = 0 + } + st.strikeCount++ + return st.strikeCount >= wit2MisbehaviorStrikeLimit +} + +// wit2 announce-cache lifecycle constants. 
+const ( + // wit2AnnounceTTL bounds how long we remember a signed announcement so we + // can re-emit it on body delivery and skip duplicate relays. Must outlast + // typical fetch+import latency so producers/relayers still have the + // signature when stateless peers come asking for the body. + wit2AnnounceTTL = 30 * time.Second + + // wit2RelayWindow is the per-(blockHash, peer) duplicate-suppression window. + // Even without this, knownWitnesses dedup blocks repeats; the window adds + // belt-and-suspenders coverage during the brief gap between receive and + // known-cache update under concurrent gossip storms. + wit2RelayWindow = 200 * time.Millisecond + + // witnessBodyCacheCapacity bounds the number of pre-import witness bodies + // held in memory. Each entry is ~50MB on Polygon, so the cap keeps total + // memory under ~500MB worst case. Older entries are evicted as new ones + // arrive; a 10-block window comfortably covers typical block-fetch and + // import latency. + witnessBodyCacheCapacity = 10 +) + +// pendingWitnessBody holds RLP-encoded witness bytes received from the network +// before the corresponding block has been imported (and thus before the bytes +// have been written to chain storage). Lets serving peers answer GetWitness +// requests during the import gap, which is what makes early relay actually +// useful — a peer that received the body can serve it the moment its TCP +// receive completes, rather than waiting ~500ms for full block validation. +type pendingWitnessBody struct { + bytes []byte + witnessHash common.Hash + receivedAt time.Time +} + +// pendingWitnessBodyCache holds bytes by block hash with a short TTL. Entries +// are dropped after the body has been written to chain storage, or after the +// TTL expires (whichever first). The cache is a simple map; the witness body +// is large (~50MB) so the cap is set conservatively. 
+type pendingWitnessBodyCache struct { + mu sync.RWMutex + entries map[common.Hash]*pendingWitnessBody + capacity int +} + +func newPendingWitnessBodyCache(capacity int) *pendingWitnessBodyCache { + return &pendingWitnessBodyCache{ + entries: make(map[common.Hash]*pendingWitnessBody), + capacity: capacity, + } +} + +func (c *pendingWitnessBodyCache) put(blockHash common.Hash, bytes []byte, witnessHash common.Hash) { + c.mu.Lock() + defer c.mu.Unlock() + c.gcLocked() + if len(c.entries) >= c.capacity { + // Evict the oldest entry. Linear scan is fine at the configured cap. + var oldestHash common.Hash + var oldest time.Time + for h, e := range c.entries { + if oldest.IsZero() || e.receivedAt.Before(oldest) { + oldest = e.receivedAt + oldestHash = h + } + } + delete(c.entries, oldestHash) + } + c.entries[blockHash] = &pendingWitnessBody{ + bytes: bytes, + witnessHash: witnessHash, + receivedAt: time.Now(), + } +} + +func (c *pendingWitnessBodyCache) get(blockHash common.Hash) ([]byte, common.Hash, bool) { + c.mu.RLock() + e, ok := c.entries[blockHash] + if !ok { + c.mu.RUnlock() + return nil, common.Hash{}, false + } + if time.Since(e.receivedAt) > wit2AnnounceTTL { + // Expired: drop the large byte slice now rather than waiting for the + // next put() to gc. Without this, a node that stops receiving witness + // bodies retains up to capacity (10) ~50MB blobs indefinitely past the + // TTL, since gcLocked() only fires on put(). + c.mu.RUnlock() + c.mu.Lock() + // Re-check under the write lock: a concurrent put() may have replaced + // the entry with a fresh one we should not delete. 
+ if cur, ok2 := c.entries[blockHash]; ok2 && cur == e { + delete(c.entries, blockHash) + } + c.mu.Unlock() + return nil, common.Hash{}, false + } + c.mu.RUnlock() + return e.bytes, e.witnessHash, true +} + +func (c *pendingWitnessBodyCache) drop(blockHash common.Hash) { + c.mu.Lock() + defer c.mu.Unlock() + delete(c.entries, blockHash) +} + +func (c *pendingWitnessBodyCache) gcLocked() { + cutoff := time.Now().Add(-wit2AnnounceTTL) + for h, e := range c.entries { + if e.receivedAt.Before(cutoff) { + delete(c.entries, h) + } + } +} + +// deferredAnnounceCapacity bounds how many header-unknown signed announcements +// we hold while waiting for the corresponding block to arrive. Each entry is +// ~200 bytes; the cap is sized for a worst-case stall window where the local +// chain falls a few hundred blocks behind a busy mesh and announcements +// arrive ahead of headers en masse. +const deferredAnnounceCapacity = 256 + +// deferredAnnounceEntry holds a signed announcement whose producer-binding +// could not be checked yet because the corresponding block header wasn't +// local. The drain path re-runs verification once the chain catches up. +type deferredAnnounceEntry struct { + announcement wit.SignedWitnessAnnouncement + peerID string + receivedAt time.Time +} + +// deferredAnnounceCache holds signed announcements deferred on header-unknown +// rejection so the chain-head loop can re-evaluate them when the matching +// block arrives. Without it, an announce that races ahead of its block — the +// expected outcome of independent block + announce gossip streams — is lost +// for good and subsequent witness fetches silently fall back to unsigned +// (WIT1) verification, leaking the WIT2 trust property for that block. 
+type deferredAnnounceCache struct { + mu sync.RWMutex + entries map[common.Hash]*deferredAnnounceEntry + capacity int +} + +func newDeferredAnnounceCache(capacity int) *deferredAnnounceCache { + return &deferredAnnounceCache{ + entries: make(map[common.Hash]*deferredAnnounceEntry), + capacity: capacity, + } +} + +// put stores the announcement keyed by block hash. If the cache is full, the +// oldest entry is evicted (linear scan; the cap keeps it cheap). A second put +// for the same hash refreshes receivedAt and overwrites the announcement — +// the more recent gossip wins, which is desirable when the original sender +// disconnected and a different peer now carries the announce forward. +func (c *deferredAnnounceCache) put(ann wit.SignedWitnessAnnouncement, peerID string) { + c.mu.Lock() + defer c.mu.Unlock() + c.gcLocked() + if _, exists := c.entries[ann.BlockHash]; !exists && len(c.entries) >= c.capacity { + var oldestHash common.Hash + var oldest time.Time + for h, e := range c.entries { + if oldest.IsZero() || e.receivedAt.Before(oldest) { + oldest = e.receivedAt + oldestHash = h + } + } + delete(c.entries, oldestHash) + } + c.entries[ann.BlockHash] = &deferredAnnounceEntry{ + announcement: ann, + peerID: peerID, + receivedAt: time.Now(), + } +} + +// take removes and returns the entry for blockHash if present and fresh. +// Returns ok=false on miss or expiry; expired entries are deleted in place. +func (c *deferredAnnounceCache) take(blockHash common.Hash) (*deferredAnnounceEntry, bool) { + c.mu.Lock() + defer c.mu.Unlock() + e, ok := c.entries[blockHash] + if !ok { + return nil, false + } + delete(c.entries, blockHash) + if time.Since(e.receivedAt) > wit2AnnounceTTL { + return nil, false + } + return e, true +} + +// has reports whether a fresh entry exists for blockHash. Test-facing only; +// production code uses take to ensure the entry is consumed. 
+func (c *deferredAnnounceCache) has(blockHash common.Hash) bool { + c.mu.RLock() + defer c.mu.RUnlock() + e, ok := c.entries[blockHash] + if !ok { + return false + } + return time.Since(e.receivedAt) <= wit2AnnounceTTL +} + +// gcLocked drops entries past the TTL. Caller must hold the write lock. +func (c *deferredAnnounceCache) gcLocked() { + cutoff := time.Now().Add(-wit2AnnounceTTL) + for h, e := range c.entries { + if e.receivedAt.Before(cutoff) { + delete(c.entries, h) + } + } +} + +// signedWitnessCache stores BP-signed announcements by block hash. The cache +// is consulted by: +// - the relay path on receive (skip if already seen recently), +// - the body-broadcast path (re-emit the cached signed announce when a +// stateless peer requests the body), and +// - the producer path (cache the locally-signed announcement so subsequent +// re-emissions from this node don't re-sign). +type signedWitnessCache struct { + mu sync.RWMutex + entries map[common.Hash]*signedAnnounceEntry +} + +type signedAnnounceEntry struct { + announcement wit.SignedWitnessAnnouncement + receivedAt time.Time +} + +func newSignedWitnessCache() *signedWitnessCache { + return &signedWitnessCache{entries: make(map[common.Hash]*signedAnnounceEntry)} +} + +// putIfNewer stores the announcement keyed by block hash, returning true if +// the cache did not already contain a fresh entry for this hash. Callers use +// the return value to decide whether to relay (false → suppress duplicate). +// +// If a fresh entry already exists with a *different* WitnessHash, the new +// announcement is rejected outright (returns false): the first valid signed +// commitment wins for the lifetime of the entry. This prevents an attacker +// who has obtained a second valid signature (e.g. a compromised producer +// later in the same window) from poisoning the cache mid-fetch and dropping +// honest serving peers against a different hash. 
+func (c *signedWitnessCache) putIfNewer(ann wit.SignedWitnessAnnouncement) bool { + c.mu.Lock() + defer c.mu.Unlock() + c.gcLocked() + if existing, ok := c.entries[ann.BlockHash]; ok { + if existing.announcement.WitnessHash != ann.WitnessHash { + wit2ConflictingWitnessHashMeter.Mark(1) + return false + } + // Same WitnessHash, recent: dedup. + if time.Since(existing.receivedAt) < wit2RelayWindow { + return false + } + } + c.entries[ann.BlockHash] = &signedAnnounceEntry{ + announcement: ann, + receivedAt: time.Now(), + } + return true +} + +// get returns the cached announcement for a block hash, if present and fresh. +func (c *signedWitnessCache) get(blockHash common.Hash) (wit.SignedWitnessAnnouncement, bool) { + c.mu.RLock() + defer c.mu.RUnlock() + e, ok := c.entries[blockHash] + if !ok { + return wit.SignedWitnessAnnouncement{}, false + } + if time.Since(e.receivedAt) > wit2AnnounceTTL { + return wit.SignedWitnessAnnouncement{}, false + } + return e.announcement, true +} + +// gcLocked drops entries past the TTL. Caller must hold the write lock. +func (c *signedWitnessCache) gcLocked() { + cutoff := time.Now().Add(-wit2AnnounceTTL) + for h, e := range c.entries { + if e.receivedAt.Before(cutoff) { + delete(c.entries, h) + } + } +} + +// verifySignedAnnouncement returns the recovered signer address if the +// signature is structurally valid; otherwise an error. Validator-set +// membership is checked separately against the consensus engine. 
+func verifySignedAnnouncement(ann wit.SignedWitnessAnnouncement) (common.Address, error) { + if len(ann.Signature) != wit.SignatureLength { + return common.Address{}, errInvalidSignatureLength + } + digest := wit.WitnessAnnouncementSigningHash(ann.BlockHash, ann.BlockNumber, ann.WitnessHash) + pubkey, err := crypto.Ecrecover(digest.Bytes(), ann.Signature) + if err != nil { + return common.Address{}, err + } + var addr common.Address + copy(addr[:], crypto.Keccak256(pubkey[1:])[12:]) + return addr, nil +} + +// cosendWitnessAnnouncement co-sends a witness announcement to every peer +// that just received the full block via the propagate=true fanout, provided +// the peer doesn't already have the witness. WIT2 peers receive the signed +// variant; older peers receive the unsigned WIT1 announce. Skipped entirely +// when the local node hasn't yet stored the witness or doesn't have a +// signing key configured. +func (h *handler) cosendWitnessAnnouncement(blockHash common.Hash, blockNumber uint64, transfer []*ethPeer, staticAndTrustedPeers []*ethPeer) { + if !h.chain.HasWitness(blockHash) { + return + } + ann, hasSigned := h.signLocalWitnessAnnouncement(blockHash, blockNumber) + if !hasSigned { + return + } + witnessRecipientsByID := make(map[string]*witPeer) + for _, wp := range h.peers.peersWithoutWitness(blockHash) { + witnessRecipientsByID[wp.Peer.ID()] = wp + } + cosend := func(id string) { + wp, ok := witnessRecipientsByID[id] + if !ok { + return + } + if wp.Peer.Version() >= wit.WIT2 { + wp.Peer.AsyncSendSignedWitnessAnnouncement(ann) + } else { + wp.Peer.AsyncSendNewWitnessHash(blockHash, blockNumber) + } + } + for _, peer := range transfer { + cosend(peer.Peer.ID()) + } + for _, peer := range staticAndTrustedPeers { + cosend(peer.ID()) + } +} + +// lookupSignedWitnessHash returns the BP-signed witness hash for a block, if +// the local cache has a verified announcement. 
Used by the witness manager +// on fetch success to verify byte-correctness against the signed commitment. +func (h *handler) lookupSignedWitnessHash(blockHash common.Hash) (common.Hash, bool) { + ann, ok := h.signedWitnesses.get(blockHash) + if !ok { + return common.Hash{}, false + } + return ann.WitnessHash, true +} + +// cacheVerifiedWitnessForServing receives canonical-encoded witness bytes from +// the fetcher after a successful, byte-verified paged download and stores them +// in the in-flight cache so peers can fetch the body before this node finishes +// chain-write. Bytes here have already passed verifyAgainstSignedHash (when a +// signed announcement was on file), or arrived via WIT1 unsigned path; in both +// cases they're the same bytes the upstream peer agreed upon, so serving them +// to downstream peers cannot expose this node to byte-mismatch drops beyond +// the upstream's already-incurred risk. +func (h *handler) cacheVerifiedWitnessForServing(blockHash common.Hash, witnessBytes []byte, witnessHash common.Hash) { + if h.pendingWitnessBodies == nil { + return + } + h.pendingWitnessBodies.put(blockHash, witnessBytes, witnessHash) +} + +// signLocalWitnessAnnouncement looks up the witness body for blockHash, hashes +// it, and signs the announcement digest using the engine's authorized signer. +// The result is cached so subsequent broadcasts of the same block reuse the +// signature without recomputing the keccak. +// +// Returns (announcement, true) on success. Returns (_, false) if any of: +// - no signer configured (full node not producing blocks) +// - witness bytes not yet stored in chain +// - signing failed +// +// Cost: ~150ms keccak over a 50MB witness, plus ~100μs ECDSA. Off the +// block-production critical path; runs once per produced block on the +// announce path. 
+func (h *handler) signLocalWitnessAnnouncement(blockHash common.Hash, blockNumber uint64) (wit.SignedWitnessAnnouncement, bool) { + if cached, ok := h.signedWitnesses.get(blockHash); ok { + return cached, true + } + + borEngine, ok := h.chain.Engine().(*bor.Bor) + if !ok { + return wit.SignedWitnessAnnouncement{}, false + } + if (borEngine.CurrentSigner() == common.Address{}) { + return wit.SignedWitnessAnnouncement{}, false + } + + witnessHash, ok := h.canonicalWitnessHash(blockHash) + if !ok { + return wit.SignedWitnessAnnouncement{}, false + } + preimage := wit.WitnessAnnouncementSigningPreImage(blockHash, blockNumber, witnessHash) + _, sig, err := borEngine.SignBytes(accounts.MimetypeBorWitnessAnnounce, preimage) + if err != nil { + log.Warn("wit2: failed to sign witness announcement", "blockHash", blockHash, "err", err) + return wit.SignedWitnessAnnouncement{}, false + } + + ann := wit.SignedWitnessAnnouncement{ + BlockHash: blockHash, + BlockNumber: blockNumber, + WitnessHash: witnessHash, + Signature: sig, + } + h.signedWitnesses.putIfNewer(ann) + return ann, true +} + +// canonicalWitnessHash reads the witness bytes for blockHash from chain +// storage and returns the WIT2 chunked-aggregate commitment over those bytes. +// Witness.EncodeRLP is now deterministic (state nodes sorted), so every newly +// written witness blob is canonical at write time and can be hashed directly +// without a decode/re-encode round-trip — saving roughly the cost of one RLP +// pass on the announce path. Returns (_, false) when no witness is on file. +func (h *handler) canonicalWitnessHash(blockHash common.Hash) (common.Hash, bool) { + stored := h.chain.GetWitness(blockHash) + if len(stored) == 0 { + return common.Hash{}, false + } + return stateless.WitnessCommitHash(stored), true +} + +// isScheduledProducer binds the recovered signer of a wit2 announcement to the +// actual block producer of the announced block. 
When the block header is +// locally available — the common case — we recover the seal-signer of the +// header and require an exact address match. Validator-set membership is no +// longer sufficient: any current validator could otherwise sign an +// announcement for another producer's block hash with a forged WitnessHash, +// poisoning this node's cache and dropping honest serving peers. +// +// Returns (ok, headerAvailable): +// - ok=true, headerAvailable=true: signer matches the block producer; safe +// to cache and relay. +// - ok=false, headerAvailable=true: confirmed bad signer; the caller MUST +// strike the relayer. +// - ok=false, headerAvailable=false: header not yet local. The announce +// cannot be bound to a producer right now. The caller MUST NOT strike — +// this is expected during the cosend window where a signed announce +// races the block to the receiver. The handler stashes the announce in +// the deferred queue and the chain-head loop re-evaluates it once the +// block arrives. +// +// Header presence is checked first regardless of engine: an announce we +// cannot match to a local block is by definition unverifiable here. Only +// after the header is on file do we route into the bor-specific producer +// recovery (or short-circuit to ok=true on non-bor test chains). +func (h *handler) isScheduledProducer(signer common.Address, blockNumber uint64, blockHash common.Hash) (bool, bool) { + header := h.chain.GetHeaderByHash(blockHash) + if header == nil { + wit2HeaderUnknownMeter.Mark(1) + return false, false + } + borEngine, isBor := h.chain.Engine().(*bor.Bor) + if !isBor { + // Non-bor chain (tests): header presence already validated above; the + // producer check is bor-specific and intentionally skipped here. 
+ if header.Number.Uint64() != blockNumber { + return false, true + } + return true, true + } + return verifyScheduledProducer(borEngine, header, signer, blockNumber, blockHash) +} + +// drainDeferredAnnouncesFor re-evaluates any deferred announcement whose +// blockHash now matches a header that has just been imported. On verification +// success the announce is cached in signedWitnesses, the original sender is +// credited as announce-known, and the announce is relayed to peers that have +// not seen it. On confirmed mis-binding (signer ≠ producer) the deferred +// entry is dropped — relayers cannot be re-struck post-hoc since we lost the +// peer reference between deferral and drain. +// +// Called from the chain-head subscription on each new block. Also exposed for +// direct invocation in tests. +func (h *handler) drainDeferredAnnouncesFor(blockHash common.Hash) { + if h.deferredAnnounces == nil { + return + } + entry, ok := h.deferredAnnounces.take(blockHash) + if !ok { + return + } + signer, err := verifySignedAnnouncement(entry.announcement) + if err != nil { + // Should be unreachable: we re-verified the same bytes that already + // passed the signature check at acceptSignedAnnouncement time. + // Surfaced via metric in case a future refactor reorders this. + wit2InvalidSigMeter.Mark(1) + log.Debug("wit2: deferred announce failed signature re-check", "blockHash", blockHash, "err", err) + return + } + prodOk, headerAvailable := h.isScheduledProducer(signer, entry.announcement.BlockNumber, blockHash) + if !prodOk { + if !headerAvailable { + // Header still not local — re-stash with fresh receivedAt so the + // next chain-head event can try again before the TTL expires. 
+ h.deferredAnnounces.put(entry.announcement, entry.peerID) + return + } + wit2NotValidatorMeter.Mark(1) + log.Debug("wit2: deferred announce signer is not the scheduled producer", + "blockHash", blockHash, "signer", signer) + return + } + if !h.signedWitnesses.putIfNewer(entry.announcement) { + wit2DuplicateMeter.Mark(1) + return + } + // Credit the original sender as announce-known so we don't re-relay back. + if peer := h.peers.peer(entry.peerID); peer != nil && peer.witPeer != nil { + peer.witPeer.Peer.AddKnownAnnounce(blockHash) + } + h.relaySignedAnnouncement(entry.peerID, entry.announcement) +} + +// verifyScheduledProducer is the pure decision logic for binding a wit2 +// announcement signer to the block producer of `blockHash`. Split from +// isScheduledProducer so it can be unit-tested without standing up a full +// handler. Returns the same (ok, headerAvailable) shape — see +// isScheduledProducer for the contract. +func verifyScheduledProducer(borEngine *bor.Bor, header *types.Header, signer common.Address, blockNumber uint64, blockHash common.Hash) (bool, bool) { + if header == nil { + wit2HeaderUnknownMeter.Mark(1) + log.Debug("wit2: header for announced block not yet local; deferring until block arrives", + "blockHash", blockHash, "blockNumber", blockNumber) + return false, false + } + if header.Number.Uint64() != blockNumber { + log.Debug("wit2: announce blockNumber does not match local header", + "blockHash", blockHash, "announced", blockNumber, "local", header.Number.Uint64()) + return false, true + } + producer, err := borEngine.Author(header) + if err != nil { + log.Debug("wit2: failed to recover header sealer", "blockHash", blockHash, "err", err) + return false, true + } + if producer != signer { + log.Debug("wit2: announce signer is not the block producer", + "blockHash", blockHash, "producer", producer, "signer", signer) + return false, true + } + return true, true +} diff --git a/eth/handler_wit2_test.go b/eth/handler_wit2_test.go new file 
mode 100644 index 0000000000..9661990183 --- /dev/null +++ b/eth/handler_wit2_test.go @@ -0,0 +1,674 @@ +package eth + +import ( + "bytes" + "crypto/rand" + "math/big" + "testing" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/stateless" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/eth/protocols/wit" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestSignedWitnessCachePutIfNewerSuppressesDuplicates verifies that the +// per-(blockHash) relay-window dedup blocks immediate re-relay of the same +// announcement. Without this, A→B→A bouncing would amplify a single signed +// announcement into a gossip storm. +func TestSignedWitnessCachePutIfNewerSuppressesDuplicates(t *testing.T) { + c := newSignedWitnessCache() + ann := wit.SignedWitnessAnnouncement{ + BlockHash: common.HexToHash("0xaaaa"), + BlockNumber: 100, + WitnessHash: common.HexToHash("0xbbbb"), + Signature: make([]byte, wit.SignatureLength), + } + if !c.putIfNewer(ann) { + t.Fatal("first put should succeed") + } + if c.putIfNewer(ann) { + t.Fatal("immediate re-put within window should be suppressed") + } + if _, ok := c.get(ann.BlockHash); !ok { + t.Fatal("entry should still be present after suppressed put") + } +} + +// TestSignedWitnessCacheTTLExpiry checks that stale entries don't linger past +// the TTL. This prevents stale signatures from being re-served indefinitely +// for blocks long since imported and pruned. +func TestSignedWitnessCacheTTLExpiry(t *testing.T) { + c := newSignedWitnessCache() + ann := wit.SignedWitnessAnnouncement{ + BlockHash: common.HexToHash("0xcafe"), + BlockNumber: 1, + WitnessHash: common.HexToHash("0xdead"), + Signature: make([]byte, wit.SignatureLength), + } + c.putIfNewer(ann) + // Force the receivedAt back beyond TTL. 
+ c.mu.Lock() + c.entries[ann.BlockHash].receivedAt = time.Now().Add(-2 * wit2AnnounceTTL) + c.mu.Unlock() + if _, ok := c.get(ann.BlockHash); ok { + t.Fatal("expired entry should not be returned") + } +} + +// TestVerifySignedAnnouncementRejectsBadLength catches sloppy callers passing +// truncated signatures. Without this guard, ecrecover panics or silently +// recovers a garbage address. +func TestVerifySignedAnnouncementRejectsBadLength(t *testing.T) { + ann := wit.SignedWitnessAnnouncement{ + BlockHash: common.HexToHash("0x01"), + BlockNumber: 1, + WitnessHash: common.HexToHash("0x02"), + Signature: []byte{0x00, 0x01, 0x02}, + } + if _, err := verifySignedAnnouncement(ann); err == nil { + t.Fatal("expected error for short signature") + } +} + +// TestVerifySignedAnnouncementRoundTrip signs an announcement with a known +// key and verifies recovery yields the same address. This is the core +// authentication property; if it breaks, every signed announcement on the +// network silently fails verification. 
+func TestVerifySignedAnnouncementRoundTrip(t *testing.T) { + key, err := crypto.GenerateKey() + if err != nil { + t.Fatalf("key gen: %v", err) + } + expectedSigner := crypto.PubkeyToAddress(key.PublicKey) + + ann := wit.SignedWitnessAnnouncement{ + BlockHash: common.HexToHash("0xfeedface"), + BlockNumber: 42, + WitnessHash: common.HexToHash("0xc0ffee00"), + } + digest := wit.WitnessAnnouncementSigningHash(ann.BlockHash, ann.BlockNumber, ann.WitnessHash) + sig, err := crypto.Sign(digest.Bytes(), key) + if err != nil { + t.Fatalf("sign: %v", err) + } + ann.Signature = sig + + got, err := verifySignedAnnouncement(ann) + if err != nil { + t.Fatalf("verify: %v", err) + } + if got != expectedSigner { + t.Fatalf("recovered signer = %s, want %s", got.Hex(), expectedSigner.Hex()) + } +} + +// TestVerifySignedAnnouncementWalletSemantics mirrors what wallet.SignData +// does in production (keccak256(preimage) before signing) to guard against +// the regression where the producer pre-hashes a 32-byte digest and the +// wallet hashes again — producing signatures the verifier cannot recover. +// The test fails iff the producer/verifier preimage-vs-digest contract +// drifts. +func TestVerifySignedAnnouncementWalletSemantics(t *testing.T) { + key, err := crypto.GenerateKey() + if err != nil { + t.Fatalf("key gen: %v", err) + } + expectedSigner := crypto.PubkeyToAddress(key.PublicKey) + + ann := wit.SignedWitnessAnnouncement{ + BlockHash: common.HexToHash("0xab"), + BlockNumber: 99, + WitnessHash: common.HexToHash("0xcd"), + } + // Production wallet path: SignData hashes its input once, then signs. 
+ preimage := wit.WitnessAnnouncementSigningPreImage(ann.BlockHash, ann.BlockNumber, ann.WitnessHash) + walletDigest := crypto.Keccak256(preimage) + sig, err := crypto.Sign(walletDigest, key) + if err != nil { + t.Fatalf("sign: %v", err) + } + ann.Signature = sig + + got, err := verifySignedAnnouncement(ann) + if err != nil { + t.Fatalf("verify: %v", err) + } + if got != expectedSigner { + t.Fatalf("recovered signer = %s, want %s — preimage/digest contract is broken", got.Hex(), expectedSigner.Hex()) + } +} + +// TestVerifySignedAnnouncementDetectsTampering ensures that flipping any +// field in the announcement causes verification to recover a different +// address (or fail outright). This is the load-bearing property for the +// blame-separation argument: a signature ties a specific BP to a specific +// (BlockHash, BlockNumber, WitnessHash) tuple and nothing else. +func TestVerifySignedAnnouncementDetectsTampering(t *testing.T) { + key, err := crypto.GenerateKey() + if err != nil { + t.Fatalf("key gen: %v", err) + } + signer := crypto.PubkeyToAddress(key.PublicKey) + + original := wit.SignedWitnessAnnouncement{ + BlockHash: common.HexToHash("0xa1"), + BlockNumber: 7, + WitnessHash: common.HexToHash("0xb2"), + } + digest := wit.WitnessAnnouncementSigningHash(original.BlockHash, original.BlockNumber, original.WitnessHash) + sig, err := crypto.Sign(digest.Bytes(), key) + if err != nil { + t.Fatalf("sign: %v", err) + } + + // Tamper with WitnessHash but reuse the signature. + tampered := original + tampered.WitnessHash = common.HexToHash("0xb3") + tampered.Signature = sig + + got, err := verifySignedAnnouncement(tampered) + if err != nil { + // If err is non-nil, tampering was caught at the structural level. 
+ return + } + if got == signer { + t.Fatal("tampered announcement recovered original signer; signature is not bound to the message") + } +} + +// TestPeerWit2TrackerRateLimitConsumesTokens guards Fix-7: the per-peer +// rate-limit must reject burst-exceeding traffic without dropping the peer. +// Honest peers running normal block cadence should never trip this; the test +// pins the budget so a regression that loosens the cap is caught. +func TestPeerWit2TrackerRateLimitConsumesTokens(t *testing.T) { + tr := newPeerWit2Tracker() + if !tr.allow("p1", wit2AnnounceBurstCap) { + t.Fatal("first burst-cap-sized batch must fit") + } + if tr.allow("p1", 1) { + t.Fatal("immediate next announcement must be rejected when bucket is empty") + } +} + +// TestPeerWit2TrackerStrikeDisconnectThreshold pins the strike-threshold +// behavior. Below the threshold, strike returns false (peer kept). At the +// threshold it returns true so the handler disconnects. Honest peers +// occasionally producing one bad announce should never trigger; sustained +// misbehavior must. +func TestPeerWit2TrackerStrikeDisconnectThreshold(t *testing.T) { + tr := newPeerWit2Tracker() + for i := 0; i < wit2MisbehaviorStrikeLimit-1; i++ { + if tr.strike("p1") { + t.Fatalf("disconnect signaled at strike %d, want only at %d", i+1, wit2MisbehaviorStrikeLimit) + } + } + if !tr.strike("p1") { + t.Fatalf("disconnect must signal at strike %d", wit2MisbehaviorStrikeLimit) + } +} + +// TestSignedWitnessCacheRejectsConflictingWitnessHash is the Fix-6 invariant +// at the cache layer: only the FIRST valid signed announcement for a given +// blockHash wins. A second announcement with a different WitnessHash — +// possibly from a forked producer or a compromised key in a later window — +// must be rejected, otherwise it would poison the cache mid-fetch and drop +// honest peers serving the original bytes. 
+func TestSignedWitnessCacheRejectsConflictingWitnessHash(t *testing.T) { + c := newSignedWitnessCache() + first := wit.SignedWitnessAnnouncement{ + BlockHash: common.HexToHash("0xabcd"), + BlockNumber: 50, + WitnessHash: common.HexToHash("0x1111"), + Signature: make([]byte, wit.SignatureLength), + } + if !c.putIfNewer(first) { + t.Fatal("first put should succeed") + } + + conflict := first + conflict.WitnessHash = common.HexToHash("0x2222") + if c.putIfNewer(conflict) { + t.Fatal("second put with different WitnessHash must be rejected") + } + got, ok := c.get(first.BlockHash) + if !ok { + t.Fatal("first announcement must remain cached after conflict rejection") + } + if got.WitnessHash != first.WitnessHash { + t.Fatalf("cache poisoned: WitnessHash=%s want=%s", got.WitnessHash.Hex(), first.WitnessHash.Hex()) + } +} + +// TestPendingWitnessBodyCacheEvictsOldest covers the LRU-style eviction when +// the cache reaches capacity. Without it, long-running nodes accumulate +// witness bodies indefinitely (~50MB each) and run out of memory. +func TestPendingWitnessBodyCacheEvictsOldest(t *testing.T) { + c := newPendingWitnessBodyCache(2) + c.put(common.HexToHash("0x01"), []byte("first"), common.HexToHash("0xa")) + time.Sleep(time.Millisecond) + c.put(common.HexToHash("0x02"), []byte("second"), common.HexToHash("0xb")) + time.Sleep(time.Millisecond) + c.put(common.HexToHash("0x03"), []byte("third"), common.HexToHash("0xc")) + + if _, _, ok := c.get(common.HexToHash("0x01")); ok { + t.Fatal("oldest entry should have been evicted") + } + if _, _, ok := c.get(common.HexToHash("0x02")); !ok { + t.Fatal("middle entry should still be present") + } + if _, _, ok := c.get(common.HexToHash("0x03")); !ok { + t.Fatal("newest entry should still be present") + } +} + +// TestPendingWitnessBodyCacheDropClearsEntry guards the explicit drop path +// used when a witness has been written to chain storage and no longer needs +// in-flight serving. 
+func TestPendingWitnessBodyCacheDropClearsEntry(t *testing.T) { + c := newPendingWitnessBodyCache(4) + hash := common.HexToHash("0xdead") + c.put(hash, []byte("x"), common.HexToHash("0xaa")) + c.drop(hash) + if _, _, ok := c.get(hash); ok { + t.Fatal("entry should be gone after drop") + } +} + +// TestHandleWitnessBroadcastSkipsCacheWhenNoSignature guards the Fix-5 +// invariant: bytes received via NewWitness broadcast are NOT exposed for +// pre-import serving when no BP-signed witnessHash is on file. Otherwise an +// honest relayer with a malicious upstream would serve unverified bytes and +// be dropped by downstream peers as if it had lied. +func TestHandleWitnessBroadcastSkipsCacheWhenNoSignature(t *testing.T) { + h := newTestHandler() + defer h.close() + + witH := (*witHandler)(h.handler) + peer, cleanup := newTestWit2PeerWithReader() + defer cleanup() + + header := &types.Header{Number: big.NewInt(7777)} + witness, err := stateless.NewWitness(header, nil) + if err != nil { + t.Fatalf("new witness: %v", err) + } + + // No signed announcement on file → broadcast must NOT populate the + // pre-import serving cache. + if err := witH.handleWitnessBroadcast(peer, witness); err != nil { + t.Fatalf("handleWitnessBroadcast: %v", err) + } + hash := header.Hash() + if _, _, ok := h.handler.pendingWitnessBodies.get(hash); ok { + t.Fatal("pendingWitnessBodies populated without a signed witnessHash; bytes are unverified for serving") + } +} + +// TestSignedAnnounceDoesNotMarkPeerAsBodyHolder is the load-bearing +// regression test for the announce/body separation. A WIT2 peer that has +// only relayed a signed announcement (no body) MUST NOT show up in +// peersWithoutWitness's complement — i.e. it must not be selected as a body +// fetch target by getOnePeerWithWitness. Otherwise the fetcher will ask a +// relay-only peer for bytes, get nothing, and drop an honest peer. 
+func TestSignedAnnounceDoesNotMarkPeerAsBodyHolder(t *testing.T) { + hash := common.HexToHash("0xfa11") + peer, cleanup := newTestWit2PeerWithReader() + defer cleanup() + + ann := wit.SignedWitnessAnnouncement{ + BlockHash: hash, + BlockNumber: 1, + WitnessHash: common.HexToHash("0xab"), + Signature: make([]byte, wit.SignatureLength), + } + + // Outbound announce path (this node forwarding to peer): must NOT mark + // peer as a body-holder. + peer.AsyncSendSignedWitnessAnnouncement(ann) + + if peer.KnownWitnessContainsHash(hash) { + t.Fatal("AsyncSendSignedWitnessAnnouncement marked peer as body-holder; body fetch will pick a relay-only peer and drop it") + } + if !peer.KnownAnnounceContainsHash(hash) { + t.Fatal("AsyncSendSignedWitnessAnnouncement should mark announce-known so we don't re-relay") + } +} + +// TestHandleGetWitnessServesFromInFlightCache is the load-bearing behavioral +// test for the WIT2 pre-import serving claim: a node that has received the +// witness body over gossip but has not yet imported it (chain storage empty) +// must still be able to serve `GetWitness` requests from the in-flight cache. +// Without this path, multi-hop WIT2 fast-propagation has no body source until +// each hop's chain-write completes — collapsing the entire benefit of the +// design. +func TestHandleGetWitnessServesFromInFlightCache(t *testing.T) { + h := newTestHandler() + defer h.close() + + witH := (*witHandler)(h.handler) + peer, cleanup := newTestWitPeerWithReader() + defer cleanup() + + header := &types.Header{Number: big.NewInt(4242)} + hash := header.Hash() + rawdb.WriteHeader(h.chain.DB(), header) + + // Smaller than PageSize so the response fits in a single page. + bodyBytes := make([]byte, 1*1024*1024) + rand.Read(bodyBytes) + + // Body is in the in-flight cache only; chain storage is empty. 
+ h.handler.pendingWitnessBodies.put(hash, bodyBytes, crypto.Keccak256Hash(bodyBytes)) + require.Nil(t, rawdb.ReadWitnessSize(h.chain.DB(), hash), + "precondition: chain must have no witness for this hash") + + resp, err := witH.handleGetWitness(peer, &wit.GetWitnessPacket{ + RequestId: 1, + GetWitnessRequest: &wit.GetWitnessRequest{WitnessPages: []wit.WitnessPageRequest{{Hash: hash, Page: 0}}}, + }) + require.NoError(t, err) + require.Equal(t, 1, len(resp)) + assert.Equal(t, hash, resp[0].Hash) + assert.Equal(t, uint64(1), resp[0].TotalPages) + require.Equal(t, len(bodyBytes), len(resp[0].Data), + "in-flight cache served fewer bytes than expected — pre-import path is not wired") + assert.Equal(t, bodyBytes[:64], resp[0].Data[:64]) +} + +// TestHandleGetWitnessMetadataServesFromInFlightCache mirrors the above for +// the metadata path: a peer asking for metadata before chain-write should +// receive Available=true with the correct size from the in-flight cache. +// This is what lets a downstream relayer compute pagination without waiting. 
+func TestHandleGetWitnessMetadataServesFromInFlightCache(t *testing.T) { + h := newTestHandler() + defer h.close() + + witH := (*witHandler)(h.handler) + peer, cleanup := newTestWitPeerWithReader() + defer cleanup() + + header := &types.Header{Number: big.NewInt(4243)} + hash := header.Hash() + rawdb.WriteHeader(h.chain.DB(), header) + + bodyBytes := make([]byte, 7*1024*1024) // forces TotalPages = 1 (under 15MB) + rand.Read(bodyBytes) + h.handler.pendingWitnessBodies.put(hash, bodyBytes, crypto.Keccak256Hash(bodyBytes)) + require.Nil(t, rawdb.ReadWitnessSize(h.chain.DB(), hash)) + + resp, err := witH.handleGetWitnessMetadata(peer, &wit.GetWitnessMetadataPacket{ + RequestId: 1, + GetWitnessMetadataRequest: &wit.GetWitnessMetadataRequest{ + Hashes: []common.Hash{hash}, + }, + }) + require.NoError(t, err) + require.Equal(t, 1, len(resp)) + assert.True(t, resp[0].Available, "metadata must report Available when only the in-flight cache holds the body") + assert.Equal(t, uint64(len(bodyBytes)), resp[0].WitnessSize) + assert.Equal(t, uint64(1), resp[0].TotalPages) + assert.Equal(t, header.Number.Uint64(), resp[0].BlockNumber) +} + +// TestHandleGetWitnessPrefersCacheOverChain documents the chosen precedence: +// when both sources hold a witness, the in-flight cache wins. Locks the choice +// in so a refactor can't silently reverse it. Cache-first is correct because +// the cache is what the BP-signed announcement points at; the chain copy is +// only valid once chain-write has finished, which the cache entry implies has +// not yet happened or has just happened with identical bytes. 
+func TestHandleGetWitnessPrefersCacheOverChain(t *testing.T) { + h := newTestHandler() + defer h.close() + + witH := (*witHandler)(h.handler) + peer, cleanup := newTestWitPeerWithReader() + defer cleanup() + + header := &types.Header{Number: big.NewInt(4244)} + hash := header.Hash() + rawdb.WriteHeader(h.chain.DB(), header) + + cacheBytes := make([]byte, 4*1024*1024) + rand.Read(cacheBytes) + chainBytes := make([]byte, 4*1024*1024) + rand.Read(chainBytes) + + rawdb.WriteWitness(h.chain.DB(), hash, chainBytes) + h.handler.pendingWitnessBodies.put(hash, cacheBytes, crypto.Keccak256Hash(cacheBytes)) + + resp, err := witH.handleGetWitness(peer, &wit.GetWitnessPacket{ + RequestId: 1, + GetWitnessRequest: &wit.GetWitnessRequest{WitnessPages: []wit.WitnessPageRequest{{Hash: hash, Page: 0}}}, + }) + require.NoError(t, err) + require.Equal(t, 1, len(resp)) + assert.Equal(t, cacheBytes[:64], resp[0].Data[:64], + "handler must prefer the in-flight cache; got bytes that look like chain storage") +} + +// TestCanonicalWitnessHashUsesStoredBytesDirectly is the regression for the +// optimization that skips decode/re-encode on the producer announce path: as +// long as Witness.EncodeRLP is canonical-deterministic, stored bytes are +// already canonical and can be hashed in place. If a future change re- +// introduces a non-canonical write path, this test fails and the producer- +// side WitnessHash silently diverges from what verifiers compute. +func TestCanonicalWitnessHashUsesStoredBytesDirectly(t *testing.T) { + h := newTestHandler() + defer h.close() + + header := &types.Header{Number: big.NewInt(7777)} + hash := header.Hash() + + // Build a synthetic witness, encode canonically once, store the bytes. 
+ w, err := stateless.NewWitness(header, nil) + require.NoError(t, err) + for i := 0; i < 64; i++ { + buf := make([]byte, 256) + rand.Read(buf) + w.AddState(map[string][]byte{string(buf): buf}) + } + canonical := encodeWitnessForTest(t, w) + rawdb.WriteWitness(h.chain.DB(), hash, canonical) + + got, ok := h.handler.canonicalWitnessHash(hash) + require.True(t, ok) + + want := stateless.WitnessCommitHash(canonical) + require.Equal(t, want, got, + "canonicalWitnessHash must hash stored canonical bytes directly; if this fails, EncodeRLP determinism has regressed or the helper added back a re-encode") +} + +func encodeWitnessForTest(t *testing.T, w *stateless.Witness) []byte { + t.Helper() + var buf bytes.Buffer + require.NoError(t, w.EncodeRLP(&buf)) + return buf.Bytes() +} + +// TestVerifyScheduledProducerDeferredWhenHeaderUnknown is the regression for +// the cosend race: when the signed announce arrives before the block is +// imported, verifyScheduledProducer must report headerAvailable=false so the +// caller defers (no relay, no strike). Without this branch, valid WIT2 +// announces would draw strikes for honest relayers during normal operation. +func TestVerifyScheduledProducerDeferredWhenHeaderUnknown(t *testing.T) { + // borEngine is unused on the nil-header branch — verifyScheduledProducer + // short-circuits before calling Author. Pass nil to keep the test free of + // engine setup; if a future change reorders the branch and starts deref- + // erencing borEngine here, the test will panic and we'll catch it. 
+ ok, headerAvailable := verifyScheduledProducer(nil, nil, common.Address{}, 100, common.HexToHash("0xfeed")) + if ok { + t.Fatal("nil header must not validate as ok") + } + if headerAvailable { + t.Fatal("nil header must report headerAvailable=false so caller defers without striking") + } +} + +// TestHandleSignedWitnessAnnouncementsBadSigDoesNotMarkAnnounceKnown is the +// regression for the verification-ordering bug: handleSignedWitnessAnnouncements +// must not mark a peer as announce-known until the announcement has passed the +// signature/producer-binding gate. The previous order called +// peer.AddKnownAnnounce(hash) unconditionally before acceptSignedAnnouncement, +// so a peer relaying a structurally invalid announcement still became +// announce-known for that hash. Two bad consequences flowed from that: +// - this node refused to ever relay a *valid* later announcement back to that +// peer for the same hash, leaving them unable to recover; +// - this node short-circuited its own re-evaluation paths when a good +// announcement for the same hash arrived from another peer, because the +// original sender's announce-known bit served as a relay-suppression hint. +// +// Using a structurally invalid signature (length 3) is sufficient to drive the +// reject path through verifySignedAnnouncement → strikeWit2Peer without needing +// a bor engine or block header. 
+func TestHandleSignedWitnessAnnouncementsBadSigDoesNotMarkAnnounceKnown(t *testing.T) { + h := newTestHandler() + defer h.close() + + witH := (*witHandler)(h.handler) + peer, cleanup := newTestWit2PeerWithReader() + defer cleanup() + + blockHash := common.HexToHash("0xfeedface") + ann := wit.SignedWitnessAnnouncement{ + BlockHash: blockHash, + BlockNumber: 1, + WitnessHash: common.HexToHash("0xc0ffee"), + Signature: []byte{0x00, 0x01, 0x02}, // structurally invalid + } + + if err := witH.handleSignedWitnessAnnouncements(peer, []wit.SignedWitnessAnnouncement{ann}); err != nil { + t.Fatalf("handleSignedWitnessAnnouncements: %v", err) + } + + if peer.KnownAnnounceContainsHash(blockHash) { + t.Fatal("peer marked announce-known despite invalid signature; verification ordering is broken") + } + if _, ok := h.handler.signedWitnesses.get(blockHash); ok { + t.Fatal("signed announcement cached despite invalid signature") + } +} + +// TestPendingWitnessBodyCacheGetEvictsExpired pins the leak fix for the TTL +// path. Before the fix, get() returned false on expiry but left the entry in +// the map; gcLocked only ran from put(), so a node that stopped receiving new +// witnesses retained up to capacity (10) full witness blobs (~50 MiB each) +// indefinitely, producing a long-lived OOM risk under bursty traffic. +// +// The contract this test enforces: any get() that observes an expired entry +// MUST delete it in place so memory pressure does not persist past the TTL. +func TestPendingWitnessBodyCacheGetEvictsExpired(t *testing.T) { + c := newPendingWitnessBodyCache(4) + hash := common.HexToHash("0xfade") + c.put(hash, []byte("expensive-body"), common.HexToHash("0xab")) + + // Force the entry's receivedAt back beyond the TTL, mirroring the same + // approach used by TestSignedWitnessCacheTTLExpiry above. 
+ c.mu.Lock() + c.entries[hash].receivedAt = time.Now().Add(-2 * wit2AnnounceTTL) + c.mu.Unlock() + + if _, _, ok := c.get(hash); ok { + t.Fatal("expired entry must not be returned") + } + + c.mu.RLock() + entriesAfter := len(c.entries) + c.mu.RUnlock() + if entriesAfter != 0 { + t.Fatalf("expired entry must be deleted on get; len(entries)=%d, want 0", entriesAfter) + } +} + +// TestDeferredSignedAnnounceDrainedAfterHeaderArrives is the regression for +// the cosend-race liveness gap: when a signed announcement arrives before the +// corresponding block header (block + announce travel independently and can +// race in either order), the handler MUST retain the announcement and re- +// evaluate it once the header arrives, rather than dropping it on the floor +// and silently degrading subsequent witness fetches to the unsigned WIT1 +// fallback path. +// +// Without this: +// 1. announce arrives → header-unknown → acceptSignedAnnouncement returns +// false, announcement is forgotten. +// 2. block arrives shortly after, but no second announce reaches us (sparse +// mesh, single-cosend window) → signedWitnesses never holds the hash. +// 3. fetcher selects a peer, gets bytes, parentSignedWitnessHash returns +// false → byte-verification skipped, WIT2 trust model silently leaks. +// +// The deferred queue holds the announcement until the chain catches up; the +// drain (here invoked directly; in production fired from the chainHeadCh +// subscription) re-runs verification and caches the hash on success. 
+func TestDeferredSignedAnnounceDrainedAfterHeaderArrives(t *testing.T) { + h := newTestHandler() + defer h.close() + witH := (*witHandler)(h.handler) + peer, cleanup := newTestWit2PeerWithReader() + defer cleanup() + + key, err := crypto.GenerateKey() + if err != nil { + t.Fatalf("key gen: %v", err) + } + header := &types.Header{Number: big.NewInt(99_999)} // NOT in chain + blockHash := header.Hash() + + ann := wit.SignedWitnessAnnouncement{ + BlockHash: blockHash, + BlockNumber: header.Number.Uint64(), + WitnessHash: common.HexToHash("0xc0ffee01"), + } + digest := wit.WitnessAnnouncementSigningHash(ann.BlockHash, ann.BlockNumber, ann.WitnessHash) + sig, err := crypto.Sign(digest.Bytes(), key) + if err != nil { + t.Fatalf("sign: %v", err) + } + ann.Signature = sig + + // Phase 1: header is not yet local. The announce must be deferred — not + // cached, not relayed, not credited to the sender as announce-known. + if err := witH.handleSignedWitnessAnnouncements(peer, []wit.SignedWitnessAnnouncement{ann}); err != nil { + t.Fatalf("handleSignedWitnessAnnouncements: %v", err) + } + if _, ok := h.handler.signedWitnesses.get(blockHash); ok { + t.Fatal("announce cached prematurely; verification should defer when header is unknown") + } + if peer.KnownAnnounceContainsHash(blockHash) { + t.Fatal("peer marked announce-known on deferred path; re-relay recovery is suppressed") + } + if !h.handler.deferredAnnounces.has(blockHash) { + t.Fatal("deferred-announce queue did not retain the announce; the race window is uncovered") + } + + // Phase 2: header arrives. Drain the queue (production wires this from + // the chainHeadCh subscription on each new block). 
+ rawdb.WriteHeader(h.chain.DB(), header) + h.handler.drainDeferredAnnouncesFor(blockHash) + + if _, ok := h.handler.signedWitnesses.get(blockHash); !ok { + t.Fatal("announce not cached after header arrival; drain is broken") + } + if h.handler.deferredAnnounces.has(blockHash) { + t.Fatal("deferred entry should be cleared after successful drain") + } +} + +// TestVerifyScheduledProducerRejectsBlockNumberMismatch covers the case where +// the local header is present but disagrees with the announce on block +// number. This is a confirmed bad announce and the caller must strike, so +// headerAvailable must be true. +func TestVerifyScheduledProducerRejectsBlockNumberMismatch(t *testing.T) { + header := &types.Header{Number: big.NewInt(50)} + ok, headerAvailable := verifyScheduledProducer(nil, header, common.Address{}, 51, header.Hash()) + if ok { + t.Fatal("number mismatch must not validate") + } + if !headerAvailable { + t.Fatal("with header present, headerAvailable must be true so the caller strikes the relayer") + } +} diff --git a/eth/handler_wit_test.go b/eth/handler_wit_test.go index 40ae8c646b..e2c0ee8f3f 100644 --- a/eth/handler_wit_test.go +++ b/eth/handler_wit_test.go @@ -56,6 +56,37 @@ func newTestWitPeerWithReader() (*wit.Peer, func()) { return peer, cleanup } +// newTestWit2PeerWithReader creates a wit.Peer negotiated at WIT2, with the +// same draining behavior as newTestWitPeerWithReader. WIT2-specific paths +// (signed announce, AsyncSendSignedWitnessAnnouncement) early-return on a +// WIT1 peer, so tests that exercise them must use this helper. 
+func newTestWit2PeerWithReader() (*wit.Peer, func()) { + var id enode.ID + rand.Read(id[:]) + p2pPeer := p2p.NewPeer(id, "test-peer-wit2", nil) + app, net := p2p.MsgPipe() + + done := make(chan struct{}) + go func() { + for { + msg, err := app.ReadMsg() + if err != nil { + close(done) + return + } + msg.Discard() + } + }() + + peer := wit.NewPeer(wit.WIT2, p2pPeer, net, log.New()) + cleanup := func() { + app.Close() + peer.Close() + <-done + } + return peer, cleanup +} + // mockUnknownPacket is a mock packet type that implements wit.Packet // but is not recognized by the Handle method's switch statement type mockUnknownPacket struct{} diff --git a/eth/peer.go b/eth/peer.go index 3612db28a8..a3f5fda2ca 100644 --- a/eth/peer.go +++ b/eth/peer.go @@ -128,6 +128,7 @@ type WitnessPeer interface { // the method ethPeer.RequestWitnesses invokes AsyncSendNewWitness(witness *stateless.Witness) AsyncSendNewWitnessHash(hash common.Hash, number uint64) + AsyncSendSignedWitnessAnnouncement(ann wit.SignedWitnessAnnouncement) RequestWitness(witnessPages []wit.WitnessPageRequest, sink chan *wit.Response) (*wit.Request, error) RequestWitnessMetadata(hashes []common.Hash, sink chan *wit.Response) (*wit.Request, error) Close() @@ -136,9 +137,11 @@ type WitnessPeer interface { Log() log.Logger KnownWitnesses() *wit.KnownCache AddKnownWitness(hash common.Hash) + AddKnownAnnounce(hash common.Hash) KnownWitnessesCount() int KnownWitnessesContains(witness *stateless.Witness) bool KnownWitnessContainsHash(hash common.Hash) bool + KnownAnnounceContainsHash(hash common.Hash) bool ReplyWitness(requestID uint64, response *wit.WitnessPacketResponse) error } diff --git a/eth/peer_mock.go b/eth/peer_mock.go index 72e3a6fdbc..3cee95f6cb 100644 --- a/eth/peer_mock.go +++ b/eth/peer_mock.go @@ -50,6 +50,32 @@ func (mr *MockWitnessPeerMockRecorder) AddKnownWitness(hash interface{}) *gomock return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddKnownWitness", 
reflect.TypeOf((*MockWitnessPeer)(nil).AddKnownWitness), hash) } +// AddKnownAnnounce mocks base method. +func (m *MockWitnessPeer) AddKnownAnnounce(hash common.Hash) { + m.ctrl.T.Helper() + m.ctrl.Call(m, "AddKnownAnnounce", hash) +} + +// AddKnownAnnounce indicates an expected call of AddKnownAnnounce. +func (mr *MockWitnessPeerMockRecorder) AddKnownAnnounce(hash interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AddKnownAnnounce", reflect.TypeOf((*MockWitnessPeer)(nil).AddKnownAnnounce), hash) +} + +// KnownAnnounceContainsHash mocks base method. +func (m *MockWitnessPeer) KnownAnnounceContainsHash(hash common.Hash) bool { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "KnownAnnounceContainsHash", hash) + ret0, _ := ret[0].(bool) + return ret0 +} + +// KnownAnnounceContainsHash indicates an expected call of KnownAnnounceContainsHash. +func (mr *MockWitnessPeerMockRecorder) KnownAnnounceContainsHash(hash interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "KnownAnnounceContainsHash", reflect.TypeOf((*MockWitnessPeer)(nil).KnownAnnounceContainsHash), hash) +} + // AsyncSendNewWitness mocks base method. func (m *MockWitnessPeer) AsyncSendNewWitness(witness *stateless.Witness) { m.ctrl.T.Helper() @@ -74,6 +100,18 @@ func (mr *MockWitnessPeerMockRecorder) AsyncSendNewWitnessHash(hash, number inte return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AsyncSendNewWitnessHash", reflect.TypeOf((*MockWitnessPeer)(nil).AsyncSendNewWitnessHash), hash, number) } +// AsyncSendSignedWitnessAnnouncement mocks base method. +func (m *MockWitnessPeer) AsyncSendSignedWitnessAnnouncement(ann wit.SignedWitnessAnnouncement) { + m.ctrl.T.Helper() + m.ctrl.Call(m, "AsyncSendSignedWitnessAnnouncement", ann) +} + +// AsyncSendSignedWitnessAnnouncement indicates an expected call of AsyncSendSignedWitnessAnnouncement. 
+func (mr *MockWitnessPeerMockRecorder) AsyncSendSignedWitnessAnnouncement(ann interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AsyncSendSignedWitnessAnnouncement", reflect.TypeOf((*MockWitnessPeer)(nil).AsyncSendSignedWitnessAnnouncement), ann) +} + // Close mocks base method. func (m *MockWitnessPeer) Close() { m.ctrl.T.Helper() diff --git a/eth/peerset.go b/eth/peerset.go index 43ec2e1832..4b9109f6fb 100644 --- a/eth/peerset.go +++ b/eth/peerset.go @@ -298,16 +298,33 @@ func (ps *peerSet) peer(id string) *ethPeer { return ps.peers[id] } +// getOnePeerWithWitness returns a candidate body source for `hash`. Body-known +// peers (those that broadcast or served the body) are preferred; if none is +// available we fall back to peers that have only relayed a WIT2 signed +// announcement. The fast-path latency win depends on this fallback: at hop>=2 +// the signed announce arrives long before the body broadcast, and the only +// peer that could serve us bytes is the one that forwarded the announce. +// +// Asking an announce-only peer is safe because byte-blame in +// witnessManager.verifyAgainstSignedHash only drops on a confirmed hash +// mismatch — empty/unavailable responses surface as soft failures, not drops. 
func (ps *peerSet) getOnePeerWithWitness(hash common.Hash) *ethPeer { ps.lock.RLock() defer ps.lock.RUnlock() + var announceFallback *ethPeer for _, p := range ps.peers { - if p.witPeer != nil && p.witPeer.Peer.KnownWitnessContainsHash(hash) { + if p.witPeer == nil { + continue + } + if p.witPeer.Peer.KnownWitnessContainsHash(hash) { return p } + if announceFallback == nil && p.witPeer.Peer.KnownAnnounceContainsHash(hash) { + announceFallback = p + } } - return nil + return announceFallback } // peersWithoutWitness retrives a list of peers that do nor have a given witness @@ -328,6 +345,32 @@ func (ps *peerSet) peersWithoutWitness(hash common.Hash) []*witPeer { return list } +// peersWithoutSignedAnnounce returns peers that have neither received the body +// for `hash` nor seen a signed announcement for it. Used by WIT2 relay to skip +// peers that already know about the announcement, preventing announce storms, +// without ever assuming a peer that only saw an announcement holds the body. +func (ps *peerSet) peersWithoutSignedAnnounce(hash common.Hash) []*witPeer { + ps.lock.RLock() + defer ps.lock.RUnlock() + + list := make([]*witPeer, 0, len(ps.peers)) + + for _, p := range ps.peers { + if p.witPeer == nil { + continue + } + if p.witPeer.Peer.KnownWitnessContainsHash(hash) { + continue + } + if p.witPeer.Peer.KnownAnnounceContainsHash(hash) { + continue + } + list = append(list, p.witPeer) + } + + return list +} + // peersWithoutBlock retrieves a list of peers that do not have a given block in // their set of known hashes so it might be propagated to them. 
func (ps *peerSet) peersWithoutBlock(hash common.Hash) []*ethPeer { diff --git a/eth/peerset_test.go b/eth/peerset_test.go index 16d4d9dc1b..1062644efe 100644 --- a/eth/peerset_test.go +++ b/eth/peerset_test.go @@ -6,6 +6,8 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/eth/protocols/eth" + "github.com/ethereum/go-ethereum/eth/protocols/wit" + "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/p2p" "github.com/ethereum/go-ethereum/p2p/enode" ) @@ -61,3 +63,73 @@ func TestPeerSetForgetTransactionsEmpty(t *testing.T) { // ForgetTransactions should not panic with no peers ps.ForgetTransactions([]common.Hash{{1}, {2}, {3}}) } + +// TestGetOnePeerWithWitnessPrefersBodyOverAnnounce locks in the WIT2 fast-path +// invariant: when at least one peer has the body (knownWitnesses) and another +// has only seen the signed announce (knownAnnounces), body-known wins. If a +// future change inverts this, fetchers will silently prefer slower sources. +func TestGetOnePeerWithWitnessPrefersBodyOverAnnounce(t *testing.T) { + t.Parallel() + + ps := newPeerSet() + defer ps.close() + + hash := common.HexToHash("0xabc") + + bodyPeer := newRegisteredPeerForTest(t, ps) + announcePeer := newRegisteredPeerForTest(t, ps) + + bodyPeer.witPeer.Peer.AddKnownWitness(hash) + announcePeer.witPeer.Peer.(*wit.Peer).AddKnownAnnounce(hash) + + got := ps.getOnePeerWithWitness(hash) + if got == nil { + t.Fatal("expected a candidate; got nil") + } + if got.ID() != bodyPeer.ID() { + t.Fatalf("body-known peer must win over announce-only: got %s want %s", + got.ID(), bodyPeer.ID()) + } +} + +// TestGetOnePeerWithWitnessFallsBackToAnnounce locks in the fix for the +// fast-path regression: when no peer has the body yet, the announce-known +// fallback IS selectable. 
Without this, a hop-2 stateless validator with a +// verified signed announce would have nothing to fetch from until the body +// broadcast finally arrived — eliminating the WIT2 latency win. +func TestGetOnePeerWithWitnessFallsBackToAnnounce(t *testing.T) { + t.Parallel() + + ps := newPeerSet() + defer ps.close() + + hash := common.HexToHash("0xdef") + + announcePeer := newRegisteredPeerForTest(t, ps) + announcePeer.witPeer.Peer.(*wit.Peer).AddKnownAnnounce(hash) + + got := ps.getOnePeerWithWitness(hash) + if got == nil { + t.Fatal("announce-only peer must be a fetch candidate after the WIT2 fast-path fix") + } + if got.ID() != announcePeer.ID() { + t.Fatalf("expected announce-only peer; got %s", got.ID()) + } +} + +func newRegisteredPeerForTest(t *testing.T, ps *peerSet) *ethPeer { + t.Helper() + var id enode.ID + rand.Read(id[:]) + _, net := p2p.MsgPipe() + t.Cleanup(func() { net.Close() }) + + p2pPeer := p2p.NewPeer(id, "fast-path-peer", nil) + ethP := eth.NewPeer(eth.ETH68, p2pPeer, net, nil) + witP := wit.NewPeer(wit.WIT2, p2pPeer, net, log.New()) + + if err := ps.registerPeer(ethP, nil, witP); err != nil { + t.Fatalf("register peer: %v", err) + } + return ps.peer(ethP.ID()) +} diff --git a/eth/protocols/wit/broadcast.go b/eth/protocols/wit/broadcast.go index 43d5a43b65..4fedeb4321 100644 --- a/eth/protocols/wit/broadcast.go +++ b/eth/protocols/wit/broadcast.go @@ -23,6 +23,13 @@ func (p *Peer) broadcastWitness() { } p.logger.Debug("propagated witness hashes", "hashes", packet.Hashes, "numbers", packet.Numbers) + case packet := <-p.queuedSignedAnns: + if err := p.sendSignedNewWitnessHashes(packet); err != nil { + log.Debug("failed to send signed witness announcements", "error", err) + return + } + p.logger.Debug("propagated signed witness announcements", "count", len(packet.Announcements)) + case <-p.term: return } diff --git a/eth/protocols/wit/handler.go b/eth/protocols/wit/handler.go index db4b99f656..ec424b629d 100644 --- a/eth/protocols/wit/handler.go 
+++ b/eth/protocols/wit/handler.go @@ -133,6 +133,16 @@ var wit1 = map[uint64]msgHandler{ WitnessMetadataMsg: handleWitnessMetadata, } +var wit2 = map[uint64]msgHandler{ + GetMsgWitness: handleGetWitness, + MsgWitness: handleWitness, + NewWitnessMsg: handleNewWitness, + NewWitnessHashesMsg: handleNewWitnessHashes, + GetWitnessMetadataMsg: handleGetWitnessMetadata, + WitnessMetadataMsg: handleWitnessMetadata, + SignedNewWitnessHashesMsg: handleSignedNewWitnessHashes, +} + // HandleMessage is invoked whenever an inbound message is received from a // remote peer on the `wit` protocol. The remote connection is torn down upon // returning any error. @@ -167,6 +177,8 @@ func handleMessage(backend Backend, peer *Peer) error { // Select the appropriate handler map based on protocol version var handlers map[uint64]msgHandler switch peer.Version() { + case WIT2: + handlers = wit2 case WIT1: handlers = wit1 case WIT0: diff --git a/eth/protocols/wit/handlers.go b/eth/protocols/wit/handlers.go index b7319d1869..46c77a5129 100644 --- a/eth/protocols/wit/handlers.go +++ b/eth/protocols/wit/handlers.go @@ -63,6 +63,32 @@ func handleNewWitnessHashes(backend Backend, msg Decoder, peer *Peer) error { return backend.Handle(peer, req) } +// MaxSignedAnnouncesPerPacket caps how many signed witness announcements a +// single SignedNewWitnessHashesPacket may carry. Each announcement triggers +// ecrecover and a header lookup downstream, so an unbounded packet is a cheap +// DoS vector. 64 matches maxQueuedWitnessAnns: the relay queue and the wire +// limit move together so a packet that fits the queue also fits the wire. +const MaxSignedAnnouncesPerPacket = 64 + +// handleSignedNewWitnessHashes processes a SignedNewWitnessHashesPacket from a +// peer (WIT2+). The packet is forwarded to the backend, which is responsible +// for signature verification, validator-set check, relay, and triggering the +// body fetch. 
We cap the announcement count at decode time so the backend +// never sees an unbounded packet. +func handleSignedNewWitnessHashes(backend Backend, msg Decoder, peer *Peer) error { + req := new(SignedNewWitnessHashesPacket) + if err := msg.Decode(&req); err != nil { + return fmt.Errorf("failed to decode SignedNewWitnessHashesPacket: %w", err) + } + if len(req.Announcements) == 0 { + return fmt.Errorf("invalid SignedNewWitnessHashesPacket: Announcements cannot be empty") + } + if len(req.Announcements) > MaxSignedAnnouncesPerPacket { + return fmt.Errorf("SignedNewWitnessHashesPacket exceeds cap: %d > %d", len(req.Announcements), MaxSignedAnnouncesPerPacket) + } + return backend.Handle(peer, req) +} + // handleGetWitnessMetadata processes a GetWitnessMetadataPacket request from a peer. func handleGetWitnessMetadata(backend Backend, msg Decoder, peer *Peer) error { // Decode the GetWitnessMetadataPacket request diff --git a/eth/protocols/wit/peer.go b/eth/protocols/wit/peer.go index 6008ad1dfd..c23d2e424c 100644 --- a/eth/protocols/wit/peer.go +++ b/eth/protocols/wit/peer.go @@ -21,8 +21,10 @@ const ( maxQueuedWitnesses = 10 // maxQueuedWitnessAnns is the maximum number of witness announcements to queue up before - // dropping broadcasts - maxQueuedWitnessAnns = 10 + // dropping broadcasts. Bumped from 10 to 64 in WIT2 to absorb transitive-relay bursts; + // each announcement is small (33 bytes per entry, 130 bytes signed) so the memory + // footprint stays well under 10KB per peer. + maxQueuedWitnessAnns = 64 ) // Peer is a collection of relevant information we have about a `wit` peer. 
@@ -35,9 +37,11 @@ type Peer struct { logger log.Logger // Contextual logger with the peer id injected - knownWitnesses *KnownCache // Set of witness hashes (`witness.Headers[0].Hash()`) known to be known by this peer - queuedWitness chan *stateless.Witness // Queue of witness to broadcast to this peer - queuedWitnessAnns chan *NewWitnessHashesPacket // Queue of witness announcements to this peer + knownWitnesses *KnownCache // Witness hashes this peer is known to HAVE (body served, body broadcast received). Feeds body-fetch peer selection. + knownAnnounces *KnownCache // Witness hashes this peer has SEEN an announcement for, but not necessarily the body. Used only to suppress redundant announce relay. + queuedWitness chan *stateless.Witness // Queue of witness to broadcast to this peer + queuedWitnessAnns chan *NewWitnessHashesPacket // Queue of unsigned witness announcements to this peer (WIT0/WIT1) + queuedSignedAnns chan *SignedNewWitnessHashesPacket // Queue of signed witness announcements to this peer (WIT2) reqDispatch chan *request // Dispatch channel to send witness requests and track them until fulfillment reqCancel chan *cancel // Dispatch channel to cancel pending witness requests @@ -56,8 +60,10 @@ func NewPeer(version uint, p *p2p.Peer, rw p2p.MsgReadWriter, logger log.Logger) version: version, logger: logger.With("peer", id), knownWitnesses: newKnownCache(maxKnownWitnesses), + knownAnnounces: newKnownCache(maxKnownWitnesses), queuedWitness: make(chan *stateless.Witness, maxQueuedWitnesses), queuedWitnessAnns: make(chan *NewWitnessHashesPacket, maxQueuedWitnessAnns), + queuedSignedAnns: make(chan *SignedNewWitnessHashesPacket, maxQueuedWitnessAnns), reqDispatch: make(chan *request), reqCancel: make(chan *cancel), resDispatch: make(chan *response), @@ -86,6 +92,12 @@ func (p *Peer) sendNewWitnessHashes(packet *NewWitnessHashesPacket) error { return p2p.Send(p.rw, NewWitnessHashesMsg, packet) } +// sendSignedNewWitnessHashes sends signed witness 
announcements to the peer. +// Only valid for WIT2+ peers; the caller must check Version() before invoking. +func (p *Peer) sendSignedNewWitnessHashes(packet *SignedNewWitnessHashesPacket) error { + return p2p.Send(p.rw, SignedNewWitnessHashesMsg, packet) +} + // AsyncSendNewWitness queues an entire witness for broadcast to the peer. The // witness will be sent in the background to avoid blocking the caller. If the // queue is full, the witness will be dropped. @@ -116,6 +128,26 @@ func (p *Peer) AsyncSendNewWitnessHash(hash common.Hash, number uint64) { } } +// AsyncSendSignedWitnessAnnouncement queues a BP-signed witness announcement +// for broadcast to the peer. The peer must speak WIT2 or higher; callers are +// responsible for checking Version(). The block hash is added to the peer's +// announce-known set (NOT the body-known set) so subsequent announce gossip +// on the same hash is suppressed, while body-fetch peer selection is not +// misled into asking this peer for bytes it does not yet have. +func (p *Peer) AsyncSendSignedWitnessAnnouncement(ann SignedWitnessAnnouncement) { + if p.version < WIT2 { + return + } + select { + case p.queuedSignedAnns <- &SignedNewWitnessHashesPacket{ + Announcements: []SignedWitnessAnnouncement{ann}, + }: + p.knownAnnounces.Add(ann.BlockHash) + default: + p.logger.Debug("Dropped signed witness announcement.", "blockHash", ann.BlockHash, "peer", p.id) + } +} + // RequestWitness sends a request to the peer for witnesses by witness pages. func (p *Peer) RequestWitness(witnessPages []WitnessPageRequest, sink chan *Response) (*Request, error) { log.Debug("Requesting witnesses", "peer", p.id, "count", len(witnessPages)) @@ -194,6 +226,20 @@ func (p *Peer) AddKnownWitness(hash common.Hash) { p.knownWitnesses.Add(hash) } +// AddKnownAnnounce records that this peer has seen the signed announcement for +// `hash`, without claiming the peer holds the body. 
Used to suppress redundant +// announce-relay only; body-fetch peer selection ignores this set. +func (p *Peer) AddKnownAnnounce(hash common.Hash) { + p.knownAnnounces.Add(hash) +} + +// KnownAnnounceContainsHash reports whether this peer is known to have seen an +// announcement for `hash` (either inbound or outbound). False does not imply +// the peer is unaware — only that this side has no record. +func (p *Peer) KnownAnnounceContainsHash(hash common.Hash) bool { + return p.knownAnnounces.hashes.Contains(hash) +} + // KnownWitnessesCount returns the number of known witness. func (p *Peer) KnownWitnessesCount() int { return p.knownWitnesses.Cardinality() diff --git a/eth/protocols/wit/protocol.go b/eth/protocols/wit/protocol.go index c8238062c7..4c8d09ed2d 100644 --- a/eth/protocols/wit/protocol.go +++ b/eth/protocols/wit/protocol.go @@ -1,16 +1,24 @@ package wit import ( + "encoding/binary" "errors" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/stateless" + "github.com/ethereum/go-ethereum/crypto" ) // Constants to match up protocol versions and messages const ( WIT0 = 1 WIT1 = 2 + // WIT2 adds BP-signed witness announcements, allowing peers to fast-validate + // announces via signature recovery (microseconds) instead of full block + // execution (~500ms). Signed announces are safe to relay transitively + // because byte-correctness is verified at fetch time against the signed + // witness hash; content-correctness blame attaches to the BP signer. + WIT2 = 3 ) // ProtocolName is the official short name of the `wit` protocol used during @@ -19,24 +27,36 @@ const ProtocolName = "wit" // ProtocolVersions are the supported versions of the `wit` protocol (first // is primary). -var ProtocolVersions = []uint{WIT1, WIT0} +var ProtocolVersions = []uint{WIT2, WIT1, WIT0} // protocolLengths are the number of implemented message corresponding to // different protocol versions. 
-var protocolLengths = map[uint]uint64{WIT1: 6, WIT0: 4} +var protocolLengths = map[uint]uint64{WIT2: 7, WIT1: 6, WIT0: 4} // maxMessageSize is the maximum cap on the size of a protocol message. const maxMessageSize = 16 * 1024 * 1024 const ( - NewWitnessMsg = 0x00 - NewWitnessHashesMsg = 0x01 - GetMsgWitness = 0x02 - MsgWitness = 0x03 - GetWitnessMetadataMsg = 0x04 - WitnessMetadataMsg = 0x05 + NewWitnessMsg = 0x00 + NewWitnessHashesMsg = 0x01 + GetMsgWitness = 0x02 + MsgWitness = 0x03 + GetWitnessMetadataMsg = 0x04 + WitnessMetadataMsg = 0x05 + SignedNewWitnessHashesMsg = 0x06 // WIT2: signed witness announcement, safe to relay ) +// SignatureLength is the length of a BP signature over a witness announcement (r||s||v). +const SignatureLength = 65 + +// witnessAnnounceDomainTag is a unique prefix mixed into the signing digest so a +// signature produced for a WIT2 announcement cannot be replayed in any other +// context that signs 32-byte digests under the BP's signing key (block sealing, +// future signed messages, etc.). Cross-context replay is structurally +// impossible rather than only computationally hard, even if a future caller +// happens to share the same signFn mimetype. +var witnessAnnounceDomainTag = []byte("bor-wit2-announce\x00") + var ( errMsgTooLarge = errors.New("message too long") errDecode = errors.New("invalid message") @@ -91,6 +111,29 @@ type NewWitnessHashesPacket struct { Numbers []uint64 } +// SignedWitnessAnnouncement is a BP-authenticated commitment to the existence +// of a specific witness for a specific block. The signer commits to: +// +// keccak256(witnessAnnounceDomainTag || BlockHash || BlockNumber (big-endian uint64) || WitnessHash) +// +// Receivers verify the signature with ecrecover and check that the recovered +// address is the validator scheduled for BlockNumber. Once verified, the +// announcement is safe to relay to other peers without local execution; any +// downstream receiver re-verifies independently. 
Bytes returned by a serving +// peer are checked against WitnessHash, so byte-correctness blame attaches to +// the server while content-correctness (state-root) blame attaches to the BP. +type SignedWitnessAnnouncement struct { + BlockHash common.Hash + BlockNumber uint64 + WitnessHash common.Hash // WIT2 chunked-aggregate commitment over canonical witness RLP; see core/stateless.WitnessCommitHash + Signature []byte // 65-byte secp256k1 signature +} + +// SignedNewWitnessHashesPacket carries one or more signed witness announcements. +type SignedNewWitnessHashesPacket struct { + Announcements []SignedWitnessAnnouncement +} + // GetWitnessMetadataRequest represents a request for witness metadata (just page count, no data) type GetWitnessMetadataRequest struct { Hashes []common.Hash // Block hashes to get metadata for @@ -129,6 +172,34 @@ func (w *NewWitnessPacket) Kind() byte { return NewWitnessMsg } func (w *NewWitnessHashesPacket) Name() string { return "NewWitnessHashes" } func (w *NewWitnessHashesPacket) Kind() byte { return NewWitnessHashesMsg } +func (w *SignedNewWitnessHashesPacket) Name() string { return "SignedNewWitnessHashes" } +func (w *SignedNewWitnessHashesPacket) Kind() byte { return SignedNewWitnessHashesMsg } + +// WitnessAnnouncementSigningPreImage returns the unhashed bytes a BP signs to +// authenticate a witness announcement. Production signing flows (clef, +// keystoreWallet.SignData) hash their input once before signing, so callers +// MUST pass this preimage, not WitnessAnnouncementSigningHash. The verifier +// independently computes WitnessAnnouncementSigningHash (= keccak256 of this +// preimage) and ecrecovers against it. Mismatching hash-vs-preimage between +// signer and verifier silently breaks every WIT2 signature, hence the split. 
+func WitnessAnnouncementSigningPreImage(blockHash common.Hash, blockNumber uint64, witnessHash common.Hash) []byte { + const fixedLen = common.HashLength + 8 + common.HashLength + buf := make([]byte, len(witnessAnnounceDomainTag)+fixedLen) + n := copy(buf, witnessAnnounceDomainTag) + copy(buf[n:], blockHash[:]) + binary.BigEndian.PutUint64(buf[n+common.HashLength:], blockNumber) + copy(buf[n+common.HashLength+8:], witnessHash[:]) + return buf +} + +// WitnessAnnouncementSigningHash returns the digest a BP signs to authenticate +// a witness announcement. Must be byte-identical on both signer and verifier. +// Used by the verifier; signers must instead feed the preimage into the wallet +// SignData path, which keccaks once internally. +func WitnessAnnouncementSigningHash(blockHash common.Hash, blockNumber uint64, witnessHash common.Hash) common.Hash { + return crypto.Keccak256Hash(WitnessAnnouncementSigningPreImage(blockHash, blockNumber, witnessHash)) +} + func (w *GetWitnessMetadataRequest) Name() string { return "GetWitnessMetadata" } func (w *GetWitnessMetadataRequest) Kind() byte { return GetWitnessMetadataMsg } diff --git a/eth/protocols/wit/protocol_wit2_test.go b/eth/protocols/wit/protocol_wit2_test.go new file mode 100644 index 0000000000..c2ad7f33a9 --- /dev/null +++ b/eth/protocols/wit/protocol_wit2_test.go @@ -0,0 +1,91 @@ +package wit + +import ( + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto" +) + +// TestWitnessAnnouncementSigningHashStable pins the digest format. If this +// changes, every signed announcement on the network breaks at once — bump the +// protocol version explicitly. The test value is recomputed independently to +// catch silent reordering of the concatenation. 
+func TestWitnessAnnouncementSigningHashStable(t *testing.T) { + blockHash := common.HexToHash("0x1111111111111111111111111111111111111111111111111111111111111111") + blockNumber := uint64(0x0102030405060708) + witnessHash := common.HexToHash("0x2222222222222222222222222222222222222222222222222222222222222222") + + got := WitnessAnnouncementSigningHash(blockHash, blockNumber, witnessHash) + + // Manual recomposition: domain-tag || blockHash || blockNumber (big-endian u64) || witnessHash + want := crypto.Keccak256Hash( + witnessAnnounceDomainTag, + blockHash.Bytes(), + []byte{0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}, + witnessHash.Bytes(), + ) + if got != want { + t.Fatalf("signing-hash format drift: got %s want %s", got.Hex(), want.Hex()) + } +} + +// TestWitnessAnnouncementSigningHashDomainSeparated guards that the witness +// announce digest cannot collide with a raw 3-field concatenation lacking the +// domain tag. This is the structural check that a header-seal signature, or +// any other future SignBytes context, cannot be replayed as a wit2 announce. +func TestWitnessAnnouncementSigningHashDomainSeparated(t *testing.T) { + blockHash := common.HexToHash("0xaa") + blockNumber := uint64(7) + witnessHash := common.HexToHash("0xbb") + + withTag := WitnessAnnouncementSigningHash(blockHash, blockNumber, witnessHash) + withoutTag := crypto.Keccak256Hash( + blockHash.Bytes(), + []byte{0, 0, 0, 0, 0, 0, 0, 7}, + witnessHash.Bytes(), + ) + if withTag == withoutTag { + t.Fatalf("domain tag absent: digests collide, replay across signing contexts is possible") + } +} + +// TestWitnessAnnouncementSigningHashSensitive ensures every input field is +// covered by the digest — flipping any byte in any input must change the hash. +// Catches a bug where a refactor silently drops a field from the digest. 
+func TestWitnessAnnouncementSigningHashSensitive(t *testing.T) { + base := WitnessAnnouncementSigningHash( + common.HexToHash("0xaa"), + 1, + common.HexToHash("0xbb"), + ) + cases := []struct { + name string + blockH common.Hash + num uint64 + witnessH common.Hash + }{ + {"different blockHash", common.HexToHash("0xab"), 1, common.HexToHash("0xbb")}, + {"different blockNumber", common.HexToHash("0xaa"), 2, common.HexToHash("0xbb")}, + {"different witnessHash", common.HexToHash("0xaa"), 1, common.HexToHash("0xbc")}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + if got := WitnessAnnouncementSigningHash(tc.blockH, tc.num, tc.witnessH); got == base { + t.Fatalf("digest unchanged when %s differed", tc.name) + } + }) + } +} + +// TestProtocolVersionsContainsWIT2 guards the handshake advertising. WIT2 must +// be advertised first (preferred) for new connections. If WIT1 ever leaks +// ahead of WIT2, peers downgrade silently and the fast path stops working. +func TestProtocolVersionsContainsWIT2(t *testing.T) { + if len(ProtocolVersions) == 0 || ProtocolVersions[0] != WIT2 { + t.Fatalf("expected WIT2 first in ProtocolVersions, got %v", ProtocolVersions) + } + if protocolLengths[WIT2] != 7 { + t.Fatalf("WIT2 protocolLengths must be 7 (one new message added), got %d", protocolLengths[WIT2]) + } +}