diff --git a/.gitignore b/.gitignore index 5823721c3..9a2942825 100644 --- a/.gitignore +++ b/.gitignore @@ -45,4 +45,4 @@ mkdocs-site-manifest.csv !test/admissible-report-wallet.json !test/admissible-report.json -!test/config.json \ No newline at end of file +!test/config.json diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 000000000..d96d17f3f --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "native/lib/secp256k1"] + path = native/lib/secp256k1 + url = https://github.com/bitcoin-core/secp256k1.git diff --git a/docs/misc/hacking-on-hyperbeam.md b/docs/misc/hacking-on-hyperbeam.md index 6df0f5a9d..ebb69cbcd 100644 --- a/docs/misc/hacking-on-hyperbeam.md +++ b/docs/misc/hacking-on-hyperbeam.md @@ -103,4 +103,29 @@ since the last invocation. 3. Open the svg file in browser. -Happy hacking! \ No newline at end of file +## Common testing pitfalls + +Here is a helpful list of common mistakes when writing tests: + +- If you need to start a new node, be sure to use a new private key unless you + have a specific reason to use an existing one. HyperBEAM HTTP servers are + registered using their wallet ID as their 'name', so re-use can cause issues. + You can generate a new private key using `#{ priv_wallet => ar_wallet:new() }`. +- Similarly, always be careful of your stores in your tests! Avoid using the + default stores, as this can lead to 'context leakage', where one part of your + test is unintentionally able to access data created/stored by a supposedly + different node in the environment. `hb_http_server:start_node/1` will generate + a new unique store for you by default, but avoid creating a named store unless + you need to (and know what you are doing). +- Always try to test your devices through the HTTP AO-Core API as well as through + the local `hb_ao:resolve/[2-3]` interfaces. Avoid direct `dev_name:key` calls + unless strictly necessary. 
The HTTP API is how users will almost always + interact with your system, and there can be subtle differences in how the interfaces + react. For example, the Erlang function call interface has no regard for how + keys are matched by AO-Core, so will mask any issues with the choice of which + device function to call to satisfy requests. + +Happy hacking! + +Avoid pattern matching on a list of commitments, since we cannot guarantee their order. +Doing so will cause tests to be flaky. diff --git a/native/hb_nif/hb_nif.c b/native/hb_nif/hb_nif.c new file mode 100644 index 000000000..6c4647f06 --- /dev/null +++ b/native/hb_nif/hb_nif.c @@ -0,0 +1,35 @@ +#include "hb_nif.h" +#include + +// Utility functions. +// Based on Arweave's c_src/ar_nif.c + +ERL_NIF_TERM solution_tuple(ErlNifEnv* envPtr, ERL_NIF_TERM hashTerm) { + return enif_make_tuple2(envPtr, enif_make_atom(envPtr, "true"), hashTerm); +} + +ERL_NIF_TERM ok_tuple(ErlNifEnv* envPtr, ERL_NIF_TERM term) +{ + return enif_make_tuple2(envPtr, enif_make_atom(envPtr, "ok"), term); +} + +ERL_NIF_TERM ok_tuple2(ErlNifEnv* envPtr, ERL_NIF_TERM term1, ERL_NIF_TERM term2) +{ + return enif_make_tuple3(envPtr, enif_make_atom(envPtr, "ok"), term1, term2); +} + +ERL_NIF_TERM error_tuple(ErlNifEnv* envPtr, const char* reason) +{ + ERL_NIF_TERM reasonTerm = enif_make_string(envPtr, reason, ERL_NIF_LATIN1); + return enif_make_tuple2(envPtr, enif_make_atom(envPtr, "error"), reasonTerm); +} + +ERL_NIF_TERM make_output_binary(ErlNifEnv* envPtr, unsigned char *dataPtr, size_t size) +{ + ERL_NIF_TERM outputTerm; + unsigned char *outputTermDataPtr; + + outputTermDataPtr = enif_make_new_binary(envPtr, size, &outputTerm); + memcpy(outputTermDataPtr, dataPtr, size); + return outputTerm; +} diff --git a/native/hb_nif/hb_nif.h b/native/hb_nif/hb_nif.h new file mode 100644 index 000000000..5f72bb6e8 --- /dev/null +++ b/native/hb_nif/hb_nif.h @@ -0,0 +1,14 @@ +#ifndef HB_NIF_H +#define HB_NIF_H + +#include + +// Based on Arweave's c_src/ar_nif.h + 
+ERL_NIF_TERM solution_tuple(ErlNifEnv*, ERL_NIF_TERM); +ERL_NIF_TERM ok_tuple(ErlNifEnv*, ERL_NIF_TERM); +ERL_NIF_TERM ok_tuple2(ErlNifEnv*, ERL_NIF_TERM, ERL_NIF_TERM); +ERL_NIF_TERM error_tuple(ErlNifEnv*, const char*); +ERL_NIF_TERM make_output_binary(ErlNifEnv*, unsigned char*, size_t); + +#endif // HB_NIF_H diff --git a/native/lib/Makefile b/native/lib/Makefile new file mode 100644 index 000000000..6b5b3bc88 --- /dev/null +++ b/native/lib/Makefile @@ -0,0 +1,59 @@ +###################################################################### +# HyperBEAM Library GNU Makefile +# +# Usage: make [all|clean] +# Usage: make [secp256k1|secp256k1-clean] +# +# Based on Arweave's lib/Makefile +###################################################################### +SECP256K1_CMAKE_OPTIONS ?= \ + -DSECP256K1_DISABLE_SHARED=ON \ + -DSECP256K1_ENABLE_MODULE_RECOVERY=ON \ + -DBUILD_SHARED_LIBS=OFF \ + -DSECP256K1_BUILD_BENCHMARK=OFF \ + -DSECP256K1_BUILD_EXHAUSTIVE_TESTS=OFF \ + -DSECP256K1_BUILD_TESTS=OFF \ + -DSECP256K1_ENABLE_MODULE_MUSIG=OFF \ + -DSECP256K1_ENABLE_MODULE_EXTRAKEYS=OFF \ + -DSECP256K1_ENABLE_MODULE_ELLSWIFT=OFF \ + -DSECP256K1_ENABLE_MODULE_SCHNORRSIG=OFF \ + -DSECP256K1_APPEND_CFLAGS=-fPIC + +GIT_SUBMODULE_OPTIONS ?= --checkout --init + +PHONY += all +all: secp256k1 + +PHONY += help +help: + @echo "Usage: make [all|clean]" + @echo "Usage: make [secp256k1|secp256k1-clean]" + +PHONY += clean +clean: secp256k1-clean + +###################################################################### +# secp256k1 targets +###################################################################### +PHONY += secp256k1 +secp256k1: secp256k1/CMakeLists.txt secp256k1/build/lib/libsecp256k1.a + +PHONY += secp256k1-clean +secp256k1-clean: + -rm -rf secp256k1/build + +secp256k1/CMakeLists.txt: + git submodule update $(GIT_SUBMODULE_OPTIONS) secp256k1 + +secp256k1/build: secp256k1/CMakeLists.txt + mkdir $@ + +secp256k1/build/Makefile: secp256k1/build + cd secp256k1/build \ + && cmake 
$(SECP256K1_CMAKE_OPTIONS) .. + +secp256k1/build/lib/libsecp256k1.a: secp256k1/build/Makefile + cd secp256k1/build \ + && cmake --build . + +.PHONY: $(PHONY) diff --git a/native/lib/secp256k1 b/native/lib/secp256k1 new file mode 160000 index 000000000..1d146ac3e --- /dev/null +++ b/native/lib/secp256k1 @@ -0,0 +1 @@ +Subproject commit 1d146ac3edd47a6ea10669a18cae62171a8e35c6 diff --git a/native/secp256k1/Makefile b/native/secp256k1/Makefile new file mode 100644 index 000000000..00bf5fb39 --- /dev/null +++ b/native/secp256k1/Makefile @@ -0,0 +1,137 @@ +# secp256k1 NIF Makefile for HyperBEAM +# Based on Arweave's c_src/Makefile + +CURDIR := $(shell pwd) +BASEDIR := $(abspath $(CURDIR)/../..) + +PROJECT ?= $(notdir $(BASEDIR)) +PROJECT := $(strip $(PROJECT)) + +ifeq ($(MODE), debug) + CFLAGS ?= -O0 -g + CXXFLAGS ?= -O0 -g +else + CFLAGS ?= -O3 + CXXFLAGS ?= -O3 +endif + +UNAME_SYS := $(shell uname -s) + +# Set default libs path for secp256k1 implementation +SECP256K1_LDLIBS = -L /usr/lib -L /usr/local/lib + +ifeq ($(UNAME_SYS), Linux) + # _mm_crc32_u32 support + CFLAGS += -msse4.2 + CXXFLAGS += -msse4.2 +endif + +ERTS_INCLUDE_DIR ?= $(shell erl -noshell -eval 'io:format("~ts/erts-~ts/include/", [code:root_dir(), erlang:system_info(version)]).' -s init stop) +ERL_INTERFACE_INCLUDE_DIR ?= $(shell erl -noshell -eval 'io:format("~ts", [code:lib_dir(erl_interface, include)]).' -s init stop) +ERL_INTERFACE_LIB_DIR ?= $(shell erl -noshell -eval 'io:format("~ts", [code:lib_dir(erl_interface, lib)]).' -s init stop) + +# System type and C compiler/flags. 
+ +ifeq ($(UNAME_SYS), Darwin) + OSX_CPU_ARCH ?= x86_64 + # nix systems may not have sysctl where uname -m will return the correct arch + SYSCTL_EXISTS := $(shell which sysctl 2>/dev/null) + ifneq ($(shell uname -m | egrep "arm64"),) + OSX_CPU_ARCH = arm64 + else + ifdef SYSCTL_EXISTS + ifneq ($(shell sysctl -n machdep.cpu.brand_string | egrep "M(1|2)"),) + OSX_CPU_ARCH = arm64 + endif + endif + endif + CC ?= cc + CFLAGS += -std=c99 -arch $(OSX_CPU_ARCH) -finline-functions -Wall -Wmissing-prototypes + CXXFLAGS += -arch $(OSX_CPU_ARCH) -finline-functions -Wall + LDFLAGS ?= -arch $(OSX_CPU_ARCH) + LDFLAGS += -undefined suppress + # on MacOS, some libs are also present in /opt/homebrew/lib + SECP256K1_LDLIBS += -L /opt/homebrew/lib +else ifeq ($(UNAME_SYS), FreeBSD) + CC ?= cc + CFLAGS += -std=c99 -finline-functions -Wall -Wmissing-prototypes + CXXFLAGS += -finline-functions -Wall +else ifeq ($(UNAME_SYS), Linux) + CC ?= gcc + CFLAGS += -std=c99 -finline-functions -Wall -Wmissing-prototypes + CXXFLAGS += -finline-functions -Wall +endif + +C_SRC_DIR = $(CURDIR) + +# Paths to libsecp256k1 +LIBSECP256K1_DIR = $(BASEDIR)/native/lib/secp256k1 +LIBSECP256K1_STATIC = $(LIBSECP256K1_DIR)/build/lib/libsecp256k1.a + +# Source files +SECP256K1_SOURCES = $(C_SRC_DIR)/secp256k1_nif.c $(BASEDIR)/native/hb_nif/hb_nif.c +SECP256K1_OBJECTS = $(C_SRC_DIR)/secp256k1_nif.o $(BASEDIR)/native/hb_nif/hb_nif.o + +# Build up SECP256K1 flags (matching Arweave's pattern) +SECP256K1_CFLAGS += $(CFLAGS) +SECP256K1_LDLIBS += $(LDFLAGS) +CFLAGS += -fPIC -I $(ERTS_INCLUDE_DIR) -I $(ERL_INTERFACE_INCLUDE_DIR) -I /usr/local/include -I $(C_SRC_DIR) +CXXFLAGS += -fPIC -I $(ERTS_INCLUDE_DIR) -I $(ERL_INTERFACE_INCLUDE_DIR) -std=c++11 +LDLIBS += -L $(ERL_INTERFACE_LIB_DIR) -L /usr/local/lib -lei + +# Final secp256k1-specific flags +SECP256K1_CFLAGS += -fPIC -I $(ERTS_INCLUDE_DIR) -I $(ERL_INTERFACE_INCLUDE_DIR) -I /usr/local/include -I $(LIBSECP256K1_DIR)/include -I $(BASEDIR)/native/hb_nif -I 
$(C_SRC_DIR) +SECP256K1_LDLIBS += -L $(ERL_INTERFACE_LIB_DIR) + +# Output +SECP256K1_OUTPUT ?= $(BASEDIR)/priv/secp256k1_arweave.so + +# Verbosity. + +c_verbose_0 = @echo " C " $(?F); +c_verbose = $(c_verbose_$(V)) + +cpp_verbose_0 = @echo " CPP " $(?F); +cpp_verbose = $(cpp_verbose_$(V)) + +link_verbose_0 = @echo " LD " $(@F); +link_verbose = $(link_verbose_$(V)) + +COMPILE_C = $(c_verbose) $(CC) $(CFLAGS) $(CPPFLAGS) -c +COMPILE_CPP = $(cpp_verbose) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c + +$(SECP256K1_OUTPUT): $(LIBSECP256K1_STATIC) $(SECP256K1_OBJECTS) + @mkdir -p $(BASEDIR)/priv/ +ifeq ($(UNAME_SYS), Darwin) + $(link_verbose) $(CC) $(SECP256K1_OBJECTS) $(LIBSECP256K1_STATIC) -bundle -flat_namespace -undefined suppress $(SECP256K1_LDLIBS) -o $(SECP256K1_OUTPUT) +else + $(link_verbose) $(CC) $(SECP256K1_OBJECTS) $(LIBSECP256K1_STATIC) -shared $(SECP256K1_LDLIBS) -o $(SECP256K1_OUTPUT) +endif + +%secp256k1_nif.o: %secp256k1_nif.c + $(c_verbose) $(CC) $(SECP256K1_CFLAGS) -c $(OUTPUT_OPTION) $< + +%hb_nif.o: %hb_nif.c + $(c_verbose) $(CC) $(SECP256K1_CFLAGS) -c $(OUTPUT_OPTION) $< + +%.o: %.c + $(COMPILE_C) $(OUTPUT_OPTION) $< + +%.o: %.cc + $(COMPILE_CPP) $(OUTPUT_OPTION) $< + +%.o: %.C + $(COMPILE_CPP) $(OUTPUT_OPTION) $< + +%.o: %.cpp + $(COMPILE_CPP) $(OUTPUT_OPTION) $< + +all: $(SECP256K1_OUTPUT) + +$(LIBSECP256K1_STATIC): + $(MAKE) -C $(BASEDIR)/native/lib secp256k1 + +clean: + @rm -f $(SECP256K1_OUTPUT) $(SECP256K1_OBJECTS) + +.PHONY: all clean diff --git a/native/secp256k1/secp256k1_nif.c b/native/secp256k1/secp256k1_nif.c new file mode 100644 index 000000000..91c9e2535 --- /dev/null +++ b/native/secp256k1/secp256k1_nif.c @@ -0,0 +1,272 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +// Based on Arweave's c_src/secp256k1/secp256k1_nif.c + +#define SECP256K1_PUBKEY_UNCOMPRESSED_SIZE 65 +#define SECP256K1_PUBKEY_COMPRESSED_SIZE 33 +#define SECP256K1_SIGNATURE_COMPACT_SIZE 64 
+#define SECP256K1_SIGNATURE_RECOVERABLE_SIZE 65 +#define SECP256K1_PRIVKEY_SIZE 32 +#define SECP256K1_CONTEXT_SEED_SIZE 32 +#define SECP256K1_DIGEST_SIZE 32 + +static int secp256k1_load(ErlNifEnv* env, void** priv, ERL_NIF_TERM load_info) { + return 0; +} + +static int fill_devurandom(void* buffer, size_t size) { + int fd = open("/dev/urandom", O_RDONLY | O_CLOEXEC); + if (fd == -1) { + return 0; + } + + size_t offset = 0; + while (offset < size) { + ssize_t result = read(fd, (char*)buffer + offset, size - offset); + if (result == -1) { + if (errno == EINTR) continue; + goto error; + } + // EOF + if (result == 0) { + goto error; + } + offset += (size_t)result; + } + + close(fd); + return 1; + +error: + close(fd); + return 0; +} + +static int fill_random(void* buffer, size_t size) { +#if defined(__linux__) || defined(__FreeBSD__) + + size_t offset = 0; + while (offset < size) { + ssize_t result = getrandom((char*)buffer + offset, size - offset, 0); + if (result == -1) { + if (errno == EINTR) continue; + if (errno == ENOSYS) return fill_devurandom(buffer, size); + return 0; + } + offset += (size_t)result; + } + +#elif defined(__APPLE__) + + size_t offset = 0; + while (offset < size) { + // max allowed length is 256 bytes + size_t chunk = (size - offset > 256) ? 256 : (size - offset); + if (getentropy((char*)buffer + offset, chunk) == -1) { + if (errno == ENOSYS) return fill_devurandom(buffer, size); + return 0; + } + offset += chunk; + } + +#else + // Unsupported platform + return 0; +#endif + return 1; +} + +/* Cleanses memory to prevent leaking sensitive info. Won't be optimized out. */ +static void secure_erase(void *ptr, size_t len) { +#if defined(__GNUC__) + /* We use a memory barrier that scares the compiler away from optimizing out the memset. 
+ * + * Quoting Adam Langley in commit ad1907fe73334d6c696c8539646c21b11178f20f + * in BoringSSL (ISC License): + * As best as we can tell, this is sufficient to break any optimisations that + * might try to eliminate "superfluous" memsets. + * This method used in memzero_explicit() the Linux kernel, too. Its advantage is that it is + * pretty efficient, because the compiler can still implement the memset() efficiently, + * just not remove it entirely. See "Dead Store Elimination (Still) Considered Harmful" by + * Yang et al. (USENIX Security 2017) for more background. + */ + memset(ptr, 0, len); + __asm__ __volatile__("" : : "r"(ptr) : "memory"); +#else + void *(*volatile const volatile_memset)(void *, int, size_t) = memset; + volatile_memset(ptr, 0, len); +#endif +} + +static ERL_NIF_TERM sign_recoverable(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + if (argc != 2) { + return enif_make_badarg(env); + } + ErlNifBinary Digest, PrivateBytes; + if (!enif_inspect_binary(env, argv[0], &Digest)) { + return enif_make_badarg(env); + } + if (Digest.size != SECP256K1_DIGEST_SIZE) { + return enif_make_badarg(env); + } + + if (!enif_inspect_binary(env, argv[1], &PrivateBytes)) { + return enif_make_badarg(env); + } + if (PrivateBytes.size != SECP256K1_PRIVKEY_SIZE) { + return enif_make_badarg(env); + } + + char *error = NULL; + unsigned char seed[SECP256K1_CONTEXT_SEED_SIZE]; + unsigned char digest[SECP256K1_DIGEST_SIZE]; + unsigned char privbytes[SECP256K1_PRIVKEY_SIZE]; + unsigned char signature_compact[SECP256K1_SIGNATURE_COMPACT_SIZE]; + unsigned char signature_recoverable[SECP256K1_SIGNATURE_RECOVERABLE_SIZE]; + int recid; + secp256k1_ecdsa_recoverable_signature s; + secp256k1_context* ctx = secp256k1_context_create(SECP256K1_CONTEXT_NONE); + + memcpy(digest, Digest.data, SECP256K1_DIGEST_SIZE); + memcpy(privbytes, PrivateBytes.data, SECP256K1_PRIVKEY_SIZE); + + if (!secp256k1_ec_seckey_verify(ctx, privbytes)) { + error = "secp256k1 key is invalid."; + goto 
cleanup; + } + + if (!fill_random(seed, sizeof(seed))) { + error = "Failed to generate random seed for context."; + goto cleanup; + } + + if (!secp256k1_context_randomize(ctx, seed)) { + error = "Failed to randomize context."; + goto cleanup; + } + + if(!secp256k1_ecdsa_sign_recoverable(ctx, &s, digest, privbytes, NULL, NULL)) { + error = "Failed to create signature."; + goto cleanup; + } + + if(!secp256k1_ecdsa_recoverable_signature_serialize_compact(ctx, signature_compact, &recid, &s)) { + error = "Failed to serialize signature."; + goto cleanup; + } + memcpy(signature_recoverable, signature_compact, SECP256K1_SIGNATURE_COMPACT_SIZE); + signature_recoverable[64] = (unsigned char)(recid); + + ERL_NIF_TERM signature_term = make_output_binary(env, signature_recoverable, SECP256K1_SIGNATURE_RECOVERABLE_SIZE); + +cleanup: + secp256k1_context_destroy(ctx); + secure_erase(seed, sizeof(seed)); + secure_erase(privbytes, sizeof(privbytes)); + memset(signature_compact, 0, SECP256K1_SIGNATURE_COMPACT_SIZE); + memset(signature_recoverable, 0, SECP256K1_SIGNATURE_RECOVERABLE_SIZE); + + if (error) { + return error_tuple(env, error); + } + return ok_tuple(env, signature_term); +} + +static ERL_NIF_TERM recover_pk_and_verify(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + if (argc != 2) { + return enif_make_badarg(env); + } + ErlNifBinary Digest, Signature; + if (!enif_inspect_binary(env, argv[0], &Digest)) { + return enif_make_badarg(env); + } + if (Digest.size != SECP256K1_DIGEST_SIZE) { + return enif_make_badarg(env); + } + + if (!enif_inspect_binary(env, argv[1], &Signature)) { + return enif_make_badarg(env); + } + if (Signature.size != SECP256K1_SIGNATURE_RECOVERABLE_SIZE) { + return enif_make_badarg(env); + } + + char *error = NULL; + unsigned char digest[SECP256K1_DIGEST_SIZE]; + unsigned char signature_recoverable[SECP256K1_SIGNATURE_RECOVERABLE_SIZE]; + unsigned char signature_compact[SECP256K1_SIGNATURE_COMPACT_SIZE]; + unsigned char 
pubbytes[SECP256K1_PUBKEY_COMPRESSED_SIZE]; + int recid; + secp256k1_ecdsa_recoverable_signature rs; + secp256k1_ecdsa_signature s; + secp256k1_pubkey pubkey; + + memcpy(digest, Digest.data, SECP256K1_DIGEST_SIZE); + memcpy(signature_recoverable, Signature.data, SECP256K1_SIGNATURE_RECOVERABLE_SIZE); + + memcpy(signature_compact, signature_recoverable, SECP256K1_SIGNATURE_COMPACT_SIZE); + recid = (int)signature_recoverable[64]; + + if (recid < 0 || recid > 3) { + error = "Invalid signature recid. recid >= 0 && recid <= 3."; + goto cleanup; + } + + if (!secp256k1_ecdsa_recoverable_signature_parse_compact(secp256k1_context_static, &rs, signature_compact, recid)) { + error = "Failed to deserialize/parse recoverable signature."; + goto cleanup; + } + + if (!secp256k1_ecdsa_recover(secp256k1_context_static, &pubkey, &rs, digest)) { + error = "Failed to recover public key."; + goto cleanup; + } + size_t l = SECP256K1_PUBKEY_COMPRESSED_SIZE; + if (!secp256k1_ec_pubkey_serialize(secp256k1_context_static, pubbytes, &l, &pubkey, SECP256K1_EC_COMPRESSED)) { + error = "Failed to serialize the recovered public key."; + goto cleanup; + } + + if (!secp256k1_ecdsa_recoverable_signature_convert(secp256k1_context_static, &s, &rs)) { + error = "Failed to convert recoverable signature to compact signature."; + goto cleanup; + } + + // NOTE. 
https://github.com/bitcoin-core/secp256k1/blob/f79f46c70386c693ff4e7aef0b9e7923ba284e56/src/secp256k1.c#L461 + // Verify performs check for low-s + int is_valid = secp256k1_ecdsa_verify(secp256k1_context_static, &s, digest, &pubkey); + ERL_NIF_TERM pubkey_term = make_output_binary(env, pubbytes, SECP256K1_PUBKEY_COMPRESSED_SIZE); + +cleanup: + memset(digest, 0, SECP256K1_DIGEST_SIZE); + memset(pubbytes, 0, SECP256K1_PUBKEY_COMPRESSED_SIZE); + memset(signature_compact, 0, SECP256K1_SIGNATURE_COMPACT_SIZE); + memset(signature_recoverable, 0, SECP256K1_SIGNATURE_RECOVERABLE_SIZE); + + if (error) { + return error_tuple(env, error); + } + if (is_valid) { + return ok_tuple2(env, enif_make_atom(env, "true"), pubkey_term); + } + return ok_tuple2(env, enif_make_atom(env, "false"), pubkey_term); +} + +static ErlNifFunc nif_funcs[] = { + {"sign_recoverable", 2, sign_recoverable}, + {"recover_pk_and_verify", 2, recover_pk_and_verify} +}; + +ERL_NIF_INIT(secp256k1_nif, nif_funcs, secp256k1_load, NULL, NULL, NULL) diff --git a/rebar.config b/rebar.config index 32a00ea58..8c44b2146 100644 --- a/rebar.config +++ b/rebar.config @@ -1,8 +1,8 @@ {erl_opts, [debug_info, {d, 'COWBOY_QUICER', 1}, {d, 'GUN_QUICER', 1}]}. -{plugins, [pc, rebar3_rustler, rebar_edown_plugin]}. +{plugins, [pc, rebar3_rustler, rebar_edown_plugin, {rebar3_eunit_start, {git, "https://github.com/permaweb/rebar3_eunit_start.git", {ref, "04ec53fea187039770db0d4459b7aeb01a9021af"}}}]}. % Increase `scale_timeouts` when running on a slower machine. -{eunit_opts, [verbose, {scale_timeouts, 10}]}. +{eunit_opts, [verbose, {scale_timeouts, 10}, {start_applications, [prometheus, hb]}]}. 
{profiles, [ {quiet, @@ -12,7 +12,12 @@ ] }, {no_events, [{erl_opts, [{d, 'NO_EVENTS', true}]}]}, - {top, [{deps, [observer_cli]}, {erl_opts, [{d, 'AO_TOP', true}]}]}, + {top, [ + {deps, [{observer_cli, {git, "https://github.com/permaweb/observer_cli.git", + {ref, "7e7a2613b262e08c43c539d50901e6f26a241b6f"}}}]}, + {erl_opts, [{d, 'AO_TOP', true}]}, + {relx, [{release, {'hb', "0.0.1"}, [hb, b64rs, base32, cowboy, gun, luerl, prometheus, prometheus_cowboy, prometheus_ranch, elmdb, observer_cli, runtime_tools]}]} + ]}, {store_events, [{erl_opts, [{d, 'STORE_EVENTS', true}]}]}, {ao_profiling, [{erl_opts, [{d, 'AO_PROFILING', true}]}]}, {eflame, @@ -71,7 +76,7 @@ {cargo_opts, [ {src_dir, "native/dev_snp_nif"}, - {src_dir, "deps/elmdb/native/elmdb_nif"} + {release, true} ]}. {overrides, [ @@ -82,7 +87,9 @@ {compile, "bash -c \"echo '-define(HB_BUILD_SOURCE, <<\\\"$(git rev-parse HEAD)\\\">>).\n' > ${REBAR_ROOT_DIR}/_build/hb_buildinfo.hrl\""}, {compile, "bash -c \"echo '-define(HB_BUILD_SOURCE_SHORT, <<\\\"$(git rev-parse --short HEAD)\\\">>).\n' >> ${REBAR_ROOT_DIR}/_build/hb_buildinfo.hrl\""}, {compile, "bash -c \"echo '-define(HB_BUILD_TIME, $(date +%s)).\n' >> ${REBAR_ROOT_DIR}/_build/hb_buildinfo.hrl\""}, - {compile, "make -C \"${REBAR_ROOT_DIR}\" wamr"} + {compile, "make -C \"${REBAR_ROOT_DIR}\" wamr"}, + {"(linux|darwin)", compile, "make -C \"${REBAR_ROOT_DIR}/native/lib\" all"}, + {"(linux|darwin)", compile, "make -C \"${REBAR_ROOT_DIR}/native/secp256k1\" all"} ]}. {port_env, [ @@ -93,18 +100,26 @@ ]}. 
{post_hooks, [ + {"(linux|darwin)", clean, "make -C \"${REBAR_ROOT_DIR}/native/secp256k1\" clean"}, + {"(linux|darwin)", clean, "make -C \"${REBAR_ROOT_DIR}/native/lib\" secp256k1-clean"}, {"(linux|darwin|solaris)", clean, "rm -rf \"${REBAR_ROOT_DIR}/_build\" \"${REBAR_ROOT_DIR}/priv\""}, {"(linux|darwin|solaris)", compile, "echo 'Post-compile hooks executed'"}, - { compile, "rm -f native/hb_beamr/*.o native/hb_beamr/*.d"}, - { compile, "rm -f native/hb_keccak/*.o native/hb_keccak/*.d"}, - { compile, "mkdir -p priv/html"}, - { compile, "cp -R src/html/* priv/html"}, - { compile, "cp _build/default/lib/elmdb/priv/crates/elmdb_nif/elmdb_nif.so _build/default/lib/elmdb/priv/elmdb_nif.so 2>/dev/null || true" } + { compile, + "sh -c '" + "rm -f native/hb_beamr/*.o native/hb_beamr/*.d; " + "rm -f native/hb_keccak/*.o native/hb_keccak/*.d; " + "mkdir -p priv/html; " + "cp -R src/html/* priv/html; " + "cp _build/default/lib/elmdb/priv/crates/elmdb_nif/elmdb_nif.so " + "_build/default/lib/elmdb/priv/elmdb_nif.so 2>/dev/null || true" + "'" + } ]}. {provider_hooks, [ {pre, [ - {compile, {cargo, build}} + {compile, {cargo, build}}, + {eunit, {default, rebar3_eunit_start}} ]}, {post, [ {compile, {pc, compile}}, @@ -125,11 +140,13 @@ "./native/hb_keccak/hb_keccak.c", "./native/hb_keccak/hb_keccak_nif.c" ]} + %% secp256k1_arweave.so is built via native/secp256k1/Makefile ]}. 
{deps, [ - {elmdb, {git, "https://github.com/permaweb/elmdb-rs.git", {ref, "bfda2facebdb433eea753f82e7e8d45aefc6d87a"}}}, - {b64fast, {git, "https://github.com/ArweaveTeam/b64fast.git", {ref, "58f0502e49bf73b29d95c6d02460d1fb8d2a5273"}}}, + {elmdb, {git, "https://github.com/permaweb/elmdb-rs.git", {ref, "de36a64870e671dc325b33d7aa9a368fd6a06db8"}}}, + {b64rs, {git, "https://github.com/permaweb/b64rs.git", {ref, "c6da6b61634b14e423e8f95ad51a7694bffb2438"}}}, + {base32, "1.0.0"}, {cowlib, "2.16.0"}, {cowboy, "2.14.0"}, {ranch, "2.2.0"}, @@ -139,7 +156,13 @@ {prometheus_httpd, "2.1.15"}, {prometheus, "6.0.3"}, {graphql, "0.17.1", {pkg, graphql_erl}}, - {luerl, "1.3.0"} + {luerl, "1.3.0"}, + {hackney, "1.25.0"}, + {eqwalizer_support, + {git_subdir, + "https://github.com/whatsapp/eqwalizer.git", + {branch, "main"}, + "eqwalizer_support"}} ]}. {shell, [ @@ -152,7 +175,7 @@ ]}. {relx, [ - {release, {'hb', "0.0.1"}, [hb, b64fast, cowboy, gun, luerl, prometheus, prometheus_cowboy, prometheus_ranch, elmdb]}, + {release, {'hb', "0.0.1"}, [hb, b64rs, base32, cowboy, gun, luerl, prometheus, prometheus_cowboy, prometheus_ranch, elmdb, runtime_tools]}, {sys_config, "config/app.config"}, {include_erts, true}, {extended_start_script, true}, @@ -164,7 +187,7 @@ ]}. {dialyzer, [ - {plt_extra_apps, [public_key, ranch, cowboy, prometheus, prometheus_cowboy, prometheus_ranch, b64fast, eunit, gun]}, + {plt_extra_apps, [public_key, ranch, cowboy, prometheus, prometheus_cowboy, prometheus_ranch, b64rs, eunit, gun]}, incremental, {warnings, [no_improper_lists, no_unused]} ]}. 
diff --git a/rebar.lock b/rebar.lock index f27fcb205..dee62d24d 100644 --- a/rebar.lock +++ b/rebar.lock @@ -1,19 +1,31 @@ {"1.2.0", [{<<"accept">>,{pkg,<<"accept">>,<<"0.3.7">>},1}, - {<<"b64fast">>, - {git,"https://github.com/ArweaveTeam/b64fast.git", - {ref,"58f0502e49bf73b29d95c6d02460d1fb8d2a5273"}}, + {<<"b64rs">>, + {git,"https://github.com/permaweb/b64rs.git", + {ref,"c6da6b61634b14e423e8f95ad51a7694bffb2438"}}, 0}, + {<<"base32">>,{pkg,<<"base32">>,<<"1.0.0">>},0}, + {<<"certifi">>,{pkg,<<"certifi">>,<<"2.15.0">>},1}, {<<"cowboy">>,{pkg,<<"cowboy">>,<<"2.14.0">>},0}, {<<"cowlib">>,{pkg,<<"cowlib">>,<<"2.16.0">>},0}, {<<"ddskerl">>,{pkg,<<"ddskerl">>,<<"0.4.2">>},1}, {<<"elmdb">>, {git,"https://github.com/permaweb/elmdb-rs.git", - {ref,"bfda2facebdb433eea753f82e7e8d45aefc6d87a"}}, + {ref,"de36a64870e671dc325b33d7aa9a368fd6a06db8"}}, + 0}, + {<<"eqwalizer_support">>, + {git_subdir,"https://github.com/whatsapp/eqwalizer.git", + {ref,"0f514eb3893fa7070835c83ecb49fbea31b0426d"}, + "eqwalizer_support"}, 0}, {<<"graphql">>,{pkg,<<"graphql_erl">>,<<"0.17.1">>},0}, {<<"gun">>,{pkg,<<"gun">>,<<"2.2.0">>},0}, + {<<"hackney">>,{pkg,<<"hackney">>,<<"1.25.0">>},0}, + {<<"idna">>,{pkg,<<"idna">>,<<"6.1.1">>},1}, {<<"luerl">>,{pkg,<<"luerl">>,<<"1.3.0">>},0}, + {<<"metrics">>,{pkg,<<"metrics">>,<<"1.0.1">>},1}, + {<<"mimerl">>,{pkg,<<"mimerl">>,<<"1.4.0">>},1}, + {<<"parse_trans">>,{pkg,<<"parse_trans">>,<<"3.4.1">>},1}, {<<"prometheus">>,{pkg,<<"prometheus">>,<<"6.0.3">>},0}, {<<"prometheus_cowboy">>,{pkg,<<"prometheus_cowboy">>,<<"0.2.0">>},0}, {<<"prometheus_httpd">>,{pkg,<<"prometheus_httpd">>,<<"2.1.15">>},0}, @@ -21,30 +33,50 @@ {git,"https://github.com/permaweb/prometheus_ranch.git", {ref,"73f16ed9856972ced3fb8f4168004fffe742d5b2"}}, 0}, - {<<"ranch">>,{pkg,<<"ranch">>,<<"2.2.0">>},0}]}. 
+ {<<"ranch">>,{pkg,<<"ranch">>,<<"2.2.0">>},0}, + {<<"ssl_verify_fun">>,{pkg,<<"ssl_verify_fun">>,<<"1.1.7">>},1}, + {<<"unicode_util_compat">>,{pkg,<<"unicode_util_compat">>,<<"0.7.1">>},1}]}. [ {pkg_hash,[ {<<"accept">>, <<"CD6E34A2D7E28CA38B2D3CB233734CA0C221EFBC1F171F91FEC5F162CC2D18DA">>}, + {<<"base32">>, <<"1AB331F812FCC254C8F7D4348E1E5A6F2B9B32B7A260BF2BC3358E3BF14C841A">>}, + {<<"certifi">>, <<"0E6E882FCDAAA0A5A9F2B3DB55B1394DBA07E8D6D9BCAD08318FB604C6839712">>}, {<<"cowboy">>, <<"565DCF221BA99B1255B0ADCEC24D2D8DBE79E46EC79F30F8373CCEADC6A41E2A">>}, {<<"cowlib">>, <<"54592074EBBBB92EE4746C8A8846E5605052F29309D3A873468D76CDF932076F">>}, {<<"ddskerl">>, <<"A51A90BE9AC9B36A94017670BED479C623B10CA9D4BDA1EDF3A0E48CAEEADA2A">>}, {<<"graphql">>, <<"EB59FCBB39F667DC1C78C950426278015C3423F7A6ED2A121D3DB8B1D2C5F8B4">>}, {<<"gun">>, <<"B8F6B7D417E277D4C2B0DC3C07DFDF892447B087F1CC1CAFF9C0F556B884E33D">>}, + {<<"hackney">>, <<"390E9B83F31E5B325B9F43B76E1A785CBDB69B5B6CD4E079AA67835DED046867">>}, + {<<"idna">>, <<"8A63070E9F7D0C62EB9D9FCB360A7DE382448200FBBD1B106CC96D3D8099DF8D">>}, {<<"luerl">>, <<"B56423DDB721432AB980B818FEECB84ADBAB115E2E11522CF94BCD0729CAA501">>}, + {<<"metrics">>, <<"25F094DEA2CDA98213CECC3AEFF09E940299D950904393B2A29D191C346A8486">>}, + {<<"mimerl">>, <<"3882A5CA67FBBE7117BA8947F27643557ADEC38FA2307490C4C4207624CB213B">>}, + {<<"parse_trans">>, <<"6E6AA8167CB44CC8F39441D05193BE6E6F4E7C2946CB2759F015F8C56B76E5FF">>}, {<<"prometheus">>, <<"95302236124C0F919163A7762BF7D2B171B919B6FF6148D26EB38A5D2DEF7B81">>}, {<<"prometheus_cowboy">>, <<"526F75D9850A9125496F78BCEECCA0F237BC7B403C976D44508543AE5967DAD9">>}, {<<"prometheus_httpd">>, <<"8F767D819A5D36275EAB9264AFF40D87279151646776069BF69FBDBBD562BD75">>}, - {<<"ranch">>, <<"25528F82BC8D7C6152C57666CA99EC716510FE0925CB188172F41CE93117B1B0">>}]}, + {<<"ranch">>, <<"25528F82BC8D7C6152C57666CA99EC716510FE0925CB188172F41CE93117B1B0">>}, + {<<"ssl_verify_fun">>, 
<<"354C321CF377240C7B8716899E182CE4890C5938111A1296ADD3EC74CF1715DF">>}, + {<<"unicode_util_compat">>, <<"A48703A25C170EEDADCA83B11E88985AF08D35F37C6F664D6DCFB106A97782FC">>}]}, {pkg_hash_ext,[ {<<"accept">>, <<"CA69388943F5DAD2E7232A5478F16086E3C872F48E32B88B378E1885A59F5649">>}, + {<<"base32">>, <<"0449285348ED0C4CD83C7198E76C5FD5A0451C4EF18695B9FD43792A503E551A">>}, + {<<"certifi">>, <<"B147ED22CE71D72EAFDAD94F055165C1C182F61A2FF49DF28BCC71D1D5B94A60">>}, {<<"cowboy">>, <<"EA99769574550FE8A83225C752E8A62780A586770EF408816B82B6FE6D46476B">>}, {<<"cowlib">>, <<"7F478D80D66B747344F0EA7708C187645CFCC08B11AA424632F78E25BF05DB51">>}, {<<"ddskerl">>, <<"63F907373D7E548151D584D4DA8A38928FD26EC9477B94C0FFAAD87D7CB69FE7">>}, {<<"graphql">>, <<"4D0F08EC57EF0983E2596763900872B1AB7E94F8EE3817B9F67EEC911FF7C386">>}, {<<"gun">>, <<"76022700C64287FEB4DF93A1795CFF6741B83FB37415C40C34C38D2A4645261A">>}, + {<<"hackney">>, <<"7209BFD75FD1F42467211FF8F59EA74D6F2A9E81CBCEE95A56711EE79FD6B1D4">>}, + {<<"idna">>, <<"92376EB7894412ED19AC475E4A86F7B413C1B9FBB5BD16DCCD57934157944CEA">>}, {<<"luerl">>, <<"6B3138AA829F0FBC4CD0F083F273B4030A2B6CE99155194A6DB8C67B2C3480A4">>}, + {<<"metrics">>, <<"69B09ADDDC4F74A40716AE54D140F93BEB0FB8978D8636EADED0C31B6F099F16">>}, + {<<"mimerl">>, <<"13AF15F9F68C65884ECCA3A3891D50A7B57D82152792F3E19D88650AA126B144">>}, + {<<"parse_trans">>, <<"620A406CE75DADA827B82E453C19CF06776BE266F5A67CFF34E1EF2CBB60E49A">>}, {<<"prometheus">>, <<"53554ECADAC0354066801D514D1A244DD026175E4EE3A9A30192B71D530C8268">>}, {<<"prometheus_cowboy">>, <<"2C7EB12F4B970D91E3B47BAAD0F138F6ADC34E53EEB0AE18068FF0AFAB441B24">>}, {<<"prometheus_httpd">>, <<"67736D000745184D5013C58A63E947821AB90CB9320BC2E6AE5D3061C6FFE039">>}, - {<<"ranch">>, <<"FA0B99A1780C80218A4197A59EA8D3BDAE32FBFF7E88527D7D8A4787EFF4F8E7">>}]} + {<<"ranch">>, <<"FA0B99A1780C80218A4197A59EA8D3BDAE32FBFF7E88527D7D8A4787EFF4F8E7">>}, + {<<"ssl_verify_fun">>, 
<<"FE4C190E8F37401D30167C8C405EDA19469F34577987C76DDE613E838BBC67F8">>}, + {<<"unicode_util_compat">>, <<"B3A917854CE3AE233619744AD1E0102E05673136776FB2FA76234F3E03B23642">>}]} ]. diff --git a/src/ar_block.erl b/src/ar_block.erl new file mode 100644 index 000000000..c36229b9c --- /dev/null +++ b/src/ar_block.erl @@ -0,0 +1,59 @@ +%%% @doc Copied and adapted from the arweave codebase. +%%% Should track: https://github.com/ArweaveTeam/arweave/blob/master/apps/arweave/src/ar_block.erl +-module(ar_block). + +-export([strict_data_split_threshold/0, get_chunk_padded_offset/1, generate_size_tagged_list_from_txs/2]). + +-include("include/ar.hrl"). + +%%%=================================================================== +%%% Public interface. +%%%=================================================================== + +strict_data_split_threshold() -> ?STRICT_DATA_SPLIT_THRESHOLD. + +%% @doc Return Offset if it is smaller than or equal to ar_block:strict_data_split_threshold(). +%% Otherwise, return the offset of the last byte of the chunk + the size of the padding. +-spec get_chunk_padded_offset(Offset :: non_neg_integer()) -> non_neg_integer(). +get_chunk_padded_offset(Offset) -> + case Offset > ar_block:strict_data_split_threshold() of + true -> + ar_poa:get_padded_offset(Offset, ar_block:strict_data_split_threshold()); + false -> + Offset + end. + +generate_size_tagged_list_from_txs(TXs, Height) -> + lists:reverse( + element(2, + lists:foldl( + fun(TX, {Pos, List}) -> + DataSize = TX#tx.data_size, + End = Pos + DataSize, + case Height >= ar_fork:height_2_5() of + true -> + Padding = ar_tx:get_weave_size_increase(DataSize, Height) + - DataSize, + %% Encode the padding information in the Merkle tree. 
+ case Padding > 0 of + true -> + PaddingRoot = <<>>, + {End + Padding, [{{padding, PaddingRoot}, End + Padding}, + {{TX, get_tx_data_root(TX)}, End} | List]}; + false -> + {End, [{{TX, get_tx_data_root(TX)}, End} | List]} + end; + false -> + {End, [{{TX, get_tx_data_root(TX)}, End} | List]} + end + end, + {0, []}, + lists:sort(TXs) + ) + ) + ). + +get_tx_data_root(#tx{ format = 2, data_root = DataRoot }) -> + DataRoot; +get_tx_data_root(TX) -> + (ar_tx:generate_chunk_tree(TX))#tx.data_root. \ No newline at end of file diff --git a/src/ar_bundles.erl b/src/ar_bundles.erl index 3f070b54a..e77f1cbd1 100644 --- a/src/ar_bundles.erl +++ b/src/ar_bundles.erl @@ -4,7 +4,9 @@ -export([new_item/4, sign_item/2, verify_item/1]). -export([encode_tags/1, decode_tags/1]). -export([serialize/1, deserialize/1, serialize_bundle/3]). +-export([deserialize_header/1, deserialize_item_wrapper/1]). -export([data_item_signature_data/1]). +-export([bundle_header_size/1, decode_bundle_header/1]). -include("include/hb.hrl"). -include_lib("eunit/include/eunit.hrl"). @@ -87,7 +89,12 @@ new_item(Target, Anchor, Tags, Data) -> %% @doc Sign a data item. sign_item(_, undefined) -> throw(wallet_not_found); sign_item(RawItem, {PrivKey, {KeyType, Owner}}) -> - Item = (dev_arweave_common:normalize(RawItem))#tx{format = ans104, owner = Owner, signature_type = KeyType}, + Item = + (dev_arweave_common:normalize(RawItem))#tx{ + format = ans104, + owner = Owner, + signature_type = KeyType + }, % Generate the signature from the data item's data segment in 'signed'-ready mode. Sig = ar_wallet:sign(PrivKey, data_item_signature_data(Item)), dev_arweave_common:reset_ids(Item#tx{signature = Sig}). 
@@ -97,6 +104,10 @@ verify_item(DataItem) -> ValidID = verify_data_item_id(DataItem), ValidSignature = verify_data_item_signature(DataItem), ValidTags = verify_data_item_tags(DataItem), + ?event(debug, {verify_item, + {id, ValidID}, + {signature, ValidSignature}, + {tags, ValidTags}}), ValidID andalso ValidSignature andalso ValidTags. %%%=================================================================== @@ -129,7 +140,7 @@ enforce_valid_tx(TX) -> {invalid_field, anchor, TX#tx.anchor} ), hb_util:ok_or_throw(TX, - hb_util:check_size(TX#tx.owner, [0, byte_size(?DEFAULT_OWNER)]), + hb_util:check_size(TX#tx.owner, [0, 32, 33, 42, 65, byte_size(?DEFAULT_OWNER)]), {invalid_field, owner, TX#tx.owner} ), hb_util:ok_or_throw(TX, @@ -137,7 +148,7 @@ enforce_valid_tx(TX) -> {invalid_field, target, TX#tx.target} ), hb_util:ok_or_throw(TX, - hb_util:check_size(TX#tx.signature, [0, 65, byte_size(?DEFAULT_SIG)]), + hb_util:check_size(TX#tx.signature, [0, 64, 65, byte_size(?DEFAULT_SIG)]), {invalid_field, signature, TX#tx.signature} ), hb_util:ok_or_throw(TX, @@ -184,8 +195,7 @@ data_item_signature_data(RawItem) -> ar_deep_hash:hash([ utf8_encoded("dataitem"), utf8_encoded("1"), - %% Only SignatureType 1 is supported for now (RSA 4096) - utf8_encoded("1"), + utf8_encoded(get_signature_type(Item#tx.signature_type)), <<(Item#tx.owner)/binary>>, <<(Item#tx.target)/binary>>, <<(Item#tx.anchor)/binary>>, @@ -193,6 +203,12 @@ data_item_signature_data(RawItem) -> <<(Item#tx.data)/binary>> ]). +get_signature_type({rsa, 65537}) -> "1"; +get_signature_type({eddsa, ed25519}) -> "2"; +get_signature_type(ethereum) -> "3"; +get_signature_type(solana) -> "4"; +get_signature_type(typed_ethereum) -> "7". + %% @doc Verify the data item's ID matches the signature. verify_data_item_id(DataItem) -> ExpectedID = crypto:hash(sha256, DataItem#tx.signature), @@ -316,8 +332,15 @@ to_serialized_pair(Item, false, Signed) -> %% little-endian format which is why we encode to `<<1, 0>>'. 
encode_signature_type({rsa, 65537}) -> <<1, 0>>; -encode_signature_type(_) -> - unsupported_tx_format. +encode_signature_type({eddsa, ed25519}) -> + <<2, 0>>; +encode_signature_type(ethereum) -> + <<3, 0>>; +encode_signature_type(solana) -> + <<4, 0>>; +encode_signature_type(SigType) -> + ?event(warning, {error_encoding_signature_type, {sig_type, SigType}}), + {unsupported_tx_format, SigType}. %% @doc Encode an optional field (target, anchor) with a presence byte. encode_optional_field(<<>>) -> @@ -402,13 +425,30 @@ deserialize(Item) when is_record(Item, tx) -> deserialize(Binary) -> deserialize_item(Binary). +%% @doc Deserialize an item and unbundle it if it is a bundle, returning a #tx +%% with possibly deeply nested items in the #tx.data field. deserialize_item(Binary) -> + maybe_unbundle(deserialize_item_wrapper(Binary)). + +%% @doc Deserialize only the _wrapper_ of an item, leaving the data unprocessed +%% in the case that it is a bundle. It may be unbundled by calling `maybe_unbundle/1' +%% at any later point. +deserialize_item_wrapper(Binary) -> + {ok, _HeaderSize, Header} = deserialize_header(Binary), + dev_arweave_common:reset_ids(Header). + +%% @doc Deserialize the header of an item, returning a #tx record with the +%% remaining unprocessed data in the #tx.data field. +deserialize_header(Binary) -> {SignatureType, Signature, Owner, Rest} = decode_signature(Binary), {Target, Rest2} = decode_optional_field(Rest), {Anchor, Rest3} = decode_optional_field(Rest2), - {Tags, Data} = decode_tags(Rest3), - maybe_unbundle( - dev_arweave_common:reset_ids(#tx{ + {Tags, RemainingData} = decode_tags(Rest3), + HeaderSize = byte_size(Binary) - byte_size(RemainingData), + { + ok, + HeaderSize, + #tx{ format = ans104, signature_type = SignatureType, signature = Signature, @@ -416,10 +456,10 @@ deserialize_item(Binary) -> target = Target, anchor = Anchor, tags = Tags, - data = Data, - data_size = byte_size(Data) - }) - ). 
+ data = RemainingData, + data_size = byte_size(RemainingData) + } + }. maybe_unbundle(Item) -> case dev_arweave_common:type(Item) of @@ -430,14 +470,14 @@ maybe_unbundle(Item) -> unbundle_list(Item) -> case unbundle(Item#tx.data) of - detached -> Item#tx{data = detached}; + ?DEFAULT_DATA -> Item#tx{data = ?DEFAULT_DATA}; Items -> Item#tx{data = hb_util:list_to_numbered_message(Items)} end. unbundle_map(Item) -> MapTXID = dev_arweave_common:tagfind(<<"bundle-map">>, Item#tx.tags, <<>>), case unbundle(Item#tx.data) of - detached -> Item#tx{data = detached}; + ?DEFAULT_DATA -> Item#tx{data = ?DEFAULT_DATA}; Items -> MapItem = find_single_layer(hb_util:decode(MapTXID), Items), Map = hb_json:decode(MapItem#tx.data), @@ -469,7 +509,7 @@ find_single_layer(UnsignedID, Items) -> unbundle(<>) -> {ItemsBin, Items} = decode_bundle_header(Count, Content), decode_bundle_items(Items, ItemsBin); -unbundle(<<>>) -> detached. +unbundle(?DEFAULT_DATA) -> ?DEFAULT_DATA. decode_bundle_items([], <<>>) -> []; @@ -487,22 +527,49 @@ decode_bundle_items([{_ID, Size} | RestItems], ItemsBin) -> ) ]. +bundle_header_size(<>) -> + % Each item in the bundle header index consumes 64 bytes + 32 + (Count * 64); +bundle_header_size(_) -> + invalid_bundle_header. + +decode_bundle_header(<>) -> + decode_bundle_header(Count, Content); +decode_bundle_header(<<>>) -> + {<<>>, []}; +decode_bundle_header(_) -> + invalid_bundle_header. + decode_bundle_header(Count, Bin) -> decode_bundle_header(Count, Bin, []). decode_bundle_header(0, ItemsBin, Header) -> {ItemsBin, lists:reverse(Header)}; -decode_bundle_header(Count, <>, Header) -> - decode_bundle_header(Count - 1, Rest, [{ID, Size} | Header]). +decode_bundle_header( + Count, + <>, + Header +) -> + decode_bundle_header(Count - 1, Rest, [{ID, Size} | Header]); +decode_bundle_header(_, _, _) -> + invalid_bundle_header. %% @doc Decode the signature from a binary format. Only RSA 4096 is currently supported. 
%% Note: the signature type '1' corresponds to RSA 4096 - but it is is written in %% little-endian format which is why we match on `<<1, 0>>'. decode_signature(<<1, 0, Signature:512/binary, Owner:512/binary, Rest/binary>>) -> {{rsa, 65537}, Signature, Owner, Rest}; +decode_signature(<<2, 0, Signature:64/binary, Owner:32/binary, Rest/binary>>) -> + {{eddsa, ed25519}, Signature, Owner, Rest}; +decode_signature(<<3, 0, Signature:65/binary, Owner:65/binary, Rest/binary>>) -> + {ethereum, Signature, Owner, Rest}; +decode_signature(<<4, 0, Signature:64/binary, Owner:32/binary, Rest/binary>>) -> + {solana, Signature, Owner, Rest}; +decode_signature(<<7, 0, Signature:65/binary, Owner:42/binary, Rest/binary>>) -> + {typed_ethereum, Signature, Owner, Rest}; decode_signature(Other) -> - ?event({error_decoding_signature, - {sig_type, {explicit, binary:part(Other, 0, 2)}}, - {binary, Other}}), - unsupported_tx_format. + SigType = binary:part(Other, 0, 2), + ?event(warning, {error_decoding_signature, + {sig_type, {explicit, SigType}}}), + {unsupported_tx_format, SigType}. %% @doc Decode tags from a binary format using Apache Avro. decode_tags(<<0:64/little-integer, 0:64/little-integer, Rest/binary>>) -> @@ -643,6 +710,44 @@ with_zero_length_tag_test() -> Deserialized = deserialize(Serialized), ?assertEqual(Item, Deserialized). +bundle_header_size_test() -> + ?assertEqual(672, bundle_header_size(<<10:256/little, 1234/little>>)), + ?assertEqual(32, bundle_header_size(<<0:256/little>>)), + ?assertEqual(invalid_bundle_header, bundle_header_size(<<>>)), + ?assertEqual(invalid_bundle_header, bundle_header_size(<<0>>)). 
+ +decode_bundle_header_test() -> + ?assertEqual({<<>>, []}, decode_bundle_header(<<>>)), + Tail = <<"tail">>, + ?assertEqual( + {Tail, []}, + decode_bundle_header(<<0:256/little, Tail/binary>>) + ), + ID1 = crypto:strong_rand_bytes(32), + Items1 = <<"abcde">>, + ?assertEqual( + {Items1, [{ID1, 5}]}, + decode_bundle_header(<<1:256/little, 5:256/little, ID1:32/binary, Items1/binary>>) + ), + ID2 = crypto:strong_rand_bytes(32), + ID3 = crypto:strong_rand_bytes(32), + Items2 = <<"payload">>, + ?assertEqual( + {Items2, [{ID2, 4}, {ID3, 2}]}, + decode_bundle_header( + << + 2:256/little, + 4:256/little, ID2:32/binary, + 2:256/little, ID3:32/binary, + Items2/binary + >> + ) + ), + ?assertEqual( + {<<>>, [{ID1, 6}]}, + decode_bundle_header(<<1:256/little, 6:256/little, ID1:32/binary>>) + ). + unsigned_data_item_id_test() -> Item1 = deserialize( serialize( @@ -749,6 +854,32 @@ bundle_map_test() -> ?assertEqual(Item1#tx.data, (maps:get(<<"key1">>, BundleItem#tx.data))#tx.data), ?assert(verify_item(BundleItem)). +eddsa_cases_test() -> + Key = ar_wallet:new(?EDDSA_KEY_TYPE), + %% Owner and SignatureType defined during signing process. 
+ Item1 = sign_item(#tx{ + format = ans104, + target = crypto:strong_rand_bytes(32), + anchor = crypto:strong_rand_bytes(32), + tags = [{<<"tag1">>, <<"value1">>}, {<<"tag2">>, <<"value2">>}], + data = <<"item1_data">> + }, Key), + Bundle = serialize(dev_arweave_common:normalize(Item1)), + BundleItem = deserialize(Bundle), + %% Sign a valid transaction and verify it + ?assert(verify_item(BundleItem)), + %% Missing Anchor should fail + ?assertNot(verify_item(BundleItem#tx{anchor = <<>>})), + %% Missing Tags should fail + ?assertNot(verify_item(BundleItem#tx{tags = []})), + %% Missing Owner should fail + ?assertNot(verify_item(BundleItem#tx{owner = crypto:strong_rand_bytes(32)})), + %% Missing Target should fail + ?assertNot(verify_item(BundleItem#tx{target = <<>>})), + %% Missing Data should fail + ?assertNot(verify_item(BundleItem#tx{data = <<>>})), + ok. + extremely_large_bundle_test() -> W = ar_wallet:new(), Data = crypto:strong_rand_bytes(100_000_000), @@ -991,4 +1122,42 @@ generate_and_write_map_bundle_test_disabled() -> ?event(debug_test, {deserialized, {explicit, Deserialized}}), ?assert(verify_item(Deserialized)), ok = file:write_file( - <<"test/arbundles.js/ans104-map-bundle-erlang.bundle">>, Serialized). \ No newline at end of file + <<"test/arbundles.js/ans104-map-bundle-erlang.bundle">>, Serialized). + +deserialize_ed25519_transaction_test() -> + % ans104-item-ed25519.bin is dataitem 1rTy7gQuK9lJydlKqCEhtGLp2WWG-GOrVo5JdiCmaxs + {ok, Serialized} = file:read_file(<<"test/arbundles.js/ans104-item-ed25519.bin">>), + Deserialized = deserialize(Serialized), + ?assertEqual([{<<"Content-Type">>,<<"image/png">>}], Deserialized#tx.tags), + ?assertEqual(<<"ZbExyvGrJKOJTJcHMtKzoOZVCQBkjZ+5">>, Deserialized#tx.anchor), + ?assertEqual(<<"ejhYD9Cw9VCsVik6yGLoclo3CLRvAITHTZamLY_6ro4">>, + hb_util:human_id(ar_wallet:to_address(Deserialized#tx.owner, Deserialized#tx.signature_type))), + ?assert(verify_item(Deserialized)). 
+ +deserialize_solana_transaction_test() -> + % ans104-item-solana.bin is dataitem hXKqH_9rkYZ7LwvVps81uKNZd_i36WZjlp4Wnc5BkiE + {ok, Serialized} = file:read_file(<<"test/arbundles.js/ans104-item-solana.bin">>), + Deserialized = deserialize(Serialized), + ?assertEqual([], Deserialized#tx.tags), + ?assertEqual(<<"e/GCI2gwfkcyXG6Q3n3CVuA0zT4EmSSf">>, Deserialized#tx.anchor), + ?assertEqual(<<"GGuACHp2FbtB4wwT5TmPCU6W5FGa3wB1vqno4gsKsxHz">>, + hb_util:human_id(ar_wallet:to_address(Deserialized#tx.owner, Deserialized#tx.signature_type))), + ?assert(verify_item(Deserialized)). + +deserialize_ethereum_transaction_test() -> + % ans104-item-ethereum.bin is dataitem te5MPrOxPqXrVygIQgzp4ZgImLN8CW-qPaI_olhlWyx + {ok, Serialized} = file:read_file(<<"test/arbundles.js/ans104-item-ethereum.bin">>), + Deserialized = deserialize(Serialized), + ?assertEqual(ethereum, Deserialized#tx.signature_type), + ExpectedTags = [ + {<<"Content-Type">>, <<"application/json">>}, + {<<"App-Name">>, <<"Rodeo">>}, + {<<"Token-Contract">>, <<"0xB6e822C6D5E0dEC983d76F28E56616057f88380f">>}, + {<<"Token-Id">>, <<"328">>}, + {<<"Chain-Id">>, <<"8453">>} + ], + ?assertEqual(ExpectedTags, Deserialized#tx.tags), + ?assertEqual(<<"zZHoADuo74sWmhEF0V-D4sxa4rj3rUR5_r7tSpWSmtY">>, hb_util:encode(Deserialized#tx.anchor)), + ?assertEqual(<<"0x626334b6ef6D3e8537E9f8d97d65f59832219315">>, + hb_util:human_id(ar_wallet:to_address(Deserialized#tx.owner, Deserialized#tx.signature_type))), + ?assert(verify_item(Deserialized)). diff --git a/src/ar_fork.erl b/src/ar_fork.erl new file mode 100644 index 000000000..0209cae57 --- /dev/null +++ b/src/ar_fork.erl @@ -0,0 +1,173 @@ +%%% +%%% @doc The module defines Arweave hard forks' heights. +%%% + +-module(ar_fork). + +-export([height_1_6/0, height_1_7/0, height_1_8/0, height_1_9/0, height_2_0/0, height_2_2/0, + height_2_3/0, height_2_4/0, height_2_5/0, height_2_6/0, height_2_6_8/0, + height_2_7/0, height_2_7_1/0, height_2_7_2/0, + height_2_8/0, height_2_9/0]). 
+ +-ifdef(FORKS_RESET). +height_1_6() -> + 0. +-else. +height_1_6() -> + 95000. +-endif. + +-ifdef(FORKS_RESET). +height_1_7() -> + 0. +-else. +height_1_7() -> + 235200. % Targeting 2019-07-08 UTC +-endif. + +-ifdef(FORKS_RESET). +height_1_8() -> + 0. +-else. +height_1_8() -> + 269510. % Targeting 2019-08-29 UTC +-endif. + +-ifdef(FORKS_RESET). +height_1_9() -> + 0. +-else. +height_1_9() -> + 315700. % Targeting 2019-11-04 UTC +-endif. + +-ifdef(FORKS_RESET). +height_2_0() -> + 0. +-else. +height_2_0() -> + 422250. % Targeting 2020-04-09 10:00 UTC +-endif. + +-ifdef(FORKS_RESET). +height_2_2() -> + 0. +-else. +height_2_2() -> + 552180. % Targeting 2020-10-21 13:00 UTC +-endif. + +-ifdef(FORKS_RESET). +height_2_3() -> + 0. +-else. +height_2_3() -> + 591140. % Targeting 2020-12-21 11:00 UTC +-endif. + +-ifdef(FORKS_RESET). +height_2_4() -> + 0. +-else. +height_2_4() -> + 633720. % Targeting 2021-02-24 11:50 UTC +-endif. + +-ifdef(FORKS_RESET). +height_2_5() -> + 0. +-else. +height_2_5() -> + 812970. +-endif. + +-ifdef(FORK_2_6_HEIGHT). +height_2_6() -> + ?FORK_2_6_HEIGHT. +-else. + -ifdef(FORKS_RESET). + height_2_6() -> + 0. + -else. + height_2_6() -> + 1132210. % Targeting 2023-03-06 14:00 UTC + -endif. +-endif. + +-ifdef(FORK_2_6_8_HEIGHT). +height_2_6_8() -> + ?FORK_2_6_8_HEIGHT. +-else. + -ifdef(FORKS_RESET). + height_2_6_8() -> + 0. + -else. + height_2_6_8() -> + 1189560. % Targeting 2023-05-30 16:00 UTC + -endif. +-endif. + +-ifdef(FORK_2_7_HEIGHT). +height_2_7() -> + ?FORK_2_7_HEIGHT. +-else. + -ifdef(FORKS_RESET). + height_2_7() -> + 0. + -else. + height_2_7() -> + 1275480. % Targeting 2023-10-04 14:00 UTC + -endif. +-endif. + +-ifdef(FORK_2_7_1_HEIGHT). +height_2_7_1() -> + ?FORK_2_7_1_HEIGHT. +-else. + -ifdef(FORKS_RESET). + height_2_7_1() -> + 0. + -else. + height_2_7_1() -> + 1316410. % Targeting 2023-12-05 14:00 UTC + -endif. +-endif. + +-ifdef(FORK_2_7_2_HEIGHT). +height_2_7_2() -> + ?FORK_2_7_2_HEIGHT. +-else. + -ifdef(FORKS_RESET). 
+ height_2_7_2() -> + 0. + -else. + height_2_7_2() -> + 1391330. % Targeting 2024-03-26 14:00 UTC + -endif. +-endif. + +-ifdef(FORK_2_8_HEIGHT). +height_2_8() -> + ?FORK_2_8_HEIGHT. +-else. + -ifdef(FORKS_RESET). + height_2_8() -> + 0. + -else. + height_2_8() -> + 1547120. % Targeting 2024-11-13 14:00 UTC + -endif. +-endif. + +-ifdef(FORK_2_9_HEIGHT). +height_2_9() -> + ?FORK_2_9_HEIGHT. +-else. + -ifdef(FORKS_RESET). + height_2_9() -> + 0. + -else. + height_2_9() -> + 1602350. % Targeting 2025-02-03 14:00 UTC + -endif. +-endif. diff --git a/src/ar_format.erl b/src/ar_format.erl index 8ee18d8bb..d23cc7aa1 100644 --- a/src/ar_format.erl +++ b/src/ar_format.erl @@ -18,7 +18,7 @@ format(TX, Indent, Opts) when is_list(TX); is_map(TX) -> format(TX, Indent, Opts) when is_record(TX, tx) -> MustVerify = hb_opts:get(debug_ids, true, Opts), Valid = - if MustVerify -> verify(TX); + if MustVerify -> verify(dev_arweave_common:normalize(TX)); true -> true end, UnsignedID = @@ -63,9 +63,12 @@ format(TX, Indent, Opts) when is_record(TX, tx) -> true -> format_line("Signer: ~s", [hb_util:safe_encode(ar_bundles:signer(TX))], - Indent + 1), + Indent + 1) ++ format_line("Signature: ~s", - [hb_format:binary(TX#tx.signature)], + [hb_format:binary(TX#tx.signature, Opts)], + Indent + 1) ++ + format_line("Signature Type: ~p", + [TX#tx.signature_type], Indent + 1); false -> [] end ++ @@ -125,6 +128,7 @@ format_fields(TX, Indent) -> format_anchor(TX, Indent) ++ format_quantity(TX, Indent) ++ format_reward(TX, Indent) ++ + format_data_size(TX, Indent) ++ format_data_root(TX, Indent). format_format(TX, Indent) -> @@ -152,6 +156,9 @@ format_quantity(TX, Indent) -> format_reward(TX, Indent) -> format_line("Reward: ~p", [TX#tx.reward], Indent + 1). +format_data_size(TX, Indent) -> + format_line("Data Size: ~p", [TX#tx.data_size], Indent + 1). 
+ format_data_root(TX, Indent) -> format_line("Data Root: ~s", [ case TX#tx.data_root of diff --git a/src/ar_poa.erl b/src/ar_poa.erl new file mode 100644 index 000000000..8a88d8e90 --- /dev/null +++ b/src/ar_poa.erl @@ -0,0 +1,18 @@ +%%% @doc This module implements all mechanisms required to validate a proof of access +%%% for a chunk of data received from the network. +-module(ar_poa). + +-export([get_padded_offset/1, get_padded_offset/2]). + +-include("include/ar.hrl"). + +%% @doc Return the smallest multiple of 256 KiB >= Offset +%% counting from ar_block:strict_data_split_threshold(). +get_padded_offset(Offset) -> + get_padded_offset(Offset, ar_block:strict_data_split_threshold()). + +%% @doc Return the smallest multiple of 256 KiB >= Offset +%% counting from StrictDataSplitThreshold. +get_padded_offset(Offset, StrictDataSplitThreshold) -> + Diff = Offset - StrictDataSplitThreshold, + StrictDataSplitThreshold + ((Diff - 1) div (?DATA_CHUNK_SIZE) + 1) * (?DATA_CHUNK_SIZE). diff --git a/src/ar_rate_limiter.erl b/src/ar_rate_limiter.erl deleted file mode 100644 index 81781e81c..000000000 --- a/src/ar_rate_limiter.erl +++ /dev/null @@ -1,139 +0,0 @@ --module(ar_rate_limiter). --behaviour(gen_server). --export([start_link/1, throttle/3, off/0, on/0]). --export([init/1, handle_cast/2, handle_call/3, handle_info/2, terminate/2]). --include("include/hb.hrl"). --record(state, { - traces, - off, - opts -}). - -%%%=================================================================== -%%% Public interface. -%%%=================================================================== - -start_link(Opts) -> - gen_server:start_link({local, ?MODULE}, ?MODULE, Opts, []). - -%% @doc Hang until it is safe to make another request to the given Peer with the -%% given Path. The limits are configured in include/ar_blacklist_middleware.hrl. 
-throttle(Peer, Path, Opts) -> - case lists:member(Peer, hb_opts:get(throttle_exempt_peers, [], Opts)) of - true -> - ok; - false -> - throttle2(Peer, Path, Opts) - end. - -throttle2(Peer, Path, Opts) -> - Routes = hb_opts:get(throttle_exempt_paths, [], Opts), - IsExempt = - lists:any(fun(Route) -> hb_path:regex_matches(Path, Route) end, Routes), - case IsExempt of - true -> ok; - false -> - Res = catch gen_server:call(?MODULE, {throttle, Peer, Path}, infinity), - case Res of - {'EXIT', {noproc, {gen_server, call, _}}} -> - ok; - {'EXIT', Reason} -> - exit(Reason); - _ -> - ok - end - end. - -%% @doc Turn rate limiting off. -off() -> - gen_server:cast(?MODULE, turn_off). - -%% @doc Turn rate limiting on. -on() -> - gen_server:cast(?MODULE, turn_on). - -%%%=================================================================== -%%% Generic server callbacks. -%%%=================================================================== - -init(Opts) -> - process_flag(trap_exit, true), - {ok, #state{ traces = #{}, off = false, opts = Opts }}. - -handle_call({throttle, _Peer, _Path}, _From, #state{ off = true } = State) -> - {reply, ok, State}; -handle_call({throttle, Peer, Path}, From, State) -> - gen_server:cast(?MODULE, {throttle, Peer, Path, From}), - {noreply, State}; - -handle_call(Request, _From, State) -> - ?event(warning, {unhandled_call, {module, ?MODULE}, {request, Request}}), - {reply, ok, State}. 
- -handle_cast({throttle, Peer, Path, From}, State) -> - #state{ traces = Traces, opts = Opts } = State, - {Type, Limit} = hb_opts:get(throttle_rpm_by_path, Path, Opts), - Now = os:system_time(millisecond), - case hb_maps:get({Peer, Type}, Traces, not_found, Opts) of - not_found -> - gen_server:reply(From, ok), - Traces2 = hb_maps:put({Peer, Type}, {1, queue:from_list([Now])}, Traces, Opts), - {noreply, State#state{ traces = Traces2 }}; - {N, Trace} -> - {N2, Trace2} = cut_trace(N, queue:in(Now, Trace), Now, Opts), - %% The macro specifies requests per minute while the throttling window - %% is 30 seconds. - HalfLimit = Limit div 2, - %% Try to approach but not hit the limit. - case N2 + 1 > max(1, HalfLimit * 80 / 100) of - true -> - ?event( - {approaching_peer_rpm_limit, - {peer, Peer}, - {path, Path}, - {minute_limit, Limit}, - {caller, From} - } - ), - erlang:send_after( - 1000, - ?MODULE, - {'$gen_cast', {throttle, Peer, Path, From}} - ), - {noreply, State}; - false -> - gen_server:reply(From, ok), - Traces2 = hb_maps:put({Peer, Type}, {N2 + 1, Trace2}, Traces, Opts), - {noreply, State#state{ traces = Traces2 }} - end - end; - -handle_cast(turn_off, State) -> - {noreply, State#state{ off = true }}; - -handle_cast(turn_on, State) -> - {noreply, State#state{ off = false }}; - -handle_cast(Cast, State) -> - ?event(warning, {unhandled_cast, {module, ?MODULE}, {cast, Cast}}), - {noreply, State}. - -handle_info(Message, State) -> - ?event(warning, {unhandled_info, {module, ?MODULE}, {message, Message}}), - {noreply, State}. - -terminate(_Reason, _State) -> - ok. - -%%%=================================================================== -%%% Private functions. -%%%=================================================================== - -cut_trace(N, Trace, Now, Opts) -> - {{value, Timestamp}, Trace2} = queue:out(Trace), - case Timestamp < Now - hb_opts:get(throttle_period, 30000, Opts) of - true -> - cut_trace(N - 1, Trace2, Now, Opts); - false -> - {N, Trace} - end. 
diff --git a/src/ar_tx.erl b/src/ar_tx.erl index 205bb85a8..7f5d32f59 100644 --- a/src/ar_tx.erl +++ b/src/ar_tx.erl @@ -2,14 +2,20 @@ -module(ar_tx). -export([sign/2, verify/1, verify_tx_id/2]). --export([id/1, id/2, get_owner_address/1, data_root/1]). +-export([id/1, id/2, get_owner_address/1, data_root/1, data_root/2]). -export([generate_signature_data_segment/1, generate_chunk_id/1]). -export([json_struct_to_tx/1, tx_to_json_struct/1]). --export([chunk_binary/2, chunks_to_size_tagged_chunks/1, sized_chunks_to_sized_chunk_ids/1]). +-export([generate_chunk_tree/1, generate_chunk_tree/2]). +-export([chunk_binary/2, chunk_binary/3, chunking_mode/1]). +-export([chunks_to_size_tagged_chunks/1, sized_chunks_to_sized_chunk_ids/1]). +-export([get_weave_size_increase/2]). -include("include/hb.hrl"). -include_lib("eunit/include/eunit.hrl"). +%% Minimum chunk size targeted by the arweave-js chunking algorithm. +-define(MIN_CHUNK_SIZE, (32 * 1024)). + %%%=================================================================== %%% Public interface. %%%=================================================================== @@ -42,7 +48,6 @@ sign_v1(TX, PrivKey, PubKey = {_, Owner}) -> %% %% Checks that are missing: %% - format 2 unsupported pre-2.0 -%% - valid ECDSA signature post-2.9 %% - verify_denomination %% - is_tx_fee_sufficient %% - tx_field_size_limit_v1/v2 @@ -54,7 +59,7 @@ verify(TX) -> From = ar_wallet:to_address(TX#tx.owner, TX#tx.signature_type), Checks = [ {"tx_format_not_supported", TX#tx.format == 1 orelse TX#tx.format == 2}, - {"invalid_signature_type", {?RSA_SIGN_ALG, 65537} == TX#tx.signature_type}, + {"tx_signature_type_not_supported", verify_signature_type(TX)}, {"quantity_negative", TX#tx.quantity >= 0}, {"same_owner_as_target", (From =/= TX#tx.target)}, {"tx_id_not_valid", verify_hash(TX)}, @@ -88,7 +93,10 @@ get_owner_address(#tx{ owner_address = OwnerAddress }) -> OwnerAddress. 
data_root(Bin) -> - Chunks = chunk_binary(?DATA_CHUNK_SIZE, Bin), + data_root(arweavejs, Bin). + +data_root(Mode, Bin) -> + Chunks = chunk_binary(Mode, ?DATA_CHUNK_SIZE, Bin), SizeTaggedChunks = chunks_to_size_tagged_chunks(Chunks), SizeTaggedChunkIDs = sized_chunks_to_sized_chunk_ids(SizeTaggedChunks), {Root, _} = ar_merkle:generate_tree(SizeTaggedChunkIDs), @@ -98,6 +106,28 @@ data_root(Bin) -> %%% Private functions. %%%=================================================================== +%% @doc Verify the transaction's signature type is supported for the given format. +%% Format 1 transactions only support RSA with 65537. +%% Format 2 transactions support RSA with 65537 and ECDSA with secp256k1. +verify_signature_type(#tx{ format = 1 } = TX) -> + case TX#tx.signature_type of + {?RSA_SIGN_ALG, 65537} -> + true; + _ -> + false + end; +verify_signature_type(#tx{ format = 2 } = TX) -> + case TX#tx.signature_type of + {?RSA_SIGN_ALG, 65537} -> + true; + {?ECDSA_SIGN_ALG, secp256k1} -> + true; + _ -> + false + end; +verify_signature_type(_) -> + false. + %% @doc Verify the transaction's signature. verify_signature(TX = #tx{ signature_type = SigType }) -> case generate_signature_data_segment(TX) of @@ -109,7 +139,12 @@ verify_signature(TX = #tx{ signature_type = SigType }) -> %% @doc Generate the data segment to be signed for a given TX. generate_signature_data_segment(#tx{ format = 2 } = TX) -> - signature_data_segment_v2(TX); + case TX#tx.signature_type of + {?ECDSA_SIGN_ALG, secp256k1} -> + signature_data_segment_v2_no_public_key(TX); + {?RSA_SIGN_ALG, 65537} -> + signature_data_segment_v2(TX) + end; generate_signature_data_segment(#tx{ format = 1 } = TX) -> signature_data_segment_v1(TX); generate_signature_data_segment(_) -> @@ -137,6 +172,28 @@ signature_data_segment_v2(TX) -> end, ar_deep_hash:hash(List2). +%% @doc Generate the data segment to be signed for a given v2 TX with ECDSA. 
+%% ECDSA signatures do not include the owner public key in the signature data segment. +signature_data_segment_v2_no_public_key(TX) -> + List = [ + << (integer_to_binary(TX#tx.format))/binary >>, + << (TX#tx.target)/binary >>, + << (list_to_binary(integer_to_list(TX#tx.quantity)))/binary >>, + << (list_to_binary(integer_to_list(TX#tx.reward)))/binary >>, + << (TX#tx.anchor)/binary >>, + tags_to_list(TX#tx.tags), + << (integer_to_binary(TX#tx.data_size))/binary >>, + << (TX#tx.data_root)/binary >> + ], + List2 = + case TX#tx.denomination > 0 of + true -> + [<< (integer_to_binary(TX#tx.denomination))/binary >> | List]; + false -> + List + end, + ar_deep_hash:hash(List2). + %% @doc Generate the data segment to be signed for a given v1 TX. signature_data_segment_v1(TX) -> case TX#tx.denomination > 0 of @@ -206,13 +263,13 @@ verify_hash(#tx{ id = ID } = TX) -> ID == dev_arweave_common:generate_id(TX, signed). %% @doc On Arweave we don't have data on format=2 transactions, and so -%% traditionally just verify the transcation based on data_rot and data_size. +%% traditionally just verify the transaction based on data_root and data_size. %% However in HyperBEAM we will often populate the data field. Adding this %% check to verify that `data_root`, `data_size`, and `data` are consistent. 
verify_v2_data(#tx{ format = 2, data = ?DEFAULT_DATA }) -> true; -verify_v2_data(#tx{ - format = 2, data_root = DataRoot, +verify_v2_data(#tx{ + format = 2, data_root = DataRoot, data_size = DataSize, data = Data }) -> (DataSize == byte_size(Data)) andalso (DataRoot == data_root(Data)); verify_v2_data(_) -> @@ -262,13 +319,11 @@ json_struct_to_tx(TXStruct) -> Owner = hb_util:decode(hb_util:find_value(<<"owner">>, TXStruct)), Sig = hb_util:decode(hb_util:find_value(<<"signature">>, TXStruct)), SigType = set_sig_type_from_pub_key(Owner), - %% Only RSA supported for now - ?RSA_KEY_TYPE = SigType, TX = #tx{ format = Format, id = TXID, anchor = hb_util:decode(hb_util:find_value(<<"last_tx">>, TXStruct)), - owner = hb_util:decode(hb_util:find_value(<<"owner">>, TXStruct)), + owner = Owner, tags = [{hb_util:decode(Name), hb_util:decode(Value)} %% Only the elements matching this pattern are included in the list. || #{<<"name">> := Name, <<"value">> := Value} <- Tags], @@ -286,11 +341,22 @@ json_struct_to_tx(TXStruct) -> end, denomination = Denomination }, - TX#tx{ owner_address = get_owner_address(TX) }. + %% For ECDSA transactions, recover the owner from the signature + case SigType of + ?ECDSA_KEY_TYPE -> + DataSegment = generate_signature_data_segment(TX), + Owner2 = ar_wallet:recover_key(DataSegment, Sig, SigType), + TX#tx{ owner = Owner2, owner_address = ar_wallet:to_address(Owner2, SigType) }; + ?RSA_KEY_TYPE -> + TX#tx{ owner_address = get_owner_address(TX) } + end. +%% @doc Determine signature type from owner and signature. +%% For ECDSA transactions, the owner is empty in JSON (recovered from signature). 
set_sig_type_from_pub_key(Owner) -> case Owner of <<>> -> + %% Empty owner means ECDSA (public key is recovered from signature) ?ECDSA_KEY_TYPE; _ -> ?RSA_KEY_TYPE @@ -308,13 +374,10 @@ tx_to_json_struct( data = Data, reward = Reward, signature = Sig, - signature_type = SigType, data_size = DataSize, data_root = DataRoot, denomination = Denomination }) -> - %% Only RSA supported for now - ?RSA_KEY_TYPE = SigType, Fields = [ {<<"format">>, case Format of @@ -359,10 +422,11 @@ tx_to_json_struct( %% Used to compute the Merkle roots of v1 transactions' data and to compute %% Merkle proofs for v2 transactions when their data is uploaded without proofs. generate_chunk_tree(TX) -> + Mode = chunking_mode(TX#tx.format), generate_chunk_tree(TX, sized_chunks_to_sized_chunk_ids( chunks_to_size_tagged_chunks( - chunk_binary(?DATA_CHUNK_SIZE, TX#tx.data) + chunk_binary(Mode, ?DATA_CHUNK_SIZE, TX#tx.data) ) ) ). @@ -378,11 +442,43 @@ generate_chunk_id(Chunk) -> %% @doc Split the binary into chunks. Used for computing the Merkle roots of %% v1 transactions' data and computing Merkle proofs for v2 transactions' when %% their data is uploaded without proofs. -chunk_binary(ChunkSize, Bin) when byte_size(Bin) < ChunkSize -> - [Bin]; chunk_binary(ChunkSize, Bin) -> + chunk_binary(arweavejs, ChunkSize, Bin). + +chunking_mode(1) -> + legacy; +chunking_mode(2) -> + arweavejs; +chunking_mode(_) -> + legacy. + +%% @doc Split the binary into chunks using the requested mode. +%% legacy: fixed-size chunking with a smaller final chunk. +%% arweavejs: size-balanced chunking where the last two chunks may be small. +%% This is the chunking logic used by the arweave-js library. +%% Adapted from: https://github.com/ArweaveTeam/arweave-js/blob/39d8ef2799a2c555e6f9b0cc6adabd7cbc411bc8/src/common/lib/merkle.ts#L43 +chunk_binary(legacy, ChunkSize, Bin) when byte_size(Bin) < ChunkSize -> + [Bin]; +chunk_binary(legacy, ChunkSize, Bin) -> <> = Bin, - [ChunkBin | chunk_binary(ChunkSize, Rest)]. 
+ [ChunkBin | chunk_binary(legacy, ChunkSize, Rest)]; +chunk_binary(arweavejs, ChunkSize, Bin) -> + chunk_binary_arweavejs(arweavejs, ChunkSize, Bin, []). + +chunk_binary_arweavejs(arweavejs, ChunkSize, Bin, Acc) + when byte_size(Bin) >= ChunkSize -> + BinSize = byte_size(Bin), + NextChunkSize = BinSize - ChunkSize, + ChunkSize2 = + case NextChunkSize > 0 andalso NextChunkSize < ?MIN_CHUNK_SIZE of + true -> + (BinSize + 1) div 2; + false -> ChunkSize + end, + <> = Bin, + chunk_binary_arweavejs(arweavejs, ChunkSize, Rest, [Chunk | Acc]); +chunk_binary_arweavejs(arweavejs, _ChunkSize, Bin, Acc) -> + lists:reverse([Bin | Acc]). %% @doc Assign a byte offset to every chunk in the list. chunks_to_size_tagged_chunks(Chunks) -> @@ -405,6 +501,22 @@ chunks_to_size_tagged_chunks(Chunks) -> sized_chunks_to_sized_chunk_ids(SizedChunks) -> [{generate_chunk_id(Chunk), Size} || {Chunk, Size} <- SizedChunks]. +%% @doc Return the number of bytes the weave is increased by when the given transaction +%% is included. +get_weave_size_increase(#tx{ data_size = DataSize }, Height) -> + get_weave_size_increase(DataSize, Height); + +get_weave_size_increase(0, _Height) -> + 0; +get_weave_size_increase(DataSize, Height) -> + case Height >= ar_fork:height_2_5() of + true -> + %% The smallest multiple of ?DATA_CHUNK_SIZE larger than or equal to data_size. + ar_poa:get_padded_offset(DataSize, 0); + false -> + DataSize + end. + %%%=================================================================== %%% Tests. %%%=================================================================== @@ -420,6 +532,31 @@ new(Data, Reward) -> data_size = byte_size(Data) }. +chunk_binary_legacy_test() -> + ChunkSize = 10, + Data = binary:copy(<<"a">>, 25), + ChunksLegacy = chunk_binary(legacy, ChunkSize, Data), + ?assertEqual([10, 10, 5], [byte_size(Chunk) || Chunk <- ChunksLegacy]), + ?assertEqual(ChunksLegacy, chunk_binary(legacy, ChunkSize, Data)). 
+ +chunk_binary_arweavejs_balanced_test() -> + ChunkSize = ?DATA_CHUNK_SIZE, + MinChunkSize = ?MIN_CHUNK_SIZE, + DataSize = ChunkSize + MinChunkSize - 1, + Data = binary:copy(<<"b">>, DataSize), + Chunks = chunk_binary(arweavejs, ChunkSize, Data), + ExpectedFirst = (DataSize + 1) div 2, + ExpectedSecond = DataSize - ExpectedFirst, + ?assertEqual([ExpectedFirst, ExpectedSecond], [byte_size(Chunk) || Chunk <- Chunks]). + +chunk_binary_arweavejs_standard_test() -> + ChunkSize = ?DATA_CHUNK_SIZE, + MinChunkSize = ?MIN_CHUNK_SIZE, + DataSize = ChunkSize + MinChunkSize, + Data = binary:copy(<<"c">>, DataSize), + Chunks = chunk_binary(arweavejs, ChunkSize, Data), + ?assertEqual([ChunkSize, MinChunkSize], [byte_size(Chunk) || Chunk <- Chunks]). + sign_tx_test_() -> {timeout, 30, fun test_sign_tx/0}. @@ -681,8 +818,7 @@ json_struct_to_tx_failure_test() -> {"data_root_invalid_b64", BaseStruct#{ <<"data_root">> => InvalidB64 }, badarg}, {"tag_name_invalid_b64", BaseStruct#{ <<"tags">> => BadTagName }, badarg}, {"tag_value_invalid_b64", BaseStruct#{ <<"tags">> => BadTagValue }, badarg}, - {"target_invalid_b64", BaseStruct#{ <<"target">> => InvalidB64 }, badarg}, - {"invalid_signature_type", BaseStruct#{ <<"owner">> => <<>> }, {badmatch, {ecdsa,secp256k1}}} + {"target_invalid_b64", BaseStruct#{ <<"target">> => InvalidB64 }, badarg} ], lists:foreach( @@ -819,8 +955,7 @@ tx_to_json_struct_failure_test() -> {"denomination_not_integer_when_positive", BaseTX#tx{denomination = <<"5">>}, badarg}, {"tag_name_not_binary", BaseTX#tx{tags = [{not_binary, <<"val">>}]}, badarg}, {"tag_value_not_binary", BaseTX#tx{tags = [{<<"key">>, not_binary}]}, badarg}, - {"tags_not_list", BaseTX#tx{tags = #{}}, {case_clause, #{}}}, - {"invalid_signature_type", BaseTX#tx{signature_type = ?ECDSA_KEY_TYPE}, {badmatch, {ecdsa,secp256k1}}} + {"tags_not_list", BaseTX#tx{tags = #{}}, {case_clause, #{}}} ], lists:foreach( diff --git a/src/ar_wallet.erl b/src/ar_wallet.erl index 542a931de..116809db9 100644 --- 
a/src/ar_wallet.erl +++ b/src/ar_wallet.erl @@ -1,30 +1,51 @@ -module(ar_wallet). -export([sign/2, sign/3, hmac/1, hmac/2, verify/3, verify/4]). --export([to_pubkey/1, to_pubkey/2, to_address/1, to_address/2, new/0, new/1]). +-export([to_pubkey/1, to_pubkey/2, to_address/1, to_address/2, new/0, new_ecdsa/0, new/1]). -export([new_keyfile/2, load_keyfile/1, load_keyfile/2, load_key/1, load_key/2]). -export([to_json/1, from_json/1, from_json/2]). +-export([recover_key/3]). +-export([compress_ecdsa_pubkey/1]). -include("include/ar.hrl"). -include_lib("public_key/include/public_key.hrl"). %%% @doc Utilities for manipulating wallets. -define(WALLET_DIR, "."). --define(WALLET_POOL_NAME, ar_wallet_pool). -define(WALLET_POOL_TARGET, 6). %%% Public interface. new() -> new({rsa, 65537}). -new(KeyType = {rsa, 65537}) -> +new(KeyType) when KeyType =:= {rsa, 65537} orelse KeyType =:= {eddsa, ed25519} orelse KeyType =:= ethereum orelse KeyType =:= solana -> + case request_pooled_wallet(KeyType) of + {ok, Wallet} -> Wallet; + timeout -> generate_wallet(KeyType) + end; +new(KeyType = {?ECDSA_SIGN_ALG, secp256k1}) -> case request_pooled_wallet(KeyType) of {ok, Wallet} -> Wallet; timeout -> generate_wallet(KeyType) end. +new_ecdsa() -> + new({?ECDSA_SIGN_ALG, secp256k1}). 
+ generate_wallet(KeyType = {KeyAlg, PublicExpnt}) when KeyType =:= {rsa, 65537} -> {[_, Pub], [_, Pub, Priv|_]} = {[_, Pub], [_, Pub, Priv|_]} = crypto:generate_key(KeyAlg, {4096, PublicExpnt}), + {{KeyType, Priv, Pub}, {KeyType, Pub}}; +generate_wallet(KeyType = {KeyAlg, KeyCrv}) when KeyAlg =:= ?ECDSA_SIGN_ALG andalso KeyCrv =:= secp256k1 -> + {OrigPub, Priv} = crypto:generate_key(ecdh, KeyCrv), + Pub = compress_ecdsa_pubkey(OrigPub), + {{KeyType, Priv, Pub}, {KeyType, Pub}}; +generate_wallet(ethereum) -> + {Pub, Priv} = crypto:generate_key(ecdh, secp256k1), + {{ethereum, Priv, Pub}, {ethereum, Pub}}; +generate_wallet(solana) -> + generate_wallet({eddsa, ed25519}); +generate_wallet(KeyType = {KeyAlg, Curve}) when KeyType =:= {?EDDSA_SIGN_ALG, ed25519} -> + {Pub, Priv} = crypto:generate_key(KeyAlg, Curve), {{KeyType, Priv, Pub}, {KeyType, Pub}}. request_pooled_wallet(KeyType) -> @@ -38,12 +59,13 @@ request_pooled_wallet(KeyType) -> end. ensure_wallet_pool(KeyType) -> - case whereis(?WALLET_POOL_NAME) of + PoolName = wallet_pool_name(KeyType), + case whereis(PoolName) of undefined -> Pid = spawn(fun() -> wallet_pool_loop(KeyType, queue:new(), queue:new(), 0) end), - case catch register(?WALLET_POOL_NAME, Pid) of + case catch register(PoolName, Pid) of true -> Pid; - _ -> whereis(?WALLET_POOL_NAME) + _ -> whereis(PoolName) end; Pid -> Pid @@ -83,13 +105,23 @@ maybe_spawn_wallet_workers(KeyType, Wallets, Waiters, InFlight) -> ), {Wallets, InFlight + Needed}. +wallet_pool_name({rsa, 65537}) -> + ar_wallet_pool_rsa_65537; +wallet_pool_name({?EDDSA_SIGN_ALG, ed25519}) -> + ar_wallet_pool_ed25519; +wallet_pool_name({?ECDSA_SIGN_ALG, secp256k1}) -> + ar_wallet_pool_ecdsa_secp256k1; +wallet_pool_name(solana) -> + ar_wallet_pool_solana; +wallet_pool_name(ethereum) -> + ar_wallet_pool_ethereum. %% @doc Sign some data with a private key. sign(Key, Data) -> sign(Key, Data, sha256). %% @doc sign some data, hashed using the provided DigestType. 
-%% TODO: support signing for other key types +%% RSA and ECDSA signatures use wallet-level wrappers. sign({{rsa, PublicExpnt}, Priv, Pub}, Data, DigestType) when PublicExpnt =:= 65537 -> rsa_pss:sign( Data, @@ -100,6 +132,13 @@ sign({{rsa, PublicExpnt}, Priv, Pub}, Data, DigestType) when PublicExpnt =:= 655 privateExponent = binary:decode_unsigned(Priv) } ); +sign({{KeyAlg, KeyCrv}, Priv, _Pub}, Data, _DigestType) + when KeyAlg =:= ?ECDSA_SIGN_ALG andalso KeyCrv =:= secp256k1 -> + secp256k1_nif:sign(Data, Priv); +sign({KeyType = {KeyAlg, Curve}, Priv, _Pub}, Data, _DigestType) when KeyType =:= {?EDDSA_SIGN_ALG, ed25519} -> + crypto:sign(KeyAlg, none, Data, [Priv, Curve]); +sign({ethereum, Priv, Pub}, Data, _DigestType) -> + secp256k1_nif:sign(Data, Priv, ethereum); sign({{KeyType, Priv, Pub}, {KeyType, Pub}}, Data, DigestType) -> sign({KeyType, Priv, Pub}, Data, DigestType). @@ -121,7 +160,24 @@ verify({{rsa, PublicExpnt}, Pub}, Data, Sig, DigestType) when PublicExpnt =:= 65 publicExponent = PublicExpnt, modulus = binary:decode_unsigned(Pub) } - ). + ); +%% NOTE: We will not write pubkey for ECDSA signature. So don't use verify function +%% for ECDSA directly, use ecrecover pattern. This function will always return false +%% if called with no Pub.
+verify({{KeyAlg, KeyCrv}, Pub}, Data, Sig, _DigestType) + when KeyAlg =:= ?ECDSA_SIGN_ALG andalso KeyCrv =:= secp256k1 -> + {Pass, PubExtracted} = secp256k1_nif:ecrecover(Data, Sig), + Pass andalso PubExtracted =:= Pub; +verify({{KeyAlg, Curve}, Pub}, Data, Sig, _DigestType) when + byte_size(Pub) == 32 andalso byte_size(Sig) == 64 andalso Curve =:= ed25519 andalso KeyAlg =:= ?EDDSA_SIGN_ALG -> + crypto:verify(eddsa, none, Data, Sig, [Pub, Curve]); +verify({ethereum, Pub}, Data, Sig, _DigestType) -> + {Pass, PubExtracted} = secp256k1_nif:ecrecover(Data, Sig, ethereum), + Pass andalso PubExtracted =:= compress_ecdsa_pubkey(Pub); +verify({solana, Pub}, Data, Sig, _DigestType) when + byte_size(Pub) == 32 andalso byte_size(Sig) == 64 -> + HexData = hb_util:to_hex(Data), + crypto:verify(eddsa, none, HexData, Sig, [Pub, ed25519]). %% @doc Find a public key from a wallet. to_pubkey(Pubkey) -> @@ -144,8 +200,18 @@ to_address({{_, _, PubKey}, {_, PubKey}}, _) -> to_address(PubKey); to_address(PubKey, {rsa, 65537}) -> to_rsa_address(PubKey); -to_address(PubKey, {ecdsa, 256}) -> - to_ecdsa_address(PubKey). +to_address(PubKey, {?ECDSA_SIGN_ALG, secp256k1}) -> + %% For Arweave L1 ECDSA transactions, address is SHA256 hash of public key + %% (same as RSA). The keccak-based Ethereum address is used elsewhere. + hash_address(PubKey); +to_address(PubKey, {?EDDSA_SIGN_ALG, ed25519}) -> + to_eddsa_address(PubKey); +to_address(PubKey, solana) -> + to_solana_address(PubKey); +to_address(PubKey, ethereum) -> + to_ethereum_address(PubKey); +to_address(PubKey, typed_ethereum) -> + to_ethereum_address(PubKey). %% @doc Generate a new wallet public and private key, with a corresponding keyfile. %% The provided key is used as part of the file name. 
@@ -170,6 +236,11 @@ new_keyfile(KeyType, WalletName) -> {{_, Prv, Pb}, _} = new(KeyType), PrivKey = {KeyType, Prv, Pb}, Ky = to_json(PrivKey), + {Pb, Prv, Ky}; + ethereum -> + {Pb, Prv} = crypto:generate_key(ecdh, secp256k1), + PrivKey = {KeyType, Prv, Pb}, + Ky = to_json(PrivKey), {Pb, Prv, Ky} end, Filename = wallet_filepath(WalletName, Pub, KeyType), @@ -281,6 +352,16 @@ from_json(JsonBinary, Opts) -> end, {{KeyType, Priv, Pub}, {KeyType, Pub}}. +%% @doc Recover the public key from a signature (for ECDSA). +%% For ECDSA transactions, the public key is not included in the transaction, +%% it must be recovered from the signature. +recover_key(_Data, <<>>, ?ECDSA_KEY_TYPE) -> + <<>>; +recover_key(Data, Signature, ?ECDSA_KEY_TYPE) -> + {_Pass, PubKey} = secp256k1_nif:ecrecover(Data, Signature), + %% Note: if Pass = false, then PubKey will be <<>> + PubKey. + %%%=================================================================== %%% Private functions. %%%=================================================================== @@ -291,9 +372,14 @@ to_rsa_address(PubKey) -> hash_address(PubKey) -> crypto:hash(sha256, PubKey). -to_ecdsa_address(PubKey) -> +to_ethereum_address(PubKey) -> hb_keccak:key_to_ethereum_address(PubKey). +to_eddsa_address(PubKey) -> + hash_address(PubKey). + +to_solana_address(PubKey) -> + hb_util:base58_encode(PubKey). %%%=================================================================== %%% Private functions. %%%=================================================================== @@ -314,4 +400,4 @@ compress_ecdsa_pubkey(<<4:8, PubPoint/binary>>) -> 0 -> <<2:8>>; 1 -> <<3:8>> end, - iolist_to_binary([PubKeyHeader, X]). + iolist_to_binary([PubKeyHeader, X]). 
\ No newline at end of file diff --git a/src/dev_arweave.erl b/src/dev_arweave.erl index faa30aa4d..0da64cf5f 100644 --- a/src/dev_arweave.erl +++ b/src/dev_arweave.erl @@ -4,17 +4,31 @@ %%% The node(s) that are used to query data may be configured by altering the %%% `/arweave` route in the node's configuration message. -module(dev_arweave). --export([tx/3, chunk/3, block/3, current/3, status/3, price/3, tx_anchor/3]). --export([post_tx/3, post_tx/4, post_binary_ans104/2]). +-export([info/0]). +-export([tx/3, raw/3, chunk/3, block/3, current/3, status/3, price/3, tx_anchor/3]). +-export([post_tx_header/2, post_tx/3, post_tx/4, post_binary_ans104/2, post_json_chunk/2]). +%%% Helper functions +-export([get_chunk/2, bundle_header/2, bundle_header/3]). -include("include/hb.hrl"). -include_lib("eunit/include/eunit.hrl"). +-define(IS_BLOCK_ID(X), (is_binary(X) andalso byte_size(X) == 64)). + +%% @doc Route unknown keys through offset resolution first, then fall back to +%% the message device for direct key access. +info() -> + #{ + excludes => [<<"keys">>, <<"set">>, <<"set-path">>, <<"remove">>], + default => fun dev_arweave_offset:get/4 + }. + %% @doc Proxy the `/info' endpoint from the Arweave node. status(_Base, _Request, Opts) -> request(<<"GET">>, <<"/info">>, Opts). -%% @doc Returns the given transaction, if known to the client node(s), as an -%% AO-Core message. +%% @doc Returns the given transaction as an AO-Core message. By default, this +%% embeds the `/raw` payload. Set `exclude-data` to true to return just the +%% header. tx(Base, Request, Opts) -> case hb_maps:get(<<"method">>, Request, <<"GET">>, Opts) of <<"POST">> -> post_tx(Base, Request, Opts); @@ -41,7 +55,7 @@ post_tx(Base, RawRequest, Opts) -> ), {error, <<"No commitment found on `POST tx` request.">>}; Devices -> - ?event({too_many_commitment_devices, Devices}), + ?event(error, {too_many_commitment_devices, Devices}), {error, too_many_commitment_devices} end. 
@@ -60,116 +74,638 @@ extract_target(Base, Request, Opts) -> not_found end. -post_tx(Base, Request, Opts, <<"tx@1.0">>) -> - ?event({{request, Request}, {base, Base}}), +post_tx(_Base, Request, Opts, <<"tx@1.0">>) -> TX = hb_message:convert(Request, <<"tx@1.0">>, Opts), - ?event({tx, TX}), - JSON = ar_tx:tx_to_json_struct(TX#tx{ data = <<>> }), - Serialized = hb_json:encode(JSON), - ?event({serialized_tx, {explicit, Serialized}}), - TXResponse = hb_http:post( - hb_opts:get(gateway, not_found, Opts), - #{ - <<"path">> => <<"/tx">>, - <<"body">> => Serialized - }, - Opts - ), - case TXResponse of + Res = post_tx_header(TX, Opts), + case Res of {ok, _} -> - ?event({uploaded_arweave_tx, {request, Request}, {result, TXResponse}}), CacheRes = hb_cache:write(Request, Opts), - ?event( - {cache_uploaded_message, - {msg, Request}, - {status, - case CacheRes of {ok, _} -> ok; - _ -> failed - end - } - } - ), - TXResponse; - Else -> Else - end; -post_tx(Base, Request, Opts, <<"ans104@1.0">>) -> - ?event({{request, Request}, {base, Base}, {opts, Opts}}), + case CacheRes of + {ok, _} -> + ?event(debug_arweave, {tx_cached, {msg, Request}, {status, ok}}); + _ -> + ?event(error, {tx_failed_to_cache, {msg, Request}, CacheRes}) + end; + _ -> + ok + end, + Res; + +post_tx(_Base, Request, Opts, <<"ans104@1.0">>) -> TX = hb_message:convert(Request, <<"ans104@1.0">>, Opts), - ?event({tx, TX}), Serialized = ar_bundles:serialize(TX), - ?event({serialized_tx, Serialized}), - post_binary_ans104(Serialized, Opts). + LogExtra = [ + {codec, <<"ans104@1.0">>}, + {id, {explicit, hb_util:human_id(TX#tx.id)}} + ], + post_binary_ans104(Serialized, LogExtra, Opts). + + +post_tx_header(TX, Opts) -> + JSON = ar_tx:tx_to_json_struct(TX#tx{ data = <<>> }), + Serialized = hb_json:encode(JSON), + LogExtra = [ + {codec, <<"tx@1.0">>}, + {id, {explicit, hb_util:human_id(TX#tx.id)}} + ], + request( + <<"POST">>, + <<"/tx">>, + #{ <<"body">> => Serialized }, + LogExtra, + Opts + ). 
post_binary_ans104(SerializedTX, Opts) -> - hb_http:post( + LogExtra = [ + {codec, <<"ans104@1.0">>}, + {id, unknown} + ], + post_binary_ans104(SerializedTX, LogExtra, Opts). + +post_binary_ans104(SerializedTX, LogExtra, Opts) -> + Res = hb_http:post( hb_opts:get(bundler_ans104, not_found, Opts), #{ - <<"path">> => <<"/tx">>, + <<"path">> => <<"/~bundler@1.0/tx?codec-device=ans104@1.0">>, <<"content-type">> => <<"application/octet-stream">>, <<"body">> => SerializedTX }, - Opts#{ - http_client => - hb_opts:get(bundler_ans104_http_client, httpc, Opts) - } - ). + Opts + ), + to_message(<<"/tx">>, <<"POST">>, Res, LogExtra, Opts). -%% @doc Get a transaction ID from the Arweave node, as indicated by the `tx` key -%% in the request or base message. If the `data' key is present and set to -%% `false', the data is not retrieved and added to the response. If the `data' -%% key is set to `always', transactions for which the header is available but -%% the data is not will lead to an error. Otherwise, just the header will be -%% returned. +%% @doc Get a transaction from the Arweave node, as indicated by the +%% `tx` key in the request or base message. By default, this embeds the data +%% payload. Set `exclude_data` to true to return just the header. get_tx(Base, Request, Opts) -> - case find_txid(Base, Request, Opts) of + case find_key(<<"tx">>, Base, Request, Opts) of + not_found -> {error, not_found}; + TXID -> + request( + <<"GET">>, + <<"/tx/", TXID/binary>>, + Opts#{ + exclude_data => + hb_util:bool( + find_key( + <<"exclude-data">>, + Base, + Request, + Opts + ) + ) + } + ) + end. + +%% @doc A router for range requests by method. Both `HEAD` and `GET` requests +%% are supported. +raw(Base, Request, Opts) -> + case hb_maps:get(<<"method">>, Request, <<"GET">>, Opts) of + <<"HEAD">> -> head_raw(Base, Request, Opts); + <<"GET">> -> get_raw(Base, Request, Opts) + end. 
+ +%% @doc Handle `HEAD /raw=ID` requests by reading the header chunk and +%% returning the `content-type` of the item, if found. +head_raw(Base, Request, Opts) -> + ?event(debug_raw, {raw, {base, Base}, {request, Request}}), + case find_key(<<"raw">>, Base, Request, Opts) of + not_found -> {error, not_found}; + TXID -> + % Read the data from the local cache. + IndexStore = hb_store_arweave:store_from_opts(Opts), + case hb_store_arweave:read_offset(IndexStore, TXID) of + {ok, + #{ + <<"codec-device">> := CodecDevice, + <<"start-offset">> := StartOffset, + <<"length">> := Length + }} -> + CodecFun = + case CodecDevice of + <<"ans104@1.0">> -> fun head_raw_ans104/4; + <<"tx@1.0">> -> fun head_raw_tx/4; + _ -> throw({invalid_codec_device, CodecDevice}) + end, + CodecFun(TXID, StartOffset, Length, Opts); + not_found -> + ?event( + arweave, + {raw_head_offset_failed, {id, TXID}}, + Opts + ), + {error, not_found} + end + end. + +%% @doc Arweave transaction headers are not part of the Arweave data tree, and +%% thus we do not add their header bytes to the offset in order to read their +%% data. +head_raw_tx(TXID, StartOffset, Length, Opts) -> + {ok, StructuredTXHeader} = + get_tx( + #{ <<"tx">> => TXID }, + #{ <<"exclude-data">> => true }, + Opts + ), + ContentType = + hb_ao:get( + <<"content-type">>, + StructuredTXHeader, + <<"application/octet-stream">>, + Opts#{ + cache_control => + [<<"no-cache">>, <<"no-store">>] + } + ), + {ok, + #{ + <<"raw-id">> => TXID, + <<"offset">> => StartOffset, + <<"data-offset">> => StartOffset, + <<"content-type">> => ContentType, + <<"header-length">> => 0, + <<"content-length">> => Length, + <<"accept-ranges">> => <<"bytes">> + } + }. + +%% @doc ANS-104 headers are stored as part of the global Arweave data tree, so +%% to read the data associated with their IDs, we must first read the header +%% chunk, deserialize it, and offset our data read from its starting offset.
+head_raw_ans104(TXID, ArweaveOffset, Length, Opts) -> + ?event(debug_raw, {head_raw_ans104, {txid, TXID}, {arweave_offset, ArweaveOffset}, {length, Length}}), + HeaderReq = + #{ + <<"path">> => <<"chunk">>, + <<"offset">> => ArweaveOffset + 1, + <<"length">> => min(Length, ?DATA_CHUNK_SIZE) + }, + case hb_ao:resolve(#{ <<"device">> => <<"arweave@2.9">> }, HeaderReq, Opts) of + {ok, HeaderChunk} -> + do_head_raw_ans104(TXID, ArweaveOffset, Length, HeaderChunk, Opts); + {error, Error} -> {error, Error} + end. +do_head_raw_ans104(TXID, ArweaveOffset, Length, Data, _Opts) -> + {ok, HeaderSize, HeaderTX} = ar_bundles:deserialize_header(Data), + ContentType = + list_find( + <<"content-type">>, + HeaderTX#tx.tags, + <<"application/octet-stream">> + ), + {ok, + #{ + <<"raw-id">> => TXID, + <<"offset">> => ArweaveOffset, + <<"data-offset">> => ArweaveOffset + HeaderSize, + <<"content-type">> => ContentType, + <<"header-length">> => HeaderSize, + <<"content-length">> => Length - HeaderSize, + <<"accept-ranges">> => <<"bytes">> + } + }. + +%% @doc Get raw transaction *data* and `content-type` of an Arweave message. +%% Does not deserialize the message, nor return signature information. Included +%% only for compatibility with the legacy Arweave gateway `/raw` endpoint. 
+get_raw(Base, Request, Opts) -> + ?event(debug_raw, {raw, {base, Base}, {request, Request}}), + case head_raw(Base, Request, Opts) of not_found -> {error, not_found}; - TXID -> request(<<"GET">>, <<"/tx/", TXID/binary>>, Opts) + Err = {error, _} -> Err; + {ok, + Header = #{ + <<"raw-id">> := TXID, + <<"data-offset">> := ArweaveDataOffset, + <<"content-type">> := ContentType, + <<"content-length">> := FullContentLength + } + } -> + ?event(debug_raw, {raw_header, + {header, Header}}), + case parse_range_params(Request, Opts) of + {ok, StartRange, EndRange} -> + RangeLength = (EndRange - StartRange) + 1, + {ok, Data} = + hb_store_arweave:read_chunks( + ArweaveDataOffset + StartRange, + RangeLength, + Opts + ), + { + ok, + Header#{ + <<"status">> => 206, + <<"content-type">> => ContentType, + <<"content-length">> => RangeLength, + <<"content-range">> => + << + "bytes ", + (hb_util:bin(StartRange))/binary, + "-", + (hb_util:bin(EndRange))/binary, + "/", + (hb_util:bin(FullContentLength))/binary + >>, + <<"body">> => Data + } + }; + false -> + case hb_store_arweave:read_chunks(ArweaveDataOffset, FullContentLength, Opts) of + {ok, Data} -> + {ok, Header#{ + <<"content-type">> => ContentType, + <<"body">> => Data + }}; + Error -> + ?event( + arweave, + {raw_read_chunks_failed, {id, TXID}, {error, Error}}, + Opts + ), + Error + end + end end. +%% @doc Extract the start and end range from a request. +parse_range_params(<<"bytes=", ByteDescriptor/binary>>, Opts) -> + parse_range_params(<<"bytes ", ByteDescriptor/binary>>, Opts); +parse_range_params(<<"bytes ", ByteDescriptor/binary>>, _Opts) -> + [ByteRange|_] = binary:split(ByteDescriptor, <<"/">>), + [Start, End] = binary:split(ByteRange, <<"-">>), + {ok, hb_util:int(Start), hb_util:int(End)}; +parse_range_params(Msg, Opts) -> + case hb_ao:resolve(Msg, <<"range">>, Opts#{ hashpath => ignore }) of + {ok, Str} -> parse_range_params(Str, Opts); + _ -> false + end. 
+ +%% @doc Case-insensitively find a key in a list and return its value. +list_find(_Key, [], Default) -> Default; +list_find(Key, [{XKey, Value} | Rest], Default) -> + NormalizedKey = hb_util:to_lower(hb_ao:normalize_key(XKey)), + if NormalizedKey =:= Key -> Value; + true -> list_find(Key, Rest, Default) + end. + +%% @doc Retrieve the data of an Arweave message that has been indexed. +data(TXID, Opts) -> + request(<<"GET">>, <<"/raw/", TXID/binary>>, Opts). + chunk(Base, Request, Opts) -> case hb_maps:get(<<"method">>, Request, <<"GET">>, Opts) of <<"POST">> -> post_chunk(Base, Request, Opts); - <<"GET">> -> {error, not_implemented} + <<"GET">> -> get_chunk_range(Base, Request, Opts) end. post_chunk(_Base, Request, Opts) -> Serialized = hb_json:encode(Request), - ?event({uploading_chunk, {explicit, Serialized}}), + post_json_chunk(Serialized, Opts). + +post_json_chunk(JSON, Opts) -> hb_http:post( hb_opts:get(gateway, not_found, Opts), #{ <<"path">> => <<"/chunk">>, - <<"body">> => Serialized + <<"body">> => JSON }, Opts ). -add_data(TXID, TXHeader, Opts) -> - case data(TXID, Opts) of - {ok, Data} -> - TX = TXHeader#tx{ data = Data }, - ?event( - {retrieved_tx_with_data, - {id, TXID}, - {data_size, byte_size(Data)}, - {tx, TX} - } - ), - {ok, TX}; +get_chunk_range(_Base, Request, Opts) -> + Offset = hb_util:int(hb_ao:get(<<"offset">>, Request, Opts)), + Length = hb_util:int(hb_ao:get(<<"length">>, Request, 1, Opts)), + case fetch_chunk_range(Offset, Length, Opts) of + {ok, Chunks} -> + Data = iolist_to_binary(Chunks), + case hb_maps:is_key(<<"length">>, Request, Opts) of + true -> + {ok, binary:part(Data, 0, min(Length, byte_size(Data)))}; + false -> + {ok, Data} + end; {error, Reason} -> - ?event( - {data_retrieval_failed_after_header, - {id, TXID}, - {error, Reason} + {error, Reason} + end. + +%% @doc Fetch a range of chunks in parallel. Dispatches to pre-threshold or +%% post-threshold algorithm depending on the offset. 
A single TX/data-item +%% cannot span the strict data split threshold, so mixed ranges are rejected. +fetch_chunk_range(Offset, Length, Opts) -> + EndOffset = Offset + Length - 1, + ?event(debug_arweave, {fetch_chunk_range, + {offset, Offset}, + {end_offset, EndOffset}, + {size, Length}}), + case {Offset >= ?STRICT_DATA_SPLIT_THRESHOLD, + EndOffset >= ?STRICT_DATA_SPLIT_THRESHOLD} of + {true, true} -> + fetch_post_threshold(Offset, EndOffset, Opts); + {false, false} -> + fetch_pre_threshold(Offset, EndOffset, Opts); + {false, true} -> + {error, chunk_range_spans_strict_data_split_threshold} + end. + +%% @doc Post-threshold: chunks occupy fixed 256KiB buckets. Query at +%% DATA_CHUNK_SIZE increments up to EndOffset, and if the assembled data is +%% still short, fetch exactly one additional tail chunk. This can happen +%% when a dataitem starts in the middle of a chunk, the initial set of +%% offsets generated doesn't know this and so leaves off a single chunk at +%% the end. +%% +%% Note: we don't want to *always* query an extra chunk because if it doesn't +%% exist, dev_arweave will consider the dataitem missing. +fetch_post_threshold(Offset, EndOffset, Opts) -> + hb_prometheus:observe( + EndOffset - Offset, + arweave_chunk_load_requested_bytes, + []), + Offsets = generate_offsets(Offset, EndOffset, ?DATA_CHUNK_SIZE), + case fetch_and_collect(Offsets, Opts) of + {ok, ChunkInfos} -> + % Check for one additional tail chunk if needed. 
+ Sorted = sort_chunks(ChunkInfos), + {ok, Binaries} = assemble_chunks(Sorted, Offset), + ExpectedLength = EndOffset - Offset + 1, + BinarySize = iolist_size(Binaries), + case BinarySize < ExpectedLength of + false -> + {ok, Binaries}; + true -> + ExtraOffset = min( + lists:last(Offsets) + ?DATA_CHUNK_SIZE, EndOffset), + ?event(debug_arweave, {fetching_extra_chunk, + {binary_size, BinarySize}, + {expected_length, ExpectedLength}, + {extra_offset, ExtraOffset}}), + case fetch_and_collect([ExtraOffset], Opts) of + {ok, ExtraInfos} -> + assemble_chunks(Sorted ++ ExtraInfos, Offset); + Error -> + Error + end + end; + Error -> Error + end. + +%% @doc Pre-threshold: chunks can be any size <= 256KiB. First pass at +%% DATA_CHUNK_SIZE increments plus one extra candidate chunk, then +%% iteratively fill gaps until contiguous. +fetch_pre_threshold(Offset, EndOffset, Opts) -> + hb_prometheus:observe( + EndOffset - Offset, + arweave_chunk_load_requested_bytes, + []), + Offsets = generate_offsets(Offset, EndOffset, ?DATA_CHUNK_SIZE), + case fetch_and_collect(Offsets, Opts) of + {ok, ChunkInfos} -> + fill_gaps(ChunkInfos, Offset, EndOffset, Opts); + Error -> Error + end. + +%% @doc Iteratively detect gaps in coverage and fetch the chunk at the start +%% of each gap until the entire range [Offset, EndOffset] is covered. +fill_gaps(ChunkInfos, Offset, EndOffset, Opts) -> + Sorted = sort_chunks(ChunkInfos), + case find_gaps(Sorted, Offset, EndOffset) of + [] -> + assemble_chunks(Sorted, Offset); + Gaps -> + % WARNING: the find_gaps logic is untested in production and may not + % be needed. We have yet to find an L1 TX that is chunked in such + % a way as to create gaps when using our naive 256KiB chunking. 
+ GapOffsets = [Start || {Start, _End} <- Gaps], + ?event(debug_arweave, + {fill_gaps, + {offset, Offset}, + {end_offset, EndOffset}, + {chunks, + [ + {Start, End, byte_size(Chunk)} + || + {Start, End, Chunk} <- Sorted + ] + }, + {gap_offsets, GapOffsets} } ), - {error, Reason} + ?event(warning, + {fetch_chunk_gap_handling_untested, + {gap_offsets, GapOffsets}}), + case fetch_and_collect(GapOffsets, Opts) of + {ok, NewInfos} -> + ?event(debug_arweave, {fill_gaps, NewInfos}), + fill_gaps( + Sorted ++ NewInfos, + Offset, EndOffset, Opts + ); + Error -> Error + end end. -%% @doc Retrieve the data of a transaction from Arweave. -data(TXID, Opts) -> - ?event({retrieving_tx_data, {tx, TXID}}), - request(<<"GET">>, <<"/raw/", TXID/binary>>, Opts). +%% @doc Fetch chunks at the given offsets in parallel and parse the responses +%% into {AbsoluteStartOffset, AbsoluteEndOffset, ChunkBinary} tuples. +fetch_and_collect(Offsets, Opts) -> + Concurrency = hb_opts:get(arweave_chunk_fetch_concurrency, 10, Opts), + Results = hb_pmap:parallel_map( + Offsets, + fun(O) -> decode_chunk(get_chunk(O, Opts)) end, + Concurrency + ), + collect_chunks(Results). + +%% @doc Generate a list of offsets from Start to End (inclusive) stepping by +%% Step bytes. Used to produce candidate query offsets at 256KiB increments. +generate_offsets(Start, End, Step) -> + generate_offsets(Start, End, Step, []). + +generate_offsets(Current, End, _Step, Acc) when Current > End -> + Offsets = lists:reverse(Acc), + ?event(debug_arweave, {fetch_chunk_offsets, {offsets, Offsets}}), + Offsets; +generate_offsets(Current, End, Step, Acc) -> + generate_offsets(Current + Step, End, Step, [Current | Acc]). + +%% @doc Decode a chunk response into a {Start, End, Binary} tuple. +%% Runs inside the pmap worker so raw JSON is GC'd per-worker. 
+decode_chunk({ok, JSON}) -> + Chunk = hb_util:decode(maps:get(<<"chunk">>, JSON)), + AbsEnd = hb_util:int(maps:get(<<"absolute_end_offset">>, JSON)), + AbsStart = AbsEnd - byte_size(Chunk) + 1, + ?event(debug_arweave, + {decode_chunk, + {abs_start, AbsStart}, + {abs_end, AbsEnd}, + {size, byte_size(Chunk)}}), + {ok, {AbsStart, AbsEnd, Chunk}}; +decode_chunk({error, _} = Err) -> + Err. + +%% @doc Collect decoded chunk results. Fails fast on the first error. +collect_chunks(Results) -> + collect_chunks(Results, []). + +collect_chunks([], Acc) -> + {ok, lists:reverse(Acc)}; +collect_chunks([{ok, ChunkInfo} | Rest], Acc) -> + collect_chunks(Rest, [ChunkInfo | Acc]); +collect_chunks([{error, Reason} | _], _Acc) -> + {error, Reason}. + +%% @doc Sort chunk infos by start offset. If duplicate starts appear, log a +%% warning since this should not happen. +sort_chunks(ChunkInfos) -> + Sorted = lists:sort( + fun({StartA, EndA, _}, {StartB, EndB, _}) -> + case StartA =:= StartB of + true -> + % This should never happen. Logging rather than ignoring + % "just in case". + ?event( + warning, + {duplicate_chunk_start_offset, + {start, StartA}, + {left_end, EndA}, + {right_end, EndB} + } + ); + false -> + ok + end, + StartA =< StartB + end, + ChunkInfos + ), + Sorted. + +%% @doc Find byte ranges within [RangeStart, RangeEnd] not covered by any +%% chunk. Returns a list of {GapStart, GapEnd} tuples. +%% WARNING: the find_gaps logic is untested in production and may not be +%% needed. We have yet to find an L1 TX that is chunked in such +%% a way as to create gaps when using our naive 256KiB chunking. +find_gaps(SortedChunks, RangeStart, RangeEnd) -> + find_gaps(SortedChunks, RangeStart, RangeEnd, []). 
+ +find_gaps([], Pos, RangeEnd, Gaps) when Pos =< RangeEnd -> + lists:reverse([{Pos, RangeEnd} | Gaps]); +find_gaps([], _Pos, _RangeEnd, Gaps) -> + lists:reverse(Gaps); +find_gaps([{ChunkStart, ChunkEnd, _} | Rest], Pos, RangeEnd, Gaps) -> + NewGaps = case ChunkStart > Pos of + true -> [{Pos, ChunkStart - 1} | Gaps]; + false -> Gaps + end, + find_gaps(Rest, max(Pos, ChunkEnd + 1), RangeEnd, NewGaps). + +%% @doc Assemble chunk infos into a list of contiguous binaries suitable for +%% iolist_to_binary. The first chunk is sliced if it starts before Offset. +assemble_chunks(ChunkInfos, Offset) -> + Sorted = sort_chunks(ChunkInfos), + Binaries = lists:map( + fun({ChunkStart, _ChunkEnd, Data}) -> + case ChunkStart < Offset of + true -> + % The first chunk may start before the requested offset; + % trim the leading bytes to start exactly at Offset. + Skip = Offset - ChunkStart, + ?event(debug_arweave, {assemble_chunks, + {skip, Skip}, + {chunk_start, ChunkStart}, + {offset, Offset}, + {byte_size, byte_size(Data)}, + {length, byte_size(Data) - Skip} + }), + binary:part(Data, Skip, byte_size(Data) - Skip); + false -> + ?event(debug_arweave, {assemble_chunks, + {chunk_start, ChunkStart}, + {offset, Offset}, + {byte_size, byte_size(Data)} + }), + Data + end + end, + Sorted + ), + {ok, Binaries}. + +get_chunk(Offset, Opts) -> + % Note: it's possible that we will need to add the x-bucket-based-offset + % header to *some* queries. When querying L1 TX chunks from after the + % strict data split threshold, in theory that header is needed. But I + % haven't found a TX which requires it. However, including the header + % when querying some *dataitems* does cause an error. So for now we will + % leave the header out and continue to search for a case where it is + % needed. + Path = <<"/chunk/", (hb_util:bin(Offset))/binary>>, + request(<<"GET">>, Path, #{ <<"route-by">> => Offset }, Opts).
+ +%% @doc Read and decode the bundle header index at the given global start +%% offset, returning the header size alongside the decoded index entries. +bundle_header(BundleStartOffset, Opts) -> + bundle_header(BundleStartOffset, infinity, Opts). +bundle_header(BundleStartOffset, MaxSize, Opts) -> + case hb_ao:resolve( + #{ <<"device">> => <<"arweave@2.9">> }, + #{ + <<"path">> => <<"chunk">>, + <<"offset">> => BundleStartOffset + 1 + }, + Opts + ) of + {ok, FirstChunk} -> + case ar_bundles:bundle_header_size(FirstChunk) of + invalid_bundle_header -> + {error, invalid_bundle_header}; + HeaderSize when HeaderSize > MaxSize -> + {error, invalid_bundle_header}; + HeaderSize -> + case read_bundle_header( + BundleStartOffset, HeaderSize, + FirstChunk, Opts + ) of + {ok, HeaderBin} -> + case ar_bundles:decode_bundle_header( + HeaderBin + ) of + {_Items, BundleIndex} -> + {ok, HeaderSize, BundleIndex}; + invalid_bundle_header -> + {error, invalid_bundle_header} + end; + Error -> + Error + end + end; + Error -> + Error + end. + +%% @doc Read exactly the bytes needed to decode a bundle header. +read_bundle_header(_BundleStartOffset, HeaderSize, FirstChunk, _Opts) + when HeaderSize =< byte_size(FirstChunk) -> + {ok, binary:part(FirstChunk, 0, HeaderSize)}; +read_bundle_header(BundleStartOffset, HeaderSize, FirstChunk, Opts) -> + RemainingSize = HeaderSize - byte_size(FirstChunk), + case hb_ao:resolve( + #{ <<"device">> => <<"arweave@2.9">> }, + #{ + <<"path">> => <<"chunk">>, + <<"offset">> => BundleStartOffset + byte_size(FirstChunk) + 1, + <<"length">> => RemainingSize + }, + Opts + ) of + {ok, RemainingChunk} -> + {ok, <<FirstChunk/binary, RemainingChunk/binary>>}; + Error -> + Error + end. %% @doc Retrieve (and cache) block information from Arweave. If the `block' key %% is present, it is used to look up the associated block.
If it is of Arweave @@ -189,10 +725,10 @@ block(Base, Request, Opts) -> case Block of <<"current">> -> current(Base, Request, Opts); not_found -> current(Base, Request, Opts); - ID when ?IS_ID(ID) -> block({id, ID}, Opts); + ID when ?IS_BLOCK_ID(ID) -> block({id, ID}, Opts); MaybeHeight -> try hb_util:int(MaybeHeight) of - Int -> block({height, Int}, Opts) + Int -> block({height, Int}, Opts) catch _:_ -> { @@ -204,7 +740,9 @@ block(Base, Request, Opts) -> block({id, ID}, Opts) -> case hb_cache:read(ID, Opts) of {ok, Block} -> - ?event({retrieved_block_from_cache, {id, ID}}), + ?event(arweave_short, {read_block_from_cache, + {id, {explicit, ID}} + }), {ok, Block}; not_found -> request(<<"GET">>, <<"/block/hash/", ID/binary>>, Opts) @@ -212,12 +750,15 @@ block({id, ID}, Opts) -> block({height, Height}, Opts) -> case dev_arweave_block_cache:read(Height, Opts) of {ok, Block} -> - ?event({retrieved_block_from_cache, {height, Height}}), + ?event(arweave_short, {read_block_from_cache, + {height, Height} + }), {ok, Block}; not_found -> request( <<"GET">>, - <<"/block/height/", (hb_util:bin(Height))/binary>>, + <<"/block/height/", + (hb_util:bin(Height))/binary>>, #{ <<"route-by">> => Height }, Opts ) @@ -251,92 +792,157 @@ tx_anchor(_Base, _Request, Opts) -> %% @doc Find the transaction ID to retrieve from Arweave based on the request or %% base message. -find_txid(Base, Request, Opts) -> - hb_ao:get_first( - [ - {Request, <<"tx">>}, - {Base, <<"tx">>} - ], - not_found, +find_key(Key, Base, Request, Opts) -> + hb_maps:get( + Key, + Request, + hb_maps:get(Key, Base, not_found, Opts), Opts ). +exclude_data(Base, Request, Opts) -> + RawValue = + hb_ao:get_first( + [ + {Request, <<"exclude-data">>}, + {Base, <<"exclude-data">>} + ], + false, + Opts + ), + hb_util:bool(RawValue). + %% @doc Make a request to the Arweave node and parse the response into an %% AO-Core message. Most Arweave API responses are in JSON format, but without %% a `content-type' header. 
Subsequently, we parse the response manually and %% pass it back as a message. request(Method, Path, Opts) -> - request(Method, Path, #{}, Opts). + request(Method, Path, #{}, [], Opts). request(Method, Path, Extra, Opts) -> - ?event({arweave_request, {method, Method}, {path, Path}}), + request(Method, Path, Extra, [], Opts). +request(Method, Path, Extra, LogExtra, Opts) -> + ?event(debug_arweave, {request, + {method, Method}, {path, {explicit, Path}}, {log_extra, LogExtra}}), Res = hb_http:request( Extra#{ <<"path">> => <<"/arweave", Path/binary>>, <<"method">> => Method }, - Opts + Opts#{ + cache_control => [<<"no-cache">>, <<"no-store">>] + } ), - to_message(Path, Res, Opts). + to_message(Path, Method, best_response(Res), LogExtra, Opts). + +%% @doc Select the best response from a list of responses by sorting them +%% ascending by HTTP status code. Returns the first (best) response tuple. +best_response({error, {no_viable_responses, Responses}}) -> + best_response(Responses); +best_response([]) -> + {error, no_viable_responses}; +best_response(Responses) when is_list(Responses) -> + Sorted = lists:sort( + fun({_, ResponseA}, {_, ResponseB}) -> + StatusA = response_status(ResponseA), + StatusB = response_status(ResponseB), + StatusA =< StatusB + end, + Responses + ), + hd(Sorted); +best_response(Response) -> + Response. + +response_status(Response) when is_map(Response) -> + maps:get(<<"status">>, Response, 999); +response_status(_Response) -> + 999. %% @doc Transform a response from the Arweave node into an AO-Core message. 
-to_message(_Path, {error, #{ <<"status">> := 404 }}, _Opts) -> +to_message(Path, Method, {error, #{ <<"status">> := 404 }}, LogExtra, _Opts) -> + event_request(Path, Method, 404, LogExtra), {error, not_found}; -to_message(_Path, {error, _}, _Opts) -> - {error, client_error}; -to_message(_Path, {failure, _}, _Opts) -> +to_message(Path, Method, {error, Response}, LogExtra, _Opts) when is_map(Response) -> + Status = maps:get(<<"status">>, Response, client_error), + event_request(Path, Method, Status, LogExtra), + {error, Response}; +to_message(Path, Method, {error, Response}, LogExtra, _Opts) -> + event_request(Path, Method, client_error, LogExtra), + {error, Response}; +to_message(Path, Method, {failure, Response}, LogExtra, _Opts) when is_map(Response) -> + Status = maps:get(<<"status">>, Response, server_error), + event_request(Path, Method, Status, LogExtra), {error, server_error}; -to_message(Path = <<"/tx/", TXID/binary>>, {ok, #{ <<"body">> := Body }}, Opts) -> +to_message(Path, Method, {failure, _Response}, LogExtra, _Opts) -> + event_request(Path, Method, server_error, LogExtra), + {error, server_error}; +to_message(Path = <<"/tx">>, <<"POST">>, {ok, Response}, LogExtra, _Opts) -> + Status = maps:get(<<"status">>, Response, 200), + event_request(Path, <<"POST">>, Status, LogExtra), + {ok, Response}; +to_message(Path = <<"/tx/", TXID/binary>>, <<"GET">>, {ok, #{ <<"body">> := Body }}, LogExtra, Opts) -> + event_request(Path, <<"GET">>, 200, LogExtra), TXHeader = ar_tx:json_struct_to_tx(hb_json:decode(Body)), - ?event( + ?event(debug_arweave, {arweave_tx_response, - {path, Path}, + {path, {explicit, Path}}, {raw_body, {explicit, Body}}, {body, {explicit, hb_json:decode(Body)}}, {tx, TXHeader} } ), - {ok, TX} = add_data(TXID, TXHeader, Opts), - { - ok, - hb_message:convert( - TX, - <<"structured@1.0">>, - <<"tx@1.0">>, - Opts - ) - }; -to_message(Path = <<"/raw/", _/binary>>, {ok, #{ <<"body">> := Body }}, _Opts) -> - ?event( - {arweave_raw_response, - {path, 
Path}, - {data_size, byte_size(Body)} - } - ), + case hb_opts:get(exclude_data, false, Opts) of + true -> + {ok, hb_message:convert(TXHeader, <<"structured@1.0">>, <<"tx@1.0">>, Opts)}; + false -> + case data(TXID, Opts) of + {ok, RawData} -> + TX = TXHeader#tx{ data = RawData }, + {ok, hb_message:convert(TX, <<"structured@1.0">>, <<"tx@1.0">>, Opts)}; + {error, not_found} -> + {ok, hb_message:convert(TXHeader, <<"structured@1.0">>, <<"tx@1.0">>, Opts)}; + Error -> + Error + end + end; +to_message(Path = <<"/raw/", _/binary>>, <<"GET">>, {ok, #{ <<"body">> := Body }}, LogExtra, _Opts) -> + event_request(Path, <<"GET">>, 200, LogExtra), {ok, Body}; -to_message(Path = <<"/block/", _/binary>>, {ok, #{ <<"body">> := Body }}, Opts) -> - Block = hb_message:convert(Body, <<"structured@1.0">>, <<"json@1.0">>, Opts), - ?event( - {arweave_block_response, - {path, Path}, - {block, Block} - } - ), - CacheRes = dev_arweave_block_cache:write(Block, Opts), +to_message(Path = <<"/block/", _/binary>>, <<"GET">>, {ok, #{ <<"body">> := Body }}, LogExtra, Opts) -> + event_request(Path, <<"GET">>, 200, LogExtra), + {ok, Block} = + dev_codec_json:from( + Body, + #{ <<"accept-codec">> => <<"structured@1.0">> }, + Opts + ), + CacheRes = + case hb_opts:get(arweave_index_blocks, true, Opts) of + true -> dev_arweave_block_cache:write(Block, Opts); + false -> skipped + end, ?event( - {cached_arweave_block, + debug_arweave_index, + { + if CacheRes == skipped -> skipped_caching_arweave_block; + true -> cached_arweave_block + end, {path, Path}, {result, CacheRes} } ), {ok, Block}; -to_message(<<"/price/", _/binary>>, {ok, #{ <<"body">> := Body }}, _Opts) -> +to_message(Path = <<"/price/", _/binary>>, <<"GET">>, {ok, #{ <<"body">> := Body }}, LogExtra, _Opts) -> + event_request(Path, <<"GET">>, 200, LogExtra), Price = hb_util:int(Body), {ok, Price}; -to_message(<<"/tx_anchor">>, {ok, #{ <<"body">> := Body }}, _Opts) -> +to_message(Path = <<"/tx_anchor">>, <<"GET">>, {ok, #{ <<"body">> := Body }}, 
LogExtra, _Opts) -> + event_request(Path, <<"GET">>, 200, LogExtra), Anchor = hb_util:decode(Body), {ok, Anchor}; -to_message(Path, {ok, #{ <<"body">> := Body }}, Opts) -> +to_message(Path, <<"GET">>, {ok, #{ <<"body">> := Body }}, LogExtra, Opts) -> + event_request(Path, <<"GET">>, 200, LogExtra), % All other responses that are `OK' status are converted from JSON to an % AO-Core message. ?event( @@ -355,9 +961,27 @@ to_message(Path, {ok, #{ <<"body">> := Body }}, Opts) -> ) }. +event_request(Path, Method, Status, Extra) -> + BaseList = [{request, {explicit, Path}}, {method, Method}, {status, Status}], + MergedTuple = erlang:list_to_tuple(BaseList ++ Extra), + ?event(arweave_short, MergedTuple). + %%% Tests -post_ans104_tx_test() -> +%% @doc A fixed bad interior offset from a live TX is rejected by +%% bundle_header/3 as invalid_bundle_header. +bundle_header_garbage_guard_test() -> + ServerOpts = #{ store => [hb_test_utils:test_store()] }, + Server = hb_http_server:start_node(ServerOpts), + ProbeOffset = 376836336327208, + Size = 121798901, + ?assertEqual( + {error, invalid_bundle_header}, + bundle_header(ProbeOffset - 1, Size, ServerOpts) + ). + + +post_ans104_message_test() -> ServerOpts = #{ store => [hb_test_utils:test_store()] }, Server = hb_http_server:start_node(ServerOpts), ClientOpts = @@ -379,12 +1003,12 @@ post_ans104_tx_test() -> hb_http:post( Server, Msg#{ - <<"path">> => <<"/~arweave@2.9-pre/tx">>, - <<"codec-device">> => <<"ans104@1.0">> + <<"path">> => <<"/~arweave@2.9/tx">> }, ClientOpts ), ?assertMatch(#{ <<"status">> := 200 }, PostRes), + ?event(debug_test, {post_res, PostRes}), SignedID = hb_message:id(Msg, signed, ClientOpts), {ok, GetRes} = hb_http:get( @@ -402,10 +1026,315 @@ post_ans104_tx_test() -> ), ok. 
+%% @doc Serialize a signed ANS-104 data item, POST the raw bytes to the
+%% `arweave@2.9' device as `application/octet-stream', and check that the
+%% item can then be read back from the node by its signed ID.
+post_ans104_binary_test() ->
+    NodeOpts = #{ store => [hb_test_utils:test_store()] },
+    Node = hb_http_server:start_node(NodeOpts),
+    Opts =
+        #{
+            store => [hb_test_utils:test_store()],
+            priv_wallet => hb:wallet()
+        },
+    Unsigned =
+        #{
+            <<"variant">> => <<"ao.N.1">>,
+            <<"type">> => <<"Process">>,
+            <<"data">> => <<"test-data">>
+        },
+    Signed =
+        hb_message:commit(
+            Unsigned,
+            Opts,
+            #{ <<"commitment-device">> => <<"ans104@1.0">> }
+        ),
+    Item =
+        hb_message:convert(
+            Signed,
+            <<"ans104@1.0">>,
+            <<"structured@1.0">>,
+            Opts
+        ),
+    ?event(debug_test, {data_item, Item}),
+    ItemBytes = ar_bundles:serialize(Item),
+    PostReq =
+        #{
+            <<"device">> => <<"arweave@2.9">>,
+            <<"path">> => <<"/tx?codec-device=ans104@1.0">>,
+            <<"content-type">> => <<"application/octet-stream">>,
+            <<"body">> => ItemBytes
+        },
+    {ok, PostRes} = hb_http:post(Node, PostReq, Opts),
+    ?assertMatch(#{ <<"status">> := 200 }, PostRes),
+    ?event(debug_test, {post_res, PostRes}),
+    ItemID = hb_message:id(Signed, signed, Opts),
+    {ok, GetRes} = hb_http:get(Node, <<"/", ItemID/binary>>, Opts),
+    ?assertMatch(
+        #{
+            <<"status">> := 200,
+            <<"variant">> := <<"ao.N.1">>,
+            <<"type">> := <<"Process">>,
+            <<"data">> := <<"test-data">>
+        },
+        GetRes
+    ),
+    ok.
+
+%% @doc Sign a message with the `tx@1.0' commitment device and POST it to the
+%% `arweave@2.9' device's `/tx' path, expecting the upstream rejection.
+post_tx_message_test() ->
+    NodeOpts = #{ store => [hb_test_utils:test_store()] },
+    Node = hb_http_server:start_node(NodeOpts),
+    Opts =
+        #{
+            store => [hb_test_utils:test_store()],
+            priv_wallet => hb:wallet()
+        },
+    Unsigned = #{ <<"tag">> => <<"value">>, <<"data">> => <<"test-data">> },
+    Msg =
+        hb_message:commit(
+            Unsigned,
+            Opts,
+            #{ <<"commitment-device">> => <<"tx@1.0">> }
+        ),
+    ?event(debug_test, {msg, Msg}),
+    PostReq =
+        Msg#{
+            <<"device">> => <<"arweave@2.9">>,
+            <<"path">> => <<"/tx">>
+        },
+    Response = hb_http:post(Node, PostReq, Opts),
+    ?event(debug_test, {post_response, Response}),
+    % A 400 with this body is the expected outcome: the wallet has no
+    % balance, and the rejection can only come back if the HB node really
+    % forwarded the transaction to an Arweave node.
+    ?assertMatch({error, #{ <<"status">> := 400 }}, Response),
+    {error, #{ <<"body">> := Body }} = Response,
+    ?assertEqual(<<"Transaction verification failed.">>, Body),
+    ok.
+
+%% @doc POST a JSON-encoded transaction through a freshly started node and
+%% expect the upstream rejection.
+post_tx_json_failure_test() ->
+    NodeOpts = #{ store => [hb_test_utils:test_store()] },
+    Node = hb_http_server:start_node(NodeOpts),
+    Response = post_tx_json_request(Node, post_tx_json_client_opts()),
+    % As above: insufficient balance makes the transaction invalid, and
+    % seeing that rejection proves the POST reached an Arweave node.
+    ?assertMatch({error, #{ <<"status">> := 400 }}, Response),
+    {error, #{ <<"body">> := Body }} = Response,
+    ?assertEqual(<<"Transaction verification failed.">>, Body),
+    ok.
+
+%% @doc Both mock nodes answer 200: the request succeeds and each node sees
+%% exactly one POST.
+post_tx_json_success_test() ->
+    Node1Reply = {200, <<"OK-1">>},
+    Node2Reply = {200, <<"OK-2">>},
+    {Response, Node1Posts, Node2Posts} =
+        post_tx_json_two_node_test(Node1Reply, Node2Reply),
+    ?assertMatch({ok, #{ <<"status">> := 200 }}, Response),
+    ?assertEqual(1, length(Node1Posts)),
+    ?assertEqual(1, length(Node2Posts)),
+    ok.
+
+%% @doc One mock node rejects with 400 while the other accepts with 200: the
+%% caller must see the 200, and each node must have received one POST.
+post_tx_json_mixed_status_prefers_success_test() ->
+    Rejecting = {400, <<"Transaction verification failed.">>},
+    Accepting = {200, <<"OK-2">>},
+    {Response, Node1Posts, Node2Posts} =
+        post_tx_json_two_node_test(Rejecting, Accepting),
+    ?assertMatch({ok, #{ <<"status">> := 200 }}, Response),
+    ?assertEqual(1, length(Node1Posts)),
+    ?assertEqual(1, length(Node2Posts)),
+    ok.
+
+%% @doc A response list containing a non-map `failed_connect' error next to a
+%% 200 response must still resolve to the 200 response.
+best_response_handles_failed_connect_entries_test() ->
+    FailedConnect =
+        {failed_connect,
+            [
+                {to_address, {"tip-4.arweave.xyz", 1984}},
+                {inet, [inet], etimedout}
+            ]
+        },
+    Success = {ok, #{ <<"status">> => 200, <<"body">> => <<"OK-2">> }},
+    Responses = [{error, FailedConnect}, Success],
+    ?assertEqual(Success, best_response(Responses)).
+
+%% @doc An error whose payload is not a map is returned by `to_message/5'
+%% exactly as it was given.
+best_response_non_map_error_round_trips_test() ->
+    FailedConnect =
+        {failed_connect,
+            [
+                {to_address, {"tip-4.arweave.xyz", 1984}},
+                {inet, [inet], etimedout}
+            ]
+        },
+    Expected = {error, FailedConnect},
+    ?assertEqual(
+        Expected,
+        to_message(<<"/tx">>, <<"GET">>, {error, FailedConnect}, [], #{})
+    ).
+
+%% Shared driver for the two-node tests: start two mock servers answering
+%% `/tx' with the given responses, route a node at both, POST one JSON
+%% transaction and return `{Response, Node1Posts, Node2Posts}'. The mock
+%% servers are stopped even if the request raises.
+post_tx_json_two_node_test(Node1TxResponse, Node2TxResponse) ->
+    Node1Routes = [{"/tx", tx, Node1TxResponse}],
+    {ok, Mock1, Handle1} = hb_mock_server:start(Node1Routes),
+    Node2Routes = [{"/tx", tx, Node2TxResponse}],
+    {ok, Mock2, Handle2} = hb_mock_server:start(Node2Routes),
+    NodeOpts = post_tx_json_two_node_server_opts(Mock1, Mock2),
+    Node = hb_http_server:start_node(NodeOpts),
+    Opts = post_tx_json_client_opts(),
+    try
+        Reply = post_tx_json_request(Node, Opts),
+        Seen1 = hb_mock_server:get_requests(tx, 1, Handle1),
+        Seen2 = hb_mock_server:get_requests(tx, 1, Handle2),
+        {Reply, Seen1, Seen2}
+    after
+        hb_mock_server:stop(Handle1),
+        hb_mock_server:stop(Handle2)
+    end.
+
+%% Node options whose single route sends `POST /arweave/tx' to both mock
+%% nodes in parallel and collects two responses.
+post_tx_json_two_node_server_opts(MockNode1, MockNode2) ->
+    % Both node entries are identical apart from the target they point at.
+    Nodes =
+        [
+            #{
+                <<"match">> => <<"^/arweave">>,
+                <<"with">> => Target,
+                <<"opts">> => #{ http_client => httpc }
+            }
+        ||
+            Target <- [MockNode1, MockNode2]
+        ],
+    Template =
+        #{
+            <<"path">> => <<"^/arweave/tx">>,
+            <<"method">> => <<"POST">>
+        },
+    Route =
+        #{
+            <<"template">> => Template,
+            <<"nodes">> => Nodes,
+            <<"parallel">> => true,
+            <<"responses">> => 2,
+            <<"stop-after">> => false,
+            <<"admissible-status">> => 200
+        },
+    #{
+        store => [hb_test_utils:test_store()],
+        routes => [Route]
+    }.
+
+%% Client options for the JSON POST tests: a fresh test store and the node
+%% wallet.
+post_tx_json_client_opts() ->
+    Store = hb_test_utils:test_store(),
+    #{ store => [Store], priv_wallet => hb:wallet() }.
+
+%% Build the JSON body for the POST tests: sign a small message with `tx@1.0',
+%% convert it to a TX record, blank its data and JSON-encode the result.
+post_tx_json_payload(ClientOpts) ->
+    Unsigned = #{ <<"tag">> => <<"value">>, <<"data">> => <<"test-data">> },
+    Signed =
+        hb_message:commit(
+            Unsigned,
+            ClientOpts,
+            #{ <<"commitment-device">> => <<"tx@1.0">> }
+        ),
+    TX =
+        hb_message:convert(
+            Signed,
+            <<"tx@1.0">>,
+            <<"structured@1.0">>,
+            ClientOpts
+        ),
+    hb_json:encode(ar_tx:tx_to_json_struct(TX#tx{ data = <<>> })).
+
+%% POST the JSON payload to the `arweave@2.9' device on `Server' and return
+%% the raw result of `hb_http:post/3'.
+post_tx_json_request(Server, ClientOpts) ->
+    Body = post_tx_json_payload(ClientOpts),
+    Request =
+        #{
+            <<"device">> => <<"arweave@2.9">>,
+            <<"path">> => <<"/tx?codec-device=tx@1.0">>,
+            <<"content-type">> => <<"application/json">>,
+            <<"body">> => Body
+        },
+    hb_http:post(Server, Request, ClientOpts).
+
+%% @doc Build isolated test opts backed by a volatile store and pre-index the
+%% given TXIDs into its Arweave index store.
+setup_arweave_index_opts(TXIDs) ->
+    Store = hb_test_utils:test_store(hb_store_volatile, <<"arweave-index">>),
+    IndexStore =
+        #{
+            <<"module">> => hb_store_arweave,
+            <<"index-store">> => [Store]
+        },
+    Opts =
+        #{
+            store => [Store],
+            arweave_index_ids => true,
+            arweave_index_store => IndexStore
+        },
+    % Two ways to populate the index. Alternative (disabled): index the
+    % blocks that contain the TXs...
+    % lists:foreach(
+    %     fun(Block) -> ok = index_test_block(Block, Opts) end,
+    %     lists:usort([tx_index_block(TXID) || TXID <- TXIDs])
+    % ),
+    % ...in use: index each TX directly. This relies on the
+    % `/tx/TXID/offset' endpoint being reachable through the `/arweave'
+    % routes.
+    IndexOne = fun(TXID) -> ok = index_test_tx(TXID, IndexStore, Opts) end,
+    lists:foreach(IndexOne, TXIDs),
+    Opts.
+
+%% Index a single block through `~copycat@1.0', using the same height as both
+%% `from' and `to'. The result must be `{ok, Block}'.
+index_test_block(Block, Opts) ->
+    BlockBin = hb_util:bin(Block),
+    Path =
+        <<
+            "~copycat@1.0/arweave&from=",
+            BlockBin/binary,
+            "&to=",
+            BlockBin/binary
+        >>,
+    {ok, Block} = hb_ao:resolve(Path, Opts#{ arweave_index_ids => true }),
+    ok.
+
+%% Look up a TX's offset and size over HTTP, write the resulting
+%% `{Start, Size}' entry into `IndexStore' as a `tx@1.0' item, and assert it
+%% can be read back.
+index_test_tx(TXID, IndexStore, Opts) ->
+    OffsetReq =
+        #{
+            <<"path">> => <<"/arweave/tx/", TXID/binary, "/offset">>,
+            <<"method">> => <<"GET">>
+        },
+    {ok, #{ <<"body">> := OffsetBody }} = hb_http:request(OffsetReq, Opts),
+    Decoded = hb_json:decode(OffsetBody),
+    EndOffset = hb_util:int(maps:get(<<"offset">>, Decoded)),
+    Size = hb_util:int(maps:get(<<"size">>, Decoded)),
+    % The endpoint's `offset' is treated as the end position, so the start
+    % is `offset - size'.
+    ok =
+        hb_store_arweave:write_offset(
+            IndexStore,
+            TXID,
+            <<"tx@1.0">>,
+            EndOffset - Size,
+            Size
+        ),
+    ?assertMatch({ok, _}, hb_store_arweave:read_offset(IndexStore, TXID)),
+    ok.
+
+%% Block number recorded for each test TXID; feeds the disabled block-indexing
+%% alternative in setup_arweave_index_opts/1.
+tx_index_block(<<"ptBC0UwDmrUTBQX3MqZ1lB57ex20ygwzkjjCrQjIx3o">>) ->
+    1749502;
+tx_index_block(<<"jI0A4BASHaUdCCsdv249BxDX6IlE0Ko391TuI6REATw">>) ->
+    1289677;
+tx_index_block(<<"4FnBmvgWmqXWEEprjVqBsV5aRpAgF6_yJX_GTGsSZjY">>) ->
+    753012;
+tx_index_block(<<"YR9m4c3CrlljCRYEWBLeoKekbAyYZRMo2Kpz61IeNp8">>) ->
+    1233918.
+ get_tx_basic_data_test() -> - Node = hb_http_server:start_node(), - Path = <<"/~arweave@2.9-pre/tx=ptBC0UwDmrUTBQX3MqZ1lB57ex20ygwzkjjCrQjIx3o">>, - {ok, Structured} = hb_http:get(Node, Path, #{}), + {ok, Structured} = hb_ao:resolve( + #{ <<"device">> => <<"arweave@2.9">> }, + #{ + <<"path">> => <<"tx">>, + <<"tx">> => <<"ptBC0UwDmrUTBQX3MqZ1lB57ex20ygwzkjjCrQjIx3o">>, + <<"exclude-data">> => false + }, + #{} + ), ?event(debug_test, {structured_tx, Structured}), ?assert(hb_message:verify(Structured, all, #{})), % Hash the data to make it easier to match @@ -423,18 +1352,258 @@ get_tx_basic_data_test() -> ?assert(hb_message:match(ExpectedMsg, StructuredWithHash, only_present)), ok. +%% @doc The data for this transaction ends with two smaller chunks. +get_tx_split_chunk_test() -> + {ok, Structured} = hb_ao:resolve( + #{ <<"device">> => <<"arweave@2.9">> }, + #{ + <<"path">> => <<"tx">>, + <<"tx">> => <<"T2pluNnaavL7-S2GkO_m3pASLUqMH_XQ9IiIhZKfySs">>, + <<"exclude-data">> => false + }, + #{} + ), + ?assert(hb_message:verify(Structured, all, #{})), + ?assertEqual( + <<"T2pluNnaavL7-S2GkO_m3pASLUqMH_XQ9IiIhZKfySs">>, + hb_message:id(Structured, signed)), + ExpectedMsg = #{ + <<"reward">> => <<"6035386935">>, + <<"anchor">> => <<"PX16-598IrIMvLxFkvfNTWLVKXqXSmArOdW3o7X8jWMCH1fiNOjBZ2XjQlw0FOme">>, + <<"Contract">> => <<"KTzTXT_ANmF84fWEKHzWURD1LWd9QaFR9yfYUwH2Lxw">> + }, + ?assert(hb_message:match(ExpectedMsg, Structured, only_present)), + + Child = hb_ao:get(<<"1/2">>, Structured), + ?assert(hb_message:verify(Child, all, #{})), + ?event(debug_test, {child, {explicit, hb_message:id(Child, signed)}}), + ?assertEqual( + <<"8aJrRWtHcJvJ61qsH6agGkemzrtLw3W22xFrpCGAnTM">>, + hb_message:id(Child, signed)), + ok. 
+
+%% @doc With `exclude-data' set, the TX header resolves without a `data' key
+%% and still verifies; re-attaching the body fetched through `raw' must also
+%% verify and hash to the known value.
+get_tx_basic_data_exclude_data_test() ->
+    TXID = <<"ptBC0UwDmrUTBQX3MqZ1lB57ex20ygwzkjjCrQjIx3o">>,
+    Opts = setup_arweave_index_opts([TXID]),
+    Device = #{ <<"device">> => <<"arweave@2.9">> },
+    HeaderReq =
+        #{
+            <<"path">> => <<"tx">>,
+            <<"tx">> => TXID,
+            <<"exclude-data">> => true
+        },
+    {ok, Structured} = hb_ao:resolve(Device, HeaderReq, Opts),
+    ?event(debug_test, {structured_tx, Structured}),
+    ?assert(hb_message:verify(Structured, all, Opts)),
+    ?assertEqual(false, maps:is_key(<<"data">>, Structured)),
+    ExpectedMsg =
+        #{
+            <<"reward">> => <<"482143296">>,
+            <<"anchor">> =>
+                <<"XTzaU2_m_hRYDLiXkcleOC4zf5MVTXIeFWBOsJSRrtEZ8kM6Oz7EKLhZY7fTAvKq">>,
+            <<"content-type">> => <<"application/json">>
+        },
+    ?assert(hb_message:match(ExpectedMsg, Structured, only_present)),
+    RawReq = #{ <<"path">> => <<"raw">>, <<"raw">> => TXID },
+    {ok, RawData} = hb_ao:resolve(Device, RawReq, Opts),
+    ?event(debug_test, {raw_data, RawData}),
+    Data = hb_ao:get(<<"body">>, RawData, Opts),
+    StructuredWithData = Structured#{ <<"data">> => Data },
+    ?assert(hb_message:verify(StructuredWithData, all, Opts)),
+    DataHash = hb_util:encode(crypto:hash(sha256, Data)),
+    ?assertEqual(<<"PEShWA1ER2jq7CatAPpOZ30TeLrjOSpaf_Po7_hKPo4">>, DataHash),
+    ok.
+
+%% @doc Same flow as the basic exclude-data test, for a second TX: header
+%% without `data', then the body fetched through `raw' re-attached and
+%% verified.
+get_tx_data_tag_exclude_data_test() ->
+    TXID = <<"jI0A4BASHaUdCCsdv249BxDX6IlE0Ko391TuI6REATw">>,
+    Opts = setup_arweave_index_opts([TXID]),
+    Device = #{ <<"device">> => <<"arweave@2.9">> },
+    HeaderReq =
+        #{
+            <<"path">> => <<"tx">>,
+            <<"tx">> => TXID,
+            <<"exclude-data">> => true
+        },
+    {ok, Structured} = hb_ao:resolve(Device, HeaderReq, Opts),
+    ?event(debug_test, {structured_tx, Structured}),
+    ?assert(hb_message:verify(Structured, all, Opts)),
+    ?assertEqual(false, maps:is_key(<<"data">>, Structured)),
+    ExpectedMsg =
+        #{
+            <<"reward">> => <<"630923958">>,
+            <<"anchor">> =>
+                <<"CWJKkpdXEQO9sCWLFg8Cqby0d7wY0Gez5H95YG15g8pAYaXVatF9Ms1QBUpvZ-Ll">>,
+            <<"content-type">> => <<"application/json">>
+        },
+    ?assert(hb_message:match(ExpectedMsg, Structured, only_present)),
+    RawReq = #{ <<"path">> => <<"raw">>, <<"raw">> => TXID },
+    {ok, RawData} = hb_ao:resolve(Device, RawReq, Opts),
+    Data = hb_ao:get(<<"body">>, RawData, Opts),
+    StructuredWithData = Structured#{ <<"data">> => Data },
+    ?assert(hb_message:verify(StructuredWithData, all, Opts)),
+    DataHash = hb_util:encode(crypto:hash(sha256, Data)),
+    ?assertEqual(<<"IHyJ9BlQaHLWVwwklMwV1XEYXGjwx2B6HXNJZ4yJXeQ">>, DataHash),
+    ok.
+
+%% @doc A HEAD request on `raw' for an indexed TX reports its content type,
+%% content length and header length.
+head_raw_tx_test() ->
+    TXID = <<"ptBC0UwDmrUTBQX3MqZ1lB57ex20ygwzkjjCrQjIx3o">>,
+    Opts = setup_arweave_index_opts([TXID]),
+    HeadReq =
+        #{
+            <<"path">> => <<"raw">>,
+            <<"raw">> => TXID,
+            <<"method">> => <<"HEAD">>
+        },
+    {ok, Result} =
+        hb_ao:resolve(#{ <<"device">> => <<"arweave@2.9">> }, HeadReq, Opts),
+    ?event({result, Result}),
+    ?assertEqual(
+        {ok, <<"application/json">>},
+        hb_maps:find(<<"content-type">>, Result, Opts)
+    ),
+    ?assertEqual(
+        {ok, 774},
+        hb_maps:find(<<"content-length">>, Result, Opts)
+    ),
+    ?assertEqual(
+        {ok, 0},
+        hb_maps:find(<<"header-length">>, Result, Opts)
+    ).
+
+%% @doc Index block 1_827_942 through `~copycat@1.0', then issue a HEAD
+%% request on `raw' for a data item ID and check the reported content type
+%% and length.
+head_raw_ans104_test() ->
+    Opts = setup_arweave_index_opts([]),
+    DataItemID = <<"0vy2Ey8bWkSDcRIvWQJjxDeVGYOrTSmYIIhBILJntY8">>,
+    Height = hb_util:bin(1_827_942),
+    IndexPath =
+        <<"~copycat@1.0/arweave&from=", Height/binary, "&to=", Height/binary>>,
+    % The indexing result is intentionally not matched.
+    hb_ao:resolve(IndexPath, Opts),
+    HeadReq =
+        #{
+            <<"path">> => <<"raw">>,
+            <<"raw">> => DataItemID,
+            <<"method">> => <<"HEAD">>
+        },
+    {ok, Result} =
+        hb_ao:resolve(#{ <<"device">> => <<"arweave@2.9">> }, HeadReq, Opts),
+    ?assertEqual(
+        {ok, <<"application/json">>},
+        hb_maps:find(<<"content-type">>, Result, Opts)
+    ),
+    ?assertEqual(
+        {ok, 575},
+        hb_maps:find(<<"content-length">>, Result, Opts)
+    ).
+
+%% @doc Ranged GETs on `raw' for an indexed TX return exactly the requested
+%% bytes, along with the content type.
+get_raw_range_tx_test() ->
+    DataItemID = <<"ptBC0UwDmrUTBQX3MqZ1lB57ex20ygwzkjjCrQjIx3o">>,
+    Opts = setup_arweave_index_opts([DataItemID]),
+    % Both requests differ only in their `range' value.
+    FetchRange =
+        fun(Range) ->
+            hb_ao:resolve(
+                #{ <<"device">> => <<"arweave@2.9">> },
+                #{
+                    <<"path">> => <<"raw">>,
+                    <<"raw">> => DataItemID,
+                    <<"method">> => <<"GET">>,
+                    <<"range">> => Range
+                },
+                Opts
+            )
+        end,
+    {ok, Result} = FetchRange(<<"bytes 0-2/774">>),
+    ?event(debug_test, {result, Result}),
+    ?assertEqual(
+        {ok, <<"{\"d">>},
+        hb_maps:find(<<"body">>, Result, Opts)
+    ),
+    {ok, Result2} = FetchRange(<<"bytes 100-105/774">>),
+    ?event(debug_test, {result2, Result2}),
+    ?assertEqual(
+        {ok, <<"application/json">>},
+        hb_maps:find(<<"content-type">>, Result2, Opts)
+    ),
+    ?assertEqual(
+        {ok, <<"ame Cr">>},
+        hb_maps:find(<<"body">>, Result2, Opts)
+    ).
+
+%% @doc Index block 1_827_942 through `~copycat@1.0', then check that ranged
+%% GETs on `raw' for a data item ID return exactly the requested bytes.
+get_raw_range_ans104_test() ->
+    Opts = setup_arweave_index_opts([]),
+    DataItemID = <<"0vy2Ey8bWkSDcRIvWQJjxDeVGYOrTSmYIIhBILJntY8">>,
+    Height = hb_util:bin(1_827_942),
+    IndexPath =
+        <<"~copycat@1.0/arweave&from=", Height/binary, "&to=", Height/binary>>,
+    % The indexing result is intentionally not matched.
+    hb_ao:resolve(IndexPath, Opts),
+    % Both requests differ only in their `range' value.
+    FetchRange =
+        fun(Range) ->
+            hb_ao:resolve(
+                #{ <<"device">> => <<"arweave@2.9">> },
+                #{
+                    <<"path">> => <<"raw">>,
+                    <<"raw">> => DataItemID,
+                    <<"method">> => <<"GET">>,
+                    <<"range">> => Range
+                },
+                Opts
+            )
+        end,
+    {ok, Result} = FetchRange(<<"bytes 0-1/575">>),
+    ?event(debug_test, {result, Result}),
+    ?assertEqual(
+        {ok, <<"{\n">>},
+        hb_maps:find(<<"body">>, Result, Opts)
+    ),
+    {ok, Result2} = FetchRange(<<"bytes 100-105/575">>),
+    ?event(debug_test, {result2, Result2}),
+    ?assertEqual(
+        {ok, <<"application/json">>},
+        hb_maps:find(<<"content-type">>, Result2, Opts)
+    ),
+    ?assertEqual(
+        {ok, <<"t #972">>},
+        hb_maps:find(<<"body">>, Result2, Opts)
+    ).
+ get_tx_rsa_nested_bundle_test() -> Node = hb_http_server:start_node(), - Path = <<"/~arweave@2.9-pre/tx=bndIwac23-s0K11TLC1N7z472sLGAkiOdhds87ZywoE">>, + Path = <<"/~arweave@2.9/tx=bndIwac23-s0K11TLC1N7z472sLGAkiOdhds87ZywoE">>, {ok, Root} = hb_http:get(Node, Path, #{}), ?event(debug_test, {root, Root}), ?assert(hb_message:verify(Root, all, #{})), - ChildPath = <>, {ok, Child} = hb_http:get(Node, ChildPath, #{}), ?event(debug_test, {child, Child}), ?assert(hb_message:verify(Child, all, #{})), - {ok, ExpectedChild} = hb_ao:resolve( Root, @@ -442,7 +1611,6 @@ get_tx_rsa_nested_bundle_test() -> #{} ), ?assert(hb_message:match(ExpectedChild, Child, only_present)), - ManualChild = #{ <<"data">> => <<"{\"totalTickedRewardsDistributed\":0,\"distributedEpochIndexes\":[],\"newDemandFactors\":[],\"newEpochIndexes\":[],\"tickedRewardDistributions\":[],\"newPruneGatewaysResults\":[{\"delegateStakeReturned\":0,\"stakeSlashed\":0,\"gatewayStakeReturned\":0,\"delegateStakeWithdrawing\":0,\"prunedGateways\":[],\"slashedGateways\":[],\"gatewayStakeWithdrawing\":0}]}">>, <<"data-protocol">> => <<"ao">>, @@ -462,7 +1630,7 @@ get_tx_rsa_nested_bundle_test() -> get_tx_rsa_large_bundle_test_disabled() -> {timeout, 300, fun() -> Node = hb_http_server:start_node(), - Path = <<"/~arweave@2.9-pre/tx=VifINXnMxLwJXOjHG5uM0JssiylR8qvajjj7HlzQvZA">>, + Path = <<"/~arweave@2.9/tx=VifINXnMxLwJXOjHG5uM0JssiylR8qvajjj7HlzQvZA">>, {ok, Root} = hb_http:get(Node, Path, #{}), ?event(debug_test, {root, Root}), ?assert(hb_message:verify(Root, all, #{})), @@ -471,9 +1639,9 @@ get_tx_rsa_large_bundle_test_disabled() -> get_bad_tx_test() -> Node = hb_http_server:start_node(), - Path = <<"/~arweave@2.9-pre/tx=INVALID-ID">>, + Path = <<"/~arweave@2.9/tx=INVALID-ID">>, Res = hb_http:get(Node, Path, #{}), - ?assertEqual({error, client_error}, Res). + ?assertEqual({error, not_found}, Res). 
%% @doc: helper test to generate and write a dataitem to disk so that we %% can validate it using 3rd-party js libraries and gateways. @@ -507,3 +1675,445 @@ serialize_data_item_test_disabled() -> ?assertEqual(length(DataItem#tx.tags), length(VerifiedItem#tx.tags)), ?assert(ar_bundles:verify_item(VerifiedItem)), ok. + +get_partial_chunk_post_split_test() -> + %% https://arweave.net/tx/QL7_EnmrFtx-0wVgPr2IwaGWQT8vmPcF3R20CKMO3D4/offset + %% + Offset = 378092137521399, + ExpectedLength = 1000, + Opts = #{}, + {ok, Data} = hb_ao:resolve( + #{ <<"device">> => <<"arweave@2.9">> }, + #{ + <<"path">> => <<"chunk">>, + <<"offset">> => Offset, + <<"length">> => ExpectedLength + }, + Opts + ), + ?assertEqual( + <<"G62E7qonT1RBmkC6e3pNJz_thpS9xkVD3qTJAk6o3Uc">>, + hb_util:encode(crypto:hash(sha256, Data)) + ), + ok. + +get_full_chunk_post_split_test() -> + %% https://arweave.net/tx/QL7_EnmrFtx-0wVgPr2IwaGWQT8vmPcF3R20CKMO3D4/offset + %% + Offset = 378092137521399, + ExpectedLength = ?DATA_CHUNK_SIZE, + Opts = #{}, + {ok, Data} = hb_ao:resolve( + #{ <<"device">> => <<"arweave@2.9">> }, + #{ + <<"path">> => <<"chunk">>, + <<"offset">> => Offset, + <<"length">> => ExpectedLength + }, + Opts + ), + ?assertEqual( + <<"LyTBdUe0rNmpqt8C-p7HksdiredXaa0wCBAPt3504W0">>, + hb_util:encode(crypto:hash(sha256, Data)) + ), + ok. + +get_multi_chunk_post_split_test() -> + %% https://arweave.net/tx/QL7_EnmrFtx-0wVgPr2IwaGWQT8vmPcF3R20CKMO3D4/offset + %% + Offset = 378092137521399, + ExpectedLength = ?DATA_CHUNK_SIZE * 3, + Opts = #{}, + {ok, Data} = hb_ao:resolve( + #{ <<"device">> => <<"arweave@2.9">> }, + #{ + <<"path">> => <<"chunk">>, + <<"offset">> => Offset, + <<"length">> => ExpectedLength + }, + Opts + ), + ?assertEqual( + <<"4Cb_N0z0tMDwCiWrUbuzktfn-H6NLHT1btXGDo3CByI">>, + hb_util:encode(crypto:hash(sha256, Data)) + ), + ok. + + +%% @doc Query a chunk range that starts and ends in the middle of a chunk. 
+get_mid_chunk_post_split_test() ->
+    %% Base offset taken from
+    %% https://arweave.net/tx/QL7_EnmrFtx-0wVgPr2IwaGWQT8vmPcF3R20CKMO3D4/offset
+    Request =
+        #{
+            <<"path">> => <<"chunk">>,
+            <<"offset">> => 378092137521399 + 200_000,
+            <<"length">> => ?DATA_CHUNK_SIZE + 300_000
+        },
+    {ok, Data} =
+        hb_ao:resolve(#{ <<"device">> => <<"arweave@2.9">> }, Request, #{}),
+    ?assertEqual(
+        <<"xkEZpGqDiCVuVZfGVyscmfYNZqYmgBLjOrMD2P_SfWs">>,
+        hb_util:encode(crypto:hash(sha256, Data))
+    ),
+    ok.
+
+%% @doc Read the first 1000 bytes at the offset below and compare their hash.
+get_partial_chunk_pre_split_test() ->
+    %% Offset taken from
+    %% https://arweave.net/tx/v4ophPvV-cNp5gkpkjMuUZ-lf-fBfm1Wk-pB4vJb00E/offset
+    Request =
+        #{
+            <<"path">> => <<"chunk">>,
+            <<"offset">> => 30575701172109,
+            <<"length">> => 1000
+        },
+    {ok, Data} =
+        hb_ao:resolve(#{ <<"device">> => <<"arweave@2.9">> }, Request, #{}),
+    ?assertEqual(
+        <<"yU5tZyDCTZ4MFcT6lng74tvx1oIbPkpCw1VAJsSqeuo">>,
+        hb_util:encode(crypto:hash(sha256, Data))
+    ),
+    ok.
+
+%% @doc Read `?DATA_CHUNK_SIZE' bytes at the offset below and compare their
+%% hash.
+get_full_chunk_pre_split_test() ->
+    %% Offset taken from
+    %% https://arweave.net/tx/v4ophPvV-cNp5gkpkjMuUZ-lf-fBfm1Wk-pB4vJb00E/offset
+    Request =
+        #{
+            <<"path">> => <<"chunk">>,
+            <<"offset">> => 30575701172109,
+            <<"length">> => ?DATA_CHUNK_SIZE
+        },
+    {ok, Data} =
+        hb_ao:resolve(#{ <<"device">> => <<"arweave@2.9">> }, Request, #{}),
+    ?assertEqual(
+        <<"nVCvjEq9T5nxIR6jvglNbX1_CYCg0WifxfQoXhS4gik">>,
+        hb_util:encode(crypto:hash(sha256, Data))
+    ),
+    ok.
+
+%% @doc Read three times `?DATA_CHUNK_SIZE' bytes at the offset below and
+%% compare their hash.
+get_multi_chunk_pre_split_test() ->
+    %% Offset taken from
+    %% https://arweave.net/tx/v4ophPvV-cNp5gkpkjMuUZ-lf-fBfm1Wk-pB4vJb00E/offset
+    Request =
+        #{
+            <<"path">> => <<"chunk">>,
+            <<"offset">> => 30575701172109,
+            <<"length">> => ?DATA_CHUNK_SIZE * 3
+        },
+    {ok, Data} =
+        hb_ao:resolve(#{ <<"device">> => <<"arweave@2.9">> }, Request, #{}),
+    ?assertEqual(
+        <<"DfS3jtLXqG3zO_IFA3P-r55SUBoeJmeIh4Eim2Rldeo">>,
+        hb_util:encode(crypto:hash(sha256, Data))
+    ),
+    ok.
+
+%% @doc Read a range that starts 200_000 bytes past the offset below and is
+%% `?DATA_CHUNK_SIZE + 300_000' bytes long, then compare its hash.
+get_mid_chunk_pre_split_test() ->
+    %% Base offset taken from
+    %% https://arweave.net/tx/v4ophPvV-cNp5gkpkjMuUZ-lf-fBfm1Wk-pB4vJb00E/offset
+    Request =
+        #{
+            <<"path">> => <<"chunk">>,
+            <<"offset">> => 30575701172109 + 200_000,
+            <<"length">> => ?DATA_CHUNK_SIZE + 300_000
+        },
+    {ok, Data} =
+        hb_ao:resolve(#{ <<"device">> => <<"arweave@2.9">> }, Request, #{}),
+    ?assertEqual(
+        <<"mgSfqsNapn_BXpbnIHtdeu3rQyvrjBaS0c7rEbUbtBU">>,
+        hb_util:encode(crypto:hash(sha256, Data))
+    ),
+    ok.
+
+%% The tests below delegate to assert_chunk_range/6. Its third and fourth
+%% arguments look like a start position and a byte count -- confirm against
+%% its definition.
+get_pre_split_small_chunks_test() ->
+    TXID = <<"4FnBmvgWmqXWEEprjVqBsV5aRpAgF6_yJX_GTGsSZjY">>,
+    Size = 810774,
+    Hash = <<"LJbiKv5gT2Y5XKFFPF6WqYAdOtaZAvHmtCkfCTbP43g">>,
+    Opts = setup_arweave_index_opts([TXID]),
+    assert_chunk_range(
+        <<"tx@1.0">>, TXID, 11_741_031_646_397 - Size, Size, Hash, Opts
+    ).
+
+get_post_split_small_chunks_test() ->
+    TXID = <<"YR9m4c3CrlljCRYEWBLeoKekbAyYZRMo2Kpz61IeNp8">>,
+    Size = 541937,
+    Hash = <<"cR2HRQRfZP_MiC1egrdc8y8j4SAF9-ppvaIaXDq5i7s">>,
+    Opts = setup_arweave_index_opts([TXID]),
+    assert_chunk_range(
+        <<"tx@1.0">>, TXID, 146_563_435_390_439 - Size, Size, Hash, Opts
+    ).
+
+get_pre_split_gap_test() ->
+    TXID = <<"VexuG68KCNpw21fGZw1ycRCYBtQMHhl274zGDBh3kQE">>,
+    Size = 8789723,
+    Hash = <<"X6sbQdUyKTQ8LGzmleWU_jxO8Oda7S_bshDDKP_Mnqs">>,
+    Opts = setup_arweave_index_opts([TXID]),
+    assert_chunk_range(
+        <<"tx@1.0">>, TXID, 13308109889261 - Size, Size, Hash, Opts
+    ).
%% @doc Run `assert_chunk_range/6' in `tx@1.0' mode for the 1444 bytes that
%% end at 12778619748052.
get_pre_split_small_tx_test() ->
    TXID = <<"K4C4dLZ7V4ffYJcR9JtVQwIXCTLD1mMCUaPbHuUdFgw">>,
    Size = 1444,
    IndexOpts = setup_arweave_index_opts([TXID]),
    assert_chunk_range(
        <<"tx@1.0">>,
        TXID,
        12778619748052 - Size,
        Size,
        <<"o7gJm-FgmWcIvbDiFxDaL56WkJIWQCwsN95Z8zNjEO8">>,
        IndexOpts
    ).

%% @doc Checks an item that begins in the middle of a chunk. Without special
%% handling, get_chunk_range() used to leave off the last few bytes.
get_ed25519_item_test() ->
    ParentTXID = <<"jTFA8XDI_rqmUB6-hhoJF4Yi7p6ZpS_0AByFLU1OPrU">>,
    ItemID = <<"1rTy7gQuK9lJydlKqCEhtGLp2WWG-GOrVo5JdiCmaxs">>,
    IndexOpts = setup_arweave_index_opts([ParentTXID]),
    assert_chunk_range(
        <<"ans104@1.0">>,
        ItemID,
        160399272861859,
        499025,
        <<"PQ5sHoQYSdi1unjHjsfNS_ZXdMvmznEvIkBTvToqVbU">>,
        IndexOpts
    ).

%% @doc This test fails if the chunks are queried with the
%% `x-bucket-based-offset' header set.
bucket_based_offset_fail_test() ->
    ParentTXID = <<"T2pluNnaavL7-S2GkO_m3pASLUqMH_XQ9IiIhZKfySs">>,
    ItemID = <<"z-oKJfhMq5qoVFrljEfiBKgumaJmCWVxNJaavR5aPE8">>,
    IndexOpts = setup_arweave_index_opts([ParentTXID]),
    assert_chunk_range(
        <<"ans104@1.0">>,
        ItemID,
        376836461101675,
        116247,
        <<"4BN8AQEQLpTjresTntyrjJ94eFS2TaMM21MnuHGXtJc">>,
        IndexOpts
    ).

%% @doc This data item needs the `x-bucket-based-offset' header set OR special
%% handling. Unlike its siblings, it passes an empty TX list to
%% `setup_arweave_index_opts/1'.
bucket_based_offset_pass_test() ->
    ItemID = <<"cTI07T1OrF0KZEqPmZji1VTdbeKJG7kMAVlLu7KQvyw">>,
    IndexOpts = setup_arweave_index_opts([]),
    assert_chunk_range(
        <<"ans104@1.0">>,
        ItemID,
        384600234780716,
        856885,
        <<"EVLmVPkpWZjcDtw_zX2r18O7GC85P8VmuaKNy-sDRrw">>,
        IndexOpts
    ).

%% @doc Reassemble and verify the first reference bundle TX.
reassemble_bundle1_test() ->
    assert_bundle_tx(<<"c1-FkhQd-Ul-VpIMR5Vs77lK__BlzHzena2zgNh_hME">>).

%% @doc Reassemble and verify the second reference bundle TX.
reassemble_bundle2_test() ->
    assert_bundle_tx(<<"OVjj52NvyIys7u84Rv1uqRG2vswlF95QDVPSmsmlwLk">>).

%% @doc Assert that a bundle is correctly represented in the weave. Requests
%% `/arweave/tx/TXID/offset', derives the L1 TX byte range from the returned
%% `offset' and `size' fields, and hands the range to `assert_bundle_items/4'.
%% Also useful as a debugging tool for checking that a bundle is present in
%% the weave.
assert_bundle_tx(TXID) ->
    Opts = #{},
    OffsetRequest =
        #{
            <<"path">> => <<"/arweave/tx/", TXID/binary, "/offset">>,
            <<"method">> => <<"GET">>
        },
    {ok, #{ <<"body">> := RawBody }} = hb_http:request(OffsetRequest, Opts),
    Decoded = hb_json:decode(RawBody),
    EndOffset = hb_util:int(maps:get(<<"offset">>, Decoded)),
    Size = hb_util:int(maps:get(<<"size">>, Decoded)),
    % The reported offset is the end of the TX data; step back by its size.
    StartOffset = EndOffset - Size,
    ?event(debug_test, {offset_info,
        {tx, TXID}, {start_offset, StartOffset}, {size, Size}}),
    assert_bundle_items(TXID, StartOffset, Size, Opts).

%% @doc Download, decode, and verify all items in a bundle TX. Reads `Size'
%% bytes starting at `StartOffset + 1' plus the data-less TX header from the
%% `arweave@2.9' device, decodes the bundle header, then deserializes,
%% verifies and logs each listed item in order. Items that
%% `dev_arweave_common:type/1' reports as `list' have their already-decoded
%% children verified and logged by `print_nested_items/1'. Returns `ok'.
assert_bundle_items(TXID, StartOffset, Size, Opts) ->
    Device = #{ <<"device">> => <<"arweave@2.9">> },
    {ok, Data} =
        hb_ao:resolve(
            Device,
            #{
                <<"path">> => <<"chunk">>,
                <<"offset">> => StartOffset + 1,
                <<"length">> => Size
            },
            Opts
        ),
    ?event(debug_test, {chunk_data_size, byte_size(Data)}),
    {ok, TXHeader} =
        hb_ao:resolve(
            Device,
            #{
                <<"path">> => <<"tx">>,
                <<"tx">> => TXID,
                <<"exclude-data">> => true
            },
            Opts
        ),
    ?event(debug_test, {l1_tx_header, TXHeader}),
    {ItemsBin, BundleHeader} = ar_bundles:decode_bundle_header(Data),
    % The accumulator is the byte position of the next item inside ItemsBin.
    CheckItem =
        fun({ID, ItemSize}, Position) ->
            Item = ar_bundles:deserialize(binary:part(ItemsBin, Position, ItemSize)),
            ?assert(ar_bundles:verify_item(Item)),
            ?event(debug_test, {l2_bundle,
                {id, {explicit, hb_util:encode(ID)}},
                {size, ItemSize},
                {tags, Item#tx.tags},
                {data_size, Item#tx.data_size},
                {format, Item#tx.format},
                {signature_type, Item#tx.signature_type}
            }),
            case dev_arweave_common:type(Item) of
                list -> print_nested_items(Item#tx.data);
                _ -> ok
            end,
            Position + ItemSize
        end,
    lists:foldl(CheckItem, 0, BundleHeader),
    ok.

%% @doc Verify and log every child of an already-deserialized nested bundle.
%% Accepts either a map of children (the key is included in the log event) or
%% a list of children.
print_nested_items(DataMap) when is_map(DataMap) ->
    maps:foreach(
        fun(Key, Nested) ->
            ?assert(ar_bundles:verify_item(Nested)),
            ?event(debug_test, {l3_nested_item,
                {key, Key},
                {id, {explicit, hb_util:encode(ar_bundles:id(Nested, unsigned))}},
                {tags, Nested#tx.tags},
                {data_size, Nested#tx.data_size},
                {format, Nested#tx.format},
                {signature_type, Nested#tx.signature_type}
            })
        end,
        DataMap
    );
print_nested_items(Items) when is_list(Items) ->
    lists:foreach(
        fun(Nested) ->
            ?assert(ar_bundles:verify_item(Nested)),
            ?event(debug_test, {l3_nested_item,
                {id, {explicit, hb_util:encode(ar_bundles:id(Nested, unsigned))}},
                {tags, Nested#tx.tags},
                {data_size, Nested#tx.data_size},
                {format, Nested#tx.format},
                {signature_type, Nested#tx.signature_type}
            })
        end,
        Items
    ).

% Disabled test, kept for reference.
% large_tx_test() ->
%     assert_chunk_range(
%         <<"GX2bvdo736wJPR1GmIkyW9GRk3JdXQ_aAd1ozX1d450">>,
%         378161418083672,
%         42040418,
%         <<"wmDVKM6nYRvqre2DdxmX_mhJ6u8unwmTD4YdmzERcZs">>
%     ).
%% @doc Read `ExpectedLength' bytes starting at `StartOffset + 1' through the
%% `chunk' path of the `arweave@2.9' device, validate them according to `Type',
%% and compare their SHA-256 with `ExpectedHash'.
%%
%% `Type' selects the validation:
%% - `<<"ans104@1.0">>': the bytes are deserialized as a data item, which must
%%   verify.
%% - `<<"tx@1.0">>': the data-less header of TX `ID' is fetched; it must carry
%%   no `data' key and must verify both on its own and with the bytes attached
%%   as `data'.
%% Any other `Type' fails with `case_clause'. The elapsed time of the chunk
%% read is logged under `debug_performance'. Returns `ok'.
assert_chunk_range(Type, ID, StartOffset, ExpectedLength, ExpectedHash, Opts) ->
    Device = #{ <<"device">> => <<"arweave@2.9">> },
    Started = erlang:monotonic_time(millisecond),
    {ok, Data} =
        hb_ao:resolve(
            Device,
            #{
                <<"path">> => <<"chunk">>,
                <<"offset">> => StartOffset + 1,
                <<"length">> => ExpectedLength
            },
            Opts
        ),
    Finished = erlang:monotonic_time(millisecond),
    ?event(debug_performance, {chunk_range_resolve,
        {elapsed_ms, Finished - Started},
        {id, {explicit, ID}},
        {offset, StartOffset + 1},
        {length, ExpectedLength}
    }),
    % Disabled cross-check against the `raw' path, kept for reference:
    % {ok, RawDataMsg} = hb_ao:resolve(
    %     #{ <<"device">> => <<"arweave@2.9">> },
    %     #{
    %         <<"path">> => <<"raw">>,
    %         <<"raw">> => ID
    %     },
    %     Opts
    % ),
    % RawData = hb_ao:get(<<"data">>, RawDataMsg, Opts),
    % ?event(debug_test, {chunk_vs_raw_comparison,
    %     {id, {explicit, ID}},
    %     {type, Type},
    %     {start_offset, StartOffset},
    %     {expected_length, ExpectedLength},
    %     {chunk_size, byte_size(Data)},
    %     {raw_size, byte_size(RawData)},
    %     {match, Data =:= RawData},
    %     {hash, {explicit, hb_util:encode(crypto:hash(sha256, Data))}}
    % }),
    case Type of
        <<"ans104@1.0">> ->
            Item = ar_bundles:deserialize(Data),
            ?event(debug_test, {item, Item}),
            ?assert(ar_bundles:verify_item(Item));
            % ?assertEqual(RawData, Item#tx.data);
        <<"tx@1.0">> ->
            {ok, TXHeader} =
                hb_ao:resolve(
                    Device,
                    #{
                        <<"path">> => <<"tx">>,
                        <<"tx">> => ID,
                        <<"exclude-data">> => true
                    },
                    Opts
                ),
            ?assertEqual(false, maps:is_key(<<"data">>, TXHeader)),
            ?event(debug_test, {tx_header, TXHeader}),
            ?assert(hb_message:verify(TXHeader, all, Opts)),
            % Attaching the chunk bytes must still yield a verifiable message.
            TXWithData = TXHeader#{ <<"data">> => Data },
            ?event(debug_test, {tx_with_data, TXWithData}),
            ?assert(hb_message:verify(TXWithData, all, Opts))
            % ?assertEqual(RawData, Data)
    end,
    ?event(debug_test, {data, {explicit, hb_util:encode(crypto:hash(sha256, Data))}}),
    ?assertEqual(ExpectedHash, hb_util:encode(crypto:hash(sha256, Data))),
    ok.
diff --git a/src/dev_arweave_common.erl b/src/dev_arweave_common.erl index 6f18ca81c..9281e26c7 100644 --- a/src/dev_arweave_common.erl +++ b/src/dev_arweave_common.erl @@ -4,6 +4,7 @@ -export([is_signed/1, type/1, tagfind/3, find_key/3]). -export([reset_ids/1, generate_id/2, normalize/1, serialize_data/1]). -export([convert_bundle_list_to_map/1, convert_bundle_map_to_list/1]). +-export([serialize_sig_type/1, deserialize_sig_type/1]). -export([log_conversion/2]). -include("include/hb.hrl"). -include_lib("eunit/include/eunit.hrl"). @@ -12,7 +13,30 @@ is_signed(TX) -> TX#tx.signature =/= ?DEFAULT_SIG. +type(Item = #tx{ format = ans104 }) -> + % Always trust tags for ans104 items. + type_from_tags(Item); +type(Item = #tx{ data = Data }) + when not is_binary(Data) orelse Data =:= ?DEFAULT_DATA -> + % Trust tags for L1 TX without binary data + type_from_tags(Item); type(Item) -> + % If an L1 TX has bundle tags but does not have a valid bundle header, + % treat it as a binary. We have to do this since it may still be a valid + % L1 TX even if the tags are sneaky. + Result = case type_from_tags(Item) of + binary -> + binary; + BundleType when is_binary(Item#tx.data) -> + case ar_bundles:decode_bundle_header(Item#tx.data) of + invalid_bundle_header -> + binary; + {_Count, _Header} -> + BundleType + end + end, + Result. +type_from_tags(Item) -> Format = tagfind(<<"bundle-format">>, Item#tx.tags, <<>>), Version = tagfind(<<"bundle-version">>, Item#tx.tags, <<>>), MapTXID = tagfind(<<"bundle-map">>, Item#tx.tags, <<>>), @@ -179,7 +203,8 @@ maybe_add_bundle_tags(BundleType, TX) -> TX#tx{tags = FilteredBundleTags ++ TX#tx.tags }. %% @doc Reset the data size of a data item. Assumes that the data is already normalized. -normalize_data_size(Item = #tx{data = Bin}) when is_binary(Bin) -> +normalize_data_size(Item = #tx{data = Bin}) + when is_binary(Bin) andalso Bin =/= ?DEFAULT_DATA -> Item#tx{data_size = byte_size(Bin)}; normalize_data_size(Item) -> Item. 
@@ -189,12 +214,145 @@ reset_owner_address(TX) -> TX#tx{owner_address = ar_tx:get_owner_address(TX)}. +normalize_data_root(Item = #tx{data = Bin, format = 1}) + when is_binary(Bin) andalso Bin =/= ?DEFAULT_DATA -> + Item#tx{data_root = ar_tx:data_root(legacy, Bin)}; normalize_data_root(Item = #tx{data = Bin, format = 2}) when is_binary(Bin) andalso Bin =/= ?DEFAULT_DATA -> - Item#tx{data_root = ar_tx:data_root(Bin)}; + Item#tx{data_root = ar_tx:data_root(arweavejs, Bin)}; normalize_data_root(Item) -> Item. +serialize_sig_type({rsa, 65537}) -> ?RSA_SIGN_TYPE; +serialize_sig_type({ecdsa, secp256k1}) -> ?ECDSA_SIGN_TYPE; +serialize_sig_type(?EDDSA_KEY_TYPE) -> ?EDDSA_SIGN_TYPE; +serialize_sig_type(?SOLANA_KEY_TYPE) -> ?SOLANA_SIGN_TYPE; +serialize_sig_type(?ETHEREUM_KEY_TYPE) -> ?ETHEREUM_SIGN_TYPE; +serialize_sig_type(?TYPED_ETHEREUM_KEY_TYPE) -> ?TYPED_ETHEREUM_SIGN_TYPE; +serialize_sig_type(Type) -> + ?event(error, {signature_type, {type, Type}}), + throw({invalid_signature_type, Type}). + +deserialize_sig_type(?RSA_SIGN_TYPE) -> {rsa, 65537}; +deserialize_sig_type(?ECDSA_SIGN_TYPE) -> {ecdsa, secp256k1}; +deserialize_sig_type(?EDDSA_SIGN_TYPE) -> ?EDDSA_KEY_TYPE; +deserialize_sig_type(?SOLANA_SIGN_TYPE) -> ?SOLANA_KEY_TYPE; +deserialize_sig_type(?ETHEREUM_SIGN_TYPE) -> ?ETHEREUM_KEY_TYPE; +deserialize_sig_type(?TYPED_ETHEREUM_SIGN_TYPE) -> ?TYPED_ETHEREUM_KEY_TYPE; +deserialize_sig_type(<<"unsigned-sha256">>) -> {rsa, 65537}; +deserialize_sig_type(Type) -> + ?event(error, {signature_type, {type, Type}}), + throw({invalid_signature_type, Type}). + %% @doc Turn off debug_print_verify when logging within the to/from functions %% to avoid infinite recursion. log_conversion(Topic, X) -> - ?event(Topic, X, #{debug_print_verify => false}). \ No newline at end of file + ?event(Topic, X, #{debug_print_verify => false}). +%%%=================================================================== +%%% Tests. 
+%%%=================================================================== + +tagfind_test() -> + Default = <<"default">>, + ?assertEqual( + <<"v1">>, + tagfind(<<"Foo">>, [{<<"fOo">>, <<"v1">>}], Default) + ), + ?assertEqual( + Default, + tagfind(<<"Missing">>, [{<<"foo">>, <<"v">>}], Default) + ). + + +type_test() -> + % Basic type from tags + assert_type(binary, []), + assert_type(binary, [{<<"tag">>, <<"value">>}]), + assert_type(list, [ + {<<"bundle-format">>, <<"binary">>}, + {<<"tag">>, <<"value">>}, + {<<"bundle-version">>, <<"2.0.0">>}]), + assert_type(map, [ + {<<"bundle-format">>, <<"binary">>}, + {<<"tag">>, <<"value">>}, + {<<"bundle-version">>, <<"2.0.0">>}, + {<<"bundle-map">>, <<"JmtD0fwFqJTK4P_XexVqBQdnDc0-C7FFIOge6GEOJE8">>}]), + % L1 TX with bundle tags, but data is not a valid bundle. + ?assertEqual(binary, + type(#tx{ + format = 1, + tags = [ + {<<"bundle-format">>, <<"binary">>}, + {<<"bundle-version">>, <<"2.0.0">>}], + data = <<"not a bundle">> + })), + ?assertEqual(binary, + type(#tx{ + format = 2, + tags = [ + {<<"bundle-format">>, <<"binary">>}, + {<<"bundle-version">>, <<"2.0.0">>}], + data = <<"not a bundle">> + })), + ?assertEqual(binary, + type(#tx{ + format = 1, + tags = [ + {<<"bundle-format">>, <<"binary">>}, + {<<"bundle-version">>, <<"2.0.0">>}], + data = <<1:256/little, <<"not a bundle">>/binary>> + })), + ?assertEqual(binary, + type(#tx{ + format = 2, + tags = [ + {<<"bundle-format">>, <<"binary">>}, + {<<"bundle-version">>, <<"2.0.0">>}], + data = <<1:256/little, <<"not a bundle">>/binary>> + })), + % L1 TX with bundle tags, and non-binary data + ?assertEqual(list, + type(#tx{ + format = 1, + tags = [ + {<<"bundle-format">>, <<"binary">>}, + {<<"bundle-version">>, <<"2.0.0">>}], + data = [] + })), + ?assertEqual(list, + type(#tx{ + format = 2, + tags = [ + {<<"bundle-format">>, <<"binary">>}, + {<<"bundle-version">>, <<"2.0.0">>}], + data = [] + })), + ?assertEqual(map, + type(#tx{ + format = 1, + tags = [ + {<<"bundle-format">>, 
<<"binary">>}, + {<<"bundle-version">>, <<"2.0.0">>}, + {<<"bundle-map">>, <<"JmtD0fwFqJTK4P_XexVqBQdnDc0-C7FFIOge6GEOJE8">>}], + data = #{ + <<"1">> => <<"value1">>, + <<"2">> => <<"value2">> + } + })), + ?assertEqual(map, + type(#tx{ + format = 2, + tags = [ + {<<"bundle-format">>, <<"binary">>}, + {<<"bundle-version">>, <<"2.0.0">>}, + {<<"bundle-map">>, <<"JmtD0fwFqJTK4P_XexVqBQdnDc0-C7FFIOge6GEOJE8">>}], + data = #{ + <<"1">> => <<"value1">>, + <<"2">> => <<"value2">> + } + })), + ok. + +assert_type(ExpectedType, Tags) -> + ?assertEqual(ExpectedType, type(#tx{format = 1, tags = Tags})), + ?assertEqual(ExpectedType, type(#tx{format = 2, tags = Tags})), + ?assertEqual(ExpectedType, type(#tx{format = ans104, tags = Tags})). \ No newline at end of file diff --git a/src/dev_arweave_offset.erl b/src/dev_arweave_offset.erl new file mode 100644 index 000000000..ec21ac443 --- /dev/null +++ b/src/dev_arweave_offset.erl @@ -0,0 +1,489 @@ +%%% @doc A module for the Arweave device that implements the default key +%%% resolution logic. The default key returns slices of bytes inside Arweave as +%%% message representations. +-module(dev_arweave_offset). +-export([get/4]). +-include("include/hb.hrl"). +-include_lib("eunit/include/eunit.hrl"). + +%% @doc Resolve either a message at an Arweave offset, or a direct key from the +%% base message if the key is not an integer. +get(Key, Base, Request, Opts) -> + case parse(Key) of + {ok, StartOffset, Length} -> + load_item_at_offset(StartOffset, Length, Opts); + error -> + dev_message:get(Key, Base, Request, Opts) + end. + +%% @doc Parse a path key as a global Arweave start offset. The supported syntax +%% is as follows: +%% ``` +%% Reference :: Offset-Length +%% Offset :: [Unit] +%% Length :: +%% Unit :: +%% b : The global Arweave offset in absolute bytes (default). +%% k[i][b] : The global Arweave offset in absolute kilobytes or kibibytes. +%% m[i][b] : The global Arweave offset in absolute megabytes or mebibytes. 
%%     g[i][b] : The global Arweave offset in absolute gigabytes or gibibytes.
%%     t[i][b] : The global Arweave offset in absolute terabytes or tebibytes.
%%     p[i][b] : The global Arweave offset in absolute petabytes or pebibytes.
%%     e[i][b] : The global Arweave offset in absolute exabytes or exbibytes.
%%     z[i][b] : The global Arweave offset in absolute zettabytes or zebibytes.
%%     y[i][b] : The global Arweave offset in absolute yottabytes or yobibytes.
%% ```
%% In the scheme above, the `i` modifier in units indicates that the unit is in
%% binary multiples of the base unit. For example, `kib` is 1024 bytes, `mib` is
%% 1024 * 1024 bytes, etc. By contrast, the plain units are decimal: `kb` is
%% 1000 bytes, `mb` is 1000 * 1000 bytes, etc. To keep references short, the
%% trailing `b` is always implied and need not be specified.
%%
%% Returns `{ok, Offset, Length}', where `Length' is `undefined' when the key
%% has no `-Length' part, or `error' when any step of the parse raises (for
%% example a non-binary key, an unknown unit, or a non-integer length).
parse(Key) ->
    try
        % Only the first `-' splits, so the result has one or two elements.
        [OffsetPart | LengthParts] = binary:split(Key, <<"-">>),
        Length =
            case LengthParts of
                [] -> undefined;
                [LengthPart] -> hb_util:int(LengthPart)
            end,
        {ok, unit(OffsetPart), Length}
    catch
        _:_ -> error
    end.

%% @doc Parses and applies a unit modifier to a base value, supporting both
%% the `kb` and `kib` unit formats. Entry point: starts `unit/2' with an
%% accumulator of 0.
unit(Reference) ->
    unit(0, Reference).
%% Worker for `unit/1'. `Base' accumulates the decimal digits read so far; once
%% a unit suffix is reached it is scaled step by step down to bytes.
%%
%% - An empty remainder or a lone `b' returns the accumulated value.
%% - Each leading ASCII digit is folded into `Base'.
%% - A binary unit (`ki', `mi', ... `yi') multiplies by 1024 and recurses with
%%   the next smaller binary unit; a decimal unit (`k', `m', ... `y')
%%   multiplies by 1000 and recurses with the next smaller decimal unit. The
%%   two-letter binary prefixes are listed first so that `ki' is never
%%   mistaken for `k'.
%% - Anything after the recognised unit prefix is ignored (`_/binary').
%% - Unit clauses require `Base > 0'; a unit with no preceding non-zero number,
%%   or an unrecognised suffix, raises `function_clause', which `parse/1'
%%   turns into `error'.
unit(Complete, <<>>) -> Complete;
% The digit clause must bind both the digit and the remainder it recurses on.
unit(Base, <<Int, Rest/binary>>) when Int >= $0 andalso Int =< $9 ->
    unit(Base * 10 + (Int - $0), Rest);
unit(Base, <<"b">>) -> Base;
unit(Base, <<"ki", _/binary>>) when Base > 0 -> unit(Base * 1024, <<"b">>);
unit(Base, <<"mi", _/binary>>) when Base > 0 -> unit(Base * 1024, <<"ki">>);
unit(Base, <<"gi", _/binary>>) when Base > 0 -> unit(Base * 1024, <<"mi">>);
unit(Base, <<"ti", _/binary>>) when Base > 0 -> unit(Base * 1024, <<"gi">>);
unit(Base, <<"pi", _/binary>>) when Base > 0 -> unit(Base * 1024, <<"ti">>);
unit(Base, <<"ei", _/binary>>) when Base > 0 -> unit(Base * 1024, <<"pi">>);
unit(Base, <<"zi", _/binary>>) when Base > 0 -> unit(Base * 1024, <<"ei">>);
unit(Base, <<"yi", _/binary>>) when Base > 0 -> unit(Base * 1024, <<"zi">>);
unit(Base, <<"k", _/binary>>) when Base > 0 -> unit(Base * 1000, <<"b">>);
unit(Base, <<"m", _/binary>>) when Base > 0 -> unit(Base * 1000, <<"k">>);
unit(Base, <<"g", _/binary>>) when Base > 0 -> unit(Base * 1000, <<"m">>);
unit(Base, <<"t", _/binary>>) when Base > 0 -> unit(Base * 1000, <<"g">>);
unit(Base, <<"p", _/binary>>) when Base > 0 -> unit(Base * 1000, <<"t">>);
unit(Base, <<"e", _/binary>>) when Base > 0 -> unit(Base * 1000, <<"p">>);
unit(Base, <<"z", _/binary>>) when Base > 0 -> unit(Base * 1000, <<"e">>);
unit(Base, <<"y", _/binary>>) when Base > 0 -> unit(Base * 1000, <<"z">>).

%% @doc Load an ANS-104 item whose header begins at the given global offset.
%% When a length is supplied it is treated as the exact ANS-104 data length, so
%% we can skip bundle index discovery and read only the remaining payload bytes.
load_item_at_offset(ExplicitOffset, Length, Opts) when is_integer(Length) ->
    % With an explicit length only the chunk holding the item header has to be
    % fetched here; anything other than a located chunk is returned unchanged.
    case chunk_from_offset(ExplicitOffset, Opts) of
        {ok, _ChunkJSON, HeaderChunk} ->
            ?event(
                arweave_offset_lookup,
                {loaded_explicit_offset,
                    {explicit_offset, ExplicitOffset},
                    {length, Length}
                },
                Opts
            ),
            load_item_from_data_size(ExplicitOffset, Length, HeaderChunk, Opts);
        NoChunk ->
            NoChunk
    end;
load_item_at_offset(TargetOffset, undefined, Opts) ->
    % Without a length, the containing item is located through the bundle
    % index first.
    case message_from_offset(TargetOffset, Opts) of
        {ok, ItemStart, SerializedSize, HeaderChunk} ->
            load_item_from_serialized_size(
                ItemStart,
                SerializedSize,
                HeaderChunk,
                Opts
            );
        false ->
            {error, invalid_item_size};
        Failure ->
            Failure
    end.

%% @doc Load an item when the exact ANS-104 data length is already known.
%% Decodes the header from `FirstChunk' and delegates to
%% `load_item_from_header/5'; a header that does not decode to
%% `{ok, HeaderSize, HeaderTX}' is returned as-is.
load_item_from_data_size(StartOffset, DataSize, FirstChunk, Opts) ->
    case deserialize_header(FirstChunk) of
        {ok, HeaderSize, HeaderTX} ->
            load_item_from_header(
                StartOffset,
                HeaderSize,
                HeaderTX,
                DataSize,
                Opts
            );
        BadHeader ->
            BadHeader
    end.

%% @doc Load an item when its serialized size is known from the containing
%% bundle index. The data length is the serialized size minus the decoded
%% header size; a header larger than the item yields
%% `{error, invalid_item_size}'.
load_item_from_serialized_size(StartOffset, ItemSize, FirstChunk, Opts) ->
    case deserialize_header(FirstChunk) of
        {ok, HeaderSize, HeaderTX} when HeaderSize =< ItemSize ->
            load_item_from_header(
                StartOffset,
                HeaderSize,
                HeaderTX,
                ItemSize - HeaderSize,
                Opts
            );
        {ok, _HeaderSize, _HeaderTX} ->
            {error, invalid_item_size};
        false ->
            {error, invalid_item_size};
        BadHeader ->
            BadHeader
    end.

%% @doc Complete an item load once the header has been decoded, using any data
%% bytes that were already present after the header before reading the tail.
load_item_from_header(StartOffset, HeaderSize, HeaderTX, DataSize, Opts) ->
    {HeaderData, RemainingLength} =
        split_header_data(HeaderTX#tx.data, DataSize),
    ?event(
        arweave_offset_lookup,
        {calculating_message_from_offset,
            {start_offset, StartOffset},
            {header_size, HeaderSize},
            {data_size, DataSize},
            {header_data, HeaderData},
            {remaining_length, RemainingLength}
        },
        Opts
    ),
    TailResult =
        read_remaining_data(
            StartOffset,
            HeaderSize,
            byte_size(HeaderData),
            RemainingLength,
            Opts
        ),
    case TailResult of
        {ok, RemainingData} ->
            % Stitch the bytes that came with the header chunk onto the tail.
            FullTX =
                HeaderTX#tx{
                    data = << HeaderData/binary, RemainingData/binary >>,
                    data_size = DataSize
                },
            Message =
                hb_message:convert(
                    FullTX,
                    <<"structured@1.0">>,
                    <<"ans104@1.0">>,
                    Opts
                ),
            {ok, Message};
        ReadFailure ->
            ReadFailure
    end.

%% @doc Read the chunk containing the given offset and trim it to begin at the
%% first byte of the requested item. Returns `{ok, ChunkJSON, TrimmedChunk}',
%% or whatever `dev_arweave:get_chunk/2' returned when that is not
%% `{ok, ChunkJSON}'.
chunk_from_offset(StartOffset, Opts) ->
    maybe
        {ok, ChunkJSON} ?= dev_arweave:get_chunk(StartOffset + 1, Opts),
        ChunkSize = hb_util:int(maps:get(<<"chunk_size">>, ChunkJSON)),
        AbsEnd = hb_util:int(maps:get(<<"absolute_end_offset">>, ChunkJSON)),
        Chunk = hb_util:decode(maps:get(<<"chunk">>, ChunkJSON)),
        % Number of chunk bytes that precede the requested offset.
        FirstChunkByte = AbsEnd - ChunkSize + 1,
        Skip = (StartOffset + 1) - FirstChunkByte,
        Trimmed = binary:part(Chunk, Skip, byte_size(Chunk) - Skip),
        {ok, ChunkJSON, Trimmed}
    end.

%% @doc Safe wrapper for ANS-104 header deserialization: any exception raised
%% by `ar_bundles:deserialize_header/1' becomes an `{error, Reason}' tuple.
deserialize_header(Binary) ->
    try
        ar_bundles:deserialize_header(Binary)
    catch
        _:_ -> {error, <<"Invalid message header">>}
    end.

%% @doc Split the bytes already present after a decoded header from those that
%% still need to be read from Arweave. Returns `{Prefix, StillNeeded}', where
%% `Prefix' is at most `DataSize' bytes taken from the front of `HeaderData'.
split_header_data(HeaderData, DataSize) ->
    Available = byte_size(HeaderData),
    PrefixSize = min(Available, DataSize),
    Prefix = binary:part(HeaderData, 0, PrefixSize),
    {Prefix, DataSize - PrefixSize}.

%% @doc Read any bytes of the data segment that were not present in the first
%% header chunk.
read_remaining_data(StartOffset, HeaderSize, PrefixSize, Length, Opts) ->
    case Length of
        0 ->
            % Everything was already in the header chunk; no read is issued.
            {ok, <<>>};
        _ ->
            hb_store_arweave:read_chunks(
                StartOffset + HeaderSize + PrefixSize,
                Length,
                Opts
            )
    end.

%% @doc Locate the deepest bundled item that contains the given global offset.
%% Fetches the chunk at `TargetOffset', derives the start of the enclosing
%% bundle from it, and starts the search in `message_from_offset/5'. A failed
%% chunk lookup is returned unchanged.
message_from_offset(TargetOffset, Opts) ->
    case chunk_from_offset(TargetOffset, Opts) of
        {ok, ChunkJSON, FirstChunk} ->
            message_from_offset(
                TargetOffset,
                bundle_start_offset(ChunkJSON),
                TargetOffset,
                FirstChunk,
                Opts
            );
        NoChunk ->
            NoChunk
    end.

%% @doc Recover the global start offset of the containing bundle from the end
%% offset of the chunk in global space and its end offset inside the bundle.
%% The in-bundle end offset is the note that `ar_merkle:extract_note/1' reads
%% from the chunk's decoded `data_path'.
bundle_start_offset(ChunkJSON) ->
    GlobalEnd = hb_util:int(maps:get(<<"absolute_end_offset">>, ChunkJSON)),
    DataPath = hb_util:decode(maps:get(<<"data_path">>, ChunkJSON)),
    GlobalEnd - ar_merkle:extract_note(DataPath).

%% Search step: read the header of the bundle starting at `BundleStartOffset',
%% pick the member containing `TargetOffset', then check whether that member
%% is itself a bundle. `KnownOffset'/`KnownChunk' carry the chunk that was
%% already fetched for the original request so it can be reused.
message_from_offset(TargetOffset, BundleStartOffset, KnownOffset, KnownChunk, Opts) ->
    case dev_arweave:bundle_header(BundleStartOffset, Opts) of
        {ok, HeaderSize, BundleIndex} ->
            FirstItemOffset = BundleStartOffset + HeaderSize,
            Member =
                find_bundle_member(
                    TargetOffset,
                    FirstItemOffset,
                    BundleIndex,
                    Opts
                ),
            case Member of
                {ok, ItemStartOffset, ItemSize} ->
                    maybe_nested_item(
                        TargetOffset,
                        ItemStartOffset,
                        ItemSize,
                        KnownOffset,
                        KnownChunk,
                        Opts
                    );
                NoMember ->
                    NoMember
            end;
        NoHeader ->
            NoHeader
    end.

%% @doc If the containing item is itself a bundle and the offset lies in its
%% data payload, recurse into its bundle header. Otherwise return the item.
%% This arity only obtains the item's first chunk (via `item_chunk/4') and
%% hands over to the 7-argument version.
maybe_nested_item(
        TargetOffset,
        ItemStartOffset,
        ItemSize,
        KnownOffset,
        KnownChunk,
        Opts
    ) ->
    case item_chunk(ItemStartOffset, KnownOffset, KnownChunk, Opts) of
        {ok, FirstChunk} ->
            maybe_nested_item(
                TargetOffset,
                ItemStartOffset,
                ItemSize,
                FirstChunk,
                KnownOffset,
                KnownChunk,
                Opts
            );
        NoChunk ->
            NoChunk
    end.
%% Decide whether to descend into the located item. The search recurses into
%% the item's own bundle header only when all of the following hold:
%% - the item header decodes,
%% - `TargetOffset' is at or past the end of that header (so it lies in the
%%   item's data), and
%% - `dev_arweave_common:type/1' does not classify the header as `binary'.
%% Otherwise the located item itself is the answer:
%% `{ok, ItemStartOffset, ItemSize, FirstChunk}'.
%%
%% If the nested search reports `{error, not_found}', the located item is
%% returned as well. Any other failure is passed through unchanged.
maybe_nested_item(
        TargetOffset,
        ItemStartOffset,
        ItemSize,
        FirstChunk,
        KnownOffset,
        KnownChunk,
        Opts
    ) ->
    maybe
        {ok, HeaderSize, HeaderTX} ?= deserialize_header(FirstChunk),
        true ?= TargetOffset >= ItemStartOffset + HeaderSize,
        true ?= dev_arweave_common:type(HeaderTX) =/= binary,
        % This must be a `?=' match: `else' only sees values that failed a
        % `?=', so a plain final call would bypass the `{error, not_found}'
        % fallback below. On success the matched value is returned as-is.
        {ok, _NestedStart, _NestedSize, _NestedChunk} ?=
            message_from_offset(
                TargetOffset,
                ItemStartOffset + HeaderSize,
                KnownOffset,
                KnownChunk,
                Opts
            )
    else
        false -> {ok, ItemStartOffset, ItemSize, FirstChunk};
        {error, not_found} -> {ok, ItemStartOffset, ItemSize, FirstChunk};
        Error -> Error
    end.

%% @doc Reuse the first chunk we already have when the located item starts at the
%% same offset as the original request, otherwise fetch the item's first chunk.
%% The first clause matches only when both offset arguments are equal.
item_chunk(ItemStartOffset, ItemStartOffset, FirstChunk, _Opts) ->
    {ok, FirstChunk};
item_chunk(ItemStartOffset, _KnownOffset, _KnownChunk, Opts) ->
    case chunk_from_offset(ItemStartOffset, Opts) of
        {ok, _ChunkJSON, FirstChunk} -> {ok, FirstChunk};
        Error -> Error
    end.

%% @doc Locate the bundle member containing the given offset. Walks the
%% `[{ID, Size}]' index while advancing `ItemStartOffset' by each member's
%% size. Returns `{ok, MemberStartOffset, Size}' for the first member whose
%% byte range contains `TargetOffset'. Returns `{error, not_found}' when the
%% target lies before the current member start or the index is exhausted.
find_bundle_member(TargetOffset, ItemStartOffset, _BundleIndex, Opts)
        when TargetOffset < ItemStartOffset ->
    ?event(
        arweave_offset_lookup,
        {bundle_offset_search_exceeded_bounds,
            {target_offset, TargetOffset},
            {item_start_offset, ItemStartOffset}
        },
        Opts
    ),
    {error, not_found};
find_bundle_member(TargetOffset, ItemStartOffset, [{ID, Size} | _], Opts)
        when TargetOffset < ItemStartOffset + Size ->
    % The target offset is within the current bundle member.
    ?event(
        arweave_offset_lookup,
        {resolved_bundle_member, {id, ID}, {size, Size}},
        Opts
    ),
    {ok, ItemStartOffset, Size};
find_bundle_member(TargetOffset, ItemStartOffset, [{_ID, Size} | Rest], Opts) ->
    find_bundle_member(TargetOffset, ItemStartOffset + Size, Rest, Opts);
find_bundle_member(_TargetOffset, _ItemStartOffset, [], _Opts) ->
    {error, not_found}.
%%% Tests

%% @doc `parse/1' accepts plain byte offsets, an optional `-Length' suffix, and
%% decimal (`m') as well as binary (`tib') unit suffixes.
parse_offset_test() ->
    Cases =
        [
            {<<"160399272861859">>, {ok, 160399272861859, undefined}},
            {<<"160399272861859-498852">>, {ok, 160399272861859, 498852}},
            {<<"160399273000000">>, {ok, 160399273000000, undefined}},
            {<<"160399273000000-498852">>, {ok, 160399273000000, 498852}},
            {<<"160399273m">>, {ok, 160399273000000, undefined}},
            {<<"160399273m-498852">>, {ok, 160399273000000, 498852}},
            {<<"1337tib">>, {ok, 1337 * 1024 * 1024 * 1024 * 1024, undefined}}
        ],
    lists:foreach(
        fun({Key, Expected}) ->
            ?assertEqual(Expected, parse(Key))
        end,
        Cases
    ),
    ok.

%% @doc Resolve several offset references through the `arweave@2.9' device and
%% check the data size and content type of each resulting item.
offset_item_cases_test() ->
    Opts = #{},
    PNG = #{ <<"content-type">> => <<"image/png">> },
    JPEG = #{ <<"content-type">> => <<"image/jpeg">> },
    Cases =
        [
            % A simple message.
            {<<"160399272861859">>, 498852, PNG},
            % A reference with a given length.
            {<<"160399272861859-498852">>, 498852, PNG},
            % A reference to a byte in the middle of the test message.
            {<<"160399273000000">>, 498852, PNG},
            % A megabyte reference to the item, occurring in the middle of
            % the item.
            {<<"160399273m">>, 498852, PNG},
            {<<"384600234780716">>, 856691, JPEG}
        ],
    lists:foreach(
        fun({Path, DataSize, Tags}) ->
            assert_offset_item(Path, DataSize, Tags, Opts)
        end,
        Cases
    ),
    ok.

%% @doc Fetch item `/1/2' of a bundle TX over HTTP, compute that item's start
%% offset from the bundle headers, and check that resolving one byte past that
%% start yields the same item.
offset_nested_item_test() ->
    Opts = #{},
    TXID = <<"bndIwac23-s0K11TLC1N7z472sLGAkiOdhds87ZywoE">>,
    Node = hb_http_server:start_node(),
    ItemPath = <<"/~arweave@2.9/tx=", TXID/binary, "/1/2">>,
    {ok, Expected} = hb_http:get(Node, ItemPath, Opts),
    {ItemStartOffset, _ItemSize} =
        bundle_message_offset_from_tx(TXID, [1, 2], Opts),
    OffsetKey = hb_util:bin(ItemStartOffset + 1),
    assert_offset_matches(OffsetKey, Expected, Opts).
%% @doc Resolve `Path' on the `arweave@2.9' device and assert that the result
%% verifies, that its ANS-104 form has `data_size' and actual data length equal
%% to `DataSize', and that every key in `Tags' is present on the item with the
%% given value.
assert_offset_item(Path, DataSize, Tags, Opts) ->
    Device = #{ <<"device">> => <<"arweave@2.9">> },
    {ok, Item} = hb_ao:resolve(Device, Path, Opts),
    AsTX = hb_message:convert(Item, <<"ans104@1.0">>, <<"structured@1.0">>, Opts),
    ?assert(hb_message:verify(Item, all, Opts)),
    ?assertEqual(DataSize, AsTX#tx.data_size),
    ?assertEqual(DataSize, byte_size(AsTX#tx.data)),
    CheckTag =
        fun(Key, Value) ->
            ?assertEqual({ok, Value}, hb_maps:find(Key, Item, Opts))
        end,
    maps:foreach(CheckTag, Tags),
    ok.

%% @doc Resolve `Path' on the `arweave@2.9' device and assert that the result
%% verifies and has the same signed ID and ANS-104 `data_size' as `Expected'.
assert_offset_matches(Path, Expected, Opts) ->
    Device = #{ <<"device">> => <<"arweave@2.9">> },
    {ok, Item} = hb_ao:resolve(Device, Path, Opts),
    ExpectedTX =
        hb_message:convert(
            Expected,
            <<"ans104@1.0">>,
            <<"structured@1.0">>,
            Opts
        ),
    ActualTX =
        hb_message:convert(Item, <<"ans104@1.0">>, <<"structured@1.0">>, Opts),
    ?assert(hb_message:verify(Item, all, Opts)),
    ?assertEqual(
        hb_message:id(Expected, signed, Opts),
        hb_message:id(Item, signed, Opts)
    ),
    ?assertEqual(ExpectedTX#tx.data_size, ActualTX#tx.data_size),
    ok.

%% @doc Request `/arweave/tx/TXID/offset', turn the returned `offset' and
%% `size' into the TX start offset, and resolve the index `Path' below it with
%% `bundled_index_offset/3'.
bundle_message_offset_from_tx(TXID, Path, Opts) ->
    OffsetRequest =
        #{
            <<"path">> => <<"/arweave/tx/", TXID/binary, "/offset">>,
            <<"method">> => <<"GET">>
        },
    {ok, #{ <<"body">> := RawBody }} = hb_http:request(OffsetRequest, Opts),
    Decoded = hb_json:decode(RawBody),
    EndOffset = hb_util:int(maps:get(<<"offset">>, Decoded)),
    Size = hb_util:int(maps:get(<<"size">>, Decoded)),
    bundled_index_offset(EndOffset - Size, Path, Opts).
+ +bundled_index_offset(BundleStartOffset, [Index], Opts) -> + {ok, HeaderSize, BundleIndex} = + dev_arweave:bundle_header( + BundleStartOffset, + Opts + ), + nth_bundle_item(Index, BundleStartOffset + HeaderSize, BundleIndex); +bundled_index_offset(BundleStartOffset, [Index | Rest], Opts) -> + {ItemStartOffset, _ItemSize} = + bundled_index_offset(BundleStartOffset, [Index], Opts), + {ok, _ChunkJSON, FirstChunk} = chunk_from_offset(ItemStartOffset, Opts), + {ok, HeaderSize, _HeaderTX} = deserialize_header(FirstChunk), + bundled_index_offset(ItemStartOffset + HeaderSize, Rest, Opts). + +nth_bundle_item(1, ItemStartOffset, [{_ID, Size} | _]) -> + {ItemStartOffset, Size}; +nth_bundle_item(Index, ItemStartOffset, [{_ID, Size} | Rest]) when Index > 1 -> + nth_bundle_item(Index - 1, ItemStartOffset + Size, Rest). + +offset_as_name_resolver_lookup_test() -> + Opts = #{ + name_resolvers => [#{ <<"device">> => <<"arweave@2.9">> }], + on => + #{ + <<"request">> => [#{ <<"device">> => <<"name@1.0">> }] + } + }, + Node = hb_http_server:start_node(Opts), + {ok, Item} = + hb_http:get( + Node, + #{ + <<"path">> => <<"/">>, + <<"host">> => <<"152974576623958.localhost">> + }, + Opts + ), + ?assertEqual(<<"application/json">>, hb_ao:get(<<"content-type">>, Item, Opts)). diff --git a/src/dev_b32_name.erl b/src/dev_b32_name.erl new file mode 100644 index 000000000..e35c2200d --- /dev/null +++ b/src/dev_b32_name.erl @@ -0,0 +1,316 @@ +%%% @doc Allows Arweave message IDs to be used via their base32 encoding as +%%% subdomains on a HyperBEAM node. +-module(dev_b32_name). +-export([info/1]). +-include("include/hb.hrl"). +-include_lib("eunit/include/eunit.hrl"). + +info(_Opts) -> + #{ + default => fun get/4, + excludes => [<<"keys">>, <<"set">>] + }. 
%% @doc Try to resolve a 52-character subdomain back to its original TX ID.
%% Returns `{ok, ID}' when `decode/1' succeeds and `{error, not_found}' when
%% it yields `error'. The base message, request and options are not used.
get(Key, _, _HookMsg, _Opts) ->
    ?event({resolve_52char, {key, Key}}),
    Decoded = decode(Key),
    if
        Decoded =:= error ->
            ?event({not_base32_id, {key, Key}}),
            {error, not_found};
        true ->
            ?event({resolved_52char, {key, Key}, {id, Decoded}}),
            {ok, Decoded}
    end.

%% @doc If the key is a 52-byte binary, attempt to decode it as base32 and
%% return its human-readable ID. Any other key, or any exception raised while
%% decoding, yields `error'.
decode(Key) when byte_size(Key) == 52 ->
    try
        hb_util:human_id(base32:decode(Key))
    catch
        _:_ -> error
    end;
decode(_Key) ->
    error.

%% @doc Convert an ID into its base32 encoded string representation:
%% lower-cased, with all `=' padding characters removed, returned as a binary.
encode(ID) when ?IS_ID(ID) ->
    Base32 = hb_util:list(base32:encode(hb_util:native_id(ID))),
    Lowered = string:to_lower(Base32),
    Unpadded = string:replace(Lowered, "=", "", all),
    hb_util:bin(Unpadded).

%%% Tests

%% EUnit generator: runs the cases below in parallel. Cases wrapped in
%% `{timeout, 30, ...}' get a 30 second limit.
dev_b32_name_test_() ->
    {inparallel, [
        {timeout, 30, fun test_invalid_arns_and_not_52char_host_resolution_gives_404/0},
        fun test_key_to_id/0,
        {timeout, 30, fun test_empty_path_manifest/0},
        {timeout, 30, fun test_resolve_52char_subdomain_asset_if_txid_not_present/0},
        {timeout, 30, fun test_subdomain_matches_path_id_and_loads_asset/0},
        fun test_subdomain_matches_path_id/0,
        fun test_subdomain_does_not_match_path_id/0,
        {timeout, 30, fun test_manifest_subdomain_matches_path_id/0},
        {timeout, 30, fun test_manifest_subdomain_does_not_match_path_id/0}
    ]}.

%% A host whose subdomain is not 52 characters long (so `decode/1' yields
%% `error') must produce a 404 response.
test_invalid_arns_and_not_52char_host_resolution_gives_404() ->
    Opts = (dev_name:test_arns_opts())#{ port => 0 },
    Node = hb_http_server:start_node(Opts),
    Request =
        #{
            <<"path">> => <<"/">>,
            <<"host">> => <<"non-existing-subdomain.localhost">>
        },
    ?assertMatch(
        {error, #{<<"status">> := 404}},
        hb_http:get(Node, Request, Opts)
    ).

%% @doc Unit test for 52 char subdomain to TX ID logic
test_key_to_id() ->
    Encoded = <<"4nuojs5tw6xtfjbq47dqk6ak7n6tqyr3uxgemkq5z5vmunhxphya">>,
    ?assertEqual(
        <<"42jky7O3rzKkMOfHBXgK-304YjulzEYqHc9qyjT3efA">>,
        decode(Encoded)
    ).
+
+%% NOTE(review): every bare `<>' term in the tests below is not valid Erlang.
+%% The contents of those binary expressions appear to have been stripped from
+%% this copy of the patch. For the `host' values this is presumably the
+%% subdomain followed by ".localhost" (cf. subdomain/2) -- confirm against the
+%% upstream source before relying on these tests.
+
+%% @doc Resolving a 52 char subdomain without a TXID in the path should work.
+test_empty_path_manifest() ->
+    TestPath = <<"/">>,
+    Opts = manifest_opts(),
+    %% Test to load manifest with only subdomain
+    Subdomain = <<"4nuojs5tw6xtfjbq47dqk6ak7n6tqyr3uxgemkq5z5vmunhxphya">>,
+    Node = hb_http_server:start_node(Opts),
+    ?assertMatch(
+        {ok,
+            #{
+                <<"status">> := 200,
+                <<"commitments">> :=
+                    #{<<"Tqh6oIS2CLUaDY11YUENlvvHmDim1q16pMyXAeSKsFM">> := _}
+            }
+        },
+        hb_http:get(
+            Node,
+            #{
+                <<"path">> => TestPath,
+                <<"host">> => <>
+            },
+            Opts
+        )
+    ).
+
+%% @doc Loading assets from a manifest where only a 52 char subdomain is
+%% provided should work.
+test_resolve_52char_subdomain_asset_if_txid_not_present() ->
+    TestPath = <<"/assets/ArticleBlock-Dtwjc54T.js">>,
+    Opts = manifest_opts(),
+    %% Test to load asset with only subdomain (no TX ID present).
+    Subdomain = <<"4nuojs5tw6xtfjbq47dqk6ak7n6tqyr3uxgemkq5z5vmunhxphya">>,
+    Node = hb_http_server:start_node(Opts),
+    ?assertMatch(
+        {ok,
+            #{
+                <<"status">> := 200,
+                <<"commitments">> :=
+                    #{<<"oLnQY-EgiYRg9XyO7yZ_mC0Ehy7TFR3UiDhFvxcohC4">> := _}
+            }
+        },
+        hb_http:get(
+            Node,
+            #{
+                <<"path">> => TestPath,
+                <<"host">> => <>
+            },
+            Opts
+        )
+    ).
+
+%% @doc Loading assets from a manifest where a 52 char subdomain and TX ID
+%% is provided should work.
+test_subdomain_matches_path_id_and_loads_asset() ->
+    TestPath = <<"/42jky7O3rzKkMOfHBXgK-304YjulzEYqHc9qyjT3efA/assets/ArticleBlock-Dtwjc54T.js">>,
+    Opts = manifest_opts(),
+    %% Test to load the asset when the TX ID is present in the path as well as
+    %% in the subdomain.
+    Subdomain = <<"4nuojs5tw6xtfjbq47dqk6ak7n6tqyr3uxgemkq5z5vmunhxphya">>,
+    Node = hb_http_server:start_node(Opts),
+    ?assertMatch(
+        {ok,
+            #{
+                <<"status">> := 200,
+                <<"commitments">> :=
+                    #{<<"oLnQY-EgiYRg9XyO7yZ_mC0Ehy7TFR3UiDhFvxcohC4">> := _}
+            }
+        },
+        hb_http:get(
+            Node,
+            #{
+                <<"path">> => TestPath,
+                <<"host">> => <>
+            },
+            Opts
+        )
+    ).
+
+%% @doc Validate the behavior when a subdomain and primary path ID match. The
+%% duplicated ID in the request message stream should be ignored.
+test_subdomain_matches_path_id() ->
+    #{ id1 := ID1, opts := Opts } = test_opts(),
+    ?assertMatch(
+        {ok, 1},
+        hb_http:get(
+            hb_http_server:start_node(Opts),
+            #{
+                <<"path">> => <>,
+                <<"host">> => subdomain(ID1, Opts)
+            },
+            Opts
+        )
+    ).
+
+%% @doc Validate the behavior when a subdomain and primary path ID do not
+%% match. Both IDs should be executed, the subdomain first then the path ID.
+%% The test expects the combined request to fail with `not_found'.
+test_subdomain_does_not_match_path_id() ->
+    #{ id1 := ID1, id2 := ID2, opts := Opts }
+        = test_opts(),
+    ?assertMatch(
+        {error, not_found},
+        hb_http:get(
+            hb_http_server:start_node(Opts),
+            #{
+                <<"path">> => <>,
+                <<"host">> => subdomain(ID2, Opts)
+            },
+            Opts
+        )
+    ).
+
+%% @doc When both 52 char subdomain and TX ID are provided and equal, ignore
+%% the TXID from the assets path.
+test_manifest_subdomain_matches_path_id() ->
+    TestPath = <<"/42jky7O3rzKkMOfHBXgK-304YjulzEYqHc9qyjT3efA">>,
+    Opts = manifest_opts(),
+    Subdomain = <<"4nuojs5tw6xtfjbq47dqk6ak7n6tqyr3uxgemkq5z5vmunhxphya">>,
+    Node = hb_http_server:start_node(Opts),
+    ?assertMatch(
+        {ok,
+            #{
+                <<"status">> := 200,
+                <<"commitments">> :=
+                    #{<<"Tqh6oIS2CLUaDY11YUENlvvHmDim1q16pMyXAeSKsFM">> := _}
+            }
+        },
+        hb_http:get(
+            Node,
+            #{
+                <<"path">> => TestPath,
+                <<"host">> => <>
+            },
+            Opts
+        )
+    ).
+
+%% @doc When a valid 52 char subdomain TXID doesn't match the TX ID provided,
+%% the subdomain TXID is loaded, and tries to access the assets path defined.
+%% In this case, since no asset exists with this TX ID, it should load the
+%% index.
+test_manifest_subdomain_does_not_match_path_id() ->
+    TestPath = <<"/1rTy7gQuK9lJydlKqCEhtGLp2WWG-GOrVo5JdiCmaxs">>,
+    Opts = manifest_opts(),
+    Subdomain = <<"4nuojs5tw6xtfjbq47dqk6ak7n6tqyr3uxgemkq5z5vmunhxphya">>,
+    Node = hb_http_server:start_node(Opts),
+    ?assertMatch(
+        {ok,
+            #{
+                <<"commitments">> :=
+                    #{
+                        <<"1rTy7gQuK9lJydlKqCEhtGLp2WWG-GOrVo5JdiCmaxs">> := _
+                    }
+            }
+        },
+        hb_http:get(
+            Node,
+            #{
+                <<"path">> => TestPath,
+                %% NOTE(review): `<>' is not valid Erlang; the binary's
+                %% contents appear to have been stripped from this copy
+                %% (presumably the subdomain plus ".localhost") -- confirm.
+                <<"host">> => <>
+            },
+            Opts
+        )
+    ).
+
+%% @doc Build an isolated test environment: a fresh test store and wallet on
+%% an OS-assigned port (`port => 0'), four messages written to the cache, and
+%% node options that resolve names through `b32-name@1.0' behind the
+%% `name@1.0' request hook. Returns a map with the node `opts', the IDs
+%% returned by the cache writes, and the original messages.
+test_opts() ->
+    Store = [hb_test_utils:test_store()],
+    BaseOpts = #{ store => Store, priv_wallet => ar_wallet:new(), port => 0 },
+    Msg1 =
+        #{
+            <<"a">> => 1,
+            <<"b">> => 2,
+            <<"nested">> => #{
+                <<"z">> => 26
+            }
+        },
+    Msg2 =
+        #{
+            <<"a">> => 2,
+            <<"b">> => 4
+        },
+    MsgWithPath =
+        #{
+            <<"a">> => 3,
+            <<"b">> => 6,
+            <<"c">> => 9,
+            <<"path">> => <<"nested">>
+        },
+    SignedMsg3 =
+        hb_message:commit(
+            #{ <<"a">> => 3, <<"b">> => 6, <<"c">> => 9 },
+            BaseOpts
+        ),
+    {ok, UnsignedID1} = hb_cache:write(Msg1, BaseOpts),
+    {ok, UnsignedID2} = hb_cache:write(Msg2, BaseOpts),
+    {ok, UnsignedIDWithPath} = hb_cache:write(MsgWithPath, BaseOpts),
+    {ok, _UnsignedID3} = hb_cache:write(SignedMsg3, BaseOpts),
+    #{
+        opts =>
+            BaseOpts#{
+                store => Store,
+                name_resolvers => [#{ <<"device">> => <<"b32-name@1.0">> }],
+                on =>
+                    #{
+                        <<"request">> => [#{<<"device">> => <<"name@1.0">>}]
+                    }
+            },
+        id1 => UnsignedID1,
+        id2 => UnsignedID2,
+        %% NOTE(review): unlike `id1'/`id2', this entry holds the signed
+        %% message itself rather than an ID. subdomain/2 accepts either, but
+        %% the key name is misleading -- confirm this is intended.
+        id3 => SignedMsg3,
+        id_with_path => UnsignedIDWithPath,
+        messages => [Msg1, Msg2, SignedMsg3, MsgWithPath]
+    }.
+
+%% @doc Returns the subdomain for a given ID for testing purposes. When given
+%% something that is not an ID, its unsigned message ID is computed first.
+subdomain(ID, _Opts) when ?IS_ID(ID) ->
+    <<(encode(ID))/binary, ".localhost">>;
+subdomain(ID, Opts) ->
+    subdomain(hb_message:id(ID, unsigned, Opts), Opts).
+
+%% @doc Returns `Opts' with a test environment preloaded with manifest related
+%% IDs.
+manifest_opts() ->
+    %% Start from the manifest test environment and layer the node settings
+    %% for these tests on top: an OS-assigned port, a 30 second HTTP client
+    %% receive timeout, and name resolution followed by manifest handling in
+    %% the request hook.
+    BaseOpts = dev_manifest:test_env_opts(),
+    RequestHooks =
+        [
+            #{<<"device">> => <<"name@1.0">>},
+            #{<<"device">> => <<"manifest@1.0">>}
+        ],
+    BaseOpts#{
+        port => 0,
+        http_client_hackney_recv_timeout => 30_000,
+        name_resolvers => [#{ <<"device">> => <<"b32-name@1.0">> }],
+        on => #{ <<"request">> => RequestHooks }
+    }.
diff --git a/src/dev_blacklist.erl b/src/dev_blacklist.erl
new file mode 100644
index 000000000..b9a95bf56
--- /dev/null
+++ b/src/dev_blacklist.erl
@@ -0,0 +1,600 @@
+%%% @doc A request hook device for content moderation by blacklist.
+%%%
+%%% The node operator lists blacklist providers under the
+%%% `blacklist_providers' key of the node message options. Each provider can
+%%% be a message or a path that returns a message or binary. A binary returned
+%%% by a provider is parsed as a newline-delimited list of IDs. The IDs of all
+%%% providers are merged into a single cache (the union of all IDs).
+%%%
+%%% The device is intended for use as a `~hook@1.0' `on/request' handler. It
+%%% blocks a request when any ID present in the hook payload matches the
+%%% active blacklist. The cache is reloaded periodically by the process that
+%%% owns the cache table.
+%%% NOTE(review): earlier documentation also mentioned a `refresh' key for
+%%% forcing a reload (e.g. on node startup or from `~cron@1.0/every'), but
+%%% this module only exports `request/3' -- confirm whether that key exists.
+%%%
+%%% The principle of this device is the same as the content policies used in
+%%% the Arweave network: there is no central enforcement, but each node can
+%%% enforce its own content policies based on its own free choice and
+%%% configuration.
+%%%
+%%% Configuration options:
+%%% - blacklist_providers: List of providers to load, in AO format.
+%%% - blacklist_fallback: `halt' or `continue'; what to do with a request
+%%%   that arrives before the blacklist has been loaded.
+%%%   - halt: wait up to `blacklist_timeout' milliseconds, then answer 503.
+%%%   - continue: let the request proceed while the blacklist is loading.
+%%% - blacklist_timeout: How long, in milliseconds, a request waits for the
+%%%   blacklist to be loaded.
+%%% - blacklist_refresh_frequency: Seconds between reloads of the blacklist.
+%%% - blacklist_whitelist: List of endpoint paths that are never blocked.
+-module(dev_blacklist).
+-export([request/3]).
+
+-include("include/hb.hrl").
+-include_lib("eunit/include/eunit.hrl").
+
+%%% The default frequency at which the blacklist cache is refreshed in seconds.
+-define(DEFAULT_REFRESH_FREQUENCY, 60 * 5).
+%%% The default time, in milliseconds, a request waits for the cache to load.
+-define(DEFAULT_REQUEST_TIMEOUT, 1000).
+%% Fallback mode options: halt or continue
+-define(DEFAULT_FALLBACK_MODE, halt).
+%%% Paths that are served without consulting the blacklist.
+-define(DEFAULT_WHITELIST,
+    [<<"/~hyperbuddy@1.0/metrics">>,
+     <<"/~hyperbuddy@1.0/styles.css">>,
+     <<"/~hyperbuddy@1.0/fonts.css">>,
+     <<"/~hyperbuddy@1.0/script.js">>,
+     <<"/~hyperbuddy@1.0/bundle.js">>]).
+
+%% @doc Hook handler: block requests that involve blacklisted IDs.
+%%
+%% When no `blacklist_providers' option is configured the hook request is
+%% passed through untouched. When a blacklisted ID is found, the `body' of the
+%% hook request is replaced by a single message carrying status 451 and the
+%% blocked ID. Any other result of `is_match/2' (`{ok, Msg}' or
+%% `{error, Msg}') is returned as-is.
+request(_Base, HookReq, Opts) ->
+    ?event({hook_req, HookReq}),
+    case hb_opts:get(blacklist_providers, false, Opts) of
+        false ->
+            ?event({no_providers}),
+            {ok, HookReq};
+        _ ->
+            case is_match(HookReq, Opts) of
+                {blocked_txid, ID} ->
+                    ?event(blacklist, {blocked, ID}, Opts),
+                    {
+                        ok,
+                        HookReq#{
+                            <<"body">> =>
+                                [#{
+                                    <<"status">> => 451,
+                                    <<"reason">> => <<"content-policy">>,
+                                    <<"blocked-id">> => ID,
+                                    <<"body">> =>
+                                        <<
+                                            "Requested message blocked by this node's ",
+                                            "content policy. Blocked ID: ", ID/binary
+                                        >>
+                                }]
+                        }
+                    };
+                Response ->
+                    Response
+            end
+    end.
+
+%% @doc Check if the message contains any blacklisted IDs.
+%%
+%% Returns `{ok, Msg}' when the request path is whitelisted or no collected ID
+%% is blacklisted, `{blocked_txid, ID}' for the first blacklisted ID found, and
+%% `{error, Msg}' when the cache could not be made available.
+is_match(Msg, Opts) ->
+    WhitelistRoutes = hb_opts:get(blacklist_whitelist, ?DEFAULT_WHITELIST, Opts),
+    Path = hb_maps:get(<<"path">>, maps:get(<<"request">>, Msg, #{}), no_path),
+    case lists:member(Path, WhitelistRoutes) of
+        false ->
+            ?event({path_do_not_match_whitelist, {path, Path}}),
+            case ensure_cache_table(Msg, Opts) of
+                {ok, Msg1} ->
+                    IDs = collect_ids(Msg1, Opts),
+                    % The table name does not depend on the ID being checked,
+                    % so it is computed once rather than once per ID.
+                    Table = cache_table_name(Opts),
+                    IsBlacklisted = fun(ID) -> is_blacklisted(Table, ID) end,
+                    % Only the first match is reported, so stop at it.
+                    case lists:search(IsBlacklisted, IDs) of
+                        {value, ID} -> {blocked_txid, ID};
+                        false -> {ok, Msg1}
+                    end;
+                {error, Msg1} ->
+                    {error, Msg1}
+            end;
+        true ->
+            ?event({path_match_whitelist, {path, Path}}),
+            {ok, Msg}
+    end.
+
+%% @doc Whether `ID' is present in the cache table. In `continue' fallback
+%% mode `ensure_cache_table/2' can return before the owning process has
+%% created the table; a missing table is therefore treated as "nothing is
+%% blacklisted yet" instead of crashing the request with `badarg'.
+is_blacklisted(Table, ID) ->
+    try ets:member(Table, ID)
+    catch error:badarg -> false
+    end.
+
+%%% Internal
+
+%% @doc Load the blacklist of every configured provider into the cache table
+%% and stamp the table with the time of this refresh. Providers that fail
+%% contribute nothing. Returns `{ok, Total}', the number of newly inserted IDs.
+fetch_and_insert_ids(Opts) ->
+    Inserted =
+        [
+            case fetch_single_provider(Provider, Opts) of
+                {ok, Count} -> Count;
+                {error, _} -> 0
+            end
+        ||
+            Provider <- resolve_providers(Opts)
+        ],
+    Total = lists:sum(Inserted),
+    Table = cache_table_name(Opts),
+    RefreshedAt = os:system_time(millisecond),
+    ets:insert(Table, {<<"meta/last-refresh">>, RefreshedAt}),
+    ?event(
+        {table_inserted,
+            {get_last_refresh, ets:lookup(Table, <<"meta/last-refresh">>)},
+            {is_initialized, is_initialized(Table)}
+        }
+    ),
+    ?event(blacklist_short, {fetched_and_inserted_ids, Total}, Opts),
+    {ok, Total}.
+
+%% @doc Read the configured providers, falling back to an empty list when the
+%% option is missing or is not a list.
+resolve_providers(Opts) ->
+    Configured = hb_opts:get(blacklist_providers, [], Opts),
+    case is_list(Configured) of
+        true -> Configured;
+        false -> []
+    end.
+
+%% @doc Execute one provider and cache the IDs it yields. Returns
+%% `{ok, NewIDCount}', the provider's own `{error, _}' result, or
+%% `{error, {Class, Reason}}' when anything raises along the way.
+fetch_single_provider(Provider, Opts) ->
+    try
+        case execute_provider(Provider, Opts) of
+            {error, _} = Failure ->
+                ?event({execute_provider_error, Failure}),
+                Failure;
+            {ok, Blacklist} ->
+                cache_provider_ids(Blacklist, Opts)
+        end
+    catch
+        Class:Why ->
+            ?event({provider_fetch_error,
+                {type, Class}, {reason, Why}, {provider, Provider}}),
+            {error, {Class, Why}}
+    end.
+
+%% @doc Parse a provider's blacklist and insert its IDs into the cache table,
+%% storing the ID of the blacklist message as the value of each new entry.
+cache_provider_ids(Blacklist, Opts) ->
+    {ok, IDs} = parse_blacklist(Blacklist, Opts),
+    ?event({parsed_blacklist, {ids_lengh, length(IDs)}}),
+    BlacklistID = hb_message:id(Blacklist, all, Opts),
+    ?event({update_blacklist_cache,
+        {ids_lengh, length(IDs)}, {blacklist_id, BlacklistID}}),
+    Table = cache_table_name(Opts),
+    {ok, insert_ids(IDs, BlacklistID, Table, Opts)}.
+
+%% @doc Execute the blacklist provider, returning the result. A provider that
+%% loads to a binary is resolved as a path; one that loads to a list is
+%% resolved as a sequence of messages. Any other loaded value raises
+%% `case_clause'.
+execute_provider(Provider, Opts) ->
+    ?event({execute_provider, {provider, Provider}}),
+    case hb_cache:ensure_loaded(Provider, Opts) of
+        Msgs when is_list(Msgs) ->
+            hb_ao:resolve_many(Msgs, Opts);
+        Path when is_binary(Path) ->
+            hb_ao:resolve(#{ <<"path">> => Path }, Opts)
+    end.
+
+%% @doc Parse the blacklist body, returning `{ok, IDs}'.
+%%
+%% Accepts a link (loaded first), a list of lines, a message whose `body' key
+%% holds the blacklist, or a binary of newline-separated lines. A message
+%% without a `body' key yields `{error, missing_body}'.
+parse_blacklist(Link, Opts) when ?IS_LINK(Link) ->
+    parse_blacklist(hb_cache:ensure_loaded(Link, Opts), Opts);
+parse_blacklist(Body, _Opts) when is_list(Body) ->
+    {ok, lists:filtermap(fun parse_blacklist_line/1, Body)};
+parse_blacklist(Msg, Opts) when is_map(Msg) ->
+    % Previously a hard match inside a `maybe' block; a missing body now
+    % yields an explicit error instead of a badmatch raised from here.
+    case hb_maps:find(<<"body">>, Msg, Opts) of
+        {ok, Body} -> parse_blacklist(Body, Opts);
+        _ -> {error, missing_body}
+    end;
+parse_blacklist(Body, _Opts) when is_binary(Body) ->
+    Lines = binary:split(Body, <<"\n">>, [global]),
+    {ok, lists:filtermap(fun parse_blacklist_line/1, Lines)}.
+
+%% @doc Parse a single line of the blacklist body, returning the ID if it is valid,
+%% and `false' otherwise. Blank lines and lines starting with `#' are skipped.
+parse_blacklist_line(Line) ->
+    case trim_ascii(Line) of
+        <<>> -> false;
+        <<"#", _/binary>> -> false;
+        ID when ?IS_ID(ID) -> {true, hb_util:human_id(ID)};
+        _ -> false
+    end.
+
+%% @doc Fast ASCII-only whitespace trim (strips \r, \n, \s, \t).
+%% Avoids Unicode machinery of string:trim/2 for performance.
+%% Leading whitespace is dropped byte by byte; the remainder is then trimmed
+%% from the right.
+trim_ascii(<<C, Rest/binary>>) when C =:= $\s; C =:= $\t; C =:= $\r; C =:= $\n ->
+    trim_ascii(Rest);
+trim_ascii(Bin) ->
+    trim_ascii_right(Bin, byte_size(Bin)).
+
+%% @doc Return the first `Len' bytes of `Bin' with trailing ASCII whitespace
+%% removed, shrinking `Len' until the last byte kept is not whitespace.
+trim_ascii_right(_, 0) -> <<>>;
+trim_ascii_right(Bin, Len) ->
+    case binary:at(Bin, Len - 1) of
+        C when C =:= $\s; C =:= $\t; C =:= $\r; C =:= $\n ->
+            trim_ascii_right(Bin, Len - 1);
+        _ ->
+            binary:part(Bin, 0, Len)
+    end.
+
+%% @doc Collect all IDs found as elements of a given message, sorted and
+%% de-duplicated.
+collect_ids(Msg, Opts) -> lists:usort(collect_ids(Msg, [], Opts)).
+%% Accumulating worker for collect_ids/2. IDs are gathered from:
+%% - binaries that are themselves IDs;
+%% - the message inside an `{as, _, Msg}' tuple;
+%% - the target of a `{link, ID, _}' tuple;
+%% - a map's `path' (when it is an ID), the keys of its `commitments', and
+%%   every value of the map, recursively;
+%% - every element of a list, recursively.
+%% Anything else contributes nothing.
+collect_ids(Bin, Acc, _Opts) when ?IS_ID(Bin) -> [hb_util:human_id(Bin) | Acc];
+collect_ids(Bin, Acc, _Opts) when is_binary(Bin) -> Acc;
+collect_ids({as, _, Msg}, Acc, Opts) -> collect_ids(Msg, Acc, Opts);
+collect_ids({link, ID, _}, Acc, _Opts) when ?IS_ID(ID) ->
+    [hb_util:human_id(ID) | Acc];
+collect_ids(Msg, Acc, Opts) when is_map(Msg) ->
+    case hb_maps:get(<<"path">>, Msg, undefined, Opts) of
+        Path when ?IS_ID(Path) -> [hb_util:human_id(Path)];
+        _ -> []
+    end ++
+    hb_maps:keys(hb_maps:get(<<"commitments">>, Msg, #{}, Opts), Opts) ++
+    hb_maps:fold(
+        fun(_Key, Value, AccIn) -> collect_ids(Value, AccIn, Opts) end,
+        Acc,
+        Msg
+    );
+collect_ids(List, Acc, Opts) when is_list(List) ->
+    lists:foldl(
+        fun(Elem, AccIn) -> collect_ids(Elem, AccIn, Opts) end,
+        Acc,
+        List
+    );
+collect_ids(_Other, Acc, _Opts) -> Acc.
+
+%% @doc Insert a list of IDs into the cache table, returning the number of new IDs
+%% inserted. Each ID is inserted as a key with `Value' as its value; IDs that
+%% are already present keep their existing value and are not counted.
+%%
+%% `ets:insert_new/2' performs the existence check and the insert as a single
+%% operation, and the fold keeps the count in an accumulator so that very
+%% large blacklists are processed in constant stack space. As before, an
+%% element that is not an ID fails with `function_clause'.
+insert_ids(IDs, Value, Table, _Opts) ->
+    lists:foldl(
+        fun(ID, Count) when ?IS_ID(ID) ->
+            case ets:insert_new(Table, {ID, Value}) of
+                true -> Count + 1;
+                false -> Count
+            end
+        end,
+        0,
+        IDs
+    ).
+
+%% @doc Ensure the cache table exists.
+ensure_cache_table(Msg, Opts) ->
+    %% Fallback modes for a request that arrives before the cache is ready:
+    %% - continue: let the request through without waiting.
+    %% - halt: wait up to the request timeout, then answer with HTTP 503.
+    FallbackMode = hb_opts:get(blacklist_fallback, ?DEFAULT_FALLBACK_MODE, Opts),
+    RequestTimeout = hb_opts:get(blacklist_timeout, ?DEFAULT_REQUEST_TIMEOUT, Opts),
+    TableName = cache_table_name(Opts),
+    case is_initialized(TableName) of
+        false ->
+            hb_name:singleton(
+                TableName,
+                fun() -> cache_owner_init(TableName, Opts) end
+            ),
+            case FallbackMode of
+                halt -> await_initialized(TableName, RequestTimeout, Msg);
+                continue -> {ok, Msg}
+            end;
+        true ->
+            {ok, Msg}
+    end.
+
+%% @doc Body of the singleton process that owns the cache: create the named
+%% ets table, load the blacklist once, then enter the refresh loop.
+cache_owner_init(TableName, Opts) ->
+    ?event({creating_table, TableName}),
+    TableOpts =
+        [
+            named_table,
+            set,
+            public,
+            {read_concurrency, true},
+            {write_concurrency, true}
+        ],
+    ets:new(TableName, TableOpts),
+    ?event({table_created, TableName}),
+    fetch_and_insert_ids(Opts),
+    refresh_loop(Opts).
+
+%% @doc Wait up to `RequestTimeout' for the cache to become initialized.
+%% Returns `{ok, Msg}' once it is, or `{error, Msg}' with a 503 status and a
+%% "loading" body when the wait reports failure.
+await_initialized(TableName, RequestTimeout, Msg) ->
+    Ready =
+        hb_util:wait_until(
+            fun() -> is_initialized(TableName) end,
+            RequestTimeout
+        ),
+    case Ready of
+        true ->
+            {ok, Msg};
+        false ->
+            {error, Msg#{
+                <<"status">> => 503,
+                <<"body">> => <<"Loading blacklist ...">>
+            }}
+    end.
+
+%% @doc Whether the cache table exists and has been loaded at least once. The
+%% presence of the `meta/last-refresh' entry is the marker; its value is not
+%% inspected.
+is_initialized(TableName) ->
+    case ets:info(TableName) of
+        undefined -> false;
+        _ -> ets:lookup(TableName, <<"meta/last-refresh">>) =/= []
+    end.
+
+%% @doc Periodic refresh, run by the singleton process that owns the cache
+%% table. Schedules a `refresh' message to itself after the configured number
+%% of seconds, reloads the blacklist when it arrives and loops; a `stop'
+%% message ends the loop.
+refresh_loop(Opts) ->
+    Seconds =
+        hb_util:int(
+            hb_opts:get(
+                blacklist_refresh_frequency,
+                ?DEFAULT_REFRESH_FREQUENCY,
+                Opts
+            )
+        ),
+    timer:send_after(Seconds * 1000, self(), refresh),
+    receive
+        stop ->
+            ok;
+        refresh ->
+            fetch_and_insert_ids(Opts),
+            refresh_loop(Opts)
+    end.
+
+%% @doc Calculate the name of the cache table given the `Opts`.
+%% The name is an atom built from a fixed prefix and the human-readable ID
+%% derived from the node's wallet, so each distinct wallet gets its own table.
+cache_table_name(Opts) ->
+    Wallet = hb_opts:get(priv_wallet, hb:wallet(), Opts),
+    Address = hb_util:human_id(Wallet),
+    binary_to_atom(<<"~blacklist@1.0/cache/", Address/binary>>).
+
+%%% Tests
+
+%% @doc Build a fresh test environment: a new store and wallet, three signed
+%% messages written to the cache, and a signed blacklist message. Returns
+%% `{ok, Env}' with the options, the signed ID of the first message, the IDs
+%% returned by the cache writes of the second and third, and the ID of the
+%% stored blacklist.
+setup_test_env() ->
+    %% We need to create a new priv_wallet to avoid conflict when starting a
+    %% new node from an existing priv_wallet address.
+    Opts0 = #{
+        store => hb_test_utils:test_store(),
+        priv_wallet => ar_wallet:new()
+    },
+    Msg1 = hb_message:commit(#{ <<"body">> => <<"test-1">> }, Opts0),
+    Msg2 = hb_message:commit(#{ <<"body">> => <<"test-2">> }, Opts0),
+    Msg3 = hb_message:commit(#{ <<"body">> => <<"test-3">> }, Opts0),
+    SignedID1 = hb_message:id(Msg1, signed, Opts0),
+    {ok, _UnsignedID1} = hb_cache:write(Msg1, Opts0),
+    {ok, UnsignedID2} = hb_cache:write(Msg2, Opts0),
+    {ok, UnsignedID3} = hb_cache:write(Msg3, Opts0),
+    Blacklist =
+        #{
+            <<"data-protocol">> => <<"content-policy">>,
+            %% NOTE(review): `<>' is not valid Erlang; the binary's contents
+            %% appear to have been stripped from this copy. Judging by the
+            %% `blocked' event below it presumably lists SignedID1 and
+            %% UnsignedID2, one per line -- confirm against upstream.
+            <<"body">> => <>
+        },
+    BlacklistMsg = hb_message:commit(Blacklist, Opts0),
+    {ok, BlacklistID} = hb_cache:write(BlacklistMsg, Opts0),
+    ?event(
+        {test_env_setup,
+            {opts, Opts0},
+            {signed_id1, SignedID1},
+            {unsigned_id2, UnsignedID2},
+            {unsigned_id3, UnsignedID3},
+            {blocked, [SignedID1, UnsignedID2]}
+        }
+    ),
+    {ok, #{
+        opts => Opts0,
+        signed1=> SignedID1,
+        unsigned2=> UnsignedID2,
+        unsigned3 => UnsignedID3,
+        blacklist => BlacklistID
+    }}.
+
+%% @doc Test the blacklist device with a static blacklist that is in the local
+%% store.
+basic_test() ->
+    {ok, #{
+        opts := Opts0,
+        signed1 := SignedID1,
+        unsigned3 := UnsignedID3,
+        blacklist := BlacklistID
+    }} = setup_test_env(),
+    Opts1 =
+        Opts0#{
+            blacklist_providers => [BlacklistID],
+            on => #{
+                <<"request">> => #{ <<"device">> => <<"blacklist@1.0">> }
+            }
+        },
+    Node = hb_http_server:start_node(Opts1),
+    % A message that is not on the blacklist is served normally.
+    ?assertMatch(
+        {ok, <<"test-3">>},
+        hb_http:get(Node, <<"/", UnsignedID3/binary, "/body">>, Opts1)
+    ),
+    % A blacklisted message is refused with status 451.
+    ?assertMatch(
+        {error,
+            #{
+                <<"status">> := 451,
+                <<"reason">> := <<"content-policy">>
+            }},
+        hb_http:get(Node, SignedID1, Opts1)
+    ),
+    ok.
+
+%% @doc Ensure that, with `blacklist_timeout' set to 0, the first request is
+%% answered with a 503 "Loading blacklist ..." response.
+first_request_always_return_503_test() ->
+    {ok, #{
+        opts := Opts0,
+        unsigned3 := UnsignedID3
+    }} = setup_test_env(),
+    Opts1 = Opts0#{ blacklist_providers => [] },
+    Node = hb_http_server:start_node(Opts1#{blacklist_timeout => 0}),
+    ?assertMatch(
+        {failure, #{<<"status">> := 503, <<"body">> := <<"Loading blacklist ...">>}},
+        hb_http:get(Node, <<"/", UnsignedID3/binary, "/body">>, Opts1)
+    ).
+
+%% @doc Ensure that the default provider does not block any requests.
+default_provider_test() ->
+    {ok, #{
+        opts := Opts0,
+        signed1 := SignedID1,
+        unsigned3 := UnsignedID3
+    }} = setup_test_env(),
+    Opts1 = Opts0#{ blacklist_providers => [] },
+    Node = hb_http_server:start_node(Opts1),
+    ?assertMatch(
+        {ok, <<"test-3">>},
+        hb_http:get(Node, <<"/", UnsignedID3/binary, "/body">>, Opts1)
+    ),
+    % NOTE(review): the `<>' path below is not valid Erlang; the binary's
+    % contents appear to have been stripped from this copy (presumably the
+    % body path of SignedID1, which is otherwise unused) -- confirm.
+    ?assertMatch(
+        {ok, <<"test-1">>},
+        hb_http:get(Node, <>, Opts1)
+    ),
+    ok.
+
+%% @doc Test the blacklist device with a blacklist that is provided via HTTP.
+blacklist_from_external_http_test() ->
+    {ok, #{
+        opts := RemoteOpts = #{ store := RootStore },
+        signed1 := SignedID1,
+        unsigned3 := UnsignedID3,
+        blacklist := BlacklistID
+    }} = setup_test_env(),
+    % Start a node that we will ask to provide the blacklist via HTTP.
+    BlacklistHostNode = hb_http_server:start_node(RemoteOpts),
+    % Start a node that will use the blacklist host node to provide the blacklist
+    % via HTTP.
+    NodeOpts =
+        #{
+            store => RootStore,
+            priv_wallet => ar_wallet:new(),
+            blacklist_providers =>
+                [<<
+                    "/~relay@1.0/call?relay-method=GET&relay-path=",
+                    BlacklistHostNode/binary, BlacklistID/binary
+                >>],
+            on => #{
+                <<"request">> => #{ <<"device">> => <<"blacklist@1.0">> }
+            }
+        },
+    Node = hb_http_server:start_node(NodeOpts),
+    ?assertMatch(
+        {ok, <<"test-3">>},
+        hb_http:get(Node, <<"/", UnsignedID3/binary, "/body">>, NodeOpts)
+    ),
+    ?assertMatch(
+        {error,
+            #{
+                <<"status">> := 451,
+                <<"reason">> := <<"content-policy">>
+            }},
+        hb_http:get(Node, SignedID1, NodeOpts)
+    ).
+
+%% @doc Test that multiple providers merge their blacklists.
+multiple_providers_test() ->
+    {ok, #{
+        opts := Opts0,
+        signed1 := SignedID1,
+        unsigned2 := UnsignedID2,
+        unsigned3 := UnsignedID3
+    }} = setup_test_env(),
+    % NOTE(review): both `<>' bodies below are not valid Erlang; their
+    % contents appear to have been stripped from this copy. Judging by the
+    % assertions, the two lists presumably contain SignedID1 and UnsignedID2
+    % respectively -- confirm against upstream.
+    Blacklist1 = #{
+        <<"data-protocol">> => <<"content-policy">>,
+        <<"body">> => <>
+    },
+    Blacklist2 = #{
+        <<"data-protocol">> => <<"content-policy">>,
+        <<"body">> => <>
+    },
+    BlacklistMsg1 = hb_message:commit(Blacklist1, Opts0),
+    BlacklistMsg2 = hb_message:commit(Blacklist2, Opts0),
+    {ok, BlacklistID1} = hb_cache:write(BlacklistMsg1, Opts0),
+    {ok, BlacklistID2} = hb_cache:write(BlacklistMsg2, Opts0),
+    Opts1 = Opts0#{
+        blacklist_providers => [BlacklistID1, BlacklistID2],
+        on => #{
+            <<"request">> => #{ <<"device">> => <<"blacklist@1.0">> }
+        }
+    },
+    Node = hb_http_server:start_node(Opts1),
+    ?assertMatch(
+        {error, #{ <<"status">> := 451 }},
+        hb_http:get(Node, SignedID1, Opts1)
+    ),
+    ?assertMatch(
+        {error, #{ <<"status">> := 451 }},
+        hb_http:get(Node, <<"/", UnsignedID2/binary>>, Opts1)
+    ),
+    ?assertMatch(
+        {ok, <<"test-3">>},
+        hb_http:get(Node, <<"/", UnsignedID3/binary, "/body">>, Opts1)
+    ),
+    ok.
+
+%% @doc Test that a failing provider does not prevent other providers from
+%% contributing entries.
+provider_failure_resilience_test() ->
+    {ok, #{
+        opts := Opts0,
+        signed1 := SignedID1,
+        unsigned3 := UnsignedID3,
+        blacklist := BlacklistID
+    }} = setup_test_env(),
+    % A 43-character value that does not name any stored message.
+    BadProvider = <<"aaaabbbbccccddddeeeeffffgggghhhhiiiijjjjkkkk">>,
+    Opts1 = Opts0#{
+        blacklist_providers => [BadProvider, BlacklistID],
+        on => #{
+            <<"request">> => #{ <<"device">> => <<"blacklist@1.0">> }
+        }
+    },
+    Node = hb_http_server:start_node(Opts1),
+    ?assertMatch(
+        {error, #{ <<"status">> := 451 }},
+        hb_http:get(Node, SignedID1, Opts1)
+    ),
+    ?assertMatch(
+        {ok, <<"test-3">>},
+        hb_http:get(Node, <<"/", UnsignedID3/binary, "/body">>, Opts1)
+    ),
+    ok.
+
+%% @doc Test that the blacklist cache is refreshed periodically.
+refresh_periodically_test() ->
+    {ok, #{
+        opts := Opts0 = #{ store := Store },
+        signed1 := SignedID1,
+        unsigned3 := UnsignedID3
+    }} = setup_test_env(),
+    InitialBlacklist =
+        #{
+            <<"data-protocol">> => <<"content-policy">>,
+            <<"body">> => SignedID1
+        },
+    BlacklistMsg = hb_message:commit(InitialBlacklist, Opts0),
+    {ok, InitialBlacklistID} = hb_cache:write(BlacklistMsg, Opts0),
+    hb_store:make_link(Store, InitialBlacklistID, <<"mutable">>),
+    UpdatedBlacklist =
+        #{
+            <<"data-protocol">> => <<"content-policy">>,
+            % NOTE(review): `<>' is not valid Erlang; the binary's contents
+            % appear to have been stripped from this copy. Judging by the
+            % final assertion it presumably includes UnsignedID3 -- confirm.
+            <<"body">> => <>
+        },
+    UpdatedBlacklistMsg = hb_message:commit(UpdatedBlacklist, Opts0),
+    {ok, UpdatedBlacklistID} = hb_cache:write(UpdatedBlacklistMsg, Opts0),
+    % NOTE(review): this repeats the link to the initial blacklist made
+    % above; it looks redundant -- confirm it is not needed.
+    hb_store:make_link(Store, InitialBlacklistID, <<"mutable">>),
+    Opts1 = Opts0#{
+        blacklist_providers => [<<"/~cache@1.0/read?target=mutable">>],
+        on => #{
+            <<"request">> => #{ <<"device">> => <<"blacklist@1.0">> }
+        },
+        blacklist_refresh_frequency => 1
+    },
+    Node = hb_http_server:start_node(Opts1),
+    ?assertMatch(
+        {error, #{ <<"status">> := 451 }},
+        hb_http:get(Node, SignedID1, Opts1)
+    ),
+    ?assertMatch(
+        {ok, <<"test-3">>},
+        hb_http:get(Node, <<"/", UnsignedID3/binary, "/body">>, Opts1)
+    ),
+    % Point the mutable link at the updated blacklist. The request right
+    % after is still expected to succeed, i.e. before the next refresh.
+    hb_store:make_link(Store, UpdatedBlacklistID, <<"mutable">>),
+    ?assertMatch(
+        {ok, <<"test-3">>},
+        hb_http:get(Node, <<"/", UnsignedID3/binary, "/body">>, Opts1)
+    ),
+    % NOTE(review): the sleep equals the 1 second refresh frequency, so this
+    % leaves no margin for the refresh itself to complete; it may be flaky.
+    timer:sleep(1000),
+    ?assertMatch(
+        {error, #{ <<"status">> := 451 }},
+        hb_http:get(Node, <<"/", UnsignedID3/binary, "/body">>, Opts1)
+    ),
+    ok.
+
+%% @doc Test that parse_blacklist/2 can handle 1 million IDs within 2000ms.
+parse_blacklist_performance_test() ->
+    GenID = fun() ->
+        B64 = base64:encode(crypto:strong_rand_bytes(32)),
+        %% base64:encode of 32 bytes = 44 chars (with 1 '=' padding).
+        %% Taking the first 43 chars gives a valid 43-byte binary ID.
+        binary:part(B64, 0, 43)
+    end,
+    IDs = [GenID() || _ <- lists:seq(1, 1000000)],
+    Body = iolist_to_binary(lists:join(<<"\n">>, IDs)),
+    % Only the call to parse_blacklist/2 is timed, not the input generation.
+    Start = erlang:monotonic_time(millisecond),
+    {ok, Parsed} = parse_blacklist(Body, #{}),
+    Duration = erlang:monotonic_time(millisecond) - Start,
+    ?assert(length(Parsed) =:= 1000000),
+    ?assert(Duration =< 2000).
diff --git a/src/dev_bundler.erl b/src/dev_bundler.erl
index 8b0bf3a56..0ce8a1692 100644
--- a/src/dev_bundler.erl
+++ b/src/dev_bundler.erl
@@ -15,14 +15,18 @@
 %%% available for reading instantly (`optimistically'), even before the
 %%% transaction is dispatched.
 -module(dev_bundler).
--export([tx/3, item/3]).
+-export([tx/3, item/3, ensure_server/1, stop_server/0, get_state/0]).
+%%% Test-only exports.
+-export([start_mock_gateway/1]).
 -include("include/hb.hrl").
+-include("include/dev_bundler.hrl").
 -include_lib("eunit/include/eunit.hrl").
 
 %%% Default options.
 -define(SERVER_NAME, bundler_server).
 -define(DEFAULT_MAX_SIZE, 100_000_000). % 100 MB.
 -define(DEFAULT_MAX_IDLE_TIME, 300_000). % 5 minutes.
+-define(DEFAULT_BUNDLER_MAX_DISPATCH_TIMEOUT, 30_000). % 30 seconds.
 -define(DEFAULT_MAX_ITEMS, 1000).
 
 %%% Public interface.
@@ -31,13 +35,65 @@
 tx(Base, Req, Opts) ->
     item(Base, Req, Opts).
-%% @doc Implements an Arweave/`up.arweave.net'-compatible endpoint for +%% @doc Implements an `up.arweave.net'-compatible endpoint for %% bundling messages. -item(Base, Req, Opts) -> - PID = ensure_server(Opts), - PID ! {item, self(), Ref = make_ref(), Base, Req}, - receive - {response, Ref, Res} -> Res +item(_Base, Req, Opts) -> + ServerPID = ensure_server(Opts), + case verify_item(Req, Opts) of + {ok, Item} -> + ItemID = hb_message:id(Item, signed, Opts), + case cache_item(Item, Opts) of + ok -> + % Queue the item for bundling + % (fire-and-forget, ignore errors) + ServerPID ! {enqueue_item, Item}, + {ok, #{ + <<"id">> => ItemID, + <<"timestamp">> => erlang:system_time(millisecond) + }}; + {error, Reason} -> + ?event(bundler_short, {cache_write_failed, + {id, {explicit, ItemID}}, {reason, Reason}}), + {error, #{ + <<"status">> => 500, + <<"error">> => <<"cache_write_failed">>, + <<"details">> => list_to_binary(io_lib:format("~p", [Reason])) + }} + end; + {error, Reason} -> + {error, #{ + <<"status">> => 400, + <<"error">> => <<"invalid_item">>, + <<"details">> => list_to_binary(io_lib:format("~p", [Reason])) + }} + end. + +%% @doc Verify an item by extracting committed fields and checking signatures. +%% Returns {ok, Item} or {error, Reason}. +verify_item(Req, Opts) -> + case hb_message:with_only_committed(Req, Opts) of + {ok, Item} -> + case hb_message:verify(Item, all, Opts) of + true -> {ok, Item}; + false -> + ?event(bundler_short, {verify_failed, + {id, {explicit, hb_message:id(Item, signed, Opts)}}, + {reason, signature_verification_failed}}), + {error, signature_verification_failed} + end; + {error, Reason} -> + ?event(bundler_short, {verify_failed, {reason, Reason}}), + {error, Reason} + end. + +%% @doc Cache an item. +%% Returns ok or {error, Reason}. +cache_item(Item, Opts) -> + try + dev_bundler_cache:write_item(Item, Opts) + catch + Type:ExceptionReason -> + {error, {Type, ExceptionReason}} end. %%% Bundling server. 
@@ -45,13 +101,10 @@ item(Base, Req, Opts) -> %% @doc Return the PID of the bundler server. If the server is not running, %% it is started and registered with the name `?SERVER_NAME'. ensure_server(Opts) -> - case hb_name:lookup(?SERVER_NAME) of - undefined -> - PID = spawn(fun() -> init(Opts) end), - hb_name:register(?SERVER_NAME, PID), - hb_name:lookup(?SERVER_NAME); - PID -> PID - end. + hb_name:singleton( + ?SERVER_NAME, + fun() -> init(Opts) end + ). stop_server() -> case hb_name:lookup(?SERVER_NAME) of @@ -61,86 +114,120 @@ stop_server() -> hb_name:unregister(?SERVER_NAME) end. +%% @doc Return the current bundler server state for tests. +get_state() -> + case hb_name:lookup(?SERVER_NAME) of + undefined -> undefined; + PID -> + PID ! {get_state, self(), Ref = make_ref()}, + receive + {state, Ref, State} -> State + after 1000 -> timeout + end + end. + %% @doc Initialize the bundler server. init(Opts) -> - % Start the dispatcher to recover any in-progress bundles - dev_bundler_dispatch:ensure_dispatcher(Opts), - % Recover any unbundled items from cache - {UnbundledItems, RecoveredBytes} = recover_unbundled_items(Opts), - InitialState = #{ - max_size => hb_opts:get( - bundler_max_size, ?DEFAULT_MAX_SIZE, Opts), - max_idle_time => hb_opts:get( - bundler_max_idle_time, ?DEFAULT_MAX_IDLE_TIME, Opts), - max_items => hb_opts:get( - bundler_max_items, ?DEFAULT_MAX_ITEMS, Opts), - queue => UnbundledItems, - bytes => RecoveredBytes - }, - % If recovered items are ready to dispatch, do so immediately - State = maybe_dispatch(InitialState, Opts), - server(State, Opts). - -%% @doc Recover unbundled items from cache and calculate their total size. -%% Returns {Items, TotalBytes}. 
-recover_unbundled_items(Opts) -> - UnbundledItems = dev_bundler_cache:load_unbundled_items(Opts), - ?event({recovered_unbundled_items, length(UnbundledItems)}), - % Calculate total bytes for recovered items - RecoveredBytes = lists:foldl( - fun(Item, Acc) -> - Acc + erlang:external_size(Item) + NumWorkers = hb_opts:get(bundler_workers, ?DEFAULT_NUM_WORKERS, Opts), + Workers = lists:map( + fun(_) -> + WorkerPID = spawn_link(fun dev_bundler_task:worker_loop/0), + {WorkerPID, idle} end, - 0, - UnbundledItems + lists:seq(1, NumWorkers) ), - {UnbundledItems, RecoveredBytes}. + InitialState = #state{ + max_size = hb_opts:get(bundler_max_size, ?DEFAULT_MAX_SIZE, Opts), + max_idle_time = hb_opts:get( + bundler_max_idle_time, ?DEFAULT_MAX_IDLE_TIME, Opts), + max_items = hb_opts:get(bundler_max_items, ?DEFAULT_MAX_ITEMS, Opts), + queue = [], + bytes = 0, + workers = maps:from_list(Workers), + task_queue = queue:new(), + bundles = #{}, + opts = Opts + }, + dev_bundler_recovery:recover_unbundled_items(self(), Opts), + dev_bundler_recovery:recover_bundles(self(), Opts), + server(assign_tasks(InitialState), Opts). -%% @doc The main loop of the bundler server. Simply waits for messages to be -%% added to the queue, and then dispatches them when the queue is large enough. -server(State = #{ max_idle_time := MaxIdleTime }, Opts) -> +%% @doc The main loop of the bundler server. +server(State = #state{max_idle_time = MaxIdleTime}, Opts) -> receive - {item, From, Ref, _Base, Req} -> - From ! 
{response, Ref, {ok, <<"Message queued.">>}}, - server(maybe_dispatch(add_item(Req, State, Opts), Opts), Opts); + {enqueue_item, Item} -> + State1 = add_to_queue(Item, State, Opts), + server(assign_tasks(maybe_dispatch(State1)), Opts); + {dispatch_queue, Timestamp} -> + ?event(bundler_short, {dispatched_queue_start, calendar:now_to_universal_time(Timestamp)}), + server(assign_tasks(dispatch_queue(State)), Opts); + {recover_bundle, CommittedTX, Items} -> + State1 = recover_bundle(CommittedTX, Items, State), + server(assign_tasks(State1), Opts); + {task_complete, WorkerPID, Task, Result} -> + State1 = handle_task_complete(WorkerPID, Task, Result, State), + server(assign_tasks(State1), Opts); + {task_failed, WorkerPID, Task, Reason} -> + State1 = handle_task_failed(WorkerPID, Task, Reason, State), + server(assign_tasks(State1), Opts); + {retry_task, Task} -> + State1 = enqueue_task(Task, State), + server(assign_tasks(State1), Opts); + {get_state, From, Ref} -> + From ! {state, Ref, State}, + server(State, Opts); stop -> + maps:foreach( + fun(WorkerPID, _) -> WorkerPID ! stop end, + State#state.workers + ), exit(normal) after MaxIdleTime -> - Q = maps:get(queue, State), - dev_bundler_dispatch:dispatch(Q, Opts), - server(State#{ queue => [] }, Opts) + server(assign_tasks(dispatch_queue(State)), Opts) end. -%% @doc Add an item to the queue. Update the state with the new queue and -%% approximate total byte size of the queue. -add_item(Req, State = #{ queue := Queue, bytes := Bytes }, Opts) -> - {ok, Item} = hb_message:with_only_committed(Req, Opts), +%% @doc Add an enqueue_item to the queue. Update the state with the new queue +%% and approximate total byte size of the queue. +%% Note: Item has already been verified and cached before reaching here. 
+add_to_queue(Item, State = #state{queue = Queue, bytes = Bytes, dispatch_ref = DispatchRef}, Opts) -> ItemSize = erlang:external_size(Item), - ?event({adding_item, {item_size, ItemSize}, - {item, {explicit, hb_message:id(Item, signed, Opts)}}}), - ok = dev_bundler_cache:write_item(Item, Opts), - State#{ - queue => [Item | Queue], - bytes => Bytes + ItemSize - }. + NewQueue = [Item | Queue], + NewBytes = Bytes + ItemSize, + ?event(bundler_short, {queueing_item, + {id, {explicit, hb_message:id(Item, signed, Opts)}}, + {size, erlang:external_size(Item)}, + {queue_size, length(NewQueue)}, + {queue_bytes, NewBytes} + }), + UpdatedDispatchRef = if Queue =:= [] -> + MaxBundleDispatchTimeout = + hb_opts:get( + bundler_max_bundle_dispatch_delay, + ?DEFAULT_BUNDLER_MAX_DISPATCH_TIMEOUT, + Opts + ), + ?event(bundler_short, {scheduling_max_bundle_dispatch_timeout, {dispatch_timeout, MaxBundleDispatchTimeout}}, Opts), + erlang:send_after( + MaxBundleDispatchTimeout, + self(), + {dispatch_queue, erlang:timestamp()} + ); + true -> DispatchRef + end, + State#state{queue = NewQueue, bytes = NewBytes, dispatch_ref = UpdatedDispatchRef}. %% @doc Dispatch the queue if it is ready. %% Only dispatches up to max_items at a time to respect the limit. 
-maybe_dispatch(State = #{queue := Q, max_items := MaxItems}, Opts) -> - case dispatchable(State, Opts) of +maybe_dispatch(State = #state{queue = Q, max_items = MaxItems}) -> + case dispatchable(State) of true -> - % Only dispatch up to max_items, keep the rest in queue {ToDispatch, Remaining} = split_queue(Q, MaxItems), - dev_bundler_dispatch:dispatch(ToDispatch, Opts), - % Recalculate bytes for remaining items - RemainingBytes = lists:foldl( - fun(Item, Acc) -> Acc + erlang:external_size(Item) end, - 0, - Remaining - ), - NewState = State#{queue => Remaining, bytes => RemainingBytes}, - % Check if we should dispatch again (in case we have more than max_items) - maybe_dispatch(NewState, Opts); + State1 = create_bundle(ToDispatch, State), + NewState = State1#state{ + queue = Remaining, + bytes = queue_bytes(Remaining) + }, + maybe_dispatch(NewState); false -> State end. @@ -152,15 +239,251 @@ split_queue(Queue, MaxItems) -> {ToDispatch, Remaining}. %% @doc Returns whether the queue is dispatchable. -dispatchable(#{ queue := Q, max_items := MaxLen }, _Opts) - when length(Q) >= MaxLen -> +dispatchable(#state{queue = Q, max_items = MaxLen}) when length(Q) >= MaxLen -> true; -dispatchable(#{ bytes := Bytes, max_size := MaxSize }, _Opts) - when Bytes >= MaxSize -> +dispatchable(#state{bytes = Bytes, max_size = MaxSize}) when Bytes >= MaxSize -> true; -dispatchable(_State, _Opts) -> +dispatchable(_State) -> false. +%% @doc Return the total size of a queue of items. +queue_bytes(Items) -> + lists:foldl( + fun(Item, Acc) -> Acc + erlang:external_size(Item) end, + 0, + Items + ). + +%% @doc Dispatch all currently queued items immediately. +dispatch_queue(State = #state{queue = []}) -> + State; +dispatch_queue(State = #state{queue = Queue, dispatch_ref = DispatchRef}) -> + case is_reference(DispatchRef) of + true -> erlang:cancel_timer(DispatchRef); + false -> no_op + end, + create_bundle(Queue, State#state{queue = [], bytes = 0, dispatch_ref = undefined}). 
+ +%% @doc Create a bundle and enqueue its initial post task. +create_bundle([], State) -> + State; +create_bundle(Items, State = #state{bundles = Bundles, opts = Opts}) -> + BundleID = make_ref(), + Bundle = #bundle{ + id = BundleID, + items = Items, + status = initializing, + tx = undefined, + proofs = #{}, + start_time = erlang:timestamp() + }, + State1 = State#state{ + bundles = maps:put(BundleID, Bundle, Bundles) + }, + ?event( + bundler_short, + {dispatching_bundle, + {timestamp, dev_bundler_task:format_timestamp()}, + {bundle_id, BundleID}, + {num_items, length(Items)} + } + ), + Task = #task{ + bundle_id = BundleID, + type = post_tx, + data = Items, + opts = Opts + }, + enqueue_task(Task, State1). + +%% @doc Enqueue a task for worker execution. +enqueue_task(Task, State = #state{task_queue = Queue}) -> + State#state{task_queue = queue:in(Task, Queue)}. + +%% @doc Assign pending tasks to all idle workers. +assign_tasks(State = #state{workers = Workers}) -> + IdleWorkers = maps:filter( + fun(_, Status) -> Status =:= idle end, + Workers + ), + assign_tasks(maps:keys(IdleWorkers), State). + +assign_tasks([], State) -> + State; +assign_tasks([WorkerPID | Rest], State = #state{workers = Workers, task_queue = Queue}) -> + case queue:out(Queue) of + {{value, Task}, Queue1} -> + WorkerPID ! {execute_task, self(), Task}, + State1 = State#state{ + task_queue = Queue1, + workers = maps:put(WorkerPID, {busy, Task}, Workers) + }, + assign_tasks(Rest, State1); + {empty, _} -> + State + end. + +%% @doc Handle successful task completion. 
+handle_task_complete(WorkerPID, Task, Result, State = #state{ + workers = Workers, + bundles = Bundles + }) -> + #task{bundle_id = BundleID} = Task, + ?event(debug_bundler, dev_bundler_task:log_task(task_complete, Task, [])), + State1 = State#state{ + workers = maps:put(WorkerPID, idle, Workers) + }, + case maps:get(BundleID, Bundles, undefined) of + undefined -> + ?event(bundler_short, {bundle_not_found, BundleID}), + State1; + Bundle -> + task_completed(Task, Bundle, Result, State1) + end. + +%% @doc Handle task failure and schedule a retry. +handle_task_failed(WorkerPID, Task, Reason, State = #state{ + workers = Workers, + opts = Opts + }) -> + RetryCount = Task#task.retry_count, + BaseDelay = hb_opts:get( + retry_base_delay_ms, ?DEFAULT_RETRY_BASE_DELAY_MS, Opts), + MaxDelay = hb_opts:get( + retry_max_delay_ms, ?DEFAULT_RETRY_MAX_DELAY_MS, Opts), + Jitter = hb_opts:get(retry_jitter, ?DEFAULT_RETRY_JITTER, Opts), + BaseDelayWithBackoff = min(BaseDelay * (1 bsl RetryCount), MaxDelay), + JitterFactor = (rand:uniform() * 2 - 1) * Jitter, + Delay = round(BaseDelayWithBackoff * (1 + JitterFactor)), + ?event( + bundler_short, + dev_bundler_task:log_task(task_failed_retrying, Task, [ + {reason, {explicit, Reason}}, + {retry_count, RetryCount}, + {delay_ms, Delay} + ]) + ), + Task1 = Task#task{retry_count = RetryCount + 1}, + erlang:send_after(Delay, self(), {retry_task, Task1}), + State#state{ + workers = maps:put(WorkerPID, idle, Workers) + }. + +%% @doc Apply task completion effects to server state. 
+task_completed(#task{bundle_id = BundleID, type = post_tx}, Bundle, CommittedTX, State) -> + Bundles = State#state.bundles, + Opts = State#state.opts, + Bundle1 = Bundle#bundle{status = tx_posted, tx = CommittedTX}, + State1 = State#state{ + bundles = maps:put(BundleID, Bundle1, Bundles) + }, + BuildProofsTask = #task{ + bundle_id = BundleID, + type = build_proofs, + data = CommittedTX, + opts = Opts + }, + enqueue_task(BuildProofsTask, State1); +task_completed(#task{bundle_id = BundleID, type = build_proofs}, Bundle, Proofs, State) -> + Bundles = State#state.bundles, + Opts = State#state.opts, + case Proofs of + [] -> + bundle_complete(Bundle, State); + _ -> + ProofsMap = maps:from_list([ + {maps:get(offset, Proof), #proof{proof = Proof, status = pending}} + || Proof <- Proofs + ]), + Bundle1 = Bundle#bundle{ + proofs = ProofsMap, + status = proofs_built + }, + State1 = State#state{ + bundles = maps:put(BundleID, Bundle1, Bundles) + }, + lists:foldl( + fun(ProofData, StateAcc) -> + ProofTask = #task{ + bundle_id = BundleID, + type = post_proof, + data = ProofData, + opts = Opts + }, + enqueue_task(ProofTask, StateAcc) + end, + State1, + Proofs + ) + end; +task_completed( + #task{bundle_id = BundleID, type = post_proof, data = ProofData}, + Bundle, + _Result, + State + ) -> + Bundles = State#state.bundles, + Offset = maps:get(offset, ProofData), + Proofs = Bundle#bundle.proofs, + Proofs1 = maps:update_with( + Offset, + fun(Proof) -> Proof#proof{status = seeded} end, + Proofs + ), + Bundle1 = Bundle#bundle{proofs = Proofs1}, + State1 = State#state{ + bundles = maps:put(BundleID, Bundle1, Bundles) + }, + AllSeeded = lists:all( + fun(#proof{status = Status}) -> Status =:= seeded end, + maps:values(Proofs1) + ), + case AllSeeded of + true -> + bundle_complete(Bundle1, State1); + false -> + State1 + end. + +%% @doc Mark a bundle as complete and remove it from state. 
+bundle_complete(Bundle, State = #state{opts = Opts}) -> + ok = dev_bundler_cache:complete_tx(Bundle#bundle.tx, Opts), + ElapsedTime = + timer:now_diff(erlang:timestamp(), Bundle#bundle.start_time) / 1000000, + ?event( + bundler_short, + {bundle_complete, + {bundle_id, Bundle#bundle.id}, + {timestamp, dev_bundler_task:format_timestamp()}, + {tx, {explicit, hb_message:id(Bundle#bundle.tx, signed, Opts)}}, + {elapsed_time_s, ElapsedTime} + } + ), + State#state{bundles = maps:remove(Bundle#bundle.id, State#state.bundles)}. + +%% @doc Recover a single bundle and enqueue any follow-up work. +recover_bundle(CommittedTX, Items, State = #state{opts = Opts}) -> + BundleID = make_ref(), + Bundle = #bundle{ + id = BundleID, + items = Items, + status = tx_posted, + tx = CommittedTX, + proofs = #{}, + start_time = erlang:timestamp() + }, + Bundles = State#state.bundles, + State1 = State#state{ + bundles = maps:put(BundleID, Bundle, Bundles) + }, + Task = #task{ + bundle_id = BundleID, + type = build_proofs, + data = CommittedTX, + opts = Opts + }, + enqueue_task(Task, State1). + %%%=================================================================== %%% Tests %%%=================================================================== @@ -171,6 +494,47 @@ bundle_count_test() -> bundle_size_test() -> test_bundle(#{ bundler_max_size => floor(3.6 * ?DATA_CHUNK_SIZE) }). +bundle_dispatch_delay_test() -> + test_bundle(#{ bundler_max_bundle_dispatch_delay => 3000 }). + +nested_bundle_test() -> + Anchor = rand:bytes(32), + Price = 12345, + % NodeOpts redirects arweave gateway requests to the mock server. 
+ {ServerHandle, NodeOpts} = start_mock_gateway( + #{ + price => {200, integer_to_binary(Price)}, + tx_anchor => {200, hb_util:encode(Anchor)} + } + ), + try + ClientOpts = #{}, + NodeOpts2 = maps:merge(NodeOpts, #{ bundler_max_items => 3 }), + Node = hb_http_server:start_node(NodeOpts2#{ + priv_wallet => ar_wallet:new(), + store => hb_test_utils:test_store() + }), + %% Upload 3 data items across 4 chunks. + Item1 = new_data_item(1, floor(2.5 * ?DATA_CHUNK_SIZE)), + ?assertMatch({ok, _}, post_data_item(Node, Item1, ClientOpts)), + Item2 = new_data_item(2, ?DATA_CHUNK_SIZE), + ?assertMatch({ok, _}, post_data_item(Node, Item2, ClientOpts)), + Item3 = new_data_item(3, floor(0.25 * ?DATA_CHUNK_SIZE)), + ?assertMatch({ok, _}, post_data_item(Node, Item3, ClientOpts)), + TXs = hb_mock_server:get_requests(tx, 1, ServerHandle), + ?assertEqual(1, length(TXs)), + %% Wait for expected chunks + Proofs = hb_mock_server:get_requests(chunk, 4, ServerHandle), + ?assertEqual(4, length(Proofs)), + assert_bundle( + Node, + [Item1, Item2, Item3], Anchor, Price, hd(TXs), Proofs, ClientOpts), + ok + after + %% Always cleanup, even if test fails + stop_test_servers(ServerHandle) + end. + price_error_test() -> test_api_error(#{ price => {500, <<"error">>}, @@ -194,7 +558,7 @@ tx_error_test() -> try ClientOpts = #{}, Node = hb_http_server:start_node(NodeOpts#{ - priv_wallet => hb:wallet(), + priv_wallet => ar_wallet:new(), store => hb_test_utils:test_store(), bundler_max_items => 1 }), @@ -225,7 +589,7 @@ unsigned_dataitem_test() -> try ClientOpts = #{}, Node = hb_http_server:start_node(NodeOpts#{ - priv_wallet => hb:wallet(), + priv_wallet => ar_wallet:new(), store => hb_test_utils:test_store(), debug_print => false }), @@ -247,7 +611,6 @@ unsigned_dataitem_test() -> idle_test() -> Anchor = rand:bytes(32), Price = 12345, - % NodeOpts redirects arweave gateway requests to the mock server. 
{ServerHandle, NodeOpts} = start_mock_gateway( #{ price => {200, integer_to_binary(Price)}, @@ -258,29 +621,43 @@ idle_test() -> ClientOpts = #{}, Node = hb_http_server:start_node(NodeOpts#{ bundler_max_idle_time => 400, - priv_wallet => hb:wallet(), + priv_wallet => ar_wallet:new(), store => hb_test_utils:test_store() }), - %% Upload 1 data items across 2 chunks. - Item1 = new_data_item(1, floor(1.5 * ?DATA_CHUNK_SIZE)), - ?assertMatch({ok, _}, post_data_item(Node, Item1, ClientOpts)), - % Wait just to give the server a chance to post a transaction - % (but it shouldn't) + % Test posting each of the supported signature types + RSAWallet = ar_wallet:new({rsa, 65537}), + EdDSAWallet = ar_wallet:new({eddsa, ed25519}), + EthereumWallet = ar_wallet:new(ethereum), + ItemSize = floor(1.5 * ?DATA_CHUNK_SIZE), + Item1 = new_data_item(1, ItemSize, RSAWallet), + Item2 = new_data_item(2, ItemSize, EdDSAWallet), + {ok, SolanaBin} = + file:read_file(<<"test/arbundles.js/ans104-item-solana.bin">>), + Item3 = ar_bundles:deserialize(SolanaBin), + Item4 = new_data_item(4, ItemSize, EthereumWallet), + Items = [Item1, Item2, Item3, Item4], + lists:foreach( + fun(Item) -> + ?event(debug_test, {posting_item, Item}), + ?assertMatch({ok, _}, post_data_item(Node, Item, ClientOpts)) + end, + Items + ), timer:sleep(150), ?assertEqual(0, length(hb_mock_server:get_requests(tx, 0, ServerHandle))), ?assertEqual(0, length(hb_mock_server:get_requests(chunk, 0, ServerHandle))), - % Wait gain to give the server a chance to trip the max idle time. - % It should *now* post a transaction. 
timer:sleep(300), TXs = hb_mock_server:get_requests(tx, 1, ServerHandle), ?assertEqual(1, length(TXs)), - %% Wait for expected chunks - Proofs = hb_mock_server:get_requests(chunk, 2, ServerHandle), - ?assertEqual(2, length(Proofs)), - assert_bundle(Node, [Item1], Anchor, Price, hd(TXs), Proofs, ClientOpts), + %% 2x 1.5 chunk items + 1 small solana item + 1.5 Ethereum = 5 chunks + ExpectedChunks = 5, + Proofs = hb_mock_server:get_requests( + chunk, ExpectedChunks, ServerHandle), + ?assertEqual(ExpectedChunks, length(Proofs)), + assert_bundle( + Node, Items, Anchor, Price, hd(TXs), Proofs, ClientOpts), ok after - %% Always cleanup, even if test fails stop_test_servers(ServerHandle) end. @@ -302,7 +679,7 @@ dispatch_blocking_test() -> try ClientOpts = #{}, Node = hb_http_server:start_node(NodeOpts#{ - priv_wallet => hb:wallet(), + priv_wallet => ar_wallet:new(), store => hb_test_utils:test_store(), bundler_max_items => 3 }), @@ -343,29 +720,8 @@ dispatch_blocking_test() -> stop_test_servers(ServerHandle) end. 
-recover_unbundled_items_test() -> - Opts = #{store => hb_test_utils:test_store()}, - % Create and cache some items - Item1 = hb_message:convert(new_data_item(1, 10), <<"structured@1.0">>, <<"ans104@1.0">>, Opts), - Item2 = hb_message:convert(new_data_item(2, 10), <<"structured@1.0">>, <<"ans104@1.0">>, Opts), - Item3 = hb_message:convert(new_data_item(3, 10), <<"structured@1.0">>, <<"ans104@1.0">>, Opts), - ok = dev_bundler_cache:write_item(Item1, Opts), - ok = dev_bundler_cache:write_item(Item2, Opts), - ok = dev_bundler_cache:write_item(Item3, Opts), - % Bundle Item2 with a fake TX - FakeTX = ar_tx:sign(#tx{format = 2, tags = [{<<"test">>, <<"tx">>}]}, hb:wallet()), - StructuredTX = hb_message:convert(FakeTX, <<"structured@1.0">>, <<"tx@1.0">>, Opts), - ok = dev_bundler_cache:write_tx(StructuredTX, [Item2], Opts), - % Now recover unbundled items - {RecoveredItems, RecoveredBytes} = recover_unbundled_items(Opts), - ?assertEqual(3924, RecoveredBytes), - RecoveredItems2 = [ - hb_message:with_commitments( - #{ <<"commitment-device">> => <<"ans104@1.0">> }, Item, Opts) - || Item <- RecoveredItems], - ?assertEqual(lists:sort([Item1, Item3]), lists:sort(RecoveredItems2)), - ok. - +%% @doc Test that items are recovered and posted while respecting the +%% max_items limit. recover_respects_max_items_test() -> Anchor = rand:bytes(32), Price = 12345, @@ -377,7 +733,7 @@ recover_respects_max_items_test() -> % Use max_items of 3, so 10 items should dispatch as 3+3+3+1 MaxItems = 3, Opts = NodeOpts#{ - priv_wallet => hb:wallet(), + priv_wallet => ar_wallet:new(), store => hb_test_utils:test_store(), bundler_max_items => MaxItems }, @@ -406,10 +762,532 @@ recover_respects_max_items_test() -> stop_test_servers(ServerHandle) end. 
+complete_task_sequence_test() -> + Anchor = rand:bytes(32), + Price = 12345, + {ServerHandle, NodeOpts} = start_mock_gateway(#{ + price => {200, integer_to_binary(Price)}, + tx_anchor => {200, hb_util:encode(Anchor)} + }), + try + Opts = NodeOpts#{ + priv_wallet => ar_wallet:new(), + store => hb_test_utils:test_store(), + bundler_max_items => 2, + retry_base_delay_ms => 100, + retry_jitter => 0 + }, + hb_http_server:start_node(Opts), + ensure_server(Opts), + Items = [ + new_structured_data_item(1, 10, Opts), + new_structured_data_item(2, 10, Opts) + ], + submit_test_items(Items, Opts), + TXs = hb_mock_server:get_requests(tx, 1, ServerHandle), + ?assertEqual(1, length(TXs)), + Proofs = hb_mock_server:get_requests(chunk, 1, ServerHandle), + ?assertEqual(1, length(Proofs)), + State = get_state(), + ?assertNotEqual(undefined, State), + ?assertNotEqual(timeout, State), + Workers = State#state.workers, + IdleWorkers = [ + PID + || {PID, Status} <- maps:to_list(Workers), Status =:= idle + ], + ?assertEqual(maps:size(Workers), length(IdleWorkers)), + Queue = State#state.task_queue, + ?assert(queue:is_empty(Queue)), + Bundles = State#state.bundles, + ?assertEqual(0, maps:size(Bundles)), + ok + after + stop_test_servers(ServerHandle) + end. 
+ +recover_bundles_test() -> + Anchor = rand:bytes(32), + Price = 12345, + {ServerHandle, NodeOpts} = start_mock_gateway(#{ + chunk => fun(_Req) -> + timer:sleep(250), + {200, <<"OK">>} + end, + price => {200, integer_to_binary(Price)}, + tx_anchor => {200, hb_util:encode(Anchor)} + }), + try + Opts = NodeOpts#{ + priv_wallet => ar_wallet:new(), + store => hb_test_utils:test_store() + }, + hb_http_server:start_node(Opts), + Item1 = new_structured_data_item(1, 10, Opts), + Item2 = new_structured_data_item(2, 10, Opts), + Item3 = new_structured_data_item(3, 10, Opts), + ok = dev_bundler_cache:write_item(Item1, Opts), + ok = dev_bundler_cache:write_item(Item2, Opts), + ok = dev_bundler_cache:write_item(Item3, Opts), + TX = dev_bundler_task:data_items_to_tx( + lists:reverse([Item1, Item2, Item3]), Opts), + CommittedTX = hb_message:convert( + TX, <<"structured@1.0">>, <<"tx@1.0">>, Opts), + ok = dev_bundler_cache:write_tx(CommittedTX, [Item1, Item2, Item3], Opts), + Item4 = new_structured_data_item(4, 10, Opts), + ok = dev_bundler_cache:write_item(Item4, Opts), + TX2 = dev_bundler_task:data_items_to_tx( + lists:reverse([Item4]), Opts), + CommittedTX2 = hb_message:convert( + TX2, <<"structured@1.0">>, <<"tx@1.0">>, Opts), + ok = dev_bundler_cache:write_tx(CommittedTX2, [Item4], Opts), + ok = dev_bundler_cache:complete_tx(CommittedTX2, Opts), + ensure_server(Opts), + State = get_state(), + ?assertNotEqual(undefined, State), + ?assertNotEqual(timeout, State), + TXs = hb_mock_server:get_requests(tx, 1, ServerHandle, 200), + ?assertEqual([], TXs), + ?assert( + hb_util:wait_until( + fun() -> + dev_bundler_cache:load_bundle_states(Opts) =:= [] + end, + 2000 + ) + ), + FinalState = get_state(), + ?assertEqual(0, maps:size(FinalState#state.bundles)), + ok + after + stop_test_servers(ServerHandle) + end. 
+ +post_tx_price_failure_retry_test() -> + Anchor = rand:bytes(32), + FailCount = 3, + setup_test_counter(price_attempts_counter), + {ServerHandle, NodeOpts} = start_mock_gateway(#{ + price => fun(_Req) -> + Count = increment_test_counter(price_attempts_counter) - 1, + case Count < FailCount of + true -> {500, <<"error">>}; + false -> {200, <<"12345">>} + end + end, + tx_anchor => {200, hb_util:encode(Anchor)} + }), + try + Opts = NodeOpts#{ + priv_wallet => ar_wallet:new(), + store => hb_test_utils:test_store(), + bundler_max_items => 1, + retry_base_delay_ms => 50, + retry_jitter => 0 + }, + hb_http_server:start_node(Opts), + ensure_server(Opts), + Items = [new_structured_data_item(1, 10, Opts)], + submit_test_items(Items, Opts), + TXs = hb_mock_server:get_requests(tx, 1, ServerHandle), + ?assertEqual(1, length(TXs)), + FinalCount = get_test_counter(price_attempts_counter), + ?assertEqual(FailCount + 1, FinalCount), + ok + after + cleanup_test_counter(price_attempts_counter), + stop_test_servers(ServerHandle) + end. 
+ +post_tx_anchor_failure_retry_test() -> + Price = 12345, + FailCount = 3, + setup_test_counter(anchor_attempts_counter), + {ServerHandle, NodeOpts} = start_mock_gateway(#{ + price => {200, integer_to_binary(Price)}, + tx_anchor => fun(_Req) -> + Count = increment_test_counter(anchor_attempts_counter) - 1, + case Count < FailCount of + true -> {500, <<"error">>}; + false -> {200, hb_util:encode(rand:bytes(32))} + end + end + }), + try + Opts = NodeOpts#{ + priv_wallet => ar_wallet:new(), + store => hb_test_utils:test_store(), + bundler_max_items => 1, + retry_base_delay_ms => 50, + retry_jitter => 0 + }, + hb_http_server:start_node(Opts), + ensure_server(Opts), + Items = [new_structured_data_item(1, 10, Opts)], + submit_test_items(Items, Opts), + TXs = hb_mock_server:get_requests(tx, 1, ServerHandle), + ?assertEqual(1, length(TXs)), + FinalCount = get_test_counter(anchor_attempts_counter), + ?assertEqual(FailCount + 1, FinalCount), + ok + after + cleanup_test_counter(anchor_attempts_counter), + stop_test_servers(ServerHandle) + end. 
+ +post_tx_post_failure_retry_test() -> + Anchor = rand:bytes(32), + Price = 12345, + FailCount = 4, + setup_test_counter(tx_attempts_counter), + {ServerHandle, NodeOpts} = start_mock_gateway(#{ + price => {200, integer_to_binary(Price)}, + tx_anchor => {200, hb_util:encode(Anchor)}, + tx => fun(_Req) -> + Count = increment_test_counter(tx_attempts_counter) - 1, + case Count < FailCount of + true -> {400, <<"Transaction verification failed">>}; + false -> {200, <<"OK">>} + end + end + }), + try + Opts = NodeOpts#{ + priv_wallet => ar_wallet:new(), + store => hb_test_utils:test_store(), + bundler_max_items => 1, + retry_base_delay_ms => 50, + retry_jitter => 0 + }, + hb_http_server:start_node(Opts), + ensure_server(Opts), + Items = [new_structured_data_item(1, 10, Opts)], + submit_test_items(Items, Opts), + TXs = hb_mock_server:get_requests(tx, FailCount + 1, ServerHandle), + ?assertEqual(FailCount + 1, length(TXs)), + FinalCount = get_test_counter(tx_attempts_counter), + ?assertEqual(FailCount + 1, FinalCount), + ok + after + cleanup_test_counter(tx_attempts_counter), + stop_test_servers(ServerHandle) + end. 
+ +post_proof_failure_retry_test() -> + Anchor = rand:bytes(32), + Price = 12345, + FailCount = 2, + setup_test_counter(chunk_attempts_counter), + {ServerHandle, NodeOpts} = start_mock_gateway(#{ + price => {200, integer_to_binary(Price)}, + tx_anchor => {200, hb_util:encode(Anchor)}, + chunk => fun(_Req) -> + Count = increment_test_counter(chunk_attempts_counter) - 1, + case Count < FailCount of + true -> {500, <<"error">>}; + false -> {200, <<"OK">>} + end + end + }), + try + Opts = NodeOpts#{ + priv_wallet => ar_wallet:new(), + store => hb_test_utils:test_store(), + bundler_max_items => 1, + retry_base_delay_ms => 50, + retry_jitter => 0 + }, + hb_http_server:start_node(Opts), + ensure_server(Opts), + Items = [new_structured_data_item(1, floor(4.5 * ?DATA_CHUNK_SIZE), Opts)], + submit_test_items(Items, Opts), + TXs = hb_mock_server:get_requests(tx, 1, ServerHandle), + ?assertEqual(1, length(TXs)), + Chunks = hb_mock_server:get_requests(chunk, FailCount + 5, ServerHandle), + ?assertEqual(FailCount + 5, length(Chunks)), + FinalCount = get_test_counter(chunk_attempts_counter), + ?assertEqual(FailCount + 5, FinalCount), + ok + after + cleanup_test_counter(chunk_attempts_counter), + stop_test_servers(ServerHandle) + end. 
+ +rapid_dispatch_test() -> + Anchor = rand:bytes(32), + Price = 12345, + {ServerHandle, NodeOpts} = start_mock_gateway(#{ + price => {200, integer_to_binary(Price)}, + tx_anchor => {200, hb_util:encode(Anchor)}, + tx => fun(_Req) -> + timer:sleep(100), + {200, <<"OK">>} + end + }), + try + Opts = NodeOpts#{ + priv_wallet => ar_wallet:new(), + store => hb_test_utils:test_store(), + bundler_max_items => 1, + bundler_workers => 3 + }, + hb_http_server:start_node(Opts), + ensure_server(Opts), + lists:foreach( + fun(I) -> + Items = [new_structured_data_item(I, 10, Opts)], + submit_test_items(Items, Opts) + end, + lists:seq(1, 10) + ), + TXs = hb_mock_server:get_requests(tx, 10, ServerHandle), + ?assertEqual(10, length(TXs)), + ok + after + stop_test_servers(ServerHandle) + end. + +one_bundle_fails_others_continue_test() -> + Anchor = rand:bytes(32), + Price = 12345, + setup_test_counter(mixed_attempts_counter), + {ServerHandle, NodeOpts} = start_mock_gateway(#{ + price => {200, integer_to_binary(Price)}, + tx_anchor => {200, hb_util:encode(Anchor)}, + tx => fun(_Req) -> + Count = increment_test_counter(mixed_attempts_counter) - 1, + case Count of + 0 -> {200, <<"OK">>}; + _ -> {400, <<"fail">>} + end + end + }), + try + Opts = NodeOpts#{ + priv_wallet => ar_wallet:new(), + store => hb_test_utils:test_store(), + bundler_max_items => 1, + retry_base_delay_ms => 100, + retry_jitter => 0 + }, + hb_http_server:start_node(Opts), + ensure_server(Opts), + Items1 = [new_structured_data_item(1, 10, Opts)], + submit_test_items(Items1, Opts), + Items2 = [new_structured_data_item(2, 10, Opts)], + submit_test_items(Items2, Opts), + TXs = hb_mock_server:get_requests(tx, 5, ServerHandle), + ?assert(length(TXs) >= 5, length(TXs)), + ok + after + cleanup_test_counter(mixed_attempts_counter), + stop_test_servers(ServerHandle) + end. 
+ +parallel_task_execution_test() -> + Anchor = rand:bytes(32), + Price = 12345, + SleepTime = 120, + {ServerHandle, NodeOpts} = start_mock_gateway(#{ + price => {200, integer_to_binary(Price)}, + tx_anchor => {200, hb_util:encode(Anchor)}, + chunk => fun(_Req) -> + timer:sleep(SleepTime), + {200, <<"OK">>} + end + }), + try + Opts = NodeOpts#{ + priv_wallet => ar_wallet:new(), + store => hb_test_utils:test_store(), + bundler_max_items => 1, + bundler_workers => 5 + }, + hb_http_server:start_node(Opts), + ensure_server(Opts), + lists:foreach( + fun(I) -> + Items = [new_structured_data_item(I, 10, Opts)], + submit_test_items(Items, Opts) + end, + lists:seq(1, 10) + ), + StartTime = erlang:system_time(millisecond), + Chunks = hb_mock_server:get_requests(chunk, 10, ServerHandle), + ElapsedTime = erlang:system_time(millisecond) - StartTime, + ?assertEqual(10, length(Chunks)), + ?assert(ElapsedTime < 2000, "ElapsedTime: " ++ integer_to_list(ElapsedTime)), + ok + after + stop_test_servers(ServerHandle) + end. 
+ +exponential_backoff_timing_test() -> + Anchor = rand:bytes(32), + Price = 12345, + FailCount = 5, + setup_test_counter(backoff_cap_counter), + {ServerHandle, NodeOpts} = start_mock_gateway(#{ + price => {200, integer_to_binary(Price)}, + tx_anchor => {200, hb_util:encode(Anchor)}, + tx => fun(_Req) -> + Timestamp = erlang:system_time(millisecond), + Attempt = increment_test_counter(backoff_cap_counter), + Count = Attempt - 1, + add_test_attempt_timestamp(backoff_cap_counter, Attempt, Timestamp), + case Count < FailCount of + true -> {400, <<"fail">>}; + false -> {200, <<"OK">>} + end + end + }), + try + Opts = NodeOpts#{ + priv_wallet => ar_wallet:new(), + store => hb_test_utils:test_store(), + bundler_max_items => 1, + retry_base_delay_ms => 100, + retry_max_delay_ms => 500, + retry_jitter => 0 + }, + hb_http_server:start_node(Opts), + ensure_server(Opts), + Items = [new_structured_data_item(1, 10, Opts)], + submit_test_items(Items, Opts), + TXs = hb_mock_server:get_requests(tx, FailCount + 1, ServerHandle, 5000), + ?assertEqual(FailCount + 1, length(TXs)), + Timestamps = test_attempt_timestamps(backoff_cap_counter), + ?assertEqual(6, length(Timestamps)), + [T1, T2, T3, T4, T5, T6] = Timestamps, + Delay1 = T2 - T1, + Delay2 = T3 - T2, + Delay3 = T4 - T3, + Delay4 = T5 - T4, + Delay5 = T6 - T5, + ?assert(Delay1 >= 70 andalso Delay1 =< 200, Delay1), + ?assert(Delay2 >= 150 andalso Delay2 =< 300, Delay2), + ?assert(Delay3 >= 300 andalso Delay3 =< 500, Delay3), + ?assert(Delay4 >= 400 andalso Delay4 =< 700, Delay4), + ?assert(Delay5 >= 400 andalso Delay5 =< 700, Delay5), + ok + after + cleanup_test_counter(backoff_cap_counter), + stop_test_servers(ServerHandle) + end. 
+ +independent_task_retry_counts_test() -> + Anchor = rand:bytes(32), + Price = 12345, + setup_test_counter(independent_retry_counter), + {ServerHandle, NodeOpts} = start_mock_gateway(#{ + price => {200, integer_to_binary(Price)}, + tx_anchor => {200, hb_util:encode(Anchor)}, + tx => fun(_Req) -> + Count = increment_test_counter(independent_retry_counter) - 1, + case Count < 2 of + true -> {400, <<"fail">>}; + false -> {200, <<"OK">>} + end + end + }), + try + Opts = NodeOpts#{ + priv_wallet => ar_wallet:new(), + store => hb_test_utils:test_store(), + bundler_max_items => 1, + retry_base_delay_ms => 100, + retry_jitter => 0 + }, + hb_http_server:start_node(Opts), + ensure_server(Opts), + Items1 = [new_structured_data_item(1, 10, Opts)], + submit_test_items(Items1, Opts), + hb_mock_server:get_requests(tx, 3, ServerHandle), + Items2 = [new_structured_data_item(2, 10, Opts)], + submit_test_items(Items2, Opts), + TotalAttempts = 4, + TXs = hb_mock_server:get_requests(tx, TotalAttempts, ServerHandle), + ?assertEqual(TotalAttempts, length(TXs)), + ok + after + cleanup_test_counter(independent_retry_counter), + stop_test_servers(ServerHandle) + end. + +invalid_item_test() -> + Anchor = rand:bytes(32), + Price = 12345, + {ServerHandle, NodeOpts} = start_mock_gateway(#{ + price => {200, integer_to_binary(Price)}, + tx_anchor => {200, hb_util:encode(Anchor)} + }), + try + ClientOpts = #{}, + TestOpts = NodeOpts#{ + priv_wallet => ar_wallet:new(), + store => hb_test_utils:test_store() + }, + Node = hb_http_server:start_node(TestOpts#{ + debug_print => false + }), + % Create a valid signed item + Item = ar_bundles:sign_item( + #tx{ + data = <<"testdata">>, + tags = [{<<"tag1">>, <<"value1">>}] + }, + ar_wallet:new() + ), + % Tamper with the data after signing (this invalidates the signature) + TamperedItem = Item#tx{data = <<"tampereddata">>}, + % Posting via HTTP fails upstream during ANS104 decode/verify. 
+ PostResult = post_data_item(Node, TamperedItem, ClientOpts), + ?assertMatch({failure, #{<<"status">> := 500}}, PostResult), + % Calling dev_bundler directly should return the intended 400. + StructuredItem = hb_message:convert( + TamperedItem, <<"structured@1.0">>, <<"ans104@1.0">>, TestOpts), + DirectResult = dev_bundler:item(#{}, StructuredItem, TestOpts), + ?assertMatch({error, #{ + <<"status">> := 400, + <<"error">> := <<"invalid_item">>, + <<"details">> := <<"signature_verification_failed">>}}, DirectResult), + ok + after + stop_test_servers(ServerHandle) + end. + +cache_write_failure_test() -> + GoodOpts = #{store => hb_test_utils:test_store()}, + BadOpts = #{ + store => undefined, + debug_print => false + }, % Invalid store will cause cache write to fail + try + % Start bundler with a valid store so recovery/init paths succeed. + ensure_server(GoodOpts), + Item = ar_bundles:sign_item( + #tx{ + data = <<"testdata">>, + tags = [{<<"tag1">>, <<"value1">>}] + }, + ar_wallet:new() + ), + StructuredItem = hb_message:convert( + Item, <<"structured@1.0">>, <<"ans104@1.0">>, GoodOpts), + % Call item/3 directly without a store, should cause cache write + % to fail. + Result = dev_bundler:item(#{}, StructuredItem, BadOpts), + ?assertMatch({error, #{ + <<"status">> := 500, + <<"error">> := <<"cache_write_failed">>}}, Result), + ok + after + stop_server() + end. + stop_test_servers(ServerHandle) -> hb_mock_server:stop(ServerHandle), - stop_server(), - dev_bundler_dispatch:stop_dispatcher(). + stop_server(). test_bundle(Opts) -> Anchor = rand:bytes(32), @@ -425,7 +1303,7 @@ test_bundle(Opts) -> ClientOpts = #{}, NodeOpts2 = maps:merge(NodeOpts, Opts), Node = hb_http_server:start_node(NodeOpts2#{ - priv_wallet => hb:wallet(), + priv_wallet => ar_wallet:new(), store => hb_test_utils:test_store() }), %% Upload 3 data items across 4 chunks. 
@@ -454,7 +1332,7 @@ test_api_error(Responses) -> try ClientOpts = #{}, Node = hb_http_server:start_node(NodeOpts#{ - priv_wallet => hb:wallet(), + priv_wallet => ar_wallet:new(), store => hb_test_utils:test_store(), bundler_max_items => 1 }), @@ -476,6 +1354,27 @@ test_api_error(Responses) -> end. new_data_item(Index, Size) -> + new_data_item(Index, Size, ar_wallet:new()). + +new_structured_data_item(Index, Size, Opts) -> + hb_message:convert( + new_data_item(Index, Size), + <<"structured@1.0">>, + <<"ans104@1.0">>, + Opts + ). + +submit_test_items([], _Opts) -> + ok; +submit_test_items(Items, Opts) -> + lists:foreach( + fun(Item) -> + ?assertMatch({ok, _}, item(#{}, Item, Opts)) + end, + Items + ). + +new_data_item(Index, Size, Wallet) -> Data = rand:bytes(Size), Tag = <<"tag", (integer_to_binary(Index))/binary>>, Value = <<"value", (integer_to_binary(Index))/binary>>, @@ -484,7 +1383,7 @@ new_data_item(Index, Size) -> data = Data, tags = [{Tag, Value}] }, - hb:wallet() + Wallet ). post_data_item(Node, Item, Opts) -> @@ -596,3 +1495,34 @@ start_mock_gateway(Responses) -> ] }, {ServerHandle, NodeOpts}. + +setup_test_counter(Table) -> + cleanup_test_counter(Table), + ets:new(Table, [named_table, public, set]), + ok. + +cleanup_test_counter(Table) -> + case ets:info(Table) of + undefined -> ok; + _ -> ets:delete(Table), ok + end. + +increment_test_counter(Table) -> + ets:update_counter(Table, Table, {2, 1}, {Table, 0}). + +get_test_counter(Table) -> + case ets:lookup(Table, Table) of + [{_, Value}] -> Value; + [] -> 0 + end. + +add_test_attempt_timestamp(Table, Attempt, Timestamp) -> + ets:insert(Table, {{Table, Attempt}, Timestamp}). + +test_attempt_timestamps(Table) -> + TimestampEntries = [ + {Attempt, Timestamp} + || {{Prefix1, Attempt}, Timestamp} <- ets:tab2list(Table), + Prefix1 =:= Table + ], + [Timestamp || {_, Timestamp} <- lists:sort(TimestampEntries)]. 
\ No newline at end of file diff --git a/src/dev_bundler_cache.erl b/src/dev_bundler_cache.erl index 2d9ae95ec..ae538c933 100644 --- a/src/dev_bundler_cache.erl +++ b/src/dev_bundler_cache.erl @@ -7,17 +7,18 @@ %%% %%% Recovery flow: %%% 1. Load unbundled items (where bundle = <<>>) back into dev_bundler queue -%%% 2. Load TX states and reconstruct dev_bundler_dispatch bundles +%%% 2. Load TX states and reconstruct in-progress bundler bundles %%% 3. Enqueue appropriate tasks based on status -module(dev_bundler_cache). -export([ write_item/2, write_tx/3, complete_tx/2, - load_unbundled_items/1, load_bundle_states/1, - load_bundled_items/2, - load_tx/2 + load_tx/2, + load_items/2, + load_items/4, + list_item_ids/1 ]). -include("include/hb.hrl"). -include_lib("eunit/include/eunit.hrl"). @@ -35,7 +36,6 @@ write_item(Item, Opts) when is_map(Item) -> {ok, _} = hb_cache:write(Item, Opts), % Use the committed (structured) item for path generation Path = item_path(Item, Opts), - ?event({write_item, {path, Path}}), % Create pseudopath with empty bundle reference write_pseudopath(Path, <<>>, Opts). @@ -43,7 +43,6 @@ write_item(Item, Opts) when is_map(Item) -> link_item_to_tx(Item, TX, Opts) when is_map(Item) and is_map(TX) -> Path = item_path(Item, Opts), TXID = tx_id(TX, Opts), - ?event({link_item_to_tx, {path, Path}, {tx_id, {explicit, TXID}}}), write_pseudopath(Path, TXID, Opts). %% @doc Get the bundle TXID for a data item, or <<>> if not bundled. @@ -57,11 +56,13 @@ get_item_bundle(Item, Opts) when is_map(Item) -> %% @doc Construct the pseudopath for an item's bundle reference. %% Item should be a structured message. item_path(Item, Opts) when is_map(Item) -> + item_path(item_id(Item, Opts), Opts); +item_path(ItemID, Opts) when is_binary(ItemID) -> Store = hb_opts:get(store, no_viable_store, Opts), hb_store:path(Store, [ ?BUNDLER_PREFIX, <<"item">>, - item_id(Item, Opts), + ItemID, <<"bundle">> ]). 
@@ -73,7 +74,6 @@ tx_id(TX, Opts) when is_map(TX) -> hb_message:id(TX, signed, Opts). write_tx(TX, Items, Opts) when is_map(TX) -> - ?event({write_tx, {tx, {explicit, hb_message:id(TX, signed, Opts)}}}), {ok, _} = hb_cache:write(TX, Opts), set_tx_status(TX, <<"posted">>, Opts), lists:foreach( @@ -81,7 +81,8 @@ write_tx(TX, Items, Opts) when is_map(TX) -> ok = link_item_to_tx(Item, TX, Opts) end, Items - ). + ), + ok. complete_tx(TX, Opts) -> set_tx_status(TX, <<"complete">>, Opts). @@ -89,7 +90,7 @@ complete_tx(TX, Opts) -> %% @doc Set the status of a bundle TX. set_tx_status(TX, Status, Opts) -> Path = tx_path(TX, Opts), - ?event({set_tx_status, {path, Path}, {status, Status}}), + ?event(debug_bundler, {set_tx_status, {path, Path}, {status, Status}}), write_pseudopath(Path, Status, Opts). %% @doc Get the status of a bundle TX. @@ -113,46 +114,6 @@ tx_path(TX, Opts) -> %%% Recovery operations -%% @doc Load all unbundled items (where bundle = <<>>) from cache. -%% Returns list of actual Item messages for re-queuing. 
-load_unbundled_items(Opts) -> - Store = hb_opts:get(store, no_viable_store, Opts), - ItemsPath = hb_store:path(Store, [?BUNDLER_PREFIX, <<"item">>]), - % List all item IDs - ItemIDs = case hb_cache:list(ItemsPath, Opts) of - [] -> []; - List -> List - end, - % Filter for unbundled items and load them - lists:filtermap( - fun(ItemIDStr) -> - % Read the bundle pseudopath directly - BundlePath = hb_store:path(Store, [ - ?BUNDLER_PREFIX, - <<"item">>, - ItemIDStr, - <<"bundle">> - ]), - case read_pseudopath(BundlePath, Opts) of - {ok, <<>>} -> - % Unbundled item - load it fully (resolve all links) - case hb_cache:read(ItemIDStr, Opts) of - {ok, Item} -> - FullyLoadedItem = hb_cache:ensure_all_loaded(Item, Opts), - ?event({loaded_unbundled_item, {id, {explicit, ItemIDStr}}}), - {true, FullyLoadedItem}; - _ -> - ?event({failed_to_load_item, {id, {explicit, ItemIDStr}}}), - false - end; - _ -> - % Already bundled or not found - false - end - end, - ItemIDs - ). - %% @doc Load all bundle TX states from cache. %% Returns list of {TXID, Status} tuples. load_bundle_states(Opts) -> @@ -172,62 +133,28 @@ load_bundle_states(Opts) -> <<>> -> false; % Empty status, ignore <<"complete">> -> false; % Skip completed bundles Status -> - ?event({loaded_tx_state, {id, {explicit, TXID}}, {status, Status}}), + ?event( + debug_bundler, + {loaded_tx_state, + {id, {string, TXID}}, + {status, Status} + } + ), {true, {TXID, Status}} end end, TXIDs ). -%% @doc Load all data items associated with a bundle TX. -%% Uses the item pseudopaths to find items with matching tx-id. 
-load_bundled_items(TXID, Opts) -> - Store = hb_opts:get(store, no_viable_store, Opts), - ItemsPath = hb_store:path(Store, [?BUNDLER_PREFIX, <<"item">>]), - % List all item IDs - ItemIDs = case hb_cache:list(ItemsPath, Opts) of - [] -> []; - List -> List - end, - % Filter for items belonging to this TX and load them - lists:filtermap( - fun(ItemIDStr) -> - % Read the bundle pseudopath directly - BundlePath = hb_store:path(Store, [ - ?BUNDLER_PREFIX, - <<"item">>, - ItemIDStr, - <<"bundle">> - ]), - case read_pseudopath(BundlePath, Opts) of - {ok, BundleTXID} when BundleTXID =:= TXID -> - % This item belongs to our bundle - load it fully (resolve all links) - case hb_cache:read(ItemIDStr, Opts) of - {ok, Item} -> - FullyLoadedItem = hb_cache:ensure_all_loaded(Item, Opts), - ?event({loaded_tx_item, {tx_id, {explicit, TXID}}, {item_id, {explicit, ItemIDStr}}}), - {true, FullyLoadedItem}; - _ -> - ?event({failed_to_load_tx_item, {tx_id, {explicit, TXID}}, {item_id, {explicit, ItemIDStr}}}), - false - end; - _ -> - % Doesn't belong to this bundle or not found - false - end - end, - ItemIDs - ). - %% @doc Load a TX from cache by its ID. load_tx(TXID, Opts) -> - ?event({load_tx, {tx_id, {explicit, TXID}}}), + ?event(debug_bundler, {load_tx, {tx_id, {explicit, TXID}}}), case hb_cache:read(TXID, Opts) of {ok, TX} -> - ?event({loaded_tx, {tx_id, {explicit, TXID}}}), + ?event(debug_bundler, {loaded_tx, {tx_id, {explicit, TXID}}}), hb_cache:ensure_all_loaded(TX, Opts); _ -> - ?event({failed_to_load_tx, {tx_id, {explicit, TXID}}}), + ?event(error, {failed_to_load_tx, {tx_id, {explicit, TXID}}}), not_found end. @@ -237,7 +164,10 @@ load_tx(TXID, Opts) -> %% @doc Write a value to a pseudopath. write_pseudopath(Path, Value, Opts) -> Store = hb_opts:get(store, no_viable_store, Opts), - hb_store:write(Store, Path, Value). + Result = hb_store:write(Store, Path, Value), + % force a flush to disk + hb_store:read(Store, Path), + Result. %% @doc Read a value from a pseudopath. 
read_pseudopath(Path, Opts) -> @@ -247,6 +177,47 @@ read_pseudopath(Path, Opts) -> _ -> not_found end. +%% @doc List all cached bundler item IDs. +list_item_ids(Opts) -> + Store = hb_opts:get(store, no_viable_store, Opts), + ItemsPath = hb_store:path(Store, [?BUNDLER_PREFIX, <<"item">>]), + case hb_cache:list(ItemsPath, Opts) of + [] -> []; + List -> List + end. + +%% @doc Load all items whose bundle pseudopath matches BundleID. +load_items(BundleID, Opts) -> + load_items( + BundleID, + Opts, + fun(_ItemID, _Item) -> ok end, + fun(_ItemID) -> ok end + ). + +%% @doc Load all items whose bundle pseudopath matches BundleID and invoke callbacks. +load_items(BundleID, Opts, OnLoaded, OnFailed) -> + lists:filtermap( + fun(ItemID) -> + BundlePath = item_path(ItemID, Opts), + case read_pseudopath(BundlePath, Opts) of + {ok, BundleID} -> + case hb_cache:read(ItemID, Opts) of + {ok, Item} -> + FullyLoadedItem = hb_cache:ensure_all_loaded(Item, Opts), + OnLoaded(ItemID, FullyLoadedItem), + {true, FullyLoadedItem}; + _ -> + OnFailed(ItemID), + false + end; + _ -> + false + end + end, + list_item_ids(Opts) + ). + %%% Tests basic_cache_test() -> @@ -278,7 +249,7 @@ load_unbundled_items_test() -> % Link item2 to a bundle, leave others unbundled ok = write_tx(TX, [Item2], Opts), % Load unbundled items - UnbundledItems1 = load_unbundled_items(Opts), + UnbundledItems1 = load_items(<<>>, Opts), UnbundledItems2 = [ hb_message:with_commitments( #{ <<"commitment-device">> => <<"ans104@1.0">> }, @@ -289,6 +260,17 @@ load_unbundled_items_test() -> ?assertEqual(lists:sort([Item1, Item3]), UnbundledItems3), ok. 
+recovered_items_relink_to_original_bundle_path_test() -> + Opts = #{store => hb_test_utils:test_store()}, + Item = new_data_item(1, <<"data1">>, Opts), + ok = write_item(Item, Opts), + [RecoveredItem] = load_items(<<>>, Opts), + TX = new_tx(1, Opts), + ok = write_tx(TX, [RecoveredItem], Opts), + ?assertEqual(tx_id(TX, Opts), get_item_bundle(Item, Opts)), + ?assertEqual([], load_items(<<>>, Opts)), + ok. + load_bundle_states_test() -> Opts = #{store => hb_test_utils:test_store()}, TX1 = new_tx(1, Opts), @@ -320,7 +302,7 @@ load_bundled_items_test() -> ok = write_tx(TX1, [Item1, Item2], Opts), ok = write_tx(TX2, [Item3], Opts), % Load items for bundle 1 - Bundle1Items1 = load_bundled_items(tx_id(TX1, Opts), Opts), + Bundle1Items1 = load_items(tx_id(TX1, Opts), Opts), Bundle1Items2 = [ hb_message:with_commitments( #{ <<"commitment-device">> => <<"ans104@1.0">> }, @@ -329,7 +311,7 @@ load_bundled_items_test() -> Bundle1Items3 = lists:sort(Bundle1Items2), ?assertEqual(lists:sort([Item1, Item2]), Bundle1Items3), % Load items for bundle 2 - Bundle2Items1 = load_bundled_items(tx_id(TX2, Opts), Opts), + Bundle2Items1 = load_items(tx_id(TX2, Opts), Opts), Bundle2Items2 = [ hb_message:with_commitments( #{ <<"commitment-device">> => <<"ans104@1.0">> }, @@ -339,6 +321,101 @@ load_bundled_items_test() -> ?assertEqual(lists:sort([Item3]), Bundle2Items3), ok. +%% @doc That when posting a bundle to the bundler all items in the bundle +%% are accessible via optimistic cache. 
The bundle has the following structure: +%% L2Bundle (bundle) +%% L3Item (leaf) +%% L3Bundle (nested bundle) +%% L4IItem1 (leaf) +%% L4IItem2 (leaf) +bundler_optimistic_cache_test() -> + Wallet = ar_wallet:new(), + L3Item = ar_bundles:sign_item( + #tx{ data = <<"l3item">>, tags = [{<<"idx">>, <<"1">>}] }, + Wallet + ), + L4Item1 = ar_bundles:sign_item( + #tx{ data = <<"l4item1">>, tags = [{<<"idx">>, <<"2.1">>}] }, + Wallet + ), + L4Item2 = ar_bundles:sign_item( + #tx{ data = <<"l4item2">>, tags = [{<<"idx">>, <<"2.2">>}] }, + Wallet + ), + % L3Bundle is itself a bundle wrapping the two L4 leaves. + {undefined, L3BundlePayload} = ar_bundles:serialize_bundle( + list, [L4Item1, L4Item2], false), + L3Bundle = ar_bundles:sign_item( + #tx{ + data = L3BundlePayload, + tags = [ + {<<"Bundle-Format">>, <<"binary">>}, + {<<"Bundle-Version">>, <<"2.0.0">>}, + {<<"idx">>, <<"2">>} + ] + }, + Wallet + ), + {undefined, L2BundlePayload} = ar_bundles:serialize_bundle( + list, [L3Item, L3Bundle], false), + L2Bundle = ar_bundles:sign_item( + #tx{ + data = L2BundlePayload, + tags = [ + {<<"Bundle-Format">>, <<"binary">>}, + {<<"Bundle-Version">>, <<"2.0.0">>} + ] + }, + Wallet + ), + % Compute signed IDs for all items before posting. + L2BundleID = hb_util:encode(ar_bundles:id(L2Bundle, signed)), + L3ItemID = hb_util:encode(ar_bundles:id(L3Item, signed)), + L3BundleID = hb_util:encode(ar_bundles:id(L3Bundle, signed)), + L4Item1ID = hb_util:encode(ar_bundles:id(L4Item1, signed)), + L4Item2ID = hb_util:encode(ar_bundles:id(L4Item2, signed)), + % Start a real node with LMDB and POST the serialized bundle wrapper over HTTP. 
+ Node = hb_http_server:start_node(#{ + priv_wallet => Wallet, + store => hb_test_utils:test_store(hb_store_lmdb) + }), + try + Serialized = ar_bundles:serialize(L2Bundle), + ?assertMatch({ok, _}, hb_http:post( + Node, + #{ + <<"device">> => <<"bundler@1.0">>, + <<"path">> => <<"/tx?codec-device=ans104@1.0">>, + <<"content-type">> => <<"application/octet-stream">>, + <<"body">> => Serialized + }, + #{} + )), + % Every item at every nesting level must be independently readable + % via a bare GET /ID — the real user-facing access pattern. + AllItems = [ + {l2bundle, L2BundleID}, + {l3item, L3ItemID}, + {l3bundle, L3BundleID}, + {l4item1, L4Item1ID}, + {l4item2, L4Item2ID} + ], + lists:foreach( + fun({Label, ExpectedID}) -> + {ok, Msg} = hb_http:get( + Node, #{ <<"path">> => <<"/", ExpectedID/binary>> }, #{}), + ?event(debug_test, {item_result, + {label, Label}, {expected_id, ExpectedID}, {msg, Msg}}), + ?assert(hb_message:verify(Msg)), + ?assertEqual(ExpectedID, hb_message:id(Msg, signed)) + end, + AllItems + ), + ok + after + dev_bundler:stop_server() + end. + new_data_item(Index, SizeOrData, Opts) -> Data = case is_binary(SizeOrData) of true -> SizeOrData; diff --git a/src/dev_bundler_dispatch.erl b/src/dev_bundler_dispatch.erl deleted file mode 100644 index 4b3a2349a..000000000 --- a/src/dev_bundler_dispatch.erl +++ /dev/null @@ -1,1066 +0,0 @@ -%%% @doc A dispatcher for the bundler device (dev_bundler). This module -%%% manages a worker pool to handle bundle building, TX posting, proof -%%% generation, and chunk seeding. Failed tasks are automatically re-queued -%%% for immediate retry until successful. --module(dev_bundler_dispatch). --export([dispatch/2, ensure_dispatcher/1, stop_dispatcher/0]). --include("include/hb.hrl"). --include_lib("eunit/include/eunit.hrl"). - -%%% State record for the dispatcher process. 
--record(state, { - workers, % Map of WorkerPID => idle | {busy, Task} - task_queue, % Queue of pending tasks - bundles, % Map of BundleID => #bundle{} - opts % Configuration options -}). - -%%% Task record representing work to be done by a worker. --record(task, { - bundle_id, % ID of the bundle this task belongs to - type, % Task type: post_tx | build_proofs | post_proof - data, % Task-specific data (map) - opts, % Configuration options - retry_count = 0 % Number of times this task has been retried -}). - -%%% Proof record to track individual proof seeding status. --record(proof, { - proof, % The proof data (chunk, merkle path, etc) - status % pending | seeded -}). - -%%% Bundle record to track bundle progress through the dispatch pipeline. --record(bundle, { - id, % Unique bundle identifier - items, % List of dataitems to bundle - status, % Current state (initializing, tx_built, tx_posted, proofs_built) - tx, % The built/signed transaction - proofs, % Map of offset => #proof{} records - start_time % The time the bundle was started -}). - -%%% Default options. --define(DISPATCHER_NAME, bundler_dispatcher). --define(DEFAULT_NUM_WORKERS, 5). --define(DEFAULT_RETRY_BASE_DELAY_MS, 1000). --define(DEFAULT_RETRY_MAX_DELAY_MS, 600000). % 10 minutes --define(DEFAULT_RETRY_JITTER, 0.25). % ±25% jitter - -%% @doc Dispatch the queue. -dispatch([], _Opts) -> - ?event({skipping_empty_queue}); -dispatch(Items, Opts) -> - PID = ensure_dispatcher(Opts), - PID ! {dispatch, Items}. - -%% @doc Return the PID of the dispatch server. If the server is not running, -%% it is started and registered with the name `?SERVER_NAME'. -ensure_dispatcher(Opts) -> - case hb_name:lookup(?DISPATCHER_NAME) of - undefined -> - PID = spawn(fun() -> init(Opts) end), - hb_name:register(?DISPATCHER_NAME, PID), - hb_name:lookup(?DISPATCHER_NAME); - PID -> PID - end. - -stop_dispatcher() -> - case hb_name:lookup(?DISPATCHER_NAME) of - undefined -> ok; - PID -> - PID ! 
stop, - hb_name:unregister(?DISPATCHER_NAME) - end. - -get_state() -> - case hb_name:lookup(?DISPATCHER_NAME) of - undefined -> undefined; - PID -> - PID ! {get_state, self(), Ref = make_ref()}, - receive - {state, Ref, State} -> State - after 1000 -> timeout - end - end. - -%% @doc Initialize the dispatcher with worker pool. -init(Opts) -> - NumWorkers = hb_opts:get(bundler_workers, ?DEFAULT_NUM_WORKERS, Opts), - Workers = lists:map( - fun(_) -> - WorkerPID = spawn_link(fun() -> worker_loop() end), - {WorkerPID, idle} - end, - lists:seq(1, NumWorkers) - ), - State = #state{ - workers = maps:from_list(Workers), - task_queue = queue:new(), - bundles = #{}, - opts = Opts - }, - % Recover any in-progress bundles from cache - State1 = recover_bundles(State), - dispatcher(assign_tasks(State1)). - -%% @doc The main loop of the dispatcher. Manages task queue and worker pool. -dispatcher(State) -> - receive - {dispatch, Items} -> - % Create a new bundle and queue the post_tx task - Opts = State#state.opts, - BundleID = make_ref(), - Bundle = #bundle{ - id = BundleID, - items = Items, - status = initializing, - tx = undefined, - proofs = #{}, - start_time = erlang:timestamp() - }, - State1 = State#state{ - bundles = maps:put(BundleID, Bundle, State#state.bundles) - }, - ?event({dispatching_bundle, {timestamp, format_timestamp()}, - {bundle_id, BundleID}, {num_items, length(Items)}}), - Task = #task{bundle_id = BundleID, type = post_tx, data = Items, opts = Opts}, - State2 = enqueue_task(Task, State1), - % Assign tasks to idle workers - dispatcher(assign_tasks(State2)); - {task_complete, WorkerPID, Task, Result} -> - State1 = handle_task_complete(WorkerPID, Task, Result, State), - dispatcher(assign_tasks(State1)); - {task_failed, WorkerPID, Task, Reason} -> - State1 = handle_task_failed(WorkerPID, Task, Reason, State), - dispatcher(assign_tasks(State1)); - {retry_task, Task} -> - % Re-enqueue the task after backoff delay - State1 = enqueue_task(Task, State), - 
dispatcher(assign_tasks(State1)); - {get_state, From, Ref} -> - From ! {state, Ref, State}, - dispatcher(State); - stop -> - % Stop all workers - maps:foreach( - fun(WorkerPID, _) -> WorkerPID ! stop end, - State#state.workers - ), - exit(normal) - end. - -%% @doc Enqueue a task to the task queue. -enqueue_task(Task, State) -> - Queue = State#state.task_queue, - State#state{task_queue = queue:in(Task, Queue)}. - -%% @doc Format a task for logging. -format_task(#task{bundle_id = BundleID, type = post_tx, data = CommittedTX}) -> - {post_tx, {timestamp, format_timestamp()}, {bundle, BundleID}, - {tx, {explicit, hb_message:id(CommittedTX, signed, #{})}}}; -format_task(#task{bundle_id = BundleID, type = build_proofs, data = CommittedTX}) -> - {build_proofs, {timestamp, format_timestamp()}, {bundle, BundleID}, - {tx, {explicit, hb_message:id(CommittedTX, signed, #{})}}}; -format_task(#task{bundle_id = BundleID, type = post_proof, data = Proof}) -> - Offset = maps:get(offset, Proof), - {post_proof, {timestamp, format_timestamp()}, {bundle, BundleID}, - {offset, Offset}}. - -%% @doc Format erlang:timestamp() as a user-friendly RFC3339 string with milliseconds. -format_timestamp() -> - {MegaSecs, Secs, MicroSecs} = erlang:timestamp(), - Millisecs = (MegaSecs * 1000000 + Secs) * 1000 + (MicroSecs div 1000), - calendar:system_time_to_rfc3339(Millisecs, [{unit, millisecond}, {offset, "Z"}]). - -%% @doc Assign tasks to all idle workers until no idle workers -%% or no tasks remain. -assign_tasks(State) -> - IdleWorkers = maps:filter( - fun(_, Status) -> Status =:= idle end, - State#state.workers), - assign_tasks(maps:keys(IdleWorkers), State). - -assign_tasks([], State) -> - % No more idle workers - State; -assign_tasks([WorkerPID | Rest], State) -> - Workers = State#state.workers, - Queue = State#state.task_queue, - case queue:out(Queue) of - {{value, Task}, Queue1} -> - % Assign task to this worker - WorkerPID ! 
{execute_task, self(), Task}, - State1 = State#state{ - task_queue = Queue1, - workers = maps:put(WorkerPID, {busy, Task}, Workers) - }, - % Continue with remaining idle workers - assign_tasks(Rest, State1); - {empty, _} -> - % No more tasks, stop - State - end. - -handle_task_complete(WorkerPID, Task, Result, State) -> - Workers = State#state.workers, - Bundles = State#state.bundles, - #task{bundle_id = BundleID} = Task, - ?event({task_complete, format_task(Task)}), - % Update worker to idle - State1 = State#state{ - workers = maps:put(WorkerPID, idle, Workers) - }, - case maps:get(BundleID, Bundles, undefined) of - undefined -> - ?event({bundle_not_found, BundleID}), - State1; - Bundle -> - task_completed(Task, Bundle, Result, State1) - end. - -handle_task_failed(WorkerPID, Task, Reason, State) -> - Workers = State#state.workers, - Opts = State#state.opts, - RetryCount = Task#task.retry_count, - % Calculate exponential backoff delay - BaseDelay = hb_opts:get(retry_base_delay_ms, ?DEFAULT_RETRY_BASE_DELAY_MS, Opts), - MaxDelay = hb_opts:get(retry_max_delay_ms, ?DEFAULT_RETRY_MAX_DELAY_MS, Opts), - Jitter = hb_opts:get(retry_jitter, ?DEFAULT_RETRY_JITTER, Opts), - % Compute base delay with exponential backoff: min(base * 2^retry_count, max_delay) - BaseDelayWithBackoff = min(BaseDelay * (1 bsl RetryCount), MaxDelay), - % Apply jitter: delay * (1 + random(-jitter, +jitter)) - % This distributes the delay across [delay * (1-jitter), delay * (1+jitter)] - JitterFactor = (rand:uniform() * 2 - 1) * Jitter, % Random value in [-jitter, +jitter] - Delay = round(BaseDelayWithBackoff * (1 + JitterFactor)), - ?event({task_failed_retrying, format_task(Task), - {reason, {explicit, Reason}}, - {retry_count, RetryCount}, {delay_ms, Delay}}), - % Update worker to idle - State1 = State#state{ - workers = maps:put(WorkerPID, idle, Workers) - }, - % Increment retry count and schedule delayed retry - Task1 = Task#task{retry_count = RetryCount + 1}, - erlang:send_after(Delay, self(), 
{retry_task, Task1}), - State1. - -task_completed(#task{bundle_id = BundleID, type = post_tx}, Bundle, CommittedTX, State) -> - Bundles = State#state.bundles, - Opts = State#state.opts, - dev_bundler_cache:write_tx(CommittedTX, Bundle#bundle.items, Opts), - Bundle1 = Bundle#bundle{status = tx_posted, tx = CommittedTX}, - State1 = State#state{ - bundles = maps:put(BundleID, Bundle1, Bundles) - }, - BuildProofsTask = #task{ - bundle_id = BundleID, type = build_proofs, - data = CommittedTX, opts = Opts}, - enqueue_task(BuildProofsTask, State1); - -task_completed(#task{bundle_id = BundleID, type = build_proofs}, Bundle, Proofs, State) -> - Bundles = State#state.bundles, - Opts = State#state.opts, - case Proofs of - [] -> - % No proofs, bundle complete - bundle_complete(Bundle, State); - _ -> - % Proofs built, wrap each in a proof record with offset as key - ProofsMap = maps:from_list([ - {maps:get(offset, P), #proof{proof = P, status = pending}} || P <- Proofs - ]), - Bundle1 = Bundle#bundle{ - proofs = ProofsMap, - status = proofs_built - }, - State1 = State#state{ - bundles = maps:put(BundleID, Bundle1, Bundles) - }, - % Enqueue all post_proof tasks - lists:foldl( - fun(ProofData, S) -> - ProofTask = #task{ - bundle_id = BundleID, - type = post_proof, - data = ProofData, - opts = Opts - }, - enqueue_task(ProofTask, S) - end, - State1, - Proofs - ) - end; - -task_completed(#task{bundle_id = BundleID, type = post_proof, data = ProofData}, Bundle, _Result, State) -> - Bundles = State#state.bundles, - Offset = maps:get(offset, ProofData), - Proofs = Bundle#bundle.proofs, - Proofs1 = maps:update_with( - Offset, - fun(P) -> P#proof{status = seeded} end, - Proofs - ), - Bundle1 = Bundle#bundle{proofs = Proofs1}, - State1 = State#state{ - bundles = maps:put(BundleID, Bundle1, Bundles) - }, - % Check if all proofs are seeded - AllSeeded = lists:all( - fun(#proof{status = Status}) -> Status =:= seeded end, - maps:values(Proofs1) - ), - case AllSeeded of - true -> - 
bundle_complete(Bundle, State1); - false -> - State1 - end. - -%% @doc Mark a bundle as complete and remove it from state. -bundle_complete(Bundle, State) -> - Opts = State#state.opts, - ok = dev_bundler_cache:complete_tx(Bundle#bundle.tx, Opts), - ElapsedTime = - timer:now_diff(erlang:timestamp(), Bundle#bundle.start_time) / 1000000, - ?event({bundle_complete, {bundle_id, Bundle#bundle.id}, - {timestamp, format_timestamp()}, - {elapsed_time_s, ElapsedTime}}), - State#state{bundles = maps:remove(Bundle#bundle.id, State#state.bundles)}. - -%%% Recovery - -%% @doc Recover in-progress bundles from cache after a crash. -recover_bundles(State) -> - Opts = State#state.opts, - % Reconstruct bundles and enqueue appropriate tasks - lists:foldl( - fun({TXID, Status}, StateAcc) -> - recover_bundle(TXID, Status, StateAcc) - end, - State, - dev_bundler_cache:load_bundle_states(Opts) - ). - -%% @doc Recover a single bundle based on its cached state. -recover_bundle(TXID, Status, State) -> - Opts = State#state.opts, - ?event({recovering_bundle, {tx_id, TXID}, {status, Status}}), - try - % Load the TX and its items - CommittedTX = dev_bundler_cache:load_tx(TXID, Opts), - Items = dev_bundler_cache:load_bundled_items(TXID, Opts), - % Create a new bundle record - BundleID = make_ref(), - Bundle = #bundle{ - id = BundleID, - items = Items, - status = tx_posted, - tx = CommittedTX, - proofs = #{}, - start_time = erlang:timestamp() - }, - % Add bundle to state - Bundles = State#state.bundles, - State1 = State#state{ - bundles = maps:put(BundleID, Bundle, Bundles) - }, - - % Enqueue appropriate task based on status - Task = #task{ - bundle_id = BundleID, type = build_proofs, - data = CommittedTX, opts = Opts}, - enqueue_task(Task, State1) - catch - _:Error:Stack -> - ?event({failed_to_recover_bundle, {tx_id, TXID}, {error, Error}, {stack, Stack}}), - % Skip this bundle and continue - State - end. 
- -%%% Worker implementation - -%% @doc Worker loop - executes tasks and reports back to dispatcher. -worker_loop() -> - receive - {execute_task, DispatcherPID, Task} -> - Result = execute_task(Task), - case Result of - {ok, Value} -> - DispatcherPID ! {task_complete, self(), Task, Value}; - {error, Reason} -> - DispatcherPID ! {task_failed, self(), Task, Reason} - end, - worker_loop(); - stop -> - exit(normal) - end. - -%% @doc Execute a specific task. -execute_task(#task{type = post_tx, data = Items, opts = Opts} = Task) -> - try - ?event({execute_task, format_task(Task)}), - % Get price and anchor - {ok, TX} = dev_codec_tx:to(lists:reverse(Items), #{}, #{}), - DataSize = TX#tx.data_size, - PriceResult = get_price(DataSize, Opts), - AnchorResult = get_anchor(Opts), - case {PriceResult, AnchorResult} of - {{ok, Price}, {ok, Anchor}} -> - % Sign the TX - Wallet = hb_opts:get(priv_wallet, no_viable_wallet, Opts), - SignedTX = ar_tx:sign(TX#tx{ anchor = Anchor, reward = Price }, Wallet), - % Convert and post - Committed = hb_message:convert( - SignedTX, - #{ <<"device">> => <<"structured@1.0">>, <<"bundle">> => true }, - #{ <<"device">> => <<"tx@1.0">>, <<"bundle">> => true }, - Opts), - PostTXResponse = hb_ao:resolve( - #{ <<"device">> => <<"arweave@2.9-pre">> }, - Committed#{ - <<"path">> => <<"/tx">>, - <<"method">> => <<"POST">> - }, - Opts - ), - case PostTXResponse of - {ok, _Result} -> {ok, Committed}; - {_, ErrorReason} -> {error, ErrorReason} - end; - {PriceErr, AnchorErr} -> - ?event({post_tx_failed, - format_task(Task), - {price, PriceErr}, - {anchor, AnchorErr}}), - {error, {PriceErr, AnchorErr}} - end - catch - _:Err:_Stack -> - ?event({post_tx_failed, - format_task(Task), - {error, Err}}), - {error, Err} - end; - -execute_task(#task{type = build_proofs, data = CommittedTX, opts = Opts} = Task) -> - try - ?event({execute_task, format_task(Task)}), - % Calculate chunks and proofs - TX = hb_message:convert( - CommittedTX, <<"tx@1.0">>, 
<<"structured@1.0">>, Opts), - Data = TX#tx.data, - DataRoot = TX#tx.data_root, - DataSize = TX#tx.data_size, - Chunks = ar_tx:chunk_binary(?DATA_CHUNK_SIZE, Data), - SizeTaggedChunks = ar_tx:chunks_to_size_tagged_chunks(Chunks), - SizeTaggedChunkIDs = ar_tx:sized_chunks_to_sized_chunk_ids(SizeTaggedChunks), - {_Root, DataTree} = ar_merkle:generate_tree(SizeTaggedChunkIDs), - % Build proof list - Proofs = lists:filtermap( - fun({Chunk, Offset}) -> - case Chunk of - <<>> -> false; - _ -> - DataPath = ar_merkle:generate_path( - DataRoot, Offset - 1, DataTree), - Proof = #{ - chunk => Chunk, - data_path => DataPath, - offset => Offset - 1, - data_size => DataSize, - data_root => DataRoot - }, - {true, Proof} - end - end, - SizeTaggedChunks - ), - {ok, Proofs} - catch - _:Err:_Stack -> - ?event({build_proofs_failed, - format_task(Task), - {error, Err}}), - {error, Err} - end; - -execute_task(#task{type = post_proof, data = Proof, opts = Opts} = Task) -> - #{chunk := Chunk, data_path := DataPath, offset := Offset, - data_size := DataSize, data_root := DataRoot} = Proof, - ?event({execute_task, format_task(Task)}), - Request = #{ - <<"chunk">> => hb_util:encode(Chunk), - <<"data_path">> => hb_util:encode(DataPath), - <<"offset">> => integer_to_binary(Offset), - <<"data_size">> => integer_to_binary(DataSize), - <<"data_root">> => hb_util:encode(DataRoot) - }, - try - Serialized = hb_json:encode(Request), - Response = hb_http:post( - hb_opts:get(gateway, not_found, Opts), - #{ - <<"path">> => <<"/chunk">>, - <<"body">> => Serialized - }, - Opts - ), - case Response of - {ok, _} -> {ok, proof_posted}; - {error, Reason} -> {error, Reason} - end - catch - _:Err:_Stack -> - ?event({post_proof_failed, - format_task(Task), - {error, Err}}), - {error, Err} - end. - -get_price(DataSize, Opts) -> - hb_ao:resolve( - #{ <<"device">> => <<"arweave@2.9-pre">> }, - #{ <<"path">> => <<"/price">>, <<"size">> => DataSize }, - Opts - ). 
- -get_anchor(Opts) -> - hb_ao:resolve( - #{ <<"device">> => <<"arweave@2.9-pre">> }, - #{ <<"path">> => <<"/tx_anchor">> }, - Opts - ). - -%%%=================================================================== -%%% Tests -%%%=================================================================== - -complete_task_sequence_test() -> - Anchor = rand:bytes(32), - Price = 12345, - {ServerHandle, NodeOpts} = start_mock_gateway(#{ - price => {200, integer_to_binary(Price)}, - tx_anchor => {200, hb_util:encode(Anchor)} - }), - try - Opts = NodeOpts#{ - priv_wallet => hb:wallet(), - store => hb_test_utils:test_store(), - retry_base_delay_ms => 100, - retry_jitter => 0 - }, - hb_http_server:start_node(Opts), - Items = [new_data_item(1, 10, Opts), new_data_item(2, 10, Opts)], - dispatch(Items, Opts), - % Wait for TX to be posted - TXs = hb_mock_server:get_requests(tx, 1, ServerHandle), - ?assertEqual(1, length(TXs)), - % Wait for chunk to be posted - Proofs = hb_mock_server:get_requests(chunk, 1, ServerHandle), - ?assertEqual(1, length(Proofs)), - % Verify dispatcher state - State = get_state(), - ?assertNotEqual(undefined, State), - ?assertNotEqual(timeout, State), - % All workers should be idle - Workers = State#state.workers, - IdleWorkers = [PID || {PID, Status} <- maps:to_list(Workers), Status =:= idle], - ?assertEqual(maps:size(Workers), length(IdleWorkers)), - % Task queue should be empty - Queue = State#state.task_queue, - ?assert(queue:is_empty(Queue)), - % Bundle should be completed and removed - Bundles = State#state.bundles, - ?assertEqual(0, maps:size(Bundles)), - ok - after - cleanup_dispatcher(ServerHandle) - end. 
- -post_tx_price_failure_retry_test() -> - Anchor = rand:bytes(32), - FailCount = 3, - {ServerHandle, NodeOpts} = start_mock_gateway(#{ - price => fun(_Req) -> - Count = persistent_term:get(price_attempts, 0), - persistent_term:put(price_attempts, Count + 1), - case Count < FailCount of - true -> {500, <<"error">>}; - false -> {200, <<"12345">>} - end - end, - tx_anchor => {200, hb_util:encode(Anchor)} - }), - try - persistent_term:put(price_attempts, 0), - Opts = NodeOpts#{ - priv_wallet => hb:wallet(), - store => hb_test_utils:test_store(), - retry_base_delay_ms => 50, - retry_jitter => 0 - }, - hb_http_server:start_node(Opts), - Items = [new_data_item(1, 10, Opts)], - dispatch(Items, Opts), - % Wait for TX to eventually be posted - TXs = hb_mock_server:get_requests(tx, 1, ServerHandle), - ?assertEqual(1, length(TXs)), - % Verify it retried multiple times - FinalCount = persistent_term:get(price_attempts, 0), - ?assertEqual(FailCount+1, FinalCount), - ok - after - persistent_term:erase(price_attempts), - cleanup_dispatcher(ServerHandle) - end. 
- -post_tx_anchor_failure_retry_test() -> - Price = 12345, - FailCount = 3, - {ServerHandle, NodeOpts} = start_mock_gateway(#{ - price => {200, integer_to_binary(Price)}, - tx_anchor => fun(_Req) -> - Count = persistent_term:get(anchor_attempts, 0), - persistent_term:put(anchor_attempts, Count + 1), - case Count < FailCount of - true -> {500, <<"error">>}; - false -> {200, hb_util:encode(rand:bytes(32))} - end - end - }), - try - persistent_term:put(anchor_attempts, 0), - Opts = NodeOpts#{ - priv_wallet => hb:wallet(), - store => hb_test_utils:test_store(), - retry_base_delay_ms => 50, - retry_jitter => 0 - }, - hb_http_server:start_node(Opts), - Items = [new_data_item(1, 10, Opts)], - dispatch(Items, Opts), - % Wait for TX to eventually be posted - TXs = hb_mock_server:get_requests(tx, 1, ServerHandle), - ?assertEqual(1, length(TXs)), - % Verify it retried multiple times - FinalCount = persistent_term:get(anchor_attempts, 0), - ?assertEqual(FailCount+1, FinalCount), - ok - after - persistent_term:erase(anchor_attempts), - cleanup_dispatcher(ServerHandle) - end. - -post_tx_post_failure_retry_test() -> - Anchor = rand:bytes(32), - Price = 12345, - FailCount = 4, - {ServerHandle, NodeOpts} = start_mock_gateway(#{ - price => {200, integer_to_binary(Price)}, - tx_anchor => {200, hb_util:encode(Anchor)}, - tx => fun(_Req) -> - Count = persistent_term:get(tx_attempts, 0), - persistent_term:put(tx_attempts, Count + 1), - case Count < FailCount of - true -> {400, <<"Transaction verification failed">>}; - false -> {200, <<"OK">>} - end - end - }), - try - persistent_term:put(tx_attempts, 0), - % Use short retry delays for testing. 
- Opts = NodeOpts#{ - priv_wallet => hb:wallet(), - store => hb_test_utils:test_store(), - retry_base_delay_ms => 50, - retry_jitter => 0 % Disable jitter for deterministic tests - }, - hb_http_server:start_node(Opts), - Items = [new_data_item(1, 10, Opts)], - dispatch(Items, Opts), - % Wait for TX to eventually succeed - TXs = hb_mock_server:get_requests(tx, FailCount+1, ServerHandle), - ?assertEqual(FailCount+1, length(TXs)), - % Verify final attempt succeeded - FinalCount = persistent_term:get(tx_attempts, 0), - ?assertEqual(FailCount+1, FinalCount), - ok - after - persistent_term:erase(tx_attempts), - cleanup_dispatcher(ServerHandle) - end. - -post_proof_failure_retry_test() -> - Anchor = rand:bytes(32), - Price = 12345, - FailCount = 2, - {ServerHandle, NodeOpts} = start_mock_gateway(#{ - price => {200, integer_to_binary(Price)}, - tx_anchor => {200, hb_util:encode(Anchor)}, - chunk => fun(_Req) -> - Count = persistent_term:get(chunk_attempts, 0), - persistent_term:put(chunk_attempts, Count + 1), - case Count < FailCount of - true -> {500, <<"error">>}; - false -> {200, <<"OK">>} - end - end - }), - try - persistent_term:put(chunk_attempts, 0), - Opts = NodeOpts#{ - priv_wallet => hb:wallet(), - store => hb_test_utils:test_store(), - retry_base_delay_ms => 50, - retry_jitter => 0 - }, - hb_http_server:start_node(Opts), - % Large enough for multiple chunks - Items = [new_data_item(1, floor(4.5 * ?DATA_CHUNK_SIZE), Opts)], - dispatch(Items, Opts), - % Wait for TX - TXs = hb_mock_server:get_requests(tx, 1, ServerHandle), - ?assertEqual(1, length(TXs)), - % Wait for chunks to eventually succeed - Chunks = hb_mock_server:get_requests(chunk, FailCount+5, ServerHandle), - ?assertEqual( FailCount+5, length(Chunks)), - % Verify retries happened - FinalCount = persistent_term:get(chunk_attempts, 0), - ?assertEqual(FailCount+5, FinalCount), - ok - after - persistent_term:erase(chunk_attempts), - cleanup_dispatcher(ServerHandle) - end. 
- -empty_dispatch_test() -> - Opts = #{}, - dispatch([], Opts), - % Should not crash - ok. - -rapid_dispatch_test() -> - Anchor = rand:bytes(32), - Price = 12345, - {ServerHandle, NodeOpts} = start_mock_gateway(#{ - price => {200, integer_to_binary(Price)}, - tx_anchor => {200, hb_util:encode(Anchor)}, - tx => fun(_Req) -> - timer:sleep(100), - {200, <<"OK">>} - end - }), - try - Opts = NodeOpts#{ - priv_wallet => hb:wallet(), - store => hb_test_utils:test_store(), - bundler_workers => 3 - }, - hb_http_server:start_node(Opts), - % Dispatch 10 bundles rapidly - lists:foreach( - fun(I) -> - Items = [new_data_item(I, 10, Opts)], - dispatch(Items, Opts) - end, - lists:seq(1, 10) - ), - - % Wait for all 10 TXs - TXs = hb_mock_server:get_requests(tx, 10, ServerHandle), - ?assertEqual(10, length(TXs)), - ok - after - cleanup_dispatcher(ServerHandle) - end. - -one_bundle_fails_others_continue_test() -> - Anchor = rand:bytes(32), - Price = 12345, - {ServerHandle, NodeOpts} = start_mock_gateway(#{ - price => {200, integer_to_binary(Price)}, - tx_anchor => {200, hb_util:encode(Anchor)}, - tx => fun(_Req) -> - % First TX fails, second succeeds - Count = persistent_term:get(tx_mixed_attempts, 0), - persistent_term:put(tx_mixed_attempts, Count + 1), - case Count of - 0 -> {200, <<"OK">>}; - _ -> {400, <<"fail">>} - end - end - }), - try - persistent_term:put(tx_mixed_attempts, 0), - % Use short retry delays for testing (100ms base, with exponential backoff) - Opts = NodeOpts#{ - priv_wallet => hb:wallet(), - store => hb_test_utils:test_store(), - retry_base_delay_ms => 100, - retry_jitter => 0 % Disable jitter for deterministic tests - }, - hb_http_server:start_node(Opts), - % Dispatch first bundle (will keep failing) - Items1 = [new_data_item(1, 10, Opts)], - dispatch(Items1, Opts), - % Dispatch second bundle (will succeed) - Items2 = [new_data_item(2, 10, Opts)], - dispatch(Items2, Opts), - % Wait for at least 5 TX attempts (1 success + multiple retries) - TXs = 
hb_mock_server:get_requests(tx, 5, ServerHandle), - ?assert(length(TXs) >= 5, length(TXs)), - ok - after - persistent_term:erase(tx_mixed_attempts), - cleanup_dispatcher(ServerHandle) - end. - -parallel_task_execution_test() -> - Anchor = rand:bytes(32), - Price = 12345, - SleepTime = 120, - {ServerHandle, NodeOpts} = start_mock_gateway(#{ - price => {200, integer_to_binary(Price)}, - tx_anchor => {200, hb_util:encode(Anchor)}, - chunk => fun(_Req) -> - timer:sleep(SleepTime), - {200, <<"OK">>} - end - }), - try - Opts = NodeOpts#{ - priv_wallet => hb:wallet(), - store => hb_test_utils:test_store(), - bundler_workers => 5 - }, - hb_http_server:start_node(Opts), - % Dispatch 3 bundles, each with 2 chunks - lists:foreach( - fun(I) -> - Items = [new_data_item(I, 10, Opts)], - dispatch(Items, Opts) - end, - lists:seq(1, 10) - ), - % With 3 workers and 1s delay, 10 chunks should complete in ~2s not 9s - StartTime = erlang:system_time(millisecond), - Chunks = hb_mock_server:get_requests(chunk, 10, ServerHandle), - ElapsedTime = erlang:system_time(millisecond) - StartTime, - ?assertEqual(10, length(Chunks)), - % Should take ~2-3 seconds with parallelism, not 9+ - ?assert(ElapsedTime < 2000, "ElapsedTime: " ++ integer_to_list(ElapsedTime)), - ok - after - cleanup_dispatcher(ServerHandle) - end. 
- -exponential_backoff_timing_test() -> - Anchor = rand:bytes(32), - Price = 12345, - FailCount = 5, - {ServerHandle, NodeOpts} = start_mock_gateway(#{ - price => {200, integer_to_binary(Price)}, - tx_anchor => {200, hb_util:encode(Anchor)}, - tx => fun(_Req) -> - Count = persistent_term:get(backoff_cap_attempts, 0), - Timestamp = erlang:system_time(millisecond), - persistent_term:put(backoff_cap_attempts, Count + 1), - % Store timestamp of each attempt - Timestamps = persistent_term:get(backoff_cap_timestamps, []), - persistent_term:put(backoff_cap_timestamps, [Timestamp | Timestamps]), - case Count < FailCount of - true -> {400, <<"fail">>}; - false -> {200, <<"OK">>} - end - end - }), - try - persistent_term:put(backoff_cap_attempts, 0), - persistent_term:put(backoff_cap_timestamps, []), - Opts = NodeOpts#{ - priv_wallet => hb:wallet(), - store => hb_test_utils:test_store(), - retry_base_delay_ms => 100, - retry_max_delay_ms => 500, % Cap at 500ms - retry_jitter => 0 % Disable jitter for deterministic tests - }, - hb_http_server:start_node(Opts), - Items = [new_data_item(1, 10, Opts)], - dispatch(Items, Opts), - % Wait for TX to eventually succeed - TXs = hb_mock_server:get_requests(tx, FailCount+1, ServerHandle, 5000), - ?assertEqual(FailCount+1, length(TXs)), - % Verify backoff respects cap - Timestamps = lists:reverse(persistent_term:get(backoff_cap_timestamps, [])), - ?assertEqual(6, length(Timestamps)), - [T1, T2, T3, T4, T5, T6] = Timestamps, - % Calculate actual delays - Delay1 = T2 - T1, - Delay2 = T3 - T2, - Delay3 = T4 - T3, - Delay4 = T5 - T4, - Delay5 = T6 - T5, - % Expected: ~100ms, ~200ms, ~400ms, ~500ms (capped), ~500ms (capped) - ?assert(Delay1 >= 70 andalso Delay1 =< 200, Delay1), - ?assert(Delay2 >= 150 andalso Delay2 =< 300, Delay2), - ?assert(Delay3 >= 300 andalso Delay3 =< 500, Delay3), - ?assert(Delay4 >= 400 andalso Delay4 =< 700, Delay4), - ?assert(Delay5 >= 400 andalso Delay5 =< 700, Delay5), - ok - after - 
persistent_term:erase(backoff_cap_attempts), - persistent_term:erase(backoff_cap_timestamps), - cleanup_dispatcher(ServerHandle) - end. - -independent_task_retry_counts_test() -> - Anchor = rand:bytes(32), - Price = 12345, - % Track which bundles we've seen - persistent_term:put(independent_bundle_ids, []), - {ServerHandle, NodeOpts} = start_mock_gateway(#{ - price => {200, integer_to_binary(Price)}, - tx_anchor => {200, hb_util:encode(Anchor)}, - tx => fun(_Req) -> - % Use request ordering to distinguish bundles - % First 3 requests are bundle1 (fail, fail, succeed) - % 4th request is bundle2 (succeed) - Count = persistent_term:get(independent_total_attempts, 0), - persistent_term:put(independent_total_attempts, Count + 1), - case Count < 2 of - true -> {400, <<"fail">>}; % First 2 attempts fail - false -> {200, <<"OK">>} % Rest succeed - end - end - }), - try - persistent_term:put(independent_total_attempts, 0), - Opts = NodeOpts#{ - priv_wallet => hb:wallet(), - store => hb_test_utils:test_store(), - retry_base_delay_ms => 100, - retry_jitter => 0 % Disable jitter for deterministic tests - }, - hb_http_server:start_node(Opts), - % Dispatch first bundle (will fail twice and retry) - Items1 = [new_data_item(1, 10, Opts)], - dispatch(Items1, Opts), - % Wait a bit for first bundle to start failing - hb_mock_server:get_requests(tx, 3, ServerHandle), - % Dispatch second bundle (will succeed on first try since we're past the 2 failures) - Items2 = [new_data_item(2, 10, Opts)], - dispatch(Items2, Opts), - % Verify we got all TX requests logged - TotalAttempts = 4, - TXs = hb_mock_server:get_requests(tx, TotalAttempts, ServerHandle), - ?assertEqual(TotalAttempts, length(TXs)), - ok - after - persistent_term:erase(independent_total_attempts), - persistent_term:erase(independent_bundle_ids), - cleanup_dispatcher(ServerHandle) - end. 
- -recover_bundles_test() -> - Anchor = rand:bytes(32), - Price = 12345, - {ServerHandle, NodeOpts} = start_mock_gateway(#{ - price => {200, integer_to_binary(Price)}, - tx_anchor => {200, hb_util:encode(Anchor)} - }), - try - Opts = NodeOpts#{ - priv_wallet => hb:wallet(), - store => hb_test_utils:test_store() - }, - hb_http_server:start_node(Opts), - % Create some test items - Item1 = new_data_item(1, 10, Opts), - Item2 = new_data_item(2, 10, Opts), - Item3 = new_data_item(3, 10, Opts), - % Write items to cache as unbundled - ok = dev_bundler_cache:write_item(Item1, Opts), - ok = dev_bundler_cache:write_item(Item2, Opts), - ok = dev_bundler_cache:write_item(Item3, Opts), - % Create a bundle TX and cache it with posted status - {ok, TX} = dev_codec_tx:to(lists:reverse([Item1, Item2, Item3]), #{}, #{}), - CommittedTX = hb_message:convert(TX, <<"structured@1.0">>, <<"tx@1.0">>, Opts), - ok = dev_bundler_cache:write_tx(CommittedTX, [Item1, Item2, Item3], Opts), - % Create a second bundle that is already complete (should not be recovered) - Item4 = new_data_item(4, 10, Opts), - ok = dev_bundler_cache:write_item(Item4, Opts), - {ok, TX2} = dev_codec_tx:to(lists:reverse([Item4]), #{}, #{}), - CommittedTX2 = hb_message:convert(TX2, <<"structured@1.0">>, <<"tx@1.0">>, Opts), - ok = dev_bundler_cache:write_tx(CommittedTX2, [Item4], Opts), - ok = dev_bundler_cache:complete_tx(CommittedTX2, Opts), - % Now initialize dispatcher which should recover only the posted bundle - ensure_dispatcher(Opts), - State = get_state(), - % Get the recovered bundle (should only be 1, not the completed one) - ?assertEqual(1, maps:size(State#state.bundles)), - [Bundle] = maps:values(State#state.bundles), - ?assertNotEqual(undefined, Bundle#bundle.start_time), - ?assertEqual(#{}, Bundle#bundle.proofs), - RecoveredItems = [ - hb_message:with_commitments( - #{ <<"commitment-device">> => <<"ans104@1.0">> }, Item, Opts) - || Item <- Bundle#bundle.items], - ?assertEqual( - lists:sort([Item1, Item2, 
Item3]), - lists:sort(RecoveredItems)), - ?assertEqual(tx_posted, Bundle#bundle.status), - ?assert(hb_message:verify(Bundle#bundle.tx)), - ?assertEqual( - hb_message:id(CommittedTX, signed, Opts), - hb_message:id(Bundle#bundle.tx, signed, Opts)), - ok - after - cleanup_dispatcher(ServerHandle) - end. - -%%% Test Helper Functions - -new_data_item(Index, Size, Opts) -> - Data = rand:bytes(Size), - Tag = <<"tag", (integer_to_binary(Index))/binary>>, - Value = <<"value", (integer_to_binary(Index))/binary>>, - Item = ar_bundles:sign_item( - #tx{ - data = Data, - tags = [{Tag, Value}] - }, - hb:wallet() - ), - hb_message:convert(Item, <<"structured@1.0">>, <<"ans104@1.0">>, Opts). - -start_mock_gateway(Responses) -> - DefaultResponse = {200, <<>>}, - Endpoints = [ - {"/chunk", chunk, maps:get(chunk, Responses, DefaultResponse)}, - {"/tx", tx, maps:get(tx, Responses, DefaultResponse)}, - {"/price/:size", price, maps:get(price, Responses, DefaultResponse)}, - {"/tx_anchor", tx_anchor, maps:get(tx_anchor, Responses, DefaultResponse)} - ], - {ok, MockServer, ServerHandle} = hb_mock_server:start(Endpoints), - NodeOpts = #{ - gateway => MockServer, - routes => [ - #{ - <<"template">> => <<"/arweave">>, - <<"node">> => #{ - <<"match">> => <<"^/arweave">>, - <<"with">> => MockServer, - <<"opts">> => #{http_client => httpc, protocol => http2} - } - } - ] - }, - {ServerHandle, NodeOpts}. - -cleanup_dispatcher(ServerHandle) -> - stop_dispatcher(), - timer:sleep(10), % Ensure dispatcher fully stops - hb_mock_server:stop(ServerHandle). diff --git a/src/dev_bundler_recovery.erl b/src/dev_bundler_recovery.erl new file mode 100644 index 000000000..915b734bd --- /dev/null +++ b/src/dev_bundler_recovery.erl @@ -0,0 +1,278 @@ +%%% @doc Logic for handling bundler recocery on node restart. +%%% +%%% When a bundler is running it will cache the state of each uploaded item +%%% or bundle as it move through the bundling and upload process. 
If the node +%%% is restarted before it can finish including all uploaded items in a bundle, +%%% or finish seeding all bundles in process, the recovery process will ensure +%%% that the data in process is recovered and resumed. +-module(dev_bundler_recovery). +-export([ + recover_unbundled_items/2, + recover_bundles/2 +]). +-include("include/hb.hrl"). +-include_lib("eunit/include/eunit.hrl"). + +%% @doc Spawn a process to recover unbundled items. +recover_unbundled_items(ServerPID, Opts) -> + spawn(fun() -> do_recover_unbundled_items(ServerPID, Opts) end). + +%% @doc Spawn a process to recover in-progress bundles. +recover_bundles(ServerPID, Opts) -> + spawn(fun() -> do_recover_bundles(ServerPID, Opts) end). + +do_recover_unbundled_items(ServerPID, Opts) -> + try + ?event(bundler_short, {recover_unbundled_items_start}), + UnbundledItems = dev_bundler_cache:load_items( + <<>>, + Opts, + fun(ItemID, Item) -> + ?event( + bundler_short, + {recovered_unbundled_item, + {id, {string, ItemID}} + } + ), + ServerPID ! {enqueue_item, Item} + end, + fun(ItemID) -> + ?event( + bundler_short, + {failed_to_recover_unbundled_item, + {id, {string, ItemID}} + } + ) + end + ), + ?event(bundler_short, {recover_unbundled_items_complete, + {count, length(UnbundledItems)}}), + ok + catch + _:Error:Stack -> + ?event( + error, + {recover_unbundled_items_failed, + {error, Error}, + {stack, Stack} + }, + Opts + ) + end. + +do_recover_bundles(ServerPID, Opts) -> + try + BundleStates = dev_bundler_cache:load_bundle_states(Opts), + ?event(bundler_short, {recover_bundles_start, + {count, length(BundleStates)}}), + lists:foreach( + fun({TXID, Status}) -> + recover_bundle(ServerPID, TXID, Status, Opts) + end, + BundleStates + ), + ?event(bundler_short, {recover_bundles_complete, + {count, length(BundleStates)}}), + ok + catch + _:Error:Stack -> + ?event( + error, + {recover_bundles_failed, + {error, Error}, + {stack, Stack} + }, + Opts + ) + end. 
+ +recover_bundle(ServerPID, TXID, Status, Opts) -> + ?event( + bundler_short, + {recovering_bundle, + {tx_id, {explicit, TXID}}, + {status, Status} + } + ), + try + CommittedTX = dev_bundler_cache:load_tx(TXID, Opts), + case CommittedTX of + not_found -> + throw(tx_not_found); + _ -> + Items = dev_bundler_cache:load_items( + TXID, + Opts, + fun(ItemID, _Item) -> + ?event( + debug_bundler, + {loaded_bundle_item, + {tx_id, {explicit, TXID}}, + {item_id, {explicit, ItemID}} + } + ) + end, + fun(ItemID) -> + ?event( + error, + {failed_to_load_bundle_item, + {tx_id, {explicit, TXID}}, + {item_id, {explicit, ItemID}} + }, + Opts + ), + throw({failed_to_load_bundle_item, ItemID}) + end + ), + ServerPID ! {recover_bundle, CommittedTX, Items} + end + catch + _:Error:Stack -> + ?event( + error, + {failed_to_recover_bundle, + {tx_id, {explicit, TXID}}, + {error, Error}, + {stack, Stack} + }, + Opts + ) + end. + +%%%=================================================================== +%%% Tests +%%%=================================================================== + +recover_unbundled_items_test() -> + Opts = #{store => hb_test_utils:test_store()}, + Item1 = new_data_item(1, 10, Opts), + Item2 = new_data_item(2, 10, Opts), + Item3 = new_data_item(3, 10, Opts), + ok = dev_bundler_cache:write_item(Item1, Opts), + ok = dev_bundler_cache:write_item(Item2, Opts), + ok = dev_bundler_cache:write_item(Item3, Opts), + FakeTX = new_bundle_tx([Item2], Opts), + ok = dev_bundler_cache:write_tx(FakeTX, [Item2], Opts), + recover_unbundled_items(self(), Opts), + RecoveredItems = receive_enqueue_items(2), + RecoveredItems1 = normalize_items(RecoveredItems, Opts), + ?assertEqual( + lists:sort([Item1, Item3]), + lists:sort(RecoveredItems1) + ). 
+ +recover_bundles_skips_complete_test() -> + Opts = #{store => hb_test_utils:test_store()}, + Item1 = new_data_item(1, 10, Opts), + Item2 = new_data_item(2, 10, Opts), + Item3 = new_data_item(3, 10, Opts), + ok = dev_bundler_cache:write_item(Item1, Opts), + ok = dev_bundler_cache:write_item(Item2, Opts), + ok = dev_bundler_cache:write_item(Item3, Opts), + PostedTX = new_bundle_tx([Item1, Item2], Opts), + CompletedTX = new_bundle_tx([Item3], Opts), + ok = dev_bundler_cache:write_tx(PostedTX, [Item1, Item2], Opts), + ok = dev_bundler_cache:write_tx(CompletedTX, [Item3], Opts), + ok = dev_bundler_cache:complete_tx(CompletedTX, Opts), + recover_bundles(self(), Opts), + {RecoveredTX, RecoveredItems} = receive_recovered_bundle(), + RecoveredItems1 = normalize_items(RecoveredItems, Opts), + ?assertEqual( + hb_message:id(PostedTX, signed, Opts), + hb_message:id(RecoveredTX, signed, Opts) + ), + ?assertEqual( + lists:sort([Item1, Item2]), + lists:sort(RecoveredItems1) + ), + receive + {recover_bundle, _, _} -> + erlang:error(unexpected_second_recovered_bundle) + after 200 -> + ok + end. 
+ +recover_bundles_failed_bundle_items_continue_test() -> + Opts = #{ + store => hb_test_utils:test_store(), + debug_print => false + }, + ValidItem = new_data_item(1, 10, Opts), + ok = dev_bundler_cache:write_item(ValidItem, Opts), + ValidTX = new_bundle_tx([ValidItem], Opts), + ok = dev_bundler_cache:write_tx(ValidTX, [ValidItem], Opts), + BrokenTX = new_bundle_tx([], Opts), + ok = dev_bundler_cache:write_tx(BrokenTX, [], Opts), + MissingItemID = <<"missing-item">>, + ok = write_missing_item_bundle(MissingItemID, BrokenTX, Opts), + recover_bundles(self(), Opts), + {RecoveredTX, RecoveredItems} = receive_recovered_bundle(), + RecoveredItems1 = normalize_items(RecoveredItems, Opts), + ?assertEqual( + hb_message:id(ValidTX, signed, Opts), + hb_message:id(RecoveredTX, signed, Opts) + ), + ?assertEqual([ValidItem], RecoveredItems1), + receive + {recover_bundle, _, _} -> + erlang:error(unexpected_broken_bundle_recovered) + after 200 -> + ok + end. + +receive_enqueue_items(Count) -> + receive_enqueue_items(Count, []). + +receive_enqueue_items(0, Items) -> + lists:reverse(Items); +receive_enqueue_items(Count, Items) -> + receive + {enqueue_item, Item} -> + receive_enqueue_items(Count - 1, [Item | Items]) + after 1000 -> + erlang:error({missing_enqueue_items, Count}) + end. + +receive_recovered_bundle() -> + receive + {recover_bundle, CommittedTX, Items} -> + {CommittedTX, Items} + after 1000 -> + erlang:error(missing_recovered_bundle) + end. + +normalize_items(Items, Opts) -> + [ + hb_message:with_commitments( + #{ <<"commitment-device">> => <<"ans104@1.0">> }, + Item, + Opts + ) + || Item <- Items + ]. + +write_missing_item_bundle(ItemID, TX, Opts) -> + Store = hb_opts:get(store, no_viable_store, Opts), + Path = hb_store:path(Store, [ + <<"~bundler@1.0">>, + <<"item">>, + ItemID, + <<"bundle">> + ]), + hb_store:write(Store, Path, hb_message:id(TX, signed, Opts)). 
+ +new_data_item(Index, Size, Opts) -> + Tag = <<"tag", (integer_to_binary(Index))/binary>>, + Value = <<"value", (integer_to_binary(Index))/binary>>, + Item = ar_bundles:sign_item( + #tx{ + data = rand:bytes(Size), + tags = [{Tag, Value}] + }, + hb:wallet() + ), + hb_message:convert(Item, <<"structured@1.0">>, <<"ans104@1.0">>, Opts). + +new_bundle_tx(Items, Opts) -> + TX = dev_bundler_task:data_items_to_tx(lists:reverse(Items), Opts), + hb_message:convert(TX, <<"structured@1.0">>, <<"tx@1.0">>, Opts). diff --git a/src/dev_bundler_task.erl b/src/dev_bundler_task.erl new file mode 100644 index 000000000..93ee6e39b --- /dev/null +++ b/src/dev_bundler_task.erl @@ -0,0 +1,378 @@ +%%% @doc Implements the different bundling primitives: +%%% - post_tx: Building and posting an L1 transaction +%%% - build_proofs:Chunking up the bundle data and building the chunk proofs +%%% - post_proof: Seeding teh chunks to the Arweave network +-module(dev_bundler_task). +-export([worker_loop/0, log_task/3, format_timestamp/0]). +%%% Test-only exports. +-export([data_items_to_tx/2]). +-include("include/hb.hrl"). +-include("include/dev_bundler.hrl"). +-include_lib("eunit/include/eunit.hrl"). + +%% @doc Worker loop - executes tasks and reports back to dispatcher. +worker_loop() -> + receive + {execute_task, DispatcherPID, Task} -> + case execute_task(Task) of + {ok, Value} -> + DispatcherPID ! {task_complete, self(), Task, Value}; + {error, Reason} -> + DispatcherPID ! {task_failed, self(), Task, Reason} + end, + + worker_loop(); + stop -> + exit(normal) + end. + +%% @doc Execute a specific task. 
+execute_task(#task{type = post_tx, data = Items, opts = Opts} = Task) -> + try + ?event(debug_bundler, log_task(executing_task, Task, [])), + case build_signed_tx(Items, Opts) of + {ok, SignedTX} -> + Committed = hb_message:convert( + SignedTX, + #{ <<"device">> => <<"structured@1.0">>, <<"bundle">> => true }, + #{ <<"device">> => <<"tx@1.0">>, <<"bundle">> => true }, + Opts), + ?event(bundler_short, log_task(posting_tx, + Task, + [{tx, {explicit, hb_message:id(Committed, signed, Opts)}}] + )), + PostTXResponse = dev_arweave:post_tx_header( + SignedTX, + Opts + ), + case PostTXResponse of + {ok, _Result} -> + dev_bundler_cache:write_tx( + Committed, + Items, + Opts + ), + {ok, Committed}; + {_, ErrorReason} -> {error, ErrorReason} + end; + {error, {PriceErr, AnchorErr}} -> + ?event(bundler_short, + log_task(task_failed, Task, [ + {price, PriceErr}, + {anchor, AnchorErr} + ])), + {error, {PriceErr, AnchorErr}} + end + catch + _:Err:Stack -> + ?event(bundler_short, log_task(task_failed, Task, [{error, Err}])), + ?event(bundler_upload_error, + log_task(task_failed, Task, [{error, Err}, {trace, Stack}])), + {error, Err} + end; + +execute_task(#task{type = build_proofs, data = CommittedTX, opts = Opts} = Task) -> + try + ?event(debug_bundler, log_task(executing_task, Task, [])), + % Calculate chunks and proofs + TX = hb_message:convert( + CommittedTX, <<"tx@1.0">>, <<"structured@1.0">>, Opts), + Data = TX#tx.data, + DataRoot = TX#tx.data_root, + DataSize = TX#tx.data_size, + Mode = ar_tx:chunking_mode(TX#tx.format), + Chunks = ar_tx:chunk_binary(Mode, ?DATA_CHUNK_SIZE, Data), + ?event(bundler_short, {building_proofs, + {bundle, Task#task.bundle_id}, + {data_size, DataSize}, + {num_chunks, length(Chunks)}}), + SizeTaggedChunks = ar_tx:chunks_to_size_tagged_chunks(Chunks), + SizeTaggedChunkIDs = ar_tx:sized_chunks_to_sized_chunk_ids(SizeTaggedChunks), + {_Root, DataTree} = ar_merkle:generate_tree(SizeTaggedChunkIDs), + % Build proof list + Proofs = lists:filtermap( + 
fun({Chunk, Offset}) -> + case Chunk of + <<>> -> false; + _ -> + DataPath = ar_merkle:generate_path( + DataRoot, Offset - 1, DataTree), + Proof = #{ + chunk => Chunk, + data_path => DataPath, + offset => Offset - 1, + data_size => DataSize, + data_root => DataRoot + }, + {true, Proof} + end + end, + SizeTaggedChunks + ), + % -1 because the `?event(...)' macro increments the counter by 1. + hb_event:increment(bundler_short, built_proofs, length(Proofs) - 1), + ?event( + bundler_short, + {built_proofs, + {bundle, Task#task.bundle_id}, + {num_proofs, length(Proofs)} + }, + Opts + ), + {ok, Proofs} + catch + _:Err:_Stack -> + ?event(bundler_short, log_task(task_failed, Task, [{error, Err}])), + {error, Err} + end; + +execute_task(#task{type = post_proof, data = Proof, opts = Opts} = Task) -> + #{chunk := Chunk, data_path := DataPath, offset := Offset, + data_size := DataSize, data_root := DataRoot} = Proof, + ?event(debug_bundler, log_task(executing_task, Task, [])), + Request = #{ + <<"chunk">> => hb_util:encode(Chunk), + <<"data_path">> => hb_util:encode(DataPath), + <<"offset">> => integer_to_binary(Offset), + <<"data_size">> => integer_to_binary(DataSize), + <<"data_root">> => hb_util:encode(DataRoot) + }, + try + Serialized = hb_json:encode(Request), + Response = dev_arweave:post_json_chunk(Serialized, Opts), + case Response of + {ok, _} -> {ok, proof_posted}; + {error, Reason} -> {error, Reason} + end + catch + _:Err:_Stack -> + ?event(bundler_short, log_task(task_failed, Task, [{error, Err}])), + {error, Err} + end. + +%% @doc Build and sign a bundle TX without posting it. 
+build_signed_tx(Items, Opts) -> + TX = data_items_to_tx(Items, Opts), + DataSize = TX#tx.data_size, + PriceResult = get_price(DataSize, Opts), + AnchorResult = get_anchor(Opts), + case {PriceResult, AnchorResult} of + {{ok, Price}, {ok, Anchor}} -> + Wallet = hb_opts:get(priv_wallet, no_viable_wallet, Opts), + SignedTX = + dev_arweave_common:normalize( + ar_tx:sign( + TX#tx{anchor = Anchor, reward = Price}, + Wallet + ) + ), + {ok, SignedTX}; + {PriceErr, AnchorErr} -> + {error, {PriceErr, AnchorErr}} + end. + +data_items_to_tx(Items, Opts) -> + List = lists:map( + fun(Item) -> + hb_message:convert( + Item, + #{ <<"device">> => <<"ans104@1.0">>, <<"bundle">> => true }, + <<"structured@1.0">>, + Opts + ) + end, + lists:reverse(Items)), + dev_arweave_common:normalize(#tx{ + format = 2, + data = List + }). + +get_price(DataSize, Opts) -> + hb_ao:resolve( + #{ <<"device">> => <<"arweave@2.9">> }, + #{ <<"path">> => <<"/price">>, <<"size">> => DataSize }, + Opts + ). + +get_anchor(Opts) -> + hb_ao:resolve( + #{ <<"device">> => <<"arweave@2.9">> }, + #{ <<"path">> => <<"/tx_anchor">> }, + Opts + ). + +%%%=================================================================== +%%% Logging +%%%=================================================================== + +%% @doc Return a complete task event tuple for logging. +log_task(Event, Task, ExtraLogs) -> + erlang:list_to_tuple([Event | format_task(Task) ++ ExtraLogs]). + +%% @doc Format a task for logging. 
+format_task(#task{bundle_id = BundleID, type = post_tx, data = DataItems}) -> + [ + {task_type, post_tx}, + {timestamp, format_timestamp()}, + {bundle, BundleID}, + {num_items, length(DataItems)} + ]; +format_task(#task{bundle_id = BundleID, type = build_proofs, data = CommittedTX}) -> + [ + {task_type, build_proofs}, + {timestamp, format_timestamp()}, + {bundle, BundleID}, + {tx, {explicit, hb_message:id(CommittedTX, signed, #{})}} + ]; +format_task(#task{bundle_id = BundleID, type = post_proof, data = Proof}) -> + Offset = maps:get(offset, Proof), + [ + {task_type, post_proof}, + {timestamp, format_timestamp()}, + {bundle, BundleID}, + {offset, Offset} + ]. + +%% @doc Format erlang:timestamp() as a user-friendly RFC3339 string with milliseconds. +format_timestamp() -> + {MegaSecs, Secs, MicroSecs} = erlang:timestamp(), + Millisecs = (MegaSecs * 1000000 + Secs) * 1000 + (MicroSecs div 1000), + calendar:system_time_to_rfc3339(Millisecs, [{unit, millisecond}, {offset, "Z"}]). + +build_signed_tx_test() -> + Anchor = rand:bytes(32), + Price = 12345, + {ServerHandle, NodeOpts} = dev_bundler:start_mock_gateway(#{ + price => {200, integer_to_binary(Price)}, + tx_anchor => {200, hb_util:encode(Anchor)} + }), + TestOpts = NodeOpts#{ + priv_wallet => ar_wallet:new(), + store => hb_test_utils:test_store() + }, + try + Timestamp = 12344567, + ListValue = [<<"a">>, <<"b">>, <<"c">>], + StructuredItems = [ + #{ + <<"body">> => <<"body1">>, + <<"tag1">> => <<"value1">>, + <<"timestamp">> => Timestamp + }, + #{ + <<"body">> => <<"body3">>, + <<"tag3">> => <<"value3">>, + <<"list">> => ListValue + }, + #{ + <<"body">> => <<"body2">>, + <<"tag2">> => <<"value2">> + } + ], + Items = [ + hb_message:commit( + Item, + TestOpts, + #{ <<"device">> => <<"ans104@1.0">>, <<"bundle">> => true } + ) + || Item <- StructuredItems], + {ok, SignedTX} = build_signed_tx(Items, TestOpts), + ?assert(ar_tx:verify(SignedTX)), + ?assertEqual(Anchor, SignedTX#tx.anchor), + ?assertEqual(Price, 
SignedTX#tx.reward), + ?event(debug_test, {signed_tx, SignedTX}), + BundledTX = ar_bundles:deserialize(SignedTX), + ?event(debug_test, {bundled_tx, BundledTX}), + BundledItems = hb_util:numbered_keys_to_list(BundledTX#tx.data, #{}), + lists:foreach( + fun(Item) -> + ?assert(ar_bundles:verify_item(Item)) + end, + BundledItems + ), + BundledStructuredItems = [ + hb_message:convert( + Item, + <<"structured@1.0">>, + <<"ans104@1.0">>, + TestOpts + ) + || Item <- BundledItems], + ?assertEqual(lists:reverse(Items), BundledStructuredItems), + ok + after + hb_mock_server:stop(ServerHandle) + end. + +build_signed_tx_on_arbundles_js_test() -> + Anchor = rand:bytes(32), + Price = 12345, + {ServerHandle, NodeOpts} = dev_bundler:start_mock_gateway(#{ + price => {200, integer_to_binary(Price)}, + tx_anchor => {200, hb_util:encode(Anchor)} + }), + TestOpts = NodeOpts#{ + priv_wallet => hb:wallet(), + store => hb_test_utils:test_store() + }, + try + % Load an arweave.js-created dataitem + Item = ar_bundles:deserialize( + hb_util:ok( + file:read_file(<<"test/arbundles.js/ans104-item.bundle">>) + ) + ), + ?event(debug_test, {item, Item}), + ?assert(ar_bundles:verify_item(Item)), + % Load an arweave.js-created list bundle + {ok, Bin} = file:read_file(<<"test/arbundles.js/ans104-list-bundle.bundle">>), + BundledItem = ar_bundles:sign_item(#tx{ + format = ans104, + data = Bin, + data_size = byte_size(Bin), + tags = [ + {<<"Bundle-Format">>, <<"binary">>}, + {<<"Bundle-Version">>, <<"2.0.0">>} + ] + }, hb:wallet()), + ?event(debug_test, {bundled_item, BundledItem}), + ?assert(ar_bundles:verify_item(BundledItem)), + % Convert both dataitems to structured messages + ItemStructured = hb_message:convert(Item, + #{ <<"device">> => <<"structured@1.0">>, <<"bundle">> => true }, + #{ <<"device">> => <<"ans104@1.0">>, <<"bundle">> => true }, + TestOpts), + ?event(debug_test, {item_structured, ItemStructured}), + ?assert(hb_message:verify(ItemStructured, all, TestOpts)), + BundledItemStructured = 
hb_message:convert(BundledItem, + #{ <<"device">> => <<"structured@1.0">>, <<"bundle">> => true }, + #{ <<"device">> => <<"ans104@1.0">>, <<"bundle">> => true }, + TestOpts), + ?event(debug_test, {bundled_item_structured, BundledItemStructured}), + ?assert(hb_message:verify(BundledItemStructured, all, TestOpts)), + % Use build_signed_tx/2 to mimic the bundler worker logic. + {ok, SignedTX} = build_signed_tx( + [ItemStructured, BundledItemStructured], + TestOpts + ), + ?event(debug_test, {signed_tx, SignedTX}), + ?assert(ar_tx:verify(SignedTX)), + % Convert the signed TX to a structured message + StructuredTX = hb_message:convert(SignedTX, + #{ <<"device">> => <<"structured@1.0">>, <<"bundle">> => true }, + #{ <<"device">> => <<"tx@1.0">>, <<"bundle">> => true }, + TestOpts), + % ?event(debug_test, {structured_tx, StructuredTX}), + ?assert(hb_message:verify(StructuredTX, all, TestOpts)), + % Convert back to an L1 TX + SignedTXRoundtrip = hb_message:convert(StructuredTX, + #{ <<"device">> => <<"tx@1.0">>, <<"bundle">> => true }, + #{ <<"device">> => <<"structured@1.0">>, <<"bundle">> => true }, + TestOpts), + ?event(debug_test, {signed_tx_roundtrip, SignedTXRoundtrip}), + ?assert(ar_tx:verify(SignedTXRoundtrip)), + ?assertEqual(SignedTX, SignedTXRoundtrip), + ok + after + hb_mock_server:stop(ServerHandle) + end. 
\ No newline at end of file diff --git a/src/dev_codec_ans104.erl b/src/dev_codec_ans104.erl index 4b6536859..9aa0c5675 100644 --- a/src/dev_codec_ans104.erl +++ b/src/dev_codec_ans104.erl @@ -31,21 +31,25 @@ deserialize(TX, Req, Opts) when is_record(TX, tx) -> commit(Msg, Req = #{ <<"type">> := <<"unsigned">> }, Opts) -> commit(Msg, Req#{ <<"type">> => <<"unsigned-sha256">> }, Opts); commit(Msg, Req = #{ <<"type">> := <<"signed">> }, Opts) -> - commit(Msg, Req#{ <<"type">> => <<"rsa-pss-sha256">> }, Opts); -commit(Msg, Req = #{ <<"type">> := <<"rsa-pss-sha256">> }, Opts) -> + commit(Msg, Req#{ <<"type">> => ?RSA_SIGN_TYPE }, Opts); +commit(Msg, Req = #{ <<"type">> := Type }, Opts) + when Type =:= ?RSA_SIGN_TYPE + orelse Type =:= ?EDDSA_SIGN_TYPE + orelse Type =:= ?ETHEREUM_SIGN_TYPE -> % Convert the given message to an ANS-104 TX record, sign it, and convert % it back to a structured message. {ok, TX} = to(hb_private:reset(Msg), Req, Opts), - Wallet = hb_opts:get(priv_wallet, no_viable_wallet, Opts), - Signed = ar_bundles:sign_item(TX, Wallet), - SignedStructured = - hb_message:convert( - Signed, - <<"structured@1.0">>, - <<"ans104@1.0">>, - Opts - ), - {ok, SignedStructured}; + case {hb_opts:get(priv_wallet, no_viable_wallet, Opts), Type} of + {{{?RSA_KEY_TYPE, _Priv, _Pub}, _} = Wallet, ?RSA_SIGN_TYPE} -> + sign_tx(TX, Wallet, Opts); + {{{?EDDSA_KEY_TYPE, _Priv, _Pub}, _} = Wallet, ?EDDSA_SIGN_TYPE} -> + sign_tx(TX, Wallet, Opts); + {{{?ETHEREUM_KEY_TYPE, _Priv, _Pub}, _} = Wallet, ?ETHEREUM_SIGN_TYPE} -> + sign_tx(TX, Wallet, Opts); + {{{WalletType, _, _}, _}, _Type} -> + ?event(warning, {wrong_wallet_to_sign, {request_type, Type}, {wallet_type, WalletType}}), + throw({wrong_wallet_to_sign, {request_type, Type}, {wallet_type, WalletType}}) + end; commit(Msg, #{ <<"type">> := <<"unsigned-sha256">> }, Opts) -> % Remove the commitments from the message, convert it to ANS-104, then back. 
% This forces the message to be normalized and the unsigned ID to be @@ -60,6 +64,17 @@ commit(Msg, #{ <<"type">> := <<"unsigned-sha256">> }, Opts) -> ) }. +sign_tx(TX, Wallet, Opts) -> + Signed = ar_bundles:sign_item(TX, Wallet), + SignedStructured = + hb_message:convert( + Signed, + <<"structured@1.0">>, + <<"ans104@1.0">>, + Opts + ), + {ok, SignedStructured}. + %% @doc Verify an ANS-104 commitment. verify(Msg, Req, Opts) -> ?event({verify, {base, Msg}, {req, Req}}), @@ -107,7 +122,8 @@ do_from(RawTX, Req, Opts) -> % Add the commitments to the message if the TX has a signature. FieldCommitments = dev_codec_ans104_from:fields(TX, ?FIELD_PREFIX, Opts), WithCommitments = dev_codec_ans104_from:with_commitments( - TX, <<"ans104@1.0">>, FieldCommitments, Tags, Base, Keys, Opts), + ?BASE_FIELDS, TX, <<"ans104@1.0">>, FieldCommitments, + Tags, Base, Keys, Opts), ?event({from, {parsed_message, WithCommitments}}), {ok, WithCommitments}. @@ -458,6 +474,48 @@ field_and_tag_ordering_test() -> UnsignedTABM, #{priv_wallet => Wallet}, <<"ans104@1.0">>), ?assert(hb_message:verify(SignedTABM)). +fields_as_tags_test() -> + AnchorTag = crypto:strong_rand_bytes(32), + TargetTag = crypto:strong_rand_bytes(32), + AnchorField = crypto:strong_rand_bytes(32), + TargetField = crypto:strong_rand_bytes(32), + TX = #tx{ + tags = [ + {<<"anchor">>, hb_util:encode(AnchorTag)}, + {<<"target">>, hb_util:encode(TargetTag)} + ], + anchor = AnchorField, + target = TargetField + }, + SignedTX = ar_bundles:sign_item(TX, hb:wallet()), + ?event({signed_tx, SignedTX}), + ?assert(ar_bundles:verify_item(SignedTX)), + TABM = hb_util:ok(from(SignedTX, #{}, #{})), + ?event({tabm, TABM}), + ConvertedTX = hb_util:ok(to(TABM, #{}, #{})), + ?event({converted_tx, ConvertedTX}), + ?assert(ar_bundles:verify_item(ConvertedTX)), + ?assertEqual(ConvertedTX, dev_arweave_common:normalize(SignedTX)). 
+ +data_tag_with_data_test() -> + Data = <<"myrealdata">>, + TX = #tx{ + tags = [ + {<<"data">>, <<"tagdata">>} + ], + data = Data, + data_size = byte_size(Data) + }, + SignedTX = ar_bundles:sign_item(TX, hb:wallet()), + ?event(debug_test, {signed_tx, SignedTX}), + ?assert(ar_bundles:verify_item(SignedTX)), + TABM = hb_util:ok(from(SignedTX, #{}, #{})), + ?event(debug_test, {tabm, TABM}), + ConvertedTX = hb_util:ok(to(TABM, #{}, #{})), + ?event(debug_test, {converted_tx, ConvertedTX}), + ?assert(ar_bundles:verify_item(ConvertedTX)), + ?assertEqual(ConvertedTX, dev_arweave_common:normalize(SignedTX)). + unsigned_lowercase_bundle_map_tags_test() -> UnsignedTABM = #{ <<"a1">> => <<"value1">>, @@ -766,9 +824,9 @@ test_bundle_commitment(Commit, Encode, Decode) -> ?event(debug_test, {committed, Label, {explicit, Committed}}), ?assert(hb_message:verify(Committed, all, Opts), Label), {ok, _, CommittedCommitment} = hb_message:commitment( - #{ <<"type">> => <<"rsa-pss-sha256">> }, Committed, Opts), + #{ <<"type">> => ?RSA_SIGN_TYPE }, Committed, Opts), ?assertEqual( - [<<"list">>], hb_maps:get(<<"committed">>, CommittedCommitment, Opts), + [<<"list">>], hb_maps:get(<<"committed">>, CommittedCommitment, not_found, Opts), Label), ?assertEqual(ToBool(Commit), hb_util:atom(hb_ao:get(<<"bundle">>, CommittedCommitment, false, Opts)), @@ -789,9 +847,9 @@ test_bundle_commitment(Commit, Encode, Decode) -> ?event(debug_test, {decoded, Label, {explicit, Decoded}}), ?assert(hb_message:verify(Decoded, all, Opts), Label), {ok, _, DecodedCommitment} = hb_message:commitment( - #{ <<"type">> => <<"rsa-pss-sha256">> }, Decoded, Opts), + #{ <<"type">> => ?RSA_SIGN_TYPE }, Decoded, Opts), ?assertEqual( - [<<"list">>], hb_maps:get(<<"committed">>, DecodedCommitment, Opts), + [<<"list">>], hb_maps:get(<<"committed">>, DecodedCommitment, not_found, Opts), Label), ?assertEqual(ToBool(Commit), hb_util:atom(hb_ao:get(<<"bundle">>, DecodedCommitment, false, Opts)), diff --git 
a/src/dev_codec_ans104_from.erl b/src/dev_codec_ans104_from.erl index a60e90505..fe1f0a380 100644 --- a/src/dev_codec_ans104_from.erl +++ b/src/dev_codec_ans104_from.erl @@ -1,7 +1,7 @@ %%% @doc Library functions for decoding ANS-104-style data items to TABM form. -module(dev_codec_ans104_from). -export([fields/3, tags/2, data/4, committed/6, base/5]). --export([with_commitments/7]). +-export([with_commitments/8]). -include("include/hb.hrl"). %% @doc Return a TABM message containing the fields of the given decoded @@ -112,12 +112,18 @@ data_keys(Data, Opts) -> %% @doc Return the list of the keys from the tags TABM. Filter all metadata %% tags: `ao-data-key', `ao-types', `bundle-format', `bundle-version'. +%% We also filter `data` as we don't preserve the a data *field* via +%% `field-data` in the commitment. That means if we promote a `data` tag to +%% a key on the TABM, it will be interpreted as the message's actual data. +%% Instead if a user has provided a `data` tag, we'll preserve it in +%% `original-tags` but will strip it from the top-level message keys. tag_keys(Item, _Opts) -> MetaTags = [ <<"bundle-format">>, <<"bundle-version">>, <<"bundle-map">>, - <<"ao-data-key">> + <<"ao-data-key">>, + <<"data">> ], lists:filtermap( fun({Tag, _}) -> @@ -158,24 +164,26 @@ base(CommittedKeys, Fields, Tags, Data, Opts) -> %% @doc Return a message with the appropriate commitments added to it. 
with_commitments( - Item, Device, FieldCommitments, Tags, Base, CommittedKeys, Opts) -> + BaseFields, Item, Device, FieldCommitments, + Tags, Base, CommittedKeys, Opts) -> case Item#tx.signature of ?DEFAULT_SIG -> - case normal_tags(Item#tx.tags) of + case normal_tags(BaseFields, Item#tx.tags) of true -> Base; false -> with_unsigned_commitment( - Item, Device, FieldCommitments, Tags, Base, + BaseFields, Item, Device, FieldCommitments, Tags, Base, CommittedKeys, Opts) end; _ -> with_signed_commitment( - Item, Device, FieldCommitments, Tags, Base, CommittedKeys, Opts) + BaseFields, Item, Device, FieldCommitments, Tags, Base, + CommittedKeys, Opts) end. %% @doc Returns a commitments message for an item, containing an unsigned %% commitment. with_unsigned_commitment( - Item, Device, CommittedFields, Tags, + BaseFields, Item, Device, CommittedFields, Tags, UncommittedMessage, CommittedKeys, Opts) -> ID = hb_util:human_id(Item#tx.unsigned_id), UncommittedMessage#{ @@ -189,7 +197,8 @@ with_unsigned_commitment( <<"committed">> => CommittedKeys, <<"type">> => <<"unsigned-sha256">>, <<"bundle">> => bundle_commitment_key(Tags, Opts), - <<"original-tags">> => original_tags(Item, Opts) + <<"original-tags">> => original_tags( + BaseFields, Item, Opts) }, Opts ), @@ -201,9 +210,9 @@ with_unsigned_commitment( %% @doc Returns a commitments message for an item, containing a signed %% commitment. 
with_signed_commitment( - Item, Device, FieldCommitments, Tags, + BaseFields, Item, Device, FieldCommitments, Tags, UncommittedMessage, CommittedKeys, Opts) -> - Address = hb_util:human_id(ar_wallet:to_address(Item#tx.owner)), + Address = hb_util:human_id(ar_wallet:to_address(Item#tx.owner, Item#tx.signature_type)), ID = hb_util:human_id(Item#tx.id), ExtraCommitments = hb_maps:merge( FieldCommitments, @@ -221,9 +230,10 @@ with_signed_commitment( <<"signature">> => hb_util:encode(Item#tx.signature), <<"keyid">> => <<"publickey:", (hb_util:encode(Item#tx.owner))/binary>>, - <<"type">> => <<"rsa-pss-sha256">>, + <<"type">> => dev_arweave_common:serialize_sig_type(Item#tx.signature_type), <<"bundle">> => bundle_commitment_key(Tags, Opts), - <<"original-tags">> => original_tags(Item, Opts) + <<"original-tags">> => original_tags( + BaseFields, Item, Opts) }, Opts ), @@ -240,18 +250,20 @@ bundle_commitment_key(Tags, Opts) -> hb_util:bin(hb_maps:is_key(<<"bundle-format">>, Tags, Opts)). %% @doc Check whether a list of key-value pairs contains only normalized keys. -normal_tags(Tags) -> +normal_tags(BaseFields, Tags) -> + ReservedFields = [<<"data">> | BaseFields], lists:all( fun({Key, _}) -> - hb_util:to_lower(hb_ao:normalize_key(Key)) =:= Key + hb_util:to_lower(hb_ao:normalize_key(Key)) =:= Key andalso + not lists:member(Key, ReservedFields) end, Tags ). %% @doc Return the original tags of an item if it is applicable. Otherwise, %% return `undefined'. -original_tags(Item, _Opts) -> - case normal_tags(Item#tx.tags) of +original_tags(BaseFields, Item, _Opts) -> + case normal_tags(BaseFields, Item#tx.tags) of true -> unset; false -> encoded_tags_to_map(Item#tx.tags) end. 
diff --git a/src/dev_codec_ans104_to.erl b/src/dev_codec_ans104_to.erl index 721fdc053..0c3149613 100644 --- a/src/dev_codec_ans104_to.erl +++ b/src/dev_codec_ans104_to.erl @@ -106,17 +106,21 @@ commitment_to_tx(Commitment, FieldsFun, Opts) -> {ok, OriginalTags} -> original_tags_to_tags(OriginalTags); error -> [] end, + SignatureType = dev_arweave_common:deserialize_sig_type( + maps:get(<<"type">>, Commitment) + ), ?event({commitment_owner, Owner}), ?event({commitment_signature, Signature}), + ?event({commitment_signature_type, SignatureType}), ?event({commitment_tags, Tags}), TX = #tx{ owner = Owner, signature = Signature, + signature_type = SignatureType, tags = Tags }, FieldsFun(TX, ?FIELD_PREFIX, Commitment, Opts). - %% @doc Convert a HyperBEAM-compatible map into an ANS-104 encoded tag list, %% recreating the original order of the tags. original_tags_to_tags(TagMap) -> diff --git a/src/dev_codec_flat.erl b/src/dev_codec_flat.erl index 0e9e973ae..77cb7f3df 100644 --- a/src/dev_codec_flat.erl +++ b/src/dev_codec_flat.erl @@ -88,7 +88,7 @@ serialize(Map, Opts) when is_map(Map) -> Acc, hb_path:to_binary(Key), <<": ">>, - hb_maps:get(Key, Flattened, Opts), <<"\n">> + hb_maps:get(Key, Flattened, not_found, Opts), <<"\n">> ] end, <<>>, diff --git a/src/dev_codec_httpsig_conv.erl b/src/dev_codec_httpsig_conv.erl index 5a0db779b..d840ac9ea 100644 --- a/src/dev_codec_httpsig_conv.erl +++ b/src/dev_codec_httpsig_conv.erl @@ -374,7 +374,12 @@ to(TABM, Req, FormatOpts, Opts) when is_map(TABM) -> true -> % Convert back to the fully loaded structured@1.0 message, then % convert to TABM with bundling enabled. 
- Structured = hb_message:convert(TABM, <<"structured@1.0">>, Opts), + Structured = + hb_message:convert( + TABM, + <<"structured@1.0">>, + #{ <<"device">> => <<"structured@1.0">>, <<"bundle">> => true}, + Opts), Loaded = hb_cache:ensure_all_loaded(Structured, Opts), encode_ids( hb_message:convert( diff --git a/src/dev_codec_httpsig_siginfo.erl b/src/dev_codec_httpsig_siginfo.erl index c9c06b541..493f9421e 100644 --- a/src/dev_codec_httpsig_siginfo.erl +++ b/src/dev_codec_httpsig_siginfo.erl @@ -511,7 +511,7 @@ parse_alg_test() -> #{}), #{ <<"commitment-device">> => <<"ans104@1.0">>, - <<"type">> => <<"rsa-pss-sha256">> + <<"type">> => ?RSA_SIGN_TYPE } ). @@ -538,7 +538,7 @@ escaped_value_test() -> } }, <<"signature">> => hb_util:encode(Signature), - <<"type">> => <<"rsa-pss-sha256">> + <<"type">> => ?RSA_SIGN_TYPE }, SigInfo = commitments_to_siginfo(#{}, #{ ID => Commitment }, #{}), Commitments = siginfo_to_commitments(SigInfo, #{}, #{}), diff --git a/src/dev_codec_json.erl b/src/dev_codec_json.erl index dff5d1d91..bd99c7b01 100644 --- a/src/dev_codec_json.erl +++ b/src/dev_codec_json.erl @@ -54,9 +54,14 @@ from(JSON, Req, Opts) -> Opts ), ?event(debug_json, {structured, Structured}, Opts), - {ok, TABM} = dev_codec_structured:from(Structured, Req, Opts), - ?event(debug_json, {tabm, TABM}, Opts), - {ok, TABM}. + case hb_maps:get(<<"accept-codec">>, Req, undefined, Opts) of + <<"structured@1.0">> -> {ok, Structured}; + _ -> + % Re-encode the structured message back to TABM for the caller. + {ok, TABM} = dev_codec_structured:from(Structured, Req, Opts), + ?event(debug_json, {tabm, TABM}, Opts), + {ok, TABM} + end. commit(Msg, Req, Opts) -> dev_codec_httpsig:commit(Msg, Req, Opts). 
diff --git a/src/dev_codec_structured.erl b/src/dev_codec_structured.erl index aa3975a55..9a4e6f11f 100644 --- a/src/dev_codec_structured.erl +++ b/src/dev_codec_structured.erl @@ -319,7 +319,7 @@ encode_value(Value) -> decode_value(Type, Value) when is_list(Type) -> decode_value(list_to_binary(Type), Value); decode_value(Type, Value) when is_binary(Type) -> - ?event({decoding, {type, Type}, {value, Value}}), + ?event({decoding, {type, Type}, {value, {explicit, Value}}}), decode_value( binary_to_existing_atom( list_to_binary(string:to_lower(binary_to_list(Type))), diff --git a/src/dev_codec_tx.erl b/src/dev_codec_tx.erl index 3363969d2..9ac8750a4 100644 --- a/src/dev_codec_tx.erl +++ b/src/dev_codec_tx.erl @@ -6,7 +6,8 @@ -include_lib("eunit/include/eunit.hrl"). -define(BASE_FIELDS, [ - <<"anchor">>, <<"format">>, <<"quantity">>, <<"reward">>, <<"target">> ]). + <<"anchor">>, <<"format">>, <<"quantity">>, <<"reward">>, <<"target">>, + <<"data_root">>, <<"data_size">> ]). %% @doc Sign a message using the `priv_wallet' key in the options. Supports both %% the `hmac-sha256' and `rsa-pss-sha256' algorithms, offering unsigned and @@ -14,8 +15,8 @@ commit(Msg, Req = #{ <<"type">> := <<"unsigned">> }, Opts) -> commit(Msg, Req#{ <<"type">> => <<"unsigned-sha256">> }, Opts); commit(Msg, Req = #{ <<"type">> := <<"signed">> }, Opts) -> - commit(Msg, Req#{ <<"type">> => <<"rsa-pss-sha256">> }, Opts); -commit(Msg, Req = #{ <<"type">> := <<"rsa-pss-sha256">> }, Opts) -> + commit(Msg, Req#{ <<"type">> => ?RSA_SIGN_TYPE }, Opts); +commit(Msg, Req = #{ <<"type">> := ?RSA_SIGN_TYPE }, Opts) -> ?event({committing, {msg, Msg}, {req, Req}}), % Convert the given message to an L1 TX record, sign it, and convert % it back to a structured message. @@ -93,7 +94,8 @@ do_from(RawTX, Req, Opts) -> % Add the commitments to the message if the TX has a signature. 
FieldCommitments = dev_codec_tx_from:fields(TX, ?FIELD_PREFIX, Opts), WithCommitments = dev_codec_ans104_from:with_commitments( - TX, <<"tx@1.0">>, FieldCommitments, Tags, Base, Keys, Opts), + ?BASE_FIELDS, TX, <<"tx@1.0">>, FieldCommitments, + Tags, Base, Keys, Opts), ?event({from, {parsed_message, hb_util:human_id(TX#tx.id)}}), {ok, WithCommitments}. @@ -145,18 +147,6 @@ to(RawTABM, Req, Opts) when is_map(RawTABM) -> enforce_valid_tx(FinalTX), ?event({to_result, FinalTX}), {ok, FinalTX}; -%% @doc List of ans104 items is bundled into a single L1 transaction. -to(RawList, Req, Opts) when is_list(RawList) -> - List = lists:map( - fun(Item) -> hb_util:ok(dev_codec_ans104:to(Item, Req, Opts)) end, - RawList), - TX = #tx{ - format = 2, - data = List - }, - Bundle = dev_arweave_common:normalize(TX), - ?event({to_result, Bundle}), - {ok, Bundle}; to(Other, _Req, _Opts) -> throw({invalid_tx, Other}). @@ -169,10 +159,8 @@ to(Other, _Req, _Opts) -> %% those are checked as well (e.g. format is 1 or 2). %% 3. Unsupported fields are set to their default values. %% -%% Of note: for now we require that the `data` field be set on an L1 TX if -%% there is data. In other words we do not allow `data_root` and `data_size` to -%% be set if `data` is *not* set. This differs from the Arweave protocol which -%% explicitly allows TX headers to be validated in the absence of data. +%% Of note: `data_root`/`data_size` are optional for value transfers and should +%% be preserved when present on the header, even if `data` is missing. %% %% When support is added for new fields (e.g. when we add support for ECDSA signatures), %% this function will have to be updated. 
@@ -198,7 +186,7 @@ enforce_valid_tx(TX) -> {invalid_field, anchor, TX#tx.anchor} ), hb_util:ok_or_throw(TX, - hb_util:check_size(TX#tx.owner, [byte_size(?DEFAULT_OWNER)]), + hb_util:check_type(TX#tx.owner, binary), {invalid_field, owner, TX#tx.owner} ), hb_util:ok_or_throw(TX, @@ -218,7 +206,7 @@ enforce_valid_tx(TX) -> {invalid_field, data_root, TX#tx.data_root} ), hb_util:ok_or_throw(TX, - hb_util:check_size(TX#tx.signature, [65, byte_size(?DEFAULT_SIG)]), + hb_util:check_type(TX#tx.signature, binary), {invalid_field, signature, TX#tx.signature} ), hb_util:ok_or_throw(TX, @@ -231,9 +219,9 @@ enforce_valid_tx(TX) -> hb_util:check_value(TX#tx.denomination, [0]), {invalid_field, denomination, TX#tx.denomination} ), - % Arweave L1 #tx only supports RSA signatures for now + % Arweave L1 #tx supports RSA and ECDSA signatures hb_util:ok_or_throw(TX, - hb_util:check_value(TX#tx.signature_type, [?RSA_KEY_TYPE]), + hb_util:check_value(TX#tx.signature_type, [?RSA_KEY_TYPE, ?ECDSA_KEY_TYPE]), {invalid_field, signature_type, TX#tx.signature_type} ), hb_util:ok_or_throw(TX, @@ -262,28 +250,7 @@ enforce_valid_tx(TX) -> throw({invalid_field, tag, InvalidTagForm}) end, TX#tx.tags - ), - enforce_valid_tx_data(TX). - -%% @doc For now we require that the `data` field be set on an L1 TX if -%% there is data. In other words we do not allow `data_root` and `data_size` to -%% be set if `data` is *not* set. This differs from the Arweave protocol which -%% explicitly allows TX headers to be validated in the absence of data. -enforce_valid_tx_data(TX) when TX#tx.data == ?DEFAULT_DATA -> - case TX#tx.data_root =/= ?DEFAULT_DATA_ROOT of - true -> - throw({invalid_field, data_root, TX#tx.data_root}); - false -> - ok - end, - case TX#tx.data_size > 0 of - true -> - throw({invalid_field, data_size, TX#tx.data_size}); - false -> - ok - end; -enforce_valid_tx_data(TX) -> - ok. + ). %%%=================================================================== %%% Tests. 
@@ -291,22 +258,11 @@ enforce_valid_tx_data(TX) -> enforce_valid_tx_test() -> BaseTX = #tx{ format = 2 }, - InvalidUnsignedID = crypto:strong_rand_bytes(1), - GoodID = crypto:strong_rand_bytes(32), BadID31 = crypto:strong_rand_bytes(31), BadID33 = crypto:strong_rand_bytes(33), - BadOwnerSize = crypto:strong_rand_bytes(byte_size(?DEFAULT_OWNER) - 1), TooLongTagName = crypto:strong_rand_bytes(?MAX_TAG_NAME_SIZE + 1), TooLongTagValue = crypto:strong_rand_bytes(?MAX_TAG_VALUE_SIZE + 1), - - SigInvalidSize1 = crypto:strong_rand_bytes(1), - SigInvalidSize64 = crypto:strong_rand_bytes(64), - SigInvalidSize66 = crypto:strong_rand_bytes(66), - SigInvalidSize511 = crypto:strong_rand_bytes(511), - SigTooLong513 = crypto:strong_rand_bytes(byte_size(?DEFAULT_SIG)+1), - - FailureCases = [ {not_a_tx_record, not_a_tx_record_atom, {invalid_tx, not_a_tx_record_atom}}, {invalid_format_0, BaseTX#tx{format = 0}, {invalid_field, format, 0}}, @@ -317,32 +273,24 @@ enforce_valid_tx_test() -> {unsigned_id_invalid_val, BaseTX#tx{unsigned_id = InvalidUnsignedID}, {invalid_field, unsigned_id, InvalidUnsignedID}}, {anchor_too_short_31, BaseTX#tx{anchor = BadID31}, {invalid_field, anchor, BadID31}}, {anchor_too_long_33, BaseTX#tx{anchor = BadID33}, {invalid_field, anchor, BadID33}}, - {owner_wrong_size, BaseTX#tx{owner = BadOwnerSize}, {invalid_field, owner, BadOwnerSize}}, - {owner_empty, BaseTX#tx{owner = <<>>}, {invalid_field, owner, <<>>}}, + {owner_wrong_type, BaseTX#tx{owner = "hello"}, {invalid_field, owner, "hello"}}, {target_too_short_31, BaseTX#tx{target = BadID31}, {invalid_field, target, BadID31}}, {target_too_long_33, BaseTX#tx{target = BadID33}, {invalid_field, target, BadID33}}, {quantity_not_integer, BaseTX#tx{quantity = <<"100">>}, {invalid_field, quantity, <<"100">>}}, {data_size_not_integer, BaseTX#tx{data_size = an_atom}, {invalid_field, data_size, an_atom}}, {data_root_too_short_31, BaseTX#tx{data_root = BadID31}, {invalid_field, data_root, BadID31}}, 
{data_root_too_long_33, BaseTX#tx{data_root = BadID33}, {invalid_field, data_root, BadID33}}, - {signature_invalid_size_1, BaseTX#tx{signature = SigInvalidSize1}, {invalid_field, signature, SigInvalidSize1}}, - {signature_invalid_size_64, BaseTX#tx{signature = SigInvalidSize64}, {invalid_field, signature, SigInvalidSize64}}, - {signature_invalid_size_66, BaseTX#tx{signature = SigInvalidSize66}, {invalid_field, signature, SigInvalidSize66}}, - {signature_invalid_size_511, BaseTX#tx{signature = SigInvalidSize511}, {invalid_field, signature, SigInvalidSize511}}, - {signature_too_long_513, BaseTX#tx{signature = SigTooLong513}, {invalid_field, signature, SigTooLong513}}, - {signature_empty, BaseTX#tx{signature = <<>>}, {invalid_field, signature, <<>>}}, + {signature_invalid_type, BaseTX#tx{signature = "hello"}, {invalid_field, signature, "hello"}}, {reward_not_integer, BaseTX#tx{reward = 1.0}, {invalid_field, reward, 1.0}}, {denomination_not_zero, BaseTX#tx{denomination = 1}, {invalid_field, denomination, 1}}, - {signature_type_not_rsa, BaseTX#tx{signature_type = ?ECDSA_KEY_TYPE}, {invalid_field, signature_type, ?ECDSA_KEY_TYPE}}, + %% ECDSA signature type is now supported, removed signature_type_not_rsa test {tags_not_list, BaseTX#tx{tags = #{}}, {invalid_field, tags, #{}}}, {tag_name_not_binary, BaseTX#tx{tags = [{not_binary, <<"val">>}]}, {invalid_field, tag_name, not_binary}}, {tag_name_too_long, BaseTX#tx{tags = [{TooLongTagName, <<"val">>}]}, {invalid_field, tag_name, TooLongTagName}}, {tag_value_not_binary, BaseTX#tx{tags = [{<<"key">>, not_binary}]}, {invalid_field, tag_value, not_binary}}, {tag_value_too_long, BaseTX#tx{tags = [{<<"key">>, TooLongTagValue}]}, {invalid_field, tag_value, TooLongTagValue}}, {invalid_tag_form_atom, BaseTX#tx{tags = [not_a_tuple]}, {invalid_field, tag, not_a_tuple}}, - {invalid_tag_form_list, BaseTX#tx{tags = [[<<"name">>, <<"value">>]]}, {invalid_field, tag, [<<"name">>, <<"value">>]} }, - {data_root_without_data, 
BaseTX#tx{data_root = GoodID}, {invalid_field, data_root, GoodID}}, - {data_size_without_data, BaseTX#tx{data_size = 1}, {invalid_field, data_size, 1}} + {invalid_tag_form_list, BaseTX#tx{tags = [[<<"name">>, <<"value">>]]}, {invalid_field, tag, [<<"name">>, <<"value">>]} } ], lists:foreach( @@ -368,7 +316,7 @@ happy_tx_test() -> quantity = 1000, data = Data, data_size = byte_size(Data), - data_root = ar_tx:data_root(Data), + data_root = ar_tx:data_root(arweavejs, Data), reward = 2000 }, UnsignedTABM = #{ @@ -387,7 +335,7 @@ happy_tx_test() -> <<"data">>, <<"tag1">>, <<"tag2">>, <<"type">>, <<"anchor">>, <<"quantity">>, <<"reward">>, <<"target">>], - <<"type">> => <<"rsa-pss-sha256">>, + <<"type">> => ?RSA_SIGN_TYPE, <<"bundle">> => <<"false">>, <<"field-target">> => hb_util:encode(Target), <<"field-anchor">> => hb_util:encode(Anchor), @@ -396,6 +344,153 @@ happy_tx_test() -> }, do_tx_roundtrips(TX, UnsignedTABM, SignedCommitment). +data_header_but_no_data_test() -> + Anchor = crypto:strong_rand_bytes(32), + Target = crypto:strong_rand_bytes(32), + Data = <<"test-data">>, + DataRoot = ar_tx:data_root(arweavejs, Data), + DataSize = byte_size(Data), + UnsignedTX = #tx{ + format = 2, + anchor = Anchor, + tags = [ + {<<"tag1">>, <<"value1">>} + ], + target = Target, + quantity = 1000, + data_size = DataSize, + data_root = DataRoot, + reward = 2000 + }, + UnsignedTABM = #{ + <<"anchor">> => hb_util:encode(Anchor), + <<"target">> => hb_util:encode(Target), + <<"quantity">> => <<"1000">>, + <<"reward">> => <<"2000">>, + <<"data_root">> => hb_util:encode(DataRoot), + <<"data_size">> => integer_to_binary(DataSize), + <<"tag1">> => <<"value1">> + }, + SignedCommitment = #{ + <<"commitment-device">> => <<"tx@1.0">>, + <<"committed">> => [ + <<"tag1">>, <<"anchor">>, <<"quantity">>, <<"reward">>, + <<"target">>, <<"data_root">>, <<"data_size">>], + <<"type">> => ?RSA_SIGN_TYPE, + <<"bundle">> => <<"false">>, + <<"field-target">> => hb_util:encode(Target), + <<"field-anchor">> 
=> hb_util:encode(Anchor), + <<"field-quantity">> => <<"1000">>, + <<"field-reward">> => <<"2000">>, + <<"field-data_root">> => hb_util:encode(DataRoot), + <<"field-data_size">> => integer_to_binary(DataSize) + }, + do_tx_roundtrips( + UnsignedTX, + UnsignedTABM, + SignedCommitment, + #{ + <<"bundle">> => false, + <<"exclude-data">> => true + } + ). + +data_tag_with_data_test() -> + Data = <<"myrealdata">>, + TX = #tx{ + format = 2, + tags = [ + {<<"data">>, <<"tagdata">>} + ], + data = Data, + data_size = byte_size(Data), + data_root = ar_tx:data_root(arweavejs, Data) + }, + UnsignedID = dev_arweave_common:generate_id(TX, unsigned), + UnsignedTABM = #{ + <<"commitments">> => #{ + hb_util:encode(UnsignedID) => #{ + <<"commitment-device">> => <<"tx@1.0">>, + <<"committed">> => [<<"data">>], + <<"original-tags">> =>#{ + <<"1">> => #{ + <<"name">> => <<"data">>, + <<"value">> => <<"tagdata">> + } + }, + <<"type">> => <<"unsigned-sha256">>, + <<"bundle">> => <<"false">> + } + }, + <<"data">> => Data + }, + SignedCommitment = #{ + <<"commitment-device">> => <<"tx@1.0">>, + <<"committed">> => [<<"data">>], + <<"original-tags">> =>#{ + <<"1">> => #{ + <<"name">> => <<"data">>, + <<"value">> => <<"tagdata">> + } + }, + <<"type">> => ?RSA_SIGN_TYPE, + <<"bundle">> => <<"false">> + }, + do_tx_roundtrips(TX, UnsignedTABM, SignedCommitment). 
+ +data_tag_no_data_test() -> + Data = <<"myrealdata">>, + DataRoot = ar_tx:data_root(arweavejs, Data), + DataSize = byte_size(Data), + TX = #tx{ + format = 2, + tags = [ + {<<"data">>, <<"tagdata">>} + ], + data_size = DataSize, + data_root = DataRoot + }, + UnsignedID = dev_arweave_common:generate_id(TX, unsigned), + UnsignedTABM = #{ + <<"commitments">> => #{ + hb_util:encode(UnsignedID) => #{ + <<"commitment-device">> => <<"tx@1.0">>, + <<"committed">> => [<<"data_root">>, <<"data_size">>], + <<"field-data_root">> => + hb_util:encode(DataRoot), + <<"field-data_size">> => + integer_to_binary(DataSize), + <<"original-tags">> =>#{ + <<"1">> => #{ + <<"name">> => <<"data">>, + <<"value">> => <<"tagdata">> + } + }, + <<"type">> => <<"unsigned-sha256">>, + <<"bundle">> => <<"false">> + } + }, + <<"data_root">> => hb_util:encode(DataRoot), + <<"data_size">> => integer_to_binary(DataSize) + }, + SignedCommitment = #{ + <<"commitment-device">> => <<"tx@1.0">>, + <<"committed">> => [<<"data_root">>, <<"data_size">>], + <<"field-data_root">> => + hb_util:encode(DataRoot), + <<"field-data_size">> => + integer_to_binary(DataSize), + <<"original-tags">> =>#{ + <<"1">> => #{ + <<"name">> => <<"data">>, + <<"value">> => <<"tagdata">> + } + }, + <<"type">> => ?RSA_SIGN_TYPE, + <<"bundle">> => <<"false">> + }, + do_tx_roundtrips(TX, UnsignedTABM, SignedCommitment). + tag_name_case_test() -> TX = #tx{ format = 2, @@ -430,7 +525,7 @@ tag_name_case_test() -> <<"value">> => <<"test-value">> } }, - <<"type">> => <<"rsa-pss-sha256">>, + <<"type">> => ?RSA_SIGN_TYPE, <<"bundle">> => <<"false">> }, do_tx_roundtrips(TX, UnsignedTABM, SignedCommitment). @@ -478,7 +573,241 @@ duplicated_tag_name_test() -> <<"value">> => <<"test-value-2">> } }, - <<"type">> => <<"rsa-pss-sha256">>, + <<"type">> => ?RSA_SIGN_TYPE, + <<"bundle">> => <<"false">> + }, + do_tx_roundtrips(TX, UnsignedTABM, SignedCommitment). 
+ +tags_and_fields_test() -> + AnchorTag = crypto:strong_rand_bytes(32), + TargetTag = crypto:strong_rand_bytes(32), + DataRootTag = crypto:strong_rand_bytes(32), + AnchorField = crypto:strong_rand_bytes(32), + TargetField = crypto:strong_rand_bytes(32), + DataRootField = crypto:strong_rand_bytes(32), + TX = #tx{ + + tags = [ + {<<"anchor">>, hb_util:encode(AnchorTag)}, + {<<"format">>, <<"1">>}, + {<<"quantity">>, <<"1000">>}, + {<<"reward">>, <<"2000">>}, + {<<"target">>, hb_util:encode(TargetTag)}, + {<<"data_root">>, hb_util:encode(DataRootTag)}, + {<<"data_size">>, <<"100">>} + ], + anchor = AnchorField, + format = 2, + quantity = 5, + reward = 6, + target = TargetField, + data_root = DataRootField, + data_size = 7 + }, + UnsignedID = dev_arweave_common:generate_id(TX, unsigned), + UnsignedTABM = #{ + <<"anchor">> => hb_util:encode(AnchorField), + <<"commitments">> => #{ + hb_util:encode(UnsignedID) => #{ + <<"commitment-device">> => <<"tx@1.0">>, + <<"committed">> => [<<"anchor">>, <<"format">>, + <<"quantity">>, <<"reward">>, <<"target">>, + <<"data_root">>, <<"data_size">>], + <<"field-anchor">> => hb_util:encode(AnchorField), + <<"field-quantity">> => <<"5">>, + <<"field-reward">> => <<"6">>, + <<"field-target">> => hb_util:encode(TargetField), + <<"field-data_root">> => hb_util:encode(DataRootField), + <<"field-data_size">> => <<"7">>, + <<"original-tags">> =>#{ + <<"1">> => #{ + <<"name">> => <<"anchor">>, + <<"value">> => hb_util:encode(AnchorTag) + }, + <<"2">> => #{ + <<"name">> => <<"format">>, + <<"value">> => <<"1">> + }, + <<"3">> => #{ + <<"name">> => <<"quantity">>, + <<"value">> => <<"1000">> + }, + <<"4">> => #{ + <<"name">> => <<"reward">>, + <<"value">> => <<"2000">> + }, + <<"5">> => #{ + <<"name">> => <<"target">>, + <<"value">> => hb_util:encode(TargetTag) + }, + <<"6">> => #{ + <<"name">> => <<"data_root">>, + <<"value">> => hb_util:encode(DataRootTag) + }, + <<"7">> => #{ + <<"name">> => <<"data_size">>, + <<"value">> => <<"100">> + } + 
}, + <<"type">> => <<"unsigned-sha256">>, + <<"bundle">> => <<"false">> + } + }, + <<"quantity">> => <<"5">>, + <<"reward">> => <<"6">>, + <<"target">> => hb_util:encode(TargetField), + <<"data_root">> => hb_util:encode(DataRootField), + <<"data_size">> => <<"7">>, + <<"format">> => <<"1">> + }, + SignedCommitment = #{ + <<"commitment-device">> => <<"tx@1.0">>, + <<"committed">> => [<<"anchor">>, <<"format">>, + <<"quantity">>, <<"reward">>, <<"target">>, + <<"data_root">>, <<"data_size">>], + <<"field-anchor">> => hb_util:encode(AnchorField), + <<"field-quantity">> => <<"5">>, + <<"field-reward">> => <<"6">>, + <<"field-target">> => hb_util:encode(TargetField), + <<"field-data_root">> => hb_util:encode(DataRootField), + <<"field-data_size">> => <<"7">>, + <<"original-tags">> =>#{ + <<"1">> => #{ + <<"name">> => <<"anchor">>, + <<"value">> => hb_util:encode(AnchorTag) + }, + <<"2">> => #{ + <<"name">> => <<"format">>, + <<"value">> => <<"1">> + }, + <<"3">> => #{ + <<"name">> => <<"quantity">>, + <<"value">> => <<"1000">> + }, + <<"4">> => #{ + <<"name">> => <<"reward">>, + <<"value">> => <<"2000">> + }, + <<"5">> => #{ + <<"name">> => <<"target">>, + <<"value">> => hb_util:encode(TargetTag) + }, + <<"6">> => #{ + <<"name">> => <<"data_root">>, + <<"value">> => hb_util:encode(DataRootTag) + }, + <<"7">> => #{ + <<"name">> => <<"data_size">>, + <<"value">> => <<"100">> + } + }, + <<"type">> => ?RSA_SIGN_TYPE, + <<"bundle">> => <<"false">> + }, + do_tx_roundtrips(TX, UnsignedTABM, SignedCommitment). 
+ +tags_no_fields_test() -> + AnchorTag = crypto:strong_rand_bytes(32), + TargetTag = crypto:strong_rand_bytes(32), + DataRootTag = crypto:strong_rand_bytes(32), + TX = #tx{ + tags = [ + {<<"anchor">>, hb_util:encode(AnchorTag)}, + {<<"format">>, <<"1">>}, + {<<"quantity">>, <<"1000">>}, + {<<"reward">>, <<"2000">>}, + {<<"target">>, hb_util:encode(TargetTag)}, + {<<"data_root">>, hb_util:encode(DataRootTag)}, + {<<"data_size">>, <<"100">>} + ], + format = 2 + }, + UnsignedID = dev_arweave_common:generate_id(TX, unsigned), + UnsignedTABM = #{ + <<"commitments">> => #{ + hb_util:encode(UnsignedID) => #{ + <<"commitment-device">> => <<"tx@1.0">>, + <<"committed">> => [<<"anchor">>, <<"format">>, + <<"quantity">>, <<"reward">>, <<"target">>, + <<"data_root">>, <<"data_size">>], + <<"original-tags">> =>#{ + <<"1">> => #{ + <<"name">> => <<"anchor">>, + <<"value">> => hb_util:encode(AnchorTag) + }, + <<"2">> => #{ + <<"name">> => <<"format">>, + <<"value">> => <<"1">> + }, + <<"3">> => #{ + <<"name">> => <<"quantity">>, + <<"value">> => <<"1000">> + }, + <<"4">> => #{ + <<"name">> => <<"reward">>, + <<"value">> => <<"2000">> + }, + <<"5">> => #{ + <<"name">> => <<"target">>, + <<"value">> => hb_util:encode(TargetTag) + }, + <<"6">> => #{ + <<"name">> => <<"data_root">>, + <<"value">> => hb_util:encode(DataRootTag) + }, + <<"7">> => #{ + <<"name">> => <<"data_size">>, + <<"value">> => <<"100">> + } + }, + <<"type">> => <<"unsigned-sha256">>, + <<"bundle">> => <<"false">> + } + }, + <<"anchor">> => hb_util:encode(AnchorTag), + <<"quantity">> => <<"1000">>, + <<"reward">> => <<"2000">>, + <<"target">> => hb_util:encode(TargetTag), + <<"data_root">> => hb_util:encode(DataRootTag), + <<"data_size">> => <<"100">>, + <<"format">> => <<"1">> + }, + SignedCommitment = #{ + <<"commitment-device">> => <<"tx@1.0">>, + <<"committed">> => [<<"anchor">>, <<"format">>, + <<"quantity">>, <<"reward">>, <<"target">>, + <<"data_root">>, <<"data_size">>], + <<"original-tags">> =>#{ + 
<<"1">> => #{ + <<"name">> => <<"anchor">>, + <<"value">> => hb_util:encode(AnchorTag) + }, + <<"2">> => #{ + <<"name">> => <<"format">>, + <<"value">> => <<"1">> + }, + <<"3">> => #{ + <<"name">> => <<"quantity">>, + <<"value">> => <<"1000">> + }, + <<"4">> => #{ + <<"name">> => <<"reward">>, + <<"value">> => <<"2000">> + }, + <<"5">> => #{ + <<"name">> => <<"target">>, + <<"value">> => hb_util:encode(TargetTag) + }, + <<"6">> => #{ + <<"name">> => <<"data_root">>, + <<"value">> => hb_util:encode(DataRootTag) + }, + <<"7">> => #{ + <<"name">> => <<"data_size">>, + <<"value">> => <<"100">> + } + }, + <<"type">> => ?RSA_SIGN_TYPE, <<"bundle">> => <<"false">> }, do_tx_roundtrips(TX, UnsignedTABM, SignedCommitment). @@ -486,15 +815,11 @@ duplicated_tag_name_test() -> %% @doc Test that when a TABM has base field keys set to values that are not %% valid on a #tx record, they are preserved as tags instead. non_conforming_fields_test() -> - UnsignedTABM = #{ - <<"anchor">> => Anchor = <<"NON-ID-ANCHOR">>, - <<"target">> => Target = <<"NON-ID-TARGET">>, - <<"quantity">> => Quantity = <<"NON-INT-QUANTITY">>, - <<"reward">> => Reward = <<"NON-INT-REWARD">>, - <<"data_root">> => DataRoot = <<"NON-ID-DATA-ROOT">>, - <<"tag1">> => <<"value1">>, - <<"tag2">> => <<"value2">> - }, + Anchor = <<"NON-ID-ANCHOR">>, + DataRoot = <<"NON-ID-DATA-ROOT">>, + Quantity = <<"NON-INT-QUANTITY">>, + Reward = <<"NON-INT-REWARD">>, + Target = <<"NON-ID-TARGET">>, UnsignedTX = #tx{ format = 2, tags = [ @@ -507,11 +832,93 @@ non_conforming_fields_test() -> {<<"target">>, Target} ] }, + UnsignedID = dev_arweave_common:generate_id(UnsignedTX, unsigned), + UnsignedTABM = #{ + <<"commitments">> => #{ + hb_util:encode(UnsignedID) => #{ + <<"commitment-device">> => <<"tx@1.0">>, + <<"committed">> => [<<"anchor">>, <<"data_root">>, + <<"quantity">>, <<"reward">>, <<"tag1">>, + <<"tag2">>, <<"target">>], + <<"original-tags">> =>#{ + <<"1">> => #{ + <<"name">> => <<"anchor">>, + <<"value">> => Anchor + }, 
+ <<"2">> => #{ + <<"name">> => <<"data_root">>, + <<"value">> => DataRoot + }, + <<"3">> => #{ + <<"name">> => <<"quantity">>, + <<"value">> => Quantity + }, + <<"4">> => #{ + <<"name">> => <<"reward">>, + <<"value">> => Reward + }, + <<"5">> => #{ + <<"name">> => <<"tag1">>, + <<"value">> => <<"value1">> + }, + <<"6">> => #{ + <<"name">> => <<"tag2">>, + <<"value">> => <<"value2">> + }, + <<"7">> => #{ + <<"name">> => <<"target">>, + <<"value">> => Target + } + }, + <<"type">> => <<"unsigned-sha256">>, + <<"bundle">> => <<"false">> + } + }, + <<"anchor">> => Anchor, + <<"target">> => Target, + <<"quantity">> => Quantity, + <<"reward">> => Reward, + <<"data_root">> => DataRoot, + <<"tag1">> => <<"value1">>, + <<"tag2">> => <<"value2">> + }, + SignedCommitment = #{ <<"commitment-device">> => <<"tx@1.0">>, - <<"committed">> => [<<"anchor">>, <<"data_root">>, <<"quantity">>, - <<"reward">>, <<"tag1">>, <<"tag2">>, <<"target">>], - <<"type">> => <<"rsa-pss-sha256">>, + <<"committed">> => [<<"anchor">>, <<"data_root">>, + <<"quantity">>, <<"reward">>, <<"tag1">>, + <<"tag2">>, <<"target">>], + <<"original-tags">> =>#{ + <<"1">> => #{ + <<"name">> => <<"anchor">>, + <<"value">> => Anchor + }, + <<"2">> => #{ + <<"name">> => <<"data_root">>, + <<"value">> => DataRoot + }, + <<"3">> => #{ + <<"name">> => <<"quantity">>, + <<"value">> => Quantity + }, + <<"4">> => #{ + <<"name">> => <<"reward">>, + <<"value">> => Reward + }, + <<"5">> => #{ + <<"name">> => <<"tag1">>, + <<"value">> => <<"value1">> + }, + <<"6">> => #{ + <<"name">> => <<"tag2">>, + <<"value">> => <<"value2">> + }, + <<"7">> => #{ + <<"name">> => <<"target">>, + <<"value">> => Target + } + }, + <<"type">> => ?RSA_SIGN_TYPE, <<"bundle">> => <<"false">> }, do_tabm_roundtrips(UnsignedTX, UnsignedTABM, SignedCommitment). 
@@ -530,12 +937,12 @@ ao_data_key_test() -> ], data = Data, data_size = byte_size(Data), - data_root = ar_tx:data_root(Data) + data_root = ar_tx:data_root(arweavejs, Data) }, SignedCommitment = #{ <<"commitment-device">> => <<"tx@1.0">>, <<"committed">> => [<<"body">>, <<"tag1">>], - <<"type">> => <<"rsa-pss-sha256">>, + <<"type">> => ?RSA_SIGN_TYPE, <<"bundle">> => <<"false">> }, do_tabm_roundtrips(UnsignedTX, UnsignedTABM, SignedCommitment). @@ -555,7 +962,7 @@ unsorted_tags_test() -> SignedCommitment = #{ <<"commitment-device">> => <<"tx@1.0">>, <<"committed">> => [<<"z">>, <<"a">>], - <<"type">> => <<"rsa-pss-sha256">>, + <<"type">> => ?RSA_SIGN_TYPE, <<"bundle">> => <<"false">> }, % Only do a signed test since we don't need the tag order to be preserved @@ -603,7 +1010,7 @@ nested_data_tabm_test() -> NoLinksCommitment = #{ <<"commitment-device">> => <<"tx@1.0">>, <<"committed">> => [<<"data">>, <<"tag">>], - <<"type">> => <<"rsa-pss-sha256">>, + <<"type">> => ?RSA_SIGN_TYPE, <<"bundle">> => <<"true">>, <<"bundle-format">> => <<"binary">>, <<"bundle-version">> => <<"2.0.0">>, @@ -651,7 +1058,7 @@ nested_non_data_key_tabm_test() -> NoLinksCommitment = #{ <<"commitment-device">> => <<"tx@1.0">>, <<"committed">> => [<<"a1">>, <<"tag1">>], - <<"type">> => <<"rsa-pss-sha256">>, + <<"type">> => ?RSA_SIGN_TYPE, <<"bundle">> => <<"true">>, <<"bundle-format">> => <<"binary">>, <<"bundle-version">> => <<"2.0.0">>, @@ -717,7 +1124,7 @@ nested_multiple_tabm_test() -> NoLinksCommitment = #{ <<"commitment-device">> => <<"tx@1.0">>, <<"committed">> => [<<"a1">>, <<"data">>, <<"tag1">>], - <<"type">> => <<"rsa-pss-sha256">>, + <<"type">> => ?RSA_SIGN_TYPE, <<"bundle">> => <<"true">>, <<"bundle-format">> => <<"binary">>, <<"bundle-version">> => <<"2.0.0">>, @@ -760,6 +1167,24 @@ real_ecdsa_single_item_bundle_tx_test_disabled() -> [] ). 
+real_ecdsa_no_data_tx_test() -> + do_real_tx_verify( + <<"p42_hnfcQd2ESry-WY8x9RcMbaw6piapVM1CpErzf8Y">>, + [<<"p42_hnfcQd2ESry-WY8x9RcMbaw6piapVM1CpErzf8Y">>] + ). + +real_ecdsa_data_tx_test() -> + do_real_tx_verify( + <<"LomQXlcWSeJdhg26wXQwiE_spvAfEq4nULkUMV_1Fqo">>, + [<<"LomQXlcWSeJdhg26wXQwiE_spvAfEq4nULkUMV_1Fqo">>] + ). + +real_2048_bit_rsa_tx_test() -> + do_real_tx_verify( + <<"tj76flZk936u0S2owyEzUFBvBAYle9Al5LH8zJ7icNc">>, + [<<"tj76flZk936u0S2owyEzUFBvBAYle9Al5LH8zJ7icNc">>] + ). + real_no_data_tx_test() -> do_real_tx_verify( <<"N1Cyu67lQtmZMQlIZVFpNfy3xz6k9wEZ8LLeDbOebbk">>, @@ -787,7 +1212,7 @@ do_real_tx_verify(TXID, ExpectedIDs) -> ?event(debug_test, { {tx_id, TXID}, {size, byte_size(Data)}, - {data, {explicit, Data}} + {data, Data} }), TXHeader#tx{ data = Data }; {ok, _} -> @@ -797,26 +1222,48 @@ do_real_tx_verify(TXID, ExpectedIDs) -> {error, Error} -> throw({http_request_error, Error}) end, - ?event(debug_test, {tx, {explicit, TX}}), + ?event(debug_test, {tx, TX}), ?assert(ar_tx:verify(TX)), - + StructuredTX = hb_message:convert( + TX, + <<"structured@1.0">>, + <<"tx@1.0">>, + Opts + ), + ?event(debug_test, {structured_tx, StructuredTX}), + ?assert(hb_message:verify(StructuredTX, all, #{})), Deserialized = ar_bundles:deserialize(TX), - ?event(debug_test, {deserialized}), + verify_items(Deserialized, ExpectedIDs, Opts). - verify_items(Deserialized, ExpectedIDs). 
- -verify_items(RootItem, ExpectedIDs) -> +verify_items(RootItem, ExpectedIDs, Opts) -> AllItems = flatten_items(RootItem), ?assertEqual(length(ExpectedIDs), length(AllItems)), [RootItem | NestedItems] = AllItems, [RootID | NestedIDs] = ExpectedIDs, + NormalizedRootItem = dev_arweave_common:normalize(RootItem), ?assert( - ar_tx:verify(dev_arweave_common:normalize(RootItem)), + ar_tx:verify(NormalizedRootItem), + hb_util:encode(RootItem#tx.id)), + StructuredRootItem = hb_message:convert( + NormalizedRootItem, + <<"structured@1.0">>, + <<"tx@1.0">>, + Opts + ), + ?assert(hb_message:verify(StructuredRootItem, all, Opts), hb_util:encode(RootItem#tx.id)), ?assertEqual(RootID, hb_util:encode(RootItem#tx.id)), lists:zipwith( fun(Item, ExpectedID) -> ?assert(ar_bundles:verify_item(Item), hb_util:encode(Item#tx.id)), + StructuredItem = hb_message:convert( + Item, + <<"structured@1.0">>, + <<"ans104@1.0">>, + Opts + ), + ?assert(hb_message:verify(StructuredItem, all, Opts), + hb_util:encode(Item#tx.id)), ?assertEqual(ExpectedID, hb_util:encode(Item#tx.id)) end, NestedItems, @@ -844,10 +1291,13 @@ flatten_items(_) -> do_tx_roundtrips(UnsignedTX, UnsignedTABM, Commitment) -> % For tests which don't care about bundling, just use false. do_tx_roundtrips(UnsignedTX, UnsignedTABM, Commitment, false). -do_tx_roundtrips(UnsignedTX, UnsignedTABM, Commitment, Bundle) -> - Req = #{ <<"bundle">> => Bundle }, +do_tx_roundtrips(UnsignedTX, UnsignedTABM, Commitment, Req) when is_map(Req) -> do_unsigned_tx_roundtrip(UnsignedTX, UnsignedTABM, Req), - do_signed_tx_roundtrip(UnsignedTX, UnsignedTABM, Commitment, Req). + do_signed_tx_roundtrip(UnsignedTX, UnsignedTABM, Commitment, Req); +do_tx_roundtrips(UnsignedTX, UnsignedTABM, Commitment, Bundle) + when is_boolean(Bundle) -> + Req = #{ <<"bundle">> => Bundle }, + do_tx_roundtrips(UnsignedTX, UnsignedTABM, Commitment, Req). 
do_unsigned_tx_roundtrip(UnsignedTX, UnsignedTABM, Req) -> % Serialize -> Deserialize @@ -991,7 +1441,7 @@ test_bundle_commitment(Commit, Encode, Decode) -> ?assert(hb_message:verify(Committed, all, Opts), Label), {ok, _, CommittedCommitment} = hb_message:commitment(#{}, Committed, Opts), ?assertEqual( - [<<"list">>], hb_maps:get(<<"committed">>, CommittedCommitment, Opts), + [<<"list">>], hb_maps:get(<<"committed">>, CommittedCommitment, not_found, Opts), Label), ?assertEqual(ToBool(Commit), hb_util:atom(hb_ao:get(<<"bundle">>, CommittedCommitment, false, Opts)), @@ -1013,7 +1463,7 @@ test_bundle_commitment(Commit, Encode, Decode) -> ?assert(hb_message:verify(Decoded, all, Opts), Label), {ok, _, DecodedCommitment} = hb_message:commitment(#{}, Decoded, Opts), ?assertEqual( - [<<"list">>], hb_maps:get(<<"committed">>, DecodedCommitment, Opts), + [<<"list">>], hb_maps:get(<<"committed">>, DecodedCommitment, not_found, Opts), Label), ?assertEqual(ToBool(Commit), hb_util:atom(hb_ao:get(<<"bundle">>, DecodedCommitment, false, Opts)), @@ -1059,61 +1509,28 @@ test_bundle_uncommitted(Encode, Decode) -> end, ok. 
-bundle_list_test() -> - % Load an arweave.js-created dataitem - Item = ar_bundles:deserialize( - hb_util:ok( - file:read_file(<<"test/arbundles.js/ans104-item.bundle">>) - ) - ), - ?event(debug_test, {item, Item}), - ?assert(ar_bundles:verify_item(Item)), - % Load an arweave.js-created list bundle - {ok, Bin} = file:read_file(<<"test/arbundles.js/ans104-list-bundle.bundle">>), - BundledItem = ar_bundles:sign_item(#tx{ - format = ans104, - data = Bin, - data_size = byte_size(Bin), - tags = [ - {<<"Bundle-Format">>, <<"binary">>}, - {<<"Bundle-Version">>, <<"2.0.0">>} - ] - }, hb:wallet()), - ?event(debug_test, {bundled_item, BundledItem}), - ?assert(ar_bundles:verify_item(BundledItem)), - % Convert both dataitems to structured messages - ItemStructured = hb_message:convert(Item, - #{ <<"device">> => <<"structured@1.0">>, <<"bundle">> => true }, - #{ <<"device">> => <<"ans104@1.0">>, <<"bundle">> => true }, - #{}), - ?event(debug_test, {item_structured, ItemStructured}), - ?assert(hb_message:verify(ItemStructured, all, #{})), - BundledItemStructured = hb_message:convert(BundledItem, - #{ <<"device">> => <<"structured@1.0">>, <<"bundle">> => true }, - #{ <<"device">> => <<"ans104@1.0">>, <<"bundle">> => true }, - #{}), - ?event(debug_test, {bundled_item_structured, BundledItemStructured}), - ?assert(hb_message:verify(BundledItemStructured, all, #{})), - % Use dev_codec_tx:to(List) to create a L1 TX bundle. We use this - % interface to mimic the logic used in dev_bundler - {ok, BundledTX} = dev_codec_tx:to( - [ItemStructured, BundledItemStructured], #{}, #{}), - SignedTX = ar_tx:sign(BundledTX, hb:wallet()), - ?event(debug_test, {signed_tx, SignedTX}), - ?assert(ar_tx:verify(SignedTX)), - % Convert the signed TX to a structured message - StructuredTX = hb_message:convert(SignedTX, - #{ <<"device">> => <<"structured@1.0">>, <<"bundle">> => true }, +%% Disabled test that captures an issue we will face if we want to support +%% ao-types on tx's. 
+list_aotypes_test_disabled() -> + Items = [ + #{ <<"tag1">> => <<"value1">> }, + #{ <<"tag2">> => <<"value2">> }, + #{ <<"tag3">> => <<"value3">> } + ], + TX = hb_message:convert( + Items, #{ <<"device">> => <<"tx@1.0">>, <<"bundle">> => true }, + <<"structured@1.0">>, #{}), - ?event(debug_test, {structured_tx, StructuredTX}), - ?assert(hb_message:verify(StructuredTX, all, #{})), - % Convert back to an L1 TX - SignedTXRoundtrip = hb_message:convert(StructuredTX, - #{ <<"device">> => <<"tx@1.0">>, <<"bundle">> => true }, - #{ <<"device">> => <<"structured@1.0">>, <<"bundle">> => true }, + Anchor = crypto:strong_rand_bytes(32), + % SignedTX = ar_tx:sign( + % TX#tx{ anchor = Anchor, reward = 100 }, + % hb:wallet()), + % ?event(debug_test, {signed_tx, SignedTX}), + StructuredTX = hb_message:convert( + TX#tx{ anchor = Anchor, reward = 100 }, + <<"structured@1.0">>, + <<"tx@1.0">>, #{}), - ?event(debug_test, {signed_tx_roundtrip, SignedTXRoundtrip}), - ?assert(ar_tx:verify(SignedTXRoundtrip)), - ?assertEqual(SignedTX, SignedTXRoundtrip), + ?event(debug_test, {structured_tx, StructuredTX}), ok. \ No newline at end of file diff --git a/src/dev_codec_tx_from.erl b/src/dev_codec_tx_from.erl index f21664217..fbe817f2e 100644 --- a/src/dev_codec_tx_from.erl +++ b/src/dev_codec_tx_from.erl @@ -14,7 +14,9 @@ fields(TX, Prefix, Opts) -> target_field(TX, Prefix, Opts), anchor_field(TX, Prefix, Opts), quantity_field(TX, Prefix, Opts), - reward_field(TX, Prefix, Opts) + reward_field(TX, Prefix, Opts), + data_root_field(TX, Prefix, Opts), + data_size_field(TX, Prefix, Opts) ] ). @@ -58,3 +60,16 @@ reward_field(TX, Prefix, _Opts) -> } end. +data_root_field(#tx{data = ?DEFAULT_DATA, data_root = ?DEFAULT_DATA_ROOT}, _Prefix, _Opts) -> + #{}; +data_root_field(#tx{data = ?DEFAULT_DATA, data_root = DataRoot}, Prefix, _Opts) -> + #{<> => hb_util:encode(DataRoot)}; +data_root_field(_TX, _Prefix, _Opts) -> + #{}. 
+ +data_size_field(#tx{data = ?DEFAULT_DATA, data_size = ?DEFAULT_DATA_SIZE}, _Prefix, _Opts) -> + #{}; +data_size_field(#tx{data = ?DEFAULT_DATA, data_size = DataSize}, Prefix, _Opts) -> + #{<> => integer_to_binary(DataSize)}; +data_size_field(_TX, _Prefix, _Opts) -> + #{}. diff --git a/src/dev_codec_tx_to.erl b/src/dev_codec_tx_to.erl index f0fb6f586..da7094985 100644 --- a/src/dev_codec_tx_to.erl +++ b/src/dev_codec_tx_to.erl @@ -9,7 +9,9 @@ fields_to_tx(TX, Prefix, Map, Opts) -> target = target_field(Prefix, Map, Opts), anchor = anchor_field(Prefix, Map, Opts), quantity = quantity_field(Prefix, Map, Opts), - reward = reward_field(Prefix, Map, Opts) + reward = reward_field(Prefix, Map, Opts), + data_root = data_root_field(Prefix, Map, Opts), + data_size = data_size_field(Prefix, Map, Opts) }. format_field(Prefix, Map, Opts) -> @@ -62,11 +64,43 @@ reward_field(Prefix, Map, Opts) -> error -> ?DEFAULT_REWARD end. +data_root_field(Prefix, Map, Opts) -> + case hb_maps:get(<<"data">>, Map, ?DEFAULT_DATA, Opts) of + ?DEFAULT_DATA -> + case hb_maps:find(<>, Map, Opts) of + {ok, EncodedDataRoot} -> + case hb_util:safe_decode(EncodedDataRoot) of + {ok, DataRoot} when ?IS_ID(DataRoot) -> DataRoot; + _ -> ?DEFAULT_DATA_ROOT + end; + error -> ?DEFAULT_DATA_ROOT + end; + _ -> + ?DEFAULT_DATA_ROOT + end. + +data_size_field(Prefix, Map, Opts) -> + case hb_maps:get(<<"data">>, Map, ?DEFAULT_DATA, Opts) of + ?DEFAULT_DATA -> + case hb_maps:find(<>, Map, Opts) of + {ok, EncodedDataSize} -> + case hb_util:safe_int(EncodedDataSize) of + {ok, DataSize} -> DataSize; + _ -> ?DEFAULT_DATA_SIZE + end; + error -> ?DEFAULT_DATA_SIZE + end; + _ -> + ?DEFAULT_DATA_SIZE + end. + excluded_tags(TX, TABM, Opts) -> exclude_target_tag(TX, TABM, Opts) ++ exclude_anchor_tag(TX, TABM, Opts) ++ exclude_quantity_tag(TX, TABM, Opts) ++ - exclude_reward_tag(TX, TABM, Opts). + exclude_reward_tag(TX, TABM, Opts) ++ + exclude_data_root_tag(TX) ++ + exclude_data_size_tag(TX). 
exclude_target_tag(TX, TABM, Opts) -> case {TX#tx.target, hb_maps:get(<<"target">>, TABM, undefined, Opts)} of @@ -98,4 +132,16 @@ exclude_reward_tag(TX, TABM, Opts) -> {FieldReward, TagReward} when FieldReward =/= TagReward -> [<<"reward">>]; _ -> [] + end. + +exclude_data_root_tag(TX) -> + case TX#tx.data_root of + ?DEFAULT_DATA_ROOT -> []; + _ -> [<<"data_root">>] + end. + +exclude_data_size_tag(TX) -> + case TX#tx.data_size of + ?DEFAULT_DATA_SIZE -> []; + _ -> [<<"data_size">>] end. \ No newline at end of file diff --git a/src/dev_copycat_arweave.erl b/src/dev_copycat_arweave.erl index 6ff8f822f..affe79c1a 100644 --- a/src/dev_copycat_arweave.erl +++ b/src/dev_copycat_arweave.erl @@ -1,77 +1,1062 @@ %%% @doc A `~copycat@1.0' engine that fetches block data from an Arweave node for -%%% replication. This engine works in _reverse_ chronological order by default, -%%% fetching blocks from the latest known block towards the Genesis block. The -%%% node avoids retrieving blocks that are already present in the cache using -%%% `~arweave@2.9-pre''s built-in caching mechanism. +%%% replication. This engine works in _reverse_ chronological order by default. +%%% If `to' is omitted, it keeps moving downward from `from' until it reaches a +%%% block where at least one TX is already indexed, then stops. If `to' is +%%% provided, every block in the range is processed. -module(dev_copycat_arweave). -export([arweave/3]). -include_lib("include/hb.hrl"). -include_lib("eunit/include/eunit.hrl"). --define(ARWEAVE_DEVICE, <<"~arweave@2.9-pre">>). +-define(ARWEAVE_DEVICE, <<"~arweave@2.9">>). + +% GET /~cron@1.0/once&cron-path=~copycat@1.0/arweave %% @doc Fetch blocks from an Arweave node between a given range, or from the %% latest known block towards the Genesis block. If no range is provided, we %% fetch blocks from the latest known block towards the Genesis block. arweave(_Base, Request, Opts) -> {From, To} = parse_range(Request, Opts), - fetch_blocks(Request, From, To, Opts). 
+ case hb_maps:get(<<"mode">>, Request, <<"write">>, Opts) of + <<"write">> -> fetch_blocks(Request, From, To, Opts); + <<"list">> -> list_index(From, To, Opts); + Mode -> + {error, <<"Unsupported mode `", (hb_util:bin(Mode))/binary, "`. Supported modes are: write, list">>} + end. %% @doc Parse the range from the request. parse_range(Request, Opts) -> From = case hb_maps:find(<<"from">>, Request, Opts) of - {ok, Height} -> Height; + {ok, FromHeight} -> normalize_height(FromHeight, Opts); error -> - {ok, LatestHeight} = - hb_ao:resolve( - <>, - Opts - ), - LatestHeight + latest_height(Opts) + end, + To = + case hb_maps:find(<<"to">>, Request, Opts) of + {ok, ToHeight} -> normalize_height(ToHeight, Opts); + error -> undefined end, - To = hb_maps:get(<<"to">>, Request, 0, Opts), {From, To}. -%% @doc Fetch blocks from an Arweave node between a given range. -fetch_blocks(Req, Current, Current, _Opts) -> - ?event(copycat_arweave, - {arweave_block_indexing_completed, - {reached_target, Current}, - {initial_request, Req} - } - ), - {ok, Current}; -fetch_blocks(Req, Current, To, Opts) -> - BlockRes = +normalize_height(Height, Opts) -> + RequestedHeight = hb_util:int(Height), + case RequestedHeight < 0 of + true -> latest_height(Opts) + RequestedHeight; + false -> RequestedHeight + end. + +latest_height(Opts) -> + case hb_ao:resolve( + <>, + Opts + ) of + {ok, ResolvedHeight} -> hb_util:int(ResolvedHeight); + {error, _} -> 0 + end. + +%% @doc Check if a transaction ID is indexed in the arweave index store. +is_tx_indexed(TXID, Opts) -> + case hb_store_arweave:store_from_opts(Opts) of + no_store -> false; + #{ <<"index-store">> := Store } -> + case hb_store:read(Store, hb_store_arweave_offset:path(TXID)) of + {ok, _} -> true; + not_found -> false + end + end. + +%% @doc List indexed blocks and transactions in the given range. +%% Returns JSON with block heights as keys, each containing indexed and not-indexed lists. 
+list_index(From, undefined, Opts) -> + list_index(From, 0, Opts); +list_index(From, To, _Opts) when From < To -> + {ok, #{ + <<"content-type">> => <<"application/json">>, + <<"body">> => hb_json:encode(#{}) + }}; +list_index(From, To, Opts) -> + Result = list_index_blocks(From, To, Opts, #{}), + JSON = hb_json:encode(Result), + {ok, #{ + <<"content-type">> => <<"application/json">>, + <<"body">> => JSON + }}. + +%% @doc Iterate through blocks and check index status for each transaction. +list_index_blocks(Current, To, _Opts, Acc) when Current < To -> + Acc; +list_index_blocks(Current, To, Opts, Acc) -> + case fetch_block_header(Current, Opts) of + {ok, Block} -> + TXIDs = hb_maps:get(<<"txs">>, Block, [], Opts), + case TXIDs of + [] -> + list_index_blocks(Current - 1, To, Opts, Acc); + _ -> + {IndexedTXs, NotIndexedTXs} = classify_txs(TXIDs, Opts), + case IndexedTXs of + [] -> + % Do not include blocks with no locally indexed TXs. + list_index_blocks(Current - 1, To, Opts, Acc); + _ -> + BlockKey = hb_util:bin(Current), + NewAcc = Acc#{ + BlockKey => #{ + <<"indexed">> => IndexedTXs, + <<"not-indexed">> => NotIndexedTXs + } + }, + list_index_blocks(Current - 1, To, Opts, NewAcc) + end + end; + {error, _} -> + list_index_blocks(Current - 1, To, Opts, Acc) + end. + +fetch_block_header(Height, Opts) -> + ?event(debug_copycat, {fetching_block, Height}), + observe_event(<<"block_header">>, fun() -> hb_ao:resolve( << ?ARWEAVE_DEVICE/binary, "/block=", - (hb_util:bin(Current))/binary + (hb_util:bin(Height))/binary >>, Opts - ), - process_block(BlockRes, Req, Current, To, Opts), + ) + end). + +%% @doc Classify transactions as indexed or not-indexed. +classify_txs(TXIDs, Opts) -> + lists:foldl( + fun(TXID, {IndexedAcc, NotIndexedAcc}) -> + case is_tx_indexed(TXID, Opts) of + true -> {[TXID | IndexedAcc], NotIndexedAcc}; + false -> {IndexedAcc, [TXID | NotIndexedAcc]} + end + end, + {[], []}, + TXIDs + ). 
+ +%% @doc Fetch blocks from an Arweave node while moving downward from `Current'. +%% If `To' is provided, every block in [`To', `Current'] is processed. If `To' +%% is omitted, stop at the first block where any TX is already indexed. +fetch_blocks(Req, Current, To, _Opts) when is_integer(To), Current < To -> + ?event(copycat_short, + {arweave_block_indexing_completed, + {reached_target, To}, + {initial_request, Req} + } + ), + {ok, To}; +fetch_blocks(_Req, Current, undefined, _Opts) when Current < 0 -> + {ok, 0}; +fetch_blocks(Req, Current, undefined, Opts) -> + BlockRes = fetch_block_header(Current, Opts), + case is_already_indexed(BlockRes, Opts) of + true -> + ?event(copycat_short, + {arweave_block_indexing_completed, + {stop_at_indexed_block, Current}, + {initial_request, Req} + } + ), + {ok, Current}; + false -> + observe_event(<<"block_indexed">>, fun() -> + process_block(BlockRes, Current, undefined, Opts) + end), + fetch_blocks(Req, Current - 1, undefined, Opts) + end; +fetch_blocks(Req, Current, To, Opts) -> + observe_event(<<"block_indexed">>, fun() -> + fetch_and_process_block(Current, To, Opts) + end), fetch_blocks(Req, Current - 1, To, Opts). +%% @doc Determine whether a fetched block is considered indexed. +%% A block is indexed when any TX from its `txs' list is in the index. +is_already_indexed({ok, Block}, Opts) -> + TXIDs = hb_maps:get(<<"txs">>, Block, [], Opts), + lists:any(fun(TXID) -> is_tx_indexed(TXID, Opts) end, TXIDs); +is_already_indexed({error, _}, _Opts) -> + false. + +fetch_and_process_block(Current, To, Opts) -> + BlockRes = fetch_block_header(Current, Opts), + process_block(BlockRes, Current, To, Opts). + %% @doc Process a block. 
-process_block(BlockRes, _Req, Current, To, _Opts) -> +process_block(BlockRes, Current, To, Opts) -> case BlockRes of - {ok, _} -> + {ok, Block} -> + ?event(debug_copycat, {{processing_block, Current}, + {indep_hash, hb_maps:get(<<"indep_hash">>, Block, <<>>)}}), + case maybe_index_ids(Block, Opts) of + {block_skipped, Results} -> + TotalTXs = maps:get(total_txs, Results, 0), + ?event( + copycat_short, + {arweave_block_skipped, + {height, Current}, + {total_txs, TotalTXs}, + {target, To} + } + ); + {block_cached, Results} -> + ItemsIndexed = maps:get(items_count, Results, 0), + TotalTXs = maps:get(total_txs, Results, 0), + BundleTXs = maps:get(bundle_count, Results, 0), + SkippedTXs = maps:get(skipped_count, Results, 0), + ?event( + copycat_short, + {arweave_block_indexed, + {height, Current}, + {items_indexed, ItemsIndexed}, + {total_txs, TotalTXs}, + {bundle_txs, BundleTXs}, + {skipped_txs, SkippedTXs}, + {target, To} + } + ) + end; + {error, _} = Error -> ?event( copycat_short, - {arweave_block_cached, + {arweave_block_not_found, {height, Current}, - {target, To} - } - ); - {error, not_found} -> + {target, To}, + {reason, Error}} + ) + end. + +%% @doc Index the IDs of all transactions in the block if configured to do so. 
+maybe_index_ids(Block, Opts) -> + TotalTXs = length(hb_maps:get(<<"txs">>, Block, [], Opts)), + case hb_opts:get(arweave_index_ids, true, Opts) of + false -> + {block_skipped, #{ + items_count => 0, + total_txs => TotalTXs, + bundle_count => 0, + skipped_count => 0 + }}; + true -> + BlockEndOffset = hb_util:int( + hb_maps:get(<<"weave_size">>, Block, 0, Opts)), + BlockSize = hb_util:int( + hb_maps:get(<<"block_size">>, Block, 0, Opts)), + BlockStartOffset = BlockEndOffset - BlockSize, + case resolve_tx_headers(hb_maps:get(<<"txs">>, Block, [], Opts), Opts) of + error -> + % Skip entire block if any transaction errors + {block_skipped, #{ + skipped_count => TotalTXs, + total_txs => TotalTXs + }}; + {ok, TXs} -> + Height = hb_maps:get(<<"height">>, Block, 0, Opts), + TXsWithData = ar_block:generate_size_tagged_list_from_txs(TXs, Height), + % Filter out padding entries before processing + ValidTXs = lists:filter( + fun({{padding, _}, _}) -> false; (_) -> true end, + TXsWithData + ), + TXResults = process_txs(ValidTXs, BlockStartOffset, Opts), + {block_cached, TXResults#{total_txs => TotalTXs}} + end + end. + +%% @doc Apply Fun to each item in Items with parallel workers. +%% Fun takes an item and returns a result. +%% Returns a list of results in the same order as the input items. +%% Uses arweave_index_workers from Opts to determine max concurrency (default 1 = sequential). +parallel_map(Items, Fun, Opts) -> + MaxWorkers = max(1, hb_opts:get(arweave_index_workers, 1, Opts)), + hb_pmap:parallel_map(Items, Fun, MaxWorkers). + +%% @doc Process a single transaction and return its contribution to the counters. 
+%% Returns a map with keys: items_count, bundle_count, skipped_count
+%% The first argument is one `{{TXOrPadding, DataRoot}, EndOffset}' entry;
+%% `EndOffset' is added to `BlockStartOffset' to obtain the TX's end offset.
+process_tx({{padding, _PaddingRoot}, _EndOffset}, _BlockStartOffset, _Opts) ->
+    % Padding entries contribute nothing to any counter.
+    #{items_count => 0, bundle_count => 0, skipped_count => 0};
+process_tx({{TX, _TXDataRoot}, EndOffset}, BlockStartOffset, Opts) ->
+    IndexStore = hb_store_arweave:store_from_opts(Opts),
+    TXID = hb_util:encode(TX#tx.id),
+    % The TX's data ends at `TXEndOffset' and starts `data_size' bytes earlier.
+    TXEndOffset = BlockStartOffset + EndOffset,
+    TXStartOffset = TXEndOffset - TX#tx.data_size,
+    ?event(debug_copycat, {writing_index,
+        {id, {explicit, TXID}},
+        {offset, TXStartOffset},
+        {size, TX#tx.data_size}
+    }),
+    % The L1 TX itself is always written to the index (as `tx@1.0'), before
+    % any bundle handling. The write is timed and counted by observe_event/2.
+    observe_event(<<"item_indexed">>, fun() ->
+        hb_store_arweave:write_offset(
+            IndexStore,
+            TXID,
+            <<"tx@1.0">>,
+            TXStartOffset,
+            TX#tx.data_size
+        )
+    end),
+    case is_bundle_tx(TX, Opts) of
+        false -> #{items_count => 0, bundle_count => 0, skipped_count => 0};
+        true ->
+            % Lightweight processing of block transactions to depth 2. We
+            % can avoid loading the full L1 TX data into memory, and instead
+            % only load the bundle header. But as a result we're unable to
+            % recurse any deeper than L2 dataitems.
+            ?event(debug_copycat, {fetching_bundle_header,
+                {tx_id, {string, TXID}},
+                {tx_end_offset, TXEndOffset},
+                {tx_data_size, TX#tx.data_size}
+            }),
+            BundleRes = download_bundle_header(
+                TXEndOffset, TX#tx.data_size, Opts
+            ),
+            case BundleRes of
+                {ok, HeaderSize, BundleIndex} ->
+                    % Batch event tracking: measure total time and count for all write_offset calls
+                    % The fold accumulator is {NextItemStartOffset, ItemsWritten}.
+                    % The first item starts right after the bundle header and each
+                    % following item starts where the previous one ended.
+                    {TotalTime, {_, ItemsCount}} = timer:tc(fun() ->
+                        lists:foldl(
+                            fun({ItemID, Size}, {ItemStartOffset, ItemsCountAcc}) ->
+                                hb_store_arweave:write_offset(
+                                    IndexStore,
+                                    hb_util:encode(ItemID),
+                                    <<"ans104@1.0">>,
+                                    ItemStartOffset,
+                                    Size
+                                ),
+                                {ItemStartOffset + Size, ItemsCountAcc + 1}
+                            end,
+                            {TXStartOffset + HeaderSize, 0},
+                            BundleIndex
+                        )
+                    end),
+                    ?event(debug_copycat,
+                        {bundle_items_indexed,
+                            {tx_id, {string, TXID}},
+                            {items_count, ItemsCount}
+                        }),
+                    % Single event increment for the batch
+                    record_event_metrics(<<"item_indexed">>, ItemsCount, TotalTime),
+                    #{items_count => ItemsCount, bundle_count => 1, skipped_count => 0};
+                {error, Reason} ->
+                    ?event(
+                        copycat_short,
+                        {arweave_bundle_skipped,
+                            {tx_id, {explicit, TXID}},
+                            {reason, Reason}
+                        }
+                    ),
+                    % A bundle whose header could not be read is counted both as
+                    % a bundle and as skipped; the L1 TX offset written above is
+                    % left in place.
+                    #{items_count => 0, bundle_count => 1, skipped_count => 1}
+            end
+    end.
+
+%% @doc Process transactions: spawn workers and manage the worker pool.
+%% This function processes transactions in parallel using parallel_map.
+%% When arweave_index_workers <= 1, processes sequentially (one worker at a time).
+%% When arweave_index_workers > 1, processes in parallel with the specified concurrency limit.
+%% Returns a map with keys: items_count, bundle_count, skipped_count.
+process_txs(ValidTXs, BlockStartOffset, Opts) ->
+    ProcessOne = fun(Entry) -> process_tx(Entry, BlockStartOffset, Opts) end,
+    PerTXCounters = parallel_map(ValidTXs, ProcessOne, Opts),
+    % Sum the three counters key by key; a key missing from either side
+    % counts as zero.
+    Keys = [items_count, bundle_count, skipped_count],
+    lists:foldl(
+        fun(Counters, Totals) ->
+            maps:from_list(
+                [
+                    {Key, maps:get(Key, Counters, 0) + maps:get(Key, Totals, 0)}
+                ||
+                    Key <- Keys
+                ]
+            )
+        end,
+        maps:from_list([{Key, 0} || Key <- Keys]),
+        PerTXCounters
+    ).
+
+%% @doc Return `true' unless `dev_arweave_common:type/1' classifies the TX
+%% as `binary'.
+is_bundle_tx(TX, _Opts) ->
+    case dev_arweave_common:type(TX) of
+        binary -> false;
+        _ -> true
+    end.
+
+%% @doc Fetch the bundle header of the data that ends at `EndOffset' and is
+%% `Size' bytes long. The call is timed and counted under the
+%% `bundle_header' metric.
+download_bundle_header(EndOffset, Size, Opts) ->
+    Fetch = fun() -> dev_arweave:bundle_header(EndOffset - Size, Size, Opts) end,
+    observe_event(<<"bundle_header">>, Fetch).
+
+%% @doc Resolve the header of every TX ID in `TXIDs'. Returns `{ok, TXs}'
+%% in input order, or `error' as soon as any single lookup failed.
+resolve_tx_headers(TXIDs, Opts) ->
+    Lookup = fun(TXID) -> resolve_tx_header(TXID, Opts) end,
+    Outcomes = parallel_map(TXIDs, Lookup, Opts),
+    case lists:member(error, Outcomes) of
+        true -> error;
+        false -> {ok, lists:map(fun({ok, TX}) -> TX end, Outcomes)}
+    end.
+
+%% @doc Resolve the header of a single TX through the `?ARWEAVE_DEVICE'
+%% `/tx' path with `exclude-data=true' and convert the result from
+%% `structured@1.0' to `tx@1.0'. Returns `{ok, TX}' on success and the bare
+%% atom `error' when the resolve returns `{error, _}' or anything raises.
+resolve_tx_header(TXID, Opts) ->
+    try
+        ?event(debug_copycat, {fetching_tx, {explicit, TXID}}),
+        ResolveRes = observe_event(<<"tx_header">>, fun() ->
+            hb_ao:resolve(
+                <<
+                    ?ARWEAVE_DEVICE/binary,
+                    "/tx&tx=",
+                    TXID/binary,
+                    "&exclude-data=true"
+                >>,
+                Opts
+            )
+        end),
+        case ResolveRes of
+            {ok, StructuredTXHeader} ->
+                {ok,
+                    hb_message:convert(
+                        StructuredTXHeader,
+                        <<"tx@1.0">>,
+                        <<"structured@1.0">>,
+                        Opts)};
+            {error, ResolveError} ->
+                ?event(
+                    copycat_short,
+                    {arweave_tx_skipped,
+                        {tx_id, {explicit, TXID}},
+                        {reason, ResolveError}
+                    }
+                ),
+                error
+        end
+    catch
+        % Every exception class is caught: the failure is logged and mapped
+        % to the same `error' result as a failed resolve.
+        Class:Reason:_ ->
             ?event(
                 copycat_short,
-                {arweave_block_not_found,
-                    {height, Current},
-                    {target, To}
+                {arweave_tx_skipped,
+                    {tx_id, {explicit, TXID}},
+                    {class, Class},
+                    {reason, Reason}
                 }
-            )
-    end.
\ No newline at end of file
+            ),
+            error
+    end.
+
+%% @doc Record event metrics (count and duration) using hb_event:increment.
+%% `Count' is added under the `arweave_block_count' topic and `Duration'
+%% under `arweave_block_duration', both keyed by `MetricName'.
+record_event_metrics(MetricName, Count, Duration) ->
+    hb_event:increment(<<"arweave_block_count">>, MetricName, #{}, Count),
+    hb_event:increment(<<"arweave_block_duration">>, MetricName, #{}, Duration).
+
+%% @doc Track an operation's execution time and count using hb_event:increment.
+%% Both count and duration are recorded whatever value `Fun' returns,
+%% including error tuples. If `Fun' raises, the exception propagates out of
+%% timer:tc/1 and nothing is recorded.
+observe_event(MetricName, Fun) ->
+    {Time, Result} = timer:tc(Fun),
+    record_event_metrics(MetricName, 1, Time),
+    Result.
+
+%%% Tests
+
+index_ids_test() ->
+    %% Test block: https://viewblock.io/arweave/block/1827942
+    %% Note: this block includes a data item with an Ethereum signature. This
+    %% signature type is not yet (as of Jan 2026) supported by ar_bundles.erl,
+    %% however we should still be able to index it (we just can't deserialize
+    %% it).
+    {_TestStore, StoreOpts, Opts} = setup_index_opts(),
+    {ok, 1827942} =
+        hb_ao:resolve(
+            <<"~copycat@1.0/arweave&from=1827942&to=1827942">>,
+            Opts
+        ),
+    ?assertMatch(
+        {ok, _},
+        hb_store_arweave:read(
+            StoreOpts,
+            <<"WbRAQbeyjPHgopBKyi0PLeKWvYZr3rgZvQ7QY3ASJS4">>
+        )
+    ),
+    assert_item_read(
+        <<"0vy2Ey8bWkSDcRIvWQJjxDeVGYOrTSmYIIhBILJntY8">>,
+        Opts),
+    assert_item_read(
+        <<"2lmrYydmDweX2MgGH39ZEB9hKm2JqGOYmRiG3n_xh8A">>,
+        Opts),
+    assert_item_read(
+        <<"ATi9pQF_eqb99UK84R5rq8lGfRGpilVQOYyth7rXxh8">>,
+        Opts),
+    assert_item_read(
+        <<"4VSfUbhMVZQHW5VfVwQZOmC5fR3W21DZgFCyz8CA-cE">>,
+        Opts),
+    assert_item_read(
+        <<"ZQRHZhktk6dAtX9BlhO1teOtVlGHoyaWP25kAlhxrM4">>,
+        Opts),
+    % The T2pluNnaavL7-S2GkO_m3pASLUqMH_XQ9IiIhZKfySs can be deserialized so
+    % we'll verify that some of its items were indexed and match the version
+    % in the deserialized bundle.
+    assert_bundle_read(
+        <<"T2pluNnaavL7-S2GkO_m3pASLUqMH_XQ9IiIhZKfySs">>,
+        [
+            {<<"54K1ehEIKZxGSusgZzgbGYaHfllwWQ09-S9-eRUJg5Y">>, <<"1">>},
+            {<<"MgatoEjlO_YtdbxFi9Q7Hxbs0YQVcChddhSS7FsdeIg">>, <<"19">>},
+            {<<"z-oKJfhMq5qoVFrljEfiBKgumaJmCWVxNJaavR5aPE8">>, <<"26">>}
+        ],
+        Opts
+    ),
+    % Non-ans104 data transaction
+    assert_item_read(
+        <<"bXEgFm4K2b5VD64skBNAlS3I__4qxlM3Sm4Z5IXj3h8">>,
+        Opts),
+    % Another bundle with an unsupported avro codec should be indexed even if
+    % it can't be deserialized.
+    ?assertException(
+        error,
+        {badmatch,<<"\"address\":\"0x124e64C9Ed898d4A8B130F6ACb76b33E21CD711c\"", _/binary>>},
+        hb_store_arweave:read(
+            StoreOpts,
+            <<"kK67S13W_8jM9JUw2umVamo0zh9v1DeVxWrru2evNco">>)
+    ),
+    assert_bundle_read(
+        <<"c2ATDuTgwKCcHpAFZqSt13NC-tA4hdA7Aa2xBPuOzoE">>,
+        [
+            {<<"OBKr-7UrmjxFD-h-qP-XLuvCgtyuO_IDpBMgIytvusA">>, <<"1">>}
+        ],
+        Opts
+    ),
+    ok.
+
+%% @doc Test a bundle header that fits in a single chunk.
+small_bundle_header_test() ->
+    {_TestStore, _StoreOpts, Opts} = setup_index_opts(),
+    {EndOffset, Size} =
+        tx_offset_and_size(<<"29TsnbqPQ_7rQ_r4KF5qRr995W1wBw_mTy6WEMy40aw">>, Opts),
+    {ok, HeaderSize, BundleIndex} = download_bundle_header(EndOffset, Size, Opts),
+    ?assertEqual(1704, length(BundleIndex)),
+    ?assertEqual(109088, HeaderSize),
+    ok.
+
+%% @doc Test a bundle header that doesn't fit in a single chunk.
+large_bundle_header_test() ->
+    {_TestStore, _StoreOpts, Opts} = setup_index_opts(),
+    {EndOffset, Size} =
+        tx_offset_and_size(<<"bnMTI7LglBGSaK5EdV_juh6GNtXLm0cd5lkd2q4nlT0">>, Opts),
+    {ok, HeaderSize, BundleIndex} = download_bundle_header(EndOffset, Size, Opts),
+    ?assertEqual(15000, length(BundleIndex)),
+    ?assertEqual(960032, HeaderSize),
+    ok.
+
+%% @doc Test a TX whose data does not start with a valid bundle header.
+invalid_bundle_header_test() ->
+    {_TestStore, _StoreOpts, Opts} = setup_index_opts(),
+    {EndOffset, Size} =
+        tx_offset_and_size(<<"cGNURX2IUt98VKVIeXSfYe6eulNwPEqijaQfvatzd_o">>, Opts),
+    ?assertEqual({error, invalid_bundle_header},
+        download_bundle_header(EndOffset, Size, Opts)),
+    ok.
+
+%% @doc Test helper: request `/arweave/tx/TXID/offset' over HTTP and return
+%% the decoded `{EndOffset, Size}' pair as integers.
+tx_offset_and_size(TXID, Opts) ->
+    {ok, #{ <<"body">> := OffsetBody }} =
+        hb_http:request(
+            #{
+                <<"path">> => <<"/arweave/tx/", TXID/binary, "/offset">>,
+                <<"method">> => <<"GET">>
+            },
+            Opts
+        ),
+    OffsetMsg = hb_json:decode(OffsetBody),
+    EndOffset = hb_util:int(maps:get(<<"offset">>, OffsetMsg)),
+    Size = hb_util:int(maps:get(<<"size">>, OffsetMsg)),
+    {EndOffset, Size}.
+
+%% @doc Index a block containing an L1 TX that carries bundle tags but whose
+%% data is not a valid bundle.
+invalid_bundle_test() ->
+    {_TestStore, _StoreOpts, Opts} = setup_index_opts(),
+    Height = 1307606,
+    HeightBin = hb_util:bin(Height),
+    IndexPath =
+        <<"~copycat@1.0/arweave&from=", HeightBin/binary, "&to=", HeightBin/binary>>,
+    {ok, Height} = hb_ao:resolve(IndexPath, Opts),
+    ExpectedItems =
+        [
+            {<<"gintz-t6q_kdeP_IBQVGnp9fgFzs-pPGGehXW-V7ZRk">>, <<"1">>}
+        ],
+    assert_bundle_read(
+        <<"8S12ZqO6-_icGkeuH8mFq6x9q7OIoXOqFRGH5k-wshg">>,
+        ExpectedItems,
+        Opts
+    ),
+    % The L1 TX below has bundle tags but its data is not a valid bundle;
+    % the L1 TX itself must still be indexed.
+    assert_item_read(<<"cGNURX2IUt98VKVIeXSfYe6eulNwPEqijaQfvatzd_o">>, Opts),
+    ok.
+
+%% @doc Index block 633719 and read back one of its L1 TXs.
+block_with_large_integer_test() ->
+    {_TestStore, _StoreOpts, Opts} = setup_index_opts(),
+    Height = 633719,
+    HeightBin = hb_util:bin(Height),
+    IndexPath =
+        <<"~copycat@1.0/arweave&from=", HeightBin/binary, "&to=", HeightBin/binary>>,
+    {ok, Height} = hb_ao:resolve(IndexPath, Opts),
+    % This is a bundle signed with a Solana signature, so only the L1 TX can
+    % actually be loaded.
+    assert_item_read(<<"UXpcKTl6Mh34eTFSgny4NcIqoUjBcgYIcMqromcS6_Q">>, Opts),
+    ok.
+
+%% @doc Index block 1865858 and check only that the run completes.
+empty_block_test() ->
+    {_TestStore, _StoreOpts, Opts} = setup_index_opts(),
+    Height = 1865858,
+    HeightBin = hb_util:bin(Height),
+    IndexPath =
+        <<"~copycat@1.0/arweave&from=", HeightBin/binary, "&to=", HeightBin/binary>>,
+    {ok, Height} = hb_ao:resolve(IndexPath, Opts),
+    ok.
+
+% ecdsa_no_data_test() ->
+%     {_TestStore, _StoreOpts, Opts} = setup_index_opts(),
+%     {ok, 1827904} =
+%         hb_ao:resolve(
+%             <<"~copycat@1.0/arweave&from=1827904&to=1827904">>,
+%             Opts
+%         ),
+%     assert_bundle_read(
+%         Opts,
+%         <<"VNhX_pSANk_8j0jZBR5bh_5jr-lkfbHDjtHd8FKqx7U">>,
+%         [
+%             {<<"3xDKhrCQcPuBtcm1ipZS5C9gAfFYClgHuHOHAXGfchM">>, <<"1">>},
+%             {<<"JantC8f89VE-RidArHnU9589gY5T37NDXnWpI7H_psc">>, <<"7">>}
+%         ]
+%     ),
+%     ok.
+
+% ecdsa_with_data_test() ->
+%     {_TestStore, _StoreOpts, Opts} = setup_index_opts(),
+%     Block = 1720431,
+%     fetch_and_process_block(Block, Block, Opts),
+%     {ok, Block} =
+%         hb_ao:resolve(
+%             <<"~copycat@1.0/arweave&from=", (hb_util:bin(Block))/binary, "&to=", (hb_util:bin(Block))/binary>>,
+%             Opts
+%         ),
+%     ok.
+
+%% @doc Index a block with L1 TXs that carry a data tag. The test is disabled
+%% (note the name suffix) because it takes ~30 seconds to run;
+%% dev_arweave:get_tx_data_tag_exclude_data_test has some test coverage for
+%% handling an L1 TX with a data tag.
+tx_with_data_tag_test_disabled() ->
+    {_TestStore, StoreOpts, Opts} = setup_index_opts(),
+    Height = 1289677,
+    HeightBin = hb_util:bin(Height),
+    IndexPath =
+        <<"~copycat@1.0/arweave&from=", HeightBin/binary, "&to=", HeightBin/binary>>,
+    {ok, Height} = hb_ao:resolve(IndexPath, Opts),
+    % Reading either of these TXs from the index store is expected to fail
+    % with an `unsupported_tx_format' badmatch.
+    lists:foreach(
+        fun(UnsupportedID) ->
+            ?assertException(
+                error,
+                {badmatch, unsupported_tx_format},
+                hb_store_arweave:read(StoreOpts, UnsupportedID)
+            )
+        end,
+        [
+            <<"ZwsFMXcwuakDuIhskokVHYiOPVcywDUAUTMLAJ72fgw">>,
+            <<"-8ikoQo3KZkp9Hz_7kNdiUw3Vmn7J2DFslL_rBz0OBY">>
+        ]
+    ),
+    ExpectedItems =
+        [
+            {<<"7U7GRZ8cXtKezSQmQmGpJar6haz-uink46i6evxzDCI">>, <<"1">>}
+        ],
+    assert_bundle_read(
+        <<"0vvttUgGqSsMul8RKIPvBjlwTU5_0x68sZr4uJxgNF8">>,
+        ExpectedItems,
+        Opts
+    ),
+    assert_item_read(<<"jI0A4BASHaUdCCsdv249BxDX6IlE0Ko391TuI6REATw">>, Opts),
+    ok.
+
+%% @doc Index block 1826700 in write mode, check that a TX with no data and a
+%% TX with non-ans104 data can both be read back, then check the `mode=list'
+%% report for the same block.
+tx_with_no_data_test() ->
+    {_TestStore, _StoreOpts, Opts} = setup_index_opts(),
+    Block = 1826700,
+    BlockBin = hb_util:bin(Block),
+    {ok, Block} =
+        hb_ao:resolve(
+            <<
+                "~copycat@1.0/arweave&"
+                "from=", BlockBin/binary, "&"
+                "to=", BlockBin/binary, "&"
+                "mode=write"
+            >>,
+            Opts
+        ),
+    % Value transfer
+    Resolved = hb_ao:resolve(<<"XSQIgyDY1XUJNz79OeRHFaNpJZyaJSBd7XFsjWlZpNU">>, Opts),
+    ?assertMatch({ok, _}, Resolved),
+    {ok, StructuredTX} = Resolved,
+    ?assert(hb_message:verify(StructuredTX, all, Opts)),
+    ?assertEqual(
+        <<"XSQIgyDY1XUJNz79OeRHFaNpJZyaJSBd7XFsjWlZpNU">>,
+        hb_message:id(StructuredTX, signed, Opts)
+    ),
+    % Convert back to the `tx@1.0' record form to inspect its raw fields.
+    TX = hb_message:convert(
+        StructuredTX,
+        <<"tx@1.0">>,
+        <<"structured@1.0">>,
+        Opts),
+    ?assertEqual(0, TX#tx.data_size),
+    ?assertEqual(538493200840000, TX#tx.quantity),
+    % TX with non-ans104 data
+    assert_item_read(
+        <<"bpd0CzsoTr9-X83sPCx08uNzZC_EgFwb-P8lnHXSeRo">>,
+        Opts),
+    %% Now list the index using list mode
+    {ok, Response} =
+        hb_ao:resolve(
+            <<
+                "~copycat@1.0/arweave&"
+                "from=", BlockBin/binary, "&"
+                "to=", BlockBin/binary, "&"
+                "mode=list"
+            >>,
+            Opts
+        ),
+    % The response body is JSON keyed by the block height as a binary.
+    JSONBody = maps:get(<<"body">>, Response),
+    IndexData = hb_json:decode(JSONBody),
+    BlockInfo = maps:get(BlockBin, IndexData),
+    %% Verify indexed and not-indexed keys exist
+    ?assert(maps:is_key(<<"indexed">>, BlockInfo)),
+    ?assert(maps:is_key(<<"not-indexed">>, BlockInfo)),
+    ?assertEqual([
+        <<"XSQIgyDY1XUJNz79OeRHFaNpJZyaJSBd7XFsjWlZpNU">>,
+        <<"bpd0CzsoTr9-X83sPCx08uNzZC_EgFwb-P8lnHXSeRo">>,
+        <<"n5rT8Y9Jet7SCnl_M77UrPNUFeud5iKazsn9Sr9gsWA">>,
+        <<"hvZlThf1B1tY4wMm4cETSsk8vIkOY3QZRmaBnQSzlVo">>,
+        <<"3urwRfVyWN35HE5RHGwOUk6CxkJ_lZOaMY7HZbeJyRs">>
+    ], maps:get(<<"indexed">>, BlockInfo)),
+    ?assertEqual([ ], maps:get(<<"not-indexed">>, BlockInfo)),
+    ok.
+
+%% @doc Resolving the header of this TX is expected to yield `error'.
+non_string_tags_test() ->
+    {_TestStore, _StoreOpts, Opts} = setup_index_opts(),
+    ?assertEqual(
+        error,
+        resolve_tx_header(<<"752P6t4cOjMabYHqzC6hyLhxyo4YKZLblg7va_J21YE">>, Opts)
+    ),
+    ok.
+
+%% @doc Index one block in write mode, then check the JSON report produced
+%% by list mode for the same block.
+list_index_test() ->
+    %% Test block: https://viewblock.io/arweave/block/1827942
+    {_TestStore, _StoreOpts, Opts} = setup_index_opts(),
+    Height = 1827942,
+    HeightBin = hb_util:bin(Height),
+    %% Step 1: index the block using write mode.
+    WritePath =
+        <<
+            "~copycat@1.0/arweave&"
+            "from=", HeightBin/binary, "&"
+            "to=", HeightBin/binary, "&"
+            "mode=write"
+        >>,
+    {ok, Height} = hb_ao:resolve(WritePath, Opts),
+    %% Step 2: list the index using list mode.
+    ListPath =
+        <<
+            "~copycat@1.0/arweave&"
+            "from=", HeightBin/binary, "&"
+            "to=", HeightBin/binary, "&"
+            "mode=list"
+        >>,
+    {ok, Listing} = hb_ao:resolve(ListPath, Opts),
+    %% The listing must be served as JSON.
+    ?assertEqual(<<"application/json">>, maps:get(<<"content-type">>, Listing)),
+    ?event(debug_test, {response, Listing}),
+    Decoded = hb_json:decode(maps:get(<<"body">>, Listing)),
+    %% The block height must be present as a key.
+    ?assert(maps:is_key(HeightBin, Decoded)),
+    PerBlock = maps:get(HeightBin, Decoded),
+    %% Both report keys must exist before their values are compared.
+    ?assert(maps:is_key(<<"indexed">>, PerBlock)),
+    ?assert(maps:is_key(<<"not-indexed">>, PerBlock)),
+    ExpectedIndexed =
+        [
+            <<"c2ATDuTgwKCcHpAFZqSt13NC-tA4hdA7Aa2xBPuOzoE">>,
+            <<"kK67S13W_8jM9JUw2umVamo0zh9v1DeVxWrru2evNco">>,
+            <<"bXEgFm4K2b5VD64skBNAlS3I__4qxlM3Sm4Z5IXj3h8">>,
+            <<"T2pluNnaavL7-S2GkO_m3pASLUqMH_XQ9IiIhZKfySs">>,
+            <<"WbRAQbeyjPHgopBKyi0PLeKWvYZr3rgZvQ7QY3ASJS4">>
+        ],
+    ?assertEqual(ExpectedIndexed, maps:get(<<"indexed">>, PerBlock)),
+    ?assertEqual([ ], maps:get(<<"not-indexed">>, PerBlock)),
+    ok.
+
+%% @doc With `to' omitted, a run started two blocks above an already indexed
+%% block is expected to return that indexed block and to leave the block
+%% below it unindexed.
+auto_stop_on_indexed_block_test() ->
+    {_TestStore, _StoreOpts, Opts} = setup_index_opts(),
+    Indexed = 1827941,
+    IndexedBin = hb_util:bin(Indexed),
+    OneAbove = Indexed + 1,
+    TwoAbove = Indexed + 2,
+    TwoAboveBin = hb_util:bin(TwoAbove),
+    SingleBlockPath =
+        <<
+            "~copycat@1.0/arweave&"
+            "from=", IndexedBin/binary, "&"
+            "to=", IndexedBin/binary, "&"
+            "mode=write"
+        >>,
+    {ok, Indexed} = hb_ao:resolve(SingleBlockPath, Opts),
+    OpenEndedPath =
+        <<
+            "~copycat@1.0/arweave&"
+            "from=", TwoAboveBin/binary, "&"
+            "mode=write"
+        >>,
+    {ok, Indexed} = hb_ao:resolve(OpenEndedPath, Opts),
+    ?assert(has_any_indexed_tx(TwoAbove, Opts)),
+    ?assert(has_any_indexed_tx(OneAbove, Opts)),
+    ?assert(has_any_indexed_tx(Indexed, Opts)),
+    ?assertNot(has_any_indexed_tx(Indexed-1, Opts)),
+    ok.
+
+%% @doc With an explicit `to', the run is expected to continue past an
+%% already indexed block down to `to'.
+explicit_to_reindexes_all_test() ->
+    {_TestStore, _StoreOpts, Opts} = setup_index_opts(),
+    Indexed = 1827942,
+    IndexedBin = hb_util:bin(Indexed),
+    Lower = Indexed - 1,
+    LowerBin = hb_util:bin(Lower),
+    AboveBin = hb_util:bin(Indexed+1),
+    SingleBlockPath =
+        <<
+            "~copycat@1.0/arweave&"
+            "from=", IndexedBin/binary, "&"
+            "to=", IndexedBin/binary, "&"
+            "mode=write"
+        >>,
+    {ok, Indexed} = hb_ao:resolve(SingleBlockPath, Opts),
+    ?assertNot(has_any_indexed_tx(Lower, Opts)),
+    RangePath =
+        <<
+            "~copycat@1.0/arweave&"
+            "from=", AboveBin/binary, "&"
+            "to=", LowerBin/binary, "&"
+            "mode=write"
+        >>,
+    {ok, Lower} = hb_ao:resolve(RangePath, Opts),
+    ?assert(has_any_indexed_tx(Lower, Opts)),
+    ok.
+
+%% @doc Manually write to the index to simulate a partially indexed block.
+%% This should also trigger a stop when the `to` option is omitted.
+auto_stop_partial_index_test() ->
+    {_TestStore, StoreOpts, Opts} = setup_index_opts(),
+    Block = 1826700,
+    HigherBlock = Block + 1,
+    % First pass: run over the block with ID indexing switched off and
+    % `arweave_index_blocks' on, so that the block can be read back from the
+    % block cache below.
+    NoIndexOpts = Opts#{
+        arweave_index_ids => false,
+        arweave_index_blocks => true
+    },
+    {ok, Block} =
+        hb_ao:resolve(
+            <<
+                "~copycat@1.0/arweave&"
+                "from=", (hb_util:bin(Block))/binary, "&"
+                "to=", (hb_util:bin(Block))/binary, "&"
+                "mode=write"
+            >>,
+            NoIndexOpts
+        ),
+    {ok, BlockData} = dev_arweave_block_cache:read(Block, Opts),
+    TXIDs = hb_maps:get(<<"txs">>, BlockData, [], Opts),
+    ?assert(length(TXIDs) > 0),
+    % Write a single offset entry by hand so the block looks partially indexed.
+    [OneTXID | _] = TXIDs,
+    hb_store_arweave:write_offset(StoreOpts, OneTXID, <<"tx@1.0">>, 0, 0),
+    {ok, Block} =
+        hb_ao:resolve(
+            <<
+                "~copycat@1.0/arweave&"
+                "from=", (hb_util:bin(HigherBlock))/binary, "&"
+                "mode=write"
+            >>,
+            Opts
+        ),
+    ?assert(has_any_indexed_tx(HigherBlock, Opts)),
+    ?assert(has_any_indexed_tx(Block, Opts)),
+    ?assertNot(has_any_indexed_tx(Block-1, Opts)),
+    ok.
+
+%% @doc Negative `from'/`to' values passed to parse_range/2 are expected to
+%% be resolved relative to the current chain tip.
+negative_parse_range_test() ->
+    {_TestStore, _StoreOpts, Opts} = setup_index_opts(),
+    % NOTE(review): the binary literal that was passed to hb_ao:resolve/2
+    % here had been reduced to `<>', which is not valid Erlang. The tip is
+    % now fetched with latest_height/1, as negative_from_index_test does;
+    % presumably that helper performs the same lookup -- confirm.
+    Tip = latest_height(Opts),
+    {NegativeFrom, UndefinedTo} =
+        parse_range(#{ <<"from">> => <<"-3">> }, Opts),
+    ?assertEqual(hb_util:int(Tip) - 3, NegativeFrom),
+    ?assertEqual(undefined, UndefinedTo),
+    {PositiveFrom, NegativeTo} =
+        parse_range(#{ <<"from">> => <<"10">>, <<"to">> => <<"-3">> }, Opts),
+    ?assertEqual(10, PositiveFrom),
+    ?assertEqual(hb_util:int(Tip) - 3, NegativeTo),
+    ok.
+
+%% @doc A negative `from' is expected to be taken relative to the chain tip,
+%% with the open-ended run stopping at the already indexed block.
+negative_from_index_test() ->
+    {_TestStore, _StoreOpts, Opts} = setup_index_opts(),
+    Tip = latest_height(Opts),
+    StopBlock = 1827942,
+    StartBlock = 1827943,
+    % Express StartBlock as a distance below the current tip.
+    Distance = Tip - StartBlock,
+    ?assert(Distance > 0),
+    RelativeFrom = <<"-", (hb_util:bin(Distance))/binary>>,
+    StopBin = hb_util:bin(StopBlock),
+    SingleBlockPath =
+        <<
+            "~copycat@1.0/arweave&"
+            "from=", StopBin/binary, "&"
+            "to=", StopBin/binary, "&"
+            "mode=write"
+        >>,
+    {ok, StopBlock} = hb_ao:resolve(SingleBlockPath, Opts),
+    OpenEndedPath =
+        <<
+            "~copycat@1.0/arweave&"
+            "from=", RelativeFrom/binary, "&"
+            "mode=write"
+        >>,
+    {ok, StopBlock} = hb_ao:resolve(OpenEndedPath, Opts),
+    ?assert(has_any_indexed_tx(StartBlock, Opts)),
+    Highest = highest_contiguous_indexed_block(StopBlock, 50, Opts),
+    ?assertEqual(StartBlock, Highest),
+    assert_indexed_range(Highest, StopBlock, Opts),
+    ?assertNot(has_any_indexed_tx(StopBlock - 1, Opts)),
+    ?assertNot(has_any_indexed_tx(Highest + 1, Opts)),
+    ok.
+
+%% @doc Build the options shared by the indexing tests. Returns
+%% `{TestStore, StoreOpts, Opts}': a fresh test store, the index-store
+%% options that wrap it, and node options whose store chain places the test
+%% store first.
+setup_index_opts() ->
+    TestStore = hb_test_utils:test_store(),
+    StoreOpts = #{ <<"index-store">> => [TestStore] },
+    FSStore =
+        #{
+            <<"store-module">> => hb_store_fs,
+            <<"name">> => <<"cache-mainnet">>
+        },
+    ArweaveStore =
+        #{
+            <<"store-module">> => hb_store_arweave,
+            <<"name">> => <<"cache-arweave">>,
+            <<"index-store">> => [TestStore],
+            <<"arweave-node">> => <<"https://arweave.net">>
+        },
+    SubindexGateway =
+        #{
+            <<"store-module">> => hb_store_gateway,
+            <<"subindex">> => [
+                #{
+                    <<"name">> => <<"Data-Protocol">>,
+                    <<"value">> => <<"ao">>
+                }
+            ],
+            <<"local-store">> => [TestStore]
+        },
+    PlainGateway =
+        #{
+            <<"store-module">> => hb_store_gateway,
+            <<"local-store">> => [TestStore]
+        },
+    Opts = #{
+        store =>
+            [TestStore, FSStore, ArweaveStore, SubindexGateway, PlainGateway],
+        arweave_index_ids => true,
+        arweave_index_store => StoreOpts
+    },
+    {TestStore, StoreOpts, Opts}.
+
+%% @doc Assert that every `{ItemID, Index}' in `ExpectedItems' can be read
+%% on its own, and that the message found at `Index' inside the bundle
+%% `BundleID' equals the individually read item once the `?AO_CORE_KEYS'
+%% have been removed from both.
+assert_bundle_read(BundleID, ExpectedItems, Opts) ->
+    ReadItems =
+        lists:map(
+            fun({ItemID, _Index}) ->
+                assert_item_read(ItemID, Opts)
+            end,
+            ExpectedItems
+        ),
+    Bundle = assert_item_read(BundleID, Opts),
+    lists:foreach(
+        fun({{_ItemID, Index}, Item}) ->
+            QueriedItem = hb_ao:get(Index, Bundle, Opts),
+            ?assertEqual(hb_maps:without(?AO_CORE_KEYS, Item), hb_maps:without(?AO_CORE_KEYS, QueriedItem))
+        end,
+        lists:zip(ExpectedItems, ReadItems)
+    ),
+    ok.
+
+%% @doc Resolve `ItemID', assert that the result verifies and that its
+%% signed ID equals `ItemID', and return the resolved message.
+assert_item_read(ItemID, Opts) ->
+    ?event(debug_test, {resolving, {explicit, ItemID}}),
+    Resolved = hb_ao:resolve(ItemID, Opts),
+    ?assertMatch({ok, _}, Resolved, ItemID),
+    {ok, Item} = Resolved,
+    ?event(debug_test, {item, Item}),
+    ?assert(hb_message:verify(Item, all, Opts)),
+    % Pass `Opts' through so the ID is computed with the same options as the
+    % resolve and verify calls above, as tx_with_no_data_test already does.
+    ?assertEqual(ItemID, hb_message:id(Item, signed, Opts)),
+    Item.
+
+%% @doc Return `true' when at least one TX ID listed in the header of the
+%% block at `Height' is indexed. A block header that cannot be fetched
+%% counts as not indexed.
+has_any_indexed_tx(Height, Opts) ->
+    case fetch_block_header(Height, Opts) of
+        {ok, Block} ->
+            TXIDs = hb_maps:get(<<"txs">>, Block, [], Opts),
+            lists:any(fun(TXID) -> is_tx_indexed(TXID, Opts) end, TXIDs);
+        {error, _} ->
+            false
+    end.
+
+%% @doc Walk upwards from `StartBlock + 1' and return the last height of the
+%% unbroken run of indexed blocks, looking at most `MaxLookahead' blocks
+%% ahead. Returns `StartBlock' itself when the block right above it is not
+%% indexed.
+highest_contiguous_indexed_block(StartBlock, MaxLookahead, Opts) ->
+    highest_contiguous_indexed_block(
+        StartBlock + 1,
+        StartBlock + MaxLookahead,
+        StartBlock,
+        Opts
+    ).
+
+highest_contiguous_indexed_block(Current, Max, LastIndexed, _Opts)
+        when Current > Max ->
+    % Lookahead limit reached.
+    LastIndexed;
+highest_contiguous_indexed_block(Current, Max, LastIndexed, Opts) ->
+    case has_any_indexed_tx(Current, Opts) of
+        true ->
+            highest_contiguous_indexed_block(Current + 1, Max, Current, Opts);
+        false ->
+            LastIndexed
+    end.
+
+%% @doc Assert that every block from `From' down to `To' (inclusive) has at
+%% least one indexed TX. Does nothing when `From' is below `To'.
+assert_indexed_range(From, To, _Opts) when From < To ->
+    ok;
+assert_indexed_range(From, To, Opts) ->
+    ?assert(has_any_indexed_tx(From, Opts)),
+    assert_indexed_range(From - 1, To, Opts).
diff --git a/src/dev_copycat_graphql.erl b/src/dev_copycat_graphql.erl index df8c9d47e..9f0752ec6 100644 --- a/src/dev_copycat_graphql.erl +++ b/src/dev_copycat_graphql.erl @@ -270,7 +270,7 @@ default_query(Parts) -> %% @doc Run node for testing run_test_node() -> Store = hb_test_utils:test_store(), - Opts = #{ store => Store, priv_wallet => hb:wallet() }, + Opts = #{ store => Store, priv_wallet => ar_wallet:new() }, Node = hb_http_server:start_node(Opts), {Node ,Opts}. %% @doc Basic test to test copycat device @@ -450,4 +450,4 @@ fetch_scheduler_location_test() -> ?assert(is_integer(Data)), ?assert(Data > 0), ?event({schedulers_indexed, Data}), - ok. + ok. \ No newline at end of file diff --git a/src/dev_hyperbuddy.erl b/src/dev_hyperbuddy.erl index 18c70abf5..0f6303412 100644 --- a/src/dev_hyperbuddy.erl +++ b/src/dev_hyperbuddy.erl @@ -1,16 +1,20 @@ %%% @doc A device that renders a REPL-like interface for AO-Core via HTML. -module(dev_hyperbuddy). --export([info/0, format/3, return_file/2, return_error/2]). +-export([info/1, format/3, return_file/2, return_error/2]). -export([metrics/3, events/3]). -export([throw/3]). -include_lib("include/hb.hrl"). -include_lib("eunit/include/eunit.hrl"). -%% @doc Export an explicit list of files via http. -info() -> +%% @doc Export an explicit list of files via http. Filenames added to the +%% `hyperbuddy_serve' key of the node message will be served as static files. +%% Each filename must point to a path relative to the HyperBEAM instance's +%% build subdirectory as follows: `priv/html/hyperbuddy@1.0'. +info(Opts) -> + ServedRoutes = hb_maps:get(hyperbuddy_serve, Opts, #{}, Opts), #{ default => fun serve/4, - routes => #{ + serve => ServedRoutes#{ % Default message viewer page: <<"index">> => <<"index.html">>, <<"bundle.js">> => <<"bundle.js">>, @@ -141,20 +145,18 @@ throw(_Msg, _Req, Opts) -> end. %% @doc Serve a file from the priv directory. 
Only serves files that are explicitly
-%% listed in the `routes' field of the `info/0' return value.
+%% listed in the `serve' field of the `info/1' return value.
 serve(<<"keys">>, M1, _M2, Opts) -> dev_message:keys(M1, Opts);
 serve(<<"set">>, M1, M2, Opts) -> dev_message:set(M1, M2, Opts);
 serve(Key, _, _, Opts) ->
     ?event({hyperbuddy_serving, Key}),
-    Routes = hb_maps:get(routes, info(), no_routes, Opts),
-    case hb_maps:get(Key, Routes, undefined, Opts) of
-        undefined -> {error, not_found};
-        Filename -> return_file(Filename)
+    ServeRoutes = hb_maps:get(serve, info(Opts), #{}, Opts),
+    case hb_maps:find(Key, ServeRoutes, Opts) of
+        {ok, Filename} -> return_file(<<"hyperbuddy@1.0">>, Filename, #{});
+        error -> {error, not_found}
     end.
 
 %% @doc Read a file from disk and serve it as a static HTML page.
-return_file(Name) ->
-    return_file(<<"hyperbuddy@1.0">>, Name, #{}).
 return_file(Device, Name) ->
     return_file(Device, Name, #{}).
 return_file(Device, Name, Template) ->
@@ -173,7 +175,9 @@ return_file(Device, Name, Template) ->
                     <<".css">> -> <<"text/css">>;
                     <<".png">> -> <<"image/png">>;
                     <<".ico">> -> <<"image/x-icon">>;
-                    <<".ttf">> -> <<"font/ttf">>
+                    <<".ttf">> -> <<"font/ttf">>;
+                    <<".json">> -> <<"application/json">>;
+                    _ -> <<"text/plain">>
                 end
         }
     };
@@ -197,6 +201,7 @@ apply_template(Body, Template) when is_map(Template) ->
 apply_template(Body, []) ->
     Body;
 apply_template(Body, [{Key, Value} | Rest]) ->
+    ?event(debug_apply_template, {key, Key, value, Value}),
     apply_template(
         re:replace(
             Body,
@@ -221,4 +226,25 @@ return_templated_file_test() ->
     ?assertNotEqual(
         binary:match(Body, <<"This is an error message.">>),
         nomatch
-    ).
\ No newline at end of file
+    ).
+ +return_custom_json_test() -> + ?assertMatch( + {ok, + #{ + <<"body">> := JSONBin, + <<"content-type">> := <<"application/json">> + } + } when byte_size(JSONBin) > 0, + hb_ao:resolve( + #{ + <<"device">> => <<"hyperbuddy@1.0">> + }, + <<"custom.json">>, + #{ + hyperbuddy_serve => #{ + <<"custom.json">> => <<"test.json">> + } + } + ) + ). diff --git a/src/dev_location.erl b/src/dev_location.erl index 433a8afab..42ba3061b 100644 --- a/src/dev_location.erl +++ b/src/dev_location.erl @@ -199,7 +199,7 @@ default_url(Opts) -> case hb_opts:get(location_url, not_found, Opts) of not_found -> Port = hb_util:bin(hb_opts:get(port, 8734, Opts)), - Host = hb_opts:get(host, <<"localhost">>, Opts), + Host = hb_opts:get(node_host, <<"localhost">>, Opts), Protocol = hb_opts:get(protocol, http1, Opts), ProtoStr = case Protocol of diff --git a/src/dev_manifest.erl b/src/dev_manifest.erl index 0a62e7bc9..395cc2492 100644 --- a/src/dev_manifest.erl +++ b/src/dev_manifest.erl @@ -1,7 +1,9 @@ %%% @doc An Arweave path manifest resolution device. Follows the v1 schema: %%% https://specs.ar.io/?tx=lXLd0OPwo-dJLB_Amz5jgIeDhiOkjXuM3-r0H_aiNj0 -module(dev_manifest). --export([index/3, info/0]). +-export([index/3, info/0, request/3]). +%%% Public test exports +-export([test_env_opts/0]). -include("include/hb.hrl"). -include_lib("eunit/include/eunit.hrl"). @@ -15,7 +17,7 @@ info() -> %% @doc Return the fallback index page when the manifest itself is requested. 
index(M1, M2, Opts) ->
-    ?event(debug_manifest, {index_request, {m1, M1}, {m2, M2}}),
+    ?event(debug_manifest, {index_request, {base, M1}, {request, M2}}, Opts),
     case route(<<"index">>, M1, M2, Opts) of
         {ok, Index} ->
             ?event({manifest_index_returned, Index}),
@@ -62,36 +64,116 @@ route(ID, _, _, Opts) when ?IS_ID(ID) ->
     ?event({manifest_reading_id, ID}),
     hb_cache:read(ID, Opts);
 route(Key, M1, M2, Opts) ->
-    ?event(debug_manifest, {manifest_lookup, {key, Key}, {m1, M1}, {m2, M2}}),
+    ?event(debug_manifest, {manifest_lookup, {key, Key}, {m1, M1}, {m2, {explicit, M2}}}),
     {ok, Manifest} = manifest(M1, M2, Opts),
-    Res = hb_ao:get(
-        <<"paths/", Key/binary>>,
-        {as, <<"message@1.0">>, Manifest},
-        Opts
-    ),
-    case Res of
-        not_found ->
-            %% Support materialized view in some JavaScript frameworks
+    {ok, Res} = maps:find(<<"paths">>, Manifest),
+    case maps:get(Key, Res, no_path_match) of
+        no_path_match ->
+            % Support materialized view in some JavaScript frameworks.
             case hb_opts:get(manifest_404, fallback, Opts) of
                 error ->
+                    ?event({manifest_404_error, {key, Key}}),
                     {error, not_found};
                 fallback ->
                     ?event({manifest_fallback, {key, Key}}),
                     route(<<"index">>, M1, M2, Opts)
             end;
+        Result ->
+            ?event({manifest_lookup_success, {key, Key}, {result, Result}}),
+            try {ok, hb_cache:ensure_loaded(Result, Opts)}
+            catch _:_:_ -> {error, not_found}
+            end
+    end.
+
+%% @doc Implement the `on/request' hook for the `manifest@1.0' device, finding
+%% requests for legacy (non-device-tagged) manifests and casting them to
+%% `manifest@1.0' before execution. Allowing `/ID/path` style access for old data.
+request(Base, Req, Opts) ->
+    ?event({on_req_manifest_detector, {base, Base}, {req, Req}}),
+    maybe
+        {ok, [PrimaryMsg|Rest]} ?= hb_maps:find(<<"body">>, Req, Opts),
+        {ok, Loaded} ?= load(PrimaryMsg, Opts),
+        ?event(debug_manifest, {loaded, Loaded}),
+        % Must handle three cases:
+        % 1. The maybe_cast is not a manifest, so we return the *loaded* request,
+        %    such that the work to load it is not wasted.
+        % 2. The maybe_cast is a manifest, and there are no other elements of
+        %    the path, so we add the `index' path and return.
+        % 3. The maybe_cast is a manifest, and there are other elements of
+        %    the path, so we return the original request sequence with the first
+        %    message replaced with the casted manifest.
+        case {Rest, maybe_cast_manifest(Loaded, Opts)} of
+            {_, ignored} ->
+                ?event(
+                    debug_manifest,
+                    {non_manifest_returning_loaded, {loaded, Loaded}, {rest, Rest}}),
+                {ok, Req#{ <<"body">> => [Loaded|Rest] }};
+            {[], {ok, Casted}} ->
+                ?event(debug_manifest, {manifest_returning_index, {req, Req}}),
+                {ok, Req#{ <<"body">> => [Casted, #{<<"path">> => <<"index">>}] }};
+            {_, {ok, Casted}} ->
+                ?event(debug_manifest, {manifest_returning_subpath, {req, Req}}),
+                {ok, Req#{ <<"body">> => [Casted|Rest] }}
+        end
+    else
+        {error, not_found} ->
+            ?event(debug_manifest, {not_found_on_load, {req, Req}}),
+            {
+                error,
+                #{
+                    <<"status">> => 404,
+                    <<"body">> => <<"Not Found">>
+                }
+            };
+        Error ->
+            ?event(debug_manifest, {request_ignored, {unexpected, Error}}),
+            % On other errors, we return the original request.
+            {ok, Req}
+    end.
+
+%% @doc Load the first message of a request so that `request/3' can hand it
+%% to `maybe_cast_manifest/2', which casts it to `manifest@1.0` if it has the
+%% manifest content-type but no `manifest@1.0' device. Maps are returned
+%% as-is, IDs are read from the cache and links are loaded. Lists and
+%% `{as, _, _}' tuples yield `skip', which `request/3' treats as "return the
+%% request unchanged". A failed load yields `{error, not_found}'.
+load(Msg, _Opts) when is_map(Msg) -> {ok, Msg};
+load(List, _Opts) when is_list(List) -> skip;
+load({as, _, _}, _Opts) -> skip;
+load(ID, Opts) when ?IS_ID(ID) ->
+    case hb_cache:read(ID, Opts) of
+        {ok, Msg} -> load(Msg, Opts);
+        _ ->
+            ?event(debug_maybe_cast_manifest, {message_load_failed, {id, ID}}),
+            {error, not_found}
+    end;
+load(Msg, Opts) when ?IS_LINK(Msg) ->
+    try load(hb_cache:ensure_loaded(Msg, Opts), Opts)
+    catch
+        % NOTE(review): a bare pattern here matches only `throw'-class
+        % exceptions, whereas route/4 catches every class with `_:_:_' --
+        % confirm that ensure_loaded only ever throws.
+        _ ->
+            ?event(debug_maybe_cast_manifest, {message_load_failed, {link, Msg}}),
+            {error, not_found}
+    end.
+ +maybe_cast_manifest(Msg, Opts) -> + case hb_maps:find(<<"device">>, Msg, Opts) of + {ok, X} when X == <<"manifest@1.0">> -> {ok, Msg}; _ -> - {ok, Res} + case hb_maps:find(<<"content-type">>, Msg, Opts) of + {ok, <<"application/x.arweave-manifest", _/binary>>} -> + ?event(debug_maybe_cast_manifest, {manifest_casting, {msg, Msg}}), + {ok, {as, <<"manifest@1.0">>, Msg}}; + _IgnoredContentType -> + ignored + end end. %% @doc Find and deserialize a manifest from the given base, returning a %% message with the `~manifest@1.0' device. manifest(Base, _Req, Opts) -> JSON = - hb_ao:get_first( + hb_maps:get_first( [ - {{as, <<"message@1.0">>, Base}, [<<"data">>]}, - {{as, <<"message@1.0">>, Base}, [<<"body">>]} + {Base, <<"data">>}, + {Base, <<"body">>} ], + not_found, Opts ), FlatManifest = #{ <<"paths">> := FlatPaths } = hb_json:decode(JSON), @@ -102,7 +184,7 @@ manifest(Base, _Req, Opts) -> %% @doc Generate a nested message of links to content from a parsed (and %% structured) manifest. -linkify(#{ <<"id">> := ID }, Opts) -> +linkify(#{ <<"id">> := ID }, Opts) when is_binary(ID) -> LinkOptsBase = (maps:with([store], Opts))#{ scope => [local, remote]}, {link, ID, LinkOptsBase#{ <<"type">> => <<"link">>, <<"lazy">> => false }}; linkify(Manifest, Opts) when is_map(Manifest) -> @@ -122,7 +204,14 @@ linkify(Manifest, _Opts) -> %%% Tests resolve_test() -> - Opts = #{ store => hb_opts:get(store, no_viable_store, #{}) }, + Opts = #{ + store => hb_opts:get(store, no_viable_store, #{}), + on => #{ + <<"request">> => #{ + <<"device">> => <<"manifest@1.0">> + } + } + }, IndexPage = #{ <<"content-type">> => <<"text/html">>, <<"body">> => <<"Page 1">> @@ -146,7 +235,13 @@ resolve_test() -> <<"device">> => <<"manifest@1.0">>, <<"body">> => JSON }, + LegacyManifestWithCT = + #{ + <<"content-type">> => <<"application/x.arweave-manifest+json">>, + <<"body">> => JSON + }, {ok, ManifestID} = hb_cache:write(ManifestMsg, Opts), + {ok, LegacyManifestID} = hb_cache:write(LegacyManifestWithCT, 
Opts), ?event({manifest_id, ManifestID}), Node = hb_http_server:start_node(Opts), ?assertMatch( @@ -156,6 +251,16 @@ resolve_test() -> ?assertMatch( {ok, #{ <<"body">> := <<"Page 2">>}}, hb_http:get(Node, << ManifestID/binary, "/nested/page2" >>, Opts)), + % Making the same requests to a node with the `request' hook enabled should + % yield the same results. + ?event({legacy_manifest_id, LegacyManifestID}), + ?assertMatch( + {ok, #{ <<"body">> := <<"Page 1">> }}, + hb_http:get(Node, << LegacyManifestID/binary, "/index" >>, Opts) + ), + ?assertMatch( + {ok, #{ <<"body">> := <<"Page 2">>}}, + hb_http:get(Node, << LegacyManifestID/binary, "/nested/page2" >>, Opts)), ok. manifest_default_fallback_test() -> @@ -202,3 +307,113 @@ create_generic_manifest(Opts) -> <<"body">> => JSON }, hb_cache:write(ManifestMsg, Opts). + +%% @doc Download the manifest raw data. +%% NOTE: This test requests data to arweave node +manifest_download_via_raw_endpoint_test_ignore() -> + Opts = #{ + arweave_index_ids => true, + store => [ + #{ + <<"store-module">> => hb_store_arweave, + <<"name">> => <<"arweave-store">>, + <<"arweave-node">> => <<"https://arweave.net">>, + <<"index-store">> => [hb_test_utils:test_store()] + } + ]}, + Node = hb_http_server:start_node(Opts), + %% Force index the block that includes the manifest transaction + _ = hb_http:get( + Node, + #{ + <<"path">> => + <<"~copycat@1.0/arweave/?from+integer=1809222&to+integer=1809222">> + }, + #{} + ), + ?assertMatch( + {ok, + #{ + <<"arweave-id">> := <<"42jky7O3rzKkMOfHBXgK-304YjulzEYqHc9qyjT3efA">>, + <<"content-length">> := 5868 + } + }, + hb_http:get( + Node, + #{<<"path">> => <<"~arweave@2.9/raw=42jky7O3rzKkMOfHBXgK-304YjulzEYqHc9qyjT3efA">>}, + #{} + ) + ). + +%% @doc Accessing `/TXID` of a manifest transaction should access the index key. +manifest_inner_redirect_test() -> + Opts = test_env_opts(), + Node = hb_http_server:start_node(Opts), + %% Request manifest to node. 
+ ?assertMatch( + {ok, #{<<"commitments">> := #{<<"Tqh6oIS2CLUaDY11YUENlvvHmDim1q16pMyXAeSKsFM">> := _ }}}, + hb_http:get( + Node, + #{<<"path">> => <<"/42jky7O3rzKkMOfHBXgK-304YjulzEYqHc9qyjT3efA">>}, + Opts + ) + ). + +%% @doc Accessing `/TXID/assets/ArticleBlock-Dtwjc54T.js` should return valid message. +access_key_path_in_manifest_test() -> + Opts = test_env_opts(), + Node = hb_http_server:start_node(Opts), + ?assertMatch( + {ok, #{<<"commitments">> := #{<<"oLnQY-EgiYRg9XyO7yZ_mC0Ehy7TFR3UiDhFvxcohC4">> := _ }}}, + hb_http:get( + Node, + #{<<"path">> => <<"/42jky7O3rzKkMOfHBXgK-304YjulzEYqHc9qyjT3efA/assets/ArticleBlock-Dtwjc54T.js">>}, + Opts + ) + ). + +%% This works with `not_found.js` but doesn't follow the logic if under a +%% folder structure, like `assets/not_found.js . +manifest_should_fallback_on_not_found_path_test() -> + Opts = test_env_opts(), + Node = hb_http_server:start_node(Opts), + ?assertMatch( + {ok, #{<<"commitments">> := #{<<"Tqh6oIS2CLUaDY11YUENlvvHmDim1q16pMyXAeSKsFM">> := _ }}}, + hb_http:get( + Node, + #{<<"path">> => <<"/42jky7O3rzKkMOfHBXgK-304YjulzEYqHc9qyjT3efA/x.js">>}, + Opts + ) + ). + +%% @doc Returns `Opts' with the test manifest fixture flow used by `dev_b32_name'. +test_env_opts() -> + TempStore = hb_test_utils:test_store(), + BaseOpts = + #{ + store => + [ + TempStore + #{<<"store-module">> => hb_store_gateway} + ] + }, + lists:foreach( + fun(Ref) -> + hb_test_utils:preload( + BaseOpts, + <<"test/arbundles.js/ans-104-manifest-", Ref/binary>> + ) + end, + [ + <<"42jky7O3rzKkMOfHBXgK-304YjulzEYqHc9qyjT3efA.bin">>, + <<"index-Tqh6oIS2CLUaDY11YUENlvvHmDim1q16pMyXAeSKsFM.bin">>, + <<"item-oLnQY-EgiYRg9XyO7yZ_mC0Ehy7TFR3UiDhFvxcohC4.bin">> + ] + ), + BaseOpts#{ + on => + #{ + <<"request">> => + [#{<<"device">> => <<"manifest@1.0">>}] + } + }. 
diff --git a/src/dev_message.erl b/src/dev_message.erl index 76059fab6..857845e8e 100644 --- a/src/dev_message.erl +++ b/src/dev_message.erl @@ -109,7 +109,7 @@ id(RawBase, Req, NodeOpts) -> case hb_maps:keys(Commitments) of [] -> % If there are no commitments, we must (re)calculate the ID. - ?event(ids, regenerating_id), + ?event(debug_id, regenerating_id), calculate_id(hb_maps:without([<<"commitments">>], Base), Req, IDOpts); IDs -> % Accumulate the relevant IDs into a single value. This is performed @@ -124,7 +124,7 @@ id(RawBase, Req, NodeOpts) -> % accumulation function starts with a buffer of zero encoded as a % 256-bit binary. Subsequently, a single ID on its own 'accumulates' % to itself. - ?event(ids, returning_existing_ids), + ?event(debug_id, returning_existing_ids), {ok, hb_util:human_id( hb_crypto:accumulate( @@ -635,7 +635,7 @@ set(Base, NewValuesMsg, Opts) -> ), % Base message with keys-to-unset removed BaseValues = hb_maps:without(UnsetKeys, Base, Opts), - ?event(message_set, + ?event(debug_message_set, {performing_set, {conflicting_keys, ConflictingKeys}, {keys_to_unset, UnsetKeys}, @@ -949,9 +949,18 @@ set_ignore_undefined_test() -> ?assertEqual(#{ <<"test-key">> => <<"Value1">> }, hb_private:reset(hb_util:ok(set(Base, Req, #{ hashpath => ignore })))). -verify_test() -> +verify_test_() -> + {foreach, fun () -> ok end, fun (_) -> ok end, [ + {"RSA", fun () -> test_verify(?RSA_KEY_TYPE) end}, + {"EDDSA", fun () -> test_verify(?EDDSA_KEY_TYPE) end}, + {"Solana", fun () -> test_verify(?SOLANA_KEY_TYPE) end}, + {"Ethereum", fun () -> test_verify(?ETHEREUM_KEY_TYPE) end} + ]}. 
+ +test_verify(KeyType) -> Unsigned = #{ <<"a">> => <<"b">> }, - Signed = hb_message:commit(Unsigned, #{ priv_wallet => hb:wallet() }), + Wallet = ar_wallet:new(KeyType), + Signed = hb_message:commit(Unsigned, #{ priv_wallet => Wallet }), ?event({signed, Signed}), BadSigned = Signed#{ <<"a">> => <<"c">> }, ?event({bad_signed, BadSigned}), @@ -970,4 +979,4 @@ verify_test() -> #{ <<"path">> => <<"verify">>, <<"body">> => Signed }, #{ hashpath => ignore } ) - ). + ). \ No newline at end of file diff --git a/src/dev_meta.erl b/src/dev_meta.erl index 522e614dc..0dda4185e 100644 --- a/src/dev_meta.erl +++ b/src/dev_meta.erl @@ -224,6 +224,18 @@ handle_resolve(Req, Msgs, NodeMsg) -> ), LoadedMsgs = hb_cache:ensure_all_loaded(Msgs, NodeMsg), case resolve_hook(<<"request">>, Req, LoadedMsgs, NodeMsg) of + {ok, []} -> + {ok, + #{ + <<"status">> => 307, + <<"body">> => <<"Redirecting to default request.">>, + <<"location">> => hb_opts:get( + default_request, + <<"/~hyperbuddy@1.0/index">>, + NodeMsg + ) + } + }; {ok, PreProcessedMsg} -> ?event(http_request, {request_after_preprocessing, PreProcessedMsg}), AfterPreprocOpts = hb_http_server:get_opts(NodeMsg), @@ -316,6 +328,8 @@ embed_status({ErlStatus, Res}, NodeMsg) -> %% 1. The status code from the message. %% 2. The HTTP representation of the status code. %% 3. The default status code. 
+status_code({error, {no_viable_responses, _AllResponses}}, NodeMsg) -> + status_code(no_viable_responses, NodeMsg); status_code({ErlStatus, Msg}, NodeMsg) -> case message_to_status(Msg, NodeMsg) of default -> status_code(ErlStatus, NodeMsg); @@ -326,6 +340,7 @@ status_code(error, _NodeMsg) -> 400; status_code(created, _NodeMsg) -> 201; status_code(not_found, _NodeMsg) -> 404; status_code(client_error, _NodeMsg) -> 400; +status_code(no_viable_responses, _NodeMsg) -> 400; status_code(failure, _NodeMsg) -> 500; status_code(unavailable, _NodeMsg) -> 503; status_code(unauthorized, _NodeMsg) -> 401; diff --git a/src/dev_name.erl b/src/dev_name.erl index 04d018550..1331e3f2d 100644 --- a/src/dev_name.erl +++ b/src/dev_name.erl @@ -4,10 +4,14 @@ %%% match the key against each resolver in turn, and return the value of the %%% first resolver that matches. -module(dev_name). --export([info/1]). +-export([info/1, request/3]). +%%% Public helpers. +-export([test_arns_opts/0]). -include("include/hb.hrl"). -include_lib("eunit/include/eunit.hrl"). +%%% Core functionality. + %% @doc Configure the `default' key to proxy to the `resolver/4' function. %% Exclude the `keys' and `set' keys from being processed by this device, as %% these are needed to modify the base message itself. @@ -32,33 +36,146 @@ resolve(Key, _, Req, Opts) -> false -> {ok, Resolved}; true -> - hb_cache:read(Resolved, Opts) + maybe_load_resolved(Resolved, Opts) end; - not_found -> - not_found + not_found -> not_found end. +%% @doc Load a resolved name target if it is a cache reference, otherwise +%% return the resolved value directly. +maybe_load_resolved(Resolved, Opts) when ?IS_ID(Resolved) -> + hb_cache:read(Resolved, Opts); +maybe_load_resolved(Resolved, Opts) when ?IS_LINK(Resolved) -> + {ok, hb_cache:ensure_loaded(Resolved, Opts)}; +maybe_load_resolved(Resolved, _Opts) -> + {ok, Resolved}. + %% @doc Find the first resolver that matches the key and return its value. 
match_resolver(_Key, [], _Opts) -> not_found; match_resolver(Key, [Resolver | Resolvers], Opts) -> - case execute_resolver(Key, Resolver, Opts) of + case catch execute_resolver(Key, Resolver, Opts) of {ok, Value} -> - ?event({resolver_found, {key, Key}, {value, Value}}), + ?event({resolver_found, {key, Key}, {value, {string, Value}}}), {ok, Value}; _ -> match_resolver(Key, Resolvers, Opts) end. %% @doc Execute a resolver with the given key and return its value. -execute_resolver(Key, Resolver, Opts) -> +execute_resolver(Key, Path, Opts) when is_binary(Path) -> + hb_ao:resolve( + <>, + Opts + ); +execute_resolver(Key, Resolver, Opts) when is_map(Resolver) -> ?event({executing, {key, Key}, {resolver, Resolver}}), hb_ao:resolve( Resolver, - #{ <<"path">> => <<"lookup">>, <<"key">> => Key }, + Key, Opts ). +%%% `on/request` hook functionality. + +%% @doc Implements an `on/request' compatible hook that resolves names given in +%% the `host` key to their corresponding ID and prepends it to the execution path. +request(HookMsg, HookReq, Opts) -> + ?event({request_hook, {hook_msg, HookMsg}, {hook_req, HookReq}}), + maybe + {ok, Req} ?= hb_maps:find(<<"request">>, HookReq, Opts), + {ok, Host} ?= hb_maps:find(<<"host">>, Req, Opts), + {ok, Name} ?= name_from_host(Host, hb_opts:get(node_host, no_host, Opts)), + {ok, ResolvedMsg} ?= resolve(Name, HookMsg, HookReq, Opts), + ModReq = + maybe_append_named_message( + ResolvedMsg, + hb_util:ok(hb_maps:find(<<"body">>, HookReq, Opts)), + Opts + ), + ?event( + {request_with_prepended_path, + {name, Name}, + {full_host, Host}, + {resolved_msg, ResolvedMsg}, + {to_execute, ModReq} + } + ), + {ok, #{ <<"body">> => ModReq }} + else + {skip, Reason} -> + ?event({name_resolution_skipped, {reason, Reason}}), + {ok, HookReq}; + Other -> + case maps:get(<<"body">>, HookReq, []) of + [] -> + ?event({request_hook_404, root_path}), + % If no path is provided, we should return 404 if we could + % not resolve the name component. 
+ {error, #{<<"status">> => 404, <<"body">> => <<"Not Found">>}}; + _ -> + ?event({request_hook_skip, {cause, Other}, {hook_req, HookReq}}), + {ok, HookReq} + end + end. + +%% @doc After finding a hit for a named message, we should ensure that it is the +%% base message for the evaluation. If it is already present in the request, +%% however, we should not add it twice. Instead, we must add the version that +%% is loaded (if applicable). +%% +%% Eg: +%% base32IDA.hyperbeam/ -> [IDA] +%% base32IDA.hyperbeam/base64urlIDA/xyz -> [IDA, xyz] +%% base32IDA.hyperbeam/base64urlIDB/xyz -> [IDA, IDB, xyz] +maybe_append_named_message(ResolvedMsg, [], _Opts) -> [ResolvedMsg]; +maybe_append_named_message(ResolvedMsg, OldReq = [OldBase|ReqMsgsRest], Opts) -> + case permissive_id(OldBase, Opts) == permissive_id(ResolvedMsg, Opts) of + true when is_map(OldBase) or is_list(OldBase) -> OldReq; + true -> [ResolvedMsg|ReqMsgsRest]; + false -> + case is_map(OldBase) andalso hb_maps:get(<<"path">>, OldBase, not_found, Opts) of + not_found -> + ?event( + {skipping_old_base, + {old_base, OldBase}, + {resolved_msg, ResolvedMsg} + } + ), + [ResolvedMsg|ReqMsgsRest]; + _ -> [ResolvedMsg, OldBase|ReqMsgsRest] + end + end. + +%% @doc Takes a message or resolution request (`as` or `resolve`) -- whether in +%% the form of an ID, link, or loaded map -- and returns its ID. +permissive_id(ID, _Opts) when ?IS_ID(ID) -> ID; +permissive_id({link, ID, _LinkOpts}, _Opts) -> ID; +permissive_id({as, _Device, Msg}, Opts) -> permissive_id(Msg, Opts); +permissive_id(Msg, Opts) when is_map(Msg) -> hb_message:id(Msg, signed, Opts). + +%% @doc Takes a request-given host and the host value in the node message and +%% returns only the name component of the host, if it is present. If no name is +%% present, an empty binary is returned. +name_from_host(Host, no_host) -> + % Handle the case where no host key is present in the node message. 
This + % logic is also used when parsing of the host key from the node message + % fails, or the node message host is not found in the client provided value + % (node claims to be `x.com`, but the user request is for `abc.y.com`). + case binary:split(Host, <<".">>, [global, trim_all]) of + [_Host] -> {skip, <<"No subdomain found in `Host: ", Host/binary, "`.">>}; + [Name|_] -> {ok, Name} + end; +name_from_host(ReqHost, RawNodeHost) -> + case uri_string:parse(RawNodeHost) of + #{ host := NodeHostName } -> + case binary:split(ReqHost, <<".", NodeHostName/binary>>) of + [Subdomain, <<>>] -> {ok, Subdomain}; + _ -> name_from_host(ReqHost, no_host) + end; + _ -> name_from_host(ReqHost, no_host) + end. + %%% Tests. no_resolvers_test() -> @@ -67,21 +184,21 @@ no_resolvers_test() -> resolve(<<"hello">>, #{}, #{}, #{ only => local }) ). -message_lookup_device_resolver(Msg) -> +device_resolver(Msg) -> #{ <<"device">> => #{ - <<"lookup">> => fun(_, Req, Opts) -> - Key = hb_ao:get(<<"key">>, Req, Opts), - ?event({test_resolver_executing, {key, Key}, {req, Req}, {msg, Msg}}), - case maps:get(Key, Msg, not_found) of - not_found -> - ?event({test_resolver_not_found, {key, Key}, {msg, Msg}}), - {error, not_found}; - Value -> - ?event({test_resolver_found, {key, Key}, {value, Value}}), - {ok, Value} + info => + fun() -> + #{ + default => + fun(Key, _, _Req, _Opts) -> + case maps:get(Key, Msg, not_found) of + not_found -> {error, not_found}; + Value -> {ok, Value} + end + end + } end - end } }. @@ -94,7 +211,23 @@ single_resolver_test() -> #{ <<"load">> => false }, #{ name_resolvers => [ - message_lookup_device_resolver( + #{<<"hello">> => <<"world">>} + ] + } + ) + ). + +%% @doc Lookup a name in a message and return it. 
+message_lookup_test() -> + ?assertEqual( + {ok, <<"world">>}, + resolve( + <<"hello">>, + #{}, + #{ <<"load">> => false }, + #{ + name_resolvers => [ + device_resolver( #{<<"hello">> => <<"world">>} ) ] @@ -111,10 +244,10 @@ multiple_resolvers_test() -> #{ <<"load">> => false }, #{ name_resolvers => [ - message_lookup_device_resolver( + device_resolver( #{<<"irrelevant">> => <<"world">>} ), - message_lookup_device_resolver( + device_resolver( #{<<"hello">> => <<"bigger-world">>} ) ] @@ -141,9 +274,96 @@ load_and_execute_test() -> ], #{ name_resolvers => [ - message_lookup_device_resolver(#{ <<"irrelevant">> => ID }), - message_lookup_device_resolver(#{ TestKey => ID }) + device_resolver(#{ <<"irrelevant">> => ID }), + device_resolver(#{ TestKey => ID }) ] } ) - ). \ No newline at end of file + ). + +%% @doc Return an `Opts` for an environment with the default ARNS name export +%% and a temporary store for the test. +test_arns_opts() -> + JSONNames = <<"G_gb7SAgogHMtmqycwaHaC6uC-CZ3akACdFv5PUaEE8">>, + Path = <>, + TempStore = hb_test_utils:test_store(), + #{ + store => + [ + TempStore, + #{ + <<"store-module">> => hb_store_gateway, + <<"local-store">> => [TempStore] + } + ], + name_resolvers => [Path], + on => #{ + <<"request">> => #{ + <<"device">> => <<"name@1.0">> + } + } + }. + +%% @doc Names from JSON test. +arns_json_snapshot_test() -> + Opts = test_arns_opts(), + ?assertMatch( + {ok, <<"text/html">>}, + hb_ao:resolve_many( + [ + #{ <<"device">> => <<"name@1.0">> }, + #{ <<"path">> => <<"001_permabytes">>, <<"load">> => true }, + <<"content-type">> + ], + Opts + ) + ). + +arns_host_resolution_test() -> + Opts = test_arns_opts(), + Node = hb_http_server:start_node(Opts), + ?assertMatch( + {ok, <<"text/html">>}, + hb_http:get( + Node, + #{ + <<"path">> => <<"content-type">>, + <<"host">> => <<"001_permabytes.localhost">> + }, + Opts + ) + ). 
+ +arns_host_resolution_with_node_host_test() -> + Opts = (test_arns_opts())#{ node_host => <<"http://localhost">>, port => 0 }, + Node = hb_http_server:start_node(Opts), + ?assertMatch( + {ok, <<"text/html">>}, + hb_http:get( + Node, + #{ + <<"path">> => <<"content-type">>, + <<"host">> => <<"001_permabytes.localhost">> + }, + Opts + ) + ). + +localhost_root_request_skips_name_resolution_test() -> + Opts = (test_arns_opts())#{ port => 0 }, + Node = hb_http_server:start_node(Opts), + ?assertMatch( + {ok, + #{ + <<"status">> := 307, + <<"location">> := <<"/~hyperbuddy@1.0/index">> + }}, + hb_http:get( + Node, + #{ + <<"path">> => <<"/">>, + <<"host">> => <<"localhost">> + }, + Opts + ) + ). diff --git a/src/dev_process.erl b/src/dev_process.erl index bec7cb058..12a124a14 100644 --- a/src/dev_process.erl +++ b/src/dev_process.erl @@ -461,7 +461,7 @@ store_result(ForceSnapshot, ProcID, Slot, Res, Req, Opts) -> false -> Res; true -> ?event( - compute_debug, + debug_compute, {snapshotting, {proc_id, ProcID}, {slot, Slot}}, Opts ), diff --git a/src/dev_process_worker.erl b/src/dev_process_worker.erl index e6bd9ab90..4789f1263 100644 --- a/src/dev_process_worker.erl +++ b/src/dev_process_worker.erl @@ -106,13 +106,13 @@ await(Worker, GroupName, Base, Req, Opts) -> receive {resolved, _, GroupName, {slot, RecvdSlot}, Res} when RecvdSlot == TargetSlot orelse TargetSlot == any -> - ?event(compute_debug, {notified_of_resolution, + ?event(debug_compute, {notified_of_resolution, {target, TargetSlot}, {group, GroupName} }), Res; {resolved, _, GroupName, {slot, RecvdSlot}, _Res} -> - ?event(compute_debug, {waiting_again, + ?event(debug_compute, {waiting_again, {target, TargetSlot}, {recvd, RecvdSlot}, {worker, Worker}, @@ -120,7 +120,7 @@ await(Worker, GroupName, Base, Req, Opts) -> }), await(Worker, GroupName, Base, Req, Opts); {'DOWN', _R, process, Worker, _Reason} -> - ?event(compute_debug, + ?event(debug_compute, {leader_died, {group, GroupName}, {leader, Worker}, diff --git 
a/src/dev_push.erl b/src/dev_push.erl index 6ebfa1d63..184718ae0 100644 --- a/src/dev_push.erl +++ b/src/dev_push.erl @@ -363,7 +363,7 @@ push_downstream_remote(TargetID, NextSlotOnProc, Origin, RawOpts) -> {ok, NewOpts} -> NewOpts; _ -> RawOpts end, - Self = hb_opts:get(host, host_not_specified, Opts), + Self = hb_opts:get(node_host, host_not_specified, Opts), ?event(remote_push, {push_downstream_remote, {target, TargetID}, @@ -770,182 +770,201 @@ parse_redirect(Location, Opts) -> %%% Tests -full_push_test_() -> - {timeout, 30, fun() -> - dev_process_test_vectors:init(), - Opts = #{ - process_async_cache => false, - priv_wallet => hb:wallet(), - cache_control => <<"always">> - }, - Base = dev_process_test_vectors:aos_process(Opts), - hb_cache:write(Base, Opts), - {ok, SchedInit} = - hb_ao:resolve(Base, #{ - <<"method">> => <<"POST">>, - <<"path">> => <<"schedule">>, - <<"body">> => Base - }, - Opts - ), - ?event({test_setup, {base, Base}, {sched_init, SchedInit}}), - Script = ping_pong_script(2), - ?event({script, Script}), - {ok, Req} = dev_process_test_vectors:schedule_aos_call(Base, Script, Opts), - ?event({msg_sched_result, Req}), - {ok, StartingMsgSlot} = - hb_ao:resolve(Req, #{ <<"path">> => <<"slot">> }, Opts), - ?event({starting_msg_slot, StartingMsgSlot}), - Res = - #{ - <<"path">> => <<"push">>, - <<"slot">> => StartingMsgSlot - }, - {ok, _} = hb_ao:resolve(Base, Res, Opts), - ?assertEqual( - {ok, <<"Done.">>}, - hb_ao:resolve(Base, <<"now/results/data">>, Opts) - ) - end}. +dev_push_test_() -> + {inparallel, push_test_cases()}. -push_as_identity_test_() -> - {timeout, 90, fun() -> - dev_process_test_vectors:init(), - % Create a new identity for the scheduler. 
- DefaultWallet = hb:wallet(), - SchedulingWallet = ar_wallet:new(), - SchedulingID = hb_util:human_id(SchedulingWallet), - ComputeWallet = ar_wallet:new(), - ComputeID = hb_util:human_id(ComputeWallet), - Opts = #{ - priv_wallet => DefaultWallet, - cache_control => <<"always">>, - identities => #{ - SchedulingID => #{ - priv_wallet => SchedulingWallet, - store => [hb_test_utils:test_store()] - }, - ComputeID => #{ - priv_wallet => ComputeWallet - } - } +push_test_cases() -> + [ + {timeout, 30, fun test_full_push/0}, + {timeout, 90, fun test_push_as_identity/0}, + {timeout, 30, fun test_multi_process_push/0}, + {timeout, 30, fun test_push_prompts_encoding_change/0}, + {timeout, 60, fun test_remote_routed_push/0}, + {timeout, 30, fun test_oracle_push/0} + ] + ++ + genesis_wasm_tests(). + +-ifdef(ENABLE_GENESIS_WASM). +genesis_wasm_tests() -> [{timeout, 30, fun test_nested_push_prompts_encoding_change/0}]. +-else. +genesis_wasm_tests() -> []. +-endif. + +test_full_push() -> + dev_process_test_vectors:init(), + Opts = #{ + process_async_cache => false, + priv_wallet => hb:wallet(), + cache_control => <<"always">>, + store => [hb_test_utils:test_store(hb_store_lmdb)] + }, + Base = dev_process_test_vectors:aos_process(Opts), + hb_cache:write(Base, Opts), + {ok, SchedInit} = + hb_ao:resolve(Base, #{ + <<"method">> => <<"POST">>, + <<"path">> => <<"schedule">>, + <<"body">> => Base }, - % Create a new test AOS process, which will use the given identities as - % its authority and scheduler. - Base = - dev_process_test_vectors:aos_process( - Opts#{ - authority => ComputeID, - scheduler => [SchedulingID, ComputeID] - } - ), - ?event({base, Base}), - % Perform the remainder of the test as with `full_push_test_/0'. 
- hb_cache:write(Base, Opts), - {ok, SchedInit} = - hb_ao:resolve(Base, #{ - <<"method">> => <<"POST">>, - <<"path">> => <<"schedule">>, - <<"body">> => Base - }, - Opts - ), - ?event({test_setup, {base, Base}, {sched_init, SchedInit}}), - Script = ping_pong_script(2), - ?event({script, Script}), - {ok, Req} = dev_process_test_vectors:schedule_aos_call(Base, Script), - ?event(push, {msg_sched_result, Req}), - {ok, StartingMsgSlot} = - hb_ao:resolve(Req, #{ <<"path">> => <<"slot">> }, Opts), - ?event({starting_msg_slot, StartingMsgSlot}), - Res = - #{ - <<"path">> => <<"push">>, - <<"slot">> => StartingMsgSlot + Opts + ), + ?event({test_setup, {base, Base}, {sched_init, SchedInit}}), + Script = ping_pong_script(2), + ?event({script, Script}), + {ok, Req} = dev_process_test_vectors:schedule_aos_call(Base, Script, Opts), + ?event({msg_sched_result, Req}), + {ok, StartingMsgSlot} = + hb_ao:resolve(Req, #{ <<"path">> => <<"slot">> }, Opts), + ?event({starting_msg_slot, StartingMsgSlot}), + Res = + #{ + <<"path">> => <<"push">>, + <<"slot">> => StartingMsgSlot + }, + {ok, _} = hb_ao:resolve(Base, Res, Opts), + ?assertEqual( + {ok, <<"Done.">>}, + hb_ao:resolve(Base, <<"now/results/data">>, Opts) + ). + +test_push_as_identity() -> + dev_process_test_vectors:init(), + % Create a new identity for the scheduler. 
+ DefaultWallet = hb:wallet(), + SchedulingWallet = ar_wallet:new(), + SchedulingID = hb_util:human_id(SchedulingWallet), + ComputeWallet = ar_wallet:new(), + ComputeID = hb_util:human_id(ComputeWallet), + TestStore = [hb_test_utils:test_store(hb_store_lmdb)], + Opts = #{ + priv_wallet => DefaultWallet, + cache_control => <<"always">>, + store => TestStore, + identities => #{ + SchedulingID => #{ + priv_wallet => SchedulingWallet, + store => [hb_test_utils:test_store(hb_store_lmdb)] }, - {ok, _} = hb_ao:resolve(Base, Res, Opts), - ?assertEqual( - {ok, <<"Done.">>}, - hb_ao:resolve(Base, <<"now/results/data">>, Opts) + ComputeID => #{ + priv_wallet => ComputeWallet + } + } + }, + % Create a new test AOS process, which will use the given identities as + % its authority and scheduler. + Base = + dev_process_test_vectors:aos_process( + Opts#{ + authority => ComputeID, + scheduler => [SchedulingID, ComputeID] + } ), - % Validate that the scheduler's wallet was used to sign the message. - Assignment = - hb_ao:get( - <<"schedule/assignments/2">>, - Base, - Opts - ), - Committers = hb_ao:get( - <<"committers">>, - hb_cache:read_all_commitments(Assignment, Opts), + ?event({base, Base}), + % Perform the remainder of the test as with `full_push_test_/0'. 
+ hb_cache:write(Base, Opts), + {ok, SchedInit} = + hb_ao:resolve(Base, #{ + <<"method">> => <<"POST">>, + <<"path">> => <<"schedule">>, + <<"body">> => Base + }, + Opts + ), + ?event({test_setup, {base, Base}, {sched_init, SchedInit}}), + Script = ping_pong_script(2), + ?event({script, Script}), + {ok, Req} = dev_process_test_vectors:schedule_aos_call(Base, Script), + ?event(push, {msg_sched_result, Req}), + {ok, StartingMsgSlot} = + hb_ao:resolve(Req, #{ <<"path">> => <<"slot">> }, Opts), + ?event({starting_msg_slot, StartingMsgSlot}), + Res = + #{ + <<"path">> => <<"push">>, + <<"slot">> => StartingMsgSlot + }, + {ok, _} = hb_ao:resolve(Base, Res, Opts), + ?assertEqual( + {ok, <<"Done.">>}, + hb_ao:resolve(Base, <<"now/results/data">>, Opts) + ), + % Validate that the scheduler's wallet was used to sign the message. + Assignment = + hb_ao:get( + <<"schedule/assignments/2">>, + Base, Opts ), - ?assert(lists:member(SchedulingID, Committers)), - ?assert(lists:member(ComputeID, Committers)), - % Validate that the compute wallet was used to sign the message. - ?assertEqual( - [ComputeID], - hb_ao:get(<<"schedule/assignments/2/body/committers">>, Base, Opts) - ) - end}. + Committers = hb_ao:get( + <<"committers">>, + hb_cache:read_all_commitments(Assignment, Opts), + Opts + ), + ?assert(lists:member(SchedulingID, Committers)), + ?assert(lists:member(ComputeID, Committers)), + % Validate that the compute wallet was used to sign the message. + ?assertEqual( + [ComputeID], + hb_ao:get(<<"schedule/assignments/2/body/committers">>, Base, Opts) + ). 
-multi_process_push_test_() -> - {timeout, 30, fun() -> - dev_process_test_vectors:init(), - Opts = #{ - priv_wallet => hb:wallet(), - cache_control => <<"always">> +test_multi_process_push() -> + dev_process_test_vectors:init(), + Opts = #{ + priv_wallet => hb:wallet(), + cache_control => <<"always">>, + store => [hb_test_utils:test_store(hb_store_lmdb)] + }, + Proc1 = dev_process_test_vectors:aos_process(Opts), + hb_cache:write(Proc1, Opts), + {ok, _SchedInit1} = + hb_ao:resolve(Proc1, #{ + <<"method">> => <<"POST">>, + <<"path">> => <<"schedule">>, + <<"body">> => Proc1 }, - Proc1 = dev_process_test_vectors:aos_process(Opts), - hb_cache:write(Proc1, Opts), - {ok, _SchedInit1} = - hb_ao:resolve(Proc1, #{ - <<"method">> => <<"POST">>, - <<"path">> => <<"schedule">>, - <<"body">> => Proc1 - }, - Opts - ), - {ok, _} = dev_process_test_vectors:schedule_aos_call(Proc1, reply_script()), - Proc2 = dev_process_test_vectors:aos_process(Opts), - hb_cache:write(Proc2, Opts), - {ok, _SchedInit2} = - hb_ao:resolve(Proc2, #{ - <<"method">> => <<"POST">>, - <<"path">> => <<"schedule">>, - <<"body">> => Proc2 - }, - Opts - ), - ProcID1 = hb_message:id(Proc1, all, Opts), - ProcID2 = hb_message:id(Proc2, all, Opts), - ?event(push, {testing_with, {proc1_id, ProcID1}, {proc2_id, ProcID2}}), - {ok, ToPush} = dev_process_test_vectors:schedule_aos_call( - Proc2, - << - "Handlers.add(\"Pong\",\n" - " function (test) return true end,\n" - " function(m)\n" - " print(\"GOT PONG\")\n" - " end\n" - ")\n" - "Send({ Target = \"", (ProcID1)/binary, "\", Action = \"Ping\" })" - >> - ), - SlotToPush = hb_ao:get(<<"slot">>, ToPush, Opts), - ?event(push, {slot_to_push_proc2, SlotToPush}), - Res = - #{ - <<"path">> => <<"push">>, - <<"slot">> => SlotToPush, - <<"result-depth">> => 1 - }, - {ok, PushResult} = hb_ao:resolve(Proc2, Res, Opts), - ?event(push, {push_result_proc2, PushResult}), - AfterPush = hb_ao:resolve(Proc2, <<"now/results/data">>, Opts), - ?event(push, {after_push, AfterPush}), - 
?assertEqual({ok, <<"GOT PONG">>}, AfterPush) - end}. + Opts + ), + {ok, _} = dev_process_test_vectors:schedule_aos_call(Proc1, reply_script()), + Proc2 = dev_process_test_vectors:aos_process(Opts), + hb_cache:write(Proc2, Opts), + {ok, _SchedInit2} = + hb_ao:resolve(Proc2, #{ + <<"method">> => <<"POST">>, + <<"path">> => <<"schedule">>, + <<"body">> => Proc2 + }, + Opts + ), + ProcID1 = hb_message:id(Proc1, all, Opts), + ProcID2 = hb_message:id(Proc2, all, Opts), + ?event(push, {testing_with, {proc1_id, ProcID1}, {proc2_id, ProcID2}}), + {ok, ToPush} = dev_process_test_vectors:schedule_aos_call( + Proc2, + << + "Handlers.add(\"Pong\",\n" + " function (test) return true end,\n" + " function(m)\n" + " print(\"GOT PONG\")\n" + " end\n" + ")\n" + "Send({ Target = \"", (ProcID1)/binary, "\", Action = \"Ping\" })" + >> + ), + SlotToPush = hb_ao:get(<<"slot">>, ToPush, Opts), + ?event(push, {slot_to_push_proc2, SlotToPush}), + Res = + #{ + <<"path">> => <<"push">>, + <<"slot">> => SlotToPush, + <<"result-depth">> => 1 + }, + {ok, PushResult} = hb_ao:resolve(Proc2, Res, Opts), + ?event(push, {push_result_proc2, PushResult}), + AfterPush = hb_ao:resolve(Proc2, <<"now/results/data">>, Opts), + ?event(push, {after_push, AfterPush}), + ?assertEqual({ok, <<"GOT PONG">>}, AfterPush). push_with_redirect_hint_test_disabled() -> {timeout, 30, fun() -> @@ -1030,9 +1049,7 @@ push_with_redirect_hint_test_disabled() -> ?assertEqual({ok, <<"GOT PONG">>}, AfterPush) end}. -push_prompts_encoding_change_test_() -> - {timeout, 30, fun push_prompts_encoding_change/0}. -push_prompts_encoding_change() -> +test_push_prompts_encoding_change() -> dev_process_test_vectors:init(), Opts = #{ priv_wallet => hb:wallet(), @@ -1069,9 +1086,7 @@ push_prompts_encoding_change() -> ), ?assertMatch({error, #{ <<"status">> := 422 }}, Res). -remote_routed_push_test_() -> - {timeout, 60, fun remote_routed_push/0}. 
-remote_routed_push() -> +test_remote_routed_push() -> % Creates a network of nodes and processes with the following structure: % Node 1: % - Schedules for process 1. @@ -1095,9 +1110,9 @@ remote_routed_push() -> % % We start by generating the isolated wallets and stores for each node. N1Wallet = ar_wallet:new(), - N1Store = [hb_test_utils:test_store()], + N1Store = [hb_test_utils:test_store(hb_store_lmdb)], N2Wallet = ar_wallet:new(), - N2Store = [hb_test_utils:test_store()], + N2Store = [hb_test_utils:test_store(hb_store_lmdb)], % Next, create the second node and process. We do this before node 1 such % that the routes of node 1 and the target of process 1's message are known % when we create them. @@ -1228,24 +1243,25 @@ remote_routed_push() -> hb_ao:resolve(LoadedProc2, <<"now/at-slot">>, N2Opts) ). -oracle_push_test_() -> {timeout, 30, fun oracle_push/0}. -oracle_push() -> +test_oracle_push() -> dev_process_test_vectors:init(), - Client = dev_process_test_vectors:aos_process(), - {ok, _} = hb_cache:write(Client, #{}), - {ok, _} = dev_process_test_vectors:schedule_aos_call(Client, oracle_script()), + TestStore = [hb_test_utils:test_store(hb_store_lmdb)], + Opts = #{ priv_wallet => hb:wallet(), store => TestStore }, + Client = dev_process_test_vectors:aos_process(Opts), + {ok, _} = hb_cache:write(Client, Opts), + {ok, _} = dev_process_test_vectors:schedule_aos_call(Client, oracle_script(), Opts), Res = #{ <<"path">> => <<"push">>, <<"slot">> => 0 }, - {ok, PushResult} = hb_ao:resolve(Client, Res, #{ priv_wallet => hb:wallet() }), + {ok, PushResult} = hb_ao:resolve(Client, Res, Opts), ?event({result, PushResult}), ComputeRes = hb_ao:resolve( Client, <<"now/results/data">>, - #{ priv_wallet => hb:wallet() } + Opts ), ?event({compute_res, ComputeRes}), ?assertMatch({ok, _}, ComputeRes). 
@@ -1254,16 +1270,14 @@ oracle_push() -> %% @doc Test that a message that generates another message which resides on an %% ANS-104 scheduler leads to `~push@1.0` re-signing the message correctly. %% Requires `ENABLE_GENESIS_WASM' to be enabled. -nested_push_prompts_encoding_change_test_() -> - {timeout, 30, fun nested_push_prompts_encoding_change/0}. -nested_push_prompts_encoding_change() -> +test_nested_push_prompts_encoding_change() -> dev_process_test_vectors:init(), Opts = #{ priv_wallet => hb:wallet(), cache_control => <<"always">>, store => hb_opts:get(store) }, - ?event(push_debug, {opts, Opts}), + ?event(debug_push, {opts, Opts}), Base = dev_process_test_vectors:aos_process(Opts), hb_cache:write(Base, Opts), {ok, SchedInit} = @@ -1388,4 +1402,4 @@ oracle_script() -> }) """ - >>. \ No newline at end of file + >>. diff --git a/src/dev_query.erl b/src/dev_query.erl index 82c0bc77f..4dc43a09d 100644 --- a/src/dev_query.erl +++ b/src/dev_query.erl @@ -215,7 +215,7 @@ query_match_key(Path, Opts) -> %% @doc Return test options with a test store. test_setup() -> Store = hb_test_utils:test_store(), - Opts = #{ store => Store, priv_wallet => hb:wallet() }, + Opts = #{ store => Store, priv_wallet => ar_wallet:new() }, % Write a simple message. hb_cache:write( #{ @@ -365,4 +365,4 @@ http_test() -> Opts ), ?assertEqual(<<"binary-value">>, hb_maps:get(<<"basic">>, Msg, Opts)), - ok. + ok. \ No newline at end of file diff --git a/src/dev_query_arweave.erl b/src/dev_query_arweave.erl index 995309e6b..43801d321 100644 --- a/src/dev_query_arweave.erl +++ b/src/dev_query_arweave.erl @@ -1,5 +1,12 @@ %%% @doc An implementation of the Arweave GraphQL API, inside the `~query@1.0' %%% device. +%%% +%%% When an `hb_store_arweave' index is available, transaction results are +%%% sorted by block height via the monotonically increasing Arweave data +%%% offsets stored in `hb_store_arweave_offset'. 
The `sort' argument on the +%%% `transactions' query selects the order (`HEIGHT_DESC' by default, +%%% `HEIGHT_ASC' for ascending). A `block' range filter narrows results to +%%% transactions whose offsets fall within the requested block heights. -module(dev_query_arweave). %%% AO-Core API: -export([query/4]). @@ -35,6 +42,23 @@ query(Obj, <<"transactions">>, Args, Opts) -> {args, Args} }), Matches = match_args(Args, Opts), + Ordered = + case annotate_offsets(Matches, Opts) of + unavailable -> Matches; + Annotated -> + Order = maps:get(<<"sort">>, Args, <<"HEIGHT_DESC">>), + remove_annotations( + sort_offset_annotated( + filter_offset_annotated( + Annotated, + maps:get(<<"block">>, Args, undefined), + Opts + ), + Order, + Opts + ) + ) + end, ?event({transactions_matches, Matches}), Messages = lists:filtermap( @@ -44,7 +68,7 @@ query(Obj, <<"transactions">>, Args, Opts) -> not_found -> false end end, - Matches + Ordered ), {ok, Messages}; query(Obj, <<"block">>, Args, Opts) -> @@ -82,13 +106,14 @@ query(Block, <<"timestamp">>, _Args, Opts) -> query(Msg, <<"signature">>, _Args, Opts) -> % Return the signature of the transaction. % Other TX access methods are defined below. - case hb_maps:get(<<"commitments">>, Msg, not_found, Opts) of + case hb_message:commitments(#{ <<"committer">> => '_' }, Msg, Opts) of not_found -> {ok, null}; Commitments -> - case maps:to_list(Commitments) of + case hb_maps:keys(Commitments) of [] -> {ok, null}; - [{_CommitmentID, Commitment} | _] -> - {ok, hb_maps:get(<<"signature">>, Commitment, null, Opts)} + [CommID | _] -> + {ok, Commitment} = hb_maps:find(CommID, Commitments, Opts), + hb_maps:find(<<"signature">>, Commitment, Opts) end end; query(Msg, <<"owner">>, _Args, Opts) -> @@ -170,8 +195,93 @@ find_field_key(Field, Msg, Opts) -> end end. +%% @doc Sort messages by their block height, if Arweave index store is available. +%% Takes a list of IDs and returns the same list sorted by block height. 
IDs that +%% do not have an offset are always placed at the end of the list -- regardless +%% of the sort order. +sort_offset_annotated(IDs, SortOrder, _Opts) -> + {WithOffset, WithoutOffset} = + lists:partition( + fun({Offset, _, _}) -> Offset =/= undefined end, + IDs + ), + Sorted = + case SortOrder of + <<"HEIGHT_ASC">> -> lists:keysort(1, WithOffset); + _ -> lists:reverse(lists:keysort(1, WithOffset)) + end, + ?event( + {order_by_block, + {sort_order, SortOrder}, + {with_offset, length(WithOffset)}, + {without_offset, length(WithoutOffset)} + } + ), + Sorted ++ WithoutOffset. + +%% @doc Convert a block height range (`#{<<"min">> => Min, <<"max">> => Max}') +%% into weave byte offset boundaries `{StartOffset, EndOffset}'. Notably, the +%% highest offset is not the max block height. It is 'infinity', such that TXs +%% that are indexed but are not yet confirmed are included. +block_range_to_offset_range(Heights, Opts) -> + StartOffset = + case hb_maps:get(<<"min">>, Heights, 0, Opts) of + 0 -> 0; + RawMin -> + case read_block(hb_util:int(RawMin), Opts) of + {ok, MinBlock} -> + % The `weave_size` is the size at the _end_ of the block, + % so we must subtract the start from it to find the + % starting byte of the block. + WeaveSize = hb_util:int( + hb_maps:get(<<"weave_size">>, MinBlock, 0, Opts)), + BlockSize = hb_util:int( + hb_maps:get(<<"block_size">>, MinBlock, 0, Opts)), + WeaveSize - BlockSize; + not_found -> 0 + end + end, + EndOffset = + case hb_maps:get(<<"max">>, Heights, infinity, Opts) of + infinity -> infinity; + RawMax -> + case read_block(hb_util:int(RawMax), Opts) of + {ok, MaxBlock} -> + hb_util:int( + hb_maps:get(<<"weave_size">>, MaxBlock, 0, Opts) + ); + not_found -> infinity + end + end, + ?event( + {calculated_offsets_from_block_range, + {block_range, Heights}, + {start_offset, StartOffset}, + {end_offset, EndOffset} + } + ), + {StartOffset, EndOffset}. + +%% @doc Read block metadata by height. 
Tries the local block cache first; +%% when `query_arweave_remote_block_ranges' is `true' (the default) and the +%% block is not cached locally, falls back to `dev_arweave:block/2'. +read_block(Height, Opts) -> + case dev_arweave_block_cache:read(Height, Opts) of + {ok, Block} -> {ok, Block}; + not_found -> + case hb_opts:get(query_arweave_remote_block_ranges, true, Opts) of + true -> + ?event({read_block_remote, {height, Height}}), + dev_arweave:block(#{}, #{ <<"block">> => Height }, Opts); + _ -> not_found + end + end. + +%%% Match argument processing + %% @doc Progressively generate matches from each argument for a transaction -%% query. +%% query. The `block' range is applied as a post-filter over the candidate +%% set rather than as a set-producing index lookup. match_args(Args, Opts) when is_map(Args) -> match_args( maps:to_list( @@ -254,6 +364,49 @@ match(<<"recipients">>, Recipients, Opts) -> match(UnsupportedFilter, _, _) -> throw({unsupported_query_filter, UnsupportedFilter}). +%%% Block range post-filter + +%% @doc Offset-annotate a list of IDs, returning {StartOffset, ID} pairs. +annotate_offsets(IDs, Opts) -> + case hb_store_arweave:store_from_opts(Opts) of + no_store -> unavailable; + StoreOpts -> annotate_offsets(IDs, StoreOpts, Opts) + end. +annotate_offsets(IDs, StoreOpts, _Opts) -> + lists:map( + fun(ID) -> + case hb_store_arweave:read_offset(StoreOpts, ID) of + {ok, #{ <<"start-offset">> := Offset, <<"length">> := Length }} -> + {Offset, Length, ID}; + _ -> {undefined, undefined, ID} + end + end, + IDs + ). + +%% @doc Remove all annotations of start offset and length from a list of IDs. +remove_annotations(IDs) -> lists:map(fun({_, _, ID}) -> ID end, IDs). + +%% @doc Apply the `block' height range as a post-filter over candidate IDs. +%% Each candidate's offset is checked against the block range boundaries, +%% avoiding materialisation of the full store. 
+filter_offset_annotated(IDs, HeightRange, _Opts)
+        when HeightRange =:= undefined orelse HeightRange =:= null ->
+    % No `block' argument was supplied: nothing to filter.
+    IDs;
+filter_offset_annotated(IDs, Heights, Opts) ->
+    {StartOffset, EndOffset} =
+        block_range_to_offset_range(Heights, Opts),
+    Filtered =
+        lists:filter(
+            fun(Annotated) ->
+                offset_in_range(Annotated, StartOffset, EndOffset)
+            end,
+            IDs
+        ),
+    ?event({filtered_out_of_range, length(IDs) - length(Filtered)}),
+    Filtered.
+
+%% @doc Determine whether a single `{StartOffset, Length, ID}' annotation lies
+%% inside the `{StartOffset, EndOffset}' byte range. A lower bound of `0' and
+%% an upper bound of `infinity' each mean `unbounded' on that side.
+%%
+%% Candidates that carry no offset (`undefined', as produced by
+%% `annotate_offsets/3' for IDs missing from the index) are only kept when the
+%% range is unbounded on both sides. Without this explicit handling the
+%% arithmetic `undefined + undefined' raises `badarith' for any finite upper
+%% bound, and `undefined >= StartOffset' is `true' under Erlang term ordering
+%% (atoms sort after numbers), which would let unindexed IDs pass a lower bound
+%% that they cannot be shown to satisfy.
+offset_in_range(_Annotated, 0, infinity) ->
+    true;
+offset_in_range({undefined, _Length, _ID}, _StartOffset, _EndOffset) ->
+    false;
+offset_in_range({_Offset, undefined, _ID}, _StartOffset, _EndOffset) ->
+    false;
+offset_in_range({IDOffset, Length, _ID}, StartOffset, EndOffset) ->
+    ((StartOffset =:= 0) orelse (IDOffset >= StartOffset)) andalso
+        ((EndOffset =:= infinity) orelse (IDOffset + Length =< EndOffset)).
+
 %% @doc Return the base IDs for messages that have a matching commitment.
 matching_commitments(Field, Values, Opts) when is_list(Values) ->
     hb_util:unique(lists:flatten(
diff --git a/src/dev_query_test_vectors.erl b/src/dev_query_test_vectors.erl
index a863e2fe4..71b5e973c 100644
--- a/src/dev_query_test_vectors.erl
+++ b/src/dev_query_test_vectors.erl
@@ -35,13 +35,42 @@ get_test_blocks(Node, Opts) ->
             hb_http:request(
                 <<"GET">>,
                 Node,
-                <<"/~arweave@2.9-pre/block=", (hb_util:bin(Height))/binary>>,
+                <<"/~arweave@2.9/block=", (hb_util:bin(Height))/binary>>,
                 Opts
             )
         end,
         lists:seq(InitialHeight, FinalHeight)
     ).
 
+%% @doc Use the `~copycat@1.0' device to fetch and index blocks into a new testing
+%% node with its own local and index stores.
+test_env_with_blocks(InitialHeight, FinalHeight) ->
+    ArweaveStore =
+        #{
+            <<"store-module">> => hb_store_arweave,
+            <<"index-store">> => hb_test_utils:test_store(),
+            <<"local-store">> => LocalStore = hb_test_utils:test_store()
+        },
+    Opts =
+        #{
+            priv_wallet => ar_wallet:new(),
+            store => [LocalStore, ArweaveStore],
+            arweave_index_blocks => true,
+            query_arweave_remote_block_ranges => true
+        },
+    Node = hb_http_server:start_node(Opts),
+    hb_http:request(
+        <<"GET">>,
+        Node,
+        <<
+            "/~copycat@1.0/arweave?from=",
+            (hb_util:bin(InitialHeight))/binary, "&to=",
+            (hb_util:bin(FinalHeight))/binary
+        >>,
+        Opts
+    ),
+    {ok, Node, Opts}.
+ %% Helper function to write test message with Recipient write_test_message_with_recipient(Recipient, Opts) -> hb_cache:write( @@ -69,8 +98,9 @@ write_test_message_with_recipient(Recipient, Opts) -> simple_blocks_query_test() -> Opts = #{ - priv_wallet => hb:wallet(), - store => [hb_test_utils:test_store()] + priv_wallet => ar_wallet:new(), + store => [hb_test_utils:test_store()], + arweave_index_blocks => true }, Node = hb_http_server:start_node(Opts), get_test_blocks(Node, Opts), @@ -114,8 +144,9 @@ simple_blocks_query_test() -> block_by_height_query_test() -> Opts = #{ - priv_wallet => hb:wallet(), - store => [hb_test_utils:test_store()] + priv_wallet => ar_wallet:new(), + store => [hb_test_utils:test_store()], + arweave_index_blocks => true }, Node = hb_http_server:start_node(Opts), get_test_blocks(Node, Opts), @@ -165,7 +196,7 @@ block_by_height_query_test() -> simple_ans104_query_test() -> Opts = #{ - priv_wallet => hb:wallet(), + priv_wallet => Wallet = ar_wallet:new(), store => [hb_test_utils:test_store()] }, Node = hb_http_server:start_node(Opts), @@ -202,7 +233,7 @@ simple_ans104_query_test() -> Node, Query, #{ - <<"owners">> => [hb:address()] + <<"owners">> => [hb:address(Wallet)] }, Opts ), @@ -232,7 +263,7 @@ simple_ans104_query_test() -> transactions_query_tags_test() -> Opts = #{ - priv_wallet => hb:wallet(), + priv_wallet => ar_wallet:new(), store => [hb_test_utils:test_store()] }, Node = hb_http_server:start_node(Opts), @@ -295,7 +326,7 @@ transactions_query_tags_test() -> transactions_query_owners_test() -> Opts = #{ - priv_wallet => hb:wallet(), + priv_wallet => Wallet = ar_wallet:new(), store => [hb_test_utils:test_store()] }, Node = hb_http_server:start_node(Opts), @@ -327,7 +358,7 @@ transactions_query_owners_test() -> Node, Query, #{ - <<"owners">> => [hb:address()] + <<"owners">> => [hb:address(Wallet)] }, Opts ), @@ -357,7 +388,7 @@ transactions_query_owners_test() -> transactions_query_recipients_test() -> Opts = #{ - priv_wallet => 
hb:wallet(), + priv_wallet => ar_wallet:new(), store => [hb_test_utils:test_store()] }, Node = hb_http_server:start_node(Opts), @@ -422,7 +453,7 @@ transactions_query_recipients_test() -> transactions_query_ids_test() -> Opts = #{ - priv_wallet => hb:wallet(), + priv_wallet => ar_wallet:new(), store => [hb_test_utils:test_store()] }, Node = hb_http_server:start_node(Opts), @@ -484,7 +515,7 @@ transactions_query_ids_test() -> transactions_query_combined_test() -> Opts = #{ - priv_wallet => hb:wallet(), + priv_wallet => Wallet = ar_wallet:new(), store => [hb_test_utils:test_store()] }, Node = hb_http_server:start_node(Opts), @@ -521,7 +552,7 @@ transactions_query_combined_test() -> Node, Query, #{ - <<"owners">> => [hb:address()], + <<"owners">> => [hb:address(Wallet)], <<"ids">> => [ExpectedID] }, Opts @@ -547,12 +578,101 @@ transactions_query_combined_test() -> Res ). +transactions_query_sort_by_block_test() -> + {ok, Node, Opts} = test_env_with_blocks(1892159, 1892158), + EarlierID = <<"xBpOR2KOjYEgv5HmddMlAgYa-yMvfEVl-0XzRIfm2uY">>, + LaterID = <<"HVr7EpRhlPkbwdnoXKHf25p7BPa0qJOs6C7XueLthA0">>, + VerifyFun = + fun(Order, First, Second) -> + Q = + <<""" + query($ids: [ID!], $sort: SortOrder) { + transactions( + ids: $ids, + sort: $sort + ) { + edges { + node { + id + } + } + } + } + """>>, + ?assertMatch( + #{ + <<"data">> := #{ + <<"transactions">> := #{ + <<"edges">> := [ + #{ <<"node">> := #{ <<"id">> := First } }, + #{ <<"node">> := #{ <<"id">> := Second } } + ] + } + } + }, + dev_query_graphql:test_query( + Node, + Q, + #{ <<"ids">> => [First, Second], <<"sort">> => Order }, + Opts + ) + ) + end, + VerifyFun(<<"HEIGHT_ASC">>, EarlierID, LaterID), + VerifyFun(<<"HEIGHT_DESC">>, LaterID, EarlierID). 
+ +transactions_query_filter_by_block_test() -> + {ok, Node, Opts} = test_env_with_blocks(1892159, 1892158), + EarlierID = <<"xBpOR2KOjYEgv5HmddMlAgYa-yMvfEVl-0XzRIfm2uY">>, + LaterID = <<"HVr7EpRhlPkbwdnoXKHf25p7BPa0qJOs6C7XueLthA0">>, + VerifyFun = + fun(Start, End, Present, Absent) -> + Q = + <<""" + query($ids: [ID!], $min: Int, $max: Int) { + transactions( + ids: $ids, + block: {min: $min, max: $max} + ) { + edges { + node { + id + } + } + } + } + """>>, + #{ <<"data">> := #{ <<"transactions">> := #{ <<"edges">> := Edges } } } = + dev_query_graphql:test_query( + Node, + Q, + #{ + <<"ids">> => Present ++ Absent, + <<"min">> => Start, + <<"max">> => End + }, + Opts + ), + IDs = [ ID || #{ <<"node">> := #{ <<"id">> := ID } } <- Edges ], + lists:foreach( + fun(ID) -> ?assert(lists:member(ID, IDs)) end, + Present + ), + lists:foreach( + fun(ID) -> ?assertNot(lists:member(ID, IDs)) end, + Absent + ) + end, + VerifyFun(1892158, 1892159, [EarlierID, LaterID], []), + VerifyFun(1892156, 1892157, [], [EarlierID, LaterID]), + VerifyFun(1892157, 1892158, [EarlierID], [LaterID]), + VerifyFun(1892159, 1892160, [LaterID], [EarlierID]). 
%% @doc Test single transaction query by ID transaction_query_by_id_test() -> Opts = #{ - priv_wallet => hb:wallet(), + priv_wallet => ar_wallet:new(), store => [hb_test_utils:test_store()] }, Node = hb_http_server:start_node(Opts), @@ -602,7 +722,7 @@ transaction_query_by_id_test() -> transaction_query_full_test() -> Opts = #{ - priv_wallet => SenderKey = hb:wallet(), + priv_wallet => SenderKey = ar_wallet:new(), store => [hb_test_utils:test_store()] }, Node = hb_http_server:start_node(Opts), @@ -679,7 +799,7 @@ transaction_query_full_test() -> transaction_query_not_found_test() -> Opts = #{ - priv_wallet => hb:wallet(), + priv_wallet => ar_wallet:new(), store => [hb_test_utils:test_store()] }, Res = @@ -715,7 +835,7 @@ transaction_query_not_found_test() -> transaction_query_with_anchor_test() -> Opts = #{ - priv_wallet => hb:wallet(), + priv_wallet => Wallet = ar_wallet:new(), store => [hb_test_utils:test_store()] }, Node = hb_http_server:start_node(Opts), @@ -727,7 +847,7 @@ transaction_query_with_anchor_test() -> anchor = AnchorID = crypto:strong_rand_bytes(32), data = <<"test-data">> }, - hb:wallet() + Wallet ), <<"structured@1.0">>, <<"ans104@1.0">>, diff --git a/src/dev_rate_limit.erl b/src/dev_rate_limit.erl new file mode 100644 index 000000000..952a5976e --- /dev/null +++ b/src/dev_rate_limit.erl @@ -0,0 +1,261 @@ +%%% @doc A basic rate limiter device. It is intended for use as a `~hook@1.0` +%%% `on/request` handler. It limits the number of requests per time period from a +%%% given IP address, returning a 429 status code and response if the limit is +%%% exceeded. +%%% +%%% The device can be configured with the following node message options: +%%% +%%% ``` +%%% rate_limit_requests: The maximum number of requests per period from a +%%% given user. +%%% Default: 1000. +%%% rate_limit_period: The rate at which peer's fully recharge balances. +%%% Default: 60 (unit: seconds). +%%% rate_limit_max: The maximum `balance' that a peer may hold. 
+%%%                         Default: 1000.
+%%%     rate_limit_min:     The minimum `balance' that a peer may hold.
+%%%                         Default: -1000.
+%%%     rate_limit_exempt:  A list of peer IDs that are exempt from the limit.
+%%%                         Default: [].
+%%% ```
+%%%
+%%% Notably, the `balance` of a user -- in terms of their available limit -- may
+%%% become _negative_ if they continue to make calls even after exceeding their
+%%% limit. The effect of this is that users that make too many requests to the
+%%% server repeatedly simply receive no further service. The `rate_limit_min`
+%%% option can be used to specify the minimum balance that users will hit. Any
+%%% further requests are rejected but do not diminish their balance further.
+%%%
+%%% If the rate limiter server does not answer within `?LOOKUP_ATTEMPTS'
+%%% attempts of `?LOOKUP_TIMEOUT' milliseconds each, the request is allowed
+%%% through rather than being blocked indefinitely.
+-module(dev_rate_limit).
+-export([request/3]).
+-include("include/hb.hrl").
+-include_lib("eunit/include/eunit.hrl").
+
+-define(LOOKUP_TIMEOUT, 1000).
+-define(LOOKUP_ATTEMPTS, 3).
+-define(DEFAULT_MAX, 1_000).
+-define(DEFAULT_MIN, -1_000).
+-define(DEFAULT_REQS, 1000).
+-define(DEFAULT_PERIOD, 60).
+
+%% @doc `on/request' handler that triggers rate limit counting and returns a
+%% 429 status code and response if the limit is exceeded. The response includes
+%% a `retry-after' header that indicates the number of seconds the client should
+%% wait before making the next request.
+request(_, Msg, Opts) ->
+    ?event(rate_limit, {request, {msg, Msg}}),
+    Reference = request_reference(hb_maps:get(<<"request">>, Msg, #{}, Opts), Opts),
+    case is_limited(Reference, Opts) of
+        {true, Balance} ->
+            ?event(
+                rate_limit,
+                {rate_limit_exceeded, {caller, Reference}, {balance, Balance}}
+            ),
+            RechargeRate =
+                hb_opts:get(rate_limit_requests, ?DEFAULT_REQS, Opts) /
+                    hb_opts:get(rate_limit_period, ?DEFAULT_PERIOD, Opts),
+            RawRetryAfter = ceil(abs(Balance) / RechargeRate), % ...seconds
+            % If the node config specifies a `min` balance of `0`, callers may
+            % have a non-negative balance but still be rate-limited. In this case,
+            % we bump the `retry-after` to 1 second so as not to confuse the
+            % caller.
+            RetryAfter =
+                if RawRetryAfter =< 0.0 -> 1;
+                true -> RawRetryAfter
+                end,
+            RetryAfterBin = hb_util:bin(RetryAfter),
+            ?event(
+                rate_limit,
+                {rate_limit_exceeded,
+                    {caller, Reference},
+                    {balance, Balance},
+                    {retry_after, RetryAfterBin}
+                }
+            ),
+            % Transform the given request into a request to return a 429 status
+            % code and response.
+            {error,
+                #{
+                    <<"status">> => 429,
+                    <<"reason">> => <<"rate-limited">>,
+                    <<"body">> => <<"Rate limit exceeded.">>,
+                    <<"retry-after">> => RetryAfterBin
+                }
+            };
+        false ->
+            ?event(rate_limit, {rate_limit_allowed, {caller, Reference}}),
+            {ok, Msg}
+    end.
+
+%% @doc The singleton ID of the rate limiter server. This allows us to run
+%% multiple rate limiters on the same node if needed, each with its own
+%% configuration, but with all of the callers sharing the same rate limiter
+%% server.
+server_id(Opts) ->
+    {?MODULE, hb_util:human_id(hb_opts:get(priv_wallet, undefined, Opts))}.
+
+%% @doc Determine the reference of the caller. Presently only the `ip` form
+%% may be used to identify the caller.
+request_reference(Msg, Opts) -> hb_private:get(<<"ip">>, Msg, Opts).
+
+%% @doc Check if the caller is limited according to the current state of the
+%% rate limiter server. Returns `false' when the request may proceed, or
+%% `{true, Balance}' when the caller's post-debit balance is zero or negative.
+%%
+%% If the server does not reply within `?LOOKUP_TIMEOUT' milliseconds it is
+%% replaced and the lookup is retried, at most `?LOOKUP_ATTEMPTS' times in
+%% total. Once the attempts are exhausted the caller is treated as not limited
+%% (fail open), so that a broken limiter cannot block request handling forever.
+is_limited(Reference, Opts) ->
+    is_limited(Reference, Opts, ?LOOKUP_ATTEMPTS).
+is_limited(Reference, _Opts, AttemptsLeft) when AttemptsLeft =< 0 ->
+    ?event(warning, {rate_limit_unavailable, {caller, Reference}}),
+    false;
+is_limited(Reference, Opts, AttemptsLeft) ->
+    PID = ensure_rate_limiter_started(Opts),
+    PID ! {request, self(), Reference},
+    receive
+        {incremented, Balance} when Balance > 0 -> false;
+        {incremented, Balance} when Balance =< 0 -> {true, Balance}
+    after ?LOOKUP_TIMEOUT ->
+        ?event(warning, {rate_limit_timeout, restarting}),
+        % Terminate the unresponsive server so that it does not linger --
+        % holding its state and possibly replying late -- once its name has
+        % been released for a replacement.
+        is_pid(PID) andalso exit(PID, kill),
+        hb_name:unregister(server_id(Opts)),
+        is_limited(Reference, Opts, AttemptsLeft - 1)
+    end.
+
+%% @doc Ensure that the rate limiter server is started and return the PID of
+%% the server. In the event of two instantaneous spawns, one of the new processes
+%% will fail with an error and the other will succeed. The effect to the caller
+%% is the same: A rate limiter is available to query.
+ensure_rate_limiter_started(Opts) ->
+    ServerID = server_id(Opts),
+    hb_name:singleton(
+        ServerID,
+        fun() -> start_server(ServerID, Opts) end
+    ).
+
+%% @doc Read the limiter configuration from the node options and enter the
+%% server loop. Exempt references are stored with an `infinity' balance so that
+%% they are never debited.
+start_server(ServerID, Opts) ->
+    Reqs = hb_opts:get(rate_limit_requests, ?DEFAULT_REQS, Opts),
+    Period = hb_opts:get(rate_limit_period, ?DEFAULT_PERIOD, Opts),
+    Max = hb_opts:get(rate_limit_max, ?DEFAULT_MAX, Opts),
+    Min = hb_opts:get(rate_limit_min, ?DEFAULT_MIN, Opts),
+    Exempt = hb_opts:get(rate_limit_exempt, [], Opts),
+    ?event(
+        rate_limit,
+        {started_rate_limiter,
+            {server_id, ServerID},
+            {reqs, Reqs},
+            {period, Period},
+            {max, Max},
+            {min, Min},
+            {exempt, Exempt}
+        }
+    ),
+    server_loop(
+        #{
+            reqs => Reqs,
+            period => Period,
+            max => Max,
+            min => Min,
+            peers => #{ Ref => infinity || Ref <- Exempt },
+            % Time (ms) of the last sweep of fully recharged accounts.
+            pruned => erlang:system_time(millisecond)
+        }
+    ).
+
+%% @doc The main loop of the rate limiter server. Only responds to two messages:
+%% - `{request, Self, Reference}': Debit the account of the given reference by 1.
+%% - `{balance, PID, Reference}': Return the current balance of the given reference.
+%% The `balance` call is not presently used, but seems sensible to have.
+%% Any other message is logged and discarded so that it cannot accumulate in
+%% the server's mailbox.
+server_loop(State) ->
+    receive
+        {request, PID, Reference} ->
+            Now = erlang:system_time(millisecond),
+            NewState = debit(Reference, 1, State, Now),
+            Balance = account_balance(Reference, NewState, Now),
+            ?event(
+                rate_limit_short,
+                {rate_limit_debited, {target, Reference}, {balance, Balance}}
+            ),
+            PID ! {incremented, Balance},
+            server_loop(maybe_prune(NewState, Now));
+        {balance, PID, Reference} ->
+            PID ! {balance, account_balance(Reference, State)},
+            server_loop(State);
+        Unexpected ->
+            ?event(warning, {rate_limit_unexpected_message, Unexpected}),
+            server_loop(State)
+    end.
+
+%% @doc Drop the accounts of peers whose balance has fully recharged, at most
+%% once per `period'. An account at the maximum balance is indistinguishable
+%% from an absent one (`account_balance/3' reports `max' for unknown
+%% references), so removing it does not change any caller's limit; it only
+%% stops the `peers' map from growing with every reference ever seen. Exempt
+%% (`infinity') entries are always retained.
+maybe_prune(State = #{ period := Period, pruned := LastPruned }, Now)
+        when Now - LastPruned < Period * 1000 ->
+    State;
+maybe_prune(State = #{ max := Max, peers := Peers }, Now) ->
+    Active =
+        maps:filter(
+            fun(_Ref, infinity) -> true;
+               (Ref, _Account) -> account_balance(Ref, State, Now) < Max
+            end,
+            Peers
+        ),
+    State#{ peers => Active, pruned => Now }.
+
+%% @doc Debit the account of the given reference by the given quantity. The
+%% resulting balance never drops below the configured `min'. Exempt references
+%% (balance `infinity') are left untouched.
+debit(Ref, Amount, State = #{ peers := Peers, min := Min }, Now) ->
+    case account_balance(Ref, State, Now) of
+        infinity -> State;
+        Balance ->
+            State#{
+                peers =>
+                    Peers#{
+                        Ref =>
+                            #{
+                                balance => max(Min, Balance - Amount),
+                                last => Now
+                            }
+                    }
+            }
+    end.
+
+%% @doc Calculate the current balance for a user, including unused capacity
+%% accrued since the last interaction. Returns `infinity' for exempt
+%% references, the configured `max' for references that have no account yet,
+%% and otherwise the stored balance plus the recharge earned since the last
+%% interaction, capped at `max'.
+account_balance(Reference, State) ->
+    account_balance(Reference, State, erlang:system_time(millisecond)).
+account_balance(
+        Reference,
+        #{ max := Max, reqs := Reqs, period := Period, peers := Peers },
+        Time
+    ) ->
+    case maps:get(Reference, Peers, not_found) of
+        infinity -> infinity;
+        not_found -> Max;
+        #{ balance := Balance, last := LastInteraction } ->
+            % `Reqs' are recharged per `Period' seconds; timestamps are in
+            % milliseconds.
+            RechargeRate = Reqs / (Period * 1000),
+            % Timestamps come from `erlang:system_time/1', which is not
+            % monotonic. Clamp the elapsed time so that a backwards clock
+            % adjustment cannot reduce the caller's balance.
+            Elapsed = max(0, Time - LastInteraction),
+            RechargedSinceLast = Elapsed * RechargeRate,
+            min(Max, Balance + RechargedSinceLast)
+    end.
+
+%%% Tests
+
+rate_limit_test() ->
+    ServerOpts = #{
+        rate_limit_requests => 2,
+        rate_limit_period => 1,
+        rate_limit_max => 2,
+        on =>
+            #{
+                <<"request">> =>
+                    #{
+                        <<"device">> => <<"rate-limit@1.0">>
+                    }
+            }
+    },
+    ServerNode = hb_http_server:start_node(ServerOpts),
+    ?assertMatch(
+        {ok, _},
+        hb_http:get(ServerNode, <<"id">>, #{})
+    ),
+    ?debug_wait(100),
+    ?assertMatch(
+        {ok, _},
+        hb_http:get(ServerNode, <<"id">>, #{})
+    ),
+    ?debug_wait(100),
+    ?assertMatch(
+        {error, #{ <<"status">> := 429 }},
+        hb_http:get(ServerNode, <<"id">>, #{})
+    ).
+
+rate_limit_reset_test() ->
+    ServerOpts = #{
+        rate_limit_requests => 2,
+        rate_limit_period => 1,
+        rate_limit_max => 2,
+        rate_limit_min => 0,
+        rate_limit_exempt => [],
+        on =>
+            #{
+                <<"request">> =>
+                    #{
+                        <<"device">> => <<"rate-limit@1.0">>
+                    }
+            }
+    },
+    ServerNode = hb_http_server:start_node(ServerOpts),
+    ?assertMatch({ok, _}, hb_http:get(ServerNode, <<"id">>, #{})),
+    ?assertMatch({ok, _}, hb_http:get(ServerNode, <<"id">>, #{})),
+    ?assertMatch(
+        {error, #{ <<"status">> := 429 }},
+        hb_http:get(ServerNode, <<"id">>, #{})
+    ),
+    timer:sleep(1_000),
+    ?assertMatch({ok, _}, hb_http:get(ServerNode, <<"id">>, #{})).
\ No newline at end of file diff --git a/src/dev_router.erl b/src/dev_router.erl index 94cf88b2c..1e87a65d5 100644 --- a/src/dev_router.erl +++ b/src/dev_router.erl @@ -406,7 +406,11 @@ match(Base, Req, Opts) -> {req, Req} } ), - TargetPath = hb_util:find_target_path(Req, Opts), + TargetPath = + case hb_util:find_target_path(Req, Opts) of + no_path -> no_path; + {_TargetKey, Path} -> Path + end, Match = match_routes( Req#{ <<"path">> => TargetPath }, @@ -446,9 +450,10 @@ match_routes(#{ <<"path">> := Explicit = <<"https://", _/binary>> }, _, _, _) -> #{ <<"node">> => Explicit, <<"reference">> => <<"explicit">> }; match_routes(_, _, [], _) -> no_matches; match_routes(ToMatch, Routes, [XKey|Keys], Opts) -> - XM = hb_ao:get(XKey, Routes, Opts), + NormRoutes = hb_ao:normalize_keys(Routes, Opts), + XM = hb_maps:get(hb_ao:normalize_key(XKey), NormRoutes, not_found, Opts), Template = - hb_ao:get( + hb_maps:get( <<"template">>, XM, #{}, @@ -610,19 +615,28 @@ choose_count(RawChoose, Nodes) -> min(NormalizedChoose, length(Nodes)). normalize_strategy(RawStrategy) -> - case hb_util:to_lower(hb_util:bin(RawStrategy)) of - <<"all">> -> <<"All">>; - <<"random">> -> <<"Random">>; - <<"by-base">> -> <<"By-Base">>; - <<"by_base">> -> <<"By-Base">>; - <<"by-weight">> -> <<"By-Weight">>; - <<"by_weight">> -> <<"By-Weight">>; - <<"nearest">> -> <<"Nearest">>; - <<"nearest-integer">> -> <<"Nearest-Integer">>; - <<"nearest_integer">> -> <<"Nearest-Integer">>; - _ -> <<"All">> + Lower = hb_util:to_lower(hb_util:bin(RawStrategy)), + case Lower of + <<"shuffled-", Rest/binary>> -> + <<"Shuffled-", (normalize_strategy_base(Rest))/binary>>; + <<"shuffled_", Rest/binary>> -> + <<"Shuffled-", (normalize_strategy_base(Rest))/binary>>; + _ -> + normalize_strategy_base(Lower) end. 
+normalize_strategy_base(<<"all">>) -> <<"All">>; +normalize_strategy_base(<<"random">>) -> <<"Random">>; +normalize_strategy_base(<<"by-base">>) -> <<"By-Base">>; +normalize_strategy_base(<<"by_base">>) -> <<"By-Base">>; +normalize_strategy_base(<<"by-weight">>) -> <<"By-Weight">>; +normalize_strategy_base(<<"by_weight">>) -> <<"By-Weight">>; +normalize_strategy_base(<<"nearest">>) -> <<"Nearest">>; +normalize_strategy_base(<<"nearest-integer">>) -> <<"Nearest-Integer">>; +normalize_strategy_base(<<"nearest_integer">>) -> <<"Nearest-Integer">>; +normalize_strategy_base(<<"range">>) -> <<"Range">>; +normalize_strategy_base(_) -> <<"All">>. + route_integer(Int, _Opts) when is_integer(Int) -> Int; route_integer(Bin, Opts) when is_binary(Bin) -> diff --git a/src/dev_scheduler.erl b/src/dev_scheduler.erl index 8b4da9629..3d165342a 100644 --- a/src/dev_scheduler.erl +++ b/src/dev_scheduler.erl @@ -211,7 +211,7 @@ find_next_assignment(Base, Req, _Schedule, LastSlot, Opts) -> end, case LocalCacheRes of {ok, Worker, Assignment} -> - ?event(next_debug, + ?event(debug_next, {in_cache, {slot, LastSlot + 1}, {assignment, Assignment} @@ -1607,8 +1607,8 @@ http_init(Opts) -> priv_wallet => Wallet, store => [ #{ - <<"store-module">> => hb_store_ets, - <<"name">> => <<"cache-mainnet/ets">> + <<"store-module">> => hb_store_volatile, + <<"name">> => <<"cache-TEST/volatile">> }, #{ <<"store-module">> => hb_store_gateway, <<"store">> => [] } ] diff --git a/src/dev_whois.erl b/src/dev_whois.erl index 61011c489..a30bf09fd 100644 --- a/src/dev_whois.erl +++ b/src/dev_whois.erl @@ -17,7 +17,7 @@ echo(_, Req, Opts) -> node(_, _, Opts) -> case ensure_host(Opts) of {ok, NewOpts} -> - {ok, hb_opts:get(host, <<"unknown">>, NewOpts)}; + {ok, hb_opts:get(node_host, <<"unknown">>, NewOpts)}; Error -> Error end. @@ -25,12 +25,12 @@ node(_, _, Opts) -> %% @doc Return the node message ensuring that the host is set. 
If it is not, we %% attempt to find the host information from the specified bootstrap node. ensure_host(Opts) -> - case hb_opts:get(host, <<"unknown">>, Opts) of + case hb_opts:get(node_host, <<"unknown">>, Opts) of <<"unknown">> -> case bootstrap_node_echo(Opts) of {ok, Host} -> % Set the host information in the persisted node message. - hb_http_server:set_opts(NewOpts = Opts#{ host => Host }), + hb_http_server:set_opts(NewOpts = Opts#{ node_host => Host }), {ok, NewOpts}; Error -> Error diff --git a/src/hb.app.src b/src/hb.app.src index 7416d6083..1c1bc1361 100644 --- a/src/hb.app.src +++ b/src/hb.app.src @@ -10,7 +10,8 @@ ssl, cowboy, os_mon, - gun + gun, + hackney ]}, {env, []}, {modules, []}, diff --git a/src/hb.erl b/src/hb.erl index c6cce5c75..aef04c659 100644 --- a/src/hb.erl +++ b/src/hb.erl @@ -91,12 +91,13 @@ -export([no_prod/3]). -export([read/1, read/2, debug_wait/4]). %%% Node wallet and address management: --export([address/0, wallet/0, wallet/1]). +-export([address/0, address/1, wallet/0, wallet/1]). -include("include/hb.hrl"). %% @doc Initialize system-wide settings for the hyperbeam node. init() -> hb_name:start(), + hb_event:setup_logger(), ?event({setting_debug_stack_depth, hb_opts:get(debug_stack_depth)}), Old = erlang:system_flag(backtrace_depth, hb_opts:get(debug_stack_depth)), ?event({old_system_stack_depth, Old}), diff --git a/src/hb_ao.erl b/src/hb_ao.erl index f26e29f28..8ab031b45 100644 --- a/src/hb_ao.erl +++ b/src/hb_ao.erl @@ -142,7 +142,7 @@ resolve(Base, Req, Opts) -> {stage, 1, prepare_multimessage_resolution, {path_parts, PathParts}} ), MessagesToExec = [ Req#{ <<"path">> => Path } || Path <- PathParts ], - ?event(ao_core, + ?event(debug_ao_core, {stage, 1, prepare_multimessage_resolution, @@ -164,7 +164,7 @@ resolve_many([ID], Opts) when ?IS_ID(ID) -> % 2. The main AO-Core logic looks for linkages between message input % pairs and outputs. With only a single ID, there is not a valid pairing % to use in looking up a cached result. 
- ?event(ao_core, {stage, na, resolve_directly_to_id, ID, {opts, Opts}}, Opts), + ?event(debug_ao_core, {stage, na, resolve_directly_to_id, ID, {opts, Opts}}, Opts), try {ok, ensure_message_loaded(ID, Opts)} catch _:_:_ -> {error, not_found} end; @@ -190,20 +190,20 @@ resolve_many({as, DevID, Msg}, Opts) -> resolve_many([{resolve, Subres}], Opts) -> resolve_many(Subres, Opts); resolve_many(MsgList, Opts) -> - ?event(ao_core, {resolve_many, MsgList}, Opts), + ?event(debug_ao_core, {resolve_many, MsgList}, Opts), Res = do_resolve_many(MsgList, Opts), - ?event(ao_core, {resolve_many_complete, {res, Res}, {reqs, MsgList}}, Opts), + ?event(debug_ao_core, {resolve_many_complete, {res, Res}, {reqs, MsgList}}, Opts), Res. do_resolve_many([], _Opts) -> {failure, <<"Attempted to resolve an empty message sequence.">>}; do_resolve_many([Res], Opts) -> - ?event(ao_core, {stage, 11, resolve_complete, Res}), + ?event(debug_ao_core, {stage, 11, resolve_complete, Res}), hb_cache:ensure_loaded(maybe_force_message(Res, Opts), Opts); do_resolve_many([Base, Req | MsgList], Opts) -> - ?event(ao_core, {stage, 0, resolve_many, {base, Base}, {req, Req}}), + ?event(debug_ao_core, {stage, 0, resolve_many, {base, Base}, {req, Req}}), case resolve_stage(1, Base, Req, Opts) of {ok, Res} -> - ?event(ao_core, + ?event(debug_ao_core, { stage, 13, @@ -216,19 +216,19 @@ do_resolve_many([Base, Req | MsgList], Opts) -> do_resolve_many([Res | MsgList], Opts); Res -> % The result is not a resolvable message. Return it. - ?event(ao_core, {stage, 13, resolve_many_terminating_early, Res}), + ?event(debug_ao_core, {stage, 13, resolve_many_terminating_early, Res}), maybe_force_message(Res, Opts) end. resolve_stage(1, Link, Req, Opts) when ?IS_LINK(Link) -> % If the first message is a link, we should load the message and % continue with the resolution. 
- ?event(ao_core, {stage, 1, resolve_base_link, {link, Link}}, Opts), + ?event(debug_ao_core, {stage, 1, resolve_base_link, {link, Link}}, Opts), resolve_stage(1, hb_cache:ensure_loaded(Link, Opts), Req, Opts); resolve_stage(1, Base, Link, Opts) when ?IS_LINK(Link) -> % If the second message is a link, we should load the message and % continue with the resolution. - ?event(ao_core, {stage, 1, resolve_req_link, {link, Link}}, Opts), + ?event(debug_ao_core, {stage, 1, resolve_req_link, {link, Link}}, Opts), resolve_stage(1, Base, hb_cache:ensure_loaded(Link, Opts), Opts); resolve_stage(1, {as, DevID, Ref}, Req, Opts) when ?IS_ID(Ref) orelse ?IS_LINK(Ref) -> % Normalize `as' requests with a raw ID or link as the path. Links will be @@ -237,27 +237,27 @@ resolve_stage(1, {as, DevID, Ref}, Req, Opts) when ?IS_ID(Ref) orelse ?IS_LINK(R resolve_stage(1, {as, DevID, Link}, Req, Opts) when ?IS_LINK(Link) -> % If the first message is an `as' with a link, we should load the message and % continue with the resolution. - ?event(ao_core, {stage, 1, resolve_base_as_link, {link, Link}}, Opts), + ?event(debug_ao_core, {stage, 1, resolve_base_as_link, {link, Link}}, Opts), resolve_stage(1, {as, DevID, hb_cache:ensure_loaded(Link, Opts)}, Req, Opts); resolve_stage(1, {as, DevID, Raw = #{ <<"path">> := ID }}, Req, Opts) when ?IS_ID(ID) -> % If the first message is an `as' with an ID, we should load the message and % apply the non-path elements of the sub-request to it. 
- ?event(ao_core, {stage, 1, subresolving_with_load, {dev, DevID}, {id, ID}}, Opts), + ?event(debug_ao_core, {stage, 1, subresolving_with_load, {dev, DevID}, {id, ID}}, Opts), RemBase = hb_maps:without([<<"path">>], Raw, Opts), - ?event(subresolution, {loading_message, {id, ID}, {params, RemBase}}, Opts), + ?event(debug_subresolution, {loading_message, {id, ID}, {params, RemBase}}, Opts), Baseb = ensure_message_loaded(ID, Opts), - ?event(subresolution, {loaded_message, {msg, Baseb}}, Opts), + ?event(debug_subresolution, {loaded_message, {msg, Baseb}}, Opts), Basec = hb_maps:merge(Baseb, RemBase, Opts), - ?event(subresolution, {merged_message, {msg, Basec}}, Opts), + ?event(debug_subresolution, {merged_message, {msg, Basec}}, Opts), Based = set(Basec, <<"device">>, DevID, Opts), - ?event(subresolution, {loaded_parameterized_message, {msg, Based}}, Opts), + ?event(debug_subresolution, {loaded_parameterized_message, {msg, Based}}, Opts), resolve_stage(1, Based, Req, Opts); resolve_stage(1, Raw = {as, DevID, SubReq}, Req, Opts) -> % Set the device of the message to the specified one and resolve the sub-path. % As this is the first message, we will then continue to execute the request % on the result. - ?event(ao_core, {stage, 1, subresolving_base, {dev, DevID}, {subreq, SubReq}}, Opts), - ?event(subresolution, {as, {dev, DevID}, {subreq, SubReq}, {req, Req}}), + ?event(debug_ao_core, {stage, 1, subresolving_base, {dev, DevID}, {subreq, SubReq}}, Opts), + ?event(debug_subresolution, {as, {dev, DevID}, {subreq, SubReq}, {req, Req}}), case subresolve(SubReq, DevID, SubReq, Opts) of {ok, SubRes} -> % The subresolution has returned a new message. Continue with it. @@ -276,7 +276,7 @@ resolve_stage(1, RawBase, ReqOuter = #{ <<"path">> := {as, DevID, ReqInner} }, O % Set the device to the specified `DevID' and resolve the message. Merging % the `ReqInner' into the `ReqOuter' message first. We return the result % of the sub-resolution directly. 
- ?event(ao_core, {stage, 1, subresolving_from_request, {dev, DevID}}, Opts), + ?event(debug_ao_core, {stage, 1, subresolving_from_request, {dev, DevID}}, Opts), LoadedInner = ensure_message_loaded(ReqInner, Opts), Req = hb_maps:merge( @@ -296,17 +296,17 @@ resolve_stage(1, RawBase, ReqOuter = #{ <<"path">> := {as, DevID, ReqInner} }, O resolve_stage(1, {resolve, Subres}, Req, Opts) -> % If the first message is a `{resolve, Subres}' tuple, we should execute it % directly, then apply the request to the result. - ?event(ao_core, {stage, 1, subresolving_base_message, {subres, Subres}}, Opts), + ?event(debug_ao_core, {stage, 1, subresolving_base_message, {subres, Subres}}, Opts), % Unlike the `request' case for pre-subresolutions, we do not need to unset % the `force_message' option, because the result should be a message, anyway. % If it is not, it is more helpful to have the message placed into the `body' % of a result, which can then be executed upon. case resolve_many(Subres, Opts) of {ok, Base} -> - ?event(ao_core, {stage, 1, subresolve_success, {new_base, Base}}, Opts), + ?event(debug_ao_core, {stage, 1, subresolve_success, {new_base, Base}}, Opts), resolve_stage(1, Base, Req, Opts); OtherRes -> - ?event(ao_core, + ?event(debug_ao_core, {stage, 1, subresolve_failed, @@ -321,7 +321,7 @@ resolve_stage(1, Base, {resolve, Subres}, Opts) -> % execute the subresolution directly to gain the underlying `Req' for % our execution. We assume that the subresolution is already in a normalized, % executable form, so we pass it to `resolve_many' for execution. - ?event(ao_core, {stage, 1, subresolving_request_message, {subres, Subres}}, Opts), + ?event(debug_ao_core, {stage, 1, subresolving_request_message, {subres, Subres}}, Opts), % We make sure to unset the `force_message' option so that if the subresolution % returns a literal, the rest of `resolve' will normalize it to a path. 
case resolve_many(Subres, maps:without([force_message], Opts)) of @@ -348,31 +348,31 @@ resolve_stage(1, Base, {resolve, Subres}, Opts) -> end; resolve_stage(1, Base, Req, Opts) when is_list(Base) -> % Normalize lists to numbered maps (base=1) if necessary. - ?event(ao_core, {stage, 1, list_normalize}, Opts), + ?event(debug_ao_core, {stage, 1, list_normalize}, Opts), resolve_stage(1, normalize_keys(Base, Opts), Req, Opts ); resolve_stage(1, Base, NonMapReq, Opts) when not is_map(NonMapReq) -> - ?event(ao_core, {stage, 1, path_normalize}), + ?event(debug_ao_core, {stage, 1, path_normalize}), resolve_stage(1, Base, #{ <<"path">> => NonMapReq }, Opts); resolve_stage(1, RawBase, RawReq, Opts) -> % Normalize the path to a private key containing the list of remaining % keys to resolve. - ?event(ao_core, {stage, 1, normalize}, Opts), + ?event(debug_ao_core, {stage, 1, normalize}, Opts), Base = normalize_keys(RawBase, Opts), Req = normalize_keys(RawReq, Opts), resolve_stage(2, Base, Req, Opts); resolve_stage(2, Base, Req, Opts) -> - ?event(ao_core, {stage, 2, cache_lookup}, Opts), + ?event(debug_ao_core, {stage, 2, cache_lookup}, Opts), % Lookup request in the cache. If we find a result, return it. % If we do not find a result, we continue to the next stage, % unless the cache lookup returns `halt' (the user has requested that we % only return a result if it is already in the cache). case hb_cache_control:maybe_lookup(Base, Req, Opts) of {ok, Res} -> - ?event(ao_core, {stage, 2, cache_hit, {res, Res}, {opts, Opts}}, Opts), + ?event(debug_ao_core, {stage, 2, cache_hit, {res, Res}, {opts, Opts}}, Opts), {ok, Res}; {continue, NewBase, NewReq} -> resolve_stage(3, NewBase, NewReq, Opts); @@ -381,10 +381,10 @@ resolve_stage(2, Base, Req, Opts) -> resolve_stage(3, Base, Req, Opts) when not is_map(Base) or not is_map(Req) -> % Validation check: If the messages are not maps, we cannot find a key % in them, so return not_found. 
- ?event(ao_core, {stage, 3, validation_check_type_error}, Opts), + ?event(debug_ao_core, {stage, 3, validation_check_type_error}, Opts), {error, not_found}; resolve_stage(3, Base, Req, Opts) -> - ?event(ao_core, {stage, 3, validation_check}, Opts), + ?event(debug_ao_core, {stage, 3, validation_check}, Opts), % Validation checks: If `paranoid_message_verification' is enabled, we should % verify the base and request messages prior to execution. hb_message:paranoid_verify( @@ -398,7 +398,7 @@ resolve_stage(3, Base, Req, Opts) -> ), resolve_stage(4, Base, Req, Opts); resolve_stage(4, Base, Req, Opts) -> - ?event(ao_core, {stage, 4, persistent_resolver_lookup}, Opts), + ?event(debug_ao_core, {stage, 4, persistent_resolver_lookup}, Opts), % Persistent-resolver lookup: Search for local (or Distributed % Erlang cluster) processes that are already performing the execution. % Before we search for a live executor, we check if the device specifies @@ -461,7 +461,7 @@ resolve_stage(4, Base, Req, Opts) -> end end. resolve_stage(5, Base, Req, ExecName, Opts) -> - ?event(ao_core, {stage, 5, device_lookup}, Opts), + ?event(debug_ao_core, {stage, 5, device_lookup}, Opts), % Device lookup: Find the Erlang function that should be utilized to % execute Req on Base. {ResolvedFunc, NewOpts} = @@ -528,7 +528,7 @@ resolve_stage(5, Base, Req, ExecName, Opts) -> end, resolve_stage(6, ResolvedFunc, Base, Req, ExecName, NewOpts). resolve_stage(6, Func, Base, Req, ExecName, Opts) -> - ?event(ao_core, {stage, 6, ExecName, execution}, Opts), + ?event(debug_ao_core, {stage, 6, ExecName, execution}, Opts), % Execution. 
ExecOpts = execution_opts(Opts), Args = @@ -542,7 +542,7 @@ resolve_stage(6, Func, Base, Req, ExecName, Opts) -> TruncatedArgs = hb_ao_device:truncate_args(Func, Args), MsgRes = maybe_profiled_apply(Func, TruncatedArgs, Base, Req, Opts), ?event( - ao_result, + debug_ao_result, { ao_result, {exec_name, ExecName}, @@ -597,7 +597,7 @@ resolve_stage(6, Func, Base, Req, ExecName, Opts) -> ), resolve_stage(7, Base, Req, Res, ExecName, Opts); resolve_stage(7, Base, Req, {St, Res}, ExecName, Opts = #{ on := On = #{ <<"step">> := _ }}) -> - ?event(ao_core, {stage, 7, ExecName, executing_step_hook, {on, On}}, Opts), + ?event(debug_ao_core, {stage, 7, ExecName, executing_step_hook, {on, On}}, Opts), % If the `step' hook is defined, we execute it. Note: This function clause % matches directly on the `on' key of the `Opts' map. This is in order to % remove the expensive lookup check that would otherwise be performed on every @@ -623,18 +623,18 @@ resolve_stage(7, Base, Req, {St, Res}, ExecName, Opts = #{ on := On = #{ <<"step Error end; resolve_stage(7, Base, Req, Res, ExecName, Opts) -> - ?event(ao_core, {stage, 7, ExecName, no_step_hook}, Opts), + ?event(debug_ao_core, {stage, 7, ExecName, no_step_hook}, Opts), resolve_stage(8, Base, Req, Res, ExecName, Opts); resolve_stage(8, Base, Req, {ok, {resolve, Sublist}}, ExecName, Opts) -> - ?event(ao_core, {stage, 8, ExecName, subresolve_result}, Opts), + ?event(debug_ao_core, {stage, 8, ExecName, subresolve_result}, Opts), % If the result is a `{resolve, Sublist}' tuple, we need to execute it % as a sub-resolution. 
resolve_stage(9, Base, Req, resolve_many(Sublist, Opts), ExecName, Opts); resolve_stage(8, Base, Req, Res, ExecName, Opts) -> - ?event(ao_core, {stage, 8, ExecName, no_subresolution_necessary}, Opts), + ?event(debug_ao_core, {stage, 8, ExecName, no_subresolution_necessary}, Opts), resolve_stage(9, Base, Req, Res, ExecName, Opts); resolve_stage(9, Base, Req, {ok, Res}, ExecName, Opts) when is_map(Res) -> - ?event(ao_core, {stage, 9, ExecName, generate_hashpath}, Opts), + ?event(debug_ao_core, {stage, 9, ExecName, generate_hashpath}, Opts), % Cryptographic linking. Now that we have generated the result, we % need to cryptographically link the output to its input via a hashpath. resolve_stage(10, Base, Req, @@ -663,7 +663,7 @@ resolve_stage(9, Base, Req, {ok, Res}, ExecName, Opts) when is_map(Res) -> Opts ); resolve_stage(9, Base, Req, {Status, Res}, ExecName, Opts) when is_map(Res) -> - ?event(ao_core, {stage, 9, ExecName, abnormal_status_reset_hashpath}, Opts), + ?event(debug_ao_core, {stage, 9, ExecName, abnormal_status_reset_hashpath}, Opts), ?event(hashpath, {resetting_hashpath_res, {base, Base}, {req, Req}, {opts, Opts}}), % Skip cryptographic linking and reset the hashpath if the result is abnormal. Priv = hb_private:from_message(Res), @@ -672,27 +672,27 @@ resolve_stage(9, Base, Req, {Status, Res}, ExecName, Opts) when is_map(Res) -> {Status, Res#{ <<"priv">> => maps:without([<<"hashpath">>], Priv) }}, ExecName, Opts); resolve_stage(9, Base, Req, Res, ExecName, Opts) -> - ?event(ao_core, {stage, 9, ExecName, non_map_result_skipping_hash_path}, Opts), + ?event(debug_ao_core, {stage, 9, ExecName, non_map_result_skipping_hash_path}, Opts), % Skip cryptographic linking and continue if we don't have a map that can have % a hashpath at all. 
resolve_stage(10, Base, Req, Res, ExecName, Opts); resolve_stage(10, Base, Req, {ok, Res}, ExecName, Opts) -> - ?event(ao_core, {stage, 10, ExecName, result_caching}, Opts), + ?event(debug_ao_core, {stage, 10, ExecName, result_caching}, Opts), % Result caching: Optionally, cache the result of the computation locally. hb_cache_control:maybe_store(Base, Req, Res, Opts), resolve_stage(11, Base, Req, {ok, Res}, ExecName, Opts); resolve_stage(10, Base, Req, Res, ExecName, Opts) -> - ?event(ao_core, {stage, 10, ExecName, abnormal_status_skip_caching}, Opts), + ?event(debug_ao_core, {stage, 10, ExecName, abnormal_status_skip_caching}, Opts), % Skip result caching if the result is abnormal. resolve_stage(11, Base, Req, Res, ExecName, Opts); resolve_stage(11, Base, Req, Res, ExecName, Opts) -> - ?event(ao_core, {stage, 11, ExecName}, Opts), + ?event(debug_ao_core, {stage, 11, ExecName}, Opts), % Notify processes that requested the resolution while we were executing and % unregister ourselves from the group. hb_persistent:unregister_notify(ExecName, Req, Res, Opts), resolve_stage(12, Base, Req, Res, ExecName, Opts); resolve_stage(12, _Base, _Req, {ok, Res} = Res, ExecName, Opts) -> - ?event(ao_core, {stage, 12, ExecName, maybe_spawn_worker}, Opts), + ?event(debug_ao_core, {stage, 12, ExecName, maybe_spawn_worker}, Opts), % Check if we should fork out a new worker process for the current execution case {is_map(Res), hb_opts:get(spawn_worker, false, Opts#{ prefer => local })} of {A, B} when (A == false) or (B == false) -> @@ -704,7 +704,7 @@ resolve_stage(12, _Base, _Req, {ok, Res} = Res, ExecName, Opts) -> Res end; resolve_stage(12, _Base, _Req, OtherRes, ExecName, Opts) -> - ?event(ao_core, {stage, 12, ExecName, abnormal_status_skip_spawning}, Opts), + ?event(debug_ao_core, {stage, 12, ExecName, abnormal_status_skip_spawning}, Opts), OtherRes. %% @doc Execute a sub-resolution. 
@@ -880,7 +880,7 @@ error_infinite(Base, Req, Opts) -> error_execution(ExecGroup, Req, Whence, {Class, Exception, Stacktrace}, Opts) -> Error = {error, Whence, {Class, Exception, Stacktrace}}, hb_persistent:unregister_notify(ExecGroup, Req, Error, Opts), - ?event(ao_core, {handle_error, Error, {opts, Opts}}, Opts), + ?event(debug_ao_core, {handle_error, Error, {opts, Opts}}, Opts), case hb_opts:get(error_strategy, throw, Opts) of throw -> erlang:raise(Class, Exception, Stacktrace); _ -> Error diff --git a/src/hb_ao_device.erl b/src/hb_ao_device.erl index 22ed65f5c..ad4f40522 100644 --- a/src/hb_ao_device.erl +++ b/src/hb_ao_device.erl @@ -249,8 +249,9 @@ maybe_normalize_device_key(Key, Mode) -> %% a tuple of the form {error, Reason} is returned. load(Map, _Opts) when is_map(Map) -> {ok, Map}; load(ID, _Opts) when is_atom(ID) -> - try ID:module_info(), {ok, ID} - catch _:_ -> {error, not_loadable} + case code:ensure_loaded(ID) of + {module, ID} -> {ok, ID}; + {error, _} -> {error, not_loadable} end; load(ID, Opts) when ?IS_ID(ID) -> ?event(device_load, {requested_load, {id, ID}}, Opts), @@ -439,11 +440,11 @@ do_is_direct_key_access(error, Key, Opts) -> do_is_direct_key_access(<<"message@1.0">>, Key, _Opts) -> not lists:member(Key, ?MESSAGE_KEYS); do_is_direct_key_access(Dev, NormKey, Opts) -> - ?event(read_cached, {calculating_info, {device, Dev}}), + ?event(debug_read_cached, {calculating_info, {device, Dev}}), case info(#{ <<"device">> => Dev}, Opts) of Info = #{ exports := Exports } when not is_map_key(handler, Info) andalso not is_map_key(default, Info) -> - ?event(read_cached, + ?event(debug_read_cached, {exports, {device, Dev}, {key, NormKey}, diff --git a/src/hb_ao_test_vectors.erl b/src/hb_ao_test_vectors.erl index a9677790a..dae90fff7 100644 --- a/src/hb_ao_test_vectors.erl +++ b/src/hb_ao_test_vectors.erl @@ -841,6 +841,10 @@ load_as_test(Opts) -> <<"device">> => <<"test-device@1.0">>, <<"test_func">> => #{ <<"test_key">> => <<"MESSAGE">> } }, + % There is 
a race condition where we write to the store and a + % reset happens, so the written value cannot be read back. + % Lowering this number can still produce flaky tests. + timer:sleep(100), {ok, ID} = hb_cache:write(Msg, Opts), {ok, ReadMsg} = hb_cache:read(ID, Opts), ?assert(hb_message:match(Msg, ReadMsg, primary, Opts)), @@ -969,7 +973,7 @@ step_hook_test(InitOpts) -> #{ <<"step">> => fun(_, Req, _) -> - ?event(ao_core, {step_hook, {self(), Ref}}), + ?event(debug_ao_core, {step_hook, {self(), Ref}}), Self ! {step, Ref}, {ok, Req} end @@ -1149,4 +1153,4 @@ benchmark_set_multiple_deep_test(Opts) -> <<"Set two keys operations:">>, ?BENCHMARK_ITERATIONS, Time - ). \ No newline at end of file + ). diff --git a/src/hb_cache.erl b/src/hb_cache.erl index 2968e7792..6980bf4f2 100644 --- a/src/hb_cache.erl +++ b/src/hb_cache.erl @@ -123,7 +123,7 @@ ensure_loaded(Ref, Link = {link, ID, LinkOpts = #{ <<"lazy">> := true }}, RawOpt end, case CacheReadResult of {ok, LoadedMsg} -> - ?event(caching, + ?event(debug_caching, {lazy_loaded, {link, ID}, {msg, LoadedMsg}, diff --git a/src/hb_client.erl b/src/hb_client.erl index 8cf60734a..e1b37e62b 100644 --- a/src/hb_client.erl +++ b/src/hb_client.erl @@ -112,14 +112,14 @@ upload(Msg, Opts, <<"ans104@1.0">>) when is_binary(Msg) -> upload(Msg, Opts, <<"ans104@1.0">>) when is_map(Msg) -> ?event({uploading_item, Msg}), dev_arweave:post_tx( - #{ <<"device">> => <<"arweave@2.9-pre">> }, + #{ <<"device">> => <<"arweave@2.9">> }, Msg, Opts, <<"ans104@1.0">> ); upload(Msg, Opts, <<"tx@1.0">>) when is_map(Msg) -> dev_arweave:post_tx( - #{ <<"device">> => <<"arweave@2.9-pre">> }, + #{ <<"device">> => <<"arweave@2.9">> }, Msg, Opts, <<"tx@1.0">> diff --git a/src/hb_ecdsa_tests.erl b/src/hb_ecdsa_tests.erl new file mode 100644 index 000000000..fb260d174 --- /dev/null +++ b/src/hb_ecdsa_tests.erl @@ -0,0 +1,662 @@ +-module(hb_ecdsa_tests). + +-include("include/ar.hrl"). +-include_lib("eunit/include/eunit.hrl"). + +%% @doc secp256k1 curve order (n).
+-define(SECP256K1_ORDER, 16#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141).
+
+
+%%%===================================================================
+%%% Bitcoin/Ethereum Interoperability Tests
+%%%===================================================================
+
+%% @doc Test go-ethereum ecrecover reference vector.
+%% https://github.com/ethereum/go-ethereum/blob/0cba803fbafb12e9daaea53b76de847842ab3055/crypto/secp256k1/secp256_test.go#L208
+nif_ecrecover_geth_vector_test() ->
+    MsgHash =
+        binary:decode_hex(
+            <<"ce0677bb30baa8cf067c88db9811f4333d131bf8bcf12fe7065d211dce971008">>
+        ),
+    RecoverableSig =
+        binary:decode_hex(
+            <<"90f27b8b488db00b00606796d2987f6a5f59ae62ea05effe84fef5b8b0e549984a691139ad57a3f0b906637673aa2f63d1f55cb1a69199d4009eea23ceaddc9301">>
+        ),
+    WantPub =
+        binary:decode_hex(
+            <<"02e32df42865e97135acfb65f3bae71bdc86f4d49150ad6a440b6f15878109880a">>
+        ),
+    %% The NIF must both verify the signature and hand back the signer's key.
+    {ok, true, GotPub} = secp256k1_nif:recover_pk_and_verify(MsgHash, RecoverableSig),
+    ?assertEqual(WantPub, GotPub).
+
+%% @doc The trailing byte of a wallet signature must be a recovery ID in 0..3.
+recovery_id_in_valid_range_test() ->
+    Signer = ar_wallet:new_ecdsa(),
+    Signature = ar_wallet:sign(Signer, <<"recovery ID test">>),
+    %% Layout used here: 64 bytes of compact signature, then one recid byte.
+    <<_CompactSig:64/binary, RecId:8>> = Signature,
+    ?assert(lists:member(RecId, [0, 1, 2, 3])).
+
+%% @doc Every signature produced by the wallet must carry an S value that is
+%% no greater than half the curve order (low-S form).
+signature_has_low_s_test() ->
+    Signer = ar_wallet:new_ecdsa(),
+    %% Largest S still accepted as low-S; identical for every message.
+    MaxLowS = ?SECP256K1_ORDER div 2,
+    AssertLowS =
+        fun(Payload) ->
+            SBin = extract_s_value(ar_wallet:sign(Signer, Payload)),
+            SInt = binary:decode_unsigned(SBin, big),
+            ?assert(SInt =< MaxLowS, "Signature must have low-S")
+        end,
+    %% Sign multiple messages and verify all have low-S
+    lists:foreach(
+        AssertLowS,
+        [<<"msg1">>, <<"msg2">>, <<"msg3">>, <<"msg4">>, <<"msg5">>]
+    ).
+
+%% @doc Test bitcoin-core edge case: (r=4, s=4) recoverable with all 4 recids.
+bitcoin_core_r4_s4_all_recids_test() ->
+    %% Create signature with r=4, s=4
+    R = pad_to_32_bytes(<<4:32>>),
+    S = pad_to_32_bytes(<<4:32>>),
+    Msg = <<"This is a very secret message...">>,
+    %% Test all 4 recovery IDs; at least one should recover a pubkey.
+    HasSuccessfulRecovery =
+        lists:any(
+            fun(RecId) ->
+                %% 65-byte recoverable signature: r || s || recid.
+                Sig = <<R/binary, S/binary, RecId:8>>,
+                case ar_wallet:recover_key(Msg, Sig, ?ECDSA_KEY_TYPE) of
+                    PubKey when byte_size(PubKey) =:= 33 -> true;
+                    <<>> -> false
+                end
+            end,
+            [0, 1, 2, 3]
+        ),
+    ?assertEqual(true, HasSuccessfulRecovery).
+
+%% @doc Test bitcoin-core edge case: (r=1, s=1) with recid=0. Recovery may
+%% either yield a 33-byte compressed pubkey or fail with an empty binary; any
+%% other result shape is a failure.
+bitcoin_core_r1_s1_recid0_test() ->
+    R = pad_to_32_bytes(<<1:32>>),
+    S = pad_to_32_bytes(<<1:32>>),
+    Msg = <<"test">>,
+    Sig = <<R/binary, S/binary, 0:8>>,
+    Result = ar_wallet:recover_key(Msg, Sig, ?ECDSA_KEY_TYPE),
+    %% May recover a compressed pubkey or fail with empty key, but never other shapes.
+    ?assert(
+        (Result =:= <<>>) orelse (byte_size(Result) =:= 33)
+    ).
+
+%% @doc A signature whose recovery ID byte is 4 (just outside 0..3) must not
+%% recover a key: recover_key/3 is expected to return the empty binary.
+invalid_recid_4_fails_test() ->
+    Wallet = ar_wallet:new_ecdsa(),
+    Msg = <<"invalid recid test">>,
+    Sig = ar_wallet:sign(Wallet, Msg),
+    %% Keep the 64-byte r || s part and swap only the trailing recid byte.
+    <<CompactSig:64/binary, _RecId:8>> = Sig,
+    BadRecidSig = <<CompactSig/binary, 4:8>>,
+    ?assertEqual(<<>>, ar_wallet:recover_key(Msg, BadRecidSig, ?ECDSA_KEY_TYPE)).
+
+%% @doc Same as the recid=4 case, using the largest single-byte value (255).
+invalid_recid_255_rejected_test() ->
+    Wallet = ar_wallet:new_ecdsa(),
+    Msg = <<"invalid recid 255 test">>,
+    Sig = ar_wallet:sign(Wallet, Msg),
+    <<CompactSig:64/binary, _RecId:8>> = Sig,
+    BadRecidSig = <<CompactSig/binary, 255:8>>,
+    ?assertEqual(<<>>, ar_wallet:recover_key(Msg, BadRecidSig, ?ECDSA_KEY_TYPE)).
+
+%% @doc Malleability check: the high-S variant of a valid signature must be
+%% rejected by recover_key/3 even though the original signature recovers the
+%% wallet's public key.
+high_s_signature_rejected_test() ->
+    Wallet = {{_KeyType, _Priv, Pub}, _} = ar_wallet:new_ecdsa(),
+    Msg = <<"malleability test">>,
+    Sig = ar_wallet:sign(Wallet, Msg),
+    %% Verify original signature works
+    OriginalPub = ar_wallet:recover_key(Msg, Sig, ?ECDSA_KEY_TYPE),
+    ?assertEqual(Pub, OriginalPub),
+    %% Create high-S version
+    HighSSig = create_high_s_signature(Sig),
+    %% Wallet-level recover path should reject high-S signatures.
+    ?assertEqual(<<>>, ar_wallet:recover_key(Msg, HighSSig, ?ECDSA_KEY_TYPE)).
+
+%%%===================================================================
+%%% RFC 6979 Validation Tests
+%%%===================================================================
+
+%% @doc Test deterministic signing with known private key (key=1).
+nif_sign_known_key_test() ->
+    SecretKey = <<0:248, 1:8>>, % Private key = 1
+    ZeroDigest = <<0:256>>,     % All-zero digest
+    {ok, Sig} = secp256k1_nif:sign_recoverable(ZeroDigest, SecretKey),
+    ?assertEqual(65, byte_size(Sig)),
+    %% Verify round-trip recovery
+    {ok, true, RecoveredPub} = secp256k1_nif:recover_pk_and_verify(ZeroDigest, Sig),
+    %% Generate expected pubkey from private key
+    {{_KeyType, _Priv, ExpectedPub}, _} = new_ecdsa_wallet_from_privkey(SecretKey),
+    ?assertEqual(ExpectedPub, RecoveredPub).
+
+%% @doc Signing the same message twice with the same wallet must give
+%% byte-identical signatures.
+signing_is_deterministic_test() ->
+    Signer = ar_wallet:new_ecdsa(),
+    Payload = <<"deterministic test message">>,
+    First = ar_wallet:sign(Signer, Payload),
+    Second = ar_wallet:sign(Signer, Payload),
+    ?assertEqual(First, Second).
+
+%% @doc A private key of zero must be refused by the NIF.
+privkey_zero_rejected_test() ->
+    %% Digest and candidate key are both 32 zero bytes.
+    AllZero = <<0:256>>,
+    ?assertMatch({error, _}, secp256k1_nif:sign_recoverable(AllZero, AllZero)).
+
+%% @doc A private key equal to the curve order must be refused by the NIF.
+privkey_curve_order_rejected_test() ->
+    OrderKey = pad_to_32_bytes(binary:encode_unsigned(?SECP256K1_ORDER, big)),
+    Outcome = secp256k1_nif:sign_recoverable(<<0:256>>, OrderKey),
+    ?assertMatch({error, _}, Outcome).
+
+%% @doc A private key one above the curve order must be refused by the NIF.
+privkey_above_curve_order_rejected_test() ->
+    TooLargeKey =
+        pad_to_32_bytes(binary:encode_unsigned(?SECP256K1_ORDER + 1, big)),
+    Outcome = secp256k1_nif:sign_recoverable(<<0:256>>, TooLargeKey),
+    ?assertMatch({error, _}, Outcome).
+
+%% @doc The largest valid private key (curve order minus one) must sign
+%% successfully and yield a 65-byte signature.
+privkey_max_valid_succeeds_test() ->
+    LargestKey =
+        pad_to_32_bytes(binary:encode_unsigned(?SECP256K1_ORDER - 1, big)),
+    {ok, Sig} = secp256k1_nif:sign_recoverable(<<0:256>>, LargestKey),
+    ?assertEqual(65, byte_size(Sig)).
+
+%% @doc The smallest valid private key (1) must sign successfully and yield
+%% a 65-byte signature.
+privkey_min_valid_succeeds_test() ->
+    SmallestKey = <<0:248, 1:8>>, % Key = 1
+    {ok, Sig} = secp256k1_nif:sign_recoverable(<<0:256>>, SmallestKey),
+    ?assertEqual(65, byte_size(Sig)).
+
+%%%===================================================================
+%%% Standards Conformance Tests (FIPS/NIST/SEC1)
+%%%===================================================================
+
+%% @doc Test sign and recover roundtrip with known key.
+nif_sign_recover_roundtrip_test() ->
+    SecretKey = <<0:248, 1:8>>, % Private key = 1
+    MsgHash = crypto:hash(sha256, <<"known message">>),
+    {ok, Sig} = secp256k1_nif:sign_recoverable(MsgHash, SecretKey),
+    {ok, true, RecoveredPub} = secp256k1_nif:recover_pk_and_verify(MsgHash, Sig),
+    %% The public key derived from the private key is the reference value.
+    {{_KeyType, _Priv, ExpectedPub}, _} = new_ecdsa_wallet_from_privkey(SecretKey),
+    ?assertEqual(ExpectedPub, RecoveredPub).
+
+%% @doc Signing the empty message must still produce a 65-byte signature.
+empty_message_signs_successfully_test() ->
+    Signer = ar_wallet:new_ecdsa(),
+    Signature = ar_wallet:sign(Signer, <<>>),
+    ?assertEqual(65, byte_size(Signature)).
+
+%% @doc Recovering from a wallet signature must return the wallet's own
+%% public key.
+recovers_correct_pubkey_test() ->
+    Signer = {{_KeyType, _Priv, SignerPub}, _} = ar_wallet:new_ecdsa(),
+    Payload = <<"recovery test message">>,
+    Signature = ar_wallet:sign(Signer, Payload),
+    ?assertEqual(
+        SignerPub,
+        ar_wallet:recover_key(Payload, Signature, ?ECDSA_KEY_TYPE)
+    ).
+
+%% @doc Key recovery must also work when the signed message is empty.
+empty_message_recovers_correctly_test() ->
+    Signer = {{_KeyType, _Priv, SignerPub}, _} = ar_wallet:new_ecdsa(),
+    Signature = ar_wallet:sign(Signer, <<>>),
+    ?assertEqual(
+        SignerPub,
+        ar_wallet:recover_key(<<>>, Signature, ?ECDSA_KEY_TYPE)
+    ).
+
+%% @doc Sign/recover roundtrip through the NIF with a digest of 32 zero bytes.
+all_zero_digest_test() ->
+    {{_KeyType, Priv, Pub}, _} = ar_wallet:new_ecdsa(),
+    Digest = <<0:256>>,
+    {ok, Sig} = secp256k1_nif:sign_recoverable(Digest, Priv),
+    {ok, true, RecoveredPub} = secp256k1_nif:recover_pk_and_verify(Digest, Sig),
+    ?assertEqual(Pub, RecoveredPub).
+
+%% @doc Sign/recover roundtrip through the NIF with a digest of 32 0xFF bytes.
+all_ones_digest_test() ->
+    {{_KeyType, Priv, Pub}, _} = ar_wallet:new_ecdsa(),
+    %% Every byte must be 0xFF. Note that `<<16#FF:256>>' would encode the
+    %% integer 255 in 256 bits, i.e. 31 zero bytes followed by a single 0xFF.
+    Digest = binary:copy(<<16#FF>>, 32),
+    {ok, Sig} = secp256k1_nif:sign_recoverable(Digest, Priv),
+    {ok, true, RecoveredPub} = secp256k1_nif:recover_pk_and_verify(Digest, Sig),
+    ?assertEqual(Pub, RecoveredPub).
+
+%% @doc Sign/recover roundtrip through the NIF with a digest whose value
+%% equals the curve order.
+digest_equals_curve_order_test() ->
+    {{_KeyType, Priv, Pub}, _} = ar_wallet:new_ecdsa(),
+    Order = ?SECP256K1_ORDER,
+    OrderBin = binary:encode_unsigned(Order, big),
+    Digest = pad_to_32_bytes(OrderBin),
+    {ok, Sig} = secp256k1_nif:sign_recoverable(Digest, Priv),
+    {ok, true, RecoveredPub} = secp256k1_nif:recover_pk_and_verify(Digest, Sig),
+    ?assertEqual(Pub, RecoveredPub).
+
+%%%===================================================================
+%%% Adversarial Robustness Tests
+%%%===================================================================
+%% @doc Recovering with a message other than the signed one must not return
+%% the signer's public key.
+wrong_message_recovers_different_key_test() ->
+    Wallet = {{_KeyType, _Priv, Pub}, _} = ar_wallet:new_ecdsa(),
+    Msg = <<"original message">>,
+    BadMsg = <<"wrong message">>,
+    Sig = ar_wallet:sign(Wallet, Msg),
+    RecoveredPub = ar_wallet:recover_key(BadMsg, Sig, ?ECDSA_KEY_TYPE),
+    ?assertNotEqual(Pub, RecoveredPub).
+
+%% @doc Recovering from a signature corrupted via corrupt_signature/2
+%% (position argument 10) must not return the signer's public key.
+corrupted_signature_byte_flip_test() ->
+    Wallet = {{_KeyType, _Priv, Pub}, _} = ar_wallet:new_ecdsa(),
+    Msg = <<"flipped bit test">>,
+    Sig = ar_wallet:sign(Wallet, Msg),
+    CorruptedSig = corrupt_signature(Sig, 10),
+    Result = ar_wallet:recover_key(Msg, CorruptedSig, ?ECDSA_KEY_TYPE),
+    ?assertNotEqual(Pub, Result).
+
+%% @doc A signature altered by zero_signature_range(Sig, 0, 32) (presumably
+%% the r component; helper defined elsewhere) must not recover the signer's
+%% public key.
+zeroed_r_value_fails_test() ->
+    Wallet = {{_KeyType, _Priv, Pub}, _} = ar_wallet:new_ecdsa(),
+    Msg = <<"zeroed r test">>,
+    Sig = ar_wallet:sign(Wallet, Msg),
+    CorruptedSig = zero_signature_range(Sig, 0, 32),
+    Result = ar_wallet:recover_key(Msg, CorruptedSig, ?ECDSA_KEY_TYPE),
+    ?assertNotEqual(Pub, Result).
+
+%% @doc A signature altered by zero_signature_range(Sig, 32, 32) (presumably
+%% the s component; helper defined elsewhere) must not recover the signer's
+%% public key.
+zeroed_s_value_fails_test() ->
+    Wallet = {{_KeyType, _Priv, Pub}, _} = ar_wallet:new_ecdsa(),
+    Msg = <<"zeroed s test">>,
+    Sig = ar_wallet:sign(Wallet, Msg),
+    CorruptedSig = zero_signature_range(Sig, 32, 32),
+    Result = ar_wallet:recover_key(Msg, CorruptedSig, ?ECDSA_KEY_TYPE),
+    ?assertNotEqual(Pub, Result).
+
+%% @doc A signature shortened to 64 bytes (recid byte dropped) must make
+%% recover_key/3 raise badarg.
+truncated_signature_fails_test() ->
+    Wallet = ar_wallet:new_ecdsa(),
+    Msg = <<"truncated sig test">>,
+    Sig = ar_wallet:sign(Wallet, Msg),
+    <<TruncatedSig:64/binary, _RecId:8>> = Sig,
+    ?assertError(badarg, ar_wallet:recover_key(Msg, TruncatedSig, ?ECDSA_KEY_TYPE)).
+
+%% @doc A signature extended by one extra byte (66 bytes) must make
+%% recover_key/3 raise badarg.
+extended_signature_fails_test() ->
+    Wallet = ar_wallet:new_ecdsa(),
+    Msg = <<"extended sig test">>,
+    Sig = ar_wallet:sign(Wallet, Msg),
+    ExtendedSig = <<Sig/binary, 0:8>>,
+    ?assertError(badarg, ar_wallet:recover_key(Msg, ExtendedSig, ?ECDSA_KEY_TYPE)).
+
+%% @doc verify/3 must return false for a corrupted signature.
+verify_rejects_wrong_signature_test() ->
+    Wallet = {{KeyType, _Priv, Pub}, _} = ar_wallet:new_ecdsa(),
+    TestData = <<"wrong sig test">>,
+    Signature = ar_wallet:sign(Wallet, TestData),
+    CorruptedSig = corrupt_signature(Signature, 0),
+    %% Use an EUnit assertion (as the rest of the module does) so a failure
+    %% reports expected/actual values instead of a bare badmatch.
+    ?assertEqual(false, ar_wallet:verify({KeyType, Pub}, TestData, CorruptedSig)).
+
+%% @doc verify/3 must return false when the data differs from what was signed.
+verify_rejects_wrong_data_test() ->
+    Wallet = {{KeyType, _Priv, Pub}, _} = ar_wallet:new_ecdsa(),
+    TestData = <<"original data">>,
+    WrongData = <<"wrong data">>,
+    Signature = ar_wallet:sign(Wallet, TestData),
+    ?assertEqual(false, ar_wallet:verify({KeyType, Pub}, WrongData, Signature)).
+
+%% @doc verify/3 must return false when checked against another wallet's
+%% public key.
+verify_rejects_wrong_pubkey_test() ->
+    Wallet1 = ar_wallet:new_ecdsa(),
+    {{KeyType2, _Priv2, Pub2}, _} = ar_wallet:new_ecdsa(),
+    TestData = <<"wrong pubkey test">>,
+    Signature = ar_wallet:sign(Wallet1, TestData),
+    ?assertEqual(false, ar_wallet:verify({KeyType2, Pub2}, TestData, Signature)).
+
+%% @doc A signed transaction whose signature has been corrupted must fail
+%% ar_tx:verify/1.
+verify_rejects_corrupted_signature_test() ->
+    Signer = ar_wallet:new_ecdsa(),
+    SignedTX = make_signed_ecdsa_tx(Signer),
+    Tampered =
+        SignedTX#tx{signature = corrupt_signature(SignedTX#tx.signature, 0)},
+    ?assertEqual(false, ar_tx:verify(Tampered)).
+
+%% @doc Changing the quantity of a signed transaction must fail verification.
+verify_rejects_modified_data_test() ->
+    Signer = ar_wallet:new_ecdsa(),
+    SignedTX = make_signed_ecdsa_tx(Signer),
+    Tampered = SignedTX#tx{quantity = SignedTX#tx.quantity + 1},
+    ?assertEqual(false, ar_tx:verify(Tampered)).
+
+%% @doc Replacing the owner of a signed transaction with another wallet's
+%% public key must fail verification.
+verify_rejects_wrong_owner_test() ->
+    Signer = ar_wallet:new_ecdsa(),
+    {{_KeyType, _Priv2, OtherPub}, _} = ar_wallet:new_ecdsa(),
+    SignedTX = make_signed_ecdsa_tx(Signer),
+    ?assertEqual(false, ar_tx:verify(SignedTX#tx{owner = OtherPub})).
+
+%% @doc 65 random bytes in place of a signature must not verify for a
+%% transaction carrying a real owner, key type and owner address.
+cannot_forge_with_random_bytes_test() ->
+    {{KeyType, _Priv, Pub}, _} = ar_wallet:new_ecdsa(),
+    Unsigned = make_ecdsa_tx(),
+    Owned =
+        Unsigned#tx{
+            owner = Pub,
+            signature_type = KeyType,
+            owner_address = ar_wallet:to_address(Pub, KeyType)
+        },
+    Forged = Owned#tx{signature = crypto:strong_rand_bytes(65)},
+    ?assertEqual(false, ar_tx:verify(Forged)).
+
+%% @doc Copying owner, signature and signature type from a signed transaction
+%% onto a separately built transaction must not verify.
+cannot_substitute_cross_tx_signature_test() ->
+    Signer = ar_wallet:new_ecdsa(),
+    Donor = make_signed_ecdsa_tx(Signer),
+    Recipient = make_ecdsa_tx(),
+    Grafted =
+        Recipient#tx{
+            owner = Donor#tx.owner,
+            signature = Donor#tx.signature,
+            signature_type = Donor#tx.signature_type
+        },
+    ?assertEqual(false, ar_tx:verify(Grafted)).
+
+%% @doc Swapping in a fresh random anchor on a signed transaction must fail
+%% verification.
+cannot_replay_with_different_anchor_test() ->
+    Signer = ar_wallet:new_ecdsa(),
+    SignedTX = make_signed_ecdsa_tx(Signer),
+    Replayed = SignedTX#tx{anchor = crypto:strong_rand_bytes(32)},
+    ?assertEqual(false, ar_tx:verify(Replayed)).
+
+%% @doc Changing any one of target, quantity, reward, anchor, data_size or
+%% data_root on a signed transaction must make ar_tx:verify/1 return false.
+signature_commits_to_all_fields_test() ->
+    Wallet = ar_wallet:new_ecdsa(),
+    TX = make_signed_ecdsa_tx(Wallet),
+    Mutations =
+        [
+            {target, TX#tx{target = crypto:strong_rand_bytes(32)}},
+            {quantity, TX#tx{quantity = TX#tx.quantity + 1}},
+            {reward, TX#tx{reward = TX#tx.reward + 1}},
+            {anchor, TX#tx{anchor = crypto:strong_rand_bytes(32)}},
+            {data_size, TX#tx{data_size = TX#tx.data_size + 1}},
+            {data_root, TX#tx{data_root = crypto:strong_rand_bytes(32)}}
+        ],
+    lists:foreach(
+        fun({Field, MutatedTX}) ->
+            %% The comment term names the field, so a failure says which
+            %% mutation was wrongly accepted.
+            ?assertEqual(false, ar_tx:verify(MutatedTX), {mutated_field, Field})
+        end,
+        Mutations
+    ).
+
+%%%===================================================================
+%%% ar_wallet Tests
+%%%===================================================================
+
+%% @doc new_ecdsa/0 must return {{KeyType, Priv, Pub}, {KeyType, Pub}} with
+%% the secp256k1 ECDSA key type on both sides and the same public key.
+wallet_new_shape_contract_test() ->
+    Wallet = ar_wallet:new_ecdsa(),
+    {{KeyType1, _Priv, Pub1}, {KeyType2, Pub2}} = Wallet,
+    %% EUnit convention is ?assertEqual(Expected, Actual); keeping that order
+    %% makes the failure report label the two values correctly.
+    ExpectedKeyType = {?ECDSA_SIGN_ALG, secp256k1},
+    ?assertEqual(ExpectedKeyType, KeyType1),
+    ?assertEqual(ExpectedKeyType, KeyType2),
+    ?assertEqual(Pub1, Pub2).
+
+%% @doc Two freshly generated wallets must not share a public key.
+different_keys_produce_different_pubkeys_test() ->
+    Wallet1 = ar_wallet:new_ecdsa(),
+    Wallet2 = ar_wallet:new_ecdsa(),
+    {{_KeyType1, _Priv1, Pub1}, _} = Wallet1,
+    {{_KeyType2, _Priv2, Pub2}, _} = Wallet2,
+    ?assertNotEqual(Pub1, Pub2).
+
+%% @doc The wallet's public key must be 33 bytes (compressed form).
+valid_key_produces_33byte_compressed_pubkey_test() ->
+    {{_KeyType, _Priv, Pub}, _} = ar_wallet:new_ecdsa(),
+    ?assertEqual(33, byte_size(Pub)).
+
+%% @doc The wallet's private key must be 32 bytes.
+valid_key_produces_32byte_privkey_test() ->
+    {{_KeyType, Priv, _Pub}, _} = ar_wallet:new_ecdsa(),
+    ?assertEqual(32, byte_size(Priv)).
+
+%% @doc A wallet signature must be 65 bytes long.
+signature_is_65_bytes_test() ->
+    Wallet = ar_wallet:new_ecdsa(),
+    Msg = <<"test message">>,
+    Sig = ar_wallet:sign(Wallet, Msg),
+    ?assertEqual(65, byte_size(Sig)).
+
+%% @doc Verify Erlang wrapper correctly hashes messages before calling NIF.
+wrapper_sha256_correctness_test() ->
+    SecretKey = <<0:248, 1:8>>,
+    Payload = <<"test message">>,
+    %% Call wrapper (hashes internally)
+    ViaWrapper =
+        ar_wallet:sign(new_ecdsa_wallet_from_privkey(SecretKey), Payload),
+    %% Call NIF directly with pre-hashed message
+    {ok, ViaNif} =
+        secp256k1_nif:sign_recoverable(crypto:hash(sha256, Payload), SecretKey),
+    ?assertEqual(ViaWrapper, ViaNif).
+
+%% @doc Digests of 31 and 33 bytes must make sign_recoverable/2 raise an error.
+wrong_digest_size_rejected_test() ->
+    SecretKey = <<0:248, 1:8>>,
+    TooShort = <<0:(31*8)>>,
+    ?assertError(_, secp256k1_nif:sign_recoverable(TooShort, SecretKey)),
+    TooLong = <<0:(33*8)>>,
+    ?assertError(_, secp256k1_nif:sign_recoverable(TooLong, SecretKey)).
+
+%% @doc Signatures of 64 and 66 bytes must make recover_pk_and_verify/2 raise
+%% an error.
+wrong_signature_size_rejected_test() ->
+    ZeroDigest = <<0:256>>,
+    TooShort = <<0:(64*8)>>,
+    ?assertError(_, secp256k1_nif:recover_pk_and_verify(ZeroDigest, TooShort)),
+    TooLong = <<0:(66*8)>>,
+    ?assertError(_, secp256k1_nif:recover_pk_and_verify(ZeroDigest, TooLong)).
+
+%% @doc Recovering from an empty signature must return the empty binary.
+empty_signature_fails_test() ->
+    Recovered =
+        ar_wallet:recover_key(<<"empty sig test">>, <<>>, ?ECDSA_KEY_TYPE),
+    ?assertEqual(<<>>, Recovered).
+
+%% @doc A signature must verify against the wallet's own key type and
+%% public key.
+sign_verify_roundtrip_test() ->
+    Signer = {{KeyType, _Priv, Pub}, _} = ar_wallet:new_ecdsa(),
+    Payload = <<"wallet sign verify test">>,
+    Signature = ar_wallet:sign(Signer, Payload),
+    true = ar_wallet:verify({KeyType, Pub}, Payload, Signature).
+
+%% @doc The wallet address must equal SHA-256 of the public key.
+address_is_sha256_of_pubkey_test() ->
+    {{KeyType, _Priv, Pub}, _} = ar_wallet:new_ecdsa(),
+    Actual = ar_wallet:to_address(Pub, KeyType),
+    ?assertEqual(crypto:hash(sha256, Pub), Actual),
+    %% Verify it's not Keccak (Ethereum-style)
+    ?assertNotEqual(hb_keccak:key_to_ethereum_address(Pub), Actual).
+
+%% @doc The wallet address must be 32 bytes long.
+address_is_32_bytes_test() ->
+    {{KeyType, _Priv, Pub}, _} = ar_wallet:new_ecdsa(),
+    ?assertEqual(32, byte_size(ar_wallet:to_address(Pub, KeyType))).
+ +different_keys_different_addresses_test() -> + Wallet1 = ar_wallet:new_ecdsa(), + Wallet2 = ar_wallet:new_ecdsa(), + {{KeyType, _Priv1, Pub1}, _} = Wallet1, + {{KeyType, _Priv2, Pub2}, _} = Wallet2, + Address1 = ar_wallet:to_address(Pub1, KeyType), + Address2 = ar_wallet:to_address(Pub2, KeyType), + ?assertNotEqual(Address1, Address2). + +recover_key_returns_correct_pubkey_test() -> + Wallet = {{_KeyType, _Priv, Pub}, _} = ar_wallet:new_ecdsa(), + Data = <<"recover key test">>, + Signature = ar_wallet:sign(Wallet, Data), + RecoveredPub = ar_wallet:recover_key(Data, Signature, ?ECDSA_KEY_TYPE), + ?assertEqual(Pub, RecoveredPub). + +recover_key_empty_signature_returns_empty_test() -> + Data = <<"empty sig test">>, + EmptyPub = ar_wallet:recover_key(Data, <<>>, ?ECDSA_KEY_TYPE), + ?assertEqual(<<>>, EmptyPub). + +recover_key_wrong_message_returns_different_key_test() -> + Wallet = {{_KeyType, _Priv, Pub}, _} = ar_wallet:new_ecdsa(), + Data = <<"original message">>, + WrongData = <<"wrong message">>, + Signature = ar_wallet:sign(Wallet, Data), + RecoveredPub = ar_wallet:recover_key(WrongData, Signature, ?ECDSA_KEY_TYPE), + ?assertNotEqual(Pub, RecoveredPub). + +%%%=================================================================== +%%% ar_tx Tests +%%%=================================================================== +create_and_sign_tx_test() -> + Wallet = ar_wallet:new_ecdsa(), + TX = make_signed_ecdsa_tx(Wallet), + ?assertEqual(?ECDSA_KEY_TYPE, TX#tx.signature_type), + ?assertEqual(65, byte_size(TX#tx.signature)), + ?assertEqual(33, byte_size(TX#tx.owner)). + +verify_valid_tx_test() -> + Wallet = ar_wallet:new_ecdsa(), + TX = make_signed_ecdsa_tx(Wallet), + ?assertEqual(true, ar_tx:verify(TX)). 
+ +json_roundtrip_with_owner_recovery_test() -> + Wallet = ar_wallet:new_ecdsa(), + {{KeyType, _Priv, Pub}, _} = Wallet, + TX = make_signed_ecdsa_tx(Wallet), + ExpectedAddress = ar_wallet:to_address(Pub, KeyType), + JSON = ar_tx:tx_to_json_struct(TX), + JSONEmptyOwner = JSON#{<<"owner">> => <<>>}, + ParsedTX = ar_tx:json_struct_to_tx(JSONEmptyOwner), + ?assertEqual(Pub, ParsedTX#tx.owner), + ?assertEqual(ExpectedAddress, ParsedTX#tx.owner_address). + +rsa_and_ecdsa_coexist_independently_test() -> + EcdsaWallet = ar_wallet:new_ecdsa(), + RsaWallet = {{RsaKeyType, _RsaPriv, _RsaPub}, _} = ar_wallet:new(), + EcdsaTX = make_signed_ecdsa_tx(EcdsaWallet), + ?assertEqual(true, ar_tx:verify(EcdsaTX)), + RsaTX = make_ecdsa_tx(), + RsaTX2 = ar_tx:sign(RsaTX, RsaWallet), + ?assertEqual(true, ar_tx:verify(RsaTX2)), + {{EcdsaKeyType, _, _}, _} = EcdsaWallet, + ?assertEqual(EcdsaKeyType, EcdsaTX#tx.signature_type), + ?assertEqual(RsaKeyType, RsaTX2#tx.signature_type). + +full_sign_json_recover_verify_roundtrip_test() -> + Wallet = ar_wallet:new_ecdsa(), + TX = make_signed_ecdsa_tx(Wallet), + ?assertEqual(true, ar_tx:verify(TX)), + JSON = ar_tx:tx_to_json_struct(TX), + JSONEmptyOwner = JSON#{<<"owner">> => <<>>}, + ParsedTX = ar_tx:json_struct_to_tx(JSONEmptyOwner), + ?assertEqual(true, ar_tx:verify(ParsedTX)). 
+ +sig_segment_excludes_owner_for_ecdsa_test() -> + {{EcdsaKeyType1, _EcdsaPriv1, EcdsaPub1}, _} = ar_wallet:new_ecdsa(), + {{EcdsaKeyType2, _EcdsaPriv2, EcdsaPub2}, _} = ar_wallet:new_ecdsa(), + BaseTX = #tx{ + format = 2, + target = <<0:256>>, + quantity = 100, + reward = 10, + anchor = <<0:256>>, + tags = [], + data = <<>>, + data_size = 0, + data_root = <<>> + }, + TX1 = BaseTX#tx{ + owner = EcdsaPub1, + signature_type = EcdsaKeyType1, + owner_address = ar_wallet:to_address(EcdsaPub1, EcdsaKeyType1) + }, + TX2 = BaseTX#tx{ + owner = EcdsaPub2, + signature_type = EcdsaKeyType2, + owner_address = ar_wallet:to_address(EcdsaPub2, EcdsaKeyType2) + }, + Segment1 = ar_tx:generate_signature_data_segment(TX1), + Segment2 = ar_tx:generate_signature_data_segment(TX2), + ?assertEqual(Segment1, Segment2), + ?assert(is_binary(Segment1)), + ?assert(byte_size(Segment1) > 0). + +sig_segment_includes_owner_for_rsa_test() -> + {{RsaKeyType1, _RsaPriv1, RsaPub1}, _} = ar_wallet:new(), + {{RsaKeyType2, _RsaPriv2, RsaPub2}, _} = ar_wallet:new(), + BaseTX = #tx{ + format = 2, + target = <<0:256>>, + quantity = 100, + reward = 10, + anchor = <<0:256>>, + tags = [], + data = <<>>, + data_size = 0, + data_root = <<>> + }, + TX1 = BaseTX#tx{ + owner = RsaPub1, + signature_type = RsaKeyType1, + owner_address = ar_wallet:to_address(RsaPub1, RsaKeyType1) + }, + TX2 = BaseTX#tx{ + owner = RsaPub2, + signature_type = RsaKeyType2, + owner_address = ar_wallet:to_address(RsaPub2, RsaKeyType2) + }, + Segment1 = ar_tx:generate_signature_data_segment(TX1), + Segment2 = ar_tx:generate_signature_data_segment(TX2), + ?assertNotEqual(Segment1, Segment2), + ?assert(is_binary(Segment1)), + ?assert(is_binary(Segment2)), + ?assert(byte_size(Segment1) > 0), + ?assert(byte_size(Segment2) > 0). 
+ +%%%=================================================================== +%%% Test Helper Functions +%%%=================================================================== + +%% @doc Generate an ECDSA keypair from a known private key. +new_ecdsa_wallet_from_privkey(PrivKey) when byte_size(PrivKey) =:= 32 -> + {OrigPub, _} = crypto:generate_key(ecdh, secp256k1, PrivKey), + CompressedPub = ar_wallet:compress_ecdsa_pubkey(OrigPub), + KeyType = {?ECDSA_SIGN_ALG, secp256k1}, + {{KeyType, PrivKey, CompressedPub}, {KeyType, CompressedPub}}. + +%% @doc Create a simple ECDSA transaction (unsigned). +make_ecdsa_tx() -> + #tx{ + format = 2, + target = crypto:strong_rand_bytes(32), + quantity = 100, + reward = 10, + anchor = crypto:strong_rand_bytes(32), + tags = [], + data = <<>>, + data_size = 0, + data_root = <<>> + }. + +%% @doc Create and sign an ECDSA transaction using the NIF directly. +make_signed_ecdsa_tx({{KeyType, Priv, Pub}, {KeyType, Pub}}) -> + TX = make_ecdsa_tx(), + TX2 = TX#tx{ + owner = Pub, + signature_type = KeyType, + owner_address = ar_wallet:to_address(Pub, KeyType) + }, + SignatureDataSegment = ar_tx:generate_signature_data_segment(TX2), + Signature = ar_wallet:sign({{KeyType, Priv, Pub}, {KeyType, Pub}}, SignatureDataSegment), + TX3 = TX2#tx{signature = Signature}, + TX3#tx{ + id = ar_tx:id(TX3, signed) + }; +make_signed_ecdsa_tx(Wallet) when is_tuple(Wallet) -> + case Wallet of + {{KeyType, Priv, Pub}, {KeyType, Pub}} -> + make_signed_ecdsa_tx({{KeyType, Priv, Pub}, {KeyType, Pub}}); + {Priv, {KeyType, Pub}} -> + make_signed_ecdsa_tx({{KeyType, Priv, Pub}, {KeyType, Pub}}) + end. + +%% @doc Corrupt a signature by flipping a bit at a specific byte position. +corrupt_signature(Sig, BytePos) when BytePos < byte_size(Sig) -> + <> = Sig, + <>; +corrupt_signature(Sig, _BytePos) -> + Sig. + +%% @doc Zero out a range of bytes in a signature. 
+zero_signature_range(Sig, Start, Length) -> + Size = byte_size(Sig), + End = min(Start + Length, Size), + <> = Sig, + Zeros = binary:copy(<<0>>, End - Start), + <>. + +%% @doc Extract s value from signature (bytes 32-63). +extract_s_value(Sig) when byte_size(Sig) >= 64 -> + <<_R:32/binary, S:32/binary, _RecId/binary>> = Sig, + S. + +%% @doc Create high-S signature from low-S signature. +%% Computes s_high = n - s_low and replaces s in signature. +create_high_s_signature(Sig) when byte_size(Sig) =:= 65 -> + <> = Sig, + SInt = binary:decode_unsigned(S, big), + SHigh = ?SECP256K1_ORDER - SInt, + SHighBin = binary:encode_unsigned(SHigh, big), + %% Pad to 32 bytes if needed + SHighPadded = pad_to_32_bytes(SHighBin), + <>. + +pad_to_32_bytes(Bin) when byte_size(Bin) =:= 32 -> + Bin; +pad_to_32_bytes(Bin) when byte_size(Bin) < 32 -> + Padding = binary:copy(<<0>>, 32 - byte_size(Bin)), + <>. \ No newline at end of file diff --git a/src/hb_event.erl b/src/hb_event.erl index ab32856de..3a3300607 100644 --- a/src/hb_event.erl +++ b/src/hb_event.erl @@ -1,15 +1,31 @@ %%% @doc Wrapper for incrementing prometheus counters. -module(hb_event). -export([counters/0, diff/1, diff/2]). +-export([debug_print/4, debug_print/5, debug_print/6]). +-export([format_file_log/2]). -export([log/1, log/2, log/3, log/4, log/5, log/6]). +-export([log_event/6]). +-export([setup_logger/0]). -export([increment/3, increment/4, increment_callers/1]). -include("include/hb.hrl"). -include_lib("eunit/include/eunit.hrl"). --define(OVERLOAD_QUEUE_LENGTH, 10000). --define(MAX_MEMORY, 1_000_000_000). % 1GB +-define(OVERLOAD_QUEUE_LENGTH, 10_000). +-define(MAX_MEMORY, 50_000_000). % 50 MB +-define(MAX_EVENT_NAME_LENGTH, 100). +%% OTP handler for logging to disk. +-define(PRINT_LOGGER, hb_print_logger). +-define(PRINT_LOGGER_DOMAIN, [hb_print]). +-define(FILE_LOGGER, hb_file_logger). +%% OTP logger domain, logs sent with this domain are directed to hb_file_logger. +-define(FILE_LOGGER_DOMAIN, [hb_log]). 
+-define(DEFAULT_PRINT_HANDLER_FILTER, hb_drop_hb_print_logs). +-define(DEFAULT_FILE_HANDLER_FILTER, hb_drop_hb_file_logs). -ifdef(NO_EVENTS). +debug_print(_X, _Mod, _Func, _Line) -> ok. +debug_print(_X, _Mod, _Func, _Line, _Opts) -> ok. +debug_print(_Topic, _X, _Mod, _Func, _Line, _Opts) -> ok. log(_X) -> ok. log(_Topic, _X) -> ok. log(_Topic, _X, _Mod) -> ok. @@ -27,35 +43,191 @@ log(Topic, X, Mod, Func, Line) -> log(Topic, X, Mod, Func, Line, #{}). log(Topic, X, Mod, undefined, Line, Opts) -> log(Topic, X, Mod, "", Line, Opts); log(Topic, X, Mod, Func, undefined, Opts) -> log(Topic, X, Mod, Func, "", Opts); log(Topic, X, Mod, Func, Line, Opts) -> - % Check if the debug_print option has the topic in it if set. - case should_print(Topic, Opts) orelse should_print(Mod, Opts) of - true -> hb_format:print(X, Mod, Func, Line, Opts); - false -> X - end, + debug_print(Topic, X, Mod, Func, Line, Opts), try increment(Topic, X, Opts) catch _:_ -> ok end, % Return the logged value to the caller. This allows callers to insert % `?event(...)' macros into the flow of other executions, without having to % break functional style. X. + +debug_print(X, Mod, Func, Line) -> + debug_print(X, Mod, Func, Line, #{}). +debug_print(X, Mod, Func, Line, Opts) -> + debug_print(debug_print, X, Mod, Func, Line, Opts). +debug_print(Topic, X, Mod, Func, Line, Opts) -> + case should_print(print, Topic, Opts) + orelse should_print(print, Mod, Opts) + of + true -> print_event(Topic, X, Mod, Func, Line, Opts); + false -> X + end, + case should_print(log, Topic, Opts) + orelse should_print(log, Mod, Opts) + of + true -> log_event(Topic, X, Mod, Func, Line, Opts); + false -> ok + end, + X. -endif. -%% @doc Determine if the topic should be printed. Uses a cache in the process -%% dictionary to avoid re-checking the same topic multiple times. -should_print(Topic, Opts) -> - case erlang:get({event_print, Topic}) of +%% @doc Determine if the topic should be printed or logged. 
Uses a cache in the +%% process dictionary to avoid re-checking the same topic multiple times. +should_print(Type, Topic, Opts) -> + case erlang:get({event_print, Type, Topic}) of {cached, X} -> X; undefined -> Result = - case hb_opts:get(debug_print, false, Opts) of + case hb_opts:get(print_opt(Type), false, Opts) of EventList when is_list(EventList) -> lists:member(Topic, EventList); true -> true; false -> false end, - erlang:put({event_print, Topic}, {cached, Result}), + erlang:put({event_print, Type, Topic}, {cached, Result}), Result end. +print_opt(print) -> debug_print; +print_opt(log) -> debug_log. + +%% @doc Configure a rotating file logger for HyperBEAM events. +setup_logger() -> + LogFile = + filename:join( + hb_util:list(hb_opts:get(log_dir)), + "hyperbeam.log" + ), + ok = filelib:ensure_dir(LogFile), + setup_handler( + ?PRINT_LOGGER, + ?DEFAULT_PRINT_HANDLER_FILTER, + ?PRINT_LOGGER_DOMAIN, + print_logger_config() + ), + setup_handler( + ?FILE_LOGGER, + ?DEFAULT_FILE_HANDLER_FILTER, + ?FILE_LOGGER_DOMAIN, + file_logger_config(LogFile) + ). + +setup_handler(Handler, DefaultFilter, Domain, Config) -> + ensure_default_handler_filter(DefaultFilter, Domain), + logger:remove_handler(Handler), + case logger:add_handler(Handler, logger_std_h, Config) of + ok -> ok; + {error, {handler_not_added, {already_exist, _}}} -> ok; + {error, {handler_not_added, {already_started, _}}} -> ok; + {error, {already_exist, _}} -> ok; + {error, {already_started, _}} -> ok; + {error, HandlerReason} -> erlang:error(HandlerReason) + end. + +%% @doc Build the OTP logger configuration for the HyperBEAM file handler. +file_logger_config(LogFile) -> + logger_handler_config( + ?FILE_LOGGER_DOMAIN, + hb_log_domain, + #{ + report_cb => fun ?MODULE:format_file_log/2, + template => [time, " ", msg, "\n"], + single_line => false + }, + #{ + file => LogFile, + max_no_bytes => hb_opts:get(log_max_bytes), + max_no_files => hb_opts:get(log_max_files) + } + ). 
+ +print_logger_config() -> + logger_handler_config( + ?PRINT_LOGGER_DOMAIN, + hb_print_domain, + #{ + report_cb => fun ?MODULE:format_file_log/2, + template => [msg, "\n"], + single_line => false + }, + #{type => standard_error} + ). + +logger_handler_config(Domain, FilterId, FormatterConfig, HandlerConfig) -> + #{ + level => all, + sync_mode_qlen => 200, + drop_mode_qlen => 200, + flush_qlen => 1000, + burst_limit_enable => true, + burst_limit_max_count => 500, + burst_limit_window_time => 1000, + filter_default => stop, + filters => + [ + { + FilterId, + {fun logger_filters:domain/2, {log, sub, Domain}} + } + ], + formatter => {logger_formatter, FormatterConfig}, + config => HandlerConfig + }. + +ensure_default_handler_filter(FilterId, Domain) -> + logger:remove_handler_filter(default, FilterId), + case logger:add_handler_filter( + default, + FilterId, + {fun logger_filters:domain/2, {stop, sub, Domain}} + ) of + ok -> ok; + {error, {already_exist, _}} -> ok; + {error, FilterReason} -> erlang:error(FilterReason) + end. + +print_event(Topic, X, Mod, Func, Line, Opts) -> + logger:log( + notice, + event_report(X, Mod, Func, Line, Opts), + (event_metadata(Topic, Mod, Func, Line))#{ + domain => ?PRINT_LOGGER_DOMAIN + } + ). + +%% @doc Queue an event for asynchronous file logging via OTP logger. +log_event(Topic, X, Mod, Func, Line, Opts) -> + logger:log( + notice, + event_report(X, Mod, Func, Line, Opts), + (event_metadata(Topic, Mod, Func, Line))#{ + domain => ?FILE_LOGGER_DOMAIN + } + ). + +event_report(X, Mod, Func, Line, Opts) -> + #{ + event => X, + line => Line, + function => Func, + module => Mod, + opts => Opts + }. + +event_metadata(Topic, Mod, Func, Line) -> + #{ + line => Line, + function => Func, + module => Mod, + topic => Topic + }. + +%% @doc Render the event log entry in the logger handler process. 
+format_file_log( + #{event := X, line := Line, function := Func, module := Mod, opts := Opts}, + _Config +) -> + hb_format:format_debug(X, Mod, Func, Line, Opts). + %% @doc Increment the counter for the given topic and message. Registers the %% counter if it doesn't exist. If the topic is `global', the message is ignored. %% This means that events must specify a topic if they want to be counted, @@ -66,11 +238,15 @@ should_print(Topic, Opts) -> %% would otherwise cause heavy performance costs. increment(Topic, Message, Opts) -> increment(Topic, Message, Opts, 1). +increment(ids, _Message, _Opts, _Count) -> ignored; increment(global, _Message, _Opts, _Count) -> ignored; increment(linkify, _Message, _Opts, _Count) -> ignored; increment(debug_linkify, _Message, _Opts, _Count) -> ignored; increment(debug_id, _Message, _Opts, _Count) -> ignored; +increment(debug_enc, _Message, _Opts, _Count) -> ignored; increment(debug_commitments, _Message, _Opts, _Count) -> ignored; +increment(message_set, _Message, _Opts, _Count) -> ignored; +increment(read_cached, _Message, _Opts, _Count) -> ignored; increment(ao_core, _Message, _Opts, _Count) -> ignored; increment(ao_internal, _Message, _Opts, _Count) -> ignored; increment(ao_devices, _Message, _Opts, _Count) -> ignored; @@ -80,6 +256,7 @@ increment(id_base, _Message, _Opts, _Count) -> ignored; increment(parsing, _Message, _Opts, _Count) -> ignored; increment(Topic, Message, _Opts, Count) -> case parse_name(Topic) of + no_event_name -> ignored; <<"debug", _/binary>> -> ignored; TopicBin -> find_event_server() ! {increment, TopicBin, parse_name(Message), Count} @@ -144,43 +321,47 @@ raw_counters() -> []. -else. raw_counters() -> - ets:tab2list(prometheus_counter_table). + ets:match_object( + prometheus_counter_table, + {{default, <<"event">>, '_', '_'}, '_', '_'} + ). -endif. %% @doc Find the event server, creating it if it doesn't exist. We cache the %% result in the process dictionary to avoid looking it up multiple times. 
find_event_server() -> - case erlang:get({event_server, ?MODULE}) of - {cached, Pid} -> Pid; - undefined -> - PID = - case hb_name:lookup(?MODULE) of - Pid when is_pid(Pid) -> Pid; - undefined -> - NewServer = spawn(fun() -> server() end), - hb_name:register(?MODULE, NewServer), - NewServer - end, - erlang:put({event_server, ?MODULE}, {cached, PID}), - PID - end. + hb_name:singleton(?MODULE, fun() -> server() end). server() -> - await_prometheus_started(), - prometheus_counter:declare( + hb_prometheus:ensure_started(), + ensure_event_counter(), + handle_events(). + +ensure_event_counter() -> + hb_prometheus:declare( + counter, [ {name, <<"event">>}, {help, <<"AO-Core execution events">>}, {labels, [topic, event]} - ]), - handle_events(). + ]). + handle_events() -> + handle_events(0). +handle_events(N) -> receive - {increment, TopicBin, EventName, Count} -> + {increment, Topic, Event, Count} -> + BatchCount = 0, + prometheus_counter:inc(<<"event">>, [Topic, Event], Count + BatchCount), + check_overload({Topic, Event}, N), + handle_events(N + 1) + end. + +check_overload(Last, N) -> + case N rem 1000 of + 0 -> case erlang:process_info(self(), message_queue_len) of {message_queue_len, Len} when Len > ?OVERLOAD_QUEUE_LENGTH -> - % Print a warning, but do so less frequently the more - % overloaded the system is. {memory, MemorySize} = erlang:process_info(self(), memory), case rand:uniform(max(1000, Len - ?OVERLOAD_QUEUE_LENGTH)) of 1 -> @@ -188,14 +369,12 @@ handle_events() -> {warning, prometheus_event_queue_overloading, {queue, Len}, - {current_message, EventName}, + {last_event, Last}, {memory_bytes, MemorySize} } ); _ -> ignored end, - % If the size of this process is too large, exit such that - % we can be restarted by the next caller. 
case MemorySize of MemorySize when MemorySize > ?MAX_MEMORY -> ?debug_print( @@ -203,36 +382,29 @@ handle_events() -> prometheus_event_queue_terminating_on_memory_overload, {queue, Len}, {memory_bytes, MemorySize}, - {current_message, EventName} + {last_event, Last} } ), exit(memory_overload); _ -> no_action end; _ -> ignored - end, - prometheus_counter:inc(<<"event">>, [TopicBin, EventName], Count), - handle_events() - end. - -%% @doc Delay the event server until prometheus is started. -await_prometheus_started() -> - receive - Msg -> - case application:get_application(prometheus) of - undefined -> await_prometheus_started(); - _ -> self() ! Msg, ok - end + end; + _ -> ok end. parse_name(Name) when is_tuple(Name) -> parse_name(element(1, Name)); parse_name(Name) when is_atom(Name) -> atom_to_binary(Name, utf8); -parse_name(Name) when is_binary(Name) -> - Name; +parse_name(Name) + when is_binary(Name) + andalso byte_size(Name) > ?MAX_EVENT_NAME_LENGTH -> + no_event_name; parse_name(Name) when is_list(Name) -> iolist_to_binary(Name); +parse_name(Name) when is_binary(Name) -> + Name; parse_name(_) -> no_event_name. %%% Benchmark tests @@ -257,8 +429,8 @@ benchmark_print_lookup_test() -> Iterations = hb_test_utils:benchmark( fun() -> - should_print(test_module, DefaultOpts) - orelse should_print(test_event, DefaultOpts) + should_print(print, test_module, DefaultOpts) + orelse should_print(print, test_event, DefaultOpts) end, 0.25 ), @@ -276,3 +448,143 @@ benchmark_increment_test() -> hb_test_utils:benchmark_print(<<"Incremented">>, <<"events">>, Iterations), ?assert(Iterations >= 1000), ok. + +should_log_test() -> + ?assertEqual(true, should_print(log, topic_a, #{ debug_log => [topic_a] })), + ?assertEqual(false, should_print(log, topic_b, #{ debug_log => [topic_a] })), + ?assertEqual(true, should_print(log, topic_c, #{ debug_log => true })), + ?assertEqual(false, should_print(log, topic_d, #{ debug_log => false })). + +-ifdef(NO_EVENTS). 
+benchmark_drain_rate_test() -> ok. +batch_correctness_test() -> ok. +overload_checks_past_first_thousand_test() -> ok. +-else. +benchmark_drain_rate_test() -> + NumKeys = 50, + NumEvents = 100000, + log(warmup, {warmup, 0}), + timer:sleep(100), + EventPid = hb_name:lookup(?MODULE), + wait_drain(EventPid, 5000), + erlang:suspend_process(EventPid), + Keys = + [ + { + hb_util:bin([<<"corr-topic-">>, hb_util:int(K)]), + hb_util:bin([<<"corr-event-">>, hb_util:int(K)]) + } + || + K <- lists:seq(1, NumKeys) + ], + fill_mailbox(EventPid, NumEvents, Keys), + erlang:resume_process(EventPid), + {DrainTime, _} = + timer:tc( + fun() -> + wait_drain(EventPid, 30000) + end + ), + DrainRate = round(NumEvents / (max(1, DrainTime) / 1_000_000)), + hb_test_utils:benchmark_print( + <<"Drained">>, + <<"events">>, + DrainRate, + 1 + ), + ?assert(DrainRate >= 10000), + ok. + +batch_correctness_test() -> + log(warmup, {warmup, 0}), + timer:sleep(100), + EventPid = hb_name:lookup(?MODULE), + wait_drain(EventPid, 5000), + NumKeys = 5, + N = 30_000, + Keys = [{list_to_binary("corr_topic_" ++ integer_to_list(K)), + list_to_binary("corr_event_" ++ integer_to_list(K))} + || K <- lists:seq(1, NumKeys)], + Before = counters(), + BeforeCounts = [{T, E, deep_get([T, E], Before, 0)} || {T, E} <- Keys], + erlang:suspend_process(EventPid), + lists:foreach(fun(I) -> + {T, E} = lists:nth((I rem NumKeys) + 1, Keys), + EventPid ! {increment, T, E, 1} + end, lists:seq(1, N)), + erlang:resume_process(EventPid), + wait_drain(EventPid, 30000), + After = counters(), + PerKey = N div NumKeys, + lists:foreach(fun({T, E, BeforeVal}) -> + AfterVal = deep_get([T, E], After, 0), + ?assertEqual(PerKey, AfterVal - BeforeVal) + end, BeforeCounts), + ok. 
+ +overload_checks_past_first_thousand_test() -> + {EventPid, Ref} = + spawn_monitor( + fun() -> + hb_prometheus:ensure_started(), + ensure_event_counter(), + handle_events(1000) + end + ), + erlang:suspend_process(EventPid), + Topic = lists:duplicate(256, $a), + Event = lists:duplicate(256, $b), + lists:foreach( + fun(_) -> + EventPid ! {increment, Topic, Event, 1} + end, + lists:seq(1, ?OVERLOAD_QUEUE_LENGTH + 100) + ), + {message_queue_len, QueueLen} = + erlang:process_info(EventPid, message_queue_len), + {memory, MemorySize} = erlang:process_info(EventPid, memory), + ?assert(QueueLen > ?OVERLOAD_QUEUE_LENGTH), + ?assert(MemorySize > ?MAX_MEMORY), + erlang:resume_process(EventPid), + receive + {'DOWN', Ref, process, EventPid, memory_overload} -> + ok; + {'DOWN', Ref, process, EventPid, Reason} -> + ?assertEqual(memory_overload, Reason) + after 5000 -> + exit(EventPid, kill), + error(memory_overload_not_triggered) + end. + +deep_get([Group, Name], Map, Default) -> + case maps:get(Group, Map, undefined) of + undefined -> Default; + Inner -> maps:get(Name, Inner, Default) + end. + +%% @doc Fill the event server mailbox with a list of keys. Rotate the keys to +%% ensure that we are testing the event server's ability to handle many different +%% types of event. +fill_mailbox(_Pid, 0, _Keys) -> ok; +fill_mailbox(Pid, N, Keys = [{Topic, Event}|_]) -> + Pid ! {increment, Topic, Event, 1}, + fill_mailbox(Pid, N - 1, hb_util:shuffle(Keys)). + +wait_drain(Pid, Timeout) -> + Deadline = erlang:monotonic_time(millisecond) + Timeout, + wait_drain_loop(Pid, Deadline). + +wait_drain_loop(Pid, Deadline) -> + case erlang:process_info(Pid, message_queue_len) of + {message_queue_len, 0} -> ok; + {message_queue_len, _} -> + case erlang:monotonic_time(millisecond) >= Deadline of + true -> error(drain_timeout); + false -> + timer:sleep(10), + wait_drain_loop(Pid, Deadline) + end; + undefined -> + error(event_server_dead) + end. +-endif. 
diff --git a/src/hb_format.erl b/src/hb_format.erl index 20e173563..36d3a8844 100644 --- a/src/hb_format.erl +++ b/src/hb_format.erl @@ -10,6 +10,7 @@ -module(hb_format). %%% Public API. -export([term/1, term/2, term/3]). +-export([format_debug/5]). -export([print/1, print/3, print/4, print/5, eunit_print/2]). -export([message/1, message/2, message/3]). -export([binary/2, error/2, trace/1, trace_short/0, trace_short/1]). @@ -18,6 +19,7 @@ %%% Public Utility Functions. -export([escape_format/1, short_id/1, trace_to_list/1]). -export([get_trace/1, print_trace/4, trace_macro_helper/5, print_trace_short/4]). +-export([process_from_trace/1]). -include("include/hb.hrl"). %%% Characters that are considered noise and should be removed from strings @@ -29,15 +31,16 @@ print(X) -> print(X, <<>>, #{}). print(X, Info, Opts) -> - io:format( - standard_error, - "=== HB DEBUG ===~s==>~n~s~n", - [Info, term(X, Opts, 0)] - ), + io:format(standard_error, "~s~n", [render_debug(X, Info, Opts)]), X. print(X, Mod, Func, LineNum) -> print(X, debug_trace(Mod, Func, LineNum, #{}), #{}). print(X, Mod, Func, LineNum, Opts) -> + io:format(standard_error, "~s~n", [format_debug(X, Mod, Func, LineNum, Opts)]), + X. + +%% @doc Format a debug message without writing it, preserving the standard layout. +format_debug(X, Mod, Func, LineNum, Opts) -> Now = erlang:system_time(millisecond), Last = erlang:put(last_debug_print, Now), TSDiff = case Last of undefined -> 0; _ -> Now - Last end, @@ -61,7 +64,16 @@ print(X, Mod, Func, LineNum, Opts) -> ] ) ), - print(X, Info, Opts). + render_debug(X, Info, Opts). + +%% @doc Render a debug message using the standard HyperBEAM layout. +render_debug(X, Info, Opts) -> + hb_util:bin( + io_lib:format( + "=== HB DEBUG ===~s==>~n~s", + [Info, term(X, Opts, 0)] + ) + ). %% @doc Retreive the server ID of the calling process, if known. server_id() -> @@ -401,9 +413,9 @@ escape_format(Else) -> Else. %% @doc Format an error message as a string. 
error(ErrorMsg, Opts) -> - Type = hb_ao:get(<<"type">>, ErrorMsg, <<"">>, Opts), - Details = hb_ao:get(<<"details">>, ErrorMsg, <<"">>, Opts), - Stacktrace = hb_ao:get(<<"stacktrace">>, ErrorMsg, <<"">>, Opts), + Type = hb_maps:get(<<"type">>, ErrorMsg, <<"[No type]">>, Opts), + Details = hb_maps:get(<<"details">>, ErrorMsg, <<"[No details]">>, Opts), + Stacktrace = hb_maps:get(<<"stacktrace">>, ErrorMsg, <<"[No trace]">>, Opts), hb_util:bin( [ <<"Termination type: '">>, Type, @@ -595,6 +607,63 @@ trace_short(Type) when is_atom(Type) -> trace_short(get_trace(Type)); trace_short(Trace) when is_list(Trace) -> lists:join(" / ", lists:reverse(trace_to_list(Trace))). +process_from_trace([]) -> + <<"unknown">>; +process_from_trace(Trace) -> + % Prefer the outermost non-glue MFA (walk from trace bottom / + % process entry). That matches a caller above pmap/proc_lib glue and + % stays stable when the innermost slot is generic (e.g. timer:sleep) while + % a user job remains deeper in the chain. + case process_from_trace(lists:reverse(Trace), false) of + none -> + <<"unknown">>; + Found -> + Found + end. + +%% @doc First non-glue TraceElement scanning `Trace` from its head. +process_from_trace([], _) -> + none; +process_from_trace([TraceElement | Rest], Spawner) -> + case {trace_element_is_glue(TraceElement), Spawner} of + {true, _} -> + % Flag whether or not this is an anonymous process spawned + % by hb_pmap. + NextSpawner = case TraceElement of + {hb_pmap, _, _, _} -> + hb_pmap; + _ -> + Spawner + end, + process_from_trace(Rest, NextSpawner); + {false, false} -> + hb_util:bin(trace_element(TraceElement)); + {false, Spawner} -> + << + (hb_util:bin(Spawner))/binary, + "->", + (hb_util:bin(trace_element(TraceElement)))/binary + >> + end. + +trace_element_is_glue({proc_lib, init_p_do_apply, _, _}) -> + true; +trace_element_is_glue({hb_pmap, F, _, _}) -> + is_erlang_generated_fun_name(F); +trace_element_is_glue(_) -> + false. 
+ +%% @doc True for compiler-generated fun atoms like `'-foo/1-fun-0-'`. +is_erlang_generated_fun_name(Func) when is_atom(Func) -> + case atom_to_binary(Func, utf8) of + <<"-", Rest/binary>> -> + binary:match(Rest, <<"-fun-">>) =/= nomatch; + _ -> + false + end; +is_erlang_generated_fun_name(_) -> + false. + %% @doc Format a trace element in form `mod:line' or `mod:func' for Erlang %% traces, or their raw form for others. trace_element(Bin) when is_binary(Bin) -> Bin; @@ -958,7 +1027,7 @@ format_key(true, Committed, Key, ToPrint, Opts) -> case lists:member(NormKey = hb_ao:normalize_key(Key, Opts), Committed) of true when ToPrint == undefined -> <<"* ", NormKey/binary>>; true -> <<"* ", ToPrint/binary>>; - false -> format_key(false, Committed, Key, undefined, Opts) + false -> format_key(false, Committed, Key, ToPrint, Opts) end. %% @doc Return a formatted list of short IDs, given a raw list of IDs. diff --git a/src/hb_gateway_client.erl b/src/hb_gateway_client.erl index 6165841cf..8826bfc39 100644 --- a/src/hb_gateway_client.erl +++ b/src/hb_gateway_client.erl @@ -110,14 +110,19 @@ data(ID, Opts) -> }, case hb_http:request(Req, Opts) of {ok, Res} -> + Data = + case hb_maps:find(<<"data">>, Res, Opts) of + {ok, D} -> D; + _ -> hb_ao:get(<<"body">>, Res, <<>>, Opts) + end, ?event(gateway, {data, {id, ID}, {response, Res}, - {body, hb_ao:get(<<"body">>, Res, <<>>, Opts)} + {data, Data} } ), - {ok, hb_ao:get(<<"body">>, Res, <<>>, Opts)}; + {ok, Data}; Res -> ?event(gateway, {request_error, {id, ID}, {response, Res}}), {error, no_viable_gateway} @@ -257,7 +262,8 @@ result_to_message(ExpectedID, Item, Opts) -> ), SignatureType = case byte_size(Signature) of - 65 -> {ecdsa, 256}; + 64 -> {eddsa, ed25519}; + 65 -> {ecdsa, secp256k1}; 512 -> {rsa, 65537}; _ -> unsupported_tx_signature_type end, @@ -424,15 +430,48 @@ l1_transaction_test() -> %% @doc Test l2 message from graphql l2_dataitem_test() -> _Node = hb_http_server:start_node(#{}), - {ok, Res} = 
read(<<"oyo3_hCczcU7uYhfByFZ3h0ELfeMMzNacT-KpRoJK6g">>, #{}), + {ok, Res} = read(ID = <<"oyo3_hCczcU7uYhfByFZ3h0ELfeMMzNacT-KpRoJK6g">>, #{}), ?event(gateway, {l2_dataitem, Res}), + Opts = #{}, + CommitmentType = hb_util:deep_get( + [<<"commitments">>, ID, <<"type">>], + Res, + not_found, + Opts + ), + ?assertEqual(?RSA_SIGN_TYPE, CommitmentType), Data = maps:get(<<"data">>, Res), ?assertEqual(<<"Hello World">>, Data). +%% @doc ed25519 L2 Transaction test +l2_dataitem_ed25519_test() -> + _Node = hb_http_server:start_node(#{}), + ID = <<"AwrAs-HaBlc8xeI8sw6Wpbi7A0weQWeXYwW20CpX5oM">>, + {ok, Res} = read(ID, #{}), + ?event(gateway, {l2_dataitem, Res}), + Opts = #{}, + CommitmentType = hb_util:deep_get( + [<<"commitments">>, ID, <<"type">>], + Res, + not_found, + Opts + ), + ?assertEqual(?EDDSA_SIGN_TYPE, CommitmentType), + CommitmentCommitter = hb_util:deep_get( + [<<"commitments">>, ID, <<"committer">>], + Res, + not_found, + Opts + ), + ?assertEqual(<<"ejhYD9Cw9VCsVik6yGLoclo3CLRvAITHTZamLY_6ro4">>, CommitmentCommitter), + %% Check Data + Data = maps:get(<<"data">>, Res), + ?assertEqual(<<"{\"displayName\":\"Test Hub\",\"description\":\"This is a test hub created in the test suite\",\"externalurl\":\"\",\"image\":\"\"}">>, Data). + %% @doc Test optimistic index ao_dataitem_test() -> _Node = hb_http_server:start_node(#{}), {ok, Res} = read(<<"oyo3_hCczcU7uYhfByFZ3h0ELfeMMzNacT-KpRoJK6g">>, #{}), ?event(gateway, {l2_dataitem, Res}), Data = maps:get(<<"data">>, Res), - ?assertEqual(<<"Hello World">>, Data). + ?assertEqual(<<"Hello World">>, Data). \ No newline at end of file diff --git a/src/hb_http.erl b/src/hb_http.erl index 197ba2483..d9bde0da5 100644 --- a/src/hb_http.erl +++ b/src/hb_http.erl @@ -14,6 +14,7 @@ -define(DEFAULT_FILTER_KEYS, [<<"content-length">>]). start() -> + init_prometheus(), httpc:set_options([{max_keep_alive_length, 0}]), ok. 
@@ -104,23 +105,33 @@ request(Method, Peer, Path, RawMessage, Opts) -> ), StartTime = os:system_time(millisecond), % Perform the HTTP request. - {_ErlStatus, Status, Headers, Body} = hb_http_client:request(Req, Opts), + Response = hb_http_client:request(Req, Opts), % Process the response. EndTime = os:system_time(millisecond), - ?event(http_outbound, - { - http_response, - {req, Req}, - {response, - #{ - status => Status, - headers => Headers, - body => Body - } - } - }, - Opts - ), + Duration = EndTime - StartTime, + case Response of + {_ErlStatus, Status, Headers, Body} -> + ?event(http_outbound, + { + http_response, + {req, Req}, + {response, + #{ + status => Status, + headers => Headers, + body => Body + } + } + }, + Opts + ), + request_response(Method, Peer, Path, Response, Duration, Opts); + Error -> + Error + end. + +request_response(Method, Peer, Path, Response, Duration, Opts) -> + {_ErlStatus, Status, Headers, Body} = Response, % Convert the set-cookie headers into a cookie message, if they are present. % We do this by extracting the set-cookie headers and converting them into a % cookie message if they are present. @@ -154,14 +165,14 @@ request(Method, Peer, Path, RawMessage, Opts) -> % constructed from the header key-value pair list. 
HeaderMap = hb_maps:merge(hb_maps:from_list(Headers), MaybeSetCookie, Opts), NormHeaderMap = hb_ao:normalize_keys(HeaderMap, Opts), - ?event(http_outbound, + ?event(debug_http_outbound, {normalized_response_headers, {norm_header_map, NormHeaderMap}}, Opts ), - ?event(http_short, + ?event(http_client_short, {received, {status, Status}, - {duration, EndTime - StartTime}, + {duration, Duration}, {method, Method}, {peer, Peer}, {path, {string, Path}}, @@ -174,13 +185,13 @@ request(Method, Peer, Path, RawMessage, Opts) -> Key when is_binary(Key) -> Msg = http_response_to_httpsig(Status, NormHeaderMap, Body, Opts), ?event( - http_outbound, + debug_http_outbound, {result_is_single_key, {key, Key}, {msg, Msg}}, Opts ), case {Key, hb_maps:get(Key, Msg, undefined, Opts)} of {<<"body">>, undefined} -> - {response_status_to_atom(Status), <<>>}; + {hb_http_client:response_status_to_atom(Status), <<>>}; {_, undefined} -> {failure, << @@ -195,7 +206,7 @@ request(Method, Peer, Path, RawMessage, Opts) -> >> }; {_, Value} -> - {response_status_to_atom(Status), Value} + {hb_http_client:response_status_to_atom(Status), Value} end; false -> % Find the codec device from the headers, if set. @@ -215,25 +226,16 @@ request(Method, Peer, Path, RawMessage, Opts) -> ) end. -%% @doc Convert a HTTP status code to a status atom. -response_status_to_atom(Status) -> - case Status of - 201 -> created; - X when X < 400 -> ok; - X when X < 500 -> error; - _ -> failure - end. - %% @doc Convert an HTTP response to a message. 
outbound_result_to_message(<<"ans104@1.0">>, Status, Headers, Body, Opts) -> - ?event(http_outbound, + ?event(debug_http_outbound, {result_is_ans104, {headers, Headers}, {body, Body}}, Opts ), try ar_bundles:deserialize(Body) of Deserialized -> { - response_status_to_atom(Status), + hb_http_client:response_status_to_atom(Status), hb_message:convert( Deserialized, <<"structured@1.0">>, @@ -257,10 +259,21 @@ outbound_result_to_message(<<"ans104@1.0">>, Status, Headers, Body, Opts) -> outbound_result_to_message(<<"httpsig@1.0">>, Status, Headers, Body, Opts) end; outbound_result_to_message(<<"httpsig@1.0">>, Status, Headers, Body, Opts) -> - ?event(http_outbound, {result_is_httpsig, {body, Body}}, Opts), + ?event(debug_http_outbound, {result_is_httpsig, {body, Body}}, Opts), { - response_status_to_atom(Status), + hb_http_client:response_status_to_atom(Status), http_response_to_httpsig(Status, Headers, Body, Opts) + }; +outbound_result_to_message(Codec, Status, Headers, Body, Opts) -> + ?event(debug, {headers, Headers}), + { + hb_http_client:response_status_to_atom(Status), + hb_message:convert( + Body, + <<"structured@1.0">>, + Codec, + Opts + ) }. %% @doc Convert a HTTP response to a httpsig message. @@ -298,7 +311,7 @@ route_to_request(M, {ok, #{ <<"uri">> := XPath, <<"opts">> := ReqOpts}}, Opts) - % The request is a direct HTTP URL, so we need to split the path into a % host and path. URI = uri_string:parse(XPath), - ?event(http_outbound, {parsed_uri, {uri, {explicit, URI}}}), + ?event(debug_http_outbound, {parsed_uri, {uri, {explicit, URI}}}), Method = hb_ao:get(<<"method">>, M, <<"GET">>, Opts), % We must remove the path and host from the message, because they are not % valid for outbound requests. 
The path is retrieved from the route, and @@ -324,10 +337,10 @@ route_to_request(M, {ok, #{ <<"uri">> := XPath, <<"opts">> := ReqOpts}}, Opts) - Query -> [<<"?", Query/binary>>] end, Path = iolist_to_binary(PathParts), - ?event(http_outbound, {parsed_req, {node, Node}, {method, Method}, {path, Path}}), + ?event(debug_http_outbound, {parsed_req, {node, Node}, {method, Method}, {path, Path}}), {ok, Method, Node, Path, MsgWithoutMeta, hb_util:deep_merge(Opts, ReqOpts, Opts)}; route_to_request(M, {ok, Routes}, Opts) -> - ?event(http_outbound, {found_routes, {req, M}, {routes, Routes}}), + ?event(debug_http_outbound, {found_routes, {req, M}, {routes, Routes}}), % The result is a route, so we leave it to `request' to handle it. Path = hb_ao:get(<<"path">>, M, <<"/">>, Opts), Method = hb_ao:get(<<"method">>, M, <<"GET">>, Opts), @@ -358,7 +371,7 @@ prepare_request(Format, Method, Peer, Path, RawMessage, Opts) -> Opts ), {ok, CookieReset} = dev_codec_cookie:reset(Message, Opts), - ?event(http, {cookie_lines, CookieLines}), + ?event(debug_http, {cookie_lines, CookieLines}), { #{ <<"cookie">> => CookieLines }, CookieReset @@ -407,8 +420,8 @@ prepare_request(Format, Method, Peer, Path, RawMessage, Opts) -> ), Body = hb_maps:get(<<"body">>, FullEncoding, <<>>, Opts), Headers = hb_maps:without([<<"body">>], FullEncoding, Opts), - ?event(http, {request_headers, {explicit, {headers, Headers}}}), - ?event(http, {request_body, {explicit, {body, Body}}}), + ?event(debug_http, {request_headers, {explicit, {headers, Headers}}}), + ?event(debug_http, {request_body, {explicit, {body, Body}}}), hb_maps:merge( ReqBase, #{ headers => maps:merge(MaybeCookie, Headers), body => Body }, @@ -460,7 +473,7 @@ prepare_request(Format, Method, Peer, Path, RawMessage, Opts) -> %% @doc Reply to the client's HTTP request with a message. 
reply(Req, TABMReq, Message, Opts) -> Status = - case hb_ao:get(<<"status">>, Message, Opts) of + case hb_maps:get(<<"status">>, Message, not_found, Opts) of not_found -> 200; S-> S end, @@ -468,8 +481,14 @@ reply(Req, TABMReq, Message, Opts) -> reply(Req, TABMReq, BinStatus, RawMessage, Opts) when is_binary(BinStatus) -> reply(Req, TABMReq, binary_to_integer(BinStatus), RawMessage, Opts); reply(InitReq, TABMReq, RawStatus, RawMessage, Opts) -> + ReplyStartTime = os:system_time(millisecond), KeyNormMessage = hb_ao:normalize_keys(RawMessage, Opts), - {ok, Req, Message} = reply_handle_cookies(InitReq, KeyNormMessage, Opts), + {ok, Req, Message} = + try reply_handle_cookies(InitReq, KeyNormMessage, Opts) + catch _Type:Error:_Stacktrace -> + ?event(warning, {reply_handle_cookies_error, {error, Error}}, Opts), + {ok, InitReq, KeyNormMessage} + end, {Status, HeadersBeforeCors, EncodedBody} = encode_reply( RawStatus, @@ -481,7 +500,7 @@ reply(InitReq, TABMReq, RawStatus, RawMessage, Opts) -> ReqHdr = cowboy_req:header(<<"access-control-request-headers">>, Req, <<"">>), HeadersWithCors = add_cors_headers(HeadersBeforeCors, ReqHdr, Opts), EncodedHeaders = hb_private:reset(HeadersWithCors), - ?event(http, + ?event(debug_http, {http_replying, {status, {explicit, Status}}, {path, hb_maps:get(<<"path">>, Req, undefined_path, Opts)}, @@ -492,26 +511,43 @@ reply(InitReq, TABMReq, RawStatus, RawMessage, Opts) -> ), ReqBeforeStream = Req#{ resp_headers => EncodedHeaders }, PostStreamReq = cowboy_req:stream_reply(Status, #{}, ReqBeforeStream), - cowboy_req:stream_body(EncodedBody, nofin, PostStreamReq), + Fin = + case should_finalize_stream(Status, EncodedBody) of + true -> fin; + false -> nofin + end, + cowboy_req:stream_body(EncodedBody, Fin, PostStreamReq), EndTime = os:system_time(millisecond), - ?event(http, {reply_headers, {explicit, PostStreamReq}}), - ?event(http_short, + ReqDuration = EndTime - hb_maps:get(start_time, Req, undefined, Opts), + ReplyDuration = EndTime - 
ReplyStartTime, + record_request_metric( + ReqDuration * 1000000, + ReplyDuration * 1000000, + Status + ), + ?event(debug_http, {reply_headers, {explicit, PostStreamReq}}), + ?event(http_server_short, {sent, {status, Status}, {duration, EndTime - hb_maps:get(start_time, Req, undefined, Opts)}, + {body_size, byte_size(EncodedBody)}, {method, cowboy_req:method(Req)}, + {ip, {string, real_ip(Req, Opts)}}, {path, {string, uri_string:percent_decode( hb_maps:get(<<"path">>, TABMReq, <<"[NO PATH]">>, Opts) ) } - }, - {body_size, byte_size(EncodedBody)} + } } ), {ok, PostStreamReq, no_state}. +%% @doc Determine if the stream should be finalized. +should_finalize_stream(429, _EncodedBody) -> true; +should_finalize_stream(_, _EncodedBody) -> false. + %% @doc Handle replying with cookies if the message contains them. Returns the %% new Cowboy `Req` object, and the message with the cookies removed. Both %% `set-cookie' and `cookie' fields are treated as viable sources of cookies. @@ -580,7 +616,7 @@ add_cors_headers(Msg, ReqHdr, Opts) -> %% @doc Generate the headers and body for a HTTP response message. 
encode_reply(Status, TABMReq, Message, Opts) -> Codec = accept_to_codec(TABMReq, Message, Opts), - ?event(http, {encoding_reply, {codec, Codec}, {message, Message}}), + ?event(debug_http, {encoding_reply, {codec, Codec}, {message, Message}}), BaseHdrs = hb_maps:merge( #{ @@ -596,7 +632,7 @@ encode_reply(Status, TABMReq, Message, Opts) -> hb_util:atom( hb_maps:get(<<"accept-bundle">>, TABMReq, false, Opts) ), - ?event(http, + ?event(debug_http, {encoding_reply, {status, Status}, {codec, Codec}, @@ -633,17 +669,18 @@ encode_reply(Status, TABMReq, Message, Opts) -> maps:get(<<"body">>, ErrMsg, <<>>) }; {_, <<"httpsig@1.0">>, _} -> + IsBundle = hb_message:is_bundle(Message, Opts), TABM = hb_message:convert( Message, tabm, - <<"structured@1.0">>, + #{ <<"device">> => <<"structured@1.0">>, <<"bundle">> => IsBundle }, Opts#{ topic => ao_internal } ), {ok, EncMessage} = dev_codec_httpsig:to( TABM, - case AcceptBundle of + case AcceptBundle or IsBundle of true -> #{ <<"path">> => <<"to">>, @@ -693,20 +730,6 @@ encode_reply(Status, TABMReq, Message, Opts) -> ) ) }; - {_, <<"manifest@1.0">>, _} -> - MessageID = hb_message:id(Message, signed, Opts), - { - 307, - #{ - <<"location">> => - << - "/", - MessageID/binary, - "~manifest@1.0/index" - >> - }, - <<"Manifesting your data...">> - }; _ -> % Other codecs are already in binary format, so we can just convert % the message to the codec. 
We also include all of the top-level @@ -762,12 +785,6 @@ accept_to_codec(OriginalReq, Reply = #{ <<"content-type">> := Link }, Opts) when Reply#{ <<"content-type">> => hb_cache:ensure_loaded(Link, Opts) }, Opts ); -accept_to_codec( - _, - #{ <<"content-type">> := <<"application/x.arweave-manifest", _/binary>> }, - _Opts - ) -> - <<"manifest@1.0">>; accept_to_codec(_OriginalReq, #{ <<"content-type">> := CT }, _Opts) -> <<"httpsig@1.0">>; accept_to_codec(OriginalReq, _, Opts) -> @@ -850,7 +867,7 @@ req_to_tabm_singleton(Req, Body, Opts) -> error -> default_codec(Opts) end end, - ?event(http, + ?event(debug_http, {parsing_req, {path, FullPath}, {query, QueryKeys}, @@ -861,7 +878,7 @@ req_to_tabm_singleton(Req, Body, Opts) -> ?event({req_to_tabm_singleton, {codec, Codec}}), case Codec of <<"httpsig@1.0">> -> - ?event( + ?event(debug_http, {req_to_tabm_singleton, {request, {explicit, Req}, {body, {string, Body}} @@ -890,9 +907,31 @@ req_to_tabm_singleton(Req, Body, Opts) -> false -> throw({invalid_ans104_signature, Item}) end; + <<"tx@1.0">> -> + TX = ar_tx:json_struct_to_tx(hb_json:decode(Body)), + ?event(debug_accept, + {deserialized_tx, + {tx, TX}, + {exact, {explicit, TX}} + } + ), + case ar_tx:verify(TX) of + true -> + ?event(debug_tx, {valid_tx_signature, TX}), + StructuredTX = + hb_message:convert( + TX, + <<"structured@1.0">>, + <<"tx@1.0">>, + Opts + ), + normalize_unsigned(PrimitiveMsg, Req, StructuredTX, Opts); + false -> + throw({invalid_tx_signature, TX}) + end; Codec -> % Assume that the codec stores the encoded message in the `body' field. 
- ?event(http, {decoding_body, {codec, Codec}, {body, {string, Body}}}), + ?event(debug_http, {decoding_body, {codec, Codec}, {body, {string, Body}}}), Decoded = hb_message:convert( Body, @@ -901,7 +940,7 @@ req_to_tabm_singleton(Req, Body, Opts) -> Opts ), ReqMessage = hb_maps:merge(PrimitiveMsg, Decoded, Opts), - ?event( + ?event(debug_http, {verifying_encoded_message, {codec, Codec}, {body, {string, Body}}, @@ -932,7 +971,7 @@ httpsig_to_tabm_singleton(PrimMsg, Req, Body, Opts) -> ), Opts ), - ?event(http, {decoded, Decoded}, Opts), + ?event(debug_http, {decoded, Decoded}, Opts), ForceSignedRequests = hb_opts:get(force_signed_requests, false, Opts), case (not ForceSignedRequests) orelse hb_message:verify(Decoded, all, Opts) of true -> @@ -972,7 +1011,7 @@ httpsig_to_tabm_singleton(PrimMsg, Req, Body, Opts) -> %% 1. The path in the message %% 2. The path in the request URI normalize_unsigned(PrimMsg, Req = #{ headers := RawHeaders }, Msg, Opts) -> - ?event({adding_method_and_path_from_request, {explicit, Req}}), + ?event(debug_http, {adding_method_and_path_from_request, {explicit, Req}}), Method = cowboy_req:method(Req), MsgPath = hb_maps:get( @@ -1042,34 +1081,86 @@ normalize_unsigned(PrimMsg, Req = #{ headers := RawHeaders }, Msg, Opts) -> <<"">> -> hb_message:without_unless_signed(<<"body">>, WithCookie, Opts); _ -> WithCookie end, + RealIP = real_ip(Req, Opts), WithPeer = case hb_maps:get(<<"ao-peer-port">>, NormalBody, undefined, Opts) of undefined -> NormalBody; P2PPort -> - % Calculate the peer address from the request. We honor the - % `x-real-ip' header if it is present. 
- RealIP = - case hb_maps:get(<<"x-real-ip">>, RawHeaders, undefined, Opts) of - undefined -> - {{A, B, C, D}, _} = cowboy_req:peer(Req), - hb_util:bin( - io_lib:format( - "~b.~b.~b.~b", - [A, B, C, D] - ) - ); - IP -> IP - end, Peer = <>, (hb_message:without_unless_signed(<<"ao-peer-port">>, NormalBody, Opts))#{ <<"ao-peer">> => Peer } end, + WithPrivIP = hb_private:set(WithPeer, <<"ip">>, RealIP, Opts), % Add device from PrimMsg if present - case maps:get(<<"device">>, PrimMsg, not_found) of - not_found -> WithPeer; - Device -> WithPeer#{<<"device">> => Device} + WithDevice = case maps:get(<<"device">>, PrimMsg, not_found) of + not_found -> WithPrivIP; + Device -> WithPrivIP#{<<"device">> => Device} + end, + Host = cowboy_req:host(Req), + WithDevice#{<<"host">> => Host}. + +%% @doc Determine the caller, honoring the `x-real-ip' header if present. +real_ip(Req = #{ headers := RawHeaders }, Opts) -> + case hb_maps:get(<<"x-real-ip">>, RawHeaders, undefined, Opts) of + undefined -> + {{A, B, C, D}, _} = cowboy_req:peer(Req), + hb_util:bin( + io_lib:format( + "~b.~b.~b.~b", + [A, B, C, D] + ) + ); + IP -> IP end. +%%% Metrics + +init_prometheus() -> + hb_prometheus:declare(histogram, [ + {name, http_server_encoding_duration_seconds}, + {labels, [status_code]}, + {buckets, [0.001, 0.0025, 0.005, + 0.01, 0.025, 0.05, + 0.1, 0.25, 0.5, + 1, 2.5, 5, + 10, 30, 60]}, + { + help, + "The total duration of an hb_http:reply call. This starts when a response" + "is ready to send back to the client and ends when the message is deliver" + "to the client." + } + ]), + hb_prometheus:declare(histogram, [ + {name, http_server_duration_seconds}, + {labels, [status_code]}, + {buckets, [0.001, 0.0025, 0.005, + 0.01, 0.025, 0.05, + 0.1, 0.25, 0.5, + 1, 2.5, 5, + 10, 30, 60]}, + { + help, + "The total duration of an hb_http_server request call." + } + ]). 
+ +record_request_metric(TotalDuration, ReplyDuration, StatusCode) -> + spawn( + fun() -> + hb_prometheus:observe( + TotalDuration, + http_server_duration_seconds, + [StatusCode] + ), + hb_prometheus:observe( + ReplyDuration, + http_server_encoding_duration_seconds, + [StatusCode] + ) + end + ). + %%% Tests test_opts() -> @@ -1331,3 +1422,16 @@ parallel_request_test() -> Opts ) ). + +request_error_handling_test() -> + Opts = #{}, + NonExistentDomain = <<"http://nonexistent.invalid:80">>, + Result = hb_http:request( + <<"GET">>, + NonExistentDomain, + <<"/">>, + #{}, + Opts + ), + % The result should be an error tuple, not crash with badmatch + ?assertMatch({error, _}, Result). \ No newline at end of file diff --git a/src/hb_http_client.erl b/src/hb_http_client.erl index 3611cc088..e02135d39 100644 --- a/src/hb_http_client.erl +++ b/src/hb_http_client.erl @@ -3,43 +3,69 @@ -module(hb_http_client). -behaviour(gen_server). -include("include/hb.hrl"). --export([start_link/1, request/2]). --export([init/1, handle_cast/2, handle_call/3, handle_info/2, terminate/2]). +-include("include/hb_opts.hrl"). +-include("include/hb_http_client.hrl"). +%% Public API +-export([request/2, response_status_to_atom/1, setup_conn/1]). +%% GenServer +-export([start_link/1, init/1]). +-export([handle_cast/2, handle_call/3, handle_info/2, terminate/2]). +-export([init_prometheus/0]). -record(state, { - pid_by_peer = #{}, - status_by_pid = #{}, opts = #{} }). --define(DEFAULT_RETRIES, 0). --define(DEFAULT_RETRY_TIME, 1000). --define(DEFAULT_KEEPALIVE_TIMEOUT, 60_000). --define(DEFAULT_CONNECT_TIMEOUT, 60_000). - %%% ================================================================== %%% Public interface. %%% ================================================================== +%% @doc Use Opts to configure connection pool size. 
+setup_conn(Opts) -> + MaxConnections = + hb_opts:get(http_client_hackney_max_connections, ?DEFAULT_HACKNEY_MAX_CONNECTIONS, Opts), + KeepAlive = hb_opts:get(http_client_keepalive, ?DEFAULT_KEEPALIVE_TIMEOUT, Opts), + ?event(connection_pool, {http_client_hackney_max_connections, MaxConnections}), + hackney_pool:set_max_connections(?HACKNEY_POOL, MaxConnections), + hackney_pool:set_timeout(?HACKNEY_POOL, KeepAlive). + start_link(Opts) -> gen_server:start_link({local, ?MODULE}, ?MODULE, Opts, []). +%% @doc Convert a HTTP status code to a status atom. +response_status_to_atom(Status) -> + case Status of + 201 -> created; + X when X < 400 -> ok; + X when X < 500 -> error; + _ -> failure + end. + request(Args, Opts) -> - request(Args, hb_opts:get(http_retry, ?DEFAULT_RETRIES, Opts), Opts). + Opts1 = hb_opts:mimic_default_types(Opts, existing, Opts), + request(Args, hb_opts:get(http_retry, ?DEFAULT_RETRIES, Opts1), Opts1). request(Args, RemainingRetries, Opts) -> - case do_request(Args, Opts) of - {error, Details} -> maybe_retry(RemainingRetries, Args, Details, Opts); - {ok, Status, Headers, Body} -> {ok, Status, Headers, Body} + Response = do_request(Args, Opts), + case Response of + {error, _Details} -> maybe_retry(RemainingRetries, Args, Response, Opts); + {ok, Status, _Headers, _Body} -> + StatusAtom = response_status_to_atom(Status), + RetryResponses = hb_opts:get(http_retry_response, [], Opts), + case lists:member(StatusAtom, RetryResponses) of + true -> maybe_retry(RemainingRetries, Args, Response, Opts); + false -> Response + end end. do_request(Args, Opts) -> - case hb_opts:get(http_client, gun, Opts) of + case hb_opts:get(http_client, ?DEFAULT_HTTP_CLIENT, Opts) of gun -> gun_req(Args, Opts); - httpc -> httpc_req(Args, Opts) + httpc -> httpc_req(Args, Opts); + hackney -> hackney_req(Args, Opts) end. 
-maybe_retry(0, _, ErrDetails, _) -> {error, ErrDetails}; -maybe_retry(Remaining, Args, ErrDetails, Opts) -> +maybe_retry(0, _, OriginalResponse, _) -> OriginalResponse; +maybe_retry(Remaining, Args, OriginalResponse, Opts) -> RetryBaseTime = hb_opts:get(http_retry_time, ?DEFAULT_RETRY_TIME, Opts), RetryTime = case hb_opts:get(http_retry_mode, backoff, Opts) of @@ -48,6 +74,10 @@ maybe_retry(Remaining, Args, ErrDetails, Opts) -> BaseRetries = hb_opts:get(http_retry, ?DEFAULT_RETRIES, Opts), RetryBaseTime * (1 + (BaseRetries - Remaining)) end, + ErrDetails = case OriginalResponse of + {error, Details} -> Details; + {ok, Status, _, _} -> Status + end, ?event( warning, {retrying_http_request, @@ -68,148 +98,168 @@ httpc_req(Args, Opts) -> body := Body } = Args, ?event({httpc_req, Args}), - {Host, Port} = parse_peer(Peer, Opts), - Scheme = case Port of - 443 -> "https"; - _ -> "http" - end, - ?event(http_client, {httpc_req, {explicit, Args}}), - URL = binary_to_list(iolist_to_binary([Scheme, "://", Host, ":", integer_to_binary(Port), Path])), - FilteredHeaders = hb_maps:without([<<"content-type">>, <<"cookie">>], Headers, Opts), - HeaderKV = - [ - {binary_to_list(Key), binary_to_list(Value)} - || - {Key, Value} <- hb_maps:to_list(FilteredHeaders, Opts) - ] ++ - [ - {<<"cookie">>, CookieLine} - || - CookieLine <- - case hb_maps:get(<<"cookie">>, Headers, [], Opts) of - Binary when is_binary(Binary) -> - [Binary]; - List when is_list(List) -> - List - end - ], - Method = binary_to_existing_atom(hb_util:to_lower(RawMethod)), - ContentType = hb_maps:get(<<"content-type">>, Headers, <<"application/octet-stream">>, Opts), - Request = - case Method of - get -> - { - URL, - HeaderKV - }; - _ -> - { - URL, - HeaderKV, - binary_to_list(ContentType), - Body - } - end, - ?event({http_client_outbound, Method, URL, Request}), - HTTPCOpts = [{full_result, true}, {body_format, binary}], - StartTime = os:system_time(millisecond), - case httpc:request(Method, Request, [], HTTPCOpts) of 
- {ok, {{_, Status, _}, RawRespHeaders, RespBody}} -> - EndTime = os:system_time(millisecond), - RespHeaders = + case parse_peer(Peer, Opts) of + {error, _} = Err -> Err; + {ok, {Host, Port}} -> + Scheme = case Port of + 443 -> "https"; + _ -> "http" + end, + ?event(debug_http_client, {httpc_req, {explicit, Args}}), + URL = binary_to_list(iolist_to_binary([Scheme, "://", Host, ":", integer_to_binary(Port), Path])), + FilteredHeaders = hb_maps:without([<<"content-type">>, <<"cookie">>], Headers, Opts), + HeaderKV = [ - {list_to_binary(Key), list_to_binary(Value)} + {binary_to_list(Key), binary_to_list(Value)} || - {Key, Value} <- RawRespHeaders + {Key, Value} <- hb_maps:to_list(FilteredHeaders, Opts) + ] ++ + [ + {<<"cookie">>, CookieLine} + || + CookieLine <- + case hb_maps:get(<<"cookie">>, Headers, [], Opts) of + Binary when is_binary(Binary) -> + [Binary]; + List when is_list(List) -> + List + end ], - ?event(http_client, {httpc_resp, Status, RespHeaders, RespBody}), + Method = binary_to_existing_atom(hb_util:to_lower(RawMethod)), + ContentType = hb_maps:get(<<"content-type">>, Headers, <<"application/octet-stream">>, Opts), + Request = + case Method of + get -> + {URL, HeaderKV}; + _ -> + upload_metric(Body), + {URL, HeaderKV, binary_to_list(ContentType), Body} + end, + ?event({http_client_outbound, Method, URL, Request}), + HTTPCOpts = [{full_result, true}, {body_format, binary}], + StartTime = os:system_time(native), + case httpc:request(Method, Request, [], HTTPCOpts) of + {ok, {{_, Status, _}, RawRespHeaders, RespBody}} -> + download_metric(RespBody), + EndTime = os:system_time(native), + RespHeaders = + [ + {list_to_binary(Key), list_to_binary(Value)} + || + {Key, Value} <- RawRespHeaders + ], + ?event(debug_http_client, {httpc_resp, Status, RespHeaders, RespBody}), + record_duration(#{ + <<"request-method">> => method_to_bin(Method), + <<"request-path">> => hb_util:bin(Path), + <<"status-class">> => get_status_class(Status), + <<"duration">> => EndTime - 
StartTime + }, + Opts + ), + {ok, Status, RespHeaders, RespBody}; + {error, Reason} -> + ?event(http_client, {httpc_error, Reason}), + {error, Reason} + end + end. + +hackney_req(Args, Opts) -> + #{ + peer := Peer, + path := Path, + method := RawMethod, + headers := Headers, + body := Body + } = Args, + ?event({hackney_req, Args}), + case parse_peer(Peer, Opts) of + {error, _} = Err -> Err; + {ok, {Host, Port}} -> + Scheme = case Port of + 443 -> <<"https">>; + _ -> <<"http">> + end, + URL = <>, + Method = string:uppercase(hb_util:bin(RawMethod)), + HeaderList = + [{Key, Value} || {Key, Value} <- hb_maps:to_list(Headers, Opts)], + upload_metric(#{method => Method, body => Body}), + ConnTimeout = hb_opts:get(http_client_connect_timeout, ?DEFAULT_CONNECT_TIMEOUT, Opts), + RecvTimeout = hb_opts:get(http_client_hackney_recv_timeout, ?DEFAULT_HACKNEY_RECEIVE_TIMEOUT, Opts), + CheckoutTimeout = hb_opts:get(http_client_hackney_checkout_timeout, ?DEFAULT_HACKNEY_CHECKOUT_TIMEOUT, Opts), + HackneyOpts = [with_body, + {pool, ?HACKNEY_POOL}, + {connect_timeout, ConnTimeout}, + {connect_options, [{nodelay, true}]}, + {checkout_timeout, CheckoutTimeout}, + {recv_timeout, RecvTimeout}], + StartTime = erlang:monotonic_time(native), + Response = case hackney:request(Method, URL, HeaderList, Body, HackneyOpts) of + {ok, Status, RespHeaders, RespBody} -> + download_metric(RespBody), + ?event(debug_http_client, {hackney_resp, Status, RespHeaders, RespBody}), + {ok, Status, RespHeaders, RespBody}; + {ok, Status, RespHeaders} -> + ?event(debug_http_client, {hackney_resp, Status, RespHeaders, no_body}), + {ok, Status, RespHeaders, <<>>}; + {error, Reason} -> + ?event(http_client, {hackney_error, Reason}), + {error, Reason} + end, + EndTime = erlang:monotonic_time(native), record_duration(#{ <<"request-method">> => method_to_bin(Method), <<"request-path">> => hb_util:bin(Path), - <<"status-class">> => get_status_class(Status), + <<"status-class">> => get_status_class(Response), 
<<"duration">> => EndTime - StartTime }, Opts ), - {ok, Status, RespHeaders, RespBody}; - {error, Reason} -> - ?event(http_client, {httpc_error, Reason}), - {error, Reason} + record_response_status(Method, Response, Path), + Response end. gun_req(Args, Opts) -> - gun_req(Args, false, Opts). -gun_req(Args, ReestablishedConnection, Opts) -> - StartTime = os:system_time(millisecond), - #{ peer := Peer, path := Path, method := Method } = Args, + StartTime = os:system_time(native), + #{ path := Path, method := Method } = Args, + ConnectTimeout = hb_opts:get(http_client_connect_timeout, ?DEFAULT_CONNECT_TIMEOUT, Opts), Response = - case catch gen_server:call(?MODULE, {get_connection, Args, Opts}, infinity) of - {ok, PID} -> - ar_rate_limiter:throttle(Peer, Path, Opts), - case do_gun_request(PID, Args, Opts) of - {error, Error} when Error == {shutdown, normal}; - Error == noproc -> - case ReestablishedConnection of - true -> {error, client_error}; - false -> gun_req(Args, true, Opts) - end; - Reply -> - Reply - end; - {'EXIT', _} -> - {error, client_error}; - Error -> - Error - end, - EndTime = os:system_time(millisecond), - %% Only log the metric for the top-level call to req/2 - not the recursive call - %% that happens when the connection is reestablished. 
- case ReestablishedConnection of - true -> - ok; - false -> - record_duration(#{ - <<"request-method">> => method_to_bin(Method), - <<"request-path">> => hb_util:bin(Path), - <<"status-class">> => get_status_class(Response), - <<"duration">> => EndTime - StartTime - }, - Opts - ) - end, + case open_connection(Args, Opts) of + {error, _} = Err -> + Err; + {ok, PID} -> + case gun:await_up(PID, ConnectTimeout) of + {error, Reason} -> + gun:close(PID), + {error, Reason}; + {ok, _Protocol} -> + Result = do_gun_request(PID, Args, Opts), + gun:close(PID), + Result + end + end, + EndTime = os:system_time(native), + record_duration(#{ + <<"request-method">> => method_to_bin(Method), + <<"request-path">> => hb_util:bin(Path), + <<"status-class">> => get_status_class(Response), + <<"duration">> => EndTime - StartTime + }, + Opts + ), Response. -%% @doc Record the duration of the request in an async process. We write the -%% data to prometheus if the application is enabled, as well as invoking the -%% `http_monitor' if appropriate. -record_duration(Details, Opts) -> - spawn( - fun() -> - % First, write to prometheus if it is enabled. Prometheus works - % only with strings as lists, so we encode the data before granting - % it. - GetFormat = fun(Key) -> hb_util:list(maps:get(Key, Details)) end, - case application:get_application(prometheus) of - undefined -> ok; - _ -> - prometheus_histogram:observe( - http_request_duration_seconds, - lists:map( - GetFormat, - [ - <<"request-method">>, - <<"status-class">> - ] - ), - maps:get(<<"duration">>, Details) - ) - end, - maybe_invoke_monitor( - Details#{ <<"path">> => <<"duration">> }, - Opts - ) - end - ). +%% @doc Start the hackney connection pool with default settings. +%% Overridden at runtime by setup_conn/1 once node config is available. +init_hackney_pool() -> + hackney_pool:start_pool(?HACKNEY_POOL, [ + {max_connections, ?DEFAULT_HACKNEY_MAX_CONNECTIONS}, + {timeout, ?DEFAULT_KEEPALIVE_TIMEOUT} + ]). 
%% @doc Invoke the HTTP monitor message with AO-Core, if it is set in the %% node message key. We invoke the given message with the `body' set to a signed @@ -247,7 +297,7 @@ maybe_invoke_monitor(Details, Opts) -> % execute. ReqMsgs = hb_singleton:from(Req, Opts), Res = hb_ao:resolve_many(ReqMsgs, Opts), - ?event(http_monitor, {resolved_monitor, Res}) + ?event(debug_http_monitor, {resolved_monitor, Res}) end. %%% ================================================================== @@ -255,6 +305,7 @@ maybe_invoke_monitor(Details, Opts) -> %%% ================================================================== init(Opts) -> + init_hackney_pool(), case hb_opts:get(prometheus, not hb_features:test(), Opts) of true -> ?event({starting_prometheus_application, @@ -263,7 +314,8 @@ init(Opts) -> ), try application:ensure_all_started([prometheus, prometheus_cowboy]), - init_prometheus(Opts) + init_prometheus(), + {ok, #state{ opts = Opts }} catch Type:Reason:Stack -> ?event(warning, @@ -278,91 +330,6 @@ init(Opts) -> false -> {ok, #state{ opts = Opts }} end. -init_prometheus(Opts) -> - application:ensure_all_started([prometheus, prometheus_cowboy]), - prometheus_counter:new([ - {name, gun_requests_total}, - {labels, [http_method, status_class]}, - { - help, - "The total number of GUN requests." - } - ]), - prometheus_gauge:new([{name, outbound_connections}, - {help, "The current number of the open outbound network connections"}]), - prometheus_histogram:new([ - {name, http_request_duration_seconds}, - {buckets, [0.01, 0.1, 0.5, 1, 5, 10, 30, 60]}, - {labels, [http_method, status_class]}, - { - help, - "The total duration of an hb_http_client:req call. This includes more than" - " just the GUN request itself (e.g. 
establishing a connection, " - "throttling, etc...)" - } - ]), - prometheus_histogram:new([ - {name, http_client_get_chunk_duration_seconds}, - {buckets, [0.1, 1, 10, 60]}, - {labels, [status_class, peer]}, - { - help, - "The total duration of an HTTP GET chunk request made to a peer." - } - ]), - prometheus_counter:new([ - {name, http_client_downloaded_bytes_total}, - {help, "The total amount of bytes requested via HTTP, per remote endpoint"} - ]), - prometheus_counter:new([ - {name, http_client_uploaded_bytes_total}, - {help, "The total amount of bytes posted via HTTP, per remote endpoint"} - ]), - ?event(started), - {ok, #state{ opts = Opts }}. - -handle_call({get_connection, Args, Opts}, From, - #state{ pid_by_peer = PIDPeer, status_by_pid = StatusByPID } = State) -> - Peer = hb_maps:get(peer, Args, undefined, Opts), - case hb_maps:get(Peer, PIDPeer, not_found, Opts) of - not_found -> - {ok, PID} = open_connection(Args, hb_maps:merge(State#state.opts, Opts, Opts)), - MonitorRef = monitor(process, PID), - PIDPeer2 = hb_maps:put(Peer, PID, PIDPeer, Opts), - StatusByPID2 = - hb_maps:put( - PID, - {{connecting, [{From, Args}]}, MonitorRef, Peer}, - StatusByPID, - Opts - ), - { - reply, - {ok, PID}, - State#state{ - pid_by_peer = PIDPeer2, - status_by_pid = StatusByPID2 - } - }; - PID -> - case hb_maps:get(PID, StatusByPID, undefined, Opts) of - {{connecting, PendingRequests}, MonitorRef, Peer} -> - StatusByPID2 = - hb_maps:put(PID, - { - {connecting, [{From, Args} | PendingRequests]}, - MonitorRef, - Peer - }, - StatusByPID, - Opts - ), - {noreply, State#state{ status_by_pid = StatusByPID2 }}; - {connected, _MonitorRef, Peer} -> - {reply, {ok, PID}, State} - end - end; - handle_call(Request, _From, State) -> ?event(warning, {unhandled_call, {module, ?MODULE}, {request, Request}}), {reply, ok, State}. @@ -371,147 +338,39 @@ handle_cast(Cast, State) -> ?event(warning, {unhandled_cast, {module, ?MODULE}, {cast, Cast}}), {noreply, State}. 
-handle_info({gun_up, PID, _Protocol}, #state{ status_by_pid = StatusByPID } = State) -> - case hb_maps:get(PID, StatusByPID, not_found) of - not_found -> - %% A connection timeout should have occurred. - {noreply, State}; - {{connecting, PendingRequests}, MonitorRef, Peer} -> - [gen_server:reply(ReplyTo, {ok, PID}) || {ReplyTo, _} <- PendingRequests], - StatusByPID2 = hb_maps:put(PID, {connected, MonitorRef, Peer}, StatusByPID), - inc_prometheus_gauge(outbound_connections), - {noreply, State#state{ status_by_pid = StatusByPID2 }}; - {connected, _MonitorRef, Peer} -> - ?event(warning, - {gun_up_pid_already_exists, {peer, Peer}}), - {noreply, State} - end; - -handle_info({gun_error, PID, Reason}, - #state{ pid_by_peer = PIDByPeer, status_by_pid = StatusByPID } = State) -> - case hb_maps:get(PID, StatusByPID, not_found) of - not_found -> - ?event(warning, {gun_connection_error_with_unknown_pid}), - {noreply, State}; - {Status, _MonitorRef, Peer} -> - PIDByPeer2 = hb_maps:remove(Peer, PIDByPeer), - StatusByPID2 = hb_maps:remove(PID, StatusByPID), - Reason2 = - case Reason of - timeout -> - connect_timeout; - {Type, _} -> - Type; - _ -> - Reason - end, - case Status of - {connecting, PendingRequests} -> - reply_error(PendingRequests, Reason2); - connected -> - dec_prometheus_gauge(outbound_connections), - ok - end, - gun:shutdown(PID), - ?event({connection_error, {reason, Reason}}), - {noreply, State#state{ status_by_pid = StatusByPID2, pid_by_peer = PIDByPeer2 }} - end; - -handle_info({gun_down, PID, Protocol, Reason, _KilledStreams, _UnprocessedStreams}, - #state{ pid_by_peer = PIDByPeer, status_by_pid = StatusByPID } = State) -> - case hb_maps:get(PID, StatusByPID, not_found) of - not_found -> - ?event(warning, - {gun_connection_down_with_unknown_pid, {protocol, Protocol}}), - {noreply, State}; - {Status, _MonitorRef, Peer} -> - PIDByPeer2 = hb_maps:remove(Peer, PIDByPeer), - StatusByPID2 = hb_maps:remove(PID, StatusByPID), - Reason2 = - case Reason of - {Type, _} 
-> - Type; - _ -> - Reason - end, - case Status of - {connecting, PendingRequests} -> - reply_error(PendingRequests, Reason2); - _ -> - dec_prometheus_gauge(outbound_connections), - ok - end, - {noreply, - State#state{ - status_by_pid = StatusByPID2, - pid_by_peer = PIDByPeer2 - } - } - end; - -handle_info({'DOWN', _Ref, process, PID, Reason}, - #state{ pid_by_peer = PIDByPeer, status_by_pid = StatusByPID } = State) -> - case hb_maps:get(PID, StatusByPID, not_found) of - not_found -> - {noreply, State}; - {Status, _MonitorRef, Peer} -> - PIDByPeer2 = hb_maps:remove(Peer, PIDByPeer), - StatusByPID2 = hb_maps:remove(PID, StatusByPID), - case Status of - {connecting, PendingRequests} -> - reply_error(PendingRequests, Reason); - _ -> - dec_prometheus_gauge(outbound_connections), - ok - end, - {noreply, - State#state{ - status_by_pid = StatusByPID2, - pid_by_peer = PIDByPeer2 - } - } - end; +handle_info({gun_up, _PID, _Protocol}, State) -> + {noreply, State}; + +handle_info({gun_error, PID, Reason}, State) -> + ?event(warning, {gun_connection_error, {pid, PID}, {reason, Reason}}), + {noreply, State}; + +handle_info({gun_down, PID, Protocol, Reason, _KilledStreams, _UnprocessedStreams}, State) -> + ?event(warning, {gun_connection_down, {pid, PID}, {protocol, Protocol}, {reason, Reason}}), + {noreply, State}; + +handle_info({'DOWN', _Ref, process, PID, Reason}, State) -> + ?event(warning, {gun_process_down, {pid, PID}, {reason, Reason}}), + {noreply, State}; handle_info(Message, State) -> ?event(warning, {unhandled_info, {module, ?MODULE}, {message, Message}}), {noreply, State}. -terminate(Reason, #state{ status_by_pid = StatusByPID }) -> - ?event(info,{http_client_terminating, {reason, Reason}}), - hb_maps:map(fun(PID, _Status) -> gun:shutdown(PID) end, StatusByPID), +terminate(_Reason, _State) -> ok. %%% ================================================================== %%% Private functions. 
%%% ================================================================== -%% @doc Safe wrapper for prometheus_gauge:inc/2. -inc_prometheus_gauge(Name) -> - case application:get_application(prometheus) of - undefined -> ok; - _ -> - try prometheus_gauge:inc(Name) - catch _:_ -> - init_prometheus(#{}), - prometheus_gauge:inc(Name) - end - end. - -%% @doc Safe wrapper for prometheus_gauge:dec/2. -dec_prometheus_gauge(Name) -> - case application:get_application(prometheus) of - undefined -> ok; - _ -> prometheus_gauge:dec(Name) - end. - -inc_prometheus_counter(Name, Labels, Value) -> - case application:get_application(prometheus) of - undefined -> ok; - _ -> prometheus_counter:inc(Name, Labels, Value) +open_connection(#{ peer := Peer }, Opts) -> + case parse_peer(Peer, Opts) of + {error, _} = Err -> Err; + {ok, {Host, Port}} -> open_connection_gun(Host, Port, Peer, Opts) end. -open_connection(#{ peer := Peer }, Opts) -> - {Host, Port} = parse_peer(Peer, Opts), +open_connection_gun(Host, Port, Peer, Opts) -> ?event(http_outbound, {parsed_peer, {peer, Peer}, {host, Host}, {port, Port}}), BaseGunOpts = #{ @@ -519,7 +378,7 @@ open_connection(#{ peer := Peer }, Opts) -> #{ keepalive => hb_opts:get( - http_keepalive, + http_client_keepalive, ?DEFAULT_KEEPALIVE_TIMEOUT, Opts ) @@ -527,7 +386,7 @@ open_connection(#{ peer := Peer }, Opts) -> retry => 0, connect_timeout => hb_opts:get( - http_connect_timeout, + http_client_connect_timeout, ?DEFAULT_CONNECT_TIMEOUT, Opts ) @@ -546,7 +405,8 @@ open_connection(#{ peer := Peer }, Opts) -> GunOpts = case Proto = hb_opts:get(protocol, DefaultProto, Opts) of http3 -> BaseGunOpts#{protocols => [http3], transport => quic}; - _ -> BaseGunOpts + http2 -> BaseGunOpts#{protocols => [http2]}; + http1 -> BaseGunOpts#{protocols => [http]} end, ?event(http_outbound, {gun_open, @@ -562,61 +422,23 @@ parse_peer(Peer, Opts) -> Parsed = uri_string:parse(Peer), case Parsed of #{ host := Host, port := Port } -> - {hb_util:list(Host), Port}; + {ok, 
{hb_util:list(Host), Port}}; URI = #{ host := Host } -> - { + {ok, { hb_util:list(Host), case hb_maps:get(scheme, URI, undefined, Opts) of <<"https">> -> 443; _ -> hb_opts:get(port, 8734, Opts) end - } + }}; + _ -> + {error, {bad_peer, Peer}} end. -reply_error([], _Reason) -> - ok; -reply_error([PendingRequest | PendingRequests], Reason) -> - ReplyTo = element(1, PendingRequest), - Args = element(2, PendingRequest), - Method = hb_maps:get(method, Args), - record_response_status(Method, {error, Reason}), - gen_server:reply(ReplyTo, {error, Reason}), - reply_error(PendingRequests, Reason). - -record_response_status(Method, Response) -> - inc_prometheus_counter(gun_requests_total, - [ - hb_util:list(method_to_bin(Method)), - hb_util:list(get_status_class(Response)) - ], - 1 - ). - -method_to_bin(get) -> - <<"GET">>; -method_to_bin(post) -> - <<"POST">>; -method_to_bin(put) -> - <<"PUT">>; -method_to_bin(head) -> - <<"HEAD">>; -method_to_bin(delete) -> - <<"DELETE">>; -method_to_bin(connect) -> - <<"CONNECT">>; -method_to_bin(options) -> - <<"OPTIONS">>; -method_to_bin(trace) -> - <<"TRACE">>; -method_to_bin(patch) -> - <<"PATCH">>; -method_to_bin(_) -> - <<"unknown">>. 
- do_gun_request(PID, Args, Opts) -> Timer = inet:start_timer( - hb_opts:get(http_request_send_timeout, no_request_send_timeout, Opts) + hb_opts:get(http_client_send_timeout, no_request_send_timeout, Opts) ), Method = hb_maps:get(method, Args, undefined, Opts), Path = hb_maps:get(path, Args, undefined, Opts), @@ -649,13 +471,17 @@ do_gun_request(PID, Args, Opts) -> Ref = gun:request(PID, Method, Path, Headers, Body), ResponseArgs = #{ - pid => PID, stream_ref => Ref, - timer => Timer, limit => hb_maps:get(limit, Args, infinity, Opts), - counter => 0, acc => [], start => os:system_time(microsecond), + pid => PID, + stream_ref => Ref, + timer => Timer, + limit => hb_maps:get(limit, Args, infinity, Opts), + counter => 0, + acc => [], + start => os:system_time(microsecond), is_peer_request => hb_maps:get(is_peer_request, Args, true, Opts) }, Response = await_response(hb_maps:merge(Args, ResponseArgs, Opts), Opts), - record_response_status(Method, Response), + record_response_status(Method, Response, Path), inet:stop_timer(Timer), Response. 
@@ -700,23 +526,29 @@ await_response(Args, Opts) -> FinData }; {error, timeout} = Response -> - record_response_status(Method, Response), + record_response_status(Method, Response, Path), + ?event(http_outbound, {gun_cancel, {path, Path}}), gun:cancel(PID, Ref), - log(warn, gun_await_process_down, Args, Response, Opts), + log(warning, gun_await_process_down, Args, timeout, Opts), Response; + {error,{connection_error,{stream_closed, Message}}} = Response -> + ?event(http_outbound, {gun_cancel, {path, Path}, {message, Message}}), + gun:cancel(PID, Ref), + Response; {error, Reason} = Response when is_tuple(Reason) -> - record_response_status(Method, Response), - log(warn, gun_await_process_down, Args, Reason, Opts), + record_response_status(Method, Response, Path), + log(warning, gun_await_process_down, Args, Reason, Opts), Response; Response -> - record_response_status(Method, Response), - log(warn, gun_await_unknown, Args, Response, Opts), + record_response_status(Method, Response, Path), + log(warning, gun_await_unknown, Args, Response, Opts), Response end. +%% @doc Debug `http` state logging. log(Type, Event, #{method := Method, peer := Peer, path := Path}, Reason, Opts) -> ?event( - http, + Type, {gun_log, {type, Type}, {event, Event}, @@ -729,15 +561,123 @@ log(Type, Event, #{method := Method, peer := Peer, path := Path}, Reason, Opts) ), ok. +%% Metrics + +init_prometheus() -> + hb_prometheus:declare(counter, [ + {name, gun_requests_total}, + {labels, [http_method, status_class, category]}, + { + help, + "The total number of GUN requests." + } + ]), + hb_prometheus:declare(gauge, [{name, outbound_connections}, + {help, "The current number of the open outbound network connections"}]), + hb_prometheus:declare(histogram, [ + {name, http_client_duration_seconds}, + {buckets, [0.01, 0.1, 0.5, 1, 5, 10, 30, 60]}, + {labels, [http_method, status_class, category]}, + { + help, + "The total duration of an hb_http_client:req call. 
This includes more than" + " just the GUN request itself (e.g. establishing a connection, " + "throttling, etc...)" + } + ]), + hb_prometheus:declare(histogram, [ + {name, http_client_get_chunk_duration_seconds}, + {buckets, [0.1, 1, 10, 60]}, + {labels, [status_class, peer]}, + { + help, + "The total duration of an HTTP GET chunk request made to a peer." + } + ]), + hb_prometheus:declare(counter, [ + {name, http_client_downloaded_bytes_total}, + {help, "The total amount of bytes requested via HTTP, per remote endpoint"} + ]), + hb_prometheus:declare(counter, [ + {name, http_client_uploaded_bytes_total}, + {help, "The total amount of bytes posted via HTTP, per remote endpoint"} + ]), + hb_prometheus:declare(histogram, [ + {name, arweave_chunk_load_requested_bytes}, + {buckets, [ + 262144, 1048576, 10485760, 104857600, + 524288000, 1073741824 + ]}, + {help, + "Bytes requested per generate_offsets call" + " in dev_arweave chunk loading"} + ]), + ?event(started), + ok. + +%% @doc Record the duration of the request in an async process. We write the +%% data to prometheus if the application is enabled, as well as invoking the +%% `http_monitor' if appropriate. +record_duration(Details, Opts) -> + spawn( + fun() -> + % Prometheus works only with strings as lists, so we encode the + % data before granting it. + GetFormat = + fun + (<<"request-category">>) -> + path_to_category(maps:get(<<"request-path">>, Details)); + (Key) -> + hb_util:list(maps:get(Key, Details)) + end, + Labels = lists:map( + GetFormat, + [ + <<"request-method">>, + <<"status-class">>, + <<"request-category">> + ]), + hb_prometheus:observe( + maps:get(<<"duration">>, Details), + http_client_duration_seconds, + Labels + ), + maybe_invoke_monitor( + Details#{ <<"path">> => <<"duration">> }, + Opts + ) + end + ). + +record_response_status(Method, Response) -> + record_response_status(Method, Response, undefined). 
+record_response_status(Method, Response, Path) -> + hb_prometheus:inc( + counter, + gun_requests_total, + [ + hb_util:list(method_to_bin(Method)), + hb_util:list(get_status_class(Response)), + hb_util:list(path_to_category(Path)) + ], + 1 + ). + download_metric(Data) -> - inc_prometheus_counter( + hb_prometheus:inc( + counter, http_client_downloaded_bytes_total, [], byte_size(Data) ). -upload_metric(#{method := post, body := Body}) -> - inc_prometheus_counter( +%% @doc Record instances of uploaded bytes to the remote server. +upload_metric(#{method := Method, body := Body}) when is_atom(Method) -> + upload_metric(#{ method => hb_util:bin(Method), body => Body }); +upload_metric(#{ method := <<"POST">>, body := Body}) -> upload_metric(Body); +upload_metric(#{ method := <<"PUT">>, body := Body}) -> upload_metric(Body); +upload_metric(Body) when is_binary(Body) -> + hb_prometheus:inc(counter, http_client_uploaded_bytes_total, [], byte_size(Body) @@ -745,32 +685,91 @@ upload_metric(#{method := post, body := Body}) -> upload_metric(_) -> ok. +method_to_bin(get) -> + <<"GET">>; +method_to_bin(post) -> + <<"POST">>; +method_to_bin(put) -> + <<"PUT">>; +method_to_bin(head) -> + <<"HEAD">>; +method_to_bin(delete) -> + <<"DELETE">>; +method_to_bin(connect) -> + <<"CONNECT">>; +method_to_bin(options) -> + <<"OPTIONS">>; +method_to_bin(trace) -> + <<"TRACE">>; +method_to_bin(patch) -> + <<"PATCH">>; +method_to_bin(Method) when is_binary(Method) -> + Method; +method_to_bin(_) -> + <<"unknown">>. + % @doc Return the HTTP status class label for cowboy_requests_total and % gun_requests_total metrics. 
get_status_class({ok, {{Status, _}, _, _, _, _}}) -> get_status_class(Status); +get_status_class({ok, Status, _RespondeHeaders, _Body}) -> + get_status_class(Status); +get_status_class({error, closed}) -> + <<"closed">>; +get_status_class({error, checkout_timeout}) -> + <<"checkout-timeout">>; +get_status_class({error, nxdomain}) -> + <<"nxdomain">>; get_status_class({error, connection_closed}) -> - <<"connection_closed">>; + <<"connection-closed">>; get_status_class({error, connect_timeout}) -> - <<"connect_timeout">>; + <<"connect-timeout">>; get_status_class({error, timeout}) -> <<"timeout">>; get_status_class({error,{shutdown,timeout}}) -> - <<"shutdown_timeout">>; + <<"shutdown-timeout">>; get_status_class({error, econnrefused}) -> <<"econnrefused">>; get_status_class({error, {shutdown,econnrefused}}) -> - <<"shutdown_econnrefused">>; + <<"shutdown-econnrefused">>; +get_status_class({error, {down, {shutdown, econnrefused}}}) -> + <<"shutdown-econnrefused">>; get_status_class({error, {shutdown,ehostunreach}}) -> - <<"shutdown_ehostunreach">>; + <<"shutdown-ehostunreach">>; get_status_class({error, {shutdown,normal}}) -> - <<"shutdown_normal">>; + <<"shutdown-normal">>; get_status_class({error, {closed,_}}) -> <<"closed">>; get_status_class({error, noproc}) -> <<"noproc">>; +get_status_class({error, {connection_error, {stream_closed, _Message}}}) -> + <<"stream-closed">>; +get_status_class({error, {stream_error, {stream_error, too_many_streams, _Message}}}) -> + <<"too-many-streams">>; +get_status_class({error, {stream_error, {stream_error, refused_stream, _Message}}}) -> + <<"refused-stream">>; +get_status_class({error, {stream_error, {goaway, no_error, _Message}}}) -> + <<"go-away">>; +get_status_class({error, {stream_error, {closed, {error, einval}}}}) -> + <<"closed-einval">>; +get_status_class({error, {down, shutdown}}) -> + <<"down-shutdown">>; +get_status_class({error, {stream_error, closed}}) -> + <<"stream-closed">>; +get_status_class({error, 
{stream_error, {closed, {error, closed}}}}) -> + <<"stream-closed">>; +get_status_class({error, {stream_error, closing}}) -> + <<"stream-closing">>; +get_status_class({error, {down, noproc}}) -> + <<"noproc">>; +get_status_class({error, {stream_error, {closed, normal}}}) -> + <<"stream-closed">>; get_status_class(208) -> - <<"already_processed">>; + <<"already-processed">>; +get_status_class(404) -> + <<"not-found">>; +get_status_class(429) -> + <<"too-many-requests">>; get_status_class(Data) when is_integer(Data), Data > 0 -> hb_util:bin(prometheus_http:status_class(Data)); get_status_class(Data) when is_binary(Data) -> @@ -779,8 +778,26 @@ get_status_class(Data) when is_binary(Data) -> <<"unknown">>; Status -> get_status_class(Status) - end; + end; get_status_class(Data) when is_atom(Data) -> atom_to_binary(Data); -get_status_class(_) -> - <<"unknown">>. \ No newline at end of file +get_status_class(StatusClass) -> + ?event(warning, {unknown_status_class, {status_class, StatusClass}}), + <<"unknown">>. + +%% @doc Convert path to category for grafana labels. +path_to_category(Path) -> + case Path of + <<"/graphql">> -> <<"GraphQL">>; + <<"/raw", _/binary>> -> <<"Raw">>; + <<"/tx/", _/binary>> -> <<"TX">>; + <<"/tx_anchor", _/binary>> -> <<"TX Anchor">>; + <<"/chunk", _/binary>> -> <<"Chunk">>; + <<"/price/", _/binary>> -> <<"Price">>; + <<"/block/height/", _/binary>> -> <<"Block Height">>; + <<"/block/current", _/binary>> -> <<"Current Block">>; + <<"/price", _/binary>> -> <<"Price">>; + <<"/~cache@1.0/read", _/binary>> -> <<"Remote Read">>; + undefined -> <<"unknown">>; + _ -> <<"unknown">> + end. diff --git a/src/hb_http_client_sup.erl b/src/hb_http_client_sup.erl index 54c060610..39d08d006 100644 --- a/src/hb_http_client_sup.erl +++ b/src/hb_http_client_sup.erl @@ -13,6 +13,7 @@ -define(CHILD(I, Type, Opts), {I, {I, start_link, Opts}, permanent, ?SHUTDOWN_TIMEOUT, Type, [I]}). 
start_link(Opts) -> + hb_prometheus:ensure_started(), supervisor:start_link({local, ?MODULE}, ?MODULE, Opts). init(Opts) -> diff --git a/src/hb_http_client_tests.erl b/src/hb_http_client_tests.erl new file mode 100644 index 000000000..ef2e2f249 --- /dev/null +++ b/src/hb_http_client_tests.erl @@ -0,0 +1,64 @@ +-module(hb_http_client_tests). +-include("include/hb.hrl"). +-include("include/hb_http_client.hrl"). +-include_lib("eunit/include/eunit.hrl"). + +hackney_basic_request_test_() -> + {timeout, 30, fun() -> + application:ensure_all_started(hb), + Args = #{ + peer => <<"https://arweave.net">>, + path => <<"/info">>, + method => <<"GET">>, + headers => #{}, + body => <<>> + }, + Opts = #{http_client => hackney, http_retry => 0}, + {ok, 200, _, _} = hb_http_client:request(Args, Opts) + end}. + +hackney_bad_peer_test_() -> + {timeout, 30, fun() -> + application:ensure_all_started(hb), + ?assert(erlang:whereis(hb_http_client) =/= undefined), + ValidArgs = #{ + peer => <<"https://arweave.net">>, + path => <<"/info">>, + method => <<"GET">>, + headers => #{}, + body => <<>> + }, + Opts = #{http_client => hackney, http_retry => 0}, + {ok, 200, _, _} = hb_http_client:request(ValidArgs, Opts), + BadArgs = ValidArgs#{peer => <<"not-a-valid-uri">>}, + BadResult = hb_http_client:request(BadArgs, Opts), + ?event(http_client_tests, {hackney_bad_peer_result, BadResult}), + ?assertMatch({error, _}, BadResult), + timer:sleep(500), + ?assert(erlang:whereis(hb_http_client) =/= undefined, + "gen_server must survive a bad peer URI with hackney backend"), + {ok, 200, _, _} = hb_http_client:request(ValidArgs, Opts) + end}. 
+ +hackney_post_test_() -> + {timeout, 30, fun() -> + application:ensure_all_started(hb), + Args = #{ + peer => <<"https://arweave.net">>, + path => <<"/info">>, + method => <<"POST">>, + headers => #{}, + body => <<"{}">> + }, + Opts = #{http_client => hackney, http_retry => 0}, + Result = hb_http_client:request(Args, Opts), + ?event(http_client_tests, {hackney_post_result, summarize(Result)}), + ?assertMatch({ok, _, _, _}, Result) + end}. + +summarize({caught, C, R}) when is_tuple(R) -> + {caught, C, element(1, R)}; +summarize({caught, C, R}) -> + {caught, C, R}; +summarize(Other) -> + Other. diff --git a/src/hb_http_multi.erl b/src/hb_http_multi.erl index 4b8eb030f..c428c6b1d 100644 --- a/src/hb_http_multi.erl +++ b/src/hb_http_multi.erl @@ -62,7 +62,7 @@ request(Config, Method, Path, Message, Opts) -> {raw_message, Message}, {message_to_send, MultirequestMsg} }), - AllResults = + {AdmissibleResults, AllResponses} = if Parallel =/= false -> parallel_multirequest( Parallel, @@ -88,9 +88,9 @@ request(Config, Method, Path, Message, Opts) -> Opts ) end, - ?event(http, {multirequest_results, {results, AllResults}}), - case AllResults of - [] -> {error, no_viable_responses}; + ?event(debug_http, {multirequest_results, {admissible_results, AdmissibleResults}, {all_responses, AllResponses}}), + case AdmissibleResults of + [] -> {error, {no_viable_responses, AllResponses}}; Results -> if Responses == 1 -> hd(Results); true -> Results end end. @@ -147,30 +147,29 @@ is_admissible(_, _, _, _, _) -> false. %% @doc Serially request a message, collecting responses until the required %% number of responses have been gathered. Ensure that the statuses are %% allowed, according to the configuration. 
-serial_multirequest(_Nodes, 0, _Method, _Path, _Message, _Admissible, _Statuses, _Opts) -> []; -serial_multirequest([], _, _Method, _Path, _Message, _Admissible, _Statuses, _Opts) -> []; +%% Returns {AdmissibleList, AllList} where AdmissibleList contains only +%% admissible responses and AllList contains all responses. +serial_multirequest(_Nodes, 0, _Method, _Path, _Message, _Admissible, _Statuses, _Opts) -> {[], []}; +serial_multirequest([], _, _Method, _Path, _Message, _Admissible, _Statuses, _Opts) -> {[], []}; serial_multirequest([Node|Nodes], Remaining, Method, Path, Message, Admissible, Statuses, Opts) -> {ErlStatus, Res} = hb_http:request(Method, Node, Path, Message, Opts), case is_admissible(ErlStatus, Res, Admissible, Statuses, Opts) of true -> - ?event(http, {admissible_status, {response, Res}}), - [ - {ErlStatus, Res} - | - serial_multirequest( - Nodes, - Remaining - 1, - Method, - Path, - Message, - Admissible, - Statuses, - Opts - ) - ]; + ?event(debug_http, {admissible_status, {response, Res}}), + {AdmissibleAcc, AllAcc} = serial_multirequest( + Nodes, + Remaining - 1, + Method, + Path, + Message, + Admissible, + Statuses, + Opts + ), + {[{ErlStatus, Res} | AdmissibleAcc], [{ErlStatus, Res} | AllAcc]}; false -> - ?event(http, {inadmissible_status, {response, Res}}), - serial_multirequest( + ?event(debug_http, {inadmissible_status, {response, Res}}), + {AdmissibleAcc, AllAcc} = serial_multirequest( Nodes, Remaining, Method, @@ -179,7 +178,8 @@ serial_multirequest([Node|Nodes], Remaining, Method, Path, Message, Admissible, Admissible, Statuses, Opts - ) + ), + {AdmissibleAcc, [{ErlStatus, Res} | AllAcc]} end. %% @doc Dispatch the same HTTP request to many nodes in parallel. 
@@ -188,7 +188,7 @@ parallel_multirequest(true, Nodes, Responses, StopAfter, Method, Path, Message, parallel_multirequest(MaxWorkers, Nodes, Responses, StopAfter, Method, Path, Message, Admissible, Statuses, Opts) -> Ref = make_ref(), {Workers, Queue} = start_workers(MaxWorkers, Ref, Nodes, Method, Path, Message, Opts), - parallel_responses([], Workers, Queue, {Method, Path, Message}, Ref, Responses, StopAfter, Admissible, Statuses, Opts). + parallel_responses([], [], Workers, Queue, {Method, Path, Message}, Ref, Responses, StopAfter, Admissible, Statuses, Opts). %% @doc Start a new fleet of workers, returning the list of worker PIDs. start_workers(Count, Ref, Nodes, Method, Path, Message, Opts) -> @@ -200,7 +200,10 @@ start_workers(Count, Ref, Nodes, Method, Path, Message, Opts) -> fun(Node) -> spawn( fun() -> - Res = hb_http:request(Method, Node, Path, Message, Opts), + Res = + try hb_http:request(Method, Node, Path, Message, Opts) + catch C:R -> {error, {worker_crash, C, R}} + end, receive no_reply -> stopping after 0 -> Parent ! {Ref, self(), Res} end @@ -262,28 +265,32 @@ admissible_response(Response, Msg, Opts) -> %% @doc Collect the necessary number of responses, and stop workers if %% configured to do so. -parallel_responses(Res, [], _, _, Ref, _Awaiting, _StopAfter, _Admissible, _Statuses, _Opts) -> +%% Returns {AdmissibleList, AllList} where AdmissibleList contains only +%% admissible responses and AllList contains all responses. +parallel_responses(AdmissibleRes, AllRes, [], _, _, Ref, _Awaiting, _StopAfter, _Admissible, _Statuses, _Opts) -> empty_inbox(Ref), - Res; -parallel_responses(Res, Procs, _, _, Ref, 0, false, _Admissible, _Statuses, _Opts) -> + {AdmissibleRes, AllRes}; +parallel_responses(AdmissibleRes, AllRes, Procs, _, _, Ref, 0, false, _Admissible, _Statuses, _Opts) -> lists:foreach(fun(P) -> P ! 
no_reply end, Procs), empty_inbox(Ref), - Res; -parallel_responses(Res, Procs, _, _, Ref, 0, true, _Admissible, _Statuses, _Opts) -> + {AdmissibleRes, AllRes}; +parallel_responses(AdmissibleRes, AllRes, Procs, _, _, Ref, 0, true, _Admissible, _Statuses, _Opts) -> lists:foreach(fun(P) -> exit(P, kill) end, Procs), empty_inbox(Ref), - Res; -parallel_responses(Res, Procs, Queue, {Method, Path, Message}, Ref, Awaiting, StopAfter, Admissible, Statuses, Opts) -> + {AdmissibleRes, AllRes}; +parallel_responses(AdmissibleRes, AllRes, Procs, Queue, {Method, Path, Message}, Ref, Awaiting, StopAfter, Admissible, Statuses, Opts) -> receive {Ref, Pid, {Status, NewRes}} -> WorkersWithoutPid = lists:delete(Pid, Procs), {RefilledWorkers, NewQueue} = start_workers(1, Ref, Queue, Method, Path, Message, Opts), NewProcs = RefilledWorkers ++ WorkersWithoutPid, + NewAllRes = [{Status, NewRes} | AllRes], case is_admissible(Status, NewRes, Admissible, Statuses, Opts) of true -> parallel_responses( - [{Status, NewRes} | Res], + [{Status, NewRes} | AdmissibleRes], + NewAllRes, NewProcs, NewQueue, {Method, Path, Message}, @@ -296,7 +303,8 @@ parallel_responses(Res, Procs, Queue, {Method, Path, Message}, Ref, Awaiting, St ); false -> parallel_responses( - Res, + AdmissibleRes, + NewAllRes, NewProcs, NewQueue, {Method, Path, Message}, @@ -319,3 +327,119 @@ empty_inbox(Ref) -> after 0 -> ok end. + +%%% Tests + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). + +good() -> ao_node(hb_http_server:start_node(#{})). +slow(Ms) -> ao_node(hb_http_server:start_node(slow_node_opts(Ms))). +crash() -> #{<<"opts">> => #{http_client => httpc}}. + +ao_node(URL) -> + #{<<"uri">> => <>, + <<"opts">> => #{http_client => httpc}}. + +dead_node() -> + {ok, S} = gen_tcp:listen(0, []), + {ok, Port} = inet:port(S), + ok = gen_tcp:close(S), + #{<<"uri">> => iolist_to_binary(["http://localhost:", integer_to_list(Port)]), + <<"opts">> => #{http_client => httpc}}. 
+ +slow_node_opts(Ms) -> + #{test_delay => Ms, + on => #{<<"request">> => + #{<<"device">> => <<"test-device@1.0">>, <<"path">> => <<"delay">>}}}. + +multi(Nodes, Extra) -> + Config = Extra#{<<"nodes">> => Nodes, <<"admissible-status">> => 200}, + hb_http_multi:request(Config, <<"GET">>, <<"/">>, #{}, #{}). + +multirequest_test_() -> + {setup, + fun() -> + #{fast => good(), slow1 => slow(500), slow2 => slow(500), + good1 => good(), good2 => good(), good3 => good()} + end, + fun(N) -> {timeout, 30, [ + {"serial fallback", fun() -> + ?assertMatch({ok, _}, + multi([dead_node(), maps:get(fast, N)], #{})) + end}, + {"parallel race", fun() -> + T0 = erlang:monotonic_time(millisecond), + ?assertMatch({ok, _}, + multi([maps:get(fast, N), maps:get(slow1, N), maps:get(slow2, N)], + #{<<"parallel">> => true, <<"stop-after">> => true})), + ?assert(erlang:monotonic_time(millisecond) - T0 < 500) + end}, + {"parallel broadcast", fun() -> + ?assertMatch([_, _, _], + multi([dead_node(), + maps:get(good1, N), maps:get(good2, N), maps:get(good3, N), + maps:get(slow1, N)], + #{<<"parallel">> => true, <<"responses">> => 3, + <<"stop-after">> => false})) + end}, + {"parallel crash", fun() -> + ?assertMatch({error, {no_viable_responses, _}}, + multi([crash(), crash()], + #{<<"parallel">> => true, <<"stop-after">> => true})) + end} + ]} end}. + +%% @doc Parallel race using the actual /arweave route config from hb_opts: +%% one fast node returns 200 immediately, two slow nodes are still processing. +%% The call must return before the slow nodes finish. +parallel_race_stops_at_first_admissible_test_() -> + {timeout, 30, fun parallel_race_stops_at_first_admissible/0}. 
+parallel_race_stops_at_first_admissible() -> + Delay = 500, + FastURL = hb_http_server:start_node(#{}), + SlowURL1 = hb_http_server:start_node(slow_node_opts(Delay)), + SlowURL2 = hb_http_server:start_node(slow_node_opts(Delay)), + Routes = maps:get(routes, hb_opts:default_message()), + [ArweaveRoute] = + [R || R <- Routes, + maps:get(<<"template">>, R, undefined) =:= <<"^/arweave">>, + maps:is_key(<<"nodes">>, R)], + Config = ArweaveRoute#{ + <<"nodes">> => [ao_node(FastURL), ao_node(SlowURL1), ao_node(SlowURL2)] + }, + T0 = erlang:monotonic_time(millisecond), + Result = hb_http_multi:request(Config, <<"GET">>, <<"/">>, #{}, #{}), + Elapsed = erlang:monotonic_time(millisecond) - T0, + ?assertMatch({ok, _}, Result), + ?assert(Elapsed < Delay). + +%% @doc Serial fallback: unreachable nodes are skipped until a live one +%% responds with 200. +serial_fallback_skips_non_admissible_test_() -> + {timeout, 30, fun serial_fallback_skips_non_admissible/0}. +serial_fallback_skips_non_admissible() -> + GoodURL = hb_http_server:start_node(#{}), + Config = #{ + <<"nodes">> => [dead_node(), dead_node(), ao_node(GoodURL)], + <<"parallel">> => 1, + <<"stop-after">> => true, + <<"admissible-status">> => 200 + }, + Result = hb_http_multi:request(Config, <<"GET">>, <<"/">>, #{}, #{}), + ?assertMatch({ok, _}, Result). + +%% @doc No admissible node: all unreachable, error tuple returned. +no_admissible_node_returns_error_test_() -> + {timeout, 30, fun no_admissible_node_returns_error/0}. +no_admissible_node_returns_error() -> + Config = #{ + <<"nodes">> => [dead_node(), dead_node()], + <<"parallel">> => 1, + <<"stop-after">> => true, + <<"admissible-status">> => 200 + }, + Result = hb_http_multi:request(Config, <<"GET">>, <<"/">>, #{}, #{}), + ?assertMatch({error, {no_viable_responses, _}}, Result). + +-endif. 
diff --git a/src/hb_http_server.erl b/src/hb_http_server.erl index 3fe5eb726..dadd717ae 100644 --- a/src/hb_http_server.erl +++ b/src/hb_http_server.erl @@ -36,6 +36,7 @@ start() -> hb_opts:default_message_with_env(), Loaded ), + hb_http_client:setup_conn(MergedConfig), %% Apply store defaults before starting store StoreOpts = hb_opts:get(store, no_store, MergedConfig), StoreDefaults = hb_opts:get(store_defaults, #{}, MergedConfig), @@ -77,6 +78,8 @@ start(Opts) -> ]), hb:init(), BaseOpts = set_default_opts(Opts), + ok = hb_process_sampler:ensure_started(BaseOpts), + ok = hb_system_monitor:ensure_started(BaseOpts), {ok, Listener, _Port} = new_server(BaseOpts), {ok, Listener}. @@ -124,7 +127,7 @@ print_greeter(Config, PrivWallet) -> io_lib:format( "http://~s:~p", [ - hb_opts:get(host, <<"localhost">>, Config), + hb_opts:get(node_host, <<"localhost">>, Config), hb_opts:get(port, 8734, Config) ] ) @@ -196,7 +199,7 @@ new_server(RawNodeMsg) -> % Attempt to start the prometheus application, if possible. try application:ensure_all_started([prometheus, prometheus_cowboy, prometheus_ranch]), - prometheus_registry:register_collector(hb_metrics_collector), + prometheus_registry:register_collectors([hb_metrics_collector]), ProtoOpts#{ metrics_callback => fun prometheus_cowboy2_instrumenter:observe/1, @@ -364,7 +367,7 @@ cors_reply(Req, _ServerID) -> <<"access-control-allow-methods">> => <<"GET, POST, PUT, DELETE, OPTIONS, PATCH">> }, Req), - ?event(http_debug, {cors_reply, {req, Req}, {req2, Req2}}), + ?event(debug_http, {cors_reply, {req, Req}, {req2, Req2}}), {ok, Req2, no_state}. %% @doc Handle all non-CORS preflight requests as AO-Core requests. 
Execution @@ -378,74 +381,55 @@ handle_request(RawReq, Body, ServerID) -> Req = RawReq#{ start_time => StartTime }, NodeMsg = get_opts(#{ http_server => ServerID }), put(server_id, ServerID), - case {cowboy_req:path(RawReq), cowboy_req:qs(RawReq)} of - {<<"/">>, <<>>} -> - % If the request is for the root path, serve a redirect to the default - % request of the node. - Req2 = cowboy_req:reply( - 302, - #{ - <<"location">> => - hb_opts:get( - default_request, - <<"/~hyperbuddy@1.0/index">>, - NodeMsg - ) - }, - RawReq - ), - {ok, Req2, no_state}; - _ -> - % The request is of normal AO-Core form, so we parse it and invoke - % the meta@1.0 device to handle it. - ?event(http, - { - http_inbound, - {cowboy_req, {explicit, Req}, {body, {string, Body}}} - } - ), - % Parse the HTTP request into HyerBEAM's message format. - try hb_http:req_to_tabm_singleton(Req, Body, NodeMsg) of - ReqSingleton -> - try - CommitmentCodec = - hb_http:accept_to_codec(ReqSingleton, NodeMsg), - ?event(http, - {parsed_singleton, - {req_singleton, ReqSingleton}, - {accept_codec, CommitmentCodec}}, - #{} - ), - % Invoke the meta@1.0 device to handle the request. - {ok, Res} = - dev_meta:handle( - NodeMsg#{ - commitment_device => CommitmentCodec - }, - ReqSingleton - ), - hb_http:reply(Req, ReqSingleton, Res, NodeMsg) - catch - Type:Details:Stacktrace -> - handle_error( - Req, - ReqSingleton, - Type, - Details, - Stacktrace, - NodeMsg - ) - end - catch ParseError:ParseDetails:ParseStacktrace -> - handle_error( - Req, - #{}, - ParseError, - ParseDetails, - ParseStacktrace, - NodeMsg - ) + % The request is of normal AO-Core form, so we parse it and invoke + % the meta@1.0 device to handle it. + ?event(http, + { + http_inbound, + {cowboy_req, {explicit, Req}, {body, {string, Body}}} + } + ), + % Parse the HTTP request into HyerBEAM's message format. 
+ try hb_http:req_to_tabm_singleton(Req, Body, NodeMsg) of + ReqSingleton -> + try + CommitmentCodec = + hb_http:accept_to_codec(ReqSingleton, NodeMsg), + ?event(http, + {parsed_singleton, + {req_singleton, ReqSingleton}, + {accept_codec, CommitmentCodec}}, + #{} + ), + % Invoke the meta@1.0 device to handle the request. + {ok, Res} = + dev_meta:handle( + NodeMsg#{ + commitment_device => CommitmentCodec + }, + ReqSingleton + ), + hb_http:reply(Req, ReqSingleton, Res, NodeMsg) + catch + Type:Details:Stacktrace -> + handle_error( + Req, + ReqSingleton, + Type, + Details, + Stacktrace, + NodeMsg + ) end + catch ParseError:ParseDetails:ParseStacktrace -> + handle_error( + Req, + #{}, + ParseError, + ParseDetails, + ParseStacktrace, + NodeMsg + ) end. %% @doc Return a 500 error response to the client. @@ -463,6 +447,8 @@ handle_error(Req, Singleton, Type, Details, Stacktrace, NodeMsg) -> ?event( http_error, {returning_500_error, + {method, cowboy_req:method(Req)}, + {path, cowboy_req:path(Req)}, {string, hb_format:indent_lines( <<"\n", ErrorBin/binary, "\n">>, @@ -588,6 +574,8 @@ start_node(Opts) -> hb:init(), hb_sup:start_link(Opts), ServerOpts = set_default_opts(Opts), + ok = hb_process_sampler:ensure_started(ServerOpts), + ok = hb_system_monitor:ensure_started(ServerOpts), {ok, _Listener, Port} = new_server(ServerOpts), <<"http://localhost:", (hb_util:bin(Port))/binary, "/">>. diff --git a/src/hb_maps.erl b/src/hb_maps.erl index 391e018dc..bcb38d2d7 100644 --- a/src/hb_maps.erl +++ b/src/hb_maps.erl @@ -17,7 +17,7 @@ %%% yourself from the inevitable issues that will arise from using this %%% module without understanding the full implications. You have been warned. -module(hb_maps). --export([get/2, get/3, get/4, put/3, put/4, find/2, find/3]). +-export([get/2, get/3, get/4, get_first/2, get_first/3, put/3, put/4, find/2, find/3]). -export([is_key/2, is_key/3, keys/1, keys/2, values/1, values/2]). -export([map/2, map/3, filter/2, filter/3, filtermap/2, filtermap/3]). 
-export([fold/3, fold/4, take/2, take/3, size/1, size/2]). @@ -26,6 +26,27 @@ -export([from_list/1, to_list/1, to_list/2]). -include_lib("eunit/include/eunit.hrl"). +%%% HyperBEAM-specific functions + +-spec get_first( + Paths :: [{Base :: map() | binary(), Path :: binary()}], + Opts :: map() +) -> term(). +get_first(Paths, Opts) -> + get_first(Paths, not_found, Opts). + +-spec get_first( + Paths :: [{Base :: map() | binary(), Path :: binary()}], + Default :: term(), + Opts :: map() +) -> term(). +get_first([], Default, _Opts) -> Default; +get_first([{Base, Path}|Paths], Default, Opts) -> + case find(Path, Base, Opts) of + {ok, Value} -> Value; + error -> get_first(Paths, Default, Opts) + end. + -spec get(Key :: term(), Map :: map()) -> term(). get(Key, Map) -> get(Key, Map, undefined). diff --git a/src/hb_message.erl b/src/hb_message.erl index b34267811..f5d61c4ec 100644 --- a/src/hb_message.erl +++ b/src/hb_message.erl @@ -66,6 +66,7 @@ -export([with_only_committed/2, without_unless_signed/3]). -export([with_commitments/3, without_commitments/3, uncommitted_deep/2]). -export([diff/3, match/2, match/3, match/4, find_target/3]). +-export([is_bundle/2]). %%% Helpers: -export([default_tx_list/0, filter_default_keys/1]). %%% Debugging tools: @@ -694,7 +695,7 @@ unsafe_match(RawMap1, RawMap2, Mode, Path, Opts) -> fun(Key) -> lists:member(Key, Keys1) end, Keys1 ), - ?event(match, + ?event(debug_match, {match, {keys1, Keys1}, {keys2, Keys2}, @@ -709,7 +710,7 @@ unsafe_match(RawMap1, RawMap2, Mode, Path, Opts) -> lists:all( fun(<<"commitments">>) -> true; (Key) -> - ?event(match, {matching_key, Key}), + ?event(debug_match, {matching_key, Key}), Val1 = hb_ao:normalize_keys( hb_maps:get(Key, NormMap1, not_found, Opts), @@ -992,3 +993,12 @@ default_tx_message() -> default_tx_list() -> Keys = lists:map(fun hb_ao:normalize_key/1, record_info(fields, tx)), lists:zip(Keys, tl(tuple_to_list(#tx{}))). 
+ +is_bundle(Message, Opts) -> + Commitments = maps:get(<<"commitments">>, Message, #{}), + MessageID = hb_message:id(Message, signed, Opts), + case maps:get(MessageID, Commitments, #{}) of + #{<<"bundle">> := Value} -> hb_util:bool(Value); + _ -> false + end. + diff --git a/src/hb_message_test_vectors.erl b/src/hb_message_test_vectors.erl index d20e0e2bd..54a311074 100644 --- a/src/hb_message_test_vectors.erl +++ b/src/hb_message_test_vectors.erl @@ -23,6 +23,16 @@ test_codecs() -> #{ <<"device">> => <<"httpsig@1.0">>, <<"bundle">> => true }, <<"flat@1.0">>, <<"ans104@1.0">>, + #{ + <<"device">> => <<"ans104@1.0">>, + <<"type">> => ?EDDSA_SIGN_TYPE, + <<"with-opts">> => ed25519 + }, + #{ + <<"device">> => <<"ans104@1.0">>, + <<"type">> => ?ETHEREUM_SIGN_TYPE, + <<"with-opts">> => ethereum + }, #{ <<"device">> => <<"ans104@1.0">>, <<"bundle">> => true }, <<"json@1.0">>, #{ <<"device">> => <<"json@1.0">>, <<"bundle">> => true }, @@ -40,7 +50,17 @@ suite_test_opts() -> parallel => true, desc => <<"Default opts">>, opts => test_opts(normal) - } + }, + #{ + name => ed25519, + desc => <<"Ed25519 opts">>, + opts => test_opts(ed25519) + }, + #{ + name => ecdsa, + desc => <<"Secp256k1 (Ethereum) opts">>, + opts => test_opts(ethereum) + } ]. suite_test_opts(OptsName) -> [ O || O = #{ name := OName } <- suite_test_opts(), OName == OptsName ]. @@ -49,7 +69,17 @@ test_opts(normal) -> #{ store => hb_test_utils:test_store(), priv_wallet => hb:wallet() - }. + }; +test_opts(ed25519) -> + #{ + store => hb_test_utils:test_store(), + priv_wallet => ar_wallet:new({eddsa, ed25519}) + }; +test_opts(ethereum) -> + #{ + store => hb_test_utils:test_store(), + priv_wallet => ar_wallet:new(ethereum) + }. test_suite() -> [ @@ -171,31 +201,43 @@ test_suite() -> suite_test_() -> hb_test_utils:suite_with_opts( codec_test_suite( - test_codecs(), - normal + test_codecs() ), - suite_test_opts(normal) + suite_test_opts() ). 
%% @doc Run the test suite for a set of codecs, using the given options type. %% Unlike normal `hb_test_utils:suite_with_opts/2' users, this suite generator %% creates a new options message for each individual test, such that stores %% are completely isolated from each other. -codec_test_suite(Codecs, OptsType) -> +codec_test_suite(Codecs) -> lists:flatmap( - fun(CodecName) -> - lists:map(fun({Desc, Test}) -> - TestName = - binary_to_list( - << (suite_name(CodecName))/binary, ": ", Desc/binary >> - ), - TestSpecificOpts = test_opts(OptsType), - { - Desc, - TestName, - fun(_SuiteOpts) -> Test(CodecName, TestSpecificOpts) end - } - end, test_suite()) + fun(CodecSpec) -> + lists:filtermap( + fun({Desc, Test}) -> + TestName = + binary_to_list( + << (suite_name(CodecSpec))/binary, ": ", Desc/binary >> + ), + OptsType = + case is_map(CodecSpec) of + true -> maps:get(<<"with-opts">>, CodecSpec, normal); + false -> normal + end, + TestSpecificOpts = test_opts(OptsType), + { + true, + { + Desc, + TestName, + fun(_SuiteOpts) -> + Test(CodecSpec, TestSpecificOpts) + end + } + } + end, + test_suite() + ) end, Codecs ). @@ -209,6 +251,14 @@ suite_name(CodecSpec) when is_map(CodecSpec) -> true -> << CodecName/binary, " (bundle)">> end. +%% @doc Determine if the given codec setup is relevant, given the `OptsType' +%% specified. +is_relevant_opts(#{ <<"with-opts">> := RelevantOpts }, OptsType) -> + lists:member(OptsType, RelevantOpts); +is_relevant_opts(_Codec, _OptsType) -> true. + +%% @doc Determine if a CodecSpec (either binary, map, or list thereof) matches +%% a given codec device name binary. 
is_device_codec(Devices, Codec) when is_list(Devices) -> lists:any(fun(Device) -> is_device_codec(Device, Codec) end, Devices); is_device_codec(Device, Codec) when Device == Codec -> @@ -1455,7 +1505,7 @@ sign_links_test(Codec, Opts) -> bundled_and_unbundled_ids_differ_test(Codec = #{ <<"bundle">> := true }, Opts) -> SignatureType = case is_device_codec([<<"ans104@1.0">>, <<"tx@1.0">>], Codec) of - true -> <<"rsa-pss-sha256">>; + true -> ?RSA_SIGN_TYPE; false -> <<"hmac-sha256">> end, Msg = #{ @@ -1648,4 +1698,53 @@ bundled_ordering_test(Codec = #{ <<"bundle">> := true }, Opts) -> ?assert(MatchRes), ?assert(hb_message:verify(Decoded, all, Opts)); bundled_ordering_test(_Codec, _Opts) -> - skip. \ No newline at end of file + skip. + +rsa_wallet_not_match_message_ed25519_type_test() -> + Opts = #{priv_wallet => ar_wallet:new(?RSA_KEY_TYPE)}, + SignatureType = ?EDDSA_SIGN_TYPE, + Msg = #{<<"a">> => <<"b">>}, + ?assertThrow({wrong_wallet_to_sign, + {request_type, ?EDDSA_SIGN_TYPE}, + {wallet_type, {rsa,65537}}}, + hb_message:commit( + Msg, + Opts, + #{<<"commitment-device">> => <<"ans104@1.0">>, <<"type">> => SignatureType} + )). + +ed25519_wallet_not_match_message_rsa_type_test() -> + Opts = #{priv_wallet => ar_wallet:new(?EDDSA_KEY_TYPE)}, + SignatureType = ?RSA_SIGN_TYPE, + Msg = #{<<"a">> => <<"b">>}, + ?assertThrow({wrong_wallet_to_sign, + {request_type, ?RSA_SIGN_TYPE}, + {wallet_type, {eddsa,ed25519}}}, + hb_message:commit( + Msg, + Opts, + #{<<"commitment-device">> => <<"ans104@1.0">>, <<"type">> => SignatureType} + )). + +ethereum_wallet_not_match_message_rsa_type_test() -> + Opts = #{priv_wallet => ar_wallet:new(?ETHEREUM_KEY_TYPE)}, + SignatureType = ?RSA_SIGN_TYPE, + Msg = #{<<"a">> => <<"b">>}, + ?assertThrow({wrong_wallet_to_sign, + {request_type, ?RSA_SIGN_TYPE}, + {wallet_type, ethereum}}, + hb_message:commit( + Msg, + Opts, + #{<<"commitment-device">> => <<"ans104@1.0">>, <<"type">> => SignatureType} + )). 
+ +ethereum_wallet_match_message_ethereum_type_test() -> + Opts = #{priv_wallet => ar_wallet:new(?ETHEREUM_KEY_TYPE)}, + SignatureType = <<"ethereum">>, + Msg = #{<<"a">> => <<"b">>}, + ?assert(is_map(hb_message:commit( + Msg, + Opts, + #{<<"commitment-device">> => <<"ans104@1.0">>, <<"type">> => SignatureType} + ))). diff --git a/src/hb_metrics_collector.erl b/src/hb_metrics_collector.erl index 18804b04b..2a9d9783f 100644 --- a/src/hb_metrics_collector.erl +++ b/src/hb_metrics_collector.erl @@ -8,6 +8,7 @@ ] ). -behaviour(prometheus_collector). +-include("include/hb_http_client.hrl"). %%==================================================================== %% Collector API %%==================================================================== @@ -23,7 +24,7 @@ collect_mf(_Registry, Callback) -> ) ), - SystemLoad = cpu_sup:avg5(), + SystemLoad = safe_avg5(), Callback( create_gauge( @@ -36,6 +37,29 @@ collect_mf(_Registry, Callback) -> ) ), + {InUse, Free, Queue} = hackney_pool_stats(), + Callback( + create_gauge( + hackney_pool_in_use, + "Hackney connections currently in use", + InUse + ) + ), + Callback( + create_gauge( + hackney_pool_free, + "Idle hackney connections available in the pool", + Free + ) + ), + Callback( + create_gauge( + hackney_pool_queue, + "Requests waiting for a hackney connection", + Queue + ) + ), + ok. collect_metrics(system_load, SystemLoad) -> %% Return the gauge metric with no labels @@ -45,18 +69,52 @@ collect_metrics(system_load, SystemLoad) -> ] ); collect_metrics(process_uptime_seconds, Uptime) -> - %% Convert the uptime from milliseconds to seconds UptimeSeconds = Uptime / 1000, - - %% Return the gauge metric with no labels - prometheus_model_helpers:gauge_metrics( - [ - {[], UptimeSeconds} - ] - ). 
+ prometheus_model_helpers:gauge_metrics([{[], UptimeSeconds}]); +collect_metrics(hackney_pool_in_use, Value) -> + prometheus_model_helpers:gauge_metrics([{[], Value}]); +collect_metrics(hackney_pool_free, Value) -> + prometheus_model_helpers:gauge_metrics([{[], Value}]); +collect_metrics(hackney_pool_queue, Value) -> + prometheus_model_helpers:gauge_metrics([{[], Value}]). %%==================================================================== %% Private Functions %%==================================================================== + +%% @doc Wrapper around cpu_sup:avg5/0 with a 2-second timeout. +%% cpu_sup:avg5/0 uses an infinity timeout to os_mon internally; +%% if the port program stalls, it blocks the Prometheus scrape indefinitely. +%% On timeout, the worker is killed to avoid leaking blocked processes. +safe_avg5() -> + Ref = make_ref(), + Self = self(), + {Pid, MonRef} = spawn_monitor(fun() -> Self ! {Ref, catch cpu_sup:avg5()} end), + receive + {Ref, Load} when is_integer(Load) -> + erlang:demonitor(MonRef, [flush]), + Load; + {Ref, _} -> + erlang:demonitor(MonRef, [flush]), + 0; + {'DOWN', MonRef, process, Pid, _} -> + 0 + after 2000 -> + exit(Pid, kill), + erlang:demonitor(MonRef, [flush]), + receive {Ref, _} -> ok after 0 -> ok end, + 0 + end. + +%% @doc Read hackney pool stats at scrape time. +hackney_pool_stats() -> + try hackney_pool:get_stats(?HACKNEY_POOL) of + Stats -> + {proplists:get_value(in_use_count, Stats, 0), + proplists:get_value(free_count, Stats, 0), + proplists:get_value(queue_count, Stats, 0)} + catch _:_ -> {0, 0, 0} + end. + create_gauge(Name, Help, Data) -> prometheus_model_helpers:create_mf(Name, Help, gauge, ?MODULE, Data). \ No newline at end of file diff --git a/src/hb_name.erl b/src/hb_name.erl index e85c05162..1aecccbf9 100644 --- a/src/hb_name.erl +++ b/src/hb_name.erl @@ -5,6 +5,7 @@ %%% There can only ever be one registrant for a given name at a time. -module(hb_name). 
-export([start/0, register/1, register/2, unregister/1, lookup/1, all/0]). +-export([singleton/2]). -include("include/hb.hrl"). -include_lib("eunit/include/eunit.hrl"). -define(NAME_TABLE, hb_name_registry). @@ -58,6 +59,45 @@ unregister(Name) -> ets:delete(?NAME_TABLE, Name), ok. +%% @doc Atomic singleton lookup/spawn+register operation. +%% +%% Multiple callers may simultaneously invoke this function, but the PID of +%% only one survivor will be registered and returned to all callers. +%% If the given function crashes on spawn, then the operation will retry the +%% operation until they successfully spawn and register a process -- which will +%% promptly fail. The result is that the intended semantics are still preserved: +%% Calling `singleton' will always return the PID of a process that owns that name +%% at the time of return. +singleton(Name, Fun) -> + case lookup(Name) of + Registered when is_pid(Registered) -> Registered; + undefined -> singleton_spawn(Name, Fun) + end. + +%% @doc Perform the actual atomic spawn+register operation. +singleton_spawn(Name, Fun) -> + start(), + Parent = self(), + ReadyRef = make_ref(), + PID = + spawn( + fun() -> + Spawned = self(), + case catch ?MODULE:register(Name, Spawned) of + ok -> + Parent ! {spawned, ReadyRef, Spawned}, + Fun(); + _ -> + Parent ! {spawn_failed, ReadyRef}, + ok + end + end + ), + receive + {spawned, ReadyRef, PID} -> PID; + {spawn_failed, ReadyRef} -> singleton(Name, Fun) + end. + %%% @doc Lookup a name -> PID. lookup(Name) when is_atom(Name) -> case whereis(Name) of @@ -118,6 +158,14 @@ atom_test() -> term_test() -> basic_test({term, os:timestamp()}). +singleton_returns_spawned_pid_test() -> + Name = {singleton, os:timestamp()}, + Pid = singleton(Name, fun() -> receive stop -> ok end end), + ?assertEqual(Pid, lookup(Name)), + ?assertNotEqual(self(), Pid), + Pid ! stop, + hb_name:unregister(Name).
+ concurrency_test() -> Name = {concurrent_test, os:timestamp()}, SuccessCount = length([R || R <- spawn_test_workers(Name), R =:= ok]), diff --git a/src/hb_opts.erl b/src/hb_opts.erl index 1000d8361..c3d40c6e9 100644 --- a/src/hb_opts.erl +++ b/src/hb_opts.erl @@ -18,17 +18,41 @@ -export([ensure_node_history/2]). -export([check_required_opts/2]). -include("include/hb.hrl"). +-include("include/hb_opts.hrl"). +-include("include/hb_arweave_nodes.hrl"). %%% Environment variables that can be used to override the default message. -ifdef(TEST). --define(DEFAULT_PRINT_OPTS, [error, http_error, cron_error]). +-define(DEFAULT_PRINT_OPTS, + [ + error, http_error, cron_error, hook_error + ] +). -else. -define(DEFAULT_PRINT_OPTS, [ - error, http_error, cron_error, - http_short, compute_short, push_short, copycat_short + error, http_error, cron_error, hook_error, warning, + http_server_short, + compute_short, push_short, copycat_short, bundler_short + ] +). +-endif. + +%%% Default name resolvers. In test mode, we do not use any name resolvers, but +%%% in-production mode we preload the ARNS snapshot as a baseline. +-ifndef(TEST). +-define(DEFAULT_NAME_RESOLVERS, + [ + #{ <<"device">> => <<"arweave@2.9">> }, + #{ <<"device">> => <<"b32-name@1.0">> }, + << + "G_gb7SAgogHMtmqycwaHaC6uC-CZ3akACdFv5PUaEE8", + "~json@1.0/deserialize&target=data" + >> ] ). +-else. +-define(DEFAULT_NAME_RESOLVERS, []). -endif. -ifdef(AO_PROFILING). @@ -41,6 +65,8 @@ <<"name">> => <<"cache-mainnet/lmdb">>, <<"store-module">> => hb_store_lmdb }). +-define(DEFAULT_GATEWAY, <<"https://arweave.net">>). +-define(DEFAULT_HTTP_OPTS, #{http_client => ?DEFAULT_HTTP_CLIENT, protocol => http2}). 
-define(ENV_KEYS, #{ priv_key_location => {"HB_KEY", "hyperbeam-key.json"}, @@ -55,6 +81,15 @@ fun topic_list_to_atoms/1, {preparsed, ?DEFAULT_PRINT_OPTS} }, + debug_log => + { + "HB_LOG", + fun topic_list_to_atoms/1, + {preparsed, false} + }, + log_dir => {"HB_LOG_DIR", fun hb_util:bin/1, "logs"}, + log_max_files => {"HB_LOG_MAX_FILES", fun hb_util:int/1, "5"}, + log_max_bytes => {"HB_LOG_MAX_BYTES", fun hb_util:int/1, "52428800"}, lua_scripts => {"LUA_SCRIPTS", "scripts"}, lua_tests => {"LUA_TESTS", fun dev_lua_test:parse_spec/1, tests}, default_index => @@ -113,8 +148,8 @@ default_message() -> hb_config_location => <<"config.flat">>, initialized => true, %% What HTTP client should the node use? - %% Options: gun, httpc - http_client => gun, + %% Options: gun, httpc, hackney + http_client => ?DEFAULT_HTTP_CLIENT, %% Scheduling mode: Determines when the SU should inform the recipient %% that an assignment has been scheduled for a message. %% Options: aggressive(!), local_confirmation, remote_confirmation, @@ -125,7 +160,7 @@ default_message() -> %% Options: aggressive, lazy compute_mode => lazy, %% Choice of remote nodes for tasks that are not local to hyperbeam. - gateway => <<"https://arweave.net">>, + gateway => ?DEFAULT_GATEWAY, bundler_ans104 => <<"https://up.arweave.net:443">>, %% Location of the wallet keyfile on disk that this node will use. priv_key_location => <<"hyperbeam-key.json">>, @@ -136,10 +171,12 @@ default_message() -> %% Preloaded devices for the node to use. These names override %% resolution of devices via ID to the default implementations. 
preloaded_devices => [ - #{<<"name">> => <<"arweave@2.9-pre">>, <<"module">> => dev_arweave}, + #{<<"name">> => <<"arweave@2.9">>, <<"module">> => dev_arweave}, #{<<"name">> => <<"apply@1.0">>, <<"module">> => dev_apply}, #{<<"name">> => <<"auth-hook@1.0">>, <<"module">> => dev_auth_hook}, #{<<"name">> => <<"ans104@1.0">>, <<"module">> => dev_codec_ans104}, + #{<<"name">> => <<"b32-name@1.0">>, <<"module">> => dev_b32_name}, + #{<<"name">> => <<"blacklist@1.0">>, <<"module">> => dev_blacklist}, #{<<"name">> => <<"bundler@1.0">>, <<"module">> => dev_bundler}, #{<<"name">> => <<"compute@1.0">>, <<"module">> => dev_cu}, #{<<"name">> => <<"cache@1.0">>, <<"module">> => dev_cache}, @@ -178,6 +215,7 @@ default_message() -> #{<<"name">> => <<"profile@1.0">>, <<"module">> => dev_profile}, #{<<"name">> => <<"push@1.0">>, <<"module">> => dev_push}, #{<<"name">> => <<"query@1.0">>, <<"module">> => dev_query}, + #{<<"name">> => <<"rate-limit@1.0">>, <<"module">> => dev_rate_limit}, #{<<"name">> => <<"relay@1.0">>, <<"module">> => dev_relay}, #{<<"name">> => <<"router@1.0">>, <<"module">> => dev_router}, #{<<"name">> => <<"scheduler@1.0">>, <<"module">> => dev_scheduler}, @@ -209,11 +247,13 @@ default_message() -> trusted_device_signers => [], %% What should the node do if a client error occurs? 
client_error_strategy => throw, - %% HTTP request options - http_connect_timeout => 5000, - http_keepalive => 120000, - http_request_send_timeout => 300_000, + %% HTTP client request options + http_client_connect_timeout => 5000, + http_client_keepalive => 120000, + http_client_send_timeout => 300_000, port => 8734, + process_sampler => true, + process_sampler_interval => 15000, wasm_allow_aot => false, %% Options for the relay device relay_http_client => httpc, @@ -227,21 +267,26 @@ default_message() -> node_history => [], debug_stack_depth => 40, debug_print => false, + debug_log => false, + log_dir => <<"logs">>, + log_max_files => 5, + log_max_bytes => 52428800, debug_print_map_line_threshold => 30, debug_print_binary_max => 60, debug_print_indent => 2, debug_print_truncate => 30, stack_print_prefixes => ["hb", "dev", "ar", "maps"], debug_print_trace => short, % `short` | `false`. Has performance impact. + debug_print_verify => false, debug_print_metadata => true, - debug_print_gen_id => true, + debug_print_gen_id => false, debug_print_committers => true, debug_print_comm_device => true, debug_print_comm_type => true, debug_trace_type => ?DEFAULT_TRACE_TYPE, short_trace_len => 20, debug_show_priv => if_present, - debug_resolve_links => true, + debug_resolve_links => false, debug_print_fail_mode => long, trusted => #{}, snp_enforced_keys => [ @@ -249,6 +294,7 @@ default_message() -> initrd, append, vmm_type, guest_features ], + name_resolvers => ?DEFAULT_NAME_RESOLVERS, routes => [ %% Local CU routes. 
#{ @@ -274,201 +320,85 @@ default_message() -> [ #{ <<"prefix">> => <<"https://ao-search-gateway.goldsky.com">>, - <<"opts">> => #{ http_client => httpc, protocol => http2 } + <<"opts">> => #{ http_client => ?DEFAULT_HTTP_CLIENT, protocol => http2 } }, #{ <<"prefix">> => <<"https://arweave-search.goldsky.com">>, - <<"opts">> => #{ http_client => httpc, protocol => http2 } + <<"opts">> => #{ http_client => ?DEFAULT_HTTP_CLIENT, protocol => http2 } }, #{ - <<"prefix">> => <<"https://arweave.net">>, - <<"opts">> => #{ http_client => gun, protocol => http2 } + <<"prefix">> => ?DEFAULT_GATEWAY, + <<"opts">> => #{ http_client => ?DEFAULT_HTTP_CLIENT, protocol => http2 } } ] }, - %% Chunk requests: route to the nearest data nodes by - %% partition midpoint (byte offset). Tries 4 at a time, - %% ordered by proximity, until one returns 200. + %% chunk requests: route to the nearest data nodes by + %% partition midpoint (byte offset). #{ - <<"template">> => <<"^/arweave/chunk">>, - <<"nodes">> => - [ - %% Partitions 0-15 - #{ - <<"match">> => <<"^/arweave">>, - <<"center">> => 28_800_000_000_000, - <<"with">> => <<"http://data-1.arweave.xyz:1984">>, - <<"opts">> => #{ http_client => httpc } - }, - #{ - <<"match">> => <<"^/arweave">>, - <<"center">> => 28_800_000_000_000, - <<"with">> => <<"http://data-13.arweave.xyz:1984">>, - <<"opts">> => #{ http_client => httpc } - }, - %% Partitions 16-31 - #{ - <<"match">> => <<"^/arweave">>, - <<"center">> => 86_400_000_000_000, - <<"with">> => <<"http://data-2.arweave.xyz:1984">>, - <<"opts">> => #{ http_client => httpc } - }, - #{ - <<"match">> => <<"^/arweave">>, - <<"center">> => 86_400_000_000_000, - <<"with">> => <<"http://data-3.arweave.xyz:1984">>, - <<"opts">> => #{ http_client => httpc } - }, - #{ - <<"match">> => <<"^/arweave">>, - <<"center">> => 86_400_000_000_000, - <<"with">> => <<"http://data-14.arweave.xyz:1984">>, - <<"opts">> => #{ http_client => httpc } - }, - #{ - <<"match">> => <<"^/arweave">>, - <<"center">> => 
86_400_000_000_000, - <<"with">> => <<"http://data-15.arweave.xyz:1984">>, - <<"opts">> => #{ http_client => httpc } - }, - %% Partitions 32-47 - #{ - <<"match">> => <<"^/arweave">>, - <<"center">> => 144_000_000_000_000, - <<"with">> => <<"http://data-4.arweave.xyz:1984">>, - <<"opts">> => #{ http_client => httpc } - }, - #{ - <<"match">> => <<"^/arweave">>, - <<"center">> => 144_000_000_000_000, - <<"with">> => <<"http://data-5.arweave.xyz:1984">>, - <<"opts">> => #{ http_client => httpc } - }, - #{ - <<"match">> => <<"^/arweave">>, - <<"center">> => 144_000_000_000_000, - <<"with">> => <<"http://data-16.arweave.xyz:1984">>, - <<"opts">> => #{ http_client => httpc } - }, - #{ - <<"match">> => <<"^/arweave">>, - <<"center">> => 144_000_000_000_000, - <<"with">> => <<"http://data-17.arweave.xyz:1984">>, - <<"opts">> => #{ http_client => httpc } - }, - %% Partitions 48-63 - #{ - <<"match">> => <<"^/arweave">>, - <<"center">> => 201_600_000_000_000, - <<"with">> => <<"http://data-6.arweave.xyz:1984">>, - <<"opts">> => #{ http_client => httpc } - }, - #{ - <<"match">> => <<"^/arweave">>, - <<"center">> => 201_600_000_000_000, - <<"with">> => <<"http://data-7.arweave.xyz:1984">>, - <<"opts">> => #{ http_client => httpc } - }, - %% Partitions 48-107 (tip nodes) - #{ - <<"match">> => <<"^/arweave">>, - <<"center">> => 280_800_000_000_000, - <<"with">> => <<"http://tip-1.arweave.xyz:1984">>, - <<"opts">> => #{ http_client => httpc } - }, - #{ - <<"match">> => <<"^/arweave">>, - <<"center">> => 280_800_000_000_000, - <<"with">> => <<"http://tip-2.arweave.xyz:1984">>, - <<"opts">> => #{ http_client => httpc } - }, - #{ - <<"match">> => <<"^/arweave">>, - <<"center">> => 280_800_000_000_000, - <<"with">> => <<"http://tip-3.arweave.xyz:1984">>, - <<"opts">> => #{ http_client => httpc } - }, - #{ - <<"match">> => <<"^/arweave">>, - <<"center">> => 280_800_000_000_000, - <<"with">> => <<"http://tip-4.arweave.xyz:1984">>, - <<"opts">> => #{ http_client => httpc } - }, - #{ - 
<<"match">> => <<"^/arweave">>, - <<"center">> => 280_800_000_000_000, - <<"with">> => <<"http://tip-5.arweave.xyz:1984">>, - <<"opts">> => #{ http_client => httpc } - }, - %% Partitions 64-126 - #{ - <<"match">> => <<"^/arweave">>, - <<"center">> => 343_800_000_000_000, - <<"with">> => <<"http://data-8.arweave.xyz:1984">>, - <<"opts">> => #{ http_client => httpc } - }, - %% Partitions 75-138 - #{ - <<"match">> => <<"^/arweave">>, - <<"center">> => 385_200_000_000_000, - <<"with">> => <<"http://data-9.arweave.xyz:1984">>, - <<"opts">> => #{ http_client => httpc } - }, - #{ - <<"match">> => <<"^/arweave">>, - <<"center">> => 385_200_000_000_000, - <<"with">> => <<"http://data-10.arweave.xyz:1984">>, - <<"opts">> => #{ http_client => httpc } - }, - #{ - <<"match">> => <<"^/arweave">>, - <<"center">> => 385_200_000_000_000, - <<"with">> => <<"http://data-11.arweave.xyz:1984">>, - <<"opts">> => #{ http_client => httpc } - }, - #{ - <<"match">> => <<"^/arweave">>, - <<"center">> => 385_200_000_000_000, - <<"with">> => <<"http://data-12.arweave.xyz:1984">>, - <<"opts">> => #{ http_client => httpc } - } - ], - <<"strategy">> => <<"Nearest-Integer">>, - <<"choose">> => 22, - <<"parallel">> => 4, + <<"template">> => + #{ + <<"path">> => <<"^/arweave/chunk">>, + <<"method">> => <<"GET">> + }, + <<"nodes">> => add_opts(?ARWEAVE_BOOTSTRAP_DATA_NODES ++ ?ARWEAVE_BOOTSTRAP_TIP_NODES), + <<"strategy">> => <<"Shuffled-Range">>, + <<"choose">> => + length( + ?ARWEAVE_BOOTSTRAP_DATA_NODES + ++ ?ARWEAVE_BOOTSTRAP_TIP_NODES + ), + <<"parallel">> => 1, <<"responses">> => 1, <<"stop-after">> => true, <<"admissible-status">> => 200 }, + #{ + <<"template">> => + #{ + <<"path">> => <<"^/arweave/chunk">>, + <<"method">> => <<"POST">> + }, + <<"nodes">> => add_opts(?ARWEAVE_BOOTSTRAP_DATA_NODES ++ ?ARWEAVE_BOOTSTRAP_TIP_NODES), + <<"strategy">> => <<"Shuffled-Range">>, + <<"choose">> => + length( + ?ARWEAVE_BOOTSTRAP_DATA_NODES + ++ ?ARWEAVE_BOOTSTRAP_TIP_NODES + ), + <<"parallel">> => 5, + 
<<"responses">> => 3, %% keep going until we get 3x 200s + <<"stop-after">> => true, + <<"admissible-status">> => 200 + }, + #{ + <<"template">> => + #{ + <<"path">> => <<"^/arweave/tx">>, + <<"method">> => <<"POST">> + }, + <<"nodes">> => add_opts(?ARWEAVE_BOOTSTRAP_CHAIN_NODES ++ ?ARWEAVE_BOOTSTRAP_TIP_NODES), + <<"parallel">> => true, + <<"responses">> => 3, + <<"stop-after">> => false, + <<"admissible-status">> => 200 + }, % Raw data requests via arweave.net gateway. #{ <<"template">> => <<"^/arweave/raw">>, <<"node">> => #{ <<"match">> => <<"^/arweave">>, - <<"with">> => <<"https://arweave.net">>, - <<"opts">> => #{ http_client => httpc, protocol => http2 } + <<"with">> => ?DEFAULT_GATEWAY, + <<"opts">> => #{ http_client => ?DEFAULT_HTTP_CLIENT, protocol => http2 } } }, - %% General Arweave requests: race both chain nodes, take + %% General Arweave requests: race all chain nodes, take %% the first 200. #{ <<"template">> => <<"^/arweave">>, - <<"nodes">> => - [ - #{ - <<"match">> => <<"^/arweave">>, - <<"with">> => <<"http://chain-1.arweave.xyz:1984">>, - <<"opts">> => #{ http_client => httpc, protocol => http2 } - }, - #{ - <<"match">> => <<"^/arweave">>, - <<"with">> => <<"http://chain-2.arweave.xyz:1984">>, - <<"opts">> => #{ http_client => httpc, protocol => http2 } - } - ], + <<"nodes">> => add_opts(?ARWEAVE_BOOTSTRAP_CHAIN_NODES), <<"parallel">> => true, - <<"stop-after">> => 1, + <<"stop-after">> => true, <<"admissible-status">> => 200 }, %% Raw data requests via arweave.net gateway. TODO: Update later. 
@@ -476,8 +406,8 @@ default_message() -> <<"template">> => <<"/raw">>, <<"node">> => #{ - <<"prefix">> => <<"https://arweave.net">>, - <<"opts">> => #{ http_client => gun, protocol => http2 } + <<"prefix">> => ?DEFAULT_GATEWAY, + <<"opts">> => #{ http_client => ?DEFAULT_HTTP_CLIENT, protocol => http2 } } } ], @@ -488,6 +418,12 @@ default_message() -> <<"store-module">> => hb_store_fs, <<"name">> => <<"cache-mainnet">> }, + #{ + <<"store-module">> => hb_store_arweave, + <<"name">> => <<"cache-arweave">>, + <<"index-store">> => [?DEFAULT_PRIMARY_STORE], + <<"arweave-node">> => ?DEFAULT_GATEWAY + }, #{ <<"store-module">> => hb_store_gateway, <<"subindex">> => [ @@ -518,6 +454,8 @@ default_message() -> % services do not provide the `anchor' or `last_tx' fields, so their % responses are not verifiable. ans104_trust_gql => true, + % Number of chunks to fetch in parallel when loading a TX or dataitem. + arweave_chunk_fetch_concurrency => 5, http_extra_opts => #{ force_message => true, @@ -532,19 +470,34 @@ default_message() -> routes => [] }, on => #{ - <<"request">> => #{ - <<"device">> => <<"auth-hook@1.0">>, - <<"path">> => <<"request">>, - <<"when">> => #{ - <<"keys">> => [<<"authorization">>, <<"!">>] - }, - <<"secret-provider">> => + <<"request">> => + [ + #{ + <<"device">> => <<"rate-limit@1.0">> + }, + #{ + <<"device">> => <<"auth-hook@1.0">>, + <<"path">> => <<"request">>, + <<"when">> => #{ + <<"keys">> => [<<"authorization">>, <<"!">>] + }, + <<"secret-provider">> => + #{ + <<"device">> => <<"http-auth@1.0">>, + <<"access-control">> => + #{ <<"device">> => <<"http-auth@1.0">> } + } + }, + #{ + <<"device">> => <<"name@1.0">> + }, #{ - <<"device">> => <<"http-auth@1.0">>, - <<"access-control">> => - #{ <<"device">> => <<"http-auth@1.0">> } + <<"device">> => <<"manifest@1.0">> + }, + #{ + <<"device">> => <<"blacklist@1.0">> } - } + ] }, scheduler_default_commitment_spec => <<"httpsig@1.0">>, genesis_wasm_import_authorities => @@ -933,6 +886,17 @@ 
ensure_node_history(Opts, RequiredOpts) -> {error, validation_failed} end. +%% @doc Util to add opts to nodes. +add_opts(Items) -> + add_opts(Items, ?DEFAULT_HTTP_OPTS). +add_opts(Items, Opts) -> + lists:map( + fun (Item) when is_map(Item) -> + Item#{<<"opts">> => Opts} + end, + Items + ). + %%% Tests -ifdef(TEST). @@ -942,7 +906,11 @@ global_get_test() -> ?assertEqual(debug, ?MODULE:get(mode)), ?assertEqual(debug, ?MODULE:get(mode, production)), ?assertEqual(undefined, ?MODULE:get(unset_global_key)), - ?assertEqual(1234, ?MODULE:get(unset_global_key, 1234)). + ?assertEqual(1234, ?MODULE:get(unset_global_key, 1234)), + ?assertEqual(false, ?MODULE:get(debug_log)), + ?assertEqual(<<"logs">>, ?MODULE:get(log_dir)), + ?assertEqual(5, ?MODULE:get(log_max_files)), + ?assertEqual(52428800, ?MODULE:get(log_max_bytes)). local_get_test() -> Local = #{ only => local }, @@ -970,14 +938,14 @@ global_preference_test() -> load_flat_test() -> % File contents: % port: 1234 - % host: https://ao.computer + % node_host: https://ao.computer % await-inprogress: false {ok, Conf} = load("test/config.flat", #{}), ?event({loaded, {explicit, Conf}}), % Ensure we convert types as expected. ?assertEqual(1234, hb_maps:get(port, Conf)), % A binary - ?assertEqual(<<"https://ao.computer">>, hb_maps:get(host, Conf)), + ?assertEqual(<<"https://ao.computer">>, hb_maps:get(node_host, Conf)), % An atom, where the key contained a header-key `-' rather than a `_'. ?assertEqual(false, hb_maps:get(await_inprogress, Conf)). @@ -987,7 +955,7 @@ load_json_test() -> ?assertEqual(1234, hb_maps:get(port, Conf)), ?assertEqual(9001, hb_maps:get(example, Conf)), % A binary - ?assertEqual(<<"https://ao.computer">>, hb_maps:get(host, Conf)), + ?assertEqual(<<"https://ao.computer">>, hb_maps:get(node_host, Conf)), % An atom, where the key contained a header-key `-' rather than a `_'. ?assertEqual(false, hb_maps:get(await_inprogress, Conf)), % Ensure that a store with `ao-types' is loaded correctly. 
diff --git a/src/hb_pmap.erl b/src/hb_pmap.erl
new file mode 100644
index 000000000..1baf88564
--- /dev/null
+++ b/src/hb_pmap.erl
@@ -0,0 +1,156 @@
+%% @doc Concurrency-limited parallel map that preserves input order.
+%% Spawns up to MaxWorkers workers and refills the pool as workers complete.
+-module(hb_pmap).
+
+-export([parallel_map/3]).
+-include("include/hb.hrl").
+-include_lib("eunit/include/eunit.hrl").
+
+%% @doc Apply `Fun' to every element of `Items', running at most `MaxWorkers'
+%% calls at the same time (values below 1 are treated as 1). The results are
+%% returned in the order of `Items'. If a worker raises, the call throws
+%% `{pmap_worker_crashed, Class, Reason, Stacktrace}'.
+parallel_map(Items, Fun, MaxWorkers) when is_list(Items), is_function(Fun, 1) ->
+    Workers = max(1, MaxWorkers),
+    Parent = self(),
+    % Every item gets its own reference, which tags the worker's reply and is
+    % used at the end to put the results back into input order.
+    ItemsWithRefs = [{Item, make_ref()} || Item <- Items],
+    {ToSpawn, Remaining} =
+        lists:split(min(length(ItemsWithRefs), Workers), ItemsWithRefs),
+    ActiveRefs = [spawn_worker(IWR, Fun, Parent) || IWR <- ToSpawn],
+    ResultsMap = collect(ActiveRefs, Remaining, Fun, Parent, #{}),
+    [maps:get(Ref, ResultsMap) || {_Item, Ref} <- ItemsWithRefs].
+
+%% @doc Spawn a worker that sends either `{hb_pmap_result, Ref, Value}' or
+%% `{hb_pmap_worker_crash, Ref, Class, Reason, Stacktrace}' to `Parent'.
+%% Returns the reference of the item being processed.
+spawn_worker({Item, Ref}, Fun, Parent) ->
+    spawn(
+        fun() ->
+            try
+                Parent ! {hb_pmap_result, Ref, Fun(Item)}
+            catch
+                Class:Reason:Stacktrace ->
+                    ?event(pmap_error, {pmap_worker_crashed,
+                        {class, Class},
+                        {reason, Reason},
+                        {stacktrace, {trace, Stacktrace}}}),
+                    Parent ! {
+                        hb_pmap_worker_crash,
+                        Ref,
+                        Class,
+                        Reason,
+                        Stacktrace
+                    }
+            end
+        end
+    ),
+    Ref.
+
+%% @doc Gather worker messages until nothing is active or queued. Every result
+%% frees a slot, which is refilled from `Remaining'. A message whose reference
+%% is not in `Active' cannot belong to this call (for example a late reply to
+%% an earlier call in this process that aborted on a crash); it is dropped
+%% rather than being stored, used to spawn an extra worker, or rethrown.
+collect([], [], _Fun, _Parent, Results) ->
+    Results;
+collect(Active, Remaining, Fun, Parent, Results) ->
+    receive
+        {hb_pmap_result, Ref, Result} ->
+            case lists:member(Ref, Active) of
+                false ->
+                    collect(Active, Remaining, Fun, Parent, Results);
+                true ->
+                    NewResults = Results#{Ref => Result},
+                    NewActive = lists:delete(Ref, Active),
+                    case Remaining of
+                        [] ->
+                            collect(NewActive, [], Fun, Parent, NewResults);
+                        [Next | Rest] ->
+                            NextRef = spawn_worker(Next, Fun, Parent),
+                            collect(
+                                [NextRef | NewActive],
+                                Rest,
+                                Fun,
+                                Parent,
+                                NewResults
+                            )
+                    end
+            end;
+        {hb_pmap_worker_crash, Ref, Class, Reason, Stacktrace} ->
+            case lists:member(Ref, Active) of
+                false ->
+                    collect(Active, Remaining, Fun, Parent, Results);
+                true ->
+                    throw({pmap_worker_crashed, Class, Reason, Stacktrace})
+            end
+    end.
+
+%%% Tests
+
+%% @doc Verifies empty input returns an empty result list.
+empty_input_test() ->
+    Identity = fun(X) -> X end,
+    ?assertEqual([], parallel_map([], Identity, 4)).
+
+%% @doc Exercises the normal path with worker limits below, within and above
+%% the item count: checks result order, that every item ran exactly once, and
+%% that the number of simultaneously running workers stayed within the limit.
+instrumented_normal_path_test() ->
+    Inputs = [1, 2, 3, 4, 5, 6],
+    Expected = [Input * 10 || Input <- Inputs],
+    Total = length(Inputs),
+    lists:foreach(
+        fun(Limit) ->
+            Stats = run_instrumented_case(Inputs, Limit),
+            MaxAllowed = min(max(1, Limit), Total),
+            ?assertEqual(Expected, maps:get(results, Stats)),
+            ?assertEqual(Total, maps:get(started, Stats)),
+            ?assertEqual(Total, maps:get(completed, Stats)),
+            ?assertEqual(0, maps:get(in_flight, Stats)),
+            Peak = maps:get(peak, Stats),
+            ?assert(Peak =< MaxAllowed),
+            ?assert(Peak >= 1)
+        end,
+        [0, 3, 10]
+    ).
+
+%% @doc A raising worker must surface as a thrown term, not as a hang.
+worker_crash_fails_fast_test() ->
+    CrashOnTwo =
+        fun
+            (2) -> erlang:error(boom);
+            (Item) -> Item
+        end,
+    Outcome = (catch parallel_map([1, 2, 3], CrashOnTwo, 2)),
+    ?assertMatch({pmap_worker_crashed, error, boom, _}, Outcome).
+
+%% @doc Run parallel_map/3 once with an instrumented worker function and
+%% return the ordered results together with the counters it collected.
+%% Counter slots: 1 = started, 2 = completed, 3 = in flight, 4 = peak in flight.
+run_instrumented_case(Items, MaxWorkers) ->
+    Counters = atomics:new(4, []),
+    ItemCount = length(Items),
+    Work =
+        fun(Item) ->
+            mark_worker_started(Counters),
+            % Later items sleep less, so workers finish in reverse order and
+            % order preservation is actually put under stress.
+            timer:sleep((ItemCount - Item) * 5),
+            mark_worker_completed(Counters),
+            Item * 10
+        end,
+    Results = parallel_map(Items, Work, MaxWorkers),
+    #{
+        results => Results,
+        started => atomics:get(Counters, 1),
+        completed => atomics:get(Counters, 2),
+        in_flight => atomics:get(Counters, 3),
+        peak => atomics:get(Counters, 4)
+    }.
+
+%% @doc Count a worker as started and in flight, then refresh the peak.
+mark_worker_started(Counters) ->
+    ok = atomics:add(Counters, 1, 1),
+    NowInFlight = atomics:add_get(Counters, 3, 1),
+    update_peak(Counters, NowInFlight).
+
+%% @doc Count a worker as completed and no longer in flight.
+mark_worker_completed(Counters) ->
+    _ = atomics:add_get(Counters, 2, 1),
+    _ = atomics:add_get(Counters, 3, -1),
+    ok.
+
+%% @doc Raise the recorded peak (counter 4) to `InFlight' if it is higher.
+%% `atomics:compare_exchange/4' returns `ok' when the swap was performed and
+%% the current value otherwise, so any reply other than `ok' means another
+%% worker changed the peak first and the comparison has to be repeated.
+update_peak(Counters, InFlight) ->
+    Peak = atomics:get(Counters, 4),
+    case InFlight =< Peak of
+        true ->
+            ok;
+        false ->
+            case atomics:compare_exchange(Counters, 4, Peak, InFlight) of
+                ok -> ok;
+                _Actual -> update_peak(Counters, InFlight)
+            end
+    end.
diff --git a/src/hb_process_sampler.erl b/src/hb_process_sampler.erl
new file mode 100644
index 000000000..a9d90266f
--- /dev/null
+++ b/src/hb_process_sampler.erl
@@ -0,0 +1,282 @@
+%%% @doc Sample BEAM process state for diagnostics.
+-module(hb_process_sampler).
+
+-export([ensure_started/1]).
+
+-include("include/hb.hrl").
+-include_lib("eunit/include/eunit.hrl").
+
+-define(DEFAULT_SAMPLE_PROCESSES_INTERVAL, 15000).
+
+%% @doc Ensure the process sampler singleton is started if enabled. The
+%% sampler runs only when both the `process_sampler' and `prometheus' options
+%% are true; each defaults to `not hb_features:test()'. Always returns `ok'.
+ensure_started(Opts) ->
+    ProcessSamplerEnabled =
+        hb_opts:get(process_sampler, not hb_features:test(), Opts)
+            andalso hb_opts:get(prometheus, not hb_features:test(), Opts),
+    ?event(process_sampler, {process_sampler_enabled, ProcessSamplerEnabled}),
+    case ProcessSamplerEnabled of
+        true ->
+            _ = hb_name:singleton(?MODULE, fun() -> start(Opts) end),
+            ok;
+        false ->
+            ok
+    end.
+
+%% @doc Initialize the process sampler and enter its receive loop.
+start(Opts) ->
+    ?event(process_sampler, {starting_process_sampler,
+        {interval, hb_opts:get(process_sampler_interval, ?DEFAULT_SAMPLE_PROCESSES_INTERVAL, Opts)}}),
+    hb_prometheus:ensure_started(),
+    schedule_process_sample(Opts),
+    loop(
+        #{
+            opts => Opts
+        }
+    ).
+
+%% @doc Receive loop for the process sampler. A `sample_processes' message
+%% triggers one sample and schedules the next; any other message is logged as
+%% a warning and otherwise ignored.
+loop(State = #{ opts := Opts }) ->
+    receive
+        sample_processes ->
+            sample_processes(State),
+            schedule_process_sample(Opts),
+            loop(State);
+        Message ->
+            ?event(warning, {unhandled_info, {module, ?MODULE}, {message, Message}}),
+            loop(State)
+    end.
+
+%% @doc Schedule the next process sample if enabled.
+schedule_process_sample(Opts) ->
+    Interval =
+        hb_opts:get(
+            process_sampler_interval,
+            ?DEFAULT_SAMPLE_PROCESSES_INTERVAL,
+            Opts
+        ),
+    % Anything other than a positive integer interval switches sampling off.
+    case is_integer(Interval) andalso Interval > 0 of
+        true -> erlang:send_after(Interval, self(), sample_processes);
+        false -> ok
+    end.
+
+%% @doc Take one sample of every BEAM process, publish the aggregated metrics
+%% and log how long the pass took. Any exception is logged as a warning
+%% instead of terminating the sampler.
+sample_processes(#{ opts := Opts }) ->
+    Began = erlang:monotonic_time(),
+    try
+        Pids = erlang:processes(),
+        Samples =
+            [
+                Sample
+            ||
+                Pid <- Pids,
+                {true, Sample} <- [process_function(Pid, Opts)]
+            ],
+        report_process_metrics(accumulate_process_metrics(Samples)),
+        ElapsedMicros =
+            erlang:convert_time_unit(
+                erlang:monotonic_time() - Began,
+                native,
+                microsecond
+            ),
+        ?event(
+            process_sampler,
+            {sample_processes,
+                {processes, length(Pids)},
+                {elapsed_ms, ElapsedMicros / 1000}
+            },
+            Opts
+        )
+    catch
+        Class:Reason:Stacktrace ->
+            ?event(
+                warning,
+                {process_sampler_failed,
+                    {class, Class},
+                    {reason, Reason},
+                    {stacktrace, {trace, Stacktrace}}
+                },
+                Opts
+            )
+    end.
+
+%% @doc Fold the per-process samples into a map from process name to
+%% `{Memory, Reductions, MessageQueueLen}' totals.
+accumulate_process_metrics(ProcessData) ->
+    AddSample =
+        fun({_Status, Name, Memory, Reductions, QueueLen}, Totals) ->
+            {MemorySoFar, ReductionsSoFar, QueueLenSoFar} =
+                maps:get(Name, Totals, {0, 0, 0}),
+            Totals#{
+                Name =>
+                    {
+                        MemorySoFar + Memory,
+                        ReductionsSoFar + Reductions,
+                        QueueLenSoFar + QueueLen
+                    }
+            }
+        end,
+    lists:foldl(AddSample, #{}, ProcessData).
+
+%% @doc Report aggregate process metrics to Prometheus.
+report_process_metrics(ProcessMetrics) ->
+    reset_process_info_metric(),
+    maps:foreach(
+        fun(Name, {Memory, Reductions, QueueLen}) ->
+            lists:foreach(
+                fun({Type, Value}) ->
+                    prometheus_gauge:set(process_info, [Name, Type], Value)
+                end,
+                [
+                    {<<"memory">>, Memory},
+                    {<<"reductions">>, Reductions},
+                    {<<"message_queue">>, QueueLen}
+                ]
+            )
+        end,
+        ProcessMetrics
+    ),
+    report_memory_metrics().
+
+%% @doc Drop and re-create the `process_info' gauge so that label sets of
+%% processes that have exited do not linger between samples.
+reset_process_info_metric() ->
+    _ = prometheus_gauge:deregister(process_info),
+    Spec =
+        [
+            {name, process_info},
+            {labels, [process, type]},
+            {help,
+                "Sampling info about active processes."
+                " Only set when process_sampler is enabled."}
+        ],
+    ok = prometheus_gauge:new(Spec).
+
+%% @doc Publish the `erlang:memory/1' totals as `process_info' samples whose
+%% `type' label is `memory' and whose `process' label is the memory kind.
+report_memory_metrics() ->
+    lists:foreach(
+        fun(Kind) ->
+            prometheus_gauge:set(
+                process_info,
+                [atom_to_binary(Kind, utf8), <<"memory">>],
+                erlang:memory(Kind)
+            )
+        end,
+        [
+            total,
+            processes,
+            processes_used,
+            system,
+            atom,
+            atom_used,
+            binary,
+            code,
+            ets
+        ]
+    ).
+
+%% @doc Sample a single process and return aggregate data for it.
+process_function(PID, _Opts) ->
+    Keys =
+        [
+            current_stacktrace,
+            registered_name,
+            status,
+            memory,
+            reductions,
+            message_queue_len
+        ],
+    % Any reply that is not the full, ordered item list makes the sample be
+    % skipped (`false' is the filtermap convention for "drop").
+    case process_info(PID, Keys) of
+        [
+            {current_stacktrace, Trace},
+            {registered_name, Registered},
+            {status, Status},
+            {memory, Bytes},
+            {reductions, Reds},
+            {message_queue_len, QueueLen}
+        ] ->
+            Label = process_name(Registered, Trace),
+            {true, {Status, Label, Bytes, Reds, QueueLen}};
+        _ ->
+            false
+    end.
+
+%% @doc Pick a readable label for a process: its registered name as a binary,
+%% or, when the name is `[]', a label derived from the stack trace.
+process_name([], Stack) ->
+    hb_format:process_from_trace(Stack);
+process_name(Name, _Stack) ->
+    hb_util:bin(Name).
+
+%%% Tests
+
+%% @doc process_name/2 on an unregistered process whose trace (innermost frame
+%% first) ends in the hb_pmap worker closure and proc_lib glue: the label is
+%% the spawner prefix followed by the outermost non-glue MFA.
+process_name_from_stack_test() ->
+    Trace =
+        [
+            {timer, sleep, 1, []},
+            {helper, nested, 1, []},
+            {job, run, 1, []},
+            {hb_pmap, '-spawn_worker/3-fun-0-', 4, []},
+            {proc_lib, init_p_do_apply, 3, []}
+        ],
+    ?assertEqual(<<"hb_pmap->job:run">>, process_name([], Trace)).
+
+%% @doc Without a pmap worker closure in the trace there is no spawner prefix.
+process_name_from_stack_no_pmap_prefix_test() ->
+    Trace =
+        [
+            {timer, sleep, 1, []},
+            {job, run, 1, []},
+            {proc_lib, init_p_do_apply, 3, []}
+        ],
+    ?assertEqual(<<"job:run">>, process_name([], Trace)).
+
+%% @doc A registered name is returned as-is (converted to a binary).
+process_name_registered_test() ->
+    ?assertEqual(<<"my_proc">>, process_name(my_proc, [])).
+
+%% @doc Samples sharing a process name are summed field by field.
+accumulate_process_metrics_test() ->
+    Samples =
+        [
+            {running, <<"worker">>, 10, 20, 1},
+            {running, <<"worker">>, 5, 3, 2}
+        ],
+    Totals = accumulate_process_metrics(Samples),
+    ?assertEqual({15, 23, 3}, maps:get(<<"worker">>, Totals)).
diff --git a/src/hb_prometheus.erl b/src/hb_prometheus.erl
new file mode 100644
index 000000000..f4917da77
--- /dev/null
+++ b/src/hb_prometheus.erl
@@ -0,0 +1,121 @@
+%%% @doc HyperBEAM wrapper for Prometheus metrics.
+-module(hb_prometheus).
+-export([ensure_started/0, declare/2, measure_and_report/2, measure_and_report/3]).
+-export([observe/2, observe/3, inc/2, inc/3, inc/4, dec/2, dec/3, dec/4]).
+-define(STARTED_CACHE_KEY, {?MODULE, started}).
+-define(FAILED_CACHE_KEY, {?MODULE, start_failed_at}).
+%% Longest time (ms) to wait for the registry table once the apps have started.
+-define(START_TIMEOUT_MS, 5000).
+%% Time (ms) after a failed start during which no new attempt is made.
+-define(RETRY_COOLDOWN_MS, 30000).
+
+%% @doc Ensure the Prometheus application has been started. Returns `ok' once
+%% the registry is usable and `{error, Reason}' otherwise. A failed attempt is
+%% cached with a timestamp in the calling process, so hot paths do not repeat
+%% the blocking start; a new attempt is made once the cooldown has elapsed.
+ensure_started() ->
+    case is_started() of
+        true -> ok;
+        false ->
+            case recently_failed() of
+                true -> {error, prometheus_unavailable};
+                false -> start_and_wait()
+            end
+    end.
+
+%% @doc Start the Prometheus applications and wait, for a bounded time, until
+%% the registry is usable. Failures are recorded for the cooldown logic.
+start_and_wait() ->
+    StartResult =
+        application:ensure_all_started(
+            [prometheus, prometheus_cowboy, prometheus_ranch]
+        ),
+    case StartResult of
+        {ok, _Started} ->
+            Deadline =
+                erlang:monotonic_time(millisecond) + ?START_TIMEOUT_MS,
+            wait_for_prometheus_started(Deadline);
+        {error, Reason} ->
+            note_start_failure(),
+            {error, Reason}
+    end.
+
+%% @doc Lazy wait for prometheus to come up, after we have started the
+%% application. Gives up with `{error, prometheus_start_timeout}' once
+%% `Deadline' (monotonic milliseconds) has passed instead of polling forever.
+wait_for_prometheus_started(Deadline) ->
+    case is_started() of
+        true -> ok;
+        false ->
+            case erlang:monotonic_time(millisecond) >= Deadline of
+                true ->
+                    note_start_failure(),
+                    {error, prometheus_start_timeout};
+                false ->
+                    timer:sleep(1),
+                    wait_for_prometheus_started(Deadline)
+            end
+    end.
+
+%% @doc True while the last failed start attempt of this process is younger
+%% than the retry cooldown.
+recently_failed() ->
+    case erlang:get(?FAILED_CACHE_KEY) of
+        FailedAt when is_integer(FailedAt) ->
+            erlang:monotonic_time(millisecond) - FailedAt < ?RETRY_COOLDOWN_MS;
+        _ ->
+            false
+    end.
+
+%% @doc Remember, in the process dictionary, when a start attempt failed.
+note_start_failure() ->
+    erlang:put(?FAILED_CACHE_KEY, erlang:monotonic_time(millisecond)),
+    ok.
+
+%% @doc Check if prometheus has been started.
+%% The application itself may return `ok` to Erlang before it is actually ready
+%% for use, so we wait for the `ets` table to be created instead. A positive
+%% answer is cached in the process dictionary of the caller.
+is_started() ->
+    case erlang:get(?STARTED_CACHE_KEY) of
+        true ->
+            true;
+        _ ->
+            case ets:whereis(prometheus_registry_table) of
+                undefined ->
+                    false;
+                _ ->
+                    erlang:put(?STARTED_CACHE_KEY, true),
+                    true
+            end
+    end.
+
+%% @doc Declare a new Prometheus metric in a replay-safe manner. `Type' is
+%% `histogram', `counter' or `gauge'; any other type throws
+%% `{unsupported_metric_type, Type}'. Returns `ok' without declaring anything
+%% when Prometheus is unavailable.
+declare(Type, Metric) ->
+    case ensure_started() of
+        ok ->
+            try do_declare(Type, Metric)
+            catch
+                error:mfa_already_exists ->
+                    ok;
+                error:{mf_already_exists, _, _} ->
+                    ok
+            end;
+        _ -> ok
+    end.
+
+do_declare(histogram, Metric) -> prometheus_histogram:declare(Metric);
+do_declare(counter, Metric) -> prometheus_counter:declare(Metric);
+do_declare(gauge, Metric) -> prometheus_gauge:declare(Metric);
+do_declare(Type, _Metric) -> throw({unsupported_metric_type, Type}).
+
+%% @doc Measure function duration and report metric, ensuring that the Prometheus
+%% application has been started first. If Prometheus is unavailable, the function
+%% is executed without measurement. The duration is observed even when `Fun'
+%% raises, because the report happens in the `after' section.
+measure_and_report(Fun, Metric) when is_function(Fun) ->
+    measure_and_report(Fun, Metric, []).
+measure_and_report(Fun, Metric, Labels) when is_function(Fun) ->
+    Start = erlang:monotonic_time(),
+    try Fun()
+    after
+        DurationNative = erlang:monotonic_time() - Start,
+        observe(DurationNative, Metric, Labels)
+    end.
+
+%% @doc Record `Duration' in the histogram `Metric'. Best effort: returns `ok'
+%% when Prometheus is unavailable and swallows any error raised by the
+%% observation itself.
+observe(Duration, Metric) when is_integer(Duration) ->
+    observe(Duration, Metric, []).
+observe(Duration, Metric, Labels) when is_integer(Duration) ->
+    case ensure_started() of
+        ok ->
+            try prometheus_histogram:observe(Metric, Labels, Duration)
+            catch _:_ -> ok
+            end;
+        _ ->
+            ok
+    end.
+
+%% @doc Increment a `counter' or `gauge' by `Value' (default 1). Returns `ok'
+%% without doing anything when Prometheus is unavailable.
+inc(Type, Metrics) ->
+    inc(Type, Metrics, []).
+inc(Type, Metrics, Labels) ->
+    inc(Type, Metrics, Labels, 1).
+inc(Type, Metrics, Labels, Value) ->
+    case ensure_started() of
+        ok ->
+            try do_inc(Type, Metrics, Labels, Value)
+            catch error:mfa_already_exists -> ok
+            end;
+        _ -> ok
+    end.
+do_inc(counter, Name, Labels, Value) ->
+    prometheus_counter:inc(Name, Labels, Value);
+do_inc(gauge, Name, Labels, Value) ->
+    prometheus_gauge:inc(Name, Labels, Value).
+
+%% @doc Decrement a `gauge' by `Value' (default 1). Only gauges are supported.
+%% Returns `ok' without doing anything when Prometheus is unavailable.
+dec(Type, Metrics) ->
+    dec(Type, Metrics, []).
+dec(Type, Metrics, Labels) ->
+    dec(Type, Metrics, Labels, 1).
+dec(Type, Metrics, Labels, Value) ->
+    case ensure_started() of
+        ok ->
+            try do_dec(Type, Metrics, Labels, Value)
+            catch error:mfa_already_exists -> ok
+            end;
+        _ -> ok
+    end.
+
+do_dec(gauge, Name, Labels, Value) ->
+    prometheus_gauge:dec(Name, Labels, Value).
\ No newline at end of file diff --git a/src/hb_singleton.erl b/src/hb_singleton.erl index 7b086afc8..a38cf957c 100644 --- a/src/hb_singleton.erl +++ b/src/hb_singleton.erl @@ -335,19 +335,20 @@ do_build(I, [Msg | Rest], ScopedKeys, Opts) -> %% 2. Part subpath resolutions %% 3. Inlined key-value pairs %% 4. Device specifier -parse_part(ID, _Opts) when ?IS_ID(ID) -> ID; parse_part(Part, Opts) -> case maybe_subpath(Part, Opts) of {resolve, Subpath} -> {resolve, Subpath}; Part -> case part([$&, $~, $+, $ , $=], Part) of + {no_match, PartKey, <<>>} when ?IS_ID(PartKey) -> + PartKey; {no_match, PartKey, <<>>} -> #{ <<"path">> => PartKey }; {Sep, PartKey, PartModBin} -> parse_part_mods( << Sep:8/integer, PartModBin/binary >>, #{ <<"path">> => PartKey }, - Opts + Opts ) end end. @@ -772,6 +773,20 @@ inlined_keys_test() -> ?assertEqual(not_found, hb_maps:get(<<"k1">>, Base, not_found)), ?assertEqual(not_found, hb_maps:get(<<"k2">>, Msg2, not_found)). +inlined_keys_long_segment_test() -> + Req = #{ + <<"path">> => + <<"/chunk&offset=377813969707255&length=262144">> + }, + Msgs = from(Req, #{}), + ?assertEqual(2, length(Msgs)), + [Base, Msg] = Msgs, + ?assertEqual(<<"chunk">>, hb_maps:get(<<"path">>, Msg)), + ?assertEqual(<<"377813969707255">>, hb_maps:get(<<"offset">>, Msg)), + ?assertEqual(<<"262144">>, hb_maps:get(<<"length">>, Msg)), + ?assertEqual(not_found, hb_maps:get(<<"offset">>, Base, not_found)), + ?assertEqual(not_found, hb_maps:get(<<"length">>, Base, not_found)). + inlined_quoted_key_test() -> Req = #{ <<"method">> => <<"POST">>, diff --git a/src/hb_store.erl b/src/hb_store.erl index 6b34ec4a5..9cbfc19c6 100644 --- a/src/hb_store.erl +++ b/src/hb_store.erl @@ -76,9 +76,9 @@ behavior_info(callbacks) -> %% @doc Store access policies to function names. 
-define(STORE_ACCESS_POLICIES, #{ - <<"read">> => [read, resolve, list, type, path, add_path, join], - <<"write">> => [write, make_link, make_group, reset, path, add_path, join], - <<"admin">> => [start, stop, reset] + <<"read">> => [read, resolve, list, type, path, add_path, join, scope], + <<"write">> => [write, make_link, make_group, reset, path, add_path, join, scope], + <<"admin">> => [start, stop, reset, scope] }). %%% Store named terms registry functions. @@ -432,7 +432,7 @@ test_stores() -> (hb_test_utils:test_store(hb_store_lmdb))#{ <<"benchmark-scale">> => 0.5 }, - (hb_test_utils:test_store(hb_store_ets))#{ + (hb_test_utils:test_store(hb_store_volatile))#{ <<"benchmark-scale">> => 0.01 } ] ++ rocks_stores(). @@ -1049,3 +1049,12 @@ make_link_access_test() -> ?event(testing, {read_linked_value, ReadResult}), ?assertEqual({ok, TestValue}, ReadResult), ?assertEqual(ok, LinkResult). + +%% Prevent stores with access property to return local scope if they are defined as remote. +get_store_scope_access_test() -> + ReadStore = #{<<"store-module">> => hb_store_remote_node, <<"access">> => [<<"read">>]}, + ?assertEqual(remote, get_store_scope(ReadStore)), + WriteStore = #{<<"store-module">> => hb_store_remote_node, <<"access">> => [<<"write">>]}, + ?assertEqual(remote, get_store_scope(WriteStore)), + AdminStore = #{<<"store-module">> => hb_store_remote_node, <<"access">> => [<<"admin">>]}, + ?assertEqual(remote, get_store_scope(AdminStore)). diff --git a/src/hb_store_arweave.erl b/src/hb_store_arweave.erl new file mode 100644 index 000000000..7fa448a2f --- /dev/null +++ b/src/hb_store_arweave.erl @@ -0,0 +1,401 @@ +%%% @doc A store implementation that relays to an Arweave node, using an +%%% intermediate cache of offsets as an ID->ArweaveLocation mapping. +-module(hb_store_arweave). +%%% Store API: +-export([scope/0, scope/1, type/2, read/2, start/1]). +%%% Unused Store API: +-export([resolve/2, write/3, make_link/3, make_group/2]). 
+%%% Indexing API:
+-export([store_from_opts/1, write_offset/5, read_offset/2, read_chunks/3]).
+-include("include/hb.hrl").
+-include_lib("eunit/include/eunit.hrl").
+
+%% Divisor used to turn a byte offset into the partition label of the
+%% request metric (see record_partition_metric/2).
+-define(PARTITION_SIZE, 3_600_000_000_000).
+
+%% @doc Find the first Arweave store from the given node message. Searches first
+%% for the `arweave_index_store' option, and if not found, searches the main
+%% `store' list for the first Arweave store with an index. Returns `no_store'
+%% when neither yields a store.
+store_from_opts(Opts) ->
+    case hb_opts:get(arweave_index_store, no_store, Opts) of
+        no_store -> first_arweave_store(hb_opts:get(store, [], Opts));
+        IndexStoreOpts -> IndexStoreOpts
+    end.
+
+%% @doc Find the first Arweave store with an index from a list of stores.
+%% A single (non-list) store is treated as a one-element list; `no_store' is
+%% returned when no element is a store of this module with an `index-store'.
+first_arweave_store(NonList) when not is_list(NonList) ->
+    first_arweave_store([NonList]);
+first_arweave_store([]) -> no_store;
+first_arweave_store(
+    [Store = #{<<"store-module">> := ?MODULE, <<"index-store">> := _ } | _]
+) -> Store;
+first_arweave_store([_ | Rest]) -> first_arweave_store(Rest).
+
+%% @doc Start the Arweave store, and the downstream associated index store.
+%% Metrics are declared before the index store is started.
+start(#{<<"index-store">> := IndexStore}) ->
+    init_prometheus(),
+    hb_store:start(IndexStore).
+
+%% @doc Although the index is local, loading an item via the index will make
+%% requests to a remote node, so we define the scope as remote. An explicit
+%% `scope' key in the store options overrides that default.
+scope() -> remote.
+scope(#{ <<"scope">> := Scope }) -> Scope;
+scope(_) -> scope().
+
+%% @doc Resolve a key path in the Arweave store, ignoring other paths: an ID
+%% resolves to itself, everything else is `not_found'.
+resolve(_, ID) when ?IS_ID(ID) -> ID;
+resolve(_, _) -> not_found.
+
+%% @doc Unsupported.
+write(_, _, _) -> not_found.
+
+%% @doc Unsupported.
+make_link(_, _, _) -> not_found.
+
+%% @doc Unsupported.
+make_group(_, _) -> not_found.
+
+%% @doc Get the type of the data at the given key. An ID that has an entry in
+%% the index store is reported as `simple'; every other key is `not_found'.
+%% Only the local index is consulted.
+type(#{ <<"index-store">> := IndexStore }, ID) when ?IS_ID(ID) ->
+    OffsetPath = hb_store_arweave_offset:path(ID),
+    case hb_store:read(IndexStore, OffsetPath) of
+        {ok, _} -> simple;
+        _ -> not_found
+    end;
+type(_, _) -> not_found.
+
+%% @doc Look up and decode the index entry for `ID'. Returns `{ok, Map}' with
+%% the keys `version', `codec-device', `start-offset' and `length', or
+%% `not_found'. The index lookup is timed and reported to Prometheus.
+read_offset(#{ <<"index-store">> := IndexStore }, ID) ->
+    Lookup =
+        fun() ->
+            hb_store:read(IndexStore, hb_store_arweave_offset:path(ID))
+        end,
+    case
+        hb_prometheus:measure_and_report(
+            Lookup,
+            hb_store_arweave_index_check_duration_seconds
+        )
+    of
+        {ok, Encoded} ->
+            {Version, Codec, Start, Size} =
+                hb_store_arweave_offset:decode(Encoded),
+            {ok, #{
+                <<"version">> => Version,
+                <<"codec-device">> => Codec,
+                <<"start-offset">> => Start,
+                <<"length">> => Size
+            }};
+        _ ->
+            not_found
+    end;
+read_offset(_, _) -> not_found.
+
+%% @doc Read the message stored under `ID'. The local cache of the store is
+%% tried first; on a miss the item is loaded through the offset index.
+%% Keys that are not IDs are `not_found'.
+read(StoreOpts, ID) when ?IS_ID(ID) ->
+    case hb_store_remote_node:read_local_cache(StoreOpts, ID) of
+        not_found -> do_read(StoreOpts, ID);
+        {ok, Cached} -> {ok, Cached}
+    end;
+read(_, _) -> not_found.
+
+%% @doc Read the data at the given key, reading the provided Arweave index store
+%% as a source of offsets. After offsets have been found, the data is loaded
+%% through the `~arweave@2.9` device -- either as an ANS-104 item or a TX.
+do_read(StoreOpts, ID) ->
+    case read_offset(StoreOpts, ID) of
+        {ok,
+            #{
+                <<"version">> := Version,
+                <<"codec-device">> := CodecName,
+                <<"start-offset">> := StartOffset,
+                <<"length">> := Length
+            }} ->
+            Loaded =
+                case CodecName of
+                    <<"ans104@1.0">> ->
+                        load_item(ID, StartOffset, Length, StoreOpts);
+                    <<"tx@1.0">> ->
+                        load_tx(ID, StartOffset, Length, StoreOpts);
+                    UnsupportedCodec ->
+                        % An index entry naming a codec this store cannot load
+                        % is reported as an error instead of crashing the
+                        % read with a `case_clause'.
+                        {error, {unsupported_codec, UnsupportedCodec}}
+                end,
+            case Loaded of
+                {ok, Message} ->
+                    hb_store_remote_node:maybe_cache(StoreOpts, Message),
+                    ?event(
+                        arweave_offsets,
+                        {read_ok,
+                            {id, {string, ID}},
+                            {format_version, Version},
+                            {type, CodecName},
+                            {start_offset, StartOffset},
+                            {length, Length}
+                        }
+                    ),
+                    record_partition_metric(StartOffset, ok),
+                    Loaded;
+                {error, Reason} ->
+                    ?event(
+                        arweave_offsets,
+                        {read_chunks_not_found,
+                            {id, {string, ID}},
+                            {format_version, Version},
+                            {type, CodecName},
+                            {start_offset, StartOffset},
+                            {length, Length},
+                            {reason, Reason}
+                        }
+                    ),
+                    % Every failed load is counted under the `not_found'
+                    % label, whatever the reason was.
+                    record_partition_metric(StartOffset, not_found),
+                    if Reason =:= not_found -> not_found;
+                    true -> {error, Reason}
+                    end
+            end;
+        not_found ->
+            ?event(
+                arweave_offsets,
+                {miss, {id, {explicit, ID}}}
+            ),
+            not_found
+    end.
+
+%% @doc Load an ANS-104 item from the given start offset and length.
+%% Returns an `ok' tuple with the deserialized item, or an `error' tuple with
+%% the reason. The `StartOffset` is the precise starting byte of the item _header_,
+%% not the data segment. The `Length` covers the full size of the item, including
+%% header. The `ExpectedID` is verified against the deserialized item's ID to
+%% guard against stale offsets (e.g. after a reorg).
+load_item(ExpectedID, StartOffset, Length, Opts) ->
+    hb_prometheus:measure_and_report(
+        fun() ->
+            case read_chunks(StartOffset, Length, Opts) of
+                {ok, SerializedItem} ->
+                    Item =
+                        ar_bundles:deserialize(SerializedItem),
+                    % `ExpectedID' is already bound, so the first clause only
+                    % matches when the decoded item carries the requested ID.
+                    case hb_util:encode(Item#tx.id) of
+                        ExpectedID ->
+                            {ok, hb_message:convert(
+                                Item,
+                                <<"structured@1.0">>,
+                                <<"ans104@1.0">>,
+                                Opts
+                            )};
+                        ActualID ->
+                            {error,
+                                {id_mismatch,
+                                    ExpectedID, ActualID}}
+                    end;
+                {error, Reason} ->
+                    {error, Reason}
+            end
+        end,
+        hb_store_arweave_chunk_fetch_duration_seconds,
+        [load_item]
+    ).
+
+%% @doc Load a TX from the given start offset and length. The `StartOffset' is
+%% the start of the first chunk of the data and runs for the length of the data
+%% segment, ignoring header size. The header is fetched by ID without data;
+%% for a non-zero `Length' the data is read from chunks and attached before
+%% conversion. Returns `{ok, Message}' or `{error, Reason}'; a failed header
+%% fetch is reported as an error in the same way as a failed chunk read.
+load_tx(ID, StartOffset, Length, Opts) ->
+    hb_prometheus:measure_and_report(
+        fun() ->
+            HeaderRes =
+                hb_ao:resolve(
+                    #{ <<"device">> => <<"arweave@2.9">> },
+                    #{
+                        <<"path">> => <<"tx">>,
+                        <<"tx">> => ID,
+                        <<"exclude-data">> => true
+                    },
+                    Opts
+                ),
+            case HeaderRes of
+                {ok, StructuredTXHeader} ->
+                    TXHeader =
+                        hb_message:convert(
+                            StructuredTXHeader,
+                            <<"tx@1.0">>,
+                            <<"structured@1.0">>,
+                            Opts
+                        ),
+                    case Length of
+                        0 ->
+                            {ok, hb_message:convert(
+                                TXHeader,
+                                <<"structured@1.0">>,
+                                <<"tx@1.0">>,
+                                Opts)};
+                        _ ->
+                            case read_chunks(StartOffset, Length, Opts) of
+                                {ok, Data} ->
+                                    {ok, hb_message:convert(
+                                        TXHeader#tx{data = Data},
+                                        <<"structured@1.0">>,
+                                        <<"tx@1.0">>,
+                                        Opts
+                                    )};
+                                {error, Reason} ->
+                                    {error, Reason}
+                            end
+                    end;
+                {error, Reason} ->
+                    {error, Reason};
+                Other ->
+                    {error, {tx_header_unavailable, Other}}
+            end
+        end,
+        hb_store_arweave_chunk_fetch_duration_seconds,
+        [load_tx]
+    ).
+
+%% @doc Read the chunks from the given start offset and length using the
+%% `~arweave@2.9` device. The device is asked for `StartOffset + 1'.
+read_chunks(StartOffset, Length, Opts) ->
+    hb_ao:resolve(
+        #{ <<"device">> => <<"arweave@2.9">> },
+        #{
+            <<"path">> => <<"chunk">>,
+            <<"offset">> => StartOffset + 1,
+            <<"length">> => Length
+        },
+        Opts
+    ).
+
+%% @doc Write offset information to the index store.
+write_offset(
+        #{ <<"index-store">> := IndexStore },
+        ID,
+        CodecName,
+        StartOffset,
+        Length
+    ) ->
+    % The codec, start offset and length are packed into one value that is
+    % stored under the offset path of the ID.
+    Value = hb_store_arweave_offset:encode(CodecName, StartOffset, Length),
+    ?event(
+        debug_store_arweave,
+        {writing_offset,
+            {id, {explicit, ID}},
+            {type, CodecName},
+            {start_offset, StartOffset},
+            {length, Length},
+            {value, {explicit, Value}}
+        }
+    ),
+    hb_store:write(IndexStore, hb_store_arweave_offset:path(ID), Value).
+
+%% @doc Record the partition that data is found in when it is requested.
+%% The partition label is `Offset div ?PARTITION_SIZE'. The counter is bumped
+%% from a spawned process, so the caller does not wait for it.
+record_partition_metric(Offset, Result) when is_integer(Offset) ->
+    spawn(fun() ->
+        hb_prometheus:inc(
+            counter,
+            hb_store_arweave_requests_partition,
+            [Offset div ?PARTITION_SIZE, hb_util:bin(Result)],
+            1
+        )
+    end).
+
+%% @doc Initialize the Prometheus metrics for the Arweave store. Executed on
+%% `start/1' of the store.
+init_prometheus() ->
+    hb_prometheus:declare(
+        histogram,
+        [
+            {name, hb_store_arweave_index_check_duration_seconds},
+            {buckets, [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 2, 5, 10]},
+            {help, "How much it takes to check the index"}
+        ]
+    ),
+    hb_prometheus:declare(
+        histogram,
+        [
+            {name, hb_store_arweave_chunk_fetch_duration_seconds},
+            {buckets, [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 30, 60]},
+            {labels, [type]},
+            % This histogram times load_item/load_tx; it previously carried
+            % the help text of the index-check histogram.
+            {help, "How long it takes to load an item or TX from chunks"}
+        ]
+    ),
+    hb_prometheus:declare(
+        counter,
+        [
+            {name, hb_store_arweave_requests_partition},
+            {labels, [partition, result]},
+            {help, "Partition where chunks are being requested"}
+        ]
+    ),
+    % We also depend on the HTTP client, so we ensure its prometheus metrics are
+    % initialized, too.
+    hb_http_client:init_prometheus().
+
+%%% Tests
+
+%% @doc Round trip for an L1 transaction: index its offset under the `tx@1.0'
+%% codec, read it back through `read/2', and check that both the resulting
+%% bundle and its `1/2' child verify. The child must contain every key listed
+%% in `ExpectedChild' (matched with `only_present').
+%% NOTE(review): the ID and offsets are fixed literals describing specific
+%% stored data, so this presumably needs access to real Arweave data -- confirm.
+write_read_tx_test() ->
+    Store = [hb_test_utils:test_store()],
+    Opts = #{
+        <<"index-store">> => Store
+    },
+    ID = <<"bndIwac23-s0K11TLC1N7z472sLGAkiOdhds87ZywoE">>,
+    % The fixture is expressed as an end offset and a size; the start offset
+    % that `write_offset/5' expects is derived from the two.
+    EndOffset = 363524457284025,
+    Size = 8387,
+    StartOffset = EndOffset - Size,
+    ok = write_offset(Opts, ID, <<"tx@1.0">>, StartOffset, Size),
+    {ok, Bundle} = read(Opts, ID),
+    ?assert(hb_message:verify(Bundle, all, #{})),
+    {ok, Child} =
+        hb_ao:resolve(
+            Bundle,
+            <<"1/2">>,
+            #{}
+        ),
+    ?assert(hb_message:verify(Child, all, #{})),
+    ExpectedChild = #{
+        <<"data">> =>
+            <<
+                "{\"totalTickedRewardsDistributed\":0,\"distributedEpochIndexes\""
+                ":[],\"newDemandFactors\":[],\"newEpochIndexes\":[],\""
+                "tickedRewardDistributions\":[],\"newPruneGatewaysResults\""
+                ":[{\"delegateStakeReturned\":0,\"stakeSlashed\":0,\""
+                "gatewayStakeReturned\":0,\"delegateStakeWithdrawing\":0,\""
+                "prunedGateways\":[],\"slashedGateways\":[],\""
+                "gatewayStakeWithdrawing\":0}]}">>,
+        <<"data-protocol">> => <<"ao">>,
+        <<"from-module">> => <<"cbn0KKrBZH7hdNkNokuXLtGryrWM--PjSTBqIzw9Kkk">>,
+        <<"from-process">> => <<"agYcCFJtrMG6cqMuZfskIkFTGvUPddICmtQSBIoPdiA">>,
+        <<"anchor">> => <<"MDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAyODAxODg">>,
+        <<"reference">> => <<"280188">>,
+        <<"target">> => <<"1R5QEtX53Z_RRQJwzFWf40oXiPW2FibErT_h02pu8MU">>,
+        <<"type">> => <<"Message">>,
+        <<"variant">> => <<"ao.TN.1">>
+    },
+    ?assert(hb_message:match(ExpectedChild, Child, only_present)),
+    ok.
+
+%% @doc Stale ANS-104 offset: fake ID pointing to a known bundle TX's
+%% data range. The deserialized item's ID won't match the fake ID.
+%% `read/2' is expected to return `{error, {id_mismatch, _, _}}' rather than
+%% raise.
+stale_ans104_offset_returns_error_test() ->
+    Store = [hb_test_utils:test_store()],
+    Opts = #{<<"index-store">> => Store},
+    FakeID = <<"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA">>,
+    % Same end offset and size as the bundle TX used in `write_read_tx_test/0'.
+    RealEndOffset = 363524457284025,
+    RealSize = 8387,
+    RealStartOffset = RealEndOffset - RealSize,
+    ok = write_offset(Opts, FakeID, <<"ans104@1.0">>, RealStartOffset, RealSize),
+    Result = read(Opts, FakeID),
+    ?assertMatch({error, {id_mismatch, _, _}}, Result).
+
+%% @doc Stale TX offset must crash with data_root_mismatch.
+%% Uses a real TX ID but points the offset at a different TX's data.
+%% The header is fetched by ID (correct), but the chunk data won't
+%% match the header's data_root. The mismatch is caught by
+%% dev_arweave_common:normalize_data_root during conversion.
+%% Despite the `returns_error' in its name, this test asserts that `read/2'
+%% raises an error-class exception instead of returning an error tuple.
+stale_tx_offset_returns_error_test() ->
+    Store = [hb_test_utils:test_store()],
+    Opts = #{<<"index-store">> => Store},
+    RealID = <<"bndIwac23-s0K11TLC1N7z472sLGAkiOdhds87ZywoE">>,
+    % Offset and size of the TX read by `write_read_fake_bundle_tx_test/0'.
+    WrongOffset = 155309918167286,
+    WrongSize = 2,
+    ok = write_offset(Opts, RealID, <<"tx@1.0">>, WrongOffset, WrongSize),
+    ?assertError(
+        {data_root_mismatch, _, _, _},
+        read(Opts, RealID)).
+
+%% @doc The L1 TX has bundle tags, but data is not a valid bundle.
+%% The read must still succeed and the resulting message must verify.
+write_read_fake_bundle_tx_test() ->
+    Store = [hb_test_utils:test_store()],
+    Opts = #{
+        <<"index-store">> => Store
+    },
+    ID = <<"cGNURX2IUt98VKVIeXSfYe6eulNwPEqijaQfvatzd_o">>,
+    Size = 2,
+    StartOffset = 155309918167286,
+    ok = write_offset(Opts, ID, <<"tx@1.0">>, StartOffset, Size),
+    {ok, TX} = read(Opts, ID),
+    ?assert(hb_message:verify(TX, all, #{})),
+    ok.
diff --git a/src/hb_store_arweave_offset.erl b/src/hb_store_arweave_offset.erl
new file mode 100644
index 000000000..7b4d5a914
--- /dev/null
+++ b/src/hb_store_arweave_offset.erl
@@ -0,0 +1,92 @@
+%%% @doc Succinct encoding and decoding for Arweave data offset indexing.
+%%% Arweave data items are extremely numerous (>25,000,000,000 as of Feb 2026), and
+%%% as such small optimizations to the encoding of their offsets have a significant
+%%% effect. For example, a single byte saved in the encoding at time of writing
+%%% saves ~25 GB of storage.
+%%%
+%%% The encoding is as follows:
+%%% << Version:4, Codec:4, StartOffset:64, Length/binary >>
+%%% where:
+%%% - Version: 4-bit unsigned integer. Max: 15. Current: version `1`.
+%%% - Codec: 4-bit unsigned integer. Max: 15.
+%%% - StartOffset: 64-bit uint. Max: 2^64-1.
+%%% - Length: unsigned variable-length integer.
+%%%
+%%% Codecs:
+%%% - 0: `tx@1.0`: An Arweave transaction.
+%%% - 1: [Reserved for ANS-102: The initial JSON data item format.]
+%%% - 2: `~ans104@1.0`: Binary data items.
+%%% - 3: [Reserved for `~httpsig@1.0`: RFC-9421 compatible HTTP signed messages.]
+%%%
+%%% Codec indexes should, in general, be sorted by the time of their first write
+%%% to Arweave: Arweave TXs as 0, ANS-102 as 1, ANS-104 as 2, etc.
+%%%
+%%% All `length` values are read by decoding all of the remaining bytes in the
+%%% offset encoding as an unsigned big-endian integer. This allows the length
+%%% to contract to only the number of bytes actually necessary to represent it.
+%%%
+-module(hb_store_arweave_offset).
+-export([encode/3, decode/1, path/1]).
+-include("include/hb.hrl").
+
+%% @doc Determine if a value is within a given unsigned bit range. `Bits' is a
+%% number of bits (not bytes): the accepted range is `0 =< X < 2^Bits'. The
+%% macro does not itself check that `X' is an integer.
+-define(IN_BIT_RANGE(X, Bits), (X >= 0 andalso X < (1 bsl Bits))).
+
+-define(OFFSET_SZ, (8*8)). % Width of the start offset field in BITS: 64-bit uint. Max: 2^64-1.
+-define(FORMAT_VERSION, 1). % 4-bit uint. Max: 15.
+
+%% @doc Reserved for future use. At the present time, stores containing offsets are
+%% expected to be utilized only as sub-stores to a `hb_store_arweave' store. As
+%% a consequence, the path is simply the ID of the data item, with the prefix
+%% of `~arweave@2.9/offset/` implied.
+path(ID) when ?IS_ID(ID) -> hb_util:native_id(ID);
+path(ID) -> throw({cannot_encode_path, ID}).
+
+%% @doc Encode the offset of the data if it is valid. Throws `cannot_encode_offset'
+%% if invalid.
+%%
+%% `Type' is the binary codec name (see `encode_type/1'), `StartOffset' must be
+%% an integer that fits the `?OFFSET_SZ'-bit offset field, and `Length' must be
+%% a non-negative integer. The result is the one-byte format header, the
+%% fixed-width start offset, and the length as a minimal big-endian unsigned
+%% integer. A codec name that is a binary but not a known codec throws
+%% `cannot_encode_type' instead.
+encode(Type, StartOffset, Length)
+        when
+            % NOTE(review): `true'/`false' look like remnants of an earlier
+            % boolean `IsTX' argument; `encode_type/1' rejects them. They are
+            % left in the guard so the thrown term stays the same for callers.
+            (Type == true orelse Type == false orelse is_binary(Type))
+            % Without the integer check a float would pass the range test and
+            % then crash with `badarg' while the binary is being built.
+            andalso is_integer(StartOffset)
+            % `?OFFSET_SZ' is already a bit count, so it is used as-is. Scaling
+            % it by 8 would accept offsets that the field below cannot hold and
+            % that would be silently truncated.
+            andalso ?IN_BIT_RANGE(StartOffset, ?OFFSET_SZ)
+            andalso is_integer(Length) andalso Length >= 0
+        ->
+    <<
+        (encode_format(Type))/binary,
+        StartOffset:?OFFSET_SZ,
+        (binary:encode_unsigned(Length))/binary
+    >>;
+encode(IsTX, StartOffset, Length) ->
+    throw({cannot_encode_offset, {IsTX, StartOffset, Length}}).
+
+%% @doc Decode a record produced by `encode/3'. Returns
+%% `{Version, CodecName, StartOffset, Length}'. The first byte is the format
+%% header, the next `?OFFSET_SZ' bits are the start offset, and every remaining
+%% byte is the big-endian length. Throws `cannot_decode_offset' if the input is
+%% not a binary holding at least the header and the offset field.
+decode(<<Format:1/binary, StartOffset:?OFFSET_SZ, Length/binary>>) ->
+    {Version, CodecName} = decode_format(Format),
+    {Version, CodecName, StartOffset, binary:decode_unsigned(Length)};
+decode(Binary) ->
+    throw({cannot_decode_offset, Binary}).
+
+%% @doc Encode the type of the data. Maps a codec name to its 4-bit index and
+%% throws `cannot_encode_type' for any other term.
+encode_type(<<"tx@1.0">>) -> 0;
+encode_type(<<"ans102@1.0">>) -> 1;
+encode_type(<<"ans104@1.0">>) -> 2;
+encode_type(<<"httpsig@1.0">>) -> 3;
+encode_type(Type) -> throw({cannot_encode_type, Type}).
+
+%% @doc Decode the type of the data to a binary codec name. Throws
+%% `cannot_decode_type' for an index that has no codec assigned.
+decode_type(0) -> <<"tx@1.0">>;
+decode_type(1) -> <<"ans102@1.0">>;
+decode_type(2) -> <<"ans104@1.0">>;
+decode_type(3) -> <<"httpsig@1.0">>;
+decode_type(Type) -> throw({cannot_decode_type, Type}).
+
+%% @doc Encode the format of the offset. See the module documentation for the
+%% present index of supported codecs. The result is a single byte: the format
+%% version in the high four bits and the codec index in the low four bits.
+%% Unknown codec names are rejected by `encode_type/1', which throws
+%% `cannot_encode_type'.
+encode_format(CodecName) ->
+    << ?FORMAT_VERSION:4, (encode_type(CodecName)):4 >>.
+
+%% @doc Decode the format of the offset. Splits the one-byte header into the
+%% format version and the codec name. Throws `cannot_decode_format' when the
+%% input is not exactly one byte.
+decode_format(<<FormatVersion:4, CodecName:4>>) ->
+    {FormatVersion, decode_type(CodecName)};
+decode_format(Binary) ->
+    throw({cannot_decode_format, Binary}).
\ No newline at end of file diff --git a/src/hb_store_gateway.erl b/src/hb_store_gateway.erl index 9799b3a14..ea992a30b 100644 --- a/src/hb_store_gateway.erl +++ b/src/hb_store_gateway.erl @@ -475,7 +475,8 @@ verifiability_test() -> ?assert(hb_message:verify(Structured)). %% @doc Reading an unsupported signature type transaction should fail -failure_to_process_message_test() -> +%% TODO: Enable when we find a TX that we don't support +failure_to_process_message_test_disabled() -> hb_http_server:start_node(#{}), ?assertEqual(failure, hb_cache:read( diff --git a/src/hb_store_lmdb.erl b/src/hb_store_lmdb.erl index 88f837d85..e13e46702 100644 --- a/src/hb_store_lmdb.erl +++ b/src/hb_store_lmdb.erl @@ -30,7 +30,7 @@ -include("include/hb.hrl"). %% Configuration constants with reasonable defaults --define(DEFAULT_SIZE, 16 * 1024 * 1024 * 1024). % 16GB default database size +-define(DEFAULT_SIZE, 2 * 1024 * 1024 * 1024 * 1024). % 2TiB default database size -define(DEFAULT_BATCH_SIZE, 5_000). % Flush keys on every read or % every 5,000 write operations. -define(MAX_REDIRECTS, 1000). % Only resolve 1000 links to data @@ -49,6 +49,7 @@ %% @param StoreOpts A map containing database configuration options %% @returns {ok, ServerPid} on success, {error, Reason} on failure start(Opts = #{ <<"name">> := DataDir }) -> + init_prometheus(), % Ensure the directory exists before opening LMDB environment DataDirPath = hb_util:list(DataDir), ok = ensure_dir(DataDirPath), @@ -65,6 +66,10 @@ start(Opts = #{ <<"name">> := DataDir }) -> no_mem_init, no_sync ] ++ + case maps:get(<<"read-ahead">>, Opts, true) of + true -> []; + false -> [no_readahead] + end ++ case maps:get(<<"read-only">>, Opts, false) of true -> [no_lock]; false -> [] @@ -184,14 +189,18 @@ write(Opts, Path, Value) -> -spec read(map(), binary() | list()) -> {ok, binary()} | {error, term()}. 
read(Opts, PathParts) when is_list(PathParts) -> read(Opts, to_path(PathParts)); -read(Opts, Path) -> +read(#{<<"name">> := Name} = Opts, Path) -> % Try direct read first (fast path for non-link paths) - case read_with_links(Opts, Path) of - {ok, Value} -> + StartTime = erlang:monotonic_time(), + ReadRes = read_with_links(Opts, Path), + case ReadRes of + {ok, Value} -> + sample_metrics(Name, StartTime, hit), {ok, Value}; not_found -> + sample_metrics(Name, StartTime, miss), try - PathParts = binary:split(Path, <<"/">>, [global]), + PathParts = binary:split(Path, <<"/">>, [global, trim_all]), case resolve_path_links(Opts, PathParts) of {ok, ResolvedPathParts} -> ResolvedPathBin = to_path(ResolvedPathParts), @@ -206,7 +215,7 @@ read(Opts, Path) -> resolve_path_links_failed, {class, Class}, {reason, Reason}, - {stacktrace, Stacktrace}, + {stacktrace, {trace, Stacktrace}}, {path, Path} } ), @@ -240,12 +249,26 @@ to_path(PathParts) -> %% in-process pending writes, if necessary. %% %% Returns {ok, Value} or not_found. -read_direct(Opts, Path) -> +read_direct(#{<<"name">> := Name} = Opts, Path) -> #{ <<"db">> := DBInstance } = find_env(Opts), case elmdb:get(DBInstance, Path) of {ok, Value} -> {ok, Value}; {error, not_found} -> not_found; % Normalize error format - not_found -> not_found % Handle both old and new format + not_found -> not_found; % Handle both old and new format + {error, transaction_error, Message} = Err -> + ?event(lmdb_store, + {transaction_error, + {path, Path}, + {db_name, Name}, + {message, Message}}), + Err; + {error, database_error, ErrorMessage} = Err -> + ?event(lmdb_store, + {database_error, + {path, Path}, + {db_name, Name}, + {msg, ErrorMessage}}), + Err end. %% @doc Read a value directly from the database with link resolution. @@ -589,6 +612,32 @@ reset(Opts) -> ok end. +%% @doc Sample roughly 1/1024 reads using the start timestamp and scale the +%% hit counter by the same factor to preserve an approximate total. 
+sample_metrics(_Name, StartTime, _Type) when (StartTime band 1023) =/= 0 -> + ok; +sample_metrics(Name, StartTime, Type) -> + ReadTime = erlang:monotonic_time() - StartTime, + hb_prometheus:observe(ReadTime, hb_store_lmdb_duration_seconds, [read, Name]), + case Type of + hit -> hb_prometheus:inc(counter, hb_store_lmdb_hit, [Name], 1024); + miss -> ok + end. + +init_prometheus() -> + hb_prometheus:declare(histogram, [ + {name, hb_store_lmdb_duration_seconds}, + {labels, [function, store_name]}, + {buckets, [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 20]}, + {help, "Duration of lmdb operations in microseconds"} + ]), + hb_prometheus:declare(counter, [ + {name, hb_store_lmdb_hit}, + {labels, [name]}, + {help, "LMDB name requested"} + ]), + ok. + %% @doc Test suite demonstrating basic store operations. %% %% The following functions implement unit tests using EUnit to verify that @@ -958,29 +1007,29 @@ isolated_type_debug_test() -> % 2. Create nested groups for "commitments" and "other-test-key" CommitmentsPath = <>, OtherKeyPath = <>, - ?event(isolated_debug, {creating_nested_groups, CommitmentsPath, OtherKeyPath}), + ?event(debug_isolated, {creating_nested_groups, CommitmentsPath, OtherKeyPath}), make_group(StoreOpts, CommitmentsPath), make_group(StoreOpts, OtherKeyPath), % 3. Add some actual data within those groups write(StoreOpts, <>, <<"signature_data_1">>), write(StoreOpts, <>, <<"nested_value">>), % 4. 
Test type detection on the nested paths - ?event(isolated_debug, {testing_main_message_type}), + ?event(debug_isolated, {testing_main_message_type}), MainType = type(StoreOpts, MessageID), - ?event(isolated_debug, {main_message_type, MainType}), - ?event(isolated_debug, {testing_commitments_type}), + ?event(debug_isolated, {main_message_type, MainType}), + ?event(debug_isolated, {testing_commitments_type}), CommitmentsType = type(StoreOpts, CommitmentsPath), - ?event(isolated_debug, {commitments_type, CommitmentsType}), - ?event(isolated_debug, {testing_other_key_type}), + ?event(debug_isolated, {commitments_type, CommitmentsType}), + ?event(debug_isolated, {testing_other_key_type}), OtherKeyType = type(StoreOpts, OtherKeyPath), - ?event(isolated_debug, {other_key_type, OtherKeyType}), + ?event(debug_isolated, {other_key_type, OtherKeyType}), % 5. Test what happens when reading these nested paths - ?event(isolated_debug, {reading_commitments_directly}), + ?event(debug_isolated, {reading_commitments_directly}), CommitmentsResult = read(StoreOpts, CommitmentsPath), - ?event(isolated_debug, {commitments_read_result, CommitmentsResult}), - ?event(isolated_debug, {reading_other_key_directly}), + ?event(debug_isolated, {commitments_read_result, CommitmentsResult}), + ?event(debug_isolated, {reading_other_key_directly}), OtherKeyResult = read(StoreOpts, OtherKeyPath), - ?event(isolated_debug, {other_key_read_result, OtherKeyResult}), + ?event(debug_isolated, {other_key_read_result, OtherKeyResult}), stop(StoreOpts). %% @doc Test that list function resolves links correctly @@ -1003,4 +1052,4 @@ list_with_link_test() -> {ok, LinkChildren} = list(StoreOpts, <<"link-to-group">>), ?event({link_children, LinkChildren}), ?assertEqual(ExpectedChildren, lists:sort(LinkChildren)), - stop(StoreOpts). + stop(StoreOpts). 
\ No newline at end of file diff --git a/src/hb_store_multi.erl b/src/hb_store_multi.erl index 873cdbc5c..470460606 100644 --- a/src/hb_store_multi.erl +++ b/src/hb_store_multi.erl @@ -1,13 +1,17 @@ %%% @doc A store implementation that wraps many other stores and dispatches %%% operations to them in parallel. It can be configured to wait for a certain %%% number of results before returning, or to return as soon as possible. +%%% %%% Expects a store options message of the following form: %%% /stores/1..n: Sub-store definition messages. %%% /confirmations: Number of confirmations to require for write operations. %%% /workers-per-store: Number of worker processes to spawn for each store %%% (default: 3). Work is distributed evenly across each. -%%% Each sub-store may additionally specify: -%%% /num_workers: Number of worker processes to spawn for the store (default: 1). +%%% +%%% Each sub-store may additionally specify a specific number of store workers +%%% to spawn, overriding the 'global' store configuration for that individual +%%% case. This parameter can be specified in the store's own configuration using +%%% the `workers-per-store' key. -module(hb_store_multi). -behaviour(hb_store). -export([start/1, stop/1, reset/1, scope/0, scope/1]). @@ -182,21 +186,31 @@ make_group(StoreOpts, Path) -> %%% Worker operations. %% @doc Start a worker process for each store and return the updated store options. -%% The number of workers per store is controlled by the `num_workers' key in -%% the store options (default: 1). -store_with_workers(StoreOpts = #{ <<"stores">> := Stores }) -> - StoreOpts#{ +%% The number of workers per store is controlled by the `workers-per-store' key in +%% the store options, or globally in the multi store with the same key +%% (default: 3).
+store_with_workers(MultiStoreOpts = #{ <<"stores">> := Stores }) -> + GlobalWorkersPerStore = + maps:get( + <<"workers-per-store">>, + MultiStoreOpts, + ?DEFAULT_STORE_WORKERS + ), + MultiStoreOpts#{ <<"stores">> := lists:map( - fun(Store) -> - NumWorkers = - maps:get( + fun(StoreOpts) -> + StoreNumWorkers = + case maps:get( <<"workers-per-store">>, - Store, - ?DEFAULT_STORE_WORKERS - ), - Workers = [start_worker(Store) || _ <- lists:seq(1, NumWorkers)], - Store#{ <<"workers">> => Workers } + StoreOpts, + undefined + ) of + undefined -> GlobalWorkersPerStore; + NumWorkersPerStore -> NumWorkersPerStore + end, + Workers = [start_worker(StoreOpts) || _ <- lists:seq(1, StoreNumWorkers)], + StoreOpts#{ <<"workers">> => Workers } end, Stores ) diff --git a/src/hb_store_remote_node.erl b/src/hb_store_remote_node.erl index 8e5dcc007..90890e9c3 100644 --- a/src/hb_store_remote_node.erl +++ b/src/hb_store_remote_node.erl @@ -4,7 +4,7 @@ %%% been written to the remote node. In that case, the node would probably want %%% to upload it to an Arweave bundler to ensure persistence, too. -module(hb_store_remote_node). --export([scope/1, type/2, read/2, write/3, make_link/3, resolve/2]). +-export([scope/1, type/2, read/2, write/3, make_link/3, make_group/2, resolve/2]). %%% Public utilities. -export([maybe_cache/2, maybe_cache/3, read_local_cache/2]). -include("include/hb.hrl"). @@ -52,6 +52,8 @@ type(Opts = #{ <<"node">> := Node }, Key) -> %% @param Opts A map of options (including node configuration). %% @param Key The key to read. %% @returns {ok, Msg} on success or not_found if the key is missing. +read(#{ <<"only-ids">> := true }, Key) when not ?IS_ID(Key) -> + not_found; read(Opts = #{ <<"node">> := Node }, Key) -> ?event(store_remote_node, {executing_read, {node, Node}, {key, Key}}), HTTPRes = @@ -70,7 +72,8 @@ read(Opts = #{ <<"node">> := Node }, Key) -> {error, _Err} -> ?event(store_remote_node, {read_not_found, {key, Key}}), not_found - end. 
+ end; +read(_, _) -> not_found. %% @doc Cache the data if the cache is enabled. The `local-store' option may %% either be `false' or a store definition to use as the local cache. Additional @@ -124,10 +127,8 @@ maybe_cache(StoreOpts, Data, Links) -> read_local_cache(StoreOpts, ID) -> ?event({read_local_cache, StoreOpts, ID}), case hb_maps:get(<<"local-store">>, StoreOpts, false, StoreOpts) of - false -> - not_found; - Store -> - hb_cache:read(ID, #{store => Store}) + false -> not_found; + Store -> hb_cache:read(ID, #{ store => Store }) end. %% @doc Write a key to the remote node. @@ -140,6 +141,8 @@ read_local_cache(StoreOpts, ID) -> %% @param Key The key to write. %% @param Value The value to store. %% @returns {ok, Path} on success or {error, Reason} on failure. +write(#{ <<"read-only">> := true }, _Key, _Value) -> + not_found; write(Opts = #{ <<"node">> := Node }, Key, Value) -> ?event({write, {node, Node}, {key, Key}, {value, Value}}), WriteMsg = #{ @@ -167,6 +170,8 @@ write(Opts = #{ <<"node">> := Node }, Key, Value) -> %% Constructs an HTTP POST link request. If a wallet is provided, %% the message is signed. Returns {ok, Path} on HTTP 200, or %% {error, Reason} on failure. +make_link(#{ <<"read-only">> := true }, _Source, _Destination) -> + not_found; make_link(Opts = #{ <<"node">> := Node }, Source, Destination) -> ?event({make_remote_link, {node, Node}, {source, Source}, {destination, Destination}}), @@ -191,6 +196,9 @@ make_link(Opts = #{ <<"node">> := Node }, Source, Destination) -> {error, Err} end. +%% @doc Remote store `make_group/2' is a no-op. +make_group(_StoreOpts, _Path) -> not_found. 
+ %%%-------------------------------------------------------------------- %%% Tests %%%-------------------------------------------------------------------- @@ -222,4 +230,25 @@ read_test() -> #{ <<"store-module">> => hb_store_remote_node, <<"node">> => Node } ], {ok, RetrievedMsg} = hb_cache:read(ID, #{ store => RemoteStore }), - ?assertMatch(#{ <<"test-key">> := Rand }, hb_cache:ensure_all_loaded(RetrievedMsg)). \ No newline at end of file + ?assertMatch(#{ <<"test-key">> := Rand }, hb_cache:ensure_all_loaded(RetrievedMsg)). + +read_only_ids_test() -> + LocalStore = hb_test_utils:test_store(), + hb_store:reset(LocalStore), + {ok, ID} = + hb_cache:write( + <<"message">>, + #{ store => LocalStore } + ), + Node = + hb_http_server:start_node( + #{ + store => LocalStore + } + ), + RemoteStore = [ + #{ <<"store-module">> => hb_store_remote_node, + <<"node">> => Node, + <<"only-ids">> => true } + ], + ?assertEqual(not_found, hb_cache:read(ID, #{ store => RemoteStore })). diff --git a/src/hb_store_ets.erl b/src/hb_store_volatile.erl similarity index 76% rename from src/hb_store_ets.erl rename to src/hb_store_volatile.erl index f112fc2ab..6ddd14ae1 100644 --- a/src/hb_store_ets.erl +++ b/src/hb_store_volatile.erl @@ -1,31 +1,37 @@ -%%% @doc A lightweight in-memory HyperBEAM store backed by ETS. +%%% @doc A lightweight in-memory HyperBEAM store backed by ETS. The store is +%%% volatile: It does not persist data to disk ever, and -- critically -- can +%%% be configured to expire all data periodically. This is useful for testing +%%% and as a short-term in-memory cache, not for instances where an `ok` from +%%% the `write` function should imply data persistence. %%% %%% This store keeps all data in-memory and does not flush to any persistent %%% backend. It supports the core `hb_store` interface semantics used by %%% `hb_store` and `hb_cache`: writes, reads, groups, links, type checks, %%% path resolution, and resets. --module(hb_store_ets). +-module(hb_store_volatile). 
-export([start/1, stop/1, reset/1, scope/0, scope/1]). -export([write/3, read/2, list/2, type/2, make_link/3, make_group/2, resolve/2]). -include("include/hb.hrl"). +-include_lib("eunit/include/eunit.hrl"). -define(ROOT_GROUP, <<"/">>). -define(MAX_REDIRECTS, 32). %% @doc Start the ETS-backed store and return the store instance message. -start(#{ <<"name">> := Name }) -> +start(StoreOpts = #{ <<"name">> := Name }) -> ?event(cache_ets, {starting_ets_store, Name}), Parent = self(), spawn( fun() -> - Table = ets:new(hb_store_ets, [ + Table = ets:new(hb_store_volatile, [ set, public, {read_concurrency, true}, {write_concurrency, true} ]), Parent ! {ok, #{ <<"pid">> => self(), <<"ets-table">> => Table }}, - owner_loop() + maybe_start_ttl_timer(StoreOpts, self()), + owner_loop(StoreOpts) end ), receive @@ -35,13 +41,23 @@ start(#{ <<"name">> := Name }) -> %% @doc Owner loop for the ETS store. Simply waits for a stop message and exits. %% Until the store is stopped, the table will remain alive. -owner_loop() -> +owner_loop(StoreOpts) -> receive {stop, From, Ref} -> From ! {ok, Ref}, exit(normal); + reset -> + reset(StoreOpts), + maybe_start_ttl_timer(StoreOpts, self()), + owner_loop(StoreOpts); _ -> - owner_loop() + owner_loop(StoreOpts) + end. + +maybe_start_ttl_timer(StoreOpts, PID) -> + case maps:get(<<"max-ttl">>, StoreOpts, infinity) of + infinity -> skip; + MaxTTL -> timer:send_after(hb_util:int(MaxTTL) * 1000, PID, reset) end. %% @doc Stop the ETS owner process (which also drops the table). @@ -62,6 +78,7 @@ scope(_) -> scope(). reset(Opts) -> #{ <<"ets-table">> := Table } = hb_store:find(Opts), ets:delete_all_objects(Table), + ?event(store_volatile, {reset, {table, Table}}), ok. %% @doc Write a value at the key path. 
@@ -69,6 +86,7 @@ write(Opts, RawKey, Value) -> Key = hb_store:join(RawKey), #{ <<"ets-table">> := Table } = hb_store:find(Opts), ensure_parent_groups(Table, Key), + ?event(store_volatile, {write, {key, Key}}), ets:insert(Table, {Key, {raw, Value}}), ok. @@ -81,10 +99,13 @@ read_resolved(_Opts, _Key, Depth) when Depth > ?MAX_REDIRECTS -> read_resolved(Opts, Key, Depth) -> case lookup_entry(Opts, Key) of {raw, Value} -> + ?event(store_volatile, {hit, {key, Key}}), {ok, Value}; {link, Link} -> + ?event(store_volatile, {hit, {key, Key}}), read_resolved(Opts, hb_store:join(Link), Depth + 1); _ -> + ?event(store_volatile, {miss, {key, Key}}), not_found end. @@ -217,3 +238,23 @@ add_group_child(Table, GroupPath, Child) -> end, ets:insert(Table, {GroupPath, {group, sets:add_element(Child, Set)}}), ok. + +%%% Tests + +max_ttl_test() -> + StoreOpts = + #{ + <<"store-module">> => ?MODULE, + <<"name">> => <<"ets-max-ttl-test">>, + <<"max-ttl">> => 1 + }, + hb_store:start(StoreOpts), + hb_store:write(StoreOpts, <<"a">>, <<"b">>), + ?assertEqual({ok, <<"b">>}, hb_store:read(StoreOpts, <<"a">>)), + timer:sleep(1250), + ?assertEqual(not_found, hb_store:read(StoreOpts, <<"a">>)), + hb_store:write(StoreOpts, <<"a">>, <<"c">>), + ?assertEqual({ok, <<"c">>}, hb_store:read(StoreOpts, <<"a">>)), + timer:sleep(1250), + ?assertEqual(not_found, hb_store:read(StoreOpts, <<"a">>)), + hb_store:stop(StoreOpts). \ No newline at end of file diff --git a/src/hb_structured_fields.erl b/src/hb_structured_fields.erl index 658202e15..c4faef841 100644 --- a/src/hb_structured_fields.erl +++ b/src/hb_structured_fields.erl @@ -311,7 +311,9 @@ parse_bare_item(<<"?0", R/bits>>) -> {false, R}; parse_bare_item(<<"?1", R/bits>>) -> % Parse a boolean true. - {true, R}. + {true, R}; +parse_bare_item(<<>>) -> + {{binary, <<>>}, <<>>}. %% @doc Parse an integer or decimal binary. 
parse_number(<>, L, Acc) when ?IS_DIGIT(C) ->
diff --git a/src/hb_system_monitor.erl b/src/hb_system_monitor.erl
new file mode 100644
index 000000000..baadb1182
--- /dev/null
+++ b/src/hb_system_monitor.erl
@@ -0,0 +1,332 @@
+%%% @doc Monitor BEAM system events that indicate scheduler starvation,
+%%% long-running NIFs/drivers, GC pauses, and mailbox buildup. Uses
+%%% erlang:system_monitor/2 to receive notifications when thresholds are
+%%% breached and logs them through the ?event system.
+%%%
+%%% When a long_schedule event exceeds the deep inspection threshold,
+%%% the monitor grabs process_info for the offending PID (stacktrace,
+%%% current function, memory, message queue, reductions). This is
+%%% rate-limited to avoid flooding.
+-module(hb_system_monitor).
+
+-export([ensure_started/1]).
+
+-include("include/hb.hrl").
+-include_lib("eunit/include/eunit.hrl").
+
+%% Defaults for the `erlang:system_monitor/2' thresholds. Each one can be
+%% overridden through the node options (see `build_monitor_opts/1').
+-define(DEFAULT_LONG_SCHEDULE_MS, 40).
+-define(DEFAULT_LONG_GC_MS, 50).
+-define(DEFAULT_LARGE_HEAP_WORDS, 40 * 1024 * 1024).
+%% Message queue lengths at which `long_message_queue' monitoring of a process
+%% is switched on and off again, respectively.
+-define(DEFAULT_LONG_MSG_QUEUE_ENABLE, 10_000).
+-define(DEFAULT_LONG_MSG_QUEUE_DISABLE, 1_000).
+%% Defaults for deep inspection: the minimum reported `timeout' of a
+%% `long_schedule' event, and the minimum gap between two inspections.
+-define(DEFAULT_DEEP_INSPECT_MS, 90).
+-define(DEFAULT_DEEP_INSPECT_INTERVAL_MS, 1_000).
+
+%% @doc Ensure the system monitor singleton is started if enabled.
+%% The `system_monitor' option decides; when it is not set, the monitor is
+%% enabled exactly when `hb_features:test()' is false. Always returns `ok',
+%% whatever `hb_name:singleton/2' returns.
+ensure_started(Opts) ->
+    Enabled = hb_opts:get(system_monitor, not hb_features:test(), Opts),
+    ?event(system_monitor, {system_monitor_enabled, Enabled}),
+    case Enabled of
+        true ->
+            _ = hb_name:singleton(?MODULE, fun() -> start(Opts) end),
+            ok;
+        false ->
+            ok
+    end.
+
+%% @doc Start the system monitor process. Declares the Prometheus metrics,
+%% installs the calling process as the receiver of system monitor messages and
+%% then enters `loop/1', so this call does not return.
+start(Opts) ->
+    hb_prometheus:ensure_started(),
+    init_prometheus(),
+    MonitorOpts = build_monitor_opts(Opts),
+    ?event(system_monitor, {starting_system_monitor, MonitorOpts}),
+    erlang:system_monitor(self(), MonitorOpts),
+    loop(#{
+        opts => Opts,
+        % Backdate the last inspection by the default interval so that, with
+        % the default cooldown, the first qualifying event is inspected
+        % immediately.
+        % NOTE(review): this uses the default interval even when
+        % `deep_inspect_interval_ms' is configured -- confirm that is intended.
+        last_deep_inspect =>
+            erlang:monotonic_time(millisecond)
+                - ?DEFAULT_DEEP_INSPECT_INTERVAL_MS
+    }).
+
+%% @doc Assemble the option list handed to `erlang:system_monitor/2'. Every
+%% threshold can be overridden through `Opts'; the `?DEFAULT_*' macros apply
+%% otherwise. `busy_port' and `busy_dist_port' are always requested.
+build_monitor_opts(Opts) ->
+    Setting = fun(Key, Default) -> hb_opts:get(Key, Default, Opts) end,
+    ScheduleMs = Setting(long_schedule_ms, ?DEFAULT_LONG_SCHEDULE_MS),
+    GCMs = Setting(long_gc_ms, ?DEFAULT_LONG_GC_MS),
+    HeapWords = Setting(large_heap_words, ?DEFAULT_LARGE_HEAP_WORDS),
+    QueueOn =
+        Setting(long_msg_queue_enable, ?DEFAULT_LONG_MSG_QUEUE_ENABLE),
+    QueueOff =
+        Setting(long_msg_queue_disable, ?DEFAULT_LONG_MSG_QUEUE_DISABLE),
+    [
+        {long_schedule, ScheduleMs},
+        {long_gc, GCMs},
+        {large_heap, HeapWords},
+        % The pair is ordered {Disable, Enable}.
+        {long_message_queue, {QueueOff, QueueOn}},
+        busy_port,
+        busy_dist_port
+    ].
+
+%% @doc Message loop of the monitor process. Every system monitor message is
+%% logged and counted. `long_schedule' events are counted by their in/out
+%% locations and may additionally trigger a deep inspection, which is the only
+%% thing that changes the loop state. Any other message is logged as a warning
+%% and dropped.
+loop(State) ->
+    receive
+        {monitor, PidOrPort, long_schedule, Info} ->
+            ?event(system_monitor, {long_schedule, PidOrPort, Info}),
+            ScheduledIn =
+                format_location(proplists:get_value(in, Info, undefined)),
+            ScheduledOut =
+                format_location(proplists:get_value(out, Info, undefined)),
+            hb_prometheus:inc(
+                counter,
+                system_monitor_long_schedule_total,
+                [ScheduledIn, ScheduledOut]
+            ),
+            loop(maybe_deep_inspect(PidOrPort, Info, State));
+        {monitor, Pid, Event, Detail}
+                when Event =:= long_gc;
+                     Event =:= large_heap;
+                     Event =:= long_message_queue;
+                     Event =:= busy_port;
+                     Event =:= busy_dist_port ->
+            % These events share one counter, labeled by the event type.
+            ?event(system_monitor, {Event, Pid, Detail}),
+            hb_prometheus:inc(counter, system_monitor_events_total, [Event]),
+            loop(State);
+        Other ->
+            ?event(warning,
+                {unhandled_info, {module, ?MODULE}, {message, Other}}),
+            loop(State)
+    end.
+
+%% @doc Declare the three Prometheus counters used by this module: one for
+%% events by type, one for `long_schedule' events by in/out location, and one
+%% for deep inspections by entry/location.
+init_prometheus() ->
+    EventCounter = [
+        {name, system_monitor_events_total},
+        {labels, [event]},
+        {help, "Count of erlang:system_monitor events by type"}
+    ],
+    LongScheduleCounter = [
+        {name, system_monitor_long_schedule_total},
+        {labels, [scheduled_in, scheduled_out]},
+        {help,
+            "Count of long_schedule events"
+            " labeled by in/out function"}
+    ],
+    DeepInspectCounter = [
+        {name, system_monitor_deep_inspect_total},
+        {labels, [entry, location]},
+        {help,
+            "Count of deep inspections."
+            " entry=outermost stack frame,"
+            " location=mid/current frame"}
+    ],
+    hb_prometheus:declare(counter, EventCounter),
+    hb_prometheus:declare(counter, LongScheduleCounter),
+    hb_prometheus:declare(counter, DeepInspectCounter).
+
+%% @doc Render a schedule location as a binary suitable for a Prometheus
+%% label: `mod:func/arity' for an MFA triple, `undefined' for the atom
+%% `undefined', and `unknown' for anything else.
+format_location(Location) ->
+    case Location of
+        undefined ->
+            <<"undefined">>;
+        {Mod, Func, Arity} ->
+            iolist_to_binary([
+                atom_to_binary(Mod), $:,
+                atom_to_binary(Func), $/,
+                integer_to_binary(Arity)
+            ]);
+        _ ->
+            <<"unknown">>
+    end.
+
+%% @doc If the timeout exceeds the deep inspection threshold and
+%% enough time has passed since the last inspection, grab detailed
+%% process info for the offending PID.
+maybe_deep_inspect(PidOrPort, _Info, State) when is_port(PidOrPort) ->
+    % Only processes are inspected; events raised for ports leave the state
+    % untouched.
+    State;
+maybe_deep_inspect(
+        Pid,
+        Info,
+        State = #{opts := Opts, last_deep_inspect := PreviousAt}
+    ) ->
+    MinTimeoutMs =
+        hb_opts:get(deep_inspect_ms, ?DEFAULT_DEEP_INSPECT_MS, Opts),
+    MinGapMs =
+        hb_opts:get(
+            deep_inspect_interval_ms,
+            ?DEFAULT_DEEP_INSPECT_INTERVAL_MS,
+            Opts
+        ),
+    ReportedMs = proplists:get_value(timeout, Info, 0),
+    Now = erlang:monotonic_time(millisecond),
+    SlowEnough = ReportedMs >= MinTimeoutMs,
+    CooledDown = (Now - PreviousAt) >= MinGapMs,
+    if
+        SlowEnough, CooledDown ->
+            % A failing inspection is logged but never takes the monitor
+            % down. The cooldown restarts whether or not it succeeded.
+            try
+                deep_inspect(Pid, Info)
+            catch
+                Class:Reason ->
+                    ?event(system_monitor,
+                        {deep_inspect_error, Pid, Class, Reason})
+            end,
+            State#{last_deep_inspect => Now};
+        true ->
+            State
+    end.
+
+%% @doc Collect a snapshot of the process, log it together with the schedule
+%% info that triggered the inspection, and count the inspection in Prometheus.
+%% Both counter labels are derived from the process's current stacktrace:
+%% `entry' comes from `stack_entry/1' and `location' from `stack_location/1'.
+%% Labels are kept this coarse to limit Prometheus label cardinality.
+deep_inspect(Pid, ScheduleInfo) ->
+    Wanted = [
+        registered_name,
+        current_function,
+        current_stacktrace,
+        initial_call,
+        message_queue_len,
+        memory,
+        reductions,
+        dictionary,
+        status
+    ],
+    Snapshot = safe_process_info(Pid, Wanted),
+    ?event(system_monitor, {deep_inspect, Pid, ScheduleInfo, Snapshot}),
+    % A dead process yields no stacktrace entry, so fall back to an empty one.
+    Frames = proplists:get_value(current_stacktrace, Snapshot, []),
+    EntryLabel = stack_entry(Frames),
+    LocationLabel = stack_location(Frames),
+    hb_prometheus:inc(counter, system_monitor_deep_inspect_total,
+        [EntryLabel, LocationLabel]).
+
+%% @doc Derive the `entry' label from a stacktrace via
+%% `hb_format:process_from_trace/1'. An empty stacktrace yields `unknown'.
+stack_entry(Stack) ->
+    case Stack of
+        [] -> <<"unknown">>;
+        _ -> hb_format:process_from_trace(Stack)
+    end.
+
+%% @doc Build a compact location label from the stack: `mid/current`.
+%% Current is the innermost frame (head of stacktrace), mid is
+%% roughly 1/3 from the bottom — a frame that gives codebase context
+%% without being the generic entry or the leaf.
+%% An empty stack yields `unknown' and a single frame is formatted alone, as
+%% is the current frame when the mid frame is identical to it.
+stack_location([]) ->
+    <<"unknown">>;
+stack_location([Single]) ->
+    format_frame(Single);
+stack_location(Stack) ->
+    Current = hd(Stack),
+    Depth = length(Stack),
+    % Counted from the head, this picks the frame one third up from the
+    % outermost end of the stacktrace.
+    MidPosition = max(1, Depth - (Depth div 3)),
+    case lists:nth(MidPosition, Stack) of
+        Current ->
+            format_frame(Current);
+        Mid ->
+            MidLabel = format_frame(Mid),
+            CurrentLabel = format_frame(Current),
+            <<MidLabel/binary, "/", CurrentLabel/binary>>
+    end.
+
+%% @doc Render one stack frame as `mod:func/arity'. Accepts both four-element
+%% frames (the fourth element is ignored) and plain MFA triples; any other
+%% term is rendered as `unknown'.
+format_frame(Frame) ->
+    case Frame of
+        {Mod, Func, Arity, _} -> format_location({Mod, Func, Arity});
+        {_, _, _} = MFA -> format_location(MFA);
+        _ -> <<"unknown">>
+    end.
+
+%% @doc Fetch process info without ever raising. If the process no longer
+%% exists, or the lookup fails for any reason, `[{status, dead}]' is returned
+%% instead. The process may have died between the monitor event and the
+%% inspection.
+safe_process_info(Pid, Items) ->
+    Dead = [{status, dead}],
+    try erlang:process_info(Pid, Items) of
+        undefined -> Dead;
+        Info -> Info
+    catch
+        _:_ -> Dead
+    end.
+
+%% =================================================================
+%% Tests
+%% =================================================================
+
+%% @doc A live process can be deep-inspected, and its process info contains
+%% the requested items in the requested order.
+deep_inspect_live_process_test() ->
+    hb_prometheus:ensure_started(),
+    init_prometheus(),
+    ScheduleInfo = [{timeout, 100}, {in, undefined}, {out, undefined}],
+    % The helper blocks until told to stop, so it is alive while inspected.
+    Waiter = spawn(fun() -> receive stop -> ok end end),
+    deep_inspect(Waiter, ScheduleInfo),
+    ?assertMatch(
+        [{status, _}, {memory, _}],
+        safe_process_info(Waiter, [status, memory])
+    ),
+    Waiter ! stop.
+
+%% @doc Test that deep_inspect handles a dead process gracefully.
+deep_inspect_dead_process_test() ->
+    hb_prometheus:ensure_started(),
+    init_prometheus(),
+    Info = [{timeout, 100}, {in, undefined}, {out, undefined}],
+    %% Wait for the 'DOWN' notification instead of sleeping: a fixed sleep
+    %% only makes it likely, not certain, that the process has exited.
+    {Pid, Ref} = spawn_monitor(fun() -> ok end),
+    receive
+        {'DOWN', Ref, process, Pid, _Reason} -> ok
+    after 5000 ->
+        error(process_did_not_exit)
+    end,
+    deep_inspect(Pid, Info),
+    ProcInfo = safe_process_info(Pid, [status]),
+    ?assertEqual([{status, dead}], ProcInfo).
+
+%% @doc Test that maybe_deep_inspect fires when threshold is exceeded.
+%% The recorded timestamp must move forward from its initial value.
+maybe_deep_inspect_fires_test() ->
+    Pid = spawn(fun() -> receive stop -> ok end end),
+    try
+        Info = [{timeout, 100}, {in, undefined}, {out, undefined}],
+        Now = erlang:monotonic_time(millisecond),
+        State = #{
+            opts => #{deep_inspect_ms => 50, deep_inspect_interval_ms => 0},
+            last_deep_inspect => Now - 1000
+        },
+        State2 = maybe_deep_inspect(Pid, Info, State),
+        #{last_deep_inspect := LastTime} = State2,
+        ?assert(LastTime >= Now)
+    after
+        %% Release the helper process even when an assertion fails.
+        Pid ! stop
+    end.
+
+%% @doc Test that maybe_deep_inspect respects cooldown.
+%% With a 60s cooldown and an inspection recorded "just now", the state
+%% must come back unchanged.
+maybe_deep_inspect_cooldown_test() ->
+    Pid = spawn(fun() -> receive stop -> ok end end),
+    try
+        Info = [{timeout, 100}, {in, undefined}, {out, undefined}],
+        Now = erlang:monotonic_time(millisecond),
+        State = #{
+            opts => #{deep_inspect_ms => 50, deep_inspect_interval_ms => 60_000},
+            last_deep_inspect => Now
+        },
+        State2 = maybe_deep_inspect(Pid, Info, State),
+        ?assertEqual(Now, maps:get(last_deep_inspect, State2))
+    after
+        %% Release the helper process even when an assertion fails.
+        Pid ! stop
+    end.
+
+%% @doc Test that maybe_deep_inspect skips when below threshold.
+%% A timeout of 10 is below the configured threshold of 50, so the
+%% timestamp must stay at its initial value.
+maybe_deep_inspect_below_threshold_test() ->
+    Pid = spawn(fun() -> receive stop -> ok end end),
+    try
+        Info = [{timeout, 10}, {in, undefined}, {out, undefined}],
+        State = #{
+            opts => #{deep_inspect_ms => 50, deep_inspect_interval_ms => 0},
+            last_deep_inspect => 0
+        },
+        State2 = maybe_deep_inspect(Pid, Info, State),
+        ?assertEqual(0, maps:get(last_deep_inspect, State2))
+    after
+        %% Release the helper process even when an assertion fails.
+        Pid ! stop
+    end.
diff --git a/src/hb_test_utils.erl b/src/hb_test_utils.erl index 5aff04bd3..2deb1ca44 100644 --- a/src/hb_test_utils.erl +++ b/src/hb_test_utils.erl @@ -7,11 +7,12 @@ -export([benchmark/1, benchmark/2, benchmark/3, benchmark_iterations/2]). -export([benchmark_print/2, benchmark_print/3, benchmark_print/4]). -export([compare_events/3, compare_events/4, compare_events/5]). +-export([preload/2]). -include("include/hb.hrl"). -include_lib("eunit/include/eunit.hrl"). %%% The default store module to use for testing. --define(DEFAULT_STORE_MODULE, hb_store_ets). +-define(DEFAULT_STORE_MODULE, hb_store_volatile). %%% The number of seconds to run a benchmark for when no time is specified. -define(DEFAULT_BENCHMARK_TIME, 1). @@ -261,3 +262,16 @@ format_time(Time) when is_integer(Time) -> hb_util:human_int(Time) ++ "s"; format_time(Time) -> hb_util:human_int(Time * 1000) ++ "ms". + +%% @doc Load ans104 binary files to a store. +preload(Opts, File) -> + {ok, SerializedItem} = file:read_file(hb_util:bin(File)), + Message = + hb_message:convert( + ar_bundles:deserialize(SerializedItem), + <<"structured@1.0">>, + <<"ans104@1.0">>, + Opts + ), + hb_cache:write(Message, Opts). + diff --git a/src/hb_util.erl b/src/hb_util.erl index 463dafabe..2ac1022c9 100644 --- a/src/hb_util.erl +++ b/src/hb_util.erl @@ -1,6 +1,6 @@ %% @doc A collection of utility functions for building with HyperBEAM. -module(hb_util). --export([int/1, float/1, atom/1, bin/1, list/1, map/1]). +-export([int/1, float/1, atom/1, bin/1, list/1, map/1, bool/1, bool_int/1]). -export([safe_int/1]). -export([ceil_int/2, floor_int/2]). -export([id/1, id/2, native_id/1, human_id/1, human_int/1, to_hex/1]). @@ -20,13 +20,14 @@ -export([maybe_throw/2]). -export([is_hb_module/1, is_hb_module/2, all_hb_modules/0]). -export([ok/1, ok/2, until/1, until/2, until/3, wait_until/2]). --export([count/2, mean/1, stddev/1, variance/1, weighted_random/1]). +-export([count/2, mean/1, stddev/1, variance/1, weighted_random/1, shuffle/1]). 
-export([unique/1]). -export([split_depth_string_aware/2, split_depth_string_aware_single/2]). -export([unquote/1, split_escaped_single/2]). -export([check_size/2, check_value/2, check_type/2, ok_or_throw/3]). -export([all_atoms/0, binary_is_atom/1]). -export([lower_case_keys/2]). +-export([base58_encode/1]). -include("include/hb.hrl"). @@ -83,6 +84,24 @@ bin(Value) when is_list(Value) -> bin(Value) when is_binary(Value) -> Value. +%% @doc Coerce a value to a boolean. +bool(Value) -> + case Value of + true -> true; + false -> false; + <<"true">> -> true; + <<"false">> -> false; + <<"1">> -> true; + <<"0">> -> false; + 1 -> true; + 0 -> false; + _ -> false + end. + +%% @doc Coerce a boolean to 1 or 0. +bool_int(true) -> 1; +bool_int(false) -> 0. + %% @doc Coerce a value to a string list. list(Value) when is_binary(Value) -> binary_to_list(Value); @@ -223,6 +242,8 @@ native_id(Wallet = {_Priv, _Pub}) -> %% is returned as is. human_id(Bin) when is_binary(Bin) andalso byte_size(Bin) == 32 -> encode(Bin); +human_id(Bin) when is_binary(Bin) andalso byte_size(Bin) == 44 -> + Bin; human_id(Bin) when is_binary(Bin) andalso byte_size(Bin) == 43 -> Bin; human_id(Bin) when is_binary(Bin) andalso byte_size(Bin) == 42 -> @@ -242,12 +263,12 @@ add_commas(List) -> List. %% @doc Encode a binary to URL safe base64 binary string. encode(Bin) -> - b64fast:encode(Bin). + b64rs:encode(Bin). %% @doc Try to decode a URL safe base64 into a binary or throw an error when %% invalid. decode(Input) -> - b64fast:decode(Input). + b64rs:decode(Input). %% @doc Safely encode a binary to URL safe base64. 
safe_encode(Bin) when is_binary(Bin) -> @@ -341,23 +362,45 @@ find_target_path(Msg, Opts) -> case hb_ao:get(<<"route-path">>, Msg, not_found, Opts) of not_found -> ?event({find_target_path, {msg, Msg}, not_found}), - hb_ao:get(<<"path">>, Msg, no_path, Opts); - RoutePath -> RoutePath + case hb_ao:get(<<"path">>, Msg, no_path, Opts) of + no_path -> no_path; + Path -> {<<"path">>, Path} + end; + RoutePath -> + {<<"route-path">>, RoutePath} end. %% @doc Check if a message matches a given template. %% Templates can be either: -%% - A map: Uses structural matching against the message +%% - A map: Optional path regex match, then structural matching for remaining keys %% - A binary regex: Matches against the message's target path %% Returns true/false for map templates, or regex match result for binary templates. -template_matches(ToMatch, Template, _Opts) when is_map(Template) -> - case hb_message:match(Template, ToMatch, primary) of - {mismatch, value, _Key, _Val1, _Val2} -> false; - Match -> Match +template_matches(ToMatch, Template, Opts) when is_map(Template) -> + case find_target_path(Template, Opts) of + no_path -> + template_message_match(ToMatch, Template, Opts); + {TargetKey, Regex} -> + template_regex_match(ToMatch, hb_ao:normalize_key(Regex), Opts) andalso + template_message_match( + ToMatch, + hb_maps:remove(TargetKey, Template, Opts), + Opts + ) end; template_matches(ToMatch, Regex, Opts) when is_binary(Regex) -> - MsgPath = find_target_path(ToMatch, Opts), - hb_path:regex_matches(MsgPath, Regex). + template_regex_match(ToMatch, Regex, Opts). + +template_regex_match(ToMatch, Regex, Opts) -> + case find_target_path(ToMatch, Opts) of + no_path -> false; + {_TargetKey, MsgPath} -> hb_path:regex_matches(MsgPath, Regex) + end. + +template_message_match(ToMatch, TemplateWithoutPath, Opts) -> + case hb_message:match(TemplateWithoutPath, ToMatch, primary, Opts) of + {mismatch, value, _Key, _Val1, _Val2} -> false; + Match -> Match + end. 
%% @doc Label a list of elements with a number. number(List) -> @@ -780,4 +823,20 @@ lower_case_keys(Map, Opts) -> #{}, Map, Opts - ). \ No newline at end of file + ). + +%% @doc Base58 encode a binary; each leading zero byte is emitted as `1`. +base58_encode(<<0, Rest/binary>>) -> + Encoded = base58_encode(Rest), + <<$1, Encoded/binary>>; +base58_encode(Bin) when is_binary(Bin) -> + base58_encode_int(binary:decode_unsigned(Bin)). + +base58_encode_int(0) -> + <<>>; +base58_encode_int(N) -> + Alphabet = <<"123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz">>, + Rem = N rem 58, + Char = binary:at(Alphabet, Rem), + Rest = base58_encode_int(N div 58), + <<Rest/binary, Char>>. \ No newline at end of file diff --git a/src/html/hyperbuddy@1.0/index.html b/src/html/hyperbuddy@1.0/index.html index 049ae14fa..aa2f6ef7f 100644 --- a/src/html/hyperbuddy@1.0/index.html +++ b/src/html/hyperbuddy@1.0/index.html @@ -5,10 +5,10 @@ HyperBEAM - +
- + diff --git a/src/include/ar.hrl b/src/include/ar.hrl index 751acef95..b0ff51358 100644 --- a/src/include/ar.hrl +++ b/src/include/ar.hrl @@ -14,6 +14,7 @@ -define(DEFAULT_ANCHOR, <<>>). -define(DEFAULT_TARGET, <<>>). -define(DEFAULT_DATA_ROOT, <<>>). +-define(DEFAULT_DATA_SIZE, 0). -define(DEFAULT_QUANTITY, 0). -define(DEFAULT_REWARD, 0). @@ -54,7 +55,7 @@ data = ?DEFAULT_DATA, manifest = undefined, %% Size in bytes of the transaction data. - data_size = 0, + data_size = ?DEFAULT_DATA_SIZE, %% Deprecated. Not used, not gossiped. data_tree = [], %% The Merkle root of the Merkle tree of data chunks. @@ -92,15 +93,34 @@ -define(HASH_ALG, sha256). -define(RSA_SIGN_ALG, rsa). +-define(RSA_SIGN_TYPE, <<"rsa-pss-sha256">>). -define(RSA_PRIV_KEY_SZ, 4096). -define(RSA_KEY_TYPE, {?RSA_SIGN_ALG, 65537}). -define(ECDSA_SIGN_ALG, ecdsa). +-define(ECDSA_SIGN_TYPE, <<"ecdsa-secp256k1-sha256">>). -define(ECDSA_TYPE_BYTE, <<2>>). +-define(ECDSA_KEY_TYPE, {?ECDSA_SIGN_ALG, secp256k1}). -define(EDDSA_SIGN_ALG, eddsa). +-define(EDDSA_SIGN_TYPE, <<"ed25519-sha512">>). -define(EDDSA_TYPE_BYTE, <<3>>). --define(ECDSA_KEY_TYPE, {?ECDSA_SIGN_ALG, secp256k1}). +-define(EDDSA_KEY_TYPE, {?EDDSA_SIGN_ALG, ed25519}). + +-define(SOLANA_SIGN_ALG, solana). +-define(SOLANA_SIGN_TYPE, <<"solana">>). +-define(SOLANA_TYPE_BYTE, <<4>>). +-define(SOLANA_KEY_TYPE, solana). + +-define(ETHEREUM_SIGN_ALG, ethereum). +-define(ETHEREUM_SIGN_TYPE, <<"ethereum">>). +-define(ETHEREUM_TYPE_BYTE, <<3>>). +-define(ETHEREUM_KEY_TYPE, ethereum). + +-define(TYPED_ETHEREUM_SIGN_ALG, typed_ethereum). +-define(TYPED_ETHEREUM_SIGN_TYPE, <<"typed_ethereum">>). +-define(TYPED_ETHEREUM_TYPE_BYTE, <<7>>). +-define(TYPED_ETHEREUM_KEY_TYPE, typed_ethereum). %% The default key type used by transactions that do not specify a signature type. -define(DEFAULT_KEY_TYPE, ?RSA_KEY_TYPE). @@ -112,3 +132,8 @@ -define(BUNDLE_KEYS, [ <<"bundle-format">>, <<"bundle-version">>, <<"bundle-map">>]). 
+ +%% The threshold was determined on the mainnet at the 2.5 fork block. The chunks +%% submitted after the threshold must adhere to stricter validation rules. +%% This offset is about half way through partition 8 +-define(STRICT_DATA_SPLIT_THRESHOLD, 30_607_159_107_830). diff --git a/src/include/dev_bundler.hrl b/src/include/dev_bundler.hrl new file mode 100644 index 000000000..ce585f294 --- /dev/null +++ b/src/include/dev_bundler.hrl @@ -0,0 +1,41 @@ +%%% Shared state and task records for the bundler server and workers. + +-record(state, { + max_size, + max_idle_time, + max_items, + queue, + bytes, + workers, + task_queue, + bundles, + opts, + dispatch_ref +}). + +-record(task, { + bundle_id, + type, + data, + opts, + retry_count = 0 +}). + +-record(proof, { + proof, + status +}). + +-record(bundle, { + id, + items, + status, + tx, + proofs, + start_time +}). + +-define(DEFAULT_NUM_WORKERS, 20). +-define(DEFAULT_RETRY_BASE_DELAY_MS, 1000). +-define(DEFAULT_RETRY_MAX_DELAY_MS, 600000). +-define(DEFAULT_RETRY_JITTER, 0.25). diff --git a/src/include/hb.hrl b/src/include/hb.hrl index 62c968e18..8312ed4f8 100644 --- a/src/include/hb.hrl +++ b/src/include/hb.hrl @@ -32,7 +32,7 @@ -define(event(Topic, X), hb_event:log(Topic, X, ?MODULE, ?FUNCTION_NAME, ?LINE)). -define(event(Topic, X, Opts), hb_event:log(maps:get(topic, Opts, Topic), X, ?MODULE, ?FUNCTION_NAME, ?LINE, Opts)). -define(debug_wait(T), hb:debug_wait(T, ?MODULE, ?FUNCTION_NAME, ?LINE)). --define(debug_print(X), hb_format:print(X, ?MODULE, ?FUNCTION_NAME, ?LINE)). +-define(debug_print(X), hb_event:debug_print(X, ?MODULE, ?FUNCTION_NAME, ?LINE)). -define(no_prod(X), hb:no_prod(X, ?MODULE, ?LINE)). %%% Macro shortcuts for debugging. 
diff --git a/src/include/hb_arweave_nodes.hrl b/src/include/hb_arweave_nodes.hrl new file mode 100644 index 000000000..73cba8b53 --- /dev/null +++ b/src/include/hb_arweave_nodes.hrl @@ -0,0 +1,225 @@ +-define(ARWEAVE_BOOTSTRAP_DATA_NODES, +[ + %% Partitions 0-15 + #{ + <<"match">> => <<"^/arweave">>, + <<"min">> => 0, + <<"max">> => 57_600_000_000_000, + <<"center">> => 28_800_000_000_000, + <<"with">> => <<"http://data-1.arweave.xyz:1984">> + }, + #{ + <<"match">> => <<"^/arweave">>, + <<"min">> => 0, + <<"max">> => 57_600_000_000_000, + <<"center">> => 28_800_000_000_000, + <<"with">> => <<"http://data-13.arweave.xyz:1984">> + }, + %% Partitions 0-3 + #{ + <<"match">> => <<"^/arweave">>, + <<"min">> => 0, + <<"max">> => 14_400_000_000_000, + <<"center">> => 7_200_000_000_000, + <<"with">> => <<"http://data-2.arweave.xyz:1984">> + }, + %% Partitions 4-7 + #{ + <<"match">> => <<"^/arweave">>, + <<"min">> => 14_400_000_000_000, + <<"max">> => 28_800_000_000_000, + <<"center">> => 21_600_000_000_000, + <<"with">> => <<"http://data-3.arweave.xyz:1984">> + }, + %% Partitions 8-11 + #{ + <<"match">> => <<"^/arweave">>, + <<"min">> => 28_800_000_000_000, + <<"max">> => 43_200_000_000_000, + <<"center">> => 36_000_000_000_000, + <<"with">> => <<"http://data-4.arweave.xyz:1984">> + }, + %% Partitions 12-15 + #{ + <<"match">> => <<"^/arweave">>, + <<"min">> => 43_200_000_000_000, + <<"max">> => 57_600_000_000_000, + <<"center">> => 50_400_000_000_000, + <<"with">> => <<"http://data-5.arweave.xyz:1984">> + }, + %% Partitions 16-31 + #{ + <<"match">> => <<"^/arweave">>, + <<"min">> => 57_600_000_000_000, + <<"max">> => 115_200_000_000_000, + <<"center">> => 86_400_000_000_000, + <<"with">> => <<"http://data-2.arweave.xyz:1984">> + }, + #{ + <<"match">> => <<"^/arweave">>, + <<"min">> => 57_600_000_000_000, + <<"max">> => 115_200_000_000_000, + <<"center">> => 86_400_000_000_000, + <<"with">> => <<"http://data-3.arweave.xyz:1984">> + }, + #{ + <<"match">> => <<"^/arweave">>, + 
<<"min">> => 57_600_000_000_000, + <<"max">> => 115_200_000_000_000, + <<"center">> => 86_400_000_000_000, + <<"with">> => <<"http://data-14.arweave.xyz:1984">> + }, + #{ + <<"match">> => <<"^/arweave">>, + <<"min">> => 57_600_000_000_000, + <<"max">> => 115_200_000_000_000, + <<"center">> => 86_400_000_000_000, + <<"with">> => <<"http://data-15.arweave.xyz:1984">> + }, + %% Partitions 32-47 + #{ + <<"match">> => <<"^/arweave">>, + <<"min">> => 115_200_000_000_000, + <<"max">> => 172_800_000_000_000, + <<"center">> => 144_000_000_000_000, + <<"with">> => <<"http://data-4.arweave.xyz:1984">> + }, + #{ + <<"match">> => <<"^/arweave">>, + <<"min">> => 115_200_000_000_000, + <<"max">> => 172_800_000_000_000, + <<"center">> => 144_000_000_000_000, + <<"with">> => <<"http://data-5.arweave.xyz:1984">> + }, + #{ + <<"match">> => <<"^/arweave">>, + <<"min">> => 115_200_000_000_000, + <<"max">> => 172_800_000_000_000, + <<"center">> => 144_000_000_000_000, + <<"with">> => <<"http://data-16.arweave.xyz:1984">> + }, + #{ + <<"match">> => <<"^/arweave">>, + <<"min">> => 115_200_000_000_000, + <<"max">> => 172_800_000_000_000, + <<"center">> => 144_000_000_000_000, + <<"with">> => <<"http://data-17.arweave.xyz:1984">> + } + % Exclude these data nodes for now since their partitions are covered + % by the tip nodes (and the tip nodes are faster to read from). 
+ % %% Partitions 48-63 + % #{ + % <<"match">> => <<"^/arweave">>, + % <<"min">> => 172_800_000_000_000, + % <<"max">> => 230_400_000_000_000, + % <<"center">> => 201_600_000_000_000, + % <<"with">> => <<"http://data-6.arweave.xyz:1984">>, + % <<"opts">> => #{ http_client => gun, protocol => http2 } + % }, + % #{ + % <<"match">> => <<"^/arweave">>, + % <<"min">> => 172_800_000_000_000, + % <<"max">> => 230_400_000_000_000, + % <<"center">> => 201_600_000_000_000, + % <<"with">> => <<"http://data-7.arweave.xyz:1984">>, + % <<"opts">> => #{ http_client => gun, protocol => http2 } + % }, + % %% Partitions 64-126 + % #{ + % <<"match">> => <<"^/arweave">>, + % <<"min">> => 230_400_000_000_000, + % <<"max">> => 457_200_000_000_000, + % <<"center">> => 343_800_000_000_000, + % <<"with">> => <<"http://data-8.arweave.xyz:1984">>, + % <<"opts">> => #{ http_client => gun, protocol => http2 } + % }, + % %% Partitions 75-138 + % #{ + % <<"match">> => <<"^/arweave">>, + % <<"min">> => 270_000_000_000_000, + % <<"max">> => 500_400_000_000_000, + % <<"center">> => 385_200_000_000_000, + % <<"with">> => <<"http://data-9.arweave.xyz:1984">>, + % <<"opts">> => #{ http_client => gun, protocol => http2 } + % }, + % #{ + % <<"match">> => <<"^/arweave">>, + % <<"min">> => 270_000_000_000_000, + % <<"max">> => 500_400_000_000_000, + % <<"center">> => 385_200_000_000_000, + % <<"with">> => <<"http://data-10.arweave.xyz:1984">>, + % <<"opts">> => #{ http_client => gun, protocol => http2 } + % }, + % #{ + % <<"match">> => <<"^/arweave">>, + % <<"min">> => 270_000_000_000_000, + % <<"max">> => 500_400_000_000_000, + % <<"center">> => 385_200_000_000_000, + % <<"with">> => <<"http://data-11.arweave.xyz:1984">>, + % <<"opts">> => #{ http_client => gun, protocol => http2 } + % }, + % #{ + % <<"match">> => <<"^/arweave">>, + % <<"min">> => 270_000_000_000_000, + % <<"max">> => 500_400_000_000_000, + % <<"center">> => 385_200_000_000_000, + % <<"with">> => <<"http://data-12.arweave.xyz:1984">>, + 
% <<"opts">> => #{ http_client => gun, protocol => http2 } + % } +]). + +-define(ARWEAVE_BOOTSTRAP_TIP_NODES, +[ + %% Partitions 48-107 + #{ + <<"match">> => <<"^/arweave">>, + <<"min">> => 172_800_000_000_000, + <<"max">> => 388_800_000_000_000, + <<"center">> => 280_800_000_000_000, + <<"with">> => <<"http://tip-1.arweave.xyz:1984">> + }, + #{ + <<"match">> => <<"^/arweave">>, + <<"min">> => 172_800_000_000_000, + <<"max">> => 388_800_000_000_000, + <<"center">> => 280_800_000_000_000, + <<"with">> => <<"http://tip-2.arweave.xyz:1984">> + }, + #{ + <<"match">> => <<"^/arweave">>, + <<"min">> => 172_800_000_000_000, + <<"max">> => 388_800_000_000_000, + <<"center">> => 280_800_000_000_000, + <<"with">> => <<"http://tip-3.arweave.xyz:1984">> + }, + #{ + <<"match">> => <<"^/arweave">>, + <<"min">> => 172_800_000_000_000, + <<"max">> => 388_800_000_000_000, + <<"center">> => 280_800_000_000_000, + <<"with">> => <<"http://tip-4.arweave.xyz:1984">> + }, + #{ + <<"match">> => <<"^/arweave">>, + <<"min">> => 172_800_000_000_000, + <<"max">> => 388_800_000_000_000, + <<"center">> => 280_800_000_000_000, + <<"with">> => <<"http://tip-5.arweave.xyz:1984">> + } +]). + +-define(ARWEAVE_BOOTSTRAP_CHAIN_NODES, +[ + #{ + <<"match">> => <<"^/arweave">>, + <<"with">> => <<"http://chain-3.arweave.xyz:1984">> + }, + #{ + <<"match">> => <<"^/arweave">>, + <<"with">> => <<"http://chain-1.arweave.xyz:1984">> + }, + #{ + <<"match">> => <<"^/arweave">>, + <<"with">> => <<"http://chain-2.arweave.xyz:1984">> + } +]). diff --git a/src/include/hb_http_client.hrl b/src/include/hb_http_client.hrl new file mode 100644 index 000000000..7bdcb0374 --- /dev/null +++ b/src/include/hb_http_client.hrl @@ -0,0 +1,10 @@ +-define(DEFAULT_RETRIES, 0). +-define(DEFAULT_RETRY_TIME, 1000). +-define(DEFAULT_KEEPALIVE_TIMEOUT, 60_000). +-define(DEFAULT_CONNECT_TIMEOUT, 60_000). +-define(DEFAULT_HACKNEY_RECEIVE_TIMEOUT, 120_000). +-define(DEFAULT_HACKNEY_CHECKOUT_TIMEOUT, 8_000). 
+ +%% Hackney pool +-define(HACKNEY_POOL, hb_hackney_pool). +-define(DEFAULT_HACKNEY_MAX_CONNECTIONS, 100). diff --git a/src/include/hb_opts.hrl b/src/include/hb_opts.hrl new file mode 100644 index 000000000..b15061513 --- /dev/null +++ b/src/include/hb_opts.hrl @@ -0,0 +1,2 @@ +-define(DEFAULT_HTTP_CLIENT, hackney). + diff --git a/src/secp256k1_nif.erl b/src/secp256k1_nif.erl new file mode 100644 index 000000000..6c0b2fec6 --- /dev/null +++ b/src/secp256k1_nif.erl @@ -0,0 +1,52 @@ +-module(secp256k1_nif). +-export([sign/2, sign/3, ecrecover/2, ecrecover/3, sign_recoverable/2, recover_pk_and_verify/2]). + +-on_load(init/0). + +%% Based on Arweave's src/secp256k1_nif.erl + +init() -> + PrivDir = code:priv_dir(hb), + ok = erlang:load_nif(filename:join([PrivDir, "secp256k1_arweave"]), 0). + +sign_recoverable(_Digest, _PrivateBytes) -> + erlang:nif_error(nif_not_loaded). + +recover_pk_and_verify(_Digest, _Signature) -> + erlang:nif_error(nif_not_loaded). + +%% @doc DigestType can be `sha256` or `ethereum`. +sign(Msg, PrivBytes) -> + sign(Msg, PrivBytes, sha256). +sign(Msg, PrivBytes, DigestType) -> + Digest = digest_message(DigestType, Msg), + {ok, Signature} = sign_recoverable(Digest, PrivBytes), + Signature. + +%% @doc DigestType can be `sha256` or `ethereum`. +ecrecover(Msg, Signature) -> + ecrecover(Msg, Signature, sha256). +ecrecover(Msg, Signature, DigestType) -> + Digest = digest_message(DigestType, Msg), + NormalizedSig = normalize_signature(Signature, DigestType), + case recover_pk_and_verify(Digest, NormalizedSig) of + {ok, true, PubKey} -> {true, PubKey}; + {ok, false, _PubKey} -> {false, <<>>}; + {error, _Reason} -> {false, <<>>} + end. + +digest_message(sha256, Msg) -> crypto:hash(sha256, Msg); +digest_message(ethereum, Msg) -> ethereum_hash(Msg). 
+ +%% @doc Normalize Ethereum v values: 27/28 -> 0/1 (NOTE(review): binary patterns reconstructed; assumes a 65-byte r||s||v signature - confirm) +normalize_signature(<<RS:64/binary, V>>, ethereum) when V >= 27 -> + <<RS/binary, (V - 27)>>; +normalize_signature(Signature, _) -> + Signature.
+ +%% @doc Ethereum EIP-191 personal_sign hash: +%% keccak256("\x19Ethereum Signed Message:\n" + len(msg) + msg) +ethereum_hash(Msg) -> + Prefix = <<"\x19Ethereum Signed Message:\n">>, + Len = integer_to_binary(byte_size(Msg)), + hb_keccak:keccak_256(<<Prefix/binary, Len/binary, Msg/binary>>). diff --git a/test/arbundles.js/ans-104-manifest-42jky7O3rzKkMOfHBXgK-304YjulzEYqHc9qyjT3efA.bin b/test/arbundles.js/ans-104-manifest-42jky7O3rzKkMOfHBXgK-304YjulzEYqHc9qyjT3efA.bin new file mode 100644 index 000000000..898f953ae Binary files /dev/null and b/test/arbundles.js/ans-104-manifest-42jky7O3rzKkMOfHBXgK-304YjulzEYqHc9qyjT3efA.bin differ diff --git a/test/arbundles.js/ans-104-manifest-index-Tqh6oIS2CLUaDY11YUENlvvHmDim1q16pMyXAeSKsFM.bin b/test/arbundles.js/ans-104-manifest-index-Tqh6oIS2CLUaDY11YUENlvvHmDim1q16pMyXAeSKsFM.bin new file mode 100644 index 000000000..31357b60e Binary files /dev/null and b/test/arbundles.js/ans-104-manifest-index-Tqh6oIS2CLUaDY11YUENlvvHmDim1q16pMyXAeSKsFM.bin differ diff --git a/test/arbundles.js/ans-104-manifest-item-oLnQY-EgiYRg9XyO7yZ_mC0Ehy7TFR3UiDhFvxcohC4.bin b/test/arbundles.js/ans-104-manifest-item-oLnQY-EgiYRg9XyO7yZ_mC0Ehy7TFR3UiDhFvxcohC4.bin new file mode 100644 index 000000000..05097fd85 Binary files /dev/null and b/test/arbundles.js/ans-104-manifest-item-oLnQY-EgiYRg9XyO7yZ_mC0Ehy7TFR3UiDhFvxcohC4.bin differ diff --git a/test/arbundles.js/ans104-item-ed25519.bin b/test/arbundles.js/ans104-item-ed25519.bin new file mode 100644 index 000000000..3add4a4df Binary files /dev/null and b/test/arbundles.js/ans104-item-ed25519.bin differ diff --git a/test/arbundles.js/ans104-item-ethereum.bin b/test/arbundles.js/ans104-item-ethereum.bin new file mode 100644 index 000000000..2e800e1fe Binary files /dev/null and b/test/arbundles.js/ans104-item-ethereum.bin differ diff --git a/test/arbundles.js/ans104-item-solana.bin b/test/arbundles.js/ans104-item-solana.bin new file mode 100644 index 000000000..76dbe7db5 Binary files /dev/null and
b/test/arbundles.js/ans104-item-solana.bin differ diff --git a/test/arbundles.js/upload-items.js b/test/arbundles.js/upload-items.js index f5e629f51..9bc63d475 100644 --- a/test/arbundles.js/upload-items.js +++ b/test/arbundles.js/upload-items.js @@ -4,11 +4,11 @@ const { ArweaveSigner, createData } = require("@dha-team/arbundles"); // Configuration const BUNDLER_URL = "http://localhost:8734"; -const WALLET_PATH = "../../hyperbeam-key.json"; +const DEFAULT_WALLET = "../../hyperbeam-key.json"; const CONCURRENT_UPLOADS = 100; // Number of parallel uploads -async function performanceTest(itemCount, bytesPerItem = 0) { - const wallet = require(WALLET_PATH); +async function performanceTest(walletPath, itemCount, bytesPerItem = 0) { + const wallet = require(path.resolve(walletPath)); const signer = new ArweaveSigner(wallet); const endpoint = `${BUNDLER_URL}/~bundler@1.0/item?codec-device=ans104@1.0`; @@ -68,6 +68,7 @@ async function performanceTest(itemCount, bytesPerItem = 0) { const uploadPromises = batch.map(async (item) => { try { + console.log(`Posting data item: ${item.id}`); const response = await fetch(endpoint, { method: "POST", headers: { @@ -132,24 +133,28 @@ async function performanceTest(itemCount, bytesPerItem = 0) { // Main execution if (require.main === module) { - const itemCount = parseInt(process.argv[2], 10); - const bytesPerItem = parseInt(process.argv[3], 10) || 0; - + // If the first arg looks like a number, treat it as itemCount and use the default wallet + const firstIsNumber = !isNaN(parseInt(process.argv[2], 10)); + const walletPath = firstIsNumber ? DEFAULT_WALLET : (process.argv[2] || DEFAULT_WALLET); + const itemCount = parseInt(firstIsNumber ? process.argv[2] : process.argv[3], 10); + const bytesPerItem = parseInt(firstIsNumber ? 
process.argv[3] : process.argv[4], 10) || 0; + if (!itemCount || itemCount < 1 || isNaN(itemCount)) { - console.error("Usage: node upload-items.js <number_of_items> [bytes_per_item]"); + console.error("Usage: node upload-items.js [wallet_path] <number_of_items> [bytes_per_item]"); console.error(""); console.error("Arguments:"); + console.error(" wallet_path - Path to Arweave wallet JSON (default: ../../hyperbeam-key.json)"); console.error(" number_of_items - Number of data items to create and upload"); console.error(" bytes_per_item - Minimum size of each item in bytes (optional)"); console.error(""); console.error("Examples:"); console.error(" node upload-items.js 100"); - console.error(" node upload-items.js 100 1024 # 100 items, ~1KB each"); - console.error(" node upload-items.js 50 10485760 # 50 items, ~10MB each"); + console.error(" node upload-items.js 100 1024"); + console.error(" node upload-items.js /path/to/wallet.json 100 1024"); process.exit(1); } - performanceTest(itemCount, bytesPerItem) + performanceTest(walletPath, itemCount, bytesPerItem) .then(() => { process.exit(0); }) diff --git a/test/config.flat b/test/config.flat index be8c0d876..a28c640d9 100644 --- a/test/config.flat +++ b/test/config.flat @@ -1,3 +1,3 @@ port: 1234 -host: https://ao.computer +node_host: https://ao.computer await-inprogress: false \ No newline at end of file diff --git a/test/config.json b/test/config.json index 0be064746..37c2e2c96 100644 --- a/test/config.json +++ b/test/config.json @@ -1,7 +1,7 @@ { "port": 1234, "example": 9001, - "host": "https://ao.computer", + "node_host": "https://ao.computer", "await_inprogress": false, "store": [ {