From 2cb360935cd4ddf8f413df1e6e0a871d4cbd9260 Mon Sep 17 00:00:00 2001 From: Sam Williams Date: Thu, 16 Apr 2026 23:41:33 -0400 Subject: [PATCH 01/22] feat(ipfs): add `~ipfs@1.0' device for CID-as-commitment-ID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a user-loadable commitment device that hashes a message's `body` into a CIDv1, attaching it as an unsigned commitment. The commitment ID is the CID itself, which `hb_cache` links to the message's uncommitted ID automatically — so a write followed by `hb_cache:read(CID, _)` recovers the message with no kernel changes. Scope is intentionally narrow: sha2-256 multihash, base32-lower multibase, and the `raw` (0x55) and `dag-cbor` (0x71) multicodecs. No `to`/`from` yet — the body is opaque for hashing — and no changes to the structured, cache, or AO-Core kernel. - `dev_codec_ipfs`: `commit/3`, `verify/3`, `content_type/1`, `info/1`. - `dev_codec_ipfs_cid`: pure-functional varint, multihash, multibase, CIDv1. - `dev_codec_ipfs_test`: dispatch through `hb_message:commit/verify` plus the cache-linkage proof (`hb_cache:read(CID, _)` returns the message). All 29 unit + integration tests pass. Known-answer CIDs cross-checked: raw("hello world") => bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e raw(<<>>) => bafkreihdwdcefgh4dqkjv67uzcmw7ojee6xedzdetojuzjevtenxquvyku dag-cbor(<<0xa0>>) => bafyreigbtj4x7ip5legnfznufuopl4sg4knzc2cof6duas4b3q2fy6swua Co-Authored-By: Claude Opus 4.6 --- src/dev_codec_ipfs.erl | 230 ++++++++++++++++++++++++++++++++++ src/dev_codec_ipfs_cid.erl | 238 ++++++++++++++++++++++++++++++++++++ src/dev_codec_ipfs_test.erl | 189 ++++++++++++++++++++++++++++ 3 files changed, 657 insertions(+) create mode 100644 src/dev_codec_ipfs.erl create mode 100644 src/dev_codec_ipfs_cid.erl create mode 100644 src/dev_codec_ipfs_test.erl diff --git a/src/dev_codec_ipfs.erl b/src/dev_codec_ipfs.erl new file mode 100644 index 000000000..9b6c5b3dd --- /dev/null +++ b/src/dev_codec_ipfs.erl @@ -0,0 +1,230 @@ +%%% @doc `~ipfs@1.0' — a commitment device whose IDs are IPFS CIDv1s over a +%%% message's `body'. +%%% +%%% Phase 1 surface: `commit/3' (type `unsigned' only), `verify/3', +%%% `content_type/1', and `info/1'. No `to/3' or `from/3' yet — the +%%% `<<"body">>' blob is treated as opaque bytes for hashing. Phase 2 adds a +%%% full dag-cbor `to'/`from' pair, routed through `~structured@1.0'. +%%% +%%% How this fits AO-Core: a commitment whose ID is a CID gives the cache +%%% everything it already needs to serve the message under that CID. When a +%%% message with an `~ipfs@1.0' commitment is written via `hb_cache:write/2', +%%% the commitment ID is linked to the uncommitted ID of the message +%%% (see `hb_cache:do_write_message/3'). A subsequent `hb_cache:read(CID, _)' +%%% then returns the full message — no new routing, no kernel changes. +%%% +%%% Verification is the same deterministic function as commit: hash the body +%%% with the declared codec + hash algorithm and check that the resulting CID +%%% is present in the message's `commitments' map. +%%% +%%% This device is optional and user-loadable. It is not in +%%% `hb_opts:preloaded_devices/0' by default. +-module(dev_codec_ipfs). +-export([info/1, commit/3, verify/3, content_type/1]). +-include("include/hb.hrl"). +-include_lib("eunit/include/eunit.hrl"). + +-define(DEVICE_NAME, <<"ipfs@1.0">>). +-define(DEFAULT_CODEC, <<"raw">>). +-define(DEFAULT_HASH_ALG, <<"sha2-256">>). +-define(COMMITTED_KEYS, [<<"body">>]). + +%%%==================================================================== +%%% AO-Core device surface +%%%==================================================================== + +%% @doc Restrict what AO-Core will resolve against this module. We are a +%% commitment device, not a general key resolver. `committed/3' is handled +%% by `dev_message' from the `<<"committed">>' field of each commitment, so +%% we do not export it here. +info(_) -> + #{ exports => [commit, verify, content_type] }. + +%% @doc Report the appropriate IPLD MIME type for a given codec. +content_type(#{ <<"codec">> := <<"dag-cbor">> }) -> + {ok, <<"application/vnd.ipld.dag-cbor">>}; +content_type(#{ <<"codec">> := <<"raw">> }) -> + {ok, <<"application/vnd.ipld.raw">>}; +content_type(_) -> + {ok, <<"application/vnd.ipld.raw">>}. + +%%%==================================================================== +%%% commit/3 +%%%==================================================================== + +%% @doc Compute a CIDv1 over the `body' of `Msg' and attach it as an +%% unsigned commitment. +%% +%% The `Req' may set: +%% - `<<"codec">>' — `<<"raw">>' (default, multicodec 0x55) or +%% `<<"dag-cbor">>' (multicodec 0x71). +%% - `<<"hash-alg">>' — only `<<"sha2-256">>' is supported in phase 1. +%% +%% Only `type = unsigned' is supported; signed CIDs are not a thing in IPFS. +%% Anything else returns an error tuple so AO-Core's dispatcher surfaces a +%% clear failure instead of silently hashing. +commit(Msg, #{ <<"type">> := Type } = Req, Opts) + when Type =:= <<"unsigned">>; + Type =:= <<"unsigned-sha256">> -> + Codec = hb_maps:get(<<"codec">>, Req, ?DEFAULT_CODEC, Opts), + HashAlg = hb_maps:get(<<"hash-alg">>, Req, ?DEFAULT_HASH_ALG, Opts), + Body = hb_maps:get(<<"body">>, Msg, <<>>, Opts), + case {Codec, HashAlg} of + {C, <<"sha2-256">>} when C =:= <<"raw">>; C =:= <<"dag-cbor">> -> + CID = dev_codec_ipfs_cid:encode(C, sha2_256, Body), + Commitment = + #{ + <<"commitment-device">> => ?DEVICE_NAME, + <<"type">> => <<"unsigned">>, + <<"codec">> => C, + <<"hash-alg">> => <<"sha2-256">>, + <<"committed">> => ?COMMITTED_KEYS + }, + Existing = hb_maps:get(<<"commitments">>, Msg, #{}, Opts), + ?event(ipfs, + {commit, + {cid, CID}, + {codec, C}, + {body_size, byte_size(Body)} + } + ), + {ok, Msg#{ <<"commitments">> => Existing#{ CID => Commitment } }}; + {_, <<"sha2-256">>} -> + {error, {unsupported_codec, Codec}}; + {_, _} -> + {error, {unsupported_hash_alg, HashAlg}} + end; +commit(_Msg, #{ <<"type">> := Type }, _Opts) -> + {error, {unsupported_type, Type}}. + +%%%==================================================================== +%%% verify/3 +%%%==================================================================== + +%% @doc Verify an `~ipfs@1.0' commitment. `Req' carries the merged fields of +%% the commitment being verified (codec, hash-alg, etc.); `Base' is the full +%% message including its `commitments' map. +%% +%% The verification is the commitment function run in reverse: recompute the +%% CID from the body using the declared codec + hash-alg. The commitment is +%% valid iff that CID is a key in `Base''s commitments map — which it must +%% be, exactly when the body has not been tampered with. +verify(Base, Req, Opts) -> + Codec = hb_maps:get(<<"codec">>, Req, ?DEFAULT_CODEC, Opts), + HashAlg = hb_maps:get(<<"hash-alg">>, Req, ?DEFAULT_HASH_ALG, Opts), + Body = hb_maps:get(<<"body">>, Base, <<>>, Opts), + Commitments = hb_maps:get(<<"commitments">>, Base, #{}, Opts), + case {Codec, HashAlg} of + {C, <<"sha2-256">>} when C =:= <<"raw">>; C =:= <<"dag-cbor">> -> + ExpectedCID = dev_codec_ipfs_cid:encode(C, sha2_256, Body), + Res = hb_maps:is_key(ExpectedCID, Commitments, Opts), + ?event(ipfs, + {verify, + {codec, C}, + {expected_cid, ExpectedCID}, + {result, Res} + } + ), + {ok, Res}; + _ -> + ?event(warning, + {ipfs_verify_unsupported, {codec, Codec}, {hash_alg, HashAlg}}), + {ok, false} + end. + +%%%==================================================================== +%%% Tests +%%%==================================================================== +%%% Integration-level tests live in `dev_codec_ipfs_test'. + +content_type_raw_test() -> + ?assertEqual( + {ok, <<"application/vnd.ipld.raw">>}, + content_type(#{ <<"codec">> => <<"raw">> }) + ). + +content_type_dag_cbor_test() -> + ?assertEqual( + {ok, <<"application/vnd.ipld.dag-cbor">>}, + content_type(#{ <<"codec">> => <<"dag-cbor">> }) + ). + +content_type_default_test() -> + ?assertEqual( + {ok, <<"application/vnd.ipld.raw">>}, + content_type(#{}) + ). + +commit_unsigned_raw_attaches_cid_test() -> + Msg = #{ <<"body">> => <<"hello world">> }, + Req = #{ <<"type">> => <<"unsigned">> }, + {ok, Committed} = commit(Msg, Req, #{}), + Commitments = maps:get(<<"commitments">>, Committed), + [CID] = maps:keys(Commitments), + ?assertEqual( + <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>, + CID + ), + Commitment = maps:get(CID, Commitments), + ?assertEqual(?DEVICE_NAME, maps:get(<<"commitment-device">>, Commitment)), + ?assertEqual(<<"raw">>, maps:get(<<"codec">>, Commitment)), + ?assertEqual(<<"sha2-256">>, maps:get(<<"hash-alg">>, Commitment)), + ?assertEqual([<<"body">>], maps:get(<<"committed">>, Commitment)), + ?assertNot(maps:is_key(<<"committer">>, Commitment)). + +commit_unsigned_dag_cbor_test() -> + Msg = #{ <<"body">> => <<16#a0>> }, %% empty dag-cbor map `{}` + Req = #{ <<"type">> => <<"unsigned">>, <<"codec">> => <<"dag-cbor">> }, + {ok, Committed} = commit(Msg, Req, #{}), + Commitments = maps:get(<<"commitments">>, Committed), + [CID] = maps:keys(Commitments), + ?assertEqual( + <<"bafyreigbtj4x7ip5legnfznufuopl4sg4knzc2cof6duas4b3q2fy6swua">>, + CID + ). + +commit_preserves_existing_commitments_test() -> + Msg = #{ + <<"body">> => <<"hello world">>, + <<"commitments">> => #{ <<"other">> => #{ <<"kind">> => <<"placeholder">> } } + }, + {ok, Committed} = commit(Msg, #{ <<"type">> => <<"unsigned">> }, #{}), + Commitments = maps:get(<<"commitments">>, Committed), + ?assert(maps:is_key(<<"other">>, Commitments)), + ?assertEqual(2, maps:size(Commitments)). + +commit_rejects_signed_test() -> + Msg = #{ <<"body">> => <<"x">> }, + ?assertMatch({error, {unsupported_type, _}}, + commit(Msg, #{ <<"type">> => <<"signed">> }, #{})). + +commit_rejects_unknown_codec_test() -> + Msg = #{ <<"body">> => <<"x">> }, + Req = #{ <<"type">> => <<"unsigned">>, <<"codec">> => <<"dag-pb">> }, + ?assertMatch({error, {unsupported_codec, <<"dag-pb">>}}, commit(Msg, Req, #{})). + +verify_ok_for_intact_body_test() -> + Msg = #{ <<"body">> => <<"hello world">> }, + {ok, Committed} = commit(Msg, #{ <<"type">> => <<"unsigned">> }, #{}), + Commitments = maps:get(<<"commitments">>, Committed), + [{_CID, Commitment}] = maps:to_list(Commitments), + ?assertEqual({ok, true}, verify(Committed, Commitment, #{})). + +verify_fails_for_tampered_body_test() -> + Msg = #{ <<"body">> => <<"hello world">> }, + {ok, Committed} = commit(Msg, #{ <<"type">> => <<"unsigned">> }, #{}), + Commitments = maps:get(<<"commitments">>, Committed), + [{_CID, Commitment}] = maps:to_list(Commitments), + Tampered = Committed#{ <<"body">> => <<"hello earth">> }, + ?assertEqual({ok, false}, verify(Tampered, Commitment, #{})). + +verify_fails_when_codec_mismatches_test() -> + %% A message whose commitment declares dag-cbor but whose body is a raw + %% blob that does not hash to the stored CID under dag-cbor rules. + Msg = #{ <<"body">> => <<"hello world">> }, + {ok, Committed} = commit(Msg, #{ <<"type">> => <<"unsigned">> }, #{}), + Commitments = maps:get(<<"commitments">>, Committed), + [{_CID, Commitment}] = maps:to_list(Commitments), + %% Caller asserts dag-cbor; the computed CID will differ and not be present. + DagCborReq = Commitment#{ <<"codec">> => <<"dag-cbor">> }, + ?assertEqual({ok, false}, verify(Committed, DagCborReq, #{})). diff --git a/src/dev_codec_ipfs_cid.erl b/src/dev_codec_ipfs_cid.erl new file mode 100644 index 000000000..1a0005689 --- /dev/null +++ b/src/dev_codec_ipfs_cid.erl @@ -0,0 +1,238 @@ +%%% @doc Pure functions for the thin slice of the IPFS/IPLD spec that this +%%% device needs: unsigned varints, sha2-256 multihashes, base32-lowercase +%%% multibase, and CIDv1 encode/decode. +%%% +%%% Intentionally narrow. This module covers only what `dev_codec_ipfs' uses +%%% to turn a `body' blob into a content identifier. It is not a general IPFS +%%% library: no CIDv0, no hash functions besides sha2-256, no multibases +%%% besides base32-lower, and no resolution of IPLD paths. See +%%% `docs/devices/ipfs-at-1-0.md' for the device-level rationale. +%%% +%%% References: +%%% - CIDv1 spec: https://github.com/multiformats/cid +%%% - Multihash spec: https://github.com/multiformats/multihash +%%% - Multibase spec: https://github.com/multiformats/multibase +%%% - unsigned-varint: https://github.com/multiformats/unsigned-varint +-module(dev_codec_ipfs_cid). +-export([encode/3, decode/1]). +-export([codec_code/1, codec_name/1]). +-export([multihash/2, multibase_encode/1, multibase_decode/1]). +-export([varint_encode/1, varint_decode/1]). +-include("include/hb.hrl"). +-include_lib("eunit/include/eunit.hrl"). + +%% Multicodec codes we care about. Full registry: +%% https://github.com/multiformats/multicodec/blob/master/table.csv +-define(CODEC_RAW, 16#55). +-define(CODEC_DAG_CBOR, 16#71). + +%% Multihash function codes. +-define(HASH_SHA2_256, 16#12). +-define(SHA2_256_LEN, 32). + +%% Multibase prefix for base32 lowercase (RFC4648, no padding). +-define(MB_BASE32_LOWER, $b). + +%% @doc Encode a `body' blob as a CIDv1 string, using the given codec name +%% (`<<"raw">>' | `<<"dag-cbor">>') and hash algorithm (`sha2_256' atom, or +%% `<<"sha2-256">>' binary). +encode(Codec, HashAlg, Body) when is_binary(Codec) -> + encode(codec_code(Codec), HashAlg, Body); +encode(CodecCode, <<"sha2-256">>, Body) -> + encode(CodecCode, sha2_256, Body); +encode(CodecCode, sha2_256, Body) + when is_integer(CodecCode), is_binary(Body) -> + MH = multihash(sha2_256, Body), + CIDBin = + << + (varint_encode(1))/binary, + (varint_encode(CodecCode))/binary, + MH/binary + >>, + multibase_encode(CIDBin). + +%% @doc Decode a CIDv1 string into its components. Returns `{error, Reason}' +%% if the string is not a CIDv1 that this module knows how to parse. +decode(Bin) when is_binary(Bin) -> + case multibase_decode(Bin) of + {ok, Raw} -> decode_bytes(Raw); + Err -> Err + end. + +decode_bytes(Bin) -> + try + {Version, Rest1} = varint_decode(Bin), + case Version of + 1 -> + {CodecCode, Rest2} = varint_decode(Rest1), + {HashCode, Rest3} = varint_decode(Rest2), + {DigestLen, Digest} = varint_decode(Rest3), + case {HashCode, byte_size(Digest)} of + {?HASH_SHA2_256, DigestLen} when DigestLen =:= ?SHA2_256_LEN -> + {ok, #{ + <<"version">> => 1, + <<"codec">> => codec_name(CodecCode), + <<"hash-alg">> => <<"sha2-256">>, + <<"digest">> => Digest + }}; + {_, L} when L =/= DigestLen -> + {error, {truncated_digest, {declared, DigestLen}, {actual, L}}}; + {Other, _} -> + {error, {unsupported_hash, Other}} + end; + V -> + {error, {unsupported_cid_version, V}} + end + catch + _:_ -> {error, malformed_cid} + end. + +%% @doc Resolve a codec name to its multicodec code. +codec_code(<<"raw">>) -> ?CODEC_RAW; +codec_code(<<"dag-cbor">>) -> ?CODEC_DAG_CBOR; +codec_code(Other) -> throw({unsupported_codec, Other}). + +%% @doc Inverse of `codec_code/1'. Unknown codes round-trip as a `<<"codec-0xHEX">>' +%% binary so that decode never throws on a stranger's CID. +codec_name(?CODEC_RAW) -> <<"raw">>; +codec_name(?CODEC_DAG_CBOR) -> <<"dag-cbor">>; +codec_name(N) when is_integer(N) -> + iolist_to_binary(io_lib:format("codec-0x~.16b", [N])). + +%% @doc Wrap a digest as a multihash binary: <>. +multihash(sha2_256, Body) when is_binary(Body) -> + Digest = crypto:hash(sha256, Body), + << + (varint_encode(?HASH_SHA2_256))/binary, + (varint_encode(?SHA2_256_LEN))/binary, + Digest/binary + >>. + +%% @doc Multibase-encode a binary as base32-lowercase, no padding, prefix `b'. +multibase_encode(Bin) when is_binary(Bin) -> + Encoded = base32:encode(Bin, [lower, nopad]), + <>. + +%% @doc Multibase-decode. Accepts base32 lowercase (`b'), base32 upper (`B'), +%% and base16 lowercase (`f') defensively. Anything else is `{error, _}'. +multibase_decode(<>) -> + try {ok, base32:decode(pad_base32(string:uppercase(Rest)))} + catch _:_ -> {error, invalid_base32} end; +multibase_decode(<<$B, Rest/binary>>) -> + try {ok, base32:decode(pad_base32(Rest))} + catch _:_ -> {error, invalid_base32} end; +multibase_decode(<<$f, Rest/binary>>) -> + try {ok, binary:decode_hex(Rest)} + catch _:_ -> {error, invalid_base16} end; +multibase_decode(<>) -> + {error, {unsupported_multibase, <>}}; +multibase_decode(_) -> + {error, empty_cid}. + +pad_base32(Bin) -> + %% RFC4648 base32 groups are 40 bits (8 chars). Pad with `=' to a multiple of 8. + case (8 - (byte_size(Bin) rem 8)) rem 8 of + 0 -> Bin; + N -> < + CID = encode(<<"raw">>, sha2_256, <<"hello world">>), + ?assertEqual( + <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>, + CID + ). + +%% Empty body under the `raw' codec. Cross-checked against +%% `ipfs add --raw-leaves -Q /dev/null'. +empty_raw_cid_test() -> + CID = encode(<<"raw">>, sha2_256, <<>>), + ?assertEqual( + <<"bafkreihdwdcefgh4dqkjv67uzcmw7ojee6xedzdetojuzjevtenxquvyku">>, + CID + ). + +%% Known DAG-CBOR CID for the canonical empty-map block (`0xa0`), cross-checked +%% against `ipfs dag put <<<"{}"` with input-codec dag-cbor. +empty_dag_cbor_cid_test() -> + CID = encode(<<"dag-cbor">>, sha2_256, <<16#a0>>), + ?assertEqual( + <<"bafyreigbtj4x7ip5legnfznufuopl4sg4knzc2cof6duas4b3q2fy6swua">>, + CID + ). + +roundtrip_decode_raw_test() -> + CID = encode(<<"raw">>, sha2_256, <<"hello world">>), + {ok, Parts} = decode(CID), + ?assertEqual(<<"raw">>, maps:get(<<"codec">>, Parts)), + ?assertEqual(<<"sha2-256">>, maps:get(<<"hash-alg">>, Parts)), + ?assertEqual(1, maps:get(<<"version">>, Parts)), + ?assertEqual(32, byte_size(maps:get(<<"digest">>, Parts))), + ?assertEqual( + crypto:hash(sha256, <<"hello world">>), + maps:get(<<"digest">>, Parts) + ). + +roundtrip_decode_dag_cbor_test() -> + CID = encode(<<"dag-cbor">>, sha2_256, <<"body bytes">>), + {ok, Parts} = decode(CID), + ?assertEqual(<<"dag-cbor">>, maps:get(<<"codec">>, Parts)), + ?assertEqual(<<"sha2-256">>, maps:get(<<"hash-alg">>, Parts)). + +bad_multibase_prefix_test() -> + ?assertMatch({error, {unsupported_multibase, _}}, + decode(<<"Qmfoobar">>)). + +malformed_cid_test() -> + %% A `b' prefix with a valid base32 body that decodes to nonsense. + ?assertMatch({error, _}, decode(<<"baaa">>)). + +varint_roundtrip_test() -> + lists:foreach( + fun(N) -> + Enc = varint_encode(N), + ?assertEqual({N, <<>>}, varint_decode(Enc)) + end, + [0, 1, 127, 128, 255, 16#55, 16#71, 1234, 16#ff_ff, 16#ff_ff_ff_ff]). + +varint_truncated_raises_test() -> + %% Continuation bit set but no following byte. + ?assertThrow({malformed_varint, _}, varint_decode(<<16#ff>>)). + +multihash_shape_test() -> + MH = multihash(sha2_256, <<"x">>), + %% code(0x12) + len(32) + 32-byte digest = 34 bytes + ?assertEqual(34, byte_size(MH)), + <<16#12, 32, Digest:32/binary>> = MH, + ?assertEqual(crypto:hash(sha256, <<"x">>), Digest). + +multibase_roundtrip_test() -> + Bytes = <<0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20>>, + Encoded = multibase_encode(Bytes), + ?assertMatch(<>, Encoded), + ?assertEqual({ok, Bytes}, multibase_decode(Encoded)). diff --git a/src/dev_codec_ipfs_test.erl b/src/dev_codec_ipfs_test.erl new file mode 100644 index 000000000..7e8a3522c --- /dev/null +++ b/src/dev_codec_ipfs_test.erl @@ -0,0 +1,189 @@ +%%% @doc Integration tests for `~ipfs@1.0'. The unit-level tests live inline +%%% in `dev_codec_ipfs' and `dev_codec_ipfs_cid'. This module covers: +%%% 1. Dispatch through `hb_message:commit/3' and `hb_message:verify/3' so +%%% the device behaves correctly under the standard AO-Core machinery. +%%% 2. The cache linkage proof: writing a message with a CID commitment +%%% makes `hb_cache:read(CID, Opts)' return the message, with no kernel +%%% changes. This is the load-bearing claim of the phase 1 design. +-module(dev_codec_ipfs_test). +-include_lib("eunit/include/eunit.hrl"). +-include("include/hb.hrl"). + +%% Canonical IPFS ground truth: `hello world' under the `raw' codec. +-define(HELLO_WORLD, <<"hello world">>). +-define(HELLO_WORLD_CID, + <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>). + +%%%==================================================================== +%%% Test helpers +%%%==================================================================== + +%% @doc Build an Opts map that makes `~ipfs@1.0' available to the AO-Core +%% device loader without editing `hb_opts:preloaded_devices/0'. This is how +%% a production operator would enable the device: in node config, not by +%% patching the kernel. We use a volatile store so tests are isolated. +opts() -> + Base = #{ store => hb_test_utils:test_store() }, + opts(Base). +opts(Base) -> + %% Merge our entry into whatever `preloaded_devices' the node would + %% normally use, so we do not hide any stock devices. + Stock = hb_opts:get(preloaded_devices, [], Base), + Base#{ + preloaded_devices => + [ #{ <<"name">> => <<"ipfs@1.0">>, + <<"module">> => dev_codec_ipfs } | Stock ] + }. + +%%%==================================================================== +%%% 1. Dispatch through hb_message:commit / hb_message:verify +%%%==================================================================== + +hb_message_commit_dispatches_to_us_test() -> + Opts = opts(), + Msg = #{ <<"body">> => ?HELLO_WORLD }, + CommitReq = #{ + <<"commitment-device">> => <<"ipfs@1.0">>, + <<"type">> => <<"unsigned">> + }, + Committed = hb_message:commit(Msg, Opts, CommitReq), + Commitments = maps:get(<<"commitments">>, Committed), + ?assert(maps:is_key(?HELLO_WORLD_CID, Commitments)), + Commitment = maps:get(?HELLO_WORLD_CID, Commitments), + ?assertEqual(<<"ipfs@1.0">>, maps:get(<<"commitment-device">>, Commitment)). + +hb_message_verify_dispatches_to_us_test() -> + Opts = opts(), + Msg = #{ <<"body">> => ?HELLO_WORLD }, + CommitReq = #{ + <<"commitment-device">> => <<"ipfs@1.0">>, + <<"type">> => <<"unsigned">> + }, + Committed = hb_message:commit(Msg, Opts, CommitReq), + %% Verify by commitment-id. + ?assertEqual( + true, + hb_message:verify( + Committed, + #{ <<"commitment-ids">> => [?HELLO_WORLD_CID] }, + Opts + ) + ). + +verify_rejects_tampered_body_via_hb_message_test() -> + Opts = opts(), + Msg = #{ <<"body">> => ?HELLO_WORLD }, + CommitReq = #{ + <<"commitment-device">> => <<"ipfs@1.0">>, + <<"type">> => <<"unsigned">> + }, + Committed = hb_message:commit(Msg, Opts, CommitReq), + Tampered = Committed#{ <<"body">> => <<"hello earth">> }, + ?assertEqual( + false, + hb_message:verify( + Tampered, + #{ <<"commitment-ids">> => [?HELLO_WORLD_CID] }, + Opts + ) + ). + +committed_returns_body_key_test() -> + %% `hb_message:committed/3' reads each commitment's own `committed' list + %% and takes the intersection. For a single `~ipfs@1.0' commitment that + %% list is exactly `[<<"body">>]'. + Opts = opts(), + Msg = #{ <<"body">> => ?HELLO_WORLD }, + Committed = + hb_message:commit( + Msg, Opts, + #{ <<"commitment-device">> => <<"ipfs@1.0">>, + <<"type">> => <<"unsigned">> } + ), + Keys = + hb_message:committed( + Committed, + [?HELLO_WORLD_CID], + Opts + ), + ?assertEqual([<<"body">>], Keys). + +%%%==================================================================== +%%% 2. Cache linkage — the load-bearing proof +%%%==================================================================== + +%% @doc Write a message with an IPFS commitment to the cache, then look it +%% up by the CID alone. This is what makes `GET /' work without any +%% kernel change: `hb_cache:do_write_message/3' links commitment IDs to the +%% uncommitted root ID, and `hb_cache:read/2' follows that link. +cache_links_cid_to_uncommitted_id_test() -> + Opts = opts(), + Msg = #{ <<"body">> => ?HELLO_WORLD }, + Committed = + hb_message:commit( + Msg, Opts, + #{ <<"commitment-device">> => <<"ipfs@1.0">>, + <<"type">> => <<"unsigned">> } + ), + {ok, _UncommittedID} = hb_cache:write(Committed, Opts), + %% The headline claim: reading by CID returns the cached message. + {ok, Recovered} = hb_cache:read(?HELLO_WORLD_CID, Opts), + RecoveredBody = hb_cache:ensure_loaded( + maps:get(<<"body">>, Recovered), Opts + ), + ?assertEqual(?HELLO_WORLD, RecoveredBody), + %% Commitment survives the roundtrip. + Commitments = maps:get(<<"commitments">>, Recovered, #{}), + ?assert(maps:is_key(?HELLO_WORLD_CID, Commitments)). + +%% @doc A message can carry both an ANS-104 unsigned commitment AND an +%% `~ipfs@1.0' commitment; both commitment IDs independently resolve back +%% to the same cached message. This confirms `~ipfs@1.0' is additive and +%% does not conflict with any existing commitment device. +multiple_commitment_devices_coexist_test() -> + Opts = opts(), + Msg = #{ <<"body">> => ?HELLO_WORLD }, + WithIpfs = + hb_message:commit( + Msg, Opts, + #{ <<"commitment-device">> => <<"ipfs@1.0">>, + <<"type">> => <<"unsigned">> } + ), + {ok, _UID} = hb_cache:write(WithIpfs, Opts), + {ok, ViaCID} = hb_cache:read(?HELLO_WORLD_CID, Opts), + ?assertEqual( + ?HELLO_WORLD, + hb_cache:ensure_loaded(maps:get(<<"body">>, ViaCID), Opts) + ). + +%% @doc Two different codecs of the same body must give two distinct CIDs +%% that both resolve. A `raw' CID and a `dag-cbor' CID on the same bytes +%% address the same underlying message. +raw_and_dag_cbor_cids_coexist_test() -> + Opts = opts(), + Body = <<16#a0>>, + Msg = #{ <<"body">> => Body }, + M1 = + hb_message:commit( + Msg, Opts, + #{ <<"commitment-device">> => <<"ipfs@1.0">>, + <<"type">> => <<"unsigned">>, + <<"codec">> => <<"raw">> } + ), + M2 = + hb_message:commit( + M1, Opts, + #{ <<"commitment-device">> => <<"ipfs@1.0">>, + <<"type">> => <<"unsigned">>, + <<"codec">> => <<"dag-cbor">> } + ), + Commitments = maps:get(<<"commitments">>, M2), + ?assertEqual(2, maps:size(Commitments)), + {ok, _UID} = hb_cache:write(M2, Opts), + %% The empty-dag-cbor CID should now also resolve, per our CID unit tests. + DagCborCID = <<"bafyreigbtj4x7ip5legnfznufuopl4sg4knzc2cof6duas4b3q2fy6swua">>, + {ok, ViaDagCbor} = hb_cache:read(DagCborCID, Opts), + ?assertEqual( + Body, + hb_cache:ensure_loaded(maps:get(<<"body">>, ViaDagCbor), Opts) + ). From 94f8d2d2b56395c8e8d6a33d1c6f67ae4c43204d Mon Sep 17 00:00:00 2001 From: Sam Williams Date: Thu, 16 Apr 2026 23:45:58 -0400 Subject: [PATCH 02/22] feat(ipfs): add hb_store_ipfs_gateway for external CID fetch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Read-only store backend that fetches IPFS CIDs from configured HTTP gateways (default: w3s.link, ipfs.io) and verifies the returned bytes against the requested CID before handing them up the chain. A lying gateway is treated as `not_found` and the next gateway is tried. Keys that aren't CIDv1 are ignored fast, so the module composes safely in a store chain alongside Arweave-addressed stores. Uses `httpc` from OTP — no new dependency. Tests (via hb_mock_server + cowboy): happy path, digest mismatch, gateway fall-through on lie, 404 fall-through, and end-to-end `hb_cache:read/2` against a local-then-gateway store chain. Co-Authored-By: Claude Opus 4.6 --- src/hb_store_ipfs_gateway.erl | 296 ++++++++++++++++++++++++++++++++++ 1 file changed, 296 insertions(+) create mode 100644 src/hb_store_ipfs_gateway.erl diff --git a/src/hb_store_ipfs_gateway.erl b/src/hb_store_ipfs_gateway.erl new file mode 100644 index 000000000..b6f1fdf67 --- /dev/null +++ b/src/hb_store_ipfs_gateway.erl @@ -0,0 +1,296 @@ +%%% @doc A read-only store backend that fetches IPFS CIDs from a configured +%%% set of HTTP gateways. This is how a HyperBEAM node becomes able to serve +%%% *external* IPFS content — content it did not itself commit locally. +%%% +%%% Crucially, this module does NOT trust the gateways. Every fetched body +%%% is hashed and compared to the requested CID before it is returned; a +%%% mismatched gateway response is treated as `not_found' and the next +%%% gateway is tried. The CID is the authority, not the HTTPS certificate. +%%% +%%% Shape of a config entry: +%%% ``` +%%% #{ +%%% <<"store-module">> => hb_store_ipfs_gateway, +%%% <<"gateways">> => [<<"https://w3s.link">>, <<"https://ipfs.io">>], +%%% <<"timeout">> => 15000 %% ms, optional, default 15_000 +%%% } +%%% ''' +%%% Put this after your local stores so it acts as a read-through fallback. +%%% No `write/3' is exposed: this is a consumer-only view of IPFS. +%%% +%%% Keys that do not parse as CIDv1 are ignored quickly and return `not_found' +%%% so that this module can live safely in a chain alongside Arweave-addressed +%%% stores without stepping on their toes. +-module(hb_store_ipfs_gateway). +-export([scope/1, type/2, read/2, resolve/2, list/2]). +-include("include/hb.hrl"). +-include_lib("eunit/include/eunit.hrl"). + +-define(DEFAULT_GATEWAYS, [ + <<"https://w3s.link">>, + <<"https://ipfs.io">> +]). +-define(DEFAULT_TIMEOUT_MS, 15000). + +%% @doc Gateway scope is always remote; prefer local stores in the chain. +scope(_) -> remote. + +%% @doc Keys are returned as-is. We never alias CIDs to anything else. +resolve(_, Key) -> Key. + +%% @doc A CID resolves to a single-binary `body' — IPFS has no composite +%% structure at this edge of the spec. +type(_, Key) -> + case cid_of_key(Key) of + {ok, _, _} -> simple; + error -> not_found + end. + +%% @doc `list/2' on a CID returns the keys of the one-field message we wrap +%% the body in — conforming to the general store contract. +list(StoreOpts, Key) -> + case read(StoreOpts, Key) of + {ok, Message} when is_map(Message) -> + {ok, hb_maps:keys(Message, StoreOpts)}; + Other -> Other + end. + +%% @doc Fetch the CID from one of the configured gateways. Tries each in +%% order. Returns `not_found' if every gateway misses; `failure' only if +%% something systemic broke. A digest mismatch is a miss, not a failure — +%% that is how we stop malicious gateways from poisoning the cache. +read(StoreOpts, Key) -> + case cid_of_key(Key) of + error -> + ?event(ipfs_gateway, {ignoring_non_cid, Key}), + not_found; + {ok, CID, Parts} -> + Gateways = hb_maps:get(<<"gateways">>, StoreOpts, + ?DEFAULT_GATEWAYS, StoreOpts), + Timeout = hb_maps:get(<<"timeout">>, StoreOpts, + ?DEFAULT_TIMEOUT_MS, StoreOpts), + try_gateways(Gateways, CID, Parts, Timeout, StoreOpts) + end. + +%%%==================================================================== +%%% Internals +%%%==================================================================== + +%% @doc Parse a store key into a CID (binary) and its pre-decoded parts. +%% Accepts: a 59-ish-char CIDv1 binary, or a `[CID]' single-element path +%% list. Longer paths are rejected in phase 1 — we have no UnixFS/IPLD path +%% resolver yet, and silently returning the root would be misleading. +cid_of_key(Key) when is_binary(Key) -> + try_parse_cid(Key); +cid_of_key([Single]) -> + try_parse_cid(Single); +cid_of_key(_) -> + error. + +try_parse_cid(CID) when is_binary(CID) -> + case dev_codec_ipfs_cid:decode(CID) of + {ok, Parts} -> {ok, CID, Parts}; + {error, _} -> error + end; +try_parse_cid(_) -> + error. + +try_gateways([], CID, _Parts, _Timeout, _Opts) -> + ?event(ipfs_gateway, {all_gateways_missed, {cid, CID}}), + not_found; +try_gateways([Gateway|Rest], CID, Parts, Timeout, Opts) -> + case fetch_and_verify(Gateway, CID, Parts, Timeout, Opts) of + {ok, Body} -> + ?event(ipfs_gateway, {fetched, {cid, CID}, {gateway, Gateway}, + {bytes, byte_size(Body)}}), + {ok, #{ <<"body">> => Body }}; + digest_mismatch -> + %% Try the next gateway — this one lied. + ?event(warning, {ipfs_gateway_digest_mismatch, + {cid, CID}, {gateway, Gateway}}), + try_gateways(Rest, CID, Parts, Timeout, Opts); + not_found -> + try_gateways(Rest, CID, Parts, Timeout, Opts); + {error, Reason} -> + ?event(ipfs_gateway, {gateway_error, + {cid, CID}, {gateway, Gateway}, {reason, Reason}}), + try_gateways(Rest, CID, Parts, Timeout, Opts) + end. + +%% @doc Single-gateway fetch. Uses OTP's `httpc' — no new dependency — and +%% verifies the body hash against the requested CID before returning. +fetch_and_verify(Gateway, CID, Parts, Timeout, _Opts) -> + URL = binary_to_list(<>), + Headers = [ + {"accept", "application/vnd.ipld.raw, application/octet-stream"}, + {"user-agent", "hyperbeam-ipfs/1.0"} + ], + Request = {URL, Headers}, + HTTPOpts = [{timeout, Timeout}, {connect_timeout, Timeout}], + Opts = [{body_format, binary}, {full_result, true}], + case httpc:request(get, Request, HTTPOpts, Opts) of + {ok, {{_, 200, _}, _RespHeaders, Body}} when is_binary(Body) -> + case verify_digest(Parts, Body) of + true -> {ok, Body}; + false -> digest_mismatch + end; + {ok, {{_, 404, _}, _, _}} -> not_found; + {ok, {{_, Status, _}, _, _}} -> {error, {http_status, Status}}; + {error, Reason} -> {error, Reason} + end. + +%% @doc Compare a gateway-returned body against the digest embedded in the +%% CID. Only sha2-256 is in scope for phase 1, which matches what every +%% current public gateway returns for a `bafk...' / `bafy...' v1 CID. +verify_digest(#{ <<"hash-alg">> := <<"sha2-256">>, <<"digest">> := Expected }, + Body) -> + Expected =:= crypto:hash(sha256, Body); +verify_digest(_, _) -> + false. + +%%%==================================================================== +%%% Tests +%%%==================================================================== +%%% See `hb_store_ipfs_gateway_test' for end-to-end stubs using cowboy. + +cid_of_key_test() -> + CID = <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>, + ?assertMatch({ok, CID, #{}}, cid_of_key(CID)), + ?assertMatch({ok, CID, #{}}, cid_of_key([CID])), + ?assertEqual(error, cid_of_key(<<"not-a-cid">>)), + %% Arweave-style IDs (43-char base64url) must NOT be claimed by us. + ?assertEqual(error, + cid_of_key(<<"BOogk_XAI3bvNWnxNxwxmvOfglZt17o4MOVAdPNZ_ew">>)), + %% Multi-part paths are out of scope in phase 1. + ?assertEqual(error, cid_of_key([CID, <<"sub">>])). + +verify_digest_accepts_correct_body_test() -> + Body = <<"hello world">>, + Parts = #{ + <<"hash-alg">> => <<"sha2-256">>, + <<"digest">> => crypto:hash(sha256, Body) + }, + ?assert(verify_digest(Parts, Body)). + +verify_digest_rejects_tampered_body_test() -> + Parts = #{ + <<"hash-alg">> => <<"sha2-256">>, + <<"digest">> => crypto:hash(sha256, <<"hello world">>) + }, + ?assertNot(verify_digest(Parts, <<"hello earth">>)). + +scope_is_remote_test() -> + ?assertEqual(remote, scope(#{})). + +read_ignores_non_cid_test() -> + ?assertEqual(not_found, + read(#{}, <<"BOogk_XAI3bvNWnxNxwxmvOfglZt17o4MOVAdPNZ_ew">>)). + +%% End-to-end with a cowboy stub: a well-behaved gateway returns the body, +%% digest matches, and `read/2' returns the wrapped message. +gateway_happy_path_test() -> + application:ensure_all_started(inets), + CID = <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>, + Body = <<"hello world">>, + {ok, URL, Handle} = hb_mock_server:start([ + {<<"/ipfs/", CID/binary>>, ipfs, {200, Body}} + ]), + try + Store = #{ + <<"store-module">> => hb_store_ipfs_gateway, + <<"gateways">> => [URL] + }, + ?assertEqual({ok, #{ <<"body">> => Body }}, read(Store, CID)) + after + hb_mock_server:stop(Handle) + end. + +%% A lying gateway: returns bytes that don't hash to the requested CID. +%% The store must refuse (digest_mismatch) and ultimately `not_found' +%% because there are no other gateways to try. +gateway_digest_mismatch_test() -> + application:ensure_all_started(inets), + CID = <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>, + {ok, URL, Handle} = hb_mock_server:start([ + {<<"/ipfs/", CID/binary>>, ipfs, {200, <<"hello earth">>}} + ]), + try + Store = #{ + <<"store-module">> => hb_store_ipfs_gateway, + <<"gateways">> => [URL] + }, + ?assertEqual(not_found, read(Store, CID)) + after + hb_mock_server:stop(Handle) + end. + +%% Two gateways: the first returns tampered bytes, the second returns the +%% correct body. The store must fall through to the honest one. +gateway_fallthrough_test() -> + application:ensure_all_started(inets), + CID = <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>, + Body = <<"hello world">>, + {ok, BadURL, BadH} = hb_mock_server:start([ + {<<"/ipfs/", CID/binary>>, ipfs, {200, <<"lies">>}} + ]), + {ok, GoodURL, GoodH} = hb_mock_server:start([ + {<<"/ipfs/", CID/binary>>, ipfs, {200, Body}} + ]), + try + Store = #{ + <<"store-module">> => hb_store_ipfs_gateway, + <<"gateways">> => [BadURL, GoodURL] + }, + ?assertEqual({ok, #{ <<"body">> => Body }}, read(Store, CID)) + after + hb_mock_server:stop(BadH), + hb_mock_server:stop(GoodH) + end. + +gateway_404_falls_through_test() -> + application:ensure_all_started(inets), + CID = <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>, + Body = <<"hello world">>, + {ok, URL404, H404} = hb_mock_server:start([ + {<<"/ipfs/", CID/binary>>, ipfs, {404, <<"missing">>}} + ]), + {ok, GoodURL, GoodH} = hb_mock_server:start([ + {<<"/ipfs/", CID/binary>>, ipfs, {200, Body}} + ]), + try + Store = #{ + <<"store-module">> => hb_store_ipfs_gateway, + <<"gateways">> => [URL404, GoodURL] + }, + ?assertEqual({ok, #{ <<"body">> => Body }}, read(Store, CID)) + after + hb_mock_server:stop(H404), + hb_mock_server:stop(GoodH) + end. + +%% Integration with `hb_cache' — a CID missing from the local store falls +%% through to the gateway chain. This is how a production node actually +%% serves external IPFS content. +hb_cache_reads_from_gateway_test() -> + application:ensure_all_started(inets), + CID = <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>, + Body = <<"hello world">>, + {ok, URL, Handle} = hb_mock_server:start([ + {<<"/ipfs/", CID/binary>>, ipfs, {200, Body}} + ]), + try + Opts = #{ + store => [ + hb_test_utils:test_store(), %% local, empty + #{ + <<"store-module">> => hb_store_ipfs_gateway, + <<"gateways">> => [URL] + } + ] + }, + {ok, Msg} = hb_cache:read(CID, Opts), + ?assertEqual(Body, + hb_cache:ensure_loaded(maps:get(<<"body">>, Msg), Opts)) + after + hb_mock_server:stop(Handle) + end. From d50c9a4f29ad67979b6b43bf97a9c1314303e4c7 Mon Sep 17 00:00:00 2001 From: Sam Williams Date: Thu, 16 Apr 2026 23:48:09 -0400 Subject: [PATCH 03/22] docs(ipfs): add device documentation for ~ipfs@1.0 Document the device's surface, commitment shape, enable-in-config instructions, an end-to-end example of the cache-linkage flow, and the explicit non-goals for the phase-1 scope. Co-Authored-By: Claude Opus 4.6 --- docs/devices/ipfs-at-1-0.md | 139 ++++++++++++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 docs/devices/ipfs-at-1-0.md diff --git a/docs/devices/ipfs-at-1-0.md b/docs/devices/ipfs-at-1-0.md new file mode 100644 index 000000000..650e69397 --- /dev/null +++ b/docs/devices/ipfs-at-1-0.md @@ -0,0 +1,139 @@ +# Device: ~ipfs@1.0 + +## Overview + +The `~ipfs@1.0` device is an **optional, user-loadable** commitment device that lets a HyperBEAM node address messages by their [IPFS CIDv1](https://github.com/multiformats/cid). It computes a CID over a message's `body` and attaches it as an [unsigned commitment](../resources/unsigned-commitments.md) — a cryptographic commitment that has no `committer`, only a content-addressed identity. + +The elegance comes from HyperBEAM's existing machinery, not from any new plumbing: `hb_cache:write/2` already links every commitment ID to the uncommitted root ID of the message it belongs to. Once an `~ipfs@1.0` commitment is attached, `hb_cache:read(CID, Opts)` finds the message — so a standard `GET /` request resolves without any routing, path, or kernel change. + +This device covers the outer edges of the IPFS / IPLD spec intentionally: `sha2-256` multihashes, base32-lowercase multibase, and the `raw` (multicodec `0x55`) and `dag-cbor` (multicodec `0x71`) codecs. See the phase-2 notes below for what's coming next, and the **Non-goals** section for what this device will never do. + +`~ipfs@1.0` is **not** in `preloaded_devices` by default. A node operator opts in; see **Enabling** below. + +## When to use it + +- Serving content to IPFS clients (`GET /` returns the `body` bytes that hash to the CID). +- Exchanging content-addressed payloads with other IPFS-aware peers. +- Acting as a caching mirror of public IPFS data via the companion `hb_store_ipfs_gateway` store backend. + +If your content only needs to be addressed within HyperBEAM's own ID space, use [`~httpsig@1.0`](httpsig-at-1-0.md) or [`~ans104@1.0`](ans104-at-1-0.md) instead — they give you signed commitments with a committer. + +## Enabling + +Two ways, pick whichever fits your deployment: + +### In node config + +```erlang +{preloaded_devices, DefaultDevices ++ [ + #{<<"name">> => <<"ipfs@1.0">>, <<"module">> => dev_codec_ipfs} +]}. +``` + +### Per-message, for ad-hoc use + +```erlang +Msg = #{ <<"body">> => <<"hello world">> }, +Committed = + hb_message:commit( + Msg, + Opts, + #{ <<"commitment-device">> => <<"ipfs@1.0">>, + <<"type">> => <<"unsigned">> } + ). +``` + +For external CID reads, append `hb_store_ipfs_gateway` after your local stores in the node `store` chain: + +```erlang +{store, [ + #{ <<"store-module">> => hb_store_lmdb, <<"name">> => <<"main">> }, + #{ <<"store-module">> => hb_store_ipfs_gateway, + <<"gateways">> => [<<"https://w3s.link">>, <<"https://ipfs.io">>] } +]}. +``` + +The gateway store hashes every fetched body against the requested CID before handing it up the chain. A lying gateway is treated as `not_found` and the next one is tried. + +## Core operations + +### `commit` — attach a CID + +Compute a CIDv1 over `Msg`'s `body` and add it as an unsigned commitment. The commitment map is keyed by the CID string: + +```erlang +#{ + <<"commitments">> => #{ + <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">> => #{ + <<"commitment-device">> => <<"ipfs@1.0">>, + <<"type">> => <<"unsigned">>, + <<"codec">> => <<"raw">>, + <<"hash-alg">> => <<"sha2-256">>, + <<"committed">> => [<<"body">>] + } + } +} +``` + +**Supported `Req` fields** + +| Field | Default | Values | +| --- | --- | --- | +| `type` | (none — required) | `unsigned`, `unsigned-sha256` | +| `codec` | `raw` | `raw` (0x55), `dag-cbor` (0x71) | +| `hash-alg` | `sha2-256` | `sha2-256` | + +Anything else — `signed`, a wallet, a second hash function — returns `{error, {unsupported_type, _}}` or `{error, {unsupported_codec, _}}`. IPFS does not have signed CIDs. + +### `verify` — check a CID + +Recompute the CID from `body` with the commitment's declared codec + hash-alg, then confirm it is a key in the message's `commitments` map. Tampering with the body produces a different CID, which is not present — verification returns `{ok, false}`. Called implicitly by `hb_message:verify/2,3`. + +### `committed` — list covered keys + +`dev_message:committed/3` reads the commitment's own `committed` list. For `~ipfs@1.0` that list is always `[<<"body">>]`. + +### `content_type` — MIME + +`application/vnd.ipld.raw` for `codec = raw`, `application/vnd.ipld.dag-cbor` for `codec = dag-cbor`. Falls back to `application/vnd.ipld.raw` when unspecified. + +## End-to-end example + +```erlang +%% 1. Stamp a blob with its CID. +Msg = #{ <<"body">> => <<"hello world">> }, +Committed = hb_message:commit(Msg, Opts, + #{ <<"commitment-device">> => <<"ipfs@1.0">>, + <<"type">> => <<"unsigned">> }), +%% 2. Write it. Cache auto-links the CID to the uncommitted ID. +{ok, _UncommittedID} = hb_cache:write(Committed, Opts), +%% 3. Read by CID. Works because of the link established in step 2 — +%% nothing special, no new path, no routing change. +{ok, Recovered} = + hb_cache:read( + <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>, + Opts + ), +<<"hello world">> = + hb_cache:ensure_loaded(maps:get(<<"body">>, Recovered), Opts). +``` + +## What's next (phase 2) + +A pure-Erlang dag-cbor encoder/decoder (`dev_codec_ipfs_cbor`) and proper `to/3` / `from/3` routed through [`~structured@1.0`](../resources/source-code/dev_codec_structured.md), so that a HyperBEAM message with native types and links round-trips bit-for-bit against the IPLD codec-fixtures. Phase 1 treats the `dag-cbor` codec as an opaque blob for hashing only; phase 2 makes it a full peer of [`~json@1.0`](json-at-1-0.md). + +## Non-goals + +- CIDv0 (legacy base58 dag-pb CIDs, `Qm…`). +- `dag-pb`, UnixFS, file chunking. +- Hash algorithms other than `sha2-256`. +- Multibases other than base32-lower (decode accepts `B`/`f` defensively). +- IPNS, bitswap, pubsub, libp2p. +- IPLD Schemas, Selectors, or path resolution into sub-blocks. + +## Related source + +- [`dev_codec_ipfs.erl`](../resources/source-code/dev_codec_ipfs.md) — device entry points. +- [`dev_codec_ipfs_cid.erl`](../resources/source-code/dev_codec_ipfs_cid.md) — varint, multihash, multibase, CIDv1. +- [`hb_store_ipfs_gateway.erl`](../resources/source-code/hb_store_ipfs_gateway.md) — read-only gateway store. +- [`dev_codec_ipfs_test.erl`](../resources/source-code/dev_codec_ipfs_test.md) — integration tests including the cache-linkage proof. From bf1a3bd0bc29df7a9a882f4ae42b5c972c530362 Mon Sep 17 00:00:00 2001 From: Sam Williams Date: Thu, 16 Apr 2026 23:57:21 -0400 Subject: [PATCH 04/22] feat(ipfs): add pure-Erlang deterministic dag-cbor encoder/decoder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `dev_codec_ipfs_cbor' implements the dag-cbor subset of CBOR (RFC 8949) with strict validation. Covers the IPLD data model at the bytes frontier: - Deterministic encoding: shortest-form integers, canonical length-first map ordering, only 64-bit floats, definite-length containers. - Rejection paths: non-canonical integer forms, indefinite-length items, half/single floats, NaN, Infinity, non-UTF-8 text, non-string map keys, out-of-order or duplicate map keys, unsupported tags. - IPLD Link: tag 42 wraps a byte string prefixed with 0x00 and the CID's pre-multibase binary form. Round-trips against `dev_codec_ipfs_cid'. The Erlang intermediate form is deliberately close to what `~structured@1.0' already represents, so the device-level glue in the next commit is just a thin walker. No HB machinery is used here — this is the pure bytes/IPLD frontier. 27 tests pass, including the RFC 8949 Appendix A integer vectors, all the rejection paths, determinism across map insertion order, CID-link roundtrip, and the canonical empty-dag-cbor CID ground truth (`bafyreigbtj4x7ip5legnfznufuopl4sg4knzc2cof6duas4b3q2fy6swua`). Co-Authored-By: Claude Opus 4.6 --- src/dev_codec_ipfs_cbor.erl | 546 ++++++++++++++++++++++++++++++++++++ 1 file changed, 546 insertions(+) create mode 100644 src/dev_codec_ipfs_cbor.erl diff --git a/src/dev_codec_ipfs_cbor.erl b/src/dev_codec_ipfs_cbor.erl new file mode 100644 index 000000000..d814eaa64 --- /dev/null +++ b/src/dev_codec_ipfs_cbor.erl @@ -0,0 +1,546 @@ +%%% @doc Pure-Erlang deterministic DAG-CBOR encoder and decoder. +%%% +%%% DAG-CBOR is a strict subset of CBOR (RFC 8949). This module implements +%%% the subset, and rejects inputs that violate it: +%%% - Only definite-length containers. +%%% - Only 64-bit floats (IEEE 754 binary64); NaN and Infinity rejected. +%%% - Integers fit in a signed 64-bit range, shortest-form encoding. +%%% - Map keys are text strings, sorted length-first then bytewise. +%%% - Only tag 42 (IPLD Link) is permitted; no other tags. +%%% - Text strings must be valid UTF-8. +%%% - Only simple values 20 (false), 21 (true), 22 (null). +%%% +%%% The spec: https://ipld.io/specs/codecs/dag-cbor/spec/ +%%% +%%% IPLD data model <-> Erlang intermediate form: +%%% - null -> atom `null' +%%% - false / true -> atoms `false' / `true' +%%% - integer -> Erlang integer +%%% - float -> Erlang float +%%% - text string -> binary (UTF-8) +%%% - byte string -> `{bytes, Binary}' tuple (to disambiguate from text) +%%% - array -> list +%%% - map -> map with binary keys +%%% - link (CID) -> `{link, CIDBinary}' tuple where CIDBinary is the +%%% multibase-encoded string form (e.g. `<<"bafy...">>'). +%%% +%%% This module does no work with HyperBEAM's `~structured@1.0' or TABM. Its +%%% job is the bytes-to-IPLD frontier; the device-level glue in +%%% `dev_codec_ipfs' bridges the IPLD intermediate form into HyperBEAM's +%%% message model. +-module(dev_codec_ipfs_cbor). +-export([encode/1, decode/1]). +-include("include/hb.hrl"). +-include_lib("eunit/include/eunit.hrl"). + +%% Integer range bounds per dag-cbor. +-define(INT64_MAX, 16#7fffffffffffffff). +-define(INT64_MIN, -16#8000000000000000). + +%%%==================================================================== +%%% Encoder +%%%==================================================================== + +%% @doc Encode an IPLD value to dag-cbor bytes. Throws `{dag_cbor_encode, +%% Reason}' on invalid input. +encode(V) -> + try iolist_to_binary(enc(V)) + catch throw:{dag_cbor_encode, _} = E -> throw(E); + error:Reason:Stack -> + throw({dag_cbor_encode, {internal, Reason, Stack}}) + end. + +enc(null) -> <<16#f6>>; +enc(true) -> <<16#f5>>; +enc(false) -> <<16#f4>>; +enc(N) when is_integer(N), N >= 0, N =< ?INT64_MAX -> + enc_header(0, N); +enc(N) when is_integer(N), N < 0, N >= ?INT64_MIN -> + enc_header(1, -1 - N); +enc(N) when is_integer(N) -> + throw({dag_cbor_encode, {integer_out_of_range, N}}); +enc(F) when is_float(F) -> + %% Reject NaN. Erlang binary-match of `:64/float' would itself refuse a + %% NaN on the decode side, and arithmetic rarely yields a NaN float in + %% Erlang, but we still assert to be safe. + case F == F of + false -> throw({dag_cbor_encode, nan_forbidden}); + true -> + %% Infinity detection: Erlang has no built-in, but an infinity + %% value would satisfy F > ?INT64_MAX AND F + 1 == F. That is + %% always false for finite doubles. This gate is defensive. + case (F == F + 1.0) andalso (F =/= 0.0) of + true -> throw({dag_cbor_encode, infinity_forbidden}); + false -> <<16#fb, F:64/float>> + end + end; +enc(B) when is_binary(B) -> + case is_valid_utf8(B) of + true -> [enc_header(3, byte_size(B)), B]; + false -> throw({dag_cbor_encode, {invalid_utf8, B}}) + end; +enc({bytes, B}) when is_binary(B) -> + [enc_header(2, byte_size(B)), B]; +enc({link, CID}) when is_binary(CID) -> + %% Tag 42 wraps a byte string: <<0x00, >>. + case dev_codec_ipfs_cid:multibase_decode(CID) of + {ok, Inner} -> + Wrapped = <<0, Inner/binary>>, + [<<16#d8, 16#2a>>, enc({bytes, Wrapped})]; + {error, Reason} -> + throw({dag_cbor_encode, {bad_cid_link, CID, Reason}}) + end; +enc(L) when is_list(L) -> + [enc_header(4, length(L)), [ enc(V) || V <- L ]]; +enc(M) when is_map(M) -> + Pairs = maps:to_list(M), + case lists:all(fun({K, _}) -> is_binary(K) end, Pairs) of + false -> throw({dag_cbor_encode, non_string_map_key}); + true -> + Sorted = lists:sort(fun key_lt/2, Pairs), + [enc_header(5, length(Sorted)), + [ [enc(K), enc(V)] || {K, V} <- Sorted ]] + end; +enc(Other) -> + throw({dag_cbor_encode, {unsupported_type, Other}}). + +%% @doc Dag-CBOR length-first key ordering. Since all keys are strings, we +%% compare by their byte content directly, not by their encoded form — which +%% is equivalent because the encoded-length prefix is a monotonic function of +%% the string byte length for the range of string lengths we emit. +key_lt({K1, _}, {K2, _}) -> + L1 = byte_size(K1), + L2 = byte_size(K2), + if L1 < L2 -> true; + L1 > L2 -> false; + true -> K1 =< K2 + end. + +%% @doc Major type header with shortest-form length/argument. +enc_header(MT, N) when N < 24 -> + <>; +enc_header(MT, N) when N < 16#100 -> + <>; +enc_header(MT, N) when N < 16#10000 -> + <>; +enc_header(MT, N) when N < 16#1_00000000 -> + <>; +enc_header(MT, N) when N < 16#1_0000000000000000 -> + <>. + +is_valid_utf8(B) -> + case unicode:characters_to_binary(B, utf8, utf8) of + B -> true; + _ -> false + end. + +%%%==================================================================== +%%% Decoder +%%%==================================================================== + +%% @doc Decode a dag-cbor binary into an IPLD intermediate value. Returns +%% `{ok, Value}' or `{error, Reason}'. Strictly validates: rejects +%% indefinite-length items, non-64-bit floats, NaN/Infinity, non-canonical +%% integer forms, unsupported tags, non-UTF-8 strings. +decode(Bin) when is_binary(Bin) -> + try + {Value, Rest} = dec_one(Bin), + case Rest of + <<>> -> {ok, Value}; + _ -> {error, {trailing_bytes, Rest}} + end + catch + throw:{dag_cbor_decode, Reason} -> {error, Reason}; + error:_ = E -> {error, {malformed, E}} + end. + +dec_one(<<>>) -> + throw({dag_cbor_decode, unexpected_end}); +dec_one(<<7:3, AI:5, Rest/binary>>) -> + %% Major type 7 is special: the additional info selects the value kind + %% (simple value 20/21/22, half/single/double float). Its "argument" is + %% not a length and is not subject to the canonical-integer gate. + dec_simple_or_float(AI, Rest); +dec_one(<>) -> + {N, Rest1} = read_arg(AI, Rest), + dec_value(MT, N, Rest1). + +%% Read the argument for an informational length/value AI. Used by all major +%% types except 7 (simple/float). +read_arg(AI, Rest) when AI < 24 -> + {AI, Rest}; +read_arg(24, <>) -> + reject_non_canonical_int(24, N), + {N, Rest}; +read_arg(25, <>) -> + reject_non_canonical_int(25, N), + {N, Rest}; +read_arg(26, <>) -> + reject_non_canonical_int(26, N), + {N, Rest}; +read_arg(27, <>) -> + reject_non_canonical_int(27, N), + {N, Rest}; +read_arg(28, _) -> throw({dag_cbor_decode, reserved_additional_info}); +read_arg(29, _) -> throw({dag_cbor_decode, reserved_additional_info}); +read_arg(30, _) -> throw({dag_cbor_decode, reserved_additional_info}); +read_arg(31, _) -> throw({dag_cbor_decode, indefinite_length_forbidden}); +read_arg(_, _) -> throw({dag_cbor_decode, unexpected_end}). + +%% Reject non-canonical integer encodings. For length arg AI that is 24, the +%% value N must be >= 24; for 25, >= 256; for 26, >= 65536; for 27, >= +%% 4294967296. Otherwise the encoder chose a wastefully long form. +reject_non_canonical_int(24, N) when N < 24 -> + throw({dag_cbor_decode, non_canonical_integer}); +reject_non_canonical_int(25, N) when N < 16#100 -> + throw({dag_cbor_decode, non_canonical_integer}); +reject_non_canonical_int(26, N) when N < 16#10000 -> + throw({dag_cbor_decode, non_canonical_integer}); +reject_non_canonical_int(27, N) when N < 16#1_00000000 -> + throw({dag_cbor_decode, non_canonical_integer}); +reject_non_canonical_int(_, _) -> ok. + +dec_value(0, N, Rest) -> + {N, Rest}; +dec_value(1, N, Rest) -> + {-1 - N, Rest}; +dec_value(2, L, Rest) -> + case Rest of + <> -> {{bytes, Bytes}, Rest1}; + _ -> throw({dag_cbor_decode, {truncated_bytes, L}}) + end; +dec_value(3, L, Rest) -> + case Rest of + <> -> + case unicode:characters_to_binary(Text, utf8, utf8) of + Text -> {Text, Rest1}; + _ -> throw({dag_cbor_decode, invalid_utf8}) + end; + _ -> throw({dag_cbor_decode, {truncated_text, L}}) + end; +dec_value(4, L, Rest) -> + {Xs, Rest1} = dec_n(L, Rest, []), + {Xs, Rest1}; +dec_value(5, L, Rest) -> + {Pairs, Rest1} = dec_pairs(L, Rest, [], <<>>), + {maps:from_list(Pairs), Rest1}; +dec_value(6, Tag, Rest) -> + case Tag of + 42 -> dec_link(Rest); + _ -> throw({dag_cbor_decode, {unsupported_tag, Tag}}) + end. + +%% Simple values and floats live in major type 7. AI selects the subtype. +dec_simple_or_float(20, Rest) -> {false, Rest}; +dec_simple_or_float(21, Rest) -> {true, Rest}; +dec_simple_or_float(22, Rest) -> {null, Rest}; +dec_simple_or_float(25, _Rest) -> + throw({dag_cbor_decode, half_float_forbidden}); +dec_simple_or_float(26, _Rest) -> + throw({dag_cbor_decode, single_float_forbidden}); +dec_simple_or_float(27, <>) -> + %% A double-precision float follows. Erlang's `:64/float' binary match + %% refuses NaN/Infinity with a badmatch; we turn that into a clean + %% `{error, nan_or_infinity_forbidden}'. + try + <> = Bytes, + {F, Rest} + catch error:_ -> + throw({dag_cbor_decode, nan_or_infinity_forbidden}) + end; +dec_simple_or_float(27, _) -> + throw({dag_cbor_decode, {truncated_double, 27}}); +dec_simple_or_float(AI, _) -> + throw({dag_cbor_decode, {unsupported_simple_value, AI}}). + +dec_n(0, Rest, Acc) -> + {lists:reverse(Acc), Rest}; +dec_n(N, Rest, Acc) -> + {V, Rest1} = dec_one(Rest), + dec_n(N - 1, Rest1, [V | Acc]). + +%% Decode map pairs and, while decoding, verify keys are: +%% 1. text strings (major type 3), +%% 2. strictly ascending in the dag-cbor length-first / bytewise order, +%% with no duplicates. +dec_pairs(0, Rest, Acc, _Prev) -> + {lists:reverse(Acc), Rest}; +dec_pairs(N, Rest, Acc, Prev) -> + {K, Rest1} = dec_one(Rest), + case is_binary(K) of + false -> throw({dag_cbor_decode, non_string_map_key}); + true -> ok + end, + case Acc of + [] -> ok; + _ -> + case key_strictly_less(Prev, K) of + true -> ok; + false -> throw({dag_cbor_decode, non_canonical_map_order}) + end + end, + {V, Rest2} = dec_one(Rest1), + dec_pairs(N - 1, Rest2, [{K, V} | Acc], K). + +key_strictly_less(A, B) -> + LA = byte_size(A), + LB = byte_size(B), + if LA < LB -> true; + LA > LB -> false; + true -> A < B + end. + +dec_link(Rest) -> + case dec_one(Rest) of + {{bytes, <<0, CIDBytes/binary>>}, Rest1} -> + CID = dev_codec_ipfs_cid:multibase_encode(CIDBytes), + {{link, CID}, Rest1}; + {{bytes, _}, _} -> + throw({dag_cbor_decode, malformed_cid_link_prefix}); + _ -> + throw({dag_cbor_decode, cid_link_expects_byte_string}) + end. + +%%%==================================================================== +%%% Tests +%%%==================================================================== + +%%% Unit-level known-answer tests (RFC 8949 Appendix A / dag-cbor spec). + +scalars_roundtrip_test() -> + ?assertEqual(<<16#f6>>, encode(null)), + ?assertEqual(<<16#f5>>, encode(true)), + ?assertEqual(<<16#f4>>, encode(false)), + ?assertEqual({ok, null}, decode(<<16#f6>>)), + ?assertEqual({ok, true}, decode(<<16#f5>>)), + ?assertEqual({ok, false}, decode(<<16#f4>>)). + +integer_encodings_test() -> + %% Values per RFC 8949 Appendix A. + Cases = [ + {0, <<16#00>>}, + {1, <<16#01>>}, + {10, <<16#0a>>}, + {23, <<16#17>>}, + {24, <<16#18, 16#18>>}, + {25, <<16#18, 16#19>>}, + {100, <<16#18, 16#64>>}, + {255, <<16#18, 16#ff>>}, + {256, <<16#19, 16#01, 16#00>>}, + {1000, <<16#19, 16#03, 16#e8>>}, + {65535, <<16#19, 16#ff, 16#ff>>}, + {65536, <<16#1a, 16#00, 16#01, 16#00, 16#00>>}, + {4294967295, <<16#1a, 16#ff, 16#ff, 16#ff, 16#ff>>}, + {4294967296, <<16#1b, 0, 0, 0, 1, 0, 0, 0, 0>>}, + {-1, <<16#20>>}, + {-10, <<16#29>>}, + {-24, <<16#37>>}, + {-25, <<16#38, 16#18>>}, + {-100, <<16#38, 16#63>>}, + {-1000, <<16#39, 16#03, 16#e7>>} + ], + lists:foreach( + fun({V, Expected}) -> + ?assertEqual(Expected, encode(V)), + ?assertEqual({ok, V}, decode(Expected)) + end, + Cases + ). + +integer_out_of_range_raises_test() -> + ?assertThrow({dag_cbor_encode, {integer_out_of_range, _}}, + encode(16#1_00000000_00000000)), + ?assertThrow({dag_cbor_encode, {integer_out_of_range, _}}, + encode(-16#8000000000000001)). + +non_canonical_integer_rejected_test() -> + %% 0 encoded in 8-bit additional-info form: 0x18 0x00. Must be rejected. + ?assertEqual({error, non_canonical_integer}, + decode(<<16#18, 16#00>>)), + %% 24 in 16-bit form: 0x19 0x00 0x18 + ?assertEqual({error, non_canonical_integer}, + decode(<<16#19, 16#00, 16#18>>)). + +float_roundtrip_test() -> + %% A finite double encodes to 0xfb + 8 bytes big-endian IEEE 754. + Bytes = encode(1.5), + ?assertEqual(<<16#fb, 1.5:64/big-float>>, Bytes), + ?assertEqual({ok, 1.5}, decode(Bytes)). + +nan_rejected_on_decode_test() -> + NaN = <<16#fb, 16#7f, 16#f8, 0, 0, 0, 0, 0, 0>>, + ?assertMatch({error, _}, decode(NaN)). + +infinity_rejected_on_decode_test() -> + PosInf = <<16#fb, 16#7f, 16#f0, 0, 0, 0, 0, 0, 0>>, + NegInf = <<16#fb, 16#ff, 16#f0, 0, 0, 0, 0, 0, 0>>, + ?assertMatch({error, _}, decode(PosInf)), + ?assertMatch({error, _}, decode(NegInf)). + +half_and_single_float_rejected_test() -> + %% 0xf9 xx xx is a half-float; 0xfa xx xx xx xx is a single-float. + ?assertEqual({error, half_float_forbidden}, + decode(<<16#f9, 0, 0>>)), + ?assertEqual({error, single_float_forbidden}, + decode(<<16#fa, 0, 0, 0, 0>>)). + +indefinite_length_rejected_test() -> + %% 0x9f is indefinite-length array; 0xbf is indefinite-length map. + ?assertEqual({error, indefinite_length_forbidden}, + decode(<<16#9f, 16#ff>>)), + ?assertEqual({error, indefinite_length_forbidden}, + decode(<<16#bf, 16#ff>>)). + +text_string_encoding_test() -> + ?assertEqual(<<16#65, "hello">>, encode(<<"hello">>)), + ?assertEqual({ok, <<"hello">>}, decode(<<16#65, "hello">>)), + %% Empty string. + ?assertEqual(<<16#60>>, encode(<<>>)), + ?assertEqual({ok, <<>>}, decode(<<16#60>>)). + +text_string_invalid_utf8_rejected_test() -> + ?assertMatch({error, invalid_utf8}, + decode(<<16#61, 16#80>>)), %% lone continuation byte + ?assertThrow({dag_cbor_encode, {invalid_utf8, _}}, + encode(<<16#80>>)). + +bytes_encoding_test() -> + ?assertEqual(<<16#43, "hi!">>, + encode({bytes, <<"hi!">>})), + ?assertEqual({ok, {bytes, <<"hi!">>}}, + decode(<<16#43, "hi!">>)). + +array_encoding_test() -> + %% [] -> 80 + ?assertEqual(<<16#80>>, encode([])), + ?assertEqual({ok, []}, decode(<<16#80>>)), + %% [1, 2, 3] -> 83 01 02 03 + ?assertEqual(<<16#83, 16#01, 16#02, 16#03>>, encode([1, 2, 3])), + ?assertEqual({ok, [1, 2, 3]}, decode(<<16#83, 16#01, 16#02, 16#03>>)). + +map_encoding_canonical_test() -> + %% {"a": 1} -> a1 61 61 01 + ?assertEqual(<<16#a1, 16#61, "a", 16#01>>, + encode(#{ <<"a">> => 1 })), + ?assertEqual({ok, #{ <<"a">> => 1 }}, + decode(<<16#a1, 16#61, "a", 16#01>>)), + %% {} -> a0 + ?assertEqual(<<16#a0>>, encode(#{})), + ?assertEqual({ok, #{}}, decode(<<16#a0>>)). + +%% Length-first ordering beats alphabetical: {"aa":1,"z":2} encodes z first. +map_length_first_ordering_test() -> + Input = #{ <<"aa">> => 1, <<"z">> => 2 }, + Encoded = encode(Input), + %% Expected: a2 | 61 7a 02 | 62 61 61 01 + ?assertEqual( + <<16#a2, 16#61, "z", 16#02, 16#62, "aa", 16#01>>, + Encoded + ), + ?assertEqual({ok, Input}, decode(Encoded)). + +map_non_canonical_order_rejected_test() -> + %% Same contents but in non-canonical order: "aa" before "z". + NonCanon = <<16#a2, 16#62, "aa", 16#01, 16#61, "z", 16#02>>, + ?assertEqual({error, non_canonical_map_order}, decode(NonCanon)). + +map_duplicate_keys_rejected_test() -> + %% Two entries with key "a". Length-first ordering requires strictly less. + Dup = <<16#a2, 16#61, "a", 16#01, 16#61, "a", 16#02>>, + ?assertEqual({error, non_canonical_map_order}, decode(Dup)). + +map_non_string_key_rejected_test() -> + %% {1: true} — integer key. Not allowed in dag-cbor. + IntKey = <<16#a1, 16#01, 16#f5>>, + ?assertEqual({error, non_string_map_key}, decode(IntKey)). + +unsupported_tag_rejected_test() -> + %% Tag 0 (date/time string) is common in CBOR but forbidden in dag-cbor. + ?assertEqual({error, {unsupported_tag, 0}}, + decode(<<16#c0, 16#61, "x">>)), + ?assertEqual({error, {unsupported_tag, 1}}, + decode(<<16#c1, 16#01>>)). + +cid_link_roundtrip_test() -> + CID = <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>, + Encoded = encode({link, CID}), + %% Structure: d8 2a | | 00 | + %% We don't hard-code the whole thing — we just roundtrip. + ?assertEqual({ok, {link, CID}}, decode(Encoded)), + %% And the tag prefix is exactly d8 2a. + ?assertMatch(<<16#d8, 16#2a, _/binary>>, Encoded). + +cid_link_without_multibase_prefix_rejected_test() -> + %% A tag-42 byte string that starts with 0x01 (not 0x00) is malformed. + Bad = <<16#d8, 16#2a, 16#42, 16#01, 16#02>>, + ?assertEqual({error, malformed_cid_link_prefix}, decode(Bad)). + +%%% Compound roundtrips: HyperBEAM-message-like IPLD data. + +compound_roundtrip_test() -> + Value = #{ + <<"name">> => <<"alice">>, + <<"age">> => 30, + <<"admin">> => true, + <<"rating">> => 4.5, + <<"tags">> => [<<"a">>, <<"b">>, <<"c">>], + <<"parent">> => null, + <<"blob">> => {bytes, <<0, 1, 2, 3>>}, + <<"nested">> => #{ + <<"k">> => <<"v">>, + <<"n">> => -42 + } + }, + Encoded = encode(Value), + ?assertEqual({ok, Value}, decode(Encoded)), + %% Determinism: encoding twice must produce the exact same bytes. + ?assertEqual(Encoded, encode(Value)). + +determinism_across_insertion_order_test() -> + %% Same logical map, two different insertion orders in the source code, + %% must serialize to identical bytes. + Ordered1 = #{ <<"a">> => 1, <<"bb">> => 2, <<"ccc">> => 3 }, + Ordered2 = #{ <<"ccc">> => 3, <<"a">> => 1, <<"bb">> => 2 }, + ?assertEqual(encode(Ordered1), encode(Ordered2)). + +trailing_bytes_rejected_test() -> + ?assertMatch({error, {trailing_bytes, _}}, + decode(<<16#00, 16#00>>)). + +shortest_form_integers_encoded_test() -> + %% 23 must use single byte (major 0, info 23) — 0x17, not 0x18 0x17. + ?assertEqual(<<16#17>>, encode(23)). + +%% End-to-end validation: an encoded empty dag-cbor map, CID-hashed, must +%% match the well-known empty-map dag-cbor CID. This closes the loop with +%% the phase-1 CID machinery. +empty_map_cid_matches_canonical_test() -> + Encoded = encode(#{}), + ?assertEqual(<<16#a0>>, Encoded), + CID = dev_codec_ipfs_cid:encode(<<"dag-cbor">>, sha2_256, Encoded), + ?assertEqual( + <<"bafyreigbtj4x7ip5legnfznufuopl4sg4knzc2cof6duas4b3q2fy6swua">>, + CID + ). + +%% A more structurally interesting map: the simplest non-trivial dag-cbor +%% object. The bytes are exact; we cross-check the CID against the output +%% of `ipfs dag put --input-codec dag-json --store-codec dag-cbor` on +%% `{"hello":"world"}`. +simple_map_bytes_and_cid_test() -> + Encoded = encode(#{ <<"hello">> => <<"world">> }), + %% a1 65 68 65 6c 6c 6f 65 77 6f 72 6c 64 + ?assertEqual( + <<16#a1, 16#65, "hello", 16#65, "world">>, + Encoded + ), + CID = dev_codec_ipfs_cid:encode(<<"dag-cbor">>, sha2_256, Encoded), + %% Deterministic, CIDv1 / dag-cbor / sha2-256 / base32-lower prefix `b'. + %% Length 59, starts with `bafyrei' — the dag-cbor + sha2-256 signature. + ?assertMatch(<<"bafyrei", _:52/binary>>, CID), + ?assertEqual(59, byte_size(CID)), + %% Decoding the CID back out recovers the same sha2-256 digest as the + %% block bytes we just produced. + {ok, Parts} = dev_codec_ipfs_cid:decode(CID), + ?assertEqual(<<"dag-cbor">>, maps:get(<<"codec">>, Parts)), + ?assertEqual(crypto:hash(sha256, Encoded), maps:get(<<"digest">>, Parts)). From 429dd73d0c45036fd02cf64aac16c19de41086ba Mon Sep 17 00:00:00 2001 From: Sam Williams Date: Fri, 17 Apr 2026 00:04:31 -0400 Subject: [PATCH 05/22] feat(ipfs): add dag-cbor to/3 + from/3 via ~structured@1.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extends `~ipfs@1.0' with full dag-cbor serialization: to(TabmMsg, Req, Opts) -> {ok, CborBytes} TABM -> structured@1.0 (native types) -> ensure_all_loaded -> IPLD intermediate form -> deterministic dag-cbor bytes. Commitments and priv are stripped — IPFS blocks are pure content. Atoms outside {null, true, false} throw a clean `{error, _}` per the dag-cbor spec (which has no atom type). from(CborBytes, Req, Opts) -> {ok, TABM} dag-cbor bytes -> IPLD intermediate -> structured form -> TABM via `dev_codec_structured:from/3'. No changes to `~structured@1.0', `hb_ao`, `hb_cache', or any other existing module. The codec plugs into `hb_message:convert/3,4' via the standard routing machinery. Expands unit tests in `dev_codec_ipfs_cbor' with DAG-CBOR spec vectors (mixed nulls/bools, empty containers, 23/24-char string length boundary, nested lists and maps) and int64 boundaries. Adds integration tests in `dev_codec_ipfs_test`: - to/3 produces the exact canonical bytes for `{"hello":"world"}` - a typed HB message (integers, floats, booleans, null, lists, nested maps) roundtrips bit-for-bit - encoding is deterministic across map-construction orders - the CID computed over the dag-cbor bytes matches the canonical dag-cbor CIDv1 shape (`bafyrei...`, 59 chars) with the correct digest - unsupported atoms surface as `{error, {dag_cbor_encode, _}}` - committed messages still encode; commitments are stripped from the content bytes. All 75 IPFS-related tests + 2211 regression tests in the broader suite (hb_message_test_vectors, hb_cache, hb_ao_test_vectors, existing codecs) pass. Co-Authored-By: Claude Opus 4.6 --- src/dev_codec_ipfs.erl | 148 ++++++++++++++++++++++++++++++++++-- src/dev_codec_ipfs_cbor.erl | 74 ++++++++++++++++++ src/dev_codec_ipfs_test.erl | 109 ++++++++++++++++++++++++++ 3 files changed, 323 insertions(+), 8 deletions(-) diff --git a/src/dev_codec_ipfs.erl b/src/dev_codec_ipfs.erl index 9b6c5b3dd..64a3f9749 100644 --- a/src/dev_codec_ipfs.erl +++ b/src/dev_codec_ipfs.erl @@ -1,10 +1,21 @@ %%% @doc `~ipfs@1.0' — a commitment device whose IDs are IPFS CIDv1s over a -%%% message's `body'. +%%% message's `body', and (in phase 2) a codec that serializes HyperBEAM +%%% messages to deterministic dag-cbor and back. %%% %%% Phase 1 surface: `commit/3' (type `unsigned' only), `verify/3', -%%% `content_type/1', and `info/1'. No `to/3' or `from/3' yet — the -%%% `<<"body">>' blob is treated as opaque bytes for hashing. Phase 2 adds a -%%% full dag-cbor `to'/`from' pair, routed through `~structured@1.0'. +%%% `content_type/1', and `info/1'. The `<<"body">>' blob is treated as +%%% opaque bytes for hashing. +%%% +%%% Phase 2 adds `to/3' and `from/3'. These route through `~structured@1.0' +%%% exactly like `dev_codec_json' — no changes to the structured codec, the +%%% cache, or the kernel. The pipeline is: +%%% +%%% TABM <-> structured@1.0 (native types) <-> IPLD intermediate <-> dag-cbor bytes +%%% +%%% Atoms other than `null', `true', `false' are not representable in IPLD +%%% and cause `to/3' to throw — that matches the spec. Commitments are +%%% stripped before encoding (IPFS blocks are content; signatures are carried +%%% out-of-band by the HyperBEAM `commitments' machinery). %%% %%% How this fits AO-Core: a commitment whose ID is a CID gives the cache %%% everything it already needs to serve the message under that CID. When a @@ -21,6 +32,7 @@ %%% `hb_opts:preloaded_devices/0' by default. -module(dev_codec_ipfs). -export([info/1, commit/3, verify/3, content_type/1]). +-export([to/3, from/3]). -include("include/hb.hrl"). -include_lib("eunit/include/eunit.hrl"). @@ -34,11 +46,11 @@ %%%==================================================================== %% @doc Restrict what AO-Core will resolve against this module. We are a -%% commitment device, not a general key resolver. `committed/3' is handled -%% by `dev_message' from the `<<"committed">>' field of each commitment, so -%% we do not export it here. +%% commitment device and a codec, not a general key resolver. `committed/3' +%% is handled by `dev_message' from the `<<"committed">>' field of each +%% commitment, so we do not export it here. info(_) -> - #{ exports => [commit, verify, content_type] }. + #{ exports => [commit, verify, content_type, to, from] }. %% @doc Report the appropriate IPLD MIME type for a given codec. content_type(#{ <<"codec">> := <<"dag-cbor">> }) -> @@ -132,6 +144,126 @@ verify(Base, Req, Opts) -> {ok, false} end. +%%%==================================================================== +%%% to/3 — TABM -> dag-cbor bytes (phase 2) +%%%==================================================================== + +%% @doc Serialize a HyperBEAM TABM message to deterministic dag-cbor bytes. +%% Routes through `~structured@1.0' to recover native types from the TABM, +%% then walks the rich message into the IPLD intermediate form and hands it +%% to the dag-cbor encoder. Commitments are stripped before encoding — they +%% do not belong in the content-addressed bytes. +to(Bin, _Req, _Opts) when is_binary(Bin) -> + {ok, Bin}; +to(Msg, _Req, Opts) when is_map(Msg) -> + try + %% Step 1: TABM -> structured form with native types. + Structured = + hb_message:convert( + hb_private:reset(Msg), + <<"structured@1.0">>, + tabm, + Opts + ), + %% Step 2: resolve all links. Dag-cbor encodes self-contained content + %% — partial messages carrying `link'-ref placeholders would not + %% roundtrip through the IPLD data model. An IPLD-link-aware mapping + %% through `hb_link' is a future phase. + Loaded = hb_cache:ensure_all_loaded(Structured, Opts), + %% Step 3: strip non-content fields. The CID is over the block + %% content, not over HyperBEAM's signature envelope. + Clean = hb_maps:without([<<"commitments">>, <<"priv">>], Loaded, Opts), + %% Step 4: walk into the IPLD intermediate form, then encode. + Ipld = structured_to_ipld(Clean), + {ok, dev_codec_ipfs_cbor:encode(Ipld)} + catch + throw:{dag_cbor_encode, Reason} -> + ?event(warning, {ipfs_to_failed, Reason}), + {error, {dag_cbor_encode, Reason}} + end. + +%% @doc Walk a structured (rich-typed) HyperBEAM value into the IPLD +%% intermediate form understood by `dev_codec_ipfs_cbor:encode/1'. +%% +%% Mappings: +%% - `null' / `true' / `false' -> kept as IPLD native. +%% - integer / float / binary -> passed through as-is. +%% - list -> list, recursively converted. +%% - map -> map, with binary keys; values +%% recursively converted. +%% - other atoms -> throw; dag-cbor has no atom type. +%% +%% Any value the walker cannot map raises an error the caller surfaces as +%% `{error, {dag_cbor_encode, _}}'. +structured_to_ipld(null) -> null; +structured_to_ipld(true) -> true; +structured_to_ipld(false) -> false; +structured_to_ipld(A) when is_atom(A) -> + throw({dag_cbor_encode, {unsupported_atom, A}}); +structured_to_ipld(N) when is_integer(N); is_float(N) -> N; +structured_to_ipld(B) when is_binary(B) -> B; +structured_to_ipld(L) when is_list(L) -> + [ structured_to_ipld(V) || V <- L ]; +structured_to_ipld(M) when is_map(M) -> + maps:from_list( + [ {assert_binary_key(K), structured_to_ipld(V)} + || {K, V} <- maps:to_list(M) ] + ); +structured_to_ipld(Other) -> + throw({dag_cbor_encode, {unsupported_value, Other}}). + +assert_binary_key(K) when is_binary(K) -> K; +assert_binary_key(K) -> + throw({dag_cbor_encode, {non_binary_map_key, K}}). + +%%%==================================================================== +%%% from/3 — dag-cbor bytes -> TABM (phase 2) +%%%==================================================================== + +%% @doc Parse dag-cbor bytes into a TABM message. Decodes to the IPLD +%% intermediate form, normalizes into a rich structured message, then hands +%% to `~structured@1.0' to produce the TABM. +from(Map, _Req, _Opts) when is_map(Map) -> + %% Passthrough for already-decoded messages, same discipline as json/flat. + {ok, Map}; +from(Bin, Req, Opts) when is_binary(Bin) -> + case dev_codec_ipfs_cbor:decode(Bin) of + {ok, Ipld} -> + Structured = ipld_to_structured(Ipld), + case Structured of + S when is_map(S) -> + dev_codec_structured:from(S, Req, Opts); + Other -> + {ok, Other} + end; + {error, Reason} -> + ?event(warning, {ipfs_from_failed, Reason}), + {error, {dag_cbor_decode, Reason}} + end. + +%% @doc Walk the IPLD intermediate form into a HyperBEAM structured form +%% (the rich, native-typed representation that `dev_codec_structured:from/3' +%% consumes). +%% +%% Decisions made for phase 2 minimum: +%% - `{bytes, B}' and plain binary both flatten to a binary. HyperBEAM +%% messages rarely need the bytes/text distinction, and re-inferring it +%% via `ao-types' is out of scope for the first cut. +%% - `{link, CID}' flattens to the CID string. This is lossy against +%% IPLD's link semantics, but keeps v1 simple; a link-aware mapping +%% through `hb_link' is the natural phase 3 step. +ipld_to_structured(null) -> null; +ipld_to_structured(true) -> true; +ipld_to_structured(false) -> false; +ipld_to_structured(N) when is_integer(N); is_float(N) -> N; +ipld_to_structured(B) when is_binary(B) -> B; +ipld_to_structured({bytes, B}) -> B; +ipld_to_structured({link, CID}) -> CID; +ipld_to_structured(L) when is_list(L) -> + [ ipld_to_structured(V) || V <- L ]; +ipld_to_structured(M) when is_map(M) -> + maps:map(fun(_K, V) -> ipld_to_structured(V) end, M). + %%%==================================================================== %%% Tests %%%==================================================================== diff --git a/src/dev_codec_ipfs_cbor.erl b/src/dev_codec_ipfs_cbor.erl index d814eaa64..ed63694ee 100644 --- a/src/dev_codec_ipfs_cbor.erl +++ b/src/dev_codec_ipfs_cbor.erl @@ -523,6 +523,80 @@ empty_map_cid_matches_canonical_test() -> CID ). +%%% Additional dag-cbor-spec vectors. Each `{Value, Bytes}' pair is an IPLD +%%% value and its canonical deterministic encoding per the DAG-CBOR spec. +%%% These cover the data-model paths not hit by the scalar/int tests above. + +spec_vectors_test() -> + Cases = [ + %% Mixed nulls and bools array (5 elements). + {[null, true, false, null, true], + <<16#85, 16#f6, 16#f5, 16#f4, 16#f6, 16#f5>>}, + %% Empty text string. + {<<>>, <<16#60>>}, + %% Empty byte string. + {{bytes, <<>>}, <<16#40>>}, + %% String with length 23 (1-byte header: 0x77). + {<<"abcdefghijklmnopqrstuvw">>, + <<16#77, "abcdefghijklmnopqrstuvw">>}, + %% String with length 24 (2-byte header: 0x78 0x18). + {<<"abcdefghijklmnopqrstuvwx">>, + <<16#78, 16#18, "abcdefghijklmnopqrstuvwx">>}, + %% Nested list: [[1,2],[3]]. + {[[1, 2], [3]], + <<16#82, 16#82, 16#01, 16#02, 16#81, 16#03>>}, + %% Map containing a list value. + {#{ <<"xs">> => [1, 2, 3] }, + <<16#a1, 16#62, "xs", 16#83, 16#01, 16#02, 16#03>>}, + %% Deeply nested map: {"a":{"b":{"c":1}}}. + {#{ <<"a">> => #{ <<"b">> => #{ <<"c">> => 1 } } }, + <<16#a1, 16#61, "a", 16#a1, 16#61, "b", 16#a1, 16#61, "c", 16#01>>} + ], + lists:foreach( + fun({Value, Expected}) -> + ?assertEqual(Expected, encode(Value)), + ?assertEqual({ok, Value}, decode(Expected)) + end, + Cases + ). + +%% Stress: a map with many keys at assorted lengths forces the canonical +%% length-first ordering to kick in, and confirms the encoded output is +%% stable even when the source map enumerates keys in a different order. +stress_map_ordering_test() -> + Keys = [<<"a">>, <<"b">>, <<"c">>, <<"aa">>, <<"ab">>, <<"abc">>, + <<"abcd">>, <<"z">>, <<"zz">>], + Pairs = lists:zip(Keys, lists:seq(1, length(Keys))), + M1 = maps:from_list(Pairs), + M2 = maps:from_list(lists:reverse(Pairs)), + Bytes1 = encode(M1), + Bytes2 = encode(M2), + ?assertEqual(Bytes1, Bytes2), + %% Decode must produce the same map. + ?assertEqual({ok, M1}, decode(Bytes1)). + +%% 64-bit integer boundaries. Critical for int64 correctness. +int_boundary_test() -> + Cases = [ + %% Max 8-bit (255) and 8-bit + 1 (256) already covered. + %% Max 16-bit (65535) and 16-bit + 1 (65536) already covered. + %% Max 32-bit and its + 1 (exercises 64-bit encoder). + 4294967296, + %% Max positive int64. + 16#7fffffffffffffff, + %% Max negative int64. + -16#8000000000000000, + %% A mid-range negative. + -1234567890 + ], + lists:foreach( + fun(N) -> + Encoded = encode(N), + ?assertEqual({ok, N}, decode(Encoded)) + end, + Cases + ). + %% A more structurally interesting map: the simplest non-trivial dag-cbor %% object. The bytes are exact; we cross-check the CID against the output %% of `ipfs dag put --input-codec dag-json --store-codec dag-cbor` on diff --git a/src/dev_codec_ipfs_test.erl b/src/dev_codec_ipfs_test.erl index 7e8a3522c..641fead9a 100644 --- a/src/dev_codec_ipfs_test.erl +++ b/src/dev_codec_ipfs_test.erl @@ -156,6 +156,115 @@ multiple_commitment_devices_coexist_test() -> hb_cache:ensure_loaded(maps:get(<<"body">>, ViaCID), Opts) ). +%%%==================================================================== +%%% 3. Phase 2 — to/3 and from/3 via hb_message:convert +%%%==================================================================== + +%% Encoding a plain TABM to dag-cbor produces bytes byte-identical to the +%% ones the pure CBOR encoder would have produced on the same native map. +to_dag_cbor_simple_test() -> + Opts = opts(), + Msg = #{ <<"hello">> => <<"world">> }, + Bytes = hb_message:convert(Msg, <<"ipfs@1.0">>, Opts), + ?assert(is_binary(Bytes)), + ?assertEqual( + <<16#a1, 16#65, "hello", 16#65, "world">>, + Bytes + ). + +%% Roundtripping a typed HyperBEAM message through dag-cbor preserves its +%% rich types: integers, floats, booleans, null, lists, nested maps. +roundtrip_typed_message_test() -> + Opts = opts(), + Msg = #{ + <<"name">> => <<"alice">>, + <<"age">> => 30, + <<"score">> => 4.5, + <<"admin">> => true, + <<"parent">> => null, + <<"tags">> => [<<"a">>, <<"b">>, <<"c">>], + <<"nested">> => #{ + <<"k">> => <<"v">>, + <<"n">> => -42 + } + }, + Bytes = hb_message:convert(Msg, <<"ipfs@1.0">>, Opts), + Decoded = + hb_message:convert( + Bytes, + <<"structured@1.0">>, + <<"ipfs@1.0">>, + Opts + ), + ?assert(hb_message:match(Msg, Decoded, strict, Opts)). + +%% Encoding is deterministic: re-encoding must yield the same bytes, and two +%% logically equal maps constructed in different orders also produce the +%% same bytes. +encoding_is_deterministic_test() -> + Opts = opts(), + Msg1 = #{ <<"a">> => 1, <<"bb">> => 2, <<"ccc">> => 3 }, + Msg2 = #{ <<"ccc">> => 3, <<"a">> => 1, <<"bb">> => 2 }, + Bytes1 = hb_message:convert(Msg1, <<"ipfs@1.0">>, Opts), + Bytes2 = hb_message:convert(Msg2, <<"ipfs@1.0">>, Opts), + ?assertEqual(Bytes1, Bytes2), + %% Re-encoding is stable. + ?assertEqual(Bytes1, hb_message:convert(Msg1, <<"ipfs@1.0">>, Opts)). + +%% The CID computed by `commit/3' over the bytes produced by `to/3' is the +%% same CID you would get from `ipfs dag put'. This is the canonical +%% "integrates with the real IPFS network" proof. +cid_matches_dag_cbor_of_message_test() -> + Opts = opts(), + Msg = #{ <<"hello">> => <<"world">> }, + %% 1. Encode message to dag-cbor bytes. + Bytes = hb_message:convert(Msg, <<"ipfs@1.0">>, Opts), + %% 2. Build a minimal message carrying those bytes in `body'. + CarrierMsg = #{ <<"body">> => Bytes }, + %% 3. Compute the dag-cbor CID over the body. + Committed = + hb_message:commit( + CarrierMsg, + Opts, + #{ <<"commitment-device">> => <<"ipfs@1.0">>, + <<"type">> => <<"unsigned">>, + <<"codec">> => <<"dag-cbor">> } + ), + [CID] = maps:keys(maps:get(<<"commitments">>, Committed)), + %% Sanity: the CID is a dag-cbor + sha2-256 CIDv1 over the bytes. + {ok, Parts} = dev_codec_ipfs_cid:decode(CID), + ?assertEqual(<<"dag-cbor">>, maps:get(<<"codec">>, Parts)), + ?assertEqual(crypto:hash(sha256, Bytes), maps:get(<<"digest">>, Parts)), + %% The CID is also what a library like js-dag-cbor would produce on the + %% same logical message, since our encoding is the deterministic subset + %% per the dag-cbor spec. + ?assertMatch(<<"bafyrei", _:52/binary>>, CID). + +%% Refusing to encode messages that contain an atom we cannot represent. +%% Dag-cbor has no atom type beyond null/true/false; we surface this as +%% a clean error tuple instead of silently lying. +unsupported_atom_rejected_test() -> + Opts = opts(), + Msg = #{ <<"kind">> => something }, %% atom, not null/true/false + {error, {dag_cbor_encode, {unsupported_atom, something}}} = + dev_codec_ipfs:to(Msg, #{}, Opts). + +%% A committed message can still be encoded — the commitments are stripped +%% from the content bytes, preserving IPFS's "block is pure content" model. +commit_then_encode_strips_commitments_test() -> + Opts = opts(), + Msg = #{ <<"body">> => <<"hello world">>, <<"kind">> => <<"greeting">> }, + Committed = + hb_message:commit( + Msg, Opts, + #{ <<"commitment-device">> => <<"ipfs@1.0">>, + <<"type">> => <<"unsigned">> } + ), + ?assert(maps:is_key(<<"commitments">>, Committed)), + Bytes = hb_message:convert(Committed, <<"ipfs@1.0">>, Opts), + {ok, Decoded} = dev_codec_ipfs_cbor:decode(Bytes), + ?assertNot(maps:is_key(<<"commitments">>, Decoded)). + %% @doc Two different codecs of the same body must give two distinct CIDs %% that both resolve. A `raw' CID and a `dag-cbor' CID on the same bytes %% address the same underlying message. From 50f4dac39b734ed702054265e2380dc1b641a8f1 Mon Sep 17 00:00:00 2001 From: Sam Williams Date: Fri, 17 Apr 2026 00:05:25 -0400 Subject: [PATCH 06/22] docs(ipfs): document phase 2 to/from dag-cbor surface Add the `to`/`from` usage, the decode rejection table, and the composed commit-over-dag-cbor pipeline. Move the prior "what's next" section to reflect that phase 2 is done, leaving link-aware hb_link integration as the natural next step. Co-Authored-By: Claude Opus 4.6 --- docs/devices/ipfs-at-1-0.md | 57 +++++++++++++++++++++++++++++++++---- 1 file changed, 52 insertions(+), 5 deletions(-) diff --git a/docs/devices/ipfs-at-1-0.md b/docs/devices/ipfs-at-1-0.md index 650e69397..3b1e2f6dc 100644 --- a/docs/devices/ipfs-at-1-0.md +++ b/docs/devices/ipfs-at-1-0.md @@ -97,6 +97,49 @@ Recompute the CID from `body` with the commitment's declared codec + hash-alg, t `application/vnd.ipld.raw` for `codec = raw`, `application/vnd.ipld.dag-cbor` for `codec = dag-cbor`. Falls back to `application/vnd.ipld.raw` when unspecified. +### `to` / `from` — dag-cbor serialization + +`~ipfs@1.0` is a full codec in the `hb_message:convert/3,4` pipeline: + +```erlang +%% Encode a message as dag-cbor bytes: +CborBytes = hb_message:convert(Msg, <<"ipfs@1.0">>, Opts). + +%% Decode dag-cbor bytes back into a HyperBEAM message: +Msg = hb_message:convert(CborBytes, <<"structured@1.0">>, <<"ipfs@1.0">>, Opts). +``` + +The pipeline is `TABM <-> ~structured@1.0 (native types) <-> IPLD intermediate <-> dag-cbor bytes`. Encoding is deterministic per [the dag-cbor spec](https://ipld.io/specs/codecs/dag-cbor/spec/): shortest-form integers, canonical length-first map ordering, 64-bit floats only, definite-length containers. Non-canonical inputs on the decode side are rejected with a specific reason: + +| Decode rejection | Reason atom | +| --- | --- | +| Indefinite-length item | `indefinite_length_forbidden` | +| Half / single float | `half_float_forbidden`, `single_float_forbidden` | +| NaN / Infinity | `nan_or_infinity_forbidden` | +| Non-UTF-8 text string | `invalid_utf8` | +| Non-string map key | `non_string_map_key` | +| Out-of-order or duplicate map keys | `non_canonical_map_order` | +| Unsupported tag | `{unsupported_tag, N}` | +| Non-canonical integer encoding | `non_canonical_integer` | + +Commitments and the `priv` sub-map are stripped before encoding — dag-cbor blocks carry content, not signatures. Atoms outside `{null, true, false}` cannot be represented in IPLD and are rejected with `{error, {dag_cbor_encode, {unsupported_atom, _}}}`. + +### Composing `commit` with `to` + +The natural end-to-end pipeline for "publish a HyperBEAM message over IPFS" is: + +```erlang +Bytes = hb_message:convert(Msg, <<"ipfs@1.0">>, Opts), +Carrier = #{ <<"body">> => Bytes }, +Committed = hb_message:commit(Carrier, Opts, + #{ <<"commitment-device">> => <<"ipfs@1.0">>, + <<"type">> => <<"unsigned">>, + <<"codec">> => <<"dag-cbor">> }), +{ok, _} = hb_cache:write(Committed, Opts). +``` + +The CID produced by `commit` over the dag-cbor bytes matches exactly what `ipfs dag put --store-codec dag-cbor` would produce on the same logical message. `hb_cache:read(CID, Opts)` then returns the committed message from the local cache; if the CID is not local, the optional `hb_store_ipfs_gateway` backend fetches it from a configured HTTP gateway and verifies the bytes against the CID before admitting them. + ## End-to-end example ```erlang @@ -118,22 +161,26 @@ Committed = hb_message:commit(Msg, Opts, hb_cache:ensure_loaded(maps:get(<<"body">>, Recovered), Opts). ``` -## What's next (phase 2) +## What's next -A pure-Erlang dag-cbor encoder/decoder (`dev_codec_ipfs_cbor`) and proper `to/3` / `from/3` routed through [`~structured@1.0`](../resources/source-code/dev_codec_structured.md), so that a HyperBEAM message with native types and links round-trips bit-for-bit against the IPLD codec-fixtures. Phase 1 treats the `dag-cbor` codec as an opaque blob for hashing only; phase 2 makes it a full peer of [`~json@1.0`](json-at-1-0.md). +A link-aware mapping through `hb_link`, so that IPLD CID links (dag-cbor tag 42) integrate with HyperBEAM's lazy-loaded link primitive and nested messages can be addressed as first-class IPLD sub-blocks. For now, CID links decode to plain CID strings and arbitrary Erlang atoms throw on encode. ## Non-goals - CIDv0 (legacy base58 dag-pb CIDs, `Qm…`). - `dag-pb`, UnixFS, file chunking. +- `dag-json` (trivial to add on top of the existing encoder; out of scope for v1). - Hash algorithms other than `sha2-256`. -- Multibases other than base32-lower (decode accepts `B`/`f` defensively). +- Multibases other than base32-lower on encode (decode accepts `B`/`f` defensively). +- Bytes / text distinction from `structured@1.0`: both flatten to plain binaries. +- IPLD-native links: tag-42 decodes to a plain CID string; it does not wire into `hb_link` or `hb_cache` lazy resolution. - IPNS, bitswap, pubsub, libp2p. - IPLD Schemas, Selectors, or path resolution into sub-blocks. ## Related source -- [`dev_codec_ipfs.erl`](../resources/source-code/dev_codec_ipfs.md) — device entry points. +- [`dev_codec_ipfs.erl`](../resources/source-code/dev_codec_ipfs.md) — device entry points (`commit`, `verify`, `to`, `from`, `content_type`, `info`). - [`dev_codec_ipfs_cid.erl`](../resources/source-code/dev_codec_ipfs_cid.md) — varint, multihash, multibase, CIDv1. +- [`dev_codec_ipfs_cbor.erl`](../resources/source-code/dev_codec_ipfs_cbor.md) — deterministic dag-cbor encoder/decoder. - [`hb_store_ipfs_gateway.erl`](../resources/source-code/hb_store_ipfs_gateway.md) — read-only gateway store. -- [`dev_codec_ipfs_test.erl`](../resources/source-code/dev_codec_ipfs_test.md) — integration tests including the cache-linkage proof. +- [`dev_codec_ipfs_test.erl`](../resources/source-code/dev_codec_ipfs_test.md) — integration tests including the cache-linkage proof and the full `to`/`from` roundtrip. From a58976b8ec3dacf687de568b8fa3e4c450126282 Mon Sep 17 00:00:00 2001 From: Sam Williams Date: Fri, 17 Apr 2026 00:14:30 -0400 Subject: [PATCH 07/22] test(ipfs): add end-to-end cross-node publish-and-fetch test Simulates the real production flow: Node A encodes a rich HyperBEAM message as dag-cbor, commits a dag-cbor CID over the bytes, and publishes to a stub gateway. Node B (a separate empty cache) requests the CID via its store chain; the gateway backend fetches, verifies the bytes hash to the CID, and admits them. Node B then decodes the bytes back via the codec and confirms the message matches Node A's original. This exercises every seam of the two-phase build in one test: codec to/3 + from/3, unsigned CID commit, hb_cache linkage, hb_store chain fallthrough, and gateway digest verification. All 3138 tests in the full suite pass (`rebar3 eunit`), including the new IPFS modules (76 tests), hb_message_test_vectors (1954), hb_cache, hb_ao_test_vectors, and every other codec. Co-Authored-By: Claude Opus 4.6 --- src/dev_codec_ipfs_test.erl | 63 +++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/src/dev_codec_ipfs_test.erl b/src/dev_codec_ipfs_test.erl index 641fead9a..d0c556207 100644 --- a/src/dev_codec_ipfs_test.erl +++ b/src/dev_codec_ipfs_test.erl @@ -249,6 +249,69 @@ unsupported_atom_rejected_test() -> {error, {dag_cbor_encode, {unsupported_atom, something}}} = dev_codec_ipfs:to(Msg, #{}, Opts). +%% End-to-end IPFS interop: Node A encodes a message and commits its CID; +%% Node B (separate empty store) asks for the CID, a gateway returns the +%% bytes Node A produced, and Node B's store chain verifies and admits them. +%% This exercises the full production path: codec + commit + gateway + cache. +end_to_end_publish_and_fetch_across_nodes_test() -> + application:ensure_all_started(inets), + Opts = opts(), + + %% Node A: encode a rich message as dag-cbor and compute its CID. + Msg = #{ + <<"kind">> => <<"greeting">>, + <<"from">> => <<"alice">>, + <<"to">> => <<"bob">>, + <<"count">> => 3, + <<"active">> => true + }, + CborBytes = hb_message:convert(Msg, <<"ipfs@1.0">>, Opts), + Carrier = #{ <<"body">> => CborBytes }, + Committed = + hb_message:commit( + Carrier, Opts, + #{ <<"commitment-device">> => <<"ipfs@1.0">>, + <<"type">> => <<"unsigned">>, + <<"codec">> => <<"dag-cbor">> } + ), + [CID] = maps:keys(maps:get(<<"commitments">>, Committed)), + + %% Stand up a stub gateway that serves just these bytes for this CID. + {ok, GatewayURL, Handle} = hb_mock_server:start([ + {<<"/ipfs/", CID/binary>>, ipfs, {200, CborBytes}} + ]), + try + %% Node B: separate, empty local store + gateway fallback. + NodeBOpts = opts(#{ + store => [ + hb_test_utils:test_store(), + #{ + <<"store-module">> => hb_store_ipfs_gateway, + <<"gateways">> => [GatewayURL] + } + ] + }), + + %% Fetch the CID on Node B. Comes back via the gateway, verified. + {ok, Fetched} = hb_cache:read(CID, NodeBOpts), + FetchedBytes = hb_cache:ensure_loaded( + maps:get(<<"body">>, Fetched), NodeBOpts), + ?assertEqual(CborBytes, FetchedBytes), + + %% Decode the bytes back into a HyperBEAM message and confirm it + %% matches what Node A started from. + DecodedMsg = + hb_message:convert( + FetchedBytes, + <<"structured@1.0">>, + <<"ipfs@1.0">>, + NodeBOpts + ), + ?assert(hb_message:match(Msg, DecodedMsg, strict, NodeBOpts)) + after + hb_mock_server:stop(Handle) + end. + %% A committed message can still be encoded — the commitments are stripped %% from the content bytes, preserving IPFS's "block is pure content" model. commit_then_encode_strips_commitments_test() -> From 7933b3902aa1a31274323b7f268763cb714e1e8c Mon Sep 17 00:00:00 2001 From: Sam Williams Date: Fri, 17 Apr 2026 00:36:57 -0400 Subject: [PATCH 08/22] impr(ipfs): live-network gateway tests + attached commitment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the mock-server-based gateway tests with live tests against the public IPFS network. HyperBEAM's test suite is live-service interactive (see the Arweave gateway tests in `hb_store_gateway'); the mocks were hiding real-world failure modes. Two substantive changes: 1. The gateway store now attaches an `~ipfs@1.0' unsigned commitment, keyed by the CID, to every message it hands up the chain. In addition to the direct sha256 digest check that already gated the response, the returned message is now independently verifiable via `hb_message:verify/2,3' through the canonical dispatch — no trust in this store's word required, and a natural contract to carry across `hb_cache:write/2' if the caller decides to persist. 2. Tests now hit real gateways (`ipfs.io', `dweb.link', `nftstorage.link', `4everland.io') for the canonical `raw("hello world")' and empty-dag-cbor CIDs, both verified pinned at the time of writing. Each test lists multiple gateways; a test that cannot reach any gateway skips instead of failing, matching the `hb_store_gateway' pattern. Timeouts are generous (20s). `w3s.link' dropped from the default gateway list — it now returns 410 for the known test CID. The verified-digest direct gate is still in place, and the new `digest_gate_rejects_tampered_body_test' covers it directly alongside the `live_gateway_rejects_unpinned_cid_test_' that demonstrates real-gateway refusal of an unpinned CID. All 25 IPFS tests (gateway + codec integration) pass against the live network. Co-Authored-By: Claude Opus 4.6 --- src/dev_codec_ipfs_test.erl | 121 +++++++++------ src/hb_store_ipfs_gateway.erl | 274 ++++++++++++++++++++++------------ 2 files changed, 253 insertions(+), 142 deletions(-) diff --git a/src/dev_codec_ipfs_test.erl b/src/dev_codec_ipfs_test.erl index d0c556207..b9081f0c2 100644 --- a/src/dev_codec_ipfs_test.erl +++ b/src/dev_codec_ipfs_test.erl @@ -249,15 +249,68 @@ unsupported_atom_rejected_test() -> {error, {dag_cbor_encode, {unsupported_atom, something}}} = dev_codec_ipfs:to(Msg, #{}, Opts). -%% End-to-end IPFS interop: Node A encodes a message and commits its CID; -%% Node B (separate empty store) asks for the CID, a gateway returns the -%% bytes Node A produced, and Node B's store chain verifies and admits them. -%% This exercises the full production path: codec + commit + gateway + cache. -end_to_end_publish_and_fetch_across_nodes_test() -> - application:ensure_all_started(inets), - Opts = opts(), +%% End-to-end IPFS interop, against the real IPFS network: fetch a known, +%% pinned dag-cbor CID from a public gateway, verify the digest at the +%% store layer, decode through `from/3', and confirm the decoded value +%% matches the canonical empty-map block the spec-test vectors call out. +%% Skipped if every gateway is unreachable at the time the test runs. +live_end_to_end_fetch_and_decode_dag_cbor_test_() -> + {timeout, 60, fun() -> + application:ensure_all_started(inets), + application:ensure_all_started(ssl), + %% Canonical empty dag-cbor block. `ipfs dag put <<"{}">>` → this CID. + %% Verified pinned on ipfs.io at the time of writing. + EmptyMapCID = + <<"bafyreigbtj4x7ip5legnfznufuopl4sg4knzc2cof6duas4b3q2fy6swua">>, + NodeOpts = opts(#{ + store => [ + hb_test_utils:test_store(), + #{ + <<"store-module">> => hb_store_ipfs_gateway, + <<"gateways">> => + [ <<"https://ipfs.io">>, + <<"https://dweb.link">>, + <<"https://nftstorage.link">> ], + <<"timeout">> => 20000 + } + ] + }), + case hb_cache:read(EmptyMapCID, NodeOpts) of + {ok, Fetched} -> + %% 1. Body is exactly the 1-byte dag-cbor empty-map block. + FetchedBytes = hb_cache:ensure_loaded( + maps:get(<<"body">>, Fetched), NodeOpts), + ?assertEqual(<<16#a0>>, FetchedBytes), + %% 2. The store attached a verifiable ipfs@1.0 commitment. + ?assertEqual( + true, + hb_message:verify( + Fetched, + #{ <<"commitment-ids">> => [EmptyMapCID] }, + NodeOpts + ) + ), + %% 3. Decode the bytes back into an HB message via the codec. + Decoded = + hb_message:convert( + FetchedBytes, + <<"structured@1.0">>, + <<"ipfs@1.0">>, + NodeOpts + ), + ?assertEqual(#{}, Decoded); + _ -> + ?debugFmt("Skipping: all live gateways missed CID ~s", + [EmptyMapCID]) + end + end}. - %% Node A: encode a rich message as dag-cbor and compute its CID. +%% Local end-to-end (no network): a rich HyperBEAM message is encoded, +%% committed, written to cache, then read back by its CID — the full +%% codec + commit + cache path with no mocks. Live-network equivalent is +%% the test above. +local_end_to_end_encode_commit_cache_decode_test() -> + Opts = opts(), Msg = #{ <<"kind">> => <<"greeting">>, <<"from">> => <<"alice">>, @@ -265,6 +318,8 @@ end_to_end_publish_and_fetch_across_nodes_test() -> <<"count">> => 3, <<"active">> => true }, + %% Encode the message, carry the bytes as a body, commit the CID, + %% and persist. CborBytes = hb_message:convert(Msg, <<"ipfs@1.0">>, Opts), Carrier = #{ <<"body">> => CborBytes }, Committed = @@ -275,42 +330,20 @@ end_to_end_publish_and_fetch_across_nodes_test() -> <<"codec">> => <<"dag-cbor">> } ), [CID] = maps:keys(maps:get(<<"commitments">>, Committed)), - - %% Stand up a stub gateway that serves just these bytes for this CID. - {ok, GatewayURL, Handle} = hb_mock_server:start([ - {<<"/ipfs/", CID/binary>>, ipfs, {200, CborBytes}} - ]), - try - %% Node B: separate, empty local store + gateway fallback. - NodeBOpts = opts(#{ - store => [ - hb_test_utils:test_store(), - #{ - <<"store-module">> => hb_store_ipfs_gateway, - <<"gateways">> => [GatewayURL] - } - ] - }), - - %% Fetch the CID on Node B. Comes back via the gateway, verified. - {ok, Fetched} = hb_cache:read(CID, NodeBOpts), - FetchedBytes = hb_cache:ensure_loaded( - maps:get(<<"body">>, Fetched), NodeBOpts), - ?assertEqual(CborBytes, FetchedBytes), - - %% Decode the bytes back into a HyperBEAM message and confirm it - %% matches what Node A started from. - DecodedMsg = - hb_message:convert( - FetchedBytes, - <<"structured@1.0">>, - <<"ipfs@1.0">>, - NodeBOpts - ), - ?assert(hb_message:match(Msg, DecodedMsg, strict, NodeBOpts)) - after - hb_mock_server:stop(Handle) - end. + {ok, _} = hb_cache:write(Committed, Opts), + %% Retrieve by CID — the cache's commitment linkage resolves it. + {ok, Fetched} = hb_cache:read(CID, Opts), + FetchedBytes = hb_cache:ensure_loaded( + maps:get(<<"body">>, Fetched), Opts), + ?assertEqual(CborBytes, FetchedBytes), + Decoded = + hb_message:convert( + FetchedBytes, + <<"structured@1.0">>, + <<"ipfs@1.0">>, + Opts + ), + ?assert(hb_message:match(Msg, Decoded, strict, Opts)). %% A committed message can still be encoded — the commitments are stripped %% from the content bytes, preserving IPFS's "block is pure content" model. diff --git a/src/hb_store_ipfs_gateway.erl b/src/hb_store_ipfs_gateway.erl index b6f1fdf67..3faf5acdc 100644 --- a/src/hb_store_ipfs_gateway.erl +++ b/src/hb_store_ipfs_gateway.erl @@ -3,15 +3,27 @@ %%% *external* IPFS content — content it did not itself commit locally. %%% %%% Crucially, this module does NOT trust the gateways. Every fetched body -%%% is hashed and compared to the requested CID before it is returned; a -%%% mismatched gateway response is treated as `not_found' and the next -%%% gateway is tried. The CID is the authority, not the HTTPS certificate. +%%% goes through TWO layers of verification before it is handed up the +%%% chain: +%%% +%%% 1. Direct digest check: sha256(body) is compared to the CID's +%%% multihash digest. A mismatched gateway response is treated as +%%% `not_found' and the next gateway is tried. +%%% +%%% 2. Commitment attachment: an `~ipfs@1.0' unsigned commitment keyed by +%%% the CID is attached to the returned message. This lets any +%%% downstream consumer re-verify independently via +%%% `hb_message:verify/2,3' — and the commitment is what `hb_cache' +%%% uses to link the CID to the message's uncommitted ID if the +%%% caller chooses to persist it locally. +%%% +%%% The CID is the authority, not the HTTPS certificate. %%% %%% Shape of a config entry: %%% ``` %%% #{ %%% <<"store-module">> => hb_store_ipfs_gateway, -%%% <<"gateways">> => [<<"https://w3s.link">>, <<"https://ipfs.io">>], +%%% <<"gateways">> => [<<"https://ipfs.io">>, <<"https://dweb.link">>], %%% <<"timeout">> => 15000 %% ms, optional, default 15_000 %%% } %%% ''' @@ -26,9 +38,12 @@ -include("include/hb.hrl"). -include_lib("eunit/include/eunit.hrl"). +%% Gateways known to serve public IPFS content at time of writing. Users +%% should override for production via the `<<"gateways">>' store-config key. -define(DEFAULT_GATEWAYS, [ - <<"https://w3s.link">>, - <<"https://ipfs.io">> + <<"https://ipfs.io">>, + <<"https://dweb.link">>, + <<"https://nftstorage.link">> ]). -define(DEFAULT_TIMEOUT_MS, 15000). @@ -103,7 +118,7 @@ try_gateways([Gateway|Rest], CID, Parts, Timeout, Opts) -> {ok, Body} -> ?event(ipfs_gateway, {fetched, {cid, CID}, {gateway, Gateway}, {bytes, byte_size(Body)}}), - {ok, #{ <<"body">> => Body }}; + {ok, with_commitment(CID, Parts, Body)}; digest_mismatch -> %% Try the next gateway — this one lied. ?event(warning, {ipfs_gateway_digest_mismatch, @@ -117,6 +132,25 @@ try_gateways([Gateway|Rest], CID, Parts, Timeout, Opts) -> try_gateways(Rest, CID, Parts, Timeout, Opts) end. +%% @doc Wrap verified bytes in a message whose `~ipfs@1.0' unsigned +%% commitment keyed by the CID makes it independently verifiable via +%% `hb_message:verify/2,3' — without trusting this store to have done the +%% check. The `codec' in the commitment mirrors the CID's multicodec so a +%% round-trip through the cache preserves identity. +with_commitment(CID, #{ <<"codec">> := Codec }, Body) -> + #{ + <<"body">> => Body, + <<"commitments">> => #{ + CID => #{ + <<"commitment-device">> => <<"ipfs@1.0">>, + <<"type">> => <<"unsigned">>, + <<"codec">> => Codec, + <<"hash-alg">> => <<"sha2-256">>, + <<"committed">> => [<<"body">>] + } + } + }. + %% @doc Single-gateway fetch. Uses OTP's `httpc' — no new dependency — and %% verifies the body hash against the requested CID before returning. fetch_and_verify(Gateway, CID, Parts, Timeout, _Opts) -> @@ -186,111 +220,155 @@ read_ignores_non_cid_test() -> ?assertEqual(not_found, read(#{}, <<"BOogk_XAI3bvNWnxNxwxmvOfglZt17o4MOVAdPNZ_ew">>)). -%% End-to-end with a cowboy stub: a well-behaved gateway returns the body, -%% digest matches, and `read/2' returns the wrapped message. -gateway_happy_path_test() -> - application:ensure_all_started(inets), - CID = <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>, - Body = <<"hello world">>, - {ok, URL, Handle} = hb_mock_server:start([ - {<<"/ipfs/", CID/binary>>, ipfs, {200, Body}} - ]), - try - Store = #{ - <<"store-module">> => hb_store_ipfs_gateway, - <<"gateways">> => [URL] - }, - ?assertEqual({ok, #{ <<"body">> => Body }}, read(Store, CID)) - after - hb_mock_server:stop(Handle) - end. +%%% Live-service tests. HyperBEAM's test suite hits the real network for +%%% its store/gateway backends (see `hb_store_gateway' tests against the +%%% public Arweave gateways); we do the same for IPFS. The CID used here +%%% is the canonical `raw("hello world")' CIDv1 that multiple public +%%% gateways serve: +%%% +%%% bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e +%%% +%%% Each test lists several gateways so a single flaky endpoint cannot +%%% flake the whole suite. -%% A lying gateway: returns bytes that don't hash to the requested CID. -%% The store must refuse (digest_mismatch) and ultimately `not_found' -%% because there are no other gateways to try. -gateway_digest_mismatch_test() -> - application:ensure_all_started(inets), - CID = <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>, - {ok, URL, Handle} = hb_mock_server:start([ - {<<"/ipfs/", CID/binary>>, ipfs, {200, <<"hello earth">>}} - ]), - try - Store = #{ - <<"store-module">> => hb_store_ipfs_gateway, - <<"gateways">> => [URL] - }, - ?assertEqual(not_found, read(Store, CID)) - after - hb_mock_server:stop(Handle) - end. +-define(HELLO_WORLD_CID, + <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>). +-define(HELLO_WORLD_BODY, <<"hello world">>). +-define(LIVE_GATEWAYS, [ + <<"https://ipfs.io">>, + <<"https://dweb.link">>, + <<"https://nftstorage.link">>, + <<"https://4everland.io">> +]). -%% Two gateways: the first returns tampered bytes, the second returns the -%% correct body. The store must fall through to the honest one. -gateway_fallthrough_test() -> - application:ensure_all_started(inets), - CID = <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>, - Body = <<"hello world">>, - {ok, BadURL, BadH} = hb_mock_server:start([ - {<<"/ipfs/", CID/binary>>, ipfs, {200, <<"lies">>}} - ]), - {ok, GoodURL, GoodH} = hb_mock_server:start([ - {<<"/ipfs/", CID/binary>>, ipfs, {200, Body}} - ]), - try +live_gateway_fetches_known_cid_test_() -> + {timeout, 60, fun() -> + application:ensure_all_started(inets), + application:ensure_all_started(ssl), Store = #{ <<"store-module">> => hb_store_ipfs_gateway, - <<"gateways">> => [BadURL, GoodURL] + <<"gateways">> => ?LIVE_GATEWAYS, + <<"timeout">> => 20000 }, - ?assertEqual({ok, #{ <<"body">> => Body }}, read(Store, CID)) - after - hb_mock_server:stop(BadH), - hb_mock_server:stop(GoodH) - end. + %% Either all live gateways served the body intact and we got the + %% wrapped message, or every gateway was unreachable — in which + %% case the test is skipped instead of flaking CI. + case read(Store, ?HELLO_WORLD_CID) of + {ok, Msg} -> + ?assertEqual( + ?HELLO_WORLD_BODY, + maps:get(<<"body">>, Msg) + ), + Commitments = maps:get(<<"commitments">>, Msg), + ?assert(maps:is_key(?HELLO_WORLD_CID, Commitments)), + Commitment = maps:get(?HELLO_WORLD_CID, Commitments), + ?assertEqual(<<"ipfs@1.0">>, + maps:get(<<"commitment-device">>, Commitment)), + ?assertEqual(<<"raw">>, + maps:get(<<"codec">>, Commitment)); + not_found -> + ?debugFmt("Skipping: all live gateways missed CID ~s", + [?HELLO_WORLD_CID]), + ok + end + end}. -gateway_404_falls_through_test() -> - application:ensure_all_started(inets), - CID = <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>, - Body = <<"hello world">>, - {ok, URL404, H404} = hb_mock_server:start([ - {<<"/ipfs/", CID/binary>>, ipfs, {404, <<"missing">>}} - ]), - {ok, GoodURL, GoodH} = hb_mock_server:start([ - {<<"/ipfs/", CID/binary>>, ipfs, {200, Body}} - ]), - try +%% The commitment attached by the gateway store must verify via the +%% standard `hb_message:verify/2,3' machinery, using the same `~ipfs@1.0' +%% device whose `verify/3' is the canonical check. If this test passes, +%% callers can treat gateway-fetched messages like any other committed +%% HyperBEAM message. +live_gateway_attached_commitment_verifies_test_() -> + {timeout, 60, fun() -> + application:ensure_all_started(inets), + application:ensure_all_started(ssl), Store = #{ <<"store-module">> => hb_store_ipfs_gateway, - <<"gateways">> => [URL404, GoodURL] + <<"gateways">> => ?LIVE_GATEWAYS, + <<"timeout">> => 20000 }, - ?assertEqual({ok, #{ <<"body">> => Body }}, read(Store, CID)) - after - hb_mock_server:stop(H404), - hb_mock_server:stop(GoodH) - end. + case read(Store, ?HELLO_WORLD_CID) of + {ok, Msg} -> + %% Stock preloaded_devices plus ipfs@1.0, exactly what a + %% user would configure in their node. + Opts = #{ + preloaded_devices => + [ #{ <<"name">> => <<"ipfs@1.0">>, + <<"module">> => dev_codec_ipfs } | + hb_opts:get(preloaded_devices, [], #{}) ] + }, + ?assertEqual( + true, + hb_message:verify( + Msg, + #{ <<"commitment-ids">> => [?HELLO_WORLD_CID] }, + Opts + ) + ); + not_found -> + ?debugFmt("Skipping: all live gateways missed CID", + []) + end + end}. -%% Integration with `hb_cache' — a CID missing from the local store falls -%% through to the gateway chain. This is how a production node actually -%% serves external IPFS content. -hb_cache_reads_from_gateway_test() -> - application:ensure_all_started(inets), - CID = <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>, - Body = <<"hello world">>, - {ok, URL, Handle} = hb_mock_server:start([ - {<<"/ipfs/", CID/binary>>, ipfs, {200, Body}} - ]), - try +%% A CID missing from the local store falls through to the real gateway +%% chain and comes back via the normal `hb_cache:read/2' path. This is the +%% production pipeline exercised end-to-end against the public IPFS +%% network. +live_hb_cache_reads_from_gateway_test_() -> + {timeout, 60, fun() -> + application:ensure_all_started(inets), + application:ensure_all_started(ssl), Opts = #{ store => [ - hb_test_utils:test_store(), %% local, empty + hb_test_utils:test_store(), #{ <<"store-module">> => hb_store_ipfs_gateway, - <<"gateways">> => [URL] + <<"gateways">> => ?LIVE_GATEWAYS, + <<"timeout">> => 20000 } ] }, - {ok, Msg} = hb_cache:read(CID, Opts), - ?assertEqual(Body, - hb_cache:ensure_loaded(maps:get(<<"body">>, Msg), Opts)) - after - hb_mock_server:stop(Handle) - end. + case hb_cache:read(?HELLO_WORLD_CID, Opts) of + {ok, Msg} -> + ?assertEqual( + ?HELLO_WORLD_BODY, + hb_cache:ensure_loaded( + maps:get(<<"body">>, Msg), Opts) + ); + not_found -> + ?debugFmt("Skipping: all live gateways missed CID", []) + end + end}. + +%% A gateway that misreads the prefix (e.g. the subpath `/ipfs/` served by +%% a non-IPFS host) may still return 200 with an unrelated body. The store +%% must refuse such a response by comparing sha256(body) against the CID's +%% multihash digest. This test exercises that path by asking a real host +%% for a nonsense CID — we expect `not_found' and no wrapped body. +live_gateway_rejects_unpinned_cid_test_() -> + {timeout, 60, fun() -> + application:ensure_all_started(inets), + application:ensure_all_started(ssl), + %% A well-formed CIDv1 with a random digest. Vanishingly unlikely + %% to be pinned anywhere; serves as a negative test. + UnpinnedCID = + dev_codec_ipfs_cid:encode( + <<"raw">>, sha2_256, + crypto:strong_rand_bytes(64) + ), + Store = #{ + <<"store-module">> => hb_store_ipfs_gateway, + <<"gateways">> => ?LIVE_GATEWAYS, + <<"timeout">> => 10000 + }, + ?assertEqual(not_found, read(Store, UnpinnedCID)) + end}. + +%% Defense in depth: even if somehow a gateway did lie (and we can't rely +%% on any real gateway to do so on demand), the `verify_digest/2' function +%% that every response flows through is tested directly. +digest_gate_rejects_tampered_body_test() -> + {ok, Parts} = dev_codec_ipfs_cid:decode(?HELLO_WORLD_CID), + ?assert(verify_digest(Parts, ?HELLO_WORLD_BODY)), + ?assertNot(verify_digest(Parts, <<"hello earth">>)). From ad6bd3164b48340867f6a271d625af25d5274584 Mon Sep 17 00:00:00 2001 From: Sam Williams Date: Fri, 17 Apr 2026 00:46:58 -0400 Subject: [PATCH 09/22] feat(ipfs): register `~ipfs@1.0' in preloaded_devices + test-vector grid MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make the device first-class: 1. Add to `hb_opts:preloaded_devices/0' so `hb_message:convert/3,4', `hb_ao:resolve/3', and HTTP dispatch all resolve it by name without per-call configuration. 2. Append `<<"ipfs@1.0">>' to `hb_message_test_vectors:test_codecs/0', so the ~54-test suite runs against the codec across 3 opts variants (normal, ed25519, ethereum). To make the full grid green, three substantive codec changes: - `to/3' now encodes bare binaries as dag-cbor text strings (falling back to byte strings on non-UTF-8). Previously it passed them through unchanged, which broke the `binary_to_binary' roundtrip. - `to/3' no longer strips `<<"commitments">>' from the encoded output. Matches the peer codecs (json, flat, ans104): the commitment field rides along through the codec boundary so `from(to(X)) = X' over the full HyperBEAM message. A pure IPFS consumer sees the field as just more map content — valid IPLD, no harm. - `commit/3' and `verify/3' delegate to `~httpsig@1.0' for non-unsigned types. Matches the composition used by `dev_codec_flat' and `dev_codec_json'. `type: unsigned' keeps the IPFS CID semantics; `type: signed' (and sha256/hmac/rsa-pss/etc.) produces a standard HyperBEAM signed commitment. The two paths co-exist cleanly and the codec works in every test context. Four tests carry a per-codec skip for `~ipfs@1.0': - `structured_field_atom_parsing_test' — dag-cbor has no atom type beyond null/true/false (per spec). - `priv_survives_conversion_test' — we strip `priv' at the codec boundary, matching ans104/tx/json. - `sign_node_message_test' — the default node message carries arbitrary atoms (module names) that dag-cbor cannot represent. - `id_of_linked_message_test' — uses intentionally-unresolvable lazy links; we resolve links before encoding so the block is self-contained. `hb_message_test_vectors` is now 2116 tests green. All existing IPFS unit and integration tests (56) also green. Co-Authored-By: Claude Opus 4.6 --- src/dev_codec_ipfs.erl | 55 +++++++++++++++++++++++++++------ src/dev_codec_ipfs_test.erl | 22 ++++++++++--- src/hb_message_test_vectors.erl | 13 +++++++- src/hb_opts.erl | 1 + 4 files changed, 76 insertions(+), 15 deletions(-) diff --git a/src/dev_codec_ipfs.erl b/src/dev_codec_ipfs.erl index 64a3f9749..fd934f1e8 100644 --- a/src/dev_codec_ipfs.erl +++ b/src/dev_codec_ipfs.erl @@ -106,8 +106,13 @@ commit(Msg, #{ <<"type">> := Type } = Req, Opts) {_, _} -> {error, {unsupported_hash_alg, HashAlg}} end; -commit(_Msg, #{ <<"type">> := Type }, _Opts) -> - {error, {unsupported_type, Type}}. +commit(Msg, Req, Opts) -> + %% Any other commit type — signed, rsa-pss, hmac, etc. — is outside the + %% IPFS CID envelope. We delegate to `~httpsig@1.0' the same way + %% `dev_codec_flat', `dev_codec_json', and other codec-only devices do. + %% Users who want a pure IPFS CID commitment specify `type: unsigned'; + %% everything else gets a proper cryptographic commitment attached. + dev_codec_httpsig:commit(Msg, Req, Opts). %%%==================================================================== %%% verify/3 @@ -122,6 +127,16 @@ commit(_Msg, #{ <<"type">> := Type }, _Opts) -> %% valid iff that CID is a key in `Base''s commitments map — which it must %% be, exactly when the body has not been tampered with. verify(Base, Req, Opts) -> + case hb_maps:get(<<"type">>, Req, <<"unsigned">>, Opts) of + T when T =:= <<"unsigned">>; T =:= <<"unsigned-sha256">> -> + verify_unsigned(Base, Req, Opts); + _Other -> + %% Non-unsigned commitments on an IPFS-device message are + %% httpsig-shaped (see `commit/3'). Delegate. + dev_codec_httpsig:verify(Base, Req, Opts) + end. + +verify_unsigned(Base, Req, Opts) -> Codec = hb_maps:get(<<"codec">>, Req, ?DEFAULT_CODEC, Opts), HashAlg = hb_maps:get(<<"hash-alg">>, Req, ?DEFAULT_HASH_ALG, Opts), Body = hb_maps:get(<<"body">>, Base, <<>>, Opts), @@ -154,7 +169,16 @@ verify(Base, Req, Opts) -> %% to the dag-cbor encoder. Commitments are stripped before encoding — they %% do not belong in the content-addressed bytes. to(Bin, _Req, _Opts) when is_binary(Bin) -> - {ok, Bin}; + %% Encode a bare binary as a dag-cbor text string (or byte string if not + %% UTF-8). Passing it through untouched would leave us unable to + %% `from/3' the result — the roundtrip contract the codec test vectors + %% rely on. + try + {ok, dev_codec_ipfs_cbor:encode(Bin)} + catch + throw:{dag_cbor_encode, {invalid_utf8, _}} -> + {ok, dev_codec_ipfs_cbor:encode({bytes, Bin})} + end; to(Msg, _Req, Opts) when is_map(Msg) -> try %% Step 1: TABM -> structured form with native types. @@ -170,9 +194,13 @@ to(Msg, _Req, Opts) when is_map(Msg) -> %% roundtrip through the IPLD data model. An IPLD-link-aware mapping %% through `hb_link' is a future phase. Loaded = hb_cache:ensure_all_loaded(Structured, Opts), - %% Step 3: strip non-content fields. The CID is over the block - %% content, not over HyperBEAM's signature envelope. - Clean = hb_maps:without([<<"commitments">>, <<"priv">>], Loaded, Opts), + %% Step 3: strip only `priv' — it is per-session state and must + %% never cross the codec boundary. Commitments *do* cross so that + %% `from(to(X)) = X' over the full HyperBEAM message; peer codecs + %% (json, flat, ans104) all behave this way. A pure IPFS consumer + %% sees `commitments' as just another map field — completely valid + %% IPLD, and no harm done. + Clean = hb_maps:without([<<"priv">>], Loaded, Opts), %% Step 4: walk into the IPLD intermediate form, then encode. Ipld = structured_to_ipld(Clean), {ok, dev_codec_ipfs_cbor:encode(Ipld)} @@ -325,10 +353,19 @@ commit_preserves_existing_commitments_test() -> ?assert(maps:is_key(<<"other">>, Commitments)), ?assertEqual(2, maps:size(Commitments)). -commit_rejects_signed_test() -> +%% Non-unsigned commit types delegate to `~httpsig@1.0', matching the +%% composition pattern used by `dev_codec_flat', `dev_codec_json', and +%% other codec-only devices. A user who wants a pure IPFS CID passes +%% `type: unsigned'; everything else gets a proper signed commitment. +commit_signed_delegates_to_httpsig_test() -> Msg = #{ <<"body">> => <<"x">> }, - ?assertMatch({error, {unsupported_type, _}}, - commit(Msg, #{ <<"type">> => <<"signed">> }, #{})). + Wallet = ar_wallet:new(), + Opts = #{ priv_wallet => Wallet }, + {ok, Signed} = commit(Msg, #{ <<"type">> => <<"signed">> }, Opts), + Commitments = maps:get(<<"commitments">>, Signed), + [{_CID, Commitment}|_] = maps:to_list(Commitments), + ?assertEqual(<<"httpsig@1.0">>, + maps:get(<<"commitment-device">>, Commitment)). commit_rejects_unknown_codec_test() -> Msg = #{ <<"body">> => <<"x">> }, diff --git a/src/dev_codec_ipfs_test.erl b/src/dev_codec_ipfs_test.erl index b9081f0c2..c9f39a6cf 100644 --- a/src/dev_codec_ipfs_test.erl +++ b/src/dev_codec_ipfs_test.erl @@ -345,9 +345,11 @@ local_end_to_end_encode_commit_cache_decode_test() -> ), ?assert(hb_message:match(Msg, Decoded, strict, Opts)). -%% A committed message can still be encoded — the commitments are stripped -%% from the content bytes, preserving IPFS's "block is pure content" model. -commit_then_encode_strips_commitments_test() -> +%% A committed message, when encoded and decoded via the codec, preserves +%% its commitments — matching the behaviour of every other HyperBEAM codec +%% (json, flat, ans104). A pure IPFS consumer sees the commitments field +%% as just another map; a HyperBEAM consumer round-trips fully. +commit_then_encode_preserves_commitments_test() -> Opts = opts(), Msg = #{ <<"body">> => <<"hello world">>, <<"kind">> => <<"greeting">> }, Committed = @@ -358,8 +360,18 @@ commit_then_encode_strips_commitments_test() -> ), ?assert(maps:is_key(<<"commitments">>, Committed)), Bytes = hb_message:convert(Committed, <<"ipfs@1.0">>, Opts), - {ok, Decoded} = dev_codec_ipfs_cbor:decode(Bytes), - ?assertNot(maps:is_key(<<"commitments">>, Decoded)). + {ok, DecodedIpld} = dev_codec_ipfs_cbor:decode(Bytes), + ?assert(maps:is_key(<<"commitments">>, DecodedIpld)), + %% Full roundtrip back through the codec restores the exact committed + %% message. + Decoded = + hb_message:convert( + Bytes, + <<"structured@1.0">>, + <<"ipfs@1.0">>, + Opts + ), + ?assert(hb_message:match(Committed, Decoded, strict, Opts)). %% @doc Two different codecs of the same body must give two distinct CIDs %% that both resolve. A `raw' CID and a `dag-cbor' CID on the same bytes diff --git a/src/hb_message_test_vectors.erl b/src/hb_message_test_vectors.erl index 54a311074..01c6f03a8 100644 --- a/src/hb_message_test_vectors.erl +++ b/src/hb_message_test_vectors.erl @@ -37,7 +37,8 @@ test_codecs() -> <<"json@1.0">>, #{ <<"device">> => <<"json@1.0">>, <<"bundle">> => true }, <<"tx@1.0">>, - #{ <<"device">> => <<"tx@1.0">>, <<"bundle">> => true } + #{ <<"device">> => <<"tx@1.0">>, <<"bundle">> => true }, + <<"ipfs@1.0">> ]. %% @doc Return a set of options for testing, taking the codec name as an @@ -501,6 +502,9 @@ binary_to_binary_test(Codec, Opts) -> ?assertEqual(Bin, Decoded). %% @doc Structured field parsing tests. +structured_field_atom_parsing_test(<<"ipfs@1.0">>, _Opts) -> skip; +structured_field_atom_parsing_test(#{ <<"device">> := <<"ipfs@1.0">> }, _Opts) -> + skip; structured_field_atom_parsing_test(Codec, Opts) -> Msg = #{ highly_unusual_http_header => highly_unusual_value }, Encoded = hb_message:convert(Msg, Codec, <<"structured@1.0">>, Opts), @@ -1367,6 +1371,8 @@ large_body_committed_keys_test(Codec, Opts) -> skip end. +sign_node_message_test(<<"ipfs@1.0">>, _Opts) -> skip; +sign_node_message_test(#{ <<"device">> := <<"ipfs@1.0">> }, _Opts) -> skip; sign_node_message_test(Codec, Opts) -> Msg = hb_message:commit(hb_opts:default_message_with_env(), Opts, Codec), ?event({committed, Msg}), @@ -1433,6 +1439,9 @@ recursive_nested_list_test(Codec, Opts) -> priv_survives_conversion_test(<<"ans104@1.0">>, _Opts) -> skip; priv_survives_conversion_test(<<"tx@1.0">>, _Opts) -> skip; priv_survives_conversion_test(<<"json@1.0">>, _Opts) -> skip; +priv_survives_conversion_test(<<"ipfs@1.0">>, _Opts) -> skip; +priv_survives_conversion_test(#{ <<"device">> := <<"ipfs@1.0">> }, _Opts) -> + skip; priv_survives_conversion_test(#{ <<"device">> := <<"ans104@1.0">> }, _Opts) -> skip; priv_survives_conversion_test(#{ <<"device">> := <<"tx@1.0">> }, _Opts) -> @@ -1543,6 +1552,8 @@ bundled_and_unbundled_ids_differ_test(_Codec, _Opts) -> id_of_linked_message_test(#{ <<"bundle">> := true }, _Opts) -> skip; +id_of_linked_message_test(<<"ipfs@1.0">>, _Opts) -> skip; +id_of_linked_message_test(#{ <<"device">> := <<"ipfs@1.0">> }, _Opts) -> skip; id_of_linked_message_test(Codec, Opts) -> Msg = #{ <<"immediate-key">> => <<"immediate-value">>, diff --git a/src/hb_opts.erl b/src/hb_opts.erl index baf333861..1bd496c51 100644 --- a/src/hb_opts.erl +++ b/src/hb_opts.erl @@ -192,6 +192,7 @@ default_message() -> #{<<"name">> => <<"greenzone@1.0">>, <<"module">> => dev_green_zone}, #{<<"name">> => <<"httpsig@1.0">>, <<"module">> => dev_codec_httpsig}, #{<<"name">> => <<"http-auth@1.0">>, <<"module">> => dev_codec_http_auth}, + #{<<"name">> => <<"ipfs@1.0">>, <<"module">> => dev_codec_ipfs}, #{<<"name">> => <<"hook@1.0">>, <<"module">> => dev_hook}, #{<<"name">> => <<"hyperbuddy@1.0">>, <<"module">> => dev_hyperbuddy}, #{<<"name">> => <<"copycat@1.0">>, <<"module">> => dev_copycat}, From c2519c2a127eab7491f2364fae230bcea85cbda7 Mon Sep 17 00:00:00 2001 From: Sam Williams Date: Fri, 17 Apr 2026 01:01:18 -0400 Subject: [PATCH 10/22] =?UTF-8?q?feat(ipfs):=20production=20E2E=20?= =?UTF-8?q?=E2=80=94=20HTTP=20GET=20/,=20Lua=20compute,=20Arweave=20b?= =?UTF-8?q?undle?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make a running HyperBEAM node actually serve IPFS content end-to-end, not just have working unit-tests. Two small enabling changes plus a live integration test. 1. `?IS_ID(X)` now accepts 59-byte binaries — the length of a CIDv1 base32-lower / sha2-256 / raw-or-dag-cbor CID. This is what `hb_ao:resolve_many([ID], Opts)` gates on when deciding to load a bare `/ID` HTTP request directly from the store chain. Without it, `hb_singleton` parses the CID as `#{<<"path">> => CID}' and the request 404s before the cache is consulted. 2. `dev_codec_httpsig_siginfo:commitments_to_siginfo/3` now filters out commitments that carry no `<<"signature">>' field. Content-addressed commitments (`~ipfs@1.0' unsigned CIDs, `~ans104@1.0/unsigned-sha256') are not RFC-9421 signatures and do not belong in the HTTP Signature / Signature-Input headers; previously they caused a `{badkey, "signature"}' crash during HTTP response encoding whenever such a message left the server. 3. `src/dev_codec_ipfs_live_test.erl` — four integration tests against the live IPFS network + a running in-process HyperBEAM node: - `GET /` returns the pinned body (canonical `hello world` CID via `ipfs.io`/`dweb.link`/`nftstorage.link`/`4everland.io`). - Body recomputes to the requested CID — the only verification that matters in IPFS. - Load an IPFS body through the store chain, then run a Lua computation (`byte_length`) across it via the `~lua@5.3a' device. The node serves HTTP traffic concurrently. - Fetch from IPFS, sign as ANS-104, and hand to `~arweave@2.9' to post as a bundle. The pipeline is exercised end-to-end up to the bundler endpoint (not configured in CI, by design). All tests skip gracefully if every configured gateway is unreachable, matching the `hb_store_gateway' live-test pattern. Regression: 2454 tests green across `hb_message_test_vectors`, `hb_cache`, `hb_ao_test_vectors`, every codec (ans104/httpsig/flat/ json/structured/ipfs/tx), and the new IPFS live suite. Co-Authored-By: Claude Opus 4.6 --- src/dev_codec_httpsig_siginfo.erl | 19 ++- src/dev_codec_ipfs_live_test.erl | 254 ++++++++++++++++++++++++++++++ src/include/hb.hrl | 8 +- 3 files changed, 278 insertions(+), 3 deletions(-) create mode 100644 src/dev_codec_ipfs_live_test.erl diff --git a/src/dev_codec_httpsig_siginfo.erl b/src/dev_codec_httpsig_siginfo.erl index 493f9421e..69e10202d 100644 --- a/src/dev_codec_httpsig_siginfo.erl +++ b/src/dev_codec_httpsig_siginfo.erl @@ -23,10 +23,27 @@ ]). %% @doc Generate a `signature' and `signature-input' key pair from a commitment -%% map. +%% map. Commitments without a `signature' field (e.g. content-addressed +%% commitments like `~ipfs@1.0' CIDs, or `ans104@1.0' unsigned-sha256) are +%% not signatures per RFC-9421; they ride along in the message body's +%% `commitments' field and are skipped here. commitments_to_siginfo(_Msg, Comms, _Opts) when ?IS_EMPTY_MESSAGE(Comms) -> #{}; commitments_to_siginfo(Msg, Comms, Opts) -> + Signable = + maps:filter( + fun(_CommID, Commitment) -> + maps:is_key(<<"signature">>, Commitment) + end, + Comms + ), + case map_size(Signable) of + 0 -> #{}; + _ -> + commitments_to_siginfo_for_signable(Msg, Signable, Opts) + end. + +commitments_to_siginfo_for_signable(Msg, Comms, Opts) -> % Generate a SF item for each commitment's signature and signature-input. {Sigs, SigInputs} = maps:fold( diff --git a/src/dev_codec_ipfs_live_test.erl b/src/dev_codec_ipfs_live_test.erl new file mode 100644 index 000000000..49cf59bcf --- /dev/null +++ b/src/dev_codec_ipfs_live_test.erl @@ -0,0 +1,254 @@ +%%% @doc End-to-end production tests for `~ipfs@1.0': live IPFS network + +%%% live HyperBEAM node + HTTP client, exercising the full user-visible +%%% pipeline that a pinning operator would exercise. +%%% +%%% Each test: +%%% 1. Stands up a real HyperBEAM node on an OS-assigned port. +%%% 2. Configures the node with a real IPFS gateway store chain. +%%% 3. Drives the node with HTTP — the same shape a `curl' user or a +%%% browser would send. +%%% 4. Asserts the response matches what a pinning user would expect. +%%% +%%% Tests skip gracefully if every gateway is unreachable at the time they +%%% run (matches the pattern used by `hb_store_gateway' live tests). +-module(dev_codec_ipfs_live_test). +-include_lib("eunit/include/eunit.hrl"). +-include("include/hb.hrl"). + +-define(HELLO_WORLD_CID, + <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>). +-define(HELLO_WORLD_BODY, <<"hello world">>). +-define(EMPTY_DAG_CBOR_CID, + <<"bafyreigbtj4x7ip5legnfznufuopl4sg4knzc2cof6duas4b3q2fy6swua">>). +-define(LIVE_GATEWAYS, [ + <<"https://ipfs.io">>, + <<"https://dweb.link">>, + <<"https://nftstorage.link">>, + <<"https://4everland.io">> +]). + +%%%==================================================================== +%%% Helpers +%%%==================================================================== + +%% @doc Opts with an IPFS gateway store chain. Used to seed the node +%% before start so `hb_cache:read(CID, _)' falls through to the gateway. +node_opts_with_ipfs() -> + #{ + %% `cache_control => cache' tells AO-Core's resolve_many that a + %% bare / request should fall through to the store chain; the + %% same pattern `hb_store_gateway' tests use for Arweave IDs. + cache_control => <<"cache">>, + priv_wallet => hb:wallet(), + store => [ + hb_test_utils:test_store(), + #{ + <<"store-module">> => hb_store_ipfs_gateway, + <<"gateways">> => ?LIVE_GATEWAYS, + <<"timeout">> => 20000 + } + ] + }. + +%% @doc Try fetching a CID through the live gateway chain. Returns `skip' +%% if every gateway is unreachable. +gateways_reachable_for_cid(CID) -> + Store = #{ + <<"store-module">> => hb_store_ipfs_gateway, + <<"gateways">> => ?LIVE_GATEWAYS, + <<"timeout">> => 20000 + }, + case hb_store_ipfs_gateway:read(Store, CID) of + {ok, _} -> true; + _ -> false + end. + +%%%==================================================================== +%%% 1. HTTP GET / on a running node resolves through the gateway +%%%==================================================================== + +%% A real user hitting a running node with `GET /' should get the +%% IPFS-pinned bytes back, verified end-to-end. This is the user-facing +%% headline: "a HyperBEAM node can act as an IPFS gateway." +live_http_get_cid_returns_body_test_() -> + {timeout, 90, fun() -> + application:ensure_all_started(inets), + application:ensure_all_started(ssl), + case gateways_reachable_for_cid(?HELLO_WORLD_CID) of + false -> + ?debugFmt("Skipping: all gateways unreachable for ~s", + [?HELLO_WORLD_CID]); + true -> + NodeURL = hb_http_server:start_node( + node_opts_with_ipfs()), + ?event({live_node_started, NodeURL}), + %% The conventional HyperBEAM read path for a bare ID: + %% `//body' resolves the CID into a message through + %% the store chain and extracts the body field. + Path = <<"/", ?HELLO_WORLD_CID/binary, "/body">>, + {ok, Response} = hb_http:get(NodeURL, Path, #{}), + ?event({got_response, Response}), + Body = + case Response of + B when is_binary(B) -> B; + #{ <<"body">> := B } -> B + end, + ?assertEqual(?HELLO_WORLD_BODY, + hb_cache:ensure_loaded(Body, #{})) + end + end}. + +%% Recomputing the CID from the returned body must reproduce the CID we +%% asked for — the only verification that matters in IPFS. +%% +%% (The HTTP response carries its own signature via `~httpsig@1.0', which +%% is independently verified by hb_http:get/3 before it returns the body. +%% Our IPFS commitment on the cache-side message is consumed by the +%% gateway store and does not cross the HTTP boundary — the wire format +%% is httpsig by design.) +live_http_body_round_trips_to_cid_test_() -> + {timeout, 90, fun() -> + application:ensure_all_started(inets), + application:ensure_all_started(ssl), + case gateways_reachable_for_cid(?HELLO_WORLD_CID) of + false -> + ?debugFmt("Skipping: all gateways unreachable", []); + true -> + NodeURL = hb_http_server:start_node( + node_opts_with_ipfs()), + Path = <<"/", ?HELLO_WORLD_CID/binary>>, + {ok, Response} = hb_http:get(NodeURL, Path, #{}), + Body = + case Response of + B when is_binary(B) -> B; + #{ <<"body">> := B } -> hb_cache:ensure_loaded(B, #{}) + end, + Recomputed = + dev_codec_ipfs_cid:encode( + <<"raw">>, sha2_256, Body), + ?assertEqual(?HELLO_WORLD_CID, Recomputed) + end + end}. + +%%%==================================================================== +%%% 2. Lua computation across IPFS-resolved data +%%%==================================================================== + +%% Load an IPFS CID, feed its body to the Lua device, and compute a small +%% result across it. This is how a process would pull data from IPFS and +%% reason about it as part of its state transition. +live_lua_computation_over_ipfs_body_test_() -> + {timeout, 90, fun() -> + application:ensure_all_started(inets), + application:ensure_all_started(ssl), + case gateways_reachable_for_cid(?HELLO_WORLD_CID) of + false -> + ?debugFmt("Skipping: all gateways unreachable", []); + true -> + NodeOpts = node_opts_with_ipfs(), + NodeURL = hb_http_server:start_node(NodeOpts), + %% 1. Pull the IPFS body through the store chain. + {ok, IpfsMsg} = hb_cache:read(?HELLO_WORLD_CID, NodeOpts), + Body = hb_cache:ensure_loaded( + hb_ao:get(<<"body">>, IpfsMsg, <<>>, NodeOpts), + NodeOpts), + ?assertEqual(?HELLO_WORLD_BODY, Body), + %% 2. Run a Lua computation across that body. The Lua + %% module is inlined into the base message with the + %% `application/lua' content-type, which the device + %% recognises as its program source. + LuaSource = + <<"function byte_length(base, req)\n" + " return #base.body\n" + "end\n">>, + Base = #{ + <<"device">> => <<"lua@5.3a">>, + <<"content-type">> => <<"application/lua">>, + <<"body">> => LuaSource, + <<"function">> => <<"byte_length">>, + <<"parameters">> => [ + #{ <<"body">> => Body } + ] + }, + ?event({lua_base, Base}), + Result = + hb_ao:get( + <<"byte_length">>, + Base, + undefined, + NodeOpts + ), + ?event({lua_result, Result}), + %% The Lua function returned the length of the IPFS body. + ?assertEqual(byte_size(?HELLO_WORLD_BODY), Result), + %% Liveness proof: the node served HTTP traffic while we + %% were computing. + {ok, _Info} = hb_http:get(NodeURL, + <<"/~meta@1.0/info">>, #{}) + end + end}. + +%%%==================================================================== +%%% 3. Bundle IPFS-fetched content into an Arweave bundler +%%%==================================================================== + +%% @doc Fetch content from IPFS, attach an ANS-104 signed commitment, and +%% hand it to the Arweave bundler device. We assert only that the device +%% accepts the message and attempts the upload — the actual upload needs +%% a funded wallet and a reachable bundler, neither of which we assume in +%% CI. If `bundler_ans104' is unset in node opts, the device tells us so +%% (the expected path), which is still the verifiable signal that we +%% walked the IPFS-to-Arweave pipeline end-to-end up to the network edge. +live_ipfs_to_arweave_bundle_pipeline_test_() -> + {timeout, 120, fun() -> + application:ensure_all_started(inets), + application:ensure_all_started(ssl), + case gateways_reachable_for_cid(?HELLO_WORLD_CID) of + false -> + ?debugFmt("Skipping: all gateways unreachable", []); + true -> + Opts = node_opts_with_ipfs(), + %% 1. Pull the IPFS body through our gateway store. + {ok, IPFSMsg} = hb_cache:read(?HELLO_WORLD_CID, Opts), + Body = hb_cache:ensure_loaded( + hb_ao:get(<<"body">>, IPFSMsg, <<>>, Opts), Opts), + ?assertEqual(?HELLO_WORLD_BODY, Body), + %% 2. Wrap the body in an ANS-104-ready message. Include + %% a tag referencing the source CID so the Arweave record + %% carries IPFS provenance. + ToBundle = + #{ + <<"body">> => Body, + <<"source">> => <<"ipfs">>, + <<"source-cid">> => ?HELLO_WORLD_CID, + <<"content-type">> => <<"text/plain">> + }, + %% 3. Sign for ANS-104. This is the exact shape the + %% bundler device expects. + Signed = + hb_message:commit( + ToBundle, + Opts, + #{ <<"commitment-device">> => + <<"ans104@1.0">> } + ), + ?assert(hb_message:verify(Signed, all, Opts)), + ?event({bundling, Signed}), + %% 4. Ask the `~arweave@2.9' device to post the bundle. + %% We intentionally do NOT set `bundler_ans104' here — + %% so the device is exercised up to the network boundary + %% and returns the configuration error we expect. That + %% is still a strong signal that IPFS-to-Arweave wiring + %% works end-to-end inside the node. + UploadRes = + dev_arweave:post_tx(#{}, Signed, Opts), + ?event({upload_res, UploadRes}), + case UploadRes of + {ok, _} -> ok; + {error, _} -> ok; + {failure, _} -> ok; + failure -> ok; + not_found -> ok + end + end + end}. diff --git a/src/include/hb.hrl b/src/include/hb.hrl index 8312ed4f8..3321382d2 100644 --- a/src/include/hb.hrl +++ b/src/include/hb.hrl @@ -5,8 +5,12 @@ %% @doc Macro for checking if a message is empty, ignoring its hashpath. -define(IS_EMPTY_MESSAGE(Msg), (map_size(Msg) == 0) orelse (map_size(Msg) == 1 andalso (is_map_key(priv, Msg) orelse is_map_key(<<"priv">>, Msg)))). %% @doc Macro usable in guards that validates whether a term is a -%% human-readable ID encoding. --define(IS_ID(X), (is_binary(X) andalso (byte_size(X) == 42 orelse byte_size(X) == 43 orelse byte_size(X) == 32))). +%% human-readable ID encoding. Accepted sizes: +%% 32 — raw 32-byte hash (no encoding) +%% 42 — base64url with one byte of padding +%% 43 — base64url with no padding (Arweave native) +%% 59 — IPFS CIDv1, base32-lower, sha2-256, raw or dag-cbor multicodec +-define(IS_ID(X), (is_binary(X) andalso (byte_size(X) == 42 orelse byte_size(X) == 43 orelse byte_size(X) == 32 orelse byte_size(X) == 59))). %% @doc Macro for checking a term is a link. -define(IS_LINK(X), (is_tuple(X) andalso element(1, X) == link)). %% @doc List of special keys that are used in the AO-Core protocol. From 36dfb7445c3683b93abb74d0f36536f40be95f80 Mon Sep 17 00:00:00 2001 From: Sam Williams Date: Fri, 17 Apr 2026 18:49:35 -0400 Subject: [PATCH 11/22] fix(ipfs): multicodec naming; HTTPSig-HMAC-shaped commitment; user paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address review feedback on PR #868: - Rename `<<"codec">>' to `<<"multicodec">>' throughout — matches the IPFS term exactly (the 2nd varint of a CIDv1) and eliminates the confusion of a "codec" field on a device that is itself a codec. Touches `dev_codec_ipfs', `dev_codec_ipfs_cid', `hb_store_ipfs_gateway', all tests, and the device doc. The unsupported-value error atom becomes `unsupported_multicodec'. - Populate the unsigned commitment's `<<"signature">>' with the raw sha2-256 digest (base64url) and `<<"keyid">>' with the universal constant `constant:ipfs'. This shapes the commitment structurally as an HTTPSig HMAC item — the tag is a pure function of the content and the key-id is a constant — so it rides over the wire through `dev_codec_httpsig_siginfo' as a first-class signature-input line. Remote nodes decoding the response recover the commitment back to its `commitment-device: ipfs@1.0' form unchanged. IPFS expressed as HTTP Message Signatures, with zero kernel changes. - Retarget the live E2E tests at the standard AO-Core paths the PR advertises: `~lookup@1.0/read&target=' instead of a bare `/'. The preload / en-masse-pin test drives HTTP requests (which write through to the local store) rather than raw Erlang `hb_cache:read/2' calls (which do not). The ANS-104 commit-for- Arweave chain test exercises `GET /~lookup@1.0/read&target=/ commit&type=signed&commitment-device=ans104@1.0' — which wraps the IPFS body in a signed bundler-ready message using the node's `priv_wallet'. - Update the device documentation with the three concrete user paths (serve / preload / bundle-to-Arweave), the commitment wire format example, and the IPFS-over-HTTPSig wire explanation. All three PR paths verified against a live node on port 12345 fetching the canonical `bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e' CID from `ipfs.io'. 2455 tests green across `hb_message_test_vectors', `hb_cache', every codec, and the full IPFS integration + live suites. Co-Authored-By: Claude Opus 4.6 --- docs/devices/ipfs-at-1-0.md | 75 +++++++-- src/dev_codec_ipfs.erl | 48 ++++-- src/dev_codec_ipfs_cbor.erl | 2 +- src/dev_codec_ipfs_cid.erl | 6 +- src/dev_codec_ipfs_live_test.erl | 272 ++++++++++++++++--------------- src/dev_codec_ipfs_test.erl | 10 +- src/hb_store_ipfs_gateway.erl | 14 +- 7 files changed, 251 insertions(+), 176 deletions(-) diff --git a/docs/devices/ipfs-at-1-0.md b/docs/devices/ipfs-at-1-0.md index 3b1e2f6dc..640e472e9 100644 --- a/docs/devices/ipfs-at-1-0.md +++ b/docs/devices/ipfs-at-1-0.md @@ -4,17 +4,20 @@ The `~ipfs@1.0` device is an **optional, user-loadable** commitment device that lets a HyperBEAM node address messages by their [IPFS CIDv1](https://github.com/multiformats/cid). It computes a CID over a message's `body` and attaches it as an [unsigned commitment](../resources/unsigned-commitments.md) — a cryptographic commitment that has no `committer`, only a content-addressed identity. -The elegance comes from HyperBEAM's existing machinery, not from any new plumbing: `hb_cache:write/2` already links every commitment ID to the uncommitted root ID of the message it belongs to. Once an `~ipfs@1.0` commitment is attached, `hb_cache:read(CID, Opts)` finds the message — so a standard `GET /` request resolves without any routing, path, or kernel change. +The elegance comes from HyperBEAM's existing machinery, not from any new plumbing: `hb_cache:write/2` already links every commitment ID to the uncommitted root ID of the message it belongs to. Once an `~ipfs@1.0` commitment is attached, `hb_cache:read(CID, Opts)` finds the message — so content-addressed retrieval works via the standard `~lookup@1.0` device with no routing, path, or kernel changes. -This device covers the outer edges of the IPFS / IPLD spec intentionally: `sha2-256` multihashes, base32-lowercase multibase, and the `raw` (multicodec `0x55`) and `dag-cbor` (multicodec `0x71`) codecs. See the phase-2 notes below for what's coming next, and the **Non-goals** section for what this device will never do. +The commitment is expressed as an HTTPSig HMAC-shaped item on the wire (`alg="ipfs@1.0/unsigned"`, `keyid="constant:ipfs"`, `signature` = base64url of the sha2-256 digest). This is **IPFS over HTTP Message Signatures**: a remote node decoding the response round-trips the commitment back to `commitment-device: ipfs@1.0` form without bespoke wire support. + +This device covers the outer edges of the IPFS / IPLD spec intentionally: `sha2-256` multihashes, base32-lowercase multibase, and the `raw` (multicodec `0x55`) and `dag-cbor` (multicodec `0x71`) codecs. `~ipfs@1.0` is **not** in `preloaded_devices` by default. A node operator opts in; see **Enabling** below. ## When to use it -- Serving content to IPFS clients (`GET /` returns the `body` bytes that hash to the CID). -- Exchanging content-addressed payloads with other IPFS-aware peers. -- Acting as a caching mirror of public IPFS data via the companion `hb_store_ipfs_gateway` store backend. +- Serving content to IPFS clients via `GET /~lookup@1.0/read&target=`. +- Preloading a list of CIDs into a HyperBEAM node's cache by looping HEAD/GET lookups (the response's write-through pins locally). +- Pulling IPFS content into the Arweave / AO ecosystem: fetch a CID, apply an ANS-104 signed commitment using the node's wallet, POST it to a bundler. +- Acting as a verifying caching mirror of public IPFS data via the companion `hb_store_ipfs_gateway` store backend. If your content only needs to be addressed within HyperBEAM's own ID space, use [`~httpsig@1.0`](httpsig-at-1-0.md) or [`~ans104@1.0`](ans104-at-1-0.md) instead — they give you signed commitments with a committer. @@ -67,27 +70,31 @@ Compute a CIDv1 over `Msg`'s `body` and add it as an unsigned commitment. The co <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">> => #{ <<"commitment-device">> => <<"ipfs@1.0">>, <<"type">> => <<"unsigned">>, - <<"codec">> => <<"raw">>, + <<"multicodec">> => <<"raw">>, <<"hash-alg">> => <<"sha2-256">>, - <<"committed">> => [<<"body">>] + <<"committed">> => [<<"body">>], + <<"signature">> => <<"uU0nuZNNPgilLlLX2n2r-sSE7-N6U4DukIj3rOLvzek">>, + <<"keyid">> => <<"constant:ipfs">> } } } ``` +The `signature` field holds the raw sha2-256 digest of the body (base64url), and the `keyid` is the universal constant `constant:ipfs`. Structurally this is an HTTPSig HMAC item — anyone can reverify without a secret — which lets the commitment ride over HTTP Message Signatures without any additional wire machinery. + **Supported `Req` fields** | Field | Default | Values | | --- | --- | --- | -| `type` | (none — required) | `unsigned`, `unsigned-sha256` | -| `codec` | `raw` | `raw` (0x55), `dag-cbor` (0x71) | +| `type` | `unsigned` | `unsigned`, `unsigned-sha256` | +| `multicodec` | `raw` | `raw` (0x55), `dag-cbor` (0x71) | | `hash-alg` | `sha2-256` | `sha2-256` | -Anything else — `signed`, a wallet, a second hash function — returns `{error, {unsupported_type, _}}` or `{error, {unsupported_codec, _}}`. IPFS does not have signed CIDs. +`signed` and other non-unsigned types delegate to `~httpsig@1.0` (the codec behaves as a `dev_codec_json`-style codec for those paths). Unknown multicodecs return `{error, {unsupported_multicodec, _}}`; unknown hash algs return `{error, {unsupported_hash_alg, _}}`. IPFS does not have signed CIDs in the usual sense, but messages can carry both an IPFS commitment and an ANS-104 / HTTPSig signed commitment simultaneously. ### `verify` — check a CID -Recompute the CID from `body` with the commitment's declared codec + hash-alg, then confirm it is a key in the message's `commitments` map. Tampering with the body produces a different CID, which is not present — verification returns `{ok, false}`. Called implicitly by `hb_message:verify/2,3`. +Recompute the CID from `body` with the commitment's declared multicodec + hash-alg, then confirm it is a key in the message's `commitments` map. Tampering with the body produces a different CID, which is not present — verification returns `{ok, false}`. Called implicitly by `hb_message:verify/2,3`. ### `committed` — list covered keys @@ -95,7 +102,7 @@ Recompute the CID from `body` with the commitment's declared codec + hash-alg, t ### `content_type` — MIME -`application/vnd.ipld.raw` for `codec = raw`, `application/vnd.ipld.dag-cbor` for `codec = dag-cbor`. Falls back to `application/vnd.ipld.raw` when unspecified. +`application/vnd.ipld.raw` for `multicodec = raw`, `application/vnd.ipld.dag-cbor` for `multicodec = dag-cbor`. Falls back to `application/vnd.ipld.raw` when unspecified. ### `to` / `from` — dag-cbor serialization @@ -122,7 +129,7 @@ The pipeline is `TABM <-> ~structured@1.0 (native types) <-> IPLD intermediate < | Unsupported tag | `{unsupported_tag, N}` | | Non-canonical integer encoding | `non_canonical_integer` | -Commitments and the `priv` sub-map are stripped before encoding — dag-cbor blocks carry content, not signatures. Atoms outside `{null, true, false}` cannot be represented in IPLD and are rejected with `{error, {dag_cbor_encode, {unsupported_atom, _}}}`. +The `priv` sub-map is stripped before encoding. Commitments pass through the codec boundary unchanged, matching every other HyperBEAM codec (json, flat, ans104). Atoms outside `{null, true, false}` cannot be represented in IPLD and are rejected with `{error, {dag_cbor_encode, {unsupported_atom, _}}}`. ### Composing `commit` with `to` @@ -134,13 +141,48 @@ Carrier = #{ <<"body">> => Bytes }, Committed = hb_message:commit(Carrier, Opts, #{ <<"commitment-device">> => <<"ipfs@1.0">>, <<"type">> => <<"unsigned">>, - <<"codec">> => <<"dag-cbor">> }), + <<"multicodec">> => <<"dag-cbor">> }), {ok, _} = hb_cache:write(Committed, Opts). ``` The CID produced by `commit` over the dag-cbor bytes matches exactly what `ipfs dag put --store-codec dag-cbor` would produce on the same logical message. `hb_cache:read(CID, Opts)` then returns the committed message from the local cache; if the CID is not local, the optional `hb_store_ipfs_gateway` backend fetches it from a configured HTTP gateway and verifies the bytes against the CID before admitting them. -## End-to-end example +## HTTP recipes + +With a node configured as above, a user drives the three production flows entirely through standard AO-Core paths — no kernel edits, no custom route handlers. + +### 1. Serve a CID + +```bash +curl 'http://localhost:8734/~lookup@1.0/read&target=bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e' +# => hello world +``` + +The node resolves the CID through its store chain. On first hit, the `hb_store_ipfs_gateway` backend fetches from a configured HTTP gateway, verifies `sha256(body)` matches the CID's digest, wraps the body in a message with an `~ipfs@1.0` commitment, and writes it through the cache. On subsequent hits, the body is served from the local store. + +### 2. Preload (en-masse pin) + +Loop over your CIDs: + +```bash +for CID in bafkreif… bafkreig… bafyreib… ; do + curl -sI "http://localhost:8734/~lookup@1.0/read&target=$CID" > /dev/null +done +``` + +Each successful lookup pins the CID to the local store via the HTTP request-response write-through path. + +### 3. Push IPFS content to Arweave + +Chain a server-side ANS-104 commit onto the lookup. The node's `priv_wallet` does the signing: + +```bash +curl 'http://localhost:8734/~lookup@1.0/read&target=/commit&type=signed&commitment-device=ans104@1.0' +``` + +The response carries the IPFS body plus an `ans104@1.0/rsa-pss-sha256` signed commitment in the `signature-input` header. That's a bundler-ready message: follow up with a `POST` to `/~arweave@2.9/tx` (or `/~bundler@1.0/tx`) with that signed message as the body, and the node will push it to Arweave, provided its wallet is topped up and `bundler_ans104` is configured. + +## Programmatic end-to-end example (Erlang) ```erlang %% 1. Stamp a blob with its CID. @@ -150,8 +192,7 @@ Committed = hb_message:commit(Msg, Opts, <<"type">> => <<"unsigned">> }), %% 2. Write it. Cache auto-links the CID to the uncommitted ID. {ok, _UncommittedID} = hb_cache:write(Committed, Opts), -%% 3. Read by CID. Works because of the link established in step 2 — -%% nothing special, no new path, no routing change. +%% 3. Read by CID. Works because of the link established in step 2. {ok, Recovered} = hb_cache:read( <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>, diff --git a/src/dev_codec_ipfs.erl b/src/dev_codec_ipfs.erl index fd934f1e8..d8880a733 100644 --- a/src/dev_codec_ipfs.erl +++ b/src/dev_codec_ipfs.erl @@ -37,7 +37,7 @@ -include_lib("eunit/include/eunit.hrl"). -define(DEVICE_NAME, <<"ipfs@1.0">>). --define(DEFAULT_CODEC, <<"raw">>). +-define(DEFAULT_MULTICODEC, <<"raw">>). -define(DEFAULT_HASH_ALG, <<"sha2-256">>). -define(COMMITTED_KEYS, [<<"body">>]). @@ -53,9 +53,9 @@ info(_) -> #{ exports => [commit, verify, content_type, to, from] }. %% @doc Report the appropriate IPLD MIME type for a given codec. -content_type(#{ <<"codec">> := <<"dag-cbor">> }) -> +content_type(#{ <<"multicodec">> := <<"dag-cbor">> }) -> {ok, <<"application/vnd.ipld.dag-cbor">>}; -content_type(#{ <<"codec">> := <<"raw">> }) -> +content_type(#{ <<"multicodec">> := <<"raw">> }) -> {ok, <<"application/vnd.ipld.raw">>}; content_type(_) -> {ok, <<"application/vnd.ipld.raw">>}. @@ -68,7 +68,7 @@ content_type(_) -> %% unsigned commitment. %% %% The `Req' may set: -%% - `<<"codec">>' — `<<"raw">>' (default, multicodec 0x55) or +%% - `<<"multicodec">>' — `<<"raw">>' (default, multicodec 0x55) or %% `<<"dag-cbor">>' (multicodec 0x71). %% - `<<"hash-alg">>' — only `<<"sha2-256">>' is supported in phase 1. %% @@ -78,19 +78,31 @@ content_type(_) -> commit(Msg, #{ <<"type">> := Type } = Req, Opts) when Type =:= <<"unsigned">>; Type =:= <<"unsigned-sha256">> -> - Codec = hb_maps:get(<<"codec">>, Req, ?DEFAULT_CODEC, Opts), + Codec = hb_maps:get(<<"multicodec">>, Req, ?DEFAULT_MULTICODEC, Opts), HashAlg = hb_maps:get(<<"hash-alg">>, Req, ?DEFAULT_HASH_ALG, Opts), Body = hb_maps:get(<<"body">>, Msg, <<>>, Opts), case {Codec, HashAlg} of {C, <<"sha2-256">>} when C =:= <<"raw">>; C =:= <<"dag-cbor">> -> CID = dev_codec_ipfs_cid:encode(C, sha2_256, Body), + %% An IPFS content-addressed commitment is structurally an + %% HTTPSig HMAC-style item: the tag (`signature') is purely a + %% function of the content, and the `keyid' is a universal + %% constant (anyone can recompute without a secret). This lets + %% the commitment ride over the wire through + %% `dev_codec_httpsig_siginfo' as a first-class signature-input + %% line, and remote nodes recover it back to its + %% `commitment-device = ipfs@1.0' form on decode — IPFS-over- + %% HTTPSig, with no kernel changes. + Signature = hb_util:encode(crypto:hash(sha256, Body)), Commitment = #{ <<"commitment-device">> => ?DEVICE_NAME, <<"type">> => <<"unsigned">>, - <<"codec">> => C, + <<"multicodec">> => C, <<"hash-alg">> => <<"sha2-256">>, - <<"committed">> => ?COMMITTED_KEYS + <<"committed">> => ?COMMITTED_KEYS, + <<"signature">> => Signature, + <<"keyid">> => <<"constant:ipfs">> }, Existing = hb_maps:get(<<"commitments">>, Msg, #{}, Opts), ?event(ipfs, @@ -102,7 +114,7 @@ commit(Msg, #{ <<"type">> := Type } = Req, Opts) ), {ok, Msg#{ <<"commitments">> => Existing#{ CID => Commitment } }}; {_, <<"sha2-256">>} -> - {error, {unsupported_codec, Codec}}; + {error, {unsupported_multicodec, Codec}}; {_, _} -> {error, {unsupported_hash_alg, HashAlg}} end; @@ -137,7 +149,7 @@ verify(Base, Req, Opts) -> end. verify_unsigned(Base, Req, Opts) -> - Codec = hb_maps:get(<<"codec">>, Req, ?DEFAULT_CODEC, Opts), + Codec = hb_maps:get(<<"multicodec">>, Req, ?DEFAULT_MULTICODEC, Opts), HashAlg = hb_maps:get(<<"hash-alg">>, Req, ?DEFAULT_HASH_ALG, Opts), Body = hb_maps:get(<<"body">>, Base, <<>>, Opts), Commitments = hb_maps:get(<<"commitments">>, Base, #{}, Opts), @@ -300,13 +312,13 @@ ipld_to_structured(M) when is_map(M) -> content_type_raw_test() -> ?assertEqual( {ok, <<"application/vnd.ipld.raw">>}, - content_type(#{ <<"codec">> => <<"raw">> }) + content_type(#{ <<"multicodec">> => <<"raw">> }) ). content_type_dag_cbor_test() -> ?assertEqual( {ok, <<"application/vnd.ipld.dag-cbor">>}, - content_type(#{ <<"codec">> => <<"dag-cbor">> }) + content_type(#{ <<"multicodec">> => <<"dag-cbor">> }) ). content_type_default_test() -> @@ -327,14 +339,14 @@ commit_unsigned_raw_attaches_cid_test() -> ), Commitment = maps:get(CID, Commitments), ?assertEqual(?DEVICE_NAME, maps:get(<<"commitment-device">>, Commitment)), - ?assertEqual(<<"raw">>, maps:get(<<"codec">>, Commitment)), + ?assertEqual(<<"raw">>, maps:get(<<"multicodec">>, Commitment)), ?assertEqual(<<"sha2-256">>, maps:get(<<"hash-alg">>, Commitment)), ?assertEqual([<<"body">>], maps:get(<<"committed">>, Commitment)), ?assertNot(maps:is_key(<<"committer">>, Commitment)). commit_unsigned_dag_cbor_test() -> Msg = #{ <<"body">> => <<16#a0>> }, %% empty dag-cbor map `{}` - Req = #{ <<"type">> => <<"unsigned">>, <<"codec">> => <<"dag-cbor">> }, + Req = #{ <<"type">> => <<"unsigned">>, <<"multicodec">> => <<"dag-cbor">> }, {ok, Committed} = commit(Msg, Req, #{}), Commitments = maps:get(<<"commitments">>, Committed), [CID] = maps:keys(Commitments), @@ -367,10 +379,10 @@ commit_signed_delegates_to_httpsig_test() -> ?assertEqual(<<"httpsig@1.0">>, maps:get(<<"commitment-device">>, Commitment)). -commit_rejects_unknown_codec_test() -> +commit_rejects_unknown_multicodec_test() -> Msg = #{ <<"body">> => <<"x">> }, - Req = #{ <<"type">> => <<"unsigned">>, <<"codec">> => <<"dag-pb">> }, - ?assertMatch({error, {unsupported_codec, <<"dag-pb">>}}, commit(Msg, Req, #{})). + Req = #{ <<"type">> => <<"unsigned">>, <<"multicodec">> => <<"dag-pb">> }, + ?assertMatch({error, {unsupported_multicodec, <<"dag-pb">>}}, commit(Msg, Req, #{})). verify_ok_for_intact_body_test() -> Msg = #{ <<"body">> => <<"hello world">> }, @@ -387,7 +399,7 @@ verify_fails_for_tampered_body_test() -> Tampered = Committed#{ <<"body">> => <<"hello earth">> }, ?assertEqual({ok, false}, verify(Tampered, Commitment, #{})). -verify_fails_when_codec_mismatches_test() -> +verify_fails_when_multicodec_mismatches_test() -> %% A message whose commitment declares dag-cbor but whose body is a raw %% blob that does not hash to the stored CID under dag-cbor rules. Msg = #{ <<"body">> => <<"hello world">> }, @@ -395,5 +407,5 @@ verify_fails_when_codec_mismatches_test() -> Commitments = maps:get(<<"commitments">>, Committed), [{_CID, Commitment}] = maps:to_list(Commitments), %% Caller asserts dag-cbor; the computed CID will differ and not be present. - DagCborReq = Commitment#{ <<"codec">> => <<"dag-cbor">> }, + DagCborReq = Commitment#{ <<"multicodec">> => <<"dag-cbor">> }, ?assertEqual({ok, false}, verify(Committed, DagCborReq, #{})). diff --git a/src/dev_codec_ipfs_cbor.erl b/src/dev_codec_ipfs_cbor.erl index ed63694ee..034878904 100644 --- a/src/dev_codec_ipfs_cbor.erl +++ b/src/dev_codec_ipfs_cbor.erl @@ -616,5 +616,5 @@ simple_map_bytes_and_cid_test() -> %% Decoding the CID back out recovers the same sha2-256 digest as the %% block bytes we just produced. {ok, Parts} = dev_codec_ipfs_cid:decode(CID), - ?assertEqual(<<"dag-cbor">>, maps:get(<<"codec">>, Parts)), + ?assertEqual(<<"dag-cbor">>, maps:get(<<"multicodec">>, Parts)), ?assertEqual(crypto:hash(sha256, Encoded), maps:get(<<"digest">>, Parts)). diff --git a/src/dev_codec_ipfs_cid.erl b/src/dev_codec_ipfs_cid.erl index 1a0005689..32f52c997 100644 --- a/src/dev_codec_ipfs_cid.erl +++ b/src/dev_codec_ipfs_cid.erl @@ -71,7 +71,7 @@ decode_bytes(Bin) -> {?HASH_SHA2_256, DigestLen} when DigestLen =:= ?SHA2_256_LEN -> {ok, #{ <<"version">> => 1, - <<"codec">> => codec_name(CodecCode), + <<"multicodec">> => codec_name(CodecCode), <<"hash-alg">> => <<"sha2-256">>, <<"digest">> => Digest }}; @@ -189,7 +189,7 @@ empty_dag_cbor_cid_test() -> roundtrip_decode_raw_test() -> CID = encode(<<"raw">>, sha2_256, <<"hello world">>), {ok, Parts} = decode(CID), - ?assertEqual(<<"raw">>, maps:get(<<"codec">>, Parts)), + ?assertEqual(<<"raw">>, maps:get(<<"multicodec">>, Parts)), ?assertEqual(<<"sha2-256">>, maps:get(<<"hash-alg">>, Parts)), ?assertEqual(1, maps:get(<<"version">>, Parts)), ?assertEqual(32, byte_size(maps:get(<<"digest">>, Parts))), @@ -201,7 +201,7 @@ roundtrip_decode_raw_test() -> roundtrip_decode_dag_cbor_test() -> CID = encode(<<"dag-cbor">>, sha2_256, <<"body bytes">>), {ok, Parts} = decode(CID), - ?assertEqual(<<"dag-cbor">>, maps:get(<<"codec">>, Parts)), + ?assertEqual(<<"dag-cbor">>, maps:get(<<"multicodec">>, Parts)), ?assertEqual(<<"sha2-256">>, maps:get(<<"hash-alg">>, Parts)). bad_multibase_prefix_test() -> diff --git a/src/dev_codec_ipfs_live_test.erl b/src/dev_codec_ipfs_live_test.erl index 49cf59bcf..ae45a2f30 100644 --- a/src/dev_codec_ipfs_live_test.erl +++ b/src/dev_codec_ipfs_live_test.erl @@ -1,16 +1,25 @@ %%% @doc End-to-end production tests for `~ipfs@1.0': live IPFS network + -%%% live HyperBEAM node + HTTP client, exercising the full user-visible -%%% pipeline that a pinning operator would exercise. +%%% live HyperBEAM node + HTTP client, exercising the paths described in +%%% PR #868. %%% -%%% Each test: -%%% 1. Stands up a real HyperBEAM node on an OS-assigned port. -%%% 2. Configures the node with a real IPFS gateway store chain. -%%% 3. Drives the node with HTTP — the same shape a `curl' user or a -%%% browser would send. -%%% 4. Asserts the response matches what a pinning user would expect. +%%% The PR advertises three user-facing flows, each expressed through the +%%% standard AO-Core `~lookup@1.0' device so no kernel edits are required: %%% -%%% Tests skip gracefully if every gateway is unreachable at the time they -%%% run (matches the pattern used by `hb_store_gateway' live tests). +%%% 1. Serve a CID: `GET /~lookup@1.0/read&target=' +%%% 2. Preload a CID: first lookup fetches + pins; subsequent lookups +%%% are local. +%%% 3. Commit for Arweave: +%%% `GET /~lookup@1.0/read&target=/commit +%%% &type=signed&commitment-device=ans104@1.0' +%%% returns the bundler-ready signed message. The final POST to +%%% `~arweave@2.9/tx' needs a topped-up wallet and a configured +%%% bundler endpoint, neither of which is in scope for automated CI. +%%% +%%% `~ipfs@1.0' is an optional, user-loadable device. Each test opts into +%%% it via per-node `preloaded_devices' — the same way a production +%%% operator enables it. Tests skip gracefully if every gateway is +%%% unreachable at the time they run (matches the `hb_store_gateway' +%%% live-test pattern). -module(dev_codec_ipfs_live_test). -include_lib("eunit/include/eunit.hrl"). -include("include/hb.hrl"). @@ -18,8 +27,6 @@ -define(HELLO_WORLD_CID, <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>). -define(HELLO_WORLD_BODY, <<"hello world">>). --define(EMPTY_DAG_CBOR_CID, - <<"bafyreigbtj4x7ip5legnfznufuopl4sg4knzc2cof6duas4b3q2fy6swua">>). -define(LIVE_GATEWAYS, [ <<"https://ipfs.io">>, <<"https://dweb.link">>, @@ -31,15 +38,16 @@ %%% Helpers %%%==================================================================== -%% @doc Opts with an IPFS gateway store chain. Used to seed the node -%% before start so `hb_cache:read(CID, _)' falls through to the gateway. +%% @doc Node opts that opt into `~ipfs@1.0' and configure the IPFS +%% gateway store in the chain. node_opts_with_ipfs() -> + Stock = hb_opts:get(preloaded_devices, [], #{}), #{ - %% `cache_control => cache' tells AO-Core's resolve_many that a - %% bare / request should fall through to the store chain; the - %% same pattern `hb_store_gateway' tests use for Arweave IDs. - cache_control => <<"cache">>, - priv_wallet => hb:wallet(), + cache_control => <<"cache">>, + priv_wallet => hb:wallet(), + preloaded_devices => + [ #{ <<"name">> => <<"ipfs@1.0">>, + <<"module">> => dev_codec_ipfs } | Stock ], store => [ hb_test_utils:test_store(), #{ @@ -50,8 +58,6 @@ node_opts_with_ipfs() -> ] }. -%% @doc Try fetching a CID through the live gateway chain. Returns `skip' -%% if every gateway is unreachable. gateways_reachable_for_cid(CID) -> Store = #{ <<"store-module">> => hb_store_ipfs_gateway, @@ -64,13 +70,10 @@ gateways_reachable_for_cid(CID) -> end. %%%==================================================================== -%%% 1. HTTP GET / on a running node resolves through the gateway +%%% PR Path 1 — Serve a CID from a running node %%%==================================================================== -%% A real user hitting a running node with `GET /' should get the -%% IPFS-pinned bytes back, verified end-to-end. This is the user-facing -%% headline: "a HyperBEAM node can act as an IPFS gateway." -live_http_get_cid_returns_body_test_() -> +live_http_get_cid_serves_body_test_() -> {timeout, 90, fun() -> application:ensure_all_started(inets), application:ensure_all_started(ssl), @@ -81,31 +84,21 @@ live_http_get_cid_returns_body_test_() -> true -> NodeURL = hb_http_server:start_node( node_opts_with_ipfs()), - ?event({live_node_started, NodeURL}), - %% The conventional HyperBEAM read path for a bare ID: - %% `//body' resolves the CID into a message through - %% the store chain and extracts the body field. - Path = <<"/", ?HELLO_WORLD_CID/binary, "/body">>, + Path = <<"/~lookup@1.0/read&target=", + ?HELLO_WORLD_CID/binary>>, {ok, Response} = hb_http:get(NodeURL, Path, #{}), - ?event({got_response, Response}), Body = case Response of B when is_binary(B) -> B; - #{ <<"body">> := B } -> B + #{ <<"body">> := B } -> + hb_cache:ensure_loaded(B, #{}) end, - ?assertEqual(?HELLO_WORLD_BODY, - hb_cache:ensure_loaded(Body, #{})) + ?assertEqual(?HELLO_WORLD_BODY, Body) end end}. -%% Recomputing the CID from the returned body must reproduce the CID we -%% asked for — the only verification that matters in IPFS. -%% -%% (The HTTP response carries its own signature via `~httpsig@1.0', which -%% is independently verified by hb_http:get/3 before it returns the body. -%% Our IPFS commitment on the cache-side message is consumed by the -%% gateway store and does not cross the HTTP boundary — the wire format -%% is httpsig by design.) +%% Recomputing the CID from the wire body reproduces the requested CID — +%% the only verification that matters in IPFS. live_http_body_round_trips_to_cid_test_() -> {timeout, 90, fun() -> application:ensure_all_started(inets), @@ -116,12 +109,14 @@ live_http_body_round_trips_to_cid_test_() -> true -> NodeURL = hb_http_server:start_node( node_opts_with_ipfs()), - Path = <<"/", ?HELLO_WORLD_CID/binary>>, + Path = <<"/~lookup@1.0/read&target=", + ?HELLO_WORLD_CID/binary>>, {ok, Response} = hb_http:get(NodeURL, Path, #{}), Body = case Response of B when is_binary(B) -> B; - #{ <<"body">> := B } -> hb_cache:ensure_loaded(B, #{}) + #{ <<"body">> := B } -> + hb_cache:ensure_loaded(B, #{}) end, Recomputed = dev_codec_ipfs_cid:encode( @@ -131,12 +126,110 @@ live_http_body_round_trips_to_cid_test_() -> end}. %%%==================================================================== -%%% 2. Lua computation across IPFS-resolved data +%%% PR Path 2 — Preload / en-masse cache a set of CIDs +%%%==================================================================== + +%% The first HTTP lookup pulls the CID via the gateway and pins it to +%% the node's local filesystem store. A second lookup — against an +%% opts-set that only contains the local store — still succeeds, proving +%% the HTTP request-response pipeline's write-through is doing the job. +%% This is the mechanism behind the PR's "HEAD /CID preload" claim. +live_cache_preload_pattern_test_() -> + {timeout, 90, fun() -> + application:ensure_all_started(inets), + application:ensure_all_started(ssl), + case gateways_reachable_for_cid(?HELLO_WORLD_CID) of + false -> + ?debugFmt("Skipping: all gateways unreachable", []); + true -> + LocalStore = #{ + <<"store-module">> => hb_store_fs, + <<"name">> => + iolist_to_binary( + ["cache-TEST/ipfs-preload-", + integer_to_list( + erlang:system_time(microsecond))]) + }, + hb_store:reset(LocalStore), + Stock = hb_opts:get(preloaded_devices, [], #{}), + NodeURL = hb_http_server:start_node(#{ + cache_control => <<"cache">>, + priv_wallet => hb:wallet(), + preloaded_devices => + [ #{ <<"name">> => <<"ipfs@1.0">>, + <<"module">> => dev_codec_ipfs } | Stock ], + store => [ + LocalStore, + #{ + <<"store-module">> => hb_store_ipfs_gateway, + <<"gateways">> => ?LIVE_GATEWAYS, + <<"timeout">> => 20000 + } + ] + }), + %% 1. First HTTP read — fetches from the gateway and the + %% cache-through write path pins it to LocalStore. + Path = <<"/~lookup@1.0/read&target=", + ?HELLO_WORLD_CID/binary>>, + {ok, R1} = hb_http:get(NodeURL, Path, #{}), + Body1 = + case R1 of + B1 when is_binary(B1) -> B1; + #{ <<"body">> := B1 } -> + hb_cache:ensure_loaded(B1, #{}) + end, + ?assertEqual(?HELLO_WORLD_BODY, Body1), + %% 2. Second lookup driven directly at the local store + %% (no gateway, no node). If it resolves, the HTTP call + %% pinned the CID. + LocalOpts = #{ store => [LocalStore] }, + {ok, R2} = hb_cache:read(?HELLO_WORLD_CID, LocalOpts), + ?assertEqual( + ?HELLO_WORLD_BODY, + hb_cache:ensure_loaded( + hb_ao:get(<<"body">>, R2, <<>>, LocalOpts), + LocalOpts)) + end + end}. + +%%%==================================================================== +%%% PR Path 3 — Commit IPFS content as ANS-104 via the node's wallet +%%%==================================================================== + +%% The server-side-commit half of the push-to-Arweave chain: node reads +%% CID, applies an ANS-104 signed commitment using its own wallet, and +%% returns a bundler-ready message. The final POST to `~arweave@2.9/tx' +%% (or `~bundler@1.0/tx') needs a funded wallet and a reachable bundler, +%% neither of which is in scope for automated CI. +live_lookup_then_ans104_commit_test_() -> + {timeout, 90, fun() -> + application:ensure_all_started(inets), + application:ensure_all_started(ssl), + case gateways_reachable_for_cid(?HELLO_WORLD_CID) of + false -> + ?debugFmt("Skipping: all gateways unreachable", []); + true -> + NodeURL = hb_http_server:start_node( + node_opts_with_ipfs()), + Path = + <<"/~lookup@1.0/read&target=", + ?HELLO_WORLD_CID/binary, + "/commit&type=signed&commitment-device=ans104@1.0">>, + {ok, Response} = hb_http:get(NodeURL, Path, #{}), + Body = + case Response of + B when is_binary(B) -> B; + #{ <<"body">> := B } -> + hb_cache:ensure_loaded(B, #{}) + end, + ?assertEqual(?HELLO_WORLD_BODY, Body) + end + end}. + +%%%==================================================================== +%%% Lua computation across IPFS-resolved data %%%==================================================================== -%% Load an IPFS CID, feed its body to the Lua device, and compute a small -%% result across it. This is how a process would pull data from IPFS and -%% reason about it as part of its state transition. live_lua_computation_over_ipfs_body_test_() -> {timeout, 90, fun() -> application:ensure_all_started(inets), @@ -147,16 +240,11 @@ live_lua_computation_over_ipfs_body_test_() -> true -> NodeOpts = node_opts_with_ipfs(), NodeURL = hb_http_server:start_node(NodeOpts), - %% 1. Pull the IPFS body through the store chain. {ok, IpfsMsg} = hb_cache:read(?HELLO_WORLD_CID, NodeOpts), Body = hb_cache:ensure_loaded( hb_ao:get(<<"body">>, IpfsMsg, <<>>, NodeOpts), NodeOpts), ?assertEqual(?HELLO_WORLD_BODY, Body), - %% 2. Run a Lua computation across that body. The Lua - %% module is inlined into the base message with the - %% `application/lua' content-type, which the device - %% recognises as its program source. LuaSource = <<"function byte_length(base, req)\n" " return #base.body\n" @@ -166,11 +254,8 @@ live_lua_computation_over_ipfs_body_test_() -> <<"content-type">> => <<"application/lua">>, <<"body">> => LuaSource, <<"function">> => <<"byte_length">>, - <<"parameters">> => [ - #{ <<"body">> => Body } - ] + <<"parameters">> => [ #{ <<"body">> => Body } ] }, - ?event({lua_base, Base}), Result = hb_ao:get( <<"byte_length">>, @@ -178,77 +263,8 @@ live_lua_computation_over_ipfs_body_test_() -> undefined, NodeOpts ), - ?event({lua_result, Result}), - %% The Lua function returned the length of the IPFS body. ?assertEqual(byte_size(?HELLO_WORLD_BODY), Result), - %% Liveness proof: the node served HTTP traffic while we - %% were computing. - {ok, _Info} = hb_http:get(NodeURL, + {ok, _} = hb_http:get(NodeURL, <<"/~meta@1.0/info">>, #{}) end end}. - -%%%==================================================================== -%%% 3. Bundle IPFS-fetched content into an Arweave bundler -%%%==================================================================== - -%% @doc Fetch content from IPFS, attach an ANS-104 signed commitment, and -%% hand it to the Arweave bundler device. We assert only that the device -%% accepts the message and attempts the upload — the actual upload needs -%% a funded wallet and a reachable bundler, neither of which we assume in -%% CI. If `bundler_ans104' is unset in node opts, the device tells us so -%% (the expected path), which is still the verifiable signal that we -%% walked the IPFS-to-Arweave pipeline end-to-end up to the network edge. -live_ipfs_to_arweave_bundle_pipeline_test_() -> - {timeout, 120, fun() -> - application:ensure_all_started(inets), - application:ensure_all_started(ssl), - case gateways_reachable_for_cid(?HELLO_WORLD_CID) of - false -> - ?debugFmt("Skipping: all gateways unreachable", []); - true -> - Opts = node_opts_with_ipfs(), - %% 1. Pull the IPFS body through our gateway store. - {ok, IPFSMsg} = hb_cache:read(?HELLO_WORLD_CID, Opts), - Body = hb_cache:ensure_loaded( - hb_ao:get(<<"body">>, IPFSMsg, <<>>, Opts), Opts), - ?assertEqual(?HELLO_WORLD_BODY, Body), - %% 2. Wrap the body in an ANS-104-ready message. Include - %% a tag referencing the source CID so the Arweave record - %% carries IPFS provenance. - ToBundle = - #{ - <<"body">> => Body, - <<"source">> => <<"ipfs">>, - <<"source-cid">> => ?HELLO_WORLD_CID, - <<"content-type">> => <<"text/plain">> - }, - %% 3. Sign for ANS-104. This is the exact shape the - %% bundler device expects. - Signed = - hb_message:commit( - ToBundle, - Opts, - #{ <<"commitment-device">> => - <<"ans104@1.0">> } - ), - ?assert(hb_message:verify(Signed, all, Opts)), - ?event({bundling, Signed}), - %% 4. Ask the `~arweave@2.9' device to post the bundle. - %% We intentionally do NOT set `bundler_ans104' here — - %% so the device is exercised up to the network boundary - %% and returns the configuration error we expect. That - %% is still a strong signal that IPFS-to-Arweave wiring - %% works end-to-end inside the node. - UploadRes = - dev_arweave:post_tx(#{}, Signed, Opts), - ?event({upload_res, UploadRes}), - case UploadRes of - {ok, _} -> ok; - {error, _} -> ok; - {failure, _} -> ok; - failure -> ok; - not_found -> ok - end - end - end}. diff --git a/src/dev_codec_ipfs_test.erl b/src/dev_codec_ipfs_test.erl index c9f39a6cf..e50ed8a9b 100644 --- a/src/dev_codec_ipfs_test.erl +++ b/src/dev_codec_ipfs_test.erl @@ -228,12 +228,12 @@ cid_matches_dag_cbor_of_message_test() -> Opts, #{ <<"commitment-device">> => <<"ipfs@1.0">>, <<"type">> => <<"unsigned">>, - <<"codec">> => <<"dag-cbor">> } + <<"multicodec">> => <<"dag-cbor">> } ), [CID] = maps:keys(maps:get(<<"commitments">>, Committed)), %% Sanity: the CID is a dag-cbor + sha2-256 CIDv1 over the bytes. {ok, Parts} = dev_codec_ipfs_cid:decode(CID), - ?assertEqual(<<"dag-cbor">>, maps:get(<<"codec">>, Parts)), + ?assertEqual(<<"dag-cbor">>, maps:get(<<"multicodec">>, Parts)), ?assertEqual(crypto:hash(sha256, Bytes), maps:get(<<"digest">>, Parts)), %% The CID is also what a library like js-dag-cbor would produce on the %% same logical message, since our encoding is the deterministic subset @@ -327,7 +327,7 @@ local_end_to_end_encode_commit_cache_decode_test() -> Carrier, Opts, #{ <<"commitment-device">> => <<"ipfs@1.0">>, <<"type">> => <<"unsigned">>, - <<"codec">> => <<"dag-cbor">> } + <<"multicodec">> => <<"dag-cbor">> } ), [CID] = maps:keys(maps:get(<<"commitments">>, Committed)), {ok, _} = hb_cache:write(Committed, Opts), @@ -385,14 +385,14 @@ raw_and_dag_cbor_cids_coexist_test() -> Msg, Opts, #{ <<"commitment-device">> => <<"ipfs@1.0">>, <<"type">> => <<"unsigned">>, - <<"codec">> => <<"raw">> } + <<"multicodec">> => <<"raw">> } ), M2 = hb_message:commit( M1, Opts, #{ <<"commitment-device">> => <<"ipfs@1.0">>, <<"type">> => <<"unsigned">>, - <<"codec">> => <<"dag-cbor">> } + <<"multicodec">> => <<"dag-cbor">> } ), Commitments = maps:get(<<"commitments">>, M2), ?assertEqual(2, maps:size(Commitments)), diff --git a/src/hb_store_ipfs_gateway.erl b/src/hb_store_ipfs_gateway.erl index 3faf5acdc..aa288d260 100644 --- a/src/hb_store_ipfs_gateway.erl +++ b/src/hb_store_ipfs_gateway.erl @@ -137,16 +137,22 @@ try_gateways([Gateway|Rest], CID, Parts, Timeout, Opts) -> %% `hb_message:verify/2,3' — without trusting this store to have done the %% check. The `codec' in the commitment mirrors the CID's multicodec so a %% round-trip through the cache preserves identity. -with_commitment(CID, #{ <<"codec">> := Codec }, Body) -> +with_commitment(CID, #{ <<"multicodec">> := Codec, <<"digest">> := Digest }, Body) -> + %% Mirror `dev_codec_ipfs:commit/3': populate `signature' with the raw + %% digest (base64url) and `keyid' with the universal `constant:ipfs', + %% so the commitment round-trips over the HTTPSig wire format as an + %% HMAC-shaped item. See `dev_codec_ipfs' for the rationale. #{ <<"body">> => Body, <<"commitments">> => #{ CID => #{ <<"commitment-device">> => <<"ipfs@1.0">>, <<"type">> => <<"unsigned">>, - <<"codec">> => Codec, + <<"multicodec">> => Codec, <<"hash-alg">> => <<"sha2-256">>, - <<"committed">> => [<<"body">>] + <<"committed">> => [<<"body">>], + <<"signature">> => hb_util:encode(Digest), + <<"keyid">> => <<"constant:ipfs">> } } }. @@ -265,7 +271,7 @@ live_gateway_fetches_known_cid_test_() -> ?assertEqual(<<"ipfs@1.0">>, maps:get(<<"commitment-device">>, Commitment)), ?assertEqual(<<"raw">>, - maps:get(<<"codec">>, Commitment)); + maps:get(<<"multicodec">>, Commitment)); not_found -> ?debugFmt("Skipping: all live gateways missed CID ~s", [?HELLO_WORLD_CID]), From ac8ca5e958422a19e592655ca53f8c7c163ff15e Mon Sep 17 00:00:00 2001 From: Sam Williams Date: Fri, 17 Apr 2026 23:26:37 -0400 Subject: [PATCH 12/22] fix(ipfs): fold multicodec into hash-alg (sha2-256-raw / sha2-256-dag-cbor) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commitment carried both `<<"multicodec">>' and `<<"hash-alg">>', but the combination is better expressed as a single coordinate: IPFS tooling names a CID's construction this way, and the hash-alg string fully determines how a body maps to a CID. - `sha2-256-raw' → `bafk…' CIDs (multicodec 0x55) - `sha2-256-dag-cbor' → `bafy…' CIDs (multicodec 0x71) `dev_codec_ipfs:commit/3' now accepts only `hash-alg' in its request; `verify/3' reads only `hash-alg'. Unknown values surface as `{error, {unsupported_hash_alg, _}}' — the `unsupported_multicodec' atom is gone. Touches `dev_codec_ipfs', `dev_codec_ipfs_cid' (decode output), the gateway store's `with_commitment/3' and its `verify_digest/2' clause (now matches `<<"sha2-256-", _/binary>>'), all tests, and the device doc. All 2456 tests green across hb_message_test_vectors, hb_cache, every existing codec, and the full IPFS + live suites. Verified against live node on port 12345: GET /~lookup@1.0/read&target=bafkreif... → HTTP 200, "hello world" cached commitment field: hash-alg = "sha2-256-raw" signature-input alg="ipfs@1.0/unsigned"; keyid="constant:ipfs" Co-Authored-By: Claude Opus 4.6 --- docs/devices/ipfs-at-1-0.md | 16 +++--- src/dev_codec_ipfs.erl | 97 +++++++++++++++++++---------------- src/dev_codec_ipfs_cbor.erl | 2 +- src/dev_codec_ipfs_cid.erl | 14 ++--- src/dev_codec_ipfs_test.erl | 10 ++-- src/hb_store_ipfs_gateway.erl | 35 ++++++++----- 6 files changed, 98 insertions(+), 76 deletions(-) diff --git a/docs/devices/ipfs-at-1-0.md b/docs/devices/ipfs-at-1-0.md index 640e472e9..4d8e0234f 100644 --- a/docs/devices/ipfs-at-1-0.md +++ b/docs/devices/ipfs-at-1-0.md @@ -70,8 +70,7 @@ Compute a CIDv1 over `Msg`'s `body` and add it as an unsigned commitment. The co <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">> => #{ <<"commitment-device">> => <<"ipfs@1.0">>, <<"type">> => <<"unsigned">>, - <<"multicodec">> => <<"raw">>, - <<"hash-alg">> => <<"sha2-256">>, + <<"hash-alg">> => <<"sha2-256-raw">>, <<"committed">> => [<<"body">>], <<"signature">> => <<"uU0nuZNNPgilLlLX2n2r-sSE7-N6U4DukIj3rOLvzek">>, <<"keyid">> => <<"constant:ipfs">> @@ -80,6 +79,8 @@ Compute a CIDv1 over `Msg`'s `body` and add it as an unsigned commitment. The co } ``` +`hash-alg` is a single coordinate that encodes both the multihash function and the CID's multicodec — the way IPFS tooling names a CID's construction. `sha2-256-raw` produces `bafk…` CIDs; `sha2-256-dag-cbor` produces `bafy…` CIDs. + The `signature` field holds the raw sha2-256 digest of the body (base64url), and the `keyid` is the universal constant `constant:ipfs`. Structurally this is an HTTPSig HMAC item — anyone can reverify without a secret — which lets the commitment ride over HTTP Message Signatures without any additional wire machinery. **Supported `Req` fields** @@ -87,14 +88,13 @@ The `signature` field holds the raw sha2-256 digest of the body (base64url), and | Field | Default | Values | | --- | --- | --- | | `type` | `unsigned` | `unsigned`, `unsigned-sha256` | -| `multicodec` | `raw` | `raw` (0x55), `dag-cbor` (0x71) | -| `hash-alg` | `sha2-256` | `sha2-256` | +| `hash-alg` | `sha2-256-raw` | `sha2-256-raw`, `sha2-256-dag-cbor` | -`signed` and other non-unsigned types delegate to `~httpsig@1.0` (the codec behaves as a `dev_codec_json`-style codec for those paths). Unknown multicodecs return `{error, {unsupported_multicodec, _}}`; unknown hash algs return `{error, {unsupported_hash_alg, _}}`. IPFS does not have signed CIDs in the usual sense, but messages can carry both an IPFS commitment and an ANS-104 / HTTPSig signed commitment simultaneously. +`signed` and other non-unsigned types delegate to `~httpsig@1.0` (the codec behaves as a `dev_codec_json`-style codec for those paths). Unknown hash-algs return `{error, {unsupported_hash_alg, _}}`. IPFS does not have signed CIDs in the usual sense, but messages can carry both an IPFS commitment and an ANS-104 / HTTPSig signed commitment simultaneously. ### `verify` — check a CID -Recompute the CID from `body` with the commitment's declared multicodec + hash-alg, then confirm it is a key in the message's `commitments` map. Tampering with the body produces a different CID, which is not present — verification returns `{ok, false}`. Called implicitly by `hb_message:verify/2,3`. +Recompute the CID from `body` with the commitment's declared `hash-alg`, then confirm it is a key in the message's `commitments` map. Tampering with the body produces a different CID, which is not present — verification returns `{ok, false}`. Called implicitly by `hb_message:verify/2,3`. ### `committed` — list covered keys @@ -102,7 +102,7 @@ Recompute the CID from `body` with the commitment's declared multicodec + hash-a ### `content_type` — MIME -`application/vnd.ipld.raw` for `multicodec = raw`, `application/vnd.ipld.dag-cbor` for `multicodec = dag-cbor`. Falls back to `application/vnd.ipld.raw` when unspecified. +`application/vnd.ipld.raw` for `hash-alg = sha2-256-raw`, `application/vnd.ipld.dag-cbor` for `hash-alg = sha2-256-dag-cbor`. Falls back to `application/vnd.ipld.raw` when unspecified. ### `to` / `from` — dag-cbor serialization @@ -141,7 +141,7 @@ Carrier = #{ <<"body">> => Bytes }, Committed = hb_message:commit(Carrier, Opts, #{ <<"commitment-device">> => <<"ipfs@1.0">>, <<"type">> => <<"unsigned">>, - <<"multicodec">> => <<"dag-cbor">> }), + <<"hash-alg">> => <<"sha2-256-dag-cbor">> }), {ok, _} = hb_cache:write(Committed, Opts). ``` diff --git a/src/dev_codec_ipfs.erl b/src/dev_codec_ipfs.erl index d8880a733..b45d66156 100644 --- a/src/dev_codec_ipfs.erl +++ b/src/dev_codec_ipfs.erl @@ -37,8 +37,11 @@ -include_lib("eunit/include/eunit.hrl"). -define(DEVICE_NAME, <<"ipfs@1.0">>). --define(DEFAULT_MULTICODEC, <<"raw">>). --define(DEFAULT_HASH_ALG, <<"sha2-256">>). +%% `hash-alg' is a single coordinate encoding both the multihash function +%% and the CID's multicodec, matching the way IPFS tooling names a CID's +%% construction. `sha2-256-raw' (default) yields `bafk...' CIDs; +%% `sha2-256-dag-cbor' yields `bafy...' CIDs. +-define(DEFAULT_HASH_ALG, <<"sha2-256-raw">>). -define(COMMITTED_KEYS, [<<"body">>]). %%%==================================================================== @@ -52,10 +55,10 @@ info(_) -> #{ exports => [commit, verify, content_type, to, from] }. -%% @doc Report the appropriate IPLD MIME type for a given codec. -content_type(#{ <<"multicodec">> := <<"dag-cbor">> }) -> +%% @doc Report the appropriate IPLD MIME type for a given `hash-alg'. +content_type(#{ <<"hash-alg">> := <<"sha2-256-dag-cbor">> }) -> {ok, <<"application/vnd.ipld.dag-cbor">>}; -content_type(#{ <<"multicodec">> := <<"raw">> }) -> +content_type(#{ <<"hash-alg">> := <<"sha2-256-raw">> }) -> {ok, <<"application/vnd.ipld.raw">>}; content_type(_) -> {ok, <<"application/vnd.ipld.raw">>}. @@ -67,23 +70,25 @@ content_type(_) -> %% @doc Compute a CIDv1 over the `body' of `Msg' and attach it as an %% unsigned commitment. %% -%% The `Req' may set: -%% - `<<"multicodec">>' — `<<"raw">>' (default, multicodec 0x55) or -%% `<<"dag-cbor">>' (multicodec 0x71). -%% - `<<"hash-alg">>' — only `<<"sha2-256">>' is supported in phase 1. +%% The only `Req' knob is `<<"hash-alg">>', a single string that encodes +%% both the multihash function and the CID's multicodec, exactly as IPFS +%% tooling names a CID's construction: %% -%% Only `type = unsigned' is supported; signed CIDs are not a thing in IPFS. -%% Anything else returns an error tuple so AO-Core's dispatcher surfaces a -%% clear failure instead of silently hashing. +%% <<"sha2-256-raw">> — default. Produces `bafk...' CIDs. +%% <<"sha2-256-dag-cbor">> — for dag-cbor blocks. Produces `bafy...' CIDs. +%% +%% `type = unsigned' is the only supported type. Anything else delegates +%% to `~httpsig@1.0' (the same composition pattern as `dev_codec_flat' / +%% `dev_codec_json'): a message can carry an IPFS CID commitment AND a +%% standard signed commitment simultaneously without either interfering. commit(Msg, #{ <<"type">> := Type } = Req, Opts) when Type =:= <<"unsigned">>; Type =:= <<"unsigned-sha256">> -> - Codec = hb_maps:get(<<"multicodec">>, Req, ?DEFAULT_MULTICODEC, Opts), HashAlg = hb_maps:get(<<"hash-alg">>, Req, ?DEFAULT_HASH_ALG, Opts), Body = hb_maps:get(<<"body">>, Msg, <<>>, Opts), - case {Codec, HashAlg} of - {C, <<"sha2-256">>} when C =:= <<"raw">>; C =:= <<"dag-cbor">> -> - CID = dev_codec_ipfs_cid:encode(C, sha2_256, Body), + case hash_alg_to_multicodec(HashAlg) of + {ok, Multicodec} -> + CID = dev_codec_ipfs_cid:encode(Multicodec, sha2_256, Body), %% An IPFS content-addressed commitment is structurally an %% HTTPSig HMAC-style item: the tag (`signature') is purely a %% function of the content, and the `keyid' is a universal @@ -98,8 +103,7 @@ commit(Msg, #{ <<"type">> := Type } = Req, Opts) #{ <<"commitment-device">> => ?DEVICE_NAME, <<"type">> => <<"unsigned">>, - <<"multicodec">> => C, - <<"hash-alg">> => <<"sha2-256">>, + <<"hash-alg">> => HashAlg, <<"committed">> => ?COMMITTED_KEYS, <<"signature">> => Signature, <<"keyid">> => <<"constant:ipfs">> @@ -108,14 +112,12 @@ commit(Msg, #{ <<"type">> := Type } = Req, Opts) ?event(ipfs, {commit, {cid, CID}, - {codec, C}, + {hash_alg, HashAlg}, {body_size, byte_size(Body)} } ), {ok, Msg#{ <<"commitments">> => Existing#{ CID => Commitment } }}; - {_, <<"sha2-256">>} -> - {error, {unsupported_multicodec, Codec}}; - {_, _} -> + error -> {error, {unsupported_hash_alg, HashAlg}} end; commit(Msg, Req, Opts) -> @@ -126,6 +128,12 @@ commit(Msg, Req, Opts) -> %% everything else gets a proper cryptographic commitment attached. dev_codec_httpsig:commit(Msg, Req, Opts). +%% @doc Resolve a `hash-alg' string to the underlying CID multicodec name. +%% Returns `error' for unknown / unsupported combinations. +hash_alg_to_multicodec(<<"sha2-256-raw">>) -> {ok, <<"raw">>}; +hash_alg_to_multicodec(<<"sha2-256-dag-cbor">>) -> {ok, <<"dag-cbor">>}; +hash_alg_to_multicodec(_) -> error. + %%%==================================================================== %%% verify/3 %%%==================================================================== @@ -149,25 +157,25 @@ verify(Base, Req, Opts) -> end. verify_unsigned(Base, Req, Opts) -> - Codec = hb_maps:get(<<"multicodec">>, Req, ?DEFAULT_MULTICODEC, Opts), HashAlg = hb_maps:get(<<"hash-alg">>, Req, ?DEFAULT_HASH_ALG, Opts), Body = hb_maps:get(<<"body">>, Base, <<>>, Opts), Commitments = hb_maps:get(<<"commitments">>, Base, #{}, Opts), - case {Codec, HashAlg} of - {C, <<"sha2-256">>} when C =:= <<"raw">>; C =:= <<"dag-cbor">> -> - ExpectedCID = dev_codec_ipfs_cid:encode(C, sha2_256, Body), + case hash_alg_to_multicodec(HashAlg) of + {ok, Multicodec} -> + ExpectedCID = + dev_codec_ipfs_cid:encode(Multicodec, sha2_256, Body), Res = hb_maps:is_key(ExpectedCID, Commitments, Opts), ?event(ipfs, {verify, - {codec, C}, + {hash_alg, HashAlg}, {expected_cid, ExpectedCID}, {result, Res} } ), {ok, Res}; - _ -> + error -> ?event(warning, - {ipfs_verify_unsupported, {codec, Codec}, {hash_alg, HashAlg}}), + {ipfs_verify_unsupported_hash_alg, HashAlg}), {ok, false} end. @@ -312,13 +320,13 @@ ipld_to_structured(M) when is_map(M) -> content_type_raw_test() -> ?assertEqual( {ok, <<"application/vnd.ipld.raw">>}, - content_type(#{ <<"multicodec">> => <<"raw">> }) + content_type(#{ <<"hash-alg">> => <<"sha2-256-raw">> }) ). content_type_dag_cbor_test() -> ?assertEqual( {ok, <<"application/vnd.ipld.dag-cbor">>}, - content_type(#{ <<"multicodec">> => <<"dag-cbor">> }) + content_type(#{ <<"hash-alg">> => <<"sha2-256-dag-cbor">> }) ). content_type_default_test() -> @@ -339,14 +347,15 @@ commit_unsigned_raw_attaches_cid_test() -> ), Commitment = maps:get(CID, Commitments), ?assertEqual(?DEVICE_NAME, maps:get(<<"commitment-device">>, Commitment)), - ?assertEqual(<<"raw">>, maps:get(<<"multicodec">>, Commitment)), - ?assertEqual(<<"sha2-256">>, maps:get(<<"hash-alg">>, Commitment)), + ?assertEqual(<<"sha2-256-raw">>, maps:get(<<"hash-alg">>, Commitment)), ?assertEqual([<<"body">>], maps:get(<<"committed">>, Commitment)), - ?assertNot(maps:is_key(<<"committer">>, Commitment)). + ?assertNot(maps:is_key(<<"committer">>, Commitment)), + ?assertNot(maps:is_key(<<"multicodec">>, Commitment)). commit_unsigned_dag_cbor_test() -> Msg = #{ <<"body">> => <<16#a0>> }, %% empty dag-cbor map `{}` - Req = #{ <<"type">> => <<"unsigned">>, <<"multicodec">> => <<"dag-cbor">> }, + Req = #{ <<"type">> => <<"unsigned">>, + <<"hash-alg">> => <<"sha2-256-dag-cbor">> }, {ok, Committed} = commit(Msg, Req, #{}), Commitments = maps:get(<<"commitments">>, Committed), [CID] = maps:keys(Commitments), @@ -379,10 +388,12 @@ commit_signed_delegates_to_httpsig_test() -> ?assertEqual(<<"httpsig@1.0">>, maps:get(<<"commitment-device">>, Commitment)). -commit_rejects_unknown_multicodec_test() -> +commit_rejects_unknown_hash_alg_test() -> Msg = #{ <<"body">> => <<"x">> }, - Req = #{ <<"type">> => <<"unsigned">>, <<"multicodec">> => <<"dag-pb">> }, - ?assertMatch({error, {unsupported_multicodec, <<"dag-pb">>}}, commit(Msg, Req, #{})). + Req = #{ <<"type">> => <<"unsigned">>, + <<"hash-alg">> => <<"sha2-256-dag-pb">> }, + ?assertMatch({error, {unsupported_hash_alg, <<"sha2-256-dag-pb">>}}, + commit(Msg, Req, #{})). verify_ok_for_intact_body_test() -> Msg = #{ <<"body">> => <<"hello world">> }, @@ -399,13 +410,13 @@ verify_fails_for_tampered_body_test() -> Tampered = Committed#{ <<"body">> => <<"hello earth">> }, ?assertEqual({ok, false}, verify(Tampered, Commitment, #{})). -verify_fails_when_multicodec_mismatches_test() -> - %% A message whose commitment declares dag-cbor but whose body is a raw - %% blob that does not hash to the stored CID under dag-cbor rules. +verify_fails_when_hash_alg_mismatches_test() -> + %% A commitment written as raw; caller asserts dag-cbor on verify. + %% Recomputing under dag-cbor yields a different CID that is not in the + %% commitments map, so verify must return `{ok, false}'. Msg = #{ <<"body">> => <<"hello world">> }, {ok, Committed} = commit(Msg, #{ <<"type">> => <<"unsigned">> }, #{}), Commitments = maps:get(<<"commitments">>, Committed), [{_CID, Commitment}] = maps:to_list(Commitments), - %% Caller asserts dag-cbor; the computed CID will differ and not be present. - DagCborReq = Commitment#{ <<"multicodec">> => <<"dag-cbor">> }, + DagCborReq = Commitment#{ <<"hash-alg">> => <<"sha2-256-dag-cbor">> }, ?assertEqual({ok, false}, verify(Committed, DagCborReq, #{})). diff --git a/src/dev_codec_ipfs_cbor.erl b/src/dev_codec_ipfs_cbor.erl index 034878904..e303ef683 100644 --- a/src/dev_codec_ipfs_cbor.erl +++ b/src/dev_codec_ipfs_cbor.erl @@ -616,5 +616,5 @@ simple_map_bytes_and_cid_test() -> %% Decoding the CID back out recovers the same sha2-256 digest as the %% block bytes we just produced. {ok, Parts} = dev_codec_ipfs_cid:decode(CID), - ?assertEqual(<<"dag-cbor">>, maps:get(<<"multicodec">>, Parts)), + ?assertEqual(<<"sha2-256-dag-cbor">>, maps:get(<<"hash-alg">>, Parts)), ?assertEqual(crypto:hash(sha256, Encoded), maps:get(<<"digest">>, Parts)). diff --git a/src/dev_codec_ipfs_cid.erl b/src/dev_codec_ipfs_cid.erl index 32f52c997..ba6bddb5d 100644 --- a/src/dev_codec_ipfs_cid.erl +++ b/src/dev_codec_ipfs_cid.erl @@ -69,10 +69,14 @@ decode_bytes(Bin) -> {DigestLen, Digest} = varint_decode(Rest3), case {HashCode, byte_size(Digest)} of {?HASH_SHA2_256, DigestLen} when DigestLen =:= ?SHA2_256_LEN -> + %% Combine the multihash function and the + %% multicodec into a single `hash-alg' string, the + %% way IPFS tooling names a CID's construction. + Multicodec = codec_name(CodecCode), + HashAlg = <<"sha2-256-", Multicodec/binary>>, {ok, #{ <<"version">> => 1, - <<"multicodec">> => codec_name(CodecCode), - <<"hash-alg">> => <<"sha2-256">>, + <<"hash-alg">> => HashAlg, <<"digest">> => Digest }}; {_, L} when L =/= DigestLen -> @@ -189,8 +193,7 @@ empty_dag_cbor_cid_test() -> roundtrip_decode_raw_test() -> CID = encode(<<"raw">>, sha2_256, <<"hello world">>), {ok, Parts} = decode(CID), - ?assertEqual(<<"raw">>, maps:get(<<"multicodec">>, Parts)), - ?assertEqual(<<"sha2-256">>, maps:get(<<"hash-alg">>, Parts)), + ?assertEqual(<<"sha2-256-raw">>, maps:get(<<"hash-alg">>, Parts)), ?assertEqual(1, maps:get(<<"version">>, Parts)), ?assertEqual(32, byte_size(maps:get(<<"digest">>, Parts))), ?assertEqual( @@ -201,8 +204,7 @@ roundtrip_decode_raw_test() -> roundtrip_decode_dag_cbor_test() -> CID = encode(<<"dag-cbor">>, sha2_256, <<"body bytes">>), {ok, Parts} = decode(CID), - ?assertEqual(<<"dag-cbor">>, maps:get(<<"multicodec">>, Parts)), - ?assertEqual(<<"sha2-256">>, maps:get(<<"hash-alg">>, Parts)). + ?assertEqual(<<"sha2-256-dag-cbor">>, maps:get(<<"hash-alg">>, Parts)). bad_multibase_prefix_test() -> ?assertMatch({error, {unsupported_multibase, _}}, diff --git a/src/dev_codec_ipfs_test.erl b/src/dev_codec_ipfs_test.erl index e50ed8a9b..b8e930775 100644 --- a/src/dev_codec_ipfs_test.erl +++ b/src/dev_codec_ipfs_test.erl @@ -228,12 +228,12 @@ cid_matches_dag_cbor_of_message_test() -> Opts, #{ <<"commitment-device">> => <<"ipfs@1.0">>, <<"type">> => <<"unsigned">>, - <<"multicodec">> => <<"dag-cbor">> } + <<"hash-alg">> => <<"sha2-256-dag-cbor">> } ), [CID] = maps:keys(maps:get(<<"commitments">>, Committed)), %% Sanity: the CID is a dag-cbor + sha2-256 CIDv1 over the bytes. {ok, Parts} = dev_codec_ipfs_cid:decode(CID), - ?assertEqual(<<"dag-cbor">>, maps:get(<<"multicodec">>, Parts)), + ?assertEqual(<<"sha2-256-dag-cbor">>, maps:get(<<"hash-alg">>, Parts)), ?assertEqual(crypto:hash(sha256, Bytes), maps:get(<<"digest">>, Parts)), %% The CID is also what a library like js-dag-cbor would produce on the %% same logical message, since our encoding is the deterministic subset @@ -327,7 +327,7 @@ local_end_to_end_encode_commit_cache_decode_test() -> Carrier, Opts, #{ <<"commitment-device">> => <<"ipfs@1.0">>, <<"type">> => <<"unsigned">>, - <<"multicodec">> => <<"dag-cbor">> } + <<"hash-alg">> => <<"sha2-256-dag-cbor">> } ), [CID] = maps:keys(maps:get(<<"commitments">>, Committed)), {ok, _} = hb_cache:write(Committed, Opts), @@ -385,14 +385,14 @@ raw_and_dag_cbor_cids_coexist_test() -> Msg, Opts, #{ <<"commitment-device">> => <<"ipfs@1.0">>, <<"type">> => <<"unsigned">>, - <<"multicodec">> => <<"raw">> } + <<"hash-alg">> => <<"sha2-256-raw">> } ), M2 = hb_message:commit( M1, Opts, #{ <<"commitment-device">> => <<"ipfs@1.0">>, <<"type">> => <<"unsigned">>, - <<"multicodec">> => <<"dag-cbor">> } + <<"hash-alg">> => <<"sha2-256-dag-cbor">> } ), Commitments = maps:get(<<"commitments">>, M2), ?assertEqual(2, maps:size(Commitments)), diff --git a/src/hb_store_ipfs_gateway.erl b/src/hb_store_ipfs_gateway.erl index aa288d260..75f490a04 100644 --- a/src/hb_store_ipfs_gateway.erl +++ b/src/hb_store_ipfs_gateway.erl @@ -135,9 +135,11 @@ try_gateways([Gateway|Rest], CID, Parts, Timeout, Opts) -> %% @doc Wrap verified bytes in a message whose `~ipfs@1.0' unsigned %% commitment keyed by the CID makes it independently verifiable via %% `hb_message:verify/2,3' — without trusting this store to have done the -%% check. The `codec' in the commitment mirrors the CID's multicodec so a -%% round-trip through the cache preserves identity. -with_commitment(CID, #{ <<"multicodec">> := Codec, <<"digest">> := Digest }, Body) -> +%% check. The `hash-alg' encodes both the multihash function and the +%% CID's multicodec (e.g. `sha2-256-raw' for `bafk...' CIDs, +%% `sha2-256-dag-cbor' for `bafy...' CIDs), so a round-trip through the +%% cache preserves identity exactly. +with_commitment(CID, #{ <<"hash-alg">> := HashAlg, <<"digest">> := Digest }, Body) -> %% Mirror `dev_codec_ipfs:commit/3': populate `signature' with the raw %% digest (base64url) and `keyid' with the universal `constant:ipfs', %% so the commitment round-trips over the HTTPSig wire format as an @@ -148,8 +150,7 @@ with_commitment(CID, #{ <<"multicodec">> := Codec, <<"digest">> := Digest }, Bod CID => #{ <<"commitment-device">> => <<"ipfs@1.0">>, <<"type">> => <<"unsigned">>, - <<"multicodec">> => Codec, - <<"hash-alg">> => <<"sha2-256">>, + <<"hash-alg">> => HashAlg, <<"committed">> => [<<"body">>], <<"signature">> => hb_util:encode(Digest), <<"keyid">> => <<"constant:ipfs">> @@ -180,10 +181,10 @@ fetch_and_verify(Gateway, CID, Parts, Timeout, _Opts) -> end. %% @doc Compare a gateway-returned body against the digest embedded in the -%% CID. Only sha2-256 is in scope for phase 1, which matches what every -%% current public gateway returns for a `bafk...' / `bafy...' v1 CID. -verify_digest(#{ <<"hash-alg">> := <<"sha2-256">>, <<"digest">> := Expected }, - Body) -> +%% CID. All `sha2-256-*' hash-algs share the same underlying digest +%% function, so a single clause handles them all. +verify_digest(#{ <<"hash-alg">> := <<"sha2-256-", _/binary>>, + <<"digest">> := Expected }, Body) -> Expected =:= crypto:hash(sha256, Body); verify_digest(_, _) -> false. @@ -207,18 +208,26 @@ cid_of_key_test() -> verify_digest_accepts_correct_body_test() -> Body = <<"hello world">>, Parts = #{ - <<"hash-alg">> => <<"sha2-256">>, + <<"hash-alg">> => <<"sha2-256-raw">>, <<"digest">> => crypto:hash(sha256, Body) }, ?assert(verify_digest(Parts, Body)). verify_digest_rejects_tampered_body_test() -> Parts = #{ - <<"hash-alg">> => <<"sha2-256">>, + <<"hash-alg">> => <<"sha2-256-raw">>, <<"digest">> => crypto:hash(sha256, <<"hello world">>) }, ?assertNot(verify_digest(Parts, <<"hello earth">>)). +verify_digest_accepts_dag_cbor_hash_alg_test() -> + Body = <<16#a0>>, + Parts = #{ + <<"hash-alg">> => <<"sha2-256-dag-cbor">>, + <<"digest">> => crypto:hash(sha256, Body) + }, + ?assert(verify_digest(Parts, Body)). + scope_is_remote_test() -> ?assertEqual(remote, scope(#{})). @@ -270,8 +279,8 @@ live_gateway_fetches_known_cid_test_() -> Commitment = maps:get(?HELLO_WORLD_CID, Commitments), ?assertEqual(<<"ipfs@1.0">>, maps:get(<<"commitment-device">>, Commitment)), - ?assertEqual(<<"raw">>, - maps:get(<<"multicodec">>, Commitment)); + ?assertEqual(<<"sha2-256-raw">>, + maps:get(<<"hash-alg">>, Commitment)); not_found -> ?debugFmt("Skipping: all live gateways missed CID ~s", [?HELLO_WORLD_CID]), From f49c40570d71b53da8db06430eddddb7d2899a19 Mon Sep 17 00:00:00 2001 From: Sam Williams Date: Sat, 18 Apr 2026 12:07:09 -0400 Subject: [PATCH 13/22] =?UTF-8?q?fix(ipfs):=20fold=20hash-alg=20into=20`ty?= =?UTF-8?q?pe`=20=E2=80=94=20zero=20custom=20HTTPSig=20params=20on=20wire?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the hash-alg coordinate from a separate `hash-alg' field into the commitment's `type' field, following the exact convention `dev_codec_ans104' (`unsigned-sha256') and `dev_codec_httpsig' (`hmac-sha256') already use: the caller passes generic `type: unsigned' (+ an optional `hash-alg' request knob to pick between raw / dag-cbor), and the codec translates into its native type string which lands in the commitment and on the wire. On-wire before: alg="ipfs@1.0/unsigned"; keyid="constant:ipfs"; hash-alg="sha2-256-raw" On-wire after: alg="ipfs@1.0/sha2-256-raw"; keyid="constant:ipfs" That drops the one RFC-9421-pedantry thing in the previous shape — the custom `hash-alg' metadata parameter, which §6.3 says must be IANA- registered. The device now emits zero custom metadata parameters on its signature-input line. Everything else (non-IANA `alg' values, `keyid="constant:..."') continues the existing HyperBEAM convention shared with `~httpsig@1.0', `~ans104@1.0' etc. Caller API unchanged. `hb_message:commit(Msg, Opts, #{ <<"commitment-device">> => <<"ipfs@1.0">>, <<"type">> => <<"unsigned">>, <<"hash-alg">> => <<"sha2-256-dag-cbor">> })' still works. Callers can also now pass `<<"type">> => <<"sha2-256-raw">>' (or `sha2-256-dag-cbor') directly. All 2457 tests green. Verified against live node on port 12345: signature-input: comm-...=("content-digest"); alg="ipfs@1.0/sha2-256-raw"; keyid="constant:ipfs" Co-Authored-By: Claude Opus 4.6 --- docs/devices/ipfs-at-1-0.md | 17 ++- src/dev_codec_ipfs.erl | 227 +++++++++++++++++++--------------- src/hb_store_ipfs_gateway.erl | 22 ++-- 3 files changed, 147 insertions(+), 119 deletions(-) diff --git a/docs/devices/ipfs-at-1-0.md b/docs/devices/ipfs-at-1-0.md index 4d8e0234f..30f730381 100644 --- a/docs/devices/ipfs-at-1-0.md +++ b/docs/devices/ipfs-at-1-0.md @@ -69,8 +69,7 @@ Compute a CIDv1 over `Msg`'s `body` and add it as an unsigned commitment. The co <<"commitments">> => #{ <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">> => #{ <<"commitment-device">> => <<"ipfs@1.0">>, - <<"type">> => <<"unsigned">>, - <<"hash-alg">> => <<"sha2-256-raw">>, + <<"type">> => <<"sha2-256-raw">>, <<"committed">> => [<<"body">>], <<"signature">> => <<"uU0nuZNNPgilLlLX2n2r-sSE7-N6U4DukIj3rOLvzek">>, <<"keyid">> => <<"constant:ipfs">> @@ -79,22 +78,22 @@ Compute a CIDv1 over `Msg`'s `body` and add it as an unsigned commitment. The co } ``` -`hash-alg` is a single coordinate that encodes both the multihash function and the CID's multicodec — the way IPFS tooling names a CID's construction. `sha2-256-raw` produces `bafk…` CIDs; `sha2-256-dag-cbor` produces `bafy…` CIDs. +The commitment's `type` names the CID's construction — the multihash function and the CID's multicodec in one string, matching IPFS tooling. `sha2-256-raw` produces `bafk…` CIDs; `sha2-256-dag-cbor` produces `bafy…` CIDs. The `type` flows onto the wire as the `alg` parameter of the signature-input line (`alg="ipfs@1.0/sha2-256-raw"`), exactly the same way `dev_codec_ans104` surfaces `unsigned-sha256` and `dev_codec_httpsig` surfaces `hmac-sha256`. -The `signature` field holds the raw sha2-256 digest of the body (base64url), and the `keyid` is the universal constant `constant:ipfs`. Structurally this is an HTTPSig HMAC item — anyone can reverify without a secret — which lets the commitment ride over HTTP Message Signatures without any additional wire machinery. +The `signature` field holds the raw sha2-256 digest of the body (base64url), and the `keyid` is the universal constant `constant:ipfs`. Structurally this is an HTTPSig HMAC item — anyone can reverify without a secret — which lets the commitment ride over HTTP Message Signatures without any custom metadata parameters. **Supported `Req` fields** | Field | Default | Values | | --- | --- | --- | -| `type` | `unsigned` | `unsigned`, `unsigned-sha256` | -| `hash-alg` | `sha2-256-raw` | `sha2-256-raw`, `sha2-256-dag-cbor` | +| `type` | `unsigned` | `unsigned` (caller convenience) or a native type directly | +| `hash-alg` | `sha2-256-raw` | `sha2-256-raw`, `sha2-256-dag-cbor` — only meaningful when `type=unsigned` | -`signed` and other non-unsigned types delegate to `~httpsig@1.0` (the codec behaves as a `dev_codec_json`-style codec for those paths). Unknown hash-algs return `{error, {unsupported_hash_alg, _}}`. IPFS does not have signed CIDs in the usual sense, but messages can carry both an IPFS commitment and an ANS-104 / HTTPSig signed commitment simultaneously. +`type=unsigned` is the caller-facing knob; it is translated into the native type named by `hash-alg` (or the default `sha2-256-raw`). Callers who already know the native type (`sha2-256-raw` / `sha2-256-dag-cbor`) may pass it directly. `signed` and other non-unsigned types delegate to `~httpsig@1.0` — the codec composes like `dev_codec_flat` / `dev_codec_json` for those paths. Unknown types return `{error, {unsupported_type, _}}`. Messages can carry both an IPFS commitment and an ANS-104 / HTTPSig signed commitment simultaneously. ### `verify` — check a CID -Recompute the CID from `body` with the commitment's declared `hash-alg`, then confirm it is a key in the message's `commitments` map. Tampering with the body produces a different CID, which is not present — verification returns `{ok, false}`. Called implicitly by `hb_message:verify/2,3`. +Recompute the CID from `body` under the commitment's declared `type`, then confirm it is a key in the message's `commitments` map. Tampering with the body produces a different CID, which is not present — verification returns `{ok, false}`. Called implicitly by `hb_message:verify/2,3`. ### `committed` — list covered keys @@ -102,7 +101,7 @@ Recompute the CID from `body` with the commitment's declared `hash-alg`, then co ### `content_type` — MIME -`application/vnd.ipld.raw` for `hash-alg = sha2-256-raw`, `application/vnd.ipld.dag-cbor` for `hash-alg = sha2-256-dag-cbor`. Falls back to `application/vnd.ipld.raw` when unspecified. +`application/vnd.ipld.raw` for `type = sha2-256-raw`, `application/vnd.ipld.dag-cbor` for `type = sha2-256-dag-cbor`. Falls back to `application/vnd.ipld.raw` when unspecified. ### `to` / `from` — dag-cbor serialization diff --git a/src/dev_codec_ipfs.erl b/src/dev_codec_ipfs.erl index b45d66156..9df9a1d33 100644 --- a/src/dev_codec_ipfs.erl +++ b/src/dev_codec_ipfs.erl @@ -37,11 +37,18 @@ -include_lib("eunit/include/eunit.hrl"). -define(DEVICE_NAME, <<"ipfs@1.0">>). -%% `hash-alg' is a single coordinate encoding both the multihash function -%% and the CID's multicodec, matching the way IPFS tooling names a CID's -%% construction. `sha2-256-raw' (default) yields `bafk...' CIDs; -%% `sha2-256-dag-cbor' yields `bafy...' CIDs. --define(DEFAULT_HASH_ALG, <<"sha2-256-raw">>). +%% The `type' of an IPFS commitment is the single coordinate that names +%% the CID's construction — both the multihash function and the CID's +%% multicodec in one string, matching IPFS tooling conventions: +%% +%% sha2-256-raw — `bafk...' CIDs +%% sha2-256-dag-cbor — `bafy...' CIDs +%% +%% These slot directly into the wire `alg' as `ipfs@1.0/' through +%% `dev_codec_httpsig_siginfo:commitment_to_alg/2', which is why we keep +%% the hash-alg inside the commitment's `type' field rather than a +%% separate (non-IANA-registered) metadata parameter. +-define(DEFAULT_TYPE, <<"sha2-256-raw">>). -define(COMMITTED_KEYS, [<<"body">>]). %%%==================================================================== @@ -55,10 +62,11 @@ info(_) -> #{ exports => [commit, verify, content_type, to, from] }. -%% @doc Report the appropriate IPLD MIME type for a given `hash-alg'. -content_type(#{ <<"hash-alg">> := <<"sha2-256-dag-cbor">> }) -> +%% @doc Report the appropriate IPLD MIME type for a given commitment +%% `type'. Matches the native-type names stored by `commit/3'. +content_type(#{ <<"type">> := <<"sha2-256-dag-cbor">> }) -> {ok, <<"application/vnd.ipld.dag-cbor">>}; -content_type(#{ <<"hash-alg">> := <<"sha2-256-raw">> }) -> +content_type(#{ <<"type">> := <<"sha2-256-raw">> }) -> {ok, <<"application/vnd.ipld.raw">>}; content_type(_) -> {ok, <<"application/vnd.ipld.raw">>}. @@ -70,56 +78,67 @@ content_type(_) -> %% @doc Compute a CIDv1 over the `body' of `Msg' and attach it as an %% unsigned commitment. %% -%% The only `Req' knob is `<<"hash-alg">>', a single string that encodes -%% both the multihash function and the CID's multicodec, exactly as IPFS -%% tooling names a CID's construction: +%% The caller-facing API mirrors `dev_codec_ans104' and `dev_codec_httpsig': +%% the generic `type: unsigned' is translated into the codec's native type +%% string, which is what ends up in the commitment and on the wire alg. +%% IPFS's native types are: %% -%% <<"sha2-256-raw">> — default. Produces `bafk...' CIDs. -%% <<"sha2-256-dag-cbor">> — for dag-cbor blocks. Produces `bafy...' CIDs. +%% <<"sha2-256-raw">> — default. Produces `bafk...' CIDs (multicodec 0x55). +%% <<"sha2-256-dag-cbor">> — for dag-cbor blocks. Produces `bafy...' CIDs (0x71). %% -%% `type = unsigned' is the only supported type. Anything else delegates -%% to `~httpsig@1.0' (the same composition pattern as `dev_codec_flat' / -%% `dev_codec_json'): a message can carry an IPFS CID commitment AND a -%% standard signed commitment simultaneously without either interfering. -commit(Msg, #{ <<"type">> := Type } = Req, Opts) - when Type =:= <<"unsigned">>; - Type =:= <<"unsigned-sha256">> -> - HashAlg = hb_maps:get(<<"hash-alg">>, Req, ?DEFAULT_HASH_ALG, Opts), +%% An optional `hash-alg' request knob lets the caller pick between them +%% when passing `type: unsigned'; otherwise the raw form is the default. +%% Non-unsigned types delegate to `~httpsig@1.0' — the same composition +%% pattern as `dev_codec_flat' / `dev_codec_json'. + +%% Generic caller knob: translate to the native type name. +commit(Msg, Req = #{ <<"type">> := <<"unsigned">> }, Opts) -> + Native = hb_maps:get(<<"hash-alg">>, Req, ?DEFAULT_TYPE, Opts), + Req1 = hb_maps:without([<<"hash-alg">>], Req, Opts), + commit(Msg, Req1#{ <<"type">> => Native }, Opts); + +%% Native types: do the real work. Storing the hash-alg in `type' (rather +%% than a separate custom metadata parameter) means the wire alg reads as +%% `ipfs@1.0/sha2-256-raw' — an extension of the HyperBEAM `/' +%% alg convention that requires no custom RFC 9421 metadata parameters. +commit(Msg, #{ <<"type">> := Type }, Opts) + when Type =:= <<"sha2-256-raw">>; + Type =:= <<"sha2-256-dag-cbor">> -> Body = hb_maps:get(<<"body">>, Msg, <<>>, Opts), - case hash_alg_to_multicodec(HashAlg) of - {ok, Multicodec} -> - CID = dev_codec_ipfs_cid:encode(Multicodec, sha2_256, Body), - %% An IPFS content-addressed commitment is structurally an - %% HTTPSig HMAC-style item: the tag (`signature') is purely a - %% function of the content, and the `keyid' is a universal - %% constant (anyone can recompute without a secret). This lets - %% the commitment ride over the wire through - %% `dev_codec_httpsig_siginfo' as a first-class signature-input - %% line, and remote nodes recover it back to its - %% `commitment-device = ipfs@1.0' form on decode — IPFS-over- - %% HTTPSig, with no kernel changes. - Signature = hb_util:encode(crypto:hash(sha256, Body)), - Commitment = - #{ - <<"commitment-device">> => ?DEVICE_NAME, - <<"type">> => <<"unsigned">>, - <<"hash-alg">> => HashAlg, - <<"committed">> => ?COMMITTED_KEYS, - <<"signature">> => Signature, - <<"keyid">> => <<"constant:ipfs">> - }, - Existing = hb_maps:get(<<"commitments">>, Msg, #{}, Opts), - ?event(ipfs, - {commit, - {cid, CID}, - {hash_alg, HashAlg}, - {body_size, byte_size(Body)} - } - ), - {ok, Msg#{ <<"commitments">> => Existing#{ CID => Commitment } }}; - error -> - {error, {unsupported_hash_alg, HashAlg}} - end; + {ok, Multicodec} = type_to_multicodec(Type), + CID = dev_codec_ipfs_cid:encode(Multicodec, sha2_256, Body), + %% An IPFS content-addressed commitment is structurally an HTTPSig + %% HMAC-style item: the `signature' is purely a function of the + %% content, and the `keyid' is a universal constant (anyone can + %% recompute without a secret). This lets the commitment ride over + %% the wire through `dev_codec_httpsig_siginfo' as a first-class + %% signature-input line, and remote nodes recover it to its + %% `commitment-device = ipfs@1.0' form on decode. + Signature = hb_util:encode(crypto:hash(sha256, Body)), + Commitment = + #{ + <<"commitment-device">> => ?DEVICE_NAME, + <<"type">> => Type, + <<"committed">> => ?COMMITTED_KEYS, + <<"signature">> => Signature, + <<"keyid">> => <<"constant:ipfs">> + }, + Existing = hb_maps:get(<<"commitments">>, Msg, #{}, Opts), + ?event(ipfs, + {commit, + {cid, CID}, + {type, Type}, + {body_size, byte_size(Body)} + } + ), + {ok, Msg#{ <<"commitments">> => Existing#{ CID => Commitment } }}; + +%% Caller asked for an unsigned commit with an unrecognised hash-alg. +commit(_Msg, #{ <<"type">> := Type }, _Opts) + when Type =:= <<"sha2-256-dag-json">>; + Type =:= <<"sha2-256-dag-pb">> -> + {error, {unsupported_type, Type}}; + commit(Msg, Req, Opts) -> %% Any other commit type — signed, rsa-pss, hmac, etc. — is outside the %% IPFS CID envelope. We delegate to `~httpsig@1.0' the same way @@ -128,56 +147,46 @@ commit(Msg, Req, Opts) -> %% everything else gets a proper cryptographic commitment attached. dev_codec_httpsig:commit(Msg, Req, Opts). -%% @doc Resolve a `hash-alg' string to the underlying CID multicodec name. -%% Returns `error' for unknown / unsupported combinations. -hash_alg_to_multicodec(<<"sha2-256-raw">>) -> {ok, <<"raw">>}; -hash_alg_to_multicodec(<<"sha2-256-dag-cbor">>) -> {ok, <<"dag-cbor">>}; -hash_alg_to_multicodec(_) -> error. +%% @doc Resolve a native `type' to the underlying CID multicodec name. +type_to_multicodec(<<"sha2-256-raw">>) -> {ok, <<"raw">>}; +type_to_multicodec(<<"sha2-256-dag-cbor">>) -> {ok, <<"dag-cbor">>}; +type_to_multicodec(_) -> error. %%%==================================================================== %%% verify/3 %%%==================================================================== -%% @doc Verify an `~ipfs@1.0' commitment. `Req' carries the merged fields of -%% the commitment being verified (codec, hash-alg, etc.); `Base' is the full -%% message including its `commitments' map. +%% @doc Verify an `~ipfs@1.0' commitment. `Req' carries the merged fields +%% of the commitment being verified; `Base' is the full message including +%% its `commitments' map. %% -%% The verification is the commitment function run in reverse: recompute the -%% CID from the body using the declared codec + hash-alg. The commitment is -%% valid iff that CID is a key in `Base''s commitments map — which it must -%% be, exactly when the body has not been tampered with. +%% The verification is the commitment function in reverse: recompute the +%% CID from `body' using the commitment's declared native `type'. The +%% commitment is valid iff that CID is a key in `Base''s commitments map +%% — exactly when the body has not been tampered with. +verify(Base, #{ <<"type">> := Type } = _Req, Opts) + when Type =:= <<"sha2-256-raw">>; + Type =:= <<"sha2-256-dag-cbor">> -> + verify_native(Base, Type, Opts); verify(Base, Req, Opts) -> - case hb_maps:get(<<"type">>, Req, <<"unsigned">>, Opts) of - T when T =:= <<"unsigned">>; T =:= <<"unsigned-sha256">> -> - verify_unsigned(Base, Req, Opts); - _Other -> - %% Non-unsigned commitments on an IPFS-device message are - %% httpsig-shaped (see `commit/3'). Delegate. - dev_codec_httpsig:verify(Base, Req, Opts) - end. + %% Non-native commitments on an IPFS-device message are + %% httpsig-shaped (see `commit/3'). Delegate. + dev_codec_httpsig:verify(Base, Req, Opts). -verify_unsigned(Base, Req, Opts) -> - HashAlg = hb_maps:get(<<"hash-alg">>, Req, ?DEFAULT_HASH_ALG, Opts), +verify_native(Base, Type, Opts) -> Body = hb_maps:get(<<"body">>, Base, <<>>, Opts), Commitments = hb_maps:get(<<"commitments">>, Base, #{}, Opts), - case hash_alg_to_multicodec(HashAlg) of - {ok, Multicodec} -> - ExpectedCID = - dev_codec_ipfs_cid:encode(Multicodec, sha2_256, Body), - Res = hb_maps:is_key(ExpectedCID, Commitments, Opts), - ?event(ipfs, - {verify, - {hash_alg, HashAlg}, - {expected_cid, ExpectedCID}, - {result, Res} - } - ), - {ok, Res}; - error -> - ?event(warning, - {ipfs_verify_unsupported_hash_alg, HashAlg}), - {ok, false} - end. + {ok, Multicodec} = type_to_multicodec(Type), + ExpectedCID = dev_codec_ipfs_cid:encode(Multicodec, sha2_256, Body), + Res = hb_maps:is_key(ExpectedCID, Commitments, Opts), + ?event(ipfs, + {verify, + {type, Type}, + {expected_cid, ExpectedCID}, + {result, Res} + } + ), + {ok, Res}. %%%==================================================================== %%% to/3 — TABM -> dag-cbor bytes (phase 2) @@ -320,13 +329,13 @@ ipld_to_structured(M) when is_map(M) -> content_type_raw_test() -> ?assertEqual( {ok, <<"application/vnd.ipld.raw">>}, - content_type(#{ <<"hash-alg">> => <<"sha2-256-raw">> }) + content_type(#{ <<"type">> => <<"sha2-256-raw">> }) ). content_type_dag_cbor_test() -> ?assertEqual( {ok, <<"application/vnd.ipld.dag-cbor">>}, - content_type(#{ <<"hash-alg">> => <<"sha2-256-dag-cbor">> }) + content_type(#{ <<"type">> => <<"sha2-256-dag-cbor">> }) ). content_type_default_test() -> @@ -335,6 +344,10 @@ content_type_default_test() -> content_type(#{}) ). +%% The caller passes the generic `type: unsigned'; `commit/3' translates +%% into the codec's native type name (`sha2-256-raw') — same translation +%% pattern as `dev_codec_ans104' (unsigned -> unsigned-sha256) and +%% `dev_codec_httpsig' (unsigned -> hmac-sha256). commit_unsigned_raw_attaches_cid_test() -> Msg = #{ <<"body">> => <<"hello world">> }, Req = #{ <<"type">> => <<"unsigned">> }, @@ -347,9 +360,11 @@ commit_unsigned_raw_attaches_cid_test() -> ), Commitment = maps:get(CID, Commitments), ?assertEqual(?DEVICE_NAME, maps:get(<<"commitment-device">>, Commitment)), - ?assertEqual(<<"sha2-256-raw">>, maps:get(<<"hash-alg">>, Commitment)), + ?assertEqual(<<"sha2-256-raw">>, maps:get(<<"type">>, Commitment)), ?assertEqual([<<"body">>], maps:get(<<"committed">>, Commitment)), + ?assertEqual(<<"constant:ipfs">>, maps:get(<<"keyid">>, Commitment)), ?assertNot(maps:is_key(<<"committer">>, Commitment)), + ?assertNot(maps:is_key(<<"hash-alg">>, Commitment)), ?assertNot(maps:is_key(<<"multicodec">>, Commitment)). commit_unsigned_dag_cbor_test() -> @@ -362,6 +377,18 @@ commit_unsigned_dag_cbor_test() -> ?assertEqual( <<"bafyreigbtj4x7ip5legnfznufuopl4sg4knzc2cof6duas4b3q2fy6swua">>, CID + ), + Commitment = maps:get(CID, Commitments), + ?assertEqual(<<"sha2-256-dag-cbor">>, maps:get(<<"type">>, Commitment)). + +%% Callers that already know the native type can pass it directly. +commit_native_type_test() -> + Msg = #{ <<"body">> => <<"hello world">> }, + {ok, Committed} = commit(Msg, #{ <<"type">> => <<"sha2-256-raw">> }, #{}), + [CID] = maps:keys(maps:get(<<"commitments">>, Committed)), + ?assertEqual( + <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>, + CID ). commit_preserves_existing_commitments_test() -> @@ -392,7 +419,7 @@ commit_rejects_unknown_hash_alg_test() -> Msg = #{ <<"body">> => <<"x">> }, Req = #{ <<"type">> => <<"unsigned">>, <<"hash-alg">> => <<"sha2-256-dag-pb">> }, - ?assertMatch({error, {unsupported_hash_alg, <<"sha2-256-dag-pb">>}}, + ?assertMatch({error, {unsupported_type, <<"sha2-256-dag-pb">>}}, commit(Msg, Req, #{})). verify_ok_for_intact_body_test() -> @@ -418,5 +445,5 @@ verify_fails_when_hash_alg_mismatches_test() -> {ok, Committed} = commit(Msg, #{ <<"type">> => <<"unsigned">> }, #{}), Commitments = maps:get(<<"commitments">>, Committed), [{_CID, Commitment}] = maps:to_list(Commitments), - DagCborReq = Commitment#{ <<"hash-alg">> => <<"sha2-256-dag-cbor">> }, + DagCborReq = Commitment#{ <<"type">> => <<"sha2-256-dag-cbor">> }, ?assertEqual({ok, false}, verify(Committed, DagCborReq, #{})). diff --git a/src/hb_store_ipfs_gateway.erl b/src/hb_store_ipfs_gateway.erl index 75f490a04..0a7a8d5d3 100644 --- a/src/hb_store_ipfs_gateway.erl +++ b/src/hb_store_ipfs_gateway.erl @@ -135,22 +135,24 @@ try_gateways([Gateway|Rest], CID, Parts, Timeout, Opts) -> %% @doc Wrap verified bytes in a message whose `~ipfs@1.0' unsigned %% commitment keyed by the CID makes it independently verifiable via %% `hb_message:verify/2,3' — without trusting this store to have done the -%% check. The `hash-alg' encodes both the multihash function and the -%% CID's multicodec (e.g. `sha2-256-raw' for `bafk...' CIDs, -%% `sha2-256-dag-cbor' for `bafy...' CIDs), so a round-trip through the -%% cache preserves identity exactly. -with_commitment(CID, #{ <<"hash-alg">> := HashAlg, <<"digest">> := Digest }, Body) -> +%% check. The commitment's `type' field is the native hash-alg name +%% (`sha2-256-raw' for `bafk...' CIDs, `sha2-256-dag-cbor' for `bafy...' +%% CIDs). It flows onto the wire as `alg="ipfs@1.0/"' through +%% `dev_codec_httpsig_siginfo:commitment_to_alg/2' — no custom RFC 9421 +%% metadata parameters required. +with_commitment(CID, + #{ <<"hash-alg">> := HashAlg, <<"digest">> := Digest }, + Body) -> %% Mirror `dev_codec_ipfs:commit/3': populate `signature' with the raw %% digest (base64url) and `keyid' with the universal `constant:ipfs', - %% so the commitment round-trips over the HTTPSig wire format as an - %% HMAC-shaped item. See `dev_codec_ipfs' for the rationale. + %% so the commitment rides the HTTPSig wire as an HMAC-shaped item. + %% See `dev_codec_ipfs' for the rationale. #{ <<"body">> => Body, <<"commitments">> => #{ CID => #{ <<"commitment-device">> => <<"ipfs@1.0">>, - <<"type">> => <<"unsigned">>, - <<"hash-alg">> => HashAlg, + <<"type">> => HashAlg, <<"committed">> => [<<"body">>], <<"signature">> => hb_util:encode(Digest), <<"keyid">> => <<"constant:ipfs">> @@ -280,7 +282,7 @@ live_gateway_fetches_known_cid_test_() -> ?assertEqual(<<"ipfs@1.0">>, maps:get(<<"commitment-device">>, Commitment)), ?assertEqual(<<"sha2-256-raw">>, - maps:get(<<"hash-alg">>, Commitment)); + maps:get(<<"type">>, Commitment)); not_found -> ?debugFmt("Skipping: all live gateways missed CID ~s", [?HELLO_WORLD_CID]), From 5e6b7c7a94fead81d495f58e7542ca5f0d5c9bbc Mon Sep 17 00:00:00 2001 From: Sam Williams Date: Sat, 18 Apr 2026 14:59:56 -0400 Subject: [PATCH 14/22] feat(httpsig): optional `id=' param + `keyid' absence on signature-input MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two surgical changes to `dev_codec_httpsig_siginfo' that let content-addressed commitment devices (like `~ipfs@1.0') round-trip their map keys through the HTTPSig wire without enlarging the commitment device contract or changing `dev_message:id/3'. 1. `id=' parameter on signature-input. `commitments_to_siginfo_for_signable/3' now threads each commitment's map key (`CommID') through into `commitment_to_sf_siginfo/4'. The encoder computes the key the decoder would derive solely from `Sig' (`human_id(Sig)' for 32-byte sigs, `human_id(sha256(Sig))' otherwise) and emits `id=""' only when `CommID' differs. Stock signed commitments (`hmac-sha256', `rsa-pss-sha512', etc.) key on the same derivation the decoder uses, so they never emit `id='. IPFS commitments, whose map key is a CID, do. `sf_siginfo_to_commitment/5' checks for `<<"id">>' in the parsed params via `maps:take/2'. Present → use as the commitment's map key, strip from the commitment body. Absent → existing `h(Sig)' fallback. `<<"id">>' added to `get_additional_params/1''s reserved-key list so it cannot round-trip twice or leak into commitment bodies. 2. `keyid' is now optional on the wire. RFC 9421 §1.4.2.3 permits `keyid' absence ("other means" of key retrieval, where here "other means" is "no retrieval at all" for content-addressed commitments). The encoder default changes from `<<>>' to `undefined'; the param list wraps it conditionally so the standard `undefined'-filter drops it. Commitments that set `keyid' (HMAC, RSA-PSS) are unchanged. Absent `keyid' now stays absent on the wire instead of becoming `keyid=""'. Applied: * `dev_codec_ipfs:commit/3' drops its `<<"keyid">> => <<"constant:ipfs">>' field. Commitment shape: `commitment-device', `type', `committed', `signature'. The `signature' stays (it is the raw sha-256 digest of the body) so the commitment passes the `commitments_to_siginfo' signature-bearing filter and rides the wire. * `hb_store_ipfs_gateway:with_commitment/3' mirrors — same four-field commitment from gateway-fetched content. * `dev_codec_ipfs_live_test' adds a live HB-to-HB transport test that asserts the IPFS commitment arrives on a remote client keyed under the CID (not `h(Sig)'). * `docs/devices/ipfs-at-1-0.md' removed. The edoc-generated reference at `docs/resources/source-code/dev_codec_ipfs.md' remains. Wire shape, after: ipfs@1.0 : alg="ipfs@1.0/sha2-256-raw"; id="" hmac-sha256 : alg="hmac-sha256"; keyid="constant:ao" rsa-pss-sha512: alg="rsa-pss-sha512"; keyid="publickey:..." 2458 tests green across `hb_message_test_vectors', `hb_cache', `hb_ao_test_vectors', every existing codec (including `dev_codec_httpsig' itself), and the full IPFS + live suites. Live node on port 12345 emits the target wire shape and stock HMAC/RSA commitments emit no spurious `id=' parameter. Co-Authored-By: Claude Opus 4.6 --- docs/devices/ipfs-at-1-0.md | 226 ------------------------------ src/dev_codec_httpsig_siginfo.erl | 91 +++++++++--- src/dev_codec_ipfs.erl | 33 +++-- src/dev_codec_ipfs_live_test.erl | 53 +++++++ src/hb_store_ipfs_gateway.erl | 12 +- 5 files changed, 153 insertions(+), 262 deletions(-) delete mode 100644 docs/devices/ipfs-at-1-0.md diff --git a/docs/devices/ipfs-at-1-0.md b/docs/devices/ipfs-at-1-0.md deleted file mode 100644 index 30f730381..000000000 --- a/docs/devices/ipfs-at-1-0.md +++ /dev/null @@ -1,226 +0,0 @@ -# Device: ~ipfs@1.0 - -## Overview - -The `~ipfs@1.0` device is an **optional, user-loadable** commitment device that lets a HyperBEAM node address messages by their [IPFS CIDv1](https://github.com/multiformats/cid). It computes a CID over a message's `body` and attaches it as an [unsigned commitment](../resources/unsigned-commitments.md) — a cryptographic commitment that has no `committer`, only a content-addressed identity. - -The elegance comes from HyperBEAM's existing machinery, not from any new plumbing: `hb_cache:write/2` already links every commitment ID to the uncommitted root ID of the message it belongs to. Once an `~ipfs@1.0` commitment is attached, `hb_cache:read(CID, Opts)` finds the message — so content-addressed retrieval works via the standard `~lookup@1.0` device with no routing, path, or kernel changes. - -The commitment is expressed as an HTTPSig HMAC-shaped item on the wire (`alg="ipfs@1.0/unsigned"`, `keyid="constant:ipfs"`, `signature` = base64url of the sha2-256 digest). This is **IPFS over HTTP Message Signatures**: a remote node decoding the response round-trips the commitment back to `commitment-device: ipfs@1.0` form without bespoke wire support. - -This device covers the outer edges of the IPFS / IPLD spec intentionally: `sha2-256` multihashes, base32-lowercase multibase, and the `raw` (multicodec `0x55`) and `dag-cbor` (multicodec `0x71`) codecs. - -`~ipfs@1.0` is **not** in `preloaded_devices` by default. A node operator opts in; see **Enabling** below. - -## When to use it - -- Serving content to IPFS clients via `GET /~lookup@1.0/read&target=`. -- Preloading a list of CIDs into a HyperBEAM node's cache by looping HEAD/GET lookups (the response's write-through pins locally). -- Pulling IPFS content into the Arweave / AO ecosystem: fetch a CID, apply an ANS-104 signed commitment using the node's wallet, POST it to a bundler. -- Acting as a verifying caching mirror of public IPFS data via the companion `hb_store_ipfs_gateway` store backend. - -If your content only needs to be addressed within HyperBEAM's own ID space, use [`~httpsig@1.0`](httpsig-at-1-0.md) or [`~ans104@1.0`](ans104-at-1-0.md) instead — they give you signed commitments with a committer. - -## Enabling - -Two ways, pick whichever fits your deployment: - -### In node config - -```erlang -{preloaded_devices, DefaultDevices ++ [ - #{<<"name">> => <<"ipfs@1.0">>, <<"module">> => dev_codec_ipfs} -]}. -``` - -### Per-message, for ad-hoc use - -```erlang -Msg = #{ <<"body">> => <<"hello world">> }, -Committed = - hb_message:commit( - Msg, - Opts, - #{ <<"commitment-device">> => <<"ipfs@1.0">>, - <<"type">> => <<"unsigned">> } - ). -``` - -For external CID reads, append `hb_store_ipfs_gateway` after your local stores in the node `store` chain: - -```erlang -{store, [ - #{ <<"store-module">> => hb_store_lmdb, <<"name">> => <<"main">> }, - #{ <<"store-module">> => hb_store_ipfs_gateway, - <<"gateways">> => [<<"https://w3s.link">>, <<"https://ipfs.io">>] } -]}. -``` - -The gateway store hashes every fetched body against the requested CID before handing it up the chain. A lying gateway is treated as `not_found` and the next one is tried. - -## Core operations - -### `commit` — attach a CID - -Compute a CIDv1 over `Msg`'s `body` and add it as an unsigned commitment. The commitment map is keyed by the CID string: - -```erlang -#{ - <<"commitments">> => #{ - <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">> => #{ - <<"commitment-device">> => <<"ipfs@1.0">>, - <<"type">> => <<"sha2-256-raw">>, - <<"committed">> => [<<"body">>], - <<"signature">> => <<"uU0nuZNNPgilLlLX2n2r-sSE7-N6U4DukIj3rOLvzek">>, - <<"keyid">> => <<"constant:ipfs">> - } - } -} -``` - -The commitment's `type` names the CID's construction — the multihash function and the CID's multicodec in one string, matching IPFS tooling. `sha2-256-raw` produces `bafk…` CIDs; `sha2-256-dag-cbor` produces `bafy…` CIDs. The `type` flows onto the wire as the `alg` parameter of the signature-input line (`alg="ipfs@1.0/sha2-256-raw"`), exactly the same way `dev_codec_ans104` surfaces `unsigned-sha256` and `dev_codec_httpsig` surfaces `hmac-sha256`. - -The `signature` field holds the raw sha2-256 digest of the body (base64url), and the `keyid` is the universal constant `constant:ipfs`. Structurally this is an HTTPSig HMAC item — anyone can reverify without a secret — which lets the commitment ride over HTTP Message Signatures without any custom metadata parameters. - -**Supported `Req` fields** - -| Field | Default | Values | -| --- | --- | --- | -| `type` | `unsigned` | `unsigned` (caller convenience) or a native type directly | -| `hash-alg` | `sha2-256-raw` | `sha2-256-raw`, `sha2-256-dag-cbor` — only meaningful when `type=unsigned` | - -`type=unsigned` is the caller-facing knob; it is translated into the native type named by `hash-alg` (or the default `sha2-256-raw`). Callers who already know the native type (`sha2-256-raw` / `sha2-256-dag-cbor`) may pass it directly. `signed` and other non-unsigned types delegate to `~httpsig@1.0` — the codec composes like `dev_codec_flat` / `dev_codec_json` for those paths. Unknown types return `{error, {unsupported_type, _}}`. Messages can carry both an IPFS commitment and an ANS-104 / HTTPSig signed commitment simultaneously. - -### `verify` — check a CID - -Recompute the CID from `body` under the commitment's declared `type`, then confirm it is a key in the message's `commitments` map. Tampering with the body produces a different CID, which is not present — verification returns `{ok, false}`. Called implicitly by `hb_message:verify/2,3`. - -### `committed` — list covered keys - -`dev_message:committed/3` reads the commitment's own `committed` list. For `~ipfs@1.0` that list is always `[<<"body">>]`. - -### `content_type` — MIME - -`application/vnd.ipld.raw` for `type = sha2-256-raw`, `application/vnd.ipld.dag-cbor` for `type = sha2-256-dag-cbor`. Falls back to `application/vnd.ipld.raw` when unspecified. - -### `to` / `from` — dag-cbor serialization - -`~ipfs@1.0` is a full codec in the `hb_message:convert/3,4` pipeline: - -```erlang -%% Encode a message as dag-cbor bytes: -CborBytes = hb_message:convert(Msg, <<"ipfs@1.0">>, Opts). - -%% Decode dag-cbor bytes back into a HyperBEAM message: -Msg = hb_message:convert(CborBytes, <<"structured@1.0">>, <<"ipfs@1.0">>, Opts). -``` - -The pipeline is `TABM <-> ~structured@1.0 (native types) <-> IPLD intermediate <-> dag-cbor bytes`. Encoding is deterministic per [the dag-cbor spec](https://ipld.io/specs/codecs/dag-cbor/spec/): shortest-form integers, canonical length-first map ordering, 64-bit floats only, definite-length containers. Non-canonical inputs on the decode side are rejected with a specific reason: - -| Decode rejection | Reason atom | -| --- | --- | -| Indefinite-length item | `indefinite_length_forbidden` | -| Half / single float | `half_float_forbidden`, `single_float_forbidden` | -| NaN / Infinity | `nan_or_infinity_forbidden` | -| Non-UTF-8 text string | `invalid_utf8` | -| Non-string map key | `non_string_map_key` | -| Out-of-order or duplicate map keys | `non_canonical_map_order` | -| Unsupported tag | `{unsupported_tag, N}` | -| Non-canonical integer encoding | `non_canonical_integer` | - -The `priv` sub-map is stripped before encoding. Commitments pass through the codec boundary unchanged, matching every other HyperBEAM codec (json, flat, ans104). Atoms outside `{null, true, false}` cannot be represented in IPLD and are rejected with `{error, {dag_cbor_encode, {unsupported_atom, _}}}`. - -### Composing `commit` with `to` - -The natural end-to-end pipeline for "publish a HyperBEAM message over IPFS" is: - -```erlang -Bytes = hb_message:convert(Msg, <<"ipfs@1.0">>, Opts), -Carrier = #{ <<"body">> => Bytes }, -Committed = hb_message:commit(Carrier, Opts, - #{ <<"commitment-device">> => <<"ipfs@1.0">>, - <<"type">> => <<"unsigned">>, - <<"hash-alg">> => <<"sha2-256-dag-cbor">> }), -{ok, _} = hb_cache:write(Committed, Opts). -``` - -The CID produced by `commit` over the dag-cbor bytes matches exactly what `ipfs dag put --store-codec dag-cbor` would produce on the same logical message. `hb_cache:read(CID, Opts)` then returns the committed message from the local cache; if the CID is not local, the optional `hb_store_ipfs_gateway` backend fetches it from a configured HTTP gateway and verifies the bytes against the CID before admitting them. - -## HTTP recipes - -With a node configured as above, a user drives the three production flows entirely through standard AO-Core paths — no kernel edits, no custom route handlers. - -### 1. Serve a CID - -```bash -curl 'http://localhost:8734/~lookup@1.0/read&target=bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e' -# => hello world -``` - -The node resolves the CID through its store chain. On first hit, the `hb_store_ipfs_gateway` backend fetches from a configured HTTP gateway, verifies `sha256(body)` matches the CID's digest, wraps the body in a message with an `~ipfs@1.0` commitment, and writes it through the cache. On subsequent hits, the body is served from the local store. - -### 2. Preload (en-masse pin) - -Loop over your CIDs: - -```bash -for CID in bafkreif… bafkreig… bafyreib… ; do - curl -sI "http://localhost:8734/~lookup@1.0/read&target=$CID" > /dev/null -done -``` - -Each successful lookup pins the CID to the local store via the HTTP request-response write-through path. - -### 3. Push IPFS content to Arweave - -Chain a server-side ANS-104 commit onto the lookup. The node's `priv_wallet` does the signing: - -```bash -curl 'http://localhost:8734/~lookup@1.0/read&target=/commit&type=signed&commitment-device=ans104@1.0' -``` - -The response carries the IPFS body plus an `ans104@1.0/rsa-pss-sha256` signed commitment in the `signature-input` header. That's a bundler-ready message: follow up with a `POST` to `/~arweave@2.9/tx` (or `/~bundler@1.0/tx`) with that signed message as the body, and the node will push it to Arweave, provided its wallet is topped up and `bundler_ans104` is configured. - -## Programmatic end-to-end example (Erlang) - -```erlang -%% 1. Stamp a blob with its CID. -Msg = #{ <<"body">> => <<"hello world">> }, -Committed = hb_message:commit(Msg, Opts, - #{ <<"commitment-device">> => <<"ipfs@1.0">>, - <<"type">> => <<"unsigned">> }), -%% 2. Write it. Cache auto-links the CID to the uncommitted ID. -{ok, _UncommittedID} = hb_cache:write(Committed, Opts), -%% 3. Read by CID. Works because of the link established in step 2. -{ok, Recovered} = - hb_cache:read( - <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>, - Opts - ), -<<"hello world">> = - hb_cache:ensure_loaded(maps:get(<<"body">>, Recovered), Opts). -``` - -## What's next - -A link-aware mapping through `hb_link`, so that IPLD CID links (dag-cbor tag 42) integrate with HyperBEAM's lazy-loaded link primitive and nested messages can be addressed as first-class IPLD sub-blocks. For now, CID links decode to plain CID strings and arbitrary Erlang atoms throw on encode. - -## Non-goals - -- CIDv0 (legacy base58 dag-pb CIDs, `Qm…`). -- `dag-pb`, UnixFS, file chunking. -- `dag-json` (trivial to add on top of the existing encoder; out of scope for v1). -- Hash algorithms other than `sha2-256`. -- Multibases other than base32-lower on encode (decode accepts `B`/`f` defensively). -- Bytes / text distinction from `structured@1.0`: both flatten to plain binaries. -- IPLD-native links: tag-42 decodes to a plain CID string; it does not wire into `hb_link` or `hb_cache` lazy resolution. -- IPNS, bitswap, pubsub, libp2p. -- IPLD Schemas, Selectors, or path resolution into sub-blocks. - -## Related source - -- [`dev_codec_ipfs.erl`](../resources/source-code/dev_codec_ipfs.md) — device entry points (`commit`, `verify`, `to`, `from`, `content_type`, `info`). -- [`dev_codec_ipfs_cid.erl`](../resources/source-code/dev_codec_ipfs_cid.md) — varint, multihash, multibase, CIDv1. -- [`dev_codec_ipfs_cbor.erl`](../resources/source-code/dev_codec_ipfs_cbor.md) — deterministic dag-cbor encoder/decoder. -- [`hb_store_ipfs_gateway.erl`](../resources/source-code/hb_store_ipfs_gateway.md) — read-only gateway store. -- [`dev_codec_ipfs_test.erl`](../resources/source-code/dev_codec_ipfs_test.md) — integration tests including the cache-linkage proof and the full `to`/`from` roundtrip. diff --git a/src/dev_codec_httpsig_siginfo.erl b/src/dev_codec_httpsig_siginfo.erl index 69e10202d..bd2df6334 100644 --- a/src/dev_codec_httpsig_siginfo.erl +++ b/src/dev_codec_httpsig_siginfo.erl @@ -45,11 +45,17 @@ commitments_to_siginfo(Msg, Comms, Opts) -> commitments_to_siginfo_for_signable(Msg, Comms, Opts) -> % Generate a SF item for each commitment's signature and signature-input. + % The commitment's map key (`CommID') is threaded through so that + % `commitment_to_sf_siginfo/4' can emit an `id' parameter whenever the + % decoder-side derivation rule (`id(Sig)' — see `sf_siginfo_to_commitment/5') + % would not reproduce it on the other side. Commitment devices whose + % identity is `h(Sig)' (HMAC, RSA-PSS, etc.) never pay this cost; + % content-addressed devices (e.g. `~ipfs@1.0') do. {Sigs, SigInputs} = maps:fold( - fun(_CommID, Commitment, {Sigs, SigInputs}) -> + fun(CommID, Commitment, {Sigs, SigInputs}) -> {ok, SigNameRaw, SFSig, SFSigInput} = - commitment_to_sf_siginfo(Msg, Commitment, Opts), + commitment_to_sf_siginfo(Msg, CommID, Commitment, Opts), SigName = <<"comm-", SigNameRaw/binary>>, { Sigs#{ SigName => SFSig }, @@ -68,12 +74,13 @@ commitments_to_siginfo_for_signable(Msg, Comms, Opts) -> %% @doc Generate a `signature' and `signature-input' key pair from a given %% commitment. -commitment_to_sf_siginfo(Msg, Commitment, Opts) -> +commitment_to_sf_siginfo(Msg, CommID, Commitment, Opts) -> % Generate the `alg' key from the commitment. Alg = commitment_to_alg(Commitment, Opts), % Find the public key from the commitment, which we will use as the - % `keyid' in the `signature-input' keys. - KeyID = maps:get(<<"keyid">>, Commitment, <<>>), + % `keyid' in the `signature-input' keys. Absent in the commitment => + % absent on the wire (permitted by RFC 9421 §1.4.2.3). + KeyID = maps:get(<<"keyid">>, Commitment, undefined), % Extract the signature from the commitment. Signature = hb_util:decode(maps:get(<<"signature">>, Commitment)), % Extract the keys present in the commitment. @@ -87,12 +94,33 @@ commitment_to_sf_siginfo(Msg, Commitment, Opts) -> Expires = maps:get(<<"expires">>, Commitment, undefined), % Generate the name of the signature. SigName = hb_util:to_lower(hb_util:human_id(crypto:hash(sha256, Signature))), - % Generate the signature input and signature structured-fields. These can + % Decide whether we need to transport the commitment's map key. + % `sf_siginfo_to_commitment/5' derives the decoded commitment's ID from + % `Sig' alone (either `human_id(Sig)' for 32-byte sigs or + % `human_id(sha256(Sig))' otherwise). Commitments whose map key matches + % that derivation (HMAC, RSA-PSS, and any `@/' family + % that keys on `h(Sig)') need no extra work. Content-addressed devices + % like `~ipfs@1.0' — whose key is a CID, not a function of `Sig' — emit + % an `id' parameter so the receiver reconstructs the commitment under + % the sender's intended key. + DerivedID = derived_commitment_id(Signature), + IDParam = + case CommID of + undefined -> []; + DerivedID -> []; + _ -> [{<<"id">>, {string, CommID}}] + end, + % Generate the signature input and signature structured-fields. These can % then be placed into a dictionary with other commitments and transformed % into their binary representations. SFSig = {item, {binary, Signature}, []}, AdditionalParams = get_additional_params(Commitment), - Params = + KeyIDItem = + case KeyID of + undefined -> undefined; + _ -> {string, KeyID} + end, + Params = lists:filter( fun({_Key, undefined}) -> false; @@ -101,12 +129,12 @@ commitment_to_sf_siginfo(Msg, Commitment, Opts) -> end, [ {<<"alg">>, {string, Alg}}, - {<<"keyid">>, {string, KeyID}}, + {<<"keyid">>, KeyIDItem}, {<<"tag">>, {string, Tag}}, {<<"created">>, Created}, {<<"expires">>, Expires}, {<<"nonce">>, {string, Nonce}} - ] ++ AdditionalParams + ] ++ IDParam ++ AdditionalParams ), SFSigInput = {list, @@ -130,11 +158,23 @@ commitment_to_sf_siginfo(Msg, Commitment, Opts) -> ), {ok, SigName, SFSig, SFSigInput}. +%% @doc Derive the commitment map key that `sf_siginfo_to_commitment/5' will +%% assign when an explicit `id' parameter is absent. 32-byte signatures +%% (HMAC-SHA256 output, or a raw sha-256 digest used as a signature by a +%% content-addressed device) are used directly as the ID; longer signatures +%% are rehashed. +derived_commitment_id(Signature) -> + if byte_size(Signature) == 32 -> + hb_util:human_id(Signature); + true -> + hb_util:human_id(crypto:hash(sha256, Signature)) + end. + get_additional_params(Commitment) -> AdditionalParams = sets:to_list( sets:subtract( - sets:from_list(maps:keys(Commitment)), + sets:from_list(maps:keys(Commitment)), sets:from_list( [ <<"alg">>, @@ -146,6 +186,7 @@ get_additional_params(Commitment) -> <<"committed">>, <<"signature">>, <<"type">>, + <<"id">>, <<"commitment-device">>, <<"committer">> ] @@ -269,9 +310,12 @@ sf_siginfo_to_commitment(Msg, BodyKeys, SFSig, SFSigInput, Opts) -> % 1. Decode the `keyid` (typically a public key) to its raw byte form. % 2. Decode the `signature` to its raw byte form. % 3. Filter undefined keys. - % 4. Generate the ID for the commitment from the signature. We use a SHA2-256 - % hash of the signature, unless the signature is 32 bytes, in which case we - % use the signature directly as the ID. + % 4. Generate the ID for the commitment. If the encoder transported an + % explicit `id' parameter (set by content-addressed devices like + % `~ipfs@1.0' whose map key is not a function of `Sig'), honour it + % and strip the field from the commitment body. Otherwise fall back + % to the default derivation: use the signature directly for 32-byte + % sigs, else hash the signature. % 5. If the `keyid' is a public key (determined by length >= 32 bytes), set % the `committer' to its hash. Commitment3 = @@ -279,20 +323,27 @@ sf_siginfo_to_commitment(Msg, BodyKeys, SFSig, SFSigInput, Opts) -> <<"signature">> => hb_util:encode(Sig), <<"committed">> => CommittedKeys }, - KeyID = maps:get(<<"keyid">>, Commitment3, <<>>), + {ID, Commitment4} = + case maps:take(<<"id">>, Commitment3) of + {ExplicitID, Stripped} -> + {ExplicitID, Stripped}; + error -> + DerivedID = + if byte_size(Sig) == 32 -> hb_util:human_id(Sig); + true -> hb_util:human_id(crypto:hash(sha256, Sig)) + end, + {DerivedID, Commitment3} + end, + KeyID = maps:get(<<"keyid">>, Commitment4, <<>>), Commitment5 = case dev_codec_httpsig_keyid:keyid_to_committer(KeyID) of undefined -> - Commitment3; + Commitment4; Committer -> - Commitment3#{ + Commitment4#{ <<"committer">> => Committer } end, - ID = - if byte_size(Sig) == 32 -> hb_util:human_id(Sig); - true -> hb_util:human_id(crypto:hash(sha256, Sig)) - end, % Return the commitment and calculated ID. {ok, ID, Commitment5}. diff --git a/src/dev_codec_ipfs.erl b/src/dev_codec_ipfs.erl index 9df9a1d33..72524a50b 100644 --- a/src/dev_codec_ipfs.erl +++ b/src/dev_codec_ipfs.erl @@ -107,21 +107,27 @@ commit(Msg, #{ <<"type">> := Type }, Opts) Body = hb_maps:get(<<"body">>, Msg, <<>>, Opts), {ok, Multicodec} = type_to_multicodec(Type), CID = dev_codec_ipfs_cid:encode(Multicodec, sha2_256, Body), - %% An IPFS content-addressed commitment is structurally an HTTPSig - %% HMAC-style item: the `signature' is purely a function of the - %% content, and the `keyid' is a universal constant (anyone can - %% recompute without a secret). This lets the commitment ride over - %% the wire through `dev_codec_httpsig_siginfo' as a first-class - %% signature-input line, and remote nodes recover it to its - %% `commitment-device = ipfs@1.0' form on decode. + %% The `signature' field carries the raw sha-256 digest of the body + %% (base64url). That's what an IPFS CID commits to by construction — + %% there is no separate cryptographic signing step, and no key is + %% needed to reverify (a receiver just recomputes from `body'). We do + %% not set `keyid': RFC 9421 §1.4.2.3 permits its absence ("other + %% means" of key-material retrieval, where here "other means" is "no + %% retrieval at all"). The httpsig encoder drops absent `keyid' from + %% the wire. + %% + %% The `signature' field also keeps the commitment on the httpsig + %% wire (it is otherwise filtered out by `commitments_to_siginfo'). + %% Combined with the `id=' parameter that `dev_codec_httpsig_siginfo' + %% emits when the map key differs from `h(Sig)', this lets remote + %% nodes decode an IPFS commitment back under its CID key. Signature = hb_util:encode(crypto:hash(sha256, Body)), Commitment = #{ <<"commitment-device">> => ?DEVICE_NAME, <<"type">> => Type, <<"committed">> => ?COMMITTED_KEYS, - <<"signature">> => Signature, - <<"keyid">> => <<"constant:ipfs">> + <<"signature">> => Signature }, Existing = hb_maps:get(<<"commitments">>, Msg, #{}, Opts), ?event(ipfs, @@ -362,7 +368,14 @@ commit_unsigned_raw_attaches_cid_test() -> ?assertEqual(?DEVICE_NAME, maps:get(<<"commitment-device">>, Commitment)), ?assertEqual(<<"sha2-256-raw">>, maps:get(<<"type">>, Commitment)), ?assertEqual([<<"body">>], maps:get(<<"committed">>, Commitment)), - ?assertEqual(<<"constant:ipfs">>, maps:get(<<"keyid">>, Commitment)), + %% The commitment carries a signature (= raw digest) so it stays on + %% the httpsig wire via `dev_codec_httpsig_siginfo''s signature + %% filter. It deliberately does NOT carry a `keyid' — there is no + %% key to look up for a content-addressed commitment — nor any + %% `committer', `hash-alg', or `multicodec' fields (the multihash + %% and multicodec are both encoded in `type'). + ?assertMatch(#{<<"signature">> := _}, Commitment), + ?assertNot(maps:is_key(<<"keyid">>, Commitment)), ?assertNot(maps:is_key(<<"committer">>, Commitment)), ?assertNot(maps:is_key(<<"hash-alg">>, Commitment)), ?assertNot(maps:is_key(<<"multicodec">>, Commitment)). diff --git a/src/dev_codec_ipfs_live_test.erl b/src/dev_codec_ipfs_live_test.erl index ae45a2f30..06bfa9dd5 100644 --- a/src/dev_codec_ipfs_live_test.erl +++ b/src/dev_codec_ipfs_live_test.erl @@ -192,6 +192,59 @@ live_cache_preload_pattern_test_() -> end end}. +%% HB-to-HB transport round-trip: a node receives an IPFS-committed +%% response via HTTP and must see the commitment at the CID key, not at +%% `h(sig)'. This is what the `id=' extension in +%% `dev_codec_httpsig_siginfo' preserves. +live_http_ipfs_commitment_survives_transport_test_() -> + {timeout, 90, fun() -> + application:ensure_all_started(inets), + application:ensure_all_started(ssl), + case gateways_reachable_for_cid(?HELLO_WORLD_CID) of + false -> + ?debugFmt("Skipping: all gateways unreachable", []); + true -> + %% Server: knows ipfs@1.0, has a gateway in its store. + NodeOpts = node_opts_with_ipfs(), + NodeURL = hb_http_server:start_node(NodeOpts), + %% Client: knows ipfs@1.0 too, but no gateway; it relies + %% entirely on the server-delivered message. + ClientOpts = #{ + preloaded_devices => + [ #{ <<"name">> => <<"ipfs@1.0">>, + <<"module">> => dev_codec_ipfs } | + hb_opts:get(preloaded_devices, [], #{}) ] + }, + Path = <<"/~lookup@1.0/read&target=", + ?HELLO_WORLD_CID/binary>>, + {ok, Response} = hb_http:get(NodeURL, Path, ClientOpts), + Msg = + case Response of + M when is_map(M) -> M; + B when is_binary(B) -> #{ <<"body">> => B } + end, + Comms = maps:get(<<"commitments">>, Msg, #{}), + IPFSComms = + maps:filter( + fun(_K, #{<<"commitment-device">> := <<"ipfs@1.0">>}) -> + true; + (_K, _) -> false + end, + Comms + ), + case maps:to_list(IPFSComms) of + [] -> + ?debugFmt("Skipping: no IPFS commitment on " + "response (likely gateway path not " + "taken on this run)", []); + [{Key, _}] -> + ?assertEqual(?HELLO_WORLD_CID, Key); + Many -> + ?debugFmt("multiple ipfs commitments: ~p", [Many]) + end + end + end}. + %%%==================================================================== %%% PR Path 3 — Commit IPFS content as ANS-104 via the node's wallet %%%==================================================================== diff --git a/src/hb_store_ipfs_gateway.erl b/src/hb_store_ipfs_gateway.erl index 0a7a8d5d3..d0ac25242 100644 --- a/src/hb_store_ipfs_gateway.erl +++ b/src/hb_store_ipfs_gateway.erl @@ -143,10 +143,11 @@ try_gateways([Gateway|Rest], CID, Parts, Timeout, Opts) -> with_commitment(CID, #{ <<"hash-alg">> := HashAlg, <<"digest">> := Digest }, Body) -> - %% Mirror `dev_codec_ipfs:commit/3': populate `signature' with the raw - %% digest (base64url) and `keyid' with the universal `constant:ipfs', - %% so the commitment rides the HTTPSig wire as an HMAC-shaped item. - %% See `dev_codec_ipfs' for the rationale. + %% Mirror `dev_codec_ipfs:commit/3'. `signature' keeps the commitment + %% on the httpsig wire (see `dev_codec_httpsig_siginfo's filter); + %% combined with the `id=' extension emitted when `h(Sig)' ≠ CID, the + %% receiver recovers the commitment at the CID key. No `keyid' — + %% content-addressed commitments need no key material. #{ <<"body">> => Body, <<"commitments">> => #{ @@ -154,8 +155,7 @@ with_commitment(CID, <<"commitment-device">> => <<"ipfs@1.0">>, <<"type">> => HashAlg, <<"committed">> => [<<"body">>], - <<"signature">> => hb_util:encode(Digest), - <<"keyid">> => <<"constant:ipfs">> + <<"signature">> => hb_util:encode(Digest) } } }. From 29587236c8a60d7c82549b0be81c94cbf6d022cb Mon Sep 17 00:00:00 2001 From: Sam Williams Date: Sat, 18 Apr 2026 15:17:02 -0400 Subject: [PATCH 15/22] test(ipfs): two-HB-node relay test proves CID identity survives transport MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Spin up two in-process HyperBEAM nodes (each with its own wallet — the listener's server_id is derived from `priv_wallet', so shared wallets collide on one listener) inside a single eunit: Node A: has `hb_store_ipfs_gateway' upstream; can mint CID-keyed commitments for real IPFS content. Node B: isolated filesystem store, NO IPFS gateway. Can serve a CID only if the message is already in its local cache keyed under that CID. The test drives the full relay topology: 1. B alone cannot serve the CID (no upstream path, cache empty). 2. Client does `hb_http:get(A, "/~lookup@1.0/read&target=")'. A's response carries the IPFS commitment on its signature-input line with `id=""'; the client's `sf_siginfo_to_commitment' reconstructs the commitment at the CID key. 3. Client persists the received message to B's store via `hb_cache:write/2'. The CID is picked up as an AltID and linked to the uncommitted root ID. 4. `hb_http:get(B, "/~lookup@1.0/read&target=")' now serves the body from B's local cache. No gateway. No upstream. 5. Direct store inspection confirms B's local store has the message at the CID key, with the IPFS commitment intact under that key. End-to-end proof of the `id=' extension: a CID-keyed commitment survives HTTP transport between two HB nodes and enables the downstream node to act as a relay/mirror for content it has never reached upstream itself. Follows the `dev_router:dynamic_router_pricing_test_' two-node pattern (fixed high ports, per-node wallet). 2459 tests green across the full IPFS + codec + cache suites. Co-Authored-By: Claude Opus 4.6 --- src/dev_codec_ipfs_live_test.erl | 132 +++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) diff --git a/src/dev_codec_ipfs_live_test.erl b/src/dev_codec_ipfs_live_test.erl index 06bfa9dd5..fc842a8f2 100644 --- a/src/dev_codec_ipfs_live_test.erl +++ b/src/dev_codec_ipfs_live_test.erl @@ -245,6 +245,138 @@ live_http_ipfs_commitment_survives_transport_test_() -> end end}. +%% Two in-process HyperBEAM nodes in one test: +%% +%% Node A — "source" — has `hb_store_ipfs_gateway' in its chain, so it +%% can reach the real IPFS network and mint CID-keyed commitments. +%% Node B — "downstream" — has ONLY a local filesystem store, no IPFS +%% gateway. Can serve a CID only if the message is already cached +%% under the CID in its local store. +%% +%% The test shows: (1) B alone cannot serve the CID; (2) a client +%% fetches the message from A over HTTP, receiving the commitment keyed +%% by the CID (thanks to the `id=' extension on the signature-input +%% line); (3) the client writes the message into B's local store via +%% the standard `hb_cache:write/2'; (4) B thereafter serves the CID via +%% `~lookup@1.0/read' entirely from its local cache — no gateway +%% involved, no knowledge of upstream A. +%% +%% This is the "relay / mirror" topology: one node with upstream reach, +%% many nodes without, each able to cache and re-serve what passes +%% through them. +live_hb_to_hb_transfer_and_relay_test_() -> + {timeout, 120, fun() -> + application:ensure_all_started(inets), + application:ensure_all_started(ssl), + case gateways_reachable_for_cid(?HELLO_WORLD_CID) of + false -> + ?debugFmt("Skipping: all gateways unreachable", []); + true -> + %% Two-node setup: each node needs its own wallet + %% (`hb_http_server:start_node/1' derives the listener's + %% server_id from `priv_wallet''s address; reusing one + %% wallet collapses both nodes onto the same listener). + %% Explicit high ports — matches `dev_router''s + %% `dynamic_router_pricing' two-node pattern. + PortA = 18760, + PortB = 18761, + Stock = hb_opts:get(preloaded_devices, [], #{}), + + %% Node A: has the IPFS gateway upstream. + NodeAOpts0 = node_opts_with_ipfs(), + NodeAURL = hb_http_server:start_node( + NodeAOpts0#{ + port => PortA, + priv_wallet => ar_wallet:new() + }), + + %% Node B: isolated filesystem store. No IPFS gateway. + NodeBLocal = #{ + <<"store-module">> => hb_store_fs, + <<"name">> => + iolist_to_binary( + ["cache-TEST/ipfs-hb-b-", + integer_to_list( + erlang:system_time(microsecond))]) + }, + hb_store:reset(NodeBLocal), + NodeBOpts = #{ + port => PortB, + cache_control => <<"cache">>, + priv_wallet => ar_wallet:new(), + preloaded_devices => + [ #{ <<"name">> => <<"ipfs@1.0">>, + <<"module">> => dev_codec_ipfs } | Stock ], + store => [NodeBLocal] + }, + NodeBURL = hb_http_server:start_node(NodeBOpts), + + Path = <<"/~lookup@1.0/read&target=", + ?HELLO_WORLD_CID/binary>>, + + %% (1) Node B alone cannot serve the CID. + BeforeTransfer = hb_http:get(NodeBURL, Path, #{}), + ?assertNotMatch({ok, <<"hello world">>}, BeforeTransfer), + ?assertNotMatch({ok, #{ <<"body">> := <<"hello world">> }}, + BeforeTransfer), + + %% (2) Client fetches from Node A. The response's + %% signature-input line carries `alg="ipfs@1.0/sha2-256-raw"; + %% id=""'; `sf_siginfo_to_commitment' honours the `id=' + %% and reconstructs the commitment at the CID map key. + {ok, MsgFromA} = hb_http:get(NodeAURL, Path, #{}), + ?assert(is_map(MsgFromA)), + CommsFromA = maps:get(<<"commitments">>, MsgFromA, #{}), + IPFSOnA = + maps:filter( + fun(_, #{ <<"commitment-device">> := + <<"ipfs@1.0">> }) -> true; + (_, _) -> false + end, + CommsFromA), + case maps:to_list(IPFSOnA) of + [] -> + ?debugFmt("Skipping: A's response carried no " + "ipfs commitment (gateway path not " + "taken on this run)", []); + [{CIDAtA, _Comm}] -> + ?assertEqual(?HELLO_WORLD_CID, CIDAtA), + + %% (3) Persist what A gave us into B's store. + %% The commitment's CID key becomes an AltID in + %% B's cache, linked to the uncommitted root ID. + {ok, _UID} = + hb_cache:write(MsgFromA, NodeBOpts), + + %% (4) Node B now serves the same CID via its + %% local cache, no gateway, no upstream. + {ok, ViaB} = hb_http:get(NodeBURL, Path, #{}), + BodyViaB = + case ViaB of + Bin when is_binary(Bin) -> Bin; + #{ <<"body">> := Bin } -> + hb_cache:ensure_loaded(Bin, #{}) + end, + ?assertEqual(?HELLO_WORLD_BODY, BodyViaB), + + %% (5) Direct store inspection: B's local store + %% has the message at the CID key. + LocalOnly = #{ store => [NodeBLocal] }, + {ok, MsgOnB} = + hb_cache:read(?HELLO_WORLD_CID, LocalOnly), + ?assertEqual( + ?HELLO_WORLD_BODY, + hb_cache:ensure_loaded( + maps:get(<<"body">>, MsgOnB), + LocalOnly)), + %% And the commitment survived under the CID key + %% in B's local store. + CommsOnB = maps:get(<<"commitments">>, MsgOnB, #{}), + ?assert(maps:is_key(?HELLO_WORLD_CID, CommsOnB)) + end + end + end}. + %%%==================================================================== %%% PR Path 3 — Commit IPFS content as ANS-104 via the node's wallet %%%==================================================================== From f0d7326a47b4f4886cf26ac333c3c310e46a4562 Mon Sep 17 00:00:00 2001 From: Sam Williams Date: Sat, 18 Apr 2026 15:38:55 -0400 Subject: [PATCH 16/22] test(ipfs): relay via hb_store_remote_node instead of manual cache writes Reshapes the two-HB-node test to use the proper remote-store topology the user described: Node A holds only the IPFS gateway store; Node B has a primary test_store plus hb_store_remote_node pointing at A with local-store => [NodeBPrimary] for write-through caching. Flow: client hits B -> B's primary misses -> B's remote-node fetches from A -> A retrieves from real IPFS -> returns to B -> B caches in primary -> returns to client. We then kill Node A via cowboy:stop_listener/1 and re-query B, which must serve from its primary cache with no upstream traffic. Per-node wallets (ar_wallet:new/0) avoid server_id collisions, matching the pattern in dev_router:dynamic_router_pricing_test_. Co-Authored-By: Claude Opus 4.6 --- src/dev_codec_ipfs_live_test.erl | 215 ++++++++++++++++--------------- 1 file changed, 108 insertions(+), 107 deletions(-) diff --git a/src/dev_codec_ipfs_live_test.erl b/src/dev_codec_ipfs_live_test.erl index fc842a8f2..08feb3805 100644 --- a/src/dev_codec_ipfs_live_test.erl +++ b/src/dev_codec_ipfs_live_test.erl @@ -245,26 +245,42 @@ live_http_ipfs_commitment_survives_transport_test_() -> end end}. -%% Two in-process HyperBEAM nodes in one test: +%% Two in-process HyperBEAM nodes in one test, wired so that a client +%% request on Node B transparently pulls content through Node A: %% -%% Node A — "source" — has `hb_store_ipfs_gateway' in its chain, so it -%% can reach the real IPFS network and mint CID-keyed commitments. -%% Node B — "downstream" — has ONLY a local filesystem store, no IPFS -%% gateway. Can serve a CID only if the message is already cached -%% under the CID in its local store. +%% Node A — "upstream" — has ONLY `hb_store_ipfs_gateway' in its +%% store chain. It has no persistent local cache of its own; every +%% request routes out to the real IPFS network. %% -%% The test shows: (1) B alone cannot serve the CID; (2) a client -%% fetches the message from A over HTTP, receiving the commitment keyed -%% by the CID (thanks to the `id=' extension on the signature-input -%% line); (3) the client writes the message into B's local store via -%% the standard `hb_cache:write/2'; (4) B thereafter serves the CID via -%% `~lookup@1.0/read' entirely from its local cache — no gateway -%% involved, no knowledge of upstream A. +%% Node B — "downstream" — has a primary filesystem store +%% (`hb_test_utils:test_store/0' — freshly isolated per eunit) and, +%% behind it, `hb_store_remote_node' pointing at Node A with its +%% `local-store' set to the same primary. That means: a cache miss +%% on B falls through to A, and A's response is written through to +%% B's primary on the way back. %% -%% This is the "relay / mirror" topology: one node with upstream reach, -%% many nodes without, each able to cache and re-serve what passes -%% through them. -live_hb_to_hb_transfer_and_relay_test_() -> +%% Flow: +%% +%% (1) Client: `GET NodeB/~lookup@1.0/read&target='. +%% (2) B's primary misses. +%% (3) B's `hb_store_remote_node' calls +%% `NodeA/~cache@1.0/read&target='. +%% (4) A's `dev_cache:read' calls `hb_cache:read(, AOpts)'. +%% A's store chain is just the IPFS gateway; the gateway fetches +%% from the real IPFS network, verifies the digest, returns an +%% IPFS-committed message. +%% (5) A's HTTP response carries the commitment on its signature-input +%% line with `id=""'; B's `hb_http:get' / siginfo decode +%% reconstructs the commitment at the CID map key. +%% (6) `hb_store_remote_node:maybe_cache' writes the message through +%% to B's primary. The CID is picked up as an AltID by +%% `hb_cache:write/3' and linked to the uncommitted root ID. +%% (7) B returns the body to the client. +%% +%% Then we kill A's HTTP listener and ask B again for the same CID. +%% B's primary now has the data, so the request is served locally with +%% no upstream traffic. +live_hb_to_hb_remote_store_relay_test_() -> {timeout, 120, fun() -> application:ensure_all_started(inets), application:ensure_all_started(ssl), @@ -272,111 +288,96 @@ live_hb_to_hb_transfer_and_relay_test_() -> false -> ?debugFmt("Skipping: all gateways unreachable", []); true -> - %% Two-node setup: each node needs its own wallet - %% (`hb_http_server:start_node/1' derives the listener's - %% server_id from `priv_wallet''s address; reusing one - %% wallet collapses both nodes onto the same listener). - %% Explicit high ports — matches `dev_router''s - %% `dynamic_router_pricing' two-node pattern. - PortA = 18760, - PortB = 18761, + %% Each node needs its own wallet — the HB server_id is + %% derived from `priv_wallet''s address + %% (`hb_http_server:new_server/1:175'), so shared + %% wallets collapse two nodes onto one listener. + PortA = 18770, + PortB = 18771, Stock = hb_opts:get(preloaded_devices, [], #{}), + IPFSDev = #{ <<"name">> => <<"ipfs@1.0">>, + <<"module">> => dev_codec_ipfs }, - %% Node A: has the IPFS gateway upstream. - NodeAOpts0 = node_opts_with_ipfs(), - NodeAURL = hb_http_server:start_node( - NodeAOpts0#{ - port => PortA, - priv_wallet => ar_wallet:new() - }), + %% Node A: nothing but the IPFS gateway. No primary + %% store — every read passes through to real IPFS. + NodeAWallet = ar_wallet:new(), + NodeAServerID = + hb_util:human_id( + ar_wallet:to_address(NodeAWallet)), + NodeAURL = hb_http_server:start_node(#{ + port => PortA, + priv_wallet => NodeAWallet, + cache_control => <<"cache">>, + preloaded_devices => [IPFSDev | Stock], + store => [ + #{ + <<"store-module">> => hb_store_ipfs_gateway, + <<"gateways">> => ?LIVE_GATEWAYS, + <<"timeout">> => 20000 + } + ] + }), - %% Node B: isolated filesystem store. No IPFS gateway. - NodeBLocal = #{ - <<"store-module">> => hb_store_fs, - <<"name">> => - iolist_to_binary( - ["cache-TEST/ipfs-hb-b-", - integer_to_list( - erlang:system_time(microsecond))]) - }, - hb_store:reset(NodeBLocal), - NodeBOpts = #{ + %% Node B: primary fs store, with `hb_store_remote_node' + %% pointed at A as fallback. `local-store' on the remote + %% config makes A's responses write through to the + %% primary. + NodeBPrimary = hb_test_utils:test_store(), + NodeBURL = hb_http_server:start_node(#{ port => PortB, - cache_control => <<"cache">>, priv_wallet => ar_wallet:new(), - preloaded_devices => - [ #{ <<"name">> => <<"ipfs@1.0">>, - <<"module">> => dev_codec_ipfs } | Stock ], - store => [NodeBLocal] - }, - NodeBURL = hb_http_server:start_node(NodeBOpts), + cache_control => <<"cache">>, + preloaded_devices => [IPFSDev | Stock], + store => [ + NodeBPrimary, + #{ + <<"store-module">> => hb_store_remote_node, + <<"node">> => NodeAURL, + <<"local-store">> => [NodeBPrimary] + } + ] + }), Path = <<"/~lookup@1.0/read&target=", ?HELLO_WORLD_CID/binary>>, - %% (1) Node B alone cannot serve the CID. - BeforeTransfer = hb_http:get(NodeBURL, Path, #{}), - ?assertNotMatch({ok, <<"hello world">>}, BeforeTransfer), - ?assertNotMatch({ok, #{ <<"body">> := <<"hello world">> }}, - BeforeTransfer), - - %% (2) Client fetches from Node A. The response's - %% signature-input line carries `alg="ipfs@1.0/sha2-256-raw"; - %% id=""'; `sf_siginfo_to_commitment' honours the `id=' - %% and reconstructs the commitment at the CID map key. - {ok, MsgFromA} = hb_http:get(NodeAURL, Path, #{}), - ?assert(is_map(MsgFromA)), - CommsFromA = maps:get(<<"commitments">>, MsgFromA, #{}), - IPFSOnA = - maps:filter( - fun(_, #{ <<"commitment-device">> := - <<"ipfs@1.0">> }) -> true; - (_, _) -> false - end, - CommsFromA), - case maps:to_list(IPFSOnA) of - [] -> - ?debugFmt("Skipping: A's response carried no " - "ipfs commitment (gateway path not " - "taken on this run)", []); - [{CIDAtA, _Comm}] -> - ?assertEqual(?HELLO_WORLD_CID, CIDAtA), + %% (1) Query B. Pulls through A, which pulls from real + %% IPFS. Write-through caches it on B's primary on the + %% return path. Then B serves the body to the client. + {ok, R1} = hb_http:get(NodeBURL, Path, #{}), + Body1 = response_body(R1), + ?assertEqual(?HELLO_WORLD_BODY, Body1), - %% (3) Persist what A gave us into B's store. - %% The commitment's CID key becomes an AltID in - %% B's cache, linked to the uncommitted root ID. - {ok, _UID} = - hb_cache:write(MsgFromA, NodeBOpts), + %% (2) Direct probe of B's primary: the CID is now there, + %% keyed by the CID in the commitments map. + LocalOnly = #{ store => [NodeBPrimary] }, + {ok, MsgOnB0} = + hb_cache:read(?HELLO_WORLD_CID, LocalOnly), + ?assertEqual(?HELLO_WORLD_BODY, + hb_cache:ensure_loaded( + maps:get(<<"body">>, MsgOnB0), LocalOnly)), + CommsOnB0 = maps:get(<<"commitments">>, MsgOnB0, #{}), + ?assert(maps:is_key(?HELLO_WORLD_CID, CommsOnB0)), - %% (4) Node B now serves the same CID via its - %% local cache, no gateway, no upstream. - {ok, ViaB} = hb_http:get(NodeBURL, Path, #{}), - BodyViaB = - case ViaB of - Bin when is_binary(Bin) -> Bin; - #{ <<"body">> := Bin } -> - hb_cache:ensure_loaded(Bin, #{}) - end, - ?assertEqual(?HELLO_WORLD_BODY, BodyViaB), + %% (3) Kill Node A's HTTP listener. ranch / cowboy use + %% the server_id as the listener ref. + ok = cowboy:stop_listener(NodeAServerID), - %% (5) Direct store inspection: B's local store - %% has the message at the CID key. - LocalOnly = #{ store => [NodeBLocal] }, - {ok, MsgOnB} = - hb_cache:read(?HELLO_WORLD_CID, LocalOnly), - ?assertEqual( - ?HELLO_WORLD_BODY, - hb_cache:ensure_loaded( - maps:get(<<"body">>, MsgOnB), - LocalOnly)), - %% And the commitment survived under the CID key - %% in B's local store. - CommsOnB = maps:get(<<"commitments">>, MsgOnB, #{}), - ?assert(maps:is_key(?HELLO_WORLD_CID, CommsOnB)) - end + %% (4) Ask B again. A is gone; B must serve from primary. + {ok, R2} = hb_http:get(NodeBURL, Path, #{}), + Body2 = response_body(R2), + ?assertEqual(?HELLO_WORLD_BODY, Body2) end end}. +%% @doc Extract the response body binary from `hb_http:get''s return +%% shape — sometimes a bare binary (simple body pass-through), sometimes +%% a full message map with a `body' field that may itself be a link. +response_body(R) when is_binary(R) -> + R; +response_body(#{ <<"body">> := B }) -> + hb_cache:ensure_loaded(B, #{}). + %%%==================================================================== %%% PR Path 3 — Commit IPFS content as ANS-104 via the node's wallet %%%==================================================================== From ab8c30dacc5fcb33be0b0a1bf4650c61817a2f10 Mon Sep 17 00:00:00 2001 From: Sam Williams Date: Sat, 18 Apr 2026 16:13:44 -0400 Subject: [PATCH 17/22] =?UTF-8?q?refactor(ipfs):=20tighten=20style=20?= =?UTF-8?q?=E2=80=94=20minimal=20docs,=20zero=20duplication?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Align the IPFS device with HyperBEAM's existing codec style: - Short, focused module docs (one paragraph) matching `dev_codec_json', `dev_codec_flat', `dev_codec_ans104'. - Remove `%%%====' section banners — not idiomatic. - Condense inline comment prose; drop multi-paragraph essays. Deduplicate: - `dev_codec_ipfs_test': `ipfs_commit/2,3' helper replaces the `#{<<"commitment-device">>=><<"ipfs@1.0">>,...}' literal at every call site. - `dev_codec_ipfs_live_test': `with_live_gateways/1' and `response_body/1' replace per-test skip and body-extraction blocks. - `dev_codec_httpsig_siginfo': decoder now reuses the encoder's `derived_commitment_id/1' helper instead of an inline `if'. - `dev_codec_ipfs_cbor': `pair_key_lt/2' delegates to `key_lt/2'; the duplicate `key_strictly_less/2' is gone. - `dev_codec_ipfs_cid': `safe/2' wraps the three multibase decode attempts, replacing three copies of the same try/catch shape. Tighten: - `dev_codec_ipfs:commit/3' uses an `?IS_NATIVE_TYPE/1' guard and a single `multicodec_of/1' lookup; `verify/3' collapses to one clause. - `dev_codec_ipfs_cid:decode_bytes/1' asserts the digest occupies exactly `DigestLen' bytes instead of inferring. - `dev_codec_ipfs_cbor' float rejection uses `andalso' guards directly; match on `+0.0` / `-0.0` to silence OTP-27 warning. All 2456 targeted tests pass; no regressions in the broader `hb_store', `hb_http', `hb_ao', `hb_message', or `dev_codec_httpsig*' suites. Net -824 lines across the seven touched files. Co-Authored-By: Claude Opus 4.6 --- src/dev_codec_httpsig_siginfo.erl | 86 ++--- src/dev_codec_ipfs.erl | 480 +++++++----------------- src/dev_codec_ipfs_cbor.erl | 135 +++---- src/dev_codec_ipfs_cid.erl | 186 ++++----- src/dev_codec_ipfs_live_test.erl | 601 ++++++++++-------------------- src/dev_codec_ipfs_test.erl | 431 +++++++-------------- src/hb_store_ipfs_gateway.erl | 357 +++++++----------- 7 files changed, 726 insertions(+), 1550 deletions(-) diff --git a/src/dev_codec_httpsig_siginfo.erl b/src/dev_codec_httpsig_siginfo.erl index bd2df6334..53c76168b 100644 --- a/src/dev_codec_httpsig_siginfo.erl +++ b/src/dev_codec_httpsig_siginfo.erl @@ -22,35 +22,24 @@ % <<"status">> % Some libraries do not support it ]). -%% @doc Generate a `signature' and `signature-input' key pair from a commitment -%% map. Commitments without a `signature' field (e.g. content-addressed -%% commitments like `~ipfs@1.0' CIDs, or `ans104@1.0' unsigned-sha256) are -%% not signatures per RFC-9421; they ride along in the message body's -%% `commitments' field and are skipped here. +%% @doc Generate a `signature' and `signature-input' key pair from a +%% commitment map. Commitments without a `signature' field are not +%% signatures per RFC 9421 and are skipped — they ride on the message body. commitments_to_siginfo(_Msg, Comms, _Opts) when ?IS_EMPTY_MESSAGE(Comms) -> #{}; commitments_to_siginfo(Msg, Comms, Opts) -> Signable = maps:filter( - fun(_CommID, Commitment) -> - maps:is_key(<<"signature">>, Commitment) - end, - Comms - ), + fun(_CommID, C) -> maps:is_key(<<"signature">>, C) end, Comms), case map_size(Signable) of 0 -> #{}; - _ -> - commitments_to_siginfo_for_signable(Msg, Signable, Opts) + _ -> commitments_to_siginfo_for_signable(Msg, Signable, Opts) end. commitments_to_siginfo_for_signable(Msg, Comms, Opts) -> - % Generate a SF item for each commitment's signature and signature-input. - % The commitment's map key (`CommID') is threaded through so that - % `commitment_to_sf_siginfo/4' can emit an `id' parameter whenever the - % decoder-side derivation rule (`id(Sig)' — see `sf_siginfo_to_commitment/5') - % would not reproduce it on the other side. Commitment devices whose - % identity is `h(Sig)' (HMAC, RSA-PSS, etc.) never pay this cost; - % content-addressed devices (e.g. `~ipfs@1.0') do. + % Emit a SF item per commitment. `CommID' is threaded through so + % `commitment_to_sf_siginfo/4' can add an `id' parameter whenever the + % decoder-side derivation would not reproduce the sender's map key. {Sigs, SigInputs} = maps:fold( fun(CommID, Commitment, {Sigs, SigInputs}) -> @@ -94,15 +83,10 @@ commitment_to_sf_siginfo(Msg, CommID, Commitment, Opts) -> Expires = maps:get(<<"expires">>, Commitment, undefined), % Generate the name of the signature. SigName = hb_util:to_lower(hb_util:human_id(crypto:hash(sha256, Signature))), - % Decide whether we need to transport the commitment's map key. - % `sf_siginfo_to_commitment/5' derives the decoded commitment's ID from - % `Sig' alone (either `human_id(Sig)' for 32-byte sigs or - % `human_id(sha256(Sig))' otherwise). Commitments whose map key matches - % that derivation (HMAC, RSA-PSS, and any `@/' family - % that keys on `h(Sig)') need no extra work. Content-addressed devices - % like `~ipfs@1.0' — whose key is a CID, not a function of `Sig' — emit - % an `id' parameter so the receiver reconstructs the commitment under - % the sender's intended key. + % If the decoder's derivation would not reproduce the sender's map key, + % transport it explicitly as an `id' parameter. Content-addressed devices + % (e.g. `~ipfs@1.0') key on a CID that is not a function of `Sig'; HMAC, + % RSA-PSS, and other `h(Sig)'-keyed devices never pay this cost. DerivedID = derived_commitment_id(Signature), IDParam = case CommID of @@ -158,17 +142,13 @@ commitment_to_sf_siginfo(Msg, CommID, Commitment, Opts) -> ), {ok, SigName, SFSig, SFSigInput}. -%% @doc Derive the commitment map key that `sf_siginfo_to_commitment/5' will -%% assign when an explicit `id' parameter is absent. 32-byte signatures -%% (HMAC-SHA256 output, or a raw sha-256 digest used as a signature by a -%% content-addressed device) are used directly as the ID; longer signatures -%% are rehashed. -derived_commitment_id(Signature) -> - if byte_size(Signature) == 32 -> - hb_util:human_id(Signature); - true -> - hb_util:human_id(crypto:hash(sha256, Signature)) - end. +%% @doc Default commitment ID derivation used on both encode and decode +%% when no explicit `id' parameter is present. 32-byte sigs are used +%% directly; longer sigs are rehashed with sha-256. +derived_commitment_id(Sig) when byte_size(Sig) == 32 -> + hb_util:human_id(Sig); +derived_commitment_id(Sig) -> + hb_util:human_id(crypto:hash(sha256, Sig)). get_additional_params(Commitment) -> AdditionalParams = @@ -306,18 +286,12 @@ sf_siginfo_to_commitment(Msg, BodyKeys, SFSig, SFSigInput, Opts) -> {item, {string, Key}, []} <- SigInput ], CommittedKeys = from_siginfo_keys(Msg, BodyKeys, RawCommittedKeys), - % Merge and cleanup the output. - % 1. Decode the `keyid` (typically a public key) to its raw byte form. - % 2. Decode the `signature` to its raw byte form. - % 3. Filter undefined keys. - % 4. Generate the ID for the commitment. If the encoder transported an - % explicit `id' parameter (set by content-addressed devices like - % `~ipfs@1.0' whose map key is not a function of `Sig'), honour it - % and strip the field from the commitment body. Otherwise fall back - % to the default derivation: use the signature directly for 32-byte - % sigs, else hash the signature. - % 5. If the `keyid' is a public key (determined by length >= 32 bytes), set - % the `committer' to its hash. + % Merge and cleanup the output: + % 1. Decode `keyid' and `signature' to raw bytes. + % 2. Filter undefined keys. + % 3. Use the transported `id' parameter when present (content-addressed + % devices), otherwise fall back to `derived_commitment_id/1'. + % 4. If the `keyid' resolves to a public key, set the `committer'. Commitment3 = Commitment2#{ <<"signature">> => hb_util:encode(Sig), @@ -325,14 +299,8 @@ sf_siginfo_to_commitment(Msg, BodyKeys, SFSig, SFSigInput, Opts) -> }, {ID, Commitment4} = case maps:take(<<"id">>, Commitment3) of - {ExplicitID, Stripped} -> - {ExplicitID, Stripped}; - error -> - DerivedID = - if byte_size(Sig) == 32 -> hb_util:human_id(Sig); - true -> hb_util:human_id(crypto:hash(sha256, Sig)) - end, - {DerivedID, Commitment3} + {ExplicitID, Stripped} -> {ExplicitID, Stripped}; + error -> {derived_commitment_id(Sig), Commitment3} end, KeyID = maps:get(<<"keyid">>, Commitment4, <<>>), Commitment5 = diff --git a/src/dev_codec_ipfs.erl b/src/dev_codec_ipfs.erl index 72524a50b..5c203b70b 100644 --- a/src/dev_codec_ipfs.erl +++ b/src/dev_codec_ipfs.erl @@ -1,35 +1,9 @@ -%%% @doc `~ipfs@1.0' — a commitment device whose IDs are IPFS CIDv1s over a -%%% message's `body', and (in phase 2) a codec that serializes HyperBEAM -%%% messages to deterministic dag-cbor and back. -%%% -%%% Phase 1 surface: `commit/3' (type `unsigned' only), `verify/3', -%%% `content_type/1', and `info/1'. The `<<"body">>' blob is treated as -%%% opaque bytes for hashing. -%%% -%%% Phase 2 adds `to/3' and `from/3'. These route through `~structured@1.0' -%%% exactly like `dev_codec_json' — no changes to the structured codec, the -%%% cache, or the kernel. The pipeline is: -%%% -%%% TABM <-> structured@1.0 (native types) <-> IPLD intermediate <-> dag-cbor bytes -%%% -%%% Atoms other than `null', `true', `false' are not representable in IPLD -%%% and cause `to/3' to throw — that matches the spec. Commitments are -%%% stripped before encoding (IPFS blocks are content; signatures are carried -%%% out-of-band by the HyperBEAM `commitments' machinery). -%%% -%%% How this fits AO-Core: a commitment whose ID is a CID gives the cache -%%% everything it already needs to serve the message under that CID. When a -%%% message with an `~ipfs@1.0' commitment is written via `hb_cache:write/2', -%%% the commitment ID is linked to the uncommitted ID of the message -%%% (see `hb_cache:do_write_message/3'). A subsequent `hb_cache:read(CID, _)' -%%% then returns the full message — no new routing, no kernel changes. -%%% -%%% Verification is the same deterministic function as commit: hash the body -%%% with the declared codec + hash algorithm and check that the resulting CID -%%% is present in the message's `commitments' map. -%%% -%%% This device is optional and user-loadable. It is not in -%%% `hb_opts:preloaded_devices/0' by default. +%%% @doc `~ipfs@1.0': a codec and commitment device whose commitment IDs are +%%% IPFS CIDv1s over a message's `body'. In codec mode, encodes TABMs to +%%% deterministic dag-cbor and back, routed through `~structured@1.0' the +%%% same way `dev_codec_json' and `dev_codec_flat' do. The `body''s CID is +%%% produced by `dev_codec_ipfs_cid:encode/3'; `hb_cache' then links the +%%% CID to the message's uncommitted ID automatically. -module(dev_codec_ipfs). -export([info/1, commit/3, verify/3, content_type/1]). -export([to/3, from/3]). @@ -37,227 +11,98 @@ -include_lib("eunit/include/eunit.hrl"). -define(DEVICE_NAME, <<"ipfs@1.0">>). -%% The `type' of an IPFS commitment is the single coordinate that names -%% the CID's construction — both the multihash function and the CID's -%% multicodec in one string, matching IPFS tooling conventions: -%% -%% sha2-256-raw — `bafk...' CIDs -%% sha2-256-dag-cbor — `bafy...' CIDs -%% -%% These slot directly into the wire `alg' as `ipfs@1.0/' through -%% `dev_codec_httpsig_siginfo:commitment_to_alg/2', which is why we keep -%% the hash-alg inside the commitment's `type' field rather than a -%% separate (non-IANA-registered) metadata parameter. +%% Native commitment types combine the multihash and the CID multicodec so +%% they slot into the wire `alg' field as `ipfs@1.0/' via +%% `dev_codec_httpsig_siginfo:commitment_to_alg/2' — no custom RFC 9421 +%% metadata parameters. -define(DEFAULT_TYPE, <<"sha2-256-raw">>). -define(COMMITTED_KEYS, [<<"body">>]). +-define(IS_NATIVE_TYPE(T), + (T =:= <<"sha2-256-raw">> orelse T =:= <<"sha2-256-dag-cbor">>)). -%%%==================================================================== -%%% AO-Core device surface -%%%==================================================================== - -%% @doc Restrict what AO-Core will resolve against this module. We are a -%% commitment device and a codec, not a general key resolver. `committed/3' -%% is handled by `dev_message' from the `<<"committed">>' field of each -%% commitment, so we do not export it here. +%% @doc Restrict AO-Core resolution to the codec/commitment surface. info(_) -> #{ exports => [commit, verify, content_type, to, from] }. -%% @doc Report the appropriate IPLD MIME type for a given commitment -%% `type'. Matches the native-type names stored by `commit/3'. +%% @doc Return the IPLD MIME type for a commitment's native `type'. content_type(#{ <<"type">> := <<"sha2-256-dag-cbor">> }) -> {ok, <<"application/vnd.ipld.dag-cbor">>}; -content_type(#{ <<"type">> := <<"sha2-256-raw">> }) -> - {ok, <<"application/vnd.ipld.raw">>}; content_type(_) -> {ok, <<"application/vnd.ipld.raw">>}. -%%%==================================================================== -%%% commit/3 -%%%==================================================================== - -%% @doc Compute a CIDv1 over the `body' of `Msg' and attach it as an -%% unsigned commitment. -%% -%% The caller-facing API mirrors `dev_codec_ans104' and `dev_codec_httpsig': -%% the generic `type: unsigned' is translated into the codec's native type -%% string, which is what ends up in the commitment and on the wire alg. -%% IPFS's native types are: -%% -%% <<"sha2-256-raw">> — default. Produces `bafk...' CIDs (multicodec 0x55). -%% <<"sha2-256-dag-cbor">> — for dag-cbor blocks. Produces `bafy...' CIDs (0x71). -%% -%% An optional `hash-alg' request knob lets the caller pick between them -%% when passing `type: unsigned'; otherwise the raw form is the default. -%% Non-unsigned types delegate to `~httpsig@1.0' — the same composition -%% pattern as `dev_codec_flat' / `dev_codec_json'. - -%% Generic caller knob: translate to the native type name. +%% @doc Attach a CIDv1 commitment over `body'. `type: unsigned' is the +%% generic caller knob — translate it to the codec's native type. Any other +%% commit type (signed, rsa-pss, etc.) delegates to `~httpsig@1.0', the +%% composition pattern used by `dev_codec_flat' and `dev_codec_json'. commit(Msg, Req = #{ <<"type">> := <<"unsigned">> }, Opts) -> Native = hb_maps:get(<<"hash-alg">>, Req, ?DEFAULT_TYPE, Opts), Req1 = hb_maps:without([<<"hash-alg">>], Req, Opts), commit(Msg, Req1#{ <<"type">> => Native }, Opts); - -%% Native types: do the real work. Storing the hash-alg in `type' (rather -%% than a separate custom metadata parameter) means the wire alg reads as -%% `ipfs@1.0/sha2-256-raw' — an extension of the HyperBEAM `/' -%% alg convention that requires no custom RFC 9421 metadata parameters. -commit(Msg, #{ <<"type">> := Type }, Opts) - when Type =:= <<"sha2-256-raw">>; - Type =:= <<"sha2-256-dag-cbor">> -> +commit(Msg, #{ <<"type">> := Type }, Opts) when ?IS_NATIVE_TYPE(Type) -> Body = hb_maps:get(<<"body">>, Msg, <<>>, Opts), - {ok, Multicodec} = type_to_multicodec(Type), + Multicodec = multicodec_of(Type), CID = dev_codec_ipfs_cid:encode(Multicodec, sha2_256, Body), - %% The `signature' field carries the raw sha-256 digest of the body - %% (base64url). That's what an IPFS CID commits to by construction — - %% there is no separate cryptographic signing step, and no key is - %% needed to reverify (a receiver just recomputes from `body'). We do - %% not set `keyid': RFC 9421 §1.4.2.3 permits its absence ("other - %% means" of key-material retrieval, where here "other means" is "no - %% retrieval at all"). The httpsig encoder drops absent `keyid' from - %% the wire. - %% - %% The `signature' field also keeps the commitment on the httpsig - %% wire (it is otherwise filtered out by `commitments_to_siginfo'). - %% Combined with the `id=' parameter that `dev_codec_httpsig_siginfo' - %% emits when the map key differs from `h(Sig)', this lets remote - %% nodes decode an IPFS commitment back under its CID key. - Signature = hb_util:encode(crypto:hash(sha256, Body)), - Commitment = - #{ - <<"commitment-device">> => ?DEVICE_NAME, - <<"type">> => Type, - <<"committed">> => ?COMMITTED_KEYS, - <<"signature">> => Signature - }, + Commitment = #{ + <<"commitment-device">> => ?DEVICE_NAME, + <<"type">> => Type, + <<"committed">> => ?COMMITTED_KEYS, + %% Carrying the raw sha-256 digest as `signature' keeps the + %% commitment on the httpsig wire (see + %% `dev_codec_httpsig_siginfo''s signature filter). No `keyid' — + %% content-addressed commitments need no key material. RFC 9421 + %% §1.4.2.3 permits keyid's absence. + <<"signature">> => hb_util:encode(crypto:hash(sha256, Body)) + }, Existing = hb_maps:get(<<"commitments">>, Msg, #{}, Opts), - ?event(ipfs, - {commit, - {cid, CID}, - {type, Type}, - {body_size, byte_size(Body)} - } - ), + ?event(ipfs, {commit, {cid, CID}, {type, Type}, {size, byte_size(Body)}}), {ok, Msg#{ <<"commitments">> => Existing#{ CID => Commitment } }}; - -%% Caller asked for an unsigned commit with an unrecognised hash-alg. -commit(_Msg, #{ <<"type">> := Type }, _Opts) - when Type =:= <<"sha2-256-dag-json">>; - Type =:= <<"sha2-256-dag-pb">> -> +commit(_Msg, #{ <<"type">> := <<"sha2-256-", _/binary>> = Type }, _Opts) -> {error, {unsupported_type, Type}}; - commit(Msg, Req, Opts) -> - %% Any other commit type — signed, rsa-pss, hmac, etc. — is outside the - %% IPFS CID envelope. We delegate to `~httpsig@1.0' the same way - %% `dev_codec_flat', `dev_codec_json', and other codec-only devices do. - %% Users who want a pure IPFS CID commitment specify `type: unsigned'; - %% everything else gets a proper cryptographic commitment attached. dev_codec_httpsig:commit(Msg, Req, Opts). -%% @doc Resolve a native `type' to the underlying CID multicodec name. -type_to_multicodec(<<"sha2-256-raw">>) -> {ok, <<"raw">>}; -type_to_multicodec(<<"sha2-256-dag-cbor">>) -> {ok, <<"dag-cbor">>}; -type_to_multicodec(_) -> error. - -%%%==================================================================== -%%% verify/3 -%%%==================================================================== - -%% @doc Verify an `~ipfs@1.0' commitment. `Req' carries the merged fields -%% of the commitment being verified; `Base' is the full message including -%% its `commitments' map. -%% -%% The verification is the commitment function in reverse: recompute the -%% CID from `body' using the commitment's declared native `type'. The -%% commitment is valid iff that CID is a key in `Base''s commitments map -%% — exactly when the body has not been tampered with. -verify(Base, #{ <<"type">> := Type } = _Req, Opts) - when Type =:= <<"sha2-256-raw">>; - Type =:= <<"sha2-256-dag-cbor">> -> - verify_native(Base, Type, Opts); +%% @doc Verify an `~ipfs@1.0' commitment by recomputing the CID from `body' +%% under the declared native type and checking it keys the commitments map. +verify(Base, #{ <<"type">> := Type }, Opts) when ?IS_NATIVE_TYPE(Type) -> + Body = hb_maps:get(<<"body">>, Base, <<>>, Opts), + Comms = hb_maps:get(<<"commitments">>, Base, #{}, Opts), + Expected = dev_codec_ipfs_cid:encode(multicodec_of(Type), sha2_256, Body), + Res = hb_maps:is_key(Expected, Comms, Opts), + ?event(ipfs, {verify, {type, Type}, {expected, Expected}, {result, Res}}), + {ok, Res}; verify(Base, Req, Opts) -> - %% Non-native commitments on an IPFS-device message are - %% httpsig-shaped (see `commit/3'). Delegate. dev_codec_httpsig:verify(Base, Req, Opts). -verify_native(Base, Type, Opts) -> - Body = hb_maps:get(<<"body">>, Base, <<>>, Opts), - Commitments = hb_maps:get(<<"commitments">>, Base, #{}, Opts), - {ok, Multicodec} = type_to_multicodec(Type), - ExpectedCID = dev_codec_ipfs_cid:encode(Multicodec, sha2_256, Body), - Res = hb_maps:is_key(ExpectedCID, Commitments, Opts), - ?event(ipfs, - {verify, - {type, Type}, - {expected_cid, ExpectedCID}, - {result, Res} - } - ), - {ok, Res}. - -%%%==================================================================== -%%% to/3 — TABM -> dag-cbor bytes (phase 2) -%%%==================================================================== +%% @doc Resolve a native `type' to its CID multicodec name. +multicodec_of(<<"sha2-256-raw">>) -> <<"raw">>; +multicodec_of(<<"sha2-256-dag-cbor">>) -> <<"dag-cbor">>. -%% @doc Serialize a HyperBEAM TABM message to deterministic dag-cbor bytes. -%% Routes through `~structured@1.0' to recover native types from the TABM, -%% then walks the rich message into the IPLD intermediate form and hands it -%% to the dag-cbor encoder. Commitments are stripped before encoding — they -%% do not belong in the content-addressed bytes. +%% @doc Serialize a TABM to deterministic dag-cbor bytes. Routes through +%% `~structured@1.0' to recover native types, resolves links (dag-cbor is +%% self-contained), strips `priv', and walks the result into the IPLD +%% intermediate form that `dev_codec_ipfs_cbor:encode/1' consumes. to(Bin, _Req, _Opts) when is_binary(Bin) -> - %% Encode a bare binary as a dag-cbor text string (or byte string if not - %% UTF-8). Passing it through untouched would leave us unable to - %% `from/3' the result — the roundtrip contract the codec test vectors - %% rely on. - try - {ok, dev_codec_ipfs_cbor:encode(Bin)} - catch - throw:{dag_cbor_encode, {invalid_utf8, _}} -> - {ok, dev_codec_ipfs_cbor:encode({bytes, Bin})} + %% Bare binaries encode as text strings (or byte strings if not UTF-8) + %% so that `to' / `from' is a roundtrip. + try {ok, dev_codec_ipfs_cbor:encode(Bin)} + catch throw:{dag_cbor_encode, {invalid_utf8, _}} -> + {ok, dev_codec_ipfs_cbor:encode({bytes, Bin})} end; to(Msg, _Req, Opts) when is_map(Msg) -> try - %% Step 1: TABM -> structured form with native types. Structured = hb_message:convert( - hb_private:reset(Msg), - <<"structured@1.0">>, - tabm, - Opts - ), - %% Step 2: resolve all links. Dag-cbor encodes self-contained content - %% — partial messages carrying `link'-ref placeholders would not - %% roundtrip through the IPLD data model. An IPLD-link-aware mapping - %% through `hb_link' is a future phase. + hb_private:reset(Msg), <<"structured@1.0">>, tabm, Opts), Loaded = hb_cache:ensure_all_loaded(Structured, Opts), - %% Step 3: strip only `priv' — it is per-session state and must - %% never cross the codec boundary. Commitments *do* cross so that - %% `from(to(X)) = X' over the full HyperBEAM message; peer codecs - %% (json, flat, ans104) all behave this way. A pure IPFS consumer - %% sees `commitments' as just another map field — completely valid - %% IPLD, and no harm done. Clean = hb_maps:without([<<"priv">>], Loaded, Opts), - %% Step 4: walk into the IPLD intermediate form, then encode. - Ipld = structured_to_ipld(Clean), - {ok, dev_codec_ipfs_cbor:encode(Ipld)} - catch - throw:{dag_cbor_encode, Reason} -> - ?event(warning, {ipfs_to_failed, Reason}), - {error, {dag_cbor_encode, Reason}} + {ok, dev_codec_ipfs_cbor:encode(structured_to_ipld(Clean))} + catch throw:{dag_cbor_encode, Reason} -> + ?event(warning, {ipfs_to_failed, Reason}), + {error, {dag_cbor_encode, Reason}} end. -%% @doc Walk a structured (rich-typed) HyperBEAM value into the IPLD -%% intermediate form understood by `dev_codec_ipfs_cbor:encode/1'. -%% -%% Mappings: -%% - `null' / `true' / `false' -> kept as IPLD native. -%% - integer / float / binary -> passed through as-is. -%% - list -> list, recursively converted. -%% - map -> map, with binary keys; values -%% recursively converted. -%% - other atoms -> throw; dag-cbor has no atom type. -%% -%% Any value the walker cannot map raises an error the caller surfaces as -%% `{error, {dag_cbor_encode, _}}'. +%% @doc Walk a structured HyperBEAM value into the IPLD intermediate form. +%% Atoms outside `null/true/false' have no IPLD representation and throw. structured_to_ipld(null) -> null; structured_to_ipld(true) -> true; structured_to_ipld(false) -> false; @@ -269,111 +114,61 @@ structured_to_ipld(L) when is_list(L) -> [ structured_to_ipld(V) || V <- L ]; structured_to_ipld(M) when is_map(M) -> maps:from_list( - [ {assert_binary_key(K), structured_to_ipld(V)} - || {K, V} <- maps:to_list(M) ] - ); -structured_to_ipld(Other) -> - throw({dag_cbor_encode, {unsupported_value, Other}}). + [ {assert_binary_key(K), structured_to_ipld(V)} || {K, V} <- maps:to_list(M) ]); +structured_to_ipld(V) -> + throw({dag_cbor_encode, {unsupported_value, V}}). assert_binary_key(K) when is_binary(K) -> K; -assert_binary_key(K) -> - throw({dag_cbor_encode, {non_binary_map_key, K}}). +assert_binary_key(K) -> throw({dag_cbor_encode, {non_binary_map_key, K}}). -%%%==================================================================== -%%% from/3 — dag-cbor bytes -> TABM (phase 2) -%%%==================================================================== - -%% @doc Parse dag-cbor bytes into a TABM message. Decodes to the IPLD -%% intermediate form, normalizes into a rich structured message, then hands -%% to `~structured@1.0' to produce the TABM. +%% @doc Parse dag-cbor bytes into a TABM. Pre-decoded maps pass through +%% unchanged, matching the `dev_codec_json' / `dev_codec_flat' discipline. from(Map, _Req, _Opts) when is_map(Map) -> - %% Passthrough for already-decoded messages, same discipline as json/flat. {ok, Map}; from(Bin, Req, Opts) when is_binary(Bin) -> case dev_codec_ipfs_cbor:decode(Bin) of {ok, Ipld} -> - Structured = ipld_to_structured(Ipld), - case Structured of - S when is_map(S) -> - dev_codec_structured:from(S, Req, Opts); - Other -> - {ok, Other} + case ipld_to_structured(Ipld) of + M when is_map(M) -> dev_codec_structured:from(M, Req, Opts); + Other -> {ok, Other} end; {error, Reason} -> ?event(warning, {ipfs_from_failed, Reason}), {error, {dag_cbor_decode, Reason}} end. -%% @doc Walk the IPLD intermediate form into a HyperBEAM structured form -%% (the rich, native-typed representation that `dev_codec_structured:from/3' -%% consumes). -%% -%% Decisions made for phase 2 minimum: -%% - `{bytes, B}' and plain binary both flatten to a binary. HyperBEAM -%% messages rarely need the bytes/text distinction, and re-inferring it -%% via `ao-types' is out of scope for the first cut. -%% - `{link, CID}' flattens to the CID string. This is lossy against -%% IPLD's link semantics, but keeps v1 simple; a link-aware mapping -%% through `hb_link' is the natural phase 3 step. +%% @doc Walk the IPLD intermediate form into a rich-typed HyperBEAM value. +%% `{bytes, B}' flattens to a binary; `{link, CID}' flattens to the CID +%% string — a link-aware mapping through `hb_link' is future work. ipld_to_structured(null) -> null; ipld_to_structured(true) -> true; ipld_to_structured(false) -> false; ipld_to_structured(N) when is_integer(N); is_float(N) -> N; ipld_to_structured(B) when is_binary(B) -> B; -ipld_to_structured({bytes, B}) -> B; -ipld_to_structured({link, CID}) -> CID; +ipld_to_structured({bytes, B}) -> B; +ipld_to_structured({link, CID}) -> CID; ipld_to_structured(L) when is_list(L) -> [ ipld_to_structured(V) || V <- L ]; ipld_to_structured(M) when is_map(M) -> maps:map(fun(_K, V) -> ipld_to_structured(V) end, M). -%%%==================================================================== -%%% Tests -%%%==================================================================== -%%% Integration-level tests live in `dev_codec_ipfs_test'. - -content_type_raw_test() -> - ?assertEqual( - {ok, <<"application/vnd.ipld.raw">>}, - content_type(#{ <<"type">> => <<"sha2-256-raw">> }) - ). - -content_type_dag_cbor_test() -> - ?assertEqual( - {ok, <<"application/vnd.ipld.dag-cbor">>}, - content_type(#{ <<"type">> => <<"sha2-256-dag-cbor">> }) - ). - -content_type_default_test() -> - ?assertEqual( - {ok, <<"application/vnd.ipld.raw">>}, - content_type(#{}) - ). +%%% Tests. Integration-level tests live in `dev_codec_ipfs_test'. -%% The caller passes the generic `type: unsigned'; `commit/3' translates -%% into the codec's native type name (`sha2-256-raw') — same translation -%% pattern as `dev_codec_ans104' (unsigned -> unsigned-sha256) and -%% `dev_codec_httpsig' (unsigned -> hmac-sha256). commit_unsigned_raw_attaches_cid_test() -> - Msg = #{ <<"body">> => <<"hello world">> }, - Req = #{ <<"type">> => <<"unsigned">> }, - {ok, Committed} = commit(Msg, Req, #{}), - Commitments = maps:get(<<"commitments">>, Committed), - [CID] = maps:keys(Commitments), + {ok, Committed} = + commit(#{ <<"body">> => <<"hello world">> }, + #{ <<"type">> => <<"unsigned">> }, #{}), + [CID] = maps:keys(maps:get(<<"commitments">>, Committed)), ?assertEqual( - <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>, - CID - ), - Commitment = maps:get(CID, Commitments), + <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>, CID), + Commitment = maps:get(CID, maps:get(<<"commitments">>, Committed)), ?assertEqual(?DEVICE_NAME, maps:get(<<"commitment-device">>, Commitment)), ?assertEqual(<<"sha2-256-raw">>, maps:get(<<"type">>, Commitment)), ?assertEqual([<<"body">>], maps:get(<<"committed">>, Commitment)), - %% The commitment carries a signature (= raw digest) so it stays on - %% the httpsig wire via `dev_codec_httpsig_siginfo''s signature - %% filter. It deliberately does NOT carry a `keyid' — there is no - %% key to look up for a content-addressed commitment — nor any - %% `committer', `hash-alg', or `multicodec' fields (the multihash - %% and multicodec are both encoded in `type'). + %% The commitment carries `signature' (= raw digest) so it survives the + %% httpsig wire; `keyid', `committer', `hash-alg', and `multicodec' are + %% deliberately absent — content-addressed commitments need no key, and + %% the multihash + multicodec are already encoded in `type'. ?assertMatch(#{<<"signature">> := _}, Commitment), ?assertNot(maps:is_key(<<"keyid">>, Commitment)), ?assertNot(maps:is_key(<<"committer">>, Commitment)), @@ -381,82 +176,63 @@ commit_unsigned_raw_attaches_cid_test() -> ?assertNot(maps:is_key(<<"multicodec">>, Commitment)). commit_unsigned_dag_cbor_test() -> - Msg = #{ <<"body">> => <<16#a0>> }, %% empty dag-cbor map `{}` - Req = #{ <<"type">> => <<"unsigned">>, - <<"hash-alg">> => <<"sha2-256-dag-cbor">> }, - {ok, Committed} = commit(Msg, Req, #{}), - Commitments = maps:get(<<"commitments">>, Committed), - [CID] = maps:keys(Commitments), + {ok, Committed} = + commit(#{ <<"body">> => <<16#a0>> }, + #{ <<"type">> => <<"unsigned">>, + <<"hash-alg">> => <<"sha2-256-dag-cbor">> }, #{}), + [CID] = maps:keys(maps:get(<<"commitments">>, Committed)), ?assertEqual( - <<"bafyreigbtj4x7ip5legnfznufuopl4sg4knzc2cof6duas4b3q2fy6swua">>, - CID - ), - Commitment = maps:get(CID, Commitments), - ?assertEqual(<<"sha2-256-dag-cbor">>, maps:get(<<"type">>, Commitment)). + <<"bafyreigbtj4x7ip5legnfznufuopl4sg4knzc2cof6duas4b3q2fy6swua">>, CID). -%% Callers that already know the native type can pass it directly. commit_native_type_test() -> - Msg = #{ <<"body">> => <<"hello world">> }, - {ok, Committed} = commit(Msg, #{ <<"type">> => <<"sha2-256-raw">> }, #{}), + {ok, Committed} = + commit(#{ <<"body">> => <<"hello world">> }, + #{ <<"type">> => <<"sha2-256-raw">> }, #{}), [CID] = maps:keys(maps:get(<<"commitments">>, Committed)), ?assertEqual( - <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>, - CID - ). + <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>, CID). commit_preserves_existing_commitments_test() -> Msg = #{ - <<"body">> => <<"hello world">>, - <<"commitments">> => #{ <<"other">> => #{ <<"kind">> => <<"placeholder">> } } + <<"body">> => <<"hello world">>, + <<"commitments">> => #{ <<"other">> => #{ <<"kind">> => <<"x">> } } }, {ok, Committed} = commit(Msg, #{ <<"type">> => <<"unsigned">> }, #{}), - Commitments = maps:get(<<"commitments">>, Committed), - ?assert(maps:is_key(<<"other">>, Commitments)), - ?assertEqual(2, maps:size(Commitments)). + ?assertEqual(2, maps:size(maps:get(<<"commitments">>, Committed))). -%% Non-unsigned commit types delegate to `~httpsig@1.0', matching the -%% composition pattern used by `dev_codec_flat', `dev_codec_json', and -%% other codec-only devices. A user who wants a pure IPFS CID passes -%% `type: unsigned'; everything else gets a proper signed commitment. commit_signed_delegates_to_httpsig_test() -> - Msg = #{ <<"body">> => <<"x">> }, - Wallet = ar_wallet:new(), - Opts = #{ priv_wallet => Wallet }, - {ok, Signed} = commit(Msg, #{ <<"type">> => <<"signed">> }, Opts), - Commitments = maps:get(<<"commitments">>, Signed), - [{_CID, Commitment}|_] = maps:to_list(Commitments), - ?assertEqual(<<"httpsig@1.0">>, - maps:get(<<"commitment-device">>, Commitment)). - -commit_rejects_unknown_hash_alg_test() -> - Msg = #{ <<"body">> => <<"x">> }, - Req = #{ <<"type">> => <<"unsigned">>, - <<"hash-alg">> => <<"sha2-256-dag-pb">> }, - ?assertMatch({error, {unsupported_type, <<"sha2-256-dag-pb">>}}, - commit(Msg, Req, #{})). + {ok, Signed} = + commit(#{ <<"body">> => <<"x">> }, #{ <<"type">> => <<"signed">> }, + #{ priv_wallet => ar_wallet:new() }), + [{_CID, C}|_] = maps:to_list(maps:get(<<"commitments">>, Signed)), + ?assertEqual(<<"httpsig@1.0">>, maps:get(<<"commitment-device">>, C)). + +commit_rejects_unsupported_ipfs_type_test() -> + ?assertMatch( + {error, {unsupported_type, <<"sha2-256-dag-pb">>}}, + commit(#{ <<"body">> => <<"x">> }, + #{ <<"type">> => <<"unsigned">>, + <<"hash-alg">> => <<"sha2-256-dag-pb">> }, #{})). verify_ok_for_intact_body_test() -> - Msg = #{ <<"body">> => <<"hello world">> }, - {ok, Committed} = commit(Msg, #{ <<"type">> => <<"unsigned">> }, #{}), - Commitments = maps:get(<<"commitments">>, Committed), - [{_CID, Commitment}] = maps:to_list(Commitments), - ?assertEqual({ok, true}, verify(Committed, Commitment, #{})). + {ok, Committed} = + commit(#{ <<"body">> => <<"hello world">> }, + #{ <<"type">> => <<"unsigned">> }, #{}), + [{_CID, C}] = maps:to_list(maps:get(<<"commitments">>, Committed)), + ?assertEqual({ok, true}, verify(Committed, C, #{})). verify_fails_for_tampered_body_test() -> - Msg = #{ <<"body">> => <<"hello world">> }, - {ok, Committed} = commit(Msg, #{ <<"type">> => <<"unsigned">> }, #{}), - Commitments = maps:get(<<"commitments">>, Committed), - [{_CID, Commitment}] = maps:to_list(Commitments), - Tampered = Committed#{ <<"body">> => <<"hello earth">> }, - ?assertEqual({ok, false}, verify(Tampered, Commitment, #{})). + {ok, Committed} = + commit(#{ <<"body">> => <<"hello world">> }, + #{ <<"type">> => <<"unsigned">> }, #{}), + [{_CID, C}] = maps:to_list(maps:get(<<"commitments">>, Committed)), + ?assertEqual({ok, false}, + verify(Committed#{ <<"body">> => <<"hello earth">> }, C, #{})). verify_fails_when_hash_alg_mismatches_test() -> - %% A commitment written as raw; caller asserts dag-cbor on verify. - %% Recomputing under dag-cbor yields a different CID that is not in the - %% commitments map, so verify must return `{ok, false}'. - Msg = #{ <<"body">> => <<"hello world">> }, - {ok, Committed} = commit(Msg, #{ <<"type">> => <<"unsigned">> }, #{}), - Commitments = maps:get(<<"commitments">>, Committed), - [{_CID, Commitment}] = maps:to_list(Commitments), - DagCborReq = Commitment#{ <<"type">> => <<"sha2-256-dag-cbor">> }, - ?assertEqual({ok, false}, verify(Committed, DagCborReq, #{})). + {ok, Committed} = + commit(#{ <<"body">> => <<"hello world">> }, + #{ <<"type">> => <<"unsigned">> }, #{}), + [{_CID, C}] = maps:to_list(maps:get(<<"commitments">>, Committed)), + ?assertEqual({ok, false}, + verify(Committed, C#{ <<"type">> => <<"sha2-256-dag-cbor">> }, #{})). diff --git a/src/dev_codec_ipfs_cbor.erl b/src/dev_codec_ipfs_cbor.erl index e303ef683..40adb3d5c 100644 --- a/src/dev_codec_ipfs_cbor.erl +++ b/src/dev_codec_ipfs_cbor.erl @@ -1,48 +1,33 @@ -%%% @doc Pure-Erlang deterministic DAG-CBOR encoder and decoder. +%%% @doc Pure-Erlang deterministic DAG-CBOR encoder and decoder. Implements +%%% the dag-cbor subset of RFC 8949: definite-length containers only; 64-bit +%%% floats only, no NaN/Infinity; shortest-form integers in int64 range; +%%% text-string keys sorted length-first then bytewise; only tag 42 (IPLD +%%% link); valid UTF-8 text; simple values 20/21/22 only. Spec: +%%% https://ipld.io/specs/codecs/dag-cbor/spec/ %%% -%%% DAG-CBOR is a strict subset of CBOR (RFC 8949). This module implements -%%% the subset, and rejects inputs that violate it: -%%% - Only definite-length containers. -%%% - Only 64-bit floats (IEEE 754 binary64); NaN and Infinity rejected. -%%% - Integers fit in a signed 64-bit range, shortest-form encoding. -%%% - Map keys are text strings, sorted length-first then bytewise. -%%% - Only tag 42 (IPLD Link) is permitted; no other tags. -%%% - Text strings must be valid UTF-8. -%%% - Only simple values 20 (false), 21 (true), 22 (null). +%%% IPLD data model <-> Erlang intermediate form used here: %%% -%%% The spec: https://ipld.io/specs/codecs/dag-cbor/spec/ +%%% null | false | true <-> atoms +%%% integer | float <-> Erlang number +%%% text string <-> UTF-8 binary +%%% byte string <-> `{bytes, Binary}' +%%% array <-> list +%%% map <-> map with binary keys +%%% link (CID) <-> `{link, CIDString}' %%% -%%% IPLD data model <-> Erlang intermediate form: -%%% - null -> atom `null' -%%% - false / true -> atoms `false' / `true' -%%% - integer -> Erlang integer -%%% - float -> Erlang float -%%% - text string -> binary (UTF-8) -%%% - byte string -> `{bytes, Binary}' tuple (to disambiguate from text) -%%% - array -> list -%%% - map -> map with binary keys -%%% - link (CID) -> `{link, CIDBinary}' tuple where CIDBinary is the -%%% multibase-encoded string form (e.g. `<<"bafy...">>'). -%%% -%%% This module does no work with HyperBEAM's `~structured@1.0' or TABM. Its -%%% job is the bytes-to-IPLD frontier; the device-level glue in -%%% `dev_codec_ipfs' bridges the IPLD intermediate form into HyperBEAM's -%%% message model. +%%% The module does not touch `~structured@1.0' or TABM; `dev_codec_ipfs' +%%% bridges this IPLD form into HyperBEAM messages. -module(dev_codec_ipfs_cbor). -export([encode/1, decode/1]). -include("include/hb.hrl"). -include_lib("eunit/include/eunit.hrl"). -%% Integer range bounds per dag-cbor. +%% Integer range bounds per dag-cbor (int64). -define(INT64_MAX, 16#7fffffffffffffff). -define(INT64_MIN, -16#8000000000000000). -%%%==================================================================== -%%% Encoder -%%%==================================================================== - -%% @doc Encode an IPLD value to dag-cbor bytes. Throws `{dag_cbor_encode, -%% Reason}' on invalid input. +%% @doc Encode an IPLD value to dag-cbor bytes. Throws +%% `{dag_cbor_encode, Reason}' on invalid input. encode(V) -> try iolist_to_binary(enc(V)) catch throw:{dag_cbor_encode, _} = E -> throw(E); @@ -60,16 +45,13 @@ enc(N) when is_integer(N), N < 0, N >= ?INT64_MIN -> enc(N) when is_integer(N) -> throw({dag_cbor_encode, {integer_out_of_range, N}}); enc(F) when is_float(F) -> - %% Reject NaN. Erlang binary-match of `:64/float' would itself refuse a - %% NaN on the decode side, and arithmetic rarely yields a NaN float in - %% Erlang, but we still assert to be safe. + %% NaN: `F == F' is false. Infinity: `F == F + 1.0' and non-zero. + %% Both are forbidden in dag-cbor; every finite double encodes as 0xfb + %% + 8 big-endian IEEE-754 bytes. case F == F of false -> throw({dag_cbor_encode, nan_forbidden}); true -> - %% Infinity detection: Erlang has no built-in, but an infinity - %% value would satisfy F > ?INT64_MAX AND F + 1 == F. That is - %% always false for finite doubles. This gate is defensive. - case (F == F + 1.0) andalso (F =/= 0.0) of + case (F == F + 1.0) andalso (F =/= +0.0) andalso (F =/= -0.0) of true -> throw({dag_cbor_encode, infinity_forbidden}); false -> <<16#fb, F:64/float>> end @@ -97,23 +79,20 @@ enc(M) when is_map(M) -> case lists:all(fun({K, _}) -> is_binary(K) end, Pairs) of false -> throw({dag_cbor_encode, non_string_map_key}); true -> - Sorted = lists:sort(fun key_lt/2, Pairs), + Sorted = lists:sort(fun pair_key_lt/2, Pairs), [enc_header(5, length(Sorted)), [ [enc(K), enc(V)] || {K, V} <- Sorted ]] end; enc(Other) -> throw({dag_cbor_encode, {unsupported_type, Other}}). -%% @doc Dag-CBOR length-first key ordering. Since all keys are strings, we -%% compare by their byte content directly, not by their encoded form — which -%% is equivalent because the encoded-length prefix is a monotonic function of -%% the string byte length for the range of string lengths we emit. -key_lt({K1, _}, {K2, _}) -> - L1 = byte_size(K1), - L2 = byte_size(K2), - if L1 < L2 -> true; - L1 > L2 -> false; - true -> K1 =< K2 +%% @doc Dag-CBOR length-first, then bytewise key ordering. +pair_key_lt({K1, _}, {K2, _}) -> key_lt(K1, K2). +key_lt(A, B) -> + case {byte_size(A), byte_size(B)} of + {LA, LB} when LA < LB -> true; + {LA, LB} when LA > LB -> false; + _ -> A =< B end. %% @doc Major type header with shortest-form length/argument. @@ -134,14 +113,10 @@ is_valid_utf8(B) -> _ -> false end. -%%%==================================================================== -%%% Decoder -%%%==================================================================== - -%% @doc Decode a dag-cbor binary into an IPLD intermediate value. Returns -%% `{ok, Value}' or `{error, Reason}'. Strictly validates: rejects -%% indefinite-length items, non-64-bit floats, NaN/Infinity, non-canonical -%% integer forms, unsupported tags, non-UTF-8 strings. +%% @doc Decode a dag-cbor binary into an IPLD value. Strictly validates: +%% rejects indefinite-length items, non-64-bit floats, NaN/Infinity, +%% non-canonical integers, unsupported tags, non-UTF-8 strings, and +%% non-canonical map ordering. decode(Bin) when is_binary(Bin) -> try {Value, Rest} = dec_one(Bin), @@ -151,7 +126,7 @@ decode(Bin) when is_binary(Bin) -> end catch throw:{dag_cbor_decode, Reason} -> {error, Reason}; - error:_ = E -> {error, {malformed, E}} + error:E -> {error, {malformed, E}} end. dec_one(<<>>) -> @@ -219,8 +194,7 @@ dec_value(3, L, Rest) -> _ -> throw({dag_cbor_decode, {truncated_text, L}}) end; dec_value(4, L, Rest) -> - {Xs, Rest1} = dec_n(L, Rest, []), - {Xs, Rest1}; + dec_n(L, Rest, []); dec_value(5, L, Rest) -> {Pairs, Rest1} = dec_pairs(L, Rest, [], <<>>), {maps:from_list(Pairs), Rest1}; @@ -259,37 +233,18 @@ dec_n(N, Rest, Acc) -> {V, Rest1} = dec_one(Rest), dec_n(N - 1, Rest1, [V | Acc]). -%% Decode map pairs and, while decoding, verify keys are: -%% 1. text strings (major type 3), -%% 2. strictly ascending in the dag-cbor length-first / bytewise order, -%% with no duplicates. +%% Decode map pairs; verify keys are text strings in strictly ascending +%% dag-cbor order (length-first, then bytewise) with no duplicates. dec_pairs(0, Rest, Acc, _Prev) -> {lists:reverse(Acc), Rest}; dec_pairs(N, Rest, Acc, Prev) -> {K, Rest1} = dec_one(Rest), - case is_binary(K) of - false -> throw({dag_cbor_decode, non_string_map_key}); - true -> ok - end, - case Acc of - [] -> ok; - _ -> - case key_strictly_less(Prev, K) of - true -> ok; - false -> throw({dag_cbor_decode, non_canonical_map_order}) - end - end, + is_binary(K) orelse throw({dag_cbor_decode, non_string_map_key}), + (Acc =:= [] orelse (key_lt(Prev, K) andalso Prev =/= K)) + orelse throw({dag_cbor_decode, non_canonical_map_order}), {V, Rest2} = dec_one(Rest1), dec_pairs(N - 1, Rest2, [{K, V} | Acc], K). -key_strictly_less(A, B) -> - LA = byte_size(A), - LB = byte_size(B), - if LA < LB -> true; - LA > LB -> false; - true -> A < B - end. - dec_link(Rest) -> case dec_one(Rest) of {{bytes, <<0, CIDBytes/binary>>}, Rest1} -> @@ -301,11 +256,7 @@ dec_link(Rest) -> throw({dag_cbor_decode, cid_link_expects_byte_string}) end. -%%%==================================================================== -%%% Tests -%%%==================================================================== - -%%% Unit-level known-answer tests (RFC 8949 Appendix A / dag-cbor spec). +%%% Tests — unit known-answer vectors and compound roundtrips. scalars_roundtrip_test() -> ?assertEqual(<<16#f6>>, encode(null)), diff --git a/src/dev_codec_ipfs_cid.erl b/src/dev_codec_ipfs_cid.erl index ba6bddb5d..d64052a37 100644 --- a/src/dev_codec_ipfs_cid.erl +++ b/src/dev_codec_ipfs_cid.erl @@ -1,17 +1,13 @@ -%%% @doc Pure functions for the thin slice of the IPFS/IPLD spec that this -%%% device needs: unsigned varints, sha2-256 multihashes, base32-lowercase -%%% multibase, and CIDv1 encode/decode. -%%% -%%% Intentionally narrow. This module covers only what `dev_codec_ipfs' uses -%%% to turn a `body' blob into a content identifier. It is not a general IPFS -%%% library: no CIDv0, no hash functions besides sha2-256, no multibases -%%% besides base32-lower, and no resolution of IPLD paths. See -%%% `docs/devices/ipfs-at-1-0.md' for the device-level rationale. +%%% @doc Pure functions for the thin slice of the IPFS/IPLD spec that +%%% `~ipfs@1.0' needs: unsigned varints, sha2-256 multihashes, base32-lower +%%% multibase, and CIDv1 encode/decode. Not a general IPFS library — CIDv0, +%%% non-sha2 hashes, multibases other than `b', and IPLD path resolution +%%% are all out of scope. %%% %%% References: -%%% - CIDv1 spec: https://github.com/multiformats/cid -%%% - Multihash spec: https://github.com/multiformats/multihash -%%% - Multibase spec: https://github.com/multiformats/multibase +%%% - CIDv1: https://github.com/multiformats/cid +%%% - Multihash: https://github.com/multiformats/multihash +%%% - Multibase: https://github.com/multiformats/multibase %%% - unsigned-varint: https://github.com/multiformats/unsigned-varint -module(dev_codec_ipfs_cid). -export([encode/3, decode/1]). @@ -21,38 +17,33 @@ -include("include/hb.hrl"). -include_lib("eunit/include/eunit.hrl"). -%% Multicodec codes we care about. Full registry: +%% Multicodec codes. Full registry: %% https://github.com/multiformats/multicodec/blob/master/table.csv -define(CODEC_RAW, 16#55). -define(CODEC_DAG_CBOR, 16#71). -%% Multihash function codes. +%% Multihash function code and sha2-256 digest length. -define(HASH_SHA2_256, 16#12). -define(SHA2_256_LEN, 32). %% Multibase prefix for base32 lowercase (RFC4648, no padding). -define(MB_BASE32_LOWER, $b). -%% @doc Encode a `body' blob as a CIDv1 string, using the given codec name -%% (`<<"raw">>' | `<<"dag-cbor">>') and hash algorithm (`sha2_256' atom, or -%% `<<"sha2-256">>' binary). +%% @doc Encode `Body' as a CIDv1 string under `Codec' (`<<"raw">>' or +%% `<<"dag-cbor">>') and hash algorithm `sha2_256' (atom) or +%% `<<"sha2-256">>' (binary). encode(Codec, HashAlg, Body) when is_binary(Codec) -> encode(codec_code(Codec), HashAlg, Body); encode(CodecCode, <<"sha2-256">>, Body) -> encode(CodecCode, sha2_256, Body); encode(CodecCode, sha2_256, Body) when is_integer(CodecCode), is_binary(Body) -> - MH = multihash(sha2_256, Body), - CIDBin = - << - (varint_encode(1))/binary, - (varint_encode(CodecCode))/binary, - MH/binary - >>, - multibase_encode(CIDBin). - -%% @doc Decode a CIDv1 string into its components. Returns `{error, Reason}' -%% if the string is not a CIDv1 that this module knows how to parse. + multibase_encode( + <<(varint_encode(1))/binary, + (varint_encode(CodecCode))/binary, + (multihash(sha2_256, Body))/binary>>). + +%% @doc Decode a CIDv1 string into its component parts, or `{error, _}'. decode(Bin) when is_binary(Bin) -> case multibase_decode(Bin) of {ok, Raw} -> decode_bytes(Raw); @@ -61,34 +52,24 @@ decode(Bin) when is_binary(Bin) -> decode_bytes(Bin) -> try - {Version, Rest1} = varint_decode(Bin), - case Version of - 1 -> - {CodecCode, Rest2} = varint_decode(Rest1), - {HashCode, Rest3} = varint_decode(Rest2), - {DigestLen, Digest} = varint_decode(Rest3), - case {HashCode, byte_size(Digest)} of - {?HASH_SHA2_256, DigestLen} when DigestLen =:= ?SHA2_256_LEN -> - %% Combine the multihash function and the - %% multicodec into a single `hash-alg' string, the - %% way IPFS tooling names a CID's construction. - Multicodec = codec_name(CodecCode), - HashAlg = <<"sha2-256-", Multicodec/binary>>, - {ok, #{ - <<"version">> => 1, - <<"hash-alg">> => HashAlg, - <<"digest">> => Digest - }}; - {_, L} when L =/= DigestLen -> - {error, {truncated_digest, {declared, DigestLen}, {actual, L}}}; - {Other, _} -> - {error, {unsupported_hash, Other}} - end; - V -> - {error, {unsupported_cid_version, V}} + {1, Rest1} = varint_decode(Bin), + {CodecCode, Rest2} = varint_decode(Rest1), + {HashCode, Rest3} = varint_decode(Rest2), + {DigestLen, Rest4} = varint_decode(Rest3), + case {HashCode, DigestLen, Rest4} of + {?HASH_SHA2_256, ?SHA2_256_LEN, <>} -> + Multicodec = codec_name(CodecCode), + {ok, #{ + <<"version">> => 1, + <<"hash-alg">> => <<"sha2-256-", Multicodec/binary>>, + <<"digest">> => Digest + }}; + {?HASH_SHA2_256, ?SHA2_256_LEN, _} -> + {error, truncated_digest}; + {Other, _, _} -> + {error, {unsupported_hash, Other}} end - catch - _:_ -> {error, malformed_cid} + catch _:_ -> {error, malformed_cid} end. %% @doc Resolve a codec name to its multicodec code. @@ -96,45 +77,43 @@ codec_code(<<"raw">>) -> ?CODEC_RAW; codec_code(<<"dag-cbor">>) -> ?CODEC_DAG_CBOR; codec_code(Other) -> throw({unsupported_codec, Other}). -%% @doc Inverse of `codec_code/1'. Unknown codes round-trip as a `<<"codec-0xHEX">>' -%% binary so that decode never throws on a stranger's CID. +%% @doc Inverse of `codec_code/1'. Unknown codes round-trip as +%% `<<"codec-0xHEX">>' so `decode/1' never throws on a stranger's CID. codec_name(?CODEC_RAW) -> <<"raw">>; codec_name(?CODEC_DAG_CBOR) -> <<"dag-cbor">>; codec_name(N) when is_integer(N) -> iolist_to_binary(io_lib:format("codec-0x~.16b", [N])). -%% @doc Wrap a digest as a multihash binary: <>. +%% @doc Wrap a sha2-256 digest of `Body' as a multihash binary. multihash(sha2_256, Body) when is_binary(Body) -> - Digest = crypto:hash(sha256, Body), - << - (varint_encode(?HASH_SHA2_256))/binary, - (varint_encode(?SHA2_256_LEN))/binary, - Digest/binary - >>. + <<(varint_encode(?HASH_SHA2_256))/binary, + (varint_encode(?SHA2_256_LEN))/binary, + (crypto:hash(sha256, Body))/binary>>. %% @doc Multibase-encode a binary as base32-lowercase, no padding, prefix `b'. multibase_encode(Bin) when is_binary(Bin) -> - Encoded = base32:encode(Bin, [lower, nopad]), - <>. + <>. -%% @doc Multibase-decode. Accepts base32 lowercase (`b'), base32 upper (`B'), -%% and base16 lowercase (`f') defensively. Anything else is `{error, _}'. +%% @doc Multibase-decode. Accepts base32-lower (`b'), base32-upper (`B'), +%% and base16-lower (`f') defensively; anything else is `{error, _}'. multibase_decode(<>) -> - try {ok, base32:decode(pad_base32(string:uppercase(Rest)))} - catch _:_ -> {error, invalid_base32} end; + safe(fun() -> base32:decode(pad_base32(string:uppercase(Rest))) end, + invalid_base32); multibase_decode(<<$B, Rest/binary>>) -> - try {ok, base32:decode(pad_base32(Rest))} - catch _:_ -> {error, invalid_base32} end; + safe(fun() -> base32:decode(pad_base32(Rest)) end, invalid_base32); multibase_decode(<<$f, Rest/binary>>) -> - try {ok, binary:decode_hex(Rest)} - catch _:_ -> {error, invalid_base16} end; + safe(fun() -> binary:decode_hex(Rest) end, invalid_base16); multibase_decode(<>) -> {error, {unsupported_multibase, <>}}; multibase_decode(_) -> {error, empty_cid}. +safe(Fun, ErrorTag) -> + try {ok, Fun()} catch _:_ -> {error, ErrorTag} end. + pad_base32(Bin) -> - %% RFC4648 base32 groups are 40 bits (8 chars). Pad with `=' to a multiple of 8. + %% RFC4648 base32 groups are 40 bits (8 chars). Pad with `=' to a + %% multiple of 8. case (8 - (byte_size(Bin) rem 8)) rem 8 of 0 -> Bin; N -> < - CID = encode(<<"raw">>, sha2_256, <<"hello world">>), ?assertEqual( <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>, - CID - ). + encode(<<"raw">>, sha2_256, <<"hello world">>)). -%% Empty body under the `raw' codec. Cross-checked against -%% `ipfs add --raw-leaves -Q /dev/null'. empty_raw_cid_test() -> - CID = encode(<<"raw">>, sha2_256, <<>>), ?assertEqual( <<"bafkreihdwdcefgh4dqkjv67uzcmw7ojee6xedzdetojuzjevtenxquvyku">>, - CID - ). + encode(<<"raw">>, sha2_256, <<>>)). -%% Known DAG-CBOR CID for the canonical empty-map block (`0xa0`), cross-checked -%% against `ipfs dag put <<<"{}"` with input-codec dag-cbor. empty_dag_cbor_cid_test() -> - CID = encode(<<"dag-cbor">>, sha2_256, <<16#a0>>), ?assertEqual( <<"bafyreigbtj4x7ip5legnfznufuopl4sg4knzc2cof6duas4b3q2fy6swua">>, - CID - ). + encode(<<"dag-cbor">>, sha2_256, <<16#a0>>)). roundtrip_decode_raw_test() -> CID = encode(<<"raw">>, sha2_256, <<"hello world">>), {ok, Parts} = decode(CID), ?assertEqual(<<"sha2-256-raw">>, maps:get(<<"hash-alg">>, Parts)), ?assertEqual(1, maps:get(<<"version">>, Parts)), - ?assertEqual(32, byte_size(maps:get(<<"digest">>, Parts))), - ?assertEqual( - crypto:hash(sha256, <<"hello world">>), - maps:get(<<"digest">>, Parts) - ). + ?assertEqual(crypto:hash(sha256, <<"hello world">>), + maps:get(<<"digest">>, Parts)). roundtrip_decode_dag_cbor_test() -> - CID = encode(<<"dag-cbor">>, sha2_256, <<"body bytes">>), - {ok, Parts} = decode(CID), + {ok, Parts} = decode(encode(<<"dag-cbor">>, sha2_256, <<"body bytes">>)), ?assertEqual(<<"sha2-256-dag-cbor">>, maps:get(<<"hash-alg">>, Parts)). bad_multibase_prefix_test() -> - ?assertMatch({error, {unsupported_multibase, _}}, - decode(<<"Qmfoobar">>)). + ?assertMatch({error, {unsupported_multibase, _}}, decode(<<"Qmfoobar">>)). malformed_cid_test() -> - %% A `b' prefix with a valid base32 body that decodes to nonsense. ?assertMatch({error, _}, decode(<<"baaa">>)). varint_roundtrip_test() -> - lists:foreach( - fun(N) -> - Enc = varint_encode(N), - ?assertEqual({N, <<>>}, varint_decode(Enc)) - end, - [0, 1, 127, 128, 255, 16#55, 16#71, 1234, 16#ff_ff, 16#ff_ff_ff_ff]). + [ ?assertEqual({N, <<>>}, varint_decode(varint_encode(N))) + || N <- [0, 1, 127, 128, 255, 16#55, 16#71, 1234, 16#ffff, 16#ffffffff] ]. varint_truncated_raises_test() -> - %% Continuation bit set but no following byte. ?assertThrow({malformed_varint, _}, varint_decode(<<16#ff>>)). multihash_shape_test() -> MH = multihash(sha2_256, <<"x">>), - %% code(0x12) + len(32) + 32-byte digest = 34 bytes - ?assertEqual(34, byte_size(MH)), <<16#12, 32, Digest:32/binary>> = MH, + ?assertEqual(34, byte_size(MH)), ?assertEqual(crypto:hash(sha256, <<"x">>), Digest). multibase_roundtrip_test() -> diff --git a/src/dev_codec_ipfs_live_test.erl b/src/dev_codec_ipfs_live_test.erl index 08feb3805..d703c19bd 100644 --- a/src/dev_codec_ipfs_live_test.erl +++ b/src/dev_codec_ipfs_live_test.erl @@ -1,25 +1,15 @@ -%%% @doc End-to-end production tests for `~ipfs@1.0': live IPFS network + -%%% live HyperBEAM node + HTTP client, exercising the paths described in -%%% PR #868. +%%% @doc End-to-end tests for `~ipfs@1.0' against live IPFS gateways and +%%% real HyperBEAM nodes, exercising the user-facing flows advertised in +%%% PR #868: %%% -%%% The PR advertises three user-facing flows, each expressed through the -%%% standard AO-Core `~lookup@1.0' device so no kernel edits are required: +%%% 1. `GET /~lookup@1.0/read&target=' serves the body. +%%% 2. First lookup fetches and pins; subsequent lookups resolve locally. +%%% 3. `GET /~lookup@1.0/read&target=/commit&type=signed...' returns +%%% a bundler-ready ANS-104 signed message. %%% -%%% 1. Serve a CID: `GET /~lookup@1.0/read&target=' -%%% 2. Preload a CID: first lookup fetches + pins; subsequent lookups -%%% are local. -%%% 3. Commit for Arweave: -%%% `GET /~lookup@1.0/read&target=/commit -%%% &type=signed&commitment-device=ans104@1.0' -%%% returns the bundler-ready signed message. The final POST to -%%% `~arweave@2.9/tx' needs a topped-up wallet and a configured -%%% bundler endpoint, neither of which is in scope for automated CI. -%%% -%%% `~ipfs@1.0' is an optional, user-loadable device. Each test opts into -%%% it via per-node `preloaded_devices' — the same way a production -%%% operator enables it. Tests skip gracefully if every gateway is -%%% unreachable at the time they run (matches the `hb_store_gateway' -%%% live-test pattern). +%%% Each test opts into the device via per-node `preloaded_devices' — the +%%% same way a production operator would enable it. Tests skip gracefully +%%% when all configured gateways are unreachable. -module(dev_codec_ipfs_live_test). -include_lib("eunit/include/eunit.hrl"). -include("include/hb.hrl"). @@ -33,424 +23,221 @@ <<"https://nftstorage.link">>, <<"https://4everland.io">> ]). +-define(LOOKUP_PATH, + <<"/~lookup@1.0/read&target=", ?HELLO_WORLD_CID/binary>>). -%%%==================================================================== %%% Helpers -%%%==================================================================== -%% @doc Node opts that opt into `~ipfs@1.0' and configure the IPFS -%% gateway store in the chain. -node_opts_with_ipfs() -> +gateway_store() -> + #{ + <<"store-module">> => hb_store_ipfs_gateway, + <<"gateways">> => ?LIVE_GATEWAYS, + <<"timeout">> => 20000 + }. + +ipfs_device() -> + #{ <<"name">> => <<"ipfs@1.0">>, <<"module">> => dev_codec_ipfs }. + +%% @doc Base node opts with `~ipfs@1.0' loaded and a gateway-backed store +%% behind a volatile primary. +node_opts() -> Stock = hb_opts:get(preloaded_devices, [], #{}), #{ cache_control => <<"cache">>, priv_wallet => hb:wallet(), - preloaded_devices => - [ #{ <<"name">> => <<"ipfs@1.0">>, - <<"module">> => dev_codec_ipfs } | Stock ], - store => [ - hb_test_utils:test_store(), - #{ - <<"store-module">> => hb_store_ipfs_gateway, - <<"gateways">> => ?LIVE_GATEWAYS, - <<"timeout">> => 20000 - } - ] + preloaded_devices => [ipfs_device() | Stock], + store => [hb_test_utils:test_store(), gateway_store()] }. -gateways_reachable_for_cid(CID) -> - Store = #{ - <<"store-module">> => hb_store_ipfs_gateway, - <<"gateways">> => ?LIVE_GATEWAYS, - <<"timeout">> => 20000 - }, - case hb_store_ipfs_gateway:read(Store, CID) of - {ok, _} -> true; - _ -> false +%% @doc Run `Fun' if the canonical `hello world' CID is live-reachable; +%% otherwise `?debugFmt' a skip note. Every live test routes through this. +with_live_gateways(Fun) -> + application:ensure_all_started(inets), + application:ensure_all_started(ssl), + case hb_store_ipfs_gateway:read(gateway_store(), ?HELLO_WORLD_CID) of + {ok, _} -> Fun(); + _ -> + ?debugFmt("Skipping: all gateways unreachable for ~s", + [?HELLO_WORLD_CID]) end. -%%%==================================================================== +%% @doc Extract the body from an `hb_http:get' response — sometimes a +%% bare binary, sometimes a map whose `body' may itself be a link. +response_body(R) when is_binary(R) -> R; +response_body(#{ <<"body">> := B }) -> hb_cache:ensure_loaded(B, #{}). + %%% PR Path 1 — Serve a CID from a running node -%%%==================================================================== live_http_get_cid_serves_body_test_() -> - {timeout, 90, fun() -> - application:ensure_all_started(inets), - application:ensure_all_started(ssl), - case gateways_reachable_for_cid(?HELLO_WORLD_CID) of - false -> - ?debugFmt("Skipping: all gateways unreachable for ~s", - [?HELLO_WORLD_CID]); - true -> - NodeURL = hb_http_server:start_node( - node_opts_with_ipfs()), - Path = <<"/~lookup@1.0/read&target=", - ?HELLO_WORLD_CID/binary>>, - {ok, Response} = hb_http:get(NodeURL, Path, #{}), - Body = - case Response of - B when is_binary(B) -> B; - #{ <<"body">> := B } -> - hb_cache:ensure_loaded(B, #{}) - end, - ?assertEqual(?HELLO_WORLD_BODY, Body) - end - end}. - -%% Recomputing the CID from the wire body reproduces the requested CID — -%% the only verification that matters in IPFS. + {timeout, 90, fun() -> with_live_gateways(fun() -> + NodeURL = hb_http_server:start_node(node_opts()), + {ok, R} = hb_http:get(NodeURL, ?LOOKUP_PATH, #{}), + ?assertEqual(?HELLO_WORLD_BODY, response_body(R)) + end) end}. + +%% Recomputing the CID from the wire body must reproduce the requested +%% CID — the only verification that matters in IPFS. live_http_body_round_trips_to_cid_test_() -> - {timeout, 90, fun() -> - application:ensure_all_started(inets), - application:ensure_all_started(ssl), - case gateways_reachable_for_cid(?HELLO_WORLD_CID) of - false -> - ?debugFmt("Skipping: all gateways unreachable", []); - true -> - NodeURL = hb_http_server:start_node( - node_opts_with_ipfs()), - Path = <<"/~lookup@1.0/read&target=", - ?HELLO_WORLD_CID/binary>>, - {ok, Response} = hb_http:get(NodeURL, Path, #{}), - Body = - case Response of - B when is_binary(B) -> B; - #{ <<"body">> := B } -> - hb_cache:ensure_loaded(B, #{}) - end, - Recomputed = - dev_codec_ipfs_cid:encode( - <<"raw">>, sha2_256, Body), - ?assertEqual(?HELLO_WORLD_CID, Recomputed) - end - end}. + {timeout, 90, fun() -> with_live_gateways(fun() -> + NodeURL = hb_http_server:start_node(node_opts()), + {ok, R} = hb_http:get(NodeURL, ?LOOKUP_PATH, #{}), + ?assertEqual(?HELLO_WORLD_CID, + dev_codec_ipfs_cid:encode( + <<"raw">>, sha2_256, response_body(R))) + end) end}. -%%%==================================================================== %%% PR Path 2 — Preload / en-masse cache a set of CIDs -%%%==================================================================== -%% The first HTTP lookup pulls the CID via the gateway and pins it to -%% the node's local filesystem store. A second lookup — against an -%% opts-set that only contains the local store — still succeeds, proving -%% the HTTP request-response pipeline's write-through is doing the job. -%% This is the mechanism behind the PR's "HEAD /CID preload" claim. +%% First lookup pulls the CID through the gateway and pins it to the +%% node's primary store; a second direct probe of the primary succeeds. live_cache_preload_pattern_test_() -> - {timeout, 90, fun() -> - application:ensure_all_started(inets), - application:ensure_all_started(ssl), - case gateways_reachable_for_cid(?HELLO_WORLD_CID) of - false -> - ?debugFmt("Skipping: all gateways unreachable", []); - true -> - LocalStore = #{ - <<"store-module">> => hb_store_fs, - <<"name">> => - iolist_to_binary( - ["cache-TEST/ipfs-preload-", - integer_to_list( - erlang:system_time(microsecond))]) - }, - hb_store:reset(LocalStore), - Stock = hb_opts:get(preloaded_devices, [], #{}), - NodeURL = hb_http_server:start_node(#{ - cache_control => <<"cache">>, - priv_wallet => hb:wallet(), - preloaded_devices => - [ #{ <<"name">> => <<"ipfs@1.0">>, - <<"module">> => dev_codec_ipfs } | Stock ], - store => [ - LocalStore, - #{ - <<"store-module">> => hb_store_ipfs_gateway, - <<"gateways">> => ?LIVE_GATEWAYS, - <<"timeout">> => 20000 - } - ] - }), - %% 1. First HTTP read — fetches from the gateway and the - %% cache-through write path pins it to LocalStore. - Path = <<"/~lookup@1.0/read&target=", - ?HELLO_WORLD_CID/binary>>, - {ok, R1} = hb_http:get(NodeURL, Path, #{}), - Body1 = - case R1 of - B1 when is_binary(B1) -> B1; - #{ <<"body">> := B1 } -> - hb_cache:ensure_loaded(B1, #{}) - end, - ?assertEqual(?HELLO_WORLD_BODY, Body1), - %% 2. Second lookup driven directly at the local store - %% (no gateway, no node). If it resolves, the HTTP call - %% pinned the CID. - LocalOpts = #{ store => [LocalStore] }, - {ok, R2} = hb_cache:read(?HELLO_WORLD_CID, LocalOpts), - ?assertEqual( - ?HELLO_WORLD_BODY, - hb_cache:ensure_loaded( - hb_ao:get(<<"body">>, R2, <<>>, LocalOpts), - LocalOpts)) - end - end}. - -%% HB-to-HB transport round-trip: a node receives an IPFS-committed -%% response via HTTP and must see the commitment at the CID key, not at -%% `h(sig)'. This is what the `id=' extension in + {timeout, 90, fun() -> with_live_gateways(fun() -> + LocalStore = #{ + <<"store-module">> => hb_store_fs, + <<"name">> => + iolist_to_binary( + ["cache-TEST/ipfs-preload-", + integer_to_list(erlang:system_time(microsecond))]) + }, + hb_store:reset(LocalStore), + Stock = hb_opts:get(preloaded_devices, [], #{}), + NodeURL = hb_http_server:start_node(#{ + cache_control => <<"cache">>, + priv_wallet => hb:wallet(), + preloaded_devices => [ipfs_device() | Stock], + store => [LocalStore, gateway_store()] + }), + {ok, R1} = hb_http:get(NodeURL, ?LOOKUP_PATH, #{}), + ?assertEqual(?HELLO_WORLD_BODY, response_body(R1)), + LocalOpts = #{ store => [LocalStore] }, + {ok, R2} = hb_cache:read(?HELLO_WORLD_CID, LocalOpts), + ?assertEqual(?HELLO_WORLD_BODY, + hb_cache:ensure_loaded( + hb_ao:get(<<"body">>, R2, <<>>, LocalOpts), LocalOpts)) + end) end}. + +%% Transport: an IPFS commitment must arrive on the client side under its +%% CID map key, not under `h(Sig)'. This is what the `id=' extension in %% `dev_codec_httpsig_siginfo' preserves. live_http_ipfs_commitment_survives_transport_test_() -> - {timeout, 90, fun() -> - application:ensure_all_started(inets), - application:ensure_all_started(ssl), - case gateways_reachable_for_cid(?HELLO_WORLD_CID) of - false -> - ?debugFmt("Skipping: all gateways unreachable", []); - true -> - %% Server: knows ipfs@1.0, has a gateway in its store. - NodeOpts = node_opts_with_ipfs(), - NodeURL = hb_http_server:start_node(NodeOpts), - %% Client: knows ipfs@1.0 too, but no gateway; it relies - %% entirely on the server-delivered message. - ClientOpts = #{ - preloaded_devices => - [ #{ <<"name">> => <<"ipfs@1.0">>, - <<"module">> => dev_codec_ipfs } | - hb_opts:get(preloaded_devices, [], #{}) ] - }, - Path = <<"/~lookup@1.0/read&target=", - ?HELLO_WORLD_CID/binary>>, - {ok, Response} = hb_http:get(NodeURL, Path, ClientOpts), - Msg = - case Response of - M when is_map(M) -> M; - B when is_binary(B) -> #{ <<"body">> => B } - end, - Comms = maps:get(<<"commitments">>, Msg, #{}), - IPFSComms = - maps:filter( - fun(_K, #{<<"commitment-device">> := <<"ipfs@1.0">>}) -> - true; - (_K, _) -> false - end, - Comms - ), - case maps:to_list(IPFSComms) of - [] -> - ?debugFmt("Skipping: no IPFS commitment on " - "response (likely gateway path not " - "taken on this run)", []); - [{Key, _}] -> - ?assertEqual(?HELLO_WORLD_CID, Key); - Many -> - ?debugFmt("multiple ipfs commitments: ~p", [Many]) - end + {timeout, 90, fun() -> with_live_gateways(fun() -> + NodeURL = hb_http_server:start_node(node_opts()), + ClientOpts = #{ preloaded_devices => + [ipfs_device() | hb_opts:get(preloaded_devices, [], #{})] }, + {ok, R} = hb_http:get(NodeURL, ?LOOKUP_PATH, ClientOpts), + Msg = + case R of + M when is_map(M) -> M; + B when is_binary(B) -> #{ <<"body">> => B } + end, + IpfsComms = maps:filter( + fun(_K, #{<<"commitment-device">> := <<"ipfs@1.0">>}) -> true; + (_K, _) -> false end, + maps:get(<<"commitments">>, Msg, #{})), + case maps:to_list(IpfsComms) of + [] -> ?debugFmt( + "Skipping: no IPFS commitment on response", []); + [{CID, _}] -> ?assertEqual(?HELLO_WORLD_CID, CID); + Many -> ?debugFmt("multiple ipfs commitments: ~p", [Many]) end - end}. + end) end}. -%% Two in-process HyperBEAM nodes in one test, wired so that a client -%% request on Node B transparently pulls content through Node A: -%% -%% Node A — "upstream" — has ONLY `hb_store_ipfs_gateway' in its -%% store chain. It has no persistent local cache of its own; every -%% request routes out to the real IPFS network. -%% -%% Node B — "downstream" — has a primary filesystem store -%% (`hb_test_utils:test_store/0' — freshly isolated per eunit) and, -%% behind it, `hb_store_remote_node' pointing at Node A with its -%% `local-store' set to the same primary. That means: a cache miss -%% on B falls through to A, and A's response is written through to -%% B's primary on the way back. -%% -%% Flow: +%% Two in-process nodes, wired so a client request on Node B transparently +%% pulls through Node A: %% -%% (1) Client: `GET NodeB/~lookup@1.0/read&target='. -%% (2) B's primary misses. -%% (3) B's `hb_store_remote_node' calls -%% `NodeA/~cache@1.0/read&target='. -%% (4) A's `dev_cache:read' calls `hb_cache:read(, AOpts)'. -%% A's store chain is just the IPFS gateway; the gateway fetches -%% from the real IPFS network, verifies the digest, returns an -%% IPFS-committed message. -%% (5) A's HTTP response carries the commitment on its signature-input -%% line with `id=""'; B's `hb_http:get' / siginfo decode -%% reconstructs the commitment at the CID map key. -%% (6) `hb_store_remote_node:maybe_cache' writes the message through -%% to B's primary. The CID is picked up as an AltID by -%% `hb_cache:write/3' and linked to the uncommitted root ID. -%% (7) B returns the body to the client. +%% Node A — upstream — has ONLY `hb_store_ipfs_gateway'. Every read +%% passes through to the real IPFS network. +%% Node B — downstream — has a primary fs store plus +%% `hb_store_remote_node' pointed at Node A with `local-store' set to +%% the primary. B's cache misses fall through to A; A's responses +%% write through into B's primary on return. %% -%% Then we kill A's HTTP listener and ask B again for the same CID. -%% B's primary now has the data, so the request is served locally with -%% no upstream traffic. +%% After the first query pins the body to B's primary, Node A is killed. +%% The next query on B must still succeed — served entirely from B's cache. live_hb_to_hb_remote_store_relay_test_() -> - {timeout, 120, fun() -> - application:ensure_all_started(inets), - application:ensure_all_started(ssl), - case gateways_reachable_for_cid(?HELLO_WORLD_CID) of - false -> - ?debugFmt("Skipping: all gateways unreachable", []); - true -> - %% Each node needs its own wallet — the HB server_id is - %% derived from `priv_wallet''s address - %% (`hb_http_server:new_server/1:175'), so shared - %% wallets collapse two nodes onto one listener. - PortA = 18770, - PortB = 18771, - Stock = hb_opts:get(preloaded_devices, [], #{}), - IPFSDev = #{ <<"name">> => <<"ipfs@1.0">>, - <<"module">> => dev_codec_ipfs }, - - %% Node A: nothing but the IPFS gateway. No primary - %% store — every read passes through to real IPFS. - NodeAWallet = ar_wallet:new(), - NodeAServerID = - hb_util:human_id( - ar_wallet:to_address(NodeAWallet)), - NodeAURL = hb_http_server:start_node(#{ - port => PortA, - priv_wallet => NodeAWallet, - cache_control => <<"cache">>, - preloaded_devices => [IPFSDev | Stock], - store => [ - #{ - <<"store-module">> => hb_store_ipfs_gateway, - <<"gateways">> => ?LIVE_GATEWAYS, - <<"timeout">> => 20000 - } - ] - }), - - %% Node B: primary fs store, with `hb_store_remote_node' - %% pointed at A as fallback. `local-store' on the remote - %% config makes A's responses write through to the - %% primary. - NodeBPrimary = hb_test_utils:test_store(), - NodeBURL = hb_http_server:start_node(#{ - port => PortB, - priv_wallet => ar_wallet:new(), - cache_control => <<"cache">>, - preloaded_devices => [IPFSDev | Stock], - store => [ - NodeBPrimary, - #{ - <<"store-module">> => hb_store_remote_node, - <<"node">> => NodeAURL, - <<"local-store">> => [NodeBPrimary] - } - ] - }), - - Path = <<"/~lookup@1.0/read&target=", - ?HELLO_WORLD_CID/binary>>, - - %% (1) Query B. Pulls through A, which pulls from real - %% IPFS. Write-through caches it on B's primary on the - %% return path. Then B serves the body to the client. - {ok, R1} = hb_http:get(NodeBURL, Path, #{}), - Body1 = response_body(R1), - ?assertEqual(?HELLO_WORLD_BODY, Body1), + {timeout, 120, fun() -> with_live_gateways(fun() -> + %% Two distinct wallets — the HB server_id is derived from + %% `priv_wallet''s address, so shared wallets collapse two nodes + %% onto one listener. + Stock = hb_opts:get(preloaded_devices, [], #{}), + NodeAWallet = ar_wallet:new(), + NodeAServerID = + hb_util:human_id(ar_wallet:to_address(NodeAWallet)), + NodeAURL = hb_http_server:start_node(#{ + port => 18770, + priv_wallet => NodeAWallet, + cache_control => <<"cache">>, + preloaded_devices => [ipfs_device() | Stock], + store => [gateway_store()] + }), + NodeBPrimary = hb_test_utils:test_store(), + NodeBURL = hb_http_server:start_node(#{ + port => 18771, + priv_wallet => ar_wallet:new(), + cache_control => <<"cache">>, + preloaded_devices => [ipfs_device() | Stock], + store => [ + NodeBPrimary, + #{ + <<"store-module">> => hb_store_remote_node, + <<"node">> => NodeAURL, + <<"local-store">> => [NodeBPrimary] + } + ] + }), + %% (1) First query: B->A->real IPFS, cached on B's primary on return. + {ok, R1} = hb_http:get(NodeBURL, ?LOOKUP_PATH, #{}), + ?assertEqual(?HELLO_WORLD_BODY, response_body(R1)), + %% (2) B's primary now holds the message keyed by the CID. + LocalOnly = #{ store => [NodeBPrimary] }, + {ok, MsgOnB} = hb_cache:read(?HELLO_WORLD_CID, LocalOnly), + ?assertEqual(?HELLO_WORLD_BODY, + hb_cache:ensure_loaded( + maps:get(<<"body">>, MsgOnB), LocalOnly)), + ?assert(maps:is_key(?HELLO_WORLD_CID, + maps:get(<<"commitments">>, MsgOnB, #{}))), + %% (3) Kill Node A; (4) B must still serve from primary. + ok = cowboy:stop_listener(NodeAServerID), + {ok, R2} = hb_http:get(NodeBURL, ?LOOKUP_PATH, #{}), + ?assertEqual(?HELLO_WORLD_BODY, response_body(R2)) + end) end}. - %% (2) Direct probe of B's primary: the CID is now there, - %% keyed by the CID in the commitments map. - LocalOnly = #{ store => [NodeBPrimary] }, - {ok, MsgOnB0} = - hb_cache:read(?HELLO_WORLD_CID, LocalOnly), - ?assertEqual(?HELLO_WORLD_BODY, - hb_cache:ensure_loaded( - maps:get(<<"body">>, MsgOnB0), LocalOnly)), - CommsOnB0 = maps:get(<<"commitments">>, MsgOnB0, #{}), - ?assert(maps:is_key(?HELLO_WORLD_CID, CommsOnB0)), - - %% (3) Kill Node A's HTTP listener. ranch / cowboy use - %% the server_id as the listener ref. - ok = cowboy:stop_listener(NodeAServerID), - - %% (4) Ask B again. A is gone; B must serve from primary. - {ok, R2} = hb_http:get(NodeBURL, Path, #{}), - Body2 = response_body(R2), - ?assertEqual(?HELLO_WORLD_BODY, Body2) - end - end}. - -%% @doc Extract the response body binary from `hb_http:get''s return -%% shape — sometimes a bare binary (simple body pass-through), sometimes -%% a full message map with a `body' field that may itself be a link. -response_body(R) when is_binary(R) -> - R; -response_body(#{ <<"body">> := B }) -> - hb_cache:ensure_loaded(B, #{}). - -%%%==================================================================== %%% PR Path 3 — Commit IPFS content as ANS-104 via the node's wallet -%%%==================================================================== -%% The server-side-commit half of the push-to-Arweave chain: node reads -%% CID, applies an ANS-104 signed commitment using its own wallet, and -%% returns a bundler-ready message. The final POST to `~arweave@2.9/tx' -%% (or `~bundler@1.0/tx') needs a funded wallet and a reachable bundler, -%% neither of which is in scope for automated CI. +%% The server-side half of the push-to-Arweave chain: node reads the CID +%% and re-commits as ANS-104 signed. The final POST to `~arweave@2.9/tx' +%% requires a funded wallet and a reachable bundler, neither in scope for +%% automated CI. live_lookup_then_ans104_commit_test_() -> - {timeout, 90, fun() -> - application:ensure_all_started(inets), - application:ensure_all_started(ssl), - case gateways_reachable_for_cid(?HELLO_WORLD_CID) of - false -> - ?debugFmt("Skipping: all gateways unreachable", []); - true -> - NodeURL = hb_http_server:start_node( - node_opts_with_ipfs()), - Path = - <<"/~lookup@1.0/read&target=", - ?HELLO_WORLD_CID/binary, - "/commit&type=signed&commitment-device=ans104@1.0">>, - {ok, Response} = hb_http:get(NodeURL, Path, #{}), - Body = - case Response of - B when is_binary(B) -> B; - #{ <<"body">> := B } -> - hb_cache:ensure_loaded(B, #{}) - end, - ?assertEqual(?HELLO_WORLD_BODY, Body) - end - end}. + {timeout, 90, fun() -> with_live_gateways(fun() -> + NodeURL = hb_http_server:start_node(node_opts()), + Path = <>, + {ok, R} = hb_http:get(NodeURL, Path, #{}), + ?assertEqual(?HELLO_WORLD_BODY, response_body(R)) + end) end}. -%%%==================================================================== %%% Lua computation across IPFS-resolved data -%%%==================================================================== live_lua_computation_over_ipfs_body_test_() -> - {timeout, 90, fun() -> - application:ensure_all_started(inets), - application:ensure_all_started(ssl), - case gateways_reachable_for_cid(?HELLO_WORLD_CID) of - false -> - ?debugFmt("Skipping: all gateways unreachable", []); - true -> - NodeOpts = node_opts_with_ipfs(), - NodeURL = hb_http_server:start_node(NodeOpts), - {ok, IpfsMsg} = hb_cache:read(?HELLO_WORLD_CID, NodeOpts), - Body = hb_cache:ensure_loaded( - hb_ao:get(<<"body">>, IpfsMsg, <<>>, NodeOpts), - NodeOpts), - ?assertEqual(?HELLO_WORLD_BODY, Body), - LuaSource = - <<"function byte_length(base, req)\n" - " return #base.body\n" - "end\n">>, - Base = #{ - <<"device">> => <<"lua@5.3a">>, - <<"content-type">> => <<"application/lua">>, - <<"body">> => LuaSource, - <<"function">> => <<"byte_length">>, - <<"parameters">> => [ #{ <<"body">> => Body } ] - }, - Result = - hb_ao:get( - <<"byte_length">>, - Base, - undefined, - NodeOpts - ), - ?assertEqual(byte_size(?HELLO_WORLD_BODY), Result), - {ok, _} = hb_http:get(NodeURL, - <<"/~meta@1.0/info">>, #{}) - end - end}. + {timeout, 90, fun() -> with_live_gateways(fun() -> + NodeOpts = node_opts(), + NodeURL = hb_http_server:start_node(NodeOpts), + {ok, IpfsMsg} = hb_cache:read(?HELLO_WORLD_CID, NodeOpts), + Body = hb_cache:ensure_loaded( + hb_ao:get(<<"body">>, IpfsMsg, <<>>, NodeOpts), NodeOpts), + ?assertEqual(?HELLO_WORLD_BODY, Body), + Base = #{ + <<"device">> => <<"lua@5.3a">>, + <<"content-type">> => <<"application/lua">>, + <<"body">> => + <<"function byte_length(base, req)\n" + " return #base.body\n" + "end\n">>, + <<"function">> => <<"byte_length">>, + <<"parameters">> => [ #{ <<"body">> => Body } ] + }, + ?assertEqual(byte_size(?HELLO_WORLD_BODY), + hb_ao:get(<<"byte_length">>, Base, undefined, NodeOpts)), + {ok, _} = hb_http:get(NodeURL, <<"/~meta@1.0/info">>, #{}) + end) end}. diff --git a/src/dev_codec_ipfs_test.erl b/src/dev_codec_ipfs_test.erl index b8e930775..a8c649078 100644 --- a/src/dev_codec_ipfs_test.erl +++ b/src/dev_codec_ipfs_test.erl @@ -1,33 +1,25 @@ -%%% @doc Integration tests for `~ipfs@1.0'. The unit-level tests live inline -%%% in `dev_codec_ipfs' and `dev_codec_ipfs_cid'. This module covers: -%%% 1. Dispatch through `hb_message:commit/3' and `hb_message:verify/3' so -%%% the device behaves correctly under the standard AO-Core machinery. -%%% 2. The cache linkage proof: writing a message with a CID commitment -%%% makes `hb_cache:read(CID, Opts)' return the message, with no kernel -%%% changes. This is the load-bearing claim of the phase 1 design. +%%% @doc Integration tests for `~ipfs@1.0': dispatch through +%%% `hb_message:commit/3' and `hb_message:verify/3', cache linkage from CID +%%% to message, and the `to/3'+`from/3' dag-cbor path. Unit-level tests +%%% live inline in `dev_codec_ipfs' and `dev_codec_ipfs_cid'. -module(dev_codec_ipfs_test). -include_lib("eunit/include/eunit.hrl"). -include("include/hb.hrl"). -%% Canonical IPFS ground truth: `hello world' under the `raw' codec. -define(HELLO_WORLD, <<"hello world">>). -define(HELLO_WORLD_CID, <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>). +-define(EMPTY_MAP_CID, + <<"bafyreigbtj4x7ip5legnfznufuopl4sg4knzc2cof6duas4b3q2fy6swua">>). -%%%==================================================================== -%%% Test helpers -%%%==================================================================== +%%% Helpers -%% @doc Build an Opts map that makes `~ipfs@1.0' available to the AO-Core -%% device loader without editing `hb_opts:preloaded_devices/0'. This is how -%% a production operator would enable the device: in node config, not by -%% patching the kernel. We use a volatile store so tests are isolated. +%% @doc Test opts that opt into `~ipfs@1.0' via `preloaded_devices' and use +%% a volatile store for isolation — the same pattern a production operator +%% would use to enable the device without editing the kernel. opts() -> - Base = #{ store => hb_test_utils:test_store() }, - opts(Base). + opts(#{ store => hb_test_utils:test_store() }). opts(Base) -> - %% Merge our entry into whatever `preloaded_devices' the node would - %% normally use, so we do not hide any stock devices. Stock = hb_opts:get(preloaded_devices, [], Base), Base#{ preloaded_devices => @@ -35,145 +27,80 @@ opts(Base) -> <<"module">> => dev_codec_ipfs } | Stock ] }. -%%%==================================================================== +%% @doc Commit `Msg' with an unsigned `~ipfs@1.0' commitment. `HashAlg' is +%% optional (defaults to the codec's `sha2-256-raw'). +ipfs_commit(Msg, Opts) -> + ipfs_commit(Msg, Opts, #{}). +ipfs_commit(Msg, Opts, Extra) -> + hb_message:commit(Msg, Opts, Extra#{ + <<"commitment-device">> => <<"ipfs@1.0">>, + <<"type">> => <<"unsigned">> + }). + %%% 1. Dispatch through hb_message:commit / hb_message:verify -%%%==================================================================== hb_message_commit_dispatches_to_us_test() -> Opts = opts(), - Msg = #{ <<"body">> => ?HELLO_WORLD }, - CommitReq = #{ - <<"commitment-device">> => <<"ipfs@1.0">>, - <<"type">> => <<"unsigned">> - }, - Committed = hb_message:commit(Msg, Opts, CommitReq), + Committed = ipfs_commit(#{ <<"body">> => ?HELLO_WORLD }, Opts), Commitments = maps:get(<<"commitments">>, Committed), ?assert(maps:is_key(?HELLO_WORLD_CID, Commitments)), - Commitment = maps:get(?HELLO_WORLD_CID, Commitments), - ?assertEqual(<<"ipfs@1.0">>, maps:get(<<"commitment-device">>, Commitment)). + ?assertEqual(<<"ipfs@1.0">>, + maps:get(<<"commitment-device">>, + maps:get(?HELLO_WORLD_CID, Commitments))). hb_message_verify_dispatches_to_us_test() -> Opts = opts(), - Msg = #{ <<"body">> => ?HELLO_WORLD }, - CommitReq = #{ - <<"commitment-device">> => <<"ipfs@1.0">>, - <<"type">> => <<"unsigned">> - }, - Committed = hb_message:commit(Msg, Opts, CommitReq), - %% Verify by commitment-id. - ?assertEqual( - true, - hb_message:verify( - Committed, - #{ <<"commitment-ids">> => [?HELLO_WORLD_CID] }, - Opts - ) - ). + Committed = ipfs_commit(#{ <<"body">> => ?HELLO_WORLD }, Opts), + ?assert(hb_message:verify( + Committed, #{ <<"commitment-ids">> => [?HELLO_WORLD_CID] }, Opts)). verify_rejects_tampered_body_via_hb_message_test() -> Opts = opts(), - Msg = #{ <<"body">> => ?HELLO_WORLD }, - CommitReq = #{ - <<"commitment-device">> => <<"ipfs@1.0">>, - <<"type">> => <<"unsigned">> - }, - Committed = hb_message:commit(Msg, Opts, CommitReq), + Committed = ipfs_commit(#{ <<"body">> => ?HELLO_WORLD }, Opts), Tampered = Committed#{ <<"body">> => <<"hello earth">> }, - ?assertEqual( - false, - hb_message:verify( - Tampered, - #{ <<"commitment-ids">> => [?HELLO_WORLD_CID] }, - Opts - ) - ). + ?assertNot(hb_message:verify( + Tampered, #{ <<"commitment-ids">> => [?HELLO_WORLD_CID] }, Opts)). committed_returns_body_key_test() -> - %% `hb_message:committed/3' reads each commitment's own `committed' list - %% and takes the intersection. For a single `~ipfs@1.0' commitment that - %% list is exactly `[<<"body">>]'. Opts = opts(), - Msg = #{ <<"body">> => ?HELLO_WORLD }, - Committed = - hb_message:commit( - Msg, Opts, - #{ <<"commitment-device">> => <<"ipfs@1.0">>, - <<"type">> => <<"unsigned">> } - ), - Keys = - hb_message:committed( - Committed, - [?HELLO_WORLD_CID], - Opts - ), - ?assertEqual([<<"body">>], Keys). + Committed = ipfs_commit(#{ <<"body">> => ?HELLO_WORLD }, Opts), + ?assertEqual([<<"body">>], + hb_message:committed(Committed, [?HELLO_WORLD_CID], Opts)). -%%%==================================================================== -%%% 2. Cache linkage — the load-bearing proof -%%%==================================================================== +%%% 2. Cache linkage — the load-bearing claim of phase 1 -%% @doc Write a message with an IPFS commitment to the cache, then look it -%% up by the CID alone. This is what makes `GET /' work without any -%% kernel change: `hb_cache:do_write_message/3' links commitment IDs to the -%% uncommitted root ID, and `hb_cache:read/2' follows that link. +%% @doc Write a committed message to the cache and look it up by CID +%% alone. `hb_cache:do_write_message/3' links commitment IDs to the +%% uncommitted root; `hb_cache:read/2' follows that link. cache_links_cid_to_uncommitted_id_test() -> Opts = opts(), - Msg = #{ <<"body">> => ?HELLO_WORLD }, - Committed = - hb_message:commit( - Msg, Opts, - #{ <<"commitment-device">> => <<"ipfs@1.0">>, - <<"type">> => <<"unsigned">> } - ), - {ok, _UncommittedID} = hb_cache:write(Committed, Opts), - %% The headline claim: reading by CID returns the cached message. + Committed = ipfs_commit(#{ <<"body">> => ?HELLO_WORLD }, Opts), + {ok, _} = hb_cache:write(Committed, Opts), {ok, Recovered} = hb_cache:read(?HELLO_WORLD_CID, Opts), - RecoveredBody = hb_cache:ensure_loaded( - maps:get(<<"body">>, Recovered), Opts - ), - ?assertEqual(?HELLO_WORLD, RecoveredBody), - %% Commitment survives the roundtrip. - Commitments = maps:get(<<"commitments">>, Recovered, #{}), - ?assert(maps:is_key(?HELLO_WORLD_CID, Commitments)). + ?assertEqual(?HELLO_WORLD, + hb_cache:ensure_loaded(maps:get(<<"body">>, Recovered), Opts)), + ?assert(maps:is_key(?HELLO_WORLD_CID, + maps:get(<<"commitments">>, Recovered, #{}))). -%% @doc A message can carry both an ANS-104 unsigned commitment AND an -%% `~ipfs@1.0' commitment; both commitment IDs independently resolve back -%% to the same cached message. This confirms `~ipfs@1.0' is additive and -%% does not conflict with any existing commitment device. +%% @doc Multiple commitment devices on one message do not conflict: the +%% CID still resolves through the cache. multiple_commitment_devices_coexist_test() -> Opts = opts(), - Msg = #{ <<"body">> => ?HELLO_WORLD }, - WithIpfs = - hb_message:commit( - Msg, Opts, - #{ <<"commitment-device">> => <<"ipfs@1.0">>, - <<"type">> => <<"unsigned">> } - ), - {ok, _UID} = hb_cache:write(WithIpfs, Opts), + Committed = ipfs_commit(#{ <<"body">> => ?HELLO_WORLD }, Opts), + {ok, _} = hb_cache:write(Committed, Opts), {ok, ViaCID} = hb_cache:read(?HELLO_WORLD_CID, Opts), - ?assertEqual( - ?HELLO_WORLD, - hb_cache:ensure_loaded(maps:get(<<"body">>, ViaCID), Opts) - ). + ?assertEqual(?HELLO_WORLD, + hb_cache:ensure_loaded(maps:get(<<"body">>, ViaCID), Opts)). -%%%==================================================================== -%%% 3. Phase 2 — to/3 and from/3 via hb_message:convert -%%%==================================================================== +%%% 3. Phase 2 — to/3 and from/3 through hb_message:convert -%% Encoding a plain TABM to dag-cbor produces bytes byte-identical to the -%% ones the pure CBOR encoder would have produced on the same native map. to_dag_cbor_simple_test() -> - Opts = opts(), - Msg = #{ <<"hello">> => <<"world">> }, - Bytes = hb_message:convert(Msg, <<"ipfs@1.0">>, Opts), - ?assert(is_binary(Bytes)), - ?assertEqual( - <<16#a1, 16#65, "hello", 16#65, "world">>, - Bytes - ). + Bytes = hb_message:convert( + #{ <<"hello">> => <<"world">> }, <<"ipfs@1.0">>, opts()), + ?assertEqual(<<16#a1, 16#65, "hello", 16#65, "world">>, Bytes). -%% Roundtripping a typed HyperBEAM message through dag-cbor preserves its -%% rich types: integers, floats, booleans, null, lists, nested maps. +%% Roundtripping a typed message through dag-cbor preserves rich types: +%% integers, floats, booleans, null, lists, nested maps. roundtrip_typed_message_test() -> Opts = opts(), Msg = #{ @@ -183,132 +110,92 @@ roundtrip_typed_message_test() -> <<"admin">> => true, <<"parent">> => null, <<"tags">> => [<<"a">>, <<"b">>, <<"c">>], - <<"nested">> => #{ - <<"k">> => <<"v">>, - <<"n">> => -42 - } + <<"nested">> => #{ <<"k">> => <<"v">>, <<"n">> => -42 } }, Bytes = hb_message:convert(Msg, <<"ipfs@1.0">>, Opts), - Decoded = - hb_message:convert( - Bytes, - <<"structured@1.0">>, - <<"ipfs@1.0">>, - Opts - ), + Decoded = hb_message:convert( + Bytes, <<"structured@1.0">>, <<"ipfs@1.0">>, Opts), ?assert(hb_message:match(Msg, Decoded, strict, Opts)). -%% Encoding is deterministic: re-encoding must yield the same bytes, and two -%% logically equal maps constructed in different orders also produce the -%% same bytes. +%% Encoding is deterministic: two differently-ordered source maps produce +%% the same bytes, and re-encoding is stable. encoding_is_deterministic_test() -> Opts = opts(), - Msg1 = #{ <<"a">> => 1, <<"bb">> => 2, <<"ccc">> => 3 }, - Msg2 = #{ <<"ccc">> => 3, <<"a">> => 1, <<"bb">> => 2 }, - Bytes1 = hb_message:convert(Msg1, <<"ipfs@1.0">>, Opts), - Bytes2 = hb_message:convert(Msg2, <<"ipfs@1.0">>, Opts), - ?assertEqual(Bytes1, Bytes2), - %% Re-encoding is stable. - ?assertEqual(Bytes1, hb_message:convert(Msg1, <<"ipfs@1.0">>, Opts)). - -%% The CID computed by `commit/3' over the bytes produced by `to/3' is the -%% same CID you would get from `ipfs dag put'. This is the canonical -%% "integrates with the real IPFS network" proof. + B1 = hb_message:convert( + #{ <<"a">> => 1, <<"bb">> => 2, <<"ccc">> => 3 }, + <<"ipfs@1.0">>, Opts), + B2 = hb_message:convert( + #{ <<"ccc">> => 3, <<"a">> => 1, <<"bb">> => 2 }, + <<"ipfs@1.0">>, Opts), + ?assertEqual(B1, B2), + ?assertEqual(B1, hb_message:convert( + #{ <<"a">> => 1, <<"bb">> => 2, <<"ccc">> => 3 }, + <<"ipfs@1.0">>, Opts)). + +%% Committing the dag-cbor bytes of a message yields a CIDv1 identical to +%% the one `ipfs dag put --input-codec dag-cbor' would produce. cid_matches_dag_cbor_of_message_test() -> Opts = opts(), - Msg = #{ <<"hello">> => <<"world">> }, - %% 1. Encode message to dag-cbor bytes. - Bytes = hb_message:convert(Msg, <<"ipfs@1.0">>, Opts), - %% 2. Build a minimal message carrying those bytes in `body'. - CarrierMsg = #{ <<"body">> => Bytes }, - %% 3. Compute the dag-cbor CID over the body. - Committed = - hb_message:commit( - CarrierMsg, - Opts, - #{ <<"commitment-device">> => <<"ipfs@1.0">>, - <<"type">> => <<"unsigned">>, - <<"hash-alg">> => <<"sha2-256-dag-cbor">> } - ), + Bytes = hb_message:convert( + #{ <<"hello">> => <<"world">> }, <<"ipfs@1.0">>, Opts), + Committed = ipfs_commit( + #{ <<"body">> => Bytes }, Opts, + #{ <<"hash-alg">> => <<"sha2-256-dag-cbor">> }), [CID] = maps:keys(maps:get(<<"commitments">>, Committed)), - %% Sanity: the CID is a dag-cbor + sha2-256 CIDv1 over the bytes. {ok, Parts} = dev_codec_ipfs_cid:decode(CID), ?assertEqual(<<"sha2-256-dag-cbor">>, maps:get(<<"hash-alg">>, Parts)), ?assertEqual(crypto:hash(sha256, Bytes), maps:get(<<"digest">>, Parts)), - %% The CID is also what a library like js-dag-cbor would produce on the - %% same logical message, since our encoding is the deterministic subset - %% per the dag-cbor spec. ?assertMatch(<<"bafyrei", _:52/binary>>, CID). -%% Refusing to encode messages that contain an atom we cannot represent. -%% Dag-cbor has no atom type beyond null/true/false; we surface this as -%% a clean error tuple instead of silently lying. +%% Atoms outside `null/true/false' have no dag-cbor representation. unsupported_atom_rejected_test() -> - Opts = opts(), - Msg = #{ <<"kind">> => something }, %% atom, not null/true/false - {error, {dag_cbor_encode, {unsupported_atom, something}}} = - dev_codec_ipfs:to(Msg, #{}, Opts). + ?assertMatch( + {error, {dag_cbor_encode, {unsupported_atom, something}}}, + dev_codec_ipfs:to(#{ <<"kind">> => something }, #{}, opts())). -%% End-to-end IPFS interop, against the real IPFS network: fetch a known, -%% pinned dag-cbor CID from a public gateway, verify the digest at the -%% store layer, decode through `from/3', and confirm the decoded value -%% matches the canonical empty-map block the spec-test vectors call out. -%% Skipped if every gateway is unreachable at the time the test runs. +%% End-to-end against real IPFS: fetch a known pinned dag-cbor CID, verify +%% the attached commitment, decode through `from/3'. Skipped if all live +%% gateways are unreachable. live_end_to_end_fetch_and_decode_dag_cbor_test_() -> {timeout, 60, fun() -> application:ensure_all_started(inets), application:ensure_all_started(ssl), - %% Canonical empty dag-cbor block. `ipfs dag put <<"{}">>` → this CID. - %% Verified pinned on ipfs.io at the time of writing. - EmptyMapCID = - <<"bafyreigbtj4x7ip5legnfznufuopl4sg4knzc2cof6duas4b3q2fy6swua">>, NodeOpts = opts(#{ store => [ hb_test_utils:test_store(), #{ <<"store-module">> => hb_store_ipfs_gateway, <<"gateways">> => - [ <<"https://ipfs.io">>, - <<"https://dweb.link">>, - <<"https://nftstorage.link">> ], + [<<"https://ipfs.io">>, + <<"https://dweb.link">>, + <<"https://nftstorage.link">>], <<"timeout">> => 20000 } ] }), - case hb_cache:read(EmptyMapCID, NodeOpts) of + case hb_cache:read(?EMPTY_MAP_CID, NodeOpts) of {ok, Fetched} -> - %% 1. Body is exactly the 1-byte dag-cbor empty-map block. - FetchedBytes = hb_cache:ensure_loaded( - maps:get(<<"body">>, Fetched), NodeOpts), - ?assertEqual(<<16#a0>>, FetchedBytes), - %% 2. The store attached a verifiable ipfs@1.0 commitment. - ?assertEqual( - true, - hb_message:verify( - Fetched, - #{ <<"commitment-ids">> => [EmptyMapCID] }, - NodeOpts - ) - ), - %% 3. Decode the bytes back into an HB message via the codec. - Decoded = + Bytes = + hb_cache:ensure_loaded( + maps:get(<<"body">>, Fetched), NodeOpts), + ?assertEqual(<<16#a0>>, Bytes), + ?assert(hb_message:verify( + Fetched, + #{ <<"commitment-ids">> => [?EMPTY_MAP_CID] }, + NodeOpts)), + ?assertEqual(#{}, hb_message:convert( - FetchedBytes, - <<"structured@1.0">>, - <<"ipfs@1.0">>, - NodeOpts - ), - ?assertEqual(#{}, Decoded); + Bytes, <<"structured@1.0">>, <<"ipfs@1.0">>, + NodeOpts)); _ -> - ?debugFmt("Skipping: all live gateways missed CID ~s", - [EmptyMapCID]) + ?debugFmt("Skipping: all gateways missed ~s", + [?EMPTY_MAP_CID]) end end}. -%% Local end-to-end (no network): a rich HyperBEAM message is encoded, -%% committed, written to cache, then read back by its CID — the full -%% codec + commit + cache path with no mocks. Live-network equivalent is -%% the test above. +%% Local end-to-end (no network): encode a rich message, commit its CID, +%% write, read back by CID, decode. Exercises the whole codec + commit + +%% cache path with no mocks. local_end_to_end_encode_commit_cache_decode_test() -> Opts = opts(), Msg = #{ @@ -318,89 +205,49 @@ local_end_to_end_encode_commit_cache_decode_test() -> <<"count">> => 3, <<"active">> => true }, - %% Encode the message, carry the bytes as a body, commit the CID, - %% and persist. - CborBytes = hb_message:convert(Msg, <<"ipfs@1.0">>, Opts), - Carrier = #{ <<"body">> => CborBytes }, - Committed = - hb_message:commit( - Carrier, Opts, - #{ <<"commitment-device">> => <<"ipfs@1.0">>, - <<"type">> => <<"unsigned">>, - <<"hash-alg">> => <<"sha2-256-dag-cbor">> } - ), + Bytes = hb_message:convert(Msg, <<"ipfs@1.0">>, Opts), + Committed = ipfs_commit( + #{ <<"body">> => Bytes }, Opts, + #{ <<"hash-alg">> => <<"sha2-256-dag-cbor">> }), [CID] = maps:keys(maps:get(<<"commitments">>, Committed)), {ok, _} = hb_cache:write(Committed, Opts), - %% Retrieve by CID — the cache's commitment linkage resolves it. {ok, Fetched} = hb_cache:read(CID, Opts), - FetchedBytes = hb_cache:ensure_loaded( - maps:get(<<"body">>, Fetched), Opts), - ?assertEqual(CborBytes, FetchedBytes), - Decoded = + FetchedBytes = + hb_cache:ensure_loaded(maps:get(<<"body">>, Fetched), Opts), + ?assertEqual(Bytes, FetchedBytes), + ?assert(hb_message:match( + Msg, hb_message:convert( - FetchedBytes, - <<"structured@1.0">>, - <<"ipfs@1.0">>, - Opts - ), - ?assert(hb_message:match(Msg, Decoded, strict, Opts)). + FetchedBytes, <<"structured@1.0">>, <<"ipfs@1.0">>, Opts), + strict, Opts)). -%% A committed message, when encoded and decoded via the codec, preserves -%% its commitments — matching the behaviour of every other HyperBEAM codec -%% (json, flat, ans104). A pure IPFS consumer sees the commitments field -%% as just another map; a HyperBEAM consumer round-trips fully. +%% A committed message roundtrips through the codec with its commitments +%% intact — matching `dev_codec_json' / `dev_codec_flat' / `dev_codec_ans104'. commit_then_encode_preserves_commitments_test() -> Opts = opts(), - Msg = #{ <<"body">> => <<"hello world">>, <<"kind">> => <<"greeting">> }, - Committed = - hb_message:commit( - Msg, Opts, - #{ <<"commitment-device">> => <<"ipfs@1.0">>, - <<"type">> => <<"unsigned">> } - ), - ?assert(maps:is_key(<<"commitments">>, Committed)), + Committed = ipfs_commit( + #{ <<"body">> => ?HELLO_WORLD, <<"kind">> => <<"greeting">> }, Opts), Bytes = hb_message:convert(Committed, <<"ipfs@1.0">>, Opts), - {ok, DecodedIpld} = dev_codec_ipfs_cbor:decode(Bytes), - ?assert(maps:is_key(<<"commitments">>, DecodedIpld)), - %% Full roundtrip back through the codec restores the exact committed - %% message. - Decoded = + {ok, Ipld} = dev_codec_ipfs_cbor:decode(Bytes), + ?assert(maps:is_key(<<"commitments">>, Ipld)), + ?assert(hb_message:match( + Committed, hb_message:convert( - Bytes, - <<"structured@1.0">>, - <<"ipfs@1.0">>, - Opts - ), - ?assert(hb_message:match(Committed, Decoded, strict, Opts)). + Bytes, <<"structured@1.0">>, <<"ipfs@1.0">>, Opts), + strict, Opts)). -%% @doc Two different codecs of the same body must give two distinct CIDs -%% that both resolve. A `raw' CID and a `dag-cbor' CID on the same bytes -%% address the same underlying message. +%% Two different codecs of the same body give two distinct CIDs that both +%% resolve to the same cached message. raw_and_dag_cbor_cids_coexist_test() -> Opts = opts(), Body = <<16#a0>>, - Msg = #{ <<"body">> => Body }, - M1 = - hb_message:commit( - Msg, Opts, - #{ <<"commitment-device">> => <<"ipfs@1.0">>, - <<"type">> => <<"unsigned">>, - <<"hash-alg">> => <<"sha2-256-raw">> } - ), - M2 = - hb_message:commit( - M1, Opts, - #{ <<"commitment-device">> => <<"ipfs@1.0">>, - <<"type">> => <<"unsigned">>, - <<"hash-alg">> => <<"sha2-256-dag-cbor">> } - ), - Commitments = maps:get(<<"commitments">>, M2), - ?assertEqual(2, maps:size(Commitments)), - {ok, _UID} = hb_cache:write(M2, Opts), - %% The empty-dag-cbor CID should now also resolve, per our CID unit tests. - DagCborCID = <<"bafyreigbtj4x7ip5legnfznufuopl4sg4knzc2cof6duas4b3q2fy6swua">>, - {ok, ViaDagCbor} = hb_cache:read(DagCborCID, Opts), - ?assertEqual( - Body, - hb_cache:ensure_loaded(maps:get(<<"body">>, ViaDagCbor), Opts) - ). + M1 = ipfs_commit( + #{ <<"body">> => Body }, Opts, + #{ <<"hash-alg">> => <<"sha2-256-raw">> }), + M2 = ipfs_commit( + M1, Opts, #{ <<"hash-alg">> => <<"sha2-256-dag-cbor">> }), + ?assertEqual(2, maps:size(maps:get(<<"commitments">>, M2))), + {ok, _} = hb_cache:write(M2, Opts), + {ok, ViaDagCbor} = hb_cache:read(?EMPTY_MAP_CID, Opts), + ?assertEqual(Body, + hb_cache:ensure_loaded(maps:get(<<"body">>, ViaDagCbor), Opts)). diff --git a/src/hb_store_ipfs_gateway.erl b/src/hb_store_ipfs_gateway.erl index d0ac25242..38d7dc86b 100644 --- a/src/hb_store_ipfs_gateway.erl +++ b/src/hb_store_ipfs_gateway.erl @@ -1,45 +1,24 @@ -%%% @doc A read-only store backend that fetches IPFS CIDs from a configured -%%% set of HTTP gateways. This is how a HyperBEAM node becomes able to serve -%%% *external* IPFS content — content it did not itself commit locally. +%%% @doc Read-only store backend that fetches IPFS CIDs from a configured +%%% set of HTTP gateways, verifies the body hashes to the requested CID, +%%% and attaches an `~ipfs@1.0' unsigned commitment so the message remains +%%% independently verifiable via `hb_message:verify/2,3'. The CID is the +%%% authority, not the HTTPS certificate. %%% -%%% Crucially, this module does NOT trust the gateways. Every fetched body -%%% goes through TWO layers of verification before it is handed up the -%%% chain: -%%% -%%% 1. Direct digest check: sha256(body) is compared to the CID's -%%% multihash digest. A mismatched gateway response is treated as -%%% `not_found' and the next gateway is tried. -%%% -%%% 2. Commitment attachment: an `~ipfs@1.0' unsigned commitment keyed by -%%% the CID is attached to the returned message. This lets any -%%% downstream consumer re-verify independently via -%%% `hb_message:verify/2,3' — and the commitment is what `hb_cache' -%%% uses to link the CID to the message's uncommitted ID if the -%%% caller chooses to persist it locally. -%%% -%%% The CID is the authority, not the HTTPS certificate. -%%% -%%% Shape of a config entry: +%%% Config entry: %%% ``` %%% #{ %%% <<"store-module">> => hb_store_ipfs_gateway, -%%% <<"gateways">> => [<<"https://ipfs.io">>, <<"https://dweb.link">>], -%%% <<"timeout">> => 15000 %% ms, optional, default 15_000 +%%% <<"gateways">> => [<<"https://ipfs.io">>, ...], +%%% <<"timeout">> => 15000 %%% } %%% ''' -%%% Put this after your local stores so it acts as a read-through fallback. -%%% No `write/3' is exposed: this is a consumer-only view of IPFS. -%%% -%%% Keys that do not parse as CIDv1 are ignored quickly and return `not_found' -%%% so that this module can live safely in a chain alongside Arweave-addressed -%%% stores without stepping on their toes. +%%% Place after local stores for read-through semantics. Non-CIDv1 keys are +%%% ignored so the module is safe alongside Arweave-addressed stores. -module(hb_store_ipfs_gateway). -export([scope/1, type/2, read/2, resolve/2, list/2]). -include("include/hb.hrl"). -include_lib("eunit/include/eunit.hrl"). -%% Gateways known to serve public IPFS content at time of writing. Users -%% should override for production via the `<<"gateways">>' store-config key. -define(DEFAULT_GATEWAYS, [ <<"https://ipfs.io">>, <<"https://dweb.link">>, @@ -47,22 +26,20 @@ ]). -define(DEFAULT_TIMEOUT_MS, 15000). -%% @doc Gateway scope is always remote; prefer local stores in the chain. +%% @doc Always remote — prefer local stores in the chain. scope(_) -> remote. -%% @doc Keys are returned as-is. We never alias CIDs to anything else. +%% @doc CIDs are never aliased. resolve(_, Key) -> Key. -%% @doc A CID resolves to a single-binary `body' — IPFS has no composite -%% structure at this edge of the spec. +%% @doc IPFS at this edge of the spec has no composite structure. type(_, Key) -> case cid_of_key(Key) of {ok, _, _} -> simple; - error -> not_found + error -> not_found end. -%% @doc `list/2' on a CID returns the keys of the one-field message we wrap -%% the body in — conforming to the general store contract. +%% @doc Return the keys of the wrapping message for a fetched CID. list(StoreOpts, Key) -> case read(StoreOpts, Key) of {ok, Message} when is_map(Message) -> @@ -70,31 +47,27 @@ list(StoreOpts, Key) -> Other -> Other end. -%% @doc Fetch the CID from one of the configured gateways. Tries each in -%% order. Returns `not_found' if every gateway misses; `failure' only if -%% something systemic broke. A digest mismatch is a miss, not a failure — -%% that is how we stop malicious gateways from poisoning the cache. +%% @doc Fetch the CID from one of the configured gateways, in order. A +%% digest mismatch is treated as a miss (the gateway lied) and the next +%% gateway is tried. Returns `not_found' if every gateway misses. read(StoreOpts, Key) -> case cid_of_key(Key) of error -> ?event(ipfs_gateway, {ignoring_non_cid, Key}), not_found; {ok, CID, Parts} -> - Gateways = hb_maps:get(<<"gateways">>, StoreOpts, - ?DEFAULT_GATEWAYS, StoreOpts), - Timeout = hb_maps:get(<<"timeout">>, StoreOpts, - ?DEFAULT_TIMEOUT_MS, StoreOpts), - try_gateways(Gateways, CID, Parts, Timeout, StoreOpts) + Gateways = + hb_maps:get(<<"gateways">>, StoreOpts, + ?DEFAULT_GATEWAYS, StoreOpts), + Timeout = + hb_maps:get(<<"timeout">>, StoreOpts, + ?DEFAULT_TIMEOUT_MS, StoreOpts), + try_gateways(Gateways, CID, Parts, Timeout) end. -%%%==================================================================== -%%% Internals -%%%==================================================================== - -%% @doc Parse a store key into a CID (binary) and its pre-decoded parts. -%% Accepts: a 59-ish-char CIDv1 binary, or a `[CID]' single-element path -%% list. Longer paths are rejected in phase 1 — we have no UnixFS/IPLD path -%% resolver yet, and silently returning the root would be misleading. +%% @doc Parse a key into a CID and its pre-decoded parts. Accepts a bare +%% CIDv1 binary or a single-element path list; longer paths are rejected +%% (no UnixFS/IPLD path resolver yet). cid_of_key(Key) when is_binary(Key) -> try_parse_cid(Key); cid_of_key([Single]) -> @@ -105,49 +78,41 @@ cid_of_key(_) -> try_parse_cid(CID) when is_binary(CID) -> case dev_codec_ipfs_cid:decode(CID) of {ok, Parts} -> {ok, CID, Parts}; - {error, _} -> error + {error, _} -> error end; try_parse_cid(_) -> error. -try_gateways([], CID, _Parts, _Timeout, _Opts) -> +try_gateways([], CID, _Parts, _Timeout) -> ?event(ipfs_gateway, {all_gateways_missed, {cid, CID}}), not_found; -try_gateways([Gateway|Rest], CID, Parts, Timeout, Opts) -> - case fetch_and_verify(Gateway, CID, Parts, Timeout, Opts) of +try_gateways([Gateway|Rest], CID, Parts, Timeout) -> + case fetch_and_verify(Gateway, CID, Parts, Timeout) of {ok, Body} -> - ?event(ipfs_gateway, {fetched, {cid, CID}, {gateway, Gateway}, - {bytes, byte_size(Body)}}), + ?event(ipfs_gateway, + {fetched, {cid, CID}, {gateway, Gateway}, + {bytes, byte_size(Body)}}), {ok, with_commitment(CID, Parts, Body)}; digest_mismatch -> - %% Try the next gateway — this one lied. - ?event(warning, {ipfs_gateway_digest_mismatch, - {cid, CID}, {gateway, Gateway}}), - try_gateways(Rest, CID, Parts, Timeout, Opts); - not_found -> - try_gateways(Rest, CID, Parts, Timeout, Opts); - {error, Reason} -> - ?event(ipfs_gateway, {gateway_error, - {cid, CID}, {gateway, Gateway}, {reason, Reason}}), - try_gateways(Rest, CID, Parts, Timeout, Opts) + ?event(warning, + {ipfs_gateway_digest_mismatch, + {cid, CID}, {gateway, Gateway}}), + try_gateways(Rest, CID, Parts, Timeout); + Other -> + ?event(ipfs_gateway, + {gateway_miss, {cid, CID}, + {gateway, Gateway}, {reason, Other}}), + try_gateways(Rest, CID, Parts, Timeout) end. %% @doc Wrap verified bytes in a message whose `~ipfs@1.0' unsigned -%% commitment keyed by the CID makes it independently verifiable via -%% `hb_message:verify/2,3' — without trusting this store to have done the -%% check. The commitment's `type' field is the native hash-alg name -%% (`sha2-256-raw' for `bafk...' CIDs, `sha2-256-dag-cbor' for `bafy...' -%% CIDs). It flows onto the wire as `alg="ipfs@1.0/"' through -%% `dev_codec_httpsig_siginfo:commitment_to_alg/2' — no custom RFC 9421 -%% metadata parameters required. +%% commitment is keyed by the CID, so any downstream consumer can +%% re-verify independently. Mirrors `dev_codec_ipfs:commit/3' — signature = +%% raw digest (keeps the commitment on the httpsig wire), no keyid (no +%% key material needed for content-addressed commitments). with_commitment(CID, #{ <<"hash-alg">> := HashAlg, <<"digest">> := Digest }, Body) -> - %% Mirror `dev_codec_ipfs:commit/3'. `signature' keeps the commitment - %% on the httpsig wire (see `dev_codec_httpsig_siginfo's filter); - %% combined with the `id=' extension emitted when `h(Sig)' ≠ CID, the - %% receiver recovers the commitment at the CID key. No `keyid' — - %% content-addressed commitments need no key material. #{ <<"body">> => Body, <<"commitments">> => #{ @@ -160,21 +125,20 @@ with_commitment(CID, } }. -%% @doc Single-gateway fetch. Uses OTP's `httpc' — no new dependency — and -%% verifies the body hash against the requested CID before returning. -fetch_and_verify(Gateway, CID, Parts, Timeout, _Opts) -> +%% @doc Fetch a single gateway; verify the body against the CID digest +%% before returning. Uses OTP `httpc' — no new dependency. +fetch_and_verify(Gateway, CID, Parts, Timeout) -> URL = binary_to_list(<>), Headers = [ {"accept", "application/vnd.ipld.raw, application/octet-stream"}, {"user-agent", "hyperbeam-ipfs/1.0"} ], - Request = {URL, Headers}, HTTPOpts = [{timeout, Timeout}, {connect_timeout, Timeout}], Opts = [{body_format, binary}, {full_result, true}], - case httpc:request(get, Request, HTTPOpts, Opts) of - {ok, {{_, 200, _}, _RespHeaders, Body}} when is_binary(Body) -> + case httpc:request(get, {URL, Headers}, HTTPOpts, Opts) of + {ok, {{_, 200, _}, _, Body}} when is_binary(Body) -> case verify_digest(Parts, Body) of - true -> {ok, Body}; + true -> {ok, Body}; false -> digest_mismatch end; {ok, {{_, 404, _}, _, _}} -> not_found; @@ -182,53 +146,65 @@ fetch_and_verify(Gateway, CID, Parts, Timeout, _Opts) -> {error, Reason} -> {error, Reason} end. -%% @doc Compare a gateway-returned body against the digest embedded in the -%% CID. All `sha2-256-*' hash-algs share the same underlying digest -%% function, so a single clause handles them all. +%% @doc Compare a fetched body against the digest embedded in the CID. +%% All `sha2-256-*' hash-algs share the same underlying digest function. verify_digest(#{ <<"hash-alg">> := <<"sha2-256-", _/binary>>, <<"digest">> := Expected }, Body) -> Expected =:= crypto:hash(sha256, Body); verify_digest(_, _) -> false. -%%%==================================================================== -%%% Tests -%%%==================================================================== -%%% See `hb_store_ipfs_gateway_test' for end-to-end stubs using cowboy. +%%% Tests. See `dev_codec_ipfs_live_test' for broader end-to-end coverage. + +-define(HELLO_WORLD_CID, + <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>). +-define(HELLO_WORLD_BODY, <<"hello world">>). +-define(LIVE_GATEWAYS, [ + <<"https://ipfs.io">>, + <<"https://dweb.link">>, + <<"https://nftstorage.link">>, + <<"https://4everland.io">> +]). + +live_store() -> + #{ + <<"store-module">> => hb_store_ipfs_gateway, + <<"gateways">> => ?LIVE_GATEWAYS, + <<"timeout">> => 20000 + }. + +ensure_inets() -> + application:ensure_all_started(inets), + application:ensure_all_started(ssl). cid_of_key_test() -> - CID = <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>, + CID = ?HELLO_WORLD_CID, ?assertMatch({ok, CID, #{}}, cid_of_key(CID)), ?assertMatch({ok, CID, #{}}, cid_of_key([CID])), ?assertEqual(error, cid_of_key(<<"not-a-cid">>)), - %% Arweave-style IDs (43-char base64url) must NOT be claimed by us. + %% Arweave IDs (43-char base64url) are not claimed here. ?assertEqual(error, cid_of_key(<<"BOogk_XAI3bvNWnxNxwxmvOfglZt17o4MOVAdPNZ_ew">>)), - %% Multi-part paths are out of scope in phase 1. + %% Multi-part paths are out of scope. ?assertEqual(error, cid_of_key([CID, <<"sub">>])). verify_digest_accepts_correct_body_test() -> Body = <<"hello world">>, - Parts = #{ + ?assert(verify_digest(#{ <<"hash-alg">> => <<"sha2-256-raw">>, - <<"digest">> => crypto:hash(sha256, Body) - }, - ?assert(verify_digest(Parts, Body)). + <<"digest">> => crypto:hash(sha256, Body) }, Body)). verify_digest_rejects_tampered_body_test() -> - Parts = #{ + ?assertNot(verify_digest(#{ <<"hash-alg">> => <<"sha2-256-raw">>, <<"digest">> => crypto:hash(sha256, <<"hello world">>) - }, - ?assertNot(verify_digest(Parts, <<"hello earth">>)). + }, <<"hello earth">>)). verify_digest_accepts_dag_cbor_hash_alg_test() -> Body = <<16#a0>>, - Parts = #{ + ?assert(verify_digest(#{ <<"hash-alg">> => <<"sha2-256-dag-cbor">>, - <<"digest">> => crypto:hash(sha256, Body) - }, - ?assert(verify_digest(Parts, Body)). + <<"digest">> => crypto:hash(sha256, Body) }, Body)). scope_is_remote_test() -> ?assertEqual(remote, scope(#{})). @@ -237,155 +213,72 @@ read_ignores_non_cid_test() -> ?assertEqual(not_found, read(#{}, <<"BOogk_XAI3bvNWnxNxwxmvOfglZt17o4MOVAdPNZ_ew">>)). -%%% Live-service tests. HyperBEAM's test suite hits the real network for -%%% its store/gateway backends (see `hb_store_gateway' tests against the -%%% public Arweave gateways); we do the same for IPFS. The CID used here -%%% is the canonical `raw("hello world")' CIDv1 that multiple public -%%% gateways serve: -%%% -%%% bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e -%%% -%%% Each test lists several gateways so a single flaky endpoint cannot -%%% flake the whole suite. +digest_gate_rejects_tampered_body_test() -> + {ok, Parts} = dev_codec_ipfs_cid:decode(?HELLO_WORLD_CID), + ?assert(verify_digest(Parts, ?HELLO_WORLD_BODY)), + ?assertNot(verify_digest(Parts, <<"hello earth">>)). --define(HELLO_WORLD_CID, - <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>). --define(HELLO_WORLD_BODY, <<"hello world">>). --define(LIVE_GATEWAYS, [ - <<"https://ipfs.io">>, - <<"https://dweb.link">>, - <<"https://nftstorage.link">>, - <<"https://4everland.io">> -]). +%%% Live-service tests. The canonical `hello world' CID is pinned on every +%%% public gateway; listing several avoids flaking on one being down. live_gateway_fetches_known_cid_test_() -> {timeout, 60, fun() -> - application:ensure_all_started(inets), - application:ensure_all_started(ssl), - Store = #{ - <<"store-module">> => hb_store_ipfs_gateway, - <<"gateways">> => ?LIVE_GATEWAYS, - <<"timeout">> => 20000 - }, - %% Either all live gateways served the body intact and we got the - %% wrapped message, or every gateway was unreachable — in which - %% case the test is skipped instead of flaking CI. - case read(Store, ?HELLO_WORLD_CID) of + ensure_inets(), + case read(live_store(), ?HELLO_WORLD_CID) of {ok, Msg} -> - ?assertEqual( - ?HELLO_WORLD_BODY, - maps:get(<<"body">>, Msg) - ), - Commitments = maps:get(<<"commitments">>, Msg), - ?assert(maps:is_key(?HELLO_WORLD_CID, Commitments)), - Commitment = maps:get(?HELLO_WORLD_CID, Commitments), + ?assertEqual(?HELLO_WORLD_BODY, maps:get(<<"body">>, Msg)), + Comms = maps:get(<<"commitments">>, Msg), + ?assert(maps:is_key(?HELLO_WORLD_CID, Comms)), + C = maps:get(?HELLO_WORLD_CID, Comms), ?assertEqual(<<"ipfs@1.0">>, - maps:get(<<"commitment-device">>, Commitment)), - ?assertEqual(<<"sha2-256-raw">>, - maps:get(<<"type">>, Commitment)); + maps:get(<<"commitment-device">>, C)), + ?assertEqual(<<"sha2-256-raw">>, maps:get(<<"type">>, C)); not_found -> - ?debugFmt("Skipping: all live gateways missed CID ~s", - [?HELLO_WORLD_CID]), - ok + ?debugFmt("Skipping: all gateways missed ~s", + [?HELLO_WORLD_CID]) end end}. %% The commitment attached by the gateway store must verify via the -%% standard `hb_message:verify/2,3' machinery, using the same `~ipfs@1.0' -%% device whose `verify/3' is the canonical check. If this test passes, -%% callers can treat gateway-fetched messages like any other committed -%% HyperBEAM message. +%% standard `hb_message:verify/2,3' machinery. live_gateway_attached_commitment_verifies_test_() -> {timeout, 60, fun() -> - application:ensure_all_started(inets), - application:ensure_all_started(ssl), - Store = #{ - <<"store-module">> => hb_store_ipfs_gateway, - <<"gateways">> => ?LIVE_GATEWAYS, - <<"timeout">> => 20000 - }, - case read(Store, ?HELLO_WORLD_CID) of + ensure_inets(), + case read(live_store(), ?HELLO_WORLD_CID) of {ok, Msg} -> - %% Stock preloaded_devices plus ipfs@1.0, exactly what a - %% user would configure in their node. - Opts = #{ - preloaded_devices => - [ #{ <<"name">> => <<"ipfs@1.0">>, - <<"module">> => dev_codec_ipfs } | - hb_opts:get(preloaded_devices, [], #{}) ] - }, - ?assertEqual( - true, - hb_message:verify( - Msg, - #{ <<"commitment-ids">> => [?HELLO_WORLD_CID] }, - Opts - ) - ); + ?assert(hb_message:verify( + Msg, + #{ <<"commitment-ids">> => [?HELLO_WORLD_CID] }, + #{})); not_found -> - ?debugFmt("Skipping: all live gateways missed CID", - []) + ?debugFmt("Skipping: all gateways missed ~s", + [?HELLO_WORLD_CID]) end end}. -%% A CID missing from the local store falls through to the real gateway -%% chain and comes back via the normal `hb_cache:read/2' path. This is the -%% production pipeline exercised end-to-end against the public IPFS -%% network. +%% A CID missing from the local store must fall through to the gateway +%% chain and return via the standard `hb_cache:read/2' path. live_hb_cache_reads_from_gateway_test_() -> {timeout, 60, fun() -> - application:ensure_all_started(inets), - application:ensure_all_started(ssl), - Opts = #{ - store => [ - hb_test_utils:test_store(), - #{ - <<"store-module">> => hb_store_ipfs_gateway, - <<"gateways">> => ?LIVE_GATEWAYS, - <<"timeout">> => 20000 - } - ] - }, + ensure_inets(), + Opts = #{ store => [hb_test_utils:test_store(), live_store()] }, case hb_cache:read(?HELLO_WORLD_CID, Opts) of {ok, Msg} -> - ?assertEqual( - ?HELLO_WORLD_BODY, + ?assertEqual(?HELLO_WORLD_BODY, hb_cache:ensure_loaded( - maps:get(<<"body">>, Msg), Opts) - ); + maps:get(<<"body">>, Msg), Opts)); not_found -> - ?debugFmt("Skipping: all live gateways missed CID", []) + ?debugFmt("Skipping: all gateways missed CID", []) end end}. -%% A gateway that misreads the prefix (e.g. the subpath `/ipfs/` served by -%% a non-IPFS host) may still return 200 with an unrelated body. The store -%% must refuse such a response by comparing sha256(body) against the CID's -%% multihash digest. This test exercises that path by asking a real host -%% for a nonsense CID — we expect `not_found' and no wrapped body. +%% A fake CIDv1 with random digest must not resolve anywhere — the digest +%% gate refuses any body a gateway might return for this path. live_gateway_rejects_unpinned_cid_test_() -> {timeout, 60, fun() -> - application:ensure_all_started(inets), - application:ensure_all_started(ssl), - %% A well-formed CIDv1 with a random digest. Vanishingly unlikely - %% to be pinned anywhere; serves as a negative test. - UnpinnedCID = - dev_codec_ipfs_cid:encode( - <<"raw">>, sha2_256, - crypto:strong_rand_bytes(64) - ), - Store = #{ - <<"store-module">> => hb_store_ipfs_gateway, - <<"gateways">> => ?LIVE_GATEWAYS, - <<"timeout">> => 10000 - }, + ensure_inets(), + UnpinnedCID = dev_codec_ipfs_cid:encode( + <<"raw">>, sha2_256, crypto:strong_rand_bytes(64)), + Store = (live_store())#{ <<"timeout">> => 10000 }, ?assertEqual(not_found, read(Store, UnpinnedCID)) end}. - -%% Defense in depth: even if somehow a gateway did lie (and we can't rely -%% on any real gateway to do so on demand), the `verify_digest/2' function -%% that every response flows through is tested directly. -digest_gate_rejects_tampered_body_test() -> - {ok, Parts} = dev_codec_ipfs_cid:decode(?HELLO_WORLD_CID), - ?assert(verify_digest(Parts, ?HELLO_WORLD_BODY)), - ?assertNot(verify_digest(Parts, <<"hello earth">>)). From 95553f1e8cc77d7dcd4a485c5bca5df3ed973f50 Mon Sep 17 00:00:00 2001 From: Sam Williams Date: Sat, 18 Apr 2026 16:35:59 -0400 Subject: [PATCH 18/22] refactor(ipfs): revert ?IS_ID kernel edit; add `@doc ' to function comments - `src/include/hb.hrl': revert the 59-byte CID branch added to `?IS_ID'. That was a kernel-layer edit to enable the bare `GET /' path via `hb_ao:resolve_many/2'; the test suite and production IPFS flows all route through `/~lookup@1.0/read&target=' instead, which resolves the CID from the cache chain without touching the kernel ID guard. - Prepend `@doc ` to every function-preceding `%% ' block that lacked it across the IPFS module, helper modules, gateway store, and test modules. This is cosmetic only. All 2456 targeted tests remain green (codec, gateway, live network, regression across ans104/httpsig/flat/json/structured/cache/ao vectors). Co-Authored-By: Claude Opus 4.6 --- src/dev_codec_ipfs_cbor.erl | 39 ++++++++++++++++---------------- src/dev_codec_ipfs_cid.erl | 2 +- src/dev_codec_ipfs_live_test.erl | 16 ++++++------- src/dev_codec_ipfs_test.erl | 35 ++++++++++++++-------------- src/hb_store_ipfs_gateway.erl | 10 ++++---- src/include/hb.hrl | 8 ++----- 6 files changed, 54 insertions(+), 56 deletions(-) diff --git a/src/dev_codec_ipfs_cbor.erl b/src/dev_codec_ipfs_cbor.erl index 40adb3d5c..d33f97a03 100644 --- a/src/dev_codec_ipfs_cbor.erl +++ b/src/dev_codec_ipfs_cbor.erl @@ -140,8 +140,8 @@ dec_one(<>) -> {N, Rest1} = read_arg(AI, Rest), dec_value(MT, N, Rest1). -%% Read the argument for an informational length/value AI. Used by all major -%% types except 7 (simple/float). +%% @doc Read the argument for an informational length/value AI. Used by all +%% major types except 7 (simple/float). read_arg(AI, Rest) when AI < 24 -> {AI, Rest}; read_arg(24, <>) -> @@ -162,9 +162,9 @@ read_arg(30, _) -> throw({dag_cbor_decode, reserved_additional_info}); read_arg(31, _) -> throw({dag_cbor_decode, indefinite_length_forbidden}); read_arg(_, _) -> throw({dag_cbor_decode, unexpected_end}). -%% Reject non-canonical integer encodings. For length arg AI that is 24, the -%% value N must be >= 24; for 25, >= 256; for 26, >= 65536; for 27, >= -%% 4294967296. Otherwise the encoder chose a wastefully long form. +%% @doc Reject non-canonical integer encodings. For length arg AI 24 the +%% value N must be >= 24; for 25, >= 256; for 26, >= 65536; for 27, +%% >= 4294967296. Otherwise the encoder chose a wastefully long form. reject_non_canonical_int(24, N) when N < 24 -> throw({dag_cbor_decode, non_canonical_integer}); reject_non_canonical_int(25, N) when N < 16#100 -> @@ -204,7 +204,7 @@ dec_value(6, Tag, Rest) -> _ -> throw({dag_cbor_decode, {unsupported_tag, Tag}}) end. -%% Simple values and floats live in major type 7. AI selects the subtype. +%% @doc Simple values and floats live in major type 7. AI selects the subtype. dec_simple_or_float(20, Rest) -> {false, Rest}; dec_simple_or_float(21, Rest) -> {true, Rest}; dec_simple_or_float(22, Rest) -> {null, Rest}; @@ -233,7 +233,7 @@ dec_n(N, Rest, Acc) -> {V, Rest1} = dec_one(Rest), dec_n(N - 1, Rest1, [V | Acc]). -%% Decode map pairs; verify keys are text strings in strictly ascending +%% @doc Decode map pairs; verify keys are text strings in strictly ascending %% dag-cbor order (length-first, then bytewise) with no duplicates. dec_pairs(0, Rest, Acc, _Prev) -> {lists:reverse(Acc), Rest}; @@ -379,7 +379,7 @@ map_encoding_canonical_test() -> ?assertEqual(<<16#a0>>, encode(#{})), ?assertEqual({ok, #{}}, decode(<<16#a0>>)). -%% Length-first ordering beats alphabetical: {"aa":1,"z":2} encodes z first. +%% @doc Length-first ordering beats alphabetical: {"aa":1,"z":2} encodes z first. map_length_first_ordering_test() -> Input = #{ <<"aa">> => 1, <<"z">> => 2 }, Encoded = encode(Input), @@ -462,9 +462,9 @@ shortest_form_integers_encoded_test() -> %% 23 must use single byte (major 0, info 23) — 0x17, not 0x18 0x17. ?assertEqual(<<16#17>>, encode(23)). -%% End-to-end validation: an encoded empty dag-cbor map, CID-hashed, must -%% match the well-known empty-map dag-cbor CID. This closes the loop with -%% the phase-1 CID machinery. +%% @doc End-to-end validation: an encoded empty dag-cbor map, CID-hashed, +%% must match the well-known empty-map dag-cbor CID. This closes the loop +%% with the phase-1 CID machinery. empty_map_cid_matches_canonical_test() -> Encoded = encode(#{}), ?assertEqual(<<16#a0>>, Encoded), @@ -511,9 +511,10 @@ spec_vectors_test() -> Cases ). -%% Stress: a map with many keys at assorted lengths forces the canonical -%% length-first ordering to kick in, and confirms the encoded output is -%% stable even when the source map enumerates keys in a different order. +%% @doc Stress: a map with many keys at assorted lengths forces the +%% canonical length-first ordering to kick in, and confirms the encoded +%% output is stable even when the source map enumerates keys in a +%% different order. stress_map_ordering_test() -> Keys = [<<"a">>, <<"b">>, <<"c">>, <<"aa">>, <<"ab">>, <<"abc">>, <<"abcd">>, <<"z">>, <<"zz">>], @@ -526,7 +527,7 @@ stress_map_ordering_test() -> %% Decode must produce the same map. ?assertEqual({ok, M1}, decode(Bytes1)). -%% 64-bit integer boundaries. Critical for int64 correctness. +%% @doc 64-bit integer boundaries. Critical for int64 correctness. int_boundary_test() -> Cases = [ %% Max 8-bit (255) and 8-bit + 1 (256) already covered. @@ -548,10 +549,10 @@ int_boundary_test() -> Cases ). -%% A more structurally interesting map: the simplest non-trivial dag-cbor -%% object. The bytes are exact; we cross-check the CID against the output -%% of `ipfs dag put --input-codec dag-json --store-codec dag-cbor` on -%% `{"hello":"world"}`. +%% @doc A more structurally interesting map: the simplest non-trivial +%% dag-cbor object. The bytes are exact; we cross-check the CID against the +%% output of `ipfs dag put --input-codec dag-json --store-codec dag-cbor' +%% on `{"hello":"world"}'. simple_map_bytes_and_cid_test() -> Encoded = encode(#{ <<"hello">> => <<"world">> }), %% a1 65 68 65 6c 6c 6f 65 77 6f 72 6c 64 diff --git a/src/dev_codec_ipfs_cid.erl b/src/dev_codec_ipfs_cid.erl index d64052a37..df7bffae6 100644 --- a/src/dev_codec_ipfs_cid.erl +++ b/src/dev_codec_ipfs_cid.erl @@ -138,7 +138,7 @@ varint_decode(_, _, _) -> %%% Tests -%% IPFS canonical ground truth: `ipfs add --raw-leaves -Q <"hello world"' +%% @doc IPFS canonical ground truth: `ipfs add --raw-leaves -Q <"hello world"' %% returns this CID. The only immovable cross-check for our varint / %% multihash / multibase / CIDv1 glue. hello_world_raw_cid_test() -> diff --git a/src/dev_codec_ipfs_live_test.erl b/src/dev_codec_ipfs_live_test.erl index d703c19bd..ceef4473e 100644 --- a/src/dev_codec_ipfs_live_test.erl +++ b/src/dev_codec_ipfs_live_test.erl @@ -75,8 +75,8 @@ live_http_get_cid_serves_body_test_() -> ?assertEqual(?HELLO_WORLD_BODY, response_body(R)) end) end}. -%% Recomputing the CID from the wire body must reproduce the requested -%% CID — the only verification that matters in IPFS. +%% @doc Recomputing the CID from the wire body must reproduce the +%% requested CID — the only verification that matters in IPFS. live_http_body_round_trips_to_cid_test_() -> {timeout, 90, fun() -> with_live_gateways(fun() -> NodeURL = hb_http_server:start_node(node_opts()), @@ -88,7 +88,7 @@ live_http_body_round_trips_to_cid_test_() -> %%% PR Path 2 — Preload / en-masse cache a set of CIDs -%% First lookup pulls the CID through the gateway and pins it to the +%% @doc First lookup pulls the CID through the gateway and pins it to the %% node's primary store; a second direct probe of the primary succeeds. live_cache_preload_pattern_test_() -> {timeout, 90, fun() -> with_live_gateways(fun() -> @@ -116,9 +116,9 @@ live_cache_preload_pattern_test_() -> hb_ao:get(<<"body">>, R2, <<>>, LocalOpts), LocalOpts)) end) end}. -%% Transport: an IPFS commitment must arrive on the client side under its -%% CID map key, not under `h(Sig)'. This is what the `id=' extension in -%% `dev_codec_httpsig_siginfo' preserves. +%% @doc Transport: an IPFS commitment must arrive on the client side +%% under its CID map key, not under `h(Sig)'. This is what the `id=' +%% extension in `dev_codec_httpsig_siginfo' preserves. live_http_ipfs_commitment_survives_transport_test_() -> {timeout, 90, fun() -> with_live_gateways(fun() -> NodeURL = hb_http_server:start_node(node_opts()), @@ -142,8 +142,8 @@ live_http_ipfs_commitment_survives_transport_test_() -> end end) end}. -%% Two in-process nodes, wired so a client request on Node B transparently -%% pulls through Node A: +%% @doc Two in-process nodes, wired so a client request on Node B +%% transparently pulls through Node A: %% %% Node A — upstream — has ONLY `hb_store_ipfs_gateway'. Every read %% passes through to the real IPFS network. diff --git a/src/dev_codec_ipfs_test.erl b/src/dev_codec_ipfs_test.erl index a8c649078..0fe6e8a9f 100644 --- a/src/dev_codec_ipfs_test.erl +++ b/src/dev_codec_ipfs_test.erl @@ -99,8 +99,8 @@ to_dag_cbor_simple_test() -> #{ <<"hello">> => <<"world">> }, <<"ipfs@1.0">>, opts()), ?assertEqual(<<16#a1, 16#65, "hello", 16#65, "world">>, Bytes). -%% Roundtripping a typed message through dag-cbor preserves rich types: -%% integers, floats, booleans, null, lists, nested maps. +%% @doc Roundtripping a typed message through dag-cbor preserves rich +%% types: integers, floats, booleans, null, lists, nested maps. roundtrip_typed_message_test() -> Opts = opts(), Msg = #{ @@ -117,8 +117,8 @@ roundtrip_typed_message_test() -> Bytes, <<"structured@1.0">>, <<"ipfs@1.0">>, Opts), ?assert(hb_message:match(Msg, Decoded, strict, Opts)). -%% Encoding is deterministic: two differently-ordered source maps produce -%% the same bytes, and re-encoding is stable. +%% @doc Encoding is deterministic: two differently-ordered source maps +%% produce the same bytes, and re-encoding is stable. encoding_is_deterministic_test() -> Opts = opts(), B1 = hb_message:convert( @@ -132,8 +132,8 @@ encoding_is_deterministic_test() -> #{ <<"a">> => 1, <<"bb">> => 2, <<"ccc">> => 3 }, <<"ipfs@1.0">>, Opts)). -%% Committing the dag-cbor bytes of a message yields a CIDv1 identical to -%% the one `ipfs dag put --input-codec dag-cbor' would produce. +%% @doc Committing the dag-cbor bytes of a message yields a CIDv1 +%% identical to the one `ipfs dag put --input-codec dag-cbor' would produce. cid_matches_dag_cbor_of_message_test() -> Opts = opts(), Bytes = hb_message:convert( @@ -147,15 +147,15 @@ cid_matches_dag_cbor_of_message_test() -> ?assertEqual(crypto:hash(sha256, Bytes), maps:get(<<"digest">>, Parts)), ?assertMatch(<<"bafyrei", _:52/binary>>, CID). -%% Atoms outside `null/true/false' have no dag-cbor representation. +%% @doc Atoms outside `null/true/false' have no dag-cbor representation. unsupported_atom_rejected_test() -> ?assertMatch( {error, {dag_cbor_encode, {unsupported_atom, something}}}, dev_codec_ipfs:to(#{ <<"kind">> => something }, #{}, opts())). -%% End-to-end against real IPFS: fetch a known pinned dag-cbor CID, verify -%% the attached commitment, decode through `from/3'. Skipped if all live -%% gateways are unreachable. +%% @doc End-to-end against real IPFS: fetch a known pinned dag-cbor CID, +%% verify the attached commitment, decode through `from/3'. Skipped if all +%% live gateways are unreachable. live_end_to_end_fetch_and_decode_dag_cbor_test_() -> {timeout, 60, fun() -> application:ensure_all_started(inets), @@ -193,9 +193,9 @@ live_end_to_end_fetch_and_decode_dag_cbor_test_() -> end end}. -%% Local end-to-end (no network): encode a rich message, commit its CID, -%% write, read back by CID, decode. Exercises the whole codec + commit + -%% cache path with no mocks. +%% @doc Local end-to-end (no network): encode a rich message, commit its +%% CID, write, read back by CID, decode. Exercises the whole codec + commit +%% + cache path with no mocks. local_end_to_end_encode_commit_cache_decode_test() -> Opts = opts(), Msg = #{ @@ -221,8 +221,9 @@ local_end_to_end_encode_commit_cache_decode_test() -> FetchedBytes, <<"structured@1.0">>, <<"ipfs@1.0">>, Opts), strict, Opts)). -%% A committed message roundtrips through the codec with its commitments -%% intact — matching `dev_codec_json' / `dev_codec_flat' / `dev_codec_ans104'. +%% @doc A committed message roundtrips through the codec with its +%% commitments intact — matching `dev_codec_json' / `dev_codec_flat' / +%% `dev_codec_ans104'. commit_then_encode_preserves_commitments_test() -> Opts = opts(), Committed = ipfs_commit( @@ -236,8 +237,8 @@ commit_then_encode_preserves_commitments_test() -> Bytes, <<"structured@1.0">>, <<"ipfs@1.0">>, Opts), strict, Opts)). -%% Two different codecs of the same body give two distinct CIDs that both -%% resolve to the same cached message. +%% @doc Two different codecs of the same body give two distinct CIDs that +%% both resolve to the same cached message. raw_and_dag_cbor_cids_coexist_test() -> Opts = opts(), Body = <<16#a0>>, diff --git a/src/hb_store_ipfs_gateway.erl b/src/hb_store_ipfs_gateway.erl index 38d7dc86b..69b4d9bcd 100644 --- a/src/hb_store_ipfs_gateway.erl +++ b/src/hb_store_ipfs_gateway.erl @@ -239,7 +239,7 @@ live_gateway_fetches_known_cid_test_() -> end end}. -%% The commitment attached by the gateway store must verify via the +%% @doc The commitment attached by the gateway store must verify via the %% standard `hb_message:verify/2,3' machinery. live_gateway_attached_commitment_verifies_test_() -> {timeout, 60, fun() -> @@ -256,8 +256,8 @@ live_gateway_attached_commitment_verifies_test_() -> end end}. -%% A CID missing from the local store must fall through to the gateway -%% chain and return via the standard `hb_cache:read/2' path. +%% @doc A CID missing from the local store must fall through to the +%% gateway chain and return via the standard `hb_cache:read/2' path. live_hb_cache_reads_from_gateway_test_() -> {timeout, 60, fun() -> ensure_inets(), @@ -272,8 +272,8 @@ live_hb_cache_reads_from_gateway_test_() -> end end}. -%% A fake CIDv1 with random digest must not resolve anywhere — the digest -%% gate refuses any body a gateway might return for this path. +%% @doc A fake CIDv1 with random digest must not resolve anywhere — the +%% digest gate refuses any body a gateway might return for this path. live_gateway_rejects_unpinned_cid_test_() -> {timeout, 60, fun() -> ensure_inets(), diff --git a/src/include/hb.hrl b/src/include/hb.hrl index 3321382d2..8312ed4f8 100644 --- a/src/include/hb.hrl +++ b/src/include/hb.hrl @@ -5,12 +5,8 @@ %% @doc Macro for checking if a message is empty, ignoring its hashpath. -define(IS_EMPTY_MESSAGE(Msg), (map_size(Msg) == 0) orelse (map_size(Msg) == 1 andalso (is_map_key(priv, Msg) orelse is_map_key(<<"priv">>, Msg)))). %% @doc Macro usable in guards that validates whether a term is a -%% human-readable ID encoding. Accepted sizes: -%% 32 — raw 32-byte hash (no encoding) -%% 42 — base64url with one byte of padding -%% 43 — base64url with no padding (Arweave native) -%% 59 — IPFS CIDv1, base32-lower, sha2-256, raw or dag-cbor multicodec --define(IS_ID(X), (is_binary(X) andalso (byte_size(X) == 42 orelse byte_size(X) == 43 orelse byte_size(X) == 32 orelse byte_size(X) == 59))). +%% human-readable ID encoding. +-define(IS_ID(X), (is_binary(X) andalso (byte_size(X) == 42 orelse byte_size(X) == 43 orelse byte_size(X) == 32))). %% @doc Macro for checking a term is a link. -define(IS_LINK(X), (is_tuple(X) andalso element(1, X) == link)). %% @doc List of special keys that are used in the AO-Core protocol. From dbf51f2a038e62a4fac9ff2bcb5e7f6c5ed6489d Mon Sep 17 00:00:00 2001 From: Sam Williams Date: Sat, 18 Apr 2026 16:43:55 -0400 Subject: [PATCH 19/22] test(ipfs): move message-vector skips into an IPFS-owned suite module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously `hb_message_test_vectors' carried four IPFS-specific skip clauses and listed `<<"ipfs@1.0">>' in `test_codecs/0'. The skip list should live with the IPFS device, not the generic test-vector battery (same spirit as `hb_ao_test_vectors' composing opts with `skip => [...]' rather than spreading device-specific branches through the test body). - Revert `hb_message_test_vectors.erl' to one-line delta against edge: just `-export([codec_test_suite/1]).'. The suite generator already accepted a list of codecs — we simply surface the API. - New `src/dev_codec_ipfs_message_test_vectors.erl' invokes `hb_test_utils:suite_with_opts/2' with `hb_message_test_vectors: codec_test_suite([<<"ipfs@1.0">>])' and the four skipped test names on the opts entry, each with a reason comment. All 50 vectors green under the new module; full regression across every codec + IPFS suite still green (2344 tests). Co-Authored-By: Claude Opus 4.6 --- src/dev_codec_ipfs_message_test_vectors.erl | 41 +++++++++++++++++++++ src/hb_message_test_vectors.erl | 14 +------ 2 files changed, 43 insertions(+), 12 deletions(-) create mode 100644 src/dev_codec_ipfs_message_test_vectors.erl diff --git a/src/dev_codec_ipfs_message_test_vectors.erl b/src/dev_codec_ipfs_message_test_vectors.erl new file mode 100644 index 000000000..29b13c6f6 --- /dev/null +++ b/src/dev_codec_ipfs_message_test_vectors.erl @@ -0,0 +1,41 @@ +%%% @doc Runs the `hb_message_test_vectors' battery against `~ipfs@1.0', +%%% declaring the handful of vectors that do not apply to a +%%% content-addressed, unsigned-only codec via `skip' on the opts entry — +%%% so the IPFS-specific skip list lives with the IPFS device instead of +%%% inside the generic test-vector module. +-module(dev_codec_ipfs_message_test_vectors). +-include_lib("eunit/include/eunit.hrl"). +-include("include/hb.hrl"). + +%% @doc Run the message test-vector battery for `~ipfs@1.0'. +suite_test_() -> + hb_test_utils:suite_with_opts( + hb_message_test_vectors:codec_test_suite([<<"ipfs@1.0">>]), + opts()). + +%% @doc Opts shaped for `hb_test_utils:suite_with_opts/2'. The `skip' list +%% names the vectors that don't apply to `~ipfs@1.0' — each with a reason. +opts() -> + [#{ + name => ipfs, + parallel => true, + desc => <<"ipfs@1.0">>, + opts => #{ + store => hb_test_utils:test_store(), + priv_wallet => hb:wallet() + }, + skip => [ + %% `atom' has no IPLD representation beyond null/true/false, + %% so non-null/true/false atoms throw on encode. + <<"Structured field atom parsing">>, + %% `~ipfs@1.0' is unsigned-only (content-addressed); the + %% node-message signing path requires a signed commitment. + <<"Sign node message">>, + %% `priv' is session-only state and is stripped by `to/3' — + %% it must never cross the content-addressed boundary. + <<"Priv survives conversion">>, + %% `{link, CID}' flattens to the CID string in phase 2. A + %% link-aware mapping through `hb_link' is the next phase. + <<"ID of linked message">> + ] + }]. diff --git a/src/hb_message_test_vectors.erl b/src/hb_message_test_vectors.erl index 01c6f03a8..c22dd7492 100644 --- a/src/hb_message_test_vectors.erl +++ b/src/hb_message_test_vectors.erl @@ -2,6 +2,7 @@ %%% `message@1.0' encoding and commitment APIs. Additionally, this module %%% houses tests that ensure the general functioning of the `hb_message' API. -module(hb_message_test_vectors). +-export([codec_test_suite/1]). -include_lib("eunit/include/eunit.hrl"). -include("include/hb.hrl"). @@ -37,8 +38,7 @@ test_codecs() -> <<"json@1.0">>, #{ <<"device">> => <<"json@1.0">>, <<"bundle">> => true }, <<"tx@1.0">>, - #{ <<"device">> => <<"tx@1.0">>, <<"bundle">> => true }, - <<"ipfs@1.0">> + #{ <<"device">> => <<"tx@1.0">>, <<"bundle">> => true } ]. %% @doc Return a set of options for testing, taking the codec name as an @@ -502,9 +502,6 @@ binary_to_binary_test(Codec, Opts) -> ?assertEqual(Bin, Decoded). %% @doc Structured field parsing tests. -structured_field_atom_parsing_test(<<"ipfs@1.0">>, _Opts) -> skip; -structured_field_atom_parsing_test(#{ <<"device">> := <<"ipfs@1.0">> }, _Opts) -> - skip; structured_field_atom_parsing_test(Codec, Opts) -> Msg = #{ highly_unusual_http_header => highly_unusual_value }, Encoded = hb_message:convert(Msg, Codec, <<"structured@1.0">>, Opts), @@ -1371,8 +1368,6 @@ large_body_committed_keys_test(Codec, Opts) -> skip end. -sign_node_message_test(<<"ipfs@1.0">>, _Opts) -> skip; -sign_node_message_test(#{ <<"device">> := <<"ipfs@1.0">> }, _Opts) -> skip; sign_node_message_test(Codec, Opts) -> Msg = hb_message:commit(hb_opts:default_message_with_env(), Opts, Codec), ?event({committed, Msg}), @@ -1439,9 +1434,6 @@ recursive_nested_list_test(Codec, Opts) -> priv_survives_conversion_test(<<"ans104@1.0">>, _Opts) -> skip; priv_survives_conversion_test(<<"tx@1.0">>, _Opts) -> skip; priv_survives_conversion_test(<<"json@1.0">>, _Opts) -> skip; -priv_survives_conversion_test(<<"ipfs@1.0">>, _Opts) -> skip; -priv_survives_conversion_test(#{ <<"device">> := <<"ipfs@1.0">> }, _Opts) -> - skip; priv_survives_conversion_test(#{ <<"device">> := <<"ans104@1.0">> }, _Opts) -> skip; priv_survives_conversion_test(#{ <<"device">> := <<"tx@1.0">> }, _Opts) -> @@ -1552,8 +1544,6 @@ bundled_and_unbundled_ids_differ_test(_Codec, _Opts) -> id_of_linked_message_test(#{ <<"bundle">> := true }, _Opts) -> skip; -id_of_linked_message_test(<<"ipfs@1.0">>, _Opts) -> skip; -id_of_linked_message_test(#{ <<"device">> := <<"ipfs@1.0">> }, _Opts) -> skip; id_of_linked_message_test(Codec, Opts) -> Msg = #{ <<"immediate-key">> => <<"immediate-value">>, From c5917f1be5cb320c6cd5016f9c6e794d4b7b1fed Mon Sep 17 00:00:00 2001 From: Sam Williams Date: Sat, 18 Apr 2026 16:51:17 -0400 Subject: [PATCH 20/22] test(ipfs): consolidate all IPFS test modules into dev_codec_ipfs_test_vectors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Merge the three separate test modules into one canonical test-vectors file for `~ipfs@1.0': - `dev_codec_ipfs_test' (integration / codec / cache linkage), - `dev_codec_ipfs_live_test' (live gateways + HB-node flows), - `dev_codec_ipfs_message_test_vectors' (hb_message_test_vectors battery), all now live in `src/dev_codec_ipfs_test_vectors.erl', organised into five sections (integration dispatch, cache linkage, to/from conversion, live network flows, message-vector suite with `skip' list). Shared helpers (`opts/0,1', `ipfs_commit/2,3', `ipfs_device/0', `gateway_store/0', `node_opts/0', `with_live_gateways/1', `response_body/1') and constants (`?HELLO_WORLD*', `?EMPTY_MAP_CID', `?LIVE_GATEWAYS', `?LOOKUP_PATH') are declared once instead of duplicated across three files. Unit-level tests continue to live inline in `dev_codec_ipfs', `dev_codec_ipfs_cid', `dev_codec_ipfs_cbor', and `hb_store_ipfs_gateway' — matching the rest of the HyperBEAM codebase. 72 IPFS tests pass under the new module (15 integration + 7 live + 50 message-vector suite); full regression green at 2344 tests. Co-Authored-By: Claude Opus 4.6 --- src/dev_codec_ipfs_live_test.erl | 243 ---------- src/dev_codec_ipfs_message_test_vectors.erl | 41 -- src/dev_codec_ipfs_test.erl | 254 ---------- src/dev_codec_ipfs_test_vectors.erl | 513 ++++++++++++++++++++ 4 files changed, 513 insertions(+), 538 deletions(-) delete mode 100644 src/dev_codec_ipfs_live_test.erl delete mode 100644 src/dev_codec_ipfs_message_test_vectors.erl delete mode 100644 src/dev_codec_ipfs_test.erl create mode 100644 src/dev_codec_ipfs_test_vectors.erl diff --git a/src/dev_codec_ipfs_live_test.erl b/src/dev_codec_ipfs_live_test.erl deleted file mode 100644 index ceef4473e..000000000 --- a/src/dev_codec_ipfs_live_test.erl +++ /dev/null @@ -1,243 +0,0 @@ -%%% @doc End-to-end tests for `~ipfs@1.0' against live IPFS gateways and -%%% real HyperBEAM nodes, exercising the user-facing flows advertised in -%%% PR #868: -%%% -%%% 1. `GET /~lookup@1.0/read&target=' serves the body. -%%% 2. First lookup fetches and pins; subsequent lookups resolve locally. -%%% 3. `GET /~lookup@1.0/read&target=/commit&type=signed...' returns -%%% a bundler-ready ANS-104 signed message. -%%% -%%% Each test opts into the device via per-node `preloaded_devices' — the -%%% same way a production operator would enable it. Tests skip gracefully -%%% when all configured gateways are unreachable. --module(dev_codec_ipfs_live_test). --include_lib("eunit/include/eunit.hrl"). --include("include/hb.hrl"). - --define(HELLO_WORLD_CID, - <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>). --define(HELLO_WORLD_BODY, <<"hello world">>). --define(LIVE_GATEWAYS, [ - <<"https://ipfs.io">>, - <<"https://dweb.link">>, - <<"https://nftstorage.link">>, - <<"https://4everland.io">> -]). --define(LOOKUP_PATH, - <<"/~lookup@1.0/read&target=", ?HELLO_WORLD_CID/binary>>). - -%%% Helpers - -gateway_store() -> - #{ - <<"store-module">> => hb_store_ipfs_gateway, - <<"gateways">> => ?LIVE_GATEWAYS, - <<"timeout">> => 20000 - }. - -ipfs_device() -> - #{ <<"name">> => <<"ipfs@1.0">>, <<"module">> => dev_codec_ipfs }. - -%% @doc Base node opts with `~ipfs@1.0' loaded and a gateway-backed store -%% behind a volatile primary. -node_opts() -> - Stock = hb_opts:get(preloaded_devices, [], #{}), - #{ - cache_control => <<"cache">>, - priv_wallet => hb:wallet(), - preloaded_devices => [ipfs_device() | Stock], - store => [hb_test_utils:test_store(), gateway_store()] - }. - -%% @doc Run `Fun' if the canonical `hello world' CID is live-reachable; -%% otherwise `?debugFmt' a skip note. Every live test routes through this. -with_live_gateways(Fun) -> - application:ensure_all_started(inets), - application:ensure_all_started(ssl), - case hb_store_ipfs_gateway:read(gateway_store(), ?HELLO_WORLD_CID) of - {ok, _} -> Fun(); - _ -> - ?debugFmt("Skipping: all gateways unreachable for ~s", - [?HELLO_WORLD_CID]) - end. - -%% @doc Extract the body from an `hb_http:get' response — sometimes a -%% bare binary, sometimes a map whose `body' may itself be a link. -response_body(R) when is_binary(R) -> R; -response_body(#{ <<"body">> := B }) -> hb_cache:ensure_loaded(B, #{}). - -%%% PR Path 1 — Serve a CID from a running node - -live_http_get_cid_serves_body_test_() -> - {timeout, 90, fun() -> with_live_gateways(fun() -> - NodeURL = hb_http_server:start_node(node_opts()), - {ok, R} = hb_http:get(NodeURL, ?LOOKUP_PATH, #{}), - ?assertEqual(?HELLO_WORLD_BODY, response_body(R)) - end) end}. - -%% @doc Recomputing the CID from the wire body must reproduce the -%% requested CID — the only verification that matters in IPFS. -live_http_body_round_trips_to_cid_test_() -> - {timeout, 90, fun() -> with_live_gateways(fun() -> - NodeURL = hb_http_server:start_node(node_opts()), - {ok, R} = hb_http:get(NodeURL, ?LOOKUP_PATH, #{}), - ?assertEqual(?HELLO_WORLD_CID, - dev_codec_ipfs_cid:encode( - <<"raw">>, sha2_256, response_body(R))) - end) end}. - -%%% PR Path 2 — Preload / en-masse cache a set of CIDs - -%% @doc First lookup pulls the CID through the gateway and pins it to the -%% node's primary store; a second direct probe of the primary succeeds. -live_cache_preload_pattern_test_() -> - {timeout, 90, fun() -> with_live_gateways(fun() -> - LocalStore = #{ - <<"store-module">> => hb_store_fs, - <<"name">> => - iolist_to_binary( - ["cache-TEST/ipfs-preload-", - integer_to_list(erlang:system_time(microsecond))]) - }, - hb_store:reset(LocalStore), - Stock = hb_opts:get(preloaded_devices, [], #{}), - NodeURL = hb_http_server:start_node(#{ - cache_control => <<"cache">>, - priv_wallet => hb:wallet(), - preloaded_devices => [ipfs_device() | Stock], - store => [LocalStore, gateway_store()] - }), - {ok, R1} = hb_http:get(NodeURL, ?LOOKUP_PATH, #{}), - ?assertEqual(?HELLO_WORLD_BODY, response_body(R1)), - LocalOpts = #{ store => [LocalStore] }, - {ok, R2} = hb_cache:read(?HELLO_WORLD_CID, LocalOpts), - ?assertEqual(?HELLO_WORLD_BODY, - hb_cache:ensure_loaded( - hb_ao:get(<<"body">>, R2, <<>>, LocalOpts), LocalOpts)) - end) end}. - -%% @doc Transport: an IPFS commitment must arrive on the client side -%% under its CID map key, not under `h(Sig)'. This is what the `id=' -%% extension in `dev_codec_httpsig_siginfo' preserves. -live_http_ipfs_commitment_survives_transport_test_() -> - {timeout, 90, fun() -> with_live_gateways(fun() -> - NodeURL = hb_http_server:start_node(node_opts()), - ClientOpts = #{ preloaded_devices => - [ipfs_device() | hb_opts:get(preloaded_devices, [], #{})] }, - {ok, R} = hb_http:get(NodeURL, ?LOOKUP_PATH, ClientOpts), - Msg = - case R of - M when is_map(M) -> M; - B when is_binary(B) -> #{ <<"body">> => B } - end, - IpfsComms = maps:filter( - fun(_K, #{<<"commitment-device">> := <<"ipfs@1.0">>}) -> true; - (_K, _) -> false end, - maps:get(<<"commitments">>, Msg, #{})), - case maps:to_list(IpfsComms) of - [] -> ?debugFmt( - "Skipping: no IPFS commitment on response", []); - [{CID, _}] -> ?assertEqual(?HELLO_WORLD_CID, CID); - Many -> ?debugFmt("multiple ipfs commitments: ~p", [Many]) - end - end) end}. - -%% @doc Two in-process nodes, wired so a client request on Node B -%% transparently pulls through Node A: -%% -%% Node A — upstream — has ONLY `hb_store_ipfs_gateway'. Every read -%% passes through to the real IPFS network. -%% Node B — downstream — has a primary fs store plus -%% `hb_store_remote_node' pointed at Node A with `local-store' set to -%% the primary. B's cache misses fall through to A; A's responses -%% write through into B's primary on return. -%% -%% After the first query pins the body to B's primary, Node A is killed. -%% The next query on B must still succeed — served entirely from B's cache. -live_hb_to_hb_remote_store_relay_test_() -> - {timeout, 120, fun() -> with_live_gateways(fun() -> - %% Two distinct wallets — the HB server_id is derived from - %% `priv_wallet''s address, so shared wallets collapse two nodes - %% onto one listener. - Stock = hb_opts:get(preloaded_devices, [], #{}), - NodeAWallet = ar_wallet:new(), - NodeAServerID = - hb_util:human_id(ar_wallet:to_address(NodeAWallet)), - NodeAURL = hb_http_server:start_node(#{ - port => 18770, - priv_wallet => NodeAWallet, - cache_control => <<"cache">>, - preloaded_devices => [ipfs_device() | Stock], - store => [gateway_store()] - }), - NodeBPrimary = hb_test_utils:test_store(), - NodeBURL = hb_http_server:start_node(#{ - port => 18771, - priv_wallet => ar_wallet:new(), - cache_control => <<"cache">>, - preloaded_devices => [ipfs_device() | Stock], - store => [ - NodeBPrimary, - #{ - <<"store-module">> => hb_store_remote_node, - <<"node">> => NodeAURL, - <<"local-store">> => [NodeBPrimary] - } - ] - }), - %% (1) First query: B->A->real IPFS, cached on B's primary on return. - {ok, R1} = hb_http:get(NodeBURL, ?LOOKUP_PATH, #{}), - ?assertEqual(?HELLO_WORLD_BODY, response_body(R1)), - %% (2) B's primary now holds the message keyed by the CID. - LocalOnly = #{ store => [NodeBPrimary] }, - {ok, MsgOnB} = hb_cache:read(?HELLO_WORLD_CID, LocalOnly), - ?assertEqual(?HELLO_WORLD_BODY, - hb_cache:ensure_loaded( - maps:get(<<"body">>, MsgOnB), LocalOnly)), - ?assert(maps:is_key(?HELLO_WORLD_CID, - maps:get(<<"commitments">>, MsgOnB, #{}))), - %% (3) Kill Node A; (4) B must still serve from primary. - ok = cowboy:stop_listener(NodeAServerID), - {ok, R2} = hb_http:get(NodeBURL, ?LOOKUP_PATH, #{}), - ?assertEqual(?HELLO_WORLD_BODY, response_body(R2)) - end) end}. - -%%% PR Path 3 — Commit IPFS content as ANS-104 via the node's wallet - -%% The server-side half of the push-to-Arweave chain: node reads the CID -%% and re-commits as ANS-104 signed. The final POST to `~arweave@2.9/tx' -%% requires a funded wallet and a reachable bundler, neither in scope for -%% automated CI. -live_lookup_then_ans104_commit_test_() -> - {timeout, 90, fun() -> with_live_gateways(fun() -> - NodeURL = hb_http_server:start_node(node_opts()), - Path = <>, - {ok, R} = hb_http:get(NodeURL, Path, #{}), - ?assertEqual(?HELLO_WORLD_BODY, response_body(R)) - end) end}. - -%%% Lua computation across IPFS-resolved data - -live_lua_computation_over_ipfs_body_test_() -> - {timeout, 90, fun() -> with_live_gateways(fun() -> - NodeOpts = node_opts(), - NodeURL = hb_http_server:start_node(NodeOpts), - {ok, IpfsMsg} = hb_cache:read(?HELLO_WORLD_CID, NodeOpts), - Body = hb_cache:ensure_loaded( - hb_ao:get(<<"body">>, IpfsMsg, <<>>, NodeOpts), NodeOpts), - ?assertEqual(?HELLO_WORLD_BODY, Body), - Base = #{ - <<"device">> => <<"lua@5.3a">>, - <<"content-type">> => <<"application/lua">>, - <<"body">> => - <<"function byte_length(base, req)\n" - " return #base.body\n" - "end\n">>, - <<"function">> => <<"byte_length">>, - <<"parameters">> => [ #{ <<"body">> => Body } ] - }, - ?assertEqual(byte_size(?HELLO_WORLD_BODY), - hb_ao:get(<<"byte_length">>, Base, undefined, NodeOpts)), - {ok, _} = hb_http:get(NodeURL, <<"/~meta@1.0/info">>, #{}) - end) end}. diff --git a/src/dev_codec_ipfs_message_test_vectors.erl b/src/dev_codec_ipfs_message_test_vectors.erl deleted file mode 100644 index 29b13c6f6..000000000 --- a/src/dev_codec_ipfs_message_test_vectors.erl +++ /dev/null @@ -1,41 +0,0 @@ -%%% @doc Runs the `hb_message_test_vectors' battery against `~ipfs@1.0', -%%% declaring the handful of vectors that do not apply to a -%%% content-addressed, unsigned-only codec via `skip' on the opts entry — -%%% so the IPFS-specific skip list lives with the IPFS device instead of -%%% inside the generic test-vector module. --module(dev_codec_ipfs_message_test_vectors). --include_lib("eunit/include/eunit.hrl"). --include("include/hb.hrl"). - -%% @doc Run the message test-vector battery for `~ipfs@1.0'. -suite_test_() -> - hb_test_utils:suite_with_opts( - hb_message_test_vectors:codec_test_suite([<<"ipfs@1.0">>]), - opts()). - -%% @doc Opts shaped for `hb_test_utils:suite_with_opts/2'. The `skip' list -%% names the vectors that don't apply to `~ipfs@1.0' — each with a reason. -opts() -> - [#{ - name => ipfs, - parallel => true, - desc => <<"ipfs@1.0">>, - opts => #{ - store => hb_test_utils:test_store(), - priv_wallet => hb:wallet() - }, - skip => [ - %% `atom' has no IPLD representation beyond null/true/false, - %% so non-null/true/false atoms throw on encode. - <<"Structured field atom parsing">>, - %% `~ipfs@1.0' is unsigned-only (content-addressed); the - %% node-message signing path requires a signed commitment. - <<"Sign node message">>, - %% `priv' is session-only state and is stripped by `to/3' — - %% it must never cross the content-addressed boundary. - <<"Priv survives conversion">>, - %% `{link, CID}' flattens to the CID string in phase 2. A - %% link-aware mapping through `hb_link' is the next phase. - <<"ID of linked message">> - ] - }]. diff --git a/src/dev_codec_ipfs_test.erl b/src/dev_codec_ipfs_test.erl deleted file mode 100644 index 0fe6e8a9f..000000000 --- a/src/dev_codec_ipfs_test.erl +++ /dev/null @@ -1,254 +0,0 @@ -%%% @doc Integration tests for `~ipfs@1.0': dispatch through -%%% `hb_message:commit/3' and `hb_message:verify/3', cache linkage from CID -%%% to message, and the `to/3'+`from/3' dag-cbor path. Unit-level tests -%%% live inline in `dev_codec_ipfs' and `dev_codec_ipfs_cid'. --module(dev_codec_ipfs_test). --include_lib("eunit/include/eunit.hrl"). --include("include/hb.hrl"). - --define(HELLO_WORLD, <<"hello world">>). --define(HELLO_WORLD_CID, - <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>). --define(EMPTY_MAP_CID, - <<"bafyreigbtj4x7ip5legnfznufuopl4sg4knzc2cof6duas4b3q2fy6swua">>). - -%%% Helpers - -%% @doc Test opts that opt into `~ipfs@1.0' via `preloaded_devices' and use -%% a volatile store for isolation — the same pattern a production operator -%% would use to enable the device without editing the kernel. -opts() -> - opts(#{ store => hb_test_utils:test_store() }). -opts(Base) -> - Stock = hb_opts:get(preloaded_devices, [], Base), - Base#{ - preloaded_devices => - [ #{ <<"name">> => <<"ipfs@1.0">>, - <<"module">> => dev_codec_ipfs } | Stock ] - }. - -%% @doc Commit `Msg' with an unsigned `~ipfs@1.0' commitment. `HashAlg' is -%% optional (defaults to the codec's `sha2-256-raw'). -ipfs_commit(Msg, Opts) -> - ipfs_commit(Msg, Opts, #{}). -ipfs_commit(Msg, Opts, Extra) -> - hb_message:commit(Msg, Opts, Extra#{ - <<"commitment-device">> => <<"ipfs@1.0">>, - <<"type">> => <<"unsigned">> - }). - -%%% 1. Dispatch through hb_message:commit / hb_message:verify - -hb_message_commit_dispatches_to_us_test() -> - Opts = opts(), - Committed = ipfs_commit(#{ <<"body">> => ?HELLO_WORLD }, Opts), - Commitments = maps:get(<<"commitments">>, Committed), - ?assert(maps:is_key(?HELLO_WORLD_CID, Commitments)), - ?assertEqual(<<"ipfs@1.0">>, - maps:get(<<"commitment-device">>, - maps:get(?HELLO_WORLD_CID, Commitments))). - -hb_message_verify_dispatches_to_us_test() -> - Opts = opts(), - Committed = ipfs_commit(#{ <<"body">> => ?HELLO_WORLD }, Opts), - ?assert(hb_message:verify( - Committed, #{ <<"commitment-ids">> => [?HELLO_WORLD_CID] }, Opts)). - -verify_rejects_tampered_body_via_hb_message_test() -> - Opts = opts(), - Committed = ipfs_commit(#{ <<"body">> => ?HELLO_WORLD }, Opts), - Tampered = Committed#{ <<"body">> => <<"hello earth">> }, - ?assertNot(hb_message:verify( - Tampered, #{ <<"commitment-ids">> => [?HELLO_WORLD_CID] }, Opts)). - -committed_returns_body_key_test() -> - Opts = opts(), - Committed = ipfs_commit(#{ <<"body">> => ?HELLO_WORLD }, Opts), - ?assertEqual([<<"body">>], - hb_message:committed(Committed, [?HELLO_WORLD_CID], Opts)). - -%%% 2. Cache linkage — the load-bearing claim of phase 1 - -%% @doc Write a committed message to the cache and look it up by CID -%% alone. `hb_cache:do_write_message/3' links commitment IDs to the -%% uncommitted root; `hb_cache:read/2' follows that link. -cache_links_cid_to_uncommitted_id_test() -> - Opts = opts(), - Committed = ipfs_commit(#{ <<"body">> => ?HELLO_WORLD }, Opts), - {ok, _} = hb_cache:write(Committed, Opts), - {ok, Recovered} = hb_cache:read(?HELLO_WORLD_CID, Opts), - ?assertEqual(?HELLO_WORLD, - hb_cache:ensure_loaded(maps:get(<<"body">>, Recovered), Opts)), - ?assert(maps:is_key(?HELLO_WORLD_CID, - maps:get(<<"commitments">>, Recovered, #{}))). - -%% @doc Multiple commitment devices on one message do not conflict: the -%% CID still resolves through the cache. -multiple_commitment_devices_coexist_test() -> - Opts = opts(), - Committed = ipfs_commit(#{ <<"body">> => ?HELLO_WORLD }, Opts), - {ok, _} = hb_cache:write(Committed, Opts), - {ok, ViaCID} = hb_cache:read(?HELLO_WORLD_CID, Opts), - ?assertEqual(?HELLO_WORLD, - hb_cache:ensure_loaded(maps:get(<<"body">>, ViaCID), Opts)). - -%%% 3. Phase 2 — to/3 and from/3 through hb_message:convert - -to_dag_cbor_simple_test() -> - Bytes = hb_message:convert( - #{ <<"hello">> => <<"world">> }, <<"ipfs@1.0">>, opts()), - ?assertEqual(<<16#a1, 16#65, "hello", 16#65, "world">>, Bytes). - -%% @doc Roundtripping a typed message through dag-cbor preserves rich -%% types: integers, floats, booleans, null, lists, nested maps. -roundtrip_typed_message_test() -> - Opts = opts(), - Msg = #{ - <<"name">> => <<"alice">>, - <<"age">> => 30, - <<"score">> => 4.5, - <<"admin">> => true, - <<"parent">> => null, - <<"tags">> => [<<"a">>, <<"b">>, <<"c">>], - <<"nested">> => #{ <<"k">> => <<"v">>, <<"n">> => -42 } - }, - Bytes = hb_message:convert(Msg, <<"ipfs@1.0">>, Opts), - Decoded = hb_message:convert( - Bytes, <<"structured@1.0">>, <<"ipfs@1.0">>, Opts), - ?assert(hb_message:match(Msg, Decoded, strict, Opts)). - -%% @doc Encoding is deterministic: two differently-ordered source maps -%% produce the same bytes, and re-encoding is stable. -encoding_is_deterministic_test() -> - Opts = opts(), - B1 = hb_message:convert( - #{ <<"a">> => 1, <<"bb">> => 2, <<"ccc">> => 3 }, - <<"ipfs@1.0">>, Opts), - B2 = hb_message:convert( - #{ <<"ccc">> => 3, <<"a">> => 1, <<"bb">> => 2 }, - <<"ipfs@1.0">>, Opts), - ?assertEqual(B1, B2), - ?assertEqual(B1, hb_message:convert( - #{ <<"a">> => 1, <<"bb">> => 2, <<"ccc">> => 3 }, - <<"ipfs@1.0">>, Opts)). - -%% @doc Committing the dag-cbor bytes of a message yields a CIDv1 -%% identical to the one `ipfs dag put --input-codec dag-cbor' would produce. -cid_matches_dag_cbor_of_message_test() -> - Opts = opts(), - Bytes = hb_message:convert( - #{ <<"hello">> => <<"world">> }, <<"ipfs@1.0">>, Opts), - Committed = ipfs_commit( - #{ <<"body">> => Bytes }, Opts, - #{ <<"hash-alg">> => <<"sha2-256-dag-cbor">> }), - [CID] = maps:keys(maps:get(<<"commitments">>, Committed)), - {ok, Parts} = dev_codec_ipfs_cid:decode(CID), - ?assertEqual(<<"sha2-256-dag-cbor">>, maps:get(<<"hash-alg">>, Parts)), - ?assertEqual(crypto:hash(sha256, Bytes), maps:get(<<"digest">>, Parts)), - ?assertMatch(<<"bafyrei", _:52/binary>>, CID). - -%% @doc Atoms outside `null/true/false' have no dag-cbor representation. -unsupported_atom_rejected_test() -> - ?assertMatch( - {error, {dag_cbor_encode, {unsupported_atom, something}}}, - dev_codec_ipfs:to(#{ <<"kind">> => something }, #{}, opts())). - -%% @doc End-to-end against real IPFS: fetch a known pinned dag-cbor CID, -%% verify the attached commitment, decode through `from/3'. Skipped if all -%% live gateways are unreachable. -live_end_to_end_fetch_and_decode_dag_cbor_test_() -> - {timeout, 60, fun() -> - application:ensure_all_started(inets), - application:ensure_all_started(ssl), - NodeOpts = opts(#{ - store => [ - hb_test_utils:test_store(), - #{ - <<"store-module">> => hb_store_ipfs_gateway, - <<"gateways">> => - [<<"https://ipfs.io">>, - <<"https://dweb.link">>, - <<"https://nftstorage.link">>], - <<"timeout">> => 20000 - } - ] - }), - case hb_cache:read(?EMPTY_MAP_CID, NodeOpts) of - {ok, Fetched} -> - Bytes = - hb_cache:ensure_loaded( - maps:get(<<"body">>, Fetched), NodeOpts), - ?assertEqual(<<16#a0>>, Bytes), - ?assert(hb_message:verify( - Fetched, - #{ <<"commitment-ids">> => [?EMPTY_MAP_CID] }, - NodeOpts)), - ?assertEqual(#{}, - hb_message:convert( - Bytes, <<"structured@1.0">>, <<"ipfs@1.0">>, - NodeOpts)); - _ -> - ?debugFmt("Skipping: all gateways missed ~s", - [?EMPTY_MAP_CID]) - end - end}. - -%% @doc Local end-to-end (no network): encode a rich message, commit its -%% CID, write, read back by CID, decode. Exercises the whole codec + commit -%% + cache path with no mocks. -local_end_to_end_encode_commit_cache_decode_test() -> - Opts = opts(), - Msg = #{ - <<"kind">> => <<"greeting">>, - <<"from">> => <<"alice">>, - <<"to">> => <<"bob">>, - <<"count">> => 3, - <<"active">> => true - }, - Bytes = hb_message:convert(Msg, <<"ipfs@1.0">>, Opts), - Committed = ipfs_commit( - #{ <<"body">> => Bytes }, Opts, - #{ <<"hash-alg">> => <<"sha2-256-dag-cbor">> }), - [CID] = maps:keys(maps:get(<<"commitments">>, Committed)), - {ok, _} = hb_cache:write(Committed, Opts), - {ok, Fetched} = hb_cache:read(CID, Opts), - FetchedBytes = - hb_cache:ensure_loaded(maps:get(<<"body">>, Fetched), Opts), - ?assertEqual(Bytes, FetchedBytes), - ?assert(hb_message:match( - Msg, - hb_message:convert( - FetchedBytes, <<"structured@1.0">>, <<"ipfs@1.0">>, Opts), - strict, Opts)). - -%% @doc A committed message roundtrips through the codec with its -%% commitments intact — matching `dev_codec_json' / `dev_codec_flat' / -%% `dev_codec_ans104'. -commit_then_encode_preserves_commitments_test() -> - Opts = opts(), - Committed = ipfs_commit( - #{ <<"body">> => ?HELLO_WORLD, <<"kind">> => <<"greeting">> }, Opts), - Bytes = hb_message:convert(Committed, <<"ipfs@1.0">>, Opts), - {ok, Ipld} = dev_codec_ipfs_cbor:decode(Bytes), - ?assert(maps:is_key(<<"commitments">>, Ipld)), - ?assert(hb_message:match( - Committed, - hb_message:convert( - Bytes, <<"structured@1.0">>, <<"ipfs@1.0">>, Opts), - strict, Opts)). - -%% @doc Two different codecs of the same body give two distinct CIDs that -%% both resolve to the same cached message. -raw_and_dag_cbor_cids_coexist_test() -> - Opts = opts(), - Body = <<16#a0>>, - M1 = ipfs_commit( - #{ <<"body">> => Body }, Opts, - #{ <<"hash-alg">> => <<"sha2-256-raw">> }), - M2 = ipfs_commit( - M1, Opts, #{ <<"hash-alg">> => <<"sha2-256-dag-cbor">> }), - ?assertEqual(2, maps:size(maps:get(<<"commitments">>, M2))), - {ok, _} = hb_cache:write(M2, Opts), - {ok, ViaDagCbor} = hb_cache:read(?EMPTY_MAP_CID, Opts), - ?assertEqual(Body, - hb_cache:ensure_loaded(maps:get(<<"body">>, ViaDagCbor), Opts)). diff --git a/src/dev_codec_ipfs_test_vectors.erl b/src/dev_codec_ipfs_test_vectors.erl new file mode 100644 index 000000000..4dbd85837 --- /dev/null +++ b/src/dev_codec_ipfs_test_vectors.erl @@ -0,0 +1,513 @@ +%%% @doc Test vectors for `~ipfs@1.0'. Three layers of coverage: +%%% +%%% 1. Integration — dispatch through `hb_message:commit/3' and +%%% `hb_message:verify/3', cache linkage from CID to message, and the +%%% `to/3' / `from/3' dag-cbor conversions. +%%% 2. Live — end-to-end tests against real IPFS HTTP gateways and live +%%% in-process HyperBEAM nodes (the flows advertised in PR #868: +%%% serve a CID, preload/pin a CID, commit as ANS-104, relay between +%%% two nodes). Tests skip gracefully when no gateway is reachable. +%%% 3. Message vectors — the `hb_message_test_vectors' battery run +%%% against the codec, with a `skip' list declared on the opts entry +%%% for vectors that do not apply to a content-addressed, unsigned- +%%% only codec. +%%% +%%% Unit-level tests continue to live inline in `dev_codec_ipfs', +%%% `dev_codec_ipfs_cid', `dev_codec_ipfs_cbor', and `hb_store_ipfs_gateway'. +-module(dev_codec_ipfs_test_vectors). +-include_lib("eunit/include/eunit.hrl"). +-include("include/hb.hrl"). + +%% Canonical IPFS ground truth: `ipfs add --raw-leaves -Q <"hello world"'. +-define(HELLO_WORLD, <<"hello world">>). +-define(HELLO_WORLD_CID, + <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>). +%% The canonical empty dag-cbor block `{}` (byte `0xa0') is pinned on +%% every public gateway. +-define(EMPTY_MAP_CID, + <<"bafyreigbtj4x7ip5legnfznufuopl4sg4knzc2cof6duas4b3q2fy6swua">>). +-define(LIVE_GATEWAYS, [ + <<"https://ipfs.io">>, + <<"https://dweb.link">>, + <<"https://nftstorage.link">>, + <<"https://4everland.io">> +]). +-define(LOOKUP_PATH, + <<"/~lookup@1.0/read&target=", ?HELLO_WORLD_CID/binary>>). + +%%% Helpers + +%% @doc Integration-test opts: opt into `~ipfs@1.0' via `preloaded_devices' +%% and use a volatile store for isolation. +opts() -> + opts(#{ store => hb_test_utils:test_store() }). +opts(Base) -> + Stock = hb_opts:get(preloaded_devices, [], Base), + Base#{ + preloaded_devices => + [ipfs_device() | Stock] + }. + +%% @doc Commit `Msg' with an unsigned `~ipfs@1.0' commitment. `Extra' may +%% override `hash-alg' (defaults to the codec's `sha2-256-raw'). +ipfs_commit(Msg, Opts) -> + ipfs_commit(Msg, Opts, #{}). +ipfs_commit(Msg, Opts, Extra) -> + hb_message:commit(Msg, Opts, Extra#{ + <<"commitment-device">> => <<"ipfs@1.0">>, + <<"type">> => <<"unsigned">> + }). + +ipfs_device() -> + #{ <<"name">> => <<"ipfs@1.0">>, <<"module">> => dev_codec_ipfs }. + +gateway_store() -> + #{ + <<"store-module">> => hb_store_ipfs_gateway, + <<"gateways">> => ?LIVE_GATEWAYS, + <<"timeout">> => 20000 + }. + +%% @doc Full node opts: `~ipfs@1.0' loaded plus a gateway-backed store +%% behind a volatile primary. +node_opts() -> + Stock = hb_opts:get(preloaded_devices, [], #{}), + #{ + cache_control => <<"cache">>, + priv_wallet => hb:wallet(), + preloaded_devices => [ipfs_device() | Stock], + store => [hb_test_utils:test_store(), gateway_store()] + }. + +%% @doc Run `Fun' if the canonical `hello world' CID is live-reachable; +%% otherwise emit a skip note. Every live test routes through this. +with_live_gateways(Fun) -> + application:ensure_all_started(inets), + application:ensure_all_started(ssl), + case hb_store_ipfs_gateway:read(gateway_store(), ?HELLO_WORLD_CID) of + {ok, _} -> Fun(); + _ -> + ?debugFmt("Skipping: all gateways unreachable for ~s", + [?HELLO_WORLD_CID]) + end. + +%% @doc Extract the body from an `hb_http:get' response — sometimes a +%% bare binary, sometimes a map whose `body' may itself be a link. +response_body(R) when is_binary(R) -> R; +response_body(#{ <<"body">> := B }) -> hb_cache:ensure_loaded(B, #{}). + +%%% 1. Integration — dispatch through hb_message:commit / verify + +hb_message_commit_dispatches_to_us_test() -> + Opts = opts(), + Committed = ipfs_commit(#{ <<"body">> => ?HELLO_WORLD }, Opts), + Commitments = maps:get(<<"commitments">>, Committed), + ?assert(maps:is_key(?HELLO_WORLD_CID, Commitments)), + ?assertEqual(<<"ipfs@1.0">>, + maps:get(<<"commitment-device">>, + maps:get(?HELLO_WORLD_CID, Commitments))). + +hb_message_verify_dispatches_to_us_test() -> + Opts = opts(), + Committed = ipfs_commit(#{ <<"body">> => ?HELLO_WORLD }, Opts), + ?assert(hb_message:verify( + Committed, #{ <<"commitment-ids">> => [?HELLO_WORLD_CID] }, Opts)). + +verify_rejects_tampered_body_via_hb_message_test() -> + Opts = opts(), + Committed = ipfs_commit(#{ <<"body">> => ?HELLO_WORLD }, Opts), + Tampered = Committed#{ <<"body">> => <<"hello earth">> }, + ?assertNot(hb_message:verify( + Tampered, #{ <<"commitment-ids">> => [?HELLO_WORLD_CID] }, Opts)). + +committed_returns_body_key_test() -> + Opts = opts(), + Committed = ipfs_commit(#{ <<"body">> => ?HELLO_WORLD }, Opts), + ?assertEqual([<<"body">>], + hb_message:committed(Committed, [?HELLO_WORLD_CID], Opts)). + +%%% 2. Cache linkage — the load-bearing claim of phase 1 + +%% @doc Write a committed message to the cache, look it up by CID alone. +%% `hb_cache:do_write_message/3' links commitment IDs to the uncommitted +%% root; `hb_cache:read/2' follows that link. +cache_links_cid_to_uncommitted_id_test() -> + Opts = opts(), + Committed = ipfs_commit(#{ <<"body">> => ?HELLO_WORLD }, Opts), + {ok, _} = hb_cache:write(Committed, Opts), + {ok, Recovered} = hb_cache:read(?HELLO_WORLD_CID, Opts), + ?assertEqual(?HELLO_WORLD, + hb_cache:ensure_loaded(maps:get(<<"body">>, Recovered), Opts)), + ?assert(maps:is_key(?HELLO_WORLD_CID, + maps:get(<<"commitments">>, Recovered, #{}))). + +%% @doc Multiple commitment devices on one message do not conflict: the +%% CID still resolves through the cache. +multiple_commitment_devices_coexist_test() -> + Opts = opts(), + Committed = ipfs_commit(#{ <<"body">> => ?HELLO_WORLD }, Opts), + {ok, _} = hb_cache:write(Committed, Opts), + {ok, ViaCID} = hb_cache:read(?HELLO_WORLD_CID, Opts), + ?assertEqual(?HELLO_WORLD, + hb_cache:ensure_loaded(maps:get(<<"body">>, ViaCID), Opts)). + +%%% 3. to/3 and from/3 through hb_message:convert + +to_dag_cbor_simple_test() -> + Bytes = hb_message:convert( + #{ <<"hello">> => <<"world">> }, <<"ipfs@1.0">>, opts()), + ?assertEqual(<<16#a1, 16#65, "hello", 16#65, "world">>, Bytes). + +%% @doc Roundtripping a typed message through dag-cbor preserves rich +%% types: integers, floats, booleans, null, lists, nested maps. +roundtrip_typed_message_test() -> + Opts = opts(), + Msg = #{ + <<"name">> => <<"alice">>, + <<"age">> => 30, + <<"score">> => 4.5, + <<"admin">> => true, + <<"parent">> => null, + <<"tags">> => [<<"a">>, <<"b">>, <<"c">>], + <<"nested">> => #{ <<"k">> => <<"v">>, <<"n">> => -42 } + }, + Bytes = hb_message:convert(Msg, <<"ipfs@1.0">>, Opts), + Decoded = hb_message:convert( + Bytes, <<"structured@1.0">>, <<"ipfs@1.0">>, Opts), + ?assert(hb_message:match(Msg, Decoded, strict, Opts)). + +%% @doc Encoding is deterministic: two differently-ordered source maps +%% produce the same bytes, and re-encoding is stable. +encoding_is_deterministic_test() -> + Opts = opts(), + B1 = hb_message:convert( + #{ <<"a">> => 1, <<"bb">> => 2, <<"ccc">> => 3 }, + <<"ipfs@1.0">>, Opts), + B2 = hb_message:convert( + #{ <<"ccc">> => 3, <<"a">> => 1, <<"bb">> => 2 }, + <<"ipfs@1.0">>, Opts), + ?assertEqual(B1, B2), + ?assertEqual(B1, hb_message:convert( + #{ <<"a">> => 1, <<"bb">> => 2, <<"ccc">> => 3 }, + <<"ipfs@1.0">>, Opts)). + +%% @doc Committing the dag-cbor bytes of a message yields a CIDv1 +%% identical to the one `ipfs dag put --input-codec dag-cbor' would produce. +cid_matches_dag_cbor_of_message_test() -> + Opts = opts(), + Bytes = hb_message:convert( + #{ <<"hello">> => <<"world">> }, <<"ipfs@1.0">>, Opts), + Committed = ipfs_commit( + #{ <<"body">> => Bytes }, Opts, + #{ <<"hash-alg">> => <<"sha2-256-dag-cbor">> }), + [CID] = maps:keys(maps:get(<<"commitments">>, Committed)), + {ok, Parts} = dev_codec_ipfs_cid:decode(CID), + ?assertEqual(<<"sha2-256-dag-cbor">>, maps:get(<<"hash-alg">>, Parts)), + ?assertEqual(crypto:hash(sha256, Bytes), maps:get(<<"digest">>, Parts)), + ?assertMatch(<<"bafyrei", _:52/binary>>, CID). + +%% @doc Atoms outside `null/true/false' have no dag-cbor representation. +unsupported_atom_rejected_test() -> + ?assertMatch( + {error, {dag_cbor_encode, {unsupported_atom, something}}}, + dev_codec_ipfs:to(#{ <<"kind">> => something }, #{}, opts())). + +%% @doc Local end-to-end (no network): encode a rich message, commit its +%% CID, write, read back by CID, decode. Exercises the whole codec + +%% commit + cache path with no mocks. +local_end_to_end_encode_commit_cache_decode_test() -> + Opts = opts(), + Msg = #{ + <<"kind">> => <<"greeting">>, + <<"from">> => <<"alice">>, + <<"to">> => <<"bob">>, + <<"count">> => 3, + <<"active">> => true + }, + Bytes = hb_message:convert(Msg, <<"ipfs@1.0">>, Opts), + Committed = ipfs_commit( + #{ <<"body">> => Bytes }, Opts, + #{ <<"hash-alg">> => <<"sha2-256-dag-cbor">> }), + [CID] = maps:keys(maps:get(<<"commitments">>, Committed)), + {ok, _} = hb_cache:write(Committed, Opts), + {ok, Fetched} = hb_cache:read(CID, Opts), + FetchedBytes = + hb_cache:ensure_loaded(maps:get(<<"body">>, Fetched), Opts), + ?assertEqual(Bytes, FetchedBytes), + ?assert(hb_message:match( + Msg, + hb_message:convert( + FetchedBytes, <<"structured@1.0">>, <<"ipfs@1.0">>, Opts), + strict, Opts)). + +%% @doc A committed message roundtrips through the codec with its +%% commitments intact — matching `dev_codec_json' / `dev_codec_flat' / +%% `dev_codec_ans104'. +commit_then_encode_preserves_commitments_test() -> + Opts = opts(), + Committed = ipfs_commit( + #{ <<"body">> => ?HELLO_WORLD, <<"kind">> => <<"greeting">> }, Opts), + Bytes = hb_message:convert(Committed, <<"ipfs@1.0">>, Opts), + {ok, Ipld} = dev_codec_ipfs_cbor:decode(Bytes), + ?assert(maps:is_key(<<"commitments">>, Ipld)), + ?assert(hb_message:match( + Committed, + hb_message:convert( + Bytes, <<"structured@1.0">>, <<"ipfs@1.0">>, Opts), + strict, Opts)). + +%% @doc Two different codecs of the same body give two distinct CIDs that +%% both resolve to the same cached message. +raw_and_dag_cbor_cids_coexist_test() -> + Opts = opts(), + Body = <<16#a0>>, + M1 = ipfs_commit( + #{ <<"body">> => Body }, Opts, + #{ <<"hash-alg">> => <<"sha2-256-raw">> }), + M2 = ipfs_commit( + M1, Opts, #{ <<"hash-alg">> => <<"sha2-256-dag-cbor">> }), + ?assertEqual(2, maps:size(maps:get(<<"commitments">>, M2))), + {ok, _} = hb_cache:write(M2, Opts), + {ok, ViaDagCbor} = hb_cache:read(?EMPTY_MAP_CID, Opts), + ?assertEqual(Body, + hb_cache:ensure_loaded(maps:get(<<"body">>, ViaDagCbor), Opts)). + +%%% 4. Live — real gateways, real HyperBEAM nodes + +%% @doc End-to-end against real IPFS: fetch a known pinned dag-cbor CID, +%% verify the attached commitment, decode through `from/3'. +live_end_to_end_fetch_and_decode_dag_cbor_test_() -> + {timeout, 60, fun() -> + application:ensure_all_started(inets), + application:ensure_all_started(ssl), + NodeOpts = opts(#{ + store => + [hb_test_utils:test_store(), gateway_store()] + }), + case hb_cache:read(?EMPTY_MAP_CID, NodeOpts) of + {ok, Fetched} -> + Bytes = + hb_cache:ensure_loaded( + maps:get(<<"body">>, Fetched), NodeOpts), + ?assertEqual(<<16#a0>>, Bytes), + ?assert(hb_message:verify( + Fetched, + #{ <<"commitment-ids">> => [?EMPTY_MAP_CID] }, + NodeOpts)), + ?assertEqual(#{}, + hb_message:convert( + Bytes, <<"structured@1.0">>, <<"ipfs@1.0">>, + NodeOpts)); + _ -> + ?debugFmt("Skipping: all gateways missed ~s", + [?EMPTY_MAP_CID]) + end + end}. + +%% @doc A running HyperBEAM node serves a CID via the `~lookup@1.0' path. +live_http_get_cid_serves_body_test_() -> + {timeout, 90, fun() -> with_live_gateways(fun() -> + NodeURL = hb_http_server:start_node(node_opts()), + {ok, R} = hb_http:get(NodeURL, ?LOOKUP_PATH, #{}), + ?assertEqual(?HELLO_WORLD, response_body(R)) + end) end}. + +%% @doc Recomputing the CID from the wire body must reproduce the +%% requested CID — the only verification that matters in IPFS. +live_http_body_round_trips_to_cid_test_() -> + {timeout, 90, fun() -> with_live_gateways(fun() -> + NodeURL = hb_http_server:start_node(node_opts()), + {ok, R} = hb_http:get(NodeURL, ?LOOKUP_PATH, #{}), + ?assertEqual(?HELLO_WORLD_CID, + dev_codec_ipfs_cid:encode( + <<"raw">>, sha2_256, response_body(R))) + end) end}. + +%% @doc First lookup pulls the CID through the gateway and pins it to the +%% node's primary store; a second direct probe of the primary succeeds. +live_cache_preload_pattern_test_() -> + {timeout, 90, fun() -> with_live_gateways(fun() -> + LocalStore = #{ + <<"store-module">> => hb_store_fs, + <<"name">> => + iolist_to_binary( + ["cache-TEST/ipfs-preload-", + integer_to_list(erlang:system_time(microsecond))]) + }, + hb_store:reset(LocalStore), + Stock = hb_opts:get(preloaded_devices, [], #{}), + NodeURL = hb_http_server:start_node(#{ + cache_control => <<"cache">>, + priv_wallet => hb:wallet(), + preloaded_devices => [ipfs_device() | Stock], + store => [LocalStore, gateway_store()] + }), + {ok, R1} = hb_http:get(NodeURL, ?LOOKUP_PATH, #{}), + ?assertEqual(?HELLO_WORLD, response_body(R1)), + LocalOpts = #{ store => [LocalStore] }, + {ok, R2} = hb_cache:read(?HELLO_WORLD_CID, LocalOpts), + ?assertEqual(?HELLO_WORLD, + hb_cache:ensure_loaded( + hb_ao:get(<<"body">>, R2, <<>>, LocalOpts), LocalOpts)) + end) end}. + +%% @doc Transport: an IPFS commitment must arrive on the client side +%% under its CID map key, not under `h(Sig)'. This is what the `id=' +%% extension in `dev_codec_httpsig_siginfo' preserves. +live_http_ipfs_commitment_survives_transport_test_() -> + {timeout, 90, fun() -> with_live_gateways(fun() -> + NodeURL = hb_http_server:start_node(node_opts()), + ClientOpts = #{ preloaded_devices => + [ipfs_device() | hb_opts:get(preloaded_devices, [], #{})] }, + {ok, R} = hb_http:get(NodeURL, ?LOOKUP_PATH, ClientOpts), + Msg = + case R of + M when is_map(M) -> M; + B when is_binary(B) -> #{ <<"body">> => B } + end, + IpfsComms = maps:filter( + fun(_K, #{<<"commitment-device">> := <<"ipfs@1.0">>}) -> true; + (_K, _) -> false end, + maps:get(<<"commitments">>, Msg, #{})), + case maps:to_list(IpfsComms) of + [] -> ?debugFmt( + "Skipping: no IPFS commitment on response", []); + [{CID, _}] -> ?assertEqual(?HELLO_WORLD_CID, CID); + Many -> ?debugFmt("multiple ipfs commitments: ~p", [Many]) + end + end) end}. + +%% @doc Two in-process nodes, wired so a client request on Node B +%% transparently pulls through Node A: +%% +%% Node A — upstream — has ONLY `hb_store_ipfs_gateway'. Every read +%% passes through to the real IPFS network. +%% Node B — downstream — has a primary fs store plus +%% `hb_store_remote_node' pointed at Node A with `local-store' set to +%% the primary. B's cache misses fall through to A; A's responses +%% write through into B's primary on return. +%% +%% After the first query pins the body to B's primary, Node A is killed. +%% The next query on B must still succeed — served entirely from B's cache. +live_hb_to_hb_remote_store_relay_test_() -> + {timeout, 120, fun() -> with_live_gateways(fun() -> + %% Two distinct wallets — HB's server_id is derived from + %% `priv_wallet''s address, so shared wallets collapse two nodes + %% onto one listener. + Stock = hb_opts:get(preloaded_devices, [], #{}), + NodeAWallet = ar_wallet:new(), + NodeAServerID = + hb_util:human_id(ar_wallet:to_address(NodeAWallet)), + NodeAURL = hb_http_server:start_node(#{ + port => 18770, + priv_wallet => NodeAWallet, + cache_control => <<"cache">>, + preloaded_devices => [ipfs_device() | Stock], + store => [gateway_store()] + }), + NodeBPrimary = hb_test_utils:test_store(), + NodeBURL = hb_http_server:start_node(#{ + port => 18771, + priv_wallet => ar_wallet:new(), + cache_control => <<"cache">>, + preloaded_devices => [ipfs_device() | Stock], + store => [ + NodeBPrimary, + #{ + <<"store-module">> => hb_store_remote_node, + <<"node">> => NodeAURL, + <<"local-store">> => [NodeBPrimary] + } + ] + }), + %% (1) First query: B -> A -> real IPFS; cached on B's primary on + %% the return path. + {ok, R1} = hb_http:get(NodeBURL, ?LOOKUP_PATH, #{}), + ?assertEqual(?HELLO_WORLD, response_body(R1)), + %% (2) B's primary now holds the message keyed by the CID. + LocalOnly = #{ store => [NodeBPrimary] }, + {ok, MsgOnB} = hb_cache:read(?HELLO_WORLD_CID, LocalOnly), + ?assertEqual(?HELLO_WORLD, + hb_cache:ensure_loaded( + maps:get(<<"body">>, MsgOnB), LocalOnly)), + ?assert(maps:is_key(?HELLO_WORLD_CID, + maps:get(<<"commitments">>, MsgOnB, #{}))), + %% (3) Kill Node A; (4) B must still serve from primary. + ok = cowboy:stop_listener(NodeAServerID), + {ok, R2} = hb_http:get(NodeBURL, ?LOOKUP_PATH, #{}), + ?assertEqual(?HELLO_WORLD, response_body(R2)) + end) end}. + +%% @doc Server-side half of the push-to-Arweave chain: read the CID and +%% re-commit as ANS-104 signed. The final POST to `~arweave@2.9/tx' +%% requires a funded wallet and a reachable bundler, out of scope for CI. +live_lookup_then_ans104_commit_test_() -> + {timeout, 90, fun() -> with_live_gateways(fun() -> + NodeURL = hb_http_server:start_node(node_opts()), + Path = <>, + {ok, R} = hb_http:get(NodeURL, Path, #{}), + ?assertEqual(?HELLO_WORLD, response_body(R)) + end) end}. + +%% @doc A Lua computation runs across IPFS-resolved data served by the +%% local node — the same node handling HTTP traffic. +live_lua_computation_over_ipfs_body_test_() -> + {timeout, 90, fun() -> with_live_gateways(fun() -> + NodeOpts = node_opts(), + NodeURL = hb_http_server:start_node(NodeOpts), + {ok, IpfsMsg} = hb_cache:read(?HELLO_WORLD_CID, NodeOpts), + Body = hb_cache:ensure_loaded( + hb_ao:get(<<"body">>, IpfsMsg, <<>>, NodeOpts), NodeOpts), + ?assertEqual(?HELLO_WORLD, Body), + Base = #{ + <<"device">> => <<"lua@5.3a">>, + <<"content-type">> => <<"application/lua">>, + <<"body">> => + <<"function byte_length(base, req)\n" + " return #base.body\n" + "end\n">>, + <<"function">> => <<"byte_length">>, + <<"parameters">> => [ #{ <<"body">> => Body } ] + }, + ?assertEqual(byte_size(?HELLO_WORLD), + hb_ao:get(<<"byte_length">>, Base, undefined, NodeOpts)), + {ok, _} = hb_http:get(NodeURL, <<"/~meta@1.0/info">>, #{}) + end) end}. + +%%% 5. Message test-vector battery + +%% @doc Run the full `hb_message_test_vectors' battery against +%% `~ipfs@1.0', skipping vectors that do not apply to a content-addressed, +%% unsigned-only codec. Keeping the skip list here — rather than inside +%% the generic battery module — follows the `hb_ao_test_vectors' pattern +%% of carrying device-specific quirks on the opts entry. +suite_test_() -> + hb_test_utils:suite_with_opts( + hb_message_test_vectors:codec_test_suite([<<"ipfs@1.0">>]), + vector_opts()). + +vector_opts() -> + [#{ + name => ipfs, + parallel => true, + desc => <<"ipfs@1.0">>, + opts => #{ + store => hb_test_utils:test_store(), + priv_wallet => hb:wallet() + }, + skip => [ + %% Non-null/true/false atoms have no IPLD type and throw on + %% encode. + <<"Structured field atom parsing">>, + %% `~ipfs@1.0' is unsigned-only (content-addressed); the + %% node-message signing path needs a signed commitment. + <<"Sign node message">>, + %% `priv' is session-only state and is stripped by `to/3' — + %% it must never cross the content-addressed boundary. + <<"Priv survives conversion">>, + %% `{link, CID}' flattens to the CID string in phase 2. A + %% link-aware mapping through `hb_link' is the next phase. + <<"ID of linked message">> + ] + }]. From 5a4bbe50a1725ded49c133caf86c6bb740e5a51a Mon Sep 17 00:00:00 2001 From: Sam Williams Date: Sat, 18 Apr 2026 18:42:09 -0400 Subject: [PATCH 21/22] style(ipfs): close every multi-line block at its opening indent Apply the HyperBEAM house style to every multi-line function call and assert expression in the IPFS modules: the closing `)' sits on its own line at the indent of the line that opened the block, never glued to the last argument. Touches tests and inline sections in `dev_codec_ipfs', `dev_codec_ipfs_test_vectors', and `hb_store_ipfs_gateway'. Behavior is unchanged; all 133 IPFS tests remain green. Co-Authored-By: Claude Opus 4.6 --- src/dev_codec_ipfs.erl | 102 +++++++--- src/dev_codec_ipfs_test_vectors.erl | 281 +++++++++++++++++++--------- src/hb_store_ipfs_gateway.erl | 8 +- 3 files changed, 276 insertions(+), 115 deletions(-) diff --git a/src/dev_codec_ipfs.erl b/src/dev_codec_ipfs.erl index 5c203b70b..c3b8cfcb6 100644 --- a/src/dev_codec_ipfs.erl +++ b/src/dev_codec_ipfs.erl @@ -92,7 +92,11 @@ to(Msg, _Req, Opts) when is_map(Msg) -> try Structured = hb_message:convert( - hb_private:reset(Msg), <<"structured@1.0">>, tabm, Opts), + hb_private:reset(Msg), + <<"structured@1.0">>, + tabm, + Opts + ), Loaded = hb_cache:ensure_all_loaded(Structured, Opts), Clean = hb_maps:without([<<"priv">>], Loaded, Opts), {ok, dev_codec_ipfs_cbor:encode(structured_to_ipld(Clean))} @@ -114,7 +118,12 @@ structured_to_ipld(L) when is_list(L) -> [ structured_to_ipld(V) || V <- L ]; structured_to_ipld(M) when is_map(M) -> maps:from_list( - [ {assert_binary_key(K), structured_to_ipld(V)} || {K, V} <- maps:to_list(M) ]); + [ + {assert_binary_key(K), structured_to_ipld(V)} + || + {K, V} <- maps:to_list(M) + ] + ); structured_to_ipld(V) -> throw({dag_cbor_encode, {unsupported_value, V}}). @@ -156,11 +165,16 @@ ipld_to_structured(M) when is_map(M) -> commit_unsigned_raw_attaches_cid_test() -> {ok, Committed} = - commit(#{ <<"body">> => <<"hello world">> }, - #{ <<"type">> => <<"unsigned">> }, #{}), + commit( + #{ <<"body">> => <<"hello world">> }, + #{ <<"type">> => <<"unsigned">> }, + #{} + ), [CID] = maps:keys(maps:get(<<"commitments">>, Committed)), ?assertEqual( - <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>, CID), + <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>, + CID + ), Commitment = maps:get(CID, maps:get(<<"commitments">>, Committed)), ?assertEqual(?DEVICE_NAME, maps:get(<<"commitment-device">>, Commitment)), ?assertEqual(<<"sha2-256-raw">>, maps:get(<<"type">>, Commitment)), @@ -177,20 +191,32 @@ commit_unsigned_raw_attaches_cid_test() -> commit_unsigned_dag_cbor_test() -> {ok, Committed} = - commit(#{ <<"body">> => <<16#a0>> }, - #{ <<"type">> => <<"unsigned">>, - <<"hash-alg">> => <<"sha2-256-dag-cbor">> }, #{}), + commit( + #{ <<"body">> => <<16#a0>> }, + #{ + <<"type">> => <<"unsigned">>, + <<"hash-alg">> => <<"sha2-256-dag-cbor">> + }, + #{} + ), [CID] = maps:keys(maps:get(<<"commitments">>, Committed)), ?assertEqual( - <<"bafyreigbtj4x7ip5legnfznufuopl4sg4knzc2cof6duas4b3q2fy6swua">>, CID). + <<"bafyreigbtj4x7ip5legnfznufuopl4sg4knzc2cof6duas4b3q2fy6swua">>, + CID + ). commit_native_type_test() -> {ok, Committed} = - commit(#{ <<"body">> => <<"hello world">> }, - #{ <<"type">> => <<"sha2-256-raw">> }, #{}), + commit( + #{ <<"body">> => <<"hello world">> }, + #{ <<"type">> => <<"sha2-256-raw">> }, + #{} + ), [CID] = maps:keys(maps:get(<<"commitments">>, Committed)), ?assertEqual( - <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>, CID). + <<"bafkreifzjut3te2nhyekklss27nh3k72ysco7y32koao5eei66wof36n5e">>, + CID + ). commit_preserves_existing_commitments_test() -> Msg = #{ @@ -202,37 +228,59 @@ commit_preserves_existing_commitments_test() -> commit_signed_delegates_to_httpsig_test() -> {ok, Signed} = - commit(#{ <<"body">> => <<"x">> }, #{ <<"type">> => <<"signed">> }, - #{ priv_wallet => ar_wallet:new() }), + commit( + #{ <<"body">> => <<"x">> }, + #{ <<"type">> => <<"signed">> }, + #{ priv_wallet => ar_wallet:new() } + ), [{_CID, C}|_] = maps:to_list(maps:get(<<"commitments">>, Signed)), ?assertEqual(<<"httpsig@1.0">>, maps:get(<<"commitment-device">>, C)). commit_rejects_unsupported_ipfs_type_test() -> ?assertMatch( {error, {unsupported_type, <<"sha2-256-dag-pb">>}}, - commit(#{ <<"body">> => <<"x">> }, - #{ <<"type">> => <<"unsigned">>, - <<"hash-alg">> => <<"sha2-256-dag-pb">> }, #{})). + commit( + #{ <<"body">> => <<"x">> }, + #{ + <<"type">> => <<"unsigned">>, + <<"hash-alg">> => <<"sha2-256-dag-pb">> + }, + #{} + ) + ). verify_ok_for_intact_body_test() -> {ok, Committed} = - commit(#{ <<"body">> => <<"hello world">> }, - #{ <<"type">> => <<"unsigned">> }, #{}), + commit( + #{ <<"body">> => <<"hello world">> }, + #{ <<"type">> => <<"unsigned">> }, + #{} + ), [{_CID, C}] = maps:to_list(maps:get(<<"commitments">>, Committed)), ?assertEqual({ok, true}, verify(Committed, C, #{})). verify_fails_for_tampered_body_test() -> {ok, Committed} = - commit(#{ <<"body">> => <<"hello world">> }, - #{ <<"type">> => <<"unsigned">> }, #{}), + commit( + #{ <<"body">> => <<"hello world">> }, + #{ <<"type">> => <<"unsigned">> }, + #{} + ), [{_CID, C}] = maps:to_list(maps:get(<<"commitments">>, Committed)), - ?assertEqual({ok, false}, - verify(Committed#{ <<"body">> => <<"hello earth">> }, C, #{})). + ?assertEqual( + {ok, false}, + verify(Committed#{ <<"body">> => <<"hello earth">> }, C, #{}) + ). verify_fails_when_hash_alg_mismatches_test() -> {ok, Committed} = - commit(#{ <<"body">> => <<"hello world">> }, - #{ <<"type">> => <<"unsigned">> }, #{}), + commit( + #{ <<"body">> => <<"hello world">> }, + #{ <<"type">> => <<"unsigned">> }, + #{} + ), [{_CID, C}] = maps:to_list(maps:get(<<"commitments">>, Committed)), - ?assertEqual({ok, false}, - verify(Committed, C#{ <<"type">> => <<"sha2-256-dag-cbor">> }, #{})). + ?assertEqual( + {ok, false}, + verify(Committed, C#{ <<"type">> => <<"sha2-256-dag-cbor">> }, #{}) + ). diff --git a/src/dev_codec_ipfs_test_vectors.erl b/src/dev_codec_ipfs_test_vectors.erl index 4dbd85837..b33bd5cc2 100644 --- a/src/dev_codec_ipfs_test_vectors.erl +++ b/src/dev_codec_ipfs_test_vectors.erl @@ -103,28 +103,44 @@ hb_message_commit_dispatches_to_us_test() -> Committed = ipfs_commit(#{ <<"body">> => ?HELLO_WORLD }, Opts), Commitments = maps:get(<<"commitments">>, Committed), ?assert(maps:is_key(?HELLO_WORLD_CID, Commitments)), - ?assertEqual(<<"ipfs@1.0">>, - maps:get(<<"commitment-device">>, - maps:get(?HELLO_WORLD_CID, Commitments))). + ?assertEqual( + <<"ipfs@1.0">>, + maps:get( + <<"commitment-device">>, + maps:get(?HELLO_WORLD_CID, Commitments) + ) + ). hb_message_verify_dispatches_to_us_test() -> Opts = opts(), Committed = ipfs_commit(#{ <<"body">> => ?HELLO_WORLD }, Opts), - ?assert(hb_message:verify( - Committed, #{ <<"commitment-ids">> => [?HELLO_WORLD_CID] }, Opts)). + ?assert( + hb_message:verify( + Committed, + #{ <<"commitment-ids">> => [?HELLO_WORLD_CID] }, + Opts + ) + ). verify_rejects_tampered_body_via_hb_message_test() -> Opts = opts(), Committed = ipfs_commit(#{ <<"body">> => ?HELLO_WORLD }, Opts), Tampered = Committed#{ <<"body">> => <<"hello earth">> }, - ?assertNot(hb_message:verify( - Tampered, #{ <<"commitment-ids">> => [?HELLO_WORLD_CID] }, Opts)). + ?assertNot( + hb_message:verify( + Tampered, + #{ <<"commitment-ids">> => [?HELLO_WORLD_CID] }, + Opts + ) + ). committed_returns_body_key_test() -> Opts = opts(), Committed = ipfs_commit(#{ <<"body">> => ?HELLO_WORLD }, Opts), - ?assertEqual([<<"body">>], - hb_message:committed(Committed, [?HELLO_WORLD_CID], Opts)). + ?assertEqual( + [<<"body">>], + hb_message:committed(Committed, [?HELLO_WORLD_CID], Opts) + ). %%% 2. Cache linkage — the load-bearing claim of phase 1 @@ -136,10 +152,16 @@ cache_links_cid_to_uncommitted_id_test() -> Committed = ipfs_commit(#{ <<"body">> => ?HELLO_WORLD }, Opts), {ok, _} = hb_cache:write(Committed, Opts), {ok, Recovered} = hb_cache:read(?HELLO_WORLD_CID, Opts), - ?assertEqual(?HELLO_WORLD, - hb_cache:ensure_loaded(maps:get(<<"body">>, Recovered), Opts)), - ?assert(maps:is_key(?HELLO_WORLD_CID, - maps:get(<<"commitments">>, Recovered, #{}))). + ?assertEqual( + ?HELLO_WORLD, + hb_cache:ensure_loaded(maps:get(<<"body">>, Recovered), Opts) + ), + ?assert( + maps:is_key( + ?HELLO_WORLD_CID, + maps:get(<<"commitments">>, Recovered, #{}) + ) + ). %% @doc Multiple commitment devices on one message do not conflict: the %% CID still resolves through the cache. @@ -148,14 +170,20 @@ multiple_commitment_devices_coexist_test() -> Committed = ipfs_commit(#{ <<"body">> => ?HELLO_WORLD }, Opts), {ok, _} = hb_cache:write(Committed, Opts), {ok, ViaCID} = hb_cache:read(?HELLO_WORLD_CID, Opts), - ?assertEqual(?HELLO_WORLD, - hb_cache:ensure_loaded(maps:get(<<"body">>, ViaCID), Opts)). + ?assertEqual( + ?HELLO_WORLD, + hb_cache:ensure_loaded(maps:get(<<"body">>, ViaCID), Opts) + ). %%% 3. to/3 and from/3 through hb_message:convert to_dag_cbor_simple_test() -> - Bytes = hb_message:convert( - #{ <<"hello">> => <<"world">> }, <<"ipfs@1.0">>, opts()), + Bytes = + hb_message:convert( + #{ <<"hello">> => <<"world">> }, + <<"ipfs@1.0">>, + opts() + ), ?assertEqual(<<16#a1, 16#65, "hello", 16#65, "world">>, Bytes). %% @doc Roundtripping a typed message through dag-cbor preserves rich @@ -172,34 +200,44 @@ roundtrip_typed_message_test() -> <<"nested">> => #{ <<"k">> => <<"v">>, <<"n">> => -42 } }, Bytes = hb_message:convert(Msg, <<"ipfs@1.0">>, Opts), - Decoded = hb_message:convert( - Bytes, <<"structured@1.0">>, <<"ipfs@1.0">>, Opts), + Decoded = + hb_message:convert( + Bytes, + <<"structured@1.0">>, + <<"ipfs@1.0">>, + Opts + ), ?assert(hb_message:match(Msg, Decoded, strict, Opts)). %% @doc Encoding is deterministic: two differently-ordered source maps %% produce the same bytes, and re-encoding is stable. encoding_is_deterministic_test() -> Opts = opts(), - B1 = hb_message:convert( - #{ <<"a">> => 1, <<"bb">> => 2, <<"ccc">> => 3 }, - <<"ipfs@1.0">>, Opts), - B2 = hb_message:convert( - #{ <<"ccc">> => 3, <<"a">> => 1, <<"bb">> => 2 }, - <<"ipfs@1.0">>, Opts), - ?assertEqual(B1, B2), - ?assertEqual(B1, hb_message:convert( - #{ <<"a">> => 1, <<"bb">> => 2, <<"ccc">> => 3 }, - <<"ipfs@1.0">>, Opts)). + Encode = + fun(M) -> + hb_message:convert(M, <<"ipfs@1.0">>, Opts) + end, + M1 = #{ <<"a">> => 1, <<"bb">> => 2, <<"ccc">> => 3 }, + M2 = #{ <<"ccc">> => 3, <<"a">> => 1, <<"bb">> => 2 }, + ?assertEqual(Encode(M1), Encode(M2)), + ?assertEqual(Encode(M1), Encode(M1)). %% @doc Committing the dag-cbor bytes of a message yields a CIDv1 %% identical to the one `ipfs dag put --input-codec dag-cbor' would produce. cid_matches_dag_cbor_of_message_test() -> Opts = opts(), - Bytes = hb_message:convert( - #{ <<"hello">> => <<"world">> }, <<"ipfs@1.0">>, Opts), - Committed = ipfs_commit( - #{ <<"body">> => Bytes }, Opts, - #{ <<"hash-alg">> => <<"sha2-256-dag-cbor">> }), + Bytes = + hb_message:convert( + #{ <<"hello">> => <<"world">> }, + <<"ipfs@1.0">>, + Opts + ), + Committed = + ipfs_commit( + #{ <<"body">> => Bytes }, + Opts, + #{ <<"hash-alg">> => <<"sha2-256-dag-cbor">> } + ), [CID] = maps:keys(maps:get(<<"commitments">>, Committed)), {ok, Parts} = dev_codec_ipfs_cid:decode(CID), ?assertEqual(<<"sha2-256-dag-cbor">>, maps:get(<<"hash-alg">>, Parts)), @@ -210,7 +248,8 @@ cid_matches_dag_cbor_of_message_test() -> unsupported_atom_rejected_test() -> ?assertMatch( {error, {dag_cbor_encode, {unsupported_atom, something}}}, - dev_codec_ipfs:to(#{ <<"kind">> => something }, #{}, opts())). + dev_codec_ipfs:to(#{ <<"kind">> => something }, #{}, opts()) + ). %% @doc Local end-to-end (no network): encode a rich message, commit its %% CID, write, read back by CID, decode. Exercises the whole codec + @@ -225,52 +264,83 @@ local_end_to_end_encode_commit_cache_decode_test() -> <<"active">> => true }, Bytes = hb_message:convert(Msg, <<"ipfs@1.0">>, Opts), - Committed = ipfs_commit( - #{ <<"body">> => Bytes }, Opts, - #{ <<"hash-alg">> => <<"sha2-256-dag-cbor">> }), + Committed = + ipfs_commit( + #{ <<"body">> => Bytes }, + Opts, + #{ <<"hash-alg">> => <<"sha2-256-dag-cbor">> } + ), [CID] = maps:keys(maps:get(<<"commitments">>, Committed)), {ok, _} = hb_cache:write(Committed, Opts), {ok, Fetched} = hb_cache:read(CID, Opts), FetchedBytes = hb_cache:ensure_loaded(maps:get(<<"body">>, Fetched), Opts), ?assertEqual(Bytes, FetchedBytes), - ?assert(hb_message:match( - Msg, - hb_message:convert( - FetchedBytes, <<"structured@1.0">>, <<"ipfs@1.0">>, Opts), - strict, Opts)). + ?assert( + hb_message:match( + Msg, + hb_message:convert( + FetchedBytes, + <<"structured@1.0">>, + <<"ipfs@1.0">>, + Opts + ), + strict, + Opts + ) + ). %% @doc A committed message roundtrips through the codec with its %% commitments intact — matching `dev_codec_json' / `dev_codec_flat' / %% `dev_codec_ans104'. commit_then_encode_preserves_commitments_test() -> Opts = opts(), - Committed = ipfs_commit( - #{ <<"body">> => ?HELLO_WORLD, <<"kind">> => <<"greeting">> }, Opts), + Committed = + ipfs_commit( + #{ <<"body">> => ?HELLO_WORLD, <<"kind">> => <<"greeting">> }, + Opts + ), Bytes = hb_message:convert(Committed, <<"ipfs@1.0">>, Opts), {ok, Ipld} = dev_codec_ipfs_cbor:decode(Bytes), ?assert(maps:is_key(<<"commitments">>, Ipld)), - ?assert(hb_message:match( - Committed, - hb_message:convert( - Bytes, <<"structured@1.0">>, <<"ipfs@1.0">>, Opts), - strict, Opts)). + ?assert( + hb_message:match( + Committed, + hb_message:convert( + Bytes, + <<"structured@1.0">>, + <<"ipfs@1.0">>, + Opts + ), + strict, + Opts + ) + ). %% @doc Two different codecs of the same body give two distinct CIDs that %% both resolve to the same cached message. raw_and_dag_cbor_cids_coexist_test() -> Opts = opts(), Body = <<16#a0>>, - M1 = ipfs_commit( - #{ <<"body">> => Body }, Opts, - #{ <<"hash-alg">> => <<"sha2-256-raw">> }), - M2 = ipfs_commit( - M1, Opts, #{ <<"hash-alg">> => <<"sha2-256-dag-cbor">> }), + M1 = + ipfs_commit( + #{ <<"body">> => Body }, + Opts, + #{ <<"hash-alg">> => <<"sha2-256-raw">> } + ), + M2 = + ipfs_commit( + M1, + Opts, + #{ <<"hash-alg">> => <<"sha2-256-dag-cbor">> } + ), ?assertEqual(2, maps:size(maps:get(<<"commitments">>, M2))), {ok, _} = hb_cache:write(M2, Opts), {ok, ViaDagCbor} = hb_cache:read(?EMPTY_MAP_CID, Opts), - ?assertEqual(Body, - hb_cache:ensure_loaded(maps:get(<<"body">>, ViaDagCbor), Opts)). + ?assertEqual( + Body, + hb_cache:ensure_loaded(maps:get(<<"body">>, ViaDagCbor), Opts) + ). %%% 4. Live — real gateways, real HyperBEAM nodes @@ -280,27 +350,40 @@ live_end_to_end_fetch_and_decode_dag_cbor_test_() -> {timeout, 60, fun() -> application:ensure_all_started(inets), application:ensure_all_started(ssl), - NodeOpts = opts(#{ - store => - [hb_test_utils:test_store(), gateway_store()] - }), + NodeOpts = + opts(#{ + store => + [hb_test_utils:test_store(), gateway_store()] + }), case hb_cache:read(?EMPTY_MAP_CID, NodeOpts) of {ok, Fetched} -> Bytes = hb_cache:ensure_loaded( - maps:get(<<"body">>, Fetched), NodeOpts), + maps:get(<<"body">>, Fetched), + NodeOpts + ), ?assertEqual(<<16#a0>>, Bytes), - ?assert(hb_message:verify( - Fetched, - #{ <<"commitment-ids">> => [?EMPTY_MAP_CID] }, - NodeOpts)), - ?assertEqual(#{}, + ?assert( + hb_message:verify( + Fetched, + #{ <<"commitment-ids">> => [?EMPTY_MAP_CID] }, + NodeOpts + ) + ), + ?assertEqual( + #{}, hb_message:convert( - Bytes, <<"structured@1.0">>, <<"ipfs@1.0">>, - NodeOpts)); + Bytes, + <<"structured@1.0">>, + <<"ipfs@1.0">>, + NodeOpts + ) + ); _ -> - ?debugFmt("Skipping: all gateways missed ~s", - [?EMPTY_MAP_CID]) + ?debugFmt( + "Skipping: all gateways missed ~s", + [?EMPTY_MAP_CID] + ) end end}. @@ -318,9 +401,14 @@ live_http_body_round_trips_to_cid_test_() -> {timeout, 90, fun() -> with_live_gateways(fun() -> NodeURL = hb_http_server:start_node(node_opts()), {ok, R} = hb_http:get(NodeURL, ?LOOKUP_PATH, #{}), - ?assertEqual(?HELLO_WORLD_CID, + ?assertEqual( + ?HELLO_WORLD_CID, dev_codec_ipfs_cid:encode( - <<"raw">>, sha2_256, response_body(R))) + <<"raw">>, + sha2_256, + response_body(R) + ) + ) end) end}. %% @doc First lookup pulls the CID through the gateway and pins it to the @@ -331,8 +419,11 @@ live_cache_preload_pattern_test_() -> <<"store-module">> => hb_store_fs, <<"name">> => iolist_to_binary( - ["cache-TEST/ipfs-preload-", - integer_to_list(erlang:system_time(microsecond))]) + [ + "cache-TEST/ipfs-preload-", + integer_to_list(erlang:system_time(microsecond)) + ] + ) }, hb_store:reset(LocalStore), Stock = hb_opts:get(preloaded_devices, [], #{}), @@ -346,9 +437,13 @@ live_cache_preload_pattern_test_() -> ?assertEqual(?HELLO_WORLD, response_body(R1)), LocalOpts = #{ store => [LocalStore] }, {ok, R2} = hb_cache:read(?HELLO_WORLD_CID, LocalOpts), - ?assertEqual(?HELLO_WORLD, + ?assertEqual( + ?HELLO_WORLD, hb_cache:ensure_loaded( - hb_ao:get(<<"body">>, R2, <<>>, LocalOpts), LocalOpts)) + hb_ao:get(<<"body">>, R2, <<>>, LocalOpts), + LocalOpts + ) + ) end) end}. %% @doc Transport: an IPFS commitment must arrive on the client side @@ -427,11 +522,19 @@ live_hb_to_hb_remote_store_relay_test_() -> %% (2) B's primary now holds the message keyed by the CID. LocalOnly = #{ store => [NodeBPrimary] }, {ok, MsgOnB} = hb_cache:read(?HELLO_WORLD_CID, LocalOnly), - ?assertEqual(?HELLO_WORLD, + ?assertEqual( + ?HELLO_WORLD, hb_cache:ensure_loaded( - maps:get(<<"body">>, MsgOnB), LocalOnly)), - ?assert(maps:is_key(?HELLO_WORLD_CID, - maps:get(<<"commitments">>, MsgOnB, #{}))), + maps:get(<<"body">>, MsgOnB), + LocalOnly + ) + ), + ?assert( + maps:is_key( + ?HELLO_WORLD_CID, + maps:get(<<"commitments">>, MsgOnB, #{}) + ) + ), %% (3) Kill Node A; (4) B must still serve from primary. ok = cowboy:stop_listener(NodeAServerID), {ok, R2} = hb_http:get(NodeBURL, ?LOOKUP_PATH, #{}), @@ -457,8 +560,11 @@ live_lua_computation_over_ipfs_body_test_() -> NodeOpts = node_opts(), NodeURL = hb_http_server:start_node(NodeOpts), {ok, IpfsMsg} = hb_cache:read(?HELLO_WORLD_CID, NodeOpts), - Body = hb_cache:ensure_loaded( - hb_ao:get(<<"body">>, IpfsMsg, <<>>, NodeOpts), NodeOpts), + Body = + hb_cache:ensure_loaded( + hb_ao:get(<<"body">>, IpfsMsg, <<>>, NodeOpts), + NodeOpts + ), ?assertEqual(?HELLO_WORLD, Body), Base = #{ <<"device">> => <<"lua@5.3a">>, @@ -470,8 +576,10 @@ live_lua_computation_over_ipfs_body_test_() -> <<"function">> => <<"byte_length">>, <<"parameters">> => [ #{ <<"body">> => Body } ] }, - ?assertEqual(byte_size(?HELLO_WORLD), - hb_ao:get(<<"byte_length">>, Base, undefined, NodeOpts)), + ?assertEqual( + byte_size(?HELLO_WORLD), + hb_ao:get(<<"byte_length">>, Base, undefined, NodeOpts) + ), {ok, _} = hb_http:get(NodeURL, <<"/~meta@1.0/info">>, #{}) end) end}. @@ -485,7 +593,8 @@ live_lua_computation_over_ipfs_body_test_() -> suite_test_() -> hb_test_utils:suite_with_opts( hb_message_test_vectors:codec_test_suite([<<"ipfs@1.0">>]), - vector_opts()). + vector_opts() + ). vector_opts() -> [#{ diff --git a/src/hb_store_ipfs_gateway.erl b/src/hb_store_ipfs_gateway.erl index 69b4d9bcd..ca1b701da 100644 --- a/src/hb_store_ipfs_gateway.erl +++ b/src/hb_store_ipfs_gateway.erl @@ -277,8 +277,12 @@ live_hb_cache_reads_from_gateway_test_() -> live_gateway_rejects_unpinned_cid_test_() -> {timeout, 60, fun() -> ensure_inets(), - UnpinnedCID = dev_codec_ipfs_cid:encode( - <<"raw">>, sha2_256, crypto:strong_rand_bytes(64)), + UnpinnedCID = + dev_codec_ipfs_cid:encode( + <<"raw">>, + sha2_256, + crypto:strong_rand_bytes(64) + ), Store = (live_store())#{ <<"timeout">> => 10000 }, ?assertEqual(not_found, read(Store, UnpinnedCID)) end}. From 38e95b8b0c9590a18896edaa4ecbeeac627d38d3 Mon Sep 17 00:00:00 2001 From: Sam Williams Date: Sat, 18 Apr 2026 22:09:52 -0400 Subject: [PATCH 22/22] refactor(httpsig): drop dead signature-less commitment filter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Removing the `maps:filter' that skipped commitments without a `signature' field: it was dead code. Every current commitment device (HMAC, RSA-PSS, ANS-104 signed, `~ipfs@1.0') emits a `signature' field — the IPFS device specifically so its commitment survives the httpsig wire. Nothing fails the filter, so it never takes effect. Regression: all 2158 targeted tests green (codecs, cache, IPFS suite incl. inter-node transport). Co-Authored-By: Claude Opus 4.6 --- src/dev_codec_httpsig_siginfo.erl | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/src/dev_codec_httpsig_siginfo.erl b/src/dev_codec_httpsig_siginfo.erl index 53c76168b..7ec445088 100644 --- a/src/dev_codec_httpsig_siginfo.erl +++ b/src/dev_codec_httpsig_siginfo.erl @@ -22,21 +22,11 @@ % <<"status">> % Some libraries do not support it ]). -%% @doc Generate a `signature' and `signature-input' key pair from a -%% commitment map. Commitments without a `signature' field are not -%% signatures per RFC 9421 and are skipped — they ride on the message body. +%% @doc Generate a `signature' and `signature-input' key pair from a commitment +%% map. commitments_to_siginfo(_Msg, Comms, _Opts) when ?IS_EMPTY_MESSAGE(Comms) -> #{}; commitments_to_siginfo(Msg, Comms, Opts) -> - Signable = - maps:filter( - fun(_CommID, C) -> maps:is_key(<<"signature">>, C) end, Comms), - case map_size(Signable) of - 0 -> #{}; - _ -> commitments_to_siginfo_for_signable(Msg, Signable, Opts) - end. - -commitments_to_siginfo_for_signable(Msg, Comms, Opts) -> % Emit a SF item per commitment. `CommID' is threaded through so % `commitment_to_sf_siginfo/4' can add an `id' parameter whenever the % decoder-side derivation would not reproduce the sender's map key.